diff options
author | Peng Chang (Charles) | 2011-05-13 23:47:04 +0800 |
---|---|---|
committer | Peng Chang (Charles) | 2011-05-13 23:47:04 +0800 |
commit | a560a7660578302f1b540cafe07aaa78155ea838 (patch) | |
tree | 142c453f56a3fc2223d715257769ac64e6c53419 /cluster | |
parent | 0b9f330535df76e4bc34b148cdbd54b067751dff (diff) |
atlas: actually add emit_mm.patch into git
Diffstat (limited to 'cluster')
-rw-r--r-- | cluster/atlas/emit_mm.patch | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/cluster/atlas/emit_mm.patch b/cluster/atlas/emit_mm.patch new file mode 100644 index 0000000000..0ce40f249b --- /dev/null +++ b/cluster/atlas/emit_mm.patch @@ -0,0 +1,154 @@ +Generated by Charles Peng <chp@sourcemage.org> +--- emit_mm.c 2009-02-19 02:48:25.000000000 +0800 ++++ emit_mm.c 2011-05-13 16:21:41.491001028 +0800 +@@ -1,5 +1,5 @@ + /* +- * Automatically Tuned Linear Algebra Software v3.8.3 ++ * Automatically Tuned Linear Algebra Software v3.8.4 + * (C) Copyright 1997 R. Clint Whaley + * + * Redistribution and use in source and binary forms, with or without +@@ -1638,7 +1638,7 @@ void MMDeclare(FILE *fpout, char *spc, c + cTA, cTB, M, N, K); + fprintf(fpout, " * lda=%d, ldb=%d, ldc=%d, mu=%d, nu=%d, ku=%d, pf=%d\n", + lda, ldb, ldc, mu, nu, ku, pfA); +- fprintf(fpout, " * Generated by ATLAS/tune/blas/gemm/emit_mm.c (3.8.3)\n"); ++ fprintf(fpout, " * Generated by ATLAS/tune/blas/gemm/emit_mm.c (3.8.4)\n"); + fprintf(fpout, " */\n"); + } + +@@ -3082,7 +3082,17 @@ int CompMultHandled(CLEANNODE *cp, int n + * Returns 1 if nb is handled by succeeding case, 0 otherwise + */ + { +- for (; cp; cp = cp->next) if (nb % cp->imult == 0) return(1); ++ ++ for (; cp; cp = cp->next) ++ { ++ if (cp->fixed == 2) ++ { ++ if (cp->imult == nb) ++ return(1); ++ } ++ else if (nb % cp->imult == 0) ++ return(1); ++ } + return(0); + } + +@@ -3110,14 +3120,6 @@ int *GetCompNBs(enum CW which, CLEANNODE + istop = cp0->nb; + while (istop % cp0->imult) istop--; + if (istop == cp0->nb) istop -= cp0->imult; +- for (cp=cp0->next; cp; cp = cp->next) +- { +- if (cp->imult % cp0->imult == 0 && cp->fixed != 2) +- { +- istop = cp->imult; +- break; +- } +- } + + for (i=2*cp0->imult; i <= istop; i += cp0->imult) + { +@@ -3582,7 +3584,7 @@ void GenUpNB_if(char pre, enum CW which, + CLEANNODE *wp, *cp0; + FILE *fp; + int i; +- const char *ifs = "else if", *ifp; ++ const char *ifs = "else if", *ifp, *sp; + const char cwh[3] = {'M', 'N', 'K'}; + char ln[128], st[2], *typ; + +@@ -3613,7 +3615,11 @@ void GenUpNB_if(char pre, enum CW which, + + if (pre == 'c') pre = 'C'; + else if (pre == 'z') pre = 'Z'; +- ++/* ++ * fixed=2 means a kernel specialized for that exact N. They will have not ++ * survived pruning if they weren't better than all the general algorithms, ++ * so we can test for them all up front ++ */ + for (cp=cp0; cp; cp = cp->next) /* handle fixed = 2 cases */ + { + if (cp->fixed == 2) +@@ -3625,36 +3631,49 @@ void GenUpNB_if(char pre, enum CW which, + ifp = ifs; + } + } +- +- for (cp=cp0; cp; cp = cp->next) /* fixed = 1 cases */ +- { +- if (cp->fixed == 1) +- { +- for (i=cp->ncomps-1; i >= 0; i--) +- { +- if (cp->imult > 1) +- fprintf(fp, " %s (%c == %d)\n", ifp, cwh[which], cp->NBs[i]); +- else fprintf(fp, " else\n"); +- fprintf(fp, " {\n ATL_%cup%cBmm%d_%d_%d_b%c%s;\n }\n", +- pre, cwh[which], cp->NBs[i], cp->imult, cp->fixed, +- cbeta, MMARGS); +- ifp = ifs; +- } +- } +- } +- for (cp=cp0; cp; cp = cp->next) /* fixed = 0 cases */ +- { ++/* ++ * All remaining kernels are selected by being a multiple of imult; ++ * fixed=0 will be called directly, while fixed=1 will have a nested ++ * if to find the right compiled version to call ++ */ ++ for (cp=cp0; cp; cp = cp->next) /* fixed = 0/1 cases */ ++ { ++ if (cp->fixed != 1 && cp->fixed != 0) ++ continue; ++ if (cp->imult > 1) ++ fprintf(fp, " %s (%c == %s)\n", ifp, cwh[which], ++ GetInc(cp->imult, GetDiv(cp->imult, st))); ++ else if (ifp == ifs) fprintf(fp, " else\n"); + if (cp->fixed == 0) + { +- if (cp->imult > 1) +- fprintf(fp, " %s (%c == %s)\n", ifp, cwh[which], +- GetInc(cp->imult, GetDiv(cp->imult, st))); +- else if (ifp == ifs) fprintf(fp, " else\n"); + fprintf(fp, " {\n ATL_%cup%cBmm0_%d_%d_b%c%s;\n }\n", + pre, cwh[which], cp->imult, cp->fixed, cbeta, MMARGS); + ifp = ifs; + } ++ else if (cp->fixed == 1) ++ { ++ fprintf(fp, " {\n"); ++ if (cp->ncomps == 1) ++ fprintf(fp, " ATL_%cup%cBmm%d_%d_%d_b%c%s;\n", ++ pre, cwh[which], cp->NBs[0], cp->imult, cp->fixed, ++ cbeta, MMARGS); ++ else /* must select kernel by NB[i] */ ++ { ++ sp = "if"; ++ for (i=cp->ncomps-1; i >= 0; i--) ++ { ++ fprintf(fp, " %s (%c == %d)\n", sp, cwh[which], cp->NBs[i]); ++ fprintf(fp, ++ " {\n ATL_%cup%cBmm%d_%d_%d_b%c%s;\n }\n", ++ pre, cwh[which], cp->NBs[i], cp->imult, cp->fixed, ++ cbeta, MMARGS); ++ sp = "else if"; ++ } ++ } ++ fprintf(fp, " }\n"); ++ } + } ++ + for (cp=cp0; cp && cp->imult != 1; cp = cp->next); + if (!cp) + { +@@ -4330,3 +4349,5 @@ main(int nargs, char **args) + } + exit(0); + } ++ ++ |