summary refs log tree commit diff stats
path: root/cluster/atlas
diff options
context:
space:
mode:
Diffstat (limited to 'cluster/atlas')
-rw-r--r-- cluster/atlas/emit_mm.patch 154
1 files changed, 154 insertions, 0 deletions
diff --git a/cluster/atlas/emit_mm.patch b/cluster/atlas/emit_mm.patch
new file mode 100644
index 0000000000..0ce40f249b
--- /dev/null
+++ b/cluster/atlas/emit_mm.patch
@@ -0,0 +1,154 @@
+Generated by Charles Peng <chp@sourcemage.org>
+--- emit_mm.c 2009-02-19 02:48:25.000000000 +0800
++++ emit_mm.c 2011-05-13 16:21:41.491001028 +0800
+@@ -1,5 +1,5 @@
+ /*
+- * Automatically Tuned Linear Algebra Software v3.8.3
++ * Automatically Tuned Linear Algebra Software v3.8.4
+ * (C) Copyright 1997 R. Clint Whaley
+ *
+ * Redistribution and use in source and binary forms, with or without
+@@ -1638,7 +1638,7 @@ void MMDeclare(FILE *fpout, char *spc, c
+ cTA, cTB, M, N, K);
+ fprintf(fpout, " * lda=%d, ldb=%d, ldc=%d, mu=%d, nu=%d, ku=%d, pf=%d\n",
+ lda, ldb, ldc, mu, nu, ku, pfA);
+- fprintf(fpout, " * Generated by ATLAS/tune/blas/gemm/emit_mm.c (3.8.3)\n");
++ fprintf(fpout, " * Generated by ATLAS/tune/blas/gemm/emit_mm.c (3.8.4)\n");
+ fprintf(fpout, " */\n");
+ }
+
+@@ -3082,7 +3082,17 @@ int CompMultHandled(CLEANNODE *cp, int n
+ * Returns 1 if nb is handled by succeeding case, 0 otherwise
+ */
+ {
+- for (; cp; cp = cp->next) if (nb % cp->imult == 0) return(1);
++
++ for (; cp; cp = cp->next)
++ {
++ if (cp->fixed == 2)
++ {
++ if (cp->imult == nb)
++ return(1);
++ }
++ else if (nb % cp->imult == 0)
++ return(1);
++ }
+ return(0);
+ }
+
+@@ -3110,14 +3120,6 @@ int *GetCompNBs(enum CW which, CLEANNODE
+ istop = cp0->nb;
+ while (istop % cp0->imult) istop--;
+ if (istop == cp0->nb) istop -= cp0->imult;
+- for (cp=cp0->next; cp; cp = cp->next)
+- {
+- if (cp->imult % cp0->imult == 0 && cp->fixed != 2)
+- {
+- istop = cp->imult;
+- break;
+- }
+- }
+
+ for (i=2*cp0->imult; i <= istop; i += cp0->imult)
+ {
+@@ -3582,7 +3584,7 @@ void GenUpNB_if(char pre, enum CW which,
+ CLEANNODE *wp, *cp0;
+ FILE *fp;
+ int i;
+- const char *ifs = "else if", *ifp;
++ const char *ifs = "else if", *ifp, *sp;
+ const char cwh[3] = {'M', 'N', 'K'};
+ char ln[128], st[2], *typ;
+
+@@ -3613,7 +3615,11 @@ void GenUpNB_if(char pre, enum CW which,
+
+ if (pre == 'c') pre = 'C';
+ else if (pre == 'z') pre = 'Z';
+-
++/*
+ * fixed=2 means a kernel specialized for that exact N. They would not have
+ * survived pruning if they weren't better than all the general algorithms,
+ * so we can test for them all up front
++ */
+ for (cp=cp0; cp; cp = cp->next) /* handle fixed = 2 cases */
+ {
+ if (cp->fixed == 2)
+@@ -3625,36 +3631,49 @@ void GenUpNB_if(char pre, enum CW which,
+ ifp = ifs;
+ }
+ }
+-
+- for (cp=cp0; cp; cp = cp->next) /* fixed = 1 cases */
+- {
+- if (cp->fixed == 1)
+- {
+- for (i=cp->ncomps-1; i >= 0; i--)
+- {
+- if (cp->imult > 1)
+- fprintf(fp, " %s (%c == %d)\n", ifp, cwh[which], cp->NBs[i]);
+- else fprintf(fp, " else\n");
+- fprintf(fp, " {\n ATL_%cup%cBmm%d_%d_%d_b%c%s;\n }\n",
+- pre, cwh[which], cp->NBs[i], cp->imult, cp->fixed,
+- cbeta, MMARGS);
+- ifp = ifs;
+- }
+- }
+- }
+- for (cp=cp0; cp; cp = cp->next) /* fixed = 0 cases */
+- {
++/*
++ * All remaining kernels are selected by being a multiple of imult;
++ * fixed=0 will be called directly, while fixed=1 will have a nested
++ * if to find the right compiled version to call
++ */
++ for (cp=cp0; cp; cp = cp->next) /* fixed = 0/1 cases */
++ {
++ if (cp->fixed != 1 && cp->fixed != 0)
++ continue;
++ if (cp->imult > 1)
++ fprintf(fp, " %s (%c == %s)\n", ifp, cwh[which],
++ GetInc(cp->imult, GetDiv(cp->imult, st)));
++ else if (ifp == ifs) fprintf(fp, " else\n");
+ if (cp->fixed == 0)
+ {
+- if (cp->imult > 1)
+- fprintf(fp, " %s (%c == %s)\n", ifp, cwh[which],
+- GetInc(cp->imult, GetDiv(cp->imult, st)));
+- else if (ifp == ifs) fprintf(fp, " else\n");
+ fprintf(fp, " {\n ATL_%cup%cBmm0_%d_%d_b%c%s;\n }\n",
+ pre, cwh[which], cp->imult, cp->fixed, cbeta, MMARGS);
+ ifp = ifs;
+ }
++ else if (cp->fixed == 1)
++ {
++ fprintf(fp, " {\n");
++ if (cp->ncomps == 1)
++ fprintf(fp, " ATL_%cup%cBmm%d_%d_%d_b%c%s;\n",
++ pre, cwh[which], cp->NBs[0], cp->imult, cp->fixed,
++ cbeta, MMARGS);
++ else /* must select kernel by NB[i] */
++ {
++ sp = "if";
++ for (i=cp->ncomps-1; i >= 0; i--)
++ {
++ fprintf(fp, " %s (%c == %d)\n", sp, cwh[which], cp->NBs[i]);
++ fprintf(fp,
++ " {\n ATL_%cup%cBmm%d_%d_%d_b%c%s;\n }\n",
++ pre, cwh[which], cp->NBs[i], cp->imult, cp->fixed,
++ cbeta, MMARGS);
++ sp = "else if";
++ }
++ }
++ fprintf(fp, " }\n");
++ }
+ }
++
+ for (cp=cp0; cp && cp->imult != 1; cp = cp->next);
+ if (!cp)
+ {
+@@ -4330,3 +4349,5 @@ main(int nargs, char **args)
+ }
+ exit(0);
+ }
++
++