26 #include "pips_config.h"
69 (
dagvtx v,
bool * north,
bool * south,
bool * west,
bool * east)
72 *north =
true, *south =
true, *west =
true, *east =
true;
73 intptr_t k00, k10, k20, k01, k11, k21, k02, k12, k22;
75 &k00, &k10, &k20, &k01, &k11, &k21, &k02, &k12, &k22);
77 *north = k00 || k10 || k20;
78 *south = k02 || k12 || k22;
79 *west = k00 || k01 || k02;
80 *east = k20 || k21 || k22;
/* Hash keys for the per-direction erosion of a vertex.
 *
 * Each key is the vertex address, taken as an integer, plus a small
 * direction offset (0 to 3), cast back to a generic pointer. The keys are
 * used to look up the erosion hash table, e.g. hash_get(erodes, NORTH(v)).
 *
 * The macro argument is parenthesized so that the cast applies to the whole
 * argument expression, not just to its first operand.
 *
 * NOTE(review): this presumably relies on distinct vertices being at least
 * 4 bytes apart so that keys cannot collide -- confirm.
 */
#define NORTH(v) ((void*) (((_int)(v))+0))
#define SOUTH(v) ((void*) (((_int)(v))+1))
#define WEST(v) ((void*) (((_int)(v))+2))
#define EAST(v) ((void*) (((_int)(v))+3))
97 _int n = 0, s = 0, w = 0, e = 0;
135 bool north =
true, south =
true, west =
true, east =
true;
159 int * width,
int * cost,
int * nops,
160 int * north,
int * south,
int * west,
int * east)
164 bool keep_erosion =
erosion!=NULL;
194 if (level_width>dwidth) dwidth = level_width;
202 if (nouts>dwidth) dwidth = nouts;
205 int n=0, s=0, w=0, e=0;
223 *north = n, *south = s, *west = w, *east = e,
224 *width = dwidth, *cost = dcost, *nops = dnops;
253 bool * gram = (
bool *)
malloc(
sizeof(
bool)*row_size*col_size);
255 for (
int i=0; i<row_size*col_size; i++)
264 (
bool * used,
int width,
int height,
int *
x,
int * y)
268 for (
int j = 0; j<col_size-height+1; j++)
270 for (
int i = 0; i<row_size-width+1; i++)
273 for (
int w = 0;
ok && w<width; w++)
274 for (
int h = 0;
ok && h<height; h++)
275 ok &= !used[(i+w)+(j+h)*row_size];
278 for (
int w = 0; w<width; w++)
279 for (
int h = 0; h<height; h++)
280 used[(i+w)+(j+h)*row_size] =
true;
301 set_fprint(stderr,
"availables", availables, (
string (*)())
i2a);
/* Name prefixes pasted into the generated helper code through string
 * literal concatenation: IMG_PTR starts imagelet identifiers such as
 * "imagelet_io_<n>_0".
 * NOTE(review): no use of RED_PTR is visible in this part of the file;
 * presumably it prefixes reduction memory identifiers -- confirm.
 */
327 #define IMG_PTR "imagelet_"
328 #define RED_PTR "reduction_"
368 sb_cat(
code,
" mcu_macro[0][",
i2a(op),
"].", r,
" = ", s,
";\n");
369 sb_cat(
code,
" mcu_macro[1][",
i2a(op),
"].", r,
" = ", s,
";\n");
389 int width,
int height,
bool is_kernel,
bool * used)
391 int size = width*height;
397 sb_cat(decl,
" // operation ", name,
" parameters\n");
398 sb_cat(decl,
" int16_t p_", name,
"[",
i2a(size),
"];\n");
399 sb_cat(decl,
" const int32_t x_", name,
" = ",
i2a(
x),
";\n");
400 sb_cat(decl,
" const int32_t y_", name,
" = ",
i2a(y),
";\n");
402 sb_cat(
code,
" // copy of operation ", name,
" parameters\n");
410 " for(i=0; i<",
i2a(size),
"; i++)\n"
411 " p_", name,
"[i] = ", p1,
"[i];\n");
418 sb_cat(
code,
" p_", name,
"[0] = ", p1,
";\n");
421 sb_cat(
code,
" p_", name,
"[0] = ", p1,
";\n");
432 sb_cat(
code,
" gram.xoffset = x_", name,
";\n");
433 sb_cat(
code,
" gram.yoffset = y_", name,
";\n");
436 sb_cat(
code,
" gram.params = p_", name,
";\n");
437 sb_cat(
code,
" freia_mg_write_dynamic_param(&dyn_param);\n");
460 string name =
hash_get(hparams, p1);
590 sb_cat(decl,
" // operation ", name,
" initialization\n"
591 " int16_t p_", name,
"[1];\n");
592 sb_cat(decl,
" const int32_t x_", name,
" = ",
i2a(
x),
";\n");
593 sb_cat(decl,
" const int32_t y_", name,
" = ",
i2a(y),
";\n");
596 " p_", name,
"[0] = ", val,
";\n"
597 " gram.xoffset = x_", name,
";\n"
598 " gram.yoffset = y_", name,
";\n"
600 " gram.height = 1;\n"
601 " gram.params = p_", name,
";\n"
602 " freia_mg_write_dynamic_param(&dyn_param);\n");
606 " // initialize memory for operation ", name,
"\n"
607 " mem_init.xmin1 = ", mem,
";\n"
608 " mem_init.ymin1 = 0;\n"
609 " mem_init.xmin2 = 0;\n"
610 " mem_init.ymin2 = 0;\n"
611 " mem_init.xmin3 = 0;\n"
612 " mem_init.ymin3 = 0;\n"
613 " mem_init.iter1 = TERAPIX_PE_NUMBER;\n"
614 " mem_init.iter2 = ",
i2a(nrow),
";\n"
615 " mem_init.iter3 = 0;\n"
616 " mem_init.iter4 = 0;\n"
617 " mem_init.addrStart = TERAPIX_UCODE_SET_CONST;\n"
618 " param.size = sizeof(terapix_mcu_macrocode); // not used?\n"
619 " param.raw = (void*) (&mem_init);\n"
620 " ret |= freia_mg_work(¶m);\n"
621 " ret |= freia_mg_end_work();\n");
682 " // array for reduction ", sop,
" extraction\n"
685 " redter.xres = ", mem,
";\n"
686 " redter.yres = 0;\n"
687 " redter.width = ", width,
";\n"
688 " redter.height = TERAPIX_PE_NUMBER;\n"
689 " redter.result = (void*) red_", sop,
";\n"
693 " redter.imgwidth = i0->width;\n"
694 " redter.imgheight = i0->height;\n"
695 " redter.subimgwidth = TERAPIX_PE_NUMBER;\n"
696 " redter.subimgheight = imagelet_size;\n"
698 " ret |= freia_cg_read_reduction_results(&redres);\n"
712 const string fname_dag,
726 int n_double_buffers;
729 n_double_buffers = n_ins+n_outs;
731 n_double_buffers = (n_ins>n_outs)? n_ins: n_outs;
733 pips_assert(
"some I/O images", n_double_buffers>0);
756 int length, width, cost, nops, n, s, w, e;
758 &width, &cost, &nops, &n, &s, &w, &e);
// ip2s(n): string form (through i2a) of the integer property named n
763 #define ip2s(n) i2a(get_int_property(n))
767 "/* FREIA terapix helper function for module ",
module,
"\n");
777 sb_cat(head,
" * IMAGE H = ",
ip2s(
"FREIA_IMAGE_HEIGHT"),
"\n");
781 sb_cat(head,
" * ",
i2a(n_ins),
" input image", n_ins>1?
"s":
"");
782 sb_cat(head,
", ",
i2a(n_outs),
" output image", n_outs>1?
"s":
"",
"\n");
783 sb_cat(head,
" * ",
i2a(nops),
" image operations in dag\n");
784 sb_cat(head,
" * dag length is ",
i2a(length));
785 sb_cat(head,
", dag width is ",
i2a(width),
"\n");
786 sb_cat(head,
" * costs in cycles per imagelet row:\n");
787 sb_cat(head,
" * - computation: ",
i2a(cost),
"\n");
790 sb_cat(head,
" * - communication: ",
i2a(comm*n_trs),
"\n");
794 sb_cat(head,
"freia_status ", fname_dag,
"(");
795 for (
int i = 0; i<n_outs; i++)
797 for (
int i = 0; i<n_ins; i++)
811 " // declarations:\n"
812 " freia_microcode mcode;\n"
813 " freia_op_param param;\n"
814 " freia_dynamic_param dyn_param;\n"
815 " terapix_gram gram;\n"
817 " freia_status ret = FREIA_OK;\n"
818 " // data structures for reductions\n"
819 " terapix_mcu_macrocode mem_init;\n"
820 " freia_reduction_results redres;\n"
821 " terapix_reduction redter;\n"
822 " // overall structure which describes the computation\n"
823 " terapix_mcu_instr mcu_instr;\n");
829 " mcode.raw = (void*) terapix_ucode_array;\n"
830 " mcode.size = TERAPIX_UCODE_SIZE_T;\n"
831 " freia_mg_write_microcode(&mcode);\n"
833 " // dyn_param contents\n"
834 " dyn_param.raw = &gram;\n"
835 " dyn_param.size = sizeof(terapix_gram);\n"
837 " // redres contents\n"
838 " redres.raw = (void*) &redter;\n"
839 " redres.size = sizeof(terapix_reduction);\n"
860 while (n_imagelets<n_outs)
868 sb_cat(dbio,
"\n // inputs:\n");
881 sb_cat(dbio,
" // - imagelet ", si,
" is i", sn,
" for ",
885 sb_cat(dbio,
" tile_in[0][", sn,
"].x = " IMG_PTR "io_", si,
"_0;\n");
886 sb_cat(dbio,
" tile_in[0][", sn,
"].y = 0;\n");
887 sb_cat(dbio,
" tile_in[1][", sn,
"].x = " IMG_PTR "io_", si,
"_1;\n");
888 sb_cat(dbio,
" tile_in[1][", sn,
"].y = 0;\n");
897 sb_cat(dbio,
"\n // no input\n\n");
901 while (n_imagelets<n_double_buffers)
957 sb_cat(body, in_count++?
",":
"",
i2a(i>0? i: -i));
980 head, NULL, hparams, &nargs));
985 sb_cat(body,
" // *special* set parameter for replace_const\n");
996 "TERAPIX_UCODE_SET_CONST_RAMREG");
998 sb_cat(body,
" // now take care of actual operation\n");
1008 sb_cat(
init,
" int ", mem,
" = ",
i2a(available_memory),
";\n");
1024 " result from ", mem,
"\n");
1027 sb_cat(tail,
" // assign reduction parameter",
1038 "red_", sop,
"[",
i2a(i),
"];\n");
1050 sb_cat(body,
" // in place copy skipped\n");
1070 if (img<0) img=-img;
1086 sb_cat(dbio,
" // outputs:\n");
1090 if (oimg<0) oimg=-oimg;
1102 pips_assert(
"IO imagelet found for output", oimg<=n_double_buffers);
1107 sb_cat(body,
" // output copy ",
i2a(old));
1113 NULL, NULL, NULL, lic, -oimg);
1120 sb_cat(dbio,
" // - imagelet ", so);
1121 sb_cat(dbio,
" is o", sn,
" for ");
1125 sb_cat(dbio,
" tile_out[0][", sn,
"].x = " IMG_PTR"io_", so,
"_0;\n");
1126 sb_cat(dbio,
" tile_out[0][", sn,
"].y = 0;\n");
1127 sb_cat(dbio,
" tile_out[1][", sn,
"].x = " IMG_PTR"io_", so,
"_1;\n");
1128 sb_cat(dbio,
" tile_out[1][", sn,
"].y = 0;\n");
1138 sb_cat(dbio,
" // no output\n\n");
1142 int total_imagelets = n_imagelets + n_double_buffers;
1143 int imagelet_rows = available_memory/total_imagelets;
1144 int imagelet_max_rows = imagelet_rows;
1148 sb_cat(decl,
" // flip flop macro code and I/Os\n");
1149 sb_cat(decl,
" terapix_mcu_macrocode mcu_macro[2][",
i2a(n_ops),
"];\n");
1151 sb_cat(decl,
" terapix_tile_info tile_in[2][",
i2a(n_ins),
"];\n");
1153 sb_cat(decl,
" terapix_tile_info tile_out[2][",
i2a(n_outs),
"];\n");
1156 sb_cat(decl,
" // imagelets definitions:\n");
1157 sb_cat(decl,
" // - ",
i2a(n_imagelets),
" computation imagelets\n");
1158 sb_cat(decl,
" // - ",
i2a(n_double_buffers),
" double buffer imagelets\n");
1162 int vertical_border = n>s? n: s;
1163 int max_computed_size = imagelet_rows-2*vertical_border;
1167 if (image_height==0)
1170 pips_assert(
"at least one image is needed!", n_ins||n_outs);
1173 " // dynamic optimal imagelet size computation\n"
1174 " // this formula must match what the scheduler does!\n"
1175 " int vertical_border = ",
i2a(vertical_border),
";\n"
1177 " int image_height = ", n_ins?
"i":
"o",
"0->heightWa;\n");
1179 " int max_computed_size = ",
i2a(max_computed_size),
";\n"
1180 " int n_tiles = (image_height+max_computed_size-1)/max_computed_size;\n"
1181 " int imagelet_size = (n_tiles==1)? image_height:\n"
1182 " ((image_height+n_tiles-1)/n_tiles)+2*vertical_border;\n");
1186 " // max imagelet size requested..."
1187 " int max_size = ",
i2a(max_size),
";\n"
1188 " if (imagelet_size>max_size)\n"
1189 " imagelet_size = max_size;\n");
1198 int n_tiles = (image_height+max_computed_size-1)/max_computed_size;
1200 int optim_rows = ((image_height+n_tiles-1)/n_tiles)+2*vertical_border;
1202 if (optim_rows>image_height) optim_rows = image_height;
1203 imagelet_rows = optim_rows;
1205 pips_assert(
"optimized row size lower than max row size",
1206 optim_rows<=imagelet_rows && optim_rows>0);
1209 sb_cat(decl,
" // imagelet max size: ",
i2a(imagelet_max_rows),
"\n");
1212 sb_cat(decl,
" int imagelet_size = ",
1215 (max_size<imagelet_rows? max_size: imagelet_rows):
1217 imagelet_rows),
";\n");
1221 for (
int i=1; i<=total_imagelets; i++)
1224 sb_cat(decl,
i2a(imagelet_max_rows * (i-1)),
";\n");
1231 sb_cat(decl,
" // memory for reductions\n");
1239 sb_cat(decl,
" // double buffer assignment\n");
1240 for (
int i=1; i<=n_double_buffers; i++)
1257 sb_cat(body,
" // imagelet erosion for the computation\n");
1259 sb_cat(body,
" mcu_instr.borderTop = ",
i2a(n),
";\n");
1260 sb_cat(body,
" mcu_instr.borderBottom = ",
i2a(s),
";\n");
1261 sb_cat(body,
" mcu_instr.borderLeft = ",
i2a(w),
";\n");
1262 sb_cat(body,
" mcu_instr.borderRight = ",
i2a(e),
";\n");
1263 sb_cat(body,
" mcu_instr.imagelet_height = imagelet_size;\n"
1264 " mcu_instr.imagelet_width = TERAPIX_PE_NUMBER;\n"
1267 sb_cat(body,
" // outputs\n"
1268 " mcu_instr.nbout = ",
i2a(n_outs),
";\n");
1271 " mcu_instr.out0 = tile_out[0];\n"
1272 " mcu_instr.out1 = tile_out[1];\n");
1275 " mcu_instr.out0 = NULL;\n"
1276 " mcu_instr.out1 = NULL;\n");
1280 " mcu_instr.nbin = ",
i2a(n_ins),
";\n");
1283 " mcu_instr.in0 = tile_in[0];\n"
1284 " mcu_instr.in1 = tile_in[1];\n");
1287 " mcu_instr.in0 = NULL;\n"
1288 " mcu_instr.in1 = NULL;\n");
1292 " // actual instructions\n"
1293 " mcu_instr.nbinstr = ",
i2a(n_ops),
";\n"
1294 " mcu_instr.instr0 = mcu_macro[0];\n"
1295 " mcu_instr.instr1 = mcu_macro[1];\n");
1301 " // call terapix runtime\n"
1302 " param.size = -1; // not used\n"
1303 " param.raw = (void*) &mcu_instr;\n"
1304 " ret |= freia_cg_template_process(¶m");
1305 for (
int i=0; i<n_outs; i++)
1307 for (
int i=0; i<n_ins; i++)
1362 string fname_fulldag,
1365 set global_remainings,
1380 string fname_dag =
strdup(
cat(fname_fulldag,
"_",
i2a(n_split)));
1400 ls, fname_dag,
lparams, helpers, stnb);
1406 free(fname_dag), fname_dag = NULL;
1446 string why =
"none";
1461 result = 1, why =
"impl";
1463 result = -1, why =
"impl";
1466 result = -1, why =
"scal";
1468 result = 1, why =
"scal";
1471 result = -1, why =
"mes";
1473 result = 1, why =
"mes";
1476 result = 1, why =
"copy";
1478 result = -1, why =
"copy";
1482 result = 1, why =
"gen";
1485 result = -1, why =
"gen";
1489 result = 1, why =
"inplace";
1491 result = -1, why =
"inplace";
1527 result = e1-e2, why =
"erosion";
1541 int nms1 = 0, nms2 = 0;
1549 if (l1!=l2 && (l1==0 || l2==0))
1551 result = l2-l1, why =
"args";
1552 else if (nms1!=nms2 && l1==1 && l2==1)
1559 result = nms1 - nms2, why =
"succs";
1562 result = l2-l1, why =
"args2";
1574 result<0? ">
": (result==0? "=
": "<
"),
1575 dagvtx_number(*v2), dagvtx_operation(*v2), why);
1577 pips_assert("total order
", v1==v2 || result!=0);
1581 /* @brief whether vertex is not implemented in terapix
1583 static bool not_implemented(dagvtx v)
1585 if (freia_convolution_p(v)) // special case
1587 // skip if parametric
1589 return !freia_convolution_width_height(v, &w, &h, false);
1591 return !freia_aipo_terapix_implemented(dagvtx_freia_api(v));
1594 /* @brief whether dag is not implemented in terapix
1596 static bool terapix_not_implemented(dag d)
1598 FOREACH(dagvtx, v, dag_vertices(d))
1599 if (not_implemented(v))
1604 /* @brief choose a vertex, avoiding non combinable stuff if the list is started
1606 static dagvtx choose_terapix_vertex(const list lv, bool started)
1608 pips_assert("list contains vertices
", lv);
1611 FOREACH(dagvtx, v, lv)
1612 if (!not_implemented(v))
1615 // just return the first vertex
1616 return DAGVTX(CAR(lv));
1619 /*********************************************************** TERAPIX DAG CUT */
1621 /* would it seem interesting to split d?
1622 * @return the erosion up to which to split, or 0 if no split
1623 * should we also/instead consider the expected cost?
1625 static int cut_decision(dag d, hash_table erosion)
1627 int com_cost_per_row = get_int_property(trpx_dmabw_prop);
1628 int width, cost, nops, n, s, w, e;
1629 (void)dag_terapix_measures(d, erosion, &width, &cost, &nops, &n, &s, &w, &e);
1632 if (width==0) return 0;
1634 int nins = gen_length(dag_inputs(d)), nouts = gen_length(dag_outputs(d));
1636 // if we assume that the imagelet size is quite large, say around 128
1637 // even with double buffers. The only reason to cut is because
1638 // of the erosion on the side which reduces the amount of valid data,
1639 // but there is really a point to do that only if communications are still
1640 // masked by computations after splitting the dag...
1642 // first we compute a possible number of splits
1643 // computation cost = communication cost (in cycle per imagelet row)
1644 // communication cost = (nins + 2*width*n_splits + nouts) * cost_per_row
1645 // the width is taken as the expected number of images to extract and
1646 // reinject (hence 2*) if the dag is split.
1647 // this is really an approximation... indeed, nothing ensures that
1648 // the initial input is not still alive at the chosen cut?
1650 // for anr999 the gradient of depth 10 is just enough to cover the coms.
1651 // for lp, about 1(.2) split is suggested.
1653 // compute number of cuts, that is the number of amortizable load/store
1654 // ??? maybe I should incorporate a margin?
1657 // please note that these formulas are somewhat approximate and the results
1658 // may prove wrong.
1659 if (trpx_overlap_io_p())
1661 // number of image to communicate is MAX(#in,#out)
1662 int nimgs = nins>nouts? nins: nouts;
1663 // the overhead of a cut is one transfer
1664 n_cuts = ((1.0*cost/com_cost_per_row)-nimgs)/(1.0*width);
1667 n_cuts = ((1.0*cost/com_cost_per_row)-nins-nouts)/(2.0*width);
1669 pips_debug(2, "cost=%d com_cost=%d nins=%d width=%d nouts=%d n_cuts=%
f\n
",
1670 cost, com_cost_per_row, nins, width, nouts, n_cuts);
1672 if (n_cuts < 1.0) return 0;
1674 // we also have to check that there is a significant erosion!
1675 // I first summarize the erosion to the max(n,s,e,w)
1676 // grrr... C really lacks a stupid variadic max/min function!
1677 // I could compute per direction, if necessary...
1679 if (s>erode) erode=s;
1680 if (e>erode) erode=e;
1681 if (w>erode) erode=w;
1683 // then we should decide...
1684 // there should be enough computations to amortize a split,
1685 // given that an erode/dilate costs about 15 cycles per row
1686 // there should be about 2 of them to amortize/hide one imagelet transfer,
1687 // whether as input or output.
1689 int cut = erode/((int)(n_cuts+1));
1691 // try to fix the balance chosen by the integer division
1692 // hmmm... should really look at the weights to choose a side here...
1693 if (erode%2==1 && n_cuts<2.0 && nouts<=nins)
1699 /* cut dag "d", possibly a subdag of "fulld", at "erosion" "cut"
1701 static dag cut_perform(dag d, int cut, hash_table erodes, dag fulld,
1702 const set output_images)
1704 pips_debug(2, "cutting with cut=%d\n
", cut);
1705 pips_assert("something cut width
", cut>0);
1708 // current set of vertices to group
1709 current = set_make(set_pointer),
1710 // all vertices which are considered computed
1711 done = set_make(set_pointer);
1713 list lcurrent = NIL, computables;
1714 set_assign_list(done, dag_inputs(d));
1717 pips_assert("erosion is clean
", erosion==NULL);
1718 erosion = hash_table_make(hash_pointer, 0);
1719 dag_terapix_erosion(d, erosion);
1721 // transitive closure
1722 bool changed = true;
1724 (computables = dag_computable_vertices(d, done, done, current)))
1726 // ensure determinism
1727 gen_sort_list(computables, (gen_cmp_func_t) dagvtx_terapix_priority);
1729 FOREACH(dagvtx, v, computables)
1731 // keep erosion up to cut
1732 // hmmm. what about \sigma_{d \in NSEW} erosion_d ?
1733 // would not work because the erosion only make sense if it is
1734 // the same for all imagelets, or said otherwise the erosion is
1735 // aligned to the worst case so that tiling can reasonably take place.
1736 if ((((_int) hash_get(erodes, NORTH(v))) <= cut) &&
1737 (((_int) hash_get(erodes, SOUTH(v))) <= cut) &&
1738 (((_int) hash_get(erodes, EAST(v))) <= cut) &&
1739 (((_int) hash_get(erodes, WEST(v))) <= cut))
1741 set_add_element(current, current, v);
1742 set_add_element(done, done, v);
1743 lcurrent = CONS(dagvtx, v, lcurrent);
1749 gen_free_list(computables), computables = NIL;
1753 hash_table_free(erosion), erosion = NULL;
1755 lcurrent = gen_nreverse(lcurrent);
1756 pips_assert("some vertices where extracted
", lcurrent!=NIL);
1758 // build extracted dag
1759 dag nd = make_dag(NIL, NIL, NIL);
1760 FOREACH(dagvtx, v, lcurrent)
1762 // pips_debug(7, "extracting
node %
" _intFMT "\n
", dagvtx_number(v));
1763 dag_append_vertex(nd, copy_dagvtx_norec(v));
1765 dag_compute_outputs(nd, NULL, output_images, NIL, false);
1766 dag_cleanup_other_statements(nd);
1769 FOREACH(dagvtx, v, lcurrent)
1770 dag_remove_vertex(d, v);
1772 // ??? should not be needed?
1773 freia_hack_fix_global_ins_outs(fulld, nd);
1774 freia_hack_fix_global_ins_outs(fulld, d);
1778 dag_consistency_asserts(nd);
1779 dag_consistency_asserts(d);
1783 gen_free_list(lcurrent), lcurrent = NIL;
1789 /*************************************************** TERAPIX HANDLE SEQUENCE */
1791 static void migrate_statements(sequence sq, dag d, set dones)
1793 set stats = set_make(set_pointer);
1794 dag_statements(stats, d);
1795 freia_migrate_statements(sq, stats, dones);
1796 set_union(dones, dones, stats);
1800 /* do compile a list of statements for terapix
1801 * @param module, current module (function) name
1802 * @param ls, list of statements taken from the sequence
1803 * @param occs, occurrences of images (image -> set of statements)
1804 * @param helper_file, file to which code is to be generated
1805 * @param number, number of this statement sequence in module
1806 * @return list of intermediate images to allocate
1808 list freia_trpx_compile_calls
1812 list /* of statements */ ls,
1813 const hash_table occs,
1814 hash_table exchanges,
1815 const set output_images,
1821 get_bool_property("HWAC_TERAPIX_REDUCE_TO_CONNECTED_COMPONENTS
");
1824 pips_debug(3, "considering %d statements\n
", (int) gen_length(ls));
1825 pips_assert("some statements
", ls);
1827 int n_op_init, n_op_init_copies;
1828 freia_aipo_count(fulld, &n_op_init, &n_op_init_copies);
1830 // must have distinct images in the graph for optimizations
1831 hash_table init = hash_table_make(hash_pointer, 0);
1832 list new_images = dag_fix_image_reuse(fulld, init, occs);
1834 list added_before = NIL, added_after = NIL;
1835 freia_dag_optimize(fulld, exchanges, &added_before, &added_after);
1837 int n_op_opt, n_op_opt_copies;
1838 freia_aipo_count(fulld, &n_op_opt, &n_op_opt_copies);
1840 fprintf(helper_file,
1843 "optimized to %d ops and %d+%d+%d copies\n",
1844 number, n_op_init, n_op_init_copies,
1845 n_op_opt, n_op_opt_copies,
1850 added_before, added_after);
1909 -1, global_remainings, helper_file, helpers, stnb,
init);
1940 n_cut++, global_remainings, helper_file, helpers, stnb,
init);
1953 n_cut++, global_remainings, helper_file, helpers, stnb,
init);
1972 set_free(global_remainings), global_remainings = NULL;
1973 free(fname_fulldag), fname_fulldag = NULL;
1979 list real_new_images =
1983 return real_new_images;
static void node(FILE *out, string name)
Build for module name a node and link to its successors.
int get_int_property(const string)
void free_dagvtx(dagvtx p)
struct paramStruct params
static reference ref
Current stmt (an integer)
void const char const char const int
bdt base
Current expression.
void dag_consistency_asserts(dag d)
do some consistency checking...
list dag_split_connected_components(dag d, set output_images)
build connected components
_int dagvtx_optype(const dagvtx v)
list dag_vertex_preds(const dag d, const dagvtx target)
return target predecessor vertices as a list.
_int dagvtx_number(const dagvtx v)
returns the vertex number, i.e.
bool dagvtx_other_stuff_p(const dagvtx v)
a vertex with a non AIPO or image related statement.
bool dag_no_image_operation(dag d)
tell whether we have something to do with images ??? hmmm...
list dag_split_on_scalars(const dag initial, bool(*alone_only)(const dagvtx), dagvtx(*choose_vertex)(const list, bool), gen_cmp_func_t priority, void(*priority_update)(const dag), const set output_images)
split a dag on scalar dependencies only, with a greedy heuristics.
void dagvtx_dump(FILE *out, const string name, const dagvtx v)
for dag debug.
list dag_computable_vertices(dag d, const set computed, const set maybe, const set currents)
return the vertices which may be computed from the list of available images, excluding vertices in ex...
dagvtx dagvtx_get_producer(const dag d, const dagvtx sink, const entity e, _int before_number)
return (last) producer of image e for vertex sink, or NULL if none found.
void dag_remove_vertex(dag d, const dagvtx v)
remove vertex v from dag d.
void dag_dump(FILE *out, const string what, const dag d)
for dag debug
void dag_dot_dump(const string module, const string name, const dag d, const list lb, const list la)
generate a "dot" format from a dag to a file.
void set_append_vertex_statements(set s, list lv)
string dagvtx_operation(const dagvtx v)
_int dagvtx_opid(const dagvtx v)
void dag_dot_dump_prefix(const string module, const string prefix, int number, const dag d, const list lb, const list la)
char * get_string_property(const char *)
const freia_api_t * hwac_freia_api(const char *function)
freia-utils.c
void freia_add_image_arguments(list limg, list *lparams)
prepend limg images in front of the argument list; limg is consumed by the operation.
list freia_get_vertex_params(const dagvtx v)
list freia_extract_params(const int napi, list args, string_buffer head, string_buffer head2, hash_table params, int *nparams)
returns an allocated expression list of the parameters only (i.e.
void hwac_kill_statement(statement s)
remove contents of statement s.
list freia_allocate_new_images_if_needed(list ls, list images, const hash_table occs, const hash_table init, const hash_table signatures)
insert image allocation if needed, for intermediate image inserted before if an image is used only tw...
int freia_substitute_by_helper_call(dag d, set global_remainings, set remainings, list ls, const string function_name, list lparams, set helpers, int preceeding)
substitute those statement in ls that are in dag d and accelerated by a call to function_name(lparams...
bool freia_convolution_p(dagvtx v)
is it the convolution special case?
const freia_api_t * get_freia_api(int index)
bool freia_extract_kernel_vtx(dagvtx v, bool strict, intptr_t *k00, intptr_t *k10, intptr_t *k20, intptr_t *k01, intptr_t *k11, intptr_t *k21, intptr_t *k02, intptr_t *k12, intptr_t *k22)
vertex-based version
call freia_statement_to_call(const statement s)
return the actual function call from a statement, dealing with assign and returns....
bool freia_convolution_width_height(dagvtx v, _int *pw, _int *ph, bool check)
get width & height of convolution
void freia_insert_added_stats(list ls, list stats, bool before)
insert statements to actual code sequence in "ls" BEWARE that ls is assumed to be in reverse order....
bool freia_aipo_terapix_implemented(const freia_api_t *api)
whether api available with Ter@pix
#define FREIA_DEFAULT_HEIGHT
#define dagvtx_freia_api(v)
#define pstatement_statement_p(x)
#define dagvtx_content(x)
#define vtxcontent_optype(x)
#define vtxcontent_out(x)
#define pstatement_statement(x)
#define vtxcontent_inputs(x)
#define vtxcontent_source(x)
static void terapix_mcu_img(string_buffer code, int op, string ref, int n)
set a double buffered image argument.
static hash_table erosion
global variable used by the dagvtx_terapix_priority function, because qsort does not allow to pass so...
static void dag_terapix_reset_erosion(const dag d)
static void terapix_gram_allocate(bool *used, int width, int height, int *x, int *y)
terapix allocate widthxheight in global memory
static int cut_decision(dag d, hash_table erosion)
would it seem interesting to split d?
static void terapix_init_row(string_buffer decl, string_buffer code, string base, string suff, string mem, int nrow, string val, bool *used)
initialize a few rows at mem address with value val
static void dag_terapix_erosion(const dag d, hash_table erosion)
fill in erosion hash table from dag d.
static int dag_terapix_measures(const dag d, hash_table erosion, int *width, int *cost, int *nops, int *north, int *south, int *west, int *east)
compute some measures about DAG d.
static bool not_implemented(dagvtx v)
whether vertex is not implemented in terapix
static int freia_trpx_compile_one_dag(string module, list ls, dag d, string fname_fulldag, int n_split, int n_cut, set global_remainings, FILE *helper_file, set helpers, int stnb, hash_table signatures)
generate terapix code for this one dag, which should be already split.
static void erosion_optimization(dagvtx v, bool *north, bool *south, bool *west, bool *east)
tell whether the kernel is used on each of the 4 directions.
static void terapix_get_reduction(string_buffer decl, string_buffer tail, int n_op, string mem, const freia_api_t *api)
generate reduction extraction code
static void terapix_macro_code(string_buffer code, string_buffer decl, int op, const freia_api_t *api, bool *used, hash_table hparams, const dagvtx v, const list ins, int out)
generate terapix code for
static void gram_param(string_buffer code, string_buffer decl, string name, dagvtx v, hash_table hparams, int width, int height, bool is_kernel, bool *used)
copy some operator parameters in the global ram (aka gram).
static _int freia_terapix_call(const string module, const string fname_dag, string_buffer code, dag thedag, list *params)
generate a terapix call for dag thedag.
static void terapix_initialize_memory(string_buffer decl, string_buffer body, int nop, string mem, const freia_api_t *api, bool *used)
initialize the memory at addr depending on the operation to perform
static dag cut_perform(dag d, int cut, hash_table erodes, dag fulld, const set output_images)
cut dag "d", possibly a subdag of "fulld", at "erosion" "cut"
static void terapix_image(string_buffer sb, int ff, int n)
generate an image symbolic pointer (a name:-).
static bool * terapix_gram_init(void)
allocate bitfield to describe used cells in global memory.
static bool terapix_not_implemented(dag d)
whether dag is not implemented in terapix
static list dag_vertex_pred_imagelets(const dag d, const dagvtx v, const hash_table allocation)
of ints
static void terapix_gram_management(string_buffer code, string_buffer decl, int op, const freia_api_t *api, const dagvtx v, hash_table hparams, bool *used)
manage GRAM global memory to pass parameters.
static void terapix_mcu_pval(string_buffer code, int op, string ref, string p, string s)
set some prefixed value string argument.
static int dagvtx_terapix_priority(const dagvtx *v1, const dagvtx *v2)
comparison function for sorting dagvtx in qsort, this is deep voodoo, because the priority has an imp...
static void compute_dead_vertices(set deads, const set computed, const dag d, const dagvtx v)
static void update_erosions(const dag d, const dagvtx v, hash_table erosion)
update_erosions().
static dagvtx choose_terapix_vertex(const list lv, bool started)
choose a vertex, avoiding non combinable stuff if the list is started
static _int select_imagelet(set availables, int *nimgs, bool first)
Return the first/last available imagelet, or create one if necessary This ensures that the choice is ...
static void terapix_mcu_val(string_buffer code, int op, string r, string s)
set some value string argument.
static void migrate_statements(sequence sq, dag d, set dones)
static void terapix_mcu_int(string_buffer code, int op, string ref, int val)
set an integer argument.
#define trpx_dag_cut_none_p(s)
#define trpx_overlap_io_p()
#define trpx_dag_cut_is_valid(s)
#define trpx_dag_cut_compute_p(s)
#define trpx_dag_cut_enumerate_p(s)
list gen_nreverse(list cp)
reverse a list in place
#define NIL
The empty list (nil in Lisp)
list gen_copy_seq(list l)
Copy a list structure.
size_t gen_length(const list l)
#define CONS(_t_, _i_, _l_)
List element cell constructor (insert an element at the beginning of a list)
list gen_nconc(list cp1, list cp2)
physically concatenates CP1 and CP2 but do not duplicates the elements
#define CAR(pcons)
Get the value of the first element of a list.
void gen_free_list(list l)
free the spine of the list
bool gen_in_list_p(const void *vo, const list lx)
tell whether vo belongs to lx
#define FOREACH(_fe_CASTER, _fe_item, _fe_list)
Apply/map an instruction block on all the elements of a list.
#define CDR(pcons)
Get the list less its first element.
hash_table hash_table_make(hash_key_type key_type, size_t size)
void * hash_get(const hash_table htp, const void *key)
this function retrieves in the hash table pointed to by htp the couple whose key is equal to key.
void hash_put(hash_table htp, const void *key, const void *val)
This functions stores a couple (key,val) in the hash table pointed to by htp.
void hash_table_free(hash_table htp)
this function deletes a hash table that is no longer useful.
bool hash_defined_p(const hash_table htp, const void *key)
true if key has e value in htp.
void hash_table_clear(hash_table htp)
Clears all entries of a hash table HTP.
float_t space[SIZE][SIZE]
int allocation
External variables for direct call to PIP.
#define pips_debug
these macros use the GNU extensions that allow variadic macros, including with an empty list.
#define pips_assert(what, predicate)
common macros, two flavors depending on NDEBUG
#define pips_internal_error
char * i2a(int)
I2A (Integer TO Ascii) yields a string for a given Integer.
string bool_to_string(bool)
#define same_string_p(s1, s2)
bool set_empty_p(const set)
tell whether set s is empty.
set set_assign_list(set, const list)
assigns a list contents to a set all duplicated elements are lost
set set_del_element(set, const set, const void *)
bool list_in_set_p(const list, const set)
#define SET_FOREACH(type_name, the_item, the_set)
enumerate set elements in their internal order.
set set_clear(set)
Assign the empty set to s s := {}.
bool set_belong_p(const set, const void *)
void set_fprint(FILE *, string, const set, gen_string_func_t)
print set s to file stream out.
set set_append_list(set, const list)
add list l items to set s, which is returned.
set set_make(set_type)
Create an empty set of any type but hash_private.
set set_add_element(set, const set, const void *)
void string_buffer_append_sb(string_buffer, const string_buffer)
append the string buffer sb2 to string buffer sb.
size_t string_buffer_size(const string_buffer)
return the size of the string in string_buffer sb
void string_buffer_to_file(const string_buffer, FILE *)
put string buffer into file.
void string_buffer_free(string_buffer *)
free string buffer structure, also free string contents according to the dup field
string_buffer string_buffer_make(bool dup)
allocate a new string buffer
int(* gen_cmp_func_t)(const void *, const void *)
int f(int off1, int off2, int n, float r[n], float a[n], float b[n])
list lparams
Array bounds.
const char * entity_user_name(entity e)
Since entity_local_name may contain PIPS special characters such as prefixes (label,...
entity local_name_to_top_level_entity(const char *n)
This function try to find a top-level entity from a local name.
static int init
Maximal value set for Fortran 77.
#define EXPRESSION(x)
EXPRESSION.
#define call_arguments(x)
internally defined structure.
FI: I do not understand why the type is duplicated at the set level.
The structure used to build lists in NewGen.
FREIA API function name -> SPoC hardware description (and others?)
unsigned int arg_misc_out