26 #include "pips_config.h"
53 dagvtx v,
int stage,
int side,
bool flip)
162 if (alu!=flipped_alu)
172 sb_cat(body,
" si.alu[", s_stage,
"][0].op = ", aluop->
setting,
";\n");
178 sb_cat(body,
" sp.alu[", s_stage,
"][0].constant = ", s_var,
";\n");
210 sb_cat(body,
" si.poc[", s_stag,
"][", s_side,
"].op = ");
212 sb_cat(body,
"SPOC_POC_ERODE;\n");
214 sb_cat(body,
"SPOC_POC_DILATE;\n");
216 sb_cat(body,
"SPOC_POC_CONV;\n");
221 sb_cat(body,
" si.poc[", s_stag,
"][", s_side,
"].grid = ");
223 sb_cat(body,
"SPOC_POC_6_CONNEX;\n");
225 sb_cat(body,
"SPOC_POC_8_CONNEX;\n");
231 " for(i=0 ; i<9 ; i++)\n"
232 " sp.poc[", s_stag,
"][", s_side,
"].kernel[i] = ",
269 " si.th[", s_stag,
"][", s_side,
"].op = ",
270 s_bin,
"? SPOC_TH_BINARIZE : SPOC_TH_NO_BINARIZE;\n",
271 " sp.th[", s_stag,
"][", s_side,
"].boundmin = ", s_inf,
";\n",
272 " sp.th[", s_stag,
"][", s_side,
"].boundmax = ", s_sup,
";\n");
311 reduc =
strdup(
cat(
"reduc.measure[", s_stage,
"][", s_side,
"]."));
319 v_2 = NULL, v_3 = NULL;
331 sb_cat(tail,
" *", v_1,
" = (int32_t) ", reduc,
333 "minimum":
"maximum",
";\n");
336 sb_cat(tail,
" *", v_2,
" = (uint32_t) ", reduc,
338 "min":
"max",
"_coord_x;\n",
339 " *", v_3,
" = (uint32_t) ", reduc,
341 "min":
"max",
"_coord_y;\n");
345 sb_cat(tail,
" *", v_1,
" = (int32_t) ", reduc,
"volume;\n");
481 op->
used?
"":
"not ",
487 _int index = 2 * (stage*4+mux) + 1;
493 _int index = 2 * (stage*4+mux) + 1;
494 hash_put(wiring, (
void *) index, (
void *) index);
508 return 2 * (2 * (stage*4+
level) + side) + 2;
514 hash_put(wiring, (
void *) index, (
void *) index);
547 int start_stage,
int level,
int side,
550 int * pstage,
int * pside)
552 int stage = start_stage;
553 int preferred, other;
554 if (side>=0) preferred = side, other = 1 - side;
556 else preferred = 0, other = 1;
558 preferred=0, other=0;
560 if (target_level<=
level)
573 (stage==start_stage+1 && target_level<spoc_type_alu && level>
spoc_type_alu);
580 skip_first_other =
false;
589 pips_assert(
"symmetric component must be available",
597 *pside = pok? preferred: other;
620 sb_cat(
code,
" si.mux[", s_stage,
"][", s_mux,
"].op = "
621 "SPOC_MUX_IN",
value?
"1":
"0",
";\n");
646 out->used_stage = -1;
668 measured==in0->
image || measured==in1->
image);
683 if (measured==in0->
image)
724 in0->
used =
false, in1->
used =
false;
739 used = in1, notused = in0;
741 used = in0, notused = in1;
744 used = in0, notused = in1;
746 used = in1, notused = in0;
750 used = in1, notused = in0;
752 used = in0, notused = in1;
756 used = dep==in0->
image? in0: in1;
757 notused = dep==in0->
image? in1: in0;
770 bool used_image_still_needed =
798 if (used_image_still_needed)
804 out->side = 1 -
out->side;
825 pips_debug(8,
"copied image %s is not needed further\n",
845 notused->
used =
false;
854 if (!in1->
image) *in1 = *in0;
855 if (!in0->
image) *in0 = *in1;
880 else if (
out->side==-1)
911 in_side>=-1 && in_side<=1 && out_side>=0 && out_side<=1);
912 pips_debug(7,
"stage %d in %d -> out %d\n", stage, in_side, out_side);
1063 prefered = (
out->side>=0)?
out->side: 0;
1066 prefered = (
out->side==-1)? in->
side:
out->side;
1074 in->
side = prefered;
1083 in->
side = 1 - prefered;
1095 pips_debug(7,
"restoring previous schedule...\n");
1129 string function_name,
1136 int n_im_out,
int n_im_in,
1138 bool some_reductions,
bool some_kernels,
1140 const string out0,
const string out1,
const string in0,
const string in1)
1145 "// FREIA-SPoC helper function for module ",
module,
"\n"
1146 "freia_status ", function_name,
"(\n");
1150 if (n_im_out!=0 && n_im_in>0)
sb_cat(
code,
",\n");
1159 if (some_reductions)
1161 " spoc_reduction reduc;\n"
1162 " freia_reduction_results redres;\n");
1167 " // init pipe to nop\n"
1175 if (some_reductions)
1178 " // actual call of spoc hardware\n"
1179 " freia_cg_template_process_2i_2o",
1180 "(¶m, ", out0,
", ", out1,
", ", in0,
", ", in1,
");\n",
1183 if (some_reductions)
1186 " // get reductions\n"
1187 " freia_cg_read_reduction_results(&redres);\n");
1242 const set output_images)
1246 bool some_reductions =
false, some_kernels =
false;
1249 pips_debug(3,
"running on '%s' for %d operations\n",
1268 pips_assert(
"0, 1, 2 input images", n_im_in>=0 && n_im_in<=2);
1271 string p_in0 =
"NULL", p_in1 =
"NULL", p_out0 =
"NULL", p_out1 =
"NULL";
1273 entity a_in0 = NULL, a_in1 = NULL;
1279 int live_images = 0;
1301 else if (n_im_in>=2)
1338 bool toskip =
false;
1383 if (
out.stage >= pipeline_depth)
1409 if (
out.stage!=stage)
1412 sb_cat(body,
"\n // STAGE ",
i2a(stage),
"\n");
1425 &(api->
spoc), v, hparams);
1438 else if (!in0_needed && in0.
used)
1440 else if (!in1_needed && in1.
used)
1443 else if (!in0_needed ||
out.image==in0.
image)
1450 else if (!in1_needed ||
out.image==in1.
image)
1467 else if (in0.
side ==
out.side)
1469 else if (in1.
side ==
out.side)
1471 else if (
out.side==-1)
1556 set_free(computed), computed = NULL;
1560 pips_assert(
"0, 1, 2 output images", n_im_out>=0 && n_im_out<=2);
1561 pips_assert(
"some input or output images", n_im_out || n_im_in);
1564 sb_cat(body,
"\n // no output image\n");
1566 else if (n_im_out==1)
1576 out = in0, in0 = in1, in1 =
out;
1579 else if (in1.
image==imout && in0.
image!=imout)
1580 out = in0, in0 = in1, in1 =
out;
1581 else if (in0.image==imout)
1587 out.producer = NULL;
1589 out.stage = in0.stage;
1590 out.side = in0.side>=0? in0.side: 0;
1598 " on ",
i2a(
out.side),
"\n");
1603 out.stage = pipeline_depth-1;
1604 sb_cat(body,
"\n // fill in to the end...\n");
1614 else if (n_im_out==2)
1619 int out0_side, out1_side;
1621 pips_assert(
"output two results in two variables", out0!=out1);
1624 if (out0 != in0.
image)
1625 out = in0, in0 = in1, in1 =
out;
1626 pips_assert(
"results are available", out0==in0.image && out1==in1.image);
1628 pips_debug(7,
"out0 %s out1 %s in0(%d.%d.%d) %s in1(%d.%d.%d) %s\n",
1634 bool in0_advanced = in0.stage>in1.stage ||
1635 (in0.stage==in1.stage && in0.level>in1.level);
1639 out0_side = in0.side;
1640 if (out0_side==-1) out0_side = 0;
1641 out1_side = 1 - out0_side;
1645 out1_side = in1.side;
1646 if (out1_side==-1) out1_side = 1;
1647 out0_side = 1 - out1_side;
1651 (out0_side==0 && out1_side==1) || (out0_side==1 && out1_side==0));
1655 p_out0 =
"o0", p_out1 =
"o1";
1657 p_out0 =
"o1", p_out1 =
"o0";
1659 int out_stage = (in0.stage>in1.stage)? in0.stage: in1.stage;
1663 " on ",
i2a(out0_side));
1665 " on ",
i2a(out1_side),
"\n");
1669 out.producer = NULL;
1671 out.side = out0_side;
1672 out.stage = out_stage;
1677 out.stage = pipeline_depth-1;
1678 sb_cat(body,
"\n // fill in to the end...\n");
1684 out.producer = NULL;
1686 out.side = out1_side;
1687 out.stage = out_stage;
1692 out.stage = pipeline_depth-1;
1693 sb_cat(body,
"\n // fill in to the end...\n");
1704 n_im_out, n_im_in, some_reductions, some_kernels,
1705 p_out0, p_out1, p_in0, p_in1);
1732 static int poc_count_same_inputs(
const dagvtx ref)
1777 string why =
"none";
1789 result = 1, why =
"not";
1791 result = -1, why =
"not";
1794 result = -1, why =
"scal";
1796 result = 1, why =
"scal";
1799 result = -1, why =
"mes";
1801 result = 1, why =
"mes";
1804 result = 1, why =
"copy";
1806 result = -1, why =
"copy";
1809 result = 1, why =
"shared";
1811 result = -1, why =
"shared";
1822 int nms1 = 0, nms2 = 0;
1832 if (l1!=l2 && (l1==0 || l2==0))
1834 result = l2-l1, why =
"args";
1835 else if (nms1!=nms2 && l1==1 && l2==1)
1842 result = nms1 - nms2, why =
"succs";
1845 result = l2-l1, why =
"args2";
1872 result<0? ">
": (result==0? "=
": "<
"),
1873 dagvtx_number(*v2), dagvtx_operation(*v2), why);
1875 pips_assert("total order
", v1==v2 || result!=0);
1879 /************************************************************ IMPLEMENTATION */
1881 /* update sure/maybe set of live images after computing vertex v
1882 * that is the images that may be output from the pipeline.
1885 static void live_update(dag d, dagvtx v, set sure, set maybe)
1887 vtxcontent c = dagvtx_content(v);
1889 // skip "other
" statements
1890 if (dagvtx_other_stuff_p(v))
1893 list ins = vtxcontent_inputs(c);
1894 int nins = gen_length(ins);
1895 entity out = vtxcontent_out(c);
1896 int nout = out!=entity_undefined? 1: 0;
1897 list preds = dag_vertex_preds(d, v);
1899 set all = set_make(set_pointer);
1900 set_union(all, sure, maybe);
1901 pips_assert("inputs are available
", list_in_set_p(preds, all));
1902 set_free(all), all = NULL;
1907 // no input image is used
1908 // maybe is unchanged
1909 // I guess it is not "NOP
"...
1910 pips_assert("one output...
", nout==1);
1911 set_union(maybe, maybe, sure);
1917 if (set_size(sure)==1 && !list_in_set_p(preds, sure))
1919 set_assign_list(maybe, preds);
1920 set_union(maybe, maybe, sure);
1924 set_append_list(maybe, preds);
1929 // any of the inputs may be kept
1930 set_assign_list(maybe, preds);
1933 pips_internal_error("unpexted number of inputs to
vertex: %d
", nins);
1939 set_add_element(sure, sure, v);
1941 // else sure is kept
1942 set_difference(maybe, maybe, sure);
1945 gen_free_list(preds), preds = NIL;
1948 /* returns an allocated set of vertices with live outputs.
1950 static set output_arcs(dag d, set vs)
1952 set out_nodes = set_make(set_pointer);
1953 // direct output nodes
1954 SET_FOREACH(dagvtx, v, vs)
1956 if (vtxcontent_out(dagvtx_content(v))!=entity_undefined)
1958 // this vertex produces an image
1959 bool is_needed = false;
1961 // it is needed if...
1962 list succs = dagvtx_succs(v);
1963 if (gen_in_list_p(v, dag_outputs(d)))
1967 // some succs are not yet computed...
1968 FOREACH(dagvtx, vsucc, succs)
1969 if (!set_belong_p(vs, vsucc))
1972 // else there is no successor!?
1975 set_add_element(out_nodes, out_nodes, v);
1981 /* how many output arcs from this set of vertices?
1983 static int number_of_output_arcs(dag d, set vs)
1985 set out_nodes = output_arcs(d, vs);
1986 int n_arcs = set_size(out_nodes);
1987 set_free(out_nodes);
1991 /* @return whether it is a 3x3 convolution
1993 static bool convolution_33(dagvtx v)
1995 if (freia_convolution_p(v)) {
1997 if (freia_convolution_width_height(v, &w, &h, false) && w==3 && h==3)
2003 /* does this dag contain a spoc non-implemented operation?
2005 static bool dag_spoc_not_implemented(dag d)
2007 FOREACH(dagvtx, v, dag_vertices(d))
2008 if (dagvtx_optype(v)==spoc_type_sni ||
2009 // special handling of convolution
2010 (freia_convolution_p(v) && !convolution_33(v)))
2015 /* return first vertex in the list which is compatible, or NULL if none.
2017 static dagvtx first_which_may_be_added
2019 set current, // of dagvtx
2020 list lv, // of candidate dagvtx
2021 set sure, // of dagvtx
2022 set maybe) // image entities
2024 dagvtx chosen = NULL;
2025 set inputs = set_make(set_pointer);
2026 set current_output = output_arcs(dall, current);
2027 int n_outputs = set_size(current_output);
2028 pips_assert("should be okay at first!
", n_outputs<=2);
2030 pips_debug(8, "#outputs = %d\n
", n_outputs);
2032 // output arcs from this subset
2033 // set outputs = output_arcs(current);
2035 FOREACH(dagvtx, v, lv)
2037 pips_debug(8, "considering
vertex %
"_intFMT"\n
", dagvtx_number(v));
2038 pips_assert("not yet there
", !set_belong_p(current, v));
2040 // some intermediate stuff
2041 if (dagvtx_optype(v)==spoc_type_oth)
2047 // no image is produced, so no image output is added...
2048 if (!dagvtx_image(v))
2050 pips_assert("is a mesure
", dagvtx_optype(v)==spoc_type_mes);
2055 list preds = dag_vertex_preds(dall, v);
2056 set_assign_list(inputs, preds);
2057 pips_debug(8, "#inputs = %d\n
", set_size(inputs));
2058 int npreds = gen_length(preds);
2059 gen_free_list(preds), preds = NIL;
2061 //pips_debug(7, "vertex %
"_intFMT": %d preds\n
", dagvtx_number(v), npreds);
2063 if (npreds==2) // binary alu operations...
2065 if (!set_inclusion_p(sure, inputs))
2067 // two lives, but the var reuse will break the pipe by hiding
2068 // the other live which is not used by the computation of v.
2069 if (n_outputs==2 && set_size(inputs)==1)
2072 // another more general case from freia_68
2073 set all_lives = set_make(set_pointer);
2074 set_intersection(all_lives, inputs, maybe);
2075 set_union(all_lives, current_output, all_lives);
2076 int n_lives = set_size(all_lives);
2077 set_free(all_lives);
2079 // cannot do, we would go over the limit...
2084 set_add_element(current, current, v);
2085 int narcs_with_v = number_of_output_arcs(dall, current);
2086 set_del_element(current, current, v);
2088 if (narcs_with_v <= 2)
2099 /* split dag dall into a list of pipelinable dags
2100 * which must be processed in that order (?)
2101 * side effect: dall is more or less consumed...
2103 static list /* of dags */ split_dag(dag initial, const set output_images)
2105 // this may happen if an input image is also an output image...
2106 // pips_assert("no
image reuse
", single_image_assignement_p(initial));
2107 if (!single_image_assignement_p(initial))
2108 pips_user_warning("still some
image reuse...\n
");
2110 // ifdebug(1) pips_assert("initial
dag ok", dag_consistent_p(initial));
2111 // if everything was removed by optimizations, there is nothing to do.
2112 if (dag_computation_count(initial)==0) return NIL;
2114 dag dall = copy_dag(initial);
2115 int nvertices = gen_length(dag_vertices(dall));
2116 list ld = NIL, lcurrent = NIL;
2118 current = set_make(set_pointer),
2119 computed = set_make(set_pointer),
2120 maybe = set_make(set_pointer),
2121 sure = set_make(set_pointer),
2122 avails = set_make(set_pointer);
2124 // well, there are not all available always!
2125 set_assign_list(maybe, dag_inputs(dall));
2126 // set_assign(avails, maybe);
2127 set_assign(computed, maybe);
2133 set_union(avails, sure, maybe);
2136 pips_debug(4, "round %d:\n
", count);
2137 set_fprint(stderr, "computed
", computed,
2138 (gen_string_func_t) dagvtx_to_string);
2139 set_fprint(stderr, "current", current,
2140 (gen_string_func_t) dagvtx_to_string);
2141 set_fprint(stderr, "avails
", avails,
2142 (gen_string_func_t) dagvtx_to_string);
2143 set_fprint(stderr, "maybe
", maybe, (gen_string_func_t) dagvtx_to_string);
2144 set_fprint(stderr, "sure
", sure, (gen_string_func_t) dagvtx_to_string);
2147 list computables = dag_computable_vertices(dall, computed, avails, current);
2148 dagvtx_spoc_priority_computables = computables;
2149 dagvtx_spoc_priority_current = lcurrent;
2150 gen_sort_list(computables,
2151 (int(*)(const void*,const void*)) dagvtx_spoc_priority);
2152 dagvtx_spoc_priority_computables = NIL;
2153 dagvtx_spoc_priority_current = NIL;
2155 pips_assert("something must be computable
if current is
empty",
2156 computables || !set_empty_p(current));
2158 pips_debug(4, "%d computable vertices\n
", (int) gen_length(computables));
2159 ifdebug(5) dagvtx_nb_dump(stderr, "computables
", computables);
2161 // take the first one possible in the pipe, if any
2163 first_which_may_be_added(dall, current, computables, sure, maybe);
2165 if (vok && (dagvtx_optype(vok)==spoc_type_sni ||
2166 (freia_convolution_p(vok) && !convolution_33(vok)))
2168 // extract non implemented nodes alone only!
2173 pips_debug(5, "extracting %
" _intFMT "...\n
", dagvtx_number(vok));
2174 set_add_element(current, current, vok);
2175 lcurrent = CONS(dagvtx, vok, lcurrent);
2176 set_add_element(computed, computed, vok);
2177 live_update(dall, vok, sure, maybe);
2178 // set_union(avails, sure, maybe);
2181 // no stuff vertex can be added, or it was the last one,
2182 // or it is not implemented and thus to be extracted "alone
"
2183 if (!vok || set_size(computed)==nvertices ||
2184 (vok && (dagvtx_optype(vok)==spoc_type_sni ||
2185 (freia_convolution_p(vok) && !convolution_33(vok)))))
2188 // set_fprint(stderr, "closing
current", current, )
2189 pips_debug(5, "closing
current...\n
");
2190 pips_assert("current not
empty", !set_empty_p(current));
2192 // gen_sort_list(lcurrent, (gen_cmp_func_t) dagvtx_ordering);
2193 lcurrent = gen_nreverse(lcurrent);
2194 // close current and build a deterministic dag...
2195 dag nd = make_dag(NIL, NIL, NIL);
2196 FOREACH(dagvtx, v, lcurrent)
2198 pips_debug(7, "extracting
node %
" _intFMT "\n
", dagvtx_number(v));
2199 dag_append_vertex(nd, copy_dagvtx_norec(v));
2201 dag_compute_outputs(nd, NULL, output_images, NIL, false);
2202 dag_cleanup_other_statements(nd);
2205 // dag_dump(stderr, "updated dall
", dall);
2206 dag_dump(stderr, "pushed
dag", nd);
2209 // update global list of dags to return.
2210 ld = CONS(dag, nd, ld);
2213 gen_free_list(lcurrent), lcurrent = NIL;
2216 set_assign(maybe, computed);
2219 gen_free_list(computables);
2221 while (set_size(computed)!=nvertices);
2223 // checks and cleanup
2225 pips_assert("all vertices were computed
", set_size(computed)==nvertices);
2233 pips_debug(5, "returning %d dags\n
", (int) gen_length(ld));
2234 return gen_nreverse(ld);
2237 /* generate helpers for statements in ls of module
2238 * output resulting functions in helper, which may be empty in some cases.
2240 * @param ls list of statements for the dag (in reverse order)
2241 * @param helper output file
2242 * @param helpers created functions
2243 * @param number current helper dag count
2244 * @return list of intermediate images to allocate
2246 list freia_spoc_compile_calls
2250 list /* of statements */ ls,
2251 const hash_table occs,
2252 hash_table exchanges,
2253 const set output_images,
2259 pips_debug(3, "considering %d statements\n
", (int) gen_length(ls));
2260 pips_assert("some statements
", ls);
2262 int n_op_init, n_op_init_copies;
2263 freia_aipo_count(fulld, &n_op_init, &n_op_init_copies);
2265 // must have distinct images...
2266 hash_table init = hash_table_make(hash_pointer, 0);
2267 list new_images = dag_fix_image_reuse(fulld, init, occs);
2269 list added_before = NIL, added_after = NIL;
2270 freia_dag_optimize(fulld, exchanges, &added_before, &added_after);
2272 // remove copies and duplicates if possible...
2273 // ??? maybe there should be an underlying transitive closure? not sure.
2274 int n_op_opt, n_op_opt_copies;
2275 freia_aipo_count(fulld, &n_op_opt, &n_op_opt_copies);
2277 fprintf(helper_file,
2280 "optimized to %d ops and %d+%d+%d copies\n",
2281 number, n_op_init, n_op_init_copies,
2282 n_op_opt, n_op_opt_copies,
2287 added_before, added_after);
2307 int n_spoc_calls = 0;
2331 string fname_dag =
strdup(
cat(fname_fulldag,
"_",
i2a(n_pipes++)));
2349 string fname_split =
strdup(
cat(fname_dag,
"_",
i2a(split++)));
2355 ls, fname_split,
lparams, helpers, stnb);
2358 free(fname_split), fname_split = NULL;
2361 n_spoc_calls += split;
2362 fprintf(helper_file,
"// split %d: %d cut%s\n",
2363 n_pipes-1, split, split>1?
"s":
"");
2365 set_free(remainings), remainings = NULL;
2366 free(fname_dag), fname_dag = NULL;
2372 fprintf(helper_file,
"// # SPOC calls: %d\n", n_spoc_calls);
2384 set_free(global_remainings), global_remainings = NULL;
2385 free(fname_fulldag), fname_fulldag = NULL;
2391 list real_new_images =
2395 return real_new_images;
float a2sf[2] __attribute__((aligned(16)))
USER generates a user error (i.e., non fatal) by printing the given MSG according to the FMT.
static void node(FILE *out, string name)
Build for module name a node and link to its successors.
int get_int_property(const string)
void free_dagvtx(dagvtx p)
static reference ref
Current stmt (an integer)
void const char const char const int
_int dagvtx_optype(const dagvtx v)
list dag_vertex_preds(const dag d, const dagvtx target)
return target predecessor vertices as a list.
_int dagvtx_number(const dagvtx v)
returns the vertex number, i.e.
void freia_hack_fix_global_ins_outs(dag dfull, dag d)
catch some cases of missing outs between splits...
entity dagvtx_image(const dagvtx v)
return the produced image or NULL
void dag_remove_vertex(dag d, const dagvtx v)
remove vertex v from dag d.
void dag_dump(FILE *out, const string what, const dag d)
for dag debug
void dag_compute_outputs(dag d, const hash_table occs, const set output_images, const list ld, bool inloop)
(re)compute the list of GLOBAL input & output images for this dag ??? BUG the output is rather an app...
void dag_dot_dump(const string module, const string name, const dag d, const list lb, const list la)
generate a "dot" format from a dag to a file.
void set_append_vertex_statements(set s, list lv)
string dagvtx_operation(const dagvtx v)
_int dagvtx_opid(const dagvtx v)
void dag_statements(set stats, const dag d)
build the set of actual statements in d
void dag_dot_dump_prefix(const string module, const string prefix, int number, const dag d, const list lb, const list la)
void freia_add_image_arguments(list limg, list *lparams)
prepend limg images in front of the argument list; limg is consumed by the operation.
list freia_get_vertex_params(const dagvtx v)
list freia_extract_params(const int napi, list args, string_buffer head, string_buffer head2, hash_table params, int *nparams)
returns an allocated expression list of the parameters only (i.e.
void hwac_kill_statement(statement s)
remove contents of statement s.
list freia_allocate_new_images_if_needed(list ls, list images, const hash_table occs, const hash_table init, const hash_table signatures)
insert image allocation if needed, for intermediate image inserted before if an image is used only tw...
void freia_migrate_statements(sequence sq, const set stats, const set before)
int freia_substitute_by_helper_call(dag d, set global_remainings, set remainings, list ls, const string function_name, list lparams, set helpers, int preceeding)
substitute those statement in ls that are in dag d and accelerated by a call to function_name(lparams...
const freia_api_t * get_freia_api(int index)
call freia_statement_to_call(const statement s)
return the actual function call from a statement, dealing with assign and returns....
string what_operation(const _int type)
void freia_insert_added_stats(list ls, list stats, bool before)
insert statements to actual code sequence in "ls" BEWARE that ls is assumed to be in reverse order....
#define FREIA_DEFAULT_BPP
static int find_first_crossing(hash_table wiring, int stage, int level)
return the first stage after stage/level with both paths available
static list dagvtx_spoc_priority_computables
current list of computable that may be used to know about the global context when comparing to vertic...
static bool check_wiring_output(hash_table wiring, int stage, int side)
can I get out of stage on this side?
static bool available_component(hash_table wiring, int stage, int level, int side)
static void basic_spoc_conf(spoc_hardware_type op, string_buffer body, string_buffer tail, int stage, int side, bool flip, const spoc_hw_t *conf, dagvtx orig, hash_table hp)
basic configuration generation for a stage, depending on hw description
static void generate_wiring_stage(string_buffer code, int stage, int in_side, int out_side, hash_table wiring)
all possible wirings at one stage
static void generate_wiring(string_buffer code, op_schedule *in, op_schedule *out, hash_table wiring)
generate wire code from in to out, record choices in wiring.
static _int freia_spoc_pipeline(string module, string helper, string_buffer code, dag dpipe, list *lparams, const set output_images)
generate a SPoC pipeline from a single DAG for module.
static void print_op_schedule(FILE *out, const string name, const op_schedule *op)
static bool image_is_needed(dagvtx prod, dag d, set todo)
is image needed?
static void init_op_schedule(op_schedule *op, dagvtx v, int side)
static _int component_index(int stage, int level, int side)
static bool erode_alu_shared_p(vtxcontent c1, vtxcontent c2)
? = Morpho(I); ? = ALU(I, ?);
static int spoc_measure_n_params(spoc_measure_t measure)
static void set_component(hash_table wiring, int stage, int level, int side)
static void spoc_alu_conf(spoc_alu_t alu, string_buffer body, __attribute__((__unused__)) string_buffer tail, int stage, bool flip, dagvtx orig, hash_table hp)
generate a configuration for the ALU hardware component.
static bool is_consummed_by_vertex(dagvtx prod, dagvtx v, dag d, set todo)
tell whether the image produced by prod is definitely consumed by v given the global dag d and the s...
static void where_to_perform_operation(const dagvtx op, op_schedule *in0, op_schedule *in1, dag computed, set todo, op_schedule *out, hash_table wiring)
depending on available images (stage d, level, side 0/1) and vertex operation to perform ,...
static void spoc_poc_conf(spoc_poc_t poc, string_buffer body, __attribute__((__unused__)) string_buffer tail, int stage, int side, dagvtx orig, hash_table hp)
generate a configuration for a POC (morpho) hardware component.
static const spoc_alu_op_t ALU_OP[]
static int max_stage(const op_schedule *in0, const op_schedule *in1)
static void comment(string_buffer code, spoc_hardware_type hw, dagvtx v, int stage, int side, bool flip)
static void freia_spoc_code_buildup(string module, string function_name, string_buffer code, const string_buffer head, const string_buffer body, const string_buffer tail, int n_im_out, int n_im_in, bool some_reductions, bool some_kernels, const string out0, const string out1, const string in0, const string in1)
build up final pipeline code from various pieces
static bool dag_spoc_not_implemented(dag d)
does this dag contain a spoc non-implemented operation?
static void spoc_measure_conf(spoc_measure_t measure, __attribute__((__unused__)) string_buffer body, string_buffer tail, int stage, int side, dagvtx orig, hash_table hp)
there is no real configuration for the measures, the issue is just to fetch them.
static const spoc_alu_op_t * get_spoc_alu_conf(spoc_alu_t alu)
static void spoc_th_conf(string_buffer body, __attribute__((__unused__)) string_buffer tail, int stage, int side, dagvtx orig, hash_table hp)
generate a configuration for a threshold component.
static int dagvtx_spoc_priority(const dagvtx *v1, const dagvtx *v2)
comparison function for sorting dagvtx in qsort, this is deep voodoo, because the priority has an imp...
static void find_first_available_component(hash_table wiring, int start_stage, int level, int side, int target_level, bool crossing, int *pstage, int *pside)
return the stage & side of the first component available after (start_stage, level,...
static void set_mux(hash_table wiring, int stage, int mux)
static void set_wiring(string_buffer code, int stage, int mux, _int value, hash_table wiring)
generate wiring code for mux if necessary.
static list split_dag(dag initial, const set output_images)
split dag dall into a list of pipelinable dags which must be processed in that order (?...
static list dagvtx_spoc_priority_current
static bool check_mux_availibity(hash_table wiring, int stage, int mux)
#define FREIA_SPOC_CALL_START
#define FREIA_SPOC_CALL_REDUC
spoc_alu_t
all SPoC ALU operations
#define FREIA_SPOC_CALL_END
#define pstatement_statement_p(x)
#define dagvtx_content(x)
#define vtxcontent_optype(x)
#define vtxcontent_out(x)
#define pstatement_statement(x)
#define vtxcontent_inputs(x)
#define vtxcontent_source(x)
list gen_nreverse(list cp)
reverse a list in place
void gen_remove(list *cpp, const void *o)
remove all occurrences of item o from list *cpp, which is thus modified.
#define NIL
The empty list (nil in Lisp)
list gen_copy_seq(list l)
Copy a list structure.
size_t gen_length(const list l)
#define CONS(_t_, _i_, _l_)
List element cell constructor (insert an element at the beginning of a list)
list gen_nconc(list cp1, list cp2)
physically concatenates CP1 and CP2 but does not duplicate the elements
#define CAR(pcons)
Get the value of the first element of a list.
void gen_free_list(list l)
free the spine of the list
bool gen_in_list_p(const void *vo, const list lx)
tell whether vo belongs to lx
#define FOREACH(_fe_CASTER, _fe_item, _fe_list)
Apply/map an instruction block on all the elements of a list.
#define CDR(pcons)
Get the list less its first element.
hash_table hash_table_make(hash_key_type key_type, size_t size)
void * hash_get(const hash_table htp, const void *key)
this function retrieves in the hash table pointed to by htp the couple whose key is equal to key.
void hash_put(hash_table htp, const void *key, const void *val)
This functions stores a couple (key,val) in the hash table pointed to by htp.
void hash_table_free(hash_table htp)
this function deletes a hash table that is no longer useful.
bool hash_defined_p(const hash_table htp, const void *key)
true if key has a value in htp.
#define src(name, suf)
HPFC by Fabien Coelho, May 1993 and later...
#define pips_debug
these macros use the GNU extensions that allow variadic macros, including with an empty list.
#define pips_assert(what, predicate)
common macros, two flavors depending on NDEBUG
#define pips_internal_error
#define message_assert(msg, ex)
char * i2a(int)
I2A (Integer TO Ascii) yields a string for a given Integer.
bool set_empty_p(const set)
tell whether set s is empty.
set set_assign_list(set, const list)
assigns a list contents to a set all duplicated elements are lost
bool set_intersection_p(const set, const set)
returns whether s1 n s2 <> 0 complexity of the intersection
set set_del_element(set, const set, const void *)
int set_size(const set)
returns the number of items in s.
set set_difference(set, const set, const set)
#define SET_FOREACH(type_name, the_item, the_set)
enumerate set elements in their internal order.
bool set_belong_p(const set, const void *)
set set_union(set, const set, const set)
set set_make(set_type)
Create an empty set of any type but hash_private.
set set_add_element(set, const set, const void *)
void string_buffer_append_sb(string_buffer, const string_buffer)
append the string buffer sb2 to string buffer sb.
void string_buffer_to_file(const string_buffer, FILE *)
put string buffer into file.
void string_buffer_free(string_buffer *)
free string buffer structure, also free string contents according to the dup field
string_buffer string_buffer_make(bool dup)
allocate a new string buffer
list lparams
Array bounds.
const char * entity_local_name(entity e)
entity_local_name modified so that it does not core when used in vect_fprint, since someone thought t...
entity local_name_to_top_level_entity(const char *n)
This function tries to find a top-level entity from a local name.
static int init
Maximal value set for Fortran 77.
#define EXPRESSION(x)
EXPRESSION.
#define call_arguments(x)
int fprintf()
test sc_min : ce test s'appelle par : programme fichier1.data fichier2.data ...
internally defined structure.
FI: I do not understand why the type is duplicated at the set level.
The structure used to build lists in NewGen.
FREIA API function name -> SPoC hardware description (and others?)
code taken from http://fast-edge.googlecode.com and adapted to c99
a data structure to describe a schedule for an operation
spoc_hardware_type used_level
ALU operation full description.
description of a SPoC hardware configuration should be precise enough to generate a full AIPO functio...
@ empty
b1 < bj -> h1/hj = empty