24 #include "pips_config.h"
45 #define MPPA_DEFAULT_SMEM_SLOTS 4
50 #define SMEM_SLOT_UNDEFINED get_int_property("HWAC_MPPA_MAX_SMEM_SLOTS")
65 for (
unsigned int i = 0; i < max_smem_slots; i++) {
81 for (
unsigned int i = 0; i < max_smem_slots; i++) {
108 hash_put(slot_uses_ht, vtx, (
void *)nb_succs);
133 if (pred_rem_use == 0) {
137 hash_update(slot_uses_ht, pred, (
void *)(pred_rem_use - 1));
144 hash_put(slot_uses_ht, vtx, (
void *)nb_succs);
161 hash_update(slot_uses_ht, vtx, (
void *)(rem_uses - 1));
195 unsigned long len = strlen(str);
196 for (
unsigned int i = 0; i < len; i++) {
257 sb_prf(sb_args,
"int32_t %s, ",
275 &se[4], &se[5], &se[6], &se[7], &se[8])) {
280 sb_prf(sb_args,
"int32_t *%s, ", se);
292 if (fargs[strlen(fargs) - 2] ==
',') {
293 fargs[strlen(fargs) - 2] =
'\0';
296 for (
unsigned int i = 0; i < strlen(fargs); i++) {
297 if (fargs[i] ==
'&') {
318 unsigned int dagi,
list largs) {
319 bool call_inserted =
false;
333 call_inserted =
true;
350 const string fname,
const int dagi,
351 FILE *
const helper) {
353 fprintf(helper,
"\n// module=%s fname=%s split=%d\n",
module, fname, dagi);
362 unsigned int meas_ctr = 0;
364 unsigned int instr_ctr = 0;
372 for (
unsigned int i = 0; i < max_smem_slots; i++) {
380 sb_prf(sb_cmd,
"int %s_%d(%s) {\n", fname, dagi, fparams);
382 sb_cat(sb_cmd,
" mppa_cc_instr_t *instrs;\n");
383 sb_cat(sb_cmd,
" unsigned int i = 0;\n");
384 sb_prf(sb_cmd,
" mppa_cc_cmd_t %s;\n", curr_cmd);
385 sb_prf(sb_cmd,
" instrs = %s.instrs;\n", curr_cmd);
388 pips_assert(
"non-null number of SMEM slots", max_smem_slots > 0);
389 pips_assert(
"enough number of SMEM slots", max_smem_slots < 255);
390 sb_cat(sb_cmd,
" /* override default SMEM slots number */\n");
391 sb_prf(sb_cmd,
" mppa_smem_slots = %d;\n", max_smem_slots);
403 sb_cat(sb_cmd,
" instrs[i].kind = MPPA_CMD_GET_IO_TILE;\n");
405 " instrs[i].com.io_pos = ((io_image_h *)%s->mppa_ptr)->pos;\n",
408 sb_prf(sb_cmd,
" instrs[i].com.cc_pos = %d;\n",
413 sb_prf(sb_cmd,
" instrs[i].kind = MPPA_CMD_EXECUTE_KERNEL;\n");
417 unsigned int predi = 0;
419 sb_prf(sb_cmd,
" instrs[i].opr.pos[%d] = %d; /* input */\n", predi + 1,
428 if (measvar[0] ==
'&') {
429 measvar = &measvar[1];
431 sb_prf(sb_cmd,
" instrs[i].opr.red_dst[0] = %s; /* result */\n",
437 sb_prf(sb_cmd,
" uint32_t meas_off%d;\n", meas_ctr);
439 " instrs[i].opr.red_dst[1] = &meas_off%d; /* offset */\n",
453 sb_prf(sb_cmd,
" instrs[i].opr.pos[0] = %d; /* output */\n", slot);
461 &se[3], &se[4], &se[5], &se[6], &se[7],
465 sb_cmd,
" instrs[i].opr.scalars[0] = 0b%d%d%d%d%d%d%d%d%d;\n",
466 se[0], se[1], se[2], se[3], se[4], se[5], se[6], se[7], se[8]);
469 sb_prf(sb_cmd,
" instrs[i].opr.scalars[0] = se2scal(%s);\n",
477 unsigned int argi = 0;
486 " instrs[i].opr.kernel = MPPA_KERNEL_THRESHOLD_BIN;\n");
491 sb_prf(sb_cmd,
" instrs[i].opr.scalars[%d] = %s;\n", argi,
501 sb_prf(sb_cmd,
" i++; /* instr #%u, vtx #%d */\n", instr_ctr,
507 sb_cat(sb_cmd,
" instrs[i].kind = MPPA_CMD_PUT_IO_TILE;\n");
510 " instrs[i].com.io_pos = ((io_image_h *)%s->mppa_ptr)->pos;\n",
516 sb_prf(sb_cmd,
" i++; /* instr #%u, vtx #%d */\n", instr_ctr,
524 unsigned int max_instrs_per_cmd =
527 instr_ctr <= max_instrs_per_cmd);
530 sb_cat(sb_cmd,
" /* launch computation... */\n");
531 sb_prf(sb_cmd,
" mppa_compute(i, &%s);\n", curr_cmd);
539 pips_assert(
"coord var name contains more than '&' char",
541 pips_assert(
"coord var name contains more than '&' char",
547 sb_prf(sb_cmd,
" *%s = meas_off%d %% %s->widthWa;\n", xcoord,
value, imin);
548 sb_prf(sb_cmd,
" *%s = meas_off%d / %s->widthWa;\n", ycoord,
value, imin);
553 sb_cat(sb_cmd,
" return 0;\n");
580 unsigned int n_vtx_dag) {
584 for (
unsigned int i = 0; i <
n_dags; i++) {
588 unsigned int vtxi = 0;
590 if (vtxi >= i * n_vtx_dag && vtxi < (i + 1) * n_vtx_dag) {
676 unsigned int n_instrs =
679 if (n_instrs <= upper_bound) {
684 unsigned int n_dags = n_instrs / upper_bound + 1;
685 unsigned int n_vtx_dag = n_instrs /
n_dags + 1;
688 if (n_vtx_dag > upper_bound - 4) {
690 n_vtx_dag = n_instrs /
n_dags + 1;
693 pips_assert(
"splitted dags are small enough", n_vtx_dag <= upper_bound);
713 const set output_images, FILE *helper_file,
float a2sf[2] __attribute__((aligned(16)))
USER generates a user error (i.e., non fatal) by printing the given MSG according to the FMT.
int get_int_property(const string)
pstatement make_pstatement_empty(void)
dagvtx make_dagvtx(vtxcontent a1, list a2)
vtxcontent make_vtxcontent(intptr_t a1, intptr_t a2, pstatement a3, list a4, entity a5)
dag make_dag(list a1, list a2, list a3)
call make_call(entity a1, list a2)
struct paramStruct params
static graph dg
dg is the dependency graph; FIXME: should not be static global?
void dag_cleanup_other_statements(dag d)
remove unneeded statements? you must know they are really un-needed!
_int dagvtx_optype(const dagvtx v)
list dag_vertex_preds(const dag d, const dagvtx target)
return target predecessor vertices as a list.
_int dagvtx_number(const dagvtx v)
returns the vertex number, i.e.
bool dag_no_image_operation(dag d)
tell whether we have something to do with images ??? hmmm...
list dag_split_on_scalars(const dag initial, bool(*alone_only)(const dagvtx), dagvtx(*choose_vertex)(const list, bool), gen_cmp_func_t priority, void(*priority_update)(const dag), const set output_images)
split a dag on scalar dependencies only, with a greedy heuristic.
int dagvtx_ordering(const dagvtx *v1, const dagvtx *v2)
list dag_fix_image_reuse(dag d, hash_table init, const hash_table occs)
fix intermediate image reuse in dag
bool dagvtx_is_measurement_p(const dagvtx v)
returns whether the vertex is an image measurement operation.
entity dagvtx_image(const dagvtx v)
return the produced image or NULL
void dag_dump(FILE *out, const string what, const dag d)
for dag debug
void freia_dag_optimize(dag d, hash_table exchanges, list *lbefore, list *lafter)
remove dead image operations.
string dagvtx_operation(const dagvtx v)
statement dagvtx_statement(const dagvtx v)
return statement if any, or NULL (for input nodes).
_int dagvtx_opid(const dagvtx v)
void dag_statements(set stats, const dag d)
build the set of actual statements in d
void dag_dot_dump_prefix(const string module, const string prefix, int number, const dag d, const list lb, const list la)
entity freia_create_helper_function(const string function_name, list lparams)
list freia_get_vertex_params(const dagvtx v)
list freia_allocate_new_images_if_needed(list ls, list images, const hash_table occs, const hash_table init, const hash_table signatures)
insert image allocation if needed, for intermediate image inserted before if an image is used only tw...
void freia_migrate_statements(sequence sq, const set stats, const set before)
void hwac_replace_statement(statement s, call newc, bool kill)
replace statement contents with call to newc, or continue if kill
bool freia_extract_kernel_vtx(dagvtx v, bool strict, intptr_t *k00, intptr_t *k10, intptr_t *k20, intptr_t *k01, intptr_t *k11, intptr_t *k21, intptr_t *k02, intptr_t *k12, intptr_t *k22)
vertex-based version
call freia_ok(void)
build all is well freia constant
void freia_insert_added_stats(list ls, list stats, bool before)
insert statements to actual code sequence in "ls" BEWARE that ls is assumed to be in reverse order....
const freia_api_t * get_freia_api_vtx(dagvtx v)
static string dots2us(string str)
Replace dots in string with underscores.
static _int reuse_pred_slot(const dagvtx vtx, const list preds, hash_table slot_uses_ht)
Try to find a reusable memory slot for in-place operators.
static void mppa_compile_dag(const string module, const dag cdg, const string fname, const int dagi, FILE *const helper)
Generate an optimized, FREIA-MPPA low level version of this dag.
#define MPPA_DEFAULT_SMEM_SLOTS
Default number of memory slots.
#define SMEM_SLOT_UNDEFINED
Placeholder for an undefined slot.
static void mppa_call_helper(const dag dg, const string fname, unsigned int dagi, list largs)
Replace FREIA calls by PIPS generated ones.
list freia_mppa_compile_calls(string module, dag fulld, sequence sq, list ls, const hash_table occs, hash_table exchanges, const set output_images, FILE *helper_file, __attribute__((__unused__)) set helpers, int number)
Compile one dag with AIPO optimizations.
static list mppa_helper_args_params(const dag cdg, string *params)
Build a dag list of arguments and a string of corresponding parameters.
static _int get_a_smem_slot(const dagvtx vtx, hash_table slot_uses_ht)
Provide a valid unused SMEM slot and link it to vertex.
static _int get_output_slot(dagvtx vtx)
Get output slot used by input vertex.
static list mppa_dag_split(const dag dg, unsigned int n_dags, unsigned int n_vtx_dag)
Split a dag into several subdags.
static void process_used_slots(list vtx_preds, hash_table slot_uses_ht)
Update preds usage table, unused slot list.
static dagvtx * smem_slot_users
Array of vertices using SMEM slots (indices) as output.
static list mppa_dag_maybe_split_instrs_cmd(const dag dg)
Split a dag in several sub-dags if too large.
static _int use_output_slot(const dagvtx vtx, hash_table slot_uses_ht)
Provide vertex used SMEM slot, update uses table.
static _int get_free_slot()
Get first unused SMEM slot.
#define dagvtx_content(x)
#define vtxcontent_out(x)
#define vtxcontent_inputs(x)
list gen_nreverse(list cp)
reverse a list in place
void gen_remove(list *cpp, const void *o)
remove all occurrences of item o from list *cpp, which is thus modified.
#define NIL
The empty list (nil in Lisp)
list gen_copy_seq(list l)
Copy a list structure.
size_t gen_length(const list l)
#define CONS(_t_, _i_, _l_)
List element cell constructor (insert an element at the beginning of a list)
list gen_nconc(list cp1, list cp2)
physically concatenates CP1 and CP2 but does not duplicate the elements
#define CAR(pcons)
Get the value of the first element of a list.
void gen_free_list(list l)
free the spine of the list
bool gen_in_list_p(const void *vo, const list lx)
tell whether vo belongs to lx
#define FOREACH(_fe_CASTER, _fe_item, _fe_list)
Apply/map an instruction block on all the elements of a list.
gen_chunk gen_nth(int n, const list l)
to be used as ENTITY(gen_nth(3, l))...
list gen_full_copy_list(list l)
Copy a list structure with element copy.
list gen_insert_before(const void *no, const void *o, list l)
hash_table hash_table_make(hash_key_type key_type, size_t size)
void * hash_get(const hash_table htp, const void *key)
this function retrieves in the hash table pointed to by htp the couple whose key is equal to key.
void hash_put(hash_table htp, const void *key, const void *val)
This function stores a couple (key,val) in the hash table pointed to by htp.
void hash_update(hash_table htp, const void *key, const void *val)
update key->val in htp, that MUST be pre-existent.
void hash_table_free(hash_table htp)
this function deletes a hash table that is no longer useful.
void * hash_del(hash_table htp, const void *key)
this function removes from the hash table pointed to by htp the couple whose key is equal to key.
#define pips_debug
these macros use the GNU extensions that allow variadic macros, including with an empty list.
#define pips_assert(what, predicate)
common macros, two flavors depending on NDEBUG
char * i2a(int)
I2A (Integer TO Ascii) yields a string for a given Integer.
#define HASH_FOREACH(key_type, k, value_type, v, ht)
#define same_string_p(s1, s2)
bool set_belong_p(const set, const void *)
set set_union(set, const set, const set)
set set_make(set_type)
Create an empty set of any type but hash_private.
set set_add_element(set, const set, const void *)
string string_buffer_to_string(const string_buffer)
return malloc'ed string from string buffer sb
void string_buffer_to_file(const string_buffer, FILE *)
put string buffer into file.
void string_buffer_free(string_buffer *)
free string buffer structure, also free string contents according to the dup field
string_buffer string_buffer_make(bool dup)
allocate a new string buffer
int(* gen_cmp_func_t)(const void *, const void *)
string expression_to_string(expression e)
entity local_name_to_top_level_entity(const char *n)
This function tries to find a top-level entity from a local name.
static int init
Maximal value set for Fortran 77.
expression entity_to_expression(entity e)
if v is a constant, returns a constant call.
#define EXPRESSION(x)
EXPRESSION.
#define entity_undefined_p(x)
int fprintf()
test sc_min: this test is invoked as: programme fichier1.data fichier2.data ...
internally defined structure.
FI: I do not understand why the type is duplicated at the set level.
The structure used to build lists in NewGen.
FREIA API function name -> SPoC hardware description (and others?)
unsigned int arg_misc_out