27 #ifdef BUILDER_NEST_PARALLELIZATION
30 #include "pips_config.h"
77 static bool parallel_loop_has_been_selected;
85 #define UNKNOWN_LOOP_COUNT 0
86 #define SMALL_LOOP_COUNT 1
87 #define MEDIUM_LOOP_COUNT 2
88 #define LARGE_LOOP_COUNT 3
90 #define SEQUENTIAL_DIRECTION 0
91 #define VECTOR_DIRECTION 1
92 #define PARALLEL_DIRECTION 2
96 typedef struct transformation_strategy {
97 int maximum_iteration_count;
99 } transformation_strategy;
132 if(parallel_loop_has_been_selected && !parallel_loop_has_been_selected)
146 parallel_loop_has_been_selected =
true;
179 static bool current_loop_index_p(
reference r)
254 debug(9,
"estimate_range_count",
"output\n");
266 static int numerical_loop_iteration_count(
loop l)
286 static transformation_strategy
287 one_loop_transformation_strategies
288 [PARALLEL_DIRECTION+1][LARGE_LOOP_COUNT+1] =
290 {{-1, loop_preserve},
291 {4, tuned_loop_unroll },
292 {80, loop_preserve },
293 {INT_MAX, loop_preserve}},
294 {{-1, loop_vectorize},
295 {4, tuned_loop_unroll },
296 {80, loop_vectorize },
297 {INT_MAX, tuned_loop_strip_mine}},
298 {{-1, tuned_loop_parallelize},
299 {4, tuned_loop_unroll },
300 {80, tuned_loop_parallelize },
301 {INT_MAX, tuned_loop_parallelize}}
321 if(carried_dependence_p(s))
322 kind = SEQUENTIAL_DIRECTION;
324 kind = VECTOR_DIRECTION;
326 kind = PARALLEL_DIRECTION;
329 for( size = UNKNOWN_LOOP_COUNT; size <= LARGE_LOOP_COUNT; size++)
330 if( c <= one_loop_transformation_strategies[kind][size].maximum_iteration_count)
333 if(size>LARGE_LOOP_COUNT) {
335 " for kind %d and count %d\n",
339 pips_debug(9,
"kind = %d, size = %d, c = %d\n", kind, size, c);
340 (* one_loop_transformation_strategies[kind][size].loop_transformation)
365 count += look_for_references_in_expression(rl, reference_transformation, reference_predicate);
366 count += look_for_references_in_expression(ru, reference_transformation, reference_predicate);
367 count += look_for_references_in_expression(ri, reference_transformation, reference_predicate);
401 count += look_for_references_in_expression(e,
402 reference_transformation,
403 reference_predicate);
426 if ( (*reference_predicate)(r)) {
437 count += look_for_references_in_expression(indice, reference_transformation,
438 reference_predicate);
444 reference_predicate);
447 count = look_for_references_in_call(
syntax_call(s), reference_transformation,
448 reference_predicate);
470 reference_transformation,
471 reference_predicate);
478 reference_predicate);
479 count += look_for_references_in_statement(
test_true(t), reference_transformation,
480 reference_predicate);
481 count += look_for_references_in_statement(
test_false(t), reference_transformation,
482 reference_predicate);
487 count = look_for_references_in_range(
loop_range(l), reference_transformation,
488 reference_predicate);
489 count += look_for_references_in_statement(
loop_body(l), reference_transformation,
490 reference_predicate);
496 reference_predicate);
498 reference_predicate);
503 reference_predicate);
525 static bool constant_array_reference_p(
reference r)
547 int count = look_for_references_in_expression(i, reference_identity,
548 current_loop_index_p);
560 static bool contiguous_array_reference_p(
reference r)
566 bool contiguous_p =
false;
605 static int current_loop_depth = -1;
609 static int count = 0;
612 return count == current_loop_depth;
638 #define DIRECTION_CONTIGUOUS_COUNT 0
639 #define DIRECTION_PARALLEL_P 1
640 #define DIRECTION_REUSE_COUNT 2
641 #define DIRECTION_ITERATION_COUNT 3
642 #define CHARACTERISTICS_NUMBER 4
643 int *characteristics[CHARACTERISTICS_NUMBER];
646 int vector_loop_number;
647 int optimal_performance;
653 for(i=0; i < CHARACTERISTICS_NUMBER; i++)
654 characteristics[i] = (
int *)
malloc(loop_count*(
sizeof(ln)));
660 *(characteristics[DIRECTION_CONTIGUOUS_COUNT]+ln) =
661 look_for_references_in_statement(ls, reference_identity, contiguous_array_reference_p);
672 *(characteristics[DIRECTION_PARALLEL_P]+ln) =
680 *(characteristics[DIRECTION_REUSE_COUNT]+ln) =
681 look_for_references_in_statement(ls,
683 constant_array_reference_p);
689 *(characteristics[DIRECTION_ITERATION_COUNT]+ln) =
698 (void)
fprintf(stderr,
"loop %d index %s\t#contiguous %d\t// %s\t#reuse %d\t#range %d\n",
701 *(characteristics[DIRECTION_CONTIGUOUS_COUNT]+ln),
703 *(characteristics[DIRECTION_REUSE_COUNT]+ln),
704 *(characteristics[DIRECTION_ITERATION_COUNT]+ln));
715 optimal_performance = 0;
716 vector_loop_number = -1;
717 for(ln = 0; ln < loop_count; ln++) {
719 #define REUSE_WEIGHT 8
720 #define CONTIGUITY_WEIGHT 4
721 #define ITERATION_COUNT_WEIGHT 1
723 REUSE_WEIGHT*(*(characteristics[DIRECTION_REUSE_COUNT]+ln))
724 + CONTIGUITY_WEIGHT*(*(characteristics[DIRECTION_CONTIGUOUS_COUNT]+ln));
725 int iteration_count = *(characteristics[DIRECTION_ITERATION_COUNT]+ln);
731 performance += ITERATION_COUNT_WEIGHT*(iteration_count);
748 if(*(characteristics[DIRECTION_PARALLEL_P]+ln)
749 && performance > optimal_performance) {
750 optimal_performance = performance;
751 vector_loop_number = ln;
754 if(vector_loop_number != -1) {
757 pips_debug(8,
"Vector loop is loop %d with performance %d\n",
758 vector_loop_number, optimal_performance);
761 if(vector_loop_number != loop_count-1) {
763 pips_debug(8,
"Interchange innermost loop with vector loop\n");
770 vector_loop_number+1,
799 debug_on(
"NEST_PARALLELIZATION_DEBUG_LEVEL");
808 s = loop_nest_parallelization(lls);
841 debug_on(
"NEST_PARALLELIZATION_DEBUG_LEVEL");
843 parallel_loop_has_been_selected =
false;
860 fprintf(stderr,
" gen consistent ");
867 (
char*) mod_parallel_stat);
float a2sf[2] __attribute__((aligned(16)))
USER generates a user error (i.e., non fatal) by printing the given MSG according to the FMT.
statement copy_statement(statement p)
STATEMENT.
bool statement_consistent_p(statement p)
dg_vertex_label vertex_label
#define VALUE_TO_INT(val)
static graph dg
dg is the dependency graph ; FIXME : should not be static global ?
struct _newgen_struct_statement_ * statement
bool vect_constant_p(Pvecteur)
bool vect_constant_p(Pvecteur v): v contains only a constant term, may be zero
void look_for_nested_loop_statements(statement, statement(*)(list, bool(*)(statement)), bool(*)(statement))
look_for_nested_loops.c
const char * module_name(const char *s)
Return the module part of an entity name.
struct _newgen_struct_graph_ * graph
void reset_current_module_entity(void)
Reset the current module entity.
void reset_current_module_statement(void)
Reset the current module statement.
statement set_current_module_statement(statement)
Set the current module statement.
statement get_current_module_statement(void)
Get the current module statement.
entity set_current_module_entity(entity)
static.c
entity get_current_module_entity(void)
Get the entity of the current module.
#define ENDP(l)
Test if a list is empty.
list gen_nreverse(list cp)
reverse a list in place
size_t gen_length(const list l)
#define CAR(pcons)
Get the value of the first element of a list.
list gen_last(list l)
Return the last element of a list.
#define FOREACH(_fe_CASTER, _fe_item, _fe_list)
Apply/map an instruction block on all the elements of a list.
#define MAPL(_map_list_cp, _code, _l)
Apply some code on the addresses of all the elements of a list.
string db_get_memory_resource(const char *rname, const char *oname, bool pure)
Return the pointer to the resource, whatever it is.
#define DB_PUT_MEMORY_RESOURCE(res_name, own_name, res_val)
conform to old interface.
loop statement_loop(statement)
Get the loop of a statement.
bool assignment_block_or_statement_p(statement)
static statement mod_stat
We want to keep track of the current statement inside the recurse.
void vect_dump(Pvecteur v)
void vect_dump(Pvecteur v): print sparse vector v on stderr.
int vect_dimension(Pvecteur v)
int vect_dimension(Pvecteur v): calcul du nombre de composantes non nulles et non constantes d'un vec...
void loop_unroll(statement loop_statement, int rate)
fallbacks on do_loop_unroll without statement post processing
void full_loop_unroll(statement loop_statement)
get rid of the loop by body replication;
#define pips_debug
these macros use the GNU extensions that allow variadic macros, including with an empty list.
#define pips_assert(what, predicate)
common macros, two flavors depending on NDEBUG
#define pips_internal_error
int get_debug_level(void)
GET_DEBUG_LEVEL returns the current debugging level.
void debug(const int the_expected_debug_level, const char *calling_function_name, const char *a_message_format,...)
ARARGS0.
string bool_to_string(bool)
int f(int off1, int off2, int n, float r[n], float a[n], float b[n])
hash_table set_ordering_to_statement(statement s)
To be used instead of initialize_ordering_to_statement() to make sure that the hash table ots is in s...
void reset_ordering_to_statement(void)
Reset the mapping from ordering to statement.
void print_reference(reference r)
void print_statement(statement)
Print a statement on stderr.
bool module_body_reorder(statement body)
Reorder a module.
#define NORMALIZE_EXPRESSION(e)
#define is_instruction_block
soft block->sequence transition
#define instruction_block(i)
const char * entity_local_name(entity e)
entity_local_name modified so that it does not core when used in vect_fprint, since someone thought t...
entity module_name_to_entity(const char *mn)
This is an alias for local_name_to_top_level_entity.
bool entity_module_p(entity e)
bool expression_integer_value(expression e, intptr_t *pval)
bool array_reference_p(reference r)
predicates on references
bool c_language_module_p(entity m)
bool fortran_language_module_p(entity m)
#define normalized_undefined
#define loop_execution(x)
#define syntax_reference(x)
#define normalized_linear_p(x)
#define instruction_loop_p(x)
#define reference_variable(x)
#define instruction_loop(x)
#define range_increment(x)
#define EXPRESSION(x)
EXPRESSION.
#define expression_undefined
@ is_instruction_unstructured
@ is_instruction_whileloop
#define instruction_tag(x)
#define reference_indices(x)
#define test_condition(x)
#define instruction_whileloop(x)
#define whileloop_body(x)
#define statement_instruction(x)
#define instruction_call(x)
@ is_execution_sequential
#define call_arguments(x)
#define instruction_test(x)
#define whileloop_condition(x)
#define normalized_linear(x)
#define expression_syntax(x)
#define execution_parallel_p(x)
#define statement_undefined
#define STATEMENT(x)
STATEMENT.
#define entity_initial(x)
int fprintf()
test sc_min : ce test s'appelle par : programme fichier1.data fichier2.data ...
Pvecteur vect_div(Pvecteur v, Value x)
Pvecteur vect_div(Pvecteur v, Value x): division du vecteur v par le scalaire x, si x est different d...
statement loop_strip_mine(statement loop_statement, int chunk_size, int chunk_number)
loop_strip_mine():
le type des coefficients dans les vecteurs: Value est defini dans le package arithmetique
The structure used to build lists in NewGen.
#define TCST
VARIABLE REPRESENTANT LE TERME CONSTANT.
#define VECTEUR_UNDEFINED
struct Svecteur * Pvecteur
void * Variable
arithmetique is a requirement for vecteur, but I do not want to inforce it in all pips files....
#define VECTEUR_UNDEFINED_P(v)
void vect_rm(Pvecteur v)
void vect_rm(Pvecteur v): desallocation des couples de v;
Pvecteur vect_substract(Pvecteur v1, Pvecteur v2)
Pvecteur vect_substract(Pvecteur v1, Pvecteur v2): allocation d'un vecteur v dont la valeur est la di...
void vect_add_elem(Pvecteur *pvect, Variable var, Value val)
void vect_add_elem(Pvecteur * pvect, Variable var, Value val): addition d'un vecteur colineaire au ve...
Value vect_coeff(Variable var, Pvecteur vect)
Variable vect_coeff(Variable var, Pvecteur vect): coefficient de coordonnee var du vecteur vect —> So...