Message ID | CAAgBjM=Xy2VS8JtOuRijgn8pOjZhaDf=XE4aexeKmEy6VQfCYQ@mail.gmail.com |
---|---|
State | New |
Headers | show |
On 8 August 2016 at 19:33, Martin Jambor <mjambor@suse.cz> wrote: > Hi, > > thanks for following through. You'll need an approval from Honza, but > I think the code looks good (I have looked at the places that I > believe have changed since the last week). However, I have discovered > one new thing I don't like and still believe you need to handle > different precisions in lattice need: > > On Mon, Aug 08, 2016 at 03:08:35AM +0530, Prathamesh Kulkarni wrote: >> On 5 August 2016 at 18:06, Martin Jambor <mjambor@suse.cz> wrote: >> >> ... >> >> >> diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c >> >> index 5b6cb9a..b770f6a 100644 >> >> --- a/gcc/ipa-cp.c >> >> +++ b/gcc/ipa-cp.c >> >> @@ -120,6 +120,7 @@ along with GCC; see the file COPYING3. If not see >> >> #include "params.h" >> >> #include "ipa-inline.h" >> >> #include "ipa-utils.h" >> >> +#include "tree-ssa-ccp.h" >> >> >> >> template <typename valtype> class ipcp_value; >> >> >> >> @@ -266,6 +267,40 @@ private: >> >> bool meet_with_1 (unsigned new_align, unsigned new_misalign); >> >> }; >> >> >> >> +/* Lattice of known bits, only capable of holding one value. >> >> + Similar to ccp_prop_value_t, mask represents which bits of value are constant. >> >> + If a bit in mask is set to 0, then the corresponding bit in >> >> + value is known to be constant. */ >> >> + >> >> +class ipcp_bits_lattice >> >> +{ >> >> +public: >> >> + bool bottom_p () { return lattice_val == IPA_BITS_VARYING; } >> >> + bool top_p () { return lattice_val == IPA_BITS_UNDEFINED; } >> >> + bool constant_p () { return lattice_val == IPA_BITS_CONSTANT; } >> >> + bool set_to_bottom (); >> >> + bool set_to_constant (widest_int, widest_int, signop, unsigned); >> >> + >> >> + widest_int get_value () { return value; } >> >> + widest_int get_mask () { return mask; } >> >> + signop get_sign () { return sgn; } >> >> + unsigned get_precision () { return precision; } >> >> + >> >> + bool meet_with (ipcp_bits_lattice& other, enum tree_code, tree); >> >> + bool meet_with (widest_int, widest_int, signop, unsigned); >> >> + >> >> + void print (FILE *); >> >> + >> >> +private: >> >> + enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } lattice_val; >> >> + widest_int value, mask; >> >> + signop sgn; >> >> + unsigned precision; > > I know that the existing code in ipa-cp.c does not do this, but please > prefix member variables with "m_" like our coding style guidelines > suggest (or even require?). You routinely reuse those same names in > names of parameters of meet_with and I believe that is a practice that > will sooner or later lead to confusing the two and bugs. Sorry about this, will change to m_ prefix in followup patch. > >> >> + >> >> + bool meet_with_1 (widest_int, widest_int); >> >> + void get_value_and_mask (tree, widest_int *, widest_int *); >> >> +}; >> >> + >> >> /* Structure containing lattices for a parameter itself and for pieces of >> >> aggregates that are passed in the parameter or by a reference in a parameter >> >> plus some other useful flags. */ >> >> @@ -281,6 +316,8 @@ public: >> >> ipcp_agg_lattice *aggs; >> >> /* Lattice describing known alignment. */ >> >> ipcp_alignment_lattice alignment; >> >> + /* Lattice describing known bits. */ >> >> + ipcp_bits_lattice bits_lattice; >> >> /* Number of aggregate lattices */ >> >> int aggs_count; >> >> /* True if aggregate data were passed by reference (as opposed to by >> >> @@ -458,6 +495,21 @@ ipcp_alignment_lattice::print (FILE * f) >> >> fprintf (f, " Alignment %u, misalignment %u\n", align, misalign); >> >> } >> >> >> >> ... >> >> >> +/* Convert operand to value, mask form. */ >> >> + >> >> +void >> >> +ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp) >> >> +{ >> >> + wide_int get_nonzero_bits (const_tree); >> >> + >> >> + if (TREE_CODE (operand) == INTEGER_CST) >> >> + { >> >> + *valuep = wi::to_widest (operand); >> >> + *maskp = 0; >> >> + } >> >> + else if (TREE_CODE (operand) == SSA_NAME) >> > >> > IIUC, operand is the operand from pass-through jump function and that >> > should never be an SSA_NAME. I have even looked at how we generate >> > them and it seems fairly safe to say that they never are. If you have >> > seen an SSA_NAME here, it is a bug and please let me know because >> > sooner or later it will cause an assert. >> > >> >> + { >> >> + *valuep = 0; >> >> + *maskp = widest_int::from (get_nonzero_bits (operand), UNSIGNED); >> >> + } >> >> + else >> >> + gcc_unreachable (); >> > >> > The operand however can be any any other is_gimple_ip_invariant tree. >> > I assume that you could hit this gcc_unreachable only in a program >> > with undefined behavior (or with a Fortran CONST_DECL?) but you should >> > not ICE here. >> Changed to: >> if (TREE_CODE (operand) == INTEGER_CST) >> { >> *valuep = wi::to_widest (operand); >> *maskp = 0; >> } >> else >> { >> *valuep = 0; >> *maskp = -1; >> } >> >> I am not sure how to extract nonzero bits for gimple_ip_invariant if >> it's not INTEGER_CST, > > I don't think that you reasonably can. > >> so setting to unknown (value = 0, mask = -1). >> Does this look OK ? > > Yes. > >> > >> > >> >> +} >> >> + >> >> +/* Meet operation, similar to ccp_lattice_meet, we xor values >> >> + if this->value, value have different values at same bit positions, we want >> >> + to drop that bit to varying. Return true if mask is changed. >> >> + This function assumes that the lattice value is in CONSTANT state */ >> >> + >> >> +bool >> >> +ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask) >> >> +{ >> >> + gcc_assert (constant_p ()); >> >> + >> >> + widest_int old_mask = this->mask; >> >> + this->mask = (this->mask | mask) | (this->value ^ value); >> >> + >> >> + if (wi::sext (this->mask, this->precision) == -1) >> >> + return set_to_bottom (); >> >> + >> >> + bool changed = this->mask != old_mask; >> >> + return changed; >> >> +} >> >> + >> >> +/* Meet the bits lattice with operand >> >> + described by <value, mask, sgn, precision. */ >> >> + >> >> +bool >> >> +ipcp_bits_lattice::meet_with (widest_int value, widest_int mask, >> >> + signop sgn, unsigned precision) >> >> +{ >> >> + if (bottom_p ()) >> >> + return false; >> >> + >> >> + if (top_p ()) >> >> + { >> >> + if (wi::sext (mask, precision) == -1) >> >> + return set_to_bottom (); >> >> + return set_to_constant (value, mask, sgn, precision); >> >> + } >> >> + >> >> + return meet_with_1 (value, mask); >> > >> > What if precisions do not match? >> Sorry I don't understand. Since we extend to widest_int, precision >> would be same ? > > I meant what if: > > this->precision != precision /* the parameter value */ > > (you see, giving both the same name is error-prone). You are > propagating the recorded precision gathered from types of the actual > arguments in calls, those can be different. For example, one caller > can pass a direct integer value with full integer precision, another > caller can pass in that same argument an enum value with a very > limited precision. Your code ignores that difference and the > resulting precision is the one that arrives first. If it is the enum, > it might be too small for the integer value from the other call-site? Ah indeed the patch incorrectly propagates precision of argument. So IIUC in ipcp_bits_lattice, we want m_precision to be the precision of parameter's type and _not_ of argument's type. The patch incorrectly propagates precision in following case: __attribute__((noinline)) static int f2(short z) { return z; } __attribute__((noinline)) static int f1(int y) { return f2 (y & 0xff); } __attribute__((noinline)) int f(int x) { return f1 (x); } Precision for 'z' should be 16 while the patch propagates 32, which is precision of type of the argument passed by the caller. We only set m_precision when changing from TOP to CONSTANT state. Instead of storing arg's precision and sign, we should store parameter's precision and sign in ipa_compute_jump_functions_for_edge (). Diff with respect to previous patch: @@ -1688,9 +1690,9 @@ ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi, && (TREE_CODE (arg) == SSA_NAME || TREE_CODE (arg) == INTEGER_CST)) { jfunc->bits.known = true; - jfunc->bits.sgn = TYPE_SIGN (TREE_TYPE (arg)); - jfunc->bits.precision = TYPE_PRECISION (TREE_TYPE (arg)); - + jfunc->bits.sgn = TYPE_SIGN (param_type); + jfunc->bits.precision = TYPE_PRECISION (param_type); + if (TREE_CODE (arg) == SSA_NAME) { jfunc->bits.value = 0; So in ipcp_bits_lattice::meet_with(value, mask, signop, precision) when we propagate into parameter's lattice for first time, we will set m_precision == precision of it's own type. rather than precision of the argument For eg, consider following test-case: int f(int x) { return some_operation (x); } int f1(short y) { return f (y & 0xf); } int f2(char z) { return f (z & 0xff); } Assume we first propagate from f2->f. In this case, jump_function from f2->f is unknown (but bits.known is true), so we call meet_with (0, 0xff, SIGNED, 32). The precision and sign are of param's type because we extract them from param_type as shown above. (I suppose the reason this is not pass-thru is because result y & 0xf is wrapped by convert_expr, which is actually passed to f(), so the parameter y isn't really involved in the call to f ?) Since lattice of x is TOP, it will change to CONSTANT, and m_precision will get assigned 32. Next propagate from f1->f jump_function from f1->f is unknown (but bits.known is true) so we call meet_with (0, 0xf, 32, SIGNED). Since lattice of x is already CONSTANT, it doesn't change m_precision anymore on this call or any subsequent calls. So when we propagate into callee for first time, only then do we set the precision. Does this look reasonable ? > >> bit_value_binop_1 requires original precision for few cases (shifts, >> rotates, plus, mult), so > > Yeah, so wrong precision could only be used if it got fed into the > binary operation routines, making the bug much harder to trigger. But > it would still be a bug (or you do not need to care for original > precisions at all). > >> I was preserving the original precision in jump function. >> Later in ipcp_update_bits(), the mask is set after narrowing to the >> precision of the parameter. >> > >> >> +} >> >> + >> >> +/* Meet bits lattice with the result of bit_value_binop_1 (other, operand) >> >> + if code is binary operation or bit_value_unop_1 (other) if code is unary op. >> >> + In the case when code is nop_expr, no adjustment is required. */ >> >> + >> >> +bool >> >> +ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, enum tree_code code, tree operand) >> >> +{ >> >> + if (other.bottom_p ()) >> >> + return set_to_bottom (); >> >> + >> >> + if (bottom_p () || other.top_p ()) >> >> + return false; >> >> + >> >> + widest_int adjusted_value, adjusted_mask; >> >> + >> >> + if (TREE_CODE_CLASS (code) == tcc_binary) >> >> + { >> >> + tree type = TREE_TYPE (operand); >> >> + gcc_assert (INTEGRAL_TYPE_P (type)); >> >> + widest_int o_value, o_mask; >> >> + get_value_and_mask (operand, &o_value, &o_mask); >> >> + >> >> + signop sgn = other.get_sign (); >> >> + unsigned prec = other.get_precision (); >> >> + >> >> + bit_value_binop_1 (code, sgn, prec, &adjusted_value, &adjusted_mask, >> >> + sgn, prec, other.get_value (), other.get_mask (), >> >> + TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask); >> > >> > It is probably just me not being particularly sharp on a Friday >> > afternoon and I might not understand the semantics of mask well (also, >> > you did not document it :-), but... assume that we are looking at a >> > binary and operation, other comes from an SSA pointer and its mask >> > would be binary 100 and its value 0 because that's what you set for >> > ssa names in ipa-prop.h, and the operand is binary value 101, which >> > means that get_value_and_mask returns mask 0 and value 101. Now, >> > bit_value_binop_1 would return value 0 & 101 = 0 and mask according to >> > >> > (m1 | m2) & ((v1 | m1) & (v2 | m2)) >> > >> > so in our case >> > >> > (100b & 0) & ((0 | 100b) & (101b | 0)) = 0 & 100b = 0. >> Shouldn't this be: >> (100b | 0) & ((0 | 100b) & (101b | 0)) = 100 & 100 = 100 -;) > > Eh, right, sorry. I just find the term mask confusing when we do not > actually mask anything with it (but I guess it is good to be > consistent so let's keep it). > >> diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h >> index e32d078..1b9d0ef 100644 >> --- a/gcc/ipa-prop.h >> +++ b/gcc/ipa-prop.h >> @@ -154,6 +154,23 @@ struct GTY(()) ipa_alignment >> unsigned misalign; >> }; >> >> +/* Information about zero/non-zero bits. */ >> +struct GTY(()) ipa_bits >> +{ >> + /* The propagated value. */ >> + widest_int value; >> + /* Mask corresponding to the value. >> + Similar to ccp_prop_t, if xth bit of mask is 0, > > Is ccp_prop_t a typo? I did not find it anywhere when I grepped for it. ah, it's ccp_lattice_t -;) Thanks, Prathamesh > > Thanks, > > Martin >
On 9 August 2016 at 16:39, Martin Jambor <mjambor@suse.cz> wrote: > Hi, > > On Tue, Aug 09, 2016 at 01:41:21PM +0530, Prathamesh Kulkarni wrote: >> On 8 August 2016 at 19:33, Martin Jambor <mjambor@suse.cz> wrote: >> >> >> +class ipcp_bits_lattice >> >> >> +{ >> >> >> +public: >> >> >> + bool bottom_p () { return lattice_val == IPA_BITS_VARYING; } >> >> >> + bool top_p () { return lattice_val == IPA_BITS_UNDEFINED; } >> >> >> + bool constant_p () { return lattice_val == IPA_BITS_CONSTANT; } >> >> >> + bool set_to_bottom (); >> >> >> + bool set_to_constant (widest_int, widest_int, signop, unsigned); >> >> >> + >> >> >> + widest_int get_value () { return value; } >> >> >> + widest_int get_mask () { return mask; } >> >> >> + signop get_sign () { return sgn; } >> >> >> + unsigned get_precision () { return precision; } >> >> >> + >> >> >> + bool meet_with (ipcp_bits_lattice& other, enum tree_code, tree); >> >> >> + bool meet_with (widest_int, widest_int, signop, unsigned); >> >> >> + >> >> >> + void print (FILE *); >> >> >> + >> >> >> +private: >> >> >> + enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } lattice_val; >> >> >> + widest_int value, mask; >> >> >> + signop sgn; >> >> >> + unsigned precision; >> > >> > I know that the existing code in ipa-cp.c does not do this, but please >> > prefix member variables with "m_" like our coding style guidelines >> > suggest (or even require?). You routinely reuse those same names in >> > names of parameters of meet_with and I believe that is a practice that >> > will sooner or later lead to confusing the two and bugs. >> Sorry about this, will change to m_ prefix in followup patch. > > Thanks a lot. > >> > >> >> >> + >> >> >> + bool meet_with_1 (widest_int, widest_int); >> >> >> + void get_value_and_mask (tree, widest_int *, widest_int *); >> >> >> +}; >> >> >> + >> >> >> /* Structure containing lattices for a parameter itself and for pieces of >> >> >> aggregates that are passed in the parameter or by a reference in a parameter >> >> >> plus some other useful flags. */ >> >> >> @@ -281,6 +316,8 @@ public: >> >> >> ipcp_agg_lattice *aggs; >> >> >> /* Lattice describing known alignment. */ >> >> >> ipcp_alignment_lattice alignment; >> >> >> + /* Lattice describing known bits. */ >> >> >> + ipcp_bits_lattice bits_lattice; >> >> >> /* Number of aggregate lattices */ >> >> >> int aggs_count; >> >> >> /* True if aggregate data were passed by reference (as opposed to by >> >> >> @@ -458,6 +495,21 @@ ipcp_alignment_lattice::print (FILE * f) >> >> >> fprintf (f, " Alignment %u, misalignment %u\n", align, misalign); >> >> >> } >> >> >> >> >> >> >> ... >> >> >> >> >> +} >> >> >> + >> >> >> +/* Meet operation, similar to ccp_lattice_meet, we xor values >> >> >> + if this->value, value have different values at same bit positions, we want >> >> >> + to drop that bit to varying. Return true if mask is changed. >> >> >> + This function assumes that the lattice value is in CONSTANT state */ >> >> >> + >> >> >> +bool >> >> >> +ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask) >> >> >> +{ >> >> >> + gcc_assert (constant_p ()); >> >> >> + >> >> >> + widest_int old_mask = this->mask; >> >> >> + this->mask = (this->mask | mask) | (this->value ^ value); >> >> >> + >> >> >> + if (wi::sext (this->mask, this->precision) == -1) >> >> >> + return set_to_bottom (); >> >> >> + >> >> >> + bool changed = this->mask != old_mask; >> >> >> + return changed; >> >> >> +} >> >> >> + >> >> >> +/* Meet the bits lattice with operand >> >> >> + described by <value, mask, sgn, precision. */ >> >> >> + >> >> >> +bool >> >> >> +ipcp_bits_lattice::meet_with (widest_int value, widest_int mask, >> >> >> + signop sgn, unsigned precision) >> >> >> +{ >> >> >> + if (bottom_p ()) >> >> >> + return false; >> >> >> + >> >> >> + if (top_p ()) >> >> >> + { >> >> >> + if (wi::sext (mask, precision) == -1) >> >> >> + return set_to_bottom (); >> >> >> + return set_to_constant (value, mask, sgn, precision); >> >> >> + } >> >> >> + >> >> >> + return meet_with_1 (value, mask); >> >> > >> >> > What if precisions do not match? >> >> Sorry I don't understand. Since we extend to widest_int, precision >> >> would be same ? >> > >> > I meant what if: >> > >> > this->precision != precision /* the parameter value */ >> > >> > (you see, giving both the same name is error-prone). You are >> > propagating the recorded precision gathered from types of the actual >> > arguments in calls, those can be different. For example, one caller >> > can pass a direct integer value with full integer precision, another >> > caller can pass in that same argument an enum value with a very >> > limited precision. Your code ignores that difference and the >> > resulting precision is the one that arrives first. If it is the enum, >> > it might be too small for the integer value from the other call-site? >> Ah indeed the patch incorrectly propagates precision of argument. >> So IIUC in ipcp_bits_lattice, we want m_precision to be the precision >> of parameter's type and _not_ of argument's type. >> >> The patch incorrectly propagates precision in following case: >> >> __attribute__((noinline)) >> static int f2(short z) >> { >> return z; >> } >> >> __attribute__((noinline)) >> static int f1(int y) >> { >> return f2 (y & 0xff); >> } >> >> __attribute__((noinline)) >> int f(int x) >> { >> return f1 (x); >> } >> >> Precision for 'z' should be 16 while the patch propagates 32, which >> is precision of type of the argument passed by the caller. > > That is true but you never use precison of z (in this example), do > you? You would only use a precision from a jump function of y in > meet_with if there was a pass-through function, am I right? Yes, the example was artificial. > >> We only set m_precision when changing from TOP to CONSTANT >> state. >> >> Instead of storing arg's precision and sign, we should store >> parameter's precision and sign in ipa_compute_jump_functions_for_edge (). >> Diff with respect to previous patch: >> >> @@ -1688,9 +1690,9 @@ ipa_compute_jump_functions_for_edge (struct >> ipa_func_body_info *fbi, >> && (TREE_CODE (arg) == SSA_NAME || TREE_CODE (arg) == INTEGER_CST)) >> { >> jfunc->bits.known = true; >> - jfunc->bits.sgn = TYPE_SIGN (TREE_TYPE (arg)); >> - jfunc->bits.precision = TYPE_PRECISION (TREE_TYPE (arg)); >> - >> + jfunc->bits.sgn = TYPE_SIGN (param_type); >> + jfunc->bits.precision = TYPE_PRECISION (param_type); >> + > > If you want to use the precision of the formal parameter then you do > not need to store it to jump functions. Parameter DECLs along with > their types are readily accessible in IPA (even with LTO). It would > also be much clearer what is going on, IMHO. Could you please point out how to access parameter decl in wpa ? The only reason I ended up putting this in jump function was because I couldn't figure out how to access param decl during WPA. I see there's ipa_get_param() in ipa-prop.h however it's gated on gcc_checking_assert (!flag_wpa), so I suppose I can't use this during WPA ? Alternatively I think I could access cs->callee->decl and get to the param decl by walking DECL_ARGUMENTS ? Getting param decl would be indeed much clearer. Storing param precision and sign in jump function is admittedly quite ugly and as you mentioned below, we could get rid of precision and signop from ipcp_bits_lattice and ipa_bits. > >> if (TREE_CODE (arg) == SSA_NAME) >> { >> jfunc->bits.value = 0; >> >> So in ipcp_bits_lattice::meet_with(value, mask, signop, precision) >> when we propagate into >> parameter's lattice for first time, we will set m_precision == >> precision of it's own type. >> rather than precision of the argument >> >> For eg, consider following test-case: >> int f(int x) >> { >> return some_operation (x); >> } >> >> int f1(short y) >> { >> return f (y & 0xf); >> } >> >> int f2(char z) >> { >> return f (z & 0xff); >> } >> >> Assume we first propagate from f2->f. >> In this case, jump_function from f2->f is unknown (but bits.known is true), >> so we call meet_with (0, 0xff, SIGNED, 32). >> The precision and sign are of param's type because we extract them >> from param_type as shown above. >> >> (I suppose the reason this is not pass-thru is because result y & 0xf >> is wrapped by convert_expr, >> which is actually passed to f(), so the parameter y isn't really >> involved in the call to f ?) > > I haven't seen the IL but most probably yes. If you want to be > experimenting with this, I beleive you need an example with types of > the same size but different precisions (C++ enums might work nicely, I > beleive). I will try to come up with a test-case. Thanks, Prathamesh > > If you managed to come up with a testcase with a pass through jump > function with an operation in which the precision actually affects the > result, that would be great. > >> >> Since lattice of x is TOP, it will change to CONSTANT, >> and m_precision will get assigned 32. >> >> Next propagate from f1->f >> jump_function from f1->f is unknown (but bits.known is true) >> so we call meet_with (0, 0xf, 32, SIGNED). >> Since lattice of x is already CONSTANT, it doesn't change m_precision anymore >> on this call or any subsequent calls. >> >> So when we propagate into callee for first time, only then do we set >> the precision. >> Does this look reasonable ? > > It does, but to summarize what I wrote above, I believe that with this > approach you can and should remove the precision field from jump > functions and lattices. > > Thanks, > > > Martin
diff --git a/gcc/common.opt b/gcc/common.opt index 8a292ed..8bac0a2 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1561,6 +1561,10 @@ fipa-cp-alignment Common Report Var(flag_ipa_cp_alignment) Optimization Perform alignment discovery and propagation to make Interprocedural constant propagation stronger. +fipa-cp-bit +Common Report Var(flag_ipa_cp_bit) Optimization +Perform interprocedural bitwise constant propagation. + fipa-profile Common Report Var(flag_ipa_profile) Init(0) Optimization Perform interprocedural profile propagation. diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index 5b6cb9a..a26a1a6 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -120,6 +120,7 @@ along with GCC; see the file COPYING3. If not see #include "params.h" #include "ipa-inline.h" #include "ipa-utils.h" +#include "tree-ssa-ccp.h" template <typename valtype> class ipcp_value; @@ -266,6 +267,40 @@ private: bool meet_with_1 (unsigned new_align, unsigned new_misalign); }; +/* Lattice of known bits, only capable of holding one value. + Similar to ccp_prop_value_t, mask represents which bits of value are constant. + If a bit in mask is set to 0, then the corresponding bit in + value is known to be constant. */ + +class ipcp_bits_lattice +{ +public: + bool bottom_p () { return lattice_val == IPA_BITS_VARYING; } + bool top_p () { return lattice_val == IPA_BITS_UNDEFINED; } + bool constant_p () { return lattice_val == IPA_BITS_CONSTANT; } + bool set_to_bottom (); + bool set_to_constant (widest_int, widest_int, signop, unsigned); + + widest_int get_value () { return value; } + widest_int get_mask () { return mask; } + signop get_sign () { return sgn; } + unsigned get_precision () { return precision; } + + bool meet_with (ipcp_bits_lattice& other, enum tree_code, tree); + bool meet_with (widest_int, widest_int, signop, unsigned); + + void print (FILE *); + +private: + enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } lattice_val; + widest_int value, mask; + signop sgn; + unsigned precision; + + bool meet_with_1 (widest_int, widest_int); + void get_value_and_mask (tree, widest_int *, widest_int *); +}; + /* Structure containing lattices for a parameter itself and for pieces of aggregates that are passed in the parameter or by a reference in a parameter plus some other useful flags. */ @@ -281,6 +316,8 @@ public: ipcp_agg_lattice *aggs; /* Lattice describing known alignment. */ ipcp_alignment_lattice alignment; + /* Lattice describing known bits. */ + ipcp_bits_lattice bits_lattice; /* Number of aggregate lattices */ int aggs_count; /* True if aggregate data were passed by reference (as opposed to by @@ -458,6 +495,21 @@ ipcp_alignment_lattice::print (FILE * f) fprintf (f, " Alignment %u, misalignment %u\n", align, misalign); } +void +ipcp_bits_lattice::print (FILE *f) +{ + if (top_p ()) + fprintf (f, " Bits unknown (TOP)\n"); + else if (bottom_p ()) + fprintf (f, " Bits unusable (BOTTOM)\n"); + else + { + fprintf (f, " Bits: value = "); print_hex (get_value (), f); + fprintf (f, ", mask = "); print_hex (get_mask (), f); + fprintf (f, "\n"); + } +} + /* Print all ipcp_lattices of all functions to F. */ static void @@ -484,6 +536,7 @@ print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits) fprintf (f, " ctxs: "); plats->ctxlat.print (f, dump_sources, dump_benefits); plats->alignment.print (f); + plats->bits_lattice.print (f); if (plats->virt_call) fprintf (f, " virt_call flag set\n"); @@ -911,6 +964,159 @@ ipcp_alignment_lattice::meet_with (const ipcp_alignment_lattice &other, return meet_with_1 (other.align, adjusted_misalign); } +/* Set lattice value to bottom, if it already isn't the case. */ + +bool +ipcp_bits_lattice::set_to_bottom () +{ + if (bottom_p ()) + return false; + lattice_val = IPA_BITS_VARYING; + value = 0; + mask = -1; + return true; +} + +/* Set to constant if it isn't already. Only meant to be called + when switching state from TOP. */ + +bool +ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask, + signop sgn, unsigned precision) +{ + gcc_assert (top_p ()); + this->lattice_val = IPA_BITS_CONSTANT; + this->value = value; + this->mask = mask; + this->sgn = sgn; + this->precision = precision; + return true; +} + +/* Convert operand to value, mask form. */ + +void +ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp) +{ + wide_int get_nonzero_bits (const_tree); + + if (TREE_CODE (operand) == INTEGER_CST) + { + *valuep = wi::to_widest (operand); + *maskp = 0; + } + else + { + *valuep = 0; + *maskp = -1; + } +} + +/* Meet operation, similar to ccp_lattice_meet, we xor values + if this->value, value have different values at same bit positions, we want + to drop that bit to varying. Return true if mask is changed. + This function assumes that the lattice value is in CONSTANT state */ + +bool +ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask) +{ + gcc_assert (constant_p ()); + + widest_int old_mask = this->mask; + this->mask = (this->mask | mask) | (this->value ^ value); + + if (wi::sext (this->mask, this->precision) == -1) + return set_to_bottom (); + + bool changed = this->mask != old_mask; + return changed; +} + +/* Meet the bits lattice with operand + described by <value, mask, sgn, precision. */ + +bool +ipcp_bits_lattice::meet_with (widest_int value, widest_int mask, + signop sgn, unsigned precision) +{ + if (bottom_p ()) + return false; + + if (top_p ()) + { + if (wi::sext (mask, precision) == -1) + return set_to_bottom (); + return set_to_constant (value, mask, sgn, precision); + } + + return meet_with_1 (value, mask); +} + +/* Meet bits lattice with the result of bit_value_binop (other, operand) + if code is binary operation or bit_value_unop (other) if code is unary op. + In the case when code is nop_expr, no adjustment is required. */ + +bool +ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, enum tree_code code, tree operand) +{ + if (other.bottom_p ()) + return set_to_bottom (); + + if (bottom_p () || other.top_p ()) + return false; + + widest_int adjusted_value, adjusted_mask; + + if (TREE_CODE_CLASS (code) == tcc_binary) + { + tree type = TREE_TYPE (operand); + gcc_assert (INTEGRAL_TYPE_P (type)); + widest_int o_value, o_mask; + get_value_and_mask (operand, &o_value, &o_mask); + + signop sgn = other.get_sign (); + unsigned prec = other.get_precision (); + + bit_value_binop (code, sgn, prec, &adjusted_value, &adjusted_mask, + sgn, prec, other.get_value (), other.get_mask (), + TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask); + + if (wi::sext (adjusted_mask, prec) == -1) + return set_to_bottom (); + } + + else if (TREE_CODE_CLASS (code) == tcc_unary) + { + signop sgn = other.get_sign (); + unsigned prec = other.get_precision (); + + bit_value_unop (code, sgn, prec, &adjusted_value, + &adjusted_mask, sgn, prec, other.get_value (), + other.get_mask ()); + + if (wi::sext (adjusted_mask, prec) == -1) + return set_to_bottom (); + } + + else if (code == NOP_EXPR) + { + adjusted_value = other.value; + adjusted_mask = other.mask; + } + + else + return set_to_bottom (); + + if (top_p ()) + { + if (wi::sext (adjusted_mask, other.get_precision ()) == -1) + return set_to_bottom (); + return set_to_constant (adjusted_value, adjusted_mask, other.get_sign (), other.get_precision ()); + } + else + return meet_with_1 (adjusted_value, adjusted_mask); +} + /* Mark bot aggregate and scalar lattices as containing an unknown variable, return true is any of them has not been marked as such so far. */ @@ -922,6 +1128,7 @@ set_all_contains_variable (struct ipcp_param_lattices *plats) ret |= plats->ctxlat.set_contains_variable (); ret |= set_agg_lats_contain_variable (plats); ret |= plats->alignment.set_to_bottom (); + ret |= plats->bits_lattice.set_to_bottom (); return ret; } @@ -1003,6 +1210,7 @@ initialize_node_lattices (struct cgraph_node *node) plats->ctxlat.set_to_bottom (); set_agg_lats_to_bottom (plats); plats->alignment.set_to_bottom (); + plats->bits_lattice.set_to_bottom (); } else set_all_contains_variable (plats); @@ -1621,6 +1829,57 @@ propagate_alignment_accross_jump_function (cgraph_edge *cs, } } +/* Propagate bits across jfunc that is associated with + edge cs and update dest_lattice accordingly. */ + +bool +propagate_bits_accross_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc, + ipcp_bits_lattice *dest_lattice) +{ + if (dest_lattice->bottom_p ()) + return false; + + if (jfunc->type == IPA_JF_PASS_THROUGH) + { + struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + enum tree_code code = ipa_get_jf_pass_through_operation (jfunc); + tree operand = NULL_TREE; + + if (code != NOP_EXPR) + operand = ipa_get_jf_pass_through_operand (jfunc); + + int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); + struct ipcp_param_lattices *src_lats + = ipa_get_parm_lattices (caller_info, src_idx); + + /* Try to propagate bits if src_lattice is bottom, but jfunc is known. + for eg consider: + int f(int x) + { + g (x & 0xff); + } + Assume lattice for x is bottom, however we can still propagate + result of x & 0xff == 0xff, which gets computed during ccp1 pass + and we store it in jump function during analysis stage. */ + + if (src_lats->bits_lattice.bottom_p () + && jfunc->bits.known) + return dest_lattice->meet_with (jfunc->bits.value, jfunc->bits.mask, + jfunc->bits.sgn, jfunc->bits.precision); + else + return dest_lattice->meet_with (src_lats->bits_lattice, code, operand); + } + + else if (jfunc->type == IPA_JF_ANCESTOR) + return dest_lattice->set_to_bottom (); + + else if (jfunc->bits.known) + return dest_lattice->meet_with (jfunc->bits.value, jfunc->bits.mask, + jfunc->bits.sgn, jfunc->bits.precision); + else + return dest_lattice->set_to_bottom (); +} + /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all other cases, return false). If there are no aggregate items, set @@ -1968,6 +2227,8 @@ propagate_constants_accross_call (struct cgraph_edge *cs) &dest_plats->ctxlat); ret |= propagate_alignment_accross_jump_function (cs, jump_func, &dest_plats->alignment); + ret |= propagate_bits_accross_jump_function (cs, jump_func, + &dest_plats->bits_lattice); ret |= propagate_aggs_accross_jump_function (cs, jump_func, dest_plats); } @@ -4592,6 +4853,83 @@ ipcp_store_alignment_results (void) } } +/* Look up all the bits information that we have discovered and copy it over + to the transformation summary. */ + +static void +ipcp_store_bits_results (void) +{ + cgraph_node *node; + + FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) + { + ipa_node_params *info = IPA_NODE_REF (node); + bool dumped_sth = false; + bool found_useful_result = false; + + if (!opt_for_fn (node->decl, flag_ipa_cp_bit)) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for ipa bitwise propagation " + "; -fipa-cp-bit: disabled.\n", + node->name ()); + continue; + } + + if (info->ipcp_orig_node) + info = IPA_NODE_REF (info->ipcp_orig_node); + + unsigned count = ipa_get_param_count (info); + for (unsigned i = 0; i < count; i++) + { + ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + if (plats->bits_lattice.constant_p ()) + { + found_useful_result = true; + break; + } + } + + if (!found_useful_result) + continue; + + ipcp_grow_transformations_if_necessary (); + ipcp_transformation_summary *ts = ipcp_get_transformation_summary (node); + vec_safe_reserve_exact (ts->bits, count); + + for (unsigned i = 0; i < count; i++) + { + ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + ipa_bits bits_jfunc; + + if (plats->bits_lattice.constant_p ()) + { + bits_jfunc.known = true; + bits_jfunc.value = plats->bits_lattice.get_value (); + bits_jfunc.mask = plats->bits_lattice.get_mask (); + bits_jfunc.sgn = plats->bits_lattice.get_sign (); + bits_jfunc.precision = plats->bits_lattice.get_precision (); + } + else + bits_jfunc.known = false; + + ts->bits->quick_push (bits_jfunc); + if (!dump_file || !bits_jfunc.known) + continue; + if (!dumped_sth) + { + fprintf (dump_file, "Propagated bits info for function %s/%i:\n", + node->name (), node->order); + dumped_sth = true; + } + fprintf (dump_file, " param %i: value = ", i); + print_hex (bits_jfunc.value, dump_file); + fprintf (dump_file, ", mask = "); + print_hex (bits_jfunc.mask, dump_file); + fprintf (dump_file, "\n"); + } + } +} /* The IPCP driver. */ static unsigned int @@ -4625,6 +4963,8 @@ ipcp_driver (void) ipcp_decision_stage (&topo); /* Store results of alignment propagation. */ ipcp_store_alignment_results (); + /* Store results of bits propagation. */ + ipcp_store_bits_results (); /* Free all IPCP structures. */ free_toporder_info (&topo); diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index 132b622..46955bb 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -302,6 +302,15 @@ ipa_print_node_jump_functions_for_edge (FILE *f, struct cgraph_edge *cs) } else fprintf (f, " Unknown alignment\n"); + + if (jump_func->bits.known) + { + fprintf (f, " value: "); print_hex (jump_func->bits.value, f); + fprintf (f, ", mask: "); print_hex (jump_func->bits.mask, f); + fprintf (f, "\n"); + } + else + fprintf (f, " Unknown bits\n"); } } @@ -381,6 +390,7 @@ ipa_set_jf_unknown (struct ipa_jump_func *jfunc) { jfunc->type = IPA_JF_UNKNOWN; jfunc->alignment.known = false; + jfunc->bits.known = false; } /* Set JFUNC to be a copy of another jmp (to be used by jump function @@ -1674,6 +1684,27 @@ ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi, else gcc_assert (!jfunc->alignment.known); + if (INTEGRAL_TYPE_P (TREE_TYPE (arg)) + && (TREE_CODE (arg) == SSA_NAME || TREE_CODE (arg) == INTEGER_CST)) + { + jfunc->bits.known = true; + jfunc->bits.sgn = TYPE_SIGN (TREE_TYPE (arg)); + jfunc->bits.precision = TYPE_PRECISION (TREE_TYPE (arg)); + + if (TREE_CODE (arg) == SSA_NAME) + { + jfunc->bits.value = 0; + jfunc->bits.mask = widest_int::from (get_nonzero_bits (arg), UNSIGNED); + } + else + { + jfunc->bits.value = wi::to_widest (arg); + jfunc->bits.mask = 0; + } + } + else + gcc_assert (!jfunc->bits.known); + if (is_gimple_ip_invariant (arg) || (TREE_CODE (arg) == VAR_DECL && is_global_var (arg) @@ -3690,6 +3721,18 @@ ipa_node_params_t::duplicate(cgraph_node *src, cgraph_node *dst, for (unsigned i = 0; i < src_alignments->length (); ++i) dst_alignments->quick_push ((*src_alignments)[i]); } + + if (src_trans && vec_safe_length (src_trans->bits) > 0) + { + ipcp_grow_transformations_if_necessary (); + src_trans = ipcp_get_transformation_summary (src); + const vec<ipa_bits, va_gc> *src_bits = src_trans->bits; + vec<ipa_bits, va_gc> *&dst_bits + = ipcp_get_transformation_summary (dst)->bits; + vec_safe_reserve_exact (dst_bits, src_bits->length ()); + for (unsigned i = 0; i < src_bits->length (); ++i) + dst_bits->quick_push ((*src_bits)[i]); + } } /* Register our cgraph hooks if they are not already there. */ @@ -4609,6 +4652,17 @@ ipa_write_jump_function (struct output_block *ob, streamer_write_uhwi (ob, jump_func->alignment.align); streamer_write_uhwi (ob, jump_func->alignment.misalign); } + + bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, jump_func->bits.known, 1); + streamer_write_bitpack (&bp); + if (jump_func->bits.known) + { + streamer_write_widest_int (ob, jump_func->bits.value); + streamer_write_widest_int (ob, jump_func->bits.mask); + streamer_write_enum (ob->main_stream, signop, UNSIGNED + 1, jump_func->bits.sgn); + streamer_write_uhwi (ob, jump_func->bits.precision); + } } /* Read in jump function JUMP_FUNC from IB. */ @@ -4685,6 +4739,19 @@ ipa_read_jump_function (struct lto_input_block *ib, } else jump_func->alignment.known = false; + + bp = streamer_read_bitpack (ib); + bool bits_known = bp_unpack_value (&bp, 1); + if (bits_known) + { + jump_func->bits.known = true; + jump_func->bits.value = streamer_read_widest_int (ib); + jump_func->bits.mask = streamer_read_widest_int (ib); + jump_func->bits.sgn = streamer_read_enum (ib, signop, UNSIGNED + 1); + jump_func->bits.precision = streamer_read_uhwi (ib); + } + else + jump_func->bits.known = false; } /* Stream out parts of cgraph_indirect_call_info corresponding to CS that are @@ -5050,6 +5117,31 @@ write_ipcp_transformation_info (output_block *ob, cgraph_node *node) } else streamer_write_uhwi (ob, 0); + + ts = ipcp_get_transformation_summary (node); + if (ts && vec_safe_length (ts->bits) > 0) + { + count = ts->bits->length (); + streamer_write_uhwi (ob, count); + + for (unsigned i = 0; i < count; ++i) + { + const ipa_bits& bits_jfunc = (*ts->bits)[i]; + struct bitpack_d bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, bits_jfunc.known, 1); + streamer_write_bitpack (&bp); + if (bits_jfunc.known) + { + streamer_write_widest_int (ob, bits_jfunc.value); + streamer_write_widest_int (ob, bits_jfunc.mask); + streamer_write_enum (ob->main_stream, signop, + UNSIGNED + 1, bits_jfunc.sgn); + streamer_write_uhwi (ob, bits_jfunc.precision); + } + } + } + else + streamer_write_uhwi (ob, 0); } /* Stream in the aggregate value replacement chain for NODE from IB. */ @@ -5102,6 +5194,28 @@ read_ipcp_transformation_info (lto_input_block *ib, cgraph_node *node, } } } + + count = streamer_read_uhwi (ib); + if (count > 0) + { + ipcp_grow_transformations_if_necessary (); + ipcp_transformation_summary *ts = ipcp_get_transformation_summary (node); + vec_safe_grow_cleared (ts->bits, count); + + for (i = 0; i < count; i++) + { + ipa_bits& bits_jfunc = (*ts->bits)[i]; + struct bitpack_d bp = streamer_read_bitpack (ib); + bits_jfunc.known = bp_unpack_value (&bp, 1); + if (bits_jfunc.known) + { + bits_jfunc.value = streamer_read_widest_int (ib); + bits_jfunc.mask = streamer_read_widest_int (ib); + bits_jfunc.sgn = streamer_read_enum (ib, signop, UNSIGNED + 1); + bits_jfunc.precision = streamer_read_uhwi (ib); + } + } + } } /* Write all aggregate replacement for nodes in set. */ @@ -5404,6 +5518,54 @@ ipcp_update_alignments (struct cgraph_node *node) } } +/* Update bits info of formal parameters as described in + ipcp_transformation_summary. */ + +static void +ipcp_update_bits (struct cgraph_node *node) +{ + tree parm = DECL_ARGUMENTS (node->decl); + tree next_parm = parm; + ipcp_transformation_summary *ts = ipcp_get_transformation_summary (node); + + if (!ts || vec_safe_length (ts->bits) == 0) + return; + + vec<ipa_bits, va_gc> &bits = *ts->bits; + unsigned count = bits.length (); + + for (unsigned i = 0; i < count; ++i, parm = next_parm) + { + if (node->clone.combined_args_to_skip + && bitmap_bit_p (node->clone.combined_args_to_skip, i)) + continue; + + gcc_checking_assert (parm); + next_parm = DECL_CHAIN (parm); + + if (!bits[i].known + || !INTEGRAL_TYPE_P (TREE_TYPE (parm)) + || !is_gimple_reg (parm)) + continue; + + tree ddef = ssa_default_def (DECL_STRUCT_FUNCTION (node->decl), parm); + if (!ddef) + continue; + + if (dump_file) + { + fprintf (dump_file, "Adjusting mask for param %u to ", i); + print_hex (bits[i].mask, dump_file); + fprintf (dump_file, "\n"); + } + + unsigned prec = TYPE_PRECISION (TREE_TYPE (ddef)); + wide_int nonzero_bits = wide_int::from (bits[i].mask, prec, UNSIGNED) + | wide_int::from (bits[i].value, prec, bits[i].sgn); + set_nonzero_bits (ddef, nonzero_bits); + } +} + /* IPCP transformation phase doing propagation of aggregate values. */ unsigned int @@ -5423,6 +5585,7 @@ ipcp_transform_function (struct cgraph_node *node) node->name (), node->order); ipcp_update_alignments (node); + ipcp_update_bits (node); aggval = ipa_get_agg_replacements_for_node (node); if (!aggval) return 0; diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h index e32d078..1b9d0ef 100644 --- a/gcc/ipa-prop.h +++ b/gcc/ipa-prop.h @@ -154,6 +154,23 @@ struct GTY(()) ipa_alignment unsigned misalign; }; +/* Information about zero/non-zero bits. */ +struct GTY(()) ipa_bits +{ + /* The propagated value. */ + widest_int value; + /* Mask corresponding to the value. + Similar to ccp_prop_t, if xth bit of mask is 0, + implies xth bit of value is constant. */ + widest_int mask; + /* Original precision of the value. */ + unsigned precision; + /* Sign obtained from TYPE_SIGN. */ + enum signop sgn; + /* True if jump function is known. */ + bool known; +}; + /* A jump function for a callsite represents the values passed as actual arguments of the callsite. See enum jump_func_type for the various types of jump functions supported. */ @@ -166,6 +183,9 @@ struct GTY (()) ipa_jump_func /* Information about alignment of pointers. */ struct ipa_alignment alignment; + /* Information about zero/non-zero bits. */ + struct ipa_bits bits; + enum jump_func_type type; /* Represents a value of a jump function. pass_through is used only in jump function context. constant represents the actual constant in constant jump @@ -482,6 +502,8 @@ struct GTY(()) ipcp_transformation_summary ipa_agg_replacement_value *agg_values; /* Alignment information for pointers. */ vec<ipa_alignment, va_gc> *alignments; + /* Known bits information. */ + vec<ipa_bits, va_gc> *bits; }; void ipa_set_node_agg_value_chain (struct cgraph_node *node, diff --git a/gcc/opts.c b/gcc/opts.c index 4053fb1..cde9a7b 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -505,6 +505,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_2_PLUS, OPT_ftree_switch_conversion, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_fipa_cp, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_fipa_cp_alignment, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_fipa_cp_bit, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_fdevirtualize, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_fdevirtualize_speculatively, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_fipa_sra, NULL, 1 }, @@ -1422,6 +1423,9 @@ enable_fdo_optimizations (struct gcc_options *opts, if (!opts_set->x_flag_ipa_cp_alignment && value && opts->x_flag_ipa_cp) opts->x_flag_ipa_cp_alignment = value; + if (!opts_set->x_flag_ipa_cp_bit + && value && opts->x_flag_ipa_cp) + opts->x_flag_ipa_cp_bit = value; if (!opts_set->x_flag_predictive_commoning) opts->x_flag_predictive_commoning = value; if (!opts_set->x_flag_unswitch_loops)