===================================================================
@@ -46,3 +46,7 @@ INT_MODE (XI, 64);
/* Quad float: 128-bit floating mode for long doubles. */
FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* Coefficient 1 is multiplied by the number of 128-bit chunks in an
+ SVE vector (referred to as "VQ") minus one. */
+#define NUM_POLY_INT_COEFFS 2
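
As background for the hunk above: with NUM_POLY_INT_COEFFS set to 2, every mode size and address offset in the port becomes a value of the form C0 + C1 * (VQ - 1), and the later hunks compare such values with "known"/"maybe" predicates rather than with plain operators. A minimal standalone sketch of that arithmetic, assuming nothing beyond the comment above; poly2 and the *_sketch helpers are invented names for illustration, not GCC's poly-int.h interface:

#include <cstdint>
#include <cstdio>

/* A value of the form c0 + c1 * x, where x is the runtime parameter
   (VQ - 1 on AArch64: the number of 128-bit chunks beyond the first).  */
struct poly2
{
  int64_t c0, c1;
  int64_t eval (int64_t vq_minus_1) const { return c0 + c1 * vq_minus_1; }
};

/* "known" predicates must hold for every possible runtime value;
   "maybe" predicates hold if some runtime value satisfies them.  */
static bool known_eq_sketch (poly2 a, poly2 b)
{
  return a.c0 == b.c0 && a.c1 == b.c1;
}

static bool maybe_ne_sketch (poly2 a, poly2 b)
{
  return !known_eq_sketch (a, b);
}

int main ()
{
  poly2 sve_bytes = { 16, 16 };   /* SVE vector: 16, 32, 48, ... bytes.  */
  poly2 neon_bytes = { 16, 0 };   /* Advanced SIMD vector: always 16 bytes.  */
  std::printf ("VQ=2: SVE %lld bytes, Advanced SIMD %lld bytes\n",
	       (long long) sve_bytes.eval (1), (long long) neon_bytes.eval (1));
  std::printf ("known_eq: %d, maybe_ne: %d\n",
	       known_eq_sketch (sve_bytes, neon_bytes),
	       maybe_ne_sketch (sve_bytes, neon_bytes));
  return 0;
}
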
===================================================================
@@ -333,7 +333,7 @@ enum simd_immediate_check {
extern struct tune_params aarch64_tune_params;
-HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
+poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
int aarch64_get_condition_code (rtx);
bool aarch64_address_valid_for_prefetch_p (rtx, bool);
bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
@@ -366,7 +366,7 @@ bool aarch64_zero_extend_const_eq (machi
bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
bool aarch64_mov_operand_p (rtx, machine_mode);
rtx aarch64_reverse_mask (machine_mode, unsigned int);
-bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
+bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
char *aarch64_output_simd_mov_immediate (rtx, unsigned,
enum simd_immediate_check w = AARCH64_CHECK_MOV);
===================================================================
@@ -554,7 +554,7 @@ #define LIBCALL_VALUE(MODE) \
#define DEFAULT_PCC_STRUCT_RETURN 0
-#ifdef HOST_WIDE_INT
+#ifdef HAVE_POLY_INT_H
struct GTY (()) aarch64_frame
{
HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER];
@@ -572,20 +572,20 @@ struct GTY (()) aarch64_frame
/* Offset from the base of the frame (incomming SP) to the
top of the locals area. This value is always a multiple of
STACK_BOUNDARY. */
- HOST_WIDE_INT locals_offset;
+ poly_int64 locals_offset;
/* Offset from the base of the frame (incomming SP) to the
hard_frame_pointer. This value is always a multiple of
STACK_BOUNDARY. */
- HOST_WIDE_INT hard_fp_offset;
+ poly_int64 hard_fp_offset;
/* The size of the frame. This value is the offset from base of the
- * frame (incomming SP) to the stack_pointer. This value is always
- * a multiple of STACK_BOUNDARY. */
- HOST_WIDE_INT frame_size;
+     frame (incoming SP) to the stack_pointer.  This value is always
+ a multiple of STACK_BOUNDARY. */
+ poly_int64 frame_size;
/* The size of the initial stack adjustment before saving callee-saves. */
- HOST_WIDE_INT initial_adjust;
+ poly_int64 initial_adjust;
/* The writeback value when pushing callee-save registers.
It is zero when no push is used. */
@@ -593,10 +593,10 @@ struct GTY (()) aarch64_frame
/* The offset from SP to the callee-save registers after initial_adjust.
It may be non-zero if no push is used (ie. callee_adjust == 0). */
- HOST_WIDE_INT callee_offset;
+ poly_int64 callee_offset;
/* The size of the stack adjustment after saving callee-saves. */
- HOST_WIDE_INT final_adjust;
+ poly_int64 final_adjust;
/* Store FP,LR and setup a frame pointer. */
bool emit_frame_chain;
===================================================================
@@ -1065,9 +1065,9 @@ aarch64_simd_expand_args (rtx target, in
gcc_assert (opc > 1);
if (CONST_INT_P (op[opc]))
{
- aarch64_simd_lane_bounds (op[opc], 0,
- GET_MODE_NUNITS (builtin_mode),
- exp);
+ unsigned int nunits
+ = GET_MODE_NUNITS (builtin_mode).to_constant ();
+ aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
/* Keep to GCC-vector-extension lane indices in the RTL. */
op[opc] = aarch64_endian_lane_rtx (builtin_mode,
INTVAL (op[opc]));
@@ -1080,8 +1080,9 @@ aarch64_simd_expand_args (rtx target, in
if (CONST_INT_P (op[opc]))
{
machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
- aarch64_simd_lane_bounds (op[opc],
- 0, GET_MODE_NUNITS (vmode), exp);
+ unsigned int nunits
+ = GET_MODE_NUNITS (vmode).to_constant ();
+ aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
/* Keep to GCC-vector-extension lane indices in the RTL. */
op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
}
@@ -1400,16 +1401,17 @@ aarch64_builtin_vectorized_function (uns
tree type_in)
{
machine_mode in_mode, out_mode;
- int in_n, out_n;
+ unsigned HOST_WIDE_INT in_n, out_n;
if (TREE_CODE (type_out) != VECTOR_TYPE
|| TREE_CODE (type_in) != VECTOR_TYPE)
return NULL_TREE;
out_mode = TYPE_MODE (TREE_TYPE (type_out));
- out_n = TYPE_VECTOR_SUBPARTS (type_out);
in_mode = TYPE_MODE (TREE_TYPE (type_in));
- in_n = TYPE_VECTOR_SUBPARTS (type_in);
+ if (!TYPE_VECTOR_SUBPARTS (type_out).is_constant (&out_n)
+ || !TYPE_VECTOR_SUBPARTS (type_in).is_constant (&in_n))
+ return NULL_TREE;
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
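
The pattern in these two builtin hunks recurs throughout the patch: code that can only cope with fixed-length vectors asks whether an element count is a compile-time constant and punts (here by returning NULL_TREE) when it is not. A standalone sketch of the idiom under the same invented poly2 model, not the real TYPE_VECTOR_SUBPARTS/poly-int.h interface:

#include <cstdint>
#include <cstdio>

struct poly2
{
  int64_t c0, c1;

  /* If the value does not depend on the runtime vector length,
     store it in *OUT and return true.  */
  bool is_constant (int64_t *out) const
  {
    if (c1 != 0)
      return false;
    *out = c0;
    return true;
  }
};

/* Analogue of the TYPE_VECTOR_SUBPARTS checks above: only proceed when
   both element counts are known at compile time.  */
static bool fixed_width_pair_p (poly2 out_subparts, poly2 in_subparts)
{
  int64_t out_n, in_n;
  if (!out_subparts.is_constant (&out_n)
      || !in_subparts.is_constant (&in_n))
    return false;   /* e.g. an SVE type: punt, as the hunk does.  */
  return out_n == in_n;
}

int main ()
{
  std::printf ("%d %d\n",
	       fixed_width_pair_p ({ 4, 0 }, { 4, 0 }),   /* V4SI vs V4SF.  */
	       fixed_width_pair_p ({ 4, 4 }, { 4, 4 }));  /* VNx4SI: variable.  */
  return 0;
}
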
===================================================================
@@ -31,9 +31,9 @@ (define_expand "mov<mode>"
normal str, so the check need not apply. */
if (GET_CODE (operands[0]) == MEM
&& !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
- && ((GET_MODE_SIZE (<MODE>mode) == 16
+ && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
&& aarch64_mem_pair_operand (operands[0], DImode))
- || GET_MODE_SIZE (<MODE>mode) == 8)))
+ || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
operands[1] = force_reg (<MODE>mode, operands[1]);
"
)
@@ -5334,9 +5334,7 @@ (define_expand "aarch64_ld<VSTRUCT:nregs
set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
* <VSTRUCT:nregs>);
- aarch64_simd_lane_bounds (operands[3], 0,
- GET_MODE_NUNITS (<VALLDIF:MODE>mode),
- NULL);
+ aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
operands[0], mem, operands[2], operands[3]));
DONE;
===================================================================
@@ -1139,13 +1139,18 @@ aarch64_array_mode_supported_p (machine_
static unsigned int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
+ /* ??? Logically we should only need to provide a value when
+ HARD_REGNO_MODE_OK says that the combination is valid,
+ but at the moment we need to handle all modes. Just ignore
+ any runtime parts for registers that can't store them. */
+ HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
switch (aarch64_regno_regclass (regno))
{
case FP_REGS:
case FP_LO_REGS:
- return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
+ return CEIL (lowest_size, UNITS_PER_VREG);
default:
- return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ return CEIL (lowest_size, UNITS_PER_WORD);
}
gcc_unreachable ();
}
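
A standalone model of why constant_lower_bound plus CEIL gives a sensible answer here: the runtime term (VQ - 1) is never negative, so the lower bound of an SVE size is its 128-bit minimum. That keeps the FP/SIMD answer at one register per SVE vector (each vector occupies a single register however long it is) and merely drops the runtime part for register classes that cannot hold SVE values anyway. Helper names below are invented for illustration:

#include <cstdint>
#include <cstdio>

struct poly2 { int64_t c0, c1; };

/* The runtime term (VQ - 1) is never negative, so the smallest value
   the size can take is just the constant coefficient.  */
static int64_t constant_lower_bound_sketch (poly2 p) { return p.c0; }

static int64_t ceil_div (int64_t a, int64_t b) { return (a + b - 1) / b; }

int main ()
{
  const int64_t units_per_vreg = 16;  /* Minimum vector register size.  */
  poly2 vnx4si_bytes = { 16, 16 };    /* SVE vector of 32-bit ints.  */
  poly2 v4si_bytes = { 16, 0 };       /* 128-bit Advanced SIMD vector.  */
  std::printf ("regs for VNx4SI: %lld, for V4SI: %lld\n",
	       (long long) ceil_div (constant_lower_bound_sketch (vnx4si_bytes),
				     units_per_vreg),
	       (long long) ceil_div (constant_lower_bound_sketch (v4si_bytes),
				     units_per_vreg));
  return 0;
}
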
@@ -1188,25 +1193,17 @@ aarch64_hard_regno_mode_ok (unsigned reg
static bool
aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
{
- return FP_REGNUM_P (regno) && GET_MODE_SIZE (mode) > 8;
+ return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8);
}
/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
machine_mode
-aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
- machine_mode mode)
+aarch64_hard_regno_caller_save_mode (unsigned, unsigned, machine_mode mode)
{
- /* Handle modes that fit within single registers. */
- if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
- {
- if (GET_MODE_SIZE (mode) >= 4)
- return mode;
- else
- return SImode;
- }
- /* Fall back to generic for multi-reg and very large modes. */
+ if (known_ge (GET_MODE_SIZE (mode), 4))
+ return mode;
else
- return choose_hard_reg_mode (regno, nregs, false);
+ return SImode;
}
/* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
@@ -1319,11 +1316,10 @@ aarch64_tls_get_addr (void)
tls_symbolic_operand_type (rtx addr)
{
enum tls_model tls_kind = TLS_MODEL_NONE;
- rtx sym, addend;
-
if (GET_CODE (addr) == CONST)
{
- split_const (addr, &sym, &addend);
+ poly_int64 addend;
+ rtx sym = strip_offset (addr, &addend);
if (GET_CODE (sym) == SYMBOL_REF)
tls_kind = SYMBOL_REF_TLS_MODEL (sym);
}
@@ -2262,8 +2258,12 @@ aarch64_pass_by_reference (cumulative_ar
int nregs;
/* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
- size = (mode == BLKmode && type)
- ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ if (mode == BLKmode && type)
+ size = int_size_in_bytes (type);
+ else
+ /* No frontends can create types with variable-sized modes, so we
+ shouldn't be asked to pass or return them. */
+ size = GET_MODE_SIZE (mode).to_constant ();
/* Aggregates are passed by reference based on their size. */
if (type && AGGREGATE_TYPE_P (type))
@@ -2360,8 +2360,8 @@ aarch64_function_value (const_tree type,
for (i = 0; i < count; i++)
{
rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
- tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
- GEN_INT (i * GET_MODE_SIZE (ag_mode)));
+ rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
XVECEXP (par, 0, i) = tmp;
}
return par;
@@ -2488,9 +2488,13 @@ aarch64_layout_arg (cumulative_args_t pc
pcum->aapcs_arg_processed = true;
/* Size in bytes, rounded to the nearest multiple of 8 bytes. */
- size
- = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
- UNITS_PER_WORD);
+ if (type)
+ size = int_size_in_bytes (type);
+ else
+ /* No frontends can create types with variable-sized modes, so we
+ shouldn't be asked to pass or return them. */
+ size = GET_MODE_SIZE (mode).to_constant ();
+ size = ROUND_UP (size, UNITS_PER_WORD);
allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
@@ -2527,9 +2531,9 @@ aarch64_layout_arg (cumulative_args_t pc
{
rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
V0_REGNUM + nvrn + i);
- tmp = gen_rtx_EXPR_LIST
- (VOIDmode, tmp,
- GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
+ rtx offset = gen_int_mode
+ (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
XVECEXP (par, 0, i) = tmp;
}
pcum->aapcs_reg = par;
@@ -2754,8 +2758,13 @@ aarch64_pad_reg_upward (machine_mode mod
/* Small composite types are always padded upward. */
if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
{
- HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
- : GET_MODE_SIZE (mode));
+ HOST_WIDE_INT size;
+ if (type)
+ size = int_size_in_bytes (type);
+ else
+ /* No frontends can create types with variable-sized modes, so we
+ shouldn't be asked to pass or return them. */
+ size = GET_MODE_SIZE (mode).to_constant ();
if (size < 2 * UNITS_PER_WORD)
return true;
}
@@ -2784,12 +2793,19 @@ #define ARITH_FACTOR 4096
#define PROBE_STACK_FIRST_REG 9
#define PROBE_STACK_SECOND_REG 10
-/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
inclusive. These are offsets from the current stack pointer. */
static void
-aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
{
+ HOST_WIDE_INT size;
+ if (!poly_size.is_constant (&size))
+ {
+ sorry ("stack probes for SVE frames");
+ return;
+ }
+
rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
/* See the same assertion on PROBE_INTERVAL above. */
@@ -3067,13 +3083,16 @@ #define SLOT_REQUIRED (-1)
= offset + cfun->machine->frame.saved_varargs_size;
cfun->machine->frame.hard_fp_offset
- = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (),
- STACK_BOUNDARY / BITS_PER_UNIT);
+ = aligned_upper_bound (varargs_and_saved_regs_size
+ + get_frame_size (),
+ STACK_BOUNDARY / BITS_PER_UNIT);
+ /* Both these values are already aligned. */
+ gcc_assert (multiple_p (crtl->outgoing_args_size,
+ STACK_BOUNDARY / BITS_PER_UNIT));
cfun->machine->frame.frame_size
- = ROUND_UP (cfun->machine->frame.hard_fp_offset
- + crtl->outgoing_args_size,
- STACK_BOUNDARY / BITS_PER_UNIT);
+ = (cfun->machine->frame.hard_fp_offset
+ + crtl->outgoing_args_size);
cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
@@ -3088,18 +3107,21 @@ #define SLOT_REQUIRED (-1)
else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
max_push_offset = 256;
- if (cfun->machine->frame.frame_size < max_push_offset
- && crtl->outgoing_args_size == 0)
+ HOST_WIDE_INT const_size, const_fp_offset;
+ if (cfun->machine->frame.frame_size.is_constant (&const_size)
+ && const_size < max_push_offset
+ && known_eq (crtl->outgoing_args_size, 0))
{
/* Simple, small frame with no outgoing arguments:
stp reg1, reg2, [sp, -frame_size]!
stp reg3, reg4, [sp, 16] */
- cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size;
+ cfun->machine->frame.callee_adjust = const_size;
}
- else if ((crtl->outgoing_args_size
- + cfun->machine->frame.saved_regs_size < 512)
+ else if (known_lt (crtl->outgoing_args_size
+ + cfun->machine->frame.saved_regs_size, 512)
&& !(cfun->calls_alloca
- && cfun->machine->frame.hard_fp_offset < max_push_offset))
+ && known_lt (cfun->machine->frame.hard_fp_offset,
+ max_push_offset)))
{
/* Frame with small outgoing arguments:
sub sp, sp, frame_size
@@ -3109,13 +3131,14 @@ #define SLOT_REQUIRED (-1)
cfun->machine->frame.callee_offset
= cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
}
- else if (cfun->machine->frame.hard_fp_offset < max_push_offset)
+ else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
+ && const_fp_offset < max_push_offset)
{
/* Frame with large outgoing arguments but a small local area:
stp reg1, reg2, [sp, -hard_fp_offset]!
stp reg3, reg4, [sp, 16]
sub sp, sp, outgoing_args_size */
- cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset;
+ cfun->machine->frame.callee_adjust = const_fp_offset;
cfun->machine->frame.final_adjust
= cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
}
@@ -3328,7 +3351,7 @@ aarch64_return_address_signing_enabled (
skipping any write-back candidates if SKIP_WB is true. */
static void
-aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
+aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
unsigned start, unsigned limit, bool skip_wb)
{
rtx_insn *insn;
@@ -3340,7 +3363,7 @@ aarch64_save_callee_saves (machine_mode
regno = aarch64_next_callee_save (regno + 1, limit))
{
rtx reg, mem;
- HOST_WIDE_INT offset;
+ poly_int64 offset;
if (skip_wb
&& (regno == cfun->machine->frame.wb_candidate1
@@ -3393,13 +3416,13 @@ aarch64_save_callee_saves (machine_mode
static void
aarch64_restore_callee_saves (machine_mode mode,
- HOST_WIDE_INT start_offset, unsigned start,
+ poly_int64 start_offset, unsigned start,
unsigned limit, bool skip_wb, rtx *cfi_ops)
{
rtx base_rtx = stack_pointer_rtx;
unsigned regno;
unsigned regno2;
- HOST_WIDE_INT offset;
+ poly_int64 offset;
for (regno = aarch64_next_callee_save (start, limit);
regno <= limit;
@@ -3444,25 +3467,27 @@ aarch64_restore_callee_saves (machine_mo
static inline bool
offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
- HOST_WIDE_INT offset)
+ poly_int64 offset)
{
- return offset >= -256 && offset < 256;
+ HOST_WIDE_INT const_offset;
+ return (offset.is_constant (&const_offset)
+ && IN_RANGE (const_offset, -256, 255));
}
static inline bool
-offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
+offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
{
- return (offset >= 0
- && offset < 4096 * GET_MODE_SIZE (mode)
- && offset % GET_MODE_SIZE (mode) == 0);
+ HOST_WIDE_INT multiple;
+ return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
+ && IN_RANGE (multiple, 0, 4095));
}
bool
-aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
+aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
{
- return (offset >= -64 * GET_MODE_SIZE (mode)
- && offset < 64 * GET_MODE_SIZE (mode)
- && offset % GET_MODE_SIZE (mode) == 0);
+ HOST_WIDE_INT multiple;
+ return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
+ && IN_RANGE (multiple, -64, 63));
}
/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
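
The two scaled predicates above follow one recipe: rather than dividing a possibly variable offset by a possibly variable access size, they ask whether the offset is a compile-time-constant multiple of the size and then range-check the multiple; the unscaled predicate simply insists on a constant offset. A standalone sketch of the 7-bit signed scaled case under the same invented poly2 model, not GCC's constant_multiple_p:

#include <cstdint>
#include <cstdio>

struct poly2 { int64_t c0, c1; };

/* True if A is the same constant multiple of B for every runtime vector
   length; store that multiple in *MULTIPLE.  */
static bool constant_multiple_p_sketch (poly2 a, poly2 b, int64_t *multiple)
{
  if (b.c0 == 0 || a.c0 % b.c0 != 0)
    return false;
  int64_t m = a.c0 / b.c0;
  if (a.c1 != m * b.c1)
    return false;
  *multiple = m;
  return true;
}

/* Analogue of aarch64_offset_7bit_signed_scaled_p above.  */
static bool offset_7bit_signed_scaled_sketch (poly2 offset, poly2 size)
{
  int64_t multiple;
  return (constant_multiple_p_sketch (offset, size, &multiple)
	  && multiple >= -64 && multiple <= 63);
}

int main ()
{
  poly2 sve_size = { 16, 16 };
  std::printf ("%d %d %d\n",
	       offset_7bit_signed_scaled_sketch ({ 32, 32 }, sve_size),   /* 2 * size: ok.  */
	       offset_7bit_signed_scaled_sketch ({ 32, 0 }, sve_size),    /* fixed 32: not a multiple.  */
	       offset_7bit_signed_scaled_sketch ({ 16, 0 }, { 16, 0 }));  /* 1 * 16: ok.  */
  return 0;
}
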
@@ -3479,7 +3504,7 @@ aarch64_get_separate_components (void)
for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
if (aarch64_register_saved_on_entry (regno))
{
- HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
+ poly_int64 offset = cfun->machine->frame.reg_offset[regno];
if (!frame_pointer_needed)
offset += cfun->machine->frame.frame_size
- cfun->machine->frame.hard_fp_offset;
@@ -3583,7 +3608,7 @@ aarch64_process_components (sbitmap comp
so DFmode for the vector registers is enough. */
machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
rtx reg = gen_rtx_REG (mode, regno);
- HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
+ poly_int64 offset = cfun->machine->frame.reg_offset[regno];
if (!frame_pointer_needed)
offset += cfun->machine->frame.frame_size
- cfun->machine->frame.hard_fp_offset;
@@ -3605,13 +3630,13 @@ aarch64_process_components (sbitmap comp
break;
}
- HOST_WIDE_INT offset2 = cfun->machine->frame.reg_offset[regno2];
+ poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
/* The next register is not of the same class or its offset is not
mergeable with the current one into a pair. */
if (!satisfies_constraint_Ump (mem)
|| GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
- || (offset2 - cfun->machine->frame.reg_offset[regno])
- != GET_MODE_SIZE (mode))
+ || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
+ GET_MODE_SIZE (mode)))
{
insn = emit_insn (set);
RTX_FRAME_RELATED_P (insn) = 1;
@@ -3681,11 +3706,19 @@ aarch64_set_handled_components (sbitmap
cfun->machine->reg_is_wrapped_separately[regno] = true;
}
-/* Allocate SIZE bytes of stack space using TEMP1 as a scratch register. */
+/* Allocate POLY_SIZE bytes of stack space using TEMP1 as a scratch
+ register. */
static void
-aarch64_allocate_and_probe_stack_space (rtx temp1, HOST_WIDE_INT size)
+aarch64_allocate_and_probe_stack_space (rtx temp1, poly_int64 poly_size)
{
+ HOST_WIDE_INT size;
+ if (!poly_size.is_constant (&size))
+ {
+ sorry ("stack probes for SVE frames");
+ return;
+ }
+
HOST_WIDE_INT probe_interval
= 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
HOST_WIDE_INT guard_size
@@ -3845,11 +3878,11 @@ aarch64_expand_prologue (void)
{
aarch64_layout_frame ();
- HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size;
- HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
+ poly_int64 frame_size = cfun->machine->frame.frame_size;
+ poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
- HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
- HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
+ poly_int64 final_adjust = cfun->machine->frame.final_adjust;
+ poly_int64 callee_offset = cfun->machine->frame.callee_offset;
unsigned reg1 = cfun->machine->frame.wb_candidate1;
unsigned reg2 = cfun->machine->frame.wb_candidate2;
bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
@@ -3864,19 +3897,19 @@ aarch64_expand_prologue (void)
}
if (flag_stack_usage_info)
- current_function_static_stack_size = frame_size;
+ current_function_static_stack_size = constant_lower_bound (frame_size);
if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
{
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (frame_size > PROBE_INTERVAL
- && frame_size > get_stack_check_protect ())
+ if (maybe_gt (frame_size, PROBE_INTERVAL)
+ && maybe_gt (frame_size, get_stack_check_protect ()))
aarch64_emit_probe_stack_range (get_stack_check_protect (),
(frame_size
- get_stack_check_protect ()));
}
- else if (frame_size > 0)
+ else if (maybe_gt (frame_size, 0))
aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
}
@@ -3911,23 +3944,23 @@ aarch64_expand_prologue (void)
HOST_WIDE_INT guard_used_by_caller = 1024;
if (flag_stack_clash_protection)
{
- if (frame_size == 0)
+ if (known_eq (frame_size, 0))
dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
- else if (initial_adjust < guard_size - guard_used_by_caller
- && final_adjust < guard_size - guard_used_by_caller)
+ else if (known_lt (initial_adjust, guard_size - guard_used_by_caller)
+ && known_lt (final_adjust, guard_size - guard_used_by_caller))
dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
}
/* In theory we should never have both an initial adjustment
and a callee save adjustment. Verify that is the case since the
code below does not handle it for -fstack-clash-protection. */
- gcc_assert (initial_adjust == 0 || callee_adjust == 0);
+ gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
/* Only probe if the initial adjustment is larger than the guard
less the amount of the guard reserved for use by the caller's
outgoing args. */
if (flag_stack_clash_protection
- && initial_adjust >= guard_size - guard_used_by_caller)
+ && maybe_ge (initial_adjust, guard_size - guard_used_by_caller))
aarch64_allocate_and_probe_stack_space (ip0_rtx, initial_adjust);
else
aarch64_sub_sp (ip0_rtx, initial_adjust, true);
@@ -3952,19 +3985,19 @@ aarch64_expand_prologue (void)
callee_adjust != 0 || emit_frame_chain);
/* We may need to probe the final adjustment as well. */
- if (flag_stack_clash_protection && final_adjust != 0)
+ if (flag_stack_clash_protection && maybe_ne (final_adjust, 0))
{
/* First probe if the final adjustment is larger than the guard size
less the amount of the guard reserved for use by the caller's
outgoing args. */
- if (final_adjust >= guard_size - guard_used_by_caller)
+ if (maybe_ge (final_adjust, guard_size - guard_used_by_caller))
aarch64_allocate_and_probe_stack_space (ip1_rtx, final_adjust);
else
aarch64_sub_sp (ip1_rtx, final_adjust, !frame_pointer_needed);
/* We must also probe if the final adjustment is larger than the guard
that is assumed used by the caller. This may be sub-optimal. */
- if (final_adjust >= guard_used_by_caller)
+ if (maybe_ge (final_adjust, guard_used_by_caller))
{
if (dump_file)
fprintf (dump_file,
@@ -3993,7 +4026,7 @@ aarch64_use_return_insn_p (void)
aarch64_layout_frame ();
- return cfun->machine->frame.frame_size == 0;
+ return known_eq (cfun->machine->frame.frame_size, 0);
}
/* Generate the epilogue instructions for returning from a function.
@@ -4006,21 +4039,23 @@ aarch64_expand_epilogue (bool for_sibcal
{
aarch64_layout_frame ();
- HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
+ poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
- HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
- HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
+ poly_int64 final_adjust = cfun->machine->frame.final_adjust;
+ poly_int64 callee_offset = cfun->machine->frame.callee_offset;
unsigned reg1 = cfun->machine->frame.wb_candidate1;
unsigned reg2 = cfun->machine->frame.wb_candidate2;
rtx cfi_ops = NULL;
rtx_insn *insn;
/* We need to add memory barrier to prevent read from deallocated stack. */
- bool need_barrier_p = (get_frame_size ()
- + cfun->machine->frame.saved_varargs_size) != 0;
+ bool need_barrier_p
+ = maybe_ne (get_frame_size ()
+ + cfun->machine->frame.saved_varargs_size, 0);
/* Emit a barrier to prevent loads from a deallocated stack. */
- if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+ if (maybe_gt (final_adjust, crtl->outgoing_args_size)
+ || cfun->calls_alloca
|| crtl->calls_eh_return)
{
emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
@@ -4031,7 +4066,8 @@ aarch64_expand_epilogue (bool for_sibcal
be the same as the stack pointer. */
rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
- if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
+ if (frame_pointer_needed
+ && (maybe_ne (final_adjust, 0) || cfun->calls_alloca))
/* If writeback is used when restoring callee-saves, the CFA
is restored on the instruction doing the writeback. */
aarch64_add_offset (Pmode, stack_pointer_rtx,
@@ -4055,7 +4091,7 @@ aarch64_expand_epilogue (bool for_sibcal
if (callee_adjust != 0)
aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
- if (callee_adjust != 0 || initial_adjust > 65536)
+ if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536))
{
/* Emit delayed restores and set the CFA to be SP + initial_adjust. */
insn = get_last_insn ();
@@ -4656,9 +4692,9 @@ aarch64_classify_index (struct aarch64_a
&& contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
index = SUBREG_REG (index);
- if ((shift == 0 ||
- (shift > 0 && shift <= 3
- && (1 << shift) == GET_MODE_SIZE (mode)))
+ if ((shift == 0
+ || (shift > 0 && shift <= 3
+ && known_eq (1 << shift, GET_MODE_SIZE (mode))))
&& REG_P (index)
&& aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
{
@@ -4680,7 +4716,7 @@ aarch64_mode_valid_for_sched_fusion_p (m
return mode == SImode || mode == DImode
|| mode == SFmode || mode == DFmode
|| (aarch64_vector_mode_supported_p (mode)
- && GET_MODE_SIZE (mode) == 8);
+ && known_eq (GET_MODE_SIZE (mode), 8));
}
/* Return true if REGNO is a virtual pointer register, or an eliminable
@@ -4706,6 +4742,7 @@ aarch64_classify_address (struct aarch64
{
enum rtx_code code = GET_CODE (x);
rtx op0, op1;
+ HOST_WIDE_INT const_size;
/* On BE, we use load/store pair for all large int mode load/stores.
TI/TFmode may also use a load/store pair. */
@@ -4715,10 +4752,10 @@ aarch64_classify_address (struct aarch64
|| (BYTES_BIG_ENDIAN
&& aarch64_vect_struct_mode_p (mode)));
- bool allow_reg_index_p =
- !load_store_pair_p
- && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
- && !aarch64_vect_struct_mode_p (mode);
+ bool allow_reg_index_p = (!load_store_pair_p
+ && (maybe_ne (GET_MODE_SIZE (mode), 16)
+ || aarch64_vector_mode_supported_p (mode))
+ && !aarch64_vect_struct_mode_p (mode));
/* On LE, for AdvSIMD, don't support anything other than POST_INC or
REG addressing. */
@@ -4751,7 +4788,7 @@ aarch64_classify_address (struct aarch64
return true;
}
- if (GET_MODE_SIZE (mode) != 0
+ if (maybe_ne (GET_MODE_SIZE (mode), 0)
&& CONST_INT_P (op1)
&& aarch64_base_register_rtx_p (op0, strict_p))
{
@@ -4798,7 +4835,8 @@ aarch64_classify_address (struct aarch64
offset + 32));
if (load_store_pair_p)
- return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+ return ((known_eq (GET_MODE_SIZE (mode), 4)
+ || known_eq (GET_MODE_SIZE (mode), 8))
&& aarch64_offset_7bit_signed_scaled_p (mode, offset));
else
return (offset_9bit_signed_unscaled_p (mode, offset)
@@ -4858,7 +4896,8 @@ aarch64_classify_address (struct aarch64
&& offset_9bit_signed_unscaled_p (mode, offset));
if (load_store_pair_p)
- return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+ return ((known_eq (GET_MODE_SIZE (mode), 4)
+ || known_eq (GET_MODE_SIZE (mode), 8))
&& aarch64_offset_7bit_signed_scaled_p (mode, offset));
else
return offset_9bit_signed_unscaled_p (mode, offset);
@@ -4872,7 +4911,9 @@ aarch64_classify_address (struct aarch64
for SI mode or larger. */
info->type = ADDRESS_SYMBOLIC;
- if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
+ if (!load_store_pair_p
+ && GET_MODE_SIZE (mode).is_constant (&const_size)
+ && const_size >= 4)
{
rtx sym, addend;
@@ -4898,7 +4939,6 @@ aarch64_classify_address (struct aarch64
{
/* The symbol and offset must be aligned to the access size. */
unsigned int align;
- unsigned int ref_size;
if (CONSTANT_POOL_ADDRESS_P (sym))
align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
@@ -4916,12 +4956,12 @@ aarch64_classify_address (struct aarch64
else
align = BITS_PER_UNIT;
- ref_size = GET_MODE_SIZE (mode);
- if (ref_size == 0)
+ poly_int64 ref_size = GET_MODE_SIZE (mode);
+ if (known_eq (ref_size, 0))
ref_size = GET_MODE_SIZE (DImode);
- return ((INTVAL (offs) & (ref_size - 1)) == 0
- && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
+ return (multiple_p (INTVAL (offs), ref_size)
+ && multiple_p (align / BITS_PER_UNIT, ref_size));
}
}
return false;
@@ -4999,19 +5039,24 @@ aarch64_legitimate_address_p (machine_mo
static bool
aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
{
- HOST_WIDE_INT offset = INTVAL (*disp);
- HOST_WIDE_INT base;
+ HOST_WIDE_INT size;
+ if (GET_MODE_SIZE (mode).is_constant (&size))
+ {
+ HOST_WIDE_INT offset = INTVAL (*disp);
+ HOST_WIDE_INT base;
- if (mode == TImode || mode == TFmode)
- base = (offset + 0x100) & ~0x1f8;
- else if ((offset & (GET_MODE_SIZE (mode) - 1)) != 0)
- base = (offset + 0x100) & ~0x1ff;
- else
- base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
+ if (mode == TImode || mode == TFmode)
+ base = (offset + 0x100) & ~0x1f8;
+ else if ((offset & (size - 1)) != 0)
+ base = (offset + 0x100) & ~0x1ff;
+ else
+ base = offset & ~(size < 4 ? 0xfff : 0x3ffc);
- *off = GEN_INT (base);
- *disp = GEN_INT (offset - base);
- return true;
+ *off = GEN_INT (base);
+ *disp = GEN_INT (offset - base);
+ return true;
+ }
+ return false;
}
/* Return the binary representation of floating point constant VALUE in INTVAL.
@@ -5399,26 +5444,13 @@ aarch64_get_condition_code_1 (machine_mo
bool
aarch64_const_vec_all_same_in_range_p (rtx x,
- HOST_WIDE_INT minval,
- HOST_WIDE_INT maxval)
+ HOST_WIDE_INT minval,
+ HOST_WIDE_INT maxval)
{
- HOST_WIDE_INT firstval;
- int count, i;
-
- if (GET_CODE (x) != CONST_VECTOR
- || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
- return false;
-
- firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
- if (firstval < minval || firstval > maxval)
- return false;
-
- count = CONST_VECTOR_NUNITS (x);
- for (i = 1; i < count; i++)
- if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
- return false;
-
- return true;
+ rtx elt;
+ return (const_vec_duplicate_p (x, &elt)
+ && CONST_INT_P (elt)
+ && IN_RANGE (INTVAL (elt), minval, maxval));
}
bool
@@ -5860,7 +5892,7 @@ #define buf_size 20
machine_mode mode = GET_MODE (x);
if (GET_CODE (x) != MEM
- || (code == 'y' && GET_MODE_SIZE (mode) != 16))
+ || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
{
output_operand_lossage ("invalid operand for '%%%c'", code);
return;
@@ -5891,6 +5923,7 @@ aarch64_print_address_internal (FILE *f,
aarch64_addr_query_type type)
{
struct aarch64_address_info addr;
+ unsigned int size;
/* Check all addresses are Pmode - including ILP32. */
gcc_assert (GET_MODE (x) == Pmode);
@@ -5934,30 +5967,28 @@ aarch64_print_address_internal (FILE *f,
return true;
case ADDRESS_REG_WB:
+ /* Writeback is only supported for fixed-width modes. */
+ size = GET_MODE_SIZE (mode).to_constant ();
switch (GET_CODE (x))
{
case PRE_INC:
- asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
- GET_MODE_SIZE (mode));
+ asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size);
return true;
case POST_INC:
- asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
- GET_MODE_SIZE (mode));
+ asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size);
return true;
case PRE_DEC:
- asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
- GET_MODE_SIZE (mode));
+ asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size);
return true;
case POST_DEC:
- asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
- GET_MODE_SIZE (mode));
+ asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size);
return true;
case PRE_MODIFY:
- asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
+ asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
INTVAL (addr.offset));
return true;
case POST_MODIFY:
- asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
+ asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
INTVAL (addr.offset));
return true;
default:
@@ -6048,6 +6079,39 @@ aarch64_regno_regclass (unsigned regno)
return NO_REGS;
}
+/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
+ If OFFSET is out of range, return an offset of an anchor point
+ that is in range. Return 0 otherwise. */
+
+static HOST_WIDE_INT
+aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
+ machine_mode mode)
+{
+ /* Does it look like we'll need a 16-byte load/store-pair operation? */
+ if (size > 16)
+ return (offset + 0x400) & ~0x7f0;
+
+ /* For offsets that aren't a multiple of the access size, the limit is
+ -256...255. */
+ if (offset & (size - 1))
+ {
+ /* BLKmode typically uses LDP of X-registers. */
+ if (mode == BLKmode)
+ return (offset + 512) & ~0x3ff;
+ return (offset + 0x100) & ~0x1ff;
+ }
+
+ /* Small negative offsets are supported. */
+ if (IN_RANGE (offset, -256, 0))
+ return 0;
+
+ if (mode == TImode || mode == TFmode)
+ return (offset + 0x100) & ~0x1ff;
+
+ /* Use 12-bit offset by access size. */
+ return offset & (~0xfff * size);
+}
+
static rtx
aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
{
@@ -6097,34 +6161,17 @@ aarch64_legitimize_address (rtx x, rtx /
x = gen_rtx_PLUS (Pmode, base, offset_rtx);
}
- /* Does it look like we'll need a 16-byte load/store-pair operation? */
- HOST_WIDE_INT base_offset;
- if (GET_MODE_SIZE (mode) > 16)
- base_offset = (offset + 0x400) & ~0x7f0;
- /* For offsets aren't a multiple of the access size, the limit is
- -256...255. */
- else if (offset & (GET_MODE_SIZE (mode) - 1))
- {
- base_offset = (offset + 0x100) & ~0x1ff;
-
- /* BLKmode typically uses LDP of X-registers. */
- if (mode == BLKmode)
- base_offset = (offset + 512) & ~0x3ff;
- }
- /* Small negative offsets are supported. */
- else if (IN_RANGE (offset, -256, 0))
- base_offset = 0;
- else if (mode == TImode || mode == TFmode)
- base_offset = (offset + 0x100) & ~0x1ff;
- /* Use 12-bit offset by access size. */
- else
- base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
-
- if (base_offset != 0)
+ HOST_WIDE_INT size;
+ if (GET_MODE_SIZE (mode).is_constant (&size))
{
- base = plus_constant (Pmode, base, base_offset);
- base = force_operand (base, NULL_RTX);
- return plus_constant (Pmode, base, offset - base_offset);
+ HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
+ mode);
+ if (base_offset != 0)
+ {
+ base = plus_constant (Pmode, base, base_offset);
+ base = force_operand (base, NULL_RTX);
+ return plus_constant (Pmode, base, offset - base_offset);
+ }
}
}
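
For reference, the anchor arithmetic that the earlier hunk factors out into aarch64_anchor_offset, replayed standalone with one worked value; the BLKmode and TImode/TFmode special cases are left out of this sketch. A 4-byte access at offset 0x12344 keeps the bits above the scaled 12-bit range as the anchor and leaves a remainder that a plain LDR/STR can encode:

#include <cstdint>
#include <cstdio>

/* Same arithmetic as the aligned/unaligned branches of aarch64_anchor_offset;
   SIZE is the access size in bytes.  (BLKmode and TImode/TFmode handling
   omitted.)  */
static int64_t anchor_offset_sketch (int64_t offset, int64_t size)
{
  if (size > 16)
    return (offset + 0x400) & ~0x7f0;   /* Likely needs a load/store pair.  */
  if (offset & (size - 1))
    return (offset + 0x100) & ~0x1ff;   /* Unscaled 9-bit signed range.  */
  if (offset >= -256 && offset <= 0)
    return 0;                           /* Already directly addressable.  */
  return offset & (~0xfff * size);      /* Scaled 12-bit unsigned range.  */
}

int main ()
{
  int64_t offset = 0x12344, size = 4;
  int64_t anchor = anchor_offset_sketch (offset, size);
  /* Prints: anchor = 0x10000, remainder = 0x2344 (and 0x2344 / 4 <= 0xfff).  */
  std::printf ("anchor = %#llx, remainder = %#llx\n",
	       (long long) anchor, (long long) (offset - anchor));
  return 0;
}
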
@@ -6211,7 +6258,7 @@ aarch64_secondary_reload (bool in_p ATTR
because AArch64 has richer addressing modes for LDR/STR instructions
than LDP/STP instructions. */
if (TARGET_FLOAT && rclass == GENERAL_REGS
- && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
+ && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
return FP_REGS;
if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
@@ -6232,7 +6279,7 @@ aarch64_can_eliminate (const int from AT
return true;
}
-HOST_WIDE_INT
+poly_int64
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
aarch64_layout_frame ();
@@ -6318,6 +6365,11 @@ aarch64_trampoline_init (rtx m_tramp, tr
static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
{
+ /* ??? Logically we should only need to provide a value when
+ HARD_REGNO_MODE_OK says that at least one register in REGCLASS
+ can hold MODE, but at the moment we need to handle all modes.
+ Just ignore any runtime parts for registers that can't store them. */
+ HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
switch (regclass)
{
case CALLER_SAVE_REGS:
@@ -6327,10 +6379,9 @@ aarch64_class_max_nregs (reg_class_t reg
case POINTER_AND_FP_REGS:
case FP_REGS:
case FP_LO_REGS:
- return
- aarch64_vector_mode_p (mode)
- ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
- : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ return (aarch64_vector_mode_p (mode)
+ ? CEIL (lowest_size, UNITS_PER_VREG)
+ : CEIL (lowest_size, UNITS_PER_WORD));
case STACK_REG:
return 1;
@@ -6881,25 +6932,15 @@ aarch64_address_cost (rtx x,
{
/* For the sake of calculating the cost of the shifted register
component, we can treat same sized modes in the same way. */
- switch (GET_MODE_BITSIZE (mode))
- {
- case 16:
- cost += addr_cost->addr_scale_costs.hi;
- break;
-
- case 32:
- cost += addr_cost->addr_scale_costs.si;
- break;
-
- case 64:
- cost += addr_cost->addr_scale_costs.di;
- break;
-
- /* We can't tell, or this is a 128-bit vector. */
- default:
- cost += addr_cost->addr_scale_costs.ti;
- break;
- }
+ if (known_eq (GET_MODE_BITSIZE (mode), 16))
+ cost += addr_cost->addr_scale_costs.hi;
+ else if (known_eq (GET_MODE_BITSIZE (mode), 32))
+ cost += addr_cost->addr_scale_costs.si;
+ else if (known_eq (GET_MODE_BITSIZE (mode), 64))
+ cost += addr_cost->addr_scale_costs.di;
+ else
+ /* We can't tell, or this is a 128-bit vector. */
+ cost += addr_cost->addr_scale_costs.ti;
}
return cost;
@@ -8028,7 +8069,8 @@ aarch64_rtx_costs (rtx x, machine_mode m
if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
&& CONST_INT_P (XEXP (op1, 1))
- && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
+ && known_eq (INTVAL (XEXP (op1, 1)),
+ GET_MODE_BITSIZE (mode) - 1))
{
*cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
/* We already demanded XEXP (op1, 0) to be REG_P, so
@@ -8076,7 +8118,8 @@ aarch64_rtx_costs (rtx x, machine_mode m
if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
&& CONST_INT_P (XEXP (op1, 1))
- && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
+ && known_eq (INTVAL (XEXP (op1, 1)),
+ GET_MODE_BITSIZE (mode) - 1))
{
*cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
/* We already demanded XEXP (op1, 0) to be REG_P, so
@@ -8502,7 +8545,7 @@ aarch64_register_move_cost (machine_mode
return aarch64_register_move_cost (mode, from, GENERAL_REGS)
+ aarch64_register_move_cost (mode, GENERAL_REGS, to);
- if (GET_MODE_SIZE (mode) == 16)
+ if (known_eq (GET_MODE_SIZE (mode), 16))
{
/* 128-bit operations on general registers require 2 instructions. */
if (from == GENERAL_REGS && to == GENERAL_REGS)
@@ -8878,7 +8921,7 @@ aarch64_builtin_vectorization_cost (enum
return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
case vec_construct:
- elements = TYPE_VECTOR_SUBPARTS (vectype);
+ elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
return elements / 2 + 1;
default:
@@ -10925,6 +10968,10 @@ aarch64_gimplify_va_arg_expr (tree valis
&nregs,
&is_ha))
{
+ /* No frontends can create types with variable-sized modes, so we
+ shouldn't be asked to pass or return them. */
+ unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();
+
/* TYPE passed in fp/simd registers. */
if (!TARGET_FLOAT)
aarch64_err_no_fpadvsimd (mode, "varargs");
@@ -10938,8 +10985,8 @@ aarch64_gimplify_va_arg_expr (tree valis
if (is_ha)
{
- if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
- adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
+ if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
+ adjust = UNITS_PER_VREG - ag_size;
}
else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
&& size < UNITS_PER_VREG)
@@ -11327,8 +11374,8 @@ aapcs_vfp_sub_candidate (const_tree type
- tree_to_uhwi (TYPE_MIN_VALUE (index)));
/* There must be no padding. */
- if (wi::to_wide (TYPE_SIZE (type))
- != count * GET_MODE_BITSIZE (*modep))
+ if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
+ count * GET_MODE_BITSIZE (*modep)))
return -1;
return count;
@@ -11358,8 +11405,8 @@ aapcs_vfp_sub_candidate (const_tree type
}
/* There must be no padding. */
- if (wi::to_wide (TYPE_SIZE (type))
- != count * GET_MODE_BITSIZE (*modep))
+ if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
+ count * GET_MODE_BITSIZE (*modep)))
return -1;
return count;
@@ -11391,8 +11438,8 @@ aapcs_vfp_sub_candidate (const_tree type
}
/* There must be no padding. */
- if (wi::to_wide (TYPE_SIZE (type))
- != count * GET_MODE_BITSIZE (*modep))
+ if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
+ count * GET_MODE_BITSIZE (*modep)))
return -1;
return count;
@@ -11414,7 +11461,7 @@ aapcs_vfp_sub_candidate (const_tree type
aarch64_short_vector_p (const_tree type,
machine_mode mode)
{
- HOST_WIDE_INT size = -1;
+ poly_int64 size = -1;
if (type && TREE_CODE (type) == VECTOR_TYPE)
size = int_size_in_bytes (type);
@@ -11422,7 +11469,7 @@ aarch64_short_vector_p (const_tree type,
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
size = GET_MODE_SIZE (mode);
- return (size == 8 || size == 16);
+ return known_eq (size, 8) || known_eq (size, 16);
}
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
@@ -11874,8 +11921,9 @@ aarch64_simd_valid_immediate (rtx op, si
unsigned int n_elts;
if (const_vec_duplicate_p (op, &elt))
n_elts = 1;
- else if (GET_CODE (op) == CONST_VECTOR)
- n_elts = CONST_VECTOR_NUNITS (op);
+ else if (GET_CODE (op) == CONST_VECTOR
+ && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
+ ;
else
return false;
@@ -12064,11 +12112,11 @@ aarch64_simd_vect_par_cnst_half (machine
aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
bool high)
{
- if (!VECTOR_MODE_P (mode))
+ int nelts;
+ if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
return false;
- rtx ideal = aarch64_simd_vect_par_cnst_half (mode, GET_MODE_NUNITS (mode),
- high);
+ rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
HOST_WIDE_INT count_op = XVECLEN (op, 0);
HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
int i = 0;
@@ -12153,7 +12201,8 @@ aarch64_simd_emit_reg_reg_move (rtx *ope
int
aarch64_simd_attr_length_rglist (machine_mode mode)
{
- return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
+ /* This is only used (and only meaningful) for Advanced SIMD, not SVE. */
+ return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
}
/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
@@ -12233,7 +12282,6 @@ aarch64_simd_make_constant (rtx vals)
machine_mode mode = GET_MODE (vals);
rtx const_dup;
rtx const_vec = NULL_RTX;
- int n_elts = GET_MODE_NUNITS (mode);
int n_const = 0;
int i;
@@ -12244,6 +12292,7 @@ aarch64_simd_make_constant (rtx vals)
/* A CONST_VECTOR must contain only CONST_INTs and
CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
Only store valid constants in a CONST_VECTOR. */
+ int n_elts = XVECLEN (vals, 0);
for (i = 0; i < n_elts; ++i)
{
rtx x = XVECEXP (vals, 0, i);
@@ -12282,7 +12331,7 @@ aarch64_expand_vector_init (rtx target,
machine_mode mode = GET_MODE (target);
scalar_mode inner_mode = GET_MODE_INNER (mode);
/* The number of vector elements. */
- int n_elts = GET_MODE_NUNITS (mode);
+ int n_elts = XVECLEN (vals, 0);
/* The number of vector elements which are not constant. */
int n_var = 0;
rtx any_const = NULL_RTX;
@@ -12464,7 +12513,9 @@ aarch64_shift_truncation_mask (machine_m
return
(!SHIFT_COUNT_TRUNCATED
|| aarch64_vector_mode_supported_p (mode)
- || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
+ || aarch64_vect_struct_mode_p (mode))
+ ? 0
+ : (GET_MODE_UNIT_BITSIZE (mode) - 1);
}
/* Select a format to encode pointers in exception handling data. */
@@ -13587,7 +13638,8 @@ aarch64_expand_vec_perm (rtx target, rtx
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
- unsigned int odd, nelt = d->perm.length ();
+ HOST_WIDE_INT odd;
+ poly_uint64 nelt = d->perm.length ();
rtx out, in0, in1, x;
machine_mode vmode = d->vmode;
@@ -13596,8 +13648,8 @@ aarch64_evpc_trn (struct expand_vec_perm
/* Note that these are little-endian tests.
We correct for big-endian later. */
- odd = d->perm[0];
- if ((odd != 0 && odd != 1)
+ if (!d->perm[0].is_constant (&odd)
+ || (odd != 0 && odd != 1)
|| !d->perm.series_p (0, 2, odd, 2)
|| !d->perm.series_p (1, 2, nelt + odd, 2))
return false;
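
The series_p checks carry the whole pattern match here: TRN1 on two n-element inputs selects {0, n, 2, n+2, ...} and TRN2 selects {1, n+1, 3, n+3, ...}, so even output positions must follow odd, odd+2, ... and odd output positions must follow n+odd, n+odd+2, ... A standalone sketch over plain index arrays, with the step fixed at 2 and invented helper names, not the vec_perm_indices interface:

#include <cstdio>
#include <vector>

/* Do selector positions START, START+2, ... hold BASE, BASE+2, ...?  */
static bool series_p_sketch (const std::vector<int> &sel,
			     unsigned start, int base)
{
  for (unsigned i = start; i < sel.size (); i += 2, base += 2)
    if (sel[i] != base)
      return false;
  return true;
}

/* Recognise a TRN1/TRN2 selector for two NELT-element inputs.  */
static bool trn_p_sketch (const std::vector<int> &sel, int nelt, int *odd)
{
  *odd = sel[0];
  return ((*odd == 0 || *odd == 1)
	  && series_p_sketch (sel, 0, *odd)
	  && series_p_sketch (sel, 1, nelt + *odd));
}

int main ()
{
  std::vector<int> trn1 = { 0, 4, 2, 6 };   /* TRN1 for two V4SIs.  */
  std::vector<int> zip1 = { 0, 4, 1, 5 };   /* ZIP1: not a TRN pattern.  */
  int odd;
  bool is_trn = trn_p_sketch (trn1, 4, &odd);
  std::printf ("trn1 matches: %d (odd = %d)\n", is_trn, odd);
  std::printf ("zip1 matches: %d\n", trn_p_sketch (zip1, 4, &odd));
  return 0;
}
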
@@ -13624,7 +13676,7 @@ aarch64_evpc_trn (struct expand_vec_perm
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
- unsigned int odd;
+ HOST_WIDE_INT odd;
rtx out, in0, in1, x;
machine_mode vmode = d->vmode;
@@ -13633,8 +13685,8 @@ aarch64_evpc_uzp (struct expand_vec_perm
/* Note that these are little-endian tests.
We correct for big-endian later. */
- odd = d->perm[0];
- if ((odd != 0 && odd != 1)
+ if (!d->perm[0].is_constant (&odd)
+ || (odd != 0 && odd != 1)
|| !d->perm.series_p (0, 1, odd, 2))
return false;
@@ -13660,7 +13712,8 @@ aarch64_evpc_uzp (struct expand_vec_perm
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
- unsigned int high, nelt = d->perm.length ();
+ unsigned int high;
+ poly_uint64 nelt = d->perm.length ();
rtx out, in0, in1, x;
machine_mode vmode = d->vmode;
@@ -13669,11 +13722,12 @@ aarch64_evpc_zip (struct expand_vec_perm
/* Note that these are little-endian tests.
We correct for big-endian later. */
- high = d->perm[0];
- if ((high != 0 && high * 2 != nelt)
- || !d->perm.series_p (0, 2, high, 1)
- || !d->perm.series_p (1, 2, high + nelt, 1))
+ poly_uint64 first = d->perm[0];
+ if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt))
+ || !d->perm.series_p (0, 2, first, 1)
+ || !d->perm.series_p (1, 2, first + nelt, 1))
return false;
+ high = maybe_ne (first, 0U);
/* Success! */
if (d->testing_p)
@@ -13698,13 +13752,13 @@ aarch64_evpc_zip (struct expand_vec_perm
static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
- unsigned int nelt = d->perm.length ();
+ HOST_WIDE_INT location;
rtx offset;
- unsigned int location = d->perm[0]; /* Always < nelt. */
-
- /* Check if the extracted indices are increasing by one. */
- if (!d->perm.series_p (0, 1, location, 1))
+ /* The first element always refers to the first vector.
+ Check if the extracted indices are increasing by one. */
+ if (!d->perm[0].is_constant (&location)
+ || !d->perm.series_p (0, 1, location, 1))
return false;
/* Success! */
@@ -13720,8 +13774,10 @@ aarch64_evpc_ext (struct expand_vec_perm
at the LSB end of the register), and the low elements of the second
vector (stored at the MSB end of the register). So swap. */
std::swap (d->op0, d->op1);
- /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
- location = nelt - location;
+ /* location != 0 (above), so safe to assume (nelt - location) < nelt.
+ to_constant () is safe since this is restricted to Advanced SIMD
+ vectors. */
+ location = d->perm.length ().to_constant () - location;
}
offset = GEN_INT (location);
@@ -13737,12 +13793,13 @@ aarch64_evpc_ext (struct expand_vec_perm
static bool
aarch64_evpc_rev (struct expand_vec_perm_d *d)
{
- unsigned int i, diff, size, unspec;
+ HOST_WIDE_INT diff;
+ unsigned int i, size, unspec;
- if (!d->one_vector_p)
+ if (!d->one_vector_p
+ || !d->perm[0].is_constant (&diff))
return false;
- diff = d->perm[0];
size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
if (size == 8)
unspec = UNSPEC_REV64;
@@ -13772,19 +13829,18 @@ aarch64_evpc_dup (struct expand_vec_perm
{
rtx out = d->target;
rtx in0;
+ HOST_WIDE_INT elt;
machine_mode vmode = d->vmode;
- unsigned int elt;
rtx lane;
- if (d->perm.encoding ().encoded_nelts () != 1)
+ if (d->perm.encoding ().encoded_nelts () != 1
+ || !d->perm[0].is_constant (&elt))
return false;
/* Success! */
if (d->testing_p)
return true;
- elt = d->perm[0];
-
/* The generic preparation in aarch64_expand_vec_perm_const_1
swaps the operand order and the permute indices if it finds
d->perm[0] to be in the second operand. Thus, we can always
@@ -13804,7 +13860,12 @@ aarch64_evpc_tbl (struct expand_vec_perm
{
rtx rperm[MAX_VECT_LEN], sel;
machine_mode vmode = d->vmode;
- unsigned int i, nelt = d->perm.length ();
+
+ /* Make sure that the indices are constant. */
+ unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts ();
+ for (unsigned int i = 0; i < encoded_nelts; ++i)
+ if (!d->perm[i].is_constant ())
+ return false;
if (d->testing_p)
return true;
@@ -13815,16 +13876,17 @@ aarch64_evpc_tbl (struct expand_vec_perm
if (vmode != V8QImode && vmode != V16QImode)
return false;
-  for (i = 0; i < nelt; ++i)
-    {
-      int nunits = GET_MODE_NUNITS (vmode);
-      /* If big-endian and two vectors we end up with a weird mixed-endian
-	 mode on NEON.  Reverse the index within each word but not the word
-	 itself.  */
-      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
-			  : (HOST_WIDE_INT) d->perm[i]);
-    }
+  /* to_constant is safe since this routine is specific to Advanced SIMD
+     vectors.  */
+  unsigned int nelt = d->perm.length ().to_constant ();
+  for (unsigned int i = 0; i < nelt; ++i)
+    /* If big-endian and two vectors we end up with a weird mixed-endian
+       mode on NEON.  Reverse the index within each word but not the word
+       itself.  to_constant is safe because we checked is_constant above.  */
+    rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
+			? d->perm[i].to_constant () ^ (nelt - 1)
+			: d->perm[i].to_constant ());
sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
sel = force_reg (vmode, sel);
@@ -13838,14 +13900,14 @@ aarch64_expand_vec_perm_const_1 (struct
/* The pattern matching functions above are written to look for a small
number to begin the sequence (0, 1, N/2). If we begin with an index
from the second operand, we can swap the operands. */
- unsigned int nelt = d->perm.length ();
- if (d->perm[0] >= nelt)
+ poly_int64 nelt = d->perm.length ();
+ if (known_ge (d->perm[0], nelt))
{
d->perm.rotate_inputs (1);
std::swap (d->op0, d->op1);
}
- if (TARGET_SIMD && nelt > 1)
+ if (TARGET_SIMD && known_gt (nelt, 1))
{
if (aarch64_evpc_rev (d))
return true;
@@ -13961,7 +14023,7 @@ aarch64_modes_tieable_p (machine_mode mo
AMOUNT bytes. */
static rtx
-aarch64_move_pointer (rtx pointer, int amount)
+aarch64_move_pointer (rtx pointer, poly_int64 amount)
{
rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
@@ -13975,9 +14037,7 @@ aarch64_move_pointer (rtx pointer, int a
static rtx
aarch64_progress_pointer (rtx pointer)
{
- HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
-
- return aarch64_move_pointer (pointer, amount);
+ return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
}
/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
@@ -14788,7 +14848,9 @@ aarch64_operands_ok_for_ldpstp (rtx *ope
offval_1 = INTVAL (offset_1);
offval_2 = INTVAL (offset_2);
- msize = GET_MODE_SIZE (mode);
+ /* We should only be trying this for fixed-sized modes. There is no
+ SVE LDP/STP instruction. */
+ msize = GET_MODE_SIZE (mode).to_constant ();
/* Check if the offsets are consecutive. */
if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
return false;
@@ -15148,7 +15210,9 @@ aarch64_fpconst_pow_of_2 (rtx x)
int
aarch64_vec_fpconst_pow_of_2 (rtx x)
{
- if (GET_CODE (x) != CONST_VECTOR)
+ int nelts;
+ if (GET_CODE (x) != CONST_VECTOR
+ || !CONST_VECTOR_NUNITS (x).is_constant (&nelts))
return -1;
if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
@@ -15158,7 +15222,7 @@ aarch64_vec_fpconst_pow_of_2 (rtx x)
if (firstval <= 0)
return -1;
- for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
+ for (int i = 1; i < nelts; i++)
if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
return -1;
===================================================================
@@ -3328,7 +3328,7 @@ (define_insn "aarch64_<crc_variant>"
CRC))]
"TARGET_CRC32"
{
- if (GET_MODE_BITSIZE (GET_MODE (operands[2])) >= 64)
+ if (GET_MODE_BITSIZE (<crc_mode>mode) >= 64)
return "<crc_variant>\\t%w0, %w1, %x2";
else
return "<crc_variant>\\t%w0, %w1, %w2";