
PING: [11/nn] [AArch64] Set NUM_POLY_INT_COEFFS to 2

Message ID 87wp0wlf5o.fsf_-_@linaro.org
State New
Series PING: [11/nn] [AArch64] Set NUM_POLY_INT_COEFFS to 2

Commit Message

Richard Sandiford Jan. 5, 2018, 11:26 a.m. UTC
Ping.  Here's the patch updated to apply on top of the v8.4 and
__builtin_load_no_speculate support.

Richard Sandiford <richard.sandiford@linaro.org> writes:
> This patch switches the AArch64 port to use 2 poly_int coefficients
> and updates code as necessary to keep it compiling.
>
> One potentially-significant change is to
> aarch64_hard_regno_caller_save_mode.  The old implementation
> was written in a pretty conservative way: it changed the default
> behaviour for single-register values, but used the default handling
> for multi-register values.
>
> I don't think that's necessary, since the interesting cases for this
> macro are usually the single-register ones.  Multi-register modes take
> up the whole of the constituent registers and the move patterns for all
> multi-register modes should be equally good.
>
> Using the original mode for multi-register cases stops us from using
> SVE modes to spill multi-register NEON values.  This was caught by
> gcc.c-torture/execute/pr47538.c.
>
> Also, aarch64_shift_truncation_mask used GET_MODE_BITSIZE - 1.
> GET_MODE_UNIT_BITSIZE - 1 is equivalent for the cases that it handles
> (which are all scalars), and I think it's more obvious, since if we ever
> do use this for elementwise shifts of vector modes, the mask will depend
> on the number of bits in each element rather than the number of bits in
> the whole vector.
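
With NUM_POLY_INT_COEFFS set to 2, AArch64 sizes and offsets become values of
the form c0 + c1 * (VQ - 1), where VQ (the number of 128-bit chunks in an SVE
vector) is only known at run time.  The following is a minimal standalone C++
sketch of that representation (a simplified stand-in, not GCC's actual
poly_int class), showing why the patch replaces plain comparisons with
predicates such as known_eq and maybe_gt:

  /* Minimal sketch of a two-coefficient polynomial size: the value is
     coeffs[0] + coeffs[1] * (VQ - 1), with VQ >= 1 unknown at compile time.
     Illustrative only; GCC's poly_int is more general.  */
  #include <cstdio>

  struct poly_size
  {
    long coeffs[2];
  };

  /* True if A equals B for every possible VQ.  */
  static bool
  known_eq_ (poly_size a, long b)
  {
    return a.coeffs[0] == b && a.coeffs[1] == 0;
  }

  /* True if A exceeds B for at least one possible VQ.  */
  static bool
  maybe_gt_ (poly_size a, long b)
  {
    return a.coeffs[0] > b || a.coeffs[1] > 0;
  }

  int
  main ()
  {
    poly_size v16qi = { { 16, 0 } };    /* Advanced SIMD vector: always 16 bytes.  */
    poly_size vnx16qi = { { 16, 16 } }; /* SVE vector: 16 bytes per 128-bit chunk.  */

    printf ("V16QI known to be 16 bytes: %d\n", known_eq_ (v16qi, 16));
    printf ("VNx16QI known to be 16 bytes: %d\n", known_eq_ (vnx16qi, 16));
    printf ("VNx16QI maybe wider than 16 bytes: %d\n", maybe_gt_ (vnx16qi, 16));
    return 0;
  }

known_eq asks whether a relation holds for every possible VQ and maybe_gt
whether it holds for at least one; for fixed-width Advanced SIMD modes both
reduce to the ordinary comparisons they replace.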



2018-01-05  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* config/aarch64/aarch64-modes.def (NUM_POLY_INT_COEFFS): Set to 2.
	* config/aarch64/aarch64-protos.h (aarch64_initial_elimination_offset):
	Return a poly_int64 rather than a HOST_WIDE_INT.
	(aarch64_offset_7bit_signed_scaled_p): Take the offset as a poly_int64
	rather than a HOST_WIDE_INT.
	* config/aarch64/aarch64.h (aarch64_frame): Protect with
	HAVE_POLY_INT_H rather than HOST_WIDE_INT.  Change locals_offset,
	hard_fp_offset, frame_size, initial_adjust, callee_offset and
	final_adjust from HOST_WIDE_INT to poly_int64.
	* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args): Use
	to_constant when getting the number of units in an Advanced SIMD
	mode.
	(aarch64_builtin_vectorized_function): Check for a constant number
	of units.
	* config/aarch64/aarch64-simd.md (mov<mode>): Handle polynomial
	GET_MODE_SIZE.
	(aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use the nunits
	attribute instead of GET_MODE_NUNITS.
	* config/aarch64/aarch64.c (aarch64_hard_regno_nregs)
	(aarch64_class_max_nregs): Use the constant_lower_bound of the
	GET_MODE_SIZE for fixed-size registers.
	(aarch64_const_vec_all_same_in_range_p): Use const_vec_duplicate_p.
	(aarch64_hard_regno_call_part_clobbered, aarch64_classify_index)
	(aarch64_mode_valid_for_sched_fusion_p, aarch64_classify_address)
	(aarch64_legitimize_address_displacement, aarch64_secondary_reload)
	(aarch64_print_operand, aarch64_print_address_internal)
	(aarch64_address_cost, aarch64_rtx_costs, aarch64_register_move_cost)
	(aarch64_short_vector_p, aapcs_vfp_sub_candidate)
	(aarch64_simd_attr_length_rglist, aarch64_operands_ok_for_ldpstp):
	Handle polynomial GET_MODE_SIZE.
	(aarch64_hard_regno_caller_save_mode): Likewise.  Return modes
	wider than SImode without modification.
	(tls_symbolic_operand_type): Use strip_offset instead of split_const.
	(aarch64_pass_by_reference, aarch64_layout_arg, aarch64_pad_reg_upward)
	(aarch64_gimplify_va_arg_expr): Assert that we don't yet handle
	passing and returning SVE modes.
	(aarch64_function_value, aarch64_layout_arg): Use gen_int_mode
	rather than GEN_INT.
	(aarch64_emit_probe_stack_range): Take the size as a poly_int64
	rather than a HOST_WIDE_INT, but call sorry if it isn't constant.
	(aarch64_allocate_and_probe_stack_space): Likewise.
	(aarch64_layout_frame): Cope with polynomial offsets.
	(aarch64_save_callee_saves, aarch64_restore_callee_saves): Take the
	start_offset as a poly_int64 rather than a HOST_WIDE_INT.  Track
	polynomial offsets.
	(offset_9bit_signed_unscaled_p, offset_12bit_unsigned_scaled_p)
	(aarch64_offset_7bit_signed_scaled_p): Take the offset as a
	poly_int64 rather than a HOST_WIDE_INT.
	(aarch64_get_separate_components, aarch64_process_components)
	(aarch64_expand_prologue, aarch64_expand_epilogue)
	(aarch64_use_return_insn_p): Handle polynomial frame offsets.
	(aarch64_anchor_offset): New function, split out from...
	(aarch64_legitimize_address): ...here.
	(aarch64_builtin_vectorization_cost): Handle polynomial
	TYPE_VECTOR_SUBPARTS.
	(aarch64_simd_check_vect_par_cnst_half): Handle polynomial
	GET_MODE_NUNITS.
	(aarch64_simd_make_constant, aarch64_expand_vector_init): Get the
	number of elements from the PARALLEL rather than the mode.
	(aarch64_shift_truncation_mask): Use GET_MODE_UNIT_BITSIZE
	rather than GET_MODE_BITSIZE.
	(aarch64_evpc_trn, aarch64_evpc_uzp, aarch64_evpc_ext)
	(aarch64_evpc_rev, aarch64_evpc_dup, aarch64_evpc_zip)
	(aarch64_expand_vec_perm_const_1): Handle polynomial
	d->perm.length () and d->perm elements.
	(aarch64_evpc_tbl): Likewise.  Use nelt rather than GET_MODE_NUNITS.
	Apply to_constant to d->perm elements.
	(aarch64_simd_valid_immediate, aarch64_vec_fpconst_pow_of_2): Handle
	polynomial CONST_VECTOR_NUNITS.
	(aarch64_move_pointer): Take amount as a poly_int64 rather
	than an int.
	(aarch64_progress_pointer): Avoid temporary variable.
	* config/aarch64/aarch64.md (aarch64_<crc_variant>): Use
	the mode attribute instead of GET_MODE.

Comments

James Greenhalgh Jan. 6, 2018, 5:57 p.m. UTC | #1
On Fri, Jan 05, 2018 at 11:26:59AM +0000, Richard Sandiford wrote:
> Ping.  Here's the patch updated to apply on top of the v8.4 and
> __builtin_load_no_speculate support.
> 
> Richard Sandiford <richard.sandiford@linaro.org> writes:
> > This patch switches the AArch64 port to use 2 poly_int coefficients
> > and updates code as necessary to keep it compiling.
> >
> > One potentially-significant change is to
> > aarch64_hard_regno_caller_save_mode.  The old implementation
> > was written in a pretty conservative way: it changed the default
> > behaviour for single-register values, but used the default handling
> > for multi-register values.
> >
> > I don't think that's necessary, since the interesting cases for this
> > macro are usually the single-register ones.  Multi-register modes take
> > up the whole of the constituent registers and the move patterns for all
> > multi-register modes should be equally good.
> >
> > Using the original mode for multi-register cases stops us from using
> > SVE modes to spill multi-register NEON values.  This was caught by
> > gcc.c-torture/execute/pr47538.c.
> >
> > Also, aarch64_shift_truncation_mask used GET_MODE_BITSIZE - 1.
> > GET_MODE_UNIT_BITSIZE - 1 is equivalent for the cases that it handles
> > (which are all scalars), and I think it's more obvious, since if we ever
> > do use this for elementwise shifts of vector modes, the mask will depend
> > on the number of bits in each element rather than the number of bits in
> > the whole vector.


This is OK for trunk, with whatever modifications are needed to make the
rebase work. I do have one question and one minor comment; you can assume
that if you do make modifications in response to these that the patch is
still OK.

> Index: gcc/config/aarch64/aarch64.c
> ===================================================================
> --- gcc/config/aarch64/aarch64.c	2018-01-05 11:24:44.647408566 +0000
> +++ gcc/config/aarch64/aarch64.c	2018-01-05 11:24:44.867399574 +0000
> @@ -2262,8 +2258,12 @@ aarch64_pass_by_reference (cumulative_ar
>    int nregs;
>  
>    /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
> -  size = (mode == BLKmode && type)
> -    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
> +  if (mode == BLKmode && type)
> +    size = int_size_in_bytes (type);
> +  else
> +    /* No frontends can create types with variable-sized modes, so we
> +       shouldn't be asked to pass or return them.  */
> +    size = GET_MODE_SIZE (mode).to_constant ();


I presume that the assertion in your comment is checked in the
GET_MODE_SIZE (mode).to_constant (); call? If not, is it worth making the
assert explicit here?

> @@ -11874,8 +11921,9 @@ aarch64_simd_valid_immediate (rtx op, si
>    unsigned int n_elts;
>    if (const_vec_duplicate_p (op, &elt))
>      n_elts = 1;
> -  else if (GET_CODE (op) == CONST_VECTOR)
> -    n_elts = CONST_VECTOR_NUNITS (op);
> +  else if (GET_CODE (op) == CONST_VECTOR
> +	   && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
> +    ;
>    else
>      return false;
>  


A comment in the empty else if case would be useful for clarity.

Thanks,
James
Richard Sandiford Jan. 6, 2018, 7:03 p.m. UTC | #2
James Greenhalgh <james.greenhalgh@arm.com> writes:
> On Fri, Jan 05, 2018 at 11:26:59AM +0000, Richard Sandiford wrote:
>> Ping.  Here's the patch updated to apply on top of the v8.4 and
>> __builtin_load_no_speculate support.
>> 
>> Richard Sandiford <richard.sandiford@linaro.org> writes:
>> > This patch switches the AArch64 port to use 2 poly_int coefficients
>> > and updates code as necessary to keep it compiling.
>> >
>> > One potentially-significant change is to
>> > aarch64_hard_regno_caller_save_mode.  The old implementation
>> > was written in a pretty conservative way: it changed the default
>> > behaviour for single-register values, but used the default handling
>> > for multi-register values.
>> >
>> > I don't think that's necessary, since the interesting cases for this
>> > macro are usually the single-register ones.  Multi-register modes take
>> > up the whole of the constituent registers and the move patterns for all
>> > multi-register modes should be equally good.
>> >
>> > Using the original mode for multi-register cases stops us from using
>> > SVE modes to spill multi-register NEON values.  This was caught by
>> > gcc.c-torture/execute/pr47538.c.
>> >
>> > Also, aarch64_shift_truncation_mask used GET_MODE_BITSIZE - 1.
>> > GET_MODE_UNIT_BITSIZE - 1 is equivalent for the cases that it handles
>> > (which are all scalars), and I think it's more obvious, since if we ever
>> > do use this for elementwise shifts of vector modes, the mask will depend
>> > on the number of bits in each element rather than the number of bits in
>> > the whole vector.
>
> This is OK for trunk, with whatever modifications are needed to make the
> rebase work. I do have one question and one minor comment; you can assume
> that if you do make modifications in response to these that the patch is
> still OK.


Thanks!

>> Index: gcc/config/aarch64/aarch64.c
>> ===================================================================
>> --- gcc/config/aarch64/aarch64.c	2018-01-05 11:24:44.647408566 +0000
>> +++ gcc/config/aarch64/aarch64.c	2018-01-05 11:24:44.867399574 +0000
>> @@ -2262,8 +2258,12 @@ aarch64_pass_by_reference (cumulative_ar
>>    int nregs;
>>  
>>    /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
>> -  size = (mode == BLKmode && type)
>> -    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
>> +  if (mode == BLKmode && type)
>> +    size = int_size_in_bytes (type);
>> +  else
>> +    /* No frontends can create types with variable-sized modes, so we
>> +       shouldn't be asked to pass or return them.  */
>> +    size = GET_MODE_SIZE (mode).to_constant ();
>
> I presume that the assertion in your comment is checked in the
> GET_MODE_SIZE (mode).to_constant (); call? If not, is it worth making the
> assert explicit here?


Yeah, to_constant is an asserting operation.

Maybe my history with bad naming decisions is cropping up again
here though.  Perhaps it would be clearer as "force_constant" or
"require_constant" instead?

>> @@ -11874,8 +11921,9 @@ aarch64_simd_valid_immediate (rtx op, si
>>    unsigned int n_elts;
>>    if (const_vec_duplicate_p (op, &elt))
>>      n_elts = 1;
>> -  else if (GET_CODE (op) == CONST_VECTOR)
>> -    n_elts = CONST_VECTOR_NUNITS (op);
>> +  else if (GET_CODE (op) == CONST_VECTOR
>> +	   && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
>> +    ;
>>    else
>>      return false;
>>  
>
> A comment in the empty else if case would be useful for clarity.


OK, I'll go with:

  /* N_ELTS set above.  */;

Thanks,
Richard
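
The to_constant/is_constant distinction discussed above shows up in two forms
in the patch that follows; condensed here for reference (excerpts from the
patch, not new code):

  /* Asserting form: variable-sized modes should never reach this code,
     so to_constant simply insists on a compile-time constant.  */
  size = GET_MODE_SIZE (mode).to_constant ();

  /* Testing form: SVE frames can reach this code but are not yet
     supported, so check for a constant value and fall back to sorry ().  */
  HOST_WIDE_INT size;
  if (!poly_size.is_constant (&size))
    {
      sorry ("stack probes for SVE frames");
      return;
    }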

Patch

Index: gcc/config/aarch64/aarch64-modes.def
===================================================================
--- gcc/config/aarch64/aarch64-modes.def	2018-01-05 11:24:44.647408566 +0000
+++ gcc/config/aarch64/aarch64-modes.def	2018-01-05 11:24:44.864399697 +0000
@@ -46,3 +46,7 @@  INT_MODE (XI, 64);
 
 /* Quad float: 128-bit floating mode for long doubles.  */
 FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* Coefficient 1 is multiplied by the number of 128-bit chunks in an
+   SVE vector (referred to as "VQ") minus one.  */
+#define NUM_POLY_INT_COEFFS 2
Index: gcc/config/aarch64/aarch64-protos.h
===================================================================
--- gcc/config/aarch64/aarch64-protos.h	2018-01-05 11:24:44.647408566 +0000
+++ gcc/config/aarch64/aarch64-protos.h	2018-01-05 11:24:44.864399697 +0000
@@ -333,7 +333,7 @@  enum simd_immediate_check {
 
 extern struct tune_params aarch64_tune_params;
 
-HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
+poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
 int aarch64_get_condition_code (rtx);
 bool aarch64_address_valid_for_prefetch_p (rtx, bool);
 bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
@@ -366,7 +366,7 @@  bool aarch64_zero_extend_const_eq (machi
 bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
 bool aarch64_mov_operand_p (rtx, machine_mode);
 rtx aarch64_reverse_mask (machine_mode, unsigned int);
-bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
+bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
 char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
 char *aarch64_output_simd_mov_immediate (rtx, unsigned,
 			enum simd_immediate_check w = AARCH64_CHECK_MOV);
Index: gcc/config/aarch64/aarch64.h
===================================================================
--- gcc/config/aarch64/aarch64.h	2018-01-05 11:24:44.647408566 +0000
+++ gcc/config/aarch64/aarch64.h	2018-01-05 11:24:44.867399574 +0000
@@ -554,7 +554,7 @@  #define LIBCALL_VALUE(MODE)  \
 
 #define DEFAULT_PCC_STRUCT_RETURN 0
 
-#ifdef HOST_WIDE_INT
+#ifdef HAVE_POLY_INT_H
 struct GTY (()) aarch64_frame
 {
   HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER];
@@ -572,20 +572,20 @@  struct GTY (()) aarch64_frame
   /* Offset from the base of the frame (incomming SP) to the
      top of the locals area.  This value is always a multiple of
      STACK_BOUNDARY.  */
-  HOST_WIDE_INT locals_offset;
+  poly_int64 locals_offset;
 
   /* Offset from the base of the frame (incomming SP) to the
      hard_frame_pointer.  This value is always a multiple of
      STACK_BOUNDARY.  */
-  HOST_WIDE_INT hard_fp_offset;
+  poly_int64 hard_fp_offset;
 
   /* The size of the frame.  This value is the offset from base of the
-   * frame (incomming SP) to the stack_pointer.  This value is always
-   * a multiple of STACK_BOUNDARY.  */
-  HOST_WIDE_INT frame_size;
+     frame (incomming SP) to the stack_pointer.  This value is always
+     a multiple of STACK_BOUNDARY.  */
+  poly_int64 frame_size;
 
   /* The size of the initial stack adjustment before saving callee-saves.  */
-  HOST_WIDE_INT initial_adjust;
+  poly_int64 initial_adjust;
 
   /* The writeback value when pushing callee-save registers.
      It is zero when no push is used.  */
@@ -593,10 +593,10 @@  struct GTY (()) aarch64_frame
 
   /* The offset from SP to the callee-save registers after initial_adjust.
      It may be non-zero if no push is used (ie. callee_adjust == 0).  */
-  HOST_WIDE_INT callee_offset;
+  poly_int64 callee_offset;
 
   /* The size of the stack adjustment after saving callee-saves.  */
-  HOST_WIDE_INT final_adjust;
+  poly_int64 final_adjust;
 
   /* Store FP,LR and setup a frame pointer.  */
   bool emit_frame_chain;
Index: gcc/config/aarch64/aarch64-builtins.c
===================================================================
--- gcc/config/aarch64/aarch64-builtins.c	2018-01-05 11:24:44.647408566 +0000
+++ gcc/config/aarch64/aarch64-builtins.c	2018-01-05 11:24:44.864399697 +0000
@@ -1065,9 +1065,9 @@  aarch64_simd_expand_args (rtx target, in
 	      gcc_assert (opc > 1);
 	      if (CONST_INT_P (op[opc]))
 		{
-		  aarch64_simd_lane_bounds (op[opc], 0,
-					    GET_MODE_NUNITS (builtin_mode),
-					    exp);
+		  unsigned int nunits
+		    = GET_MODE_NUNITS (builtin_mode).to_constant ();
+		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
 		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
 		  op[opc] = aarch64_endian_lane_rtx (builtin_mode,
 						     INTVAL (op[opc]));
@@ -1080,8 +1080,9 @@  aarch64_simd_expand_args (rtx target, in
 	      if (CONST_INT_P (op[opc]))
 		{
 		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
-		  aarch64_simd_lane_bounds (op[opc],
-					    0, GET_MODE_NUNITS (vmode), exp);
+		  unsigned int nunits
+		    = GET_MODE_NUNITS (vmode).to_constant ();
+		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
 		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
 		  op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
 		}
@@ -1400,16 +1401,17 @@  aarch64_builtin_vectorized_function (uns
 				     tree type_in)
 {
   machine_mode in_mode, out_mode;
-  int in_n, out_n;
+  unsigned HOST_WIDE_INT in_n, out_n;
 
   if (TREE_CODE (type_out) != VECTOR_TYPE
       || TREE_CODE (type_in) != VECTOR_TYPE)
     return NULL_TREE;
 
   out_mode = TYPE_MODE (TREE_TYPE (type_out));
-  out_n = TYPE_VECTOR_SUBPARTS (type_out);
   in_mode = TYPE_MODE (TREE_TYPE (type_in));
-  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+  if (!TYPE_VECTOR_SUBPARTS (type_out).is_constant (&out_n)
+      || !TYPE_VECTOR_SUBPARTS (type_in).is_constant (&in_n))
+    return NULL_TREE;
 
 #undef AARCH64_CHECK_BUILTIN_MODE
 #define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
Index: gcc/config/aarch64/aarch64-simd.md
===================================================================
--- gcc/config/aarch64/aarch64-simd.md	2018-01-05 11:24:44.647408566 +0000
+++ gcc/config/aarch64/aarch64-simd.md	2018-01-05 11:24:44.865399656 +0000
@@ -31,9 +31,9 @@  (define_expand "mov<mode>"
      normal str, so the check need not apply.  */
   if (GET_CODE (operands[0]) == MEM
       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
-	   && ((GET_MODE_SIZE (<MODE>mode) == 16
+	   && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
 		&& aarch64_mem_pair_operand (operands[0], DImode))
-	       || GET_MODE_SIZE (<MODE>mode) == 8)))
+	       || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
       operands[1] = force_reg (<MODE>mode, operands[1]);
   "
 )
@@ -5334,9 +5334,7 @@  (define_expand "aarch64_ld<VSTRUCT:nregs
   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
 		     * <VSTRUCT:nregs>);
 
-  aarch64_simd_lane_bounds (operands[3], 0,
-			    GET_MODE_NUNITS (<VALLDIF:MODE>mode),
-			    NULL);
+  aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
 	operands[0], mem, operands[2], operands[3]));
   DONE;
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	2018-01-05 11:24:44.647408566 +0000
+++ gcc/config/aarch64/aarch64.c	2018-01-05 11:24:44.867399574 +0000
@@ -1139,13 +1139,18 @@  aarch64_array_mode_supported_p (machine_
 static unsigned int
 aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
 {
+  /* ??? Logically we should only need to provide a value when
+     HARD_REGNO_MODE_OK says that the combination is valid,
+     but at the moment we need to handle all modes.  Just ignore
+     any runtime parts for registers that can't store them.  */
+  HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
   switch (aarch64_regno_regclass (regno))
     {
     case FP_REGS:
     case FP_LO_REGS:
-      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
+      return CEIL (lowest_size, UNITS_PER_VREG);
     default:
-      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+      return CEIL (lowest_size, UNITS_PER_WORD);
     }
   gcc_unreachable ();
 }
@@ -1188,25 +1193,17 @@  aarch64_hard_regno_mode_ok (unsigned reg
 static bool
 aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
 {
-  return FP_REGNUM_P (regno) && GET_MODE_SIZE (mode) > 8;
+  return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8);
 }
 
 /* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
 machine_mode
-aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
-				     machine_mode mode)
+aarch64_hard_regno_caller_save_mode (unsigned, unsigned, machine_mode mode)
 {
-  /* Handle modes that fit within single registers.  */
-  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
-    {
-      if (GET_MODE_SIZE (mode) >= 4)
-        return mode;
-      else
-        return SImode;
-    }
-  /* Fall back to generic for multi-reg and very large modes.  */
+  if (known_ge (GET_MODE_SIZE (mode), 4))
+    return mode;
   else
-    return choose_hard_reg_mode (regno, nregs, false);
+    return SImode;
 }
 
 /* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
@@ -1319,11 +1316,10 @@  aarch64_tls_get_addr (void)
 tls_symbolic_operand_type (rtx addr)
 {
   enum tls_model tls_kind = TLS_MODEL_NONE;
-  rtx sym, addend;
-
   if (GET_CODE (addr) == CONST)
     {
-      split_const (addr, &sym, &addend);
+      poly_int64 addend;
+      rtx sym = strip_offset (addr, &addend);
       if (GET_CODE (sym) == SYMBOL_REF)
 	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
     }
@@ -2262,8 +2258,12 @@  aarch64_pass_by_reference (cumulative_ar
   int nregs;
 
   /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
-  size = (mode == BLKmode && type)
-    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+  if (mode == BLKmode && type)
+    size = int_size_in_bytes (type);
+  else
+    /* No frontends can create types with variable-sized modes, so we
+       shouldn't be asked to pass or return them.  */
+    size = GET_MODE_SIZE (mode).to_constant ();
 
   /* Aggregates are passed by reference based on their size.  */
   if (type && AGGREGATE_TYPE_P (type))
@@ -2360,8 +2360,8 @@  aarch64_function_value (const_tree type,
 	  for (i = 0; i < count; i++)
 	    {
 	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
-	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
-				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
+	      rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
+	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
 	      XVECEXP (par, 0, i) = tmp;
 	    }
 	  return par;
@@ -2488,9 +2488,13 @@  aarch64_layout_arg (cumulative_args_t pc
   pcum->aapcs_arg_processed = true;
 
   /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
-  size
-    = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
-		UNITS_PER_WORD);
+  if (type)
+    size = int_size_in_bytes (type);
+  else
+    /* No frontends can create types with variable-sized modes, so we
+       shouldn't be asked to pass or return them.  */
+    size = GET_MODE_SIZE (mode).to_constant ();
+  size = ROUND_UP (size, UNITS_PER_WORD);
 
   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
@@ -2527,9 +2531,9 @@  aarch64_layout_arg (cumulative_args_t pc
 		{
 		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
 					 V0_REGNUM + nvrn + i);
-		  tmp = gen_rtx_EXPR_LIST
-		    (VOIDmode, tmp,
-		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
+		  rtx offset = gen_int_mode
+		    (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
+		  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
 		  XVECEXP (par, 0, i) = tmp;
 		}
 	      pcum->aapcs_reg = par;
@@ -2754,8 +2758,13 @@  aarch64_pad_reg_upward (machine_mode mod
   /* Small composite types are always padded upward.  */
   if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
     {
-      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
-			    : GET_MODE_SIZE (mode));
+      HOST_WIDE_INT size;
+      if (type)
+	size = int_size_in_bytes (type);
+      else
+	/* No frontends can create types with variable-sized modes, so we
+	   shouldn't be asked to pass or return them.  */
+	size = GET_MODE_SIZE (mode).to_constant ();
       if (size < 2 * UNITS_PER_WORD)
 	return true;
     }
@@ -2784,12 +2793,19 @@  #define ARITH_FACTOR 4096
 #define PROBE_STACK_FIRST_REG  9
 #define PROBE_STACK_SECOND_REG 10
 
-/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
    inclusive.  These are offsets from the current stack pointer.  */
 
 static void
-aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
 {
+  HOST_WIDE_INT size;
+  if (!poly_size.is_constant (&size))
+    {
+      sorry ("stack probes for SVE frames");
+      return;
+    }
+
   rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
 
   /* See the same assertion on PROBE_INTERVAL above.  */
@@ -3067,13 +3083,16 @@  #define SLOT_REQUIRED     (-1)
     = offset + cfun->machine->frame.saved_varargs_size;
 
   cfun->machine->frame.hard_fp_offset
-    = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (),
-		STACK_BOUNDARY / BITS_PER_UNIT);
+    = aligned_upper_bound (varargs_and_saved_regs_size
+			   + get_frame_size (),
+			   STACK_BOUNDARY / BITS_PER_UNIT);
 
+  /* Both these values are already aligned.  */
+  gcc_assert (multiple_p (crtl->outgoing_args_size,
+			  STACK_BOUNDARY / BITS_PER_UNIT));
   cfun->machine->frame.frame_size
-    = ROUND_UP (cfun->machine->frame.hard_fp_offset
-		+ crtl->outgoing_args_size,
-		STACK_BOUNDARY / BITS_PER_UNIT);
+    = (cfun->machine->frame.hard_fp_offset
+       + crtl->outgoing_args_size);
 
   cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
 
@@ -3088,18 +3107,21 @@  #define SLOT_REQUIRED     (-1)
   else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
     max_push_offset = 256;
 
-  if (cfun->machine->frame.frame_size < max_push_offset
-      && crtl->outgoing_args_size == 0)
+  HOST_WIDE_INT const_size, const_fp_offset;
+  if (cfun->machine->frame.frame_size.is_constant (&const_size)
+      && const_size < max_push_offset
+      && known_eq (crtl->outgoing_args_size, 0))
     {
       /* Simple, small frame with no outgoing arguments:
 	 stp reg1, reg2, [sp, -frame_size]!
 	 stp reg3, reg4, [sp, 16]  */
-      cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size;
+      cfun->machine->frame.callee_adjust = const_size;
     }
-  else if ((crtl->outgoing_args_size
-	    + cfun->machine->frame.saved_regs_size < 512)
+  else if (known_lt (crtl->outgoing_args_size
+		     + cfun->machine->frame.saved_regs_size, 512)
 	   && !(cfun->calls_alloca
-		&& cfun->machine->frame.hard_fp_offset < max_push_offset))
+		&& known_lt (cfun->machine->frame.hard_fp_offset,
+			     max_push_offset)))
     {
       /* Frame with small outgoing arguments:
 	 sub sp, sp, frame_size
@@ -3109,13 +3131,14 @@  #define SLOT_REQUIRED     (-1)
       cfun->machine->frame.callee_offset
 	= cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
     }
-  else if (cfun->machine->frame.hard_fp_offset < max_push_offset)
+  else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
+	   && const_fp_offset < max_push_offset)
     {
       /* Frame with large outgoing arguments but a small local area:
 	 stp reg1, reg2, [sp, -hard_fp_offset]!
 	 stp reg3, reg4, [sp, 16]
 	 sub sp, sp, outgoing_args_size  */
-      cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset;
+      cfun->machine->frame.callee_adjust = const_fp_offset;
       cfun->machine->frame.final_adjust
 	= cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
     }
@@ -3328,7 +3351,7 @@  aarch64_return_address_signing_enabled (
    skipping any write-back candidates if SKIP_WB is true.  */
 
 static void
-aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
+aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
 			   unsigned start, unsigned limit, bool skip_wb)
 {
   rtx_insn *insn;
@@ -3340,7 +3363,7 @@  aarch64_save_callee_saves (machine_mode
        regno = aarch64_next_callee_save (regno + 1, limit))
     {
       rtx reg, mem;
-      HOST_WIDE_INT offset;
+      poly_int64 offset;
 
       if (skip_wb
 	  && (regno == cfun->machine->frame.wb_candidate1
@@ -3393,13 +3416,13 @@  aarch64_save_callee_saves (machine_mode
 
 static void
 aarch64_restore_callee_saves (machine_mode mode,
-			      HOST_WIDE_INT start_offset, unsigned start,
+			      poly_int64 start_offset, unsigned start,
 			      unsigned limit, bool skip_wb, rtx *cfi_ops)
 {
   rtx base_rtx = stack_pointer_rtx;
   unsigned regno;
   unsigned regno2;
-  HOST_WIDE_INT offset;
+  poly_int64 offset;
 
   for (regno = aarch64_next_callee_save (start, limit);
        regno <= limit;
@@ -3444,25 +3467,27 @@  aarch64_restore_callee_saves (machine_mo
 
 static inline bool
 offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
-			       HOST_WIDE_INT offset)
+			       poly_int64 offset)
 {
-  return offset >= -256 && offset < 256;
+  HOST_WIDE_INT const_offset;
+  return (offset.is_constant (&const_offset)
+	  && IN_RANGE (const_offset, -256, 255));
 }
 
 static inline bool
-offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
+offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
 {
-  return (offset >= 0
-	  && offset < 4096 * GET_MODE_SIZE (mode)
-	  && offset % GET_MODE_SIZE (mode) == 0);
+  HOST_WIDE_INT multiple;
+  return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
+	  && IN_RANGE (multiple, 0, 4095));
 }
 
 bool
-aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
+aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
 {
-  return (offset >= -64 * GET_MODE_SIZE (mode)
-	  && offset < 64 * GET_MODE_SIZE (mode)
-	  && offset % GET_MODE_SIZE (mode) == 0);
+  HOST_WIDE_INT multiple;
+  return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
+	  && IN_RANGE (multiple, -64, 63));
 }
 
 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  */
@@ -3479,7 +3504,7 @@  aarch64_get_separate_components (void)
   for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
     if (aarch64_register_saved_on_entry (regno))
       {
-	HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
+	poly_int64 offset = cfun->machine->frame.reg_offset[regno];
 	if (!frame_pointer_needed)
 	  offset += cfun->machine->frame.frame_size
 		    - cfun->machine->frame.hard_fp_offset;
@@ -3583,7 +3608,7 @@  aarch64_process_components (sbitmap comp
 	 so DFmode for the vector registers is enough.  */
       machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
       rtx reg = gen_rtx_REG (mode, regno);
-      HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
+      poly_int64 offset = cfun->machine->frame.reg_offset[regno];
       if (!frame_pointer_needed)
 	offset += cfun->machine->frame.frame_size
 		  - cfun->machine->frame.hard_fp_offset;
@@ -3605,13 +3630,13 @@  aarch64_process_components (sbitmap comp
 	  break;
 	}
 
-      HOST_WIDE_INT offset2 = cfun->machine->frame.reg_offset[regno2];
+      poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
       /* The next register is not of the same class or its offset is not
 	 mergeable with the current one into a pair.  */
       if (!satisfies_constraint_Ump (mem)
 	  || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
-	  || (offset2 - cfun->machine->frame.reg_offset[regno])
-		!= GET_MODE_SIZE (mode))
+	  || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
+		       GET_MODE_SIZE (mode)))
 	{
 	  insn = emit_insn (set);
 	  RTX_FRAME_RELATED_P (insn) = 1;
@@ -3681,11 +3706,19 @@  aarch64_set_handled_components (sbitmap
       cfun->machine->reg_is_wrapped_separately[regno] = true;
 }
 
-/* Allocate SIZE bytes of stack space using TEMP1 as a scratch register.  */
+/* Allocate POLY_SIZE bytes of stack space using TEMP1 as a scratch
+   register.  */
 
 static void
-aarch64_allocate_and_probe_stack_space (rtx temp1, HOST_WIDE_INT size)
+aarch64_allocate_and_probe_stack_space (rtx temp1, poly_int64 poly_size)
 {
+  HOST_WIDE_INT size;
+  if (!poly_size.is_constant (&size))
+    {
+      sorry ("stack probes for SVE frames");
+      return;
+    }
+
   HOST_WIDE_INT probe_interval
     = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
   HOST_WIDE_INT guard_size
@@ -3845,11 +3878,11 @@  aarch64_expand_prologue (void)
 {
   aarch64_layout_frame ();
 
-  HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size;
-  HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
+  poly_int64 frame_size = cfun->machine->frame.frame_size;
+  poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
   HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
-  HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
-  HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
+  poly_int64 final_adjust = cfun->machine->frame.final_adjust;
+  poly_int64 callee_offset = cfun->machine->frame.callee_offset;
   unsigned reg1 = cfun->machine->frame.wb_candidate1;
   unsigned reg2 = cfun->machine->frame.wb_candidate2;
   bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
@@ -3864,19 +3897,19 @@  aarch64_expand_prologue (void)
     }
 
   if (flag_stack_usage_info)
-    current_function_static_stack_size = frame_size;
+    current_function_static_stack_size = constant_lower_bound (frame_size);
 
   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
     {
       if (crtl->is_leaf && !cfun->calls_alloca)
 	{
-	  if (frame_size > PROBE_INTERVAL
-	      && frame_size > get_stack_check_protect ())
+	  if (maybe_gt (frame_size, PROBE_INTERVAL)
+	      && maybe_gt (frame_size, get_stack_check_protect ()))
 	    aarch64_emit_probe_stack_range (get_stack_check_protect (),
 					    (frame_size
 					     - get_stack_check_protect ()));
 	}
-      else if (frame_size > 0)
+      else if (maybe_gt (frame_size, 0))
 	aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
     }
 
@@ -3911,23 +3944,23 @@  aarch64_expand_prologue (void)
   HOST_WIDE_INT guard_used_by_caller = 1024;
   if (flag_stack_clash_protection)
     {
-      if (frame_size == 0)
+      if (known_eq (frame_size, 0))
 	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
-      else if (initial_adjust < guard_size - guard_used_by_caller
-	       && final_adjust < guard_size - guard_used_by_caller)
+      else if (known_lt (initial_adjust, guard_size - guard_used_by_caller)
+	       && known_lt (final_adjust, guard_size - guard_used_by_caller))
 	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
     }
 
   /* In theory we should never have both an initial adjustment
      and a callee save adjustment.  Verify that is the case since the
      code below does not handle it for -fstack-clash-protection.  */
-  gcc_assert (initial_adjust == 0 || callee_adjust == 0);
+  gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
 
   /* Only probe if the initial adjustment is larger than the guard
      less the amount of the guard reserved for use by the caller's
      outgoing args.  */
   if (flag_stack_clash_protection
-      && initial_adjust >= guard_size - guard_used_by_caller)
+      && maybe_ge (initial_adjust, guard_size - guard_used_by_caller))
     aarch64_allocate_and_probe_stack_space (ip0_rtx, initial_adjust);
   else
     aarch64_sub_sp (ip0_rtx, initial_adjust, true);
@@ -3952,19 +3985,19 @@  aarch64_expand_prologue (void)
 			     callee_adjust != 0 || emit_frame_chain);
 
   /* We may need to probe the final adjustment as well.  */
-  if (flag_stack_clash_protection && final_adjust != 0)
+  if (flag_stack_clash_protection && maybe_ne (final_adjust, 0))
     {
       /* First probe if the final adjustment is larger than the guard size
 	 less the amount of the guard reserved for use by the caller's
 	 outgoing args.  */
-      if (final_adjust >= guard_size - guard_used_by_caller)
+      if (maybe_ge (final_adjust, guard_size - guard_used_by_caller))
 	aarch64_allocate_and_probe_stack_space (ip1_rtx, final_adjust);
       else
 	aarch64_sub_sp (ip1_rtx, final_adjust, !frame_pointer_needed);
 
       /* We must also probe if the final adjustment is larger than the guard
 	 that is assumed used by the caller.  This may be sub-optimal.  */
-      if (final_adjust >= guard_used_by_caller)
+      if (maybe_ge (final_adjust, guard_used_by_caller))
 	{
 	  if (dump_file)
 	    fprintf (dump_file,
@@ -3993,7 +4026,7 @@  aarch64_use_return_insn_p (void)
 
   aarch64_layout_frame ();
 
-  return cfun->machine->frame.frame_size == 0;
+  return known_eq (cfun->machine->frame.frame_size, 0);
 }
 
 /* Generate the epilogue instructions for returning from a function.
@@ -4006,21 +4039,23 @@  aarch64_expand_epilogue (bool for_sibcal
 {
   aarch64_layout_frame ();
 
-  HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
+  poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
   HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
-  HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
-  HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
+  poly_int64 final_adjust = cfun->machine->frame.final_adjust;
+  poly_int64 callee_offset = cfun->machine->frame.callee_offset;
   unsigned reg1 = cfun->machine->frame.wb_candidate1;
   unsigned reg2 = cfun->machine->frame.wb_candidate2;
   rtx cfi_ops = NULL;
   rtx_insn *insn;
 
   /* We need to add memory barrier to prevent read from deallocated stack.  */
-  bool need_barrier_p = (get_frame_size ()
-			 + cfun->machine->frame.saved_varargs_size) != 0;
+  bool need_barrier_p
+    = maybe_ne (get_frame_size ()
+		+ cfun->machine->frame.saved_varargs_size, 0);
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+  if (maybe_gt (final_adjust, crtl->outgoing_args_size)
+      || cfun->calls_alloca
       || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
@@ -4031,7 +4066,8 @@  aarch64_expand_epilogue (bool for_sibcal
      be the same as the stack pointer.  */
   rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
   rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
-  if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
+  if (frame_pointer_needed
+      && (maybe_ne (final_adjust, 0) || cfun->calls_alloca))
     /* If writeback is used when restoring callee-saves, the CFA
        is restored on the instruction doing the writeback.  */
     aarch64_add_offset (Pmode, stack_pointer_rtx,
@@ -4055,7 +4091,7 @@  aarch64_expand_epilogue (bool for_sibcal
   if (callee_adjust != 0)
     aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
 
-  if (callee_adjust != 0 || initial_adjust > 65536)
+  if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536))
     {
       /* Emit delayed restores and set the CFA to be SP + initial_adjust.  */
       insn = get_last_insn ();
@@ -4656,9 +4692,9 @@  aarch64_classify_index (struct aarch64_a
       && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
     index = SUBREG_REG (index);
 
-  if ((shift == 0 ||
-       (shift > 0 && shift <= 3
-	&& (1 << shift) == GET_MODE_SIZE (mode)))
+  if ((shift == 0
+       || (shift > 0 && shift <= 3
+	   && known_eq (1 << shift, GET_MODE_SIZE (mode))))
       && REG_P (index)
       && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
     {
@@ -4680,7 +4716,7 @@  aarch64_mode_valid_for_sched_fusion_p (m
   return mode == SImode || mode == DImode
 	 || mode == SFmode || mode == DFmode
 	 || (aarch64_vector_mode_supported_p (mode)
-	     && GET_MODE_SIZE (mode) == 8);
+	     && known_eq (GET_MODE_SIZE (mode), 8));
 }
 
 /* Return true if REGNO is a virtual pointer register, or an eliminable
@@ -4706,6 +4742,7 @@  aarch64_classify_address (struct aarch64
 {
   enum rtx_code code = GET_CODE (x);
   rtx op0, op1;
+  HOST_WIDE_INT const_size;
 
   /* On BE, we use load/store pair for all large int mode load/stores.
      TI/TFmode may also use a load/store pair.  */
@@ -4715,10 +4752,10 @@  aarch64_classify_address (struct aarch64
 			    || (BYTES_BIG_ENDIAN
 				&& aarch64_vect_struct_mode_p (mode)));
 
-  bool allow_reg_index_p =
-    !load_store_pair_p
-    && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
-    && !aarch64_vect_struct_mode_p (mode);
+  bool allow_reg_index_p = (!load_store_pair_p
+			    && (maybe_ne (GET_MODE_SIZE (mode), 16)
+				|| aarch64_vector_mode_supported_p (mode))
+			    && !aarch64_vect_struct_mode_p (mode));
 
   /* On LE, for AdvSIMD, don't support anything other than POST_INC or
      REG addressing.  */
@@ -4751,7 +4788,7 @@  aarch64_classify_address (struct aarch64
 	  return true;
 	}
 
-      if (GET_MODE_SIZE (mode) != 0
+      if (maybe_ne (GET_MODE_SIZE (mode), 0)
 	  && CONST_INT_P (op1)
 	  && aarch64_base_register_rtx_p (op0, strict_p))
 	{
@@ -4798,7 +4835,8 @@  aarch64_classify_address (struct aarch64
 							    offset + 32));
 
 	  if (load_store_pair_p)
-	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+	    return ((known_eq (GET_MODE_SIZE (mode), 4)
+		     || known_eq (GET_MODE_SIZE (mode), 8))
 		    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
 	  else
 	    return (offset_9bit_signed_unscaled_p (mode, offset)
@@ -4858,7 +4896,8 @@  aarch64_classify_address (struct aarch64
 		    && offset_9bit_signed_unscaled_p (mode, offset));
 
 	  if (load_store_pair_p)
-	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+	    return ((known_eq (GET_MODE_SIZE (mode), 4)
+		     || known_eq (GET_MODE_SIZE (mode), 8))
 		    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
 	  else
 	    return offset_9bit_signed_unscaled_p (mode, offset);
@@ -4872,7 +4911,9 @@  aarch64_classify_address (struct aarch64
          for SI mode or larger.  */
       info->type = ADDRESS_SYMBOLIC;
 
-      if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
+      if (!load_store_pair_p
+	  && GET_MODE_SIZE (mode).is_constant (&const_size)
+	  && const_size >= 4)
 	{
 	  rtx sym, addend;
 
@@ -4898,7 +4939,6 @@  aarch64_classify_address (struct aarch64
 	    {
 	      /* The symbol and offset must be aligned to the access size.  */
 	      unsigned int align;
-	      unsigned int ref_size;
 
 	      if (CONSTANT_POOL_ADDRESS_P (sym))
 		align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
@@ -4916,12 +4956,12 @@  aarch64_classify_address (struct aarch64
 	      else
 		align = BITS_PER_UNIT;
 
-	      ref_size = GET_MODE_SIZE (mode);
-	      if (ref_size == 0)
+	      poly_int64 ref_size = GET_MODE_SIZE (mode);
+	      if (known_eq (ref_size, 0))
 		ref_size = GET_MODE_SIZE (DImode);
 
-	      return ((INTVAL (offs) & (ref_size - 1)) == 0
-		      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
+	      return (multiple_p (INTVAL (offs), ref_size)
+		      && multiple_p (align / BITS_PER_UNIT, ref_size));
 	    }
 	}
       return false;
@@ -4999,19 +5039,24 @@  aarch64_legitimate_address_p (machine_mo
 static bool
 aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
 {
-  HOST_WIDE_INT offset = INTVAL (*disp);
-  HOST_WIDE_INT base;
+  HOST_WIDE_INT size;
+  if (GET_MODE_SIZE (mode).is_constant (&size))
+    {
+      HOST_WIDE_INT offset = INTVAL (*disp);
+      HOST_WIDE_INT base;
 
-  if (mode == TImode || mode == TFmode)
-    base = (offset + 0x100) & ~0x1f8;
-  else if ((offset & (GET_MODE_SIZE (mode) - 1)) != 0)
-    base = (offset + 0x100) & ~0x1ff;
-  else
-    base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
+      if (mode == TImode || mode == TFmode)
+	base = (offset + 0x100) & ~0x1f8;
+      else if ((offset & (size - 1)) != 0)
+	base = (offset + 0x100) & ~0x1ff;
+      else
+	base = offset & ~(size < 4 ? 0xfff : 0x3ffc);
 
-  *off = GEN_INT (base);
-  *disp = GEN_INT (offset - base);
-  return true;
+      *off = GEN_INT (base);
+      *disp = GEN_INT (offset - base);
+      return true;
+    }
+  return false;
 }
 
 /* Return the binary representation of floating point constant VALUE in INTVAL.
@@ -5399,26 +5444,13 @@  aarch64_get_condition_code_1 (machine_mo
 
 bool
 aarch64_const_vec_all_same_in_range_p (rtx x,
-				  HOST_WIDE_INT minval,
-				  HOST_WIDE_INT maxval)
+				       HOST_WIDE_INT minval,
+				       HOST_WIDE_INT maxval)
 {
-  HOST_WIDE_INT firstval;
-  int count, i;
-
-  if (GET_CODE (x) != CONST_VECTOR
-      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
-    return false;
-
-  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
-  if (firstval < minval || firstval > maxval)
-    return false;
-
-  count = CONST_VECTOR_NUNITS (x);
-  for (i = 1; i < count; i++)
-    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
-      return false;
-
-  return true;
+  rtx elt;
+  return (const_vec_duplicate_p (x, &elt)
+	  && CONST_INT_P (elt)
+	  && IN_RANGE (INTVAL (elt), minval, maxval));
 }
 
 bool
@@ -5860,7 +5892,7 @@  #define buf_size 20
 	machine_mode mode = GET_MODE (x);
 
 	if (GET_CODE (x) != MEM
-	    || (code == 'y' && GET_MODE_SIZE (mode) != 16))
+	    || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
 	  {
 	    output_operand_lossage ("invalid operand for '%%%c'", code);
 	    return;
@@ -5891,6 +5923,7 @@  aarch64_print_address_internal (FILE *f,
 				aarch64_addr_query_type type)
 {
   struct aarch64_address_info addr;
+  unsigned int size;
 
   /* Check all addresses are Pmode - including ILP32.  */
   gcc_assert (GET_MODE (x) == Pmode);
@@ -5934,30 +5967,28 @@  aarch64_print_address_internal (FILE *f,
 	return true;
 
       case ADDRESS_REG_WB:
+	/* Writeback is only supported for fixed-width modes.  */
+	size = GET_MODE_SIZE (mode).to_constant ();
 	switch (GET_CODE (x))
 	  {
 	  case PRE_INC:
-	    asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
-			 GET_MODE_SIZE (mode));
+	    asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size);
 	    return true;
 	  case POST_INC:
-	    asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
-			 GET_MODE_SIZE (mode));
+	    asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size);
 	    return true;
 	  case PRE_DEC:
-	    asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
-			 GET_MODE_SIZE (mode));
+	    asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size);
 	    return true;
 	  case POST_DEC:
-	    asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
-			 GET_MODE_SIZE (mode));
+	    asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size);
 	    return true;
 	  case PRE_MODIFY:
-	    asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
+	    asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
 			 INTVAL (addr.offset));
 	    return true;
 	  case POST_MODIFY:
-	    asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
+	    asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
 			 INTVAL (addr.offset));
 	    return true;
 	  default:
@@ -6048,6 +6079,39 @@  aarch64_regno_regclass (unsigned regno)
   return NO_REGS;
 }
 
+/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
+   If OFFSET is out of range, return an offset of an anchor point
+   that is in range.  Return 0 otherwise.  */
+
+static HOST_WIDE_INT
+aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
+		       machine_mode mode)
+{
+  /* Does it look like we'll need a 16-byte load/store-pair operation?  */
+  if (size > 16)
+    return (offset + 0x400) & ~0x7f0;
+
+  /* For offsets that aren't a multiple of the access size, the limit is
+     -256...255.  */
+  if (offset & (size - 1))
+    {
+      /* BLKmode typically uses LDP of X-registers.  */
+      if (mode == BLKmode)
+	return (offset + 512) & ~0x3ff;
+      return (offset + 0x100) & ~0x1ff;
+    }
+
+  /* Small negative offsets are supported.  */
+  if (IN_RANGE (offset, -256, 0))
+    return 0;
+
+  if (mode == TImode || mode == TFmode)
+    return (offset + 0x100) & ~0x1ff;
+
+  /* Use 12-bit offset by access size.  */
+  return offset & (~0xfff * size);
+}
+
 static rtx
 aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
 {
@@ -6097,34 +6161,17 @@  aarch64_legitimize_address (rtx x, rtx /
 	  x = gen_rtx_PLUS (Pmode, base, offset_rtx);
 	}
 
-      /* Does it look like we'll need a 16-byte load/store-pair operation?  */
-      HOST_WIDE_INT base_offset;
-      if (GET_MODE_SIZE (mode) > 16)
-	base_offset = (offset + 0x400) & ~0x7f0;
-      /* For offsets aren't a multiple of the access size, the limit is
-	 -256...255.  */
-      else if (offset & (GET_MODE_SIZE (mode) - 1))
-	{
-	  base_offset = (offset + 0x100) & ~0x1ff;
-
-	  /* BLKmode typically uses LDP of X-registers.  */
-	  if (mode == BLKmode)
-	    base_offset = (offset + 512) & ~0x3ff;
-	}
-      /* Small negative offsets are supported.  */
-      else if (IN_RANGE (offset, -256, 0))
-	base_offset = 0;
-      else if (mode == TImode || mode == TFmode)
-	base_offset = (offset + 0x100) & ~0x1ff;
-      /* Use 12-bit offset by access size.  */
-      else
-	base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
-
-      if (base_offset != 0)
+      HOST_WIDE_INT size;
+      if (GET_MODE_SIZE (mode).is_constant (&size))
 	{
-	  base = plus_constant (Pmode, base, base_offset);
-	  base = force_operand (base, NULL_RTX);
-	  return plus_constant (Pmode, base, offset - base_offset);
+	  HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
+							     mode);
+	  if (base_offset != 0)
+	    {
+	      base = plus_constant (Pmode, base, base_offset);
+	      base = force_operand (base, NULL_RTX);
+	      return plus_constant (Pmode, base, offset - base_offset);
+	    }
 	}
     }
 
@@ -6211,7 +6258,7 @@  aarch64_secondary_reload (bool in_p ATTR
      because AArch64 has richer addressing modes for LDR/STR instructions
      than LDP/STP instructions.  */
   if (TARGET_FLOAT && rclass == GENERAL_REGS
-      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
+      && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
     return FP_REGS;
 
   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
@@ -6232,7 +6279,7 @@  aarch64_can_eliminate (const int from AT
   return true;
 }
 
-HOST_WIDE_INT
+poly_int64
 aarch64_initial_elimination_offset (unsigned from, unsigned to)
 {
   aarch64_layout_frame ();
@@ -6318,6 +6365,11 @@  aarch64_trampoline_init (rtx m_tramp, tr
 static unsigned char
 aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
 {
+  /* ??? Logically we should only need to provide a value when
+     HARD_REGNO_MODE_OK says that at least one register in REGCLASS
+     can hold MODE, but at the moment we need to handle all modes.
+     Just ignore any runtime parts for registers that can't store them.  */
+  HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
   switch (regclass)
     {
     case CALLER_SAVE_REGS:
@@ -6327,10 +6379,9 @@  aarch64_class_max_nregs (reg_class_t reg
     case POINTER_AND_FP_REGS:
     case FP_REGS:
     case FP_LO_REGS:
-      return
-	aarch64_vector_mode_p (mode)
-	  ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
-	  : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+      return (aarch64_vector_mode_p (mode)
+	      ? CEIL (lowest_size, UNITS_PER_VREG)
+	      : CEIL (lowest_size, UNITS_PER_WORD));
     case STACK_REG:
       return 1;
 
@@ -6881,25 +6932,15 @@  aarch64_address_cost (rtx x,
     {
       /* For the sake of calculating the cost of the shifted register
 	 component, we can treat same sized modes in the same way.  */
-      switch (GET_MODE_BITSIZE (mode))
-	{
-	  case 16:
-	    cost += addr_cost->addr_scale_costs.hi;
-	    break;
-
-	  case 32:
-	    cost += addr_cost->addr_scale_costs.si;
-	    break;
-
-	  case 64:
-	    cost += addr_cost->addr_scale_costs.di;
-	    break;
-
-	  /* We can't tell, or this is a 128-bit vector.  */
-	  default:
-	    cost += addr_cost->addr_scale_costs.ti;
-	    break;
-	}
+      if (known_eq (GET_MODE_BITSIZE (mode), 16))
+	cost += addr_cost->addr_scale_costs.hi;
+      else if (known_eq (GET_MODE_BITSIZE (mode), 32))
+	cost += addr_cost->addr_scale_costs.si;
+      else if (known_eq (GET_MODE_BITSIZE (mode), 64))
+	cost += addr_cost->addr_scale_costs.di;
+      else
+	/* We can't tell, or this is a 128-bit vector.  */
+	cost += addr_cost->addr_scale_costs.ti;
     }
 
   return cost;
@@ -8028,7 +8069,8 @@  aarch64_rtx_costs (rtx x, machine_mode m
 
 	      if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
 		  && CONST_INT_P (XEXP (op1, 1))
-		  && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
+		  && known_eq (INTVAL (XEXP (op1, 1)),
+			       GET_MODE_BITSIZE (mode) - 1))
 		{
 		  *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
 		  /* We already demanded XEXP (op1, 0) to be REG_P, so
@@ -8076,7 +8118,8 @@  aarch64_rtx_costs (rtx x, machine_mode m
 
 	      if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
 		  && CONST_INT_P (XEXP (op1, 1))
-		  && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
+		  && known_eq (INTVAL (XEXP (op1, 1)),
+			       GET_MODE_BITSIZE (mode) - 1))
 		{
 		  *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
 		  /* We already demanded XEXP (op1, 0) to be REG_P, so
@@ -8502,7 +8545,7 @@  aarch64_register_move_cost (machine_mode
     return aarch64_register_move_cost (mode, from, GENERAL_REGS)
             + aarch64_register_move_cost (mode, GENERAL_REGS, to);
 
-  if (GET_MODE_SIZE (mode) == 16)
+  if (known_eq (GET_MODE_SIZE (mode), 16))
     {
       /* 128-bit operations on general registers require 2 instructions.  */
       if (from == GENERAL_REGS && to == GENERAL_REGS)
@@ -8878,7 +8921,7 @@  aarch64_builtin_vectorization_cost (enum
 	return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
 
       case vec_construct:
-        elements = TYPE_VECTOR_SUBPARTS (vectype);
+	elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
 	return elements / 2 + 1;
 
       default:
@@ -10925,6 +10968,10 @@  aarch64_gimplify_va_arg_expr (tree valis
 					       &nregs,
 					       &is_ha))
     {
+      /* No frontends can create types with variable-sized modes, so we
+	 shouldn't be asked to pass or return them.  */
+      unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();
+
       /* TYPE passed in fp/simd registers.  */
       if (!TARGET_FLOAT)
 	aarch64_err_no_fpadvsimd (mode, "varargs");
@@ -10938,8 +10985,8 @@  aarch64_gimplify_va_arg_expr (tree valis
 
       if (is_ha)
 	{
-	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
-	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
+	  if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
+	    adjust = UNITS_PER_VREG - ag_size;
 	}
       else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
 	       && size < UNITS_PER_VREG)
@@ -11327,8 +11374,8 @@  aapcs_vfp_sub_candidate (const_tree type
 		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));
 
 	/* There must be no padding.  */
-	if (wi::to_wide (TYPE_SIZE (type))
-	    != count * GET_MODE_BITSIZE (*modep))
+	if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
+		      count * GET_MODE_BITSIZE (*modep)))
 	  return -1;
 
 	return count;
@@ -11358,8 +11405,8 @@  aapcs_vfp_sub_candidate (const_tree type
 	  }
 
 	/* There must be no padding.  */
-	if (wi::to_wide (TYPE_SIZE (type))
-	    != count * GET_MODE_BITSIZE (*modep))
+	if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
+		      count * GET_MODE_BITSIZE (*modep)))
 	  return -1;
 
 	return count;
@@ -11391,8 +11438,8 @@  aapcs_vfp_sub_candidate (const_tree type
 	  }
 
 	/* There must be no padding.  */
-	if (wi::to_wide (TYPE_SIZE (type))
-	    != count * GET_MODE_BITSIZE (*modep))
+	if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
+		      count * GET_MODE_BITSIZE (*modep)))
 	  return -1;
 
 	return count;
@@ -11414,7 +11461,7 @@  aapcs_vfp_sub_candidate (const_tree type
 aarch64_short_vector_p (const_tree type,
 			machine_mode mode)
 {
-  HOST_WIDE_INT size = -1;
+  poly_int64 size = -1;
 
   if (type && TREE_CODE (type) == VECTOR_TYPE)
     size = int_size_in_bytes (type);
@@ -11422,7 +11469,7 @@  aarch64_short_vector_p (const_tree type,
 	    || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
     size = GET_MODE_SIZE (mode);
 
-  return (size == 8 || size == 16);
+  return known_eq (size, 8) || known_eq (size, 16);
 }
 
 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
@@ -11874,8 +11921,9 @@  aarch64_simd_valid_immediate (rtx op, si
   unsigned int n_elts;
   if (const_vec_duplicate_p (op, &elt))
     n_elts = 1;
-  else if (GET_CODE (op) == CONST_VECTOR)
-    n_elts = CONST_VECTOR_NUNITS (op);
+  else if (GET_CODE (op) == CONST_VECTOR
+	   && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
+    ;
   else
     return false;
 
@@ -12064,11 +12112,11 @@  aarch64_simd_vect_par_cnst_half (machine
 aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
 				       bool high)
 {
-  if (!VECTOR_MODE_P (mode))
+  int nelts;
+  if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
     return false;
 
-  rtx ideal = aarch64_simd_vect_par_cnst_half (mode, GET_MODE_NUNITS (mode),
-					       high);
+  rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
   HOST_WIDE_INT count_op = XVECLEN (op, 0);
   HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
   int i = 0;
@@ -12153,7 +12201,8 @@  aarch64_simd_emit_reg_reg_move (rtx *ope
 int
 aarch64_simd_attr_length_rglist (machine_mode mode)
 {
-  return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
+  /* This is only used (and only meaningful) for Advanced SIMD, not SVE.  */
+  return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
 }
 
 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
@@ -12233,7 +12282,6 @@  aarch64_simd_make_constant (rtx vals)
   machine_mode mode = GET_MODE (vals);
   rtx const_dup;
   rtx const_vec = NULL_RTX;
-  int n_elts = GET_MODE_NUNITS (mode);
   int n_const = 0;
   int i;
 
@@ -12244,6 +12292,7 @@  aarch64_simd_make_constant (rtx vals)
       /* A CONST_VECTOR must contain only CONST_INTs and
 	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
 	 Only store valid constants in a CONST_VECTOR.  */
+      int n_elts = XVECLEN (vals, 0);
       for (i = 0; i < n_elts; ++i)
 	{
 	  rtx x = XVECEXP (vals, 0, i);
@@ -12282,7 +12331,7 @@  aarch64_expand_vector_init (rtx target,
   machine_mode mode = GET_MODE (target);
   scalar_mode inner_mode = GET_MODE_INNER (mode);
   /* The number of vector elements.  */
-  int n_elts = GET_MODE_NUNITS (mode);
+  int n_elts = XVECLEN (vals, 0);
   /* The number of vector elements which are not constant.  */
   int n_var = 0;
   rtx any_const = NULL_RTX;
@@ -12464,7 +12513,9 @@  aarch64_shift_truncation_mask (machine_m
   return
     (!SHIFT_COUNT_TRUNCATED
      || aarch64_vector_mode_supported_p (mode)
-     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
+     || aarch64_vect_struct_mode_p (mode))
+    ? 0
+    : (GET_MODE_UNIT_BITSIZE (mode) - 1);
 }
 
 /* Select a format to encode pointers in exception handling data.  */
@@ -13587,7 +13638,8 @@  aarch64_expand_vec_perm (rtx target, rtx
 static bool
 aarch64_evpc_trn (struct expand_vec_perm_d *d)
 {
-  unsigned int odd, nelt = d->perm.length ();
+  HOST_WIDE_INT odd;
+  poly_uint64 nelt = d->perm.length ();
   rtx out, in0, in1, x;
   machine_mode vmode = d->vmode;
 
@@ -13596,8 +13648,8 @@  aarch64_evpc_trn (struct expand_vec_perm
 
   /* Note that these are little-endian tests.
      We correct for big-endian later.  */
-  odd = d->perm[0];
-  if ((odd != 0 && odd != 1)
+  if (!d->perm[0].is_constant (&odd)
+      || (odd != 0 && odd != 1)
       || !d->perm.series_p (0, 2, odd, 2)
       || !d->perm.series_p (1, 2, nelt + odd, 2))
     return false;
@@ -13624,7 +13676,7 @@  aarch64_evpc_trn (struct expand_vec_perm
 static bool
 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
 {
-  unsigned int odd;
+  HOST_WIDE_INT odd;
   rtx out, in0, in1, x;
   machine_mode vmode = d->vmode;
 
@@ -13633,8 +13685,8 @@  aarch64_evpc_uzp (struct expand_vec_perm
 
   /* Note that these are little-endian tests.
      We correct for big-endian later.  */
-  odd = d->perm[0];
-  if ((odd != 0 && odd != 1)
+  if (!d->perm[0].is_constant (&odd)
+      || (odd != 0 && odd != 1)
       || !d->perm.series_p (0, 1, odd, 2))
     return false;
 
@@ -13660,7 +13712,8 @@  aarch64_evpc_uzp (struct expand_vec_perm
 static bool
 aarch64_evpc_zip (struct expand_vec_perm_d *d)
 {
-  unsigned int high, nelt = d->perm.length ();
+  unsigned int high;
+  poly_uint64 nelt = d->perm.length ();
   rtx out, in0, in1, x;
   machine_mode vmode = d->vmode;
 
@@ -13669,11 +13722,12 @@  aarch64_evpc_zip (struct expand_vec_perm
 
   /* Note that these are little-endian tests.
      We correct for big-endian later.  */
-  high = d->perm[0];
-  if ((high != 0 && high * 2 != nelt)
-      || !d->perm.series_p (0, 2, high, 1)
-      || !d->perm.series_p (1, 2, high + nelt, 1))
+  poly_uint64 first = d->perm[0];
+  if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt))
+      || !d->perm.series_p (0, 2, first, 1)
+      || !d->perm.series_p (1, 2, first + nelt, 1))
     return false;
+  high = maybe_ne (first, 0U);
 
   /* Success!  */
   if (d->testing_p)
@@ -13698,13 +13752,13 @@  aarch64_evpc_zip (struct expand_vec_perm
 static bool
 aarch64_evpc_ext (struct expand_vec_perm_d *d)
 {
-  unsigned int nelt = d->perm.length ();
+  HOST_WIDE_INT location;
   rtx offset;
 
-  unsigned int location = d->perm[0]; /* Always < nelt.  */
-
-  /* Check if the extracted indices are increasing by one.  */
-  if (!d->perm.series_p (0, 1, location, 1))
+  /* The first element always refers to the first vector.
+     Check if the extracted indices are increasing by one.  */
+  if (!d->perm[0].is_constant (&location)
+      || !d->perm.series_p (0, 1, location, 1))
     return false;
 
   /* Success! */
@@ -13720,8 +13774,10 @@  aarch64_evpc_ext (struct expand_vec_perm
          at the LSB end of the register), and the low elements of the second
          vector (stored at the MSB end of the register). So swap.  */
       std::swap (d->op0, d->op1);
-      /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
-      location = nelt - location;
+      /* location != 0 (above), so safe to assume (nelt - location) < nelt.
+	 to_constant () is safe since this is restricted to Advanced SIMD
+	 vectors.  */
+      location = d->perm.length ().to_constant () - location;
     }
 
   offset = GEN_INT (location);
@@ -13737,12 +13793,13 @@  aarch64_evpc_ext (struct expand_vec_perm
 static bool
 aarch64_evpc_rev (struct expand_vec_perm_d *d)
 {
-  unsigned int i, diff, size, unspec;
+  HOST_WIDE_INT diff;
+  unsigned int i, size, unspec;
 
-  if (!d->one_vector_p)
+  if (!d->one_vector_p
+      || !d->perm[0].is_constant (&diff))
     return false;
 
-  diff = d->perm[0];
   size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
   if (size == 8)
     unspec = UNSPEC_REV64;
@@ -13772,19 +13829,18 @@  aarch64_evpc_dup (struct expand_vec_perm
 {
   rtx out = d->target;
   rtx in0;
+  HOST_WIDE_INT elt;
   machine_mode vmode = d->vmode;
-  unsigned int elt;
   rtx lane;
 
-  if (d->perm.encoding ().encoded_nelts () != 1)
+  if (d->perm.encoding ().encoded_nelts () != 1
+      || !d->perm[0].is_constant (&elt))
     return false;
 
   /* Success! */
   if (d->testing_p)
     return true;
 
-  elt = d->perm[0];
-
   /* The generic preparation in aarch64_expand_vec_perm_const_1
      swaps the operand order and the permute indices if it finds
      d->perm[0] to be in the second operand.  Thus, we can always
@@ -13804,7 +13860,12 @@  aarch64_evpc_tbl (struct expand_vec_perm
 {
   rtx rperm[MAX_VECT_LEN], sel;
   machine_mode vmode = d->vmode;
-  unsigned int i, nelt = d->perm.length ();
+
+  /* Make sure that the indices are constant.  */
+  unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts ();
+  for (unsigned int i = 0; i < encoded_nelts; ++i)
+    if (!d->perm[i].is_constant ())
+      return false;
 
   if (d->testing_p)
     return true;
@@ -13815,16 +13876,17 @@  aarch64_evpc_tbl (struct expand_vec_perm
   if (vmode != V8QImode && vmode != V16QImode)
     return false;
 
-  for (i = 0; i < nelt; ++i)
-    {
-      int nunits = GET_MODE_NUNITS (vmode);
+  /* to_constant is safe since this routine is specific to Advanced SIMD
+     vectors.  */
+  unsigned int nelt = d->perm.length ().to_constant ();
+  for (unsigned int i = 0; i < nelt; ++i)
+    /* If big-endian and two vectors we end up with a weird mixed-endian
+       mode on NEON.  Reverse the index within each word but not the word
+       itself.  to_constant is safe because we checked is_constant above.  */
+    rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
+			? d->perm[i].to_constant () ^ (nelt - 1)
+			: d->perm[i].to_constant ());
 
-      /* If big-endian and two vectors we end up with a weird mixed-endian
-	 mode on NEON.  Reverse the index within each word but not the word
-	 itself.  */
-      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
-					   : (HOST_WIDE_INT) d->perm[i]);
-    }
   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
   sel = force_reg (vmode, sel);
 
@@ -13838,14 +13900,14 @@  aarch64_expand_vec_perm_const_1 (struct
   /* The pattern matching functions above are written to look for a small
      number to begin the sequence (0, 1, N/2).  If we begin with an index
      from the second operand, we can swap the operands.  */
-  unsigned int nelt = d->perm.length ();
-  if (d->perm[0] >= nelt)
+  poly_int64 nelt = d->perm.length ();
+  if (known_ge (d->perm[0], nelt))
     {
       d->perm.rotate_inputs (1);
       std::swap (d->op0, d->op1);
     }
 
-  if (TARGET_SIMD && nelt > 1)
+  if (TARGET_SIMD && known_gt (nelt, 1))
     {
       if (aarch64_evpc_rev (d))
 	return true;
@@ -13961,7 +14023,7 @@  aarch64_modes_tieable_p (machine_mode mo
    AMOUNT bytes.  */
 
 static rtx
-aarch64_move_pointer (rtx pointer, int amount)
+aarch64_move_pointer (rtx pointer, poly_int64 amount)
 {
   rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
 
@@ -13975,9 +14037,7 @@  aarch64_move_pointer (rtx pointer, int a
 static rtx
 aarch64_progress_pointer (rtx pointer)
 {
-  HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
-
-  return aarch64_move_pointer (pointer, amount);
+  return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
 }
 
 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
@@ -14788,7 +14848,9 @@  aarch64_operands_ok_for_ldpstp (rtx *ope
 
   offval_1 = INTVAL (offset_1);
   offval_2 = INTVAL (offset_2);
-  msize = GET_MODE_SIZE (mode);
+  /* We should only be trying this for fixed-sized modes.  There is no
+     SVE LDP/STP instruction.  */
+  msize = GET_MODE_SIZE (mode).to_constant ();
   /* Check if the offsets are consecutive.  */
   if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
     return false;
@@ -15148,7 +15210,9 @@  aarch64_fpconst_pow_of_2 (rtx x)
 int
 aarch64_vec_fpconst_pow_of_2 (rtx x)
 {
-  if (GET_CODE (x) != CONST_VECTOR)
+  int nelts;
+  if (GET_CODE (x) != CONST_VECTOR
+      || !CONST_VECTOR_NUNITS (x).is_constant (&nelts))
     return -1;
 
   if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
@@ -15158,7 +15222,7 @@  aarch64_vec_fpconst_pow_of_2 (rtx x)
   if (firstval <= 0)
     return -1;
 
-  for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
+  for (int i = 1; i < nelts; i++)
     if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
       return -1;
 
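For reference, the recurring known_eq/maybe_ne/is_constant/to_constant checks in the
aarch64.c hunks above all treat a mode size as a two-coefficient value C0 + C1*X, where
X is the number of extra SVE vector quanta chosen at run time.  The sketch below is a
minimal standalone model of those comparison semantics only; poly_model and the *_model
helpers are illustrative names for this note, not the real poly-int.h interface.

  /* Illustrative model only -- not GCC's poly-int.h.  A 2-coefficient
     poly value represents C0 + C1 * X, where X (>= 0) is the unknown
     number of extra SVE quanta selected at run time.  */
  #include <cassert>

  struct poly_model
  {
    long c0, c1;
  };

  /* known_eq: equal for every possible X.  */
  static bool
  known_eq_model (poly_model a, poly_model b)
  {
    return a.c0 == b.c0 && a.c1 == b.c1;
  }

  /* maybe_ne: different for at least one possible X.  */
  static bool
  maybe_ne_model (poly_model a, poly_model b)
  {
    return !known_eq_model (a, b);
  }

  /* is_constant (&out): succeeds only if the value does not depend on X,
     which is the property the to_constant () calls above rely on.  */
  static bool
  is_constant_model (poly_model a, long *out)
  {
    if (a.c1 != 0)
      return false;
    *out = a.c0;
    return true;
  }

  int
  main ()
  {
    poly_model v16qi = { 16, 0 };    /* Advanced SIMD vector: always 16 bytes.  */
    poly_model vnx16qi = { 16, 16 }; /* SVE byte vector: 16 + 16 * X bytes.  */

    assert (known_eq_model (v16qi, poly_model { 16, 0 }));
    assert (maybe_ne_model (vnx16qi, poly_model { 16, 0 }));

    long size;
    assert (is_constant_model (v16qi, &size) && size == 16);
    assert (!is_constant_model (vnx16qi, &size));
    return 0;
  }

Under this model, a fixed-size Advanced SIMD mode compares equal to a literal byte count,
while an SVE mode can only ever be "maybe" equal, which is why the patch replaces == tests
with known_eq/maybe_ne and funnels genuinely fixed-size paths through to_constant ().
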
Index: gcc/config/aarch64/aarch64.md
===================================================================
--- gcc/config/aarch64/aarch64.md	2018-01-05 11:24:44.647408566 +0000
+++ gcc/config/aarch64/aarch64.md	2018-01-05 11:24:44.868399534 +0000
@@ -3328,7 +3328,7 @@  (define_insn "aarch64_<crc_variant>"
          CRC))]
   "TARGET_CRC32"
   {
-    if (GET_MODE_BITSIZE (GET_MODE (operands[2])) >= 64)
+    if (GET_MODE_BITSIZE (<crc_mode>mode) >= 64)
       return "<crc_variant>\\t%w0, %w1, %x2";
     else
       return "<crc_variant>\\t%w0, %w1, %w2";