===================================================================
@@ -46,3 +46,7 @@ INT_MODE (XI, 64);
/* Quad float: 128-bit floating mode for long doubles. */
FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* Coefficient 1 is multiplied by the number of 128-bit chunks in an
+ SVE vector (referred to as "VQ") minus one. */
+#define NUM_POLY_INT_COEFFS 2
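
With two coefficients, every mode size and memory offset in the port becomes
a degree-1 polynomial in the runtime quantity VQ - 1.  As an illustration
only (not part of the patch), using GCC's poly_int64 API:

  /* A poly_int64 {A, B} represents A + B * (VQ - 1) bytes, where VQ is
     the number of 128-bit chunks in an SVE vector.  */
  poly_int64 advsimd_bytes = 16;     /* always 16 bytes                 */
  poly_int64 sve_bytes (16, 16);     /* 16 + 16 * (VQ - 1) == 16 * VQ   */
  /* The two are equal only when VQ == 1, so may_eq and may_ne both
     hold, while neither must_eq nor must_ne does.  */
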
===================================================================
@@ -333,7 +333,7 @@ enum simd_immediate_check {
extern struct tune_params aarch64_tune_params;
-HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
+poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
int aarch64_get_condition_code (rtx);
bool aarch64_address_valid_for_prefetch_p (rtx, bool);
bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
@@ -366,7 +366,7 @@ bool aarch64_zero_extend_const_eq (machi
bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
bool aarch64_mov_operand_p (rtx, machine_mode);
rtx aarch64_reverse_mask (machine_mode, unsigned int);
-bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
+bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
char *aarch64_output_simd_mov_immediate (rtx, unsigned,
enum simd_immediate_check w = AARCH64_CHECK_MOV);
===================================================================
@@ -551,7 +551,7 @@ #define LIBCALL_VALUE(MODE) \
#define DEFAULT_PCC_STRUCT_RETURN 0
-#ifdef HOST_WIDE_INT
+#ifdef HAVE_POLY_INT_H
struct GTY (()) aarch64_frame
{
HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER];
@@ -569,20 +569,20 @@ struct GTY (()) aarch64_frame
/* Offset from the base of the frame (incomming SP) to the
top of the locals area. This value is always a multiple of
STACK_BOUNDARY. */
- HOST_WIDE_INT locals_offset;
+ poly_int64 locals_offset;
/* Offset from the base of the frame (incomming SP) to the
hard_frame_pointer. This value is always a multiple of
STACK_BOUNDARY. */
- HOST_WIDE_INT hard_fp_offset;
+ poly_int64 hard_fp_offset;
/* The size of the frame. This value is the offset from base of the
- * frame (incomming SP) to the stack_pointer. This value is always
- * a multiple of STACK_BOUNDARY. */
- HOST_WIDE_INT frame_size;
+ frame (incoming SP) to the stack_pointer. This value is always
+ a multiple of STACK_BOUNDARY. */
+ poly_int64 frame_size;
/* The size of the initial stack adjustment before saving callee-saves. */
- HOST_WIDE_INT initial_adjust;
+ poly_int64 initial_adjust;
/* The writeback value when pushing callee-save registers.
It is zero when no push is used. */
@@ -590,10 +590,10 @@ struct GTY (()) aarch64_frame
/* The offset from SP to the callee-save registers after initial_adjust.
It may be non-zero if no push is used (ie. callee_adjust == 0). */
- HOST_WIDE_INT callee_offset;
+ poly_int64 callee_offset;
/* The size of the stack adjustment after saving callee-saves. */
- HOST_WIDE_INT final_adjust;
+ poly_int64 final_adjust;
/* Store FP,LR and setup a frame pointer. */
bool emit_frame_chain;
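
All of the widened fields are still related by plain additions and
subtractions (for example frame_size = hard_fp_offset + outgoing_args_size
below), which poly_int64 supports directly; only comparisons and alignment
go through the new helpers.  A minimal sketch, illustrative only:

  poly_int64 hard_fp_offset (32, 16);             /* 32 + 16 * (VQ - 1) */
  poly_int64 frame_size = hard_fp_offset + 64;    /* {96, 16}           */
  poly_int64 diff = frame_size - hard_fp_offset;  /* 64, a constant     */
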
===================================================================
@@ -1065,9 +1065,9 @@ aarch64_simd_expand_args (rtx target, in
gcc_assert (opc > 1);
if (CONST_INT_P (op[opc]))
{
- aarch64_simd_lane_bounds (op[opc], 0,
- GET_MODE_NUNITS (builtin_mode),
- exp);
+ unsigned int nunits
+ = GET_MODE_NUNITS (builtin_mode).to_constant ();
+ aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
/* Keep to GCC-vector-extension lane indices in the RTL. */
op[opc] = aarch64_endian_lane_rtx (builtin_mode,
INTVAL (op[opc]));
@@ -1080,8 +1080,9 @@ aarch64_simd_expand_args (rtx target, in
if (CONST_INT_P (op[opc]))
{
machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
- aarch64_simd_lane_bounds (op[opc],
- 0, GET_MODE_NUNITS (vmode), exp);
+ unsigned int nunits
+ = GET_MODE_NUNITS (vmode).to_constant ();
+ aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
/* Keep to GCC-vector-extension lane indices in the RTL. */
op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
}
@@ -1400,16 +1401,17 @@ aarch64_builtin_vectorized_function (uns
tree type_in)
{
machine_mode in_mode, out_mode;
- int in_n, out_n;
+ unsigned HOST_WIDE_INT in_n, out_n;
if (TREE_CODE (type_out) != VECTOR_TYPE
|| TREE_CODE (type_in) != VECTOR_TYPE)
return NULL_TREE;
out_mode = TYPE_MODE (TREE_TYPE (type_out));
- out_n = TYPE_VECTOR_SUBPARTS (type_out);
in_mode = TYPE_MODE (TREE_TYPE (type_in));
- in_n = TYPE_VECTOR_SUBPARTS (type_in);
+ if (!TYPE_VECTOR_SUBPARTS (type_out).is_constant (&out_n)
+ || !TYPE_VECTOR_SUBPARTS (type_in).is_constant (&in_n))
+ return NULL_TREE;
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
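
TYPE_VECTOR_SUBPARTS now returns a poly_uint64, so the builtin lookup bails
out whenever the element count is not a compile-time constant; there is no
Advanced SIMD builtin to map a variable-length vector onto.  The general
pattern, sketched with a hypothetical VECTYPE:

  unsigned HOST_WIDE_INT nelts;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelts))
    return NULL_TREE;   /* variable-length (SVE) vector type  */
  /* From here on NELTS is an ordinary element count.  */
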
===================================================================
@@ -31,9 +31,9 @@ (define_expand "mov<mode>"
normal str, so the check need not apply. */
if (GET_CODE (operands[0]) == MEM
&& !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
- && ((GET_MODE_SIZE (<MODE>mode) == 16
+ && ((must_eq (GET_MODE_SIZE (<MODE>mode), 16)
&& aarch64_mem_pair_operand (operands[0], DImode))
- || GET_MODE_SIZE (<MODE>mode) == 8)))
+ || must_eq (GET_MODE_SIZE (<MODE>mode), 8))))
operands[1] = force_reg (<MODE>mode, operands[1]);
"
)
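
Throughout the series, == and < on sizes are replaced by the poly_int
predicates: the must_* forms hold only when the relation is true for every
possible runtime VQ, the may_* forms when it is true for at least one.
Semantics in brief (illustrative only, using the must_/may_ spellings this
series uses):

  poly_int64 fixed = 8;
  poly_int64 scalable (16, 16);         /* 16 * VQ bytes              */
  bool a = must_eq (fixed, 8);          /* true for every VQ          */
  bool b = must_eq (scalable, 16);      /* false: only when VQ == 1   */
  bool c = may_ne (scalable, 16);       /* true: whenever VQ > 1      */
  bool d = may_gt (scalable, 16);       /* true: whenever VQ > 1      */
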
@@ -5180,9 +5180,7 @@ (define_expand "aarch64_ld<VSTRUCT:nregs
set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
* <VSTRUCT:nregs>);
- aarch64_simd_lane_bounds (operands[3], 0,
- GET_MODE_NUNITS (<VALLDIF:MODE>mode),
- NULL);
+ aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
operands[0], mem, operands[2], operands[3]));
DONE;
===================================================================
@@ -1112,13 +1112,18 @@ aarch64_array_mode_supported_p (machine_
static unsigned int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
+ /* ??? Logically we should only need to provide a value when
+ HARD_REGNO_MODE_OK says that the combination is valid,
+ but at the moment we need to handle all modes. Just ignore
+ any runtime parts for registers that can't store them. */
+ HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
switch (aarch64_regno_regclass (regno))
{
case FP_REGS:
case FP_LO_REGS:
- return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
+ return CEIL (lowest_size, UNITS_PER_VREG);
default:
- return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ return CEIL (lowest_size, UNITS_PER_WORD);
}
gcc_unreachable ();
}
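
constant_lower_bound evaluates a poly_int at the smallest runtime value of
the indeterminate, i.e. the VQ == 1 case on AArch64, so the register count
stays exact for fixed-width modes and simply ignores the scalable part
otherwise.  Roughly (illustrative):

  poly_int64 size (16, 16);    /* an SVE vector, 16 * VQ bytes           */
  HOST_WIDE_INT min_size = constant_lower_bound (size);   /* 16          */
  unsigned int nregs = CEIL (min_size, UNITS_PER_VREG);   /* 1           */
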
@@ -1161,25 +1166,17 @@ aarch64_hard_regno_mode_ok (unsigned reg
static bool
aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
{
- return FP_REGNUM_P (regno) && GET_MODE_SIZE (mode) > 8;
+ return FP_REGNUM_P (regno) && may_gt (GET_MODE_SIZE (mode), 8);
}
/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
machine_mode
-aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
- machine_mode mode)
+aarch64_hard_regno_caller_save_mode (unsigned, unsigned, machine_mode mode)
{
- /* Handle modes that fit within single registers. */
- if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
- {
- if (GET_MODE_SIZE (mode) >= 4)
- return mode;
- else
- return SImode;
- }
- /* Fall back to generic for multi-reg and very large modes. */
+ if (must_ge (GET_MODE_SIZE (mode), 4))
+ return mode;
else
- return choose_hard_reg_mode (regno, nregs, false);
+ return SImode;
}
/* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
@@ -1292,11 +1289,10 @@ aarch64_tls_get_addr (void)
tls_symbolic_operand_type (rtx addr)
{
enum tls_model tls_kind = TLS_MODEL_NONE;
- rtx sym, addend;
-
if (GET_CODE (addr) == CONST)
{
- split_const (addr, &sym, &addend);
+ poly_int64 addend;
+ rtx sym = strip_offset (addr, &addend);
if (GET_CODE (sym) == SYMBOL_REF)
tls_kind = SYMBOL_REF_TLS_MODEL (sym);
}
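
strip_offset replaces the split_const dance: it peels a (const (plus ...))
wrapper and hands back the base rtx together with the offset as a
poly_int64.  For example (illustrative):

  /* ADDR = (const (plus (symbol_ref "x") (const_int 12)))  */
  poly_int64 offset;
  rtx base = strip_offset (addr, &offset);
  /* base == (symbol_ref "x"), offset == 12.  */
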
@@ -2235,8 +2231,12 @@ aarch64_pass_by_reference (cumulative_ar
int nregs;
/* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
- size = (mode == BLKmode && type)
- ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ if (mode == BLKmode && type)
+ size = int_size_in_bytes (type);
+ else
+ /* No frontends can create types with variable-sized modes, so we
+ shouldn't be asked to pass or return them. */
+ size = GET_MODE_SIZE (mode).to_constant ();
/* Aggregates are passed by reference based on their size. */
if (type && AGGREGATE_TYPE_P (type))
@@ -2333,8 +2333,8 @@ aarch64_function_value (const_tree type,
for (i = 0; i < count; i++)
{
rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
- tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
- GEN_INT (i * GET_MODE_SIZE (ag_mode)));
+ rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
XVECEXP (par, 0, i) = tmp;
}
return par;
@@ -2461,9 +2461,13 @@ aarch64_layout_arg (cumulative_args_t pc
pcum->aapcs_arg_processed = true;
/* Size in bytes, rounded to the nearest multiple of 8 bytes. */
- size
- = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
- UNITS_PER_WORD);
+ if (type)
+ size = int_size_in_bytes (type);
+ else
+ /* No frontends can create types with variable-sized modes, so we
+ shouldn't be asked to pass or return them. */
+ size = GET_MODE_SIZE (mode).to_constant ();
+ size = ROUND_UP (size, UNITS_PER_WORD);
allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
@@ -2500,9 +2504,9 @@ aarch64_layout_arg (cumulative_args_t pc
{
rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
V0_REGNUM + nvrn + i);
- tmp = gen_rtx_EXPR_LIST
- (VOIDmode, tmp,
- GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
+ rtx offset = gen_int_mode
+ (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
XVECEXP (par, 0, i) = tmp;
}
pcum->aapcs_reg = par;
@@ -2727,8 +2731,13 @@ aarch64_pad_reg_upward (machine_mode mod
/* Small composite types are always padded upward. */
if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
{
- HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
- : GET_MODE_SIZE (mode));
+ HOST_WIDE_INT size;
+ if (type)
+ size = int_size_in_bytes (type);
+ else
+ /* No frontends can create types with variable-sized modes, so we
+ shouldn't be asked to pass or return them. */
+ size = GET_MODE_SIZE (mode).to_constant ();
if (size < 2 * UNITS_PER_WORD)
return true;
}
@@ -2757,12 +2766,19 @@ #define ARITH_FACTOR 4096
#define PROBE_STACK_FIRST_REG 9
#define PROBE_STACK_SECOND_REG 10
-/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
inclusive. These are offsets from the current stack pointer. */
static void
-aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
{
+ HOST_WIDE_INT size;
+ if (!poly_size.is_constant (&size))
+ {
+ sorry ("stack probes for SVE frames");
+ return;
+ }
+
rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
/* See the same assertion on PROBE_INTERVAL above. */
@@ -3055,13 +3071,16 @@ #define SLOT_REQUIRED (-1)
= offset + cfun->machine->frame.saved_varargs_size;
cfun->machine->frame.hard_fp_offset
- = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (),
- STACK_BOUNDARY / BITS_PER_UNIT);
+ = aligned_upper_bound (varargs_and_saved_regs_size
+ + get_frame_size (),
+ STACK_BOUNDARY / BITS_PER_UNIT);
+ /* Both these values are already aligned. */
+ gcc_assert (multiple_p (crtl->outgoing_args_size,
+ STACK_BOUNDARY / BITS_PER_UNIT));
cfun->machine->frame.frame_size
- = ROUND_UP (cfun->machine->frame.hard_fp_offset
- + crtl->outgoing_args_size,
- STACK_BOUNDARY / BITS_PER_UNIT);
+ = (cfun->machine->frame.hard_fp_offset
+ + crtl->outgoing_args_size);
cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
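
aligned_upper_bound and multiple_p are the poly_int counterparts of ROUND_UP
and a "% alignment == 0" check: the first rounds up so the result is a
multiple of the alignment for every VQ, the second asserts that property of
an existing value.  Sketch (illustrative):

  poly_int64 size (8, 16);                             /* 8 + 16 * (VQ - 1) */
  poly_int64 aligned = aligned_upper_bound (size, 16); /* {16, 16}          */
  gcc_assert (multiple_p (aligned, 16));               /* holds for every VQ */
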
@@ -3076,18 +3095,21 @@ #define SLOT_REQUIRED (-1)
else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
max_push_offset = 256;
- if (cfun->machine->frame.frame_size < max_push_offset
- && crtl->outgoing_args_size == 0)
+ HOST_WIDE_INT const_size, const_fp_offset;
+ if (cfun->machine->frame.frame_size.is_constant (&const_size)
+ && const_size < max_push_offset
+ && must_eq (crtl->outgoing_args_size, 0))
{
/* Simple, small frame with no outgoing arguments:
stp reg1, reg2, [sp, -frame_size]!
stp reg3, reg4, [sp, 16] */
- cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size;
+ cfun->machine->frame.callee_adjust = const_size;
}
- else if ((crtl->outgoing_args_size
- + cfun->machine->frame.saved_regs_size < 512)
+ else if (must_lt (crtl->outgoing_args_size
+ + cfun->machine->frame.saved_regs_size, 512)
&& !(cfun->calls_alloca
- && cfun->machine->frame.hard_fp_offset < max_push_offset))
+ && must_lt (cfun->machine->frame.hard_fp_offset,
+ max_push_offset)))
{
/* Frame with small outgoing arguments:
sub sp, sp, frame_size
@@ -3097,13 +3119,14 @@ #define SLOT_REQUIRED (-1)
cfun->machine->frame.callee_offset
= cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
}
- else if (cfun->machine->frame.hard_fp_offset < max_push_offset)
+ else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
+ && const_fp_offset < max_push_offset)
{
/* Frame with large outgoing arguments but a small local area:
stp reg1, reg2, [sp, -hard_fp_offset]!
stp reg3, reg4, [sp, 16]
sub sp, sp, outgoing_args_size */
- cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset;
+ cfun->machine->frame.callee_adjust = const_fp_offset;
cfun->machine->frame.final_adjust
= cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
}
@@ -3316,7 +3339,7 @@ aarch64_return_address_signing_enabled (
skipping any write-back candidates if SKIP_WB is true. */
static void
-aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
+aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
unsigned start, unsigned limit, bool skip_wb)
{
rtx_insn *insn;
@@ -3328,7 +3351,7 @@ aarch64_save_callee_saves (machine_mode
regno = aarch64_next_callee_save (regno + 1, limit))
{
rtx reg, mem;
- HOST_WIDE_INT offset;
+ poly_int64 offset;
if (skip_wb
&& (regno == cfun->machine->frame.wb_candidate1
@@ -3381,13 +3404,13 @@ aarch64_save_callee_saves (machine_mode
static void
aarch64_restore_callee_saves (machine_mode mode,
- HOST_WIDE_INT start_offset, unsigned start,
+ poly_int64 start_offset, unsigned start,
unsigned limit, bool skip_wb, rtx *cfi_ops)
{
rtx base_rtx = stack_pointer_rtx;
unsigned regno;
unsigned regno2;
- HOST_WIDE_INT offset;
+ poly_int64 offset;
for (regno = aarch64_next_callee_save (start, limit);
regno <= limit;
@@ -3432,25 +3455,27 @@ aarch64_restore_callee_saves (machine_mo
static inline bool
offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
- HOST_WIDE_INT offset)
+ poly_int64 offset)
{
- return offset >= -256 && offset < 256;
+ HOST_WIDE_INT const_offset;
+ return (offset.is_constant (&const_offset)
+ && IN_RANGE (const_offset, -256, 255));
}
static inline bool
-offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
+offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
{
- return (offset >= 0
- && offset < 4096 * GET_MODE_SIZE (mode)
- && offset % GET_MODE_SIZE (mode) == 0);
+ HOST_WIDE_INT multiple;
+ return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
+ && IN_RANGE (multiple, 0, 4095));
}
bool
-aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
+aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
{
- return (offset >= -64 * GET_MODE_SIZE (mode)
- && offset < 64 * GET_MODE_SIZE (mode)
- && offset % GET_MODE_SIZE (mode) == 0);
+ HOST_WIDE_INT multiple;
+ return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
+ && IN_RANGE (multiple, -64, 63));
}
/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
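
The rewritten predicates fold the old divisibility and range checks into one
step: constant_multiple_p only succeeds when the offset is a compile-time
multiple of the (possibly variable) mode size, and then returns that
multiple for the range test.  A worked example for the 7-bit signed scaled
case, using 8 for GET_MODE_SIZE (DImode):

  poly_int64 offset = 504;
  HOST_WIDE_INT multiple;
  bool ok = (constant_multiple_p (offset, 8, &multiple)  /* 504 == 63 * 8 */
             && IN_RANGE (multiple, -64, 63));           /* true           */
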
@@ -3467,7 +3492,7 @@ aarch64_get_separate_components (void)
for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
if (aarch64_register_saved_on_entry (regno))
{
- HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
+ poly_int64 offset = cfun->machine->frame.reg_offset[regno];
if (!frame_pointer_needed)
offset += cfun->machine->frame.frame_size
- cfun->machine->frame.hard_fp_offset;
@@ -3571,7 +3596,7 @@ aarch64_process_components (sbitmap comp
so DFmode for the vector registers is enough. */
machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
rtx reg = gen_rtx_REG (mode, regno);
- HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
+ poly_int64 offset = cfun->machine->frame.reg_offset[regno];
if (!frame_pointer_needed)
offset += cfun->machine->frame.frame_size
- cfun->machine->frame.hard_fp_offset;
@@ -3593,13 +3618,13 @@ aarch64_process_components (sbitmap comp
break;
}
- HOST_WIDE_INT offset2 = cfun->machine->frame.reg_offset[regno2];
+ poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
/* The next register is not of the same class or its offset is not
mergeable with the current one into a pair. */
if (!satisfies_constraint_Ump (mem)
|| GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
- || (offset2 - cfun->machine->frame.reg_offset[regno])
- != GET_MODE_SIZE (mode))
+ || may_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
+ GET_MODE_SIZE (mode)))
{
insn = emit_insn (set);
RTX_FRAME_RELATED_P (insn) = 1;
@@ -3669,11 +3694,19 @@ aarch64_set_handled_components (sbitmap
cfun->machine->reg_is_wrapped_separately[regno] = true;
}
-/* Allocate SIZE bytes of stack space using TEMP1 as a scratch register. */
+/* Allocate POLY_SIZE bytes of stack space using TEMP1 as a scratch
+ register. */
static void
-aarch64_allocate_and_probe_stack_space (rtx temp1, HOST_WIDE_INT size)
+aarch64_allocate_and_probe_stack_space (rtx temp1, poly_int64 poly_size)
{
+ HOST_WIDE_INT size;
+ if (!poly_size.is_constant (&size))
+ {
+ sorry ("stack probes for SVE frames");
+ return;
+ }
+
HOST_WIDE_INT probe_interval
= 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
HOST_WIDE_INT guard_size
@@ -3833,11 +3866,11 @@ aarch64_expand_prologue (void)
{
aarch64_layout_frame ();
- HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size;
- HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
+ poly_int64 frame_size = cfun->machine->frame.frame_size;
+ poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
- HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
- HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
+ poly_int64 final_adjust = cfun->machine->frame.final_adjust;
+ poly_int64 callee_offset = cfun->machine->frame.callee_offset;
unsigned reg1 = cfun->machine->frame.wb_candidate1;
unsigned reg2 = cfun->machine->frame.wb_candidate2;
bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
@@ -3852,19 +3885,19 @@ aarch64_expand_prologue (void)
}
if (flag_stack_usage_info)
- current_function_static_stack_size = frame_size;
+ current_function_static_stack_size = constant_lower_bound (frame_size);
if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
{
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (frame_size > PROBE_INTERVAL
- && frame_size > get_stack_check_protect ())
+ if (may_gt (frame_size, PROBE_INTERVAL)
+ && may_gt (frame_size, get_stack_check_protect ()))
aarch64_emit_probe_stack_range (get_stack_check_protect (),
(frame_size
- get_stack_check_protect ()));
}
- else if (frame_size > 0)
+ else if (may_gt (frame_size, 0))
aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
}
@@ -3899,23 +3932,23 @@ aarch64_expand_prologue (void)
HOST_WIDE_INT guard_used_by_caller = 1024;
if (flag_stack_clash_protection)
{
- if (frame_size == 0)
+ if (must_eq (frame_size, 0))
dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
- else if (initial_adjust < guard_size - guard_used_by_caller
- && final_adjust < guard_size - guard_used_by_caller)
+ else if (must_lt (initial_adjust, guard_size - guard_used_by_caller)
+ && must_lt (final_adjust, guard_size - guard_used_by_caller))
dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
}
/* In theory we should never have both an initial adjustment
and a callee save adjustment. Verify that is the case since the
code below does not handle it for -fstack-clash-protection. */
- gcc_assert (initial_adjust == 0 || callee_adjust == 0);
+ gcc_assert (must_eq (initial_adjust, 0) || callee_adjust == 0);
/* Only probe if the initial adjustment is larger than the guard
less the amount of the guard reserved for use by the caller's
outgoing args. */
if (flag_stack_clash_protection
- && initial_adjust >= guard_size - guard_used_by_caller)
+ && may_ge (initial_adjust, guard_size - guard_used_by_caller))
aarch64_allocate_and_probe_stack_space (ip0_rtx, initial_adjust);
else
aarch64_sub_sp (ip0_rtx, initial_adjust, true);
@@ -3940,19 +3973,19 @@ aarch64_expand_prologue (void)
callee_adjust != 0 || emit_frame_chain);
/* We may need to probe the final adjustment as well. */
- if (flag_stack_clash_protection && final_adjust != 0)
+ if (flag_stack_clash_protection && may_ne (final_adjust, 0))
{
/* First probe if the final adjustment is larger than the guard size
less the amount of the guard reserved for use by the caller's
outgoing args. */
- if (final_adjust >= guard_size - guard_used_by_caller)
+ if (may_ge (final_adjust, guard_size - guard_used_by_caller))
aarch64_allocate_and_probe_stack_space (ip1_rtx, final_adjust);
else
aarch64_sub_sp (ip1_rtx, final_adjust, !frame_pointer_needed);
/* We must also probe if the final adjustment is larger than the guard
that is assumed used by the caller. This may be sub-optimal. */
- if (final_adjust >= guard_used_by_caller)
+ if (may_ge (final_adjust, guard_used_by_caller))
{
if (dump_file)
fprintf (dump_file,
@@ -3981,7 +4014,7 @@ aarch64_use_return_insn_p (void)
aarch64_layout_frame ();
- return cfun->machine->frame.frame_size == 0;
+ return must_eq (cfun->machine->frame.frame_size, 0);
}
/* Generate the epilogue instructions for returning from a function.
@@ -3994,21 +4027,22 @@ aarch64_expand_epilogue (bool for_sibcal
{
aarch64_layout_frame ();
- HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
+ poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
- HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
- HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
+ poly_int64 final_adjust = cfun->machine->frame.final_adjust;
+ poly_int64 callee_offset = cfun->machine->frame.callee_offset;
unsigned reg1 = cfun->machine->frame.wb_candidate1;
unsigned reg2 = cfun->machine->frame.wb_candidate2;
rtx cfi_ops = NULL;
rtx_insn *insn;
/* We need to add memory barrier to prevent read from deallocated stack. */
- bool need_barrier_p = (get_frame_size ()
- + cfun->machine->frame.saved_varargs_size) != 0;
+ bool need_barrier_p = may_ne (get_frame_size ()
+ + cfun->machine->frame.saved_varargs_size, 0);
/* Emit a barrier to prevent loads from a deallocated stack. */
- if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+ if (may_gt (final_adjust, crtl->outgoing_args_size)
+ || cfun->calls_alloca
|| crtl->calls_eh_return)
{
emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
@@ -4019,7 +4053,7 @@ aarch64_expand_epilogue (bool for_sibcal
be the same as the stack pointer. */
rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
- if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
+ if (frame_pointer_needed && (may_ne (final_adjust, 0) || cfun->calls_alloca))
/* If writeback is used when restoring callee-saves, the CFA
is restored on the instruction doing the writeback. */
aarch64_add_offset (Pmode, stack_pointer_rtx,
@@ -4043,7 +4077,7 @@ aarch64_expand_epilogue (bool for_sibcal
if (callee_adjust != 0)
aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
- if (callee_adjust != 0 || initial_adjust > 65536)
+ if (callee_adjust != 0 || may_gt (initial_adjust, 65536))
{
/* Emit delayed restores and set the CFA to be SP + initial_adjust. */
insn = get_last_insn ();
@@ -4644,9 +4678,9 @@ aarch64_classify_index (struct aarch64_a
&& contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
index = SUBREG_REG (index);
- if ((shift == 0 ||
- (shift > 0 && shift <= 3
- && (1 << shift) == GET_MODE_SIZE (mode)))
+ if ((shift == 0
+ || (shift > 0 && shift <= 3
+ && must_eq (1 << shift, GET_MODE_SIZE (mode))))
&& REG_P (index)
&& aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
{
@@ -4668,7 +4702,7 @@ aarch64_mode_valid_for_sched_fusion_p (m
return mode == SImode || mode == DImode
|| mode == SFmode || mode == DFmode
|| (aarch64_vector_mode_supported_p (mode)
- && GET_MODE_SIZE (mode) == 8);
+ && must_eq (GET_MODE_SIZE (mode), 8));
}
/* Return true if REGNO is a virtual pointer register, or an eliminable
@@ -4694,6 +4728,7 @@ aarch64_classify_address (struct aarch64
{
enum rtx_code code = GET_CODE (x);
rtx op0, op1;
+ HOST_WIDE_INT const_size;
/* On BE, we use load/store pair for all large int mode load/stores.
TI/TFmode may also use a load/store pair. */
@@ -4703,10 +4738,10 @@ aarch64_classify_address (struct aarch64
|| (BYTES_BIG_ENDIAN
&& aarch64_vect_struct_mode_p (mode)));
- bool allow_reg_index_p =
- !load_store_pair_p
- && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
- && !aarch64_vect_struct_mode_p (mode);
+ bool allow_reg_index_p = (!load_store_pair_p
+ && (may_ne (GET_MODE_SIZE (mode), 16)
+ || aarch64_vector_mode_supported_p (mode))
+ && !aarch64_vect_struct_mode_p (mode));
/* On LE, for AdvSIMD, don't support anything other than POST_INC or
REG addressing. */
@@ -4739,7 +4774,7 @@ aarch64_classify_address (struct aarch64
return true;
}
- if (GET_MODE_SIZE (mode) != 0
+ if (may_ne (GET_MODE_SIZE (mode), 0)
&& CONST_INT_P (op1)
&& aarch64_base_register_rtx_p (op0, strict_p))
{
@@ -4786,7 +4821,8 @@ aarch64_classify_address (struct aarch64
offset + 32));
if (load_store_pair_p)
- return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+ return ((must_eq (GET_MODE_SIZE (mode), 4)
+ || must_eq (GET_MODE_SIZE (mode), 8))
&& aarch64_offset_7bit_signed_scaled_p (mode, offset));
else
return (offset_9bit_signed_unscaled_p (mode, offset)
@@ -4846,7 +4882,8 @@ aarch64_classify_address (struct aarch64
&& offset_9bit_signed_unscaled_p (mode, offset));
if (load_store_pair_p)
- return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+ return ((must_eq (GET_MODE_SIZE (mode), 4)
+ || must_eq (GET_MODE_SIZE (mode), 8))
&& aarch64_offset_7bit_signed_scaled_p (mode, offset));
else
return offset_9bit_signed_unscaled_p (mode, offset);
@@ -4860,7 +4897,9 @@ aarch64_classify_address (struct aarch64
for SI mode or larger. */
info->type = ADDRESS_SYMBOLIC;
- if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
+ if (!load_store_pair_p
+ && GET_MODE_SIZE (mode).is_constant (&const_size)
+ && const_size >= 4)
{
rtx sym, addend;
@@ -4886,7 +4925,6 @@ aarch64_classify_address (struct aarch64
{
/* The symbol and offset must be aligned to the access size. */
unsigned int align;
- unsigned int ref_size;
if (CONSTANT_POOL_ADDRESS_P (sym))
align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
@@ -4904,12 +4942,12 @@ aarch64_classify_address (struct aarch64
else
align = BITS_PER_UNIT;
- ref_size = GET_MODE_SIZE (mode);
- if (ref_size == 0)
+ poly_int64 ref_size = GET_MODE_SIZE (mode);
+ if (must_eq (ref_size, 0))
ref_size = GET_MODE_SIZE (DImode);
- return ((INTVAL (offs) & (ref_size - 1)) == 0
- && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
+ return (multiple_p (INTVAL (offs), ref_size)
+ && multiple_p (align / BITS_PER_UNIT, ref_size));
}
}
return false;
@@ -4987,19 +5025,24 @@ aarch64_legitimate_address_p (machine_mo
static bool
aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
{
- HOST_WIDE_INT offset = INTVAL (*disp);
- HOST_WIDE_INT base;
+ HOST_WIDE_INT size;
+ if (GET_MODE_SIZE (mode).is_constant (&size))
+ {
+ HOST_WIDE_INT offset = INTVAL (*disp);
+ HOST_WIDE_INT base;
- if (mode == TImode || mode == TFmode)
- base = (offset + 0x100) & ~0x1f8;
- else if ((offset & (GET_MODE_SIZE (mode) - 1)) != 0)
- base = (offset + 0x100) & ~0x1ff;
- else
- base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
+ if (mode == TImode || mode == TFmode)
+ base = (offset + 0x100) & ~0x1f8;
+ else if ((offset & (size - 1)) != 0)
+ base = (offset + 0x100) & ~0x1ff;
+ else
+ base = offset & ~(size < 4 ? 0xfff : 0x3ffc);
- *off = GEN_INT (base);
- *disp = GEN_INT (offset - base);
- return true;
+ *off = GEN_INT (base);
+ *disp = GEN_INT (offset - base);
+ return true;
+ }
+ return false;
}
/* Return the binary representation of floating point constant VALUE in INTVAL.
@@ -5850,6 +5893,7 @@ #define buf_size 20
aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
{
struct aarch64_address_info addr;
+ unsigned int size;
if (aarch64_classify_address (&addr, x, mode, true))
switch (addr.type)
@@ -5890,30 +5934,28 @@ aarch64_print_operand_address (FILE *f,
return;
case ADDRESS_REG_WB:
+ /* Writeback is only supported for fixed-width modes. */
+ size = GET_MODE_SIZE (mode).to_constant ();
switch (GET_CODE (x))
{
case PRE_INC:
- asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
- GET_MODE_SIZE (mode));
+ asm_fprintf (f, "[%s, %d]!", reg_names[REGNO (addr.base)], size);
return;
case POST_INC:
- asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
- GET_MODE_SIZE (mode));
+ asm_fprintf (f, "[%s], %d", reg_names[REGNO (addr.base)], size);
return;
case PRE_DEC:
- asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
- GET_MODE_SIZE (mode));
+ asm_fprintf (f, "[%s, -%d]!", reg_names[REGNO (addr.base)], size);
return;
case POST_DEC:
- asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
- GET_MODE_SIZE (mode));
+ asm_fprintf (f, "[%s], -%d", reg_names[REGNO (addr.base)], size);
return;
case PRE_MODIFY:
- asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
+ asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
INTVAL (addr.offset));
return;
case POST_MODIFY:
- asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
+ asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
INTVAL (addr.offset));
return;
default:
@@ -5988,6 +6030,39 @@ aarch64_regno_regclass (unsigned regno)
return NO_REGS;
}
+/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
+ If OFFSET is out of range, return an offset of an anchor point
+ that is in range. Return 0 otherwise. */
+
+static HOST_WIDE_INT
+aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
+ machine_mode mode)
+{
+ /* Does it look like we'll need a 16-byte load/store-pair operation? */
+ if (size > 16)
+ return (offset + 0x400) & ~0x7f0;
+
+ /* For offsets that aren't a multiple of the access size, the limit is
+ -256...255. */
+ if (offset & (size - 1))
+ {
+ /* BLKmode typically uses LDP of X-registers. */
+ if (mode == BLKmode)
+ return (offset + 512) & ~0x3ff;
+ return (offset + 0x100) & ~0x1ff;
+ }
+
+ /* Small negative offsets are supported. */
+ if (IN_RANGE (offset, -256, 0))
+ return 0;
+
+ if (mode == TImode || mode == TFmode)
+ return (offset + 0x100) & ~0x1ff;
+
+ /* Use 12-bit offset by access size. */
+ return offset & (~0xfff * size);
+}
+
static rtx
aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
{
@@ -6037,34 +6112,17 @@ aarch64_legitimize_address (rtx x, rtx /
x = gen_rtx_PLUS (Pmode, base, offset_rtx);
}
- /* Does it look like we'll need a 16-byte load/store-pair operation? */
- HOST_WIDE_INT base_offset;
- if (GET_MODE_SIZE (mode) > 16)
- base_offset = (offset + 0x400) & ~0x7f0;
- /* For offsets aren't a multiple of the access size, the limit is
- -256...255. */
- else if (offset & (GET_MODE_SIZE (mode) - 1))
- {
- base_offset = (offset + 0x100) & ~0x1ff;
-
- /* BLKmode typically uses LDP of X-registers. */
- if (mode == BLKmode)
- base_offset = (offset + 512) & ~0x3ff;
- }
- /* Small negative offsets are supported. */
- else if (IN_RANGE (offset, -256, 0))
- base_offset = 0;
- else if (mode == TImode || mode == TFmode)
- base_offset = (offset + 0x100) & ~0x1ff;
- /* Use 12-bit offset by access size. */
- else
- base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
-
- if (base_offset != 0)
+ HOST_WIDE_INT size;
+ if (GET_MODE_SIZE (mode).is_constant (&size))
{
- base = plus_constant (Pmode, base, base_offset);
- base = force_operand (base, NULL_RTX);
- return plus_constant (Pmode, base, offset - base_offset);
+ HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
+ mode);
+ if (base_offset != 0)
+ {
+ base = plus_constant (Pmode, base, base_offset);
+ base = force_operand (base, NULL_RTX);
+ return plus_constant (Pmode, base, offset - base_offset);
+ }
}
}
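
aarch64_anchor_offset keeps the old anchor-selection arithmetic but on a
known-constant size, so legitimization can now skip it cleanly when the mode
size has a VQ term.  A worked example of the aligned case (illustrative):

  /* An 8-byte access at offset 0x10008: aligned, but too large for a
     scaled 12-bit immediate (4095 * 8 == 32760).  */
  HOST_WIDE_INT anchor = aarch64_anchor_offset (0x10008, 8, DImode);
  /* anchor == 0x10000; the residual offset of 8 fits the scaled range.  */
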
@@ -6151,7 +6209,7 @@ aarch64_secondary_reload (bool in_p ATTR
because AArch64 has richer addressing modes for LDR/STR instructions
than LDP/STP instructions. */
if (TARGET_FLOAT && rclass == GENERAL_REGS
- && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
+ && must_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
return FP_REGS;
if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
@@ -6195,7 +6253,7 @@ aarch64_can_eliminate (const int from, c
return true;
}
-HOST_WIDE_INT
+poly_int64
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
aarch64_layout_frame ();
@@ -6281,6 +6339,11 @@ aarch64_trampoline_init (rtx m_tramp, tr
static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
{
+ /* ??? Logically we should only need to provide a value when
+ HARD_REGNO_MODE_OK says that at least one register in REGCLASS
+ can hold MODE, but at the moment we need to handle all modes.
+ Just ignore any runtime parts for registers that can't store them. */
+ HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
switch (regclass)
{
case CALLER_SAVE_REGS:
@@ -6290,10 +6353,9 @@ aarch64_class_max_nregs (reg_class_t reg
case POINTER_AND_FP_REGS:
case FP_REGS:
case FP_LO_REGS:
- return
- aarch64_vector_mode_p (mode)
- ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
- : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ return (aarch64_vector_mode_p (mode)
+ ? CEIL (lowest_size, UNITS_PER_VREG)
+ : CEIL (lowest_size, UNITS_PER_WORD));
case STACK_REG:
return 1;
@@ -6844,25 +6906,15 @@ aarch64_address_cost (rtx x,
{
/* For the sake of calculating the cost of the shifted register
component, we can treat same sized modes in the same way. */
- switch (GET_MODE_BITSIZE (mode))
- {
- case 16:
- cost += addr_cost->addr_scale_costs.hi;
- break;
-
- case 32:
- cost += addr_cost->addr_scale_costs.si;
- break;
-
- case 64:
- cost += addr_cost->addr_scale_costs.di;
- break;
-
- /* We can't tell, or this is a 128-bit vector. */
- default:
- cost += addr_cost->addr_scale_costs.ti;
- break;
- }
+ if (must_eq (GET_MODE_BITSIZE (mode), 16))
+ cost += addr_cost->addr_scale_costs.hi;
+ else if (must_eq (GET_MODE_BITSIZE (mode), 32))
+ cost += addr_cost->addr_scale_costs.si;
+ else if (must_eq (GET_MODE_BITSIZE (mode), 64))
+ cost += addr_cost->addr_scale_costs.di;
+ else
+ /* We can't tell, or this is a 128-bit vector. */
+ cost += addr_cost->addr_scale_costs.ti;
}
return cost;
@@ -7991,7 +8043,8 @@ aarch64_rtx_costs (rtx x, machine_mode m
if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
&& CONST_INT_P (XEXP (op1, 1))
- && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
+ && must_eq (INTVAL (XEXP (op1, 1)),
+ GET_MODE_BITSIZE (mode) - 1))
{
*cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
/* We already demanded XEXP (op1, 0) to be REG_P, so
@@ -8039,7 +8092,8 @@ aarch64_rtx_costs (rtx x, machine_mode m
if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
&& CONST_INT_P (XEXP (op1, 1))
- && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
+ && must_eq (INTVAL (XEXP (op1, 1)),
+ GET_MODE_BITSIZE (mode) - 1))
{
*cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
/* We already demanded XEXP (op1, 0) to be REG_P, so
@@ -8465,7 +8519,7 @@ aarch64_register_move_cost (machine_mode
return aarch64_register_move_cost (mode, from, GENERAL_REGS)
+ aarch64_register_move_cost (mode, GENERAL_REGS, to);
- if (GET_MODE_SIZE (mode) == 16)
+ if (must_eq (GET_MODE_SIZE (mode), 16))
{
/* 128-bit operations on general registers require 2 instructions. */
if (from == GENERAL_REGS && to == GENERAL_REGS)
@@ -8838,7 +8892,7 @@ aarch64_builtin_vectorization_cost (enum
return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
case vec_construct:
- elements = TYPE_VECTOR_SUBPARTS (vectype);
+ elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
return elements / 2 + 1;
default:
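
For cost purposes the vectorizer wants a realistic element count rather than
the compile-time minimum, and that is what estimated_poly_value provides:
for SVE it reflects the vector length the target expects at run time
(e.g. from -msve-vector-bits), falling back to a default guess.  Sketch
(illustrative):

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  /* For a 4 + 4 * (VQ - 1) element vector, constant_lower_bound gives 4
     (the VQ == 1 minimum), while estimated_poly_value might give 8 when
     tuning for 256-bit SVE.  */
  HOST_WIDE_INT elements = estimated_poly_value (nunits);
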
@@ -10900,6 +10954,10 @@ aarch64_gimplify_va_arg_expr (tree valis
&nregs,
&is_ha))
{
+ /* No frontends can create types with variable-sized modes, so we
+ shouldn't be asked to pass or return them. */
+ unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();
+
/* TYPE passed in fp/simd registers. */
if (!TARGET_FLOAT)
aarch64_err_no_fpadvsimd (mode, "varargs");
@@ -10913,8 +10971,8 @@ aarch64_gimplify_va_arg_expr (tree valis
if (is_ha)
{
- if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
- adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
+ if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
+ adjust = UNITS_PER_VREG - ag_size;
}
else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
&& size < UNITS_PER_VREG)
@@ -11302,8 +11360,8 @@ aapcs_vfp_sub_candidate (const_tree type
- tree_to_uhwi (TYPE_MIN_VALUE (index)));
/* There must be no padding. */
- if (wi::to_wide (TYPE_SIZE (type))
- != count * GET_MODE_BITSIZE (*modep))
+ if (may_ne (wi::to_poly_wide (TYPE_SIZE (type)),
+ count * GET_MODE_BITSIZE (*modep)))
return -1;
return count;
@@ -11333,8 +11391,8 @@ aapcs_vfp_sub_candidate (const_tree type
}
/* There must be no padding. */
- if (wi::to_wide (TYPE_SIZE (type))
- != count * GET_MODE_BITSIZE (*modep))
+ if (may_ne (wi::to_poly_wide (TYPE_SIZE (type)),
+ count * GET_MODE_BITSIZE (*modep)))
return -1;
return count;
@@ -11366,8 +11424,8 @@ aapcs_vfp_sub_candidate (const_tree type
}
/* There must be no padding. */
- if (wi::to_wide (TYPE_SIZE (type))
- != count * GET_MODE_BITSIZE (*modep))
+ if (may_ne (wi::to_poly_wide (TYPE_SIZE (type)),
+ count * GET_MODE_BITSIZE (*modep)))
return -1;
return count;
@@ -11389,7 +11447,7 @@ aapcs_vfp_sub_candidate (const_tree type
aarch64_short_vector_p (const_tree type,
machine_mode mode)
{
- HOST_WIDE_INT size = -1;
+ poly_int64 size = -1;
if (type && TREE_CODE (type) == VECTOR_TYPE)
size = int_size_in_bytes (type);
@@ -11397,7 +11455,7 @@ aarch64_short_vector_p (const_tree type,
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
size = GET_MODE_SIZE (mode);
- return (size == 8 || size == 16);
+ return must_eq (size, 8) || must_eq (size, 16);
}
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
@@ -12039,11 +12097,11 @@ aarch64_simd_vect_par_cnst_half (machine
aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
bool high)
{
- if (!VECTOR_MODE_P (mode))
+ int nelts;
+ if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
return false;
- rtx ideal = aarch64_simd_vect_par_cnst_half (mode, GET_MODE_NUNITS (mode),
- high);
+ rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
HOST_WIDE_INT count_op = XVECLEN (op, 0);
HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
int i = 0;
@@ -12128,7 +12186,8 @@ aarch64_simd_emit_reg_reg_move (rtx *ope
int
aarch64_simd_attr_length_rglist (machine_mode mode)
{
- return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
+ /* This is only used (and only meaningful) for Advanced SIMD, not SVE. */
+ return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
}
/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
@@ -12208,7 +12267,6 @@ aarch64_simd_make_constant (rtx vals)
machine_mode mode = GET_MODE (vals);
rtx const_dup;
rtx const_vec = NULL_RTX;
- int n_elts = GET_MODE_NUNITS (mode);
int n_const = 0;
int i;
@@ -12219,6 +12277,7 @@ aarch64_simd_make_constant (rtx vals)
/* A CONST_VECTOR must contain only CONST_INTs and
CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
Only store valid constants in a CONST_VECTOR. */
+ int n_elts = XVECLEN (vals, 0);
for (i = 0; i < n_elts; ++i)
{
rtx x = XVECEXP (vals, 0, i);
@@ -12257,7 +12316,7 @@ aarch64_expand_vector_init (rtx target,
machine_mode mode = GET_MODE (target);
scalar_mode inner_mode = GET_MODE_INNER (mode);
/* The number of vector elements. */
- int n_elts = GET_MODE_NUNITS (mode);
+ int n_elts = XVECLEN (vals, 0);
/* The number of vector elements which are not constant. */
int n_var = 0;
rtx any_const = NULL_RTX;
@@ -12397,7 +12456,9 @@ aarch64_shift_truncation_mask (machine_m
return
(!SHIFT_COUNT_TRUNCATED
|| aarch64_vector_mode_supported_p (mode)
- || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
+ || aarch64_vect_struct_mode_p (mode))
+ ? 0
+ : (GET_MODE_UNIT_BITSIZE (mode) - 1);
}
/* Select a format to encode pointers in exception handling data. */
@@ -13798,15 +13859,13 @@ aarch64_evpc_tbl (struct expand_vec_perm
return false;
for (i = 0; i < nelt; ++i)
- {
- int nunits = GET_MODE_NUNITS (vmode);
+ /* If big-endian and two vectors we end up with a weird mixed-endian
+ mode on NEON. Reverse the index within each word but not the word
+ itself. */
+ rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
+ ? d->perm[i] ^ (nelt - 1)
+ : d->perm[i]);
- /* If big-endian and two vectors we end up with a weird mixed-endian
- mode on NEON. Reverse the index within each word but not the word
- itself. */
- rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
- : d->perm[i]);
- }
sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
sel = force_reg (vmode, sel);
@@ -14003,7 +14062,7 @@ aarch64_modes_tieable_p (machine_mode mo
AMOUNT bytes. */
static rtx
-aarch64_move_pointer (rtx pointer, int amount)
+aarch64_move_pointer (rtx pointer, poly_int64 amount)
{
rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
@@ -14017,9 +14076,7 @@ aarch64_move_pointer (rtx pointer, int a
static rtx
aarch64_progress_pointer (rtx pointer)
{
- HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
-
- return aarch64_move_pointer (pointer, amount);
+ return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
}
/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
@@ -14846,7 +14903,9 @@ aarch64_operands_ok_for_ldpstp (rtx *ope
offval_1 = INTVAL (offset_1);
offval_2 = INTVAL (offset_2);
- msize = GET_MODE_SIZE (mode);
+ /* We should only be trying this for fixed-sized modes. There is no
+ SVE LDP/STP instruction. */
+ msize = GET_MODE_SIZE (mode).to_constant ();
/* Check if the offsets are consecutive. */
if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
return false;
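
The remaining to_constant () calls are confined to paths that only ever see
fixed-width Advanced SIMD or scalar modes (writeback addressing, LDP/STP
pairing, the lane-bound checks above), where the size cannot have a VQ term.
The difference between the two accessors, in brief (illustrative):

  poly_int64 size = GET_MODE_SIZE (mode);
  HOST_WIDE_INT c;
  if (size.is_constant (&c))
    ;                           /* C is usable; fails cleanly for SVE modes */
  c = size.to_constant ();      /* asserts (ICEs) if SIZE has a VQ term     */
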
===================================================================
@@ -3328,7 +3328,7 @@ (define_insn "aarch64_<crc_variant>"
CRC))]
"TARGET_CRC32"
{
- if (GET_MODE_BITSIZE (GET_MODE (operands[2])) >= 64)
+ if (GET_MODE_BITSIZE (<crc_mode>mode) >= 64)
return "<crc_variant>\\t%w0, %w1, %x2";
else
return "<crc_variant>\\t%w0, %w1, %w2";