diff mbox

RFR: Optimised multiplyExact patch

Message ID 1404747253.4274.21.camel@localhost.localdomain
State New
Headers show

Commit Message

Edward Nevill July 7, 2014, 3:34 p.m. UTC
Hi,

The following patch optimises multiplyExact to generate the following code

Integer case:-

  0x0000007f751404e0: smull     x8, w19, w11
  0x0000007f751404e4: cmp       x8, w8, sxtw
  0x0000007f751404e8: b.ne      0x0000007f75140530  ;*invokestatic multiplyExact

Long case:-

  0x0000007f811404e0: mul       x8, x19, x10
  0x0000007f811404e4: smulh     x9, x19, x10
  0x0000007f811404e8: cmp       x9, x8, asr #31
  0x0000007f811404ec: b.ne      0x0000007f81140534  ;*invokestatic multiplyExact

The patch has additional rules to convert the bvs after the multiply exact into a bne, and therefore no longer needs the ugly code to generate the V flag from the Z flag.

OK?
Ed.

--- CUT HERE ---
# HG changeset patch
# User Edward Nevill edward.nevill@linaro.org
# Date 1404746752 -3600
#      Mon Jul 07 16:25:52 2014 +0100
# Node ID 76a6867e8c34fb6ac892db0a0d2ea76aaf0c3415
# Parent  aafb8a6d2b38862426dda0d3eb8061d7a1291fe0
Add support for multiplyExact

Comments

Andrew Haley July 7, 2014, 3:56 p.m. UTC | #1
On 07/07/2014 04:34 PM, Edward Nevill wrote:
> Hi,
> 
> The following patch optimises multiplyExact to generate the following code
> 
> Integer case:-
> 
>   0x0000007f751404e0: smull     x8, w19, w11
>   0x0000007f751404e4: cmp       x8, w8, sxtw
>   0x0000007f751404e8: b.ne      0x0000007f75140530  ;*invokestatic multiplyExact
> 
> Long case:-
> 
>   0x0000007f811404e0: mul       x8, x19, x10
>   0x0000007f811404e4: smulh     x9, x19, x10
>   0x0000007f811404e8: cmp       x9, x8, asr #31
>   0x0000007f811404ec: b.ne      0x0000007f81140534  ;*invokestatic multiplyExact
> 
> The patch has additional rules to convert the bvs after the multiply exact into a bne and therefor no longer needs the ugly code to generate the V flag from the Z flag.
> 
> OK?
> Ed.
> 

Great, thanks.

Andrew.
diff mbox

Patch

diff -r aafb8a6d2b38 -r 76a6867e8c34 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Mon Jul 07 16:24:51 2014 +0100
+++ b/src/cpu/aarch64/vm/aarch64.ad	Mon Jul 07 16:25:52 2014 +0100
@@ -10602,6 +10602,96 @@ 
   ins_pipe(pipe_class_default);
 %}
 
+instruct overflowMulI_reg(rFlagsReg cr, iRegI op1, iRegI op2)
+%{
+  match(Set cr (OverflowMulI op1 op2));
+  // Standalone flag-setting form: leaves V set iff op1 * op2 overflows an int.
+  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
+            "cmp   rscratch1, rscratch1, sxtw\n\t"
+            "movw  rscratch1, #0x80000000\n\t"
+            "cselw rscratch1, rscratch1, zr, NE\n\t"
+            "cmpw  rscratch1, #1" %}
+  ins_cost(5 * INSN_COST);
+  ins_encode %{
+    __ smull(rscratch1, $op1$$Register, $op2$$Register); // Full 64-bit signed product
+    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => product != sxtw(low 32 bits) => overflow
+    __ movw(rscratch1, 0x80000000);                    // Turn Z into V: develop 0 (EQ),
+    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
+    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS; 0 - 1 => VC
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowMulI_reg_branch(cmpOp cmp, iRegI op1, iRegI op2, label labl, rFlagsReg cr)
+%{
+  match(If cmp (OverflowMulI op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
+	    || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
+  effect(USE labl, KILL cr);
+  // Fused multiply + overflow check + branch: branches on Z directly, so no V flag is synthesized.
+  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
+            "cmp   rscratch1, rscratch1, sxtw\n\t"
+            "b$cmp   $labl" %}
+  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    __ smull(rscratch1, $op1$$Register, $op2$$Register); // Full 64-bit signed product
+    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => product != sxtw(low 32 bits) => overflow
+    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L); // VS test -> NE, any other -> EQ
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
+%{
+  match(Set cr (OverflowMulL op1 op2));
+  // Standalone flag-setting form: leaves V set iff op1 * op2 overflows a long.
+  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
+            "smulh rscratch2, $op1, $op2\n\t"
+            "cmp   rscratch2, rscratch1, ASR #63\n\t"
+            "movw  rscratch1, #0x80000000\n\t"
+            "cselw rscratch1, rscratch1, zr, NE\n\t"
+            "cmpw  rscratch1, #1" %}
+  ins_cost(6 * INSN_COST);
+  ins_encode %{
+    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
+    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
+    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // EQ iff top half is all copies of bit 63 (ASR 63, not 31)
+    __ movw(rscratch1, 0x80000000);                    // Turn Z into V: develop 0 (EQ),
+    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
+    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS; 0 - 1 => VC
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
+%{
+  match(If cmp (OverflowMulL op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
+	    || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
+  effect(USE labl, KILL cr);
+  // Fused multiply + overflow check + branch: branches on Z directly, so no V flag is synthesized.
+  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
+            "smulh rscratch2, $op1, $op2\n\t"
+            "cmp   rscratch2, rscratch1, ASR #63\n\t"
+            "b$cmp $labl" %}
+  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
+    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
+    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // EQ iff top half is all copies of bit 63 (ASR 63, not 31)
+    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L); // VS test -> NE, any other -> EQ
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
 // ============================================================================
 // Compare Instructions
 
--- CUT HERE ---