diff mbox

RFR: Optimise store of 0 byte into card table

Message ID 1409323637.3470.15.camel@localhost.localdomain
State New
Headers show

Commit Message

Edward Nevill Aug. 29, 2014, 2:47 p.m. UTC
Hi,

The following patch optimises the storing of 0 bytes into the card table.

Firstly the existing code was generating

  mov wS, zr
  stlrb wS, [xN]

because it didn't have a rule for storing 0.

This has been optimised to

  stlrb zr, [xN]

Note: I have only done this optimisation for bytes. Should I also do it for 16-, 32- and 64-bit values? How often do these actually occur?

Secondly, if the byte in memory is already 0, it skips the store. In the vast majority of cases the byte is in fact 0 because the card is already dirty, so this avoids executing unnecessary STLRB instructions.

So it generates

  ldrb rScratch, [xN]
  cbz rScratch, skip
  stlrb zr, [xN]
skip:

This, in combination with the previous patch, generates significant performance improvements on programs that do extensive stores of non-volatile oops.

OK to push?
Ed.

Patch also available at http://people.linaro.org/~edward.nevill/patches/memorder.patch in case there is any problem with the formatting below.

--- CUT HERE ---
# HG changeset patch
# User Edward Nevill edward.nevill@linaro.org
# Date 1409322430 -3600
#      Fri Aug 29 15:27:10 2014 +0100
# Node ID 953a1b5e5b1726470045bfa0dbe1b2bff799b906
# Parent  4aa306297dafb02943645761f2477d0d95c4a157
Optimise store of 0 byte into card table
diff mbox

Patch

diff -r 4aa306297daf -r 953a1b5e5b17 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Fri Aug 29 11:12:45 2014 +0100
+++ b/src/cpu/aarch64/vm/aarch64.ad	Fri Aug 29 15:27:10 2014 +0100
@@ -2160,6 +2160,18 @@ 
 		 rscratch1, stlrb);
   %}
 
+  // Special case of storing 0 to a volatile byte (card table store): the byte is loaded first and the store is skipped if it is already 0
+  enc_class aarch64_enc_stlrb0(memory mem) %{
+    Label skip;
+    {
+      MacroAssembler _masm(&cbuf);
+      __ ldrb(rscratch1, as_Register($mem$$base));  // load the current byte at [base] into the scratch register
+      __ cbz(rscratch1, skip);  // byte already 0: branch past the store
+    }
+    MOV_VOLATILE(zr, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, rscratch1, stlrb);  // stlrb with zr as the source, so no separate "mov w, zr" is needed
+    __ bind(skip);  // target of the cbz above; NOTE(review): _masm is declared inside the inner braces above, confirm it is in scope here
+  %}
+
   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
 		 rscratch1, stlrh);
@@ -5909,6 +5921,19 @@ 
   ins_pipe(pipe_class_memory);
 %}
 
+// Special rule for volatile store of constant 0 byte (card table); encoded by aarch64_enc_stlrb0, which emits ldrb/cbz before the stlrb to skip the store when the byte is already 0
+instruct storeB_volatile_imm0(immI0 zero, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreB mem zero));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrb  zr, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_stlrb0(mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
 // Store Char/Short
 instruct storeC_volatile(iRegI src, /* sync_memory*/indirect mem)
 %{
diff -r 4aa306297daf -r 953a1b5e5b17 src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Fri Aug 29 11:12:45 2014 +0100
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Fri Aug 29 15:27:10 2014 +0100
@@ -1081,7 +1081,7 @@ 
     Register Rn, enum operand_size sz, int op, int o0) {
     starti;
     f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
-    rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
+    rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), zrf(Rt1, 0);
   }
 
 #define INSN4(NAME, sz, op, o0) /* Four registers */			\
--- CUT HERE ---