RFR: Re: [aarch64-port-dev ] Error in server compiler when packing/unpacking data from arrays using shift and mask ops.

Message ID 1386248464.8800.20.camel@localhost.localdomain
State New
Headers show

Commit Message

Edward Nevill Dec. 5, 2013, 1:01 p.m.
On Wed, 2013-12-04 at 16:07 -0500, Andy Johnson wrote:
> The jtreg hotspot/compiler test TestCharVect.java contains the following
> code snippet:
>       long l0 = (long)a1[i*4+0];
>       long l1 = (long)a1[i*4+1];
>       long l2 = (long)a1[i*4+2];
>       long l3 = (long)a1[i*4+3];
>       p4[i] = (l0 & 0xFFFFl) |
>              ((l1 & 0xFFFFl) << 16) |
>              ((l2 & 0xFFFFl) << 32) |
>              ((l3 & 0xFFFFl) << 48);

Much code elided.

> 
>   0x00007fcac91a7d28: sbfx    x15, x15, #16, #16    <<<<<<<<<<<
>   0x00007fcac91a7d2c: sbfiz    x14, x14, #32, #32
>   0x00007fcac91a7d30: sbfiz    x16, x17, #16, #32
>   0x00007fcac91a7d34: sxtw    x11, w11
>   0x00007fcac91a7d38: orr    x11, x11, x16
>   0x00007fcac91a7d3c: orr    x11, x11, x14
>   0x00007fcac91a7d40: sbfiz    x14, x5, #32, #32
>   0x00007fcac91a7d44: sbfiz    x16, x1, #16, #32
>   0x00007fcac91a7d48: sxtw    x17, w3
>   0x00007fcac91a7d4c: orr    x16, x17, x16
>   0x00007fcac91a7d50: orr    x14, x16, x14
>   0x00007fcac91a7d54: orr    x14, x14, x15
>   0x00007fcac91a7d58: add    xscratch1, x13, #0x10
>   0x00007fcac91a7d5c: str    x14, [xscratch1,w0,sxtw #3]
>   0x00007fcac91a7d60: sbfx    x14, x18, #16, #16   <<<<<<<<<<<<<

I believe the lines marked "<<<<<<<<<" are the source of the problem. What they are trying to do is shift left by 48, what they in fact do is a bitfield extract of bit 16..31.

This is due to the baroque encoding of the sbfiz and sbfx instructions.

I believe the following patch will fix the problem.

Ok to push?
Ed.

--- CUT HERE ---
exporting patch:
# HG changeset patch
# User Edward Nevill edward.nevill@linaro.org
# Date 1386246975 0
#      Thu Dec 05 12:36:15 2013 +0000
# Node ID 9a4f9705f626b50214d9b11917fd0aaef88685f3
# Parent  141fc5d4229ae66293617edb25050506932471ec
Fix lshift_ext in C2 for shifts >= 32



--- CUT HERE ---


The following gcc test program shows how gcc encodes shifts and shows how sbfm encodes variusly to sbfiz/sbfx.

I had to write this to try to get my head around the sbfm encoding.

--- CUT HERE ---
long shift8(int a)
{
	return (long)a << 8;
}
long shift16(int a)
{
	return (long)a << 16;
}
long shift24(int a)
{
	return (long)a << 24;
}
long shift32(int a)
{
	return (long)a << 32;
}
long shift40(int a)
{
	return (long)a << 40;
}
long shift48(int a)
{
	return (long)a << 48;
}
long shift56(int a)
{
	return (long)a << 56;
}
long asm_shift8(int a)
{
	long b;
	asm("sbfm %[result], %[source], 56, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
long asm_shift16(int a)
{
	long b;
	asm("sbfm %[result], %[source], 48, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
long asm_shift24(int a)
{
	long b;
	asm("sbfm %[result], %[source], 40, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
long asm_shift32(int a)
{
	long b;
	asm("sbfm %[result], %[source], 32, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
long asm_shift40(int a)
{
	long b;
	asm("sbfm %[result], %[source], 24, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
long asm_shift48(int a)
{
	long b;
	asm("sbfm %[result], %[source], 16, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
long asm_shift56(int a)
{
	long b;
	asm("sbfm %[result], %[source], 8, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
--- CUT HERE ---

And here is the output from objdump when compiled

gcc -O3 -c shift.c

--- CUT HERE ---
shift.o:     file format elf64-littleaarch64


Disassembly of section .text:

0000000000000000 <shift8>:
   0:	93787c00 	sbfiz	x0, x0, #8, #32
   4:	d65f03c0 	ret

0000000000000008 <shift16>:
   8:	93707c00 	sbfiz	x0, x0, #16, #32
   c:	d65f03c0 	ret

0000000000000010 <shift24>:
  10:	93687c00 	sbfiz	x0, x0, #24, #32
  14:	d65f03c0 	ret

0000000000000018 <shift32>:
  18:	d3607c00 	lsl	x0, x0, #32
  1c:	d65f03c0 	ret

0000000000000020 <shift40>:
  20:	d3585c00 	lsl	x0, x0, #40
  24:	d65f03c0 	ret

0000000000000028 <shift48>:
  28:	d3503c00 	lsl	x0, x0, #48
  2c:	d65f03c0 	ret

0000000000000030 <shift56>:
  30:	d3481c00 	lsl	x0, x0, #56
  34:	d65f03c0 	ret

0000000000000038 <asm_shift8>:
  38:	93787c00 	sbfiz	x0, x0, #8, #32
  3c:	d65f03c0 	ret

0000000000000040 <asm_shift16>:
  40:	93707c00 	sbfiz	x0, x0, #16, #32
  44:	d65f03c0 	ret

0000000000000048 <asm_shift24>:
  48:	93687c00 	sbfiz	x0, x0, #24, #32
  4c:	d65f03c0 	ret

0000000000000050 <asm_shift32>:
  50:	93607c00 	sbfiz	x0, x0, #32, #32
  54:	d65f03c0 	ret

0000000000000058 <asm_shift40>:
  58:	93587c00 	sbfx	x0, x0, #24, #8
  5c:	d65f03c0 	ret

0000000000000060 <asm_shift48>:
  60:	93507c00 	sbfx	x0, x0, #16, #16
  64:	d65f03c0 	ret

0000000000000068 <asm_shift56>:
  68:	93487c00 	sbfx	x0, x0, #8, #24
  6c:	d65f03c0 	ret
--- CUT HERE ---

Comments

Andrew Haley Dec. 5, 2013, 2:36 p.m. | #1
On 12/05/2013 01:01 PM, Edward Nevill wrote:
> On Wed, 2013-12-04 at 16:07 -0500, Andy Johnson wrote:
>> The jtreg hotspot/compiler test TestCharVect.java contains the following
>> code snippet:
>>       long l0 = (long)a1[i*4+0];
>>       long l1 = (long)a1[i*4+1];
>>       long l2 = (long)a1[i*4+2];
>>       long l3 = (long)a1[i*4+3];
>>       p4[i] = (l0 & 0xFFFFl) |
>>              ((l1 & 0xFFFFl) << 16) |
>>              ((l2 & 0xFFFFl) << 32) |
>>              ((l3 & 0xFFFFl) << 48);
> 
> Much code elided.
> 
>>
>>   0x00007fcac91a7d28: sbfx    x15, x15, #16, #16    <<<<<<<<<<<
>>   0x00007fcac91a7d2c: sbfiz    x14, x14, #32, #32
>>   0x00007fcac91a7d30: sbfiz    x16, x17, #16, #32
>>   0x00007fcac91a7d34: sxtw    x11, w11
>>   0x00007fcac91a7d38: orr    x11, x11, x16
>>   0x00007fcac91a7d3c: orr    x11, x11, x14
>>   0x00007fcac91a7d40: sbfiz    x14, x5, #32, #32
>>   0x00007fcac91a7d44: sbfiz    x16, x1, #16, #32
>>   0x00007fcac91a7d48: sxtw    x17, w3
>>   0x00007fcac91a7d4c: orr    x16, x17, x16
>>   0x00007fcac91a7d50: orr    x14, x16, x14
>>   0x00007fcac91a7d54: orr    x14, x14, x15
>>   0x00007fcac91a7d58: add    xscratch1, x13, #0x10
>>   0x00007fcac91a7d5c: str    x14, [xscratch1,w0,sxtw #3]
>>   0x00007fcac91a7d60: sbfx    x14, x18, #16, #16   <<<<<<<<<<<<<
> 
> I believe the lines marked "<<<<<<<<<" are the source of the problem. What they are trying to do is shift left by 48, what they in fact do is a bitfield extract of bit 16..31.
> 
> This is due to the baroque encoding of the sbfiz and sbfx instructions.
> 
> I believe the following patch will fix the problem.
> 
> Ok to push?
> Ed.
> 
> --- CUT HERE ---
> exporting patch:
> # HG changeset patch
> # User Edward Nevill edward.nevill@linaro.org
> # Date 1386246975 0
> #      Thu Dec 05 12:36:15 2013 +0000
> # Node ID 9a4f9705f626b50214d9b11917fd0aaef88685f3
> # Parent  141fc5d4229ae66293617edb25050506932471ec
> Fix lshift_ext in C2 for shifts >= 32
> 
> diff -r 141fc5d4229a -r 9a4f9705f626 src/cpu/aarch64/vm/aarch64.ad
> --- a/src/cpu/aarch64/vm/aarch64.ad	Mon Dec 02 17:19:42 2013 +0000
> +++ b/src/cpu/aarch64/vm/aarch64.ad	Thu Dec 05 12:36:15 2013 +0000
> @@ -6930,8 +6930,13 @@
>    format %{ "sbfm $dst, $src, 64-$scale, 31\t" %}
>  
>    ins_encode %{
> -    __ sbfm(as_Register($dst$$reg),
> -            as_Register($src$$reg), (64u - $scale$$constant) & 63, 31);
> +    if ($scale$$constant >= 32)
> +      // If scale >= 32 must encode this as LSL, sbfm encodes as SBFX, not SBFIZ
> +      __ ubfm(as_Register($dst$$reg),
> +		    as_Register($src$$reg), (64u - $scale$$constant) & 63, 63 - $scale$$constant);
> +    else
> +      __ sbfm(as_Register($dst$$reg),
> +		    as_Register($src$$reg), (64u - $scale$$constant) & 63, 31);
>    %}
>  
>    ins_pipe(pipe_class_default);
> 

Thank you, but the condition is rather fugly.  Can you try this instead,
please?

instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(DEFAULT_COST);
  format %{ "sbfm $dst, $src, 64-$scale, 31\t" %}

  ins_encode %{
    // __ sbfiz(as_Register($dst$$reg),
    // 	     as_Register($src$$reg),
    // 	     $scale$$constant & 63, (-$scale$$constant) & 63);
    __ sbfm(as_Register($dst$$reg),
	    as_Register($src$$reg),
	    (-$scale$$constant) & 63, ((-$scale$$constant) & 63) - 1);
  %}

  ins_pipe(pipe_class_default);
%}

Note that the "sbfiz" and the "sbfm" forms should be identical.  Maybe "sbfiz" is
easier to understand.

Reproducer (for C2, use -Xcomp) attached.

Andrew.


public class Sbfm {
    static long shift0(int n) {
        return (long)n << 0;
    }

    static long shift8(int n) {
        return (long)n << 8;
    }

    static long shift16(int n) {
        return (long)n << 16;
    }

    static long shift24(int n) {
        return (long)n << 24;
    }

    static long shift32(int n) {
        return (long)n << 32;
    }

    static long shift40(int n) {
        return (long)n << 40;
    }

    static long shift48(int n) {
        return (long)n << 48;
    }

    static long shift56(int n) {
        return (long)n << 56;
    }

    static long shift64(int n) {
        return (long)n << 64;
    }

    public static void main(String[] args) {
        int ival = 0x12345678;
        long val = -1;
        for (int i = 0; i <= 64; i+=8) {
            switch (i) {
            case 0:
                val = shift0(ival);
                break;
            case 8:
                val = shift8(ival);
                break;
            case 16:
                val = shift16(ival);
                break;
            case 24:
                val = shift24(ival);
                break;
            case 32:
                val = shift32(ival);
                break;
            case 40:
                val = shift40(ival);
                break;
            case 48:
                val = shift48(ival);
                break;
            case 56:
                val = shift56(ival);
                break;
            case 64:
                val = shift64(ival);
                break;
            }
            System.out.println(Long.toHexString(val));
        }
    }
}

Patch

diff -r 141fc5d4229a -r 9a4f9705f626 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Mon Dec 02 17:19:42 2013 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Thu Dec 05 12:36:15 2013 +0000
@@ -6930,8 +6930,13 @@ 
   format %{ "sbfm $dst, $src, 64-$scale, 31\t" %}
 
   ins_encode %{
-    __ sbfm(as_Register($dst$$reg),
-            as_Register($src$$reg), (64u - $scale$$constant) & 63, 31);
+    if ($scale$$constant >= 32)
+      // If scale >= 32 must encode this as LSL, sbfm encodes as SBFX, not SBFIZ
+      __ ubfm(as_Register($dst$$reg),
+		    as_Register($src$$reg), (64u - $scale$$constant) & 63, 63 - $scale$$constant);
+    else
+      __ sbfm(as_Register($dst$$reg),
+		    as_Register($src$$reg), (64u - $scale$$constant) & 63, 31);
   %}
 
   ins_pipe(pipe_class_default);