diff mbox

[RFC,AARCH64] : Using standard patterns for stack protection.

Message ID CAJK_mQ0JNdLBL6nzTM+GPrCcMZvLPRs3aCXBO-kFmyrU77GAfw@mail.gmail.com
State New
Headers show

Commit Message

Venkataramanan Kumar Jan. 22, 2014, 4:57 p.m. UTC
Hi Marcus,

After we changed the frame growing direction (downwards) in Aarch64,
the back-end now generates stack smashing set and test based on
generic code available in GCC.

But most of the ports (i386, spu, rs6000, s390, sh, sparc, tilepro and
tilegx) define machine descriptions using standard pattern names
‘stack_protect_set’ and ‘stack_protect_test’. This is done for both
TLS model as well as global variable based stack guard model.

Also all these ports in their machine descriptions,  have cleared the
register that loaded the canary value using an additional instruction.

(GCC internals)
‘stack_protect_set’
This pattern, if defined, moves a ptr_mode value from the memory in operand
1 to the memory in operand 0 without leaving the value in a register afterward.
This is to avoid leaking the value some place that an attacker might use to
rewrite the stack guard slot after having clobbered it.
If this pattern is not defined, then a plain move pattern is generated.
(GCC internals)

I believe this is done for extra security.  Also each target can
control the way of clearing the register that loaded the canary value.

In the attached patch, I have written machine descriptions patterns
for stack_protect_set and stack_protect_test for Aarch64.
Also I am clearing the register by moving 0 to the register while
setting the stack and using "eor" instruction while testing the stack.

However this generates un-optimal code when compared to generic GCC code.

While setting up stack canary ,

Generic code

        adrp    x19, __stack_chk_guard
        ldr     x1, [x19,#:lo12:__stack_chk_guard]
        str     x1, [x29,40]


Patch

        adrp    x19, __stack_chk_guard
        add     x1, x19, :lo12:__stack_chk_guard
        ldr     x2, [x1]
       str     x1, [x29,40]
       mov     x2, 0

while testing stack canary

generic code
        ldr     x1, [x29,40]
        ldr     x0, [x19,#:lo12:__stack_chk_guard]
        cmp     x1, x0
        bne     .L7

patch
        add     x19, x19, :lo12:__stack_chk_guard
        ldr     x1, [x29,40]
        ldr     x0, [x19]
        eor     x0, x1, x0
        cbnz    x0, .L7

Please let me know if this change is fine for Aarch64.

2014-01-22 Venkataramanan Kumar  <venkataramanan.kumar@linaro.org>
        * config/aarch64/aarch64.md (stack_protect_set, stack_protect_test)
        (stack_protect_set_<mode>, stack_protect_test_<mode>): Add
        machine descriptions for Stack Smashing Protector.

2014-01-22  Venkataramanan Kumar  <venkataramanan.kumar@linaro.org>
        * lib/target-supports.exp
          (check_effective_target_stack_protection): New procedure.
        * g++.dg/fstack-protector-strong.C: Add target check for
          stack protection.
        * gcc.dg/fstack-protector-strong.c: Likewise.


regards,
Venkat.

Comments

Venkataramanan Kumar Jan. 27, 2014, 4:14 a.m. UTC | #1
ping.

On 22 January 2014 22:27, Venkataramanan Kumar
<venkataramanan.kumar@linaro.org> wrote:
> Hi Marcus,
>
> After we changed the frame growing direction (downwards) in Aarch64,
> the back-end now generates stack smashing set and test based on
> generic code available in GCC.
>
> But most of the ports (i386, spu, rs6000, s390, sh, sparc, tilepro and
> tilegx) define machine descriptions using standard pattern names
> ‘stack_protect_set’ and ‘stack_protect_test’. This is done for both
> TLS model as well as global variable based stack guard model.
>
> Also all these ports in their machine descriptions,  have cleared the
> register that loaded the canary value using an additional instruction.
>
> (GCC internals)
> ‘stack_protect_set’
> This pattern, if defined, moves a ptr_mode value from the memory in operand
> 1 to the memory in operand 0 without leaving the value in a register afterward.
> This is to avoid leaking the value some place that an attacker might use to
> rewrite the stack guard slot after having clobbered it.
> If this pattern is not defined, then a plain move pattern is generated.
> (GCC internals)
>
> I believe this is done for extra security.  Also each target can
> control the way of clearing the register that loaded the canary value.
>
> In the attached patch, I have written machine descriptions patterns
> for stack_protect_set and stack_protect_test for Aarch64.
> Also I am clearing the register by moving 0 to the register while
> setting the stack and using "eor" instruction while testing the stack.
>
> However this generates un-optimal code when compared to generic GCC code.
>
> While setting up stack canary ,
>
> Generic code
>
>         adrp    x19, __stack_chk_guard
>         ldr     x1, [x19,#:lo12:__stack_chk_guard]
>         str     x1, [x29,40]
>
>
> Patch
>
>         adrp    x19, __stack_chk_guard
>         add     x1, x19, :lo12:__stack_chk_guard
>         ldr     x2, [x1]
>        str     x1, [x29,40]
>        mov     x2, 0
>
> while testing stack canary
>
> generic code
>         ldr     x1, [x29,40]
>         ldr     x0, [x19,#:lo12:__stack_chk_guard]
>         cmp     x1, x0
>         bne     .L7
>
> patch
>         add     x19, x19, :lo12:__stack_chk_guard
>         ldr     x1, [x29,40]
>         ldr     x0, [x19]
>         eor     x0, x1, x0
>         cbnz    x0, .L7
>
> Please let me know if this change is fine for Aarch64.
>
> 2014-01-22 Venkataramanan Kumar  <venkataramanan.kumar@linaro.org>
>         * config/aarch64/aarch64.md (stack_protect_set, stack_protect_test)
>         (stack_protect_set_<mode>, stack_protect_test_<mode>): Add
>         machine descriptions for Stack Smashing Protector.
>
> 2014-01-22  Venkataramanan Kumar  <venkataramanan.kumar@linaro.org>
>         * lib/target-supports.exp
>           (check_effective_target_stack_protection): New procedure.
>         * g++.dg/fstack-protector-strong.C: Add target check for
>           stack protection.
>         * gcc.dg/fstack-protector-strong.c: Likewise.
>
>
> regards,
> Venkat.
Venkataramanan Kumar Feb. 5, 2014, 10:29 a.m. UTC | #2
Hi Marcus,

> +  "ldr\\t%x2, %1\;str\\t%x2, %0\;mov\t%x2,0"
> +  [(set_attr "length" "12")])
>
> This pattern emits an opaque sequence of instructions that cannot be
> scheduled, is that necessary? Can we not expand individual
> instructions or at least split ?

Almost all the ports emits a template of assembly instructions.
I m not sure why they have to be generated this way.
But usage of these pattern is to clear the register that holds canary
value immediately after its usage.

> -/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* } } */
> +/* { dg-do compile { target stack_protection } } */
>  /* { dg-options "-O2 -fstack-protector-strong" } */
>
> Do we need a new effective target test, why is the existing
> "fstack_protector" not appropriate?

"stack_protector" does a run time test. It failed in cross compilation
environment and these are compile only tests.
Also I thought  richard suggested  me to add a new option for this.
ref: http://gcc.gnu.org/ml/gcc-patches/2013-11/msg03358.html

regards,
Venkat.

On 4 February 2014 21:39, Marcus Shawcroft <marcus.shawcroft@gmail.com> wrote:
> Hi Venkat,
>
> On 22 January 2014 16:57, Venkataramanan Kumar
> <venkataramanan.kumar@linaro.org> wrote:
>> Hi Marcus,
>>
>> After we changed the frame growing direction (downwards) in Aarch64,
>> the back-end now generates stack smashing set and test based on
>> generic code available in GCC.
>>
>> But most of the ports (i386, spu, rs6000, s390, sh, sparc, tilepro and
>> tilegx) define machine descriptions using standard pattern names
>> 'stack_protect_set' and 'stack_protect_test'. This is done for both
>> TLS model as well as global variable based stack guard model.
>
> +  ""
> +  "ldr\\t%x2, %1\;str\\t%x2, %0\;mov\t%x2,0"
> +  [(set_attr "length" "12")])
>
> This pattern emits an opaque sequence of instructions that cannot be
> scheduled, is that necessary? Can we not expand individual
> instructions or at least split ?
>
> +  "ldr\t%x3, %x1\;ldr\t%x0, %x2\;eor\t%x0, %x3, %x0"
> +  [(set_attr "length" "12")])
>
> Likewise.
>
> -/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* } } */
> +/* { dg-do compile { target stack_protection } } */
>  /* { dg-options "-O2 -fstack-protector-strong" } */
>
> Do we need a new effective target test, why is the existing
> "fstack_protector" not appropriate?
>
> Cheers
> /Marcus
Andrew Pinski March 14, 2014, 4:05 a.m. UTC | #3
On Wed, Feb 5, 2014 at 2:29 AM, Venkataramanan Kumar
<venkataramanan.kumar@linaro.org> wrote:
> Hi Marcus,
>
>> +  "ldr\\t%x2, %1\;str\\t%x2, %0\;mov\t%x2,0"
>> +  [(set_attr "length" "12")])
>>
>> This pattern emits an opaque sequence of instructions that cannot be
>> scheduled, is that necessary? Can we not expand individual
>> instructions or at least split ?
>
> Almost all the ports emits a template of assembly instructions.
> I m not sure why they have to be generated this way.
> But usage of these pattern is to clear the register that holds canary
> value immediately after its usage.

http://gcc.gnu.org/ml/gcc-patches/2005-06/msg01981.html answer the
original question of why.  It was a reply to the exact same question
being asked here but about the rs6000 (PowerPC) patch.

Thanks,
Andrew Pinski

>
>> -/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* } } */
>> +/* { dg-do compile { target stack_protection } } */
>>  /* { dg-options "-O2 -fstack-protector-strong" } */
>>
>> Do we need a new effective target test, why is the existing
>> "fstack_protector" not appropriate?
>
> "stack_protector" does a run time test. It failed in cross compilation
> environment and these are compile only tests.
> Also I thought  richard suggested  me to add a new option for this.
> ref: http://gcc.gnu.org/ml/gcc-patches/2013-11/msg03358.html
>
> regards,
> Venkat.
>
> On 4 February 2014 21:39, Marcus Shawcroft <marcus.shawcroft@gmail.com> wrote:
>> Hi Venkat,
>>
>> On 22 January 2014 16:57, Venkataramanan Kumar
>> <venkataramanan.kumar@linaro.org> wrote:
>>> Hi Marcus,
>>>
>>> After we changed the frame growing direction (downwards) in Aarch64,
>>> the back-end now generates stack smashing set and test based on
>>> generic code available in GCC.
>>>
>>> But most of the ports (i386, spu, rs6000, s390, sh, sparc, tilepro and
>>> tilegx) define machine descriptions using standard pattern names
>>> 'stack_protect_set' and 'stack_protect_test'. This is done for both
>>> TLS model as well as global variable based stack guard model.
>>
>> +  ""
>> +  "ldr\\t%x2, %1\;str\\t%x2, %0\;mov\t%x2,0"
>> +  [(set_attr "length" "12")])
>>
>> This pattern emits an opaque sequence of instructions that cannot be
>> scheduled, is that necessary? Can we not expand individual
>> instructions or at least split ?
>>
>> +  "ldr\t%x3, %x1\;ldr\t%x0, %x2\;eor\t%x0, %x3, %x0"
>> +  [(set_attr "length" "12")])
>>
>> Likewise.
>>
>> -/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* } } */
>> +/* { dg-do compile { target stack_protection } } */
>>  /* { dg-options "-O2 -fstack-protector-strong" } */
>>
>> Do we need a new effective target test, why is the existing
>> "fstack_protector" not appropriate?
>>
>> Cheers
>> /Marcus
Ramana Radhakrishnan March 14, 2014, 2:09 p.m. UTC | #4
On Fri, Mar 14, 2014 at 4:05 AM, Andrew Pinski <pinskia@gmail.com> wrote:
> On Wed, Feb 5, 2014 at 2:29 AM, Venkataramanan Kumar
> <venkataramanan.kumar@linaro.org> wrote:
>> Hi Marcus,
>>
>>> +  "ldr\\t%x2, %1\;str\\t%x2, %0\;mov\t%x2,0"
>>> +  [(set_attr "length" "12")])
>>>
>>> This pattern emits an opaque sequence of instructions that cannot be
>>> scheduled, is that necessary? Can we not expand individual
>>> instructions or at least split ?
>>
>> Almost all the ports emits a template of assembly instructions.
>> I m not sure why they have to be generated this way.
>> But usage of these pattern is to clear the register that holds canary
>> value immediately after its usage.
>
> http://gcc.gnu.org/ml/gcc-patches/2005-06/msg01981.html answer the
> original question of why.  It was a reply to the exact same question
> being asked here but about the rs6000 (PowerPC) patch.
>

To be precise,  you probably want this one (
http://gcc.gnu.org/ml/gcc-patches/2005-06/msg01968.html
) which gives the reason rather than the other bits about xor. on PowerPC.

It looks like the fundamental reason is to keep this in registers for
as little time as possible and not allow this to get spilled to the
stack where it may be picked up for an exploit. That may make more
sense from a security point of view.


regards
Ramana


> Thanks,
> Andrew Pinski
>
>>
>>> -/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* } } */
>>> +/* { dg-do compile { target stack_protection } } */
>>>  /* { dg-options "-O2 -fstack-protector-strong" } */
>>>
>>> Do we need a new effective target test, why is the existing
>>> "fstack_protector" not appropriate?
>>
>> "stack_protector" does a run time test. It failed in cross compilation
>> environment and these are compile only tests.
>> Also I thought  richard suggested  me to add a new option for this.
>> ref: http://gcc.gnu.org/ml/gcc-patches/2013-11/msg03358.html
>>
>> regards,
>> Venkat.
>>
>> On 4 February 2014 21:39, Marcus Shawcroft <marcus.shawcroft@gmail.com> wrote:
>>> Hi Venkat,
>>>
>>> On 22 January 2014 16:57, Venkataramanan Kumar
>>> <venkataramanan.kumar@linaro.org> wrote:
>>>> Hi Marcus,
>>>>
>>>> After we changed the frame growing direction (downwards) in Aarch64,
>>>> the back-end now generates stack smashing set and test based on
>>>> generic code available in GCC.
>>>>
>>>> But most of the ports (i386, spu, rs6000, s390, sh, sparc, tilepro and
>>>> tilegx) define machine descriptions using standard pattern names
>>>> 'stack_protect_set' and 'stack_protect_test'. This is done for both
>>>> TLS model as well as global variable based stack guard model.
>>>
>>> +  ""
>>> +  "ldr\\t%x2, %1\;str\\t%x2, %0\;mov\t%x2,0"
>>> +  [(set_attr "length" "12")])
>>>
>>> This pattern emits an opaque sequence of instructions that cannot be
>>> scheduled, is that necessary? Can we not expand individual
>>> instructions or at least split ?
>>>
>>> +  "ldr\t%x3, %x1\;ldr\t%x0, %x2\;eor\t%x0, %x3, %x0"
>>> +  [(set_attr "length" "12")])
>>>
>>> Likewise.
>>>
>>> -/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* } } */
>>> +/* { dg-do compile { target stack_protection } } */
>>>  /* { dg-options "-O2 -fstack-protector-strong" } */
>>>
>>> Do we need a new effective target test, why is the existing
>>> "fstack_protector" not appropriate?
>>>
>>> Cheers
>>> /Marcus
Marcus Shawcroft March 14, 2014, 2:12 p.m. UTC | #5
Hi Venkat

On 5 February 2014 10:29, Venkataramanan Kumar
<venkataramanan.kumar@linaro.org> wrote:
> Hi Marcus,
>
>> +  "ldr\\t%x2, %1\;str\\t%x2, %0\;mov\t%x2,0"
>> +  [(set_attr "length" "12")])
>>
>> This pattern emits an opaque sequence of instructions that cannot be
>> scheduled, is that necessary? Can we not expand individual
>> instructions or at least split ?
>
> Almost all the ports emits a template of assembly instructions.
> I m not sure why they have to be generated this way.
> But usage of these pattern is to clear the register that holds canary
> value immediately after its usage.

I've just read the thread Andrew pointed out, thanks, I'm happy that
there is a good reason to do it this way.  Andrew, thanks for
providing the background.

+  [(set_attr "length" "12")])
+

These patterns should also set the "type" attribute,  a reasonable
value would be "multiple".

>> -/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* } } */
>> +/* { dg-do compile { target stack_protection } } */
>>  /* { dg-options "-O2 -fstack-protector-strong" } */
>>
>> Do we need a new effective target test, why is the existing
>> "fstack_protector" not appropriate?
>
> "stack_protector" does a run time test. It failed in cross compilation
> environment and these are compile only tests.

This works fine in my cross environment, how does yours fail?


> Also I thought  richard suggested  me to add a new option for this.
> ref: http://gcc.gnu.org/ml/gcc-patches/2013-11/msg03358.html

I read that comment to mean use an effective target test instead of
matching triples. I don't see that re-using an existing effective
target test contradicts that suggestion.

Looking through the test suite I see that there are:

6 tests that use dg-do compile with dg-require-effective-target fstack_protector

4 tests that use dg-do run with dg-require-effective-target fstack_protector

2 tests that use dg-do run {target native} dg-require-effective-target
fstack_protector

and finally the 2 tests we are discussing that use dg-compile with a
triple test.

so there are already tests in the testsuite that use dg-do compile
with the existing effective target test.

I see no immediately obvious reason why the two tests that require
target native require the native constraint... but I guess that is a
different issue.

The proposed patch moves the triple matching from the test case into
the .exp file, iff the existing run time test is inappropriate, would
it not be better to write a new effective target test that performs a
compile/link test rather resorting to a triple match?

This patch as presented would also result in two effective target
tests with names that are easily confused and provide no hint as to
their different purposes:

fstack_protector
fstack_protection

... if we are going to have two similar effective target tests then
they ought to be named in a fashion that doesn;t lead to confusion in
the future.

Can you respin and split the patch into two please one with the
aarch64 target change the other with the testsuite effective target
change?

Cheers
/Marcus
diff mbox

Patch

Index: gcc/config/aarch64/aarch64.md
===================================================================
--- gcc/config/aarch64/aarch64.md	(revision 206874)
+++ gcc/config/aarch64/aarch64.md	(working copy)
@@ -99,6 +99,8 @@ 
     UNSPEC_TLSDESC
     UNSPEC_USHL_2S
     UNSPEC_VSTRUCTDUMMY
+    UNSPEC_SP_SET
+    UNSPEC_SP_TEST
 ])
 
 (define_c_enum "unspecv" [
@@ -3631,6 +3633,65 @@ 
   DONE;
 })
 
+;; Named patterns for stack smashing protection.
+(define_expand "stack_protect_set"
+  [(match_operand 0 "memory_operand")
+   (match_operand 1 "memory_operand")]
+  ""
+{
+  enum machine_mode mode = GET_MODE (operands[0]);
+
+  emit_insn ((mode == DImode
+	      ? gen_stack_protect_set_di
+	      : gen_stack_protect_set_si) (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "stack_protect_set_<mode>"
+  [(set (match_operand:PTR 0 "memory_operand" "=m")
+	(unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
+	 UNSPEC_SP_SET))
+   (set (match_scratch:PTR 2 "=&r") (const_int 0))]
+  ""
+  "ldr\\t%x2, %1\;str\\t%x2, %0\;mov\t%x2,0"
+  [(set_attr "length" "12")])
+
+(define_expand "stack_protect_test"
+  [(match_operand 0 "memory_operand")
+   (match_operand 1 "memory_operand")
+   (match_operand 2)]
+  ""
+{
+
+  rtx result = gen_reg_rtx (Pmode);
+
+  enum machine_mode mode = GET_MODE (operands[0]);
+
+  emit_insn ((mode == DImode
+	      ? gen_stack_protect_test_di
+	      : gen_stack_protect_test_si) (result,
+					    operands[0],
+					    operands[1]));
+
+  if (mode == DImode)
+    emit_jump_insn (gen_cbranchdi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx),
+				    result, const0_rtx, operands[2]));
+  else
+    emit_jump_insn (gen_cbranchsi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx),
+				    result, const0_rtx, operands[2]));
+  DONE;
+})
+
+(define_insn "stack_protect_test_<mode>"
+  [(set (match_operand:PTR 0 "register_operand")
+	(unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")
+		     (match_operand:PTR 2 "memory_operand" "m")]
+	 UNSPEC_SP_TEST))
+   (clobber (match_scratch:PTR 3 "=&r"))]
+  ""
+  "ldr\t%x3, %x1\;ldr\t%x0, %x2\;eor\t%x0, %x3, %x0"
+  [(set_attr "length" "12")])
+
 ;; AdvSIMD Stuff
 (include "aarch64-simd.md")
 
Index: gcc/testsuite/g++.dg/fstack-protector-strong.C
===================================================================
--- gcc/testsuite/g++.dg/fstack-protector-strong.C	(revision 206874)
+++ gcc/testsuite/g++.dg/fstack-protector-strong.C	(working copy)
@@ -1,6 +1,6 @@ 
 /* Test that stack protection is done on chosen functions. */
 
-/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-do compile { target stack_protection } } */
 /* { dg-options "-O2 -fstack-protector-strong" } */
 
 class A
Index: gcc/testsuite/gcc.dg/fstack-protector-strong.c
===================================================================
--- gcc/testsuite/gcc.dg/fstack-protector-strong.c	(revision 206874)
+++ gcc/testsuite/gcc.dg/fstack-protector-strong.c	(working copy)
@@ -1,6 +1,6 @@ 
 /* Test that stack protection is done on chosen functions. */
 
-/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* } } */
+/* { dg-do compile { target stack_protection } } */
 /* { dg-options "-O2 -fstack-protector-strong" } */
 
 #include<string.h>
Index: gcc/testsuite/lib/target-supports.exp
===================================================================
--- gcc/testsuite/lib/target-supports.exp	(revision 206874)
+++ gcc/testsuite/lib/target-supports.exp	(working copy)
@@ -810,6 +810,17 @@ 
     } "-fstack-protector"]
 }
 
+# Return 1 the target supports stack protection.
+proc check_effective_target_stack_protection { } {
+    if { [istarget i?86-*-*]
+         || [istarget x86_64-*-*]
+         || [istarget aarch64-*-*]
+         || [istarget s390x-*-*] } {
+        return 1;
+    }
+    return 0
+}
+
 # Return 1 if compilation with -freorder-blocks-and-partition is error-free
 # for trivial code, 0 otherwise.