diff mbox

[v5,1/3] ARM: probes: check stack operation when decoding

Message ID 1409144552-12751-2-git-send-email-wangnan0@huawei.com
State New
Headers show

Commit Message

Wang Nan Aug. 27, 2014, 1:02 p.m. UTC
This patch improves the ARM instruction decoder, allowing it to check whether
an instruction is a stack store operation. This information is important
for kprobe optimization.

For normal str instructions, this patch adds a series of _SP_STACK
register indicators in the decoder to test the base and offset registers
in str <Rt>, [<Rn>, <Rm>] against sp.

For stm instructions, it checks the sp register in the instruction-specific
decoder.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: "David A. Long" <dave.long@linaro.org> 
Cc: Jon Medhurst <tixy@linaro.org>
Cc: Taras Kondratiuk <taras.kondratiuk@linaro.org>
Cc: Ben Dooks <ben.dooks@codethink.co.uk>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Will Deacon <will.deacon@arm.com>

---
 arch/arm/include/asm/probes.h    |  1 +
 arch/arm/kernel/kprobes-common.c |  4 ++++
 arch/arm/kernel/probes-arm.c     |  4 ++--
 arch/arm/kernel/probes-thumb.c   |  6 +++---
 arch/arm/kernel/probes.c         | 20 ++++++++++++++++++--
 arch/arm/kernel/probes.h         |  6 ++++++
 6 files changed, 34 insertions(+), 7 deletions(-)

Comments

Masami Hiramatsu Aug. 28, 2014, 9:51 a.m. UTC | #1
(2014/08/27 22:02), Wang Nan wrote:
> This patch improves arm instruction decoder, allows it check whether an
> instruction is a stack store operation. This information is important
> for kprobe optimization.
> 
> For normal str instruction, this patch add a series of _SP_STACK
> register indicator in the decoder to test the base and offset register
> in ldr <Rt>, [<Rn>, <Rm>] against sp.
> 
> For stm instruction, it check sp register in instruction specific
> decoder.

OK, reviewed. but since I'm not so sure about arm32 ISA,
I need help from ARM32 maintainer to ack this.

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>

Thank you,

> 
> Signed-off-by: Wang Nan <wangnan0@huawei.com>
> Cc: Russell King <linux@arm.linux.org.uk>
> Cc: "David A. Long" <dave.long@linaro.org> 
> Cc: Jon Medhurst <tixy@linaro.org>
> Cc: Taras Kondratiuk <taras.kondratiuk@linaro.org>
> Cc: Ben Dooks <ben.dooks@codethink.co.uk>
> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
> Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
> Cc: Will Deacon <will.deacon@arm.com>
> 
> ---
>  arch/arm/include/asm/probes.h    |  1 +
>  arch/arm/kernel/kprobes-common.c |  4 ++++
>  arch/arm/kernel/probes-arm.c     |  4 ++--
>  arch/arm/kernel/probes-thumb.c   |  6 +++---
>  arch/arm/kernel/probes.c         | 20 ++++++++++++++++++--
>  arch/arm/kernel/probes.h         |  6 ++++++
>  6 files changed, 34 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/arm/include/asm/probes.h b/arch/arm/include/asm/probes.h
> index 806cfe6..3f6912c 100644
> --- a/arch/arm/include/asm/probes.h
> +++ b/arch/arm/include/asm/probes.h
> @@ -38,6 +38,7 @@ struct arch_probes_insn {
>  	probes_check_cc			*insn_check_cc;
>  	probes_insn_singlestep_t	*insn_singlestep;
>  	probes_insn_fn_t		*insn_fn;
> +	bool				is_stack_operation;
>  };
>  
>  #endif
> diff --git a/arch/arm/kernel/kprobes-common.c b/arch/arm/kernel/kprobes-common.c
> index 0bf5d64..4e8b918 100644
> --- a/arch/arm/kernel/kprobes-common.c
> +++ b/arch/arm/kernel/kprobes-common.c
> @@ -133,6 +133,10 @@ kprobe_decode_ldmstm(probes_opcode_t insn, struct arch_probes_insn *asi,
>  	int is_ldm = insn & 0x100000;
>  	int rn = (insn >> 16) & 0xf;
>  
> +	/* whether this is a push instruction? */
> +	if ((rn == 0xd) && (!is_ldm))
> +		asi->is_stack_operation = true;
> +
>  	if (rn <= 12 && (reglist & 0xe000) == 0) {
>  		/* Instruction only uses registers in the range R0..R12 */
>  		handler = emulate_generic_r0_12_noflags;
> diff --git a/arch/arm/kernel/probes-arm.c b/arch/arm/kernel/probes-arm.c
> index 8eaef81..5c187ba 100644
> --- a/arch/arm/kernel/probes-arm.c
> +++ b/arch/arm/kernel/probes-arm.c
> @@ -577,7 +577,7 @@ static const union decode_item arm_cccc_01xx_table[] = {
>  	/* STR (immediate)	cccc 010x x0x0 xxxx xxxx xxxx xxxx xxxx */
>  	/* STRB (immediate)	cccc 010x x1x0 xxxx xxxx xxxx xxxx xxxx */
>  	DECODE_EMULATEX	(0x0e100000, 0x04000000, PROBES_STORE,
> -						 REGS(NOPCWB, ANY, 0, 0, 0)),
> +						 REGS(NOPCWB_SP_STACK, ANY, 0, 0, 0)),
>  
>  	/* LDR (immediate)	cccc 010x x0x1 xxxx xxxx xxxx xxxx xxxx */
>  	/* LDRB (immediate)	cccc 010x x1x1 xxxx xxxx xxxx xxxx xxxx */
> @@ -587,7 +587,7 @@ static const union decode_item arm_cccc_01xx_table[] = {
>  	/* STR (register)	cccc 011x x0x0 xxxx xxxx xxxx xxxx xxxx */
>  	/* STRB (register)	cccc 011x x1x0 xxxx xxxx xxxx xxxx xxxx */
>  	DECODE_EMULATEX	(0x0e100000, 0x06000000, PROBES_STORE,
> -						 REGS(NOPCWB, ANY, 0, 0, NOPC)),
> +						 REGS(NOPCWB_SP_STACK, ANY, 0, 0, NOPC_SP_STACK)),
>  
>  	/* LDR (register)	cccc 011x x0x1 xxxx xxxx xxxx xxxx xxxx */
>  	/* LDRB (register)	cccc 011x x1x1 xxxx xxxx xxxx xxxx xxxx */
> diff --git a/arch/arm/kernel/probes-thumb.c b/arch/arm/kernel/probes-thumb.c
> index 4131351..d0d30d8 100644
> --- a/arch/arm/kernel/probes-thumb.c
> +++ b/arch/arm/kernel/probes-thumb.c
> @@ -54,7 +54,7 @@ static const union decode_item t32_table_1110_100x_x1xx[] = {
>  	/* STRD (immediate)	1110 1001 x1x0 xxxx xxxx xxxx xxxx xxxx */
>  	/* LDRD (immediate)	1110 1001 x1x1 xxxx xxxx xxxx xxxx xxxx */
>  	DECODE_EMULATEX	(0xff400000, 0xe9400000, PROBES_T32_LDRDSTRD,
> -						 REGS(NOPCWB, NOSPPC, NOSPPC, 0, 0)),
> +						 REGS(NOPCWB_SP_STACK, NOSPPC, NOSPPC, 0, 0)),
>  
>  	/* TBB			1110 1000 1101 xxxx xxxx xxxx 0000 xxxx */
>  	/* TBH			1110 1000 1101 xxxx xxxx xxxx 0001 xxxx */
> @@ -345,12 +345,12 @@ static const union decode_item t32_table_1111_100x[] = {
>  	/* STR (immediate)	1111 1000 1100 xxxx xxxx xxxx xxxx xxxx */
>  	/* LDR (immediate)	1111 1000 1101 xxxx xxxx xxxx xxxx xxxx */
>  	DECODE_EMULATEX	(0xffe00000, 0xf8c00000, PROBES_T32_LDRSTR,
> -						 REGS(NOPCX, ANY, 0, 0, 0)),
> +						 REGS(NOPCX_SP_STACK, ANY, 0, 0, 0)),
>  
>  	/* STR (register)	1111 1000 0100 xxxx xxxx 0000 00xx xxxx */
>  	/* LDR (register)	1111 1000 0101 xxxx xxxx 0000 00xx xxxx */
>  	DECODE_EMULATEX	(0xffe00fc0, 0xf8400000, PROBES_T32_LDRSTR,
> -						 REGS(NOPCX, ANY, 0, 0, NOSPPC)),
> +						 REGS(NOPCX_SP_STACK, ANY, 0, 0, NOSPPC)),
>  
>  	/* LDRB (literal)	1111 1000 x001 1111 xxxx xxxx xxxx xxxx */
>  	/* LDRSB (literal)	1111 1001 x001 1111 xxxx xxxx xxxx xxxx */
> diff --git a/arch/arm/kernel/probes.c b/arch/arm/kernel/probes.c
> index 1c77b8d..f811cac 100644
> --- a/arch/arm/kernel/probes.c
> +++ b/arch/arm/kernel/probes.c
> @@ -258,7 +258,9 @@ set_emulated_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
>   * non-zero value, the corresponding nibble in pinsn is validated and modified
>   * according to the type.
>   */
> -static bool __kprobes decode_regs(probes_opcode_t *pinsn, u32 regs, bool modify)
> +static bool __kprobes decode_regs(probes_opcode_t *pinsn,
> +		struct arch_probes_insn *asi,
> +		u32 regs, bool modify)
>  {
>  	probes_opcode_t insn = *pinsn;
>  	probes_opcode_t mask = 0xf; /* Start at least significant nibble */
> @@ -307,11 +309,14 @@ static bool __kprobes decode_regs(probes_opcode_t *pinsn, u32 regs, bool modify)
>  				goto reject;
>  			break;
>  
> +		case REG_TYPE_NOPCWB_SP_STACK:
>  		case REG_TYPE_NOPCWB:
>  			if (!is_writeback(insn))
>  				break; /* No writeback, so any register is OK */
>  			/* fall through... */
> +		case REG_TYPE_NOPC_SP_STACK:
>  		case REG_TYPE_NOPC:
> +		case REG_TYPE_NOPCX_SP_STACK:
>  		case REG_TYPE_NOPCX:
>  			/* Reject PC (R15) */
>  			if (((insn ^ 0xffffffff) & mask) == 0)
> @@ -319,6 +324,15 @@ static bool __kprobes decode_regs(probes_opcode_t *pinsn, u32 regs, bool modify)
>  			break;
>  		}
>  
> +		/* check stack operation */
> +		switch (regs & 0xf) {
> +			case REG_TYPE_NOPCWB_SP_STACK:
> +			case REG_TYPE_NOPC_SP_STACK:
> +			case REG_TYPE_NOPCX_SP_STACK:
> +				if (((insn ^ 0xdddddddd) & mask) == 0)
> +					asi->is_stack_operation = true;
> +		}
> +
>  		/* Replace value of nibble with new register number... */
>  		insn &= ~mask;
>  		insn |= new_bits & mask;
> @@ -394,6 +408,8 @@ probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
>  	const struct decode_header *next;
>  	bool matched = false;
>  
> +	asi->is_stack_operation = false;
> +
>  	if (emulate)
>  		insn = prepare_emulated_insn(insn, asi, thumb);
>  
> @@ -410,7 +426,7 @@ probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
>  		if (!matched && (insn & h->mask.bits) != h->value.bits)
>  			continue;
>  
> -		if (!decode_regs(&insn, regs, emulate))
> +		if (!decode_regs(&insn, asi, regs, emulate))
>  			return INSN_REJECTED;
>  
>  		switch (type) {
> diff --git a/arch/arm/kernel/probes.h b/arch/arm/kernel/probes.h
> index dba9f24..568fd01 100644
> --- a/arch/arm/kernel/probes.h
> +++ b/arch/arm/kernel/probes.h
> @@ -278,13 +278,19 @@ enum decode_reg_type {
>  	REG_TYPE_NOSP,	   /* Register must not be SP */
>  	REG_TYPE_NOSPPC,   /* Register must not be SP or PC */
>  	REG_TYPE_NOPC,	   /* Register must not be PC */
> +	REG_TYPE_NOPC_SP_STACK,	   /* REG_TYPE_NOPC and if this reg is sp
> +				      then this is a stack operation */
>  	REG_TYPE_NOPCWB,   /* No PC if load/store write-back flag also set */
> +	REG_TYPE_NOPCWB_SP_STACK,   /* REG_TYPE_NOPCWB and, if this reg is sp
> +				       then this is a stack operation */
>  
>  	/* The following types are used when the encoding for PC indicates
>  	 * another instruction form. This distiction only matters for test
>  	 * case coverage checks.
>  	 */
>  	REG_TYPE_NOPCX,	   /* Register must not be PC */
> +	REG_TYPE_NOPCX_SP_STACK,	   /* REG_TYPE_NOPCX and if this reg is sp
> +					      then this is a stack operation */
>  	REG_TYPE_NOSPPCX,  /* Register must not be SP or PC */
>  
>  	/* Alias to allow '0' arg to be used in REGS macro. */
>
Russell King - ARM Linux Aug. 28, 2014, 10:20 a.m. UTC | #2
On Thu, Aug 28, 2014 at 06:51:15PM +0900, Masami Hiramatsu wrote:
> (2014/08/27 22:02), Wang Nan wrote:
> > This patch improves arm instruction decoder, allows it check whether an
> > instruction is a stack store operation. This information is important
> > for kprobe optimization.
> > 
> > For normal str instruction, this patch add a series of _SP_STACK
> > register indicator in the decoder to test the base and offset register
> > in ldr <Rt>, [<Rn>, <Rm>] against sp.
> > 
> > For stm instruction, it check sp register in instruction specific
> > decoder.
> 
> OK, reviewed. but since I'm not so sure about arm32 ISA,
> I need help from ARM32 maintainer to ack this.

What you actually need is an ack from the ARM kprobes people who
understand this code.  That would be much more meaningful than my
ack.  They're already on the Cc list.
Will Deacon Aug. 28, 2014, 10:24 a.m. UTC | #3
On Thu, Aug 28, 2014 at 11:20:21AM +0100, Russell King - ARM Linux wrote:
> On Thu, Aug 28, 2014 at 06:51:15PM +0900, Masami Hiramatsu wrote:
> > (2014/08/27 22:02), Wang Nan wrote:
> > > This patch improves arm instruction decoder, allows it check whether an
> > > instruction is a stack store operation. This information is important
> > > for kprobe optimization.
> > > 
> > > For normal str instruction, this patch add a series of _SP_STACK
> > > register indicator in the decoder to test the base and offset register
> > > in ldr <Rt>, [<Rn>, <Rm>] against sp.
> > > 
> > > For stm instruction, it check sp register in instruction specific
> > > decoder.
> > 
> > OK, reviewed. but since I'm not so sure about arm32 ISA,
> > I need help from ARM32 maintainer to ack this.
> 
> What you actually need is an ack from the ARM kprobes people who
> understand this code.  That would be much more meaningful than my
> ack.  They're already on the Cc list.

Tixy, can you take a look please?

Will
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Wang Nan Aug. 30, 2014, 1:28 a.m. UTC | #4
On 2014/8/29 16:47, Jon Medhurst (Tixy) wrote:
> On Thu, 2014-08-28 at 11:24 +0100, Will Deacon wrote:
>> On Thu, Aug 28, 2014 at 11:20:21AM +0100, Russell King - ARM Linux wrote:
>>> On Thu, Aug 28, 2014 at 06:51:15PM +0900, Masami Hiramatsu wrote:
>>>> (2014/08/27 22:02), Wang Nan wrote:
>>>>> This patch improves arm instruction decoder, allows it check whether an
>>>>> instruction is a stack store operation. This information is important
>>>>> for kprobe optimization.
>>>>>
>>>>> For normal str instruction, this patch add a series of _SP_STACK
>>>>> register indicator in the decoder to test the base and offset register
>>>>> in ldr <Rt>, [<Rn>, <Rm>] against sp.
>>>>>
>>>>> For stm instruction, it check sp register in instruction specific
>>>>> decoder.
>>>>
>>>> OK, reviewed. but since I'm not so sure about arm32 ISA,
>>>> I need help from ARM32 maintainer to ack this.
>>>
>>> What you actually need is an ack from the ARM kprobes people who
>>> understand this code.  That would be much more meaningful than my
>>> ack.  They're already on the Cc list.
>>
>> Tixy, can you take a look please?
> 
> I'll take an in depth look on Monday as I'm currently on holiday, so for
> now just some brief and possibly not well thought out comments...
> 
> - If the intent is to not optimise stack push operations, then this
> actually excludes the main use of kprobes which I believe is to insert
> probes at the start of functions (there's even a specific jprobes API
> for that) this is because functions usually start by saving registers on
> the stack.

Agreed. If the decoder can provide more information, kprobeopt can dynamically
compute the range of stack an instruction requires, then adjust the stack protection range.
This needs the ARM decoder to provide more information. For example: for a "push {r4, r5}"
instruction, the decoder should report that it is a stack store operation requiring 8 bytes
of stack; then, when composing the trampoline code, we can put registers at
[sp, #-8]. Only instructions such as "str r0, [sp, r1]" should be prevented.

However, this needs more improvement to the decoder: all store operations would
then have to use a special decoder. What do you think?

> 
> - Crowbarring in special case testing for stack operations looks a bit
> inelegant and not a sustainable way of doing this, what about the next
> special case we need? However, stack push operations _are_ a general
> special cases for instruction emulation so perhaps that's OK, and leads
> me to...
> 
> - The current 'unoptimised' kprobes implementation allows for pushing on
> the stack (see __und_svc and the unused (?) jprobe_return) but this is
> just aimed at stm instructions, not things like "str r0, [sp, -imm]!"
> that might be used to simultaneously save a register and reserve an
> arbitrary amount of stack space. Probing such instructions could lead to
> the kprobes code trashing the kernel stack.

With a quick search I found only two instructions matching "str.*\[sp,[^\]]*-[^4]",
one in Ldiv0_64 and another in Ldiv0; both are "str     lr, [sp, #-8]!". So I think
such instructions are very special.

Furthermore, I thought "unoptimised" kprobes use another stack. Could you please
explain how such probing would trash the normal kernel stack?

Thank you.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Jon Medhurst (Tixy) Sept. 1, 2014, 5:29 p.m. UTC | #5
On Sat, 2014-08-30 at 09:28 +0800, Wang Nan wrote:
> On 2014/8/29 16:47, Jon Medhurst (Tixy) wrote:
> > On Thu, 2014-08-28 at 11:24 +0100, Will Deacon wrote:
> >> On Thu, Aug 28, 2014 at 11:20:21AM +0100, Russell King - ARM Linux wrote:
> >>> On Thu, Aug 28, 2014 at 06:51:15PM +0900, Masami Hiramatsu wrote:
> >>>> (2014/08/27 22:02), Wang Nan wrote:
> >>>>> This patch improves arm instruction decoder, allows it check whether an
> >>>>> instruction is a stack store operation. This information is important
> >>>>> for kprobe optimization.
> >>>>>
> >>>>> For normal str instruction, this patch add a series of _SP_STACK
> >>>>> register indicator in the decoder to test the base and offset register
> >>>>> in ldr <Rt>, [<Rn>, <Rm>] against sp.
> >>>>>
> >>>>> For stm instruction, it check sp register in instruction specific
> >>>>> decoder.
> >>>>
> >>>> OK, reviewed. but since I'm not so sure about arm32 ISA,
> >>>> I need help from ARM32 maintainer to ack this.
> >>>
> >>> What you actually need is an ack from the ARM kprobes people who
> >>> understand this code.  That would be much more meaningful than my
> >>> ack.  They're already on the Cc list.
> >>
> >> Tixy, can you take a look please?
> > 
> > I'll take an in depth look on Monday as I'm currently on holiday, so for
> > now just some brief and possibly not well thought out comments...
> > 
> > - If the intent is to not optimise stack push operations, then this
> > actually excludes the main use of kprobes which I believe is to insert
> > probes at the start of functions (there's even a specific jprobes API
> > for that) this is because functions usually start by saving registers on
> > the stack.
> 
> Agree. If the decoder can bring up more information, kprobeopt can dynamically
> compute the range of stack an instruction require, then adjust stack protection range.
> This need ARM decoder bring up more information. For example: for a "push {r4, r5}"
> instruction, decoder should report it is a stack store operation, require 8 bytes
> of stack, then when composing trampoline code, we can put registers at
> [sp, #-8]. Only instructions such as "str r0, [sp, r1]" should be prevented.
> 
> However, this need more improvement on decoder: all store operations should use
> a special decorer then. What do you think?

This doesn't work for the non-optimised kprobes case because, when a
probe is hit, we couldn't know what stack addresses to reserve until
we're several calls deep in the exception handler and possibly already
using those addresses. Anyway, perhaps we don't need to worry about
these instructions after all, more below...

> 
> > 
> > - Crowbarring in special case testing for stack operations looks a bit
> > inelegant and not a sustainable way of doing this, what about the next
> > special case we need? However, stack push operations _are_ a general
> > special cases for instruction emulation so perhaps that's OK, and leads
> > me to...
> > 
> > - The current 'unoptimised' kprobes implementation allows for pushing on
> > the stack (see __und_svc and the unused (?) jprobe_return) but this is
> > just aimed at stm instructions, not things like "str r0, [sp, -imm]!"
> > that might be used to simultaneously save a register and reserve an
> > arbitrary amount of stack space. Probing such instructions could lead to
> > the kprobes code trashing the kernel stack.
> 
> By a quick search I just find tow instructions matching "str.*\[sp,[^\]]*-[^4]",
> one in Ldiv0_64, another in Ldiv0, both are "str     lr, [sp, #-8]!". So I think
> such instructions are very special.

Yes, I built a multi_v7_defconfig kernel with GCC 4.9 and I too could
only find those occurrences of the problematic instructions, which come
from human-written assembler, so we probably aren't restricting any kprobes
users if we don't support probing of those types of str instructions.

That would just leave us to support stm instructions which push
registers onto the stack, and the optimised kprobes could take the same
approach as the unoptimised ones and just unconditionally reserve 64
bytes of stack on every probe (see __und_svc in entry-armv.S).

> Furthermore, I thought "unoptimised" kprobe use another stack, could you please
> explain how such probing trashing normal kernel stack?

No, unoptimised probes don't use another stack; they use the stack of
the current kernel task.
diff mbox

Patch

diff --git a/arch/arm/include/asm/probes.h b/arch/arm/include/asm/probes.h
index 806cfe6..3f6912c 100644
--- a/arch/arm/include/asm/probes.h
+++ b/arch/arm/include/asm/probes.h
@@ -38,6 +38,7 @@  struct arch_probes_insn {
 	probes_check_cc			*insn_check_cc;
 	probes_insn_singlestep_t	*insn_singlestep;
 	probes_insn_fn_t		*insn_fn;
+	bool				is_stack_operation;
 };
 
 #endif
diff --git a/arch/arm/kernel/kprobes-common.c b/arch/arm/kernel/kprobes-common.c
index 0bf5d64..4e8b918 100644
--- a/arch/arm/kernel/kprobes-common.c
+++ b/arch/arm/kernel/kprobes-common.c
@@ -133,6 +133,10 @@  kprobe_decode_ldmstm(probes_opcode_t insn, struct arch_probes_insn *asi,
 	int is_ldm = insn & 0x100000;
 	int rn = (insn >> 16) & 0xf;
 
+	/* whether this is a push instruction? */
+	if ((rn == 0xd) && (!is_ldm))
+		asi->is_stack_operation = true;
+
 	if (rn <= 12 && (reglist & 0xe000) == 0) {
 		/* Instruction only uses registers in the range R0..R12 */
 		handler = emulate_generic_r0_12_noflags;
diff --git a/arch/arm/kernel/probes-arm.c b/arch/arm/kernel/probes-arm.c
index 8eaef81..5c187ba 100644
--- a/arch/arm/kernel/probes-arm.c
+++ b/arch/arm/kernel/probes-arm.c
@@ -577,7 +577,7 @@  static const union decode_item arm_cccc_01xx_table[] = {
 	/* STR (immediate)	cccc 010x x0x0 xxxx xxxx xxxx xxxx xxxx */
 	/* STRB (immediate)	cccc 010x x1x0 xxxx xxxx xxxx xxxx xxxx */
 	DECODE_EMULATEX	(0x0e100000, 0x04000000, PROBES_STORE,
-						 REGS(NOPCWB, ANY, 0, 0, 0)),
+						 REGS(NOPCWB_SP_STACK, ANY, 0, 0, 0)),
 
 	/* LDR (immediate)	cccc 010x x0x1 xxxx xxxx xxxx xxxx xxxx */
 	/* LDRB (immediate)	cccc 010x x1x1 xxxx xxxx xxxx xxxx xxxx */
@@ -587,7 +587,7 @@  static const union decode_item arm_cccc_01xx_table[] = {
 	/* STR (register)	cccc 011x x0x0 xxxx xxxx xxxx xxxx xxxx */
 	/* STRB (register)	cccc 011x x1x0 xxxx xxxx xxxx xxxx xxxx */
 	DECODE_EMULATEX	(0x0e100000, 0x06000000, PROBES_STORE,
-						 REGS(NOPCWB, ANY, 0, 0, NOPC)),
+						 REGS(NOPCWB_SP_STACK, ANY, 0, 0, NOPC_SP_STACK)),
 
 	/* LDR (register)	cccc 011x x0x1 xxxx xxxx xxxx xxxx xxxx */
 	/* LDRB (register)	cccc 011x x1x1 xxxx xxxx xxxx xxxx xxxx */
diff --git a/arch/arm/kernel/probes-thumb.c b/arch/arm/kernel/probes-thumb.c
index 4131351..d0d30d8 100644
--- a/arch/arm/kernel/probes-thumb.c
+++ b/arch/arm/kernel/probes-thumb.c
@@ -54,7 +54,7 @@  static const union decode_item t32_table_1110_100x_x1xx[] = {
 	/* STRD (immediate)	1110 1001 x1x0 xxxx xxxx xxxx xxxx xxxx */
 	/* LDRD (immediate)	1110 1001 x1x1 xxxx xxxx xxxx xxxx xxxx */
 	DECODE_EMULATEX	(0xff400000, 0xe9400000, PROBES_T32_LDRDSTRD,
-						 REGS(NOPCWB, NOSPPC, NOSPPC, 0, 0)),
+						 REGS(NOPCWB_SP_STACK, NOSPPC, NOSPPC, 0, 0)),
 
 	/* TBB			1110 1000 1101 xxxx xxxx xxxx 0000 xxxx */
 	/* TBH			1110 1000 1101 xxxx xxxx xxxx 0001 xxxx */
@@ -345,12 +345,12 @@  static const union decode_item t32_table_1111_100x[] = {
 	/* STR (immediate)	1111 1000 1100 xxxx xxxx xxxx xxxx xxxx */
 	/* LDR (immediate)	1111 1000 1101 xxxx xxxx xxxx xxxx xxxx */
 	DECODE_EMULATEX	(0xffe00000, 0xf8c00000, PROBES_T32_LDRSTR,
-						 REGS(NOPCX, ANY, 0, 0, 0)),
+						 REGS(NOPCX_SP_STACK, ANY, 0, 0, 0)),
 
 	/* STR (register)	1111 1000 0100 xxxx xxxx 0000 00xx xxxx */
 	/* LDR (register)	1111 1000 0101 xxxx xxxx 0000 00xx xxxx */
 	DECODE_EMULATEX	(0xffe00fc0, 0xf8400000, PROBES_T32_LDRSTR,
-						 REGS(NOPCX, ANY, 0, 0, NOSPPC)),
+						 REGS(NOPCX_SP_STACK, ANY, 0, 0, NOSPPC)),
 
 	/* LDRB (literal)	1111 1000 x001 1111 xxxx xxxx xxxx xxxx */
 	/* LDRSB (literal)	1111 1001 x001 1111 xxxx xxxx xxxx xxxx */
diff --git a/arch/arm/kernel/probes.c b/arch/arm/kernel/probes.c
index 1c77b8d..f811cac 100644
--- a/arch/arm/kernel/probes.c
+++ b/arch/arm/kernel/probes.c
@@ -258,7 +258,9 @@  set_emulated_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
  * non-zero value, the corresponding nibble in pinsn is validated and modified
  * according to the type.
  */
-static bool __kprobes decode_regs(probes_opcode_t *pinsn, u32 regs, bool modify)
+static bool __kprobes decode_regs(probes_opcode_t *pinsn,
+		struct arch_probes_insn *asi,
+		u32 regs, bool modify)
 {
 	probes_opcode_t insn = *pinsn;
 	probes_opcode_t mask = 0xf; /* Start at least significant nibble */
@@ -307,11 +309,14 @@  static bool __kprobes decode_regs(probes_opcode_t *pinsn, u32 regs, bool modify)
 				goto reject;
 			break;
 
+		case REG_TYPE_NOPCWB_SP_STACK:
 		case REG_TYPE_NOPCWB:
 			if (!is_writeback(insn))
 				break; /* No writeback, so any register is OK */
 			/* fall through... */
+		case REG_TYPE_NOPC_SP_STACK:
 		case REG_TYPE_NOPC:
+		case REG_TYPE_NOPCX_SP_STACK:
 		case REG_TYPE_NOPCX:
 			/* Reject PC (R15) */
 			if (((insn ^ 0xffffffff) & mask) == 0)
@@ -319,6 +324,15 @@  static bool __kprobes decode_regs(probes_opcode_t *pinsn, u32 regs, bool modify)
 			break;
 		}
 
+		/* check stack operation */
+		switch (regs & 0xf) {
+			case REG_TYPE_NOPCWB_SP_STACK:
+			case REG_TYPE_NOPC_SP_STACK:
+			case REG_TYPE_NOPCX_SP_STACK:
+				if (((insn ^ 0xdddddddd) & mask) == 0)
+					asi->is_stack_operation = true;
+		}
+
 		/* Replace value of nibble with new register number... */
 		insn &= ~mask;
 		insn |= new_bits & mask;
@@ -394,6 +408,8 @@  probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
 	const struct decode_header *next;
 	bool matched = false;
 
+	asi->is_stack_operation = false;
+
 	if (emulate)
 		insn = prepare_emulated_insn(insn, asi, thumb);
 
@@ -410,7 +426,7 @@  probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
 		if (!matched && (insn & h->mask.bits) != h->value.bits)
 			continue;
 
-		if (!decode_regs(&insn, regs, emulate))
+		if (!decode_regs(&insn, asi, regs, emulate))
 			return INSN_REJECTED;
 
 		switch (type) {
diff --git a/arch/arm/kernel/probes.h b/arch/arm/kernel/probes.h
index dba9f24..568fd01 100644
--- a/arch/arm/kernel/probes.h
+++ b/arch/arm/kernel/probes.h
@@ -278,13 +278,19 @@  enum decode_reg_type {
 	REG_TYPE_NOSP,	   /* Register must not be SP */
 	REG_TYPE_NOSPPC,   /* Register must not be SP or PC */
 	REG_TYPE_NOPC,	   /* Register must not be PC */
+	REG_TYPE_NOPC_SP_STACK,	   /* REG_TYPE_NOPC and if this reg is sp
+				      then this is a stack operation */
 	REG_TYPE_NOPCWB,   /* No PC if load/store write-back flag also set */
+	REG_TYPE_NOPCWB_SP_STACK,   /* REG_TYPE_NOPCWB and, if this reg is sp
+				       then this is a stack operation */
 
 	/* The following types are used when the encoding for PC indicates
 	 * another instruction form. This distiction only matters for test
 	 * case coverage checks.
 	 */
 	REG_TYPE_NOPCX,	   /* Register must not be PC */
+	REG_TYPE_NOPCX_SP_STACK,	   /* REG_TYPE_NOPCX and if this reg is sp
+					      then this is a stack operation */
 	REG_TYPE_NOSPPCX,  /* Register must not be SP or PC */
 
 	/* Alias to allow '0' arg to be used in REGS macro. */