diff mbox

ARM: add get_user() support for 8 byte types

Message ID 1352495853-9790-1-git-send-email-rob.clark@linaro.org
State New
Headers show

Commit Message

Rob Clark Nov. 9, 2012, 9:17 p.m. UTC
From: Rob Clark <rob@ti.com>

A new atomic modeset/pageflip ioctl being developed in DRM requires
get_user() to work for 64bit types (in addition to just put_user()).

Signed-off-by: Rob Clark <rob@ti.com>
---
 arch/arm/include/asm/uaccess.h | 25 ++++++++++++++++++++-----
 arch/arm/lib/getuser.S         | 17 ++++++++++++++++-
 2 files changed, 36 insertions(+), 6 deletions(-)

Comments

Will Deacon Nov. 12, 2012, 10:46 a.m. UTC | #1
On Fri, Nov 09, 2012 at 09:17:33PM +0000, Rob Clark wrote:
> From: Rob Clark <rob@ti.com>
> 
> A new atomic modeset/pageflip ioctl being developed in DRM requires
> get_user() to work for 64bit types (in addition to just put_user()).
> 
> Signed-off-by: Rob Clark <rob@ti.com>
> ---
>  arch/arm/include/asm/uaccess.h | 25 ++++++++++++++++++++-----
>  arch/arm/lib/getuser.S         | 17 ++++++++++++++++-
>  2 files changed, 36 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
> index 7e1f760..2e3fdb2 100644
> --- a/arch/arm/include/asm/uaccess.h
> +++ b/arch/arm/include/asm/uaccess.h
> @@ -100,6 +100,7 @@ static inline void set_fs(mm_segment_t fs)
>  extern int __get_user_1(void *);
>  extern int __get_user_2(void *);
>  extern int __get_user_4(void *);
> +extern int __get_user_8(void *);
>  
>  #define __GUP_CLOBBER_1	"lr", "cc"
>  #ifdef CONFIG_CPU_USE_DOMAINS
> @@ -108,6 +109,7 @@ extern int __get_user_4(void *);
>  #define __GUP_CLOBBER_2 "lr", "cc"
>  #endif
>  #define __GUP_CLOBBER_4	"lr", "cc"
> +#define __GUP_CLOBBER_8	"lr", "cc"
>  
>  #define __get_user_x(__r2,__p,__e,__l,__s)				\
>  	   __asm__ __volatile__ (					\
> @@ -122,22 +124,35 @@ extern int __get_user_4(void *);
>  	({								\
>  		unsigned long __limit = current_thread_info()->addr_limit - 1; \
>  		register const typeof(*(p)) __user *__p asm("r0") = (p);\
> -		register unsigned long __r2 asm("r2");			\
>  		register unsigned long __l asm("r1") = __limit;		\
>  		register int __e asm("r0");				\
>  		switch (sizeof(*(__p))) {				\
> -		case 1:							\
> +		case 1: {						\
> +			register unsigned long __r2 asm("r2");		\
>  			__get_user_x(__r2, __p, __e, __l, 1);		\
> +			x = (typeof(*(p))) __r2;			\
>  			break;						\
> -		case 2:							\
> +		}							\
> +		case 2: {						\
> +			register unsigned long __r2 asm("r2");		\
>  			__get_user_x(__r2, __p, __e, __l, 2);		\
> +			x = (typeof(*(p))) __r2;			\
>  			break;						\
> -		case 4:							\
> +		}							\
> +		case 4: {						\
> +			register unsigned long __r2 asm("r2");		\
>  			__get_user_x(__r2, __p, __e, __l, 4);		\
> +			x = (typeof(*(p))) __r2;			\
> +			break;						\
> +		}							\
> +		case 8: {						\
> +			register unsigned long long __r2 asm("r2");	\

Does this matter? For EABI, we'll pass in (r2, r3) and it's all handcrafted
asm, so the compiler shouldn't care much. For OABI, I think you may have to
do some more work to get the two words where you want them.

> +			__get_user_x(__r2, __p, __e, __l, 8);		\
> +			x = (typeof(*(p))) __r2;			\
>  			break;						\
> +		}							\
>  		default: __e = __get_user_bad(); break;			\
>  		}							\
> -		x = (typeof(*(p))) __r2;				\
>  		__e;							\
>  	})
>  
> diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
> index 9b06bb4..d05285c 100644
> --- a/arch/arm/lib/getuser.S
> +++ b/arch/arm/lib/getuser.S
> @@ -18,7 +18,7 @@
>   * Inputs:	r0 contains the address
>   *		r1 contains the address limit, which must be preserved
>   * Outputs:	r0 is the error code
> - *		r2 contains the zero-extended value
> + *		r2, r3 contains the zero-extended value
>   *		lr corrupted
>   *
>   * No other registers must be altered.  (see <asm/uaccess.h>
> @@ -66,6 +66,19 @@ ENTRY(__get_user_4)
>  	mov	pc, lr
>  ENDPROC(__get_user_4)
>  
> +ENTRY(__get_user_8)
> +	check_uaccess r0, 4, r1, r2, __get_user_bad

Shouldn't you be passing 8 here, so that we validate the correct range?

> +#ifdef CONFIG_THUMB2_KERNEL
> +5: TUSER(ldr)	r2, [r0]
> +6: TUSER(ldr)	r3, [r0, #4]
> +#else
> +5: TUSER(ldr)	r2, [r0], #4
> +6: TUSER(ldr)	r3, [r0]
> +#endif

This doesn't work for EABI big-endian systems.

Will
Rob Clark Nov. 12, 2012, 1:46 p.m. UTC | #2
On Mon, Nov 12, 2012 at 4:46 AM, Will Deacon <will.deacon@arm.com> wrote:
> On Fri, Nov 09, 2012 at 09:17:33PM +0000, Rob Clark wrote:
>> From: Rob Clark <rob@ti.com>
>>
>> A new atomic modeset/pageflip ioctl being developed in DRM requires
>> get_user() to work for 64bit types (in addition to just put_user()).
>>
>> Signed-off-by: Rob Clark <rob@ti.com>
>> ---
>>  arch/arm/include/asm/uaccess.h | 25 ++++++++++++++++++++-----
>>  arch/arm/lib/getuser.S         | 17 ++++++++++++++++-
>>  2 files changed, 36 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
>> index 7e1f760..2e3fdb2 100644
>> --- a/arch/arm/include/asm/uaccess.h
>> +++ b/arch/arm/include/asm/uaccess.h
>> @@ -100,6 +100,7 @@ static inline void set_fs(mm_segment_t fs)
>>  extern int __get_user_1(void *);
>>  extern int __get_user_2(void *);
>>  extern int __get_user_4(void *);
>> +extern int __get_user_8(void *);
>>
>>  #define __GUP_CLOBBER_1      "lr", "cc"
>>  #ifdef CONFIG_CPU_USE_DOMAINS
>> @@ -108,6 +109,7 @@ extern int __get_user_4(void *);
>>  #define __GUP_CLOBBER_2 "lr", "cc"
>>  #endif
>>  #define __GUP_CLOBBER_4      "lr", "cc"
>> +#define __GUP_CLOBBER_8      "lr", "cc"
>>
>>  #define __get_user_x(__r2,__p,__e,__l,__s)                           \
>>          __asm__ __volatile__ (                                       \
>> @@ -122,22 +124,35 @@ extern int __get_user_4(void *);
>>       ({                                                              \
>>               unsigned long __limit = current_thread_info()->addr_limit - 1; \
>>               register const typeof(*(p)) __user *__p asm("r0") = (p);\
>> -             register unsigned long __r2 asm("r2");                  \
>>               register unsigned long __l asm("r1") = __limit;         \
>>               register int __e asm("r0");                             \
>>               switch (sizeof(*(__p))) {                               \
>> -             case 1:                                                 \
>> +             case 1: {                                               \
>> +                     register unsigned long __r2 asm("r2");          \
>>                       __get_user_x(__r2, __p, __e, __l, 1);           \
>> +                     x = (typeof(*(p))) __r2;                        \
>>                       break;                                          \
>> -             case 2:                                                 \
>> +             }                                                       \
>> +             case 2: {                                               \
>> +                     register unsigned long __r2 asm("r2");          \
>>                       __get_user_x(__r2, __p, __e, __l, 2);           \
>> +                     x = (typeof(*(p))) __r2;                        \
>>                       break;                                          \
>> -             case 4:                                                 \
>> +             }                                                       \
>> +             case 4: {                                               \
>> +                     register unsigned long __r2 asm("r2");          \
>>                       __get_user_x(__r2, __p, __e, __l, 4);           \
>> +                     x = (typeof(*(p))) __r2;                        \
>> +                     break;                                          \
>> +             }                                                       \
>> +             case 8: {                                               \
>> +                     register unsigned long long __r2 asm("r2");     \
>
> Does this matter? For EABI, we'll pass in (r2, r3) and it's all handcrafted
> asm, so the compiler shouldn't care much. For OABI, I think you may have to
> do some more work to get the two words where you want them.

Is the question whether the compiler is guaranteed to allocate r2 and
r3 in all cases?  I'm not quite sure, I confess to usually trying to
avoid inline asm.  But from looking at the disassembly (for little
endian EABI build) it seemed to do the right thing.

The only other idea I had was to explicitly declare two 'unsigned
long's and then shift them into a 64bit x, although I'm open to
suggestions if there is a better way.

>> +                     __get_user_x(__r2, __p, __e, __l, 8);           \
>> +                     x = (typeof(*(p))) __r2;                        \
>>                       break;                                          \
>> +             }                                                       \
>>               default: __e = __get_user_bad(); break;                 \
>>               }                                                       \
>> -             x = (typeof(*(p))) __r2;                                \
>>               __e;                                                    \
>>       })
>>
>> diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
>> index 9b06bb4..d05285c 100644
>> --- a/arch/arm/lib/getuser.S
>> +++ b/arch/arm/lib/getuser.S
>> @@ -18,7 +18,7 @@
>>   * Inputs:   r0 contains the address
>>   *           r1 contains the address limit, which must be preserved
>>   * Outputs:  r0 is the error code
>> - *           r2 contains the zero-extended value
>> + *           r2, r3 contains the zero-extended value
>>   *           lr corrupted
>>   *
>>   * No other registers must be altered.  (see <asm/uaccess.h>
>> @@ -66,6 +66,19 @@ ENTRY(__get_user_4)
>>       mov     pc, lr
>>  ENDPROC(__get_user_4)
>>
>> +ENTRY(__get_user_8)
>> +     check_uaccess r0, 4, r1, r2, __get_user_bad
>
> Shouldn't you be passing 8 here, so that we validate the correct range?

yes, sorry, I'll fix that

>> +#ifdef CONFIG_THUMB2_KERNEL
>> +5: TUSER(ldr)        r2, [r0]
>> +6: TUSER(ldr)        r3, [r0, #4]
>> +#else
>> +5: TUSER(ldr)        r2, [r0], #4
>> +6: TUSER(ldr)        r3, [r0]
>> +#endif
>
> This doesn't work for EABI big-endian systems.

Hmm, is that true?  Wouldn't put_user() then have the same problem?

I guess __ARMEB__ is the flag for big-endian?

BR,
-R

> Will
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Will Deacon Nov. 12, 2012, 2:38 p.m. UTC | #3
On Mon, Nov 12, 2012 at 01:46:57PM +0000, Rob Clark wrote:
> On Mon, Nov 12, 2012 at 4:46 AM, Will Deacon <will.deacon@arm.com> wrote:
> > On Fri, Nov 09, 2012 at 09:17:33PM +0000, Rob Clark wrote:
> >> @@ -122,22 +124,35 @@ extern int __get_user_4(void *);
> >>       ({                                                              \
> >>               unsigned long __limit = current_thread_info()->addr_limit - 1; \
> >>               register const typeof(*(p)) __user *__p asm("r0") = (p);\
> >> -             register unsigned long __r2 asm("r2");                  \
> >>               register unsigned long __l asm("r1") = __limit;         \
> >>               register int __e asm("r0");                             \
> >>               switch (sizeof(*(__p))) {                               \
> >> -             case 1:                                                 \
> >> +             case 1: {                                               \
> >> +                     register unsigned long __r2 asm("r2");          \
> >>                       __get_user_x(__r2, __p, __e, __l, 1);           \
> >> +                     x = (typeof(*(p))) __r2;                        \
> >>                       break;                                          \
> >> -             case 2:                                                 \
> >> +             }                                                       \
> >> +             case 2: {                                               \
> >> +                     register unsigned long __r2 asm("r2");          \
> >>                       __get_user_x(__r2, __p, __e, __l, 2);           \
> >> +                     x = (typeof(*(p))) __r2;                        \
> >>                       break;                                          \
> >> -             case 4:                                                 \
> >> +             }                                                       \
> >> +             case 4: {                                               \
> >> +                     register unsigned long __r2 asm("r2");          \
> >>                       __get_user_x(__r2, __p, __e, __l, 4);           \
> >> +                     x = (typeof(*(p))) __r2;                        \
> >> +                     break;                                          \
> >> +             }                                                       \
> >> +             case 8: {                                               \
> >> +                     register unsigned long long __r2 asm("r2");     \
> >
> > Does this matter? For EABI, we'll pass in (r2, r3) and it's all handcrafted
> > asm, so the compiler shouldn't care much. For OABI, I think you may have to
> > do some more work to get the two words where you want them.
> 
> Is the question whether the compiler is guaranteed to allocate r2 and
> r3 in all cases?  I'm not quite sure, I confess to usually trying to
> avoid inline asm.  But from looking at the disassembly (for little
> endian EABI build) it seemed to do the right thing.

I can't recall how OABI represents 64-bit values and particularly whether this
differs between little and big-endian, so I wondered whether you may have to
do some marshalling when you assign x. However, a few quick experiments with
GCC suggest that the register representation matches EABI in regards to word
ordering (it just doesn't require an even base register), although it would
be good to find this written down somewhere...

> The only other idea I had was to explicitly declare two 'unsigned
> long's and then shift them into a 64bit x, although I'm open to
> suggestions if there is a better way.

Can't you just use register unsigned long long for all cases? Even better,
follow what put_user does and use typeof(*(p))?

> >> diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
> >> index 9b06bb4..d05285c 100644
> >> --- a/arch/arm/lib/getuser.S
> >> +++ b/arch/arm/lib/getuser.S
> >> @@ -18,7 +18,7 @@
> >>   * Inputs:   r0 contains the address
> >>   *           r1 contains the address limit, which must be preserved
> >>   * Outputs:  r0 is the error code
> >> - *           r2 contains the zero-extended value
> >> + *           r2, r3 contains the zero-extended value
> >>   *           lr corrupted
> >>   *
> >>   * No other registers must be altered.  (see <asm/uaccess.h>
> >> @@ -66,6 +66,19 @@ ENTRY(__get_user_4)
> >>       mov     pc, lr
> >>  ENDPROC(__get_user_4)
> >>
> >> +ENTRY(__get_user_8)
> >> +     check_uaccess r0, 4, r1, r2, __get_user_bad
> >
> > Shouldn't you be passing 8 here, so that we validate the correct range?
> 
> yes, sorry, I'll fix that
> 
> >> +#ifdef CONFIG_THUMB2_KERNEL
> >> +5: TUSER(ldr)        r2, [r0]
> >> +6: TUSER(ldr)        r3, [r0, #4]
> >> +#else
> >> +5: TUSER(ldr)        r2, [r0], #4
> >> +6: TUSER(ldr)        r3, [r0]
> >> +#endif
> >
> > This doesn't work for EABI big-endian systems.
> 
> Hmm, is that true?  Wouldn't put_user() then have the same problem?

I dug up the PCS and it seems that we arrange the two halves of the
doubleword to match the ldm/stm memory representation for EABI, so sorry
for the confusion.

> I guess __ARMEB__ is the flag for big-endian?

That's the thing defined by the compiler, yes.

Will
Rob Clark Nov. 12, 2012, 3:09 p.m. UTC | #4
On Mon, Nov 12, 2012 at 8:38 AM, Will Deacon <will.deacon@arm.com> wrote:
> On Mon, Nov 12, 2012 at 01:46:57PM +0000, Rob Clark wrote:
>> On Mon, Nov 12, 2012 at 4:46 AM, Will Deacon <will.deacon@arm.com> wrote:
>> > On Fri, Nov 09, 2012 at 09:17:33PM +0000, Rob Clark wrote:
>> >> @@ -122,22 +124,35 @@ extern int __get_user_4(void *);
>> >>       ({                                                              \
>> >>               unsigned long __limit = current_thread_info()->addr_limit - 1; \
>> >>               register const typeof(*(p)) __user *__p asm("r0") = (p);\
>> >> -             register unsigned long __r2 asm("r2");                  \
>> >>               register unsigned long __l asm("r1") = __limit;         \
>> >>               register int __e asm("r0");                             \
>> >>               switch (sizeof(*(__p))) {                               \
>> >> -             case 1:                                                 \
>> >> +             case 1: {                                               \
>> >> +                     register unsigned long __r2 asm("r2");          \
>> >>                       __get_user_x(__r2, __p, __e, __l, 1);           \
>> >> +                     x = (typeof(*(p))) __r2;                        \
>> >>                       break;                                          \
>> >> -             case 2:                                                 \
>> >> +             }                                                       \
>> >> +             case 2: {                                               \
>> >> +                     register unsigned long __r2 asm("r2");          \
>> >>                       __get_user_x(__r2, __p, __e, __l, 2);           \
>> >> +                     x = (typeof(*(p))) __r2;                        \
>> >>                       break;                                          \
>> >> -             case 4:                                                 \
>> >> +             }                                                       \
>> >> +             case 4: {                                               \
>> >> +                     register unsigned long __r2 asm("r2");          \
>> >>                       __get_user_x(__r2, __p, __e, __l, 4);           \
>> >> +                     x = (typeof(*(p))) __r2;                        \
>> >> +                     break;                                          \
>> >> +             }                                                       \
>> >> +             case 8: {                                               \
>> >> +                     register unsigned long long __r2 asm("r2");     \
>> >
>> > Does this matter? For EABI, we'll pass in (r2, r3) and it's all handcrafted
>> > asm, so the compiler shouldn't care much. For OABI, I think you may have to
>> > do some more work to get the two words where you want them.
>>
>> Is the question whether the compiler is guaranteed to allocate r2 and
>> r3 in all cases?  I'm not quite sure, I confess to usually trying to
>> avoid inline asm.  But from looking at the disassembly (for little
>> endian EABI build) it seemed to do the right thing.
>
> I can't recall how OABI represents 64-bit values and particularly whether this
> differs between little and big-endian, so I wondered whether you may have to
> do some marshalling when you assign x. However, a few quick experiments with
> GCC suggest that the register representation matches EABI in regards to word
> ordering (it just doesn't require an even base register), although it would
> be good to find this written down somewhere...

yeah, I was kinda hoping that someone a bit closer to the compiler
would speak up here :-)

>> The only other idea I had was to explicitly declare two 'unsigned
>> long's and then shift them into a 64bit x, although I'm open to
>> suggestions if there is a better way.
>
> Can't you just use register unsigned long long for all cases? Even better,
> follow what put_user does and use typeof(*(p))?

typeof(*(p) was my first try but:

  register typeof(*(p)) __r2 asm("r2");

gives me the error:

  error: read-only variable ‘__r2’ used as ‘asm’ output

I guess because 'const' ends up being part of the typeof *p?  I
suppose I could do typeof(x) instead

BR,
-R

>> >> diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
>> >> index 9b06bb4..d05285c 100644
>> >> --- a/arch/arm/lib/getuser.S
>> >> +++ b/arch/arm/lib/getuser.S
>> >> @@ -18,7 +18,7 @@
>> >>   * Inputs:   r0 contains the address
>> >>   *           r1 contains the address limit, which must be preserved
>> >>   * Outputs:  r0 is the error code
>> >> - *           r2 contains the zero-extended value
>> >> + *           r2, r3 contains the zero-extended value
>> >>   *           lr corrupted
>> >>   *
>> >>   * No other registers must be altered.  (see <asm/uaccess.h>
>> >> @@ -66,6 +66,19 @@ ENTRY(__get_user_4)
>> >>       mov     pc, lr
>> >>  ENDPROC(__get_user_4)
>> >>
>> >> +ENTRY(__get_user_8)
>> >> +     check_uaccess r0, 4, r1, r2, __get_user_bad
>> >
>> > Shouldn't you be passing 8 here, so that we validate the correct range?
>>
>> yes, sorry, I'll fix that
>>
>> >> +#ifdef CONFIG_THUMB2_KERNEL
>> >> +5: TUSER(ldr)        r2, [r0]
>> >> +6: TUSER(ldr)        r3, [r0, #4]
>> >> +#else
>> >> +5: TUSER(ldr)        r2, [r0], #4
>> >> +6: TUSER(ldr)        r3, [r0]
>> >> +#endif
>> >
>> > This doesn't work for EABI big-endian systems.
>>
>> Hmm, is that true?  Wouldn't put_user() then have the same problem?
>
> I dug up the PCS and it seems that we arrange the two halves of the
> doubleword to match the ldm/stm memory representation for EABI, so sorry
> for the confusion.
>
>> I guess __ARMEB__ is the flag for big-endian?
>
> That's the thing defined by the compiler, yes.
>
> Will
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King - ARM Linux Nov. 12, 2012, 7:27 p.m. UTC | #5
On Fri, Nov 09, 2012 at 03:17:33PM -0600, Rob Clark wrote:
> From: Rob Clark <rob@ti.com>
> 
> A new atomic modeset/pageflip ioctl being developed in DRM requires
> get_user() to work for 64bit types (in addition to just put_user()).

NAK.

(I did write a better email explaining all the ins and outs of why this
won't work and why 64-bit get_user isn't possible, but my editor crapped
out and lost all that well written message; I don't fancy typing it all
out again.)

Nevertheless,
int test_ptr(unsigned int **v, unsigned int **p)
{
        return get_user(*v, p);
}

produces a warning, and you can't get away from that if you stick 64-bit
support into get_user().

Sorry, 64-bit get_user() is a no-no.
Rob Clark Nov. 12, 2012, 7:58 p.m. UTC | #6
On Mon, Nov 12, 2012 at 1:27 PM, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> On Fri, Nov 09, 2012 at 03:17:33PM -0600, Rob Clark wrote:
>> From: Rob Clark <rob@ti.com>
>>
>> A new atomic modeset/pageflip ioctl being developed in DRM requires
>> get_user() to work for 64bit types (in addition to just put_user()).
>
> NAK.
>
> (I did write a better email explaining all the ins and outs of why this
> won't work and why 64-bit get_user isn't possible, but my editor crapped
> out and lost all that well written message; I don't fancy typing it all
> out again.)
>
> Nevertheless,
> int test_ptr(unsigned int **v, unsigned int **p)
> {
>         return get_user(*v, p);
> }
>
> produces a warning, and you can't get away from that if you stick 64-bit
> support into get_user().

Actually, it seems like using 'register typeof(x) __r2 asm("r2");'
does avoid that warning..

I don't know if that was the only argument against 64-bit get_user().
But it will at least be inconvenient that get_user() works for 64bit
on x86 but not arm..

BR,
-R

> Sorry, 64-bit get_user() is a no-no.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King - ARM Linux Nov. 12, 2012, 11:08 p.m. UTC | #7
On Mon, Nov 12, 2012 at 01:58:32PM -0600, Rob Clark wrote:
> On Mon, Nov 12, 2012 at 1:27 PM, Russell King - ARM Linux
> <linux@arm.linux.org.uk> wrote:
> > On Fri, Nov 09, 2012 at 03:17:33PM -0600, Rob Clark wrote:
> >> From: Rob Clark <rob@ti.com>
> >>
> >> A new atomic modeset/pageflip ioctl being developed in DRM requires
> >> get_user() to work for 64bit types (in addition to just put_user()).
> >
> > NAK.
> >
> > (I did write a better email explaining all the ins and outs of why this
> > won't work and why 64-bit get_user isn't possible, but my editor crapped
> > out and lost all that well written message; I don't fancy typing it all
> > out again.)
> >
> > Nevertheless,
> > int test_ptr(unsigned int **v, unsigned int **p)
> > {
> >         return get_user(*v, p);
> > }
> >
> > produces a warning, and you can't get away from that if you stick 64-bit
> > support into get_user().
> 
> Actually, it seems like using 'register typeof(x) __r2 asm("r2");'
> does avoid that warning..

That seems to pass the checks I've done on it so far, and seems rather
obvious (there's been a number of people looking at this, none of whom
have come up with that solution).  Provided the final cast is kept
(which is there to ensure proper typechecking), it seems like it might
be a solution.
Rob Clark Nov. 12, 2012, 11:33 p.m. UTC | #8
On Mon, Nov 12, 2012 at 5:08 PM, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> On Mon, Nov 12, 2012 at 01:58:32PM -0600, Rob Clark wrote:
>> On Mon, Nov 12, 2012 at 1:27 PM, Russell King - ARM Linux
>> <linux@arm.linux.org.uk> wrote:
>> > On Fri, Nov 09, 2012 at 03:17:33PM -0600, Rob Clark wrote:
>> >> From: Rob Clark <rob@ti.com>
>> >>
>> >> A new atomic modeset/pageflip ioctl being developed in DRM requires
>> >> get_user() to work for 64bit types (in addition to just put_user()).
>> >
>> > NAK.
>> >
>> > (I did write a better email explaining all the ins and outs of why this
>> > won't work and why 64-bit get_user isn't possible, but my editor crapped
>> > out and lost all that well written message; I don't fancy typing it all
>> > out again.)
>> >
>> > Nevertheless,
>> > int test_ptr(unsigned int **v, unsigned int **p)
>> > {
>> >         return get_user(*v, p);
>> > }
>> >
>> > produces a warning, and you can't get away from that if you stick 64-bit
>> > support into get_user().
>>
>> Actually, it seems like using 'register typeof(x) __r2 asm("r2");'
>> does avoid that warning..
>
> That seems to pass the checks I've done on it so far, and seems rather
> obvious (there's been a number of people looking at this, none of whom
> have come up with that solution).  Provided the final cast is kept
> (which is there to ensure proper typechecking), it seems like it might
> be a solution.

I'm sort of thinking maybe we want to change 'switch (sizeof(*(__p)))'
with 'switch (sizeof(typeof(x)))' in case someone ignores the compiler
warning when they try something like:

   uint32_t x;
   uint64_t *p = ...;
   get_user(x, p);

that was my one concern about 'register typeof(x) __r2 ...', but I
think just changing the switch condition is enough.  But maybe good to
have some eyes on in case there is something else I'm not thinking of.

BR,
-R

> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King - ARM Linux Nov. 12, 2012, 11:53 p.m. UTC | #9
On Mon, Nov 12, 2012 at 05:33:41PM -0600, Rob Clark wrote:
> I'm sort of thinking maybe we want to change 'switch (sizeof(*(__p)))'
> with 'switch (sizeof(typeof(x)))' in case someone ignores the compiler
> warning when they try something like:

Definitely not.  Ttype of access is controlled by the pointer, not by the
size of what it's being assigned to.  Switching that around is likely to
break stuff hugely.  Consider this:

	unsigned char __user *p;
	int val;

	get_user(val, p);

If the pointer type is used to determine the access size, a char will be
accessed.  This is legal - because we end up assigning an unsigned character
to an int.

If the size of the destination was used, we'd access an int instead, which
is larger than the pointer, and probably the wrong thing to do anyway.

Think of get_user(a, b) as being a special accessor having the ultimate
semantics of: "a = *b;" but done in a safe way with error checking.

>    uint32_t x;
>    uint64_t *p = ...;
>    get_user(x, p);
> 
> that was my one concern about 'register typeof(x) __r2 ...', but I
> think just changing the switch condition is enough.  But maybe good to
> have some eyes on in case there is something else I'm not thinking of.

And what should happen in the above is exactly the same as what happens
if you do:

	x = *p;

with those types.  For ARM, that would be a 64-bit access (if the
compiler decides not to optimize away the upper 32-bit access) followed
by a narrowing cast down to 32-bit.  With get_user() of course, there's
no option not to optimize it away.

However, this _does_ reveal a bug with your approach.  With sizeof(*p)
being 8, and the type of __r2 being a 32-bit quantity, the compiler will
choose the 64-bit accessor, which will corrupt r3 - and the compiler
won't know that r3 has been corrupted.

That's too unsafe.

I just tried a variant of your approach, but got lots of warnings again:
	register unsigned long long __r2 asm("r2");
	__get_user_x(__r2, __p, __e, 8, "lr");
	x = (typeof(*(__p))) ((typeof(x))__r2);
because typeof(x) in the test_ptr() case ends up being a pointer itself.

So, back to the drawing board, and I think back to the original position
of "we don't support this".
Rob Clark Nov. 13, 2012, 12:31 a.m. UTC | #10
On Mon, Nov 12, 2012 at 5:53 PM, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> On Mon, Nov 12, 2012 at 05:33:41PM -0600, Rob Clark wrote:
>> I'm sort of thinking maybe we want to change 'switch (sizeof(*(__p)))'
>> with 'switch (sizeof(typeof(x)))' in case someone ignores the compiler
>> warning when they try something like:
>
> Definitely not.  Ttype of access is controlled by the pointer, not by the
> size of what it's being assigned to.  Switching that around is likely to
> break stuff hugely.  Consider this:
>
>         unsigned char __user *p;
>         int val;
>
>         get_user(val, p);
>
> If the pointer type is used to determine the access size, a char will be
> accessed.  This is legal - because we end up assigning an unsigned character
> to an int.
>
> If the size of the destination was used, we'd access an int instead, which
> is larger than the pointer, and probably the wrong thing to do anyway.
>
> Think of get_user(a, b) as being a special accessor having the ultimate
> semantics of: "a = *b;" but done in a safe way with error checking.
>
>>    uint32_t x;
>>    uint64_t *p = ...;
>>    get_user(x, p);
>>
>> that was my one concern about 'register typeof(x) __r2 ...', but I
>> think just changing the switch condition is enough.  But maybe good to
>> have some eyes on in case there is something else I'm not thinking of.
>
> And what should happen in the above is exactly the same as what happens
> if you do:
>
>         x = *p;
>
> with those types.  For ARM, that would be a 64-bit access (if the
> compiler decides not to optimize away the upper 32-bit access) followed
> by a narrowing cast down to 32-bit.  With get_user() of course, there's
> no option not to optimize it away.
>
> However, this _does_ reveal a bug with your approach.  With sizeof(*p)
> being 8, and the type of __r2 being a 32-bit quantity, the compiler will
> choose the 64-bit accessor, which will corrupt r3 - and the compiler
> won't know that r3 has been corrupted.

right, that is what I was worried about..  but what about something
along the lines of:

		case 8: {						\
			if (sizeof(x) < 8)				\
				__get_user_x(__r2, __p, __e, __l, 4);	\
			else						\
				__get_user_x(__r2, __p, __e, __l, 8);	\
			break;						\
		}							\

maybe we need a special variant of __get_user_8() instead to get the
right 32bits on big vs little endian systems, but I think something
roughly along these lines could work.

Or maybe in sizeof(x)<8 case, we just __get_user_bad()..  I'm not 100%
sure on whether this is supposed to be treated as an error case at
compile time or not.

BR,
-R

> That's too unsafe.
>
> I just tried a variant of your approach, but got lots of warnings again:
>         register unsigned long long __r2 asm("r2");
>         __get_user_x(__r2, __p, __e, 8, "lr");
>         x = (typeof(*(__p))) ((typeof(x))__r2);
> because typeof(x) in the test_ptr() case ends up being a pointer itself.
>
> So, back to the drawing board, and I think back to the original position
> of "we don't support this".
> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann Nov. 13, 2012, 9:11 a.m. UTC | #11
On Tuesday 13 November 2012, Rob Clark wrote:
> right, that is what I was worried about..  but what about something
> along the lines of:
> 
>                 case 8: {                                               \
>                         if (sizeof(x) < 8)                              \
>                                 __get_user_x(__r2, __p, __e, __l, 4);   \
>                         else                                            \
>                                 __get_user_x(__r2, __p, __e, __l, 8);   \
>                         break;                                          \
>                 }                                                       \

I guess that's still broken if x is 8 or 16 bits wide.

> maybe we need a special variant of __get_user_8() instead to get the
> right 32bits on big vs little endian systems, but I think something
> roughly along these lines could work.
> 
> Or maybe in sizeof(x)<8 case, we just __get_user_bad()..  I'm not 100%
> sure on whether this is supposed to be treated as an error case at
> compile time or not.

We know that nobody is using that at the moment, so we could define
it to be a compile-time error.

But I still think this is a pointless exercise, a number of people have
concluded independently that it's not worth trying to come up with a
solution, whether one exists or not. Why can't you just use copy_from_user()
anyway?

	Arnd
Russell King - ARM Linux Nov. 13, 2012, 11:04 a.m. UTC | #12
On Mon, Nov 12, 2012 at 06:31:50PM -0600, Rob Clark wrote:
> right, that is what I was worried about..  but what about something
> along the lines of:
> 
> 		case 8: {						\
> 			if (sizeof(x) < 8)				\
> 				__get_user_x(__r2, __p, __e, __l, 4);	\
> 			else						\
> 				__get_user_x(__r2, __p, __e, __l, 8);	\
> 			break;						\
> 		}							\
> 
> maybe we need a special variant of __get_user_8() instead to get the
> right 32bits on big vs little endian systems, but I think something
> roughly along these lines could work.

The problem with that is... big endian systems - where the 32-bit word we
want is the second one, so you can't just reduce that down to a 32-bit
access like that.  You also need to add an offset to the pointer in the BE
case (which can be done.)

I'd suggest calling the 4-byte version in there __get_user_xb() and doing
the 4-byte offset for BE inside that (or aliasing it to __get_user_x for
LE systems).
Russell King - ARM Linux Nov. 13, 2012, 11:24 a.m. UTC | #13
On Tue, Nov 13, 2012 at 09:11:09AM +0000, Arnd Bergmann wrote:
> On Tuesday 13 November 2012, Rob Clark wrote:
> > right, that is what I was worried about..  but what about something
> > along the lines of:
> > 
> >                 case 8: {                                               \
> >                         if (sizeof(x) < 8)                              \
> >                                 __get_user_x(__r2, __p, __e, __l, 4);   \
> >                         else                                            \
> >                                 __get_user_x(__r2, __p, __e, __l, 8);   \
> >                         break;                                          \
> >                 }                                                       \
> 
> I guess that's still broken if x is 8 or 16 bits wide.

Actually, it isn't - because if x is 8 or 16 bits wide, and we load
a 32-bit quantity, all that follows is a narrowing cast which is exactly
what happens today.  We don't have a problem with register allocation
like we have in the 32-bit x vs 64-bit pointer target type, which is what
the above code works around.

> > maybe we need a special variant of __get_user_8() instead to get the
> > right 32bits on big vs little endian systems, but I think something
> > roughly along these lines could work.
> > 
> > Or maybe in sizeof(x)<8 case, we just __get_user_bad()..  I'm not 100%
> > sure on whether this is supposed to be treated as an error case at
> > compile time or not.
> 
> We know that nobody is using that at the moment, so we could define
> it to be a compile-time error.
> 
> But I still think this is a pointless exercise, a number of people have
> concluded independently that it's not worth trying to come up with a
> solution, whether one exists or not. Why can't you just use copy_from_user()
> anyway?

You're missing something; that is one of the greatest powers of open
source.  The many eyes (and minds) effect.  Someone out there probably
has a solution to whatever problem, the trick is to find that person. :)

I think we have a working solution for this for ARM.  It won't be suitable
for every arch, where they have 8-bit and 16-bit registers able to be
allocated by the compiler, but for architectures where the minimum register
size is 32-bit, what we have below should work.

In other words, I don't think this will work for x86-32 where ax, al, ah
as well as eax are still available.

What I have currently in my test file, which appears to work correctly,
is (bear in mind this is based upon an older version of get_user() which
pre-dates Will's cleanups):

#define __get_user_x(__r2,__p,__e,__s,__i...)                           \
           __asm__ __volatile__ (                                       \
                "bl     __get_user_" #__s                               \
                : "=&r" (__e), "=r" (__r2)                              \
                : "0" (__p)                                             \
                : __i, "cc")

#ifdef BIG_ENDIAN
#define __get_user_xb(__r2,__p,__e,__s,__i...)                          \
	__get_user_x(__r2,(uintptr_t)__p + 4,__s,__i)
#else
#define __get_user_xb __get_user_x
#endif

#define get_user(x,p)                                                   \
        ({                                                              \
                register const typeof(*(p)) __user *__p asm("r0") = (p);\
                register int __e asm("r0");                             \
                register typeof(x) __r2 asm("r2");                      \
                switch (sizeof(*(__p))) {                               \
                case 1:                                                 \
                        __get_user_x(__r2, __p, __e, 1, "lr");          \
                        break;                                          \
                case 2:                                                 \
                        __get_user_x(__r2, __p, __e, 2, "r3", "lr");    \
                        break;                                          \
                case 4:                                                 \
                        __get_user_x(__r2, __p, __e, 4, "lr");          \
                        break;                                          \
                case 8:                                                 \
                        if (sizeof((x)) < 8)                            \
                                __get_user_xb(__r2, __p, __e, 4, "lr"); \
                        else                                            \
                                __get_user_x(__r2, __p, __e, 8, "lr");  \
                        break;                                          \
                default: __e = __get_user_bad(); break;                 \
                }                                                       \
                x = (typeof(*(__p))) __r2;                              \
                __e;                                                    \
        })

Tested with 8, 16, 32, 64 ints, 32bit int with const p, pointers, const
pointers, pointer to 64bit with 32bit target, pointer-to-const-pointer
to non-const pointer (which should and does warn).
Arnd Bergmann Nov. 15, 2012, 9:19 a.m. UTC | #14
On Tuesday 13 November 2012, Russell King - ARM Linux wrote:
> You're missing something; that is one of the greatest powers of open
> source.  The many eyes (and minds) effect.  Someone out there probably
> has a solution to whatever problem, the trick is to find that person. :)
> 
> I think we have a working solution for this for ARM.  It won't be suitable
> for every arch, where they have 8-bit and 16-bit registers able to be
> allocated by the compiler, but for architectures where the minimum register
> size is 32-bit, what we have below should work.

I don't mind at all adding the extension to ARM, and I think it's pretty
cool that you guys actually found a working solution.

The part that worries me is that we are making architecture independent
code depend on a clever hack that may or may not be possible to implement
on a given architecture, and that most architecture maintainers wouldn't
know how to implement correctly even if it's possible.

	Arnd
Rob Clark Nov. 15, 2012, 1:04 p.m. UTC | #15
On Thu, Nov 15, 2012 at 3:19 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Tuesday 13 November 2012, Russell King - ARM Linux wrote:
>> You're missing something; that is one of the greatest powers of open
>> source.  The many eyes (and minds) effect.  Someone out there probably
>> has a solution to whatever problem, the trick is to find that person. :)
>>
>> I think we have a working solution for this for ARM.  It won't be suitable
>> for every arch, where they have 8-bit and 16-bit registers able to be
>> allocated by the compiler, but for architectures where the minimum register
>> size is 32-bit, what we have below should work.
>
> I don't mind at all adding the extension to ARM, and I think it's pretty
> cool that you guys actually found a working solution.
>
> The part that worries me is that we are making architecture independent
> code depend on a clever hack that may or may not be possible to implement
> on a given architecture, and that most architecture maintainers wouldn't
> know how to implement correctly even if it's possible.

I could always send a 3rd version with a comment smashed on about why
that works if you think this is a problem..

BR,
-R

>         Arnd
> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann Nov. 15, 2012, 1:39 p.m. UTC | #16
On Thursday 15 November 2012, Rob Clark wrote:
> On Thu, Nov 15, 2012 at 3:19 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> > On Tuesday 13 November 2012, Russell King - ARM Linux wrote:
> >> You're missing something; that is one of the greatest powers of open
> >> source.  The many eyes (and minds) effect.  Someone out there probably
> >> has a solution to whatever problem, the trick is to find that person. :)
> >>
> >> I think we have a working solution for this for ARM.  It won't be suitable
> >> for every arch, where they have 8-bit and 16-bit registers able to be
> >> allocated by the compiler, but for architectures where the minimum register
> >> size is 32-bit, what we have below should work.
> >
> > I don't mind at all adding the extension to ARM, and I think it's pretty
> > cool that you guys actually found a working solution.
> >
> > The part that worries me is that we are making architecture independent
> > code depend on a clever hack that may or may not be possible to implement
> > on a given architecture, and that most architecture maintainers wouldn't
> > know how to implement correctly even if it's possible.
> 
> I could always send a 3rd version with a comment smashed on about why
> that works if you think this is a problem..

Comments are always good, so I'd surely like to see those get added.
As I said, I don't have any objections to the addition of your patch to
the ARM code, which sounds useful to have.

I still haven't heard a conclusive argument why we need to use get_user()
rather than copy_from_user() in the DRM code. Is this about a fast path
where you want to shave off a few cycles for each call, or does this
simplify the code structure, or something else?

	Arnd
Rob Clark Nov. 15, 2012, 1:46 p.m. UTC | #17
On Thu, Nov 15, 2012 at 7:39 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Thursday 15 November 2012, Rob Clark wrote:
>> On Thu, Nov 15, 2012 at 3:19 AM, Arnd Bergmann <arnd@arndb.de> wrote:
>> > On Tuesday 13 November 2012, Russell King - ARM Linux wrote:
>> >> You're missing something; that is one of the greatest powers of open
>> >> source.  The many eyes (and minds) effect.  Someone out there probably
>> >> has a solution to whatever problem, the trick is to find that person. :)
>> >>
>> >> I think we have a working solution for this for ARM.  It won't be suitable
>> >> for every arch, where they have 8-bit and 16-bit registers able to be
>> >> allocated by the compiler, but for architectures where the minimum register
>> >> size is 32-bit, what we have below should work.
>> >
>> > I don't mind at all adding the extension to ARM, and I think it's pretty
>> > cool that you guys actually found a working solution.
>> >
>> > The part that worries me is that we are making architecture independent
>> > code depend on a clever hack that may or may not be possible to implement
>> > on a given architecture, and that most architecture maintainers wouldn't
>> > know how to implement correctly even if it's possible.
>>
>> I could always send a 3rd version with a comment smashed on about why
>> that works if you think this is a problem..
>
> Comments are always good, so I'd surely like to see those get added.
> As I said, I don't have any objections to the addition of your patch to
> the ARM code, which sounds useful to have.

ok, I'll send a v3 w/ some additional comments

> I still haven't heard a conclusive argument why we need to use get_user()
> rather than copy_from_user() in the DRM code. Is this about a fast path
> where you want to shave off a few cycles for each call, or does this
> simplify the code structure, or something else?

well, it is mostly because it seemed like a good idea to first try to
solve the root issue, rather than having to fix things up in each
driver when someone from x86-world introduces a 64b get_user()..

There are some other arch's that don't have 64b get_user(), but I
don't think any that have any DRM drivers.  If 64b get_user() is
really not intended to be supported by all archs, it is better to
remove it from x86 and the other arch's that do currently support it,
rather than making it possible to write drivers that are broken on
some archs.

BR,
-R

>         Arnd
> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann Nov. 15, 2012, 2:39 p.m. UTC | #18
On Thursday 15 November 2012, Rob Clark wrote:
> > I still haven't heard a conclusive argument why we need to use get_user()
> > rather than copy_from_user() in the DRM code. Is this about a fast path
> > where you want to shave off a few cycles for each call, or does this
> > simplify the code structure, or something else?
> 
> well, it is mostly because it seemed like a good idea to first try to
> solve the root issue, rather than having to fix things up in each
> driver when someone from x86-world introduces a 64b get_user()..

As pointed out by hpa earlier, x86-32 doesn't have a 64b get_user
either. I don't think we have a lot of drivers that are used only
on 64-bit x86 and on 32-bit ARM but not on 32-bit x86.

> There are some other arch's that don't have 64b get_user(), but I
> don't think any that have any DRM drivers.  If 64b get_user() is
> really not intended to be supported by all archs, it is better to
> remove it from x86 and the other arch's that do currently support it,
> rather than making it possible to write drivers that are broken on
> some archs.

The majority of architectures we support have PCI and should be able
to build the regular (radeon, nouveau, MGA, VIA, ...) DRM drivers
AFAICT.

	Arnd
diff mbox

Patch

diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 7e1f760..2e3fdb2 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -100,6 +100,7 @@  static inline void set_fs(mm_segment_t fs)
 extern int __get_user_1(void *);
 extern int __get_user_2(void *);
 extern int __get_user_4(void *);
+extern int __get_user_8(void *);
 
 #define __GUP_CLOBBER_1	"lr", "cc"
 #ifdef CONFIG_CPU_USE_DOMAINS
@@ -108,6 +109,7 @@  extern int __get_user_4(void *);
 #define __GUP_CLOBBER_2 "lr", "cc"
 #endif
 #define __GUP_CLOBBER_4	"lr", "cc"
+#define __GUP_CLOBBER_8	"lr", "cc"
 
 #define __get_user_x(__r2,__p,__e,__l,__s)				\
 	   __asm__ __volatile__ (					\
@@ -122,22 +124,35 @@  extern int __get_user_4(void *);
 	({								\
 		unsigned long __limit = current_thread_info()->addr_limit - 1; \
 		register const typeof(*(p)) __user *__p asm("r0") = (p);\
-		register unsigned long __r2 asm("r2");			\
 		register unsigned long __l asm("r1") = __limit;		\
 		register int __e asm("r0");				\
 		switch (sizeof(*(__p))) {				\
-		case 1:							\
+		case 1: {						\
+			register unsigned long __r2 asm("r2");		\
 			__get_user_x(__r2, __p, __e, __l, 1);		\
+			x = (typeof(*(p))) __r2;			\
 			break;						\
-		case 2:							\
+		}							\
+		case 2: {						\
+			register unsigned long __r2 asm("r2");		\
 			__get_user_x(__r2, __p, __e, __l, 2);		\
+			x = (typeof(*(p))) __r2;			\
 			break;						\
-		case 4:							\
+		}							\
+		case 4: {						\
+			register unsigned long __r2 asm("r2");		\
 			__get_user_x(__r2, __p, __e, __l, 4);		\
+			x = (typeof(*(p))) __r2;			\
+			break;						\
+		}							\
+		case 8: {						\
+			register unsigned long long __r2 asm("r2");	\
+			__get_user_x(__r2, __p, __e, __l, 8);		\
+			x = (typeof(*(p))) __r2;			\
 			break;						\
+		}							\
 		default: __e = __get_user_bad(); break;			\
 		}							\
-		x = (typeof(*(p))) __r2;				\
 		__e;							\
 	})
 
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 9b06bb4..d05285c 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -18,7 +18,7 @@ 
  * Inputs:	r0 contains the address
  *		r1 contains the address limit, which must be preserved
  * Outputs:	r0 is the error code
- *		r2 contains the zero-extended value
+ *		r2, r3 contains the zero-extended value
  *		lr corrupted
  *
  * No other registers must be altered.  (see <asm/uaccess.h>
@@ -66,6 +66,19 @@  ENTRY(__get_user_4)
 	mov	pc, lr
 ENDPROC(__get_user_4)
 
+ENTRY(__get_user_8)
+	check_uaccess r0, 4, r1, r2, __get_user_bad
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(ldr)	r2, [r0]
+6: TUSER(ldr)	r3, [r0, #4]
+#else
+5: TUSER(ldr)	r2, [r0], #4
+6: TUSER(ldr)	r3, [r0]
+#endif
+	mov	r0, #0
+	mov	pc, lr
+ENDPROC(__get_user_8)
+
 __get_user_bad:
 	mov	r2, #0
 	mov	r0, #-EFAULT
@@ -77,4 +90,6 @@  ENDPROC(__get_user_bad)
 	.long	2b, __get_user_bad
 	.long	3b, __get_user_bad
 	.long	4b, __get_user_bad
+	.long	5b, __get_user_bad
+	.long	6b, __get_user_bad
 .popsection