diff mbox

[20/25] arm64:ilp32: add sys_ilp32.c and a separate table (in entry.S) to use it

Message ID 1459894127-17698-21-git-send-email-ynorov@caviumnetworks.com
State New
Headers show

Commit Message

Yury Norov April 5, 2016, 10:08 p.m. UTC
From: Andrew Pinski <apinski@cavium.com>


Add a separate syscall-table for ILP32, which dispatches either to native
LP64 system call implementation or to compat-syscalls, as appropriate.

Signed-off-by: Andrew Pinski <Andrew.Pinski@caviumnetworks.com>

Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>

---
 arch/arm64/include/asm/unistd.h | 11 ++++++-
 arch/arm64/kernel/Makefile      |  2 +-
 arch/arm64/kernel/entry.S       | 12 +++++++-
 arch/arm64/kernel/sys_ilp32.c   | 65 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm64/kernel/sys_ilp32.c

-- 
2.5.0

Comments

Catalin Marinas April 25, 2016, 5:26 p.m. UTC | #1
On Wed, Apr 06, 2016 at 01:08:42AM +0300, Yury Norov wrote:
> --- a/arch/arm64/kernel/entry.S

> +++ b/arch/arm64/kernel/entry.S

> @@ -715,9 +715,13 @@ ENDPROC(ret_from_fork)

>   */

>  	.align	6

>  el0_svc:

> -	adrp	stbl, sys_call_table		// load syscall table pointer

>  	uxtw	scno, w8			// syscall number in w8

>  	mov	sc_nr, #__NR_syscalls

> +#ifdef CONFIG_ARM64_ILP32

> +	ldr	x16, [tsk, #TI_FLAGS]

> +	tbnz	x16, #TIF_32BIT_AARCH64, el0_ilp32_svc // We are using ILP32

> +#endif


There is another ldr x16, [tsk, #TI_FLAGS] load further down in the
el0_svc_naked block. We should rework these a bit to avoid loading the
same location twice unnecessarily. E.g. move the ldr x16 just before
el0_svc_naked and branch one line after in case of the ILP32 syscall.

> +	adrp	stbl, sys_call_table		// load syscall table pointer

>  el0_svc_naked:					// compat entry point

>  	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number

>  	enable_dbg_and_irq

> @@ -737,6 +741,12 @@ ni_sys:

>  	b	ret_fast_syscall

>  ENDPROC(el0_svc)

>  

> +#ifdef CONFIG_ARM64_ILP32

> +el0_ilp32_svc:

> +	adrp	stbl, sys_call_ilp32_table // load syscall table pointer

> +	b el0_svc_naked

> +#endif

> +

>  	/*

>  	 * This is the really slow path.  We're going to be doing context

>  	 * switches, and waiting for our parent to respond.


-- 
Catalin
Yury Norov April 25, 2016, 6:19 p.m. UTC | #2
On Mon, Apr 25, 2016 at 06:26:56PM +0100, Catalin Marinas wrote:
> On Wed, Apr 06, 2016 at 01:08:42AM +0300, Yury Norov wrote:

> > --- a/arch/arm64/kernel/entry.S

> > +++ b/arch/arm64/kernel/entry.S

> > @@ -715,9 +715,13 @@ ENDPROC(ret_from_fork)

> >   */

> >  	.align	6

> >  el0_svc:

> > -	adrp	stbl, sys_call_table		// load syscall table pointer

> >  	uxtw	scno, w8			// syscall number in w8

> >  	mov	sc_nr, #__NR_syscalls

> > +#ifdef CONFIG_ARM64_ILP32

> > +	ldr	x16, [tsk, #TI_FLAGS]

> > +	tbnz	x16, #TIF_32BIT_AARCH64, el0_ilp32_svc // We are using ILP32

> > +#endif

> 

> There is another ldr x16, [tsk, #TI_FLAGS] load further down in the

> el0_svc_naked block. We should rework these a bit to avoid loading the

> same location twice unnecessarily. E.g. move the ldr x16 just before

> el0_svc_naked and branch one line after in case of the ILP32 syscall.

> 


Yes, I thiks we can refactor it. Thanks for a catch.

> > +	adrp	stbl, sys_call_table		// load syscall table pointer

> >  el0_svc_naked:					// compat entry point

> >  	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number

> >  	enable_dbg_and_irq

> > @@ -737,6 +741,12 @@ ni_sys:

> >  	b	ret_fast_syscall

> >  ENDPROC(el0_svc)

> >  

> > +#ifdef CONFIG_ARM64_ILP32

> > +el0_ilp32_svc:

> > +	adrp	stbl, sys_call_ilp32_table // load syscall table pointer

> > +	b el0_svc_naked

> > +#endif

> > +

> >  	/*

> >  	 * This is the really slow path.  We're going to be doing context

> >  	 * switches, and waiting for our parent to respond.

> 

> -- 

> Catalin
Yury Norov April 25, 2016, 6:47 p.m. UTC | #3
On Mon, Apr 25, 2016 at 09:19:13PM +0300, Yury Norov wrote:
> On Mon, Apr 25, 2016 at 06:26:56PM +0100, Catalin Marinas wrote:

> > On Wed, Apr 06, 2016 at 01:08:42AM +0300, Yury Norov wrote:

> > > --- a/arch/arm64/kernel/entry.S

> > > +++ b/arch/arm64/kernel/entry.S

> > > @@ -715,9 +715,13 @@ ENDPROC(ret_from_fork)

> > >   */

> > >  	.align	6

> > >  el0_svc:

> > > -	adrp	stbl, sys_call_table		// load syscall table pointer

> > >  	uxtw	scno, w8			// syscall number in w8

> > >  	mov	sc_nr, #__NR_syscalls

> > > +#ifdef CONFIG_ARM64_ILP32

> > > +	ldr	x16, [tsk, #TI_FLAGS]

> > > +	tbnz	x16, #TIF_32BIT_AARCH64, el0_ilp32_svc // We are using ILP32

> > > +#endif

> > 

> > There is another ldr x16, [tsk, #TI_FLAGS] load further down in the

> > el0_svc_naked block. We should rework these a bit to avoid loading the

> > same location twice unnecessarily. E.g. move the ldr x16 just before

> > el0_svc_naked and branch one line after in case of the ILP32 syscall.

> > 

> 

> Yes, I thiks we can refactor it. Thanks for a catch.


Now it's better, I thinkdiff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index cf4d1ae..21312bb 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -715,16 +715,22 @@ ENDPROC(ret_from_fork)
  */
 	.align	6
 el0_svc:
-	adrp	stbl, sys_call_table		// load syscall table pointer
 	uxtw	scno, w8			// syscall number in w8
 	mov	sc_nr, #__NR_syscalls
+	ldr	x16, [tsk, #TI_FLAGS]
+#ifdef CONFIG_ARM64_ILP32
+	tbz	x16, #TIF_32BIT_AARCH64, el0_lp64_svc // We are using ILP32
+	adrp	stbl, sys_call_ilp32_table	// load ilp32 syscall table pointer
+	b el0_svc_naked
+el0_lp64_svc:
+#endif
+	adrp	stbl, sys_call_table		// load syscall table pointer
 el0_svc_naked:					// compat entry point
 	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number
 	enable_dbg_and_irq
 	ct_user_exit 1
 
-	ldr	x16, [tsk, #TI_FLAGS]		// check for syscall hooks
-	tst	x16, #_TIF_SYSCALL_WORK
+	tst	x16, #_TIF_SYSCALL_WORK		// check for syscall hooks
 	b.ne	__sys_trace
 	cmp     scno, sc_nr                     // check upper syscall limit
 	b.hs	ni_sys

Catalin Marinas April 26, 2016, 10:08 a.m. UTC | #4
On Mon, Apr 25, 2016 at 09:47:40PM +0300, Yury Norov wrote:
> On Mon, Apr 25, 2016 at 09:19:13PM +0300, Yury Norov wrote:

> > On Mon, Apr 25, 2016 at 06:26:56PM +0100, Catalin Marinas wrote:

> > > On Wed, Apr 06, 2016 at 01:08:42AM +0300, Yury Norov wrote:

> > > > --- a/arch/arm64/kernel/entry.S

> > > > +++ b/arch/arm64/kernel/entry.S

> > > > @@ -715,9 +715,13 @@ ENDPROC(ret_from_fork)

> > > >   */

> > > >  	.align	6

> > > >  el0_svc:

> > > > -	adrp	stbl, sys_call_table		// load syscall table pointer

> > > >  	uxtw	scno, w8			// syscall number in w8

> > > >  	mov	sc_nr, #__NR_syscalls

> > > > +#ifdef CONFIG_ARM64_ILP32

> > > > +	ldr	x16, [tsk, #TI_FLAGS]

> > > > +	tbnz	x16, #TIF_32BIT_AARCH64, el0_ilp32_svc // We are using ILP32

> > > > +#endif

> > > 

> > > There is another ldr x16, [tsk, #TI_FLAGS] load further down in the

> > > el0_svc_naked block. We should rework these a bit to avoid loading the

> > > same location twice unnecessarily. E.g. move the ldr x16 just before

> > > el0_svc_naked and branch one line after in case of the ILP32 syscall.

> > > 

> > 

> > Yes, I thiks we can refactor it. Thanks for a catch.

> 

> Now it's better, I think

> 

> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S

> index cf4d1ae..21312bb 100644

> --- a/arch/arm64/kernel/entry.S

> +++ b/arch/arm64/kernel/entry.S

> @@ -715,16 +715,22 @@ ENDPROC(ret_from_fork)

>   */

>  	.align	6

>  el0_svc:

> -	adrp	stbl, sys_call_table		// load syscall table pointer

>  	uxtw	scno, w8			// syscall number in w8

>  	mov	sc_nr, #__NR_syscalls

> +	ldr	x16, [tsk, #TI_FLAGS]


You can move this higher up for interlocking reasons (though these days
CPUs do a lot of speculative loads).

> +#ifdef CONFIG_ARM64_ILP32

> +	tbz	x16, #TIF_32BIT_AARCH64, el0_lp64_svc // We are using ILP32


	// We are *not* using ILP32

> +	adrp	stbl, sys_call_ilp32_table	// load ilp32 syscall table pointer

> +	b el0_svc_naked

> +el0_lp64_svc:

> +#endif

> +	adrp	stbl, sys_call_table		// load syscall table pointer


You can avoid the branches by using csel, something like this:

	ldr	x16, [tsk, #TI_FLAGS]
	adrp	stbl, sys_call_table
	...
#ifdef CONFIG_ARM64_ILP32
	adrp	x17, sys_call_ilp32_table
	tst	x16, #_TIF_32BIT_AARCH64
	csel	stbl, stbl, x17, eq
#endif
el0_svc_naked:
	...

>  el0_svc_naked:					// compat entry point

>  	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number

>  	enable_dbg_and_irq

>  	ct_user_exit 1

>  

> -	ldr	x16, [tsk, #TI_FLAGS]		// check for syscall hooks

> -	tst	x16, #_TIF_SYSCALL_WORK

> +	tst	x16, #_TIF_SYSCALL_WORK		// check for syscall hooks

>  	b.ne	__sys_trace

>  	cmp     scno, sc_nr                     // check upper syscall limit

>  	b.hs	ni_sys


There is el0_svc_compat branching to el0_svc_naked and it won't have x16
set anymore. So you need to add an ldr x16 to el0_svc_compat as well.

-- 
Catalin
Catalin Marinas April 26, 2016, 4:57 p.m. UTC | #5
On Wed, Apr 06, 2016 at 01:08:42AM +0300, Yury Norov wrote:
> +/* Using non-compat syscalls where necessary */

> +#define compat_sys_fadvise64_64        sys_fadvise64_64

> +#define compat_sys_fallocate           sys_fallocate

> +#define compat_sys_ftruncate64         sys_ftruncate

> +#define compat_sys_lookup_dcookie      sys_lookup_dcookie

> +#define compat_sys_pread64             sys_pread64

> +#define compat_sys_pwrite64            sys_pwrite64

> +#define compat_sys_readahead           sys_readahead

> +#define compat_sys_shmat               sys_shmat


Why don't we use compat_sys_shmat? Is it because of COMPAT_SHMLBA?

> +#define compat_sys_sync_file_range     sys_sync_file_range

> +#define compat_sys_truncate64          sys_truncate

> +#define sys_llseek                     sys_lseek

> +#define sys_mmap2		       sys_mmap


Nitpick: there are some whitespace inconsistencies above (just convert
all spaces to tabs).

I think you should also update Documentation/arm64/ilp32.txt to include
the list above.

> +

> +#include <asm/syscall.h>

> +

> +#undef __SYSCALL

> +#undef __SC_COMP

> +#undef __SC_WRAP

> +#undef __SC_3264

> +#undef __SC_COMP_3264


Minor detail: do we actually need to undef all these? Maybe we can get
away with just defining __SYSCALL_COMPAT at the top of the file.

> +

> +#define __SYSCALL_COMPAT

> +#define __SYSCALL(nr, sym)	[nr] = sym,

> +#define __SC_WRAP(nr, sym)	[nr] = compat_##sym,

> +

> +/*

> + * The sys_call_ilp32_table array must be 4K aligned to be accessible from

> + * kernel/entry.S.

> + */

> +void *sys_call_ilp32_table[__NR_syscalls] __aligned(4096) = {

> +	[0 ... __NR_syscalls - 1] = sys_ni_syscall,

> +#include <asm/unistd.h>

> +};


-- 
Catalin
Yury Norov April 28, 2016, 7:19 p.m. UTC | #6
On Tue, Apr 26, 2016 at 05:57:01PM +0100, Catalin Marinas wrote:
> On Wed, Apr 06, 2016 at 01:08:42AM +0300, Yury Norov wrote:

> > +/* Using non-compat syscalls where necessary */

> > +#define compat_sys_fadvise64_64        sys_fadvise64_64

> > +#define compat_sys_fallocate           sys_fallocate

> > +#define compat_sys_ftruncate64         sys_ftruncate

> > +#define compat_sys_lookup_dcookie      sys_lookup_dcookie

> > +#define compat_sys_pread64             sys_pread64

> > +#define compat_sys_pwrite64            sys_pwrite64

> > +#define compat_sys_readahead           sys_readahead

> > +#define compat_sys_shmat               sys_shmat

> 

> Why don't we use compat_sys_shmat? Is it because of COMPAT_SHMLBA?


Yes. COMPAT_SHMLBA is 4 pages, and it's aarch32-only limitation.

> 

> > +#define compat_sys_sync_file_range     sys_sync_file_range

> > +#define compat_sys_truncate64          sys_truncate

> > +#define sys_llseek                     sys_lseek

> > +#define sys_mmap2		       sys_mmap

> 

> Nitpick: there are some whitespace inconsistencies above (just convert

> all spaces to tabs).

> 

> I think you should also update Documentation/arm64/ilp32.txt to include

> the list above.


OK

> 

> > +

> > +#include <asm/syscall.h>

> > +

> > +#undef __SYSCALL

> > +#undef __SC_COMP

> > +#undef __SC_WRAP

> > +#undef __SC_3264

> > +#undef __SC_COMP_3264

> 

> Minor detail: do we actually need to undef all these? Maybe we can get

> away with just defining __SYSCALL_COMPAT at the top of the file.

> 


Yes, we need. Otherwise we have circular dependency like this:
arch/arm64/kernel/sys_ilp32.c:60:0: warning: "__SC_WRAP" redefined
 #define __SC_WRAP(nr, sym) [nr] = compat_##sym,
  ^
  In file included from include/asm-generic/unistd.h:1:0,
                   from ./arch/arm64/include/uapi/asm/unistd.h:16,
                   from ./arch/arm64/include/asm/unistd.h:62,
                   from ./include/uapi/linux/unistd.h:7,
                   from include/linux/syscalls.h:23,
                   from arch/arm64/kernel/sys_ilp32.c:30:
include/uapi/asm-generic/unistd.h:33:0: note: this is the location of the previous definition
 #define __SC_WRAP __SYSCALL

Defining __SYSCALL_COMPAT at the top of the file does not help much.

> > +

> > +#define __SYSCALL_COMPAT

> > +#define __SYSCALL(nr, sym)	[nr] = sym,

> > +#define __SC_WRAP(nr, sym)	[nr] = compat_##sym,

> > +

> > +/*

> > + * The sys_call_ilp32_table array must be 4K aligned to be accessible from

> > + * kernel/entry.S.

> > + */

> > +void *sys_call_ilp32_table[__NR_syscalls] __aligned(4096) = {

> > +	[0 ... __NR_syscalls - 1] = sys_ni_syscall,

> > +#include <asm/unistd.h>

> > +};

> 

> -- 

> Catalin
Arnd Bergmann April 28, 2016, 8:43 p.m. UTC | #7
On Thursday 28 April 2016 22:19:14 Yury Norov wrote:
> 

> Yes, we need. Otherwise we have circular dependency like this:

> arch/arm64/kernel/sys_ilp32.c:60:0: warning: "__SC_WRAP" redefined

>  #define __SC_WRAP(nr, sym) [nr] = compat_##sym,

>   ^

>   In file included from include/asm-generic/unistd.h:1:0,

>                    from ./arch/arm64/include/uapi/asm/unistd.h:16,

>                    from ./arch/arm64/include/asm/unistd.h:62,

>                    from ./include/uapi/linux/unistd.h:7,

>                    from include/linux/syscalls.h:23,

>                    from arch/arm64/kernel/sys_ilp32.c:30:

> include/uapi/asm-generic/unistd.h:33:0: note: this is the location of the previous definition

>  #define __SC_WRAP __SYSCALL

> 

> Defining __SYSCALL_COMPAT at the top of the file does not help much.


Hmm, this sounds like something that we should fix in the asm-generic/unistd.h
file. Is it just for __SC_WRAP, or also the other macros?

	Arnd
Yury Norov April 28, 2016, 10:21 p.m. UTC | #8
On Thu, Apr 28, 2016 at 10:43:59PM +0200, Arnd Bergmann wrote:
> On Thursday 28 April 2016 22:19:14 Yury Norov wrote:

> > 

> > Yes, we need. Otherwise we have circular dependency like this:

> > arch/arm64/kernel/sys_ilp32.c:60:0: warning: "__SC_WRAP" redefined

> >  #define __SC_WRAP(nr, sym) [nr] = compat_##sym,

> >   ^

> >   In file included from include/asm-generic/unistd.h:1:0,

> >                    from ./arch/arm64/include/uapi/asm/unistd.h:16,

> >                    from ./arch/arm64/include/asm/unistd.h:62,

> >                    from ./include/uapi/linux/unistd.h:7,

> >                    from include/linux/syscalls.h:23,

> >                    from arch/arm64/kernel/sys_ilp32.c:30:

> > include/uapi/asm-generic/unistd.h:33:0: note: this is the location of the previous definition

> >  #define __SC_WRAP __SYSCALL

> > 

> > Defining __SYSCALL_COMPAT at the top of the file does not help much.

> 

> Hmm, this sounds like something that we should fix in the asm-generic/unistd.h

> file. Is it just for __SC_WRAP, or also the other macros?

> 

> 	Arnd


For __SYSCALL and __SC_WRAP:

 #include <linux/fs.h>
@@ -48,13 +50,12 @@ asmlinkage long
 ilp32_sys_rt_sigreturn_wrapper(void);
   
 #include <asm/syscall.h>
     
-#undef __SYSCALL
-#undef __SC_COMP
-#undef __SC_WRAP
-#undef __SC_3264
-#undef __SC_COMP_3264
 
-#define __SYSCALL_COMPAT
 #define __SYSCALL(nr, sym)     [nr] = sym,
 #define __SC_WRAP(nr, sym)     [nr] = compat_##sym,
         
This patch makes gcc warn about redefinition.

arch/arm64/kernel/sys_ilp32.c:59:0: warning: "__SYSCALL" redefined
 #define __SYSCALL(nr, sym) [nr] = sym,
 ^
In file included from include/asm-generic/unistd.h:1:0,
                 from ./arch/arm64/include/uapi/asm/unistd.h:16,
                 from ./arch/arm64/include/asm/unistd.h:62,
                 from ./include/uapi/linux/unistd.h:7,
                 from include/linux/syscalls.h:23,
                 from arch/arm64/kernel/sys_ilp32.c:30:
include/uapi/asm-generic/unistd.h:15:0: note: this is the location of the previous definition
 #define __SYSCALL(x, y)
 ^
arch/arm64/kernel/sys_ilp32.c:60:0: warning: "__SC_WRAP" redefined
 #define __SC_WRAP(nr, sym) [nr] = compat_##sym,
 ^
In file included from include/asm-generic/unistd.h:1:0,
                 from ./arch/arm64/include/uapi/asm/unistd.h:16,
                 from ./arch/arm64/include/asm/unistd.h:62,
                 from ./include/uapi/linux/unistd.h:7,
                 from include/linux/syscalls.h:23,
                 from arch/arm64/kernel/sys_ilp32.c:30:
include/uapi/asm-generic/unistd.h:33:0: note: this is the location of the previous definition
 #define __SC_WRAP __SYSCALL
 ^diff --git a/arch/arm64/kernel/sys_ilp32.c
b/arch/arm64/kernel/sys_ilp32.c
index 1458ad7..410d817 100644
--- a/arch/arm64/kernel/sys_ilp32.c
+++ b/arch/arm64/kernel/sys_ilp32.c
@@ -17,6 +17,8 @@
 * along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 */
    
+#define __SYSCALL_COMPAT
+
 #include <linux/compiler.h>
 #include <linux/errno.h>

Zhangjian (Bamvor) May 6, 2016, 12:16 p.m. UTC | #9
Hi,

On 2016/4/6 6:08, Yury Norov wrote:
> From: Andrew Pinski <apinski@cavium.com>

>

> Add a separate syscall-table for ILP32, which dispatches either to native

> LP64 system call implementation or to compat-syscalls, as appropriate.

>

> Signed-off-by: Andrew Pinski <Andrew.Pinski@caviumnetworks.com>

> Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>

> ---

>   arch/arm64/include/asm/unistd.h | 11 ++++++-

>   arch/arm64/kernel/Makefile      |  2 +-

>   arch/arm64/kernel/entry.S       | 12 +++++++-

>   arch/arm64/kernel/sys_ilp32.c   | 65 +++++++++++++++++++++++++++++++++++++++++

>   4 files changed, 87 insertions(+), 3 deletions(-)

>   create mode 100644 arch/arm64/kernel/sys_ilp32.c

>

> diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h

> index 2971dea..5ea18ef 100644

> --- a/arch/arm64/include/asm/unistd.h

> +++ b/arch/arm64/include/asm/unistd.h

> @@ -13,9 +13,18 @@

>    * You should have received a copy of the GNU General Public License

>    * along with this program.  If not, see <http://www.gnu.org/licenses/>.

>    */

> +

> +#ifdef CONFIG_COMPAT

> +#define __ARCH_WANT_COMPAT_STAT64

> +#endif

> +

> +#ifdef CONFIG_ARM64_ILP32

> +#define __ARCH_WANT_COMPAT_SYS_PREADV64

> +#define __ARCH_WANT_COMPAT_SYS_PWRITEV64

> +#endif

> +

>   #ifdef CONFIG_AARCH32_EL0

>   #define __ARCH_WANT_COMPAT_SYS_GETDENTS64

> -#define __ARCH_WANT_COMPAT_STAT64

>   #define __ARCH_WANT_SYS_GETHOSTNAME

>   #define __ARCH_WANT_SYS_PAUSE

>   #define __ARCH_WANT_SYS_GETPGRP

> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile

> index 9dfdf86..7aa65ea 100644

> --- a/arch/arm64/kernel/Makefile

> +++ b/arch/arm64/kernel/Makefile

> @@ -28,7 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE

>   arm64-obj-$(CONFIG_AARCH32_EL0)		+= sys32.o kuser32.o signal32.o 	\

>   					   sys_compat.o entry32.o		\

>   					   ../../arm/kernel/opcodes.o binfmt_elf32.o

> -arm64-obj-$(CONFIG_ARM64_ILP32)		+= binfmt_ilp32.o

> +arm64-obj-$(CONFIG_ARM64_ILP32)		+= binfmt_ilp32.o sys_ilp32.o

>   arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o

>   arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o

>   arm64-obj-$(CONFIG_ARM64_MODULE_PLTS)	+= module-plts.o

> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S

> index cf4d1ae..1f7a145 100644

> --- a/arch/arm64/kernel/entry.S

> +++ b/arch/arm64/kernel/entry.S

> @@ -715,9 +715,13 @@ ENDPROC(ret_from_fork)

>    */

>   	.align	6

>   el0_svc:

> -	adrp	stbl, sys_call_table		// load syscall table pointer

>   	uxtw	scno, w8			// syscall number in w8

>   	mov	sc_nr, #__NR_syscalls

> +#ifdef CONFIG_ARM64_ILP32

> +	ldr	x16, [tsk, #TI_FLAGS]

> +	tbnz	x16, #TIF_32BIT_AARCH64, el0_ilp32_svc // We are using ILP32

> +#endif

> +	adrp	stbl, sys_call_table		// load syscall table pointer

>   el0_svc_naked:					// compat entry point

>   	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number

>   	enable_dbg_and_irq

> @@ -737,6 +741,12 @@ ni_sys:

>   	b	ret_fast_syscall

>   ENDPROC(el0_svc)

>

> +#ifdef CONFIG_ARM64_ILP32

> +el0_ilp32_svc:

> +	adrp	stbl, sys_call_ilp32_table // load syscall table pointer

> +	b el0_svc_naked

> +#endif

> +

>   	/*

>   	 * This is the really slow path.  We're going to be doing context

>   	 * switches, and waiting for our parent to respond.

> diff --git a/arch/arm64/kernel/sys_ilp32.c b/arch/arm64/kernel/sys_ilp32.c

> new file mode 100644

> index 0000000..0996d8e

> --- /dev/null

> +++ b/arch/arm64/kernel/sys_ilp32.c

> @@ -0,0 +1,65 @@

> +/*

> + * AArch64- ILP32 specific system calls implementation

> + *

> + * Copyright (C) 2016 Cavium Inc.

> + * Author: Andrew Pinski <apinski@cavium.com>

> + *

> + * This program is free software; you can redistribute it and/or modify

> + * it under the terms of the GNU General Public License version 2 as

> + * published by the Free Software Foundation.

> + *

> + * This program is distributed in the hope that it will be useful,

> + * but WITHOUT ANY WARRANTY; without even the implied warranty of

> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

> + * GNU General Public License for more details.

> + *

> + * You should have received a copy of the GNU General Public License

> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.

> + */

> +

> +#include <linux/compiler.h>

> +#include <linux/errno.h>

> +#include <linux/fs.h>

> +#include <linux/mm.h>

> +#include <linux/msg.h>

> +#include <linux/export.h>

> +#include <linux/sched.h>

> +#include <linux/slab.h>

> +#include <linux/syscalls.h>

> +#include <linux/compat.h>

> +#include <asm-generic/syscalls.h>

> +

> +/* Using non-compat syscalls where necessary */

> +#define compat_sys_fadvise64_64        sys_fadvise64_64

> +#define compat_sys_fallocate           sys_fallocate

> +#define compat_sys_ftruncate64         sys_ftruncate

> +#define compat_sys_lookup_dcookie      sys_lookup_dcookie

> +#define compat_sys_pread64             sys_pread64

> +#define compat_sys_pwrite64            sys_pwrite64

> +#define compat_sys_readahead           sys_readahead

> +#define compat_sys_shmat               sys_shmat

> +#define compat_sys_sync_file_range     sys_sync_file_range

> +#define compat_sys_truncate64          sys_truncate

> +#define sys_llseek                     sys_lseek

> +#define sys_mmap2		       sys_mmap

I am a little bit confused here. We wrap the mmap to mmap2 in glibc
without shift the 4096 and We map mmap2 to mmap in kernel which
means we shift with the real page size. It works unless the
application want to mmap the offset bigger then 2G. In ILP32 app,
if the offset is bigger than 2G(e.g. 0xfb000000), it is a negative
number and extend to 64bit nagetive number in kernel
(0xfffffff fb000000). I add the "COMPAT_SYSCALL_WRAP6(mmap, ...)" in
kernel/compat_wrapper.c. But it is not works. I am not sure if it is
already sign extended in userspace.

On the other hand, I read the code of mmap in arm and other
architecture. Usually, they will shift 4096 in userspace and shift
others in kernel if needed. Should we follow the similar ways or we
could call mmap_pgoff in glibc and do the shift according the real
page shift(getpages())?

Thanks

Bamvor
Yury Norov May 6, 2016, 12:37 p.m. UTC | #10
On Fri, May 06, 2016 at 08:16:48PM +0800, Zhangjian (Bamvor) wrote:
> Hi,

> 

> On 2016/4/6 6:08, Yury Norov wrote:

> >From: Andrew Pinski <apinski@cavium.com>

> >

> >Add a separate syscall-table for ILP32, which dispatches either to native

> >LP64 system call implementation or to compat-syscalls, as appropriate.

> >

> >Signed-off-by: Andrew Pinski <Andrew.Pinski@caviumnetworks.com>

> >Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>

> >---

> >  arch/arm64/include/asm/unistd.h | 11 ++++++-

> >  arch/arm64/kernel/Makefile      |  2 +-

> >  arch/arm64/kernel/entry.S       | 12 +++++++-

> >  arch/arm64/kernel/sys_ilp32.c   | 65 +++++++++++++++++++++++++++++++++++++++++

> >  4 files changed, 87 insertions(+), 3 deletions(-)

> >  create mode 100644 arch/arm64/kernel/sys_ilp32.c

> >

> >diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h

> >index 2971dea..5ea18ef 100644

> >--- a/arch/arm64/include/asm/unistd.h

> >+++ b/arch/arm64/include/asm/unistd.h

> >@@ -13,9 +13,18 @@

> >   * You should have received a copy of the GNU General Public License

> >   * along with this program.  If not, see <http://www.gnu.org/licenses/>.

> >   */

> >+

> >+#ifdef CONFIG_COMPAT

> >+#define __ARCH_WANT_COMPAT_STAT64

> >+#endif

> >+

> >+#ifdef CONFIG_ARM64_ILP32

> >+#define __ARCH_WANT_COMPAT_SYS_PREADV64

> >+#define __ARCH_WANT_COMPAT_SYS_PWRITEV64

> >+#endif

> >+

> >  #ifdef CONFIG_AARCH32_EL0

> >  #define __ARCH_WANT_COMPAT_SYS_GETDENTS64

> >-#define __ARCH_WANT_COMPAT_STAT64

> >  #define __ARCH_WANT_SYS_GETHOSTNAME

> >  #define __ARCH_WANT_SYS_PAUSE

> >  #define __ARCH_WANT_SYS_GETPGRP

> >diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile

> >index 9dfdf86..7aa65ea 100644

> >--- a/arch/arm64/kernel/Makefile

> >+++ b/arch/arm64/kernel/Makefile

> >@@ -28,7 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE

> >  arm64-obj-$(CONFIG_AARCH32_EL0)		+= sys32.o kuser32.o signal32.o 	\

> >  					   sys_compat.o entry32.o		\

> >  					   ../../arm/kernel/opcodes.o binfmt_elf32.o

> >-arm64-obj-$(CONFIG_ARM64_ILP32)		+= binfmt_ilp32.o

> >+arm64-obj-$(CONFIG_ARM64_ILP32)		+= binfmt_ilp32.o sys_ilp32.o

> >  arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o

> >  arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o

> >  arm64-obj-$(CONFIG_ARM64_MODULE_PLTS)	+= module-plts.o

> >diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S

> >index cf4d1ae..1f7a145 100644

> >--- a/arch/arm64/kernel/entry.S

> >+++ b/arch/arm64/kernel/entry.S

> >@@ -715,9 +715,13 @@ ENDPROC(ret_from_fork)

> >   */

> >  	.align	6

> >  el0_svc:

> >-	adrp	stbl, sys_call_table		// load syscall table pointer

> >  	uxtw	scno, w8			// syscall number in w8

> >  	mov	sc_nr, #__NR_syscalls

> >+#ifdef CONFIG_ARM64_ILP32

> >+	ldr	x16, [tsk, #TI_FLAGS]

> >+	tbnz	x16, #TIF_32BIT_AARCH64, el0_ilp32_svc // We are using ILP32

> >+#endif

> >+	adrp	stbl, sys_call_table		// load syscall table pointer

> >  el0_svc_naked:					// compat entry point

> >  	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number

> >  	enable_dbg_and_irq

> >@@ -737,6 +741,12 @@ ni_sys:

> >  	b	ret_fast_syscall

> >  ENDPROC(el0_svc)

> >

> >+#ifdef CONFIG_ARM64_ILP32

> >+el0_ilp32_svc:

> >+	adrp	stbl, sys_call_ilp32_table // load syscall table pointer

> >+	b el0_svc_naked

> >+#endif

> >+

> >  	/*

> >  	 * This is the really slow path.  We're going to be doing context

> >  	 * switches, and waiting for our parent to respond.

> >diff --git a/arch/arm64/kernel/sys_ilp32.c b/arch/arm64/kernel/sys_ilp32.c

> >new file mode 100644

> >index 0000000..0996d8e

> >--- /dev/null

> >+++ b/arch/arm64/kernel/sys_ilp32.c

> >@@ -0,0 +1,65 @@

> >+/*

> >+ * AArch64- ILP32 specific system calls implementation

> >+ *

> >+ * Copyright (C) 2016 Cavium Inc.

> >+ * Author: Andrew Pinski <apinski@cavium.com>

> >+ *

> >+ * This program is free software; you can redistribute it and/or modify

> >+ * it under the terms of the GNU General Public License version 2 as

> >+ * published by the Free Software Foundation.

> >+ *

> >+ * This program is distributed in the hope that it will be useful,

> >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of

> >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

> >+ * GNU General Public License for more details.

> >+ *

> >+ * You should have received a copy of the GNU General Public License

> >+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.

> >+ */

> >+

> >+#include <linux/compiler.h>

> >+#include <linux/errno.h>

> >+#include <linux/fs.h>

> >+#include <linux/mm.h>

> >+#include <linux/msg.h>

> >+#include <linux/export.h>

> >+#include <linux/sched.h>

> >+#include <linux/slab.h>

> >+#include <linux/syscalls.h>

> >+#include <linux/compat.h>

> >+#include <asm-generic/syscalls.h>

> >+

> >+/* Using non-compat syscalls where necessary */

> >+#define compat_sys_fadvise64_64        sys_fadvise64_64

> >+#define compat_sys_fallocate           sys_fallocate

> >+#define compat_sys_ftruncate64         sys_ftruncate

> >+#define compat_sys_lookup_dcookie      sys_lookup_dcookie

> >+#define compat_sys_pread64             sys_pread64

> >+#define compat_sys_pwrite64            sys_pwrite64

> >+#define compat_sys_readahead           sys_readahead

> >+#define compat_sys_shmat               sys_shmat

> >+#define compat_sys_sync_file_range     sys_sync_file_range

> >+#define compat_sys_truncate64          sys_truncate

> >+#define sys_llseek                     sys_lseek

> >+#define sys_mmap2		       sys_mmap

> I am a little bit confused here. We wrap the mmap to mmap2 in glibc

> without shift the 4096 and We map mmap2 to mmap in kernel which

> means we shift with the real page size. It works unless the

> application want to mmap the offset bigger then 2G. In ILP32 app,

> if the offset is bigger than 2G(e.g. 0xfb000000), it is a negative

> number and extend to 64bit nagetive number in kernel

> (0xfffffff fb000000). I add the "COMPAT_SYSCALL_WRAP6(mmap, ...)" in

> kernel/compat_wrapper.c. But it is not works. I am not sure if it is

> already sign extended in userspace.

> 

> On the other hand, I read the code of mmap in arm and other

> architecture. Usually, they will shift 4096 in userspace and shift

> others in kernel if needed. Should we follow the similar ways or we

> could call mmap_pgoff in glibc and do the shift according the real

> page shift(getpages())?

> 

> Thanks

> 

> Bamvor

> 

> 


Hi,

AFAIR, here we don't shift offset, as it's 64-bit both in user-
and kernel-space, and just pass it from user to kernel thru glibc
with no changes.

It definitely works, as there are many mappings made by linker and
libc in 2G+ area, and there are no problems with them. This is a
typical ILP32 application map:

00400000-00401000 r-xp 00000000 08:00 130400     /root/mykill
00410000-00411000 rwxp 00000000 08:00 130400     /root/mykill
00527000-00549000 rwxp 00000000 00:00 0          [heap]
c6278000-c6298000 rwxp 00000000 00:00 0 
c6298000-c63d0000 r-xp 00000000 08:00 135293     /root/sys-root/libilp32/libc-2.22.so
c63d0000-c63e0000 ---p 00138000 08:00 135293     /root/sys-root/libilp32/libc-2.22.so
c63e0000-c63e2000 r-xp 00138000 08:00 135293     /root/sys-root/libilp32/libc-2.22.so
c63e2000-c63e3000 rwxp 0013a000 08:00 135293     /root/sys-root/libilp32/libc-2.22.so
c63e3000-c63e6000 rwxp 00000000 00:00 0 
c63e6000-c63fc000 r-xp 00000000 08:00 135313     /root/sys-root/libilp32/libpthread-2.22.so
c63fc000-c640b000 ---p 00016000 08:00 135313     /root/sys-root/libilp32/libpthread-2.22.so
c640b000-c640c000 r-xp 00015000 08:00 135313     /root/sys-root/libilp32/libpthread-2.22.so
c640c000-c640d000 rwxp 00016000 08:00 135313     /root/sys-root/libilp32/libpthread-2.22.so
c640d000-c640f000 rwxp 00000000 00:00 0 
c640f000-c642c000 r-xp 00000000 08:00 135288     /root/sys-root/libilp32/ld-2.22.so
c6437000-c6439000 rwxp 00000000 00:00 0 
c6439000-c643a000 r--p 00000000 00:00 0          [vvar]
c643a000-c643b000 r-xp 00000000 00:00 0          [vdso]
c643b000-c643c000 r-xp 0001c000 08:00 135288     /root/sys-root/libilp32/ld-2.22.so
c643c000-c643d000 rwxp 0001d000 08:00 135288     /root/sys-root/libilp32/ld-2.22.so
ffe2d000-ffe4e000 rw-p 00000000 00:00 0          [stack]


> _______________________________________________

> linux-arm-kernel mailing list

> linux-arm-kernel@lists.infradead.org

> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Zhangjian (Bamvor) May 10, 2016, 7:42 a.m. UTC | #11
Hi, Yury

On 2016/5/6 20:37, Yury Norov wrote:
> On Fri, May 06, 2016 at 08:16:48PM +0800, Zhangjian (Bamvor) wrote:

>> Hi,

>>

>> On 2016/4/6 6:08, Yury Norov wrote:

>>> From: Andrew Pinski <apinski@cavium.com>

>>>

>>> Add a separate syscall-table for ILP32, which dispatches either to native

>>> LP64 system call implementation or to compat-syscalls, as appropriate.

>>>

>>> Signed-off-by: Andrew Pinski <Andrew.Pinski@caviumnetworks.com>

>>> Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>

>>> ---

>>>   arch/arm64/include/asm/unistd.h | 11 ++++++-

>>>   arch/arm64/kernel/Makefile      |  2 +-

>>>   arch/arm64/kernel/entry.S       | 12 +++++++-

>>>   arch/arm64/kernel/sys_ilp32.c   | 65 +++++++++++++++++++++++++++++++++++++++++

>>>   4 files changed, 87 insertions(+), 3 deletions(-)

>>>   create mode 100644 arch/arm64/kernel/sys_ilp32.c

>>>

>>> diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h

>>> index 2971dea..5ea18ef 100644

>>> --- a/arch/arm64/include/asm/unistd.h

>>> +++ b/arch/arm64/include/asm/unistd.h

>>> @@ -13,9 +13,18 @@

>>>    * You should have received a copy of the GNU General Public License

>>>    * along with this program.  If not, see <http://www.gnu.org/licenses/>.

>>>    */

>>> +

>>> +#ifdef CONFIG_COMPAT

>>> +#define __ARCH_WANT_COMPAT_STAT64

>>> +#endif

>>> +

>>> +#ifdef CONFIG_ARM64_ILP32

>>> +#define __ARCH_WANT_COMPAT_SYS_PREADV64

>>> +#define __ARCH_WANT_COMPAT_SYS_PWRITEV64

>>> +#endif

>>> +

>>>   #ifdef CONFIG_AARCH32_EL0

>>>   #define __ARCH_WANT_COMPAT_SYS_GETDENTS64

>>> -#define __ARCH_WANT_COMPAT_STAT64

>>>   #define __ARCH_WANT_SYS_GETHOSTNAME

>>>   #define __ARCH_WANT_SYS_PAUSE

>>>   #define __ARCH_WANT_SYS_GETPGRP

>>> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile

>>> index 9dfdf86..7aa65ea 100644

>>> --- a/arch/arm64/kernel/Makefile

>>> +++ b/arch/arm64/kernel/Makefile

>>> @@ -28,7 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE

>>>   arm64-obj-$(CONFIG_AARCH32_EL0)		+= sys32.o kuser32.o signal32.o 	\

>>>   					   sys_compat.o entry32.o		\

>>>   					   ../../arm/kernel/opcodes.o binfmt_elf32.o

>>> -arm64-obj-$(CONFIG_ARM64_ILP32)		+= binfmt_ilp32.o

>>> +arm64-obj-$(CONFIG_ARM64_ILP32)		+= binfmt_ilp32.o sys_ilp32.o

>>>   arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o

>>>   arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o

>>>   arm64-obj-$(CONFIG_ARM64_MODULE_PLTS)	+= module-plts.o

>>> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S

>>> index cf4d1ae..1f7a145 100644

>>> --- a/arch/arm64/kernel/entry.S

>>> +++ b/arch/arm64/kernel/entry.S

>>> @@ -715,9 +715,13 @@ ENDPROC(ret_from_fork)

>>>    */

>>>   	.align	6

>>>   el0_svc:

>>> -	adrp	stbl, sys_call_table		// load syscall table pointer

>>>   	uxtw	scno, w8			// syscall number in w8

>>>   	mov	sc_nr, #__NR_syscalls

>>> +#ifdef CONFIG_ARM64_ILP32

>>> +	ldr	x16, [tsk, #TI_FLAGS]

>>> +	tbnz	x16, #TIF_32BIT_AARCH64, el0_ilp32_svc // We are using ILP32

>>> +#endif

>>> +	adrp	stbl, sys_call_table		// load syscall table pointer

>>>   el0_svc_naked:					// compat entry point

>>>   	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number

>>>   	enable_dbg_and_irq

>>> @@ -737,6 +741,12 @@ ni_sys:

>>>   	b	ret_fast_syscall

>>>   ENDPROC(el0_svc)

>>>

>>> +#ifdef CONFIG_ARM64_ILP32

>>> +el0_ilp32_svc:

>>> +	adrp	stbl, sys_call_ilp32_table // load syscall table pointer

>>> +	b el0_svc_naked

>>> +#endif

>>> +

>>>   	/*

>>>   	 * This is the really slow path.  We're going to be doing context

>>>   	 * switches, and waiting for our parent to respond.

>>> diff --git a/arch/arm64/kernel/sys_ilp32.c b/arch/arm64/kernel/sys_ilp32.c

>>> new file mode 100644

>>> index 0000000..0996d8e

>>> --- /dev/null

>>> +++ b/arch/arm64/kernel/sys_ilp32.c

>>> @@ -0,0 +1,65 @@

>>> +/*

>>> + * AArch64- ILP32 specific system calls implementation

>>> + *

>>> + * Copyright (C) 2016 Cavium Inc.

>>> + * Author: Andrew Pinski <apinski@cavium.com>

>>> + *

>>> + * This program is free software; you can redistribute it and/or modify

>>> + * it under the terms of the GNU General Public License version 2 as

>>> + * published by the Free Software Foundation.

>>> + *

>>> + * This program is distributed in the hope that it will be useful,

>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of

>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

>>> + * GNU General Public License for more details.

>>> + *

>>> + * You should have received a copy of the GNU General Public License

>>> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.

>>> + */

>>> +

>>> +#include <linux/compiler.h>

>>> +#include <linux/errno.h>

>>> +#include <linux/fs.h>

>>> +#include <linux/mm.h>

>>> +#include <linux/msg.h>

>>> +#include <linux/export.h>

>>> +#include <linux/sched.h>

>>> +#include <linux/slab.h>

>>> +#include <linux/syscalls.h>

>>> +#include <linux/compat.h>

>>> +#include <asm-generic/syscalls.h>

>>> +

>>> +/* Using non-compat syscalls where necessary */

>>> +#define compat_sys_fadvise64_64        sys_fadvise64_64

>>> +#define compat_sys_fallocate           sys_fallocate

>>> +#define compat_sys_ftruncate64         sys_ftruncate

>>> +#define compat_sys_lookup_dcookie      sys_lookup_dcookie

>>> +#define compat_sys_pread64             sys_pread64

>>> +#define compat_sys_pwrite64            sys_pwrite64

>>> +#define compat_sys_readahead           sys_readahead

>>> +#define compat_sys_shmat               sys_shmat

>>> +#define compat_sys_sync_file_range     sys_sync_file_range

>>> +#define compat_sys_truncate64          sys_truncate

>>> +#define sys_llseek                     sys_lseek

>>> +#define sys_mmap2		       sys_mmap

>> I am a little bit confused here. We wrap the mmap to mmap2 in glibc

>> without shift the 4096 and We map mmap2 to mmap in kernel which

>> means we shift with the real page size. It works unless the

>> application want to mmap the offset bigger then 2G. In ILP32 app,

>> if the offset is bigger than 2G(e.g. 0xfb000000), it is a negative

>> number and extend to 64bit nagetive number in kernel

>> (0xfffffff fb000000). I add the "COMPAT_SYSCALL_WRAP6(mmap, ...)" in

>> kernel/compat_wrapper.c. But it is not works. I am not sure if it is

>> already sign extended in userspace.

>>

>> On the other hand, I read the code of mmap in arm and other

>> architecture. Usually, they will shift 4096 in userspace and shift

>> others in kernel if needed. Should we follow the similar ways or we

>> could call mmap_pgoff in glibc and do the shift according the real

>> page shift(getpages())?

>>

>> Thanks

>>

>> Bamvor

>>

>>

>

> Hi,

>

> AFAIR, here we don't shift offset, as it's 64-bit both in user-

> and kernel-space,

In your ilp32-2.22 branch, you wrapper mmap to mmap2 in which type of
offset is off_t. And off_t is 32bit in ilp32, correct?
"sysdeps/unix/sysv/linux/aarch64/ilp32/mmap64.c"
/* mmap is provided by mmap as they are the same. */
void *__mmap (void *__addr, size_t __len, int __prot,
                      int __flags, int __fd, __off_t __offset)
{
    void *result;
    result = (void *)
      INLINE_SYSCALL (mmap2, 6, __addr,
                      __len, __prot, __flags, __fd, __offset);
    return result;
}
> and just pass it from user to kernel thru glibc

> with no changes.

>

> It definitely works, as there are many mappings made by linker and

> libc in 2G+ area, and there are no problems with them. This is a

> typical ILP32 application map:

Ok, the different is I am talking about the offset in mmap. I am NOT
talking about the map result.
If I run my test case with strace:
"strace -e trace=mmap ./mmap.arm64_ilp32 0xfb000000 0x1000", here is
the part of log:

1 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xf7721000
2 mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xf7557000
3 page size<0x1000>, offset is <0xfb000000>
4 mmap(NULL, 4096, PROT_READ, MAP_SHARED, 3, 0xfffffffffb000000) = -1 EINVAL (Invalid argument)
5 mmap of mmapfile failed

As you said, line 1 and 2 show that mmap could map above 2G. But it
is NOT what I want to discussion.
As I said, when I pass the offset above 2G(e.g. 0xfb0000000), we
could find that the actual offset pass to kernel is
0xfffffffffb000000(reference line 4).
It will fail if I map in /dev/mmem. It will not fail if the fd is
a normal file. But in both of case the offset is wrong.

Regards

Bamvor

> 00400000-00401000 r-xp 00000000 08:00 130400     /root/mykill

> 00410000-00411000 rwxp 00000000 08:00 130400     /root/mykill

> 00527000-00549000 rwxp 00000000 00:00 0          [heap]

> c6278000-c6298000 rwxp 00000000 00:00 0

> c6298000-c63d0000 r-xp 00000000 08:00 135293     /root/sys-root/libilp32/libc-2.22.so

> c63d0000-c63e0000 ---p 00138000 08:00 135293     /root/sys-root/libilp32/libc-2.22.so

> c63e0000-c63e2000 r-xp 00138000 08:00 135293     /root/sys-root/libilp32/libc-2.22.so

> c63e2000-c63e3000 rwxp 0013a000 08:00 135293     /root/sys-root/libilp32/libc-2.22.so

> c63e3000-c63e6000 rwxp 00000000 00:00 0

> c63e6000-c63fc000 r-xp 00000000 08:00 135313     /root/sys-root/libilp32/libpthread-2.22.so

> c63fc000-c640b000 ---p 00016000 08:00 135313     /root/sys-root/libilp32/libpthread-2.22.so

> c640b000-c640c000 r-xp 00015000 08:00 135313     /root/sys-root/libilp32/libpthread-2.22.so

> c640c000-c640d000 rwxp 00016000 08:00 135313     /root/sys-root/libilp32/libpthread-2.22.so

> c640d000-c640f000 rwxp 00000000 00:00 0

> c640f000-c642c000 r-xp 00000000 08:00 135288     /root/sys-root/libilp32/ld-2.22.so

> c6437000-c6439000 rwxp 00000000 00:00 0

> c6439000-c643a000 r--p 00000000 00:00 0          [vvar]

> c643a000-c643b000 r-xp 00000000 00:00 0          [vdso]

> c643b000-c643c000 r-xp 0001c000 08:00 135288     /root/sys-root/libilp32/ld-2.22.so

> c643c000-c643d000 rwxp 0001d000 08:00 135288     /root/sys-root/libilp32/ld-2.22.so

> ffe2d000-ffe4e000 rw-p 00000000 00:00 0          [stack]

>

>

>> _______________________________________________

>> linux-arm-kernel mailing list

>> linux-arm-kernel@lists.infradead.org

>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Zhangjian (Bamvor) May 10, 2016, 7:55 a.m. UTC | #12
Hi,

Sorry I forget to paste my test code:

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/mman.h>

#define TEMPFILE "mmapfile"

int main(int argc, char *argv[])
{
	int fd;
	void *addr;
	unsigned long offset;
	unsigned long size;

	if (argc == 3) {
		if (argv[1][0] == '0' && argv[1][1] == 'x')
			offset = strtoll(&argv[1][2], NULL, 16);
		else
			offset = atoi(argv[1]);

		if (argv[2][0] == '0' && argv[2][1] == 'x')
			size = strtoll(&argv[2][2], NULL, 16);
		else
			size = atoi(argv[2]);
	} else {
		exit(2);
	}

	printf("page size<0x%x>, offset is <0x%x>\n", size, offset);
//	if ((fd = open(TEMPFILE, O_RDWR | O_CREAT, 0666)) < 0) {
//		fprintf(stderr, "opening %s failed\n", TEMPFILE);
//		exit(2);
//	}
	fd = open("/dev/mem", O_RDWR | O_SYNC);
	if (-1 == fd)
	{
		printf( "open /dev/mem fail!\n" );
		return 1;
	}

	//addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, offset);
	addr = mmap(0, size, PROT_READ, MAP_FILE | MAP_SHARED, fd, offset);
	if(addr == MAP_FAILED) {
		fprintf(stderr, "mmap of %s failed\n", TEMPFILE);
		exit(2);
	}
	printf("addr: <0x%x>\n", addr);

	return 0;
}

Regards

Bamvor

On 2016/5/10 15:42, Zhangjian (Bamvor) wrote:
> Hi, Yury

>

> On 2016/5/6 20:37, Yury Norov wrote:

>> On Fri, May 06, 2016 at 08:16:48PM +0800, Zhangjian (Bamvor) wrote:

>>> Hi,

>>>

>>> On 2016/4/6 6:08, Yury Norov wrote:

>>>> From: Andrew Pinski <apinski@cavium.com>

>>>>

>>>> Add a separate syscall-table for ILP32, which dispatches either to native

>>>> LP64 system call implementation or to compat-syscalls, as appropriate.

>>>>

>>>> Signed-off-by: Andrew Pinski <Andrew.Pinski@caviumnetworks.com>

>>>> Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>

>>>> ---

>>>>   arch/arm64/include/asm/unistd.h | 11 ++++++-

>>>>   arch/arm64/kernel/Makefile      |  2 +-

>>>>   arch/arm64/kernel/entry.S       | 12 +++++++-

>>>>   arch/arm64/kernel/sys_ilp32.c   | 65 +++++++++++++++++++++++++++++++++++++++++

>>>>   4 files changed, 87 insertions(+), 3 deletions(-)

>>>>   create mode 100644 arch/arm64/kernel/sys_ilp32.c

>>>>

>>>> diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h

>>>> index 2971dea..5ea18ef 100644

>>>> --- a/arch/arm64/include/asm/unistd.h

>>>> +++ b/arch/arm64/include/asm/unistd.h

>>>> @@ -13,9 +13,18 @@

>>>>    * You should have received a copy of the GNU General Public License

>>>>    * along with this program.  If not, see <http://www.gnu.org/licenses/>.

>>>>    */

>>>> +

>>>> +#ifdef CONFIG_COMPAT

>>>> +#define __ARCH_WANT_COMPAT_STAT64

>>>> +#endif

>>>> +

>>>> +#ifdef CONFIG_ARM64_ILP32

>>>> +#define __ARCH_WANT_COMPAT_SYS_PREADV64

>>>> +#define __ARCH_WANT_COMPAT_SYS_PWRITEV64

>>>> +#endif

>>>> +

>>>>   #ifdef CONFIG_AARCH32_EL0

>>>>   #define __ARCH_WANT_COMPAT_SYS_GETDENTS64

>>>> -#define __ARCH_WANT_COMPAT_STAT64

>>>>   #define __ARCH_WANT_SYS_GETHOSTNAME

>>>>   #define __ARCH_WANT_SYS_PAUSE

>>>>   #define __ARCH_WANT_SYS_GETPGRP

>>>> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile

>>>> index 9dfdf86..7aa65ea 100644

>>>> --- a/arch/arm64/kernel/Makefile

>>>> +++ b/arch/arm64/kernel/Makefile

>>>> @@ -28,7 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE

>>>>   arm64-obj-$(CONFIG_AARCH32_EL0)        += sys32.o kuser32.o signal32.o     \

>>>>                          sys_compat.o entry32.o        \

>>>>                          ../../arm/kernel/opcodes.o binfmt_elf32.o

>>>> -arm64-obj-$(CONFIG_ARM64_ILP32)        += binfmt_ilp32.o

>>>> +arm64-obj-$(CONFIG_ARM64_ILP32)        += binfmt_ilp32.o sys_ilp32.o

>>>>   arm64-obj-$(CONFIG_FUNCTION_TRACER)    += ftrace.o entry-ftrace.o

>>>>   arm64-obj-$(CONFIG_MODULES)        += arm64ksyms.o module.o

>>>>   arm64-obj-$(CONFIG_ARM64_MODULE_PLTS)    += module-plts.o

>>>> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S

>>>> index cf4d1ae..1f7a145 100644

>>>> --- a/arch/arm64/kernel/entry.S

>>>> +++ b/arch/arm64/kernel/entry.S

>>>> @@ -715,9 +715,13 @@ ENDPROC(ret_from_fork)

>>>>    */

>>>>       .align    6

>>>>   el0_svc:

>>>> -    adrp    stbl, sys_call_table        // load syscall table pointer

>>>>       uxtw    scno, w8            // syscall number in w8

>>>>       mov    sc_nr, #__NR_syscalls

>>>> +#ifdef CONFIG_ARM64_ILP32

>>>> +    ldr    x16, [tsk, #TI_FLAGS]

>>>> +    tbnz    x16, #TIF_32BIT_AARCH64, el0_ilp32_svc // We are using ILP32

>>>> +#endif

>>>> +    adrp    stbl, sys_call_table        // load syscall table pointer

>>>>   el0_svc_naked:                    // compat entry point

>>>>       stp    x0, scno, [sp, #S_ORIG_X0]    // save the original x0 and syscall number

>>>>       enable_dbg_and_irq

>>>> @@ -737,6 +741,12 @@ ni_sys:

>>>>       b    ret_fast_syscall

>>>>   ENDPROC(el0_svc)

>>>>

>>>> +#ifdef CONFIG_ARM64_ILP32

>>>> +el0_ilp32_svc:

>>>> +    adrp    stbl, sys_call_ilp32_table // load syscall table pointer

>>>> +    b el0_svc_naked

>>>> +#endif

>>>> +

>>>>       /*

>>>>        * This is the really slow path.  We're going to be doing context

>>>>        * switches, and waiting for our parent to respond.

>>>> diff --git a/arch/arm64/kernel/sys_ilp32.c b/arch/arm64/kernel/sys_ilp32.c

>>>> new file mode 100644

>>>> index 0000000..0996d8e

>>>> --- /dev/null

>>>> +++ b/arch/arm64/kernel/sys_ilp32.c

>>>> @@ -0,0 +1,65 @@

>>>> +/*

>>>> + * AArch64- ILP32 specific system calls implementation

>>>> + *

>>>> + * Copyright (C) 2016 Cavium Inc.

>>>> + * Author: Andrew Pinski <apinski@cavium.com>

>>>> + *

>>>> + * This program is free software; you can redistribute it and/or modify

>>>> + * it under the terms of the GNU General Public License version 2 as

>>>> + * published by the Free Software Foundation.

>>>> + *

>>>> + * This program is distributed in the hope that it will be useful,

>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of

>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

>>>> + * GNU General Public License for more details.

>>>> + *

>>>> + * You should have received a copy of the GNU General Public License

>>>> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.

>>>> + */

>>>> +

>>>> +#include <linux/compiler.h>

>>>> +#include <linux/errno.h>

>>>> +#include <linux/fs.h>

>>>> +#include <linux/mm.h>

>>>> +#include <linux/msg.h>

>>>> +#include <linux/export.h>

>>>> +#include <linux/sched.h>

>>>> +#include <linux/slab.h>

>>>> +#include <linux/syscalls.h>

>>>> +#include <linux/compat.h>

>>>> +#include <asm-generic/syscalls.h>

>>>> +

>>>> +/* Using non-compat syscalls where necessary */

>>>> +#define compat_sys_fadvise64_64        sys_fadvise64_64

>>>> +#define compat_sys_fallocate           sys_fallocate

>>>> +#define compat_sys_ftruncate64         sys_ftruncate

>>>> +#define compat_sys_lookup_dcookie      sys_lookup_dcookie

>>>> +#define compat_sys_pread64             sys_pread64

>>>> +#define compat_sys_pwrite64            sys_pwrite64

>>>> +#define compat_sys_readahead           sys_readahead

>>>> +#define compat_sys_shmat               sys_shmat

>>>> +#define compat_sys_sync_file_range     sys_sync_file_range

>>>> +#define compat_sys_truncate64          sys_truncate

>>>> +#define sys_llseek                     sys_lseek

>>>> +#define sys_mmap2               sys_mmap

>>> I am a little bit confused here. We wrap the mmap to mmap2 in glibc

>>> without shift the 4096 and We map mmap2 to mmap in kernel which

>>> means we shift with the real page size. It works unless the

>>> application want to mmap the offset bigger then 2G. In ILP32 app,

>>> if the offset is bigger than 2G(e.g. 0xfb000000), it is a negative

>>> number and extend to 64bit nagetive number in kernel

>>> (0xfffffff fb000000). I add the "COMPAT_SYSCALL_WRAP6(mmap, ...)" in

>>> kernel/compat_wrapper.c. But it is not works. I am not sure if it is

>>> already sign extended in userspace.

>>>

>>> On the other hand, I read the code of mmap in arm and other

>>> architecture. Usually, they will shift 4096 in userspace and shift

>>> others in kernel if needed. Should we follow the similar ways or we

>>> could call mmap_pgoff in glibc and do the shift according the real

>>> page shift(getpages())?

>>>

>>> Thanks

>>>

>>> Bamvor

>>>

>>>

>>

>> Hi,

>>

>> AFAIR, here we don't shift offset, as it's 64-bit both in user-

>> and kernel-space,

> In your ilp32-2.22 branch, you wrapper mmap to mmap2 in which type of

> offset is off_t. And off_t is 32bit in ilp32, correct?

> "sysdeps/unix/sysv/linux/aarch64/ilp32/mmap64.c"

> /* mmap is provided by mmap as they are the same. */

> void *__mmap (void *__addr, size_t __len, int __prot,

>                       int __flags, int __fd, __off_t __offset)

> {

>     void *result;

>     result = (void *)

>       INLINE_SYSCALL (mmap2, 6, __addr,

>                       __len, __prot, __flags, __fd, __offset);

>     return result;

> }

>> and just pass it from user to kernel thru glibc

>> with no changes.

>>

>> It definitely works, as there are many mappings made by linker and

>> libc in 2G+ area, and there are no problems with them. This is a

>> typical ILP32 application map:

> Ok, the different is I am talking about the offset in mmap. I am NOT

> talking about the map result.

> If I run my test case with strace:

> "strace -e trace=mmap ./mmap.arm64_ilp32 0xfb000000 0x1000", here is

> the part of log:

>

> 1 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xf7721000

> 2 mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xf7557000

> 3 page size<0x1000>, offset is <0xfb000000>

> 4 mmap(NULL, 4096, PROT_READ, MAP_SHARED, 3, 0xfffffffffb000000) = -1 EINVAL (Invalid argument)

> 5 mmap of mmapfile failed

>

> As you said, line 1 and 2 show that mmap could map above 2G. But it

> is NOT what I want to discussion.

> As I said, when I pass the offset above 2G(e.g. 0xfb0000000), we

> could find that the actual offset pass to kernel is

> 0xfffffffffb000000(reference line 4).

> It will fail if I map in /dev/mmem. It will not fail if the fd is

> a normal file. But in both of case the offset is wrong.

>

> Regards

>

> Bamvor

>

>> 00400000-00401000 r-xp 00000000 08:00 130400     /root/mykill

>> 00410000-00411000 rwxp 00000000 08:00 130400     /root/mykill

>> 00527000-00549000 rwxp 00000000 00:00 0          [heap]

>> c6278000-c6298000 rwxp 00000000 00:00 0

>> c6298000-c63d0000 r-xp 00000000 08:00 135293     /root/sys-root/libilp32/libc-2.22.so

>> c63d0000-c63e0000 ---p 00138000 08:00 135293     /root/sys-root/libilp32/libc-2.22.so

>> c63e0000-c63e2000 r-xp 00138000 08:00 135293     /root/sys-root/libilp32/libc-2.22.so

>> c63e2000-c63e3000 rwxp 0013a000 08:00 135293     /root/sys-root/libilp32/libc-2.22.so

>> c63e3000-c63e6000 rwxp 00000000 00:00 0

>> c63e6000-c63fc000 r-xp 00000000 08:00 135313     /root/sys-root/libilp32/libpthread-2.22.so

>> c63fc000-c640b000 ---p 00016000 08:00 135313     /root/sys-root/libilp32/libpthread-2.22.so

>> c640b000-c640c000 r-xp 00015000 08:00 135313     /root/sys-root/libilp32/libpthread-2.22.so

>> c640c000-c640d000 rwxp 00016000 08:00 135313     /root/sys-root/libilp32/libpthread-2.22.so

>> c640d000-c640f000 rwxp 00000000 00:00 0

>> c640f000-c642c000 r-xp 00000000 08:00 135288     /root/sys-root/libilp32/ld-2.22.so

>> c6437000-c6439000 rwxp 00000000 00:00 0

>> c6439000-c643a000 r--p 00000000 00:00 0          [vvar]

>> c643a000-c643b000 r-xp 00000000 00:00 0          [vdso]

>> c643b000-c643c000 r-xp 0001c000 08:00 135288     /root/sys-root/libilp32/ld-2.22.so

>> c643c000-c643d000 rwxp 0001d000 08:00 135288     /root/sys-root/libilp32/ld-2.22.so

>> ffe2d000-ffe4e000 rw-p 00000000 00:00 0          [stack]

>>

>>

>>> _______________________________________________

>>> linux-arm-kernel mailing list

>>> linux-arm-kernel@lists.infradead.org

>>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

>
Yury Norov May 14, 2016, 3:03 p.m. UTC | #13
So, after all discussions, this patch is looking like this.

Changes:
 - assembler part reworked to be more clear, as Catalin recommended;
 - mmap now points to mmap2, as in 1st versions (suggested by Bamvor),
   and ilp32 wrapper delouses required arguments;
 - pread64 and pwrite64 wrappers introduced to delouse args as well;
 - removed unneeded #undefs;

Did I miss something?diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 2971dea..5ea18ef 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -13,9 +13,18 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
+
+#ifdef CONFIG_COMPAT
+#define __ARCH_WANT_COMPAT_STAT64
+#endif
+
+#ifdef CONFIG_ARM64_ILP32
+#define __ARCH_WANT_COMPAT_SYS_PREADV64
+#define __ARCH_WANT_COMPAT_SYS_PWRITEV64
+#endif
+
 #ifdef CONFIG_AARCH32_EL0
 #define __ARCH_WANT_COMPAT_SYS_GETDENTS64
-#define __ARCH_WANT_COMPAT_STAT64
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_GETPGRP
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 9dfdf86..7aa65ea 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -28,7 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
 arm64-obj-$(CONFIG_AARCH32_EL0)		+= sys32.o kuser32.o signal32.o 	\
 					   sys_compat.o entry32.o		\
 					   ../../arm/kernel/opcodes.o binfmt_elf32.o
-arm64-obj-$(CONFIG_ARM64_ILP32)		+= binfmt_ilp32.o
+arm64-obj-$(CONFIG_ARM64_ILP32)		+= binfmt_ilp32.o sys_ilp32.o
 arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o
 arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o
 arm64-obj-$(CONFIG_ARM64_MODULE_PLTS)	+= module-plts.o
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index cf4d1ae..0f651bc 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -500,6 +500,7 @@ el0_svc_compat:
 	 * AArch32 syscall handling
 	 */
 	adrp	stbl, compat_sys_call_table	// load compat syscall table pointer
+	ldr     x16, [tsk, #TI_FLAGS]
 	uxtw	scno, w7			// syscall number in w7 (r7)
 	mov     sc_nr, #__NR_compat_syscalls
 	b	el0_svc_naked
@@ -716,15 +717,20 @@ ENDPROC(ret_from_fork)
 	.align	6
 el0_svc:
 	adrp	stbl, sys_call_table		// load syscall table pointer
+	ldr	x16, [tsk, #TI_FLAGS]
 	uxtw	scno, w8			// syscall number in w8
 	mov	sc_nr, #__NR_syscalls
+#ifdef CONFIG_ARM64_ILP32
+	adrp	x17, sys_call_ilp32_table	// load ilp32 syscall table pointer
+	tst	x16, #_TIF_32BIT_AARCH64
+	csel    stbl, stbl, x17, eq		// We are using ILP32
+#endif
 el0_svc_naked:					// compat entry point
 	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number
 	enable_dbg_and_irq
 	ct_user_exit 1
 
-	ldr	x16, [tsk, #TI_FLAGS]		// check for syscall hooks
-	tst	x16, #_TIF_SYSCALL_WORK
+	tst	x16, #_TIF_SYSCALL_WORK		// check for syscall hooks
 	b.ne	__sys_trace
 	cmp     scno, sc_nr                     // check upper syscall limit
 	b.hs	ni_sys
diff --git a/arch/arm64/kernel/sys_ilp32.c b/arch/arm64/kernel/sys_ilp32.c
new file mode 100644
index 0000000..64db612
--- /dev/null
+++ b/arch/arm64/kernel/sys_ilp32.c
@@ -0,0 +1,84 @@
+/*
+ * AArch64- ILP32 specific system calls implementation
+ *
+ * Copyright (C) 2016 Cavium Inc.
+ * Author: Andrew Pinski <apinski@cavium.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define __SYSCALL_COMPAT
+
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/msg.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <asm-generic/syscalls.h>
+
+/* Using non-compat syscalls where necessary */
+#define compat_sys_fadvise64_64        sys_fadvise64_64
+#define compat_sys_fallocate           sys_fallocate
+#define compat_sys_ftruncate64         sys_ftruncate
+#define compat_sys_lookup_dcookie      sys_lookup_dcookie
+#define compat_sys_readahead           sys_readahead
+#define compat_sys_shmat               sys_shmat
+#define compat_sys_sync_file_range     sys_sync_file_range
+#define compat_sys_truncate64          sys_truncate
+#define sys_llseek                     sys_lseek
+
+SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
+       unsigned long, prot, unsigned long, flags, unsigned long, fd,
+       unsigned long, pgoff)
+{
+       if (pgoff & (~PAGE_MASK >> 12))
+               return -EINVAL;
+
+       return sys_mmap_pgoff((compat_uptr_t) addr, (compat_size_t) len,
+		       (int) prot, (int) flags, (int) fd,
+		       pgoff >> (PAGE_SHIFT-12));
+}
+
+COMPAT_SYSCALL_DEFINE4(pread64, unsigned int, fd, compat_uptr_t __user *, ubuf,
+		       compat_size_t, count, off_t, offset)
+{
+	return sys_pread64(fd, (char *) ubuf, count, offset);
+}
+
+COMPAT_SYSCALL_DEFINE4(pwrite64, unsigned int, fd, compat_uptr_t __user *, ubuf,
+		       compat_size_t, count, off_t, offset)
+{
+	return sys_pwrite64(fd, (char *) ubuf, count, offset);
+}
+
+#include <asm/syscall.h>
+
+#undef __SYSCALL
+#undef __SC_WRAP
+
+#define __SYSCALL(nr, sym)	[nr] = sym,
+#define __SC_WRAP(nr, sym)	[nr] = compat_##sym,
+
+/*
+ * The sys_call_ilp32_table array must be 4K aligned to be accessible from
+ * kernel/entry.S.
+ */
+void *sys_call_ilp32_table[__NR_syscalls] __aligned(4096) = {
+	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};

Catalin Marinas May 16, 2016, 5:06 p.m. UTC | #14
On Sat, May 14, 2016 at 06:03:52PM +0300, Yury Norov wrote:
> +SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,

> +       unsigned long, prot, unsigned long, flags, unsigned long, fd,

> +       unsigned long, pgoff)


To avoid the types confusion we could add __SC_WRAP to mmap2 in unistd.h
and use COMPAT_SYSCALL_DEFINE6 here (together with compat_ptr_t etc.).

> +{

> +       if (pgoff & (~PAGE_MASK >> 12))

> +               return -EINVAL;

> +

> +       return sys_mmap_pgoff((compat_uptr_t) addr, (compat_size_t) len,

> +		       (int) prot, (int) flags, (int) fd,

> +		       pgoff >> (PAGE_SHIFT-12));


Then we wouldn't need the explicit casting here.

> +}

> +

> +COMPAT_SYSCALL_DEFINE4(pread64, unsigned int, fd, compat_uptr_t __user *, ubuf,

> +		       compat_size_t, count, off_t, offset)

> +{

> +	return sys_pread64(fd, (char *) ubuf, count, offset);

> +}

> +

> +COMPAT_SYSCALL_DEFINE4(pwrite64, unsigned int, fd, compat_uptr_t __user *, ubuf,

> +		       compat_size_t, count, off_t, offset)

> +{

> +	return sys_pwrite64(fd, (char *) ubuf, count, offset);


Nitpick: no space between cast type and variable name: (char *)ubuf, ...

We can also make these functions static as they are not used outside
this file.

-- 
Catalin
Yury Norov May 17, 2016, 7:05 p.m. UTC | #15
On Mon, May 16, 2016 at 06:06:05PM +0100, Catalin Marinas wrote:
> On Sat, May 14, 2016 at 06:03:52PM +0300, Yury Norov wrote:

> > +SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,

> > +       unsigned long, prot, unsigned long, flags, unsigned long, fd,

> > +       unsigned long, pgoff)

> 

> To avoid the types confusion we could add __SC_WRAP to mmap2 in unistd.h

> and use COMPAT_SYSCALL_DEFINE6 here (together with compat_ptr_t etc.).

> 

> > +{

> > +       if (pgoff & (~PAGE_MASK >> 12))

> > +               return -EINVAL;

> > +

> > +       return sys_mmap_pgoff((compat_uptr_t) addr, (compat_size_t) len,

> > +		       (int) prot, (int) flags, (int) fd,

> > +		       pgoff >> (PAGE_SHIFT-12));

> 

> Then we wouldn't need the explicit casting here.


See below

> 

> > +}

> > +

> > +COMPAT_SYSCALL_DEFINE4(pread64, unsigned int, fd, compat_uptr_t __user *, ubuf,

> > +		       compat_size_t, count, off_t, offset)

> > +{

> > +	return sys_pread64(fd, (char *) ubuf, count, offset);

> > +}

> > +

> > +COMPAT_SYSCALL_DEFINE4(pwrite64, unsigned int, fd, compat_uptr_t __user *, ubuf,

> > +		       compat_size_t, count, off_t, offset)

> > +{

> > +	return sys_pwrite64(fd, (char *) ubuf, count, offset);

> 

> Nitpick: no space between cast type and variable name: (char *)ubuf, ...


I think it's really a matter of taste. I prefer to have a space, and
there's no solid rule in coding style.

And there are 13032 insertions of my version vs 35030 of yours:
~/work/linux$ git grep ' \*)[a-zA-Z]'|wc -l
35030
~/work/linux$ git grep ' \*) [a-zA-Z]'|wc -l
13032

Of course, I will change it if you insist.

> We can also make these functions static as they are not used outside

> this file.


If it's OK, we can use just compat_sys_xxx() instead of
COMPAT_SYSCALL_DEFINEx(xxx), and for mmap2 we'll not need to change 
_SYSCALL to _SC_WRAP in unistd.h that way.

> > -- 

> Catalin
Catalin Marinas May 18, 2016, 11:21 a.m. UTC | #16
On Tue, May 17, 2016 at 10:05:26PM +0300, Yury Norov wrote:
> On Mon, May 16, 2016 at 06:06:05PM +0100, Catalin Marinas wrote:

> > On Sat, May 14, 2016 at 06:03:52PM +0300, Yury Norov wrote:

> > > +SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,

> > > +       unsigned long, prot, unsigned long, flags, unsigned long, fd,

> > > +       unsigned long, pgoff)

> > 

> > To avoid the types confusion we could add __SC_WRAP to mmap2 in unistd.h

> > and use COMPAT_SYSCALL_DEFINE6 here (together with compat_ptr_t etc.).

> > 

> > > +{

> > > +       if (pgoff & (~PAGE_MASK >> 12))

> > > +               return -EINVAL;

> > > +

> > > +       return sys_mmap_pgoff((compat_uptr_t) addr, (compat_size_t) len,

> > > +		       (int) prot, (int) flags, (int) fd,

> > > +		       pgoff >> (PAGE_SHIFT-12));

> > 

> > Then we wouldn't need the explicit casting here.

> 

> See below

> 

> > > +}

> > > +

> > > +COMPAT_SYSCALL_DEFINE4(pread64, unsigned int, fd, compat_uptr_t __user *, ubuf,

> > > +		       compat_size_t, count, off_t, offset)

> > > +{

> > > +	return sys_pread64(fd, (char *) ubuf, count, offset);

> > > +}

> > > +

> > > +COMPAT_SYSCALL_DEFINE4(pwrite64, unsigned int, fd, compat_uptr_t __user *, ubuf,

> > > +		       compat_size_t, count, off_t, offset)

> > > +{

> > > +	return sys_pwrite64(fd, (char *) ubuf, count, offset);

> > 

> > Nitpick: no space between cast type and variable name: (char *)ubuf, ...

> 

> I think it's really a matter of taste. I prefer to have a space, and

> there's no solid rule in coding style.

> 

> And there are 13032 insertions of my version vs 35030 of yours:

> ~/work/linux$ git grep ' \*)[a-zA-Z]'|wc -l

> 35030

> ~/work/linux$ git grep ' \*) [a-zA-Z]'|wc -l

> 13032

> 

> Of course, I will change it if you insist.


Not really, I thought it's covered by CodingStyle but it doesn't seem
to.

> > We can also make these functions static as they are not used outside

> > this file.

> 

> If it's OK, we can use just compat_sys_xxx() instead of

> COMPAT_SYSCALL_DEFINEx(xxx),


I got lost in macros, what difference would COMPAT_SYSCALL_DEFINE vs
compat_sys_*() make? Is this the delouse stuff?

> and for mmap2 we'll not need to change _SYSCALL to _SC_WRAP in

> unistd.h that way.


Looking at the generic unistd.h, adding _SC_WRAP for sys_mmap2 is
indeed not easy:

#define __NR3264_mmap 222
__SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)

So defining a compat_sys_mmap2() would work but I think you'd also need:

#define sys_mmap2	compat_sys_mmap2()

-- 
Catalin
Yury Norov May 18, 2016, 5:58 p.m. UTC | #17
On Wed, May 18, 2016 at 12:21:46PM +0100, Catalin Marinas wrote:
> On Tue, May 17, 2016 at 10:05:26PM +0300, Yury Norov wrote:

> > On Mon, May 16, 2016 at 06:06:05PM +0100, Catalin Marinas wrote:

> > > On Sat, May 14, 2016 at 06:03:52PM +0300, Yury Norov wrote:

> > > > +SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,

> > > > +       unsigned long, prot, unsigned long, flags, unsigned long, fd,

> > > > +       unsigned long, pgoff)

> > > 

> > > To avoid the types confusion we could add __SC_WRAP to mmap2 in unistd.h

> > > and use COMPAT_SYSCALL_DEFINE6 here (together with compat_ptr_t etc.).

> > > 

> > > > +{

> > > > +       if (pgoff & (~PAGE_MASK >> 12))

> > > > +               return -EINVAL;

> > > > +

> > > > +       return sys_mmap_pgoff((compat_uptr_t) addr, (compat_size_t) len,

> > > > +		       (int) prot, (int) flags, (int) fd,

> > > > +		       pgoff >> (PAGE_SHIFT-12));

> > > 

> > > Then we wouldn't need the explicit casting here.

> > 

> > See below

> > 

> > > > +}

> > > > +

> > > > +COMPAT_SYSCALL_DEFINE4(pread64, unsigned int, fd, compat_uptr_t __user *, ubuf,

> > > > +		       compat_size_t, count, off_t, offset)

> > > > +{

> > > > +	return sys_pread64(fd, (char *) ubuf, count, offset);

> > > > +}

> > > > +

> > > > +COMPAT_SYSCALL_DEFINE4(pwrite64, unsigned int, fd, compat_uptr_t __user *, ubuf,

> > > > +		       compat_size_t, count, off_t, offset)

> > > > +{

> > > > +	return sys_pwrite64(fd, (char *) ubuf, count, offset);

> > > 

> > > Nitpick: no space between cast type and variable name: (char *)ubuf, ...

> > 

> > I think it's really a matter of taste. I prefer to have a space, and

> > there's no solid rule in coding style.

> > 

> > And there are 13032 insertions of my version vs 35030 of yours:

> > ~/work/linux$ git grep ' \*)[a-zA-Z]'|wc -l

> > 35030

> > ~/work/linux$ git grep ' \*) [a-zA-Z]'|wc -l

> > 13032

> > 

> > Of course, I will change it if you insist.

> 

> Not really, I thought it's covered by CodingStyle but it doesn't seem

> to.

> 

> > > We can also make these functions static as they are not used outside

> > > this file.

> > 

> > If it's OK, we can use just compat_sys_xxx() instead of

> > COMPAT_SYSCALL_DEFINEx(xxx),

> 

> I got lost in macros, what difference would COMPAT_SYSCALL_DEFINE vs

> compat_sys_*() make? Is this the delouse stuff?

> 


Hmm... I looked again. It seems that COMPAT delouses all arguments,
including off_t, and that's what we try to avoid. So we *should* use
naked functions. 

include/linux/compat.h:
53 #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
54         asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\
55              __attribute__((alias(__stringify(compat_SyS##name))));  \
56         static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
57         asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));\
58         asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
59         { \
60                 return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \
61         } \
62         static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))


> > and for mmap2 we'll not need to change _SYSCALL to _SC_WRAP in

> > unistd.h that way.

> 

> Looking at the generic unistd.h, adding _SC_WRAP for sys_mmap2 is

> indeed not easy:

> 

> #define __NR3264_mmap 222

> __SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)

> 

> So defining a compat_sys_mmap2() would work but I think you'd also need:

> 


> #define sys_mmap2	compat_sys_mmap2()


OK.

> 

> -- 

> Catalin
diff mbox

Patch

diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 2971dea..5ea18ef 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -13,9 +13,18 @@ 
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
+
+#ifdef CONFIG_COMPAT
+#define __ARCH_WANT_COMPAT_STAT64
+#endif
+
+#ifdef CONFIG_ARM64_ILP32
+#define __ARCH_WANT_COMPAT_SYS_PREADV64
+#define __ARCH_WANT_COMPAT_SYS_PWRITEV64
+#endif
+
 #ifdef CONFIG_AARCH32_EL0
 #define __ARCH_WANT_COMPAT_SYS_GETDENTS64
-#define __ARCH_WANT_COMPAT_STAT64
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_GETPGRP
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 9dfdf86..7aa65ea 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -28,7 +28,7 @@  $(obj)/%.stub.o: $(obj)/%.o FORCE
 arm64-obj-$(CONFIG_AARCH32_EL0)		+= sys32.o kuser32.o signal32.o 	\
 					   sys_compat.o entry32.o		\
 					   ../../arm/kernel/opcodes.o binfmt_elf32.o
-arm64-obj-$(CONFIG_ARM64_ILP32)		+= binfmt_ilp32.o
+arm64-obj-$(CONFIG_ARM64_ILP32)		+= binfmt_ilp32.o sys_ilp32.o
 arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o
 arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o
 arm64-obj-$(CONFIG_ARM64_MODULE_PLTS)	+= module-plts.o
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index cf4d1ae..1f7a145 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -715,9 +715,13 @@  ENDPROC(ret_from_fork)
  */
 	.align	6
 el0_svc:
-	adrp	stbl, sys_call_table		// load syscall table pointer
 	uxtw	scno, w8			// syscall number in w8
 	mov	sc_nr, #__NR_syscalls
+#ifdef CONFIG_ARM64_ILP32
+	ldr	x16, [tsk, #TI_FLAGS]
+	tbnz	x16, #TIF_32BIT_AARCH64, el0_ilp32_svc // We are using ILP32
+#endif
+	adrp	stbl, sys_call_table		// load syscall table pointer
 el0_svc_naked:					// compat entry point
 	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number
 	enable_dbg_and_irq
@@ -737,6 +741,12 @@  ni_sys:
 	b	ret_fast_syscall
 ENDPROC(el0_svc)
 
+#ifdef CONFIG_ARM64_ILP32
+el0_ilp32_svc:
+	adrp	stbl, sys_call_ilp32_table // load syscall table pointer
+	b el0_svc_naked
+#endif
+
 	/*
 	 * This is the really slow path.  We're going to be doing context
 	 * switches, and waiting for our parent to respond.
diff --git a/arch/arm64/kernel/sys_ilp32.c b/arch/arm64/kernel/sys_ilp32.c
new file mode 100644
index 0000000..0996d8e
--- /dev/null
+++ b/arch/arm64/kernel/sys_ilp32.c
@@ -0,0 +1,65 @@ 
+/*
+ * AArch64- ILP32 specific system calls implementation
+ *
+ * Copyright (C) 2016 Cavium Inc.
+ * Author: Andrew Pinski <apinski@cavium.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/msg.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <asm-generic/syscalls.h>
+
+/* Using non-compat syscalls where necessary */
+#define compat_sys_fadvise64_64        sys_fadvise64_64
+#define compat_sys_fallocate           sys_fallocate
+#define compat_sys_ftruncate64         sys_ftruncate
+#define compat_sys_lookup_dcookie      sys_lookup_dcookie
+#define compat_sys_pread64             sys_pread64
+#define compat_sys_pwrite64            sys_pwrite64
+#define compat_sys_readahead           sys_readahead
+#define compat_sys_shmat               sys_shmat
+#define compat_sys_sync_file_range     sys_sync_file_range
+#define compat_sys_truncate64          sys_truncate
+#define sys_llseek                     sys_lseek
+#define sys_mmap2		       sys_mmap
+
+#include <asm/syscall.h>
+
+#undef __SYSCALL
+#undef __SC_COMP
+#undef __SC_WRAP
+#undef __SC_3264
+#undef __SC_COMP_3264
+
+#define __SYSCALL_COMPAT
+#define __SYSCALL(nr, sym)	[nr] = sym,
+#define __SC_WRAP(nr, sym)	[nr] = compat_##sym,
+
+/*
+ * The sys_call_ilp32_table array must be 4K aligned to be accessible from
+ * kernel/entry.S.
+ */
+void *sys_call_ilp32_table[__NR_syscalls] __aligned(4096) = {
+	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};