diff mbox series

[v5,04/17] common-user: Move syscall error detection into safe_syscall_base

Message ID 20211117160412.71563-5-richard.henderson@linaro.org
State New
Headers show
Series linux-user: simplify safe signal handling | expand

Commit Message

Richard Henderson Nov. 17, 2021, 4:03 p.m. UTC
The current api from safe_syscall_base() is to return -errno, which is
the interface provided by *some* linux kernel abis.  The wrapper macro,
safe_syscall(), detects error, stores into errno, and returns -1, to
match the api of the system syscall().

For those kernel abis that do not return -errno natively, this leads
to double syscall error detection.  E.g. Linux ppc64, which sets the
SO flag for error.

Simplify the usage from C by moving the error detection into assembly.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 linux-user/safe-syscall.h                   | 20 +++---
 common-user/host/aarch64/safe-syscall.inc.S | 55 +++++++++-------
 common-user/host/arm/safe-syscall.inc.S     | 58 ++++++++++-------
 common-user/host/i386/safe-syscall.inc.S    | 51 +++++++++------
 common-user/host/ppc64/safe-syscall.inc.S   | 63 +++++++++++--------
 common-user/host/riscv/safe-syscall.inc.S   | 50 +++++++++------
 common-user/host/s390x/safe-syscall.inc.S   | 50 +++++++++------
 common-user/host/x86_64/safe-syscall.inc.S  | 70 ++++++++++++---------
 8 files changed, 243 insertions(+), 174 deletions(-)

Comments

Warner Losh Nov. 17, 2021, 4:28 p.m. UTC | #1
On Wed, Nov 17, 2021 at 9:04 AM Richard Henderson <
richard.henderson@linaro.org> wrote:

> The current api from safe_syscall_base() is to return -errno, which is
> the interface provided by *some* linux kernel abis.  The wrapper macro,
> safe_syscall(), detects error, stores into errno, and returns -1, to
> match the api of the system syscall().
>
> For those kernel abis that do not return -errno natively, this leads
> to double syscall error detection.  E.g. Linux ppc64, which sets the
> SO flag for error.
>
> Simplify the usage from C by moving the error detection into assembly.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  linux-user/safe-syscall.h                   | 20 +++---
>  common-user/host/aarch64/safe-syscall.inc.S | 55 +++++++++-------
>  common-user/host/arm/safe-syscall.inc.S     | 58 ++++++++++-------
>  common-user/host/i386/safe-syscall.inc.S    | 51 +++++++++------
>  common-user/host/ppc64/safe-syscall.inc.S   | 63 +++++++++++--------
>  common-user/host/riscv/safe-syscall.inc.S   | 50 +++++++++------
>  common-user/host/s390x/safe-syscall.inc.S   | 50 +++++++++------
>  common-user/host/x86_64/safe-syscall.inc.S  | 70 ++++++++++++---------
>  8 files changed, 243 insertions(+), 174 deletions(-)
>

Reviewed by: Warner Losh <imp@bsdimp.com>

And I think it may fix a bug, when integrated with bsd-user fork,
in PowerPC hosts (though there's other bugs lingering there).


> diff --git a/linux-user/safe-syscall.h b/linux-user/safe-syscall.h
> index aaa9ffc0e2..ea0e8a8d24 100644
> --- a/linux-user/safe-syscall.h
> +++ b/linux-user/safe-syscall.h
> @@ -125,23 +125,17 @@
>   * kinds of restartability.
>   */
>  #ifdef HAVE_SAFE_SYSCALL
> -/* The core part of this function is implemented in assembly */
> -extern long safe_syscall_base(int *pending, long number, ...);
> +
> +/* The core part of this function is implemented in assembly. */
> +extern long safe_syscall_base(int *pending, int *errnop, long number,
> ...);
> +
>  /* These are defined by the safe-syscall.inc.S file */
>  extern char safe_syscall_start[];
>  extern char safe_syscall_end[];
>
> -#define safe_syscall(...)                                               \
> -    ({                                                                  \
> -        long ret_;                                                      \
> -        int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \
> -        ret_ = safe_syscall_base(psp_, __VA_ARGS__);                    \
> -        if (is_error(ret_)) {                                           \
> -            errno = -ret_;                                              \
> -            ret_ = -1;                                                  \
> -        }                                                               \
> -        ret_;                                                           \
> -    })
> +#define safe_syscall(...)
>  \
> +    safe_syscall_base(&((TaskState *)thread_cpu->opaque)->signal_pending,
> \
> +                      &errno, __VA_ARGS__)
>
>  #else
>
> diff --git a/common-user/host/aarch64/safe-syscall.inc.S
> b/common-user/host/aarch64/safe-syscall.inc.S
> index bc1f5a9792..95c60d8609 100644
> --- a/common-user/host/aarch64/safe-syscall.inc.S
> +++ b/common-user/host/aarch64/safe-syscall.inc.S
> @@ -17,22 +17,21 @@
>         .type   safe_syscall_start, #function
>         .type   safe_syscall_end, #function
>
> -       /* This is the entry point for making a system call. The calling
> +       /*
> +        * This is the entry point for making a system call. The calling
>          * convention here is that of a C varargs function with the
>          * first argument an 'int *' to the signal_pending flag, the
>          * second one the system call number (as a 'long'), and all further
>          * arguments being syscall arguments (also 'long').
> -        * We return a long which is the syscall's return value, which
> -        * may be negative-errno on failure. Conversion to the
> -        * -1-and-errno-set convention is done by the calling wrapper.
>          */
>  safe_syscall_base:
>         .cfi_startproc
> -       /* The syscall calling convention isn't the same as the
> -        * C one:
> +       /*
> +         * The syscall calling convention isn't the same as the C one:
>          * we enter with x0 == *signal_pending
> -        *               x1 == syscall number
> -        *               x2 ... x7, (stack) == syscall arguments
> +        *               x1 == errno
> +        *               x2 == syscall number
> +        *               x3 ... x7, (stack) == syscall arguments
>          *               and return the result in x0
>          * and the syscall instruction needs
>          *               x8 == syscall number
> @@ -40,17 +39,18 @@ safe_syscall_base:
>          *               and returns the result in x0
>          * Shuffle everything around appropriately.
>          */
> -       mov     x9, x0          /* signal_pending pointer */
> -       mov     x8, x1          /* syscall number */
> -       mov     x0, x2          /* syscall arguments */
> -       mov     x1, x3
> -       mov     x2, x4
> -       mov     x3, x5
> -       mov     x4, x6
> -       mov     x5, x7
> -       ldr     x6, [sp]
> +       mov     x10, x0         /* signal_pending pointer */
> +       mov     x11, x1         /* errno pointer */
> +       mov     x8, x2          /* syscall number */
> +       mov     x0, x3          /* syscall arguments */
> +       mov     x1, x4
> +       mov     x2, x5
> +       mov     x3, x6
> +       mov     x4, x7
> +       ldp     x5, x6, [sp]
>
> -       /* This next sequence of code works in conjunction with the
> +       /*
> +         * This next sequence of code works in conjunction with the
>          * rewind_if_safe_syscall_function(). If a signal is taken
>          * and the interrupted PC is anywhere between 'safe_syscall_start'
>          * and 'safe_syscall_end' then we rewind it to
> 'safe_syscall_start'.
> @@ -59,17 +59,26 @@ safe_syscall_base:
>          */
>  safe_syscall_start:
>         /* if signal_pending is non-zero, don't do the call */
> -       ldr     w10, [x9]
> -       cbnz    w10, 0f
> +       ldr     w9, [x10]
> +       cbnz    w9, 2f
>         svc     0x0
>  safe_syscall_end:
> +
>         /* code path for having successfully executed the syscall */
> +       cmn     x0, #4095
> +       b.cs    1f
>         ret
>
> -0:
> -       /* code path when we didn't execute the syscall */
> -       mov     x0, #-TARGET_ERESTARTSYS
> +       /* code path setting errno */
> +0:     neg     w0, w0                  /* create positive errno */
> +1:     str     w0, [x11]               /* store errno */
> +       mov     x0, #-1
>         ret
> +
> +       /* code path when we didn't execute the syscall */
> +2:     mov     w0, #TARGET_ERESTARTSYS
> +       b       1b
> +
>         .cfi_endproc
>
>         .size   safe_syscall_base, .-safe_syscall_base
> diff --git a/common-user/host/arm/safe-syscall.inc.S
> b/common-user/host/arm/safe-syscall.inc.S
> index 88c4958504..17839c6486 100644
> --- a/common-user/host/arm/safe-syscall.inc.S
> +++ b/common-user/host/arm/safe-syscall.inc.S
> @@ -22,33 +22,35 @@
>         .arm
>         .align 2
>
> -       /* This is the entry point for making a system call. The calling
> +       /*
> +        * This is the entry point for making a system call. The calling
>          * convention here is that of a C varargs function with the
>          * first argument an 'int *' to the signal_pending flag, the
>          * second one the system call number (as a 'long'), and all further
>          * arguments being syscall arguments (also 'long').
> -        * We return a long which is the syscall's return value, which
> -        * may be negative-errno on failure. Conversion to the
> -        * -1-and-errno-set convention is done by the calling wrapper.
>          */
>  safe_syscall_base:
>         .fnstart
>         .cfi_startproc
>         mov     r12, sp                 /* save entry stack */
> -       push    { r4, r5, r6, r7, r8, lr }
> -       .save   { r4, r5, r6, r7, r8, lr }
> -       .cfi_adjust_cfa_offset 24
> +       push    { r4, r5, r6, r7, r8, r9, r10, lr }
> +       .save   { r4, r5, r6, r7, r8, r9, r10, lr }
> +       .cfi_adjust_cfa_offset 32
>         .cfi_rel_offset r4, 0
>         .cfi_rel_offset r5, 4
>         .cfi_rel_offset r6, 8
>         .cfi_rel_offset r7, 12
>         .cfi_rel_offset r8, 16
> -       .cfi_rel_offset lr, 20
> +       .cfi_rel_offset r9, 20
> +       .cfi_rel_offset r10, 24
> +       .cfi_rel_offset lr, 28
>
> -       /* The syscall calling convention isn't the same as the C one:
> -        * we enter with r0 == *signal_pending
> -        *               r1 == syscall number
> -        *               r2, r3, [sp+0] ... [sp+12] == syscall arguments
> +       /*
> +        * The syscall calling convention isn't the same as the C one:
> +        * we enter with r0 == &signal_pending
> +         *               r1 == &errno
> +        *               r2 == syscall number
> +        *               r3, [sp+0] ... [sp+16] == syscall arguments
>          *               and return the result in r0
>          * and the syscall instruction needs
>          *               r7 == syscall number
> @@ -58,12 +60,13 @@ safe_syscall_base:
>          * Note the 16 bytes that we pushed to save registers.
>          */
>         mov     r8, r0                  /* copy signal_pending */
> -       mov     r7, r1                  /* syscall number */
> -       mov     r0, r2                  /* syscall args */
> -       mov     r1, r3
> -       ldm     r12, { r2, r3, r4, r5, r6 }
> +       mov     r9, r1                  /* copy errnop */
> +       mov     r7, r2                  /* syscall number */
> +       mov     r0, r3                  /* syscall args */
> +       ldm     r12, { r1, r2, r3, r4, r5, r6 }
>
> -       /* This next sequence of code works in conjunction with the
> +       /*
> +        * This next sequence of code works in conjunction with the
>          * rewind_if_safe_syscall_function(). If a signal is taken
>          * and the interrupted PC is anywhere between 'safe_syscall_start'
>          * and 'safe_syscall_end' then we rewind it to
> 'safe_syscall_start'.
> @@ -74,16 +77,25 @@ safe_syscall_start:
>         /* if signal_pending is non-zero, don't do the call */
>         ldr     r12, [r8]               /* signal_pending */
>         tst     r12, r12
> -       bne     1f
> +       bne     2f
>         swi     0
>  safe_syscall_end:
> -       /* code path for having successfully executed the syscall */
> -       pop     { r4, r5, r6, r7, r8, pc }
>
> -1:
> +       /* code path for having successfully executed the syscall */
> +       cmp     r0, #-4096
> +       bhi     0f
> +9:     pop     { r4, r5, r6, r7, r8, r9, r10, pc }
> +
> +       /* code path setting errno */
> +0:     neg     r0, r0          /* create positive errno */
> +1:     str     r0, [r9]        /* store errno */
> +       mov     r0, #-1
> +       b       9b
> +
>         /* code path when we didn't execute the syscall */
> -       ldr     r0, =-TARGET_ERESTARTSYS
> -       pop     { r4, r5, r6, r7, r8, pc }
> +2:     ldr     r0, =TARGET_ERESTARTSYS
> +       b       1b
> +
>         .fnend
>         .cfi_endproc
>
> diff --git a/common-user/host/i386/safe-syscall.inc.S
> b/common-user/host/i386/safe-syscall.inc.S
> index 9e58fc6504..ad89521783 100644
> --- a/common-user/host/i386/safe-syscall.inc.S
> +++ b/common-user/host/i386/safe-syscall.inc.S
> @@ -15,14 +15,12 @@
>         .global safe_syscall_end
>         .type   safe_syscall_base, @function
>
> -       /* This is the entry point for making a system call. The calling
> +       /*
> +        * This is the entry point for making a system call. The calling
>          * convention here is that of a C varargs function with the
>          * first argument an 'int *' to the signal_pending flag, the
>          * second one the system call number (as a 'long'), and all further
>          * arguments being syscall arguments (also 'long').
> -        * We return a long which is the syscall's return value, which
> -        * may be negative-errno on failure. Conversion to the
> -        * -1-and-errno-set convention is done by the calling wrapper.
>          */
>  safe_syscall_base:
>         .cfi_startproc
> @@ -41,9 +39,10 @@ safe_syscall_base:
>
>         /* The syscall calling convention isn't the same as the C one:
>          * we enter with 0(%esp) == return address
> -        *               4(%esp) == *signal_pending
> -        *               8(%esp) == syscall number
> -        *               12(%esp) ... 32(%esp) == syscall arguments
> +        *               4(%esp) == &signal_pending
> +        *               8(%esp) == &errno
> +        *               12(%esp) == syscall number
> +        *               16(%esp) ... 36(%esp) == syscall arguments
>          *               and return the result in eax
>          * and the syscall instruction needs
>          *               eax == syscall number
> @@ -52,14 +51,15 @@ safe_syscall_base:
>          * Shuffle everything around appropriately.
>          * Note the 16 bytes that we pushed to save registers.
>          */
> -       mov     12+16(%esp), %ebx       /* the syscall arguments */
> -       mov     16+16(%esp), %ecx
> -       mov     20+16(%esp), %edx
> -       mov     24+16(%esp), %esi
> -       mov     28+16(%esp), %edi
> -       mov     32+16(%esp), %ebp
> +       mov     16+16(%esp), %ebx       /* the syscall arguments */
> +       mov     20+16(%esp), %ecx
> +       mov     24+16(%esp), %edx
> +       mov     28+16(%esp), %esi
> +       mov     32+16(%esp), %edi
> +       mov     36+16(%esp), %ebp
>
> -       /* This next sequence of code works in conjunction with the
> +       /*
> +        * This next sequence of code works in conjunction with the
>          * rewind_if_safe_syscall_function(). If a signal is taken
>          * and the interrupted PC is anywhere between 'safe_syscall_start'
>          * and 'safe_syscall_end' then we rewind it to
> 'safe_syscall_start'.
> @@ -70,12 +70,16 @@ safe_syscall_start:
>         /* if signal_pending is non-zero, don't do the call */
>         mov     4+16(%esp), %eax        /* signal_pending */
>         cmpl    $0, (%eax)
> -       jnz     1f
> +       jnz     2f
>         mov     8+16(%esp), %eax        /* syscall number */
>         int     $0x80
>  safe_syscall_end:
> +
>         /* code path for having successfully executed the syscall */
> -       pop     %ebx
> +       cmp     $-4095, %eax
> +       jae     0f
> +
> +9:     pop     %ebx
>         .cfi_remember_state
>         .cfi_adjust_cfa_offset -4
>         .cfi_restore ebx
> @@ -90,11 +94,18 @@ safe_syscall_end:
>         .cfi_restore ebp
>         ret
>
> -1:
> -       /* code path when we didn't execute the syscall */
>         .cfi_restore_state
> -       mov     $-TARGET_ERESTARTSYS, %eax
> -       jmp     safe_syscall_end
> +
> +       /* code path setting errno */
> +0:     neg     %eax                    /* create positive errno */
> +1:     mov     8+16(%esp), %ebx        /* load errno pointer */
> +       mov     %eax, (%ebx)            /* store errno */
> +       mov     $-1, %eax
> +       jmp     9b
> +
> +       /* code path when we didn't execute the syscall */
> +2:     mov     $TARGET_ERESTARTSYS, %eax
> +       jmp     1b
>         .cfi_endproc
>
>         .size   safe_syscall_base, .-safe_syscall_base
> diff --git a/common-user/host/ppc64/safe-syscall.inc.S
> b/common-user/host/ppc64/safe-syscall.inc.S
> index 875133173b..e35408c5fb 100644
> --- a/common-user/host/ppc64/safe-syscall.inc.S
> +++ b/common-user/host/ppc64/safe-syscall.inc.S
> @@ -17,14 +17,19 @@
>
>         .text
>
> -       /* This is the entry point for making a system call. The calling
> +#if _CALL_ELF == 2
> +#define PARAM_OFS   32
> +#else
> +#define PARAM_OFS   48
> +#endif
> +#define PARAM(X)    PARAM_OFS + X*8
> +
> +       /*
> +        * This is the entry point for making a system call. The calling
>          * convention here is that of a C varargs function with the
>          * first argument an 'int *' to the signal_pending flag, the
>          * second one the system call number (as a 'long'), and all further
>          * arguments being syscall arguments (also 'long').
> -        * We return a long which is the syscall's return value, which
> -        * may be negative-errno on failure. Conversion to the
> -        * -1-and-errno-set convention is done by the calling wrapper.
>          */
>  #if _CALL_ELF == 2
>  safe_syscall_base:
> @@ -39,9 +44,11 @@ safe_syscall_base:
>  .L.safe_syscall_base:
>         .cfi_startproc
>  #endif
> -       /* We enter with r3 == *signal_pending
> -        *               r4 == syscall number
> -        *               r5 ... r10 == syscall arguments
> +       /*
> +        * We enter with r3 == &signal_pending
> +        *               r4 == &errno
> +        *               r5 == syscall number
> +        *               r6 ... r10, (stack) == syscall arguments
>          *               and return the result in r3
>          * and the syscall instruction needs
>          *               r0 == syscall number
> @@ -49,18 +56,18 @@ safe_syscall_base:
>          *               and returns the result in r3
>          * Shuffle everything around appropriately.
>          */
> -       std     14, 16(1) /* Preserve r14 in SP+16 */
> -       .cfi_offset 14, 16
> -       mr      14, 3   /* signal_pending */
> -       mr      0, 4    /* syscall number */
> -       mr      3, 5    /* syscall arguments */
> -       mr      4, 6
> -       mr      5, 7
> -       mr      6, 8
> -       mr      7, 9
> -       mr      8, 10
> +       mr      11, 3           /* signal_pending pointer */
> +       std     4, PARAM(1)(1)  /* save errno pointer in param slot */
> +       mr      0, 5            /* syscall number */
> +       mr      3, 6            /* syscall arguments */
> +       mr      4, 7
> +       mr      5, 8
> +       mr      6, 9
> +       mr      7, 10
> +       ld      8, PARAM(8)(1)
>
> -       /* This next sequence of code works in conjunction with the
> +       /*
> +        * This next sequence of code works in conjunction with the
>          * rewind_if_safe_syscall_function(). If a signal is taken
>          * and the interrupted PC is anywhere between 'safe_syscall_start'
>          * and 'safe_syscall_end' then we rewind it to
> 'safe_syscall_start'.
> @@ -69,23 +76,25 @@ safe_syscall_base:
>          */
>  safe_syscall_start:
>         /* if signal_pending is non-zero, don't do the call */
> -       lwz     12, 0(14)
> +       lwz     12, 0(11)
>         cmpwi   0, 12, 0
>         bne-    0f
>         sc
>  safe_syscall_end:
> -       /* code path when we did execute the syscall */
> -       ld 14, 16(1) /* restore r14 to its original value */
> -       bnslr+
>
> -       /* syscall failed; return negative errno */
> -       neg     3, 3
> +       /* code path for having successfully executed the syscall */
> +       bnslr+                  /* SO set for syscall error */
> +
> +       /* code path setting errno */
> +1:     ld      11, PARAM(1)(1) /* restore errno pointer */
> +       stw     3, 0(11)        /* store errno */
> +       li      3, -1
>         blr
>
>         /* code path when we didn't execute the syscall */
> -0:     addi    3, 0, -TARGET_ERESTARTSYS
> -       ld 14, 16(1) /* restore r14 to its original value */
> -       blr
> +0:     li      3, TARGET_ERESTARTSYS
> +       b       1b
> +
>         .cfi_endproc
>
>  #if _CALL_ELF == 2
> diff --git a/common-user/host/riscv/safe-syscall.inc.S
> b/common-user/host/riscv/safe-syscall.inc.S
> index 9ca3fbfd1e..eddede702b 100644
> --- a/common-user/host/riscv/safe-syscall.inc.S
> +++ b/common-user/host/riscv/safe-syscall.inc.S
> @@ -23,17 +23,15 @@
>          * first argument an 'int *' to the signal_pending flag, the
>          * second one the system call number (as a 'long'), and all further
>          * arguments being syscall arguments (also 'long').
> -        * We return a long which is the syscall's return value, which
> -        * may be negative-errno on failure. Conversion to the
> -        * -1-and-errno-set convention is done by the calling wrapper.
>          */
>  safe_syscall_base:
>         .cfi_startproc
>         /*
>          * The syscall calling convention is nearly the same as C:
> -        * we enter with a0 == *signal_pending
> -        *               a1 == syscall number
> -        *               a2 ... a7 == syscall arguments
> +        * we enter with a0 == &signal_pending
> +        *               a1 == &errno
> +        *               a2 == syscall number
> +        *               a3 ... a7, [sp] == syscall arguments
>          *               and return the result in a0
>          * and the syscall instruction needs
>          *               a7 == syscall number
> @@ -42,14 +40,19 @@ safe_syscall_base:
>          * Shuffle everything around appropriately.
>          */
>         mv      t0, a0          /* signal_pending pointer */
> -       mv      t1, a1          /* syscall number */
> -       mv      a0, a2          /* syscall arguments */
> -       mv      a1, a3
> -       mv      a2, a4
> -       mv      a3, a5
> -       mv      a4, a6
> -       mv      a5, a7
> -       mv      a7, t1
> +       mv      t1, a1          /* errno pointer */
> +       mv      t2, a2          /* syscall number */
> +       mv      a0, a3          /* syscall arguments */
> +       mv      a1, a4
> +       mv      a2, a5
> +       mv      a3, a6
> +       mv      a4, a7
> +#if __riscv_xlen == 32
> +       lw      a5, 0(sp)
> +#else
> +       ld      a5, 0(sp)
> +#endif
> +       mv      a7, t2
>
>         /*
>          * This next sequence of code works in conjunction with the
> @@ -61,17 +64,26 @@ safe_syscall_base:
>          */
>  safe_syscall_start:
>         /* If signal_pending is non-zero, don't do the call */
> -       lw      t1, 0(t0)
> -       bnez    t1, 0f
> +       lw      t2, 0(t0)
> +       bnez    t2, 2f
>         scall
>  safe_syscall_end:
> +
>         /* code path for having successfully executed the syscall */
> +       li      t2, -4096
> +       bgtu    a0, t2, 0f
>         ret
>
> -0:
> -       /* code path when we didn't execute the syscall */
> -       li      a0, -TARGET_ERESTARTSYS
> +       /* code path setting errno */
> +0:     neg     a0, a0                  /* create positive errno */
> +1:     sw      a0, 0(t1)               /* store errno */
> +       li      a0, -1
>         ret
> +
> +       /* code path when we didn't execute the syscall */
> +2:     li      a0, TARGET_ERESTARTSYS
> +       j       1b
> +
>         .cfi_endproc
>
>         .size   safe_syscall_base, .-safe_syscall_base
> diff --git a/common-user/host/s390x/safe-syscall.inc.S
> b/common-user/host/s390x/safe-syscall.inc.S
> index 414b44ad38..f2a3bccc13 100644
> --- a/common-user/host/s390x/safe-syscall.inc.S
> +++ b/common-user/host/s390x/safe-syscall.inc.S
> @@ -15,14 +15,12 @@
>         .global safe_syscall_end
>         .type   safe_syscall_base, @function
>
> -       /* This is the entry point for making a system call. The calling
> +       /*
> +        * This is the entry point for making a system call. The calling
>          * convention here is that of a C varargs function with the
>          * first argument an 'int *' to the signal_pending flag, the
>          * second one the system call number (as a 'long'), and all further
>          * arguments being syscall arguments (also 'long').
> -        * We return a long which is the syscall's return value, which
> -        * may be negative-errno on failure. Conversion to the
> -        * -1-and-errno-set convention is done by the calling wrapper.
>          */
>  safe_syscall_base:
>         .cfi_startproc
> @@ -44,11 +42,13 @@ safe_syscall_base:
>         stg     %r1,0(%r15)             /* store back chain */
>         stg     %r0,8(%r15)             /* store eos */
>
> -       /* The syscall calling convention isn't the same as the
> +       /*
> +        * The syscall calling convention isn't the same as the
>          * C one:
> -        * we enter with r2 == *signal_pending
> -        *               r3 == syscall number
> -        *               r4, r5, r6, (stack) == syscall arguments
> +        * we enter with r2 == &signal_pending
> +        *               r3 == &errno
> +        *               r4 == syscall number
> +        *               r5, r6, (stack) == syscall arguments
>          *               and return the result in r2
>          * and the syscall instruction needs
>          *               r1 == syscall number
> @@ -57,13 +57,14 @@ safe_syscall_base:
>          * Shuffle everything around appropriately.
>          */
>         lgr     %r8,%r2                 /* signal_pending pointer */
> -       lgr     %r1,%r3                 /* syscall number */
> -       lgr     %r2,%r4                 /* syscall args */
> -       lgr     %r3,%r5
> -       lgr     %r4,%r6
> -       lmg     %r5,%r7,320(%r15)
> +       lgr     %r9,%r3                 /* errno pointer */
> +       lgr     %r1,%r4                 /* syscall number */
> +       lgr     %r2,%r5                 /* syscall args */
> +       lgr     %r3,%r6
> +       lmg     %r4,%r7,320(%r15)
>
> -       /* This next sequence of code works in conjunction with the
> +       /*
> +        * This next sequence of code works in conjunction with the
>          * rewind_if_safe_syscall_function(). If a signal is taken
>          * and the interrupted PC is anywhere between 'safe_syscall_start'
>          * and 'safe_syscall_end' then we rewind it to
> 'safe_syscall_start'.
> @@ -73,18 +74,31 @@ safe_syscall_base:
>  safe_syscall_start:
>         /* if signal_pending is non-zero, don't do the call */
>         icm     %r0,15,0(%r8)
> -       jne     2f
> +       jne     1f
>         svc     0
>  safe_syscall_end:
>
> -1:     lg      %r15,0(%r15)            /* load back chain */
> +       /* code path for having successfully executed the syscall */
> +       lghi    %r0, -4095              /* check for syscall error */
> +       clgr    %r2, %r0
> +       jgnl    0f
> +
> +9:     lg      %r15,0(%r15)            /* load back chain */
>         .cfi_remember_state
>         .cfi_adjust_cfa_offset -160
>         lmg     %r6,%r15,48(%r15)       /* load saved registers */
>         br      %r14
>         .cfi_restore_state
> -2:     lghi    %r2, -TARGET_ERESTARTSYS
> -       j       1b
> +
> +       /* code path when we didn't execute the syscall */
> +1:     lghi    %r2, -TARGET_ERESTARTSYS
> +
> +       /* code path setting errno */
> +0:     lcr     %r2, %r2                /* create positive errno */
> +       st      %r2, 0(%r9)             /* store errno */
> +       lghi    %r2, -1
> +       j       9b
> +
>         .cfi_endproc
>
>         .size   safe_syscall_base, .-safe_syscall_base
> diff --git a/common-user/host/x86_64/safe-syscall.inc.S
> b/common-user/host/x86_64/safe-syscall.inc.S
> index f36992daa3..9a0c4c93b4 100644
> --- a/common-user/host/x86_64/safe-syscall.inc.S
> +++ b/common-user/host/x86_64/safe-syscall.inc.S
> @@ -14,18 +14,17 @@
>          .global safe_syscall_end
>          .type   safe_syscall_base, @function
>
> -        /* This is the entry point for making a system call. The calling
> +        /*
> +         * This is the entry point for making a system call. The calling
>           * convention here is that of a C varargs function with the
>           * first argument an 'int *' to the signal_pending flag, the
>           * second one the system call number (as a 'long'), and all
> further
>           * arguments being syscall arguments (also 'long').
> -         * We return a long which is the syscall's return value, which
> -         * may be negative-errno on failure. Conversion to the
> -         * -1-and-errno-set convention is done by the calling wrapper.
>           */
>  safe_syscall_base:
>          .cfi_startproc
> -        /* This saves a frame pointer and aligns the stack for the
> syscall.
> +        /*
> +         * This saves a frame pointer and aligns the stack for the
> syscall.
>           * (It's unclear if the syscall ABI has the same stack alignment
>           * requirements as the userspace function call ABI, but better
> safe than
>           * sorry. Appendix A2 of
> http://www.x86-64.org/documentation/abi.pdf
> @@ -35,11 +34,12 @@ safe_syscall_base:
>          .cfi_adjust_cfa_offset 8
>          .cfi_rel_offset rbp, 0
>
> -        /* The syscall calling convention isn't the same as the
> -         * C one:
> -         * we enter with rdi == *signal_pending
> -         *               rsi == syscall number
> -         *               rdx, rcx, r8, r9, (stack), (stack) == syscall
> arguments
> +        /*
> +         * The syscall calling convention isn't the same as the C one:
> +         * we enter with rdi == &signal_pending
> +         *               rsi == &errno
> +         *               rdx == syscall number
> +         *               rcx, r8, r9, (stack...) == syscall arguments
>           *               and return the result in rax
>           * and the syscall instruction needs
>           *               rax == syscall number
> @@ -48,17 +48,19 @@ safe_syscall_base:
>           * Shuffle everything around appropriately.
>           * Note that syscall will trash rcx and r11.
>           */
> -        mov     %rsi, %rax /* syscall number */
> -        mov     %rdi, %rbp /* signal_pending pointer */
> +        mov     %rdi, %r11 /* signal_pending pointer */
> +        mov     %rsi, %rbp /* errno pointer */
> +        mov     %rdx, %rax /* syscall number */
>          /* and the syscall arguments */
> -        mov     %rdx, %rdi
> -        mov     %rcx, %rsi
> -        mov     %r8,  %rdx
> -        mov     %r9,  %r10
> -        mov     16(%rsp), %r8
> -        mov     24(%rsp), %r9
> +        mov     %rcx, %rdi
> +        mov     %r8, %rsi
> +        mov     %r9,  %rdx
> +        mov     16(%rsp), %r10
> +        mov     24(%rsp), %r8
> +        mov     32(%rsp), %r9
>
> -        /* This next sequence of code works in conjunction with the
> +        /*
> +         * This next sequence of code works in conjunction with the
>           * rewind_if_safe_syscall_function(). If a signal is taken
>           * and the interrupted PC is anywhere between 'safe_syscall_start'
>           * and 'safe_syscall_end' then we rewind it to
> 'safe_syscall_start'.
> @@ -67,25 +69,31 @@ safe_syscall_base:
>           */
>  safe_syscall_start:
>          /* if signal_pending is non-zero, don't do the call */
> -        cmpl   $0, (%rbp)
> -        jnz     1f
> +        cmpl    $0, (%r11)
> +        jnz     2f
>          syscall
>  safe_syscall_end:
> +
>          /* code path for having successfully executed the syscall */
> -        pop     %rbp
> +        cmp    $-4095, %rax
> +        jae    0f
> +
> +9:      pop     %rbp
>          .cfi_remember_state
>          .cfi_def_cfa_offset 8
>          .cfi_restore rbp
>          ret
> -
> -1:
> -        /* code path when we didn't execute the syscall */
>          .cfi_restore_state
> -        mov     $-TARGET_ERESTARTSYS, %rax
> -        pop     %rbp
> -        .cfi_def_cfa_offset 8
> -        .cfi_restore rbp
> -        ret
> -        .cfi_endproc
>
> +        /* code path setting errno */
> +0:      neg    %eax                    /* create positive errno */
> +1:      mov    %eax, (%rbp)            /* store errno */
> +        mov    $-1, %rax
> +        jmp    9b
> +
> +        /* code path when we didn't execute the syscall */
> +2:      mov     $TARGET_ERESTARTSYS, %eax
> +        jmp    1b
> +
> +        .cfi_endproc
>          .size   safe_syscall_base, .-safe_syscall_base
> --
> 2.25.1
>
>
Peter Maydell Nov. 22, 2021, 11:55 a.m. UTC | #2
On Wed, 17 Nov 2021 at 16:12, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> The current api from safe_syscall_base() is to return -errno, which is
> the interface provided by *some* linux kernel abis.  The wrapper macro,
> safe_syscall(), detects error, stores into errno, and returns -1, to
> match the api of the system syscall().
>
> For those kernel abis that do not return -errno natively, this leads
> to double syscall error detection.  E.g. Linux ppc64, which sets the
> SO flag for error.
>
> Simplify the usage from C by moving the error detection into assembly.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  linux-user/safe-syscall.h                   | 20 +++---
>  common-user/host/aarch64/safe-syscall.inc.S | 55 +++++++++-------
>  common-user/host/arm/safe-syscall.inc.S     | 58 ++++++++++-------
>  common-user/host/i386/safe-syscall.inc.S    | 51 +++++++++------
>  common-user/host/ppc64/safe-syscall.inc.S   | 63 +++++++++++--------
>  common-user/host/riscv/safe-syscall.inc.S   | 50 +++++++++------
>  common-user/host/s390x/safe-syscall.inc.S   | 50 +++++++++------
>  common-user/host/x86_64/safe-syscall.inc.S  | 70 ++++++++++++---------
>  8 files changed, 243 insertions(+), 174 deletions(-)
>
> diff --git a/linux-user/safe-syscall.h b/linux-user/safe-syscall.h
> index aaa9ffc0e2..ea0e8a8d24 100644
> --- a/linux-user/safe-syscall.h
> +++ b/linux-user/safe-syscall.h
> @@ -125,23 +125,17 @@
>   * kinds of restartability.
>   */
>  #ifdef HAVE_SAFE_SYSCALL
> -/* The core part of this function is implemented in assembly */
> -extern long safe_syscall_base(int *pending, long number, ...);
> +
> +/* The core part of this function is implemented in assembly. */
> +extern long safe_syscall_base(int *pending, int *errnop, long number, ...);
> +
>  /* These are defined by the safe-syscall.inc.S file */
>  extern char safe_syscall_start[];
>  extern char safe_syscall_end[];
>
> -#define safe_syscall(...)                                               \
> -    ({                                                                  \
> -        long ret_;                                                      \
> -        int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \
> -        ret_ = safe_syscall_base(psp_, __VA_ARGS__);                    \
> -        if (is_error(ret_)) {                                           \
> -            errno = -ret_;                                              \
> -            ret_ = -1;                                                  \
> -        }                                                               \
> -        ret_;                                                           \
> -    })
> +#define safe_syscall(...)                                                 \
> +    safe_syscall_base(&((TaskState *)thread_cpu->opaque)->signal_pending, \
> +                      &errno, __VA_ARGS__)
>
>  #else
>
> diff --git a/common-user/host/aarch64/safe-syscall.inc.S b/common-user/host/aarch64/safe-syscall.inc.S
> index bc1f5a9792..95c60d8609 100644
> --- a/common-user/host/aarch64/safe-syscall.inc.S
> +++ b/common-user/host/aarch64/safe-syscall.inc.S
> @@ -17,22 +17,21 @@
>         .type   safe_syscall_start, #function
>         .type   safe_syscall_end, #function
>
> -       /* This is the entry point for making a system call. The calling
> +       /*
> +        * This is the entry point for making a system call. The calling
>          * convention here is that of a C varargs function with the
>          * first argument an 'int *' to the signal_pending flag, the
>          * second one the system call number (as a 'long'), and all further
>          * arguments being syscall arguments (also 'long').

This comment text needs updating to mention the new errnop argument.
(Applies to all the similar comments in the files for the other archs.)

> -        * We return a long which is the syscall's return value, which
> -        * may be negative-errno on failure. Conversion to the
> -        * -1-and-errno-set convention is done by the calling wrapper.
>          */
>  safe_syscall_base:
>         .cfi_startproc
> -       /* The syscall calling convention isn't the same as the
> -        * C one:
> +       /*
> +         * The syscall calling convention isn't the same as the C one:

Looks like the indent here is wrong ?

>          * we enter with x0 == *signal_pending
> -        *               x1 == syscall number
> -        *               x2 ... x7, (stack) == syscall arguments
> +        *               x1 == errno

"int* address of errno"

> +        *               x2 == syscall number
> +        *               x3 ... x7, (stack) == syscall arguments
>          *               and return the result in x0
>          * and the syscall instruction needs
>          *               x8 == syscall number
> @@ -40,17 +39,18 @@ safe_syscall_base:
>          *               and returns the result in x0
>          * Shuffle everything around appropriately.
>          */
> -       mov     x9, x0          /* signal_pending pointer */
> -       mov     x8, x1          /* syscall number */
> -       mov     x0, x2          /* syscall arguments */
> -       mov     x1, x3
> -       mov     x2, x4
> -       mov     x3, x5
> -       mov     x4, x6
> -       mov     x5, x7
> -       ldr     x6, [sp]
> +       mov     x10, x0         /* signal_pending pointer */
> +       mov     x11, x1         /* errno pointer */
> +       mov     x8, x2          /* syscall number */
> +       mov     x0, x3          /* syscall arguments */
> +       mov     x1, x4
> +       mov     x2, x5
> +       mov     x3, x6
> +       mov     x4, x7
> +       ldp     x5, x6, [sp]
>
> -       /* This next sequence of code works in conjunction with the
> +       /*
> +         * This next sequence of code works in conjunction with the
>          * rewind_if_safe_syscall_function(). If a signal is taken
>          * and the interrupted PC is anywhere between 'safe_syscall_start'
>          * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
> @@ -59,17 +59,26 @@ safe_syscall_base:
>          */
>  safe_syscall_start:
>         /* if signal_pending is non-zero, don't do the call */
> -       ldr     w10, [x9]
> -       cbnz    w10, 0f
> +       ldr     w9, [x10]
> +       cbnz    w9, 2f
>         svc     0x0
>  safe_syscall_end:
> +
>         /* code path for having successfully executed the syscall */
> +       cmn     x0, #4095
> +       b.cs    1f

Shouldn't this be going to label 0f ? We need to do the 'neg',
and unless I'm misreading the diff there's currently no path
of execution that gets to that.

Alternatively, branch on the opposite-sense condition to the
'ret' after the set-errno stuff.

>         ret
>
> -0:
> -       /* code path when we didn't execute the syscall */
> -       mov     x0, #-TARGET_ERESTARTSYS
> +       /* code path setting errno */
> +0:     neg     w0, w0                  /* create positive errno */
> +1:     str     w0, [x11]               /* store errno */
> +       mov     x0, #-1
>         ret
> +
> +       /* code path when we didn't execute the syscall */
> +2:     mov     w0, #TARGET_ERESTARTSYS
> +       b       1b
> +
>         .cfi_endproc
>
>         .size   safe_syscall_base, .-safe_syscall_base
> diff --git a/common-user/host/arm/safe-syscall.inc.S b/common-user/host/arm/safe-syscall.inc.S
> index 88c4958504..17839c6486 100644
> --- a/common-user/host/arm/safe-syscall.inc.S
> +++ b/common-user/host/arm/safe-syscall.inc.S
> @@ -22,33 +22,35 @@
>         .arm
>         .align 2
>
> -       /* This is the entry point for making a system call. The calling
> +       /*
> +        * This is the entry point for making a system call. The calling
>          * convention here is that of a C varargs function with the
>          * first argument an 'int *' to the signal_pending flag, the
>          * second one the system call number (as a 'long'), and all further
>          * arguments being syscall arguments (also 'long').
> -        * We return a long which is the syscall's return value, which
> -        * may be negative-errno on failure. Conversion to the
> -        * -1-and-errno-set convention is done by the calling wrapper.
>          */
>  safe_syscall_base:
>         .fnstart
>         .cfi_startproc
>         mov     r12, sp                 /* save entry stack */
> -       push    { r4, r5, r6, r7, r8, lr }
> -       .save   { r4, r5, r6, r7, r8, lr }
> -       .cfi_adjust_cfa_offset 24
> +       push    { r4, r5, r6, r7, r8, r9, r10, lr }
> +       .save   { r4, r5, r6, r7, r8, r9, r10, lr }
> +       .cfi_adjust_cfa_offset 32
>         .cfi_rel_offset r4, 0
>         .cfi_rel_offset r5, 4
>         .cfi_rel_offset r6, 8
>         .cfi_rel_offset r7, 12
>         .cfi_rel_offset r8, 16
> -       .cfi_rel_offset lr, 20
> +       .cfi_rel_offset r9, 20
> +       .cfi_rel_offset r10, 24
> +       .cfi_rel_offset lr, 28
>
> -       /* The syscall calling convention isn't the same as the C one:
> -        * we enter with r0 == *signal_pending
> -        *               r1 == syscall number
> -        *               r2, r3, [sp+0] ... [sp+12] == syscall arguments
> +       /*
> +        * The syscall calling convention isn't the same as the C one:
> +        * we enter with r0 == &signal_pending
> +         *               r1 == &errno

Odd indent ?

> +        *               r2 == syscall number
> +        *               r3, [sp+0] ... [sp+16] == syscall arguments
>          *               and return the result in r0

Don't we wind up with a potential issue here with 64-bit arguments
due to the calling convention wanting to put those in aligned
memory/register locations? Previously because we had just two
extra arguments the arguments started at r2 and had the same
alignment behaviour as the syscall wants for them starting at
r0; but now we start at r3 so if for instance the first argument
is 64-bit it will be in [sp+0][sp+4] but should go in r0:r1
I think...

(Stopped reviewing here because if we need to change the
way we call these functions there's no point my reviewing
the fine detail of the asm.)

-- PMM
Richard Henderson Nov. 22, 2021, 12:21 p.m. UTC | #3
On 11/22/21 12:55 PM, Peter Maydell wrote:
>> -       /* This is the entry point for making a system call. The calling
>> +       /*
>> +        * This is the entry point for making a system call. The calling
>>           * convention here is that of a C varargs function with the
>>           * first argument an 'int *' to the signal_pending flag, the
>>           * second one the system call number (as a 'long'), and all further
>>           * arguments being syscall arguments (also 'long').
> 
> This comment text needs updating to mention the new errnop argument.
> (Applies to all the similar comments in the files for the other archs.)

Yep.

>> +       /*
>> +         * The syscall calling convention isn't the same as the C one:
> 
> Looks like the indent here is wrong ?

Irritatingly, these files are a mix of tabs/spaces.

>>           * we enter with x0 == *signal_pending
>> -        *               x1 == syscall number
>> -        *               x2 ... x7, (stack) == syscall arguments
>> +        *               x1 == errno
> 
> "int* address of errno"

Arg, fixed some of these, but clearly. not all.

>>          /* code path for having successfully executed the syscall */
>> +       cmn     x0, #4095
>> +       b.cs    1f
> 
> Shouldn't this be going to label 0f ? We need to do the 'neg',
> and unless I'm misreading the diff there's currently no path
> of execution that gets to that.

Oops, rebase error, where the fix landed in the next patch.

>> +        *               r2 == syscall number
>> +        *               r3, [sp+0] ... [sp+16] == syscall arguments
>>           *               and return the result in r0
> 
> Don't we wind up with a potential issue here with 64-bit arguments
> due to the calling convention wanting to put those in aligned
> memory/register locations? Previously because we had just two
> extra arguments the arguments started at r2 and had the same
> alignment behaviour as the syscall wants for them starting at
> r0; but now we start at r3 so if for instance the first argument
> is 64-bit it will be in [sp+0][sp+4] but should go in r0:r1
> I think...
> 
> (Stopped reviewing here because if we need to change the
> way we call these functions there's no point my reviewing
> the fine detail of the asm.)

Oof.  I missed that detail.  Yes, that is a problem (I think arm is the only such 
supported host).  I think the best solution would be to *not* pass in &errno, but to have 
the assembly tail-call to

long safe_syscall_errno_tail(int value)
{
     errno = value;
     return -1;
}

Which is probably more efficient in any case.  I'll re-work this.


r~
Peter Maydell Nov. 22, 2021, 3:07 p.m. UTC | #4
On Mon, 22 Nov 2021 at 12:21, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 11/22/21 12:55 PM, Peter Maydell wrote:
> >> +       /*
> >> +         * The syscall calling convention isn't the same as the C one:
> >
> > Looks like the indent here is wrong ?
>
> Irritatingly, these files are a mix of tabs/spaces.

Hmm, so they are; I wonder how we let that slip in. Maybe do
a set of preparatory patches doing a mechanical tab-to-space
conversion?

-- PMM
diff mbox series

Patch

diff --git a/linux-user/safe-syscall.h b/linux-user/safe-syscall.h
index aaa9ffc0e2..ea0e8a8d24 100644
--- a/linux-user/safe-syscall.h
+++ b/linux-user/safe-syscall.h
@@ -125,23 +125,17 @@ 
  * kinds of restartability.
  */
 #ifdef HAVE_SAFE_SYSCALL
-/* The core part of this function is implemented in assembly */
-extern long safe_syscall_base(int *pending, long number, ...);
+
+/* The core part of this function is implemented in assembly. */
+extern long safe_syscall_base(int *pending, int *errnop, long number, ...);
+
 /* These are defined by the safe-syscall.inc.S file */
 extern char safe_syscall_start[];
 extern char safe_syscall_end[];
 
-#define safe_syscall(...)                                               \
-    ({                                                                  \
-        long ret_;                                                      \
-        int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \
-        ret_ = safe_syscall_base(psp_, __VA_ARGS__);                    \
-        if (is_error(ret_)) {                                           \
-            errno = -ret_;                                              \
-            ret_ = -1;                                                  \
-        }                                                               \
-        ret_;                                                           \
-    })
+#define safe_syscall(...)                                                 \
+    safe_syscall_base(&((TaskState *)thread_cpu->opaque)->signal_pending, \
+                      &errno, __VA_ARGS__)
 
 #else
 
diff --git a/common-user/host/aarch64/safe-syscall.inc.S b/common-user/host/aarch64/safe-syscall.inc.S
index bc1f5a9792..95c60d8609 100644
--- a/common-user/host/aarch64/safe-syscall.inc.S
+++ b/common-user/host/aarch64/safe-syscall.inc.S
@@ -17,22 +17,21 @@ 
 	.type	safe_syscall_start, #function
 	.type	safe_syscall_end, #function
 
-	/* This is the entry point for making a system call. The calling
+	/*
+	 * This is the entry point for making a system call. The calling
 	 * convention here is that of a C varargs function with the
 	 * first argument an 'int *' to the signal_pending flag, the
 	 * second one the system call number (as a 'long'), and all further
 	 * arguments being syscall arguments (also 'long').
-	 * We return a long which is the syscall's return value, which
-	 * may be negative-errno on failure. Conversion to the
-	 * -1-and-errno-set convention is done by the calling wrapper.
 	 */
 safe_syscall_base:
 	.cfi_startproc
-	/* The syscall calling convention isn't the same as the
-	 * C one:
+	/*
+         * The syscall calling convention isn't the same as the C one:
 	 * we enter with x0 == *signal_pending
-	 *               x1 == syscall number
-	 *               x2 ... x7, (stack) == syscall arguments
+	 *               x1 == errno
+	 *               x2 == syscall number
+	 *               x3 ... x7, (stack) == syscall arguments
 	 *               and return the result in x0
 	 * and the syscall instruction needs
 	 *               x8 == syscall number
@@ -40,17 +39,18 @@  safe_syscall_base:
 	 *               and returns the result in x0
 	 * Shuffle everything around appropriately.
 	 */
-	mov	x9, x0		/* signal_pending pointer */
-	mov	x8, x1		/* syscall number */
-	mov	x0, x2		/* syscall arguments */
-	mov	x1, x3
-	mov	x2, x4
-	mov	x3, x5
-	mov	x4, x6
-	mov	x5, x7
-	ldr	x6, [sp]
+	mov	x10, x0		/* signal_pending pointer */
+	mov	x11, x1		/* errno pointer */
+	mov	x8, x2		/* syscall number */
+	mov	x0, x3		/* syscall arguments */
+	mov	x1, x4
+	mov	x2, x5
+	mov	x3, x6
+	mov	x4, x7
+	ldp	x5, x6, [sp]
 
-	/* This next sequence of code works in conjunction with the
+	/*
+         * This next sequence of code works in conjunction with the
 	 * rewind_if_safe_syscall_function(). If a signal is taken
 	 * and the interrupted PC is anywhere between 'safe_syscall_start'
 	 * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
@@ -59,17 +59,26 @@  safe_syscall_base:
 	 */
 safe_syscall_start:
 	/* if signal_pending is non-zero, don't do the call */
-	ldr	w10, [x9]
-	cbnz	w10, 0f 
+	ldr	w9, [x10]
+	cbnz	w9, 2f
 	svc	0x0
 safe_syscall_end:
+
 	/* code path for having successfully executed the syscall */
+	cmn	x0, #4095
+	b.cs	1f
 	ret
 
-0:
-	/* code path when we didn't execute the syscall */
-	mov	x0, #-TARGET_ERESTARTSYS
+	/* code path setting errno */
+0:	neg	w0, w0			/* create positive errno */
+1:	str	w0, [x11]		/* store errno */
+	mov	x0, #-1
 	ret
+
+	/* code path when we didn't execute the syscall */
+2:	mov	w0, #TARGET_ERESTARTSYS
+	b	1b
+
 	.cfi_endproc
 
 	.size	safe_syscall_base, .-safe_syscall_base
diff --git a/common-user/host/arm/safe-syscall.inc.S b/common-user/host/arm/safe-syscall.inc.S
index 88c4958504..17839c6486 100644
--- a/common-user/host/arm/safe-syscall.inc.S
+++ b/common-user/host/arm/safe-syscall.inc.S
@@ -22,33 +22,35 @@ 
 	.arm
 	.align 2
 
-	/* This is the entry point for making a system call. The calling
+	/*
+	 * This is the entry point for making a system call. The calling
 	 * convention here is that of a C varargs function with the
 	 * first argument an 'int *' to the signal_pending flag, the
 	 * second one the system call number (as a 'long'), and all further
 	 * arguments being syscall arguments (also 'long').
-	 * We return a long which is the syscall's return value, which
-	 * may be negative-errno on failure. Conversion to the
-	 * -1-and-errno-set convention is done by the calling wrapper.
 	 */
 safe_syscall_base:
 	.fnstart
 	.cfi_startproc
 	mov	r12, sp			/* save entry stack */
-	push	{ r4, r5, r6, r7, r8, lr }
-	.save	{ r4, r5, r6, r7, r8, lr }
-	.cfi_adjust_cfa_offset 24
+	push	{ r4, r5, r6, r7, r8, r9, r10, lr }
+	.save	{ r4, r5, r6, r7, r8, r9, r10, lr }
+	.cfi_adjust_cfa_offset 32
 	.cfi_rel_offset r4, 0
 	.cfi_rel_offset r5, 4
 	.cfi_rel_offset r6, 8
 	.cfi_rel_offset r7, 12
 	.cfi_rel_offset r8, 16
-	.cfi_rel_offset lr, 20
+	.cfi_rel_offset r9, 20
+	.cfi_rel_offset r10, 24
+	.cfi_rel_offset lr, 28
 
-	/* The syscall calling convention isn't the same as the C one:
-	 * we enter with r0 == *signal_pending
-	 *               r1 == syscall number
-	 *               r2, r3, [sp+0] ... [sp+12] == syscall arguments
+	/*
+	 * The syscall calling convention isn't the same as the C one:
+	 * we enter with r0 == &signal_pending
+         *               r1 == &errno
+	 *               r2 == syscall number
+	 *               r3, [sp+0] ... [sp+16] == syscall arguments
 	 *               and return the result in r0
 	 * and the syscall instruction needs
 	 *               r7 == syscall number
@@ -58,12 +60,13 @@  safe_syscall_base:
 	 * Note the 16 bytes that we pushed to save registers.
 	 */
 	mov	r8, r0			/* copy signal_pending */
-	mov	r7, r1			/* syscall number */
-	mov	r0, r2			/* syscall args */
-	mov	r1, r3
-	ldm	r12, { r2, r3, r4, r5, r6 }
+	mov	r9, r1			/* copy errnop */
+	mov	r7, r2			/* syscall number */
+	mov	r0, r3			/* syscall args */
+	ldm	r12, { r1, r2, r3, r4, r5, r6 }
 
-	/* This next sequence of code works in conjunction with the
+	/*
+	 * This next sequence of code works in conjunction with the
 	 * rewind_if_safe_syscall_function(). If a signal is taken
 	 * and the interrupted PC is anywhere between 'safe_syscall_start'
 	 * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
@@ -74,16 +77,25 @@  safe_syscall_start:
 	/* if signal_pending is non-zero, don't do the call */
 	ldr	r12, [r8]		/* signal_pending */
 	tst	r12, r12
-	bne	1f
+	bne	2f
 	swi	0
 safe_syscall_end:
-	/* code path for having successfully executed the syscall */
-	pop	{ r4, r5, r6, r7, r8, pc }
 
-1:
+	/* code path for having successfully executed the syscall */
+	cmp	r0, #-4096
+	bhi	0f
+9:	pop	{ r4, r5, r6, r7, r8, r9, r10, pc }
+
+	/* code path setting errno */
+0:	neg	r0, r0		/* create positive errno */
+1:	str	r0, [r9]	/* store errno */
+	mov	r0, #-1
+	b	9b
+
 	/* code path when we didn't execute the syscall */
-	ldr	r0, =-TARGET_ERESTARTSYS
-	pop	{ r4, r5, r6, r7, r8, pc }
+2:	ldr	r0, =TARGET_ERESTARTSYS
+	b	1b
+
 	.fnend
 	.cfi_endproc
 
diff --git a/common-user/host/i386/safe-syscall.inc.S b/common-user/host/i386/safe-syscall.inc.S
index 9e58fc6504..ad89521783 100644
--- a/common-user/host/i386/safe-syscall.inc.S
+++ b/common-user/host/i386/safe-syscall.inc.S
@@ -15,14 +15,12 @@ 
 	.global safe_syscall_end
 	.type	safe_syscall_base, @function
 
-	/* This is the entry point for making a system call. The calling
+	/*
+	 * This is the entry point for making a system call. The calling
 	 * convention here is that of a C varargs function with the
 	 * first argument an 'int *' to the signal_pending flag, the
 	 * second one the system call number (as a 'long'), and all further
 	 * arguments being syscall arguments (also 'long').
-	 * We return a long which is the syscall's return value, which
-	 * may be negative-errno on failure. Conversion to the
-	 * -1-and-errno-set convention is done by the calling wrapper.
 	 */
 safe_syscall_base:
 	.cfi_startproc
@@ -41,9 +39,10 @@  safe_syscall_base:
 
 	/* The syscall calling convention isn't the same as the C one:
 	 * we enter with 0(%esp) == return address
-	 *               4(%esp) == *signal_pending
-	 *               8(%esp) == syscall number
-	 *               12(%esp) ... 32(%esp) == syscall arguments
+	 *               4(%esp) == &signal_pending
+	 *               8(%esp) == &errno
+	 *               12(%esp) == syscall number
+	 *               16(%esp) ... 36(%esp) == syscall arguments
 	 *               and return the result in eax
 	 * and the syscall instruction needs
 	 *               eax == syscall number
@@ -52,14 +51,15 @@  safe_syscall_base:
 	 * Shuffle everything around appropriately.
 	 * Note the 16 bytes that we pushed to save registers.
 	 */
-	mov	12+16(%esp), %ebx	/* the syscall arguments */
-	mov	16+16(%esp), %ecx
-	mov	20+16(%esp), %edx
-	mov	24+16(%esp), %esi
-	mov	28+16(%esp), %edi
-	mov	32+16(%esp), %ebp
+	mov	16+16(%esp), %ebx	/* the syscall arguments */
+	mov	20+16(%esp), %ecx
+	mov	24+16(%esp), %edx
+	mov	28+16(%esp), %esi
+	mov	32+16(%esp), %edi
+	mov	36+16(%esp), %ebp
 
-	/* This next sequence of code works in conjunction with the
+	/*
+	 * This next sequence of code works in conjunction with the
 	 * rewind_if_safe_syscall_function(). If a signal is taken
 	 * and the interrupted PC is anywhere between 'safe_syscall_start'
 	 * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
@@ -70,12 +70,16 @@  safe_syscall_start:
 	/* if signal_pending is non-zero, don't do the call */
 	mov	4+16(%esp), %eax	/* signal_pending */
 	cmpl	$0, (%eax)
-	jnz	1f
+	jnz	2f
 	mov	8+16(%esp), %eax	/* syscall number */
 	int	$0x80
 safe_syscall_end:
+
 	/* code path for having successfully executed the syscall */
-	pop	%ebx
+	cmp	$-4095, %eax
+	jae	0f
+
+9:	pop	%ebx
 	.cfi_remember_state
 	.cfi_adjust_cfa_offset -4
 	.cfi_restore ebx
@@ -90,11 +94,18 @@  safe_syscall_end:
 	.cfi_restore ebp
 	ret
 
-1:
-	/* code path when we didn't execute the syscall */
 	.cfi_restore_state
-	mov	$-TARGET_ERESTARTSYS, %eax
-	jmp	safe_syscall_end
+
+	/* code path setting errno */
+0:	neg	%eax			/* create positive errno */
+1:	mov	8+16(%esp), %ebx	/* load errno pointer */
+	mov	%eax, (%ebx)		/* store errno */
+	mov	$-1, %eax
+	jmp	9b
+
+	/* code path when we didn't execute the syscall */
+2:	mov	$TARGET_ERESTARTSYS, %eax
+	jmp	1b
 	.cfi_endproc
 
 	.size	safe_syscall_base, .-safe_syscall_base
diff --git a/common-user/host/ppc64/safe-syscall.inc.S b/common-user/host/ppc64/safe-syscall.inc.S
index 875133173b..e35408c5fb 100644
--- a/common-user/host/ppc64/safe-syscall.inc.S
+++ b/common-user/host/ppc64/safe-syscall.inc.S
@@ -17,14 +17,19 @@ 
 
 	.text
 
-	/* This is the entry point for making a system call. The calling
+#if _CALL_ELF == 2
+#define PARAM_OFS   32
+#else
+#define PARAM_OFS   48
+#endif
+#define PARAM(X)    PARAM_OFS + X*8
+
+	/*
+	 * This is the entry point for making a system call. The calling
 	 * convention here is that of a C varargs function with the
 	 * first argument an 'int *' to the signal_pending flag, the
 	 * second one the system call number (as a 'long'), and all further
 	 * arguments being syscall arguments (also 'long').
-	 * We return a long which is the syscall's return value, which
-	 * may be negative-errno on failure. Conversion to the
-	 * -1-and-errno-set convention is done by the calling wrapper.
 	 */
 #if _CALL_ELF == 2
 safe_syscall_base:
@@ -39,9 +44,11 @@  safe_syscall_base:
 .L.safe_syscall_base:
 	.cfi_startproc
 #endif
-	/* We enter with r3 == *signal_pending
-	 *               r4 == syscall number
-	 *               r5 ... r10 == syscall arguments
+	/*
+	 * We enter with r3 == &signal_pending
+	 *               r4 == &errno
+	 *               r5 == syscall number
+	 *               r6 ... r10, (stack) == syscall arguments
 	 *               and return the result in r3
 	 * and the syscall instruction needs
 	 *               r0 == syscall number
@@ -49,18 +56,18 @@  safe_syscall_base:
 	 *               and returns the result in r3
 	 * Shuffle everything around appropriately.
 	 */
-	std     14, 16(1) /* Preserve r14 in SP+16 */
-	.cfi_offset 14, 16
-	mr	14, 3	/* signal_pending */
-	mr	0, 4	/* syscall number */
-	mr	3, 5	/* syscall arguments */
-	mr	4, 6
-	mr	5, 7
-	mr	6, 8
-	mr	7, 9
-	mr	8, 10
+	mr	11, 3		/* signal_pending pointer */
+	std	4, PARAM(1)(1)	/* save errno pointer in param slot */
+	mr	0, 5		/* syscall number */
+	mr	3, 6		/* syscall arguments */
+	mr	4, 7
+	mr	5, 8
+	mr	6, 9
+	mr	7, 10
+	ld	8, PARAM(8)(1)
 
-	/* This next sequence of code works in conjunction with the
+	/*
+	 * This next sequence of code works in conjunction with the
 	 * rewind_if_safe_syscall_function(). If a signal is taken
 	 * and the interrupted PC is anywhere between 'safe_syscall_start'
 	 * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
@@ -69,23 +76,25 @@  safe_syscall_base:
 	 */
 safe_syscall_start:
 	/* if signal_pending is non-zero, don't do the call */
-	lwz	12, 0(14)
+	lwz	12, 0(11)
 	cmpwi	0, 12, 0
 	bne-	0f
 	sc
 safe_syscall_end:
-	/* code path when we did execute the syscall */
-	ld 14, 16(1) /* restore r14 to its original value */
-	bnslr+
 
-	/* syscall failed; return negative errno */
-	neg	3, 3
+	/* code path for having successfully executed the syscall */
+	bnslr+			/* SO set for syscall error */
+
+	/* code path setting errno */
+1:	ld	11, PARAM(1)(1)	/* restore errno pointer */
+	stw	3, 0(11)	/* store errno */
+	li	3, -1
 	blr
 
 	/* code path when we didn't execute the syscall */
-0:	addi	3, 0, -TARGET_ERESTARTSYS
-	ld 14, 16(1) /* restore r14 to its original value */
-	blr
+0:	li	3, TARGET_ERESTARTSYS
+	b	1b
+
 	.cfi_endproc
 
 #if _CALL_ELF == 2
diff --git a/common-user/host/riscv/safe-syscall.inc.S b/common-user/host/riscv/safe-syscall.inc.S
index 9ca3fbfd1e..eddede702b 100644
--- a/common-user/host/riscv/safe-syscall.inc.S
+++ b/common-user/host/riscv/safe-syscall.inc.S
@@ -23,17 +23,15 @@ 
 	 * first argument an 'int *' to the signal_pending flag, the
 	 * second one the system call number (as a 'long'), and all further
 	 * arguments being syscall arguments (also 'long').
-	 * We return a long which is the syscall's return value, which
-	 * may be negative-errno on failure. Conversion to the
-	 * -1-and-errno-set convention is done by the calling wrapper.
 	 */
 safe_syscall_base:
 	.cfi_startproc
 	/*
 	 * The syscall calling convention is nearly the same as C:
-	 * we enter with a0 == *signal_pending
-	 *               a1 == syscall number
-	 *               a2 ... a7 == syscall arguments
+	 * we enter with a0 == &signal_pending
+	 *               a1 == &errno
+	 *               a2 == syscall number
+	 *               a3 ... a7, [sp] == syscall arguments
 	 *               and return the result in a0
 	 * and the syscall instruction needs
 	 *               a7 == syscall number
@@ -42,14 +40,19 @@  safe_syscall_base:
 	 * Shuffle everything around appropriately.
 	 */
 	mv	t0, a0		/* signal_pending pointer */
-	mv	t1, a1		/* syscall number */
-	mv	a0, a2		/* syscall arguments */
-	mv	a1, a3
-	mv	a2, a4
-	mv	a3, a5
-	mv	a4, a6
-	mv	a5, a7
-	mv	a7, t1
+	mv	t1, a1		/* errno pointer */
+	mv	t2, a2		/* syscall number */
+	mv	a0, a3		/* syscall arguments */
+	mv	a1, a4
+	mv	a2, a5
+	mv	a3, a6
+	mv	a4, a7
+#if __riscv_xlen == 32
+	lw	a5, 0(sp)
+#else
+	ld	a5, 0(sp)
+#endif
+	mv	a7, t2
 
 	/*
 	 * This next sequence of code works in conjunction with the
@@ -61,17 +64,26 @@  safe_syscall_base:
 	 */
 safe_syscall_start:
 	/* If signal_pending is non-zero, don't do the call */
-	lw	t1, 0(t0)
-	bnez	t1, 0f
+	lw	t2, 0(t0)
+	bnez	t2, 2f
 	scall
 safe_syscall_end:
+
 	/* code path for having successfully executed the syscall */
+	li	t2, -4096
+	bgtu	a0, t2, 0f
 	ret
 
-0:
-	/* code path when we didn't execute the syscall */
-	li	a0, -TARGET_ERESTARTSYS
+	/* code path setting errno */
+0:	neg	a0, a0			/* create positive errno */
+1:	sw	a0, 0(t1)		/* store errno */
+	li	a0, -1
 	ret
+
+	/* code path when we didn't execute the syscall */
+2:	li	a0, TARGET_ERESTARTSYS
+	j	1b
+
 	.cfi_endproc
 
 	.size	safe_syscall_base, .-safe_syscall_base
diff --git a/common-user/host/s390x/safe-syscall.inc.S b/common-user/host/s390x/safe-syscall.inc.S
index 414b44ad38..f2a3bccc13 100644
--- a/common-user/host/s390x/safe-syscall.inc.S
+++ b/common-user/host/s390x/safe-syscall.inc.S
@@ -15,14 +15,12 @@ 
 	.global safe_syscall_end
 	.type	safe_syscall_base, @function
 
-	/* This is the entry point for making a system call. The calling
+	/*
+	 * This is the entry point for making a system call. The calling
 	 * convention here is that of a C varargs function with the
 	 * first argument an 'int *' to the signal_pending flag, the
 	 * second one the system call number (as a 'long'), and all further
 	 * arguments being syscall arguments (also 'long').
-	 * We return a long which is the syscall's return value, which
-	 * may be negative-errno on failure. Conversion to the
-	 * -1-and-errno-set convention is done by the calling wrapper.
 	 */
 safe_syscall_base:
 	.cfi_startproc
@@ -44,11 +42,13 @@  safe_syscall_base:
 	stg	%r1,0(%r15)		/* store back chain */
 	stg	%r0,8(%r15)		/* store eos */
 
-	/* The syscall calling convention isn't the same as the
+	/*
+	 * The syscall calling convention isn't the same as the
 	 * C one:
-	 * we enter with r2 == *signal_pending
-	 *               r3 == syscall number
-	 *               r4, r5, r6, (stack) == syscall arguments
+	 * we enter with r2 == &signal_pending
+	 *               r3 == &errno
+	 *               r4 == syscall number
+	 *               r5, r6, (stack) == syscall arguments
 	 *               and return the result in r2
 	 * and the syscall instruction needs
 	 *               r1 == syscall number
@@ -57,13 +57,14 @@  safe_syscall_base:
 	 * Shuffle everything around appropriately.
 	 */
 	lgr	%r8,%r2			/* signal_pending pointer */
-	lgr	%r1,%r3			/* syscall number */
-	lgr	%r2,%r4			/* syscall args */
-	lgr	%r3,%r5
-	lgr	%r4,%r6
-	lmg	%r5,%r7,320(%r15)
+	lgr	%r9,%r3			/* errno pointer */
+	lgr	%r1,%r4			/* syscall number */
+	lgr	%r2,%r5			/* syscall args */
+	lgr	%r3,%r6
+	lmg	%r4,%r7,320(%r15)
 
-	/* This next sequence of code works in conjunction with the
+	/*
+	 * This next sequence of code works in conjunction with the
 	 * rewind_if_safe_syscall_function(). If a signal is taken
 	 * and the interrupted PC is anywhere between 'safe_syscall_start'
 	 * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
@@ -73,18 +74,31 @@  safe_syscall_base:
 safe_syscall_start:
 	/* if signal_pending is non-zero, don't do the call */
 	icm	%r0,15,0(%r8)
-	jne	2f
+	jne	1f
 	svc	0
 safe_syscall_end:
 
-1:	lg	%r15,0(%r15)		/* load back chain */
+	/* code path for having successfully executed the syscall */
+	lghi	%r0, -4095		/* check for syscall error */
+	clgr	%r2, %r0
+	jgnl	0f
+
+9:	lg	%r15,0(%r15)		/* load back chain */
 	.cfi_remember_state
 	.cfi_adjust_cfa_offset -160
 	lmg	%r6,%r15,48(%r15)	/* load saved registers */
 	br	%r14
 	.cfi_restore_state
-2:	lghi	%r2, -TARGET_ERESTARTSYS
-	j	1b
+
+	/* code path when we didn't execute the syscall */
+1:	lghi	%r2, -TARGET_ERESTARTSYS
+
+	/* code path setting errno */
+0:	lcr	%r2, %r2		/* create positive errno */
+	st	%r2, 0(%r9)		/* store errno */
+	lghi	%r2, -1
+	j	9b
+
 	.cfi_endproc
 
 	.size	safe_syscall_base, .-safe_syscall_base
diff --git a/common-user/host/x86_64/safe-syscall.inc.S b/common-user/host/x86_64/safe-syscall.inc.S
index f36992daa3..9a0c4c93b4 100644
--- a/common-user/host/x86_64/safe-syscall.inc.S
+++ b/common-user/host/x86_64/safe-syscall.inc.S
@@ -14,18 +14,17 @@ 
         .global safe_syscall_end
         .type   safe_syscall_base, @function
 
-        /* This is the entry point for making a system call. The calling
+        /*
+         * This is the entry point for making a system call. The calling
          * convention here is that of a C varargs function with the
          * first argument an 'int *' to the signal_pending flag, the
          * second one the system call number (as a 'long'), and all further
          * arguments being syscall arguments (also 'long').
-         * We return a long which is the syscall's return value, which
-         * may be negative-errno on failure. Conversion to the
-         * -1-and-errno-set convention is done by the calling wrapper.
          */
 safe_syscall_base:
         .cfi_startproc
-        /* This saves a frame pointer and aligns the stack for the syscall.
+        /*
+         * This saves a frame pointer and aligns the stack for the syscall.
          * (It's unclear if the syscall ABI has the same stack alignment
          * requirements as the userspace function call ABI, but better safe than
          * sorry. Appendix A2 of http://www.x86-64.org/documentation/abi.pdf
@@ -35,11 +34,12 @@  safe_syscall_base:
         .cfi_adjust_cfa_offset 8
         .cfi_rel_offset rbp, 0
 
-        /* The syscall calling convention isn't the same as the
-         * C one:
-         * we enter with rdi == *signal_pending
-         *               rsi == syscall number
-         *               rdx, rcx, r8, r9, (stack), (stack) == syscall arguments
+        /*
+         * The syscall calling convention isn't the same as the C one:
+         * we enter with rdi == &signal_pending
+         *               rsi == &errno
+         *               rdx == syscall number
+         *               rcx, r8, r9, (stack...) == syscall arguments
          *               and return the result in rax
          * and the syscall instruction needs
          *               rax == syscall number
@@ -48,17 +48,19 @@  safe_syscall_base:
          * Shuffle everything around appropriately.
          * Note that syscall will trash rcx and r11.
          */
-        mov     %rsi, %rax /* syscall number */
-        mov     %rdi, %rbp /* signal_pending pointer */
+        mov     %rdi, %r11 /* signal_pending pointer */
+        mov     %rsi, %rbp /* errno pointer */
+        mov     %rdx, %rax /* syscall number */
         /* and the syscall arguments */
-        mov     %rdx, %rdi
-        mov     %rcx, %rsi
-        mov     %r8,  %rdx
-        mov     %r9,  %r10
-        mov     16(%rsp), %r8
-        mov     24(%rsp), %r9
+        mov     %rcx, %rdi
+        mov     %r8, %rsi
+        mov     %r9,  %rdx
+        mov     16(%rsp), %r10
+        mov     24(%rsp), %r8
+        mov     32(%rsp), %r9
 
-        /* This next sequence of code works in conjunction with the
+        /*
+         * This next sequence of code works in conjunction with the
          * rewind_if_safe_syscall_function(). If a signal is taken
          * and the interrupted PC is anywhere between 'safe_syscall_start'
          * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
@@ -67,25 +69,31 @@  safe_syscall_base:
          */
 safe_syscall_start:
         /* if signal_pending is non-zero, don't do the call */
-        cmpl	$0, (%rbp)
-        jnz     1f
+        cmpl    $0, (%r11)
+        jnz     2f
         syscall
 safe_syscall_end:
+
         /* code path for having successfully executed the syscall */
-        pop     %rbp
+        cmp	$-4095, %rax
+        jae	0f
+
+9:      pop     %rbp
         .cfi_remember_state
         .cfi_def_cfa_offset 8
         .cfi_restore rbp
         ret
-
-1:
-        /* code path when we didn't execute the syscall */
         .cfi_restore_state
-        mov     $-TARGET_ERESTARTSYS, %rax
-        pop     %rbp
-        .cfi_def_cfa_offset 8
-        .cfi_restore rbp
-        ret
-        .cfi_endproc
 
+        /* code path setting errno */
+0:      neg	%eax			/* create positive errno */
+1:      mov	%eax, (%rbp)		/* store errno */
+        mov	$-1, %rax
+        jmp	9b
+
+        /* code path when we didn't execute the syscall */
+2:      mov     $TARGET_ERESTARTSYS, %eax
+        jmp	1b
+
+        .cfi_endproc
         .size   safe_syscall_base, .-safe_syscall_base