Message ID | 20190523204409.21068-10-jan.bobek@gmail.com |
---|---|
State | New |
Headers | show |
Series | None | expand |
Jan Bobek <jan.bobek@gmail.com> writes: > From: Richard Henderson <richard.henderson@linaro.org> > > The state expected for a given test must be specifically requested > with the --xfeatures=mask command-line argument. This is recorded > with the saved state so that it is obvious if the apprentice is given > a different argument. Any features beyond what are present on the > running cpu will read as zero. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> > --- > risu_reginfo_i386.h | 14 +++ > risu_reginfo_i386.c | 228 ++++++++++++++++++++++++++++++++++++++++++-- > test_i386.S | 39 ++++++++ > 3 files changed, 273 insertions(+), 8 deletions(-) > > diff --git a/risu_reginfo_i386.h b/risu_reginfo_i386.h > index e350f01..b468f79 100644 > --- a/risu_reginfo_i386.h > +++ b/risu_reginfo_i386.h > @@ -12,6 +12,10 @@ > #ifndef RISU_REGINFO_I386_H > #define RISU_REGINFO_I386_H > > +struct avx512_reg { > + uint64_t q[8]; > +}; > + > /* > * This is the data structure we pass over the socket. > * It is a simplified and reduced subset of what can > @@ -19,7 +23,17 @@ > */ > struct reginfo { > uint32_t faulting_insn; > + uint32_t mxcsr; > + uint64_t xfeatures; > + > gregset_t gregs; > + > +#ifdef __x86_64__ > + struct avx512_reg vregs[32]; > +#else > + struct avx512_reg vregs[8]; > +#endif > + uint64_t kregs[8]; > }; > > /* > diff --git a/risu_reginfo_i386.c b/risu_reginfo_i386.c > index c4dc14a..83f9541 100644 > --- a/risu_reginfo_i386.c > +++ b/risu_reginfo_i386.c > @@ -11,19 +11,32 @@ > > #include <stdio.h> > #include <stdlib.h> > +#include <stddef.h> > #include <string.h> > #include <ucontext.h> > #include <assert.h> > +#include <cpuid.h> > > #include "risu.h" > #include "risu_reginfo_i386.h" > > -const struct option * const arch_long_opts; > -const char * const arch_extra_help; > +#include <asm/sigcontext.h> > + > +static uint64_t xfeatures = 3; /* SSE */ > + > +static const struct option extra_ops[] = { > + {"xfeatures", required_argument, NULL, FIRST_ARCH_OPT }, > + {0, 0, 0, 0} > +}; > + > +const struct option * const arch_long_opts = extra_ops; > +const char * const arch_extra_help > + = " --xfeatures=<mask> Use features in mask for XSAVE\n"; > > void process_arch_opt(int opt, const char *arg) > { > - abort(); > + assert(opt == FIRST_ARCH_OPT); > + xfeatures = strtoull(arg, 0, 0); > } > > const int reginfo_size(void) > @@ -31,13 +44,37 @@ const int reginfo_size(void) > return sizeof(struct reginfo); > } > > +static void *xsave_feature_buf(struct _xstate *xs, int feature) > +{ > + unsigned int eax, ebx, ecx, edx; > + int ok; > + > + /* > + * Get the location of the XSAVE feature from the cpuid leaf. > + * Given that we know the xfeature bit is set, this must succeed. > + */ > + ok = __get_cpuid_count(0xd, feature, &eax, &ebx, &ecx, &edx); > + assert(ok); > + > + /* Sanity check that the frame stored by the kernel contains the data. */ > + assert(xs->fpstate.sw_reserved.extended_size >= eax + ebx); > + > + return (void *)xs + ebx; > +} > + > /* reginfo_init: initialize with a ucontext */ > void reginfo_init(struct reginfo *ri, ucontext_t *uc) > { > - int i; > + int i, nvecregs; > + struct _fpstate *fp; > + struct _xstate *xs; > + uint64_t features; > > memset(ri, 0, sizeof(*ri)); > > + /* Require master and apprentice to be given the same arguments. */ > + ri->xfeatures = xfeatures; > + > for (i = 0; i < NGREG; i++) { > switch (i) { > case REG_E(IP): > @@ -79,12 +116,89 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc) > * distinguish 'do compare' from 'stop'. > */ > ri->faulting_insn = *(uint32_t *)uc->uc_mcontext.gregs[REG_E(IP)]; > + > + /* > + * FP state is omitted if unused (aka in init state). > + * Use the <asm/sigcontext.h> struct for access to AVX state. > + */ > + > + fp = (struct _fpstate *)uc->uc_mcontext.fpregs; > + if (fp == NULL) { > + return; > + } > + > +#ifdef __x86_64__ > + nvecregs = 16; > +#else > + /* We don't (currently) care about the 80387 state, only SSE+. */ > + if (fp->magic != X86_FXSR_MAGIC) { > + return; > + } > + nvecregs = 8; > +#endif > + > + /* > + * Now we know that _fpstate contains FXSAVE data. > + */ > + ri->mxcsr = fp->mxcsr; > + > + for (i = 0; i < nvecregs; ++i) { > +#ifdef __x86_64__ > + memcpy(&ri->vregs[i], &fp->xmm_space[i * 4], 16); > +#else > + memcpy(&ri->vregs[i], &fp->_xmm[i], 16); > +#endif > + } > + > + if (fp->sw_reserved.magic1 != FP_XSTATE_MAGIC1) { > + return; > + } > + xs = (struct _xstate *)fp; > + features = xfeatures & xs->xstate_hdr.xfeatures; > + > + /* > + * Now we know that _fpstate contains XSAVE data. > + */ > + > + if (features & (1 << 2)) { > + /* YMM_Hi128 state */ > + void *buf = xsave_feature_buf(xs, 2); > + for (i = 0; i < nvecregs; ++i) { > + memcpy(&ri->vregs[i].q[2], buf + 16 * i, 16); > + } > + } > + > + if (features & (1 << 5)) { > + /* Opmask state */ > + uint64_t *buf = xsave_feature_buf(xs, 5); > + for (i = 0; i < 8; ++i) { > + ri->kregs[i] = buf[i]; > + } > + } > + > + if (features & (1 << 6)) { > + /* ZMM_Hi256 state */ > + void *buf = xsave_feature_buf(xs, 6); > + for (i = 0; i < nvecregs; ++i) { > + memcpy(&ri->vregs[i].q[4], buf + 32 * i, 32); > + } > + } > + > +#ifdef __x86_64__ > + if (features & (1 << 7)) { > + /* Hi16_ZMM state */ > + void *buf = xsave_feature_buf(xs, 7); > + for (i = 0; i < 16; ++i) { > + memcpy(&ri->vregs[i + 16], buf + 64 * i, 64); > + } > + } > +#endif > } > > /* reginfo_is_eq: compare the reginfo structs, returns nonzero if equal */ > int reginfo_is_eq(struct reginfo *m, struct reginfo *a) > { > - return 0 == memcmp(m, a, sizeof(*m)); > + return !memcmp(m, a, sizeof(*m)); > } > > static const char *const regname[NGREG] = { > @@ -126,28 +240,126 @@ static const char *const regname[NGREG] = { > # define PRIxREG "%08x" > #endif > > +static int get_nvecregs(uint64_t features) > +{ > +#ifdef __x86_64__ > + return features & (1 << 7) ? 32 : 16; > +#else > + return 8; > +#endif > +} > + > +static int get_nvecquads(uint64_t features) > +{ > + if (features & (1 << 6)) { > + return 8; > + } else if (features & (1 << 2)) { > + return 4; > + } else { > + return 2; > + } > +} > + > +static char get_vecletter(uint64_t features) > +{ > + if (features & (1 << 6 | 1 << 7)) { > + return 'z'; > + } else if (features & (1 << 2)) { > + return 'y'; > + } else { > + return 'x'; > + } > +} > + > /* reginfo_dump: print state to a stream, returns nonzero on success */ > int reginfo_dump(struct reginfo *ri, FILE *f) > { > - int i; > + uint64_t features; > + int i, j, n, w; > + char r; > + > fprintf(f, " faulting insn %x\n", ri->faulting_insn); > for (i = 0; i < NGREG; i++) { > if (regname[i]) { > fprintf(f, " %-6s: " PRIxREG "\n", regname[i], ri->gregs[i]); > } > } > + > + fprintf(f, " mxcsr : %x\n", ri->mxcsr); > + fprintf(f, " xfeat : %" PRIx64 "\n", ri->xfeatures); > + > + features = ri->xfeatures; > + n = get_nvecregs(features); > + w = get_nvecquads(features); > + r = get_vecletter(features); > + > + for (i = 0; i < n; i++) { > + fprintf(f, " %cmm%-3d: ", r, i); > + for (j = w - 1; j >= 0; j--) { > + fprintf(f, "%016" PRIx64 "%c", > + ri->vregs[i].q[j], j == 0 ? '\n' : ' '); > + } > + } > + > + if (features & (1 << 5)) { > + for (i = 0; i < 8; i++) { > + fprintf(f, " k%-5d: %016" PRIx64 "\n", i, ri->kregs[i]); > + } > + } > + > return !ferror(f); > } > > int reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f) > { > - int i; > + int i, j, n, w; > + uint64_t features; > + char r; > + > + fprintf(f, "Mismatch (master v apprentice):\n"); > + > for (i = 0; i < NGREG; i++) { > if (m->gregs[i] != a->gregs[i]) { > assert(regname[i]); > - fprintf(f, "Mismatch: %s: " PRIxREG " v " PRIxREG "\n", > + fprintf(f, " %-6s: " PRIxREG " v " PRIxREG "\n", > regname[i], m->gregs[i], a->gregs[i]); > } > } > + > + if (m->mxcsr != a->mxcsr) { > + fprintf(f, " mxcsr : %x v %x\n", m->mxcsr, a->mxcsr); > + } > + if (m->xfeatures != a->xfeatures) { > + fprintf(f, " xfeat : %" PRIx64 " v %" PRIx64 "\n", > + m->xfeatures, a->xfeatures); > + } > + > + features = m->xfeatures; > + n = get_nvecregs(features); > + w = get_nvecquads(features); > + r = get_vecletter(features); > + > + for (i = 0; i < n; i++) { > + if (memcmp(&m->vregs[i], &a->vregs[i], w * 8)) { > + fprintf(f, " %cmm%-3d: ", r, i); > + for (j = w - 1; j >= 0; j--) { > + fprintf(f, "%016" PRIx64 "%c", > + m->vregs[i].q[j], j == 0 ? '\n' : ' '); > + } > + fprintf(f, " v: "); > + for (j = w - 1; j >= 0; j--) { > + fprintf(f, "%016" PRIx64 "%c", > + a->vregs[i].q[j], j == 0 ? '\n' : ' '); > + } > + } > + } > + > + for (i = 0; i < 8; i++) { > + if (m->kregs[i] != a->kregs[i]) { > + fprintf(f, " k%-5d: %016" PRIx64 " v %016" PRIx64 "\n", > + i, m->kregs[i], a->kregs[i]); > + } > + } > + > return !ferror(f); > } > diff --git a/test_i386.S b/test_i386.S > index 456b99c..05344d7 100644 > --- a/test_i386.S > +++ b/test_i386.S > @@ -12,6 +12,37 @@ > /* A trivial test image for x86 */ > > /* Initialise the registers to avoid spurious mismatches */ > + > +#ifdef __x86_64__ > +#define BASE %rax > + lea 2f(%rip), BASE > +#else > +#define BASE %eax > + call 1f > +1: pop BASE > + add $2f-1b, BASE > +#endif > + > + movdqa 0(BASE), %xmm0 > + movdqa 1*16(BASE), %xmm1 > + movdqa 2*16(BASE), %xmm2 > + movdqa 3*16(BASE), %xmm3 > + movdqa 4*16(BASE), %xmm4 > + movdqa 5*16(BASE), %xmm5 > + movdqa 6*16(BASE), %xmm6 > + movdqa 7*16(BASE), %xmm7 > + > +#ifdef __x86_64__ > + movdqa 8*16(BASE), %xmm8 > + movdqa 9*16(BASE), %xmm9 > + movdqa 10*16(BASE), %xmm10 > + movdqa 11*16(BASE), %xmm11 > + movdqa 12*16(BASE), %xmm12 > + movdqa 13*16(BASE), %xmm13 > + movdqa 14*16(BASE), %xmm14 > + movdqa 15*16(BASE), %xmm15 > +#endif > + > xor %eax, %eax > sahf /* init eflags */ > > @@ -39,3 +70,11 @@ > > /* exit test */ > ud1 %ecx, %eax > + > + .p2align 16 > +2: > + .set i, 0 > + .rept 256 > + .byte i > + .set i, i + 1 > + .endr -- Alex Bennée
diff --git a/risu_reginfo_i386.h b/risu_reginfo_i386.h index e350f01..b468f79 100644 --- a/risu_reginfo_i386.h +++ b/risu_reginfo_i386.h @@ -12,6 +12,10 @@ #ifndef RISU_REGINFO_I386_H #define RISU_REGINFO_I386_H +struct avx512_reg { + uint64_t q[8]; +}; + /* * This is the data structure we pass over the socket. * It is a simplified and reduced subset of what can @@ -19,7 +23,17 @@ */ struct reginfo { uint32_t faulting_insn; + uint32_t mxcsr; + uint64_t xfeatures; + gregset_t gregs; + +#ifdef __x86_64__ + struct avx512_reg vregs[32]; +#else + struct avx512_reg vregs[8]; +#endif + uint64_t kregs[8]; }; /* diff --git a/risu_reginfo_i386.c b/risu_reginfo_i386.c index c4dc14a..83f9541 100644 --- a/risu_reginfo_i386.c +++ b/risu_reginfo_i386.c @@ -11,19 +11,32 @@ #include <stdio.h> #include <stdlib.h> +#include <stddef.h> #include <string.h> #include <ucontext.h> #include <assert.h> +#include <cpuid.h> #include "risu.h" #include "risu_reginfo_i386.h" -const struct option * const arch_long_opts; -const char * const arch_extra_help; +#include <asm/sigcontext.h> + +static uint64_t xfeatures = 3; /* SSE */ + +static const struct option extra_ops[] = { + {"xfeatures", required_argument, NULL, FIRST_ARCH_OPT }, + {0, 0, 0, 0} +}; + +const struct option * const arch_long_opts = extra_ops; +const char * const arch_extra_help + = " --xfeatures=<mask> Use features in mask for XSAVE\n"; void process_arch_opt(int opt, const char *arg) { - abort(); + assert(opt == FIRST_ARCH_OPT); + xfeatures = strtoull(arg, 0, 0); } const int reginfo_size(void) @@ -31,13 +44,37 @@ const int reginfo_size(void) return sizeof(struct reginfo); } +static void *xsave_feature_buf(struct _xstate *xs, int feature) +{ + unsigned int eax, ebx, ecx, edx; + int ok; + + /* + * Get the location of the XSAVE feature from the cpuid leaf. + * Given that we know the xfeature bit is set, this must succeed. + */ + ok = __get_cpuid_count(0xd, feature, &eax, &ebx, &ecx, &edx); + assert(ok); + + /* Sanity check that the frame stored by the kernel contains the data. */ + assert(xs->fpstate.sw_reserved.extended_size >= eax + ebx); + + return (void *)xs + ebx; +} + /* reginfo_init: initialize with a ucontext */ void reginfo_init(struct reginfo *ri, ucontext_t *uc) { - int i; + int i, nvecregs; + struct _fpstate *fp; + struct _xstate *xs; + uint64_t features; memset(ri, 0, sizeof(*ri)); + /* Require master and apprentice to be given the same arguments. */ + ri->xfeatures = xfeatures; + for (i = 0; i < NGREG; i++) { switch (i) { case REG_E(IP): @@ -79,12 +116,89 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc) * distinguish 'do compare' from 'stop'. */ ri->faulting_insn = *(uint32_t *)uc->uc_mcontext.gregs[REG_E(IP)]; + + /* + * FP state is omitted if unused (aka in init state). + * Use the <asm/sigcontext.h> struct for access to AVX state. + */ + + fp = (struct _fpstate *)uc->uc_mcontext.fpregs; + if (fp == NULL) { + return; + } + +#ifdef __x86_64__ + nvecregs = 16; +#else + /* We don't (currently) care about the 80387 state, only SSE+. */ + if (fp->magic != X86_FXSR_MAGIC) { + return; + } + nvecregs = 8; +#endif + + /* + * Now we know that _fpstate contains FXSAVE data. + */ + ri->mxcsr = fp->mxcsr; + + for (i = 0; i < nvecregs; ++i) { +#ifdef __x86_64__ + memcpy(&ri->vregs[i], &fp->xmm_space[i * 4], 16); +#else + memcpy(&ri->vregs[i], &fp->_xmm[i], 16); +#endif + } + + if (fp->sw_reserved.magic1 != FP_XSTATE_MAGIC1) { + return; + } + xs = (struct _xstate *)fp; + features = xfeatures & xs->xstate_hdr.xfeatures; + + /* + * Now we know that _fpstate contains XSAVE data. + */ + + if (features & (1 << 2)) { + /* YMM_Hi128 state */ + void *buf = xsave_feature_buf(xs, 2); + for (i = 0; i < nvecregs; ++i) { + memcpy(&ri->vregs[i].q[2], buf + 16 * i, 16); + } + } + + if (features & (1 << 5)) { + /* Opmask state */ + uint64_t *buf = xsave_feature_buf(xs, 5); + for (i = 0; i < 8; ++i) { + ri->kregs[i] = buf[i]; + } + } + + if (features & (1 << 6)) { + /* ZMM_Hi256 state */ + void *buf = xsave_feature_buf(xs, 6); + for (i = 0; i < nvecregs; ++i) { + memcpy(&ri->vregs[i].q[4], buf + 32 * i, 32); + } + } + +#ifdef __x86_64__ + if (features & (1 << 7)) { + /* Hi16_ZMM state */ + void *buf = xsave_feature_buf(xs, 7); + for (i = 0; i < 16; ++i) { + memcpy(&ri->vregs[i + 16], buf + 64 * i, 64); + } + } +#endif } /* reginfo_is_eq: compare the reginfo structs, returns nonzero if equal */ int reginfo_is_eq(struct reginfo *m, struct reginfo *a) { - return 0 == memcmp(m, a, sizeof(*m)); + return !memcmp(m, a, sizeof(*m)); } static const char *const regname[NGREG] = { @@ -126,28 +240,126 @@ static const char *const regname[NGREG] = { # define PRIxREG "%08x" #endif +static int get_nvecregs(uint64_t features) +{ +#ifdef __x86_64__ + return features & (1 << 7) ? 32 : 16; +#else + return 8; +#endif +} + +static int get_nvecquads(uint64_t features) +{ + if (features & (1 << 6)) { + return 8; + } else if (features & (1 << 2)) { + return 4; + } else { + return 2; + } +} + +static char get_vecletter(uint64_t features) +{ + if (features & (1 << 6 | 1 << 7)) { + return 'z'; + } else if (features & (1 << 2)) { + return 'y'; + } else { + return 'x'; + } +} + /* reginfo_dump: print state to a stream, returns nonzero on success */ int reginfo_dump(struct reginfo *ri, FILE *f) { - int i; + uint64_t features; + int i, j, n, w; + char r; + fprintf(f, " faulting insn %x\n", ri->faulting_insn); for (i = 0; i < NGREG; i++) { if (regname[i]) { fprintf(f, " %-6s: " PRIxREG "\n", regname[i], ri->gregs[i]); } } + + fprintf(f, " mxcsr : %x\n", ri->mxcsr); + fprintf(f, " xfeat : %" PRIx64 "\n", ri->xfeatures); + + features = ri->xfeatures; + n = get_nvecregs(features); + w = get_nvecquads(features); + r = get_vecletter(features); + + for (i = 0; i < n; i++) { + fprintf(f, " %cmm%-3d: ", r, i); + for (j = w - 1; j >= 0; j--) { + fprintf(f, "%016" PRIx64 "%c", + ri->vregs[i].q[j], j == 0 ? '\n' : ' '); + } + } + + if (features & (1 << 5)) { + for (i = 0; i < 8; i++) { + fprintf(f, " k%-5d: %016" PRIx64 "\n", i, ri->kregs[i]); + } + } + return !ferror(f); } int reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f) { - int i; + int i, j, n, w; + uint64_t features; + char r; + + fprintf(f, "Mismatch (master v apprentice):\n"); + for (i = 0; i < NGREG; i++) { if (m->gregs[i] != a->gregs[i]) { assert(regname[i]); - fprintf(f, "Mismatch: %s: " PRIxREG " v " PRIxREG "\n", + fprintf(f, " %-6s: " PRIxREG " v " PRIxREG "\n", regname[i], m->gregs[i], a->gregs[i]); } } + + if (m->mxcsr != a->mxcsr) { + fprintf(f, " mxcsr : %x v %x\n", m->mxcsr, a->mxcsr); + } + if (m->xfeatures != a->xfeatures) { + fprintf(f, " xfeat : %" PRIx64 " v %" PRIx64 "\n", + m->xfeatures, a->xfeatures); + } + + features = m->xfeatures; + n = get_nvecregs(features); + w = get_nvecquads(features); + r = get_vecletter(features); + + for (i = 0; i < n; i++) { + if (memcmp(&m->vregs[i], &a->vregs[i], w * 8)) { + fprintf(f, " %cmm%-3d: ", r, i); + for (j = w - 1; j >= 0; j--) { + fprintf(f, "%016" PRIx64 "%c", + m->vregs[i].q[j], j == 0 ? '\n' : ' '); + } + fprintf(f, " v: "); + for (j = w - 1; j >= 0; j--) { + fprintf(f, "%016" PRIx64 "%c", + a->vregs[i].q[j], j == 0 ? '\n' : ' '); + } + } + } + + for (i = 0; i < 8; i++) { + if (m->kregs[i] != a->kregs[i]) { + fprintf(f, " k%-5d: %016" PRIx64 " v %016" PRIx64 "\n", + i, m->kregs[i], a->kregs[i]); + } + } + return !ferror(f); } diff --git a/test_i386.S b/test_i386.S index 456b99c..05344d7 100644 --- a/test_i386.S +++ b/test_i386.S @@ -12,6 +12,37 @@ /* A trivial test image for x86 */ /* Initialise the registers to avoid spurious mismatches */ + +#ifdef __x86_64__ +#define BASE %rax + lea 2f(%rip), BASE +#else +#define BASE %eax + call 1f +1: pop BASE + add $2f-1b, BASE +#endif + + movdqa 0(BASE), %xmm0 + movdqa 1*16(BASE), %xmm1 + movdqa 2*16(BASE), %xmm2 + movdqa 3*16(BASE), %xmm3 + movdqa 4*16(BASE), %xmm4 + movdqa 5*16(BASE), %xmm5 + movdqa 6*16(BASE), %xmm6 + movdqa 7*16(BASE), %xmm7 + +#ifdef __x86_64__ + movdqa 8*16(BASE), %xmm8 + movdqa 9*16(BASE), %xmm9 + movdqa 10*16(BASE), %xmm10 + movdqa 11*16(BASE), %xmm11 + movdqa 12*16(BASE), %xmm12 + movdqa 13*16(BASE), %xmm13 + movdqa 14*16(BASE), %xmm14 + movdqa 15*16(BASE), %xmm15 +#endif + xor %eax, %eax sahf /* init eflags */ @@ -39,3 +70,11 @@ /* exit test */ ud1 %ecx, %eax + + .p2align 16 +2: + .set i, 0 + .rept 256 + .byte i + .set i, i + 1 + .endr