Message ID | 1447798238-29608-3-git-send-email-apinski@cavium.com |
---|---|
State | New |
Headers | show |
Hi Andrew, On 17/11/15 22:10, Andrew Pinski wrote: > Because the imp and parts are really integer rather than strings, this patch > moves the comparisons to be integer. Also allows saving around integers are > easier than doing string comparisons. This allows for the next change. > > The way I store BIG.little is (big<<12)|little as each part num is only 12bits > long. So it would be nice if someone could test -mpu=native on a big.little > system to make sure it works still. > > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. > > Thanks, > Andrew Pinski > > > > * config/aarch64/aarch64-cores.def: Rewrite so IMP and PART are integer constants. > * config/aarch64/driver-aarch64.c (struct aarch64_core_data): Change implementer_id to unsigned char. > Change part_no to unsigned int. > (AARCH64_BIG_LITTLE): New define. > (INVALID_IMP): New define. > (INVALID_CORE): New define. > (cpu_data): Change the last element's implementer_id and part_no to integers. > (valid_bL_string_p): Rewrite to .. > (valid_bL_core_p): this for integers instead of strings. > (parse_field): New function. > (contains_string_p): Rewrite to ... > (contains_core_p): this for integers and only for the part_no. > (host_detect_local_cpu): Rewrite handling of implementation and part num to be integers; > simplifying the code. > --- > gcc/config/aarch64/aarch64-cores.def | 25 +++++----- > gcc/config/aarch64/driver-aarch64.c | 90 ++++++++++++++++++++---------------- > 2 files changed, 62 insertions(+), 53 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def > index 0b456f7..798f3e3 100644 > --- a/gcc/config/aarch64/aarch64-cores.def > +++ b/gcc/config/aarch64/aarch64-cores.def > @@ -33,25 +33,26 @@ > This need not include flags implied by the architecture. > COSTS is the name of the rtx_costs routine to use. > IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it can > - be found in /proc/cpuinfo. > + be found in /proc/cpuinfo. There is a list in the ARM ARM. > PART is the part number of the CPU. On a GNU/Linux system it can be found > - in /proc/cpuinfo. For big.LITTLE systems this should have the form at of > - "<big core part number>.<LITTLE core part number>". */ > + in /proc/cpuinfo. For big.LITTLE systems this should use the macro AARCH64_BIG_LITTLE > + where the big part number comes as the first arugment to the macro and little is the > + second. */ > > /* V8 Architecture Processors. */ > > -AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03") > -AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07") > -AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08") > -AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001") > -AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x51", "0x800") > -AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1") > -AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000") > +AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03) > +AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07) > +AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08) > +AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001) > +AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800) > +AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a1) > +AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000) > > /* V8 big.LITTLE implementations. */ > > -AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03") > -AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03") > +AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE(0xd07, 0xd03)) > +AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE(0xd08, 0xd03)) > > > #undef AARCH64_CORE > diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c > index d03654c..92388a9 100644 > --- a/gcc/config/aarch64/driver-aarch64.c > +++ b/gcc/config/aarch64/driver-aarch64.c > @@ -37,18 +37,23 @@ static struct arch_extension ext_to_feat_string[] = > struct aarch64_core_data > { > const char* name; > - const char* arch; > - const char* implementer_id; > - const char* part_no; > + const char *arch; > + unsigned char implementer_id; /* Exactly 8 bits */ > + unsigned int part_no; /* 12 bits + 12 bits */ > }; > > +#define AARCH64_BIG_LITTLE(BIG, LITTLE) \ > + (((BIG)&0xFFFu) << 12 | ((LITTLE) & 0xFFFu)) > +#define INVALID_IMP ((unsigned char) -1) > +#define INVALID_CORE ((unsigned)-1) > + > #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ > { CORE_NAME, #ARCH, IMP, PART }, > > static struct aarch64_core_data cpu_data [] = > { > #include "aarch64-cores.def" > - { NULL, NULL, NULL, NULL } > + { NULL, NULL, INVALID_IMP, INVALID_CORE } > }; > > > @@ -91,27 +96,35 @@ get_arch_name_from_id (const char* id) > should return true. */ > > static bool > -valid_bL_string_p (const char** core, const char* bL_string) > +valid_bL_core_p (unsigned int *core, unsigned int bL_core) > { > - return strstr (bL_string, core[0]) != NULL > - && strstr (bL_string, core[1]) != NULL; > + return AARCH64_BIG_LITTLE (core[0], core[1]) == bL_core; > } This needs the change you suggested in https://gcc.gnu.org/ml/gcc-patches/2015-11/msg02436.html I've checked that it works now. Also, please update the comment for this function since the arguments are no longer strings. Thanks, Kyrill > > -/* Return true iff ARR contains STR in one of its two elements. */ > > -static bool > -contains_string_p (const char** arr, const char* str) > +/* Returns the integer that is after ':' for the field. */ > +static unsigned parse_field (const char *field) > { > - bool res = false; > + const char *rest = strchr (field, ':'); > + char *after; > + unsigned fint = strtol (rest+1, &after, 16); > + if (after == rest+1) > + return -1; > + return fint; > +} > + > +/* Return true iff ARR contains CORE, in either of the two elements. */ > > - if (arr[0] != NULL) > +static bool > +contains_core_p (unsigned *arr, unsigned core) > +{ > + if (arr[0] != INVALID_CORE) > { > - res = strstr (arr[0], str) != NULL; > - if (res) > - return res; > + if (arr[0] == core) > + return true; > > - if (arr[1] != NULL) > - return strstr (arr[1], str) != NULL; > + if (arr[1] != INVALID_CORE) > + return arr[1] == core; > } > > return false; > @@ -146,10 +159,9 @@ host_detect_local_cpu (int argc, const char **argv) > bool cpu = false; > unsigned int i = 0; > unsigned int core_idx = 0; > - const char* imps[2] = { NULL, NULL }; > - const char* cores[2] = { NULL, NULL }; > + unsigned char imp = INVALID_IMP; > + unsigned int cores[2] = { INVALID_CORE, INVALID_CORE }; > unsigned int n_cores = 0; > - unsigned int n_imps = 0; > bool processed_exts = false; > const char *ext_string = ""; > > @@ -180,30 +192,28 @@ host_detect_local_cpu (int argc, const char **argv) > { > if (strstr (buf, "implementer") != NULL) > { > - for (i = 0; cpu_data[i].name != NULL; i++) > - if (strstr (buf, cpu_data[i].implementer_id) != NULL > - && !contains_string_p (imps, cpu_data[i].implementer_id)) > - { > - if (n_imps == 2) > - goto not_found; > - > - imps[n_imps++] = cpu_data[i].implementer_id; > - > - break; > - } > - continue; > + unsigned cimp = parse_field (buf); > + if (cimp == INVALID_IMP) > + goto not_found; > + > + if (imp == INVALID_IMP) > + imp = cimp; > + /* BIG.little implementers are always equal. */ > + else if (imp != cimp) > + goto not_found; > } > > if (strstr (buf, "part") != NULL) > { > + unsigned ccore = parse_field (buf); > for (i = 0; cpu_data[i].name != NULL; i++) > - if (strstr (buf, cpu_data[i].part_no) != NULL > - && !contains_string_p (cores, cpu_data[i].part_no)) > + if (ccore == cpu_data[i].part_no > + && !contains_core_p (cores, ccore)) > { > if (n_cores == 2) > goto not_found; > > - cores[n_cores++] = cpu_data[i].part_no; > + cores[n_cores++] = ccore; > core_idx = i; > arch_id = cpu_data[i].arch; > break; > @@ -240,7 +250,7 @@ host_detect_local_cpu (int argc, const char **argv) > f = NULL; > > /* Weird cpuinfo format that we don't know how to handle. */ > - if (n_cores == 0 || n_cores > 2 || n_imps != 1) > + if (n_cores == 0 || n_cores > 2 || imp == INVALID_IMP) > goto not_found; > > if (arch && !arch_id) > @@ -261,9 +271,8 @@ host_detect_local_cpu (int argc, const char **argv) > { > for (i = 0; cpu_data[i].name != NULL; i++) > { > - if (strchr (cpu_data[i].part_no, '.') != NULL > - && strncmp (cpu_data[i].implementer_id, imps[0], strlen (imps[0]) - 1) == 0 > - && valid_bL_string_p (cores, cpu_data[i].part_no)) > + if (cpu_data[i].implementer_id == imp > + && valid_bL_core_p (cores, cpu_data[i].part_no)) > { > res = concat ("-m", cpu ? "cpu" : "tune", "=", cpu_data[i].name, NULL); > break; > @@ -275,8 +284,7 @@ host_detect_local_cpu (int argc, const char **argv) > /* The simple, non-big.LITTLE case. */ > else > { > - if (strncmp (cpu_data[core_idx].implementer_id, imps[0], > - strlen (imps[0]) - 1) != 0) > + if (cpu_data[core_idx].implementer_id != imp) > goto not_found; > > res = concat ("-m", cpu ? "cpu" : "tune", "=",
On Tue, Nov 17, 2015 at 02:10:35PM -0800, Andrew Pinski wrote: > > Because the imp and parts are really integer rather than strings, this patch > moves the comparisons to be integer. Also allows saving around integers are > easier than doing string comparisons. This allows for the next change. > > The way I store BIG.little is (big<<12)|little as each part num is only 12bits > long. So it would be nice if someone could test -mpu=native on a big.little > system to make sure it works still. > > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. > > Thanks, > Andrew Pinski > > > > * config/aarch64/aarch64-cores.def: Rewrite so IMP and PART are integer constants. > * config/aarch64/driver-aarch64.c (struct aarch64_core_data): Change implementer_id to unsigned char. > Change part_no to unsigned int. > (AARCH64_BIG_LITTLE): New define. > (INVALID_IMP): New define. > (INVALID_CORE): New define. > (cpu_data): Change the last element's implementer_id and part_no to integers. > (valid_bL_string_p): Rewrite to .. > (valid_bL_core_p): this for integers instead of strings. > (parse_field): New function. > (contains_string_p): Rewrite to ... > (contains_core_p): this for integers and only for the part_no. > (host_detect_local_cpu): Rewrite handling of implementation and part num to be integers; > simplifying the code. > --- > gcc/config/aarch64/aarch64-cores.def | 25 +++++----- > gcc/config/aarch64/driver-aarch64.c | 90 ++++++++++++++++++++---------------- > 2 files changed, 62 insertions(+), 53 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def > index 0b456f7..798f3e3 100644 > --- a/gcc/config/aarch64/aarch64-cores.def > +++ b/gcc/config/aarch64/aarch64-cores.def > @@ -33,25 +33,26 @@ > This need not include flags implied by the architecture. > COSTS is the name of the rtx_costs routine to use. > IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it can > - be found in /proc/cpuinfo. > + be found in /proc/cpuinfo. There is a list in the ARM ARM. Let's be precise with this comment. "A partial list of implementer IDs is given in the ARM Architecture Reference Manual ARMv8, for ARMv8-A architecture profile" > PART is the part number of the CPU. On a GNU/Linux system it can be found > - in /proc/cpuinfo. For big.LITTLE systems this should have the form at of > - "<big core part number>.<LITTLE core part number>". */ > + in /proc/cpuinfo. For big.LITTLE systems this should use the macro AARCH64_BIG_LITTLE > + where the big part number comes as the first arugment to the macro and little is the s/arugment/argument/. > + second. */ > > /* V8 Architecture Processors. */ > > -AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03") > -AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07") > -AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08") > -AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001") > -AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x51", "0x800") > -AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1") > -AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000") > +AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03) > +AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07) > +AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08) > +AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001) > +AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800) > +AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a1) > +AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000) > > /* V8 big.LITTLE implementations. */ > > -AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03") > -AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03") > +AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE(0xd07, 0xd03)) > +AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE(0xd08, 0xd03)) > > > #undef AARCH64_CORE > diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c > index d03654c..92388a9 100644 > --- a/gcc/config/aarch64/driver-aarch64.c > +++ b/gcc/config/aarch64/driver-aarch64.c > @@ -37,18 +37,23 @@ static struct arch_extension ext_to_feat_string[] = > struct aarch64_core_data > { > const char* name; > - const char* arch; > - const char* implementer_id; > - const char* part_no; > + const char *arch; > + unsigned char implementer_id; /* Exactly 8 bits */ Can we redesign this around the most general case - big.LITTLE cores with different implementer IDs? There is nothing in the architecture that would forbid this, and Samsung recently announced that their Exynos 8 Octa 8890 which would support a combination of four of their custom cores with four ARM Cortex-A53 cores. ( http://www.samsung.com/semiconductor/minisite/Exynos/w/mediacenter.html#?v=news_Samsung_Unveils_a_New_Exynos_8_Octa_Processor_based_on_advanced_14-Nanometer_FinFET_Process_Technology ) > + unsigned int part_no; /* 12 bits + 12 bits */ > }; > > +#define AARCH64_BIG_LITTLE(BIG, LITTLE) \ > + (((BIG)&0xFFFu) << 12 | ((LITTLE) & 0xFFFu)) > +#define INVALID_IMP ((unsigned char) -1) > +#define INVALID_CORE ((unsigned)-1) > + > #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ > { CORE_NAME, #ARCH, IMP, PART }, > > static struct aarch64_core_data cpu_data [] = > { > #include "aarch64-cores.def" > - { NULL, NULL, NULL, NULL } > + { NULL, NULL, INVALID_IMP, INVALID_CORE } > }; > > > @@ -91,27 +96,35 @@ get_arch_name_from_id (const char* id) > should return true. */ > > static bool > -valid_bL_string_p (const char** core, const char* bL_string) > +valid_bL_core_p (unsigned int *core, unsigned int bL_core) > { > - return strstr (bL_string, core[0]) != NULL > - && strstr (bL_string, core[1]) != NULL; > + return AARCH64_BIG_LITTLE (core[0], core[1]) == bL_core; > } > > -/* Return true iff ARR contains STR in one of its two elements. */ > > -static bool > -contains_string_p (const char** arr, const char* str) > +/* Returns the integer that is after ':' for the field. */ > +static unsigned parse_field (const char *field) > { > - bool res = false; > + const char *rest = strchr (field, ':'); > + char *after; > + unsigned fint = strtol (rest+1, &after, 16); > + if (after == rest+1) "rest + 1" in both cases. > + return -1; > + return fint; > +} > + > +/* Return true iff ARR contains CORE, in either of the two elements. */ > > - if (arr[0] != NULL) > +static bool > +contains_core_p (unsigned *arr, unsigned core) > +{ > + if (arr[0] != INVALID_CORE) > { > - res = strstr (arr[0], str) != NULL; > - if (res) > - return res; > + if (arr[0] == core) > + return true; > > - if (arr[1] != NULL) > - return strstr (arr[1], str) != NULL; > + if (arr[1] != INVALID_CORE) > + return arr[1] == core; > } > > return false; > @@ -146,10 +159,9 @@ host_detect_local_cpu (int argc, const char **argv) > bool cpu = false; > unsigned int i = 0; > unsigned int core_idx = 0; > - const char* imps[2] = { NULL, NULL }; > - const char* cores[2] = { NULL, NULL }; > + unsigned char imp = INVALID_IMP; > + unsigned int cores[2] = { INVALID_CORE, INVALID_CORE }; > unsigned int n_cores = 0; > - unsigned int n_imps = 0; > bool processed_exts = false; > const char *ext_string = ""; > > @@ -180,30 +192,28 @@ host_detect_local_cpu (int argc, const char **argv) > { > if (strstr (buf, "implementer") != NULL) > { > - for (i = 0; cpu_data[i].name != NULL; i++) > - if (strstr (buf, cpu_data[i].implementer_id) != NULL > - && !contains_string_p (imps, cpu_data[i].implementer_id)) > - { > - if (n_imps == 2) > - goto not_found; > - > - imps[n_imps++] = cpu_data[i].implementer_id; > - > - break; > - } > - continue; > + unsigned cimp = parse_field (buf); > + if (cimp == INVALID_IMP) > + goto not_found; > + > + if (imp == INVALID_IMP) > + imp = cimp; > + /* BIG.little implementers are always equal. */ See my comment above. This comment does not neccessarily hold. In addition, this needs updating for Kyrill's comments. Thanks, James
On Sat, Oct 15, 2016 at 07:38:40PM -0700, Andrew Pinski wrote: > On Wed, Nov 25, 2015 at 11:59 AM, Andrew Pinski <pinskia@gmail.com> wrote: > Here is finally an updated (fixed) patch (I did not implement the two > implementer big.LITTLE support yet, that will be for a different patch > since I also fixed the part no not being unique as a separate patch. > Once I get a new enough kernel, I will also look into doing the > /sys/cpu/* style detection first. > > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions > (and tested hacking the location of the read in file to see if it > works with big.LITTLE and other formats of /proc/cpuinfo). I'm OK with this in principle, but it needs some polish for pedantic style comments... > * config/aarch64/aarch64-cores.def: Rewrite so IMP and PART are > integer constants. > * config/aarch64/driver-aarch64.c (struct aarch64_core_data): Change > implementer_id to unsigned char. > Change part_no to unsigned int. > (AARCH64_BIG_LITTLE): New define. > (INVALID_IMP): New define. > (INVALID_CORE): New define. > (cpu_data): Change the last element's implementer_id and part_no to integers. > (valid_bL_string_p): Rewrite to .. > (valid_bL_core_p): this for integers instead of strings. > (parse_field): New function. > (contains_string_p): Rewrite to ... > (contains_core_p): this for integers and only for the part_no. > (host_detect_local_cpu): Rewrite handling of implementation and part > num to be integers; > simplifying the code. > Index: config/aarch64/aarch64-cores.def > =================================================================== > --- config/aarch64/aarch64-cores.def (revision 241200) > +++ config/aarch64/aarch64-cores.def (working copy) > @@ -32,43 +32,46 @@ > FLAGS are the bitwise-or of the traits that apply to that core. > This need not include flags implied by the architecture. > COSTS is the name of the rtx_costs routine to use. > - IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it can > - be found in /proc/cpuinfo. > + IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it > + can be found in /proc/cpuinfo. A partial list of implementer IDs is > + given in the ARM Architecture Reference Manual ARMv8, for > - in /proc/cpuinfo. For big.LITTLE systems this should have the form at of > - "<big core part number>.<LITTLE core part number>". */ > + in /proc/cpuinfo. For big.LITTLE systems this should use the macro AARCH64_BIG_LITTLE > + where the big part number comes as the first arugment to the macro and little is the > + second. */ Needs rewrapped for 80 char width. > > -static bool > -valid_bL_string_p (const char** core, const char* bL_string) > + static bool > +valid_bL_core_p (unsigned int *core, unsigned int bL_core) Stray space before static. > { > - return strstr (bL_string, core[0]) != NULL > - && strstr (bL_string, core[1]) != NULL; > + return AARCH64_BIG_LITTLE (core[0], core[1]) == bL_core > + || AARCH64_BIG_LITTLE (core[1], core[0]) == bL_core; > +} > + > +/* Returns the integer that is after ':' for the field. */ > +static unsigned parse_field (const char *field) parse_field should be on a new line, FIELD should be capitalised in the explanatory comment. OK with the appropriate changes to rectify these points. Thanks, James
On 21/10/16 14:59, James Greenhalgh wrote: > On Sat, Oct 15, 2016 at 07:38:40PM -0700, Andrew Pinski wrote: >> On Wed, Nov 25, 2015 at 11:59 AM, Andrew Pinski <pinskia@gmail.com> wrote: >> Here is finally an updated (fixed) patch (I did not implement the two >> implementer big.LITTLE support yet, that will be for a different patch >> since I also fixed the part no not being unique as a separate patch. >> Once I get a new enough kernel, I will also look into doing the >> /sys/cpu/* style detection first. >> >> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions >> (and tested hacking the location of the read in file to see if it >> works with big.LITTLE and other formats of /proc/cpuinfo). > > I'm OK with this in principle, but it needs some polish for pedantic > style comments... > >> * config/aarch64/aarch64-cores.def: Rewrite so IMP and PART are >> integer constants. >> * config/aarch64/driver-aarch64.c (struct aarch64_core_data): Change >> implementer_id to unsigned char. >> Change part_no to unsigned int. >> (AARCH64_BIG_LITTLE): New define. >> (INVALID_IMP): New define. >> (INVALID_CORE): New define. >> (cpu_data): Change the last element's implementer_id and part_no to integers. >> (valid_bL_string_p): Rewrite to .. >> (valid_bL_core_p): this for integers instead of strings. >> (parse_field): New function. >> (contains_string_p): Rewrite to ... >> (contains_core_p): this for integers and only for the part_no. >> (host_detect_local_cpu): Rewrite handling of implementation and part >> num to be integers; >> simplifying the code. > >> Index: config/aarch64/aarch64-cores.def >> =================================================================== >> --- config/aarch64/aarch64-cores.def (revision 241200) >> +++ config/aarch64/aarch64-cores.def (working copy) >> @@ -32,43 +32,46 @@ >> FLAGS are the bitwise-or of the traits that apply to that core. >> This need not include flags implied by the architecture. >> COSTS is the name of the rtx_costs routine to use. >> - IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it can >> - be found in /proc/cpuinfo. >> + IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it >> + can be found in /proc/cpuinfo. A partial list of implementer IDs is >> + given in the ARM Architecture Reference Manual ARMv8, for >> - in /proc/cpuinfo. For big.LITTLE systems this should have the form at of >> - "<big core part number>.<LITTLE core part number>". */ >> + in /proc/cpuinfo. For big.LITTLE systems this should use the macro AARCH64_BIG_LITTLE >> + where the big part number comes as the first arugment to the macro and little is the >> + second. */ > > Needs rewrapped for 80 char width. > I don't think it's a good idea to line wrap the def files, some of them are processed with AWK during configure and having a complete entry per line avoids potential matching problems. R. >> >> -static bool >> -valid_bL_string_p (const char** core, const char* bL_string) >> + static bool >> +valid_bL_core_p (unsigned int *core, unsigned int bL_core) > > Stray space before static. > >> { >> - return strstr (bL_string, core[0]) != NULL >> - && strstr (bL_string, core[1]) != NULL; >> + return AARCH64_BIG_LITTLE (core[0], core[1]) == bL_core >> + || AARCH64_BIG_LITTLE (core[1], core[0]) == bL_core; >> +} >> + >> +/* Returns the integer that is after ':' for the field. */ >> +static unsigned parse_field (const char *field) > > parse_field should be on a new line, FIELD should be capitalised in the > explanatory comment. > > OK with the appropriate changes to rectify these points. > > Thanks, > James >
On Fri, Oct 21, 2016 at 04:57:22PM +0100, Richard Earnshaw (lists) wrote: > On 21/10/16 14:59, James Greenhalgh wrote: > > On Sat, Oct 15, 2016 at 07:38:40PM -0700, Andrew Pinski wrote: > >> On Wed, Nov 25, 2015 at 11:59 AM, Andrew Pinski <pinskia@gmail.com> wrote: > >> Here is finally an updated (fixed) patch (I did not implement the two > >> implementer big.LITTLE support yet, that will be for a different patch > >> since I also fixed the part no not being unique as a separate patch. > >> Once I get a new enough kernel, I will also look into doing the > >> /sys/cpu/* style detection first. > >> > >> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions > >> (and tested hacking the location of the read in file to see if it > >> works with big.LITTLE and other formats of /proc/cpuinfo). > > > > I'm OK with this in principle, but it needs some polish for pedantic > > style comments... > > > >> * config/aarch64/aarch64-cores.def: Rewrite so IMP and PART are > >> integer constants. > >> * config/aarch64/driver-aarch64.c (struct aarch64_core_data): Change > >> implementer_id to unsigned char. > >> Change part_no to unsigned int. > >> (AARCH64_BIG_LITTLE): New define. > >> (INVALID_IMP): New define. > >> (INVALID_CORE): New define. > >> (cpu_data): Change the last element's implementer_id and part_no to integers. > >> (valid_bL_string_p): Rewrite to .. > >> (valid_bL_core_p): this for integers instead of strings. > >> (parse_field): New function. > >> (contains_string_p): Rewrite to ... > >> (contains_core_p): this for integers and only for the part_no. > >> (host_detect_local_cpu): Rewrite handling of implementation and part > >> num to be integers; > >> simplifying the code. > > > >> Index: config/aarch64/aarch64-cores.def > >> =================================================================== > >> --- config/aarch64/aarch64-cores.def (revision 241200) > >> +++ config/aarch64/aarch64-cores.def (working copy) > >> @@ -32,43 +32,46 @@ > >> FLAGS are the bitwise-or of the traits that apply to that core. > >> This need not include flags implied by the architecture. > >> COSTS is the name of the rtx_costs routine to use. > >> - IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it can > >> - be found in /proc/cpuinfo. > >> + IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it > >> + can be found in /proc/cpuinfo. A partial list of implementer IDs is > >> + given in the ARM Architecture Reference Manual ARMv8, for > >> - in /proc/cpuinfo. For big.LITTLE systems this should have the form at of > >> - "<big core part number>.<LITTLE core part number>". */ > >> + in /proc/cpuinfo. For big.LITTLE systems this should use the macro AARCH64_BIG_LITTLE > >> + where the big part number comes as the first arugment to the macro and little is the > >> + second. */ > > > > Needs rewrapped for 80 char width. > > > > I don't think it's a good idea to line wrap the def files, some of them > are processed with AWK during configure and having a complete entry per > line avoids potential matching problems. Agreed (and essential) for the entries themselves. This is just a comment that hangs over the end and should be fixed. While I'm here... > >> + where the big part number comes as the first arugment to the macro and little is the s/arugment/argument. Cheers, James
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 0b456f7..798f3e3 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -33,25 +33,26 @@ This need not include flags implied by the architecture. COSTS is the name of the rtx_costs routine to use. IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it can - be found in /proc/cpuinfo. + be found in /proc/cpuinfo. There is a list in the ARM ARM. PART is the part number of the CPU. On a GNU/Linux system it can be found - in /proc/cpuinfo. For big.LITTLE systems this should have the form at of - "<big core part number>.<LITTLE core part number>". */ + in /proc/cpuinfo. For big.LITTLE systems this should use the macro AARCH64_BIG_LITTLE + where the big part number comes as the first arugment to the macro and little is the + second. */ /* V8 Architecture Processors. */ -AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03") -AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07") -AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08") -AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001") -AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x51", "0x800") -AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1") -AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000") +AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03) +AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07) +AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08) +AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001) +AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800) +AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a1) +AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000) /* V8 big.LITTLE implementations. */ -AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03") -AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03") +AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE(0xd07, 0xd03)) +AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE(0xd08, 0xd03)) #undef AARCH64_CORE diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c index d03654c..92388a9 100644 --- a/gcc/config/aarch64/driver-aarch64.c +++ b/gcc/config/aarch64/driver-aarch64.c @@ -37,18 +37,23 @@ static struct arch_extension ext_to_feat_string[] = struct aarch64_core_data { const char* name; - const char* arch; - const char* implementer_id; - const char* part_no; + const char *arch; + unsigned char implementer_id; /* Exactly 8 bits */ + unsigned int part_no; /* 12 bits + 12 bits */ }; +#define AARCH64_BIG_LITTLE(BIG, LITTLE) \ + (((BIG)&0xFFFu) << 12 | ((LITTLE) & 0xFFFu)) +#define INVALID_IMP ((unsigned char) -1) +#define INVALID_CORE ((unsigned)-1) + #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ { CORE_NAME, #ARCH, IMP, PART }, static struct aarch64_core_data cpu_data [] = { #include "aarch64-cores.def" - { NULL, NULL, NULL, NULL } + { NULL, NULL, INVALID_IMP, INVALID_CORE } }; @@ -91,27 +96,35 @@ get_arch_name_from_id (const char* id) should return true. */ static bool -valid_bL_string_p (const char** core, const char* bL_string) +valid_bL_core_p (unsigned int *core, unsigned int bL_core) { - return strstr (bL_string, core[0]) != NULL - && strstr (bL_string, core[1]) != NULL; + return AARCH64_BIG_LITTLE (core[0], core[1]) == bL_core; } -/* Return true iff ARR contains STR in one of its two elements. */ -static bool -contains_string_p (const char** arr, const char* str) +/* Returns the integer that is after ':' for the field. */ +static unsigned parse_field (const char *field) { - bool res = false; + const char *rest = strchr (field, ':'); + char *after; + unsigned fint = strtol (rest+1, &after, 16); + if (after == rest+1) + return -1; + return fint; +} + +/* Return true iff ARR contains CORE, in either of the two elements. */ - if (arr[0] != NULL) +static bool +contains_core_p (unsigned *arr, unsigned core) +{ + if (arr[0] != INVALID_CORE) { - res = strstr (arr[0], str) != NULL; - if (res) - return res; + if (arr[0] == core) + return true; - if (arr[1] != NULL) - return strstr (arr[1], str) != NULL; + if (arr[1] != INVALID_CORE) + return arr[1] == core; } return false; @@ -146,10 +159,9 @@ host_detect_local_cpu (int argc, const char **argv) bool cpu = false; unsigned int i = 0; unsigned int core_idx = 0; - const char* imps[2] = { NULL, NULL }; - const char* cores[2] = { NULL, NULL }; + unsigned char imp = INVALID_IMP; + unsigned int cores[2] = { INVALID_CORE, INVALID_CORE }; unsigned int n_cores = 0; - unsigned int n_imps = 0; bool processed_exts = false; const char *ext_string = ""; @@ -180,30 +192,28 @@ host_detect_local_cpu (int argc, const char **argv) { if (strstr (buf, "implementer") != NULL) { - for (i = 0; cpu_data[i].name != NULL; i++) - if (strstr (buf, cpu_data[i].implementer_id) != NULL - && !contains_string_p (imps, cpu_data[i].implementer_id)) - { - if (n_imps == 2) - goto not_found; - - imps[n_imps++] = cpu_data[i].implementer_id; - - break; - } - continue; + unsigned cimp = parse_field (buf); + if (cimp == INVALID_IMP) + goto not_found; + + if (imp == INVALID_IMP) + imp = cimp; + /* BIG.little implementers are always equal. */ + else if (imp != cimp) + goto not_found; } if (strstr (buf, "part") != NULL) { + unsigned ccore = parse_field (buf); for (i = 0; cpu_data[i].name != NULL; i++) - if (strstr (buf, cpu_data[i].part_no) != NULL - && !contains_string_p (cores, cpu_data[i].part_no)) + if (ccore == cpu_data[i].part_no + && !contains_core_p (cores, ccore)) { if (n_cores == 2) goto not_found; - cores[n_cores++] = cpu_data[i].part_no; + cores[n_cores++] = ccore; core_idx = i; arch_id = cpu_data[i].arch; break; @@ -240,7 +250,7 @@ host_detect_local_cpu (int argc, const char **argv) f = NULL; /* Weird cpuinfo format that we don't know how to handle. */ - if (n_cores == 0 || n_cores > 2 || n_imps != 1) + if (n_cores == 0 || n_cores > 2 || imp == INVALID_IMP) goto not_found; if (arch && !arch_id) @@ -261,9 +271,8 @@ host_detect_local_cpu (int argc, const char **argv) { for (i = 0; cpu_data[i].name != NULL; i++) { - if (strchr (cpu_data[i].part_no, '.') != NULL - && strncmp (cpu_data[i].implementer_id, imps[0], strlen (imps[0]) - 1) == 0 - && valid_bL_string_p (cores, cpu_data[i].part_no)) + if (cpu_data[i].implementer_id == imp + && valid_bL_core_p (cores, cpu_data[i].part_no)) { res = concat ("-m", cpu ? "cpu" : "tune", "=", cpu_data[i].name, NULL); break; @@ -275,8 +284,7 @@ host_detect_local_cpu (int argc, const char **argv) /* The simple, non-big.LITTLE case. */ else { - if (strncmp (cpu_data[core_idx].implementer_id, imps[0], - strlen (imps[0]) - 1) != 0) + if (cpu_data[core_idx].implementer_id != imp) goto not_found; res = concat ("-m", cpu ? "cpu" : "tune", "=",