Message ID | 1403687030.3355.19.camel@localhost.localdomain |
---|---|
State | New |
Headers | show |
Edward, Thanks for your contribution. Please file a JIRA ticket at http://issues.apache.org/jira/browse/HADOOP and upload your patch there. Other reviewers/committers can take it from there and provide feedback. For more instructions, you can see http://wiki.apache.org/hadoop/HowToContribute. HTH +Vinod On Jun 25, 2014, at 2:03 AM, Edward Nevill <edward.nevill@linaro.org> wrote: > Hi, > > I would like to add support for hardware crc for ARM's new 64-bit architecture, aarch64. > > I would be grateful if some committer could help me through the process of getting this change pushed into the trunk. > > I have prepared an initial patch below. > > The patch is completely conditionalized on __aarch64__ > > For the moment I have only done the non-pipelined version as the hw I have only has 1 crc execute unit. > > Some initial benchmarks on terasort give > > sw crc: 107 sec > hw crc: 103 sec > > The performance improvement is quite small, but this is limited by the fact that I am using early stage hw which is not performant. > > I have also built it on x86 and I think the change is fairly safe for other architectures because post conditionalization the src is identical on other architectures. > > Thanks for your help, > Ed. 
> > > --- CUT HERE --- > Index: hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c > =================================================================== > --- hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c (revision 1605031) > +++ hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c (working copy) > @@ -38,7 +38,7 @@ > #include "bulk_crc32.h" > #include "gcc_optimizations.h" > > -#if (!defined(__FreeBSD__) && !defined(WINDOWS)) > +#if (!defined(__FreeBSD__) && !defined(WINDOWS)) && !defined(__aarch64__) > #define USE_PIPELINED > #endif > > @@ -672,8 +672,61 @@ > > # endif // 64-bit vs 32-bit > > -#else // end x86 architecture > +#elif defined(__aarch64__) // end x86 architecture > > +#include <sys/auxv.h> > +#include <asm/hwcap.h> > + > +#ifndef HWCAP_CRC32 > +#define HWCAP_CRC32 (1<<7) > +#endif > + > +/** > + * On library load, determine what sort of crc we are going to do > + * and set cached_cpu_supports_crc32 appropriately. > + */ > +void __attribute__ ((constructor)) init_cpu_support_flag(void) { > + unsigned long auxv = getauxval(AT_HWCAP); > + cached_cpu_supports_crc32 = auxv & HWCAP_CRC32; > +} > + > +#define CRC32X(crc,value) asm("crc32cx %w[c], %w[c], %x[v]" : [c]"+r"(crc) : [v]"r"(value)) > +#define CRC32W(crc,value) asm("crc32cw %w[c], %w[c], %w[v]" : [c]"+r"(crc) : [v]"r"(value)) > +#define CRC32H(crc,value) asm("crc32ch %w[c], %w[c], %w[v]" : [c]"+r"(crc) : [v]"r"(value)) > +#define CRC32B(crc,value) asm("crc32cb %w[c], %w[c], %w[v]" : [c]"+r"(crc) : [v]"r"(value)) > + > +/** > + * Hardware-accelerated CRC32C calculation using the 64-bit instructions. 
> + */ > +static uint32_t crc32c_hardware(uint32_t crc, const uint8_t* p_buf, size_t length) { > + int64_t len = length; > + asm(".cpu generic+crc"); // Allow crc instructions in asm > + if ((len -= sizeof(uint64_t)) >= 0) { > + do { > + CRC32X(crc, *(uint64_t*)p_buf); > + p_buf += sizeof(uint64_t); > + } while ((len -= sizeof(uint64_t)) >= 0); > + } > + > + // The following is more efficient than the straight loop > + if (len & sizeof(uint32_t)) { > + CRC32W(crc, *(uint32_t*)p_buf); > + p_buf += sizeof(uint32_t); > + } > + if (len & sizeof(uint16_t)) { > + CRC32H(crc, *(uint16_t*)p_buf); > + p_buf += sizeof(uint16_t); > + } > + if (len & sizeof(uint8_t)) { > + CRC32B(crc, *p_buf); > + p_buf++; > + } > + > + return crc; > +} > + > +#else > + > static uint32_t crc32c_hardware(uint32_t crc, const uint8_t* data, size_t length) { > // never called! > assert(0 && "hardware crc called on an unsupported platform"); > --- CUT HERE --- > >
=================================================================== --- hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c (revision 1605031) +++ hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c (working copy) @@ -38,7 +38,7 @@ #include "bulk_crc32.h" #include "gcc_optimizations.h" -#if (!defined(__FreeBSD__) && !defined(WINDOWS)) +#if (!defined(__FreeBSD__) && !defined(WINDOWS)) && !defined(__aarch64__) #define USE_PIPELINED #endif @@ -672,8 +672,61 @@ # endif // 64-bit vs 32-bit -#else // end x86 architecture +#elif defined(__aarch64__) // end x86 architecture +#include <sys/auxv.h> +#include <asm/hwcap.h> + +#ifndef HWCAP_CRC32 +#define HWCAP_CRC32 (1<<7) +#endif + +/** + * On library load, determine what sort of crc we are going to do + * and set cached_cpu_supports_crc32 appropriately. + */ +void __attribute__ ((constructor)) init_cpu_support_flag(void) { + unsigned long auxv = getauxval(AT_HWCAP); + cached_cpu_supports_crc32 = auxv & HWCAP_CRC32; +} + +#define CRC32X(crc,value) asm("crc32cx %w[c], %w[c], %x[v]" : [c]"+r"(crc) : [v]"r"(value)) +#define CRC32W(crc,value) asm("crc32cw %w[c], %w[c], %w[v]" : [c]"+r"(crc) : [v]"r"(value)) +#define CRC32H(crc,value) asm("crc32ch %w[c], %w[c], %w[v]" : [c]"+r"(crc) : [v]"r"(value)) +#define CRC32B(crc,value) asm("crc32cb %w[c], %w[c], %w[v]" : [c]"+r"(crc) : [v]"r"(value)) + +/** + * Hardware-accelerated CRC32C calculation using the 64-bit instructions. 
+ */ +static uint32_t crc32c_hardware(uint32_t crc, const uint8_t* p_buf, size_t length) { + int64_t len = length; + asm(".cpu generic+crc"); // Allow crc instructions in asm + if ((len -= sizeof(uint64_t)) >= 0) { + do { + CRC32X(crc, *(uint64_t*)p_buf); + p_buf += sizeof(uint64_t); + } while ((len -= sizeof(uint64_t)) >= 0); + } + + // The following is more efficient than the straight loop + if (len & sizeof(uint32_t)) { + CRC32W(crc, *(uint32_t*)p_buf); + p_buf += sizeof(uint32_t); + } + if (len & sizeof(uint16_t)) { + CRC32H(crc, *(uint16_t*)p_buf); + p_buf += sizeof(uint16_t); + } + if (len & sizeof(uint8_t)) { + CRC32B(crc, *p_buf); + p_buf++; + } + + return crc; +} + +#else + static uint32_t crc32c_hardware(uint32_t crc, const uint8_t* data, size_t length) { // never called! assert(0 && "hardware crc called on an unsupported platform"); --- CUT HERE ---