Message ID | 1463056577-31920-5-git-send-email-matias.elo@nokia.com |
---|---|
State | New |
Headers | show |
On Thu, May 12, 2016 at 7:36 AM, Matias Elo <matias.elo@nokia.com> wrote: > Using memset to initialize struct odp_packet_hdr_t contents > to 0 has a significant overhead. Instead, initialize only > the required members of the struct. > > Signed-off-by: Matias Elo <matias.elo@nokia.com> > --- > platform/linux-generic/include/odp_packet_internal.h | 8 +++++--- > platform/linux-generic/odp_packet.c | 18 > ++++++------------ > 2 files changed, 11 insertions(+), 15 deletions(-) > > diff --git a/platform/linux-generic/include/odp_packet_internal.h > b/platform/linux-generic/include/odp_packet_internal.h > index 2a12503..cdee1e1 100644 > --- a/platform/linux-generic/include/odp_packet_internal.h > +++ b/platform/linux-generic/include/odp_packet_internal.h > @@ -143,15 +143,17 @@ typedef struct { > uint32_t l4_offset; /**< offset to L4 hdr (TCP, UDP, SCTP, also > ICMP) */ > uint32_t payload_offset; /**< offset to payload */ > > - uint32_t l3_len; /**< Layer 3 length */ > - uint32_t l4_len; /**< Layer 4 length */ > - > uint32_t frame_len; > uint32_t headroom; > uint32_t tailroom; > > odp_pktio_t input; > > + /* Values below are not initialized by packet_init() */ > + > + uint32_t l3_len; /**< Layer 3 length */ > + uint32_t l4_len; /**< Layer 4 length */ > + > uint32_t flow_hash; /**< Flow hash value */ > odp_time_t timestamp; /**< Timestamp value */ > > diff --git a/platform/linux-generic/odp_packet.c > b/platform/linux-generic/odp_packet.c > index 436265e..f5f224d 100644 > --- a/platform/linux-generic/odp_packet.c > +++ b/platform/linux-generic/odp_packet.c > @@ -50,23 +50,17 @@ void packet_parse_reset(odp_packet_hdr_t *pkt_hdr) > static void packet_init(pool_entry_t *pool, odp_packet_hdr_t *pkt_hdr, > size_t size, int parse) > { > - /* > - * Reset parser metadata. Note that we clear via memset to make > - * this routine indepenent of any additional adds to packet > metadata. > - */ > - const size_t start_offset = ODP_FIELD_SIZEOF(odp_packet_hdr_t, > buf_hdr); > - uint8_t *start; > - size_t len; > + pkt_hdr->input_flags.all = 0; > + pkt_hdr->output_flags.all = 0; > + pkt_hdr->error_flags.all = 0; > > - start = (uint8_t *)pkt_hdr + start_offset; > - len = sizeof(odp_packet_hdr_t) - start_offset; > - memset(start, 0, len); > The reason for this memset is to "future proof" the header by allowing any new fields that are added to get initialized to a known value without having to update this routine. Is the impact of this single memset() all that large? I can understand wanting to compress the header to possibly avoid another cache line reference but this optimization seems inherently error-prone. I'm also wondering if this might confuse tools like Coverity which seems to be paranoid about uninitialized variables. > - > - /* Set metadata items that initialize to non-zero values */ > + pkt_hdr->l2_offset = 0; > pkt_hdr->l3_offset = ODP_PACKET_OFFSET_INVALID; > pkt_hdr->l4_offset = ODP_PACKET_OFFSET_INVALID; > pkt_hdr->payload_offset = ODP_PACKET_OFFSET_INVALID; > > + pkt_hdr->input = ODP_PKTIO_INVALID; > + > /* Disable lazy parsing on user allocated packets */ > if (!parse) > packet_parse_disable(pkt_hdr); > -- > 1.9.1 > > _______________________________________________ > lng-odp mailing list > lng-odp@lists.linaro.org > https://lists.linaro.org/mailman/listinfo/lng-odp >
On 13 May 2016 at 03:29, Elo, Matias (Nokia - FI/Espoo) < matias.elo@nokia.com> wrote: > > > > > *From:* EXT Bill Fischofer [mailto:bill.fischofer@linaro.org] > *Sent:* Thursday, May 12, 2016 10:34 PM > *To:* Elo, Matias (Nokia - FI/Espoo) <matias.elo@nokia.com> > *Cc:* LNG ODP Mailman List <lng-odp@lists.linaro.org> > *Subject:* Re: [lng-odp] [PATCH 4/4] linux-generic: packet: initialize > only selected odp_packet_hdr_t members > > > > > > > > On Thu, May 12, 2016 at 7:36 AM, Matias Elo <matias.elo@nokia.com> wrote: > > Using memset to initialize struct odp_packet_hdr_t contents > to 0 has a significant overhead. Instead, initialize only > the required members of the struct. > > Signed-off-by: Matias Elo <matias.elo@nokia.com> > --- > platform/linux-generic/include/odp_packet_internal.h | 8 +++++--- > platform/linux-generic/odp_packet.c | 18 > ++++++------------ > 2 files changed, 11 insertions(+), 15 deletions(-) > > diff --git a/platform/linux-generic/include/odp_packet_internal.h > b/platform/linux-generic/include/odp_packet_internal.h > index 2a12503..cdee1e1 100644 > --- a/platform/linux-generic/include/odp_packet_internal.h > +++ b/platform/linux-generic/include/odp_packet_internal.h > @@ -143,15 +143,17 @@ typedef struct { > uint32_t l4_offset; /**< offset to L4 hdr (TCP, UDP, SCTP, also > ICMP) */ > uint32_t payload_offset; /**< offset to payload */ > > - uint32_t l3_len; /**< Layer 3 length */ > - uint32_t l4_len; /**< Layer 4 length */ > - > uint32_t frame_len; > uint32_t headroom; > uint32_t tailroom; > > odp_pktio_t input; > > + /* Values below are not initialized by packet_init() */ > + > + uint32_t l3_len; /**< Layer 3 length */ > + uint32_t l4_len; /**< Layer 4 length */ > + > uint32_t flow_hash; /**< Flow hash value */ > odp_time_t timestamp; /**< Timestamp value */ > > diff --git a/platform/linux-generic/odp_packet.c > b/platform/linux-generic/odp_packet.c > index 436265e..f5f224d 100644 > --- a/platform/linux-generic/odp_packet.c > +++ b/platform/linux-generic/odp_packet.c > @@ -50,23 +50,17 @@ void packet_parse_reset(odp_packet_hdr_t *pkt_hdr) > static void packet_init(pool_entry_t *pool, odp_packet_hdr_t *pkt_hdr, > size_t size, int parse) > { > - /* > - * Reset parser metadata. Note that we clear via memset to make > - * this routine indepenent of any additional adds to packet > metadata. > - */ > - const size_t start_offset = ODP_FIELD_SIZEOF(odp_packet_hdr_t, > buf_hdr); > - uint8_t *start; > - size_t len; > + pkt_hdr->input_flags.all = 0; > + pkt_hdr->output_flags.all = 0; > + pkt_hdr->error_flags.all = 0; > > - start = (uint8_t *)pkt_hdr + start_offset; > - len = sizeof(odp_packet_hdr_t) - start_offset; > - memset(start, 0, len); > > > > The reason for this memset is to "future proof" the header by allowing any > new fields that are added to get initialized to a known value without > having to update this routine. Is the impact of this single memset() all > that large? I can understand wanting to compress the header to possibly > avoid another cache line reference but this optimization seems inherently > error-prone. I'm also wondering if this might confuse tools like Coverity > which seems to be paranoid about uninitialized variables. > > > > Hi Bill, > > > > I can understand the future proofing. However, this single memset() is > currently the largest cycle hog when forwarding small packets. Below are > some throughput numbers: > > > > odp_l2fwd -i p1p1 -c 1 -a 0 (netmap pktio): > > > > Niantic (1x10Gbps): 7.9 vs 8.6 Mpps (64B) => 8% improvement > > Fortville (1x40Gbps): 7.9 vs 8.8 Mpps (64B) => 11% improvement > > > > When the performance gain is this big I would pay the price of being more > error-prone. Is there a way to run Coverity before merging the patch? > Not easily until we resolve the licence/number of coverty runs per week issue - on going :/ Coverty might be ok as long as we dont use the field for anything, and for 11% I am happy to mark it a false positive in coverty if it is flagged Can we mitigate some of the problems with a doxygen warning on this struct about its behaviour ? > > > -Matias > > > > - > - /* Set metadata items that initialize to non-zero values */ > + pkt_hdr->l2_offset = 0; > pkt_hdr->l3_offset = ODP_PACKET_OFFSET_INVALID; > pkt_hdr->l4_offset = ODP_PACKET_OFFSET_INVALID; > pkt_hdr->payload_offset = ODP_PACKET_OFFSET_INVALID; > > + pkt_hdr->input = ODP_PKTIO_INVALID; > + > /* Disable lazy parsing on user allocated packets */ > if (!parse) > packet_parse_disable(pkt_hdr); > -- > 1.9.1 > > _______________________________________________ > lng-odp mailing list > lng-odp@lists.linaro.org > https://lists.linaro.org/mailman/listinfo/lng-odp > > > > _______________________________________________ > lng-odp mailing list > lng-odp@lists.linaro.org > https://lists.linaro.org/mailman/listinfo/lng-odp > > -- Mike Holmes Technical Manager - Linaro Networking Group Linaro.org <http://www.linaro.org/> *│ *Open source software for ARM SoCs "Work should be fun and collaborative, the rest follows"
I agree we can't afford to ignore a major performance gain, however this is an area that should be documented in the code. Something to the effect that any new fields added must be reviewed for initialization requirements. On Fri, May 13, 2016 at 9:46 AM, Mike Holmes <mike.holmes@linaro.org> wrote: > > > On 13 May 2016 at 03:29, Elo, Matias (Nokia - FI/Espoo) < > matias.elo@nokia.com> wrote: > >> >> >> >> >> *From:* EXT Bill Fischofer [mailto:bill.fischofer@linaro.org] >> *Sent:* Thursday, May 12, 2016 10:34 PM >> *To:* Elo, Matias (Nokia - FI/Espoo) <matias.elo@nokia.com> >> *Cc:* LNG ODP Mailman List <lng-odp@lists.linaro.org> >> *Subject:* Re: [lng-odp] [PATCH 4/4] linux-generic: packet: initialize >> only selected odp_packet_hdr_t members >> >> >> >> >> >> >> >> On Thu, May 12, 2016 at 7:36 AM, Matias Elo <matias.elo@nokia.com> wrote: >> >> Using memset to initialize struct odp_packet_hdr_t contents >> to 0 has a significant overhead. Instead, initialize only >> the required members of the struct. >> >> Signed-off-by: Matias Elo <matias.elo@nokia.com> >> --- >> platform/linux-generic/include/odp_packet_internal.h | 8 +++++--- >> platform/linux-generic/odp_packet.c | 18 >> ++++++------------ >> 2 files changed, 11 insertions(+), 15 deletions(-) >> >> diff --git a/platform/linux-generic/include/odp_packet_internal.h >> b/platform/linux-generic/include/odp_packet_internal.h >> index 2a12503..cdee1e1 100644 >> --- a/platform/linux-generic/include/odp_packet_internal.h >> +++ b/platform/linux-generic/include/odp_packet_internal.h >> @@ -143,15 +143,17 @@ typedef struct { >> uint32_t l4_offset; /**< offset to L4 hdr (TCP, UDP, SCTP, also >> ICMP) */ >> uint32_t payload_offset; /**< offset to payload */ >> >> - uint32_t l3_len; /**< Layer 3 length */ >> - uint32_t l4_len; /**< Layer 4 length */ >> - >> uint32_t frame_len; >> uint32_t headroom; >> uint32_t tailroom; >> >> odp_pktio_t input; >> >> + /* Values below are not initialized by packet_init() */ >> + >> + uint32_t l3_len; /**< Layer 3 length */ >> + uint32_t l4_len; /**< Layer 4 length */ >> + >> uint32_t flow_hash; /**< Flow hash value */ >> odp_time_t timestamp; /**< Timestamp value */ >> >> diff --git a/platform/linux-generic/odp_packet.c >> b/platform/linux-generic/odp_packet.c >> index 436265e..f5f224d 100644 >> --- a/platform/linux-generic/odp_packet.c >> +++ b/platform/linux-generic/odp_packet.c >> @@ -50,23 +50,17 @@ void packet_parse_reset(odp_packet_hdr_t *pkt_hdr) >> static void packet_init(pool_entry_t *pool, odp_packet_hdr_t *pkt_hdr, >> size_t size, int parse) >> { >> - /* >> - * Reset parser metadata. Note that we clear via memset to make >> - * this routine indepenent of any additional adds to packet >> metadata. >> - */ >> - const size_t start_offset = ODP_FIELD_SIZEOF(odp_packet_hdr_t, >> buf_hdr); >> - uint8_t *start; >> - size_t len; >> + pkt_hdr->input_flags.all = 0; >> + pkt_hdr->output_flags.all = 0; >> + pkt_hdr->error_flags.all = 0; >> >> - start = (uint8_t *)pkt_hdr + start_offset; >> - len = sizeof(odp_packet_hdr_t) - start_offset; >> - memset(start, 0, len); >> >> >> >> The reason for this memset is to "future proof" the header by allowing >> any new fields that are added to get initialized to a known value without >> having to update this routine. Is the impact of this single memset() all >> that large? I can understand wanting to compress the header to possibly >> avoid another cache line reference but this optimization seems inherently >> error-prone. I'm also wondering if this might confuse tools like Coverity >> which seems to be paranoid about uninitialized variables. >> >> >> >> Hi Bill, >> >> >> >> I can understand the future proofing. However, this single memset() is >> currently the largest cycle hog when forwarding small packets. Below are >> some throughput numbers: >> >> >> >> odp_l2fwd -i p1p1 -c 1 -a 0 (netmap pktio): >> >> >> >> Niantic (1x10Gbps): 7.9 vs 8.6 Mpps (64B) => 8% improvement >> >> Fortville (1x40Gbps): 7.9 vs 8.8 Mpps (64B) => 11% improvement >> >> >> >> When the performance gain is this big I would pay the price of being more >> error-prone. Is there a way to run Coverity before merging the patch? >> > > Not easily until we resolve the licence/number of coverty runs per week > issue - on going :/ > Coverty might be ok as long as we dont use the field for anything, and > for 11% I am happy to mark it a false positive in coverty if it is flagged > Can we mitigate some of the problems with a doxygen warning on this struct > about its behaviour ? > > > >> >> >> -Matias >> >> >> >> - >> - /* Set metadata items that initialize to non-zero values */ >> + pkt_hdr->l2_offset = 0; >> pkt_hdr->l3_offset = ODP_PACKET_OFFSET_INVALID; >> pkt_hdr->l4_offset = ODP_PACKET_OFFSET_INVALID; >> pkt_hdr->payload_offset = ODP_PACKET_OFFSET_INVALID; >> >> + pkt_hdr->input = ODP_PKTIO_INVALID; >> + >> /* Disable lazy parsing on user allocated packets */ >> if (!parse) >> packet_parse_disable(pkt_hdr); >> -- >> 1.9.1 >> >> _______________________________________________ >> lng-odp mailing list >> lng-odp@lists.linaro.org >> https://lists.linaro.org/mailman/listinfo/lng-odp >> >> >> >> _______________________________________________ >> lng-odp mailing list >> lng-odp@lists.linaro.org >> https://lists.linaro.org/mailman/listinfo/lng-odp >> >> > > > -- > Mike Holmes > Technical Manager - Linaro Networking Group > Linaro.org <http://www.linaro.org/> *│ *Open source software for ARM SoCs > "Work should be fun and collaborative, the rest follows" > > >
diff --git a/platform/linux-generic/include/odp_packet_internal.h b/platform/linux-generic/include/odp_packet_internal.h index 2a12503..cdee1e1 100644 --- a/platform/linux-generic/include/odp_packet_internal.h +++ b/platform/linux-generic/include/odp_packet_internal.h @@ -143,15 +143,17 @@ typedef struct { uint32_t l4_offset; /**< offset to L4 hdr (TCP, UDP, SCTP, also ICMP) */ uint32_t payload_offset; /**< offset to payload */ - uint32_t l3_len; /**< Layer 3 length */ - uint32_t l4_len; /**< Layer 4 length */ - uint32_t frame_len; uint32_t headroom; uint32_t tailroom; odp_pktio_t input; + /* Values below are not initialized by packet_init() */ + + uint32_t l3_len; /**< Layer 3 length */ + uint32_t l4_len; /**< Layer 4 length */ + uint32_t flow_hash; /**< Flow hash value */ odp_time_t timestamp; /**< Timestamp value */ diff --git a/platform/linux-generic/odp_packet.c b/platform/linux-generic/odp_packet.c index 436265e..f5f224d 100644 --- a/platform/linux-generic/odp_packet.c +++ b/platform/linux-generic/odp_packet.c @@ -50,23 +50,17 @@ void packet_parse_reset(odp_packet_hdr_t *pkt_hdr) static void packet_init(pool_entry_t *pool, odp_packet_hdr_t *pkt_hdr, size_t size, int parse) { - /* - * Reset parser metadata. Note that we clear via memset to make - * this routine indepenent of any additional adds to packet metadata. - */ - const size_t start_offset = ODP_FIELD_SIZEOF(odp_packet_hdr_t, buf_hdr); - uint8_t *start; - size_t len; + pkt_hdr->input_flags.all = 0; + pkt_hdr->output_flags.all = 0; + pkt_hdr->error_flags.all = 0; - start = (uint8_t *)pkt_hdr + start_offset; - len = sizeof(odp_packet_hdr_t) - start_offset; - memset(start, 0, len); - - /* Set metadata items that initialize to non-zero values */ + pkt_hdr->l2_offset = 0; pkt_hdr->l3_offset = ODP_PACKET_OFFSET_INVALID; pkt_hdr->l4_offset = ODP_PACKET_OFFSET_INVALID; pkt_hdr->payload_offset = ODP_PACKET_OFFSET_INVALID; + pkt_hdr->input = ODP_PKTIO_INVALID; + /* Disable lazy parsing on user allocated packets */ if (!parse) packet_parse_disable(pkt_hdr);
Using memset to initialize struct odp_packet_hdr_t contents to 0 has a significant overhead. Instead, initialize only the required members of the struct. Signed-off-by: Matias Elo <matias.elo@nokia.com> --- platform/linux-generic/include/odp_packet_internal.h | 8 +++++--- platform/linux-generic/odp_packet.c | 18 ++++++------------ 2 files changed, 11 insertions(+), 15 deletions(-)