diff mbox

[4/4] linux-generic: packet: initialize only selected odp_packet_hdr_t members

Message ID 1463056577-31920-5-git-send-email-matias.elo@nokia.com
State New
Headers show

Commit Message

Elo, Matias (Nokia - FI/Espoo) May 12, 2016, 12:36 p.m. UTC
Using memset to initialize struct odp_packet_hdr_t contents
to 0 has a significant overhead. Instead, initialize only
the required members of the struct.

Signed-off-by: Matias Elo <matias.elo@nokia.com>
---
 platform/linux-generic/include/odp_packet_internal.h |  8 +++++---
 platform/linux-generic/odp_packet.c                  | 18 ++++++------------
 2 files changed, 11 insertions(+), 15 deletions(-)

Comments

Bill Fischofer May 12, 2016, 7:33 p.m. UTC | #1
On Thu, May 12, 2016 at 7:36 AM, Matias Elo <matias.elo@nokia.com> wrote:

> Using memset to initialize struct odp_packet_hdr_t contents

> to 0 has a significant overhead. Instead, initialize only

> the required members of the struct.

>

> Signed-off-by: Matias Elo <matias.elo@nokia.com>

> ---

>  platform/linux-generic/include/odp_packet_internal.h |  8 +++++---

>  platform/linux-generic/odp_packet.c                  | 18

> ++++++------------

>  2 files changed, 11 insertions(+), 15 deletions(-)

>

> diff --git a/platform/linux-generic/include/odp_packet_internal.h

> b/platform/linux-generic/include/odp_packet_internal.h

> index 2a12503..cdee1e1 100644

> --- a/platform/linux-generic/include/odp_packet_internal.h

> +++ b/platform/linux-generic/include/odp_packet_internal.h

> @@ -143,15 +143,17 @@ typedef struct {

>         uint32_t l4_offset; /**< offset to L4 hdr (TCP, UDP, SCTP, also

> ICMP) */

>         uint32_t payload_offset; /**< offset to payload */

>

> -       uint32_t l3_len;         /**< Layer 3 length */

> -       uint32_t l4_len;         /**< Layer 4 length */

> -

>         uint32_t frame_len;

>         uint32_t headroom;

>         uint32_t tailroom;

>

>         odp_pktio_t input;

>

> +       /* Values below are not initialized by packet_init() */

> +

> +       uint32_t l3_len;         /**< Layer 3 length */

> +       uint32_t l4_len;         /**< Layer 4 length */

> +

>         uint32_t flow_hash;      /**< Flow hash value */

>         odp_time_t timestamp;    /**< Timestamp value */

>

> diff --git a/platform/linux-generic/odp_packet.c

> b/platform/linux-generic/odp_packet.c

> index 436265e..f5f224d 100644

> --- a/platform/linux-generic/odp_packet.c

> +++ b/platform/linux-generic/odp_packet.c

> @@ -50,23 +50,17 @@ void packet_parse_reset(odp_packet_hdr_t *pkt_hdr)

>  static void packet_init(pool_entry_t *pool, odp_packet_hdr_t *pkt_hdr,

>                         size_t size, int parse)

>  {

> -       /*

> -       * Reset parser metadata.  Note that we clear via memset to make

> -       * this routine indepenent of any additional adds to packet

> metadata.

> -       */

> -       const size_t start_offset = ODP_FIELD_SIZEOF(odp_packet_hdr_t,

> buf_hdr);

> -       uint8_t *start;

> -       size_t len;

> +       pkt_hdr->input_flags.all  = 0;

> +       pkt_hdr->output_flags.all = 0;

> +       pkt_hdr->error_flags.all  = 0;

>

> -       start = (uint8_t *)pkt_hdr + start_offset;

> -       len = sizeof(odp_packet_hdr_t) - start_offset;

> -       memset(start, 0, len);

>


The reason for this memset is to "future proof" the header by allowing any
new fields that are added to get initialized to a known value without
having to update this routine. Is the impact of this single memset() all
that large?  I can understand wanting to compress the header to possibly
avoid another cache line reference but this optimization seems inherently
error-prone. I'm also wondering if this might confuse tools like Coverity
which seems to be paranoid about uninitialized variables.


> -

> -       /* Set metadata items that initialize to non-zero values */

> +       pkt_hdr->l2_offset = 0;

>         pkt_hdr->l3_offset = ODP_PACKET_OFFSET_INVALID;

>         pkt_hdr->l4_offset = ODP_PACKET_OFFSET_INVALID;

>         pkt_hdr->payload_offset = ODP_PACKET_OFFSET_INVALID;

>

> +       pkt_hdr->input = ODP_PKTIO_INVALID;

> +

>         /* Disable lazy parsing on user allocated packets */

>         if (!parse)

>                 packet_parse_disable(pkt_hdr);

> --

> 1.9.1

>

> _______________________________________________

> lng-odp mailing list

> lng-odp@lists.linaro.org

> https://lists.linaro.org/mailman/listinfo/lng-odp

>
Mike Holmes May 13, 2016, 2:46 p.m. UTC | #2
On 13 May 2016 at 03:29, Elo, Matias (Nokia - FI/Espoo) <
matias.elo@nokia.com> wrote:

>

>

>

>

> *From:* EXT Bill Fischofer [mailto:bill.fischofer@linaro.org]

> *Sent:* Thursday, May 12, 2016 10:34 PM

> *To:* Elo, Matias (Nokia - FI/Espoo) <matias.elo@nokia.com>

> *Cc:* LNG ODP Mailman List <lng-odp@lists.linaro.org>

> *Subject:* Re: [lng-odp] [PATCH 4/4] linux-generic: packet: initialize

> only selected odp_packet_hdr_t members

>

>

>

>

>

>

>

> On Thu, May 12, 2016 at 7:36 AM, Matias Elo <matias.elo@nokia.com> wrote:

>

> Using memset to initialize struct odp_packet_hdr_t contents

> to 0 has a significant overhead. Instead, initialize only

> the required members of the struct.

>

> Signed-off-by: Matias Elo <matias.elo@nokia.com>

> ---

>  platform/linux-generic/include/odp_packet_internal.h |  8 +++++---

>  platform/linux-generic/odp_packet.c                  | 18

> ++++++------------

>  2 files changed, 11 insertions(+), 15 deletions(-)

>

> diff --git a/platform/linux-generic/include/odp_packet_internal.h

> b/platform/linux-generic/include/odp_packet_internal.h

> index 2a12503..cdee1e1 100644

> --- a/platform/linux-generic/include/odp_packet_internal.h

> +++ b/platform/linux-generic/include/odp_packet_internal.h

> @@ -143,15 +143,17 @@ typedef struct {

>         uint32_t l4_offset; /**< offset to L4 hdr (TCP, UDP, SCTP, also

> ICMP) */

>         uint32_t payload_offset; /**< offset to payload */

>

> -       uint32_t l3_len;         /**< Layer 3 length */

> -       uint32_t l4_len;         /**< Layer 4 length */

> -

>         uint32_t frame_len;

>         uint32_t headroom;

>         uint32_t tailroom;

>

>         odp_pktio_t input;

>

> +       /* Values below are not initialized by packet_init() */

> +

> +       uint32_t l3_len;         /**< Layer 3 length */

> +       uint32_t l4_len;         /**< Layer 4 length */

> +

>         uint32_t flow_hash;      /**< Flow hash value */

>         odp_time_t timestamp;    /**< Timestamp value */

>

> diff --git a/platform/linux-generic/odp_packet.c

> b/platform/linux-generic/odp_packet.c

> index 436265e..f5f224d 100644

> --- a/platform/linux-generic/odp_packet.c

> +++ b/platform/linux-generic/odp_packet.c

> @@ -50,23 +50,17 @@ void packet_parse_reset(odp_packet_hdr_t *pkt_hdr)

>  static void packet_init(pool_entry_t *pool, odp_packet_hdr_t *pkt_hdr,

>                         size_t size, int parse)

>  {

> -       /*

> -       * Reset parser metadata.  Note that we clear via memset to make

> -       * this routine indepenent of any additional adds to packet

> metadata.

> -       */

> -       const size_t start_offset = ODP_FIELD_SIZEOF(odp_packet_hdr_t,

> buf_hdr);

> -       uint8_t *start;

> -       size_t len;

> +       pkt_hdr->input_flags.all  = 0;

> +       pkt_hdr->output_flags.all = 0;

> +       pkt_hdr->error_flags.all  = 0;

>

> -       start = (uint8_t *)pkt_hdr + start_offset;

> -       len = sizeof(odp_packet_hdr_t) - start_offset;

> -       memset(start, 0, len);

>

>

>

> The reason for this memset is to "future proof" the header by allowing any

> new fields that are added to get initialized to a known value without

> having to update this routine. Is the impact of this single memset() all

> that large?  I can understand wanting to compress the header to possibly

> avoid another cache line reference but this optimization seems inherently

> error-prone. I'm also wondering if this might confuse tools like Coverity

> which seems to be paranoid about uninitialized variables.

>

>

>

> Hi Bill,

>

>

>

> I can understand the future proofing. However, this single memset() is

> currently the largest cycle hog when forwarding small packets. Below are

> some throughput numbers:

>

>

>

> odp_l2fwd -i p1p1 -c 1 -a 0 (netmap pktio):

>

>

>

> Niantic (1x10Gbps):    7.9 vs 8.6 Mpps (64B) => 8% improvement

>

> Fortville (1x40Gbps):  7.9 vs 8.8 Mpps (64B) => 11% improvement

>

>

>

> When the performance gain is this big I would pay the price of being more

> error-prone. Is there a way to run Coverity before merging the patch?

>


Not easily until we resolve the licence/number of coverty runs per week
issue - on going :/
Coverty might be ok as long as we dont use the field for anything, and  for
11% I am happy to mark it a false positive in coverty if it is flagged
Can we mitigate some of the problems with a doxygen warning on this struct
about its behaviour ?



>

>

> -Matias

>

>

>

> -

> -       /* Set metadata items that initialize to non-zero values */

> +       pkt_hdr->l2_offset = 0;

>         pkt_hdr->l3_offset = ODP_PACKET_OFFSET_INVALID;

>         pkt_hdr->l4_offset = ODP_PACKET_OFFSET_INVALID;

>         pkt_hdr->payload_offset = ODP_PACKET_OFFSET_INVALID;

>

> +       pkt_hdr->input = ODP_PKTIO_INVALID;

> +

>         /* Disable lazy parsing on user allocated packets */

>         if (!parse)

>                 packet_parse_disable(pkt_hdr);

> --

> 1.9.1

>

> _______________________________________________

> lng-odp mailing list

> lng-odp@lists.linaro.org

> https://lists.linaro.org/mailman/listinfo/lng-odp

>

>

>

> _______________________________________________

> lng-odp mailing list

> lng-odp@lists.linaro.org

> https://lists.linaro.org/mailman/listinfo/lng-odp

>

>



-- 
Mike Holmes
Technical Manager - Linaro Networking Group
Linaro.org <http://www.linaro.org/> *│ *Open source software for ARM SoCs
"Work should be fun and collaborative, the rest follows"
Bill Fischofer May 14, 2016, 5:44 p.m. UTC | #3
I agree we can't afford to ignore a major performance gain, however this is
an area that should be documented in the code. Something to the effect that
any new fields added must be reviewed for initialization requirements.

On Fri, May 13, 2016 at 9:46 AM, Mike Holmes <mike.holmes@linaro.org> wrote:

>

>

> On 13 May 2016 at 03:29, Elo, Matias (Nokia - FI/Espoo) <

> matias.elo@nokia.com> wrote:

>

>>

>>

>>

>>

>> *From:* EXT Bill Fischofer [mailto:bill.fischofer@linaro.org]

>> *Sent:* Thursday, May 12, 2016 10:34 PM

>> *To:* Elo, Matias (Nokia - FI/Espoo) <matias.elo@nokia.com>

>> *Cc:* LNG ODP Mailman List <lng-odp@lists.linaro.org>

>> *Subject:* Re: [lng-odp] [PATCH 4/4] linux-generic: packet: initialize

>> only selected odp_packet_hdr_t members

>>

>>

>>

>>

>>

>>

>>

>> On Thu, May 12, 2016 at 7:36 AM, Matias Elo <matias.elo@nokia.com> wrote:

>>

>> Using memset to initialize struct odp_packet_hdr_t contents

>> to 0 has a significant overhead. Instead, initialize only

>> the required members of the struct.

>>

>> Signed-off-by: Matias Elo <matias.elo@nokia.com>

>> ---

>>  platform/linux-generic/include/odp_packet_internal.h |  8 +++++---

>>  platform/linux-generic/odp_packet.c                  | 18

>> ++++++------------

>>  2 files changed, 11 insertions(+), 15 deletions(-)

>>

>> diff --git a/platform/linux-generic/include/odp_packet_internal.h

>> b/platform/linux-generic/include/odp_packet_internal.h

>> index 2a12503..cdee1e1 100644

>> --- a/platform/linux-generic/include/odp_packet_internal.h

>> +++ b/platform/linux-generic/include/odp_packet_internal.h

>> @@ -143,15 +143,17 @@ typedef struct {

>>         uint32_t l4_offset; /**< offset to L4 hdr (TCP, UDP, SCTP, also

>> ICMP) */

>>         uint32_t payload_offset; /**< offset to payload */

>>

>> -       uint32_t l3_len;         /**< Layer 3 length */

>> -       uint32_t l4_len;         /**< Layer 4 length */

>> -

>>         uint32_t frame_len;

>>         uint32_t headroom;

>>         uint32_t tailroom;

>>

>>         odp_pktio_t input;

>>

>> +       /* Values below are not initialized by packet_init() */

>> +

>> +       uint32_t l3_len;         /**< Layer 3 length */

>> +       uint32_t l4_len;         /**< Layer 4 length */

>> +

>>         uint32_t flow_hash;      /**< Flow hash value */

>>         odp_time_t timestamp;    /**< Timestamp value */

>>

>> diff --git a/platform/linux-generic/odp_packet.c

>> b/platform/linux-generic/odp_packet.c

>> index 436265e..f5f224d 100644

>> --- a/platform/linux-generic/odp_packet.c

>> +++ b/platform/linux-generic/odp_packet.c

>> @@ -50,23 +50,17 @@ void packet_parse_reset(odp_packet_hdr_t *pkt_hdr)

>>  static void packet_init(pool_entry_t *pool, odp_packet_hdr_t *pkt_hdr,

>>                         size_t size, int parse)

>>  {

>> -       /*

>> -       * Reset parser metadata.  Note that we clear via memset to make

>> -       * this routine indepenent of any additional adds to packet

>> metadata.

>> -       */

>> -       const size_t start_offset = ODP_FIELD_SIZEOF(odp_packet_hdr_t,

>> buf_hdr);

>> -       uint8_t *start;

>> -       size_t len;

>> +       pkt_hdr->input_flags.all  = 0;

>> +       pkt_hdr->output_flags.all = 0;

>> +       pkt_hdr->error_flags.all  = 0;

>>

>> -       start = (uint8_t *)pkt_hdr + start_offset;

>> -       len = sizeof(odp_packet_hdr_t) - start_offset;

>> -       memset(start, 0, len);

>>

>>

>>

>> The reason for this memset is to "future proof" the header by allowing

>> any new fields that are added to get initialized to a known value without

>> having to update this routine. Is the impact of this single memset() all

>> that large?  I can understand wanting to compress the header to possibly

>> avoid another cache line reference but this optimization seems inherently

>> error-prone. I'm also wondering if this might confuse tools like Coverity

>> which seems to be paranoid about uninitialized variables.

>>

>>

>>

>> Hi Bill,

>>

>>

>>

>> I can understand the future proofing. However, this single memset() is

>> currently the largest cycle hog when forwarding small packets. Below are

>> some throughput numbers:

>>

>>

>>

>> odp_l2fwd -i p1p1 -c 1 -a 0 (netmap pktio):

>>

>>

>>

>> Niantic (1x10Gbps):    7.9 vs 8.6 Mpps (64B) => 8% improvement

>>

>> Fortville (1x40Gbps):  7.9 vs 8.8 Mpps (64B) => 11% improvement

>>

>>

>>

>> When the performance gain is this big I would pay the price of being more

>> error-prone. Is there a way to run Coverity before merging the patch?

>>

>

> Not easily until we resolve the licence/number of coverty runs per week

> issue - on going :/

> Coverty might be ok as long as we dont use the field for anything, and

>  for 11% I am happy to mark it a false positive in coverty if it is flagged

> Can we mitigate some of the problems with a doxygen warning on this struct

> about its behaviour ?

>

>

>

>>

>>

>> -Matias

>>

>>

>>

>> -

>> -       /* Set metadata items that initialize to non-zero values */

>> +       pkt_hdr->l2_offset = 0;

>>         pkt_hdr->l3_offset = ODP_PACKET_OFFSET_INVALID;

>>         pkt_hdr->l4_offset = ODP_PACKET_OFFSET_INVALID;

>>         pkt_hdr->payload_offset = ODP_PACKET_OFFSET_INVALID;

>>

>> +       pkt_hdr->input = ODP_PKTIO_INVALID;

>> +

>>         /* Disable lazy parsing on user allocated packets */

>>         if (!parse)

>>                 packet_parse_disable(pkt_hdr);

>> --

>> 1.9.1

>>

>> _______________________________________________

>> lng-odp mailing list

>> lng-odp@lists.linaro.org

>> https://lists.linaro.org/mailman/listinfo/lng-odp

>>

>>

>>

>> _______________________________________________

>> lng-odp mailing list

>> lng-odp@lists.linaro.org

>> https://lists.linaro.org/mailman/listinfo/lng-odp

>>

>>

>

>

> --

> Mike Holmes

> Technical Manager - Linaro Networking Group

> Linaro.org <http://www.linaro.org/> *│ *Open source software for ARM SoCs

> "Work should be fun and collaborative, the rest follows"

>

>

>
diff mbox

Patch

diff --git a/platform/linux-generic/include/odp_packet_internal.h b/platform/linux-generic/include/odp_packet_internal.h
index 2a12503..cdee1e1 100644
--- a/platform/linux-generic/include/odp_packet_internal.h
+++ b/platform/linux-generic/include/odp_packet_internal.h
@@ -143,15 +143,17 @@  typedef struct {
 	uint32_t l4_offset; /**< offset to L4 hdr (TCP, UDP, SCTP, also ICMP) */
 	uint32_t payload_offset; /**< offset to payload */
 
-	uint32_t l3_len;         /**< Layer 3 length */
-	uint32_t l4_len;         /**< Layer 4 length */
-
 	uint32_t frame_len;
 	uint32_t headroom;
 	uint32_t tailroom;
 
 	odp_pktio_t input;
 
+	/* Values below are not initialized by packet_init() */
+
+	uint32_t l3_len;         /**< Layer 3 length */
+	uint32_t l4_len;         /**< Layer 4 length */
+
 	uint32_t flow_hash;      /**< Flow hash value */
 	odp_time_t timestamp;    /**< Timestamp value */
 
diff --git a/platform/linux-generic/odp_packet.c b/platform/linux-generic/odp_packet.c
index 436265e..f5f224d 100644
--- a/platform/linux-generic/odp_packet.c
+++ b/platform/linux-generic/odp_packet.c
@@ -50,23 +50,17 @@  void packet_parse_reset(odp_packet_hdr_t *pkt_hdr)
 static void packet_init(pool_entry_t *pool, odp_packet_hdr_t *pkt_hdr,
 			size_t size, int parse)
 {
-       /*
-	* Reset parser metadata.  Note that we clear via memset to make
-	* this routine indepenent of any additional adds to packet metadata.
-	*/
-	const size_t start_offset = ODP_FIELD_SIZEOF(odp_packet_hdr_t, buf_hdr);
-	uint8_t *start;
-	size_t len;
+	pkt_hdr->input_flags.all  = 0;
+	pkt_hdr->output_flags.all = 0;
+	pkt_hdr->error_flags.all  = 0;
 
-	start = (uint8_t *)pkt_hdr + start_offset;
-	len = sizeof(odp_packet_hdr_t) - start_offset;
-	memset(start, 0, len);
-
-	/* Set metadata items that initialize to non-zero values */
+	pkt_hdr->l2_offset = 0;
 	pkt_hdr->l3_offset = ODP_PACKET_OFFSET_INVALID;
 	pkt_hdr->l4_offset = ODP_PACKET_OFFSET_INVALID;
 	pkt_hdr->payload_offset = ODP_PACKET_OFFSET_INVALID;
 
+	pkt_hdr->input = ODP_PKTIO_INVALID;
+
 	/* Disable lazy parsing on user allocated packets */
 	if (!parse)
 		packet_parse_disable(pkt_hdr);