[API-NEXTv5,3/3] linux-generic: classification implement packet hashing

Message ID 1501160903-25098-3-git-send-email-bala.manoharan@linaro.org
State New
Headers show
Series
  • [API-NEXTv5,1/3] api: classification: add support for packet hashing in classification
Related show

Commit Message

Balasubramanian Manoharan July 27, 2017, 1:08 p.m.
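Implement packet hashing in the linux-generic classifier: a CoS created with
num_queue > 1 gets a group of queues, and each classified packet is delivered
to one queue of the group, selected by an RSS hash computed over the packet's
IPv4/IPv6 addresses and TCP/UDP ports according to the CoS hash_proto setting.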

Signed-off-by: Balasubramanian Manoharan <bala.manoharan@linaro.org>

---
 .../include/odp_classification_datamodel.h         |  32 ++-
 .../include/odp_classification_internal.h          |   7 +
 platform/linux-generic/odp_classification.c        | 234 ++++++++++++++++++---
 3 files changed, 244 insertions(+), 29 deletions(-)

-- 
1.9.1

Comments

Bill Fischofer July 28, 2017, 1:56 p.m. | #1
Compilation fails with clang:

  CC       odp_classification.lo
In file included from odp_classification.c:20:
./include/protocols/thash.h:89:30: error: cast from 'const uint8_t *' (aka
      'const unsigned char *') to 'const uint32_t *'
      (aka 'const unsigned int *') increases required alignment from 1 to 4
      [-Werror,-Wcast-align]
                                ret ^= odp_cpu_to_be_32(((const uint32_t *)
                                                         ^~~~~~~~~~~~~~~~~~
./include/protocols/thash.h:91:24: error: cast from 'const uint8_t *' (aka
      'const unsigned char *') to 'const uint32_t *'
      (aka 'const unsigned int *') increases required alignment from 1 to 4
      [-Werror,-Wcast-align]
                                (odp_cpu_to_be_32(((const uint32_t *)key)
                                                   ^~~~~~~~~~~~~~~~~~~~~
2 errors generated.
Makefile:1220: recipe for target 'odp_classification.lo' failed


On Thu, Jul 27, 2017 at 8:08 AM, Balasubramanian Manoharan <
bala.manoharan@linaro.org> wrote:

> implementation for packet hashing on classification

>

> Signed-off-by: Balasubramanian Manoharan <bala.manoharan@linaro.org>

> ---

>  .../include/odp_classification_datamodel.h         |  32 ++-

>  .../include/odp_classification_internal.h          |   7 +

>  platform/linux-generic/odp_classification.c        | 234

> ++++++++++++++++++---

>  3 files changed, 244 insertions(+), 29 deletions(-)

>

> diff --git a/platform/linux-generic/include/odp_classification_datamodel.h

> b/platform/linux-generic/include/odp_classification_datamodel.h

> index fbe10cb..3cfe8c5 100644

> --- a/platform/linux-generic/include/odp_classification_datamodel.h

> +++ b/platform/linux-generic/include/odp_classification_datamodel.h

> @@ -46,6 +46,22 @@ extern "C" {

>  /* Max PMR Term bits */

>  #define ODP_PMR_TERM_BYTES_MAX         16

>

> +#define ODP_COS_QUEUE_MAX              32

> +

> +#define ODP_CLS_QUEUE_GROUP_MAX                (ODP_COS_MAX_ENTRY *

> ODP_COS_QUEUE_MAX)

> +

> +typedef union {

> +       /* All proto fields */

> +       uint32_t all;

> +

> +       struct {

> +               uint32_t ipv4:1;

> +               uint32_t ipv6:1;

> +               uint32_t udp:1;

> +               uint32_t tcp:1;

> +       };

> +} odp_cls_hash_proto_t;

> +

>  /**

>  Packet Matching Rule Term Value

>

> @@ -85,7 +101,7 @@ typedef struct pmr_term_value {

>  Class Of Service

>  */

>  struct cos_s {

> -       queue_t queue;                  /* Associated Queue */

> +       odp_queue_t queue;                      /* Associated Queue */

>         odp_pool_t pool;                /* Associated Buffer pool */

>         union pmr_u *pmr[ODP_PMR_PER_COS_MAX];  /* Chained PMR */

>         union cos_u *linked_cos[ODP_PMR_PER_COS_MAX]; /* Chained CoS with

> PMR*/

> @@ -94,7 +110,12 @@ struct cos_s {

>         size_t headroom;                /* Headroom for this CoS */

>         odp_spinlock_t lock;            /* cos lock */

>         odp_atomic_u32_t num_rule;      /* num of PMRs attached with this

> CoS */

> +       bool queue_group;

> +       odp_cls_hash_proto_t hash_proto;

> +       uint32_t num_queue;

> +       odp_queue_param_t queue_param;

>         char name[ODP_COS_NAME_LEN];    /* name */

> +       uint8_t index;

>  };

>

>  typedef union cos_u {

> @@ -122,6 +143,15 @@ typedef union pmr_u {

>         uint8_t pad[ROUNDUP_CACHE_LINE(sizeof(struct pmr_s))];

>  } pmr_t;

>

> +typedef struct _cls_queue_grp_tbl_s {

> +       odp_queue_t queue[ODP_CLS_QUEUE_GROUP_MAX];

> +} _cls_queue_grp_tbl_s;

> +

> +typedef union _cls_queue_grp_tbl_t {

> +       _cls_queue_grp_tbl_s s;

> +       uint8_t pad[ROUNDUP_CACHE_LINE(sizeof(_cls_queue_grp_tbl_s))];

> +} _cls_queue_grp_tbl_t;

> +

>  /**

>  L2 QoS and CoS Map

>

> diff --git a/platform/linux-generic/include/odp_classification_internal.h

> b/platform/linux-generic/include/odp_classification_internal.h

> index 78eaac9..aca8e8c 100644

> --- a/platform/linux-generic/include/odp_classification_internal.h

> +++ b/platform/linux-generic/include/odp_classification_internal.h

> @@ -79,6 +79,9 @@ This function returns the CoS associated with L3 QoS

> value

>  cos_t *match_qos_l3_cos(pmr_l3_cos_t *l3_cos, const uint8_t *pkt_addr,

>                         odp_packet_hdr_t *hdr);

>

> +void _odp_cls_update_hash_proto(cos_t *cos,

> +                               odp_pktin_hash_proto_t hash_proto);

> +

>  /**

>  @internal

>  CoS associated with L2 QoS value

> @@ -138,6 +141,10 @@ Otherwise.

>  **/

>  int verify_pmr(pmr_t *pmr, const uint8_t *pkt_addr, odp_packet_hdr_t

> *pkt_hdr);

>

> +uint32_t packet_rss_hash(odp_packet_hdr_t *pkt_hdr,

> +                        odp_cls_hash_proto_t hash_proto,

> +                        const uint8_t *base);

> +

>  #ifdef __cplusplus

>  }

>  #endif

> diff --git a/platform/linux-generic/odp_classification.c

> b/platform/linux-generic/odp_classification.c

> index cc08b9f..1959493 100644

> --- a/platform/linux-generic/odp_classification.c

> +++ b/platform/linux-generic/odp_classification.c

> @@ -17,6 +17,7 @@

>  #include <odp_classification_inlines.h>

>  #include <odp_classification_internal.h>

>  #include <odp/api/shared_memory.h>

> +#include <protocols/thash.h>

>  #include <protocols/eth.h>

>  #include <protocols/ip.h>

>  #include <string.h>

> @@ -30,6 +31,15 @@

>

>  static cos_tbl_t *cos_tbl;

>  static pmr_tbl_t       *pmr_tbl;

> +static _cls_queue_grp_tbl_t *queue_grp_tbl;

> +

> +const uint8_t default_rss[] = {

> +       0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,

> +       0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,

> +       0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,

> +       0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,

> +       0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,

> +};

>

>  cos_t *get_cos_entry_internal(odp_cos_t cos_id)

>  {

> @@ -45,6 +55,7 @@ int odp_classification_init_global(void)

>  {

>         odp_shm_t cos_shm;

>         odp_shm_t pmr_shm;

> +       odp_shm_t queue_grp_shm;

>         int i;

>

>         cos_shm = odp_shm_reserve("shm_odp_cos_tbl",

> @@ -89,8 +100,22 @@ int odp_classification_init_global(void)

>                 LOCK_INIT(&pmr->s.lock);

>         }

>

> +       queue_grp_shm = odp_shm_reserve("shm_odp_cls_queue_grp_tbl",

> +                                       sizeof(_cls_queue_grp_tbl_t),

> +                                       sizeof(queue_entry_t *), 0);

> +

> +       if (queue_grp_shm == ODP_SHM_INVALID) {

> +               ODP_ERR("shm allocation failed for queue_grp_tbl");

> +               goto error_queue_grp;

> +       }

> +

> +       queue_grp_tbl = odp_shm_addr(queue_grp_shm);

> +       memset(queue_grp_tbl, 0, sizeof(_cls_queue_grp_tbl_t));

> +

>         return 0;

>

> +error_queue_grp:

> +       odp_shm_free(queue_grp_shm);

>  error_pmr:

>         odp_shm_free(pmr_shm);

>  error_cos:

> @@ -124,6 +149,8 @@ void odp_cls_cos_param_init(odp_cls_cos_param_t

> *param)

>         param->queue = ODP_QUEUE_INVALID;

>         param->pool = ODP_POOL_INVALID;

>         param->drop_policy = ODP_COS_DROP_NEVER;

> +       param->num_queue = 1;

> +       odp_queue_param_init(&param->queue_param);

>  }

>

>  void odp_cls_pmr_param_init(odp_pmr_param_t *param)

> @@ -154,24 +181,39 @@ int odp_cls_capability(odp_cls_capability_t

> *capability)

>         return 0;

>  }

>

> +void _odp_cls_update_hash_proto(cos_t *cos, odp_pktin_hash_proto_t

> hash_proto)

> +{

> +       if (hash_proto.proto.ipv4 || hash_proto.proto.ipv4_tcp ||

> +           hash_proto.proto.ipv4_udp)

> +               cos->s.hash_proto.ipv4 = 1;

> +       if (hash_proto.proto.ipv6 || hash_proto.proto.ipv6_tcp ||

> +           hash_proto.proto.ipv6_udp)

> +               cos->s.hash_proto.ipv6 = 1;

> +       if (hash_proto.proto.ipv4_tcp || hash_proto.proto.ipv6_tcp)

> +               cos->s.hash_proto.tcp = 1;

> +       if (hash_proto.proto.ipv4_udp || hash_proto.proto.ipv6_udp)

> +               cos->s.hash_proto.udp = 1;

> +}

> +

>  odp_cos_t odp_cls_cos_create(const char *name, odp_cls_cos_param_t *param)

>  {

>         int i, j;

> -       queue_t queue;

> +       odp_queue_t queue;

>         odp_cls_drop_t drop_policy;

> +       cos_t *cos;

> +       uint32_t tbl_index;

>

>         /* Packets are dropped if Queue or Pool is invalid*/

> -       if (param->queue == ODP_QUEUE_INVALID)

> -               queue = QUEUE_NULL;

> -       else

> -               queue = queue_fn->from_ext(param->queue);

> +       if (param->num_queue > ODP_COS_QUEUE_MAX)

> +               return ODP_COS_INVALID;

>

>         drop_policy = param->drop_policy;

>

>         for (i = 0; i < ODP_COS_MAX_ENTRY; i++) {

> -               LOCK(&cos_tbl->cos_entry[i].s.lock);

> -               if (0 == cos_tbl->cos_entry[i].s.valid) {

> -                       char *cos_name = cos_tbl->cos_entry[i].s.name;

> +               cos = &cos_tbl->cos_entry[i];

> +               LOCK(&cos->s.lock);

> +               if (0 == cos->s.valid) {

> +                       char *cos_name = cos->s.name;

>

>                         if (name == NULL) {

>                                 cos_name[0] = 0;

> @@ -180,20 +222,43 @@ odp_cos_t odp_cls_cos_create(const char *name,

> odp_cls_cos_param_t *param)

>                                 cos_name[ODP_COS_NAME_LEN - 1] = 0;

>                         }

>                         for (j = 0; j < ODP_PMR_PER_COS_MAX; j++) {

> -                               cos_tbl->cos_entry[i].s.pmr[j] = NULL;

> -                               cos_tbl->cos_entry[i].s.linked_cos[j] =

> NULL;

> +                               cos->s.pmr[j] = NULL;

> +                               cos->s.linked_cos[j] = NULL;

> +                       }

> +

> +                       if (param->num_queue > 1) {

> +                               odp_queue_param_init(&cos->s.queue_param);

> +                               cos->s.queue_group = true;

> +                               cos->s.queue = ODP_QUEUE_INVALID;

> +                               cos->s.num_queue = param->num_queue;

> +                               _odp_cls_update_hash_proto(cos,

> +

> param->hash_proto);

> +                               tbl_index = cos->s.index *

> ODP_COS_QUEUE_MAX;

> +                               for (j = 0; j < ODP_COS_QUEUE_MAX; j++) {

> +                                       queue = odp_queue_create(NULL,

> &cos->s.

> +

> queue_param);

> +                                       if (queue == ODP_QUEUE_INVALID) {

> +                                               UNLOCK(&cos->s.lock);

> +                                               return ODP_COS_INVALID;

> +                                       }

> +                                       queue_grp_tbl->s.queue[tbl_index

> + j] =

> +                                                       queue;

> +                               }

> +

> +                       } else {

> +                               cos->s.queue = param->queue;

>                         }

> -                       cos_tbl->cos_entry[i].s.queue = queue;

> -                       cos_tbl->cos_entry[i].s.pool = param->pool;

> -                       cos_tbl->cos_entry[i].s.headroom = 0;

> -                       cos_tbl->cos_entry[i].s.valid = 1;

> -                       cos_tbl->cos_entry[i].s.drop_policy = drop_policy;

> -                       odp_atomic_init_u32(&cos_tbl->cos_entry[i]

> -                                           .s.num_rule, 0);

> -                       UNLOCK(&cos_tbl->cos_entry[i].s.lock);

> +

> +                       cos->s.pool = param->pool;

> +                       cos->s.headroom = 0;

> +                       cos->s.valid = 1;

> +                       cos->s.drop_policy = drop_policy;

> +                       odp_atomic_init_u32(&cos->s.num_rule, 0);

> +                       cos->s.index = i;

> +                       UNLOCK(&cos->s.lock);

>                         return _odp_cast_scalar(odp_cos_t, i);

>                 }

> -               UNLOCK(&cos_tbl->cos_entry[i].s.lock);

> +               UNLOCK(&cos->s.lock);

>         }

>

>         ODP_ERR("ODP_COS_MAX_ENTRY reached");

> @@ -263,10 +328,7 @@ int odp_cos_queue_set(odp_cos_t cos_id, odp_queue_t

> queue_id)

>         }

>         /* Locking is not required as intermittent stale

>         data during CoS modification is acceptable*/

> -       if (queue_id == ODP_QUEUE_INVALID)

> -               cos->s.queue = QUEUE_NULL;

> -       else

> -               cos->s.queue = queue_fn->from_ext(queue_id);

> +       cos->s.queue = queue_id;

>         return 0;

>  }

>

> @@ -279,10 +341,45 @@ odp_queue_t odp_cos_queue(odp_cos_t cos_id)

>                 return ODP_QUEUE_INVALID;

>         }

>

> -       if (cos->s.queue == QUEUE_NULL)

> -               return ODP_QUEUE_INVALID;

> +       return cos->s.queue;

> +}

> +

> +uint32_t odp_cls_cos_num_queue(odp_cos_t cos_id)

> +{

> +       cos_t *cos = get_cos_entry(cos_id);

> +

> +       if (!cos) {

> +               ODP_ERR("Invalid odp_cos_t handle");

> +               return 0;

> +       }

> +

> +       return cos->s.num_queue;

> +}

> +

> +uint32_t odp_cls_cos_queues(odp_cos_t cos_id, odp_queue_t queue[],

> +                           uint32_t num)

> +{

> +       uint32_t num_queues;

> +       cos_t *cos;

> +       uint32_t tbl_index;

> +       uint32_t i;

> +

> +       cos  = get_cos_entry(cos_id);

> +       if (!cos) {

> +               ODP_ERR("Invalid odp_cos_t handle");

> +               return 0;

> +       }

>

> -       return queue_fn->to_ext(cos->s.queue);

> +       if (num < cos->s.num_queue)

> +               num_queues = num;

> +       else

> +               num_queues = cos->s.num_queue;

> +

> +       tbl_index = cos->s.index * ODP_COS_QUEUE_MAX;

> +       for (i = 0; i < num_queues; i++)

> +               queue[i] = queue_grp_tbl->s.queue[tbl_index + i];

> +

> +       return num_queues;

>  }

>

>  int odp_cos_drop_set(odp_cos_t cos_id, odp_cls_drop_t drop_policy)

> @@ -830,6 +927,8 @@ int cls_classify_packet(pktio_entry_t *entry, const

> uint8_t *base,

>                         odp_packet_hdr_t *pkt_hdr)

>  {

>         cos_t *cos;

> +       uint32_t tbl_index;

> +       uint32_t hash;

>

>         packet_parse_reset(pkt_hdr);

>         packet_set_len(pkt_hdr, pkt_len);

> @@ -841,16 +940,95 @@ int cls_classify_packet(pktio_entry_t *entry, const

> uint8_t *base,

>         if (cos == NULL)

>                 return -EINVAL;

>

> -       if (cos->s.queue == QUEUE_NULL || cos->s.pool == ODP_POOL_INVALID)

> +       if (cos->s.queue == ODP_QUEUE_INVALID ||

> +           cos->s.pool == ODP_POOL_INVALID)

>                 return -EFAULT;

>

>         *pool = cos->s.pool;

>         pkt_hdr->p.input_flags.dst_queue = 1;

> -       pkt_hdr->dst_queue = cos->s.queue;

>

> +       if (!cos->s.queue_group) {

> +               pkt_hdr->dst_queue = queue_fn->from_ext(cos->s.queue);

> +               return 0;

> +       }

> +

> +       hash = packet_rss_hash(pkt_hdr, cos->s.hash_proto, base);

> +       /* ODP_COS_QUEUE_MAX is a power of 2 */

> +       hash = hash & (ODP_COS_QUEUE_MAX - 1);

> +       tbl_index = (cos->s.index * ODP_COS_QUEUE_MAX) + hash;

> +       pkt_hdr->dst_queue = queue_fn->from_ext(queue_grp_tbl->

> +                                               s.queue[tbl_index]);

>         return 0;

>  }

>

> +uint32_t packet_rss_hash(odp_packet_hdr_t *pkt_hdr,

> +                        odp_cls_hash_proto_t hash_proto,

> +                        const uint8_t *base)

> +{

> +       thash_tuple_t tuple;

> +       const _odp_ipv4hdr_t *ipv4;

> +       const _odp_udphdr_t *udp;

> +       const _odp_tcphdr_t *tcp;

> +       const _odp_ipv6hdr_t *ipv6;

> +       uint32_t hash;

> +       uint32_t tuple_len;

> +

> +       tuple_len = 0;

> +       hash = 0;

> +       if (pkt_hdr->p.input_flags.ipv4) {

> +               if (hash_proto.ipv4) {

> +                       /* add ipv4 */

> +                       ipv4 = (const _odp_ipv4hdr_t *)base +

> +                               pkt_hdr->p.l3_offset;

> +                       tuple.v4.src_addr = ipv4->src_addr;

> +                       tuple.v4.dst_addr = ipv4->dst_addr;

> +                       tuple_len += 2;

> +               }

> +

> +               if (pkt_hdr->p.input_flags.tcp && hash_proto.tcp) {

> +                       /* add tcp */

> +                       tcp = (const _odp_tcphdr_t *)base +

> +                              pkt_hdr->p.l4_offset;

> +                       tuple.v4.sport = tcp->src_port;

> +                       tuple.v4.dport = tcp->dst_port;

> +                       tuple_len += 1;

> +               } else if (pkt_hdr->p.input_flags.udp && hash_proto.udp) {

> +                       /* add udp */

> +                       udp = (const _odp_udphdr_t *)base +

> +                              pkt_hdr->p.l4_offset;

> +                       tuple.v4.sport = udp->src_port;

> +                       tuple.v4.dport = udp->dst_port;

> +                       tuple_len += 1;

> +               }

> +       } else if (pkt_hdr->p.input_flags.ipv6) {

> +               if (hash_proto.ipv6) {

> +                       /* add ipv6 */

> +                       ipv6 = (const _odp_ipv6hdr_t *)base +

> +                               pkt_hdr->p.l3_offset;

> +                       thash_load_ipv6_addr(ipv6, &tuple);

> +                       tuple_len += 8;

> +               }

> +               if (pkt_hdr->p.input_flags.tcp && hash_proto.tcp) {

> +                       tcp = (const _odp_tcphdr_t *)base +

> +                              pkt_hdr->p.l4_offset;

> +                       tuple.v4.sport = tcp->src_port;

> +                       tuple.v4.dport = tcp->dst_port;

> +                       tuple_len += 1;

> +               } else if (pkt_hdr->p.input_flags.udp && hash_proto.udp) {

> +                       /* add udp */

> +                       udp = (const _odp_udphdr_t *)base +

> +                              pkt_hdr->p.l4_offset;

> +                       tuple.v4.sport = udp->src_port;

> +                       tuple.v4.dport = udp->dst_port;

> +                       tuple_len += 1;

> +               }

> +       }

> +       if (tuple_len)

> +               hash = thash_softrss((uint32_t *)&tuple,

> +                                    tuple_len, default_rss);

> +       return hash;

> +}

> +

>  cos_t *match_qos_l3_cos(pmr_l3_cos_t *l3_cos, const uint8_t *pkt_addr,

>                         odp_packet_hdr_t *hdr)

>  {

> --

> 1.9.1

>

>

Patch

diff --git a/platform/linux-generic/include/odp_classification_datamodel.h b/platform/linux-generic/include/odp_classification_datamodel.h
index fbe10cb..3cfe8c5 100644
--- a/platform/linux-generic/include/odp_classification_datamodel.h
+++ b/platform/linux-generic/include/odp_classification_datamodel.h
@@ -46,6 +46,22 @@  extern "C" {
 /* Max PMR Term bits */
 #define ODP_PMR_TERM_BYTES_MAX		16
 
+#define ODP_COS_QUEUE_MAX		32
+
+#define ODP_CLS_QUEUE_GROUP_MAX		(ODP_COS_MAX_ENTRY * ODP_COS_QUEUE_MAX)
+
+typedef union {
+	/* All proto fields */
+	uint32_t all;
+
+	struct {
+		uint32_t ipv4:1;
+		uint32_t ipv6:1;
+		uint32_t udp:1;
+		uint32_t tcp:1;
+	};
+} odp_cls_hash_proto_t;
+
 /**
 Packet Matching Rule Term Value
 
@@ -85,7 +101,7 @@  typedef struct pmr_term_value {
 Class Of Service
 */
 struct cos_s {
-	queue_t queue;			/* Associated Queue */
+	odp_queue_t queue;			/* Associated Queue */
 	odp_pool_t pool;		/* Associated Buffer pool */
 	union pmr_u *pmr[ODP_PMR_PER_COS_MAX];	/* Chained PMR */
 	union cos_u *linked_cos[ODP_PMR_PER_COS_MAX]; /* Chained CoS with PMR*/
@@ -94,7 +110,12 @@  struct cos_s {
 	size_t headroom;		/* Headroom for this CoS */
 	odp_spinlock_t lock;		/* cos lock */
 	odp_atomic_u32_t num_rule;	/* num of PMRs attached with this CoS */
+	bool queue_group;
+	odp_cls_hash_proto_t hash_proto;
+	uint32_t num_queue;
+	odp_queue_param_t queue_param;
 	char name[ODP_COS_NAME_LEN];	/* name */
+	uint8_t index;
 };
 
 typedef union cos_u {
@@ -122,6 +143,15 @@  typedef union pmr_u {
 	uint8_t pad[ROUNDUP_CACHE_LINE(sizeof(struct pmr_s))];
 } pmr_t;
 
+typedef struct _cls_queue_grp_tbl_s {
+	odp_queue_t queue[ODP_CLS_QUEUE_GROUP_MAX];
+} _cls_queue_grp_tbl_s;
+
+typedef union _cls_queue_grp_tbl_t {
+	_cls_queue_grp_tbl_s s;
+	uint8_t pad[ROUNDUP_CACHE_LINE(sizeof(_cls_queue_grp_tbl_s))];
+} _cls_queue_grp_tbl_t;
+
 /**
 L2 QoS and CoS Map
 
diff --git a/platform/linux-generic/include/odp_classification_internal.h b/platform/linux-generic/include/odp_classification_internal.h
index 78eaac9..aca8e8c 100644
--- a/platform/linux-generic/include/odp_classification_internal.h
+++ b/platform/linux-generic/include/odp_classification_internal.h
@@ -79,6 +79,9 @@  This function returns the CoS associated with L3 QoS value
 cos_t *match_qos_l3_cos(pmr_l3_cos_t *l3_cos, const uint8_t *pkt_addr,
 			odp_packet_hdr_t *hdr);
 
+void _odp_cls_update_hash_proto(cos_t *cos,
+				odp_pktin_hash_proto_t hash_proto);
+
 /**
 @internal
 CoS associated with L2 QoS value
@@ -138,6 +141,10 @@  Otherwise.
 **/
 int verify_pmr(pmr_t *pmr, const uint8_t *pkt_addr, odp_packet_hdr_t *pkt_hdr);
 
+uint32_t packet_rss_hash(odp_packet_hdr_t *pkt_hdr,
+			 odp_cls_hash_proto_t hash_proto,
+			 const uint8_t *base);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/platform/linux-generic/odp_classification.c b/platform/linux-generic/odp_classification.c
index cc08b9f..1959493 100644
--- a/platform/linux-generic/odp_classification.c
+++ b/platform/linux-generic/odp_classification.c
@@ -17,6 +17,7 @@ 
 #include <odp_classification_inlines.h>
 #include <odp_classification_internal.h>
 #include <odp/api/shared_memory.h>
+#include <protocols/thash.h>
 #include <protocols/eth.h>
 #include <protocols/ip.h>
 #include <string.h>
@@ -30,6 +31,15 @@ 
 
 static cos_tbl_t *cos_tbl;
 static pmr_tbl_t	*pmr_tbl;
+static _cls_queue_grp_tbl_t *queue_grp_tbl;
+
+const uint8_t default_rss[] = {
+	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
+};
 
 cos_t *get_cos_entry_internal(odp_cos_t cos_id)
 {
@@ -45,6 +55,7 @@  int odp_classification_init_global(void)
 {
 	odp_shm_t cos_shm;
 	odp_shm_t pmr_shm;
+	odp_shm_t queue_grp_shm;
 	int i;
 
 	cos_shm = odp_shm_reserve("shm_odp_cos_tbl",
@@ -89,8 +100,22 @@  int odp_classification_init_global(void)
 		LOCK_INIT(&pmr->s.lock);
 	}
 
+	queue_grp_shm = odp_shm_reserve("shm_odp_cls_queue_grp_tbl",
+					sizeof(_cls_queue_grp_tbl_t),
+					sizeof(queue_entry_t *), 0);
+
+	if (queue_grp_shm == ODP_SHM_INVALID) {
+		ODP_ERR("shm allocation failed for queue_grp_tbl");
+		goto error_queue_grp;
+	}
+
+	queue_grp_tbl = odp_shm_addr(queue_grp_shm);
+	memset(queue_grp_tbl, 0, sizeof(_cls_queue_grp_tbl_t));
+
 	return 0;
 
+error_queue_grp:
+	odp_shm_free(queue_grp_shm);
 error_pmr:
 	odp_shm_free(pmr_shm);
 error_cos:
@@ -124,6 +149,8 @@  void odp_cls_cos_param_init(odp_cls_cos_param_t *param)
 	param->queue = ODP_QUEUE_INVALID;
 	param->pool = ODP_POOL_INVALID;
 	param->drop_policy = ODP_COS_DROP_NEVER;
+	param->num_queue = 1;
+	odp_queue_param_init(&param->queue_param);
 }
 
 void odp_cls_pmr_param_init(odp_pmr_param_t *param)
@@ -154,24 +181,39 @@  int odp_cls_capability(odp_cls_capability_t *capability)
 	return 0;
 }
 
+void _odp_cls_update_hash_proto(cos_t *cos, odp_pktin_hash_proto_t hash_proto)
+{
+	if (hash_proto.proto.ipv4 || hash_proto.proto.ipv4_tcp ||
+	    hash_proto.proto.ipv4_udp)
+		cos->s.hash_proto.ipv4 = 1;
+	if (hash_proto.proto.ipv6 || hash_proto.proto.ipv6_tcp ||
+	    hash_proto.proto.ipv6_udp)
+		cos->s.hash_proto.ipv6 = 1;
+	if (hash_proto.proto.ipv4_tcp || hash_proto.proto.ipv6_tcp)
+		cos->s.hash_proto.tcp = 1;
+	if (hash_proto.proto.ipv4_udp || hash_proto.proto.ipv6_udp)
+		cos->s.hash_proto.udp = 1;
+}
+
 odp_cos_t odp_cls_cos_create(const char *name, odp_cls_cos_param_t *param)
 {
 	int i, j;
-	queue_t queue;
+	odp_queue_t queue;
 	odp_cls_drop_t drop_policy;
+	cos_t *cos;
+	uint32_t tbl_index;
 
 	/* Packets are dropped if Queue or Pool is invalid*/
-	if (param->queue == ODP_QUEUE_INVALID)
-		queue = QUEUE_NULL;
-	else
-		queue = queue_fn->from_ext(param->queue);
+	if (param->num_queue > ODP_COS_QUEUE_MAX)
+		return ODP_COS_INVALID;
 
 	drop_policy = param->drop_policy;
 
 	for (i = 0; i < ODP_COS_MAX_ENTRY; i++) {
-		LOCK(&cos_tbl->cos_entry[i].s.lock);
-		if (0 == cos_tbl->cos_entry[i].s.valid) {
-			char *cos_name = cos_tbl->cos_entry[i].s.name;
+		cos = &cos_tbl->cos_entry[i];
+		LOCK(&cos->s.lock);
+		if (0 == cos->s.valid) {
+			char *cos_name = cos->s.name;
 
 			if (name == NULL) {
 				cos_name[0] = 0;
@@ -180,20 +222,43 @@  odp_cos_t odp_cls_cos_create(const char *name, odp_cls_cos_param_t *param)
 				cos_name[ODP_COS_NAME_LEN - 1] = 0;
 			}
 			for (j = 0; j < ODP_PMR_PER_COS_MAX; j++) {
-				cos_tbl->cos_entry[i].s.pmr[j] = NULL;
-				cos_tbl->cos_entry[i].s.linked_cos[j] = NULL;
+				cos->s.pmr[j] = NULL;
+				cos->s.linked_cos[j] = NULL;
+			}
+
+			if (param->num_queue > 1) {
+				odp_queue_param_init(&cos->s.queue_param);
+				cos->s.queue_group = true;
+				cos->s.queue = ODP_QUEUE_INVALID;
+				cos->s.num_queue = param->num_queue;
+				_odp_cls_update_hash_proto(cos,
+							   param->hash_proto);
+				tbl_index = cos->s.index * ODP_COS_QUEUE_MAX;
+				for (j = 0; j < ODP_COS_QUEUE_MAX; j++) {
+					queue = odp_queue_create(NULL, &cos->s.
+								 queue_param);
+					if (queue == ODP_QUEUE_INVALID) {
+						UNLOCK(&cos->s.lock);
+						return ODP_COS_INVALID;
+					}
+					queue_grp_tbl->s.queue[tbl_index + j] =
+							queue;
+				}
+
+			} else {
+				cos->s.queue = param->queue;
 			}
-			cos_tbl->cos_entry[i].s.queue = queue;
-			cos_tbl->cos_entry[i].s.pool = param->pool;
-			cos_tbl->cos_entry[i].s.headroom = 0;
-			cos_tbl->cos_entry[i].s.valid = 1;
-			cos_tbl->cos_entry[i].s.drop_policy = drop_policy;
-			odp_atomic_init_u32(&cos_tbl->cos_entry[i]
-					    .s.num_rule, 0);
-			UNLOCK(&cos_tbl->cos_entry[i].s.lock);
+
+			cos->s.pool = param->pool;
+			cos->s.headroom = 0;
+			cos->s.valid = 1;
+			cos->s.drop_policy = drop_policy;
+			odp_atomic_init_u32(&cos->s.num_rule, 0);
+			cos->s.index = i;
+			UNLOCK(&cos->s.lock);
 			return _odp_cast_scalar(odp_cos_t, i);
 		}
-		UNLOCK(&cos_tbl->cos_entry[i].s.lock);
+		UNLOCK(&cos->s.lock);
 	}
 
 	ODP_ERR("ODP_COS_MAX_ENTRY reached");
@@ -263,10 +328,7 @@  int odp_cos_queue_set(odp_cos_t cos_id, odp_queue_t queue_id)
 	}
 	/* Locking is not required as intermittent stale
 	data during CoS modification is acceptable*/
-	if (queue_id == ODP_QUEUE_INVALID)
-		cos->s.queue = QUEUE_NULL;
-	else
-		cos->s.queue = queue_fn->from_ext(queue_id);
+	cos->s.queue = queue_id;
 	return 0;
 }
 
@@ -279,10 +341,45 @@  odp_queue_t odp_cos_queue(odp_cos_t cos_id)
 		return ODP_QUEUE_INVALID;
 	}
 
-	if (cos->s.queue == QUEUE_NULL)
-		return ODP_QUEUE_INVALID;
+	return cos->s.queue;
+}
+
+uint32_t odp_cls_cos_num_queue(odp_cos_t cos_id)
+{
+	cos_t *cos = get_cos_entry(cos_id);
+
+	if (!cos) {
+		ODP_ERR("Invalid odp_cos_t handle");
+		return 0;
+	}
+
+	return cos->s.num_queue;
+}
+
+uint32_t odp_cls_cos_queues(odp_cos_t cos_id, odp_queue_t queue[],
+			    uint32_t num)
+{
+	uint32_t num_queues;
+	cos_t *cos;
+	uint32_t tbl_index;
+	uint32_t i;
+
+	cos  = get_cos_entry(cos_id);
+	if (!cos) {
+		ODP_ERR("Invalid odp_cos_t handle");
+		return 0;
+	}
 
-	return queue_fn->to_ext(cos->s.queue);
+	if (num < cos->s.num_queue)
+		num_queues = num;
+	else
+		num_queues = cos->s.num_queue;
+
+	tbl_index = cos->s.index * ODP_COS_QUEUE_MAX;
+	for (i = 0; i < num_queues; i++)
+		queue[i] = queue_grp_tbl->s.queue[tbl_index + i];
+
+	return num_queues;
 }
 
 int odp_cos_drop_set(odp_cos_t cos_id, odp_cls_drop_t drop_policy)
@@ -830,6 +927,8 @@  int cls_classify_packet(pktio_entry_t *entry, const uint8_t *base,
 			odp_packet_hdr_t *pkt_hdr)
 {
 	cos_t *cos;
+	uint32_t tbl_index;
+	uint32_t hash;
 
 	packet_parse_reset(pkt_hdr);
 	packet_set_len(pkt_hdr, pkt_len);
@@ -841,16 +940,95 @@  int cls_classify_packet(pktio_entry_t *entry, const uint8_t *base,
 	if (cos == NULL)
 		return -EINVAL;
 
-	if (cos->s.queue == QUEUE_NULL || cos->s.pool == ODP_POOL_INVALID)
+	if (cos->s.queue == ODP_QUEUE_INVALID ||
+	    cos->s.pool == ODP_POOL_INVALID)
 		return -EFAULT;
 
 	*pool = cos->s.pool;
 	pkt_hdr->p.input_flags.dst_queue = 1;
-	pkt_hdr->dst_queue = cos->s.queue;
 
+	if (!cos->s.queue_group) {
+		pkt_hdr->dst_queue = queue_fn->from_ext(cos->s.queue);
+		return 0;
+	}
+
+	hash = packet_rss_hash(pkt_hdr, cos->s.hash_proto, base);
+	/* ODP_COS_QUEUE_MAX is a power of 2 */
+	hash = hash & (ODP_COS_QUEUE_MAX - 1);
+	tbl_index = (cos->s.index * ODP_COS_QUEUE_MAX) + hash;
+	pkt_hdr->dst_queue = queue_fn->from_ext(queue_grp_tbl->
+						s.queue[tbl_index]);
 	return 0;
 }
 
+uint32_t packet_rss_hash(odp_packet_hdr_t *pkt_hdr,
+			 odp_cls_hash_proto_t hash_proto,
+			 const uint8_t *base)
+{
+	thash_tuple_t tuple;
+	const _odp_ipv4hdr_t *ipv4;
+	const _odp_udphdr_t *udp;
+	const _odp_tcphdr_t *tcp;
+	const _odp_ipv6hdr_t *ipv6;
+	uint32_t hash;
+	uint32_t tuple_len;
+
+	tuple_len = 0;
+	hash = 0;
+	if (pkt_hdr->p.input_flags.ipv4) {
+		if (hash_proto.ipv4) {
+			/* add ipv4 */
+			ipv4 = (const _odp_ipv4hdr_t *)base +
+				pkt_hdr->p.l3_offset;
+			tuple.v4.src_addr = ipv4->src_addr;
+			tuple.v4.dst_addr = ipv4->dst_addr;
+			tuple_len += 2;
+		}
+
+		if (pkt_hdr->p.input_flags.tcp && hash_proto.tcp) {
+			/* add tcp */
+			tcp = (const _odp_tcphdr_t *)base +
+			       pkt_hdr->p.l4_offset;
+			tuple.v4.sport = tcp->src_port;
+			tuple.v4.dport = tcp->dst_port;
+			tuple_len += 1;
+		} else if (pkt_hdr->p.input_flags.udp && hash_proto.udp) {
+			/* add udp */
+			udp = (const _odp_udphdr_t *)base +
+			       pkt_hdr->p.l4_offset;
+			tuple.v4.sport = udp->src_port;
+			tuple.v4.dport = udp->dst_port;
+			tuple_len += 1;
+		}
+	} else if (pkt_hdr->p.input_flags.ipv6) {
+		if (hash_proto.ipv6) {
+			/* add ipv6 */
+			ipv6 = (const _odp_ipv6hdr_t *)base +
+				pkt_hdr->p.l3_offset;
+			thash_load_ipv6_addr(ipv6, &tuple);
+			tuple_len += 8;
+		}
+		if (pkt_hdr->p.input_flags.tcp && hash_proto.tcp) {
+			tcp = (const _odp_tcphdr_t *)base +
+			       pkt_hdr->p.l4_offset;
+			tuple.v4.sport = tcp->src_port;
+			tuple.v4.dport = tcp->dst_port;
+			tuple_len += 1;
+		} else if (pkt_hdr->p.input_flags.udp && hash_proto.udp) {
+			/* add udp */
+			udp = (const _odp_udphdr_t *)base +
+			       pkt_hdr->p.l4_offset;
+			tuple.v4.sport = udp->src_port;
+			tuple.v4.dport = udp->dst_port;
+			tuple_len += 1;
+		}
+	}
+	if (tuple_len)
+		hash = thash_softrss((uint32_t *)&tuple,
+				     tuple_len, default_rss);
+	return hash;
+}
+
 cos_t *match_qos_l3_cos(pmr_l3_cos_t *l3_cos, const uint8_t *pkt_addr,
 			odp_packet_hdr_t *hdr)
 {