diff mbox series

[rdma-next,06/10] RDMA/nldev: Add support to add and remove optional counters

Message ID 20210818112428.209111-7-markzhang@nvidia.com
State New
Headers show
Series Optional counter statistics support | expand

Commit Message

Mark Zhang Aug. 18, 2021, 11:24 a.m. UTC
From: Aharon Landau <aharonl@nvidia.com>

This patch adds the ability to add an optional counter to, or remove it
from, a link through RDMA netlink. Limit it to users with ADMIN capability only.

Examples:
$ sudo rdma statistic add link rocep8s0f0/1 optional-set cc_rx_ce_pkts
$ sudo rdma statistic remove link rocep8s0f0/1 optional-set cc_rx_ce_pkts

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Neta Ostrovsky <netao@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
---
 drivers/infiniband/core/counters.c | 50 ++++++++++++++++
 drivers/infiniband/core/device.c   |  2 +
 drivers/infiniband/core/nldev.c    | 93 ++++++++++++++++++++++++++++++
 include/rdma/ib_verbs.h            |  7 +++
 include/rdma/rdma_counter.h        |  4 ++
 include/rdma/rdma_netlink.h        |  1 +
 include/uapi/rdma/rdma_netlink.h   |  9 +++
 7 files changed, 166 insertions(+)

Comments

Jason Gunthorpe Aug. 23, 2021, 7:42 p.m. UTC | #1
On Wed, Aug 18, 2021 at 02:24:24PM +0300, Mark Zhang wrote:
> From: Aharon Landau <aharonl@nvidia.com>

> 

> This patch adds the ability to add/remove optional counter to a link

> through RDMA netlink. Limit it to users with ADMIN capability only.

> 

> Examples:

> $ sudo rdma statistic add link rocep8s0f0/1 optional-set cc_rx_ce_pkts

> $ sudo rdma statistic remove link rocep8s0f0/1 optional-set cc_rx_ce_pkts

> 

> Signed-off-by: Aharon Landau <aharonl@nvidia.com>

> Signed-off-by: Neta Ostrovsky <netao@nvidia.com>

> Signed-off-by: Mark Zhang <markzhang@nvidia.com>

>  drivers/infiniband/core/counters.c | 50 ++++++++++++++++

>  drivers/infiniband/core/device.c   |  2 +

>  drivers/infiniband/core/nldev.c    | 93 ++++++++++++++++++++++++++++++

>  include/rdma/ib_verbs.h            |  7 +++

>  include/rdma/rdma_counter.h        |  4 ++

>  include/rdma/rdma_netlink.h        |  1 +

>  include/uapi/rdma/rdma_netlink.h   |  9 +++

>  7 files changed, 166 insertions(+)

> 

> diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c

> index b8b6db98bfdf..fa04178aa0eb 100644

> +++ b/drivers/infiniband/core/counters.c

> @@ -106,6 +106,56 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter,

>  	return ret;

>  }

>  

> +static struct rdma_op_counter *get_opcounter(struct rdma_op_stats *opstats,

> +					     const char *name)

> +{

> +	int i;

> +

> +	for (i = 0; i < opstats->num_opcounters; i++)

> +		if (!strcmp(opstats->opcounters[i].name, name))

> +			return opstats->opcounters + i;

> +

> +	return NULL;

> +}


Export this and have the netlink code call it instead of working with
strings.

> +static int rdma_opcounter_set(struct ib_device *dev, u32 port,

> +			      const char *name, bool is_add)

> +{

> +	struct rdma_port_counter *port_counter;

> +	struct rdma_op_counter *opc;

> +	int ret;

> +

> +	if (!dev->ops.add_op_stat || !dev->ops.remove_op_stat)

> +		return -EOPNOTSUPP;

> +

> +	port_counter = &dev->port_data[port].port_counter;

> +	opc = get_opcounter(port_counter->opstats, name);

> +	if (!opc)

> +		return -EINVAL;

> +

> +	mutex_lock(&port_counter->opstats->lock);

> +	ret = is_add ? dev->ops.add_op_stat(dev, port, opc->type) :

> +		dev->ops.remove_op_stat(dev, port, opc->type);


Drivers should work by indexes not types, that is how the counter API
is designed

> +int rdma_opcounter_add(struct ib_device *dev, u32 port, const char *name)

> +{

> +	return rdma_opcounter_set(dev, port, name, true);

> +}

> +

> +int rdma_opcounter_remove(struct ib_device *dev, u32 port,

> +			  const char *name)

> +{

> +	return rdma_opcounter_set(dev, port, name, false);

> +}


Just pass in the add/remove flag - all this switching between wrappers
adding the flag is ugly. Do it all the way to the driver.

> +static int nldev_stat_set_op_stat(struct sk_buff *skb,

> +				  struct nlmsghdr *nlh,

> +				  struct netlink_ext_ack *extack,

> +				  bool cmd_add)

> +{

> +	char opcounter[RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE] = {};

> +	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];

> +	struct ib_device *device;

> +	struct sk_buff *msg;

> +	u32 index, port;

> +	int ret;

> +

> +	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,

> +			  nldev_policy, extack);

> +

> +	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME] ||

> +	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||

> +	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])

> +		return -EINVAL;

> +

> +	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);

> +	device = ib_device_get_by_index(sock_net(skb->sk), index);

> +	if (!device)

> +		return -EINVAL;

> +

> +	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);

> +	if (!rdma_is_port_valid(device, port)) {

> +		ret = -EINVAL;

> +		goto err;

> +	}

> +

> +	nla_strscpy(opcounter, tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME],

> +		    sizeof(opcounter));

> +

> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);

> +	if (!msg) {

> +		ret = -ENOMEM;

> +		goto err;

> +	}

> +

> +	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,

> +			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,

> +					 (cmd_add ?

> +					  RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER :

> +					  RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER)),

> +			0, 0);

> +

> +	if (cmd_add)

> +		ret = rdma_opcounter_add(device, port, opcounter);

> +	else

> +		ret = rdma_opcounter_remove(device, port, opcounter);

> +	if (ret)

> +		goto err_msg;

> +

> +	nlmsg_end(msg, nlh);


Shouldn't the netlink message for a 'set' always return the current
value of the thing being set on return? Eg the same output that GET
would generate?

> +static int nldev_stat_add_op_stat_doit(struct sk_buff *skb,

> +				       struct nlmsghdr *nlh,

> +				       struct netlink_ext_ack *extack)

> +{

> +	return nldev_stat_set_op_stat(skb, nlh, extack, true);

> +}

> +

> +static int nldev_stat_remove_op_stat_doit(struct sk_buff *skb,

> +					  struct nlmsghdr *nlh,

> +					  struct netlink_ext_ack *extack)

> +{

> +	return nldev_stat_set_op_stat(skb, nlh, extack, false);

> +}

> +

>  static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,

>  			       struct netlink_ext_ack *extack)

>  {

> @@ -2342,6 +2427,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {

>  		.dump = nldev_res_get_mr_raw_dumpit,

>  		.flags = RDMA_NL_ADMIN_PERM,

>  	},

> +	[RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER] = {

> +		.doit = nldev_stat_add_op_stat_doit,

> +		.flags = RDMA_NL_ADMIN_PERM,

> +	},

> +	[RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER] = {

> +		.doit = nldev_stat_remove_op_stat_doit,

> +		.flags = RDMA_NL_ADMIN_PERM,

> +	},

>  };


And here I wonder if this is the canonical way to manipulate lists of
strings in netlink? I'm trying to think of another case like this, did
you reference something?

Are you sure this shouldn't be done via some set on some counter
object?

Jason
Mark Zhang Aug. 24, 2021, 2:09 a.m. UTC | #2
On 8/24/2021 3:42 AM, Jason Gunthorpe wrote:
> On Wed, Aug 18, 2021 at 02:24:24PM +0300, Mark Zhang wrote:

>> From: Aharon Landau <aharonl@nvidia.com>

>>

>> This patch adds the ability to add/remove optional counter to a link

>> through RDMA netlink. Limit it to users with ADMIN capability only.

>>

>> Examples:

>> $ sudo rdma statistic add link rocep8s0f0/1 optional-set cc_rx_ce_pkts

>> $ sudo rdma statistic remove link rocep8s0f0/1 optional-set cc_rx_ce_pkts

>>

>> Signed-off-by: Aharon Landau <aharonl@nvidia.com>

>> Signed-off-by: Neta Ostrovsky <netao@nvidia.com>

>> Signed-off-by: Mark Zhang <markzhang@nvidia.com>

>>   drivers/infiniband/core/counters.c | 50 ++++++++++++++++

>>   drivers/infiniband/core/device.c   |  2 +

>>   drivers/infiniband/core/nldev.c    | 93 ++++++++++++++++++++++++++++++

>>   include/rdma/ib_verbs.h            |  7 +++

>>   include/rdma/rdma_counter.h        |  4 ++

>>   include/rdma/rdma_netlink.h        |  1 +

>>   include/uapi/rdma/rdma_netlink.h   |  9 +++

>>   7 files changed, 166 insertions(+)

>>

>> diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c


...

>> +static int nldev_stat_set_op_stat(struct sk_buff *skb,

>> +				  struct nlmsghdr *nlh,

>> +				  struct netlink_ext_ack *extack,

>> +				  bool cmd_add)

>> +{


...

>> +

>> +	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,

>> +			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,

>> +					 (cmd_add ?

>> +					  RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER :

>> +					  RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER)),

>> +			0, 0);

>> +

>> +	if (cmd_add)

>> +		ret = rdma_opcounter_add(device, port, opcounter);

>> +	else

>> +		ret = rdma_opcounter_remove(device, port, opcounter);

>> +	if (ret)

>> +		goto err_msg;

>> +

>> +	nlmsg_end(msg, nlh);

> 

> Shouldn't the netlink message for a 'set' always return the current

> value of the thing being set on return? Eg the same output that GET

> would generate?


May I ask why we can't just return an error code?

>> +static int nldev_stat_add_op_stat_doit(struct sk_buff *skb,

>> +				       struct nlmsghdr *nlh,

>> +				       struct netlink_ext_ack *extack)

>> +{

>> +	return nldev_stat_set_op_stat(skb, nlh, extack, true);

>> +}

>> +

>> +static int nldev_stat_remove_op_stat_doit(struct sk_buff *skb,

>> +					  struct nlmsghdr *nlh,

>> +					  struct netlink_ext_ack *extack)

>> +{

>> +	return nldev_stat_set_op_stat(skb, nlh, extack, false);

>> +}

>> +

>>   static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,

>>   			       struct netlink_ext_ack *extack)

>>   {

>> @@ -2342,6 +2427,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {

>>   		.dump = nldev_res_get_mr_raw_dumpit,

>>   		.flags = RDMA_NL_ADMIN_PERM,

>>   	},

>> +	[RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER] = {

>> +		.doit = nldev_stat_add_op_stat_doit,

>> +		.flags = RDMA_NL_ADMIN_PERM,

>> +	},

>> +	[RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER] = {

>> +		.doit = nldev_stat_remove_op_stat_doit,

>> +		.flags = RDMA_NL_ADMIN_PERM,

>> +	},

>>   };

> 

> And here I wonder if this is the cannonical way to manipulate lists of

> strings in netlink? I'm trying to think of another case like this, did

> you reference something?


For add/remove, we only support one op-counter at a time (for
simplicity), so it's just a string, not a list of strings.

This is supported:
#  rdma stat add link mlx5_0/1 optional-set cc_rx_ce_pkts

This is not supported:
# rdma stat add link mlx5_0/1 optional-set cc_rx_ce_pkts cc_tx_cnp_pkts

> Are you sure this shouldn't be done via some set on some counter

> object?


Currently we don't support doing this on a counter object, only per port.

> Jason

>
diff mbox series

Patch

diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index b8b6db98bfdf..fa04178aa0eb 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -106,6 +106,56 @@  static int __rdma_counter_bind_qp(struct rdma_counter *counter,
 	return ret;
 }
 
+static struct rdma_op_counter *get_opcounter(struct rdma_op_stats *opstats,
+					     const char *name)
+{
+	int i;
+
+	for (i = 0; i < opstats->num_opcounters; i++)
+		if (!strcmp(opstats->opcounters[i].name, name))
+			return opstats->opcounters + i;
+
+	return NULL;
+}
+
+static int rdma_opcounter_set(struct ib_device *dev, u32 port,
+			      const char *name, bool is_add)
+{
+	struct rdma_port_counter *port_counter;
+	struct rdma_op_counter *opc;
+	int ret;
+
+	if (!dev->ops.add_op_stat || !dev->ops.remove_op_stat)
+		return -EOPNOTSUPP;
+
+	port_counter = &dev->port_data[port].port_counter;
+	opc = get_opcounter(port_counter->opstats, name);
+	if (!opc)
+		return -EINVAL;
+
+	mutex_lock(&port_counter->opstats->lock);
+	ret = is_add ? dev->ops.add_op_stat(dev, port, opc->type) :
+		dev->ops.remove_op_stat(dev, port, opc->type);
+	if (ret)
+		goto end;
+
+	opc->enabled = is_add;
+end:
+	mutex_unlock(&port_counter->opstats->lock);
+	return ret;
+}
+
+int rdma_opcounter_add(struct ib_device *dev, u32 port, const char *name)
+{
+	return rdma_opcounter_set(dev, port, name, true);
+}
+
+int rdma_opcounter_remove(struct ib_device *dev, u32 port,
+			  const char *name)
+{
+	return rdma_opcounter_set(dev, port, name, false);
+}
+
 static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
 					   struct ib_qp *qp,
 					   enum rdma_nl_counter_mode mode)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 23e1ae50b2e4..b9138f20f9a8 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2590,6 +2590,7 @@  void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 		ops->uverbs_no_driver_id_binding;
 
 	SET_DEVICE_OP(dev_ops, add_gid);
+	SET_DEVICE_OP(dev_ops, add_op_stat);
 	SET_DEVICE_OP(dev_ops, advise_mr);
 	SET_DEVICE_OP(dev_ops, alloc_dm);
 	SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
@@ -2701,6 +2702,7 @@  void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, reg_dm_mr);
 	SET_DEVICE_OP(dev_ops, reg_user_mr);
 	SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
+	SET_DEVICE_OP(dev_ops, remove_op_stat);
 	SET_DEVICE_OP(dev_ops, req_notify_cq);
 	SET_DEVICE_OP(dev_ops, rereg_user_mr);
 	SET_DEVICE_OP(dev_ops, resize_cq);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index e9b4b2cccaa0..17d55d89f11c 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -154,6 +154,11 @@  static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
 	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
 	[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK]	= { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_STAT_OPCOUNTERS]       = { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY]  = { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING,
+				  .len = RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE },
+	[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -1888,6 +1893,86 @@  static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return err;
 }
 
+static int nldev_stat_set_op_stat(struct sk_buff *skb,
+				  struct nlmsghdr *nlh,
+				  struct netlink_ext_ack *extack,
+				  bool cmd_add)
+{
+	char opcounter[RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE] = {};
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct ib_device *device;
+	struct sk_buff *msg;
+	u32 index, port;
+	int ret;
+
+	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+
+	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME] ||
+	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(sock_net(skb->sk), index);
+	if (!device)
+		return -EINVAL;
+
+	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+	if (!rdma_is_port_valid(device, port)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	nla_strscpy(opcounter, tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME],
+		    sizeof(opcounter));
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+					 (cmd_add ?
+					  RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER :
+					  RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER)),
+			0, 0);
+
+	if (cmd_add)
+		ret = rdma_opcounter_add(device, port, opcounter);
+	else
+		ret = rdma_opcounter_remove(device, port, opcounter);
+	if (ret)
+		goto err_msg;
+
+	nlmsg_end(msg, nlh);
+	ib_device_put(device);
+	return rdma_nl_unicast(sock_net(skb->sk), msg,
+			       NETLINK_CB(skb).portid);
+
+err_msg:
+	nlmsg_free(msg);
+err:
+	ib_device_put(device);
+	return ret;
+}
+
+static int nldev_stat_add_op_stat_doit(struct sk_buff *skb,
+				       struct nlmsghdr *nlh,
+				       struct netlink_ext_ack *extack)
+{
+	return nldev_stat_set_op_stat(skb, nlh, extack, true);
+}
+
+static int nldev_stat_remove_op_stat_doit(struct sk_buff *skb,
+					  struct nlmsghdr *nlh,
+					  struct netlink_ext_ack *extack)
+{
+	return nldev_stat_set_op_stat(skb, nlh, extack, false);
+}
+
 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			       struct netlink_ext_ack *extack)
 {
@@ -2342,6 +2427,14 @@  static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 		.dump = nldev_res_get_mr_raw_dumpit,
 		.flags = RDMA_NL_ADMIN_PERM,
 	},
+	[RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER] = {
+		.doit = nldev_stat_add_op_stat_doit,
+		.flags = RDMA_NL_ADMIN_PERM,
+	},
+	[RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER] = {
+		.doit = nldev_stat_remove_op_stat_doit,
+		.flags = RDMA_NL_ADMIN_PERM,
+	},
 };
 
 void __init nldev_init(void)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 40b0f7825975..fa9e668b9b14 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -600,11 +600,14 @@  static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 
 /**
  * struct rdma_op_counter
+ * @enabled - To indicate if this counter is currently enabled (as optional
+ *    counters can be dynamically enabled/disabled)
  * @type - The vendor-specific type of the counter
  * @name - The name of the counter
  * @value - The value of the counter
  */
 struct rdma_op_counter {
+	bool enabled;
 	int type;
 	const char *name;
 	u64 value;
@@ -2595,6 +2598,10 @@  struct ib_device_ops {
 	struct rdma_op_stats *(*alloc_op_port_stats)(struct ib_device *device,
 						     u32 port_num);
 
+	int (*add_op_stat)(struct ib_device *device, u32 port,
+			   int optional_stat);
+	int (*remove_op_stat)(struct ib_device *device, u32 port,
+			      int optional_stat);
 	/**
 	 * Allows rdma drivers to add their own restrack attributes.
 	 */
diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h
index 3531c5061718..48086a7248ac 100644
--- a/include/rdma/rdma_counter.h
+++ b/include/rdma/rdma_counter.h
@@ -63,5 +63,9 @@  int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
 int rdma_counter_get_mode(struct ib_device *dev, u32 port,
 			  enum rdma_nl_counter_mode *mode,
 			  enum rdma_nl_counter_mask *mask);
+int rdma_opcounter_add(struct ib_device *dev, u32 port,
+		       const char *name);
+int rdma_opcounter_remove(struct ib_device *dev, u32 port,
+			  const char *name);
 
 #endif /* _RDMA_COUNTER_H_ */
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 2758d9df71ee..ac47a0cc0508 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -10,6 +10,7 @@  enum {
 	RDMA_NLDEV_ATTR_EMPTY_STRING = 1,
 	RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
 	RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE = 32,
+	RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE = 64,
 };
 
 struct rdma_nl_cbs {
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 75a1ae2311d8..79e6ca87d2e0 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -297,6 +297,10 @@  enum rdma_nldev_command {
 
 	RDMA_NLDEV_CMD_RES_SRQ_GET, /* can dump */
 
+	RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER,
+
+	RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER,
+
 	RDMA_NLDEV_NUM_OPS
 };
 
@@ -549,6 +553,11 @@  enum rdma_nldev_attr {
 
 	RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK,	/* u8 */
 
+	RDMA_NLDEV_ATTR_STAT_OPCOUNTERS,	/* nested table */
+	RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY,	/* nested table */
+	RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME,	/* string */
+	RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_VALUE,	/* u64 */
+
 	/*
 	 * Always the end
 	 */