diff mbox series

[rdma-next,05/10] RDMA/mlx5: Add steering support in optional flow counters

Message ID 20210818112428.209111-6-markzhang@nvidia.com
State New
Headers show
Series Optional counter statistics support | expand

Commit Message

Mark Zhang Aug. 18, 2021, 11:24 a.m. UTC
From: Aharon Landau <aharonl@nvidia.com>

Adding steering infrastructure for adding and removing optional counter.
This allows to add and remove the counters dynamically in order not to
hurt performance.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
---
 drivers/infiniband/hw/mlx5/fs.c      | 111 +++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  12 +++
 include/rdma/ib_hdrs.h               |   1 +
 3 files changed, 124 insertions(+)
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 5fbc0a8454b9..be6a00969ddb 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -10,12 +10,14 @@ 
 #include <rdma/uverbs_std_types.h>
 #include <rdma/mlx5_user_ioctl_cmds.h>
 #include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_hdrs.h>
 #include <rdma/ib_umem.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/fs_helpers.h>
 #include <linux/mlx5/accel.h>
 #include <linux/mlx5/eswitch.h>
+#include <net/inet_ecn.h>
 #include "mlx5_ib.h"
 #include "counters.h"
 #include "devx.h"
@@ -847,6 +849,115 @@  static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 	return prio;
 }
 
+enum {
+	RDMA_RX_ECN_OPCOUNTER_PRIO,
+	RDMA_RX_CNP_OPCOUNTER_PRIO,
+};
+
+enum {
+	RDMA_TX_CNP_OPCOUNTER_PRIO,
+};
+
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc,
+			 enum mlx5_ib_optional_counter_type type)
+{
+	enum mlx5_flow_namespace_type fn_type;
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_destination dst;
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_ib_flow_prio *prio;
+	struct mlx5_flow_spec *spec;
+	int priority, err = 0;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	switch (type) {
+	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
+		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 outer_headers.ip_ecn);
+		MLX5_SET(fte_match_param, spec->match_value,
+			 outer_headers.ip_ecn, INET_ECN_CE);
+		break;
+
+	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+					ft_field_support_2_nic_receive_rdma.bth_opcode)) {
+			err = -EOPNOTSUPP;
+			goto free;
+		}
+		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
+		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 misc_parameters.bth_opcode);
+		MLX5_SET(fte_match_param, spec->match_value,
+			 misc_parameters.bth_opcode, IB_BTH_OPCODE_CNP);
+		break;
+
+	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+					ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
+			err = -EOPNOTSUPP;
+			goto free;
+		}
+		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
+		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 misc_parameters.bth_opcode);
+		MLX5_SET(fte_match_param, spec->match_value,
+			 misc_parameters.bth_opcode, IB_BTH_OPCODE_CNP);
+		break;
+
+	default:
+		err = -EOPNOTSUPP;
+		goto free;
+	}
+
+	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+	if (!ns) {
+		err = -EOPNOTSUPP;
+		goto free;
+	}
+	prio = _get_prio(ns, &opfc->prio, priority, 1, 1, 0);
+	if (IS_ERR(prio)) {
+		err = PTR_ERR(prio);
+		goto free;
+	}
+
+	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+	dst.counter_id = mlx5_fc_id(opfc->fc);
+
+	flow_act.action =
+		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+	opfc->rule =
+		mlx5_add_flow_rules(prio->flow_table, spec, &flow_act, &dst, 1);
+	if (IS_ERR(opfc->rule)) {
+		put_flow_table(dev, prio, false);
+		err = PTR_ERR(opfc->rule);
+		goto free;
+	}
+	prio->refcount++;
+
+free:
+	kfree(spec);
+	return err;
+}
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+			     struct mlx5_ib_op_fc *opfc)
+{
+	mlx5_del_flow_rules(opfc->rule);
+	put_flow_table(dev, &opfc->prio, true);
+	WARN_ON(opfc->prio.flow_table);
+}
+
 static void set_underlay_qp(struct mlx5_ib_dev *dev,
 			    struct mlx5_flow_spec *spec,
 			    u32 underlay_qpn)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 2ba352702294..130b2ed79ba2 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -805,6 +805,12 @@  enum mlx5_ib_optional_counter_type {
 	MLX5_IB_OPCOUNTER_MAX,
 };
 
+struct mlx5_ib_op_fc {
+	struct mlx5_fc *fc;
+	struct mlx5_ib_flow_prio prio;
+	struct mlx5_flow_handle *rule;
+};
+
 struct mlx5_ib_counters {
 	const char **names;
 	size_t *offsets;
@@ -814,6 +820,12 @@  struct mlx5_ib_counters {
 	u16 set_id;
 };
 
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc,
+			 enum mlx5_ib_optional_counter_type type);
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+			     struct mlx5_ib_op_fc *opfc);
+
 struct mlx5_ib_multiport_info;
 
 struct mlx5_ib_multiport {
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 7e542205861c..8ae07c0ecdf7 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -232,6 +232,7 @@  static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
 #define IB_BTH_SE_SHIFT	23
 #define IB_BTH_TVER_MASK	0xf
 #define IB_BTH_TVER_SHIFT	16
+#define IB_BTH_OPCODE_CNP	0x81
 
 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
 {