diff mbox series

[bpf-next,v4,2/4] selftests/bpf: Add Launch Time request to xdp_hw_metadata

Message ID 20250106135638.9719-1-yoong.siang.song@intel.com
State New
Headers show
Series [bpf-next,v4,1/4] xsk: Add launch time hardware offload support to XDP Tx metadata | expand

Commit Message

Song, Yoong Siang Jan. 6, 2025, 1:56 p.m. UTC
Add a Launch Time HW offload request to xdp_hw_metadata. Users can configure
the delta between the launch time and the HW RX-time with the "-l" argument.
The default delta is 100,000,000 nanoseconds (0.1 second).

Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
---
 tools/testing/selftests/bpf/xdp_hw_metadata.c | 30 +++++++++++++++++--
 1 file changed, 27 insertions(+), 3 deletions(-)

Comments

Stanislav Fomichev Jan. 7, 2025, 4:57 p.m. UTC | #1
On 01/06, Song Yoong Siang wrote:
> Add Launch Time hw offload request to xdp_hw_metadata. User can configure
> the delta of launch time to HW RX-time by using "-l" argument. The default
> delta is 100,000,000 nanosecond.
> 
> Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
> ---
>  tools/testing/selftests/bpf/xdp_hw_metadata.c | 30 +++++++++++++++++--
>  1 file changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> index 6f7b15d6c6ed..795c1d14e02d 100644
> --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
> +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> @@ -13,6 +13,7 @@
>   * - UDP 9091 packets trigger TX reply
>   * - TX HW timestamp is requested and reported back upon completion
>   * - TX checksum is requested
> + * - TX launch time HW offload is requested for transmission
>   */
>  
>  #include <test_progs.h>
> @@ -64,6 +65,8 @@ int rxq;
>  bool skip_tx;
>  __u64 last_hw_rx_timestamp;
>  __u64 last_xdp_rx_timestamp;
> +__u64 last_launch_time;
> +__u64 launch_time_delta_to_hw_rx_timestamp = 100000000; /* 0.1 second */
>  
>  void test__fail(void) { /* for network_helpers.c */ }
>  
> @@ -298,6 +301,8 @@ static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
>  	if (meta->completion.tx_timestamp) {
>  		__u64 ref_tstamp = gettime(clock_id);
>  
> +		print_tstamp_delta("HW Launch-time", "HW TX-complete-time",
> +				   last_launch_time, meta->completion.tx_timestamp);
>  		print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
>  				   meta->completion.tx_timestamp, ref_tstamp);
>  		print_tstamp_delta("XDP RX-time", "User TX-complete-time",
> @@ -395,6 +400,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
>  	       xsk, ntohs(udph->check), ntohs(want_csum),
>  	       meta->request.csum_start, meta->request.csum_offset);
>  
> +	/* Set the value of launch time */
> +	meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
> +	meta->request.launch_time = last_hw_rx_timestamp +
> +				    launch_time_delta_to_hw_rx_timestamp;
> +	last_launch_time = meta->request.launch_time;
> +	print_tstamp_delta("HW RX-time", "HW Launch-time", last_hw_rx_timestamp,
> +			   meta->request.launch_time);
> +
>  	memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
>  	tx_desc->options |= XDP_TX_METADATA;
>  	tx_desc->len = len;
> @@ -402,10 +415,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
>  	xsk_ring_prod__submit(&xsk->tx, 1);
>  }
>  
> +#define SLEEP_PER_ITERATION_IN_US 10
> +#define SLEEP_PER_ITERATION_IN_NS (SLEEP_PER_ITERATION_IN_US * 1000)
> +#define MAX_ITERATION(x) (((x) / SLEEP_PER_ITERATION_IN_NS) + 500)
>  static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
>  {
>  	const struct xdp_desc *rx_desc;
>  	struct pollfd fds[rxq + 1];
> +	int max_iterations;
>  	__u64 comp_addr;
>  	__u64 addr;
>  	__u32 idx = 0;
> @@ -418,6 +435,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
>  		fds[i].revents = 0;
>  	}
>  
> +	/* Calculate max iterations to wait for transmit completion */
> +	max_iterations = MAX_ITERATION(launch_time_delta_to_hw_rx_timestamp);
> +
>  	fds[rxq].fd = server_fd;
>  	fds[rxq].events = POLLIN;
>  	fds[rxq].revents = 0;
> @@ -477,10 +497,10 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
>  					if (ret)
>  						printf("kick_tx ret=%d\n", ret);
>  

[..]

> -					for (int j = 0; j < 500; j++) {
> +					for (int j = 0; j < max_iterations; j++) {
>  						if (complete_tx(xsk, clock_id))
>  							break;
> -						usleep(10);
> +						usleep(SLEEP_PER_ITERATION_IN_US);

nit: instead of doing MAX_ITERATION/max_iterations, can we simplify this
to the following?

static u64 now(void)
{
	clock_gettime(...);
	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}

/* wait 5 seconds + cover launch time */
deadline = now() + 5 * NSEC_PER_SEC + launch_time_delta_to_hw_rx_timestamp;
while (true) {
	if (complete_tx())
		break;
	if (now() >= deadline)
		break;
	usleep(10);
}

It is a bit more readable than converting the time to wait into a number of
iterations.
Song, Yoong Siang Jan. 9, 2025, 7:08 a.m. UTC | #2
On Wednesday, January 8, 2025 12:58 AM, Stanislav Fomichev <stfomichev@gmail.com> wrote:
>On 01/06, Song Yoong Siang wrote:
>> Add Launch Time hw offload request to xdp_hw_metadata. User can configure
>> the delta of launch time to HW RX-time by using "-l" argument. The default
>> delta is 100,000,000 nanosecond.
>>
>> Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
>> ---
>>  tools/testing/selftests/bpf/xdp_hw_metadata.c | 30 +++++++++++++++++--
>>  1 file changed, 27 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
>> index 6f7b15d6c6ed..795c1d14e02d 100644
>> --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
>> +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
>> @@ -13,6 +13,7 @@
>>   * - UDP 9091 packets trigger TX reply
>>   * - TX HW timestamp is requested and reported back upon completion
>>   * - TX checksum is requested
>> + * - TX launch time HW offload is requested for transmission
>>   */
>>
>>  #include <test_progs.h>
>> @@ -64,6 +65,8 @@ int rxq;
>>  bool skip_tx;
>>  __u64 last_hw_rx_timestamp;
>>  __u64 last_xdp_rx_timestamp;
>> +__u64 last_launch_time;
>> +__u64 launch_time_delta_to_hw_rx_timestamp = 100000000; /* 0.1 second */
>>
>>  void test__fail(void) { /* for network_helpers.c */ }
>>
>> @@ -298,6 +301,8 @@ static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
>>  	if (meta->completion.tx_timestamp) {
>>  		__u64 ref_tstamp = gettime(clock_id);
>>
>> +		print_tstamp_delta("HW Launch-time", "HW TX-complete-time",
>> +				   last_launch_time, meta->completion.tx_timestamp);
>>  		print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
>>  				   meta->completion.tx_timestamp, ref_tstamp);
>>  		print_tstamp_delta("XDP RX-time", "User TX-complete-time",
>> @@ -395,6 +400,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
>>  	       xsk, ntohs(udph->check), ntohs(want_csum),
>>  	       meta->request.csum_start, meta->request.csum_offset);
>>
>> +	/* Set the value of launch time */
>> +	meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
>> +	meta->request.launch_time = last_hw_rx_timestamp +
>> +				    launch_time_delta_to_hw_rx_timestamp;
>> +	last_launch_time = meta->request.launch_time;
>> +	print_tstamp_delta("HW RX-time", "HW Launch-time", last_hw_rx_timestamp,
>> +			   meta->request.launch_time);
>> +
>>  	memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
>>  	tx_desc->options |= XDP_TX_METADATA;
>>  	tx_desc->len = len;
>> @@ -402,10 +415,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
>>  	xsk_ring_prod__submit(&xsk->tx, 1);
>>  }
>>
>> +#define SLEEP_PER_ITERATION_IN_US 10
>> +#define SLEEP_PER_ITERATION_IN_NS (SLEEP_PER_ITERATION_IN_US * 1000)
>> +#define MAX_ITERATION(x) (((x) / SLEEP_PER_ITERATION_IN_NS) + 500)
>>  static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
>>  {
>>  	const struct xdp_desc *rx_desc;
>>  	struct pollfd fds[rxq + 1];
>> +	int max_iterations;
>>  	__u64 comp_addr;
>>  	__u64 addr;
>>  	__u32 idx = 0;
>> @@ -418,6 +435,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
>>  		fds[i].revents = 0;
>>  	}
>>
>> +	/* Calculate max iterations to wait for transmit completion */
>> +	max_iterations = MAX_ITERATION(launch_time_delta_to_hw_rx_timestamp);
>> +
>>  	fds[rxq].fd = server_fd;
>>  	fds[rxq].events = POLLIN;
>>  	fds[rxq].revents = 0;
>> @@ -477,10 +497,10 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
>>  					if (ret)
>>  						printf("kick_tx ret=%d\n", ret);
>>
>
>[..]
>
>> -					for (int j = 0; j < 500; j++) {
>> +					for (int j = 0; j < max_iterations; j++) {
>>  						if (complete_tx(xsk, clock_id))
>>  							break;
>> -						usleep(10);
>> +						usleep(SLEEP_PER_ITERATION_IN_US);
>
>nit: instead of doing MAX_ITERATION/max_iterations, can we simplify this
>to the following?
>
>static u64 now(void)
>{
>	clock_gettime(...);
>	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
>}
>
>/* wait 5 seconds + cover launch time */
>deadline = now() + 5 * NSEC_PER_SEC + launch_time_delta_to_hw_rx_timestamp;
>while (true) {
>	if (complete_tx())
>		break;
>	if (now() >= deadline)
>		break;
>	usleep(10);
>}
>
>It is a bit more readable than converting time to wait to the
>iterations..

I agree that your code is more readable.
I will use your suggestion in the next version.

Thanks & Regards
Siang
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 6f7b15d6c6ed..795c1d14e02d 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -13,6 +13,7 @@ 
  * - UDP 9091 packets trigger TX reply
  * - TX HW timestamp is requested and reported back upon completion
  * - TX checksum is requested
+ * - TX launch time HW offload is requested for transmission
  */
 
 #include <test_progs.h>
@@ -64,6 +65,8 @@  int rxq;
 bool skip_tx;
 __u64 last_hw_rx_timestamp;
 __u64 last_xdp_rx_timestamp;
+__u64 last_launch_time;
+__u64 launch_time_delta_to_hw_rx_timestamp = 100000000; /* 0.1 second */
 
 void test__fail(void) { /* for network_helpers.c */ }
 
@@ -298,6 +301,8 @@  static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
 	if (meta->completion.tx_timestamp) {
 		__u64 ref_tstamp = gettime(clock_id);
 
+		print_tstamp_delta("HW Launch-time", "HW TX-complete-time",
+				   last_launch_time, meta->completion.tx_timestamp);
 		print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
 				   meta->completion.tx_timestamp, ref_tstamp);
 		print_tstamp_delta("XDP RX-time", "User TX-complete-time",
@@ -395,6 +400,14 @@  static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
 	       xsk, ntohs(udph->check), ntohs(want_csum),
 	       meta->request.csum_start, meta->request.csum_offset);
 
+	/* Set the value of launch time */
+	meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
+	meta->request.launch_time = last_hw_rx_timestamp +
+				    launch_time_delta_to_hw_rx_timestamp;
+	last_launch_time = meta->request.launch_time;
+	print_tstamp_delta("HW RX-time", "HW Launch-time", last_hw_rx_timestamp,
+			   meta->request.launch_time);
+
 	memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
 	tx_desc->options |= XDP_TX_METADATA;
 	tx_desc->len = len;
@@ -402,10 +415,14 @@  static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
 	xsk_ring_prod__submit(&xsk->tx, 1);
 }
 
+#define SLEEP_PER_ITERATION_IN_US 10
+#define SLEEP_PER_ITERATION_IN_NS (SLEEP_PER_ITERATION_IN_US * 1000)
+#define MAX_ITERATION(x) (((x) / SLEEP_PER_ITERATION_IN_NS) + 500)
 static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
 {
 	const struct xdp_desc *rx_desc;
 	struct pollfd fds[rxq + 1];
+	int max_iterations;
 	__u64 comp_addr;
 	__u64 addr;
 	__u32 idx = 0;
@@ -418,6 +435,9 @@  static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
 		fds[i].revents = 0;
 	}
 
+	/* Calculate max iterations to wait for transmit completion */
+	max_iterations = MAX_ITERATION(launch_time_delta_to_hw_rx_timestamp);
+
 	fds[rxq].fd = server_fd;
 	fds[rxq].events = POLLIN;
 	fds[rxq].revents = 0;
@@ -477,10 +497,10 @@  static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
 					if (ret)
 						printf("kick_tx ret=%d\n", ret);
 
-					for (int j = 0; j < 500; j++) {
+					for (int j = 0; j < max_iterations; j++) {
 						if (complete_tx(xsk, clock_id))
 							break;
-						usleep(10);
+						usleep(SLEEP_PER_ITERATION_IN_US);
 					}
 				}
 			}
@@ -608,6 +628,7 @@  static void print_usage(void)
 		"  -h    Display this help and exit\n\n"
 		"  -m    Enable multi-buffer XDP for larger MTU\n"
 		"  -r    Don't generate AF_XDP reply (rx metadata only)\n"
+		"  -l    Delta of launch time to HW RX-time in ns (default: 100,000,000ns)\n"
 		"Generate test packets on the other machine with:\n"
 		"  echo -n xdp | nc -u -q1 <dst_ip> 9091\n";
 
@@ -618,7 +639,7 @@  static void read_args(int argc, char *argv[])
 {
 	int opt;
 
-	while ((opt = getopt(argc, argv, "chmr")) != -1) {
+	while ((opt = getopt(argc, argv, "chmrl:")) != -1) {
 		switch (opt) {
 		case 'c':
 			bind_flags &= ~XDP_USE_NEED_WAKEUP;
@@ -634,6 +655,9 @@  static void read_args(int argc, char *argv[])
 		case 'r':
 			skip_tx = true;
 			break;
+		case 'l':
+			launch_time_delta_to_hw_rx_timestamp = atoll(optarg);
+			break;
 		case '?':
 			if (isprint(optopt))
 				fprintf(stderr, "Unknown option: -%c\n", optopt);