Message ID | 20250106135638.9719-1-yoong.siang.song@intel.com |
---|---|
State | New |
Headers | show |
Series | [bpf-next,v4,1/4] xsk: Add launch time hardware offload support to XDP Tx metadata | expand |
On 01/06, Song Yoong Siang wrote: > Add Launch Time hw offload request to xdp_hw_metadata. User can configure > the delta of launch time to HW RX-time by using "-l" argument. The default > delta is 100,000,000 nanosecond. > > Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com> > --- > tools/testing/selftests/bpf/xdp_hw_metadata.c | 30 +++++++++++++++++-- > 1 file changed, 27 insertions(+), 3 deletions(-) > > diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c > index 6f7b15d6c6ed..795c1d14e02d 100644 > --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c > +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c > @@ -13,6 +13,7 @@ > * - UDP 9091 packets trigger TX reply > * - TX HW timestamp is requested and reported back upon completion > * - TX checksum is requested > + * - TX launch time HW offload is requested for transmission > */ > > #include <test_progs.h> > @@ -64,6 +65,8 @@ int rxq; > bool skip_tx; > __u64 last_hw_rx_timestamp; > __u64 last_xdp_rx_timestamp; > +__u64 last_launch_time; > +__u64 launch_time_delta_to_hw_rx_timestamp = 100000000; /* 0.1 second */ > > void test__fail(void) { /* for network_helpers.c */ } > > @@ -298,6 +301,8 @@ static bool complete_tx(struct xsk *xsk, clockid_t clock_id) > if (meta->completion.tx_timestamp) { > __u64 ref_tstamp = gettime(clock_id); > > + print_tstamp_delta("HW Launch-time", "HW TX-complete-time", > + last_launch_time, meta->completion.tx_timestamp); > print_tstamp_delta("HW TX-complete-time", "User TX-complete-time", > meta->completion.tx_timestamp, ref_tstamp); > print_tstamp_delta("XDP RX-time", "User TX-complete-time", > @@ -395,6 +400,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id) > xsk, ntohs(udph->check), ntohs(want_csum), > meta->request.csum_start, meta->request.csum_offset); > > + /* Set the value of launch time */ > + meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME; > + meta->request.launch_time = 
last_hw_rx_timestamp + > + launch_time_delta_to_hw_rx_timestamp; > + last_launch_time = meta->request.launch_time; > + print_tstamp_delta("HW RX-time", "HW Launch-time", last_hw_rx_timestamp, > + meta->request.launch_time); > + > memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */ > tx_desc->options |= XDP_TX_METADATA; > tx_desc->len = len; > @@ -402,10 +415,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id) > xsk_ring_prod__submit(&xsk->tx, 1); > } > > +#define SLEEP_PER_ITERATION_IN_US 10 > +#define SLEEP_PER_ITERATION_IN_NS (SLEEP_PER_ITERATION_IN_US * 1000) > +#define MAX_ITERATION(x) (((x) / SLEEP_PER_ITERATION_IN_NS) + 500) > static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id) > { > const struct xdp_desc *rx_desc; > struct pollfd fds[rxq + 1]; > + int max_iterations; > __u64 comp_addr; > __u64 addr; > __u32 idx = 0; > @@ -418,6 +435,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t > fds[i].revents = 0; > } > > + /* Calculate max iterations to wait for transmit completion */ > + max_iterations = MAX_ITERATION(launch_time_delta_to_hw_rx_timestamp); > + > fds[rxq].fd = server_fd; > fds[rxq].events = POLLIN; > fds[rxq].revents = 0; > @@ -477,10 +497,10 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t > if (ret) > printf("kick_tx ret=%d\n", ret); > [..] > - for (int j = 0; j < 500; j++) { > + for (int j = 0; j < max_iterations; j++) { > if (complete_tx(xsk, clock_id)) > break; > - usleep(10); > + usleep(SLEEP_PER_ITERATION_IN_US); nit: instead of doing MAX_ITERATION/max_iterations, can we simplify this to the following? 
static u64 now(void)
{
	clock_gettime(...);
	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}

/* wait 5 seconds + cover launch time */
deadline = now() + 5 * NSEC_PER_SEC + launch_time_delta_to_hw_rx_timestamp;
while (true) {
	if (complete_tx())
		break;
	if (now() >= deadline)
		break;
	usleep(10);
}

It is a bit more readable than converting the time to wait into a number of iterations.
On Wednesday, January 8, 2025 12:58 AM, Stanislav Fomichev <stfomichev@gmail.com> wrote: >On 01/06, Song Yoong Siang wrote: >> Add Launch Time hw offload request to xdp_hw_metadata. User can configure >> the delta of launch time to HW RX-time by using "-l" argument. The default >> delta is 100,000,000 nanosecond. >> >> Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com> >> --- >> tools/testing/selftests/bpf/xdp_hw_metadata.c | 30 +++++++++++++++++-- >> 1 file changed, 27 insertions(+), 3 deletions(-) >> >> diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c >b/tools/testing/selftests/bpf/xdp_hw_metadata.c >> index 6f7b15d6c6ed..795c1d14e02d 100644 >> --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c >> +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c >> @@ -13,6 +13,7 @@ >> * - UDP 9091 packets trigger TX reply >> * - TX HW timestamp is requested and reported back upon completion >> * - TX checksum is requested >> + * - TX launch time HW offload is requested for transmission >> */ >> >> #include <test_progs.h> >> @@ -64,6 +65,8 @@ int rxq; >> bool skip_tx; >> __u64 last_hw_rx_timestamp; >> __u64 last_xdp_rx_timestamp; >> +__u64 last_launch_time; >> +__u64 launch_time_delta_to_hw_rx_timestamp = 100000000; /* 0.1 second */ >> >> void test__fail(void) { /* for network_helpers.c */ } >> >> @@ -298,6 +301,8 @@ static bool complete_tx(struct xsk *xsk, clockid_t >clock_id) >> if (meta->completion.tx_timestamp) { >> __u64 ref_tstamp = gettime(clock_id); >> >> + print_tstamp_delta("HW Launch-time", "HW TX-complete-time", >> + last_launch_time, meta- >>completion.tx_timestamp); >> print_tstamp_delta("HW TX-complete-time", "User TX-complete- >time", >> meta->completion.tx_timestamp, ref_tstamp); >> print_tstamp_delta("XDP RX-time", "User TX-complete-time", >> @@ -395,6 +400,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, >clockid_t clock_id) >> xsk, ntohs(udph->check), ntohs(want_csum), >> meta->request.csum_start, 
meta->request.csum_offset); >> >> + /* Set the value of launch time */ >> + meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME; >> + meta->request.launch_time = last_hw_rx_timestamp + >> + launch_time_delta_to_hw_rx_timestamp; >> + last_launch_time = meta->request.launch_time; >> + print_tstamp_delta("HW RX-time", "HW Launch-time", >last_hw_rx_timestamp, >> + meta->request.launch_time); >> + >> memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */ >> tx_desc->options |= XDP_TX_METADATA; >> tx_desc->len = len; >> @@ -402,10 +415,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, >clockid_t clock_id) >> xsk_ring_prod__submit(&xsk->tx, 1); >> } >> >> +#define SLEEP_PER_ITERATION_IN_US 10 >> +#define SLEEP_PER_ITERATION_IN_NS (SLEEP_PER_ITERATION_IN_US * 1000) >> +#define MAX_ITERATION(x) (((x) / SLEEP_PER_ITERATION_IN_NS) + 500) >> static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t >clock_id) >> { >> const struct xdp_desc *rx_desc; >> struct pollfd fds[rxq + 1]; >> + int max_iterations; >> __u64 comp_addr; >> __u64 addr; >> __u32 idx = 0; >> @@ -418,6 +435,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int >server_fd, clockid_t >> fds[i].revents = 0; >> } >> >> + /* Calculate max iterations to wait for transmit completion */ >> + max_iterations = >MAX_ITERATION(launch_time_delta_to_hw_rx_timestamp); >> + >> fds[rxq].fd = server_fd; >> fds[rxq].events = POLLIN; >> fds[rxq].revents = 0; >> @@ -477,10 +497,10 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, >int server_fd, clockid_t >> if (ret) >> printf("kick_tx ret=%d\n", ret); >> > >[..] > >> - for (int j = 0; j < 500; j++) { >> + for (int j = 0; j < max_iterations; j++) { >> if (complete_tx(xsk, clock_id)) >> break; >> - usleep(10); >> + > usleep(SLEEP_PER_ITERATION_IN_US); > >nit: instead of doing MAX_ITERATION/max_iterations, can we simplify this >to the following? 
> >static u64 now(void) >{ > clock_gettime(...); > return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; >} > >/* wait 5 seconds + cover launch time */ >deadline = now() + 5 * NSEC_PER_SEC + launch_time_delta_to_hw_rx_timestamp; >while (true) { > if (complete_tx()) > break; > if (now() >= deadline) > break; > usleep(10); >} > >It is a bit more readable than converting time to wait to the >iterations.. Agree that your code is more readable. I will use your suggestion in next version. Thanks & Regards Siang
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 6f7b15d6c6ed..795c1d14e02d 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -13,6 +13,7 @@ * - UDP 9091 packets trigger TX reply * - TX HW timestamp is requested and reported back upon completion * - TX checksum is requested + * - TX launch time HW offload is requested for transmission */ #include <test_progs.h> @@ -64,6 +65,8 @@ int rxq; bool skip_tx; __u64 last_hw_rx_timestamp; __u64 last_xdp_rx_timestamp; +__u64 last_launch_time; +__u64 launch_time_delta_to_hw_rx_timestamp = 100000000; /* 0.1 second */ void test__fail(void) { /* for network_helpers.c */ } @@ -298,6 +301,8 @@ static bool complete_tx(struct xsk *xsk, clockid_t clock_id) if (meta->completion.tx_timestamp) { __u64 ref_tstamp = gettime(clock_id); + print_tstamp_delta("HW Launch-time", "HW TX-complete-time", + last_launch_time, meta->completion.tx_timestamp); print_tstamp_delta("HW TX-complete-time", "User TX-complete-time", meta->completion.tx_timestamp, ref_tstamp); print_tstamp_delta("XDP RX-time", "User TX-complete-time", @@ -395,6 +400,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id) xsk, ntohs(udph->check), ntohs(want_csum), meta->request.csum_start, meta->request.csum_offset); + /* Set the value of launch time */ + meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME; + meta->request.launch_time = last_hw_rx_timestamp + + launch_time_delta_to_hw_rx_timestamp; + last_launch_time = meta->request.launch_time; + print_tstamp_delta("HW RX-time", "HW Launch-time", last_hw_rx_timestamp, + meta->request.launch_time); + memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */ tx_desc->options |= XDP_TX_METADATA; tx_desc->len = len; @@ -402,10 +415,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id) xsk_ring_prod__submit(&xsk->tx, 1); } +#define 
SLEEP_PER_ITERATION_IN_US 10 +#define SLEEP_PER_ITERATION_IN_NS (SLEEP_PER_ITERATION_IN_US * 1000) +#define MAX_ITERATION(x) (((x) / SLEEP_PER_ITERATION_IN_NS) + 500) static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id) { const struct xdp_desc *rx_desc; struct pollfd fds[rxq + 1]; + int max_iterations; __u64 comp_addr; __u64 addr; __u32 idx = 0; @@ -418,6 +435,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t fds[i].revents = 0; } + /* Calculate max iterations to wait for transmit completion */ + max_iterations = MAX_ITERATION(launch_time_delta_to_hw_rx_timestamp); + fds[rxq].fd = server_fd; fds[rxq].events = POLLIN; fds[rxq].revents = 0; @@ -477,10 +497,10 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t if (ret) printf("kick_tx ret=%d\n", ret); - for (int j = 0; j < 500; j++) { + for (int j = 0; j < max_iterations; j++) { if (complete_tx(xsk, clock_id)) break; - usleep(10); + usleep(SLEEP_PER_ITERATION_IN_US); } } } @@ -608,6 +628,7 @@ static void print_usage(void) " -h Display this help and exit\n\n" " -m Enable multi-buffer XDP for larger MTU\n" " -r Don't generate AF_XDP reply (rx metadata only)\n" + " -l Delta of launch time to HW RX-time in ns (default: 100,000,000ns)\n" "Generate test packets on the other machine with:\n" " echo -n xdp | nc -u -q1 <dst_ip> 9091\n"; @@ -618,7 +639,7 @@ static void read_args(int argc, char *argv[]) { int opt; - while ((opt = getopt(argc, argv, "chmr")) != -1) { + while ((opt = getopt(argc, argv, "chmrl:")) != -1) { switch (opt) { case 'c': bind_flags &= ~XDP_USE_NEED_WAKEUP; @@ -634,6 +655,9 @@ static void read_args(int argc, char *argv[]) case 'r': skip_tx = true; break; + case 'l': + launch_time_delta_to_hw_rx_timestamp = atoll(optarg); + break; case '?': if (isprint(optopt)) fprintf(stderr, "Unknown option: -%c\n", optopt);
Add Launch Time hw offload request to xdp_hw_metadata. Users can configure the delta of launch time to HW RX-time by using the "-l" argument. The default delta is 100,000,000 nanoseconds. Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com> --- tools/testing/selftests/bpf/xdp_hw_metadata.c | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-)