[3/3] linux-generic: pktio: handle transmit errors correctly

Message ID 1427972861-14593-3-git-send-email-stuart.haslam@linaro.org
State New
Headers show

Commit Message

Stuart Haslam April 2, 2015, 11:07 a.m.
The linux-generic pktio implementations don't correctly handle socket
errors which occur while sending packets via odp_pktio_send(). The
behaviour is also not consistent across the three implementations.

The problems being addressed are;

 - calls may block indefinitely in certain error conditions
 - packets may be freed even though they weren't sent
 - return value doesn't accurately reflect number of packets sent
 - inconsistent use of __odp_errno

Signed-off-by: Stuart Haslam <stuart.haslam@linaro.org>
---
 platform/linux-generic/odp_packet_io.c     |   2 +-
 platform/linux-generic/odp_packet_socket.c | 157 +++++++++++++--------------
 test/validation/odp_pktio.c                | 163 +++++++++++++++++++++++++----
 3 files changed, 224 insertions(+), 98 deletions(-)

Comments

Maxim Uvarov April 2, 2015, 11:17 a.m. | #1
On 04/02/15 14:07, Stuart Haslam wrote:
>   {
> -	odp_packet_t pkt;
>   	uint8_t *frame;
>   	uint32_t frame_len;
> -	unsigned i;
> -	unsigned flags;
>   	int sockfd;
> -	int nb_tx;
>   	int ret;
> +	unsigned i, n;
>   
>   	sockfd = pkt_sock->sockfd;
> -	flags = MSG_DONTWAIT;
>   	i = 0;
>   	while (i < len) {
> -		pkt = pkt_table[i];
> +		frame = odp_packet_l2_ptr(pkt_table[i], &frame_len);
>   
> -		frame = odp_packet_l2_ptr(pkt, &frame_len);
> -
> -		ret = send(sockfd, frame, frame_len, flags);
> +		ret = send(sockfd, frame, frame_len, MSG_DONTWAIT);
>   		if (odp_unlikely(ret == -1)) {
> -			if (odp_likely(errno == EAGAIN)) {
> -				flags = 0;	/* blocking for next rounds */
> -				continue;	/* resend buffer */
> -			} else {
> -				break;
> +			if (i == 0 && SOCK_ERR_REPORT(errno)) {
> +				__odp_errno = errno;
> +				ODP_ERR("send(basic): %s\n", strerror(errno));
> +				return -1;
>   			}
> +			break;
>   		}
>   
>   		i++;
> -	}			/* end while */
> -	nb_tx = i;
> +	}
>   
> -	for (i = 0; i < len; i++)
> -		odp_packet_free(pkt_table[i]);
> +	for (n = 0; n < i; ++n)
> +		odp_packet_free(pkt_table[n]);
>   
> -	return nb_tx;
> +	return i;
>   }
>   
>   /*
> @@ -383,9 +379,7 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
>   	struct iovec iovecs[ODP_PACKET_SOCKET_MAX_BURST_TX];
>   	int ret;
>   	int sockfd;
> -	unsigned i;
> -	unsigned sent_msgs = 0;
> -	unsigned flags;
> +	unsigned i, n;
>   
>   	if (odp_unlikely(len > ODP_PACKET_SOCKET_MAX_BURST_TX))
>   		return -1;
> @@ -401,17 +395,24 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
>   		msgvec[i].msg_hdr.msg_iovlen = 1;
>   	}
>   
> -	flags = MSG_DONTWAIT;
> -	for (i = 0; i < len; i += sent_msgs) {
> -		ret = sendmmsg(sockfd, &msgvec[i], len - i, flags);
> -		sent_msgs = ret > 0 ? (unsigned)ret : 0;
> -		flags = 0;	/* blocking for next rounds */
> +	for (i = 0; i < len; ) {
> +		ret = sendmmsg(sockfd, &msgvec[i], len - i, MSG_DONTWAIT);
> +		if (odp_unlikely(ret == -1)) {
> +			if (i == 0 && SOCK_ERR_REPORT(errno)) {
> +				__odp_errno = errno;
> +				ODP_ERR("sendmmsg(): %s\n", strerror(errno));
> +				return -1;
> +			}
> +			break;
> +		}
> +
> +		i += (unsigned)ret;
it looks like i should be simple int not unsigned.

Maxim.
>   	}
>   
> -	for (i = 0; i < len; i++)
> -		odp_packet_free(pkt_table[i]);
> +	for (n = 0; n < i; ++n)
> +		odp_packet_free(pkt_table[n]);
>   
> -	return len;
> +	return i;
>   }
Stuart Haslam April 2, 2015, 3:21 p.m. | #2
On Thu, Apr 02, 2015 at 02:17:02PM +0300, Maxim Uvarov wrote:
> On 04/02/15 14:07, Stuart Haslam wrote:
> >  {
> >-	odp_packet_t pkt;
> >  	uint8_t *frame;
> >  	uint32_t frame_len;
> >-	unsigned i;
> >-	unsigned flags;
> >  	int sockfd;
> >-	int nb_tx;
> >  	int ret;
> >+	unsigned i, n;
> >  	sockfd = pkt_sock->sockfd;
> >-	flags = MSG_DONTWAIT;
> >  	i = 0;
> >  	while (i < len) {
> >-		pkt = pkt_table[i];
> >+		frame = odp_packet_l2_ptr(pkt_table[i], &frame_len);
> >-		frame = odp_packet_l2_ptr(pkt, &frame_len);
> >-
> >-		ret = send(sockfd, frame, frame_len, flags);
> >+		ret = send(sockfd, frame, frame_len, MSG_DONTWAIT);
> >  		if (odp_unlikely(ret == -1)) {
> >-			if (odp_likely(errno == EAGAIN)) {
> >-				flags = 0;	/* blocking for next rounds */
> >-				continue;	/* resend buffer */
> >-			} else {
> >-				break;
> >+			if (i == 0 && SOCK_ERR_REPORT(errno)) {
> >+				__odp_errno = errno;
> >+				ODP_ERR("send(basic): %s\n", strerror(errno));
> >+				return -1;
> >  			}
> >+			break;
> >  		}
> >  		i++;
> >-	}			/* end while */
> >-	nb_tx = i;
> >+	}
> >-	for (i = 0; i < len; i++)
> >-		odp_packet_free(pkt_table[i]);
> >+	for (n = 0; n < i; ++n)
> >+		odp_packet_free(pkt_table[n]);
> >-	return nb_tx;
> >+	return i;
> >  }
> >  /*
> >@@ -383,9 +379,7 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
> >  	struct iovec iovecs[ODP_PACKET_SOCKET_MAX_BURST_TX];
> >  	int ret;
> >  	int sockfd;
> >-	unsigned i;
> >-	unsigned sent_msgs = 0;
> >-	unsigned flags;
> >+	unsigned i, n;
> >  	if (odp_unlikely(len > ODP_PACKET_SOCKET_MAX_BURST_TX))
> >  		return -1;
> >@@ -401,17 +395,24 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
> >  		msgvec[i].msg_hdr.msg_iovlen = 1;
> >  	}
> >-	flags = MSG_DONTWAIT;
> >-	for (i = 0; i < len; i += sent_msgs) {
> >-		ret = sendmmsg(sockfd, &msgvec[i], len - i, flags);
> >-		sent_msgs = ret > 0 ? (unsigned)ret : 0;
> >-		flags = 0;	/* blocking for next rounds */
> >+	for (i = 0; i < len; ) {
> >+		ret = sendmmsg(sockfd, &msgvec[i], len - i, MSG_DONTWAIT);
> >+		if (odp_unlikely(ret == -1)) {
> >+			if (i == 0 && SOCK_ERR_REPORT(errno)) {
> >+				__odp_errno = errno;
> >+				ODP_ERR("sendmmsg(): %s\n", strerror(errno));
> >+				return -1;
> >+			}
> >+			break;
> >+		}
> >+
> >+		i += (unsigned)ret;
> it looks like i should be simple int not unsigned.
> 
> Maxim.

Possibly would be tidier, yes. I'll take a look at the best way to sort
it out, as it's compared with len which is unsigned, even though len is
signed in odp_pktio_send().
Mike Holmes July 23, 2015, 4:11 p.m. | #3
Any progress with https://bugs.linaro.org/show_bug.cgi?id=1365

On 2 April 2015 at 11:21, Stuart Haslam <stuart.haslam@linaro.org> wrote:

> On Thu, Apr 02, 2015 at 02:17:02PM +0300, Maxim Uvarov wrote:
> > On 04/02/15 14:07, Stuart Haslam wrote:
> > >  {
> > >-    odp_packet_t pkt;
> > >     uint8_t *frame;
> > >     uint32_t frame_len;
> > >-    unsigned i;
> > >-    unsigned flags;
> > >     int sockfd;
> > >-    int nb_tx;
> > >     int ret;
> > >+    unsigned i, n;
> > >     sockfd = pkt_sock->sockfd;
> > >-    flags = MSG_DONTWAIT;
> > >     i = 0;
> > >     while (i < len) {
> > >-            pkt = pkt_table[i];
> > >+            frame = odp_packet_l2_ptr(pkt_table[i], &frame_len);
> > >-            frame = odp_packet_l2_ptr(pkt, &frame_len);
> > >-
> > >-            ret = send(sockfd, frame, frame_len, flags);
> > >+            ret = send(sockfd, frame, frame_len, MSG_DONTWAIT);
> > >             if (odp_unlikely(ret == -1)) {
> > >-                    if (odp_likely(errno == EAGAIN)) {
> > >-                            flags = 0;      /* blocking for next
> rounds */
> > >-                            continue;       /* resend buffer */
> > >-                    } else {
> > >-                            break;
> > >+                    if (i == 0 && SOCK_ERR_REPORT(errno)) {
> > >+                            __odp_errno = errno;
> > >+                            ODP_ERR("send(basic): %s\n",
> strerror(errno));
> > >+                            return -1;
> > >                     }
> > >+                    break;
> > >             }
> > >             i++;
> > >-    }                       /* end while */
> > >-    nb_tx = i;
> > >+    }
> > >-    for (i = 0; i < len; i++)
> > >-            odp_packet_free(pkt_table[i]);
> > >+    for (n = 0; n < i; ++n)
> > >+            odp_packet_free(pkt_table[n]);
> > >-    return nb_tx;
> > >+    return i;
> > >  }
> > >  /*
> > >@@ -383,9 +379,7 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
> > >     struct iovec iovecs[ODP_PACKET_SOCKET_MAX_BURST_TX];
> > >     int ret;
> > >     int sockfd;
> > >-    unsigned i;
> > >-    unsigned sent_msgs = 0;
> > >-    unsigned flags;
> > >+    unsigned i, n;
> > >     if (odp_unlikely(len > ODP_PACKET_SOCKET_MAX_BURST_TX))
> > >             return -1;
> > >@@ -401,17 +395,24 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
> > >             msgvec[i].msg_hdr.msg_iovlen = 1;
> > >     }
> > >-    flags = MSG_DONTWAIT;
> > >-    for (i = 0; i < len; i += sent_msgs) {
> > >-            ret = sendmmsg(sockfd, &msgvec[i], len - i, flags);
> > >-            sent_msgs = ret > 0 ? (unsigned)ret : 0;
> > >-            flags = 0;      /* blocking for next rounds */
> > >+    for (i = 0; i < len; ) {
> > >+            ret = sendmmsg(sockfd, &msgvec[i], len - i, MSG_DONTWAIT);
> > >+            if (odp_unlikely(ret == -1)) {
> > >+                    if (i == 0 && SOCK_ERR_REPORT(errno)) {
> > >+                            __odp_errno = errno;
> > >+                            ODP_ERR("sendmmsg(): %s\n",
> strerror(errno));
> > >+                            return -1;
> > >+                    }
> > >+                    break;
> > >+            }
> > >+
> > >+            i += (unsigned)ret;
> > it looks like i should be simple int not unsigned.
> >
> > Maxim.
>
> Possibly would be tidier, yes. I'll take a look at the best way to sort
> it out, as it's compared with len which is unsigned, even though len is
> signed in odp_pktio_send().
>
> --
> Stuart.
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> https://lists.linaro.org/mailman/listinfo/lng-odp
>
Maxim Uvarov July 30, 2015, 3:27 p.m. | #4
Sturart, test case looks like valuable. Do you plan to update that 
patch? I think it's better to have test case in separate commit.

Maxim.

On 04/02/15 14:07, Stuart Haslam wrote:
> The linux-generic pktio implementations don't correctly handle socket
> errors which occur while sending packets via odp_pktio_send(). The
> behaviour is also not consistent across the three implementations.
>
> The problems being addressed are;
>
>   - calls may block indefinitely in certain error conditions
>   - packets may be freed even though they weren't sent
>   - return value doesn't accurately reflect number of packets sent
>   - inconsistent use of __odp_errno
>
> Signed-off-by: Stuart Haslam <stuart.haslam@linaro.org>
> ---
>   platform/linux-generic/odp_packet_io.c     |   2 +-
>   platform/linux-generic/odp_packet_socket.c | 157 +++++++++++++--------------
>   test/validation/odp_pktio.c                | 163 +++++++++++++++++++++++++----
>   3 files changed, 224 insertions(+), 98 deletions(-)
>
> diff --git a/platform/linux-generic/odp_packet_io.c b/platform/linux-generic/odp_packet_io.c
> index b04ce8b..0c7f2dd 100644
> --- a/platform/linux-generic/odp_packet_io.c
> +++ b/platform/linux-generic/odp_packet_io.c
> @@ -26,7 +26,7 @@
>   #include <errno.h>
>   
>   /* MTU to be reported for the "loop" interface */
> -#define PKTIO_LOOP_MTU 1500
> +#define PKTIO_LOOP_MTU INT_MAX
>   /* MAC address for the "loop" interface */
>   static const char pktio_loop_mac[] = {0x02, 0xe9, 0x34, 0x80, 0x73, 0x01};
>   
> diff --git a/platform/linux-generic/odp_packet_socket.c b/platform/linux-generic/odp_packet_socket.c
> index 2802c43..f6d7330 100644
> --- a/platform/linux-generic/odp_packet_socket.c
> +++ b/platform/linux-generic/odp_packet_socket.c
> @@ -43,6 +43,9 @@
>   #include <odp/helper/eth.h>
>   #include <odp/helper/ip.h>
>   
> +/** determine if a socket read/write error should be reported */
> +#define SOCK_ERR_REPORT(e) (e != EAGAIN && e != EWOULDBLOCK && e != EINTR)
> +
>   /** Provide a sendmmsg wrapper for systems with no libc or kernel support.
>    *  As it is implemented as a weak symbol, it has zero effect on systems
>    *  with both.
> @@ -270,41 +273,34 @@ int recv_pkt_sock_basic(pkt_sock_t *const pkt_sock,
>   int send_pkt_sock_basic(pkt_sock_t *const pkt_sock,
>   			odp_packet_t pkt_table[], unsigned len)
>   {
> -	odp_packet_t pkt;
>   	uint8_t *frame;
>   	uint32_t frame_len;
> -	unsigned i;
> -	unsigned flags;
>   	int sockfd;
> -	int nb_tx;
>   	int ret;
> +	unsigned i, n;
>   
>   	sockfd = pkt_sock->sockfd;
> -	flags = MSG_DONTWAIT;
>   	i = 0;
>   	while (i < len) {
> -		pkt = pkt_table[i];
> +		frame = odp_packet_l2_ptr(pkt_table[i], &frame_len);
>   
> -		frame = odp_packet_l2_ptr(pkt, &frame_len);
> -
> -		ret = send(sockfd, frame, frame_len, flags);
> +		ret = send(sockfd, frame, frame_len, MSG_DONTWAIT);
>   		if (odp_unlikely(ret == -1)) {
> -			if (odp_likely(errno == EAGAIN)) {
> -				flags = 0;	/* blocking for next rounds */
> -				continue;	/* resend buffer */
> -			} else {
> -				break;
> +			if (i == 0 && SOCK_ERR_REPORT(errno)) {
> +				__odp_errno = errno;
> +				ODP_ERR("send(basic): %s\n", strerror(errno));
> +				return -1;
>   			}
> +			break;
>   		}
>   
>   		i++;
> -	}			/* end while */
> -	nb_tx = i;
> +	}
>   
> -	for (i = 0; i < len; i++)
> -		odp_packet_free(pkt_table[i]);
> +	for (n = 0; n < i; ++n)
> +		odp_packet_free(pkt_table[n]);
>   
> -	return nb_tx;
> +	return i;
>   }
>   
>   /*
> @@ -383,9 +379,7 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
>   	struct iovec iovecs[ODP_PACKET_SOCKET_MAX_BURST_TX];
>   	int ret;
>   	int sockfd;
> -	unsigned i;
> -	unsigned sent_msgs = 0;
> -	unsigned flags;
> +	unsigned i, n;
>   
>   	if (odp_unlikely(len > ODP_PACKET_SOCKET_MAX_BURST_TX))
>   		return -1;
> @@ -401,17 +395,24 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
>   		msgvec[i].msg_hdr.msg_iovlen = 1;
>   	}
>   
> -	flags = MSG_DONTWAIT;
> -	for (i = 0; i < len; i += sent_msgs) {
> -		ret = sendmmsg(sockfd, &msgvec[i], len - i, flags);
> -		sent_msgs = ret > 0 ? (unsigned)ret : 0;
> -		flags = 0;	/* blocking for next rounds */
> +	for (i = 0; i < len; ) {
> +		ret = sendmmsg(sockfd, &msgvec[i], len - i, MSG_DONTWAIT);
> +		if (odp_unlikely(ret == -1)) {
> +			if (i == 0 && SOCK_ERR_REPORT(errno)) {
> +				__odp_errno = errno;
> +				ODP_ERR("sendmmsg(): %s\n", strerror(errno));
> +				return -1;
> +			}
> +			break;
> +		}
> +
> +		i += (unsigned)ret;
>   	}
>   
> -	for (i = 0; i < len; i++)
> -		odp_packet_free(pkt_table[i]);
> +	for (n = 0; n < i; ++n)
> +		odp_packet_free(pkt_table[n]);
>   
> -	return len;
> +	return i;
>   }
>   
>   /*
> @@ -538,49 +539,69 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring,
>   {
>   	union frame_map ppd;
>   	uint32_t pkt_len;
> -	unsigned frame_num, next_frame_num;
> +	unsigned first_frame_num, frame_num, next_frame_num;
>   	int ret;
> -	unsigned i = 0;
> +	unsigned n, i = 0;
> +	unsigned nb_tx = 0;
> +	int send_errno;
>   
> -	frame_num = ring->frame_num;
> +	first_frame_num = ring->frame_num;
> +	frame_num = first_frame_num;
>   
>   	while (i < len) {
> -		if (mmap_tx_kernel_ready(ring->rd[frame_num].iov_base)) {
> -			ppd.raw = ring->rd[frame_num].iov_base;
> +		ppd.raw = ring->rd[frame_num].iov_base;
>   
> -			next_frame_num = (frame_num + 1) % ring->rd_num;
> +		if (!odp_unlikely(mmap_tx_kernel_ready(ppd.raw)))
> +			break;
>   
> -			pkt_len = odp_packet_len(pkt_table[i]);
> -			ppd.v2->tp_h.tp_snaplen = pkt_len;
> -			ppd.v2->tp_h.tp_len = pkt_len;
> +		next_frame_num = (frame_num + 1) % ring->rd_num;
>   
> -			odp_packet_copydata_out(pkt_table[i], 0, pkt_len,
> -						(uint8_t *)ppd.raw +
> -						TPACKET2_HDRLEN -
> -						sizeof(struct sockaddr_ll));
> +		pkt_len = odp_packet_len(pkt_table[i]);
> +		ppd.v2->tp_h.tp_snaplen = pkt_len;
> +		ppd.v2->tp_h.tp_len = pkt_len;
>   
> -			mmap_tx_user_ready(ppd.raw);
> +		odp_packet_copydata_out(pkt_table[i], 0, pkt_len,
> +					(uint8_t *)ppd.raw +
> +					TPACKET2_HDRLEN -
> +					sizeof(struct sockaddr_ll));
>   
> -			odp_packet_free(pkt_table[i]);
> -			frame_num = next_frame_num;
> -			i++;
> -		} else {
> +		mmap_tx_user_ready(ppd.raw);
> +
> +		frame_num = next_frame_num;
> +		i++;
> +	}
> +
> +	ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
> +	send_errno = errno;
> +
> +	/* On success, the return value indicates the number of bytes sent. On
> +	 * failure a value of -1 is returned, even if the failure occurred
> +	 * after some of the packets in the ring have already been sent, so we
> +	 * need to inspect the packet status to determine which were sent. */
> +	for (n = first_frame_num; n < first_frame_num+i; ++n) {
> +		struct tpacket2_hdr *hdr = ring->rd[n].iov_base;
> +		if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE)) {
> +			nb_tx++;
> +		} else if (hdr->tp_status & TP_STATUS_WRONG_FORMAT) {
> +			/* status will be cleared on the next send request */
>   			break;
>   		}
>   	}
>   
> -	ring->frame_num = frame_num;
> +	ring->frame_num += nb_tx;
>   
> -	ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
> -	if (ret == -1) {
> -		if (errno != EAGAIN) {
> -			__odp_errno = errno;
> -			ODP_ERR("sendto(pkt mmap): %s\n", strerror(errno));
> -			return -1;
> -		}
> +	if (odp_unlikely(ret == -1 &&
> +			 nb_tx == 0 &&
> +			 SOCK_ERR_REPORT(send_errno))) {
> +		__odp_errno = send_errno;
> +		ODP_ERR("sendto(pkt mmap): %s\n", strerror(send_errno));
> +		return -1;
>   	}
>   
> -	return i;
> +	for (i = 0; i < nb_tx; ++i)
> +		odp_packet_free(pkt_table[i]);
> +
> +	return nb_tx;
>   }
>   
>   static void mmap_fill_ring(struct ring *ring, odp_pool_t pool_hdl, int fanout)
> @@ -624,21 +645,6 @@ static void mmap_fill_ring(struct ring *ring, odp_pool_t pool_hdl, int fanout)
>   	ring->flen = ring->req.tp_frame_size;
>   }
>   
> -static int mmap_set_packet_loss_discard(int sock)
> -{
> -	int ret, discard = 1;
> -
> -	ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *)&discard,
> -			 sizeof(discard));
> -	if (ret == -1) {
> -		__odp_errno = errno;
> -		ODP_ERR("setsockopt(PACKET_LOSS): %s\n", strerror(errno));
> -		return -1;
> -	}
> -
> -	return 0;
> -}
> -
>   static int mmap_setup_ring(int sock, struct ring *ring, int type,
>   			   odp_pool_t pool_hdl, int fanout)
>   {
> @@ -648,12 +654,6 @@ static int mmap_setup_ring(int sock, struct ring *ring, int type,
>   	ring->type = type;
>   	ring->version = TPACKET_V2;
>   
> -	if (type == PACKET_TX_RING) {
> -		ret = mmap_set_packet_loss_discard(sock);
> -		if (ret != 0)
> -			return -1;
> -	}
> -
>   	mmap_fill_ring(ring, pool_hdl, fanout);
>   
>   	ret = setsockopt(sock, SOL_PACKET, type, &ring->req, sizeof(ring->req));
> @@ -747,6 +747,7 @@ static int mmap_bind_sock(pkt_sock_mmap_t *pkt_sock, const char *netdev)
>   	return 0;
>   }
>   
> +
>   static int mmap_store_hw_addr(pkt_sock_mmap_t *const pkt_sock,
>   			      const char *netdev)
>   {
> diff --git a/test/validation/odp_pktio.c b/test/validation/odp_pktio.c
> index a078efe..55df12e 100644
> --- a/test/validation/odp_pktio.c
> +++ b/test/validation/odp_pktio.c
> @@ -21,6 +21,11 @@
>   #define MAX_NUM_IFACES         2
>   #define TEST_SEQ_INVALID       ((uint32_t)~0)
>   #define TEST_SEQ_MAGIC         0x92749451
> +#define TX_BATCH_LEN           4
> +
> +/** Maximum reported MTU size at which the transmit failure test will
> + *  attempt to send oversized packets. */
> +#define TEST_MTU_OVERSIZE_LIMIT 65536
>   
>   /** interface names used for testing */
>   static const char *iface_name[MAX_NUM_IFACES];
> @@ -34,6 +39,7 @@ typedef struct {
>   	odp_pktio_t id;
>   	odp_queue_t outq;
>   	odp_queue_t inq;
> +	enum { PKTIN_MODE_RECV, PKTIN_MODE_POLL, PKTIN_MODE_SCHED } mode;
>   } pktio_info_t;
>   
>   /** magic number and sequence at start of UDP payload */
> @@ -323,30 +329,40 @@ static odp_event_t queue_deq_wait_time(odp_queue_t queue, uint64_t ns)
>   	return ODP_EVENT_INVALID;
>   }
>   
> -static odp_packet_t wait_for_packet(odp_queue_t queue,
> +static odp_packet_t wait_for_packet(pktio_info_t *pktio_rx,
>   				    uint32_t seq, uint64_t ns)
>   {
>   	uint64_t start, now, diff;
>   	odp_event_t ev;
> -	odp_packet_t pkt = ODP_PACKET_INVALID;
> +	odp_packet_t pkt;
>   
>   	start = odp_time_cycles();
>   
>   	do {
> -		if (queue != ODP_QUEUE_INVALID)
> -			ev = queue_deq_wait_time(queue, ns);
> -		else
> -			ev  = odp_schedule(NULL, ns);
> -
> -		if (ev != ODP_EVENT_INVALID) {
> -			if (odp_event_type(ev) == ODP_EVENT_PACKET) {
> -				pkt = odp_packet_from_event(ev);
> -				if (pktio_pkt_seq(pkt) == seq)
> -					return pkt;
> +		pkt = ODP_PACKET_INVALID;
> +
> +		if (pktio_rx->mode == PKTIN_MODE_RECV) {
> +			odp_pktio_recv(pktio_rx->id, &pkt, 1);
> +		} else {
> +			if (pktio_rx->mode == PKTIN_MODE_POLL)
> +				ev = queue_deq_wait_time(pktio_rx->inq, ns);
> +			else
> +				ev = odp_schedule(NULL, ns);
> +
> +			if (ev != ODP_EVENT_INVALID) {
> +				if (odp_event_type(ev) == ODP_EVENT_PACKET)
> +					pkt = odp_packet_from_event(ev);
> +				else
> +					odp_buffer_free(
> +						odp_buffer_from_event(ev));
>   			}
> +		}
>   
> -			/* not interested in this event */
> -			odp_buffer_free(odp_buffer_from_event(ev));
> +		if (pkt != ODP_PACKET_INVALID) {
> +			if (pktio_pkt_seq(pkt) == seq)
> +				return pkt;
> +
> +			odp_packet_free(pkt);
>   		}
>   
>   		now = odp_time_cycles();
> @@ -406,7 +422,7 @@ static void pktio_txrx_multi(pktio_info_t *pktio_a, pktio_info_t *pktio_b,
>   
>   	/* and wait for them to arrive back */
>   	for (i = 0; i < num_pkts; ++i) {
> -		rx_pkt = wait_for_packet(pktio_b->inq, tx_seq[i], ODP_TIME_SEC);
> +		rx_pkt = wait_for_packet(pktio_b, tx_seq[i], ODP_TIME_SEC);
>   
>   		if (rx_pkt == ODP_PACKET_INVALID)
>   			break;
> @@ -436,10 +452,13 @@ static void pktio_test_txrx(odp_queue_type_t q_type, int num_pkts)
>   		}
>   		create_inq(io->id);
>   		io->outq = odp_pktio_outq_getdef(io->id);
> -		if (q_type == ODP_QUEUE_TYPE_POLL)
> +		if (q_type == ODP_QUEUE_TYPE_POLL) {
> +			io->mode = PKTIN_MODE_POLL;
>   			io->inq = odp_pktio_inq_getdef(io->id);
> -		else
> +		} else {
> +			io->mode = PKTIN_MODE_SCHED;
>   			io->inq = ODP_QUEUE_INVALID;
> +		}
>   	}
>   
>   	/* if we have two interfaces then send through one and receive on
> @@ -461,7 +480,7 @@ static void test_odp_pktio_poll_queue(void)
>   
>   static void test_odp_pktio_poll_multi(void)
>   {
> -	pktio_test_txrx(ODP_QUEUE_TYPE_POLL, 4);
> +	pktio_test_txrx(ODP_QUEUE_TYPE_POLL, TX_BATCH_LEN);
>   }
>   
>   static void test_odp_pktio_sched_queue(void)
> @@ -471,7 +490,7 @@ static void test_odp_pktio_sched_queue(void)
>   
>   static void test_odp_pktio_sched_multi(void)
>   {
> -	pktio_test_txrx(ODP_QUEUE_TYPE_SCHED, 4);
> +	pktio_test_txrx(ODP_QUEUE_TYPE_SCHED, TX_BATCH_LEN);
>   }
>   
>   static void test_odp_pktio_jumbo(void)
> @@ -635,6 +654,111 @@ static void test_odp_pktio_close(void)
>   	CU_ASSERT_EQUAL(res, -1);
>   }
>   
> +
> +static void test_odp_pktio_send_failure(void)
> +{
> +	odp_pktio_t pktio_tx, pktio_rx;
> +	odp_packet_t pkt_tbl[TX_BATCH_LEN];
> +	uint32_t pkt_seq[TX_BATCH_LEN];
> +	int ret, mtu, i = 0;
> +	odp_pool_param_t pool_params;
> +	odp_pool_t pkt_pool;
> +	int long_pkt_idx = TX_BATCH_LEN/2;
> +	pktio_info_t info_rx;
> +
> +	pktio_tx = create_pktio(iface_name[0]);
> +	if (pktio_tx == ODP_PKTIO_INVALID) {
> +		CU_FAIL("failed to open pktio");
> +		return;
> +	}
> +
> +	/* read the MTU from the transmit interface */
> +	mtu = odp_pktio_mtu(pktio_tx);
> +
> +	if (mtu > TEST_MTU_OVERSIZE_LIMIT) {
> +		CU_ASSERT(odp_pktio_close(pktio_tx) == 0);
> +		return;
> +	}
> +
> +	/* configure the pool so that we can generate test packets larger
> +	 * than the interface MTU */
> +	memset(&pool_params, 0, sizeof(pool_params));
> +	pool_params.pkt.len     = mtu + 32;
> +	pool_params.pkt.seg_len = pool_params.pkt.len;
> +	pool_params.pkt.num     = TX_BATCH_LEN+1;
> +	pool_params.type        = ODP_POOL_PACKET;
> +	pkt_pool = odp_pool_create("pkt_pool_oversize",
> +				   ODP_SHM_NULL, &pool_params);
> +	CU_ASSERT_FATAL(pkt_pool != ODP_POOL_INVALID);
> +
> +	if (num_ifaces > 1)
> +		pktio_rx = create_pktio(iface_name[1]);
> +	else
> +		pktio_rx = pktio_tx;
> +
> +	/* generate a batch of packets with a single overly long packet
> +	 * in the middle */
> +	for (i = 0; i < TX_BATCH_LEN; ++i) {
> +		uint32_t pkt_len;
> +		if (i == long_pkt_idx)
> +			pkt_len = pool_params.pkt.len;
> +		else
> +			pkt_len = PKT_LEN_NORMAL;
> +
> +		pkt_tbl[i] = odp_packet_alloc(pkt_pool, pkt_len);
> +		if (pkt_tbl[i] == ODP_PACKET_INVALID)
> +			break;
> +
> +		pkt_seq[i] = pktio_init_packet(pkt_tbl[i]);
> +		if (pkt_seq[i] == TEST_SEQ_INVALID)
> +			break;
> +	}
> +
> +	if (i != TX_BATCH_LEN) {
> +		CU_FAIL("failed to generate test packets\n");
> +		return;
> +	}
> +
> +	/* try to send the batch with the long packet in the middle, the
> +	 * initial short packets should be sent */
> +	odp_errno_zero();
> +	ret = odp_pktio_send(pktio_tx, pkt_tbl, TX_BATCH_LEN);
> +	CU_ASSERT(ret == long_pkt_idx);
> +	CU_ASSERT(odp_errno() == 0);
> +
> +	info_rx.id   = pktio_rx;
> +	info_rx.outq = ODP_QUEUE_INVALID;
> +	info_rx.inq  = ODP_QUEUE_INVALID;
> +	info_rx.mode = PKTIN_MODE_RECV;
> +
> +	for (i = 0; i < ret; ++i) {
> +		pkt_tbl[i] = wait_for_packet(&info_rx,
> +					     pkt_seq[i], ODP_TIME_SEC);
> +		if (pkt_tbl[i] == ODP_PACKET_INVALID)
> +			break;
> +	}
> +	CU_ASSERT(i == ret);
> +
> +	/* now try to send starting with the too-long packet and verify
> +	 * it fails */
> +	odp_errno_zero();
> +	ret = odp_pktio_send(pktio_tx,
> +			     &pkt_tbl[long_pkt_idx],
> +			     TX_BATCH_LEN-long_pkt_idx);
> +	CU_ASSERT(ret == -1);
> +	CU_ASSERT(odp_errno() != 0);
> +
> +	for (i = 0; i < TX_BATCH_LEN; ++i) {
> +		if (pkt_tbl[i] != ODP_PACKET_INVALID)
> +			odp_packet_free(pkt_tbl[i]);
> +	}
> +
> +	if (pktio_rx != pktio_tx)
> +		CU_ASSERT(odp_pktio_close(pktio_rx) == 0);
> +	CU_ASSERT(odp_pktio_close(pktio_tx) == 0);
> +	CU_ASSERT(odp_pool_destroy(pkt_pool) == 0);
> +}
> +
>   static int init_pktio_suite(void)
>   {
>   	iface_name[0] = getenv("ODP_PKTIO_IF0");
> @@ -704,6 +828,7 @@ CU_TestInfo pktio_tests[] = {
>   	{"pktio promisc mode",	test_odp_pktio_promisc},
>   	{"pktio mac",		test_odp_pktio_mac},
>   	{"pktio inq_remdef",	test_odp_pktio_inq_remdef},
> +	{"pktio send failure",	test_odp_pktio_send_failure},
>   	CU_TEST_INFO_NULL
>   };
>
Mike Holmes Aug. 6, 2015, 3:55 p.m. | #5
Ping, this is referenced as the fix for
https://bugs.linaro.org/show_bug.cgi?id=1365

On 30 July 2015 at 11:27, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:

> Sturart, test case looks like valuable. Do you plan to update that patch?
> I think it's better to have test case in separate commit.
>
> Maxim.
>
> On 04/02/15 14:07, Stuart Haslam wrote:
>
>> The linux-generic pktio implementations don't correctly handle socket
>> errors which occur while sending packets via odp_pktio_send(). The
>> behaviour is also not consistent across the three implementations.
>>
>> The problems being addressed are;
>>
>>   - calls may block indefinitely in certain error conditions
>>   - packets may be freed even though they weren't sent
>>   - return value doesn't accurately reflect number of packets sent
>>   - inconsistent use of __odp_errno
>>
>> Signed-off-by: Stuart Haslam <stuart.haslam@linaro.org>
>> ---
>>   platform/linux-generic/odp_packet_io.c     |   2 +-
>>   platform/linux-generic/odp_packet_socket.c | 157
>> +++++++++++++--------------
>>   test/validation/odp_pktio.c                | 163
>> +++++++++++++++++++++++++----
>>   3 files changed, 224 insertions(+), 98 deletions(-)
>>
>> diff --git a/platform/linux-generic/odp_packet_io.c
>> b/platform/linux-generic/odp_packet_io.c
>> index b04ce8b..0c7f2dd 100644
>> --- a/platform/linux-generic/odp_packet_io.c
>> +++ b/platform/linux-generic/odp_packet_io.c
>> @@ -26,7 +26,7 @@
>>   #include <errno.h>
>>     /* MTU to be reported for the "loop" interface */
>> -#define PKTIO_LOOP_MTU 1500
>> +#define PKTIO_LOOP_MTU INT_MAX
>>   /* MAC address for the "loop" interface */
>>   static const char pktio_loop_mac[] = {0x02, 0xe9, 0x34, 0x80, 0x73,
>> 0x01};
>>   diff --git a/platform/linux-generic/odp_packet_socket.c
>> b/platform/linux-generic/odp_packet_socket.c
>> index 2802c43..f6d7330 100644
>> --- a/platform/linux-generic/odp_packet_socket.c
>> +++ b/platform/linux-generic/odp_packet_socket.c
>> @@ -43,6 +43,9 @@
>>   #include <odp/helper/eth.h>
>>   #include <odp/helper/ip.h>
>>   +/** determine if a socket read/write error should be reported */
>> +#define SOCK_ERR_REPORT(e) (e != EAGAIN && e != EWOULDBLOCK && e !=
>> EINTR)
>> +
>>   /** Provide a sendmmsg wrapper for systems with no libc or kernel
>> support.
>>    *  As it is implemented as a weak symbol, it has zero effect on systems
>>    *  with both.
>> @@ -270,41 +273,34 @@ int recv_pkt_sock_basic(pkt_sock_t *const pkt_sock,
>>   int send_pkt_sock_basic(pkt_sock_t *const pkt_sock,
>>                         odp_packet_t pkt_table[], unsigned len)
>>   {
>> -       odp_packet_t pkt;
>>         uint8_t *frame;
>>         uint32_t frame_len;
>> -       unsigned i;
>> -       unsigned flags;
>>         int sockfd;
>> -       int nb_tx;
>>         int ret;
>> +       unsigned i, n;
>>         sockfd = pkt_sock->sockfd;
>> -       flags = MSG_DONTWAIT;
>>         i = 0;
>>         while (i < len) {
>> -               pkt = pkt_table[i];
>> +               frame = odp_packet_l2_ptr(pkt_table[i], &frame_len);
>>   -             frame = odp_packet_l2_ptr(pkt, &frame_len);
>> -
>> -               ret = send(sockfd, frame, frame_len, flags);
>> +               ret = send(sockfd, frame, frame_len, MSG_DONTWAIT);
>>                 if (odp_unlikely(ret == -1)) {
>> -                       if (odp_likely(errno == EAGAIN)) {
>> -                               flags = 0;      /* blocking for next
>> rounds */
>> -                               continue;       /* resend buffer */
>> -                       } else {
>> -                               break;
>> +                       if (i == 0 && SOCK_ERR_REPORT(errno)) {
>> +                               __odp_errno = errno;
>> +                               ODP_ERR("send(basic): %s\n",
>> strerror(errno));
>> +                               return -1;
>>                         }
>> +                       break;
>>                 }
>>                 i++;
>> -       }                       /* end while */
>> -       nb_tx = i;
>> +       }
>>   -     for (i = 0; i < len; i++)
>> -               odp_packet_free(pkt_table[i]);
>> +       for (n = 0; n < i; ++n)
>> +               odp_packet_free(pkt_table[n]);
>>   -     return nb_tx;
>> +       return i;
>>   }
>>     /*
>> @@ -383,9 +379,7 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
>>         struct iovec iovecs[ODP_PACKET_SOCKET_MAX_BURST_TX];
>>         int ret;
>>         int sockfd;
>> -       unsigned i;
>> -       unsigned sent_msgs = 0;
>> -       unsigned flags;
>> +       unsigned i, n;
>>         if (odp_unlikely(len > ODP_PACKET_SOCKET_MAX_BURST_TX))
>>                 return -1;
>> @@ -401,17 +395,24 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
>>                 msgvec[i].msg_hdr.msg_iovlen = 1;
>>         }
>>   -     flags = MSG_DONTWAIT;
>> -       for (i = 0; i < len; i += sent_msgs) {
>> -               ret = sendmmsg(sockfd, &msgvec[i], len - i, flags);
>> -               sent_msgs = ret > 0 ? (unsigned)ret : 0;
>> -               flags = 0;      /* blocking for next rounds */
>> +       for (i = 0; i < len; ) {
>> +               ret = sendmmsg(sockfd, &msgvec[i], len - i, MSG_DONTWAIT);
>> +               if (odp_unlikely(ret == -1)) {
>> +                       if (i == 0 && SOCK_ERR_REPORT(errno)) {
>> +                               __odp_errno = errno;
>> +                               ODP_ERR("sendmmsg(): %s\n",
>> strerror(errno));
>> +                               return -1;
>> +                       }
>> +                       break;
>> +               }
>> +
>> +               i += (unsigned)ret;
>>         }
>>   -     for (i = 0; i < len; i++)
>> -               odp_packet_free(pkt_table[i]);
>> +       for (n = 0; n < i; ++n)
>> +               odp_packet_free(pkt_table[n]);
>>   -     return len;
>> +       return i;
>>   }
>>     /*
>> @@ -538,49 +539,69 @@ static inline unsigned pkt_mmap_v2_tx(int sock,
>> struct ring *ring,
>>   {
>>         union frame_map ppd;
>>         uint32_t pkt_len;
>> -       unsigned frame_num, next_frame_num;
>> +       unsigned first_frame_num, frame_num, next_frame_num;
>>         int ret;
>> -       unsigned i = 0;
>> +       unsigned n, i = 0;
>> +       unsigned nb_tx = 0;
>> +       int send_errno;
>>   -     frame_num = ring->frame_num;
>> +       first_frame_num = ring->frame_num;
>> +       frame_num = first_frame_num;
>>         while (i < len) {
>> -               if (mmap_tx_kernel_ready(ring->rd[frame_num].iov_base)) {
>> -                       ppd.raw = ring->rd[frame_num].iov_base;
>> +               ppd.raw = ring->rd[frame_num].iov_base;
>>   -                     next_frame_num = (frame_num + 1) % ring->rd_num;
>> +               if (!odp_unlikely(mmap_tx_kernel_ready(ppd.raw)))
>> +                       break;
>>   -                     pkt_len = odp_packet_len(pkt_table[i]);
>> -                       ppd.v2->tp_h.tp_snaplen = pkt_len;
>> -                       ppd.v2->tp_h.tp_len = pkt_len;
>> +               next_frame_num = (frame_num + 1) % ring->rd_num;
>>   -                     odp_packet_copydata_out(pkt_table[i], 0, pkt_len,
>> -                                               (uint8_t *)ppd.raw +
>> -                                               TPACKET2_HDRLEN -
>> -                                               sizeof(struct
>> sockaddr_ll));
>> +               pkt_len = odp_packet_len(pkt_table[i]);
>> +               ppd.v2->tp_h.tp_snaplen = pkt_len;
>> +               ppd.v2->tp_h.tp_len = pkt_len;
>>   -                     mmap_tx_user_ready(ppd.raw);
>> +               odp_packet_copydata_out(pkt_table[i], 0, pkt_len,
>> +                                       (uint8_t *)ppd.raw +
>> +                                       TPACKET2_HDRLEN -
>> +                                       sizeof(struct sockaddr_ll));
>>   -                     odp_packet_free(pkt_table[i]);
>> -                       frame_num = next_frame_num;
>> -                       i++;
>> -               } else {
>> +               mmap_tx_user_ready(ppd.raw);
>> +
>> +               frame_num = next_frame_num;
>> +               i++;
>> +       }
>> +
>> +       ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
>> +       send_errno = errno;
>> +
>> +       /* On success, the return value indicates the number of bytes
>> sent. On
>> +        * failure a value of -1 is returned, even if the failure occurred
>> +        * after some of the packets in the ring have already been sent,
>> so we
>> +        * need to inspect the packet status to determine which were
>> sent. */
>> +       for (n = first_frame_num; n < first_frame_num+i; ++n) {
>> +               struct tpacket2_hdr *hdr = ring->rd[n].iov_base;
>> +               if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE)) {
>> +                       nb_tx++;
>> +               } else if (hdr->tp_status & TP_STATUS_WRONG_FORMAT) {
>> +                       /* status will be cleared on the next send
>> request */
>>                         break;
>>                 }
>>         }
>>   -     ring->frame_num = frame_num;
>> +       ring->frame_num += nb_tx;
>>   -     ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
>> -       if (ret == -1) {
>> -               if (errno != EAGAIN) {
>> -                       __odp_errno = errno;
>> -                       ODP_ERR("sendto(pkt mmap): %s\n",
>> strerror(errno));
>> -                       return -1;
>> -               }
>> +       if (odp_unlikely(ret == -1 &&
>> +                        nb_tx == 0 &&
>> +                        SOCK_ERR_REPORT(send_errno))) {
>> +               __odp_errno = send_errno;
>> +               ODP_ERR("sendto(pkt mmap): %s\n", strerror(send_errno));
>> +               return -1;
>>         }
>>   -     return i;
>> +       for (i = 0; i < nb_tx; ++i)
>> +               odp_packet_free(pkt_table[i]);
>> +
>> +       return nb_tx;
>>   }
>>     static void mmap_fill_ring(struct ring *ring, odp_pool_t pool_hdl,
>> int fanout)
>> @@ -624,21 +645,6 @@ static void mmap_fill_ring(struct ring *ring,
>> odp_pool_t pool_hdl, int fanout)
>>         ring->flen = ring->req.tp_frame_size;
>>   }
>>   -static int mmap_set_packet_loss_discard(int sock)
>> -{
>> -       int ret, discard = 1;
>> -
>> -       ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *)&discard,
>> -                        sizeof(discard));
>> -       if (ret == -1) {
>> -               __odp_errno = errno;
>> -               ODP_ERR("setsockopt(PACKET_LOSS): %s\n", strerror(errno));
>> -               return -1;
>> -       }
>> -
>> -       return 0;
>> -}
>> -
>>   static int mmap_setup_ring(int sock, struct ring *ring, int type,
>>                            odp_pool_t pool_hdl, int fanout)
>>   {
>> @@ -648,12 +654,6 @@ static int mmap_setup_ring(int sock, struct ring
>> *ring, int type,
>>         ring->type = type;
>>         ring->version = TPACKET_V2;
>>   -     if (type == PACKET_TX_RING) {
>> -               ret = mmap_set_packet_loss_discard(sock);
>> -               if (ret != 0)
>> -                       return -1;
>> -       }
>> -
>>         mmap_fill_ring(ring, pool_hdl, fanout);
>>         ret = setsockopt(sock, SOL_PACKET, type, &ring->req,
>> sizeof(ring->req));
>> @@ -747,6 +747,7 @@ static int mmap_bind_sock(pkt_sock_mmap_t *pkt_sock,
>> const char *netdev)
>>         return 0;
>>   }
>>   +
>>   static int mmap_store_hw_addr(pkt_sock_mmap_t *const pkt_sock,
>>                               const char *netdev)
>>   {
>> diff --git a/test/validation/odp_pktio.c b/test/validation/odp_pktio.c
>> index a078efe..55df12e 100644
>> --- a/test/validation/odp_pktio.c
>> +++ b/test/validation/odp_pktio.c
>> @@ -21,6 +21,11 @@
>>   #define MAX_NUM_IFACES         2
>>   #define TEST_SEQ_INVALID       ((uint32_t)~0)
>>   #define TEST_SEQ_MAGIC         0x92749451
>> +#define TX_BATCH_LEN           4
>> +
>> +/** Maximum reported MTU size at which the transmit failure test will
>> + *  attempt to send oversized packets. */
>> +#define TEST_MTU_OVERSIZE_LIMIT 65536
>>     /** interface names used for testing */
>>   static const char *iface_name[MAX_NUM_IFACES];
>> @@ -34,6 +39,7 @@ typedef struct {
>>         odp_pktio_t id;
>>         odp_queue_t outq;
>>         odp_queue_t inq;
>> +       enum { PKTIN_MODE_RECV, PKTIN_MODE_POLL, PKTIN_MODE_SCHED } mode;
>>   } pktio_info_t;
>>     /** magic number and sequence at start of UDP payload */
>> @@ -323,30 +329,40 @@ static odp_event_t queue_deq_wait_time(odp_queue_t
>> queue, uint64_t ns)
>>         return ODP_EVENT_INVALID;
>>   }
>>   -static odp_packet_t wait_for_packet(odp_queue_t queue,
>> +static odp_packet_t wait_for_packet(pktio_info_t *pktio_rx,
>>                                     uint32_t seq, uint64_t ns)
>>   {
>>         uint64_t start, now, diff;
>>         odp_event_t ev;
>> -       odp_packet_t pkt = ODP_PACKET_INVALID;
>> +       odp_packet_t pkt;
>>         start = odp_time_cycles();
>>         do {
>> -               if (queue != ODP_QUEUE_INVALID)
>> -                       ev = queue_deq_wait_time(queue, ns);
>> -               else
>> -                       ev  = odp_schedule(NULL, ns);
>> -
>> -               if (ev != ODP_EVENT_INVALID) {
>> -                       if (odp_event_type(ev) == ODP_EVENT_PACKET) {
>> -                               pkt = odp_packet_from_event(ev);
>> -                               if (pktio_pkt_seq(pkt) == seq)
>> -                                       return pkt;
>> +               pkt = ODP_PACKET_INVALID;
>> +
>> +               if (pktio_rx->mode == PKTIN_MODE_RECV) {
>> +                       odp_pktio_recv(pktio_rx->id, &pkt, 1);
>> +               } else {
>> +                       if (pktio_rx->mode == PKTIN_MODE_POLL)
>> +                               ev = queue_deq_wait_time(pktio_rx->inq,
>> ns);
>> +                       else
>> +                               ev = odp_schedule(NULL, ns);
>> +
>> +                       if (ev != ODP_EVENT_INVALID) {
>> +                               if (odp_event_type(ev) ==
>> ODP_EVENT_PACKET)
>> +                                       pkt = odp_packet_from_event(ev);
>> +                               else
>> +                                       odp_buffer_free(
>> +
>>  odp_buffer_from_event(ev));
>>                         }
>> +               }
>>   -                     /* not interested in this event */
>> -                       odp_buffer_free(odp_buffer_from_event(ev));
>> +               if (pkt != ODP_PACKET_INVALID) {
>> +                       if (pktio_pkt_seq(pkt) == seq)
>> +                               return pkt;
>> +
>> +                       odp_packet_free(pkt);
>>                 }
>>                 now = odp_time_cycles();
>> @@ -406,7 +422,7 @@ static void pktio_txrx_multi(pktio_info_t *pktio_a,
>> pktio_info_t *pktio_b,
>>         /* and wait for them to arrive back */
>>         for (i = 0; i < num_pkts; ++i) {
>> -               rx_pkt = wait_for_packet(pktio_b->inq, tx_seq[i],
>> ODP_TIME_SEC);
>> +               rx_pkt = wait_for_packet(pktio_b, tx_seq[i],
>> ODP_TIME_SEC);
>>                 if (rx_pkt == ODP_PACKET_INVALID)
>>                         break;
>> @@ -436,10 +452,13 @@ static void pktio_test_txrx(odp_queue_type_t
>> q_type, int num_pkts)
>>                 }
>>                 create_inq(io->id);
>>                 io->outq = odp_pktio_outq_getdef(io->id);
>> -               if (q_type == ODP_QUEUE_TYPE_POLL)
>> +               if (q_type == ODP_QUEUE_TYPE_POLL) {
>> +                       io->mode = PKTIN_MODE_POLL;
>>                         io->inq = odp_pktio_inq_getdef(io->id);
>> -               else
>> +               } else {
>> +                       io->mode = PKTIN_MODE_SCHED;
>>                         io->inq = ODP_QUEUE_INVALID;
>> +               }
>>         }
>>         /* if we have two interfaces then send through one and receive on
>> @@ -461,7 +480,7 @@ static void test_odp_pktio_poll_queue(void)
>>     static void test_odp_pktio_poll_multi(void)
>>   {
>> -       pktio_test_txrx(ODP_QUEUE_TYPE_POLL, 4);
>> +       pktio_test_txrx(ODP_QUEUE_TYPE_POLL, TX_BATCH_LEN);
>>   }
>>     static void test_odp_pktio_sched_queue(void)
>> @@ -471,7 +490,7 @@ static void test_odp_pktio_sched_queue(void)
>>     static void test_odp_pktio_sched_multi(void)
>>   {
>> -       pktio_test_txrx(ODP_QUEUE_TYPE_SCHED, 4);
>> +       pktio_test_txrx(ODP_QUEUE_TYPE_SCHED, TX_BATCH_LEN);
>>   }
>>     static void test_odp_pktio_jumbo(void)
>> @@ -635,6 +654,111 @@ static void test_odp_pktio_close(void)
>>         CU_ASSERT_EQUAL(res, -1);
>>   }
>>   +
>> +static void test_odp_pktio_send_failure(void)
>> +{
>> +       odp_pktio_t pktio_tx, pktio_rx;
>> +       odp_packet_t pkt_tbl[TX_BATCH_LEN];
>> +       uint32_t pkt_seq[TX_BATCH_LEN];
>> +       int ret, mtu, i = 0;
>> +       odp_pool_param_t pool_params;
>> +       odp_pool_t pkt_pool;
>> +       int long_pkt_idx = TX_BATCH_LEN/2;
>> +       pktio_info_t info_rx;
>> +
>> +       pktio_tx = create_pktio(iface_name[0]);
>> +       if (pktio_tx == ODP_PKTIO_INVALID) {
>> +               CU_FAIL("failed to open pktio");
>> +               return;
>> +       }
>> +
>> +       /* read the MTU from the transmit interface */
>> +       mtu = odp_pktio_mtu(pktio_tx);
>> +
>> +       if (mtu > TEST_MTU_OVERSIZE_LIMIT) {
>> +               CU_ASSERT(odp_pktio_close(pktio_tx) == 0);
>> +               return;
>> +       }
>> +
>> +       /* configure the pool so that we can generate test packets larger
>> +        * than the interface MTU */
>> +       memset(&pool_params, 0, sizeof(pool_params));
>> +       pool_params.pkt.len     = mtu + 32;
>> +       pool_params.pkt.seg_len = pool_params.pkt.len;
>> +       pool_params.pkt.num     = TX_BATCH_LEN+1;
>> +       pool_params.type        = ODP_POOL_PACKET;
>> +       pkt_pool = odp_pool_create("pkt_pool_oversize",
>> +                                  ODP_SHM_NULL, &pool_params);
>> +       CU_ASSERT_FATAL(pkt_pool != ODP_POOL_INVALID);
>> +
>> +       if (num_ifaces > 1)
>> +               pktio_rx = create_pktio(iface_name[1]);
>> +       else
>> +               pktio_rx = pktio_tx;
>> +
>> +       /* generate a batch of packets with a single overly long packet
>> +        * in the middle */
>> +       for (i = 0; i < TX_BATCH_LEN; ++i) {
>> +               uint32_t pkt_len;
>> +               if (i == long_pkt_idx)
>> +                       pkt_len = pool_params.pkt.len;
>> +               else
>> +                       pkt_len = PKT_LEN_NORMAL;
>> +
>> +               pkt_tbl[i] = odp_packet_alloc(pkt_pool, pkt_len);
>> +               if (pkt_tbl[i] == ODP_PACKET_INVALID)
>> +                       break;
>> +
>> +               pkt_seq[i] = pktio_init_packet(pkt_tbl[i]);
>> +               if (pkt_seq[i] == TEST_SEQ_INVALID)
>> +                       break;
>> +       }
>> +
>> +       if (i != TX_BATCH_LEN) {
>> +               CU_FAIL("failed to generate test packets\n");
>> +               return;
>> +       }
>> +
>> +       /* try to send the batch with the long packet in the middle, the
>> +        * initial short packets should be sent */
>> +       odp_errno_zero();
>> +       ret = odp_pktio_send(pktio_tx, pkt_tbl, TX_BATCH_LEN);
>> +       CU_ASSERT(ret == long_pkt_idx);
>> +       CU_ASSERT(odp_errno() == 0);
>> +
>> +       info_rx.id   = pktio_rx;
>> +       info_rx.outq = ODP_QUEUE_INVALID;
>> +       info_rx.inq  = ODP_QUEUE_INVALID;
>> +       info_rx.mode = PKTIN_MODE_RECV;
>> +
>> +       for (i = 0; i < ret; ++i) {
>> +               pkt_tbl[i] = wait_for_packet(&info_rx,
>> +                                            pkt_seq[i], ODP_TIME_SEC);
>> +               if (pkt_tbl[i] == ODP_PACKET_INVALID)
>> +                       break;
>> +       }
>> +       CU_ASSERT(i == ret);
>> +
>> +       /* now try to send starting with the too-long packet and verify
>> +        * it fails */
>> +       odp_errno_zero();
>> +       ret = odp_pktio_send(pktio_tx,
>> +                            &pkt_tbl[long_pkt_idx],
>> +                            TX_BATCH_LEN-long_pkt_idx);
>> +       CU_ASSERT(ret == -1);
>> +       CU_ASSERT(odp_errno() != 0);
>> +
>> +       for (i = 0; i < TX_BATCH_LEN; ++i) {
>> +               if (pkt_tbl[i] != ODP_PACKET_INVALID)
>> +                       odp_packet_free(pkt_tbl[i]);
>> +       }
>> +
>> +       if (pktio_rx != pktio_tx)
>> +               CU_ASSERT(odp_pktio_close(pktio_rx) == 0);
>> +       CU_ASSERT(odp_pktio_close(pktio_tx) == 0);
>> +       CU_ASSERT(odp_pool_destroy(pkt_pool) == 0);
>> +}
>> +
>>   static int init_pktio_suite(void)
>>   {
>>         iface_name[0] = getenv("ODP_PKTIO_IF0");
>> @@ -704,6 +828,7 @@ CU_TestInfo pktio_tests[] = {
>>         {"pktio promisc mode",  test_odp_pktio_promisc},
>>         {"pktio mac",           test_odp_pktio_mac},
>>         {"pktio inq_remdef",    test_odp_pktio_inq_remdef},
>> +       {"pktio send failure",  test_odp_pktio_send_failure},
>>         CU_TEST_INFO_NULL
>>   };
>>
>>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> https://lists.linaro.org/mailman/listinfo/lng-odp
>
Stuart Haslam Aug. 6, 2015, 3:59 p.m. | #6
On Thu, Aug 06, 2015 at 11:55:34AM -0400, Mike Holmes wrote:
> Ping, this is referenced as the fix for
> https://bugs.linaro.org/show_bug.cgi?id=1365
> 

It needs a non-trivial rebase since a lot of the files it touches have
been moved around. I'll take a look at it tomorrow.

--
Stuart.

Patch

diff --git a/platform/linux-generic/odp_packet_io.c b/platform/linux-generic/odp_packet_io.c
index b04ce8b..0c7f2dd 100644
--- a/platform/linux-generic/odp_packet_io.c
+++ b/platform/linux-generic/odp_packet_io.c
@@ -26,7 +26,7 @@ 
 #include <errno.h>
 
 /* MTU to be reported for the "loop" interface */
-#define PKTIO_LOOP_MTU 1500
+#define PKTIO_LOOP_MTU INT_MAX
 /* MAC address for the "loop" interface */
 static const char pktio_loop_mac[] = {0x02, 0xe9, 0x34, 0x80, 0x73, 0x01};
 
diff --git a/platform/linux-generic/odp_packet_socket.c b/platform/linux-generic/odp_packet_socket.c
index 2802c43..f6d7330 100644
--- a/platform/linux-generic/odp_packet_socket.c
+++ b/platform/linux-generic/odp_packet_socket.c
@@ -43,6 +43,9 @@ 
 #include <odp/helper/eth.h>
 #include <odp/helper/ip.h>
 
+/** determine if a socket read/write error should be reported */
+#define SOCK_ERR_REPORT(e) (e != EAGAIN && e != EWOULDBLOCK && e != EINTR)
+
 /** Provide a sendmmsg wrapper for systems with no libc or kernel support.
  *  As it is implemented as a weak symbol, it has zero effect on systems
  *  with both.
@@ -270,41 +273,34 @@  int recv_pkt_sock_basic(pkt_sock_t *const pkt_sock,
 int send_pkt_sock_basic(pkt_sock_t *const pkt_sock,
 			odp_packet_t pkt_table[], unsigned len)
 {
-	odp_packet_t pkt;
 	uint8_t *frame;
 	uint32_t frame_len;
-	unsigned i;
-	unsigned flags;
 	int sockfd;
-	int nb_tx;
 	int ret;
+	unsigned i, n;
 
 	sockfd = pkt_sock->sockfd;
-	flags = MSG_DONTWAIT;
 	i = 0;
 	while (i < len) {
-		pkt = pkt_table[i];
+		frame = odp_packet_l2_ptr(pkt_table[i], &frame_len);
 
-		frame = odp_packet_l2_ptr(pkt, &frame_len);
-
-		ret = send(sockfd, frame, frame_len, flags);
+		ret = send(sockfd, frame, frame_len, MSG_DONTWAIT);
 		if (odp_unlikely(ret == -1)) {
-			if (odp_likely(errno == EAGAIN)) {
-				flags = 0;	/* blocking for next rounds */
-				continue;	/* resend buffer */
-			} else {
-				break;
+			if (i == 0 && SOCK_ERR_REPORT(errno)) {
+				__odp_errno = errno;
+				ODP_ERR("send(basic): %s\n", strerror(errno));
+				return -1;
 			}
+			break;
 		}
 
 		i++;
-	}			/* end while */
-	nb_tx = i;
+	}
 
-	for (i = 0; i < len; i++)
-		odp_packet_free(pkt_table[i]);
+	for (n = 0; n < i; ++n)
+		odp_packet_free(pkt_table[n]);
 
-	return nb_tx;
+	return i;
 }
 
 /*
@@ -383,9 +379,7 @@  int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
 	struct iovec iovecs[ODP_PACKET_SOCKET_MAX_BURST_TX];
 	int ret;
 	int sockfd;
-	unsigned i;
-	unsigned sent_msgs = 0;
-	unsigned flags;
+	unsigned i, n;
 
 	if (odp_unlikely(len > ODP_PACKET_SOCKET_MAX_BURST_TX))
 		return -1;
@@ -401,17 +395,24 @@  int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
 		msgvec[i].msg_hdr.msg_iovlen = 1;
 	}
 
-	flags = MSG_DONTWAIT;
-	for (i = 0; i < len; i += sent_msgs) {
-		ret = sendmmsg(sockfd, &msgvec[i], len - i, flags);
-		sent_msgs = ret > 0 ? (unsigned)ret : 0;
-		flags = 0;	/* blocking for next rounds */
+	for (i = 0; i < len; ) {
+		ret = sendmmsg(sockfd, &msgvec[i], len - i, MSG_DONTWAIT);
+		if (odp_unlikely(ret == -1)) {
+			if (i == 0 && SOCK_ERR_REPORT(errno)) {
+				__odp_errno = errno;
+				ODP_ERR("sendmmsg(): %s\n", strerror(errno));
+				return -1;
+			}
+			break;
+		}
+
+		i += (unsigned)ret;
 	}
 
-	for (i = 0; i < len; i++)
-		odp_packet_free(pkt_table[i]);
+	for (n = 0; n < i; ++n)
+		odp_packet_free(pkt_table[n]);
 
-	return len;
+	return i;
 }
 
 /*
@@ -538,49 +539,69 @@  static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring,
 {
 	union frame_map ppd;
 	uint32_t pkt_len;
-	unsigned frame_num, next_frame_num;
+	unsigned first_frame_num, frame_num, next_frame_num;
 	int ret;
-	unsigned i = 0;
+	unsigned n, i = 0;
+	unsigned nb_tx = 0;
+	int send_errno;
 
-	frame_num = ring->frame_num;
+	first_frame_num = ring->frame_num;
+	frame_num = first_frame_num;
 
 	while (i < len) {
-		if (mmap_tx_kernel_ready(ring->rd[frame_num].iov_base)) {
-			ppd.raw = ring->rd[frame_num].iov_base;
+		ppd.raw = ring->rd[frame_num].iov_base;
 
-			next_frame_num = (frame_num + 1) % ring->rd_num;
+		if (!odp_unlikely(mmap_tx_kernel_ready(ppd.raw)))
+			break;
 
-			pkt_len = odp_packet_len(pkt_table[i]);
-			ppd.v2->tp_h.tp_snaplen = pkt_len;
-			ppd.v2->tp_h.tp_len = pkt_len;
+		next_frame_num = (frame_num + 1) % ring->rd_num;
 
-			odp_packet_copydata_out(pkt_table[i], 0, pkt_len,
-						(uint8_t *)ppd.raw +
-						TPACKET2_HDRLEN -
-						sizeof(struct sockaddr_ll));
+		pkt_len = odp_packet_len(pkt_table[i]);
+		ppd.v2->tp_h.tp_snaplen = pkt_len;
+		ppd.v2->tp_h.tp_len = pkt_len;
 
-			mmap_tx_user_ready(ppd.raw);
+		odp_packet_copydata_out(pkt_table[i], 0, pkt_len,
+					(uint8_t *)ppd.raw +
+					TPACKET2_HDRLEN -
+					sizeof(struct sockaddr_ll));
 
-			odp_packet_free(pkt_table[i]);
-			frame_num = next_frame_num;
-			i++;
-		} else {
+		mmap_tx_user_ready(ppd.raw);
+
+		frame_num = next_frame_num;
+		i++;
+	}
+
+	ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
+	send_errno = errno;
+
+	/* On success, the return value indicates the number of bytes sent. On
+	 * failure a value of -1 is returned, even if the failure occurred
+	 * after some of the packets in the ring have already been sent, so we
+	 * need to inspect the packet status to determine which were sent. */
+	for (n = first_frame_num; n < first_frame_num+i; ++n) {
+		struct tpacket2_hdr *hdr = ring->rd[n].iov_base;
+		if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE)) {
+			nb_tx++;
+		} else if (hdr->tp_status & TP_STATUS_WRONG_FORMAT) {
+			/* status will be cleared on the next send request */
 			break;
 		}
 	}
 
-	ring->frame_num = frame_num;
+	ring->frame_num += nb_tx;
 
-	ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
-	if (ret == -1) {
-		if (errno != EAGAIN) {
-			__odp_errno = errno;
-			ODP_ERR("sendto(pkt mmap): %s\n", strerror(errno));
-			return -1;
-		}
+	if (odp_unlikely(ret == -1 &&
+			 nb_tx == 0 &&
+			 SOCK_ERR_REPORT(send_errno))) {
+		__odp_errno = send_errno;
+		ODP_ERR("sendto(pkt mmap): %s\n", strerror(send_errno));
+		return -1;
 	}
 
-	return i;
+	for (i = 0; i < nb_tx; ++i)
+		odp_packet_free(pkt_table[i]);
+
+	return nb_tx;
 }
 
 static void mmap_fill_ring(struct ring *ring, odp_pool_t pool_hdl, int fanout)
@@ -624,21 +645,6 @@  static void mmap_fill_ring(struct ring *ring, odp_pool_t pool_hdl, int fanout)
 	ring->flen = ring->req.tp_frame_size;
 }
 
-static int mmap_set_packet_loss_discard(int sock)
-{
-	int ret, discard = 1;
-
-	ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *)&discard,
-			 sizeof(discard));
-	if (ret == -1) {
-		__odp_errno = errno;
-		ODP_ERR("setsockopt(PACKET_LOSS): %s\n", strerror(errno));
-		return -1;
-	}
-
-	return 0;
-}
-
 static int mmap_setup_ring(int sock, struct ring *ring, int type,
 			   odp_pool_t pool_hdl, int fanout)
 {
@@ -648,12 +654,6 @@  static int mmap_setup_ring(int sock, struct ring *ring, int type,
 	ring->type = type;
 	ring->version = TPACKET_V2;
 
-	if (type == PACKET_TX_RING) {
-		ret = mmap_set_packet_loss_discard(sock);
-		if (ret != 0)
-			return -1;
-	}
-
 	mmap_fill_ring(ring, pool_hdl, fanout);
 
 	ret = setsockopt(sock, SOL_PACKET, type, &ring->req, sizeof(ring->req));
@@ -747,6 +747,7 @@  static int mmap_bind_sock(pkt_sock_mmap_t *pkt_sock, const char *netdev)
 	return 0;
 }
 
+
 static int mmap_store_hw_addr(pkt_sock_mmap_t *const pkt_sock,
 			      const char *netdev)
 {
diff --git a/test/validation/odp_pktio.c b/test/validation/odp_pktio.c
index a078efe..55df12e 100644
--- a/test/validation/odp_pktio.c
+++ b/test/validation/odp_pktio.c
@@ -21,6 +21,11 @@ 
 #define MAX_NUM_IFACES         2
 #define TEST_SEQ_INVALID       ((uint32_t)~0)
 #define TEST_SEQ_MAGIC         0x92749451
+#define TX_BATCH_LEN           4
+
+/** Maximum reported MTU size at which the transmit failure test will
+ *  attempt to send oversized packets. */
+#define TEST_MTU_OVERSIZE_LIMIT 65536
 
 /** interface names used for testing */
 static const char *iface_name[MAX_NUM_IFACES];
@@ -34,6 +39,7 @@  typedef struct {
 	odp_pktio_t id;
 	odp_queue_t outq;
 	odp_queue_t inq;
+	enum { PKTIN_MODE_RECV, PKTIN_MODE_POLL, PKTIN_MODE_SCHED } mode;
 } pktio_info_t;
 
 /** magic number and sequence at start of UDP payload */
@@ -323,30 +329,40 @@  static odp_event_t queue_deq_wait_time(odp_queue_t queue, uint64_t ns)
 	return ODP_EVENT_INVALID;
 }
 
-static odp_packet_t wait_for_packet(odp_queue_t queue,
+static odp_packet_t wait_for_packet(pktio_info_t *pktio_rx,
 				    uint32_t seq, uint64_t ns)
 {
 	uint64_t start, now, diff;
 	odp_event_t ev;
-	odp_packet_t pkt = ODP_PACKET_INVALID;
+	odp_packet_t pkt;
 
 	start = odp_time_cycles();
 
 	do {
-		if (queue != ODP_QUEUE_INVALID)
-			ev = queue_deq_wait_time(queue, ns);
-		else
-			ev  = odp_schedule(NULL, ns);
-
-		if (ev != ODP_EVENT_INVALID) {
-			if (odp_event_type(ev) == ODP_EVENT_PACKET) {
-				pkt = odp_packet_from_event(ev);
-				if (pktio_pkt_seq(pkt) == seq)
-					return pkt;
+		pkt = ODP_PACKET_INVALID;
+
+		if (pktio_rx->mode == PKTIN_MODE_RECV) {
+			odp_pktio_recv(pktio_rx->id, &pkt, 1);
+		} else {
+			if (pktio_rx->mode == PKTIN_MODE_POLL)
+				ev = queue_deq_wait_time(pktio_rx->inq, ns);
+			else
+				ev = odp_schedule(NULL, ns);
+
+			if (ev != ODP_EVENT_INVALID) {
+				if (odp_event_type(ev) == ODP_EVENT_PACKET)
+					pkt = odp_packet_from_event(ev);
+				else
+					odp_buffer_free(
+						odp_buffer_from_event(ev));
 			}
+		}
 
-			/* not interested in this event */
-			odp_buffer_free(odp_buffer_from_event(ev));
+		if (pkt != ODP_PACKET_INVALID) {
+			if (pktio_pkt_seq(pkt) == seq)
+				return pkt;
+
+			odp_packet_free(pkt);
 		}
 
 		now = odp_time_cycles();
@@ -406,7 +422,7 @@  static void pktio_txrx_multi(pktio_info_t *pktio_a, pktio_info_t *pktio_b,
 
 	/* and wait for them to arrive back */
 	for (i = 0; i < num_pkts; ++i) {
-		rx_pkt = wait_for_packet(pktio_b->inq, tx_seq[i], ODP_TIME_SEC);
+		rx_pkt = wait_for_packet(pktio_b, tx_seq[i], ODP_TIME_SEC);
 
 		if (rx_pkt == ODP_PACKET_INVALID)
 			break;
@@ -436,10 +452,13 @@  static void pktio_test_txrx(odp_queue_type_t q_type, int num_pkts)
 		}
 		create_inq(io->id);
 		io->outq = odp_pktio_outq_getdef(io->id);
-		if (q_type == ODP_QUEUE_TYPE_POLL)
+		if (q_type == ODP_QUEUE_TYPE_POLL) {
+			io->mode = PKTIN_MODE_POLL;
 			io->inq = odp_pktio_inq_getdef(io->id);
-		else
+		} else {
+			io->mode = PKTIN_MODE_SCHED;
 			io->inq = ODP_QUEUE_INVALID;
+		}
 	}
 
 	/* if we have two interfaces then send through one and receive on
@@ -461,7 +480,7 @@  static void test_odp_pktio_poll_queue(void)
 
 static void test_odp_pktio_poll_multi(void)
 {
-	pktio_test_txrx(ODP_QUEUE_TYPE_POLL, 4);
+	pktio_test_txrx(ODP_QUEUE_TYPE_POLL, TX_BATCH_LEN);
 }
 
 static void test_odp_pktio_sched_queue(void)
@@ -471,7 +490,7 @@  static void test_odp_pktio_sched_queue(void)
 
 static void test_odp_pktio_sched_multi(void)
 {
-	pktio_test_txrx(ODP_QUEUE_TYPE_SCHED, 4);
+	pktio_test_txrx(ODP_QUEUE_TYPE_SCHED, TX_BATCH_LEN);
 }
 
 static void test_odp_pktio_jumbo(void)
@@ -635,6 +654,111 @@  static void test_odp_pktio_close(void)
 	CU_ASSERT_EQUAL(res, -1);
 }
 
+
+static void test_odp_pktio_send_failure(void)
+{
+	odp_pktio_t pktio_tx, pktio_rx;
+	odp_packet_t pkt_tbl[TX_BATCH_LEN];
+	uint32_t pkt_seq[TX_BATCH_LEN];
+	int ret, mtu, i = 0;
+	odp_pool_param_t pool_params;
+	odp_pool_t pkt_pool;
+	int long_pkt_idx = TX_BATCH_LEN/2;
+	pktio_info_t info_rx;
+
+	pktio_tx = create_pktio(iface_name[0]);
+	if (pktio_tx == ODP_PKTIO_INVALID) {
+		CU_FAIL("failed to open pktio");
+		return;
+	}
+
+	/* read the MTU from the transmit interface */
+	mtu = odp_pktio_mtu(pktio_tx);
+
+	if (mtu > TEST_MTU_OVERSIZE_LIMIT) {
+		CU_ASSERT(odp_pktio_close(pktio_tx) == 0);
+		return;
+	}
+
+	/* configure the pool so that we can generate test packets larger
+	 * than the interface MTU */
+	memset(&pool_params, 0, sizeof(pool_params));
+	pool_params.pkt.len     = mtu + 32;
+	pool_params.pkt.seg_len = pool_params.pkt.len;
+	pool_params.pkt.num     = TX_BATCH_LEN+1;
+	pool_params.type        = ODP_POOL_PACKET;
+	pkt_pool = odp_pool_create("pkt_pool_oversize",
+				   ODP_SHM_NULL, &pool_params);
+	CU_ASSERT_FATAL(pkt_pool != ODP_POOL_INVALID);
+
+	if (num_ifaces > 1)
+		pktio_rx = create_pktio(iface_name[1]);
+	else
+		pktio_rx = pktio_tx;
+
+	/* generate a batch of packets with a single overly long packet
+	 * in the middle */
+	for (i = 0; i < TX_BATCH_LEN; ++i) {
+		uint32_t pkt_len;
+		if (i == long_pkt_idx)
+			pkt_len = pool_params.pkt.len;
+		else
+			pkt_len = PKT_LEN_NORMAL;
+
+		pkt_tbl[i] = odp_packet_alloc(pkt_pool, pkt_len);
+		if (pkt_tbl[i] == ODP_PACKET_INVALID)
+			break;
+
+		pkt_seq[i] = pktio_init_packet(pkt_tbl[i]);
+		if (pkt_seq[i] == TEST_SEQ_INVALID)
+			break;
+	}
+
+	if (i != TX_BATCH_LEN) {
+		CU_FAIL("failed to generate test packets\n");
+		return;
+	}
+
+	/* try to send the batch with the long packet in the middle, the
+	 * initial short packets should be sent */
+	odp_errno_zero();
+	ret = odp_pktio_send(pktio_tx, pkt_tbl, TX_BATCH_LEN);
+	CU_ASSERT(ret == long_pkt_idx);
+	CU_ASSERT(odp_errno() == 0);
+
+	info_rx.id   = pktio_rx;
+	info_rx.outq = ODP_QUEUE_INVALID;
+	info_rx.inq  = ODP_QUEUE_INVALID;
+	info_rx.mode = PKTIN_MODE_RECV;
+
+	for (i = 0; i < ret; ++i) {
+		pkt_tbl[i] = wait_for_packet(&info_rx,
+					     pkt_seq[i], ODP_TIME_SEC);
+		if (pkt_tbl[i] == ODP_PACKET_INVALID)
+			break;
+	}
+	CU_ASSERT(i == ret);
+
+	/* now try to send starting with the too-long packet and verify
+	 * it fails */
+	odp_errno_zero();
+	ret = odp_pktio_send(pktio_tx,
+			     &pkt_tbl[long_pkt_idx],
+			     TX_BATCH_LEN-long_pkt_idx);
+	CU_ASSERT(ret == -1);
+	CU_ASSERT(odp_errno() != 0);
+
+	for (i = 0; i < TX_BATCH_LEN; ++i) {
+		if (pkt_tbl[i] != ODP_PACKET_INVALID)
+			odp_packet_free(pkt_tbl[i]);
+	}
+
+	if (pktio_rx != pktio_tx)
+		CU_ASSERT(odp_pktio_close(pktio_rx) == 0);
+	CU_ASSERT(odp_pktio_close(pktio_tx) == 0);
+	CU_ASSERT(odp_pool_destroy(pkt_pool) == 0);
+}
+
 static int init_pktio_suite(void)
 {
 	iface_name[0] = getenv("ODP_PKTIO_IF0");
@@ -704,6 +828,7 @@  CU_TestInfo pktio_tests[] = {
 	{"pktio promisc mode",	test_odp_pktio_promisc},
 	{"pktio mac",		test_odp_pktio_mac},
 	{"pktio inq_remdef",	test_odp_pktio_inq_remdef},
+	{"pktio send failure",	test_odp_pktio_send_failure},
 	CU_TEST_INFO_NULL
 };