diff mbox series

[RFC,v3] packet: experimental support for 64-bit timestamps

Message ID 20171128203346.1582725-1-arnd@arndb.de
State Superseded
Headers show
Series [RFC,v3] packet: experimental support for 64-bit timestamps | expand

Commit Message

Arnd Bergmann Nov. 28, 2017, 8:32 p.m. UTC
As I noticed in my previous patch to remove the 'timespec' usage in
the packet socket, the timestamps in the packet socket are slightly
inefficient as they convert a nanosecond value into seconds/nanoseconds
or seconds/microseconds.

This adds two new socket options for the timestamp to resolve that:

PACKET_SKIPTIMESTAMP sets a flag to indicate whether to generate
timestamps at all. When this is set, all timestamps are hardcoded to
zero, which saves a few cycles for the conversion and the access of
the hardware clocksource. The idea was taken from pktgen, which has an
F_NO_TIMESTAMP option for the same purpose.

PACKET_TIMESTAMP_NS64 changes the interpretation of the time stamp fields:
instead of having 32 bits for seconds plus 32 bits for nanoseconds or
microseconds, we now always send down 64 bits worth of nanoseconds when
this flag is set.

Link: https://patchwork.kernel.org/patch/10077199/
Suggested-by: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

---
I still have not done any runtime testing on this patch,
only implemented the suggestions from the previous versions.

While I don't think anyone is actively looking for this feature,
I don't think there are any reasons left against merging it either,
and it might come in handy for someone.
---
 include/uapi/linux/if_packet.h |   2 +
 net/packet/af_packet.c         | 159 +++++++++++++++++++++++++++++------------
 net/packet/internal.h          |   2 +
 3 files changed, 116 insertions(+), 47 deletions(-)

-- 
2.9.0

Comments

Willem de Bruijn Nov. 28, 2017, 10:28 p.m. UTC | #1
On Tue, Nov 28, 2017 at 3:32 PM, Arnd Bergmann <arnd@arndb.de> wrote:
> As I noticed in my previous patch to remove the 'timespec' usage in

> the packet socket, the timestamps in the packet socket are slightly

> inefficient as they convert a nanosecond value into seconds/nanoseconds

> or seconds/microseconds.

>

> This adds two new socket options for the timestamp to resolve that:

>

> PACKET_SKIPTIMESTAMP sets a flag to indicate whether to generate

> timestamps at all. When this is set, all timestamps are hardcoded to

> zero, which saves a few cycles for the conversion and the access of

> the hardware clocksource. The idea was taken from pktgen, which has an

> F_NO_TIMESTAMP option for the same purpose.

>

> PACKET_TIMESTAMP_NS64 changes the interpretation of the time stamp fields:

> instead of having 32 bits for seconds plus 32 bits for nanoseconds or

> microseconds, we now always send down 64 bits worth of nanoseconds when

> this flag is set.

>

> Link: https://patchwork.kernel.org/patch/10077199/

> Suggested-by: Willem de Bruijn <willemdebruijn.kernel@gmail.com>

> Signed-off-by: Arnd Bergmann <arnd@arndb.de>


This works. Another option would be to add a PACKET_TIMESTAMP_EX
with the semantics we discussed previously + fail hard when any undefined
bits are set. I don't feel strongly either way; we don't intend to extend further.

If taking this approach, it might be good to split into separate patches, one
for each flag?

> -static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,

> +static __u32 tpacket_get_timestamp(struct sk_buff *skb, __u32 *hi, __u32 *lo,

>                                    unsigned int flags)


Argument flags is no longer used.

>  {

> +       struct packet_sock *po = pkt_sk(skb->sk);

>         struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

> +       ktime_t stamp;

> +       u32 type;

> +

> +       if (po->tp_skiptstamp)

> +               return 0;

>

>         if (shhwtstamps &&

> -           (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&

> -           ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))

> -               return TP_STATUS_TS_RAW_HARDWARE;

> +           (po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) &&

> +           shhwtstamps->hwtstamp) {

> +               stamp = shhwtstamps->hwtstamp;

> +               type = TP_STATUS_TS_RAW_HARDWARE;

> +       } else if (skb->tstamp) {

> +               stamp = skb->tstamp;

> +               type = TP_STATUS_TS_SOFTWARE;

> +       } else {

> +               return 0;

> +       }

>

> -       if (ktime_to_timespec64_cond(skb->tstamp, ts))

> -               return TP_STATUS_TS_SOFTWARE;

> +       if (po->tp_tstamp_ns64) {

> +               __u64 ns = ktime_to_ns(stamp);

>

> -       return 0;

> +               *hi = upper_32_bits(ns);

> +               *lo = lower_32_bits(ns);

> +       } else {

> +               struct timespec64 ts = ktime_to_timespec64(stamp);

> +

> +               *hi = ts.tv_sec;

> +               if (po->tp_version == TPACKET_V1)


Very minor: may want to invert the test to make the newer protocols the
likely branch.

>  static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,

>                                     struct sk_buff *skb)

>  {

>         union tpacket_uhdr h;

> -       struct timespec64 ts;

> -       __u32 ts_status;

> +       __u32 ts_status, hi, lo;

>

> -       if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))

> +       if (!(ts_status = tpacket_get_timestamp(skb, &hi, &lo, po->tp_tstamp)))

>                 return 0;

>

>         h.raw = frame;

> -       /*

> -        * versions 1 through 3 overflow the timestamps in y2106, since they

> -        * all store the seconds in a 32-bit unsigned integer.

> -        * If we create a version 4, that should have a 64-bit timestamp,

> -        * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit

> -        * nanoseconds.

> -        */


Probably no need to introduce this in patch 1/2 when removing it in 2/2.

> @@ -2191,8 +2226,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,

>         unsigned long status = TP_STATUS_USER;

>         unsigned short macoff, netoff, hdrlen;

>         struct sk_buff *copy_skb = NULL;

> -       struct timespec64 ts;

>         __u32 ts_status;

> +       __u32 hi, lo;


since this function is not time-specific, the context of hi and lo is not
immediately obvious here. tstamp_hi, tstamp_lo? Or even __u32
tstamp[2] and have tpacket_get_timestamp and packet_get_time take
one fewer argument.
Arnd Bergmann Nov. 29, 2017, 12:31 p.m. UTC | #2
On Tue, Nov 28, 2017 at 11:28 PM, Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
> On Tue, Nov 28, 2017 at 3:32 PM, Arnd Bergmann <arnd@arndb.de> wrote:

>> As I noticed in my previous patch to remove the 'timespec' usage in

>> the packet socket, the timestamps in the packet socket are slightly

>> inefficient as they convert a nanosecond value into seconds/nanoseconds

>> or seconds/microseconds.

>>

>> This adds two new socket options for the timestamp to resolve that:

>>

>> PACKET_SKIPTIMESTAMP sets a flag to indicate whether to generate

>> timestamps at all. When this is set, all timestamps are hardcoded to

>> zero, which saves a few cycles for the conversion and the access of

>> the hardware clocksource. The idea was taken from pktgen, which has an

>> F_NO_TIMESTAMP option for the same purpose.

>>

>> PACKET_TIMESTAMP_NS64 changes the interpretation of the time stamp fields:

>> instead of having 32 bits for seconds plus 32 bits for nanoseconds or

>> microseconds, we now always send down 64 bits worth of nanoseconds when

>> this flag is set.

>>

>> Link: https://patchwork.kernel.org/patch/10077199/

>> Suggested-by: Willem de Bruijn <willemdebruijn.kernel@gmail.com>

>> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

>

> This works. Another option would be to add a PACKET_TIMESTAMP_EX

> with the semantics we discussed previously + fail hard when any undefined

> bits are set. I don't feel strong either way, we don't intend to extend further.

>

> If taking this approach, it might be good to split into separate patches, one

> for each flag?

>

>> -static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,

>> +static __u32 tpacket_get_timestamp(struct sk_buff *skb, __u32 *hi, __u32 *lo,

>>                                    unsigned int flags)

>

> Argument flags is no longer used.


Fixed

>> -       return 0;

>> +               *hi = upper_32_bits(ns);

>> +               *lo = lower_32_bits(ns);

>> +       } else {

>> +               struct timespec64 ts = ktime_to_timespec64(stamp);

>> +

>> +               *hi = ts.tv_sec;

>> +               if (po->tp_version == TPACKET_V1)

>

> Very minor: may want to invert test to make newer the protocols the

> likely branch.


Ok. I didn't think this would make any difference to the compiler, but
for readability it seems at least as good, so I've changed it as you suggested
and use "po->tp_version > TPACKET_V1".

>>  static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,

>>                                     struct sk_buff *skb)

>>  {

>>         union tpacket_uhdr h;

>> -       struct timespec64 ts;

>> -       __u32 ts_status;

>> +       __u32 ts_status, hi, lo;

>>

>> -       if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))

>> +       if (!(ts_status = tpacket_get_timestamp(skb, &hi, &lo, po->tp_tstamp)))

>>                 return 0;

>>

>>         h.raw = frame;

>> -       /*

>> -        * versions 1 through 3 overflow the timestamps in y2106, since they

>> -        * all store the seconds in a 32-bit unsigned integer.

>> -        * If we create a version 4, that should have a 64-bit timestamp,

>> -        * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit

>> -        * nanoseconds.

>> -        */

>

> Probably no need to introduce this in patch 1/2 when removing it in 2/2.


I'm still considering this patch as experimental, since I haven't done any
actual testing on it, so I'm not sure it gets merged at the same time.
If patch 1 gets merged separately, I'd rather keep the comment in place.

>> @@ -2191,8 +2226,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,

>>         unsigned long status = TP_STATUS_USER;

>>         unsigned short macoff, netoff, hdrlen;

>>         struct sk_buff *copy_skb = NULL;

>> -       struct timespec64 ts;

>>         __u32 ts_status;

>> +       __u32 hi, lo;

>

> since this function is not time-specific, the context of hi and lo is not

> immediately obvious here. tstamp_hi, tstamp_lo? Or even __u32

> tstamp[2] and have tpacket_get_timestamp and packet_get_time take

> one fewer argument.


Fixed.

Thanks for the review! Any suggestions for how to do the testing? If you have
existing test cases, could you give my next version a test run to see if there
are any regressions and if the timestamps work as expected?

I see that there are test cases in tools/testing/selftests/net/, but none
of them seem to use the time stamps so far, and I'm not familiar enough
with the details of how it works to extend it in a meaningful way.

        arnd
Willem de Bruijn Nov. 29, 2017, 4:51 p.m. UTC | #3
> Thanks for the review! Any suggestions for how to do the testing? If you have

> existing test cases, could you give my next version a test run to see if there

> are any regressions and if the timestamps work as expected?

>

> I see that there are test cases in tools/testing/selftests/net/, but none

> of them seem to use the time stamps so far, and I'm not overly familiar

> with how it works in the details to extend it in a meaningful way.


I could not find any good tests for this interface, either. The only
user of the interface I found was a little tool I wrote a few years
ago that compares timestamps at multiple points in the transmit
path for latency measurement [1]. But it may be easier to just write
a new test under tools/testing/selftests/net for this purpose. I can
help with that, too, if you want.

[1] https://github.com/wdebruij/kerneltools/blob/master/tools/tcplate/tcplate.c
Arnd Bergmann Nov. 29, 2017, 8:06 p.m. UTC | #4
On Wed, Nov 29, 2017 at 5:51 PM, Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>> Thanks for the review! Any suggestions for how to do the testing? If you have

>> existing test cases, could you give my next version a test run to see if there

>> are any regressions and if the timestamps work as expected?

>>

>> I see that there are test cases in tools/testing/selftests/net/, but none

>> of them seem to use the time stamps so far, and I'm not overly familiar

>> with how it works in the details to extend it in a meaningful way.

>

> I could not find any good tests for this interface, either. The only

> user of the interface I found was a little tool I wrote a few years

> ago that compares timestamps at multiple points in the transmit

> path for latency measurement [1]. But it may be easier to just write

> a new test under tools/testing/selftests/net for this purpose. I can

> help with that, too, if you want.


Thanks, that would be great!

     Arnd
Willem de Bruijn Nov. 30, 2017, 1:39 a.m. UTC | #5
On Wed, Nov 29, 2017 at 3:06 PM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Wed, Nov 29, 2017 at 5:51 PM, Willem de Bruijn

> <willemdebruijn.kernel@gmail.com> wrote:

>>> Thanks for the review! Any suggestions for how to do the testing? If you have

>>> existing test cases, could you give my next version a test run to see if there

>>> are any regressions and if the timestamps work as expected?

>>>

>>> I see that there are test cases in tools/testing/selftests/net/, but none

>>> of them seem to use the time stamps so far, and I'm not overly familiar

>>> with how it works in the details to extend it in a meaningful way.

>>

>> I could not find any good tests for this interface, either. The only

>> user of the interface I found was a little tool I wrote a few years

>> ago that compares timestamps at multiple points in the transmit

>> path for latency measurement [1]. But it may be easier to just write

>> a new test under tools/testing/selftests/net for this purpose. I can

>> help with that, too, if you want.

>

> Thanks, that would be great!


I'll reply to this thread with git send-email with an extension to
tools/testing/selftests/net/psock_tpacket.c. I can resend that for
submission after your feature is merged (as it depends on it) or
feel free to include it in your patchset. The test currently fails for
the ns64 case. I probably did not convert correctly, but have to leave
the office and want to send what I have.

Two other comments: the test crashed the kernel due to a NULL ptr
in tpacket_get_timestamp. We cannot rely on skb->sk being set to
the packet socket here. And assignment to bitfield requires a cast to
boolean.

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f55f330ab547..e9decc7fc5c3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -439,9 +439,9 @@ static int __packet_get_status(struct packet_sock
*po, void *frame)
        }
 }

-static __u32 tpacket_get_timestamp(struct sk_buff *skb, __u32 *hi, __u32 *lo)
+static __u32 tpacket_get_timestamp(struct packet_sock *po, struct sk_buff *skb,
+                                  __u32 *hi, __u32 *lo)
 {
-       struct packet_sock *po = pkt_sk(skb->sk);
        struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
        ktime_t stamp;
        u32 type;
@@ -508,7 +508,7 @@ static __u32 __packet_set_timestamp(struct
packet_sock *po, void *frame,
        union tpacket_uhdr h;
        __u32 ts_status, hi, lo;

-       if (!(ts_status = tpacket_get_timestamp(skb, &hi, &lo)))
+       if (!(ts_status = tpacket_get_timestamp(po, skb, &hi, &lo)))
                return 0;

        h.raw = frame;
@@ -2352,7 +2352,7 @@ static int tpacket_rcv(struct sk_buff *skb,
struct net_device *dev,

        skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

-       if (!(ts_status = tpacket_get_timestamp(skb, &tstamp_hi, &tstamp_lo)))
+       if (!(ts_status = tpacket_get_timestamp(po, skb, &tstamp_hi,
&tstamp_lo)))
                packet_get_time(po, &tstamp_hi, &tstamp_lo);

        status |= ts_status;
@@ -3835,7 +3835,7 @@ packet_setsockopt(struct socket *sock, int
level, int optname, char __user *optv
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;

-               po->tp_skiptstamp = val;
+               po->tp_skiptstamp = !!val;
                return 0;
        }
        case PACKET_TIMESTAMP_NS64:
@@ -3847,7 +3847,7 @@ packet_setsockopt(struct socket *sock, int
level, int optname, char __user *optv
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;

-               po->tp_tstamp_ns64 = val;
+               po->tp_tstamp_ns64 = !!val;
                return 0;
        }
        case PACKET_FANOUT:
Willem de Bruijn Nov. 30, 2017, 1:54 a.m. UTC | #6
On Wed, Nov 29, 2017 at 8:39 PM, Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
> On Wed, Nov 29, 2017 at 3:06 PM, Arnd Bergmann <arnd@arndb.de> wrote:

>> On Wed, Nov 29, 2017 at 5:51 PM, Willem de Bruijn

>> <willemdebruijn.kernel@gmail.com> wrote:

>>>> Thanks for the review! Any suggestions for how to do the testing? If you have

>>>> existing test cases, could you give my next version a test run to see if there

>>>> are any regressions and if the timestamps work as expected?

>>>>

>>>> I see that there are test cases in tools/testing/selftests/net/, but none

>>>> of them seem to use the time stamps so far, and I'm not overly familiar

>>>> with how it works in the details to extend it in a meaningful way.

>>>

>>> I could not find any good tests for this interface, either. The only

>>> user of the interface I found was a little tool I wrote a few years

>>> ago that compares timestamps at multiple points in the transmit

>>> path for latency measurement [1]. But it may be easier to just write

>>> a new test under tools/testing/selftests/net for this purpose. I can

>>> help with that, too, if you want.

>>

>> Thanks, that would be great!

>

> I'll reply to this thread with git send-email with an extension to

> tools/testing/selftests/net/psock_tpacket.c.


It appears that it did not end up in this thread. At least not when
using gmail threading. Patch at http://patchwork.ozlabs.org/patch/842854/
diff mbox series

Patch

diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index 67b61d91d89b..2eba54770e6b 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -57,6 +57,8 @@  struct sockaddr_ll {
 #define PACKET_QDISC_BYPASS		20
 #define PACKET_ROLLOVER_STATS		21
 #define PACKET_FANOUT_DATA		22
+#define PACKET_SKIPTIMESTAMP		23
+#define PACKET_TIMESTAMP_NS64		24
 
 #define PACKET_FANOUT_HASH		0
 #define PACKET_FANOUT_LB		1
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 7432c6699818..ed6291b564a9 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -200,7 +200,7 @@  static void prb_retire_current_block(struct tpacket_kbdq_core *,
 		struct packet_sock *, unsigned int status);
 static int prb_queue_frozen(struct tpacket_kbdq_core *);
 static void prb_open_block(struct tpacket_kbdq_core *,
-		struct tpacket_block_desc *);
+		struct tpacket_block_desc *, struct packet_sock *);
 static void prb_retire_rx_blk_timer_expired(struct timer_list *);
 static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
 static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
@@ -439,52 +439,92 @@  static int __packet_get_status(struct packet_sock *po, void *frame)
 	}
 }
 
-static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
+static __u32 tpacket_get_timestamp(struct sk_buff *skb, __u32 *hi, __u32 *lo,
 				   unsigned int flags)
 {
+	struct packet_sock *po = pkt_sk(skb->sk);
 	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+	ktime_t stamp;
+	u32 type;
+
+	if (po->tp_skiptstamp)
+		return 0;
 
 	if (shhwtstamps &&
-	    (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
-	    ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
-		return TP_STATUS_TS_RAW_HARDWARE;
+	    (po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+	    shhwtstamps->hwtstamp) {
+		stamp = shhwtstamps->hwtstamp;
+		type = TP_STATUS_TS_RAW_HARDWARE;
+	} else if (skb->tstamp) {
+		stamp = skb->tstamp;
+		type = TP_STATUS_TS_SOFTWARE;
+	} else {
+		return 0;
+	}
 
-	if (ktime_to_timespec64_cond(skb->tstamp, ts))
-		return TP_STATUS_TS_SOFTWARE;
+	if (po->tp_tstamp_ns64) {
+		__u64 ns = ktime_to_ns(stamp);
 
-	return 0;
+		*hi = upper_32_bits(ns);
+		*lo = lower_32_bits(ns);
+	} else {
+		struct timespec64 ts = ktime_to_timespec64(stamp);
+
+		*hi = ts.tv_sec;
+		if (po->tp_version == TPACKET_V1)
+			*lo = ts.tv_nsec / NSEC_PER_USEC;
+		else
+			*lo = ts.tv_nsec;
+	}
+
+	return type;
+}
+
+static void packet_get_time(struct packet_sock *po, __u32 *hi, __u32 *lo)
+{	/* Fill *hi and *lo with the current real time in the socket's format. */
+	if (po->tp_skiptstamp) {
+		*hi = 0;	/* timestamps are skipped: report zero */
+		*lo = 0;
+	} else if (po->tp_tstamp_ns64) {
+		__u64 ns = ktime_get_real_ns();
+
+		*hi = upper_32_bits(ns);	/* 64-bit ns split over both words */
+		*lo = lower_32_bits(ns);
+	} else {
+		struct timespec64 ts;
+
+		ktime_get_real_ts64(&ts);
+		/* unsigned seconds overflow in y2106 here */
+		*hi = ts.tv_sec;
+		if (po->tp_version == TPACKET_V1)
+			*lo = ts.tv_nsec / NSEC_PER_USEC;	/* V1: microseconds */
+		else
+			*lo = ts.tv_nsec;	/* V2/V3: nanoseconds */
+	}
 }
 
 static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
 				    struct sk_buff *skb)
 {
 	union tpacket_uhdr h;
-	struct timespec64 ts;
-	__u32 ts_status;
+	__u32 ts_status, hi, lo;
 
-	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
+	if (!(ts_status = tpacket_get_timestamp(skb, &hi, &lo, po->tp_tstamp)))
 		return 0;
 
 	h.raw = frame;
-	/*
-	 * versions 1 through 3 overflow the timestamps in y2106, since they
-	 * all store the seconds in a 32-bit unsigned integer.
-	 * If we create a version 4, that should have a 64-bit timestamp,
-	 * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit
-	 * nanoseconds.
-	 */
 	switch (po->tp_version) {
 	case TPACKET_V1:
-		h.h1->tp_sec = ts.tv_sec;
-		h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
+		h.h1->tp_sec = hi;
+		h.h1->tp_usec = lo;
 		break;
 	case TPACKET_V2:
-		h.h2->tp_sec = ts.tv_sec;
-		h.h2->tp_nsec = ts.tv_nsec;
+		h.h2->tp_sec = hi;
+		h.h2->tp_nsec = lo;
 		break;
 	case TPACKET_V3:
-		h.h3->tp_sec = ts.tv_sec;
-		h.h3->tp_nsec = ts.tv_nsec;
+		h.h3->tp_sec = hi;
+		h.h3->tp_nsec = lo;
 		break;
 	default:
 		WARN(1, "TPACKET version not supported.\n");
@@ -633,7 +673,7 @@  static void init_prb_bdqc(struct packet_sock *po,
 	p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
 	prb_init_ft_ops(p1, req_u);
 	prb_setup_retire_blk_timer(po);
-	prb_open_block(p1, pbd);
+	prb_open_block(p1, pbd, po);
 }
 
 /*  Do NOT update the last_blk_num first.
@@ -730,7 +770,7 @@  static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
 				* opening a block thaws the queue,restarts timer
 				* Thawing/timer-refresh is a side effect.
 				*/
-				prb_open_block(pkc, pbd);
+				prb_open_block(pkc, pbd, po);
 				goto out;
 			}
 		}
@@ -812,10 +852,8 @@  static void prb_close_block(struct tpacket_kbdq_core *pkc1,
 		 * It shouldn't really happen as we don't close empty
 		 * blocks. See prb_retire_rx_blk_timer_expired().
 		 */
-		struct timespec64 ts;
-		ktime_get_real_ts64(&ts);
-		h1->ts_last_pkt.ts_sec = ts.tv_sec;
-		h1->ts_last_pkt.ts_nsec	= ts.tv_nsec;
+		packet_get_time(po, &h1->ts_last_pkt.ts_sec,
+				&h1->ts_last_pkt.ts_nsec);
 	}
 
 	smp_wmb();
@@ -841,9 +879,8 @@  static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
  *
  */
 static void prb_open_block(struct tpacket_kbdq_core *pkc1,
-	struct tpacket_block_desc *pbd1)
+	struct tpacket_block_desc *pbd1, struct packet_sock *po)
 {
-	struct timespec64 ts;
 	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
 
 	smp_rmb();
@@ -856,10 +893,8 @@  static void prb_open_block(struct tpacket_kbdq_core *pkc1,
 	BLOCK_NUM_PKTS(pbd1) = 0;
 	BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
 
-	ktime_get_real_ts64(&ts);
-
-	h1->ts_first_pkt.ts_sec = ts.tv_sec;
-	h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
+	packet_get_time(po, &h1->ts_first_pkt.ts_sec,
+			&h1->ts_first_pkt.ts_nsec);
 
 	pkc1->pkblk_start = (char *)pbd1;
 	pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
@@ -936,7 +971,7 @@  static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
 	 * open this block and return the offset where the first packet
 	 * needs to get stored.
 	 */
-	prb_open_block(pkc, pbd);
+	prb_open_block(pkc, pbd, po);
 	return (void *)pkc->nxt_offset;
 }
 
@@ -1068,7 +1103,7 @@  static void *__packet_lookup_frame_in_block(struct packet_sock *po,
 			 * opening a block also thaws the queue.
 			 * Thawing is a side effect.
 			 */
-			prb_open_block(pkc, pbd);
+			prb_open_block(pkc, pbd, po);
 		}
 	}
 
@@ -2191,8 +2226,8 @@  static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	unsigned long status = TP_STATUS_USER;
 	unsigned short macoff, netoff, hdrlen;
 	struct sk_buff *copy_skb = NULL;
-	struct timespec64 ts;
 	__u32 ts_status;
+	__u32 hi, lo;
 	bool is_drop_n_account = false;
 	bool do_vnet = false;
 
@@ -2318,8 +2353,8 @@  static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
 
-	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
-		ktime_get_real_ts64(&ts);
+	if (!(ts_status = tpacket_get_timestamp(skb, &hi, &lo, po->tp_tstamp)))
+		packet_get_time(po, &hi, &lo);
 
 	status |= ts_status;
 
@@ -2329,8 +2364,8 @@  static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		h.h1->tp_snaplen = snaplen;
 		h.h1->tp_mac = macoff;
 		h.h1->tp_net = netoff;
-		h.h1->tp_sec = ts.tv_sec;
-		h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
+		h.h1->tp_sec = hi;
+		h.h1->tp_usec = lo;
 		hdrlen = sizeof(*h.h1);
 		break;
 	case TPACKET_V2:
@@ -2338,8 +2373,8 @@  static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		h.h2->tp_snaplen = snaplen;
 		h.h2->tp_mac = macoff;
 		h.h2->tp_net = netoff;
-		h.h2->tp_sec = ts.tv_sec;
-		h.h2->tp_nsec = ts.tv_nsec;
+		h.h2->tp_sec = hi;
+		h.h2->tp_nsec = lo;
 		if (skb_vlan_tag_present(skb)) {
 			h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
 			h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
@@ -2360,8 +2395,8 @@  static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		h.h3->tp_snaplen = snaplen;
 		h.h3->tp_mac = macoff;
 		h.h3->tp_net = netoff;
-		h.h3->tp_sec  = ts.tv_sec;
-		h.h3->tp_nsec = ts.tv_nsec;
+		h.h3->tp_sec  = hi;
+		h.h3->tp_nsec = lo;
 		memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding));
 		hdrlen = sizeof(*h.h3);
 		break;
@@ -3792,6 +3827,30 @@  packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		po->tp_tstamp = val;
 		return 0;
 	}
+	case PACKET_SKIPTIMESTAMP:
+	{
+		int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+
+		po->tp_skiptstamp = val;
+		return 0;
+	}
+	case PACKET_TIMESTAMP_NS64:
+	{
+		int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+
+		po->tp_tstamp_ns64 = val;
+		return 0;
+	}
 	case PACKET_FANOUT:
 	{
 		int val;
@@ -3921,6 +3980,12 @@  static int packet_getsockopt(struct socket *sock, int level, int optname,
 	case PACKET_TIMESTAMP:
 		val = po->tp_tstamp;
 		break;
+	case PACKET_SKIPTIMESTAMP:
+		val = po->tp_skiptstamp;
+		break;
+	case PACKET_TIMESTAMP_NS64:
+		val = po->tp_tstamp_ns64;
+		break;
 	case PACKET_FANOUT:
 		val = (po->fanout ?
 		       ((u32)po->fanout->id |
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 562fbc155006..20b69512210f 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -128,6 +128,8 @@  struct packet_sock {
 	unsigned int		tp_reserve;
 	unsigned int		tp_loss:1;
 	unsigned int		tp_tx_has_off:1;
+	unsigned int		tp_skiptstamp:1;
+	unsigned int		tp_tstamp_ns64:1;
 	unsigned int		tp_tstamp;
 	struct net_device __rcu	*cached_dev;
 	int			(*xmit)(struct sk_buff *skb);