diff mbox series

[7/7] TC-ETF support PTP clocks

Message ID 20201001205141.8885-8-erez.geva.ext@siemens.com
State New
Headers show
Series TC-ETF support PTP clocks series | expand

Commit Message

Geva, Erez Oct. 1, 2020, 8:51 p.m. UTC
- Add support for using a POSIX dynamic clock with
    Traffic control Earliest TxTime First (ETF) Qdisc.

Signed-off-by: Erez Geva <erez.geva.ext@siemens.com>
---
 include/uapi/linux/net_tstamp.h |   5 ++
 net/sched/sch_etf.c             | 100 +++++++++++++++++++++++++++++---
 2 files changed, 97 insertions(+), 8 deletions(-)

Comments

Thomas Gleixner Oct. 2, 2020, 12:33 a.m. UTC | #1
On Thu, Oct 01 2020 at 22:51, Erez Geva wrote:

>   - Add support for using a POSIX dynamic clock with
>     Traffic control Earliest TxTime First (ETF) Qdisc.

....

> --- a/include/uapi/linux/net_tstamp.h
> +++ b/include/uapi/linux/net_tstamp.h
> @@ -167,6 +167,11 @@ enum txtime_flags {
>  	SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_FLAGS_LAST - 1) |
>  				 SOF_TXTIME_FLAGS_LAST
>  };
> +/*
> + * Clock ID to use with POSIX clocks
> + * The ID must be u8 to fit in (struct sock)->sk_clockid
> + */
> +#define SOF_TXTIME_POSIX_CLOCK_ID (0x77)

Random number with a random name. 
  
>  struct sock_txtime {
>  	__kernel_clockid_t	clockid;/* reference clockid */
> diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
> index c0de4c6f9299..8e3e0a61fa58 100644
> --- a/net/sched/sch_etf.c
> +++ b/net/sched/sch_etf.c
> @@ -15,6 +15,7 @@
>  #include <linux/rbtree.h>
>  #include <linux/skbuff.h>
>  #include <linux/posix-timers.h>
> +#include <linux/posix-clock.h>
>  #include <net/netlink.h>
>  #include <net/sch_generic.h>
>  #include <net/pkt_sched.h>
> @@ -40,19 +41,40 @@ struct etf_sched_data {
>  	struct rb_root_cached head;
>  	struct qdisc_watchdog watchdog;
>  	ktime_t (*get_time)(void);
> +#ifdef CONFIG_POSIX_TIMERS
> +	struct posix_clock *pclock; /* pointer to a posix clock */

Tail comments suck because they disturb the reading flow and this
comment has absolute zero value.

Comments are required to explain things which are not obvious...

> +#endif /* CONFIG_POSIX_TIMERS */

Also this #ifdeffery is bonkers. How is TSN supposed to work without
POSIX_TIMERS in the first place?

>  static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
>  	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
>  };
>  
> +static inline ktime_t get_now(struct Qdisc *sch, struct etf_sched_data *q)
> +{
> +#ifdef CONFIG_POSIX_TIMERS
> +	if (IS_ERR_OR_NULL(q->get_time)) {
> +		struct timespec64 ts;
> +		int err = posix_clock_gettime(q->pclock, &ts);
> +
> +		if (err) {
> +			pr_warn("Clock is disabled (%d) for queue %d\n",
> +				err, q->queue);
> +			return 0;

That's really useful error handling.

> +		}
> +		return timespec64_to_ktime(ts);
> +	}
> +#endif /* CONFIG_POSIX_TIMERS */
> +	return q->get_time();
> +}
> +
>  static inline int validate_input_params(struct tc_etf_qopt *qopt,
>  					struct netlink_ext_ack *extack)
>  {
>  	/* Check if params comply to the following rules:
>  	 *	* Clockid and delta must be valid.
>  	 *
> -	 *	* Dynamic clockids are not supported.
> +	 *	* Dynamic CPU clockids are not supported.
>  	 *
>  	 *	* Delta must be a positive or zero integer.
>  	 *
> @@ -60,11 +82,22 @@ static inline int validate_input_params(struct tc_etf_qopt *qopt,
>  	 * expect that system clocks have been synchronized to PHC.
>  	 */
>  	if (qopt->clockid < 0) {
> +#ifdef CONFIG_POSIX_TIMERS
> +		/**
> +		 * Use of PTP clock through a posix clock.
> +		 * The TC application must open the posix clock device file
> +		 * and use the dynamic clockid from the file description.

What? How is the code which calls into this guaranteed to have a valid
file descriptor open for a particular dynamic posix clock?

> +		 */
> +		if (!is_clockid_fd_clock(qopt->clockid)) {
> +			NL_SET_ERR_MSG(extack,
> +				       "Dynamic CPU clockids are not supported");
> +			return -EOPNOTSUPP;
> +		}
> +#else /* CONFIG_POSIX_TIMERS */
>  		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
>  		return -ENOTSUPP;
> -	}
> -
> -	if (qopt->clockid != CLOCK_TAI) {
> +#endif /* CONFIG_POSIX_TIMERS */
> +	} else if (qopt->clockid != CLOCK_TAI) {
>  		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
>  		return -EINVAL;
>  	}
> @@ -103,7 +136,7 @@ static bool is_packet_valid(struct Qdisc *sch, struct etf_sched_data *q,
>  		return false;
>  
>  skip:
> -	now = q->get_time();
> +	now = get_now(sch, q);

Yuck.

is_packet_valid() is invoked via:

    __dev_queue_xmit()
      __dev_xmit_skb()
         etf_enqueue_timesortedlist()
           is_packet_valid()

__dev_queue_xmit() does

       rcu_read_lock_bh();

and your get_now() does

    posix_clock_gettime()
       	down_read(&clk->rwsem);

 ----> FAIL

down_read() might sleep and cannot be called from a BH disabled
region. This clearly has never been tested with any mandatory debug
option enabled. Why am I not surprised?

Aside of accessing PCH clock being slow at hell this cannot ever work
and there is no way to make it work in any consistent form.

If you have several NICs on several PCH domains then all of these
domains should have one thing in common: CLOCK_TAI and the frequency.

If that's not the case then the overall system design is just broken,
but yes I'm aware of the fact that some industries decided to have their
own definition of time and frequency just because they can.

Handling different starting points of the domains interpretation of
"TAI" is doable because that's just an offset, but having different
frequencies is a nightmare.

So if such a trainwreck is a valid use case which needs to be supported
then just duct taping it into the code is not going to fly.

The only way to make this work is to sit down and actually design a
mechanism which allows to correlate the various notions of PCH time with
the systems CLOCK_TAI, i.e. providing offset and frequency correction.

Also you want to explain how user space applications should deal with
these different time domains in a sane way.

Thanks,

        tglx
Geva, Erez Oct. 2, 2020, 11:05 a.m. UTC | #2
On 02/10/2020 02:33, Thomas Gleixner wrote:
> On Thu, Oct 01 2020 at 22:51, Erez Geva wrote:

>

>>    - Add support for using a POSIX dynamic clock with

>>      Traffic control Earliest TxTime First (ETF) Qdisc.

>

> ....

>

>> --- a/include/uapi/linux/net_tstamp.h

>> +++ b/include/uapi/linux/net_tstamp.h

>> @@ -167,6 +167,11 @@ enum txtime_flags {

>>      SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_FLAGS_LAST - 1) |

>>                               SOF_TXTIME_FLAGS_LAST

>>   };

>> +/*

>> + * Clock ID to use with POSIX clocks

>> + * The ID must be u8 to fit in (struct sock)->sk_clockid

>> + */

>> +#define SOF_TXTIME_POSIX_CLOCK_ID (0x77)

>

> Random number with a random name.

>

>>   struct sock_txtime {

>>      __kernel_clockid_t      clockid;/* reference clockid */

>> diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c

>> index c0de4c6f9299..8e3e0a61fa58 100644

>> --- a/net/sched/sch_etf.c

>> +++ b/net/sched/sch_etf.c

>> @@ -15,6 +15,7 @@

>>   #include <linux/rbtree.h>

>>   #include <linux/skbuff.h>

>>   #include <linux/posix-timers.h>

>> +#include <linux/posix-clock.h>

>>   #include <net/netlink.h>

>>   #include <net/sch_generic.h>

>>   #include <net/pkt_sched.h>

>> @@ -40,19 +41,40 @@ struct etf_sched_data {

>>      struct rb_root_cached head;

>>      struct qdisc_watchdog watchdog;

>>      ktime_t (*get_time)(void);

>> +#ifdef CONFIG_POSIX_TIMERS

>> +    struct posix_clock *pclock; /* pointer to a posix clock */

>

> Tail comments suck because they disturb the reading flow and this

> comment has absolute zero value.

>

> Comments are required to explain things which are not obvious...

>

>> +#endif /* CONFIG_POSIX_TIMERS */

>

> Also this #ifdeffery is bonkers. How is TSN supposed to work without

> POSIX_TIMERS in the first place?

>

>>   static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {

>>      [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },

>>   };

>>

>> +static inline ktime_t get_now(struct Qdisc *sch, struct etf_sched_data *q)

>> +{

>> +#ifdef CONFIG_POSIX_TIMERS

>> +    if (IS_ERR_OR_NULL(q->get_time)) {

>> +            struct timespec64 ts;

>> +            int err = posix_clock_gettime(q->pclock, &ts);

>> +

>> +            if (err) {

>> +                    pr_warn("Clock is disabled (%d) for queue %d\n",

>> +                            err, q->queue);

>> +                    return 0;

>

> That's really useful error handling.

>

>> +            }

>> +            return timespec64_to_ktime(ts);

>> +    }

>> +#endif /* CONFIG_POSIX_TIMERS */

>> +    return q->get_time();

>> +}

>> +

>>   static inline int validate_input_params(struct tc_etf_qopt *qopt,

>>                                      struct netlink_ext_ack *extack)

>>   {

>>      /* Check if params comply to the following rules:

>>       *      * Clockid and delta must be valid.

>>       *

>> -     *      * Dynamic clockids are not supported.

>> +     *      * Dynamic CPU clockids are not supported.

>>       *

>>       *      * Delta must be a positive or zero integer.

>>       *

>> @@ -60,11 +82,22 @@ static inline int validate_input_params(struct tc_etf_qopt *qopt,

>>       * expect that system clocks have been synchronized to PHC.

>>       */

>>      if (qopt->clockid < 0) {

>> +#ifdef CONFIG_POSIX_TIMERS

>> +            /**

>> +             * Use of PTP clock through a posix clock.

>> +             * The TC application must open the posix clock device file

>> +             * and use the dynamic clockid from the file description.

>

> What? How is the code which calls into this guaranteed to have a valid

> file descriptor open for a particular dynamic posix clock?

>

>> +             */

>> +            if (!is_clockid_fd_clock(qopt->clockid)) {

>> +                    NL_SET_ERR_MSG(extack,

>> +                                   "Dynamic CPU clockids are not supported");

>> +                    return -EOPNOTSUPP;

>> +            }

>> +#else /* CONFIG_POSIX_TIMERS */

>>              NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");

>>              return -ENOTSUPP;

>> -    }

>> -

>> -    if (qopt->clockid != CLOCK_TAI) {

>> +#endif /* CONFIG_POSIX_TIMERS */

>> +    } else if (qopt->clockid != CLOCK_TAI) {

>>              NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");

>>              return -EINVAL;

>>      }

>> @@ -103,7 +136,7 @@ static bool is_packet_valid(struct Qdisc *sch, struct etf_sched_data *q,

>>              return false;

>>

>>   skip:

>> -    now = q->get_time();

>> +    now = get_now(sch, q);

>

> Yuck.

>

> is_packet_valid() is invoked via:

>

>      __dev_queue_xmit()

>        __dev_xmit_skb()

>           etf_enqueue_timesortedlist()

>             is_packet_valid()

>

> __dev_queue_xmit() does

>

>         rcu_read_lock_bh();

>

> and your get_now() does

>

>      posix_clock_gettime()

>               down_read(&clk->rwsem);

>

>   ----> FAIL

>

> down_read() might sleep and cannot be called from a BH disabled

> region. This clearly has never been tested with any mandatory debug

> option enabled. Why am I not surprised?

>

> Aside of accessing PCH clock being slow at hell this cannot ever work

> and there is no way to make it work in any consistent form.

>

> If you have several NICs on several PCH domains then all of these

> domains should have one thing in common: CLOCK_TAI and the frequency.

>

> If that's not the case then the overall system design is just broken,

> but yes I'm aware of the fact that some industries decided to have their

> own definition of time and frequency just because they can.

>

> Handling different starting points of the domains interpretation of

> "TAI" is doable because that's just an offset, but having different

> frequencies is a nightmare.

>

> So if such a trainwreck is a valid use case which needs to be supported

> then just duct taping it into the code is not going to fly.

>

> The only way to make this work is to sit down and actually design a

> mechanism which allows to correlate the various notions of PCH time with

> the systems CLOCK_TAI, i.e. providing offset and frequency correction.

>

> Also you want to explain how user space applications should deal with

> these different time domains in a sane way.

>

> Thanks,

>

>          tglx

>


Thank you for your quick and depth feedback.

Erez
diff mbox series

Patch

diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 7ed0b3d1c00a..ecbaac6230d7 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -167,6 +167,11 @@  enum txtime_flags {
 	SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_FLAGS_LAST - 1) |
 				 SOF_TXTIME_FLAGS_LAST
 };
+/*
+ * Clock ID to use with POSIX clocks
+ * The ID must be u8 to fit in (struct sock)->sk_clockid
+ */
+#define SOF_TXTIME_POSIX_CLOCK_ID (0x77)
 
 struct sock_txtime {
 	__kernel_clockid_t	clockid;/* reference clockid */
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index c0de4c6f9299..8e3e0a61fa58 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -15,6 +15,7 @@ 
 #include <linux/rbtree.h>
 #include <linux/skbuff.h>
 #include <linux/posix-timers.h>
+#include <linux/posix-clock.h>
 #include <net/netlink.h>
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
@@ -40,19 +41,40 @@  struct etf_sched_data {
 	struct rb_root_cached head;
 	struct qdisc_watchdog watchdog;
 	ktime_t (*get_time)(void);
+#ifdef CONFIG_POSIX_TIMERS
+	struct posix_clock *pclock; /* pointer to a posix clock */
+#endif /* CONFIG_POSIX_TIMERS */
 };
 
 static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
 	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
 };
 
+static inline ktime_t get_now(struct Qdisc *sch, struct etf_sched_data *q)
+{
+#ifdef CONFIG_POSIX_TIMERS
+	if (IS_ERR_OR_NULL(q->get_time)) {
+		struct timespec64 ts;
+		int err = posix_clock_gettime(q->pclock, &ts);
+
+		if (err) {
+			pr_warn("Clock is disabled (%d) for queue %d\n",
+				err, q->queue);
+			return 0;
+		}
+		return timespec64_to_ktime(ts);
+	}
+#endif /* CONFIG_POSIX_TIMERS */
+	return q->get_time();
+}
+
 static inline int validate_input_params(struct tc_etf_qopt *qopt,
 					struct netlink_ext_ack *extack)
 {
 	/* Check if params comply to the following rules:
 	 *	* Clockid and delta must be valid.
 	 *
-	 *	* Dynamic clockids are not supported.
+	 *	* Dynamic CPU clockids are not supported.
 	 *
 	 *	* Delta must be a positive or zero integer.
 	 *
@@ -60,11 +82,22 @@  static inline int validate_input_params(struct tc_etf_qopt *qopt,
 	 * expect that system clocks have been synchronized to PHC.
 	 */
 	if (qopt->clockid < 0) {
+#ifdef CONFIG_POSIX_TIMERS
+		/**
+		 * Use of PTP clock through a posix clock.
+		 * The TC application must open the posix clock device file
+		 * and use the dynamic clockid from the file description.
+		 */
+		if (!is_clockid_fd_clock(qopt->clockid)) {
+			NL_SET_ERR_MSG(extack,
+				       "Dynamic CPU clockids are not supported");
+			return -EOPNOTSUPP;
+		}
+#else /* CONFIG_POSIX_TIMERS */
 		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
 		return -ENOTSUPP;
-	}
-
-	if (qopt->clockid != CLOCK_TAI) {
+#endif /* CONFIG_POSIX_TIMERS */
+	} else if (qopt->clockid != CLOCK_TAI) {
 		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
 		return -EINVAL;
 	}
@@ -103,7 +136,7 @@  static bool is_packet_valid(struct Qdisc *sch, struct etf_sched_data *q,
 		return false;
 
 skip:
-	now = q->get_time();
+	now = get_now(sch, q);
 	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
 		return false;
 
@@ -133,6 +166,27 @@  static void reset_watchdog(struct Qdisc *sch, struct etf_sched_data *q)
 	}
 
 	next = ktime_sub_ns(skb->tstamp, q->delta + NET_SCH_ETF_TIMER_RANGE);
+#ifdef CONFIG_POSIX_TIMERS
+	if (!IS_ERR_OR_NULL(q->pclock)) {
+		s64 now;
+		struct timespec64 ts;
+		int err = posix_clock_gettime(q->pclock, &ts);
+
+		if (err) {
+			pr_warn("Clock is disabled (%d) for queue %d\n",
+				err, q->queue);
+			return;
+		}
+		now = timespec64_to_ns(&ts);
+		if (now == 0) {
+			pr_warn("Clock is not running for queue %d\n",
+				q->queue);
+			return;
+		}
+		/* Convert PHC to CLOCK_TAI as that is the timer clock */
+		next = ktime_add(next, ktime_sub_ns(ktime_get_clocktai(), now));
+	}
+#endif /* CONFIG_POSIX_TIMERS */
 	qdisc_watchdog_schedule_soon_ns(&q->watchdog, ktime_to_ns(next),
 					NET_SCH_ETF_TIMER_RANGE);
 }
@@ -263,7 +317,7 @@  static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
 	if (!skb)
 		return NULL;
 
-	now = q->get_time();
+	now = get_now(sch, q);
 
 	/* Drop if packet has expired while in queue. */
 	if (ktime_before(skb->tstamp, now)) {
@@ -354,6 +408,7 @@  static int etf_init(struct Qdisc *sch, struct nlattr *opt,
 	struct nlattr *tb[TCA_ETF_MAX + 1];
 	struct tc_etf_qopt *qopt;
 	int err;
+	clockid_t clockid;
 
 	if (!opt) {
 		NL_SET_ERR_MSG(extack,
@@ -391,13 +446,17 @@  static int etf_init(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	/* Everything went OK, save the parameters used. */
+	clockid = qopt->clockid;
 	q->delta = qopt->delta;
 	q->clockid = qopt->clockid;
 	q->offload = OFFLOAD_IS_ON(qopt);
 	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
 	q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);
+#ifdef CONFIG_POSIX_TIMERS
+	q->pclock = NULL;
+#endif /* CONFIG_POSIX_TIMERS */
 
-	switch (q->clockid) {
+	switch (clockid) {
 	case CLOCK_REALTIME:
 		q->get_time = ktime_get_real;
 		break;
@@ -411,11 +470,31 @@  static int etf_init(struct Qdisc *sch, struct nlattr *opt,
 		q->get_time = ktime_get_clocktai;
 		break;
 	default:
+#ifdef CONFIG_POSIX_TIMERS
+		q->pclock = posix_clock_get_clock(clockid);
+		if (IS_ERR_OR_NULL(q->pclock)) {
+			NL_SET_ERR_MSG(extack, "Clockid does not exist");
+			return PTR_ERR(q->pclock);
+		}
+		q->clockid = SOF_TXTIME_POSIX_CLOCK_ID;
+		/**
+		 * Posix clocks do not support high-resolution timers
+		 * Using the TAI clock is close enough (for relative sleeps)
+		 */
+		clockid = CLOCK_TAI;
+		/**
+		 * Posix clock do not provide direct ktime function
+		 * We need to call posix_clock_gettime()
+		 */
+		q->get_time = NULL;
+		break;
+#else /* CONFIG_POSIX_TIMERS */
 		NL_SET_ERR_MSG(extack, "Clockid is not supported");
 		return -ENOTSUPP;
+#endif /* CONFIG_POSIX_TIMERS */
 	}
 
-	qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
+	qdisc_watchdog_init_clockid(&q->watchdog, sch, clockid);
 
 	return 0;
 }
@@ -463,6 +542,11 @@  static void etf_destroy(struct Qdisc *sch)
 		qdisc_watchdog_cancel(&q->watchdog);
 
 	etf_disable_offload(dev, q);
+#ifdef CONFIG_POSIX_TIMERS
+	/* Release posix clock */
+	if (!IS_ERR_OR_NULL(q->pclock))
+		posix_clock_put_clock(q->pclock);
+#endif /* CONFIG_POSIX_TIMERS */
 }
 
 static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)