diff mbox series

[1/3] Bluetooth: add support for skb TX timestamping

Message ID d19187ab9842df2565d1d82beb171a8967d2ea94.1709409547.git.pav@iki.fi
State New
Headers show
Series Bluetooth: add TX timestamping for ISO and L2CAP | expand

Commit Message

Pauli Virtanen March 2, 2024, 8:07 p.m. UTC
Support enabling TX timestamping for some skbs, and track them until
packet completion.  Generate SCM_TSTAMP_SCHED when sending to driver,
and SCM_TSTAMP_SND at packet completion.

Make the default situation with no TX timestamping more efficient by
only counting packets in the queue when there is nothing to track.  When
there is something to track, we need to make clones, since the driver
may modify sent skbs.

Signed-off-by: Pauli Virtanen <pav@iki.fi>
---
 include/net/bluetooth/hci_core.h | 12 +++++
 net/bluetooth/hci_conn.c         | 78 ++++++++++++++++++++++++++++++++
 net/bluetooth/hci_core.c         |  5 ++
 net/bluetooth/hci_event.c        |  4 ++
 4 files changed, 99 insertions(+)

Comments

bluez.test.bot@gmail.com March 2, 2024, 8:33 p.m. UTC | #1
This is automated email and please do not reply to this email!

Dear submitter,

Thank you for submitting the patches to the linux bluetooth mailing list.
This is a CI test results with your patch series:
PW Link:https://patchwork.kernel.org/project/bluetooth/list/?series=831819

---Test result---

Test Summary:
CheckPatch                    PASS      2.84 seconds
GitLint                       PASS      0.55 seconds
SubjectPrefix                 PASS      0.18 seconds
BuildKernel                   PASS      27.56 seconds
CheckAllWarning               PASS      30.66 seconds
CheckSparse                   WARNING   35.36 seconds
CheckSmatch                   WARNING   97.77 seconds
BuildKernel32                 PASS      26.93 seconds
TestRunnerSetup               PASS      497.74 seconds
TestRunner_l2cap-tester       PASS      18.07 seconds
TestRunner_iso-tester         PASS      128.12 seconds
TestRunner_bnep-tester        PASS      4.73 seconds
TestRunner_mgmt-tester        FAIL      111.18 seconds
TestRunner_rfcomm-tester      PASS      7.27 seconds
TestRunner_sco-tester         PASS      14.86 seconds
TestRunner_ioctl-tester       PASS      7.67 seconds
TestRunner_mesh-tester        PASS      5.88 seconds
TestRunner_smp-tester         PASS      6.79 seconds
TestRunner_userchan-tester    PASS      4.91 seconds
IncrementalBuild              PASS      59.57 seconds

Details
##############################
Test: CheckSparse - WARNING
Desc: Run sparse tool with linux kernel
Output:
net/bluetooth/hci_event.c: note: in included file (through include/net/bluetooth/hci_core.h):
##############################
Test: CheckSmatch - WARNING
Desc: Run smatch tool with source
Output:
net/bluetooth/hci_event.c: note: in included file (through include/net/bluetooth/hci_core.h):
##############################
Test: TestRunner_mgmt-tester - FAIL
Desc: Run mgmt-tester with test-runner
Output:
Total: 492, Passed: 489 (99.4%), Failed: 1, Not Run: 2

Failed Test Cases
LL Privacy - Start Discovery 2 (Disable RL)          Failed       0.171 seconds


---
Regards,
Linux Bluetooth
Luiz Augusto von Dentz March 4, 2024, 2:36 p.m. UTC | #2
Hi Pauli,

On Sat, Mar 2, 2024 at 3:08 PM Pauli Virtanen <pav@iki.fi> wrote:
>
> Support enabling TX timestamping for some skbs, and track them until
> packet completion.  Generate SCM_TSTAMP_SCHED when sending to driver,
> and SCM_TSTAMP_SND at packet completion.
>
> Make the default situation with no TX timestamping more efficient by
> only counting packets in the queue when there is nothing to track.  When
> there is something to track, we need to make clones, since the driver
> may modify sent skbs.

Great work, really nice to see how this is shaping up.

> Signed-off-by: Pauli Virtanen <pav@iki.fi>
> ---
>  include/net/bluetooth/hci_core.h | 12 +++++
>  net/bluetooth/hci_conn.c         | 78 ++++++++++++++++++++++++++++++++
>  net/bluetooth/hci_core.c         |  5 ++
>  net/bluetooth/hci_event.c        |  4 ++
>  4 files changed, 99 insertions(+)
>
> diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
> index 56fb42df44a3..51b556612a6b 100644
> --- a/include/net/bluetooth/hci_core.h
> +++ b/include/net/bluetooth/hci_core.h
> @@ -267,6 +267,12 @@ struct adv_info {
>         struct delayed_work     rpa_expired_cb;
>  };
>
> +struct tx_comp_queue {
> +       struct sk_buff_head queue;
> +       unsigned int extra;
> +       unsigned int tracked;
> +};
> +
>  #define HCI_MAX_ADV_INSTANCES          5
>  #define HCI_DEFAULT_ADV_DURATION       2
>
> @@ -763,6 +769,8 @@ struct hci_conn {
>         struct sk_buff_head data_q;
>         struct list_head chan_list;
>
> +       struct tx_comp_queue tx_comp_queue;

I'd go with tx_q just to be short.

> +
>         struct delayed_work disc_work;
>         struct delayed_work auto_accept_work;
>         struct delayed_work idle_work;
> @@ -1546,6 +1554,10 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active);
>  void hci_conn_failed(struct hci_conn *conn, u8 status);
>  u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle);
>
> +void hci_conn_tx_comp_queue(struct hci_conn *conn, struct sk_buff *skb);
> +void hci_conn_tx_comp_dequeue(struct hci_conn *conn);

I'd drop the comp term here, so just hci_conn_tx_queue and
hci_conn_tx_dequeue since we want to do it no matter if the skb was
marked to be tracked or not.

> +void hci_tx_timestamp(struct sk_buff *skb, const struct sockcm_cookie *sockc);
> +
>  /*
>   * hci_conn_get() and hci_conn_put() are used to control the life-time of an
>   * "hci_conn" object. They do not guarantee that the hci_conn object is running,
> diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
> index 3ad74f76983b..f44d4b8fa0c6 100644
> --- a/net/bluetooth/hci_conn.c
> +++ b/net/bluetooth/hci_conn.c
> @@ -27,6 +27,7 @@
>
>  #include <linux/export.h>
>  #include <linux/debugfs.h>
> +#include <linux/errqueue.h>
>
>  #include <net/bluetooth/bluetooth.h>
>  #include <net/bluetooth/hci_core.h>
> @@ -973,6 +974,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
>         }
>
>         skb_queue_head_init(&conn->data_q);
> +       skb_queue_head_init(&conn->tx_comp_queue.queue);
>
>         INIT_LIST_HEAD(&conn->chan_list);
>         INIT_LIST_HEAD(&conn->link_list);
> @@ -1117,6 +1119,7 @@ void hci_conn_del(struct hci_conn *conn)
>         }
>
>         skb_queue_purge(&conn->data_q);
> +       skb_queue_purge(&conn->tx_comp_queue.queue);
>
>         /* Remove the connection from the list and cleanup its remaining
>          * state. This is a separate function since for some cases like
> @@ -2928,3 +2931,78 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
>
>         return hci_cmd_sync_queue_once(hdev, abort_conn_sync, conn, NULL);
>  }
> +
> +void hci_tx_timestamp(struct sk_buff *skb, const struct sockcm_cookie *sockc)
> +{
> +       /* This shall be called on a single skb of those generated by user
> +        * sendmsg(), and only when the sendmsg() does not return error to
> +        * user. This is required for keeping the tskey that increments here in
> +        * sync with possible sendmsg() counting by user.
> +        */
> +
> +       if (!skb || !sockc)
> +               return;
> +
> +       skb_setup_tx_timestamp(skb, sockc->tsflags);
> +}
> +
> +void hci_conn_tx_comp_queue(struct hci_conn *conn, struct sk_buff *skb)
> +{
> +       struct tx_comp_queue *comp = &conn->tx_comp_queue;
> +       bool track = false;
> +
> +       if (skb->sk) {
> +               if (skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)
> +                       __skb_tstamp_tx(skb, NULL, NULL, skb->sk,
> +                                       SCM_TSTAMP_SCHED);
> +
> +               if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP)
> +                       track = true;
> +       }
> +
> +       /* If nothing is tracked, just count extra skbs at the queue head */
> +       if (!track && !comp->tracked) {
> +               comp->extra++;
> +               return;
> +       }
> +
> +       if (track) {
> +               skb = skb_clone_sk(skb);
> +               if (!skb)
> +                       return;
> +
> +               comp->tracked++;
> +       } else {
> +               skb = skb_clone(skb, GFP_KERNEL);
> +               if (!skb)
> +                       return;
> +       }

Do we really need clones here? Can we just have references or that
doesn't work because the skb could be put in another queue by the
driver which would screw our own queuing?

> +       skb_queue_tail(&comp->queue, skb);
> +}
> +
> +void hci_conn_tx_comp_dequeue(struct hci_conn *conn)
> +{
> +       struct tx_comp_queue *comp = &conn->tx_comp_queue;
> +       struct sk_buff *skb;
> +
> +       /* If there are tracked skbs, the counted extra go before dequeuing real
> +        * skbs, to keep ordering. When nothing is tracked, the ordering doesn't
> +        * matter so dequeue real skbs first to get rid of them ASAP.
> +        */
> +       if (comp->extra && (comp->tracked || skb_queue_empty(&comp->queue))) {
> +               comp->extra--;
> +               return;
> +       }
> +
> +       skb = skb_dequeue(&comp->queue);
> +       if (!skb)
> +               return;
> +
> +       if (skb->sk) {
> +               comp->tracked--;
> +               skb_tstamp_tx(skb, NULL);
> +       }
> +
> +       kfree_skb(skb);
> +}
> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> index df3aa41e376d..f4af6e99d798 100644
> --- a/net/bluetooth/hci_core.c
> +++ b/net/bluetooth/hci_core.c
> @@ -3715,6 +3715,8 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev)
>                         hci_conn_enter_active_mode(chan->conn,
>                                                    bt_cb(skb)->force_active);
>
> +                       hci_conn_tx_comp_queue(chan->conn, skb);
> +
>                         hci_send_frame(hdev, skb);
>                         hdev->acl_last_tx = jiffies;
>
> @@ -3876,6 +3878,9 @@ static void hci_sched_iso(struct hci_dev *hdev)
>         while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, &quote))) {
>                 while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
>                         BT_DBG("skb %p len %d", skb, skb->len);
> +
> +                       hci_conn_tx_comp_queue(conn, skb);
> +
>                         hci_send_frame(hdev, skb);
>
>                         conn->sent++;

I'd assume we should be doing the same for SCO_LINK, or that was
intentionally left out? Perhaps it would be better to have something
like hci_conn_send_frame as a helper function that does take care of
updating whatever needs to be updated before calling hci_send_frame.

> diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
> index bffd2c7ff608..f56211d8ff7a 100644
> --- a/net/bluetooth/hci_event.c
> +++ b/net/bluetooth/hci_event.c
> @@ -4438,6 +4438,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
>                 struct hci_comp_pkts_info *info = &ev->handles[i];
>                 struct hci_conn *conn;
>                 __u16  handle, count;
> +               unsigned int i;
>
>                 handle = __le16_to_cpu(info->handle);
>                 count  = __le16_to_cpu(info->count);
> @@ -4448,6 +4449,9 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
>
>                 conn->sent -= count;
>
> +               for (i = 0; i < count; ++i)
> +                       hci_conn_tx_comp_dequeue(conn);
> +
>                 switch (conn->type) {
>                 case ACL_LINK:
>                         hdev->acl_cnt += count;
> --
> 2.44.0

Btw, one thing that might be great to have is the timestamp
information also forward to the monitor with use of SCM, btmon already
does track the NOCP event and print the amount of time it took but
that is probably no as precise as doing this in kernel, besides it
doesn't
Pauli Virtanen March 4, 2024, 11:38 p.m. UTC | #3
Hi Luiz,

ma, 2024-03-04 kello 09:36 -0500, Luiz Augusto von Dentz kirjoitti:
> Hi Pauli,
> 
> On Sat, Mar 2, 2024 at 3:08 PM Pauli Virtanen <pav@iki.fi> wrote:
> > 
> > Support enabling TX timestamping for some skbs, and track them until
> > packet completion.  Generate SCM_TSTAMP_SCHED when sending to driver,
> > and SCM_TSTAMP_SND at packet completion.
> > 
> > Make the default situation with no TX timestamping more efficient by
> > only counting packets in the queue when there is nothing to track.  When
> > there is something to track, we need to make clones, since the driver
> > may modify sent skbs.
> 
> Great work, really nice to see how this is shaping up.
> 
> > Signed-off-by: Pauli Virtanen <pav@iki.fi>
> > ---
> >  include/net/bluetooth/hci_core.h | 12 +++++
> >  net/bluetooth/hci_conn.c         | 78 ++++++++++++++++++++++++++++++++
> >  net/bluetooth/hci_core.c         |  5 ++
> >  net/bluetooth/hci_event.c        |  4 ++
> >  4 files changed, 99 insertions(+)
> > 
> > diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
> > index 56fb42df44a3..51b556612a6b 100644
> > --- a/include/net/bluetooth/hci_core.h
> > +++ b/include/net/bluetooth/hci_core.h
> > @@ -267,6 +267,12 @@ struct adv_info {
> >         struct delayed_work     rpa_expired_cb;
> >  };
> > 
> > +struct tx_comp_queue {
> > +       struct sk_buff_head queue;
> > +       unsigned int extra;
> > +       unsigned int tracked;
> > +};
> > +
> >  #define HCI_MAX_ADV_INSTANCES          5
> >  #define HCI_DEFAULT_ADV_DURATION       2
> > 
> > @@ -763,6 +769,8 @@ struct hci_conn {
> >         struct sk_buff_head data_q;
> >         struct list_head chan_list;
> > 
> > +       struct tx_comp_queue tx_comp_queue;
> 
> I'd go with tx_q just to be short.

Ack.

> 
> > +
> >         struct delayed_work disc_work;
> >         struct delayed_work auto_accept_work;
> >         struct delayed_work idle_work;
> > @@ -1546,6 +1554,10 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active);
> >  void hci_conn_failed(struct hci_conn *conn, u8 status);
> >  u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle);
> > 
> > +void hci_conn_tx_comp_queue(struct hci_conn *conn, struct sk_buff *skb);
> > +void hci_conn_tx_comp_dequeue(struct hci_conn *conn);
> 
> I'd drop the comp term here, so just hci_conn_tx_queue and
> hci_conn_tx_dequeue since we want to do it no matter if the skb was
> marked to be tracked or not.

Ack.

> 
> > +void hci_tx_timestamp(struct sk_buff *skb, const struct sockcm_cookie *sockc);
> > +
> >  /*
> >   * hci_conn_get() and hci_conn_put() are used to control the life-time of an
> >   * "hci_conn" object. They do not guarantee that the hci_conn object is running,
> > diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
> > index 3ad74f76983b..f44d4b8fa0c6 100644
> > --- a/net/bluetooth/hci_conn.c
> > +++ b/net/bluetooth/hci_conn.c
> > @@ -27,6 +27,7 @@
> > 
> >  #include <linux/export.h>
> >  #include <linux/debugfs.h>
> > +#include <linux/errqueue.h>
> > 
> >  #include <net/bluetooth/bluetooth.h>
> >  #include <net/bluetooth/hci_core.h>
> > @@ -973,6 +974,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
> >         }
> > 
> >         skb_queue_head_init(&conn->data_q);
> > +       skb_queue_head_init(&conn->tx_comp_queue.queue);
> > 
> >         INIT_LIST_HEAD(&conn->chan_list);
> >         INIT_LIST_HEAD(&conn->link_list);
> > @@ -1117,6 +1119,7 @@ void hci_conn_del(struct hci_conn *conn)
> >         }
> > 
> >         skb_queue_purge(&conn->data_q);
> > +       skb_queue_purge(&conn->tx_comp_queue.queue);
> > 
> >         /* Remove the connection from the list and cleanup its remaining
> >          * state. This is a separate function since for some cases like
> > @@ -2928,3 +2931,78 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
> > 
> >         return hci_cmd_sync_queue_once(hdev, abort_conn_sync, conn, NULL);
> >  }
> > +
> > +void hci_tx_timestamp(struct sk_buff *skb, const struct sockcm_cookie *sockc)
> > +{
> > +       /* This shall be called on a single skb of those generated by user
> > +        * sendmsg(), and only when the sendmsg() does not return error to
> > +        * user. This is required for keeping the tskey that increments here in
> > +        * sync with possible sendmsg() counting by user.
> > +        */
> > +
> > +       if (!skb || !sockc)
> > +               return;
> > +
> > +       skb_setup_tx_timestamp(skb, sockc->tsflags);
> > +}
> > +
> > +void hci_conn_tx_comp_queue(struct hci_conn *conn, struct sk_buff *skb)
> > +{
> > +       struct tx_comp_queue *comp = &conn->tx_comp_queue;
> > +       bool track = false;
> > +
> > +       if (skb->sk) {
> > +               if (skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)
> > +                       __skb_tstamp_tx(skb, NULL, NULL, skb->sk,
> > +                                       SCM_TSTAMP_SCHED);
> > +
> > +               if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP)
> > +                       track = true;
> > +       }
> > +
> > +       /* If nothing is tracked, just count extra skbs at the queue head */
> > +       if (!track && !comp->tracked) {
> > +               comp->extra++;
> > +               return;
> > +       }
> > +
> > +       if (track) {
> > +               skb = skb_clone_sk(skb);
> > +               if (!skb)
> > +                       return;
> > +
> > +               comp->tracked++;
> > +       } else {
> > +               skb = skb_clone(skb, GFP_KERNEL);
> > +               if (!skb)
> > +                       return;
> > +       }
> 
> Do we really need clones here? Can we just have references or that
> doesn't work because the skb could be put in another queue by the
> driver which would screw our own queuing?

Some drivers like in btmtkuart_send_frame are putting sent skbs to
queues and modifying the contents, so at least different queue
structure from skb_queue_* would be needed.

I'm guessing the assumption in drivers generally is they now own the
skb and can do what they want with them, so clones sound safer here if
there are no rules we can assume.

> 
> > +       skb_queue_tail(&comp->queue, skb);
> > +}
> > +
> > +void hci_conn_tx_comp_dequeue(struct hci_conn *conn)
> > +{
> > +       struct tx_comp_queue *comp = &conn->tx_comp_queue;
> > +       struct sk_buff *skb;
> > +
> > +       /* If there are tracked skbs, the counted extra go before dequeuing real
> > +        * skbs, to keep ordering. When nothing is tracked, the ordering doesn't
> > +        * matter so dequeue real skbs first to get rid of them ASAP.
> > +        */
> > +       if (comp->extra && (comp->tracked || skb_queue_empty(&comp->queue))) {
> > +               comp->extra--;
> > +               return;
> > +       }
> > +
> > +       skb = skb_dequeue(&comp->queue);
> > +       if (!skb)
> > +               return;
> > +
> > +       if (skb->sk) {
> > +               comp->tracked--;
> > +               skb_tstamp_tx(skb, NULL);
> > +       }
> > +
> > +       kfree_skb(skb);
> > +}
> > diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> > index df3aa41e376d..f4af6e99d798 100644
> > --- a/net/bluetooth/hci_core.c
> > +++ b/net/bluetooth/hci_core.c
> > @@ -3715,6 +3715,8 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev)
> >                         hci_conn_enter_active_mode(chan->conn,
> >                                                    bt_cb(skb)->force_active);
> > 
> > +                       hci_conn_tx_comp_queue(chan->conn, skb);
> > +
> >                         hci_send_frame(hdev, skb);
> >                         hdev->acl_last_tx = jiffies;
> > 
> > @@ -3876,6 +3878,9 @@ static void hci_sched_iso(struct hci_dev *hdev)
> >         while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, &quote))) {
> >                 while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
> >                         BT_DBG("skb %p len %d", skb, skb->len);
> > +
> > +                       hci_conn_tx_comp_queue(conn, skb);
> > +
> >                         hci_send_frame(hdev, skb);
> > 
> >                         conn->sent++;
> 
> I'd assume we should be doing the same for SCO_LINK, or that was
> intentionally left out? Perhaps it would be better to have something
> like hci_conn_send_frame as a helper function that does take care of
> updating whatever needs to be updated before calling hci_send_frame.

It looks like kernel is not currently enabling Synchronous Flow Control
(cf. Core 5.4 Vol 4, Part E page 1817), so there are no NoCP events for
SCO packets appearing, so the tx queue can't work.

Indeed, there's no flow control in hci_core.c:hci_sched_sco/esco,
sco_cnt is not decremented.

We could still send SCHED timestamps, but they probably wouldn't be
very useful for user if no queue can form in the socket.

Trying to enable TX timestamping if hdev doesn't support required flow
control mode maybe should fail with EOPNOTSUPP or something similar, so
user then knows no SND timestamps are going to be issued.

> > diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
> > index bffd2c7ff608..f56211d8ff7a 100644
> > --- a/net/bluetooth/hci_event.c
> > +++ b/net/bluetooth/hci_event.c
> > @@ -4438,6 +4438,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
> >                 struct hci_comp_pkts_info *info = &ev->handles[i];
> >                 struct hci_conn *conn;
> >                 __u16  handle, count;
> > +               unsigned int i;
> > 
> >                 handle = __le16_to_cpu(info->handle);
> >                 count  = __le16_to_cpu(info->count);
> > @@ -4448,6 +4449,9 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
> > 
> >                 conn->sent -= count;
> > 
> > +               for (i = 0; i < count; ++i)
> > +                       hci_conn_tx_comp_dequeue(conn);
> > +
> >                 switch (conn->type) {
> >                 case ACL_LINK:
> >                         hdev->acl_cnt += count;
> > --
> > 2.44.0
> 
> Btw, one thing that might be great to have is the timestamp
> information also forward to the monitor with use of SCM, btmon already
> does track the NOCP event and print the amount of time it took but
> that is probably no as precise as doing this in kernel, besides it
> doesn't

It's not clear to me how the SND tstamp should be signaled in the
monitor socket. For errqueue, similarly the other net TX tstamping, it
is sending one packet for each timestamp if there are multiple, along
with original data unless TSONLY was set, but not sure that's fine for
the monitor.
diff mbox series

Patch

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 56fb42df44a3..51b556612a6b 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -267,6 +267,12 @@  struct adv_info {
 	struct delayed_work	rpa_expired_cb;
 };
 
+struct tx_comp_queue {
+	struct sk_buff_head queue;
+	unsigned int extra;
+	unsigned int tracked;
+};
+
 #define HCI_MAX_ADV_INSTANCES		5
 #define HCI_DEFAULT_ADV_DURATION	2
 
@@ -763,6 +769,8 @@  struct hci_conn {
 	struct sk_buff_head data_q;
 	struct list_head chan_list;
 
+	struct tx_comp_queue tx_comp_queue;
+
 	struct delayed_work disc_work;
 	struct delayed_work auto_accept_work;
 	struct delayed_work idle_work;
@@ -1546,6 +1554,10 @@  void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active);
 void hci_conn_failed(struct hci_conn *conn, u8 status);
 u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle);
 
+void hci_conn_tx_comp_queue(struct hci_conn *conn, struct sk_buff *skb);
+void hci_conn_tx_comp_dequeue(struct hci_conn *conn);
+void hci_tx_timestamp(struct sk_buff *skb, const struct sockcm_cookie *sockc);
+
 /*
  * hci_conn_get() and hci_conn_put() are used to control the life-time of an
  * "hci_conn" object. They do not guarantee that the hci_conn object is running,
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 3ad74f76983b..f44d4b8fa0c6 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -27,6 +27,7 @@ 
 
 #include <linux/export.h>
 #include <linux/debugfs.h>
+#include <linux/errqueue.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -973,6 +974,7 @@  struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
 	}
 
 	skb_queue_head_init(&conn->data_q);
+	skb_queue_head_init(&conn->tx_comp_queue.queue);
 
 	INIT_LIST_HEAD(&conn->chan_list);
 	INIT_LIST_HEAD(&conn->link_list);
@@ -1117,6 +1119,7 @@  void hci_conn_del(struct hci_conn *conn)
 	}
 
 	skb_queue_purge(&conn->data_q);
+	skb_queue_purge(&conn->tx_comp_queue.queue);
 
 	/* Remove the connection from the list and cleanup its remaining
 	 * state. This is a separate function since for some cases like
@@ -2928,3 +2931,78 @@  int hci_abort_conn(struct hci_conn *conn, u8 reason)
 
 	return hci_cmd_sync_queue_once(hdev, abort_conn_sync, conn, NULL);
 }
+
+void hci_tx_timestamp(struct sk_buff *skb, const struct sockcm_cookie *sockc)
+{
+	/* This shall be called on a single skb of those generated by user
+	 * sendmsg(), and only when the sendmsg() does not return error to
+	 * user. This is required for keeping the tskey that increments here in
+	 * sync with possible sendmsg() counting by user.
+	 */
+
+	if (!skb || !sockc)
+		return;
+
+	skb_setup_tx_timestamp(skb, sockc->tsflags);
+}
+
+void hci_conn_tx_comp_queue(struct hci_conn *conn, struct sk_buff *skb)
+{
+	struct tx_comp_queue *comp = &conn->tx_comp_queue;
+	bool track = false;
+
+	if (skb->sk) {
+		if (skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)
+			__skb_tstamp_tx(skb, NULL, NULL, skb->sk,
+					SCM_TSTAMP_SCHED);
+
+		if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP)
+			track = true;
+	}
+
+	/* If nothing is tracked, just count extra skbs at the queue head */
+	if (!track && !comp->tracked) {
+		comp->extra++;
+		return;
+	}
+
+	if (track) {
+		skb = skb_clone_sk(skb);
+		if (!skb)
+			return;
+
+		comp->tracked++;
+	} else {
+		skb = skb_clone(skb, GFP_KERNEL);
+		if (!skb)
+			return;
+	}
+
+	skb_queue_tail(&comp->queue, skb);
+}
+
+void hci_conn_tx_comp_dequeue(struct hci_conn *conn)
+{
+	struct tx_comp_queue *comp = &conn->tx_comp_queue;
+	struct sk_buff *skb;
+
+	/* If there are tracked skbs, the counted extra go before dequeuing real
+	 * skbs, to keep ordering. When nothing is tracked, the ordering doesn't
+	 * matter so dequeue real skbs first to get rid of them ASAP.
+	 */
+	if (comp->extra && (comp->tracked || skb_queue_empty(&comp->queue))) {
+		comp->extra--;
+		return;
+	}
+
+	skb = skb_dequeue(&comp->queue);
+	if (!skb)
+		return;
+
+	if (skb->sk) {
+		comp->tracked--;
+		skb_tstamp_tx(skb, NULL);
+	}
+
+	kfree_skb(skb);
+}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index df3aa41e376d..f4af6e99d798 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3715,6 +3715,8 @@  static void hci_sched_acl_pkt(struct hci_dev *hdev)
 			hci_conn_enter_active_mode(chan->conn,
 						   bt_cb(skb)->force_active);
 
+			hci_conn_tx_comp_queue(chan->conn, skb);
+
 			hci_send_frame(hdev, skb);
 			hdev->acl_last_tx = jiffies;
 
@@ -3876,6 +3878,9 @@  static void hci_sched_iso(struct hci_dev *hdev)
 	while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, &quote))) {
 		while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
 			BT_DBG("skb %p len %d", skb, skb->len);
+
+			hci_conn_tx_comp_queue(conn, skb);
+
 			hci_send_frame(hdev, skb);
 
 			conn->sent++;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index bffd2c7ff608..f56211d8ff7a 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -4438,6 +4438,7 @@  static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
 		struct hci_comp_pkts_info *info = &ev->handles[i];
 		struct hci_conn *conn;
 		__u16  handle, count;
+		unsigned int i;
 
 		handle = __le16_to_cpu(info->handle);
 		count  = __le16_to_cpu(info->count);
@@ -4448,6 +4449,9 @@  static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
 
 		conn->sent -= count;
 
+		for (i = 0; i < count; ++i)
+			hci_conn_tx_comp_dequeue(conn);
+
 		switch (conn->type) {
 		case ACL_LINK:
 			hdev->acl_cnt += count;