usb: gadget: f_uac2: modulate playback data rate

Message ID 1409292819-2714-1-git-send-email-jaswinder.singh@linaro.org
State New
Headers show

Commit Message

Jassi Brar Aug. 29, 2014, 6:13 a.m.
The UAC2 function driver currently responds to all packets at all times
with wMaxPacketSize packets. That results in way too fast audio
playback as the function driver (which is in fact supposed to define
the audio stream pace) delivers as fast as it can.

We need data rate to match, as accurately as possible, the sampling rate
expressed by the UAC2 topology to the Host.

We do this by sending packets of varying length (1 sample more than usual)
in a pattern so that we get the desired data rate over a period of a
second or sooner. The payload pattern is calculated only once (using
"Alan's Algo"), when the Host enables the interface, and saved in an
array so that the 2 ping-pong usb_requests directly index into the
pattern array to figure out the payload length they are supposed to
transfer next. Note that the increased overhead in agdev_iso_complete()
is almost zero.

Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/usb/gadget/function/f_uac2.c | 70 ++++++++++++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 3 deletions(-)

Comments

Jassi Brar Aug. 29, 2014, 6:22 a.m. | #1
On Fri, Aug 29, 2014 at 11:43 AM, Jassi Brar <jaswinder.singh@linaro.org> wrote:
> The UAC2 function driver currently responds to all packets at all times
> with wMaxPacketSize packets. That results in way too fast audio
> playback as the function driver (which is in fact supposed to define
> the audio stream pace) delivers as fast as it can.
>
> We need data rate to match, as accurately as possible, the sampling rate
> expressed by the UAC2 topology to the Host.
>
> We do this by sending packets of varying length (1 sample more than usual)
> in a pattern so that we get the desired data rate over a period of a
> second or sooner. The payload pattern is calculated only once (using
> "Alan's Algo"), when the Host enables the interface, and saved in an
> array so that the 2 ping-pong usb_requests directly index into the
> pattern array to figure out the payload length they are supposed to
> transfer next. Note that the increased overhead in agdev_iso_complete()
> is almost zero.
>
> Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
> ---
>  drivers/usb/gadget/function/f_uac2.c | 70 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 67 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
> index 246a778..84fd3b0 100644
> --- a/drivers/usb/gadget/function/f_uac2.c
> +++ b/drivers/usb/gadget/function/f_uac2.c
> @@ -59,8 +59,15 @@ const char *uac2_name = "snd_uac2";
>  struct uac2_req {
>         struct uac2_rtd_params *pp; /* parent param */
>         struct usb_request *req;
> +       unsigned idx; /* current element of length-pattern loop */
>  };
>
> +/*
> + * 5512.5Hz is going to need the maximum number of elements (80),
> + * in the length-pattern loop, among standard ALSA supported rates.
> + */
> +#define MAX_LOOP_LEN   80
> +
>  struct uac2_rtd_params {
>         struct snd_uac2_chip *uac2; /* parent chip */
>         bool ep_enabled; /* if the ep is enabled */
> @@ -80,6 +87,9 @@ struct uac2_rtd_params {
>         unsigned max_psize;
>         struct uac2_req ureq[USB_XFERS];
>
> +       unsigned pattern[MAX_LOOP_LEN];
> +       unsigned plen; /* valid entries in pattern[] */
> +
>         spinlock_t lock;
>  };
>
> @@ -191,8 +201,12 @@ agdev_iso_complete(struct usb_ep *ep, struct usb_request *req)
>
>         spin_lock_irqsave(&prm->lock, flags);
>
> -       if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
> +       if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
> +               /* Update length for next payload */
> +               ur->idx = (ur->idx + USB_XFERS) % prm->plen;
> +               req->length = prm->pattern[ur->idx];
>                 req->actual = req->length;
> +       }
>
>         pending = prm->hw_ptr % prm->period_size;
>         pending += req->actual;
> @@ -1066,6 +1080,31 @@ err:
>         return -EINVAL;
>  }
>
> +/*
> + * Find optimal pattern of payloads for a given number
> + * of samples and maximum sync period (in ms) over which
> + * we have to distribute them uniformly.
> + */
> +static unsigned
> +get_pattern(unsigned samples, unsigned sync, unsigned *pt)
> +{
> +       unsigned n, x = 0, i = 0, p = samples % sync;
> +
> +       do {
> +               x += p;
> +               n = samples / sync;
> +               if (x >= sync) {
> +                       n += 1;
> +                       x -= sync;
> +               }
> +               if (pt)
> +                       pt[i] = n;
> +               i++;
> +       } while (x);
> +
> +       return i;
> +}
> +
>  static int
>  afunc_set_alt(struct usb_function *fn, unsigned intf, unsigned alt)
>  {
> @@ -1097,11 +1136,35 @@ afunc_set_alt(struct usb_function *fn, unsigned intf, unsigned alt)
>         if (intf == agdev->as_out_intf) {
>                 ep = agdev->out_ep;
>                 prm = &uac2->c_prm;
> +               prm->plen = 1;
> +               prm->pattern[0] = prm->max_psize;
>                 config_ep_by_speed(gadget, fn, ep);
>                 agdev->as_out_alt = alt;
>         } else if (intf == agdev->as_in_intf) {
> +               struct f_uac2_opts *opts = agdev_to_uac2_opts(agdev);
> +               unsigned intvl, rate;
> +
> +               if (gadget->speed == USB_SPEED_FULL)
> +                       intvl = (1 << (fs_epin_desc.bInterval - 1)) * 1000;
> +               else
> +                       intvl = (1 << (hs_epin_desc.bInterval - 1)) * 125;
> +
> +               rate = opts->p_srate;
> +               if (rate == 5512) { /* which implies 5512.5 practically */
> +                       rate = 55125;
> +                       intvl *= 10;
> +               }
> +
>                 ep = agdev->in_ep;
>                 prm = &uac2->p_prm;
> +               prm->plen = get_pattern(rate, intvl, NULL); /* dry run */
> +               /* We try to support arbitrary rates too */
> +               if (prm->plen > MAX_LOOP_LEN) {
> +                       prm->plen = 1;
> +                       prm->pattern[0] = rate / intvl;
> +               } else {
> +                       prm->plen = get_pattern(rate, intvl, prm->pattern);
>
   We need to multiply every element with framesize as well. Sorry I
missed. Other than that I think its all ready (just found my BB
doesn't boot anymore after ~2yrs of gathering dust).

-Jassi
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Daniel Mack Aug. 29, 2014, 7:22 a.m. | #2
Hi,

On 08/29/2014 08:13 AM, Jassi Brar wrote:
> +/*
> + * 5512.5Hz is going to need the maximum number of elements (80),
> + * in the length-pattern loop, among standard ALSA supported rates.
> + */
> +#define MAX_LOOP_LEN	80
> +
>  struct uac2_rtd_params {
>  	struct snd_uac2_chip *uac2; /* parent chip */
>  	bool ep_enabled; /* if the ep is enabled */
> @@ -80,6 +87,9 @@ struct uac2_rtd_params {
>  	unsigned max_psize;
>  	struct uac2_req ureq[USB_XFERS];
>  
> +	unsigned pattern[MAX_LOOP_LEN];
> +	unsigned plen; /* valid entries in pattern[] */
> +
>  	spinlock_t lock;
>  };
>  

You're doing this for both directions, while only the capture side needs
such treatment.

> +/*
> + * Find optimal pattern of payloads for a given number
> + * of samples and maximum sync period (in ms) over which
> + * we have to distribute them uniformly.
> + */
> +static unsigned
> +get_pattern(unsigned samples, unsigned sync, unsigned *pt)
> +{
> +	unsigned n, x = 0, i = 0, p = samples % sync;
> +
> +	do {
> +		x += p;
> +		n = samples / sync;
> +		if (x >= sync) {
> +			n += 1;
> +			x -= sync;
> +		}
> +		if (pt)
> +			pt[i] = n;
> +		i++;
> +	} while (x);
> +
> +	return i;
> +}
>
>  static int
>  afunc_set_alt(struct usb_function *fn, unsigned intf, unsigned alt)
>  {
> @@ -1097,11 +1136,35 @@ afunc_set_alt(struct usb_function *fn, unsigned intf, unsigned alt)
>  	if (intf == agdev->as_out_intf) {
>  		ep = agdev->out_ep;
>  		prm = &uac2->c_prm;
> +		prm->plen = 1;
> +		prm->pattern[0] = prm->max_psize;
>  		config_ep_by_speed(gadget, fn, ep);
>  		agdev->as_out_alt = alt;
>  	} else if (intf == agdev->as_in_intf) {
> +		struct f_uac2_opts *opts = agdev_to_uac2_opts(agdev);
> +		unsigned intvl, rate;
> +
> +		if (gadget->speed == USB_SPEED_FULL)
> +			intvl = (1 << (fs_epin_desc.bInterval - 1)) * 1000;
> +		else
> +			intvl = (1 << (hs_epin_desc.bInterval - 1)) * 125;
> +
> +		rate = opts->p_srate;
> +		if (rate == 5512) { /* which implies 5512.5 practically */
> +			rate = 55125;
> +			intvl *= 10;
> +		}
> +

Well, I'd say that Alan's approach as implemented in my v6 is still more
comprehensible for anyone who'll try to understand this later, and it
doesn't make any assumption on run time values. Plus, it only adds 5
unsigned ints to struct snd_uac2_chip, while your version blows up
struct uac2_rtd_params by 81 ints.

But it's also a matter of taste, so I guess it's ultimately up to Felipe
now.


Thanks,
Daniel
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jassi Brar Aug. 29, 2014, 7:51 a.m. | #3
Hi Daniel,

On 29 August 2014 12:52, Daniel Mack <daniel@zonque.org> wrote:
> Hi,
>
> On 08/29/2014 08:13 AM, Jassi Brar wrote:
>> +/*
>> + * 5512.5Hz is going to need the maximum number of elements (80),
>> + * in the length-pattern loop, among standard ALSA supported rates.
>> + */
>> +#define MAX_LOOP_LEN 80
>> +
>>  struct uac2_rtd_params {
>>       struct snd_uac2_chip *uac2; /* parent chip */
>>       bool ep_enabled; /* if the ep is enabled */
>> @@ -80,6 +87,9 @@ struct uac2_rtd_params {
>>       unsigned max_psize;
>>       struct uac2_req ureq[USB_XFERS];
>>
>> +     unsigned pattern[MAX_LOOP_LEN];
>> +     unsigned plen; /* valid entries in pattern[] */
>> +
>>       spinlock_t lock;
>>  };
>>
>
> You're doing this for both directions, while only the capture side needs
> such treatment.
>
Playback specific parameters placed in a common structure is a
tradeoff, I chose to avoid. Please also note, this makes code a bit
more symmetric in set_alt().
But I can move this to common structure if it pleases Gods.

>> +/*
>> + * Find optimal pattern of payloads for a given number
>> + * of samples and maximum sync period (in ms) over which
>> + * we have to distribute them uniformly.
>> + */
>> +static unsigned
>> +get_pattern(unsigned samples, unsigned sync, unsigned *pt)
>> +{
>> +     unsigned n, x = 0, i = 0, p = samples % sync;
>> +
>> +     do {
>> +             x += p;
>> +             n = samples / sync;
>> +             if (x >= sync) {
>> +                     n += 1;
>> +                     x -= sync;
>> +             }
>> +             if (pt)
>> +                     pt[i] = n;
>> +             i++;
>> +     } while (x);
>> +
>> +     return i;
>> +}
>>
>>  static int
>>  afunc_set_alt(struct usb_function *fn, unsigned intf, unsigned alt)
>>  {
>> @@ -1097,11 +1136,35 @@ afunc_set_alt(struct usb_function *fn, unsigned intf, unsigned alt)
>>       if (intf == agdev->as_out_intf) {
>>               ep = agdev->out_ep;
>>               prm = &uac2->c_prm;
>> +             prm->plen = 1;
>> +             prm->pattern[0] = prm->max_psize;
>>               config_ep_by_speed(gadget, fn, ep);
>>               agdev->as_out_alt = alt;
>>       } else if (intf == agdev->as_in_intf) {
>> +             struct f_uac2_opts *opts = agdev_to_uac2_opts(agdev);
>> +             unsigned intvl, rate;
>> +
>> +             if (gadget->speed == USB_SPEED_FULL)
>> +                     intvl = (1 << (fs_epin_desc.bInterval - 1)) * 1000;
>> +             else
>> +                     intvl = (1 << (hs_epin_desc.bInterval - 1)) * 125;
>> +
>> +             rate = opts->p_srate;
>> +             if (rate == 5512) { /* which implies 5512.5 practically */
>> +                     rate = 55125;
>> +                     intvl *= 10;
>> +             }
>> +
>
> Well, I'd say that Alan's approach as implemented in my v6 is still more
> comprehensible for anyone who'll try to understand this later, and it
> doesn't make any assumption on run time values.
>
I hope you realize this 5512 check has nothing to do with the Alan's
approach .... I can very well remove the check and it will still work!
 It is only an added feature to provide accurate data rate because
ALSA API can not define fractional rates.

> Plus, it only adds 5
> unsigned ints to struct snd_uac2_chip, while your version blows up
> struct uac2_rtd_params by 81 ints.
>
Sorry, that is bad comparison.

Real comparison is between having

+       unsigned int p_interval;
+       unsigned int p_residue;
+       unsigned int p_pktsize;
+       unsigned int p_pktsize_residue;
+       unsigned int p_framesize;
     vs
+       unsigned idx;
+       unsigned pattern[MAX_LOOP_LEN];
+       unsigned plen;

That is, managing 5 vs 3 extra variables in code.
 Please also look at the 'hotspot' iso_complete() function that
remains almost unchanged in my patch (as I have been suggesting you
all along).


> But it's also a matter of taste, so I guess it's ultimately up to Felipe
> now.
>
As a co-author of this driver, may I say its more than taste.
This implementation is cleaner (see your patch and this patch in
vertically split vim) and faster (see almost zero overhead in
iso_complete()).

Thanks
-Jassi
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jassi Brar Aug. 30, 2014, 4:52 a.m. | #4
On Fri, Aug 29, 2014 at 1:21 PM, Jassi Brar <jaswinder.singh@linaro.org> wrote:

>>> +
>>> +             rate = opts->p_srate;
>>> +             if (rate == 5512) { /* which implies 5512.5 practically */
>>> +                     rate = 55125;
>>> +                     intvl *= 10;
>>> +             }
>> Well, I'd say that Alan's approach as implemented in my v6 is still more
>> comprehensible for anyone who'll try to understand this later, and it
>> doesn't make any assumption on run time values.
>>
> I hope you realize this 5512 check has nothing to do with the Alan's
> approach .... I can very well remove the check and it will still work!
>  It is only an added feature to provide accurate data rate because
> ALSA API can not define fractional rates.
>
When ALSA says 5512Hz it is actually played at 5512.5Hz by a real codec.
So I think *any* f_uac2 patch, for the rate fix, should do the above
5512->5512.5 conversion.

-Jassi
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index 246a778..84fd3b0 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -59,8 +59,15 @@  const char *uac2_name = "snd_uac2";
 struct uac2_req {
 	struct uac2_rtd_params *pp; /* parent param */
 	struct usb_request *req;
+	unsigned idx; /* current element of length-pattern loop */
 };
 
+/*
+ * 5512.5Hz is going to need the maximum number of elements (80),
+ * in the length-pattern loop, among standard ALSA supported rates.
+ */
+#define MAX_LOOP_LEN	80
+
 struct uac2_rtd_params {
 	struct snd_uac2_chip *uac2; /* parent chip */
 	bool ep_enabled; /* if the ep is enabled */
@@ -80,6 +87,9 @@  struct uac2_rtd_params {
 	unsigned max_psize;
 	struct uac2_req ureq[USB_XFERS];
 
+	unsigned pattern[MAX_LOOP_LEN];
+	unsigned plen; /* valid entries in pattern[] */
+
 	spinlock_t lock;
 };
 
@@ -191,8 +201,12 @@  agdev_iso_complete(struct usb_ep *ep, struct usb_request *req)
 
 	spin_lock_irqsave(&prm->lock, flags);
 
-	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		/* Update length for next payload */
+		ur->idx = (ur->idx + USB_XFERS) % prm->plen;
+		req->length = prm->pattern[ur->idx];
 		req->actual = req->length;
+	}
 
 	pending = prm->hw_ptr % prm->period_size;
 	pending += req->actual;
@@ -1066,6 +1080,31 @@  err:
 	return -EINVAL;
 }
 
+/*
+ * Find optimal pattern of payloads for a given number
+ * of samples and maximum sync period (in ms) over which
+ * we have to distribute them uniformly.
+ */
+static unsigned
+get_pattern(unsigned samples, unsigned sync, unsigned *pt)
+{
+	unsigned n, x = 0, i = 0, p = samples % sync;
+
+	do {
+		x += p;
+		n = samples / sync;
+		if (x >= sync) {
+			n += 1;
+			x -= sync;
+		}
+		if (pt)
+			pt[i] = n;
+		i++;
+	} while (x);
+
+	return i;
+}
+
 static int
 afunc_set_alt(struct usb_function *fn, unsigned intf, unsigned alt)
 {
@@ -1097,11 +1136,35 @@  afunc_set_alt(struct usb_function *fn, unsigned intf, unsigned alt)
 	if (intf == agdev->as_out_intf) {
 		ep = agdev->out_ep;
 		prm = &uac2->c_prm;
+		prm->plen = 1;
+		prm->pattern[0] = prm->max_psize;
 		config_ep_by_speed(gadget, fn, ep);
 		agdev->as_out_alt = alt;
 	} else if (intf == agdev->as_in_intf) {
+		struct f_uac2_opts *opts = agdev_to_uac2_opts(agdev);
+		unsigned intvl, rate;
+
+		if (gadget->speed == USB_SPEED_FULL)
+			intvl = (1 << (fs_epin_desc.bInterval - 1)) * 1000;
+		else
+			intvl = (1 << (hs_epin_desc.bInterval - 1)) * 125;
+
+		rate = opts->p_srate;
+		if (rate == 5512) { /* which implies 5512.5 practically */
+			rate = 55125;
+			intvl *= 10;
+		}
+
 		ep = agdev->in_ep;
 		prm = &uac2->p_prm;
+		prm->plen = get_pattern(rate, intvl, NULL); /* dry run */
+		/* We try to support arbitrary rates too */
+		if (prm->plen > MAX_LOOP_LEN) {
+			prm->plen = 1;
+			prm->pattern[0] = rate / intvl;
+		} else {
+			prm->plen = get_pattern(rate, intvl, prm->pattern);
+		}
 		config_ep_by_speed(gadget, fn, ep);
 		agdev->as_in_alt = alt;
 	} else {
@@ -1125,12 +1188,13 @@  afunc_set_alt(struct usb_function *fn, unsigned intf, unsigned alt)
 
 			prm->ureq[i].req = req;
 			prm->ureq[i].pp = prm;
+			prm->ureq[i].idx = i % prm->plen;
 
 			req->zero = 0;
 			req->context = &prm->ureq[i];
-			req->length = prm->max_psize;
+			req->length = prm->pattern[prm->ureq[i].idx];
 			req->complete = agdev_iso_complete;
-			req->buf = prm->rbuf + i * req->length;
+			req->buf = prm->rbuf + i * prm->max_psize;
 		}
 
 		if (usb_ep_queue(ep, prm->ureq[i].req, GFP_ATOMIC))