diff mbox series

[v2] sound: rawmidi: Add framing mode

Message ID 20210324054253.34642-1-coding@diwic.se
State New
Headers show
Series [v2] sound: rawmidi: Add framing mode | expand

Commit Message

David Henningsson March 24, 2021, 5:42 a.m. UTC
This commit adds a new framing mode that frames all MIDI data into
16-byte frames with a timestamp from the monotonic_raw clock.

The main benefit is that we can get accurate timestamps even if
userspace wakeup and processing is not immediate.

Signed-off-by: David Henningsson <coding@diwic.se>
---

v2: Fixed checkpatch errors.

 include/sound/rawmidi.h     |  1 +
 include/uapi/sound/asound.h | 18 ++++++++++++++-
 sound/core/rawmidi.c        | 45 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 62 insertions(+), 2 deletions(-)

Comments

David Henningsson March 24, 2021, 11:18 a.m. UTC | #1
On 2021-03-24 11:03, Takashi Iwai wrote:
> On Wed, 24 Mar 2021 06:42:53 +0100,
> David Henningsson wrote:
>> This commit adds a new framing mode that frames all MIDI data into
>> 16-byte frames with a timestamp from the monotonic_raw clock.
>>
>> The main benefit is that we can get accurate timestamps even if
>> userspace wakeup and processing is not immediate.
>>
>> Signed-off-by: David Henningsson <coding@diwic.se>
> Thanks for the patch!
Thanks for the review :-)
> I seem to have overlooked your previous post, sorry.
>
> This looks like a good middle ground solution, while we still need to
> address the sequencer timestamp (basically we should be able to send
> an event with the timestamp prepared from the rawmidi side).

I believe the new framing mode would be useful both for readers of 
rawmidi devices, and the seq kernel module.

I have also been thinking of doing something in usb-midi (because I 
assume that's the most common way to do midi input these days), to 
improve performance for packets with more than three bytes in them. 
Right now a sysex would be cut off in chunks of three bytes, each one 
with its own timestamp. If so, that would be a later patch.

>
> The implementation itself looks good to me.  But this needs to bump
> the SNDRV_RAWMIDI_VERSION for indicating the new API.

Sure, I'll send a v3 with a bumped SNDRV_RAWMIDI_VERSION.

I'm also considering adding "time when the stream started" in the 
snd_rawmidi_status timestamp to get a fixed starting point for the 
timestamps, unless the field was reserved for some other purpose? The 
status timestamp would only be added if the framing mode is enabled. If 
so, that change would go into the same version bump. Does that sound 
good to you?

// David

>
>
> Takashi
>
>> ---
>>
>> v2: Fixed checkpatch errors.
>>
>>   include/sound/rawmidi.h     |  1 +
>>   include/uapi/sound/asound.h | 18 ++++++++++++++-
>>   sound/core/rawmidi.c        | 45 ++++++++++++++++++++++++++++++++++++-
>>   3 files changed, 62 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
>> index 334842daa904..4ba5d2deec18 100644
>> --- a/include/sound/rawmidi.h
>> +++ b/include/sound/rawmidi.h
>> @@ -81,6 +81,7 @@ struct snd_rawmidi_substream {
>>   	bool opened;			/* open flag */
>>   	bool append;			/* append flag (merge more streams) */
>>   	bool active_sensing;		/* send active sensing when close */
>> +	u8 framing;			/* whether to frame data (for input) */
>>   	int use_count;			/* use counter (for output) */
>>   	size_t bytes;
>>   	struct snd_rawmidi *rmidi;
>> diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
>> index 535a7229e1d9..f33076755025 100644
>> --- a/include/uapi/sound/asound.h
>> +++ b/include/uapi/sound/asound.h
>> @@ -736,12 +736,28 @@ struct snd_rawmidi_info {
>>   	unsigned char reserved[64];	/* reserved for future use */
>>   };
>>   
>> +enum {
>> +	SNDRV_RAWMIDI_FRAMING_NONE = 0,
>> +	SNDRV_RAWMIDI_FRAMING_TSTAMP_MONOTONIC_RAW,
>> +	SNDRV_RAWMIDI_FRAMING_LAST = SNDRV_RAWMIDI_FRAMING_TSTAMP_MONOTONIC_RAW,
>> +};
>> +
>> +#define SND_RAWMIDI_FRAMING_DATA_LENGTH 7
>> +
>> +struct snd_rawmidi_framing_tstamp {
>> +	unsigned int tv_sec;	/* seconds */
>> +	unsigned int tv_nsec;	/* nanoseconds */
>> +	unsigned char length;
>> +	unsigned char data[SND_RAWMIDI_FRAMING_DATA_LENGTH];
>> +};
>> +
>>   struct snd_rawmidi_params {
>>   	int stream;
>>   	size_t buffer_size;		/* queue size in bytes */
>>   	size_t avail_min;		/* minimum avail bytes for wakeup */
>>   	unsigned int no_active_sensing: 1; /* do not send active sensing byte in close() */
>> -	unsigned char reserved[16];	/* reserved for future use */
>> +	unsigned char framing;		/* For input data only, frame incoming data */
>> +	unsigned char reserved[15];	/* reserved for future use */
>>   };
>>   
>>   #ifndef __KERNEL__
>> diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
>> index aca00af93afe..cd927ba178a6 100644
>> --- a/sound/core/rawmidi.c
>> +++ b/sound/core/rawmidi.c
>> @@ -721,6 +721,7 @@ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream,
>>   			     struct snd_rawmidi_params *params)
>>   {
>>   	snd_rawmidi_drain_input(substream);
>> +	substream->framing = params->framing;
>>   	return resize_runtime_buffer(substream->runtime, params, true);
>>   }
>>   EXPORT_SYMBOL(snd_rawmidi_input_params);
>> @@ -963,6 +964,44 @@ static int snd_rawmidi_control_ioctl(struct snd_card *card,
>>   	return -ENOIOCTLCMD;
>>   }
>>   
>> +static int receive_with_tstamp_framing(struct snd_rawmidi_substream *substream,
>> +			const unsigned char *buffer, int src_count, struct timespec64 *tstamp)
>> +{
>> +	struct snd_rawmidi_runtime *runtime = substream->runtime;
>> +	struct snd_rawmidi_framing_tstamp frame;
>> +	struct snd_rawmidi_framing_tstamp *dest_ptr;
>> +	int dest_frames = 0;
>> +	int frame_size = sizeof(struct snd_rawmidi_framing_tstamp);
>> +
>> +	frame.tv_sec = tstamp->tv_sec;
>> +	frame.tv_nsec = tstamp->tv_nsec;
>> +	if (snd_BUG_ON(runtime->hw_ptr & 15 || runtime->buffer_size & 15 || frame_size != 16))
>> +		return -EINVAL;
>> +
>> +	while (src_count > 0) {
>> +		if ((int)(runtime->buffer_size - runtime->avail) < frame_size) {
>> +			runtime->xruns += src_count;
>> +			return dest_frames * frame_size;
>> +		}
>> +		if (src_count >= SND_RAWMIDI_FRAMING_DATA_LENGTH)
>> +			frame.length = SND_RAWMIDI_FRAMING_DATA_LENGTH;
>> +		else {
>> +			frame.length = src_count;
>> +			memset(frame.data, 0, SND_RAWMIDI_FRAMING_DATA_LENGTH);
>> +		}
>> +		memcpy(frame.data, buffer, frame.length);
>> +		buffer += frame.length;
>> +		src_count -= frame.length;
>> +		dest_ptr = (struct snd_rawmidi_framing_tstamp *) (runtime->buffer + runtime->hw_ptr);
>> +		*dest_ptr = frame;
>> +		runtime->avail += frame_size;
>> +		runtime->hw_ptr += frame_size;
>> +		runtime->hw_ptr %= runtime->buffer_size;
>> +		dest_frames++;
>> +	}
>> +	return dest_frames * frame_size;
>> +}
>> +
>>   /**
>>    * snd_rawmidi_receive - receive the input data from the device
>>    * @substream: the rawmidi substream
>> @@ -977,6 +1016,7 @@ int snd_rawmidi_receive(struct snd_rawmidi_substream *substream,
>>   			const unsigned char *buffer, int count)
>>   {
>>   	unsigned long flags;
>> +	struct timespec64 ts64;
>>   	int result = 0, count1;
>>   	struct snd_rawmidi_runtime *runtime = substream->runtime;
>>   
>> @@ -988,7 +1028,10 @@ int snd_rawmidi_receive(struct snd_rawmidi_substream *substream,
>>   		return -EINVAL;
>>   	}
>>   	spin_lock_irqsave(&runtime->lock, flags);
>> -	if (count == 1) {	/* special case, faster code */
>> +	if (substream->framing == SNDRV_RAWMIDI_FRAMING_TSTAMP_MONOTONIC_RAW) {
>> +		ktime_get_raw_ts64(&ts64);
>> +		result = receive_with_tstamp_framing(substream, buffer, count, &ts64);
>> +	} else if (count == 1) {	/* special case, faster code */
>>   		substream->bytes++;
>>   		if (runtime->avail < runtime->buffer_size) {
>>   			runtime->buffer[runtime->hw_ptr++] = buffer[0];
>> -- 
>> 2.25.1
>>
Takashi Sakamoto March 24, 2021, 12:44 p.m. UTC | #2
Hi,

On Wed, Mar 24, 2021 at 06:42:53AM +0100, David Henningsson wrote:
> diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
> index 535a7229e1d9..f33076755025 100644
> --- a/include/uapi/sound/asound.h
> +++ b/include/uapi/sound/asound.h
> @@ -736,12 +736,28 @@ struct snd_rawmidi_info {
>  	unsigned char reserved[64];	/* reserved for future use */
>  };
>  
> +enum {
> +	SNDRV_RAWMIDI_FRAMING_NONE = 0,
> +	SNDRV_RAWMIDI_FRAMING_TSTAMP_MONOTONIC_RAW,
> +	SNDRV_RAWMIDI_FRAMING_LAST = SNDRV_RAWMIDI_FRAMING_TSTAMP_MONOTONIC_RAW,
> +};

In C language specification, enumeration is for value of int storage. In
my opinion, int type should be used for the framing member, perhaps.

(I think you can easily understand my insistent since you're Rust
programmer.)

I note that in UAPI of Linux kernel, we have some macros to represent
system clocks; e.g. CLOCK_REALTIME, CLOCK_MONOTONIC:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/time.h#n46

We can use the series of macro, instead of defining the specific
enumerations. However I have one concern that the 'None' value cannot be
zero in the case since CLOCK_REALTIME is zero. This is a bit inconvenient
since we need initializer function in both of kernel space and user
space...

For the idea to record system timestamp when drivers call helper
function to put MIDI message bytes into intermediate buffer in
hardware/software IRQ context, I have some concerns and I'll post
another message to thread, later.


Regards

Takashi Sakamoto
Takashi Sakamoto March 24, 2021, 1:29 p.m. UTC | #3
Hi,

On Wed, Mar 24, 2021 at 12:18:59PM +0100, David Henningsson wrote:
> On 2021-03-24 11:03, Takashi Iwai wrote:
> > This looks like a good middle ground solution, while we still need to
> > address the sequencer timestamp (basically we should be able to send
> > an event with the timestamp prepared from the rawmidi side).
> 
> I believe the new framing mode would be useful both for readers of rawmidi
> devices, and the seq kernel module.
> 
> I have also been thinking of doing something in usb-midi (because I assume
> that's the most common way to do midi input these days), to improve
> performance for packets with more than three bytes in them. Right now a
> sysex would be cut off in chunks of three bytes, each one with its own
> timestamp. If so, that would be a later patch.

I've been investigated with some old documentations since David posted his
initial idea[1]. However, I always have concern that we can really find
timestamp for incoming data for MIDI message in hardware/software IRQ
contexts.

As you know, in the specification, MIDI message has no timestamp. Even
if MIDI Time Code (MTC) is included in the specification, it's the role
for hardware MIDI sequencer to decide actual presentation time for
received MIDI messages. In this meaning, your patch is reasonable to
process the received MIDI messages.

However, the timing jitter of IRQ handler invocation is issued in this
case, as well as PCM interface, even if the data rate of MIDI physical
layer is quite low nowadays (31.25 Kbit / sec ~= 3906.25 byte / sec).
As long as I experienced, in actual running Linux system, the invocation
of IRQ handler has no guarantee for timing jitter, mainly due to CPU level
IRQ mask (like spin_lock). Therefore the interval of each invocation is not
so precise to decide event timestamp, at least for time slot comes from
MIDI physical layer.

Nevertheless, I think your idea is enough interesting, with conditions to
deliver information from driver (or driver developer) to applications
(ALSA Sequencer or userspace applications). Even if we have some
limitation and restriction to precise timestamp, it's worth to work for
it. It seems to be required that improvements at interface level and
documentation about how to use the frame timestamp you implemented.


P.S. As long as referring old resources relevant to the design of
hardware MIDI sequencer in late 1990's, the above concern seems to be real
to engineers. They are always requested to process MIDI message in real
time somehow beyond buffering and timing jitter.

[1] Midi timestamps
https://mailman.alsa-project.org/pipermail/alsa-devel/2021-March/182175.html


Regards

Takashi Sakamoto
David Henningsson March 24, 2021, 3:57 p.m. UTC | #4
On 2021-03-24 13:44, Takashi Sakamoto wrote:
> Hi,
>
> On Wed, Mar 24, 2021 at 06:42:53AM +0100, David Henningsson wrote:
>> diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
>> index 535a7229e1d9..f33076755025 100644
>> --- a/include/uapi/sound/asound.h
>> +++ b/include/uapi/sound/asound.h
>> @@ -736,12 +736,28 @@ struct snd_rawmidi_info {
>>   	unsigned char reserved[64];	/* reserved for future use */
>>   };
>>   
>> +enum {
>> +	SNDRV_RAWMIDI_FRAMING_NONE = 0,
>> +	SNDRV_RAWMIDI_FRAMING_TSTAMP_MONOTONIC_RAW,
>> +	SNDRV_RAWMIDI_FRAMING_LAST = SNDRV_RAWMIDI_FRAMING_TSTAMP_MONOTONIC_RAW,
>> +};

Hi and thanks for the review!

> In C language specification, enumeration is for value of int storage. In
> my opinion, int type should be used for the framing member, perhaps.
In this case, I was following how the rest of the enum declarations were 
in the same header. In addition, the only place it is used, is as an 
unsigned char. So I'm not sure defining it as an int here would make sense.
>
> (I think you can easily understand my insistent since you're Rust
> programmer.)

I am, and as a side point: for those who don't know, I've written (and 
maintaining) alsa-lib bindings for Rust in

https://github.com/diwic/alsa-rs

>
> I note that in UAPI of Linux kernel, we have some macros to represent
> system clocks; e.g. CLOCK_REALTIME, CLOCK_MONOTONIC:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/time.h#n46
>
> We can use the series of macro, instead of defining the specific
> enumerations. However I have one concern that the 'None' value cannot be
> zero in the case since CLOCK_REALTIME is zero. This is a bit inconvenient
> since we need initializer function in both of kernel space and user
> space...

Interesting point. So I guess we could add a bit in the existing 
bitfield that says on/off, and then have an unsigned char that decides 
the clock type. But as you point out in your other reply, if we can get 
a timestamp from closer to the source somehow, that would be even 
better, and then that would be a clock that is something different from 
the existing clock defines in time.h.

[snip from your other reply]

> However, the timing jitter of IRQ handler invocation is issued in this
> case, as well as PCM interface, even if the data rate of MIDI physical
> layer is quite low nowadays (31.25 Kbit / sec ~= 3906.25 byte / sec).
> As long as I experienced, in actual running Linux system, the invocation
> of IRQ handler has no guarantee for timing jitter, mainly due to CPU level
> IRQ mask (like spin_lock). Therefore the interval of each invocation is not
> so precise to decide event timestamp, at least for time slot comes from
> MIDI physical layer.
>
> Nevertheless, I think your idea is enough interesting, with conditions to
> deliver information from driver (or driver developer) to applications
> (ALSA Sequencer or userspace applications). Even if we have some
> limitation and restriction to precise timestamp, it's worth to work for
> it. It seems to be required that improvements at interface level and
> documentation about how to use the frame timestamp you implemented.

Right, so first, I believe the most common way to transport midi these 
days is through USB, where the 31.25 Kbit/sec limit does not apply: 
instead we have a granularity of 1/8 ms but many messages can fit in 
each one. So that limit is - for many if not most cases - gone.

Second; you're probably right in that there is still some jitter w r t 
when the IRQ fires. That is regrettable, but the earlier we get that 
timestamp the better, so a timestamp in IRQ context would at least be 
better than in a workqueue that fires after the IRQ, or in userspace 
that possibly has scheduling delays.

Btw, I checked the "struct urb" and there was no timestamp in there, so 
I don't know how to get a better timestamp than in snd_rawmidi_receive. 
But maybe other interfaces (PCI, Firewire etc) offers something better.

// David
Takashi Sakamoto March 26, 2021, 4:46 a.m. UTC | #5
Hi David,

On Wed, Mar 24, 2021 at 04:57:31PM +0100, David Henningsson wrote:
> > However, the timing jitter of IRQ handler invocation is issued in this
> > case, as well as PCM interface, even if the data rate of MIDI physical
> > layer is quite low nowadays (31.25 Kbit / sec ~= 3906.25 byte / sec).
> > As long as I experienced, in actual running Linux system, the invocation
> > of IRQ handler has no guarantee for timing jitter, mainly due to CPU level
> > IRQ mask (like spin_lock). Therefore the interval of each invocation is not
> > so precise to decide event timestamp, at least for time slot comes from
> > MIDI physical layer.
> > 
> > Nevertheless, I think your idea is enough interesting, with conditions to
> > deliver information from driver (or driver developer) to applications
> > (ALSA Sequencer or userspace applications). Even if we have some
> > limitation and restriction to precise timestamp, it's worth to work for
> > it. It seems to be required that improvements at interface level and
> > documentation about how to use the frame timestamp you implemented.
> 
> Right, so first, I believe the most common way to transport midi these days
> is through USB, where the 31.25 Kbit/sec limit does not apply: instead we
> have a granularity of 1/8 ms but many messages can fit in each one. So that
> limit is - for many if not most cases - gone.
> 
> Second; you're probably right in that there is still some jitter w r t when
> the IRQ fires. That is regrettable, but the earlier we get that timestamp
> the better, so a timestamp in IRQ context would at least be better than in a
> workqueue that fires after the IRQ, or in userspace that possibly has
> scheduling delays.
> 
> Btw, I checked the "struct urb" and there was no timestamp in there, so I
> don't know how to get a better timestamp than in snd_rawmidi_receive. But
> maybe other interfaces (PCI, Firewire etc) offers something better.

Hm. Regardless of type of hardware; e.g. OHCI/EHCI/xHCI, 1394 OHCI, or
PCI-e extension card, for software to process sampled data, it's always
issue that the jitter between triggering IRQ (hardware side) and invoking
IRQ handler (processor side). As an actual example, let me share my
experience in 1394 OHCI.


1394 OHCI controller is governed by 24.576 Mhz clock (or double depending
on vendors). In IEEE 1394, ishcornous service is 8,000 times per second.
1394 OHCI specification allows software to schedule hardware IRQ per
isochronous cycle.

Once ALSA firewire stack is programmed to schedule the hardwar IRQ evenry
16 isochronous cycle. The corresponding IRQ handler is expected to invoke
every 2 milli second. As long as I tested in usual desktop environment[2],
the jitter is between 150 micro second and 4.7 milli second. In the worst
case, it's 6.0 milli seconds. The above is also the same wnen using
'threadirqs'.

When using 'isolcpus' and arranging 'smp_affinity' to reduce load from one
of processor core to invoke the IRQ handler, the interval is 2 milli
second with +- several nano seconds, therefore the 1394 OHCI controller
can trigger hardware IRQ so precise. The jitter comes from processor side.
I think many running contexts on the same processor core masks IRQ so
often and the jitter is not deterministic.

Here, what I'd like to tell you is that we can not handle the system time
as is as timestamp of received MIDI messages, as long as relying on IRQ
context. We need some arrangements to construct better timestamp with some
compensations. In this point, the 3rd version of patch is not enough[3],
in my opinion.

My intension is not to discourage you, but it's better to have more care.
As one of the care, I think we can use extension of
'struct snd_rawmidi_status' to deliver some useful information to ALSA
rawmidi applications, or including other parameters to the frame structure.
But I don't have ideas about what information should be delivered,
sorry...

[1] https://github.com/systemd/systemd/pull/19124
[2] I used stock image of Ubuntu 19.10 desktop for the trial.
[3] https://mailman.alsa-project.org/pipermail/alsa-devel/2021-March/182842.html

Thanks

Takashi Sakamoto
Takashi Iwai March 26, 2021, 7:55 a.m. UTC | #6
On Fri, 26 Mar 2021 05:46:15 +0100,
Takashi Sakamoto wrote:
> 
> Hi David,
> 
> On Wed, Mar 24, 2021 at 04:57:31PM +0100, David Henningsson wrote:
> > > However, the timing jitter of IRQ handler invocation is issued in this
> > > case, as well as PCM interface, even if the data rate of MIDI physical
> > > layer is quite low nowadays (31.25 Kbit / sec ~= 3906.25 byte / sec).
> > > As long as I experienced, in actual running Linux system, the invocation
> > > of IRQ handler has no guarantee for timing jitter, mainly due to CPU level
> > > IRQ mask (like spin_lock). Therefore the interval of each invocation is not
> > > so precise to decide event timestamp, at least for time slot comes from
> > > MIDI physical layer.
> > > 
> > > Nevertheless, I think your idea is enough interesting, with conditions to
> > > deliver information from driver (or driver developer) to applications
> > > (ALSA Sequencer or userspace applications). Even if we have some
> > > limitation and restriction to precise timestamp, it's worth to work for
> > > it. It seems to be required that improvements at interface level and
> > > documentation about how to use the frame timestamp you implemented.
> > 
> > Right, so first, I believe the most common way to transport midi these days
> > is through USB, where the 31.25 Kbit/sec limit does not apply: instead we
> > have a granularity of 1/8 ms but many messages can fit in each one. So that
> > limit is - for many if not most cases - gone.
> > 
> > Second; you're probably right in that there is still some jitter w r t when
> > the IRQ fires. That is regrettable, but the earlier we get that timestamp
> > the better, so a timestamp in IRQ context would at least be better than in a
> > workqueue that fires after the IRQ, or in userspace that possibly has
> > scheduling delays.
> > 
> > Btw, I checked the "struct urb" and there was no timestamp in there, so I
> > don't know how to get a better timestamp than in snd_rawmidi_receive. But
> > maybe other interfaces (PCI, Firewire etc) offers something better.
> 
> Hm. Regardless of type of hardware; e.g. OHCI/EHCI/xHCI, 1394 OHCI, or
> PCI-e extension card, for software to process sampled data, it's always
> issue that the jitter between triggering IRQ (hardware side) and invoking
> IRQ handler (processor side). As an actual example, let me share my
> experience in 1394 OHCI.
> 
> 
> 1394 OHCI controller is governed by 24.576 Mhz clock (or double depending
> on vendors). In IEEE 1394, ishcornous service is 8,000 times per second.
> 1394 OHCI specification allows software to schedule hardware IRQ per
> isochronous cycle.
> 
> Once ALSA firewire stack is programmed to schedule the hardwar IRQ evenry
> 16 isochronous cycle. The corresponding IRQ handler is expected to invoke
> every 2 milli second. As long as I tested in usual desktop environment[2],
> the jitter is between 150 micro second and 4.7 milli second. In the worst
> case, it's 6.0 milli seconds. The above is also the same wnen using
> 'threadirqs'.
> 
> When using 'isolcpus' and arranging 'smp_affinity' to reduce load from one
> of processor core to invoke the IRQ handler, the interval is 2 milli
> second with +- several nano seconds, therefore the 1394 OHCI controller
> can trigger hardware IRQ so precise. The jitter comes from processor side.
> I think many running contexts on the same processor core masks IRQ so
> often and the jitter is not deterministic.
> 
> Here, what I'd like to tell you is that we can not handle the system time
> as is as timestamp of received MIDI messages, as long as relying on IRQ
> context. We need some arrangements to construct better timestamp with some
> compensations. In this point, the 3rd version of patch is not enough[3],
> in my opinion.
> 
> My intension is not to discourage you, but it's better to have more care.
> As one of the care, I think we can use extension of
> 'struct snd_rawmidi_status' to deliver some useful information to ALSA
> rawmidi applications, or including other parameters to the frame structure.
> But I don't have ideas about what information should be delivered,
> sorry...

Well, the question is how much accuracy is wanted, and it's relatively
low for MIDI -- at least v1, which was defined many decades ago for a
slow serial line.

That said, the patch set was designed for providing the best-effort
timestamping in software, and that's supposed to be enough for normal
use cases.  If there is any device that is with the hardware
timestamping, in theory, we could provide the similar data stream in
the same format but maybe with a different timestamp type.

But actually I'd like to see some measurement how much we can improve
the timestamp accuracy by shifting the post office.  This may show
interesting numbers.

Also, one thing to be more considered is the support for MIDI v2 in
future.  I haven't seen any development so far (and no device
available around), so I cannot comment on this much more, but it'd be
worth to take a quick look before defining the solid API/ABI.


thanks,

Takashi

> 
> [1] https://github.com/systemd/systemd/pull/19124
> [2] I used stock image of Ubuntu 19.10 desktop for the trial.
> [3] https://mailman.alsa-project.org/pipermail/alsa-devel/2021-March/182842.html
> 
> Thanks
> 
> Takashi Sakamoto
>
David Henningsson March 26, 2021, 4:29 p.m. UTC | #7
Hi Takashi and Takashi,

On 2021-03-26 08:55, Takashi Iwai wrote:
> On Fri, 26 Mar 2021 05:46:15 +0100,
> Takashi Sakamoto wrote:
>> Hi David,
>>
>> On Wed, Mar 24, 2021 at 04:57:31PM +0100, David Henningsson wrote:
>>>> However, the timing jitter of IRQ handler invocation is issued in this
>>>> case, as well as PCM interface, even if the data rate of MIDI physical
>>>> layer is quite low nowadays (31.25 Kbit / sec ~= 3906.25 byte / sec).
>>>> As long as I experienced, in actual running Linux system, the invocation
>>>> of IRQ handler has no guarantee for timing jitter, mainly due to CPU level
>>>> IRQ mask (like spin_lock). Therefore the interval of each invocation is not
>>>> so precise to decide event timestamp, at least for time slot comes from
>>>> MIDI physical layer.
>>>>
>>>> Nevertheless, I think your idea is enough interesting, with conditions to
>>>> deliver information from driver (or driver developer) to applications
>>>> (ALSA Sequencer or userspace applications). Even if we have some
>>>> limitation and restriction to precise timestamp, it's worth to work for
>>>> it. It seems to be required that improvements at interface level and
>>>> documentation about how to use the frame timestamp you implemented.
>>> Right, so first, I believe the most common way to transport midi these days
>>> is through USB, where the 31.25 Kbit/sec limit does not apply: instead we
>>> have a granularity of 1/8 ms but many messages can fit in each one. So that
>>> limit is - for many if not most cases - gone.
>>>
>>> Second; you're probably right in that there is still some jitter w r t when
>>> the IRQ fires. That is regrettable, but the earlier we get that timestamp
>>> the better, so a timestamp in IRQ context would at least be better than in a
>>> workqueue that fires after the IRQ, or in userspace that possibly has
>>> scheduling delays.
>>>
>>> Btw, I checked the "struct urb" and there was no timestamp in there, so I
>>> don't know how to get a better timestamp than in snd_rawmidi_receive. But
>>> maybe other interfaces (PCI, Firewire etc) offers something better.
>> Hm. Regardless of type of hardware; e.g. OHCI/EHCI/xHCI, 1394 OHCI, or
>> PCI-e extension card, for software to process sampled data, it's always
>> issue that the jitter between triggering IRQ (hardware side) and invoking
>> IRQ handler (processor side). As an actual example, let me share my
>> experience in 1394 OHCI.
>>
>>
>> 1394 OHCI controller is governed by 24.576 Mhz clock (or double depending
>> on vendors). In IEEE 1394, ishcornous service is 8,000 times per second.
>> 1394 OHCI specification allows software to schedule hardware IRQ per
>> isochronous cycle.
>>
>> Once ALSA firewire stack is programmed to schedule the hardwar IRQ evenry
>> 16 isochronous cycle. The corresponding IRQ handler is expected to invoke
>> every 2 milli second. As long as I tested in usual desktop environment[2],
>> the jitter is between 150 micro second and 4.7 milli second. In the worst
>> case, it's 6.0 milli seconds. The above is also the same wnen using
>> 'threadirqs'.
>>
>> When using 'isolcpus' and arranging 'smp_affinity' to reduce load from one
>> of processor core to invoke the IRQ handler, the interval is 2 milli
>> second with +- several nano seconds, therefore the 1394 OHCI controller
>> can trigger hardware IRQ so precise. The jitter comes from processor side.
>> I think many running contexts on the same processor core masks IRQ so
>> often and the jitter is not deterministic.
>>
>> Here, what I'd like to tell you is that we can not handle the system time
>> as is as timestamp of received MIDI messages, as long as relying on IRQ
>> context. We need some arrangements to construct better timestamp with some
>> compensations. In this point, the 3rd version of patch is not enough[3],
>> in my opinion.

Interesting measurements. While several ms of jitter is not ideal, I 
have a few counter arguments why I still believe we should merge this patch:

  1) I don't think we should let the perfect be the enemy of the good 
here, just because we cannot eliminate *all* jitter does not mean we 
should not try to eliminate as much jitter as we can.

  2) an unprivileged process (that cannot get RT_PRIO scheduling) might 
have a wakeup jitter of a lot more than a few ms, so for that type of 
process it would be a big improvement. And sometimes even RT_PRIO 
processes have big delays due to preempt_disable etc.

  3) The jitter will depend on the hardware, and other hardware might 
have better (or worse) IRQ jitter.

  4) If this patch gets accepted, it might show other kernel developers 
that IRQ jitter matters to us, and that in turn might improve the 
chances of getting IRQ jitter improvement patches in, in case someone 
else wants to help solving that problem.


>>
>> My intension is not to discourage you, but it's better to have more care.
>> As one of the care, I think we can use extension of
>> 'struct snd_rawmidi_status' to deliver some useful information to ALSA
>> rawmidi applications, or including other parameters to the frame structure.
>> But I don't have ideas about what information should be delivered,
>> sorry...
> Well, the question is how much accuracy is wanted, and it's relatively
> low for MIDI -- at least v1, which was defined many decades ago for a
> slow serial line.
>
> That said, the patch set was designed for providing the best-effort
> timestamping in software, and that's supposed to be enough for normal
> use cases.  If there is any device that is with the hardware
> timestamping, in theory, we could provide the similar data stream in
> the same format but maybe with a different timestamp type.
>
> But actually I'd like to see some measurement how much we can improve
> the timestamp accuracy by shifting the post office.  This may show
> interesting numbers.

Sorry, I don't know the idiom "shifting the post office" and neither 
does the urban dictionary, so I have no idea what this means. :-)

>
> Also, one thing to be more considered is the support for MIDI v2 in
> future.  I haven't seen any development so far (and no device
> available around), so I cannot comment on this much more, but it'd be
> worth to take a quick look before defining the solid API/ABI.

I had a quick look at MIDI 2.0. It offers something called "Jitter 
reduction timestamps". After some searching I found that its resolution 
is 16 bit, and in units of 1/31250 seconds [1]. So the suggested 
timestamp format of secs + nsecs would suit us well for that case, I 
believe. When implemented, MIDI 2.0 jitter reduction timestamps would be 
another clock ID on top of the existing frame format (or a new frame 
format, if we prefer).

A midi 2.0 UMP (Universal Midi Packet) seems to be 4, 8, 12 or 16 bytes, 
excluding the timestamp. If we want to fit that format with the existing 
patch, we could increase the frame to 32 bytes so we can fit more data 
per packet. Do you think we should do that? Otherwise I think Patch v3 
is ready for merging.

// David

[1] https://imitone.com/discover-midi/
Takashi Iwai March 26, 2021, 4:44 p.m. UTC | #8
On Fri, 26 Mar 2021 17:29:04 +0100,
David Henningsson wrote:
> 
> > But actually I'd like to see some measurement how much we can improve
> > the timestamp accuracy by shifting the post office.  This may show
> > interesting numbers.
> 
> Sorry, I don't know the idiom "shifting the post office" and neither
> does the urban dictionary, so I have no idea what this means. :-)

It was just joking; you basically moved the place to stamp the
incoming data from one place (at the delivery center of a sequencer
event) to another earlier place (at the irq handler).

The question is: how much time difference have you measured by this
move?

> > Also, one thing to be more considered is the support for MIDI v2 in
> > future.  I haven't seen any development so far (and no device
> > available around), so I cannot comment on this much more, but it'd be
> > worth to take a quick look before defining the solid API/ABI.
> 
> I had a quick look at MIDI 2.0. It offers something called "Jitter
> reduction timestamps". After some searching I found that its
> resolution is 16 bit, and in units of 1/31250 seconds [1]. So the
> suggested timestamp format of secs + nsecs would suit us well for that
> case, I believe. When implemented, MIDI 2.0 jitter reduction
> timestamps would be another clock ID on top of the existing frame
> format (or a new frame format, if we prefer).
> 
> A midi 2.0 UMP (Universal Midi Packet) seems to be 4, 8, 12 or 16
> bytes, excluding the timestamp. If we want to fit that format with the
> existing patch, we could increase the frame to 32 bytes so we can fit
> more data per packet. Do you think we should do that? Otherwise I
> think Patch v3 is ready for merging.

Let's evaluate a bit what would be the best fit.  I see no big reason
to rush the merge right now.


thanks,

Takashi

> 
> // David
> 
> [1] https://imitone.com/discover-midi/
>
Takashi Sakamoto March 27, 2021, 1:51 a.m. UTC | #9
On Fri, Mar 26, 2021 at 05:44:16PM +0100, Takashi Iwai wrote:
> On Fri, 26 Mar 2021 17:29:04 +0100,
> David Henningsson wrote:
> > 
> > > But actually I'd like to see some measurement how much we can improve
> > > the timestamp accuracy by shifting the post office.  This may show
> > > interesting numbers.
> > 
> > Sorry, I don't know the idiom "shifting the post office" and neither
> > does the urban dictionary, so I have no idea what this means. :-)
> 
> It was just joking; you basically moved the place to stamp the
> incoming data from one place (at the delivery center of a sequencer
> event) to another earlier place (at the irq handler).
 
Aha. I also have another image to estimate the time when public bus
have left terminal, by the time when we see the bus at bus stop. Traffic
jam makes the estimation difficult even if we have standard time table.


Just my two cents,

Takashi Sakamoto
Takashi Sakamoto March 27, 2021, 3:10 a.m. UTC | #10
Hi,

On Fri, Mar 26, 2021 at 08:55:38AM +0100, Takashi Iwai wrote:
> On Fri, 26 Mar 2021 05:46:15 +0100, Takashi Sakamoto wrote:
> > On Wed, Mar 24, 2021 at 04:57:31PM +0100, David Henningsson wrote:
> > > > However, the timing jitter of IRQ handler invocation is issued in this
> > > > case, as well as PCM interface, even if the data rate of MIDI physical
> > > > layer is quite low nowadays (31.25 Kbit / sec ~= 3906.25 byte / sec).
> > > > As long as I experienced, in actual running Linux system, the invocation
> > > > of IRQ handler has no guarantee for timing jitter, mainly due to CPU level
> > > > IRQ mask (like spin_lock). Therefore the interval of each invocation is not
> > > > so precise to decide event timestamp, at least for time slot comes from
> > > > MIDI physical layer.
> > > > 
> > > > Nevertheless, I think your idea is enough interesting, with conditions to
> > > > deliver information from driver (or driver developer) to applications
> > > > (ALSA Sequencer or userspace applications). Even if we have some
> > > > limitation and restriction to precise timestamp, it's worth to work for
> > > > it. It seems to be required that improvements at interface level and
> > > > documentation about how to use the frame timestamp you implemented.
> > > 
> > > Right, so first, I believe the most common way to transport midi these days
> > > is through USB, where the 31.25 Kbit/sec limit does not apply: instead we
> > > have a granularity of 1/8 ms but many messages can fit in each one. So that
> > > limit is - for many if not most cases - gone.
> > > 
> > > Second; you're probably right in that there is still some jitter w r t when
> > > the IRQ fires. That is regrettable, but the earlier we get that timestamp
> > > the better, so a timestamp in IRQ context would at least be better than in a
> > > workqueue that fires after the IRQ, or in userspace that possibly has
> > > scheduling delays.
> > > 
> > > Btw, I checked the "struct urb" and there was no timestamp in there, so I
> > > don't know how to get a better timestamp than in snd_rawmidi_receive. But
> > > maybe other interfaces (PCI, Firewire etc) offers something better.
> > 
> > Hm. Regardless of type of hardware; e.g. OHCI/EHCI/xHCI, 1394 OHCI, or
> > PCI-e extension card, for software to process sampled data, it's always
> > issue that the jitter between triggering IRQ (hardware side) and invoking
> > IRQ handler (processor side). As an actual example, let me share my
> > experience in 1394 OHCI.
> > 
> > 
> > 1394 OHCI controller is governed by 24.576 Mhz clock (or double depending
> > on vendors). In IEEE 1394, ishcornous service is 8,000 times per second.
> > 1394 OHCI specification allows software to schedule hardware IRQ per
> > isochronous cycle.
> > 
> > Once ALSA firewire stack is programmed to schedule the hardwar IRQ evenry
> > 16 isochronous cycle. The corresponding IRQ handler is expected to invoke
> > every 2 milli second. As long as I tested in usual desktop environment[2],
> > the jitter is between 150 micro second and 4.7 milli second. In the worst
> > case, it's 6.0 milli seconds. The above is also the same wnen using
> > 'threadirqs'.
> > 
> > When using 'isolcpus' and arranging 'smp_affinity' to reduce load from one
> > of processor core to invoke the IRQ handler, the interval is 2 milli
> > second with +- several nano seconds, therefore the 1394 OHCI controller
> > can trigger hardware IRQ so precise. The jitter comes from processor side.
> > I think many running contexts on the same processor core masks IRQ so
> > often and the jitter is not deterministic.
> > 
> > Here, what I'd like to tell you is that we can not handle the system time
> > as is as timestamp of received MIDI messages, as long as relying on IRQ
> > context. We need some arrangements to construct better timestamp with some
> > compensations. In this point, the 3rd version of patch is not enough[3],
> > in my opinion.
> > 
> > My intension is not to discourage you, but it's better to have more care.
> > As one of the care, I think we can use extension of
> > 'struct snd_rawmidi_status' to deliver some useful information to ALSA
> > rawmidi applications, or including other parameters to the frame structure.
> > But I don't have ideas about what information should be delivered,
> > sorry...
> 
> Well, the question is how much accuracy is wanted, and it's relatively
> low for MIDI -- at least v1, which was defined many decades ago for a
> slow serial line.
> 
> That said, the patch set was designed for providing the best-effort
> timestamping in software, and that's supposed to be enough for normal
> use cases.

Indeed. I think it the best-effort in software side, but with less care of
hardware matters. Although I have no strong objection to the idea of the
patch itself, the point of my insistent is _not_ how much accuracy is
preferable.

When I imagine to write actual program as an application of rawmidi with
the frame structure, I have a concern about how to use the timestamp. At
present, it's just a record of invocation of any IRQ context. When using it
for timestamp of MIDI message, the application needs supplement information
for compensation for any jitter and delay, however nothing is provided.

At present, we seem to fail setting any incentive for users to use the new
functionality, in my opinion. Such functionality is going to be obsoleted
sooner or later, like 'struct snd_rawmidi_status.tstamp' (I guess no one
could have implemented it for the original purpose, then no one is going to
use it). That's my concern.

> If there is any device that is with the hardware  timestamping, in
> theory, we could provide the similar data stream in  the same format
> but maybe with a different timestamp type.

I seem to fail getting what you mean, but if I'm allowed to mention
about legacy devices, I'll report the case of IEC 61883-1/6 in IEEE 1394
with 1394 OHCI. 1394 OHCI allows software to handle isochronous packet
payload with cycle time. We can compute the timestamp independent on
current system time in IRQ handler with resolution by 125 micro second.
Below drivers can adapt to it:

 * snd-bebob
 * snd-fireworks
 * snd-oxfw
 * snd-dice
 * snd-firewire-digi00x (mutated version of protocol)
 * snd-firewire-motu (mutated version of protocol)

But for the case below MIDI messages are on asynchronous packet and
need to current system time in invoked IRQ handler:
 * snd-tascam
 * snd-fireface

> But actually I'd like to see some measurement how much we can improve
> the timestamp accuracy by shifting the post office.  This may show
> interesting numbers.
> 
> Also, one thing to be more considered is the support for MIDI v2 in
> future.  I haven't seen any development so far (and no device
> available around), so I cannot comment on this much more, but it'd be
> worth to take a quick look before defining the solid API/ABI.


Regards

Takashi Sakamoto
Takashi Sakamoto March 27, 2021, 3:44 a.m. UTC | #11
Hi,
David Henningsson March 28, 2021, 6:39 a.m. UTC | #12
Hi Takashi and Takashi,

You both question the usability of the patch, so let's take a step back.

Suppose you're writing the next JACK, or a DAW, or something like that.
When writing a DAW, you need to support the users who need ultra-low 
latency for live playing of an instrument. These users (unfortunately) 
need to reconfigure their Linux installation, have special kernels, buy 
expensive sound cards etc, in order to get the best possible latency.
You also should give the best possible experience for people who don't 
have the time to do that. Just recording a simple MIDI file should not 
require any extra kernel options, RT_PRIO privileges or anything like 
that. (And then there are people in between, who try to get the best 
possible latency given their limited time, money and skills.)

Now you're asking yourself whether to use rawmidi or seq API. It seems 
silly to have to support both.
The seq interface is suboptimal for the first use case, due to the 
latency introduced by the workqueue. But rawmidi is entirely impossible 
for the second use case, due to the lack of timestamping.
(From a quick look at Ardour's sources, it does support both rawmidi and 
seq. The rawmidi code mostly timestamps the message and sends it to 
another thread. [1] I e, essentially what I believe the kernel should 
do, because that timestamp is better.)

What you don't need is exact measurements of burst interval or even 
timestamp accuracy. All you have use for is the best possible timestamp, 
because that's what's going to be written into the MIDI file. There 
might be other use cases for burst intervals etc, but I don't see them 
helping here.

On 2021-03-26 17:44, Takashi Iwai wrote:
> On Fri, 26 Mar 2021 17:29:04 +0100,
> David Henningsson wrote:
>>> But actually I'd like to see some measurement how much we can improve
>>> the timestamp accuracy by shifting the post office.  This may show
>>> interesting numbers.
>> Sorry, I don't know the idiom "shifting the post office" and neither
>> does the urban dictionary, so I have no idea what this means. :-)
> It was just joking; you basically moved the place to stamp the
> incoming data from one place (at the delivery center of a sequencer
> event) to another earlier place (at the irq handler).
>
> The question is: how much time difference have you measured by this
> move?

Ok, thanks for the explanation. I have not done any measurements because 
it would be quite time consuming to do so, across different hardware, 
kernel configurations, and so on. I don't have that time right now, 
sorry. But the claim that workqueues can be delayed up to a second (!) 
from just adding a few RT_PRIO tasks [2] is enough to scare me from 
using the seq interface for accurate timestamping.


>>> Also, one thing to be more considered is the support for MIDI v2 in
>>> future.  I haven't seen any development so far (and no device
>>> available around), so I cannot comment on this much more, but it'd be
>>> worth to take a quick look before defining the solid API/ABI.
>> I had a quick look at MIDI 2.0. It offers something called "Jitter
>> reduction timestamps". After some searching I found that its
>> resolution is 16 bit, and in units of 1/31250 seconds [1]. So the
>> suggested timestamp format of secs + nsecs would suit us well for that
>> case, I believe. When implemented, MIDI 2.0 jitter reduction
>> timestamps would be another clock ID on top of the existing frame
>> format (or a new frame format, if we prefer).
>>
>> A midi 2.0 UMP (Universal Midi Packet) seems to be 4, 8, 12 or 16
>> bytes, excluding the timestamp. If we want to fit that format with the
>> existing patch, we could increase the frame to 32 bytes so we can fit
>> more data per packet. Do you think we should do that? Otherwise I
>> think Patch v3 is ready for merging.
> Let's evaluate a bit what would be the best fit.  I see no big reason
> to rush the merge right now.

Does this mean "evaluate for a week or two because of kernel cadence, 
merge windows etc" or does this mean "evaluate for months or years until 
someone does a full MIDI 2.0 kernel implementation"?

// David

[1] 
https://github.com/Ardour/ardour/blob/master/libs/backends/alsa/alsa_rawmidi.cc
[2] http://bootloader.wikidot.com/linux:android:latency
Takashi Iwai March 28, 2021, 7:40 a.m. UTC | #13
On Sun, 28 Mar 2021 08:39:46 +0200,
David Henningsson wrote:
> 
> Hi Takashi and Takashi,
> 
> You both question the usability of the patch, so let's take a step back.
> 
> Suppose you're writing the next JACK, or a DAW, or something like that.
> When writing a DAW, you need to support the users who need ultra-low
> latency for live playing of an instrument. These users (unfortunately)
> need to reconfigure their Linux installation, have special kernels,
> buy expensive sound cards etc, in order to get the best possible
> latency.
> You also should give the best possible experience for people who don't
> have the time to do that. Just recording a simple MIDI file should not
> require any extra kernel options, RT_PRIO privileges or anything like
> that. (And then there are people in between, who try to get the best
> possible latency given their limited time, money and skills.)
> 
> Now you're asking yourself whether to use rawmidi or seq API. It seems
> silly to have to support both.
> The seq interface is suboptimal for the first use case, due to the
> latency introduced by the workqueue. But rawmidi is entirely
> impossible for the second use case, due to the lack of timestamping.
> (From a quick look at Ardour's sources, it does support both rawmidi
> and seq. The rawmidi code mostly timestamps the message and sends it
> to another thread. [1] I e, essentially what I believe the kernel
> should do, because that timestamp is better.)
> 
> What you don't need is exact measurements of burst interval or even
> timestamp accuracy. All you have use for is the best possible
> timestamp, because that's what's going to be written into the MIDI
> file. There might be other use cases for burst intervals etc, but I
> don't see them helping here.
> 
> On 2021-03-26 17:44, Takashi Iwai wrote:
> > On Fri, 26 Mar 2021 17:29:04 +0100,
> > David Henningsson wrote:
> >>> But actually I'd like to see some measurement how much we can improve
> >>> the timestamp accuracy by shifting the post office.  This may show
> >>> interesting numbers.
> >> Sorry, I don't know the idiom "shifting the post office" and neither
> >> does the urban dictionary, so I have no idea what this means. :-)
> > It was just joking; you basically moved the place to stamp the
> > incoming data from one place (at the delivery center of a sequencer
> > event) to another earlier place (at the irq handler).
> >
> > The question is: how much time difference have you measured by this
> > move?
> 
> Ok, thanks for the explanation. I have not done any measurements
> because it would be quite time consuming to do so, across different
> hardware, kernel configurations, and so on. I don't have that time
> right now, sorry. But the claim that workqueues can be delayed up to a
> second (!) from just adding a few RT_PRIO tasks [2] is enough to scare
> me from using the seq interface for accurate timestamping.
> 
> 
> >>> Also, one thing to be more considered is the support for MIDI v2 in
> >>> future.  I haven't seen any development so far (and no device
> >>> available around), so I cannot comment on this much more, but it'd be
> >>> worth to take a quick look before defining the solid API/ABI.
> >> I had a quick look at MIDI 2.0. It offers something called "Jitter
> >> reduction timestamps". After some searching I found that its
> >> resolution is 16 bit, and in units of 1/31250 seconds [1]. So the
> >> suggested timestamp format of secs + nsecs would suit us well for that
> >> case, I believe. When implemented, MIDI 2.0 jitter reduction
> >> timestamps would be another clock ID on top of the existing frame
> >> format (or a new frame format, if we prefer).
> >>
> >> A midi 2.0 UMP (Universal Midi Packet) seems to be 4, 8, 12 or 16
> >> bytes, excluding the timestamp. If we want to fit that format with the
> >> existing patch, we could increase the frame to 32 bytes so we can fit
> >> more data per packet. Do you think we should do that? Otherwise I
> >> think Patch v3 is ready for merging.
> > Let's evaluate a bit what would be the best fit.  I see no big reason
> > to rush the merge right now.
> 
> Does this mean "evaluate for a week or two because of kernel cadence,
> merge windows etc" or does this mean "evaluate for months or years
> until someone does a full MIDI 2.0 kernel implementation"?

Well, without the actual measurement, it's purely a theoretical
problem, and it implies that we haven't seen any real improvement by
that, too.  So, the first priority is to measure and prove the need of
the changes.

Then the next thing is to determine the exact format for the new API
in a solid form.  It's still not fully agreed which frame size fits at
best, for example.  Also, we may have two individual frame types,
e.g. a timestamp frame and a data frame, too, depending on the frame
size and the implementation.  And, it might be handy if the ioctl
returns the frame size to user-space, too.

And, of course, thinking on MIDI 2.0 wouldn't be bad.  Though I don't
think tying with MIDI 2.0 is needed right now; instead, we should
assure only that the new timestamp would be accurate enough for new
extensions like MIDI 2.0.


Takashi

> 
> // David
> 
> [1]
> https://github.com/Ardour/ardour/blob/master/libs/backends/alsa/alsa_rawmidi.cc
> [2] http://bootloader.wikidot.com/linux:android:latency
>
David Henningsson April 5, 2021, 12:13 p.m. UTC | #14
On 2021-03-31 09:40, Takashi Iwai wrote:
> On Tue, 30 Mar 2021 21:35:11 +0200,
> David Henningsson wrote:
>>
>> Well, I believe that rawmidi provides less jitter than seq is not a
>> theoretical problem but a known fact (see e g [1]), so I haven't tried
>> to "prove" it myself. And I cannot read your mind well enough to know
>> what you would consider a sufficient proof - are you expecting to see
>> differences on a default or RT kernel, on a Threadripper or a
>> Beaglebone, idle system or while running RT torture tests? Etc.
> There is certainly difference, and it might be interesting to see the
> dependency on the hardware or on the configuration.  But, again, my
> primary question is: have you measured how *your patch* really
> provides the improvement?  If yes, please show the numbers in the
> patch description.

As you requested, I have now performed such testing.

Results:

Seq - idle: 5.0 ms

Seq - hackbench: 1.3 s (yes, above one second)

Raw + framing - idle: 2.8 ms

Raw + framing - hackbench: 2.8 ms

Setup / test description:

I had an external midi sequencer connected through USB. The system under 
test was a Celeron N3150 with internal graphics. The sequencer was set 
to generate note on/note off commands exactly 10 times per second.

For the seq tests I used "arecordmidi" and analyzed the delta values of 
resulting midi file. For the raw + framing tests I used a home-made 
application to write a midi file. The monotonic clock option was used to 
rule out differences between monotonic and monotonic_raw. The result 
shown above is the maximum amount of delta value, converted to 
milliseconds, minus the expected 100 ms between notes. Each test was run 
for a minute or two.

For the "idle" test, the machine was idle (running a normal desktop), 
and for the "hackbench" test, "chrt -r 10 hackbench" was run a few times 
in parallel with the midi recording application (which was run with 
"chrt -r 15").

I also tried a few other stress tests but hackbench was the one that 
stood out as totally destroying the timestamps of seq midi. (E g, 
running "rt-migrate-test" in parallel with "arecordmidi" gave a max 
jitter value of 13 ms.)

Conclusion:

I still believe the proposed raw + framing mode is a valuable 
improvement in the normal/idle case, but even more so because it is more 
stable in stressed conditions. Do you agree?

If so, I'll send out a v4 with 32 byte framing (16 byte header, 16 byte 
data).

  // David
David Henningsson April 10, 2021, 11:41 a.m. UTC | #15
On 2021-04-06 14:01, Takashi Iwai wrote:
> On Mon, 05 Apr 2021 14:13:27 +0200,
> David Henningsson wrote:
>>
>> On 2021-03-31 09:40, Takashi Iwai wrote:
>>> On Tue, 30 Mar 2021 21:35:11 +0200,
>>> David Henningsson wrote:
>>>> Well, I believe that rawmidi provides less jitter than seq is not a
>>>> theoretical problem but a known fact (see e g [1]), so I haven't tried
>>>> to "prove" it myself. And I cannot read your mind well enough to know
>>>> what you would consider a sufficient proof - are you expecting to see
>>>> differences on a default or RT kernel, on a Threadripper or a
>>>> Beaglebone, idle system or while running RT torture tests? Etc.
>>> There is certainly difference, and it might be interesting to see the
>>> dependency on the hardware or on the configuration.  But, again, my
>>> primary question is: have you measured how *your patch* really
>>> provides the improvement?  If yes, please show the numbers in the
>>> patch description.
>> As you requested, I have now performed such testing.
>>
>> Results:
>>
>> Seq - idle: 5.0 ms
>>
>> Seq - hackbench: 1.3 s (yes, above one second)
>>
>> Raw + framing - idle: 2.8 ms
>>
>> Raw + framing - hackbench: 2.8 ms
>>
>> Setup / test description:
>>
>> I had an external midi sequencer connected through USB. The system
>> under test was a Celeron N3150 with internal graphics. The sequencer
>> was set to generate note on/note off commands exactly 10 times per
>> second.
>>
>> For the seq tests I used "arecordmidi" and analyzed the delta values
>> of resulting midi file. For the raw + framing tests I used a home-made
>> application to write a midi file. The monotonic clock option was used
>> to rule out differences between monotonic and monotonic_raw. The
>> result shown above is the maximum amount of delta value, converted to
>> milliseconds, minus the expected 100 ms between notes. Each test was
>> run for a minute or two.
>>
>> For the "idle" test, the machine was idle (running a normal desktop),
>> and for the "hackbench" test, "chrt -r 10 hackbench" was run a few
>> times in parallel with the midi recording application (which was run
>> with "chrt -r 15").
>>
>> I also tried a few other stress tests but hackbench was the one that
>> stood out as totally destroying the timestamps of seq midi. (E g,
>> running "rt-migrate-test" in parallel with "arecordmidi" gave a max
>> jitter value of 13 ms.)
>>
>> Conclusion:
>>
>> I still believe the proposed raw + framing mode is a valuable
>> improvement in the normal/idle case, but even more so because it is
>> more stable in stressed conditions. Do you agree?
> Thanks for the tests.  Yes, that's an interesting and convincing
> result.
>         
> Could you do a couple of favors in addition?

Okay, now done. Enjoy :-)

>
> 1) Check the other workqueue
>
> It's interesting to see whether the hiprio system workqueue may give a
> better latency.  A oneliner patch is like below.
>
> -- 8< --
> --- a/sound/core/rawmidi.c
> +++ b/sound/core/rawmidi.c
> @@ -1028,7 +1028,7 @@ int snd_rawmidi_receive(struct snd_rawmidi_substream *substream,
>   	}
>   	if (result > 0) {
>   		if (runtime->event)
> -			schedule_work(&runtime->event_work);
> +			queue_work(system_highpri_wq, &runtime->event_work);
>   		else if (__snd_rawmidi_ready(runtime))
>   			wake_up(&runtime->sleep);
>   	}
> -- 8< --

Result: idle: 5.0 ms

hackbench > 1 s

I e, same as original.


>
> Also, system_unbound_wq can be another interesting test case instead
> of system_highpri_wq.
>
> 2) Direct sequencer event process
>
> If a chance of workqueue doesn't give significant improvement, we
> might need to check the direct invocation of the sequencer
> dispatcher.  A totally untested patch is like below.
>
> -- 8< --
> --- a/sound/core/rawmidi.c
> +++ b/sound/core/rawmidi.c
> @@ -979,6 +979,7 @@ int snd_rawmidi_receive(struct snd_rawmidi_substream *substream,
>   	unsigned long flags;
>   	int result = 0, count1;
>   	struct snd_rawmidi_runtime *runtime = substream->runtime;
> +	bool call_event = false;
>   
>   	if (!substream->opened)
>   		return -EBADFD;
> @@ -1028,11 +1029,13 @@ int snd_rawmidi_receive(struct snd_rawmidi_substream *substream,
>   	}
>   	if (result > 0) {
>   		if (runtime->event)
> -			schedule_work(&runtime->event_work);
> +			call_event = true;
>   		else if (__snd_rawmidi_ready(runtime))
>   			wake_up(&runtime->sleep);
>   	}
>   	spin_unlock_irqrestore(&runtime->lock, flags);
> +	if (call_event)
> +		runtime->event(runtime->substream);
>   	return result;
>   }
>   EXPORT_SYMBOL(snd_rawmidi_receive);
>
> -- 8< --

Result:

Idle: 3.0 ms

Hackbench still > 1s.

The reason that this is 3.0 and not 2.8 is probably during to some 
rounding to whole ms somewhere in either seq or arecordmidi - I'd say 
this is likely the same 2.8 ms as we see from the rawmidi+framing test.

> In theory, this should bring to the same level of latency as the
> rawmidi timestamping.  Of course, this doesn't mean we can go straight
> to this way, but it's some material for consideration.

I don't know why the hackbench test is not improved here. But you seem 
to have changed seq from tasklet to workqueue in 2011 (commit 
b3c705aa9e9), presumably for some relevant reason, like a need to sleep 
in the seq code...?

// David
Takashi Iwai April 12, 2021, 4:03 p.m. UTC | #16
On Sat, 10 Apr 2021 13:41:38 +0200,
David Henningsson wrote:
> 
> 
> On 2021-04-06 14:01, Takashi Iwai wrote:
> > On Mon, 05 Apr 2021 14:13:27 +0200,
> > David Henningsson wrote:
> >>
> >> On 2021-03-31 09:40, Takashi Iwai wrote:
> >>> On Tue, 30 Mar 2021 21:35:11 +0200,
> >>> David Henningsson wrote:
> >>>> Well, I believe that rawmidi provides less jitter than seq is not a
> >>>> theoretical problem but a known fact (see e g [1]), so I haven't tried
> >>>> to "prove" it myself. And I cannot read your mind well enough to know
> >>>> what you would consider a sufficient proof - are you expecting to see
> >>>> differences on a default or RT kernel, on a Threadripper or a
> >>>> Beaglebone, idle system or while running RT torture tests? Etc.
> >>> There is certainly difference, and it might be interesting to see the
> >>> dependency on the hardware or on the configuration.  But, again, my
> >>> primary question is: have you measured how *your patch* really
> >>> provides the improvement?  If yes, please show the numbers in the
> >>> patch description.
> >> As you requested, I have now performed such testing.
> >>
> >> Results:
> >>
> >> Seq - idle: 5.0 ms
> >>
> >> Seq - hackbench: 1.3 s (yes, above one second)
> >>
> >> Raw + framing - idle: 2.8 ms
> >>
> >> Raw + framing - hackbench: 2.8 ms
> >>
> >> Setup / test description:
> >>
> >> I had an external midi sequencer connected through USB. The system
> >> under test was a Celeron N3150 with internal graphics. The sequencer
> >> was set to generate note on/note off commands exactly 10 times per
> >> second.
> >>
> >> For the seq tests I used "arecordmidi" and analyzed the delta values
> >> of resulting midi file. For the raw + framing tests I used a home-made
> >> application to write a midi file. The monotonic clock option was used
> >> to rule out differences between monotonic and monotonic_raw. The
> >> result shown above is the maximum amount of delta value, converted to
> >> milliseconds, minus the expected 100 ms between notes. Each test was
> >> run for a minute or two.
> >>
> >> For the "idle" test, the machine was idle (running a normal desktop),
> >> and for the "hackbench" test, "chrt -r 10 hackbench" was run a few
> >> times in parallel with the midi recording application (which was run
> >> with "chrt -r 15").
> >>
> >> I also tried a few other stress tests but hackbench was the one that
> >> stood out as totally destroying the timestamps of seq midi. (E g,
> >> running "rt-migrate-test" in parallel with "arecordmidi" gave a max
> >> jitter value of 13 ms.)
> >>
> >> Conclusion:
> >>
> >> I still believe the proposed raw + framing mode is a valuable
> >> improvement in the normal/idle case, but even more so because it is
> >> more stable in stressed conditions. Do you agree?
> > Thanks for the tests.  Yes, that's an interesting and convincing
> > result.
> >         Could you do a couple of favors in addition?
> 
> Okay, now done. Enjoy :-)
> 
> >
> > 1) Check the other workqueue
> >
> > It's interesting to see whether the hiprio system workqueue may give a
> > better latency.  A oneliner patch is like below.
> >
> > -- 8< --
> > --- a/sound/core/rawmidi.c
> > +++ b/sound/core/rawmidi.c
> > @@ -1028,7 +1028,7 @@ int snd_rawmidi_receive(struct snd_rawmidi_substream *substream,
> >   	}
> >   	if (result > 0) {
> >   		if (runtime->event)
> > -			schedule_work(&runtime->event_work);
> > +			queue_work(system_highpri_wq, &runtime->event_work);
> >   		else if (__snd_rawmidi_ready(runtime))
> >   			wake_up(&runtime->sleep);
> >   	}
> > -- 8< --
> 
> Result: idle: 5.0 ms
> 
> hackbench > 1 s
> 
> I e, same as original.

Hm, that's disappointing.  I hoped that the influence of the workqueue
would be visible, but apparently not.

But more concern is about the result with the second patch.

> >
> > Also, system_unbound_wq can be another interesting test case instead
> > of system_highpri_wq.
> >
> > 2) Direct sequencer event process
> >
> > If a chance of workqueue doesn't give significant improvement, we
> > might need to check the direct invocation of the sequencer
> > dispatcher.  A totally untested patch is like below.
> >
> > -- 8< --
> > --- a/sound/core/rawmidi.c
> > +++ b/sound/core/rawmidi.c
> > @@ -979,6 +979,7 @@ int snd_rawmidi_receive(struct snd_rawmidi_substream *substream,
> >   	unsigned long flags;
> >   	int result = 0, count1;
> >   	struct snd_rawmidi_runtime *runtime = substream->runtime;
> > +	bool call_event = false;
> >     	if (!substream->opened)
> >   		return -EBADFD;
> > @@ -1028,11 +1029,13 @@ int snd_rawmidi_receive(struct snd_rawmidi_substream *substream,
> >   	}
> >   	if (result > 0) {
> >   		if (runtime->event)
> > -			schedule_work(&runtime->event_work);
> > +			call_event = true;
> >   		else if (__snd_rawmidi_ready(runtime))
> >   			wake_up(&runtime->sleep);
> >   	}
> >   	spin_unlock_irqrestore(&runtime->lock, flags);
> > +	if (call_event)
> > +		runtime->event(runtime->substream);
> >   	return result;
> >   }
> >   EXPORT_SYMBOL(snd_rawmidi_receive);
> >
> > -- 8< --
> 
> Result:
> 
> Idle: 3.0 ms
> 
> Hackbench still > 1s.
> 
> The reason that this is 3.0 and not 2.8 is probably during to some
> rounding to whole ms somewhere in either seq or arecordmidi - I'd say
> this is likely the same 2.8 ms as we see from the rawmidi+framing
> test.
> 
> > In theory, this should bring to the same level of latency as the
> > rawmidi timestamping.  Of course, this doesn't mean we can go straight
> > to this way, but it's some material for consideration.
> 
> I don't know why the hackbench test is not improved here. But you seem
> to have changed seq from tasklet to workqueue in 2011 (commit
> b3c705aa9e9), presumably for some relevant reason, like a need to
> sleep in the seq code...?

No, the commit you mentioned didn't change the behavior.  It's a
simple replacement from a tasklet to a work (at least for the input
direction).  So, the irq handler processes and delivers the event
directly, and since it's an irq context, there can be no sleep.
At most, it's a spinlock and preemption, but I don't think this could
give such a long delay inside the irq handler.

Might it be something else; e.g. the timestamp gets updated later
again in a different place.  In anyway, this needs more
investigation, apart from the merge of the rawmidi frame mode
support.


thanks,

Takashi
diff mbox series

Patch

diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
index 334842daa904..4ba5d2deec18 100644
--- a/include/sound/rawmidi.h
+++ b/include/sound/rawmidi.h
@@ -81,6 +81,7 @@  struct snd_rawmidi_substream {
 	bool opened;			/* open flag */
 	bool append;			/* append flag (merge more streams) */
 	bool active_sensing;		/* send active sensing when close */
+	u8 framing;			/* whether to frame data (for input) */
 	int use_count;			/* use counter (for output) */
 	size_t bytes;
 	struct snd_rawmidi *rmidi;
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 535a7229e1d9..f33076755025 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -736,12 +736,28 @@  struct snd_rawmidi_info {
 	unsigned char reserved[64];	/* reserved for future use */
 };
 
+enum {
+	SNDRV_RAWMIDI_FRAMING_NONE = 0,
+	SNDRV_RAWMIDI_FRAMING_TSTAMP_MONOTONIC_RAW,
+	SNDRV_RAWMIDI_FRAMING_LAST = SNDRV_RAWMIDI_FRAMING_TSTAMP_MONOTONIC_RAW,
+};
+
+#define SND_RAWMIDI_FRAMING_DATA_LENGTH 7
+
+struct snd_rawmidi_framing_tstamp {
+	unsigned int tv_sec;	/* seconds */
+	unsigned int tv_nsec;	/* nanoseconds */
+	unsigned char length;
+	unsigned char data[SND_RAWMIDI_FRAMING_DATA_LENGTH];
+};
+
 struct snd_rawmidi_params {
 	int stream;
 	size_t buffer_size;		/* queue size in bytes */
 	size_t avail_min;		/* minimum avail bytes for wakeup */
 	unsigned int no_active_sensing: 1; /* do not send active sensing byte in close() */
-	unsigned char reserved[16];	/* reserved for future use */
+	unsigned char framing;		/* For input data only, frame incoming data */
+	unsigned char reserved[15];	/* reserved for future use */
 };
 
 #ifndef __KERNEL__
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index aca00af93afe..cd927ba178a6 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -721,6 +721,7 @@  int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream,
 			     struct snd_rawmidi_params *params)
 {
 	snd_rawmidi_drain_input(substream);
+	substream->framing = params->framing;
 	return resize_runtime_buffer(substream->runtime, params, true);
 }
 EXPORT_SYMBOL(snd_rawmidi_input_params);
@@ -963,6 +964,44 @@  static int snd_rawmidi_control_ioctl(struct snd_card *card,
 	return -ENOIOCTLCMD;
 }
 
+static int receive_with_tstamp_framing(struct snd_rawmidi_substream *substream,
+			const unsigned char *buffer, int src_count, struct timespec64 *tstamp)
+{
+	struct snd_rawmidi_runtime *runtime = substream->runtime;
+	struct snd_rawmidi_framing_tstamp frame;
+	struct snd_rawmidi_framing_tstamp *dest_ptr;
+	int dest_frames = 0;
+	int frame_size = sizeof(struct snd_rawmidi_framing_tstamp);
+
+	frame.tv_sec = tstamp->tv_sec;
+	frame.tv_nsec = tstamp->tv_nsec;
+	if (snd_BUG_ON(runtime->hw_ptr & 15 || runtime->buffer_size & 15 || frame_size != 16))
+		return -EINVAL;
+
+	while (src_count > 0) {
+		if ((int)(runtime->buffer_size - runtime->avail) < frame_size) {
+			runtime->xruns += src_count;
+			return dest_frames * frame_size;
+		}
+		if (src_count >= SND_RAWMIDI_FRAMING_DATA_LENGTH)
+			frame.length = SND_RAWMIDI_FRAMING_DATA_LENGTH;
+		else {
+			frame.length = src_count;
+			memset(frame.data, 0, SND_RAWMIDI_FRAMING_DATA_LENGTH);
+		}
+		memcpy(frame.data, buffer, frame.length);
+		buffer += frame.length;
+		src_count -= frame.length;
+		dest_ptr = (struct snd_rawmidi_framing_tstamp *) (runtime->buffer + runtime->hw_ptr);
+		*dest_ptr = frame;
+		runtime->avail += frame_size;
+		runtime->hw_ptr += frame_size;
+		runtime->hw_ptr %= runtime->buffer_size;
+		dest_frames++;
+	}
+	return dest_frames * frame_size;
+}
+
 /**
  * snd_rawmidi_receive - receive the input data from the device
  * @substream: the rawmidi substream
@@ -977,6 +1016,7 @@  int snd_rawmidi_receive(struct snd_rawmidi_substream *substream,
 			const unsigned char *buffer, int count)
 {
 	unsigned long flags;
+	struct timespec64 ts64;
 	int result = 0, count1;
 	struct snd_rawmidi_runtime *runtime = substream->runtime;
 
@@ -988,7 +1028,10 @@  int snd_rawmidi_receive(struct snd_rawmidi_substream *substream,
 		return -EINVAL;
 	}
 	spin_lock_irqsave(&runtime->lock, flags);
-	if (count == 1) {	/* special case, faster code */
+	if (substream->framing == SNDRV_RAWMIDI_FRAMING_TSTAMP_MONOTONIC_RAW) {
+		ktime_get_raw_ts64(&ts64);
+		result = receive_with_tstamp_framing(substream, buffer, count, &ts64);
+	} else if (count == 1) {	/* special case, faster code */
 		substream->bytes++;
 		if (runtime->avail < runtime->buffer_size) {
 			runtime->buffer[runtime->hw_ptr++] = buffer[0];