diff mbox series

[v5,08/20] scripts/qapi/parser.py: improve doc comment indent handling

Message ID 20200810195019.25427-9-peter.maydell@linaro.org
State Accepted
Headers show
Series Convert QAPI doc comments to generate rST instead of texinfo | expand

Commit Message

Peter Maydell Aug. 10, 2020, 7:50 p.m. UTC
Make the handling of indentation in doc comments more sophisticated,
so that when we see a section like:

Notes: some text
       some more text
          indented line 3

we save it for the doc-comment processing code as:

some text
some more text
   indented line 3

and when we see a section with the heading on its own line:

Notes:

some text
some more text
   indented text

we also accept that and save it in the same form.

The exception is that we always retain indentation as-is for Examples
sections, because these are literal text.

If we detect that the comment document text is not indented as much
as we expect it to be, we throw a parse error.  (We don't complain
about over-indented sections, because for rST this can be legitimate
markup.)

The golden reference for the doc comment text is updated to remove
the two 'wrong' indents; these now form a test case checking that we
correctly strip leading whitespace from an indented multi-line argument
definition.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

---
v1->v2: Update doc-good.out as per final para.
---
 scripts/qapi/parser.py         | 81 +++++++++++++++++++++++++++-------
 tests/qapi-schema/doc-good.out |  4 +-
 2 files changed, 67 insertions(+), 18 deletions(-)

-- 
2.20.1

Comments

Markus Armbruster Sept. 4, 2020, 9:03 a.m. UTC | #1
Peter Maydell <peter.maydell@linaro.org> writes:

> Make the handling of indentation in doc comments more sophisticated,

> so that when we see a section like:

>

> Notes: some text

>        some more text

>           indented line 3

>

> we save it for the doc-comment processing code as:

>

> some text

> some more text

>    indented line 3

>

> and when we see a section with the heading on its own line:

>

> Notes:

>

> some text

> some more text

>    indented text

>

> we also accept that and save it in the same form.

>

> The exception is that we always retain indentation as-is for Examples

> sections, because these are literal text.


Does docs/devel/qapi-code-gen.txt need an update?  Hmm, looks like you
leave it to [PATCH 15] docs/devel/qapi-code-gen.txt: Update to new rST
backend conventions.  Acceptable.  Mentioning it in the commit message
now may make sense.

> If we detect that the comment document text is not indented as much

> as we expect it to be, we throw a parse error.  (We don't complain

> about over-indented sections, because for rST this can be legitimate

> markup.)

>

> The golden reference for the doc comment text is updated to remove

> the two 'wrong' indents; these now form a test case that we correctly

> stripped leading whitespace from an indented multi-line argument

> definition.

>

> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

> ---

> v1->v2: Update doc-good.out as per final para.

> ---

>  scripts/qapi/parser.py         | 81 +++++++++++++++++++++++++++-------

>  tests/qapi-schema/doc-good.out |  4 +-

>  2 files changed, 67 insertions(+), 18 deletions(-)

>

> diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py

> index 7fae4478d34..d9f11eadd96 100644

> --- a/scripts/qapi/parser.py

> +++ b/scripts/qapi/parser.py

> @@ -308,18 +308,32 @@ class QAPIDoc:

>      """

>  

>      class Section:

> -        def __init__(self, name=None):

> +        def __init__(self, parser, name=None, indent=0):

> +            # parser, for error messages about indentation

> +            self._parser = parser

>              # optional section name (argument/member or section name)

>              self.name = name

>              # the list of lines for this section

>              self.text = ''

> +            # the expected indent level of the text of this section

> +            self._indent = indent

>  

>          def append(self, line):

> +            # Strip leading spaces corresponding to the expected indent level

> +            # Blank lines are always OK.

> +            if line:

> +                spacecount = len(line) - len(line.lstrip(" "))


Works, but I'd prefer

                   indent = re.match(r'\s*', line).end()

> +                if spacecount > self._indent:

> +                    spacecount = self._indent

> +                if spacecount < self._indent:

> +                    raise QAPIParseError(self._parser, "unexpected de-indent")


New error needs test coverage.  I append a possible test.

Reporting the expected indentation might be helpful.

> +                line = line[spacecount:]


If you use self._indent instead of spacecount here (which I find
clearer), you don't need to cap spacecount at self._indent above.

> +

>              self.text += line.rstrip() + '\n'

>  

>      class ArgSection(Section):

> -        def __init__(self, name):

> -            super().__init__(name)

> +        def __init__(self, parser, name, indent=0):

> +            super().__init__(parser, name, indent)

>              self.member = None

>  

>          def connect(self, member):

> @@ -333,7 +347,7 @@ class QAPIDoc:

>          self._parser = parser

>          self.info = info

>          self.symbol = None

> -        self.body = QAPIDoc.Section()

> +        self.body = QAPIDoc.Section(parser)

>          # dict mapping parameter name to ArgSection

>          self.args = OrderedDict()

>          self.features = OrderedDict()

> @@ -438,7 +452,18 @@ class QAPIDoc:

>  

>          if name.startswith('@') and name.endswith(':'):

>              line = line[len(name)+1:]

> -            self._start_args_section(name[1:-1])

> +            if not line or line.isspace():

> +                # Line was just the "@arg:" header; following lines

> +                # are not indented

> +                indent = 0

> +                line = ''

> +            else:

> +                # Line is "@arg: first line of description"; following

> +                # lines should be indented by len(name) + 1, and we

> +                # pad out this first line so it is handled the same way

> +                indent = len(name) + 1

> +                line = ' ' * indent + line

> +            self._start_args_section(name[1:-1], indent)

>          elif self._is_section_tag(name):

>              self._append_line = self._append_various_line

>              self._append_various_line(line)

> @@ -460,7 +485,17 @@ class QAPIDoc:

>  

>          if name.startswith('@') and name.endswith(':'):

>              line = line[len(name)+1:]

> -            self._start_features_section(name[1:-1])

> +            if not line or line.isspace():

> +                # Line is just the "@name:" header, no ident for following lines


pycodestyle complains:
scripts/qapi/parser.py:489:80: E501 line too long (80 > 79 characters)

> +                indent = 0

> +                line = ''

> +            else:

> +                # Line is "@arg: first line of description"; following

> +                # lines should be indented by len(name) + 3, and we

> +                # pad out this first line so it is handled the same way

> +                indent = len(name) + 1


Comment claims + 3, code uses + 1.

Does this do the right thing when @arg: is followed by multiple
whitespace characters?

> +                line = ' ' * indent + line

> +            self._start_features_section(name[1:-1], indent)

>          elif self._is_section_tag(name):

>              self._append_line = self._append_various_line

>              self._append_various_line(line)

> @@ -493,11 +528,23 @@ class QAPIDoc:

>                                   % (name, self.sections[0].name))

>          if self._is_section_tag(name):

>              line = line[len(name)+1:]

> -            self._start_section(name[:-1])

> +            if not line or line.isspace():

> +                # Line is just "SectionName:", no indent for following lines

> +                indent = 0

> +                line = ''

> +            elif name.startswith("Example"):

> +                # The "Examples" section is literal-text, so preserve

> +                # all the indentation as-is

> +                indent = 0


Section "Example" is an exception.  Needs to be documented.  Do we
really need the exception?  As far as I can see, it's only ever used in
documentation of block-latency-histogram-set.

> +            else:

> +                # Line is "SectionName: some text", indent required


Same situation as above, much terser comment.

> +                indent = len(name) + 1

> +                line = ' ' * indent + line

> +            self._start_section(name[:-1], indent)

>  

>          self._append_freeform(line)

>  

> -    def _start_symbol_section(self, symbols_dict, name):

> +    def _start_symbol_section(self, symbols_dict, name, indent):

>          # FIXME invalid names other than the empty string aren't flagged

>          if not name:

>              raise QAPIParseError(self._parser, "invalid parameter name")

> @@ -506,21 +553,21 @@ class QAPIDoc:

>                                   "'%s' parameter name duplicated" % name)

>          assert not self.sections

>          self._end_section()

> -        self._section = QAPIDoc.ArgSection(name)

> +        self._section = QAPIDoc.ArgSection(self._parser, name, indent)

>          symbols_dict[name] = self._section

>  

> -    def _start_args_section(self, name):

> -        self._start_symbol_section(self.args, name)

> +    def _start_args_section(self, name, indent):

> +        self._start_symbol_section(self.args, name, indent)

>  

> -    def _start_features_section(self, name):

> -        self._start_symbol_section(self.features, name)

> +    def _start_features_section(self, name, indent):

> +        self._start_symbol_section(self.features, name, indent)

>  

> -    def _start_section(self, name=None):

> +    def _start_section(self, name=None, indent=0):

>          if name in ('Returns', 'Since') and self.has_section(name):

>              raise QAPIParseError(self._parser,

>                                   "duplicated '%s' section" % name)

>          self._end_section()

> -        self._section = QAPIDoc.Section(name)

> +        self._section = QAPIDoc.Section(self._parser, name, indent)

>          self.sections.append(self._section)

>  

>      def _end_section(self):

> @@ -543,7 +590,7 @@ class QAPIDoc:

>      def connect_member(self, member):

>          if member.name not in self.args:

>              # Undocumented TODO outlaw

> -            self.args[member.name] = QAPIDoc.ArgSection(member.name)

> +            self.args[member.name] = QAPIDoc.ArgSection(self._parser, member.name)


pycodestyle complains:
scripts/qapi/parser.py:593:80: E501 line too long (82 > 79 characters)


>          self.args[member.name].connect(member)

>  

>      def connect_feature(self, feature):

> @@ -551,6 +598,8 @@ class QAPIDoc:

>              raise QAPISemError(feature.info,

>                                 "feature '%s' lacks documentation"

>                                 % feature.name)

> +            self.features[feature.name] = QAPIDoc.ArgSection(self._parser,

> +                                                             feature.name)


pylint points out:
scripts/qapi/parser.py:601:12: W0101: Unreachable code (unreachable)

>          self.features[feature.name].connect(feature)

>  

>      def check_expr(self, expr):

> diff --git a/tests/qapi-schema/doc-good.out b/tests/qapi-schema/doc-good.out

> index 0ef85d959ac..bbf77b08dc3 100644

> --- a/tests/qapi-schema/doc-good.out

> +++ b/tests/qapi-schema/doc-good.out

> @@ -158,7 +158,7 @@ doc symbol=Alternate

>  

>      arg=i

>  an integer

> -    @b is undocumented

> +@b is undocumented

>      arg=b

>  

>      feature=alt-feat

> @@ -173,7 +173,7 @@ doc symbol=cmd

>  the first argument

>      arg=arg2

>  the second

> -       argument

> +argument

>      arg=arg3

>  

>      feature=cmd-feat1



Suggested new test doc-bad-deindent.json, cribbed from your PATCH 06 of
doc-good.json:

##
# @Alternate:
# @i: an integer
# @b is undocumented
##
{ 'alternate': 'Alternate',
  'data': { 'i': 'int', 'b': 'bool' } }
Peter Maydell Sept. 21, 2020, 3:06 p.m. UTC | #2
On Fri, 4 Sep 2020 at 10:03, Markus Armbruster <armbru@redhat.com> wrote:
>
> Peter Maydell <peter.maydell@linaro.org> writes:
>
> > Make the handling of indentation in doc comments more sophisticated,

> >          def append(self, line):
> > +            # Strip leading spaces corresponding to the expected indent level
> > +            # Blank lines are always OK.
> > +            if line:
> > +                spacecount = len(line) - len(line.lstrip(" "))
>
> Works, but I'd prefer
>
>                    indent = re.match(r'\s*', line).end()

OK.

> > +                if spacecount > self._indent:
> > +                    spacecount = self._indent
> > +                if spacecount < self._indent:
> > +                    raise QAPIParseError(self._parser, "unexpected de-indent")
>
> New error needs test coverage.  I append a possible test.
>
> Reporting the expected indentation might be helpful.

Fixed; new message produces reports like:
doc-bad-indent.json:6:1: unexpected de-indent (expected at least 4 spaces)

(I have not special-cased "1 spaces" -> "1 space"...)

> > +                line = line[spacecount:]
>
> If you use self._indent instead of spacecount here (which I find
> clearer), you don't need to cap spacecount at self._indent above.

Fixed.

> > +

> > @@ -460,7 +485,17 @@ class QAPIDoc:
> >
> >          if name.startswith('@') and name.endswith(':'):
> >              line = line[len(name)+1:]
> > -            self._start_features_section(name[1:-1])
> > +            if not line or line.isspace():
> > +                # Line is just the "@name:" header, no ident for following lines
>
> pycodestyle complains:
> scripts/qapi/parser.py:489:80: E501 line too long (80 > 79 characters)

Fixed.

> > +                indent = 0
> > +                line = ''
> > +            else:
> > +                # Line is "@arg: first line of description"; following
> > +                # lines should be indented by len(name) + 3, and we
> > +                # pad out this first line so it is handled the same way
> > +                indent = len(name) + 1
>
> Comment claims + 3, code uses + 1.

Yeah. This is because at this point 'name' is not actually just the
name "arg" but includes the leading '@' and trailing ':' so I got
confused between "we want the length of the name ("arg") plus 3"
and the expression you need to actually use. I got this right in the
comment in _append_args_line() but not in _append_features_line().
Will clarify (in both functions) to:

                # Line is "@arg: first line of description"; since 'name'
                # at this point is "@arg:" any following lines should be
                # indented by len(name) + 1. We pad out this first line
                # so it is handled the same way.

> Does this do the right thing when @arg: is followed by multiple
> whitespace characters?

The assumption is that if you added extra whitespace characters that's
because you wanted to specify a line of rST which starts with leading
spaces. So the handling here is that if you say

@foo:   bar
      baz

it's because you want the rST to be

  bar
baz

If this turns out to be invalid rST then the rST parser will
find that out later on.

As it happens I'm not sure whether there is any useful rST
syntax which has leading spaces and where you'd want to be able
to start an argument docstring with it, but it means we're
consistent with our handling of free-form doc comments, where
writing

   Foo
   bar

and writing

Foo
bar

are different things. Also with the change you suggest later
to avoid special-casing the "Examples" section then literal
text becomes an example of where it makes a difference.

> > +                line = ' ' * indent + line
> > +            self._start_features_section(name[1:-1], indent)
> >          elif self._is_section_tag(name):
> >              self._append_line = self._append_various_line
> >              self._append_various_line(line)
> > @@ -493,11 +528,23 @@ class QAPIDoc:
> >                                   % (name, self.sections[0].name))
> >          if self._is_section_tag(name):
> >              line = line[len(name)+1:]
> > -            self._start_section(name[:-1])
> > +            if not line or line.isspace():
> > +                # Line is just "SectionName:", no indent for following lines
> > +                indent = 0
> > +                line = ''
> > +            elif name.startswith("Example"):
> > +                # The "Examples" section is literal-text, so preserve
> > +                # all the indentation as-is
> > +                indent = 0
>
> Section "Example" is an exception.  Needs to be documented. Do we
> really need the exception?  As far as I can see, it's only ever used in
> documentation of block-latency-histogram-set.

Hmm, so you'd rather we changed the documentation of that
command so that instead of

# Example: remove all latency histograms:
#
# -> { "execute": "block-latency-histogram-set",
#      "arguments": { "id": "drive0" } }
# <- { "return": {} }

it would be

# Example:
# remove all latency histograms:
#
# -> { "execute": "block-latency-histogram-set",
#      "arguments": { "id": "drive0" } }
# <- { "return": {} }

and remove the special-case for "Example" so that if you did
write

Example: something on the same line
         more stuff here

it would be treated as literal text

something on the same line
more stuff here

?

Seems reasonable. (I think I put this special case in only
because I was trying to avoid changes to the existing doc
comments if it was easy to accommodate them in the parser.)
That command does seem to be the only outlier, so I've added
a patch to v6 which will fix up its documentation comment
and dropped the special casing.

> > +            else:
> > +                # Line is "SectionName: some text", indent required
>
> Same situation as above, much terser comment.

Fixed to use the expanded comment from earlier.

> > +                indent = len(name) + 1
> > +                line = ' ' * indent + line
> > +            self._start_section(name[:-1], indent)
> >
> >          self._append_freeform(line)

> > @@ -543,7 +590,7 @@ class QAPIDoc:
> >      def connect_member(self, member):
> >          if member.name not in self.args:
> >              # Undocumented TODO outlaw
> > -            self.args[member.name] = QAPIDoc.ArgSection(member.name)
> > +            self.args[member.name] = QAPIDoc.ArgSection(self._parser, member.name)
>
> pycodestyle complains:
> scripts/qapi/parser.py:593:80: E501 line too long (82 > 79 characters)

Fixed.

> >          self.args[member.name].connect(member)
> >
> >      def connect_feature(self, feature):
> > @@ -551,6 +598,8 @@ class QAPIDoc:
> >              raise QAPISemError(feature.info,
> >                                 "feature '%s' lacks documentation"
> >                                 % feature.name)
> > +            self.features[feature.name] = QAPIDoc.ArgSection(self._parser,
> > +                                                             feature.name)
>
> pylint points out:
> scripts/qapi/parser.py:601:12: W0101: Unreachable code (unreachable)
>

Yeah; this part of the patch used to be a "just update all the
callsites of QAPIDoc.ArgSection() to pass the extra argument"
hunk. It looks like your commit 8ec0e1a4e68781 removed this
callsite entirely as dead code, but I missed that in the rebase
and accidentally reintroduced the dead code. Fixed.

> Suggested new test doc-bad-deindent.json, cribbed from your PATCH 06 of
> doc-good.json:
>
> ##
> # @Alternate:
> # @i: an integer
> # @b is undocumented
> ##
> { 'alternate': 'Alternate',
>   'data': { 'i': 'int', 'b': 'bool' } }

The '@' at the front of the second line here is not relevant to
the mis-indentation and it's kind of confusing (as the correct
fix is "add a colon", not "reindent the line"), so I think I'd
rather have a test that's clearly looking at the indent:

# Multiline doc comments should have consistent indentation

##
# @foo:
# @a: line one
# line two is wrongly indented
##
{ 'command': 'foo', 'data': { 'a': 'int' } }

which expects the error:

doc-bad-indent.json:6:1: unexpected de-indent (expected at least 4 spaces)

thanks
-- PMM
Markus Armbruster Sept. 22, 2020, 7:27 a.m. UTC | #3
Peter Maydell <peter.maydell@linaro.org> writes:

> On Fri, 4 Sep 2020 at 10:03, Markus Armbruster <armbru@redhat.com> wrote:
>>
>> Peter Maydell <peter.maydell@linaro.org> writes:
>>
>> > Make the handling of indentation in doc comments more sophisticated,
>
>> >          def append(self, line):
>> > +            # Strip leading spaces corresponding to the expected indent level
>> > +            # Blank lines are always OK.
>> > +            if line:
>> > +                spacecount = len(line) - len(line.lstrip(" "))
>>
>> Works, but I'd prefer
>>
>>                    indent = re.match(r'\s*', line).end()
>
> OK.
>
>> > +                if spacecount > self._indent:
>> > +                    spacecount = self._indent
>> > +                if spacecount < self._indent:
>> > +                    raise QAPIParseError(self._parser, "unexpected de-indent")
>>
>> New error needs test coverage.  I append a possible test.
>>
>> Reporting the expected indentation might be helpful.
>
> Fixed; new message produces reports like:
> doc-bad-indent.json:6:1: unexpected de-indent (expected at least 4 spaces)
>
> (I have not special-cased "1 spaces" -> "1 space"...)
>
>> > +                line = line[spacecount:]
>>
>> If you use self._indent instead of spacecount here (which I find
>> clearer), you don't need to cap spacecount at self._indent above.
>
> Fixed.
>
>> > +
>
>> > @@ -460,7 +485,17 @@ class QAPIDoc:
>> >
>> >          if name.startswith('@') and name.endswith(':'):
>> >              line = line[len(name)+1:]
>> > -            self._start_features_section(name[1:-1])
>> > +            if not line or line.isspace():
>> > +                # Line is just the "@name:" header, no ident for following lines
>>
>> pycodestyle complains:
>> scripts/qapi/parser.py:489:80: E501 line too long (80 > 79 characters)
>
> Fixed.
>
>> > +                indent = 0
>> > +                line = ''
>> > +            else:
>> > +                # Line is "@arg: first line of description"; following
>> > +                # lines should be indented by len(name) + 3, and we
>> > +                # pad out this first line so it is handled the same way
>> > +                indent = len(name) + 1
>>
>> Comment claims + 3, code uses + 1.
>
> Yeah. This is because at this point 'name' is not actually just the
> name "arg" but includes the leading '@' and trailing ':' so I got
> confused between "we want the length of the name ("arg") plus 3"
> and the expression you need to actually use. I got this right in the
> comment in _append_args_line() but not in _append_features_line().
> Will clarify (in both functions) to:
>
>                 # Line is "@arg: first line of description"; since 'name'
>                 # at this point is "@arg:" any following lines should be
>                 # indented by len(name) + 1. We pad out this first line
>                 # so it is handled the same way.
>
>> Does this do the right thing when @arg: is followed by multiple
>> whitespace characters?
>
> The assumption is that if you added extra whitespace characters that's
> because you wanted to specify a line of rST which starts with leading
> spaces. So the handling here is that if you say
>
> @foo:   bar
>       baz
>
> it's because you want the rST to be
>
>   bar
> baz
>
> If this turns out to be invalid rST then the rST parser will
> find that out later on.

In general, I'm wary of making the amount of whitespace within a line
significant, but in this case, the visual misalignment of bar and baz
should make accidents unlikely.

How does

  @foo:  bar
         baz
  @frob: gnu
         gnat

behave?

This is something people may actually write.

> As it happens I'm not sure whether there is any useful rST
> syntax which has leading spaces and where you'd want to be able
> to start an argument docstring with it, but it means we're
> consistent with our handling of free-form doc comments, where
> writing
>
>    Foo
>    bar
>
> and writing
>
> Foo
> bar
>
> are different things. Also with the change you suggest later
> to avoid special-casing the "Examples" section then literal
> text becomes an example of where it makes a difference.

Valid points.

>> > +                line = ' ' * indent + line
>> > +            self._start_features_section(name[1:-1], indent)
>> >          elif self._is_section_tag(name):
>> >              self._append_line = self._append_various_line
>> >              self._append_various_line(line)
>> > @@ -493,11 +528,23 @@ class QAPIDoc:
>> >                                   % (name, self.sections[0].name))
>> >          if self._is_section_tag(name):
>> >              line = line[len(name)+1:]
>> > -            self._start_section(name[:-1])
>> > +            if not line or line.isspace():
>> > +                # Line is just "SectionName:", no indent for following lines
>> > +                indent = 0
>> > +                line = ''
>> > +            elif name.startswith("Example"):
>> > +                # The "Examples" section is literal-text, so preserve
>> > +                # all the indentation as-is
>> > +                indent = 0
>>
>> Section "Example" is an exception.  Needs to be documented. Do we
>> really need the exception?  As far as I can see, it's only ever used in
>> documentation of block-latency-histogram-set.
>
> Hmm, so you'd rather we changed the documentation of that
> command so that instead of
>
> # Example: remove all latency histograms:
> #
> # -> { "execute": "block-latency-histogram-set",
> #      "arguments": { "id": "drive0" } }
> # <- { "return": {} }
>
> it would be
>
> # Example:
> # remove all latency histograms:
> #
> # -> { "execute": "block-latency-histogram-set",
> #      "arguments": { "id": "drive0" } }
> # <- { "return": {} }
>
> and remove the special-case for "Example" so that if you did
> write
>
> Example: something on the same line
>          more stuff here
>
> it would be treated as literal text
>
> something on the same line
> more stuff here
>
> ?
>
> Seems reasonable. (I think I put this special case in only
> because I was trying to avoid changes to the existing doc
> comments if it was easy to accommodate them in the parser.)
> That command does seem to be the only outlier, so I've added
> a patch to v6 which will fix up its documentation comment
> and dropped the special casing.

Sounds like a good trade.

>> > +            else:
>> > +                # Line is "SectionName: some text", indent required
>>
>> Same situation as above, much terser comment.
>
> Fixed to use the expanded comment from earlier.
>
>> > +                indent = len(name) + 1
>> > +                line = ' ' * indent + line
>> > +            self._start_section(name[:-1], indent)
>> >
>> >          self._append_freeform(line)
>
>> > @@ -543,7 +590,7 @@ class QAPIDoc:
>> >      def connect_member(self, member):
>> >          if member.name not in self.args:
>> >              # Undocumented TODO outlaw
>> > -            self.args[member.name] = QAPIDoc.ArgSection(member.name)
>> > +            self.args[member.name] = QAPIDoc.ArgSection(self._parser, member.name)
>>
>> pycodestyle complains:
>> scripts/qapi/parser.py:593:80: E501 line too long (82 > 79 characters)
>
> Fixed.
>
>> >          self.args[member.name].connect(member)
>> >
>> >      def connect_feature(self, feature):
>> > @@ -551,6 +598,8 @@ class QAPIDoc:
>> >              raise QAPISemError(feature.info,
>> >                                 "feature '%s' lacks documentation"
>> >                                 % feature.name)
>> > +            self.features[feature.name] = QAPIDoc.ArgSection(self._parser,
>> > +                                                             feature.name)
>>
>> pylint points out:
>> scripts/qapi/parser.py:601:12: W0101: Unreachable code (unreachable)
>>
>
> Yeah; this part of the patch used to be a "just update all the
> callsites of QAPIDoc.ArgSection() to pass the extra argument"
> hunk. It looks like your commit 8ec0e1a4e68781 removed this
> callsite entirely as dead code, but I missed that in the rebase
> and accidentally reintroduced the dead code. Fixed.
>
>> Suggested new test doc-bad-deindent.json, cribbed from your PATCH 06 of
>> doc-good.json:
>>
>> ##
>> # @Alternate:
>> # @i: an integer
>> # @b is undocumented
>> ##
>> { 'alternate': 'Alternate',
>>   'data': { 'i': 'int', 'b': 'bool' } }
>
> The '@' at the front of the second line here is not relevant to
> the mis-indentation and it's kind of confusing (as the correct
> fix is "add a colon", not "reindent the line"), so I think I'd
> rather have a test that's clearly looking at the indent:
>
> # Multiline doc comments should have consistent indentation
>
> ##
> # @foo:
> # @a: line one
> # line two is wrongly indented
> ##
> { 'command': 'foo', 'data': { 'a': 'int' } }
>
> which expects the error:
>
> doc-bad-indent.json:6:1: unexpected de-indent (expected at least 4 spaces)

Yes, that's better.
Peter Maydell Sept. 22, 2020, 11:48 a.m. UTC | #4
On Tue, 22 Sep 2020 at 08:27, Markus Armbruster <armbru@redhat.com> wrote:
> How does
>
>   @foo:  bar
>          baz
>   @frob: gnu
>          gnat
>
> behave?

The rST fragments would be:

 bar
 baz

gnu
gnat

So you get what rST does with that. We do actually have examples
of this in the existing QAPI doc comments. It ends up treating it
as a definition list where the term is 'bar' and the
definition is 'baz' (which I don't entirely understand, I was
expecting a block-quote). That renders sufficiently close to correct
that I hadn't noticed it.

It would be a fairly small change to determine the indent level by
looking for the first non-ws character on line 1 after the colon.
Since we have a fair amount of this style in the code and it is,
as you say, a natural-seeming thing to write, that seems the best
thing to do. (If you really wanted to start the option documentation with
some rST that required an initial indent, probably because you're
writing a literal-text Examples section, then you'd need to use the
"nothing after the : on line 1, rST fragment begins on line 2 in
column 0" style. Which would be the most natural way to write
that literal text anyway.)

I guess at this point I'll potentially create work for myself
by drawing your attention to the rST syntax for field lists
and option lists:
https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#field-lists
which are kind of similar to what we're doing with @foo: stuff
markup, and which handle indentation like this:

:Hello: This field has a short field name, so aligning the field
        body with the first line is feasible.

:Number-of-African-swallows-required-to-carry-a-coconut: It would
    be very difficult to align the field body with the left edge
    of the first line. It may even be preferable not to begin the
    body on the same line as the marker.

The differences to what I have implemented in this series are:
 * indent of lines 2+ is determined by the indent of line 2, not 1
 * lines 2+ must be indented, so anything that currently uses
   "no indent, start in column 0" would need indenting. (This would
   be a lot of change to our current docs text.)
 * it doesn't say in the spec, but I guess that spaces between
   the colon and start of line 1 text are not significant.

The advantage would be a bit more consistency with rST syntax
otherwise; the disadvantage is that we have a *lot* of text
that uses the "start in column 0" format, like this:

# @QCryptoBlockOptionsBase:
#
# The common options that apply to all full disk
# encryption formats

and we'd need to reindent it all. My view is that trying to
look more like rST indent isn't sufficiently useful to be
worth having to change all that.

> This is something people may actually write.

Indeed, they have :-)

thanks
-- PMM
Markus Armbruster Sept. 22, 2020, 2:08 p.m. UTC | #5
Peter Maydell <peter.maydell@linaro.org> writes:

> On Tue, 22 Sep 2020 at 08:27, Markus Armbruster <armbru@redhat.com> wrote:
>> How does
>>
>>   @foo:  bar
>>          baz
>>   @frob: gnu
>>          gnat
>>
>> behave?
>
> The rST fragments would be:
>
>  bar
>  baz
>
> gnu
> gnat
>
> So you get what rST does with that. We do actually have examples
> of this in the existing QAPI doc comments. It ends up treating it
> as a definition list where the term is 'bar' and the
> definition is 'baz' (which I don't entirely understand, I was

The Perl of ASCII-based markups...

> expecting a block-quote). That renders sufficiently close to correct
> that I hadn't noticed it.

Understandable :)

> It would be a fairly small change to determine the indent level by
> looking for the first non-ws character on line 1 after the colon.
> Since we have a fair amount of this style in the code and it's,
> as you say, a natural-seeming thing to write, that seems the best
> thing. (If you really wanted to start the option documentation with
> some rST that required an initial indent, probably because you're
> writing a literal-text Examples section, then you'd need to use the
> "nothing after the : on line 1, rST fragment begins on line 2 in
> column 0" style. Which would be the most natural way to write
> that literal text anyway.)

Agree.

> I guess at this point I'll potentially create work for myself
> by drawing your attention to the rST syntax for field lists
> and option lists:
> https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#field-lists
> which are kind of similar to what we're doing with @foo: stuff
> markup, and which handle indentation like this:
>
> :Hello: This field has a short field name, so aligning the field
>         body with the first line is feasible.
>
> :Number-of-African-swallows-required-to-carry-a-coconut: It would
>     be very difficult to align the field body with the left edge
>     of the first line. It may even be preferable not to begin the
>     body on the same line as the marker.
>
> The differences to what I have implemented in this series are:
>  * indent of lines 2+ is determined by the indent of line 2, not 1
>  * lines 2+ must be indented, so anything that currently uses
>    "no indent, start in column 0" would need indenting. (This would
>    be a lot of change to our current docs text.)
>  * it doesn't say in the spec, but I guess that spaces between
>    the colon and start of line 1 text are not significant.
>
> The advantage would be a bit more consistency with rST syntax
> otherwise; the disadvantage is that we have a *lot* of text
> that uses the "start in column 0" format, like this:
>
> # @QCryptoBlockOptionsBase:
> #
> # The common options that apply to all full disk
> # encryption formats
>
> and we'd need to reindent it all. My view is that trying to
> look more like rST indent isn't sufficiently useful to be
> worth having to change all that.

We use @FOO: for two distinct things:

1. Right at the beginning of a comment block, it makes the comment block
a definition doc block for symbol FOO.

2. At the beginning of an argument section, it names the argument /
member being documented.

Example:

    ##
    # @QCryptoBlockOptionsBase:                         <-- 1.
    #
    # The common options that apply to all full disk
    # encryption formats
    #
    # @format: the encryption format                    <-- 2.
    #
    # Since: 2.6
    ##

We could switch just 2. to reST field list syntax, and either keep 1. as
is, or switch it to some other reST markup that works for us.

But even if we want this, we should do it on top, to avoid complicating
and delaying this series.

>> This is something people may actually write.
>
> Indeed, they have :-)
>
> thanks
> -- PMM
Peter Maydell Sept. 22, 2020, 3:28 p.m. UTC | #6
On Fri, 4 Sep 2020 at 10:03, Markus Armbruster <armbru@redhat.com> wrote:
> Peter Maydell <peter.maydell@linaro.org> writes:
> > Make the handling of indentation in doc comments more sophisticated,
> > so that when we see a section like:
> >
> > Notes: some text
> >        some more text
> >           indented line 3
> >
> > we save it for the doc-comment processing code as:
> >
> > some text
> > some more text
> >    indented line 3
> >
> > and when we see a section with the heading on its own line:
> >
> > Notes:
> >
> > some text
> > some more text
> >    indented text
> >
> > we also accept that and save it in the same form.
> >
> > The exception is that we always retain indentation as-is for Examples
> > sections, because these are literal text.
>
> Does docs/devel/qapi-code-gen.txt need an update?  Hmm, looks like you
> leave it to [PATCH 15] docs/devel/qapi-code-gen.txt: Update to new rST
> backend conventions.  Acceptable.  Mentioning it in the commit message
> now may make sense.

I've decided to pull the bits of patch 15 which document the
new indent rules into this patch, in the optimistic hope that
if the patch series is OK up to this point but needs another
review round for subsequent parts, we might be able to commit
this patch to master and stop further inadvertent breaches
of the new indent rules from being committed. (Another couple have
already appeared in master since v5, so v6 will again start
with a patch fixing them up.)

thanks
-- PMM
diff mbox series

Patch

diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
index 7fae4478d34..d9f11eadd96 100644
--- a/scripts/qapi/parser.py
+++ b/scripts/qapi/parser.py
@@ -308,18 +308,32 @@  class QAPIDoc:
     """
 
     class Section:
-        def __init__(self, name=None):
+        def __init__(self, parser, name=None, indent=0):
+            # parser, for error messages about indentation
+            self._parser = parser
             # optional section name (argument/member or section name)
             self.name = name
             # the list of lines for this section
             self.text = ''
+            # the expected indent level of the text of this section
+            self._indent = indent
 
         def append(self, line):
+            # Strip leading spaces corresponding to the expected indent level
+            # Blank lines are always OK.
+            if line:
+                spacecount = len(line) - len(line.lstrip(" "))
+                if spacecount > self._indent:
+                    spacecount = self._indent
+                if spacecount < self._indent:
+                    raise QAPIParseError(self._parser, "unexpected de-indent")
+                line = line[spacecount:]
+
             self.text += line.rstrip() + '\n'
 
     class ArgSection(Section):
-        def __init__(self, name):
-            super().__init__(name)
+        def __init__(self, parser, name, indent=0):
+            super().__init__(parser, name, indent)
             self.member = None
 
         def connect(self, member):
@@ -333,7 +347,7 @@  class QAPIDoc:
         self._parser = parser
         self.info = info
         self.symbol = None
-        self.body = QAPIDoc.Section()
+        self.body = QAPIDoc.Section(parser)
         # dict mapping parameter name to ArgSection
         self.args = OrderedDict()
         self.features = OrderedDict()
@@ -438,7 +452,18 @@  class QAPIDoc:
 
         if name.startswith('@') and name.endswith(':'):
             line = line[len(name)+1:]
-            self._start_args_section(name[1:-1])
+            if not line or line.isspace():
+                # Line was just the "@arg:" header; following lines
+                # are not indented
+                indent = 0
+                line = ''
+            else:
+                # Line is "@arg: first line of description"; following
+                # lines should be indented by len(name) + 1, and we
+                # pad out this first line so it is handled the same way
+                indent = len(name) + 1
+                line = ' ' * indent + line
+            self._start_args_section(name[1:-1], indent)
         elif self._is_section_tag(name):
             self._append_line = self._append_various_line
             self._append_various_line(line)
@@ -460,7 +485,17 @@  class QAPIDoc:
 
         if name.startswith('@') and name.endswith(':'):
             line = line[len(name)+1:]
-            self._start_features_section(name[1:-1])
+            if not line or line.isspace():
+                # Line is just "@name:", no indent for following lines
+                indent = 0
+                line = ''
+            else:
+                # Line is "@arg: first line of description"; following
+                # lines should be indented by len(name) + 1, and we
+                # pad out this first line so it is handled the same way
+                indent = len(name) + 1
+                line = ' ' * indent + line
+            self._start_features_section(name[1:-1], indent)
         elif self._is_section_tag(name):
             self._append_line = self._append_various_line
             self._append_various_line(line)
@@ -493,11 +528,23 @@  class QAPIDoc:
                                  % (name, self.sections[0].name))
         if self._is_section_tag(name):
             line = line[len(name)+1:]
-            self._start_section(name[:-1])
+            if not line or line.isspace():
+                # Line is just "SectionName:", no indent for following lines
+                indent = 0
+                line = ''
+            elif name.startswith("Example"):
+                # The "Examples" section is literal-text, so preserve
+                # all the indentation as-is
+                indent = 0
+            else:
+                # Line is "SectionName: some text", indent required
+                indent = len(name) + 1
+                line = ' ' * indent + line
+            self._start_section(name[:-1], indent)
 
         self._append_freeform(line)
 
-    def _start_symbol_section(self, symbols_dict, name):
+    def _start_symbol_section(self, symbols_dict, name, indent):
         # FIXME invalid names other than the empty string aren't flagged
         if not name:
             raise QAPIParseError(self._parser, "invalid parameter name")
@@ -506,21 +553,21 @@  class QAPIDoc:
                                  "'%s' parameter name duplicated" % name)
         assert not self.sections
         self._end_section()
-        self._section = QAPIDoc.ArgSection(name)
+        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
         symbols_dict[name] = self._section
 
-    def _start_args_section(self, name):
-        self._start_symbol_section(self.args, name)
+    def _start_args_section(self, name, indent):
+        self._start_symbol_section(self.args, name, indent)
 
-    def _start_features_section(self, name):
-        self._start_symbol_section(self.features, name)
+    def _start_features_section(self, name, indent):
+        self._start_symbol_section(self.features, name, indent)
 
-    def _start_section(self, name=None):
+    def _start_section(self, name=None, indent=0):
         if name in ('Returns', 'Since') and self.has_section(name):
             raise QAPIParseError(self._parser,
                                  "duplicated '%s' section" % name)
         self._end_section()
-        self._section = QAPIDoc.Section(name)
+        self._section = QAPIDoc.Section(self._parser, name, indent)
         self.sections.append(self._section)
 
     def _end_section(self):
@@ -543,7 +590,7 @@  class QAPIDoc:
     def connect_member(self, member):
         if member.name not in self.args:
             # Undocumented TODO outlaw
-            self.args[member.name] = QAPIDoc.ArgSection(member.name)
+            self.args[member.name] = QAPIDoc.ArgSection(self._parser, member.name)
         self.args[member.name].connect(member)
 
     def connect_feature(self, feature):
@@ -551,6 +598,8 @@  class QAPIDoc:
             raise QAPISemError(feature.info,
                                "feature '%s' lacks documentation"
                                % feature.name)
+            self.features[feature.name] = QAPIDoc.ArgSection(self._parser,
+                                                             feature.name)
         self.features[feature.name].connect(feature)
 
     def check_expr(self, expr):
diff --git a/tests/qapi-schema/doc-good.out b/tests/qapi-schema/doc-good.out
index 0ef85d959ac..bbf77b08dc3 100644
--- a/tests/qapi-schema/doc-good.out
+++ b/tests/qapi-schema/doc-good.out
@@ -158,7 +158,7 @@  doc symbol=Alternate
 
     arg=i
 an integer
-    @b is undocumented
+@b is undocumented
     arg=b
 
     feature=alt-feat
@@ -173,7 +173,7 @@  doc symbol=cmd
 the first argument
     arg=arg2
 the second
-       argument
+argument
     arg=arg3
 
     feature=cmd-feat1