Message ID | 20201026194251.11075-6-jsnow@redhat.com |
---|---|
State | Superseded |
Headers | show |
Series | [v2,01/11,DO-NOT-MERGE] docs: replace single backtick (`) with double-backtick (``) | expand |
On Mon, Oct 26, 2020 at 03:42:45PM -0400, John Snow wrote: > The typing of _make_tree and friends is a bit involved, but it can be > done with some stubbed out types and a bit of elbow grease. The > forthcoming patches attempt to make some simplifications, but having the > type hints in advance may aid in review of subsequent patches. > > > Some notes on the abstract types used at this point, and what they > represent: > > - TreeValue represents any object in the type tree. _make_tree is an > optional call -- not every node in the final type tree will have been > passed to _make_tree, so this type encompasses not only what is passed > to _make_tree (dicts, strings) or returned from it (dicts, strings, a > 2-tuple), but any recursive value for any of the dicts passed to > _make_tree -- which includes lists, strings, integers, null constants, > and so on. > > - _DObject is a type alias I use to mean "A JSON-style object, > represented as a Python dict." There is no "JSON" type in Python, they > are converted natively to recursively nested dicts and lists, with > leaf values of str, int, float, None, True/False and so on. This type > structure is not possible to accurately portray in mypy yet, so a > placeholder is used. > > In this case, _DObject is being used to refer to SchemaInfo-like > structures as defined in qapi/introspect.json, OR any sub-object > values they may reference. We don't have strong typing available for > those, so a generic alternative is used. > > - Extra refers explicitly to the dict containing "extra" information > about a node in the tree. mypy does not offer per-key typing for dicts > in Python 3.6, so this is the best we can do here. > > - Annotated refers to (one of) the return types of _make_tree: > It represents a 2-tuple of (TreeValue, Extra). > > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> > Signed-off-by: John Snow <jsnow@redhat.com> > --- > scripts/qapi/introspect.py | 157 ++++++++++++++++++++++++++++--------- > scripts/qapi/mypy.ini | 5 -- > scripts/qapi/schema.py | 2 +- > 3 files changed, 121 insertions(+), 43 deletions(-) > > diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py > index 63f721ebfb6..803288a64e7 100644 > --- a/scripts/qapi/introspect.py > +++ b/scripts/qapi/introspect.py > @@ -10,7 +10,16 @@ > See the COPYING file in the top-level directory. > """ > > -from typing import Optional, Sequence, cast > +from typing import ( > + Any, > + Dict, > + List, > + Optional, > + Sequence, > + Tuple, > + Union, > + cast, > +) > > from .common import ( > c_name, > @@ -20,13 +29,56 @@ > ) > from .gen import QAPISchemaMonolithicCVisitor > from .schema import ( > + QAPISchema, > QAPISchemaArrayType, > QAPISchemaBuiltinType, > + QAPISchemaEntity, > + QAPISchemaEnumMember, > + QAPISchemaFeature, > + QAPISchemaObjectType, > + QAPISchemaObjectTypeMember, > QAPISchemaType, > + QAPISchemaVariant, > + QAPISchemaVariants, > ) > +from .source import QAPISourceInfo > > > -def _make_tree(obj, ifcond, features, extra=None): > +# This module constructs a tree-like data structure that is used to > +# generate the introspection information for QEMU. It behaves similarly > +# to a JSON value. > +# > +# A complexity over JSON is that our values may or may not be annotated. > +# > +# Un-annotated values may be: > +# Scalar: str, bool, None. > +# Non-scalar: List, Dict > +# _Value = Union[str, bool, None, Dict[str, Value], List[Value]] Here (and in a few other places) you mention `_Value`, but then define it as `_value` (lowercase). > +# > +# With optional annotations, the type of all values is: > +# TreeValue = Union[_Value, Annotated[_Value]] > +# > +# Sadly, mypy does not support recursive types, so we must approximate this. > +_stub = Any > +_scalar = Union[str, bool, None] > +_nonscalar = Union[Dict[str, _stub], List[_stub]] Why not use `Any` here instead of declaring/using `_stub`? > +_value = Union[_scalar, _nonscalar] > +TreeValue = Union[_value, 'Annotated'] > + Maybe declare `Annotations` first, then `Annotated`, then *use* `Annotated` proper here? - Cleber.
John Snow <jsnow@redhat.com> writes: > The typing of _make_tree and friends is a bit involved, but it can be > done with some stubbed out types and a bit of elbow grease. The > forthcoming patches attempt to make some simplifications, but having the > type hints in advance may aid in review of subsequent patches. > > > Some notes on the abstract types used at this point, and what they > represent: > > - TreeValue represents any object in the type tree. _make_tree is an > optional call -- not every node in the final type tree will have been > passed to _make_tree, so this type encompasses not only what is passed > to _make_tree (dicts, strings) or returned from it (dicts, strings, a > 2-tuple), but any recursive value for any of the dicts passed to > _make_tree -- which includes lists, strings, integers, null constants, > and so on. > > - _DObject is a type alias I use to mean "A JSON-style object, > represented as a Python dict." There is no "JSON" type in Python, they > are converted natively to recursively nested dicts and lists, with > leaf values of str, int, float, None, True/False and so on. This type > structure is not possible to accurately portray in mypy yet, so a > placeholder is used. > > In this case, _DObject is being used to refer to SchemaInfo-like > structures as defined in qapi/introspect.json, OR any sub-object > values they may reference. We don't have strong typing available for > those, so a generic alternative is used. > > - Extra refers explicitly to the dict containing "extra" information > about a node in the tree. mypy does not offer per-key typing for dicts > in Python 3.6, so this is the best we can do here. > > - Annotated refers to (one of) the return types of _make_tree: > It represents a 2-tuple of (TreeValue, Extra). > > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> > Signed-off-by: John Snow <jsnow@redhat.com> > --- > scripts/qapi/introspect.py | 157 ++++++++++++++++++++++++++++--------- > scripts/qapi/mypy.ini | 5 -- > scripts/qapi/schema.py | 2 +- > 3 files changed, 121 insertions(+), 43 deletions(-) > > diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py > index 63f721ebfb6..803288a64e7 100644 > --- a/scripts/qapi/introspect.py > +++ b/scripts/qapi/introspect.py > @@ -10,7 +10,16 @@ > See the COPYING file in the top-level directory. > """ > > -from typing import Optional, Sequence, cast > +from typing import ( > + Any, > + Dict, > + List, > + Optional, > + Sequence, > + Tuple, > + Union, > + cast, > +) > > from .common import ( > c_name, > @@ -20,13 +29,56 @@ > ) > from .gen import QAPISchemaMonolithicCVisitor > from .schema import ( > + QAPISchema, > QAPISchemaArrayType, > QAPISchemaBuiltinType, > + QAPISchemaEntity, > + QAPISchemaEnumMember, > + QAPISchemaFeature, > + QAPISchemaObjectType, > + QAPISchemaObjectTypeMember, > QAPISchemaType, > + QAPISchemaVariant, > + QAPISchemaVariants, > ) > +from .source import QAPISourceInfo > > > -def _make_tree(obj, ifcond, features, extra=None): > +# This module constructs a tree-like data structure that is used to "Tree-like" suggests it's not a tree, it just looks like one if you squint. Drop "-like"? > +# generate the introspection information for QEMU. It behaves similarly > +# to a JSON value. > +# > +# A complexity over JSON is that our values may or may not be annotated. It's the obvious abstract syntax tree for JSON, hacked up^W^Wextended to support certain annotations. Let me add a bit of context and history. The module's job is generating qapi-introspect.[ch] for a QAPISchema. The purpose of qapi-introspect.[ch] is providing the information query-qmp-schema needs, i.e. (a suitable C representation of) a JSON value conforming to [SchemaInfo]. Details of this C representation are not interesting right now. We first go from QAPISchema to a suitable Python representation of [SchemaInfo], then from there to the C source code, neatly separating concerns. Stupidest solution Python representation that could possibly work: the obvious abstract syntax tree for JSON (that's also how Python's json module works). Parts corresponding to QAPISchema parts guarded by 'if' conditionals need to be guarded by #if conditionals. We want to prefix parts corresponding to certain QAPISchema parts with a comment. These two requirements came later, and were hacked into the existing stupidest solution: any tree node can be a tuple (json, extra), where json is the "stupidest" node, and extra is a dict of annotations. In other words, to annotate an unannotated node N with dict D, replace N by (N, D). Possible annotations: 'comment': str 'if': Sequence[str] They say there are just three answers a Marine may give to an officer's questions: "Yes, sir!", "No, sir!", "No excuse, sir!". Let me put that to use here: Is this an elegant design? No, sir! Is the code easy to read? No excuse, sir! Was it cheap to make? Yes, sir! > +# > +# Un-annotated values may be: > +# Scalar: str, bool, None. > +# Non-scalar: List, Dict > +# _Value = Union[str, bool, None, Dict[str, Value], List[Value]] > +# > +# With optional annotations, the type of all values is: > +# TreeValue = Union[_Value, Annotated[_Value]] > +# > +# Sadly, mypy does not support recursive types, so we must approximate this. > +_stub = Any > +_scalar = Union[str, bool, None] > +_nonscalar = Union[Dict[str, _stub], List[_stub]] > +_value = Union[_scalar, _nonscalar] > +TreeValue = Union[_value, 'Annotated'] Are there naming conventions for this kind of variables? I'm asking because you capitalize some, but not all, and I can't see a pattern. Ignorant question: only 'Annotated' has quotes; why? There is "_Value", "Value" and "_value". Suggest to add "value" somewhere, for completeness ;-P I find the names _value and TreeValue a bit unfortunate: the difference between the two isn't Tree. I'll come back to this below. > + > +# This is just an alias for an object in the structure described above: > +_DObject = Dict[str, object] I'm confused. Which structure, and why do we want to alias it? > + > +# Represents the annotations themselves: > +Annotations = Dict[str, object] Losely typed. I have no idea whether that's bad :) > + > +# Represents an annotated node (of some kind). > +Annotated = Tuple[_value, Annotations] So, _value seems to represent a JSON value, Annotated an annotated JSON value, and TreeValue their union, i.e. a possibly annotated JSON value. Naming is hard... BareJsonValue, AnnotatedJsonValue, JsonValue? > + > + > +def _make_tree(obj: Union[_DObject, str], ifcond: List[str], I'd expect obj: _value, i.e. "unannotated value". > + features: List[QAPISchemaFeature], > + extra: Optional[Annotations] = None > + ) -> TreeValue: > if extra is None: > extra = {} > if ifcond: > @@ -39,9 +91,11 @@ def _make_tree(obj, ifcond, features, extra=None): > return obj > > > -def _tree_to_qlit(obj, level=0, suppress_first_indent=False): > +def _tree_to_qlit(obj: TreeValue, > + level: int = 0, > + suppress_first_indent: bool = False) -> str: > > - def indent(level): > + def indent(level: int) -> str: > return level * 4 * ' ' > > if isinstance(obj, tuple): > @@ -91,21 +145,20 @@ def indent(level): > return ret > > > -def to_c_string(string): > +def to_c_string(string: str) -> str: > return '"' + string.replace('\\', r'\\').replace('"', r'\"') + '"' > > > class QAPISchemaGenIntrospectVisitor(QAPISchemaMonolithicCVisitor): > - Intentional? > - def __init__(self, prefix, unmask): > + def __init__(self, prefix: str, unmask: bool): > super().__init__( > prefix, 'qapi-introspect', > ' * QAPI/QMP schema introspection', __doc__) > self._unmask = unmask > - self._schema = None > - self._trees = [] > - self._used_types = [] > - self._name_map = {} > + self._schema: Optional[QAPISchema] = None > + self._trees: List[TreeValue] = [] > + self._used_types: List[QAPISchemaType] = [] > + self._name_map: Dict[str, str] = {} > self._genc.add(mcgen(''' > #include "qemu/osdep.h" > #include "%(prefix)sqapi-introspect.h" > @@ -113,10 +166,10 @@ def __init__(self, prefix, unmask): > ''', > prefix=prefix)) > > - def visit_begin(self, schema): > + def visit_begin(self, schema: QAPISchema) -> None: > self._schema = schema > > - def visit_end(self): > + def visit_end(self) -> None: > # visit the types that are actually used > for typ in self._used_types: > typ.visit(self) > @@ -138,18 +191,18 @@ def visit_end(self): > self._used_types = [] > self._name_map = {} > > - def visit_needed(self, entity): > + def visit_needed(self, entity: QAPISchemaEntity) -> bool: > # Ignore types on first pass; visit_end() will pick up used types > return not isinstance(entity, QAPISchemaType) > > - def _name(self, name): > + def _name(self, name: str) -> str: > if self._unmask: > return name > if name not in self._name_map: > self._name_map[name] = '%d' % len(self._name_map) > return self._name_map[name] > > - def _use_type(self, typ): > + def _use_type(self, typ: QAPISchemaType) -> str: > # Map the various integer types to plain int > if typ.json_type() == 'int': > typ = self._schema.lookup_type('int') > @@ -168,8 +221,10 @@ def _use_type(self, typ): > return '[' + self._use_type(typ.element_type) + ']' > return self._name(typ.name) > > - def _gen_tree(self, name, mtype, obj, ifcond, features): > - extra = None > + def _gen_tree(self, name: str, mtype: str, obj: _DObject, > + ifcond: List[str], > + features: Optional[List[QAPISchemaFeature]]) -> None: _gen_tree() builds a complete tree (i.e. one SchemaInfo), and adds it to ._trees. The SchemaInfo's common parts are name, meta-type and features. _gen_tree() takes them as arguments @name, @mtype, @features. It takes SchemaInfo's variant parts as a dict @obj. It completes @obj into an unannotated tree node by the common parts into @obj. It also takes a QAPI conditional argument @ifcond. Now let me review the type annotations: * name: str matches SchemaInfo, good. * mtype: str approximates SchemaInfo's enum SchemaMetaType (it's not Python Enum because those were off limits when this code was written). * obj: _DObject ... I'd expect "unannotated JSON value". * ifcond: List[str] should work, but gen_if(), gen_endif() use Sequence[str]. Suggest to pick one (please explain why), and stick to it. More instances of ifcond: List[str] elsewhere; I'm not flagging them. * features: Optional[List[QAPISchemaFeature]] is correct. "No features" has two representations: None and []. I guess we could eliminate None, trading a tiny bit of efficiency for simpler typing. Not a demand. > + extra: Optional[Annotations] = None "No annotations" is represented as None here, not {}. I guess we could use {} for simpler typing. Not a demand. > if mtype not in ('command', 'event', 'builtin', 'array'): > if not self._unmask: > # Output a comment to make it easy to map masked names > @@ -180,44 +235,64 @@ def _gen_tree(self, name, mtype, obj, ifcond, features): > obj['meta-type'] = mtype > self._trees.append(_make_tree(obj, ifcond, features, extra)) > > - def _gen_member(self, member): > - obj = {'name': member.name, 'type': self._use_type(member.type)} > + def _gen_member(self, > + member: QAPISchemaObjectTypeMember) -> TreeValue: > + obj: _DObject = { I'd expect "unannotated value". More of the same below. > + 'name': member.name, > + 'type': self._use_type(member.type) > + } > if member.optional: > obj['default'] = None > return _make_tree(obj, member.ifcond, member.features) > > - def _gen_variants(self, tag_name, variants): > + def _gen_variants(self, tag_name: str, > + variants: List[QAPISchemaVariant]) -> _DObject: > return {'tag': tag_name, > 'variants': [self._gen_variant(v) for v in variants]} > > - def _gen_variant(self, variant): > - obj = {'case': variant.name, 'type': self._use_type(variant.type)} > + def _gen_variant(self, variant: QAPISchemaVariant) -> TreeValue: > + obj: _DObject = { > + 'case': variant.name, > + 'type': self._use_type(variant.type) > + } > return _make_tree(obj, variant.ifcond, None) > > - def visit_builtin_type(self, name, info, json_type): > + def visit_builtin_type(self, name: str, info: Optional[QAPISourceInfo], A built-in's type info is always None. Perhaps we should drop the parameter. > + json_type: str) -> None: > self._gen_tree(name, 'builtin', {'json-type': json_type}, [], None) > > - def visit_enum_type(self, name, info, ifcond, features, members, prefix): > + def visit_enum_type(self, name: str, info: QAPISourceInfo, > + ifcond: List[str], features: List[QAPISchemaFeature], > + members: List[QAPISchemaEnumMember], > + prefix: Optional[str]) -> None: > self._gen_tree(name, 'enum', > {'values': [_make_tree(m.name, m.ifcond, None) > for m in members]}, > ifcond, features) > > - def visit_array_type(self, name, info, ifcond, element_type): > + def visit_array_type(self, name: str, info: Optional[QAPISourceInfo], Here, @info is indeed optional: it's None when @element_type is a built-in type. > + ifcond: List[str], > + element_type: QAPISchemaType) -> None: > element = self._use_type(element_type) > self._gen_tree('[' + element + ']', 'array', {'element-type': element}, > ifcond, None) > > - def visit_object_type_flat(self, name, info, ifcond, features, > - members, variants): > - obj = {'members': [self._gen_member(m) for m in members]} > + def visit_object_type_flat(self, name: str, info: Optional[QAPISourceInfo], And here it is optional due to the internal object type 'q_empty'. > + ifcond: List[str], > + features: List[QAPISchemaFeature], > + members: Sequence[QAPISchemaObjectTypeMember], > + variants: Optional[QAPISchemaVariants]) -> None: We represent "no variants" as None, not as []. I guess we could eliminate use [], trading a tiny bit of efficiency for simpler typing. Not a demand. > + obj: _DObject = {'members': [self._gen_member(m) for m in members]} > if variants: > obj.update(self._gen_variants(variants.tag_member.name, > variants.variants)) > > self._gen_tree(name, 'object', obj, ifcond, features) > > - def visit_alternate_type(self, name, info, ifcond, features, variants): > + def visit_alternate_type(self, name: str, info: QAPISourceInfo, > + ifcond: List[str], > + features: List[QAPISchemaFeature], > + variants: QAPISchemaVariants) -> None: > self._gen_tree(name, 'alternate', > {'members': [ > _make_tree({'type': self._use_type(m.type)}, > @@ -225,24 +300,32 @@ def visit_alternate_type(self, name, info, ifcond, features, variants): > for m in variants.variants]}, > ifcond, features) > > - def visit_command(self, name, info, ifcond, features, > - arg_type, ret_type, gen, success_response, boxed, > - allow_oob, allow_preconfig, coroutine): > + def visit_command(self, name: str, info: QAPISourceInfo, ifcond: List[str], > + features: List[QAPISchemaFeature], > + arg_type: QAPISchemaObjectType, > + ret_type: Optional[QAPISchemaType], gen: bool, Are you sure arg_type can't be None? > + success_response: bool, boxed: bool, allow_oob: bool, > + allow_preconfig: bool, coroutine: bool) -> None: > arg_type = arg_type or self._schema.the_empty_object_type > ret_type = ret_type or self._schema.the_empty_object_type > - obj = {'arg-type': self._use_type(arg_type), > - 'ret-type': self._use_type(ret_type)} > + obj: _DObject = { > + 'arg-type': self._use_type(arg_type), > + 'ret-type': self._use_type(ret_type) > + } > if allow_oob: > obj['allow-oob'] = allow_oob > self._gen_tree(name, 'command', obj, ifcond, features) > > - def visit_event(self, name, info, ifcond, features, arg_type, boxed): > + def visit_event(self, name: str, info: QAPISourceInfo, > + ifcond: List[str], features: List[QAPISchemaFeature], > + arg_type: QAPISchemaObjectType, boxed: bool) -> None: > arg_type = arg_type or self._schema.the_empty_object_type > self._gen_tree(name, 'event', {'arg-type': self._use_type(arg_type)}, > ifcond, features) > > > -def gen_introspect(schema, output_dir, prefix, opt_unmask): > +def gen_introspect(schema: QAPISchema, output_dir: str, prefix: str, > + opt_unmask: bool) -> None: > vis = QAPISchemaGenIntrospectVisitor(prefix, opt_unmask) > schema.visit(vis) > vis.write(output_dir) > diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini > index 74fc6c82153..c0f2a58306d 100644 > --- a/scripts/qapi/mypy.ini > +++ b/scripts/qapi/mypy.ini > @@ -14,11 +14,6 @@ disallow_untyped_defs = False > disallow_incomplete_defs = False > check_untyped_defs = False > > -[mypy-qapi.introspect] > -disallow_untyped_defs = False > -disallow_incomplete_defs = False > -check_untyped_defs = False > - > [mypy-qapi.parser] > disallow_untyped_defs = False > disallow_incomplete_defs = False > diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py > index 720449feee4..e91b77fadc3 100644 > --- a/scripts/qapi/schema.py > +++ b/scripts/qapi/schema.py > @@ -28,7 +28,7 @@ > class QAPISchemaEntity: > meta: Optional[str] = None > > - def __init__(self, name, info, doc, ifcond=None, features=None): > + def __init__(self, name: str, info, doc, ifcond=None, features=None): > assert name is None or isinstance(name, str) > for f in features or []: > assert isinstance(f, QAPISchemaFeature)
On 11/6/20 9:12 PM, Cleber Rosa wrote: > On Mon, Oct 26, 2020 at 03:42:45PM -0400, John Snow wrote: >> The typing of _make_tree and friends is a bit involved, but it can be >> done with some stubbed out types and a bit of elbow grease. The >> forthcoming patches attempt to make some simplifications, but having the >> type hints in advance may aid in review of subsequent patches. >> >> >> Some notes on the abstract types used at this point, and what they >> represent: >> >> - TreeValue represents any object in the type tree. _make_tree is an >> optional call -- not every node in the final type tree will have been >> passed to _make_tree, so this type encompasses not only what is passed >> to _make_tree (dicts, strings) or returned from it (dicts, strings, a >> 2-tuple), but any recursive value for any of the dicts passed to >> _make_tree -- which includes lists, strings, integers, null constants, >> and so on. >> >> - _DObject is a type alias I use to mean "A JSON-style object, >> represented as a Python dict." There is no "JSON" type in Python, they >> are converted natively to recursively nested dicts and lists, with >> leaf values of str, int, float, None, True/False and so on. This type >> structure is not possible to accurately portray in mypy yet, so a >> placeholder is used. >> >> In this case, _DObject is being used to refer to SchemaInfo-like >> structures as defined in qapi/introspect.json, OR any sub-object >> values they may reference. We don't have strong typing available for >> those, so a generic alternative is used. >> >> - Extra refers explicitly to the dict containing "extra" information >> about a node in the tree. mypy does not offer per-key typing for dicts >> in Python 3.6, so this is the best we can do here. >> >> - Annotated refers to (one of) the return types of _make_tree: >> It represents a 2-tuple of (TreeValue, Extra). >> >> >> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> >> Signed-off-by: John Snow <jsnow@redhat.com> >> --- >> scripts/qapi/introspect.py | 157 ++++++++++++++++++++++++++++--------- >> scripts/qapi/mypy.ini | 5 -- >> scripts/qapi/schema.py | 2 +- >> 3 files changed, 121 insertions(+), 43 deletions(-) >> >> diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py >> index 63f721ebfb6..803288a64e7 100644 >> --- a/scripts/qapi/introspect.py >> +++ b/scripts/qapi/introspect.py >> @@ -10,7 +10,16 @@ >> See the COPYING file in the top-level directory. >> """ >> >> -from typing import Optional, Sequence, cast >> +from typing import ( >> + Any, >> + Dict, >> + List, >> + Optional, >> + Sequence, >> + Tuple, >> + Union, >> + cast, >> +) >> >> from .common import ( >> c_name, >> @@ -20,13 +29,56 @@ >> ) >> from .gen import QAPISchemaMonolithicCVisitor >> from .schema import ( >> + QAPISchema, >> QAPISchemaArrayType, >> QAPISchemaBuiltinType, >> + QAPISchemaEntity, >> + QAPISchemaEnumMember, >> + QAPISchemaFeature, >> + QAPISchemaObjectType, >> + QAPISchemaObjectTypeMember, >> QAPISchemaType, >> + QAPISchemaVariant, >> + QAPISchemaVariants, >> ) >> +from .source import QAPISourceInfo >> >> >> -def _make_tree(obj, ifcond, features, extra=None): >> +# This module constructs a tree-like data structure that is used to >> +# generate the introspection information for QEMU. It behaves similarly >> +# to a JSON value. >> +# >> +# A complexity over JSON is that our values may or may not be annotated. >> +# >> +# Un-annotated values may be: >> +# Scalar: str, bool, None. >> +# Non-scalar: List, Dict >> +# _Value = Union[str, bool, None, Dict[str, Value], List[Value]] > > Here (and in a few other places) you mention `_Value`, but then define it as > `_value` (lowercase). > This was maybe too subtle: I didn't intend for it to be the "same" as _value; this is the "idealized version". And then I went and re-used the same exact name of "TreeValue", which muddied the waters. Let's just err back on the side of synchronicity and say: # _value = Union[str, bool, None, Dict[str, TreeValue], List[TreeValue]] # TreeValue = Union[_value, Annotated[_value]] >> +# >> +# With optional annotations, the type of all values is: >> +# TreeValue = Union[_Value, Annotated[_Value]] >> +# >> +# Sadly, mypy does not support recursive types, so we must approximate this. >> +_stub = Any >> +_scalar = Union[str, bool, None] >> +_nonscalar = Union[Dict[str, _stub], List[_stub]] > > Why not use `Any` here instead of declaring/using `_stub`? > This was my way of calling visual attention to the exact places in the type tree we are breaking recursion with a stubbed type. I wanted to communicate that we didn't WANT the any type, but we were forced to accept a "stub" type. (Which we implement with the Any type.) >> +_value = Union[_scalar, _nonscalar] >> +TreeValue = Union[_value, 'Annotated'] >> + > > Maybe declare `Annotations` first, then `Annotated`, then *use* > `Annotated` proper here? > As long as that doesn't lose clarity and synchronicity with the little comment blurb, or cause problems later in the patch. I will see if there was a reason I couldn't. Otherwise, it's not really too important to shuffle around things to avoid strings; it doesn't change anything for mypy or for the runtime. > - Cleber. > Thanks! --js
On 11/13/20 11:48 AM, Markus Armbruster wrote: > John Snow <jsnow@redhat.com> writes: > >> The typing of _make_tree and friends is a bit involved, but it can be >> done with some stubbed out types and a bit of elbow grease. The >> forthcoming patches attempt to make some simplifications, but having the >> type hints in advance may aid in review of subsequent patches. >> >> >> Some notes on the abstract types used at this point, and what they >> represent: >> >> - TreeValue represents any object in the type tree. _make_tree is an >> optional call -- not every node in the final type tree will have been >> passed to _make_tree, so this type encompasses not only what is passed >> to _make_tree (dicts, strings) or returned from it (dicts, strings, a >> 2-tuple), but any recursive value for any of the dicts passed to >> _make_tree -- which includes lists, strings, integers, null constants, >> and so on. >> >> - _DObject is a type alias I use to mean "A JSON-style object, >> represented as a Python dict." There is no "JSON" type in Python, they >> are converted natively to recursively nested dicts and lists, with >> leaf values of str, int, float, None, True/False and so on. This type >> structure is not possible to accurately portray in mypy yet, so a >> placeholder is used. >> >> In this case, _DObject is being used to refer to SchemaInfo-like >> structures as defined in qapi/introspect.json, OR any sub-object >> values they may reference. We don't have strong typing available for >> those, so a generic alternative is used. >> >> - Extra refers explicitly to the dict containing "extra" information >> about a node in the tree. mypy does not offer per-key typing for dicts >> in Python 3.6, so this is the best we can do here. >> >> - Annotated refers to (one of) the return types of _make_tree: >> It represents a 2-tuple of (TreeValue, Extra). >> >> >> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> >> Signed-off-by: John Snow <jsnow@redhat.com> >> --- >> scripts/qapi/introspect.py | 157 ++++++++++++++++++++++++++++--------- >> scripts/qapi/mypy.ini | 5 -- >> scripts/qapi/schema.py | 2 +- >> 3 files changed, 121 insertions(+), 43 deletions(-) >> >> diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py >> index 63f721ebfb6..803288a64e7 100644 >> --- a/scripts/qapi/introspect.py >> +++ b/scripts/qapi/introspect.py >> @@ -10,7 +10,16 @@ >> See the COPYING file in the top-level directory. >> """ >> >> -from typing import Optional, Sequence, cast >> +from typing import ( >> + Any, >> + Dict, >> + List, >> + Optional, >> + Sequence, >> + Tuple, >> + Union, >> + cast, >> +) >> >> from .common import ( >> c_name, >> @@ -20,13 +29,56 @@ >> ) >> from .gen import QAPISchemaMonolithicCVisitor >> from .schema import ( >> + QAPISchema, >> QAPISchemaArrayType, >> QAPISchemaBuiltinType, >> + QAPISchemaEntity, >> + QAPISchemaEnumMember, >> + QAPISchemaFeature, >> + QAPISchemaObjectType, >> + QAPISchemaObjectTypeMember, >> QAPISchemaType, >> + QAPISchemaVariant, >> + QAPISchemaVariants, >> ) >> +from .source import QAPISourceInfo >> >> >> -def _make_tree(obj, ifcond, features, extra=None): >> +# This module constructs a tree-like data structure that is used to > > "Tree-like" suggests it's not a tree, it just looks like one if you > squint. Drop "-like"? > Sure. I think I am grammatically predisposed to assume "binary tree" or at least some kind of monomorphic tree when I see "tree", hence the hedging and weasel-words. No problem just to drop it. >> +# generate the introspection information for QEMU. It behaves similarly >> +# to a JSON value. >> +# >> +# A complexity over JSON is that our values may or may not be annotated. > > It's the obvious abstract syntax tree for JSON, hacked up^W^Wextended to > support certain annotations. > Yes. > Let me add a bit of context and history. > > The module's job is generating qapi-introspect.[ch] for a QAPISchema. > > The purpose of qapi-introspect.[ch] is providing the information > query-qmp-schema needs, i.e. (a suitable C representation of) a JSON > value conforming to [SchemaInfo]. Details of this C representation are > not interesting right now. > > We first go from QAPISchema to a suitable Python representation of > [SchemaInfo], then from there to the C source code, neatly separating > concerns. > > Stupidest solution Python representation that could possibly work: the > obvious abstract syntax tree for JSON (that's also how Python's json > module works). > > Parts corresponding to QAPISchema parts guarded by 'if' conditionals > need to be guarded by #if conditionals. > > We want to prefix parts corresponding to certain QAPISchema parts with a > comment. > > These two requirements came later, and were hacked into the existing > stupidest solution: any tree node can be a tuple (json, extra), where > json is the "stupidest" node, and extra is a dict of annotations. In > other words, to annotate an unannotated node N with dict D, replace N by > (N, D). > > Possible annotations: > > 'comment': str > 'if': Sequence[str] > > They say there are just three answers a Marine may give to an officer's > questions: "Yes, sir!", "No, sir!", "No excuse, sir!". Let me put that > to use here: > > Is this an elegant design? No, sir! > > Is the code easy to read? No excuse, sir! > > Was it cheap to make? Yes, sir! > Yes, I believe I am on the same page so far. It was difficult to read and difficult to annotate, but once I got to the bottom, it's easy to see why it happened this way in retrospect. It's the simplest thing that could work. And it does work! So far, this comment isn't really a correction on what I wrote, but if you believe my comment that explains the types could be clearer, feel free to suggest. >> +# >> +# Un-annotated values may be: >> +# Scalar: str, bool, None. >> +# Non-scalar: List, Dict >> +# _Value = Union[str, bool, None, Dict[str, Value], List[Value]] >> +# >> +# With optional annotations, the type of all values is: >> +# TreeValue = Union[_Value, Annotated[_Value]] >> +# >> +# Sadly, mypy does not support recursive types, so we must approximate this. >> +_stub = Any >> +_scalar = Union[str, bool, None] >> +_nonscalar = Union[Dict[str, _stub], List[_stub]] >> +_value = Union[_scalar, _nonscalar] >> +TreeValue = Union[_value, 'Annotated'] > > Are there naming conventions for this kind of variables? I'm asking > because you capitalize some, but not all, and I can't see a pattern. > Types generally get CamelCase names; though for interior aliases I used underscore + lowercase. In this case, I was trying to illustrate that these intermediate types were only interesting or useful in service of the Capitalized Thing, TreeValue. > Ignorant question: only 'Annotated' has quotes; why? > It hasn't been defined yet, so it's a forward reference. Cleber suggested I hoist its definition up to avoid this. I forget if I had some strong reason for doing it this way, admittedly. A lot of this code gets changed shortly in the following patches, so honestly I was likely just not really putting in a lot of effort to make code that gets deleted soon pretty. (Why did the patches go in that order then? During early review, Eduardo wanted to see the type hints go in first to help review for the cleanup be easier ...!) > There is "_Value", "Value" and "_value". Suggest to add "value" > somewhere, for completeness ;-P > Yeah, my mistake. See my response to Cleber here... > I find the names _value and TreeValue a bit unfortunate: the difference > between the two isn't Tree. I'll come back to this below. > Feel free to suggest something better; I am at a point with this particular module where I'd be happy to have someone more opinionated than me telling me what they want. >> + >> +# This is just an alias for an object in the structure described above: >> +_DObject = Dict[str, object] > > I'm confused. Which structure, and why do we want to alias it? > Yep, I don't have a good name for this either. I mean this to be a generic type that describes the natural python representation for a JSON object. i.e. Dict[str, THING]. All of the various bits and pieces here that are generating SchemaInfo subtype representations (like _gen_member, _gen_variant, etc.) are using this as a generic "Well, it's some kind of dict/object." (Aside: this is an interesting part of code, because it is not type safe with respect to the QAPI definitions that define SchemaInfo's descendant types. There's no more explicit type for me to use here to describe those objects.) >> + >> +# Represents the annotations themselves: >> +Annotations = Dict[str, object] > > Losely typed. I have no idea whether that's bad :) > Kind of: unlike Any, using 'object' means that if we treat the dictionary values in a manner that we could not treat *all* values, mypy will raise an error. So it is a very broad type, but it's "safe". >> + >> +# Represents an annotated node (of some kind). >> +Annotated = Tuple[_value, Annotations] > > So, _value seems to represent a JSON value, Annotated an annotated JSON > value, and TreeValue their union, i.e. a possibly annotated JSON value. > > Naming is hard... BareJsonValue, AnnotatedJsonValue, JsonValue? > I was afraid of using "JsonValue" to avoid implicating it as a literal JSON value -- since it isn't, exactly. It's the Python native approximation of a subset of JSON values. >> + >> + >> +def _make_tree(obj: Union[_DObject, str], ifcond: List[str], > > I'd expect obj: _value, i.e. "unannotated value". > Sure, and I believe that would work -- this type is "tighter". _make_tree only ever sees Dict[str, object] and str, actually. Later in the series, _make_tree goes away and this weird sub-type also goes away in favor of the more generic type. >> + features: List[QAPISchemaFeature], >> + extra: Optional[Annotations] = None >> + ) -> TreeValue: >> if extra is None: >> extra = {} >> if ifcond: >> @@ -39,9 +91,11 @@ def _make_tree(obj, ifcond, features, extra=None): >> return obj >> >> >> -def _tree_to_qlit(obj, level=0, suppress_first_indent=False): >> +def _tree_to_qlit(obj: TreeValue, >> + level: int = 0, >> + suppress_first_indent: bool = False) -> str: >> >> - def indent(level): >> + def indent(level: int) -> str: >> return level * 4 * ' ' >> >> if isinstance(obj, tuple): >> @@ -91,21 +145,20 @@ def indent(level): >> return ret >> >> >> -def to_c_string(string): >> +def to_c_string(string: str) -> str: >> return '"' + string.replace('\\', r'\\').replace('"', r'\"') + '"' >> >> >> class QAPISchemaGenIntrospectVisitor(QAPISchemaMonolithicCVisitor): >> - > > Intentional? > I'm sure I thought it looked nice at the time. It's not important. >> - def __init__(self, prefix, unmask): >> + def __init__(self, prefix: str, unmask: bool): >> super().__init__( >> prefix, 'qapi-introspect', >> ' * QAPI/QMP schema introspection', __doc__) >> self._unmask = unmask >> - self._schema = None >> - self._trees = [] >> - self._used_types = [] >> - self._name_map = {} >> + self._schema: Optional[QAPISchema] = None >> + self._trees: List[TreeValue] = [] >> + self._used_types: List[QAPISchemaType] = [] >> + self._name_map: Dict[str, str] = {} >> self._genc.add(mcgen(''' >> #include "qemu/osdep.h" >> #include "%(prefix)sqapi-introspect.h" >> @@ -113,10 +166,10 @@ def __init__(self, prefix, unmask): >> ''', >> prefix=prefix)) >> >> - def visit_begin(self, schema): >> + def visit_begin(self, schema: QAPISchema) -> None: >> self._schema = schema >> >> - def visit_end(self): >> + def visit_end(self) -> None: >> # visit the types that are actually used >> for typ in self._used_types: >> typ.visit(self) >> @@ -138,18 +191,18 @@ def visit_end(self): >> self._used_types = [] >> self._name_map = {} >> >> - def visit_needed(self, entity): >> + def visit_needed(self, entity: QAPISchemaEntity) -> bool: >> # Ignore types on first pass; visit_end() will pick up used types >> return not isinstance(entity, QAPISchemaType) >> >> - def _name(self, name): >> + def _name(self, name: str) -> str: >> if self._unmask: >> return name >> if name not in self._name_map: >> self._name_map[name] = '%d' % len(self._name_map) >> return self._name_map[name] >> >> - def _use_type(self, typ): >> + def _use_type(self, typ: QAPISchemaType) -> str: >> # Map the various integer types to plain int >> if typ.json_type() == 'int': >> typ = self._schema.lookup_type('int') >> @@ -168,8 +221,10 @@ def _use_type(self, typ): >> return '[' + self._use_type(typ.element_type) + ']' >> return self._name(typ.name) >> >> - def _gen_tree(self, name, mtype, obj, ifcond, features): >> - extra = None >> + def _gen_tree(self, name: str, mtype: str, obj: _DObject, >> + ifcond: List[str], >> + features: Optional[List[QAPISchemaFeature]]) -> None: > > _gen_tree() builds a complete tree (i.e. one SchemaInfo), and adds it to > ._trees. > > The SchemaInfo's common parts are name, meta-type and features. > _gen_tree() takes them as arguments @name, @mtype, @features. > > It takes SchemaInfo's variant parts as a dict @obj. > > It completes @obj into an unannotated tree node by the common parts into > @obj. > > It also takes a QAPI conditional argument @ifcond. > > Now let me review the type annotations: > > * name: str matches SchemaInfo, good. > > * mtype: str approximates SchemaInfo's enum SchemaMetaType (it's not > Python Enum because those were off limits when this code was written). > It's also a little cumbersome, perhaps, to duplicate information from the QAPI schema directly into the QAPI generator. Didn't feel like getting clever enough to "fix" this. > * obj: _DObject ... I'd expect "unannotated JSON value". > It's NOT any value though -- like you said: "_gen_tree() builds a complete tree (i.e. one SchemaInfo)" -- we only accept objects here, which is correct. > * ifcond: List[str] should work, but gen_if(), gen_endif() use > Sequence[str]. Suggest to pick one (please explain why), and stick to > it. > > More instances of ifcond: List[str] elsewhere; I'm not flagging them. > Yes, I should probably be using Sequence, if I can. generally: - Input types should use the most generic type they can cope with (Iterable, Sequence, Collection) based on what properties they actually need in the incoming type. (By the end of this series, Iterable[str] should actually be sufficient, but I'll have to see where it makes sense to slacken the input types in this series. It's been through the washer quite a few times I'm afraid.) - Output types should be as explicit as possible. I was not always perfectly good about generalizing the input types; List is correct, but not necessarily maximally correct. > * features: Optional[List[QAPISchemaFeature]] is correct. "No features" > has two representations: None and []. I guess we could eliminate > None, trading a tiny bit of efficiency for simpler typing. Not a > demand. > >> + extra: Optional[Annotations] = None > > "No annotations" is represented as None here, not {}. I guess we could > use {} for simpler typing. Not a demand. > This goes away later, kinda. It becomes: comment: Optional[str] = None and that comment is eventually passed to an Annotated node constructor that takes the comment specifically. >> if mtype not in ('command', 'event', 'builtin', 'array'): >> if not self._unmask: >> # Output a comment to make it easy to map masked names >> @@ -180,44 +235,64 @@ def _gen_tree(self, name, mtype, obj, ifcond, features): >> obj['meta-type'] = mtype >> self._trees.append(_make_tree(obj, ifcond, features, extra)) >> >> - def _gen_member(self, member): >> - obj = {'name': member.name, 'type': self._use_type(member.type)} >> + def _gen_member(self, >> + member: QAPISchemaObjectTypeMember) -> TreeValue: >> + obj: _DObject = { > > I'd expect "unannotated value". More of the same below. > See my remark to your expectation for what _gen_tree should accept. It deals with "objects" and "objects" have the property that more keys can be assigned to them, and this is a fundamental feature of _gen_tree. >> + 'name': member.name, >> + 'type': self._use_type(member.type) >> + } >> if member.optional: >> obj['default'] = None >> return _make_tree(obj, member.ifcond, member.features) >> >> - def _gen_variants(self, tag_name, variants): >> + def _gen_variants(self, tag_name: str, >> + variants: List[QAPISchemaVariant]) -> _DObject: >> return {'tag': tag_name, >> 'variants': [self._gen_variant(v) for v in variants]} >> >> - def _gen_variant(self, variant): >> - obj = {'case': variant.name, 'type': self._use_type(variant.type)} >> + def _gen_variant(self, variant: QAPISchemaVariant) -> TreeValue: >> + obj: _DObject = { >> + 'case': variant.name, >> + 'type': self._use_type(variant.type) >> + } >> return _make_tree(obj, variant.ifcond, None) >> >> - def visit_builtin_type(self, name, info, json_type): >> + def visit_builtin_type(self, name: str, info: Optional[QAPISourceInfo], > > A built-in's type info is always None. Perhaps we should drop the > parameter. > QAPISchemaBuiltinType -> QAPISchemaType -> QAPISchemaEntity This class ultimately takes an info parameter in the constructor which is Optional, which means that the type for the info field is Optional[QAPISourceInfo]. If you want to remove the parameter here, that works. >> + json_type: str) -> None: >> self._gen_tree(name, 'builtin', {'json-type': json_type}, [], None) >> >> - def visit_enum_type(self, name, info, ifcond, features, members, prefix): >> + def visit_enum_type(self, name: str, info: QAPISourceInfo, >> + ifcond: List[str], features: List[QAPISchemaFeature], >> + members: List[QAPISchemaEnumMember], >> + prefix: Optional[str]) -> None: >> self._gen_tree(name, 'enum', >> {'values': [_make_tree(m.name, m.ifcond, None) >> for m in members]}, >> ifcond, features) >> >> - def visit_array_type(self, name, info, ifcond, element_type): >> + def visit_array_type(self, name: str, info: Optional[QAPISourceInfo], > > Here, @info is indeed optional: it's None when @element_type is a > built-in type. > >> + ifcond: List[str], >> + element_type: QAPISchemaType) -> None: >> element = self._use_type(element_type) >> self._gen_tree('[' + element + ']', 'array', {'element-type': element}, >> ifcond, None) >> >> - def visit_object_type_flat(self, name, info, ifcond, features, >> - members, variants): >> - obj = {'members': [self._gen_member(m) for m in members]} >> + def visit_object_type_flat(self, name: str, info: Optional[QAPISourceInfo], > > And here it is optional due to the internal object type 'q_empty'. > >> + ifcond: List[str], >> + features: List[QAPISchemaFeature], >> + members: Sequence[QAPISchemaObjectTypeMember], >> + variants: Optional[QAPISchemaVariants]) -> None: > > We represent "no variants" as None, not as []. I guess we could > eliminate use [], trading a tiny bit of efficiency for simpler typing. > Not a demand. > For a later series: I recommend turning QAPISchemaVariants into an extension of Sequence[QAPISchemaVariant], and then always creating an empty collection for the purpose of simplifying the type signature. I'd recommend the following magicks: __bool__ -- for writing "if variants: ..." __iter__ -- for writing "for variant in variants: ..." Then we can just always say "variants: QAPISchemaVariants" and go about our lives. (Maybe that won't work, QAPISchemaVariants has a lot of other parameters it takes that maybe don't apply to empty collections. Something to come back to, I think.) >> + obj: _DObject = {'members': [self._gen_member(m) for m in members]} >> if variants: >> obj.update(self._gen_variants(variants.tag_member.name, >> variants.variants)) >> >> self._gen_tree(name, 'object', obj, ifcond, features) >> >> - def visit_alternate_type(self, name, info, ifcond, features, variants): >> + def visit_alternate_type(self, name: str, info: QAPISourceInfo, >> + ifcond: List[str], >> + features: List[QAPISchemaFeature], >> + variants: QAPISchemaVariants) -> None: >> self._gen_tree(name, 'alternate', >> {'members': [ >> _make_tree({'type': self._use_type(m.type)}, >> @@ -225,24 +300,32 @@ def visit_alternate_type(self, name, info, ifcond, features, variants): >> for m in variants.variants]}, >> ifcond, features) >> >> - def visit_command(self, name, info, ifcond, features, >> - arg_type, ret_type, gen, success_response, boxed, >> - allow_oob, allow_preconfig, coroutine): >> + def visit_command(self, name: str, info: QAPISourceInfo, ifcond: List[str], >> + features: List[QAPISchemaFeature], >> + arg_type: QAPISchemaObjectType, >> + ret_type: Optional[QAPISchemaType], gen: bool, > > Are you sure arg_type can't be None? > I am not. Strict optional checking is still disabled at this point in the series, there are problems like this that it would uncover if I turned it on, but many other errors are spurious. I could change approach and begin enabling it right away to make these discrepancies go away -- at the expense of maybe more "noise" with various assertions and other fluff. Turning on strict optional at the beginning of part 2 shows 96 errors. Adding a quick dumb type kludge for genc/genh, there's 18 errors prior to series 2. I can make a "part 1.5" and try to whittle that down to zero if you're willing to review it. Another quick edit to commands.py, I'm down to 15 errors all in gen.py. It might be a lot of frustrating fluff, but it will make Optional[T] vs T auditing a bit easier (if not more cumbersome on the way.) In order to turn it on, though, I do have to go all the way to the end of the six parts, and then backport the fixes to the earlier parts. You won't necessarily notice a difference until schema.py itself is fully typed. --js >> + success_response: bool, boxed: bool, allow_oob: bool, >> + allow_preconfig: bool, coroutine: bool) -> None: >> arg_type = arg_type or self._schema.the_empty_object_type >> ret_type = ret_type or self._schema.the_empty_object_type >> - obj = {'arg-type': self._use_type(arg_type), >> - 'ret-type': self._use_type(ret_type)} >> + obj: _DObject = { >> + 'arg-type': self._use_type(arg_type), >> + 'ret-type': self._use_type(ret_type) >> + } >> if allow_oob: >> obj['allow-oob'] = allow_oob >> self._gen_tree(name, 'command', obj, ifcond, features) >> >> - def visit_event(self, name, info, ifcond, features, arg_type, boxed): >> + def visit_event(self, name: str, info: QAPISourceInfo, >> + ifcond: List[str], features: List[QAPISchemaFeature], >> + arg_type: QAPISchemaObjectType, boxed: bool) -> None: >> arg_type = arg_type or self._schema.the_empty_object_type >> self._gen_tree(name, 'event', {'arg-type': self._use_type(arg_type)}, >> ifcond, features) >> >> >> -def gen_introspect(schema, output_dir, prefix, opt_unmask): >> +def gen_introspect(schema: QAPISchema, output_dir: str, prefix: str, >> + opt_unmask: bool) -> None: >> vis = QAPISchemaGenIntrospectVisitor(prefix, opt_unmask) >> schema.visit(vis) >> vis.write(output_dir) >> diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini >> index 74fc6c82153..c0f2a58306d 100644 >> --- a/scripts/qapi/mypy.ini >> +++ b/scripts/qapi/mypy.ini >> @@ -14,11 +14,6 @@ disallow_untyped_defs = False >> disallow_incomplete_defs = False >> check_untyped_defs = False >> >> -[mypy-qapi.introspect] >> -disallow_untyped_defs = False >> -disallow_incomplete_defs = False >> -check_untyped_defs = False >> - >> [mypy-qapi.parser] >> disallow_untyped_defs = False >> disallow_incomplete_defs = False >> diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py >> index 720449feee4..e91b77fadc3 100644 >> --- a/scripts/qapi/schema.py >> +++ b/scripts/qapi/schema.py >> @@ -28,7 +28,7 @@ >> class QAPISchemaEntity: >> meta: Optional[str] = None >> >> - def __init__(self, name, info, doc, ifcond=None, features=None): >> + def __init__(self, name: str, info, doc, ifcond=None, features=None): >> assert name is None or isinstance(name, str) >> for f in features or []: >> assert isinstance(f, QAPISchemaFeature)
John Snow <jsnow@redhat.com> writes: > On 11/13/20 11:48 AM, Markus Armbruster wrote: >> John Snow <jsnow@redhat.com> writes: >> >>> The typing of _make_tree and friends is a bit involved, but it can be >>> done with some stubbed out types and a bit of elbow grease. The >>> forthcoming patches attempt to make some simplifications, but having the >>> type hints in advance may aid in review of subsequent patches. >>> >>> >>> Some notes on the abstract types used at this point, and what they >>> represent: >>> >>> - TreeValue represents any object in the type tree. _make_tree is an >>> optional call -- not every node in the final type tree will have been >>> passed to _make_tree, so this type encompasses not only what is passed >>> to _make_tree (dicts, strings) or returned from it (dicts, strings, a >>> 2-tuple), but any recursive value for any of the dicts passed to >>> _make_tree -- which includes lists, strings, integers, null constants, >>> and so on. >>> >>> - _DObject is a type alias I use to mean "A JSON-style object, >>> represented as a Python dict." There is no "JSON" type in Python, they >>> are converted natively to recursively nested dicts and lists, with >>> leaf values of str, int, float, None, True/False and so on. This type >>> structure is not possible to accurately portray in mypy yet, so a >>> placeholder is used. >>> >>> In this case, _DObject is being used to refer to SchemaInfo-like >>> structures as defined in qapi/introspect.json, OR any sub-object >>> values they may reference. We don't have strong typing available for >>> those, so a generic alternative is used. >>> >>> - Extra refers explicitly to the dict containing "extra" information >>> about a node in the tree. mypy does not offer per-key typing for dicts >>> in Python 3.6, so this is the best we can do here. >>> >>> - Annotated refers to (one of) the return types of _make_tree: >>> It represents a 2-tuple of (TreeValue, Extra). >>> >>> >>> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> >>> Signed-off-by: John Snow <jsnow@redhat.com> >>> --- >>> scripts/qapi/introspect.py | 157 ++++++++++++++++++++++++++++--------- >>> scripts/qapi/mypy.ini | 5 -- >>> scripts/qapi/schema.py | 2 +- >>> 3 files changed, 121 insertions(+), 43 deletions(-) >>> >>> diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py >>> index 63f721ebfb6..803288a64e7 100644 >>> --- a/scripts/qapi/introspect.py >>> +++ b/scripts/qapi/introspect.py >>> @@ -10,7 +10,16 @@ >>> See the COPYING file in the top-level directory. >>> """ >>> >>> -from typing import Optional, Sequence, cast >>> +from typing import ( >>> + Any, >>> + Dict, >>> + List, >>> + Optional, >>> + Sequence, >>> + Tuple, >>> + Union, >>> + cast, >>> +) >>> >>> from .common import ( >>> c_name, >>> @@ -20,13 +29,56 @@ >>> ) >>> from .gen import QAPISchemaMonolithicCVisitor >>> from .schema import ( >>> + QAPISchema, >>> QAPISchemaArrayType, >>> QAPISchemaBuiltinType, >>> + QAPISchemaEntity, >>> + QAPISchemaEnumMember, >>> + QAPISchemaFeature, >>> + QAPISchemaObjectType, >>> + QAPISchemaObjectTypeMember, >>> QAPISchemaType, >>> + QAPISchemaVariant, >>> + QAPISchemaVariants, >>> ) >>> +from .source import QAPISourceInfo >>> >>> >>> -def _make_tree(obj, ifcond, features, extra=None): >>> +# This module constructs a tree-like data structure that is used to >> >> "Tree-like" suggests it's not a tree, it just looks like one if you >> squint. Drop "-like"? >> > > Sure. I think I am grammatically predisposed to assume "binary tree" or > at least some kind of monomorphic tree when I see "tree", hence the > hedging and weasel-words. > > No problem just to drop it. > >>> +# generate the introspection information for QEMU. It behaves similarly >>> +# to a JSON value. >>> +# >>> +# A complexity over JSON is that our values may or may not be annotated. >> >> It's the obvious abstract syntax tree for JSON, hacked up^W^Wextended to >> support certain annotations. >> > > Yes. > >> Let me add a bit of context and history. >> >> The module's job is generating qapi-introspect.[ch] for a QAPISchema. >> >> The purpose of qapi-introspect.[ch] is providing the information >> query-qmp-schema needs, i.e. (a suitable C representation of) a JSON >> value conforming to [SchemaInfo]. Details of this C representation are >> not interesting right now. >> >> We first go from QAPISchema to a suitable Python representation of >> [SchemaInfo], then from there to the C source code, neatly separating >> concerns. >> >> Stupidest solution Python representation that could possibly work: the >> obvious abstract syntax tree for JSON (that's also how Python's json >> module works). >> >> Parts corresponding to QAPISchema parts guarded by 'if' conditionals >> need to be guarded by #if conditionals. >> >> We want to prefix parts corresponding to certain QAPISchema parts with a >> comment. >> >> These two requirements came later, and were hacked into the existing >> stupidest solution: any tree node can be a tuple (json, extra), where >> json is the "stupidest" node, and extra is a dict of annotations. In >> other words, to annotate an unannotated node N with dict D, replace N by >> (N, D). >> >> Possible annotations: >> >> 'comment': str >> 'if': Sequence[str] >> >> They say there are just three answers a Marine may give to an officer's >> questions: "Yes, sir!", "No, sir!", "No excuse, sir!". Let me put that >> to use here: >> >> Is this an elegant design? No, sir! >> >> Is the code easy to read? No excuse, sir! >> >> Was it cheap to make? Yes, sir! >> > > Yes, I believe I am on the same page so far. It was difficult to read > and difficult to annotate, but once I got to the bottom, it's easy to > see why it happened this way in retrospect. It's the simplest thing that > could work. > > And it does work! > > So far, this comment isn't really a correction on what I wrote, but if > you believe my comment that explains the types could be clearer, feel > free to suggest. Yes, it's not correction, it's elaboration. You guys clearly struggled with the tree data structure. Documentation would have helped[*]. Since you're going to replace it (PATCH 09), adding it now makes little sense. *My* struggle is with the type annotations. The initial state is messy to type, in part due to mypy's surprising inability to deal with recursive types, in part because the tree data structure is messier than it could be. Much of the series is cleanup that benefits the typing. Makes sense. What makes review hard for me: you add (fairly complicated) typing first, then evolve it along with the cleanups. I have to first grok the complicated typing (a struggle), then for each cleanup grok the type changes in addition to the code changes. I believe adding the typing before the cleanups is a mistake. I share the desire to have type annotations that help with understanding the code. I understand the desire to have them sooner rather than later. I just think they're a costly distraction at this stage for this code. Once you understand the data structure, the cleanups are fairly straightforward. Is it too late to delay the introduction of type hints until after the data structure cleanups? If yes, I have to spend more time replying below. >>> +# >>> +# Un-annotated values may be: >>> +# Scalar: str, bool, None. >>> +# Non-scalar: List, Dict >>> +# _Value = Union[str, bool, None, Dict[str, Value], List[Value]] >>> +# >>> +# With optional annotations, the type of all values is: >>> +# TreeValue = Union[_Value, Annotated[_Value]] >>> +# >>> +# Sadly, mypy does not support recursive types, so we must approximate this. >>> +_stub = Any >>> +_scalar = Union[str, bool, None] >>> +_nonscalar = Union[Dict[str, _stub], List[_stub]] >>> +_value = Union[_scalar, _nonscalar] >>> +TreeValue = Union[_value, 'Annotated'] >> >> Are there naming conventions for this kind of variables? I'm asking >> because you capitalize some, but not all, and I can't see a pattern. >> > > Types generally get CamelCase names; though for interior aliases I used > underscore + lowercase. In this case, I was trying to illustrate that > these intermediate types were only interesting or useful in service of > the Capitalized Thing, TreeValue. > >> Ignorant question: only 'Annotated' has quotes; why? >> > > It hasn't been defined yet, so it's a forward reference. Cleber > suggested I hoist its definition up to avoid this. I forget if I had > some strong reason for doing it this way, admittedly. > > A lot of this code gets changed shortly in the following patches, so > honestly I was likely just not really putting in a lot of effort to make > code that gets deleted soon pretty. > > (Why did the patches go in that order then? During early review, Eduardo > wanted to see the type hints go in first to help review for the cleanup > be easier ...!) Understandable, but it didn't play out well, I'm afraid. >> There is "_Value", "Value" and "_value". Suggest to add "value" >> somewhere, for completeness ;-P >> > > Yeah, my mistake. See my response to Cleber here... > >> I find the names _value and TreeValue a bit unfortunate: the difference >> between the two isn't Tree. I'll come back to this below. >> > > Feel free to suggest something better; I am at a point with this > particular module where I'd be happy to have someone more opinionated > than me telling me what they want. > >>> + >>> +# This is just an alias for an object in the structure described above: >>> +_DObject = Dict[str, object] >> >> I'm confused. Which structure, and why do we want to alias it? >> > > Yep, I don't have a good name for this either. I mean this to be a > generic type that describes the natural python representation for a JSON > object. > > i.e. Dict[str, THING]. > > All of the various bits and pieces here that are generating SchemaInfo > subtype representations (like _gen_member, _gen_variant, etc.) are using > this as a generic "Well, it's some kind of dict/object." > > (Aside: this is an interesting part of code, because it is not type safe > with respect to the QAPI definitions that define SchemaInfo's descendant > types. There's no more explicit type for me to use here to describe > those objects.) > >>> + >>> +# Represents the annotations themselves: >>> +Annotations = Dict[str, object] >> >> Losely typed. I have no idea whether that's bad :) >> > > Kind of: unlike Any, using 'object' means that if we treat the > dictionary values in a manner that we could not treat *all* values, mypy > will raise an error. > > So it is a very broad type, but it's "safe". > >>> + >>> +# Represents an annotated node (of some kind). >>> +Annotated = Tuple[_value, Annotations] >> >> So, _value seems to represent a JSON value, Annotated an annotated JSON >> value, and TreeValue their union, i.e. a possibly annotated JSON value. >> >> Naming is hard... BareJsonValue, AnnotatedJsonValue, JsonValue? >> > > I was afraid of using "JsonValue" to avoid implicating it as a literal > JSON value -- since it isn't, exactly. It's the Python native > approximation of a subset of JSON values. > >>> + >>> + >>> +def _make_tree(obj: Union[_DObject, str], ifcond: List[str], >> >> I'd expect obj: _value, i.e. "unannotated value". >> > > Sure, and I believe that would work -- this type is "tighter". > _make_tree only ever sees Dict[str, object] and str, actually. > > Later in the series, _make_tree goes away and this weird sub-type also > goes away in favor of the more generic type. > >>> + features: List[QAPISchemaFeature], >>> + extra: Optional[Annotations] = None >>> + ) -> TreeValue: >>> if extra is None: >>> extra = {} >>> if ifcond: >>> @@ -39,9 +91,11 @@ def _make_tree(obj, ifcond, features, extra=None): >>> return obj >>> >>> >>> -def _tree_to_qlit(obj, level=0, suppress_first_indent=False): >>> +def _tree_to_qlit(obj: TreeValue, >>> + level: int = 0, >>> + suppress_first_indent: bool = False) -> str: >>> >>> - def indent(level): >>> + def indent(level: int) -> str: >>> return level * 4 * ' ' >>> >>> if isinstance(obj, tuple): >>> @@ -91,21 +145,20 @@ def indent(level): >>> return ret >>> >>> >>> -def to_c_string(string): >>> +def to_c_string(string: str) -> str: >>> return '"' + string.replace('\\', r'\\').replace('"', r'\"') + '"' >>> >>> >>> class QAPISchemaGenIntrospectVisitor(QAPISchemaMonolithicCVisitor): >>> - >> >> Intentional? >> > > I'm sure I thought it looked nice at the time. It's not important. > >>> - def __init__(self, prefix, unmask): >>> + def __init__(self, prefix: str, unmask: bool): >>> super().__init__( >>> prefix, 'qapi-introspect', >>> ' * QAPI/QMP schema introspection', __doc__) >>> self._unmask = unmask >>> - self._schema = None >>> - self._trees = [] >>> - self._used_types = [] >>> - self._name_map = {} >>> + self._schema: Optional[QAPISchema] = None >>> + self._trees: List[TreeValue] = [] >>> + self._used_types: List[QAPISchemaType] = [] >>> + self._name_map: Dict[str, str] = {} >>> self._genc.add(mcgen(''' >>> #include "qemu/osdep.h" >>> #include "%(prefix)sqapi-introspect.h" >>> @@ -113,10 +166,10 @@ def __init__(self, prefix, unmask): >>> ''', >>> prefix=prefix)) >>> >>> - def visit_begin(self, schema): >>> + def visit_begin(self, schema: QAPISchema) -> None: >>> self._schema = schema >>> >>> - def visit_end(self): >>> + def visit_end(self) -> None: >>> # visit the types that are actually used >>> for typ in self._used_types: >>> typ.visit(self) >>> @@ -138,18 +191,18 @@ def visit_end(self): >>> self._used_types = [] >>> self._name_map = {} >>> >>> - def visit_needed(self, entity): >>> + def visit_needed(self, entity: QAPISchemaEntity) -> bool: >>> # Ignore types on first pass; visit_end() will pick up used types >>> return not isinstance(entity, QAPISchemaType) >>> >>> - def _name(self, name): >>> + def _name(self, name: str) -> str: >>> if self._unmask: >>> return name >>> if name not in self._name_map: >>> self._name_map[name] = '%d' % len(self._name_map) >>> return self._name_map[name] >>> >>> - def _use_type(self, typ): >>> + def _use_type(self, typ: QAPISchemaType) -> str: >>> # Map the various integer types to plain int >>> if typ.json_type() == 'int': >>> typ = self._schema.lookup_type('int') >>> @@ -168,8 +221,10 @@ def _use_type(self, typ): >>> return '[' + self._use_type(typ.element_type) + ']' >>> return self._name(typ.name) >>> >>> - def _gen_tree(self, name, mtype, obj, ifcond, features): >>> - extra = None >>> + def _gen_tree(self, name: str, mtype: str, obj: _DObject, >>> + ifcond: List[str], >>> + features: Optional[List[QAPISchemaFeature]]) -> None: >> >> _gen_tree() builds a complete tree (i.e. one SchemaInfo), and adds it to >> ._trees. >> >> The SchemaInfo's common parts are name, meta-type and features. >> _gen_tree() takes them as arguments @name, @mtype, @features. >> >> It takes SchemaInfo's variant parts as a dict @obj. >> >> It completes @obj into an unannotated tree node by the common parts into >> @obj. >> >> It also takes a QAPI conditional argument @ifcond. >> >> Now let me review the type annotations: >> >> * name: str matches SchemaInfo, good. >> >> * mtype: str approximates SchemaInfo's enum SchemaMetaType (it's not >> Python Enum because those were off limits when this code was written). >> > > It's also a little cumbersome, perhaps, to duplicate information from > the QAPI schema directly into the QAPI generator. qapi/introspect.json is strongly coupled with scripts/qapi/introspect.py. Not proud of it, but I don't have better ideas. > Didn't feel like getting clever enough to "fix" this. > >> * obj: _DObject ... I'd expect "unannotated JSON value". >> > > It's NOT any value though -- like you said: "_gen_tree() builds a > complete tree (i.e. one SchemaInfo)" -- we only accept objects here, > which is correct. > >> * ifcond: List[str] should work, but gen_if(), gen_endif() use >> Sequence[str]. Suggest to pick one (please explain why), and stick to >> it. >> >> More instances of ifcond: List[str] elsewhere; I'm not flagging them. >> > > Yes, I should probably be using Sequence, if I can. generally: > > - Input types should use the most generic type they can cope with > (Iterable, Sequence, Collection) based on what properties they actually > need in the incoming type. > > (By the end of this series, Iterable[str] should actually be sufficient, > but I'll have to see where it makes sense to slacken the input types in > this series. It's been through the washer quite a few times I'm afraid.) > > - Output types should be as explicit as possible. Got it. > I was not always perfectly good about generalizing the input types; List > is correct, but not necessarily maximally correct. > >> * features: Optional[List[QAPISchemaFeature]] is correct. "No features" >> has two representations: None and []. I guess we could eliminate >> None, trading a tiny bit of efficiency for simpler typing. Not a >> demand. >> >>> + extra: Optional[Annotations] = None >> >> "No annotations" is represented as None here, not {}. I guess we could >> use {} for simpler typing. Not a demand. >> > > This goes away later, kinda. It becomes: > > comment: Optional[str] = None > > and that comment is eventually passed to an Annotated node constructor > that takes the comment specifically. > >>> if mtype not in ('command', 'event', 'builtin', 'array'): >>> if not self._unmask: >>> # Output a comment to make it easy to map masked names >>> @@ -180,44 +235,64 @@ def _gen_tree(self, name, mtype, obj, ifcond, features): >>> obj['meta-type'] = mtype >>> self._trees.append(_make_tree(obj, ifcond, features, extra)) >>> >>> - def _gen_member(self, member): >>> - obj = {'name': member.name, 'type': self._use_type(member.type)} >>> + def _gen_member(self, >>> + member: QAPISchemaObjectTypeMember) -> TreeValue: >>> + obj: _DObject = { >> >> I'd expect "unannotated value". More of the same below. >> > > See my remark to your expectation for what _gen_tree should accept. It > deals with "objects" and "objects" have the property that more keys can > be assigned to them, and this is a fundamental feature of _gen_tree. > >>> + 'name': member.name, >>> + 'type': self._use_type(member.type) >>> + } >>> if member.optional: >>> obj['default'] = None >>> return _make_tree(obj, member.ifcond, member.features) >>> >>> - def _gen_variants(self, tag_name, variants): >>> + def _gen_variants(self, tag_name: str, >>> + variants: List[QAPISchemaVariant]) -> _DObject: >>> return {'tag': tag_name, >>> 'variants': [self._gen_variant(v) for v in variants]} >>> >>> - def _gen_variant(self, variant): >>> - obj = {'case': variant.name, 'type': self._use_type(variant.type)} >>> + def _gen_variant(self, variant: QAPISchemaVariant) -> TreeValue: >>> + obj: _DObject = { >>> + 'case': variant.name, >>> + 'type': self._use_type(variant.type) >>> + } >>> return _make_tree(obj, variant.ifcond, None) >>> >>> - def visit_builtin_type(self, name, info, json_type): >>> + def visit_builtin_type(self, name: str, info: Optional[QAPISourceInfo], >> >> A built-in's type info is always None. Perhaps we should drop the >> parameter. >> > > QAPISchemaBuiltinType -> QAPISchemaType -> QAPISchemaEntity > > This class ultimately takes an info parameter in the constructor which > is Optional, which means that the type for the info field is > Optional[QAPISourceInfo]. I understand you've since worked on tightening Optional all over, so this might be moot. > If you want to remove the parameter here, that works. > >>> + json_type: str) -> None: >>> self._gen_tree(name, 'builtin', {'json-type': json_type}, [], None) >>> >>> - def visit_enum_type(self, name, info, ifcond, features, members, prefix): >>> + def visit_enum_type(self, name: str, info: QAPISourceInfo, >>> + ifcond: List[str], features: List[QAPISchemaFeature], >>> + members: List[QAPISchemaEnumMember], >>> + prefix: Optional[str]) -> None: >>> self._gen_tree(name, 'enum', >>> {'values': [_make_tree(m.name, m.ifcond, None) >>> for m in members]}, >>> ifcond, features) >>> >>> - def visit_array_type(self, name, info, ifcond, element_type): >>> + def visit_array_type(self, name: str, info: Optional[QAPISourceInfo], >> >> Here, @info is indeed optional: it's None when @element_type is a >> built-in type. >> >>> + ifcond: List[str], >>> + element_type: QAPISchemaType) -> None: >>> element = self._use_type(element_type) >>> self._gen_tree('[' + element + ']', 'array', {'element-type': element}, >>> ifcond, None) >>> >>> - def visit_object_type_flat(self, name, info, ifcond, features, >>> - members, variants): >>> - obj = {'members': [self._gen_member(m) for m in members]} >>> + def visit_object_type_flat(self, name: str, info: Optional[QAPISourceInfo], >> >> And here it is optional due to the internal object type 'q_empty'. >> >>> + ifcond: List[str], >>> + features: List[QAPISchemaFeature], >>> + members: Sequence[QAPISchemaObjectTypeMember], >>> + variants: Optional[QAPISchemaVariants]) -> None: >> >> We represent "no variants" as None, not as []. I guess we could >> eliminate use [], trading a tiny bit of efficiency for simpler typing. >> Not a demand. >> > > For a later series: > > I recommend turning QAPISchemaVariants into an extension of > Sequence[QAPISchemaVariant], I admittedly wasn't deep enough into Python at the time to even think of this. > and then always creating an empty > collection for the purpose of simplifying the type signature. > > I'd recommend the following magicks: > > __bool__ -- for writing "if variants: ..." > __iter__ -- for writing "for variant in variants: ..." > > Then we can just always say "variants: QAPISchemaVariants" and go about > our lives. > > (Maybe that won't work, QAPISchemaVariants has a lot of other parameters > it takes that maybe don't apply to empty collections. Something to come > back to, I think.) One way to find out whether it's an improvement. Not now. [...] [*] Dearth of documentation making the code hard to understand is a common theme in scripts/qapi/. Sorry about that.
On Wed, Dec 16, 2020 at 08:51:10AM +0100, Markus Armbruster wrote: [...] > You guys clearly struggled with the tree data structure. Documentation > would have helped[*]. Since you're going to replace it (PATCH 09), > adding it now makes little sense. > > *My* struggle is with the type annotations. > > The initial state is messy to type, in part due to mypy's surprising > inability to deal with recursive types, in part because the tree data > structure is messier than it could be. > > Much of the series is cleanup that benefits the typing. Makes sense. > > What makes review hard for me: you add (fairly complicated) typing > first, then evolve it along with the cleanups. I have to first grok the > complicated typing (a struggle), then for each cleanup grok the type > changes in addition to the code changes. > > I believe adding the typing before the cleanups is a mistake. Possibly my fault, as I remember asking John to do that (in earlier versions of these patches, type annotations were added after cleanup). > > I share the desire to have type annotations that help with understanding > the code. I understand the desire to have them sooner rather than > later. I just think they're a costly distraction at this stage for this > code. Once you understand the data structure, the cleanups are fairly > straightforward. > I expected the type annotations to be a simple and helpful tool for understanding the data structure before refactoring. In the case of introspect.py, I was completely wrong about "simple", and I'm not entirely sure about "helpful". I wasn't expecting them to be an obstacle for patch review, though. If the type annotations look good at the end of the series, do we care about the intermediate state? Bisectability isn't an issue because type annotations are ignored by the Python interpreter. -- Eduardo
On 12/16/20 2:51 AM, Markus Armbruster wrote: > > Is it too late to delay the introduction of type hints until after the > data structure cleanups? > > If yes, I have to spend more time replying below. > No, I re-ordered the series again to delay typing until the end, or close to it. Though I abandoned plans to slacken List[...] inputs to Iterable[...] or Sequence[...] like I had mentioned; I think it could still be done, but it's fine to just use List[...] for the inputs for now. We can worry about optimizing type flexibility later, I think. Let's just get the dog hunting at all first. --js
Eduardo Habkost <ehabkost@redhat.com> writes: > On Wed, Dec 16, 2020 at 08:51:10AM +0100, Markus Armbruster wrote: > [...] >> You guys clearly struggled with the tree data structure. Documentation >> would have helped[*]. Since you're going to replace it (PATCH 09), >> adding it now makes little sense. >> >> *My* struggle is with the type annotations. >> >> The initial state is messy to type, in part due to mypy's surprising >> inability to deal with recursive types, in part because the tree data >> structure is messier than it could be. >> >> Much of the series is cleanup that benefits the typing. Makes sense. >> >> What makes review hard for me: you add (fairly complicated) typing >> first, then evolve it along with the cleanups. I have to first grok the >> complicated typing (a struggle), then for each cleanup grok the type >> changes in addition to the code changes. >> >> I believe adding the typing before the cleanups is a mistake. > > Possibly my fault, as I remember asking John to do that (in > earlier versions of these patches, type annotations were added > after cleanup). > >> >> I share the desire to have type annotations that help with understanding >> the code. I understand the desire to have them sooner rather than >> later. I just think they're a costly distraction at this stage for this >> code. Once you understand the data structure, the cleanups are fairly >> straightforward. >> > > I expected the type annotations to be a simple and helpful tool > for understanding the data structure before refactoring. In the > case of introspect.py, I was completely wrong about "simple", and > I'm not entirely sure about "helpful". Quite excusable. We lack the mypy experience to predict such outcomes. > I wasn't expecting them to be an obstacle for patch review, > though. If the type annotations look good at the end of the > series, do we care about the intermediate state? Bisectability > isn't an issue because type annotations are ignored by the Python > interpreter. I don't worry about bisectability. The issue is reviewability. Here's the best case for me reviewing a single patch. First, the commit message convinces me this makes sense. Then I read the patch mostly in order. It does what the commit message made me expect, I think I understand how it does it, and it doesn't touch anything I know to be subtle. Here's the best case for me reviewing a patch series: every patch in order is a best case review. As soon as review deviates from this best case, I slow down. A lot. If there is something I didn't expect, maybe I'm misunderstanding the patch's purpose. If I feel confused about how the patch achieves its purpose, I better figure it out. If something subtle is being touched, I better recall its subtleties and carefully check the patch. Slow and exhausting work. This way of review can be overly careful. But even deciding "this isn't important, let it go" is slow, unless I do it wholesale. All we get then is "looks good at a glance". But that's maybe an Acked-by, certainly not a Reviewed-by. Me finding the patch where the type hints start to be "serious" is slow. Me mentally separating changes to type hints from other changes in patches before that point is slow. Me examining the type hints at that point (which need not be entirely visible in the patch) is slow. If the annotations in the intermediate state don't have to be good, do they have to be there? If John can take them out, review will be easier and faster.
John Snow <jsnow@redhat.com> writes: > On 12/16/20 2:51 AM, Markus Armbruster wrote: >> Is it too late to delay the introduction of type hints until after >> the >> data structure cleanups? >> If yes, I have to spend more time replying below. >> > > No, I re-ordered the series again to delay typing until the end, or > close to it. Thanks! > Though I abandoned plans to slacken List[...] inputs to Iterable[...] > or Sequence[...] like I had mentioned; I think it could still be done, > but it's fine to just use List[...] for the inputs for now. We can > worry about optimizing type flexibility later, I think. Shouldn't be a problem now I know what to expect. > Let's just get the dog hunting at all first. Yes.
diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py index 63f721ebfb6..803288a64e7 100644 --- a/scripts/qapi/introspect.py +++ b/scripts/qapi/introspect.py @@ -10,7 +10,16 @@ See the COPYING file in the top-level directory. """ -from typing import Optional, Sequence, cast +from typing import ( + Any, + Dict, + List, + Optional, + Sequence, + Tuple, + Union, + cast, +) from .common import ( c_name, @@ -20,13 +29,56 @@ ) from .gen import QAPISchemaMonolithicCVisitor from .schema import ( + QAPISchema, QAPISchemaArrayType, QAPISchemaBuiltinType, + QAPISchemaEntity, + QAPISchemaEnumMember, + QAPISchemaFeature, + QAPISchemaObjectType, + QAPISchemaObjectTypeMember, QAPISchemaType, + QAPISchemaVariant, + QAPISchemaVariants, ) +from .source import QAPISourceInfo -def _make_tree(obj, ifcond, features, extra=None): +# This module constructs a tree-like data structure that is used to +# generate the introspection information for QEMU. It behaves similarly +# to a JSON value. +# +# A complexity over JSON is that our values may or may not be annotated. +# +# Un-annotated values may be: +# Scalar: str, bool, None. +# Non-scalar: List, Dict +# _Value = Union[str, bool, None, Dict[str, Value], List[Value]] +# +# With optional annotations, the type of all values is: +# TreeValue = Union[_Value, Annotated[_Value]] +# +# Sadly, mypy does not support recursive types, so we must approximate this. +_stub = Any +_scalar = Union[str, bool, None] +_nonscalar = Union[Dict[str, _stub], List[_stub]] +_value = Union[_scalar, _nonscalar] +TreeValue = Union[_value, 'Annotated'] + +# This is just an alias for an object in the structure described above: +_DObject = Dict[str, object] + +# Represents the annotations themselves: +Annotations = Dict[str, object] + +# Represents an annotated node (of some kind). +Annotated = Tuple[_value, Annotations] + + +def _make_tree(obj: Union[_DObject, str], ifcond: List[str], + features: List[QAPISchemaFeature], + extra: Optional[Annotations] = None + ) -> TreeValue: if extra is None: extra = {} if ifcond: @@ -39,9 +91,11 @@ def _make_tree(obj, ifcond, features, extra=None): return obj -def _tree_to_qlit(obj, level=0, suppress_first_indent=False): +def _tree_to_qlit(obj: TreeValue, + level: int = 0, + suppress_first_indent: bool = False) -> str: - def indent(level): + def indent(level: int) -> str: return level * 4 * ' ' if isinstance(obj, tuple): @@ -91,21 +145,20 @@ def indent(level): return ret -def to_c_string(string): +def to_c_string(string: str) -> str: return '"' + string.replace('\\', r'\\').replace('"', r'\"') + '"' class QAPISchemaGenIntrospectVisitor(QAPISchemaMonolithicCVisitor): - - def __init__(self, prefix, unmask): + def __init__(self, prefix: str, unmask: bool): super().__init__( prefix, 'qapi-introspect', ' * QAPI/QMP schema introspection', __doc__) self._unmask = unmask - self._schema = None - self._trees = [] - self._used_types = [] - self._name_map = {} + self._schema: Optional[QAPISchema] = None + self._trees: List[TreeValue] = [] + self._used_types: List[QAPISchemaType] = [] + self._name_map: Dict[str, str] = {} self._genc.add(mcgen(''' #include "qemu/osdep.h" #include "%(prefix)sqapi-introspect.h" @@ -113,10 +166,10 @@ def __init__(self, prefix, unmask): ''', prefix=prefix)) - def visit_begin(self, schema): + def visit_begin(self, schema: QAPISchema) -> None: self._schema = schema - def visit_end(self): + def visit_end(self) -> None: # visit the types that are actually used for typ in self._used_types: typ.visit(self) @@ -138,18 +191,18 @@ def visit_end(self): self._used_types = [] self._name_map = {} - def visit_needed(self, entity): + def visit_needed(self, entity: QAPISchemaEntity) -> bool: # Ignore types on first pass; visit_end() will pick up used types return not isinstance(entity, QAPISchemaType) - def _name(self, name): + def _name(self, name: str) -> str: if self._unmask: return name if name not in self._name_map: self._name_map[name] = '%d' % len(self._name_map) return self._name_map[name] - def _use_type(self, typ): + def _use_type(self, typ: QAPISchemaType) -> str: # Map the various integer types to plain int if typ.json_type() == 'int': typ = self._schema.lookup_type('int') @@ -168,8 +221,10 @@ def _use_type(self, typ): return '[' + self._use_type(typ.element_type) + ']' return self._name(typ.name) - def _gen_tree(self, name, mtype, obj, ifcond, features): - extra = None + def _gen_tree(self, name: str, mtype: str, obj: _DObject, + ifcond: List[str], + features: Optional[List[QAPISchemaFeature]]) -> None: + extra: Optional[Annotations] = None if mtype not in ('command', 'event', 'builtin', 'array'): if not self._unmask: # Output a comment to make it easy to map masked names @@ -180,44 +235,64 @@ def _gen_tree(self, name, mtype, obj, ifcond, features): obj['meta-type'] = mtype self._trees.append(_make_tree(obj, ifcond, features, extra)) - def _gen_member(self, member): - obj = {'name': member.name, 'type': self._use_type(member.type)} + def _gen_member(self, + member: QAPISchemaObjectTypeMember) -> TreeValue: + obj: _DObject = { + 'name': member.name, + 'type': self._use_type(member.type) + } if member.optional: obj['default'] = None return _make_tree(obj, member.ifcond, member.features) - def _gen_variants(self, tag_name, variants): + def _gen_variants(self, tag_name: str, + variants: List[QAPISchemaVariant]) -> _DObject: return {'tag': tag_name, 'variants': [self._gen_variant(v) for v in variants]} - def _gen_variant(self, variant): - obj = {'case': variant.name, 'type': self._use_type(variant.type)} + def _gen_variant(self, variant: QAPISchemaVariant) -> TreeValue: + obj: _DObject = { + 'case': variant.name, + 'type': self._use_type(variant.type) + } return _make_tree(obj, variant.ifcond, None) - def visit_builtin_type(self, name, info, json_type): + def visit_builtin_type(self, name: str, info: Optional[QAPISourceInfo], + json_type: str) -> None: self._gen_tree(name, 'builtin', {'json-type': json_type}, [], None) - def visit_enum_type(self, name, info, ifcond, features, members, prefix): + def visit_enum_type(self, name: str, info: QAPISourceInfo, + ifcond: List[str], features: List[QAPISchemaFeature], + members: List[QAPISchemaEnumMember], + prefix: Optional[str]) -> None: self._gen_tree(name, 'enum', {'values': [_make_tree(m.name, m.ifcond, None) for m in members]}, ifcond, features) - def visit_array_type(self, name, info, ifcond, element_type): + def visit_array_type(self, name: str, info: Optional[QAPISourceInfo], + ifcond: List[str], + element_type: QAPISchemaType) -> None: element = self._use_type(element_type) self._gen_tree('[' + element + ']', 'array', {'element-type': element}, ifcond, None) - def visit_object_type_flat(self, name, info, ifcond, features, - members, variants): - obj = {'members': [self._gen_member(m) for m in members]} + def visit_object_type_flat(self, name: str, info: Optional[QAPISourceInfo], + ifcond: List[str], + features: List[QAPISchemaFeature], + members: Sequence[QAPISchemaObjectTypeMember], + variants: Optional[QAPISchemaVariants]) -> None: + obj: _DObject = {'members': [self._gen_member(m) for m in members]} if variants: obj.update(self._gen_variants(variants.tag_member.name, variants.variants)) self._gen_tree(name, 'object', obj, ifcond, features) - def visit_alternate_type(self, name, info, ifcond, features, variants): + def visit_alternate_type(self, name: str, info: QAPISourceInfo, + ifcond: List[str], + features: List[QAPISchemaFeature], + variants: QAPISchemaVariants) -> None: self._gen_tree(name, 'alternate', {'members': [ _make_tree({'type': self._use_type(m.type)}, @@ -225,24 +300,32 @@ def visit_alternate_type(self, name, info, ifcond, features, variants): for m in variants.variants]}, ifcond, features) - def visit_command(self, name, info, ifcond, features, - arg_type, ret_type, gen, success_response, boxed, - allow_oob, allow_preconfig, coroutine): + def visit_command(self, name: str, info: QAPISourceInfo, ifcond: List[str], + features: List[QAPISchemaFeature], + arg_type: QAPISchemaObjectType, + ret_type: Optional[QAPISchemaType], gen: bool, + success_response: bool, boxed: bool, allow_oob: bool, + allow_preconfig: bool, coroutine: bool) -> None: arg_type = arg_type or self._schema.the_empty_object_type ret_type = ret_type or self._schema.the_empty_object_type - obj = {'arg-type': self._use_type(arg_type), - 'ret-type': self._use_type(ret_type)} + obj: _DObject = { + 'arg-type': self._use_type(arg_type), + 'ret-type': self._use_type(ret_type) + } if allow_oob: obj['allow-oob'] = allow_oob self._gen_tree(name, 'command', obj, ifcond, features) - def visit_event(self, name, info, ifcond, features, arg_type, boxed): + def visit_event(self, name: str, info: QAPISourceInfo, + ifcond: List[str], features: List[QAPISchemaFeature], + arg_type: QAPISchemaObjectType, boxed: bool) -> None: arg_type = arg_type or self._schema.the_empty_object_type self._gen_tree(name, 'event', {'arg-type': self._use_type(arg_type)}, ifcond, features) -def gen_introspect(schema, output_dir, prefix, opt_unmask): +def gen_introspect(schema: QAPISchema, output_dir: str, prefix: str, + opt_unmask: bool) -> None: vis = QAPISchemaGenIntrospectVisitor(prefix, opt_unmask) schema.visit(vis) vis.write(output_dir) diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini index 74fc6c82153..c0f2a58306d 100644 --- a/scripts/qapi/mypy.ini +++ b/scripts/qapi/mypy.ini @@ -14,11 +14,6 @@ disallow_untyped_defs = False disallow_incomplete_defs = False check_untyped_defs = False -[mypy-qapi.introspect] -disallow_untyped_defs = False -disallow_incomplete_defs = False -check_untyped_defs = False - [mypy-qapi.parser] disallow_untyped_defs = False disallow_incomplete_defs = False diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py index 720449feee4..e91b77fadc3 100644 --- a/scripts/qapi/schema.py +++ b/scripts/qapi/schema.py @@ -28,7 +28,7 @@ class QAPISchemaEntity: meta: Optional[str] = None - def __init__(self, name, info, doc, ifcond=None, features=None): + def __init__(self, name: str, info, doc, ifcond=None, features=None): assert name is None or isinstance(name, str) for f in features or []: assert isinstance(f, QAPISchemaFeature)