diff mbox series

[RFC,1/4] scripts/decodetree.py: add a disassembly generator (HACK!)

Message ID 20180808123934.17450-2-alex.bennee@linaro.org
State New
Headers show
Series add hand-rolled fallback when capstone fails | expand

Commit Message

Alex Bennée Aug. 8, 2018, 12:39 p.m. UTC
Given our issues with failing disassembly we could try and re-use the
decode tree data to output what instruction is being decoded. This
will be used if registered as a fall-back for when the "proper"
disassembler fails to decode an instruction.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
 scripts/decodetree.py | 52 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 46 insertions(+), 6 deletions(-)

-- 
2.17.1

Comments

Eduardo Habkost Aug. 10, 2018, 3:32 a.m. UTC | #1
On Wed, Aug 08, 2018 at 01:39:31PM +0100, Alex Bennée wrote:
> Given our issues with failing disassembly we could try and re-use the

> decode tree data to output what instruction is being decoded. This

> will be used if registered as a fall-back for when the "proper"

> disassembler fails to decode an instruction.

> 

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>


I don't have an opinion on the approach you are taking, but the
Python code you are adding is consistent with the existing style
of the script.

That said, I find the existing code full of output() calls very
hard to read.  If anybody wants to volunteer to improve the
readability of the output generation, it would be welcome.

Acked-by: Eduardo Habkost <ehabkost@redhat.com>


-- 
Eduardo
Alex Bennée Aug. 10, 2018, 8:55 a.m. UTC | #2
Eduardo Habkost <ehabkost@redhat.com> writes:

> On Wed, Aug 08, 2018 at 01:39:31PM +0100, Alex Bennée wrote:

>> Given our issues with failing disassembly we could try and re-use the

>> decode tree data to output what instruction is being decoded. This

>> will be used if registered as a fall-back for when the "proper"

>> disassembler fails to decode an instruction.

>>

>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

>

> I don't have an opinion on the approach you are taking, but the

> Python code you are adding is consistent with the existing style

> of the script.

>

> That said, I find the existing code full of output() calls very

> hard to read.  If anybody wants to volunteer to improve the

> readability of the output generation, it would be welcome.


If we expect to have the script output a number of different forms I
guess re-factoring it with some sort of template based approach would be
worthwhile. I wonder if there are other examples in the code base? We
wouldn't want to have multiple template types.

>

> Acked-by: Eduardo Habkost <ehabkost@redhat.com>



--
Alex Bennée
Eduardo Habkost Aug. 10, 2018, 12:21 p.m. UTC | #3
On Fri, Aug 10, 2018 at 09:55:50AM +0100, Alex Bennée wrote:
> 

> Eduardo Habkost <ehabkost@redhat.com> writes:

> 

> > On Wed, Aug 08, 2018 at 01:39:31PM +0100, Alex Bennée wrote:

> >> Given our issues with failing disassembly we could try and re-use the

> >> decode tree data to output what instruction is being decoded. This

> >> will be used if registered as a fall-back for when the "proper"

> >> disassembler fails to decode an instruction.

> >>

> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> >

> > I don't have an opinion on the approach you are taking, but the

> > Python code you are adding is consistent with the existing style

> > of the script.

> >

> > That said, I find the existing code full of output() calls very

> > hard to read.  If anybody wants to volunteer to improve the

> > readability of the output generation, it would be welcome.

> 

> If we expect to have the script output a number of different forms I

> guess re-factoring it with some sort of template based approach would be

> worthwhile. I wonder if there are other examples in the code base? We

> wouldn't want to have multiple template types.


QAPI scripts also generates C code, and I find them more
readable[1].

I think a true template language would be even better than QAPI's
approach, but I don't see an obvious candidate that would be
worth adding another build dependency to QEMU.


[1] Compare:

def output_decl(self):
    global translate_scope
    global translate_prefix
    output('typedef ', self.base.base.struct_name(),
           ' arg_', self.name, ';\n')
    output(translate_scope, 'bool ', translate_prefix, '_', self.name,
           '(DisasContext *ctx, arg_', self.name,
           ' *a, ', insntype, ' insn);\n')

And:

def gen_visit_members_decl(name):
    return mcgen('''

void visit_type_%(c_name)s_members(Visitor *v, %(c_name)s *obj, Error **errp);
''',
                 c_name=c_name(name))

-- 
Eduardo
diff mbox series

Patch

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index 277f9a9bba..f4b4318c96 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -169,6 +169,7 @@  input_file = ''
 output_file = None
 output_fd = None
 insntype = 'uint32_t'
+disassemble = False
 
 re_ident = '[a-zA-Z][a-zA-Z0-9_]*'
 
@@ -467,6 +468,7 @@  class Pattern(General):
 
     def output_code(self, i, extracted, outerbits, outermask):
         global translate_prefix
+        global disassemble
         ind = str_indent(i)
         arg = self.base.base.name
         output(ind, '/* line ', str(self.lineno), ' */\n')
@@ -474,8 +476,34 @@  class Pattern(General):
             output(ind, self.base.extract_name(), '(&u.f_', arg, ', insn);\n')
         for n, f in self.fields.items():
             output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
-        output(ind, 'return ', translate_prefix, '_', self.name,
-               '(ctx, &u.f_', arg, ', insn);\n')
+        output(ind, 'return ', translate_prefix, '_', self.name)
+        if disassemble:
+            output("(dstr, maxd, ")
+        else:
+            output("(ctx, ")
+
+        output('&u.f_', arg)
+
+        if disassemble:
+            output(");\n")
+        else:
+            output(', insn);\n')
+
+    def output_formatter(self):
+        global translate_prefix
+        arg = self.base.base.name
+        output('/* line ', str(self.lineno), ' */\n')
+        output('typedef ', self.base.base.struct_name(),
+               ' arg_', self.name, ';\n')
+        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
+               '(char *ptr, size_t n, arg_', self.name, ' *a)\n')
+        output("{\n")
+        output(str_indent(4), 'snprintf(ptr, n, "', self.name)
+        # fill in arguments here
+        output('"); \n')
+        output(str_indent(4), "return true;\n")
+        output("}\n")
+
 # end Pattern
 
 
@@ -973,11 +1001,12 @@  def main():
     global insnwidth
     global insntype
     global insnmask
+    global disassemble
 
     decode_function = 'decode'
     decode_scope = 'static '
 
-    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=']
+    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=', 'disassemble']
     try:
         (opts, args) = getopt.getopt(sys.argv[1:], 'o:w:', long_opts)
     except getopt.GetoptError as err:
@@ -998,6 +1027,8 @@  def main():
                 insnmask = 0xffff
             elif insnwidth != 32:
                 error(0, 'cannot handle insns of width', insnwidth)
+        elif o == '--disassemble':
+            disassemble = True
         else:
             assert False, 'unhandled option'
 
@@ -1031,7 +1062,10 @@  def main():
             if i.base.base != p.base.base:
                 error(0, i.name, ' has conflicting argument sets')
         else:
-            i.output_decl()
+            if disassemble:
+                i.output_formatter()
+            else:
+                i.output_decl()
             out_pats[i.name] = i
     output('\n')
 
@@ -1039,8 +1073,14 @@  def main():
         f = formats[n]
         f.output_extract()
 
-    output(decode_scope, 'bool ', decode_function,
-           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
+    output(decode_scope, 'bool ', decode_function)
+
+    if disassemble:
+        output("(char *dstr, size_t maxd, ")
+    else:
+        output('(DisasContext *ctx, ')
+
+    output(insntype, ' insn)\n{\n')
 
     i4 = str_indent(4)
     output(i4, 'union {\n')