diff mbox

[Xen-devel] xen: arm: enable perf counters

Message ID 1400163427-6310-1-git-send-email-ian.campbell@citrix.com
State New
Headers show

Commit Message

Ian Campbell May 15, 2014, 2:17 p.m. UTC
As well as the existing common perf counters add a bunch of ARM specifics,
including the various trap types, vuart/vgic/vtimer accesses and different
types of interrupt.

Adjust the common code so that the columns line up again, not sure when/where this went wrong.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
This is mostly the set of stuff I happened to be interested in, it can be made
more fine grained as the need arises.
---
 xen/arch/arm/gic.c               |    4 +++
 xen/arch/arm/irq.c               |    8 +++++-
 xen/arch/arm/time.c              |    3 ++
 xen/arch/arm/traps.c             |   30 ++++++++++++++++++-
 xen/arch/arm/vgic.c              |   11 +++++++
 xen/arch/arm/vtimer.c            |   24 ++++++++++++++++
 xen/arch/arm/vuart.c             |    5 ++++
 xen/common/perfc.c               |    4 +--
 xen/include/asm-arm/config.h     |    2 ++
 xen/include/asm-arm/perfc.h      |   12 ++++++++
 xen/include/asm-arm/perfc_defn.h |   59 ++++++++++++++++++++++++++++++++++++++
 11 files changed, 158 insertions(+), 4 deletions(-)
 create mode 100644 xen/include/asm-arm/perfc.h
 create mode 100644 xen/include/asm-arm/perfc_defn.h

Comments

Julien Grall May 15, 2014, 2:53 p.m. UTC | #1
On 05/15/2014 03:17 PM, Ian Campbell wrote:
> As well as the existing common perf counters add a bunch of ARM specifics,
> including the various trap types, vuart/vgic/vtimer accesses and different
> types of interrupt.

Performance counters in vgic don't make sense to me as we need to trap
it in any case.

But we might want perf counter in p2m_lookup because this function is
costly.

I would also add one in flush_tlb_* functions, such as flush_tlb_domain.
It will help us optimize TLBs.

>      PSCI_RESULT_REG(regs) = psci_call(PSCI_ARGS(regs));
>  }
>  
> @@ -1135,15 +1138,19 @@ static void do_trap_hypercall(struct cpu_user_regs *regs, register_t *nr,
>      register_t orig_pc = regs->pc;
>  #endif
>  
> +    BUILD_BUG_ON(NR_hypercalls < ARRAY_SIZE(arm_hypercall_table) );
> +
>      if ( iss != XEN_HYPERCALL_TAG )
>          domain_crash_synchronous();
>  
>      if ( *nr >= ARRAY_SIZE(arm_hypercall_table) )
>      {
> +        perfc_incr(invalid_hypercalls);
>          HYPERCALL_RESULT_REG(regs) = -ENOSYS;
>          return;
>      }
>  
> +    perfc_incra(hypercalls, *nr);
>      call = arm_hypercall_table[*nr].fn;
>      if ( call == NULL )
>      {
> @@ -1283,8 +1290,10 @@ static int check_conditional_instr(struct cpu_user_regs *regs, union hsr hsr)
>      cpsr_cond = cpsr >> 28;
>  
>      if ( !((cc_map[cond] >> cpsr_cond) & 1) )
> +    {
> +        perfc_incr(trap_uncond);

trap_uncond alone doesn't have much meaning. Can you add a perf counter
to count the number of calls to this function (i.e. check_conditional_instr)?

>          return 0;
> -
> +    }
>      return 1;
>  }
>  
> @@ -1664,6 +1673,7 @@ asmlinkage void do_trap_hypervisor(struct cpu_user_regs *regs)
>  
>      switch (hsr.ec) {
>      case HSR_EC_WFI_WFE:
> +        perfc_incr(trap_wfi_wfe);

Can you add a perf counter to count the number of WFI and WFE? (Actually,
the latter is not trapped for now.)

>          if ( !check_conditional_instr(regs, hsr) )
>          {
>              advance_pc(regs, hsr);
> @@ -1684,38 +1694,51 @@ asmlinkage void do_trap_hypervisor(struct cpu_user_regs *regs)
>      case HSR_EC_CP15_32:
>          if ( !is_32bit_domain(current->domain) )
>              goto bad_trap;
> +        perfc_incr(trap_cp15_32);
>          do_cp15_32(regs, hsr);
>          break;
>      case HSR_EC_CP15_64:
>          if ( !is_32bit_domain(current->domain) )
>              goto bad_trap;
> +        perfc_incr(trap_cp15_32);

Did you mean trap_cp15_64?

[..]

>      case HSR_EC_HVC32:
> +        perfc_incr(trap_hvc32);
>  #ifndef NDEBUG
>          if ( (hsr.iss & 0xff00) == 0xff00 )
>              return do_debug_trap(regs, hsr.iss & 0x00ff);
>  #endif
>          if ( hsr.iss == 0 )
>              return do_trap_psci(regs);
> +

Spurious change?

[..]

>      case HSR_EC_INSTR_ABORT_LOWER_EL:
> +        perfc_incr(trap_iabt);
>          do_trap_instr_abort_guest(regs, hsr);
>          break;
>      case HSR_EC_DATA_ABORT_LOWER_EL:
> +        perfc_incr(trap_dabt);
>          do_trap_data_abort_guest(regs, hsr);
>          break;
>      default:
>   bad_trap:
> +        perfc_incr(trap_bad);

The perfc_incr seems pointless here. Indeed, do_unexpected_trap will
basically break the current PCPU, and it can be worse when it occurs on CPU0.

[..]

> diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
> index ef291ff..0de6f7e 100644
> --- a/xen/include/asm-arm/config.h
> +++ b/xen/include/asm-arm/config.h
> @@ -178,6 +178,8 @@
>  #define PAGE_MASK           (~(PAGE_SIZE-1))
>  #define PAGE_FLAG_MASK      (~0)
>  
> +#define NR_hypercalls 64
> +

Shouldn't it be defined in common code?

Regards,
Ian Campbell May 15, 2014, 3:30 p.m. UTC | #2
On Thu, 2014-05-15 at 15:53 +0100, Julien Grall wrote:
> On 05/15/2014 03:17 PM, Ian Campbell wrote:
> > As well as the existing common perf counters add a bunch of ARM specifics,
> > including the various trap types, vuart/vgic/vtimer accesses and different
> > types of interrupt.
> 
> Performance counters in vgic don't make sense for me as we need to trap
> it in any case.

??? The point of these is so you can see how frequently something is
trapping, so you can measure and optimise as appropriate, the fact that
we have to trap a particular thing doesn't make it pointless to measure.

In this case the perfc lets you see that the majority of the traps are
to send SGIs and that the vast majority are sent to a specific list of
processors, which is something I was interested in.

> But we might want perf counter in p2m_lookup because this function is
> costly.
> 
> I would also add one in flush_tlb_* functions, such as flush_tlb_domain.
> It will help us optimizing TLBs.

Please do add more if you think they will be useful, this is just a
starting point. I think this applies to most of your comments, if you
are doing some debugging or performance measurement and you find that
you want an extra perfc or a more granular one or whatever then please
add it and send a patch. Other than that I don't think there is much
need to bikeshed what exactly is being added here.

> >      case HSR_EC_CP15_64:
> >          if ( !is_32bit_domain(current->domain) )
> >              goto bad_trap;
> > +        perfc_incr(trap_cp15_32);
> 
> Did you mean trap_cp15_64?

Yes.

> > diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
> > index ef291ff..0de6f7e 100644
> > --- a/xen/include/asm-arm/config.h
> > +++ b/xen/include/asm-arm/config.h
> > @@ -178,6 +178,8 @@
> >  #define PAGE_MASK           (~(PAGE_SIZE-1))
> >  #define PAGE_FLAG_MASK      (~0)
> >  
> > +#define NR_hypercalls 64
> > +
> 
> Should not it be define in common code?

Could be, but it's not. Since different architectures can implement
different subsets of hypercalls I'm not too bothered about moving this.

Ian.
Julien Grall May 15, 2014, 3:52 p.m. UTC | #3
On 05/15/2014 04:30 PM, Ian Campbell wrote:
>> But we might want perf counter in p2m_lookup because this function is
>> costly.
>>
>> I would also add one in flush_tlb_* functions, such as flush_tlb_domain.
>> It will help us optimizing TLBs.
> 
> Please do add more if you think they will be useful, this is just a
> starting point. I think this applies to most of your comments, if you
> are doing some debugging or performance measurement and you find that
> you want an extra perfc or a more granular one or whatever then please
> add it and send a patch. Otherwise than that I don't think there is much
> need to bikeshed what exactly is being added here.

I agree it's a starting point and I took the opportunity to give some
feedback on what kind of perf counter it would be nice to have on Xen.

IMHO, p2m_lookup and flush_tlb_domain should have their own perf counters
because they are used in hot paths. I'm fine to create a follow-up but
as you were working on it...

> 
>>>      case HSR_EC_CP15_64:
>>>          if ( !is_32bit_domain(current->domain) )
>>>              goto bad_trap;
>>> +        perfc_incr(trap_cp15_32);
>>
>> Did you mean trap_cp15_64?
> 
> Yes.
> 
>>> diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
>>> index ef291ff..0de6f7e 100644
>>> --- a/xen/include/asm-arm/config.h
>>> +++ b/xen/include/asm-arm/config.h
>>> @@ -178,6 +178,8 @@
>>>  #define PAGE_MASK           (~(PAGE_SIZE-1))
>>>  #define PAGE_FLAG_MASK      (~0)
>>>  
>>> +#define NR_hypercalls 64
>>> +
>>
>> Should not it be define in common code?
> 
> Could be, but it's not. Since different architectures can implement
> different subsets of hypercalls I'm not too bothered about moving this.

Oh ok. Thanks.

Regards,
diff mbox

Patch

diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 577d85b..f9f598b 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -679,6 +679,8 @@  void gic_inject(void)
 
 static void do_sgi(struct cpu_user_regs *regs, int othercpu, enum gic_sgi sgi)
 {
+    perfc_incr(ipis);
+
     /* Lower the priority */
     GICC[GICC_EOIR] = sgi;
 
@@ -792,6 +794,8 @@  static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
     struct vcpu *v = current;
     uint64_t eisr = GICH[GICH_EISR0] | (((uint64_t) GICH[GICH_EISR1]) << 32);
 
+    perfc_incr(maintenance_irqs);
+
     while ((i = find_next_bit((const long unsigned int *) &eisr,
                               64, i)) < 64) {
         struct pending_irq *p, *p2;
diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
index 44696e7..704a095 100644
--- a/xen/arch/arm/irq.c
+++ b/xen/arch/arm/irq.c
@@ -150,7 +150,12 @@  void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
     struct irq_desc *desc = irq_to_desc(irq);
     struct irqaction *action = desc->action;
 
-    /* TODO: perfc_incr(irqs); */
+    perfc_incr(irqs);
+
+    if (irq < 32)
+        perfc_incr(ppis);
+    else
+        perfc_incr(spis);
 
     /* TODO: this_cpu(irq_count)++; */
 
@@ -170,6 +175,7 @@  void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
     {
         struct domain *d = irq_get_domain(desc);
 
+        perfc_incr(guest_irqs);
         desc->handler->end(desc);
 
         desc->status |= IRQ_INPROGRESS;
diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c
index d04c97a..64106af 100644
--- a/xen/arch/arm/time.c
+++ b/xen/arch/arm/time.c
@@ -195,6 +195,7 @@  static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
     if ( irq == (timer_irq[TIMER_HYP_PPI].irq) &&
          READ_SYSREG32(CNTHP_CTL_EL2) & CNTx_CTL_PENDING )
     {
+        perfc_incr(hyp_timer_irqs);
         /* Signal the generic timer code to do its work */
         raise_softirq(TIMER_SOFTIRQ);
         /* Disable the timer to avoid more interrupts */
@@ -204,6 +205,7 @@  static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
     if ( irq == (timer_irq[TIMER_PHYS_NONSECURE_PPI].irq) &&
          READ_SYSREG32(CNTP_CTL_EL0) & CNTx_CTL_PENDING )
     {
+        perfc_incr(phys_timer_irqs);
         /* Signal the generic timer code to do its work */
         raise_softirq(TIMER_SOFTIRQ);
         /* Disable the timer to avoid more interrupts */
@@ -213,6 +215,7 @@  static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
 
 static void vtimer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
 {
+    perfc_incr(virt_timer_irqs);
     current->arch.virt_timer.ctl = READ_SYSREG32(CNTV_CTL_EL0);
     WRITE_SYSREG32(current->arch.virt_timer.ctl | CNTx_CTL_MASK, CNTV_CTL_EL0);
     vgic_vcpu_inject_irq(current, current->arch.virt_timer.irq, 1);
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 03a3da6..232e9d9 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -30,6 +30,7 @@ 
 #include <xen/hypercall.h>
 #include <xen/softirq.h>
 #include <xen/domain_page.h>
+#include <xen/perfc.h>
 #include <public/sched.h>
 #include <public/xen.h>
 #include <asm/event.h>
@@ -1106,6 +1107,8 @@  static void do_trap_psci(struct cpu_user_regs *regs)
         return;
     }
 
+    perfc_incra(pscsicalls, PSCI_OP_REG(regs));
+
     PSCI_RESULT_REG(regs) = psci_call(PSCI_ARGS(regs));
 }
 
@@ -1135,15 +1138,19 @@  static void do_trap_hypercall(struct cpu_user_regs *regs, register_t *nr,
     register_t orig_pc = regs->pc;
 #endif
 
+    BUILD_BUG_ON(NR_hypercalls < ARRAY_SIZE(arm_hypercall_table) );
+
     if ( iss != XEN_HYPERCALL_TAG )
         domain_crash_synchronous();
 
     if ( *nr >= ARRAY_SIZE(arm_hypercall_table) )
     {
+        perfc_incr(invalid_hypercalls);
         HYPERCALL_RESULT_REG(regs) = -ENOSYS;
         return;
     }
 
+    perfc_incra(hypercalls, *nr);
     call = arm_hypercall_table[*nr].fn;
     if ( call == NULL )
     {
@@ -1283,8 +1290,10 @@  static int check_conditional_instr(struct cpu_user_regs *regs, union hsr hsr)
     cpsr_cond = cpsr >> 28;
 
     if ( !((cc_map[cond] >> cpsr_cond) & 1) )
+    {
+        perfc_incr(trap_uncond);
         return 0;
-
+    }
     return 1;
 }
 
@@ -1664,6 +1673,7 @@  asmlinkage void do_trap_hypervisor(struct cpu_user_regs *regs)
 
     switch (hsr.ec) {
     case HSR_EC_WFI_WFE:
+        perfc_incr(trap_wfi_wfe);
         if ( !check_conditional_instr(regs, hsr) )
         {
             advance_pc(regs, hsr);
@@ -1684,38 +1694,51 @@  asmlinkage void do_trap_hypervisor(struct cpu_user_regs *regs)
     case HSR_EC_CP15_32:
         if ( !is_32bit_domain(current->domain) )
             goto bad_trap;
+        perfc_incr(trap_cp15_32);
         do_cp15_32(regs, hsr);
         break;
     case HSR_EC_CP15_64:
         if ( !is_32bit_domain(current->domain) )
             goto bad_trap;
+        perfc_incr(trap_cp15_32);
         do_cp15_64(regs, hsr);
         break;
     case HSR_EC_CP14_32:
+        if ( !is_32bit_domain(current->domain) )
+            goto bad_trap;
+        perfc_incr(trap_cp14_32);
+        do_cp14(regs, hsr);
+        break;
     case HSR_EC_CP14_DBG:
         if ( !is_32bit_domain(current->domain) )
             goto bad_trap;
+        perfc_incr(trap_cp14_dbg);
         do_cp14(regs, hsr);
         break;
     case HSR_EC_CP:
         if ( !is_32bit_domain(current->domain) )
             goto bad_trap;
+        perfc_incr(trap_cp);
         do_cp(regs, hsr);
         break;
     case HSR_EC_SMC32:
+        perfc_incr(trap_smc32);
         inject_undef32_exception(regs);
         break;
     case HSR_EC_HVC32:
+        perfc_incr(trap_hvc32);
 #ifndef NDEBUG
         if ( (hsr.iss & 0xff00) == 0xff00 )
             return do_debug_trap(regs, hsr.iss & 0x00ff);
 #endif
         if ( hsr.iss == 0 )
             return do_trap_psci(regs);
+
         do_trap_hypercall(regs, (register_t *)&regs->r12, hsr.iss);
         break;
 #ifdef CONFIG_ARM_64
     case HSR_EC_HVC64:
+        perfc_incr(trap_hvc64);
 #ifndef NDEBUG
         if ( (hsr.iss & 0xff00) == 0xff00 )
             return do_debug_trap(regs, hsr.iss & 0x00ff);
@@ -1725,23 +1748,28 @@  asmlinkage void do_trap_hypervisor(struct cpu_user_regs *regs)
         do_trap_hypercall(regs, &regs->x16, hsr.iss);
         break;
     case HSR_EC_SMC64:
+        perfc_incr(trap_smc64);
         inject_undef64_exception(regs, hsr.len);
         break;
     case HSR_EC_SYSREG:
         if ( is_32bit_domain(current->domain) )
             goto bad_trap;
+        perfc_incr(trap_sysreg);
         do_sysreg(regs, hsr);
         break;
 #endif
 
     case HSR_EC_INSTR_ABORT_LOWER_EL:
+        perfc_incr(trap_iabt);
         do_trap_instr_abort_guest(regs, hsr);
         break;
     case HSR_EC_DATA_ABORT_LOWER_EL:
+        perfc_incr(trap_dabt);
         do_trap_data_abort_guest(regs, hsr);
         break;
     default:
  bad_trap:
+        perfc_incr(trap_bad);
         printk("Hypervisor Trap. HSR=0x%x EC=0x%x IL=%x Syndrome=0x%"PRIx32"\n",
                hsr.bits, hsr.ec, hsr.len, hsr.iss);
         do_unexpected_trap("Hypervisor", regs);
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 4cf6470..cf990c3 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -24,6 +24,7 @@ 
 #include <xen/softirq.h>
 #include <xen/irq.h>
 #include <xen/sched.h>
+#include <xen/perfc.h>
 
 #include <asm/current.h>
 
@@ -181,6 +182,8 @@  static int vgic_distr_mmio_read(struct vcpu *v, mmio_info_t *info)
     int offset = (int)(info->gpa - v->domain->arch.vgic.dbase);
     int gicd_reg = REG(offset);
 
+    perfc_incr(vgic_reads);
+
     switch ( gicd_reg )
     {
     case GICD_CTLR:
@@ -450,9 +453,11 @@  static int vgic_to_sgi(struct vcpu *v, register_t sgir)
     switch ( filter )
     {
         case GICD_SGI_TARGET_LIST:
+            perfc_incr(vgic_sgi_list);
             vcpu_mask = (sgir & GICD_SGI_TARGET_MASK) >> GICD_SGI_TARGET_SHIFT;
             break;
         case GICD_SGI_TARGET_OTHERS:
+            perfc_incr(vgic_sgi_others);
             for ( i = 0; i < d->max_vcpus; i++ )
             {
                 if ( i != current->vcpu_id && is_vcpu_running(d, i) )
@@ -460,6 +465,7 @@  static int vgic_to_sgi(struct vcpu *v, register_t sgir)
             }
             break;
         case GICD_SGI_TARGET_SELF:
+            perfc_incr(vgic_sgi_self);
             set_bit(current->vcpu_id, &vcpu_mask);
             break;
         default:
@@ -491,6 +497,8 @@  static int vgic_distr_mmio_write(struct vcpu *v, mmio_info_t *info)
     int gicd_reg = REG(offset);
     uint32_t tr;
 
+    perfc_incr(vgic_writes);
+
     switch ( gicd_reg )
     {
     case GICD_CTLR:
@@ -756,7 +764,10 @@  out:
     running = v->is_running;
     vcpu_unblock(v);
     if ( running && v != current )
+    {
+        perfc_incr(vgic_cross_cpu_intr_inject);
         smp_send_event_check_mask(cpumask_of(v->processor));
+    }
 }
 
 /*
diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
index b93153e..ebbb269 100644
--- a/xen/arch/arm/vtimer.c
+++ b/xen/arch/arm/vtimer.c
@@ -21,6 +21,7 @@ 
 #include <xen/lib.h>
 #include <xen/timer.h>
 #include <xen/sched.h>
+#include <xen/perfc.h>
 #include <asm/irq.h>
 #include <asm/time.h>
 #include <asm/gic.h>
@@ -34,7 +35,13 @@  static void phys_timer_expired(void *data)
     struct vtimer *t = data;
     t->ctl |= CNTx_CTL_PENDING;
     if ( !(t->ctl & CNTx_CTL_MASK) )
+    {
+        perfc_incr(vtimer_phys_inject);
         vgic_vcpu_inject_irq(t->v, t->irq, 1);
+    }
+    else
+        perfc_incr(vtimer_phys_ignore);
+
 }
 
 static void virt_timer_expired(void *data)
@@ -42,6 +49,8 @@  static void virt_timer_expired(void *data)
     struct vtimer *t = data;
     t->ctl |= CNTx_CTL_MASK;
     vgic_vcpu_inject_irq(t->v, t->irq, 1);
+    perfc_incr(vtimer_virt_inject);
+
 }
 
 int domain_vtimer_init(struct domain *d)
@@ -192,6 +201,11 @@  static int vtimer_emulate_cp32(struct cpu_user_regs *regs, union hsr hsr)
     struct hsr_cp32 cp32 = hsr.cp32;
     uint32_t *r = (uint32_t *)select_user_reg(regs, cp32.reg);
 
+    if ( cp32.read )
+        perfc_incr(vtimer_cp32_reads);
+    else
+        perfc_incr(vtimer_cp32_writes);
+
     switch ( hsr.bits & HSR_CP32_REGS_MASK )
     {
     case HSR_CPREG32(CNTP_CTL):
@@ -214,6 +228,11 @@  static int vtimer_emulate_cp64(struct cpu_user_regs *regs, union hsr hsr)
     uint32_t *r2 = (uint32_t *)select_user_reg(regs, cp64.reg2);
     uint64_t x;
 
+    if ( cp64.read )
+        perfc_incr(vtimer_cp64_reads);
+    else
+        perfc_incr(vtimer_cp64_writes);
+
     switch ( hsr.bits & HSR_CP64_REGS_MASK )
     {
     case HSR_CPREG64(CNTPCT):
@@ -239,6 +258,11 @@  static int vtimer_emulate_sysreg(struct cpu_user_regs *regs, union hsr hsr)
     register_t *x = select_user_reg(regs, sysreg.reg);
     uint32_t r = (uint32_t)*x;
 
+    if ( sysreg.read )
+        perfc_incr(vtimer_sysreg_reads);
+    else
+        perfc_incr(vtimer_sysreg_writes);
+
     switch ( hsr.bits & HSR_SYSREG_REGS_MASK )
     {
     case HSR_SYSREG_CNTP_CTL_EL0:
diff --git a/xen/arch/arm/vuart.c b/xen/arch/arm/vuart.c
index c02a8a9..05a0d2a 100644
--- a/xen/arch/arm/vuart.c
+++ b/xen/arch/arm/vuart.c
@@ -38,6 +38,7 @@ 
 #include <xen/errno.h>
 #include <xen/ctype.h>
 #include <xen/serial.h>
+#include <xen/perfc.h>
 
 #include "vuart.h"
 #include "io.h"
@@ -108,6 +109,8 @@  static int vuart_mmio_read(struct vcpu *v, mmio_info_t *info)
     register_t *r = select_user_reg(regs, dabt.reg);
     paddr_t offset = info->gpa - d->arch.vuart.info->base_addr;
 
+    perfc_incr(vuart_reads);
+
     /* By default zeroed the register */
     *r = 0;
 
@@ -126,6 +129,8 @@  static int vuart_mmio_write(struct vcpu *v, mmio_info_t *info)
     register_t *r = select_user_reg(regs, dabt.reg);
     paddr_t offset = info->gpa - d->arch.vuart.info->base_addr;
 
+    perfc_incr(vuart_writes);
+
     if ( offset == d->arch.vuart.info->data_off )
         /* ignore any status bits */
         vuart_print_char(v, *r & 0xFF);
diff --git a/xen/common/perfc.c b/xen/common/perfc.c
index 96a4245..9f078e1 100644
--- a/xen/common/perfc.c
+++ b/xen/common/perfc.c
@@ -57,7 +57,7 @@  void perfc_printall(unsigned char key)
                 for_each_online_cpu ( cpu )
                 {
                     if ( k > 0 && (k % 4) == 0 )
-                        printk("\n%46s", "");
+                        printk("\n%53s", "");
                     printk("  CPU%02u[%10"PRIperfc"u]", cpu, per_cpu(perfcounters, cpu)[j]);
                     ++k;
                 }
@@ -103,7 +103,7 @@  void perfc_printall(unsigned char key)
                     if ( perfc_info[i].type == TYPE_S_ARRAY ) 
                         sum = (perfc_t) sum;
                     if ( k > 0 && (k % 4) == 0 )
-                        printk("\n%46s", "");
+                        printk("\n%53s", "");
                     printk("  CPU%02u[%10Lu]", cpu, sum);
                     ++k;
                 }
diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
index ef291ff..0de6f7e 100644
--- a/xen/include/asm-arm/config.h
+++ b/xen/include/asm-arm/config.h
@@ -178,6 +178,8 @@ 
 #define PAGE_MASK           (~(PAGE_SIZE-1))
 #define PAGE_FLAG_MASK      (~0)
 
+#define NR_hypercalls 64
+
 #define STACK_ORDER 3
 #define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)
 
diff --git a/xen/include/asm-arm/perfc.h b/xen/include/asm-arm/perfc.h
new file mode 100644
index 0000000..a1a591e
--- /dev/null
+++ b/xen/include/asm-arm/perfc.h
@@ -0,0 +1,12 @@ 
+#ifndef __ASM_PERFC_H__
+#define __ASM_PERFC_H__
+
+static inline void arch_perfc_reset(void)
+{
+}
+
+static inline void arch_perfc_gather(void)
+{
+}
+
+#endif
diff --git a/xen/include/asm-arm/perfc_defn.h b/xen/include/asm-arm/perfc_defn.h
new file mode 100644
index 0000000..77ae339
--- /dev/null
+++ b/xen/include/asm-arm/perfc_defn.h
@@ -0,0 +1,59 @@ 
+/* This file is legitimately included multiple times. */
+/*#ifndef __XEN_PERFC_DEFN_H__*/
+/*#define __XEN_PERFC_DEFN_H__*/
+
+PERFCOUNTER(invalid_hypercalls, "invalid hypercalls")
+
+PERFCOUNTER(trap_wfi_wfe, "trap: wfi/wfe")
+PERFCOUNTER(trap_cp15_32, "trap: cp15 32-bit access")
+PERFCOUNTER(trap_cp15_64, "trap: cp15 64-bit access")
+PERFCOUNTER(trap_cp14_32, "trap: cp14 32-bit access")
+PERFCOUNTER(trap_cp14_dbg, "trap: cp14 dbg access")
+PERFCOUNTER(trap_cp, "trap: cp access")
+PERFCOUNTER(trap_smc32, "trap: 32-bit smc")
+PERFCOUNTER(trap_hvc32, "trap: 32-bit hvc")
+#ifdef CONFIG_ARM_64
+PERFCOUNTER(trap_smc64, "trap: 64-bit smc")
+PERFCOUNTER(trap_hvc64, "trap: 64-bit hvc")
+PERFCOUNTER(trap_sysreg, "trap: sysreg access")
+#endif
+PERFCOUNTER(trap_iabt, "trap: guest instr abort")
+PERFCOUNTER(trap_dabt, "trap: guest data abort")
+PERFCOUNTER(trap_uncond, "trap: condition failed")
+PERFCOUNTER(trap_bad, "trap: unknown/bad")
+
+PERFCOUNTER_ARRAY(pscsicalls, "#PSCI calls", 4)
+
+PERFCOUNTER(vgic_reads, "vgic: reads")
+PERFCOUNTER(vgic_writes, "vgic: writes")
+PERFCOUNTER(vgic_sgi_list, "vgic: SGI send to list")
+PERFCOUNTER(vgic_sgi_others, "vgic: SGI send to others")
+PERFCOUNTER(vgic_sgi_self, "vgic: SGI send to self")
+PERFCOUNTER(vgic_cross_cpu_intr_inject, "vgic: cross-CPU irq inject")
+
+PERFCOUNTER(vuart_reads, "vuart: reads")
+PERFCOUNTER(vuart_writes, "vuart: writes")
+
+PERFCOUNTER(vtimer_cp32_reads, "vtimer: cp32 reads")
+PERFCOUNTER(vtimer_cp32_writes, "vtimer: cp32 writes")
+
+PERFCOUNTER(vtimer_cp64_reads, "vtimer: cp64 reads")
+PERFCOUNTER(vtimer_cp64_writes, "vtimer: cp64 writes")
+
+PERFCOUNTER(vtimer_sysreg_reads, "vtimer: sysreg reads")
+PERFCOUNTER(vtimer_sysreg_writes, "vtimer: sysreg writes")
+
+PERFCOUNTER(vtimer_phys_inject, "vtimer: phys expired, injected")
+PERFCOUNTER(vtimer_phys_ignore, "vtimer: phys expired, ignored")
+PERFCOUNTER(vtimer_virt_inject, "vtimer: virt expired, injected")
+
+PERFCOUNTER(ppis,                   "#PPIs")
+PERFCOUNTER(spis,                   "#SPIs")
+PERFCOUNTER(guest_irqs,             "#GUEST-IRQS")
+
+PERFCOUNTER(hyp_timer_irqs, "Hypervisor timer interrupts")
+PERFCOUNTER(phys_timer_irqs, "Physical timer interrupts")
+PERFCOUNTER(virt_timer_irqs, "Virtual timer interrupts")
+PERFCOUNTER(maintenance_irqs, "Maintenance interrupts")
+
+/*#endif*/ /* __XEN_PERFC_DEFN_H__ */