From 73f45fe6aeb2ac4c67c405c6cd4cfe97f0b07b72 Mon Sep 17 00:00:00 2001 From: Paul Lind Date: Mon, 4 Mar 2013 17:10:49 -0800 Subject: Fix various problems in opcontrol Support multiple CPU types in opcontrol Use access(2) instead of opening files where possible Mount /dev/oprofile at startup Disable oprofiled when resetting current data session Change-Id: Ifd0ff2a50c9229af01249aefd5cd9dce00c58bf4 --- daemon/liblegacy/opd_kernel.c | 2 +- events/arm/armv6/events.h | 43 ++++ events/arm/armv7/events.h | 100 +++++++++ events/i386/arch_perfmon/events.h | 15 ++ events/mips/1004K/events.h | 229 +++++++++++++++++++ events/mips/24K/events.h | 181 +++++++++++++++ events/mips/34K/events.h | 199 +++++++++++++++++ events/mips/74K/events.h | 247 +++++++++++++++++++++ opcontrol/opcontrol.cpp | 453 ++++++++++++++++---------------------- opev.py | 84 +++++++ 10 files changed, 1293 insertions(+), 260 deletions(-) create mode 100644 events/arm/armv6/events.h create mode 100644 events/arm/armv7/events.h create mode 100644 events/i386/arch_perfmon/events.h create mode 100644 events/mips/1004K/events.h create mode 100644 events/mips/24K/events.h create mode 100644 events/mips/34K/events.h create mode 100644 events/mips/74K/events.h create mode 100755 opev.py diff --git a/daemon/liblegacy/opd_kernel.c b/daemon/liblegacy/opd_kernel.c index 1131aa7..60aaaad 100644 --- a/daemon/liblegacy/opd_kernel.c +++ b/daemon/liblegacy/opd_kernel.c @@ -393,7 +393,7 @@ void opd_handle_kernel_sample(unsigned long eip, u32 counter) int opd_eip_is_kernel(unsigned long eip) { -#ifdef __i386 +#ifdef __i386__ #define KERNEL_OFFSET 0xC0000000 /* * kernel_start == 0 when using --no-vmlinux. 
diff --git a/events/arm/armv6/events.h b/events/arm/armv6/events.h new file mode 100644 index 0000000..3ca988b --- /dev/null +++ b/events/arm/armv6/events.h @@ -0,0 +1,43 @@ +// events from file arm/armv6/events + {0x00, CTR(0) | CTR(1), 0, "IFU_IFETCH_MISS", + "number of instruction fetch misses"}, + {0x01, CTR(0) | CTR(1), 0, "CYCLES_IFU_MEM_STALL", + "cycles instruction fetch pipe is stalled"}, + {0x02, CTR(0) | CTR(1), 0, "CYCLES_DATA_STALL", + "cycles stall occurs for due to data dependency"}, + {0x03, CTR(0) | CTR(1), 0, "ITLB_MISS", + "number of Instruction MicroTLB misses"}, + {0x04, CTR(0) | CTR(1), 0, "DTLB_MISS", + "number of Data MicroTLB misses"}, + {0x05, CTR(0) | CTR(1), 0, "BR_INST_EXECUTED", + "branch instruction executed w/ or w/o program flow change"}, + {0x06, CTR(0) | CTR(1), 0, "BR_INST_MISS_PRED", + "branch mispredicted"}, + {0x07, CTR(0) | CTR(1), 0, "INSN_EXECUTED", + "instructions executed"}, + {0x09, CTR(0) | CTR(1), 0, "DCACHE_ACCESS", + "data cache access, cacheable locations"}, + {0x0a, CTR(0) | CTR(1), 0, "DCACHE_ACCESS_ALL", + "data cache access, all locations"}, + {0x0b, CTR(0) | CTR(1), 0, "DCACHE_MISS", + "data cache miss"}, + {0x0c, CTR(0) | CTR(1), 0, "DCACHE_WB", + "data cache writeback, 1 event for every half cacheline"}, + {0x0d, CTR(0) | CTR(1), 0, "PC_CHANGE", + "number of times the program counter was changed without a mode switch"}, + {0x0f, CTR(0) | CTR(1), 0, "TLB_MISS", + "Main TLB miss"}, + {0x10, CTR(0) | CTR(1), 0, "EXP_EXTERNAL", + "Explict external data access"}, + {0x11, CTR(0) | CTR(1), 0, "LSU_STALL", + "cycles stalled because Load Store request queque is full"}, + {0x12, CTR(0) | CTR(1), 0, "WRITE_DRAIN", + "Times write buffer was drained"}, + {0x20, CTR(0) | CTR(1), 0, "ETMEXTOUT0", + "nuber of cycles ETMEXTOUT[0] signal was asserted"}, + {0x21, CTR(0) | CTR(1), 0, "ETMEXTOUT1", + "nuber of cycles ETMEXTOUT[1] signal was asserted"}, + {0x22, CTR(0) | CTR(1), 0, "ETMEXTOUT_BOTH", + "nuber of cycles both 
ETMEXTOUT [0] and [1] were asserted * 2"}, + {0xff, CTR(0) | CTR(1) | CTR(2), 0, "CPU_CYCLES", + "clock cycles counter"}, diff --git a/events/arm/armv7/events.h b/events/arm/armv7/events.h new file mode 100644 index 0000000..d662cca --- /dev/null +++ b/events/arm/armv7/events.h @@ -0,0 +1,100 @@ +// events from file arm/armv7-common/events + {0x00, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "PMNC_SW_INCR", + "Software increment of PMNC registers"}, + {0x01, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "IFETCH_MISS", + "Instruction fetch misses from cache or normal cacheable memory"}, + {0x02, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "ITLB_MISS", + "Instruction fetch misses from TLB"}, + {0x03, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "DCACHE_REFILL", + "Data R/W operation that causes a refill from cache or normal cacheable memory"}, + {0x04, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "DCACHE_ACCESS", + "Data R/W from cache"}, + {0x05, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "DTLB_REFILL", + "Data R/W that causes a TLB refill"}, + {0x06, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "DREAD", + "Data read architecturally executed (note: architecturally executed = for instructions that are unconditional or that pass the condition code)"}, + {0x07, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "DWRITE", + "Data write architecturally executed"}, + {0x08, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "INSTR_EXECUTED", + "All executed instructions"}, + {0x09, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "EXC_TAKEN", + "Exception taken"}, + {0x0A, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "EXC_EXECUTED", + "Exception return architecturally executed"}, + {0x0B, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "CID_WRITE", + "Instruction that writes to the Context ID Register architecturally executed"}, + {0x0C, CTR(1) | CTR(2) | 
CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "PC_WRITE", + "SW change of PC, architecturally executed (not by exceptions)"}, + {0x0D, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "PC_IMM_BRANCH", + "Immediate branch instruction executed (taken or not)"}, + {0x0E, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "PC_PROC_RETURN", + "Procedure return architecturally executed (not by exceptions)"}, + {0x0F, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "UNALIGNED_ACCESS", + "Unaligned access architecturally executed"}, + {0x10, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "PC_BRANCH_MIS_PRED", + "Branch mispredicted or not predicted. Counts pipeline flushes because of misprediction"}, + {0x12, CTR(1) | CTR(2) | CTR(3) | CTR(4) | CTR(5) | CTR(6), 0, "PC_BRANCH_MIS_USED", + "Branch or change in program flow that could have been predicted"}, + {0xFF, CTR(0), 0, "CPU_CYCLES", + "Number of CPU cycles"}, +// events from file arm/armv7/events + {0x40, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "WRITE_BUFFER_FULL", + "Any write buffer full cycle"}, + {0x41, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "L2_STORE_MERGED", + "Any store that is merged in L2 cache"}, + {0x42, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "L2_STORE_BUFF", + "Any bufferable store from load/store to L2 cache"}, + {0x43, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "L2_ACCESS", + "Any access to L2 cache"}, + {0x44, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "L2_CACH_MISS", + "Any cacheable miss in L2 cache"}, + {0x45, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "AXI_READ_CYCLES", + "Number of cycles for an active AXI read"}, + {0x46, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "AXI_WRITE_CYCLES", + "Number of cycles for an active AXI write"}, + {0x47, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "MEMORY_REPLAY", + "Any replay event in the memory subsystem"}, + {0x48, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "UNALIGNED_ACCESS_REPLAY", + "Unaligned access that causes a replay"}, + {0x49, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, 
"L1_DATA_MISS", + "L1 data cache miss as a result of the hashing algorithm"}, + {0x4A, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "L1_INST_MISS", + "L1 instruction cache miss as a result of the hashing algorithm"}, + {0x4B, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "L1_DATA_COLORING", + "L1 data access in which a page coloring alias occurs"}, + {0x4C, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "L1_NEON_DATA", + "NEON data access that hits L1 cache"}, + {0x4D, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "L1_NEON_CACH_DATA", + "NEON cacheable data access that hits L1 cache"}, + {0x4E, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "L2_NEON", + "L2 access as a result of NEON memory access"}, + {0x4F, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "L2_NEON_HIT", + "Any NEON hit in L2 cache"}, + {0x50, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "L1_INST", + "Any L1 instruction cache access, excluding CP15 cache accesses"}, + {0x51, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "PC_RETURN_MIS_PRED", + "Return stack misprediction at return stack pop (incorrect target address)"}, + {0x52, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "PC_BRANCH_FAILED", + "Branch prediction misprediction"}, + {0x53, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "PC_BRANCH_TAKEN", + "Any predicted branch that is taken"}, + {0x54, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "PC_BRANCH_EXECUTED", + "Any taken branch that is executed"}, + {0x55, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "OP_EXECUTED", + "Number of operations executed (in instruction or mutli-cycle instruction)"}, + {0x56, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "CYCLES_INST_STALL", + "Cycles where no instruction available"}, + {0x57, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "CYCLES_INST", + "Number of instructions issued in a cycle"}, + {0x58, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "CYCLES_NEON_DATA_STALL", + "Number of cycles the processor waits on MRC data from NEON"}, + {0x59, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "CYCLES_NEON_INST_STALL", + "Number of cycles the processor waits on NEON instruction queue 
or NEON load queue"}, + {0x5A, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "NEON_CYCLES", + "Number of cycles NEON and integer processors are not idle"}, + {0x70, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "PMU0_EVENTS", + "Number of events from external input source PMUEXTIN[0]"}, + {0x71, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "PMU1_EVENTS", + "Number of events from external input source PMUEXTIN[1]"}, + {0x72, CTR(1) | CTR(2) | CTR(3) | CTR(4), 0, "PMU_EVENTS", + "Number of events from both external input sources PMUEXTIN[0] and PMUEXTIN[1]"}, diff --git a/events/i386/arch_perfmon/events.h b/events/i386/arch_perfmon/events.h new file mode 100644 index 0000000..bdd2cd7 --- /dev/null +++ b/events/i386/arch_perfmon/events.h @@ -0,0 +1,15 @@ +// events from file events/i386/arch_perfmon/events + {0x3c, 0, 0, "CPU_CLK_UNHALTED", + "Clock cycles when not halted"}, + {0x3c, 0, 1, "UNHALTED_REFERENCE_CYCLES", + "Unhalted reference cycles"}, + {0xc0, 0, 0, "INST_RETIRED", + "number of instructions retired"}, + {0x2e, 0, 0x41, "LLC_MISSES", + "Last level cache demand requests from this core that missed the LLC"}, + {0x2e, 0, 0x4f, "LLC_REFS", + "Last level cache demand requests from this core"}, + {0xc4, 0, 0, "BR_INST_RETIRED", + "number of branch instructions retired"}, + {0xc5, 0, 0, "BR_MISS_PRED_RETIRED", + "number of mispredicted branches retired (precise)"}, diff --git a/events/mips/1004K/events.h b/events/mips/1004K/events.h new file mode 100644 index 0000000..1af89d1 --- /dev/null +++ b/events/mips/1004K/events.h @@ -0,0 +1,229 @@ +// events from file events/mips/1004K/events + {0x0, CTR(0) | CTR(1), 0, "CYCLES", + "0-0 Cycles"}, + {0x1, CTR(0) | CTR(1), 0, "INSTRUCTIONS", + "1-0 Instructions completed"}, + {0xb, CTR(0) | CTR(1), 0, "DCACHE_MISSES", + "11-0 Data cache misses"}, + {0x2, CTR(0), 0, "BRANCH_INSNS", + "2-0 Branch instructions (whether completed or mispredicted)"}, + {0x3, CTR(0), 0, "JR_31_INSNS", + "3-0 JR $31 (return) instructions executed"}, + {0x4, CTR(0), 
0, "JR_NON_31_INSNS", + "4-0 JR $xx (not $31) instructions executed (at same cost as a mispredict)"}, + {0x5, CTR(0), 0, "ITLB_ACCESSES", + "5-0 Instruction micro-TLB accesses"}, + {0x6, CTR(0), 0, "DTLB_ACCESSES", + "6-0 Data micro-TLB accesses"}, + {0x7, CTR(0), 0, "JTLB_INSN_ACCESSES", + "7-0 Joint TLB instruction accesses"}, + {0x8, CTR(0), 0, "JTLB_DATA_ACCESSES", + "8-0 Joint TLB data (non-instruction) accesses"}, + {0x9, CTR(0), 0, "ICACHE_ACCESSES", + "9-0 Instruction cache accesses"}, + {0xa, CTR(0), 0, "DCACHE_ACCESSES", + "10-0 Data cache accesses"}, + {0xd, CTR(0), 0, "STORE_MISS_INSNS", + "13-0 Cacheable stores that miss in the cache"}, + {0xe, CTR(0), 0, "INTEGER_INSNS", + "14-0 Integer instructions completed"}, + {0xf, CTR(0), 0, "LOAD_INSNS", + "15-0 Load instructions completed (including FP)"}, + {0x10, CTR(0), 0, "J_JAL_INSNS", + "16-0 J/JAL instructions completed"}, + {0x11, CTR(0), 0, "NO_OPS_INSNS", + "17-0 no-ops completed, ie instructions writing $0"}, + {0x12, CTR(0), 0, "ALL_STALLS", + "18-0 Stall cycles, including ALU and IFU"}, + {0x13, CTR(0), 0, "SC_INSNS", + "19-0 SC instructions completed"}, + {0x14, CTR(0), 0, "PREFETCH_INSNS", + "20-0 PREFETCH instructions completed"}, + {0x15, CTR(0), 0, "L2_CACHE_WRITEBACKS", + "21-0 L2 cache lines written back to memory"}, + {0x16, CTR(0), 0, "L2_CACHE_MISSES", + "22-0 L2 cache accesses that missed in the cache"}, + {0x17, CTR(0), 0, "EXCEPTIONS_TAKEN", + "23-0 Exceptions taken"}, + {0x18, CTR(0), 0, "CACHE_FIXUP_CYCLES", + "24-0 Cache fixup cycles (specific to the 34K family microarchitecture)"}, + {0x19, CTR(0), 0, "IFU_STALLS", + "25-0 IFU stall cycles"}, + {0x1a, CTR(0), 0, "DSP_INSNS", + "26-0 DSP instructions completed"}, + {0x1c, CTR(0), 0, "POLICY_EVENTS", + "28-0 Implementation specific policy manager events"}, + {0x1d, CTR(0), 0, "ISPRAM_EVENTS", + "29-0 Implementation specific ISPRAM events"}, + {0x1e, CTR(0), 0, "COREEXTEND_EVENTS", + "30-0 Implementation specific CorExtend events"}, 
+ {0x1f, CTR(0), 0, "YIELD_EVENTS", + "31-0 Implementation specific yield events"}, + {0x20, CTR(0), 0, "ITC_LOADS", + "32-0 ITC Loads"}, + {0x21, CTR(0), 0, "UNCACHED_LOAD_INSNS", + "33-0 Uncached load instructions"}, + {0x22, CTR(0), 0, "FORK_INSNS", + "34-0 Fork instructions completed"}, + {0x23, CTR(0), 0, "CP2_ARITH_INSNS", + "35-0 CP2 arithmetic instructions completed"}, + {0x24, CTR(0), 0, "INTERVENTION_STALLS", + "36-0 Cache coherence intervention processing stall cycles"}, + {0x25, CTR(0), 0, "ICACHE_MISS_STALLS", + "37-0 Stall cycles due to an instruction cache miss"}, + {0x27, CTR(0), 0, "DCACHE_MISS_CYCLES", + "39-0 Cycles a data cache miss is outstanding, but not necessarily stalling the pipeline"}, + {0x28, CTR(0), 0, "UNCACHED_STALLS", + "40-0 Uncached stall cycles"}, + {0x29, CTR(0), 0, "MDU_STALLS", + "41-0 MDU stall cycles"}, + {0x2a, CTR(0), 0, "CP2_STALLS", + "42-0 CP2 stall cycles"}, + {0x2b, CTR(0), 0, "ISPRAM_STALLS", + "43-0 ISPRAM stall cycles"}, + {0x2c, CTR(0), 0, "CACHE_INSN_STALLS", + "44-0 Stall cycless due to CACHE instructions"}, + {0x2d, CTR(0), 0, "LOAD_USE_STALLS", + "45-0 Load to use stall cycles"}, + {0x2e, CTR(0), 0, "INTERLOCK_STALLS", + "46-0 Stall cycles due to return data from MFC0, RDHWR, and MFTR instructions"}, + {0x2f, CTR(0), 0, "RELAX_STALLS", + "47-0 Low power stall cycles (operations) as requested by the policy manager"}, + {0x30, CTR(0), 0, "IFU_FB_FULL_REFETCHES", + "48-0 Refetches due to cache misses while both fill buffers already allocated"}, + {0x31, CTR(0), 0, "EJTAG_INSN_TRIGGERS", + "49-0 EJTAG instruction triggerpoints"}, + {0x32, CTR(0), 0, "FSB_LESS_25_FULL", + "50-0 FSB < 25% full"}, + {0x33, CTR(0), 0, "FSB_OVER_50_FULL", + "51-0 FSB > 50% full"}, + {0x34, CTR(0), 0, "LDQ_LESS_25_FULL", + "52-0 LDQ < 25% full"}, + {0x35, CTR(0), 0, "LDQ_OVER_50_FULL", + "53-0 LDQ > 50% full"}, + {0x36, CTR(0), 0, "WBB_LESS_25_FULL", + "54-0 WBB < 25% full"}, + {0x37, CTR(0), 0, "WBB_OVER_50_FULL", + "55-0 WBB > 50% 
full"}, + {0x38, CTR(0), 0, "INTERVENTION_HIT_COUNT", + "56-0 External interventions that hit in the cache"}, + {0x39, CTR(0), 0, "INVALIDATE_INTERVENTION_COUNT", + "57-0 External invalidate (i.e. leaving a cache line in the invalid state) interventions"}, + {0x3a, CTR(0), 0, "EVICTION_COUNT", + "58-0 Cache lines written back due to cache replacement or non-coherent cache operation"}, + {0x3b, CTR(0), 0, "MESI_INVAL_COUNT", + "59-0 MESI protocol transitions into invalid state"}, + {0x3c, CTR(0), 0, "MESI_MODIFIED_COUNT", + "60-0 MESI protocol transitions into modified state"}, + {0x3d, CTR(0), 0, "SELF_INTERVENTION_LATENCY", + "61-0 Latency from miss detection to self intervention"}, + {0x3e, CTR(0), 0, "READ_RESPONSE_LATENCY", + "62-0 Read latency from miss detection until critical dword of response is returned"}, + {0x402, CTR(1), 0, "MISPREDICTED_BRANCH_INSNS", + "2-1 Branch mispredictions"}, + {0x403, CTR(1), 0, "JR_31_MISPREDICTIONS", + "3-1 JR $31 mispredictions"}, + {0x404, CTR(1), 0, "JR_31_NO_PREDICTIONS", + "4-1 JR $31 not predicted (stack mismatch)."}, + {0x405, CTR(1), 0, "ITLB_MISSES", + "5-1 Instruction micro-TLB misses"}, + {0x406, CTR(1), 0, "DTLB_MISSES", + "6-1 Data micro-TLB misses"}, + {0x407, CTR(1), 0, "JTLB_INSN_MISSES", + "7-1 Joint TLB instruction misses"}, + {0x408, CTR(1), 0, "JTLB_DATA_MISSES", + "8-1 Joint TLB data (non-instruction) misses"}, + {0x409, CTR(1), 0, "ICACHE_MISSES", + "9-1 Instruction cache misses"}, + {0x40a, CTR(1), 0, "DCACHE_WRITEBACKS", + "10-1 Data cache lines written back to memory"}, + {0x40d, CTR(1), 0, "LOAD_MISS_INSNS", + "13-1 Cacheable load instructions that miss in the cache"}, + {0x40e, CTR(1), 0, "FPU_INSNS", + "14-1 FPU instructions completed (not including loads/stores)"}, + {0x40f, CTR(1), 0, "STORE_INSNS", + "15-1 Stores completed (including FP)"}, + {0x410, CTR(1), 0, "MIPS16_INSNS", + "16-1 MIPS16 instructions completed"}, + {0x411, CTR(1), 0, "INT_MUL_DIV_INSNS", + "17-1 Integer multiply/divide 
instructions completed"}, + {0x412, CTR(1), 0, "REPLAYED_INSNS", + "18-1 Replayed instructions"}, + {0x413, CTR(1), 0, "SC_INSNS_FAILED", + "19-1 SC instructions completed, but store failed (because the link bit had been cleared)"}, + {0x414, CTR(1), 0, "CACHE_HIT_PREFETCH_INSNS", + "20-1 PREFETCH instructions completed with cache hit"}, + {0x415, CTR(1), 0, "L2_CACHE_ACCESSES", + "21-1 Accesses to the L2 cache"}, + {0x416, CTR(1), 0, "L2_CACHE_SINGLE_BIT_ERRORS", + "22-1 Single bit errors corrected in L2"}, + {0x417, CTR(1), 0, "SINGLE_THREADED_CYCLES", + "23-1 Cycles while one and only one TC is eligible for scheduling"}, + {0x418, CTR(1), 0, "REFETCHED_INSNS", + "24-1 Replayed instructions sent back to IFU to be refetched"}, + {0x419, CTR(1), 0, "ALU_STALLS", + "25-1 ALU stall cycles"}, + {0x41a, CTR(1), 0, "ALU_DSP_SATURATION_INSNS", + "26-1 ALU-DSP saturation instructions"}, + {0x41b, CTR(1), 0, "MDU_DSP_SATURATION_INSNS", + "27-1 MDU-DSP saturation instructions"}, + {0x41c, CTR(1), 0, "CP2_EVENTS", + "28-1 Implementation specific CP2 events"}, + {0x41d, CTR(1), 0, "DSPRAM_EVENTS", + "29-1 Implementation specific DSPRAM events"}, + {0x41f, CTR(1), 0, "ITC_EVENT", + "31-1 Implementation specific yield event"}, + {0x421, CTR(1), 0, "UNCACHED_STORE_INSNS", + "33-1 Uncached store instructions"}, + {0x423, CTR(1), 0, "CP2_TO_FROM_INSNS", + "35-1 CP2 to/from instructions (moves, control, loads, stores)"}, + {0x424, CTR(1), 0, "INTERVENTION_MISS_STALLS", + "36-1 Cache coherence intervention processing stall cycles due to an earlier miss"}, + {0x425, CTR(1), 0, "DCACHE_MISS_STALLS", + "37-1 Stall cycles due to a data cache miss"}, + {0x426, CTR(1), 0, "FSB_INDEX_CONFLICT_STALLS", + "38-1 FSB (fill/store buffer) index conflict stall cycles"}, + {0x427, CTR(1), 0, "L2_CACHE_MISS_CYCLES", + "39-1 Cycles a L2 miss is outstanding, but not necessarily stalling the pipeline"}, + {0x428, CTR(1), 0, "ITC_STALLS", + "40-1 ITC stall cycles"}, + {0x429, CTR(1), 0, "FPU_STALLS", + 
"41-1 FPU stall cycles"}, + {0x42a, CTR(1), 0, "COREEXTEND_STALLS", + "42-1 CorExtend stall cycles"}, + {0x42b, CTR(1), 0, "DSPRAM_STALLS", + "43-1 DSPRAM stall cycles"}, + {0x42d, CTR(1), 0, "ALU_TO_AGEN_STALLS", + "45-1 ALU to AGEN stall cycles"}, + {0x42e, CTR(1), 0, "MISPREDICTION_STALLS", + "46-1 Branch mispredict stall cycles"}, + {0x430, CTR(1), 0, "FB_ENTRY_ALLOCATED_CYCLES", + "48-1 Cycles while at least one IFU fill buffer is allocated"}, + {0x431, CTR(1), 0, "EJTAG_DATA_TRIGGERS", + "49-1 EJTAG Data triggerpoints"}, + {0x432, CTR(1), 0, "FSB_25_50_FULL", + "50-1 FSB 25-50% full"}, + {0x433, CTR(1), 0, "FSB_FULL_STALLS", + "51-1 FSB full pipeline stall cycles"}, + {0x434, CTR(1), 0, "LDQ_25_50_FULL", + "52-1 LDQ 25-50% full"}, + {0x435, CTR(1), 0, "LDQ_FULL_STALLS", + "53-1 LDQ full pipeline stall cycles"}, + {0x436, CTR(1), 0, "WBB_25_50_FULL", + "54-1 WBB 25-50% full"}, + {0x437, CTR(1), 0, "WBB_FULL_STALLS", + "55-1 WBB full pipeline stall cycles"}, + {0x438, CTR(1), 0, "INTERVENTION_COUNT", + "56-1 External interventions"}, + {0x439, CTR(1), 0, "INVALIDATE_INTERVENTION_HIT_COUNT", + "57-1 External invalidate interventions that hit in the cache"}, + {0x43a, CTR(1), 0, "WRITEBACK_COUNT", + "58-1 Cache lines written back due to cache replacement or any cache operation (non-coherent, self, or external coherent)"}, + {0x43b, CTR(1), 0, "MESI_EXCLUSIVE_COUNT", + "59-1 MESI protocol transitions into exclusive state"}, + {0x43c, CTR(1), 0, "MESI_SHARED_COUNT", + "60-1 MESI protocol transitions into shared state"}, + {0x43d, CTR(1), 0, "SELF_INTERVENTION_COUNT", + "61-1 Self intervention requests on miss detection"}, + {0x43e, CTR(1), 0, "READ_RESPONSE_COUNT", + "62-1 Read requests on miss detection"}, diff --git a/events/mips/24K/events.h b/events/mips/24K/events.h new file mode 100644 index 0000000..c4cde9f --- /dev/null +++ b/events/mips/24K/events.h @@ -0,0 +1,181 @@ +// events from file events/mips/24K/events + {0x0, CTR(0) | CTR(1), 0, "CYCLES", + "0-0 
Cycles"}, + {0x1, CTR(0) | CTR(1), 0, "INSTRUCTIONS", + "1-0 Instructions completed"}, + {0xb, CTR(0) | CTR(1), 0, "DCACHE_MISSES", + "11-0 Data cache misses"}, + {0x2, CTR(0), 0, "BRANCH_INSNS", + "2-0 Branch instructions (whether completed or mispredicted)"}, + {0x3, CTR(0), 0, "JR_31_INSNS", + "3-0 JR $31 (return) instructions executed"}, + {0x4, CTR(0), 0, "JR_NON_31_INSNS", + "4-0 JR $xx (not $31) instructions executed (at same cost as a mispredict)"}, + {0x5, CTR(0), 0, "ITLB_ACCESSES", + "5-0 Instruction micro-TLB accesses"}, + {0x6, CTR(0), 0, "DTLB_ACCESSES", + "6-0 Data micro-TLB accesses"}, + {0x7, CTR(0), 0, "JTLB_INSN_ACCESSES", + "7-0 Joint TLB instruction accesses"}, + {0x8, CTR(0), 0, "JTLB_DATA_ACCESSES", + "8-0 Joint TLB data (non-instruction) accesses"}, + {0x9, CTR(0), 0, "ICACHE_ACCESSES", + "9-0 Instruction cache accesses"}, + {0xa, CTR(0), 0, "DCACHE_ACCESSES", + "10-0 Data cache accesses"}, + {0xd, CTR(0), 0, "STORE_MISS_INSNS", + "13-0 Cacheable stores that miss in the cache"}, + {0xe, CTR(0), 0, "INTEGER_INSNS", + "14-0 Integer instructions completed"}, + {0xf, CTR(0), 0, "LOAD_INSNS", + "15-0 Load instructions completed (including FP)"}, + {0x10, CTR(0), 0, "J_JAL_INSNS", + "16-0 J/JAL instructions completed"}, + {0x11, CTR(0), 0, "NO_OPS_INSNS", + "17-0 no-ops completed, ie instructions writing $0"}, + {0x12, CTR(0), 0, "ALL_STALLS", + "18-0 Stall cycles, including ALU and IFU"}, + {0x13, CTR(0), 0, "SC_INSNS", + "19-0 SC instructions completed"}, + {0x14, CTR(0), 0, "PREFETCH_INSNS", + "20-0 PREFETCH instructions completed"}, + {0x15, CTR(0), 0, "L2_CACHE_WRITEBACKS", + "21-0 L2 cache lines written back to memory"}, + {0x16, CTR(0), 0, "L2_CACHE_MISSES", + "22-0 L2 cache accesses that missed in the cache"}, + {0x17, CTR(0), 0, "EXCEPTIONS_TAKEN", + "23-0 Exceptions taken"}, + {0x18, CTR(0), 0, "CACHE_FIXUP_CYCLES", + "24-0 Cache fixup cycles (specific to the 24K family microarchitecture)"}, + {0x19, CTR(0), 0, "IFU_STALLS", + "25-0 IFU 
stall cycles"}, + {0x1a, CTR(0), 0, "DSP_INSNS", + "26-0 DSP instructions completed"}, + {0x1d, CTR(0), 0, "ISPRAM_EVENTS", + "29-0 Implementation specific ISPRAM events"}, + {0x1e, CTR(0), 0, "COREEXTEND_EVENTS", + "30-0 Implementation specific CorExtend events"}, + {0x21, CTR(0), 0, "UNCACHED_LOAD_INSNS", + "33-0 Uncached load instructions"}, + {0x23, CTR(0), 0, "CP2_ARITH_INSNS", + "35-0 CP2 arithmetic instructions completed"}, + {0x25, CTR(0), 0, "ICACHE_MISS_STALLS", + "37-0 Stall cycles due to an instruction cache miss"}, + {0x26, CTR(0), 0, "SYNC_STALLS", + "38-0 SYNC stall cycles"}, + {0x27, CTR(0), 0, "DCACHE_MISS_CYCLES", + "39-0 Cycles a data cache miss is outstanding, but not necessarily stalling the pipeline"}, + {0x28, CTR(0), 0, "UNCACHED_STALLS", + "40-0 Uncached stall cycles"}, + {0x29, CTR(0), 0, "MDU_STALLS", + "41-0 MDU stall cycles"}, + {0x2a, CTR(0), 0, "CP2_STALLS", + "42-0 CP2 stall cycles"}, + {0x2b, CTR(0), 0, "ISPRAM_STALLS", + "43-0 ISPRAM stall cycles"}, + {0x2c, CTR(0), 0, "CACHE_INSN_STALLS", + "44-0 Stall cycless due to CACHE instructions"}, + {0x2d, CTR(0), 0, "LOAD_USE_STALLS", + "45-0 Load to use stall cycles"}, + {0x2e, CTR(0), 0, "INTERLOCK_STALLS", + "46-0 Stall cycles due to return data from MFC0 and RDHWR instructions"}, + {0x30, CTR(0), 0, "IFU_FB_FULL_REFETCHES", + "48-0 Refetches due to cache misses while both fill buffers already allocated"}, + {0x31, CTR(0), 0, "EJTAG_INSN_TRIGGERS", + "49-0 EJTAG instruction triggerpoints"}, + {0x32, CTR(0), 0, "FSB_LESS_25_FULL", + "50-0 FSB < 25% full"}, + {0x33, CTR(0), 0, "FSB_OVER_50_FULL", + "51-0 FSB > 50% full"}, + {0x34, CTR(0), 0, "LDQ_LESS_25_FULL", + "52-0 LDQ < 25% full"}, + {0x35, CTR(0), 0, "LDQ_OVER_50_FULL", + "53-0 LDQ > 50% full"}, + {0x36, CTR(0), 0, "WBB_LESS_25_FULL", + "54-0 WBB < 25% full"}, + {0x37, CTR(0), 0, "WBB_OVER_50_FULL", + "55-0 WBB > 50% full"}, + {0x402, CTR(1), 0, "MISPREDICTED_BRANCH_INSNS", + "2-1 Branch mispredictions"}, + {0x403, CTR(1), 0, 
"JR_31_MISPREDICTIONS", + "3-1 JR $31 mispredictions"}, + {0x404, CTR(1), 0, "JR_31_NO_PREDICTIONS", + "4-1 JR $31 not predicted (stack mismatch)."}, + {0x405, CTR(1), 0, "ITLB_MISSES", + "5-1 Instruction micro-TLB misses"}, + {0x406, CTR(1), 0, "DTLB_MISSES", + "6-1 Data micro-TLB misses"}, + {0x407, CTR(1), 0, "JTLB_INSN_MISSES", + "7-1 Joint TLB instruction misses"}, + {0x408, CTR(1), 0, "JTLB_DATA_MISSES", + "8-1 Joint TLB data (non-instruction) misses"}, + {0x409, CTR(1), 0, "ICACHE_MISSES", + "9-1 Instruction cache misses"}, + {0x40a, CTR(1), 0, "DCACHE_WRITEBACKS", + "10-1 Data cache lines written back to memory"}, + {0x40d, CTR(1), 0, "LOAD_MISS_INSNS", + "13-1 Cacheable load instructions that miss in the cache"}, + {0x40e, CTR(1), 0, "FPU_INSNS", + "14-1 FPU instructions completed (not including loads/stores)"}, + {0x40f, CTR(1), 0, "STORE_INSNS", + "15-1 Stores completed (including FP)"}, + {0x410, CTR(1), 0, "MIPS16_INSNS", + "16-1 MIPS16 instructions completed"}, + {0x411, CTR(1), 0, "INT_MUL_DIV_INSNS", + "17-1 Integer multiply/divide instructions completed"}, + {0x412, CTR(1), 0, "REPLAYED_INSNS", + "18-1 Replayed instructions"}, + {0x413, CTR(1), 0, "SC_INSNS_FAILED", + "19-1 SC instructions completed, but store failed (because the link bit had been cleared)"}, + {0x414, CTR(1), 0, "CACHE_HIT_PREFETCH_INSNS", + "20-1 PREFETCH instructions completed with cache hit"}, + {0x415, CTR(1), 0, "L2_CACHE_ACCESSES", + "21-1 Accesses to the L2 cache"}, + {0x416, CTR(1), 0, "L2_CACHE_SINGLE_BIT_ERRORS", + "22-1 Single bit errors corrected in L2"}, + {0x419, CTR(1), 0, "ALU_STALLS", + "25-1 ALU stall cycles"}, + {0x41a, CTR(1), 0, "ALU_DSP_SATURATION_INSNS", + "26-1 ALU-DSP saturation instructions"}, + {0x41b, CTR(1), 0, "MDU_DSP_SATURATION_INSNS", + "27-1 MDU-DSP saturation instructions"}, + {0x41c, CTR(1), 0, "CP2_EVENTS", + "28-1 Implementation specific CP2 events"}, + {0x41d, CTR(1), 0, "DSPRAM_EVENTS", + "29-1 Implementation specific DSPRAM events"}, + 
{0x421, CTR(1), 0, "UNCACHED_STORE_INSNS", + "33-1 Uncached store instructions"}, + {0x423, CTR(1), 0, "CP2_TO_FROM_INSNS", + "35-1 CP2 to/from instructions (moves, control, loads, stores)"}, + {0x425, CTR(1), 0, "DCACHE_MISS_STALLS", + "37-1 Stall cycles due to a data cache miss"}, + {0x426, CTR(1), 0, "FSB_INDEX_CONFLICT_STALLS", + "38-1 FSB (fill/store buffer) index conflict stall cycles"}, + {0x427, CTR(1), 0, "L2_CACHE_MISS_CYCLES", + "39-1 Cycles a L2 miss is outstanding, but not necessarily stalling the pipeline"}, + {0x429, CTR(1), 0, "FPU_STALLS", + "41-1 FPU stall cycles"}, + {0x42a, CTR(1), 0, "COREEXTEND_STALLS", + "42-1 CorExtend stall cycles"}, + {0x42b, CTR(1), 0, "DSPRAM_STALLS", + "43-1 DSPRAM stall cycles"}, + {0x42d, CTR(1), 0, "ALU_TO_AGEN_STALLS", + "45-1 ALU to AGEN stall cycles"}, + {0x42e, CTR(1), 0, "MISPREDICTION_STALLS", + "46-1 Branch mispredict stall cycles"}, + {0x430, CTR(1), 0, "FB_ENTRY_ALLOCATED_CYCLES", + "48-1 Cycles while at least one IFU fill buffer is allocated"}, + {0x431, CTR(1), 0, "EJTAG_DATA_TRIGGERS", + "49-1 EJTAG Data triggerpoints"}, + {0x432, CTR(1), 0, "FSB_25_50_FULL", + "50-1 FSB 25-50% full"}, + {0x433, CTR(1), 0, "FSB_FULL_STALLS", + "51-1 FSB full pipeline stall cycles"}, + {0x434, CTR(1), 0, "LDQ_25_50_FULL", + "52-1 LDQ 25-50% full"}, + {0x435, CTR(1), 0, "LDQ_FULL_STALLS", + "53-1 LDQ full pipeline stall cycles"}, + {0x436, CTR(1), 0, "WBB_25_50_FULL", + "54-1 WBB 25-50% full"}, + {0x437, CTR(1), 0, "WBB_FULL_STALLS", + "55-1 WBB full pipeline stall cycles"}, diff --git a/events/mips/34K/events.h b/events/mips/34K/events.h new file mode 100644 index 0000000..b3226e9 --- /dev/null +++ b/events/mips/34K/events.h @@ -0,0 +1,199 @@ +// events from file events/mips/34K/events + {0x0, CTR(0) | CTR(1), 0, "CYCLES", + "0-0 Cycles"}, + {0x1, CTR(0) | CTR(1), 0, "INSTRUCTIONS", + "1-0 Instructions completed"}, + {0xb, CTR(0) | CTR(1), 0, "DCACHE_MISSES", + "11-0 Data cache misses"}, + {0x2, CTR(0), 0, "BRANCH_INSNS", 
+ "2-0 Branch instructions (whether completed or mispredicted)"}, + {0x3, CTR(0), 0, "JR_31_INSNS", + "3-0 JR $31 (return) instructions executed"}, + {0x4, CTR(0), 0, "JR_NON_31_INSNS", + "4-0 JR $xx (not $31) instructions executed (at same cost as a mispredict)"}, + {0x5, CTR(0), 0, "ITLB_ACCESSES", + "5-0 Instruction micro-TLB accesses"}, + {0x6, CTR(0), 0, "DTLB_ACCESSES", + "6-0 Data micro-TLB accesses"}, + {0x7, CTR(0), 0, "JTLB_INSN_ACCESSES", + "7-0 Joint TLB instruction accesses"}, + {0x8, CTR(0), 0, "JTLB_DATA_ACCESSES", + "8-0 Joint TLB data (non-instruction) accesses"}, + {0x9, CTR(0), 0, "ICACHE_ACCESSES", + "9-0 Instruction cache accesses"}, + {0xa, CTR(0), 0, "DCACHE_ACCESSES", + "10-0 Data cache accesses"}, + {0xd, CTR(0), 0, "STORE_MISS_INSNS", + "13-0 Cacheable stores that miss in the cache"}, + {0xe, CTR(0), 0, "INTEGER_INSNS", + "14-0 Integer instructions completed"}, + {0xf, CTR(0), 0, "LOAD_INSNS", + "15-0 Load instructions completed (including FP)"}, + {0x10, CTR(0), 0, "J_JAL_INSNS", + "16-0 J/JAL instructions completed"}, + {0x11, CTR(0), 0, "NO_OPS_INSNS", + "17-0 no-ops completed, ie instructions writing $0"}, + {0x12, CTR(0), 0, "ALL_STALLS", + "18-0 Stall cycles, including ALU and IFU"}, + {0x13, CTR(0), 0, "SC_INSNS", + "19-0 SC instructions completed"}, + {0x14, CTR(0), 0, "PREFETCH_INSNS", + "20-0 PREFETCH instructions completed"}, + {0x15, CTR(0), 0, "L2_CACHE_WRITEBACKS", + "21-0 L2 cache lines written back to memory"}, + {0x16, CTR(0), 0, "L2_CACHE_MISSES", + "22-0 L2 cache accesses that missed in the cache"}, + {0x17, CTR(0), 0, "EXCEPTIONS_TAKEN", + "23-0 Exceptions taken"}, + {0x18, CTR(0), 0, "CACHE_FIXUP_CYCLES", + "24-0 Cache fixup cycles (specific to the 34K family microarchitecture)"}, + {0x19, CTR(0), 0, "IFU_STALLS", + "25-0 IFU stall cycles"}, + {0x1a, CTR(0), 0, "DSP_INSNS", + "26-0 DSP instructions completed"}, + {0x1c, CTR(0), 0, "POLICY_EVENTS", + "28-0 Implementation specific policy manager events"}, + {0x1d, 
CTR(0), 0, "ISPRAM_EVENTS", + "29-0 Implementation specific ISPRAM events"}, + {0x1e, CTR(0), 0, "COREEXTEND_EVENTS", + "30-0 Implementation specific CorExtend events"}, + {0x1f, CTR(0), 0, "YIELD_EVENTS", + "31-0 Implementation specific yield events"}, + {0x20, CTR(0), 0, "ITC_LOADS", + "32-0 ITC Loads"}, + {0x21, CTR(0), 0, "UNCACHED_LOAD_INSNS", + "33-0 Uncached load instructions"}, + {0x22, CTR(0), 0, "FORK_INSNS", + "34-0 Fork instructions completed"}, + {0x23, CTR(0), 0, "CP2_ARITH_INSNS", + "35-0 CP2 arithmetic instructions completed"}, + {0x25, CTR(0), 0, "ICACHE_MISS_STALLS", + "37-0 Stall cycles due to an instruction cache miss"}, + {0x27, CTR(0), 0, "DCACHE_MISS_CYCLES", + "39-0 Cycles a data cache miss is outstanding, but not necessarily stalling the pipeline"}, + {0x28, CTR(0), 0, "UNCACHED_STALLS", + "40-0 Uncached stall cycles"}, + {0x29, CTR(0), 0, "MDU_STALLS", + "41-0 MDU stall cycles"}, + {0x2a, CTR(0), 0, "CP2_STALLS", + "42-0 CP2 stall cycles"}, + {0x2b, CTR(0), 0, "ISPRAM_STALLS", + "43-0 ISPRAM stall cycles"}, + {0x2c, CTR(0), 0, "CACHE_INSN_STALLS", + "44-0 Stall cycless due to CACHE instructions"}, + {0x2d, CTR(0), 0, "LOAD_USE_STALLS", + "45-0 Load to use stall cycles"}, + {0x2e, CTR(0), 0, "INTERLOCK_STALLS", + "46-0 Stall cycles due to return data from MFC0, RDHWR, and MFTR instructions"}, + {0x2f, CTR(0), 0, "RELAX_STALLS", + "47-0 Low power stall cycles (operations) as requested by the policy manager"}, + {0x30, CTR(0), 0, "IFU_FB_FULL_REFETCHES", + "48-0 Refetches due to cache misses while both fill buffers already allocated"}, + {0x31, CTR(0), 0, "EJTAG_INSN_TRIGGERS", + "49-0 EJTAG instruction triggerpoints"}, + {0x32, CTR(0), 0, "FSB_LESS_25_FULL", + "50-0 FSB < 25% full"}, + {0x33, CTR(0), 0, "FSB_OVER_50_FULL", + "51-0 FSB > 50% full"}, + {0x34, CTR(0), 0, "LDQ_LESS_25_FULL", + "52-0 LDQ < 25% full"}, + {0x35, CTR(0), 0, "LDQ_OVER_50_FULL", + "53-0 LDQ > 50% full"}, + {0x36, CTR(0), 0, "WBB_LESS_25_FULL", + "54-0 WBB < 25% 
full"}, + {0x37, CTR(0), 0, "WBB_OVER_50_FULL", + "55-0 WBB > 50% full"}, + {0x3e, CTR(0), 0, "READ_RESPONSE_LATENCY", + "62-0 Read latency from miss detection until critical dword of response is returned"}, + {0x402, CTR(1), 0, "MISPREDICTED_BRANCH_INSNS", + "2-1 Branch mispredictions"}, + {0x403, CTR(1), 0, "JR_31_MISPREDICTIONS", + "3-1 JR $31 mispredictions"}, + {0x404, CTR(1), 0, "JR_31_NO_PREDICTIONS", + "4-1 JR $31 not predicted (stack mismatch)."}, + {0x405, CTR(1), 0, "ITLB_MISSES", + "5-1 Instruction micro-TLB misses"}, + {0x406, CTR(1), 0, "DTLB_MISSES", + "6-1 Data micro-TLB misses"}, + {0x407, CTR(1), 0, "JTLB_INSN_MISSES", + "7-1 Joint TLB instruction misses"}, + {0x408, CTR(1), 0, "JTLB_DATA_MISSES", + "8-1 Joint TLB data (non-instruction) misses"}, + {0x409, CTR(1), 0, "ICACHE_MISSES", + "9-1 Instruction cache misses"}, + {0x40a, CTR(1), 0, "DCACHE_WRITEBACKS", + "10-1 Data cache lines written back to memory"}, + {0x40d, CTR(1), 0, "LOAD_MISS_INSNS", + "13-1 Cacheable load instructions that miss in the cache"}, + {0x40e, CTR(1), 0, "FPU_INSNS", + "14-1 FPU instructions completed (not including loads/stores)"}, + {0x40f, CTR(1), 0, "STORE_INSNS", + "15-1 Stores completed (including FP)"}, + {0x410, CTR(1), 0, "MIPS16_INSNS", + "16-1 MIPS16 instructions completed"}, + {0x411, CTR(1), 0, "INT_MUL_DIV_INSNS", + "17-1 Integer multiply/divide instructions completed"}, + {0x412, CTR(1), 0, "REPLAYED_INSNS", + "18-1 Replayed instructions"}, + {0x413, CTR(1), 0, "SC_INSNS_FAILED", + "19-1 SC instructions completed, but store failed (because the link bit had been cleared)"}, + {0x414, CTR(1), 0, "CACHE_HIT_PREFETCH_INSNS", + "20-1 PREFETCH instructions completed with cache hit"}, + {0x415, CTR(1), 0, "L2_CACHE_ACCESSES", + "21-1 Accesses to the L2 cache"}, + {0x416, CTR(1), 0, "L2_CACHE_SINGLE_BIT_ERRORS", + "22-1 Single bit errors corrected in L2"}, + {0x417, CTR(1), 0, "SINGLE_THREADED_CYCLES", + "23-1 Cycles while one and only one TC is eligible for 
scheduling"}, + {0x418, CTR(1), 0, "REFETCHED_INSNS", + "24-1 Replayed instructions sent back to IFU to be refetched"}, + {0x419, CTR(1), 0, "ALU_STALLS", + "25-1 ALU stall cycles"}, + {0x41a, CTR(1), 0, "ALU_DSP_SATURATION_INSNS", + "26-1 ALU-DSP saturation instructions"}, + {0x41b, CTR(1), 0, "MDU_DSP_SATURATION_INSNS", + "27-1 MDU-DSP saturation instructions"}, + {0x41c, CTR(1), 0, "CP2_EVENTS", + "28-1 Implementation specific CP2 events"}, + {0x41d, CTR(1), 0, "DSPRAM_EVENTS", + "29-1 Implementation specific DSPRAM events"}, + {0x41f, CTR(1), 0, "ITC_EVENT", + "31-1 Implementation specific yield event"}, + {0x421, CTR(1), 0, "UNCACHED_STORE_INSNS", + "33-1 Uncached store instructions"}, + {0x423, CTR(1), 0, "CP2_TO_FROM_INSNS", + "35-1 CP2 to/from instructions (moves, control, loads, stores)"}, + {0x425, CTR(1), 0, "DCACHE_MISS_STALLS", + "37-1 Stall cycles due to a data cache miss"}, + {0x427, CTR(1), 0, "L2_CACHE_MISS_CYCLES", + "39-1 Cycles a L2 miss is outstanding, but not necessarily stalling the pipeline"}, + {0x428, CTR(1), 0, "ITC_STALLS", + "40-1 ITC stall cycles"}, + {0x429, CTR(1), 0, "FPU_STALLS", + "41-1 FPU stall cycles"}, + {0x42a, CTR(1), 0, "COREEXTEND_STALLS", + "42-1 CorExtend stall cycles"}, + {0x42b, CTR(1), 0, "DSPRAM_STALLS", + "43-1 DSPRAM stall cycles"}, + {0x42d, CTR(1), 0, "ALU_TO_AGEN_STALLS", + "45-1 ALU to AGEN stall cycles"}, + {0x42e, CTR(1), 0, "MISPREDICTION_STALLS", + "46-1 Branch mispredict stall cycles"}, + {0x430, CTR(1), 0, "FB_ENTRY_ALLOCATED_CYCLES", + "48-1 Cycles while at least one IFU fill buffer is allocated"}, + {0x431, CTR(1), 0, "EJTAG_DATA_TRIGGERS", + "49-1 EJTAG Data triggerpoints"}, + {0x432, CTR(1), 0, "FSB_25_50_FULL", + "50-1 FSB 25-50% full"}, + {0x433, CTR(1), 0, "FSB_FULL_STALLS", + "51-1 FSB full pipeline stall cycles"}, + {0x434, CTR(1), 0, "LDQ_25_50_FULL", + "52-1 LDQ 25-50% full"}, + {0x435, CTR(1), 0, "LDQ_FULL_STALLS", + "53-1 LDQ full pipeline stall cycles"}, + {0x436, CTR(1), 0, 
"WBB_25_50_FULL", + "54-1 WBB 25-50% full"}, + {0x437, CTR(1), 0, "WBB_FULL_STALLS", + "55-1 WBB full pipeline stall cycles"}, + {0x43e, CTR(1), 0, "READ_RESPONSE_COUNT", + "62-1 Read requests on miss detection"}, diff --git a/events/mips/74K/events.h b/events/mips/74K/events.h new file mode 100644 index 0000000..ef3a6df --- /dev/null +++ b/events/mips/74K/events.h @@ -0,0 +1,247 @@ +// events from file events/mips/74K/events + {0x0, CTR(0) | CTR(1) | CTR(2) | CTR(3), 0, "CYCLES", + "0-0 Cycles"}, + {0x1, CTR(0) | CTR(1) | CTR(2) | CTR(3), 0, "INSTRUCTIONS", + "1-0 Instructions graduated"}, + {0x2, CTR(0) | CTR(2), 0, "PREDICTED_JR_31", + "2-0 JR $31 (return) instructions predicted including speculative instructions"}, + {0x3, CTR(0) | CTR(2), 0, "REDIRECT_STALLS", + "3-0 Stall cycles due to register indirect jumps (including non-predicted JR $31), ERET/WAIT instructions, and IFU determined exception"}, + {0x4, CTR(0) | CTR(2), 0, "ITLB_ACCESSES", + "4-0 Instruction micro-TLB accesses"}, + {0x6, CTR(0) | CTR(2), 0, "ICACHE_ACCESSES", + "6-0 Instruction cache accesses including speculative instructions"}, + {0x7, CTR(0) | CTR(2), 0, "ICACHE_MISS_STALLS", + "7-0 Instruction cache miss stall cycles"}, + {0x8, CTR(0) | CTR(2), 0, "UNCACHED_IFETCH_STALLS", + "8-0 Uncached instruction fetch stall cycles"}, + {0x9, CTR(0) | CTR(2), 0, "IFU_REPLAYS", + "9-0 Replays within the IFU due to full Instruction Buffer"}, + {0xb, CTR(0) | CTR(2), 0, "IFU_IDU_MISS_PRED_UPSTREAM_CYCLES", + "11-0 Cycles IFU-IDU gate is closed (to prevent upstream from getting ahead) due to mispredicted branch"}, + {0xc, CTR(0) | CTR(2), 0, "IFU_IDU_CLOGED_DOWNSTREAM_CYCLES", + "12-0 Cycles IFU-IDU gate is closed (waiting for downstream to unclog) due to MTC0/MFC0 sequence in pipe, EHB, or blocked DD, DR, or DS"}, + {0xd, CTR(0) | CTR(2), 0, "DDQ0_FULL_DR_STALLS", + "13-0 DR stage stall cycles due to DDQ0 (ALU out-of-order dispatch queue) full"}, + {0xe, CTR(0) | CTR(2), 0, "ALCB_FULL_DR_STALLS", + 
"14-0 DR stage stall cycles due to ALCB (ALU completion buffers) full"}, + {0xf, CTR(0) | CTR(2), 0, "CLDQ_FULL_DR_STALLS", + "15-0 DR stage stall cycles due to CLDQ (data comming back from FPU) full"}, + {0x10, CTR(0) | CTR(2), 0, "ALU_EMPTY_CYCLES", + "16-0 DDQ0 (ALU out-of-order dispatch queue) empty cycles"}, + {0x11, CTR(0) | CTR(2), 0, "ALU_OPERANDS_NOT_READY_CYCLES", + "17-0 DDQ0 (ALU out-of-order dispatch queue) no issue cycles with valid instructions but operands not ready"}, + {0x12, CTR(0) | CTR(2), 0, "ALU_NO_ISSUES_CYCLES", + "18-0 DDQ0 (ALU out-of-order dispatch queue) no issue cycles with valid instructions due to operand(s) not available, MDU busy, or CorExt resource busy"}, + {0x13, CTR(0) | CTR(2), 0, "ALU_BUBBLE_CYCLES", + "19-0 DDQ0 (ALU out-of-order dispatch queue) bubbles due to MFC1 data write"}, + {0x14, CTR(0) | CTR(2), 0, "SINGLE_ISSUE_CYCLES", + "20-0 Either DDQ0 (ALU out-of-order dispatch queue) or DDQ1 (AGEN out-of-order dispatch queue) valid instruction issue cycles"}, + {0x15, CTR(0) | CTR(2), 0, "OOO_ALU_ISSUE_CYCLES", + "21-0 Out-of-order ALU issue cycles (issued instruction is not the oldest in the pool)"}, + {0x16, CTR(0) | CTR(2), 0, "JALR_JALR_HB_INSNS", + "22-0 Graduated JALR/JALR.HB instructions"}, + {0x17, CTR(0) | CTR(2), 0, "DCACHE_LOAD_ACCESSES", + "23-0 Counts all accesses to the data cache caused by load instructions"}, + {0x18, CTR(0) | CTR(2), 0, "DCACHE_WRITEBACKS", + "24-0 Data cache writebacks"}, + {0x19, CTR(0) | CTR(2), 0, "JTLB_DATA_ACCESSES", + "25-0 Joint TLB data (non-instruction) accesses"}, + {0x1a, CTR(0) | CTR(2), 0, "LOAD_STORE_REPLAYS", + "26-0 Load/store generated replays - load/store follows too closely a matching CACHEOP"}, + {0x1b, CTR(0) | CTR(2), 0, "LOAD_STORE_BLOCKED_CYCLES", + "27-0 Load/store graduation blocked cycles due to CP1/2 store data not ready, SYNC/SYNCI/SC/CACHEOP at the head, or FSB/LDQ/WBB/ITU FIFO full"}, + {0x1c, CTR(0) | CTR(2), 0, "L2_CACHE_WRITEBACKS", + "28-0 L2 Cache 
Writebacks"}, + {0x1d, CTR(0) | CTR(2), 0, "L2_CACHE_MISSES", + "29-0 L2 Cache Misses"}, + {0x1e, CTR(0) | CTR(2), 0, "FSB_FULL_STALLS", + "30-0 Pipe stall cycles due to FSB full"}, + {0x1f, CTR(0) | CTR(2), 0, "LDQ_FULL_STALLS", + "31-0 Pipe stall cycles due to LDQ full"}, + {0x20, CTR(0) | CTR(2), 0, "WBB_FULL_STALLS", + "32-0 Pipe stall cycles due to WBB full"}, + {0x23, CTR(0) | CTR(2), 0, "LOAD_MISS_CONSUMER_REPLAYS", + "35-0 Replays following optimistic issue of instruction dependent on load which missed, counted only when the dependent instruction graduates"}, + {0x24, CTR(0) | CTR(2), 0, "JR_NON_31_INSNS", + "36-0 jr $xx (not $31) instructions graduated (at same cost as a mispredict)"}, + {0x25, CTR(0) | CTR(2), 0, "BRANCH_INSNS", + "37-0 Branch instructions graduated, excluding CP1/CP2 conditional branches"}, + {0x26, CTR(0) | CTR(2), 0, "BRANCH_LIKELY_INSNS", + "38-0 Branch likely instructions graduated including CP1 and CP2 branch likely instructions"}, + {0x27, CTR(0) | CTR(2), 0, "COND_BRANCH_INSNS", + "39-0 Conditional branches graduated"}, + {0x28, CTR(0) | CTR(2), 0, "INTEGER_INSNS", + "40-0 Integer instructions graduated including NOP, SSNOP, MOVCI, and EHB"}, + {0x29, CTR(0) | CTR(2), 0, "LOAD_INSNS", + "41-0 Loads graduated including CP1 ans CP2 loads"}, + {0x2a, CTR(0) | CTR(2), 0, "J_JAL_INSNS", + "42-0 J/JAL graduated"}, + {0x2b, CTR(0) | CTR(2), 0, "NOP_INSNS", + "43-0 NOP instructions graduated - SLL 0, NOP, SSNOP, and EHB"}, + {0x2c, CTR(0) | CTR(2), 0, "DSP_INSNS", + "44-0 DSP instructions graduated"}, + {0x2d, CTR(0) | CTR(2), 0, "DSP_BRANCH_INSNS", + "45-0 DSP branch instructions graduated"}, + {0x2e, CTR(0) | CTR(2), 0, "UNCACHED_LOAD_INSNS", + "46-0 Uncached loads graduated"}, + {0x31, CTR(0) | CTR(2), 0, "EJTAG_INSN_TRIGGERS", + "49-0 EJTAG instruction triggerpoints"}, + {0x32, CTR(0) | CTR(2), 0, "CP1_BRANCH_MISPREDICTIONS", + "50-0 CP1 branches mispredicted"}, + {0x33, CTR(0) | CTR(2), 0, "SC_INSNS", + "51-0 SC instructions 
graduated"}, + {0x34, CTR(0) | CTR(2), 0, "PREFETCH_INSNS", + "52-0 Prefetch instructions graduated"}, + {0x35, CTR(0) | CTR(2), 0, "NO_INSN_CYCLES", + "53-0 No instructions graduated cycles"}, + {0x36, CTR(0) | CTR(2), 0, "ONE_INSN_CYCLES", + "54-0 One instruction graduated cycles"}, + {0x37, CTR(0) | CTR(2), 0, "GFIFO_BLOCKED_CYCLES", + "55-0 GFIFO blocked cycles"}, + {0x38, CTR(0) | CTR(2), 0, "MISPREDICTION_STALLS", + "56-0 Cycles from the time of a pipe kill due to mispredict until the first new instruction graduates"}, + {0x39, CTR(0) | CTR(2), 0, "MISPREDICTED_BRANCH_INSNS_CYCLES", + "57-0 Mispredicted branch instruction graduation cycles without the delay slot"}, + {0x3a, CTR(0) | CTR(2), 0, "EXCEPTIONS_TAKEN", + "58-0 Exceptions taken"}, + {0x3b, CTR(0) | CTR(2), 0, "COREEXTEND_EVENTS", + "59-0 Implementation specific CorExtend events"}, + {0x3e, CTR(0) | CTR(2), 0, "ISPRAM_EVENTS", + "62-0 Implementation specific ISPRAM events"}, + {0x3f, CTR(0) | CTR(2), 0, "L2_CACHE_SINGLE_BIT_ERRORS", + "63-0 Single bit errors corrected in L2"}, + {0x40, CTR(0) | CTR(2), 0, "SYSTEM_EVENT_0", + "64-0 Implementation specific system event 0"}, + {0x41, CTR(0) | CTR(2), 0, "SYSTEM_EVENT_2", + "65-0 Implementation specific system event 2"}, + {0x42, CTR(0) | CTR(2), 0, "SYSTEM_EVENT_4", + "66-0 Implementation specific system event 4"}, + {0x43, CTR(0) | CTR(2), 0, "SYSTEM_EVENT_6", + "67-0 Implementation specific system event 6"}, + {0x44, CTR(0) | CTR(2), 0, "OCP_ALL_REQUESTS", + "68-0 All OCP requests accepted"}, + {0x45, CTR(0) | CTR(2), 0, "OCP_READ_REQUESTS", + "69-0 OCP read requests accepted"}, + {0x46, CTR(0) | CTR(2), 0, "OCP_WRITE_REQUESTS", + "70-0 OCP write requests accepted"}, + {0x4a, CTR(0) | CTR(2), 0, "FSB_LESS_25_FULL", + "74-0 FSB < 25% full"}, + {0x4b, CTR(0) | CTR(2), 0, "LDQ_LESS_25_FULL", + "75-0 LDQ < 25% full"}, + {0x4c, CTR(0) | CTR(2), 0, "WBB_LESS_25_FULL", + "76-0 WBB < 25% full"}, + {0x402, CTR(1) | CTR(3), 0, "JR_31_MISPREDICTIONS", + "2-1 JR 
$31 (return) instructions mispredicted"}, + {0x403, CTR(1) | CTR(3), 0, "JR_31_NO_PREDICTIONS", + "3-1 JR $31 (return) instructions not predicted"}, + {0x404, CTR(1) | CTR(3), 0, "ITLB_MISSES", + "4-1 Instruction micro-TLB misses"}, + {0x405, CTR(1) | CTR(3), 0, "JTLB_INSN_MISSES", + "5-1 Joint TLB instruction misses"}, + {0x406, CTR(1) | CTR(3), 0, "ICACHE_MISSES", + "6-1 Instruction cache misses, includes misses from fetch-ahead and speculation"}, + {0x408, CTR(1) | CTR(3), 0, "PDTRACE_BACK_STALLS", + "8-1 PDtrace back stalls"}, + {0x409, CTR(1) | CTR(3), 0, "KILLED_FETCH_SLOTS", + "9-1 Valid fetch slots killed due to taken branches/jumps or stalling instructions"}, + {0x40b, CTR(1) | CTR(3), 0, "IFU_IDU_NO_FETCH_CYCLES", + "11-1 Cycles IFU-IDU gate open but no instructions fetched by IFU"}, + {0x40d, CTR(1) | CTR(3), 0, "DDQ1_FULL_DR_STALLS", + "13-1 DR stage stall cycles due to DDQ1 (AGEN out-of-order dispatch queue) full"}, + {0x40e, CTR(1) | CTR(3), 0, "AGCB_FULL_DR_STALLS", + "14-1 DR stage stall cycles due to AGCB (AGEN completion buffers) full"}, + {0x40f, CTR(1) | CTR(3), 0, "IODQ_FULL_DR_STALLS", + "15-1 DR stage stall cycles due to IODQ (data comming back from IO) full"}, + {0x410, CTR(1) | CTR(3), 0, "AGEN_EMPTY_CYCLES", + "16-1 DDQ1 (AGEN out-of-order dispatch queue) empty cycles"}, + {0x411, CTR(1) | CTR(3), 0, "AGEN_OPERANDS_NOT_READY_CYCLES", + "17-1 DDQ1 (AGEN out-of-order dispatch queue) no issue cycles with valid instructions but operands not ready"}, + {0x412, CTR(1) | CTR(3), 0, "AGEN_NO_ISSUES_CYCLES", + "18-1 DDQ1 (AGEN out-of-order dispatch queue) no issue cycles with valid instructions due to operand(s) not available, non-issued stores blocking ready to issue loads, or non-issued CACHEOPs blocking ready to issue loads"}, + {0x413, CTR(1) | CTR(3), 0, "AGEN_BUBBLE_CYCLES", + "19-1 DDQ1 (AGEN out-of-order dispatch queue) bubbles due to MFC2 data write or cache access from FSB"}, + {0x414, CTR(1) | CTR(3), 0, "DUAL_ISSUE_CYCLES", + "20-1 Both 
DDQ0 (ALU out-of-order dispatch queue) and DDQ1 (AGEN out-of-order dispatch queue) valid instruction issue cycles"}, + {0x415, CTR(1) | CTR(3), 0, "OOO_AGEN_ISSUE_CYCLES", + "21-1 Out-of-order AGEN issue cycles (issued instruction is not the oldest in the pool)"}, + {0x416, CTR(1) | CTR(3), 0, "DCACHE_LINE_REFILL_REQUESTS", + "22-1 Data cache line loads (line refill requests)"}, + {0x417, CTR(1) | CTR(3), 0, "DCACHE_ACCESSES", + "23-1 Data cache accesses"}, + {0x418, CTR(1) | CTR(3), 0, "DCACHE_MISSES", + "24-1 Data cache misses"}, + {0x419, CTR(1) | CTR(3), 0, "JTLB_DATA_MISSES", + "25-1 Joint TLB data (non-instruction) misses"}, + {0x41a, CTR(1) | CTR(3), 0, "VA_TRANSALTION_CORNER_CASES", + "26-1 Virtual memory address translation synonyms, homonyms, and aliases (loads/stores treated as miss in the cache)"}, + {0x41b, CTR(1) | CTR(3), 0, "LOAD_STORE_NO_FILL_REQUESTS", + "27-1 Load/store graduations not resulting in a bus request because misses at integer pipe graduation turn into hit or merge with outstanding fill request"}, + {0x41c, CTR(1) | CTR(3), 0, "L2_CACHE_ACCESSES", + "28-1 Accesses to the L2 cache"}, + {0x41d, CTR(1) | CTR(3), 0, "L2_CACHE_MISS_CYCLES", + "29-1 Cycles a L2 miss is outstanding, but not necessarily stalling the pipeline"}, + {0x41e, CTR(1) | CTR(3), 0, "FSB_OVER_50_FULL", + "30-1 FSB > 50% full"}, + {0x41f, CTR(1) | CTR(3), 0, "LDQ_OVER_50_FULL", + "31-1 LDQ > 50% full"}, + {0x420, CTR(1) | CTR(3), 0, "WBB_OVER_50_FULL", + "32-1 WBB > 50% full"}, + {0x423, CTR(1) | CTR(3), 0, "CP1_CP2_LOAD_INSNS", + "35-1 CP1/CP2 load instructions graduated"}, + {0x424, CTR(1) | CTR(3), 0, "MISPREDICTED_JR_31_INSNS", + "36-1 jr $31 instructions graduated after mispredict"}, + {0x425, CTR(1) | CTR(3), 0, "CP1_CP2_COND_BRANCH_INSNS", + "37-1 CP1/CP2 conditional branch instructions graduated"}, + {0x426, CTR(1) | CTR(3), 0, "MISPREDICTED_BRANCH_LIKELY_INSNS", + "38-1 Mispredicted branch likely instructions graduated"}, + {0x427, CTR(1) | CTR(3), 0, 
"MISPREDICTED_BRANCH_INSNS", + "39-1 Mispredicted branches graduated"}, + {0x428, CTR(1) | CTR(3), 0, "FPU_INSNS", + "40-1 FPU instructions graduated"}, + {0x429, CTR(1) | CTR(3), 0, "STORE_INSNS", + "41-1 Store instructions graduated including CP1 ans CP2 stores"}, + {0x42a, CTR(1) | CTR(3), 0, "MIPS16_INSNS", + "42-1 MIPS16 instructions graduated"}, + {0x42b, CTR(1) | CTR(3), 0, "NT_MUL_DIV_INSNS", + "43-1 Integer multiply/divide instructions graduated"}, + {0x42c, CTR(1) | CTR(3), 0, "ALU_DSP_SATURATION_INSNS", + "44-1 ALU-DSP graduated, result was saturated"}, + {0x42d, CTR(1) | CTR(3), 0, "MDU_DSP_SATURATION_INSNS", + "45-1 MDU-DSP graduated, result was saturated"}, + {0x42e, CTR(1) | CTR(3), 0, "UNCACHED_STORE_INSNS", + "46-1 Uncached stores graduated"}, + {0x433, CTR(1) | CTR(3), 0, "FAILED_SC_INSNS", + "51-1 SC instructions failed"}, + {0x434, CTR(1) | CTR(3), 0, "CACHE_HIT_PREFETCH_INSNS", + "52-1 PREFETCH instructions which did nothing, because they hit in the cache"}, + {0x435, CTR(1) | CTR(3), 0, "LOAD_MISS_INSNS", + "53-1 Cacheable load instructions that miss in the cache graduated"}, + {0x436, CTR(1) | CTR(3), 0, "TWO_INSNS_CYCLES", + "54-1 Two instructions graduated cycles"}, + {0x437, CTR(1) | CTR(3), 0, "CP1_CP2_STORE_INSNS", + "55-1 CP1/CP2 Store graduated"}, + {0x43a, CTR(1) | CTR(3), 0, "GRADUATION_REPLAYS", + "58-1 Replays initiated from graduation"}, + {0x43e, CTR(1) | CTR(3), 0, "DSPRAM_EVENTS", + "62-1 Implementation specific events from the DSPRAM block"}, + {0x440, CTR(0) | CTR(2), 0, "SYSTEM_EVENT_1", + "64-1 Implementation specific system event 1"}, + {0x441, CTR(0) | CTR(2), 0, "SYSTEM_EVENT_3", + "65-1 Implementation specific system event 3"}, + {0x442, CTR(0) | CTR(2), 0, "SYSTEM_EVENT_5", + "66-1 Implementation specific system event 5"}, + {0x443, CTR(0) | CTR(2), 0, "SYSTEM_EVENT_7", + "67-1 Implementation specific system event 7"}, + {0x444, CTR(0) | CTR(2), 0, "OCP_ALL_CACHEABLE_REQUESTS", + "68-1 All OCP cacheable requests 
accepted"}, + {0x445, CTR(0) | CTR(2), 0, "OCP_READ_CACHEABLE_REQUESTS", + "69-1 OCP cacheable read request accepted"}, + {0x446, CTR(0) | CTR(2), 0, "OCP_WRITE_CACHEABLE_REQUESTS", + "70-1 OCP cacheable write request accepted"}, + {0x44a, CTR(0) | CTR(2), 0, "FSB_25_50_FULL", + "74-1 FSB 25-50% full"}, + {0x44b, CTR(0) | CTR(2), 0, "LDQ_25_50_FULL", + "75-1 LDQ 25-50% full"}, + {0x44c, CTR(0) | CTR(2), 0, "WBB_25_50_FULL", + "76-1 WBB 25-50% full"}, diff --git a/opcontrol/opcontrol.cpp b/opcontrol/opcontrol.cpp index 6b440e2..44eca77 100644 --- a/opcontrol/opcontrol.cpp +++ b/opcontrol/opcontrol.cpp @@ -36,21 +36,97 @@ #define verbose(fmt...) if (verbose_print) printf(fmt) -/* Experiments found that using a small interval may hang the device, and the - * more events tracked simultaneously, the longer the interval has to be. - */ +struct event_info { + int id; + int counters; + int um; + const char *name; + const char *explanation; +}; + +#define CTR(n) (1<<(n)) #if defined(__i386__) || defined(__x86_64__) +struct event_info event_info_arch_perfmon[] = { + #include "../events/i386/arch_perfmon/events.h" +}; + #define MAX_EVENTS 2 int min_count[MAX_EVENTS] = {60000, 100000}; -#elif !defined(WITH_ARM_V7_A) + +const char *default_event = "CPU_CLK_UNHALTED"; +#endif + +#if defined(__arm__) +#if !defined(WITH_ARM_V7_A) +struct event_info event_info_armv6[] = { + #include "../events/arm/armv6/events.h" +}; + #define MAX_EVENTS 3 int min_count[MAX_EVENTS] = {150000, 200000, 250000}; + #else +struct event_info event_info_armv7[] = { + #include "../events/arm/armv7/events.h" +}; + #define MAX_EVENTS 5 int min_count[MAX_EVENTS] = {150000, 20000, 25000, 30000, 35000}; #endif +const char *default_event = "CPU_CYCLES"; +#endif + +#if defined(__mips__) +struct event_info event_info_24K[] = { + #include "../events/mips/24K/events.h" +}; +struct event_info event_info_34K[] = { + #include "../events/mips/34K/events.h" +}; +struct event_info event_info_74K[] = { + #include 
"../events/mips/74K/events.h" +}; +struct event_info event_info_1004K[] = { + #include "../events/mips/1004K/events.h" +}; + +#define MAX_EVENTS 4 +int min_count[MAX_EVENTS] = {150000, 20000, 25000, 30000}; + +const char *default_event = "CYCLES"; +#endif /* defined(__mips__) */ + +#define ARRAYSZ(x) (sizeof(x)/sizeof((x)[0])) + +struct cpuevents { + const char *cpu; + struct event_info *event_info; + unsigned int nevents; +} cpuevents[] = { +#if defined(__i386__) || defined(__x86_64__) + {"i386/arch_perfmon", event_info_arch_perfmon, ARRAYSZ(event_info_arch_perfmon)}, +#endif /* defined(__i386__) || defined(__x86_64__) */ +#if defined(__arm__) +#if !defined(WITH_ARM_V7_A) + {"arm/armv6", event_info_armv6, ARRAYSZ(event_info_armv6)}, +#else + {"arm/armv7", event_info_armv7, ARRAYSZ(event_info_armv7)}, +#endif +#endif /* defined(__arm__) */ +#if defined(__mips__) + {"mips/24K", event_info_24K, ARRAYSZ(event_info_24K)}, + {"mips/34K", event_info_34K, ARRAYSZ(event_info_34K)}, + {"mips/74K", event_info_74K, ARRAYSZ(event_info_74K)}, + {"mips/1004K", event_info_1004K, ARRAYSZ(event_info_1004K)}, +#endif /* defined(__mips__) */ +}; + +struct cpuevents *cpuevent; +#define event_info cpuevent->event_info +#define NEVENTS cpuevent->nevents + int verbose_print; int list_events; int show_usage; @@ -64,6 +140,7 @@ int reset; int selected_events[MAX_EVENTS]; int selected_counts[MAX_EVENTS]; +int max_events; char callgraph[8]; char kernel_range[512]; @@ -90,205 +167,6 @@ struct option long_options[] = { {0, 0, 0, 0}, }; -struct event_info { - int id; - int um; - const char *name; - const char *explanation; -} event_info[] = { -#if defined(__i386__) || defined(__x86_64__) - /* INTEL_ARCH_PERFMON events */ - - /* 0x3c counters:cpuid um:zero minimum:6000 filter:0 name:CPU_CLK_UNHALTED : - * Clock cycles when not halted - */ - {0x3c, 0, "CPU_CLK_UNHALTED", - "Clock cycles when not halted" }, - - /* event:0x3c counters:cpuid um:one minimum:6000 filter:2 
name:UNHALTED_REFERENCE_CYCLES : - * Unhalted reference cycles - */ - {0x3c, 1, "UNHALTED_REFERENCE_CYCLES", - "Unhalted reference cycles" }, - - /* event:0xc0 counters:cpuid um:zero minimum:6000 filter:1 name:INST_RETIRED : - * number of instructions retired - */ - {0xc0, 0, "INST_RETIRED", - "number of instructions retired"}, - - /* event:0x2e counters:cpuid um:x41 minimum:6000 filter:5 name:LLC_MISSES : - * Last level cache demand requests from this core that missed the LLC - */ - {0x2e, 0x41, "LLC_MISSES", - "Last level cache demand requests from this core that missed the LLC"}, - - /* event:0x2e counters:cpuid um:x4f minimum:6000 filter:4 name:LLC_REFS : - * Last level cache demand requests from this core - */ - {0x2e, 0x4f, "LLC_REFS", - "Last level cache demand requests from this core"}, - - /* event:0xc4 counters:cpuid um:zero minimum:500 filter:6 name:BR_INST_RETIRED : - * number of branch instructions retired - */ - {0xc4, 0, "BR_INST_RETIRED", - "number of branch instructions retired"}, - - /* event:0xc5 counters:cpuid um:zero minimum:500 filter:7 name:BR_MISS_PRED_RETIRED : - * number of mispredicted branches retired (precise) - */ - {0xc5, 0, "BR_MISS_PRED_RETIRED", - "number of mispredicted branches retired (precise)"}, - -#elif !defined(WITH_ARM_V7_A) - /* ARM V6 events */ - {0x00, 0, "IFU_IFETCH_MISS", - "number of instruction fetch misses"}, - {0x01, 0, "CYCLES_IFU_MEM_STALL", - "cycles instruction fetch pipe is stalled"}, - {0x02, 0, "CYCLES_DATA_STALL", - "cycles stall occurs for due to data dependency"}, - {0x03, 0, "ITLB_MISS", - "number of Instruction MicroTLB misses"}, - {0x04, 0, "DTLB_MISS", - "number of Data MicroTLB misses"}, - {0x05, 0, "BR_INST_EXECUTED", - "branch instruction executed w/ or w/o program flow change"}, - {0x06, 0, "BR_INST_MISS_PRED", - "branch mispredicted"}, - {0x07, 0, "INSN_EXECUTED", - "instructions executed"}, - {0x09, 0, "DCACHE_ACCESS", - "data cache access, cacheable locations"}, - {0x0a, 0, "DCACHE_ACCESS_ALL", 
- "data cache access, all locations"}, - {0x0b, 0, "DCACHE_MISS", - "data cache miss"}, - {0x0c, 0, "DCACHE_WB", - "data cache writeback, 1 event for every half cacheline"}, - {0x0d, 0, "PC_CHANGE", - "number of times the program counter was changed without a mode switch"}, - {0x0f, 0, "TLB_MISS", - "Main TLB miss"}, - {0x10, 0, "EXP_EXTERNAL", - "Explicit external data access"}, - {0x11, 0, "LSU_STALL", - "cycles stalled because Load Store request queue is full"}, - {0x12, 0, "WRITE_DRAIN", - "Times write buffer was drained"}, - {0xff, 0, "CPU_CYCLES", - "clock cycles counter"}, -#else - /* ARM V7 events */ - {0x00, 0, "PMNC_SW_INCR", - "Software increment of PMNC registers"}, - {0x01, 0, "IFETCH_MISS", - "Instruction fetch misses from cache or normal cacheable memory"}, - {0x02, 0, "ITLB_MISS", - "Instruction fetch misses from TLB"}, - {0x03, 0, "DCACHE_REFILL", - "Data R/W operation that causes a refill from cache or normal cacheable" - "memory"}, - {0x04, 0, "DCACHE_ACCESS", - "Data R/W from cache"}, - {0x05, 0, "DTLB_REFILL", - "Data R/W that causes a TLB refill"}, - {0x06, 0, "DREAD", - "Data read architecturally executed (note: architecturally executed = for" - "instructions that are unconditional or that pass the condition code)"}, - {0x07, 0, "DWRITE", - "Data write architecturally executed"}, - {0x08, 0, "INSTR_EXECUTED", - "All executed instructions"}, - {0x09, 0, "EXC_TAKEN", - "Exception taken"}, - {0x0A, 0, "EXC_EXECUTED", - "Exception return architecturally executed"}, - {0x0B, 0, "CID_WRITE", - "Instruction that writes to the Context ID Register architecturally" - "executed"}, - {0x0C, 0, "PC_WRITE", - "SW change of PC, architecturally executed (not by exceptions)"}, - {0x0D, 0, "PC_IMM_BRANCH", - "Immediate branch instruction executed (taken or not)"}, - {0x0E, 0, "PC_PROC_RETURN", - "Procedure return architecturally executed (not by exceptions)"}, - {0x0F, 0, "UNALIGNED_ACCESS", - "Unaligned access architecturally executed"}, - {0x10, 0, 
"PC_BRANCH_MIS_PRED", - "Branch mispredicted or not predicted. Counts pipeline flushes because of" - "misprediction"}, - {0x12, 0, "PC_BRANCH_MIS_USED", - "Branch or change in program flow that could have been predicted"}, - {0x40, 0, "WRITE_BUFFER_FULL", - "Any write buffer full cycle"}, - {0x41, 0, "L2_STORE_MERGED", - "Any store that is merged in L2 cache"}, - {0x42, 0, "L2_STORE_BUFF", - "Any bufferable store from load/store to L2 cache"}, - {0x43, 0, "L2_ACCESS", - "Any access to L2 cache"}, - {0x44, 0, "L2_CACH_MISS", - "Any cacheable miss in L2 cache"}, - {0x45, 0, "AXI_READ_CYCLES", - "Number of cycles for an active AXI read"}, - {0x46, 0, "AXI_WRITE_CYCLES", - "Number of cycles for an active AXI write"}, - {0x47, 0, "MEMORY_REPLAY", - "Any replay event in the memory subsystem"}, - {0x48, 0, "UNALIGNED_ACCESS_REPLAY", - "Unaligned access that causes a replay"}, - {0x49, 0, "L1_DATA_MISS", - "L1 data cache miss as a result of the hashing algorithm"}, - {0x4A, 0, "L1_INST_MISS", - "L1 instruction cache miss as a result of the hashing algorithm"}, - {0x4B, 0, "L1_DATA_COLORING", - "L1 data access in which a page coloring alias occurs"}, - {0x4C, 0, "L1_NEON_DATA", - "NEON data access that hits L1 cache"}, - {0x4D, 0, "L1_NEON_CACH_DATA", - "NEON cacheable data access that hits L1 cache"}, - {0x4E, 0, "L2_NEON", - "L2 access as a result of NEON memory access"}, - {0x4F, 0, "L2_NEON_HIT", - "Any NEON hit in L2 cache"}, - {0x50, 0, "L1_INST", - "Any L1 instruction cache access, excluding CP15 cache accesses"}, - {0x51, 0, "PC_RETURN_MIS_PRED", - "Return stack misprediction at return stack pop" - "(incorrect target address)"}, - {0x52, 0, "PC_BRANCH_FAILED", - "Branch prediction misprediction"}, - {0x53, 0, "PC_BRANCH_TAKEN", - "Any predicted branch that is taken"}, - {0x54, 0, "PC_BRANCH_EXECUTED", - "Any taken branch that is executed"}, - {0x55, 0, "OP_EXECUTED", - "Number of operations executed" - "(in instruction or mutli-cycle instruction)"}, - {0x56, 0, 
"CYCLES_INST_STALL", - "Cycles where no instruction available"}, - {0x57, 0, "CYCLES_INST", - "Number of instructions issued in a cycle"}, - {0x58, 0, "CYCLES_NEON_DATA_STALL", - "Number of cycles the processor waits on MRC data from NEON"}, - {0x59, 0, "CYCLES_NEON_INST_STALL", - "Number of cycles the processor waits on NEON instruction queue or" - "NEON load queue"}, - {0x5A, 0, "NEON_CYCLES", - "Number of cycles NEON and integer processors are not idle"}, - {0x70, 0, "PMU0_EVENTS", - "Number of events from external input source PMUEXTIN[0]"}, - {0x71, 0, "PMU1_EVENTS", - "Number of events from external input source PMUEXTIN[1]"}, - {0x72, 0, "PMU_EVENTS", - "Number of events from both external input sources PMUEXTIN[0]" - "and PMUEXTIN[1]"}, - {0xFF, 0, "CPU_CYCLES", - "Number of CPU cycles"}, -#endif -}; void usage() { @@ -301,15 +179,17 @@ void usage() " --setup setup directories\n" #if defined(__i386__) || defined(__x86_64__) " --quick setup and select CPU_CLK_UNHALTED:60000\n" -#else +#elif defined(__arm__) " --quick setup and select CPU_CYCLES:150000\n" +#elif defined(__mips__) + " --quick setup and select CYCLES:150000\n" #endif " --timer timer-based profiling\n" " --status show configuration\n" " --start start data collection\n" " --stop stop data collection\n" " --reset clears out data from current session\n" - " --shutdown kill the oprofile daeman\n" + " --shutdown kill the oprofile daemon\n" " --callgraph=depth callgraph depth\n" " --event=eventspec\n" " Choose an event. 
May be specified multiple times.\n" @@ -322,16 +202,67 @@ void usage() ); } -void setup_session_dir() +int setup_device(void) { - int fd; + if (mkdir(OP_DRIVER_BASE, 0755)) { + if (errno != EEXIST) { + fprintf(stderr, "Cannot create directory "OP_DRIVER_BASE": %s\n", + strerror(errno)); + return -1; + } + } - fd = open(OP_DATA_DIR, O_RDONLY); - if (fd != -1) { - system("rm -r "OP_DATA_DIR); - close(fd); + if (access(OP_DRIVER_BASE"/stats", F_OK)) { + if (system("mount -t oprofilefs nodev "OP_DRIVER_BASE)) { + return -1; + } + } + + /* Selecting the event information by cpu_type has only been tested on MIPS */ +#if defined(__mips__) + /* Use cpu_type to select the events */ + int fd = open(OP_DRIVER_BASE "/cpu_type", O_RDONLY); + if (fd < 0) { + fprintf(stderr, OP_DRIVER_BASE "/cpu_type: %s\n", + strerror(errno)); + return -1; } + char buf[512]; + int n = read(fd, buf, sizeof(buf)-1); + close(fd); + if (n < 0) { + fprintf(stderr, OP_DRIVER_BASE "/cpu_type: %s\n", + strerror(errno)); + return -1; + } + buf[n] = '\0'; + for (unsigned int i = 0; i < ARRAYSZ(cpuevents); i++) { + if (strcmp(buf, cpuevents[i].cpu) == 0) { + cpuevent = &cpuevents[i]; + } + } + if (cpuevent == NULL) { + fprintf(stderr, "Unrecognised CPU type %s\n", buf); + return -1; + } + for (max_events = 0; max_events < MAX_EVENTS; max_events++) { + snprintf(buf, sizeof(buf), OP_DRIVER_BASE"/%d", max_events); + if (access(buf, F_OK) < 0) + break; + } +#else + max_events = MAX_EVENTS; + cpuevent = &cpuevents[0]; +#endif + return 0; +} + +void setup_session_dir() +{ + if (access(OP_DATA_DIR, F_OK) == 0) + system("rm -r "OP_DATA_DIR); + if (mkdir(OP_DATA_DIR, 0755)) { fprintf(stderr, "Cannot create directory \"%s\": %s\n", OP_DATA_DIR, strerror(errno)); @@ -370,31 +301,36 @@ int do_setup() setup_session_dir(); - if (mkdir(OP_DRIVER_BASE, 0755)) { - if (errno != EEXIST) { - fprintf(stderr, "Cannot create directory "OP_DRIVER_BASE": %s\n", - strerror(errno)); - return -1; - } - } + return 0; +} - if 
(access(OP_DRIVER_BASE"/stats", F_OK)) { - if (system("mount -t oprofilefs nodev "OP_DRIVER_BASE)) { - return -1; - } +void stringify_counters(char *ctr_string, int ctr_mask) +{ + int i, n, len; + char *p = ctr_string; + + *p = '\0'; + for (i=0; i<32; ++i) { + if (ctr_mask & (1< 0) { @@ -510,7 +460,7 @@ void do_status() /* event name */ sprintf(fullname, OP_DRIVER_BASE"/%d/event", i); num = read_num(fullname); - printf(" name: %s\n", find_event_name_from_id(num)); + printf(" name: %s\n", find_event_name_from_id(num, CTR(i))); /* profile interval */ sprintf(fullname, OP_DRIVER_BASE"/%d/count", i); @@ -524,17 +474,14 @@ void do_status() num = read_num(OP_DATA_DIR"/lock"); if (num >= 0) { - int fd; /* Still needs to check if this lock is left-over */ sprintf(fullname, "/proc/%d", num); - fd = open(fullname, O_RDONLY); - if (fd == -1) { + if (access(fullname, R_OK) != 0) { printf("OProfile daemon exited prematurely - redo setup" " before you continue\n"); return; } else { - close(fd); printf("oprofiled pid: %d\n", num); num = read_num(OP_DRIVER_BASE"/enable"); @@ -598,20 +545,12 @@ void do_reset() * reset. 
*/ #if 0 - int fd; - - fd = open(OP_DATA_DIR"/samples/current", O_RDONLY); - if (fd == -1) { - return; - } - close(fd); - system("rm -r "OP_DATA_DIR"/samples/current"); - int num = read_num(OP_DATA_DIR"/lock"); + int pid = read_num(OP_DATA_DIR"/lock"); + if (pid >= 0) + kill(pid, SIGHUP); /* HUP makes oprofiled close its sample files */ - if (num >= 0) { - printf("Signalling daemon...\n"); - kill(num, SIGHUP); - } + if (access(OP_DATA_DIR"/samples/current", R_OK) == 0) + system("rm -r "OP_DATA_DIR"/samples/current"); #endif } @@ -625,6 +564,8 @@ int main(int argc, char * const argv[]) strcpy(vmlinux, "--no-vmlinux"); strcpy(kernel_range, ""); + setup_device(); + while (1) { int c = getopt_long(argc, argv, "c:e:v:r:dhVtl:", long_options, &option_index); if (c == -1) { @@ -700,11 +641,7 @@ int main(int argc, char * const argv[]) } if (quick) { -#if defined(__i386__) || defined(__x86_64__) - process_event("CPU_CLK_UNHALTED"); -#else - process_event("CPU_CYCLES"); -#endif + process_event(default_event); setup = 1; } @@ -739,9 +676,7 @@ int main(int argc, char * const argv[]) char* slash = strrchr(command, '/'); strcpy(slash ? slash + 1 : command, "oprofiled --session-dir="OP_DATA_DIR); -#if defined(__i386__) || defined(__x86_64__) - /* Nothing */ -#elif !defined(WITH_ARM_V7_A) +#if defined(__arm__) && !defined(WITH_ARM_V7_A) /* Since counter #3 can only handle CPU_CYCLES, check and shuffle the * order a bit so that the maximal number of events can be profiled * simultaneously @@ -815,7 +750,7 @@ int main(int argc, char * const argv[]) if (timer == 0) { /* If not in timer mode, disable unused counters */ - for (i = num_events; i < MAX_EVENTS; i++) { + for (i = num_events; i < max_events; i++) { echo_dev("0", 0, "enabled", i); } } else { diff --git a/opev.py b/opev.py new file mode 100755 index 0000000..9f11e75 --- /dev/null +++ b/opev.py @@ -0,0 +1,84 @@ +#! /usr/bin/env python +""" +Read oprofile events file, generate C data struct for Android opcontrol. 
# Background for this tool:
#
# Android does not use a script for opcontrol; it uses a C binary with
# embedded data structures describing the supported event set.
# Initially that is just ARM v6 and v7.
#
# This tool converts the various MIPS CPU event files for inclusion, and
# should work with other processor architectures as well.
#
# Neither ARM nor MIPS uses unit_masks, so that file is ignored.
#
# Event entries in the events file look like this:
#
#   event:0x1 counters:0,1 um:zero minimum:500 name:INSTRUCTIONS : Instructions completed
#
# The format is key:value.  A single ':' token ends the key:value list and
# the remaining text on the line is the description.

import os
import sys


def number(s):
    """Translate a unit-mask token from the events file into a C literal.

    'zero' and 'one' become '0' and '1'; a token starting with 'x' gets a
    leading '0' (so 'x1f' becomes '0x1f').  Any other token (including the
    empty string) is returned unchanged instead of silently becoming None.
    """
    if s == 'zero':
        return '0'
    if s == 'one':
        return '1'
    if s.startswith('x'):
        return '0' + s
    return s


def _c_escape(text):
    """Escape backslashes and double quotes so text fits in a C string literal."""
    return text.replace('\\', '\\\\').replace('"', '\\"')


def parse_event(line, ovf):
    """Return a dictionary of the items found on one line of an events file.

    Each whitespace-separated token has the form key:value.  A token whose
    key is empty (normally a lone ':') ends the list; the rest of the line
    is stored under 'description'.  The value of 'um' is passed through
    number().

    An 'include:<dir>' token makes ev() process '<dir>/events' immediately,
    writing to ovf, and None is returned for the including line.

    Raises ValueError for a token that contains no ':'.
    """
    items = {}
    rest = line
    while rest:
        parts = rest.split(None, 1)
        if not parts:
            # Only whitespace was left.
            break
        token = parts[0]
        # The last token may have nothing after it (no description).
        rest = parts[1] if len(parts) > 1 else ''

        key, sep, value = token.partition(':')
        if not sep:
            raise ValueError("malformed token %r in event line %r"
                             % (token, line))
        if key == 'include':
            ev(value + "/events", ovf)
            return None
        if key == '':
            # Keep any text glued to the colon (":text more") as well.
            items['description'] = (value + ' ' + rest).strip() if value else rest
            break
        if key == 'um':
            value = number(value)
        items[key] = value
    return items


def parse_ctr(s):
    """Convert a comma separated list x,y,... to 'CTR(x) | CTR(y) | ...'.

    The special value 'cpuid' yields the integer 0, as it always has.
    """
    if s == 'cpuid':
        return 0
    return " | ".join("CTR(%s)" % ctr for ctr in s.split(','))


def ev(fname, ovf):
    """Read events file fname and write the matching C initializers to ovf.

    Blank lines and lines starting with '#' are ignored.  Because included
    files are expanded while the lines are being parsed, their output lands
    in ovf before this file's own header comment.

    A missing 'um' is written as 0 and a missing description as an empty
    string; a missing 'event', 'counters' or 'name' raises KeyError.
    """
    with open(fname, "r") as evf:
        stripped = [raw.strip() for raw in evf]
    lines = [s for s in stripped if s and not s.startswith('#')]
    eventlist = [parse_event(line, ovf) for line in lines]

    ovf.write("// events from file %s\n" % fname)
    for d in eventlist:
        if d is None:
            # The line was an include directive; already handled.
            continue
        ovf.write(' {%s, %s, %s, "%s",\n' % (d['event'],
                                             parse_ctr(d['counters']),
                                             d.get('um', '0'),
                                             _c_escape(d['name'])))
        ovf.write(' "%s"},\n' % _c_escape(d.get('description', '')))


def main(argv):
    """Convert the events file named in argv[1] into '<name>.h'."""
    if len(argv) != 2:
        fname = "events/mips/24K/events"  # convenient testing
    else:
        fname = argv[1]
    with open(fname + ".h", "w") as ovf:
        ev(fname, ovf)


if __name__ == "__main__":
    main(sys.argv)