From 594087a04eea544356f9c52e83c1a9bc380ce80f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 12 Mar 2010 18:22:17 -0500 Subject: [PATCH 1/7] perf probe: Fix probe_point buffer overrun Fix probe_point array-size overrun problem. In some cases (e.g. inline function), one user-specified probe-point can be translated to many probe address, and it overruns pre-defined array-size. This also removes redundant MAX_PROBES macro definition. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: LKML-Reference: <20100312232217.2017.45017.stgit@localhost6.localdomain6> [ Note that only root can create new probes. Eventually we should remove the MAX_PROBES limit, but that is a larger patch not eligible to perf/urgent treatment. ] Signed-off-by: Ingo Molnar --- tools/perf/builtin-probe.c | 1 - tools/perf/util/probe-finder.c | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index c30a33592340..152d6c9b1fa4 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -47,7 +47,6 @@ #include "util/probe-event.h" #define MAX_PATH_LEN 256 -#define MAX_PROBES 128 /* Session management structure */ static struct { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 1e6c65ebbd80..f9cbbf18e6ca 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -455,6 +455,9 @@ static void show_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) /* *pf->fb_ops will be cached in libdw. Don't free it. */ pf->fb_ops = NULL; + if (pp->found == MAX_PROBES) + die("Too many( > %d) probe point found.\n", MAX_PROBES); + pp->probes[pp->found] = strdup(tmp); pp->found++; } From fc6ceea045031658d0b59af562369eae980b4370 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 12 Mar 2010 18:22:24 -0500 Subject: [PATCH 2/7] perf probe: Fix need_dwarf flag if lazy matching is used Set need_dwarf if lazy matching pattern is specified, because lazy matching requires real source path for which we must use debuginfo. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20100312232224.2017.54550.stgit@localhost6.localdomain6> Signed-off-by: Ingo Molnar --- tools/perf/util/probe-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 53181dbfe4a8..7c004b6ef24f 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -242,7 +242,7 @@ void parse_perf_probe_event(const char *str, struct probe_point *pp, /* Parse probe point */ parse_perf_probe_probepoint(argv[0], pp); - if (pp->file || pp->line) + if (pp->file || pp->line || pp->lazy_line) *need_dwarf = true; /* Copy arguments and ensure return probe has no C argument */ From b63be8d7beda7fe5879559be6f70f8e1c93109e4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Mar 2010 15:03:50 -0300 Subject: [PATCH 3/7] perf top: Improve the autosizing of column lenghts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When profiling C++ workloads the symbol name length can be really big, so cap it before it garbles the result. This builds upon the autosizing already present where we choose to use the short, basename of DSOs instead of its long, full pathname. Reported-by: Pavel Krauz Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1268676230-9261-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 13 +++++++++---- tools/perf/util/symbol.c | 18 +++++++++++++----- tools/perf/util/symbol.h | 3 ++- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0b719e3dde05..8364c8aba194 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -455,7 +455,7 @@ static void print_sym_table(void) struct sym_entry *syme, *n; struct rb_root tmp = RB_ROOT; struct rb_node *nd; - int sym_width = 0, dso_width = 0, max_dso_width; + int sym_width = 0, dso_width = 0, dso_short_width; const int win_width = winsize.ws_col - 1; samples = userspace_samples = 0; @@ -545,15 +545,20 @@ static void print_sym_table(void) if (syme->map->dso->long_name_len > dso_width) dso_width = syme->map->dso->long_name_len; + if (syme->map->dso->short_name_len > dso_short_width) + dso_short_width = syme->map->dso->short_name_len; + if (syme->name_len > sym_width) sym_width = syme->name_len; } printed = 0; - max_dso_width = winsize.ws_col - sym_width - 29; - if (dso_width > max_dso_width) - dso_width = max_dso_width; + if (sym_width + dso_width > winsize.ws_col - 29) { + dso_width = dso_short_width; + if (sym_width + dso_width > winsize.ws_col - 29) + sym_width = winsize.ws_col - dso_width - 29; + } putchar('\n'); if (nr_counters == 1) printf(" samples pcnt"); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 323c0aea0a91..c458c4a371d1 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -163,9 +163,17 @@ void dso__set_long_name(struct dso *self, char *name) self->long_name_len = strlen(name); } +static void dso__set_short_name(struct dso *self, const char *name) +{ + if (name == NULL) + return; + self->short_name = name; + self->short_name_len = strlen(name); +} + static void dso__set_basename(struct dso *self) { - self->short_name = basename(self->long_name); + dso__set_short_name(self, basename(self->long_name)); } struct dso *dso__new(const char *name) @@ -176,7 +184,7 @@ struct dso *dso__new(const char *name) int i; strcpy(self->name, name); dso__set_long_name(self, self->name); - self->short_name = self->name; + dso__set_short_name(self, self->name); for (i = 0; i < MAP__NR_TYPES; ++i) self->symbols[i] = self->symbol_names[i] = RB_ROOT; self->slen_calculated = 0; @@ -897,7 +905,6 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, struct kmap *kmap = self->kernel ? map__kmap(map) : NULL; struct map *curr_map = map; struct dso *curr_dso = self; - size_t dso_name_len = strlen(self->short_name); Elf_Data *symstrs, *secstrs; uint32_t nr_syms; int err = -1; @@ -987,7 +994,8 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, char dso_name[PATH_MAX]; if (strcmp(section_name, - curr_dso->short_name + dso_name_len) == 0) + (curr_dso->short_name + + self->short_name_len)) == 0) goto new_symbol; if (strcmp(section_name, ".text") == 0) { @@ -1782,7 +1790,7 @@ struct dso *dso__new_kernel(const char *name) struct dso *self = dso__new(name ?: "[kernel.kallsyms]"); if (self != NULL) { - self->short_name = "[kernel]"; + dso__set_short_name(self, "[kernel]"); self->kernel = 1; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 280dadd32a08..f30a37428919 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -110,9 +110,10 @@ struct dso { u8 sorted_by_name; u8 loaded; u8 build_id[BUILD_ID_SIZE]; - u16 long_name_len; const char *short_name; char *long_name; + u16 long_name_len; + u16 short_name_len; char name[0]; }; From 67c7ff7c56f38a8ab338fbbfe366621ce6303ba1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 15 Mar 2010 13:02:28 -0400 Subject: [PATCH 4/7] perf probe: Fix offset to allow signed value Fix dereference offset to intmax_t from uintmax_t, because it can have negative values (for example local variable's offset from frame pointer). Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20100315170228.31852.71946.stgit@localhost6.localdomain6> Signed-off-by: Ingo Molnar --- tools/perf/util/probe-finder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index f9cbbf18e6ca..0e8c8f1594ec 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -333,8 +333,8 @@ static void show_location(Dwarf_Op *op, struct probe_finder *pf) die("%u exceeds max register number.", regn); if (deref) - ret = snprintf(pf->buf, pf->len, " %s=+%ju(%s)", - pf->var, (uintmax_t)offs, regs); + ret = snprintf(pf->buf, pf->len, " %s=%+jd(%s)", + pf->var, (intmax_t)offs, regs); else ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs); DIE_IF(ret < 0); From d0cb4260f899d07462d49fc67e29f2438dbaca2f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 15 Mar 2010 13:02:35 -0400 Subject: [PATCH 5/7] perf probe: Use original address instead of CU-based address Use original address for looking up the location of variables for dwarf_getlocation_addr() instead of CU-based address. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20100315170235.31852.91195.stgit@localhost6.localdomain6> Signed-off-by: Ingo Molnar --- tools/perf/util/probe-finder.c | 11 ++--------- tools/perf/util/probe-finder.h | 1 - 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 0e8c8f1594ec..c171a243d05b 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -352,8 +352,7 @@ static void show_variable(Dwarf_Die *vr_die, struct probe_finder *pf) if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL) goto error; /* TODO: handle more than 1 exprs */ - ret = dwarf_getlocation_addr(&attr, (pf->addr - pf->cu_base), - &expr, &nexpr, 1); + ret = dwarf_getlocation_addr(&attr, pf->addr, &expr, &nexpr, 1); if (ret <= 0 || nexpr == 0) goto error; @@ -437,8 +436,7 @@ static void show_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) /* Get the frame base attribute/ops */ dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr); - ret = dwarf_getlocation_addr(&fb_attr, (pf->addr - pf->cu_base), - &pf->fb_ops, &nops, 1); + ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) pf->fb_ops = NULL; @@ -644,7 +642,6 @@ static void find_probe_point_by_func(struct probe_finder *pf) int find_probe_point(int fd, struct probe_point *pp) { struct probe_finder pf = {.pp = pp}; - int ret; Dwarf_Off off, noff; size_t cuhl; Dwarf_Die *diep; @@ -671,10 +668,6 @@ int find_probe_point(int fd, struct probe_point *pp) pf.fname = NULL; if (!pp->file || pf.fname) { - /* Save CU base address (for frame_base) */ - ret = dwarf_lowpc(&pf.cu_die, &pf.cu_base); - if (ret != 0) - pf.cu_base = 0; if (pp->function) find_probe_point_by_func(&pf); else if (pp->lazy_line) diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index d1a651793ba6..21f7354397b4 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -71,7 +71,6 @@ struct probe_finder { /* For variable searching */ Dwarf_Op *fb_ops; /* Frame base attribute */ - Dwarf_Addr cu_base; /* Current CU base address */ const char *var; /* Current variable name */ char *buf; /* Current output buffer */ int len; /* Length of output buffer */ From 00909e955125e90a6ebb34671c56c4c851e62951 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Mar 2010 18:28:46 -0300 Subject: [PATCH 6/7] perf top: Add missing initialization to zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dso_short_width has to start as zero, as we're calculating the maximum short DSO name length, somehow I missed this one. Reported-by: Frédéric Weisbecker Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1268774926-27488-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 8364c8aba194..1f529321607e 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -455,7 +455,7 @@ static void print_sym_table(void) struct sym_entry *syme, *n; struct rb_root tmp = RB_ROOT; struct rb_node *nd; - int sym_width = 0, dso_width = 0, dso_short_width; + int sym_width = 0, dso_width = 0, dso_short_width = 0; const int win_width = winsize.ws_col - 1; samples = userspace_samples = 0; From 9eff26ea48bfbe2885b158742a7512a097ec911b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 18 Mar 2010 16:05:13 +1100 Subject: [PATCH 7/7] powerpc/perf_events: Fix call-graph recording, add perf_arch_fetch_caller_regs This implements a powerpc version of perf_arch_fetch_caller_regs to get correct call-graphs. It's implemented in assembly because that way we can be sure there isn't a stack frame for perf_arch_fetch_caller_regs. If it was in C, gcc might or might not create a stack frame for it, which would affect the number of levels we have to skip. With this, we see results from perf record -e lock:lock_acquire like this: # Samples: 24878 # # Overhead Command Shared Object Symbol # ........ .............. ................. ...... # 14.99% perf [kernel.kallsyms] [k] ._raw_spin_lock | --- ._raw_spin_lock | |--25.00%-- .alloc_fd | (nil) | | | |--50.00%-- .anon_inode_getfd | | .sys_perf_event_open | | syscall_exit | | syscall | | create_counter | | __cmd_record | | run_builtin | | main | | 0xfd2e704 | | 0xfd2e8c0 | | (nil) ... etc. Signed-off-by: Paul Mackerras Acked-by: Benjamin Herrenschmidt Cc: anton@samba.org Cc: linuxppc-dev@ozlabs.org Cc: Peter Zijlstra Cc: Frederic Weisbecker LKML-Reference: <20100318050513.GA6575@drongo> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/asm-compat.h | 2 ++ arch/powerpc/kernel/misc.S | 28 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index c1b475a941eb..a9b91ed3d4b9 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -28,6 +28,7 @@ #define PPC_LLARX(t, a, b, eh) PPC_LDARX(t, a, b, eh) #define PPC_STLCX stringify_in_c(stdcx.) #define PPC_CNTLZL stringify_in_c(cntlzd) +#define PPC_LR_STKOFF 16 /* Move to CR, single-entry optimized version. Only available * on POWER4 and later. @@ -51,6 +52,7 @@ #define PPC_STLCX stringify_in_c(stwcx.) #define PPC_CNTLZL stringify_in_c(cntlzw) #define PPC_MTOCRF stringify_in_c(mtcrf) +#define PPC_LR_STKOFF 4 #endif diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 2d29752cbe16..b485a87c94e1 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -127,3 +127,31 @@ _GLOBAL(__setup_cpu_power7) _GLOBAL(__restore_cpu_power7) /* place holder */ blr + +#ifdef CONFIG_EVENT_TRACING +/* + * Get a minimal set of registers for our caller's nth caller. + * r3 = regs pointer, r5 = n. + * + * We only get R1 (stack pointer), NIP (next instruction pointer) + * and LR (link register). These are all we can get in the + * general case without doing complicated stack unwinding, but + * fortunately they are enough to do a stack backtrace, which + * is all we need them for. + */ +_GLOBAL(perf_arch_fetch_caller_regs) + mr r6,r1 + cmpwi r5,0 + mflr r4 + ble 2f + mtctr r5 +1: PPC_LL r6,0(r6) + bdnz 1b + PPC_LL r4,PPC_LR_STKOFF(r6) +2: PPC_LL r7,0(r6) + PPC_LL r7,PPC_LR_STKOFF(r7) + PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3) + PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3) + PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3) + blr +#endif /* CONFIG_EVENT_TRACING */