From a5f4a6932ec2e1a53642e97a1be64bc7b169942f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 25 Oct 2015 00:49:24 +0900 Subject: [PATCH 01/12] perf tools: Improve ambiguous option help message Currently if an option name is ambiguous it only prints first two matched option names but no help. It'd be better it could show all possible names and help messages too. Before: $ perf report --show Error: Ambiguous option: show (could be --show-total-period or --show-ref-call-graph) Usage: perf report [] After: $ perf report --show Error: Ambiguous option: show (could be --show-total-period or --show-ref-call-graph) Usage: perf report [] -n, --show-nr-samples Show a column with the number of samples --showcpuutilization Show sample percentage for different cpu modes -I, --show-info Display extended information about perf.data file --show-total-period Show a column with the sum of periods --show-ref-call-graph Show callgraph from reference event Signed-off-by: Namhyung Kim Acked-by: Ingo Molnar Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1445701767-12731-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-options.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 22c2806bda98..b8d98229a8af 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -770,24 +770,23 @@ int parse_options_usage(const char * const *usagestr, opt: for ( ; opts->type != OPTION_END; opts++) { if (short_opt) { - if (opts->short_name == *optstr) + if (opts->short_name == *optstr) { + print_option_help(opts, 0); break; + } continue; } if (opts->long_name == NULL) continue; - if (!prefixcmp(optstr, opts->long_name)) - break; - if (!prefixcmp(optstr, "no-") && - !prefixcmp(optstr + 3, opts->long_name)) - break; + if (!prefixcmp(opts->long_name, optstr)) + print_option_help(opts, 0); + if (!prefixcmp("no-", optstr) && + !prefixcmp(opts->long_name, optstr + 3)) + print_option_help(opts, 0); } - if (opts->type != OPTION_END) - print_option_help(opts, 0); - return PARSE_OPT_HELP; } From b272a59d835cd8ca6b45f41c66c61b473996c759 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 25 Oct 2015 00:49:25 +0900 Subject: [PATCH 02/12] perf report: Rename to --show-cpu-utilization So that it can be more consistent with other --show-* options. The old name (--showcpuutilization) is provided only for compatibility. Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1445701767-12731-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 +- tools/perf/builtin-report.c | 4 +++- tools/perf/util/parse-options.h | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index ab1fd64e3627..5ce8da1e1256 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -29,7 +29,7 @@ OPTIONS --show-nr-samples:: Show the number of samples for each symbol ---showcpuutilization:: +--show-cpu-utilization:: Show sample percentage for different cpu modes. -T:: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 50dd4d3d8667..2853ad2bd435 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -699,8 +699,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) " Please refer the man page for the complete list."), OPT_STRING('F', "fields", &field_order, "key[,keys...]", "output field(s): overhead, period, sample plus all of sort keys"), - OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, + OPT_BOOLEAN(0, "show-cpu-utilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), + OPT_BOOLEAN_FLAG(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, + "Show sample percentage for different cpu modes", PARSE_OPT_HIDDEN), OPT_STRING('p', "parent", &parent_pattern, "regex", "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index 367d8b816cc7..182c86099330 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -111,6 +111,7 @@ struct option { #define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) } #define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h), .defval = (b) } #define OPT_BOOLEAN(s, l, v, h) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h) } +#define OPT_BOOLEAN_FLAG(s, l, v, h, f) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h), .flags = (f) } #define OPT_BOOLEAN_SET(s, l, v, os, h) \ { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), \ .value = check_vtype(v, bool *), .help = (h), \ From 01b19455c08cc37d1c3ef174524278e84c92fec1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 25 Oct 2015 00:49:26 +0900 Subject: [PATCH 03/12] perf tools: Setup pager when printing usage and help It's annoying to see error or help message when command has many options like in perf record, report or top. So setup pager when print parser error or help message - it should be OK since no UI is enabled at the parsing time. The usage_with_options() already disables it by calling exit_browser() anyway. Signed-off-by: Namhyung Kim Acked-by: Ingo Molnar Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1445701767-12731-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-options.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index b8d98229a8af..eeeed98eb26d 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -7,6 +7,8 @@ #define OPT_SHORT 1 #define OPT_UNSET 2 +static struct strbuf error_buf = STRBUF_INIT; + static int opterror(const struct option *opt, const char *reason, int flags) { if (flags & OPT_SHORT) @@ -540,9 +542,11 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o exit(130); default: /* PARSE_OPT_UNKNOWN */ if (ctx.argv[0][1] == '-') { - error("unknown option `%s'", ctx.argv[0] + 2); + strbuf_addf(&error_buf, "unknown option `%s'", + ctx.argv[0] + 2); } else { - error("unknown switch `%c'", *ctx.opt); + strbuf_addf(&error_buf, "unknown switch `%c'", + *ctx.opt); } usage_with_options(usagestr, options); } @@ -711,6 +715,13 @@ int usage_with_options_internal(const char * const *usagestr, if (!usagestr) return PARSE_OPT_HELP; + setup_pager(); + + if (strbuf_avail(&error_buf)) { + fprintf(stderr, " Error: %s\n", error_buf.buf); + strbuf_release(&error_buf); + } + fprintf(stderr, "\n Usage: %s\n", *usagestr++); while (*usagestr && **usagestr) fprintf(stderr, " or: %s\n", *usagestr++); From c71183697250b356be6c7c1abc2e9a74073e1dca Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 25 Oct 2015 00:49:27 +0900 Subject: [PATCH 04/12] perf tools: Introduce usage_with_options_msg() Now usage_with_options() setup a pager before printing message so normal printf() or pr_err() will not be shown. The usage_with_options_msg() can be used to print some help message before usage strings. Signed-off-by: Namhyung Kim Acked-by: Masami Hiramatsu Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1445701767-12731-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-evlist.c | 4 ++-- tools/perf/builtin-probe.c | 20 ++++++++++++-------- tools/perf/builtin-record.c | 11 ++++++----- tools/perf/builtin-sched.c | 4 ++-- tools/perf/builtin-script.c | 8 ++++---- tools/perf/util/parse-options.c | 15 +++++++++++++++ tools/perf/util/parse-options.h | 4 ++++ tools/perf/util/strbuf.c | 22 +++++++++++++++------- tools/perf/util/strbuf.h | 2 ++ 9 files changed, 62 insertions(+), 28 deletions(-) diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 695ec5a50cf2..f4d62510acbb 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -61,8 +61,8 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(evlist_usage, options); if (details.event_group && (details.verbose || details.freq)) { - pr_err("--group option is not compatible with other options\n"); - usage_with_options(evlist_usage, options); + usage_with_options_msg(evlist_usage, options, + "--group option is not compatible with other options\n"); } return __cmd_evlist(input_name, &details); diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 530c3a28a58c..132afc97676c 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -528,12 +528,12 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) PARSE_OPT_STOP_AT_NON_OPTION); if (argc > 0) { if (strcmp(argv[0], "-") == 0) { - pr_warning(" Error: '-' is not supported.\n"); - usage_with_options(probe_usage, options); + usage_with_options_msg(probe_usage, options, + "'-' is not supported.\n"); } if (params.command && params.command != 'a') { - pr_warning(" Error: another command except --add is set.\n"); - usage_with_options(probe_usage, options); + usage_with_options_msg(probe_usage, options, + "another command except --add is set.\n"); } ret = parse_probe_event_argv(argc, argv); if (ret < 0) { @@ -562,8 +562,10 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) switch (params.command) { case 'l': if (params.uprobes) { - pr_warning(" Error: Don't use --list with --exec.\n"); - usage_with_options(probe_usage, options); + pr_err(" Error: Don't use --list with --exec.\n"); + parse_options_usage(probe_usage, options, "l", true); + parse_options_usage(NULL, options, "x", true); + return -EINVAL; } ret = show_perf_probe_events(params.filter); if (ret < 0) @@ -603,8 +605,10 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) case 'a': /* Ensure the last given target is used */ if (params.target && !params.target_used) { - pr_warning(" Error: -x/-m must follow the probe definitions.\n"); - usage_with_options(probe_usage, options); + pr_err(" Error: -x/-m must follow the probe definitions.\n"); + parse_options_usage(probe_usage, options, "m", true); + parse_options_usage(NULL, options, "x", true); + return -EINVAL; } ret = perf_add_probe_events(params.events, params.nevents); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2740d7a82ae8..de02267c73d8 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1135,14 +1135,15 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(record_usage, record_options); if (nr_cgroups && !rec->opts.target.system_wide) { - ui__error("cgroup monitoring only available in" - " system-wide mode\n"); - usage_with_options(record_usage, record_options); + usage_with_options_msg(record_usage, record_options, + "cgroup monitoring only available in system-wide mode"); + } if (rec->opts.record_switch_events && !perf_can_record_switch_events()) { - ui__error("kernel does not support recording context switch events (--switch-events option)\n"); - usage_with_options(record_usage, record_options); + ui__error("kernel does not support recording context switch events\n"); + parse_options_usage(record_usage, record_options, "switch-events", 0); + return -EINVAL; } if (!rec->itr) { diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 33962612a5e9..0ee6d900e100 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1728,8 +1728,8 @@ static void setup_sorting(struct perf_sched *sched, const struct option *options for (tok = strtok_r(str, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { if (sort_dimension__add(tok, &sched->sort_list) < 0) { - error("Unknown --sort key: `%s'", tok); - usage_with_options(usage_msg, options); + usage_with_options_msg(usage_msg, options, + "Unknown --sort key: `%s'", tok); } } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 2653c0273b89..278acb22f029 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1767,9 +1767,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) rep_script_path = get_script_path(argv[0], REPORT_SUFFIX); if (!rec_script_path && !rep_script_path) { - fprintf(stderr, " Couldn't find script %s\n\n See perf" + usage_with_options_msg(script_usage, options, + "Couldn't find script `%s'\n\n See perf" " script -l for available scripts.\n", argv[0]); - usage_with_options(script_usage, options); } if (is_top_script(argv[0])) { @@ -1780,10 +1780,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) rep_args = has_required_arg(rep_script_path); rec_args = (argc - 1) - rep_args; if (rec_args < 0) { - fprintf(stderr, " %s script requires options." + usage_with_options_msg(script_usage, options, + "`%s' script requires options." "\n\n See perf script -l for available " "scripts and options.\n", argv[0]); - usage_with_options(script_usage, options); } } diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index eeeed98eb26d..230e771407a3 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -760,6 +760,21 @@ void usage_with_options(const char * const *usagestr, exit(129); } +void usage_with_options_msg(const char * const *usagestr, + const struct option *opts, const char *fmt, ...) +{ + va_list ap; + + exit_browser(false); + + va_start(ap, fmt); + strbuf_addv(&error_buf, fmt, ap); + va_end(ap); + + usage_with_options_internal(usagestr, opts, 0, NULL); + exit(129); +} + int parse_options_usage(const char * const *usagestr, const struct option *opts, const char *optstr, bool short_opt) diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index 182c86099330..a8e407bc251e 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -161,6 +161,10 @@ extern int parse_options_subcommand(int argc, const char **argv, extern NORETURN void usage_with_options(const char * const *usagestr, const struct option *options); +extern NORETURN __attribute__((format(printf,3,4))) +void usage_with_options_msg(const char * const *usagestr, + const struct option *options, + const char *fmt, ...); /*----- incremantal advanced APIs -----*/ diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 4abe23550c73..25671fa16618 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -82,23 +82,22 @@ void strbuf_add(struct strbuf *sb, const void *data, size_t len) strbuf_setlen(sb, sb->len + len); } -void strbuf_addf(struct strbuf *sb, const char *fmt, ...) +void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) { int len; - va_list ap; + va_list ap_saved; if (!strbuf_avail(sb)) strbuf_grow(sb, 64); - va_start(ap, fmt); + + va_copy(ap_saved, ap); len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); - va_end(ap); if (len < 0) die("your vsnprintf is broken"); if (len > strbuf_avail(sb)) { strbuf_grow(sb, len); - va_start(ap, fmt); - len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); - va_end(ap); + len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved); + va_end(ap_saved); if (len > strbuf_avail(sb)) { die("this should not happen, your vsnprintf is broken"); } @@ -106,6 +105,15 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...) strbuf_setlen(sb, sb->len + len); } +void strbuf_addf(struct strbuf *sb, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + strbuf_addv(sb, fmt, ap); + va_end(ap); +} + ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint) { size_t oldlen = sb->len; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 436ac319f6c7..529f2f035249 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -39,6 +39,7 @@ */ #include +#include extern char strbuf_slopbuf[]; struct strbuf { @@ -85,6 +86,7 @@ static inline void strbuf_addstr(struct strbuf *sb, const char *s) { __attribute__((format(printf,2,3))) extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); +extern void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap); /* XXX: if read fails, any partial read is undone */ extern ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); From af3399817428d8adc2c87c91df23fde77dbcdb35 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 25 Oct 2015 15:51:44 +0100 Subject: [PATCH 05/12] perf evsel: Move id_offset out of struct perf_evsel union member Because the 'perf stat record' patches will use the id_offset member together with the priv pointer. Signed-off-by: Jiri Olsa Tested-by: Kan Liang Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1445784728-21732-29-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 02a5fed8d924..0e22c0b7dab5 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -90,9 +90,9 @@ struct perf_evsel { double scale; const char *unit; struct event_format *tp_format; + off_t id_offset; union { void *priv; - off_t id_offset; u64 db_id; }; struct cgroup_sel *cgrp; From 2322f573f8131da9c6d1fab01fe0a0c2c23aa549 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 25 Oct 2015 15:51:17 +0100 Subject: [PATCH 06/12] perf cpu_map: Add cpu_map__empty_new function Adding cpu_map__empty_new interface to create empty cpumap with given size. The cpumap entries are initialized with -1. It'll be used for caching cpu_map in following patches. Signed-off-by: Jiri Olsa Tested-by: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1445784728-21732-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 17 +++++++++++++++++ tools/perf/util/cpumap.h | 1 + 2 files changed, 18 insertions(+) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index aa6b490aa471..10af1e7524fb 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -203,6 +203,23 @@ struct cpu_map *cpu_map__dummy_new(void) return cpus; } +struct cpu_map *cpu_map__empty_new(int nr) +{ + struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr); + + if (cpus != NULL) { + int i; + + cpus->nr = nr; + for (i = 0; i < nr; i++) + cpus->map[i] = -1; + + atomic_set(&cpus->refcnt, 1); + } + + return cpus; +} + static void cpu_map__delete(struct cpu_map *map) { if (map) { diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index f1bcd2cfa164..85f7772457fa 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -15,6 +15,7 @@ struct cpu_map { }; struct cpu_map *cpu_map__new(const char *cpu_list); +struct cpu_map *cpu_map__empty_new(int nr); struct cpu_map *cpu_map__dummy_new(void); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); From 1e5a29318ba8506f52a8d727b5c6a53143f9882a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 25 Oct 2015 15:51:18 +0100 Subject: [PATCH 07/12] perf stat: Cache aggregated map entries in extra cpumap Currently any time we need to access socket or core id for given cpu, we access the sysfs topology file. Adding a cpus_aggr_map cpu_map to cache those entries. Signed-off-by: Jiri Olsa Tested-by: Kan Liang Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1445784728-21732-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 59 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 91e793a76929..2f438f76cceb 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -100,6 +100,8 @@ static struct target target = { .uid = UINT_MAX, }; +typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); + static int run_count = 1; static bool no_inherit = false; static volatile pid_t child_pid = -1; @@ -119,7 +121,7 @@ static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static struct timespec ref_time; static struct cpu_map *aggr_map; -static int (*aggr_get_id)(struct cpu_map *m, int cpu); +static aggr_get_id_t aggr_get_id; static volatile int done = 0; @@ -954,22 +956,63 @@ static int perf_stat__get_core(struct cpu_map *map, int cpu) return cpu_map__get_core(map, cpu, NULL); } +static int cpu_map__get_max(struct cpu_map *map) +{ + int i, max = -1; + + for (i = 0; i < map->nr; i++) { + if (map->map[i] > max) + max = map->map[i]; + } + + return max; +} + +static struct cpu_map *cpus_aggr_map; + +static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) +{ + int cpu; + + if (idx >= map->nr) + return -1; + + cpu = map->map[idx]; + + if (cpus_aggr_map->map[cpu] == -1) + cpus_aggr_map->map[cpu] = get_id(map, idx); + + return cpus_aggr_map->map[cpu]; +} + +static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) +{ + return perf_stat__get_aggr(perf_stat__get_socket, map, idx); +} + +static int perf_stat__get_core_cached(struct cpu_map *map, int idx) +{ + return perf_stat__get_aggr(perf_stat__get_core, map, idx); +} + static int perf_stat_init_aggr_mode(void) { + int nr; + switch (stat_config.aggr_mode) { case AGGR_SOCKET: if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { perror("cannot build socket map"); return -1; } - aggr_get_id = perf_stat__get_socket; + aggr_get_id = perf_stat__get_socket_cached; break; case AGGR_CORE: if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { perror("cannot build core map"); return -1; } - aggr_get_id = perf_stat__get_core; + aggr_get_id = perf_stat__get_core_cached; break; case AGGR_NONE: case AGGR_GLOBAL: @@ -978,7 +1021,15 @@ static int perf_stat_init_aggr_mode(void) default: break; } - return 0; + + /* + * The evsel_list->cpus is the base we operate on, + * taking the highest cpu number to be the size of + * the aggregation translate cpumap. + */ + nr = cpu_map__get_max(evsel_list->cpus); + cpus_aggr_map = cpu_map__empty_new(nr + 1); + return cpus_aggr_map ? 0 : -ENOMEM; } /* From f4efcce33d2e5224a905369f9906f3931f5d907c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 27 Oct 2015 17:35:58 -0300 Subject: [PATCH 08/12] perf tools: Search for more options when passing args to -h Recently 'perf -h' was made aware of arguments and would show just the help for the arguments specified, but that required a strict form, i.e.: $ perf -h --tui worked, but: $ perf -h tui didn't. Make it support both cases and also look at the option help when neither matches, so that he following examples works: $ perf report -h interface Usage: perf report [] --gtk Use the GTK2 interface --stdio Use the stdio interface --tui Use the TUI interface $ perf report -h stack Usage: perf report [] -g, --call-graph Display call graph (stack chain/backtrace): print_type: call graph printing style (graph|flat|fractal|none) threshold: minimum call graph inclusion threshold () print_limit: maximum number of call graph entry () order: call graph order (caller|callee) sort_key: call graph sort key (function|address) branch: include last branch info to call graph (branch) Default: graph,0.5,caller,function --max-stack Set the maximum stack depth when parsing the callchain, anything beyond the specified depth will be ignored. Default: 127 $ Suggested-by: Ingo Molnar Cc: Adrian Hunter Cc: Borislav Petkov Cc: Brendan Gregg Cc: Chandler Carruth Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-xzqvamzqv3cv0p6w3inhols3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-options.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 230e771407a3..9fca09296eb3 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -695,8 +695,21 @@ static bool option__in_argv(const struct option *opt, const struct parse_opt_ctx for (i = 1; i < ctx->argc; ++i) { const char *arg = ctx->argv[i]; - if (arg[0] != '-') + if (arg[0] != '-') { + if (arg[1] == '\0') { + if (arg[0] == opt->short_name) + return true; + continue; + } + + if (opt->long_name && strcmp(opt->long_name, arg) == 0) + return true; + + if (opt->help && strcasestr(opt->help, arg) != NULL) + return true; + continue; + } if (arg[1] == opt->short_name || (arg[1] == '-' && opt->long_name && strcmp(opt->long_name, arg + 2) == 0)) From f2f3096888569ff1fc15fa0187a39eef3a24b28e Mon Sep 17 00:00:00 2001 From: Dima Kogan Date: Mon, 7 Sep 2015 17:30:28 -0700 Subject: [PATCH 09/12] perf symbols: Fix type error when reading a build-id This was benign, but wrong. The build-id should live in a char[], not a char*[] Signed-off-by: Dima Kogan Link: http://lkml.kernel.org/r/87si6pfwz4.fsf@secretsauce.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-minimal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index fd8477cacf88..48906333a858 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -337,7 +337,7 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, symbol_filter_t filter __maybe_unused, int kmodule __maybe_unused) { - unsigned char *build_id[BUILD_ID_SIZE]; + unsigned char build_id[BUILD_ID_SIZE]; int ret; ret = fd__is_64_bit(ss->fd); From 5baecbcd9c9a2f491afe1369fc22e7363f9c94d5 Mon Sep 17 00:00:00 2001 From: Dima Kogan Date: Mon, 7 Sep 2015 17:34:19 -0700 Subject: [PATCH 10/12] perf symbols: we can now read separate debug-info files based on a build ID Recent GDB (at least on a vanilla Debian box) looks for debug information in /usr/lib/debug/.build-id/nn/nnnnnnn where nn/nnnnnn is the build-id of the stripped ELF binary. This is documented here: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html This was not working in perf because we didn't read the build id until AFTER we searched for the separate debug information file. This patch reads the build ID and THEN does the search. Signed-off-by: Dima Kogan Link: http://lkml.kernel.org/r/87si6pfwz4.fsf@secretsauce.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e7bf0c46918d..71bf711a628c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1406,6 +1406,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) struct symsrc ss_[2]; struct symsrc *syms_ss = NULL, *runtime_ss = NULL; bool kmod; + unsigned char build_id[BUILD_ID_SIZE]; pthread_mutex_lock(&dso->lock); @@ -1461,6 +1462,14 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE || dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; + + /* + * Read the build id if possible. This is required for + * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work + */ + if (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0) + dso__set_build_id(dso, build_id); + /* * Iterate over candidate debug images. * Keep track of "interesting" ones (those which have a symtab, dynsym, From 374ce938aaeb481114b2a8fdedd261f9b2ff9b2b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 28 Oct 2015 10:55:02 +0000 Subject: [PATCH 11/12] perf tools: Enable pre-event inherit setting by config terms This patch allows perf record setting event's attr.inherit bit by config terms like: # perf record -e cycles/no-inherit/ ... # perf record -e cycles/inherit/ ... So user can control inherit bit for each event separately. In following example, a.out fork()s in main then do some complex CPU intensive computations in both of its children. Basic result with and without inherit: # perf record -e cycles -e instructions ./a.out [ perf record: Woken up 9 times to write data ] [ perf record: Captured and wrote 2.205 MB perf.data (47920 samples) ] # perf report --stdio # ... # Samples: 23K of event 'cycles' # Event count (approx.): 23641752891 ... # Samples: 24K of event 'instructions' # Event count (approx.): 30428312415 # perf record -i -e cycles -e instructions ./a.out [ perf record: Woken up 5 times to write data ] [ perf record: Captured and wrote 1.111 MB perf.data (24019 samples) ] ... # Samples: 12K of event 'cycles' # Event count (approx.): 11699501775 ... # Samples: 12K of event 'instructions' # Event count (approx.): 15058023559 Cancel inherit for one event when globally enable: # perf record -e cycles/no-inherit/ -e instructions ./a.out [ perf record: Woken up 7 times to write data ] [ perf record: Captured and wrote 1.660 MB perf.data (36004 samples) ] ... # Samples: 12K of event 'cycles/no-inherit/' # Event count (approx.): 11895759282 ... # Samples: 24K of event 'instructions' # Event count (approx.): 30668000441 Enable inherit for one event when globally disable: # perf record -i -e cycles/inherit/ -e instructions ./a.out [ perf record: Woken up 7 times to write data ] [ perf record: Captured and wrote 1.654 MB perf.data (35868 samples) ] ... # Samples: 23K of event 'cycles/inherit/' # Event count (approx.): 23285400229 ... # Samples: 11K of event 'instructions' # Event count (approx.): 14969050259 Committer note: One can check if the bit was set, in addition to seeing the result in the perf.data file size as above by doing one of: # perf record -e cycles -e instructions -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.911 MB perf.data (63 samples) ] # perf evlist -v cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 instructions: size: 112, config: 0x1, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, inherit: 1, freq: 1, sample_id_all: 1, exclude_guest: 1 # So, the inherit bit was set in both, now, if we disable it globally using --no-inherit: # perf record --no-inherit -e cycles -e instructions -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.910 MB perf.data (56 samples) ] # perf evlist -v cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 instructions: size: 112, config: 0x1, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, freq: 1, sample_id_all: 1, exclude_guest: 1 No inherit bit set, then disabling it and setting just on the cycles event: # perf record --no-inherit -e cycles/inherit/ -e instructions -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.909 MB perf.data (48 samples) ] # perf evlist -v cycles/inherit/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 instructions: size: 112, config: 0x1, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, freq: 1, sample_id_all: 1, exclude_guest: 1 # We can see it as well in by using a more verbose level of debug messages in the tool that sets up the perf_event_attr, 'perf record' in this case: [root@zoo ~]# perf record -vv --no-inherit -e cycles/inherit/ -e instructions -a usleep 1 ------------------------------------------------------------ perf_event_attr: size 112 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|ID|CPU|PERIOD read_format ID disabled 1 inherit 1 mmap 1 comm 1 freq 1 task 1 sample_id_all 1 exclude_guest 1 mmap2 1 comm_exec 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 ------------------------------------------------------------ perf_event_attr: size 112 config 0x1 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|ID|CPU|PERIOD read_format ID disabled 1 freq 1 sample_id_all 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 Signed-off-by: Wang Nan Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: David S. Miller Cc: Li Zefan Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446029705-199659-2-git-send-email-wangnan0@huawei.com [ s/u64/bool/ for the perf_evsel_config_term inherit field - jolsa] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 9 +++++++++ tools/perf/util/evsel.h | 2 ++ tools/perf/util/parse-events.c | 14 ++++++++++++++ tools/perf/util/parse-events.h | 2 ++ tools/perf/util/parse-events.l | 2 ++ 5 files changed, 29 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ab05fa52a5cd..3ac4ee9c6a6e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -652,6 +652,15 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_STACK_USER: dump_size = term->val.stack_user; break; + case PERF_EVSEL__CONFIG_TERM_INHERIT: + /* + * attr->inherit should has already been set by + * perf_evsel__config. If user explicitly set + * inherit using config terms, override global + * opt->no_inherit setting. + */ + attr->inherit = term->val.inherit ? 1 : 0; + break; default: break; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0e22c0b7dab5..1e8ff1906f71 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -43,6 +43,7 @@ enum { PERF_EVSEL__CONFIG_TERM_TIME, PERF_EVSEL__CONFIG_TERM_CALLGRAPH, PERF_EVSEL__CONFIG_TERM_STACK_USER, + PERF_EVSEL__CONFIG_TERM_INHERIT, PERF_EVSEL__CONFIG_TERM_MAX, }; @@ -55,6 +56,7 @@ struct perf_evsel_config_term { bool time; char *callgraph; u64 stack_user; + bool inherit; } val; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 991bbd469bea..72abcf254ccb 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -666,6 +666,12 @@ do { \ case PARSE_EVENTS__TERM_TYPE_STACKSIZE: CHECK_TYPE_VAL(NUM); break; + case PARSE_EVENTS__TERM_TYPE_INHERIT: + CHECK_TYPE_VAL(NUM); + break; + case PARSE_EVENTS__TERM_TYPE_NOINHERIT: + CHECK_TYPE_VAL(NUM); + break; case PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; @@ -701,6 +707,8 @@ static int config_term_tracepoint(struct perf_event_attr *attr, switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + case PARSE_EVENTS__TERM_TYPE_INHERIT: + case PARSE_EVENTS__TERM_TYPE_NOINHERIT: return config_term_common(attr, term, err); default: if (err) { @@ -764,6 +772,12 @@ do { \ case PARSE_EVENTS__TERM_TYPE_STACKSIZE: ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num); break; + case PARSE_EVENTS__TERM_TYPE_INHERIT: + ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 1 : 0); + break; + case PARSE_EVENTS__TERM_TYPE_NOINHERIT: + ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1); + break; default: break; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index f13d3ccda444..13c9063513eb 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -67,6 +67,8 @@ enum { PARSE_EVENTS__TERM_TYPE_TIME, PARSE_EVENTS__TERM_TYPE_CALLGRAPH, PARSE_EVENTS__TERM_TYPE_STACKSIZE, + PARSE_EVENTS__TERM_TYPE_NOINHERIT, + PARSE_EVENTS__TERM_TYPE_INHERIT }; struct parse_events_term { diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index be244573a02e..8d0de5b2991d 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -187,6 +187,8 @@ branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } +inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } +no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } From 443f8c75e8d58d394b0e65b47e02e5cd8ed32b41 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 26 Oct 2015 14:51:30 +0100 Subject: [PATCH 12/12] perf symbols: Fix endless loop in dso__split_kallsyms_for_kcore Currently we split symbols based on the map comparison, but symbols are stored within dso objects and maps could point into same dso objects (kernel maps). Hence we could end up changing rbtree we are currently iterating and mess it up. It's easily reproduced on s390x by running: $ perf record -a -- sleep 3 $ perf buildid-list -i perf.data --with-hits The fix is to compare dso objects instead. Reported-by: Michael Petlan Signed-off-by: Jiri Olsa Acked-by: Adrian Hunter Cc: Andi Kleen Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20151026135130.GA26003@krava.brq.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 71bf711a628c..b4cc7662677e 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -680,7 +680,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, pos->start -= curr_map->start - curr_map->pgoff; if (pos->end) pos->end -= curr_map->start - curr_map->pgoff; - if (curr_map != map) { + if (curr_map->dso != map->dso) { rb_erase_init(&pos->rb_node, root); symbols__insert( &curr_map->dso->symbols[curr_map->type],