diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2ffb8221917a8e2d47888eaedc9139a7515f3f34..9b5089c5dffeea1446e4adcd9d274e9141880cec 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -740,6 +740,8 @@ struct outstate {
 	bool newline;
 	const char *prefix;
 	int  nfields;
+	int  id, nr;
+	struct perf_evsel *evsel;
 };
 
 #define METRIC_LEN  35
@@ -755,12 +757,9 @@ static void do_new_line_std(struct outstate *os)
 {
 	fputc('\n', os->fh);
 	fputs(os->prefix, os->fh);
+	aggr_printout(os->evsel, os->id, os->nr);
 	if (stat_config.aggr_mode == AGGR_NONE)
 		fprintf(os->fh, "        ");
-	if (stat_config.aggr_mode == AGGR_CORE)
-		fprintf(os->fh, "                  ");
-	if (stat_config.aggr_mode == AGGR_SOCKET)
-		fprintf(os->fh, "            ");
 	fprintf(os->fh, "                                                 ");
 }
 
@@ -798,6 +797,7 @@ static void new_line_csv(void *ctx)
 	fputc('\n', os->fh);
 	if (os->prefix)
 		fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+	aggr_printout(os->evsel, os->id, os->nr);
 	for (i = 0; i < os->nfields; i++)
 		fputs(csv_sep, os->fh);
 }
@@ -855,6 +855,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+	int i;
+
+	if (!aggr_get_id)
+		return 0;
+
+	if (stat_config.aggr_mode == AGGR_NONE)
+		return id;
+
+	if (stat_config.aggr_mode == AGGR_GLOBAL)
+		return 0;
+
+	for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+		int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+		if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+			return cpu2;
+	}
+	return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 	FILE *output = stat_config.output;
@@ -891,7 +913,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	struct perf_stat_output_ctx out;
 	struct outstate os = {
 		.fh = stat_config.output,
-		.prefix = prefix ? prefix : ""
+		.prefix = prefix ? prefix : "",
+		.id = id,
+		.nr = nr,
+		.evsel = counter,
 	};
 	print_metric_t pm = print_metric_std;
 	void (*nl)(void *);
@@ -958,16 +983,37 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	}
 
 	perf_stat__print_shadow_stats(counter, uval,
-				stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
-				cpu_map__id_to_cpu(id),
+				first_shadow_cpu(counter, id),
 				&out);
-
 	if (!csv_output) {
 		print_noise(counter, noise);
 		print_running(run, ena);
 	}
 }
 
+static void aggr_update_shadow(void)
+{
+	int cpu, s2, id, s;
+	u64 val;
+	struct perf_evsel *counter;
+
+	for (s = 0; s < aggr_map->nr; s++) {
+		id = aggr_map->map[s];
+		evlist__for_each(evsel_list, counter) {
+			val = 0;
+			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+				s2 = aggr_get_id(evsel_list->cpus, cpu);
+				if (s2 != id)
+					continue;
+				val += perf_counts(counter->counts, cpu, 0)->val;
+			}
+			val = val * counter->scale;
+			perf_stat__update_shadow_stats(counter, &val,
+						       first_shadow_cpu(counter, id));
+		}
+	}
+}
+
 static void print_aggr(char *prefix)
 {
 	FILE *output = stat_config.output;
@@ -979,6 +1025,8 @@ static void print_aggr(char *prefix)
 	if (!(aggr_map || aggr_get_id))
 		return;
 
+	aggr_update_shadow();
+
 	for (s = 0; s < aggr_map->nr; s++) {
 		id = aggr_map->map[s];
 		evlist__for_each(evsel_list, counter) {
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 367e220e93d57d87a7c66e5feb7628c0d6affe36..5e2d2e34e1bc0224ae89622ba1e1d43c380bd54c 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -14,6 +14,13 @@ enum {
 
 #define NUM_CTX CTX_BIT_MAX
 
+/*
+ * AGGR_GLOBAL: Use CPU 0
+ * AGGR_SOCKET: Use first CPU of socket
+ * AGGR_CORE: Use first CPU of core
+ * AGGR_NONE: Use matching CPU
+ * AGGR_THREAD: Not supported?
+ */
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];