[PATCH] /proc/<pid>/numa_maps to show on which nodes pages reside
This patch was recently discussed on linux-mm:
http://marc.theaimsgroup.com/?t=112085728500002&r=1&w=2
I inherited a large code base from Ray for page migration. There was a
small patch in there that I find to be very useful since it allows the
display of the locality of the pages in use by a process. I reworked that
patch and came up with a /proc/<pid>/numa_maps that gives more information
about the vma's of a process. numa_maps is indexes by the start address
found in /proc/<pid>/maps. F.e. with this patch you can see the page use
of the "getty" process:
margin:/proc/12008 # cat maps
00000000-00004000 r--p 00000000 00:00 0
2000000000000000-200000000002c000 r-xp 00000000 08:04 516 /lib/ld-2.3.3.so
2000000000038000-2000000000040000 rw-p 00028000 08:04 516 /lib/ld-2.3.3.so
2000000000040000-2000000000044000 rw-p 2000000000040000 00:00 0
2000000000058000-2000000000260000 r-xp 00000000 08:04 54707842 /lib/tls/libc.so.6.1
2000000000260000-2000000000268000 ---p 00208000 08:04 54707842 /lib/tls/libc.so.6.1
2000000000268000-2000000000274000 rw-p 00200000 08:04 54707842 /lib/tls/libc.so.6.1
2000000000274000-2000000000280000 rw-p 2000000000274000 00:00 0
2000000000280000-20000000002b4000 r--p 00000000 08:04 9126923 /usr/lib/locale/en_US.utf8/LC_CTYPE
2000000000300000-2000000000308000 r--s 00000000 08:04 60071467 /usr/lib/gconv/gconv-modules.cache
2000000000318000-2000000000328000 rw-p 2000000000318000 00:00 0
4000000000000000-4000000000008000 r-xp 00000000 08:04 29576399 /sbin/mingetty
6000000000004000-6000000000008000 rw-p 00004000 08:04 29576399 /sbin/mingetty
6000000000008000-600000000002c000 rw-p 6000000000008000 00:00 0 [heap]
60000fff7fffc000-60000fff80000000 rw-p 60000fff7fffc000 00:00 0
60000ffffff44000-60000ffffff98000 rw-p 60000ffffff44000 00:00 0 [stack]
a000000000000000-a000000000020000 ---p 00000000 00:00 0 [vdso]
cat numa_maps
2000000000000000 default MaxRef=43 Pages=11 Mapped=11 N0=4 N1=3 N2=2 N3=2
2000000000038000 default MaxRef=1 Pages=2 Mapped=2 Anon=2 N0=2
2000000000040000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1
2000000000058000 default MaxRef=43 Pages=61 Mapped=61 N0=14 N1=15 N2=16 N3=16
2000000000268000 default MaxRef=1 Pages=2 Mapped=2 Anon=2 N0=2
2000000000274000 default MaxRef=1 Pages=3 Mapped=3 Anon=3 N0=3
2000000000280000 default MaxRef=8 Pages=3 Mapped=3 N0=3
2000000000300000 default MaxRef=8 Pages=2 Mapped=2 N0=2
2000000000318000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N2=1
4000000000000000 default MaxRef=6 Pages=2 Mapped=2 N1=2
6000000000004000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1
6000000000008000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1
60000fff7fffc000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1
60000ffffff44000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1
getty uses ld.so. The first vma is the code segment which is used by 43
other processes and the pages are evenly distributed over the 4 nodes.
The second vma is the process specific data portion for ld.so. This is
only one page.
The display format is:
<startaddress> Links to information in /proc/<pid>/map
<memory policy> This can be "default" "interleave={}", "prefer=<node>" or "bind={<zones>}"
MaxRef= <maximum reference to a page in this vma>
Pages= <Nr of pages in use>
Mapped= <Nr of pages with mapcount >
Anon= <nr of anonymous pages>
Nx= <Nr of pages on Node x>
The content of the proc-file is self-evident. If this would be tied into
the sparsemem system then the contents of this file would not be too
useful.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 28b4a02..64e84ca 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2,6 +2,8 @@
#include <linux/hugetlb.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
+#include <linux/pagemap.h>
+#include <linux/mempolicy.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -233,3 +235,133 @@
.stop = m_stop,
.show = show_map
};
+
+#ifdef CONFIG_NUMA
+
+struct numa_maps {
+ unsigned long pages;
+ unsigned long anon;
+ unsigned long mapped;
+ unsigned long mapcount_max;
+ unsigned long node[MAX_NUMNODES];
+};
+
+/*
+ * Calculate numa node maps for a vma
+ */
+static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma)
+{
+ struct page *page;
+ unsigned long vaddr;
+ struct mm_struct *mm = vma->vm_mm;
+ int i;
+ struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL);
+
+ if (!md)
+ return NULL;
+ md->pages = 0;
+ md->anon = 0;
+ md->mapped = 0;
+ md->mapcount_max = 0;
+ for_each_node(i)
+ md->node[i] =0;
+
+ spin_lock(&mm->page_table_lock);
+ for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
+ page = follow_page(mm, vaddr, 0);
+ if (page) {
+ int count = page_mapcount(page);
+
+ if (count)
+ md->mapped++;
+ if (count > md->mapcount_max)
+ md->mapcount_max = count;
+ md->pages++;
+ if (PageAnon(page))
+ md->anon++;
+ md->node[page_to_nid(page)]++;
+ }
+ }
+ spin_unlock(&mm->page_table_lock);
+ return md;
+}
+
+static int show_numa_map(struct seq_file *m, void *v)
+{
+ struct task_struct *task = m->private;
+ struct vm_area_struct *vma = v;
+ struct mempolicy *pol;
+ struct numa_maps *md;
+ struct zone **z;
+ int n;
+ int first;
+
+ if (!vma->vm_mm)
+ return 0;
+
+ md = get_numa_maps(vma);
+ if (!md)
+ return 0;
+
+ seq_printf(m, "%08lx", vma->vm_start);
+ pol = get_vma_policy(task, vma, vma->vm_start);
+ /* Print policy */
+ switch (pol->policy) {
+ case MPOL_PREFERRED:
+ seq_printf(m, " prefer=%d", pol->v.preferred_node);
+ break;
+ case MPOL_BIND:
+ seq_printf(m, " bind={");
+ first = 1;
+ for (z = pol->v.zonelist->zones; *z; z++) {
+
+ if (!first)
+ seq_putc(m, ',');
+ else
+ first = 0;
+ seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id,
+ (*z)->name);
+ }
+ seq_putc(m, '}');
+ break;
+ case MPOL_INTERLEAVE:
+ seq_printf(m, " interleave={");
+ first = 1;
+ for_each_node(n) {
+ if (test_bit(n, pol->v.nodes)) {
+ if (!first)
+ seq_putc(m,',');
+ else
+ first = 0;
+ seq_printf(m, "%d",n);
+ }
+ }
+ seq_putc(m, '}');
+ break;
+ default:
+ seq_printf(m," default");
+ break;
+ }
+ seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu",
+ md->mapcount_max, md->pages, md->mapped);
+ if (md->anon)
+ seq_printf(m," Anon=%lu",md->anon);
+
+ for_each_online_node(n) {
+ if (md->node[n])
+ seq_printf(m, " N%d=%lu", n, md->node[n]);
+ }
+ seq_putc(m, '\n');
+ kfree(md);
+ if (m->count < m->size) /* vma is copied successfully */
+ m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
+ return 0;
+}
+
+struct seq_operations proc_pid_numa_maps_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_numa_map
+};
+#endif