This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Bug kprobes/2064] New: Support pagepoint probes


Hi jkenisto,

On Fri, Dec 16, 2005 at 09:10:45PM -0000, jkenisto at us dot ibm dot com wrote:
> It has been requested that we support "pagepoint" probes.  Such a probe is
> analogous to a watchpoint probe; but with a pagepoint probe, neither the number
> of concurrent probes nor the size of the probed area is limited by the CPU's
> debug-register architecture.

And I suppose these probes need to work on physical and also on virtual
page addresses for particular processes?

> Pagepoint probes would presumably be implemented by fussing the permission bits
> on the probed page(s) and hooking the page-fault handler (or exploiting the
> existing kprobes hook). 

I encountered a similar requirement for a project to track page accesses
(on a per-virtual-mapping basis, not physical addresses), and it was
solved by:

- disabling the PRESENT bit of the page-table entry in question
- setting a "_PAGE_DISABLED" bit (using a free bit in the pte flags)
- hooking into the page-fault handler to identify disabled PTEs, reinstantiate them 
immediately, and call my private accounting function.

The plan is to convert the hook to SystemTap if possible. 

diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 088a945..db8c3f7 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -116,7 +116,7 @@ void paging_init(void);
 #define _PAGE_DIRTY	0x040
 #define _PAGE_PSE	0x080	/* 4 MB (or 2MB) page, Pentium+, if present.. */
 #define _PAGE_GLOBAL	0x100	/* Global TLB entry PPro+ */
-#define _PAGE_UNUSED1	0x200	/* available for programmer */
+#define _PAGE_DISABLED	0x200	/* for pagetrace */
 #define _PAGE_UNUSED2	0x400
 #define _PAGE_UNUSED3	0x800
 
@@ -225,6 +225,7 @@ static inline int pte_read(pte_t pte)		{
 static inline int pte_dirty(pte_t pte)		{ return (pte).pte_low & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return (pte).pte_low & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte)		{ return (pte).pte_low & _PAGE_RW; }
+static inline int pte_disabled(pte_t pte)	{ return (pte).pte_low & _PAGE_DISABLED; }
 static inline int pte_huge(pte_t pte)		{ return ((pte).pte_low & __LARGE_PTE) == __LARGE_PTE; }
 
 /*
@@ -237,11 +238,15 @@ static inline pte_t pte_exprotect(pte_t 
 static inline pte_t pte_mkclean(pte_t pte)	{ (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkold(pte_t pte)	{ (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
 static inline pte_t pte_wrprotect(pte_t pte)	{ (pte).pte_low &= ~_PAGE_RW; return pte; }
+static inline pte_t pte_presprotect(pte_t pte)	{ (pte).pte_low &= ~_PAGE_PRESENT; return pte; }
+static inline pte_t pte_enable(pte_t pte)	{ (pte).pte_low &= ~_PAGE_DISABLED; return pte; }
 static inline pte_t pte_mkread(pte_t pte)	{ (pte).pte_low |= _PAGE_USER; return pte; }
 static inline pte_t pte_mkexec(pte_t pte)	{ (pte).pte_low |= _PAGE_USER; return pte; }
 static inline pte_t pte_mkdirty(pte_t pte)	{ (pte).pte_low |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)	{ (pte).pte_low |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkwrite(pte_t pte)	{ (pte).pte_low |= _PAGE_RW; return pte; }
+static inline pte_t pte_mkpresent(pte_t pte)	{ (pte).pte_low |= _PAGE_PRESENT; return pte; }
+static inline pte_t pte_disable(pte_t pte)	{ (pte).pte_low |= _PAGE_DISABLED; return pte; }
 static inline pte_t pte_mkhuge(pte_t pte)	{ (pte).pte_low |= __LARGE_PTE; return pte; }
 
 #ifdef CONFIG_X86_PAE
diff --git a/mm/memory.c b/mm/memory.c
index 4b4fc3a..b57e808 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -418,7 +418,7 @@ copy_one_pte(struct mm_struct *dst_mm, s
 	struct page *page;
 
 	/* pte contains position in swap or file, so copy. */
-	if (unlikely(!pte_present(pte))) {
+	if (unlikely(!pte_present(pte)) && !pte_disabled(pte)) {
 		if (!pte_file(pte)) {
 			swap_duplicate(pte_to_swp_entry(pte));
 			/* make sure dst_mm is on swapoff's mmlist. */
@@ -606,7 +606,7 @@ static unsigned long zap_pte_range(struc
 			(*zap_work)--;
 			continue;
 		}
-		if (pte_present(ptent)) {
+		if (pte_present(ptent) || pte_disabled(ptent)) {
 			struct page *page;
 
 			(*zap_work) -= PAGE_SIZE;
@@ -908,7 +908,7 @@ struct page *follow_page(struct vm_area_
 		goto out;
 
 	pte = *ptep;
-	if (!pte_present(pte))
+	if (!pte_present(pte) && !pte_disabled(pte))
 		goto unlock;
 	if ((flags & FOLL_WRITE) && !pte_write(pte))
 		goto unlock;
@@ -2199,6 +2199,18 @@ static inline int handle_pte_fault(struc
 
 	old_entry = entry = *pte;
 	if (!pte_present(entry)) {
+		if (pte_disabled(entry)) {
+			ptl = pte_lockptr(mm, pmd);
+			spin_lock(ptl);
+			if (unlikely(!pte_same(*pte, entry)))
+				goto unlock;
+			pgtrace(vma, 0, address);
+			set_pte_at(vma->vm_mm, address, pte, pte_enable(pte_mkpresent(entry)));
+			flush_tlb_page(vma, address);
+			update_mmu_cache(vma, address, entry);
+			pte_unmap(pte);
+			goto unlock;
+		}
 		if (pte_none(entry)) {
 			if (!vma->vm_ops || !vma->vm_ops->nopage)
 				return do_anonymous_page(mm, vma, address,




> 
> -- 
>            Summary: Support pagepoint probes
>            Product: systemtap
>            Version: unspecified
>             Status: NEW
>           Severity: enhancement
>           Priority: P3
>          Component: kprobes
>         AssignedTo: systemtap at sources dot redhat dot com
>         ReportedBy: jkenisto at us dot ibm dot com
> 
> 
> http://sourceware.org/bugzilla/show_bug.cgi?id=2064
> 
> ------- You are receiving this mail because: -------
> You are the assignee for the bug, or are watching the assignee.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]