commit a0078717a8a0407ea5c278c3b43eaac5740a41eb Author: Jeff Dike Date: Thu Jan 10 10:44:45 2008 -0500 Fixed skas3 patch for 2.6.23. Brokenness in 2.6.23 included use of current->mm in the mmap path causing new maps to be done in the UML kernel address space rather than the process address space. The -EINVAL that everyone started seeing with 2.6.23 was caused by a change in procfs. file->f_ops was no longer proc_mm_ops, but proc_reg_ops, with proc_mm_ops hidden elsewhere. This broke the sanity checking in proc_mm_get_mm which made sure that it was getting a /proc/mm descriptor by checking that file->f_ops was &proc_mm_ops. diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 97b64d7..129ae08 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -612,6 +612,26 @@ config X86_PAE has the cost of more pagetable lookup overhead, and also consumes more pagetable space per process. +config PROC_MM + bool "/proc/mm support" + default y + +config PROC_MM_DUMPABLE + bool "Make UML childs /proc/ completely browsable" + default n + help + If in doubt, say N. + + This fiddles with some settings to make sure /proc/ is completely + browsable by who started UML, at the expense of some additional + locking (maybe this could slow down the runned UMLs of a few percents, + I've not tested this). + + Also, if there is a bug in this feature, there is some little + possibility to do privilege escalation if you have UML installed + setuid (which you shouldn't have done) or if UML changes uid on + startup (which will be a good thing, when enabled) ... 
+ # Common NUMA Features config NUMA bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c index e0b2d17..dc80de4 100644 --- a/arch/i386/kernel/ldt.c +++ b/arch/i386/kernel/ldt.c @@ -27,11 +27,12 @@ static void flush_ldt(void *null) } #endif -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +static int alloc_ldt(struct mm_struct *mm, int mincount, int reload) { void *oldldt; void *newldt; int oldsize; + mm_context_t * pc = &mm->context; if (mincount <= pc->size) return 0; @@ -58,13 +59,15 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) #ifdef CONFIG_SMP cpumask_t mask; preempt_disable(); - load_LDT(pc); + if (&current->active_mm->context == pc) + load_LDT(pc); mask = cpumask_of_cpu(smp_processor_id()); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) + if (!cpus_equal(mm->cpu_vm_mask, mask)) smp_call_function(flush_ldt, NULL, 1, 1); preempt_enable(); #else - load_LDT(pc); + if (&current->active_mm->context == pc) + load_LDT(pc); #endif } if (oldsize) { @@ -76,12 +79,12 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) return 0; } -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +static inline int copy_ldt(struct mm_struct *new, struct mm_struct *old) { - int err = alloc_ldt(new, old->size, 0); + int err = alloc_ldt(new, old->context.size, 0); if (err < 0) return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); + memcpy(new->context.ldt, old->context.ldt, old->context.size*LDT_ENTRY_SIZE); return 0; } @@ -89,22 +92,24 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) * we do not have to muck with descriptors here, that is * done in switch_mm() as needed. 
*/ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +int copy_context(struct mm_struct *mm, struct mm_struct *old_mm) { - struct mm_struct * old_mm; int retval = 0; - init_MUTEX(&mm->context.sem); - mm->context.size = 0; - old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { down(&old_mm->context.sem); - retval = copy_ldt(&mm->context, &old_mm->context); + retval = copy_ldt(mm, old_mm); up(&old_mm->context.sem); } return retval; } +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + init_new_empty_context(mm); + return copy_context(mm, current->mm); +} + /* * No need to lock the MM as we are the last user */ @@ -121,11 +126,11 @@ void destroy_context(struct mm_struct *mm) } } -static int read_ldt(void __user * ptr, unsigned long bytecount) +static int read_ldt(struct mm_struct * mm, void __user * ptr, + unsigned long bytecount) { int err; unsigned long size; - struct mm_struct * mm = current->mm; if (!mm->context.size) return 0; @@ -172,9 +177,8 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount) return err; } -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) +static int write_ldt(struct mm_struct * mm, void __user * ptr, unsigned long bytecount, int oldmode) { - struct mm_struct * mm = current->mm; __u32 entry_1, entry_2; int error; struct user_desc ldt_info; @@ -198,7 +202,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) down(&mm->context.sem); if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); + error = alloc_ldt(mm, ldt_info.entry_number+1, 1); if (error < 0) goto out_unlock; } @@ -228,23 +232,33 @@ out: return error; } -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +int __modify_ldt(struct mm_struct * mm, int func, void __user *ptr, + unsigned long bytecount) { int ret = -ENOSYS; switch (func) { case 0: - ret = read_ldt(ptr, 
bytecount); + ret = read_ldt(mm, ptr, bytecount); break; case 1: - ret = write_ldt(ptr, bytecount, 1); + ret = write_ldt(mm, ptr, bytecount, 1); break; case 2: ret = read_default_ldt(ptr, bytecount); break; case 0x11: - ret = write_ldt(ptr, bytecount, 0); + ret = write_ldt(mm, ptr, bytecount, 0); break; } return ret; } + +asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +{ + int ret = __modify_ldt(current->mm, func, ptr, bytecount); + /* A tail call would reorder parameters on the stack and they would then + * be restored at the wrong places. */ + prevent_tail_call(ret); + return ret; +} diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 7c1b925..a78f642 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -616,6 +617,66 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) (struct user_desc __user *) data); break; +#ifdef CONFIG_PROC_MM + case PTRACE_EX_FAULTINFO: { + struct ptrace_ex_faultinfo fault; + + fault = ((struct ptrace_ex_faultinfo) + { .is_write = child->thread.error_code, + .addr = child->thread.cr2, + .trap_no = child->thread.trap_no }); + ret = copy_to_user((unsigned long *) data, &fault, + sizeof(fault)); + break; + } + + case PTRACE_FAULTINFO: { + struct ptrace_faultinfo fault; + + fault = ((struct ptrace_faultinfo) + { .is_write = child->thread.error_code, + .addr = child->thread.cr2 }); + ret = copy_to_user((unsigned long *) data, &fault, + sizeof(fault)); + break; + } + + case PTRACE_LDT: { + struct ptrace_ldt ldt; + + if(copy_from_user(&ldt, (unsigned long *) data, + sizeof(ldt))){ + ret = -EIO; + break; + } + ret = __modify_ldt(child->mm, ldt.func, ldt.ptr, ldt.bytecount); + break; + } + + case PTRACE_SWITCH_MM: { + struct mm_struct *old = child->mm; + struct mm_struct *new = proc_mm_get_mm(data); + + if(IS_ERR(new)){ + ret = PTR_ERR(new); + break; + } + + 
atomic_inc(&new->mm_users); + + lock_fix_dumpable_setting(child, new); + + child->mm = new; + child->active_mm = new; + + task_unlock(child); + + mmput(old); + ret = 0; + break; + } +#endif + default: ret = ptrace_request(child, request, addr, data); break; diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c index 4214730..82162f8 100644 --- a/arch/i386/kernel/sys_i386.c +++ b/arch/i386/kernel/sys_i386.c @@ -23,6 +23,7 @@ #include #include #include +#include /* * sys_pipe() is the normal C calling standard for creating @@ -41,13 +42,12 @@ asmlinkage int sys_pipe(unsigned long __user * fildes) return error; } -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) +long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, unsigned long fd, + unsigned long pgoff) { int error = -EBADF; struct file *file = NULL; - struct mm_struct *mm = current->mm; flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { @@ -57,7 +57,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, } down_write(&mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + error = __do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff); up_write(&mm->mmap_sem); if (file) @@ -66,6 +66,18 @@ out: return error; } +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + long ret = do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff); + + /* A tail call would reorder parameters on the stack and they would then + * be restored at the wrong places. */ + prevent_tail_call(ret); + return ret; +} + /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. 
Linux/i386 didn't use to be able to handle more than @@ -94,8 +106,11 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) if (a.offset & ~PAGE_MASK) goto out; - err = sys_mmap2(a.addr, a.len, a.prot, a.flags, + err = do_mmap2(current->mm, a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + /* A tail call would reorder parameters on the stack and they would then + * be restored at the wrong places. */ + prevent_tail_call(err); out: return err; } diff --git a/arch/um/include/skas_ptrace.h b/arch/um/include/skas_ptrace.h index cd2327d..93f2562 100644 --- a/arch/um/include/skas_ptrace.h +++ b/arch/um/include/skas_ptrace.h @@ -6,6 +6,8 @@ #ifndef __SKAS_PTRACE_H #define __SKAS_PTRACE_H +#ifndef PTRACE_FAULTINFO + #define PTRACE_FAULTINFO 52 #define PTRACE_SWITCH_MM 55 @@ -13,6 +15,8 @@ #endif +#endif + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index b4d9089..93d71cc 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -522,6 +522,26 @@ config SWIOTLB access 32-bits of memory can be used on systems with more than 3 GB of memory. If unsure, say Y. +config PROC_MM + bool "/proc/mm support" + default y + +config PROC_MM_DUMPABLE + bool "Make UML childs /proc/ completely browsable" + default n + help + If in doubt, say N. + + This fiddles with some settings to make sure /proc/ is completely + browsable by who started UML, at the expense of some additional + locking (maybe this could slow down the runned UMLs of a few percents, + I've not tested this). + + Also, if there is a bug in this feature, there is some little + possibility to do privilege escalation if you have UML installed + setuid (which you shouldn't have done) or if UML changes uid on + startup (which will be a good thing, when enabled) ... 
+ config X86_MCE bool "Machine check support" if EMBEDDED default y diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c index 4a233ad..cdc8a45 100644 --- a/arch/x86_64/ia32/ptrace32.c +++ b/arch/x86_64/ia32/ptrace32.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include #include @@ -27,6 +29,7 @@ #include #include #include +#include /* * Determines which flags the user has access to [1 = access, 0 = no access]. @@ -266,6 +269,12 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) case PTRACE_SETFPXREGS: case PTRACE_GETFPXREGS: case PTRACE_GETEVENTMSG: +#ifdef CONFIG_PROC_MM + case PTRACE_EX_FAULTINFO: + case PTRACE_FAULTINFO: + case PTRACE_LDT: + case PTRACE_SWITCH_MM: +#endif break; case PTRACE_SETSIGINFO: @@ -388,6 +397,65 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) ret = 0; break; } +#ifdef CONFIG_PROC_MM + case PTRACE_EX_FAULTINFO: { + struct ptrace_ex_faultinfo32 fault; + + fault = ((struct ptrace_ex_faultinfo32) + { .is_write = (compat_int_t) child->thread.error_code, + .addr = (compat_uptr_t) child->thread.cr2, + .trap_no = (compat_int_t) child->thread.trap_no }); + ret = copy_to_user((unsigned long *) datap, &fault, + sizeof(fault)); + break; + } + + case PTRACE_FAULTINFO: { + struct ptrace_faultinfo32 fault; + + fault = ((struct ptrace_faultinfo32) + { .is_write = (compat_int_t) child->thread.error_code, + .addr = (compat_uptr_t) child->thread.cr2 }); + ret = copy_to_user((unsigned long *) datap, &fault, + sizeof(fault)); + break; + } + + case PTRACE_LDT: { + struct ptrace_ldt32 ldt; + + if(copy_from_user(&ldt, (unsigned long *) datap, + sizeof(ldt))){ + ret = -EIO; + break; + } + ret = __modify_ldt(child->mm, ldt.func, compat_ptr(ldt.ptr), ldt.bytecount); + break; + } + + case PTRACE_SWITCH_MM: { + struct mm_struct *old = child->mm; + struct mm_struct *new = proc_mm_get_mm(data); + + if(IS_ERR(new)){ + ret = PTR_ERR(new); + break; + } + + 
atomic_inc(&new->mm_users); + + lock_fix_dumpable_setting(child, new); + + child->mm = new; + child->active_mm = new; + + task_unlock(child); + + mmput(old); + ret = 0; + break; + } +#endif case PTRACE_GETEVENTMSG: ret = put_user(child->ptrace_message,(unsigned int __user *)compat_ptr(data)); diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index bee96d6..8f12455 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -693,11 +693,10 @@ sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count) return ret; } -asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, +long do32_mmap2(struct mm_struct *mm, unsigned long addr, + unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { - struct mm_struct *mm = current->mm; unsigned long error; struct file * file = NULL; @@ -709,7 +708,7 @@ asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, } down_write(&mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + error = __do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff); up_write(&mm->mmap_sem); if (file) @@ -717,6 +716,15 @@ asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, return error; } +/* XXX: this wrapper can be probably removed, we can simply use the 64-bit + * version.*/ +asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + return do32_mmap2(current->mm, addr, len, prot, flags, fd, pgoff); +} + asmlinkage long sys32_olduname(struct oldold_utsname __user * name) { int err; diff --git a/arch/x86_64/kernel/ldt.c b/arch/x86_64/kernel/ldt.c index bc9ffd5..df69f73 100644 --- a/arch/x86_64/kernel/ldt.c +++ b/arch/x86_64/kernel/ldt.c @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ static 
void flush_ldt(void *null) @@ -30,11 +31,12 @@ static void flush_ldt(void *null) } #endif -static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) +static int alloc_ldt(struct mm_struct *mm, unsigned mincount, int reload) { void *oldldt; void *newldt; unsigned oldsize; + mm_context_t * pc = &mm->context; if (mincount <= (unsigned)pc->size) return 0; @@ -63,12 +65,14 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) preempt_disable(); mask = cpumask_of_cpu(smp_processor_id()); - load_LDT(pc); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) + if (&current->active_mm->context == pc) + load_LDT(pc); + if (!cpus_equal(mm->cpu_vm_mask, mask)) smp_call_function(flush_ldt, NULL, 1, 1); preempt_enable(); #else - load_LDT(pc); + if (&current->active_mm->context == pc) + load_LDT(pc); #endif } if (oldsize) { @@ -80,12 +84,12 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) return 0; } -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +static inline int copy_ldt(struct mm_struct *new, struct mm_struct *old) { - int err = alloc_ldt(new, old->size, 0); + int err = alloc_ldt(new, old->context.size, 0); if (err < 0) return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); + memcpy(new->context.ldt, old->context.ldt, old->context.size*LDT_ENTRY_SIZE); return 0; } @@ -93,22 +97,24 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) * we do not have to muck with descriptors here, that is * done in switch_mm() as needed. 
*/ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +int copy_context(struct mm_struct *mm, struct mm_struct *old_mm) { - struct mm_struct * old_mm; int retval = 0; - init_MUTEX(&mm->context.sem); - mm->context.size = 0; - old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { down(&old_mm->context.sem); - retval = copy_ldt(&mm->context, &old_mm->context); + retval = copy_ldt(mm, old_mm); up(&old_mm->context.sem); } return retval; } +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + init_new_empty_context(mm); + return copy_context(mm, current->mm); +} + /* * * Don't touch the LDT register - we're already in the next thread. @@ -124,11 +130,10 @@ void destroy_context(struct mm_struct *mm) } } -static int read_ldt(void __user * ptr, unsigned long bytecount) +static int read_ldt(struct mm_struct * mm, void __user * ptr, unsigned long bytecount) { int err; unsigned long size; - struct mm_struct * mm = current->mm; if (!mm->context.size) return 0; @@ -169,10 +174,8 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount) return bytecount; } -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) +static int write_ldt(struct mm_struct * mm, void __user * ptr, unsigned long bytecount, int oldmode) { - struct task_struct *me = current; - struct mm_struct * mm = me->mm; __u32 entry_1, entry_2, *lp; int error; struct user_desc ldt_info; @@ -197,7 +200,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) down(&mm->context.sem); if (ldt_info.entry_number >= (unsigned)mm->context.size) { - error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); + error = alloc_ldt(mm, ldt_info.entry_number+1, 1); if (error < 0) goto out_unlock; } @@ -230,23 +233,29 @@ out: return error; } -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +int __modify_ldt(struct mm_struct * mm, int func, void __user *ptr, + unsigned long bytecount) 
{ int ret = -ENOSYS; switch (func) { case 0: - ret = read_ldt(ptr, bytecount); + ret = read_ldt(mm, ptr, bytecount); break; case 1: - ret = write_ldt(ptr, bytecount, 1); + ret = write_ldt(mm, ptr, bytecount, 1); break; case 2: ret = read_default_ldt(ptr, bytecount); break; case 0x11: - ret = write_ldt(ptr, bytecount, 0); + ret = write_ldt(mm, ptr, bytecount, 0); break; } return ret; } + +asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +{ + return __modify_ldt(current->mm, func, ptr, bytecount); +} diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index eea3702..bfa59a0 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -561,6 +562,75 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } +#ifdef CONFIG_PROC_MM + case PTRACE_EX_FAULTINFO: { + struct ptrace_ex_faultinfo fault; + + /* I checked in thread_struct comments that error_code and cr2 + * are still part of the "fault info" section, so I guess that + * things are unchanged for now. Still to check manuals. BB*/ + fault = ((struct ptrace_ex_faultinfo) + { .is_write = child->thread.error_code, + .addr = child->thread.cr2, + .trap_no = child->thread.trap_no }); + ret = copy_to_user((unsigned long *) data, &fault, + sizeof(fault)); + break; + } + + /*Don't extend this broken interface to x86-64*/ +#if 0 + case PTRACE_FAULTINFO: { + struct ptrace_faultinfo fault; + + /* I checked in thread_struct comments that error_code and cr2 + * are still part of the "fault info" section, so I guess that + * things are unchanged for now. Still to check manuals. 
BB*/ + fault = ((struct ptrace_faultinfo) + { .is_write = child->thread.error_code, + .addr = child->thread.cr2 }); + ret = copy_to_user((unsigned long *) data, &fault, + sizeof(fault)); + break; + } +#endif + + case PTRACE_LDT: { + struct ptrace_ldt ldt; + + if(copy_from_user(&ldt, (unsigned long *) data, + sizeof(ldt))){ + ret = -EIO; + break; + } + ret = __modify_ldt(child->mm, ldt.func, ldt.ptr, ldt.bytecount); + break; + } + + case PTRACE_SWITCH_MM: { + struct mm_struct *old = child->mm; + struct mm_struct *new = proc_mm_get_mm64(data); + + if(IS_ERR(new)){ + ret = PTR_ERR(new); + break; + } + + atomic_inc(&new->mm_users); + + lock_fix_dumpable_setting(child, new); + + child->mm = new; + child->active_mm = new; + + task_unlock(child); + + mmput(old); + ret = 0; + break; + } +#endif + default: ret = ptrace_request(child, request, addr, data); break; diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c index 4770b7a..2dabd37 100644 --- a/arch/x86_64/kernel/sys_x86_64.c +++ b/arch/x86_64/kernel/sys_x86_64.c @@ -19,6 +19,7 @@ #include #include +#include /* * sys_pipe() is the normal C calling standard for creating @@ -37,7 +38,7 @@ asmlinkage long sys_pipe(int __user *fildes) return error; } -asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, +long do64_mmap(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off) { long error; @@ -55,9 +56,9 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long pr if (!file) goto out; } - down_write(&current->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT); - up_write(&current->mm->mmap_sem); + down_write(&mm->mmap_sem); + error = __do_mmap_pgoff(mm, file, addr, len, prot, flags, off >> PAGE_SHIFT); + up_write(&mm->mmap_sem); if (file) fput(file); @@ -65,6 +66,12 @@ out: return error; } +asmlinkage long 
sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long off) +{ + return do64_mmap(current->mm, addr, len, prot, flags, fd, off); +} + static void find_start_end(unsigned long flags, unsigned long *begin, unsigned long *end) { diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile index d25ac86..44160bf 100644 --- a/arch/x86_64/mm/Makefile +++ b/arch/x86_64/mm/Makefile @@ -7,5 +7,6 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_K8_NUMA) += k8topology.o obj-$(CONFIG_ACPI_NUMA) += srat.o +obj-$(CONFIG_PROC_MM) += proc_mm.o hugetlbpage-y = ../../i386/mm/hugetlbpage.o diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index c547403..1f9db83 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -216,6 +216,9 @@ static inline unsigned long get_desc_base(unsigned long *desc) return base; } +extern int __modify_ldt(struct mm_struct * mm, int func, void __user *ptr, + unsigned long bytecount); + #else /* __ASSEMBLY__ */ /* diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index 7eb0b0b..f110e43 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #ifndef CONFIG_PARAVIRT #include @@ -17,11 +18,22 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, /* - * Used for LDT copy/destruction. + * Used for LDT initialization/destruction. You cannot copy an LDT with + * init_new_context, since it thinks you are passing it a new LDT and won't + * deallocate its old content. 
*/ int init_new_context(struct task_struct *tsk, struct mm_struct *mm); void destroy_context(struct mm_struct *mm); +/* LDT initialization for a clean environment - needed for SKAS.*/ +static inline void init_new_empty_context(struct mm_struct *mm) +{ + init_MUTEX(&mm->context.sem); + mm->context.size = 0; +} + +/* LDT copy for SKAS - for the above problem.*/ +int copy_context(struct mm_struct *mm, struct mm_struct *old_mm); static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { @@ -40,6 +52,10 @@ static inline void switch_mm(struct mm_struct *prev, { int cpu = smp_processor_id(); +#ifdef CONFIG_SMP + prev = per_cpu(cpu_tlbstate, cpu).active_mm; +#endif + if (likely(prev != next)) { /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); @@ -61,7 +77,6 @@ static inline void switch_mm(struct mm_struct *prev, #ifdef CONFIG_SMP else { per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; - BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next); if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index 6002597..a631497 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -60,4 +60,33 @@ static inline int v8086_mode(struct pt_regs *regs) extern unsigned long profile_pc(struct pt_regs *regs); #endif /* __KERNEL__ */ +/*For SKAS3 support.*/ +#ifndef _LINUX_PTRACE_STRUCT_DEF +#define _LINUX_PTRACE_STRUCT_DEF + +#define PTRACE_FAULTINFO 52 +/* 53 was used for PTRACE_SIGPENDING, don't reuse it. 
*/ +#define PTRACE_LDT 54 +#define PTRACE_SWITCH_MM 55 +#define PTRACE_EX_FAULTINFO 56 + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ex_faultinfo { + int is_write; + unsigned long addr; + int trap_no; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#endif /*ifndef _LINUX_PTRACE_STRUCT_DEF*/ + #endif diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h index ac991b5..f6797fc 100644 --- a/include/asm-x86_64/desc.h +++ b/include/asm-x86_64/desc.h @@ -169,6 +169,9 @@ static inline void load_LDT(mm_context_t *pc) extern struct desc_ptr idt_descr; +extern int __modify_ldt(struct mm_struct * mm, int func, void __user *ptr, + unsigned long bytecount); + #endif /* !__ASSEMBLY__ */ #endif diff --git a/include/asm-x86_64/mmu_context.h b/include/asm-x86_64/mmu_context.h index 0cce83a..ad6a52a 100644 --- a/include/asm-x86_64/mmu_context.h +++ b/include/asm-x86_64/mmu_context.h @@ -7,14 +7,29 @@ #include #include #include +#include #include /* * possibly do the LDT unload here? + * Used for LDT initialization/destruction. You cannot copy an LDT with + * init_new_context, since it thinks you are passing it a new LDT and won't + * deallocate its old content. 
*/ + int init_new_context(struct task_struct *tsk, struct mm_struct *mm); void destroy_context(struct mm_struct *mm); +/* LDT initialization for a clean environment - needed for SKAS.*/ +static inline void init_new_empty_context(struct mm_struct *mm) +{ + init_MUTEX(&mm->context.sem); + mm->context.size = 0; +} + +/* LDT copy for SKAS - for the above problem.*/ +int copy_context(struct mm_struct *mm, struct mm_struct *old_mm); + static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP @@ -32,6 +47,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { unsigned cpu = smp_processor_id(); +#ifdef CONFIG_SMP + prev = read_pda(active_mm); +#endif if (likely(prev != next)) { /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); @@ -48,8 +66,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, #ifdef CONFIG_SMP else { write_pda(mmu_state, TLBSTATE_OK); - if (read_pda(active_mm) != next) - out_of_line_bug(); if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 diff --git a/include/asm-x86_64/ptrace-abi.h b/include/asm-x86_64/ptrace-abi.h index 19184b0..e36a4cf 100644 --- a/include/asm-x86_64/ptrace-abi.h +++ b/include/asm-x86_64/ptrace-abi.h @@ -42,6 +42,12 @@ #define PTRACE_GETFPXREGS 18 #define PTRACE_SETFPXREGS 19 +#define PTRACE_FAULTINFO 52 +/* 53 was used for PTRACE_SIGPENDING, don't reuse it. 
*/ +#define PTRACE_LDT 54 +#define PTRACE_SWITCH_MM 55 +#define PTRACE_EX_FAULTINFO 56 + /* only useful for access 32bit programs */ #define PTRACE_GET_THREAD_AREA 25 #define PTRACE_SET_THREAD_AREA 26 diff --git a/include/asm-x86_64/ptrace.h b/include/asm-x86_64/ptrace.h index 7f166cc..6dab73b 100644 --- a/include/asm-x86_64/ptrace.h +++ b/include/asm-x86_64/ptrace.h @@ -73,6 +73,59 @@ enum { EF_ID = 0x00200000, /* id */ }; +/* Stolen from +#include ; we can't include it because +there is a nasty ciclic include chain. +*/ + +#include + +#define compat_int_t s32 +#define compat_long_t s32 +#define compat_uint_t u32 +#define compat_ulong_t u32 +#define compat_uptr_t u32 + +struct ptrace_faultinfo32 { + compat_int_t is_write; + compat_ulong_t addr; +}; + +struct ptrace_ex_faultinfo32 { + compat_int_t is_write; + compat_ulong_t addr; + compat_int_t trap_no; +}; + +struct ptrace_ldt32 { + compat_int_t func; + compat_uptr_t ptr; /*Actually a void pointer on i386, but must be converted.*/ + compat_ulong_t bytecount; +}; + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ex_faultinfo { + int is_write; + unsigned long addr; + int trap_no; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#undef compat_int_t +#undef compat_long_t +#undef compat_uint_t +#undef compat_ulong_t +#undef compat_uptr_t + #endif #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 1692dd6..34e7be8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -5,6 +5,7 @@ #ifdef __KERNEL__ +#include #include #include #include @@ -1066,11 +1067,18 @@ extern int install_special_mapping(struct mm_struct *mm, extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); -extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, +extern unsigned long __do_mmap_pgoff(struct mm_struct *mm, struct file *file, + unsigned long addr, unsigned long len, + 
unsigned long prot, unsigned long flag, + unsigned long pgoff); +static inline unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flag, unsigned long pgoff); -extern unsigned long mmap_region(struct file *file, unsigned long addr, - unsigned long len, unsigned long flags, + unsigned long flag, unsigned long pgoff) { + return __do_mmap_pgoff(current->mm, file, addr, len, prot, flag, pgoff); +} + +extern unsigned long mmap_region(struct mm_struct *mm, struct file *file, + unsigned long addr, unsigned long len, unsigned long flags, unsigned int vm_flags, unsigned long pgoff, int accountable); @@ -1089,6 +1097,9 @@ out: extern int do_munmap(struct mm_struct *, unsigned long, size_t); +extern long do_mprotect(struct mm_struct *mm, unsigned long start, + size_t len, unsigned long prot); + extern unsigned long do_brk(unsigned long, unsigned long); /* filemap.c */ diff --git a/mm/Makefile b/mm/Makefile index 245e33a..0a34933 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -28,5 +28,10 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_SMP) += allocpercpu.o +obj-$(CONFIG_PROC_MM) += proc_mm.o + +ifeq ($(CONFIG_PROC_MM),y) +obj-m += proc_mm-mod.o +endif obj-$(CONFIG_QUICKLIST) += quicklist.o diff --git a/mm/fremap.c b/mm/fremap.c index 95bcb56..53d103b 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -190,8 +190,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long addr; flags &= MAP_NONBLOCK; - addr = mmap_region(vma->vm_file, start, size, - flags, vma->vm_flags, pgoff, 1); + addr = mmap_region(current->mm, vma->vm_file, start, + size, flags, vma->vm_flags, pgoff, + 1); if (IS_ERR_VALUE(addr)) { err = addr; } else { diff --git a/mm/mmap.c b/mm/mmap.c index 0d40e66..029089e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -888,12 +888,11 @@ void vm_stat_account(struct mm_struct *mm, 
unsigned long flags, /* * The caller must hold down_write(current->mm->mmap_sem). */ - -unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flags, unsigned long pgoff) +unsigned long __do_mmap_pgoff(struct mm_struct *mm, struct file * file, + unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long pgoff) { - struct mm_struct * mm = current->mm; struct inode *inode; unsigned int vm_flags; int error; @@ -1024,10 +1023,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, if (error) return error; - return mmap_region(file, addr, len, flags, vm_flags, pgoff, + return mmap_region(mm, file, addr, len, flags, vm_flags, pgoff, accountable); } -EXPORT_SYMBOL(do_mmap_pgoff); +EXPORT_SYMBOL(__do_mmap_pgoff); /* * Some shared mappigns will want the pages marked read-only @@ -1063,12 +1062,12 @@ int vma_wants_writenotify(struct vm_area_struct *vma) } -unsigned long mmap_region(struct file *file, unsigned long addr, +unsigned long mmap_region(struct mm_struct *mm, + struct file *file, unsigned long addr, unsigned long len, unsigned long flags, unsigned int vm_flags, unsigned long pgoff, int accountable) { - struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; int correct_wcount = 0; int error; diff --git a/mm/mprotect.c b/mm/mprotect.c index e8346c3..622d205 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -214,8 +214,9 @@ fail: return error; } -asmlinkage long -sys_mprotect(unsigned long start, size_t len, unsigned long prot) +long +do_mprotect(struct mm_struct *mm, unsigned long start, size_t len, + unsigned long prot) { unsigned long vm_flags, nstart, end, tmp, reqprot; struct vm_area_struct *vma, *prev; @@ -245,9 +246,9 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot) vm_flags = calc_vm_prot_bits(prot); - down_write(&current->mm->mmap_sem); + down_write(&mm->mmap_sem); - vma = find_vma_prev(current->mm, start, 
&prev); + vma = find_vma_prev(mm, start, &prev); error = -ENOMEM; if (!vma) goto out; @@ -309,6 +310,15 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot) } } out: - up_write(&current->mm->mmap_sem); + up_write(&mm->mmap_sem); return error; } + +asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot) +{ + long ret = do_mprotect(current->mm, start, len, prot); + /* A tail call would reorder parameters on the stack and they would then + * be restored at the wrong places. */ + prevent_tail_call(ret); + return ret; +}