# From: Bodo Stroesser # # s390 normally doesn't support a method, that allows us to force # the host to skip its syscall restart handling. # I implemented a new method in the host, which also is posted to # LKML to hopefully be inserted in s390 mainline. # To check availability of this change, I added a new check, which # is done in a slightly different way for the other arches, too. # Success in check_ptrace() and success in the new check are # absolutely necessary for UML to run in any mode. # So I changed the sequence of checks to: # 1) check_ptrace() being called at startup very early # 2) check_ptrace() calls the new check, too # 3) can_do_skas() is called after check_ptrace() # check_ptrace() will never return, if it fails, but it now uses # printf() and exit() instead of panic(). # # Signed-off-by: Bodo Stroesser Index: linux-2.6.17/arch/um/include/common-offsets.h =================================================================== --- linux-2.6.17.orig/arch/um/include/common-offsets.h 2007-10-24 10:04:50.000000000 -0400 +++ linux-2.6.17/arch/um/include/common-offsets.h 2007-11-19 11:14:27.000000000 -0500 @@ -10,6 +10,8 @@ DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); +DEFINE(UM_ERESTARTNOINTR, ERESTARTNOINTR); + DEFINE_STR(UM_KERN_EMERG, KERN_EMERG); DEFINE_STR(UM_KERN_ALERT, KERN_ALERT); DEFINE_STR(UM_KERN_CRIT, KERN_CRIT); Index: linux-2.6.17/arch/um/include/sysdep-i386/ptrace_user.h =================================================================== --- linux-2.6.17.orig/arch/um/include/sysdep-i386/ptrace_user.h 2007-10-17 12:11:52.000000000 -0400 +++ linux-2.6.17/arch/um/include/sysdep-i386/ptrace_user.h 2007-11-19 11:14:27.000000000 -0500 @@ -15,6 +15,8 @@ #define PT_SYSCALL_NR(regs) ((regs)[ORIG_EAX]) #define PT_SYSCALL_NR_OFFSET PT_OFFSET(ORIG_EAX) +#define PT_SYSCALL_NR_SKIP_RESTART __NR_getpid + #define PT_SYSCALL_ARG1_OFFSET PT_OFFSET(EBX) #define PT_SYSCALL_ARG2_OFFSET 
PT_OFFSET(ECX) #define PT_SYSCALL_ARG3_OFFSET PT_OFFSET(EDX) Index: linux-2.6.17/arch/um/include/sysdep-x86_64/ptrace_user.h =================================================================== --- linux-2.6.17.orig/arch/um/include/sysdep-x86_64/ptrace_user.h 2007-10-17 12:11:52.000000000 -0400 +++ linux-2.6.17/arch/um/include/sysdep-x86_64/ptrace_user.h 2007-11-19 11:14:27.000000000 -0500 @@ -18,6 +18,8 @@ #define PT_SYSCALL_NR(regs) ((regs)[PT_INDEX(ORIG_RAX)]) #define PT_SYSCALL_NR_OFFSET (ORIG_RAX) +#define PT_SYSCALL_NR_SKIP_RESTART __NR_getpid + #define PT_SYSCALL_ARG1(regs) (((unsigned long *) (regs))[PT_INDEX(RDI)]) #define PT_SYSCALL_ARG1_OFFSET (RDI) Index: linux-2.6.17/arch/um/os-Linux/start_up.c =================================================================== --- linux-2.6.17.orig/arch/um/os-Linux/start_up.c 2007-11-19 10:56:39.000000000 -0500 +++ linux-2.6.17/arch/um/os-Linux/start_up.c 2007-11-19 11:18:15.000000000 -0500 @@ -25,6 +25,18 @@ #include "registers.h" #include "skas_ptrace.h" +/* In some cases, host's headers don't provide ERESTARTNOINTR. + * If so, we use the definition from our own kernel headers. + * As kernel headers must not be included in this user-obj, we + * provide kernel's ERESTARTNOINTR as UM_ERESTARTNOINTR in + * kern_constants.h. + * Here we trust in ERESTARTNOINTR being a part of the ABI for + * strace and debuggers, so it shouldn't change. + */ +#ifndef ERESTARTNOINTR +#define ERESTARTNOINTR UM_ERESTARTNOINTR +#endif + static int ptrace_child(void) { int ret; @@ -59,6 +71,12 @@ static int ptrace_child(void) * SKAS3/2.6 patch before release -V6, together with a bug in * the UML code itself. */ + + /* + * In check_skas_restart_skip, this is the expected + * case, if everything works fine.
(see below for + * additional info) + */ ret = 2; _exit(ret); } @@ -112,7 +130,7 @@ static int start_ptraced_child(void) } /* When testing for SYSEMU support, if it is one of the broken versions, we - * must just avoid using sysemu, not panic, but only if SYSEMU features are + * must just avoid using sysemu, not exit, but only if SYSEMU features are * broken. * So only for SYSEMU features we test mustpanic, while normal host features * must work anyway! @@ -344,6 +362,9 @@ void __init os_early_checks(void) /* Print out the core dump limits early */ check_coredump_limit(); + /* We check ptrace capabilities for UML's minimum need + * This won't return, if ptrace isn't sufficient + */ check_ptrace(); /* Need to check this early because mmapping happens before the @@ -388,6 +409,124 @@ __uml_setup("noptraceldt", noptraceldt_c " To support PTRACE_LDT, the host needs to be patched using\n" " the current skas3 patch.\n\n"); + +/* + * check_skas_restart_skip() will check, if the host can be forced to + * not do syscall restarting, even if the result of a UML-syscall is + * -ERESTARTxxxxx. Normally the -ERESTARTxxxxx result isn't passed to + * the user, because UML will replace it by -EINTR or the syscall-# + * to restart the syscall. So, the host won't see -ERESTARTxxxx in this + * case. + * But if the syscall in UML is sys_(rt)sigreturn, *every* result may + * be passed to the user, because it isn't a real result, but the + * interrupted process's register contents. So the host might see + * -ERESTARTxxxxx and could do syscall-restarting. This would produce + * unpredictable errors in UML. + * Thus, we have to force the host to skip syscall-restarting. + * Normally, this can be done by setting the syscall-# to -1 on exit + * from syscall (e.g. i386, x86_64). But that wouldn't work for s390, + * because s390 has syscall-result and syscall-# in the same register. + * s390 instead has a "trap" value in host's pt_regs, to distinguish + * between syscall / non-syscall.
Unfortunately, "trap" isn't readable + * or writeable via ptrace. + * So, I modified the s390-host in a way, that it will change "trap", + * if the syscall-# is written to -1 on syscall-entry. + * check_skas_restart_skip() will check presence of the change in + * s390-host. Also, it checks the "normal" way of skipping for the + * other arches. + * If syscall restarting can't be skipped, there is no safe way to run + * skas, no matter if skas0 or skas3. + * Due to the different syscall-handling in tt, a good result from + * check_ptrace is enough to run tt. + * + * What the check does: + * When the ptraced child is stopped at syscall entry, the syscall-# is + * overwritten by PT_SYSCALL_NR_SKIP_RESTART, which normally is + * __NR_getpid, so nothing is changed. But on s390 -1 is written, which + * should change "trap" in the host. + * Then the syscall is resumed with PTRACE_SYSCALL. On syscall exit, + * the result is replaced by -ERESTARTNOINTR, and if syscall-# and result + * are in different registers, the syscall-# is written with -1 (not on + * s390). + * Then, we send a SIGUSR1 to the child, forcing the host to do signal + * processing, which normally would cause syscall restarting. We + * intercept and suppress the signal via ptrace. + * Now, the ptraced child is resumed and the result is checked + * (stop_ptraced_child). What we expect is, that -ERESTARTNOINTR is the + * result of the syscall. If the host would do restarting, getpid() + * would be done again, and the result would be the child's pid. + * So the expected result of the child must be 2.
+ */ +static inline int check_skas_restart_skip(void) +{ + int pid, syscall, n, status; + + non_fatal("Checking if syscall restart handling in host can be " + "skipped..."); + pid = start_ptraced_child(); + + while(1){ + if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) + fatal_perror("check_skas_restart_skip : ptrace failed"); + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if(n < 0) + fatal_perror("check_skas_restart_skip : wait failed"); + if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) + fatal("check_skas_restart_skip : expected SIGTRAP, " + "got status = %d", status); + + syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, 0); + if(syscall == __NR_getpid){ + n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, + PT_SYSCALL_NR_SKIP_RESTART); + if(n < 0) + fatal_perror("check_skas_restart_skip : failed " + "to modify system call"); + break; + } + } + + if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) + fatal_perror("check_skas_restart_skip : ptrace failed"); + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if(n < 0) + fatal_perror("check_skas_restart_skip : wait failed"); + if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) + fatal("check_skas_restart_skip : expected SIGTRAP, " + "got status = %d", status); + + n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, -ERESTARTNOINTR); + if(n < 0) + fatal_perror("check_skas_restart_skip : failed to modify " + "system call result"); + + if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET){ + n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, -1); + if(n < 0) + fatal_perror("check_skas_restart_skip : failed to " + "modify system call number"); + } + + kill(pid, SIGUSR1); /* trigger signal processing in the host */ + + if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) + fatal_perror("check_skas_restart_skip : ptrace failed"); + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if(n < 0) + fatal_perror("check_skas_restart_skip : wait failed"); + if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGUSR1)) + fatal("check_skas_restart_skip :
expected SIGUSR1, " + "got status = %d", status); + + n = stop_ptraced_child(pid, 2, 0); /* child exits with 2 if not restarted */ + non_fatal("%s\n", n ? "failed" : "OK"); + + return n; +} + static inline void check_skas3_ptrace_faultinfo(void) { struct ptrace_faultinfo fi; @@ -468,6 +607,9 @@ static inline void check_skas3_proc_mm(v int can_do_skas(void) { + if(check_skas_restart_skip()) + return 0; + non_fatal("Checking for the skas3 patch in the host:\n"); check_skas3_proc_mm(); Index: linux-2.6.17/arch/um/os-Linux/skas/process.c =================================================================== --- linux-2.6.17.orig/arch/um/os-Linux/skas/process.c 2007-11-19 11:12:54.000000000 -0500 +++ linux-2.6.17/arch/um/os-Linux/skas/process.c 2007-11-19 11:18:49.000000000 -0500 @@ -143,7 +143,7 @@ static void handle_trap(int pid, struct if (!local_using_sysemu) { err = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, - __NR_getpid); + PT_SYSCALL_NR_SKIP_RESTART); if (err < 0) panic("handle_trap - nullifying syscall failed, " "errno = %d\n", errno);