From 44011b897adf4be7a5bd7f2baf2ffec9cd87ec74 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 25 Oct 2015 11:44:05 +0100 Subject: [PATCH 1/9] um: Define PTRACE_OLDSETOPTIONS ...such that processes within UML can do a ptrace(PTRACE_OLDSETOPTIONS, ...) Signed-off-by: Richard Weinberger --- arch/um/include/asm/ptrace-generic.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h index 2966adbbdf6c..5ab20620fc97 100644 --- a/arch/um/include/asm/ptrace-generic.h +++ b/arch/um/include/asm/ptrace-generic.h @@ -27,6 +27,8 @@ struct pt_regs { #define instruction_pointer(regs) PT_REGS_IP(regs) +#define PTRACE_OLDSETOPTIONS 21 + struct task_struct; extern long subarch_ptrace(struct task_struct *child, long request, From 1d80f0cda10118d7505bd0a09a5c1c72f3a5f6c8 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 25 Oct 2015 19:54:33 +0100 Subject: [PATCH 2/9] um: Store syscall number after syscall_trace_enter() To support changing syscall numbers we have to store it after syscall_trace_enter(). Signed-off-by: Richard Weinberger --- arch/um/include/shared/os.h | 1 + arch/um/kernel/skas/syscall.c | 13 +++---------- arch/um/os-Linux/skas/process.c | 10 +++++++--- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index ad3fa3ae6d34..f3cbaef20e04 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -274,6 +274,7 @@ extern void initial_thread_cb_skas(void (*proc)(void *), void *arg); extern void halt_skas(void); extern void reboot_skas(void); +extern int get_syscall(struct uml_pt_regs *regs); /* irq.c */ extern int os_waiting_for_events(struct irq_fd *active_fds); diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index d9ec0068b623..64a8fe589d6f 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -8,6 +8,7 @@ #include #include #include +#include extern int syscall_table_size; #define NR_SYSCALLS (syscall_table_size / sizeof(void *)) @@ -23,16 +24,8 @@ void handle_syscall(struct uml_pt_regs *r) goto out; } - /* - * This should go in the declaration of syscall, but when I do that, - * strace -f -c bash -c 'ls ; ls' breaks, sometimes not tracing - * children at all, sometimes hanging when bash doesn't see the first - * ls exit. - * The assembly looks functionally the same to me. This is - * gcc version 4.0.1 20050727 (Red Hat 4.0.1-5) - * in case it's a compiler bug. - */ - syscall = UPT_SYSCALL_NR(r); + syscall = get_syscall(r); + if ((syscall >= NR_SYSCALLS) || (syscall < 0)) result = -ENOSYS; else result = EXECUTE_SYSCALL(syscall, regs); diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 3dddedba3a07..d38f4952d748 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -137,9 +137,6 @@ static void handle_trap(int pid, struct uml_pt_regs *regs, if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END)) fatal_sigsegv(); - /* Mark this as a syscall */ - UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp); - if (!local_using_sysemu) { err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, @@ -174,6 +171,13 @@ static void handle_trap(int pid, struct uml_pt_regs *regs, handle_syscall(regs); } +int get_syscall(struct uml_pt_regs *regs) +{ + UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp); + + return UPT_SYSCALL_NR(regs); +} + extern char __syscall_stub_start[]; static int userspace_tramp(void *stack) From f10e6d652bc2dad67b5e7b6e6d890c76f739ed77 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 25 Oct 2015 20:28:14 +0100 Subject: [PATCH 3/9] um: Get rid of open coded NR_SYSCALLS We can use __NR_syscall_max. Signed-off-by: Richard Weinberger --- arch/um/kernel/skas/syscall.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 64a8fe589d6f..1683b8efdfda 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -10,9 +10,6 @@ #include #include -extern int syscall_table_size; -#define NR_SYSCALLS (syscall_table_size / sizeof(void *)) - void handle_syscall(struct uml_pt_regs *r) { struct pt_regs *regs = container_of(r, struct pt_regs, regs); @@ -26,9 +23,10 @@ void handle_syscall(struct uml_pt_regs *r) syscall = get_syscall(r); - if ((syscall >= NR_SYSCALLS) || (syscall < 0)) + if ((syscall > __NR_syscall_max) || syscall < 0) result = -ENOSYS; - else result = EXECUTE_SYSCALL(syscall, regs); + else + result = EXECUTE_SYSCALL(syscall, regs); out: PT_REGS_SET_SYSCALL_RETURN(regs, result); From ec2c6c01ff241f8fad95c79de3364c76a474b3fa Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 25 Oct 2015 21:41:10 +0100 Subject: [PATCH 4/9] um: Remove dead code from x86_64 syscall stub syscall_stub is dead code as um is using only batch_syscall_stub. Signed-off-by: Richard Weinberger --- arch/x86/um/stub_64.S | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S index 7160b20172d0..a212445358d6 100644 --- a/arch/x86/um/stub_64.S +++ b/arch/x86/um/stub_64.S @@ -1,19 +1,6 @@ #include - .globl syscall_stub .section .__syscall_stub, "ax" -syscall_stub: - syscall - /* We don't have 64-bit constants, so this constructs the address - * we need. - */ - movq $(STUB_DATA >> 32), %rbx - salq $32, %rbx - movq $(STUB_DATA & 0xffffffff), %rcx - or %rcx, %rbx - movq %rax, (%rbx) - int3 - .globl batch_syscall_stub batch_syscall_stub: mov $(STUB_DATA >> 32), %rbx From 246d254f1ab62bc83ac84a8ebd263e5384412ce8 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 25 Oct 2015 21:41:10 +0100 Subject: [PATCH 5/9] um: Remove dead symbol from i386 syscall stub syscall_stub is nowhere used these days. Signed-off-by: Richard Weinberger --- arch/x86/um/stub_32.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S index b972649d3a18..98816804e131 100644 --- a/arch/x86/um/stub_32.S +++ b/arch/x86/um/stub_32.S @@ -1,6 +1,5 @@ #include - .globl syscall_stub .section .__syscall_stub, "ax" .globl batch_syscall_stub From 1b2411c283e8e178b1c57d07f7fe082442a0927b Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 26 Oct 2015 10:38:27 +0100 Subject: [PATCH 6/9] um: Simplify STUB_DATA loading As long STUB_DATA fits into 32bits we can use a plain mov. If it will grow at some point in future we will switch to movabsq. In any case the code is smaller and more easy to read than the current one Signed-off-by: Richard Weinberger --- arch/x86/um/stub_64.S | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S index a212445358d6..ba914b3b8cc4 100644 --- a/arch/x86/um/stub_64.S +++ b/arch/x86/um/stub_64.S @@ -3,10 +3,7 @@ .section .__syscall_stub, "ax" .globl batch_syscall_stub batch_syscall_stub: - mov $(STUB_DATA >> 32), %rbx - sal $32, %rbx - mov $(STUB_DATA & 0xffffffff), %rax - or %rax, %rbx + mov $(STUB_DATA), %rbx /* load pointer to first operation */ mov %rbx, %rsp add $0x10, %rsp From 70c8205f40a385383b0c81f59550cf27273bf912 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 25 Oct 2015 22:26:09 +0100 Subject: [PATCH 7/9] um: Report host OOM more nicely If UML runs on the host side out of memory, report this condition more nicely. Signed-off-by: Richard Weinberger --- arch/um/kernel/tlb.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 2077248e8a72..3777b82759bd 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -50,6 +50,13 @@ struct host_vm_change { .index = 0, \ .force = force }) +static void report_enomem(void) +{ + printk(KERN_ERR "UML ran out of memory on the host side! " + "This can happen due to a memory limitation or " + "vm.max_map_count has been reached.\n"); +} + static int do_ops(struct host_vm_change *hvc, int end, int finished) { @@ -81,6 +88,9 @@ static int do_ops(struct host_vm_change *hvc, int end, } } + if (ret == -ENOMEM) + report_enomem(); + return ret; } @@ -433,8 +443,12 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) else if (pte_newprot(*pte)) err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush); - if (err) + if (err) { + if (err == -ENOMEM) + report_enomem(); + goto kill; + } *pte = pte_mkuptodate(*pte); From e17c6d77b28c6feab446ad6eaec865e8031ed616 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Thu, 29 Oct 2015 21:00:32 +0530 Subject: [PATCH 8/9] um: net: replace GFP_KERNEL with GFP_ATOMIC when spinlock is held since GFP_KERNEL with GFP_ATOMIC while spinlock is held, as code while holding a spinlock should be atomic. GFP_KERNEL may sleep and can cause deadlock, where as GFP_ATOMIC may fail but certainly avoids deadlockdex f70dd54..d898f6c 100644 Signed-off-by: Saurabh Sengar Signed-off-by: Richard Weinberger --- arch/um/drivers/net_kern.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index f70dd540655d..9ef669d24bb2 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -388,7 +388,7 @@ static const struct net_device_ops uml_netdev_ops = { static int driver_registered; static void eth_configure(int n, void *init, char *mac, - struct transport *transport) + struct transport *transport, gfp_t gfp_mask) { struct uml_net *device; struct net_device *dev; @@ -397,7 +397,7 @@ static void eth_configure(int n, void *init, char *mac, size = transport->private_size + sizeof(struct uml_net_private); - device = kzalloc(sizeof(*device), GFP_KERNEL); + device = kzalloc(sizeof(*device), gfp_mask); if (device == NULL) { printk(KERN_ERR "eth_configure failed to allocate struct " "uml_net\n"); @@ -568,7 +568,7 @@ static LIST_HEAD(transports); static LIST_HEAD(eth_cmd_line); static int check_transport(struct transport *transport, char *eth, int n, - void **init_out, char **mac_out) + void **init_out, char **mac_out, gfp_t gfp_mask) { int len; @@ -582,7 +582,7 @@ static int check_transport(struct transport *transport, char *eth, int n, else if (*eth != '\0') return 0; - *init_out = kmalloc(transport->setup_size, GFP_KERNEL); + *init_out = kmalloc(transport->setup_size, gfp_mask); if (*init_out == NULL) return 1; @@ -609,11 +609,11 @@ void register_transport(struct transport *new) list_for_each_safe(ele, next, ð_cmd_line) { eth = list_entry(ele, struct eth_init, list); match = check_transport(new, eth->init, eth->index, &init, - &mac); + &mac, GFP_KERNEL); if (!match) continue; else if (init != NULL) { - eth_configure(eth->index, init, mac, new); + eth_configure(eth->index, init, mac, new, GFP_KERNEL); kfree(init); } list_del(ð->list); @@ -631,10 +631,11 @@ static int eth_setup_common(char *str, int index) spin_lock(&transports_lock); list_for_each(ele, &transports) { transport = list_entry(ele, struct transport, list); - if (!check_transport(transport, str, index, &init, &mac)) + if (!check_transport(transport, str, index, &init, + &mac, GFP_ATOMIC)) continue; if (init != NULL) { - eth_configure(index, init, mac, transport); + eth_configure(index, init, mac, transport, GFP_ATOMIC); kfree(init); } found = 1; From 2eb5f31bc4ea24bb293e82934cfa1cce9573304b Mon Sep 17 00:00:00 2001 From: Anton Ivanov Date: Mon, 2 Nov 2015 16:16:37 +0000 Subject: [PATCH 9/9] um: Switch clocksource to hrtimers UML is using an obsolete itimer call for all timers and "polls" for kernel space timer firing in its userspace portion resulting in a long list of bugs and incorrect behaviour(s). It also uses ITIMER_VIRTUAL for its timer which results in the timer being dependent on it running and the cpu load. This patch fixes this by moving to posix high resolution timers firing off CLOCK_MONOTONIC and relaying the timer correctly to the UML userspace. Fixes: - crashes when hosts suspends/resumes - broken userspace timers - effecive ~40Hz instead of what they should be. Note - this modifies skas behavior by no longer setting an itimer per clone(). Timer events are relayed instead. - kernel network packet scheduling disciplines - tcp behaviour especially under load - various timer related corner cases Finally, overall responsiveness of userspace is better. Signed-off-by: Thomas Meyer Signed-off-by: Anton Ivanov [rw: massaged commit message] Signed-off-by: Richard Weinberger --- arch/um/Makefile | 2 +- arch/um/include/shared/os.h | 18 +- arch/um/include/shared/skas/stub-data.h | 7 +- arch/um/include/shared/timer-internal.h | 13 ++ arch/um/kernel/process.c | 8 +- arch/um/kernel/skas/clone.c | 6 +- arch/um/kernel/skas/mmu.c | 3 + arch/um/kernel/time.c | 73 ++++-- arch/um/os-Linux/internal.h | 1 - arch/um/os-Linux/main.c | 7 +- arch/um/os-Linux/process.c | 6 + arch/um/os-Linux/signal.c | 41 ++-- arch/um/os-Linux/skas/process.c | 45 +--- arch/um/os-Linux/time.c | 299 ++++++++++++------------ 14 files changed, 280 insertions(+), 249 deletions(-) create mode 100644 arch/um/include/shared/timer-internal.h delete mode 100644 arch/um/os-Linux/internal.h diff --git a/arch/um/Makefile b/arch/um/Makefile index e3abe6f3156d..25ed4098640e 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -131,7 +131,7 @@ export LDS_ELF_FORMAT := $(ELF_FORMAT) # The wrappers will select whether using "malloc" or the kernel allocator. LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc -LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) +LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) -lrt # Used by link-vmlinux.sh which has special support for um link export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index f3cbaef20e04..868e6c3f83dd 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -1,4 +1,6 @@ /* + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -183,6 +185,7 @@ extern int create_mem_file(unsigned long long len); /* process.c */ extern unsigned long os_process_pc(int pid); extern int os_process_parent(int pid); +extern void os_alarm_process(int pid); extern void os_stop_process(int pid); extern void os_kill_process(int pid, int reap_child); extern void os_kill_ptraced_process(int pid, int reap_child); @@ -217,7 +220,7 @@ extern int set_umid(char *name); extern char *get_umid(void); /* signal.c */ -extern void timer_init(void); +extern void timer_set_signal_handler(void); extern void set_sigstack(void *sig_stack, int size); extern void remove_sigstack(void); extern void set_handler(int sig); @@ -227,6 +230,7 @@ extern void unblock_signals(void); extern int get_signals(void); extern int set_signals(int enable); extern int os_is_signal_stack(void); +extern void deliver_alarm(void); /* util.c */ extern void stack_protections(unsigned long address); @@ -238,12 +242,16 @@ extern void um_early_printk(const char *s, unsigned int n); extern void os_fix_helper_signals(void); /* time.c */ -extern void idle_sleep(unsigned long long nsecs); -extern int set_interval(void); -extern int timer_one_shot(int ticks); -extern long long disable_timer(void); +extern void os_idle_sleep(unsigned long long nsecs); +extern int os_timer_create(void* timer); +extern int os_timer_set_interval(void* timer, void* its); +extern int os_timer_one_shot(int ticks); +extern long long os_timer_disable(void); +extern long os_timer_remain(void* timer); extern void uml_idle_timer(void); +extern long long os_persistent_clock_emulation(void); extern long long os_nsecs(void); +extern long long os_vnsecs(void); /* skas/mem.c */ extern long run_syscall_stub(struct mm_id * mm_idp, diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h index f6ed92c3727d..a9deece956bf 100644 --- a/arch/um/include/shared/skas/stub-data.h +++ b/arch/um/include/shared/skas/stub-data.h @@ -1,4 +1,6 @@ /* + + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -6,12 +8,11 @@ #ifndef __STUB_DATA_H #define __STUB_DATA_H -#include +#include struct stub_data { - long offset; + unsigned long offset; int fd; - struct itimerval timer; long err; }; diff --git a/arch/um/include/shared/timer-internal.h b/arch/um/include/shared/timer-internal.h new file mode 100644 index 000000000000..03e6f217f807 --- /dev/null +++ b/arch/um/include/shared/timer-internal.h @@ -0,0 +1,13 @@ +/* + * Copyright (C) 2012 - 2014 Cisco Systems + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __TIMER_INTERNAL_H__ +#define __TIMER_INTERNAL_H__ + +#define TIMER_MULTIPLIER 256 +#define TIMER_MIN_DELTA 500 + +#endif diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index a6d922672b9f..48af59aae129 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -1,4 +1,6 @@ /* + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Copyright 2003 PathScale, Inc. * Licensed under the GPL @@ -27,6 +29,7 @@ #include #include #include +#include /* * This is a per-cpu array. A processor only modifies its entry and it only @@ -203,11 +206,8 @@ void initial_thread_cb(void (*proc)(void *), void *arg) void arch_cpu_idle(void) { - unsigned long long nsecs; - cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); - nsecs = disable_timer(); - idle_sleep(nsecs); + os_idle_sleep(UM_NSEC_PER_SEC); local_irq_enable(); } diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index 289771dadf81..0f25d41b1031 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -35,11 +36,6 @@ stub_clone_handler(void) if (err) goto out; - err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, - (long) &data->timer, 0); - if (err) - goto out; - remap_stack(data->fd, data->offset); goto done; diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index fda1deba1757..9591a66aa5c5 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -61,10 +62,12 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) if (current->mm != NULL && current->mm != &init_mm) from_mm = ¤t->mm->context; + block_signals(); if (from_mm) to_mm->id.u.pid = copy_context_skas0(stack, from_mm->id.u.pid); else to_mm->id.u.pid = start_userspace(stack); + unblock_signals(); if (to_mm->id.u.pid < 0) { ret = to_mm->id.u.pid; diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 5af441efb377..25c23666d592 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -1,4 +1,7 @@ /* + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2012-2014 Cisco Systems * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -7,11 +10,15 @@ #include #include #include +#include +#include +#include #include #include #include #include #include +#include void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { @@ -24,81 +31,97 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) static int itimer_shutdown(struct clock_event_device *evt) { - disable_timer(); + os_timer_disable(); return 0; } static int itimer_set_periodic(struct clock_event_device *evt) { - set_interval(); + os_timer_set_interval(NULL, NULL); return 0; } static int itimer_next_event(unsigned long delta, struct clock_event_device *evt) { - return timer_one_shot(delta + 1); + return os_timer_one_shot(delta); } -static struct clock_event_device itimer_clockevent = { - .name = "itimer", +static int itimer_one_shot(struct clock_event_device *evt) +{ + os_timer_one_shot(1); + return 0; +} + +static struct clock_event_device timer_clockevent = { + .name = "posix-timer", .rating = 250, .cpumask = cpu_all_mask, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_state_shutdown = itimer_shutdown, .set_state_periodic = itimer_set_periodic, - .set_state_oneshot = itimer_shutdown, + .set_state_oneshot = itimer_one_shot, .set_next_event = itimer_next_event, - .shift = 32, + .shift = 0, + .max_delta_ns = 0xffffffff, + .min_delta_ns = TIMER_MIN_DELTA, //microsecond resolution should be enough for anyone, same as 640K RAM .irq = 0, + .mult = 1, }; static irqreturn_t um_timer(int irq, void *dev) { - (*itimer_clockevent.event_handler)(&itimer_clockevent); + if (get_current()->mm != NULL) + { + /* userspace - relay signal, results in correct userspace timers */ + os_alarm_process(get_current()->mm->context.id.u.pid); + } + + (*timer_clockevent.event_handler)(&timer_clockevent); return IRQ_HANDLED; } -static cycle_t itimer_read(struct clocksource *cs) +static cycle_t timer_read(struct clocksource *cs) { - return os_nsecs() / 1000; + return os_nsecs() / TIMER_MULTIPLIER; } -static struct clocksource itimer_clocksource = { - .name = "itimer", +static struct clocksource timer_clocksource = { + .name = "timer", .rating = 300, - .read = itimer_read, + .read = timer_read, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init setup_itimer(void) +static void __init timer_setup(void) { int err; - err = request_irq(TIMER_IRQ, um_timer, 0, "timer", NULL); + err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL); if (err != 0) printk(KERN_ERR "register_timer : request_irq failed - " "errno = %d\n", -err); - itimer_clockevent.mult = div_sc(HZ, NSEC_PER_SEC, 32); - itimer_clockevent.max_delta_ns = - clockevent_delta2ns(60 * HZ, &itimer_clockevent); - itimer_clockevent.min_delta_ns = - clockevent_delta2ns(1, &itimer_clockevent); - err = clocksource_register_hz(&itimer_clocksource, USEC_PER_SEC); + err = os_timer_create(NULL); + if (err != 0) { + printk(KERN_ERR "creation of timer failed - errno = %d\n", -err); + return; + } + + err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER); if (err) { printk(KERN_ERR "clocksource_register_hz returned %d\n", err); return; } - clockevents_register_device(&itimer_clockevent); + clockevents_register_device(&timer_clockevent); } void read_persistent_clock(struct timespec *ts) { - long long nsecs = os_nsecs(); + long long nsecs = os_persistent_clock_emulation(); set_normalized_timespec(ts, nsecs / NSEC_PER_SEC, nsecs % NSEC_PER_SEC); @@ -106,6 +129,6 @@ void read_persistent_clock(struct timespec *ts) void __init time_init(void) { - timer_init(); - late_time_init = setup_itimer; + timer_set_signal_handler(); + late_time_init = timer_setup; } diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h deleted file mode 100644 index 0dc2c9f135f6..000000000000 --- a/arch/um/os-Linux/internal.h +++ /dev/null @@ -1 +0,0 @@ -void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc); diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index df9191acd926..9d499de87e63 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -163,13 +164,13 @@ int __init main(int argc, char **argv, char **envp) /* * This signal stuff used to be in the reboot case. However, - * sometimes a SIGVTALRM can come in when we're halting (reproducably + * sometimes a timer signal can come in when we're halting (reproducably * when writing out gcov information, presumably because that takes * some time) and cause a segfault. */ - /* stop timers and set SIGVTALRM to be ignored */ - disable_timer(); + /* stop timers and set timer signal to be ignored */ + os_timer_disable(); /* disable SIGIO for the fds and set SIGIO to be ignored */ err = deactivate_all_fds(); diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 8408aba915b2..b3e0d40932e1 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -89,6 +90,11 @@ int os_process_parent(int pid) return parent; } +void os_alarm_process(int pid) +{ + kill(pid, SIGALRM); +} + void os_stop_process(int pid) { kill(pid, SIGSTOP); diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 036d0dbc7b52..c211153ca69a 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -1,4 +1,6 @@ /* + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2004 PathScale, Inc * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL @@ -13,7 +15,6 @@ #include #include #include -#include "internal.h" void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGTRAP] = relay_signal, @@ -23,7 +24,8 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGBUS] = bus_handler, [SIGSEGV] = segv_handler, [SIGIO] = sigio_handler, - [SIGVTALRM] = timer_handler }; + [SIGALRM] = timer_handler +}; static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) { @@ -38,7 +40,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) } /* enable signals if sig isn't IRQ signal */ - if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM)) + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM)) unblock_signals(); (*sig_info[sig])(sig, si, &r); @@ -55,8 +57,8 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGIO_BIT 0 #define SIGIO_MASK (1 << SIGIO_BIT) -#define SIGVTALRM_BIT 1 -#define SIGVTALRM_MASK (1 << SIGVTALRM_BIT) +#define SIGALRM_BIT 1 +#define SIGALRM_MASK (1 << SIGALRM_BIT) static int signals_enabled; static unsigned int signals_pending; @@ -78,36 +80,38 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) set_signals(enabled); } -static void real_alarm_handler(mcontext_t *mc) +static void timer_real_alarm_handler(mcontext_t *mc) { struct uml_pt_regs regs; if (mc != NULL) get_regs_from_mc(®s, mc); - regs.is_user = 0; - unblock_signals(); - timer_handler(SIGVTALRM, NULL, ®s); + timer_handler(SIGALRM, NULL, ®s); } -void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) +void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) { int enabled; enabled = signals_enabled; if (!signals_enabled) { - signals_pending |= SIGVTALRM_MASK; + signals_pending |= SIGALRM_MASK; return; } block_signals(); - real_alarm_handler(mc); + timer_real_alarm_handler(mc); set_signals(enabled); } -void timer_init(void) +void deliver_alarm(void) { + timer_alarm_handler(SIGALRM, NULL, NULL); +} + +void timer_set_signal_handler(void) { - set_handler(SIGVTALRM); + set_handler(SIGALRM); } void set_sigstack(void *sig_stack, int size) @@ -131,10 +135,9 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { [SIGIO] = sig_handler, [SIGWINCH] = sig_handler, - [SIGVTALRM] = alarm_handler + [SIGALRM] = timer_alarm_handler }; - static void hard_handler(int sig, siginfo_t *si, void *p) { struct ucontext *uc = p; @@ -188,9 +191,9 @@ void set_handler(int sig) /* block irq ones */ sigemptyset(&action.sa_mask); - sigaddset(&action.sa_mask, SIGVTALRM); sigaddset(&action.sa_mask, SIGIO); sigaddset(&action.sa_mask, SIGWINCH); + sigaddset(&action.sa_mask, SIGALRM); if (sig == SIGSEGV) flags |= SA_NODEFER; @@ -283,8 +286,8 @@ void unblock_signals(void) if (save_pending & SIGIO_MASK) sig_handler_common(SIGIO, NULL, NULL); - if (save_pending & SIGVTALRM_MASK) - real_alarm_handler(NULL); + if (save_pending & SIGALRM_MASK) + timer_real_alarm_handler(NULL); } } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index d38f4952d748..b856c66ebd3a 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -45,7 +46,7 @@ static int ptrace_dump_regs(int pid) * Signals that are OK to receive in the stub - we'll just continue it. * SIGWINCH will happen when UML is inside a detached screen. */ -#define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH)) +#define STUB_SIG_MASK ((1 << SIGALRM) | (1 << SIGWINCH)) /* Signals that the stub will finish with - anything else is an error */ #define STUB_DONE_MASK (1 << SIGTRAP) @@ -183,19 +184,13 @@ extern char __syscall_stub_start[]; static int userspace_tramp(void *stack) { void *addr; - int err, fd; + int fd; unsigned long long offset; ptrace(PTRACE_TRACEME, 0, 0, 0); signal(SIGTERM, SIG_DFL); signal(SIGWINCH, SIG_IGN); - err = set_interval(); - if (err) { - printk(UM_KERN_ERR "userspace_tramp - setting timer failed, " - "errno = %d\n", err); - exit(1); - } /* * This has a pte, but it can't be mapped in with the usual @@ -286,7 +281,7 @@ int start_userspace(unsigned long stub_stack) "errno = %d\n", errno); goto out_kill; } - } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { err = -EINVAL; @@ -319,8 +314,6 @@ int start_userspace(unsigned long stub_stack) void userspace(struct uml_pt_regs *regs) { - struct itimerval timer; - unsigned long long nsecs, now; int err, status, op, pid = userspace_pid[0]; /* To prevent races if using_sysemu changes under us.*/ int local_using_sysemu; @@ -329,13 +322,8 @@ void userspace(struct uml_pt_regs *regs) /* Handle any immediate reschedules or signals */ interrupt_end(); - if (getitimer(ITIMER_VIRTUAL, &timer)) - printk(UM_KERN_ERR "Failed to get itimer, errno = %d\n", errno); - nsecs = timer.it_value.tv_sec * UM_NSEC_PER_SEC + - timer.it_value.tv_usec * UM_NSEC_PER_USEC; - nsecs += os_nsecs(); - while (1) { + /* * This can legitimately fail if the process loads a * bogus value into a segment register. It will @@ -405,18 +393,7 @@ void userspace(struct uml_pt_regs *regs) case SIGTRAP: relay_signal(SIGTRAP, (struct siginfo *)&si, regs); break; - case SIGVTALRM: - now = os_nsecs(); - if (now < nsecs) - break; - block_signals(); - (*sig_info[sig])(sig, (struct siginfo *)&si, regs); - unblock_signals(); - nsecs = timer.it_value.tv_sec * - UM_NSEC_PER_SEC + - timer.it_value.tv_usec * - UM_NSEC_PER_USEC; - nsecs += os_nsecs(); + case SIGALRM: break; case SIGIO: case SIGILL: @@ -464,7 +441,6 @@ __initcall(init_thread_regs); int copy_context_skas0(unsigned long new_stack, int pid) { - struct timeval tv = { .tv_sec = 0, .tv_usec = UM_USEC_PER_SEC / UM_HZ }; int err; unsigned long current_stack = current_stub_stack(); struct stub_data *data = (struct stub_data *) current_stack; @@ -476,11 +452,10 @@ int copy_context_skas0(unsigned long new_stack, int pid) * prepare offset and fd of child's stack as argument for parent's * and child's mmap2 calls */ - *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), - .fd = new_fd, - .timer = ((struct itimerval) - { .it_value = tv, - .it_interval = tv }) }); + *data = ((struct stub_data) { + .offset = MMAP_OFFSET(new_offset), + .fd = new_fd + }); err = ptrace_setregs(pid, thread_regs); if (err < 0) { diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index e9824d5dd7d5..0e39b9978729 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -1,4 +1,7 @@ /* + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2012-2014 Cisco Systems * Copyright (C) 2000 - 2007 Jeff Dike (jdike{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -10,177 +13,177 @@ #include #include #include -#include "internal.h" +#include +#include -int set_interval(void) -{ - int usec = UM_USEC_PER_SEC / UM_HZ; - struct itimerval interval = ((struct itimerval) { { 0, usec }, - { 0, usec } }); +static timer_t event_high_res_timer = 0; - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) - return -errno; - - return 0; -} - -int timer_one_shot(int ticks) -{ - unsigned long usec = ticks * UM_USEC_PER_SEC / UM_HZ; - unsigned long sec = usec / UM_USEC_PER_SEC; - struct itimerval interval; - - usec %= UM_USEC_PER_SEC; - interval = ((struct itimerval) { { 0, 0 }, { sec, usec } }); - - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) - return -errno; - - return 0; -} - -/** - * timeval_to_ns - Convert timeval to nanoseconds - * @ts: pointer to the timeval variable to be converted - * - * Returns the scalar nanosecond representation of the timeval - * parameter. - * - * Ripped from linux/time.h because it's a kernel header, and thus - * unusable from here. - */ static inline long long timeval_to_ns(const struct timeval *tv) { return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) + tv->tv_usec * UM_NSEC_PER_USEC; } -long long disable_timer(void) +static inline long long timespec_to_ns(const struct timespec *ts) { - struct itimerval time = ((struct itimerval) { { 0, 0 }, { 0, 0 } }); - long long remain, max = UM_NSEC_PER_SEC / UM_HZ; + return ((long long) ts->tv_sec * UM_NSEC_PER_SEC) + + ts->tv_nsec; +} - if (setitimer(ITIMER_VIRTUAL, &time, &time) < 0) - printk(UM_KERN_ERR "disable_timer - setitimer failed, " - "errno = %d\n", errno); +long long os_persistent_clock_emulation (void) { + struct timespec realtime_tp; - remain = timeval_to_ns(&time.it_value); - if (remain > max) - remain = max; + clock_gettime(CLOCK_REALTIME, &realtime_tp); + return timespec_to_ns(&realtime_tp); +} - return remain; +/** + * os_timer_create() - create an new posix (interval) timer + */ +int os_timer_create(void* timer) { + + timer_t* t = timer; + + if(t == NULL) { + t = &event_high_res_timer; + } + + if (timer_create( + CLOCK_MONOTONIC, + NULL, + t) == -1) { + return -1; + } + return 0; +} + +int os_timer_set_interval(void* timer, void* i) +{ + struct itimerspec its; + unsigned long long nsec; + timer_t* t = timer; + struct itimerspec* its_in = i; + + if(t == NULL) { + t = &event_high_res_timer; + } + + nsec = UM_NSEC_PER_SEC / UM_HZ; + + if(its_in != NULL) { + its.it_value.tv_sec = its_in->it_value.tv_sec; + its.it_value.tv_nsec = its_in->it_value.tv_nsec; + } else { + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = nsec; + } + + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = nsec; + + if(timer_settime(*t, 0, &its, NULL) == -1) { + return -errno; + } + + return 0; +} + +/** + * os_timer_remain() - returns the remaining nano seconds of the given interval + * timer + * Because this is the remaining time of an interval timer, which correspondends + * to HZ, this value can never be bigger than one second. Just + * the nanosecond part of the timer is returned. + * The returned time is relative to the start time of the interval timer. + * Return an negative value in an error case. + */ +long os_timer_remain(void* timer) +{ + struct itimerspec its; + timer_t* t = timer; + + if(t == NULL) { + t = &event_high_res_timer; + } + + if(timer_gettime(t, &its) == -1) { + return -errno; + } + + return its.it_value.tv_nsec; +} + +int os_timer_one_shot(int ticks) +{ + struct itimerspec its; + unsigned long long nsec; + unsigned long sec; + + nsec = (ticks + 1); + sec = nsec / UM_NSEC_PER_SEC; + nsec = nsec % UM_NSEC_PER_SEC; + + its.it_value.tv_sec = nsec / UM_NSEC_PER_SEC; + its.it_value.tv_nsec = nsec; + + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 0; // we cheat here + + timer_settime(event_high_res_timer, 0, &its, NULL); + return 0; +} + +/** + * os_timer_disable() - disable the posix (interval) timer + * Returns the remaining interval timer time in nanoseconds + */ +long long os_timer_disable(void) +{ + struct itimerspec its; + + memset(&its, 0, sizeof(struct itimerspec)); + timer_settime(event_high_res_timer, 0, &its, &its); + + return its.it_value.tv_sec * UM_NSEC_PER_SEC + its.it_value.tv_nsec; +} + +long long os_vnsecs(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&ts); + return timespec_to_ns(&ts); } long long os_nsecs(void) { - struct timeval tv; + struct timespec ts; - gettimeofday(&tv, NULL); - return timeval_to_ns(&tv); + clock_gettime(CLOCK_MONOTONIC,&ts); + return timespec_to_ns(&ts); } -#ifdef UML_CONFIG_NO_HZ_COMMON -static int after_sleep_interval(struct timespec *ts) -{ - return 0; -} - -static void deliver_alarm(void) -{ - alarm_handler(SIGVTALRM, NULL, NULL); -} - -static unsigned long long sleep_time(unsigned long long nsecs) -{ - return nsecs; -} - -#else -unsigned long long last_tick; -unsigned long long skew; - -static void deliver_alarm(void) -{ - unsigned long long this_tick = os_nsecs(); - int one_tick = UM_NSEC_PER_SEC / UM_HZ; - - /* Protection against the host's time going backwards */ - if ((last_tick != 0) && (this_tick < last_tick)) - this_tick = last_tick; - - if (last_tick == 0) - last_tick = this_tick - one_tick; - - skew += this_tick - last_tick; - - while (skew >= one_tick) { - alarm_handler(SIGVTALRM, NULL, NULL); - skew -= one_tick; - } - - last_tick = this_tick; -} - -static unsigned long long sleep_time(unsigned long long nsecs) -{ - return nsecs > skew ? nsecs - skew : 0; -} - -static inline long long timespec_to_us(const struct timespec *ts) -{ - return ((long long) ts->tv_sec * UM_USEC_PER_SEC) + - ts->tv_nsec / UM_NSEC_PER_USEC; -} - -static int after_sleep_interval(struct timespec *ts) -{ - int usec = UM_USEC_PER_SEC / UM_HZ; - long long start_usecs = timespec_to_us(ts); - struct timeval tv; - struct itimerval interval; - - /* - * It seems that rounding can increase the value returned from - * setitimer to larger than the one passed in. Over time, - * this will cause the remaining time to be greater than the - * tick interval. If this happens, then just reduce the first - * tick to the interval value. - */ - if (start_usecs > usec) - start_usecs = usec; - - start_usecs -= skew / UM_NSEC_PER_USEC; - if (start_usecs < 0) - start_usecs = 0; - - tv = ((struct timeval) { .tv_sec = start_usecs / UM_USEC_PER_SEC, - .tv_usec = start_usecs % UM_USEC_PER_SEC }); - interval = ((struct itimerval) { { 0, usec }, tv }); - - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) - return -errno; - - return 0; -} -#endif - -void idle_sleep(unsigned long long nsecs) +/** + * os_idle_sleep() - sleep for a given time of nsecs + * @nsecs: nanoseconds to sleep + */ +void os_idle_sleep(unsigned long long nsecs) { struct timespec ts; + if (nsecs <= 0) { + return; + } + + ts = ((struct timespec) { + .tv_sec = nsecs / UM_NSEC_PER_SEC, + .tv_nsec = nsecs % UM_NSEC_PER_SEC + }); + /* - * nsecs can come in as zero, in which case, this starts a - * busy loop. To prevent this, reset nsecs to the tick - * interval if it is zero. + * Relay the signal if clock_nanosleep is interrupted. */ - if (nsecs == 0) - nsecs = UM_NSEC_PER_SEC / UM_HZ; - - nsecs = sleep_time(nsecs); - ts = ((struct timespec) { .tv_sec = nsecs / UM_NSEC_PER_SEC, - .tv_nsec = nsecs % UM_NSEC_PER_SEC }); - - if (nanosleep(&ts, &ts) == 0) + if (clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL)) { deliver_alarm(); - after_sleep_interval(&ts); + } }