diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index db7aab1516de..b8d0a30f1644 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -192,3 +192,14 @@ Date: November 2017 Contact: "Sheng Yong" Description: Controls readahead inode block in readdir. + +What: /sys/fs/f2fs/<disk>/extension_list +Date: February 2018 +Contact: "Chao Yu" +Description: + Used to configure the extension list: + - Query: cat /sys/fs/f2fs/<disk>/extension_list + - Add: echo '[h/c]extension' > /sys/fs/f2fs/<disk>/extension_list + - Del: echo '[h/c]!extension' > /sys/fs/f2fs/<disk>/extension_list + - [h] means add/del hot file extension + - [c] means add/del cold file extension diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 6cf9ad12c57f..1f52baea2f69 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -172,6 +172,23 @@ offgrpjquota Turn off group journalled quota. offprjjquota Turn off project journalled quota. quota Enable plain user disk quota accounting. noquota Disable all plain disk quota options. +whint_mode=%s Control which write hints are passed down to the block + layer. This supports "off", "user-based", and + "fs-based". In "off" mode (default), f2fs does not pass + down hints. In "user-based" mode, f2fs tries to pass + down hints given by users. And in "fs-based" mode, f2fs + passes down hints according to its own policy. +alloc_mode=%s Adjust the block allocation policy, which supports "reuse" + and "default". +fsync_mode=%s Control the policy of fsync. Currently supports "posix" + and "strict". In "posix" mode, which is the default, fsync + will follow POSIX semantics and do a light operation + to improve filesystem performance. In "strict" mode, + fsync will be heavy and behave in line with xfs, ext4 + and btrfs, where xfstest generic/342 will pass, but + performance will regress. +test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt + context. The fake fscrypt context is used by xfstests.
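[A usage sketch for the two f2fs documentation hunks above. The device name sda1, the mount point and the example extensions are illustrative assumptions, not part of the patch:

	# mount with fs-based write hints and strict fsync semantics
	mount -t f2fs -o whint_mode=fs-based,fsync_mode=strict /dev/sda1 /mnt
	# register "mov" as a hot file extension and "log" as a cold one
	echo 'hmov' > /sys/fs/f2fs/sda1/extension_list
	echo 'clog' > /sys/fs/f2fs/sda1/extension_list
	# delete the cold entry again, then query the resulting list
	echo 'c!log' > /sys/fs/f2fs/sda1/extension_list
	cat /sys/fs/f2fs/sda1/extension_list
]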
================================================================================ DEBUGFS ENTRIES diff --git a/Makefile b/Makefile index 2213da14bc6d..f1f5f335b19d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 127 +SUBLEVEL = 128 EXTRAVERSION = NAME = Blurry Fish Butt diff --git a/arch/arm/boot/dts/imx53-qsrb.dts b/arch/arm/boot/dts/imx53-qsrb.dts index 96d7eede412e..036c9bd9bf75 100644 --- a/arch/arm/boot/dts/imx53-qsrb.dts +++ b/arch/arm/boot/dts/imx53-qsrb.dts @@ -23,7 +23,7 @@ imx53-qsrb { pinctrl_pmic: pmicgrp { fsl,pins = < - MX53_PAD_CSI0_DAT5__GPIO5_23 0x1e4 /* IRQ */ + MX53_PAD_CSI0_DAT5__GPIO5_23 0x1c4 /* IRQ */ >; }; }; diff --git a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi index 9e096d811bed..7a032dd84bb2 100644 --- a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi +++ b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi @@ -88,6 +88,7 @@ clocks = <&clks 201>; VDDA-supply = <&reg_2p5v>; VDDIO-supply = <&reg_3p3v>; + lrclk-strength = <3>; }; }; diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 9430a9928199..00de37fe5f8a 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -132,7 +132,7 @@ }; esdhc: esdhc@1560000 { - compatible = "fsl,esdhc"; + compatible = "fsl,ls1021a-esdhc", "fsl,esdhc"; reg = <0x0 0x1560000 0x0 0x10000>; interrupts = ; clock-frequency = <0>; diff --git a/arch/arm/include/asm/xen/events.h b/arch/arm/include/asm/xen/events.h index 71e473d05fcc..620dc75362e5 100644 --- a/arch/arm/include/asm/xen/events.h +++ b/arch/arm/include/asm/xen/events.h @@ -16,7 +16,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) return raw_irqs_disabled_flags(regs->ARM_cpsr); } -#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((ptr), \ +#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((long long*)(ptr),\ atomic64_t, \ counter), (val)) diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index 28c90bc372bd..78d325f3245a 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -795,6 +795,8 @@ static struct platform_device da8xx_dsp = { .resource = da8xx_rproc_resources, }; +static bool rproc_mem_inited __initdata; + #if IS_ENABLED(CONFIG_DA8XX_REMOTEPROC) static phys_addr_t rproc_base __initdata; @@ -833,6 +835,8 @@ void __init da8xx_rproc_reserve_cma(void) ret = dma_declare_contiguous(&da8xx_dsp.dev, rproc_size, rproc_base, 0); if (ret) pr_err("%s: dma_declare_contiguous failed %d\n", __func__, ret); + else + rproc_mem_inited = true; } #else @@ -847,6 +851,12 @@ int __init da8xx_register_rproc(void) { int ret; + if (!rproc_mem_inited) { + pr_warn("%s: memory not reserved for DSP, not registering DSP device\n", + __func__); + return -ENOMEM; + } + ret = platform_device_register(&da8xx_dsp); if (ret) pr_err("%s: can't register DSP device: %d\n", __func__, ret); diff --git a/arch/arm/mach-imx/cpu.c b/arch/arm/mach-imx/cpu.c index 5b0f752d5507..24be631e487d 100644 --- a/arch/arm/mach-imx/cpu.c +++ b/arch/arm/mach-imx/cpu.c @@ -133,6 +133,9 @@ struct device * __init imx_soc_device_init(void) case MXC_CPU_IMX6UL: soc_id = "i.MX6UL"; break; + case MXC_CPU_IMX6ULL: + soc_id = "i.MX6ULL"; + break; case MXC_CPU_IMX7D: soc_id = "i.MX7D"; break; diff --git a/arch/arm/mach-imx/mxc.h b/arch/arm/mach-imx/mxc.h index a5b1af6d7441..478cd91d0885 100644 --- a/arch/arm/mach-imx/mxc.h +++ b/arch/arm/mach-imx/mxc.h @@ -39,6 +39,7 @@ #define MXC_CPU_IMX6SX 0x62 #define 
MXC_CPU_IMX6Q 0x63 #define MXC_CPU_IMX6UL 0x64 +#define MXC_CPU_IMX6ULL 0x65 #define MXC_CPU_IMX7D 0x72 #define IMX_DDR_TYPE_LPDDR2 1 @@ -171,6 +172,11 @@ static inline bool cpu_is_imx6ul(void) return __mxc_cpu_type == MXC_CPU_IMX6UL; } +static inline bool cpu_is_imx6ull(void) +{ + return __mxc_cpu_type == MXC_CPU_IMX6ULL; +} + static inline bool cpu_is_imx6q(void) { return __mxc_cpu_type == MXC_CPU_IMX6Q; diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 85c4a8981d47..f32b42e8725d 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -48,16 +48,16 @@ do { \ } while (0) static inline int -futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) +futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; + int oparg = (int)(encoded_op << 8) >> 20; + int cmparg = (int)(encoded_op << 20) >> 20; int oldval = 0, ret, tmp; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; + oparg = 1U << (oparg & 0x1f); if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; diff --git a/arch/mips/include/asm/kprobes.h b/arch/mips/include/asm/kprobes.h index daba1f9a4f79..174aedce3167 100644 --- a/arch/mips/include/asm/kprobes.h +++ b/arch/mips/include/asm/kprobes.h @@ -40,7 +40,8 @@ typedef union mips_instruction kprobe_opcode_t; #define flush_insn_slot(p) \ do { \ - flush_icache_range((unsigned long)p->addr, \ + if (p->addr) \ + flush_icache_range((unsigned long)p->addr, \ (unsigned long)p->addr + \ (MAX_INSN_SIZE * sizeof(kprobe_opcode_t))); \ } while (0) diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index 832e2167d00f..ef7c02af7522 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -18,6 +18,10 @@ #include +#ifdef CONFIG_HIGHMEM +#include <asm/highmem.h> +#endif + extern int temp_tlb_entry; /* @@ -61,7 +65,8 @@ extern int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, #define VMALLOC_START MAP_BASE -#define PKMAP_BASE (0xfe000000UL) +#define PKMAP_END ((FIXADDR_START) & ~((LAST_PKMAP << PAGE_SHIFT)-1)) +#define PKMAP_BASE (PKMAP_END - PAGE_SIZE * LAST_PKMAP) #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index adc6911ba748..b19a3c506b1e 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -51,15 +51,15 @@ void __init pagetable_init(void) /* * Fixed mappings: */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base); + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1); + fixrange_init(vaddr & PMD_MASK, vaddr + FIXADDR_SIZE, pgd_base); #ifdef CONFIG_HIGHMEM /* * Permanent kmaps: */ vaddr = PKMAP_BASE; - fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); pgd = swapper_pg_dir + __pgd_offset(vaddr); pud = pud_offset(pgd, vaddr); diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 3140c19c448c..70b379ee6b7e 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -132,7 +132,19 @@ extern long long virt_phys_offset; #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) 
<< PAGE_SHIFT) + +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * On hash the vmalloc and other regions alias to the kernel region when passed + * through __pa(), which virt_to_pfn() uses. That means virt_addr_valid() can + * return true for some vmalloc addresses, which is incorrect. So explicitly + * check that the address is in the kernel region. + */ +#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \ + pfn_valid(virt_to_pfn(kaddr))) +#else #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) +#endif /* * On Book-E parts we need __va to parse the device tree and we can't diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1be1092c7204..9baba9576e99 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -686,12 +686,20 @@ static int __init get_freq(char *name, int cells, unsigned long *val) static void start_cpu_decrementer(void) { #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + unsigned int tcr; + /* Clear any pending timer interrupts */ mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); - /* Enable decrementer interrupt */ - mtspr(SPRN_TCR, TCR_DIE); -#endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */ + tcr = mfspr(SPRN_TCR); + /* + * The watchdog may have already been enabled by u-boot. So leave + * TCR[WP] (Watchdog Period) alone. + */ + tcr &= TCR_WP_MASK; /* Clear all bits except for TCR[WP] */ + tcr |= TCR_DIE; /* Enable decrementer */ + mtspr(SPRN_TCR, tcr); +#endif } void __init generic_calibrate_decr(void) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index f2c75a1e0536..0d91baf63fed 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -50,7 +50,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) pteg_addr = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); - copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)); + ret = H_FUNCTION; + if (copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg))) + goto done; hpte = pteg; ret = H_PTEG_FULL; @@ -71,7 +73,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6)); hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7)); pteg_addr += i * HPTE_SIZE; - copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE); + ret = H_FUNCTION; + if (copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE)) + goto done; kvmppc_set_gpr(vcpu, 4, pte_index | i); ret = H_SUCCESS; @@ -93,7 +97,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); - copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + ret = H_FUNCTION; + if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) + goto done; pte[0] = be64_to_cpu((__force __be64)pte[0]); pte[1] = be64_to_cpu((__force __be64)pte[1]); @@ -103,7 +109,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) goto done; - copy_to_user((void __user *)pteg, &v, sizeof(v)); + ret = H_FUNCTION; + if (copy_to_user((void __user *)pteg, &v, sizeof(v))) + goto done; rb = compute_tlbie_rb(pte[0], pte[1], pte_index); vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? 
true : false); @@ -171,7 +179,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) } pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX); - copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) { + ret = H_FUNCTION; + break; + } pte[0] = be64_to_cpu((__force __be64)pte[0]); pte[1] = be64_to_cpu((__force __be64)pte[1]); @@ -184,7 +195,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) tsh |= H_BULK_REMOVE_NOT_FOUND; } else { /* Splat the pteg in (userland) hpt */ - copy_to_user((void __user *)pteg, &v, sizeof(v)); + if (copy_to_user((void __user *)pteg, &v, sizeof(v))) { + ret = H_FUNCTION; + break; + } rb = compute_tlbie_rb(pte[0], pte[1], tsh & H_BULK_REMOVE_PTEX); @@ -211,7 +225,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); - copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + ret = H_FUNCTION; + if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) + goto done; pte[0] = be64_to_cpu((__force __be64)pte[0]); pte[1] = be64_to_cpu((__force __be64)pte[1]); @@ -234,7 +250,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); pte[0] = (__force u64)cpu_to_be64(pte[0]); pte[1] = (__force u64)cpu_to_be64(pte[1]); - copy_to_user((void __user *)pteg, pte, sizeof(pte)); + ret = H_FUNCTION; + if (copy_to_user((void __user *)pteg, pte, sizeof(pte))) + goto done; ret = H_SUCCESS; done: diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index be6212ddbf06..7e42e3ec2142 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -174,6 +174,8 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i, if (!dump_skip(cprm, roundup(cprm->written - total + sz, 4) - cprm->written)) goto Eio; + + rc = 0; out: free_page((unsigned long)buf); return rc; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 0f41a8286378..da4f3f2a8186 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -21,8 +21,14 @@ SECTIONS { . = 0x00000000; .text : { - _text = .; /* Text and read-only data */ + /* Text and read-only data */ HEAD_TEXT + /* + * E.g. perf doesn't like symbols starting at address zero, + * therefore skip the initial PSW and channel program located + * at address zero and let _text start at 0x200. + */ + _text = 0x200; TEXT_TEXT SCHED_TEXT LOCK_TEXT diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 59d503866431..9cc600b2d68c 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1733,9 +1733,14 @@ static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size) lp->rcv_nxt = p->seqid; + /* + * If this is a control-only packet, there is nothing + * else to do but advance the rx queue since the packet + * was already processed above. 
+ */ if (!(p->type & LDC_DATA)) { new = rx_advance(lp, new); - goto no_data; + break; } if (p->stype & (LDC_ACK | LDC_NACK)) { err = data_ack_nack(lp, p); diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig new file mode 100644 index 000000000000..512d009146dd --- /dev/null +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -0,0 +1,442 @@ +CONFIG_POSIX_MQUEUE=y +# CONFIG_USELIB is not set +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_NAMESPACES=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_RD_LZ4 is not set +CONFIG_KALLSYMS_ALL=y +# CONFIG_PCSPKR_PLATFORM is not set +CONFIG_BPF_SYSCALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_CC_STACKPROTECTOR_STRONG=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_SMP=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_PARAVIRT_SPINLOCKS=y +CONFIG_MCORE2=y +CONFIG_PROCESSOR_SELECT=y +# CONFIG_CPU_SUP_CENTAUR is not set +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT=y +# CONFIG_MICROCODE is not set +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_KSM=y +CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_TRANSPARENT_HUGEPAGE=y +# CONFIG_MTRR is not set +CONFIG_HZ_100=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_PHYSICAL_START=0x200000 +CONFIG_RANDOMIZE_BASE=y +CONFIG_PHYSICAL_ALIGN=0x1000000 +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 reboot=p" +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_ACPI_PROCFS_POWER=y +# CONFIG_ACPI_FAN is not set +# CONFIG_ACPI_THERMAL is not set +# CONFIG_X86_PM_TIMER is not set +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_X86_ACPI_CPUFREQ=y +# CONFIG_X86_ACPI_CPUFREQ_CPB is not set +CONFIG_PCI_MMCONFIG=y +CONFIG_PCI_MSI=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_IA32_EMULATION=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG_DESTROY=y +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +CONFIG_TCP_MD5SIG=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y 
+CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_MATCH_RPFILTER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_RFKILL=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEBUG_DEVRES=y +CONFIG_OF=y +CONFIG_OF_UNITTEST=y +# CONFIG_PNP_DEBUG_MESSAGES is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_UID_SYS_STATS=y +CONFIG_MEMORY_STATE_TIME=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_VIRTIO=y +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +# CONFIG_ETHERNET is not set +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_USB_USBNET=y +# CONFIG_USB_NET_AX8817X is not set +# CONFIG_USB_NET_AX88179_178A is not set +# CONFIG_USB_NET_CDCETHER is not set +# CONFIG_USB_NET_CDC_NCM is not set +# CONFIG_USB_NET_NET1080 is not set +# CONFIG_USB_NET_CDC_SUBSET is not set +# CONFIG_USB_NET_ZAURUS is not set +CONFIG_MAC80211_HWSIM=y +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y 
+CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_I8042 is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=48 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_HW_RANDOM=y +# CONFIG_HW_RANDOM_INTEL is not set +# CONFIG_HW_RANDOM_AMD is not set +# CONFIG_HW_RANDOM_VIA is not set +CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_HPET=y +# CONFIG_HPET_MMAP_DEFAULT is not set +# CONFIG_DEVPORT is not set +# CONFIG_ACPI_I2C_OPREGION is not set +# CONFIG_I2C_COMPAT is not set +# CONFIG_I2C_HELPER_AUTO is not set +CONFIG_PTP_1588_CLOCK=y +CONFIG_GPIOLIB=y +# CONFIG_HWMON is not set +# CONFIG_X86_PKG_TEMP_THERMAL is not set +CONFIG_WATCHDOG=y +CONFIG_SOFT_WATCHDOG=y +CONFIG_MEDIA_SUPPORT=y +# CONFIG_DVB_TUNER_DIB0070 is not set +# CONFIG_DVB_TUNER_DIB0090 is not set +# CONFIG_VGA_ARB is not set +CONFIG_DRM=y +# CONFIG_DRM_FBDEV_EMULATION is not set +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +# CONFIG_HID_GENERIC is not set +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_GADGET=y +CONFIG_USB_DUMMY_HCD=y +CONFIG_USB_CONFIGFS=y +CONFIG_USB_CONFIGFS_F_FS=y +CONFIG_USB_CONFIGFS_F_ACC=y +CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y +CONFIG_USB_CONFIGFS_UEVENT=y +CONFIG_USB_CONFIGFS_F_MIDI=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SW_SYNC_USER=y +CONFIG_ANDROID_VSOC=y +CONFIG_ION=y +# CONFIG_X86_PLATFORM_DEVICES is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +# CONFIG_FIRMWARE_MEMMAP is not set +CONFIG_EXT4_FS=y 
+CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_ENCRYPTION=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V2=y +CONFIG_AUTOFS4_FS=y +CONFIG_FUSE_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_SDCARD_FS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_PANIC_TIMEOUT=5 +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_IO_DELAY_NONE=y +CONFIG_DEBUG_BOOT_PARAMS=y +CONFIG_OPTIMIZE_INLINING=y +CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_PATH=y +CONFIG_HARDENED_USERCOPY=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_ECHAINIV=y diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c7c4d9c51e99..eb02087650d2 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -365,6 +365,8 @@ static int __init tsc_setup(char *str) tsc_clocksource_reliable = 1; if (!strncmp(str, "noirqtime", 9)) no_sched_irq_time = 1; + if (!strcmp(str, "unstable")) + mark_tsc_unstable("boot parameter"); return 1; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2038e5bacce6..42654375b73f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1386,6 +1386,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, */ if (var->unusable) var->db = 0; + /* This is symmetric with svm_set_segment() */ var->dpl = to_svm(vcpu)->vmcb->save.cpl; break; } @@ -1531,18 +1532,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, s->base = var->base; s->limit = var->limit; s->selector = var->selector; - if (var->unusable) - s->attrib = 0; - else { - s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); - s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; - s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; - s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT; - s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; - s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; - s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; - s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; - } + s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); + s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; + s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; + s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT; + s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; + s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; + s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; + s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; /* * This is always accurate, except if SYSRET returned to a segment @@ -1551,7 +1548,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, * would entail passing the CPL to userspace and back. 
*/ if (seg == VCPU_SREG_SS) - svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; + /* This is symmetric with svm_get_segment() */ + svm->vmcb->save.cpl = (var->dpl & 3); mark_dirty(svm->vmcb, VMCB_SEG); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 46bbc69844bd..528b4352fa99 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7657,11 +7657,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, { unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); int cr = exit_qualification & 15; - int reg = (exit_qualification >> 8) & 15; - unsigned long val = kvm_register_readl(vcpu, reg); + int reg; + unsigned long val; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ + reg = (exit_qualification >> 8) & 15; + val = kvm_register_readl(vcpu, reg); switch (cr) { case 0: if (vmcs12->cr0_guest_host_mask & @@ -7716,6 +7718,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, * lmsw can change bits 1..3 of cr0, and only set bit 0 of * cr0. Other attempted changes are ignored, with no exit. */ + val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; if (vmcs12->cr0_guest_host_mask & 0xe & (val ^ vmcs12->cr0_read_shadow)) return true; diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index 7e48807b2fa1..45a53dfe1859 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -55,7 +55,7 @@ ENTRY(csum_partial_copy_generic) movq %r12, 3*8(%rsp) movq %r14, 4*8(%rsp) movq %r13, 5*8(%rsp) - movq %rbp, 6*8(%rsp) + movq %r15, 6*8(%rsp) movq %r8, (%rsp) movq %r9, 1*8(%rsp) @@ -74,7 +74,7 @@ ENTRY(csum_partial_copy_generic) /* main loop. clear in 64 byte blocks */ /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ /* r11: temp3, rdx: temp4, r12 loopcnt */ - /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */ + /* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */ .p2align 4 .Lloop: source @@ -89,7 +89,7 @@ ENTRY(csum_partial_copy_generic) source movq 32(%rdi), %r10 source - movq 40(%rdi), %rbp + movq 40(%rdi), %r15 source movq 48(%rdi), %r14 source @@ -103,7 +103,7 @@ ENTRY(csum_partial_copy_generic) adcq %r11, %rax adcq %rdx, %rax adcq %r10, %rax - adcq %rbp, %rax + adcq %r15, %rax adcq %r14, %rax adcq %r13, %rax @@ -121,7 +121,7 @@ ENTRY(csum_partial_copy_generic) dest movq %r10, 32(%rsi) dest - movq %rbp, 40(%rsi) + movq %r15, 40(%rsi) dest movq %r14, 48(%rsi) dest @@ -203,7 +203,7 @@ ENTRY(csum_partial_copy_generic) movq 3*8(%rsp), %r12 movq 4*8(%rsp), %r14 movq 5*8(%rsp), %r13 - movq 6*8(%rsp), %rbp + movq 6*8(%rsp), %r15 addq $7*8, %rsp ret diff --git a/block/bio-integrity.c b/block/bio-integrity.c index f6325d573c10..6e091ccadcd4 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -175,6 +175,9 @@ bool bio_integrity_enabled(struct bio *bio) if (!bio_is_rw(bio)) return false; + if (!bio_sectors(bio)) + return false; + /* Already protected? 
*/ if (bio_integrity(bio)) return false; diff --git a/block/blk-mq.c b/block/blk-mq.c index 1452db06ba45..d65ddc18d7e4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1252,13 +1252,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_queue_bounce(q, &bio); + blk_queue_split(q, &bio, q->bio_split); + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio_io_error(bio); return BLK_QC_T_NONE; } - blk_queue_split(q, &bio, q->bio_split); - if (!is_flush_fua && !blk_queue_nomerges(q) && blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) return BLK_QC_T_NONE; @@ -1634,7 +1634,8 @@ static void blk_mq_exit_hctx(struct request_queue *q, { unsigned flush_start_tag = set->queue_depth; - blk_mq_tag_idle(hctx); + if (blk_mq_hw_queue_mapped(hctx)) + blk_mq_tag_idle(hctx); if (set->ops->exit_request) set->ops->exit_request(set->driver_data, diff --git a/block/partition-generic.c b/block/partition-generic.c index 19cf33b91a5a..f75be6a4411f 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -320,8 +320,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, if (info) { struct partition_meta_info *pinfo = alloc_part_info(disk); - if (!pinfo) + if (!pinfo) { + err = -ENOMEM; goto out_free_stats; + } memcpy(pinfo, info, sizeof(*info)); p->info = pinfo; } diff --git a/build.config.cuttlefish.x86_64 b/build.config.cuttlefish.x86_64 new file mode 100644 index 000000000000..b3d89109fe75 --- /dev/null +++ b/build.config.cuttlefish.x86_64 @@ -0,0 +1,15 @@ +ARCH=x86_64 +BRANCH=android-4.4 +CLANG_TRIPLE=x86_64-linux-gnu- +CROSS_COMPILE=x86_64-linux-androidkernel- +DEFCONFIG=x86_64_cuttlefish_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +POST_DEFCONFIG_CMDS="check_defconfig" +CLANG_PREBUILT_BIN=prebuilts/clang/host/linux-x86/clang-4630689/bin +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 84f8d4d8b6bc..09f706b7b06e 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -62,9 +62,6 @@ do_async_gen_syndrome(struct dma_chan *chan, dma_addr_t dma_dest[2]; int src_off = 0; - if (submit->flags & ASYNC_TX_FENCE) - dma_flags |= DMA_PREP_FENCE; - while (src_cnt > 0) { submit->flags = flags_orig; pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags)); @@ -83,6 +80,8 @@ do_async_gen_syndrome(struct dma_chan *chan, if (cb_fn_orig) dma_flags |= DMA_PREP_INTERRUPT; } + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; /* Drivers force forward progress in case they can not provide * a descriptor diff --git a/drivers/acpi/acpica/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c index 10ce48e16ebf..d830705f8a18 100644 --- a/drivers/acpi/acpica/evxfevnt.c +++ b/drivers/acpi/acpica/evxfevnt.c @@ -180,6 +180,12 @@ acpi_status acpi_enable_event(u32 event, u32 flags) ACPI_FUNCTION_TRACE(acpi_enable_event); + /* If Hardware Reduced flag is set, there are no fixed events */ + + if (acpi_gbl_reduced_hardware) { + return_ACPI_STATUS(AE_OK); + } + /* Decode the Fixed Event */ if (event > ACPI_EVENT_MAX) { @@ -237,6 +243,12 @@ acpi_status acpi_disable_event(u32 event, u32 flags) ACPI_FUNCTION_TRACE(acpi_disable_event); + /* If Hardware Reduced flag is set, there are no fixed events */ + + if (acpi_gbl_reduced_hardware) { + return_ACPI_STATUS(AE_OK); + } + /* Decode the Fixed Event */ if (event > ACPI_EVENT_MAX) { @@ -290,6 +302,12 @@ 
acpi_status acpi_clear_event(u32 event) ACPI_FUNCTION_TRACE(acpi_clear_event); + /* If Hardware Reduced flag is set, there are no fixed events */ + + if (acpi_gbl_reduced_hardware) { + return_ACPI_STATUS(AE_OK); + } + /* Decode the Fixed Event */ if (event > ACPI_EVENT_MAX) { diff --git a/drivers/acpi/acpica/psobject.c b/drivers/acpi/acpica/psobject.c index e54bc2aa7a88..a05b3b79b987 100644 --- a/drivers/acpi/acpica/psobject.c +++ b/drivers/acpi/acpica/psobject.c @@ -121,6 +121,9 @@ static acpi_status acpi_ps_get_aml_opcode(struct acpi_walk_state *walk_state) (u32)(aml_offset + sizeof(struct acpi_table_header))); + ACPI_ERROR((AE_INFO, + "Aborting disassembly, AML byte code is corrupt")); + /* Dump the context surrounding the invalid opcode */ acpi_ut_dump_buffer(((u8 *)walk_state->parser_state. @@ -129,6 +132,14 @@ static acpi_status acpi_ps_get_aml_opcode(struct acpi_walk_state *walk_state) sizeof(struct acpi_table_header) - 16)); acpi_os_printf(" */\n"); + + /* + * Just abort the disassembly, cannot continue because the + * parser is essentially lost. The disassembler can then + * randomly fail because an ill-constructed parse tree + * can result. + */ + return_ACPI_STATUS(AE_AML_BAD_OPCODE); #endif } @@ -293,6 +304,9 @@ acpi_ps_create_op(struct acpi_walk_state *walk_state, if (status == AE_CTRL_PARSE_CONTINUE) { return_ACPI_STATUS(AE_CTRL_PARSE_CONTINUE); } + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } /* Create Op structure and append to parent's argument list */ diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index aaa761b9081c..cd2eab6aa92e 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -514,8 +514,9 @@ int ahci_platform_init_host(struct platform_device *pdev, irq = platform_get_irq(pdev, 0); if (irq <= 0) { - dev_err(dev, "no irq\n"); - return -EINVAL; + if (irq != -EPROBE_DEFER) + dev_err(dev, "no irq\n"); + return irq; } hpriv->irq = irq; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3e65ae144fde..1dd16f26e77d 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -623,6 +623,9 @@ static int loop_switch(struct loop_device *lo, struct file *file) */ static int loop_flush(struct loop_device *lo) { + /* loop not yet configured, no running thread, nothing to flush */ + if (lo->lo_state != Lo_bound) + return 0; return loop_switch(lo, NULL); } diff --git a/drivers/bus/brcmstb_gisb.c b/drivers/bus/brcmstb_gisb.c index f364fa4d24eb..f59183018280 100644 --- a/drivers/bus/brcmstb_gisb.c +++ b/drivers/bus/brcmstb_gisb.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 Broadcom Corporation + * Copyright (C) 2014-2017 Broadcom * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -33,8 +33,6 @@ #define ARB_ERR_CAP_CLEAR (1 << 0) #define ARB_ERR_CAP_STATUS_TIMEOUT (1 << 12) #define ARB_ERR_CAP_STATUS_TEA (1 << 11) -#define ARB_ERR_CAP_STATUS_BS_SHIFT (1 << 2) -#define ARB_ERR_CAP_STATUS_BS_MASK 0x3c #define ARB_ERR_CAP_STATUS_WRITE (1 << 1) #define ARB_ERR_CAP_STATUS_VALID (1 << 0) @@ -43,7 +41,6 @@ enum { ARB_ERR_CAP_CLR, ARB_ERR_CAP_HI_ADDR, ARB_ERR_CAP_ADDR, - ARB_ERR_CAP_DATA, ARB_ERR_CAP_STATUS, ARB_ERR_CAP_MASTER, }; @@ -53,7 +50,6 @@ static const int gisb_offsets_bcm7038[] = { [ARB_ERR_CAP_CLR] = 0x0c4, [ARB_ERR_CAP_HI_ADDR] = -1, [ARB_ERR_CAP_ADDR] = 0x0c8, - [ARB_ERR_CAP_DATA] = 0x0cc, [ARB_ERR_CAP_STATUS] = 0x0d0, [ARB_ERR_CAP_MASTER] = -1, }; @@ -63,7 +59,6 @@ static const int 
gisb_offsets_bcm7400[] = { [ARB_ERR_CAP_CLR] = 0x0c8, [ARB_ERR_CAP_HI_ADDR] = -1, [ARB_ERR_CAP_ADDR] = 0x0cc, - [ARB_ERR_CAP_DATA] = 0x0d0, [ARB_ERR_CAP_STATUS] = 0x0d4, [ARB_ERR_CAP_MASTER] = 0x0d8, }; @@ -73,7 +68,6 @@ static const int gisb_offsets_bcm7435[] = { [ARB_ERR_CAP_CLR] = 0x168, [ARB_ERR_CAP_HI_ADDR] = -1, [ARB_ERR_CAP_ADDR] = 0x16c, - [ARB_ERR_CAP_DATA] = 0x170, [ARB_ERR_CAP_STATUS] = 0x174, [ARB_ERR_CAP_MASTER] = 0x178, }; @@ -83,7 +77,6 @@ static const int gisb_offsets_bcm7445[] = { [ARB_ERR_CAP_CLR] = 0x7e4, [ARB_ERR_CAP_HI_ADDR] = 0x7e8, [ARB_ERR_CAP_ADDR] = 0x7ec, - [ARB_ERR_CAP_DATA] = 0x7f0, [ARB_ERR_CAP_STATUS] = 0x7f4, [ARB_ERR_CAP_MASTER] = 0x7f8, }; @@ -105,9 +98,13 @@ static u32 gisb_read(struct brcmstb_gisb_arb_device *gdev, int reg) { int offset = gdev->gisb_offsets[reg]; - /* return 1 if the hardware doesn't have ARB_ERR_CAP_MASTER */ - if (offset == -1) - return 1; + if (offset < 0) { + /* return 1 if the hardware doesn't have ARB_ERR_CAP_MASTER */ + if (reg == ARB_ERR_CAP_MASTER) + return 1; + else + return 0; + } if (gdev->big_endian) return ioread32be(gdev->base + offset); @@ -115,6 +112,16 @@ static u32 gisb_read(struct brcmstb_gisb_arb_device *gdev, int reg) return ioread32(gdev->base + offset); } +static u64 gisb_read_address(struct brcmstb_gisb_arb_device *gdev) +{ + u64 value; + + value = gisb_read(gdev, ARB_ERR_CAP_ADDR); + value |= (u64)gisb_read(gdev, ARB_ERR_CAP_HI_ADDR) << 32; + + return value; +} + static void gisb_write(struct brcmstb_gisb_arb_device *gdev, u32 val, int reg) { int offset = gdev->gisb_offsets[reg]; @@ -123,9 +130,9 @@ static void gisb_write(struct brcmstb_gisb_arb_device *gdev, u32 val, int reg) return; if (gdev->big_endian) - iowrite32be(val, gdev->base + reg); + iowrite32be(val, gdev->base + offset); else - iowrite32(val, gdev->base + reg); + iowrite32(val, gdev->base + offset); } static ssize_t gisb_arb_get_timeout(struct device *dev, @@ -181,7 +188,7 @@ static int brcmstb_gisb_arb_decode_addr(struct brcmstb_gisb_arb_device *gdev, const char *reason) { u32 cap_status; - unsigned long arb_addr; + u64 arb_addr; u32 master; const char *m_name; char m_fmt[11]; @@ -193,10 +200,7 @@ static int brcmstb_gisb_arb_decode_addr(struct brcmstb_gisb_arb_device *gdev, return 1; /* Read the address and master */ - arb_addr = gisb_read(gdev, ARB_ERR_CAP_ADDR) & 0xffffffff; -#if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)) - arb_addr |= (u64)gisb_read(gdev, ARB_ERR_CAP_HI_ADDR) << 32; -#endif + arb_addr = gisb_read_address(gdev); master = gisb_read(gdev, ARB_ERR_CAP_MASTER); m_name = brcmstb_gisb_master_to_str(gdev, master); @@ -205,7 +209,7 @@ static int brcmstb_gisb_arb_decode_addr(struct brcmstb_gisb_arb_device *gdev, m_name = m_fmt; } - pr_crit("%s: %s at 0x%lx [%c %s], core: %s\n", + pr_crit("%s: %s at 0x%llx [%c %s], core: %s\n", __func__, reason, arb_addr, cap_status & ARB_ERR_CAP_STATUS_WRITE ? 'W' : 'R', cap_status & ARB_ERR_CAP_STATUS_TIMEOUT ? 
"timeout" : "", diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index f53e8ba2c718..83c206f0fc98 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -409,6 +409,7 @@ static void start_event_fetch(struct ssif_info *ssif_info, unsigned long *flags) msg = ipmi_alloc_smi_msg(); if (!msg) { ssif_info->ssif_state = SSIF_NORMAL; + ipmi_ssif_unlock_cond(ssif_info, flags); return; } @@ -431,6 +432,7 @@ static void start_recv_msg_fetch(struct ssif_info *ssif_info, msg = ipmi_alloc_smi_msg(); if (!msg) { ssif_info->ssif_state = SSIF_NORMAL; + ipmi_ssif_unlock_cond(ssif_info, flags); return; } diff --git a/drivers/char/random.c b/drivers/char/random.c index 1822472dffab..bd9fc2baa6aa 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -886,12 +886,16 @@ static void add_interrupt_bench(cycles_t start) static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) { __u32 *ptr = (__u32 *) regs; + unsigned int idx; if (regs == NULL) return 0; - if (f->reg_idx >= sizeof(struct pt_regs) / sizeof(__u32)) - f->reg_idx = 0; - return *(ptr + f->reg_idx++); + idx = READ_ONCE(f->reg_idx); + if (idx >= sizeof(struct pt_regs) / sizeof(__u32)) + idx = 0; + ptr += idx++; + WRITE_ONCE(f->reg_idx, idx); + return *ptr; } void add_interrupt_randomness(int irq, int irq_flags) diff --git a/drivers/clk/clk-conf.c b/drivers/clk/clk-conf.c index 43a218f35b19..4ad32ce428cf 100644 --- a/drivers/clk/clk-conf.c +++ b/drivers/clk/clk-conf.c @@ -106,7 +106,7 @@ static int __set_clk_rates(struct device_node *node, bool clk_supplier) rc = clk_set_rate(clk, rate); if (rc < 0) - pr_err("clk: couldn't set %s clk rate to %d (%d), current rate: %ld\n", + pr_err("clk: couldn't set %s clk rate to %u (%d), current rate: %lu\n", __clk_get_name(clk), rate, rc, clk_get_rate(clk)); clk_put(clk); diff --git a/drivers/clk/clk-scpi.c b/drivers/clk/clk-scpi.c index cd0f2726f5e0..c40445488d3a 100644 --- a/drivers/clk/clk-scpi.c +++ b/drivers/clk/clk-scpi.c @@ -71,15 +71,15 @@ static const struct clk_ops scpi_clk_ops = { }; /* find closest match to given frequency in OPP table */ -static int __scpi_dvfs_round_rate(struct scpi_clk *clk, unsigned long rate) +static long __scpi_dvfs_round_rate(struct scpi_clk *clk, unsigned long rate) { int idx; - u32 fmin = 0, fmax = ~0, ftmp; + unsigned long fmin = 0, fmax = ~0, ftmp; const struct scpi_opp *opp = clk->info->opps; for (idx = 0; idx < clk->info->count; idx++, opp++) { ftmp = opp->freq; - if (ftmp >= (u32)rate) { + if (ftmp >= rate) { if (ftmp <= fmax) fmax = ftmp; break; diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c index a5c111b67f37..ea11a33e7fff 100644 --- a/drivers/cpuidle/dt_idle_states.c +++ b/drivers/cpuidle/dt_idle_states.c @@ -174,8 +174,10 @@ int dt_init_idle_driver(struct cpuidle_driver *drv, if (!state_node) break; - if (!of_device_is_available(state_node)) + if (!of_device_is_available(state_node)) { + of_node_put(state_node); continue; + } if (!idle_state_valid(state_node, i, cpumask)) { pr_warn("%s idle state not valid, bailing out\n", diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index f8d740a6740d..48d4dddf4941 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1722,17 +1722,24 @@ static int sdma_probe(struct platform_device *pdev) if (IS_ERR(sdma->clk_ahb)) return PTR_ERR(sdma->clk_ahb); - clk_prepare(sdma->clk_ipg); - clk_prepare(sdma->clk_ahb); + ret = clk_prepare(sdma->clk_ipg); + if (ret) + return ret; + + ret = clk_prepare(sdma->clk_ahb); + if 
(ret) + goto err_clk; ret = devm_request_irq(&pdev->dev, irq, sdma_int_handler, 0, "sdma", sdma); if (ret) - return ret; + goto err_irq; sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL); - if (!sdma->script_addrs) - return -ENOMEM; + if (!sdma->script_addrs) { + ret = -ENOMEM; + goto err_irq; + } /* initially no scripts available */ saddr_arr = (s32 *)sdma->script_addrs; @@ -1847,6 +1854,10 @@ err_register: dma_async_device_unregister(&sdma->dma_device); err_init: kfree(sdma->script_addrs); +err_irq: + clk_unprepare(sdma->clk_ahb); +err_clk: + clk_unprepare(sdma->clk_ipg); return ret; } @@ -1857,6 +1868,8 @@ static int sdma_remove(struct platform_device *pdev) dma_async_device_unregister(&sdma->dma_device); kfree(sdma->script_addrs); + clk_unprepare(sdma->clk_ahb); + clk_unprepare(sdma->clk_ipg); /* Kill the tasklet */ for (i = 0; i < MAX_DMA_CHANNELS; i++) { struct sdma_channel *sdmac = &sdma->channel[i]; diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index 0574e1bbe45c..3ce5609b4611 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c @@ -763,7 +763,7 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev) /* Non-ECC RAM? */ printk(KERN_WARNING "%s: No ECC DIMMs discovered\n", __func__); res = -ENODEV; - goto err2; + goto err; } edac_dbg(3, "init mci\n"); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 06d345b087f8..759a39906a52 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2145,7 +2145,8 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, return desc; } - status = gpiod_request(desc, con_id); + /* If a connection label was passed use that, else use the device name as label */ + status = gpiod_request(desc, con_id ? 
con_id : dev_name(dev)); if (status < 0) return ERR_PTR(status); diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 7ed08fdc4c42..393e5335e33b 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -158,7 +158,7 @@ static void evict_entry(struct drm_gem_object *obj, size_t size = PAGE_SIZE * n; loff_t off = mmap_offset(obj) + (entry->obj_pgoff << PAGE_SHIFT); - const int m = 1 + ((omap_obj->width << fmt) / PAGE_SIZE); + const int m = DIV_ROUND_UP(omap_obj->width << fmt, PAGE_SIZE); if (m > 1) { int i; @@ -415,7 +415,7 @@ static int fault_2d(struct drm_gem_object *obj, * into account in some of the math, so figure out virtual stride * in pages */ - const int m = 1 + ((omap_obj->width << fmt) / PAGE_SIZE); + const int m = DIV_ROUND_UP(omap_obj->width << fmt, PAGE_SIZE); /* We don't use vmf->pgoff since that has the fake offset: */ pgoff = ((unsigned long)vmf->virtual_address - diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index b24f1d3045f0..a629f7c130f0 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -94,18 +94,20 @@ enum ina2xx_ids { ina219, ina226 }; struct ina2xx_config { u16 config_default; - int calibration_factor; + int calibration_value; int registers; int shunt_div; int bus_voltage_shift; int bus_voltage_lsb; /* uV */ - int power_lsb; /* uW */ + int power_lsb_factor; }; struct ina2xx_data { const struct ina2xx_config *config; long rshunt; + long current_lsb_uA; + long power_lsb_uW; struct mutex config_lock; struct regmap *regmap; @@ -115,21 +117,21 @@ struct ina2xx_data { static const struct ina2xx_config ina2xx_config[] = { [ina219] = { .config_default = INA219_CONFIG_DEFAULT, - .calibration_factor = 40960000, + .calibration_value = 4096, .registers = INA219_REGISTERS, .shunt_div = 100, .bus_voltage_shift = 3, .bus_voltage_lsb = 4000, - .power_lsb = 20000, + .power_lsb_factor = 20, }, [ina226] = { .config_default = INA226_CONFIG_DEFAULT, - .calibration_factor = 5120000, + .calibration_value = 2048, .registers = INA226_REGISTERS, .shunt_div = 400, .bus_voltage_shift = 0, .bus_voltage_lsb = 1250, - .power_lsb = 25000, + .power_lsb_factor = 25, }, }; @@ -168,12 +170,16 @@ static u16 ina226_interval_to_reg(int interval) return INA226_SHIFT_AVG(avg_bits); } +/* + * Calibration register is set to the best value, which eliminates + * truncation errors on calculating current register in hardware. + * According to datasheet (eq. 3) the best values are 2048 for + * ina226 and 4096 for ina219. They are hardcoded as calibration_value. + */ static int ina2xx_calibrate(struct ina2xx_data *data) { - u16 val = DIV_ROUND_CLOSEST(data->config->calibration_factor, - data->rshunt); - - return regmap_write(data->regmap, INA2XX_CALIBRATION, val); + return regmap_write(data->regmap, INA2XX_CALIBRATION, + data->config->calibration_value); } /* @@ -186,10 +192,6 @@ static int ina2xx_init(struct ina2xx_data *data) if (ret < 0) return ret; - /* - * Set current LSB to 1mA, shunt is in uOhms - * (equation 13 in datasheet). 
- */ return ina2xx_calibrate(data); } @@ -267,15 +269,15 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg, val = DIV_ROUND_CLOSEST(val, 1000); break; case INA2XX_POWER: - val = regval * data->config->power_lsb; + val = regval * data->power_lsb_uW; break; case INA2XX_CURRENT: - /* signed register, LSB=1mA (selected), in mA */ - val = (s16)regval; + /* signed register, result in mA */ + val = regval * data->current_lsb_uA; + val = DIV_ROUND_CLOSEST(val, 1000); break; case INA2XX_CALIBRATION: - val = DIV_ROUND_CLOSEST(data->config->calibration_factor, - regval); + val = regval; break; default: /* programmer goofed */ @@ -303,9 +305,32 @@ static ssize_t ina2xx_show_value(struct device *dev, ina2xx_get_value(data, attr->index, regval)); } -static ssize_t ina2xx_set_shunt(struct device *dev, - struct device_attribute *da, - const char *buf, size_t count) +/* + * In order to keep calibration register value fixed, the product + * of current_lsb and shunt_resistor should also be fixed and equal + * to shunt_voltage_lsb = 1 / shunt_div multiplied by 10^9 in order + * to keep the scale. + */ +static int ina2xx_set_shunt(struct ina2xx_data *data, long val) +{ + unsigned int dividend = DIV_ROUND_CLOSEST(1000000000, + data->config->shunt_div); + if (val <= 0 || val > dividend) + return -EINVAL; + + mutex_lock(&data->config_lock); + data->rshunt = val; + data->current_lsb_uA = DIV_ROUND_CLOSEST(dividend, val); + data->power_lsb_uW = data->config->power_lsb_factor * + data->current_lsb_uA; + mutex_unlock(&data->config_lock); + + return 0; +} + +static ssize_t ina2xx_store_shunt(struct device *dev, + struct device_attribute *da, + const char *buf, size_t count) { unsigned long val; int status; @@ -315,18 +340,9 @@ static ssize_t ina2xx_set_shunt(struct device *dev, if (status < 0) return status; - if (val == 0 || - /* Values greater than the calibration factor make no sense. 
*/ - val > data->config->calibration_factor) - return -EINVAL; - - mutex_lock(&data->config_lock); - data->rshunt = val; - status = ina2xx_calibrate(data); - mutex_unlock(&data->config_lock); + status = ina2xx_set_shunt(data, val); if (status < 0) return status; - return count; } @@ -386,7 +402,7 @@ static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, ina2xx_show_value, NULL, /* shunt resistance */ static SENSOR_DEVICE_ATTR(shunt_resistor, S_IRUGO | S_IWUSR, - ina2xx_show_value, ina2xx_set_shunt, + ina2xx_show_value, ina2xx_store_shunt, INA2XX_CALIBRATION); /* update interval (ina226 only) */ @@ -441,10 +457,7 @@ static int ina2xx_probe(struct i2c_client *client, val = INA2XX_RSHUNT_DEFAULT; } - if (val <= 0 || val > data->config->calibration_factor) - return -ENODEV; - - data->rshunt = val; + ina2xx_set_shunt(data, val); ina2xx_regmap_config.max_register = data->config->registers; diff --git a/drivers/iio/adc/hi8435.c b/drivers/iio/adc/hi8435.c index c73c6c62a6ac..7401f102dff4 100644 --- a/drivers/iio/adc/hi8435.c +++ b/drivers/iio/adc/hi8435.c @@ -121,10 +121,21 @@ static int hi8435_write_event_config(struct iio_dev *idev, enum iio_event_direction dir, int state) { struct hi8435_priv *priv = iio_priv(idev); + int ret; + u32 tmp; + + if (state) { + ret = hi8435_readl(priv, HI8435_SO31_0_REG, &tmp); + if (ret < 0) + return ret; + if (tmp & BIT(chan->channel)) + priv->event_prev_val |= BIT(chan->channel); + else + priv->event_prev_val &= ~BIT(chan->channel); - priv->event_scan_mask &= ~BIT(chan->channel); - if (state) priv->event_scan_mask |= BIT(chan->channel); + } else + priv->event_scan_mask &= ~BIT(chan->channel); return 0; } @@ -442,13 +453,15 @@ static int hi8435_probe(struct spi_device *spi) priv->spi = spi; reset_gpio = devm_gpiod_get(&spi->dev, NULL, GPIOD_OUT_LOW); - if (IS_ERR(reset_gpio)) { - /* chip s/w reset if h/w reset failed */ + if (!IS_ERR(reset_gpio)) { + /* need >=100ns low pulse to reset chip */ + gpiod_set_raw_value_cansleep(reset_gpio, 0); + udelay(1); + gpiod_set_raw_value_cansleep(reset_gpio, 1); + } else { + /* s/w reset chip if h/w reset is not available */ hi8435_writeb(priv, HI8435_CTRL_REG, HI8435_CTRL_SRST); hi8435_writeb(priv, HI8435_CTRL_REG, 0); - } else { - udelay(5); - gpiod_set_value(reset_gpio, 1); } spi_set_drvdata(spi, idev); diff --git a/drivers/iio/magnetometer/st_magn_spi.c b/drivers/iio/magnetometer/st_magn_spi.c index 6325e7dc8e03..f3cb4dc05391 100644 --- a/drivers/iio/magnetometer/st_magn_spi.c +++ b/drivers/iio/magnetometer/st_magn_spi.c @@ -48,8 +48,6 @@ static int st_magn_spi_remove(struct spi_device *spi) } static const struct spi_device_id st_magn_id_table[] = { - { LSM303DLHC_MAGN_DEV_NAME }, - { LSM303DLM_MAGN_DEV_NAME }, { LIS3MDL_MAGN_DEV_NAME }, { LSM303AGR_MAGN_DEV_NAME }, {}, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index a73874508c3a..cb3a8623ff54 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2974,12 +2974,8 @@ static void srpt_queue_response(struct se_cmd *cmd) } spin_unlock_irqrestore(&ioctx->spinlock, flags); - if (unlikely(transport_check_aborted_status(&ioctx->cmd, false) - || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) { - atomic_inc(&ch->req_lim_delta); - srpt_abort_cmd(ioctx); + if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) return; - } dir = ioctx->cmd.data_direction; diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index c9d491bc85e0..3851d5715772 100644 --- 
a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1082,6 +1082,13 @@ static int elan_probe(struct i2c_client *client, return error; } + /* Make sure there is something at this address */ + error = i2c_smbus_read_byte(client); + if (error < 0) { + dev_dbg(&client->dev, "nothing at this address: %d\n", error); + return -ENXIO; + } + /* Initialize the touchpad. */ error = elan_initialize(data); if (error) diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c index a679e56c44cd..765879dcaf85 100644 --- a/drivers/input/mouse/elan_i2c_i2c.c +++ b/drivers/input/mouse/elan_i2c_i2c.c @@ -557,7 +557,14 @@ static int elan_i2c_finish_fw_update(struct i2c_client *client, long ret; int error; int len; - u8 buffer[ETP_I2C_INF_LENGTH]; + u8 buffer[ETP_I2C_REPORT_LEN]; + + len = i2c_master_recv(client, buffer, ETP_I2C_REPORT_LEN); + if (len != ETP_I2C_REPORT_LEN) { + error = len < 0 ? len : -EIO; + dev_warn(dev, "failed to read I2C data after FW WDT reset: %d (%d)\n", + error, len); + } reinit_completion(completion); enable_irq(client->irq); diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 51b96e9bf793..06ea28e5d7b4 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1715,6 +1715,17 @@ int elantech_init(struct psmouse *psmouse) etd->samples[0], etd->samples[1], etd->samples[2]); } + if (etd->samples[1] == 0x74 && etd->hw_version == 0x03) { + /* + * This module has a bug which makes absolute mode + * unusable, so let's abort so we'll be using standard + * PS/2 protocol. + */ + psmouse_info(psmouse, + "absolute mode broken, forcing standard PS/2 protocol\n"); + goto init_fail; + } + if (elantech_set_absolute_mode(psmouse)) { psmouse_err(psmouse, "failed to put touchpad into absolute mode.\n"); diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index 9cb4b621fbc3..b92a19a594a1 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -72,7 +72,7 @@ send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb) if (sk->sk_state != MISDN_BOUND) continue; if (!cskb) - cskb = skb_copy(skb, GFP_KERNEL); + cskb = skb_copy(skb, GFP_ATOMIC); if (!cskb) { printk(KERN_WARNING "%s no skb\n", __func__); break; diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index b775e1efecd3..b9f71a87b7e1 100644 --- a/drivers/leds/leds-pca955x.c +++ b/drivers/leds/leds-pca955x.c @@ -281,7 +281,7 @@ static int pca955x_probe(struct i2c_client *client, "slave address 0x%02x\n", id->name, chip->bits, client->addr); - if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) return -EIO; if (pdata) { diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 4d46f2ce606f..aa84fcfd59fc 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -514,15 +514,21 @@ struct open_bucket { /* * We keep multiple buckets open for writes, and try to segregate different - * write streams for better cache utilization: first we look for a bucket where - * the last write to it was sequential with the current write, and failing that - * we look for a bucket that was last used by the same task. 
+ * write streams for better cache utilization: first we try to segregate flash + * only volume write streams from cached devices, secondly we look for a bucket + * where the last write to it was sequential with the current write, and + * failing that we look for a bucket that was last used by the same task. * * The ideas is if you've got multiple tasks pulling data into the cache at the * same time, you'll get better cache utilization if you try to segregate their * data and preserve locality. * - * For example, say you've starting Firefox at the same time you're copying a + * For example, dirty sectors of a flash-only volume are not reclaimable; if + * they are mixed with dirty sectors of a cached device, such buckets will be + * marked as dirty and won't be reclaimed, even though the dirty data of the + * cached device has been written back to the backing device. + * + * And say you've started Firefox at the same time you're copying a * bunch of files. Firefox will likely end up being fairly hot and stay in the * cache awhile, but the data you copied might not be; if you wrote all that * data to the same buckets it'd get invalidated at the same time. @@ -539,7 +545,10 @@ static struct open_bucket *pick_data_bucket(struct cache_set *c, struct open_bucket *ret, *ret_task = NULL; list_for_each_entry_reverse(ret, &c->data_buckets, list) - if (!bkey_cmp(&ret->key, search)) + if (UUID_FLASH_ONLY(&c->uuids[KEY_INODE(&ret->key)]) != + UUID_FLASH_ONLY(&c->uuids[KEY_INODE(search)])) + continue; + else if (!bkey_cmp(&ret->key, search)) goto found; else if (ret->last_write_point == write_point) ret_task = ret; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 02257cb19c0b..b9a526271f02 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -890,6 +890,12 @@ static void cached_dev_detach_finish(struct work_struct *w) mutex_lock(&bch_register_lock); + cancel_delayed_work_sync(&dc->writeback_rate_update); + if (!IS_ERR_OR_NULL(dc->writeback_thread)) { + kthread_stop(dc->writeback_thread); + dc->writeback_thread = NULL; + } + memset(&dc->sb.set_uuid, 0, 16); SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE); diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 494d01d0e92a..a7a561af05c9 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -945,8 +945,10 @@ static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev) cmsg.raid_slot = cpu_to_le32(rdev->desc_nr); lock_comm(cinfo); ret = __sendmsg(cinfo, &cmsg); - if (ret) + if (ret) { + unlock_comm(cinfo); return ret; + } cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE; ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX); cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5a334b947a16..9284acea4f7b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -110,8 +110,7 @@ static inline void unlock_device_hash_lock(struct r5conf *conf, int hash) static inline void lock_all_device_hash_locks_irq(struct r5conf *conf) { int i; - local_irq_disable(); - spin_lock(conf->hash_locks); + spin_lock_irq(conf->hash_locks); for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++) spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks); spin_lock(&conf->device_lock); @@ -121,9 +120,9 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf) { int i; spin_unlock(&conf->device_lock); - for (i = NR_STRIPE_HASH_LOCKS; i; i--) - spin_unlock(conf->hash_locks + i - 1); - local_irq_enable(); + for (i = NR_STRIPE_HASH_LOCKS - 1;
i; i--) + spin_unlock(conf->hash_locks + i); + spin_unlock_irq(conf->hash_locks); } /* bio's attached to a stripe+device for I/O are linked together in bi_sector @@ -726,12 +725,11 @@ static bool is_full_stripe_write(struct stripe_head *sh) static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) { - local_irq_disable(); if (sh1 > sh2) { - spin_lock(&sh2->stripe_lock); + spin_lock_irq(&sh2->stripe_lock); spin_lock_nested(&sh1->stripe_lock, 1); } else { - spin_lock(&sh1->stripe_lock); + spin_lock_irq(&sh1->stripe_lock); spin_lock_nested(&sh2->stripe_lock, 1); } } @@ -739,8 +737,7 @@ static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) { spin_unlock(&sh1->stripe_lock); - spin_unlock(&sh2->stripe_lock); - local_irq_enable(); + spin_unlock_irq(&sh2->stripe_lock); } /* Only freshly new full stripe normal write stripe can be added to a batch list */ diff --git a/drivers/media/i2c/cx25840/cx25840-core.c b/drivers/media/i2c/cx25840/cx25840-core.c index fe6eb78b6914..a47ab1947cc4 100644 --- a/drivers/media/i2c/cx25840/cx25840-core.c +++ b/drivers/media/i2c/cx25840/cx25840-core.c @@ -420,11 +420,13 @@ static void cx25840_initialize(struct i2c_client *client) INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); - prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); - queue_work(q, &state->fw_work); - schedule(); - finish_wait(&state->fw_wait, &wait); - destroy_workqueue(q); + if (q) { + prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); + queue_work(q, &state->fw_work); + schedule(); + finish_wait(&state->fw_wait, &wait); + destroy_workqueue(q); + } /* 6. */ cx25840_write(client, 0x115, 0x8c); @@ -631,11 +633,13 @@ static void cx23885_initialize(struct i2c_client *client) INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); - prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); - queue_work(q, &state->fw_work); - schedule(); - finish_wait(&state->fw_wait, &wait); - destroy_workqueue(q); + if (q) { + prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); + queue_work(q, &state->fw_work); + schedule(); + finish_wait(&state->fw_wait, &wait); + destroy_workqueue(q); + } /* Call the cx23888 specific std setup func, we no longer rely on * the generic cx24840 func. 
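All three cx25840 firmware-load paths in this file get the same guard: create_singlethread_workqueue() can return NULL on allocation failure, so the work is only queued and waited on when the workqueue actually exists. A minimal standalone sketch of the guarded pattern (load_fw_sync() is a hypothetical wrapper name; the workqueue and wait-queue calls are the real APIs used in the hunks above and in the cx231xx hunk that follows):

/* Needs <linux/workqueue.h>, <linux/wait.h> and <linux/sched.h>. */
static void load_fw_sync(struct cx25840_state *state)
{
	struct workqueue_struct *q;
	DEFINE_WAIT(wait);

	q = create_singlethread_workqueue("cx25840_fw");
	if (!q)
		return;	/* allocation failed: skip the firmware load */

	/* Sleep until cx25840_work_handler() wakes us through fw_wait. */
	prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE);
	queue_work(q, &state->fw_work);
	schedule();
	finish_wait(&state->fw_wait, &wait);
	destroy_workqueue(q);
}

Skipping the load when allocation fails matches the behaviour after this patch: initialization simply continues without the firmware work instead of dereferencing a NULL workqueue.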
@@ -746,11 +750,13 @@ static void cx231xx_initialize(struct i2c_client *client) INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); - prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); - queue_work(q, &state->fw_work); - schedule(); - finish_wait(&state->fw_wait, &wait); - destroy_workqueue(q); + if (q) { + prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); + queue_work(q, &state->fw_work); + schedule(); + finish_wait(&state->fw_wait, &wait); + destroy_workqueue(q); + } cx25840_std_setup(client); diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c index f838d9c7ed12..0fba4a2c1602 100644 --- a/drivers/media/rc/mceusb.c +++ b/drivers/media/rc/mceusb.c @@ -1370,8 +1370,13 @@ static int mceusb_dev_probe(struct usb_interface *intf, goto rc_dev_fail; /* wire up inbound data handler */ - usb_fill_int_urb(ir->urb_in, dev, pipe, ir->buf_in, maxp, - mceusb_dev_recv, ir, ep_in->bInterval); + if (usb_endpoint_xfer_int(ep_in)) + usb_fill_int_urb(ir->urb_in, dev, pipe, ir->buf_in, maxp, + mceusb_dev_recv, ir, ep_in->bInterval); + else + usb_fill_bulk_urb(ir->urb_in, dev, pipe, ir->buf_in, maxp, + mceusb_dev_recv, ir); + ir->urb_in->transfer_dma = ir->dma_in; ir->urb_in->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 3dc9ed2e0774..bb1e19f7ed5a 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -205,6 +205,10 @@ static int __vb2_queue_alloc(struct vb2_queue *q, enum vb2_memory memory, struct vb2_buffer *vb; int ret; + /* Ensure that q->num_buffers+num_buffers is below VB2_MAX_FRAME */ + num_buffers = min_t(unsigned int, num_buffers, + VB2_MAX_FRAME - q->num_buffers); + for (buffer = 0; buffer < num_buffers; ++buffer) { /* Allocate videobuf buffer structures */ vb = kzalloc(q->buf_struct_size, GFP_KERNEL); diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index f42d9c4e4561..cc277f7849b0 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -298,8 +298,11 @@ static void *qp_alloc_queue(u64 size, u32 flags) size_t pas_size; size_t vas_size; size_t queue_size = sizeof(*queue) + sizeof(*queue->kernel_if); - const u64 num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; + u64 num_pages; + if (size > SIZE_MAX - PAGE_SIZE) + return NULL; + num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; if (num_pages > (SIZE_MAX - queue_size) / (sizeof(*queue->kernel_if->u.g.pas) + @@ -624,9 +627,12 @@ static struct vmci_queue *qp_host_alloc_queue(u64 size) { struct vmci_queue *queue; size_t queue_page_size; - const u64 num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; + u64 num_pages; const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if)); + if (size > SIZE_MAX - PAGE_SIZE) + return NULL; + num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; if (num_pages > (SIZE_MAX - queue_size) / sizeof(*queue->kernel_if->u.h.page)) return NULL; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index eadccf498589..278d12888cab 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1490,39 +1490,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_close; } - /* If the mode uses primary, then the following is handled by - * bond_change_active_slave(). 
- */ - if (!bond_uses_primary(bond)) { - /* set promiscuity level to new slave */ - if (bond_dev->flags & IFF_PROMISC) { - res = dev_set_promiscuity(slave_dev, 1); - if (res) - goto err_close; - } - - /* set allmulti level to new slave */ - if (bond_dev->flags & IFF_ALLMULTI) { - res = dev_set_allmulti(slave_dev, 1); - if (res) - goto err_close; - } - - netif_addr_lock_bh(bond_dev); - - dev_mc_sync_multiple(slave_dev, bond_dev); - dev_uc_sync_multiple(slave_dev, bond_dev); - - netif_addr_unlock_bh(bond_dev); - } - - if (BOND_MODE(bond) == BOND_MODE_8023AD) { - /* add lacpdu mc addr to mc list */ - u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; - - dev_mc_add(slave_dev, lacpdu_multicast); - } - res = vlan_vids_add_by_dev(slave_dev, bond_dev); if (res) { netdev_err(bond_dev, "Couldn't add bond vlan ids to %s\n", @@ -1679,6 +1646,40 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_upper_unlink; } + /* If the mode uses primary, then the following is handled by + * bond_change_active_slave(). + */ + if (!bond_uses_primary(bond)) { + /* set promiscuity level to new slave */ + if (bond_dev->flags & IFF_PROMISC) { + res = dev_set_promiscuity(slave_dev, 1); + if (res) + goto err_sysfs_del; + } + + /* set allmulti level to new slave */ + if (bond_dev->flags & IFF_ALLMULTI) { + res = dev_set_allmulti(slave_dev, 1); + if (res) { + if (bond_dev->flags & IFF_PROMISC) + dev_set_promiscuity(slave_dev, -1); + goto err_sysfs_del; + } + } + + netif_addr_lock_bh(bond_dev); + dev_mc_sync_multiple(slave_dev, bond_dev); + dev_uc_sync_multiple(slave_dev, bond_dev); + netif_addr_unlock_bh(bond_dev); + + if (BOND_MODE(bond) == BOND_MODE_8023AD) { + /* add lacpdu mc addr to mc list */ + u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; + + dev_mc_add(slave_dev, lacpdu_multicast); + } + } + bond->slave_cnt++; bond_compute_features(bond); bond_set_carrier(bond); @@ -1702,6 +1703,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) return 0; /* Undo stages on error */ +err_sysfs_del: + bond_sysfs_slave_del(new_slave); + err_upper_unlink: bond_upper_dev_unlink(bond_dev, slave_dev); @@ -1709,9 +1713,6 @@ err_unregister: netdev_rx_handler_unregister(slave_dev); err_detach: - if (!bond_uses_primary(bond)) - bond_hw_addr_flush(bond_dev, slave_dev); - vlan_vids_del_by_dev(slave_dev, bond_dev); if (rcu_access_pointer(bond->primary_slave) == new_slave) RCU_INIT_POINTER(bond->primary_slave, NULL); @@ -2555,11 +2556,13 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) bond_for_each_slave_rcu(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); + slave->new_link = BOND_LINK_NOCHANGE; + if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, trans_start, 1) && bond_time_in_interval(bond, slave->last_rx, 1)) { - slave->link = BOND_LINK_UP; + slave->new_link = BOND_LINK_UP; slave_state_changed = 1; /* primary_slave has no meaning in round-robin @@ -2586,7 +2589,7 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) if (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, slave->last_rx, 2)) { - slave->link = BOND_LINK_DOWN; + slave->new_link = BOND_LINK_DOWN; slave_state_changed = 1; if (slave->link_failure_count < UINT_MAX) @@ -2617,6 +2620,11 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) if (!rtnl_trylock()) goto re_arm; + bond_for_each_slave(bond, slave, iter) { + if (slave->new_link != BOND_LINK_NOCHANGE) + slave->link = slave->new_link; + } + 
if (slave_state_changed) { bond_slave_state_change(bond); if (BOND_MODE(bond) == BOND_MODE_XOR) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index d1103d612d8b..949a82458a29 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3943,15 +3943,26 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) /* when transmitting in a vf, start bd must hold the ethertype * for fw to enforce it */ + u16 vlan_tci = 0; #ifndef BNX2X_STOP_ON_ERROR - if (IS_VF(bp)) + if (IS_VF(bp)) { #endif - tx_start_bd->vlan_or_ethertype = - cpu_to_le16(ntohs(eth->h_proto)); + /* Still need to consider inband vlan for enforcement */ + if (__vlan_get_tag(skb, &vlan_tci)) { + tx_start_bd->vlan_or_ethertype = + cpu_to_le16(ntohs(eth->h_proto)); + } else { + tx_start_bd->bd_flags.as_bitfield |= + (X_ETH_INBAND_VLAN << + ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT); + tx_start_bd->vlan_or_ethertype = + cpu_to_le16(vlan_tci); + } #ifndef BNX2X_STOP_ON_ERROR - else + } else { /* used by FW for packet accounting */ tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod); + } #endif } diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 0f6811860ad5..a36e38676640 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -2845,7 +2845,7 @@ bfa_ioc_get_adapter_optrom_ver(struct bfa_ioc *ioc, char *optrom_ver) static void bfa_ioc_get_adapter_manufacturer(struct bfa_ioc *ioc, char *manufacturer) { - memcpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); + strncpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); } static void diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index cf61a5869c6e..de23f23b41de 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -6076,13 +6076,18 @@ int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, if (!t4_fw_matches_chip(adap, fw_hdr)) return -EINVAL; + /* Disable FW_OK flag so that mbox commands with FW_OK flag set + * won't be sent when we are flashing FW. + */ + adap->flags &= ~FW_OK; + ret = t4_fw_halt(adap, mbox, force); if (ret < 0 && !force) - return ret; + goto out; ret = t4_load_fw(adap, fw_data, size); if (ret < 0) - return ret; + goto out; /* * Older versions of the firmware don't understand the new @@ -6093,7 +6098,17 @@ int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, * its header flags to see if it advertises the capability. */ reset = ((be32_to_cpu(fw_hdr->flags) & FW_HDR_FLAGS_RESET_HALT) == 0); - return t4_fw_restart(adap, mbox, reset); + ret = t4_fw_restart(adap, mbox, reset); + + /* Grab potentially new Firmware Device Log parameters so we can see + * how healthy the new Firmware is. It's okay to contact the new + * Firmware for these parameters even though, as far as it's + * concerned, we've never said "HELLO" to it ...
+ */ + (void)t4_init_devlog_params(adap); +out: + adap->flags |= FW_OK; + return ret; } /** @@ -7696,7 +7711,16 @@ int t4_cim_read_la(struct adapter *adap, u32 *la_buf, unsigned int *wrptr) ret = t4_cim_read(adap, UP_UP_DBG_LA_DATA_A, 1, &la_buf[i]); if (ret) break; - idx = (idx + 1) & UPDBGLARDPTR_M; + + /* Bits 0-3 of UpDbgLaRdPtr can range from 0000 to 1001 to + * identify the 32-bit portion of the full 312-bit data + */ + if (is_t6(adap->params.chip) && (idx & 0xf) >= 9) + idx = (idx & 0xff0) + 0x10; + else + idx++; + /* address can't exceed 0xfff */ + idx &= UPDBGLARDPTR_M; } restart: if (cfg & UPDBGLAEN_F) { diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index fa3786a9d30e..ec8ffd7eae33 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2604,8 +2604,8 @@ void t4vf_sge_stop(struct adapter *adapter) int t4vf_sge_init(struct adapter *adapter) { struct sge_params *sge_params = &adapter->params.sge; - u32 fl0 = sge_params->sge_fl_buffer_size[0]; - u32 fl1 = sge_params->sge_fl_buffer_size[1]; + u32 fl_small_pg = sge_params->sge_fl_buffer_size[0]; + u32 fl_large_pg = sge_params->sge_fl_buffer_size[1]; struct sge *s = &adapter->sge; unsigned int ingpadboundary, ingpackboundary; @@ -2614,9 +2614,20 @@ int t4vf_sge_init(struct adapter *adapter) * the Physical Function Driver. Ideally we should be able to deal * with _any_ configuration. Practice is different ... */ - if (fl0 != PAGE_SIZE || (fl1 != 0 && fl1 <= fl0)) { + + /* We only bother using the Large Page logic if the Large Page Buffer + * is larger than our Page Size Buffer. + */ + if (fl_large_pg <= fl_small_pg) + fl_large_pg = 0; + + /* The Page Size Buffer must be exactly equal to our Page Size and the + * Large Page Size Buffer should be 0 (per above) or a power of 2. + */ + if (fl_small_pg != PAGE_SIZE || + (fl_large_pg & (fl_large_pg - 1)) != 0) { dev_err(adapter->pdev_dev, "bad SGE FL buffer sizes [%d, %d]\n", - fl0, fl1); + fl_small_pg, fl_large_pg); return -EINVAL; } if ((sge_params->sge_control & RXPKTCPLMODE_F) == 0) { @@ -2627,8 +2638,8 @@ int t4vf_sge_init(struct adapter *adapter) /* * Now translate the adapter parameters into our internal forms. */ - if (fl1) - s->fl_pg_order = ilog2(fl1) - PAGE_SHIFT; + if (fl_large_pg) + s->fl_pg_order = ilog2(fl_large_pg) - PAGE_SHIFT; s->stat_len = ((sge_params->sge_control & EGRSTATUSPAGESIZE_F) ?
128 : 64); s->pktshift = PKTSHIFT_G(sge_params->sge_control); diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index 40071dad1c57..9c76f1a2f57b 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -382,7 +382,7 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev) { const struct of_device_id *id = of_match_device(fsl_pq_mdio_match, &pdev->dev); - const struct fsl_pq_mdio_data *data = id->data; + const struct fsl_pq_mdio_data *data; struct device_node *np = pdev->dev.of_node; struct resource res; struct device_node *tbi; @@ -390,6 +390,13 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev) struct mii_bus *new_bus; int err; + if (!id) { + dev_err(&pdev->dev, "Failed to match device\n"); + return -ENODEV; + } + + data = id->data; + dev_dbg(&pdev->dev, "found %s compatible node\n", id->compatible); new_bus = mdiobus_alloc_size(sizeof(*priv)); diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 5d7db6c01c46..f301c03c527b 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -342,6 +342,7 @@ static int emac_reset(struct emac_instance *dev) { struct emac_regs __iomem *p = dev->emacp; int n = 20; + bool __maybe_unused try_internal_clock = false; DBG(dev, "reset" NL); @@ -354,6 +355,7 @@ static int emac_reset(struct emac_instance *dev) } #ifdef CONFIG_PPC_DCR_NATIVE +do_retry: /* * PPC460EX/GT Embedded Processor Advanced User's Manual * section 28.10.1 Mode Register 0 (EMACx_MR0) states: @@ -361,10 +363,19 @@ * of the EMAC. If none is present, select the internal clock * (SDR0_ETH_CFG[EMACx_PHY_CLK] = 1). * After a soft reset, select the external clock. + * + * The AR8035-A PHY on the Meraki MR24 does not provide a TX Clk if the + * ethernet cable is not attached. This causes the reset to time out + * and the PHY detection code in emac_init_phy() is unable to + * communicate and detect the AR8035-A PHY. As a result, the emac + * driver bails out early and the user has no ethernet. + * In order to stay compatible with existing configurations, the + * driver will temporarily switch to the internal clock after + * the first reset fails. */ if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) { - if (dev->phy_address == 0xffffffff && - dev->phy_map == 0xffffffff) { + if (try_internal_clock || (dev->phy_address == 0xffffffff && + dev->phy_map == 0xffffffff)) { /* No PHY: select internal loop clock before reset */ dcri_clrset(SDR0, SDR0_ETH_CFG, 0, SDR0_ETH_CFG_ECS << dev->cell_index); @@ -382,8 +393,15 @@ #ifdef CONFIG_PPC_DCR_NATIVE if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) { - if (dev->phy_address == 0xffffffff && - dev->phy_map == 0xffffffff) { + if (!n && !try_internal_clock) { + /* first attempt has timed out.
*/ + n = 20; + try_internal_clock = true; + goto do_retry; + } + + if (try_internal_clock || (dev->phy_address == 0xffffffff && + dev->phy_map == 0xffffffff)) { /* No PHY: restore external clock source after reset */ dcri_clrset(SDR0, SDR0_ETH_CFG, SDR0_ETH_CFG_ECS << dev->cell_index, 0); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index e356e9187e84..20d8806d2bff 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1182,6 +1182,7 @@ static void e1000e_tx_hwtstamp_work(struct work_struct *work) struct e1000_hw *hw = &adapter->hw; if (er32(TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID) { + struct sk_buff *skb = adapter->tx_hwtstamp_skb; struct skb_shared_hwtstamps shhwtstamps; u64 txstmp; @@ -1190,9 +1191,14 @@ static void e1000e_tx_hwtstamp_work(struct work_struct *work) e1000e_systim_to_hwtstamp(adapter, &shhwtstamps, txstmp); - skb_tstamp_tx(adapter->tx_hwtstamp_skb, &shhwtstamps); - dev_kfree_skb_any(adapter->tx_hwtstamp_skb); + /* Clear the global tx_hwtstamp_skb pointer and force writes + * prior to notifying the stack of a Tx timestamp. + */ adapter->tx_hwtstamp_skb = NULL; + wmb(); /* force write prior to skb_tstamp_tx */ + + skb_tstamp_tx(skb, &shhwtstamps); + dev_kfree_skb_any(skb); } else if (time_after(jiffies, adapter->tx_hwtstamp_start + adapter->tx_timeout_factor * HZ)) { dev_kfree_skb_any(adapter->tx_hwtstamp_skb); @@ -6589,12 +6595,17 @@ static int e1000e_pm_thaw(struct device *dev) static int e1000e_pm_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); + int rc; e1000e_flush_lpic(pdev); e1000e_pm_freeze(dev); - return __e1000_shutdown(pdev, false); + rc = __e1000_shutdown(pdev, false); + if (rc) + e1000e_pm_thaw(dev); + + return rc; } static int e1000e_pm_resume(struct device *dev) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 4b62aa1f9ff8..6e5065f0907b 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -5079,7 +5079,7 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&hw->restart_work, sky2_restart); pci_set_drvdata(pdev, hw); - pdev->d3_delay = 150; + pdev->d3_delay = 200; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 1d4e2e054647..897d061e4f03 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -35,6 +35,7 @@ #include #include +#include #include #include "mlx4.h" @@ -985,16 +986,21 @@ int mlx4_flow_attach(struct mlx4_dev *dev, if (IS_ERR(mailbox)) return PTR_ERR(mailbox); + if (!mlx4_qp_lookup(dev, rule->qpn)) { + mlx4_err_rule(dev, "QP doesn't exist\n", rule); + ret = -EINVAL; + goto out; + } + trans_rule_ctrl_to_hw(rule, mailbox->buf); size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); list_for_each_entry(cur, &rule->list, list) { ret = parse_trans_rule(dev, cur, mailbox->buf + size); - if (ret < 0) { - mlx4_free_cmd_mailbox(dev, mailbox); - return ret; - } + if (ret < 0) + goto out; + size += ret; } @@ -1021,6 +1027,7 @@ int mlx4_flow_attach(struct mlx4_dev *dev, } } +out: mlx4_free_cmd_mailbox(dev, mailbox); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index d8359ffba026..62f1a3433a62 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -381,6 +381,19 @@ static void mlx4_qp_free_icm(struct 
mlx4_dev *dev, int qpn) __mlx4_qp_free_icm(dev, qpn); } +struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + struct mlx4_qp *qp; + + spin_lock(&qp_table->lock); + + qp = __mlx4_qp_lookup(dev, qpn); + + spin_unlock(&qp_table->lock); + return qp; +} + int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -468,6 +481,12 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, } if (attr & MLX4_UPDATE_QP_QOS_VPORT) { + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP)) { + mlx4_warn(dev, "Granular QoS per VF is not enabled\n"); + err = -EOPNOTSUPP; + goto out; + } + qp_mask |= 1ULL << MLX4_UPD_QP_MASK_QOS_VPP; cmd->qp_context.qos_vport = params->qos_vport; } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index d1fc7fa87b05..e3080fbd9d00 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -5040,6 +5040,13 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); } +static void update_qos_vpp(struct mlx4_update_qp_context *ctx, + struct mlx4_vf_immed_vlan_work *work) +{ + ctx->qp_mask |= cpu_to_be64(1ULL << MLX4_UPD_QP_MASK_QOS_VPP); + ctx->qp_context.qos_vport = work->qos_vport; +} + void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) { struct mlx4_vf_immed_vlan_work *work = @@ -5144,11 +5151,10 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) qp->sched_queue & 0xC7; upd_context->qp_context.pri_path.sched_queue |= ((work->qos & 0x7) << 3); - upd_context->qp_mask |= - cpu_to_be64(1ULL << - MLX4_UPD_QP_MASK_QOS_VPP); - upd_context->qp_context.qos_vport = - work->qos_vport; + + if (dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_QOS_VPP) + update_qos_vpp(upd_context, work); } err = mlx4_cmd(dev, mailbox->dma, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f5c1f4acc57b..7c42be586be8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -513,7 +513,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; struct msix_entry *msix = priv->msix_arr; int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - int err; if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); @@ -523,18 +522,11 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), priv->irq_info[i].mask); - err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); - if (err) { - mlx5_core_warn(mdev, "irq_set_affinity_hint failed,irq 0x%.4x", - irq); - goto err_clear_mask; - } + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irq, priv->irq_info[i].mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); return 0; - -err_clear_mask: - free_cpumask_var(priv->irq_info[i].mask); - return err; } static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c index b8d5270359cd..e30676515529 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c +++ 
b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c @@ -247,7 +247,7 @@ nx_fw_cmd_set_mtu(struct netxen_adapter *adapter, int mtu) cmd.req.arg3 = 0; if (recv_ctx->state == NX_HOST_CTX_STATE_ACTIVE) - netxen_issue_cmd(adapter, &cmd); + rcode = netxen_issue_cmd(adapter, &cmd); if (rcode != NX_RCODE_SUCCESS) return -EIO; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index 509b596cf1e8..bd1ec70fb736 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -341,7 +341,7 @@ qlcnic_pcie_sem_lock(struct qlcnic_adapter *adapter, int sem, u32 id_reg) } return -EIO; } - usleep_range(1000, 1500); + udelay(1200); } if (id_reg) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c index be258d90de9e..e3223f2fe2ff 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c @@ -765,7 +765,7 @@ int ql_core_dump(struct ql_adapter *qdev, struct ql_mpi_coredump *mpi_coredump) sizeof(struct mpi_coredump_global_header); mpi_coredump->mpi_global_header.imageSize = sizeof(struct ql_mpi_coredump); - memcpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump", + strncpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump", sizeof(mpi_coredump->mpi_global_header.idString)); /* Get generic NIC reg dump */ @@ -1255,7 +1255,7 @@ static void ql_gen_reg_dump(struct ql_adapter *qdev, sizeof(struct mpi_coredump_global_header); mpi_coredump->mpi_global_header.imageSize = sizeof(struct ql_reg_dump); - memcpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump", + strncpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump", sizeof(mpi_coredump->mpi_global_header.idString)); diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 1ef03939d25f..c90ae4d4be7d 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -296,8 +296,9 @@ qcaspi_receive(struct qcaspi *qca) /* Allocate rx SKB if we don't have one available. 
*/ if (!qca->rx_skb) { - qca->rx_skb = netdev_alloc_skb(net_dev, - net_dev->mtu + VLAN_ETH_HLEN); + qca->rx_skb = netdev_alloc_skb_ip_align(net_dev, + net_dev->mtu + + VLAN_ETH_HLEN); if (!qca->rx_skb) { netdev_dbg(net_dev, "out of RX resources\n"); qca->stats.out_of_mem++; @@ -377,7 +378,7 @@ qcaspi_receive(struct qcaspi *qca) qca->rx_skb, qca->rx_skb->dev); qca->rx_skb->ip_summed = CHECKSUM_UNNECESSARY; netif_rx_ni(qca->rx_skb); - qca->rx_skb = netdev_alloc_skb(net_dev, + qca->rx_skb = netdev_alloc_skb_ip_align(net_dev, net_dev->mtu + VLAN_ETH_HLEN); if (!qca->rx_skb) { netdev_dbg(net_dev, "out of RX resources\n"); @@ -759,7 +760,8 @@ qcaspi_netdev_init(struct net_device *dev) if (!qca->rx_buffer) return -ENOBUFS; - qca->rx_skb = netdev_alloc_skb(dev, qca->net_dev->mtu + VLAN_ETH_HLEN); + qca->rx_skb = netdev_alloc_skb_ip_align(dev, qca->net_dev->mtu + + VLAN_ETH_HLEN); if (!qca->rx_skb) { kfree(qca->rx_buffer); netdev_info(qca->net_dev, "Failed to allocate RX sk_buff.\n"); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 3783c40f568b..a82c89af7124 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -8411,12 +8411,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_msi_4; } + pci_set_drvdata(pdev, dev); + rc = register_netdev(dev); if (rc < 0) goto err_out_cnt_5; - pci_set_drvdata(pdev, dev); - netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n", rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr, (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 424d1dee55c9..afaf79b8761f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3222,7 +3222,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev) /* MDIO bus init */ ret = sh_mdio_init(mdp, pd); if (ret) { - dev_err(&ndev->dev, "failed to initialise MDIO\n"); + dev_err(&pdev->dev, "failed to initialise MDIO\n"); goto out_release; } diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 435466c17852..c69b0bdd891d 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -280,6 +280,10 @@ struct cpsw_ss_regs { /* Bit definitions for the CPSW1_TS_SEQ_LTYPE register */ #define CPSW_V1_SEQ_ID_OFS_SHIFT 16 +#define CPSW_MAX_BLKS_TX 15 +#define CPSW_MAX_BLKS_TX_SHIFT 4 +#define CPSW_MAX_BLKS_RX 5 + struct cpsw_host_regs { u32 max_blks; u32 blk_cnt; @@ -1127,11 +1131,23 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) switch (priv->version) { case CPSW_VERSION_1: slave_write(slave, TX_PRIORITY_MAPPING, CPSW1_TX_PRI_MAP); + /* Increase RX FIFO size to 5 for supporting full-duplex + * flow control mode + */ + slave_write(slave, + (CPSW_MAX_BLKS_TX << CPSW_MAX_BLKS_TX_SHIFT) | + CPSW_MAX_BLKS_RX, CPSW1_MAX_BLKS); break; case CPSW_VERSION_2: case CPSW_VERSION_3: case CPSW_VERSION_4: slave_write(slave, TX_PRIORITY_MAPPING, CPSW2_TX_PRI_MAP); + /* Increase RX FIFO size to 5 for supporting full-duplex + * flow control mode + */ + slave_write(slave, + (CPSW_MAX_BLKS_TX << CPSW_MAX_BLKS_TX_SHIFT) | + CPSW_MAX_BLKS_RX, CPSW2_MAX_BLKS); break; } diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index 49fe59b180a8..a75ce9051a7f 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -574,6 +574,8 @@ static int hdlcdrv_ioctl(struct
net_device *dev, struct ifreq *ifr, int cmd) case HDLCDRVCTL_CALIBRATE: if(!capable(CAP_SYS_RAWIO)) return -EPERM; + if (s->par.bitrate <= 0) + return -EINVAL; if (bi.data.calibrate > INT_MAX / s->par.bitrate) return -EINVAL; s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 7d0690433ee0..7d2cf015c5e7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -148,6 +148,12 @@ static inline int phy_aneg_done(struct phy_device *phydev) if (phydev->drv->aneg_done) return phydev->drv->aneg_done(phydev); + /* Avoid genphy_aneg_done() if the Clause 45 PHY does not + * implement Clause 22 registers + */ + if (phydev->is_c45 && !(phydev->c45_ids.devices_in_package & BIT(0))) + return -EINVAL; + return genphy_aneg_done(phydev); } diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index f7e8c79349ad..12a627fcc02c 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -501,7 +501,6 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, po->chan.mtu = dst_mtu(&rt->dst); if (!po->chan.mtu) po->chan.mtu = PPP_MRU; - ip_rt_put(rt); po->chan.mtu -= PPTP_HEADER_OVERHEAD; po->chan.hdrlen = 2 + sizeof(struct pptp_gre_header); diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 72cb30828a12..c8e98c8e29fa 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1069,6 +1069,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) u16 n = 0, index, ndplen; u8 ready2send = 0; u32 delayed_ndp_size; + size_t padding_count; /* When our NDP gets written in cdc_ncm_ndp(), then skb_out->len gets updated * accordingly. Otherwise, we should check here. @@ -1225,11 +1226,13 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) * a ZLP after full sized NTBs. 
*/ if (!(dev->driver_info->flags & FLAG_SEND_ZLP) && - skb_out->len > ctx->min_tx_pkt) - memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0, - ctx->tx_max - skb_out->len); - else if (skb_out->len < ctx->tx_max && (skb_out->len % dev->maxpacket) == 0) + skb_out->len > ctx->min_tx_pkt) { + padding_count = ctx->tx_max - skb_out->len; + memset(skb_put(skb_out, padding_count), 0, padding_count); + } else if (skb_out->len < ctx->tx_max && + (skb_out->len % dev->maxpacket) == 0) { *skb_put(skb_out, 1) = 0; /* force short packet */ + } /* set final frame length */ nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8dfc75250583..d01285250204 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -556,7 +556,12 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, hdr = skb_vnet_hdr(skb); sg_init_table(rq->sg, 2); sg_set_buf(rq->sg, hdr, vi->hdr_len); - skb_to_sgvec(skb, rq->sg + 1, 0, skb->len); + + err = skb_to_sgvec(skb, rq->sg + 1, 0, skb->len); + if (unlikely(err < 0)) { + dev_kfree_skb(skb); + return err; + } err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp); if (err < 0) @@ -858,7 +863,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) struct virtio_net_hdr_mrg_rxbuf *hdr; const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; struct virtnet_info *vi = sq->vq->vdev->priv; - unsigned num_sg; + int num_sg; unsigned hdr_len = vi->hdr_len; bool can_push; @@ -911,11 +916,16 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) if (can_push) { __skb_push(skb, hdr_len); num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); + if (unlikely(num_sg < 0)) + return num_sg; /* Pull header back to avoid skew in tx bytes calculations. 
*/ __skb_pull(skb, hdr_len); } else { sg_set_buf(sq->sg, hdr, hdr_len); - num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1; + num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); + if (unlikely(num_sg < 0)) + return num_sg; + num_sg++; } return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC); } diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 82bf85ae5d08..419c045d0752 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2789,6 +2789,11 @@ vmxnet3_force_close(struct vmxnet3_adapter *adapter) /* we need to enable NAPI, otherwise dev_close will deadlock */ for (i = 0; i < adapter->num_rx_queues; i++) napi_enable(&adapter->rx_queue[i].napi); + /* + * Need to clear the quiesce bit to ensure that vmxnet3_close + * can quiesce the device properly + */ + clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state); dev_close(adapter->netdev); } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ac945f8781ac..d3d59122a357 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -550,13 +550,15 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); - if (!IS_ERR(neigh)) + if (!IS_ERR(neigh)) { ret = dst_neigh_output(dst, neigh, skb); + rcu_read_unlock_bh(); + return ret; + } rcu_read_unlock_bh(); err: - if (unlikely(ret < 0)) - vrf_tx_error(skb->dev, skb); + vrf_tx_error(skb->dev, skb); return ret; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e4ff1e45c02e..c41378214ede 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -962,7 +962,7 @@ static bool vxlan_snoop(struct net_device *dev, return false; /* Don't migrate static entries, drop packets */ - if (f->state & NUD_NOARP) + if (f->state & (NUD_PERMANENT | NUD_NOARP)) return true; if (net_ratelimit()) diff --git a/drivers/net/wireless/ath/ath5k/debug.c b/drivers/net/wireless/ath/ath5k/debug.c index 654a1e33f827..7c5f189cace7 100644 --- a/drivers/net/wireless/ath/ath5k/debug.c +++ b/drivers/net/wireless/ath/ath5k/debug.c @@ -939,7 +939,10 @@ static int open_file_eeprom(struct inode *inode, struct file *file) } for (i = 0; i < eesize; ++i) { - AR5K_EEPROM_READ(i, val); + if (!ath5k_hw_nvram_read(ah, i, &val)) { + ret = -EIO; + goto freebuf; + } buf[i] = val; } diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 0881ba8535f4..c78abfc7bd96 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -247,7 +247,10 @@ static const UCHAR b4_default_startup_parms[] = { 0x04, 0x08, /* Noise gain, limit offset */ 0x28, 0x28, /* det rssi, med busy offsets */ 7, /* det sync thresh */ - 0, 2, 2 /* test mode, min, max */ + 0, 2, 2, /* test mode, min, max */ + 0, /* rx/tx delay */ + 0, 0, 0, 0, 0, 0, /* current BSS id */ + 0 /* hop set */ }; /*===========================================================================*/ @@ -598,7 +601,7 @@ static void init_startup_params(ray_dev_t *local) * a_beacon_period = hops a_beacon_period = KuS *//* 64ms = 010000 */ if (local->fw_ver == 0x55) { - memcpy((UCHAR *) &local->sparm.b4, b4_default_startup_parms, + memcpy(&local->sparm.b4, b4_default_startup_parms, sizeof(struct b4_startup_params)); /* Translate sane kus input values to old build 4/5 format */ /* i = hop time in uS truncated to 3 bytes */ diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 
9bee3f11898a..869411f55d88 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -1196,8 +1196,7 @@ static void wl1251_op_bss_info_changed(struct ieee80211_hw *hw, WARN_ON(wl->bss_type != BSS_TYPE_STA_BSS); enable = bss_conf->arp_addr_cnt == 1 && bss_conf->assoc; - wl1251_acx_arp_ip_filter(wl, enable, addr); - + ret = wl1251_acx_arp_ip_filter(wl, enable, addr); if (ret < 0) goto out_sleep; } diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 84419af16f77..fd12ccc11e26 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -538,6 +538,7 @@ struct powercap_zone *powercap_register_zone( power_zone->id = result; idr_init(&power_zone->idr); + result = -ENOMEM; power_zone->name = kstrdup(name, GFP_KERNEL); if (!power_zone->name) goto err_name_alloc; diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 853976bd3d36..9473715725df 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -217,6 +217,13 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) missing = year; } + /* Can't proceed if alarm is still invalid after replacing + * missing fields. + */ + err = rtc_valid_tm(&alarm->time); + if (err) + goto done; + /* with luck, no rollover is needed */ t_now = rtc_tm_to_time64(&now); t_alm = rtc_tm_to_time64(&alarm->time); @@ -268,9 +275,9 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) dev_warn(&rtc->dev, "alarm rollover not handled\n"); } -done: err = rtc_valid_tm(&alarm->time); +done: if (err) { dev_warn(&rtc->dev, "invalid alarm value: %d-%d-%d %d:%d:%d\n", alarm->time.tm_year + 1900, alarm->time.tm_mon + 1, diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c index 229dd2fe8f45..c6b0c7ed7a30 100644 --- a/drivers/rtc/rtc-opal.c +++ b/drivers/rtc/rtc-opal.c @@ -150,6 +150,16 @@ static int opal_get_tpo_time(struct device *dev, struct rtc_wkalrm *alarm) y_m_d = be32_to_cpu(__y_m_d); h_m_s_ms = ((u64)be32_to_cpu(__h_m) << 32); + + /* check if no alarm is set */ + if (y_m_d == 0 && h_m_s_ms == 0) { + pr_debug("No alarm is set\n"); + rc = -ENOENT; + goto exit; + } else { + pr_debug("Alarm set to %x %llx\n", y_m_d, h_m_s_ms); + } + opal_to_tm(y_m_d, h_m_s_ms, &alarm->time); exit: diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index 950c5d0b6dca..afab89f5be48 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -257,7 +257,7 @@ static int snvs_rtc_probe(struct platform_device *pdev) of_property_read_u32(pdev->dev.of_node, "offset", &data->offset); } - if (!data->regmap) { + if (IS_ERR(data->regmap)) { dev_err(&pdev->dev, "Can't find snvs syscon\n"); return -ENODEV; } diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index e7a6f1222642..b76a85d14ef0 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1881,8 +1881,12 @@ static int __dasd_device_is_unusable(struct dasd_device *device, { int mask = ~(DASD_STOPPED_DC_WAIT | DASD_UNRESUMED_PM); - if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) { - /* dasd is being set offline. 
*/ + if (test_bit(DASD_FLAG_OFFLINE, &device->flags) && + !test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { + /* + * dasd is being set offline, + * but it is not a safe offline, where we have to allow I/O + */ return 1; } if (device->stopped) { diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index 499e369eabf0..8bc1625337f6 100644 --- a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -191,6 +191,7 @@ struct bnx2fc_hba { struct bnx2fc_cmd_mgr *cmd_mgr; spinlock_t hba_lock; struct mutex hba_mutex; + struct mutex hba_stats_mutex; unsigned long adapter_state; #define ADAPTER_STATE_UP 0 #define ADAPTER_STATE_GOING_DOWN 1 diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 67405c628864..d0b227ffbd5f 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -641,15 +641,17 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) if (!fw_stats) return NULL; + mutex_lock(&hba->hba_stats_mutex); + bnx2fc_stats = fc_get_host_stats(shost); init_completion(&hba->stat_req_done); if (bnx2fc_send_stat_req(hba)) - return bnx2fc_stats; + goto unlock_stats_mutex; rc = wait_for_completion_timeout(&hba->stat_req_done, (2 * HZ)); if (!rc) { BNX2FC_HBA_DBG(lport, "FW stat req timed out\n"); - return bnx2fc_stats; + goto unlock_stats_mutex; } BNX2FC_STATS(hba, rx_stat2, fc_crc_cnt); bnx2fc_stats->invalid_crc_count += hba->bfw_stats.fc_crc_cnt; @@ -671,6 +673,9 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) memcpy(&hba->prev_stats, hba->stats_buffer, sizeof(struct fcoe_statistics_params)); + +unlock_stats_mutex: + mutex_unlock(&hba->hba_stats_mutex); return bnx2fc_stats; } @@ -1302,6 +1307,7 @@ static struct bnx2fc_hba *bnx2fc_hba_create(struct cnic_dev *cnic) } spin_lock_init(&hba->hba_lock); mutex_init(&hba->hba_mutex); + mutex_init(&hba->hba_stats_mutex); hba->cnic = cnic; diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 622bdabc8894..dab195f04da7 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -1769,7 +1769,6 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param) goto bye; } - mempool_free(mbp, hw->mb_mempool); if (finicsum != cfcsum) { csio_warn(hw, "Config File checksum mismatch: csum=%#x, computed=%#x\n", @@ -1780,6 +1779,10 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param) rv = csio_hw_validate_caps(hw, mbp); if (rv != 0) goto bye; + + mempool_free(mbp, hw->mb_mempool); + mbp = NULL; + /* * Note that we're operating with parameters * not supplied by the driver, rather than from hard-wired diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index efce04df2109..9f0b00c38658 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1695,6 +1695,15 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) */ switch (session->state) { case ISCSI_STATE_FAILED: + /* + * cmds should fail during shutdown if the session + * state is bad, allowing completion to happen + */ + if (unlikely(system_state != SYSTEM_RUNNING)) { + reason = FAILURE_SESSION_FAILED; + sc->result = DID_NO_CONNECT << 16; + break; + } case ISCSI_STATE_IN_RECOVERY: reason = FAILURE_SESSION_IN_RECOVERY; sc->result = DID_IMM_RETRY << 16; @@ -1979,6 +1988,19 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) } if (session->state != ISCSI_STATE_LOGGED_IN) { + /* + * During shutdown, if
the session is prematurely disconnected, + * recovery won't happen and there will be hung cmds. Not + * handling cmds would trigger EH, which is also bad in this case. + * Instead, handle cmd, allow completion to happen and let + * the upper layer deal with the result. + */ + if (unlikely(system_state != SYSTEM_RUNNING)) { + sc->result = DID_NO_CONNECT << 16; + ISCSI_DBG_EH(session, "sc on shutdown, handled\n"); + rc = BLK_EH_HANDLED; + goto done; + } /* * We are probably in the middle of iscsi recovery so let * that complete and handle the error. @@ -2083,7 +2105,7 @@ done: task->last_timeout = jiffies; spin_unlock(&session->frwd_lock); ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ? - "timer reset" : "nh"); + "timer reset" : "shutdown or nh"); return rc; } diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 022bb6e10d98..12886f96b286 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -282,6 +282,7 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id, void *rsp) phy->phy->minimum_linkrate = dr->pmin_linkrate; phy->phy->maximum_linkrate = dr->pmax_linkrate; phy->phy->negotiated_linkrate = phy->linkrate; + phy->phy->enabled = (phy->linkrate != SAS_PHY_DISABLED); skip: if (new_phy) @@ -675,7 +676,7 @@ int sas_smp_get_phy_events(struct sas_phy *phy) res = smp_execute_task(dev, req, RPEL_REQ_SIZE, resp, RPEL_RESP_SIZE); - if (!res) + if (res) goto out; phy->invalid_dword_count = scsi_to_u32(&resp[12]); @@ -684,6 +685,7 @@ int sas_smp_get_phy_events(struct sas_phy *phy) phy->phy_reset_problem_count = scsi_to_u32(&resp[24]); out: + kfree(req); kfree(resp); return res; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index e111c3d8c5d6..b868ef3b2ca3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3886,19 +3886,6 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) return 0; } - /* - * Bug work around for firmware SATL handling. The loop - * is based on atomic operations and ensures consistency - * since we're lockless at this point - */ - do { - if (test_bit(0, &sas_device_priv_data->ata_command_pending)) { - scmd->result = SAM_STAT_BUSY; - scmd->scsi_done(scmd); - return 0; - } - } while (_scsih_set_satl_pending(scmd, true)); - sas_target_priv_data = sas_device_priv_data->sas_target; /* invalid device handle */ @@ -3924,6 +3911,19 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) sas_device_priv_data->block) return SCSI_MLQUEUE_DEVICE_BUSY; + /* + * Bug work around for firmware SATL handling.
The loop + * is based on atomic operations and ensures consistency + * since we're lockless at this point + */ + do { + if (test_bit(0, &sas_device_priv_data->ata_command_pending)) { + scmd->result = SAM_STAT_BUSY; + scmd->scsi_done(scmd); + return 0; + } + } while (_scsih_set_satl_pending(scmd, true)); + if (scmd->sc_data_direction == DMA_FROM_DEVICE) mpi_control = MPI2_SCSIIO_CONTROL_READ; else if (scmd->sc_data_direction == DMA_TO_DEVICE) @@ -3945,6 +3945,7 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) if (!smid) { pr_err(MPT3SAS_FMT "%s: failed obtaining a smid\n", ioc->name, __func__); + _scsih_set_satl_pending(scmd, false); goto out; } mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); @@ -3975,6 +3976,7 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) if (mpi_request->DataLength) { if (ioc->build_sg_scmd(ioc, scmd, smid)) { mpt3sas_base_free_smid(ioc, smid); + _scsih_set_satl_pending(scmd, false); goto out; } } else diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index 26629b856f91..6c4445863705 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig @@ -90,6 +90,15 @@ config ONESHOT_SYNC_USER help Provide a userspace API for creating oneshot sync objects. +config ANDROID_VSOC + tristate "Android Virtual SoC support" + default n + depends on PCI_MSI + ---help--- + This option adds support for the Virtual SoC driver needed to boot + a 'cuttlefish' Android image inside QEmu. The driver interacts with + a QEmu ivshmem device. If built as a module, it will be called vsoc. + source "drivers/staging/android/ion/Kconfig" source "drivers/staging/android/fiq_debugger/Kconfig" diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile index b0b47ae4c0ea..8ef816152020 100644 --- a/drivers/staging/android/Makefile +++ b/drivers/staging/android/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER) += lowmemorykiller.o obj-$(CONFIG_SYNC) += sync.o sync_debug.o obj-$(CONFIG_SW_SYNC) += sw_sync.o obj-$(CONFIG_ONESHOT_SYNC) += oneshot_sync.o +obj-$(CONFIG_ANDROID_VSOC) += vsoc.o diff --git a/drivers/staging/android/TODO b/drivers/staging/android/TODO index 8f3ac37bfe12..0c32c00fa700 100644 --- a/drivers/staging/android/TODO +++ b/drivers/staging/android/TODO @@ -25,5 +25,15 @@ ion/ exposes existing cma regions and doesn't reserve unecessarily memory when booting a system which doesn't use ion. +vsoc.c, uapi/vsoc_shm.h + - The current driver uses the same wait queue for all of the futexes in a + region. This will cause false wakeups in regions with a large number of + waiting threads. We should eventually use multiple queues and select the + queue based on the region. + - Add debugfs support for examining the permissions of regions. + - Use ioremap_wc instead of ioremap_nocache. + - Remove VSOC_WAIT_FOR_INCOMING_INTERRUPT ioctl. This functionality has been + superseded by the futex and is there for legacy reasons. + Please send patches to Greg Kroah-Hartman and Cc: Arve HjønnevÃ¥g and Riley Andrews diff --git a/drivers/staging/android/uapi/vsoc_shm.h b/drivers/staging/android/uapi/vsoc_shm.h new file mode 100644 index 000000000000..741b1387c25b --- /dev/null +++ b/drivers/staging/android/uapi/vsoc_shm.h @@ -0,0 +1,303 @@ +/* + * Copyright (C) 2017 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _UAPI_LINUX_VSOC_SHM_H +#define _UAPI_LINUX_VSOC_SHM_H + +#include <linux/types.h> + +/** + * A permission is a token that permits a receiver to read and/or write an area + * of memory within a Vsoc region. + * + * An fd_scoped permission grants both read and write access, and can be + * attached to a file description (see open(2)). + * Ownership of the area can then be shared by passing a file descriptor + * among processes. + * + * begin_offset and end_offset define the area of memory that is controlled by + * the permission. owner_offset points to a word, also in shared memory, that + * controls ownership of the area. + * + * Ownership of the region expires when the associated file description is + * released. + * + * At most one permission can be attached to each file description. + * + * This is useful when implementing HALs like gralloc that scope and pass + * ownership of shared resources via file descriptors. + * + * The caller is responsible for doing any fencing. + * + * The calling process will normally identify a currently free area of + * memory. It will construct a proposed fd_scoped_permission_arg structure: + * + * begin_offset and end_offset describe the area being claimed + * + * owner_offset points to the location in shared memory that indicates the + * owner of the area. + * + * owned_value is the value that will be stored in owner_offset iff the + * permission can be granted. It must be different from VSOC_REGION_FREE. + * + * Two fd_scoped_permission structures are compatible if they vary only by + * their owned_value fields. + * + * The driver ensures that, for any group of simultaneous callers proposing + * compatible fd_scoped_permissions, it will accept exactly one of the + * proposals. The other callers will get a failure with errno of EAGAIN. + * + * A process receiving a file descriptor can identify the region being + * granted using the VSOC_GET_FD_SCOPED_PERMISSION ioctl. + */ +struct fd_scoped_permission { + __u32 begin_offset; + __u32 end_offset; + __u32 owner_offset; + __u32 owned_value; +}; + +/* + * This value represents a free area of memory. The driver expects to see this + * value at owner_offset when creating a permission, otherwise it will not + * create one, and will write this value back once the permission is no longer + * needed. + */ +#define VSOC_REGION_FREE ((__u32)0) + +/** + * ioctl argument for VSOC_CREATE_FD_SCOPE_PERMISSION + */ +struct fd_scoped_permission_arg { + struct fd_scoped_permission perm; + __s32 managed_region_fd; +}; + +#define VSOC_NODE_FREE ((__u32)0) + +/* + * Describes a signal table in shared memory. Each non-zero entry in the + * table indicates that the receiver should signal the futex at the given + * offset. Offsets are relative to the region, not the shared memory window. + * + * interrupt_signalled_offset is used to reliably signal interrupts across the + * vmm boundary. There are two roles: transmitter and receiver. For example, + * in the host_to_guest_signal_table the host is the transmitter and the + * guest is the receiver. The protocol is as follows: + * + * 1.
The transmitter should convert the offset of the futex to an offset + * in the signal table [0, (1 << num_nodes_lg2)). + * The transmitter can choose any appropriate hashing algorithm, including + * hash = futex_offset & ((1 << num_nodes_lg2) - 1) + * + * 2. The transmitter should atomically compare and swap futex_offset with 0 + * at hash. There are 3 possible outcomes: + * a. The swap fails because the futex_offset is already in the table. + * The transmitter should stop. + * b. Some other offset is in the table. This is a hash collision. The + * transmitter should move to another table slot and try again. One + * possible algorithm: + * hash = (hash + 1) & ((1 << num_nodes_lg2) - 1) + * c. The swap worked. Continue below. + * + * 3. The transmitter atomically swaps 1 with the value at the + * interrupt_signalled_offset. There are two outcomes: + * a. The prior value was 1. In this case an interrupt has already been + * posted. The transmitter is done. + * b. The prior value was 0, indicating that the receiver may be sleeping. + * The transmitter will issue an interrupt. + * + * 4. On waking, the receiver immediately exchanges a 0 with the + * interrupt_signalled_offset. If it receives a 0 then this is a spurious + * interrupt. That may occasionally happen in the current protocol, but + * should be rare. + * + * 5. The receiver scans the signal table by atomically exchanging 0 at each + * location. If a non-zero offset is returned from the exchange, the + * receiver wakes all sleepers at the given offset: + * futex((int*)(region_base + old_value), FUTEX_WAKE, MAX_INT); + * + * 6. The receiver thread then does a conditional wait, waking immediately + * if the value at interrupt_signalled_offset is non-zero. This catches cases + * where additional signals were posted while the table was being scanned. + * On the guest the wait is handled via the VSOC_WAIT_FOR_INCOMING_INTERRUPT + * ioctl. + */ +struct vsoc_signal_table_layout { + /* log_2(Number of signal table entries) */ + __u32 num_nodes_lg2; + /* + * Offset to the first signal table entry relative to the start of the + * region + */ + __u32 futex_uaddr_table_offset; + /* + * Offset to an atomic_t / atomic uint32_t. A non-zero value indicates + * that one or more offsets are currently posted in the table. + */ + __u32 interrupt_signalled_offset; +}; + +#define VSOC_REGION_WHOLE ((__s32)0) +#define VSOC_DEVICE_NAME_SZ 16 + +/** + * Each HAL would (usually) talk to a single device region. + * Multiple entities care about these regions: + * - The ivshmem_server will populate the regions in shared memory + * - The guest kernel will read the region, create minor device nodes, and + * allow interested parties to register for FUTEX_WAKE events in the region + * - HALs will access via the minor device nodes published by the guest kernel + * - Host side processes will access the region via the ivshmem_server: + * 1. Pass name to ivshmem_server at a UNIX socket + * 2. ivshmem_server will reply with: + * - host->guest doorbell fd + * - guest->host doorbell fd + * - fd for the shared memory region + * - region offset + * 3. 
Start a futex receiver thread on the doorbell fd pointed at the + * signal_nodes + */ +struct vsoc_device_region { + __u16 current_version; + __u16 min_compatible_version; + __u32 region_begin_offset; + __u32 region_end_offset; + __u32 offset_of_region_data; + struct vsoc_signal_table_layout guest_to_host_signal_table; + struct vsoc_signal_table_layout host_to_guest_signal_table; + /* Name of the device. Must always be terminated with a '\0', so + * the longest supported device name is 15 characters. + */ + char device_name[VSOC_DEVICE_NAME_SZ]; + /* There are two ways that permissions to access regions are handled: + * - When managed_by is VSOC_REGION_WHOLE, any process that can + * open the device node for the region gains complete access to it. + * - When managed_by names another region, processes that open this + * region cannot access it directly. Access to a sub-region must be + * established by invoking the VSOC_CREATE_FD_SCOPED_PERMISSION + * ioctl on the region referenced in managed_by, providing a file + * instance (represented by an fd) opened on this region. + */ + __u32 managed_by; +}; + +/* + * The vsoc layout descriptor. + * The first 4K should be reserved for the shm header and region descriptors. + * The regions should be page aligned. + */ + +struct vsoc_shm_layout_descriptor { + __u16 major_version; + __u16 minor_version; + + /* size of the shm. This may be redundant but nice to have */ + __u32 size; + + /* number of shared memory regions */ + __u32 region_count; + + /* The offset to the start of region descriptors */ + __u32 vsoc_region_desc_offset; +}; + +/* + * This specifies the current version that should be stored in + * vsoc_shm_layout_descriptor.major_version and + * vsoc_shm_layout_descriptor.minor_version. + * It should be updated only if the vsoc_device_region and + * vsoc_shm_layout_descriptor structures have changed. + * Versioning within each region is transferred + * via the min_compatible_version and current_version fields in + * vsoc_device_region. The driver does not consult these fields: they are left + * for the HALs and host processes and will change independently of the layout + * version. + */ +#define CURRENT_VSOC_LAYOUT_MAJOR_VERSION 2 +#define CURRENT_VSOC_LAYOUT_MINOR_VERSION 0 + +#define VSOC_CREATE_FD_SCOPED_PERMISSION \ + _IOW(0xF5, 0, struct fd_scoped_permission) +#define VSOC_GET_FD_SCOPED_PERMISSION _IOR(0xF5, 1, struct fd_scoped_permission) + +/* + * This is used to signal the host to scan the guest_to_host_signal_table + * for new futexes to wake. This sends an interrupt if one is not already + * in flight. + */ +#define VSOC_MAYBE_SEND_INTERRUPT_TO_HOST _IO(0xF5, 2) + +/* + * When this returns the guest will scan host_to_guest_signal_table to + * check for new futexes to wake. + */ +/* TODO(ghartman): Consider moving this to the bottom half */ +#define VSOC_WAIT_FOR_INCOMING_INTERRUPT _IO(0xF5, 3) + +/* + * Guest HALs will use this to retrieve the region description after + * opening their device node. + */ +#define VSOC_DESCRIBE_REGION _IOR(0xF5, 4, struct vsoc_device_region) + +/* + * Wake any threads that may be waiting for a host interrupt on this region. + * This is mostly used during shutdown. + */ +#define VSOC_SELF_INTERRUPT _IO(0xF5, 5) + +/* + * This is used to signal the host to scan the guest_to_host_signal_table + * for new futexes to wake. This sends an interrupt unconditionally.
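+ * + * As an illustration only (hypothetical userspace, not part of this header): + * after posting a futex offset into the guest_to_host_signal_table, a guest + * process holding an open region fd could ring the doorbell with + * ioctl(region_fd, VSOC_MAYBE_SEND_INTERRUPT_TO_HOST); + * where an EBUSY error means an interrupt was already in flight and nothing + * more needs to be done, while + * ioctl(region_fd, VSOC_SEND_INTERRUPT_TO_HOST); + * defined below forces an interrupt regardless.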
+ */ +#define VSOC_SEND_INTERRUPT_TO_HOST _IO(0xF5, 6) + +enum wait_types { + VSOC_WAIT_UNDEFINED = 0, + VSOC_WAIT_IF_EQUAL = 1, + VSOC_WAIT_IF_EQUAL_TIMEOUT = 2 +}; + +/* + * Wait for a condition to be true. + * + * Note: this is sized and aligned so the 32 bit and 64 bit layouts are + * identical. + */ +struct vsoc_cond_wait { + /* Input: Offset of the 32 bit word to check */ + __u32 offset; + /* Input: Value to compare with the word at offset */ + __u32 value; + /* Input: Monotonic time to wake at, seconds part */ + __u64 wake_time_sec; + /* Input: Monotonic time to wake at, nanoseconds part */ + __u32 wake_time_nsec; + /* Input: Type of wait */ + __u32 wait_type; + /* Output: Number of times the thread woke before returning. */ + __u32 wakes; + /* Ensure that the structure is 8-byte aligned and a multiple of 8 + * bytes long for 32/64 bit compatibility. + */ + __u32 reserved_1; +}; + +#define VSOC_COND_WAIT _IOWR(0xF5, 7, struct vsoc_cond_wait) + +/* Wake any local threads waiting at the offset given in arg */ +#define VSOC_COND_WAKE _IO(0xF5, 8) + +#endif /* _UAPI_LINUX_VSOC_SHM_H */ diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c new file mode 100644 index 000000000000..587c66d709b9 --- /dev/null +++ b/drivers/staging/android/vsoc.c @@ -0,0 +1,1169 @@ +/* + * drivers/staging/android/vsoc.c + * + * Android Virtual System on a Chip (VSoC) driver + * + * Copyright (C) 2017 Google, Inc. + * + * Author: ghartman@google.com + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * + * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory + * Copyright 2009 Cam Macdonell + * + * Based on cirrusfb.c and 8139cp.c: + * Copyright 1999-2001 Jeff Garzik + * Copyright 2001-2004 Jeff Garzik + */ + +#include <linux/dma-mapping.h> +#include <linux/freezer.h> +#include <linux/futex.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <linux/interrupt.h> +#include <linux/cdev.h> +#include <linux/file.h> +#include "uapi/vsoc_shm.h" + +#define VSOC_DEV_NAME "vsoc" + +/* + * Description of the ivshmem-doorbell PCI device used by QEmu. These + * constants follow docs/specs/ivshmem-spec.txt, which can be found in + * the QEmu repository. This was last reconciled with the version that + * came out with QEmu 2.8. + */ + +/* + * These constants are the KVM Inter-VM shared memory device + * register offsets. + */ +enum { + INTR_MASK = 0x00, /* Interrupt Mask */ + INTR_STATUS = 0x04, /* Interrupt Status */ + IV_POSITION = 0x08, /* VM ID */ + DOORBELL = 0x0c, /* Doorbell */ +}; + +static const int REGISTER_BAR; /* Equal to 0 */ +static const int MAX_REGISTER_BAR_LEN = 0x100; +/* + * The MSI-x BAR is not used directly. + * + * static const int MSI_X_BAR = 1; + */ +static const int SHARED_MEMORY_BAR = 2; + +struct vsoc_region_data { + char name[VSOC_DEVICE_NAME_SZ + 1]; + wait_queue_head_t interrupt_wait_queue; + /* TODO(b/73664181): Use multiple futex wait queues */ + wait_queue_head_t futex_wait_queue; + /* Flag indicating that an interrupt has been signalled by the host. */ + atomic_t *incoming_signalled; + /* Flag indicating the guest has signalled the host.
*/ + atomic_t *outgoing_signalled; + int irq_requested; + int device_created; +}; + +struct vsoc_device { + /* Kernel virtual address of REGISTER_BAR. */ + void __iomem *regs; + /* Physical address of SHARED_MEMORY_BAR. */ + phys_addr_t shm_phys_start; + /* Kernel virtual address of SHARED_MEMORY_BAR. */ + void *kernel_mapped_shm; + /* Size of the entire shared memory window in bytes. */ + size_t shm_size; + /* + * Pointer to the virtual address of the shared memory layout structure. + * This is probably identical to kernel_mapped_shm, but saving this + * here saves a lot of annoying casts. + */ + struct vsoc_shm_layout_descriptor *layout; + /* + * Points to a table of region descriptors in the kernel's virtual + * address space. Calculated from + * vsoc_shm_layout_descriptor.vsoc_region_desc_offset + */ + struct vsoc_device_region *regions; + /* Head of a list of permissions that have been granted. */ + struct list_head permissions; + struct pci_dev *dev; + /* Per-region (and therefore per-interrupt) information. */ + struct vsoc_region_data *regions_data; + /* + * Table of msi-x entries. This has to be separated from struct + * vsoc_region_data because the kernel deals with them as an array. + */ + struct msix_entry *msix_entries; + /* + * Flags that indicate what we've initialized. These are used to do an + * orderly cleanup of the device. + */ + char enabled_device; + char requested_regions; + char cdev_added; + char class_added; + char msix_enabled; + /* Mutex that protects the permission list */ + struct mutex mtx; + /* Major number assigned by the kernel */ + int major; + + struct cdev cdev; + struct class *class; +}; + +static struct vsoc_device vsoc_dev; + +/* + * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions. + */ + +struct fd_scoped_permission_node { + struct fd_scoped_permission permission; + struct list_head list; +}; + +struct vsoc_private_data { + struct fd_scoped_permission_node *fd_scoped_permission_node; +}; + +static long vsoc_ioctl(struct file *, unsigned int, unsigned long); +static int vsoc_mmap(struct file *, struct vm_area_struct *); +static int vsoc_open(struct inode *, struct file *); +static int vsoc_release(struct inode *, struct file *); +static ssize_t vsoc_read(struct file *, char *, size_t, loff_t *); +static ssize_t vsoc_write(struct file *, const char *, size_t, loff_t *); +static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin); +static int do_create_fd_scoped_permission( + struct vsoc_device_region *region_p, + struct fd_scoped_permission_node *np, + struct fd_scoped_permission_arg __user *arg); +static void do_destroy_fd_scoped_permission( + struct vsoc_device_region *owner_region_p, + struct fd_scoped_permission *perm); +static long do_vsoc_describe_region(struct file *, + struct vsoc_device_region __user *); +static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset); + +/** + * Validate arguments on entry points to the driver.
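+ * + * vsoc_validate_inode() returns -ENODEV when the minor number does not map + * to a region; vsoc_validate_filep() additionally returns -EBADFD when the + * file has no private data. Both return 0 on success.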
*/ +inline int vsoc_validate_inode(struct inode *inode) +{ + if (iminor(inode) >= vsoc_dev.layout->region_count) { + dev_err(&vsoc_dev.dev->dev, + "describe_region: invalid region %d\n", iminor(inode)); + return -ENODEV; + } + return 0; +} + +inline int vsoc_validate_filep(struct file *filp) +{ + int ret = vsoc_validate_inode(file_inode(filp)); + + if (ret) + return ret; + if (!filp->private_data) { + dev_err(&vsoc_dev.dev->dev, + "No private data on fd, region %d\n", + iminor(file_inode(filp))); + return -EBADFD; + } + return 0; +} + +/* Converts from shared memory offset to virtual address */ +static inline void *shm_off_to_virtual_addr(__u32 offset) +{ + return vsoc_dev.kernel_mapped_shm + offset; +} + +/* Converts from shared memory offset to physical address */ +static inline phys_addr_t shm_off_to_phys_addr(__u32 offset) +{ + return vsoc_dev.shm_phys_start + offset; +} + +/** + * Convenience functions to obtain the region from the inode or file. + * Dangerous to call before validating the inode/file. + */ +static inline struct vsoc_device_region *vsoc_region_from_inode( + struct inode *inode) +{ + return &vsoc_dev.regions[iminor(inode)]; +} + +static inline struct vsoc_device_region *vsoc_region_from_filep( + struct file *filp) +{ + return vsoc_region_from_inode(file_inode(filp)); +} + +static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r) +{ + return r->region_end_offset - r->region_begin_offset; +} + +static const struct file_operations vsoc_ops = { + .owner = THIS_MODULE, + .open = vsoc_open, + .mmap = vsoc_mmap, + .read = vsoc_read, + .unlocked_ioctl = vsoc_ioctl, + .compat_ioctl = vsoc_ioctl, + .write = vsoc_write, + .llseek = vsoc_lseek, + .release = vsoc_release, +}; + +static struct pci_device_id vsoc_id_table[] = { + {0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {0}, +}; + +MODULE_DEVICE_TABLE(pci, vsoc_id_table); + +static void vsoc_remove_device(struct pci_dev *pdev); +static int vsoc_probe_device(struct pci_dev *pdev, + const struct pci_device_id *ent); + +static struct pci_driver vsoc_pci_driver = { + .name = "vsoc", + .id_table = vsoc_id_table, + .probe = vsoc_probe_device, + .remove = vsoc_remove_device, +}; + +static int do_create_fd_scoped_permission( + struct vsoc_device_region *region_p, + struct fd_scoped_permission_node *np, + struct fd_scoped_permission_arg __user *arg) +{ + struct file *managed_filp; + s32 managed_fd; + atomic_t *owner_ptr = NULL; + struct vsoc_device_region *managed_region_p; + + if (copy_from_user(&np->permission, &arg->perm, + sizeof(np->permission)) || + copy_from_user(&managed_fd, + &arg->managed_region_fd, sizeof(managed_fd))) { + return -EFAULT; + } + managed_filp = fdget(managed_fd).file; + /* Check that it's a valid fd. */ + if (!managed_filp || vsoc_validate_filep(managed_filp)) + return -EPERM; + /* EEXIST if the given fd already has a permission.
*/ + if (((struct vsoc_private_data *)managed_filp->private_data)-> + fd_scoped_permission_node) + return -EEXIST; + managed_region_p = vsoc_region_from_filep(managed_filp); + /* Check that the provided region is managed by this one */ + if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p) + return -EPERM; + /* The area must be well-formed and have non-zero size */ + if (np->permission.begin_offset >= np->permission.end_offset) + return -EINVAL; + /* The area must fit in the memory window */ + if (np->permission.end_offset > + vsoc_device_region_size(managed_region_p)) + return -ERANGE; + /* The area must be in the region data section */ + if (np->permission.begin_offset < + managed_region_p->offset_of_region_data) + return -ERANGE; + /* The area must be page aligned */ + if (!PAGE_ALIGNED(np->permission.begin_offset) || + !PAGE_ALIGNED(np->permission.end_offset)) + return -EINVAL; + /* Owner offset must be naturally aligned in the window */ + if (np->permission.owner_offset & + (sizeof(np->permission.owner_offset) - 1)) + return -EINVAL; + /* The owner flag must reside in the owner memory */ + if (np->permission.owner_offset + sizeof(np->permission.owner_offset) > + vsoc_device_region_size(region_p)) + return -ERANGE; + /* The owner flag must reside in the data section */ + if (np->permission.owner_offset < region_p->offset_of_region_data) + return -EINVAL; + /* The owner value must change to claim the memory */ + if (np->permission.owned_value == VSOC_REGION_FREE) + return -EINVAL; + owner_ptr = + (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset + + np->permission.owner_offset); + /* We've already verified that this is in the shared memory window, so + * it should be safe to write to this address. + */ + if (atomic_cmpxchg(owner_ptr, + VSOC_REGION_FREE, + np->permission.owned_value) != VSOC_REGION_FREE) { + return -EBUSY; + } + ((struct vsoc_private_data *)managed_filp->private_data)-> + fd_scoped_permission_node = np; + /* The file offset needs to be adjusted if the calling + * process did any read/write operations on the fd + * before creating the permission. + */ + if (managed_filp->f_pos) { + if (managed_filp->f_pos > np->permission.end_offset) { + /* If the offset is beyond the permission end, set it + * to the end. + */ + managed_filp->f_pos = np->permission.end_offset; + } else { + /* If the offset is within the permission interval, + * keep it there; otherwise reset it to zero.
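+ * For example (values hypothetical): with begin_offset 0x1000 and + * end_offset 0x3000, an f_pos of 0x1800 becomes 0x800 relative + * to the area, an f_pos of 0x0800 is reset to zero, and an f_pos + * of 0x4000 was already clamped to 0x3000 by the branch above.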
*/ + if (managed_filp->f_pos < np->permission.begin_offset) { + managed_filp->f_pos = 0; + } else { + managed_filp->f_pos -= + np->permission.begin_offset; + } + } + } + return 0; +} + +static void do_destroy_fd_scoped_permission_node( + struct vsoc_device_region *owner_region_p, + struct fd_scoped_permission_node *node) +{ + if (node) { + do_destroy_fd_scoped_permission(owner_region_p, + &node->permission); + mutex_lock(&vsoc_dev.mtx); + list_del(&node->list); + mutex_unlock(&vsoc_dev.mtx); + kfree(node); + } +} + +static void do_destroy_fd_scoped_permission( + struct vsoc_device_region *owner_region_p, + struct fd_scoped_permission *perm) +{ + atomic_t *owner_ptr = NULL; + int prev = 0; + + if (!perm) + return; + owner_ptr = (atomic_t *)shm_off_to_virtual_addr( + owner_region_p->region_begin_offset + perm->owner_offset); + prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE); + if (prev != perm->owned_value) + dev_err(&vsoc_dev.dev->dev, + "%x-%x: owner (%s) %x: expected to be %x, was %x\n", + perm->begin_offset, perm->end_offset, + owner_region_p->device_name, perm->owner_offset, + perm->owned_value, prev); +} + +static long do_vsoc_describe_region(struct file *filp, + struct vsoc_device_region __user *dest) +{ + struct vsoc_device_region *region_p; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + region_p = vsoc_region_from_filep(filp); + if (copy_to_user(dest, region_p, sizeof(*region_p))) + return -EFAULT; + return 0; +} + +/** + * Implements the inner logic of cond_wait. Copies to and from userspace are + * done in the helper function below. + */ +static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg) +{ + DEFINE_WAIT(wait); + u32 region_number = iminor(file_inode(filp)); + struct vsoc_region_data *data = vsoc_dev.regions_data + region_number; + struct hrtimer_sleeper timeout, *to = NULL; + int ret = 0; + struct vsoc_device_region *region_p = vsoc_region_from_filep(filp); + atomic_t *address = NULL; + struct timespec ts; + + /* Ensure that the offset is aligned */ + if (arg->offset & (sizeof(uint32_t) - 1)) + return -EADDRNOTAVAIL; + /* Ensure that the offset is within shared memory */ + if (((uint64_t)arg->offset) + region_p->region_begin_offset + + sizeof(uint32_t) > region_p->region_end_offset) + return -E2BIG; + address = shm_off_to_virtual_addr(region_p->region_begin_offset + + arg->offset); + + /* Ensure that the type of wait is valid */ + switch (arg->wait_type) { + case VSOC_WAIT_IF_EQUAL: + break; + case VSOC_WAIT_IF_EQUAL_TIMEOUT: + to = &timeout; + break; + default: + return -EINVAL; + } + + if (to) { + /* Copy the user-supplied timespec into the kernel structure. + * We do things this way to flatten differences between 32 bit + * and 64 bit timespecs. + */ + ts.tv_sec = arg->wake_time_sec; + ts.tv_nsec = arg->wake_time_nsec; + + if (!timespec_valid(&ts)) + return -EINVAL; + hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); + hrtimer_set_expires_range_ns(&to->timer, timespec_to_ktime(ts), + current->timer_slack_ns); + + hrtimer_init_sleeper(to, current); + } + + while (1) { + prepare_to_wait(&data->futex_wait_queue, &wait, + TASK_INTERRUPTIBLE); + /* + * Check the sentinel value after prepare_to_wait. If the value + * changes after this check, the writer will wake the queue, + * changing the task state from INTERRUPTIBLE to RUNNING. That + * ensures that schedule() will eventually schedule this + * task.
+ */ + if (atomic_read(address) != arg->value) { + ret = 0; + break; + } + if (to) { + hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); + if (likely(to->task)) + freezable_schedule(); + hrtimer_cancel(&to->timer); + if (!to->task) { + ret = -ETIMEDOUT; + break; + } + } else { + freezable_schedule(); + } + /* Count the number of times that we woke up. This is useful + * for unit testing. + */ + ++arg->wakes; + if (signal_pending(current)) { + ret = -EINTR; + break; + } + } + finish_wait(&data->futex_wait_queue, &wait); + if (to) + destroy_hrtimer_on_stack(&to->timer); + return ret; +} + +/** + * Handles the details of copying from/to userspace to ensure that the copies + * happen on all of the return paths of cond_wait. + */ +static int do_vsoc_cond_wait(struct file *filp, + struct vsoc_cond_wait __user *untrusted_in) +{ + struct vsoc_cond_wait arg; + int rval = 0; + + if (copy_from_user(&arg, untrusted_in, sizeof(arg))) + return -EFAULT; + /* wakes is an out parameter. Initialize it to something sensible. */ + arg.wakes = 0; + rval = handle_vsoc_cond_wait(filp, &arg); + if (copy_to_user(untrusted_in, &arg, sizeof(arg))) + return -EFAULT; + return rval; +} + +static int do_vsoc_cond_wake(struct file *filp, uint32_t offset) +{ + struct vsoc_device_region *region_p = vsoc_region_from_filep(filp); + u32 region_number = iminor(file_inode(filp)); + struct vsoc_region_data *data = vsoc_dev.regions_data + region_number; + /* Ensure that the offset is aligned */ + if (offset & (sizeof(uint32_t) - 1)) + return -EADDRNOTAVAIL; + /* Ensure that the offset is within shared memory */ + if (((uint64_t)offset) + region_p->region_begin_offset + + sizeof(uint32_t) > region_p->region_end_offset) + return -E2BIG; + /* + * TODO(b/73664181): Use multiple futex wait queues. + * We need to wake every sleeper when the condition changes. Typically + * only a single thread will be waiting on the condition, but there + * are exceptions. The worst case is about 10 threads. 
*/ + wake_up_interruptible_all(&data->futex_wait_queue); + return 0; +} + +static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + int rv = 0; + struct vsoc_device_region *region_p; + u32 reg_num; + struct vsoc_region_data *reg_data; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + region_p = vsoc_region_from_filep(filp); + reg_num = iminor(file_inode(filp)); + reg_data = vsoc_dev.regions_data + reg_num; + switch (cmd) { + case VSOC_CREATE_FD_SCOPED_PERMISSION: + { + struct fd_scoped_permission_node *node = NULL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + /* We can't allocate memory for the permission */ + if (!node) + return -ENOMEM; + INIT_LIST_HEAD(&node->list); + rv = do_create_fd_scoped_permission( + region_p, + node, + (struct fd_scoped_permission_arg __user *)arg); + if (!rv) { + mutex_lock(&vsoc_dev.mtx); + list_add(&node->list, &vsoc_dev.permissions); + mutex_unlock(&vsoc_dev.mtx); + } else { + kfree(node); + return rv; + } + } + break; + + case VSOC_GET_FD_SCOPED_PERMISSION: + { + struct fd_scoped_permission_node *node = + ((struct vsoc_private_data *)filp->private_data)-> + fd_scoped_permission_node; + if (!node) + return -ENOENT; + if (copy_to_user + ((struct fd_scoped_permission __user *)arg, + &node->permission, sizeof(node->permission))) + return -EFAULT; + } + break; + + case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST: + if (!atomic_xchg( + reg_data->outgoing_signalled, + 1)) { + writel(reg_num, vsoc_dev.regs + DOORBELL); + return 0; + } else { + return -EBUSY; + } + break; + + case VSOC_SEND_INTERRUPT_TO_HOST: + writel(reg_num, vsoc_dev.regs + DOORBELL); + return 0; + + case VSOC_WAIT_FOR_INCOMING_INTERRUPT: + wait_event_interruptible( + reg_data->interrupt_wait_queue, + (atomic_read(reg_data->incoming_signalled) != 0)); + break; + + case VSOC_DESCRIBE_REGION: + return do_vsoc_describe_region( + filp, + (struct vsoc_device_region __user *)arg); + + case VSOC_SELF_INTERRUPT: + atomic_set(reg_data->incoming_signalled, 1); + wake_up_interruptible(&reg_data->interrupt_wait_queue); + break; + + case VSOC_COND_WAIT: + return do_vsoc_cond_wait(filp, + (struct vsoc_cond_wait __user *)arg); + case VSOC_COND_WAKE: + return do_vsoc_cond_wake(filp, arg); + + default: + return -EINVAL; + } + return 0; +} + +static ssize_t vsoc_read(struct file *filp, char *buffer, size_t len, + loff_t *poffset) +{ + __u32 area_off; + void *area_p; + ssize_t area_len; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + area_len = vsoc_get_area(filp, &area_off); + area_p = shm_off_to_virtual_addr(area_off); + area_p += *poffset; + area_len -= *poffset; + if (area_len <= 0) + return 0; + if (area_len < len) + len = area_len; + if (copy_to_user(buffer, area_p, len)) + return -EFAULT; + *poffset += len; + return len; +} + +static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin) +{ + ssize_t area_len = 0; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + area_len = vsoc_get_area(filp, NULL); + switch (origin) { + case SEEK_SET: + break; + + case SEEK_CUR: + if (offset > 0 && offset + filp->f_pos < 0) + return -EOVERFLOW; + offset += filp->f_pos; + break; + + case SEEK_END: + if (offset > 0 && offset + area_len < 0) + return -EOVERFLOW; + offset += area_len; + break; + + case SEEK_DATA: + if (offset >= area_len) + return -EINVAL; + if (offset < 0) + offset = 0; + break; + + case SEEK_HOLE: + /* Next hole is always the end of the region, unless offset is + * beyond that + */ + if (offset <
area_len) + offset = area_len; + break; + + default: + return -EINVAL; + } + + if (offset < 0 || offset > area_len) + return -EINVAL; + filp->f_pos = offset; + + return offset; +} + +static ssize_t vsoc_write(struct file *filp, const char *buffer, + size_t len, loff_t *poffset) +{ + __u32 area_off; + void *area_p; + ssize_t area_len; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + area_len = vsoc_get_area(filp, &area_off); + area_p = shm_off_to_virtual_addr(area_off); + area_p += *poffset; + area_len -= *poffset; + if (area_len <= 0) + return 0; + if (area_len < len) + len = area_len; + if (copy_from_user(area_p, buffer, len)) + return -EFAULT; + *poffset += len; + return len; +} + +static irqreturn_t vsoc_interrupt(int irq, void *region_data_v) +{ + struct vsoc_region_data *region_data = + (struct vsoc_region_data *)region_data_v; + int reg_num = region_data - vsoc_dev.regions_data; + + if (unlikely(!region_data)) + return IRQ_NONE; + + if (unlikely(reg_num < 0 || + reg_num >= vsoc_dev.layout->region_count)) { + dev_err(&vsoc_dev.dev->dev, + "invalid irq @%p reg_num=0x%04x\n", + region_data, reg_num); + return IRQ_NONE; + } + if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) { + dev_err(&vsoc_dev.dev->dev, + "irq not aligned @%p reg_num=0x%04x\n", + region_data, reg_num); + return IRQ_NONE; + } + wake_up_interruptible(&region_data->interrupt_wait_queue); + return IRQ_HANDLED; +} + +static int vsoc_probe_device(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int result; + int i; + resource_size_t reg_size; + dev_t devt; + + vsoc_dev.dev = pdev; + result = pci_enable_device(pdev); + if (result) { + dev_err(&pdev->dev, + "pci_enable_device failed %s: error %d\n", + pci_name(pdev), result); + return result; + } + vsoc_dev.enabled_device = 1; + result = pci_request_regions(pdev, "vsoc"); + if (result < 0) { + dev_err(&pdev->dev, "pci_request_regions failed\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + vsoc_dev.requested_regions = 1; + /* Set up the control registers in BAR 0 */ + reg_size = pci_resource_len(pdev, REGISTER_BAR); + if (reg_size > MAX_REGISTER_BAR_LEN) + vsoc_dev.regs = + pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN); + else + vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size); + + if (!vsoc_dev.regs) { + dev_err(&pdev->dev, + "cannot ioremap registers of size %zu\n", + (size_t)reg_size); + vsoc_remove_device(pdev); + return -EBUSY; + } + + /* Map the shared memory in BAR 2 */ + vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR); + vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR); + + dev_info(&pdev->dev, "shared memory @ DMA %p size=0x%zx\n", + (void *)vsoc_dev.shm_phys_start, vsoc_dev.shm_size); + /* TODO(ghartman): ioremap_wc should work here */ + vsoc_dev.kernel_mapped_shm = ioremap_nocache( + vsoc_dev.shm_phys_start, vsoc_dev.shm_size); + if (!vsoc_dev.kernel_mapped_shm) { + dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + + vsoc_dev.layout = + (struct vsoc_shm_layout_descriptor *)vsoc_dev.kernel_mapped_shm; + dev_info(&pdev->dev, "major_version: %d\n", + vsoc_dev.layout->major_version); + dev_info(&pdev->dev, "minor_version: %d\n", + vsoc_dev.layout->minor_version); + dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size); + dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count); + if (vsoc_dev.layout->major_version != + CURRENT_VSOC_LAYOUT_MAJOR_VERSION) { + dev_err(&vsoc_dev.dev->dev, + "driver 
supports only major_version %d\n", + CURRENT_VSOC_LAYOUT_MAJOR_VERSION); + vsoc_remove_device(pdev); + return -EBUSY; + } + result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count, + VSOC_DEV_NAME); + if (result) { + dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + vsoc_dev.major = MAJOR(devt); + cdev_init(&vsoc_dev.cdev, &vsoc_ops); + vsoc_dev.cdev.owner = THIS_MODULE; + result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count); + if (result) { + dev_err(&vsoc_dev.dev->dev, "cdev_add error\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + vsoc_dev.cdev_added = 1; + vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME); + if (IS_ERR(vsoc_dev.class)) { + dev_err(&vsoc_dev.dev->dev, "class_create failed\n"); + vsoc_remove_device(pdev); + return PTR_ERR(vsoc_dev.class); + } + vsoc_dev.class_added = 1; + vsoc_dev.regions = (struct vsoc_device_region *) + (vsoc_dev.kernel_mapped_shm + + vsoc_dev.layout->vsoc_region_desc_offset); + vsoc_dev.msix_entries = kcalloc( + vsoc_dev.layout->region_count, + sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL); + if (!vsoc_dev.msix_entries) { + dev_err(&vsoc_dev.dev->dev, + "unable to allocate msix_entries\n"); + vsoc_remove_device(pdev); + return -ENOSPC; + } + vsoc_dev.regions_data = kcalloc( + vsoc_dev.layout->region_count, + sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL); + if (!vsoc_dev.regions_data) { + dev_err(&vsoc_dev.dev->dev, + "unable to allocate regions' data\n"); + vsoc_remove_device(pdev); + return -ENOSPC; + } + for (i = 0; i < vsoc_dev.layout->region_count; ++i) + vsoc_dev.msix_entries[i].entry = i; + + result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries, + vsoc_dev.layout->region_count); + if (result) { + dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result); + vsoc_remove_device(pdev); + return -ENOSPC; + } + /* Check that all regions are well formed */ + for (i = 0; i < vsoc_dev.layout->region_count; ++i) { + const struct vsoc_device_region *region = vsoc_dev.regions + i; + + if (!PAGE_ALIGNED(region->region_begin_offset) || + !PAGE_ALIGNED(region->region_end_offset)) { + dev_err(&vsoc_dev.dev->dev, + "region %d not aligned (%x:%x)\n", i, + region->region_begin_offset, + region->region_end_offset); + vsoc_remove_device(pdev); + return -EFAULT; + } + if (region->region_begin_offset >= region->region_end_offset || + region->region_end_offset > vsoc_dev.shm_size) { + dev_err(&vsoc_dev.dev->dev, + "region %d offsets are wrong: %x %x %zx\n", + i, region->region_begin_offset, + region->region_end_offset, vsoc_dev.shm_size); + vsoc_remove_device(pdev); + return -EFAULT; + } + if (region->managed_by >= vsoc_dev.layout->region_count) { + dev_err(&vsoc_dev.dev->dev, + "region %d has invalid owner: %u\n", + i, region->managed_by); + vsoc_remove_device(pdev); + return -EFAULT; + } + } + vsoc_dev.msix_enabled = 1; + for (i = 0; i < vsoc_dev.layout->region_count; ++i) { + const struct vsoc_device_region *region = vsoc_dev.regions + i; + size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1; + const struct vsoc_signal_table_layout *h_to_g_signal_table = + &region->host_to_guest_signal_table; + const struct vsoc_signal_table_layout *g_to_h_signal_table = + &region->guest_to_host_signal_table; + + vsoc_dev.regions_data[i].name[name_sz] = '\0'; + memcpy(vsoc_dev.regions_data[i].name, region->device_name, + name_sz); + dev_info(&pdev->dev, "region %d name=%s\n", + i, vsoc_dev.regions_data[i].name); + init_waitqueue_head( + 
&vsoc_dev.regions_data[i].interrupt_wait_queue); + init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue); + vsoc_dev.regions_data[i].incoming_signalled = + vsoc_dev.kernel_mapped_shm + + region->region_begin_offset + + h_to_g_signal_table->interrupt_signalled_offset; + vsoc_dev.regions_data[i].outgoing_signalled = + vsoc_dev.kernel_mapped_shm + + region->region_begin_offset + + g_to_h_signal_table->interrupt_signalled_offset; + + result = request_irq( + vsoc_dev.msix_entries[i].vector, + vsoc_interrupt, 0, + vsoc_dev.regions_data[i].name, + vsoc_dev.regions_data + i); + if (result) { + dev_info(&pdev->dev, + "request_irq failed region=%d vector=%d\n", + i, vsoc_dev.msix_entries[i].vector); + vsoc_remove_device(pdev); + return -ENOSPC; + } + vsoc_dev.regions_data[i].irq_requested = 1; + if (IS_ERR(device_create(vsoc_dev.class, NULL, + MKDEV(vsoc_dev.major, i), + NULL, vsoc_dev.regions_data[i].name))) { + dev_err(&vsoc_dev.dev->dev, "device_create failed\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + vsoc_dev.regions_data[i].device_created = 1; + } + return 0; +} + +/* + * This should undo all of the allocations in the probe function in reverse + * order. + * + * Notes: + * + * The device may have been partially initialized, so double check + * that the allocations happened. + * + * This function may be called multiple times, so mark resources as freed + * as they are deallocated. + */ +static void vsoc_remove_device(struct pci_dev *pdev) +{ + int i; + /* + * pdev is the first thing to be set on probe and the last thing + * to be cleared here. If it's NULL then there is no cleanup. + */ + if (!pdev || !vsoc_dev.dev) + return; + dev_info(&pdev->dev, "remove_device\n"); + if (vsoc_dev.regions_data) { + for (i = 0; i < vsoc_dev.layout->region_count; ++i) { + if (vsoc_dev.regions_data[i].device_created) { + device_destroy(vsoc_dev.class, + MKDEV(vsoc_dev.major, i)); + vsoc_dev.regions_data[i].device_created = 0; + } + if (vsoc_dev.regions_data[i].irq_requested) + free_irq(vsoc_dev.msix_entries[i].vector, + vsoc_dev.regions_data + i); + vsoc_dev.regions_data[i].irq_requested = 0; + } + kfree(vsoc_dev.regions_data); + vsoc_dev.regions_data = NULL; + } + if (vsoc_dev.msix_enabled) { + pci_disable_msix(pdev); + vsoc_dev.msix_enabled = 0; + } + kfree(vsoc_dev.msix_entries); + vsoc_dev.msix_entries = NULL; + vsoc_dev.regions = NULL; + if (vsoc_dev.class_added) { + class_destroy(vsoc_dev.class); + vsoc_dev.class_added = 0; + } + if (vsoc_dev.cdev_added) { + cdev_del(&vsoc_dev.cdev); + vsoc_dev.cdev_added = 0; + } + if (vsoc_dev.major && vsoc_dev.layout) { + unregister_chrdev_region(MKDEV(vsoc_dev.major, 0), + vsoc_dev.layout->region_count); + vsoc_dev.major = 0; + } + vsoc_dev.layout = NULL; + if (vsoc_dev.kernel_mapped_shm) { + pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm); + vsoc_dev.kernel_mapped_shm = NULL; + } + if (vsoc_dev.regs) { + pci_iounmap(pdev, vsoc_dev.regs); + vsoc_dev.regs = NULL; + } + if (vsoc_dev.requested_regions) { + pci_release_regions(pdev); + vsoc_dev.requested_regions = 0; + } + if (vsoc_dev.enabled_device) { + pci_disable_device(pdev); + vsoc_dev.enabled_device = 0; + } + /* Do this last: it indicates that the device is not initialized. 
*/ + vsoc_dev.dev = NULL; +} + +static void __exit vsoc_cleanup_module(void) +{ + vsoc_remove_device(vsoc_dev.dev); + pci_unregister_driver(&vsoc_pci_driver); +} + +static int __init vsoc_init_module(void) +{ + int err; + + INIT_LIST_HEAD(&vsoc_dev.permissions); + mutex_init(&vsoc_dev.mtx); + + err = pci_register_driver(&vsoc_pci_driver); + if (err < 0) + return err; + return 0; +} + +static int vsoc_open(struct inode *inode, struct file *filp) +{ + /* Can't use vsoc_validate_filep because filp is still incomplete */ + int ret = vsoc_validate_inode(inode); + + if (ret) + return ret; + filp->private_data = + kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + return 0; +} + +static int vsoc_release(struct inode *inode, struct file *filp) +{ + struct vsoc_private_data *private_data = NULL; + struct fd_scoped_permission_node *node = NULL; + struct vsoc_device_region *owner_region_p = NULL; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + private_data = (struct vsoc_private_data *)filp->private_data; + if (!private_data) + return 0; + + node = private_data->fd_scoped_permission_node; + if (node) { + owner_region_p = vsoc_region_from_inode(inode); + if (owner_region_p->managed_by != VSOC_REGION_WHOLE) { + owner_region_p = + &vsoc_dev.regions[owner_region_p->managed_by]; + } + do_destroy_fd_scoped_permission_node(owner_region_p, node); + private_data->fd_scoped_permission_node = NULL; + } + kfree(private_data); + filp->private_data = NULL; + + return 0; +} + +/* + * Returns the device relative offset and length of the area specified by the + * fd scoped permission. If there is no fd scoped permission set, a default + * permission covering the entire region is assumed, unless the region is owned + * by another one, in which case the default is a permission with zero size. + */ +static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset) +{ + __u32 off = 0; + ssize_t length = 0; + struct vsoc_device_region *region_p; + struct fd_scoped_permission_node *node; + + region_p = vsoc_region_from_filep(filp); + off = region_p->region_begin_offset; + node = ((struct vsoc_private_data *)filp->private_data)-> + fd_scoped_permission_node; + if (node) { + off += node->permission.begin_offset; + length = node->permission.end_offset - + node->permission.begin_offset; + } else if (region_p->managed_by == VSOC_REGION_WHOLE) { + /* No permission set and the region is not owned by another, + * default to full region access. + */ + length = vsoc_device_region_size(region_p); + } else { + /* Return zero length; access is denied. 
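+ * For example (values hypothetical): a region spanning + * 0x4000-0x8000 with an fd-scoped permission covering + * 0x1000-0x2000 of its data yields an area offset of 0x5000 + * and a length of 0x1000, while the same region opened without + * a permission yields offset 0x4000 and the full 0x4000 length + * only when it is not managed by another region.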
*/ + length = 0; + } + if (area_offset) + *area_offset = off; + return length; +} + +static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma) +{ + unsigned long len = vma->vm_end - vma->vm_start; + __u32 area_off; + phys_addr_t mem_off; + ssize_t area_len; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + area_len = vsoc_get_area(filp, &area_off); + /* Add the requested offset */ + area_off += (vma->vm_pgoff << PAGE_SHIFT); + area_len -= (vma->vm_pgoff << PAGE_SHIFT); + if (area_len < len) + return -EINVAL; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + mem_off = shm_off_to_phys_addr(area_off); + if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT, + len, vma->vm_page_prot)) + return -EAGAIN; + return 0; +} + +module_init(vsoc_init_module); +module_exit(vsoc_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Greg Hartman <ghartman@google.com>"); +MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device"); +MODULE_VERSION("1.0"); diff --git a/drivers/staging/wlan-ng/prism2mgmt.c b/drivers/staging/wlan-ng/prism2mgmt.c index 013a6240f193..c1ad0aea23b9 100644 --- a/drivers/staging/wlan-ng/prism2mgmt.c +++ b/drivers/staging/wlan-ng/prism2mgmt.c @@ -169,7 +169,7 @@ int prism2mgmt_scan(wlandevice_t *wlandev, void *msgp) hw->ident_sta_fw.variant) > HFA384x_FIRMWARE_VERSION(1, 5, 0)) { if (msg->scantype.data != P80211ENUM_scantype_active) - word = cpu_to_le16(msg->maxchanneltime.data); + word = msg->maxchanneltime.data; else word = 0; diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 1246aa6fcab0..737635f0bec0 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -523,6 +523,7 @@ static void allow_maximum_power(struct thermal_zone_device *tz) struct thermal_instance *instance; struct power_allocator_params *params = tz->governor_data; + mutex_lock(&tz->lock); list_for_each_entry(instance, &tz->thermal_instances, tz_node) { if ((instance->trip != params->trip_max_desired_temperature) || (!cdev_is_power_actor(instance->cdev))) @@ -532,6 +533,7 @@ static void allow_maximum_power(struct thermal_zone_device *tz) instance->cdev->updated = false; thermal_cdev_update(instance->cdev); } + mutex_unlock(&tz->lock); } /** diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c index dc5c0e6cdb84..4686e93aaf94 100644 --- a/drivers/tty/goldfish.c +++ b/drivers/tty/goldfish.c @@ -87,32 +87,35 @@ static void do_rw_io(struct goldfish_tty *qtty, } static void goldfish_tty_rw(struct goldfish_tty *qtty, - unsigned long addr, + const void *address_ptr, unsigned int count, int is_write) { dma_addr_t dma_handle; enum dma_data_direction dma_dir; + uintptr_t address; + address = (uintptr_t)address_ptr; dma_dir = (is_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + if (qtty->version > 0) { /* * Goldfish TTY for Ranchu platform uses * physical addresses and DMA for read/write operations */ - unsigned long addr_end = addr + count; + uintptr_t address_end = address + count; - while (addr < addr_end) { - unsigned long pg_end = (addr & PAGE_MASK) + PAGE_SIZE; - unsigned long next = - pg_end < addr_end ? pg_end : addr_end; - unsigned long avail = next - addr; + while (address < address_end) { + uintptr_t page_end = (address & PAGE_MASK) + PAGE_SIZE; + uintptr_t next = page_end < address_end ? + page_end : address_end; + uintptr_t avail = next - address; /* * Map the buffer's virtual address to the DMA address * so the buffer can be accessed by the device. 
*/ - dma_handle = dma_map_single(qtty->dev, (void *)addr, + dma_handle = dma_map_single(qtty->dev, (void *)address, avail, dma_dir); if (dma_mapping_error(qtty->dev, dma_handle)) { @@ -127,31 +130,30 @@ static void goldfish_tty_rw(struct goldfish_tty *qtty, */ dma_unmap_single(qtty->dev, dma_handle, avail, dma_dir); - addr += avail; + address += avail; } } else { /* * Old style Goldfish TTY used on the Goldfish platform * uses virtual addresses. */ - do_rw_io(qtty, addr, count, is_write); + do_rw_io(qtty, address, count, is_write); } + } static void goldfish_tty_do_write(int line, const char *buf, unsigned int count) { struct goldfish_tty *qtty = &goldfish_ttys[line]; - unsigned long address = (unsigned long)(void *)buf; - goldfish_tty_rw(qtty, address, count, 1); + goldfish_tty_rw(qtty, buf, count, 1); } static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id) { struct goldfish_tty *qtty = dev_id; void __iomem *base = qtty->base; - unsigned long address; unsigned char *buf; u32 count; @@ -160,9 +162,7 @@ static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id) return IRQ_NONE; count = tty_prepare_flip_string(&qtty->port, &buf, count); - - address = (unsigned long)(void *)buf; - goldfish_tty_rw(qtty, address, count, 0); + goldfish_tty_rw(qtty, buf, count, 0); tty_schedule_flip(&qtty->port); return IRQ_HANDLED; diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 9aff37186246..78bd121ecede 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1467,6 +1467,10 @@ static void gsm_dlci_open(struct gsm_dlci *dlci) * in which case an opening port goes back to closed and a closing port * is simply put into closed state (any further frames from the other * end will get a DM response) + * + * Some control dlci can stay in ADM mode with other dlci working just + * fine. In that case we can just keep the control dlci open after the + * DLCI_OPENING retries time out. */ static void gsm_dlci_t1(unsigned long data) @@ -1480,8 +1484,15 @@ static void gsm_dlci_t1(unsigned long data) if (dlci->retries) { gsm_command(dlci->gsm, dlci->addr, SABM|PF); mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); - } else + } else if (!dlci->addr && gsm->control == (DM | PF)) { + if (debug & 8) + pr_info("DLCI %d opening in ADM mode.\n", + dlci->addr); + gsm_dlci_open(dlci); + } else { gsm_dlci_close(dlci); + } + break; case DLCI_CLOSING: dlci->retries--; @@ -1499,8 +1510,8 @@ static void gsm_dlci_t1(unsigned long data) * @dlci: DLCI to open * * Commence opening a DLCI from the Linux side. We issue SABM messages - * to the modem which should then reply with a UA, at which point we - * will move into open state. Opening is done asynchronously with retry + * to the modem which should then reply with a UA or ADM, at which point + * we will move into open state. Opening is done asynchronously with retry * running off timers and the responses. 
*/ diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index e8dd296fb25b..c4383573cf66 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -608,6 +608,10 @@ static int omap_8250_startup(struct uart_port *port) up->lsr_saved_flags = 0; up->msr_saved_flags = 0; + /* Disable DMA for console UART */ + if (uart_console(port)) + up->dma = NULL; + if (up->dma) { ret = serial8250_request_dma(up); if (ret) { diff --git a/drivers/tty/serial/sccnxp.c b/drivers/tty/serial/sccnxp.c index fcf803ffad19..cdd2f942317c 100644 --- a/drivers/tty/serial/sccnxp.c +++ b/drivers/tty/serial/sccnxp.c @@ -884,14 +884,19 @@ static int sccnxp_probe(struct platform_device *pdev) clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(clk)) { - if (PTR_ERR(clk) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; + ret = PTR_ERR(clk); + if (ret == -EPROBE_DEFER) goto err_out; - } + uartclk = 0; + } else { + clk_prepare_enable(clk); + uartclk = clk_get_rate(clk); + } + + if (!uartclk) { dev_notice(&pdev->dev, "Using default clock frequency\n"); uartclk = s->chip->freq_std; - } else - uartclk = clk_get_rate(clk); + } /* Check input frequency */ if ((uartclk < s->chip->freq_min) || (uartclk > s->chip->freq_max)) { diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index fc7711c75b01..8dd822feb972 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1457,7 +1457,16 @@ static void sci_free_dma(struct uart_port *port) if (s->chan_rx) sci_rx_dma_release(s, false); } -#else + +static void sci_flush_buffer(struct uart_port *port) +{ + /* + * In uart_flush_buffer(), the xmit circular buffer has just been + * cleared, so we have to reset tx_dma_len accordingly. + */ + to_sci_port(port)->tx_dma_len = 0; +} +#else /* !CONFIG_SERIAL_SH_SCI_DMA */ static inline void sci_request_dma(struct uart_port *port) { } @@ -1465,7 +1474,9 @@ static inline void sci_request_dma(struct uart_port *port) static inline void sci_free_dma(struct uart_port *port) { } -#endif + +#define sci_flush_buffer NULL +#endif /* !CONFIG_SERIAL_SH_SCI_DMA */ static irqreturn_t sci_rx_interrupt(int irq, void *ptr) { @@ -2205,6 +2216,7 @@ static struct uart_ops sci_uart_ops = { .break_ctl = sci_break_ctl, .startup = sci_startup, .shutdown = sci_shutdown, + .flush_buffer = sci_flush_buffer, .set_termios = sci_set_termios, .pm = sci_pm, .type = sci_type, diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 939c6ad71068..57ee43512992 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -851,7 +851,7 @@ static inline void ci_role_destroy(struct ci_hdrc *ci) { ci_hdrc_gadget_destroy(ci); ci_hdrc_host_destroy(ci); - if (ci->is_otg) + if (ci->is_otg && ci->roles[CI_ROLE_GADGET]) ci_hdrc_otg_destroy(ci); } @@ -951,27 +951,35 @@ static int ci_hdrc_probe(struct platform_device *pdev) /* initialize role(s) before the interrupt is requested */ if (dr_mode == USB_DR_MODE_OTG || dr_mode == USB_DR_MODE_HOST) { ret = ci_hdrc_host_init(ci); - if (ret) - dev_info(dev, "doesn't support host\n"); + if (ret) { + if (ret == -ENXIO) + dev_info(dev, "doesn't support host\n"); + else + goto deinit_phy; + } } if (dr_mode == USB_DR_MODE_OTG || dr_mode == USB_DR_MODE_PERIPHERAL) { ret = ci_hdrc_gadget_init(ci); - if (ret) - dev_info(dev, "doesn't support gadget\n"); + if (ret) { + if (ret == -ENXIO) + dev_info(dev, "doesn't support gadget\n"); + else + goto deinit_host; + } } if (!ci->roles[CI_ROLE_HOST] && !ci->roles[CI_ROLE_GADGET]) { 
dev_err(dev, "no supported roles\n"); ret = -ENODEV; - goto deinit_phy; + goto deinit_gadget; } if (ci->is_otg && ci->roles[CI_ROLE_GADGET]) { ret = ci_hdrc_otg_init(ci); if (ret) { dev_err(dev, "init otg fails, ret = %d\n", ret); - goto stop; + goto deinit_gadget; } } @@ -1036,7 +1044,12 @@ static int ci_hdrc_probe(struct platform_device *pdev) ci_extcon_unregister(ci); stop: - ci_role_destroy(ci); + if (ci->is_otg && ci->roles[CI_ROLE_GADGET]) + ci_hdrc_otg_destroy(ci); +deinit_gadget: + ci_hdrc_gadget_destroy(ci); +deinit_host: + ci_hdrc_host_destroy(ci); deinit_phy: ci_usb_phy_exit(ci); diff --git a/drivers/usb/dwc3/dwc3-keystone.c b/drivers/usb/dwc3/dwc3-keystone.c index 2be268d2423d..03a926ebf34b 100644 --- a/drivers/usb/dwc3/dwc3-keystone.c +++ b/drivers/usb/dwc3/dwc3-keystone.c @@ -112,6 +112,10 @@ static int kdwc3_probe(struct platform_device *pdev) dev->dma_mask = &kdwc3_dma_mask; kdwc->clk = devm_clk_get(kdwc->dev, "usb"); + if (IS_ERR(kdwc->clk)) { + dev_err(kdwc->dev, "unable to get usb clock\n"); + return PTR_ERR(kdwc->clk); + } error = clk_prepare_enable(kdwc->clk); if (error < 0) { diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 408c8eca2bbe..61fb325e4267 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -437,7 +437,6 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match); static struct platform_driver usb_xhci_driver = { .probe = xhci_plat_probe, .remove = xhci_plat_remove, - .shutdown = usb_hcd_platform_shutdown, .driver = { .name = "xhci-hcd", .pm = DEV_PM_OPS, diff --git a/drivers/usb/storage/ene_ub6250.c b/drivers/usb/storage/ene_ub6250.c index 091e8ec7a6c0..962bb6376b0c 100644 --- a/drivers/usb/storage/ene_ub6250.c +++ b/drivers/usb/storage/ene_ub6250.c @@ -1953,6 +1953,8 @@ static int ene_load_bincode(struct us_data *us, unsigned char flag) bcb->CDB[0] = 0xEF; result = ene_send_scsi_cmd(us, FDIR_WRITE, buf, 0); + if (us->srb != NULL) + scsi_set_resid(us->srb, 0); info->BIN_FLAG = flag; kfree(buf); @@ -2306,21 +2308,22 @@ static int ms_scsi_irp(struct us_data *us, struct scsi_cmnd *srb) static int ene_transport(struct scsi_cmnd *srb, struct us_data *us) { - int result = 0; + int result = USB_STOR_XFER_GOOD; struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); /*US_DEBUG(usb_stor_show_command(us, srb)); */ scsi_set_resid(srb, 0); - if (unlikely(!(info->SD_Status.Ready || info->MS_Status.Ready))) { + if (unlikely(!(info->SD_Status.Ready || info->MS_Status.Ready))) result = ene_init(us); - } else { + if (result == USB_STOR_XFER_GOOD) { + result = USB_STOR_TRANSPORT_ERROR; if (info->SD_Status.Ready) result = sd_scsi_irp(us, srb); if (info->MS_Status.Ready) result = ms_scsi_irp(us, srb); } - return 0; + return result; } static struct scsi_host_template ene_ub6250_host_template; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ad2146a9ab2d..675819a1af37 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -173,8 +173,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file) if (mask) vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); if (mask & POLLERR) { - if (poll->wqh) - remove_wait_queue(poll->wqh, &poll->wait); + vhost_poll_stop(poll); ret = -EINVAL; } diff --git a/drivers/video/fbdev/vfb.c b/drivers/video/fbdev/vfb.c index 728cb6b23c42..7d8dfc7f1269 100644 --- a/drivers/video/fbdev/vfb.c +++ b/drivers/video/fbdev/vfb.c @@ -298,8 +298,23 @@ static int vfb_check_var(struct fb_var_screeninfo *var, */ static int vfb_set_par(struct fb_info *info) { + switch 
(info->var.bits_per_pixel) { + case 1: + info->fix.visual = FB_VISUAL_MONO01; + break; + case 8: + info->fix.visual = FB_VISUAL_PSEUDOCOLOR; + break; + case 16: + case 24: + case 32: + info->fix.visual = FB_VISUAL_TRUECOLOR; + break; + } + info->fix.line_length = get_line_length(info->var.xres_virtual, info->var.bits_per_pixel); + return 0; } @@ -540,6 +555,8 @@ static int vfb_probe(struct platform_device *dev) goto err2; platform_set_drvdata(dev, info); + vfb_set_par(info); + fb_info(info, "Virtual frame buffer device, using %ldK of video memory\n", videomemorysize >> 10); return 0; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e767f347f2b1..88bee6703cc0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2534,7 +2534,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) if (!uptodate) { ClearPageUptodate(page); SetPageError(page); - ret = ret < 0 ? ret : -EIO; + ret = err < 0 ? err : -EIO; mapping_set_error(page->mapping, ret); } return 0; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 744be3c146f5..0141aba9eca6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -589,7 +589,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; - down_read(&cinode->lock_sem); + down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); if (cinode->can_cache_brlcks) { /* can cache locks - no need to relock */ up_read(&cinode->lock_sem); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 33b1bc21a120..807e989f436a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -999,15 +999,19 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, goto tcon_exit; } - if (rsp->ShareType & SMB2_SHARE_TYPE_DISK) + switch (rsp->ShareType) { + case SMB2_SHARE_TYPE_DISK: cifs_dbg(FYI, "connection to disk share\n"); - else if (rsp->ShareType & SMB2_SHARE_TYPE_PIPE) { + break; + case SMB2_SHARE_TYPE_PIPE: tcon->ipc = true; cifs_dbg(FYI, "connection to pipe share\n"); - } else if (rsp->ShareType & SMB2_SHARE_TYPE_PRINT) { - tcon->print = true; + break; + case SMB2_SHARE_TYPE_PRINT: + tcon->ipc = true; cifs_dbg(FYI, "connection to printer\n"); - } else { + break; + default: cifs_dbg(VFS, "unknown share type %d\n", rsp->ShareType); rc = -EOPNOTSUPP; goto tcon_error_exit; diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 732a786cce9d..ce654526c0fb 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "fscrypt_private.h" static unsigned int num_prealloc_crypto_pages = 32; diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 6eb434363ff2..b18fa323d1d9 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -12,42 +12,46 @@ #include #include +#include #include "fscrypt_private.h" +static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) +{ + if (str->len == 1 && str->name[0] == '.') + return true; + + if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + return true; + + return false; +} + /** * fname_encrypt() - encrypt a filename * - * The caller must have allocated sufficient memory for the @oname string. + * The output buffer must be at least as large as the input buffer. + * Any extra space is filled with NUL padding before encryption. 
* * Return: 0 on success, -errno on failure */ -static int fname_encrypt(struct inode *inode, - const struct qstr *iname, struct fscrypt_str *oname) +int fname_encrypt(struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); - struct fscrypt_info *ci = inode->i_crypt_info; - struct crypto_skcipher *tfm = ci->ci_ctfm; + struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm; int res = 0; char iv[FS_CRYPTO_BLOCK_SIZE]; struct scatterlist sg; - int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - unsigned int lim; - unsigned int cryptlen; - - lim = inode->i_sb->s_cop->max_namelen(inode); - if (iname->len <= 0 || iname->len > lim) - return -EIO; /* * Copy the filename to the output buffer for encrypting in-place and * pad it with the needed number of NUL bytes. */ - cryptlen = max_t(unsigned int, iname->len, FS_CRYPTO_BLOCK_SIZE); - cryptlen = round_up(cryptlen, padding); - cryptlen = min(cryptlen, lim); - memcpy(oname->name, iname->name, iname->len); - memset(oname->name + iname->len, 0, cryptlen - iname->len); + if (WARN_ON(olen < iname->len)) + return -ENOBUFS; + memcpy(out, iname->name, iname->len); + memset(out + iname->len, 0, olen - iname->len); /* Initialize the IV */ memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); @@ -62,8 +66,8 @@ static int fname_encrypt(struct inode *inode, skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait); - sg_init_one(&sg, oname->name, cryptlen); - skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv); + sg_init_one(&sg, out, olen); + skcipher_request_set_crypt(req, &sg, &sg, olen, iv); /* Do the encryption */ res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); @@ -74,7 +78,6 @@ static int fname_encrypt(struct inode *inode, return res; } - oname->len = cryptlen; return 0; } @@ -187,50 +190,52 @@ static int digest_decode(const char *src, int len, char *dst) return cp - dst; } -u32 fscrypt_fname_encrypted_size(const struct inode *inode, u32 ilen) +bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, + u32 max_len, u32 *encrypted_len_ret) { - int padding = 32; - struct fscrypt_info *ci = inode->i_crypt_info; + int padding = 4 << (inode->i_crypt_info->ci_flags & + FS_POLICY_FLAGS_PAD_MASK); + u32 encrypted_len; - if (ci) - padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - ilen = max(ilen, (u32)FS_CRYPTO_BLOCK_SIZE); - return round_up(ilen, padding); + if (orig_len > max_len) + return false; + encrypted_len = max(orig_len, (u32)FS_CRYPTO_BLOCK_SIZE); + encrypted_len = round_up(encrypted_len, padding); + *encrypted_len_ret = min(encrypted_len, max_len); + return true; } -EXPORT_SYMBOL(fscrypt_fname_encrypted_size); /** - * fscrypt_fname_crypto_alloc_obuff() - + * fscrypt_fname_alloc_buffer - allocate a buffer for presented filenames * - * Allocates an output buffer that is sufficient for the crypto operation - * specified by the context and the direction. + * Allocate a buffer that is large enough to hold any decrypted or encoded + * filename (null-terminated), for the given maximum encrypted filename length. 
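+ * (Editor's note, illustrative only: the kmalloc() below reserves max_presented_len + 1 bytes, the extra byte being room for the NUL that terminates the presented name.)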
+ * + * Return: 0 on success, -errno on failure */ int fscrypt_fname_alloc_buffer(const struct inode *inode, - u32 ilen, struct fscrypt_str *crypto_str) + u32 max_encrypted_len, + struct fscrypt_str *crypto_str) { - u32 olen = fscrypt_fname_encrypted_size(inode, ilen); const u32 max_encoded_len = max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE), 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))); + u32 max_presented_len; - crypto_str->len = olen; - olen = max(olen, max_encoded_len); + max_presented_len = max(max_encoded_len, max_encrypted_len); - /* - * Allocated buffer can hold one more character to null-terminate the - * string - */ - crypto_str->name = kmalloc(olen + 1, GFP_NOFS); - if (!(crypto_str->name)) + crypto_str->name = kmalloc(max_presented_len + 1, GFP_NOFS); + if (!crypto_str->name) return -ENOMEM; + crypto_str->len = max_presented_len; return 0; } EXPORT_SYMBOL(fscrypt_fname_alloc_buffer); /** - * fscrypt_fname_crypto_free_buffer() - + * fscrypt_fname_free_buffer - free the buffer for presented filenames * - * Frees the buffer allocated for crypto operation. + * Free the buffer allocated by fscrypt_fname_alloc_buffer(). */ void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) { @@ -296,35 +301,6 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, } EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); -/** - * fscrypt_fname_usr_to_disk() - converts a filename from user space to disk - * space - * - * The caller must have allocated sufficient memory for the @oname string. - * - * Return: 0 on success, -errno on failure - */ -int fscrypt_fname_usr_to_disk(struct inode *inode, - const struct qstr *iname, - struct fscrypt_str *oname) -{ - if (fscrypt_is_dot_dotdot(iname)) { - oname->name[0] = '.'; - oname->name[iname->len - 1] = '.'; - oname->len = iname->len; - return 0; - } - if (inode->i_crypt_info) - return fname_encrypt(inode, iname, oname); - /* - * Without a proper key, a user is not allowed to modify the filenames - * in a directory. 
Consequently, a user space name cannot be mapped to - * a disk-space name - */ - return -ENOKEY; -} -EXPORT_SYMBOL(fscrypt_fname_usr_to_disk); - /** * fscrypt_setup_filename() - prepare to search a possibly encrypted directory * @dir: the directory that will be searched @@ -368,11 +344,17 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, return ret; if (dir->i_crypt_info) { - ret = fscrypt_fname_alloc_buffer(dir, iname->len, - &fname->crypto_buf); - if (ret) - return ret; - ret = fname_encrypt(dir, iname, &fname->crypto_buf); + if (!fscrypt_fname_encrypted_size(dir, iname->len, + dir->i_sb->s_cop->max_namelen(dir), + &fname->crypto_buf.len)) + return -ENAMETOOLONG; + fname->crypto_buf.name = kmalloc(fname->crypto_buf.len, + GFP_NOFS); + if (!fname->crypto_buf.name) + return -ENOMEM; + + ret = fname_encrypt(dir, iname, fname->crypto_buf.name, + fname->crypto_buf.len); if (ret) goto errout; fname->disk_name.name = fname->crypto_buf.name; @@ -424,7 +406,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, return 0; errout: - fscrypt_fname_free_buffer(&fname->crypto_buf); + kfree(fname->crypto_buf.name); return ret; } EXPORT_SYMBOL(fscrypt_setup_filename); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index c3ad415cd14f..5c296d4af4a9 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -49,6 +49,15 @@ struct fscrypt_context { #define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 +/** + * For encrypted symlinks, the ciphertext length is stored at the beginning + * of the string in little-endian format. + */ +struct fscrypt_symlink_data { + __le16 len; + char encrypted_path[1]; +} __packed; + /* * A pointer to this structure is stored in the file system's in-core * representation of an inode. 
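/*
 * Editor's sketch, not part of the original patch: a self-contained
 * userspace model of how the on-disk length of an encrypted symlink is
 * derived, mirroring fscrypt_fname_encrypted_size() and
 * __fscrypt_prepare_symlink() in this patch.  The 16-byte
 * FS_CRYPTO_BLOCK_SIZE, the 0x3 pad mask and the 3-byte
 * sizeof(struct fscrypt_symlink_data) match the kernel definitions;
 * all MODEL_* names are assumed for illustration.
 */
#include <stdint.h>

#define MODEL_CRYPTO_BLOCK_SIZE	16u	/* FS_CRYPTO_BLOCK_SIZE */
#define MODEL_SD_OVERHEAD	3u	/* sizeof(struct fscrypt_symlink_data) */

/* round_up() for the power-of-two NUL padding of 4, 8, 16 or 32 bytes */
static uint32_t model_round_up(uint32_t len, uint32_t padding)
{
	return (len + padding - 1) & ~(padding - 1);
}

/* Returns the disk_link.len that would be stored, or 0 on -ENAMETOOLONG */
static uint32_t model_symlink_disk_len(uint32_t target_len, uint32_t max_len,
				       int pad_flags)
{
	uint32_t padding = 4u << (pad_flags & 0x3);	/* FS_POLICY_FLAGS_PAD_MASK */
	uint32_t budget = max_len - MODEL_SD_OVERHEAD;
	uint32_t ct_len;

	if (target_len > budget)
		return 0;
	/* short names are first widened to one crypto block ... */
	ct_len = target_len < MODEL_CRYPTO_BLOCK_SIZE ?
			MODEL_CRYPTO_BLOCK_SIZE : target_len;
	/* ... then rounded up to the policy's NUL-padding boundary */
	ct_len = model_round_up(ct_len, padding);
	if (ct_len > budget)
		ct_len = budget;
	return ct_len + MODEL_SD_OVERHEAD;
}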
@@ -81,7 +90,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, bio->bi_rw = op | op_flags; } +static inline bool fscrypt_valid_enc_modes(u32 contents_mode, + u32 filenames_mode) +{ + if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && + filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) + return true; + + if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && + filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) + return true; + + return false; +} + /* crypto.c */ +extern struct kmem_cache *fscrypt_info_cachep; extern int fscrypt_initialize(unsigned int cop_flags); extern struct workqueue_struct *fscrypt_read_workqueue; extern int fscrypt_do_page_crypto(const struct inode *inode, @@ -93,6 +117,13 @@ extern int fscrypt_do_page_crypto(const struct inode *inode, extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags); +/* fname.c */ +extern int fname_encrypt(struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen); +extern bool fscrypt_fname_encrypted_size(const struct inode *inode, + u32 orig_len, u32 max_len, + u32 *encrypted_len_ret); + /* keyinfo.c */ extern void __exit fscrypt_essiv_cleanup(void); diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 9f5fb2eb9cf7..bc010e4609ef 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -110,3 +110,159 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry) return 0; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); + +int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + int err; + + /* + * To calculate the size of the encrypted symlink target we need to know + * the amount of NUL padding, which is determined by the flags set in + * the encryption policy which will be inherited from the directory. + * The easiest way to get access to this is to just load the directory's + * fscrypt_info, since we'll need it to create the dir_entry anyway. + * + * Note: in test_dummy_encryption mode, @dir may be unencrypted. + */ + err = fscrypt_get_encryption_info(dir); + if (err) + return err; + if (!fscrypt_has_encryption_key(dir)) + return -ENOKEY; + + /* + * Calculate the size of the encrypted symlink and verify it won't + * exceed max_len. Note that for historical reasons, encrypted symlink + * targets are prefixed with the ciphertext length, despite this + * actually being redundant with i_size. This decreases by 2 bytes the + * longest symlink target we can accept. + * + * We could recover 1 byte by not counting a null terminator, but + * counting it (even though it is meaningless for ciphertext) is simpler + * for now since filesystems will assume it is there and subtract it. 
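+ * (Editor's illustration, not part of the original patch: f2fs passes its 4096-byte block size as max_len, so at most 4096 - sizeof(struct fscrypt_symlink_data) == 4093 bytes remain for the ciphertext; the 2-byte length prefix and the counted NUL account for the 3 bytes lost.)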
+ */ + if (!fscrypt_fname_encrypted_size(dir, len, + max_len - sizeof(struct fscrypt_symlink_data), + &disk_link->len)) + return -ENAMETOOLONG; + disk_link->len += sizeof(struct fscrypt_symlink_data); + + disk_link->name = NULL; + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_symlink); + +int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, + unsigned int len, struct fscrypt_str *disk_link) +{ + int err; + struct qstr iname = QSTR_INIT(target, len); + struct fscrypt_symlink_data *sd; + unsigned int ciphertext_len; + + err = fscrypt_require_key(inode); + if (err) + return err; + + if (disk_link->name) { + /* filesystem-provided buffer */ + sd = (struct fscrypt_symlink_data *)disk_link->name; + } else { + sd = kmalloc(disk_link->len, GFP_NOFS); + if (!sd) + return -ENOMEM; + } + ciphertext_len = disk_link->len - sizeof(*sd); + sd->len = cpu_to_le16(ciphertext_len); + + err = fname_encrypt(inode, &iname, sd->encrypted_path, ciphertext_len); + if (err) { + if (!disk_link->name) + kfree(sd); + return err; + } + /* + * Null-terminating the ciphertext doesn't make sense, but we still + * count the null terminator in the length, so we might as well + * initialize it just in case the filesystem writes it out. + */ + sd->encrypted_path[ciphertext_len] = '\0'; + + if (!disk_link->name) + disk_link->name = (unsigned char *)sd; + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_encrypt_symlink); + +/** + * fscrypt_get_symlink - get the target of an encrypted symlink + * @inode: the symlink inode + * @caddr: the on-disk contents of the symlink + * @max_size: size of @caddr buffer + * @done: if successful, will be set up to free the returned target + * + * If the symlink's encryption key is available, we decrypt its target. + * Otherwise, we encode its target for presentation. + * + * This may sleep, so the filesystem must have dropped out of RCU mode already. + * + * Return: the presentable symlink target or an ERR_PTR() + */ +void *fscrypt_get_symlink(struct inode *inode, const void *caddr, + unsigned int max_size) +{ + const struct fscrypt_symlink_data *sd; + struct fscrypt_str cstr, pstr; + int err; + + /* This is for encrypted symlinks only */ + if (WARN_ON(!IS_ENCRYPTED(inode))) + return ERR_PTR(-EINVAL); + + /* + * Try to set up the symlink's encryption key, but we can continue + * regardless of whether the key is available or not. + */ + err = fscrypt_get_encryption_info(inode); + if (err) + return ERR_PTR(err); + + /* + * For historical reasons, encrypted symlink targets are prefixed with + * the ciphertext length, even though this is redundant with i_size. 
+ */ + + if (max_size < sizeof(*sd)) + return ERR_PTR(-EUCLEAN); + sd = caddr; + cstr.name = (unsigned char *)sd->encrypted_path; + cstr.len = le16_to_cpu(sd->len); + + if (cstr.len == 0) + return ERR_PTR(-EUCLEAN); + + if (cstr.len + sizeof(*sd) - 1 > max_size) + return ERR_PTR(-EUCLEAN); + + err = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); + if (err) + return ERR_PTR(err); + + err = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); + if (err) + goto err_kfree; + + err = -EUCLEAN; + if (pstr.name[0] == '\0') + goto err_kfree; + + pstr.name[pstr.len] = '\0'; + return pstr.name; + +err_kfree: + kfree(pstr.name); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(fscrypt_get_symlink); diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 444c65ed6db8..7c00331da5df 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "fscrypt_private.h" static struct crypto_shash *essiv_hash_tfm; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index a8b1749d79a8..debf0707789d 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -460,7 +460,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, int i, num; unsigned long nr_pages; - num = min_t(pgoff_t, end - index, PAGEVEC_SIZE); + num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, (pgoff_t)num); if (nr_pages == 0) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 27ff3706d632..03b874f3fefd 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3875,7 +3875,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { - ext4_error(sb, "Error loading buddy information for %u", group); + ext4_warning(sb, "Error %d loading buddy information for %u", + err, group); put_bh(bitmap_bh); return 0; } @@ -4032,10 +4033,11 @@ repeat: BUG_ON(pa->pa_type != MB_INODE_PA); group = ext4_get_group_number(sb, pa->pa_pstart); - err = ext4_mb_load_buddy(sb, group, &e4b); + err = ext4_mb_load_buddy_gfp(sb, group, &e4b, + GFP_NOFS|__GFP_NOFAIL); if (err) { - ext4_error(sb, "Error loading buddy information for %u", - group); + ext4_error(sb, "Error %d loading buddy information for %u", + err, group); continue; } @@ -4291,11 +4293,14 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, spin_unlock(&lg->lg_prealloc_lock); list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) { + int err; group = ext4_get_group_number(sb, pa->pa_pstart); - if (ext4_mb_load_buddy(sb, group, &e4b)) { - ext4_error(sb, "Error loading buddy information for %u", - group); + err = ext4_mb_load_buddy_gfp(sb, group, &e4b, + GFP_NOFS|__GFP_NOFAIL); + if (err) { + ext4_error(sb, "Error %d loading buddy information for %u", + err, group); continue; } ext4_lock_group(sb, group); @@ -5121,8 +5126,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ret = ext4_mb_load_buddy(sb, group, &e4b); if (ret) { - ext4_error(sb, "Error in loading buddy " - "information for %u", group); + ext4_warning(sb, "Error %d loading buddy information for %u", + ret, group); return ret; } bitmap = e4b.bd_bitmap; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3c343e922f6e..04c608646fd5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -68,6 +68,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, + .is_meta = is_meta, }; if (unlikely(!is_meta)) @@ -163,6 +164,7 @@ 
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, REQ_RAHEAD, .encrypted_page = NULL, .in_list = false, + .is_meta = (type != META_POR), }; struct blk_plug plug; @@ -573,13 +575,8 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) struct node_info ni; int err = acquire_orphan_inode(sbi); - if (err) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", - __func__, ino); - return err; - } + if (err) + goto err_out; __add_ino_entry(sbi, ino, 0, ORPHAN_INO); @@ -593,6 +590,10 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) return PTR_ERR(inode); } + err = dquot_initialize(inode); + if (err) + goto err_out; + clear_nlink(inode); /* truncate all the data during iput */ @@ -602,14 +604,18 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) /* ENOMEM was fully retried in f2fs_evict_inode. */ if (ni.blk_addr != NULL_ADDR) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x) by kernel, retry mount.", - __func__, ino); - return -EIO; + err = -EIO; + goto err_out; } __remove_ino_entry(sbi, ino, ORPHAN_INO); return 0; + +err_out: + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); + return err; } int recover_orphan_inodes(struct f2fs_sb_info *sbi) @@ -1140,6 +1146,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cpc->reason & CP_TRIMMED) __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG); + else + __clear_ckpt_flags(ckpt, CP_TRIMMED_FLAG); if (cpc->reason & CP_UMOUNT) __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); @@ -1166,6 +1174,39 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) spin_unlock_irqrestore(&sbi->cp_lock, flags); } +static void commit_checkpoint(struct f2fs_sb_info *sbi, + void *src, block_t blk_addr) +{ + struct writeback_control wbc = { + .for_reclaim = 0, + }; + + /* + * pagevec_lookup_tag and lock_page again will take + * some extra time. Therefore, update_meta_pages and + * sync_meta_pages are combined in this function.
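+ * (Editor's note: the code below copies the checkpoint into a freshly grabbed meta page, marks it dirty and writes it out synchronously, so cp pack 2 reaches the device in one pass without a second page-cache lookup.)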
+ */ + struct page *page = grab_meta_page(sbi, blk_addr); + int err; + + memcpy(page_address(page), src, PAGE_SIZE); + set_page_dirty(page); + + f2fs_wait_on_page_writeback(page, META, true); + f2fs_bug_on(sbi, PageWriteback(page)); + if (unlikely(!clear_page_dirty_for_io(page))) + f2fs_bug_on(sbi, 1); + + /* writeout cp pack 2 page */ + err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO); + f2fs_bug_on(sbi, err); + + f2fs_put_page(page, 0); + + /* submit checkpoint (with barrier if NOBARRIER is not set) */ + f2fs_submit_merged_write(sbi, META_FLUSH); +} + static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1268,16 +1309,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } } - /* need to wait for end_io results */ - wait_on_all_pages_writeback(sbi); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - - /* flush all device cache */ - err = f2fs_flush_device_cache(sbi); - if (err) - return err; - /* write out checkpoint buffer at block 0 */ update_meta_page(sbi, ckpt, start_blk++); @@ -1305,26 +1336,26 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += NR_CURSEG_NODE_TYPE; } - /* writeout checkpoint block */ - update_meta_page(sbi, ckpt, start_blk); + /* update user_block_counts */ + sbi->last_valid_block_count = sbi->total_valid_block_count; + percpu_counter_set(&sbi->alloc_valid_block_count, 0); - /* wait for previous submitted node/meta pages writeback */ + /* Here, we have one bio having CP pack except cp pack 2 page */ + sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + + /* wait for previous submitted meta pages writeback */ wait_on_all_pages_writeback(sbi); if (unlikely(f2fs_cp_error(sbi))) return -EIO; - filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX); - filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX); + /* flush all device cache */ + err = f2fs_flush_device_cache(sbi); + if (err) + return err; - /* update user_block_counts */ - sbi->last_valid_block_count = sbi->total_valid_block_count; - percpu_counter_set(&sbi->alloc_valid_block_count, 0); - - /* Here, we only have one bio having CP pack */ - sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO); - - /* wait for previous submitted meta pages writeback */ + /* barrier and flush checkpoint cp pack 2 page if it can */ + commit_checkpoint(sbi, ckpt, start_blk); wait_on_all_pages_writeback(sbi); release_ino_entry(sbi, false); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d5299265feea..ad44712bb902 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -174,15 +174,22 @@ static bool __same_bdev(struct f2fs_sb_info *sbi, */ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, struct writeback_control *wbc, - int npages, bool is_read) + int npages, bool is_read, + enum page_type type, enum temp_type temp) { struct bio *bio; bio = f2fs_bio_alloc(sbi, npages, true); f2fs_target_device(sbi, blk_addr, bio); - bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; - bio->bi_private = is_read ? 
NULL : sbi; + if (is_read) { + bio->bi_end_io = f2fs_read_end_io; + bio->bi_private = NULL; + } else { + bio->bi_end_io = f2fs_write_end_io; + bio->bi_private = sbi; + bio->bi_write_hint = io_type_to_rw_hint(sbi, type, temp); + } if (wbc) wbc_init_bio(wbc, bio); @@ -195,13 +202,12 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (!is_read_io(bio_op(bio))) { unsigned int start; - if (f2fs_sb_mounted_blkzoned(sbi->sb) && - current->plug && (type == DATA || type == NODE)) - blk_finish_plug(current->plug); - if (type != DATA && type != NODE) goto submit_io; + if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug) + blk_finish_plug(current->plug); + start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; start %= F2FS_IO_SIZE(sbi); @@ -376,12 +382,13 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; + verify_block_addr(fio, fio->new_blkaddr); trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); /* Allocate a new bio */ bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, - 1, is_read_io(fio->op)); + 1, is_read_io(fio->op), fio->type, fio->temp); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); @@ -421,8 +428,8 @@ next: } if (fio->old_blkaddr != NEW_ADDR) - verify_block_addr(sbi, fio->old_blkaddr); - verify_block_addr(sbi, fio->new_blkaddr); + verify_block_addr(fio, fio->old_blkaddr); + verify_block_addr(fio, fio->new_blkaddr); bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; @@ -444,7 +451,8 @@ alloc_new: goto out_fail; } io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc, - BIO_MAX_PAGES, false); + BIO_MAX_PAGES, false, + fio->type, fio->temp); io->fio = *fio; } @@ -831,13 +839,6 @@ alloc: return 0; } -static inline bool __force_buffered_io(struct inode *inode, int rw) -{ - return (f2fs_encrypted_file(inode) || - (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || - F2FS_I_SB(inode)->s_ndevs); -} - int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); @@ -868,9 +869,8 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_seg_type = NO_CHECK_TYPE; if (direct_io) { - /* map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint); */ - map.m_seg_type = rw_hint_to_seg_type(WRITE_LIFE_NOT_SET); - flag = __force_buffered_io(inode, WRITE) ? + map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint); + flag = f2fs_force_buffered_io(inode, WRITE) ? 
F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; goto map_blocks; @@ -1114,6 +1114,31 @@ out: return err; } +bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) +{ + struct f2fs_map_blocks map; + block_t last_lblk; + int err; + + if (pos + len > i_size_read(inode)) + return false; + + map.m_lblk = F2FS_BYTES_TO_BLK(pos); + map.m_next_pgofs = NULL; + map.m_next_extent = NULL; + map.m_seg_type = NO_CHECK_TYPE; + last_lblk = F2FS_BLK_ALIGN(pos + len); + + while (map.m_lblk < last_lblk) { + map.m_len = last_lblk - map.m_lblk; + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); + if (err || map.m_len == 0) + return false; + map.m_lblk += map.m_len; + } + return true; +} + static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create, int flag, pgoff_t *next_pgofs, int seg_type) @@ -1151,8 +1176,7 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, return __get_data_block(inode, iblock, bh_result, create, F2FS_GET_BLOCK_DEFAULT, NULL, rw_hint_to_seg_type( - WRITE_LIFE_NOT_SET)); - /* inode->i_write_hint)); */ + inode->i_write_hint)); } static int get_data_block_bmap(struct inode *inode, sector_t iblock, @@ -2304,15 +2328,18 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); size_t count = iov_iter_count(iter); int rw = iov_iter_rw(iter); int err; + enum rw_hint hint = iocb->ki_hint; + int whint_mode = F2FS_OPTION(sbi).whint_mode; err = check_direct_IO(inode, iter, offset); if (err) return err; - if (__force_buffered_io(inode, rw)) + if (f2fs_force_buffered_io(inode, rw)) return 0; if (trace_android_fs_dataread_start_enabled() && @@ -2339,11 +2366,24 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, } trace_f2fs_direct_IO_enter(inode, offset, count, rw); - down_read(&F2FS_I(inode)->dio_rwsem[rw]); + if (rw == WRITE && whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = WRITE_LIFE_NOT_SET; + + if (!down_read_trylock(&F2FS_I(inode)->dio_rwsem[rw])) { + if (iocb->ki_flags & IOCB_NOWAIT) { + iocb->ki_hint = hint; + err = -EAGAIN; + goto out; + } + down_read(&F2FS_I(inode)->dio_rwsem[rw]); + } + err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); up_read(&F2FS_I(inode)->dio_rwsem[rw]); if (rw == WRITE) { + if (whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = hint; if (err > 0) { f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, err); @@ -2352,7 +2392,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, f2fs_write_failed(mapping, offset + count); } } - +out: if (trace_android_fs_dataread_start_enabled() && (iov_iter_rw(iter) == READ)) trace_android_fs_dataread_end(inode, offset, count); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 560b707050ca..41d32171bd52 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -361,6 +361,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, struct page *dpage) { struct page *page; + int dummy_encrypt = DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(dir)); int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { @@ -387,7 +388,8 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, if (err) goto put_error; - if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) { + if ((f2fs_encrypted_inode(dir) || dummy_encrypt) && + f2fs_may_encrypt(inode)) { err = fscrypt_inherit_context(dir, inode, page, false); if (err) goto 
put_error; @@ -396,8 +398,6 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, page = get_node_page(F2FS_I_SB(dir), inode->i_ino); if (IS_ERR(page)) return page; - - set_cold_node(inode, page); } if (new_name) { @@ -704,7 +704,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); - add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); + if (F2FS_OPTION(F2FS_I_SB(dir)).fsync_mode == FSYNC_MODE_STRICT) + add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); if (f2fs_has_inline_dentry(dir)) return f2fs_delete_inline_entry(dentry, page, dir, inode); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index ff2352a0ed15..d5a861bf2b42 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -460,7 +460,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode, struct rb_node *insert_parent) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct rb_node **p = &et->root.rb_node; + struct rb_node **p; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -706,6 +706,9 @@ void f2fs_drop_extent_tree(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree; + if (!f2fs_may_extent_tree(inode)) + return; + set_inode_flag(inode, FI_NO_EXTENT); write_lock(&et->lock); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 34595ca42f68..e258f6536b56 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -99,9 +99,10 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define F2FS_MOUNT_RESERVE_ROOT 0x01000000 -#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) -#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) -#define test_opt(sbi, option) ((sbi)->mount_opt.opt & F2FS_MOUNT_##option) +#define F2FS_OPTION(sbi) ((sbi)->mount_opt) +#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) (F2FS_OPTION(sbi).opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) (F2FS_OPTION(sbi).opt & F2FS_MOUNT_##option) #define ver_after(a, b) (typecheck(unsigned long long, a) && \ typecheck(unsigned long long, b) && \ @@ -114,7 +115,26 @@ typedef u32 block_t; /* typedef u32 nid_t; struct f2fs_mount_info { - unsigned int opt; + unsigned int opt; + int write_io_size_bits; /* Write IO size bits */ + block_t root_reserved_blocks; /* root reserved blocks */ + kuid_t s_resuid; /* reserved blocks for uid */ + kgid_t s_resgid; /* reserved blocks for gid */ + int active_logs; /* # of active logs */ + int inline_xattr_size; /* inline xattr size */ +#ifdef CONFIG_F2FS_FAULT_INJECTION + struct f2fs_fault_info fault_info; /* For fault injection */ +#endif +#ifdef CONFIG_QUOTA + /* Names of quota files with journalled quota */ + char *s_qf_names[MAXQUOTAS]; + int s_jquota_fmt; /* Format of quota to use */ +#endif + /* For which write hints are passed down to block layer */ + int whint_mode; + int alloc_mode; /* segment allocation policy */ + int fsync_mode; /* fsync policy */ + bool test_dummy_encryption; /* test dummy encryption */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -126,6 +146,8 @@ struct f2fs_mount_info { #define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040 #define F2FS_FEATURE_QUOTA_INO 0x0080 #define F2FS_FEATURE_INODE_CRTIME 0x0100 +#define F2FS_FEATURE_LOST_FOUND 0x0200 +#define F2FS_FEATURE_VERITY 0x0400 /* reserved */ #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature 
& cpu_to_le32(mask)) != 0) @@ -509,7 +531,7 @@ static inline void make_dentry_ptr_block(struct inode *inode, d->inode = inode; d->max = NR_DENTRY_IN_BLOCK; d->nr_bitmap = SIZE_OF_DENTRY_BITMAP; - d->bitmap = &t->dentry_bitmap; + d->bitmap = t->dentry_bitmap; d->dentry = t->dentry; d->filename = t->filename; } @@ -635,6 +657,8 @@ enum { #define FADVISE_ENCRYPT_BIT 0x04 #define FADVISE_ENC_NAME_BIT 0x08 #define FADVISE_KEEP_SIZE_BIT 0x10 +#define FADVISE_HOT_BIT 0x20 +#define FADVISE_VERITY_BIT 0x40 /* reserved */ #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) @@ -649,6 +673,9 @@ enum { #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) #define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT) #define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT) +#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT) +#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT) +#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT) #define DEF_DIR_LEVEL 0 @@ -696,6 +723,7 @@ struct f2fs_inode_info { kprojid_t i_projid; /* id for project quota */ int i_inline_xattr_size; /* inline xattr size */ struct timespec i_crtime; /* inode creation time */ + struct timespec i_disk_time[4]; /* inode disk times */ }; static inline void get_extent_info(struct extent_info *ext, @@ -802,7 +830,7 @@ struct f2fs_nm_info { unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */ spinlock_t nid_list_lock; /* protect nid lists ops */ struct mutex build_lock; /* lock for build free nids */ - unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; + unsigned char **free_nid_bitmap; unsigned char *nat_block_bitmap; unsigned short *free_nid_count; /* free nid count of NAT block */ @@ -1035,6 +1063,7 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ + bool is_meta; /* indicate borrow meta inode mapping or not */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ }; @@ -1096,10 +1125,34 @@ enum { MAX_TIME, }; +enum { + WHINT_MODE_OFF, /* not pass down write hints */ + WHINT_MODE_USER, /* try to pass down hints given by users */ + WHINT_MODE_FS, /* pass down hints with F2FS policy */ +}; + +enum { + ALLOC_MODE_DEFAULT, /* stay default */ + ALLOC_MODE_REUSE, /* reuse segments as much as possible */ +}; + +enum fsync_mode { + FSYNC_MODE_POSIX, /* fsync follows posix semantics */ + FSYNC_MODE_STRICT, /* fsync behaves in line with ext4 */ +}; + +#ifdef CONFIG_F2FS_FS_ENCRYPTION +#define DUMMY_ENCRYPTION_ENABLED(sbi) \ + (unlikely(F2FS_OPTION(sbi).test_dummy_encryption)) +#else +#define DUMMY_ENCRYPTION_ENABLED(sbi) (0) +#endif + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct f2fs_super_block *raw_super; /* raw super block pointer */ + struct rw_semaphore sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ @@ -1119,7 +1172,6 @@ struct f2fs_sb_info { struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; /* bio ordering for NODE/DATA */ - int write_io_size_bits; /* Write IO size bits */ mempool_t *write_io_dummy; /* Dummy pages */ /* for checkpoint */ @@ -1169,9 +1221,7 @@ struct 
f2fs_sb_info { unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ - int active_logs; /* # of active logs */ int dir_level; /* directory level */ - int inline_xattr_size; /* inline xattr size */ unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */ int readdir_ra; /* readahead inode in readdir */ @@ -1181,9 +1231,6 @@ struct f2fs_sb_info { block_t last_valid_block_count; /* for recovery */ block_t reserved_blocks; /* configurable reserved blocks */ block_t current_reserved_blocks; /* current reserved blocks */ - block_t root_reserved_blocks; /* root reserved blocks */ - kuid_t s_resuid; /* reserved blocks for uid */ - kgid_t s_resgid; /* reserved blocks for gid */ unsigned int nquota_files; /* # of quota sysfile */ @@ -1268,17 +1315,6 @@ struct f2fs_sb_info { /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; - - /* For fault injection */ -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct f2fs_fault_info fault_info; -#endif - -#ifdef CONFIG_QUOTA - /* Names of quota files with journalled quota */ - char *s_qf_names[MAXQUOTAS]; - int s_jquota_fmt; /* Format of quota to use */ -#endif }; #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -1288,7 +1324,7 @@ struct f2fs_sb_info { __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { - struct f2fs_fault_info *ffi = &sbi->fault_info; + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (!ffi->inject_rate) return false; @@ -1645,13 +1681,13 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, return false; if (IS_NOQUOTA(inode)) return true; + if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid())) + return true; + if (!gid_eq(F2FS_OPTION(sbi).s_resgid, GLOBAL_ROOT_GID) && + in_group_p(F2FS_OPTION(sbi).s_resgid)) + return true; if (capable(CAP_SYS_RESOURCE)) return true; - if (uid_eq(sbi->s_resuid, current_fsuid())) - return true; - if (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && - in_group_p(sbi->s_resgid)) - return true; return false; } @@ -1686,7 +1722,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, sbi->current_reserved_blocks; if (!__allow_reserved_blocks(sbi, inode)) - avail_user_block_count -= sbi->root_reserved_blocks; + avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; @@ -1821,6 +1857,12 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); int offset; + if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) { + offset = (flag == SIT_BITMAP) ? 
+ le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0; + return &ckpt->sit_nat_version_bitmap + offset; + } + if (__cp_payload(sbi) > 0) { if (flag == NAT_BITMAP) return &ckpt->sit_nat_version_bitmap; @@ -1887,7 +1929,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, sbi->current_reserved_blocks + 1; if (!__allow_reserved_blocks(sbi, inode)) - valid_block_count += sbi->root_reserved_blocks; + valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); @@ -2489,7 +2531,17 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) } if (!is_inode_flag_set(inode, FI_AUTO_RECOVER) || file_keep_isize(inode) || - i_size_read(inode) & PAGE_MASK) + i_size_read(inode) & ~PAGE_MASK) + return false; + + if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3, + &F2FS_I(inode)->i_crtime)) return false; down_read(&F2FS_I(inode)->i_sem); @@ -2499,8 +2551,7 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) return ret; } -#define sb_rdonly f2fs_readonly -static inline int f2fs_readonly(struct super_block *sb) +static inline bool f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; } @@ -2562,15 +2613,6 @@ static inline void *kvzalloc(size_t size, gfp_t flags) return ret; } -enum rw_hint { - WRITE_LIFE_NOT_SET = 0, - WRITE_LIFE_NONE = 1, /* RWH_WRITE_LIFE_NONE */ - WRITE_LIFE_SHORT = 2, /* RWH_WRITE_LIFE_SHORT */ - WRITE_LIFE_MEDIUM = 3, /* RWH_WRITE_LIFE_MEDIUM */ - WRITE_LIFE_LONG = 4, /* RWH_WRITE_LIFE_LONG */ - WRITE_LIFE_EXTREME = 5, /* RWH_WRITE_LIFE_EXTREME */ -}; - static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { @@ -2679,6 +2721,8 @@ void handle_failed_inode(struct inode *inode); /* * namei.c */ +int update_extension_list(struct f2fs_sb_info *sbi, const char *name, + bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); /* @@ -2851,6 +2895,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi); int __init create_segment_manager_caches(void); void destroy_segment_manager_caches(void); int rw_hint_to_seg_type(enum rw_hint hint); +enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, + enum temp_type temp); /* * checkpoint.c @@ -2933,6 +2979,7 @@ int f2fs_release_page(struct page *page, gfp_t wait); int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode); #endif +bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); /* * gc.c @@ -3255,45 +3302,21 @@ static inline bool f2fs_bio_encrypted(struct bio *bio) return bio->bi_private != NULL; } -static inline int f2fs_sb_has_crypto(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); +#define F2FS_FEATURE_FUNCS(name, flagname) \ +static inline int f2fs_sb_has_##name(struct super_block *sb) \ +{ \ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \ } -static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED); -} - -static inline int f2fs_sb_has_extra_attr(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_EXTRA_ATTR); -} - -static inline int 
f2fs_sb_has_project_quota(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_PRJQUOTA); -} - -static inline int f2fs_sb_has_inode_chksum(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM); -} - -static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR); -} - -static inline int f2fs_sb_has_quota_ino(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_QUOTA_INO); -} - -static inline int f2fs_sb_has_inode_crtime(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CRTIME); -} +F2FS_FEATURE_FUNCS(encrypt, ENCRYPT); +F2FS_FEATURE_FUNCS(blkzoned, BLKZONED); +F2FS_FEATURE_FUNCS(extra_attr, EXTRA_ATTR); +F2FS_FEATURE_FUNCS(project_quota, PRJQUOTA); +F2FS_FEATURE_FUNCS(inode_chksum, INODE_CHKSUM); +F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); +F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); +F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); +F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, @@ -3313,7 +3336,7 @@ static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) { struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); - return blk_queue_discard(q) || f2fs_sb_mounted_blkzoned(sbi->sb); + return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb); } static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) @@ -3342,4 +3365,11 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } +static inline bool f2fs_force_buffered_io(struct inode *inode, int rw) +{ + return (f2fs_encrypted_file(inode) || + (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || + F2FS_I_SB(inode)->s_ndevs); +} + #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 65cda5bc61b7..39c3acb454a3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -166,9 +166,10 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) cp_reason = CP_NODE_NEED_CP; else if (test_opt(sbi, FASTBOOT)) cp_reason = CP_FASTBOOT_MODE; - else if (sbi->active_logs == 2) + else if (F2FS_OPTION(sbi).active_logs == 2) cp_reason = CP_SPEC_LOG_NUM; - else if (need_dentry_mark(sbi, inode->i_ino) && + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && + need_dentry_mark(sbi, inode->i_ino) && exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; @@ -481,6 +482,9 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; + + filp->f_mode |= FMODE_NOWAIT; + return dquot_file_open(inode, filp); } @@ -571,7 +575,6 @@ truncate_out: int truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - unsigned int blocksize = inode->i_sb->s_blocksize; struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; @@ -580,7 +583,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) trace_f2fs_truncate_blocks_enter(inode, from); - free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); + free_from = (pgoff_t)F2FS_BLK_ALIGN(from); if (free_from >= sbi->max_file_blocks) goto free_partial; @@ -1354,8 +1357,12 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, } out: - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) - f2fs_i_size_write(inode, new_size); + if (new_size > i_size_read(inode)) { + if (mode & FALLOC_FL_KEEP_SIZE) + file_set_keep_isize(inode); + else 
+ f2fs_i_size_write(inode, new_size); + } out_sem: up_write(&F2FS_I(inode)->i_mmap_sem); @@ -1709,6 +1716,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) inode_lock(inode); + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + if (f2fs_is_volatile_file(inode)) goto err_out; @@ -1727,6 +1736,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } err_out: + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1936,7 +1946,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - if (!f2fs_sb_has_crypto(inode->i_sb)) + if (!f2fs_sb_has_encrypt(inode->i_sb)) return -EOPNOTSUPP; f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); @@ -1946,7 +1956,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { - if (!f2fs_sb_has_crypto(file_inode(filp)->i_sb)) + if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -1957,16 +1967,18 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (!f2fs_sb_has_crypto(inode->i_sb)) + if (!f2fs_sb_has_encrypt(inode->i_sb)) return -EOPNOTSUPP; - if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) - goto got_it; - err = mnt_want_write_file(filp); if (err) return err; + down_write(&sbi->sb_lock); + + if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) + goto got_it; + /* update superblock with uuid */ generate_random_uuid(sbi->raw_super->encrypt_pw_salt); @@ -1974,15 +1986,16 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) if (err) { /* undo new data */ memset(sbi->raw_super->encrypt_pw_salt, 0, 16); - mnt_drop_write_file(filp); - return err; + goto out_err; } - mnt_drop_write_file(filp); got_it: if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, 16)) - return -EFAULT; - return 0; + err = -EFAULT; +out_err: + up_write(&sbi->sb_lock); + mnt_drop_write_file(filp); + return err; } static int f2fs_ioc_gc(struct file *filp, unsigned long arg) @@ -2043,8 +2056,10 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) return ret; end = range.start + range.len; - if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) - return -EINVAL; + if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) { + ret = -EINVAL; + goto out; + } do_more: if (!range.sync) { if (!mutex_trylock(&sbi->gc_mutex)) { @@ -2685,25 +2700,54 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; - inode_lock(inode); + if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) + return -EINVAL; + + if (!inode_trylock(inode)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_lock(inode); + } + ret = generic_write_checks(iocb, from); if (ret > 0) { + bool preallocated = false; + size_t target_size = 0; int err; if (iov_iter_fault_in_readable(from, iov_iter_count(from))) set_inode_flag(inode, FI_NO_PREALLOC); - err = f2fs_preallocate_blocks(iocb, from); - if (err) { - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); - return err; + if ((iocb->ki_flags & IOCB_NOWAIT) && + (iocb->ki_flags & IOCB_DIRECT)) { + if 
(!f2fs_overwrite_io(inode, iocb->ki_pos, + iov_iter_count(from)) || + f2fs_has_inline_data(inode) || + f2fs_force_buffered_io(inode, WRITE)) { + inode_unlock(inode); + return -EAGAIN; + } + + } else { + preallocated = true; + target_size = iocb->ki_pos + iov_iter_count(from); + + err = f2fs_preallocate_blocks(iocb, from); + if (err) { + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + return err; + } } blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); blk_finish_plug(&plug); clear_inode_flag(inode, FI_NO_PREALLOC); + /* if we couldn't write data, we should deallocate blocks. */ + if (preallocated && i_size_read(inode) < target_size) + f2fs_truncate(inode); + if (ret > 0) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d0de3429c26c..54f51a990794 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -76,14 +76,15 @@ static int gc_thread_func(void *data) * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. */ - if (!mutex_trylock(&sbi->gc_mutex)) - goto next; - if (gc_th->gc_urgent) { wait_ms = gc_th->urgent_sleep_time; + mutex_lock(&sbi->gc_mutex); goto do_gc; } + if (!mutex_trylock(&sbi->gc_mutex)) + goto next; + if (!is_idle(sbi)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); @@ -161,12 +162,17 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) { int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; - if (gc_th && gc_th->gc_idle) { + if (!gc_th) + return gc_mode; + + if (gc_th->gc_idle) { if (gc_th->gc_idle == 1) gc_mode = GC_CB; else if (gc_th->gc_idle == 2) gc_mode = GC_GREEDY; } + if (gc_th->gc_urgent) + gc_mode = GC_GREEDY; return gc_mode; } @@ -188,11 +194,14 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, } /* we need to check every dirty segments in the FG_GC case */ - if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) + if (gc_type != FG_GC && + (sbi->gc_thread && !sbi->gc_thread->gc_urgent) && + p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; - /* let's select beginning hot/small space first */ - if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + /* let's select beginning hot/small space first in no_heap mode*/ + if (test_opt(sbi, NOHEAP) && + (type == CURSEG_HOT_DATA || IS_NODESEG(type))) p->offset = 0; else p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 10be247ca421..51846fc54fbd 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -284,6 +284,10 @@ static int do_read_inode(struct inode *inode) fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); } + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; + F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; + F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; + F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); @@ -439,12 +443,15 @@ void update_inode(struct inode *inode, struct page *node_page) } __set_inode_rdev(inode, ri); - set_cold_node(inode, node_page); /* deleted inode */ if (inode->i_nlink == 0) clear_inline_node(node_page); + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; + F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; + F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; + F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; } void update_inode_page(struct inode *inode) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6bb1adb84324..5ec20f077629 100644 
--- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -78,7 +78,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) set_inode_flag(inode, FI_NEW_INODE); /* If the directory encrypted, then we should encrypt the inode. */ - if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) + if ((f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) && + f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); if (f2fs_sb_has_extra_attr(sbi->sb)) { @@ -97,7 +98,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); if (f2fs_has_inline_xattr(inode)) - xattr_size = sbi->inline_xattr_size; + xattr_size = F2FS_OPTION(sbi).inline_xattr_size; /* Otherwise, will be 0 */ } else if (f2fs_has_inline_xattr(inode) || f2fs_has_inline_dentry(inode)) { @@ -142,7 +143,7 @@ fail_drop: return ERR_PTR(err); } -static int is_multimedia_file(const unsigned char *s, const char *sub) +static int is_extension_exist(const unsigned char *s, const char *sub) { size_t slen = strlen(s); size_t sublen = strlen(sub); @@ -168,19 +169,94 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) /* * Set multimedia files as cold files for hot/cold data separation */ -static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, +static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) { - int i; - __u8 (*extlist)[8] = sbi->raw_super->extension_list; + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int i, cold_count, hot_count; - int count = le32_to_cpu(sbi->raw_super->extension_count); - for (i = 0; i < count; i++) { - if (is_multimedia_file(name, extlist[i])) { + down_read(&sbi->sb_lock); + + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; + + for (i = 0; i < cold_count + hot_count; i++) { + if (!is_extension_exist(name, extlist[i])) + continue; + if (i < cold_count) file_set_cold(inode); - break; - } + else + file_set_hot(inode); + break; } + + up_read(&sbi->sb_lock); +} + +int update_extension_list(struct f2fs_sb_info *sbi, const char *name, + bool hot, bool set) +{ + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int cold_count = le32_to_cpu(sbi->raw_super->extension_count); + int hot_count = sbi->raw_super->hot_ext_count; + int total_count = cold_count + hot_count; + int start, count; + int i; + + if (set) { + if (total_count == F2FS_MAX_EXTENSION) + return -EINVAL; + } else { + if (!hot && !cold_count) + return -EINVAL; + if (hot && !hot_count) + return -EINVAL; + } + + if (hot) { + start = cold_count; + count = total_count; + } else { + start = 0; + count = cold_count; + } + + for (i = start; i < count; i++) { + if (strcmp(name, extlist[i])) + continue; + + if (set) + return -EINVAL; + + memcpy(extlist[i], extlist[i + 1], + F2FS_EXTENSION_LEN * (total_count - i - 1)); + memset(extlist[total_count - 1], 0, F2FS_EXTENSION_LEN); + if (hot) + sbi->raw_super->hot_ext_count = hot_count - 1; + else + sbi->raw_super->extension_count = + cpu_to_le32(cold_count - 1); + return 0; + } + + if (!set) + return -EINVAL; + + if (hot) { + strncpy(extlist[count], name, strlen(name)); + sbi->raw_super->hot_ext_count = hot_count + 1; + } else { + char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN]; + + memcpy(buf, &extlist[cold_count], + F2FS_EXTENSION_LEN * hot_count); + memset(extlist[cold_count], 0, 
F2FS_EXTENSION_LEN); + strncpy(extlist[cold_count], name, strlen(name)); + memcpy(&extlist[cold_count + 1], buf, + F2FS_EXTENSION_LEN * hot_count); + sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1); + } + return 0; } static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, @@ -203,7 +279,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, return PTR_ERR(inode); if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) - set_cold_files(sbi, inode, dentry->d_name.name); + set_file_temperature(sbi, inode, dentry->d_name.name); inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; @@ -478,27 +554,16 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; size_t len = strlen(symname); - struct fscrypt_str disk_link = FSTR_INIT((char *)symname, len + 1); - struct fscrypt_symlink_data *sd = NULL; + struct fscrypt_str disk_link; int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; - if (f2fs_encrypted_inode(dir)) { - err = fscrypt_get_encryption_info(dir); - if (err) - return err; - - if (!fscrypt_has_encryption_key(dir)) - return -ENOKEY; - - disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + - sizeof(struct fscrypt_symlink_data)); - } - - if (disk_link.len > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; + err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, + &disk_link); + if (err) + return err; err = dquot_initialize(dir); if (err) @@ -508,7 +573,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) return PTR_ERR(inode); - if (f2fs_encrypted_inode(inode)) + if (IS_ENCRYPTED(inode)) inode->i_op = &f2fs_encrypted_symlink_inode_operations; else inode->i_op = &f2fs_symlink_inode_operations; @@ -518,38 +583,13 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) - goto out; + goto out_handle_failed_inode; f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); - if (f2fs_encrypted_inode(inode)) { - struct qstr istr = QSTR_INIT(symname, len); - struct fscrypt_str ostr; - - sd = f2fs_kzalloc(sbi, disk_link.len, GFP_NOFS); - if (!sd) { - err = -ENOMEM; - goto err_out; - } - - err = fscrypt_get_encryption_info(inode); - if (err) - goto err_out; - - if (!fscrypt_has_encryption_key(inode)) { - err = -ENOKEY; - goto err_out; - } - - ostr.name = sd->encrypted_path; - ostr.len = disk_link.len; - err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); - if (err) - goto err_out; - - sd->len = cpu_to_le16(ostr.len); - disk_link.name = (char *)sd; - } + err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link); + if (err) + goto err_out; err = page_symlink(inode, disk_link.name, disk_link.len); @@ -576,12 +616,14 @@ err_out: f2fs_unlink(dir, dentry); } - kfree(sd); - f2fs_balance_fs(sbi, true); - return err; -out: + goto out_free_encrypted_link; + +out_handle_failed_inode: handle_failed_inode(inode); +out_free_encrypted_link: + if (disk_link.name != (unsigned char *)symname) + kfree(disk_link.name); return err; } @@ -743,10 +785,12 @@ out: static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { - if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + if (unlikely(f2fs_cp_error(sbi))) return -EIO; - if (f2fs_encrypted_inode(dir)) { + if (f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) { int err = fscrypt_get_encryption_info(dir); if 
(err) return err; @@ -926,7 +970,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_put_page(old_dir_page, 0); f2fs_i_links_write(old_dir, false); } - add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); f2fs_unlock_op(sbi); @@ -1076,8 +1121,10 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_mark_inode_dirty_sync(new_dir, false); - add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); - add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { + add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); + add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + } f2fs_unlock_op(sbi); @@ -1127,65 +1174,21 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cookie) { - struct page *cpage = NULL; - char *caddr, *paddr = NULL; - struct fscrypt_str cstr = FSTR_INIT(NULL, 0); - struct fscrypt_str pstr = FSTR_INIT(NULL, 0); - struct fscrypt_symlink_data *sd; struct inode *inode = d_inode(dentry); - u32 max_size = inode->i_sb->s_blocksize; - int res; + struct page *page; + void *target; - res = fscrypt_get_encryption_info(inode); - if (res) - return ERR_PTR(res); + if (!dentry) + return ERR_PTR(-ECHILD); - cpage = read_mapping_page(inode->i_mapping, 0, NULL); - if (IS_ERR(cpage)) - return ERR_CAST(cpage); - caddr = page_address(cpage); + page = read_mapping_page(inode->i_mapping, 0, NULL); + if (IS_ERR(page)) + return ERR_CAST(page); - /* Symlink is encrypted */ - sd = (struct fscrypt_symlink_data *)caddr; - cstr.name = sd->encrypted_path; - cstr.len = le16_to_cpu(sd->len); - - /* this is broken symlink case */ - if (unlikely(cstr.len == 0)) { - res = -ENOENT; - goto errout; - } - - if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) { - /* Symlink data on the disk is corrupted */ - res = -EIO; - goto errout; - } - res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); - if (res) - goto errout; - - res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); - if (res) - goto errout; - - /* this is broken symlink case */ - if (unlikely(pstr.name[0] == 0)) { - res = -ENOENT; - goto errout; - } - - paddr = pstr.name; - - /* Null-terminate the name */ - paddr[pstr.len] = '\0'; - - put_page(cpage); - return *cookie = paddr; -errout: - fscrypt_fname_free_buffer(&pstr); - put_page(cpage); - return ERR_PTR(res); + target = fscrypt_get_symlink(inode, page_address(page), + inode->i_sb->s_blocksize); + put_page(page); + return *cookie = target; } const struct inode_operations f2fs_encrypted_symlink_inode_operations = { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c294d0feea08..157d768c7b31 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -193,8 +193,8 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) __free_nat_entry(e); } -static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, - struct nat_entry *ne) +static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i, + struct nat_entry *ne) { nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; @@ -209,15 +209,36 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, head->entry_cnt = 0; f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } + return head; +} + +static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, + struct nat_entry 
*ne) +{ + struct nat_entry_set *head; + bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR; + + if (!new_ne) + head = __grab_nat_entry_set(nm_i, ne); + + /* + * update entry_cnt in the following conditions: + * 1. update NEW_ADDR to valid block address; + * 2. update old block address to new one; + */ + if (!new_ne && (get_nat_flag(ne, IS_PREALLOC) || + !get_nat_flag(ne, IS_DIRTY))) + head->entry_cnt++; + + set_nat_flag(ne, IS_PREALLOC, new_ne); if (get_nat_flag(ne, IS_DIRTY)) goto refresh_list; nm_i->dirty_nat_cnt++; - head->entry_cnt++; set_nat_flag(ne, IS_DIRTY, true); refresh_list: - if (nat_get_blkaddr(ne) == NEW_ADDR) + if (new_ne) list_del_init(&ne->list); else list_move_tail(&ne->list, &head->entry_list); @@ -1076,7 +1097,7 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) f2fs_wait_on_page_writeback(page, NODE, true); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); - set_cold_node(dn->inode, page); + set_cold_node(page, S_ISDIR(dn->inode->i_mode)); if (!PageUptodate(page)) SetPageUptodate(page); if (set_page_dirty(page)) @@ -2313,6 +2334,7 @@ retry: if (!PageUptodate(ipage)) SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); + set_cold_node(ipage, false); src = F2FS_INODE(page); dst = F2FS_INODE(ipage); @@ -2602,8 +2624,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) if (!enabled_nat_bits(sbi, NULL)) return 0; - nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 + - F2FS_BLKSIZE - 1); + nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); nm_i->nat_bits = f2fs_kzalloc(sbi, nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); if (!nm_i->nat_bits) @@ -2729,12 +2750,20 @@ static int init_node_manager(struct f2fs_sb_info *sbi) static int init_free_nid_cache(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); + int i; - nm_i->free_nid_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks * - NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL); + nm_i->free_nid_bitmap = f2fs_kzalloc(sbi, nm_i->nat_blocks * + sizeof(unsigned char *), GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; + for (i = 0; i < nm_i->nat_blocks; i++) { + nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi, + NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL); + if (!nm_i->free_nid_bitmap[i]) + return -ENOMEM; + } + nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8, GFP_KERNEL); if (!nm_i->nat_block_bitmap) @@ -2825,7 +2854,13 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) up_write(&nm_i->nat_tree_lock); kvfree(nm_i->nat_block_bitmap); - kvfree(nm_i->free_nid_bitmap); + if (nm_i->free_nid_bitmap) { + int i; + + for (i = 0; i < nm_i->nat_blocks; i++) + kvfree(nm_i->free_nid_bitmap[i]); + kfree(nm_i->free_nid_bitmap); + } kvfree(nm_i->free_nid_count); kfree(nm_i->nat_bitmap); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 081ef0d672bf..b95e49e4a928 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -44,6 +44,7 @@ enum { HAS_FSYNCED_INODE, /* is the inode fsynced before? */ HAS_LAST_FSYNC, /* has the latest node fsync mark? */ IS_DIRTY, /* this nat entry is dirty?
*/ + IS_PREALLOC, /* nat entry is preallocated */ }; /* @@ -422,12 +423,12 @@ static inline void clear_inline_node(struct page *page) ClearPageChecked(page); } -static inline void set_cold_node(struct inode *inode, struct page *page) +static inline void set_cold_node(struct page *page, bool is_dir) { struct f2fs_node *rn = F2FS_NODE(page); unsigned int flag = le32_to_cpu(rn->footer.flag); - if (S_ISDIR(inode->i_mode)) + if (is_dir) flag &= ~(0x1 << COLD_BIT_SHIFT); else flag |= (0x1 << COLD_BIT_SHIFT); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 210de28c9cd2..4ddc2262baf1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -242,6 +242,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, struct curseg_info *curseg; struct page *page = NULL; block_t blkaddr; + unsigned int loop_cnt = 0; + unsigned int free_blocks = sbi->user_block_count - + valid_user_blocks(sbi); int err = 0; /* get node pages in the current segment */ @@ -294,6 +297,17 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (IS_INODE(page) && is_dent_dnode(page)) entry->last_dentry = blkaddr; next: + /* sanity check in order to detect looped node chain */ + if (++loop_cnt >= free_blocks || + blkaddr == next_blkaddr_of_node(page)) { + f2fs_msg(sbi->sb, KERN_NOTICE, + "%s: detect looped node chain, " + "blkaddr:%u, next:%u", + __func__, blkaddr, next_blkaddr_of_node(page)); + err = -EINVAL; + break; + } + /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bf98f6f34b7e..d7bac60ad719 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1491,12 +1491,11 @@ static int issue_discard_thread(void *data) if (kthread_should_stop()) return 0; - if (dcc->discard_wake) { + if (dcc->discard_wake) dcc->discard_wake = 0; - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) - init_discard_policy(&dpolicy, - DPOLICY_FORCE, 1); - } + + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + init_discard_policy(&dpolicy, DPOLICY_FORCE, 1); sb_start_intwrite(sbi->sb); @@ -1565,7 +1564,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { #ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_mounted_blkzoned(sbi->sb) && + if (f2fs_sb_has_blkzoned(sbi->sb) && bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif @@ -1763,7 +1762,7 @@ find_next: sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (f2fs_sb_mounted_blkzoned(sbi->sb) || + if (f2fs_sb_has_blkzoned(sbi->sb) || (force && len < cpc->trim_minlen)) goto skip; @@ -1807,7 +1806,7 @@ void init_discard_policy(struct discard_policy *dpolicy, } else if (discard_type == DPOLICY_FORCE) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = true; + dpolicy->io_aware = false; } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { @@ -1943,7 +1942,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ - if (se->type == CURSEG_WARM_NODE) { + if (IS_NODESEG(se->type)) { if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks++; } @@ -2244,11 +2243,17 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (sbi->segs_per_sec != 1) 
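/*
 * Illustrative note (not part of the patch) on the looped-node-chain
 * guard added to find_fsync_dnodes() above: on a corrupted image the
 * node chain can point back at an earlier block, so the recovery walk
 * would never terminate. A valid chain of unrecovered fsync'ed nodes
 * can never contain more blocks than were free, so the walk now fails
 * with -EINVAL once loop_cnt reaches free_blocks, or immediately when
 * a node names itself as its own successor.
 */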
return CURSEG_I(sbi, type)->segno; - if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + if (test_opt(sbi, NOHEAP) && + (type == CURSEG_HOT_DATA || IS_NODESEG(type))) return 0; if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) return SIT_I(sbi)->last_victim[ALLOC_NEXT]; + + /* find segments from 0 to reuse freed segments */ + if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) + return 0; + return CURSEG_I(sbi, type)->segno; } @@ -2535,6 +2540,101 @@ int rw_hint_to_seg_type(enum rw_hint hint) } } +/* This returns write hints for each segment type. This hints will be + * passed down to block layer. There are mapping tables which depend on + * the mount option 'whint_mode'. + * + * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. + * + * 2) whint_mode=user-based. F2FS tries to pass down hints given by users. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_NOT_SET + * HOT_NODE " + * WARM_NODE " + * COLD_NODE " + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " " + * WRITE_LIFE_MEDIUM " " + * WRITE_LIFE_LONG " " + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + * + * 3) whint_mode=fs-based. F2FS passes down hints with its policy. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_MEDIUM; + * HOT_NODE WRITE_LIFE_NOT_SET + * WARM_NODE " + * COLD_NODE WRITE_LIFE_NONE + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG + * WRITE_LIFE_NONE " " + * WRITE_LIFE_MEDIUM " " + * WRITE_LIFE_LONG " " + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + */ + +enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp) +{ + if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) { + if (type == DATA) { + if (temp == WARM) + return WRITE_LIFE_NOT_SET; + else if (temp == HOT) + return WRITE_LIFE_SHORT; + else if (temp == COLD) + return WRITE_LIFE_EXTREME; + } else { + return WRITE_LIFE_NOT_SET; + } + } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) { + if (type == DATA) { + if (temp == WARM) + return WRITE_LIFE_LONG; + else if (temp == HOT) + return WRITE_LIFE_SHORT; + else if (temp == COLD) + return WRITE_LIFE_EXTREME; + } else if (type == NODE) { + if (temp == WARM || temp == HOT) + return WRITE_LIFE_NOT_SET; + else if (temp == COLD) + return WRITE_LIFE_NONE; + } else if (type == META) { + return WRITE_LIFE_MEDIUM; + } + } + return WRITE_LIFE_NOT_SET; +} + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) @@ -2567,7 +2667,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (is_cold_data(fio->page) || file_is_cold(inode)) return CURSEG_COLD_DATA; - if (is_inode_flag_set(inode, FI_HOT_DATA)) + if (file_is_hot(inode) || + 
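/*
 * Illustrative user-space sketch (not part of the patch) of the
 * "user-based" mode in the tables above: an application tags a file
 * descriptor with a write-life hint and f2fs steers its data into the
 * matching HOT/WARM/COLD log. The constants are defined locally in
 * case the libc headers predate them; the values come from
 * <uapi/linux/fcntl.h>.
 */
#include <fcntl.h>
#include <stdint.h>

#ifndef F_SET_RW_HINT
#define F_SET_RW_HINT		1036	/* F_LINUX_SPECIFIC_BASE + 12 */
#define RWH_WRITE_LIFE_SHORT	2
#define RWH_WRITE_LIFE_EXTREME	5
#endif

static int tag_fd(int fd, uint64_t hint)
{
	/* WRITE_LIFE_SHORT maps to HOT_DATA, WRITE_LIFE_EXTREME to COLD_DATA */
	return fcntl(fd, F_SET_RW_HINT, &hint);
}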
is_inode_flag_set(inode, FI_HOT_DATA)) return CURSEG_HOT_DATA; /* rw_hint_to_seg_type(inode->i_write_hint); */ return CURSEG_WARM_DATA; @@ -2583,7 +2684,7 @@ static int __get_segment_type(struct f2fs_io_info *fio) { int type = 0; - switch (fio->sbi->active_logs) { + switch (F2FS_OPTION(fio->sbi).active_logs) { case 2: type = __get_segment_type_2(fio); break; @@ -2723,6 +2824,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_io_info fio = { .sbi = sbi, .type = META, + .temp = HOT, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_NOIDLE | REQ_META | REQ_PRIO, .old_blkaddr = page->index, @@ -2769,8 +2871,15 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) int rewrite_data_page(struct f2fs_io_info *fio) { int err; + struct f2fs_sb_info *sbi = fio->sbi; fio->new_blkaddr = fio->old_blkaddr; + /* i/o temperature is needed for passing down write hints */ + __get_segment_type(fio); + + f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi, + GET_SEGNO(sbi, fio->new_blkaddr))->type)); + stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 5d6d3e72be31..96a2d57ba8a4 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -53,13 +53,19 @@ ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ (sbi)->segs_per_sec)) \ -#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) -#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) +#define MAIN_BLKADDR(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr)) +#define SEG0_BLKADDR(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr)) #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) #define MAIN_SECS(sbi) ((sbi)->total_sections) -#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) +#define TOTAL_SEGS(sbi) \ + (SM_I(sbi) ? 
SM_I(sbi)->segment_count : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count)) #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) @@ -596,6 +602,8 @@ static inline int utilization(struct f2fs_sb_info *sbi) #define DEF_MIN_FSYNC_BLOCKS 8 #define DEF_MIN_HOT_BLOCKS 16 +#define SMALL_VOLUME_SEGMENTS (16 * 512) /* 16GB */ + enum { F2FS_IPU_FORCE, F2FS_IPU_SSR, @@ -630,10 +638,17 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); } -static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) { - BUG_ON(blk_addr < SEG0_BLKADDR(sbi) - || blk_addr >= MAX_BLKADDR(sbi)); + struct f2fs_sb_info *sbi = fio->sbi; + + if (PAGE_TYPE_OF_BIO(fio->type) == META && + (!is_read_io(fio->op) || fio->is_meta)) + BUG_ON(blk_addr < SEG0_BLKADDR(sbi) || + blk_addr >= MAIN_BLKADDR(sbi)); + else + BUG_ON(blk_addr < MAIN_BLKADDR(sbi) || + blk_addr >= MAX_BLKADDR(sbi)); } /* diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 872f9c1078f0..a622eb4f59f2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -60,7 +60,7 @@ char *fault_name[FAULT_MAX] = { static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate) { - struct f2fs_fault_info *ffi = &sbi->fault_info; + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (rate) { atomic_set(&ffi->inject_ops, 0); @@ -129,6 +129,10 @@ enum { Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, + Opt_whint, + Opt_alloc, + Opt_fsync, + Opt_test_dummy_encryption, Opt_err, }; @@ -182,6 +186,10 @@ static match_table_t f2fs_tokens = { {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, + {Opt_whint, "whint_mode=%s"}, + {Opt_alloc, "alloc_mode=%s"}, + {Opt_fsync, "fsync_mode=%s"}, + {Opt_test_dummy_encryption, "test_dummy_encryption"}, {Opt_err, NULL}, }; @@ -202,21 +210,24 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi) block_t limit = (sbi->user_block_count << 1) / 1000; /* limit is 0.2% */ - if (test_opt(sbi, RESERVE_ROOT) && sbi->root_reserved_blocks > limit) { - sbi->root_reserved_blocks = limit; + if (test_opt(sbi, RESERVE_ROOT) && + F2FS_OPTION(sbi).root_reserved_blocks > limit) { + F2FS_OPTION(sbi).root_reserved_blocks = limit; f2fs_msg(sbi->sb, KERN_INFO, "Reduce reserved blocks for root = %u", - sbi->root_reserved_blocks); + F2FS_OPTION(sbi).root_reserved_blocks); } if (!test_opt(sbi, RESERVE_ROOT) && - (!uid_eq(sbi->s_resuid, + (!uid_eq(F2FS_OPTION(sbi).s_resuid, make_kuid(&init_user_ns, F2FS_DEF_RESUID)) || - !gid_eq(sbi->s_resgid, + !gid_eq(F2FS_OPTION(sbi).s_resgid, make_kgid(&init_user_ns, F2FS_DEF_RESGID)))) f2fs_msg(sbi->sb, KERN_INFO, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", - from_kuid_munged(&init_user_ns, sbi->s_resuid), - from_kgid_munged(&init_user_ns, sbi->s_resgid)); + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); } static void init_once(void *foo) @@ -236,7 +247,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, char *qname; int ret = -EINVAL; - if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { f2fs_msg(sb, KERN_ERR, "Cannot change journaled " "quota options when quota turned on"); @@ -254,8 +265,8 @@ static int 
f2fs_set_qf_name(struct super_block *sb, int qtype, "Not enough memory for storing quotafile name"); return -EINVAL; } - if (sbi->s_qf_names[qtype]) { - if (strcmp(sbi->s_qf_names[qtype], qname) == 0) + if (F2FS_OPTION(sbi).s_qf_names[qtype]) { + if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) ret = 0; else f2fs_msg(sb, KERN_ERR, @@ -268,7 +279,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "quotafile must be on filesystem root"); goto errout; } - sbi->s_qf_names[qtype] = qname; + F2FS_OPTION(sbi).s_qf_names[qtype] = qname; set_opt(sbi, QUOTA); return 0; errout: @@ -280,13 +291,13 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - if (sb_any_quota_loaded(sb) && sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options" " when quota turned on"); return -EINVAL; } - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; + kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); + F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; return 0; } @@ -302,15 +313,19 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) "Cannot enable project quota enforcement."); return -1; } - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA] || - sbi->s_qf_names[PRJQUOTA]) { - if (test_opt(sbi, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) { + if (test_opt(sbi, USRQUOTA) && + F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) clear_opt(sbi, USRQUOTA); - if (test_opt(sbi, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) + if (test_opt(sbi, GRPQUOTA) && + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) clear_opt(sbi, GRPQUOTA); - if (test_opt(sbi, PRJQUOTA) && sbi->s_qf_names[PRJQUOTA]) + if (test_opt(sbi, PRJQUOTA) && + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) clear_opt(sbi, PRJQUOTA); if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || @@ -320,19 +335,19 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) return -1; } - if (!sbi->s_jquota_fmt) { + if (!F2FS_OPTION(sbi).s_jquota_fmt) { f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format " "not specified"); return -1; } } - if (f2fs_sb_has_quota_ino(sbi->sb) && sbi->s_jquota_fmt) { + if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) { f2fs_msg(sbi->sb, KERN_INFO, "QUOTA feature is enabled, so ignore jquota_fmt"); - sbi->s_jquota_fmt = 0; + F2FS_OPTION(sbi).s_jquota_fmt = 0; } - if (f2fs_sb_has_quota_ino(sbi->sb) && sb_rdonly(sbi->sb)) { + if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) { f2fs_msg(sbi->sb, KERN_INFO, "Filesystem with quota feature cannot be mounted RDWR " "without CONFIG_QUOTA"); @@ -403,14 +418,14 @@ static int parse_options(struct super_block *sb, char *options) q = bdev_get_queue(sb->s_bdev); if (blk_queue_discard(q)) { set_opt(sbi, DISCARD); - } else if (!f2fs_sb_mounted_blkzoned(sb)) { + } else if (!f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "mounting with \"discard\" option, but " "the device does not support discard"); } break; case Opt_nodiscard: - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "discard is required for zoned block devices"); return -EINVAL; @@ -440,7 +455,7 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; set_opt(sbi, INLINE_XATTR_SIZE); - 
sbi->inline_xattr_size = arg; + F2FS_OPTION(sbi).inline_xattr_size = arg; break; #else case Opt_user_xattr: @@ -480,7 +495,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) return -EINVAL; - sbi->active_logs = arg; + F2FS_OPTION(sbi).active_logs = arg; break; case Opt_disable_ext_identify: set_opt(sbi, DISABLE_EXT_IDENTIFY); @@ -524,9 +539,9 @@ static int parse_options(struct super_block *sb, char *options) if (test_opt(sbi, RESERVE_ROOT)) { f2fs_msg(sb, KERN_INFO, "Preserve previous reserve_root=%u", - sbi->root_reserved_blocks); + F2FS_OPTION(sbi).root_reserved_blocks); } else { - sbi->root_reserved_blocks = arg; + F2FS_OPTION(sbi).root_reserved_blocks = arg; set_opt(sbi, RESERVE_ROOT); } break; @@ -539,7 +554,7 @@ static int parse_options(struct super_block *sb, char *options) "Invalid uid value %d", arg); return -EINVAL; } - sbi->s_resuid = uid; + F2FS_OPTION(sbi).s_resuid = uid; break; case Opt_resgid: if (args->from && match_int(args, &arg)) @@ -550,7 +565,7 @@ static int parse_options(struct super_block *sb, char *options) "Invalid gid value %d", arg); return -EINVAL; } - sbi->s_resgid = gid; + F2FS_OPTION(sbi).s_resgid = gid; break; case Opt_mode: name = match_strdup(&args[0]); @@ -559,7 +574,7 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "adaptive mode is not allowed with " "zoned block device feature"); @@ -585,7 +600,7 @@ static int parse_options(struct super_block *sb, char *options) 1 << arg, BIO_MAX_PAGES); return -EINVAL; } - sbi->write_io_size_bits = arg; + F2FS_OPTION(sbi).write_io_size_bits = arg; break; case Opt_fault_injection: if (args->from && match_int(args, &arg)) @@ -646,13 +661,13 @@ static int parse_options(struct super_block *sb, char *options) return ret; break; case Opt_jqfmt_vfsold: - sbi->s_jquota_fmt = QFMT_VFS_OLD; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD; break; case Opt_jqfmt_vfsv0: - sbi->s_jquota_fmt = QFMT_VFS_V0; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0; break; case Opt_jqfmt_vfsv1: - sbi->s_jquota_fmt = QFMT_VFS_V1; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1; break; case Opt_noquota: clear_opt(sbi, QUOTA); @@ -679,6 +694,73 @@ static int parse_options(struct super_block *sb, char *options) "quota operations not supported"); break; #endif + case Opt_whint: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 10 && + !strncmp(name, "user-based", 10)) { + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER; + } else if (strlen(name) == 3 && + !strncmp(name, "off", 3)) { + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; + } else if (strlen(name) == 8 && + !strncmp(name, "fs-based", 8)) { + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_alloc: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + if (strlen(name) == 7 && + !strncmp(name, "default", 7)) { + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; + } else if (strlen(name) == 5 && + !strncmp(name, "reuse", 5)) { + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_fsync: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 5 && + !strncmp(name, "posix", 5)) { + 
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; + } else if (strlen(name) == 6 && + !strncmp(name, "strict", 6)) { + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_test_dummy_encryption: +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (!f2fs_sb_has_encrypt(sb)) { + f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); + return -EINVAL; + } + + F2FS_OPTION(sbi).test_dummy_encryption = true; + f2fs_msg(sb, KERN_INFO, + "Test dummy encryption mode enabled"); +#else + f2fs_msg(sb, KERN_INFO, + "Test dummy encryption mount option ignored"); +#endif + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -699,14 +781,22 @@ static int parse_options(struct super_block *sb, char *options) } if (test_opt(sbi, INLINE_XATTR_SIZE)) { + if (!f2fs_sb_has_extra_attr(sb) || + !f2fs_sb_has_flexible_inline_xattr(sb)) { + f2fs_msg(sb, KERN_ERR, + "extra_attr or flexible_inline_xattr " + "feature is off"); + return -EINVAL; + } if (!test_opt(sbi, INLINE_XATTR)) { f2fs_msg(sb, KERN_ERR, "inline_xattr_size option should be " "set with inline_xattr option"); return -EINVAL; } - if (!sbi->inline_xattr_size || - sbi->inline_xattr_size >= DEF_ADDRS_PER_INODE - + if (!F2FS_OPTION(sbi).inline_xattr_size || + F2FS_OPTION(sbi).inline_xattr_size >= + DEF_ADDRS_PER_INODE - F2FS_TOTAL_EXTRA_ATTR_SIZE - DEF_INLINE_RESERVED_SIZE - DEF_MIN_INLINE_SIZE) { @@ -715,6 +805,12 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; } } + + /* Not pass down write hints if the number of active logs is lesser + * than NR_CURSEG_TYPE. + */ + if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE) + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; return 0; } @@ -731,7 +827,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; - fi->i_advise = 0; init_rwsem(&fi->i_sem); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); @@ -743,10 +838,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_rwsem(&fi->i_mmap_sem); init_rwsem(&fi->i_xattr_sem); -#ifdef CONFIG_QUOTA - memset(&fi->i_dquot, 0, sizeof(fi->i_dquot)); - fi->i_reserved_quota = 0; -#endif /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; @@ -957,7 +1048,7 @@ static void f2fs_put_super(struct super_block *sb) mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif destroy_percpu_info(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) @@ -1071,8 +1162,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) - sbi->current_reserved_blocks; - if (buf->f_bfree > sbi->root_reserved_blocks) - buf->f_bavail = buf->f_bfree - sbi->root_reserved_blocks; + if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks) + buf->f_bavail = buf->f_bfree - + F2FS_OPTION(sbi).root_reserved_blocks; else buf->f_bavail = 0; @@ -1107,10 +1199,10 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, #ifdef CONFIG_QUOTA struct f2fs_sb_info *sbi = F2FS_SB(sb); - if (sbi->s_jquota_fmt) { + if (F2FS_OPTION(sbi).s_jquota_fmt) { char *fmtname = ""; - switch (sbi->s_jquota_fmt) { + switch (F2FS_OPTION(sbi).s_jquota_fmt) { case QFMT_VFS_OLD: fmtname = "vfsold"; break; @@ -1124,14 
+1216,17 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, seq_printf(seq, ",jqfmt=%s", fmtname); } - if (sbi->s_qf_names[USRQUOTA]) - seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) + seq_show_option(seq, "usrjquota", + F2FS_OPTION(sbi).s_qf_names[USRQUOTA]); - if (sbi->s_qf_names[GRPQUOTA]) - seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) + seq_show_option(seq, "grpjquota", + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]); - if (sbi->s_qf_names[PRJQUOTA]) - seq_show_option(seq, "prjjquota", sbi->s_qf_names[PRJQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) + seq_show_option(seq, "prjjquota", + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]); #endif } @@ -1166,7 +1261,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",noinline_xattr"); if (test_opt(sbi, INLINE_XATTR_SIZE)) seq_printf(seq, ",inline_xattr_size=%u", - sbi->inline_xattr_size); + F2FS_OPTION(sbi).inline_xattr_size); #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL if (test_opt(sbi, POSIX_ACL)) @@ -1202,18 +1297,20 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, "adaptive"); else if (test_opt(sbi, LFS)) seq_puts(seq, "lfs"); - seq_printf(seq, ",active_logs=%u", sbi->active_logs); + seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); if (test_opt(sbi, RESERVE_ROOT)) seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u", - sbi->root_reserved_blocks, - from_kuid_munged(&init_user_ns, sbi->s_resuid), - from_kgid_munged(&init_user_ns, sbi->s_resgid)); + F2FS_OPTION(sbi).root_reserved_blocks, + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); if (F2FS_IO_SIZE_BITS(sbi)) seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); #ifdef CONFIG_F2FS_FAULT_INJECTION if (test_opt(sbi, FAULT_INJECTION)) seq_printf(seq, ",fault_injection=%u", - sbi->fault_info.inject_rate); + F2FS_OPTION(sbi).fault_info.inject_rate); #endif #ifdef CONFIG_QUOTA if (test_opt(sbi, QUOTA)) @@ -1226,15 +1323,37 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",prjquota"); #endif f2fs_show_quota_options(seq, sbi->sb); + if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) + seq_printf(seq, ",whint_mode=%s", "user-based"); + else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) + seq_printf(seq, ",whint_mode=%s", "fs-based"); +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (F2FS_OPTION(sbi).test_dummy_encryption) + seq_puts(seq, ",test_dummy_encryption"); +#endif + if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT) + seq_printf(seq, ",alloc_mode=%s", "default"); + else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) + seq_printf(seq, ",alloc_mode=%s", "reuse"); + + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX) + seq_printf(seq, ",fsync_mode=%s", "posix"); + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + seq_printf(seq, ",fsync_mode=%s", "strict"); return 0; } static void default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ - sbi->active_logs = NR_CURSEG_TYPE; - sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE; + F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; + 
F2FS_OPTION(sbi).test_dummy_encryption = false; + sbi->readdir_ra = 1; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -1244,7 +1363,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; set_opt(sbi, FLUSH_MERGE); - if (f2fs_sb_mounted_blkzoned(sbi->sb)) { + if (f2fs_sb_has_blkzoned(sbi->sb)) { set_opt_mode(sbi, F2FS_MOUNT_LFS); set_opt(sbi, DISCARD); } else { @@ -1271,16 +1390,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; unsigned long old_sb_flags; - int err, active_logs; + int err; bool need_restart_gc = false; bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct f2fs_fault_info ffi = sbi->fault_info; -#endif #ifdef CONFIG_QUOTA - int s_jquota_fmt; - char *s_qf_names[MAXQUOTAS]; int i, j; #endif @@ -1290,21 +1404,21 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ org_mount_opt = sbi->mount_opt; old_sb_flags = sb->s_flags; - active_logs = sbi->active_logs; #ifdef CONFIG_QUOTA - s_jquota_fmt = sbi->s_jquota_fmt; + org_mount_opt.s_jquota_fmt = F2FS_OPTION(sbi).s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - if (sbi->s_qf_names[i]) { - s_qf_names[i] = kstrdup(sbi->s_qf_names[i], - GFP_KERNEL); - if (!s_qf_names[i]) { + if (F2FS_OPTION(sbi).s_qf_names[i]) { + org_mount_opt.s_qf_names[i] = + kstrdup(F2FS_OPTION(sbi).s_qf_names[i], + GFP_KERNEL); + if (!org_mount_opt.s_qf_names[i]) { for (j = 0; j < i; j++) - kfree(s_qf_names[j]); + kfree(org_mount_opt.s_qf_names[j]); return -ENOMEM; } } else { - s_qf_names[i] = NULL; + org_mount_opt.s_qf_names[i] = NULL; } } #endif @@ -1374,7 +1488,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_gc = true; } - if (*flags & MS_RDONLY) { + if (*flags & MS_RDONLY || + F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) { writeback_inodes_sb(sb, WB_REASON_SYNC); sync_inodes_sb(sb); @@ -1400,7 +1515,7 @@ skip: #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) - kfree(s_qf_names[i]); + kfree(org_mount_opt.s_qf_names[i]); #endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | @@ -1418,18 +1533,14 @@ restore_gc: } restore_opts: #ifdef CONFIG_QUOTA - sbi->s_jquota_fmt = s_jquota_fmt; + F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - kfree(sbi->s_qf_names[i]); - sbi->s_qf_names[i] = s_qf_names[i]; + kfree(F2FS_OPTION(sbi).s_qf_names[i]); + F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i]; } #endif sbi->mount_opt = org_mount_opt; - sbi->active_logs = active_logs; sb->s_flags = old_sb_flags; -#ifdef CONFIG_F2FS_FAULT_INJECTION - sbi->fault_info = ffi; -#endif return err; } @@ -1551,8 +1662,8 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) { - return dquot_quota_on_mount(sbi->sb, sbi->s_qf_names[type], - sbi->s_jquota_fmt, type); + return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type], + F2FS_OPTION(sbi).s_jquota_fmt, type); } int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) @@ -1571,7 +1682,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) } for (i = 0; i < MAXQUOTAS; i++) { - if (sbi->s_qf_names[i]) { + if (F2FS_OPTION(sbi).s_qf_names[i]) { err = f2fs_quota_on_mount(sbi, i); if (!err) { enabled 
= 1; @@ -1801,11 +1912,28 @@ static int f2fs_get_context(struct inode *inode, void *ctx, size_t len) static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + /* + * Encrypting the root directory is not allowed because fsck + * expects lost+found directory to exist and remain unencrypted + * if LOST_FOUND feature is enabled. + * + */ + if (f2fs_sb_has_lost_found(sbi->sb) && + inode->i_ino == F2FS_ROOT_INO(sbi)) + return -EPERM; + return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len, fs_data, XATTR_CREATE); } +static bool f2fs_dummy_context(struct inode *inode) +{ + return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode)); +} + static unsigned f2fs_max_namelen(struct inode *inode) { return S_ISLNK(inode->i_mode) ? @@ -1816,6 +1944,7 @@ static const struct fscrypt_operations f2fs_cryptops = { .key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, + .dummy_context = f2fs_dummy_context, .empty_dir = f2fs_empty_dir, .max_namelen = f2fs_max_namelen, }; @@ -1898,7 +2027,6 @@ static int __f2fs_commit_super(struct buffer_head *bh, lock_buffer(bh); if (super) memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - set_buffer_uptodate(bh); set_buffer_dirty(bh); unlock_buffer(bh); @@ -2185,6 +2313,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->dirty_device = 0; spin_lock_init(&sbi->dev_lock); + + init_rwsem(&sbi->sb_lock); } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -2210,7 +2340,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) unsigned int n = 0; int err = -EIO; - if (!f2fs_sb_mounted_blkzoned(sbi->sb)) + if (!f2fs_sb_has_blkzoned(sbi->sb)) return 0; if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != @@ -2338,7 +2468,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) } /* write back-up superblock first */ - bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0: 1); + bh = sb_bread(sbi->sb, sbi->valid_super_block ? 
0 : 1); if (!bh) return -EIO; err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); @@ -2349,7 +2479,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return err; /* write current valid superblock */ - bh = sb_getblk(sbi->sb, sbi->valid_super_block); + bh = sb_bread(sbi->sb, sbi->valid_super_block); if (!bh) return -EIO; err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); @@ -2421,7 +2551,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && - !f2fs_sb_mounted_blkzoned(sbi->sb)) { + !f2fs_sb_has_blkzoned(sbi->sb)) { f2fs_msg(sbi->sb, KERN_ERR, "Zoned block device feature not enabled\n"); return -EINVAL; @@ -2455,6 +2585,18 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) return 0; } +static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_i = SM_I(sbi); + + /* adjust parameters according to the volume size */ + if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) { + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; + sm_i->dcc_info->discard_granularity = 1; + sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; + } +} + static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; @@ -2502,8 +2644,8 @@ try_onemore: sb->s_fs_info = sbi; sbi->raw_super = raw_super; - sbi->s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); - sbi->s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); + F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); + F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); /* precompute checksum seed for metadata */ if (f2fs_sb_has_inode_chksum(sb)) @@ -2516,7 +2658,7 @@ try_onemore: * devices, but mandatory for host-managed zoned block devices. */ #ifndef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_ERR, "Zoned block device support is not enabled\n"); err = -EOPNOTSUPP; @@ -2732,7 +2874,7 @@ try_onemore: * Turn on quotas which were not enabled for read-only mounts if * filesystem has quota feature, so that they are updated correctly. 
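/*
 * Illustrative sketch (not part of the patch) of why f2fs_commit_super()
 * above now uses sb_bread() instead of sb_getblk(): sb_getblk() may hand
 * back a buffer that was never read from disk, and __f2fs_commit_super()
 * only memcpy()s the superblock bytes at F2FS_SUPER_OFFSET, so the rest
 * of the block could be written back as garbage. sb_bread() fills the
 * whole buffer from disk first, which is also why the explicit
 * set_buffer_uptodate() call became unnecessary.
 */
static int example_commit_super(struct super_block *sb, sector_t block,
				struct f2fs_super_block *super)
{
	struct buffer_head *bh;
	int err;

	bh = sb_bread(sb, block);	/* read from disk, buffer marked uptodate */
	if (!bh)
		return -EIO;
	lock_buffer(bh);
	memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
	set_buffer_dirty(bh);
	unlock_buffer(bh);
	err = sync_dirty_buffer(bh);	/* the driver uses __sync_dirty_buffer() with FUA */
	brelse(bh);
	return err;
}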
*/ - if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) { + if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); if (err) { f2fs_msg(sb, KERN_ERR, @@ -2807,6 +2949,8 @@ skip_recovery: f2fs_join_shrinker(sbi); + f2fs_tuning_parameters(sbi); + f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx", cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); @@ -2815,7 +2959,7 @@ skip_recovery: free_meta: #ifdef CONFIG_QUOTA - if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) + if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) f2fs_quota_off_umount(sbi->sb); #endif f2fs_sync_inode_meta(sbi); @@ -2859,7 +3003,7 @@ free_bio_info: free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif kfree(options); free_sb_buf: diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d978c7b6ea04..f33a56d6e6dd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -58,7 +58,7 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) #ifdef CONFIG_F2FS_FAULT_INJECTION else if (struct_type == FAULT_INFO_RATE || struct_type == FAULT_INFO_TYPE) - return (unsigned char *)&sbi->fault_info; + return (unsigned char *)&F2FS_OPTION(sbi).fault_info; #endif return NULL; } @@ -92,10 +92,10 @@ static ssize_t features_show(struct f2fs_attr *a, if (!sb->s_bdev->bd_part) return snprintf(buf, PAGE_SIZE, "0\n"); - if (f2fs_sb_has_crypto(sb)) + if (f2fs_sb_has_encrypt(sb)) len += snprintf(buf, PAGE_SIZE - len, "%s", "encryption"); - if (f2fs_sb_mounted_blkzoned(sb)) + if (f2fs_sb_has_blkzoned(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "blkzoned"); if (f2fs_sb_has_extra_attr(sb)) @@ -116,6 +116,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_inode_crtime(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_crtime"); + if (f2fs_sb_has_lost_found(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "lost_found"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -136,6 +139,27 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!ptr) return -EINVAL; + if (!strcmp(a->attr.name, "extension_list")) { + __u8 (*extlist)[F2FS_EXTENSION_LEN] = + sbi->raw_super->extension_list; + int cold_count = le32_to_cpu(sbi->raw_super->extension_count); + int hot_count = sbi->raw_super->hot_ext_count; + int len = 0, i; + + len += snprintf(buf + len, PAGE_SIZE - len, + "cold file extension:\n"); + for (i = 0; i < cold_count; i++) + len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + extlist[i]); + + len += snprintf(buf + len, PAGE_SIZE - len, + "hot file extension:\n"); + for (i = cold_count; i < cold_count + hot_count; i++) + len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + extlist[i]); + return len; + } + ui = (unsigned int *)(ptr + a->offset); return snprintf(buf, PAGE_SIZE, "%u\n", *ui); @@ -154,6 +178,41 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (!ptr) return -EINVAL; + if (!strcmp(a->attr.name, "extension_list")) { + const char *name = strim((char *)buf); + bool set = true, hot; + + if (!strncmp(name, "[h]", 3)) + hot = true; + else if (!strncmp(name, "[c]", 3)) + hot = false; + else + return -EINVAL; + + name += 3; + + if (*name == '!') { + name++; + set = false; + } + + if (strlen(name) >= F2FS_EXTENSION_LEN) + return -EINVAL; + + down_write(&sbi->sb_lock); + + ret = update_extension_list(sbi, name, hot, set); + if (ret) + goto out; + + ret = f2fs_commit_super(sbi, false); + if (ret) + update_extension_list(sbi, name, hot, !set); +out: + up_write(&sbi->sb_lock); + return ret ? ret : count; + } + ui = (unsigned int *)(ptr + a->offset); ret = kstrtoul(skip_spaces(buf), 0, &t); @@ -166,7 +225,7 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); if (t > (unsigned long)(sbi->user_block_count - - sbi->root_reserved_blocks)) { + F2FS_OPTION(sbi).root_reserved_blocks)) { spin_unlock(&sbi->stat_lock); return -EINVAL; } @@ -236,6 +295,7 @@ enum feat_id { FEAT_FLEXIBLE_INLINE_XATTR, FEAT_QUOTA_INO, FEAT_INODE_CRTIME, + FEAT_LOST_FOUND, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -251,6 +311,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_FLEXIBLE_INLINE_XATTR: case FEAT_QUOTA_INO: case FEAT_INODE_CRTIME: + case FEAT_LOST_FOUND: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -307,6 +368,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); +F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); #ifdef CONFIG_F2FS_FAULT_INJECTION F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); @@ -329,6 +391,7 @@ F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); +F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -357,6 +420,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(iostat_enable), ATTR_LIST(readdir_ra),
ATTR_LIST(gc_pin_file_thresh), + ATTR_LIST(extension_list), #ifdef CONFIG_F2FS_FAULT_INJECTION ATTR_LIST(inject_rate), ATTR_LIST(inject_type), @@ -383,6 +447,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(flexible_inline_xattr), ATTR_LIST(quota_ino), ATTR_LIST(inode_crtime), + ATTR_LIST(lost_found), NULL, }; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 5f31ebd96c06..a2edb0049eb5 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -129,6 +129,8 @@ lockd(void *vrqstp) { int err = 0; struct svc_rqst *rqstp = vrqstp; + struct net *net = &init_net; + struct lockd_net *ln = net_generic(net, lockd_net_id); /* try_to_freeze() is called from svc_recv() */ set_freezable(); @@ -173,6 +175,8 @@ lockd(void *vrqstp) if (nlmsvc_ops) nlmsvc_invalidate_all(); nlm_shutdown_hosts(); + cancel_delayed_work_sync(&ln->grace_period_end); + locks_end_grace(&ln->lockd_manager); return 0; } @@ -267,8 +271,6 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) if (ln->nlmsvc_users) { if (--ln->nlmsvc_users == 0) { nlm_shutdown_hosts_net(net); - cancel_delayed_work_sync(&ln->grace_period_end); - locks_end_grace(&ln->lockd_manager); svc_shutdown_net(serv, net); dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net); } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 54313322ee5b..c8e90152b61b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -461,6 +461,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, goto out_err_free; /* fh */ + rc = -EIO; p = xdr_inline_decode(&stream, 4); if (!p) goto out_err_free; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8ef6f70c9e25..0f397e62de5a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3025,6 +3025,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f .rpc_resp = &res, }; int status; + int i; bitmask[0] = FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_FH_EXPIRE_TYPE | @@ -3090,8 +3091,13 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; server->cache_consistency_bitmask[2] = 0; + + /* Avoid a regression due to buggy server */ + for (i = 0; i < ARRAY_SIZE(res.exclcreat_bitmask); i++) + res.exclcreat_bitmask[i] &= res.attr_bitmask[i]; memcpy(server->exclcreat_bitmask, res.exclcreat_bitmask, sizeof(server->exclcreat_bitmask)); + server->acl_bitmask = res.acl_bitmask; server->fh_expire_type = res.fh_expire_type; } @@ -7670,6 +7676,12 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf /* fall through */ case -NFS4ERR_RETRY_UNCACHED_REP: return -EAGAIN; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + nfs4_schedule_session_recovery(clp->cl_session, + task->tk_status); + break; default: nfs4_schedule_lease_recovery(clp); } @@ -7748,7 +7760,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, if (status == 0) status = task->tk_status; rpc_put_task(task); - return 0; out: dprintk("<-- %s status=%d\n", __func__, status); return status; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9a0b219ff74d..83fba40396ae 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1593,13 +1593,14 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) 
nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); } -static void nfs4_reclaim_complete(struct nfs_client *clp, +static int nfs4_reclaim_complete(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops, struct rpc_cred *cred) { /* Notify the server we're done reclaiming our state */ if (ops->reclaim_complete) - (void)ops->reclaim_complete(clp, cred); + return ops->reclaim_complete(clp, cred); + return 0; } static void nfs4_clear_reclaim_server(struct nfs_server *server) @@ -1646,13 +1647,16 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) { const struct nfs4_state_recovery_ops *ops; struct rpc_cred *cred; + int err; if (!nfs4_state_clear_reclaim_reboot(clp)) return; ops = clp->cl_mvops->reboot_recovery_ops; cred = nfs4_get_clid_cred(clp); - nfs4_reclaim_complete(clp, ops, cred); + err = nfs4_reclaim_complete(clp, ops, cred); put_rpccred(cred); + if (err == -NFS4ERR_CONN_NOT_BOUND_TO_SESSION) + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); } static void nfs_delegation_clear_all(struct nfs_client *clp) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 220b04f04523..985a4cdae06d 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -272,6 +272,16 @@ ssize_t ovl_getxattr(struct dentry *dentry, const char *name, return vfs_getxattr(realpath.dentry, name, value, size); } +static bool ovl_can_list(const char *s) +{ + /* List all non-trusted xatts */ + if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) + return true; + + /* Never list trusted.overlay, list other trusted for superuser only */ + return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN); +} + ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { struct path realpath; @@ -296,7 +306,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return -EIO; len -= slen; - if (ovl_is_private_xattr(s)) { + if (!ovl_can_list(s)) { res -= slen; memmove(s, s + slen, len); } else { diff --git a/fs/proc/base.c b/fs/proc/base.c index 62facaf3971f..072dac45b102 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3090,8 +3090,8 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUSR, proc_oom_adj_operations), - REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), + REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), @@ -3484,8 +3484,8 @@ static const struct pid_entry tid_base_stuff[] = { ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUSR, proc_oom_adj_operations), - REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), + REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), diff --git a/fs/proc/uid.c b/fs/proc/uid.c index 616d99b157c3..040591d341f8 100644 --- a/fs/proc/uid.c +++ b/fs/proc/uid.c @@ -286,6 +286,8 @@ static const struct inode_operations proc_uid_inode_operations = { int __init proc_uid_init(void) { proc_uid = proc_mkdir("uid", NULL); + if (!proc_uid) + return -ENOMEM; 
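/*
 * Illustrative sketch (not part of the patch) of the pattern the
 * proc_uid_init() fix above follows: proc_mkdir() returns NULL on
 * allocation failure, so the entry must be checked before its ops
 * pointers are assigned. Names here are hypothetical.
 */
int __init example_proc_init(void)
{
	struct proc_dir_entry *dir = proc_mkdir("example", NULL);

	if (!dir)
		return -ENOMEM;		/* bail out before dereferencing */
	dir->proc_iops = &example_inode_operations;
	return 0;
}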
proc_uid->proc_iops = &proc_uid_inode_operations; proc_uid->proc_fops = &proc_uid_operations; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6d73a04d0150..e7fd9490bcf6 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -56,6 +56,7 @@ struct bio { struct bio *bi_next; /* request queue link */ struct block_device *bi_bdev; unsigned int bi_flags; /* status, command, etc */ + unsigned short bi_write_hint; int bi_error; unsigned long bi_rw; /* bottom bits READ/WRITE, * top bits priority diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 073365c9808a..2ebfa01b7091 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -21,6 +21,7 @@ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ +#define F2FS_EXTENSION_LEN 8 /* max size of extension */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ @@ -38,10 +39,10 @@ #define F2FS_MAX_QUOTAS 3 -#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ -#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ -#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ -#define F2FS_IO_SIZE_BITS(sbi) ((sbi)->write_io_size_bits) /* power of 2 */ +#define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ +#define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */ +#define F2FS_IO_SIZE_BYTES(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 12)) /* B */ +#define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */ #define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) /* This flag is used by node and meta inodes, and by recovery */ @@ -101,7 +102,7 @@ struct f2fs_super_block { __u8 uuid[16]; /* 128-bit uuid for volume */ __le16 volume_name[MAX_VOLUME_NAME]; /* volume name */ __le32 extension_count; /* # of extensions below */ - __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ + __u8 extension_list[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];/* extension array */ __le32 cp_payload; __u8 version[VERSION_LEN]; /* the kernel version */ __u8 init_version[VERSION_LEN]; /* the initial kernel version */ @@ -110,12 +111,14 @@ struct f2fs_super_block { __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ struct f2fs_device devs[MAX_DEVICES]; /* device list */ __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */ - __u8 reserved[315]; /* valid reserved region */ + __u8 hot_ext_count; /* # of hot file extension */ + __u8 reserved[314]; /* valid reserved region */ } __packed; /* * For checkpoint */ +#define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 #define CP_NOCRC_RECOVERY_FLAG 0x00000200 #define CP_TRIMMED_FLAG 0x00000100 #define CP_NAT_BITS_FLAG 0x00000080 @@ -302,6 +305,10 @@ struct f2fs_node { */ #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) #define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8) +#define NAT_ENTRY_BITMAP_SIZE_ALIGNED \ + ((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) / \ + BITS_PER_LONG * BITS_PER_LONG) + struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 20b7b1b41630..250f4d1ce9c5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -20,6 +20,7 @@ #include #include #include +#include 
#include #include #include @@ -143,6 +144,9 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) +/* File is capable of returning -EAGAIN if I/O will block */ +#define FMODE_NOWAIT ((__force fmode_t)0x8000000) + /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector * that indicates that they should check the contents of the iovec are @@ -326,9 +330,22 @@ struct page; struct address_space; struct writeback_control; +/* + * Write life time hint values. + */ +enum rw_hint { + WRITE_LIFE_NOT_SET = 0, + WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, + WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, + WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, + WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, + WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, +}; + #define IOCB_EVENTFD (1 << 0) #define IOCB_APPEND (1 << 1) #define IOCB_DIRECT (1 << 2) +#define IOCB_NOWAIT (1 << 7) struct kiocb { struct file *ki_filp; @@ -336,6 +353,7 @@ struct kiocb { void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); void *private; int ki_flags; + enum rw_hint ki_hint; }; static inline bool is_sync_kiocb(struct kiocb *kiocb) @@ -635,6 +653,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; unsigned int i_blkbits; + enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED @@ -1073,8 +1092,6 @@ struct file_lock_context { #define OFFT_OFFSET_MAX INT_LIMIT(off_t) #endif -#include - extern void send_sigio(struct fown_struct *fown, int fd, int band); #ifdef CONFIG_FILE_LOCKING diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 8641e56b8f8a..9e535af579e8 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -13,42 +13,13 @@ #ifndef _LINUX_FSCRYPT_H #define _LINUX_FSCRYPT_H -#include #include -#include -#include -#include -#include -#include #define FS_CRYPTO_BLOCK_SIZE 16 +struct fscrypt_ctx; struct fscrypt_info; -struct fscrypt_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ -}; - -/** - * For encrypted symlinks, the ciphertext length is stored at the beginning - * of the string in little-endian format. 
- */ -struct fscrypt_symlink_data { - __le16 len; - char encrypted_path[1]; -} __packed; - struct fscrypt_str { unsigned char *name; u32 len; @@ -67,86 +38,11 @@ struct fscrypt_name { #define fname_name(p) ((p)->disk_name.name) #define fname_len(p) ((p)->disk_name.len) -/* - * fscrypt superblock flags - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - -/* - * crypto opertions for filesystems - */ -struct fscrypt_operations { - unsigned int flags; - const char *key_prefix; - int (*get_context)(struct inode *, void *, size_t); - int (*set_context)(struct inode *, const void *, size_t, void *); - bool (*dummy_context)(struct inode *); - bool (*empty_dir)(struct inode *); - unsigned (*max_namelen)(struct inode *); -}; - -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - if (inode->i_sb->s_cop->dummy_context && - inode->i_sb->s_cop->dummy_context(inode)) - return true; - return false; -} - -static inline bool fscrypt_valid_enc_modes(u32 contents_mode, - u32 filenames_mode) -{ - if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && - filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) - return true; - - if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && - filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) - return true; - - return false; -} - -static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) -{ - if (str->len == 1 && str->name[0] == '.') - return true; - - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') - return true; - - return false; -} - #if __FS_HAS_ENCRYPTION - -static inline struct page *fscrypt_control_page(struct page *page) -{ - return ((struct fscrypt_ctx *)page_private(page))->w.control_page; -} - -static inline bool fscrypt_has_encryption_key(const struct inode *inode) -{ - return (inode->i_crypt_info != NULL); -} - #include - -#else /* !__FS_HAS_ENCRYPTION */ - -static inline struct page *fscrypt_control_page(struct page *page) -{ - WARN_ON_ONCE(1); - return ERR_PTR(-EINVAL); -} - -static inline bool fscrypt_has_encryption_key(const struct inode *inode) -{ - return 0; -} - +#else #include -#endif /* __FS_HAS_ENCRYPTION */ +#endif /** * fscrypt_require_key - require an inode's encryption key @@ -287,4 +183,68 @@ static inline int fscrypt_prepare_setattr(struct dentry *dentry, return 0; } +/** + * fscrypt_prepare_symlink - prepare to create a possibly-encrypted symlink + * @dir: directory in which the symlink is being created + * @target: plaintext symlink target + * @len: length of @target excluding null terminator + * @max_len: space the filesystem has available to store the symlink target + * @disk_link: (out) the on-disk symlink target being prepared + * + * This function computes the size the symlink target will require on-disk, + * stores it in @disk_link->len, and validates it against @max_len. An + * encrypted symlink may be longer than the original. + * + * Additionally, @disk_link->name is set to @target if the symlink will be + * unencrypted, but left NULL if the symlink will be encrypted. For encrypted + * symlinks, the filesystem must call fscrypt_encrypt_symlink() to create the + * on-disk target later. (The reason for the two-step process is that some + * filesystems need to know the size of the symlink target before creating the + * inode, e.g. to determine whether it will be a "fast" or "slow" symlink.) + * + * Return: 0 on success, -ENAMETOOLONG if the symlink target is too long, + * -ENOKEY if the encryption key is missing, or another -errno code if a problem + * occurred while setting up the encryption key. 
+ */ +static inline int fscrypt_prepare_symlink(struct inode *dir, + const char *target, + unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + if (IS_ENCRYPTED(dir) || fscrypt_dummy_context_enabled(dir)) + return __fscrypt_prepare_symlink(dir, len, max_len, disk_link); + + disk_link->name = (unsigned char *)target; + disk_link->len = len + 1; + if (disk_link->len > max_len) + return -ENAMETOOLONG; + return 0; +} + +/** + * fscrypt_encrypt_symlink - encrypt the symlink target if needed + * @inode: symlink inode + * @target: plaintext symlink target + * @len: length of @target excluding null terminator + * @disk_link: (in/out) the on-disk symlink target being prepared + * + * If the symlink target needs to be encrypted, then this function encrypts it + * into @disk_link->name. fscrypt_prepare_symlink() must have been called + * previously to compute @disk_link->len. If the filesystem did not allocate a + * buffer for @disk_link->name after calling fscrypt_prepare_link(), then one + * will be kmalloc()'ed and the filesystem will be responsible for freeing it. + * + * Return: 0 on success, -errno on failure + */ +static inline int fscrypt_encrypt_symlink(struct inode *inode, + const char *target, + unsigned int len, + struct fscrypt_str *disk_link) +{ + if (IS_ENCRYPTED(inode)) + return __fscrypt_encrypt_symlink(inode, target, len, disk_link); + return 0; +} + #endif /* _LINUX_FSCRYPT_H */ diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index c4c6bf2c390e..5777251400f9 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -13,6 +13,16 @@ #ifndef _LINUX_FSCRYPT_NOTSUPP_H #define _LINUX_FSCRYPT_NOTSUPP_H +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return false; +} + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + return false; +} + /* crypto.c */ static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) @@ -42,6 +52,11 @@ static inline int fscrypt_decrypt_page(const struct inode *inode, return -EOPNOTSUPP; } +static inline struct page *fscrypt_control_page(struct page *page) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} static inline void fscrypt_restore_control_page(struct page *page) { @@ -115,16 +130,8 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) return; } -static inline u32 fscrypt_fname_encrypted_size(const struct inode *inode, - u32 ilen) -{ - /* never happens */ - WARN_ON(1); - return 0; -} - static inline int fscrypt_fname_alloc_buffer(const struct inode *inode, - u32 ilen, + u32 max_encrypted_len, struct fscrypt_str *crypto_str) { return -EOPNOTSUPP; @@ -143,13 +150,6 @@ static inline int fscrypt_fname_disk_to_usr(struct inode *inode, return -EOPNOTSUPP; } -static inline int fscrypt_fname_usr_to_disk(struct inode *inode, - const struct qstr *iname, - struct fscrypt_str *oname) -{ - return -EOPNOTSUPP; -} - static inline bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len) { @@ -207,4 +207,27 @@ static inline int __fscrypt_prepare_lookup(struct inode *dir, return -EOPNOTSUPP; } +static inline int __fscrypt_prepare_symlink(struct inode *dir, + unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_encrypt_symlink(struct inode *inode, + const char *target, + unsigned int len, + struct fscrypt_str *disk_link) +{ + return -EOPNOTSUPP; +} + +static inline 
void *fscrypt_get_symlink(struct inode *inode, + const void *caddr, + unsigned int max_size) +{ + return ERR_PTR(-EOPNOTSUPP); +} + #endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 2db5e9706f60..c88d2058902a 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -10,8 +10,54 @@ #ifndef _LINUX_FSCRYPT_SUPP_H #define _LINUX_FSCRYPT_SUPP_H +#include +#include + +/* + * fscrypt superblock flags + */ +#define FS_CFLG_OWN_PAGES (1U << 1) + +/* + * crypto operations for filesystems + */ +struct fscrypt_operations { + unsigned int flags; + const char *key_prefix; + int (*get_context)(struct inode *, void *, size_t); + int (*set_context)(struct inode *, const void *, size_t, void *); + bool (*dummy_context)(struct inode *); + bool (*empty_dir)(struct inode *); + unsigned (*max_namelen)(struct inode *); +}; + +struct fscrypt_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + u8 flags; /* Flags */ +}; + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return (inode->i_crypt_info != NULL); +} + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + return inode->i_sb->s_cop->dummy_context && + inode->i_sb->s_cop->dummy_context(inode); +} + /* crypto.c */ -extern struct kmem_cache *fscrypt_info_cachep; extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); extern void fscrypt_release_ctx(struct fscrypt_ctx *); extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, @@ -19,6 +65,12 @@ extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, u64, gfp_t); extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, unsigned int, u64); + +static inline struct page *fscrypt_control_page(struct page *page) +{ + return ((struct fscrypt_ctx *)page_private(page))->w.control_page; +} + extern void fscrypt_restore_control_page(struct page *); extern const struct dentry_operations fscrypt_d_ops; @@ -54,14 +106,11 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) kfree(fname->crypto_buf.name); } -extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32); extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, struct fscrypt_str *); extern void fscrypt_fname_free_buffer(struct fscrypt_str *); extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, const struct fscrypt_str *, struct fscrypt_str *); -extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, - struct fscrypt_str *); #define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 @@ -152,5 +201,13 @@ extern int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *new_dentry, unsigned int flags); extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); +extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link); +extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, + unsigned int len, + struct fscrypt_str *disk_link); +extern void *fscrypt_get_symlink(struct inode *inode, const void *caddr, + unsigned int max_size); #endif /* _LINUX_FSCRYPT_SUPP_H */ diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index fe052e234906..bb1018882199 100644 --- 
a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -465,6 +465,7 @@ struct mlx4_update_qp_params { u16 rate_val; }; +struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn); int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, enum mlx4_update_qp_attr attr, struct mlx4_update_qp_params *params); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index a91b67b18a73..5c93f4a89afa 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -635,8 +635,14 @@ enum { }; enum { - CQE_RSS_HTYPE_IP = 0x3 << 6, - CQE_RSS_HTYPE_L4 = 0x3 << 2, + CQE_RSS_HTYPE_IP = 0x3 << 2, + /* cqe->rss_hash_type[3:2] - IP destination selected for hash + * (00 = none, 01 = IPv4, 10 = IPv6, 11 = Reserved) + */ + CQE_RSS_HTYPE_L4 = 0x3 << 6, + /* cqe->rss_hash_type[7:6] - L4 destination selected for hash + * (00 = none, 01 = TCP, 10 = UDP, 11 = IPSEC.SPI) + */ }; enum { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b5421f6f155a..a6da214d0584 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -879,10 +879,10 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); -int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, - int offset, int len); -int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, - int len); +int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len); +int __must_check skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len); int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); int skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) consume_skb(a) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 07f27b15e6fe..881c5d46a66c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -996,9 +996,9 @@ enum rate_info_flags { * @RATE_INFO_BW_160: 160 MHz bandwidth */ enum rate_info_bw { + RATE_INFO_BW_20 = 0, RATE_INFO_BW_5, RATE_INFO_BW_10, - RATE_INFO_BW_20, RATE_INFO_BW_40, RATE_INFO_BW_80, RATE_INFO_BW_160, diff --git a/include/net/x25.h b/include/net/x25.h index c383aa4edbf0..6d30a01d281d 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -298,10 +298,10 @@ void x25_check_rbuf(struct sock *); /* sysctl_net_x25.c */ #ifdef CONFIG_SYSCTL -void x25_register_sysctl(void); +int x25_register_sysctl(void); void x25_unregister_sysctl(void); #else -static inline void x25_register_sysctl(void) {}; +static inline int x25_register_sysctl(void) { return 0; }; static inline void x25_unregister_sysctl(void) {}; #endif /* CONFIG_SYSCTL */ diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index beed138bd359..f85ed3a5ef4d 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -42,6 +42,27 @@ #define F_SEAL_WRITE 0x0008 /* prevent writes */ /* (1U << 31) is reserved for signed error codes */ +/* + * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the + * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on + * the specific file. + */ +#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11) +#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12) +#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13) +#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14) + +/* + * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be + * used to clear any hints previously set.
+ */ +#define RWF_WRITE_LIFE_NOT_SET 0 +#define RWH_WRITE_LIFE_NONE 1 +#define RWH_WRITE_LIFE_SHORT 2 +#define RWH_WRITE_LIFE_MEDIUM 3 +#define RWH_WRITE_LIFE_LONG 4 +#define RWH_WRITE_LIFE_EXTREME 5 + /* * Types of directory notifications that may be requested. */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 322f63370038..656d55d30f8d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5387,9 +5387,6 @@ static void perf_output_read_one(struct perf_output_handle *handle, __output_copy(handle, values, n * sizeof(u64)); } -/* - * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. - */ static void perf_output_read_group(struct perf_output_handle *handle, struct perf_event *event, u64 enabled, u64 running) @@ -5434,6 +5431,13 @@ static void perf_output_read_group(struct perf_output_handle *handle, #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ PERF_FORMAT_TOTAL_TIME_RUNNING) +/* + * XXX PERF_SAMPLE_READ vs inherited events seems difficult. + * + * The problem is that it's both hard and excessively expensive to iterate the + * child list, not to mention that it's impossible to IPI the children running + * on another CPU, from interrupt/NMI context. + */ static void perf_output_read(struct perf_output_handle *handle, struct perf_event *event) { @@ -8167,9 +8171,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, local64_set(&hwc->period_left, hwc->sample_period); /* - * we currently do not support PERF_FORMAT_GROUP on inherited events + * We currently do not support PERF_SAMPLE_READ on inherited events. + * See perf_output_read(). */ - if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) + if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)) goto err_ns; if (!has_branch_stack(event)) diff --git a/kernel/futex.c b/kernel/futex.c index a09c1dd1f659..760a97da1050 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -470,6 +470,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; struct page *page, *page_head; + struct address_space *mapping; int err, ro = 0; /* @@ -555,7 +556,19 @@ again: } #endif - lock_page(page_head); + /* + * The treatment of mapping from this point on is critical. The page + * lock protects many things but in this context the page lock + * stabilizes mapping, prevents inode freeing in the shared + * file-backed region case and guards against movement to swap cache. + * + * Strictly speaking the page lock is not needed in all cases being + * considered here and the page lock forces unnecessary serialization. + * From this point on, mapping will be re-verified if necessary and + * page lock will be acquired only if it is unavoidable. + */ + + mapping = READ_ONCE(page_head->mapping); /* * If page_head->mapping is NULL, then it cannot be a PageAnon @@ -572,18 +585,31 @@ again: * shmem_writepage move it from filecache to swapcache beneath us: * an unlikely race, but we do need to retry for page_head->mapping. */ - if (!page_head->mapping) { - int shmem_swizzled = PageSwapCache(page_head); + if (unlikely(!mapping)) { + int shmem_swizzled; + + /* + * Page lock is required to identify which special case above + * applies. If this is really a shmem page then the page lock + * will prevent unexpected transitions.
+ */ + lock_page(page); + shmem_swizzled = PageSwapCache(page) || page->mapping; unlock_page(page_head); put_page(page_head); + if (shmem_swizzled) goto again; + return -EFAULT; } /* * Private mappings are handled in a simple way. * + * If the futex key is stored on an anonymous page, then the associated + * object is the mm which is implicitly pinned by the calling process. + * * NOTE: When userspace waits on a MAP_SHARED mapping, even if * it's a read-only handle, it's expected that futexes attach to * the object not the particular process. @@ -601,16 +627,74 @@ again: key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; + + get_futex_key_refs(key); /* implies smp_mb(); (B) */ + } else { + struct inode *inode; + + /* + * The associated futex object in this case is the inode and + * the page->mapping must be traversed. Ordinarily this should + * be stabilised under page lock but it's not strictly + * necessary in this case as we just want to pin the inode, not + * update the radix tree or anything like that. + * + * The RCU read lock is taken as the inode is finally freed + * under RCU. If the mapping still matches expectations then the + * mapping->host can be safely accessed as being a valid inode. + */ + rcu_read_lock(); + + if (READ_ONCE(page_head->mapping) != mapping) { + rcu_read_unlock(); + put_page(page_head); + + goto again; + } + + inode = READ_ONCE(mapping->host); + if (!inode) { + rcu_read_unlock(); + put_page(page_head); + + goto again; + } + + /* + * Take a reference unless it is about to be freed. Previously + * this reference was taken by ihold under the page lock + * pinning the inode in place so i_lock was unnecessary. The + * only way for this check to fail is if the inode was + * truncated in parallel so warn for now if this happens. + * + * We are not calling into get_futex_key_refs() in file-backed + * cases, therefore a successful atomic_inc return below will + * guarantee that get_futex_key() will still imply smp_mb(); (B). 
+ */ + if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) { + rcu_read_unlock(); + put_page(page_head); + + goto again; + } + + /* Should be impossible but let's be paranoid for now */ + if (WARN_ON_ONCE(inode->i_mapping != mapping)) { + err = -EFAULT; + rcu_read_unlock(); + iput(inode); + + goto out; + } + key->both.offset |= FUT_OFF_INODE; /* inode-based key */ - key->shared.inode = page_head->mapping->host; + key->shared.inode = inode; key->shared.pgoff = basepage_index(page); + rcu_read_unlock(); } - get_futex_key_refs(key); /* implies MB (B) */ - out: - unlock_page(page_head); put_page(page_head); return err; } diff --git a/kernel/pid.c b/kernel/pid.c index b17263be9082..5fe7cdb6d05f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -322,8 +322,10 @@ struct pid *alloc_pid(struct pid_namespace *ns) } if (unlikely(is_child_reaper(pid))) { - if (pid_ns_prepare_proc(ns)) + if (pid_ns_prepare_proc(ns)) { + disable_pid_allocation(ns); goto out_free; + } } get_pid_ns(ns); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 23e37b0674df..f962ab1eb046 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2402,7 +2402,8 @@ void task_numa_work(struct callback_head *work) return; - down_read(&mm->mmap_sem); + if (!down_read_trylock(&mm->mmap_sem)) + return; vma = find_vma(mm, start); if (!vma) { reset_ptenuma_scan(p); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 89da689b6433..ac9791dd4768 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "vlan.h" @@ -654,8 +655,11 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev, { const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops; + struct phy_device *phydev = vlan->real_dev->phydev; - if (ops->get_ts_info) { + if (phydev && phydev->drv && phydev->drv->ts_info) { + return phydev->drv->ts_info(phydev, info); + } else if (ops->get_ts_info) { return ops->get_ts_info(vlan->real_dev, info); } else { info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index da4078651c22..5b95477c3453 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -716,6 +716,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) { struct hci_dev *hdev = req->hdev; u8 events[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + bool changed = false; /* If Connectionless Slave Broadcast master role is supported * enable all necessary events for it.
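The kernel/futex.c hunks above swap "take a reference under the page lock" for a speculative grab: re-check page_head->mapping under rcu_read_lock(), then pin the inode with atomic_inc_not_zero(&inode->i_count) so an inode already being freed is never resurrected, and retry on any mismatch. A minimal sketch of that pattern with hypothetical names (the real code re-faults the page rather than spinning; that part is elided here):

#include <linux/atomic.h>
#include <linux/rcupdate.h>

struct obj {
	atomic_t refcount;	/* reaches 0 once freeing has begun */
};

static struct obj *obj_get_stable(struct obj *(*lookup)(void))
{
	struct obj *o;

retry:
	rcu_read_lock();
	o = lookup();		/* RCU keeps *o readable, not alive */
	if (!o || !atomic_inc_not_zero(&o->refcount)) {
		/* Gone or already dying: drop RCU and look up again. */
		rcu_read_unlock();
		goto retry;
	}
	rcu_read_unlock();
	return o;		/* caller now holds a real reference */
}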
@@ -725,6 +726,7 @@ events[1] |= 0x80; /* Synchronization Train Complete */ events[2] |= 0x10; /* Slave Page Response Timeout */ events[2] |= 0x20; /* CSB Channel Map Change */ + changed = true; } /* If Connectionless Slave Broadcast slave role is supported @@ -735,13 +737,24 @@ events[2] |= 0x02; /* CSB Receive */ events[2] |= 0x04; /* CSB Timeout */ events[2] |= 0x08; /* Truncated Page Complete */ + changed = true; } /* Enable Authenticated Payload Timeout Expired event if supported */ - if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) + if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) { events[2] |= 0x80; + changed = true; + } - hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events); + /* Some Broadcom-based controllers indicate support for the Set Event + * Mask Page 2 command, but then actually do not support it. Since + * the default value is all bits set to zero, the command is only + * required if the event mask has to be changed. In case no change + * to the event mask is needed, skip this command. + */ + if (changed) + hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, + sizeof(events), events); } static void hci_init3_req(struct hci_request *req, unsigned long opt) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index bc95e48d5cfb..378c9ed00d40 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -295,6 +295,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) u32 yes; struct crush_rule *r; + err = -EINVAL; ceph_decode_32_safe(p, end, yes, bad); if (!yes) { dout("crush_decode NO rule %d off %x %p to %p\n", diff --git a/net/core/dev.c b/net/core/dev.c index 129e188d7722..154da3b1348b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -991,7 +991,7 @@ bool dev_valid_name(const char *name) { if (*name == '\0') return false; - if (strlen(name) >= IFNAMSIZ) + if (strnlen(name, IFNAMSIZ) == IFNAMSIZ) return false; if (!strcmp(name, ".") || !strcmp(name, "..")) return false; @@ -2517,7 +2517,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) return 0; - eth = (struct ethhdr *)skb_mac_header(skb); + eth = (struct ethhdr *)skb->data; type = eth->h_proto; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index eff329e9c23b..e713c75695f9 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1132,10 +1132,6 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, lladdr = neigh->ha; } - if (new & NUD_CONNECTED) - neigh->confirmed = jiffies; - neigh->updated = jiffies; - /* If entry was valid and address is not changed, do not change entry state, if new one is STALE. */ @@ -1159,6 +1155,16 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, } } + /* Update timestamps only once we know we will make a change to the + * neighbour entry. Otherwise we risk moving the locktime window with + * no-op updates and ignoring relevant ARP updates.
+ */ + if (new != old || lladdr != neigh->ha) { + if (new & NUD_CONNECTED) + neigh->confirmed = jiffies; + neigh->updated = jiffies; + } + if (new != old) { neigh_del_timer(neigh); if (new & NUD_PROBE) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b5c351d2830b..ccd20669ac00 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -310,6 +310,25 @@ out_undo: goto out; } +static int __net_init net_defaults_init_net(struct net *net) +{ + net->core.sysctl_somaxconn = SOMAXCONN; + return 0; +} + +static struct pernet_operations net_defaults_ops = { + .init = net_defaults_init_net, +}; + +static __init int net_defaults_init(void) +{ + if (register_pernet_subsys(&net_defaults_ops)) + panic("Cannot initialize net default settings"); + + return 0; +} + +core_initcall(net_defaults_init); #ifdef CONFIG_NET_NS static struct kmem_cache *net_cachep; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2549fa05b564..3c5e3c022232 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2577,7 +2577,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); - skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags & + SKBTX_SHARED_FRAG; if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -3141,8 +3142,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags & - SKBTX_SHARED_FRAG; + skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & + SKBTX_SHARED_FRAG; while (pos < offset + len) { if (i >= nfrags) { @@ -3355,24 +3356,18 @@ void __init skb_init(void) NULL); } -/** - * skb_to_sgvec - Fill a scatter-gather list from a socket buffer - * @skb: Socket buffer containing the buffers to be mapped - * @sg: The scatter-gather list to map into - * @offset: The offset into the buffer's contents to start mapping - * @len: Length of buffer space to be mapped - * - * Fill the specified scatter-gather list with mappings/pointers into a - * region of the buffer space attached to a socket buffer. 
- */ static int -__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) +__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len, + unsigned int recursion_level) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int elt = 0; + if (unlikely(recursion_level >= 24)) + return -EMSGSIZE; + if (copy > 0) { if (copy > len) copy = len; @@ -3391,6 +3386,8 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + if (unlikely(elt && sg_is_last(&sg[elt - 1]))) + return -EMSGSIZE; if (copy > len) copy = len; @@ -3405,16 +3402,22 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) } skb_walk_frags(skb, frag_iter) { - int end; + int end, ret; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { + if (unlikely(elt && sg_is_last(&sg[elt - 1]))) + return -EMSGSIZE; + if (copy > len) copy = len; - elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start, - copy); + ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start, + copy, recursion_level + 1); + if (unlikely(ret < 0)) + return ret; + elt += ret; if ((len -= copy) == 0) return elt; offset += copy; @@ -3425,6 +3428,31 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) return elt; } +/** + * skb_to_sgvec - Fill a scatter-gather list from a socket buffer + * @skb: Socket buffer containing the buffers to be mapped + * @sg: The scatter-gather list to map into + * @offset: The offset into the buffer's contents to start mapping + * @len: Length of buffer space to be mapped + * + * Fill the specified scatter-gather list with mappings/pointers into a + * region of the buffer space attached to a socket buffer. Returns either + * the number of scatterlist items used, or -EMSGSIZE if the contents + * could not fit. + */ +int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) +{ + int nsg = __skb_to_sgvec(skb, sg, offset, len, 0); + + if (nsg <= 0) + return nsg; + + sg_mark_end(&sg[nsg - 1]); + + return nsg; +} +EXPORT_SYMBOL_GPL(skb_to_sgvec); + /* As compared with skb_to_sgvec, skb_to_sgvec_nomark only maps skb to the given * sglist without marking the sg which contains the last skb data as the end.
* So the caller can manipulate the sg list at will when padding new data after @@ -3447,19 +3475,11 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { - return __skb_to_sgvec(skb, sg, offset, len); + return __skb_to_sgvec(skb, sg, offset, len, 0); } EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark); -int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) -{ - int nsg = __skb_to_sgvec(skb, sg, offset, len); - sg_mark_end(&sg[nsg - 1]); - - return nsg; -} -EXPORT_SYMBOL_GPL(skb_to_sgvec); /** * skb_cow_data - Check that a socket buffer's data buffers are writable @@ -3741,7 +3761,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, return; if (tsonly) { - skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags; + skb_shinfo(skb)->tx_flags |= skb_shinfo(orig_skb)->tx_flags & + SKBTX_ANY_TSTAMP; skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 6578a0a2f708..32898247d8bf 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -429,8 +429,6 @@ static __net_init int sysctl_core_net_init(struct net *net) { struct ctl_table *tbl; - net->core.sysctl_somaxconn = SOMAXCONN; - tbl = netns_core_table; if (!net_eq(net, &init_net)) { tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index a548be247e15..47b397264f24 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -302,12 +302,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) skb->sk = sk; skb->protocol = htons(ETH_P_IEEE802154); - dev_put(dev); - err = dev_queue_xmit(skb); if (err > 0) err = net_xmit_errno(err); + dev_put(dev); + return err ?: size; out_skb: @@ -689,12 +689,12 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) skb->sk = sk; skb->protocol = htons(ETH_P_IEEE802154); - dev_put(dev); - err = dev_queue_xmit(skb); if (err > 0) err = net_xmit_errno(err); + dev_put(dev); + return err ?: size; out_skb: diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 22377c8ff14b..e8f862358518 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -220,7 +220,9 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ @@ -393,7 +395,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, ihl); sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index cb5eb649ad5f..bfa79831873f 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -437,7 +437,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) /*unsigned long now; */ struct net *net = dev_net(dev); - rt = ip_route_output(net, sip, tip, 0, 0); + rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev)); if (IS_ERR(rt)) return 1; if (rt->dst.dev != dev) { @@ -658,6 +658,7 @@ static int arp_process(struct
net *net, struct sock *sk, struct sk_buff *skb) unsigned char *arp_ptr; struct rtable *rt; unsigned char *sha; + unsigned char *tha = NULL; __be32 sip, tip; u16 dev_type = dev->type; int addr_type; @@ -729,6 +730,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) break; #endif default: + tha = arp_ptr; arp_ptr += dev->addr_len; } memcpy(&tip, arp_ptr, 4); @@ -839,8 +841,18 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) It is possible, that this option should be enabled for some devices (strip is candidate) */ - is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip && - addr_type == RTN_UNICAST; + is_garp = tip == sip && addr_type == RTN_UNICAST; + + /* Unsolicited ARP _replies_ also require target hwaddr to be + * the same as source. + */ + if (is_garp && arp->ar_op == htons(ARPOP_REPLY)) + is_garp = + /* IPv4 over IEEE 1394 doesn't provide target + * hardware address field in its ARP payload. + */ + tha && + !memcmp(tha, sha, dev->addr_len); if (!n && ((arp->ar_op == htons(ARPOP_REPLY) && diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 20fb25e3027b..3d8021d55336 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -268,10 +268,11 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esph->spi = x->id.spi; sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + clen + alen); - + err = skb_to_sgvec(skb, sg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); + if (unlikely(err < 0)) + goto error; aead_request_set_crypt(req, sg, sg, ivlen + clen, iv); aead_request_set_ad(req, assoclen); @@ -481,7 +482,9 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) } sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + err = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out; aead_request_set_crypt(req, sg, sg, elen + ivlen, iv); aead_request_set_ad(req, assoclen); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 80e2d1b0c08c..3d62feb65932 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -253,13 +253,14 @@ static struct net_device *__ip_tunnel_create(struct net *net, struct net_device *dev; char name[IFNAMSIZ]; - if (parms->name[0]) - strlcpy(name, parms->name, IFNAMSIZ); - else { - if (strlen(ops->kind) > (IFNAMSIZ - 3)) { - err = -E2BIG; + err = -E2BIG; + if (parms->name[0]) { + if (!dev_valid_name(parms->name)) + goto failed; + strlcpy(name, parms->name, IFNAMSIZ); + } else { + if (strlen(ops->kind) > (IFNAMSIZ - 3)) goto failed; - } strlcpy(name, ops->kind, IFNAMSIZ); strncat(name, "%d", 2); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1a8e95d2dd39..da83648f95f8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -118,6 +118,7 @@ int sysctl_tcp_default_init_rwnd __read_mostly = TCP_INIT_CWND * 2; #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ +#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -3544,7 +3545,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) { /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ if (before(ack, prior_snd_una - tp->max_window)) 
{ - tcp_send_challenge_ack(sk, skb); + if (!(flag & FLAG_NO_CHALLENGE_ACK)) + tcp_send_challenge_ack(sk, skb); return -1; } goto old_ack; @@ -5834,13 +5836,17 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) /* step 5: check the ACK field */ acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT) > 0; + FLAG_UPDATE_TS_RECENT | + FLAG_NO_CHALLENGE_ACK) > 0; + if (!acceptable) { + if (sk->sk_state == TCP_SYN_RECV) + return 1; /* send one RST */ + tcp_send_challenge_ack(sk, skb); + goto discard; + } switch (sk->sk_state) { case TCP_SYN_RECV: - if (!acceptable) - return 1; - if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); @@ -5909,14 +5915,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * our SYNACK so stop the SYNACK timer. */ if (req) { - /* Return RST if ack_seq is invalid. - * Note that RFC793 only says to generate a - * DUPACK for it but for TCP Fast Open it seems - * better to treat this case like TCP_SYN_RECV - * above. - */ - if (!acceptable) - return 1; /* We no longer need the request sock. */ reqsk_fastopen_remove(sk, req, false); tcp_rearm_rto(sk); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 40c29712f32a..199658afa68b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -986,7 +986,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; ifa->prefix_len = pfxlen; - ifa->flags = flags | IFA_F_TENTATIVE; + ifa->flags = flags; + /* No need to add the TENTATIVE flag for addresses with NODAD */ + if (!(flags & IFA_F_NODAD)) + ifa->flags |= IFA_F_TENTATIVE; ifa->valid_lft = valid_lft; ifa->prefered_lft = prefered_lft; ifa->cstamp = ifa->tstamp = jiffies; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 189eb10b742d..e742c4deb13d 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -423,7 +423,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ @@ -603,7 +605,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) ip6h->hop_limit = 0; sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index cbcdd5db31f4..44a2010e2076 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -248,9 +248,11 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esph->spi = x->id.spi; sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + clen + alen); + err = skb_to_sgvec(skb, sg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); + if (unlikely(err < 0)) + goto error; aead_request_set_crypt(req, sg, sg, ivlen + clen, iv); aead_request_set_ad(req, assoclen); @@ -423,7 +425,9 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) } sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + ret = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(ret < 0)) + goto out; aead_request_set_crypt(req, sg, sg, elen + ivlen, iv); aead_request_set_ad(req, assoclen); diff 
--git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 85afef175cf9..6e496c3dd8ef 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -320,11 +320,13 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, if (t || !create) return t; - if (parms->name[0]) + if (parms->name[0]) { + if (!dev_valid_name(parms->name)) + return NULL; strlcpy(name, parms->name, IFNAMSIZ); - else + } else { strcpy(name, "ip6gre%d"); - + } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ip6gre_tunnel_setup); if (!dev) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3ef81c387923..bfa710e8b615 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -340,6 +340,10 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) static inline int ip6_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + struct dst_entry *dst = skb_dst(skb); + + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); skb_sender_cpu_clear(skb); return dst_output(net, sk, skb); } @@ -534,8 +538,6 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, net, NULL, skb, skb->dev, dst->dev, ip6_forward_finish); @@ -1276,7 +1278,7 @@ static int __ip6_append_data(struct sock *sk, unsigned int flags, int dontfrag) { struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; + unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; int exthdrlen = 0; int dst_exthdrlen = 0; int hh_len; @@ -1312,6 +1314,12 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; + /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit + * the first fragment + */ + if (headersize + transhdrlen > mtu) + goto emsgsize; + if (cork->length + length > mtu - headersize && dontfrag && (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_RAW)) { @@ -1327,9 +1335,8 @@ static int __ip6_append_data(struct sock *sk, if (cork->length + length > maxnonfragsize - headersize) { emsgsize: - ipv6_local_error(sk, EMSGSIZE, fl6, - mtu - headersize + - sizeof(struct ipv6hdr)); + pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0); + ipv6_local_error(sk, EMSGSIZE, fl6, pmtu); return -EMSGSIZE; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 27b00c4e642d..3c2468bd0b7c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -286,13 +286,16 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) struct net_device *dev; struct ip6_tnl *t; char name[IFNAMSIZ]; - int err = -ENOMEM; + int err = -E2BIG; - if (p->name[0]) + if (p->name[0]) { + if (!dev_valid_name(p->name)) + goto failed; strlcpy(name, p->name, IFNAMSIZ); - else + } else { sprintf(name, "ip6tnl%%d"); - + } + err = -ENOMEM; dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ip6_tnl_dev_setup); if (!dev) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 00111ac94251..b8bf123f7f79 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -212,10 +212,13 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p char name[IFNAMSIZ]; int err; - if (p->name[0]) + if (p->name[0]) { + if (!dev_valid_name(p->name)) + goto failed; strlcpy(name, p->name, IFNAMSIZ); - else + } 
else { sprintf(name, "ip6_vti%%d"); + } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup); if (!dev) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5842430a122a..6b65f3bf7270 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -844,6 +844,9 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_node *fn; struct rt6_info *rt; + if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) + flags &= ~RT6_LOOKUP_F_IFACE; + read_lock_bh(&table->tb6_lock); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index bbba435d0639..51f7c32f04d7 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -244,11 +244,13 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, if (!create) goto failed; - if (parms->name[0]) + if (parms->name[0]) { + if (!dev_valid_name(parms->name)) + goto failed; strlcpy(name, parms->name, IFNAMSIZ); - else + } else { strcpy(name, "sit%d"); - + } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ipip6_tunnel_setup); if (!dev) @@ -690,6 +692,7 @@ static int ipip6_rcv(struct sk_buff *skb) if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6))) goto out; + iph = ip_hdr(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { diff --git a/net/key/af_key.c b/net/key/af_key.c index 6482b001f19a..15150b412930 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3305,7 +3305,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + - sec_ctx->sadb_x_sec_len) { + sec_ctx->sadb_x_sec_len*8) { *dir = -EINVAL; goto out; } diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index fb3248ff8b48..ae3438685caa 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -732,6 +732,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl if ((session->ifname[0] && nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) || + (session->offset && + nla_put_u16(skb, L2TP_ATTR_OFFSET, session->offset)) || (session->cookie_len && nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len, &session->cookie[0])) || diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index bb8edb9ef506..1e698768aca8 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -309,6 +309,8 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) int rc = -EINVAL; dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); + + lock_sock(sk); if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; @@ -380,6 +382,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) out_put: llc_sap_put(sap); out: + release_sock(sk); return rc; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f3deee7da389..c16b4da20db2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4326,6 +4326,10 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data)) return -EINVAL; + /* If a reconfig is happening, bail out */ + if (local->in_reconfig) + return -EBUSY; + if (assoc) { rcu_read_lock(); have_sta = sta_info_get(sdata, cbss->bssid); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4ece07c68b3f..c68e020427ab 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -887,8 +887,13 @@ restart: 
} out: local_bh_enable(); - if (last) + if (last) { + /* nf ct hash resize happened, now clear the leftover. */ + if ((struct nf_conn *)cb->args[1] == last) + cb->args[1] = 0; + nf_ct_put(last); + } return skb->len; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f59d82f0aa97..83c0f56d05cb 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1034,6 +1034,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family != AF_NETLINK) return -EINVAL; + if (alen < sizeof(struct sockaddr_nl)) + return -EINVAL; + if ((nladdr->nl_groups || nladdr->nl_pid) && !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; diff --git a/net/rds/bind.c b/net/rds/bind.c index b22ea956522b..e29b47193645 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -108,6 +108,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) rs, &addr, (int)ntohs(*port)); break; } else { + rs->rs_bound_addr = 0; rds_sock_put(rs); ret = -ENOMEM; break; diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index d7a9ab5a9d9c..6c65fb229e50 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -209,7 +209,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct sk_buff *trailer; unsigned int len; u16 check; - int nsg; + int nsg, err; sp = rxrpc_skb(skb); @@ -240,7 +240,9 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, len &= ~(call->conn->size_align - 1); sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, len); + err = skb_to_sgvec(skb, sg, 0, len); + if (unlikely(err < 0)) + return err; crypto_blkcipher_encrypt_iv(&desc, sg, sg, len); _leave(" = 0"); @@ -336,7 +338,7 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, struct sk_buff *trailer; u32 data_size, buf; u16 check; - int nsg; + int nsg, ret; _enter(""); @@ -348,7 +350,9 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, goto nomem; sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, 8); + ret = skb_to_sgvec(skb, sg, 0, 8); + if (unlikely(ret < 0)) + return ret; /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); @@ -411,7 +415,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, struct sk_buff *trailer; u32 data_size, buf; u16 check; - int nsg; + int nsg, ret; _enter(",{%d}", skb->len); @@ -430,7 +434,12 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, } sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, skb->len); + ret = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(ret < 0)) { + if (sg != _sg) + kfree(sg); + return ret; + } /* decrypt from the session key */ token = call->conn->key->payload.data[0]; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 694a06f1e0d5..f44fea22d69c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -101,8 +101,10 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, a->order = n_i; nest = nla_nest_start(skb, a->order); - if (nest == NULL) + if (nest == NULL) { + index--; goto nla_put_failure; + } err = tcf_action_dump_1(skb, a, 0, 0); if (err < 0) { index--; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 0bc6f912f870..bd155e59be1c 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -249,10 +249,14 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg) { - if (cfg->is_ebpf) - bpf_prog_put(cfg->filter); - else - bpf_prog_destroy(cfg->filter); 
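The skb_to_sgvec() call sites converted in this series (ah4, esp4, ah6, esp6 and the rxkad ones just above) all follow the same shape now that the function is __must_check and can fail with -EMSGSIZE: initialize the table, map, and bail out before handing anything to the crypto layer. A condensed sketch of that calling convention (the surrounding function is hypothetical):

#include <linux/scatterlist.h>
#include <linux/skbuff.h>

static int map_skb_for_crypto(struct sk_buff *skb, struct scatterlist *sg,
			      int nsg, unsigned int len)
{
	int err;

	sg_init_table(sg, nsg);
	err = skb_to_sgvec(skb, sg, 0, len);	/* >= 0: sg entries used */
	if (unlikely(err < 0))
		return err;			/* -EMSGSIZE: sg too short */

	/* ... hand sg to the crypto layer here ... */
	return 0;
}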
+ struct bpf_prog *filter = cfg->filter; + + if (filter) { + if (cfg->is_ebpf) + bpf_prog_put(filter); + else + bpf_prog_destroy(filter); + } kfree(cfg->bpf_ops); kfree(cfg->bpf_name); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 481806b43de8..edb8514b4e00 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -723,8 +723,10 @@ static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) sctp_v6_map_v4(addr); } - if (addr->sa.sa_family == AF_INET) + if (addr->sa.sa_family == AF_INET) { + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); return sizeof(struct sockaddr_in); + } return sizeof(struct sockaddr_in6); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index df6a4b2d0728..13c7f42b7040 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -335,11 +335,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, if (!opt->pf->af_supported(addr->sa.sa_family, opt)) return NULL; - /* V4 mapped address are really of AF_INET family */ - if (addr->sa.sa_family == AF_INET6 && - ipv6_addr_v4mapped(&addr->v6.sin6_addr) && - !opt->pf->af_supported(AF_INET, opt)) - return NULL; + if (addr->sa.sa_family == AF_INET6) { + if (len < SIN6_LEN_RFC2133) + return NULL; + /* V4 mapped address are really of AF_INET family */ + if (ipv6_addr_v4mapped(&addr->v6.sin6_addr) && + !opt->pf->af_supported(AF_INET, opt)) + return NULL; + } /* If we get this far, af is valid. */ af = sctp_get_af_specific(addr->sa.sa_family); @@ -1518,7 +1521,7 @@ static void sctp_close(struct sock *sk, long timeout) pr_debug("%s: sk:%p, timeout:%ld\n", __func__, sk, timeout); - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); sk->sk_shutdown = SHUTDOWN_MASK; sk->sk_state = SCTP_SS_CLOSING; @@ -1569,7 +1572,7 @@ static void sctp_close(struct sock *sk, long timeout) * held and that should be grabbed before socket lock. */ spin_lock_bh(&net->sctp.addr_wq_lock); - bh_lock_sock(sk); + bh_lock_sock_nested(sk); /* Hold the sock, since sk_common_release() will put sock_put() * and we have just a little more cleanup. diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 728d65fbab0c..c9c0976d3bbb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2363,7 +2363,12 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: - /* retry with existing socket, after a delay */ + /* + * xs_tcp_force_close() wakes tasks with -EIO. + * We need to wake them first to ensure the + * correct error code. 
+ */ + xprt_wake_pending_tasks(xprt, status); xs_tcp_force_close(xprt); goto out; } diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index a750f330b8dd..c6ab4da4b8e2 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1794,32 +1794,40 @@ void x25_kill_by_neigh(struct x25_neigh *nb) static int __init x25_init(void) { - int rc = proto_register(&x25_proto, 0); + int rc; - if (rc != 0) + rc = proto_register(&x25_proto, 0); + if (rc) goto out; rc = sock_register(&x25_family_ops); - if (rc != 0) + if (rc) goto out_proto; dev_add_pack(&x25_packet_type); rc = register_netdevice_notifier(&x25_dev_notifier); - if (rc != 0) + if (rc) goto out_sock; + rc = x25_register_sysctl(); + if (rc) + goto out_dev; + + rc = x25_proc_init(); + if (rc) + goto out_sysctl; + pr_info("Linux Version 0.2\n"); - x25_register_sysctl(); - rc = x25_proc_init(); - if (rc != 0) - goto out_dev; out: return rc; +out_sysctl: + x25_unregister_sysctl(); out_dev: unregister_netdevice_notifier(&x25_dev_notifier); out_sock: + dev_remove_pack(&x25_packet_type); sock_unregister(AF_X25); out_proto: proto_unregister(&x25_proto); diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index 43239527a205..703d46aae7a2 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -73,9 +73,12 @@ static struct ctl_table x25_table[] = { { 0, }, }; -void __init x25_register_sysctl(void) +int __init x25_register_sysctl(void) { x25_table_header = register_net_sysctl(&init_net, "net/x25", x25_table); + if (!x25_table_header) + return -ENOMEM; + return 0; } void x25_unregister_sysctl(void) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7dfa35d223e6..70535b8ee4d6 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1208,6 +1208,8 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig) x->curlft.add_time = orig->curlft.add_time; x->km.state = orig->km.state; x->km.seq = orig->km.seq; + x->replay = orig->replay; + x->preplay = orig->preplay; return x; diff --git a/scripts/tags.sh b/scripts/tags.sh index 262889046703..45e246595d10 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -106,6 +106,7 @@ all_compiled_sources() case "$i" in *.[cS]) j=${i/\.[cS]/\.o} + j="${j#$tree}" if [ -e $j ]; then echo $i fi diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 13bb3b409b5c..e3f8891c5d72 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1945,8 +1945,9 @@ static inline u32 file_to_av(struct file *file) static inline u32 open_file_to_av(struct file *file) { u32 av = file_to_av(file); + struct inode *inode = file_inode(file); - if (selinux_policycap_openperm) + if (selinux_policycap_openperm && inode->i_sb->s_magic != SOCKFS_MAGIC) av |= FILE__OPEN; return av; @@ -2915,6 +2916,7 @@ static int selinux_inode_permission(struct inode *inode, int mask) static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) { const struct cred *cred = current_cred(); + struct inode *inode = d_backing_inode(dentry); unsigned int ia_valid = iattr->ia_valid; __u32 av = FILE__WRITE; @@ -2930,8 +2932,10 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET)) return dentry_has_perm(cred, dentry, FILE__SETATTR); - if (selinux_policycap_openperm && (ia_valid & ATTR_SIZE) - && !(ia_valid & ATTR_FILE)) + if (selinux_policycap_openperm && + inode->i_sb->s_magic != SOCKFS_MAGIC && + (ia_valid & ATTR_SIZE) && + !(ia_valid & ATTR_FILE)) av |= FILE__OPEN; return dentry_has_perm(cred, 
diff --git a/sound/soc/intel/atom/sst/sst_stream.c b/sound/soc/intel/atom/sst/sst_stream.c
index a74c64c7053c..e83da42a8c03 100644
--- a/sound/soc/intel/atom/sst/sst_stream.c
+++ b/sound/soc/intel/atom/sst/sst_stream.c
@@ -221,7 +221,7 @@ int sst_send_byte_stream_mrfld(struct intel_sst_drv *sst_drv_ctx,
 		sst_free_block(sst_drv_ctx, block);
 out:
 	test_and_clear_bit(pvt_id, &sst_drv_ctx->pvt_id);
-	return 0;
+	return ret;
 }
 
 /*
diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c
index 38d65a3529c4..44d560966e9c 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5645.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5645.c
@@ -96,6 +96,7 @@ static const struct snd_soc_dapm_widget cht_dapm_widgets[] = {
 	SND_SOC_DAPM_HP("Headphone", NULL),
 	SND_SOC_DAPM_MIC("Headset Mic", NULL),
 	SND_SOC_DAPM_MIC("Int Mic", NULL),
+	SND_SOC_DAPM_MIC("Int Analog Mic", NULL),
 	SND_SOC_DAPM_SPK("Ext Spk", NULL),
 	SND_SOC_DAPM_SUPPLY("Platform Clock", SND_SOC_NOPM, 0, 0,
 			platform_clock_control, SND_SOC_DAPM_POST_PMD),
@@ -106,6 +107,8 @@ static const struct snd_soc_dapm_route cht_rt5645_audio_map[] = {
 	{"IN1N", NULL, "Headset Mic"},
 	{"DMIC L1", NULL, "Int Mic"},
 	{"DMIC R1", NULL, "Int Mic"},
+	{"IN2P", NULL, "Int Analog Mic"},
+	{"IN2N", NULL, "Int Analog Mic"},
 	{"Headphone", NULL, "HPOL"},
 	{"Headphone", NULL, "HPOR"},
 	{"Ext Spk", NULL, "SPOL"},
@@ -119,6 +122,9 @@ static const struct snd_soc_dapm_route cht_rt5645_audio_map[] = {
 	{"Headphone", NULL, "Platform Clock"},
 	{"Headset Mic", NULL, "Platform Clock"},
 	{"Int Mic", NULL, "Platform Clock"},
+	{"Int Analog Mic", NULL, "Platform Clock"},
+	{"Int Analog Mic", NULL, "micbias1"},
+	{"Int Analog Mic", NULL, "micbias2"},
 	{"Ext Spk", NULL, "Platform Clock"},
 };
 
@@ -147,6 +153,7 @@ static const struct snd_kcontrol_new cht_mc_controls[] = {
 	SOC_DAPM_PIN_SWITCH("Headphone"),
 	SOC_DAPM_PIN_SWITCH("Headset Mic"),
 	SOC_DAPM_PIN_SWITCH("Int Mic"),
+	SOC_DAPM_PIN_SWITCH("Int Analog Mic"),
 	SOC_DAPM_PIN_SWITCH("Ext Spk"),
 };
 
diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c
index a001331a53c1..df79d7c846ea 100644
--- a/sound/soc/sh/rcar/ssi.c
+++ b/sound/soc/sh/rcar/ssi.c
@@ -453,6 +453,13 @@ static void __rsnd_ssi_interrupt(struct rsnd_mod *mod,
 		struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 		u32 *buf = (u32 *)(runtime->dma_area +
 				   rsnd_dai_pointer_offset(io, 0));
+		int shift = 0;
+
+		switch (runtime->sample_bits) {
+		case 32:
+			shift = 8;
+			break;
+		}
 
 		/*
 		 * 8/16/32 data can be assesse to TDR/RDR register
@@ -460,9 +467,9 @@ static void __rsnd_ssi_interrupt(struct rsnd_mod *mod,
 		 * see rsnd_ssi_init()
 		 */
 		if (rsnd_io_is_play(io))
-			rsnd_mod_write(mod, SSITDR, *buf);
+			rsnd_mod_write(mod, SSITDR, (*buf) << shift);
 		else
-			*buf = rsnd_mod_read(mod, SSIRDR);
+			*buf = (rsnd_mod_read(mod, SSIRDR) >> shift);
 
 		elapsed = rsnd_dai_pointer_update(io, sizeof(*buf));
 	}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index ebe7115c751a..da8afc121118 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1152,6 +1152,10 @@ static struct syscall_fmt {
 	{ .name	    = "mlockall",   .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 	{ .name	    = "mmap",	    .hexret = true,
+/* The standard mmap maps to old_mmap on s390x */
+#if defined(__s390x__)
+	.alias = "old_mmap",
+#endif
 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
			     [2] = SCA_MMAP_PROT, /* prot */
			     [3] = SCA_MMAP_FLAGS, /* flags */
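
The two ssi.c hunks above adjust PIO transfers for 24-bit samples: with runtime->sample_bits == 32 the valid data sits in the low bits of each 32-bit container, while the SSI FIFO apparently expects it 8 bits higher, so samples are shifted up on playback and back down on capture. A stand-alone sketch of that alignment (the fifo variable and the accessors are stand-ins for the SSITDR/SSIRDR register access, not the rsnd API):

	#include <stdint.h>

	static uint32_t fifo;	/* stand-in for the SSITDR/SSIRDR registers */

	static void reg_write(uint32_t v) { fifo = v; }
	static uint32_t reg_read(void) { return fifo; }

	/* Shift 24-in-32 data into the position the hardware consumes;
	 * 8/16-bit containers pass through unshifted. */
	static void pio_xfer(uint32_t *buf, unsigned int sample_bits, int is_play)
	{
		int shift = (sample_bits == 32) ? 8 : 0;

		if (is_play)
			reg_write(*buf << shift);
		else
			*buf = reg_read() >> shift;
	}
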
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index a767a6400c5c..6ea4fcfaab36 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -182,6 +182,8 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	unsigned char buf2[BUFSZ];
 	size_t ret_len;
 	u64 objdump_addr;
+	const char *objdump_name;
+	char decomp_name[KMOD_DECOMP_LEN];
 	int ret;
 
 	pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
@@ -242,9 +244,25 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 		state->done[state->done_cnt++] = al.map->start;
 	}
 
+	objdump_name = al.map->dso->long_name;
+	if (dso__needs_decompress(al.map->dso)) {
+		if (dso__decompress_kmodule_path(al.map->dso, objdump_name,
+						 decomp_name,
+						 sizeof(decomp_name)) < 0) {
+			pr_debug("decompression failed\n");
+			return -1;
+		}
+
+		objdump_name = decomp_name;
+	}
+
 	/* Read the object code using objdump */
 	objdump_addr = map__rip_2objdump(al.map, al.addr);
-	ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len);
+	ret = read_via_objdump(objdump_name, objdump_addr, buf2, len);
+
+	if (dso__needs_decompress(al.map->dso))
+		unlink(objdump_name);
+
 	if (ret > 0) {
 		/*
 		 * The kernel maps are inaccurate - assume objdump is right in
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 43838003c1a1..304f5d710143 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1258,8 +1258,16 @@ static int __event_process_build_id(struct build_id_event *bev,
 
 		dso__set_build_id(dso, &bev->build_id);
 
-		if (!is_kernel_module(filename, cpumode))
-			dso->kernel = dso_type;
+		if (dso_type != DSO_TYPE_USER) {
+			struct kmod_path m = { .name = NULL, };
+
+			if (!kmod_path__parse_name(&m, filename) && m.kmod)
+				dso__set_short_name(dso, strdup(m.name), true);
+			else
+				dso->kernel = dso_type;
+
+			free(m.name);
+		}
 
 		build_id__sprintf(dso->build_id, sizeof(dso->build_id),
 				  sbuild_id);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 03875f9154e7..0195b7e8c54a 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2349,6 +2349,14 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
 
 out:
 	free(nbase);
+
+	/* Final validation */
+	if (ret >= 0 && !is_c_func_name(buf)) {
+		pr_warning("Internal error: \"%s\" is an invalid event name.\n",
+			   buf);
+		ret = -EINVAL;
+	}
+
 	return ret;
 }
 
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 2dcfe9a7c8d0..60edec383281 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -37,6 +37,14 @@ static int __report_module(struct addr_location *al, u64 ip,
 		return 0;
 
 	mod = dwfl_addrmodule(ui->dwfl, ip);
+	if (mod) {
+		Dwarf_Addr s;
+
+		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
+		if (s != al->map->start)
+			mod = 0;
+	}
+
 	if (!mod)
 		mod = dwfl_report_elf(ui->dwfl, dso->short_name,
 				      dso->long_name, -1, al->map->start,
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 47b1e36c7ea0..9adc9af8b048 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -162,7 +162,7 @@ int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
 
 		size -= ret;
 		off_in += ret;
-		off_out -= ret;
+		off_out += ret;
 	}
 
 	munmap(ptr, off_in + size);
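
The util.c one-liner above fixes the clearest bug of the batch: copyfile_offset() advanced the input offset after each chunk but walked the output offset backwards, corrupting any copy that needed more than one iteration. The corrected invariant, shown here as a plain pread()/pwrite() loop rather than the mmap-based version used in perf:

	#include <unistd.h>
	#include <sys/types.h>

	static int copy_range(int ifd, off_t off_in, int ofd, off_t off_out,
			      size_t size)
	{
		char buf[4096];

		while (size) {
			size_t chunk = size < sizeof(buf) ? size : sizeof(buf);
			ssize_t n = pread(ifd, buf, chunk, off_in);

			if (n <= 0)
				return -1;
			if (pwrite(ofd, buf, n, off_out) != n)
				return -1;
			size -= n;
			off_in += n;
			off_out += n;	/* both offsets must move forward */
		}
		return 0;
	}
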
diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
index 42d4c8caad81..de8dc82e2567 100644
--- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
@@ -45,12 +45,12 @@ int test_body(void)
 	printf("Check DSCR TM context switch: ");
 	fflush(stdout);
 	for (;;) {
-		rv = 1;
 		asm __volatile__ (
 			/* set a known value into the DSCR */
 			"ld      3, %[dscr1];"
 			"mtspr   %[sprn_dscr], 3;"
 
+			"li      %[rv], 1;"
 			/* start and suspend a transaction */
 			TBEGIN
 			"beq     1f;"