Merge branch 'v4.4-16.09-android-tmp' into lsk-v4.4-16.09-android

* v4.4-16.09-android-tmp:
  unsafe_[get|put]_user: change interface to use a error target label
  usercopy: remove page-spanning test for now
  usercopy: fix overlap check for kernel text
  mm/slub: support left redzone
  Linux 4.4.21
  lib/mpi: mpi_write_sgl(): fix skipping of leading zero limbs
  regulator: anatop: allow regulator to be in bypass mode
  hwrng: exynos - Disable runtime PM on probe failure
  cpufreq: Fix GOV_LIMITS handling for the userspace governor
  metag: Fix atomic_*_return inline asm constraints
  scsi: fix upper bounds check of sense key in scsi_sense_key_string()
  ALSA: timer: fix NULL pointer dereference on memory allocation failure
  ALSA: timer: fix division by zero after SNDRV_TIMER_IOCTL_CONTINUE
  ALSA: timer: fix NULL pointer dereference in read()/ioctl() race
  ALSA: hda - Enable subwoofer on Dell Inspiron 7559
  ALSA: hda - Add headset mic quirk for Dell Inspiron 5468
  ALSA: rawmidi: Fix possible deadlock with virmidi registration
  ALSA: fireworks: accessing to user space outside spinlock
  ALSA: firewire-tascam: accessing to user space outside spinlock
  ALSA: usb-audio: Add sample rate inquiry quirk for B850V3 CP2114
  crypto: caam - fix IV loading for authenc (giv)decryption
  uprobes: Fix the memcg accounting
  x86/apic: Do not init irq remapping if ioapic is disabled
  vhost/scsi: fix reuse of &vq->iov[out] in response
  bcache: RESERVE_PRIO is too small by one when prio_buckets() is a power of two.
  ubifs: Fix assertion in layout_in_gaps()
  ovl: fix workdir creation
  ovl: listxattr: use strnlen()
  ovl: remove posix_acl_default from workdir
  ovl: don't copy up opaqueness
  wrappers for ->i_mutex access
  lustre: remove unused declaration
  timekeeping: Avoid taking lock in NMI path with CONFIG_DEBUG_TIMEKEEPING
  timekeeping: Cap array access in timekeeping_debug
  xfs: fix superblock inprogress check
  ASoC: atmel_ssc_dai: Don't unconditionally reset SSC on stream startup
  drm/msm: fix use of copy_from_user() while holding spinlock
  drm: Reject page_flip for !DRIVER_MODESET
  drm/radeon: fix radeon_move_blit on 32bit systems
  s390/sclp_ctl: fix potential information leak with /dev/sclp
  rds: fix an infoleak in rds_inc_info_copy
  powerpc/tm: Avoid SLB faults in treclaim/trecheckpoint when RI=0
  nvme: Call pci_disable_device on the error path.
  cgroup: reduce read locked section of cgroup_threadgroup_rwsem during fork
  block: make sure a big bio is split into at most 256 bvecs
  block: Fix race triggered by blk_set_queue_dying()
  ext4: avoid modifying checksum fields directly during checksum verification
  ext4: avoid deadlock when expanding inode size
  ext4: properly align shifted xattrs when expanding inodes
  ext4: fix xattr shifting when expanding inodes part 2
  ext4: fix xattr shifting when expanding inodes
  ext4: validate that metadata blocks do not overlap superblock
  net: Use ns_capable_noaudit() when determining net sysctl permissions
  kernel: Add noaudit variant of ns_capable()
  KEYS: Fix ASN.1 indefinite length object parsing
  drivers:hv: Lock access to hyperv_mmio resource tree
  cxlflash: Move to exponential back-off when cmd_room is not available
  netfilter: x_tables: check for size overflow
  drm/amdgpu/cz: enable/disable vce dpm even if vce pg is disabled
  cred: Reject inodes with invalid ids in set_create_file_as()
  fs: Check for invalid i_uid in may_follow_link()
  IB/IPoIB: Do not set skb truesize since using one linearskb
  udp: properly support MSG_PEEK with truncated buffers
  crypto: nx-842 - Mask XERS0 bit in return value
  cxlflash: Fix to avoid virtual LUN failover failure
  cxlflash: Fix to escalate LINK_RESET also on port 1
  tipc: fix nl compat regression for link statistics
  tipc: fix an infoleak in tipc_nl_compat_link_dump
  netfilter: x_tables: check for size overflow
  Bluetooth: Add support for Intel Bluetooth device 8265 [8087:0a2b]
  drm/i915: Check VBT for port presence in addition to the strap on VLV/CHV
  drm/i915: Only ignore eDP ports that are connected
  Input: xpad - move pending clear to the correct location
  net: thunderx: Fix link status reporting
  x86/hyperv: Avoid reporting bogus NMI status for Gen2 instances
  crypto: vmx - IV size failing on skcipher API
  tda10071: Fix dependency to REGMAP_I2C
  crypto: vmx - Fix ABI detection
  crypto: vmx - comply with ABIs that specify vrsave as reserved.
  HID: core: prevent out-of-bound readings
  lpfc: Fix DMA faults observed upon plugging loopback connector
  block: fix blk_rq_get_max_sectors for driver private requests
  irqchip/gicv3-its: numa: Enable workaround for Cavium thunderx erratum 23144
  clocksource: Allow unregistering the watchdog
  btrfs: Continue write in case of can_not_nocow
  blk-mq: End unstarted requests on dying queue
  cxlflash: Fix to resolve dead-lock during EEH recovery
  drm/radeon/mst: fix regression in lane/link handling.
  ecryptfs: fix handling of directory opening
  ALSA: hda: add AMD Polaris-10/11 AZ PCI IDs with proper driver caps
  drm: Balance error path for GEM handle allocation
  ntp: Fix ADJ_SETOFFSET being used w/ ADJ_NANO
  time: Verify time values in adjtimex ADJ_SETOFFSET to avoid overflow
  Input: xpad - correctly handle concurrent LED and FF requests
  net: thunderx: Fix receive packet stats
  net: thunderx: Fix for multiqset not configured upon interface toggle
  perf/x86/cqm: Fix CQM memory leak and notifier leak
  perf/x86/cqm: Fix CQM handling of grouping events into a cache_group
  s390/crypto: provide correct file mode at device register.
  proc: revert /proc/<pid>/maps [stack:TID] annotation
  intel_idle: Support for Intel Xeon Phi Processor x200 Product Family
  cxlflash: Fix to avoid unnecessary scan with internal LUNs
  Drivers: hv: vmbus: don't manipulate with clocksources on crash
  Drivers: hv: vmbus: avoid scheduling in interrupt context in vmbus_initiate_unload()
  Drivers: hv: vmbus: avoid infinite loop in init_vp_index()
  arcmsr: fixes not release allocated resource
  arcmsr: fixed getting wrong configuration data
  s390/pci_dma: fix DMA table corruption with > 4 TB main memory
  net/mlx5e: Don't modify CQ before it was created
  net/mlx5e: Don't try to modify CQ moderation if it is not supported
  mmc: sdhci: Do not BUG on invalid vdd
  UVC: Add support for R200 depth camera
  sched/numa: Fix use-after-free bug in the task_numa_compare
  ALSA: hda - add codec support for Kabylake display audio codec
  drm/i915: Fix hpd live status bits for g4x
  tipc: fix nullptr crash during subscription cancel
  arm64: Add workaround for Cavium erratum 27456
  net: thunderx: Fix for Qset error due to CQ full
  drm/radeon: fix dp link rate selection (v2)
  drm/amdgpu: fix dp link rate selection (v2)
  qla2xxx: Use ATIO type to send correct tmr response
  mmc: sdhci: 64-bit DMA actually has 4-byte alignment
  drm/atomic: Do not unset crtc when an encoder is stolen
  drm/i915/skl: Add missing SKL ids
  drm/i915/bxt: update list of PCIIDs
  hrtimer: Catch illegal clockids
  i40e/i40evf: Fix RSS rx-flow-hash configuration through ethtool
  mpt3sas: Fix for Asynchronous completion of timedout IO and task abort of timedout IO.
  mpt3sas: A correction in unmap_resources
  net: cavium: liquidio: fix check for in progress flag
  arm64: KVM: Configure TCR_EL2.PS at runtime
  irqchip/gic-v3: Make sure read from ICC_IAR1_EL1 is visible on redestributor
  pwm: lpc32xx: fix and simplify duty cycle and period calculations
  pwm: lpc32xx: correct number of PWM channels from 2 to 1
  pwm: fsl-ftm: Fix clock enable/disable when using PM
  megaraid_sas: Add an i/o barrier
  megaraid_sas: Fix SMAP issue
  megaraid_sas: Do not allow PCI access during OCR
  s390/cio: update measurement characteristics
  s390/cio: ensure consistent measurement state
  s390/cio: fix measurement characteristics memleak
  qeth: initialize net_device with carrier off
  lpfc: Fix external loopback failure.
  lpfc: Fix mbox reuse in PLOGI completion
  lpfc: Fix RDP Speed reporting.
  lpfc: Fix crash in fcp command completion path.
  lpfc: Fix driver crash when module parameter lpfc_fcp_io_channel set to 16
  lpfc: Fix RegLogin failed error seen on Lancer FC during port bounce
  lpfc: Fix the FLOGI discovery logic to comply with T11 standards
  lpfc: Fix FCF Infinite loop in lpfc_sli4_fcf_rr_next_index_get.
  cxl: Enable PCI device ID for future IBM CXL adapter
  cxl: fix build for GCC 4.6.x
  cxlflash: Enable device id for future IBM CXL adapter
  cxlflash: Resolve oops in wait_port_offline
  cxlflash: Fix to resolve cmd leak after host reset
  cxl: Fix DSI misses when the context owning task exits
  cxl: Fix possible idr warning when contexts are released
  Drivers: hv: vmbus: fix rescind-offer handling for device without a driver
  Drivers: hv: vmbus: serialize process_chn_event() and vmbus_close_internal()
  Drivers: hv: vss: run only on supported host versions
  drivers/hv: cleanup synic msrs if vmbus connect failed
  Drivers: hv: util: catch allocation errors
  tools: hv: report ENOSPC errors in hv_fcopy_daemon
  Drivers: hv: utils: run polling callback always in interrupt context
  Drivers: hv: util: Increase the timeout for util services
  lightnvm: fix missing grown bad block type
  lightnvm: fix locking and mempool in rrpc_lun_gc
  lightnvm: unlock rq and free ppa_list on submission fail
  lightnvm: add check after mempool allocation
  lightnvm: fix incorrect nr_free_blocks stat
  lightnvm: fix bio submission issue
  cxlflash: a couple off by one bugs
  fm10k: Cleanup exception handling for mailbox interrupt
  fm10k: Cleanup MSI-X interrupts in case of failure
  fm10k: reinitialize queuing scheme after calling init_hw
  fm10k: always check init_hw for errors
  fm10k: reset max_queues on init_hw_vf failure
  fm10k: Fix handling of NAPI budget when multiple queues are enabled per vector
  fm10k: Correct MTU for jumbo frames
  fm10k: do not assume VF always has 1 queue
  clk: xgene: Fix divider with non-zero shift value
  e1000e: fix division by zero on jumbo MTUs
  e1000: fix data race between tx_ring->next_to_clean
  ixgbe: Fix handling of NAPI budget when multiple queues are enabled per vector
  igb: fix NULL derefs due to skipped SR-IOV enabling
  igb: use the correct i210 register for EEMNGCTL
  igb: don't unmap NULL hw_addr
  i40e: Fix Rx hash reported to the stack by our driver
  i40e: clean whole mac filter list
  i40evf: check rings before freeing resources
  i40e: don't add zero MAC filter
  i40e: properly delete VF MAC filters
  i40e: Fix memory leaks, sideband filter programming
  i40e: fix: do not sleep in netdev_ops
  i40e/i40evf: Fix RS bit update in Tx path and disable force WB workaround
  i40evf: handle many MAC filters correctly
  i40e: Workaround fix for mss < 256 issue
  UPSTREAM: audit: fix a double fetch in audit_log_single_execve_arg()
  UPSTREAM: ARM: 8494/1: mm: Enable PXN when running non-LPAE kernel on LPAE processor
  FIXUP: sched/tune: update accouting before CPU capacity
  FIXUP: sched/tune: add fixes missing from a previous patch
  arm: Fix #if/#ifdef typo in topology.c
  arm: Fix build error "conflicting types for 'scale_cpu_capacity'"
  sched/walt: use do_div instead of division operator
  DEBUG: cpufreq: fix cpu_capacity tracing build for non-smp systems
  sched/walt: include missing header for arm_timer_read_counter()
  cpufreq: Kconfig: Fixup incorrect selection by CPU_FREQ_DEFAULT_GOV_SCHED
  sched/fair: Avoid redundant idle_cpu() call in update_sg_lb_stats()
  FIXUP: sched: scheduler-driven cpu frequency selection
  sched/rt: Add Kconfig option to enable panicking for RT throttling
  sched/rt: print RT tasks when RT throttling is activated
  UPSTREAM: sched: Fix a race between __kthread_bind() and sched_setaffinity()
  sched/fair: Favor higher cpus only for boosted tasks
  vmstat: make vmstat_updater deferrable again and shut down on idle
  sched/fair: call OPP update when going idle after migration
  sched/cpufreq_sched: fix thermal capping events
  sched/fair: Picking cpus with low OPPs for tasks that prefer idle CPUs
  FIXUP: sched/tune: do initialization as a postcore_initicall
  DEBUG: sched: add tracepoint for RD overutilized
  sched/tune: Introducing a new schedtune attribute prefer_idle
  sched: use util instead of capacity to select busy cpu
  arch_timer: add error handling when the MPM global timer is cleared
  FIXUP: sched: Fix double-release of spinlock in move_queued_task
  FIXUP: sched/fair: Fix hang during suspend in sched_group_energy
  FIXUP: sched: fix SchedFreq integration for both PELT and WALT
  sched: EAS: Avoid causing spikes to max-freq unnecessarily
  FIXUP: sched: fix set_cfs_cpu_capacity when WALT is in use
  sched/walt: Accounting for number of irqs pending on each core
  sched: Introduce Window Assisted Load Tracking (WALT)
  sched/tune: fix PB and PC cuts indexes definition
  sched/fair: optimize idle cpu selection for boosted tasks
  FIXUP: sched/tune: fix accounting for runnable tasks
  sched/tune: use a single initialisation function
  sched/{fair,tune}: simplify fair.c code
  FIXUP: sched/tune: fix payoff calculation for boost region
  sched/tune: Add support for negative boost values
  FIX: sched/tune: move schedtune_nornalize_energy into fair.c
  FIX: sched/tune: update usage of boosted task utilisation on CPU selection
  sched/fair: add tunable to set initial task load
  sched/fair: add tunable to force selection at cpu granularity
  sched: EAS: take cstate into account when selecting idle core
  sched/cpufreq_sched: Consolidated update
  FIXUP: sched: fix build for non-SMP target
  DEBUG: sched/tune: add tracepoint on P-E space filtering
  DEBUG: sched/tune: add tracepoint for energy_diff() values
  DEBUG: sched/tune: add tracepoint for task boost signal
  arm: topology: Define TC2 energy and provide it to the scheduler
  CHROMIUM: sched: update the average of nr_running
  DEBUG: schedtune: add tracepoint for schedtune_tasks_update() values
  DEBUG: schedtune: add tracepoint for CPU boost signal
  DEBUG: schedtune: add tracepoint for SchedTune configuration update
  DEBUG: sched: add energy procfs interface
  DEBUG: sched,cpufreq: add cpu_capacity change tracepoint
  DEBUG: sched: add tracepoint for CPU load/util signals
  DEBUG: sched: add tracepoint for task load/util signals
  DEBUG: sched: add tracepoint for cpu/freq scale invariance
  sched/fair: filter energy_diff() based on energy_payoff value
  sched/tune: add support to compute normalized energy
  sched/fair: keep track of energy/capacity variations
  sched/fair: add boosted task utilization
  sched/{fair,tune}: track RUNNABLE tasks impact on per CPU boost value
  sched/tune: compute and keep track of per CPU boost value
  sched/tune: add initial support for CGroups based boosting
  sched/fair: add boosted CPU usage
  sched/fair: add function to convert boost value into "margin"
  sched/tune: add sysctl interface to define a boost value
  sched/tune: add detailed documentation
  fixup! sched/fair: jump to max OPP when crossing UP threshold
  fixup! sched: scheduler-driven cpu frequency selection
  sched: rt scheduler sets capacity requirement
  sched: deadline: use deadline bandwidth in scale_rt_capacity
  sched: remove call of sched_avg_update from sched_rt_avg_update
  sched/cpufreq_sched: add trace events
  sched/fair: jump to max OPP when crossing UP threshold
  sched/fair: cpufreq_sched triggers for load balancing
  sched/{core,fair}: trigger OPP change request on fork()
  sched/fair: add triggers for OPP change requests
  sched: scheduler-driven cpu frequency selection
  cpufreq: introduce cpufreq_driver_is_slow
  sched: Consider misfit tasks when load-balancing
  sched: Add group_misfit_task load-balance type
  sched: Add per-cpu max capacity to sched_group_capacity
  sched: Do eas idle balance regardless of the rq avg idle value
  arm64: Enable max freq invariant scheduler load-tracking and capacity support
  arm: Enable max freq invariant scheduler load-tracking and capacity support
  sched: Update max cpu capacity in case of max frequency constraints
  cpufreq: Max freq invariant scheduler load-tracking and cpu capacity support
  arm64, topology: Updates to use DT bindings for EAS costing data
  sched: Support for extracting EAS energy costs from DT
  Documentation: DT bindings for energy model cost data required by EAS
  sched: Disable energy-unfriendly nohz kicks
  sched: Consider a not over-utilized energy-aware system as balanced
  sched: Energy-aware wake-up task placement
  sched: Determine the current sched_group idle-state
  sched, cpuidle: Track cpuidle state index in the scheduler
  sched: Add over-utilization/tipping point indicator
  sched: Estimate energy impact of scheduling decisions
  sched: Extend sched_group_energy to test load-balancing decisions
  sched: Calculate energy consumption of sched_group
  sched: Highest energy aware balancing sched_domain level pointer
  sched: Relocated cpu_util() and change return type
  sched: Compute cpu capacity available at current frequency
  arm64: Cpu invariant scheduler load-tracking and capacity support
  arm: Cpu invariant scheduler load-tracking and capacity support
  sched: Introduce SD_SHARE_CAP_STATES sched_domain flag
  sched: Initialize energy data structures
  sched: Introduce energy data structures
  sched: Make energy awareness a sched feature
  sched: Documentation for scheduler energy cost model
  sched: Prevent unnecessary active balance of single task in sched group
  sched: Enable idle balance to pull single task towards cpu with higher capacity
  sched: Consider spare cpu capacity at task wake-up
  sched: Add cpu capacity awareness to wakeup balancing
  sched: Store system-wide maximum cpu capacity in root domain
  arm: Update arch_scale_cpu_capacity() to reflect change to define
  arm64: Enable frequency invariant scheduler load-tracking support
  arm: Enable frequency invariant scheduler load-tracking support
  cpufreq: Frequency invariant scheduler load-tracking support
  sched/fair: Fix new task's load avg removed from source CPU in wake_up_new_task()
  FROMLIST: pstore: drop pmsg bounce buffer
  UPSTREAM: usercopy: remove page-spanning test for now
  UPSTREAM: usercopy: force check_object_size() inline
  BACKPORT: usercopy: fold builtin_const check into inline function
  UPSTREAM: x86/uaccess: force copy_*_user() to be inlined
  UPSTREAM: HID: core: prevent out-of-bound readings
  Android: Fix build breakages.
  UPSTREAM: tty: Prevent ldisc drivers from re-using stale tty fields
  UPSTREAM: netfilter: nfnetlink: correctly validate length of batch messages
  cpuset: Make cpusets restore on hotplug
  UPSTREAM: mm/slub: support left redzone
  UPSTREAM: Make the hardened user-copy code depend on having a hardened allocator
  Android: MMC/UFS IO Latency Histograms.
  UPSTREAM: usercopy: fix overlap check for kernel text
  UPSTREAM: usercopy: avoid potentially undefined behavior in pointer math
  UPSTREAM: unsafe_[get|put]_user: change interface to use a error target label
  BACKPORT: arm64: mm: fix location of _etext
  BACKPORT: ARM: 8583/1: mm: fix location of _etext
  BACKPORT: Don't show empty tag stats for unprivileged uids
  UPSTREAM: tcp: fix use after free in tcp_xmit_retransmit_queue()
  ANDROID: base-cfg: drop SECCOMP_FILTER config
  UPSTREAM: [media] xc2028: unlock on error in xc2028_set_config()
  UPSTREAM: [media] xc2028: avoid use after free
  ANDROID: base-cfg: enable SECCOMP config
  ANDROID: rcu_sync: Export rcu_sync_lockdep_assert
  RFC: FROMLIST: cgroup: reduce read locked section of cgroup_threadgroup_rwsem during fork
  RFC: FROMLIST: cgroup: avoid synchronize_sched() in __cgroup_procs_write()
  RFC: FROMLIST: locking/percpu-rwsem: Optimize readers and reduce global impact
  net: ipv6: Fix ping to link-local addresses.
  ipv6: fix endianness error in icmpv6_err
  ANDROID: dm: android-verity: Allow android-verity to be compiled as an independent module
  backporting: a brief introduce of backported feautures on 4.4
  Linux 4.4.20
  sysfs: correctly handle read offset on PREALLOC attrs
  hwmon: (iio_hwmon) fix memory leak in name attribute
  ALSA: line6: Fix POD sysfs attributes segfault
  ALSA: line6: Give up on the lock while URBs are released.
  ALSA: line6: Remove double line6_pcm_release() after failed acquire.
  ACPI / SRAT: fix SRAT parsing order with both LAPIC and X2APIC present
  ACPI / sysfs: fix error code in get_status()
  ACPI / drivers: replace acpi_probe_lock spinlock with mutex
  ACPI / drivers: fix typo in ACPI_DECLARE_PROBE_ENTRY macro
  staging: comedi: ni_mio_common: fix wrong insn_write handler
  staging: comedi: ni_mio_common: fix AO inttrig backwards compatibility
  staging: comedi: comedi_test: fix timer race conditions
  staging: comedi: daqboard2000: bug fix board type matching code
  USB: serial: option: add WeTelecom 0x6802 and 0x6803 products
  USB: serial: option: add WeTelecom WM-D200
  USB: serial: mos7840: fix non-atomic allocation in write path
  USB: serial: mos7720: fix non-atomic allocation in write path
  USB: fix typo in wMaxPacketSize validation
  usb: chipidea: udc: don't touch DP when controller is in host mode
  USB: avoid left shift by -1
  dmaengine: usb-dmac: check CHCR.DE bit in usb_dmac_isr_channel()
  crypto: qat - fix aes-xts key sizes
  crypto: nx - off by one bug in nx_of_update_msc()
  Input: i8042 - set up shared ps2_cmd_mutex for AUX ports
  Input: i8042 - break load dependency between atkbd/psmouse and i8042
  Input: tegra-kbc - fix inverted reset logic
  btrfs: properly track when rescan worker is running
  btrfs: waiting on qgroup rescan should not always be interruptible
  fs/seq_file: fix out-of-bounds read
  gpio: Fix OF build problem on UM
  usb: renesas_usbhs: gadget: fix return value check in usbhs_mod_gadget_probe()
  megaraid_sas: Fix probing cards without io port
  mpt3sas: Fix resume on WarpDrive flash cards
  cdc-acm: fix wrong pipe type on rx interrupt xfers
  i2c: cros-ec-tunnel: Fix usage of cros_ec_cmd_xfer()
  mfd: cros_ec: Add cros_ec_cmd_xfer_status() helper
  aacraid: Check size values after double-fetch from user
  ARC: Elide redundant setup of DMA callbacks
  ARC: Call trace_hardirqs_on() before enabling irqs
  ARC: use correct offset in pt_regs for saving/restoring user mode r25
  ARC: build: Better way to detect ISA compatible toolchain
  drm/i915: fix aliasing_ppgtt leak
  drm/amdgpu: record error code when ring test failed
  drm/amd/amdgpu: sdma resume fail during S4 on CI
  drm/amdgpu: skip TV/CV in display parsing
  drm/amdgpu: avoid a possible array overflow
  drm/amdgpu: fix amdgpu_move_blit on 32bit systems
  drm/amdgpu: Change GART offset to 64-bit
  iio: fix sched WARNING "do not call blocking ops when !TASK_RUNNING"
  sched/nohz: Fix affine unpinned timers mess
  sched/cputime: Fix NO_HZ_FULL getrusage() monotonicity regression
  of: fix reference counting in of_graph_get_endpoint_by_regs
  arm64: dts: rockchip: add reset saradc node for rk3368 SoCs
  mac80211: fix purging multicast PS buffer queue
  s390/dasd: fix hanging device after clear subchannel
  EDAC: Increment correct counter in edac_inc_ue_error()
  pinctrl/amd: Remove the default de-bounce time
  iommu/arm-smmu: Don't BUG() if we find aborting STEs with disable_bypass
  iommu/arm-smmu: Fix CMDQ error handling
  iommu/dma: Don't put uninitialised IOVA domains
  xhci: Make sure xhci handles USB_SPEED_SUPER_PLUS devices.
  USB: serial: ftdi_sio: add PIDs for Ivium Technologies devices
  USB: serial: ftdi_sio: add device ID for WICED USB UART dev board
  USB: serial: option: add support for Telit LE920A4
  USB: serial: option: add D-Link DWM-156/A3
  USB: serial: fix memleak in driver-registration error path
  xhci: don't dereference a xhci member after removing xhci
  usb: xhci: Fix panic if disconnect
  xhci: always handle "Command Ring Stopped" events
  usb/gadget: fix gadgetfs aio support.
  usb: gadget: fsl_qe_udc: off by one in setup_received_handle()
  USB: validate wMaxPacketValue entries in endpoint descriptors
  usb: renesas_usbhs: Use dmac only if the pipe type is bulk
  usb: renesas_usbhs: clear the BRDYSTS in usbhsg_ep_enable()
  USB: hub: change the locking in hub_activate
  USB: hub: fix up early-exit pathway in hub_activate
  usb: hub: Fix unbalanced reference count/memory leak/deadlocks
  usb: define USB_SPEED_SUPER_PLUS speed for SuperSpeedPlus USB3.1 devices
  usb: dwc3: gadget: increment request->actual once
  usb: dwc3: pci: add Intel Kabylake PCI ID
  usb: misc: usbtest: add fix for driver hang
  usb: ehci: change order of register cleanup during shutdown
  crypto: caam - defer aead_set_sh_desc in case of zero authsize
  crypto: caam - fix echainiv(authenc) encrypt shared descriptor
  crypto: caam - fix non-hmac hashes
  genirq/msi: Make sure PCI MSIs are activated early
  genirq/msi: Remove unused MSI_FLAG_IDENTITY_MAP
  um: Don't discard .text.exit section
  ACPI / CPPC: Prevent cpc_desc_ptr points to the invalid data
  ACPI: CPPC: Return error if _CPC is invalid on a CPU
  mmc: sdhci-acpi: Reduce Baytrail eMMC/SD/SDIO hangs
  PCI: Limit config space size for Netronome NFP4000
  PCI: Add Netronome NFP4000 PF device ID
  PCI: Limit config space size for Netronome NFP6000 family
  PCI: Add Netronome vendor and device IDs
  PCI: Support PCIe devices with short cfg_size
  NVMe: Don't unmap controller registers on reset
  ALSA: hda - Manage power well properly for resume
  libnvdimm, nd_blk: mask off reserved status bits
  perf intel-pt: Fix occasional decoding errors when tracing system-wide
  vfio/pci: Fix NULL pointer oops in error interrupt setup handling
  virtio: fix memory leak in virtqueue_add()
  parisc: Fix order of EREFUSED define in errno.h
  arm64: Define AT_VECTOR_SIZE_ARCH for ARCH_DLINFO
  ALSA: usb-audio: Add quirk for ELP HD USB Camera
  ALSA: usb-audio: Add a sample rate quirk for Creative Live! Cam Socialize HD (VF0610)
  powerpc/eeh: eeh_pci_enable(): fix checking of post-request state
  SUNRPC: allow for upcalls for same uid but different gss service
  SUNRPC: Handle EADDRNOTAVAIL on connection failures
  tools/testing/nvdimm: fix SIGTERM vs hotplug crash
  uprobes/x86: Fix RIP-relative handling of EVEX-encoded instructions
  x86/mm: Disable preemption during CR3 read+write
  hugetlb: fix nr_pmds accounting with shared page tables
  mm: SLUB hardened usercopy support
  mm: SLAB hardened usercopy support
  s390/uaccess: Enable hardened usercopy
  sparc/uaccess: Enable hardened usercopy
  powerpc/uaccess: Enable hardened usercopy
  ia64/uaccess: Enable hardened usercopy
  arm64/uaccess: Enable hardened usercopy
  ARM: uaccess: Enable hardened usercopy
  x86/uaccess: Enable hardened usercopy
  x86: remove more uaccess_32.h complexity
  x86: remove pointless uaccess_32.h complexity
  x86: fix SMAP in 32-bit environments
  Use the new batched user accesses in generic user string handling
  Add 'unsafe' user access functions for batched accesses
  x86: reorganize SMAP handling in user space accesses
  mm: Hardened usercopy
  mm: Implement stack frame object validation
  mm: Add is_migrate_cma_page
  Linux 4.4.19
  Documentation/module-signing.txt: Note need for version info if reusing a key
  module: Invalidate signatures on force-loaded modules
  dm flakey: error READ bios during the down_interval
  rtc: s3c: Add s3c_rtc_{enable/disable}_clk in s3c_rtc_setfreq()
  lpfc: fix oops in lpfc_sli4_scmd_to_wqidx_distr() from lpfc_send_taskmgmt()
  ACPI / EC: Work around method reentrancy limit in ACPICA for _Qxx
  x86/platform/intel_mid_pci: Rework IRQ0 workaround
  PCI: Mark Atheros AR9485 and QCA9882 to avoid bus reset
  MIPS: hpet: Increase HPET_MIN_PROG_DELTA and decrease HPET_MIN_CYCLES
  MIPS: Don't register r4k sched clock when CPUFREQ enabled
  MIPS: mm: Fix definition of R6 cache instruction
  SUNRPC: Don't allocate a full sockaddr_storage for tracing
  Input: elan_i2c - properly wake up touchpad on ASUS laptops
  target: Fix ordered task CHECK_CONDITION early exception handling
  target: Fix max_unmap_lba_count calc overflow
  target: Fix race between iscsi-target connection shutdown + ABORT_TASK
  target: Fix missing complete during ABORT_TASK + CMD_T_FABRIC_STOP
  target: Fix ordered task target_setup_cmd_from_cdb exception hang
  iscsi-target: Fix panic when adding second TCP connection to iSCSI session
  ubi: Fix race condition between ubi device creation and udev
  ubi: Fix early logging
  ubi: Make volume resize power cut aware
  of: fix memory leak related to safe_name()
  IB/mlx4: Fix memory leak if QP creation failed
  IB/mlx4: Fix error flow when sending mads under SRIOV
  IB/mlx4: Fix the SQ size of an RC QP
  IB/IWPM: Fix a potential skb leak
  IB/IPoIB: Don't update neigh validity for unresolved entries
  IB/SA: Use correct free function
  IB/mlx5: Return PORT_ERR in Active to Initializing tranisition
  IB/mlx5: Fix post send fence logic
  IB/mlx5: Fix entries check in mlx5_ib_resize_cq
  IB/mlx5: Fix returned values of query QP
  IB/mlx5: Fix entries checks in mlx5_ib_create_cq
  IB/mlx5: Fix MODIFY_QP command input structure
  ALSA: hda - Fix headset mic detection problem for two dell machines
  ALSA: hda: add AMD Bonaire AZ PCI ID with proper driver caps
  ALSA: hda/realtek - Can't adjust speaker's volume on a Dell AIO
  ALSA: hda: Fix krealloc() with __GFP_ZERO usage
  mm/hugetlb: avoid soft lockup in set_max_huge_pages()
  mtd: nand: fix bug writing 1 byte less than page size
  block: fix bdi vs gendisk lifetime mismatch
  block: add missing group association in bio-cloning functions
  metag: Fix __cmpxchg_u32 asm constraint for CMP
  ftrace/recordmcount: Work around for addition of metag magic but not relocations
  balloon: check the number of available pages in leak balloon
  drm/i915/dp: Revert "drm/i915/dp: fall back to 18 bpp when sink capability is unknown"
  drm/i915: Never fully mask the the EI up rps interrupt on SNB/IVB
  drm/edid: Add 6 bpc quirk for display AEO model 0.
  drm: Restore double clflush on the last partial cacheline
  drm/nouveau/fbcon: fix font width not divisible by 8
  drm/nouveau/gr/nv3x: fix instobj write offsets in gr setup
  drm/nouveau: check for supported chipset before booting fbdev off the hw
  drm/radeon: support backlight control for UNIPHY3
  drm/radeon: fix firmware info version checks
  drm/radeon: Poll for both connect/disconnect on analog connectors
  drm/radeon: add a delay after ATPX dGPU power off
  drm/amdgpu/gmc7: add missing mullins case
  drm/amdgpu: fix firmware info version checks
  drm/amdgpu: Disable RPM helpers while reprobing connectors on resume
  drm/amdgpu: support backlight control for UNIPHY3
  drm/amdgpu: Poll for both connect/disconnect on analog connectors
  drm/amdgpu: add a delay after ATPX dGPU power off
  w1:omap_hdq: fix regression
  netlabel: add address family checks to netlbl_{sock,req}_delattr()
  ARM: dts: sunxi: Add a startup delay for fixed regulator enabled phys
  audit: fix a double fetch in audit_log_single_execve_arg()
  iommu/amd: Update Alias-DTE in update_device_table()
  iommu/amd: Init unity mappings only for dma_ops domains
  iommu/amd: Handle IOMMU_DOMAIN_DMA in ops->domain_free call-back
  iommu/vt-d: Return error code in domain_context_mapping_one()
  iommu/exynos: Suppress unbinding to prevent system failure
  drm/i915: Don't complain about lack of ACPI video bios
  nfsd: don't return an unhashed lock stateid after taking mutex
  nfsd: Fix race between FREE_STATEID and LOCK
  nfs: don't create zero-length requests
  MIPS: KVM: Propagate kseg0/mapped tlb fault errors
  MIPS: KVM: Fix gfn range check in kseg0 tlb faults
  MIPS: KVM: Add missing gfn range check
  MIPS: KVM: Fix mapped fault broken commpage handling
  random: add interrupt callback to VMBus IRQ handler
  random: print a warning for the first ten uninitialized random users
  random: initialize the non-blocking pool via add_hwgenerator_randomness()
  CIFS: Fix a possible invalid memory access in smb2_query_symlink()
  cifs: fix crash due to race in hmac(md5) handling
  cifs: Check for existing directory when opening file with O_CREAT
  fs/cifs: make share unaccessible at root level mountable
  jbd2: make journal y2038 safe
  ARC: mm: don't loose PTE_SPECIAL in pte_modify()
  remoteproc: Fix potential race condition in rproc_add
  ovl: disallow overlayfs as upperdir
  HID: uhid: fix timeout when probe races with IO
  EDAC: Correct channel count limit
  Bluetooth: Fix l2cap_sock_setsockopt() with optname BT_RCVMTU
  spi: pxa2xx: Clear all RFT bits in reset_sccr1() on Intel Quark
  i2c: efm32: fix a failure path in efm32_i2c_probe()
  s5p-mfc: Add release callback for memory region devs
  s5p-mfc: Set device name for reserved memory region devs
  hp-wmi: Fix wifi cannot be hard-unblocked
  dm: set DMF_SUSPENDED* _before_ clearing DMF_NOFLUSH_SUSPENDING
  sur40: fix occasional oopses on device close
  sur40: lower poll interval to fix occasional FPS drops to ~56 FPS
  Fix RC5 decoding with Fintek CIR chipset
  vb2: core: Skip planes array verification if pb is NULL
  videobuf2-v4l2: Verify planes array in buffer dequeueing
  media: dvb_ringbuffer: Add memory barriers
  media: usbtv: prevent access to free'd resources
  mfd: qcom_rpm: Parametrize also ack selector size
  mfd: qcom_rpm: Fix offset error for msm8660
  intel_pstate: Fix MSR_CONFIG_TDP_x addressing in core_get_max_pstate()
  s390/cio: allow to reset channel measurement block
  KVM: nVMX: Fix memory corruption when using VMCS shadowing
  KVM: VMX: handle PML full VMEXIT that occurs during event delivery
  KVM: MTRR: fix kvm_mtrr_check_gfn_range_consistency page fault
  KVM: PPC: Book3S HV: Save/restore TM state in H_CEDE
  KVM: PPC: Book3S HV: Pull out TM state save/restore into separate procedures
  arm64: mm: avoid fdt_check_header() before the FDT is fully mapped
  arm64: dts: rockchip: fixes the gic400 2nd region size for rk3368
  pinctrl: cherryview: prevent concurrent access to GPIO controllers
  Bluetooth: hci_intel: Fix null gpio desc pointer dereference
  gpio: intel-mid: Remove potentially harmful code
  gpio: pca953x: Fix NBANK calculation for PCA9536
  tty/serial: atmel: fix RS485 half duplex with DMA
  serial: samsung: Fix ERR pointer dereference on deferred probe
  tty: serial: msm: Don't read off end of tx fifo
  arm64: Fix incorrect per-cpu usage for boot CPU
  arm64: debug: unmask PSTATE.D earlier
  arm64: kernel: Save and restore UAO and addr_limit on exception entry
  USB: usbfs: fix potential infoleak in devio
  usb: renesas_usbhs: fix NULL pointer dereference in xfer_work()
  USB: serial: option: add support for Telit LE910 PID 0x1206
  usb: dwc3: fix for the isoc transfer EP_BUSY flag
  usb: quirks: Add no-lpm quirk for Elan
  usb: renesas_usbhs: protect the CFIFOSEL setting in usbhsg_ep_enable()
  usb: f_fs: off by one bug in _ffs_func_bind()
  usb: gadget: avoid exposing kernel stack
  UPSTREAM: usb: gadget: configfs: add mutex lock before unregister gadget
  ANDROID: dm-verity: adopt changes made to dm callbacks
  UPSTREAM: ecryptfs: fix handling of directory opening
  ANDROID: net: core: fix UID-based routing
  ANDROID: net: fib: remove duplicate assignment
  FROMLIST: proc: Fix timerslack_ns CAP_SYS_NICE check when adjusting self
  ANDROID: dm verity fec: pack the fec_header structure
  ANDROID: dm: android-verity: Verify header before fetching table
  ANDROID: dm: allow adb disable-verity only in userdebug
  ANDROID: dm: mount as linear target if eng build
  ANDROID: dm: use default verity public key
  ANDROID: dm: fix signature verification flag
  ANDROID: dm: use name_to_dev_t
  ANDROID: dm: rename dm-linear methods for dm-android-verity
  ANDROID: dm: Minor cleanup
  ANDROID: dm: Mounting root as linear device when verity disabled
  ANDROID: dm-android-verity: Rebase on top of 4.1
  ANDROID: dm: Add android verity target
  ANDROID: dm: fix dm_substitute_devices()
  ANDROID: dm: Rebase on top of 4.1
  CHROMIUM: dm: boot time specification of dm=
  Implement memory_state_time, used by qcom,cpubw
  Revert "panic: Add board ID to panic output"
  usb: gadget: f_accessory: remove duplicate endpoint alloc
  BACKPORT: brcmfmac: defer DPC processing during probe
  FROMLIST: proc: Add LSM hook checks to /proc/<tid>/timerslack_ns
  FROMLIST: proc: Relax /proc/<tid>/timerslack_ns capability requirements
  UPSTREAM: ppp: defer netns reference release for ppp channel
  cpuset: Add allow_attach hook for cpusets on android.
  UPSTREAM: KEYS: Fix ASN.1 indefinite length object parsing
  ANDROID: sdcardfs: fix itnull.cocci warnings
  android-recommended.cfg: enable fstack-protector-strong
  Linux 4.4.18
  mm: memcontrol: fix memcg id ref counter on swap charge move
  mm: memcontrol: fix swap counter leak on swapout from offline cgroup
  mm: memcontrol: fix cgroup creation failure after many small jobs
  ext4: fix reference counting bug on block allocation error
  ext4: short-cut orphan cleanup on error
  ext4: validate s_reserved_gdt_blocks on mount
  ext4: don't call ext4_should_journal_data() on the journal inode
  ext4: fix deadlock during page writeback
  ext4: check for extents that wrap around
  crypto: scatterwalk - Fix test in scatterwalk_done
  crypto: gcm - Filter out async ghash if necessary
  fs/dcache.c: avoid soft-lockup in dput()
  fuse: fix wrong assignment of ->flags in fuse_send_init()
  fuse: fuse_flush must check mapping->flags for errors
  fuse: fsync() did not return IO errors
  sysv, ipc: fix security-layer leaking
  block: fix use-after-free in seq file
  x86/syscalls/64: Add compat_sys_keyctl for 32-bit userspace
  drm/i915: Pretend cursor is always on for ILK-style WM calculations (v2)
  x86/mm/pat: Fix BUG_ON() in mmap_mem() on QEMU/i386
  x86/pat: Document the PAT initialization sequence
  x86/xen, pat: Remove PAT table init code from Xen
  x86/mtrr: Fix PAT init handling when MTRR is disabled
  x86/mtrr: Fix Xorg crashes in Qemu sessions
  x86/mm/pat: Replace cpu_has_pat with boot_cpu_has()
  x86/mm/pat: Add pat_disable() interface
  x86/mm/pat: Add support of non-default PAT MSR setting
  devpts: clean up interface to pty drivers
  random: strengthen input validation for RNDADDTOENTCNT
  apparmor: fix ref count leak when profile sha1 hash is read
  Revert "s390/kdump: Clear subchannel ID to signal non-CCW/SCSI IPL"
  KEYS: 64-bit MIPS needs to use compat_sys_keyctl for 32-bit userspace
  arm: oabi compat: add missing access checks
  cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind
  i2c: i801: Allow ACPI SystemIO OpRegion to conflict with PCI BAR
  x86/mm/32: Enable full randomization on i386 and X86_32
  HID: sony: do not bail out when the sixaxis refuses the output report
  PNP: Add Broadwell to Intel MCH size workaround
  PNP: Add Haswell-ULT to Intel MCH size workaround
  scsi: ignore errors from scsi_dh_add_device()
  ipath: Restrict use of the write() interface
  tcp: consider recv buf for the initial window scale
  qed: Fix setting/clearing bit in completion bitmap
  net/irda: fix NULL pointer dereference on memory allocation failure
  net: bgmac: Fix infinite loop in bgmac_dma_tx_add()
  bonding: set carrier off for devices created through netlink
  ipv4: reject RTNH_F_DEAD and RTNH_F_LINKDOWN from user space
  tcp: enable per-socket rate limiting of all 'challenge acks'
  tcp: make challenge acks less predictable
  arm64: relocatable: suppress R_AARCH64_ABS64 relocations in vmlinux
  arm64: vmlinux.lds: make __rela_offset and __dynsym_offset ABSOLUTE
  Linux 4.4.17
  vfs: fix deadlock in file_remove_privs() on overlayfs
  intel_th: Fix a deadlock in modprobing
  intel_th: pci: Add Kaby Lake PCH-H support
  net: mvneta: set real interrupt per packet for tx_done
  libceph: apply new_state before new_up_client on incrementals
  libata: LITE-ON CX1-JB256-HP needs lower max_sectors
  i2c: mux: reg: wrong condition checked for of_address_to_resource return value
  posix_cpu_timer: Exit early when process has been reaped
  media: fix airspy usb probe error path
  ipr: Clear interrupt on croc/crocodile when running with LSI
  SCSI: fix new bug in scsi_dev_info_list string matching
  RDS: fix rds_tcp_init() error path
  can: fix oops caused by wrong rtnl dellink usage
  can: fix handling of unmodifiable configuration options fix
  can: c_can: Update D_CAN TX and RX functions to 32 bit - fix Altera Cyclone access
  can: at91_can: RX queue could get stuck at high bus load
  perf/x86: fix PEBS issues on Intel Atom/Core2
  ovl: handle ATTR_KILL*
  sched/fair: Fix effective_load() to consistently use smoothed load
  mmc: block: fix packed command header endianness
  block: fix use-after-free in sys_ioprio_get()
  qeth: delete napi struct when removing a qeth device
  platform/chrome: cros_ec_dev - double fetch bug in ioctl
  clk: rockchip: initialize flags of clk_init_data in mmc-phase clock
  spi: sun4i: fix FIFO limit
  spi: sunxi: fix transfer timeout
  namespace: update event counter when umounting a deleted dentry
  9p: use file_dentry()
  ext4: verify extent header depth
  ecryptfs: don't allow mmap when the lower fs doesn't support it
  Revert "ecryptfs: forbid opening files without mmap handler"
  locks: use file_inode()
  power_supply: power_supply_read_temp only if use_cnt > 0
  cgroup: set css->id to -1 during init
  pinctrl: imx: Do not treat a PIN without MUX register as an error
  pinctrl: single: Fix missing flush of posted write for a wakeirq
  pvclock: Add CPU barriers to get correct version value
  Input: tsc200x - report proper input_dev name
  Input: xpad - validate USB endpoint count during probe
  Input: wacom_w8001 - w8001_MAX_LENGTH should be 13
  Input: xpad - fix oops when attaching an unknown Xbox One gamepad
  Input: elantech - add more IC body types to the list
  Input: vmmouse - remove port reservation
  ALSA: timer: Fix leak in events via snd_timer_user_tinterrupt
  ALSA: timer: Fix leak in events via snd_timer_user_ccallback
  ALSA: timer: Fix leak in SNDRV_TIMER_IOCTL_PARAMS
  xenbus: don't bail early from xenbus_dev_request_and_reply()
  xenbus: don't BUG() on user mode induced condition
  xen/pciback: Fix conf_space read/write overlap check.
  ARC: unwind: ensure that .debug_frame is generated (vs. .eh_frame)
  arc: unwind: warn only once if DW2_UNWIND is disabled
  kernel/sysrq, watchdog, sched/core: Reset watchdog on all CPUs while processing sysrq-w
  pps: do not crash when failed to register
  vmlinux.lds: account for destructor sections
  mm, meminit: ensure node is online before checking whether pages are uninitialised
  mm, meminit: always return a valid node from early_pfn_to_nid
  mm, compaction: prevent VM_BUG_ON when terminating freeing scanner
  fs/nilfs2: fix potential underflow in call to crc32_le
  mm, compaction: abort free scanner if split fails
  mm, sl[au]b: add __GFP_ATOMIC to the GFP reclaim mask
  dmaengine: at_xdmac: double FIFO flush needed to compute residue
  dmaengine: at_xdmac: fix residue corruption
  dmaengine: at_xdmac: align descriptors on 64 bits
  x86/quirks: Add early quirk to reset Apple AirPort card
  x86/quirks: Reintroduce scanning of secondary buses
  x86/quirks: Apply nvidia_bugs quirk only on root bus
  USB: OHCI: Don't mark EDs as ED_OPER if scheduling fails

Conflicts:
	arch/arm/kernel/topology.c
	arch/arm64/include/asm/arch_gicv3.h
	arch/arm64/kernel/topology.c
	block/bio.c
	drivers/cpufreq/Kconfig
	drivers/md/Makefile
	drivers/media/dvb-core/dvb_ringbuffer.c
	drivers/media/tuners/tuner-xc2028.c
	drivers/misc/Kconfig
	drivers/misc/Makefile
	drivers/mmc/core/host.c
	drivers/scsi/ufs/ufshcd.c
	drivers/scsi/ufs/ufshcd.h
	drivers/usb/dwc3/gadget.c
	drivers/usb/gadget/configfs.c
	fs/ecryptfs/file.c
	include/linux/mmc/core.h
	include/linux/mmc/host.h
	include/linux/mmzone.h
	include/linux/sched.h
	include/linux/sched/sysctl.h
	include/trace/events/power.h
	include/trace/events/sched.h
	init/Kconfig
	kernel/cpuset.c
	kernel/exit.c
	kernel/sched/Makefile
	kernel/sched/core.c
	kernel/sched/cputime.c
	kernel/sched/fair.c
	kernel/sched/features.h
	kernel/sched/rt.c
	kernel/sched/sched.h
	kernel/sched/stop_task.c
	kernel/sched/tune.c
	lib/Kconfig.debug
	mm/Makefile
	mm/vmstat.c

Change-Id: I243a43231ca56a6362076fa6301827e1b0493be5
Signed-off-by: Runmin Wang <runminw@codeaurora.org>
commit efbe378b81
656 changed files with 15023 additions and 3955 deletions
@@ -0,0 +1,8 @@
Memory bandwidth and frequency state tracking

Required properties:
- compatible : should be:
	"memory-state-time"
- freq-tbl: Should contain entries with each frequency in Hz.
- bw-buckets: Should contain upper-bound limits for each bandwidth bucket in Mbps.
	Must match the framework power_profile.xml for the device.
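An illustrative node using these properties (the frequency and bandwidth
values below are hypothetical and would have to match the device's
power_profile.xml):

	memory-state-time {
		compatible = "memory-state-time";
		/* supported memory frequencies, in Hz */
		freq-tbl = <400000000 800000000 1600000000>;
		/* upper bound of each bandwidth bucket, in Mbps */
		bw-buckets = <1000 4000 8000>;
	};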
@@ -0,0 +1,360 @@
===========================================================
Energy cost bindings for Energy Aware Scheduling
===========================================================

===========================================================
1 - Introduction
===========================================================

This note specifies bindings required for energy-aware scheduling
(EAS)[1]. Historically, the scheduler's primary objective has been
performance. EAS aims to provide an alternative objective - energy
efficiency. EAS relies on a simple platform energy cost model to
guide scheduling decisions. The model only considers the CPU
subsystem.

This note is aligned with the definition of the layout of physical
CPUs in the system as described in the ARM topology binding
description [2]. The concept is applicable to any system so long as
the cost model data is provided for those processing elements in
that system's topology that EAS is required to service.

Processing elements refer to hardware threads, CPUs and clusters of
related CPUs in increasing order of hierarchy.

EAS requires two key cost metrics - busy costs and idle costs. Busy
costs comprise a list of compute capacities for the processing
element in question and the corresponding power consumption at that
capacity. Idle costs comprise a list of power consumption values
for each idle state [C-state] that the processing element supports.
For a detailed description of these metrics, their derivation and
their use see [3].

These cost metrics are required for processing elements in all
scheduling domain levels that EAS is required to service.

===========================================================
2 - energy-costs node
===========================================================

Energy costs for the processing elements in scheduling domains that
EAS is required to service are defined in the energy-costs node
which acts as a container for the actual per processing element cost
nodes. A single energy-costs node is required for a given system.

- energy-costs node

	Usage: Required

	Description: The energy-costs node is a container node and
	its sub-nodes describe costs for each processing element at
	all scheduling domain levels that EAS is required to
	service.

	Node name must be "energy-costs".

	The energy-costs node's parent node must be the cpus node.

	The energy-costs node's child nodes can be:

	- one or more cost nodes.

	Any other configuration is considered invalid.

The energy-costs node can only contain a single type of child node
whose bindings are described in paragraph 4.

===========================================================
3 - energy-costs node child nodes naming convention
===========================================================

energy-costs child nodes must follow a naming convention where the
node name must be "thread-costN", "core-costN", "cluster-costN"
depending on whether the costs in the node are for a thread, core or
cluster. N (where N = {0, 1, ...}) is the node number and has no
bearing on the OS' logical thread, core or cluster index.

===========================================================
4 - cost node bindings
===========================================================

Bindings for cost nodes are defined as follows:

- cluster-cost node

	Description: must be declared within an energy-costs node. A
	system can contain multiple clusters and each cluster
	serviced by EAS must have a corresponding cluster-cost
	node.

	The cluster-cost node name must be "cluster-costN" as
	described in 3 above.

	A cluster-cost node must be a leaf node with no children.

	Properties for cluster-cost nodes are described in paragraph
	5 below.

	Any other configuration is considered invalid.

- core-cost node

	Description: must be declared within an energy-costs node. A
	system can contain multiple cores and each core serviced by
	EAS must have a corresponding core-cost node.

	The core-cost node name must be "core-costN" as described in
	3 above.

	A core-cost node must be a leaf node with no children.

	Properties for core-cost nodes are described in paragraph
	5 below.

	Any other configuration is considered invalid.

- thread-cost node

	Description: must be declared within an energy-costs node. A
	system can contain cores with multiple hardware threads and
	each thread serviced by EAS must have a corresponding
	thread-cost node.

	The thread-cost node name must be "thread-costN" as described
	in 3 above.

	A thread-cost node must be a leaf node with no children.

	Properties for thread-cost nodes are described in paragraph
	5 below.

	Any other configuration is considered invalid.

===========================================================
5 - Cost node properties
===========================================================

All cost node types must have only the following properties:

- busy-cost-data

	Usage: required
	Value type: An array of 2-item tuples. Each item is of type
	u32.
	Definition: The first item in the tuple is the capacity
	value as described in [3]. The second item in the tuple is
	the energy cost value as described in [3].

- idle-cost-data

	Usage: required
	Value type: An array of 1-item tuples. The item is of type
	u32.
	Definition: The item in the tuple is the energy cost value
	as described in [3].

===========================================================
6 - Extensions to the cpu node
===========================================================

The cpu node is extended with a property that establishes the
connection between the processing element represented by the cpu
node and the cost-nodes associated with this processing element.

The connection is expressed in line with the topological hierarchy
that this processing element belongs to, starting with the level in
the hierarchy that this processing element itself belongs to through
to the highest level that EAS is required to service. The
connection cannot be sparse and must be contiguous from the
processing element's level through to the highest desired level. The
highest desired level must be the same for all processing elements.

Example: Given that a cpu node may represent a thread that is a part
of a core, this property may contain multiple elements which
associate the thread with cost nodes describing the costs for the
thread itself, the core the thread belongs to, the cluster the core
belongs to and so on. The elements must be ordered from the lowest
level nodes to the highest desired level that EAS must service. The
highest desired level must be the same for all cpu nodes. The
elements must not be sparse: there must be elements for the current
thread, the next level of hierarchy (core) and so on without any
'holes'.

Example: Given that a cpu node may represent a core that is a part
of a cluster of related cpus, this property may contain multiple
elements which associate the core with cost nodes describing the
costs for the core itself, the cluster the core belongs to and so
on. The elements must be ordered from the lowest level nodes to the
highest desired level that EAS must service. The highest desired
level must be the same for all cpu nodes. The elements must not be
sparse: there must be elements for the current core, the next
level of hierarchy (cluster) and so on without any 'holes'.

If the system comprises hierarchical clusters of clusters, this
property will contain multiple associations with the relevant number
of cluster elements in hierarchical order.

Property added to the cpu node:

- sched-energy-costs

	Usage: required
	Value type: List of phandles
	Definition: a list of phandles to specific cost nodes in the
	energy-costs parent node that correspond to the processing
	element represented by this cpu node in hierarchical order
	of topology.

	The order of phandles in the list is significant. The first
	phandle is to the current processing element's own cost
	node. Subsequent phandles are to higher hierarchical level
	cost nodes up until the maximum level that EAS is to
	service.

	All cpu nodes must have the same highest level cost node.

	The phandle list must not be sparsely populated with handles
	to non-contiguous hierarchical levels. See commentary above
	for clarity.

	Any other configuration is invalid.

===========================================================
7 - Example dts
===========================================================

Example 1 (ARM 64-bit, 6-cpu system, two clusters of cpus, one
cluster of 2 Cortex-A57 cpus, one cluster of 4 Cortex-A53 cpus):

cpus {
	#address-cells = <2>;
	#size-cells = <0>;
	.
	.
	.
	A57_0: cpu@0 {
		compatible = "arm,cortex-a57","arm,armv8";
		reg = <0x0 0x0>;
		device_type = "cpu";
		enable-method = "psci";
		next-level-cache = <&A57_L2>;
		clocks = <&scpi_dvfs 0>;
		cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
		sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>;
	};

	A57_1: cpu@1 {
		compatible = "arm,cortex-a57","arm,armv8";
		reg = <0x0 0x1>;
		device_type = "cpu";
		enable-method = "psci";
		next-level-cache = <&A57_L2>;
		clocks = <&scpi_dvfs 0>;
		cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
		sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>;
	};

	A53_0: cpu@100 {
		compatible = "arm,cortex-a53","arm,armv8";
		reg = <0x0 0x100>;
		device_type = "cpu";
		enable-method = "psci";
		next-level-cache = <&A53_L2>;
		clocks = <&scpi_dvfs 1>;
		cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
		sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
	};

	A53_1: cpu@101 {
		compatible = "arm,cortex-a53","arm,armv8";
		reg = <0x0 0x101>;
		device_type = "cpu";
		enable-method = "psci";
		next-level-cache = <&A53_L2>;
		clocks = <&scpi_dvfs 1>;
		cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
		sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
	};

	A53_2: cpu@102 {
		compatible = "arm,cortex-a53","arm,armv8";
		reg = <0x0 0x102>;
		device_type = "cpu";
		enable-method = "psci";
		next-level-cache = <&A53_L2>;
		clocks = <&scpi_dvfs 1>;
		cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
		sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
	};

	A53_3: cpu@103 {
		compatible = "arm,cortex-a53","arm,armv8";
		reg = <0x0 0x103>;
		device_type = "cpu";
		enable-method = "psci";
		next-level-cache = <&A53_L2>;
		clocks = <&scpi_dvfs 1>;
		cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
		sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
	};

	energy-costs {
		CPU_COST_0: core-cost0 {
			busy-cost-data = <
				417   168
				579   251
				744   359
				883   479
				1024  616
			>;
			idle-cost-data = <
				15
				0
			>;
		};
		CPU_COST_1: core-cost1 {
			busy-cost-data = <
				235   33
				302   46
				368   61
				406   76
				447   93
			>;
			idle-cost-data = <
				6
				0
			>;
		};
		CLUSTER_COST_0: cluster-cost0 {
			busy-cost-data = <
				417   24
				579   32
				744   43
				883   49
				1024  64
			>;
			idle-cost-data = <
				65
				24
			>;
		};
		CLUSTER_COST_1: cluster-cost1 {
			busy-cost-data = <
				235   26
				303   30
				368   39
				406   47
				447   57
			>;
			idle-cost-data = <
				56
				17
			>;
		};
	};
};

===============================================================================
[1] https://lkml.org/lkml/2015/5/12/728
[2] Documentation/devicetree/bindings/topology.txt
[3] Documentation/scheduler/sched-energy.txt
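As an aid to reading the binding, here is a minimal sketch (not the actual
scheduler code) of how a consumer could walk a cpu node's sched-energy-costs
phandle list with the standard OF helpers:

	#include <linux/errno.h>
	#include <linux/of.h>

	/*
	 * Visit each cost node referenced by @cpu, lowest topology level
	 * first. of_parse_phandle() takes a reference on the returned
	 * node, dropped here with of_node_put().
	 */
	static int walk_sched_energy_costs(const struct device_node *cpu)
	{
		struct device_node *cost;
		int i, n;

		n = of_count_phandle_with_args(cpu, "sched-energy-costs",
					       NULL);
		if (n < 0)
			return n;

		for (i = 0; i < n; i++) {
			cost = of_parse_phandle(cpu, "sched-energy-costs", i);
			if (!cost)
				return -EINVAL;
			/* read busy-cost-data / idle-cost-data here */
			of_node_put(cost);
		}
		return 0;
	}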
@@ -348,7 +348,7 @@ address perms offset dev inode pathname
 a7cb1000-a7cb2000 ---p 00000000 00:00 0
 a7cb2000-a7eb2000 rw-p 00000000 00:00 0
 a7eb2000-a7eb3000 ---p 00000000 00:00 0
-a7eb3000-a7ed5000 rw-p 00000000 00:00 0          [stack:1001]
+a7eb3000-a7ed5000 rw-p 00000000 00:00 0
 a7ed5000-a8008000 r-xp 00000000 03:00 4222       /lib/libc.so.6
 a8008000-a800a000 r--p 00133000 03:00 4222       /lib/libc.so.6
 a800a000-a800b000 rw-p 00135000 03:00 4222       /lib/libc.so.6

@@ -380,7 +380,6 @@ is not associated with a file:

 [heap]                   = the heap of the program
 [stack]                  = the stack of the main process
-[stack:1001]             = the stack of the thread with tid 1001
 [vdso]                   = the "virtual dynamic shared object",
                            the kernel system call handler
 [anon:<name>]            = an anonymous mapping that has been

@@ -390,10 +389,8 @@ is not associated with a file:

 The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint
 of the individual tasks of a process. In this file you will see a mapping marked
-as [stack] if that task sees it as a stack. This is a key difference from the
-content of /proc/PID/maps, where you will see all mappings that are being used
-as stack by all of those tasks. Hence, for the example above, the task-level
-map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
+as [stack] if that task sees it as a stack. Hence, for the example above, the
+task-level map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:

 08048000-08049000 r-xp 00000000 03:00 8312    /opt/test
 08049000-0804a000 rw-p 00001000 03:00 8312    /opt/test
|
|
@@ -923,6 +923,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
+	dm=		[DM] Allows early creation of a device-mapper device.
+			See Documentation/device-mapper/boot.txt.

 	dmasound=	[HW,OSS] Sound subsystem buffers

 	dma_debug=off	If the kernel is compiled with DMA_API_DEBUG support,
			this option disables the debugging code at boot.
@@ -271,3 +271,9 @@ Since the private key is used to sign modules, viruses and malware could use
 the private key to sign modules and compromise the operating system. The
 private key must be either destroyed or moved to a secure location and not kept
 in the root node of the kernel source tree.
+
+If you use the same private key to sign modules for multiple kernel
+configurations, you must ensure that the module version information is
+sufficient to prevent loading a module into a different kernel. Either
+set CONFIG_MODVERSIONS=y or ensure that each configuration has a different
+kernel release string by changing EXTRAVERSION or CONFIG_LOCALVERSION.
Documentation/scheduler/sched-energy.txt | 362 (new file)
@@ -0,0 +1,362 @@
Energy cost model for energy-aware scheduling (EXPERIMENTAL)

Introduction
============

The basic energy model uses platform energy data stored in sched_group_energy
data structures attached to the sched_groups in the sched_domain hierarchy. The
energy cost model offers two functions that can be used to guide scheduling
decisions:

1. static unsigned int sched_group_energy(struct energy_env *eenv)
2. static int energy_diff(struct energy_env *eenv)

sched_group_energy() estimates the energy consumed by all cpus in a specific
sched_group, including any shared resources owned exclusively by this group of
cpus. Resources shared with other cpus are excluded (e.g. later level caches).

energy_diff() estimates the total energy impact of a utilization change. That
is, adding, removing, or migrating utilization (tasks).

Both functions use a struct energy_env to specify the scenario to be evaluated:

	struct energy_env {
		struct sched_group	*sg_top;
		struct sched_group	*sg_cap;
		int			cap_idx;
		int			util_delta;
		int			src_cpu;
		int			dst_cpu;
		int			energy;
	};

sg_top: sched_group to be evaluated. Not used by energy_diff().

sg_cap: sched_group covering the cpus in the same frequency domain. Set by
sched_group_energy().

cap_idx: Capacity state to be used for energy calculations. Set by
find_new_capacity().

util_delta: Amount of utilization to be added, removed, or migrated.

src_cpu: Source cpu from which 'util_delta' utilization is removed. Should be
-1 if there is no source (e.g. task wake-up).

dst_cpu: Destination cpu where 'util_delta' utilization is added. Should be -1
if utilization is removed (e.g. terminating tasks).

energy: Result of sched_group_energy().
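To make the interface concrete, here is a rough usage sketch (mine, not taken
from the patch; task_util() and the sign convention, with a negative result
meaning the change saves energy, are assumptions):

	struct energy_env eenv = {
		.util_delta	= task_util(p),	/* utilization being moved    */
		.src_cpu	= 2,		/* cpu the utilization leaves */
		.dst_cpu	= 1,		/* cpu it would land on       */
	};

	if (energy_diff(&eenv) < 0) {
		/* assumed convention: negative => the move saves energy */
		/* ... prefer dst_cpu when placing task p ... */
	}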
The metric used to represent utilization is the actual per-entity running time
averaged over time using a geometric series. It is very similar to the existing
per-entity load-tracking, but _not_ scaled by task priority and capped by the
capacity of the cpu. The latter property means that utilization may
underestimate the compute requirements for tasks on fully/over-utilized cpus.
The greatest potential for energy savings without affecting performance too much
lies in scenarios where the system isn't fully utilized. If the system is deemed
fully utilized, load-balancing should instead be done with task load (which
includes task priority) in the interest of fairness and performance.
|
||||
===========================
|
||||
|
||||
To make it clear from the start:
|
||||
|
||||
energy = [joule] (resource like a battery on powered devices)
|
||||
power = energy/time = [joule/second] = [watt]
|
||||
|
||||
The goal of energy-aware scheduling is to minimize energy, while still getting
|
||||
the job done. That is, we want to maximize:
|
||||
|
||||
performance [inst/s]
|
||||
--------------------
|
||||
power [W]
|
||||
|
||||
which is equivalent to minimizing:
|
||||
|
||||
energy [J]
|
||||
-----------
|
||||
instruction
|
||||
|
||||
while still getting 'good' performance. It is essentially an alternative
|
||||
optimization objective to the current performance-only objective for the
|
||||
scheduler. This alternative considers two objectives: energy-efficiency and
|
||||
performance. Hence, there needs to be a user controllable knob to switch the
|
||||
objective. Since it is early days, this is currently a sched_feature
|
||||
(ENERGY_AWARE).
|
||||
|
||||
The idea behind introducing an energy cost model is to allow the scheduler to
|
||||
evaluate the implications of its decisions rather than applying energy-saving
|
||||
techniques blindly that may only have positive effects on some platforms. At
|
||||
the same time, the energy cost model must be as simple as possible to minimize
|
||||
the scheduler latency impact.
|
||||
|
||||
Platform topology
|
||||
------------------
|
||||
|
||||
The system topology (cpus, caches, and NUMA information, not peripherals) is
|
||||
represented in the scheduler by the sched_domain hierarchy which has
|
||||
sched_groups attached at each level that covers one or more cpus (see
|
||||
sched-domains.txt for more details). To add energy awareness to the scheduler
|
||||
we need to consider power and frequency domains.
|
||||
|
||||
Power domain:

A power domain is a part of the system that can be powered on/off
independently. Power domains are typically organized in a hierarchy where you
may be able to power down just a cpu or a group of cpus along with any
associated resources (e.g. shared caches). Powering up a cpu means that all
power domains it is a part of in the hierarchy must be powered up. Hence, it is
more expensive to power up the first cpu that belongs to a higher level power
domain than powering up additional cpus in the same high level domain. Two
level power domain hierarchy example:

                Power source
                         +-------------------------------+----...
per group PD             G                               G
                         |           +----------+        |
                    +--------+-------| Shared   |  (other groups)
per-cpu PD          G        G       | resource |
                    |        |       +----------+
                +-------+ +-------+
                | CPU 0 | | CPU 1 |
                +-------+ +-------+
Frequency domain:

Frequency domains (P-states) typically cover the same group of cpus as one of
the power domain levels. That is, there might be several smaller power domains
sharing the same frequency (P-state) or there might be a power domain spanning
multiple frequency domains.

From a scheduling point of view there is no need to know the actual frequencies
[Hz]. All the scheduler cares about is the compute capacity available at the
current state (P-state) the cpu is in and any other available states. For that
reason, and to also factor in any cpu micro-architecture differences, compute
capacity scaling states are called 'capacity states' in this document. For SMP
systems this is equivalent to P-states. For mixed micro-architecture systems
(like ARM big.LITTLE) it is P-states scaled according to the micro-architecture
performance relative to the other cpus in the system.
Energy modelling:
------------------

Due to the hierarchical nature of the power domains, the most obvious way to
model energy costs is to associate power and energy costs with domains (groups
of cpus). Energy costs of shared resources are associated with the group of
cpus that share the resources; only the cost of powering the cpu itself and any
private resources (e.g. private L1 caches) is associated with the per-cpu
groups (lowest level).

For example, for an SMP system with per-cpu power domains and a cluster level
(group of cpus) power domain we get the overall energy costs to be:

	energy = energy_cluster + n * energy_cpu

where 'n' is the number of cpus powered up and energy_cluster is the cost paid
as soon as any cpu in the cluster is powered up.
The power and frequency domains can naturally be mapped onto the existing
sched_domain hierarchy and sched_groups by adding the necessary data to the
existing data structures.

The energy model considers energy consumption from two contributors (shown in
the illustration below):

1. Busy energy: Energy consumed while a cpu and the higher level groups that it
belongs to are busy running tasks. Busy energy is associated with the state of
the cpu, not an event. The time the cpu spends in this state varies. Thus, the
most obvious platform parameter for this contribution is busy power
(energy/time).

2. Idle energy: Energy consumed while a cpu and the higher level groups that it
belongs to are idle (in a C-state). Like busy energy, idle energy is associated
with the state of the cpu. Thus, the platform parameter for this contribution
is idle power (energy/time).

Energy consumed during transitions from an idle-state (C-state) to a busy state
(P-state), or going the other way, is ignored by the model to simplify the
energy model calculations.
       Power
       ^
       |            busy->idle             idle->busy
       |            transition             transition
       |
       |                _                      __
       |               / \                    /  \__________________
       |______________/   \                  /
       |                   \                /
       |  Busy              \    Idle      /         Busy
       |  low P-state        \____________/          high P-state
       |
       +------------------------------------------------------------> time

 Busy    |--------------|                          |-----------------|

 Wakeup                                      |------|

 Idle                   |------------|
The basic algorithm
====================

The basic idea is to determine the total energy impact when utilization is
added or removed by estimating the impact at each level in the sched_domain
hierarchy starting from the bottom (sched_group contains just a single cpu).
The energy cost comes from busy time (sched_group is awake because one or more
cpus are busy) and idle time (in an idle-state). Energy model numbers account
for energy costs associated with all cpus in the sched_group as a group.

	for_each_domain(cpu, sd) {
		sg = sched_group_of(cpu)
		energy_before = curr_util(sg) * busy_power(sg)
				+ (1-curr_util(sg)) * idle_power(sg)
		energy_after = new_util(sg) * busy_power(sg)
				+ (1-new_util(sg)) * idle_power(sg)
		energy_diff += energy_before - energy_after
	}

	return energy_diff
{curr, new}_util: The cpu utilization at the lowest level and the overall
non-idle time for the entire group for higher levels. Utilization is in the
range 0.0 to 1.0 in the pseudo-code.

busy_power: The power consumption of the sched_group.

idle_power: The power consumption of the sched_group when idle.

Note: It is a fundamental assumption that the utilization is (roughly) scale
invariant. Task utilization tracking factors in any frequency scaling and
performance scaling differences due to different cpu micro-architectures, so
that task utilization can be used across the entire system.
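The pseudo-code above maps almost one-to-one onto plain C. The toy program
below (my own sketch with invented power numbers, not kernel code) evaluates
the same walk for a fixed two-level hierarchy:

	#include <stdio.h>

	struct group { double busy_power, idle_power; };	/* invented numbers */

	/* Same walk as the pseudo-code: sum (before - after) over the levels. */
	static double energy_diff_est(const struct group *lvl, int nr,
				      const double *curr_util, const double *next_util)
	{
		double diff = 0.0;
		int i;

		for (i = 0; i < nr; i++) {
			double before = curr_util[i] * lvl[i].busy_power +
					(1 - curr_util[i]) * lvl[i].idle_power;
			double after  = next_util[i] * lvl[i].busy_power +
					(1 - next_util[i]) * lvl[i].idle_power;
			diff += before - after;
		}
		return diff;	/* positive => the change saves energy */
	}

	int main(void)
	{
		struct group lvl[] = { { 600, 0 }, { 3000, 65 } };	/* cpu, cluster */
		double curr[] = { 0.8, 0.8 }, next[] = { 0.5, 0.5 };

		printf("estimated saving: %.1f bogo-units\n",
		       energy_diff_est(lvl, 2, curr, next));
		return 0;
	}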
Platform energy data
=====================

struct sched_group_energy can be attached to sched_groups in the sched_domain
hierarchy and has the following members:

cap_states:
	List of struct capacity_state representing the supported capacity states
	(P-states). struct capacity_state has two members: cap and power, which
	represent the compute capacity and the busy_power of the state. The
	list must be ordered by capacity low->high.

nr_cap_states:
	Number of capacity states in the cap_states list.

idle_states:
	List of struct idle_state containing the idle_state power cost for each
	idle-state supported by the system, ordered by shallowest state first.
	All states must be included at all levels in the hierarchy, i.e. a
	sched_group spanning just a single cpu must also include coupled
	idle-states (cluster states). In addition to the cpuidle idle-states,
	the list must also contain an entry for idling using the arch default
	idle (arch_idle_cpu()). Although this state may not be a true hardware
	idle-state, it is considered the shallowest idle-state in the energy
	model and must be the first entry. cpus may enter this state (possibly
	'active idling') if cpuidle decides not to enter a cpuidle idle-state.
	Default idle may not be used when cpuidle is enabled. In this case, it
	should just be a copy of the first cpuidle idle-state.

nr_idle_states:
	Number of idle states in the idle_states list.

There are no unit requirements for the energy cost data. Data can be normalized
with any reference; however, the normalization must be consistent across all
energy cost data. That is, one bogo-joule/watt must be the same quantity for
all data, but we don't care what it is.
A recipe for platform characterization
=======================================

Obtaining the actual model data for a particular platform requires some way of
measuring power/energy. There isn't a tool to help with this (yet). This
section provides a recipe for use as reference. It covers the steps used to
characterize the ARM TC2 development platform. This sort of measurement is
expected to be done anyway when tuning cpuidle and cpufreq for a given
platform.

The energy model needs two types of data (struct sched_group_energy holds
these) for each sched_group where energy costs should be taken into account:

1. Capacity state information

A list containing the compute capacity and power consumption when fully
utilized attributed to the group as a whole for each available capacity state.
At the lowest level (group contains just a single cpu) this is the power of the
cpu alone without including power consumed by resources shared with other cpus.
It basically needs to fit the basic modelling approach described in the
"Background and Terminology" section:

	energy_system = energy_shared + n * energy_cpu

for a system containing 'n' busy cpus. Only 'energy_cpu' should be included at
the lowest level. 'energy_shared' is included at the next level, which
represents the group of cpus among which the resources are shared.
This model is, of course, a simplification of reality. Thus, power/energy
attributions might not always exactly represent how the hardware is designed.
Also, busy power is likely to depend on the workload. It is therefore
recommended to use a representative mix of workloads when characterizing the
capacity states.

If the group has no capacity scaling support, the list will contain a single
state where power is the busy power attributed to the group. The capacity
should be set to a default value (1024).

When frequency domains include multiple power domains, the group representing
the frequency domain and all child groups share capacity states. This must be
indicated by setting the SD_SHARE_CAP_STATES sched_domain flag. All groups at
all levels that share the capacity state must have the list of capacity states
with the power set to the contribution of the individual group.

2. Idle power information

Stored in the idle_states list. The power number is the group idle power
consumption in each idle state, as well as when the group is idle but has not
entered an idle-state ('active idle' as mentioned earlier). Due to the way the
energy model is defined, the idle power of the deepest group idle state can
alternatively be accounted for in the parent group busy power. In that case the
group idle state power values are offset such that the idle power of the
deepest state is zero. This is less intuitive, but it is easier to measure, as
the idle power consumed by the group and the busy/idle power of the parent
group cannot be distinguished without per-group measurement points.
Measuring capacity states and idle power:

The capacity states' capacity and power can be estimated by running a benchmark
workload at each available capacity state. By restricting the benchmark to run
on subsets of cpus it is possible to extrapolate the power consumption of
shared resources.

ARM TC2 has two clusters of two and three cpus respectively. Each cluster has a
shared L2 cache. TC2 has on-chip energy counters per cluster. Running a
benchmark workload on just one cpu in a cluster means that power is consumed in
the cluster (higher level group) and a single cpu (lowest level group). Adding
another benchmark task to another cpu increases the power consumption by the
amount consumed by the additional cpu. Hence, it is possible to extrapolate the
cluster busy power.
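A back-of-the-envelope version of that extrapolation (a sketch with invented
wattage numbers, purely to illustrate the subtraction):

	#include <stdio.h>

	int main(void)
	{
		/* cluster-level energy-counter readings while the benchmark
		 * is pinned to one and then two cpus of the same cluster */
		double p_one_busy = 3.5;	/* cluster + one busy cpu  */
		double p_two_busy = 4.5;	/* cluster + two busy cpus */

		double p_cpu     = p_two_busy - p_one_busy;	/* per-cpu busy power */
		double p_cluster = p_one_busy - p_cpu;		/* cluster busy power */

		printf("cpu: %.1f W, cluster: %.1f W\n", p_cpu, p_cluster);
		return 0;
	}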
For platforms that don't have energy counters or equivalent instrumentation
built-in, it may be possible to use an external DAQ to acquire similar data.

If the benchmark includes some performance score (for example the sysbench cpu
benchmark), this can be used to record the compute capacity.

Measuring idle power requires insight into the idle state implementation on the
particular platform; specifically, whether the platform has coupled idle-states
(or package states). To measure non-coupled per-cpu idle-states it is necessary
to keep one cpu busy to keep any shared resources alive, in order to isolate
the idle power of the cpu from the idle/busy power of the shared resources. The
cpu can be tricked into different per-cpu idle states by disabling the other
states. Based on various combinations of measurements with specific cpus busy
and with idle-states disabled, it is possible to extrapolate the idle-state
power.
Documentation/scheduler/sched-tune.txt | 366 (new file)
@@ -0,0 +1,366 @@
Central, scheduler-driven, power-performance control
                     (EXPERIMENTAL)

Abstract
========

The topic of a single, simple power-performance tunable that is wholly
scheduler-centric and has well-defined and predictable properties has come up
on several occasions in the past [1,2]. With techniques such as
scheduler-driven DVFS [3], we now have a good framework for implementing such a
tunable. This document describes the overall ideas behind its design and
implementation.


Table of Contents
=================

1. Motivation
2. Introduction
3. Signal Boosting Strategy
4. OPP selection using boosted CPU utilization
5. Per task group boosting
6. Questions and Answers
   - What about "auto" mode?
   - What about boosting on a congested system?
   - How are CPUs boosted when we have tasks with multiple boost values?
7. References
1. Motivation
=============

Sched-DVFS [3] is a new event-driven cpufreq governor which allows the
scheduler to select the optimal DVFS operating point (OPP) for running a task
allocated to a CPU. The introduction of sched-DVFS enables running workloads at
the most energy efficient OPPs.

However, sometimes it may be desirable to intentionally boost the performance
of a workload even if that could imply a reasonable increase in energy
consumption. For example, in order to reduce the response time of a task, we
may want to run the task at a higher OPP than the one that is actually required
by its CPU bandwidth demand.

This last requirement is especially important if we consider that one of the
main goals of the sched-DVFS component is to replace all currently available
CPUFreq policies. Since sched-DVFS is event based, as opposed to the sampling
driven governors we currently have, it is already more responsive at selecting
the optimal OPP to run tasks allocated to a CPU. However, just tracking the
actual task load demand may not be enough from a performance standpoint. For
example, it is not possible to get behaviors similar to those provided by the
"performance" and "interactive" CPUFreq governors.

This document describes an implementation of a tunable, stacked on top of
sched-DVFS, which extends its functionality to support task performance
boosting.

By "performance boosting" we mean the reduction of the time required to
complete a task activation, i.e. the time elapsed from a task wakeup to its
next deactivation (e.g. because it goes back to sleep or it terminates). For
example, if we consider a simple periodic task which executes the same workload
for 5[s] every 20[s] while running at a certain OPP, a boosted execution of
that task must complete each of its activations in less than 5[s].

A previous attempt [5] to introduce such a boosting feature has not been
successful, mainly because of the complexity of the proposed solution. The
approach described in this document exposes a single simple interface to
user-space. This single tunable knob allows the tuning of system-wide scheduler
behaviours, ranging from energy efficiency at one end through to incremental
performance boosting at the other end. This first tunable affects all tasks.
However, a more advanced extension of the concept is also provided, which uses
CGroups to boost the performance of only selected tasks while using the
energy-efficient default for all others.

The rest of this document introduces in more detail the proposed solution,
which has been named SchedTune.
2. Introduction
===============

SchedTune exposes a simple user-space interface with a single power-performance
tunable:

	/proc/sys/kernel/sched_cfs_boost

This permits expressing a boost value as an integer in the range [0..100].

A value of 0 (default) configures the CFS scheduler for maximum energy
efficiency. This means that sched-DVFS runs the tasks at the minimum OPP
required to satisfy their workload demand.
A value of 100 configures the scheduler for maximum performance, which
translates to the selection of the maximum OPP on that CPU.

The range between 0 and 100 can be set to satisfy other scenarios suitably. For
example to satisfy interactive response or depending on other system events
(battery level etc).

A CGroup based extension is also provided, which permits further user-space
defined task classification to tune the scheduler for different goals depending
on the specific nature of the task, e.g. background vs interactive vs
low-priority.

The overall design of the SchedTune module is built on top of "Per-Entity Load
Tracking" (PELT) signals and sched-DVFS by introducing a bias on the Operating
Performance Point (OPP) selection.
Each time a task is allocated on a CPU, sched-DVFS has the opportunity to tune
the operating frequency of that CPU to better match the workload demand. The
selection of the actual OPP being activated is influenced by the global boost
value, or the boost value for the task CGroup when in use.

This simple biasing approach leverages existing frameworks, which means minimal
modifications to the scheduler, and yet it makes it possible to achieve a range
of different behaviours all from a single simple tunable knob.
The only new concept introduced is that of signal boosting.
3. Signal Boosting Strategy
===========================

The whole PELT machinery works based on the value of a few load tracking
signals which basically track the CPU bandwidth requirements for tasks and the
capacity of CPUs. The basic idea behind the SchedTune knob is to artificially
inflate some of these load tracking signals to make a task or RQ appear more
demanding than it actually is.

Which signals have to be inflated depends on the specific "consumer". However,
independently of the specific (signal, consumer) pair, it is important to
define a simple and possibly consistent strategy for the concept of boosting a
signal.

A boosting strategy defines how the "abstract" user-space defined
sched_cfs_boost value is translated into an internal "margin" value to be added
to a signal to get its inflated value:

	margin := boosting_strategy(sched_cfs_boost, signal)
	boosted_signal := signal + margin

Different boosting strategies were identified and analyzed before selecting the
one found to be most effective.
Signal Proportional Compensation (SPC)
--------------------------------------

In this boosting strategy the sched_cfs_boost value is used to compute a
margin which is proportional to the complement of the original signal.
When a signal has a maximum possible value, its complement is defined as
the delta between the actual value and the possible maximum.

Since the tunable implementation uses signals which have SCHED_LOAD_SCALE as
the maximum possible value, the margin becomes:

	margin := sched_cfs_boost * (SCHED_LOAD_SCALE - signal)

Using this boosting strategy:
- a 100% sched_cfs_boost means that the signal is scaled to the maximum value
- each value in the range of sched_cfs_boost effectively inflates the signal in
  question by a quantity which is proportional to the maximum value

For example, by applying the SPC boosting strategy to the selection of the OPP
to run a task, it is possible to achieve these behaviors:

-   0% boosting: run the task at the minimum OPP required by its workload
- 100% boosting: run the task at the maximum OPP available for the CPU
-  50% boosting: run at the half-way OPP between minimum and maximum

This means that, at 50% boosting, a task will be scheduled to run at half of
the maximum theoretically achievable performance on the specific target
platform.
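As a concrete sketch of the SPC arithmetic (my own toy code, not the kernel
implementation; the percentage-based integer math and rounding are
assumptions):

	#define SCHED_LOAD_SCALE 1024

	/* boost in [0..100], signal in [0..SCHED_LOAD_SCALE] */
	static unsigned long spc_boosted(unsigned long signal, unsigned int boost)
	{
		unsigned long margin = (SCHED_LOAD_SCALE - signal) * boost / 100;

		return signal + margin;	/* never exceeds SCHED_LOAD_SCALE */
	}

	/* spc_boosted(512, 50)  -> 768  (midway to the ceiling)    */
	/* spc_boosted(512, 100) -> 1024 (saturated at the ceiling) */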
A graphical representation of an SPC boosted signal is represented in the
following figure where:
 a) "-" represents the original signal
 b) "b" represents a 50% boosted signal
 c) "p" represents a 100% boosted signal


   ^
   |  SCHED_LOAD_SCALE
   +-----------------------------------------------------------------+
   |pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp
   |
   |                                             boosted_signal
   |                                          bbbbbbbbbbbbbbbbbbbbbbbb
   |
   |                                            original signal
   |                  bbbbbbbbbbbbbbbbbbbbbbbb+----------------------+
   |                                          |
   |bbbbbbbbbbbbbbbbbb                        |
   |                                          |
   |                                          |
   |                                          |
   |                  +-----------------------+
   |                  |
   |                  |
   |                  |
   |------------------+
   |
   |
   +----------------------------------------------------------------------->
The plot above shows a ramped load signal (titled 'original_signal') and its
boosted equivalent. For each step of the original signal the boosted signal
corresponding to a 50% boost is midway between the original signal and the
upper bound. Boosting by 100% generates a boosted signal which is always
saturated to the upper bound.


4. OPP selection using boosted CPU utilization
==============================================

It is worth calling out that the implementation does not introduce any new load
signals. Instead, it provides an API to tune existing signals. This tuning is
done on demand and only in scheduler code paths where it is sensible to do so.
The new API calls are defined to return either the default signal or a boosted
one, depending on the value of sched_cfs_boost. This is a clean and
non-invasive modification of the existing code paths.

The signal representing a CPU's utilization is boosted according to the
previously described SPC boosting strategy. To sched-DVFS, this allows a CPU
(i.e. a CFS run-queue) to appear more utilized than it actually is.

Thus, with sched_cfs_boost enabled we have the following main functions to
get the current utilization of a CPU:

	cpu_util()
	boosted_cpu_util()

The new boosted_cpu_util() is similar to the first but returns a boosted
utilization signal which is a function of the sched_cfs_boost value.

This function is used in the CFS scheduler code paths where sched-DVFS needs to
decide the OPP to run a CPU at. For example, this allows selecting the highest
OPP for a CPU which has the boost value set to 100%.
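The relationship can be sketched as follows (toy stand-ins throughout; the
kernel's actual helpers and boost plumbing differ, and current_boost() is an
invented placeholder):

	/* Toy stand-ins returning fixed values for illustration. */
	static unsigned long cpu_util(int cpu)      { (void)cpu; return 512; }
	static unsigned int  current_boost(int cpu) { (void)cpu; return 50; }

	#define SCHED_LOAD_SCALE 1024

	static unsigned long boosted_cpu_util(int cpu)
	{
		unsigned long util = cpu_util(cpu);	/* unboosted utilization */
		unsigned long margin = (SCHED_LOAD_SCALE - util) *
				       current_boost(cpu) / 100;

		/* sched-DVFS would select the OPP for this inflated value */
		return util + margin;
	}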
5. Per task group boosting
==========================

The availability of a single knob which is used to boost all tasks in the
system is certainly a simple solution, but it quite likely doesn't fit many
utilization scenarios, especially in the mobile device space.

For example, on battery powered devices there usually are many background
services which are long running and need energy-efficient scheduling. On the
other hand, some applications are more performance sensitive and require an
interactive response and/or maximum performance, regardless of the energy cost.
To better service such scenarios, the SchedTune implementation has an extension
that provides a more fine-grained boosting interface.

A new CGroup controller, namely "schedtune", can be enabled, which allows task
groups with different boost values to be defined and configured.
Tasks that require special performance can be put into separate CGroups.
The value of the boost associated with the tasks in this group can be specified
using a single knob exposed by the CGroup controller:

	schedtune.boost

This knob allows the definition of a boost value that is to be used for
SPC boosting of all tasks attached to this group.
The current schedtune controller implementation is really simple and has these
main characteristics:

1) It is only possible to create 1 level depth hierarchies

   The root control group defines the system-wide boost value to be applied
   by default to all tasks. Its direct subgroups are named "boost groups" and
   they define the boost value for specific sets of tasks.
   Further nested subgroups are not allowed since they do not have a sensible
   meaning from a user-space standpoint.

2) It is possible to define only a limited number of "boost groups"

   This number is defined at compile time and by default configured to 16.
   This is a design decision motivated by two main reasons:
   a) In a real system we do not expect utilization scenarios with more than a
      few boost groups. For example, a reasonable collection of groups could
      be just "background", "interactive" and "performance".
   b) It simplifies the implementation considerably, especially for the code
      which has to compute the per-CPU boosting once there are multiple
      RUNNABLE tasks with different boost values.

Such a simple design should allow servicing the main utilization scenarios
identified so far. It provides a simple interface which can be used to manage
the power-performance of all tasks or only selected tasks.
Moreover, this interface can be easily integrated by user-space run-times (e.g.
Android, ChromeOS) to implement a QoS solution for task boosting based on task
classification, which has been a long standing requirement.
Setup and usage
---------------

0. Use a kernel with CGROUP_SCHEDTUNE support enabled

1. Check that the "schedtune" CGroup controller is available:

   root@linaro-nano:~# cat /proc/cgroups
   #subsys_name	hierarchy	num_cgroups	enabled
   cpuset	0	1	1
   cpu		0	1	1
   schedtune	0	1	1

2. Mount a tmpfs to create the CGroups mount point (Optional)

   root@linaro-nano:~# sudo mount -t tmpfs cgroups /sys/fs/cgroup

3. Mount the "schedtune" controller

   root@linaro-nano:~# mkdir /sys/fs/cgroup/stune
   root@linaro-nano:~# sudo mount -t cgroup -o schedtune stune /sys/fs/cgroup/stune

4. Set up the system-wide boost value (Optional)

   If not configured, the root control group has a 0% boost value, which
   basically disables boosting for all tasks in the system, thus running in
   an energy-efficient mode.

   root@linaro-nano:~# echo $SYSBOOST > /sys/fs/cgroup/stune/schedtune.boost

5. Create task groups and configure their specific boost value (Optional)

   For example, here we create a "performance" boost group configured to boost
   all its tasks to 100%:

   root@linaro-nano:~# mkdir /sys/fs/cgroup/stune/performance
   root@linaro-nano:~# echo 100 > /sys/fs/cgroup/stune/performance/schedtune.boost

6. Move tasks into the boost group

   For example, the following moves the task with PID $TASKPID (and all its
   threads) into the "performance" boost group:

   root@linaro-nano:~# echo $TASKPID > /sys/fs/cgroup/stune/performance/cgroup.procs

This simple configuration allows only the threads of the $TASKPID task to run,
when needed, at the highest OPP in the most capable CPU of the system.
6. Questions and Answers
========================

What about "auto" mode?
-----------------------

The 'auto' mode as described in [5] can be implemented by interfacing SchedTune
with some suitable user-space element. This element could use the exposed
system-wide or cgroup based interface.

How are multiple groups of tasks with different boost values managed?
---------------------------------------------------------------------

The current SchedTune implementation keeps track of the boosted RUNNABLE tasks
on a CPU. Once sched-DVFS selects the OPP to run a CPU at, the CPU utilization
is boosted with a value which is the maximum of the boost values of the
currently RUNNABLE tasks in its RQ.

This allows sched-DVFS to boost a CPU only while there are boosted tasks ready
to run, and to switch back to the energy-efficient mode as soon as the last
boosted task is dequeued.
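A minimal sketch of that max-aggregation rule (types and the cpu_boost() helper
are illustrative only, not the kernel's data structures):

	/* per-CPU boost = largest boost among its RUNNABLE tasks */
	static unsigned int cpu_boost(const unsigned int *task_boosts, int nr_runnable)
	{
		unsigned int max = 0;	/* no runnable boosted tasks => no boost */
		int i;

		for (i = 0; i < nr_runnable; i++)
			if (task_boosts[i] > max)
				max = task_boosts[i];
		return max;
	}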
7. References
=============
[1] http://lwn.net/Articles/552889
[2] http://lkml.org/lkml/2012/5/18/91
[3] http://lkml.org/lkml/2015/6/26/620
@@ -196,3 +196,35 @@ Another, more verbose way of getting PAT related debug messages is with
 "debugpat" boot parameter. With this parameter, various debug messages are
 printed to dmesg log.

+PAT Initialization
+------------------
+
+The following table describes how PAT is initialized under various
+configurations. The PAT MSR must be updated by Linux in order to support WC
+and WT attributes. Otherwise, the PAT MSR has the value programmed in it
+by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests.
+
+ MTRR PAT   Call Sequence               PAT State  PAT MSR
+ =========================================================
+ E    E     MTRR -> PAT init            Enabled    OS
+ E    D     MTRR -> PAT init            Disabled   -
+ D    E     MTRR -> PAT disable         Disabled   BIOS
+ D    D     MTRR -> PAT disable         Disabled   -
+ -    np/E  PAT  -> PAT disable         Disabled   BIOS
+ -    np/D  PAT  -> PAT disable         Disabled   -
+ E    !P/E  MTRR -> PAT init            Disabled   BIOS
+ D    !P/E  MTRR -> PAT disable         Disabled   BIOS
+ !M   !P/E  MTRR stub -> PAT disable    Disabled   BIOS
+
+ Legend
+ ------------------------------------------------
+ E         Feature enabled in CPU
+ D         Feature disabled/unsupported in CPU
+ np        "nopat" boot option specified
+ !P        CONFIG_X86_PAT option unset
+ !M        CONFIG_MTRR option unset
+ Enabled   PAT state set to enabled
+ Disabled  PAT state set to disabled
+ OS        PAT initializes PAT MSR with OS setting
+ BIOS      PAT keeps PAT MSR with BIOS setting
Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 16
+SUBLEVEL = 21
 EXTRAVERSION =
 NAME = Blurry Fish Butt
@@ -141,6 +141,7 @@ CONFIG_PROFILING=y
 CONFIG_QUOTA=y
 CONFIG_RTC_CLASS=y
 CONFIG_RT_GROUP_SCHED=y
+CONFIG_SECCOMP=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y
@@ -11,6 +11,7 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
 CONFIG_CC_STACKPROTECTOR_STRONG=y
 CONFIG_COMPACTION=y
 CONFIG_DEBUG_RODATA=y
 CONFIG_DM_UEVENT=y

@@ -118,6 +119,7 @@ CONFIG_TIMER_STATS=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_UHID=y
+CONFIG_MEMORY_STATE_TIME=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_HIDDEV=y
@@ -423,6 +423,15 @@ config CC_STACKPROTECTOR_STRONG

 endchoice

+config HAVE_ARCH_WITHIN_STACK_FRAMES
+	bool
+	help
+	  An architecture should select this if it can walk the kernel stack
+	  frames to determine if an object is part of either the arguments
+	  or local variables (i.e. that it excludes saved return addresses,
+	  and similar) by implementing an inline arch_within_stack_frames(),
+	  which is used by CONFIG_HARDENED_USERCOPY.
+
 config HAVE_CONTEXT_TRACKING
 	bool
 	help
@@ -18,6 +18,20 @@ cflags-y	+= -fno-common -pipe -fno-builtin -D__linux__
 cflags-$(CONFIG_ISA_ARCOMPACT)	+= -mA7
 cflags-$(CONFIG_ISA_ARCV2)	+= -mcpu=archs

+is_700 = $(shell $(CC) -dM -E - < /dev/null | grep -q "ARC700" && echo 1 || echo 0)
+
+ifdef CONFIG_ISA_ARCOMPACT
+ifeq ($(is_700), 0)
+$(error Toolchain not configured for ARCompact builds)
+endif
+endif
+
+ifdef CONFIG_ISA_ARCV2
+ifeq ($(is_700), 1)
+$(error Toolchain not configured for ARCv2 builds)
+endif
+endif
+
 ifdef CONFIG_ARC_CURR_IN_REG
 # For a global register defintion, make sure it gets passed to every file
 # We had a customer reported bug where some code built in kernel was NOT using
@@ -48,8 +62,6 @@ endif

 endif

-cflags-$(CONFIG_ARC_DW2_UNWIND)	+= -fasynchronous-unwind-tables
-
 # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok
 ifeq ($(atleast_gcc48),y)
 cflags-$(CONFIG_ARC_DW2_UNWIND)	+= -gdwarf-2
@@ -374,12 +374,6 @@ static inline int is_isa_arcompact(void)
 	return IS_ENABLED(CONFIG_ISA_ARCOMPACT);
 }

-#if defined(CONFIG_ISA_ARCOMPACT) && !defined(_CPU_DEFAULT_A7)
-#error "Toolchain not configured for ARCompact builds"
-#elif defined(CONFIG_ISA_ARCV2) && !defined(_CPU_DEFAULT_HS)
-#error "Toolchain not configured for ARCv2 builds"
-#endif
-
 #endif /* __ASEMBLY__ */

 #endif /* _ASM_ARC_ARCREGS_H */
@@ -142,7 +142,7 @@

 #ifdef CONFIG_ARC_CURR_IN_REG
 	; Retrieve orig r25 and save it with rest of callee_regs
-	ld.as	r12, [r12, PT_user_r25]
+	ld	r12, [r12, PT_user_r25]
 	PUSH	r12
 #else
 	PUSH	r25

@@ -198,7 +198,7 @@

 	; SP is back to start of pt_regs
 #ifdef CONFIG_ARC_CURR_IN_REG
-	st.as	r12, [sp, PT_user_r25]
+	st	r12, [sp, PT_user_r25]
 #endif
 .endm
@@ -188,10 +188,10 @@ static inline int arch_irqs_disabled(void)
 .endm

 .macro IRQ_ENABLE  scratch
+	TRACE_ASM_IRQ_ENABLE
 	lr	\scratch, [status32]
 	or	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
 	flag	\scratch
-	TRACE_ASM_IRQ_ENABLE
 .endm

 #endif	/* __ASSEMBLY__ */
@@ -110,7 +110,7 @@
 #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)

 /* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)

 /* More Abbrevaited helpers */
 #define PAGE_U_NONE     __pgprot(___DEF)
@@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
	 * prelogue is setup (callee regs saved and then fp set and not other
	 * way around
	 */
-	pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
+	pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
	return 0;

 #endif
@@ -914,6 +914,15 @@ void arc_cache_init(void)

	printk(arc_cache_mumbojumbo(0, str, sizeof(str)));

+	/*
+	 * Only master CPU needs to execute rest of function:
+	 *  - Assume SMP so all cores will have same cache config so
+	 *    any geomtry checks will be same for all
+	 *  - IOC setup / dma callbacks only need to be setup once
+	 */
+	if (cpu)
+		return;
+
	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
		struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
@@ -36,6 +36,7 @@ config ARM
	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32
	select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32
	select HAVE_ARCH_MMAP_RND_BITS if MMU
+	select HAVE_ARCH_HARDENED_USERCOPY
	select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
	select HAVE_ARCH_TRACEHOOK
	select HAVE_BPF_JIT
@@ -84,6 +84,7 @@
		regulator-name = "emac-3v3";
		regulator-min-microvolt = <3300000>;
		regulator-max-microvolt = <3300000>;
+		startup-delay-us = <20000>;
		enable-active-high;
		gpio = <&pio 7 15 GPIO_ACTIVE_HIGH>;
	};

@@ -66,6 +66,7 @@
		regulator-name = "emac-3v3";
		regulator-min-microvolt = <3300000>;
		regulator-max-microvolt = <3300000>;
+		startup-delay-us = <20000>;
		enable-active-high;
		gpio = <&pio 7 19 GPIO_ACTIVE_HIGH>;
	};

@@ -80,6 +80,7 @@
		regulator-name = "emac-3v3";
		regulator-min-microvolt = <3300000>;
		regulator-max-microvolt = <3300000>;
+		startup-delay-us = <20000>;
		enable-active-high;
		gpio = <&pio 7 19 GPIO_ACTIVE_HIGH>; /* PH19 */
	};

@@ -79,6 +79,7 @@
		regulator-name = "emac-3v3";
		regulator-min-microvolt = <3300000>;
		regulator-max-microvolt = <3300000>;
+		startup-delay-us = <20000>;
		enable-active-high;
		gpio = <&pio 0 2 GPIO_ACTIVE_HIGH>;
	};
@@ -3,6 +3,7 @@

 #ifdef CONFIG_ARM_CPU_TOPOLOGY

+#include <linux/cpufreq.h>
 #include <linux/cpumask.h>

 struct cputopo_arm {

@@ -24,6 +25,12 @@ void init_cpu_topology(void);
 void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);

+#ifdef CONFIG_CPU_FREQ
+#define arch_scale_freq_capacity cpufreq_scale_freq_capacity
+#endif
+#define arch_scale_cpu_capacity scale_cpu_capacity
+extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu);
+
 #else

 static inline void init_cpu_topology(void) { }
@@ -496,7 +496,10 @@ arm_copy_from_user(void *to, const void __user *from, unsigned long n);
 static inline unsigned long __must_check
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	unsigned int __ua_flags = uaccess_save_and_enable();
+	unsigned int __ua_flags;
+
+	check_object_size(to, n, false);
+	__ua_flags = uaccess_save_and_enable();
	n = arm_copy_from_user(to, from, n);
	uaccess_restore(__ua_flags);
	return n;

@@ -511,11 +514,15 @@ static inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 #ifndef CONFIG_UACCESS_WITH_MEMCPY
-	unsigned int __ua_flags = uaccess_save_and_enable();
+	unsigned int __ua_flags;
+
+	check_object_size(from, n, true);
+	__ua_flags = uaccess_save_and_enable();
	n = arm_copy_to_user(to, from, n);
	uaccess_restore(__ua_flags);
	return n;
 #else
+	check_object_size(from, n, true);
	return arm_copy_to_user(to, from, n);
 #endif
 }
@@ -781,7 +781,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
	struct resource *res;

	kernel_code.start   = virt_to_phys(_text);
-	kernel_code.end     = virt_to_phys(_etext - 1);
+	kernel_code.end     = virt_to_phys(__init_begin - 1);
	kernel_data.start   = virt_to_phys(_sdata);
	kernel_data.end     = virt_to_phys(_end - 1);
@@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
	mm_segment_t fs;
	long ret, err, i;

-	if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
+	if (maxevents <= 0 ||
+			maxevents > (INT_MAX/sizeof(*kbuf)) ||
+			maxevents > (INT_MAX/sizeof(*events)))
		return -EINVAL;
+	if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
+		return -EFAULT;
	kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

@@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid,

	if (nsops < 1 || nsops > SEMOPM)
		return -EINVAL;
+	if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
+		return -EFAULT;
	sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
	if (!sops)
		return -ENOMEM;
@@ -42,9 +42,15 @@
  */
 static DEFINE_PER_CPU(unsigned long, cpu_scale);

-unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
+unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu)
 {
+#ifdef CONFIG_CPU_FREQ
+	unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu);
+
+	return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT;
+#else
	return per_cpu(cpu_scale, cpu);
+#endif
 }

 static void set_capacity_scale(unsigned int cpu, unsigned long capacity)

@@ -343,6 +349,8 @@ out:
	return ret;
 }

+static const struct sched_group_energy * const cpu_core_energy(int cpu);
+
 /*
  * Look for a customed capacity of a CPU in the cpu_capacity table during the
  * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the

@@ -350,10 +358,14 @@ out:
  */
 static void update_cpu_capacity(unsigned int cpu)
 {
-	if (!cpu_capacity(cpu))
-		return;
+	unsigned long capacity = SCHED_CAPACITY_SCALE;

-	set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+	if (cpu_core_energy(cpu)) {
+		int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1;
+		capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap;
+	}
+
+	set_capacity_scale(cpu, capacity);

	pr_info("CPU%u: update cpu_capacity %lu\n",
		cpu, arch_scale_cpu_capacity(NULL, cpu));
@ -464,17 +476,138 @@ topology_populated:
|
|||
update_cpu_capacity(cpuid);
|
||||
}
|
||||
|
||||
/*
|
||||
* ARM TC2 specific energy cost model data. There are no unit requirements for
|
||||
* the data. Data can be normalized to any reference point, but the
|
||||
* normalization must be consistent. That is, one bogo-joule/watt must be the
|
||||
* same quantity for all data, but we don't care what it is.
|
||||
*/
|
||||
static struct idle_state idle_states_cluster_a7[] = {
|
||||
{ .power = 25 }, /* arch_cpu_idle() (active idle) = WFI */
|
||||
{ .power = 25 }, /* WFI */
|
||||
{ .power = 10 }, /* cluster-sleep-l */
|
||||
};
|
||||
|
||||
static struct idle_state idle_states_cluster_a15[] = {
|
||||
{ .power = 70 }, /* arch_cpu_idle() (active idle) = WFI */
|
||||
{ .power = 70 }, /* WFI */
|
||||
{ .power = 25 }, /* cluster-sleep-b */
|
||||
};
|
||||
|
||||
static struct capacity_state cap_states_cluster_a7[] = {
|
||||
/* Cluster only power */
|
||||
{ .cap = 150, .power = 2967, }, /* 350 MHz */
|
||||
{ .cap = 172, .power = 2792, }, /* 400 MHz */
|
||||
{ .cap = 215, .power = 2810, }, /* 500 MHz */
|
||||
{ .cap = 258, .power = 2815, }, /* 600 MHz */
|
||||
{ .cap = 301, .power = 2919, }, /* 700 MHz */
|
||||
{ .cap = 344, .power = 2847, }, /* 800 MHz */
|
||||
{ .cap = 387, .power = 3917, }, /* 900 MHz */
|
||||
{ .cap = 430, .power = 4905, }, /* 1000 MHz */
|
||||
};
|
||||
|
||||
static struct capacity_state cap_states_cluster_a15[] = {
|
||||
/* Cluster only power */
|
||||
{ .cap = 426, .power = 7920, }, /* 500 MHz */
|
||||
{ .cap = 512, .power = 8165, }, /* 600 MHz */
|
||||
{ .cap = 597, .power = 8172, }, /* 700 MHz */
|
||||
{ .cap = 682, .power = 8195, }, /* 800 MHz */
|
||||
{ .cap = 768, .power = 8265, }, /* 900 MHz */
|
||||
{ .cap = 853, .power = 8446, }, /* 1000 MHz */
|
||||
{ .cap = 938, .power = 11426, }, /* 1100 MHz */
|
||||
{ .cap = 1024, .power = 15200, }, /* 1200 MHz */
|
||||
};
|
||||
|
||||
static struct sched_group_energy energy_cluster_a7 = {
|
||||
.nr_idle_states = ARRAY_SIZE(idle_states_cluster_a7),
|
||||
.idle_states = idle_states_cluster_a7,
|
||||
	.nr_cap_states	= ARRAY_SIZE(cap_states_cluster_a7),
	.cap_states	= cap_states_cluster_a7,
 };

 static struct sched_group_energy energy_cluster_a15 = {
	.nr_idle_states	= ARRAY_SIZE(idle_states_cluster_a15),
	.idle_states	= idle_states_cluster_a15,
	.nr_cap_states	= ARRAY_SIZE(cap_states_cluster_a15),
	.cap_states	= cap_states_cluster_a15,
 };

 static struct idle_state idle_states_core_a7[] = {
	{ .power = 0 }, /* arch_cpu_idle (active idle) = WFI */
	{ .power = 0 }, /* WFI */
	{ .power = 0 }, /* cluster-sleep-l */
 };

 static struct idle_state idle_states_core_a15[] = {
	{ .power = 0 }, /* arch_cpu_idle (active idle) = WFI */
	{ .power = 0 }, /* WFI */
	{ .power = 0 }, /* cluster-sleep-b */
 };

 static struct capacity_state cap_states_core_a7[] = {
	/* Power per cpu */
	{ .cap =  150, .power =  187, }, /*  350 MHz */
	{ .cap =  172, .power =  275, }, /*  400 MHz */
	{ .cap =  215, .power =  334, }, /*  500 MHz */
	{ .cap =  258, .power =  407, }, /*  600 MHz */
	{ .cap =  301, .power =  447, }, /*  700 MHz */
	{ .cap =  344, .power =  549, }, /*  800 MHz */
	{ .cap =  387, .power =  761, }, /*  900 MHz */
	{ .cap =  430, .power = 1024, }, /* 1000 MHz */
 };

 static struct capacity_state cap_states_core_a15[] = {
	/* Power per cpu */
	{ .cap =  426, .power = 2021, }, /*  500 MHz */
	{ .cap =  512, .power = 2312, }, /*  600 MHz */
	{ .cap =  597, .power = 2756, }, /*  700 MHz */
	{ .cap =  682, .power = 3125, }, /*  800 MHz */
	{ .cap =  768, .power = 3524, }, /*  900 MHz */
	{ .cap =  853, .power = 3846, }, /* 1000 MHz */
	{ .cap =  938, .power = 5177, }, /* 1100 MHz */
	{ .cap = 1024, .power = 6997, }, /* 1200 MHz */
 };

 static struct sched_group_energy energy_core_a7 = {
	.nr_idle_states	= ARRAY_SIZE(idle_states_core_a7),
	.idle_states	= idle_states_core_a7,
	.nr_cap_states	= ARRAY_SIZE(cap_states_core_a7),
	.cap_states	= cap_states_core_a7,
 };

 static struct sched_group_energy energy_core_a15 = {
	.nr_idle_states	= ARRAY_SIZE(idle_states_core_a15),
	.idle_states	= idle_states_core_a15,
	.nr_cap_states	= ARRAY_SIZE(cap_states_core_a15),
	.cap_states	= cap_states_core_a15,
 };

 /* sd energy functions */
 static inline
 const struct sched_group_energy * const cpu_cluster_energy(int cpu)
 {
	return cpu_topology[cpu].cluster_id ? &energy_cluster_a7 :
			&energy_cluster_a15;
 }

 static inline
 const struct sched_group_energy * const cpu_core_energy(int cpu)
 {
	return cpu_topology[cpu].cluster_id ? &energy_core_a7 :
			&energy_core_a15;
 }

 static inline int cpu_corepower_flags(void)
 {
-	return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN;
+	return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN | \
+	       SD_SHARE_CAP_STATES;
 }

 static struct sched_domain_topology_level arm_topology[] = {
 #ifdef CONFIG_SCHED_MC
-	{ cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) },
-	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+	{ cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) },
 #endif
-	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ cpu_cpu_mask, NULL, cpu_cluster_energy, SD_INIT_NAME(DIE) },
	{ NULL, },
 };

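For readers unfamiliar with EAS energy models: each capacity_state pairs a normalized compute capacity with the per-CPU power drawn at that operating point, and the scheduler scans the table for the cheapest state that still satisfies a utilization. A standalone C sketch of that lookup (illustrative only — plain userspace code reusing the A7 numbers above, not part of the patch):

    #include <stdio.h>

    /* Shape mirrors the kernel's struct capacity_state above. */
    struct capacity_state { unsigned long cap, power; };

    static const struct capacity_state a7_states[] = {
        { 150, 187 }, { 172, 275 }, { 215, 334 }, { 258, 407 },
        { 301, 447 }, { 344, 549 }, { 387, 761 }, { 430, 1024 },
    };

    #define NSTATES (sizeof(a7_states) / sizeof(a7_states[0]))

    /* Pick the lowest capacity state that still satisfies `util`,
     * the way an energy-aware scheduler sizes a CPU's OPP. */
    static const struct capacity_state *pick_state(unsigned long util)
    {
        size_t i;

        for (i = 0; i < NSTATES; i++)
            if (a7_states[i].cap >= util)
                return &a7_states[i];
        return &a7_states[NSTATES - 1];    /* saturate at the top OPP */
    }

    int main(void)
    {
        const struct capacity_state *cs = pick_state(300);

        printf("cap=%lu power=%lu\n", cs->cap, cs->power); /* cap=301 power=447 */
        return 0;
    }
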
@@ -120,6 +120,8 @@ SECTIONS
 #ifdef CONFIG_DEBUG_RODATA
	. = ALIGN(1<<SECTION_SHIFT);
 #endif
+	_etext = .;			/* End of text section */
+
 	RO_DATA(PAGE_SIZE)

	. = ALIGN(4);

@@ -150,8 +152,6 @@ SECTIONS

 	NOTES

-	_etext = .;			/* End of text and rodata section */
-
 #ifndef CONFIG_XIP_KERNEL
 # ifdef CONFIG_ARM_KERNMEM_PERMS
	. = ALIGN(1<<SECTION_SHIFT);

@@ -572,7 +572,7 @@ static void __init build_mem_type_table(void)
	 * in the Short-descriptor translation table format descriptors.
	 */
	if (cpu_arch == CPU_ARCH_ARMv7 &&
-	    (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) == 4) {
+	    (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) >= 4) {
		user_pmd_table |= PMD_PXNTABLE;
	}
 #endif

@@ -51,6 +51,7 @@ config ARM64
	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
	select HAVE_ARCH_AUDITSYSCALL
	select HAVE_ARCH_BITREVERSE
+	select HAVE_ARCH_HARDENED_USERCOPY
	select HAVE_ARCH_HUGE_VMAP
	select HAVE_ARCH_JUMP_LABEL
	select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)

@@ -455,6 +456,15 @@ config CAVIUM_ERRATUM_22375

	  If unsure, say Y.

+config CAVIUM_ERRATUM_23144
+	bool "Cavium erratum 23144: ITS SYNC hang on dual socket system"
+	depends on NUMA
+	default y
+	help
+	  ITS SYNC command hang for cross node io and collections/cpu mapping.
+
+	  If unsure, say Y.
+
 config CAVIUM_ERRATUM_23154
	bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
	default y

@@ -465,6 +475,17 @@ config CAVIUM_ERRATUM_23154

	  If unsure, say Y.

+config CAVIUM_ERRATUM_27456
+	bool "Cavium erratum 27456: Broadcast TLBI instructions may cause icache corruption"
+	default y
+	help
+	  On ThunderX T88 pass 1.x through 2.1 parts, broadcast TLBI
+	  instructions may cause the icache to become corrupted if it
+	  contains data for a non-current ASID.  The fix is to
+	  invalidate the icache when changing the mm context.
+
+	  If unsure, say Y.
+
 endmenu

@@ -16,7 +16,7 @@ OBJCOPYFLAGS	:=-O binary -R .note -R .note.gnu.build-id -R .comment -S
 GZFLAGS		:=-9

 ifneq ($(CONFIG_RELOCATABLE),)
-LDFLAGS_vmlinux		+= -pie
+LDFLAGS_vmlinux		+= -pie -Bsymbolic
 endif

 KBUILD_DEFCONFIG := defconfig

@@ -262,6 +262,8 @@
		#io-channel-cells = <1>;
		clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
		clock-names = "saradc", "apb_pclk";
+		resets = <&cru SRST_SARADC>;
+		reset-names = "saradc-apb";
		status = "disabled";
	};

@@ -517,7 +519,7 @@
		#address-cells = <0>;

		reg = <0x0 0xffb71000 0x0 0x1000>,
-		      <0x0 0xffb72000 0x0 0x1000>,
+		      <0x0 0xffb72000 0x0 0x2000>,
		      <0x0 0xffb74000 0x0 0x2000>,
		      <0x0 0xffb76000 0x0 0x2000>;
		interrupts = <GIC_PPI 9

@@ -247,6 +247,8 @@ CONFIG_SCSI_UFSHCD_PLATFORM=y
 CONFIG_SCSI_UFS_QCOM=y
 CONFIG_SCSI_UFS_QCOM_ICE=y
 CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
 CONFIG_BLK_DEV_DM=y
 CONFIG_DM_CRYPT=y
 CONFIG_DM_REQ_CRYPT=y

@@ -249,6 +249,8 @@ CONFIG_SCSI_UFSHCD_PLATFORM=y
 CONFIG_SCSI_UFS_QCOM=y
 CONFIG_SCSI_UFS_QCOM_ICE=y
 CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
 CONFIG_BLK_DEV_DM=y
 CONFIG_DM_CRYPT=y
 CONFIG_DM_REQ_CRYPT=y

@@ -35,6 +35,8 @@
 #define ARM64_ALT_PAN_NOT_UAO			10

 #define ARM64_NCAPS				11
+#define ARM64_WORKAROUND_CAVIUM_27456		12
+

 #ifndef __ASSEMBLY__

@@ -140,6 +140,7 @@ typedef struct user_fpsimd_state elf_fpregset_t;

 #define SET_PERSONALITY(ex)		clear_thread_flag(TIF_32BIT);

+/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
 #define ARCH_DLINFO							\
 do {									\
	NEW_AUX_ENT(AT_SYSINFO_EHDR,					\

@@ -107,8 +107,6 @@
 #define TCR_EL2_MASK	(TCR_EL2_TG0 | TCR_EL2_SH0 | \
			 TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)

-#define TCR_EL2_FLAGS	(TCR_EL2_RES1 | TCR_EL2_PS_40B)
-
 /* VTCR_EL2 Registers bits */
 #define VTCR_EL2_RES1		(1 << 31)
 #define VTCR_EL2_PS_MASK	(7 << 16)

@@ -117,6 +117,8 @@ struct pt_regs {
	};
	u64 orig_x0;
	u64 syscallno;
+	u64 orig_addr_limit;
+	u64 unused;	// maintain 16 byte alignment
 };

 #define arch_has_single_step()	(1)

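The `unused` member exists purely so sizeof(struct pt_regs) stays a multiple of 16, as its comment says; AArch64 requires 16-byte stack alignment whenever a pt_regs frame is pushed on entry. A hypothetical standalone check of that invariant (simplified field list, not the real struct):

    #include <assert.h>
    #include <stdint.h>

    /* Simplified stand-in for the tail of arm64's struct pt_regs. */
    struct pt_regs_tail {
        uint64_t orig_x0;
        uint64_t syscallno;
        uint64_t orig_addr_limit;
        uint64_t unused;    /* pad: keeps the size a multiple of 16 */
    };

    /* Without `unused` the struct would be 24 bytes, and carving it out
     * of the stack would break the 16-byte SP alignment rule. */
    static_assert(sizeof(struct pt_regs_tail) % 16 == 0,
                  "pt_regs frames must stay 16-byte aligned");

    int main(void) { return 0; }
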
@@ -23,6 +23,15 @@ void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
 unsigned long arch_get_cpu_efficiency(int cpu);

+struct sched_domain;
+#ifdef CONFIG_CPU_FREQ
+#define arch_scale_freq_capacity cpufreq_scale_freq_capacity
+extern unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu);
+extern unsigned long cpufreq_scale_max_freq_capacity(int cpu);
+#endif
+#define arch_scale_cpu_capacity scale_cpu_capacity
+extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu);
+
 #include <asm-generic/topology.h>

 #endif /* _ASM_ARM_TOPOLOGY_H */

@@ -269,24 +269,39 @@ do {
		-EFAULT;						\
 })

-extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n);
-extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n);
+extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
+extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n);
 extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n);
 extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n);

+static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	check_object_size(to, n, false);
+	return __arch_copy_from_user(to, from, n);
+}
+
+static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	check_object_size(from, n, true);
+	return __arch_copy_to_user(to, from, n);
+}
+
 static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	if (access_ok(VERIFY_READ, from, n))
-		n = __copy_from_user(to, from, n);
-	else /* security hole - plug it */
+	if (access_ok(VERIFY_READ, from, n)) {
+		check_object_size(to, n, false);
+		n = __arch_copy_from_user(to, from, n);
+	} else /* security hole - plug it */
		memset(to, 0, n);
	return n;
 }

 static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	if (access_ok(VERIFY_WRITE, to, n))
-		n = __copy_to_user(to, from, n);
+	if (access_ok(VERIFY_WRITE, to, n)) {
+		check_object_size(from, n, true);
+		n = __arch_copy_to_user(to, from, n);
+	}
	return n;
 }

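The pattern being introduced here is the hardened-usercopy one: validate the kernel buffer's bounds before the raw copy routine runs. A rough userspace analogue (hypothetical helper names; the real check_object_size() consults slab/stack metadata rather than a caller-supplied size):

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for check_object_size(): refuse copies that
     * would run past the end of the destination object. */
    static int bounds_ok(const void *obj, size_t obj_size, size_t copy_len)
    {
        (void)obj;
        return copy_len <= obj_size;
    }

    static size_t copy_checked(void *dst, size_t dst_size,
                               const void *src, size_t len)
    {
        if (!bounds_ok(dst, dst_size, len))
            return len;        /* report everything as uncopied */
        memcpy(dst, src, len);
        return 0;              /* 0 bytes left, like copy_from_user() */
    }

    int main(void)
    {
        char buf[8];
        const char msg[16] = "0123456789abcde";

        printf("left=%zu\n", copy_checked(buf, sizeof(buf), msg, 16)); /* 16 */
        printf("left=%zu\n", copy_checked(buf, sizeof(buf), msg, 8));  /* 0 */
        return 0;
    }
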
@@ -19,4 +19,6 @@
 /* vDSO location */
 #define AT_SYSINFO_EHDR	33

+#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */
+
 #endif

@@ -34,8 +34,8 @@ EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(clear_page);

 /* user mem (segment) */
-EXPORT_SYMBOL(__copy_from_user);
-EXPORT_SYMBOL(__copy_to_user);
+EXPORT_SYMBOL(__arch_copy_from_user);
+EXPORT_SYMBOL(__arch_copy_to_user);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(__copy_in_user);

@@ -58,6 +58,7 @@ int main(void)
  DEFINE(S_PC,			offsetof(struct pt_regs, pc));
  DEFINE(S_ORIG_X0,		offsetof(struct pt_regs, orig_x0));
  DEFINE(S_SYSCALLNO,		offsetof(struct pt_regs, syscallno));
+  DEFINE(S_ORIG_ADDR_LIMIT,	offsetof(struct pt_regs, orig_addr_limit));
  DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
  BLANK();
  DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id.counter));

@@ -93,6 +93,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
		.capability = ARM64_WORKAROUND_CAVIUM_23154,
		MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01),
	},
 #endif
+#ifdef CONFIG_CAVIUM_ERRATUM_27456
+	{
+	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
+		.desc = "Cavium erratum 27456",
+		.capability = ARM64_WORKAROUND_CAVIUM_27456,
+		MIDR_RANGE(MIDR_THUNDERX, 0x00,
+			   (1 << MIDR_VARIANT_SHIFT) | 1),
+	},
+#endif
	{
	}

@@ -152,7 +152,6 @@ static int debug_monitors_init(void)
	/* Clear the OS lock. */
	on_each_cpu(clear_os_lock, NULL, 1);
	isb();
-	local_dbg_enable();

	/* Register hotplug handler. */
	__register_cpu_notifier(&os_lock_nb);

@@ -28,6 +28,7 @@
 #include <asm/errno.h>
 #include <asm/esr.h>
 #include <asm/irq.h>
+#include <asm/memory.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>

@@ -97,7 +98,13 @@
	mov	x29, xzr			// fp pointed to user-space
	.else
	add	x21, sp, #S_FRAME_SIZE
-	.endif
+	get_thread_info tsk
+	/* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
+	ldr	x20, [tsk, #TI_ADDR_LIMIT]
+	str	x20, [sp, #S_ORIG_ADDR_LIMIT]
+	mov	x20, #TASK_SIZE_64
+	str	x20, [tsk, #TI_ADDR_LIMIT]
+	.endif /* \el == 0 */
	mrs	x22, elr_el1
	mrs	x23, spsr_el1
	stp	lr, x21, [sp, #S_LR]

@@ -128,6 +135,12 @@
	.endm

	.macro	kernel_exit, el
+	.if	\el != 0
+	/* Restore the task's original addr_limit. */
+	ldr	x20, [sp, #S_ORIG_ADDR_LIMIT]
+	str	x20, [tsk, #TI_ADDR_LIMIT]
+	.endif
+
	ldp	x21, x22, [sp, #S_PC]		// load ELR, SPSR
	.if	\el == 0
	ct_user_enter

@@ -717,40 +717,25 @@ __primary_switch:
	 * Iterate over each entry in the relocation table, and apply the
	 * relocations in place.
	 */
-	ldr	w8, =__dynsym_offset		// offset to symbol table
	ldr	w9, =__rela_offset		// offset to reloc table
	ldr	w10, =__rela_size		// size of reloc table

	mov_q	x11, KIMAGE_VADDR		// default virtual offset
	add	x11, x11, x23			// actual virtual offset
-	add	x8, x8, x11			// __va(.dynsym)
	add	x9, x9, x11			// __va(.rela)
	add	x10, x9, x10			// __va(.rela) + sizeof(.rela)

 0:	cmp	x9, x10
-	b.hs	2f
+	b.hs	1f
	ldp	x11, x12, [x9], #24
	ldr	x13, [x9, #-8]
	cmp	w12, #R_AARCH64_RELATIVE
-	b.ne	1f
+	b.ne	0b
	add	x13, x13, x23			// relocate
	str	x13, [x11, x23]
	b	0b

-1:	cmp	w12, #R_AARCH64_ABS64
-	b.ne	0b
-	add	x12, x12, x12, lsl #1		// symtab offset: 24x top word
-	add	x12, x8, x12, lsr #(32 - 3)	// ... shifted into bottom word
-	ldrsh	w14, [x12, #6]			// Elf64_Sym::st_shndx
-	ldr	x15, [x12, #8]			// Elf64_Sym::st_value
-	cmp	w14, #-0xf			// SHN_ABS (0xfff1) ?
-	add	x14, x15, x23			// relocate
-	csel	x15, x14, x15, ne
-	add	x15, x13, x15
-	str	x15, [x11, x23]
-	b	0b
-
-2:
+1:
 #endif
	ldr	x8, =__primary_switched
	br	x8

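For reference, the post-patch loop is just an Elf64_Rela walk that adds the random load displacement to every R_AARCH64_RELATIVE slot; with -Bsymbolic in the Makefile change above, ABS64 entries no longer need processing. The same walk in plain C (a sketch under that assumption, with a self-contained demo "image"):

    #include <stdint.h>
    #include <stddef.h>

    #define R_AARCH64_RELATIVE 1027

    typedef struct {
        uint64_t r_offset;  /* where to apply the relocation */
        uint64_t r_info;    /* relocation type in the low 32 bits */
        int64_t  r_addend;  /* value the displacement is added to */
    } Elf64_Rela;

    /* Apply in-place relative relocations; anything that is not
     * R_AARCH64_RELATIVE is simply skipped, as in the loop above. */
    static void apply_relative(Elf64_Rela *rela, size_t n, uint64_t delta)
    {
        size_t i;

        for (i = 0; i < n; i++) {
            if ((uint32_t)rela[i].r_info != R_AARCH64_RELATIVE)
                continue;
            *(uint64_t *)(uintptr_t)(rela[i].r_offset + delta) =
                rela[i].r_addend + delta;
        }
    }

    int main(void)
    {
        uint64_t image[2] = { 0, 0 };   /* pretend this is the loaded image */
        Elf64_Rela r = {
            .r_offset = 8,              /* image offset of the 64-bit slot */
            .r_info   = R_AARCH64_RELATIVE,
            .r_addend = 0x1000,         /* link-time target address */
        };
        uint64_t delta = (uint64_t)(uintptr_t)image;  /* load displacement */

        apply_relative(&r, 1, delta);
        /* image[1] now holds 0x1000 + delta, the run-time address */
        return image[1] == 0x1000 + delta ? 0 : 1;
    }
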
@@ -214,7 +214,7 @@ static void __init request_standard_resources(void)
	struct resource *res;

	kernel_code.start   = virt_to_phys(_text);
-	kernel_code.end     = virt_to_phys(_etext - 1);
+	kernel_code.end     = virt_to_phys(__init_begin - 1);
	kernel_data.start   = virt_to_phys(_sdata);
	kernel_data.end     = virt_to_phys(_end - 1);

@@ -191,7 +191,6 @@ asmlinkage void secondary_start_kernel(void)
	set_cpu_online(cpu, true);
	complete(&cpu_running);

-	local_dbg_enable();
	local_irq_enable();
	local_async_enable();

@@ -346,8 +345,8 @@ void __init smp_cpus_done(unsigned int max_cpus)

 void __init smp_prepare_boot_cpu(void)
 {
-	cpuinfo_store_boot_cpu();
	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+	cpuinfo_store_boot_cpu();
 }

 static u64 __init of_get_cpu_mpidr(struct device_node *dn)

@@ -20,6 +20,8 @@
 #include <linux/of.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/sched_energy.h>

 #include <asm/cputype.h>
 #include <asm/topology.h>

@@ -35,7 +37,7 @@
  * rebalance_domains for all idle cores and the cpu_power can be updated
  * during this sequence.
  */
-static DEFINE_PER_CPU(unsigned long, cpu_scale);
+static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;

 unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
 {

@@ -47,6 +49,22 @@ static void set_power_scale(unsigned int cpu, unsigned long power)
	per_cpu(cpu_scale, cpu) = power;
 }

+unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+#ifdef CONFIG_CPU_FREQ
+	unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu);
+
+	return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT;
+#else
+	return per_cpu(cpu_scale, cpu);
+#endif
+}
+
+static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
+{
+	per_cpu(cpu_scale, cpu) = capacity;
+}
+
 static int __init get_cpu_for_node(struct device_node *node)
 {
	struct device_node *cpu_node;

@@ -371,11 +389,67 @@ static void update_cpu_power(unsigned int cpu)
 struct cpu_topology cpu_topology[NR_CPUS];
 EXPORT_SYMBOL_GPL(cpu_topology);

+/* sd energy functions */
+static inline
+const struct sched_group_energy * const cpu_cluster_energy(int cpu)
+{
+	struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL1];
+
+	if (!sge) {
+		pr_warn("Invalid sched_group_energy for Cluster%d\n", cpu);
+		return NULL;
+	}
+
+	return sge;
+}
+
+static inline
+const struct sched_group_energy * const cpu_core_energy(int cpu)
+{
+	struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL0];
+
+	if (!sge) {
+		pr_warn("Invalid sched_group_energy for CPU%d\n", cpu);
+		return NULL;
+	}
+
+	return sge;
+}
+
 const struct cpumask *cpu_coregroup_mask(int cpu)
 {
	return &cpu_topology[cpu].core_sibling;
 }

+static inline int cpu_corepower_flags(void)
+{
+	return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN | \
+	       SD_SHARE_CAP_STATES;
+}
+
+static struct sched_domain_topology_level arm64_topology[] = {
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) },
+#endif
+	{ cpu_cpu_mask, NULL, cpu_cluster_energy, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
+static void update_cpu_capacity(unsigned int cpu)
+{
+	unsigned long capacity = SCHED_CAPACITY_SCALE;
+
+	if (sched_energy_aware && cpu_core_energy(cpu)) {
+		int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1;
+		capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap;
+	}
+
+	set_capacity_scale(cpu, capacity);
+
+	pr_info("CPU%d: update cpu_capacity %lu\n",
+		cpu, arch_scale_cpu_capacity(NULL, cpu));
+}
+
 static void update_siblings_masks(unsigned int cpuid)
 {
	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];

@@ -438,6 +512,7 @@ void store_cpu_topology(unsigned int cpuid)
 topology_populated:
	update_siblings_masks(cpuid);
	update_cpu_power(cpuid);
+	update_cpu_capacity(cpuid);
 }

 static void __init reset_cpu_topology(void)

@@ -479,10 +554,12 @@ void __init init_cpu_topology(void)
	if (of_have_populated_dt() && parse_dt_topology()) {
		reset_cpu_topology();
	} else {
+		set_sched_topology(arm64_topology);
		for_each_possible_cpu(cpu)
			update_siblings_masks(cpu);
	}

	reset_cpu_power();
	parse_dt_cpu_power();
+	init_sched_energy_costs();
 }

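The arithmetic in scale_cpu_capacity() is Q10 fixed point: both cpu_scale and max_freq_scale are fractions of SCHED_CAPACITY_SCALE (1024), so the product has to be shifted back down once. The computation in isolation, with illustrative values:

    #include <stdio.h>

    #define SCHED_CAPACITY_SHIFT 10
    #define SCHED_CAPACITY_SCALE (1UL << SCHED_CAPACITY_SHIFT)

    int main(void)
    {
        unsigned long cpu_scale = 430;  /* e.g. a little core: 430/1024 */
        /* Frequency factor in the same Q10 format: 800 MHz of 1000 MHz max. */
        unsigned long max_freq_scale = 800 * SCHED_CAPACITY_SCALE / 1000;

        unsigned long cap = cpu_scale * max_freq_scale >> SCHED_CAPACITY_SHIFT;

        printf("%lu\n", cap);   /* 343: 430 scaled by ~0.8 */
        return 0;
    }
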
@@ -94,6 +94,7 @@ SECTIONS
		*(.discard)
		*(.discard.*)
		*(.interp .dynamic)
+		*(.dynsym .dynstr .hash)
	}

	. = KIMAGE_VADDR + TEXT_OFFSET;

@@ -120,12 +121,13 @@ SECTIONS
	}

	. = ALIGN(SEGMENT_ALIGN);
-	RO_DATA(PAGE_SIZE)		/* everything from this point to */
-	EXCEPTION_TABLE(8)		/* _etext will be marked RO NX   */
+	_etext = .;			/* End of text section */
+
+	RO_DATA(PAGE_SIZE)		/* everything from this point to */
+	EXCEPTION_TABLE(8)		/* __init_begin will be marked RO NX */
	NOTES

	. = ALIGN(SEGMENT_ALIGN);
-	_etext = .;			/* End of text and rodata section */
	__init_begin = .;

	INIT_TEXT_SECTION(8)

@@ -159,19 +161,9 @@ SECTIONS
	.rela : ALIGN(8) {
		*(.rela .rela*)
	}
-	.dynsym : ALIGN(8) {
-		*(.dynsym)
-	}
-	.dynstr : {
-		*(.dynstr)
-	}
-	.hash : {
-		*(.hash)
-	}

-	__rela_offset	= ADDR(.rela) - KIMAGE_VADDR;
+	__rela_offset	= ABSOLUTE(ADDR(.rela) - KIMAGE_VADDR);
	__rela_size	= SIZEOF(.rela);
-	__dynsym_offset	= ADDR(.dynsym) - KIMAGE_VADDR;

	. = ALIGN(SEGMENT_ALIGN);
	__init_end = .;

@@ -64,7 +64,7 @@ __do_hyp_init:
	mrs	x4, tcr_el1
	ldr	x5, =TCR_EL2_MASK
	and	x4, x4, x5
-	ldr	x5, =TCR_EL2_FLAGS
+	mov	x5, #TCR_EL2_RES1
	orr	x4, x4, x5

 #ifndef CONFIG_ARM64_VA_BITS_48

@@ -85,15 +85,18 @@ __do_hyp_init:
	ldr_l	x5, idmap_t0sz
	bfi	x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
 #endif
-	msr	tcr_el2, x4
-
-	ldr	x4, =VTCR_EL2_FLAGS
	/*
	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
-	 * VTCR_EL2.
+	 * TCR_EL2 and VTCR_EL2.
	 */
	mrs	x5, ID_AA64MMFR0_EL1
	bfi	x4, x5, #16, #3
+
+	msr	tcr_el2, x4
+
+	ldr	x4, =VTCR_EL2_FLAGS
+	bfi	x4, x5, #16, #3
+
	msr	vtcr_el2, x4

	mrs	x4, mair_el1

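`bfi x4, x5, #16, #3` inserts the low 3 bits of x5 — the PARange field just read from ID_AA64MMFR0_EL1 — into bits [18:16] of x4, the PS field of TCR_EL2/VTCR_EL2. The same bit-field insert written out in C (a sketch; the register values are made up):

    #include <stdint.h>
    #include <stdio.h>

    /* Insert the low `width` bits of `val` into `reg` at bit `lsb`,
     * which is what one BFI instruction does in a single step. */
    static uint64_t bfi(uint64_t reg, uint64_t val, unsigned lsb, unsigned width)
    {
        uint64_t mask = ((UINT64_C(1) << width) - 1) << lsb;

        return (reg & ~mask) | ((val << lsb) & mask);
    }

    int main(void)
    {
        uint64_t tcr = 0x80800000;  /* made-up TCR_EL2 value */
        uint64_t mmfr0 = 0x2;       /* PARange = 40 bits, for example */

        printf("0x%llx\n", (unsigned long long)bfi(tcr, mmfr0, 16, 3));
        return 0;
    }
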
@@ -66,7 +66,7 @@
	.endm

 end	.req	x5
-ENTRY(__copy_from_user)
+ENTRY(__arch_copy_from_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
	    CONFIG_ARM64_PAN)
	add	end, x0, x2

@@ -75,7 +75,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
	    CONFIG_ARM64_PAN)
	mov	x0, #0				// Nothing to copy
	ret
-ENDPROC(__copy_from_user)
+ENDPROC(__arch_copy_from_user)

	.section .fixup,"ax"
	.align	2

@@ -65,7 +65,7 @@
	.endm

 end	.req	x5
-ENTRY(__copy_to_user)
+ENTRY(__arch_copy_to_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
	    CONFIG_ARM64_PAN)
	add	end, x0, x2

@@ -74,7 +74,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
	    CONFIG_ARM64_PAN)
	mov	x0, #0
	ret
-ENDPROC(__copy_to_user)
+ENDPROC(__arch_copy_to_user)

	.section .fixup,"ax"
	.align	2

@@ -387,8 +387,8 @@ void __init mem_init(void)
		  MLM(MODULES_VADDR, MODULES_END),
		  MLG(VMALLOC_START, VMALLOC_END),
		  MLK_ROUNDUP(__init_begin, __init_end),
-		  MLK_ROUNDUP(_text, __start_rodata),
-		  MLK_ROUNDUP(__start_rodata, _etext),
+		  MLK_ROUNDUP(_text, _etext),
+		  MLK_ROUNDUP(__start_rodata, __init_begin),
		  MLK_ROUNDUP(_sdata, _edata),
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
		  MLG(VMEMMAP_START,

@@ -392,14 +392,14 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
	unsigned long kernel_start = __pa(_text);
-	unsigned long kernel_end = __pa(_etext);
+	unsigned long kernel_end = __pa(__init_begin);

	/*
	 * Take care not to create a writable alias for the
	 * read-only text and rodata sections of the kernel image.
	 */

-	/* No overlap with the kernel text */
+	/* No overlap with the kernel text/rodata */
	if (end < kernel_start || start >= kernel_end) {
		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
				     end - start, PAGE_KERNEL,

@@ -408,7 +408,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
	}

	/*
-	 * This block overlaps the kernel text mapping.
+	 * This block overlaps the kernel text/rodata mappings.
	 * Map the portion(s) which don't overlap.
	 */
	if (start < kernel_start)

@@ -423,7 +423,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
			     early_pgtable_alloc);

	/*
-	 * Map the linear alias of the [_text, _etext) interval as
+	 * Map the linear alias of the [_text, __init_begin) interval as
	 * read-only/non-executable. This makes the contents of the
	 * region accessible to subsystems such as hibernate, but
	 * protects it from inadvertent modification or execution.

@@ -453,14 +453,14 @@ void mark_rodata_ro(void)
 {
	unsigned long section_size;

-	section_size = (unsigned long)__start_rodata - (unsigned long)_text;
+	section_size = (unsigned long)_etext - (unsigned long)_text;
	create_mapping_late(__pa(_text), (unsigned long)_text,
			    section_size, PAGE_KERNEL_ROX);
	/*
-	 * mark .rodata as read only. Use _etext rather than __end_rodata to
-	 * cover NOTES and EXCEPTION_TABLE.
+	 * mark .rodata as read only. Use __init_begin rather than __end_rodata
+	 * to cover NOTES and EXCEPTION_TABLE.
	 */
-	section_size = (unsigned long)_etext - (unsigned long)__start_rodata;
+	section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
	create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
			    section_size, PAGE_KERNEL_RO);
 }

@@ -503,8 +503,8 @@ static void __init map_kernel(pgd_t *pgd)
 {
	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;

-	map_kernel_segment(pgd, _text, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text);
-	map_kernel_segment(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata);
+	map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
+	map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata);
	map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
			   &vmlinux_init);
	map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);

@@ -785,9 +785,9 @@ void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
	/*
	 * Check whether the physical FDT address is set and meets the minimum
	 * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be
-	 * at least 8 bytes so that we can always access the size field of the
-	 * FDT header after mapping the first chunk, double check here if that
-	 * is indeed the case.
+	 * at least 8 bytes so that we can always access the magic and size
+	 * fields of the FDT header after mapping the first chunk, double check
+	 * here if that is indeed the case.
	 */
	BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
	if (!dt_phys || dt_phys % MIN_FDT_ALIGN)

@@ -815,7 +815,7 @@ void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
	create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
			dt_virt_base, SWAPPER_BLOCK_SIZE, prot);

-	if (fdt_check_header(dt_virt) != 0)
+	if (fdt_magic(dt_virt) != FDT_MAGIC)
		return NULL;

	*size = fdt_totalsize(dt_virt);

@@ -25,6 +25,8 @@
 #include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>

 #include "proc-macros.S"

@@ -183,7 +185,17 @@ ENTRY(cpu_do_switch_mm)
	bfi	x0, x1, #48, #16		// set the ASID
	msr	ttbr0_el1, x0			// set TTBR0
	isb
+alternative_if_not ARM64_WORKAROUND_CAVIUM_27456
	ret
+	nop
+	nop
+	nop
+alternative_else
+	ic	iallu
+	dsb	nsh
+	isb
+	ret
+alternative_endif
 ENDPROC(cpu_do_switch_mm)

	.pushsection ".idmap.text", "ax"

@@ -228,6 +240,8 @@ ENTRY(__cpu_setup)
	msr	cpacr_el1, x0			// Enable FP/ASIMD
	mov	x0, #1 << 12			// Reset mdscr_el1 and disable
	msr	mdscr_el1, x0			// access to the DCC from EL0
+	isb					// Unmask debug exceptions now,
+	enable_dbg				// since this is per-cpu
	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
	/*
	 * Memory region attributes for LPAE:

@@ -53,6 +53,7 @@ config IA64
	select MODULES_USE_ELF_RELA
	select ARCH_USE_CMPXCHG_LOCKREF
	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_HARDENED_USERCOPY
	default y
	help
	  The Itanium Processor Family is Intel's 64-bit successor to

@@ -241,12 +241,18 @@ extern unsigned long __must_check __copy_user (void __user *to, const void __user
 static inline unsigned long
 __copy_to_user (void __user *to, const void *from, unsigned long count)
 {
+	if (!__builtin_constant_p(count))
+		check_object_size(from, count, true);
+
	return __copy_user(to, (__force void __user *) from, count);
 }

 static inline unsigned long
 __copy_from_user (void *to, const void __user *from, unsigned long count)
 {
+	if (!__builtin_constant_p(count))
+		check_object_size(to, count, false);
+
	return __copy_user((__force void __user *) to, from, count);
 }

@@ -258,8 +264,11 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
	const void *__cu_from = (from);							\
	long __cu_len = (n);								\
											\
-	if (__access_ok(__cu_to, __cu_len, get_fs()))					\
-		__cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len);	\
+	if (__access_ok(__cu_to, __cu_len, get_fs())) {					\
+		if (!__builtin_constant_p(n))						\
+			check_object_size(__cu_from, __cu_len, true);			\
+		__cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len);	\
+	}										\
	__cu_len;									\
 })

@@ -270,8 +279,11 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
	long __cu_len = (n);								\
											\
	__chk_user_ptr(__cu_from);							\
-	if (__access_ok(__cu_from, __cu_len, get_fs()))					\
+	if (__access_ok(__cu_from, __cu_len, get_fs())) {				\
+		if (!__builtin_constant_p(n))						\
+			check_object_size(__cu_to, __cu_len, false);			\
		__cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len);	\
+	}										\
	__cu_len;									\
 })

@@ -61,7 +61,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)	\
	"	CMPT	%0, #HI(0x02000000)\n"			\
	"	BNZ 1b\n"					\
	: "=&d" (temp), "=&da" (result)				\
-	: "da" (&v->counter), "bd" (i)				\
+	: "da" (&v->counter), "br" (i)				\
	: "cc");						\
								\
	smp_mb();						\

@@ -73,7 +73,7 @@ static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
	      "	DCACHE	[%2], %0\n"
 #endif
	      "2:\n"
-	      : "=&d" (temp), "=&da" (retval)
+	      : "=&d" (temp), "=&d" (retval)
	      : "da" (m), "bd" (old), "da" (new)
	      : "cc"
	      );

@@ -23,7 +23,7 @@ static struct clocksource clocksource_mips = {
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };

-static u64 notrace r4k_read_sched_clock(void)
+static u64 __maybe_unused notrace r4k_read_sched_clock(void)
 {
	return read_c0_count();
 }

@@ -82,7 +82,9 @@ int __init init_r4k_clocksource(void)

	clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);

+#ifndef CONFIG_CPU_FREQ
	sched_clock_register(r4k_read_sched_clock, 32, mips_hpt_frequency);
+#endif

	return 0;
 }

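sched_clock_register(..., 32, mips_hpt_frequency) converts the free-running 32-bit count register to nanoseconds through a precomputed mult/shift pair, which is only accurate while the counter frequency is fixed — hence the new CONFIG_CPU_FREQ guard. The conversion itself, sketched with illustrative constants (not the kernel's exact mult/shift selection):

    #include <stdint.h>
    #include <stdio.h>

    /* cyc -> ns as the sched_clock core does: ns = (cyc * mult) >> shift. */
    static uint64_t cyc_to_ns(uint64_t cyc, uint32_t mult, uint32_t shift)
    {
        return (cyc * mult) >> shift;
    }

    int main(void)
    {
        /* For a 100 MHz counter, 1 cycle = 10 ns; choose mult/shift so
         * that mult == 10 << shift. */
        uint32_t shift = 10, mult = 10 << 10;

        printf("%llu ns\n",
               (unsigned long long)cyc_to_ns(100000000, mult, shift));
        /* prints 1000000000 ns, i.e. one second's worth of cycles */
        return 0;
    }
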
@@ -344,7 +344,7 @@ EXPORT(sysn32_call_table)
	PTR	sys_ni_syscall			/* available, was setaltroot */
	PTR	sys_add_key
	PTR	sys_request_key
-	PTR	sys_keyctl			/* 6245 */
+	PTR	compat_sys_keyctl		/* 6245 */
	PTR	sys_set_thread_area
	PTR	sys_inotify_init
	PTR	sys_inotify_add_watch

@@ -500,7 +500,7 @@ EXPORT(sys32_call_table)
	PTR	sys_ni_syscall			/* available, was setaltroot */
	PTR	sys_add_key			/* 4280 */
	PTR	sys_request_key
-	PTR	sys_keyctl
+	PTR	compat_sys_keyctl
	PTR	sys_set_thread_area
	PTR	sys_inotify_init
	PTR	sys_inotify_add_watch		/* 4285 */

@@ -1629,8 +1629,14 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,

	preempt_disable();
	if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) {
-		if (kvm_mips_host_tlb_lookup(vcpu, va) < 0)
-			kvm_mips_handle_kseg0_tlb_fault(va, vcpu);
+		if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 &&
+		    kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) {
+			kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n",
+				__func__, va, vcpu, read_c0_entryhi());
+			er = EMULATE_FAIL;
+			preempt_enable();
+			goto done;
+		}
	} else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) ||
		   KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) {
		int index;

@@ -1665,14 +1671,19 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
						     run, vcpu);
			preempt_enable();
			goto dont_update_pc;
-		} else {
-			/*
-			 * We fault an entry from the guest tlb to the
-			 * shadow host TLB
-			 */
-			kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb,
-							     NULL,
-							     NULL);
		}
+		/*
+		 * We fault an entry from the guest tlb to the
+		 * shadow host TLB
+		 */
+		if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb,
+							 NULL, NULL)) {
+			kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
+				__func__, va, index, vcpu,
+				read_c0_entryhi());
+			er = EMULATE_FAIL;
+			preempt_enable();
+			goto done;
+		}
		}
	} else {

@@ -2633,8 +2644,13 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
			 * OK we have a Guest TLB entry, now inject it into the
			 * shadow host TLB
			 */
-			kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL,
-							     NULL);
+			if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb,
+								 NULL, NULL)) {
+				kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
+					__func__, va, index, vcpu,
+					read_c0_entryhi());
+				er = EMULATE_FAIL;
+			}
		}
	}

@@ -276,7 +276,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
	}

	gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT);
-	if (gfn >= kvm->arch.guest_pmap_npages) {
+	if ((gfn | 1) >= kvm->arch.guest_pmap_npages) {
		kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__,
			gfn, badvaddr);
		kvm_mips_dump_host_tlbs();

@@ -361,25 +361,39 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
	unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
	struct kvm *kvm = vcpu->kvm;
	pfn_t pfn0, pfn1;
+	gfn_t gfn0, gfn1;
+	long tlb_lo[2];

-	if ((tlb->tlb_hi & VPN2_MASK) == 0) {
-		pfn0 = 0;
-		pfn1 = 0;
-	} else {
-		if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo0)
-					   >> PAGE_SHIFT) < 0)
-			return -1;
+	tlb_lo[0] = tlb->tlb_lo0;
+	tlb_lo[1] = tlb->tlb_lo1;

-		if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo1)
-					   >> PAGE_SHIFT) < 0)
-			return -1;
+	/*
+	 * The commpage address must not be mapped to anything else if the guest
+	 * TLB contains entries nearby, or commpage accesses will break.
+	 */
+	if (!((tlb->tlb_hi ^ KVM_GUEST_COMMPAGE_ADDR) &
+			VPN2_MASK & (PAGE_MASK << 1)))
+		tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0;

-		pfn0 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo0)
-					    >> PAGE_SHIFT];
-		pfn1 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo1)
-					    >> PAGE_SHIFT];
+	gfn0 = mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT;
+	gfn1 = mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT;
+	if (gfn0 >= kvm->arch.guest_pmap_npages ||
+	    gfn1 >= kvm->arch.guest_pmap_npages) {
+		kvm_err("%s: Invalid gfn: [%#llx, %#llx], EHi: %#lx\n",
+			__func__, gfn0, gfn1, tlb->tlb_hi);
+		kvm_mips_dump_guest_tlbs(vcpu);
+		return -1;
	}
+
+	if (kvm_mips_map_page(kvm, gfn0) < 0)
+		return -1;
+
+	if (kvm_mips_map_page(kvm, gfn1) < 0)
+		return -1;
+
+	pfn0 = kvm->arch.guest_pmap[gfn0];
+	pfn1 = kvm->arch.guest_pmap[gfn1];

	if (hpa0)
		*hpa0 = pfn0 << PAGE_SHIFT;

@@ -391,9 +405,9 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
			kvm_mips_get_kernel_asid(vcpu) :
			kvm_mips_get_user_asid(vcpu));
	entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) |
-		   (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V);
+		   (tlb_lo[0] & MIPS3_PG_D) | (tlb_lo[0] & MIPS3_PG_V);
	entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
-		   (tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V);
+		   (tlb_lo[1] & MIPS3_PG_D) | (tlb_lo[1] & MIPS3_PG_V);

	kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
		  tlb->tlb_lo0, tlb->tlb_lo1);

@@ -794,10 +808,16 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu)
			local_irq_restore(flags);
			return KVM_INVALID_INST;
		}
-		kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
-						     &vcpu->arch.
-						     guest_tlb[index],
-						     NULL, NULL);
+		if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
+					&vcpu->arch.guest_tlb[index],
+					NULL, NULL)) {
+			kvm_err("%s: handling mapped seg tlb fault failed for %p, index: %u, vcpu: %p, ASID: %#lx\n",
+				__func__, opc, index, vcpu,
+				read_c0_entryhi());
+			kvm_mips_dump_guest_tlbs(vcpu);
+			local_irq_restore(flags);
+			return KVM_INVALID_INST;
+		}
		inst = *(opc);
	}
	local_irq_restore(flags);

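The `(gfn | 1)` trick works because a single MIPS TLB entry maps an even/odd pair of pages, so bounds-checking the odd member of the pair covers both halves with one compare. A quick demonstration:

    #include <stdio.h>

    /* One MIPS TLB entry covers the pair {gfn & ~1, gfn | 1}, so
     * checking the odd member bounds-checks both at once. */
    static int pair_in_range(unsigned long gfn, unsigned long npages)
    {
        return (gfn | 1) < npages;
    }

    int main(void)
    {
        printf("%d\n", pair_in_range(8, 10)); /* 1: pair {8,9} fits */
        printf("%d\n", pair_in_range(8, 9));  /* 0: partner gfn 9 is out of range */
        printf("%d\n", pair_in_range(10, 10));/* 0: out of range outright */
        return 0;
    }
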
@@ -13,8 +13,8 @@
 #define SMBUS_PCI_REG64		0x64
 #define SMBUS_PCI_REGB4		0xb4

-#define HPET_MIN_CYCLES		64
-#define HPET_MIN_PROG_DELTA	(HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
+#define HPET_MIN_CYCLES		16
+#define HPET_MIN_PROG_DELTA	(HPET_MIN_CYCLES * 12)

 static DEFINE_SPINLOCK(hpet_lock);
 DEFINE_PER_CPU(struct clock_event_device, hpet_clockevent_device);

@@ -157,14 +157,14 @@ static int hpet_tick_resume(struct clock_event_device *evt)
 static int hpet_next_event(unsigned long delta,
			   struct clock_event_device *evt)
 {
-	unsigned int cnt;
-	int res;
+	u32 cnt;
+	s32 res;

	cnt = hpet_read(HPET_COUNTER);
-	cnt += delta;
+	cnt += (u32) delta;
	hpet_write(HPET_T0_CMP, cnt);

-	res = (int)(cnt - hpet_read(HPET_COUNTER));
+	res = (s32)(cnt - hpet_read(HPET_COUNTER));

	return res < HPET_MIN_CYCLES ? -ETIME : 0;
 }

@@ -230,7 +230,7 @@ void __init setup_hpet_timer(void)

	cd = &per_cpu(hpet_clockevent_device, cpu);
	cd->name = "hpet";
-	cd->rating = 320;
+	cd->rating = 100;
	cd->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
	cd->set_state_shutdown = hpet_set_state_shutdown;
	cd->set_state_periodic = hpet_set_state_periodic;

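The u32/s32 changes matter for wraparound: the distance to the comparator must be computed modulo 2^32 and then read as a signed quantity, otherwise a comparator just past the counter's wrap point looks like it lies in the distant past. A sketch of the check:

    #include <stdint.h>
    #include <stdio.h>

    #define HPET_MIN_CYCLES 16

    /* Returns 0 if the comparator still lies safely in the future,
     * -1 (standing in for -ETIME) if it is too close or already passed. */
    static int next_event_ok(uint32_t counter_now, uint32_t cmp)
    {
        int32_t res = (int32_t)(cmp - counter_now);

        return res < HPET_MIN_CYCLES ? -1 : 0;
    }

    int main(void)
    {
        printf("%d\n", next_event_ok(0xfffffff0u, 0x10)); /* 0: 32 cycles ahead, across the wrap */
        printf("%d\n", next_event_ok(0x20, 0x10));        /* -1: already in the past */
        return 0;
    }
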
@@ -65,7 +65,7 @@ static struct insn insn_table[] = {
 #ifndef CONFIG_CPU_MIPSR6
	{ insn_cache,  M(cache_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 #else
-	{ insn_cache,  M6(cache_op, 0, 0, 0, cache6_op),  RS | RT | SIMM9 },
+	{ insn_cache,  M6(spec3_op, 0, 0, 0, cache6_op),  RS | RT | SIMM9 },
 #endif
	{ insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
	{ insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD },

@@ -97,10 +97,10 @@
 #define	ENOTCONN	235	/* Transport endpoint is not connected */
 #define	ESHUTDOWN	236	/* Cannot send after transport endpoint shutdown */
 #define	ETOOMANYREFS	237	/* Too many references: cannot splice */
-#define	EREFUSED	ECONNREFUSED	/* for HP's NFS apparently */
 #define	ETIMEDOUT	238	/* Connection timed out */
 #define	ECONNREFUSED	239	/* Connection refused */
-#define	EREMOTERELEASE	240	/* Remote peer released connection */
+#define	EREFUSED	ECONNREFUSED	/* for HP's NFS apparently */
+#define	EREMOTERELEASE	240	/* Remote peer released connection */
 #define	EHOSTDOWN	241	/* Host is down */
 #define	EHOSTUNREACH	242	/* No route to host */

@@ -160,6 +160,7 @@ config PPC
	select EDAC_ATOMIC_SCRUB
	select ARCH_HAS_DMA_SET_COHERENT_MASK
	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ARCH_HARDENED_USERCOPY

 config GENERIC_CSUM
	def_bool CPU_LITTLE_ENDIAN

@@ -164,6 +164,7 @@ struct coprocessor_request_block {
 #define ICSWX_INITIATED		(0x8)
 #define ICSWX_BUSY		(0x4)
 #define ICSWX_REJECTED		(0x2)
+#define ICSWX_XERS0		(0x1)	/* undefined or set from XERSO. */

 static inline int icswx(__be32 ccw, struct coprocessor_request_block *crb)
 {

@@ -325,10 +325,15 @@ static inline unsigned long copy_from_user(void *to,
 {
	unsigned long over;

-	if (access_ok(VERIFY_READ, from, n))
+	if (access_ok(VERIFY_READ, from, n)) {
+		if (!__builtin_constant_p(n))
+			check_object_size(to, n, false);
		return __copy_tofrom_user((__force void __user *)to, from, n);
+	}
	if ((unsigned long)from < TASK_SIZE) {
		over = (unsigned long)from + n - TASK_SIZE;
+		if (!__builtin_constant_p(n - over))
+			check_object_size(to, n - over, false);
		return __copy_tofrom_user((__force void __user *)to, from,
				n - over) + over;
	}

@@ -340,10 +345,15 @@ static inline unsigned long copy_to_user(void __user *to,
 {
	unsigned long over;

-	if (access_ok(VERIFY_WRITE, to, n))
+	if (access_ok(VERIFY_WRITE, to, n)) {
+		if (!__builtin_constant_p(n))
+			check_object_size(from, n, true);
		return __copy_tofrom_user(to, (__force void __user *)from, n);
+	}
	if ((unsigned long)to < TASK_SIZE) {
		over = (unsigned long)to + n - TASK_SIZE;
+		if (!__builtin_constant_p(n))
+			check_object_size(from, n - over, true);
		return __copy_tofrom_user(to, (__force void __user *)from,
				n - over) + over;
	}

@@ -387,6 +397,10 @@ static inline unsigned long __copy_from_user_inatomic(void *to,
		if (ret == 0)
			return 0;
	}
+
+	if (!__builtin_constant_p(n))
+		check_object_size(to, n, false);
+
	return __copy_tofrom_user((__force void __user *)to, from, n);
 }

@@ -413,6 +427,9 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to,
		if (ret == 0)
			return 0;
	}
+	if (!__builtin_constant_p(n))
+		check_object_size(from, n, true);
+
	return __copy_tofrom_user(to, (__force const void __user *)from, n);
 }

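The `!__builtin_constant_p(n)` guard skips the runtime size check when the copy length is a compile-time constant, since those copies can be vetted statically; only variable-length copies pay for the check. A small illustration of the shape of that gating (hypothetical check function; the branch folds away once the call is inlined at -O2):

    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical runtime check standing in for check_object_size(). */
    static void runtime_check(const void *obj, size_t len)
    {
        (void)obj;
        if (len > 4096)     /* made-up bound for the demo */
            abort();
    }

    static inline void copy_guarded(void *dst, const void *src, size_t n)
    {
        /* Constant n: the compiler can prove safety, skip the check.
         * Variable n: pay for it at runtime. */
        if (!__builtin_constant_p(n))
            runtime_check(dst, n);
        memcpy(dst, src, n);
    }

    int main(void)
    {
        char a[8], b[8] = "1234567";
        size_t n = strlen(b) + 1;

        copy_guarded(a, b, 8);  /* constant length */
        copy_guarded(a, b, n);  /* variable length: checked */
        return a[0] == '1' ? 0 : 1;
    }
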
@@ -677,7 +677,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
	/* Check if the request is finished successfully */
	if (active_flag) {
		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
-		if (rc <= 0)
+		if (rc < 0)
			return rc;

		if (rc & active_flag)

@@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim)
	std	r3, STK_PARAM(R3)(r1)
	SAVE_NVGPRS(r1)

-	/* We need to setup MSR for VSX register save instructions.  Here we
-	 * also clear the MSR RI since when we do the treclaim, we won't have a
-	 * valid kernel pointer for a while.  We clear RI here as it avoids
-	 * adding another mtmsr closer to the treclaim.  This makes the region
-	 * maked as non-recoverable wider than it needs to be but it saves on
-	 * inserting another mtmsrd later.
-	 */
+	/* We need to setup MSR for VSX register save instructions. */
	mfmsr	r14
	mr	r15, r14
	ori	r15, r15, MSR_FP
-	li	r16, MSR_RI
+	li	r16, 0
	ori	r16, r16, MSR_EE	/* IRQs hard off */
	andc	r15, r15, r16
	oris	r15, r15, MSR_VEC@h

@@ -176,7 +170,17 @@ dont_backup_fp:
 1:	tdeqi	r6, 0
	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0

-	/* The moment we treclaim, ALL of our GPRs will switch
+	/* Clear MSR RI since we are about to change r1, EE is already off. */
+	li	r4, 0
+	mtmsrd	r4, 1
+
+	/*
+	 * BE CAREFUL HERE:
+	 * At this point we can't take an SLB miss since we have MSR_RI
+	 * off. Load only to/from the stack/paca which are in SLB bolted regions
+	 * until we turn MSR RI back on.
+	 *
+	 * The moment we treclaim, ALL of our GPRs will switch
	 * to user register state.  (FPRs, CCR etc. also!)
	 * Use an sprg and a tm_scratch in the PACA to shuffle.
	 */

@@ -197,6 +201,11 @@ dont_backup_fp:

	/* Store the PPR in r11 and reset to decent value */
	std	r11, GPR11(r1)			/* Temporary stash */
+
+	/* Reset MSR RI so we can take SLB faults again */
+	li	r11, MSR_RI
+	mtmsrd	r11, 1
+
	mfspr	r11, SPRN_PPR
	HMT_MEDIUM

@@ -397,11 +406,6 @@ restore_gprs:
	ld	r5, THREAD_TM_DSCR(r3)
	ld	r6, THREAD_TM_PPR(r3)

-	/* Clear the MSR RI since we are about to change R1.  EE is already off
-	 */
-	li	r4, 0
-	mtmsrd	r4, 1
-
	REST_GPR(0, r7)				/* GPR0 */
	REST_2GPRS(2, r7)			/* GPR2-3 */
	REST_GPR(4, r7)				/* GPR4 */

@@ -439,10 +443,33 @@ restore_gprs:
	ld	r6, _CCR(r7)
	mtcr	r6

-	REST_GPR(1, r7)				/* GPR1 */
-	REST_GPR(5, r7)				/* GPR5-7 */
	REST_GPR(6, r7)
-	ld	r7, GPR7(r7)
+
+	/*
+	 * Store r1 and r5 on the stack so that we can access them
+	 * after we clear MSR RI.
+	 */
+
+	REST_GPR(5, r7)
+	std	r5, -8(r1)
+	ld	r5, GPR1(r7)
+	std	r5, -16(r1)
+
+	REST_GPR(7, r7)
+
+	/* Clear MSR RI since we are about to change r1. EE is already off */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/*
+	 * BE CAREFUL HERE:
+	 * At this point we can't take an SLB miss since we have MSR_RI
+	 * off. Load only to/from the stack/paca which are in SLB bolted regions
+	 * until we turn MSR RI back on.
+	 */
+
+	ld	r5, -8(r1)
+	ld	r1, -16(r1)

	/* Commit register state as checkpointed state: */
	TRECHKPT

@@ -655,112 +655,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)

 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 BEGIN_FTR_SECTION
-	b	skip_tm
-END_FTR_SECTION_IFCLR(CPU_FTR_TM)
-
-	/* Turn on TM/FP/VSX/VMX so we can restore them. */
-	mfmsr	r5
-	li	r6, MSR_TM >> 32
-	sldi	r6, r6, 32
-	or	r5, r5, r6
-	ori	r5, r5, MSR_FP
-	oris	r5, r5, (MSR_VEC | MSR_VSX)@h
-	mtmsrd	r5
-
-	/*
-	 * The user may change these outside of a transaction, so they must
-	 * always be context switched.
-	 */
-	ld	r5, VCPU_TFHAR(r4)
-	ld	r6, VCPU_TFIAR(r4)
-	ld	r7, VCPU_TEXASR(r4)
-	mtspr	SPRN_TFHAR, r5
-	mtspr	SPRN_TFIAR, r6
-	mtspr	SPRN_TEXASR, r7
-
-	ld	r5, VCPU_MSR(r4)
-	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
-	beq	skip_tm	/* TM not active in guest */
-
-	/* Make sure the failure summary is set, otherwise we'll program check
-	 * when we trechkpt.  It's possible that this might have been not set
-	 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
-	 * host.
-	 */
-	oris	r7, r7, (TEXASR_FS)@h
-	mtspr	SPRN_TEXASR, r7
-
-	/*
-	 * We need to load up the checkpointed state for the guest.
-	 * We need to do this early as it will blow away any GPRs, VSRs and
-	 * some SPRs.
-	 */
-
-	mr	r31, r4
-	addi	r3, r31, VCPU_FPRS_TM
-	bl	load_fp_state
-	addi	r3, r31, VCPU_VRS_TM
-	bl	load_vr_state
-	mr	r4, r31
-	lwz	r7, VCPU_VRSAVE_TM(r4)
-	mtspr	SPRN_VRSAVE, r7
-
-	ld	r5, VCPU_LR_TM(r4)
-	lwz	r6, VCPU_CR_TM(r4)
-	ld	r7, VCPU_CTR_TM(r4)
-	ld	r8, VCPU_AMR_TM(r4)
-	ld	r9, VCPU_TAR_TM(r4)
-	mtlr	r5
-	mtcr	r6
-	mtctr	r7
-	mtspr	SPRN_AMR, r8
-	mtspr	SPRN_TAR, r9
-
-	/*
-	 * Load up PPR and DSCR values but don't put them in the actual SPRs
-	 * till the last moment to avoid running with userspace PPR and DSCR for
-	 * too long.
-	 */
-	ld	r29, VCPU_DSCR_TM(r4)
-	ld	r30, VCPU_PPR_TM(r4)
-
-	std	r2, PACATMSCRATCH(r13) /* Save TOC */
-
-	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
-	li	r5, 0
-	mtmsrd	r5, 1
-
-	/* Load GPRs r0-r28 */
-	reg = 0
-	.rept	29
-	ld	reg, VCPU_GPRS_TM(reg)(r31)
-	reg = reg + 1
-	.endr
-
-	mtspr	SPRN_DSCR, r29
-	mtspr	SPRN_PPR, r30
-
-	/* Load final GPRs */
-	ld	29, VCPU_GPRS_TM(29)(r31)
-	ld	30, VCPU_GPRS_TM(30)(r31)
-	ld	31, VCPU_GPRS_TM(31)(r31)
-
-	/* TM checkpointed state is now setup.  All GPRs are now volatile. */
-	TRECHKPT
-
-	/* Now let's get back the state we need. */
-	HMT_MEDIUM
-	GET_PACA(r13)
-	ld	r29, HSTATE_DSCR(r13)
-	mtspr	SPRN_DSCR, r29
-	ld	r4, HSTATE_KVM_VCPU(r13)
-	ld	r1, HSTATE_HOST_R1(r13)
-	ld	r2, PACATMSCRATCH(r13)
-
-	/* Set the MSR RI since we have our registers back. */
-	li	r5, MSR_RI
-	mtmsrd	r5, 1
-skip_tm:
+	bl	kvmppc_restore_tm
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif

	/* Load guest PMU registers */

@@ -841,12 +737,6 @@ BEGIN_FTR_SECTION
	/* Skip next section on POWER7 */
	b	8f
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
-	/* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
-	mfmsr	r8
-	li	r0, 1
-	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
-	mtmsrd	r8
-
	/* Load up POWER8-specific registers */
	ld	r5, VCPU_IAMR(r4)
	lwz	r6, VCPU_PSPB(r4)

@@ -1436,106 +1326,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)

 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 BEGIN_FTR_SECTION
-	b	2f
-END_FTR_SECTION_IFCLR(CPU_FTR_TM)
-	/* Turn on TM. */
-	mfmsr	r8
-	li	r0, 1
-	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
-	mtmsrd	r8
-
-	ld	r5, VCPU_MSR(r9)
-	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
-	beq	1f	/* TM not active in guest. */
-
-	li	r3, TM_CAUSE_KVM_RESCHED
-
-	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
-	li	r5, 0
-	mtmsrd	r5, 1
-
-	/* All GPRs are volatile at this point. */
-	TRECLAIM(R3)
-
-	/* Temporarily store r13 and r9 so we have some regs to play with */
-	SET_SCRATCH0(r13)
-	GET_PACA(r13)
-	std	r9, PACATMSCRATCH(r13)
-	ld	r9, HSTATE_KVM_VCPU(r13)
-
-	/* Get a few more GPRs free. */
-	std	r29, VCPU_GPRS_TM(29)(r9)
-	std	r30, VCPU_GPRS_TM(30)(r9)
-	std	r31, VCPU_GPRS_TM(31)(r9)
-
-	/* Save away PPR and DSCR soon so don't run with user values. */
-	mfspr	r31, SPRN_PPR
-	HMT_MEDIUM
-	mfspr	r30, SPRN_DSCR
-	ld	r29, HSTATE_DSCR(r13)
-	mtspr	SPRN_DSCR, r29
-
-	/* Save all but r9, r13 & r29-r31 */
-	reg = 0
-	.rept	29
-	.if (reg != 9) && (reg != 13)
-	std	reg, VCPU_GPRS_TM(reg)(r9)
-	.endif
-	reg = reg + 1
-	.endr
-	/* ... now save r13 */
-	GET_SCRATCH0(r4)
-	std	r4, VCPU_GPRS_TM(13)(r9)
-	/* ... and save r9 */
-	ld	r4, PACATMSCRATCH(r13)
-	std	r4, VCPU_GPRS_TM(9)(r9)
-
-	/* Reload stack pointer and TOC. */
-	ld	r1, HSTATE_HOST_R1(r13)
-	ld	r2, PACATOC(r13)
-
-	/* Set MSR RI now we have r1 and r13 back. */
-	li	r5, MSR_RI
-	mtmsrd	r5, 1
-
-	/* Save away checkpinted SPRs. */
-	std	r31, VCPU_PPR_TM(r9)
-	std	r30, VCPU_DSCR_TM(r9)
-	mflr	r5
-	mfcr	r6
-	mfctr	r7
-	mfspr	r8, SPRN_AMR
-	mfspr	r10, SPRN_TAR
-	std	r5, VCPU_LR_TM(r9)
-	stw	r6, VCPU_CR_TM(r9)
-	std	r7, VCPU_CTR_TM(r9)
-	std	r8, VCPU_AMR_TM(r9)
-	std	r10, VCPU_TAR_TM(r9)
-
-	/* Restore r12 as trap number. */
-	lwz	r12, VCPU_TRAP(r9)
-
-	/* Save FP/VSX. */
-	addi	r3, r9, VCPU_FPRS_TM
-	bl	store_fp_state
-	addi	r3, r9, VCPU_VRS_TM
-	bl	store_vr_state
-	mfspr	r6, SPRN_VRSAVE
-	stw	r6, VCPU_VRSAVE_TM(r9)
-1:
-	/*
-	 * We need to save these SPRs after the treclaim so that the software
-	 * error code is recorded correctly in the TEXASR. Also the user may
-	 * change these outside of a transaction, so they must always be
-	 * context switched.
-	 */
-	mfspr	r5, SPRN_TFHAR
-	mfspr	r6, SPRN_TFIAR
-	mfspr	r7, SPRN_TEXASR
-	std	r5, VCPU_TFHAR(r9)
-	std	r6, VCPU_TFIAR(r9)
-	std	r7, VCPU_TEXASR(r9)
-2:
+	bl	kvmppc_save_tm
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif

	/* Increment yield count if they have a VPA */

@@ -2245,6 +2037,13 @@ _GLOBAL(kvmppc_h_cede)	/* r3 = vcpu pointer, r11 = msr, r13 = paca */
	/* save FP state */
	bl	kvmppc_save_fp

+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	bl	kvmppc_save_tm
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+
	/*
	 * Set DEC to the smaller of DEC and HDEC, so that we wake
	 * no later than the end of our timeslice (HDEC interrupts

@@ -2321,6 +2120,12 @@ kvm_end_cede:
	bl	kvmhv_accumulate_time
 #endif

+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	bl	kvmppc_restore_tm
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+
	/* load up FP state */
	bl	kvmppc_load_fp

@@ -2629,6 +2434,239 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
	mr	r4,r31
	blr

+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Save transactional state and TM-related registers.
+ * Called with r9 pointing to the vcpu struct.
+ * This can modify all checkpointed registers, but
+ * restores r1, r2 and r9 (vcpu pointer) before exit.
+ */
+kvmppc_save_tm:
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+
+	/* Turn on TM. */
+	mfmsr	r8
+	li	r0, 1
+	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+	mtmsrd	r8
+
+	ld	r5, VCPU_MSR(r9)
+	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+	beq	1f	/* TM not active in guest. */
+
+	std	r1, HSTATE_HOST_R1(r13)
+	li	r3, TM_CAUSE_KVM_RESCHED
+
+	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/* All GPRs are volatile at this point. */
+	TRECLAIM(R3)
+
+	/* Temporarily store r13 and r9 so we have some regs to play with */
+	SET_SCRATCH0(r13)
+	GET_PACA(r13)
+	std	r9, PACATMSCRATCH(r13)
+	ld	r9, HSTATE_KVM_VCPU(r13)
+
+	/* Get a few more GPRs free. */
+	std	r29, VCPU_GPRS_TM(29)(r9)
+	std	r30, VCPU_GPRS_TM(30)(r9)
+	std	r31, VCPU_GPRS_TM(31)(r9)
+
+	/* Save away PPR and DSCR soon so don't run with user values. */
+	mfspr	r31, SPRN_PPR
+	HMT_MEDIUM
+	mfspr	r30, SPRN_DSCR
+	ld	r29, HSTATE_DSCR(r13)
+	mtspr	SPRN_DSCR, r29
+
+	/* Save all but r9, r13 & r29-r31 */
+	reg = 0
+	.rept	29
+	.if (reg != 9) && (reg != 13)
+	std	reg, VCPU_GPRS_TM(reg)(r9)
+	.endif
+	reg = reg + 1
+	.endr
+	/* ... now save r13 */
+	GET_SCRATCH0(r4)
+	std	r4, VCPU_GPRS_TM(13)(r9)
+	/* ... and save r9 */
+	ld	r4, PACATMSCRATCH(r13)
+	std	r4, VCPU_GPRS_TM(9)(r9)
+
+	/* Reload stack pointer and TOC. */
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r2, PACATOC(r13)
+
+	/* Set MSR RI now we have r1 and r13 back. */
+	li	r5, MSR_RI
+	mtmsrd	r5, 1
+
+	/* Save away checkpinted SPRs. */
+	std	r31, VCPU_PPR_TM(r9)
+	std	r30, VCPU_DSCR_TM(r9)
+	mflr	r5
+	mfcr	r6
+	mfctr	r7
+	mfspr	r8, SPRN_AMR
+	mfspr	r10, SPRN_TAR
+	std	r5, VCPU_LR_TM(r9)
+	stw	r6, VCPU_CR_TM(r9)
+	std	r7, VCPU_CTR_TM(r9)
+	std	r8, VCPU_AMR_TM(r9)
+	std	r10, VCPU_TAR_TM(r9)
+
+	/* Restore r12 as trap number. */
+	lwz	r12, VCPU_TRAP(r9)
+
+	/* Save FP/VSX. */
+	addi	r3, r9, VCPU_FPRS_TM
+	bl	store_fp_state
+	addi	r3, r9, VCPU_VRS_TM
+	bl	store_vr_state
+	mfspr	r6, SPRN_VRSAVE
+	stw	r6, VCPU_VRSAVE_TM(r9)
+1:
+	/*
+	 * We need to save these SPRs after the treclaim so that the software
+	 * error code is recorded correctly in the TEXASR. Also the user may
+	 * change these outside of a transaction, so they must always be
+	 * context switched.
+	 */
+	mfspr	r5, SPRN_TFHAR
+	mfspr	r6, SPRN_TFIAR
+	mfspr	r7, SPRN_TEXASR
+	std	r5, VCPU_TFHAR(r9)
+	std	r6, VCPU_TFIAR(r9)
+	std	r7, VCPU_TEXASR(r9)
+
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+
+/*
+ * Restore transactional state and TM-related registers.
+ * Called with r4 pointing to the vcpu struct.
+ * This potentially modifies all checkpointed registers.
+ * It restores r1, r2, r4 from the PACA.
+ */
+kvmppc_restore_tm:
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+
+	/* Turn on TM/FP/VSX/VMX so we can restore them. */
+	mfmsr	r5
+	li	r6, MSR_TM >> 32
+	sldi	r6, r6, 32
+	or	r5, r5, r6
+	ori	r5, r5, MSR_FP
+	oris	r5, r5, (MSR_VEC | MSR_VSX)@h
+	mtmsrd	r5
+
+	/*
+	 * The user may change these outside of a transaction, so they must
+	 * always be context switched.
+	 */
+	ld	r5, VCPU_TFHAR(r4)
+	ld	r6, VCPU_TFIAR(r4)
+	ld	r7, VCPU_TEXASR(r4)
+	mtspr	SPRN_TFHAR, r5
+	mtspr	SPRN_TFIAR, r6
+	mtspr	SPRN_TEXASR, r7
+
+	ld	r5, VCPU_MSR(r4)
+	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+	beqlr		/* TM not active in guest */
+	std	r1, HSTATE_HOST_R1(r13)
+
+	/* Make sure the failure summary is set, otherwise we'll program check
+	 * when we trechkpt.  It's possible that this might have been not set
+	 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+	 * host.
+	 */
+	oris	r7, r7, (TEXASR_FS)@h
+	mtspr	SPRN_TEXASR, r7
+
+	/*
+	 * We need to load up the checkpointed state for the guest.
+	 * We need to do this early as it will blow away any GPRs, VSRs and
+	 * some SPRs.
+	 */
+
+	mr	r31, r4
+	addi	r3, r31, VCPU_FPRS_TM
+	bl	load_fp_state
+	addi	r3, r31, VCPU_VRS_TM
+	bl	load_vr_state
+	mr	r4, r31
+	lwz	r7, VCPU_VRSAVE_TM(r4)
+	mtspr	SPRN_VRSAVE, r7
+
+	ld	r5, VCPU_LR_TM(r4)
+	lwz	r6, VCPU_CR_TM(r4)
+	ld	r7, VCPU_CTR_TM(r4)
+	ld	r8, VCPU_AMR_TM(r4)
+	ld	r9, VCPU_TAR_TM(r4)
+	mtlr	r5
+	mtcr	r6
+	mtctr	r7
+	mtspr	SPRN_AMR, r8
+	mtspr	SPRN_TAR, r9
+
+	/*
+	 * Load up PPR and DSCR values but don't put them in the actual SPRs
+	 * till the last moment to avoid running with userspace PPR and DSCR for
+	 * too long.
+	 */
+	ld	r29, VCPU_DSCR_TM(r4)
+	ld	r30, VCPU_PPR_TM(r4)
+
+	std	r2, PACATMSCRATCH(r13) /* Save TOC */
+
+	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/* Load GPRs r0-r28 */
+	reg = 0
+	.rept	29
+	ld	reg, VCPU_GPRS_TM(reg)(r31)
+	reg = reg + 1
+	.endr
+
+	mtspr	SPRN_DSCR, r29
+	mtspr	SPRN_PPR, r30
+
+	/* Load final GPRs */
+	ld	29, VCPU_GPRS_TM(29)(r31)
+	ld	30, VCPU_GPRS_TM(30)(r31)
+	ld	31, VCPU_GPRS_TM(31)(r31)
+
+	/* TM checkpointed state is now setup.  All GPRs are now volatile. */
+	TRECHKPT
+
+	/* Now let's get back the state we need. */
+	HMT_MEDIUM
+	GET_PACA(r13)
+	ld	r29, HSTATE_DSCR(r13)
+	mtspr	SPRN_DSCR, r29
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r2, PACATMSCRATCH(r13)
+
+	/* Set the MSR RI since we have our registers back. */
+	li	r5, MSR_RI
+	mtmsrd	r5, 1
+
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+#endif
+
 /*
  * We come here if we get any exception or interrupt while we are
  * executing host real mode code while in guest MMU context.

@ -117,6 +117,7 @@ config S390
|
|||
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_EARLY_PFN_TO_NID
|
||||
select HAVE_ARCH_HARDENED_USERCOPY
|
||||
select HAVE_ARCH_JUMP_LABEL
|
||||
select HAVE_ARCH_SECCOMP_FILTER
|
||||
select HAVE_ARCH_SOFT_DIRTY
|
||||
|
|
|
@ -669,11 +669,13 @@ static const struct file_operations prng_tdes_fops = {
|
|||
static struct miscdevice prng_sha512_dev = {
|
||||
.name = "prandom",
|
||||
.minor = MISC_DYNAMIC_MINOR,
|
||||
.mode = 0644,
|
||||
.fops = &prng_sha512_fops,
|
||||
};
|
||||
static struct miscdevice prng_tdes_dev = {
|
||||
.name = "prandom",
|
||||
.minor = MISC_DYNAMIC_MINOR,
|
||||
.mode = 0644,
|
||||
.fops = &prng_tdes_fops,
|
||||
};
|
||||
|
||||
|
|
|
@ -23,6 +23,8 @@ enum zpci_ioat_dtype {
|
|||
#define ZPCI_IOTA_FS_2G 2
|
||||
#define ZPCI_KEY (PAGE_DEFAULT_KEY << 5)
|
||||
|
||||
#define ZPCI_TABLE_SIZE_RT (1UL << 42)
|
||||
|
||||
#define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
|
||||
#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
|
||||
#define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS)
|
||||
|
|
|
@ -2070,13 +2070,6 @@ void s390_reset_system(void (*fn_pre)(void),
|
|||
S390_lowcore.program_new_psw.addr =
|
||||
PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
|
||||
|
||||
/*
|
||||
* Clear subchannel ID and number to signal new kernel that no CCW or
|
||||
* SCSI IPL has been done (for kexec and kdump)
|
||||
*/
|
||||
S390_lowcore.subchannel_id = 0;
|
||||
S390_lowcore.subchannel_nr = 0;
|
||||
|
||||
/* Store status at absolute zero */
|
||||
store_status();
|
||||
|
||||
|
|
|
@ -104,6 +104,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
|
|||
|
||||
unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
|
||||
{
|
||||
check_object_size(to, n, false);
|
||||
if (static_branch_likely(&have_mvcos))
|
||||
return copy_from_user_mvcos(to, from, n);
|
||||
return copy_from_user_mvcp(to, from, n);
|
||||
|
@ -177,6 +178,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
|
|||
|
||||
unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
|
||||
{
|
||||
check_object_size(from, n, true);
|
||||
if (static_branch_likely(&have_mvcos))
|
||||
return copy_to_user_mvcos(to, from, n);
|
||||
return copy_to_user_mvcs(to, from, n);
|
||||
|
|
|
@ -701,8 +701,7 @@ static int zpci_restore(struct device *dev)
|
|||
goto out;
|
||||
|
||||
zpci_map_resources(pdev);
|
||||
zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
|
||||
zdev->start_dma + zdev->iommu_size - 1,
|
||||
zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
|
||||
(u64) zdev->dma_table);
|
||||
|
||||
out:
|
||||
|
|
|
@ -458,7 +458,19 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
|
|||
goto out_clean;
|
||||
}
|
||||
|
||||
zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET;
|
||||
/*
|
||||
* Restrict the iommu bitmap size to the minimum of the following:
|
||||
* - main memory size
|
||||
* - 3-level pagetable address limit minus start_dma offset
|
||||
* - DMA address range allowed by the hardware (clp query pci fn)
|
||||
*
|
||||
* Also set zdev->end_dma to the actual end address of the usable
|
||||
* range, instead of the theoretical maximum as reported by hardware.
|
||||
*/
|
||||
zdev->iommu_size = min3((u64) high_memory,
|
||||
ZPCI_TABLE_SIZE_RT - zdev->start_dma,
|
||||
zdev->end_dma - zdev->start_dma + 1);
|
||||
zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
|
||||
zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
|
||||
zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
|
||||
if (!zdev->iommu_bitmap) {
|
||||
|
@ -466,10 +478,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
|
|||
goto out_reg;
|
||||
}
|
||||
|
||||
rc = zpci_register_ioat(zdev,
|
||||
0,
|
||||
zdev->start_dma + PAGE_OFFSET,
|
||||
zdev->start_dma + zdev->iommu_size - 1,
|
||||
rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
|
||||
(u64) zdev->dma_table);
|
||||
if (rc)
|
||||
goto out_reg;
|
||||
|
|
|
@ -43,6 +43,7 @@ config SPARC
|
|||
select ODD_RT_SIGACTION
|
||||
select OLD_SIGSUSPEND
|
||||
select ARCH_HAS_SG_CHAIN
|
||||
select HAVE_ARCH_HARDENED_USERCOPY
|
||||
|
||||
config SPARC32
|
||||
def_bool !64BIT
|
||||
|
|
|
@ -313,22 +313,28 @@ unsigned long __copy_user(void __user *to, const void __user *from, unsigned lon
|
|||
|
||||
static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
|
||||
{
|
||||
if (n && __access_ok((unsigned long) to, n))
|
||||
if (n && __access_ok((unsigned long) to, n)) {
|
||||
if (!__builtin_constant_p(n))
|
||||
check_object_size(from, n, true);
|
||||
return __copy_user(to, (__force void __user *) from, n);
|
||||
else
|
||||
} else
|
||||
return n;
|
||||
}
|
||||
|
||||
static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
|
||||
{
|
||||
if (!__builtin_constant_p(n))
|
||||
check_object_size(from, n, true);
|
||||
return __copy_user(to, (__force void __user *) from, n);
|
||||
}
|
||||
|
||||
static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
|
||||
{
|
||||
if (n && __access_ok((unsigned long) from, n))
|
||||
if (n && __access_ok((unsigned long) from, n)) {
|
||||
if (!__builtin_constant_p(n))
|
||||
check_object_size(to, n, false);
|
||||
return __copy_user((__force void __user *) to, from, n);
|
||||
else
|
||||
} else
|
||||
return n;
|
||||
}
|
||||
|
||||
|
|
|
@ -250,8 +250,12 @@ unsigned long copy_from_user_fixup(void *to, const void __user *from,
|
|||
static inline unsigned long __must_check
|
||||
copy_from_user(void *to, const void __user *from, unsigned long size)
|
||||
{
|
||||
unsigned long ret = ___copy_from_user(to, from, size);
|
||||
unsigned long ret;
|
||||
|
||||
if (!__builtin_constant_p(size))
|
||||
check_object_size(to, size, false);
|
||||
|
||||
ret = ___copy_from_user(to, from, size);
|
||||
if (unlikely(ret))
|
||||
ret = copy_from_user_fixup(to, from, size);
|
||||
|
||||
|
@ -267,8 +271,11 @@ unsigned long copy_to_user_fixup(void __user *to, const void *from,
|
|||
static inline unsigned long __must_check
|
||||
copy_to_user(void __user *to, const void *from, unsigned long size)
|
||||
{
|
||||
unsigned long ret = ___copy_to_user(to, from, size);
|
||||
unsigned long ret;
|
||||
|
||||
if (!__builtin_constant_p(size))
|
||||
check_object_size(from, size, true);
|
||||
ret = ___copy_to_user(to, from, size);
|
||||
if (unlikely(ret))
|
||||
ret = copy_to_user_fixup(to, from, size);
|
||||
return ret;
|
||||
|
|
|
@ -81,7 +81,7 @@
|
|||
.altinstr_replacement : { *(.altinstr_replacement) }
|
||||
/* .exit.text is discard at runtime, not link time, to deal with references
|
||||
from .altinstructions and .eh_frame */
|
||||
.exit.text : { *(.exit.text) }
|
||||
.exit.text : { EXIT_TEXT }
|
||||
.exit.data : { *(.exit.data) }
|
||||
|
||||
.preinit_array : {
|
||||
|
|
|
@ -80,6 +80,7 @@ config X86
|
|||
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
|
||||
select HAVE_AOUT if X86_32
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_HARDENED_USERCOPY
|
||||
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
|
||||
select HAVE_ARCH_JUMP_LABEL
|
||||
select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
|
||||
|
@ -89,7 +90,7 @@ config X86
|
|||
select HAVE_ARCH_SOFT_DIRTY if X86_64
|
||||
select HAVE_ARCH_TRACEHOOK
|
||||
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
|
||||
select HAVE_BPF_JIT if X86_64
|
||||
select HAVE_ARCH_WITHIN_STACK_FRAMES
|
||||
select HAVE_CC_STACKPROTECTOR
|
||||
select HAVE_CMPXCHG_DOUBLE
|
||||
select HAVE_CMPXCHG_LOCAL
|
||||
|
|
|
@ -294,7 +294,7 @@
|
|||
# 285 sys_setaltroot
|
||||
286 i386 add_key sys_add_key
|
||||
287 i386 request_key sys_request_key
|
||||
288 i386 keyctl sys_keyctl
|
||||
288 i386 keyctl sys_keyctl compat_sys_keyctl
|
||||
289 i386 ioprio_set sys_ioprio_set
|
||||
290 i386 ioprio_get sys_ioprio_get
|
||||
291 i386 inotify_init sys_inotify_init
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#define _ASM_X86_MTRR_H
|
||||
|
||||
#include <uapi/asm/mtrr.h>
|
||||
#include <asm/pat.h>
|
||||
|
||||
|
||||
/*
|
||||
|
@ -83,9 +84,12 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
|
|||
static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
|
||||
{
|
||||
}
|
||||
static inline void mtrr_bp_init(void)
|
||||
{
|
||||
pat_disable("MTRRs disabled, skipping PAT initialization too.");
|
||||
}
|
||||
|
||||
#define mtrr_ap_init() do {} while (0)
|
||||
#define mtrr_bp_init() do {} while (0)
|
||||
#define set_mtrr_aps_delayed_init() do {} while (0)
|
||||
#define mtrr_aps_init() do {} while (0)
|
||||
#define mtrr_bp_restore() do {} while (0)
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
#include <asm/pgtable_types.h>
|
||||
|
||||
bool pat_enabled(void);
|
||||
void pat_disable(const char *reason);
|
||||
extern void pat_init(void);
|
||||
void pat_init_cache_modes(u64);
|
||||
|
||||
extern int reserve_memtype(u64 start, u64 end,
|
||||
enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
|
||||
|
|
|
@ -76,6 +76,8 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
|
|||
u8 ret_flags;
|
||||
|
||||
version = src->version;
|
||||
/* Make the latest version visible */
|
||||
smp_rmb();
|
||||
|
||||
offset = pvclock_get_nsec_offset(src);
|
||||
ret = src->system_time + offset;
|
||||
|
|
|
@ -177,6 +177,50 @@ static inline unsigned long current_stack_pointer(void)
|
|||
return sp;
|
||||
}
|
||||
|
||||
/*
|
||||
* Walks up the stack frames to make sure that the specified object is
|
||||
* entirely contained by a single stack frame.
|
||||
*
|
||||
* Returns:
|
||||
* 1 if within a frame
|
||||
* -1 if placed across a frame boundary (or outside stack)
|
||||
* 0 unable to determine (no frame pointers, etc)
|
||||
*/
|
||||
static inline int arch_within_stack_frames(const void * const stack,
|
||||
const void * const stackend,
|
||||
const void *obj, unsigned long len)
|
||||
{
|
||||
#if defined(CONFIG_FRAME_POINTER)
|
||||
const void *frame = NULL;
|
||||
const void *oldframe;
|
||||
|
||||
oldframe = __builtin_frame_address(1);
|
||||
if (oldframe)
|
||||
frame = __builtin_frame_address(2);
|
||||
/*
|
||||
* low ----------------------------------------------> high
|
||||
* [saved bp][saved ip][args][local vars][saved bp][saved ip]
|
||||
* ^----------------^
|
||||
* allow copies only within here
|
||||
*/
|
||||
while (stack <= frame && frame < stackend) {
|
||||
/*
|
||||
* If obj + len extends past the last frame, this
|
||||
* check won't pass and the next frame will be 0,
|
||||
* causing us to bail out and correctly report
|
||||
* the copy as invalid.
|
||||
*/
|
||||
if (obj + len <= frame)
|
||||
return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1;
|
||||
oldframe = frame;
|
||||
frame = *(const void * const *)frame;
|
||||
}
|
||||
return -1;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#else /* !__ASSEMBLY__ */
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
|
|
@ -86,7 +86,14 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
|
|||
|
||||
static inline void __native_flush_tlb(void)
|
||||
{
|
||||
/*
|
||||
* If current->mm == NULL then we borrow a mm which may change during a
|
||||
* task switch and therefore we must not be preempted while we write CR3
|
||||
* back:
|
||||
*/
|
||||
preempt_disable();
|
||||
native_write_cr3(native_read_cr3());
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void __native_flush_tlb_global_irq_disabled(void)
|
||||
|
|
|
@ -134,6 +134,9 @@ extern int __get_user_4(void);
|
|||
extern int __get_user_8(void);
|
||||
extern int __get_user_bad(void);
|
||||
|
||||
#define __uaccess_begin() stac()
|
||||
#define __uaccess_end() clac()
|
||||
|
||||
/*
|
||||
* This is a type: either unsigned long, if the argument fits into
|
||||
* that type, or otherwise unsigned long long.
|
||||
|
@ -193,10 +196,10 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
|
|||
|
||||
#ifdef CONFIG_X86_32
|
||||
#define __put_user_asm_u64(x, addr, err, errret) \
|
||||
asm volatile(ASM_STAC "\n" \
|
||||
asm volatile("\n" \
|
||||
"1: movl %%eax,0(%2)\n" \
|
||||
"2: movl %%edx,4(%2)\n" \
|
||||
"3: " ASM_CLAC "\n" \
|
||||
"3:" \
|
||||
".section .fixup,\"ax\"\n" \
|
||||
"4: movl %3,%0\n" \
|
||||
" jmp 3b\n" \
|
||||
|
@ -207,10 +210,10 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
|
|||
: "A" (x), "r" (addr), "i" (errret), "0" (err))
|
||||
|
||||
#define __put_user_asm_ex_u64(x, addr) \
|
||||
asm volatile(ASM_STAC "\n" \
|
||||
asm volatile("\n" \
|
||||
"1: movl %%eax,0(%1)\n" \
|
||||
"2: movl %%edx,4(%1)\n" \
|
||||
"3: " ASM_CLAC "\n" \
|
||||
"3:" \
|
||||
_ASM_EXTABLE_EX(1b, 2b) \
|
||||
_ASM_EXTABLE_EX(2b, 3b) \
|
||||
: : "A" (x), "r" (addr))
|
||||
|
@ -304,6 +307,10 @@ do { \
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* This doesn't do __uaccess_begin/end - the exception handling
|
||||
* around it must do that.
|
||||
*/
|
||||
#define __put_user_size_ex(x, ptr, size) \
|
||||
do { \
|
||||
__chk_user_ptr(ptr); \
|
||||
|
@ -358,9 +365,9 @@ do { \
|
|||
} while (0)
|
||||
|
||||
#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \
|
||||
asm volatile(ASM_STAC "\n" \
|
||||
asm volatile("\n" \
|
||||
"1: mov"itype" %2,%"rtype"1\n" \
|
||||
"2: " ASM_CLAC "\n" \
|
||||
"2:\n" \
|
||||
".section .fixup,\"ax\"\n" \
|
||||
"3: mov %3,%0\n" \
|
||||
" xor"itype" %"rtype"1,%"rtype"1\n" \
|
||||
|
@ -370,6 +377,10 @@ do { \
|
|||
: "=r" (err), ltype(x) \
|
||||
: "m" (__m(addr)), "i" (errret), "0" (err))
|
||||
|
||||
/*
|
||||
* This doesn't do __uaccess_begin/end - the exception handling
|
||||
* around it must do that.
|
||||
*/
|
||||
#define __get_user_size_ex(x, ptr, size) \
|
||||
do { \
|
||||
__chk_user_ptr(ptr); \
|
||||
|
@ -400,7 +411,9 @@ do { \
|
|||
#define __put_user_nocheck(x, ptr, size) \
|
||||
({ \
|
||||
int __pu_err; \
|
||||
__uaccess_begin(); \
|
||||
__put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
|
||||
__uaccess_end(); \
|
||||
__builtin_expect(__pu_err, 0); \
|
||||
})
|
||||
|
||||
|
@ -408,7 +421,9 @@ do { \
|
|||
({ \
|
||||
int __gu_err; \
|
||||
unsigned long __gu_val; \
|
||||
__uaccess_begin(); \
|
||||
__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
|
||||
__uaccess_end(); \
|
||||
(x) = (__force __typeof__(*(ptr)))__gu_val; \
|
||||
__builtin_expect(__gu_err, 0); \
|
||||
})
|
||||
|
@ -423,9 +438,9 @@ struct __large_struct { unsigned long buf[100]; };
|
|||
* aliasing issues.
|
||||
*/
|
||||
#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \
|
||||
asm volatile(ASM_STAC "\n" \
|
||||
asm volatile("\n" \
|
||||
"1: mov"itype" %"rtype"1,%2\n" \
|
||||
"2: " ASM_CLAC "\n" \
|
||||
"2:\n" \
|
||||
".section .fixup,\"ax\"\n" \
|
||||
"3: mov %3,%0\n" \
|
||||
" jmp 2b\n" \
|
||||
|
@ -445,11 +460,11 @@ struct __large_struct { unsigned long buf[100]; };
|
|||
*/
|
||||
#define uaccess_try do { \
|
||||
current_thread_info()->uaccess_err = 0; \
|
||||
stac(); \
|
||||
__uaccess_begin(); \
|
||||
barrier();
|
||||
|
||||
#define uaccess_catch(err) \
|
||||
clac(); \
|
||||
__uaccess_end(); \
|
||||
(err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \
|
||||
} while (0)
|
||||
|
||||
|
@ -547,12 +562,13 @@ extern void __cmpxchg_wrong_size(void)
|
|||
__typeof__(ptr) __uval = (uval); \
|
||||
__typeof__(*(ptr)) __old = (old); \
|
||||
__typeof__(*(ptr)) __new = (new); \
|
||||
__uaccess_begin(); \
|
||||
switch (size) { \
|
||||
case 1: \
|
||||
{ \
|
||||
asm volatile("\t" ASM_STAC "\n" \
|
||||
asm volatile("\n" \
|
||||
"1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \
|
||||
"2:\t" ASM_CLAC "\n" \
|
||||
"2:\n" \
|
||||
"\t.section .fixup, \"ax\"\n" \
|
||||
"3:\tmov %3, %0\n" \
|
||||
"\tjmp 2b\n" \
|
||||
|
@ -566,9 +582,9 @@ extern void __cmpxchg_wrong_size(void)
|
|||
} \
|
||||
case 2: \
|
||||
{ \
|
||||
asm volatile("\t" ASM_STAC "\n" \
|
||||
asm volatile("\n" \
|
||||
"1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \
|
||||
"2:\t" ASM_CLAC "\n" \
|
||||
"2:\n" \
|
||||
"\t.section .fixup, \"ax\"\n" \
|
||||
"3:\tmov %3, %0\n" \
|
||||
"\tjmp 2b\n" \
|
||||
|
@ -582,9 +598,9 @@ extern void __cmpxchg_wrong_size(void)
|
|||
} \
|
||||
case 4: \
|
||||
{ \
|
||||
asm volatile("\t" ASM_STAC "\n" \
|
||||
asm volatile("\n" \
|
||||
"1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \
|
||||
"2:\t" ASM_CLAC "\n" \
|
||||
"2:\n" \
|
||||
"\t.section .fixup, \"ax\"\n" \
|
||||
"3:\tmov %3, %0\n" \
|
||||
"\tjmp 2b\n" \
|
||||
|
@ -601,9 +617,9 @@ extern void __cmpxchg_wrong_size(void)
|
|||
if (!IS_ENABLED(CONFIG_X86_64)) \
|
||||
__cmpxchg_wrong_size(); \
|
||||
\
|
||||
asm volatile("\t" ASM_STAC "\n" \
|
||||
asm volatile("\n" \
|
||||
"1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \
|
||||
"2:\t" ASM_CLAC "\n" \
|
||||
"2:\n" \
|
||||
"\t.section .fixup, \"ax\"\n" \
|
||||
"3:\tmov %3, %0\n" \
|
||||
"\tjmp 2b\n" \
|
||||
|
@ -618,6 +634,7 @@ extern void __cmpxchg_wrong_size(void)
|
|||
default: \
|
||||
__cmpxchg_wrong_size(); \
|
||||
} \
|
||||
__uaccess_end(); \
|
||||
*__uval = __old; \
|
||||
__ret; \
|
||||
})
|
||||
|
@ -689,7 +706,7 @@ __copy_from_user_overflow(int size, unsigned long count)
|
|||
|
||||
#endif
|
||||
|
||||
static inline unsigned long __must_check
|
||||
static __always_inline unsigned long __must_check
|
||||
copy_from_user(void *to, const void __user *from, unsigned long n)
|
||||
{
|
||||
int sz = __compiletime_object_size(to);
|
||||
|
@ -714,9 +731,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
|
|||
* case, and do only runtime checking for non-constant sizes.
|
||||
*/
|
||||
|
||||
if (likely(sz < 0 || sz >= n))
|
||||
if (likely(sz < 0 || sz >= n)) {
|
||||
check_object_size(to, n, false);
|
||||
n = _copy_from_user(to, from, n);
|
||||
else if(__builtin_constant_p(n))
|
||||
} else if (__builtin_constant_p(n))
|
||||
copy_from_user_overflow();
|
||||
else
|
||||
__copy_from_user_overflow(sz, n);
|
||||
|
@ -724,7 +742,7 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
|
|||
return n;
|
||||
}
|
||||
|
||||
static inline unsigned long __must_check
|
||||
static __always_inline unsigned long __must_check
|
||||
copy_to_user(void __user *to, const void *from, unsigned long n)
|
||||
{
|
||||
int sz = __compiletime_object_size(from);
|
||||
|
@ -732,9 +750,10 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
|
|||
might_fault();
|
||||
|
||||
/* See the comment in copy_from_user() above. */
|
||||
if (likely(sz < 0 || sz >= n))
|
||||
if (likely(sz < 0 || sz >= n)) {
|
||||
check_object_size(from, n, true);
|
||||
n = _copy_to_user(to, from, n);
|
||||
else if(__builtin_constant_p(n))
|
||||
} else if (__builtin_constant_p(n))
|
||||
copy_to_user_overflow();
|
||||
else
|
||||
__copy_to_user_overflow(sz, n);
|
||||
|
@ -745,5 +764,30 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
|
|||
#undef __copy_from_user_overflow
|
||||
#undef __copy_to_user_overflow
|
||||
|
||||
/*
|
||||
* The "unsafe" user accesses aren't really "unsafe", but the naming
|
||||
* is a big fat warning: you have to not only do the access_ok()
|
||||
* checking before using them, but you have to surround them with the
|
||||
* user_access_begin/end() pair.
|
||||
*/
|
||||
#define user_access_begin() __uaccess_begin()
|
||||
#define user_access_end() __uaccess_end()
|
||||
|
||||
#define unsafe_put_user(x, ptr, err_label) \
|
||||
do { \
|
||||
int __pu_err; \
|
||||
__put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \
|
||||
if (unlikely(__pu_err)) goto err_label; \
|
||||
} while (0)
|
||||
|
||||
#define unsafe_get_user(x, ptr, err_label) \
|
||||
do { \
|
||||
int __gu_err; \
|
||||
unsigned long __gu_val; \
|
||||
__get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \
|
||||
(x) = (__force __typeof__(*(ptr)))__gu_val; \
|
||||
if (unlikely(__gu_err)) goto err_label; \
|
||||
} while (0)
|
||||
|
||||
#endif /* _ASM_X86_UACCESS_H */
|
||||
|
||||
|
|
|
@ -33,38 +33,11 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero
|
|||
* the specified block with access_ok() before calling this function.
|
||||
* The caller should also make sure he pins the user space address
|
||||
* so that we don't result in page fault and sleep.
|
||||
*
|
||||
* Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault
|
||||
* we return the initial request size (1, 2 or 4), as copy_*_user should do.
|
||||
* If a store crosses a page boundary and gets a fault, the x86 will not write
|
||||
* anything, so this is accurate.
|
||||
*/
|
||||
|
||||
static __always_inline unsigned long __must_check
|
||||
__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
|
||||
{
|
||||
if (__builtin_constant_p(n)) {
|
||||
unsigned long ret;
|
||||
|
||||
switch (n) {
|
||||
case 1:
|
||||
__put_user_size(*(u8 *)from, (u8 __user *)to,
|
||||
1, ret, 1);
|
||||
return ret;
|
||||
case 2:
|
||||
__put_user_size(*(u16 *)from, (u16 __user *)to,
|
||||
2, ret, 2);
|
||||
return ret;
|
||||
case 4:
|
||||
__put_user_size(*(u32 *)from, (u32 __user *)to,
|
||||
4, ret, 4);
|
||||
return ret;
|
||||
case 8:
|
||||
__put_user_size(*(u64 *)from, (u64 __user *)to,
|
||||
8, ret, 8);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
check_object_size(from, n, true);
|
||||
return __copy_to_user_ll(to, from, n);
|
||||
}
|
||||
|
||||
|
@ -93,26 +66,6 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
|
|||
static __always_inline unsigned long
|
||||
__copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
|
||||
{
|
||||
/* Avoid zeroing the tail if the copy fails..
|
||||
* If 'n' is constant and 1, 2, or 4, we do still zero on a failure,
|
||||
* but as the zeroing behaviour is only significant when n is not
|
||||
* constant, that shouldn't be a problem.
|
||||
*/
|
||||
if (__builtin_constant_p(n)) {
|
||||
unsigned long ret;
|
||||
|
||||
switch (n) {
|
||||
case 1:
|
||||
__get_user_size(*(u8 *)to, from, 1, ret, 1);
|
||||
return ret;
|
||||
case 2:
|
||||
__get_user_size(*(u16 *)to, from, 2, ret, 2);
|
||||
return ret;
|
||||
case 4:
|
||||
__get_user_size(*(u32 *)to, from, 4, ret, 4);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
return __copy_from_user_ll_nozero(to, from, n);
|
||||
}
|
||||
|
||||
|
@ -143,18 +96,25 @@ static __always_inline unsigned long
|
|||
__copy_from_user(void *to, const void __user *from, unsigned long n)
|
||||
{
|
||||
might_fault();
|
||||
check_object_size(to, n, false);
|
||||
if (__builtin_constant_p(n)) {
|
||||
unsigned long ret;
|
||||
|
||||
switch (n) {
|
||||
case 1:
|
||||
__uaccess_begin();
|
||||
__get_user_size(*(u8 *)to, from, 1, ret, 1);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 2:
|
||||
__uaccess_begin();
|
||||
__get_user_size(*(u16 *)to, from, 2, ret, 2);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 4:
|
||||
__uaccess_begin();
|
||||
__get_user_size(*(u32 *)to, from, 4, ret, 4);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
@ -170,13 +130,19 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to,
|
|||
|
||||
switch (n) {
|
||||
case 1:
|
||||
__uaccess_begin();
|
||||
__get_user_size(*(u8 *)to, from, 1, ret, 1);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 2:
|
||||
__uaccess_begin();
|
||||
__get_user_size(*(u16 *)to, from, 2, ret, 2);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 4:
|
||||
__uaccess_begin();
|
||||
__get_user_size(*(u32 *)to, from, 4, ret, 4);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,38 +53,53 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
|
|||
{
|
||||
int ret = 0;
|
||||
|
||||
check_object_size(dst, size, false);
|
||||
if (!__builtin_constant_p(size))
|
||||
return copy_user_generic(dst, (__force void *)src, size);
|
||||
switch (size) {
|
||||
case 1:__get_user_asm(*(u8 *)dst, (u8 __user *)src,
|
||||
case 1:
|
||||
__uaccess_begin();
|
||||
__get_user_asm(*(u8 *)dst, (u8 __user *)src,
|
||||
ret, "b", "b", "=q", 1);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 2:__get_user_asm(*(u16 *)dst, (u16 __user *)src,
|
||||
case 2:
|
||||
__uaccess_begin();
|
||||
__get_user_asm(*(u16 *)dst, (u16 __user *)src,
|
||||
ret, "w", "w", "=r", 2);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 4:__get_user_asm(*(u32 *)dst, (u32 __user *)src,
|
||||
case 4:
|
||||
__uaccess_begin();
|
||||
__get_user_asm(*(u32 *)dst, (u32 __user *)src,
|
||||
ret, "l", "k", "=r", 4);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 8:__get_user_asm(*(u64 *)dst, (u64 __user *)src,
|
||||
case 8:
|
||||
__uaccess_begin();
|
||||
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
|
||||
ret, "q", "", "=r", 8);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 10:
|
||||
__uaccess_begin();
|
||||
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
|
||||
ret, "q", "", "=r", 10);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
__get_user_asm(*(u16 *)(8 + (char *)dst),
|
||||
(u16 __user *)(8 + (char __user *)src),
|
||||
ret, "w", "w", "=r", 2);
|
||||
if (likely(!ret))
|
||||
__get_user_asm(*(u16 *)(8 + (char *)dst),
|
||||
(u16 __user *)(8 + (char __user *)src),
|
||||
ret, "w", "w", "=r", 2);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 16:
|
||||
__uaccess_begin();
|
||||
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
|
||||
ret, "q", "", "=r", 16);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
__get_user_asm(*(u64 *)(8 + (char *)dst),
|
||||
(u64 __user *)(8 + (char __user *)src),
|
||||
ret, "q", "", "=r", 8);
|
||||
if (likely(!ret))
|
||||
__get_user_asm(*(u64 *)(8 + (char *)dst),
|
||||
(u64 __user *)(8 + (char __user *)src),
|
||||
ret, "q", "", "=r", 8);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
default:
|
||||
return copy_user_generic(dst, (__force void *)src, size);
|
||||
|
@ -103,38 +118,55 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)
|
|||
{
|
||||
int ret = 0;
|
||||
|
||||
check_object_size(src, size, true);
|
||||
if (!__builtin_constant_p(size))
|
||||
return copy_user_generic((__force void *)dst, src, size);
|
||||
switch (size) {
|
||||
case 1:__put_user_asm(*(u8 *)src, (u8 __user *)dst,
|
||||
case 1:
|
||||
__uaccess_begin();
|
||||
__put_user_asm(*(u8 *)src, (u8 __user *)dst,
|
||||
ret, "b", "b", "iq", 1);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 2:__put_user_asm(*(u16 *)src, (u16 __user *)dst,
|
||||
case 2:
|
||||
__uaccess_begin();
|
||||
__put_user_asm(*(u16 *)src, (u16 __user *)dst,
|
||||
ret, "w", "w", "ir", 2);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 4:__put_user_asm(*(u32 *)src, (u32 __user *)dst,
|
||||
case 4:
|
||||
__uaccess_begin();
|
||||
__put_user_asm(*(u32 *)src, (u32 __user *)dst,
|
||||
ret, "l", "k", "ir", 4);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 8:__put_user_asm(*(u64 *)src, (u64 __user *)dst,
|
||||
case 8:
|
||||
__uaccess_begin();
|
||||
__put_user_asm(*(u64 *)src, (u64 __user *)dst,
|
||||
ret, "q", "", "er", 8);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 10:
|
||||
__uaccess_begin();
|
||||
__put_user_asm(*(u64 *)src, (u64 __user *)dst,
|
||||
ret, "q", "", "er", 10);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
asm("":::"memory");
|
||||
__put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst,
|
||||
ret, "w", "w", "ir", 2);
|
||||
if (likely(!ret)) {
|
||||
asm("":::"memory");
|
||||
__put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst,
|
||||
ret, "w", "w", "ir", 2);
|
||||
}
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
case 16:
|
||||
__uaccess_begin();
|
||||
__put_user_asm(*(u64 *)src, (u64 __user *)dst,
|
||||
ret, "q", "", "er", 16);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
asm("":::"memory");
|
||||
__put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst,
|
||||
ret, "q", "", "er", 8);
|
||||
if (likely(!ret)) {
|
||||
asm("":::"memory");
|
||||
__put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst,
|
||||
ret, "q", "", "er", 8);
|
||||
}
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
default:
|
||||
return copy_user_generic((__force void *)dst, src, size);
|
||||
|
@ -160,39 +192,47 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
|
|||
switch (size) {
|
||||
case 1: {
|
||||
u8 tmp;
|
||||
__uaccess_begin();
|
||||
__get_user_asm(tmp, (u8 __user *)src,
|
||||
ret, "b", "b", "=q", 1);
|
||||
if (likely(!ret))
|
||||
__put_user_asm(tmp, (u8 __user *)dst,
|
||||
ret, "b", "b", "iq", 1);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
}
|
||||
case 2: {
|
||||
u16 tmp;
|
||||
__uaccess_begin();
|
||||
__get_user_asm(tmp, (u16 __user *)src,
|
||||
ret, "w", "w", "=r", 2);
|
||||
if (likely(!ret))
|
||||
__put_user_asm(tmp, (u16 __user *)dst,
|
||||
ret, "w", "w", "ir", 2);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
}
|
||||
|
||||
case 4: {
|
||||
u32 tmp;
|
||||
__uaccess_begin();
|
||||
__get_user_asm(tmp, (u32 __user *)src,
|
||||
ret, "l", "k", "=r", 4);
|
||||
if (likely(!ret))
|
||||
__put_user_asm(tmp, (u32 __user *)dst,
|
||||
ret, "l", "k", "ir", 4);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
}
|
||||
case 8: {
|
||||
u64 tmp;
|
||||
__uaccess_begin();
|
||||
__get_user_asm(tmp, (u64 __user *)src,
|
||||
ret, "q", "", "=r", 8);
|
||||
if (likely(!ret))
|
||||
__put_user_asm(tmp, (u64 __user *)dst,
|
||||
ret, "q", "", "er", 8);
|
||||
__uaccess_end();
|
||||
return ret;
|
||||
}
|
||||
default:
|
||||
|
|
|
@ -1587,6 +1587,9 @@ void __init enable_IR_x2apic(void)
|
|||
unsigned long flags;
|
||||
int ret, ir_stat;
|
||||
|
||||
if (skip_ioapic_setup)
|
||||
return;
|
||||
|
||||
ir_stat = irq_remapping_prepare();
|
||||
if (ir_stat < 0 && !x2apic_supported())
|
||||
return;
|
||||
|
|
|
@ -152,6 +152,11 @@ static struct clocksource hyperv_cs = {
|
|||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
static unsigned char hv_get_nmi_reason(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __init ms_hyperv_init_platform(void)
|
||||
{
|
||||
/*
|
||||
|
@ -191,6 +196,13 @@ static void __init ms_hyperv_init_platform(void)
|
|||
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
|
||||
#endif
|
||||
mark_tsc_unstable("running on Hyper-V");
|
||||
|
||||
/*
|
||||
* Generation 2 instances don't support reading the NMI status from
|
||||
* 0x61 port.
|
||||
*/
|
||||
if (efi_enabled(EFI_BOOT))
|
||||
x86_platform.get_nmi_reason = hv_get_nmi_reason;
|
||||
}
|
||||
|
||||
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue