* origin/tmp-da9a92f: arm64: kaslr: increase randomization granularity arm64: relocatable: deal with physically misaligned kernel images arm64: don't map TEXT_OFFSET bytes below the kernel if we can avoid it arm64: kernel: replace early 64-bit literal loads with move-immediates arm64: introduce mov_q macro to move a constant into a 64-bit register arm64: kernel: perform relocation processing from ID map arm64: kernel: use literal for relocated address of __secondary_switched arm64: kernel: don't export local symbols from head.S arm64: simplify kernel segment mapping granularity arm64: cover the .head.text section in the .text segment mapping arm64: move early boot code to the .init segment arm64: use 'segment' rather than 'chunk' to describe mapped kernel regions arm64: mm: Mark .rodata as RO Linux 4.4.16 ovl: verify upper dentry before unlink and rename drm/i915: Revert DisplayPort fast link training feature tmpfs: fix regression hang in fallocate undo tmpfs: don't undo fallocate past its last page crypto: qat - make qat_asym_algs.o depend on asn1 headers xen/acpi: allow xen-acpi-processor driver to load on Xen 4.7 File names with trailing period or space need special case conversion cifs: dynamic allocation of ntlmssp blob Fix reconnect to not defer smb3 session reconnect long after socket reconnect 53c700: fix BUG on untagged commands s390: fix test_fp_ctl inline assembly contraints scsi: fix race between simultaneous decrements of ->host_failed ovl: verify upper dentry in ovl_remove_and_whiteout() ovl: Copy up underlying inode's ->i_mode to overlay inode ARM: mvebu: fix HW I/O coherency related deadlocks ARM: dts: armada-38x: fix MBUS_ID for crypto SRAM on Armada 385 Linksys ARM: sunxi/dt: make the CHIP inherit from allwinner,sun5i-a13 ALSA: hda: add AMD Stoney PCI ID with proper driver caps ALSA: hda - fix use-after-free after module unload ALSA: ctl: Stop notification after disconnection ALSA: pcm: Free chmap at PCM free callback, too ALSA: hda/realtek - add 
new pin definition in alc225 pin quirk table ALSA: hda - fix read before array start ALSA: hda - Add PCI ID for Kabylake-H ALSA: hda/realtek: Add Lenovo L460 to docking unit fixup ALSA: timer: Fix negative queue usage by racy accesses ALSA: echoaudio: Fix memory allocation ALSA: au88x0: Fix calculation in vortex_wtdma_bufshift() ALSA: hda / realtek - add two more Thinkpad IDs (5050,5053) for tpt460 fixup ALSA: hda - Fix the headset mic jack detection on Dell machine ALSA: dummy: Fix a use-after-free at closing hwmon: (dell-smm) Cache fan_type() calls and change fan detection hwmon: (dell-smm) Disallow fan_type() calls on broken machines hwmon: (dell-smm) Restrict fan control and serial number to CAP_SYS_ADMIN by default tty/vt/keyboard: fix OOB access in do_compute_shiftstate() tty: vt: Fix soft lockup in fbcon cursor blink timer. iio:ad7266: Fix probe deferral for vref iio:ad7266: Fix support for optional regulators iio:ad7266: Fix broken regulator error handling iio: accel: kxsd9: fix the usage of spi_w8r8() staging: iio: accel: fix error check iio: hudmidity: hdc100x: fix incorrect shifting and scaling iio: humidity: hdc100x: fix IIO_TEMP channel reporting iio: humidity: hdc100x: correct humidity integration time mask iio: proximity: as3935: fix buffer stack trashing iio: proximity: as3935: remove triggered buffer processing iio: proximity: as3935: correct IIO_CHAN_INFO_RAW output iio: light apds9960: Add the missing dev.parent iio:st_pressure: fix sampling gains (bring inline with ABI) iio: Fix error handling in iio_trigger_attach_poll_func xen/balloon: Fix declared-but-not-defined warning perf/x86: Fix undefined shift on 32-bit kernels memory: omap-gpmc: Fix omap gpmc EXTRADELAY timing drm/vmwgfx: Fix error paths when mapping framebuffer drm/vmwgfx: Delay pinning fbdev framebuffer until after mode set drm/vmwgfx: Check pin count before attempting to move a buffer drm/vmwgfx: Work around mode set failure in 2D VMs drm/vmwgfx: Add an option to change assumed FB 
bpp drm/ttm: Make ttm_bo_mem_compat available drm: atmel-hlcdc: actually disable scaling when no scaling is required drm: make drm_atomic_set_mode_prop_for_crtc() more reliable drm: add missing drm_mode_set_crtcinfo call drm/i915: Update CDCLK_FREQ register on BDW after changing cdclk frequency drm/i915: Update ifdeffery for mutex->owner drm/i915: Refresh cached DP port register value on resume drm/i915/ilk: Don't disable SSC source if it's in use drm/nouveau/disp/sor/gf119: select correct sor when poking training pattern drm/nouveau: fix for disabled fbdev emulation drm/nouveau/fbcon: fix out-of-bounds memory accesses drm/nouveau/gr/gf100-: update sm error decoding from gk20a nvgpu headers drm/nouveau/disp/sor/gf119: both links use the same training register virtio_balloon: fix PFN format for virtio-1 drm/dp/mst: Always clear proposed vcpi table for port. drm/amdkfd: destroy dbgmgr in notifier release drm/amdkfd: unbind only existing processes ubi: Make recover_peb power cut aware drm/amdgpu/gfx7: fix broken condition check drm/radeon: fix asic initialization for virtualized environments btrfs: account for non-CoW'd blocks in btrfs_abort_transaction percpu: fix synchronization between synchronous map extension and chunk destruction percpu: fix synchronization between chunk->map_extend_work and chunk destruction af_unix: fix hard linked sockets on overlay vfs: add d_real_inode() helper arm64: Rework valid_user_regs ipmi: Remove smi_msg from waiting_rcv_msgs list before handle_one_recv_msg() drm/mgag200: Black screen fix for G200e rev 4 iommu/amd: Fix unity mapping initialization race iommu/vt-d: Enable QI on all IOMMUs before setting root entry iommu/arm-smmu: Wire up map_sg for arm-smmu-v3 base: make module_create_drivers_dir race-free tracing: Handle NULL formats in hold_module_trace_bprintk_format() HID: multitouch: enable palm rejection for Windows Precision Touchpad HID: hiddev: validate num_values for HIDIOCGUSAGES, HIDIOCSUSAGES commands HID: elo: kill not 
flush the work KVM: nVMX: VMX instructions: fix segment checks when L1 is in long mode. kvm: Fix irq route entries exceeding KVM_MAX_IRQ_ROUTES KEYS: potential uninitialized variable ARCv2: LLSC: software backoff is NOT needed starting HS2.1c ARCv2: Check for LL-SC livelock only if LLSC is enabled ipv6: Fix mem leak in rt6i_pcpu cdc_ncm: workaround for EM7455 "silent" data interface net_sched: fix mirrored packets checksum packet: Use symmetric hash for PACKET_FANOUT_HASH. sched/fair: Fix cfs_rq avg tracking underflow UBIFS: Implement ->migratepage() mm: Export migrate_page_move_mapping and migrate_page_copy MIPS: KVM: Fix modular KVM under QEMU ARM: 8579/1: mm: Fix definition of pmd_mknotpresent ARM: 8578/1: mm: ensure pmd_present only checks the valid bit ARM: imx6ul: Fix Micrel PHY mask NFS: Fix another OPEN_DOWNGRADE bug make nfs_atomic_open() call d_drop() on all ->open_context() errors. nfsd: check permissions when setting ACLs posix_acl: Add set_posix_acl nfsd: Extend the mutex holding region around in nfsd4_process_open2() nfsd: Always lock state exclusively. nfsd4/rpc: move backchannel create logic into rpc code writeback: use higher precision calculation in domain_dirty_limits() thermal: cpu_cooling: fix improper order during initialization uvc: Forward compat ioctls to their handlers directly Revert "gpiolib: Split GPIO flags parsing and GPIO configuration" x86/amd_nb: Fix boot crash on non-AMD systems kprobes/x86: Clear TF bit in fault on single-stepping x86, build: copy ldlinux.c32 to image.iso locking/static_key: Fix concurrent static_key_slow_inc() locking/qspinlock: Fix spin_unlock_wait() some more locking/ww_mutex: Report recursive ww_mutex locking early of: irq: fix of_irq_get[_byname]() kernel-doc of: fix autoloading due to broken modalias with no 'compatible' mnt: If fs_fully_visible fails call put_filesystem. 
mnt: Account for MS_RDONLY in fs_fully_visible mnt: fs_fully_visible test the proper mount for MNT_LOCKED usb: common: otg-fsm: add license to usb-otg-fsm USB: EHCI: declare hostpc register as zero-length array usb: dwc2: fix regression on big-endian PowerPC/ARM systems powerpc/tm: Always reclaim in start_thread() for exec() class syscalls powerpc/pseries: Fix IBM_ARCH_VEC_NRCORES_OFFSET since POWER8NVL was added powerpc/pseries: Fix PCI config address for DDW powerpc/iommu: Remove the dependency on EEH struct in DDW mechanism IB/mlx4: Properly initialize GRH TClass and FlowLabel in AHs IB/cm: Fix a recently introduced locking bug EDAC, sb_edac: Fix rank lookup on Broadwell mac80211: Fix mesh estab_plinks counting in STA removal case mac80211_hwsim: Add missing check for HWSIM_ATTR_SIGNAL mac80211: mesh: flush mesh paths unconditionally mac80211: fix fast_tx header alignment Linux 4.4.15 usb: dwc3: exynos: Fix deferred probing storm. usb: host: ehci-tegra: Grab the correct UTMI pads reset usb: gadget: fix spinlock dead lock in gadgetfs USB: mos7720: delete parport xhci: Fix handling timeouted commands on hosts in weird states. 
USB: xhci: Add broken streams quirk for Frescologic device id 1009 usb: xhci-plat: properly handle probe deferral for devm_clk_get() xhci: Cleanup only when releasing primary hcd usb: musb: host: correct cppi dma channel for isoch transfer usb: musb: Ensure rx reinit occurs for shared_fifo endpoints usb: musb: Stop bulk endpoint while queue is rotated usb: musb: only restore devctl when session was set in backup usb: quirks: Add no-lpm quirk for Acer C120 LED Projector usb: quirks: Fix sorting USB: uas: Fix slave queue_depth not being set crypto: user - re-add size check for CRYPTO_MSG_GETALG crypto: ux500 - memmove the right size crypto: vmx - Increase priority of aes-cbc cipher AX.25: Close socket connection on session completion bpf: try harder on clones when writing into skb net: alx: Work around the DMA RX overflow issue net: macb: fix default configuration for GMAC on AT91 neigh: Explicitly declare RCU-bh read side critical section in neigh_xmit() bpf, perf: delay release of BPF prog after grace period sock_diag: do not broadcast raw socket destruction Bridge: Fix ipv6 mc snooping if bridge has no ipv6 address ipmr/ip6mr: Initialize the last assert time of mfc entries. 
netem: fix a use after free esp: Fix ESN generation under UDP encapsulation sit: correct IP protocol used in ipip6_err net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG net_sched: fix pfifo_head_drop behavior vs backlog sdcardfs: Truncate packages_gid.list on overflow UPSTREAM: cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind BACKPORT: proc: add /proc/<pid>/timerslack_ns interface BACKPORT: timer: convert timer_slack_ns from unsigned long to u64 netfilter: xt_quota2: make quota2_log work well Revert "usb: gadget: prevent change of Host MAC address of 'usb0' interface" BACKPORT: PM / sleep: Go direct_complete if driver has no callbacks ANDROID: base-cfg: enable UID_CPUTIME UPSTREAM: USB: usbfs: fix potential infoleak in devio UPSTREAM: ALSA: timer: Fix leak in events via snd_timer_user_ccallback UPSTREAM: ALSA: timer: Fix leak in events via snd_timer_user_tinterrupt UPSTREAM: ALSA: timer: Fix leak in SNDRV_TIMER_IOCTL_PARAMS ANDROID: configs: remove unused configs ANDROID: cpu: send KOBJ_ONLINE event when enabling cpus ANDROID: dm verity fec: initialize recursion level ANDROID: dm verity fec: fix RS block calculation Linux 4.4.14 netfilter: x_tables: introduce and use xt_copy_counters_from_user netfilter: x_tables: do compat validation via translate_table netfilter: x_tables: xt_compat_match_from_user doesn't need a retval netfilter: ip6_tables: simplify translate_compat_table args netfilter: ip_tables: simplify translate_compat_table args netfilter: arp_tables: simplify translate_compat_table args netfilter: x_tables: don't reject valid target size on some architectures netfilter: x_tables: validate all offsets and sizes in a rule netfilter: x_tables: check for bogus target offset netfilter: x_tables: check standard target size too netfilter: x_tables: add compat version of xt_check_entry_offsets netfilter: x_tables: assert minimum target size netfilter: x_tables: kill check_entry helper netfilter: x_tables: add and use 
xt_check_entry_offsets netfilter: x_tables: validate targets of jumps netfilter: x_tables: don't move to non-existent next rule drm/core: Do not preserve framebuffer on rmfb, v4. crypto: qat - fix adf_ctl_drv.c:undefined reference to adf_init_pf_wq netfilter: x_tables: fix unconditional helper netfilter: x_tables: make sure e->next_offset covers remaining blob size netfilter: x_tables: validate e->target_offset early MIPS: Fix 64k page support for 32 bit kernels. sparc64: Fix return from trap window fill crashes. sparc: Harden signal return frame checks. sparc64: Take ctx_alloc_lock properly in hugetlb_setup(). sparc64: Reduce TLB flushes during hugepte changes sparc/PCI: Fix for panic while enabling SR-IOV sparc64: Fix sparc64_set_context stack handling. sparc64: Fix numa node distance initialization sparc64: Fix bootup regressions on some Kconfig combinations. sparc: Fix system call tracing register handling. fix d_walk()/non-delayed __d_free() race sched: panic on corrupted stack end proc: prevent stacking filesystems on top x86/entry/traps: Don't force in_interrupt() to return true in IST handlers wext: Fix 32 bit iwpriv compatibility issue with 64 bit Kernel ecryptfs: forbid opening files without mmap handler memcg: add RCU locking around css_for_each_descendant_pre() in memcg_offline_kmem() parisc: Fix pagefault crash in unaligned __get_user() call pinctrl: mediatek: fix dual-edge code defect powerpc/pseries: Add POWER8NVL support to ibm,client-architecture-support call powerpc: Use privileged SPR number for MMCR2 powerpc: Fix definition of SIAR and SDAR registers powerpc/pseries/eeh: Handle RTAS delay requests in configure_bridge arm64: mm: always take dirty state from new pte in ptep_set_access_flags arm64: Provide "model name" in /proc/cpuinfo for PER_LINUX32 tasks crypto: ccp - Fix AES XTS error for request sizes above 4096 crypto: public_key: select CRYPTO_AKCIPHER irqchip/gic-v3: Fix ICC_SGI1R_EL1.INTID decoding mask s390/bpf: reduce maximum program 
size to 64 KB s390/bpf: fix recache skb->data/hlen for skb_vlan_push/pop gpio: bcm-kona: fix bcm_kona_gpio_reset() warnings ARM: fix PTRACE_SETVFPREGS on SMP systems ALSA: hda/realtek: Add T560 docking unit fixup ALSA: hda/realtek - Add support for new codecs ALC700/ALC701/ALC703 ALSA: hda/realtek - ALC256 speaker noise issue ALSA: hda - Fix headset mic detection problem for Dell machine ALSA: hda - Add PCI ID for Kabylake KVM: irqfd: fix NULL pointer dereference in kvm_irq_map_gsi KVM: x86: fix OOPS after invalid KVM_SET_DEBUGREGS vxlan, gre, geneve: Set a large MTU on ovs-created tunnel devices geneve: Relax MTU constraints vxlan: Relax MTU constraints ipv6: Skip XFRM lookup if dst_entry in socket cache is valid l2tp: fix configuration passed to setup_udp_tunnel_sock() bridge: Don't insert unnecessary local fdb entry on changing mac address tcp: record TLP and ER timer stats in v6 stats vxlan: Accept user specified MTU value when create new vxlan link team: don't call netdev_change_features under team->lock sfc: on MC reset, clear PIO buffer linkage in TXQs bpf, inode: disallow userns mounts uapi glibc compat: fix compilation when !__USE_MISC in glibc udp: prevent skbs lingering in tunnel socket queues bpf: Use mount_nodev not mount_ns to mount the bpf filesystem tuntap: correctly wake up process during uninit switchdev: pass pointer to fib_info instead of copy tipc: fix nametable publication field in nl compat netlink: Fix dump skb leak/double free tipc: check nl sock before parsing nested attributes scsi: Add QEMU CD-ROM to VPD Inquiry Blacklist scsi_lib: correctly retry failed zero length REQ_TYPE_FS commands cs-etm: associating output packet with CPU they executed on cs-etm: removing unecessary structure field cs-etm: account for each trace buffer in the queue cs-etm: avoid casting variable perf tools: fixing Makefile problems perf tools: new naming convention for openCSD perf scripts: Add python scripts for CoreSight traces perf tools: decoding capailitity 
for CoreSight traces perf symbols: Check before overwriting build_id perf tools: pushing driver configuration down to the kernel perf tools: add infrastructure for PMU specific configuration coresight: etm-perf: incorporating sink definition from the cmd line coresight: adding sink parameter to function coresight_build_path() perf: passing struct perf_event to function setup_aux() perf/core: adding PMU driver specific configuration perf tools: adding coresight etm PMU record capabilities perf tools: making coresight PMU listable coresight: tmc: implementing TMC-ETR AUX space API coresight: Add support for Juno platform coresight: Handle build path error coresight: Fix erroneous memset in tmc_read_unprepare_etr coresight: Fix tmc_read_unprepare_etr coresight: Fix NULL pointer dereference in _coresight_build_path ANDROID: dm verity fec: add missing release from fec_ktype ANDROID: dm verity fec: limit error correction recursion ANDROID: restrict access to perf events FROMLIST: security,perf: Allow further restriction of perf_event_open BACKPORT: perf tools: Document the perf sysctls Revert "armv6 dcc tty driver" Revert "arm: dcc_tty: fix armv6 dcc tty build failure" ARM64: Ignore Image-dtb from git point of view arm64: add option to build Image-dtb ANDROID: usb: gadget: f_midi: set fi->f to NULL when free f_midi function Linux 4.4.13 xfs: handle dquot buffer readahead in log recovery correctly xfs: print name of verifier if it fails xfs: skip stale inodes in xfs_iflush_cluster xfs: fix inode validity check in xfs_iflush_cluster xfs: xfs_iflush_cluster fails to abort on error xfs: Don't wrap growfs AGFL indexes xfs: disallow rw remount on fs with unknown ro-compat features gcov: disable tree-loop-im to reduce stack usage scripts/package/Makefile: rpmbuild add support of RPMOPTS dma-debug: avoid spinlock recursion when disabling dma-debug PM / sleep: Handle failures in device_suspend_late() consistently ext4: silence UBSAN in ext4_mb_init() ext4: address UBSAN warning 
in mb_find_order_for_block() ext4: fix oops on corrupted filesystem ext4: clean up error handling when orphan list is corrupted ext4: fix hang when processing corrupted orphaned inode list drm/imx: Match imx-ipuv3-crtc components using device node in platform data drm/i915: Don't leave old junk in ilk active watermarks on readout drm/atomic: Verify connector->funcs != NULL when clearing states drm/fb_helper: Fix references to dev->mode_config.num_connector drm/i915/fbdev: Fix num_connector references in intel_fb_initial_config() drm/amdgpu: Fix hdmi deep color support. drm/amdgpu: use drm_mode_vrefresh() rather than mode->vrefresh drm/vmwgfx: Fix order of operation drm/vmwgfx: use vmw_cmd_dx_cid_check for query commands. drm/vmwgfx: Enable SVGA_3D_CMD_DX_SET_PREDICATION drm/gma500: Fix possible out of bounds read sunrpc: fix stripping of padded MIC tokens xen: use same main loop for counting and remapping pages xen/events: Don't move disabled irqs powerpc/eeh: Restore initial state in eeh_pe_reset_and_recover() Revert "powerpc/eeh: Fix crash in eeh_add_device_early() on Cell" powerpc/eeh: Don't report error in eeh_pe_reset_and_recover() powerpc/book3s64: Fix branching to OOL handlers in relocatable kernel pipe: limit the per-user amount of pages allocated in pipes QE-UART: add "fsl,t1040-ucc-uart" to of_device_id wait/ptrace: assume __WALL if the child is traced mm: use phys_addr_t for reserve_bootmem_region() arguments media: v4l2-compat-ioctl32: fix missing reserved field copy in put_v4l2_create32 PCI: Disable all BAR sizing for devices with non-compliant BARs pinctrl: exynos5440: Use off-stack memory for pinctrl_gpio_range clk: bcm2835: divider value has to be 1 or more clk: bcm2835: pll_off should only update CM_PLL_ANARST clk: at91: fix check of clk_register() returned value clk: bcm2835: Fix PLL poweron cpuidle: Fix cpuidle_state_is_coupled() argument in cpuidle_enter() cpuidle: Indicate when a device has been unregistered PM / Runtime: Fix error path in 
pm_runtime_force_resume() mfd: intel_soc_pmic_core: Terminate panel control GPIO lookup table correctly mfd: intel-lpss: Save register context on suspend hwmon: (ads7828) Enable internal reference aacraid: Fix for KDUMP driver hang aacraid: Fix for aac_command_thread hang aacraid: Relinquish CPU during timeout wait rtlwifi: pci: use dev_kfree_skb_irq instead of kfree_skb in rtl_pci_reset_trx_ring rtlwifi: Fix logic error in enter/exit power-save mode rtlwifi: btcoexist: Implement antenna selection rtlwifi: rtl8723be: Add antenna select module parameter hwrng: exynos - Fix unbalanced PM runtime put on timeout error path ath5k: Change led pin configuration for compaq c700 laptop ath10k: fix kernel panic, move arvifs list head init before htt init ath10k: fix rx_channel during hw reconfigure ath10k: fix firmware assert in monitor mode ath10k: fix debugfs pktlog_filter write ath9k: Fix LED polarity for some Mini PCI AR9220 MB92 cards. ath9k: Add a module parameter to invert LED polarity. ARM: dts: imx35: restore existing used clock enumeration ARM: dts: exynos: Add interrupt line to MAX8997 PMIC on exynos4210-trats ARM: dts: at91: fix typo in sama5d2 PIN_PD24 description ARM: mvebu: fix GPIO config on the Linksys boards Input: uinput - handle compat ioctl for UI_SET_PHYS ASoC: ak4642: Enable cache usage to fix crashes on resume affs: fix remount failure when there are no options changed MIPS: VDSO: Build with `-fno-strict-aliasing' MIPS: lib: Mark intrinsics notrace MIPS: Build microMIPS VDSO for microMIPS kernels MIPS: Fix sigreturn via VDSO on microMIPS kernel MIPS: ptrace: Prevent writes to read-only FCSR bits MIPS: ptrace: Fix FP context restoration FCSR regression MIPS: Disable preemption during prctl(PR_SET_FP_MODE, ...) 
MIPS: Prevent "restoration" of MSA context in non-MSA kernels MIPS: Fix MSA ld_*/st_* asm macros to use PTR_ADDU MIPS: Use copy_s.fmt rather than copy_u.fmt MIPS: Loongson-3: Reserve 32MB for RS780E integrated GPU MIPS: Reserve nosave data for hibernation MIPS: ath79: make bootconsole wait for both THRE and TEMT MIPS: Sync icache & dcache in set_pte_at MIPS: Handle highmem pages in __update_cache MIPS: Flush highmem pages in __flush_dcache_page MIPS: Fix watchpoint restoration MIPS: Fix uapi include in exported asm/siginfo.h MIPS: Fix siginfo.h to use strict posix types MIPS: Avoid using unwind_stack() with usermode MIPS: Don't unwind to user mode with EVA MIPS: MSA: Fix a link error on `_init_msa_upper' with older GCC MIPS: math-emu: Fix jalr emulation when rd == $0 MIPS64: R6: R2 emulation bugfix coresight: etb10: adjust read pointer only when needed coresight: configuring ETF in FIFO mode when acting as link coresight: tmc: implementing TMC-ETF AUX space API coresight: moving struct cs_buffers to header file coresight: tmc: keep track of memory width coresight: tmc: make sysFS and Perf mode mutually exclusive coresight: tmc: dump system memory content only when needed coresight: tmc: adding mode of operation for link/sinks coresight: tmc: getting rid of multiple read access coresight: tmc: allocating memory when needed coresight: tmc: making prepare/unprepare functions generic coresight: tmc: splitting driver in ETB/ETF and ETR components coresight: tmc: cleaning up header file coresight: tmc: introducing new header file coresight: tmc: clearly define number of transfers per burst coresight: tmc: re-implementing tmc_read_prepare/unprepare() functions coresight: tmc: waiting for TMCReady bit before programming coresight: tmc: modifying naming convention coresight: tmc: adding sysFS management entries coresight: etm4x: add tracer ID for A72 Maia processor. 
coresight: etb10: fixing the right amount of words to read coresight: stm: adding driver for CoreSight STM component coresight: adding path for STM device coresight: etm4x: modify q_support type coresight: no need to do the forced type conversion coresight: removing gratuitous boot time log messages coresight: etb10: splitting sysFS "status" entry coresight: moving coresight_simple_func() to header file coresight: etm4x: implementing the perf PMU API coresight: etm4x: implementing user/kernel mode tracing coresight: etm4x: moving etm_drvdata::enable to atomic field coresight: etm4x: unlocking tracers in default arch init coresight: etm4x: splitting etmv4 default configuration coresight: etm4x: splitting struct etmv4_drvdata coresight: etm4x: adding config and traceid registers coresight: etm4x: moving sysFS entries to a dedicated file stm class: Support devices that override software assigned masters stm class: Remove unnecessary pointer increment stm class: Fix stm device initialization order stm class: Do not leak the chrdev in error path stm class: Remove a pointless line stm class: stm_heartbeat: Make nr_devs parameter read-only stm class: dummy_stm: Make nr_dummies parameter read-only MAINTAINERS: Add a git tree for the stm class perf/ring_buffer: Document AUX API usage perf/core: Free AUX pages in unmap path perf/ring_buffer: Refuse to begin AUX transaction after rb->aux_mmap_count drops perf auxtrace: Add perf_evlist pointer to *info_priv_size() perf session: Simplify tool stubs perf inject: Hit all DSOs for AUX data in JIT and other cases perf tools: tracepoint_error() can receive e=NULL, robustify it perf evlist: Make perf_evlist__open() open evsels with their cpus and threads (like perf record does) perf evsel: Introduce disable() method perf cpumap: Auto initialize cpu__max_{node,cpu} drivers/hwtracing: make coresight-etm-perf.c explicitly non-modular drivers/hwtracing: make coresight-* explicitly non-modular coresight: introducing a global trace ID 
function coresight: etm-perf: new PMU driver for ETM tracers coresight: etb10: implementing AUX API coresight: etb10: adding operation mode for sink->enable() coresight: etb10: moving to local atomic operations coresight: etm3x: implementing perf_enable/disable() API coresight: etm3x: implementing user/kernel mode tracing coresight: etm3x: consolidating initial config coresight: etm3x: changing default trace configuration coresight: etm3x: set progbit to stop trace collection coresight: etm3x: adding operation mode for etm_enable() coresight: etm3x: splitting struct etm_drvdata coresight: etm3x: unlocking tracers in default arch init coresight: etm3x: moving sysFS entries to dedicated file coresight: etm3x: moving etm_readl/writel to header file coresight: moving PM runtime operations to core framework coresight: add API to get sink from path coresight: associating path with session rather than tracer coresight: etm4x: Check every parameter used by dma_xx_coherent. coresight: "DEVICE_ATTR_RO" should defined as static. 
coresight: implementing 'cpu_id()' API coresight: removing bind/unbind options from sysfs coresight: remove csdev's link from topology coresight: release reference taken by 'bus_find_device()' coresight: coresight_unregister() function cleanup coresight: fixing lockdep error coresight: fixing indentation problem coresight: Fix a typo in Kconfig coresight: checking for NULL string in coresight_name_match() perf/core: Disable the event on a truncated AUX record perf/core: Don't leak event in the syscall error path perf/core: Fix perf_sched_count derailment stm class: dummy_stm: Add link callback for fault injection stm class: Plug stm device's unlink callback stm class: Fix a race in unlinking stm class: Fix unbalanced module/device refcounting stm class: Guard output assignment against concurrency stm class: Fix unlocking braino in the error path stm class: Add heartbeat stm source device stm class: dummy_stm: Create multiple devices stm class: Support devices with multiple instances stm class: Use driver's packet callback return value stm class: Prevent user-controllable allocations stm class: Fix link list locking stm class: Fix locking in unbinding policy path stm class: Select CONFIG_SRCU stm class: Hide STM-specific options if STM is disabled perf: Synchronously free aux pages in case of allocation failure Linux 4.4.12 kbuild: move -Wunused-const-variable to W=1 warning level Revert "scsi: fix soft lockup in scsi_remove_target() on module removal" scsi: Add intermediate STARGET_REMOVE state to scsi_target_state hpfs: implement the show_options method hpfs: fix remount failure when there are no options changed UBI: Fix static volume checks when Fastmap is used SIGNAL: Move generic copy_siginfo() to signal.h thunderbolt: Fix double free of drom buffer IB/srp: Fix a debug kernel crash ALSA: hda - Fix headset mic detection problem for one Dell machine ALSA: hda/realtek - Add support for ALC295/ALC3254 ALSA: hda - Fix headphone noise on Dell XPS 13 9360 ALSA: 
hda/realtek - New codecs support for ALC234/ALC274/ALC294 mcb: Fixed bar number assignment for the gdd clk: bcm2835: add locking to pll*_on/off methods locking,qspinlock: Fix spin_is_locked() and spin_unlock_wait() serial: samsung: Reorder the sequence of clock control when call s3c24xx_serial_set_termios() serial: 8250_mid: recognize interrupt source in handler serial: 8250_mid: use proper bar for DNV platform serial: 8250_pci: fix divide error bug if baud rate is 0 Fix OpenSSH pty regression on close tty/serial: atmel: fix hardware handshake selection TTY: n_gsm, fix false positive WARN_ON tty: vt, return error when con_startup fails xen/x86: actually allocate legacy interrupts on PV guests KVM: x86: mask CPUID(0xD,0x1).EAX against host value MIPS: KVM: Fix timer IRQ race when writing CP0_Compare MIPS: KVM: Fix timer IRQ race when freezing timer KVM: x86: fix ordering of cr0 initialization code in vmx_cpu_reset KVM: MTRR: remove MSR 0x2f8 staging: comedi: das1800: fix possible NULL dereference usb: gadget: udc: core: Fix argument of dev_err() in usb_gadget_map_request() USB: leave LPM alone if possible when binding/unbinding interface drivers usb: misc: usbtest: fix pattern tests for scatterlists. 
usb: f_mass_storage: test whether thread is running before starting another usb: gadget: f_fs: Fix EFAULT generation for async read operations USB: serial: option: add even more ZTE device ids USB: serial: option: add more ZTE device ids USB: serial: option: add support for Cinterion PH8 and AHxx USB: serial: io_edgeport: fix memory leaks in probe error path USB: serial: io_edgeport: fix memory leaks in attach error path USB: serial: quatech2: fix use-after-free in probe error path USB: serial: keyspan: fix use-after-free in probe error path USB: serial: mxuport: fix use-after-free in probe error path mei: bus: call mei_cl_read_start under device lock mei: amthif: discard not read messages mei: fix NULL dereferencing during FW initiated disconnection Bluetooth: vhci: Fix race at creating hci device Bluetooth: vhci: purge unhandled skbs Bluetooth: vhci: fix open_timeout vs. hdev race mmc: sdhci-pci: Remove MMC_CAP_BUS_WIDTH_TEST for Intel controllers mmc: longer timeout for long read time quirk dell-rbtn: Ignore ACPI notifications if device is suspended ACPI / osi: Fix an issue that acpi_osi=!* cannot disable ACPICA internal strings mmc: sdhci-acpi: Remove MMC_CAP_BUS_WIDTH_TEST for Intel controllers mmc: mmc: Fix partition switch timeout for some eMMCs can: fix handling of unmodifiable configuration options irqchip/gic-v3: Configure all interrupts as non-secure Group-1 irqchip/gic: Ensure ordering between read of INTACK and shared data Input: pwm-beeper - fix - scheduling while atomic mfd: omap-usb-tll: Fix scheduling while atomic BUG sched/loadavg: Fix loadavg artifacts on fully idle and on fully loaded systems clk: qcom: msm8916: Fix crypto clock flags crypto: sun4i-ss - Replace spinlock_bh by spin_lock_irq{save|restore} crypto: talitos - fix ahash algorithms registration crypto: caam - fix caam_jr_alloc() ret code ring-buffer: Prevent overflow of size in ring_buffer_resize() ring-buffer: Use long for nr_pages to avoid overflow failures asix: Fix offset 
calculation in asix_rx_fixup() causing slow transmissions fs/cifs: correctly to anonymous authentication for the NTLM(v2) authentication fs/cifs: correctly to anonymous authentication for the NTLM(v1) authentication fs/cifs: correctly to anonymous authentication for the LANMAN authentication fs/cifs: correctly to anonymous authentication via NTLMSSP remove directory incorrectly tries to set delete on close on non-empty directories kvm: arm64: Fix EC field in inject_abt64 arm/arm64: KVM: Enforce Break-Before-Make on Stage-2 page tables arm64: cpuinfo: Missing NULL terminator in compat_hwcap_str arm64: Implement pmdp_set_access_flags() for hardware AF/DBM arm64: Implement ptep_set_access_flags() for hardware AF/DBM arm64: Ensure pmd_present() returns false after pmd_mknotpresent() arm64: Fix typo in the pmdp_huge_get_and_clear() definition ext4: iterate over buffer heads correctly in move_extent_per_page() perf test: Fix build of BPF and LLVM on older glibc libraries perf/core: Fix perf_event_open() vs. execve() race perf/x86/intel/pt: Generate PMI in the STOP region as well Btrfs: don't use src fd for printk UPSTREAM: mac80211: fix "warning: ‘target_metric’ may be used uninitialized" Revert "drivers: power: use 'current' instead of 'get_current()'" cpufreq: interactive: drop cpufreq_{get,put}_global_kobject func calls Revert "cpufreq: interactive: build fixes for 4.4" xt_qtaguid: Fix panic caused by processing non-full socket. fiq_debugger: Add fiq_debugger.disable option UPSTREAM: procfs: fixes pthread cross-thread naming if !PR_DUMPABLE FROMLIST: wlcore: Disable filtering in AP role Revert "drivers: power: Add watchdog timer to catch drivers which lockup during suspend." 
fiq_debugger: Add option to apply uart overlay by FIQ_DEBUGGER_UART_OVERLAY Revert "Recreate asm/mach/mmc.h include file" Revert "ARM: Add 'card_present' state to mmc_platfrom_data" usb: dual-role: make stub functions inline Revert "mmc: Add status IRQ and status callback function to mmc platform data" quick selinux support for tracefs Revert "hid-multitouch: Filter collections by application usage." Revert "HID: steelseries: validate output report details" xt_qtaguid: Fix panic caused by synack processing Revert "mm: vmscan: Add a debug file for shrinkers" Revert "SELinux: Enable setting security contexts on rootfs inodes." Revert "SELinux: build fix for 4.1" fuse: Add support for d_canonical_path vfs: change d_canonical_path to take two paths android: recommended.cfg: remove CONFIG_UID_STAT netfilter: xt_qtaguid: seq_printf fixes Revert "misc: uidstat: Adding uid stat driver to collect network statistics." Revert "net: activity_stats: Add statistics for network transmission activity" Revert "net: activity_stats: Stop using obsolete create_proc_read_entry api" Revert "misc: uidstat: avoid create_stat() race and blockage." Revert "misc: uidstat: Remove use of obsolete create_proc_read_entry api" Revert "misc seq_printf fixes for 4.4" Revert "misc: uid_stat: Include linux/atomic.h instead of asm/atomic.h" Revert "net: socket ioctl to reset connections matching local address" Revert "net: fix iterating over hashtable in tcp_nuke_addr()" Revert "net: fix crash in tcp_nuke_addr()" Revert "Don't kill IPv4 sockets when killing IPv6 sockets was requested." Revert "tcp: Fix IPV6 module build errors" android: base-cfg: remove CONFIG_SWITCH Revert "switch: switch class and GPIO drivers." 
Revert "drivers: switch: remove S_IWUSR from dev_attr" ANDROID: base-cfg: enable CONFIG_IP_NF_NAT BACKPORT: selinux: restrict kernel module loading android: base-cfg: enable CONFIG_QUOTA Conflicts: Documentation/sysctl/kernel.txt drivers/cpufreq/cpufreq_interactive.c drivers/hwtracing/coresight/Kconfig drivers/hwtracing/coresight/Makefile drivers/hwtracing/coresight/coresight-etm4x.c drivers/hwtracing/coresight/coresight-etm4x.h drivers/hwtracing/coresight/coresight-priv.h drivers/hwtracing/coresight/coresight-stm.c drivers/hwtracing/coresight/coresight-tmc.c drivers/mmc/core/core.c include/linux/coresight-stm.h include/linux/coresight.h include/linux/msm_mdp.h include/uapi/linux/coresight-stm.h kernel/events/core.c kernel/sched/fair.c net/Makefile net/ipv4/netfilter/arp_tables.c net/ipv4/netfilter/ip_tables.c net/ipv4/tcp.c net/ipv6/netfilter/ip6_tables.c net/netfilter/xt_quota2.c sound/core/pcm.c Change-Id: I17aa0002815014e9bddc47e67769a53c15768a99 Signed-off-by: Runmin Wang <runminw@codeaurora.org>
3228 lines
78 KiB
C
3228 lines
78 KiB
C
/*
|
|
* Generic address resolution entity
|
|
*
|
|
* Authors:
|
|
* Pedro Roque <roque@di.fc.ul.pt>
|
|
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
* Fixes:
|
|
* Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
|
|
* Harald Welte Add neighbour cache statistics like rtstat
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/slab.h>
|
|
#include <linux/types.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/socket.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/proc_fs.h>
|
|
#ifdef CONFIG_SYSCTL
|
|
#include <linux/sysctl.h>
|
|
#endif
|
|
#include <linux/times.h>
|
|
#include <net/net_namespace.h>
|
|
#include <net/neighbour.h>
|
|
#include <net/dst.h>
|
|
#include <net/sock.h>
|
|
#include <net/netevent.h>
|
|
#include <net/netlink.h>
|
|
#include <linux/rtnetlink.h>
|
|
#include <linux/random.h>
|
|
#include <linux/string.h>
|
|
#include <linux/log2.h>
|
|
#include <linux/inetdevice.h>
|
|
#include <net/addrconf.h>
|
|
|
|
#define DEBUG
|
|
#define NEIGH_DEBUG 1
|
|
/*
 * Debug print helper: forwards to pr_debug() when @level does not exceed
 * the compile-time NEIGH_DEBUG verbosity.  @level is parenthesized so that
 * an expression argument cannot change the meaning of the comparison.
 */
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if ((level) <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)
|
|
|
|
#define PNEIGH_HASHMASK 0xF
|
|
|
|
static void neigh_timer_handler(unsigned long arg);
|
|
static void __neigh_notify(struct neighbour *n, int type, int flags);
|
|
static void neigh_update_notify(struct neighbour *neigh);
|
|
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
static const struct file_operations neigh_stat_seq_fops;
|
|
#endif
|
|
|
|
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send something to the network.
     It will result in deadlocks if a backend/driver wants to use the
     neighbour cache.
   - If an entry requires some non-trivial action, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with a reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
 */
|
|
|
|
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
|
|
{
|
|
kfree_skb(skb);
|
|
return -ENETDOWN;
|
|
}
|
|
|
|
static void neigh_cleanup_and_release(struct neighbour *neigh)
|
|
{
|
|
if (neigh->parms->neigh_cleanup)
|
|
neigh->parms->neigh_cleanup(neigh);
|
|
|
|
__neigh_notify(neigh, RTM_DELNEIGH, 0);
|
|
neigh_release(neigh);
|
|
}
|
|
|
|
/*
 * Pick a random reachable time in the interval (1/2)*base...(3/2)*base.
 * This corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 *
 * Returns 0 when @base is 0 (which also avoids a modulo by zero).
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (prandom_u32() % base) + (base >> 1);
}
|
|
EXPORT_SYMBOL(neigh_rand_reach_time);
|
|
|
|
|
|
static int neigh_forced_gc(struct neigh_table *tbl)
|
|
{
|
|
int shrunk = 0;
|
|
int i;
|
|
struct neigh_hash_table *nht;
|
|
|
|
NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
|
|
|
|
write_lock_bh(&tbl->lock);
|
|
nht = rcu_dereference_protected(tbl->nht,
|
|
lockdep_is_held(&tbl->lock));
|
|
for (i = 0; i < (1 << nht->hash_shift); i++) {
|
|
struct neighbour *n;
|
|
struct neighbour __rcu **np;
|
|
|
|
np = &nht->hash_buckets[i];
|
|
while ((n = rcu_dereference_protected(*np,
|
|
lockdep_is_held(&tbl->lock))) != NULL) {
|
|
/* Neighbour record may be discarded if:
|
|
* - nobody refers to it.
|
|
* - it is not permanent
|
|
*/
|
|
write_lock(&n->lock);
|
|
if (atomic_read(&n->refcnt) == 1 &&
|
|
!(n->nud_state & NUD_PERMANENT)) {
|
|
rcu_assign_pointer(*np,
|
|
rcu_dereference_protected(n->next,
|
|
lockdep_is_held(&tbl->lock)));
|
|
n->dead = 1;
|
|
shrunk = 1;
|
|
write_unlock(&n->lock);
|
|
neigh_cleanup_and_release(n);
|
|
continue;
|
|
}
|
|
write_unlock(&n->lock);
|
|
np = &n->next;
|
|
}
|
|
}
|
|
|
|
tbl->last_flush = jiffies;
|
|
|
|
write_unlock_bh(&tbl->lock);
|
|
|
|
return shrunk;
|
|
}
|
|
|
|
static void neigh_add_timer(struct neighbour *n, unsigned long when)
|
|
{
|
|
neigh_hold(n);
|
|
if (unlikely(mod_timer(&n->timer, when))) {
|
|
printk("NEIGH: BUG, double timer add, state is %x\n",
|
|
n->nud_state);
|
|
dump_stack();
|
|
}
|
|
}
|
|
|
|
static int neigh_del_timer(struct neighbour *n)
|
|
{
|
|
if ((n->nud_state & NUD_IN_TIMER) &&
|
|
del_timer(&n->timer)) {
|
|
neigh_release(n);
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void pneigh_queue_purge(struct sk_buff_head *list)
|
|
{
|
|
struct sk_buff *skb;
|
|
|
|
while ((skb = skb_dequeue(list)) != NULL) {
|
|
dev_put(skb->dev);
|
|
kfree_skb(skb);
|
|
}
|
|
}
|
|
|
|
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
|
|
{
|
|
int i;
|
|
struct neigh_hash_table *nht;
|
|
|
|
nht = rcu_dereference_protected(tbl->nht,
|
|
lockdep_is_held(&tbl->lock));
|
|
|
|
for (i = 0; i < (1 << nht->hash_shift); i++) {
|
|
struct neighbour *n;
|
|
struct neighbour __rcu **np = &nht->hash_buckets[i];
|
|
|
|
while ((n = rcu_dereference_protected(*np,
|
|
lockdep_is_held(&tbl->lock))) != NULL) {
|
|
if (dev && n->dev != dev) {
|
|
np = &n->next;
|
|
continue;
|
|
}
|
|
rcu_assign_pointer(*np,
|
|
rcu_dereference_protected(n->next,
|
|
lockdep_is_held(&tbl->lock)));
|
|
write_lock(&n->lock);
|
|
neigh_del_timer(n);
|
|
n->dead = 1;
|
|
|
|
if (atomic_read(&n->refcnt) != 1) {
|
|
/* The most unpleasant situation.
|
|
We must destroy neighbour entry,
|
|
but someone still uses it.
|
|
|
|
The destroy will be delayed until
|
|
the last user releases us, but
|
|
we must kill timers etc. and move
|
|
it to safe state.
|
|
*/
|
|
__skb_queue_purge(&n->arp_queue);
|
|
n->arp_queue_len_bytes = 0;
|
|
n->output = neigh_blackhole;
|
|
if (n->nud_state & NUD_VALID)
|
|
n->nud_state = NUD_NOARP;
|
|
else
|
|
n->nud_state = NUD_NONE;
|
|
neigh_dbg(2, "neigh %p is stray\n", n);
|
|
}
|
|
write_unlock(&n->lock);
|
|
neigh_cleanup_and_release(n);
|
|
}
|
|
}
|
|
}
|
|
|
|
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
|
|
{
|
|
write_lock_bh(&tbl->lock);
|
|
neigh_flush_dev(tbl, dev);
|
|
write_unlock_bh(&tbl->lock);
|
|
}
|
|
EXPORT_SYMBOL(neigh_changeaddr);
|
|
|
|
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
|
|
{
|
|
write_lock_bh(&tbl->lock);
|
|
neigh_flush_dev(tbl, dev);
|
|
pneigh_ifdown(tbl, dev);
|
|
write_unlock_bh(&tbl->lock);
|
|
|
|
del_timer_sync(&tbl->proxy_timer);
|
|
pneigh_queue_purge(&tbl->proxy_queue);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(neigh_ifdown);
|
|
|
|
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
|
|
{
|
|
struct neighbour *n = NULL;
|
|
unsigned long now = jiffies;
|
|
int entries;
|
|
|
|
entries = atomic_inc_return(&tbl->entries) - 1;
|
|
if (entries >= tbl->gc_thresh3 ||
|
|
(entries >= tbl->gc_thresh2 &&
|
|
time_after(now, tbl->last_flush + 5 * HZ))) {
|
|
if (!neigh_forced_gc(tbl) &&
|
|
entries >= tbl->gc_thresh3) {
|
|
net_info_ratelimited("%s: neighbor table overflow!\n",
|
|
tbl->id);
|
|
NEIGH_CACHE_STAT_INC(tbl, table_fulls);
|
|
goto out_entries;
|
|
}
|
|
}
|
|
|
|
n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
|
|
if (!n)
|
|
goto out_entries;
|
|
|
|
__skb_queue_head_init(&n->arp_queue);
|
|
rwlock_init(&n->lock);
|
|
seqlock_init(&n->ha_lock);
|
|
n->updated = n->used = now;
|
|
n->nud_state = NUD_NONE;
|
|
n->output = neigh_blackhole;
|
|
seqlock_init(&n->hh.hh_lock);
|
|
n->parms = neigh_parms_clone(&tbl->parms);
|
|
setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
|
|
|
|
NEIGH_CACHE_STAT_INC(tbl, allocs);
|
|
n->tbl = tbl;
|
|
atomic_set(&n->refcnt, 1);
|
|
n->dead = 1;
|
|
out:
|
|
return n;
|
|
|
|
out_entries:
|
|
atomic_dec(&tbl->entries);
|
|
goto out;
|
|
}
|
|
|
|
/*
 * Fill *@x with random bytes and force bit 0 on, so the resulting value
 * is always odd (and therefore never zero).
 */
static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));

	*x |= 1;
}
|
|
|
|
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
|
|
{
|
|
size_t size = (1 << shift) * sizeof(struct neighbour *);
|
|
struct neigh_hash_table *ret;
|
|
struct neighbour __rcu **buckets;
|
|
int i;
|
|
|
|
ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
|
|
if (!ret)
|
|
return NULL;
|
|
if (size <= PAGE_SIZE)
|
|
buckets = kzalloc(size, GFP_ATOMIC);
|
|
else
|
|
buckets = (struct neighbour __rcu **)
|
|
__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
|
|
get_order(size));
|
|
if (!buckets) {
|
|
kfree(ret);
|
|
return NULL;
|
|
}
|
|
ret->hash_buckets = buckets;
|
|
ret->hash_shift = shift;
|
|
for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
|
|
neigh_get_hash_rnd(&ret->hash_rnd[i]);
|
|
return ret;
|
|
}
|
|
|
|
static void neigh_hash_free_rcu(struct rcu_head *head)
|
|
{
|
|
struct neigh_hash_table *nht = container_of(head,
|
|
struct neigh_hash_table,
|
|
rcu);
|
|
size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
|
|
struct neighbour __rcu **buckets = nht->hash_buckets;
|
|
|
|
if (size <= PAGE_SIZE)
|
|
kfree(buckets);
|
|
else
|
|
free_pages((unsigned long)buckets, get_order(size));
|
|
kfree(nht);
|
|
}
|
|
|
|
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
|
|
unsigned long new_shift)
|
|
{
|
|
unsigned int i, hash;
|
|
struct neigh_hash_table *new_nht, *old_nht;
|
|
|
|
NEIGH_CACHE_STAT_INC(tbl, hash_grows);
|
|
|
|
old_nht = rcu_dereference_protected(tbl->nht,
|
|
lockdep_is_held(&tbl->lock));
|
|
new_nht = neigh_hash_alloc(new_shift);
|
|
if (!new_nht)
|
|
return old_nht;
|
|
|
|
for (i = 0; i < (1 << old_nht->hash_shift); i++) {
|
|
struct neighbour *n, *next;
|
|
|
|
for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
|
|
lockdep_is_held(&tbl->lock));
|
|
n != NULL;
|
|
n = next) {
|
|
hash = tbl->hash(n->primary_key, n->dev,
|
|
new_nht->hash_rnd);
|
|
|
|
hash >>= (32 - new_nht->hash_shift);
|
|
next = rcu_dereference_protected(n->next,
|
|
lockdep_is_held(&tbl->lock));
|
|
|
|
rcu_assign_pointer(n->next,
|
|
rcu_dereference_protected(
|
|
new_nht->hash_buckets[hash],
|
|
lockdep_is_held(&tbl->lock)));
|
|
rcu_assign_pointer(new_nht->hash_buckets[hash], n);
|
|
}
|
|
}
|
|
|
|
rcu_assign_pointer(tbl->nht, new_nht);
|
|
call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
|
|
return new_nht;
|
|
}
|
|
|
|
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
|
|
struct net_device *dev)
|
|
{
|
|
struct neighbour *n;
|
|
|
|
NEIGH_CACHE_STAT_INC(tbl, lookups);
|
|
|
|
rcu_read_lock_bh();
|
|
n = __neigh_lookup_noref(tbl, pkey, dev);
|
|
if (n) {
|
|
if (!atomic_inc_not_zero(&n->refcnt))
|
|
n = NULL;
|
|
NEIGH_CACHE_STAT_INC(tbl, hits);
|
|
}
|
|
|
|
rcu_read_unlock_bh();
|
|
return n;
|
|
}
|
|
EXPORT_SYMBOL(neigh_lookup);
|
|
|
|
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
|
|
const void *pkey)
|
|
{
|
|
struct neighbour *n;
|
|
int key_len = tbl->key_len;
|
|
u32 hash_val;
|
|
struct neigh_hash_table *nht;
|
|
|
|
NEIGH_CACHE_STAT_INC(tbl, lookups);
|
|
|
|
rcu_read_lock_bh();
|
|
nht = rcu_dereference_bh(tbl->nht);
|
|
hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
|
|
|
|
for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
|
|
n != NULL;
|
|
n = rcu_dereference_bh(n->next)) {
|
|
if (!memcmp(n->primary_key, pkey, key_len) &&
|
|
net_eq(dev_net(n->dev), net)) {
|
|
if (!atomic_inc_not_zero(&n->refcnt))
|
|
n = NULL;
|
|
NEIGH_CACHE_STAT_INC(tbl, hits);
|
|
break;
|
|
}
|
|
}
|
|
|
|
rcu_read_unlock_bh();
|
|
return n;
|
|
}
|
|
EXPORT_SYMBOL(neigh_lookup_nodev);
|
|
|
|
/*
 * Create the entry for (@pkey, @dev) in @tbl, or return the existing one.
 *
 * A fresh entry is allocated and passed through the protocol
 * constructor, the device's ndo_neigh_construct and the parms'
 * neigh_setup hook (each optional).  Then, under tbl->lock, the hash is
 * grown if the entry count exceeds the bucket count and the target
 * bucket is searched for a duplicate.  If one exists it is returned and
 * the new entry is released; otherwise the new entry is marked alive
 * and linked at the head of the bucket.
 *
 * When @want_ref is true an extra reference is taken on the returned
 * entry.
 *
 * Returns the entry, or an ERR_PTR(): -ENOBUFS if allocation failed, the
 * negative error from a failing hook, or -EINVAL if the entry's parms
 * are marked dead.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	struct neigh_hash_table *nht;
	struct neighbour *n, *n1, *rc;
	int key_len = tbl->key_len;
	u32 hash_val;
	int error;

	n = neigh_alloc(tbl, dev);
	if (!n)
		return ERR_PTR(-ENOBUFS);

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor) {
		error = tbl->constructor(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup) {
		error = n->parms->neigh_setup(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Somebody may have inserted the same key meanwhile. */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev != n1->dev)
			continue;
		if (memcmp(n1->primary_key, pkey, key_len))
			continue;

		if (want_ref)
			neigh_hold(n1);
		rc = n1;
		goto out_tbl_unlock;
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);

	return n;

out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	return rc;
}
|
|
EXPORT_SYMBOL(__neigh_create);
|
|
|
|
static u32 pneigh_hash(const void *pkey, int key_len)
|
|
{
|
|
u32 hash_val = *(u32 *)(pkey + key_len - 4);
|
|
hash_val ^= (hash_val >> 16);
|
|
hash_val ^= hash_val >> 8;
|
|
hash_val ^= hash_val >> 4;
|
|
hash_val &= PNEIGH_HASHMASK;
|
|
return hash_val;
|
|
}
|
|
|
|
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
|
|
struct net *net,
|
|
const void *pkey,
|
|
int key_len,
|
|
struct net_device *dev)
|
|
{
|
|
while (n) {
|
|
if (!memcmp(n->key, pkey, key_len) &&
|
|
net_eq(pneigh_net(n), net) &&
|
|
(n->dev == dev || !n->dev))
|
|
return n;
|
|
n = n->next;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
|
|
struct net *net, const void *pkey, struct net_device *dev)
|
|
{
|
|
int key_len = tbl->key_len;
|
|
u32 hash_val = pneigh_hash(pkey, key_len);
|
|
|
|
return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
|
|
net, pkey, key_len, dev);
|
|
}
|
|
EXPORT_SYMBOL_GPL(__pneigh_lookup);
|
|
|
|
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
|
|
struct net *net, const void *pkey,
|
|
struct net_device *dev, int creat)
|
|
{
|
|
struct pneigh_entry *n;
|
|
int key_len = tbl->key_len;
|
|
u32 hash_val = pneigh_hash(pkey, key_len);
|
|
|
|
read_lock_bh(&tbl->lock);
|
|
n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
|
|
net, pkey, key_len, dev);
|
|
read_unlock_bh(&tbl->lock);
|
|
|
|
if (n || !creat)
|
|
goto out;
|
|
|
|
ASSERT_RTNL();
|
|
|
|
n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
|
|
if (!n)
|
|
goto out;
|
|
|
|
write_pnet(&n->net, net);
|
|
memcpy(n->key, pkey, key_len);
|
|
n->dev = dev;
|
|
if (dev)
|
|
dev_hold(dev);
|
|
|
|
if (tbl->pconstructor && tbl->pconstructor(n)) {
|
|
if (dev)
|
|
dev_put(dev);
|
|
kfree(n);
|
|
n = NULL;
|
|
goto out;
|
|
}
|
|
|
|
write_lock_bh(&tbl->lock);
|
|
n->next = tbl->phash_buckets[hash_val];
|
|
tbl->phash_buckets[hash_val] = n;
|
|
write_unlock_bh(&tbl->lock);
|
|
out:
|
|
return n;
|
|
}
|
|
EXPORT_SYMBOL(pneigh_lookup);
|
|
|
|
|
|
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
|
|
struct net_device *dev)
|
|
{
|
|
struct pneigh_entry *n, **np;
|
|
int key_len = tbl->key_len;
|
|
u32 hash_val = pneigh_hash(pkey, key_len);
|
|
|
|
write_lock_bh(&tbl->lock);
|
|
for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
|
|
np = &n->next) {
|
|
if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
|
|
net_eq(pneigh_net(n), net)) {
|
|
*np = n->next;
|
|
write_unlock_bh(&tbl->lock);
|
|
if (tbl->pdestructor)
|
|
tbl->pdestructor(n);
|
|
if (n->dev)
|
|
dev_put(n->dev);
|
|
kfree(n);
|
|
return 0;
|
|
}
|
|
}
|
|
write_unlock_bh(&tbl->lock);
|
|
return -ENOENT;
|
|
}
|
|
|
|
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
|
|
{
|
|
struct pneigh_entry *n, **np;
|
|
u32 h;
|
|
|
|
for (h = 0; h <= PNEIGH_HASHMASK; h++) {
|
|
np = &tbl->phash_buckets[h];
|
|
while ((n = *np) != NULL) {
|
|
if (!dev || n->dev == dev) {
|
|
*np = n->next;
|
|
if (tbl->pdestructor)
|
|
tbl->pdestructor(n);
|
|
if (n->dev)
|
|
dev_put(n->dev);
|
|
kfree(n);
|
|
continue;
|
|
}
|
|
np = &n->next;
|
|
}
|
|
}
|
|
return -ENOENT;
|
|
}
|
|
|
|
static void neigh_parms_destroy(struct neigh_parms *parms);
|
|
|
|
static inline void neigh_parms_put(struct neigh_parms *parms)
|
|
{
|
|
if (atomic_dec_and_test(&parms->refcnt))
|
|
neigh_parms_destroy(parms);
|
|
}
|
|
|
|
/*
|
|
* neighbour must already be out of the table;
|
|
*
|
|
*/
|
|
void neigh_destroy(struct neighbour *neigh)
|
|
{
|
|
struct net_device *dev = neigh->dev;
|
|
|
|
NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
|
|
|
|
if (!neigh->dead) {
|
|
pr_warn("Destroying alive neighbour %pK\n", neigh);
|
|
dump_stack();
|
|
return;
|
|
}
|
|
|
|
if (neigh_del_timer(neigh))
|
|
pr_warn("Impossible event\n");
|
|
|
|
write_lock_bh(&neigh->lock);
|
|
__skb_queue_purge(&neigh->arp_queue);
|
|
write_unlock_bh(&neigh->lock);
|
|
neigh->arp_queue_len_bytes = 0;
|
|
|
|
if (dev->netdev_ops->ndo_neigh_destroy)
|
|
dev->netdev_ops->ndo_neigh_destroy(neigh);
|
|
|
|
dev_put(dev);
|
|
neigh_parms_put(neigh->parms);
|
|
|
|
neigh_dbg(2, "neigh %p is destroyed\n", neigh);
|
|
|
|
atomic_dec(&neigh->tbl->entries);
|
|
kfree_rcu(neigh, rcu);
|
|
}
|
|
EXPORT_SYMBOL(neigh_destroy);
|
|
|
|
/* Neighbour state is suspicious;
|
|
disable fast path.
|
|
|
|
Called with write_locked neigh.
|
|
*/
|
|
static void neigh_suspect(struct neighbour *neigh)
|
|
{
|
|
neigh_dbg(2, "neigh %p is suspected\n", neigh);
|
|
|
|
neigh->output = neigh->ops->output;
|
|
}
|
|
|
|
/* Neighbour state is OK;
|
|
enable fast path.
|
|
|
|
Called with write_locked neigh.
|
|
*/
|
|
static void neigh_connect(struct neighbour *neigh)
|
|
{
|
|
neigh_dbg(2, "neigh %p is connected\n", neigh);
|
|
|
|
neigh->output = neigh->ops->connected_output;
|
|
}
|
|
|
|
static void neigh_periodic_work(struct work_struct *work)
|
|
{
|
|
struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
|
|
struct neighbour *n;
|
|
struct neighbour __rcu **np;
|
|
unsigned int i;
|
|
struct neigh_hash_table *nht;
|
|
|
|
NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
|
|
|
|
write_lock_bh(&tbl->lock);
|
|
nht = rcu_dereference_protected(tbl->nht,
|
|
lockdep_is_held(&tbl->lock));
|
|
|
|
/*
|
|
* periodically recompute ReachableTime from random function
|
|
*/
|
|
|
|
if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
|
|
struct neigh_parms *p;
|
|
tbl->last_rand = jiffies;
|
|
list_for_each_entry(p, &tbl->parms_list, list)
|
|
p->reachable_time =
|
|
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
|
|
}
|
|
|
|
if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
|
|
goto out;
|
|
|
|
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
|
|
np = &nht->hash_buckets[i];
|
|
|
|
while ((n = rcu_dereference_protected(*np,
|
|
lockdep_is_held(&tbl->lock))) != NULL) {
|
|
unsigned int state;
|
|
|
|
write_lock(&n->lock);
|
|
|
|
state = n->nud_state;
|
|
if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
|
|
write_unlock(&n->lock);
|
|
goto next_elt;
|
|
}
|
|
|
|
if (time_before(n->used, n->confirmed))
|
|
n->used = n->confirmed;
|
|
|
|
if (atomic_read(&n->refcnt) == 1 &&
|
|
(state == NUD_FAILED ||
|
|
time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
|
|
*np = n->next;
|
|
n->dead = 1;
|
|
write_unlock(&n->lock);
|
|
neigh_cleanup_and_release(n);
|
|
continue;
|
|
}
|
|
write_unlock(&n->lock);
|
|
|
|
next_elt:
|
|
np = &n->next;
|
|
}
|
|
/*
|
|
* It's fine to release lock here, even if hash table
|
|
* grows while we are preempted.
|
|
*/
|
|
write_unlock_bh(&tbl->lock);
|
|
cond_resched();
|
|
write_lock_bh(&tbl->lock);
|
|
nht = rcu_dereference_protected(tbl->nht,
|
|
lockdep_is_held(&tbl->lock));
|
|
}
|
|
out:
|
|
/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
|
|
* ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
|
|
* BASE_REACHABLE_TIME.
|
|
*/
|
|
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
|
|
NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
|
|
write_unlock_bh(&tbl->lock);
|
|
}
|
|
|
|
static __inline__ int neigh_max_probes(struct neighbour *n)
|
|
{
|
|
struct neigh_parms *p = n->parms;
|
|
return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
|
|
(n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
|
|
NEIGH_VAR(p, MCAST_PROBES));
|
|
}
|
|
|
|
static void neigh_invalidate(struct neighbour *neigh)
|
|
__releases(neigh->lock)
|
|
__acquires(neigh->lock)
|
|
{
|
|
struct sk_buff *skb;
|
|
|
|
NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
|
|
neigh_dbg(2, "neigh %p is failed\n", neigh);
|
|
neigh->updated = jiffies;
|
|
|
|
/* It is very thin place. report_unreachable is very complicated
|
|
routine. Particularly, it can hit the same neighbour entry!
|
|
|
|
So that, we try to be accurate and avoid dead loop. --ANK
|
|
*/
|
|
while (neigh->nud_state == NUD_FAILED &&
|
|
(skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
|
|
write_unlock(&neigh->lock);
|
|
neigh->ops->error_report(neigh, skb);
|
|
write_lock(&neigh->lock);
|
|
}
|
|
__skb_queue_purge(&neigh->arp_queue);
|
|
neigh->arp_queue_len_bytes = 0;
|
|
}
|
|
|
|
static void neigh_probe(struct neighbour *neigh)
|
|
__releases(neigh->lock)
|
|
{
|
|
struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
|
|
/* keep skb alive even if arp_queue overflows */
|
|
if (skb)
|
|
skb = skb_clone(skb, GFP_ATOMIC);
|
|
write_unlock(&neigh->lock);
|
|
neigh->ops->solicit(neigh, skb);
|
|
atomic_inc(&neigh->probes);
|
|
kfree_skb(skb);
|
|
}
|
|
|
|
/* Called when a timer expires for a neighbour entry. */
|
|
|
|
static void neigh_timer_handler(unsigned long arg)
|
|
{
|
|
unsigned long now, next;
|
|
struct neighbour *neigh = (struct neighbour *)arg;
|
|
unsigned int state;
|
|
int notify = 0;
|
|
|
|
write_lock(&neigh->lock);
|
|
|
|
state = neigh->nud_state;
|
|
now = jiffies;
|
|
next = now + HZ;
|
|
|
|
if (!(state & NUD_IN_TIMER))
|
|
goto out;
|
|
|
|
if (state & NUD_REACHABLE) {
|
|
if (time_before_eq(now,
|
|
neigh->confirmed + neigh->parms->reachable_time)) {
|
|
neigh_dbg(2, "neigh %p is still alive\n", neigh);
|
|
next = neigh->confirmed + neigh->parms->reachable_time;
|
|
} else if (time_before_eq(now,
|
|
neigh->used +
|
|
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
|
|
neigh_dbg(2, "neigh %p is delayed\n", neigh);
|
|
neigh->nud_state = NUD_DELAY;
|
|
neigh->updated = jiffies;
|
|
neigh_suspect(neigh);
|
|
next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
|
|
} else {
|
|
neigh_dbg(2, "neigh %p is suspected\n", neigh);
|
|
neigh->nud_state = NUD_STALE;
|
|
neigh->updated = jiffies;
|
|
neigh_suspect(neigh);
|
|
notify = 1;
|
|
}
|
|
} else if (state & NUD_DELAY) {
|
|
if (time_before_eq(now,
|
|
neigh->confirmed +
|
|
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
|
|
neigh_dbg(2, "neigh %p is now reachable\n", neigh);
|
|
neigh->nud_state = NUD_REACHABLE;
|
|
neigh->updated = jiffies;
|
|
neigh_connect(neigh);
|
|
notify = 1;
|
|
next = neigh->confirmed + neigh->parms->reachable_time;
|
|
} else {
|
|
neigh_dbg(2, "neigh %p is probed\n", neigh);
|
|
neigh->nud_state = NUD_PROBE;
|
|
neigh->updated = jiffies;
|
|
atomic_set(&neigh->probes, 0);
|
|
notify = 1;
|
|
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
|
|
}
|
|
} else {
|
|
/* NUD_PROBE|NUD_INCOMPLETE */
|
|
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
|
|
}
|
|
|
|
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
|
|
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
|
|
neigh->nud_state = NUD_FAILED;
|
|
notify = 1;
|
|
neigh_invalidate(neigh);
|
|
goto out;
|
|
}
|
|
|
|
if (neigh->nud_state & NUD_IN_TIMER) {
|
|
if (time_before(next, jiffies + HZ/2))
|
|
next = jiffies + HZ/2;
|
|
if (!mod_timer(&neigh->timer, next))
|
|
neigh_hold(neigh);
|
|
}
|
|
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
|
|
neigh_probe(neigh);
|
|
} else {
|
|
out:
|
|
write_unlock(&neigh->lock);
|
|
}
|
|
|
|
if (notify)
|
|
neigh_update_notify(neigh);
|
|
|
|
neigh_release(neigh);
|
|
}
|
|
|
|
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
|
|
{
|
|
int rc;
|
|
bool immediate_probe = false;
|
|
|
|
write_lock_bh(&neigh->lock);
|
|
|
|
rc = 0;
|
|
if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
|
|
goto out_unlock_bh;
|
|
if (neigh->dead)
|
|
goto out_dead;
|
|
|
|
if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
|
|
if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
|
|
NEIGH_VAR(neigh->parms, APP_PROBES)) {
|
|
unsigned long next, now = jiffies;
|
|
|
|
atomic_set(&neigh->probes,
|
|
NEIGH_VAR(neigh->parms, UCAST_PROBES));
|
|
neigh->nud_state = NUD_INCOMPLETE;
|
|
neigh->updated = now;
|
|
next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
|
|
HZ/2);
|
|
neigh_add_timer(neigh, next);
|
|
immediate_probe = true;
|
|
} else {
|
|
neigh->nud_state = NUD_FAILED;
|
|
neigh->updated = jiffies;
|
|
write_unlock_bh(&neigh->lock);
|
|
|
|
kfree_skb(skb);
|
|
return 1;
|
|
}
|
|
} else if (neigh->nud_state & NUD_STALE) {
|
|
neigh_dbg(2, "neigh %p is delayed\n", neigh);
|
|
neigh->nud_state = NUD_DELAY;
|
|
neigh->updated = jiffies;
|
|
neigh_add_timer(neigh, jiffies +
|
|
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
|
|
}
|
|
|
|
if (neigh->nud_state == NUD_INCOMPLETE) {
|
|
if (skb) {
|
|
while (neigh->arp_queue_len_bytes + skb->truesize >
|
|
NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
|
|
struct sk_buff *buff;
|
|
|
|
buff = __skb_dequeue(&neigh->arp_queue);
|
|
if (!buff)
|
|
break;
|
|
neigh->arp_queue_len_bytes -= buff->truesize;
|
|
kfree_skb(buff);
|
|
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
|
|
}
|
|
skb_dst_force(skb);
|
|
__skb_queue_tail(&neigh->arp_queue, skb);
|
|
neigh->arp_queue_len_bytes += skb->truesize;
|
|
}
|
|
rc = 1;
|
|
}
|
|
out_unlock_bh:
|
|
if (immediate_probe)
|
|
neigh_probe(neigh);
|
|
else
|
|
write_unlock(&neigh->lock);
|
|
local_bh_enable();
|
|
return rc;
|
|
|
|
out_dead:
|
|
if (neigh->nud_state & NUD_STALE)
|
|
goto out_unlock_bh;
|
|
write_unlock_bh(&neigh->lock);
|
|
kfree_skb(skb);
|
|
return 1;
|
|
}
|
|
EXPORT_SYMBOL(__neigh_event_send);
|
|
|
|
static void neigh_update_hhs(struct neighbour *neigh)
|
|
{
|
|
struct hh_cache *hh;
|
|
void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
|
|
= NULL;
|
|
|
|
if (neigh->dev->header_ops)
|
|
update = neigh->dev->header_ops->cache_update;
|
|
|
|
if (update) {
|
|
hh = &neigh->hh;
|
|
if (hh->hh_len) {
|
|
write_seqlock_bh(&hh->hh_lock);
|
|
update(hh, neigh->dev, neigh->ha);
|
|
write_sequnlock_bh(&hh->hh_lock);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/* Generic update routine.
|
|
-- lladdr is new lladdr or NULL, if it is not supplied.
|
|
-- new is new state.
|
|
-- flags
|
|
NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
|
|
if it is different.
|
|
NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
|
|
lladdr instead of overriding it
|
|
if it is different.
|
|
It also allows to retain current state
|
|
if lladdr is unchanged.
|
|
NEIGH_UPDATE_F_ADMIN means that the change is administrative.
|
|
|
|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
|
|
NTF_ROUTER flag.
|
|
NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
|
|
a router.
|
|
|
|
Caller MUST hold reference count on the entry.
|
|
*/
|
|
|
|
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
|
|
u32 flags)
|
|
{
|
|
u8 old;
|
|
int err;
|
|
int notify = 0;
|
|
struct net_device *dev;
|
|
int update_isrouter = 0;
|
|
|
|
write_lock_bh(&neigh->lock);
|
|
|
|
dev = neigh->dev;
|
|
old = neigh->nud_state;
|
|
err = -EPERM;
|
|
|
|
if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
|
|
(old & (NUD_NOARP | NUD_PERMANENT)))
|
|
goto out;
|
|
if (neigh->dead)
|
|
goto out;
|
|
|
|
if (!(new & NUD_VALID)) {
|
|
neigh_del_timer(neigh);
|
|
if (old & NUD_CONNECTED)
|
|
neigh_suspect(neigh);
|
|
neigh->nud_state = new;
|
|
err = 0;
|
|
notify = old & NUD_VALID;
|
|
if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
|
|
(new & NUD_FAILED)) {
|
|
neigh_invalidate(neigh);
|
|
notify = 1;
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
/* Compare new lladdr with cached one */
|
|
if (!dev->addr_len) {
|
|
/* First case: device needs no address. */
|
|
lladdr = neigh->ha;
|
|
} else if (lladdr) {
|
|
/* The second case: if something is already cached
|
|
and a new address is proposed:
|
|
- compare new & old
|
|
- if they are different, check override flag
|
|
*/
|
|
if ((old & NUD_VALID) &&
|
|
!memcmp(lladdr, neigh->ha, dev->addr_len))
|
|
lladdr = neigh->ha;
|
|
} else {
|
|
/* No address is supplied; if we know something,
|
|
use it, otherwise discard the request.
|
|
*/
|
|
err = -EINVAL;
|
|
if (!(old & NUD_VALID))
|
|
goto out;
|
|
lladdr = neigh->ha;
|
|
}
|
|
|
|
if (new & NUD_CONNECTED)
|
|
neigh->confirmed = jiffies;
|
|
neigh->updated = jiffies;
|
|
|
|
/* If entry was valid and address is not changed,
|
|
do not change entry state, if new one is STALE.
|
|
*/
|
|
err = 0;
|
|
update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
|
|
if (old & NUD_VALID) {
|
|
if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
|
|
update_isrouter = 0;
|
|
if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
|
|
(old & NUD_CONNECTED)) {
|
|
lladdr = neigh->ha;
|
|
new = NUD_STALE;
|
|
} else
|
|
goto out;
|
|
} else {
|
|
if (lladdr == neigh->ha && new == NUD_STALE &&
|
|
((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
|
|
(old & NUD_CONNECTED))
|
|
)
|
|
new = old;
|
|
}
|
|
}
|
|
|
|
if (new != old) {
|
|
neigh_del_timer(neigh);
|
|
if (new & NUD_PROBE)
|
|
atomic_set(&neigh->probes, 0);
|
|
if (new & NUD_IN_TIMER)
|
|
neigh_add_timer(neigh, (jiffies +
|
|
((new & NUD_REACHABLE) ?
|
|
neigh->parms->reachable_time :
|
|
0)));
|
|
neigh->nud_state = new;
|
|
notify = 1;
|
|
}
|
|
|
|
if (lladdr != neigh->ha) {
|
|
write_seqlock(&neigh->ha_lock);
|
|
memcpy(&neigh->ha, lladdr, dev->addr_len);
|
|
write_sequnlock(&neigh->ha_lock);
|
|
neigh_update_hhs(neigh);
|
|
if (!(new & NUD_CONNECTED))
|
|
neigh->confirmed = jiffies -
|
|
(NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
|
|
notify = 1;
|
|
}
|
|
if (new == old)
|
|
goto out;
|
|
if (new & NUD_CONNECTED)
|
|
neigh_connect(neigh);
|
|
else
|
|
neigh_suspect(neigh);
|
|
if (!(old & NUD_VALID)) {
|
|
struct sk_buff *skb;
|
|
|
|
/* Again: avoid dead loop if something went wrong */
|
|
|
|
while (neigh->nud_state & NUD_VALID &&
|
|
(skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
|
|
struct dst_entry *dst = skb_dst(skb);
|
|
struct neighbour *n2, *n1 = neigh;
|
|
write_unlock_bh(&neigh->lock);
|
|
|
|
rcu_read_lock();
|
|
|
|
/* Why not just use 'neigh' as-is? The problem is that
|
|
* things such as shaper, eql, and sch_teql can end up
|
|
* using alternative, different, neigh objects to output
|
|
* the packet in the output path. So what we need to do
|
|
* here is re-lookup the top-level neigh in the path so
|
|
* we can reinject the packet there.
|
|
*/
|
|
n2 = NULL;
|
|
if (dst) {
|
|
n2 = dst_neigh_lookup_skb(dst, skb);
|
|
if (n2)
|
|
n1 = n2;
|
|
}
|
|
n1->output(n1, skb);
|
|
if (n2)
|
|
neigh_release(n2);
|
|
rcu_read_unlock();
|
|
|
|
write_lock_bh(&neigh->lock);
|
|
}
|
|
__skb_queue_purge(&neigh->arp_queue);
|
|
neigh->arp_queue_len_bytes = 0;
|
|
}
|
|
out:
|
|
if (update_isrouter) {
|
|
neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
|
|
(neigh->flags | NTF_ROUTER) :
|
|
(neigh->flags & ~NTF_ROUTER);
|
|
}
|
|
write_unlock_bh(&neigh->lock);
|
|
|
|
if (notify)
|
|
neigh_update_notify(neigh);
|
|
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(neigh_update);
|
|
|
|
/* Update the neigh to listen temporarily for probe responses, even if it is
|
|
* in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
|
|
*/
|
|
void __neigh_set_probe_once(struct neighbour *neigh)
|
|
{
|
|
if (neigh->dead)
|
|
return;
|
|
neigh->updated = jiffies;
|
|
if (!(neigh->nud_state & NUD_FAILED))
|
|
return;
|
|
neigh->nud_state = NUD_INCOMPLETE;
|
|
atomic_set(&neigh->probes, neigh_max_probes(neigh));
|
|
neigh_add_timer(neigh,
|
|
jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
|
|
}
|
|
EXPORT_SYMBOL(__neigh_set_probe_once);
|
|
|
|
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
|
|
u8 *lladdr, void *saddr,
|
|
struct net_device *dev)
|
|
{
|
|
struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
|
|
lladdr || !dev->addr_len);
|
|
if (neigh)
|
|
neigh_update(neigh, lladdr, NUD_STALE,
|
|
NEIGH_UPDATE_F_OVERRIDE);
|
|
return neigh;
|
|
}
|
|
EXPORT_SYMBOL(neigh_event_ns);
|
|
|
|
/* called with read_lock_bh(&n->lock); */
|
|
static void neigh_hh_init(struct neighbour *n)
|
|
{
|
|
struct net_device *dev = n->dev;
|
|
__be16 prot = n->tbl->protocol;
|
|
struct hh_cache *hh = &n->hh;
|
|
|
|
write_lock_bh(&n->lock);
|
|
|
|
/* Only one thread can come in here and initialize the
|
|
* hh_cache entry.
|
|
*/
|
|
if (!hh->hh_len)
|
|
dev->header_ops->cache(n, hh, prot);
|
|
|
|
write_unlock_bh(&n->lock);
|
|
}
|
|
|
|
/* Slow and careful. */
|
|
|
|
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
|
|
{
|
|
int rc = 0;
|
|
|
|
if (!neigh_event_send(neigh, skb)) {
|
|
int err;
|
|
struct net_device *dev = neigh->dev;
|
|
unsigned int seq;
|
|
|
|
if (dev->header_ops->cache && !neigh->hh.hh_len)
|
|
neigh_hh_init(neigh);
|
|
|
|
do {
|
|
__skb_pull(skb, skb_network_offset(skb));
|
|
seq = read_seqbegin(&neigh->ha_lock);
|
|
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
|
|
neigh->ha, NULL, skb->len);
|
|
} while (read_seqretry(&neigh->ha_lock, seq));
|
|
|
|
if (err >= 0)
|
|
rc = dev_queue_xmit(skb);
|
|
else
|
|
goto out_kfree_skb;
|
|
}
|
|
out:
|
|
return rc;
|
|
out_kfree_skb:
|
|
rc = -EINVAL;
|
|
kfree_skb(skb);
|
|
goto out;
|
|
}
|
|
EXPORT_SYMBOL(neigh_resolve_output);
|
|
|
|
/* As fast as possible without hh cache */
|
|
|
|
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
|
|
{
|
|
struct net_device *dev = neigh->dev;
|
|
unsigned int seq;
|
|
int err;
|
|
|
|
do {
|
|
__skb_pull(skb, skb_network_offset(skb));
|
|
seq = read_seqbegin(&neigh->ha_lock);
|
|
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
|
|
neigh->ha, NULL, skb->len);
|
|
} while (read_seqretry(&neigh->ha_lock, seq));
|
|
|
|
if (err >= 0)
|
|
err = dev_queue_xmit(skb);
|
|
else {
|
|
err = -EINVAL;
|
|
kfree_skb(skb);
|
|
}
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(neigh_connected_output);
|
|
|
|
/*
 * Output path that needs no neighbour processing at all: @neigh is unused
 * and @skb goes straight to dev_queue_xmit(), whose result is returned.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = dev_queue_xmit(skb);

	return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
|
|
|
|
static void neigh_proxy_process(unsigned long arg)
|
|
{
|
|
struct neigh_table *tbl = (struct neigh_table *)arg;
|
|
long sched_next = 0;
|
|
unsigned long now = jiffies;
|
|
struct sk_buff *skb, *n;
|
|
|
|
spin_lock(&tbl->proxy_queue.lock);
|
|
|
|
skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
|
|
long tdif = NEIGH_CB(skb)->sched_next - now;
|
|
|
|
if (tdif <= 0) {
|
|
struct net_device *dev = skb->dev;
|
|
|
|
__skb_unlink(skb, &tbl->proxy_queue);
|
|
if (tbl->proxy_redo && netif_running(dev)) {
|
|
rcu_read_lock();
|
|
tbl->proxy_redo(skb);
|
|
rcu_read_unlock();
|
|
} else {
|
|
kfree_skb(skb);
|
|
}
|
|
|
|
dev_put(dev);
|
|
} else if (!sched_next || tdif < sched_next)
|
|
sched_next = tdif;
|
|
}
|
|
del_timer(&tbl->proxy_timer);
|
|
if (sched_next)
|
|
mod_timer(&tbl->proxy_timer, jiffies + sched_next);
|
|
spin_unlock(&tbl->proxy_queue.lock);
|
|
}
|
|
|
|
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
|
|
struct sk_buff *skb)
|
|
{
|
|
unsigned long now = jiffies;
|
|
|
|
unsigned long sched_next = now + (prandom_u32() %
|
|
NEIGH_VAR(p, PROXY_DELAY));
|
|
|
|
if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
|
|
kfree_skb(skb);
|
|
return;
|
|
}
|
|
|
|
NEIGH_CB(skb)->sched_next = sched_next;
|
|
NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
|
|
|
|
spin_lock(&tbl->proxy_queue.lock);
|
|
if (del_timer(&tbl->proxy_timer)) {
|
|
if (time_before(tbl->proxy_timer.expires, sched_next))
|
|
sched_next = tbl->proxy_timer.expires;
|
|
}
|
|
skb_dst_drop(skb);
|
|
dev_hold(skb->dev);
|
|
__skb_queue_tail(&tbl->proxy_queue, skb);
|
|
mod_timer(&tbl->proxy_timer, sched_next);
|
|
spin_unlock(&tbl->proxy_queue.lock);
|
|
}
|
|
EXPORT_SYMBOL(pneigh_enqueue);
|
|
|
|
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
|
|
struct net *net, int ifindex)
|
|
{
|
|
struct neigh_parms *p;
|
|
|
|
list_for_each_entry(p, &tbl->parms_list, list) {
|
|
if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
|
|
(!p->dev && !ifindex && net_eq(net, &init_net)))
|
|
return p;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
|
|
struct neigh_table *tbl)
|
|
{
|
|
struct neigh_parms *p;
|
|
struct net *net = dev_net(dev);
|
|
const struct net_device_ops *ops = dev->netdev_ops;
|
|
|
|
p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
|
|
if (p) {
|
|
p->tbl = tbl;
|
|
atomic_set(&p->refcnt, 1);
|
|
p->reachable_time =
|
|
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
|
|
dev_hold(dev);
|
|
p->dev = dev;
|
|
write_pnet(&p->net, net);
|
|
p->sysctl_table = NULL;
|
|
|
|
if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
|
|
dev_put(dev);
|
|
kfree(p);
|
|
return NULL;
|
|
}
|
|
|
|
write_lock_bh(&tbl->lock);
|
|
list_add(&p->list, &tbl->parms.list);
|
|
write_unlock_bh(&tbl->lock);
|
|
|
|
neigh_parms_data_state_cleanall(p);
|
|
}
|
|
return p;
|
|
}
|
|
EXPORT_SYMBOL(neigh_parms_alloc);
|
|
|
|
static void neigh_rcu_free_parms(struct rcu_head *head)
|
|
{
|
|
struct neigh_parms *parms =
|
|
container_of(head, struct neigh_parms, rcu_head);
|
|
|
|
neigh_parms_put(parms);
|
|
}
|
|
|
|
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
|
|
{
|
|
if (!parms || parms == &tbl->parms)
|
|
return;
|
|
write_lock_bh(&tbl->lock);
|
|
list_del(&parms->list);
|
|
parms->dead = 1;
|
|
write_unlock_bh(&tbl->lock);
|
|
if (parms->dev)
|
|
dev_put(parms->dev);
|
|
call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
|
|
}
|
|
EXPORT_SYMBOL(neigh_parms_release);
|
|
|
|
/* Free the memory of a parameter set; @parms must no longer be in use. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
|
|
|
|
/* Lock class passed to skb_queue_head_init_class() for every table's proxy_queue. */
static struct lock_class_key neigh_table_proxy_queue_class;
|
|
|
|
/*
 * Registered neighbour tables, indexed by NEIGH_*_TABLE.  A slot is filled
 * by neigh_table_init() and reset to NULL by neigh_table_clear().
 */
static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
|
|
|
|
void neigh_table_init(int index, struct neigh_table *tbl)
|
|
{
|
|
unsigned long now = jiffies;
|
|
unsigned long phsize;
|
|
|
|
INIT_LIST_HEAD(&tbl->parms_list);
|
|
list_add(&tbl->parms.list, &tbl->parms_list);
|
|
write_pnet(&tbl->parms.net, &init_net);
|
|
atomic_set(&tbl->parms.refcnt, 1);
|
|
tbl->parms.reachable_time =
|
|
neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
|
|
|
|
tbl->stats = alloc_percpu(struct neigh_statistics);
|
|
if (!tbl->stats)
|
|
panic("cannot create neighbour cache statistics");
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
|
|
&neigh_stat_seq_fops, tbl))
|
|
panic("cannot create neighbour proc dir entry");
|
|
#endif
|
|
|
|
RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
|
|
|
|
phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
|
|
tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
|
|
|
|
if (!tbl->nht || !tbl->phash_buckets)
|
|
panic("cannot allocate neighbour cache hashes");
|
|
|
|
if (!tbl->entry_size)
|
|
tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
|
|
tbl->key_len, NEIGH_PRIV_ALIGN);
|
|
else
|
|
WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
|
|
|
|
rwlock_init(&tbl->lock);
|
|
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
|
|
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
|
|
tbl->parms.reachable_time);
|
|
setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
|
|
skb_queue_head_init_class(&tbl->proxy_queue,
|
|
&neigh_table_proxy_queue_class);
|
|
|
|
tbl->last_flush = now;
|
|
tbl->last_rand = now + tbl->parms.reachable_time * 20;
|
|
|
|
neigh_tables[index] = tbl;
|
|
}
|
|
EXPORT_SYMBOL(neigh_table_init);
|
|
|
|
int neigh_table_clear(int index, struct neigh_table *tbl)
|
|
{
|
|
neigh_tables[index] = NULL;
|
|
/* It is not clean... Fix it to unload IPv6 module safely */
|
|
cancel_delayed_work_sync(&tbl->gc_work);
|
|
del_timer_sync(&tbl->proxy_timer);
|
|
pneigh_queue_purge(&tbl->proxy_queue);
|
|
neigh_ifdown(tbl, NULL);
|
|
if (atomic_read(&tbl->entries))
|
|
pr_crit("neighbour leakage\n");
|
|
|
|
call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
|
|
neigh_hash_free_rcu);
|
|
tbl->nht = NULL;
|
|
|
|
kfree(tbl->phash_buckets);
|
|
tbl->phash_buckets = NULL;
|
|
|
|
remove_proc_entry(tbl->id, init_net.proc_net_stat);
|
|
|
|
free_percpu(tbl->stats);
|
|
tbl->stats = NULL;
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(neigh_table_clear);
|
|
|
|
static struct neigh_table *neigh_find_table(int family)
|
|
{
|
|
struct neigh_table *tbl = NULL;
|
|
|
|
switch (family) {
|
|
case AF_INET:
|
|
tbl = neigh_tables[NEIGH_ARP_TABLE];
|
|
break;
|
|
case AF_INET6:
|
|
tbl = neigh_tables[NEIGH_ND_TABLE];
|
|
break;
|
|
case AF_DECnet:
|
|
tbl = neigh_tables[NEIGH_DN_TABLE];
|
|
break;
|
|
}
|
|
|
|
return tbl;
|
|
}
|
|
|
|
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
|
|
{
|
|
struct net *net = sock_net(skb->sk);
|
|
struct ndmsg *ndm;
|
|
struct nlattr *dst_attr;
|
|
struct neigh_table *tbl;
|
|
struct neighbour *neigh;
|
|
struct net_device *dev = NULL;
|
|
int err = -EINVAL;
|
|
|
|
ASSERT_RTNL();
|
|
if (nlmsg_len(nlh) < sizeof(*ndm))
|
|
goto out;
|
|
|
|
dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
|
|
if (dst_attr == NULL)
|
|
goto out;
|
|
|
|
ndm = nlmsg_data(nlh);
|
|
if (ndm->ndm_ifindex) {
|
|
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
|
|
if (dev == NULL) {
|
|
err = -ENODEV;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
tbl = neigh_find_table(ndm->ndm_family);
|
|
if (tbl == NULL)
|
|
return -EAFNOSUPPORT;
|
|
|
|
if (nla_len(dst_attr) < tbl->key_len)
|
|
goto out;
|
|
|
|
if (ndm->ndm_flags & NTF_PROXY) {
|
|
err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
|
|
goto out;
|
|
}
|
|
|
|
if (dev == NULL)
|
|
goto out;
|
|
|
|
neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
|
|
if (neigh == NULL) {
|
|
err = -ENOENT;
|
|
goto out;
|
|
}
|
|
|
|
err = neigh_update(neigh, NULL, NUD_FAILED,
|
|
NEIGH_UPDATE_F_OVERRIDE |
|
|
NEIGH_UPDATE_F_ADMIN);
|
|
neigh_release(neigh);
|
|
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
|
|
{
|
|
int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
|
|
struct net *net = sock_net(skb->sk);
|
|
struct ndmsg *ndm;
|
|
struct nlattr *tb[NDA_MAX+1];
|
|
struct neigh_table *tbl;
|
|
struct net_device *dev = NULL;
|
|
struct neighbour *neigh;
|
|
void *dst, *lladdr;
|
|
int err;
|
|
|
|
ASSERT_RTNL();
|
|
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
|
|
if (err < 0)
|
|
goto out;
|
|
|
|
err = -EINVAL;
|
|
if (tb[NDA_DST] == NULL)
|
|
goto out;
|
|
|
|
ndm = nlmsg_data(nlh);
|
|
if (ndm->ndm_ifindex) {
|
|
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
|
|
if (dev == NULL) {
|
|
err = -ENODEV;
|
|
goto out;
|
|
}
|
|
|
|
if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
|
|
goto out;
|
|
}
|
|
|
|
tbl = neigh_find_table(ndm->ndm_family);
|
|
if (tbl == NULL)
|
|
return -EAFNOSUPPORT;
|
|
|
|
if (nla_len(tb[NDA_DST]) < tbl->key_len)
|
|
goto out;
|
|
dst = nla_data(tb[NDA_DST]);
|
|
lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
|
|
|
|
if (ndm->ndm_flags & NTF_PROXY) {
|
|
struct pneigh_entry *pn;
|
|
|
|
err = -ENOBUFS;
|
|
pn = pneigh_lookup(tbl, net, dst, dev, 1);
|
|
if (pn) {
|
|
pn->flags = ndm->ndm_flags;
|
|
err = 0;
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
if (dev == NULL)
|
|
goto out;
|
|
|
|
neigh = neigh_lookup(tbl, dst, dev);
|
|
if (neigh == NULL) {
|
|
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
|
|
err = -ENOENT;
|
|
goto out;
|
|
}
|
|
|
|
neigh = __neigh_lookup_errno(tbl, dst, dev);
|
|
if (IS_ERR(neigh)) {
|
|
err = PTR_ERR(neigh);
|
|
goto out;
|
|
}
|
|
} else {
|
|
if (nlh->nlmsg_flags & NLM_F_EXCL) {
|
|
err = -EEXIST;
|
|
neigh_release(neigh);
|
|
goto out;
|
|
}
|
|
|
|
if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
|
|
flags &= ~NEIGH_UPDATE_F_OVERRIDE;
|
|
}
|
|
|
|
if (ndm->ndm_flags & NTF_USE) {
|
|
neigh_event_send(neigh, NULL);
|
|
err = 0;
|
|
} else
|
|
err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
|
|
neigh_release(neigh);
|
|
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
|
|
{
|
|
struct nlattr *nest;
|
|
|
|
nest = nla_nest_start(skb, NDTA_PARMS);
|
|
if (nest == NULL)
|
|
return -ENOBUFS;
|
|
|
|
if ((parms->dev &&
|
|
nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
|
|
nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
|
|
nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
|
|
NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
|
|
/* approximative value for deprecated QUEUE_LEN (in packets) */
|
|
nla_put_u32(skb, NDTPA_QUEUE_LEN,
|
|
NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
|
|
nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
|
|
nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
|
|
nla_put_u32(skb, NDTPA_UCAST_PROBES,
|
|
NEIGH_VAR(parms, UCAST_PROBES)) ||
|
|
nla_put_u32(skb, NDTPA_MCAST_PROBES,
|
|
NEIGH_VAR(parms, MCAST_PROBES)) ||
|
|
nla_put_u32(skb, NDTPA_MCAST_REPROBES,
|
|
NEIGH_VAR(parms, MCAST_REPROBES)) ||
|
|
nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
|
|
nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
|
|
NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
|
|
nla_put_msecs(skb, NDTPA_GC_STALETIME,
|
|
NEIGH_VAR(parms, GC_STALETIME)) ||
|
|
nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
|
|
NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
|
|
nla_put_msecs(skb, NDTPA_RETRANS_TIME,
|
|
NEIGH_VAR(parms, RETRANS_TIME)) ||
|
|
nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
|
|
NEIGH_VAR(parms, ANYCAST_DELAY)) ||
|
|
nla_put_msecs(skb, NDTPA_PROXY_DELAY,
|
|
NEIGH_VAR(parms, PROXY_DELAY)) ||
|
|
nla_put_msecs(skb, NDTPA_LOCKTIME,
|
|
NEIGH_VAR(parms, LOCKTIME)))
|
|
goto nla_put_failure;
|
|
return nla_nest_end(skb, nest);
|
|
|
|
nla_put_failure:
|
|
nla_nest_cancel(skb, nest);
|
|
return -EMSGSIZE;
|
|
}
|
|
|
|
/*
 * Build one netlink message describing table @tbl in @skb: name, GC
 * interval and thresholds, an NDTA_CONFIG blob, NDTA_STATS summed over all
 * possible CPUs, and the default parameter set.
 *
 * tbl->lock is read-held while the message is filled.  Returns 0 on
 * success or -EMSGSIZE (with the message cancelled) when @skb is too small.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (!nlh)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1 = 0;
	ndtmsg->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;

	/* NDTA_CONFIG: static layout plus a snapshot of the hash table. */
	{
		unsigned long now = jiffies;
		unsigned int since_flush = now - tbl->last_flush;
		unsigned int since_rand = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(since_flush),
			.ndtc_last_rand		= jiffies_to_msecs(since_rand),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	/* NDTA_STATS: per-cpu counters folded into one structure. */
	{
		struct ndt_stats ndst;
		int cpu;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics *st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	/* The table's default parameter set is never bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
|
|
|
|
/*
 * Build one netlink message in @skb carrying only the table name and the
 * parameter set @parms of @tbl.
 *
 * tbl->lock is read-held while the message is filled.  Returns 0 on
 * success or -EMSGSIZE (with the message cancelled) when @skb is too small.
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (!nlh)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1 = 0;
	ndtmsg->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0) {
		read_unlock_bh(&tbl->lock);
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
}
|
|
|
|
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
|
|
[NDTA_NAME] = { .type = NLA_STRING },
|
|
[NDTA_THRESH1] = { .type = NLA_U32 },
|
|
[NDTA_THRESH2] = { .type = NLA_U32 },
|
|
[NDTA_THRESH3] = { .type = NLA_U32 },
|
|
[NDTA_GC_INTERVAL] = { .type = NLA_U64 },
|
|
[NDTA_PARMS] = { .type = NLA_NESTED },
|
|
};
|
|
|
|
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
|
|
[NDTPA_IFINDEX] = { .type = NLA_U32 },
|
|
[NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
|
|
[NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
|
|
[NDTPA_APP_PROBES] = { .type = NLA_U32 },
|
|
[NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
|
|
[NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
|
|
[NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
|
|
[NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
|
|
[NDTPA_GC_STALETIME] = { .type = NLA_U64 },
|
|
[NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
|
|
[NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
|
|
[NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
|
|
[NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
|
|
[NDTPA_LOCKTIME] = { .type = NLA_U64 },
|
|
};
|
|
|
|
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
|
|
{
|
|
struct net *net = sock_net(skb->sk);
|
|
struct neigh_table *tbl;
|
|
struct ndtmsg *ndtmsg;
|
|
struct nlattr *tb[NDTA_MAX+1];
|
|
bool found = false;
|
|
int err, tidx;
|
|
|
|
err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
|
|
nl_neightbl_policy);
|
|
if (err < 0)
|
|
goto errout;
|
|
|
|
if (tb[NDTA_NAME] == NULL) {
|
|
err = -EINVAL;
|
|
goto errout;
|
|
}
|
|
|
|
ndtmsg = nlmsg_data(nlh);
|
|
|
|
for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
|
|
tbl = neigh_tables[tidx];
|
|
if (!tbl)
|
|
continue;
|
|
if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
|
|
continue;
|
|
if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!found)
|
|
return -ENOENT;
|
|
|
|
/*
|
|
* We acquire tbl->lock to be nice to the periodic timers and
|
|
* make sure they always see a consistent set of values.
|
|
*/
|
|
write_lock_bh(&tbl->lock);
|
|
|
|
if (tb[NDTA_PARMS]) {
|
|
struct nlattr *tbp[NDTPA_MAX+1];
|
|
struct neigh_parms *p;
|
|
int i, ifindex = 0;
|
|
|
|
err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
|
|
nl_ntbl_parm_policy);
|
|
if (err < 0)
|
|
goto errout_tbl_lock;
|
|
|
|
if (tbp[NDTPA_IFINDEX])
|
|
ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
|
|
|
|
p = lookup_neigh_parms(tbl, net, ifindex);
|
|
if (p == NULL) {
|
|
err = -ENOENT;
|
|
goto errout_tbl_lock;
|
|
}
|
|
|
|
for (i = 1; i <= NDTPA_MAX; i++) {
|
|
if (tbp[i] == NULL)
|
|
continue;
|
|
|
|
switch (i) {
|
|
case NDTPA_QUEUE_LEN:
|
|
NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
|
|
nla_get_u32(tbp[i]) *
|
|
SKB_TRUESIZE(ETH_FRAME_LEN));
|
|
break;
|
|
case NDTPA_QUEUE_LENBYTES:
|
|
NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
|
|
nla_get_u32(tbp[i]));
|
|
break;
|
|
case NDTPA_PROXY_QLEN:
|
|
NEIGH_VAR_SET(p, PROXY_QLEN,
|
|
nla_get_u32(tbp[i]));
|
|
break;
|
|
case NDTPA_APP_PROBES:
|
|
NEIGH_VAR_SET(p, APP_PROBES,
|
|
nla_get_u32(tbp[i]));
|
|
break;
|
|
case NDTPA_UCAST_PROBES:
|
|
NEIGH_VAR_SET(p, UCAST_PROBES,
|
|
nla_get_u32(tbp[i]));
|
|
break;
|
|
case NDTPA_MCAST_PROBES:
|
|
NEIGH_VAR_SET(p, MCAST_PROBES,
|
|
nla_get_u32(tbp[i]));
|
|
break;
|
|
case NDTPA_MCAST_REPROBES:
|
|
NEIGH_VAR_SET(p, MCAST_REPROBES,
|
|
nla_get_u32(tbp[i]));
|
|
break;
|
|
case NDTPA_BASE_REACHABLE_TIME:
|
|
NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
|
|
nla_get_msecs(tbp[i]));
|
|
/* update reachable_time as well, otherwise, the change will
|
|
* only be effective after the next time neigh_periodic_work
|
|
* decides to recompute it (can be multiple minutes)
|
|
*/
|
|
p->reachable_time =
|
|
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
|
|
break;
|
|
case NDTPA_GC_STALETIME:
|
|
NEIGH_VAR_SET(p, GC_STALETIME,
|
|
nla_get_msecs(tbp[i]));
|
|
break;
|
|
case NDTPA_DELAY_PROBE_TIME:
|
|
NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
|
|
nla_get_msecs(tbp[i]));
|
|
break;
|
|
case NDTPA_RETRANS_TIME:
|
|
NEIGH_VAR_SET(p, RETRANS_TIME,
|
|
nla_get_msecs(tbp[i]));
|
|
break;
|
|
case NDTPA_ANYCAST_DELAY:
|
|
NEIGH_VAR_SET(p, ANYCAST_DELAY,
|
|
nla_get_msecs(tbp[i]));
|
|
break;
|
|
case NDTPA_PROXY_DELAY:
|
|
NEIGH_VAR_SET(p, PROXY_DELAY,
|
|
nla_get_msecs(tbp[i]));
|
|
break;
|
|
case NDTPA_LOCKTIME:
|
|
NEIGH_VAR_SET(p, LOCKTIME,
|
|
nla_get_msecs(tbp[i]));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
err = -ENOENT;
|
|
if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
|
|
tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
|
|
!net_eq(net, &init_net))
|
|
goto errout_tbl_lock;
|
|
|
|
if (tb[NDTA_THRESH1])
|
|
tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
|
|
|
|
if (tb[NDTA_THRESH2])
|
|
tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
|
|
|
|
if (tb[NDTA_THRESH3])
|
|
tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
|
|
|
|
if (tb[NDTA_GC_INTERVAL])
|
|
tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
|
|
|
|
err = 0;
|
|
|
|
errout_tbl_lock:
|
|
write_unlock_bh(&tbl->lock);
|
|
errout:
|
|
return err;
|
|
}
|
|
|
|
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
|
|
{
|
|
struct net *net = sock_net(skb->sk);
|
|
int family, tidx, nidx = 0;
|
|
int tbl_skip = cb->args[0];
|
|
int neigh_skip = cb->args[1];
|
|
struct neigh_table *tbl;
|
|
|
|
family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
|
|
|
|
for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
|
|
struct neigh_parms *p;
|
|
|
|
tbl = neigh_tables[tidx];
|
|
if (!tbl)
|
|
continue;
|
|
|
|
if (tidx < tbl_skip || (family && tbl->family != family))
|
|
continue;
|
|
|
|
if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
|
|
cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
|
|
NLM_F_MULTI) < 0)
|
|
break;
|
|
|
|
nidx = 0;
|
|
p = list_next_entry(&tbl->parms, list);
|
|
list_for_each_entry_from(p, &tbl->parms_list, list) {
|
|
if (!net_eq(neigh_parms_net(p), net))
|
|
continue;
|
|
|
|
if (nidx < neigh_skip)
|
|
goto next;
|
|
|
|
if (neightbl_fill_param_info(skb, tbl, p,
|
|
NETLINK_CB(cb->skb).portid,
|
|
cb->nlh->nlmsg_seq,
|
|
RTM_NEWNEIGHTBL,
|
|
NLM_F_MULTI) < 0)
|
|
goto out;
|
|
next:
|
|
nidx++;
|
|
}
|
|
|
|
neigh_skip = 0;
|
|
}
|
|
out:
|
|
cb->args[0] = tidx;
|
|
cb->args[1] = nidx;
|
|
|
|
return skb->len;
|
|
}
|
|
|
|
/*
 * Append one neighbour message describing @neigh to @skb.
 *
 * The message consists of an ndmsg header followed by NDA_DST, NDA_LLADDR
 * (only while the entry is in a NUD_VALID state), NDA_PROBES and
 * NDA_CACHEINFO.  State, link-layer address and cache timestamps are
 * sampled under neigh->lock.
 *
 * Returns 0 on success or -EMSGSIZE when @skb has no room; in that case
 * the partially built message is cancelled.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (!nlh)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto cancel;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0)
			goto unlock_cancel;
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto cancel;

	nlmsg_end(skb, nlh);
	return 0;

unlock_cancel:
	read_unlock_bh(&neigh->lock);
cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
|
|
|
|
/*
 * Append one message describing the proxy entry @pn of @tbl to @skb.
 *
 * The entry is reported with NTF_PROXY set, type RTN_UNICAST and state
 * NUD_NONE; ndm_ifindex is 0 when the entry has no device.  Only NDA_DST
 * is attached.
 *
 * Returns 0 on success or -EMSGSIZE when @skb has no room.
 */
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;
	int ifindex = 0;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (!nlh)
		return -EMSGSIZE;

	if (pn->dev)
		ifindex = pn->dev->ifindex;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
	ndm->ndm_type	 = RTN_UNICAST;
	ndm->ndm_ifindex = ifindex;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}
|
|
|
|
static void neigh_update_notify(struct neighbour *neigh)
|
|
{
|
|
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
|
|
__neigh_notify(neigh, RTM_NEWNEIGH, 0);
|
|
}
|
|
|
|
static bool neigh_master_filtered(struct net_device *dev, int master_idx)
|
|
{
|
|
struct net_device *master;
|
|
|
|
if (!master_idx)
|
|
return false;
|
|
|
|
master = netdev_master_upper_dev_get(dev);
|
|
if (!master || master->ifindex != master_idx)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
|
|
{
|
|
if (filter_idx && dev->ifindex != filter_idx)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
|
|
struct netlink_callback *cb)
|
|
{
|
|
struct net *net = sock_net(skb->sk);
|
|
const struct nlmsghdr *nlh = cb->nlh;
|
|
struct nlattr *tb[NDA_MAX + 1];
|
|
struct neighbour *n;
|
|
int rc, h, s_h = cb->args[1];
|
|
int idx, s_idx = idx = cb->args[2];
|
|
struct neigh_hash_table *nht;
|
|
int filter_master_idx = 0, filter_idx = 0;
|
|
unsigned int flags = NLM_F_MULTI;
|
|
int err;
|
|
|
|
err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL);
|
|
if (!err) {
|
|
if (tb[NDA_IFINDEX])
|
|
filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
|
|
|
|
if (tb[NDA_MASTER])
|
|
filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
|
|
|
|
if (filter_idx || filter_master_idx)
|
|
flags |= NLM_F_DUMP_FILTERED;
|
|
}
|
|
|
|
rcu_read_lock_bh();
|
|
nht = rcu_dereference_bh(tbl->nht);
|
|
|
|
for (h = s_h; h < (1 << nht->hash_shift); h++) {
|
|
if (h > s_h)
|
|
s_idx = 0;
|
|
for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
|
|
n != NULL;
|
|
n = rcu_dereference_bh(n->next)) {
|
|
if (!net_eq(dev_net(n->dev), net))
|
|
continue;
|
|
if (neigh_ifindex_filtered(n->dev, filter_idx))
|
|
continue;
|
|
if (neigh_master_filtered(n->dev, filter_master_idx))
|
|
continue;
|
|
if (idx < s_idx)
|
|
goto next;
|
|
if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
|
|
cb->nlh->nlmsg_seq,
|
|
RTM_NEWNEIGH,
|
|
flags) < 0) {
|
|
rc = -1;
|
|
goto out;
|
|
}
|
|
next:
|
|
idx++;
|
|
}
|
|
}
|
|
rc = skb->len;
|
|
out:
|
|
rcu_read_unlock_bh();
|
|
cb->args[1] = h;
|
|
cb->args[2] = idx;
|
|
return rc;
|
|
}
|
|
|
|
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
|
|
struct netlink_callback *cb)
|
|
{
|
|
struct pneigh_entry *n;
|
|
struct net *net = sock_net(skb->sk);
|
|
int rc, h, s_h = cb->args[3];
|
|
int idx, s_idx = idx = cb->args[4];
|
|
|
|
read_lock_bh(&tbl->lock);
|
|
|
|
for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
|
|
if (h > s_h)
|
|
s_idx = 0;
|
|
for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
|
|
if (pneigh_net(n) != net)
|
|
continue;
|
|
if (idx < s_idx)
|
|
goto next;
|
|
if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
|
|
cb->nlh->nlmsg_seq,
|
|
RTM_NEWNEIGH,
|
|
NLM_F_MULTI, tbl) < 0) {
|
|
read_unlock_bh(&tbl->lock);
|
|
rc = -1;
|
|
goto out;
|
|
}
|
|
next:
|
|
idx++;
|
|
}
|
|
}
|
|
|
|
read_unlock_bh(&tbl->lock);
|
|
rc = skb->len;
|
|
out:
|
|
cb->args[3] = h;
|
|
cb->args[4] = idx;
|
|
return rc;
|
|
|
|
}
|
|
|
|
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
|
|
{
|
|
struct neigh_table *tbl;
|
|
int t, family, s_t;
|
|
int proxy = 0;
|
|
int err;
|
|
|
|
family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
|
|
|
|
/* check for full ndmsg structure presence, family member is
|
|
* the same for both structures
|
|
*/
|
|
if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
|
|
((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
|
|
proxy = 1;
|
|
|
|
s_t = cb->args[0];
|
|
|
|
for (t = 0; t < NEIGH_NR_TABLES; t++) {
|
|
tbl = neigh_tables[t];
|
|
|
|
if (!tbl)
|
|
continue;
|
|
if (t < s_t || (family && tbl->family != family))
|
|
continue;
|
|
if (t > s_t)
|
|
memset(&cb->args[1], 0, sizeof(cb->args) -
|
|
sizeof(cb->args[0]));
|
|
if (proxy)
|
|
err = pneigh_dump_table(tbl, skb, cb);
|
|
else
|
|
err = neigh_dump_table(tbl, skb, cb);
|
|
if (err < 0)
|
|
break;
|
|
}
|
|
|
|
cb->args[0] = t;
|
|
return skb->len;
|
|
}
|
|
|
|
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
|
|
{
|
|
int chain;
|
|
struct neigh_hash_table *nht;
|
|
|
|
rcu_read_lock_bh();
|
|
nht = rcu_dereference_bh(tbl->nht);
|
|
|
|
read_lock(&tbl->lock); /* avoid resizes */
|
|
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
|
|
struct neighbour *n;
|
|
|
|
for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
|
|
n != NULL;
|
|
n = rcu_dereference_bh(n->next))
|
|
cb(n, cookie);
|
|
}
|
|
read_unlock(&tbl->lock);
|
|
rcu_read_unlock_bh();
|
|
}
|
|
EXPORT_SYMBOL(neigh_for_each);
|
|
|
|
/* The tbl->lock must be held as a writer and BH disabled. */
|
|
void __neigh_for_each_release(struct neigh_table *tbl,
|
|
int (*cb)(struct neighbour *))
|
|
{
|
|
int chain;
|
|
struct neigh_hash_table *nht;
|
|
|
|
nht = rcu_dereference_protected(tbl->nht,
|
|
lockdep_is_held(&tbl->lock));
|
|
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
|
|
struct neighbour *n;
|
|
struct neighbour __rcu **np;
|
|
|
|
np = &nht->hash_buckets[chain];
|
|
while ((n = rcu_dereference_protected(*np,
|
|
lockdep_is_held(&tbl->lock))) != NULL) {
|
|
int release;
|
|
|
|
write_lock(&n->lock);
|
|
release = cb(n);
|
|
if (release) {
|
|
rcu_assign_pointer(*np,
|
|
rcu_dereference_protected(n->next,
|
|
lockdep_is_held(&tbl->lock)));
|
|
n->dead = 1;
|
|
} else
|
|
np = &n->next;
|
|
write_unlock(&n->lock);
|
|
if (release)
|
|
neigh_cleanup_and_release(n);
|
|
}
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(__neigh_for_each_release);
|
|
|
|
int neigh_xmit(int index, struct net_device *dev,
|
|
const void *addr, struct sk_buff *skb)
|
|
{
|
|
int err = -EAFNOSUPPORT;
|
|
if (likely(index < NEIGH_NR_TABLES)) {
|
|
struct neigh_table *tbl;
|
|
struct neighbour *neigh;
|
|
|
|
tbl = neigh_tables[index];
|
|
if (!tbl)
|
|
goto out;
|
|
rcu_read_lock_bh();
|
|
neigh = __neigh_lookup_noref(tbl, addr, dev);
|
|
if (!neigh)
|
|
neigh = __neigh_create(tbl, addr, dev, false);
|
|
err = PTR_ERR(neigh);
|
|
if (IS_ERR(neigh)) {
|
|
rcu_read_unlock_bh();
|
|
goto out_kfree_skb;
|
|
}
|
|
err = neigh->output(neigh, skb);
|
|
rcu_read_unlock_bh();
|
|
}
|
|
else if (index == NEIGH_LINK_TABLE) {
|
|
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
|
|
addr, NULL, skb->len);
|
|
if (err < 0)
|
|
goto out_kfree_skb;
|
|
err = dev_queue_xmit(skb);
|
|
}
|
|
out:
|
|
return err;
|
|
out_kfree_skb:
|
|
kfree_skb(skb);
|
|
goto out;
|
|
}
|
|
EXPORT_SYMBOL(neigh_xmit);
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
static struct neighbour *neigh_get_first(struct seq_file *seq)
|
|
{
|
|
struct neigh_seq_state *state = seq->private;
|
|
struct net *net = seq_file_net(seq);
|
|
struct neigh_hash_table *nht = state->nht;
|
|
struct neighbour *n = NULL;
|
|
int bucket = state->bucket;
|
|
|
|
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
|
|
for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
|
|
n = rcu_dereference_bh(nht->hash_buckets[bucket]);
|
|
|
|
while (n) {
|
|
if (!net_eq(dev_net(n->dev), net))
|
|
goto next;
|
|
if (state->neigh_sub_iter) {
|
|
loff_t fakep = 0;
|
|
void *v;
|
|
|
|
v = state->neigh_sub_iter(state, n, &fakep);
|
|
if (!v)
|
|
goto next;
|
|
}
|
|
if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
|
|
break;
|
|
if (n->nud_state & ~NUD_NOARP)
|
|
break;
|
|
next:
|
|
n = rcu_dereference_bh(n->next);
|
|
}
|
|
|
|
if (n)
|
|
break;
|
|
}
|
|
state->bucket = bucket;
|
|
|
|
return n;
|
|
}
|
|
|
|
static struct neighbour *neigh_get_next(struct seq_file *seq,
|
|
struct neighbour *n,
|
|
loff_t *pos)
|
|
{
|
|
struct neigh_seq_state *state = seq->private;
|
|
struct net *net = seq_file_net(seq);
|
|
struct neigh_hash_table *nht = state->nht;
|
|
|
|
if (state->neigh_sub_iter) {
|
|
void *v = state->neigh_sub_iter(state, n, pos);
|
|
if (v)
|
|
return n;
|
|
}
|
|
n = rcu_dereference_bh(n->next);
|
|
|
|
while (1) {
|
|
while (n) {
|
|
if (!net_eq(dev_net(n->dev), net))
|
|
goto next;
|
|
if (state->neigh_sub_iter) {
|
|
void *v = state->neigh_sub_iter(state, n, pos);
|
|
if (v)
|
|
return n;
|
|
goto next;
|
|
}
|
|
if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
|
|
break;
|
|
|
|
if (n->nud_state & ~NUD_NOARP)
|
|
break;
|
|
next:
|
|
n = rcu_dereference_bh(n->next);
|
|
}
|
|
|
|
if (n)
|
|
break;
|
|
|
|
if (++state->bucket >= (1 << nht->hash_shift))
|
|
break;
|
|
|
|
n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
|
|
}
|
|
|
|
if (n && pos)
|
|
--(*pos);
|
|
return n;
|
|
}
|
|
|
|
/*
 * Seek to the neighbour entry at 1-based position *pos.
 *
 * *pos is counted down as entries are consumed; whatever remains when the
 * table runs out is left in *pos for the caller.  Returns the entry, or
 * NULL when the table holds fewer eligible entries than requested.
 */
static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (!n)
		return NULL;

	for (--(*pos); *pos; ) {
		n = neigh_get_next(seq, n, pos);
		if (!n)
			return NULL;
	}

	return n;
}
|
|
|
|
static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
|
|
{
|
|
struct neigh_seq_state *state = seq->private;
|
|
struct net *net = seq_file_net(seq);
|
|
struct neigh_table *tbl = state->tbl;
|
|
struct pneigh_entry *pn = NULL;
|
|
int bucket = state->bucket;
|
|
|
|
state->flags |= NEIGH_SEQ_IS_PNEIGH;
|
|
for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
|
|
pn = tbl->phash_buckets[bucket];
|
|
while (pn && !net_eq(pneigh_net(pn), net))
|
|
pn = pn->next;
|
|
if (pn)
|
|
break;
|
|
}
|
|
state->bucket = bucket;
|
|
|
|
return pn;
|
|
}
|
|
|
|
static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
|
|
struct pneigh_entry *pn,
|
|
loff_t *pos)
|
|
{
|
|
struct neigh_seq_state *state = seq->private;
|
|
struct net *net = seq_file_net(seq);
|
|
struct neigh_table *tbl = state->tbl;
|
|
|
|
do {
|
|
pn = pn->next;
|
|
} while (pn && !net_eq(pneigh_net(pn), net));
|
|
|
|
while (!pn) {
|
|
if (++state->bucket > PNEIGH_HASHMASK)
|
|
break;
|
|
pn = tbl->phash_buckets[state->bucket];
|
|
while (pn && !net_eq(pneigh_net(pn), net))
|
|
pn = pn->next;
|
|
if (pn)
|
|
break;
|
|
}
|
|
|
|
if (pn && pos)
|
|
--(*pos);
|
|
|
|
return pn;
|
|
}
|
|
|
|
/*
 * Seek to the proxy entry at 1-based position *pos.
 *
 * *pos is counted down as entries are consumed.  Returns the entry, or
 * NULL when the proxy hash holds fewer matching entries than requested.
 */
static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (!pn)
		return NULL;

	for (--(*pos); *pos; ) {
		pn = pneigh_get_next(seq, pn, pos);
		if (!pn)
			return NULL;
	}

	return pn;
}
|
|
|
|
/*
 * Seek to position *pos across the neighbour entries and, unless
 * NEIGH_SEQ_NEIGH_ONLY is set, the proxy entries that follow them.
 *
 * Works on a private copy of *pos, so the caller's value is not changed;
 * the count left over from the neighbour walk is carried into the proxy
 * walk.
 */
static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	loff_t idxpos = *pos;
	void *rc;

	rc = neigh_get_idx(seq, &idxpos);
	if (rc)
		return rc;

	if (state->flags & NEIGH_SEQ_NEIGH_ONLY)
		return NULL;

	return pneigh_get_idx(seq, &idxpos);
}
|
|
|
|
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
|
|
__acquires(rcu_bh)
|
|
{
|
|
struct neigh_seq_state *state = seq->private;
|
|
|
|
state->tbl = tbl;
|
|
state->bucket = 0;
|
|
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
|
|
|
|
rcu_read_lock_bh();
|
|
state->nht = rcu_dereference_bh(tbl->nht);
|
|
|
|
return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
|
|
}
|
|
EXPORT_SYMBOL(neigh_seq_start);
|
|
|
|
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
|
{
|
|
struct neigh_seq_state *state;
|
|
void *rc;
|
|
|
|
if (v == SEQ_START_TOKEN) {
|
|
rc = neigh_get_first(seq);
|
|
goto out;
|
|
}
|
|
|
|
state = seq->private;
|
|
if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
|
|
rc = neigh_get_next(seq, v, NULL);
|
|
if (rc)
|
|
goto out;
|
|
if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
|
|
rc = pneigh_get_first(seq);
|
|
} else {
|
|
BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
|
|
rc = pneigh_get_next(seq, v, NULL);
|
|
}
|
|
out:
|
|
++(*pos);
|
|
return rc;
|
|
}
|
|
EXPORT_SYMBOL(neigh_seq_next);
|
|
|
|
void neigh_seq_stop(struct seq_file *seq, void *v)
|
|
__releases(rcu_bh)
|
|
{
|
|
rcu_read_unlock_bh();
|
|
}
|
|
EXPORT_SYMBOL(neigh_seq_stop);
|
|
|
|
/* statistics via seq_file */
|
|
|
|
/*
 * ->start for the per-cpu statistics file.
 *
 * Position 0 is the header line (SEQ_START_TOKEN).  Position N > 0 maps
 * to cpu N-1; cpus that are not possible are skipped and *pos is moved to
 * one past the cpu actually returned.  Returns that cpu's statistics, or
 * NULL when no possible cpu is left.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (cpu_possible(cpu)) {
			*pos = cpu+1;
			return per_cpu_ptr(tbl->stats, cpu);
		}
	}

	return NULL;
}
|
|
|
|
/*
 * ->next for the per-cpu statistics file.
 *
 * Starting at cpu *pos, returns the statistics of the next possible cpu
 * and moves *pos to one past it, or returns NULL when none is left.
 */
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (cpu_possible(cpu)) {
			*pos = cpu+1;
			return per_cpu_ptr(tbl->stats, cpu);
		}
	}

	return NULL;
}
|
|
|
|
/* ->stop for the per-cpu statistics file: there is nothing to release. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
|
|
|
|
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
|
|
{
|
|
struct neigh_table *tbl = seq->private;
|
|
struct neigh_statistics *st = v;
|
|
|
|
if (v == SEQ_START_TOKEN) {
|
|
seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
|
|
return 0;
|
|
}
|
|
|
|
seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
|
|
"%08lx %08lx %08lx %08lx %08lx %08lx\n",
|
|
atomic_read(&tbl->entries),
|
|
|
|
st->allocs,
|
|
st->destroys,
|
|
st->hash_grows,
|
|
|
|
st->lookups,
|
|
st->hits,
|
|
|
|
st->res_failed,
|
|
|
|
st->rcv_probes_mcast,
|
|
st->rcv_probes_ucast,
|
|
|
|
st->periodic_gc_runs,
|
|
st->forced_gc_runs,
|
|
st->unres_discards,
|
|
st->table_fulls
|
|
);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static const struct seq_operations neigh_stat_seq_ops = {
|
|
.start = neigh_stat_seq_start,
|
|
.next = neigh_stat_seq_next,
|
|
.stop = neigh_stat_seq_stop,
|
|
.show = neigh_stat_seq_show,
|
|
};
|
|
|
|
static int neigh_stat_seq_open(struct inode *inode, struct file *file)
|
|
{
|
|
int ret = seq_open(file, &neigh_stat_seq_ops);
|
|
|
|
if (!ret) {
|
|
struct seq_file *sf = file->private_data;
|
|
sf->private = PDE_DATA(inode);
|
|
}
|
|
return ret;
|
|
};
|
|
|
|
static const struct file_operations neigh_stat_seq_fops = {
|
|
.owner = THIS_MODULE,
|
|
.open = neigh_stat_seq_open,
|
|
.read = seq_read,
|
|
.llseek = seq_lseek,
|
|
.release = seq_release,
|
|
};
|
|
|
|
#endif /* CONFIG_PROC_FS */
|
|
|
|
static inline size_t neigh_nlmsg_size(void)
|
|
{
|
|
return NLMSG_ALIGN(sizeof(struct ndmsg))
|
|
+ nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
|
|
+ nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
|
|
+ nla_total_size(sizeof(struct nda_cacheinfo))
|
|
+ nla_total_size(4); /* NDA_PROBES */
|
|
}
|
|
|
|
static void __neigh_notify(struct neighbour *n, int type, int flags)
|
|
{
|
|
struct net *net = dev_net(n->dev);
|
|
struct sk_buff *skb;
|
|
int err = -ENOBUFS;
|
|
|
|
skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
|
|
if (skb == NULL)
|
|
goto errout;
|
|
|
|
err = neigh_fill_info(skb, n, 0, 0, type, flags);
|
|
if (err < 0) {
|
|
/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
|
|
WARN_ON(err == -EMSGSIZE);
|
|
kfree_skb(skb);
|
|
goto errout;
|
|
}
|
|
rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
|
|
return;
|
|
errout:
|
|
if (err < 0)
|
|
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
|
|
}
|
|
|
|
void neigh_app_ns(struct neighbour *n)
|
|
{
|
|
__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
|
|
}
|
|
EXPORT_SYMBOL(neigh_app_ns);
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
static int zero;
|
|
static int int_max = INT_MAX;
|
|
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
|
|
|
|
static int proc_unres_qlen(struct ctl_table *ctl, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
{
|
|
int size, ret;
|
|
struct ctl_table tmp = *ctl;
|
|
|
|
tmp.extra1 = &zero;
|
|
tmp.extra2 = &unres_qlen_max;
|
|
tmp.data = &size;
|
|
|
|
size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
|
|
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
|
|
|
|
if (write && !ret)
|
|
*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
|
|
return ret;
|
|
}
|
|
|
|
static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
|
|
int family)
|
|
{
|
|
switch (family) {
|
|
case AF_INET:
|
|
return __in_dev_arp_parms_get_rcu(dev);
|
|
case AF_INET6:
|
|
return __in6_dev_nd_parms_get_rcu(dev);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
|
|
int index)
|
|
{
|
|
struct net_device *dev;
|
|
int family = neigh_parms_family(p);
|
|
|
|
rcu_read_lock();
|
|
for_each_netdev_rcu(net, dev) {
|
|
struct neigh_parms *dst_p =
|
|
neigh_get_dev_parms_rcu(dev, family);
|
|
|
|
if (dst_p && !test_bit(index, dst_p->data_state))
|
|
dst_p->data[index] = p->data[index];
|
|
}
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
static void neigh_proc_update(struct ctl_table *ctl, int write)
|
|
{
|
|
struct net_device *dev = ctl->extra1;
|
|
struct neigh_parms *p = ctl->extra2;
|
|
struct net *net = neigh_parms_net(p);
|
|
int index = (int *) ctl->data - p->data;
|
|
|
|
if (!write)
|
|
return;
|
|
|
|
set_bit(index, p->data_state);
|
|
if (!dev) /* NULL dev means this is default value */
|
|
neigh_copy_dflt_parms(net, p, index);
|
|
}
|
|
|
|
static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
|
|
void __user *buffer,
|
|
size_t *lenp, loff_t *ppos)
|
|
{
|
|
struct ctl_table tmp = *ctl;
|
|
int ret;
|
|
|
|
tmp.extra1 = &zero;
|
|
tmp.extra2 = &int_max;
|
|
|
|
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
|
|
neigh_proc_update(ctl, write);
|
|
return ret;
|
|
}
|
|
|
|
int neigh_proc_dointvec(struct ctl_table *ctl, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
{
|
|
int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
|
|
|
|
neigh_proc_update(ctl, write);
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(neigh_proc_dointvec);
|
|
|
|
int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
|
|
void __user *buffer,
|
|
size_t *lenp, loff_t *ppos)
|
|
{
|
|
int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
|
|
|
|
neigh_proc_update(ctl, write);
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
|
|
|
|
static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
|
|
void __user *buffer,
|
|
size_t *lenp, loff_t *ppos)
|
|
{
|
|
int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
|
|
|
|
neigh_proc_update(ctl, write);
|
|
return ret;
|
|
}
|
|
|
|
int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
|
|
void __user *buffer,
|
|
size_t *lenp, loff_t *ppos)
|
|
{
|
|
int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
|
|
|
|
neigh_proc_update(ctl, write);
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
|
|
|
|
static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
|
|
void __user *buffer,
|
|
size_t *lenp, loff_t *ppos)
|
|
{
|
|
int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
|
|
|
|
neigh_proc_update(ctl, write);
|
|
return ret;
|
|
}
|
|
|
|
static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
|
|
void __user *buffer,
|
|
size_t *lenp, loff_t *ppos)
|
|
{
|
|
struct neigh_parms *p = ctl->extra2;
|
|
int ret;
|
|
|
|
if (strcmp(ctl->procname, "base_reachable_time") == 0)
|
|
ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
|
|
else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
|
|
ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
|
|
else
|
|
ret = -1;
|
|
|
|
if (write && ret == 0) {
|
|
/* update reachable_time as well, otherwise, the change will
|
|
* only be effective after the next time neigh_periodic_work
|
|
* decides to recompute it
|
|
*/
|
|
p->reachable_time =
|
|
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Address of data[index] inside a neigh_parms located at address 0, i.e.
 * the member's offset expressed as a pointer.  neigh_sysctl_register()
 * adds the real parms address to it.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/*
 * Template slot NEIGH_VAR_<attr>, named @name, backed by the parms slot
 * NEIGH_VAR_<data_attr>, with mode @mval and handler @proc.
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

/* Shorthands: mode 0644, one macro per handler flavour. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* "REUSED" entries are a second view onto another attribute's slot. */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
|
|
|
|
static struct neigh_sysctl_table {
|
|
struct ctl_table_header *sysctl_header;
|
|
struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
|
|
} neigh_sysctl_template __read_mostly = {
|
|
.neigh_vars = {
|
|
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
|
|
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
|
|
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
|
|
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
|
|
NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
|
|
NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
|
|
NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
|
|
NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
|
|
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
|
|
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
|
|
NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
|
|
NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
|
|
NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
|
|
NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
|
|
NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
|
|
NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
|
|
[NEIGH_VAR_GC_INTERVAL] = {
|
|
.procname = "gc_interval",
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_jiffies,
|
|
},
|
|
[NEIGH_VAR_GC_THRESH1] = {
|
|
.procname = "gc_thresh1",
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.extra1 = &zero,
|
|
.extra2 = &int_max,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
},
|
|
[NEIGH_VAR_GC_THRESH2] = {
|
|
.procname = "gc_thresh2",
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.extra1 = &zero,
|
|
.extra2 = &int_max,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
},
|
|
[NEIGH_VAR_GC_THRESH3] = {
|
|
.procname = "gc_thresh3",
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.extra1 = &zero,
|
|
.extra2 = &int_max,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
},
|
|
{},
|
|
},
|
|
};
|
|
|
|
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
|
|
proc_handler *handler)
|
|
{
|
|
int i;
|
|
struct neigh_sysctl_table *t;
|
|
const char *dev_name_source;
|
|
char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
|
|
char *p_name;
|
|
|
|
t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
|
|
if (!t)
|
|
goto err;
|
|
|
|
for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
|
|
t->neigh_vars[i].data += (long) p;
|
|
t->neigh_vars[i].extra1 = dev;
|
|
t->neigh_vars[i].extra2 = p;
|
|
}
|
|
|
|
if (dev) {
|
|
dev_name_source = dev->name;
|
|
/* Terminate the table early */
|
|
memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
|
|
sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
|
|
} else {
|
|
struct neigh_table *tbl = p->tbl;
|
|
dev_name_source = "default";
|
|
t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
|
|
t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
|
|
t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
|
|
t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
|
|
}
|
|
|
|
if (handler) {
|
|
/* RetransTime */
|
|
t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
|
|
/* ReachableTime */
|
|
t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
|
|
/* RetransTime (in milliseconds)*/
|
|
t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
|
|
/* ReachableTime (in milliseconds) */
|
|
t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
|
|
} else {
|
|
/* Those handlers will update p->reachable_time after
|
|
* base_reachable_time(_ms) is set to ensure the new timer starts being
|
|
* applied after the next neighbour update instead of waiting for
|
|
* neigh_periodic_work to update its value (can be multiple minutes)
|
|
* So any handler that replaces them should do this as well
|
|
*/
|
|
/* ReachableTime */
|
|
t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
|
|
neigh_proc_base_reachable_time;
|
|
/* ReachableTime (in milliseconds) */
|
|
t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
|
|
neigh_proc_base_reachable_time;
|
|
}
|
|
|
|
/* Don't export sysctls to unprivileged users */
|
|
if (neigh_parms_net(p)->user_ns != &init_user_ns)
|
|
t->neigh_vars[0].procname = NULL;
|
|
|
|
switch (neigh_parms_family(p)) {
|
|
case AF_INET:
|
|
p_name = "ipv4";
|
|
break;
|
|
case AF_INET6:
|
|
p_name = "ipv6";
|
|
break;
|
|
default:
|
|
BUG();
|
|
}
|
|
|
|
snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
|
|
p_name, dev_name_source);
|
|
t->sysctl_header =
|
|
register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
|
|
if (!t->sysctl_header)
|
|
goto free;
|
|
|
|
p->sysctl_table = t;
|
|
return 0;
|
|
|
|
free:
|
|
kfree(t);
|
|
err:
|
|
return -ENOBUFS;
|
|
}
|
|
EXPORT_SYMBOL(neigh_sysctl_register);
|
|
|
|
void neigh_sysctl_unregister(struct neigh_parms *p)
|
|
{
|
|
if (p->sysctl_table) {
|
|
struct neigh_sysctl_table *t = p->sysctl_table;
|
|
p->sysctl_table = NULL;
|
|
unregister_net_sysctl_table(t->sysctl_header);
|
|
kfree(t);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(neigh_sysctl_unregister);
|
|
|
|
#endif /* CONFIG_SYSCTL */
|
|
|
|
/*
 * Register the rtnetlink handlers of the neighbour subsystem:
 * add/delete/dump for neighbour entries and dump/set for neighbour
 * tables.  Run as a subsys initcall.
 */
static int __init neigh_init(void)
{
	/* neighbour entries */
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	/* neighbour tables */
	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
|
|
|