Files
Michael Bestas 43cdebbc57 Merge remote-tracking branch 'common/android-4.9-q' into android-msm-pixel-4.9
* common/android-4.9-q:
  Linux 4.9.312
  block/compat_ioctl: fix range check in BLKGETSIZE
  ext4: force overhead calculation if the s_overhead_cluster makes no sense
  ext4: fix overhead calculation to account for the reserved gdt blocks
  ext4: limit length to bitmap_maxbytes - blocksize in punch_hole
  ARC: entry: fix syscall_trace_exit argument
  e1000e: Fix possible overflow in LTR decoding
  ASoC: soc-dapm: fix two incorrect uses of list iterator
  openvswitch: fix OOB access in reserve_sfa_size()
  dma: at_xdmac: fix a missing check on list iterator
  ata: pata_marvell: Check the 'bmdma_addr' beforing reading
  drm/msm/mdp5: check the return of kzalloc()
  brcmfmac: sdio: Fix undefined behavior due to shift overflowing the constant
  cifs: Check the IOCB_DIRECT flag, not O_DIRECT
  vxlan: fix error return code in vxlan_fdb_append
  ALSA: usb-audio: Fix undefined behavior due to shift overflowing the constant
  platform/x86: samsung-laptop: Fix an unsigned comparison which can never be negative
  ARM: vexpress/spc: Avoid negative array index when !SMP
  netlink: reset network and mac headers in netlink_dump()
  net/packet: fix packet_sock xmit return value checking
  dmaengine: imx-sdma: Fix error checking in sdma_event_remap
  ALSA: usb-audio: Clear MIDI port active flag after draining
  gfs2: assign rgrp glock before compute_bitstructs
  mm: page_alloc: fix building error on -Werror=array-compare
  etherdevice: Adjust ether_addr* prototypes to silence -Wstringop-overead
  Linux 4.9.311
  gcc-plugins: latent_entropy: use /dev/urandom
  i2c: pasemi: Wait for write xfers to finish
  smp: Fix offline cpu check in flush_smp_call_function_queue()
  ARM: davinci: da850-evm: Avoid NULL pointer dereference
  ALSA: pcm: Test for "silence" field in struct "pcm_format_data"
  mm: kmemleak: take a full lowmem check in kmemleak_*_phys()
  mm, page_alloc: fix build_zonerefs_node()
  drivers: net: slip: fix NPD bug in sl_tx_timeout()
  scsi: mvsas: Add PCI ID of RocketRaid 2640
  gpu: ipu-v3: Fix dev_dbg frequency output
  net: micrel: fix KS8851_MLL Kconfig
  scsi: ibmvscsis: Increase INITIAL_SRP_LIMIT to 1024
  drm/amdkfd: Check for potential null return of kmalloc_array()
  cifs: potential buffer overflow in handling symlinks
  nfc: nci: add flush_workqueue to prevent uaf
  net: ethernet: stmmac: fix altr_tse_pcs function when using a fixed-link
  veth: Ensure eth header is in skb's linear part
  xfrm: policy: match with both mark and mask on user interfaces
  arm64: module: remove (NOLOAD) from linker script
  mm: don't skip swap entry even if zap_details specified
  dmaengine: Revert "dmaengine: shdma: Fix runtime PM imbalance on error"
  tools build: Use $(shell ) instead of `` to get embedded libperl's ccopts
  arm64: patch_text: Fixup last cpu should be master
  x86/speculation: Restore speculation related MSRs during S3 resume
  x86/pm: Save the MSR validity status at context setup
  mm/mempolicy: fix mpol_new leak in shared_policy_replace
  mmmremap.c: avoid pointless invalidate_range_start/end on mremap(old_size=0)
  drbd: Fix five use after free bugs in get_initial_state
  drm/imx: Fix memory leak in imx_pd_connector_get_modes
  net: stmmac: Fix unset max_speed difference between DT and non-DT platforms
  scsi: zorro7xx: Fix a resource leak in zorro7xx_remove_one()
  mm: fix race between MADV_FREE reclaim and blkdev direct IO read
  jfs: prevent NULL deref in diFree
  virtio_console: eliminate anonymous module_init & module_exit
  serial: samsung_tty: do not unlock port->lock for uart_write_wakeup()
  SUNRPC/call_alloc: async tasks mustn't block waiting for memory
  w1: w1_therm: fixes w1_seq for ds28ea00 sensors
  init/main.c: return 1 from handled __setup() functions
  Bluetooth: Fix use after free in hci_send_acl
  xtensa: fix DTC warning unit_address_format
  usb: dwc3: omap: fix "unbalanced disables for smps10_out1" on omap5evm
  scsi: libfc: Fix use after free in fc_exch_abts_resp()
  MIPS: fix fortify panic when copying asm exception handlers
  bnxt_en: Eliminate unintended link toggle during FW reset
  scsi: aha152x: Fix aha152x_setup() __setup handler return value
  scsi: pm8001: Fix pm8001_mpi_task_abort_resp()
  dm ioctl: prevent potential spectre v1 gadget
  iommu/arm-smmu-v3: fix event handling soft lockup
  scsi: bfa: Replace snprintf() with sysfs_emit()
  scsi: mvsas: Replace snprintf() with sysfs_emit()
  powerpc: dts: t104xrdb: fix phy type for FMAN 4/5
  ptp: replace snprintf with sysfs_emit
  ath5k: fix OOB in ath5k_eeprom_read_pcal_info_5111
  KVM: x86/svm: Clear reserved bits written to PerfEvtSeln MSRs
  ARM: 9187/1: JIVE: fix return value of __setup handler
  rtc: wm8350: Handle error for wm8350_register_irq
  KVM: x86: Forbid VMM to set SYNIC/STIMER MSRs when SynIC wasn't activated
  openvswitch: Fixed nd target mask field in the flow dump.
  ARM: dts: spear13xx: Update SPI dma properties
  ARM: dts: spear1340: Update serial node properties
  ASoC: topology: Allow TLV control to be either read or write
  ubi: fastmap: Return error code if memory allocation fails in add_aeb()
  mm/memcontrol: return 1 from cgroup.memory __setup() handler
  mm/mmap: return 1 from stack_guard_gap __setup() handler
  ACPI: CPPC: Avoid out of bounds access when parsing _CPC data
  pinctrl: pinconf-generic: Print arguments for bias-pull-*
  gfs2: Make sure FITRIM minlen is rounded up to fs block size
  ubifs: setflags: Make dirtied_ino_d 8 bytes aligned
  ubifs: Add missing iput if do_tmpfile() failed in rename whiteout
  KVM: Prevent module exit until all VMs are freed
  scsi: qla2xxx: Fix incorrect reporting of task management failure
  mmc: host: Return an error when ->enable_sdio_irq() ops is missing
  media: hdpvr: initialize dev->worker at hdpvr_register_videodev
  video: fbdev: sm712fb: Fix crash in smtcfb_write()
  ARM: mmp: Fix failure to remove sram device
  ARM: tegra: tamonten: Fix I2C3 pad setting
  media: cx88-mpeg: clear interrupt status register before streaming video
  ASoC: soc-core: skip zero num_dai component in searching dai name
  video: fbdev: omapfb: panel-tpo-td043mtea1: Use sysfs_emit() instead of snprintf()
  video: fbdev: omapfb: panel-dsi-cm: Use sysfs_emit() instead of snprintf()
  ARM: dts: bcm2837: Add the missing L1/L2 cache information
  ARM: dts: qcom: fix gic_irq_domain_translate warnings for msm8960
  video: fbdev: omapfb: acx565akm: replace snprintf with sysfs_emit
  video: fbdev: cirrusfb: check pixclock to avoid divide by zero
  video: fbdev: w100fb: Reset global state
  video: fbdev: nvidiafb: Use strscpy() to prevent buffer overflow
  ntfs: add sanity check on allocation size
  ext4: don't BUG if someone dirty pages without asking ext4 first
  spi: tegra20: Use of_device_get_match_data()
  PM: core: keep irq flags in device_pm_check_callbacks()
  ACPI/APEI: Limit printable size of BERT table data
  ACPICA: Avoid walking the ACPI Namespace if it is not there
  irqchip/nvic: Release nvic_base upon failure
  Fix incorrect type in assignment of ipv6 port for audit
  loop: use sysfs_emit() in the sysfs xxx show()
  selinux: use correct type for context length
  net/x25: Fix null-ptr-deref caused by x25_disconnect
  qlcnic: dcb: default to returning -EOPNOTSUPP
  net: phy: broadcom: Fix brcm_fet_config_init()
  netfilter: nf_conntrack_tcp: preserve liberal flag in tcp options
  jfs: fix divide error in dbNextAG
  kgdbts: fix return value of __setup handler
  kgdboc: fix return value of __setup handler
  tty: hvc: fix return value of __setup handler
  pinctrl/rockchip: Add missing of_node_put() in rockchip_pinctrl_probe
  pinctrl: nomadik: Add missing of_node_put() in nmk_pinctrl_probe
  pinctrl: mediatek: Fix missing of_node_put() in mtk_pctrl_init
  NFS: remove unneeded check in decode_devicenotify_args()
  clk: tegra: tegra124-emc: Fix missing put_device() call in emc_ensure_emc_driver
  clk: clps711x: Terminate clk_div_table with sentinel element
  clk: loongson1: Terminate clk_div_table with sentinel element
  remoteproc: qcom_wcnss: Add missing of_node_put() in wcnss_alloc_memory_region
  clk: qcom: clk-rcg2: Update the frac table for pixel clock
  iio: adc: Add check for devm_request_threaded_irq
  pwm: lpc18xx-sct: Initialize driver data and hardware before pwmchip_add()
  mxser: fix xmit_buf leak in activate when LSR == 0xff
  mfd: asic3: Add missing iounmap() on error asic3_mfd_probe
  i2c: mux: demux-pinctrl: do not deactivate a master that is not active
  af_netlink: Fix shift out of bounds in group mask calculation
  USB: storage: ums-realtek: fix error code in rts51x_read_mem()
  MIPS: RB532: fix return value of __setup handler
  mfd: mc13xxx: Add check for mc13xxx_irq_request
  powerpc/sysdev: fix incorrect use to determine if list is empty
  power: supply: wm8350-power: Add missing free in free_charger_irq
  power: supply: wm8350-power: Handle error for wm8350_register_irq
  i2c: xiic: Make bus names unique
  KVM: x86/emulator: Defer not-present segment check in __load_segment_descriptor()
  KVM: x86: Fix emulation in writing cr8
  drm/tegra: Fix reference leak in tegra_dsi_ganged_probe
  ext2: correct max file size computing
  TOMOYO: fix __setup handlers return values
  scsi: pm8001: Fix abort all task initialization
  scsi: pm8001: Fix payload initialization in pm80xx_set_thermal_config()
  scsi: pm8001: Fix command initialization in pm8001_chip_ssp_tm_req()
  scsi: pm8001: Fix command initialization in pm80XX_send_read_log()
  iwlwifi: Fix -EIO error code that is never returned
  HID: i2c-hid: fix GET/SET_REPORT for unnumbered reports
  power: supply: ab8500: Fix memory leak in ab8500_fg_sysfs_init
  ray_cs: Check ioremap return value
  ath9k_htc: fix uninit value bugs
  drm/edid: Don't clear formats if using deep color
  mtd: onenand: Check for error irq
  ASoC: imx-es8328: Fix error return code in imx_es8328_probe()
  ASoC: mxs: Fix error handling in mxs_sgtl5000_probe
  ASoC: dmaengine: do not use a NULL prepare_slave_config() callback
  video: fbdev: omapfb: Add missing of_node_put() in dvic_probe_of
  ASoC: fsi: Add check for clk_enable
  ASoC: wm8350: Handle error for wm8350_register_irq
  ASoC: atmel: Add missing of_node_put() in at91sam9g20ek_audio_probe
  ALSA: firewire-lib: fix uninitialized flag for AV/C deferred transaction
  memory: emif: check the pointer temp in get_device_details()
  memory: emif: Add check for setup_interrupts
  ASoC: atmel_ssc_dai: Handle errors for clk_enable
  ASoC: mxs-saif: Handle errors for clk_enable
  printk: fix return value of printk.devkmsg __setup handler
  arm64: dts: broadcom: Fix sata nodename
  arm64: dts: ns2: Fix spi-cpol and spi-cpha property
  ALSA: spi: Add check for clk_enable()
  ASoC: ti: davinci-i2s: Add check for clk_enable()
  media: usb: go7007: s2250-board: fix leak in probe()
  soc: ti: wkup_m3_ipc: Fix IRQ check in wkup_m3_ipc_probe
  ARM: dts: qcom: ipq4019: fix sleep clock
  video: fbdev: fbcvt.c: fix printing in fb_cvt_print_name()
  video: fbdev: smscufx: Fix null-ptr-deref in ufx_usb_probe()
  perf/x86/intel/pt: Fix address filter config for 32-bit kernel
  perf/core: Fix address filter parser for multiple filters
  sched/debug: Remove mpol_get/put and task_lock/unlock from sched_show_numa
  clocksource: acpi_pm: fix return value of __setup handler
  hwmon: (pmbus) Add Vin unit off handling
  crypto: ccp - ccp_dmaengine_unregister release dma channels
  crypto: vmx - add missing dependencies
  PM: suspend: fix return value of __setup handler
  PM: hibernate: fix __setup handler error handling
  hwmon: (sch56xx-common) Replace WDOG_ACTIVE with WDOG_HW_RUNNING
  hwmon: (pmbus) Add mutex to regulator ops
  selftests/x86: Add validity check and allow field splitting
  spi: tegra114: Add missing IRQ check in tegra_spi_probe
  crypto: mxs-dcp - Fix scatterlist processing
  crypto: authenc - Fix sleep in atomic context in decrypt_tail
  PCI: pciehp: Clear cmd_busy bit in polling mode
  brcmfmac: pcie: Replace brcmf_pcie_copy_mem_todev with memcpy_toio
  brcmfmac: firmware: Allocate space for default boardrev in nvram
  media: davinci: vpif: fix unbalanced runtime PM get
  DEC: Limit PMAX memory probing to R3k systems
  lib/raid6/test: fix multiple definition linking error
  thermal: int340x: Increase bitmap size
  carl9170: fix missing bit-wise or operator for tx_params
  ARM: dts: exynos: add missing HDMI supplies on SMDK5420
  ARM: dts: exynos: add missing HDMI supplies on SMDK5250
  ARM: dts: exynos: fix UART3 pins configuration in Exynos5250
  video: fbdev: atari: Atari 2 bpp (STe) palette bugfix
  video: fbdev: sm712fb: Fix crash in smtcfb_read()
  drivers: hamradio: 6pack: fix UAF bug caused by mod_timer()
  ALSA: cs4236: fix an incorrect NULL check on list iterator
  Revert "Input: clear BTN_RIGHT/MIDDLE on buttonpads"
  scsi: libsas: Fix sas_ata_qc_issue() handling of NCQ NON DATA commands
  mempolicy: mbind_range() set_policy() after vma_merge()
  mm/pages_alloc.c: don't create ZONE_MOVABLE beyond the end of a node
  jffs2: fix memory leak in jffs2_scan_medium
  jffs2: fix memory leak in jffs2_do_mount_fs
  jffs2: fix use-after-free in jffs2_clear_xattr_subsystem
  can: ems_usb: ems_usb_start_xmit(): fix double dev_kfree_skb() in error path
  NFSD: prevent underflow in nfssvc_decode_writeargs()
  SUNRPC: avoid race between mod_timer() and del_timer_sync()
  ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE
  clk: uniphier: Fix fixed-rate initialization
  iio: inkern: make a best effort on offset calculation
  iio: inkern: apply consumer scale on IIO_VAL_INT cases
  coresight: Fix TRCCONFIGR.QE sysfs interface
  USB: usb-storage: Fix use of bitfields for hardware data in ene_ub6250.c
  virtio-blk: Use blk_validate_block_size() to validate block size
  block: Add a helper to validate the block size
  af_key: add __GFP_ZERO flag for compose_sadb_supported in function pfkey_register
  ethernet: sun: Free the coherent when failing in probing
  virtio_console: break out of buf poll on remove
  netdevice: add the case if dev is NULL
  USB: serial: simple: add Nokia phone driver
  USB: serial: pl2303: add IBM device IDs
  Linux 4.9.310
  arm64: Use the clearbhb instruction in mitigations
  arm64: add ID_AA64ISAR2_EL1 sys register
  KVM: arm64: Allow SMCCC_ARCH_WORKAROUND_3 to be discovered and migrated
  arm64: Mitigate spectre style branch history side channels
  KVM: arm64: Add templates for BHB mitigation sequences
  arm64: Add percpu vectors for EL1
  arm64: entry: Add macro for reading symbol addresses from the trampoline
  arm64: entry: Add vectors that have the bhb mitigation sequences
  arm64: Move arm64_update_smccc_conduit() out of SSBD ifdef
  arm64: entry: Add non-kpti __bp_harden_el1_vectors for mitigations
  arm64: entry: Allow the trampoline text to occupy multiple pages
  arm64: entry: Make the kpti trampoline's kpti sequence optional
  arm64: entry: Move trampoline macros out of ifdef'd section
  arm64: entry: Don't assume tramp_vectors is the start of the vectors
  arm64: entry: Allow tramp_alias to access symbols after the 4K boundary
  arm64: entry: Move the trampoline data page before the text page
  arm64: entry: Free up another register on kpti's tramp_exit path
  arm64: entry: Make the trampoline cleanup optional
  arm64: entry.S: Add ventry overflow sanity checks
  arm64: Add helper to decode register from instruction
  arm64: Add Cortex-X2 CPU part definition
  arm64: Add Neoverse-N2, Cortex-A710 CPU part definition
  arm64: Add part number for Arm Cortex-A77
  arm64: Add part number for Neoverse N1
  arm64: Make ARM64_ERRATUM_1188873 depend on COMPAT
  arm64: Add silicon-errata.txt entry for ARM erratum 1188873
  arm64: arch_timer: avoid unused function warning
  arm64: arch_timer: Add workaround for ARM erratum 1188873
  arm64: arch_timer: Add erratum handler for CPU-specific capability
  arm64: arch_timer: Add infrastructure for multiple erratum detection methods
  clocksource/drivers/arm_arch_timer: Introduce generic errata handling infrastructure
  clocksource/drivers/arm_arch_timer: Remove fsl-a008585 parameter
  arm64: capabilities: Add support for checks based on a list of MIDRs
  arm64: Add helpers for checking CPU MIDR against a range
  arm64: capabilities: Clean up midr range helpers
  arm64: capabilities: Add flags to handle the conflicts on late CPU
  arm64: capabilities: Prepare for fine grained capabilities
  arm64: capabilities: Move errata processing code
  arm64: capabilities: Move errata work around check on boot CPU
  arm64: capabilities: Update prototype for enable call back
  arm64: Add MIDR encoding for Arm Cortex-A55 and Cortex-A35
  arm64: Remove useless UAO IPI and describe how this gets enabled
  arm64: errata: Provide macro for major and minor cpu revisions
  Linux 4.9.309
  llc: only change llc->dev when bind() succeeds
  mac80211: fix potential double free on mesh join
  crypto: qat - disable registration of algorithms
  ACPI: video: Force backlight native for Clevo NL5xRU and NL5xNU
  ACPI: battery: Add device HID and quirk for Microsoft Surface Go 3
  ACPI / x86: Work around broken XSDT on Advantech DAC-BJ01 board
  netfilter: nf_tables: initialize registers in nft_do_chain()
  ALSA: pci: fix reading of swapped values from pcmreg in AC97 codec
  ALSA: cmipci: Restore aux vol on suspend/resume
  ALSA: usb-audio: Add mute TLV for playback volumes on RODE NT-USB
  ALSA: pcm: Add stream lock during PCM reset ioctl operations
  llc: fix netdevice reference leaks in llc_ui_bind()
  staging: fbtft: fb_st7789v: reset display before initialization
  net: ipv6: fix skb_over_panic in __ip6_append_data
  nfc: st21nfca: Fix potential buffer overflows in EVT_TRANSACTION
  Linux 4.9.308
  Input: aiptek - properly check endpoint type
  usb: gadget: Fix use-after-free bug by not setting udc->dev.driver
  usb: gadget: rndis: prevent integer overflow in rndis_set_response()
  atm: eni: Add check for dma_map_single
  net/packet: fix slab-out-of-bounds access in packet_recvmsg()
  fs: sysfs_emit: Remove PAGE_SIZE alignment check
  kselftest/vm: fix tests build with old libc
  sfc: extend the locking on mcdi->seqno
  tcp: make tcp_read_sock() more robust
  nl80211: Update bss channel on channel switch for P2P_CLIENT
  atm: firestream: check the return value of ioremap() in fs_init()
  can: rcar_canfd: rcar_canfd_channel_probe(): register the CAN device when fully ready
  ARM: 9178/1: fix unmet dependency on BITREVERSE for HAVE_ARCH_BITREVERSE
  MIPS: smp: fill in sibling and core maps earlier
  ARM: dts: rockchip: fix a typo on rk3288 crypto-controller
  xfrm: Fix xfrm migrate issues when address family changes
  Linux 4.9.307
  btrfs: unlock newly allocated extent buffer after error
  ARM: fix Thumb2 regression with Spectre BHB
  batman-adv: Don't expect inter-netns unique iflink indices
  batman-adv: Request iflink once in batadv-on-batadv check
  staging: gdm724x: fix use after free in gdm_lte_rx()
  ARM: Spectre-BHB: provide empty stub for non-config
  selftests/memfd: clean up mapping in mfd_fail_write
  tracing: Ensure trace buffer is at least 4096 bytes large
  Revert "xen-netback: Check for hotplug-status existence before watching"
  net-sysfs: add check for netdevice being present to speed_show
  sctp: fix kernel-infoleak for SCTP sockets
  gpio: ts4900: Do not set DAT and OE together
  NFC: port100: fix use-after-free in port100_send_complete
  net/mlx5: Fix size field in bufferx_reg struct
  ax25: Fix NULL pointer dereference in ax25_kill_by_device
  net: ethernet: lpc_eth: Handle error for clk_enable
  ethernet: Fix error handling in xemaclite_of_probe
  qed: return status of qed_iov_get_link
  net: qlogic: check the return value of dma_alloc_coherent() in qed_vf_hw_prepare()
  Linux 4.9.306
  xen/netfront: react properly to failing gnttab_end_foreign_access_ref()
  xen/gnttab: fix gnttab_end_foreign_access() without page specified
  xen: remove gnttab_query_foreign_access()
  xen/gntalloc: don't use gnttab_query_foreign_access()
  xen/scsifront: don't use gnttab_query_foreign_access() for mapped status
  xen/netfront: don't use gnttab_query_foreign_access() for mapped status
  xen/blkfront: don't use gnttab_query_foreign_access() for mapped status
  xen/grant-table: add gnttab_try_end_foreign_access()
  xen/xenbus: don't let xenbus_grant_ring() remove grants in error case
  ARM: fix build warning in proc-v7-bugs.c
  x86, modpost: Replace last remnants of RETPOLINE with CONFIG_RETPOLINE
  x86/build: Fix compiler support check for CONFIG_RETPOLINE
  ARM: Do not use NOCROSSREFS directive with ld.lld
  ARM: fix co-processor register typo
  ARM: fix build error when BPF_SYSCALL is disabled
  ARM: include unprivileged BPF status in Spectre V2 reporting
  ARM: Spectre-BHB workaround
  ARM: use LOADADDR() to get load address of sections
  ARM: early traps initialisation
  ARM: report Spectre v2 status through sysfs
  arm/arm64: smccc/psci: add arm_smccc_1_1_get_conduit()
  arm/arm64: Provide a wrapper for SMCCC 1.1 calls
  x86/speculation: Warn about eIBRS + LFENCE + Unprivileged eBPF + SMT
  x86/speculation: Warn about Spectre v2 LFENCE mitigation
  x86/speculation: Update link to AMD speculation whitepaper
  x86/speculation: Use generic retpoline by default on AMD
  x86/speculation: Include unprivileged eBPF status in Spectre v2 mitigation reporting
  Documentation/hw-vuln: Update spectre doc
  x86/speculation: Add eIBRS + Retpoline options
  x86/speculation: Rename RETPOLINE_AMD to RETPOLINE_LFENCE
  x86,bugs: Unconditionally allow spectre_v2=retpoline,amd
  x86/speculation: Merge one test in spectre_v2_user_select_mitigation()
  Documentation: refer to config RANDOMIZE_BASE for kernel address-space randomization
  Documentation: Add swapgs description to the Spectre v1 documentation
  Documentation: Add section about CPU vulnerabilities for Spectre
  x86/retpoline: Remove minimal retpoline support
  x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support
  x86/speculation: Add RETPOLINE_AMD support to the inline asm CALL_NOSPEC variant
  Linux 4.9.305
  hamradio: fix macro redefine warning
  net: dcb: disable softirqs in dcbnl_flush_dev()
  memfd: fix F_SEAL_WRITE after shmem huge page allocated
  HID: add mapping for KEY_ALL_APPLICATIONS
  Input: elan_i2c - fix regulator enable count imbalance after suspend/resume
  Input: elan_i2c - move regulator_[en|dis]able() out of elan_[en|dis]able_power()
  net: chelsio: cxgb3: check the return value of pci_find_capability()
  soc: fsl: qe: Check of ioremap return value
  ARM: 9182/1: mmu: fix returns from early_param() and __setup() functions
  can: gs_usb: change active_channels's type from atomic_t to u8
  efivars: Respect "block" flag in efivar_entry_set_safe()
  net: arcnet: com20020: Fix null-ptr-deref in com20020pci_probe()
  net: sxgbe: fix return value of __setup handler
  net: stmmac: fix return value of __setup handler
  mac80211: fix forwarded mesh frames AC & queue selection
  firmware: qemu_fw_cfg: fix kobject leak in probe error path
  firmware: Fix a reference count leak.
  net: dcb: flush lingering app table entries for unregistered devices
  netfilter: nf_queue: fix possible use-after-free
  netfilter: nf_queue: don't assume sk is full socket
  xfrm: fix MTU regression
  ASoC: ops: Shift tested values in snd_soc_put_volsw() by +min
  ata: pata_hpt37x: fix PCI clock detection
  usb: gadget: clear related members when goto fail
  usb: gadget: don't release an existing dev->buf
  net: usb: cdc_mbim: avoid altsetting toggling for Telit FN990
  i2c: qup: allow COMPILE_TEST
  dmaengine: shdma: Fix runtime PM imbalance on error
  cifs: fix double free race when mount fails in cifs_get_root()
  Input: clear BTN_RIGHT/MIDDLE on buttonpads
  i2c: bcm2835: Avoid clock stretching timeouts
  mac80211_hwsim: initialize ieee80211_tx_info at hw_scan_work
  mac80211_hwsim: report NOACK frames in tx_status
  Linux 4.9.304
  fget: clarify and improve __fget_files() implementation
  memblock: use kfree() to release kmalloced memblock regions
  tty: n_gsm: fix proper link termination after failed open
  tty: n_gsm: fix encoding of control signal octet bit DV
  xhci: Prevent futile URB re-submissions due to incorrect return value.
  usb: dwc3: gadget: Let the interrupt handler disable bottom halves.
  USB: serial: option: add Telit LE910R1 compositions
  USB: serial: option: add support for DW5829e
  tracefs: Set the group ownership in apply_options() not parse_options()
  USB: gadget: validate endpoint index for xilinx udc
  usb: gadget: rndis: add spinlock for rndis response list
  Revert "USB: serial: ch341: add new Product ID for CH341A"
  ata: pata_hpt37x: disable primary channel on HPT371
  iio: adc: men_z188_adc: Fix a resource leak in an error handling path
  RDMA/ib_srp: Fix a deadlock
  configfs: fix a race in configfs_{,un}register_subsystem()
  net/mlx5e: Fix wrong return value on ioctl EEPROM query failure
  drm/edid: Always set RGB444
  openvswitch: Fix setting ipv6 fields causing hw csum failure
  gso: do not skip outer ip header in case of ipip and net_failover
  net: __pskb_pull_tail() & pskb_carve_frag_list() drop_monitor friends
  serial: 8250: of: Fix mapped region size when using reg-offset property
  serial: 8250: fix error handling in of_platform_serial_probe()
  USB: zaurus: support another broken Zaurus
  sr9700: sanity check for packet length
  parisc/unaligned: Fix ldw() and stw() unalignment handlers
  parisc/unaligned: Fix fldd and fstd unaligned handlers on 32-bit kernel
  vhost/vsock: don't check owner in vhost_vsock_stop() while releasing
  mtd: rawnand: brcmnand: Fixed incorrect sub-page ECC status
  Linux 4.9.303
  net: usb: qmi_wwan: Add support for Dell DW5829e
  tracing: Fix tp_printk option related with tp_printk_stop_on_boot
  ata: libata-core: Disable TRIM on M88V29
  NFS: Do not report writeback errors in nfs_getattr()
  KVM: x86/pmu: Use AMD64_RAW_EVENT_MASK for PERF_TYPE_RAW
  lib/iov_iter: initialize "flags" in new pipe_buffer
  i2c: brcmstb: fix support for DSL and CM variants
  EDAC: Fix calculation of returned address and next offset in edac_align_ptr()
  NFS: LOOKUP_DIRECTORY is also ok with symlinks
  ASoC: ops: Fix stereo change notifications in snd_soc_put_volsw_range()
  ASoC: ops: Fix stereo change notifications in snd_soc_put_volsw()
  ALSA: hda: Fix missing codec probe on Shenker Dock 15
  ALSA: hda: Fix regression on forced probe mask option
  libsubcmd: Fix use-after-free for realloc(..., 0)
  drop_monitor: fix data-race in dropmon_net_event / trace_napi_poll_hit
  iwlwifi: pcie: fix locking when "HW not ready"
  vsock: remove vsock from connected table when connect is interrupted by a signal
  vsock: correct removal of socket from the list
  taskstats: Cleanup the use of task->exit_code
  xfrm: Don't accidentally set RTO_ONLINK in decode_session4()
  drm/radeon: Fix backlight control on iMac 12,1
  quota: make dquot_quota_sync return errors from ->sync_fs
  vfs: make freeze_super abort when sync_filesystem returns error
  ax25: improve the incomplete fix to avoid UAF and NPD bugs
  selftests/zram: Adapt the situation that /dev/zram0 is being used
  selftests/zram01.sh: Fix compression ratio calculation
  selftests/zram: Skip max_comp_streams interface on newer kernel
  net: ieee802154: at86rf230: Stop leaking skb's
  btrfs: send: in case of IO error log it
  parisc: Fix sglist access in ccio-dma.c
  parisc: Fix data TLB miss in sba_unmap_sg
  serial: parisc: GSC: fix build when IOSAPIC is not set
  net: usb: ax88179_178a: Fix out-of-bounds accesses in RX fixup
  Makefile.extrawarn: Move -Wunaligned-access to W=1
  UPSTREAM: net: fix skb_panic to output real address
  UPSTREAM: xfrm: Make function xfrmi_get_link_net() static
  UPSTREAM: xfrm: fix gro_cells leak when remove virtual xfrm interfaces
  UPSTREAM: xfrm interface: fix memory leak on creation
  UPSTREAM: xfrm: clone XFRMA_SET_MARK in xfrm_do_migrate
  UPSTREAM: xfrm/compat: Translate by copying XFRMA_UNSPEC attribute
  UPSTREAM: xfrm/compat: memset(0) 64-bit padding at right place
  UPSTREAM: xfrm/compat: Don't allocate memory with __GFP_ZERO
  UPSTREAM: xfrm/compat: Cleanup WARN()s that can be user-triggered
  UPSTREAM: net: xfrm: fix memory leak in xfrm_user_rcv_msg
  UPSTREAM: arm64/vdso: don't leak kernel addresses
  UPSTREAM: tracing: make PREEMPTIRQ_EVENTS depend on TRACING
  UPSTREAM: trace_uprobe: Use %lx to display offset
  UPSTREAM: kprobes: Fix random address output of blacklist file
  UPSTREAM: mm/huge_memory.c: __split_huge_page() use atomic ClearPageDirty()
  UPSTREAM: x86/realmode: Don't leak the trampoline kernel address
  UPSTREAM: bpf: bpf_prog_array_alloc() should return a generic non-rcu pointer
  UPSTREAM: bpf: fix rcu annotations in compute_effective_progs()
  UPSTREAM: optee: add writeback to valid memory type
  UPSTREAM: lib/test_printf.c: accept "ptrval" as valid result for plain 'p' tests
  UPSTREAM: kdb: use correct pointer when 'btc' calls 'btt'
  UPSTREAM: kdb: print real address of pointers instead of hashed addresses
  UPSTREAM: powerpc/traps: Fix the message printed when stack overflows
  UPSTREAM: f2fs: should use GFP_NOFS for directory inodes
  UPSTREAM: zram: off by one in read_block_state()
  UPSTREAM: tee: fix put order in teedev_close_context()
  UPSTREAM: vsprintf: Replace memory barrier with static_key for random_ptr_key update
  UPSTREAM: ARM: 8896/1: VDSO: Don't leak kernel addresses
  UPSTREAM: parisc: Show unhashed hardware inventory
  UPSTREAM: parisc: Show initial kernel memory layout unhashed
  UPSTREAM: parisc: Show unhashed HPA of Dino chip
  UPSTREAM: parisc: Show unhashed EISA EEPROM address
  UPSTREAM: HID: input: throttle battery uevents
  UPSTREAM: HID: steam: select CONFIG_POWER_SUPPLY
  UPSTREAM: HID: sony: Fix for broken buttons on DS3 USB dongles
  UPSTREAM: HID: input: do not report stylus battery state as "full"
  Linux 4.9.302
  HID: wacom: add USB_HID dependency
  hwmon: (dell-smm) Speed up setting of fan speed
  USB: serial: cp210x: add CPI Bulk Coin Recycler id
  USB: serial: cp210x: add NCR Retail IO box id
  USB: serial: ch341: add support for GW Instek USB2.0-Serial devices
  USB: serial: option: add ZTE MF286D modem
  USB: serial: ftdi_sio: add support for Brainboxes US-159/235/320
  usb: gadget: rndis: check size of RNDIS_MSG_SET command
  USB: gadget: validate interface OS descriptor requests
  usb: dwc3: gadget: Prevent core from processing stale TRBs
  n_tty: wake up poll(POLLRDNORM) on receiving data
  bpf: Add kconfig knob for disabling unpriv bpf by default
  vt_ioctl: add array_index_nospec to VT_ACTIVATE
  vt_ioctl: fix array_index_nospec in vt_setactivate
  tipc: rate limit warning for received illegal binding update
  net: fix a memleak when uncloning an skb dst and its metadata
  net: do not keep the dst cache when uncloning an skb dst and its metadata
  ipmr,ip6mr: acquire RTNL before calling ip[6]mr_free_table() on failure path
  bonding: pair enable_port with slave_arr_updates
  ARM: dts: imx6qdl-udoo: Properly describe the SD card detect
  staging: fbtft: Fix error path in fbtft_driver_module_init()
  ARM: dts: imx23-evk: Remove MX23_PAD_SSP1_DETECT from hog group
  usb: dwc2: gadget: don't try to disable ep0 in dwc2_hsotg_suspend
  scsi: target: iscsi: Make sure the np under each tpg is unique
  NFSv4 remove zero number of fs_locations entries error check
  nfs: nfs4clinet: check the return value of kstrdup()
  NFSv4 only print the label when its queried
  Revert "net: axienet: Wait for PhyRstCmplt after core reset"
  ALSA: line6: Fix misplaced backport of "Fix wrong altsetting for LINE6_PODHD500_1"
  serial: sh-sci: Fix misplaced backport of "Fix late enablement of AUTORTS"
  Input: i8042 - Fix misplaced backport of "add ASUS Zenbook Flip to noselftest list"
  NFSD: Clamp WRITE offsets
  NFS: Fix initialisation of nfs_client cl_flags field
  ima: Remove ima_policy file before directory
  integrity: check the return value of audit_log_start()
  Revert "tracefs: Have tracefs directories not set OTH permission bits by default"
  Linux 4.9.301
  tipc: improve size validations for received domain records
  moxart: fix potential use-after-free on remove path
  cgroup-v1: Require capabilities to set release_agent
  Linux 4.9.300
  ext4: fix error handling in ext4_restore_inline_data()
  EDAC/xgene: Fix deferred probing
  EDAC/altera: Fix deferred probing
  rtc: cmos: Evaluate century appropriate
  nfsd: nfsd4_setclientid_confirm mistakenly expires confirmed client.
  scsi: bnx2fc: Make bnx2fc_recv_frame() mp safe
  ASoC: fsl: Add missing error handling in pcm030_fabric_probe
  net: macsec: Verify that send_sci is on when setting Tx sci explicitly
  net: ieee802154: Return meaningful error codes from the netlink helpers
  spi: mediatek: Avoid NULL pointer crash in interrupt
  spi: bcm-qspi: check for valid cs before applying chip select
  iommu/amd: Fix loop timeout issue in iommu_ga_log_enable()
  drm/nouveau: fix off by one in BIOS boundary checking
  ASoC: ops: Reject out of bounds values in snd_soc_put_xr_sx()
  ASoC: ops: Reject out of bounds values in snd_soc_put_volsw_sx()
  ASoC: ops: Reject out of bounds values in snd_soc_put_volsw()
  af_packet: fix data-race in packet_setsockopt / packet_setsockopt
  rtnetlink: make sure to refresh master_dev/m_ops in __rtnl_newlink()
  net: amd-xgbe: Fix skb data length underflow
  net: amd-xgbe: ensure to reset the tx_timer_active flag
  ipheth: fix EOVERFLOW in ipheth_rcvbulk_callback
  netfilter: nat: limit port clash resolution attempts
  netfilter: nat: remove l4 protocol port rovers
  ipv4: tcp: send zero IPID in SYNACK messages
  ipv4: raw: lock the socket in raw_bind()
  hwmon: (lm90) Reduce maximum conversion rate for G781
  drm/msm: Fix wrong size calculation
  net-procfs: show net devices bound packet types
  NFSv4: nfs_atomic_open() can race when looking up a non-regular file
  NFSv4: Handle case where the lookup of a directory fails
  ipv4: avoid using shared IP generator for connected sockets
  net: fix information leakage in /proc/net/ptype
  ipv6_tunnel: Rate limit warning messages
  scsi: bnx2fc: Flush destroy_work queue before calling bnx2fc_interface_put()
  powerpc/32: Fix boot failure with GCC latent entropy plugin
  USB: core: Fix hang in usb_kill_urb by adding memory barriers
  usb: gadget: f_sourcesink: Fix isoc transfer for USB_SPEED_SUPER_PLUS
  usb-storage: Add unusual-devs entry for VL817 USB-SATA bridge
  tty: Add support for Brainboxes UC cards.
  tty: n_gsm: fix SW flow control encoding/handling
  serial: stm32: fix software flow control transfer
  PM: wakeup: simplify the output logic of pm_show_wakelocks()
  udf: Fix NULL ptr deref when converting from inline format
  udf: Restore i_lenAlloc when inode expansion fails
  scsi: zfcp: Fix failed recovery on gone remote port with non-NPIV FCP devices
  s390/hypfs: include z/VM guests with access control group set
  Bluetooth: refactor malicious adv data check
  can: bcm: fix UAF of bcm op
  BACKPORT: ipv6: Implement draft-ietf-6man-rfc4941bis
  Linux 4.9.299
  ion: Do not 'put' ION handle until after its final use
  ion: Protect kref from userspace manipulation
  ion: Fix use after free during ION_IOC_ALLOC
  ARM: 8800/1: use choice for kernel unwinders
  KVM: X86: MMU: Use the correct inherited permissions to get shadow page
  KVM: nVMX: fix EPT permissions as reported in exit qualification
  NFSv4: Initialise connection to the server in nfs4_alloc_client()
  media: firewire: firedtv-avc: fix a buffer overflow in avc_ca_pmt()
  drm/i915: Flush TLBs before releasing backing store
  Linux 4.9.298
  KVM: do not allow mapping valid but non-reference-counted pages
  KVM: Use kvm_pfn_t for local PFN variable in hva_to_pfn_remapped()
  KVM: do not assume PTE is writable after follow_pfn
  mm: add follow_pte_pmd()
  lib/timerqueue: Rely on rbtree semantics for next timer
  rbtree: cache leftmost node internally
  cipso,calipso: resolve a number of problems with the DOI refcounts
  gianfar: fix jumbo packets+napi+rx overrun crash
  gianfar: simplify FCS handling and fix memory leak
  drm/ttm/nouveau: don't call tt destroy callback on alloc failure.
  gup: document and work around "COW can break either way" issue
  Revert "gup: document and work around "COW can break either way" issue"
  lib82596: Fix IRQ check in sni_82596_probe
  scripts/dtc: dtx_diff: remove broken example from help text
  bcmgenet: add WOL IRQ check
  net_sched: restore "mpu xxx" handling
  dmaengine: at_xdmac: Fix at_xdmac_lld struct definition
  dmaengine: at_xdmac: Fix lld view setting
  dmaengine: at_xdmac: Print debug message after realeasing the lock
  dmaengine: at_xdmac: Don't start transactions at tx_submit level
  libcxgb: Don't accidentally set RTO_ONLINK in cxgb_find_route()
  netns: add schedule point in ops_exit_list()
  net: axienet: fix number of TX ring slots for available check
  net: axienet: Wait for PhyRstCmplt after core reset
  af_unix: annote lockless accesses to unix_tot_inflight & gc_in_progress
  parisc: pdc_stable: Fix memory leak in pdcs_register_pathentries
  net/fsl: xgmac_mdio: Fix incorrect iounmap when removing module
  powerpc/fsl/dts: Enable WA for erratum A-009885 on fman3l MDIO buses
  RDMA/rxe: Fix a typo in opcode name
  RDMA/hns: Modify the mapping attribute of doorbell to device
  drm/radeon: fix error handling in radeon_driver_open_kms
  fuse: fix live lock in fuse_iget()
  fuse: fix bad inode
  ext4: don't use the orphan list when migrating an inode
  ext4: Fix BUG_ON in ext4_bread when write quota data
  ext4: set csum seed in tmp inode while migrating to extents
  iwlwifi: mvm: Increase the scan timeout guard to 30 seconds
  ubifs: Error path in ubifs_remount_rw() seems to wrongly free write buffers
  power: bq25890: Enable continuous conversion for ADC at charging
  ASoC: mediatek: mt8173: fix device_node leak
  scsi: sr: Don't use GFP_DMA
  MIPS: Octeon: Fix build errors using clang
  i2c: designware-pci: Fix to change data types of hcnt and lcnt parameters
  ALSA: seq: Set upper limit of processed events
  w1: Misuse of get_user()/put_user() reported by sparse
  i2c: mpc: Correct I2C reset procedure
  powerpc/smp: Move setup_profiling_timer() under CONFIG_PROFILING
  i2c: i801: Don't silently correct invalid transfer size
  powerpc/btext: add missing of_node_put
  powerpc/cell: add missing of_node_put
  powerpc/powernv: add missing of_node_put
  powerpc/6xx: add missing of_node_put
  parisc: Avoid calling faulthandler_disabled() twice
  serial: core: Keep mctrl register state and cached copy in sync
  serial: pl010: Drop CR register reset on set_termios
  dm space map common: add bounds check to sm_ll_lookup_bitmap()
  dm btree: add a defensive bounds check to insert_at()
  net: mdio: Demote probed message to debug print
  btrfs: remove BUG_ON(!eie) in find_parent_nodes
  btrfs: remove BUG_ON() in find_parent_nodes()
  ACPICA: Executer: Fix the REFCLASS_REFOF case in acpi_ex_opcode_1A_0T_1R()
  ACPICA: Utilities: Avoid deleting the same object twice in a row
  jffs2: GC deadlock reading a page that is used in jffs2_write_begin()
  um: registers: Rename function names to avoid conflicts and build problems
  ath9k: Fix out-of-bound memcpy in ath9k_hif_usb_rx_stream
  usb: hub: Add delay for SuperSpeed hub resume to let links transit to U0
  media: saa7146: hexium_gemini: Fix a NULL pointer dereference in hexium_attach()
  media: igorplugusb: receiver overflow should be reported
  net: bonding: debug: avoid printing debug logs when bond is not notifying peers
  ath10k: Fix tx hanging
  iwlwifi: mvm: synchronize with FW after multicast commands
  media: m920x: don't use stack on USB reads
  media: saa7146: hexium_orion: Fix a NULL pointer dereference in hexium_attach()
  floppy: Add max size check for user space request
  mwifiex: Fix skb_over_panic in mwifiex_usb_recv()
  HSI: core: Fix return freed object in hsi_new_client
  gpiolib: acpi: Do not set the IRQ type if the IRQ is already in use
  media: b2c2: Add missing check in flexcop_pci_isr:
  HID: apple: Do not reset quirks when the Fn key is not found
  usb: gadget: f_fs: Use stream_open() for endpoint files
  ar5523: Fix null-ptr-deref with unexpected WDCMSG_TARGET_START reply
  fs: dlm: filter user dlm messages for kernel locks
  Bluetooth: Fix debugfs entry leak in hci_register_dev()
  RDMA/cxgb4: Set queue pair state when being queried
  mips: bcm63xx: add support for clk_set_parent()
  mips: lantiq: add support for clk_set_parent()
  misc: lattice-ecp3-config: Fix task hung when firmware load failed
  ASoC: samsung: idma: Check of ioremap return value
  dmaengine: pxa/mmp: stop referencing config->slave_id
  RDMA/core: Let ib_find_gid() continue search even after empty entry
  scsi: ufs: Fix race conditions related to driver data
  char/mwave: Adjust io port register size
  ALSA: oss: fix compile error when OSS_DEBUG is enabled
  powerpc/prom_init: Fix improper check of prom_getprop()
  RDMA/hns: Validate the pkey index
  ALSA: hda: Add missing rwsem around snd_ctl_remove() calls
  ALSA: PCM: Add missing rwsem around snd_ctl_remove() calls
  ALSA: jack: Add missing rwsem around snd_ctl_remove() calls
  ext4: avoid trim error on fs with small groups
  net: mcs7830: handle usb read errors properly
  pcmcia: fix setting of kthread task states
  can: xilinx_can: xcan_probe(): check for error irq
  can: softing: softing_startstop(): fix set but not used variable warning
  spi: spi-meson-spifc: Add missing pm_runtime_disable() in meson_spifc_probe
  fsl/fman: Check for null pointer after calling devm_ioremap
  ppp: ensure minimum packet size in ppp_write()
  pcmcia: rsrc_nonstatic: Fix a NULL pointer dereference in nonstatic_find_mem_region()
  pcmcia: rsrc_nonstatic: Fix a NULL pointer dereference in __nonstatic_find_io_region()
  usb: ftdi-elan: fix memory leak on device disconnect
  media: msi001: fix possible null-ptr-deref in msi001_probe()
  media: si2157: Fix "warm" tuner state detection
  media: saa7146: mxb: Fix a NULL pointer dereference in mxb_attach()
  media: dib8000: Fix a memleak in dib8000_init()
  floppy: Fix hang in watchdog when disk is ejected
  serial: amba-pl011: do not request memory region twice
  drm/radeon/radeon_kms: Fix a NULL pointer dereference in radeon_driver_open_kms()
  drm/amdgpu: Fix a NULL pointer dereference in amdgpu_connector_lcd_native_mode()
  arm64: dts: qcom: msm8916: fix MMC controller aliases
  netfilter: bridge: add support for pppoe filtering
  tty: serial: atmel: Call dma_async_issue_pending()
  tty: serial: atmel: Check return code of dmaengine_submit()
  crypto: qce - fix uaf on qce_ahash_register_one
  media: dmxdev: fix UAF when dvb_register_device() fails
  Bluetooth: stop proccessing malicious adv data
  wcn36xx: Indicate beacon not connection loss on MISSED_BEACON_IND
  Bluetooth: cmtp: fix possible panic when cmtp_init_sockets() fails
  shmem: fix a race between shmem_unused_huge_shrink and shmem_evict_inode
  PCI: Add function 1 DMA alias quirk for Marvell 88SE9125 SATA controller
  can: softing_cs: softingcs_probe(): fix memleak on registration failure
  media: stk1160: fix control-message timeouts
  media: pvrusb2: fix control-message timeouts
  media: redrat3: fix control-message timeouts
  media: dib0700: fix undefined behavior in tuner shutdown
  media: s2255: fix control-message timeouts
  media: cpia2: fix control-message timeouts
  media: em28xx: fix control-message timeouts
  media: mceusb: fix control-message timeouts
  media: flexcop-usb: fix control-message timeouts
  rtc: cmos: take rtc_lock while reading from CMOS
  nfc: llcp: fix NULL error pointer dereference on sendmsg() after failed bind()
  HID: wacom: Avoid using stale array indicies to read contact count
  HID: uhid: Fix worker destroying device without any protection
  rtlwifi: rtl8192cu: Fix WARNING when calling local_irq_restore() with interrupts enabled
  media: uvcvideo: fix division by zero at stream start
  drm/i915: Avoid bitwise vs logical OR warning in snb_wm_latency_quirk()
  staging: wlan-ng: Avoid bitwise vs logical OR warning in hfa384x_usb_throttlefn()
  random: fix data race on crng init time
  random: fix data race on crng_node_pool
  can: gs_usb: gs_can_start_xmit(): zero-initialize hf->{flags,reserved}
  can: gs_usb: fix use of uninitialized variable, detach device on reception of invalid USB data
  mfd: intel-lpss: Fix too early PM enablement in the ACPI ->probe()
  USB: Fix "slab-out-of-bounds Write" bug in usb_hcd_poll_rh_status
  USB: core: Fix bug in resuming hub's handling of wakeup requests
  Bluetooth: bfusb: fix division by zero in send path
  Linux 4.9.297
  power: reset: ltc2952: Fix use of floating point literals
  mISDN: change function names to avoid conflicts
  net: udp: fix alignment problem in udp4_seq_show()
  ip6_vti: initialize __ip6_tnl_parm struct in vti6_siocdevprivate
  scsi: libiscsi: Fix UAF in iscsi_conn_get_param()/iscsi_conn_teardown()
  phonet: refcount leak in pep_sock_accep
  arm64: sysreg: Move to use definitions for all the SCTLR bits
  arm64: move !VHE work to end of el2_setup
  arm64: reduce el2_setup branching
  arm64: Remove a redundancy in sysreg.h
  bug: split BUILD_BUG stuff out into <linux/build_bug.h>
  rndis_host: support Hytera digital radios
  xfs: map unwritten blocks in XFS_IOC_{ALLOC,FREE}SP just like fallocate
  sch_qfq: prevent shift-out-of-bounds in qfq_init_qdisc
  i40e: Fix incorrect netdev's real number of RX/TX queues
  mac80211: initialize variable have_higher_than_11mbit
  ieee802154: atusb: fix uninit value in atusb_set_extended_addr
  virtio_pci: Support surprise removal of virtio pci device
  tracing: Tag trace_percpu_buffer as a percpu pointer
  tracing: Fix check for trace_percpu_buffer validity in get_trace_buf()
  Bluetooth: btusb: Apply QCA Rome patches for some ATH3012 models
  Linux 4.9.296
  net: fix use-after-free in tw_timer_handler
  Input: spaceball - fix parsing of movement data packets
  Input: appletouch - initialize work before device registration
  scsi: vmw_pvscsi: Set residual data length conditionally
  usb: gadget: f_fs: Clear ffs_eventfd in ffs_data_clear.
  xhci: Fresco FL1100 controller should not have BROKEN_MSI quirk set.
  uapi: fix linux/nfc.h userspace compilation errors
  nfc: uapi: use kernel size_t to fix user-space builds
  fsl/fman: Fix missing put_device() call in fman_port_probe
  selinux: initialize proto variable in selinux_ip_postroute_compat()
  recordmcount.pl: fix typo in s390 mcount regex
  platform/x86: apple-gmux: use resource_size() with res
  HID: asus: Add depends on USB_HID to HID_ASUS Kconfig option
  Linux 4.9.295
  phonet/pep: refuse to enable an unbound pipe
  hamradio: improve the incomplete fix to avoid NPD
  hamradio: defer ax25 kfree after unregister_netdev
  ax25: NPD bug when detaching AX25 device
  hwmon: (lm90) Do not report 'busy' status bit as alarm
  ARM: 9169/1: entry: fix Thumb2 bug in iWMMXt exception handling
  x86/pkey: Fix undefined behaviour with PKRU_WD_BIT
  ALSA: drivers: opl3: Fix incorrect use of vp->state
  ALSA: jack: Check the return value of kstrdup()
  hwmon: (lm90) Fix usage of CONFIG2 register in detect function
  drivers: net: smc911x: Check for error irq
  fjes: Check for error irq
  bonding: fix ad_actor_system option setting to default
  qlcnic: potential dereference null pointer of rx_queue->page_ring
  IB/qib: Fix memory leak in qib_user_sdma_queue_pkts()
  HID: holtek: fix mouse probing
  can: kvaser_usb: get CAN clock frequency from device
  net: usb: lan78xx: add Allied Telesis AT29M2-AF
  Linux 4.9.294
  xen/netback: don't queue unlimited number of packages
  xen/netback: fix rx queue stall detection
  xen/console: harden hvc_xen against event channel storms
  xen/netfront: harden netfront against event channel storms
  xen/blkfront: harden blkfront against event channel storms
  Input: touchscreen - avoid bitwise vs logical OR warning
  mwifiex: Remove unnecessary braces from HostCmd_SET_SEQ_NO_BSS_INFO
  ARM: 8805/2: remove unneeded naked function usage
  net: lan78xx: Avoid unnecessary self assignment
  scsi: scsi_debug: Sanity check block descriptor length in resp_mode_select()
  fuse: annotate lock in fuse_reverse_inval_entry()
  firmware: arm_scpi: Fix string overflow in SCPI genpd driver
  net: systemport: Add global locking for descriptor lifecycle
  timekeeping: Really make sure wall_to_monotonic isn't positive
  USB: serial: option: add Telit FN990 compositions
  PCI/MSI: Clear PCI_MSIX_FLAGS_MASKALL on error
  USB: gadget: bRequestType is a bitfield, not a enum
  ixgbe: set X550 MDIO speed before talking to PHY
  igbvf: fix double free in `igbvf_probe`
  soc/tegra: fuse: Fix bitwise vs. logical OR warning
  nfsd: fix use-after-free due to delegation race
  dm btree remove: fix use after free in rebalance_children()
  recordmcount.pl: look for jgnop instruction as well as bcrl on s390
  mac80211: send ADDBA requests using the tid/queue of the aggregation session
  hwmon: (dell-smm) Fix warning on /proc/i8k creation error
  tracing: Fix a kmemleak false positive in tracing_map
  net: netlink: af_netlink: Prevent empty skb by adding a check on len.
  i2c: rk3x: Handle a spurious start completion interrupt flag
  parisc/agp: Annotate parisc agp init functions with __init
  net/mlx4_en: Update reported link modes for 1/10G
  nfc: fix segfault in nfc_genl_dump_devices_done
  FROMGIT: USB: gadget: bRequestType is a bitfield, not a enum
  Linux 4.9.293
  irqchip: nvic: Fix offset for Interrupt Priority Offsets
  irqchip/irq-gic-v3-its.c: Force synchronisation when issuing INVALL
  irqchip/armada-370-xp: Fix support for Multi-MSI interrupts
  irqchip/armada-370-xp: Fix return value of armada_370_xp_msi_alloc()
  iio: accel: kxcjk-1013: Fix possible memory leak in probe and remove
  iio: itg3200: Call iio_trigger_notify_done() on error
  iio: kxsd9: Don't return error code in trigger handler
  iio: ltr501: Don't return error code in trigger handler
  iio: mma8452: Fix trigger reference couting
  iio: stk3310: Don't return error code in interrupt handler
  usb: core: config: using bit mask instead of individual bits
  usb: core: config: fix validation of wMaxPacketValue entries
  USB: gadget: zero allocate endpoint 0 buffers
  USB: gadget: detect too-big endpoint 0 requests
  net/qla3xxx: fix an error code in ql_adapter_up()
  net, neigh: clear whole pneigh_entry at alloc time
  net: fec: only clear interrupt of handling queue in fec_enet_rx_queue()
  net: altera: set a couple error code in probe()
  net: cdc_ncm: Allow for dwNtbOutMaxSize to be unset or zero
  block: fix ioprio_get(IOPRIO_WHO_PGRP) vs setuid(2)
  tracefs: Set all files to the same group ownership as the mount option
  signalfd: use wake_up_pollfree()
  binder: use wake_up_pollfree()
  wait: add wake_up_pollfree()
  libata: add horkage for ASMedia 1092
  can: pch_can: pch_can_rx_normal: fix use after free
  tracefs: Have new files inherit the ownership of their parent
  ALSA: pcm: oss: Handle missing errors in snd_pcm_oss_change_params*()
  ALSA: pcm: oss: Limit the period size to 16MB
  ALSA: pcm: oss: Fix negative period/buffer sizes
  ALSA: ctl: Fix copy of updated id with element read/write
  mm: bdi: initialize bdi_min_ratio when bdi is unregistered
  IB/hfi1: Correct guard on eager buffer deallocation
  nfc: fix potential NULL pointer deref in nfc_genl_dump_ses_done
  can: sja1000: fix use after free in ems_pcmcia_add_card()
  HID: check for valid USB device for many HID drivers
  HID: wacom: fix problems when device is not a valid USB device
  HID: add USB_HID dependancy on some USB HID drivers
  HID: add USB_HID dependancy to hid-chicony
  HID: add USB_HID dependancy to hid-prodikeys
  HID: add hid_is_usb() function to make it simpler for USB detection
  HID: introduce hid_is_using_ll_driver
  UPSTREAM: USB: gadget: zero allocate endpoint 0 buffers
  UPSTREAM: USB: gadget: detect too-big endpoint 0 requests

 Conflicts:
	arch/arm64/include/asm/cputype.h
	arch/arm64/kernel/bpi.S
	arch/arm64/kernel/cpu_errata.c
	arch/arm64/kernel/vdso.c
	drivers/clk/qcom/clk-rcg2.c
	drivers/media/dvb-core/dmxdev.c
	drivers/mmc/core/host.c
	drivers/net/usb/lan78xx.c
	drivers/staging/android/ion/ion-ioctl.c
	drivers/staging/android/ion/ion.c
	drivers/staging/android/ion/ion_priv.h
	drivers/usb/gadget/composite.c
	drivers/usb/gadget/function/rndis.c
	drivers/usb/gadget/function/rndis.h
	lib/vsprintf.c
	mm/memory.c
	net/ipv6/ip6_output.c

Change-Id: Ie8bf6aa5dac3ae822cef90decbba577cefedcb31
2022-05-09 16:57:36 +03:00

1643 lines
46 KiB
C

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/memremap.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/sched.h>
#include <linux/rwsem.h>
#include <linux/hugetlb.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include "internal.h"
static struct page *no_page_table(struct vm_area_struct *vma,
unsigned int flags)
{
/*
* When core dumping an enormous anonymous area that nobody
* has touched so far, we don't want to allocate unnecessary pages or
* page tables. Return error instead of NULL to skip handle_mm_fault,
* then get_dump_page() will return NULL to leave a hole in the dump.
* But we can only make this optimization where a hole would surely
* be zero-filled if handle_mm_fault() actually did handle it.
*/
if ((flags & FOLL_DUMP) && (!vma->vm_ops || !vma->vm_ops->fault))
return ERR_PTR(-EFAULT);
return NULL;
}
static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
pte_t *pte, unsigned int flags)
{
/* No page to get reference */
if (flags & FOLL_GET)
return -EFAULT;
if (flags & FOLL_TOUCH) {
pte_t entry = *pte;
if (flags & FOLL_WRITE)
entry = pte_mkdirty(entry);
entry = pte_mkyoung(entry);
if (!pte_same(*pte, entry)) {
set_pte_at(vma->vm_mm, address, pte, entry);
update_mmu_cache(vma, address, pte);
}
}
/* Proper page table entry exists, but no corresponding struct page */
return -EEXIST;
}
/*
* FOLL_FORCE or a forced COW break can write even to unwritable pte's,
* but only after we've gone through a COW cycle and they are dirty.
*/
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
return pte_write(pte) || ((flags & FOLL_COW) && pte_dirty(pte));
}
/*
* A (separate) COW fault might break the page the other way and
* get_user_pages() would return the page from what is now the wrong
* VM. So we need to force a COW break at GUP time even for reads.
*/
static inline bool should_force_cow_break(struct vm_area_struct *vma, unsigned int flags)
{
return is_cow_mapping(vma->vm_flags) && (flags & FOLL_GET);
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd, unsigned int flags)
{
struct mm_struct *mm = vma->vm_mm;
struct dev_pagemap *pgmap = NULL;
struct page *page;
spinlock_t *ptl;
pte_t *ptep, pte;
retry:
if (unlikely(pmd_bad(*pmd)))
return no_page_table(vma, flags);
ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
pte = *ptep;
if (!pte_present(pte)) {
swp_entry_t entry;
/*
* KSM's break_ksm() relies upon recognizing a ksm page
* even while it is being migrated, so for that case we
* need migration_entry_wait().
*/
if (likely(!(flags & FOLL_MIGRATION)))
goto no_page;
if (pte_none(pte))
goto no_page;
entry = pte_to_swp_entry(pte);
if (!is_migration_entry(entry))
goto no_page;
pte_unmap_unlock(ptep, ptl);
migration_entry_wait(mm, pmd, address);
goto retry;
}
if ((flags & FOLL_NUMA) && pte_protnone(pte))
goto no_page;
if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
pte_unmap_unlock(ptep, ptl);
return NULL;
}
page = vm_normal_page(vma, address, pte);
if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
/*
* Only return device mapping pages in the FOLL_GET case since
* they are only valid while holding the pgmap reference.
*/
pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
if (pgmap)
page = pte_page(pte);
else
goto no_page;
} else if (unlikely(!page)) {
if (flags & FOLL_DUMP) {
/* Avoid special (like zero) pages in core dumps */
page = ERR_PTR(-EFAULT);
goto out;
}
if (is_zero_pfn(pte_pfn(pte))) {
page = pte_page(pte);
} else {
int ret;
ret = follow_pfn_pte(vma, address, ptep, flags);
page = ERR_PTR(ret);
goto out;
}
}
if (flags & FOLL_SPLIT && PageTransCompound(page)) {
int ret;
get_page(page);
pte_unmap_unlock(ptep, ptl);
lock_page(page);
ret = split_huge_page(page);
unlock_page(page);
put_page(page);
if (ret)
return ERR_PTR(ret);
goto retry;
}
if (flags & FOLL_GET) {
if (unlikely(!try_get_page(page))) {
page = ERR_PTR(-ENOMEM);
goto out;
}
/* drop the pgmap reference now that we hold the page */
if (pgmap) {
put_dev_pagemap(pgmap);
pgmap = NULL;
}
}
if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page))
set_page_dirty(page);
/*
* pte_mkyoung() would be more correct here, but atomic care
* is needed to avoid losing the dirty bit: it is easier to use
* mark_page_accessed().
*/
mark_page_accessed(page);
}
if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
/* Do not mlock pte-mapped THP */
if (PageTransCompound(page))
goto out;
/*
* The preliminary mapping check is mainly to avoid the
* pointless overhead of lock_page on the ZERO_PAGE
* which might bounce very badly if there is contention.
*
* If the page is already locked, we don't need to
* handle it now - vmscan will handle it later if and
* when it attempts to reclaim the page.
*/
if (page->mapping && trylock_page(page)) {
lru_add_drain(); /* push cached pages to LRU */
/*
* Because we lock page here, and migration is
* blocked by the pte's page reference, and we
* know the page is still mapped, we don't even
* need to check for file-cache page truncation.
*/
mlock_vma_page(page);
unlock_page(page);
}
}
out:
pte_unmap_unlock(ptep, ptl);
return page;
no_page:
pte_unmap_unlock(ptep, ptl);
if (!pte_none(pte))
return NULL;
return no_page_table(vma, flags);
}
/**
* follow_page_mask - look up a page descriptor from a user-virtual address
* @vma: vm_area_struct mapping @address
* @address: virtual address to look up
* @flags: flags modifying lookup behaviour
* @page_mask: on output, *page_mask is set according to the size of the page
*
* @flags can have FOLL_ flags set, defined in <linux/mm.h>
*
* Returns the mapped (struct page *), %NULL if no mapping exists, or
* an error pointer if there is a mapping to something not represented
* by a page descriptor (see also vm_normal_page()).
*/
struct page *follow_page_mask(struct vm_area_struct *vma,
unsigned long address, unsigned int flags,
unsigned int *page_mask)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
spinlock_t *ptl;
struct page *page;
struct mm_struct *mm = vma->vm_mm;
*page_mask = 0;
page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
if (!IS_ERR(page)) {
BUG_ON(flags & FOLL_GET);
return page;
}
pgd = pgd_offset(mm, address);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
return no_page_table(vma, flags);
pud = pud_offset(pgd, address);
if (pud_none(*pud))
return no_page_table(vma, flags);
if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
page = follow_huge_pud(mm, address, pud, flags);
if (page)
return page;
return no_page_table(vma, flags);
}
if (unlikely(pud_bad(*pud)))
return no_page_table(vma, flags);
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
return no_page_table(vma, flags);
if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
page = follow_huge_pmd(mm, address, pmd, flags);
if (page)
return page;
return no_page_table(vma, flags);
}
if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
return no_page_table(vma, flags);
if (pmd_devmap(*pmd)) {
ptl = pmd_lock(mm, pmd);
page = follow_devmap_pmd(vma, address, pmd, flags);
spin_unlock(ptl);
if (page)
return page;
}
if (likely(!pmd_trans_huge(*pmd)))
return follow_page_pte(vma, address, pmd, flags);
ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_trans_huge(*pmd))) {
spin_unlock(ptl);
return follow_page_pte(vma, address, pmd, flags);
}
if (flags & FOLL_SPLIT) {
int ret;
page = pmd_page(*pmd);
if (is_huge_zero_page(page)) {
spin_unlock(ptl);
ret = 0;
split_huge_pmd(vma, pmd, address);
if (pmd_trans_unstable(pmd))
ret = -EBUSY;
} else {
if (unlikely(!try_get_page(page))) {
spin_unlock(ptl);
return ERR_PTR(-ENOMEM);
}
spin_unlock(ptl);
lock_page(page);
ret = split_huge_page(page);
unlock_page(page);
put_page(page);
if (pmd_none(*pmd))
return no_page_table(vma, flags);
}
return ret ? ERR_PTR(ret) :
follow_page_pte(vma, address, pmd, flags);
}
page = follow_trans_huge_pmd(vma, address, pmd, flags);
spin_unlock(ptl);
*page_mask = HPAGE_PMD_NR - 1;
return page;
}
static int get_gate_page(struct mm_struct *mm, unsigned long address,
unsigned int gup_flags, struct vm_area_struct **vma,
struct page **page)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
int ret = -EFAULT;
/* user gate pages are read-only */
if (gup_flags & FOLL_WRITE)
return -EFAULT;
if (address > TASK_SIZE)
pgd = pgd_offset_k(address);
else
pgd = pgd_offset_gate(mm, address);
BUG_ON(pgd_none(*pgd));
pud = pud_offset(pgd, address);
BUG_ON(pud_none(*pud));
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
return -EFAULT;
VM_BUG_ON(pmd_trans_huge(*pmd));
pte = pte_offset_map(pmd, address);
if (pte_none(*pte))
goto unmap;
*vma = get_gate_vma(mm);
if (!page)
goto out;
*page = vm_normal_page(*vma, address, *pte);
if (!*page) {
if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
goto unmap;
*page = pte_page(*pte);
}
if (unlikely(!try_get_page(*page))) {
ret = -ENOMEM;
goto unmap;
}
out:
ret = 0;
unmap:
pte_unmap(pte);
return ret;
}
/*
* mmap_sem must be held on entry. If @nonblocking != NULL and
* *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
* If it is, *@nonblocking will be set to 0 and -EBUSY returned.
*/
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
unsigned long address, unsigned int *flags, int *nonblocking)
{
unsigned int fault_flags = 0;
int ret;
/* mlock all present pages, but do not fault in new pages */
if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
return -ENOENT;
if (*flags & FOLL_WRITE)
fault_flags |= FAULT_FLAG_WRITE;
if (*flags & FOLL_REMOTE)
fault_flags |= FAULT_FLAG_REMOTE;
if (nonblocking)
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
if (*flags & FOLL_NOWAIT)
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
if (*flags & FOLL_TRIED) {
VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
fault_flags |= FAULT_FLAG_TRIED;
}
ret = handle_mm_fault(vma, address, fault_flags);
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
return -ENOMEM;
if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
return -EFAULT;
BUG();
}
if (tsk) {
if (ret & VM_FAULT_MAJOR)
tsk->maj_flt++;
else
tsk->min_flt++;
}
if (ret & VM_FAULT_RETRY) {
if (nonblocking)
*nonblocking = 0;
return -EBUSY;
}
/*
* The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
* necessary, even if maybe_mkwrite decided not to set pte_write. We
* can thus safely do subsequent page lookups as if they were reads.
* But only do so when looping for pte_write is futile: in some cases
* userspace may also be wanting to write to the gotten user page,
* which a read fault here might prevent (a readonly page might get
* reCOWed by userspace write).
*/
if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
*flags |= FOLL_COW;
return 0;
}
static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
{
vm_flags_t vm_flags = vma->vm_flags;
int write = (gup_flags & FOLL_WRITE);
int foreign = (gup_flags & FOLL_REMOTE);
if (vm_flags & (VM_IO | VM_PFNMAP))
return -EFAULT;
if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
return -EFAULT;
if (write) {
if (!(vm_flags & VM_WRITE)) {
if (!(gup_flags & FOLL_FORCE))
return -EFAULT;
/*
* We used to let the write,force case do COW in a
* VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
* set a breakpoint in a read-only mapping of an
* executable, without corrupting the file (yet only
* when that file had been opened for writing!).
* Anon pages in shared mappings are surprising: now
* just reject it.
*/
if (!is_cow_mapping(vm_flags))
return -EFAULT;
}
} else if (!(vm_flags & VM_READ)) {
if (!(gup_flags & FOLL_FORCE))
return -EFAULT;
/*
* Is there actually any vma we can reach here which does not
* have VM_MAYREAD set?
*/
if (!(vm_flags & VM_MAYREAD))
return -EFAULT;
}
/*
* gups are always data accesses, not instruction
* fetches, so execute=false here
*/
if (!arch_vma_access_permitted(vma, write, false, foreign))
return -EFAULT;
return 0;
}
/**
* __get_user_pages() - pin user pages in memory
* @tsk: task_struct of target task
* @mm: mm_struct of target mm
* @start: starting user address
* @nr_pages: number of pages from start to pin
* @gup_flags: flags modifying pin behaviour
* @pages: array that receives pointers to the pages pinned.
* Should be at least nr_pages long. Or NULL, if caller
* only intends to ensure the pages are faulted in.
* @vmas: array of pointers to vmas corresponding to each page.
* Or NULL if the caller does not require them.
* @nonblocking: whether waiting for disk IO or mmap_sem contention
*
* Returns number of pages pinned. This may be fewer than the number
* requested. If nr_pages is 0 or negative, returns 0. If no pages
* were pinned, returns -errno. Each page returned must be released
* with a put_page() call when it is finished with. vmas will only
* remain valid while mmap_sem is held.
*
* Must be called with mmap_sem held. It may be released. See below.
*
* __get_user_pages walks a process's page tables and takes a reference to
* each struct page that each user address corresponds to at a given
* instant. That is, it takes the page that would be accessed if a user
* thread accesses the given user virtual address at that instant.
*
* This does not guarantee that the page exists in the user mappings when
* __get_user_pages returns, and there may even be a completely different
* page there in some cases (eg. if mmapped pagecache has been invalidated
* and subsequently re faulted). However it does guarantee that the page
* won't be freed completely. And mostly callers simply care that the page
* contains data that was valid *at some point in time*. Typically, an IO
* or similar operation cannot guarantee anything stronger anyway because
* locks can't be held over the syscall boundary.
*
* If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
* the page is written to, set_page_dirty (or set_page_dirty_lock, as
* appropriate) must be called after the page is finished with, and
* before put_page is called.
*
* If @nonblocking != NULL, __get_user_pages will not wait for disk IO
* or mmap_sem contention, and if waiting is needed to pin all pages,
* *@nonblocking will be set to 0. Further, if @gup_flags does not
* include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
* this case.
*
* A caller using such a combination of @nonblocking and @gup_flags
* must therefore hold the mmap_sem for reading only, and recognize
* when it's been released. Otherwise, it must be held for either
* reading or writing and will not be released.
*
* In most cases, get_user_pages or get_user_pages_fast should be used
* instead of __get_user_pages. __get_user_pages should be used only if
* you need some special @gup_flags.
*/
static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking)
{
long i = 0;
unsigned int page_mask;
struct vm_area_struct *vma = NULL;
if (!nr_pages)
return 0;
start = untagged_addr(start);
VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
/*
* If FOLL_FORCE is set then do not force a full fault as the hinting
* fault information is unrelated to the reference behaviour of a task
* using the address space
*/
if (!(gup_flags & FOLL_FORCE))
gup_flags |= FOLL_NUMA;
do {
struct page *page;
unsigned int foll_flags = gup_flags;
unsigned int page_increm;
/* first iteration or cross vma bound */
if (!vma || start >= vma->vm_end) {
vma = find_extend_vma(mm, start);
if (!vma && in_gate_area(mm, start)) {
int ret;
ret = get_gate_page(mm, start & PAGE_MASK,
gup_flags, &vma,
pages ? &pages[i] : NULL);
if (ret)
return i ? : ret;
page_mask = 0;
goto next_page;
}
if (!vma || check_vma_flags(vma, gup_flags))
return i ? : -EFAULT;
if (is_vm_hugetlb_page(vma)) {
if (should_force_cow_break(vma, foll_flags))
foll_flags |= FOLL_WRITE;
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i,
foll_flags);
continue;
}
}
if (should_force_cow_break(vma, foll_flags))
foll_flags |= FOLL_WRITE;
retry:
/*
* If we have a pending SIGKILL, don't keep faulting pages and
* potentially allocating memory.
*/
if (unlikely(fatal_signal_pending(current)))
return i ? i : -ERESTARTSYS;
cond_resched();
page = follow_page_mask(vma, start, foll_flags, &page_mask);
if (!page) {
int ret;
ret = faultin_page(tsk, vma, start, &foll_flags,
nonblocking);
switch (ret) {
case 0:
goto retry;
case -EFAULT:
case -ENOMEM:
case -EHWPOISON:
return i ? i : ret;
case -EBUSY:
return i;
case -ENOENT:
goto next_page;
}
BUG();
} else if (PTR_ERR(page) == -EEXIST) {
/*
* Proper page table entry exists, but no corresponding
* struct page.
*/
goto next_page;
} else if (IS_ERR(page)) {
return i ? i : PTR_ERR(page);
}
if (pages) {
pages[i] = page;
flush_anon_page(vma, page, start);
flush_dcache_page(page);
page_mask = 0;
}
next_page:
if (vmas) {
vmas[i] = vma;
page_mask = 0;
}
page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
if (page_increm > nr_pages)
page_increm = nr_pages;
i += page_increm;
start += page_increm * PAGE_SIZE;
nr_pages -= page_increm;
} while (nr_pages);
return i;
}
bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags)
{
bool write = !!(fault_flags & FAULT_FLAG_WRITE);
bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;
if (!(vm_flags & vma->vm_flags))
return false;
/*
* The architecture might have a hardware protection
* mechanism other than read/write that can deny access.
*
* gup always represents data access, not instruction
* fetches, so execute=false here:
*/
if (!arch_vma_access_permitted(vma, write, false, foreign))
return false;
return true;
}
/*
* fixup_user_fault() - manually resolve a user page fault
* @tsk: the task_struct to use for page fault accounting, or
* NULL if faults are not to be recorded.
* @mm: mm_struct of target mm
* @address: user address
* @fault_flags:flags to pass down to handle_mm_fault()
* @unlocked: did we unlock the mmap_sem while retrying, maybe NULL if caller
* does not allow retry
*
* This is meant to be called in the specific scenario where for locking reasons
* we try to access user memory in atomic context (within a pagefault_disable()
* section), this returns -EFAULT, and we want to resolve the user fault before
* trying again.
*
* Typically this is meant to be used by the futex code.
*
* The main difference with get_user_pages() is that this function will
* unconditionally call handle_mm_fault() which will in turn perform all the
* necessary SW fixup of the dirty and young bits in the PTE, while
* get_user_pages() only guarantees to update these in the struct page.
*
* This is important for some architectures where those bits also gate the
* access permission to the page because they are maintained in software. On
* such architectures, gup() will not be enough to make a subsequent access
* succeed.
*
* This function will not return with an unlocked mmap_sem. So it has not the
* same semantics wrt the @mm->mmap_sem as does filemap_fault().
*/
int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
bool *unlocked)
{
struct vm_area_struct *vma;
int ret, major = 0;
if (unlocked)
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
address = untagged_addr(address);
retry:
vma = find_extend_vma(mm, address);
if (!vma || address < vma->vm_start)
return -EFAULT;
if (!vma_permits_fault(vma, fault_flags))
return -EFAULT;
ret = handle_mm_fault(vma, address, fault_flags);
major |= ret & VM_FAULT_MAJOR;
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
return -ENOMEM;
if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
return -EHWPOISON;
if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
return -EFAULT;
BUG();
}
if (ret & VM_FAULT_RETRY) {
down_read(&mm->mmap_sem);
if (!(fault_flags & FAULT_FLAG_TRIED)) {
*unlocked = true;
fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
fault_flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
if (tsk) {
if (major)
tsk->maj_flt++;
else
tsk->min_flt++;
}
return 0;
}
EXPORT_SYMBOL_GPL(fixup_user_fault);
static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
struct vm_area_struct **vmas,
int *locked, bool notify_drop,
unsigned int flags)
{
long ret, pages_done;
bool lock_dropped;
if (locked) {
/* if VM_FAULT_RETRY can be returned, vmas become invalid */
BUG_ON(vmas);
/* check caller initialized locked */
BUG_ON(*locked != 1);
}
if (pages)
flags |= FOLL_GET;
pages_done = 0;
lock_dropped = false;
for (;;) {
ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
vmas, locked);
if (!locked)
/* VM_FAULT_RETRY couldn't trigger, bypass */
return ret;
/* VM_FAULT_RETRY cannot return errors */
if (!*locked) {
BUG_ON(ret < 0);
BUG_ON(ret >= nr_pages);
}
if (!pages)
/* If it's a prefault don't insist harder */
return ret;
if (ret > 0) {
nr_pages -= ret;
pages_done += ret;
if (!nr_pages)
break;
}
if (*locked) {
/* VM_FAULT_RETRY didn't trigger */
if (!pages_done)
pages_done = ret;
break;
}
/* VM_FAULT_RETRY triggered, so seek to the faulting offset */
pages += ret;
start += ret << PAGE_SHIFT;
/*
* Repeat on the address that fired VM_FAULT_RETRY
* without FAULT_FLAG_ALLOW_RETRY but with
* FAULT_FLAG_TRIED.
*/
*locked = 1;
lock_dropped = true;
down_read(&mm->mmap_sem);
ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
pages, NULL, NULL);
if (ret != 1) {
BUG_ON(ret > 1);
if (!pages_done)
pages_done = ret;
break;
}
nr_pages--;
pages_done++;
if (!nr_pages)
break;
pages++;
start += PAGE_SIZE;
}
if (notify_drop && lock_dropped && *locked) {
/*
* We must let the caller know we temporarily dropped the lock
* and so the critical section protected by it was lost.
*/
up_read(&mm->mmap_sem);
*locked = 0;
}
return pages_done;
}
/*
* We can leverage the VM_FAULT_RETRY functionality in the page fault
* paths better by using either get_user_pages_locked() or
* get_user_pages_unlocked().
*
* get_user_pages_locked() is suitable to replace the form:
*
* down_read(&mm->mmap_sem);
* do_something()
* get_user_pages(tsk, mm, ..., pages, NULL);
* up_read(&mm->mmap_sem);
*
* to:
*
* int locked = 1;
* down_read(&mm->mmap_sem);
* do_something()
* get_user_pages_locked(tsk, mm, ..., pages, &locked);
* if (locked)
* up_read(&mm->mmap_sem);
*/
long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
int *locked)
{
return __get_user_pages_locked(current, current->mm, start, nr_pages,
pages, NULL, locked, true,
gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_locked);
/*
* Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to
* pass additional gup_flags as last parameter (like FOLL_HWPOISON).
*
* NOTE: here FOLL_TOUCH is not set implicitly and must be set by the
* caller if required (just like with __get_user_pages). "FOLL_GET",
* "FOLL_WRITE" and "FOLL_FORCE" are set implicitly as needed
* according to the parameters "pages", "write", "force"
* respectively.
*/
__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags)
{
long ret;
int locked = 1;
down_read(&mm->mmap_sem);
ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL,
&locked, false, gup_flags);
if (locked)
up_read(&mm->mmap_sem);
return ret;
}
EXPORT_SYMBOL(__get_user_pages_unlocked);
/*
* get_user_pages_unlocked() is suitable to replace the form:
*
* down_read(&mm->mmap_sem);
* get_user_pages(tsk, mm, ..., pages, NULL);
* up_read(&mm->mmap_sem);
*
* with:
*
* get_user_pages_unlocked(tsk, mm, ..., pages);
*
* It is functionally equivalent to get_user_pages_fast so
* get_user_pages_fast should be used instead, if the two parameters
* "tsk" and "mm" are respectively equal to current and current->mm,
* or if "force" shall be set to 1 (get_user_pages_fast misses the
* "force" parameter).
*/
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags)
{
return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
pages, gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_unlocked);
/*
* get_user_pages_remote() - pin user pages in memory
* @tsk: the task_struct to use for page fault accounting, or
* NULL if faults are not to be recorded.
* @mm: mm_struct of target mm
* @start: starting user address
* @nr_pages: number of pages from start to pin
* @gup_flags: flags modifying lookup behaviour
* @pages: array that receives pointers to the pages pinned.
* Should be at least nr_pages long. Or NULL, if caller
* only intends to ensure the pages are faulted in.
* @vmas: array of pointers to vmas corresponding to each page.
* Or NULL if the caller does not require them.
*
* Returns number of pages pinned. This may be fewer than the number
* requested. If nr_pages is 0 or negative, returns 0. If no pages
* were pinned, returns -errno. Each page returned must be released
* with a put_page() call when it is finished with. vmas will only
* remain valid while mmap_sem is held.
*
* Must be called with mmap_sem held for read or write.
*
* get_user_pages walks a process's page tables and takes a reference to
* each struct page that each user address corresponds to at a given
* instant. That is, it takes the page that would be accessed if a user
* thread accesses the given user virtual address at that instant.
*
* This does not guarantee that the page exists in the user mappings when
* get_user_pages returns, and there may even be a completely different
* page there in some cases (eg. if mmapped pagecache has been invalidated
* and subsequently re faulted). However it does guarantee that the page
* won't be freed completely. And mostly callers simply care that the page
* contains data that was valid *at some point in time*. Typically, an IO
* or similar operation cannot guarantee anything stronger anyway because
* locks can't be held over the syscall boundary.
*
* If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the page
* is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must
* be called after the page is finished with, and before put_page is called.
*
* get_user_pages is typically used for fewer-copy IO operations, to get a
* handle on the memory by some means other than accesses via the user virtual
* addresses. The pages may be submitted for DMA to devices or accessed via
* their kernel linear mapping (via the kmap APIs). Care should be taken to
* use the correct cache flushing APIs.
*
* See also get_user_pages_fast, for performance critical applications.
*
* get_user_pages should be phased out in favor of
* get_user_pages_locked|unlocked or get_user_pages_fast. Nothing
* should use get_user_pages because it cannot pass
* FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
*/
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas)
{
return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
NULL, false,
gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);
/*
* This is the same as get_user_pages_remote(), just with a
* less-flexible calling convention where we assume that the task
* and mm being operated on are the current task's. We also
* obviously don't pass FOLL_REMOTE in here.
*/
long get_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas)
{
return __get_user_pages_locked(current, current->mm, start, nr_pages,
pages, vmas, NULL, false,
gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);
#ifdef CONFIG_FS_DAX
/*
* This is the same as get_user_pages() in that it assumes we are
* operating on the current task's mm, but it goes further to validate
* that the vmas associated with the address range are suitable for
* longterm elevated page reference counts. For example, filesystem-dax
* mappings are subject to the lifetime enforced by the filesystem and
* we need guarantees that longterm users like RDMA and V4L2 only
* establish mappings that have a kernel enforced revocation mechanism.
*
* "longterm" == userspace controlled elevated page count lifetime.
* Contrast this to iov_iter_get_pages() usages which are transient.
*/
long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas_arg)
{
struct vm_area_struct **vmas = vmas_arg;
struct vm_area_struct *vma_prev = NULL;
long rc, i;
if (!pages)
return -EINVAL;
if (!vmas) {
vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *),
GFP_KERNEL);
if (!vmas)
return -ENOMEM;
}
rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
for (i = 0; i < rc; i++) {
struct vm_area_struct *vma = vmas[i];
if (vma == vma_prev)
continue;
vma_prev = vma;
if (vma_is_fsdax(vma))
break;
}
/*
* Either get_user_pages() failed, or the vma validation
* succeeded, in either case we don't need to put_page() before
* returning.
*/
if (i >= rc)
goto out;
for (i = 0; i < rc; i++)
put_page(pages[i]);
rc = -EOPNOTSUPP;
out:
if (vmas != vmas_arg)
kfree(vmas);
return rc;
}
EXPORT_SYMBOL(get_user_pages_longterm);
#endif /* CONFIG_FS_DAX */
/**
* populate_vma_page_range() - populate a range of pages in the vma.
* @vma: target vma
* @start: start address
* @end: end address
* @nonblocking:
*
* This takes care of mlocking the pages too if VM_LOCKED is set.
*
* return 0 on success, negative error code on error.
*
* vma->vm_mm->mmap_sem must be held.
*
* If @nonblocking is NULL, it may be held for read or write and will
* be unperturbed.
*
* If @nonblocking is non-NULL, it must held for read only and may be
* released. If it's released, *@nonblocking will be set to 0.
*/
long populate_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *nonblocking)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long nr_pages = (end - start) / PAGE_SIZE;
int gup_flags;
VM_BUG_ON(start & ~PAGE_MASK);
VM_BUG_ON(end & ~PAGE_MASK);
VM_BUG_ON_VMA(start < vma->vm_start, vma);
VM_BUG_ON_VMA(end > vma->vm_end, vma);
VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
if (vma->vm_flags & VM_LOCKONFAULT)
gup_flags &= ~FOLL_POPULATE;
/*
* We want to touch writable mappings with a write fault in order
* to break COW, except for shared mappings because these don't COW
* and we would not want to dirty them for nothing.
*/
if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
gup_flags |= FOLL_WRITE;
/*
* We want mlock to succeed for regions that have any permissions
* other than PROT_NONE.
*/
if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
gup_flags |= FOLL_FORCE;
/*
* We made sure addr is within a VMA, so the following will
* not result in a stack expansion that recurses back here.
*/
return __get_user_pages(current, mm, start, nr_pages, gup_flags,
NULL, NULL, nonblocking);
}
/*
* __mm_populate - populate and/or mlock pages within a range of address space.
*
* This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
* flags. VMAs must be already marked with the desired vm_flags, and
* mmap_sem must not be held.
*/
int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
{
struct mm_struct *mm = current->mm;
unsigned long end, nstart, nend;
struct vm_area_struct *vma = NULL;
int locked = 0;
long ret = 0;
end = start + len;
for (nstart = start; nstart < end; nstart = nend) {
/*
* We want to fault in pages for [nstart; end) address range.
* Find first corresponding VMA.
*/
if (!locked) {
locked = 1;
down_read(&mm->mmap_sem);
vma = find_vma(mm, nstart);
} else if (nstart >= vma->vm_end)
vma = vma->vm_next;
if (!vma || vma->vm_start >= end)
break;
/*
* Set [nstart; nend) to intersection of desired address
* range with the first VMA. Also, skip undesirable VMA types.
*/
nend = min(end, vma->vm_end);
if (vma->vm_flags & (VM_IO | VM_PFNMAP))
continue;
if (nstart < vma->vm_start)
nstart = vma->vm_start;
/*
* Now fault in a range of pages. populate_vma_page_range()
* double checks the vma flags, so that it won't mlock pages
* if the vma was already munlocked.
*/
ret = populate_vma_page_range(vma, nstart, nend, &locked);
if (ret < 0) {
if (ignore_errors) {
ret = 0;
continue; /* continue at next VMA */
}
break;
}
nend = nstart + ret * PAGE_SIZE;
ret = 0;
}
if (locked)
up_read(&mm->mmap_sem);
return ret; /* 0 or negative error code */
}
/**
* get_dump_page() - pin user page in memory while writing it to core dump
* @addr: user address
*
* Returns struct page pointer of user page pinned for dump,
* to be freed afterwards by put_page().
*
* Returns NULL on any kind of failure - a hole must then be inserted into
* the corefile, to preserve alignment with its headers; and also returns
* NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
* allowing a hole to be left in the corefile to save diskspace.
*
* Called without mmap_sem, but after all other threads have been killed.
*/
#ifdef CONFIG_ELF_CORE
struct page *get_dump_page(unsigned long addr)
{
struct vm_area_struct *vma;
struct page *page;
if (__get_user_pages(current, current->mm, addr, 1,
FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
NULL) < 1)
return NULL;
flush_cache_page(vma, addr, page_to_pfn(page));
return page;
}
#endif /* CONFIG_ELF_CORE */
/*
* Generic RCU Fast GUP
*
* get_user_pages_fast attempts to pin user pages by walking the page
* tables directly and avoids taking locks. Thus the walker needs to be
* protected from page table pages being freed from under it, and should
* block any THP splits.
*
* One way to achieve this is to have the walker disable interrupts, and
* rely on IPIs from the TLB flushing code blocking before the page table
* pages are freed. This is unsuitable for architectures that do not need
* to broadcast an IPI when invalidating TLBs.
*
* Another way to achieve this is to batch up page table containing pages
* belonging to more than one mm_user, then rcu_sched a callback to free those
* pages. Disabling interrupts will allow the fast_gup walker to both block
* the rcu_sched callback, and an IPI that we broadcast for splitting THPs
* (which is a relatively rare event). The code below adopts this strategy.
*
* Before activating this code, please be aware that the following assumptions
* are currently made:
*
* *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free
* pages containing page tables.
*
* *) ptes can be read atomically by the architecture.
*
* *) access_ok is sufficient to validate userspace address ranges.
*
* The last two assumptions can be relaxed by the addition of helper functions.
*
* This code is based heavily on the PowerPC implementation by Nick Piggin.
*/
#ifdef CONFIG_HAVE_GENERIC_RCU_GUP
/*
* Return the compund head page with ref appropriately incremented,
* or NULL if that failed.
*/
static inline struct page *try_get_compound_head(struct page *page, int refs)
{
struct page *head = compound_head(page);
if (WARN_ON_ONCE(page_ref_count(head) < 0))
return NULL;
if (unlikely(!page_cache_add_speculative(head, refs)))
return NULL;
return head;
}
#ifdef __HAVE_ARCH_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
pte_t *ptep, *ptem;
int ret = 0;
ptem = ptep = pte_offset_map(&pmd, addr);
do {
/*
* In the line below we are assuming that the pte can be read
* atomically. If this is not the case for your architecture,
* please wrap this in a helper function!
*
* for an example see gup_get_pte in arch/x86/mm/gup.c
*/
pte_t pte = READ_ONCE(*ptep);
struct page *head, *page;
/*
* Similar to the PMD case below, NUMA hinting must take slow
* path using the pte_protnone check.
*/
if (!pte_present(pte) || pte_special(pte) ||
pte_protnone(pte) || (write && !pte_write(pte)))
goto pte_unmap;
if (!arch_pte_access_permitted(pte, write))
goto pte_unmap;
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
head = try_get_compound_head(page, 1);
if (!head)
goto pte_unmap;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
put_page(head);
goto pte_unmap;
}
VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
(*nr)++;
} while (ptep++, addr += PAGE_SIZE, addr != end);
ret = 1;
pte_unmap:
pte_unmap(ptem);
return ret;
}
#else
/*
* If we can't determine whether or not a pte is special, then fail immediately
* for ptes. Note, we can still pin HugeTLB and THP as these are guaranteed not
* to be special.
*
* For a futex to be placed on a THP tail page, get_futex_key requires a
* __get_user_pages_fast implementation that can pin pages. Thus it's still
* useful to have gup_huge_pmd even if we can't operate on ptes.
*/
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
return 0;
}
#endif /* __HAVE_ARCH_PTE_SPECIAL */
static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
struct page *head, *page;
int refs;
if (write && !pmd_write(orig))
return 0;
refs = 0;
page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
do {
pages[*nr] = page;
(*nr)++;
page++;
refs++;
} while (addr += PAGE_SIZE, addr != end);
head = try_get_compound_head(pmd_page(orig), refs);
if (!head) {
*nr -= refs;
return 0;
}
if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
*nr -= refs;
while (refs--)
put_page(head);
return 0;
}
return 1;
}
static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
struct page *head, *page;
int refs;
if (write && !pud_write(orig))
return 0;
refs = 0;
page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
do {
pages[*nr] = page;
(*nr)++;
page++;
refs++;
} while (addr += PAGE_SIZE, addr != end);
head = try_get_compound_head(pud_page(orig), refs);
if (!head) {
*nr -= refs;
return 0;
}
if (unlikely(pud_val(orig) != pud_val(*pudp))) {
*nr -= refs;
while (refs--)
put_page(head);
return 0;
}
return 1;
}
static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
unsigned long end, int write,
struct page **pages, int *nr)
{
int refs;
struct page *head, *page;
if (write && !pgd_write(orig))
return 0;
refs = 0;
page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
do {
pages[*nr] = page;
(*nr)++;
page++;
refs++;
} while (addr += PAGE_SIZE, addr != end);
head = try_get_compound_head(pgd_page(orig), refs);
if (!head) {
*nr -= refs;
return 0;
}
if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
*nr -= refs;
while (refs--)
put_page(head);
return 0;
}
return 1;
}
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
unsigned long next;
pmd_t *pmdp;
pmdp = pmd_offset(&pud, addr);
do {
pmd_t pmd = READ_ONCE(*pmdp);
next = pmd_addr_end(addr, end);
if (pmd_none(pmd))
return 0;
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
pmd_devmap(pmd))) {
/*
* NUMA hinting faults need to be handled in the GUP
* slowpath for accounting purposes and so that they
* can be serialised against THP migration.
*/
if (pmd_protnone(pmd))
return 0;
if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
pages, nr))
return 0;
} else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
/*
* architecture have different format for hugetlbfs
* pmd format and THP pmd format
*/
if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
PMD_SHIFT, next, write, pages, nr))
return 0;
} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
return 0;
} while (pmdp++, addr = next, addr != end);
return 1;
}
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp;
pudp = pud_offset(&pgd, addr);
do {
pud_t pud = READ_ONCE(*pudp);
next = pud_addr_end(addr, end);
if (pud_none(pud))
return 0;
if (unlikely(pud_huge(pud))) {
if (!gup_huge_pud(pud, pudp, addr, next, write,
pages, nr))
return 0;
} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
PUD_SHIFT, next, write, pages, nr))
return 0;
} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
return 0;
} while (pudp++, addr = next, addr != end);
return 1;
}
/*
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
* the regular GUP. It will only return non-negative values.
*
* Careful, careful! COW breaking can go either way, so a non-write
* access can get ambiguous page results. If you call this function without
* 'write' set, you'd better be sure that you're ok with that ambiguity.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
unsigned long next, flags;
pgd_t *pgdp;
int nr = 0;
start &= PAGE_MASK;
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
(void __user *)start, len)))
return 0;
/*
* Disable interrupts. We use the nested form as we can already have
* interrupts disabled by get_futex_key.
*
* With interrupts disabled, we block page table pages from being
* freed from under us. See mmu_gather_tlb in asm-generic/tlb.h
* for more details.
*
* We do not adopt an rcu_read_lock(.) here as we also want to
* block IPIs that come from THPs splitting.
*
* NOTE! We allow read-only gup_fast() here, but you'd better be
* careful about possible COW pages. You'll get _a_ COW page, but
* not necessarily the one you intended to get depending on what
* COW event happens after this. COW may break the page copy in a
* random direction.
*/
local_irq_save(flags);
pgdp = pgd_offset(mm, addr);
do {
pgd_t pgd = READ_ONCE(*pgdp);
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
break;
if (unlikely(pgd_huge(pgd))) {
if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
pages, &nr))
break;
} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
PGDIR_SHIFT, next, write, pages, &nr))
break;
} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
return nr;
}
/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
* @nr_pages: number of pages from start to pin
* @write: whether pages will be written to
* @pages: array that receives pointers to the pages pinned.
* Should be at least nr_pages long.
*
* Attempt to pin user pages in memory without taking mm->mmap_sem.
* If not successful, it will fall back to taking the lock and
* calling get_user_pages().
*
* Returns number of pages pinned. This may be fewer than the number
* requested. If nr_pages is 0 or negative, returns 0. If no pages
* were pinned, returns -errno.
*/
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
int nr, ret;
start &= PAGE_MASK;
/*
* The FAST_GUP case requires FOLL_WRITE even for pure reads,
* because get_user_pages() may need to cause an early COW in
* order to avoid confusing the normal COW routines. So only
* targets that are already writable are safe to do by just
* looking at the page tables.
*/
nr = __get_user_pages_fast(start, nr_pages, 1, pages);
ret = nr;
if (nr < nr_pages) {
/* Try to get the remaining pages with get_user_pages */
start += nr << PAGE_SHIFT;
pages += nr;
ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
write ? FOLL_WRITE : 0);
/* Have to be a bit careful with return values */
if (nr > 0) {
if (ret < 0)
ret = nr;
else
ret += nr;
}
}
return ret;
}
#endif /* CONFIG_HAVE_GENERIC_RCU_GUP */