android_kernel_xiaomi_sm8350/mm/oom_kill.c
Srinivasarao Pathipati 16e939c6e4 Merge android11-5.4.161+ (b9d179c) into msm-5.4
* refs/heads/tmp-b9d179c:
  UPSTREAM: driver core: Fix possible memory leak in device_link_add()
  UPSTREAM: blk-mq: fix kernel panic during iterating over flush request
  UPSTREAM: net: xfrm: fix memory leak in xfrm_user_rcv_msg
  UPSTREAM: binder: fix the missing BR_FROZEN_REPLY in binder_return_strings
  ANDROID: incremental-fs: fix mount_fs issue
  UPSTREAM: vfs: fs_context: fix up param length parsing in legacy_parse_param
  ANDROID: GKI: disable CONFIG_FORTIFY_SOURCE
  Linux 5.4.161
  erofs: fix unsafe pagevec reuse of hooked pclusters
  erofs: remove the occupied parameter from z_erofs_pagevec_enqueue()
  PCI: Add MSI masking quirk for Nvidia ION AHCI
  PCI/MSI: Deal with devices lying about their MSI mask capability
  PCI/MSI: Destroy sysfs before freeing entries
  parisc/entry: fix trace test in syscall exit path
  fortify: Explicitly disable Clang support
  scsi: ufs: Fix tm request when non-fatal error happens
  ext4: fix lazy initialization next schedule time computation in more granular unit
  MIPS: Fix assembly error from MIPSr2 code used within MIPS_ISA_ARCH_LEVEL
  scsi: ufs: Fix interrupt error message for shared interrupts
  soc/tegra: pmc: Fix imbalanced clock disabling in error code path
  Revert "net: sched: update default qdisc visibility after Tx queue cnt changes"
  Revert "serial: core: Fix initializing and restoring termios speed"
  Linux 5.4.160
  selftests/bpf: Fix also no-alu32 strobemeta selftest
  ath10k: fix invalid dma_addr_t token assignment
  SUNRPC: Partial revert of commit 6f9f17287e78
  PCI: Add PCI_EXP_DEVCTL_PAYLOAD_* macros
  powerpc/powernv/prd: Unregister OPAL_MSG_PRD2 notifier during module unload
  s390/cio: make ccw_device_dma_* more robust
  s390/tape: fix timer initialization in tape_std_assign()
  s390/cio: check the subchannel validity for dev_busid
  video: backlight: Drop maximum brightness override for brightness zero
  mm, oom: do not trigger out_of_memory from the #PF
  mm, oom: pagefault_out_of_memory: don't force global OOM for dying tasks
  powerpc/bpf: Emit stf barrier instruction sequences for BPF_NOSPEC
  powerpc/security: Add a helper to query stf_barrier type
  powerpc/bpf: Fix BPF_SUB when imm == 0x80000000
  powerpc/bpf: Validate branch ranges
  powerpc/lib: Add helper to check if offset is within conditional branch range
  ovl: fix deadlock in splice write
  9p/net: fix missing error check in p9_check_errors
  net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE
  f2fs: should use GFP_NOFS for directory inodes
  irqchip/sifive-plic: Fixup EOI failed when masked
  parisc: Fix set_fixmap() on PA1.x CPUs
  parisc: Fix backtrace to always include init function names
  ARM: 9156/1: drop cc-option fallbacks for architecture selection
  ARM: 9155/1: fix early early_iounmap()
  selftests/net: udpgso_bench_rx: fix port argument
  cxgb4: fix eeprom len when diagnostics not implemented
  net/smc: fix sk_refcnt underflow on linkdown and fallback
  vsock: prevent unnecessary refcnt inc for nonblocking connect
  net: hns3: allow configure ETS bandwidth of all TCs
  net/sched: sch_taprio: fix undefined behavior in ktime_mono_to_any
  bpf: sockmap, strparser, and tls are reusing qdisc_skb_cb and colliding
  arm64: pgtable: make __pte_to_phys/__phys_to_pte_val inline functions
  nfc: pn533: Fix double free when pn533_fill_fragment_skbs() fails
  llc: fix out-of-bound array index in llc_sk_dev_hash()
  perf bpf: Add missing free to bpf_event__print_bpf_prog_info()
  zram: off by one in read_block_state()
  mm/zsmalloc.c: close race window between zs_pool_dec_isolated() and zs_unregister_migration()
  bonding: Fix a use-after-free problem when bond_sysfs_slave_add() failed
  ACPI: PMIC: Fix intel_pmic_regs_handler() read accesses
  net: vlan: fix a UAF in vlan_dev_real_dev()
  net: davinci_emac: Fix interrupt pacing disable
  xen-pciback: Fix return in pm_ctrl_init()
  i2c: xlr: Fix a resource leak in the error handling path of 'xlr_i2c_probe()'
  NFSv4: Fix a regression in nfs_set_open_stateid_locked()
  scsi: qla2xxx: Turn off target reset during issue_lip
  scsi: qla2xxx: Fix gnl list corruption
  ar7: fix kernel builds for compiler test
  watchdog: f71808e_wdt: fix inaccurate report in WDIOC_GETTIMEOUT
  m68k: set a default value for MEMORY_RESERVE
  signal/sh: Use force_sig(SIGKILL) instead of do_group_exit(SIGKILL)
  dmaengine: dmaengine_desc_callback_valid(): Check for `callback_result`
  netfilter: nfnetlink_queue: fix OOB when mac header was cleared
  soc: fsl: dpaa2-console: free buffer before returning from dpaa2_console_read
  auxdisplay: ht16k33: Fix frame buffer device blanking
  auxdisplay: ht16k33: Connect backlight to fbdev
  auxdisplay: img-ascii-lcd: Fix lock-up when displaying empty string
  dmaengine: at_xdmac: fix AT_XDMAC_CC_PERID() macro
  mtd: core: don't remove debugfs directory if device is in use
  mtd: spi-nor: hisi-sfc: Remove excessive clk_disable_unprepare()
  fs: orangefs: fix error return code of orangefs_revalidate_lookup()
  NFS: Fix deadlocks in nfs_scan_commit_list()
  opp: Fix return in _opp_add_static_v2()
  PCI: aardvark: Fix preserving PCI_EXP_RTCTL_CRSSVE flag on emulated bridge
  PCI: aardvark: Don't spam about PIO Response Status
  drm/plane-helper: fix uninitialized variable reference
  pnfs/flexfiles: Fix misplaced barrier in nfs4_ff_layout_prepare_ds
  rpmsg: Fix rpmsg_create_ept return when RPMSG config is not defined
  apparmor: fix error check
  power: supply: bq27xxx: Fix kernel crash on IRQ handler register error
  mips: cm: Convert to bitfield API to fix out-of-bounds access
  powerpc/44x/fsp2: add missing of_node_put
  HID: u2fzero: properly handle timeouts in usb_submit_urb
  HID: u2fzero: clarify error check and length calculations
  serial: xilinx_uartps: Fix race condition causing stuck TX
  phy: qcom-qusb2: Fix a memory leak on probe
  ASoC: cs42l42: Defer probe if request_threaded_irq() returns EPROBE_DEFER
  ASoC: cs42l42: Correct some register default values
  ARM: dts: stm32: fix SAI sub nodes register range
  staging: ks7010: select CRYPTO_HASH/CRYPTO_MICHAEL_MIC
  RDMA/mlx4: Return missed an error if device doesn't support steering
  scsi: csiostor: Uninitialized data in csio_ln_vnp_read_cbfn()
  power: supply: rt5033_battery: Change voltage values to µV
  usb: gadget: hid: fix error code in do_config()
  serial: 8250_dw: Drop wrong use of ACPI_PTR()
  video: fbdev: chipsfb: use memset_io() instead of memset()
  clk: at91: check pmc node status before registering syscore ops
  memory: fsl_ifc: fix leak of irq and nand_irq in fsl_ifc_ctrl_probe
  soc/tegra: Fix an error handling path in tegra_powergate_power_up()
  arm: dts: omap3-gta04a4: accelerometer irq fix
  ALSA: hda: Reduce udelay() at SKL+ position reporting
  JFS: fix memleak in jfs_mount
  MIPS: loongson64: make CPU_LOONGSON64 depends on MIPS_FP_SUPPORT
  scsi: dc395: Fix error case unwinding
  ARM: dts: at91: tse850: the emac<->phy interface is rmii
  arm64: dts: meson-g12a: Fix the pwm regulator supply properties
  RDMA/bnxt_re: Fix query SRQ failure
  ARM: dts: qcom: msm8974: Add xo_board reference clock to DSI0 PHY
  arm64: dts: rockchip: Fix GPU register width for RK3328
  ARM: s3c: irq-s3c24xx: Fix return value check for s3c24xx_init_intc()
  clk: mvebu: ap-cpu-clk: Fix a memory leak in error handling paths
  RDMA/rxe: Fix wrong port_cap_flags
  ibmvnic: Process crqs after enabling interrupts
  ibmvnic: don't stop queue in xmit
  udp6: allow SO_MARK ctrl msg to affect routing
  selftests/bpf: Fix fclose/pclose mismatch in test_progs
  crypto: pcrypt - Delay write to padata->info
  net: phylink: avoid mvneta warning when setting pause parameters
  net: amd-xgbe: Toggle PLL settings during rate change
  drm/amdgpu/gmc6: fix DMA mask from 44 to 40 bits
  wcn36xx: add proper DMA memory barriers in rx path
  libertas: Fix possible memory leak in probe and disconnect
  libertas_tf: Fix possible memory leak in probe and disconnect
  KVM: s390: Fix handle_sske page fault handling
  samples/kretprobes: Fix return value if register_kretprobe() failed
  tcp: don't free a FIN sk_buff in tcp_remove_empty_skb()
  irq: mips: avoid nested irq_enter()
  s390/gmap: don't unconditionally call pte_unmap_unlock() in __gmap_zap()
  libbpf: Fix BTF data layout checks and allow empty BTF
  smackfs: use netlbl_cfg_cipsov4_del() for deleting cipso_v4_doi
  drm/msm: Fix potential NULL dereference in DPU SSPP
  clocksource/drivers/timer-ti-dm: Select TIMER_OF
  PM: hibernate: fix sparse warnings
  nvme-rdma: fix error code in nvme_rdma_setup_ctrl
  phy: micrel: ksz8041nl: do not use power down mode
  mwifiex: Send DELBA requests according to spec
  rsi: stop thread firstly in rsi_91x_init() error handling
  mt76: mt76x02: fix endianness warnings in mt76x02_mac.c
  platform/x86: thinkpad_acpi: Fix bitwise vs. logical warning
  block: ataflop: fix breakage introduced at blk-mq refactoring
  mmc: mxs-mmc: disable regulator on error and in the remove function
  net: stream: don't purge sk_error_queue in sk_stream_kill_queues()
  drm/msm: uninitialized variable in msm_gem_import()
  ath10k: fix max antenna gain unit
  hwmon: (pmbus/lm25066) Let compiler determine outer dimension of lm25066_coeff
  hwmon: Fix possible memleak in __hwmon_device_register()
  net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE
  memstick: jmb38x_ms: use appropriate free function in jmb38x_ms_alloc_host()
  memstick: avoid out-of-range warning
  mmc: sdhci-omap: Fix NULL pointer exception if regulator is not configured
  b43: fix a lower bounds test
  b43legacy: fix a lower bounds test
  hwrng: mtk - Force runtime pm ops for sleep ops
  crypto: qat - disregard spurious PFVF interrupts
  crypto: qat - detect PFVF collision after ACK
  media: dvb-frontends: mn88443x: Handle errors of clk_prepare_enable()
  netfilter: nft_dynset: relax superfluous check on set updates
  EDAC/amd64: Handle three rank interleaving mode
  ath9k: Fix potential interrupt storm on queue reset
  media: em28xx: Don't use ops->suspend if it is NULL
  cpuidle: Fix kobject memory leaks in error paths
  crypto: ecc - fix CRYPTO_DEFAULT_RNG dependency
  kprobes: Do not use local variable when creating debugfs file
  media: cx23885: Fix snd_card_free call on null card pointer
  media: tm6000: Avoid card name truncation
  media: si470x: Avoid card name truncation
  media: radio-wl1273: Avoid card name truncation
  media: mtk-vpu: Fix a resource leak in the error handling path of 'mtk_vpu_probe()'
  media: TDA1997x: handle short reads of hdmi info frame.
  media: dvb-usb: fix uninit-value in az6027_rc_query
  media: cxd2880-spi: Fix a null pointer dereference on error handling path
  media: em28xx: add missing em28xx_close_extension
  drm/amdgpu: fix warning for overflow check
  ath10k: Fix missing frame timestamp for beacon/probe-resp
  net: dsa: rtl8366rb: Fix off-by-one bug
  rxrpc: Fix _usecs_to_jiffies() by using usecs_to_jiffies()
  crypto: caam - disable pkc for non-E SoCs
  Bluetooth: btmtkuart: fix a memleak in mtk_hci_wmt_sync
  wilc1000: fix possible memory leak in cfg_scan_result()
  cgroup: Make rebind_subsystems() disable v2 controllers all at once
  net: net_namespace: Fix undefined member in key_remove_domain()
  virtio-gpu: fix possible memory allocation failure
  drm/v3d: fix wait for TMU write combiner flush
  rcu: Fix existing exp request check in sync_sched_exp_online_cleanup()
  Bluetooth: fix init and cleanup of sco_conn.timeout_work
  selftests/bpf: Fix strobemeta selftest regression
  netfilter: conntrack: set on IPS_ASSURED if flows enters internal stream state
  parisc/kgdb: add kgdb_roundup() to make kgdb work with idle polling
  parisc/unwind: fix unwinder when CONFIG_64BIT is enabled
  task_stack: Fix end_of_stack() for architectures with upwards-growing stack
  parisc: fix warning in flush_tlb_all
  x86/hyperv: Protect set_hv_tscchange_cb() against getting preempted
  spi: bcm-qspi: Fix missing clk_disable_unprepare() on error in bcm_qspi_probe()
  btrfs: do not take the uuid_mutex in btrfs_rm_device
  net: annotate data-race in neigh_output()
  vrf: run conntrack only in context of lower/physdev for locally generated packets
  ARM: 9136/1: ARMv7-M uses BE-8, not BE-32
  gre/sit: Don't generate link-local addr if addr_gen_mode is IN6_ADDR_GEN_MODE_NONE
  ARM: clang: Do not rely on lr register for stacktrace
  smackfs: use __GFP_NOFAIL for smk_cipso_doi()
  iwlwifi: mvm: disable RX-diversity in powersave
  selftests: kvm: fix mismatched fclose() after popen()
  PM: hibernate: Get block device exclusively in swsusp_check()
  nvme: drop scan_lock and always kick requeue list when removing namespaces
  nvmet-tcp: fix use-after-free when a port is removed
  nvmet: fix use-after-free when a port is removed
  block: remove inaccurate requeue check
  mwl8k: Fix use-after-free in mwl8k_fw_state_machine()
  tracing/cfi: Fix cmp_entries_* functions signature mismatch
  workqueue: make sysfs of unbound kworker cpumask more clever
  lib/xz: Validate the value before assigning it to an enum variable
  lib/xz: Avoid overlapping memcpy() with invalid input with in-place decompression
  memstick: r592: Fix a UAF bug when removing the driver
  leaking_addresses: Always print a trailing newline
  ACPI: battery: Accept charges over the design capacity as full
  iov_iter: Fix iov_iter_get_pages{,_alloc} page fault return value
  ath: dfs_pattern_detector: Fix possible null-pointer dereference in channel_detector_create()
  tracefs: Have tracefs directories not set OTH permission bits by default
  net-sysfs: try not to restart the syscall if it will fail eventually
  media: usb: dvb-usb: fix uninit-value bug in dibusb_read_eeprom_byte()
  media: ipu3-imgu: VIDIOC_QUERYCAP: Fix bus_info
  media: ipu3-imgu: imgu_fmt: Handle properly try
  ACPICA: Avoid evaluating methods too early during system resume
  ipmi: Disable some operations during a panic
  media: rcar-csi2: Add checking to rcsi2_start_receiver()
  brcmfmac: Add DMI nvram filename quirk for Cyberbook T116 tablet
  ia64: don't do IA64_CMPXCHG_DEBUG without CONFIG_PRINTK
  media: mceusb: return without resubmitting URB in case of -EPROTO error.
  media: imx: set a media_device bus_info string
  media: s5p-mfc: Add checking to s5p_mfc_probe().
  media: s5p-mfc: fix possible null-pointer dereference in s5p_mfc_probe()
  media: uvcvideo: Set unique vdev name based in type
  media: uvcvideo: Return -EIO for control errors
  media: uvcvideo: Set capability in s_param
  media: stm32: Potential NULL pointer dereference in dcmi_irq_thread()
  media: netup_unidvb: handle interrupt properly according to the firmware
  media: mt9p031: Fix corrupted frame after restarting stream
  ath10k: high latency fixes for beacon buffer
  mwifiex: Properly initialize private structure on interface type changes
  mwifiex: Run SET_BSS_MODE when changing from P2P to STATION vif-type
  x86: Increase exception stack sizes
  smackfs: Fix use-after-free in netlbl_catmap_walk()
  net: sched: update default qdisc visibility after Tx queue cnt changes
  locking/lockdep: Avoid RCU-induced noinstr fail
  MIPS: lantiq: dma: reset correct number of channel
  MIPS: lantiq: dma: add small delay after reset
  platform/x86: wmi: do not fail if disabling fails
  drm/panel-orientation-quirks: add Valve Steam Deck
  Bluetooth: fix use-after-free error in lock_sock_nested()
  Bluetooth: sco: Fix lock_sock() blockage by memcpy_from_msg()
  drm: panel-orientation-quirks: Add quirk for the Samsung Galaxy Book 10.6
  drm: panel-orientation-quirks: Add quirk for KD Kurio Smart C15200 2-in-1
  drm: panel-orientation-quirks: Update the Lenovo Ideapad D330 quirk (v2)
  dma-buf: WARN on dmabuf release with pending attachments
  USB: chipidea: fix interrupt deadlock
  USB: iowarrior: fix control-message timeouts
  USB: serial: keyspan: fix memleak on probe errors
  iio: dac: ad5446: Fix ad5622_write() return value
  pinctrl: core: fix possible memory leak in pinctrl_enable()
  quota: correct error number in free_dqentry()
  quota: check block number when reading the block in quota file
  PCI: aardvark: Read all 16-bits from PCIE_MSI_PAYLOAD_REG
  PCI: aardvark: Fix return value of MSI domain .alloc() method
  PCI: aardvark: Fix reporting Data Link Layer Link Active
  PCI: aardvark: Do not unmask unused interrupts
  PCI: aardvark: Fix checking for link up via LTSSM state
  PCI: aardvark: Do not clear status bits of masked interrupts
  PCI: pci-bridge-emul: Fix emulation of W1C bits
  xen/balloon: add late_initcall_sync() for initial ballooning done
  ALSA: mixer: fix deadlock in snd_mixer_oss_set_volume
  ALSA: mixer: oss: Fix racy access to slots
  serial: core: Fix initializing and restoring termios speed
  powerpc/85xx: Fix oops when mpc85xx_smp_guts_ids node cannot be found
  can: j1939: j1939_can_recv(): ignore messages with invalid source address
  can: j1939: j1939_tp_cmd_recv(): ignore abort message in the BAM transport
  KVM: nVMX: Query current VMCS when determining if MSR bitmaps are in use
  power: supply: max17042_battery: use VFSOC for capacity when no rsns
  power: supply: max17042_battery: Prevent int underflow in set_soc_threshold
  signal/mips: Update (_save|_restore)_fp_context to fail with -EFAULT
  signal: Remove the bogus sigkill_pending in ptrace_stop
  RDMA/qedr: Fix NULL deref for query_qp on the GSI QP
  rsi: Fix module dev_oper_mode parameter description
  rsi: fix rate mask set leading to P2P failure
  rsi: fix key enabled check causing unwanted encryption for vap_id > 0
  rsi: fix occasional initialisation failure with BT coex
  wcn36xx: handle connection loss indication
  libata: fix checking of DMA state
  mwifiex: Read a PCI register after writing the TX ring write pointer
  wcn36xx: Fix HT40 capability for 2Ghz band
  evm: mark evm_fixmode as __ro_after_init
  rtl8187: fix control-message timeouts
  PCI: Mark Atheros QCA6174 to avoid bus reset
  ath10k: fix division by zero in send path
  ath10k: fix control-message timeout
  ath6kl: fix control-message timeout
  ath6kl: fix division by zero in send path
  mwifiex: fix division by zero in fw download path
  EDAC/sb_edac: Fix top-of-high-memory value for Broadwell/Haswell
  regulator: dt-bindings: samsung,s5m8767: correct s5m8767,pmic-buck-default-dvs-idx property
  regulator: s5m8767: do not use reset value as DVS voltage if GPIO DVS is disabled
  hwmon: (pmbus/lm25066) Add offset coefficients
  ia64: kprobes: Fix to pass correct trampoline address to the handler
  btrfs: call btrfs_check_rw_degradable only if there is a missing device
  btrfs: fix lost error handling when replaying directory deletes
  btrfs: clear MISSING device status bit in btrfs_close_one_device
  net/smc: Correct spelling mistake to TCPF_SYN_RECV
  nfp: bpf: relax prog rejection for mtu check through max_pkt_offset
  vmxnet3: do not stop tx queues after netif_device_detach()
  r8169: Add device 10ec:8162 to driver r8169
  nvmet-tcp: fix header digest verification
  drm: panel-orientation-quirks: Add quirk for GPD Win3
  watchdog: Fix OMAP watchdog early handling
  net: multicast: calculate csum of looped-back and forwarded packets
  spi: spl022: fix Microwire full duplex mode
  nvmet-tcp: fix a memory leak when releasing a queue
  xen/netfront: stop tx queues during live migration
  bpf: Prevent increasing bpf_jit_limit above max
  bpf: Define bpf_jit_alloc_exec_limit for arm64 JIT
  drm: panel-orientation-quirks: Add quirk for Aya Neo 2021
  mmc: winbond: don't build on M68K
  reset: socfpga: add empty driver allowing consumers to probe
  ARM: dts: sun7i: A20-olinuxino-lime2: Fix ethernet phy-mode
  hyperv/vmbus: include linux/bitops.h
  sfc: Don't use netif_info before net_device setup
  cavium: Fix return values of the probe function
  scsi: qla2xxx: Fix unmap of already freed sgl
  scsi: qla2xxx: Return -ENOMEM if kzalloc() fails
  cavium: Return negative value when pci_alloc_irq_vectors() fails
  x86/irq: Ensure PI wakeup handler is unregistered before module unload
  x86/cpu: Fix migration safety with X86_BUG_NULL_SEL
  x86/sme: Use #define USE_EARLY_PGTABLE_L5 in mem_encrypt_identity.c
  fuse: fix page stealing
  ALSA: timer: Unconditionally unlink slave instances, too
  ALSA: timer: Fix use-after-free problem
  ALSA: synth: missing check for possible NULL after the call to kstrdup
  ALSA: usb-audio: Add registration quirk for JBL Quantum 400
  ALSA: line6: fix control and interrupt message timeouts
  ALSA: 6fire: fix control and bulk message timeouts
  ALSA: ua101: fix division by zero at probe
  ALSA: hda/realtek: Add quirk for HP EliteBook 840 G7 mute LED
  ALSA: hda/realtek: Add quirk for ASUS UX550VE
  ALSA: hda/realtek: Add a quirk for Acer Spin SP513-54N
  ALSA: hda/realtek: Add quirk for Clevo PC70HS
  media: v4l2-ioctl: Fix check_ext_ctrls
  media: ir-kbd-i2c: improve responsiveness of hauppauge zilog receivers
  media: ite-cir: IR receiver stop working after receive overflow
  crypto: s5p-sss - Add error handling in s5p_aes_probe()
  firmware/psci: fix application of sizeof to pointer
  tpm: Check for integer overflow in tpm2_map_response_body()
  parisc: Fix ptrace check on syscall return
  mmc: dw_mmc: Dont wait for DRTO on Write RSP error
  scsi: qla2xxx: Fix use after free in eh_abort path
  scsi: qla2xxx: Fix kernel crash when accessing port_speed sysfs file
  ocfs2: fix data corruption on truncate
  libata: fix read log timeout value
  Input: i8042 - Add quirk for Fujitsu Lifebook T725
  Input: elantech - fix misreporting trackpoint coordinates
  Input: iforce - fix control-message timeout
  binder: use cred instead of task for getsecid
  binder: use cred instead of task for selinux checks
  binder: use euid from cred instead of using task
  usb: xhci: Enable runtime-pm by default on AMD Yellow Carp platform
  xhci: Fix USB 3.1 enumeration issues by increasing roothub power-on-good delay
  Linux 5.4.159
  rsi: fix control-message timeout
  media: staging/intel-ipu3: css: Fix wrong size comparison imgu_css_fw_init
  staging: rtl8192u: fix control-message timeouts
  staging: r8712u: fix control-message timeout
  comedi: vmk80xx: fix bulk and interrupt message timeouts
  comedi: vmk80xx: fix bulk-buffer overflow
  comedi: vmk80xx: fix transfer-buffer overflows
  comedi: ni_usb6501: fix NULL-deref in command paths
  comedi: dt9812: fix DMA buffers on stack
  isofs: Fix out of bound access for corrupted isofs image
  printk/console: Allow to disable console output by using console="" or console=null
  binder: don't detect sender/target during buffer cleanup
  usb-storage: Add compatibility quirk flags for iODD 2531/2541
  usb: musb: Balance list entry in musb_gadget_queue
  usb: gadget: Mark USB_FSL_QE broken on 64-bit
  usb: ehci: handshake CMD_RUN instead of STS_HALT
  Revert "x86/kvm: fix vcpu-id indexed array sizes"
  Linux 5.4.158
  ARM: 9120/1: Revert "amba: make use of -1 IRQs warn"
  Revert "drm/ttm: fix memleak in ttm_transfered_destroy"
  sfc: Fix reading non-legacy supported link modes
  Revert "usb: core: hcd: Add support for deferring roothub registration"
  Revert "xhci: Set HCD flag to defer primary roothub registration"
  media: firewire: firedtv-avc: fix a buffer overflow in avc_ca_pmt()
  net: ethernet: microchip: lan743x: Fix skb allocation failure
  vrf: Revert "Reset skb conntrack connection..."
  scsi: core: Put LLD module refcnt after SCSI device is released
  Linux 5.4.157
  perf script: Check session->header.env.arch before using it
  KVM: s390: preserve deliverable_mask in __airqs_kick_single_vcpu
  KVM: s390: clear kicked_mask before sleeping again
  cfg80211: correct bridge/4addr mode check
  net: use netif_is_bridge_port() to check for IFF_BRIDGE_PORT
  sctp: add vtag check in sctp_sf_ootb
  sctp: add vtag check in sctp_sf_do_8_5_1_E_sa
  sctp: add vtag check in sctp_sf_violation
  sctp: fix the processing for COOKIE_ECHO chunk
  sctp: fix the processing for INIT_ACK chunk
  sctp: use init_tag from inithdr for ABORT chunk
  phy: phy_start_aneg: Add an unlocked version
  phy: phy_ethtool_ksettings_get: Lock the phy for consistency
  net/tls: Fix flipped sign in async_wait.err assignment
  net: nxp: lpc_eth.c: avoid hang when bringing interface down
  net: ethernet: microchip: lan743x: Fix dma allocation failure by using dma_set_mask_and_coherent
  net: ethernet: microchip: lan743x: Fix driver crash when lan743x_pm_resume fails
  nios2: Make NIOS2_DTB_SOURCE_BOOL depend on !COMPILE_TEST
  RDMA/sa_query: Use strscpy_pad instead of memcpy to copy a string
  net: Prevent infinite while loop in skb_tx_hash()
  net: batman-adv: fix error handling
  regmap: Fix possible double-free in regcache_rbtree_exit()
  arm64: dts: allwinner: h5: NanoPI Neo 2: Fix ethernet node
  RDMA/mlx5: Set user priority for DCT
  nvme-tcp: fix data digest pointer calculation
  nvmet-tcp: fix data digest pointer calculation
  IB/hfi1: Fix abba locking issue with sc_disable()
  IB/qib: Protect from buffer overflow in struct qib_user_sdma_pkt fields
  tcp_bpf: Fix one concurrency problem in the tcp_bpf_send_verdict function
  drm/ttm: fix memleak in ttm_transfered_destroy
  net: lan78xx: fix division by zero in send path
  cfg80211: scan: fix RCU in cfg80211_add_nontrans_list()
  mmc: sdhci-esdhc-imx: clear the buffer_read_ready to reset standard tuning circuit
  mmc: sdhci: Map more voltage level to SDHCI_POWER_330
  mmc: dw_mmc: exynos: fix the finding clock sample value
  mmc: cqhci: clear HALT state after CQE enable
  mmc: vub300: fix control-message timeouts
  net/tls: Fix flipped sign in tls_err_abort() calls
  Revert "net: mdiobus: Fix memory leak in __mdiobus_register"
  nfc: port100: fix using -ERRNO as command type mask
  ata: sata_mv: Fix the error handling of mv_chip_id()
  Revert "pinctrl: bcm: ns: support updated DT binding as syscon subnode"
  usbnet: fix error return code in usbnet_probe()
  usbnet: sanity check for maxpacket
  ipv4: use siphash instead of Jenkins in fnhe_hashfun()
  ipv6: use siphash in rt6_exception_hash()
  powerpc/bpf: Fix BPF_MOD when imm == 1
  ARM: 9141/1: only warn about XIP address when not compile testing
  ARM: 9139/1: kprobes: fix arch_init_kprobes() prototype
  ARM: 9134/1: remove duplicate memcpy() definition
  ARM: 9133/1: mm: proc-macros: ensure *_tlb_fns are 4B aligned
  Linux 5.4.156
  pinctrl: stm32: use valid pin identifier in stm32_pinctrl_resume()
  ARM: 9122/1: select HAVE_FUTEX_CMPXCHG
  tracing: Have all levels of checks prevent recursion
  net: mdiobus: Fix memory leak in __mdiobus_register
  scsi: core: Fix shost->cmd_per_lun calculation in scsi_add_host_with_dma()
  Input: snvs_pwrkey - add clk handling
  ALSA: hda: avoid write to STATESTS if controller is in reset
  platform/x86: intel_scu_ipc: Update timeout value in comment
  isdn: mISDN: Fix sleeping function called from invalid context
  ARM: dts: spear3xx: Fix gmac node
  net: stmmac: add support for dwmac 3.40a
  btrfs: deal with errors when checking if a dir entry exists during log replay
  gcc-plugins/structleak: add makefile var for disabling structleak
  selftests: netfilter: remove stray bash debug line
  netfilter: Kconfig: use 'default y' instead of 'm' for bool config option
  isdn: cpai: check ctr->cnr to avoid array index out of bound
  nfc: nci: fix the UAF of rf_conn_info object
  mm, slub: fix potential memoryleak in kmem_cache_open()
  mm, slub: fix mismatch between reconstructed freelist depth and cnt
  powerpc/idle: Don't corrupt back chain when going idle
  KVM: PPC: Book3S HV: Make idle_kvm_start_guest() return 0 if it went to guest
  KVM: PPC: Book3S HV: Fix stack handling in idle_kvm_start_guest()
  powerpc64/idle: Fix SP offsets when saving GPRs
  audit: fix possible null-pointer dereference in audit_filter_rules
  ASoC: DAPM: Fix missing kctl change notifications
  ALSA: hda/realtek: Add quirk for Clevo PC50HS
  ALSA: usb-audio: Provide quirk for Sennheiser GSP670 Headset
  vfs: check fd has read access in kernel_read_file_from_fd()
  elfcore: correct reference to CONFIG_UML
  ocfs2: mount fails with buffer overflow in strlen
  ocfs2: fix data corruption after conversion from inline format
  ceph: fix handling of "meta" errors
  can: j1939: j1939_xtp_rx_rts_session_new(): abort TP less than 9 bytes
  can: j1939: j1939_xtp_rx_dat_one(): cancel session if receive TP.DT with error length
  can: j1939: j1939_netdev_start(): fix UAF for rx_kref of j1939_priv
  can: j1939: j1939_tp_rxtimer(): fix errant alert in j1939_tp_rxtimer
  can: peak_pci: peak_pci_remove(): fix UAF
  can: peak_usb: pcan_usb_fd_decode_status(): fix back to ERROR_ACTIVE state notification
  can: rcar_can: fix suspend/resume
  net: enetc: fix ethtool counter name for PM0_TERR
  net: stmmac: Fix E2E delay mechanism
  net: hns3: disable sriov before unload hclge layer
  net: hns3: add limit ets dwrr bandwidth cannot be 0
  net: hns3: reset DWRR of unused tc to zero
  NIOS2: irqflags: rename a redefined register name
  net: dsa: lantiq_gswip: fix register definition
  lan78xx: select CRC32
  netfilter: ipvs: make global sysctl readonly in non-init netns
  ASoC: wm8960: Fix clock configuration on slave mode
  dma-debug: fix sg checks in debug_dma_map_sg()
  NFSD: Keep existing listeners on portlist error
  xtensa: xtfpga: Try software restart before simulating CPU reset
  xtensa: xtfpga: use CONFIG_USE_OF instead of CONFIG_OF
  ARM: dts: at91: sama5d2_som1_ek: disable ISC node by default
  tee: optee: Fix missing devices unregister during optee_remove
  net: switchdev: do not propagate bridge updates across bridges
  parisc: math-emu: Fix fall-through warnings
  Linux 5.4.155
  ionic: don't remove netdev->dev_addr when syncing uc list
  r8152: select CRC32 and CRYPTO/CRYPTO_HASH/CRYPTO_SHA256
  qed: Fix missing error code in qed_slowpath_start()
  mqprio: Correct stats in mqprio_dump_class_stats().
  acpi/arm64: fix next_platform_timer() section mismatch error
  drm/msm/dsi: fix off by one in dsi_bus_clk_enable error handling
  drm/msm/dsi: Fix an error code in msm_dsi_modeset_init()
  drm/msm: Fix null pointer dereference on pointer edp
  drm/panel: olimex-lcd-olinuxino: select CRC32
  platform/mellanox: mlxreg-io: Fix argument base in kstrtou32() call
  mlxsw: thermal: Fix out-of-bounds memory accesses
  ata: ahci_platform: fix null-ptr-deref in ahci_platform_enable_regulators()
  pata_legacy: fix a couple uninitialized variable bugs
  NFC: digital: fix possible memory leak in digital_in_send_sdd_req()
  NFC: digital: fix possible memory leak in digital_tg_listen_mdaa()
  nfc: fix error handling of nfc_proto_register()
  ethernet: s2io: fix setting mac address during resume
  net: encx24j600: check error in devm_regmap_init_encx24j600
  net: stmmac: fix get_hw_feature() on old hardware
  net/mlx5e: Mutually exclude RX-FCS and RX-port-timestamp
  net: korina: select CRC32
  net: arc: select CRC32
  gpio: pca953x: Improve bias setting
  sctp: account stream padding length for reconf chunk
  iio: dac: ti-dac5571: fix an error code in probe()
  iio: ssp_sensors: fix error code in ssp_print_mcu_debug()
  iio: ssp_sensors: add more range checking in ssp_parse_dataframe()
  iio: light: opt3001: Fixed timeout error when 0 lux
  iio: mtk-auxadc: fix case IIO_CHAN_INFO_PROCESSED
  iio: adc128s052: Fix the error handling path of 'adc128_probe()'
  iio: adc: aspeed: set driver data when adc probe.
  powerpc/xive: Discard disabled interrupts in get_irqchip_state()
  x86/Kconfig: Do not enable AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT automatically
  nvmem: Fix shift-out-of-bound (UBSAN) with byte size cells
  EDAC/armada-xp: Fix output of uncorrectable error counter
  virtio: write back F_VERSION_1 before validate
  USB: serial: option: add prod. id for Quectel EG91
  USB: serial: option: add Telit LE910Cx composition 0x1204
  USB: serial: option: add Quectel EC200S-CN module support
  USB: serial: qcserial: add EM9191 QDL support
  Input: xpad - add support for another USB ID of Nacon GC-100
  usb: musb: dsps: Fix the probe error path
  efi: Change down_interruptible() in virt_efi_reset_system() to down_trylock()
  efi/cper: use stack buffer for error record decoding
  cb710: avoid NULL pointer subtraction
  xhci: Enable trust tx length quirk for Fresco FL11 USB controller
  xhci: Fix command ring pointer corruption while aborting a command
  xhci: guard accesses to ep_state in xhci_endpoint_reset()
  mei: me: add Ice Lake-N device id.
  x86/resctrl: Free the ctrlval arrays when domain_setup_mon_state() fails
  watchdog: orion: use 0 for unset heartbeat
  btrfs: check for error when looking up inode during dir entry replay
  btrfs: deal with errors when adding inode reference during log replay
  btrfs: deal with errors when replaying dir entry during log replay
  btrfs: unlock newly allocated extent buffer after error
  csky: Fixup regs.sr broken in ptrace
  csky: don't let sigreturn play with priveleged bits of status register
  s390: fix strrchr() implementation
  nds32/ftrace: Fix Error: invalid operands (*UND* and *UND* sections) for `^'
  ALSA: hda/realtek: Fix the mic type detection issue for ASUS G551JW
  ALSA: hda/realtek - ALC236 headset MIC recording issue
  ALSA: hda/realtek: Add quirk for Clevo X170KM-G
  ALSA: hda/realtek: Complete partial device name to avoid ambiguity
  ALSA: seq: Fix a potential UAF by wrong private_free call order
  ALSA: usb-audio: Add quirk for VF0770
  ovl: simplify file splice
  Linux 5.4.154
  sched: Always inline is_percpu_thread()
  scsi: virtio_scsi: Fix spelling mistake "Unsupport" -> "Unsupported"
  scsi: ses: Fix unsigned comparison with less than zero
  drm/amdgpu: fix gart.bo pin_count leak
  net: sun: SUNVNET_COMMON should depend on INET
  mac80211: check return value of rhashtable_init
  net: prevent user from passing illegal stab size
  m68k: Handle arrivals of multiple signals correctly
  mac80211: Drop frames from invalid MAC address in ad-hoc mode
  netfilter: nf_nat_masquerade: defer conntrack walk to work queue
  netfilter: nf_nat_masquerade: make async masq_inet6_event handling generic
  HID: wacom: Add new Intuos BT (CTL-4100WL/CTL-6100WL) device IDs
  netfilter: ip6_tables: zero-initialize fragment offset
  HID: apple: Fix logical maximum and usage maximum of Magic Keyboard JIS
  ext4: correct the error path of ext4_write_inline_data_end()
  net: phy: bcm7xxx: Fixed indirect MMD operations
  UPSTREAM: ovl: simplify file splice
  Linux 5.4.153
  x86/Kconfig: Correct reference to MWINCHIP3D
  x86/hpet: Use another crystalball to evaluate HPET usability
  x86/platform/olpc: Correct ifdef symbol to intended CONFIG_OLPC_XO15_SCI
  RISC-V: Include clone3() on rv32
  bpf, s390: Fix potential memory leak about jit_data
  i2c: acpi: fix resource leak in reconfiguration device addition
  net: prefer socket bound to interface when not in VRF
  i40e: Fix freeing of uninitialized misc IRQ vector
  i40e: fix endless loop under rtnl
  gve: fix gve_get_stats()
  rtnetlink: fix if_nlmsg_stats_size() under estimation
  gve: Correct available tx qpl check
  drm/nouveau/debugfs: fix file release memory leak
  video: fbdev: gbefb: Only instantiate device when built for IP32
  bus: ti-sysc: Use CLKDM_NOAUTO for dra7 dcan1 for errata i893
  netlink: annotate data races around nlk->bound
  net: sfp: Fix typo in state machine debug string
  net/sched: sch_taprio: properly cancel timer from taprio_destroy()
  net: bridge: use nla_total_size_64bit() in br_get_linkxstats_size()
  ARM: imx6: disable the GIC CPU interface before calling stby-poweroff sequence
  arm64: dts: ls1028a: add missing CAN nodes
  arm64: dts: freescale: Fix SP805 clock-names
  ptp_pch: Load module automatically if ID matches
  powerpc/fsl/dts: Fix phy-connection-type for fm1mac3
  net_sched: fix NULL deref in fifo_set_limit()
  phy: mdio: fix memory leak
  bpf: Fix integer overflow in prealloc_elems_and_freelist()
  bpf, arm: Fix register clobbering in div/mod implementation
  xtensa: call irqchip_init only when CONFIG_USE_OF is selected
  xtensa: use CONFIG_USE_OF instead of CONFIG_OF
  xtensa: move XCHAL_KIO_* definitions to kmem_layout.h
  arm64: dts: qcom: pm8150: use qcom,pm8998-pon binding
  ARM: dts: imx: Fix USB host power regulator polarity on M53Menlo
  ARM: dts: imx: Add missing pinctrl-names for panel on M53Menlo
  soc: qcom: mdt_loader: Drop PT_LOAD check on hash segment
  ARM: dts: qcom: apq8064: Use 27MHz PXO clock as DSI PLL reference
  soc: qcom: socinfo: Fixed argument passed to platform_set_data()
  bpf, mips: Validate conditional branch offsets
  MIPS: BPF: Restore MIPS32 cBPF JIT
  ARM: dts: qcom: apq8064: use compatible which contains chipid
  ARM: dts: omap3430-sdp: Fix NAND device node
  xen/balloon: fix cancelled balloon action
  nfsd4: Handle the NFSv4 READDIR 'dircount' hint being zero
  nfsd: fix error handling of register_pernet_subsys() in init_nfsd()
  ovl: fix missing negative dentry check in ovl_rename()
  mmc: meson-gx: do not use memcpy_to/fromio for dram-access-quirk
  xen/privcmd: fix error handling in mmap-resource processing
  usb: typec: tcpm: handle SRC_STARTUP state if cc changes
  USB: cdc-acm: fix break reporting
  USB: cdc-acm: fix racy tty buffer accesses
  Partially revert "usb: Kconfig: using select for USB_COMMON dependency"
  ANDROID: Different fix for KABI breakage in 5.4.151 in struct sock
  Linux 5.4.152
  libata: Add ATA_HORKAGE_NO_NCQ_ON_ATI for Samsung 860 and 870 SSD.
  silence nfscache allocation warnings with kvzalloc
  perf/x86: Reset destroy callback on event init failure
  kvm: x86: Add AMD PMU MSRs to msrs_to_save_all[]
  KVM: do not shrink halt_poll_ns below grow_start
  tools/vm/page-types: remove dependency on opt_file for idle page tracking
  scsi: ses: Retry failed Send/Receive Diagnostic commands
  selftests:kvm: fix get_warnings_count() ignoring fscanf() return warn
  selftests: be sure to make khdr before other targets
  usb: dwc2: check return value after calling platform_get_resource()
  usb: testusb: Fix for showing the connection speed
  scsi: sd: Free scsi_disk device via put_device()
  ext2: fix sleeping in atomic bugs on error
  sparc64: fix pci_iounmap() when CONFIG_PCI is not set
  xen-netback: correct success/error reporting for the SKB-with-fraglist case
  net: mdio: introduce a shutdown method to mdio device drivers
  ANDROID: Fix up KABI breakage in 5.4.151 in struct sock
  Linux 5.4.151
  HID: usbhid: free raw_report buffers in usbhid_stop
  netfilter: ipset: Fix oversized kvmalloc() calls
  HID: betop: fix slab-out-of-bounds Write in betop_probe
  crypto: ccp - fix resource leaks in ccp_run_aes_gcm_cmd()
  usb: hso: remove the bailout parameter
  usb: hso: fix error handling code of hso_create_net_device
  hso: fix bailout in error case of probe
  libnvdimm/pmem: Fix crash triggered when I/O in-flight during unbind
  PCI: Fix pci_host_bridge struct device release/free handling
  net: stmmac: don't attach interface until resume finishes
  net: udp: annotate data race around udp_sk(sk)->corkflag
  HID: u2fzero: ignore incomplete packets without data
  ext4: fix potential infinite loop in ext4_dx_readdir()
  ext4: fix reserved space counter leakage
  ext4: fix loff_t overflow in ext4_max_bitmap_size()
  ipack: ipoctal: fix module reference leak
  ipack: ipoctal: fix missing allocation-failure check
  ipack: ipoctal: fix tty-registration error handling
  ipack: ipoctal: fix tty registration race
  ipack: ipoctal: fix stack information leak
  debugfs: debugfs_create_file_size(): use IS_ERR to check for error
  elf: don't use MAP_FIXED_NOREPLACE for elf interpreter mappings
  perf/x86/intel: Update event constraints for ICX
  af_unix: fix races in sk_peer_pid and sk_peer_cred accesses
  net: sched: flower: protect fl_walk() with rcu
  net: hns3: do not allow call hns3_nic_net_open repeatedly
  scsi: csiostor: Add module softdep on cxgb4
  Revert "block, bfq: honor already-setup queue merges"
  selftests, bpf: test_lwt_ip_encap: Really disable rp_filter
  e100: fix buffer overrun in e100_get_regs
  e100: fix length calculation in e100_get_regs_len
  net: ipv4: Fix rtnexthop len when RTA_FLOW is present
  hwmon: (tmp421) fix rounding for negative values
  hwmon: (tmp421) report /PVLD condition as fault
  sctp: break out if skb_header_pointer returns NULL in sctp_rcv_ootb
  mac80211-hwsim: fix late beacon hrtimer handling
  mac80211: mesh: fix potentially unaligned access
  mac80211: limit injected vht mcs/nss in ieee80211_parse_tx_radiotap
  mac80211: Fix ieee80211_amsdu_aggregate frag_tail bug
  hwmon: (mlxreg-fan) Return non-zero value when fan current state is enforced from sysfs
  ipvs: check that ip_vs_conn_tab_bits is between 8 and 20
  drm/amd/display: Pass PCI deviceid into DC
  x86/kvmclock: Move this_cpu_pvti into kvmclock.h
  mac80211: fix use-after-free in CCMP/GCMP RX
  scsi: ufs: Fix illegal offset in UPIU event trace
  hwmon: (w83791d) Fix NULL pointer dereference by removing unnecessary structure field
  hwmon: (w83792d) Fix NULL pointer dereference by removing unnecessary structure field
  hwmon: (w83793) Fix NULL pointer dereference by removing unnecessary structure field
  fs-verity: fix signed integer overflow with i_size near S64_MAX
  usb: cdns3: fix race condition before setting doorbell
  cpufreq: schedutil: Destroy mutex before kobject_put() frees the memory
  cpufreq: schedutil: Use kobject release() method to free sugov_tunables
  tty: Fix out-of-bound vmalloc access in imageblit
  Revert "crypto: public_key: fix overflow during implicit conversion"
  Linux 5.4.150
  qnx4: work around gcc false positive warning bug
  xen/balloon: fix balloon kthread freezing
  arm64: dts: marvell: armada-37xx: Extend PCIe MEM space
  thermal/drivers/int340x: Do not set a wrong tcc offset on resume
  EDAC/synopsys: Fix wrong value type assignment for edac_mode
  spi: Fix tegra20 build with CONFIG_PM=n
  net: 6pack: Fix tx timeout and slot time
  alpha: Declare virt_to_phys and virt_to_bus parameter as pointer to volatile
  arm64: Mark __stack_chk_guard as __ro_after_init
  parisc: Use absolute_pointer() to define PAGE0
  qnx4: avoid stringop-overread errors
  sparc: avoid stringop-overread errors
  net: i825xx: Use absolute_pointer for memcpy from fixed memory location
  compiler.h: Introduce absolute_pointer macro
  blk-cgroup: fix UAF by grabbing blkcg lock before destroying blkg pd
  sparc32: page align size in arch_dma_alloc
  nvme-multipath: fix ANA state updates when a namespace is not present
  xen/balloon: use a kernel thread instead a workqueue
  bpf: Add oversize check before call kvcalloc()
  ipv6: delay fib6_sernum increase in fib6_add
  m68k: Double cast io functions to unsigned long
  net: stmmac: allow CSR clock of 300MHz
  net: macb: fix use after free on rmmod
  blktrace: Fix uaf in blk_trace access after removing by sysfs
  md: fix a lock order reversal in md_alloc
  irqchip/gic-v3-its: Fix potential VPE leak on error
  irqchip/goldfish-pic: Select GENERIC_IRQ_CHIP to fix build
  scsi: lpfc: Use correct scnprintf() limit
  scsi: qla2xxx: Restore initiator in dual mode
  cifs: fix a sign extension bug
  thermal/core: Potential buffer overflow in thermal_build_list_of_policies()
  fpga: machxo2-spi: Fix missing error code in machxo2_write_complete()
  fpga: machxo2-spi: Return an error on failure
  tty: synclink_gt: rename a conflicting function name
  tty: synclink_gt, drop unneeded forward declarations
  scsi: iscsi: Adjust iface sysfs attr detection
  net/mlx4_en: Don't allow aRFS for encapsulated packets
  qed: rdma - don't wait for resources under hw error recovery flow
  gpio: uniphier: Fix void functions to remove return value
  net/smc: add missing error check in smc_clc_prfx_set()
  bnxt_en: Fix TX timeout when TX ring size is set to the smallest
  enetc: Fix illegal access when reading affinity_hint
  platform/x86/intel: punit_ipc: Drop wrong use of ACPI_PTR()
  afs: Fix incorrect triggering of sillyrename on 3rd-party invalidation
  net: hso: fix muxed tty registration
  serial: mvebu-uart: fix driver's tx_empty callback
  xhci: Set HCD flag to defer primary roothub registration
  btrfs: prevent __btrfs_dump_space_info() to underflow its free space
  erofs: fix up erofs_lookup tracepoint
  mcb: fix error handling in mcb_alloc_bus()
  USB: serial: option: add device id for Foxconn T99W265
  USB: serial: option: remove duplicate USB device ID
  USB: serial: option: add Telit LN920 compositions
  USB: serial: mos7840: remove duplicated 0xac24 device ID
  usb: core: hcd: Add support for deferring roothub registration
  Re-enable UAS for LaCie Rugged USB3-FW with fk quirk
  staging: greybus: uart: fix tty use after free
  binder: make sure fd closes complete
  USB: cdc-acm: fix minor-number release
  USB: serial: cp210x: add ID for GW Instek GDM-834x Digital Multimeter
  usb-storage: Add quirk for ScanLogic SL11R-IDE older than 2.6c
  xen/x86: fix PV trap handling on secondary processors
  cifs: fix incorrect check for null pointer in header_assemble
  usb: musb: tusb6010: uninitialized data in tusb_fifo_write_unaligned()
  usb: dwc2: gadget: Fix ISOC transfer complete handling for DDMA
  usb: dwc2: gadget: Fix ISOC flow for BDMA and Slave
  usb: gadget: r8a66597: fix a loop in set_feature()
  ocfs2: drop acl cache for directories too
  Linux 5.4.149
  drm/nouveau/nvkm: Replace -ENOSYS with -ENODEV
  rtc: rx8010: select REGMAP_I2C
  blk-throttle: fix UAF by deleteing timer in blk_throtl_exit()
  pwm: stm32-lp: Don't modify HW state in .remove() callback
  pwm: rockchip: Don't modify HW state in .remove() callback
  pwm: img: Don't modify HW state in .remove() callback
  nilfs2: fix memory leak in nilfs_sysfs_delete_snapshot_group
  nilfs2: fix memory leak in nilfs_sysfs_create_snapshot_group
  nilfs2: fix memory leak in nilfs_sysfs_delete_##name##_group
  nilfs2: fix memory leak in nilfs_sysfs_create_##name##_group
  nilfs2: fix NULL pointer in nilfs_##name##_attr_release
  nilfs2: fix memory leak in nilfs_sysfs_create_device_group
  btrfs: fix lockdep warning while mounting sprout fs
  ceph: lockdep annotations for try_nonblocking_invalidate
  ceph: request Fw caps before updating the mtime in ceph_write_iter
  dmaengine: xilinx_dma: Set DMA mask for coherent APIs
  dmaengine: ioat: depends on !UML
  dmaengine: sprd: Add missing MODULE_DEVICE_TABLE
  parisc: Move pci_dev_is_behind_card_dino to where it is used
  drivers: base: cacheinfo: Get rid of DEFINE_SMP_CALL_CACHE_FUNCTION()
  thermal/core: Fix thermal_cooling_device_register() prototype
  Kconfig.debug: drop selecting non-existing HARDLOCKUP_DETECTOR_ARCH
  net: stmmac: reset Tx desc base address before restarting Tx
  phy: avoid unnecessary link-up delay in polling mode
  pwm: lpc32xx: Don't modify HW state in .probe() after the PWM chip was registered
  profiling: fix shift-out-of-bounds bugs
  nilfs2: use refcount_dec_and_lock() to fix potential UAF
  prctl: allow to setup brk for et_dyn executables
  9p/trans_virtio: Remove sysfs file on probe failure
  thermal/drivers/exynos: Fix an error code in exynos_tmu_probe()
  dmaengine: acpi: Avoid comparison GSI with Linux vIRQ
  um: virtio_uml: fix memory leak on init failures
  staging: rtl8192u: Fix bitwise vs logical operator in TranslateRxSignalStuff819xUsb()
  sctp: add param size validation for SCTP_PARAM_SET_PRIMARY
  sctp: validate chunk size in __rcv_asconf_lookup
  ARM: 9098/1: ftrace: MODULE_PLT: Fix build problem without DYNAMIC_FTRACE
  ARM: 9079/1: ftrace: Add MODULE_PLTS support
  ARM: 9078/1: Add warn suppress parameter to arm_gen_branch_link()
  ARM: 9077/1: PLT: Move struct plt_entries definition to header
  apparmor: remove duplicate macro list_entry_is_head()
  ARM: Qualify enabling of swiotlb_init()
  s390/pci_mmio: fully validate the VMA before calling follow_pte()
  console: consume APC, DM, DCS
  KVM: remember position in kvm->vcpus array
  PCI/ACPI: Add Ampere Altra SOC MCFG quirk
  PCI: aardvark: Fix reporting CRS value
  PCI: pci-bridge-emul: Add PCIe Root Capabilities Register
  PCI: aardvark: Indicate error in 'val' when config read fails
  PCI: pci-bridge-emul: Fix big-endian support
  Linux 5.4.148
  s390/bpf: Fix 64-bit subtraction of the -0x80000000 constant
  s390/bpf: Fix optimizing out zero-extensions
  net: renesas: sh_eth: Fix freeing wrong tx descriptor
  ip_gre: validate csum_start only on pull
  qlcnic: Remove redundant unlock in qlcnic_pinit_from_rom
  fq_codel: reject silly quantum parameters
  netfilter: socket: icmp6: fix use-after-scope
  net: dsa: b53: Fix calculating number of switch ports
  perf unwind: Do not overwrite FEATURE_CHECK_LDFLAGS-libunwind-{x86,aarch64}
  ARC: export clear_user_page() for modules
  mtd: rawnand: cafe: Fix a resource leak in the error handling path of 'cafe_nand_probe()'
  PCI: Sync __pci_register_driver() stub for CONFIG_PCI=n
  KVM: arm64: Handle PSCI resets before userspace touches vCPU state
  mfd: tqmx86: Clear GPIO IRQ resource when no IRQ is set
  PCI: Fix pci_dev_str_match_path() alloc while atomic bug
  mfd: axp20x: Update AXP288 volatile ranges
  NTB: perf: Fix an error code in perf_setup_inbuf()
  NTB: Fix an error code in ntb_msit_probe()
  ethtool: Fix an error code in cxgb2.c
  PCI: ibmphp: Fix double unmap of io_mem
  block, bfq: honor already-setup queue merges
  net: usb: cdc_mbim: avoid altsetting toggling for Telit LN920
  Set fc_nlinfo in nh_create_ipv4, nh_create_ipv6
  PCI: Add ACS quirks for Cavium multi-function devices
  tracing/probes: Reject events which have the same name of existing one
  mfd: Don't use irq_create_mapping() to resolve a mapping
  fuse: fix use after free in fuse_read_interrupt()
  PCI: Add ACS quirks for NXP LX2xx0 and LX2xx2 platforms
  mfd: db8500-prcmu: Adjust map to reality
  dt-bindings: mtd: gpmc: Fix the ECC bytes vs. OOB bytes equation
  mm/memory_hotplug: use "unsigned long" for PFN in zone_for_pfn_range()
  net: hns3: fix the timing issue of VF clearing interrupt sources
  net: hns3: disable mac in flr process
  net: hns3: change affinity_mask to numa node range
  net: hns3: pad the short tunnel frame before sending to hardware
  KVM: PPC: Book3S HV: Tolerate treclaim. in fake-suspend mode changing registers
  ibmvnic: check failover_pending in login response
  dt-bindings: arm: Fix Toradex compatible typo
  qed: Handle management FW error
  tcp: fix tp->undo_retrans accounting in tcp_sacktag_one()
  net: dsa: destroy the phylink instance on any error in dsa_slave_phy_setup
  net/af_unix: fix a data-race in unix_dgram_poll
  vhost_net: fix OoB on sendmsg() failure.
  events: Reuse value read using READ_ONCE instead of re-reading it
  net/mlx5: Fix potential sleeping in atomic context
  net/mlx5: FWTrace, cancel work on alloc pd error flow
  perf machine: Initialize srcline string member in add_location struct
  tipc: increase timeout in tipc_sk_enqueue()
  r6040: Restore MDIO clock frequency after MAC reset
  net/l2tp: Fix reference count leak in l2tp_udp_recv_core
  dccp: don't duplicate ccid when cloning dccp sock
  ptp: dp83640: don't define PAGE0
  net-caif: avoid user-triggerable WARN_ON(1)
  tipc: fix an use-after-free issue in tipc_recvmsg
  x86/mm: Fix kern_addr_valid() to cope with existing but not present entries
  s390/sclp: fix Secure-IPL facility detection
  drm/etnaviv: add missing MMU context put when reaping MMU mapping
  drm/etnaviv: reference MMU context when setting up hardware state
  drm/etnaviv: fix MMU context leak on GPU reset
  drm/etnaviv: exec and MMU state is lost when resetting the GPU
  drm/etnaviv: keep MMU context across runtime suspend/resume
  drm/etnaviv: stop abusing mmu_context as FE running marker
  drm/etnaviv: put submit prev MMU context when it exists
  drm/etnaviv: return context from etnaviv_iommu_context_get
  drm/amd/amdgpu: Increase HWIP_MAX_INSTANCE to 10
  PCI: Add AMD GPU multi-function power dependencies
  PM: base: power: don't try to use non-existing RTC for storing data
  arm64/sve: Use correct size when reinitialising SVE state
  bnx2x: Fix enabling network interfaces without VFs
  xen: reset legacy rtc flag for PV domU
  btrfs: fix upper limit for max_inline for page size 64K
  drm/panfrost: Clamp lock region to Bifrost minimum
  drm/panfrost: Use u64 for size in lock_region
  drm/panfrost: Simplify lock_region calculation
  drm/amdgpu: Fix BUG_ON assert
  drm/msi/mdp4: populate priv->kms in mdp4_kms_init
  net: dsa: lantiq_gswip: fix maximum frame length
  lib/test_stackinit: Fix static initializer test
  platform/chrome: cros_ec_proto: Send command again when timeout occurs
  memcg: enable accounting for pids in nested pid namespaces
  mm,vmscan: fix divide by zero in get_scan_count
  mm/hugetlb: initialize hugetlb_usage in mm_init
  s390/pv: fix the forcing of the swiotlb
  cpufreq: powernv: Fix init_chip_info initialization in numa=off
  scsi: qla2xxx: Sync queue idx with queue_pair_map idx
  scsi: qla2xxx: Changes to support kdump kernel
  scsi: BusLogic: Fix missing pr_cont() use
  ovl: fix BUG_ON() in may_delete() when called from ovl_cleanup()
  parisc: fix crash with signals and alloca
  net: w5100: check return value after calling platform_get_resource()
  fix array-index-out-of-bounds in taprio_change
  net: fix NULL pointer reference in cipso_v4_doi_free
  ath9k: fix sleeping in atomic context
  ath9k: fix OOB read ar9300_eeprom_restore_internal
  parport: remove non-zero check on count
  net/mlx5: DR, Enable QP retransmission
  iwlwifi: mvm: fix access to BSS elements
  iwlwifi: mvm: avoid static queue number aliasing
  iwlwifi: mvm: fix a memory leak in iwl_mvm_mac_ctxt_beacon_changed
  drm/amdkfd: Account for SH/SE count when setting up cu masks.
  ASoC: rockchip: i2s: Fixup config for DAIFMT_DSP_A/B
  ASoC: rockchip: i2s: Fix regmap_ops hang
  usbip:vhci_hcd USB port can get stuck in the disabled state
  usbip: give back URBs for unsent unlink requests during cleanup
  usb: musb: musb_dsps: request_irq() after initializing musb
  Revert "USB: xhci: fix U1/U2 handling for hardware with XHCI_INTEL_HOST quirk set"
  cifs: fix wrong release in sess_alloc_buffer() failed path
  mmc: core: Return correct emmc response in case of ioctl error
  selftests/bpf: Enlarge select() timeout for test_maps
  mmc: rtsx_pci: Fix long reads when clock is prescaled
  mmc: sdhci-of-arasan: Check return value of non-void funtions
  of: Don't allow __of_attached_node_sysfs() without CONFIG_SYSFS
  ASoC: Intel: Skylake: Fix passing loadable flag for module
  ASoC: Intel: Skylake: Fix module configuration for KPB and MIXER
  btrfs: tree-log: check btrfs_lookup_data_extent return value
  m68knommu: only set CONFIG_ISA_DMA_API for ColdFire sub-arch
  drm/exynos: Always initialize mapping in exynos_drm_register_dma()
  lockd: lockd server-side shouldn't set fl_ops
  usb: chipidea: host: fix port index underflow and UBSAN complains
  gfs2: Don't call dlm after protocol is unmounted
  staging: rts5208: Fix get_ms_information() heap buffer size
  rpc: fix gss_svc_init cleanup on failure
  tcp: enable data-less, empty-cookie SYN with TFO_SERVER_COOKIE_NOT_REQD
  serial: sh-sci: fix break handling for sysrq
  opp: Don't print an error if required-opps is missing
  Bluetooth: Fix handling of LE Enhanced Connection Complete
  nvme-tcp: don't check blk_mq_tag_to_rq when receiving pdu data
  arm64: dts: ls1046a: fix eeprom entries
  arm64: tegra: Fix compatible string for Tegra132 CPUs
  ARM: tegra: tamonten: Fix UART pad setting
  mac80211: Fix monitor MTU limit so that A-MSDUs get through
  drm/display: fix possible null-pointer dereference in dcn10_set_clock()
  gpu: drm: amd: amdgpu: amdgpu_i2c: fix possible uninitialized-variable access in amdgpu_i2c_router_select_ddc_port()
  net/mlx5: Fix variable type to match 64bit
  Bluetooth: avoid circular locks in sco_sock_connect
  Bluetooth: schedule SCO timeouts with delayed_work
  selftests/bpf: Fix xdp_tx.c prog section name
  drm/msm: mdp4: drop vblank get/put from prepare/complete_commit
  net: ethernet: stmmac: Do not use unreachable() in ipq806x_gmac_probe()
  arm64: dts: qcom: sdm660: use reg value for memory node
  ARM: dts: imx53-ppd: Fix ACHC entry
  media: tegra-cec: Handle errors of clk_prepare_enable()
  media: TDA1997x: fix tda1997x_query_dv_timings() return value
  media: v4l2-dv-timings.c: fix wrong condition in two for-loops
  media: imx258: Limit the max analogue gain to 480
  media: imx258: Rectify mismatch of VTS value
  ASoC: Intel: bytcr_rt5640: Move "Platform Clock" routes to the maps for the matching in-/output
  arm64: tegra: Fix Tegra194 PCIe EP compatible string
  bonding: 3ad: fix the concurrency between __bond_release_one() and bond_3ad_state_machine_handler()
  workqueue: Fix possible memory leaks in wq_numa_init()
  Bluetooth: skip invalid hci_sync_conn_complete_evt
  ata: sata_dwc_460ex: No need to call phy_exit() befre phy_init()
  samples: bpf: Fix tracex7 error raised on the missing argument
  staging: ks7010: Fix the initialization of the 'sleep_status' structure
  serial: 8250_pci: make setup_port() parameters explicitly unsigned
  hvsi: don't panic on tty_register_driver failure
  xtensa: ISS: don't panic in rs_init
  serial: 8250: Define RX trigger levels for OxSemi 950 devices
  s390: make PCI mio support a machine flag
  s390/jump_label: print real address in a case of a jump label bug
  flow_dissector: Fix out-of-bounds warnings
  ipv4: ip_output.c: Fix out-of-bounds warning in ip_copy_addrs()
  video: fbdev: riva: Error out if 'pixclock' equals zero
  video: fbdev: kyro: Error out if 'pixclock' equals zero
  video: fbdev: asiliantfb: Error out if 'pixclock' equals zero
  bpf/tests: Do not PASS tests without actually testing the result
  bpf/tests: Fix copy-and-paste error in double word test
  drm/amd/amdgpu: Update debugfs link_settings output link_rate field in hex
  drm/amd/display: Fix timer_per_pixel unit error
  tty: serial: jsm: hold port lock when reporting modem line changes
  staging: board: Fix uninitialized spinlock when attaching genpd
  usb: gadget: composite: Allow bMaxPower=0 if self-powered
  USB: EHCI: ehci-mv: improve error handling in mv_ehci_enable()
  usb: gadget: u_ether: fix a potential null pointer dereference
  usb: host: fotg210: fix the actual_length of an iso packet
  usb: host: fotg210: fix the endpoint's transactional opportunities calculation
  igc: Check if num of q_vectors is smaller than max before array access
  drm: avoid blocking in drm_clients_info's rcu section
  Smack: Fix wrong semantics in smk_access_entry()
  netlink: Deal with ESRCH error in nlmsg_notify()
  video: fbdev: kyro: fix a DoS bug by restricting user input
  ARM: dts: qcom: apq8064: correct clock names
  iavf: fix locking of critical sections
  iavf: do not override the adapter state in the watchdog task
  iio: dac: ad5624r: Fix incorrect handling of an optional regulator.
  tipc: keep the skb in rcv queue until the whole data is read
  PCI: Use pci_update_current_state() in pci_enable_device_flags()
  crypto: mxs-dcp - Use sg_mapping_iter to copy data
  media: dib8000: rewrite the init prbs logic
  ASoC: atmel: ATMEL drivers don't need HAS_DMA
  drm/amdgpu: Fix amdgpu_ras_eeprom_init()
  userfaultfd: prevent concurrent API initialization
  kbuild: Fix 'no symbols' warning when CONFIG_TRIM_UNUSD_KSYMS=y
  MIPS: Malta: fix alignment of the devicetree buffer
  f2fs: fix to unmap pages from userspace process in punch_hole()
  f2fs: fix unexpected ENOENT comes from f2fs_map_blocks()
  f2fs: fix to account missing .skipped_gc_rwsem
  KVM: PPC: Fix clearing never mapped TCEs in realmode
  clk: at91: clk-generated: Limit the requested rate to our range
  clk: at91: clk-generated: pass the id of changeable parent at registration
  clk: at91: sam9x60: Don't use audio PLL
  fscache: Fix cookie key hashing
  platform/x86: dell-smbios-wmi: Add missing kfree in error-exit from run_smbios_call
  KVM: PPC: Book3S HV Nested: Reflect guest PMU in-use to L0 when guest SPRs are live
  HID: i2c-hid: Fix Elan touchpad regression
  scsi: target: avoid per-loop XCOPY buffer allocations
  powerpc/config: Renable MTD_PHYSMAP_OF
  scsi: qedf: Fix error codes in qedf_alloc_global_queues()
  scsi: qedi: Fix error codes in qedi_alloc_global_queues()
  scsi: smartpqi: Fix an error code in pqi_get_raid_map()
  pinctrl: single: Fix error return code in pcs_parse_bits_in_pinctrl_entry()
  scsi: fdomain: Fix error return code in fdomain_probe()
  SUNRPC: Fix potential memory corruption
  dma-debug: fix debugfs initialization order
  openrisc: don't printk() unconditionally
  f2fs: reduce the scope of setting fsck tag when de->name_len is zero
  f2fs: show f2fs instance in printk_ratelimited
  RDMA/efa: Remove double QP type assignment
  powerpc/stacktrace: Include linux/delay.h
  vfio: Use config not menuconfig for VFIO_NOIOMMU
  pinctrl: samsung: Fix pinctrl bank pin count
  docs: Fix infiniband uverbs minor number
  RDMA/iwcm: Release resources if iw_cm module initialization fails
  IB/hfi1: Adjust pkey entry in index 0
  scsi: bsg: Remove support for SCSI_IOCTL_SEND_COMMAND
  f2fs: quota: fix potential deadlock
  HID: input: do not report stylus battery state as "full"
  PCI: aardvark: Fix masking and unmasking legacy INTx interrupts
  PCI: aardvark: Increase polling delay to 1.5s while waiting for PIO response
  PCI: aardvark: Fix checking for PIO status
  PCI: xilinx-nwl: Enable the clock through CCF
  PCI: Return ~0 data on pciconfig_read() CAP_SYS_ADMIN failure
  PCI: Restrict ASMedia ASM1062 SATA Max Payload Size Supported
  PCI/portdrv: Enable Bandwidth Notification only if port supports it
  ARM: 9105/1: atags_to_fdt: don't warn about stack size
  libata: add ATA_HORKAGE_NO_NCQ_TRIM for Samsung 860 and 870 SSDs
  dmaengine: imx-sdma: remove duplicated sdma_load_context
  Revert "dmaengine: imx-sdma: refine to load context only once"
  media: rc-loopback: return number of emitters rather than error
  media: uvc: don't do DMA on stack
  VMCI: fix NULL pointer dereference when unmapping queue pair
  dm crypt: Avoid percpu_counter spinlock contention in crypt_page_alloc()
  power: supply: max17042: handle fails of reading status register
  block: bfq: fix bfq_set_next_ioprio_data()
  crypto: public_key: fix overflow during implicit conversion
  arm64: head: avoid over-mapping in map_memory
  soc: aspeed: p2a-ctrl: Fix boundary check for mmap
  soc: aspeed: lpc-ctrl: Fix boundary check for mmap
  soc: qcom: aoss: Fix the out of bound usage of cooling_devs
  pinctrl: ingenic: Fix incorrect pull up/down info
  pinctrl: stmfx: Fix hazardous u8[] to unsigned long cast
  tools/thermal/tmon: Add cross compiling support
  9p/xen: Fix end of loop tests for list_for_each_entry
  include/linux/list.h: add a macro to test if entry is pointing to the head
  xen: fix setting of max_pfn in shared_info
  powerpc/perf/hv-gpci: Fix counter value parsing
  PCI/MSI: Skip masking MSI-X on Xen PV
  blk-zoned: allow BLKREPORTZONE without CAP_SYS_ADMIN
  blk-zoned: allow zone management send operations without CAP_SYS_ADMIN
  btrfs: reset replace target device to allocation state on close
  btrfs: wake up async_delalloc_pages waiters after submit
  rtc: tps65910: Correct driver module alias

 Conflicts:
	Documentation/devicetree/bindings
	Documentation/devicetree/bindings/arm/tegra.yaml
	Documentation/devicetree/bindings/mtd/gpmc-nand.txt
	Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt
	kernel/sched/cpufreq_schedutil.c

Change-Id: Id17c4366cdc6854cd23fba0f41d335b09fc6100e
Signed-off-by: Srinivasarao Pathipati <quic_spathi@quicinc.com>
2022-02-07 22:29:21 +05:30

1284 lines
34 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/mm/oom_kill.c
*
* Copyright (C) 1998,2000 Rik van Riel
* Thanks go out to Claus Fischer for some serious inspiration and
* for goading me into coding this file...
* Copyright (C) 2010 Google, Inc.
* Rewritten by David Rientjes
*
* The routines in this file are used to kill a process when
* we're seriously out of memory. This gets called from __alloc_pages()
* in mm/page_alloc.c when we really run out of memory.
*
* Since we won't call these routines often (on a well-configured
* machine) this file will double as a 'coding guide' and a signpost
* for newbie kernel hackers. It features several pointers to major
* kernel subsystems and hints as to where to find out what things do.
*/
#include <linux/oom.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task.h>
#include <linux/swap.h>
#include <linux/timex.h>
#include <linux/jiffies.h>
#include <linux/cpuset.h>
#include <linux/export.h>
#include <linux/notifier.h>
#include <linux/memcontrol.h>
#include <linux/mempolicy.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/freezer.h>
#include <linux/ftrace.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/init.h>
#include <linux/mmu_notifier.h>
#include <linux/show_mem_notifier.h>
#include <linux/memory_hotplug.h>
#include <asm/tlb.h>
#include "internal.h"
#include "slab.h"
#define CREATE_TRACE_POINTS
#include <trace/events/oom.h>
/*
 * Panic policy consulted by check_panic_on_oom(): 0 never panics, 2 panics
 * regardless of the allocation constraint ("compulsory"), and any other
 * non-zero value panics only for CONSTRAINT_NONE ooms ("system-wide").
 * Defaults to 2 when CONFIG_DEBUG_PANIC_ON_OOM is enabled, 0 otherwise.
 */
int sysctl_panic_on_oom =
IS_ENABLED(CONFIG_DEBUG_PANIC_ON_OOM) ? 2 : 0;
/* When non-zero, out_of_memory() tries to kill the allocating task itself. */
int sysctl_oom_kill_allocating_task;
/* When non-zero, dump_header() also prints the per-task table. */
int sysctl_oom_dump_tasks = 1;
/* When zero, add_to_oom_reaper() returns without doing anything. */
int sysctl_reap_mem_on_sigkill = 1;
#ifdef CONFIG_PRIORITIZE_OOM_TASKS
/*
 * Jiffies deadline used by check_panic_on_oom() to defer a panic; cleared
 * to 0 when a victim is killed or handed to the oom reaper.
 */
static unsigned long panic_on_oom_timeout;
#endif
/*
 * When non-zero, check_panic_on_foreground_kill() panics if a task named
 * ULMK_MAGIC is about to kill a task whose oom_score_adj is 0. Exposed as a
 * module parameter with mode 0644.
 */
static int panic_on_adj_zero;
module_param(panic_on_adj_zero, int, 0644);
/*
 * Serializes oom killer invocations (out_of_memory()) from all contexts to
 * prevent from over eager oom killing (e.g. when the oom killer is invoked
 * from different domains).
 *
 * oom_killer_disable() relies on this lock to stabilize oom_killer_disabled
 * and mark_oom_victim
 */
DEFINE_MUTEX(oom_lock);
/* Serializes oom_score_adj and oom_score_adj_min updates */
DEFINE_MUTEX(oom_adj_mutex);
/* An oom is memcg-scoped exactly when the control block names a memcg. */
static inline bool is_memcg_oom(struct oom_control *oc)
{
	return !!oc->memcg;
}
#ifdef CONFIG_NUMA
/**
 * oom_cpuset_eligible() - check task eligibility for kill
 * @start: task struct whose threads are examined
 * @oc: pointer to struct oom_control
 *
 * A memcg oom makes every task eligible. Otherwise the thread group of
 * @start is eligible as soon as one of its threads either intersects the
 * mempolicy nodemask recorded in @oc (mempolicy constrained oom) or shares
 * allowed cpuset memory nodes with current (no nodemask in @oc).
 *
 * This function is assuming oom-killer context and 'current' has triggered
 * the oom-killer.
 *
 * Return: true if the task is eligible, false otherwise.
 */
static bool oom_cpuset_eligible(struct task_struct *start,
				struct oom_control *oc)
{
	const nodemask_t *mask = oc->nodemask;
	struct task_struct *tsk;
	bool eligible = false;

	if (is_memcg_oom(oc))
		return true;

	rcu_read_lock();
	for_each_thread(start, tsk) {
		/*
		 * With a nodemask this is a mempolicy constrained oom and
		 * tsk's cpuset is irrelevant: only a mempolicy that
		 * intersects the mask counts, otherwise the task may be
		 * needlessly killed. Without a nodemask only the mems of
		 * tsk's cpuset are compared against current's.
		 */
		eligible = mask ? mempolicy_nodemask_intersects(tsk, mask)
				: cpuset_mems_allowed_intersects(current, tsk);
		if (eligible)
			break;
	}
	rcu_read_unlock();

	return eligible;
}
#else
/* !CONFIG_NUMA variant: every task is reported as eligible. */
static bool oom_cpuset_eligible(struct task_struct *tsk, struct oom_control *oc)
{
return true;
}
#endif /* CONFIG_NUMA */
/*
* The process p may have detached its own ->mm while exiting or through
* use_mm(), but one or more of its subthreads may still have a valid
* pointer. Return p, or any of its subthreads with a valid ->mm, with
* task_lock() held.
*/
struct task_struct *find_lock_task_mm(struct task_struct *p)
{
struct task_struct *t;
rcu_read_lock();
for_each_thread(p, t) {
task_lock(t);
if (likely(t->mm))
goto found;
task_unlock(t);
}
t = NULL;
found:
rcu_read_unlock();
return t;
}
/*
 * order == -1 means the oom kill is required by sysrq, otherwise only
 * for display purposes.
 *
 * Returns true when @oc describes such a sysrq-requested kill.
 */
static inline bool is_sysrq_oom(struct oom_control *oc)
{
return oc->order == -1;
}
/*
 * Return true if the task must never be picked as a victim: the global
 * init task and kernel threads (PF_KTHREAD) are excluded.
 */
static bool oom_unkillable_task(struct task_struct *p)
{
	return is_global_init(p) || (p->flags & PF_KTHREAD);
}
/*
 * Print out unreclaimable slabs info when the unreclaimable slab amount is
 * greater than all user memory (LRU pages, including isolated and
 * unevictable ones).
 */
static bool is_dump_unreclaim_slabs(void)
{
	unsigned long nr_lru = 0;

	nr_lru += global_node_page_state(NR_ACTIVE_ANON);
	nr_lru += global_node_page_state(NR_INACTIVE_ANON);
	nr_lru += global_node_page_state(NR_ACTIVE_FILE);
	nr_lru += global_node_page_state(NR_INACTIVE_FILE);
	nr_lru += global_node_page_state(NR_ISOLATED_ANON);
	nr_lru += global_node_page_state(NR_ISOLATED_FILE);
	nr_lru += global_node_page_state(NR_UNEVICTABLE);

	return global_node_page_state(NR_SLAB_UNRECLAIMABLE) > nr_lru;
}
/**
* oom_badness - heuristic function to determine which candidate task to kill
* @p: task struct of which task we should calculate
* @totalpages: total present RAM allowed for page allocation
*
* The heuristic for determining which task to kill is made to be as simple and
* predictable as possible. The goal is to return the highest value for the
* task consuming the most memory to avoid subsequent oom failures.
*/
long oom_badness(struct task_struct *p, unsigned long totalpages)
{
long points;
long adj;
if (oom_unkillable_task(p))
return LONG_MIN;
p = find_lock_task_mm(p);
if (!p)
return LONG_MIN;
/*
* Do not even consider tasks which are explicitly marked oom
* unkillable or have been already oom reaped or the are in
* the middle of vfork
*/
adj = (long)p->signal->oom_score_adj;
if (adj == OOM_SCORE_ADJ_MIN ||
test_bit(MMF_OOM_SKIP, &p->mm->flags) ||
in_vfork(p)) {
task_unlock(p);
return LONG_MIN;
}
/*
* The baseline for the badness score is the proportion of RAM that each
* task's rss, pagetable and swap space use.
*/
points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
mm_pgtables_bytes(p->mm) / PAGE_SIZE;
task_unlock(p);
/* Normalize to oom_score_adj units */
adj *= totalpages / 1000;
points += adj;
return points;
}
/*
 * Printable name of each enum oom_constraint value, indexed by the
 * constraint itself; used by dump_oom_summary().
 */
static const char * const oom_constraint_text[] = {
[CONSTRAINT_NONE] = "CONSTRAINT_NONE",
[CONSTRAINT_CPUSET] = "CONSTRAINT_CPUSET",
[CONSTRAINT_MEMORY_POLICY] = "CONSTRAINT_MEMORY_POLICY",
[CONSTRAINT_MEMCG] = "CONSTRAINT_MEMCG",
};
/*
 * Determine the type of allocation constraint.
 *
 * As a side effect oc->totalpages is set to the amount of memory the
 * detected constraint covers: the memcg limit (at least 1) for a memcg oom,
 * swap plus the present pages of the relevant nodes for mempolicy and cpuset
 * constraints, and all RAM plus swap otherwise.
 */
static enum oom_constraint constrained_alloc(struct oom_control *oc)
{
struct zone *zone;
struct zoneref *z;
enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
bool cpuset_limited = false;
int nid;
if (is_memcg_oom(oc)) {
/* "?: 1" keeps totalpages non-zero even if the memcg max reads as 0. */
oc->totalpages = mem_cgroup_get_max(oc->memcg) ?: 1;
return CONSTRAINT_MEMCG;
}
/* Default to all available memory */
oc->totalpages = totalram_pages() + total_swap_pages;
if (!IS_ENABLED(CONFIG_NUMA))
return CONSTRAINT_NONE;
if (!oc->zonelist)
return CONSTRAINT_NONE;
/*
 * We get here only when __GFP_NOFAIL is used, so we should avoid
 * killing current and have to kill some other task in this case.
 * Ideally this would be a CONSTRAINT_THISNODE case, but there is no
 * way to handle that for now.
 */
if (oc->gfp_mask & __GFP_THISNODE)
return CONSTRAINT_NONE;
/*
 * This is not a __GFP_THISNODE allocation, so a truncated nodemask in
 * the page allocator means a mempolicy is in effect. Cpuset policy
 * is enforced in get_page_from_freelist().
 */
if (oc->nodemask &&
!nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
oc->totalpages = total_swap_pages;
for_each_node_mask(nid, *oc->nodemask)
oc->totalpages += node_present_pages(nid);
return CONSTRAINT_MEMORY_POLICY;
}
/* Check this allocation failure is caused by cpuset's wall function */
for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
high_zoneidx, oc->nodemask)
if (!cpuset_zone_allowed(zone, oc->gfp_mask))
cpuset_limited = true;
if (cpuset_limited) {
oc->totalpages = total_swap_pages;
for_each_node_mask(nid, cpuset_current_mems_allowed)
oc->totalpages += node_present_pages(nid);
return CONSTRAINT_CPUSET;
}
return CONSTRAINT_NONE;
}
/*
 * Per-task callback of the victim scan; @arg is the struct oom_control.
 *
 * Replaces oc->chosen (taking a task reference and dropping the one held on
 * the previous candidate) when @task scores at least as high as
 * oc->chosen_points.
 *
 * Returns 0 to continue the scan. Returns 1 to abort it, after dropping the
 * current candidate and setting oc->chosen to (void *)-1UL, when @task is
 * already an oom victim whose mm has not been marked MMF_OOM_SKIP.
 */
static int oom_evaluate_task(struct task_struct *task, void *arg)
{
struct oom_control *oc = arg;
long points;
#ifdef CONFIG_PRIORITIZE_OOM_TASKS
struct task_struct *p;
short adj;
#endif
if (oom_unkillable_task(task))
goto next;
/* p may not have freeable memory in nodemask */
if (!is_memcg_oom(oc) && !oom_cpuset_eligible(task, oc))
goto next;
/*
 * This task already has access to memory reserves and is being killed.
 * Don't allow any other task to have access to the reserves unless
 * the task has MMF_OOM_SKIP because chances that it would release
 * any memory is quite low.
 */
if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) {
if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags))
goto next;
goto abort;
}
#ifdef CONFIG_PRIORITIZE_OOM_TASKS
/* Skip tasks whose oom_score_adj is below the current kill threshold. */
p = find_lock_task_mm(task);
if (!p)
goto next;
adj = p->signal->oom_score_adj;
task_unlock(p);
if (adj < oc->min_kill_adj)
goto next;
#endif
/*
 * If task is allocating a lot of memory and has been marked to be
 * killed first if it triggers an oom, then select it.
 */
if (oom_task_origin(task)) {
points = LONG_MAX;
goto select;
}
points = oom_badness(task, oc->totalpages);
/* A tie with the current best still replaces it (strict '<' below). */
if (points == LONG_MIN || points < oc->chosen_points)
goto next;
select:
if (oc->chosen)
put_task_struct(oc->chosen);
get_task_struct(task);
oc->chosen = task;
oc->chosen_points = points;
next:
return 0;
abort:
if (oc->chosen)
put_task_struct(oc->chosen);
/* Sentinel understood by out_of_memory(): scan aborted, kill nothing. */
oc->chosen = (void *)-1UL;
return 1;
}
/*
* Simple selection loop. We choose the process with the highest number of
* 'points'. In case scan was aborted, oc->chosen is set to -1.
*/
static void select_bad_process(struct oom_control *oc)
{
oc->chosen_points = LONG_MIN;
if (is_memcg_oom(oc))
mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
else {
struct task_struct *p;
rcu_read_lock();
for_each_process(p)
if (oom_evaluate_task(p, oc))
break;
rcu_read_unlock();
}
}
static int dump_task(struct task_struct *p, void *arg)
{
struct oom_control *oc = arg;
struct task_struct *task;
if (oom_unkillable_task(p))
return 0;
/* p may not have freeable memory in nodemask */
if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc))
return 0;
task = find_lock_task_mm(p);
if (!task) {
/*
* This is a kthread or all of p's threads have already
* detached their mm's. There's no need to report
* them; they can't be oom killed anyway.
*/
return 0;
}
pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
task->pid, from_kuid(&init_user_ns, task_uid(task)),
task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
mm_pgtables_bytes(task->mm),
get_mm_counter(task->mm, MM_SWAPENTS),
task->signal->oom_score_adj, task->comm);
task_unlock(task);
return 0;
}
/**
* dump_tasks - dump current memory state of all system tasks
* @oc: pointer to struct oom_control
*
* Dumps the current memory state of all eligible tasks. Tasks not in the same
* memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
* are not shown.
* State information includes task's pid, uid, tgid, vm size, rss,
* pgtables_bytes, swapents, oom_score_adj value, and name.
*/
static void dump_tasks(struct oom_control *oc)
{
pr_info("Tasks state (memory values in pages):\n");
pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
if (is_memcg_oom(oc))
mem_cgroup_scan_tasks(oc->memcg, dump_task, oc);
else {
struct task_struct *p;
rcu_read_lock();
for_each_process(p)
dump_task(p, oc);
rcu_read_unlock();
}
}
/*
 * Emit the single "oom-kill:" summary line for @victim. The line is built
 * in pieces: the constraint name and nodemask are printed here, the two
 * helper calls run in between, and pr_cont() then appends the victim's
 * task name, pid and uid.
 */
static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
{
/* one line summary of the oom killer context. */
pr_info("oom-kill:constraint=%s,nodemask=%*pbl",
oom_constraint_text[oc->constraint],
nodemask_pr_args(oc->nodemask));
cpuset_print_current_mems_allowed();
mem_cgroup_print_oom_context(oc->memcg, victim);
pr_cont(",task=%s,pid=%d,uid=%d\n", victim->comm, victim->pid,
from_kuid(&init_user_ns, task_uid(victim)));
}
/*
 * Print the oom report header: who invoked the oom killer and with which
 * allocation parameters, a stack dump, memory state (memcg or global), the
 * optional task table and, when a victim @p is given, the one-line summary.
 */
static void dump_header(struct oom_control *oc, struct task_struct *p)
{
	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
		current->signal->oom_score_adj);

	if (oc->order && !IS_ENABLED(CONFIG_COMPACTION))
		pr_warn("COMPACTION is disabled!!!\n");

	dump_stack();

	if (!is_memcg_oom(oc)) {
		show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
		if (is_dump_unreclaim_slabs())
			dump_unreclaimable_slab();
		show_mem_call_notifiers();
	} else {
		mem_cgroup_print_oom_meminfo(oc->memcg);
	}

	if (sysctl_oom_dump_tasks)
		dump_tasks(oc);

	if (p)
		dump_oom_summary(oc, p);
}
/*
 * Number of OOM victims in flight: incremented by mark_oom_victim() and
 * decremented by exit_oom_victim().
 */
static atomic_t oom_victims = ATOMIC_INIT(0);
/* Woken by exit_oom_victim() when oom_victims drops to zero. */
static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
/* When true, out_of_memory() returns false without killing anything. */
static bool oom_killer_disabled __read_mostly;
/* Convert a page count to kB for the log messages below. */
#define K(x) ((x) << (PAGE_SHIFT-10))
/*
 * task->mm can be NULL if the task is the exited group leader. So to
 * determine whether the task is using a particular mm, we examine all the
 * task's threads: the first thread that still has an mm decides the answer.
 * Returns false when no thread has an mm.
 */
bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
{
	struct task_struct *t;

	for_each_thread(p, t) {
		struct mm_struct *thread_mm = READ_ONCE(t->mm);

		if (!thread_mm)
			continue;
		return thread_mm == mm;
	}

	return false;
}
#ifdef CONFIG_MMU
/*
 * OOM Reaper kernel thread which tries to reap the memory used by the OOM
 * victim (if that is possible) to help the OOM killer to move on.
 */
/* The reaper kthread, started by oom_init(). */
static struct task_struct *oom_reaper_th;
/* The reaper sleeps here until oom_reaper_list becomes non-NULL. */
static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
/*
 * Singly linked list of tasks queued for reaping, chained through
 * task_struct::oom_reaper_list; new entries are pushed at the head.
 */
static struct task_struct *oom_reaper_list;
/* Protects oom_reaper_list. */
static DEFINE_SPINLOCK(oom_reaper_lock);
/*
 * Unmap the reapable VMAs of @mm to release their memory.
 *
 * Marks @mm MMF_UNSTABLE, then walks every VMA: those rejected by
 * can_madv_lru_vma() are skipped, and of the rest only anonymous or
 * non-shared mappings are unmapped.
 *
 * Returns true if every candidate range was unmapped, false if at least one
 * was skipped because the non-blocking mmu notifier start failed.
 *
 * NOTE(review): this function takes no lock itself; oom_reap_task_mm() calls
 * it with mmap_sem held for reading - confirm other callers do the same.
 */
bool __oom_reap_task_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
bool ret = true;
/*
 * Tell all users of get_user/copy_from_user etc... that the content
 * is no longer stable. No barriers really needed because unmapping
 * should imply barriers already and the reader would hit a page fault
 * if it stumbled over a reaped memory.
 */
set_bit(MMF_UNSTABLE, &mm->flags);
for (vma = mm->mmap ; vma; vma = vma->vm_next) {
if (!can_madv_lru_vma(vma))
continue;
/*
 * Only anonymous pages have a good chance to be dropped
 * without additional steps which we cannot afford as we
 * are OOM already.
 *
 * We do not even care about fs backed pages because all
 * which are reclaimable have already been reclaimed and
 * we do not want to block exit_mmap by keeping mm ref
 * count elevated without a good reason.
 */
if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
struct mmu_notifier_range range;
struct mmu_gather tlb;
mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0,
vma, mm, vma->vm_start,
vma->vm_end);
tlb_gather_mmu(&tlb, mm, range.start, range.end);
/* A notifier that would block: give up on this VMA only. */
if (mmu_notifier_invalidate_range_start_nonblock(&range)) {
tlb_finish_mmu(&tlb, range.start, range.end);
ret = false;
continue;
}
unmap_page_range(&tlb, vma, range.start, range.end, NULL);
mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb, range.start, range.end);
}
}
return ret;
}
/*
 * Reaps the address space of the given task.
 *
 * Returns true on success and false if none or part of the address space
 * has been reclaimed and the caller should retry later. An mm that is
 * already marked MMF_OOM_SKIP counts as success without any reaping.
 */
static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
{
	bool reaped = true;

	if (!down_read_trylock(&mm->mmap_sem)) {
		trace_skip_task_reaping(tsk->pid);
		return false;
	}

	/*
	 * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
	 * work on the mm anymore. The check for MMF_OOM_SKIP must run
	 * under mmap_sem for reading because it serializes against the
	 * down_write();up_write() cycle in exit_mmap().
	 */
	if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
		trace_skip_task_reaping(tsk->pid);
	} else {
		trace_start_task_reaping(tsk->pid);

		/* A false result means only part was reaped: retry later. */
		reaped = __oom_reap_task_mm(mm);
		if (reaped)
			pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
				task_pid_nr(tsk), tsk->comm,
				K(get_mm_counter(mm, MM_ANONPAGES)),
				K(get_mm_counter(mm, MM_FILEPAGES)),
				K(get_mm_counter(mm, MM_SHMEMPAGES)));

		trace_finish_task_reaping(tsk->pid);
	}

	up_read(&mm->mmap_sem);
	return reaped;
}
#define MAX_OOM_REAP_RETRIES 10
static void oom_reap_task(struct task_struct *tsk)
{
int attempts = 0;
struct mm_struct *mm = tsk->signal->oom_mm;
/* Retry the down_read_trylock(mmap_sem) a few times */
while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
schedule_timeout_idle(HZ/10);
if (attempts <= MAX_OOM_REAP_RETRIES ||
test_bit(MMF_OOM_SKIP, &mm->flags))
goto done;
pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
task_pid_nr(tsk), tsk->comm);
debug_show_all_locks();
done:
tsk->oom_reaper_list = NULL;
/*
* Hide this mm from OOM killer because it has been either reaped or
* somebody can't call up_write(mmap_sem).
*/
set_bit(MMF_OOM_SKIP, &mm->flags);
/* Drop a reference taken by wake_oom_reaper */
put_task_struct(tsk);
}
/*
 * Main loop of the reaper kthread: sleep until a task is queued, pop the
 * head of oom_reaper_list under oom_reaper_lock and reap it. Never leaves
 * the loop.
 */
static int oom_reaper(void *unused)
{
	for (;;) {
		struct task_struct *tsk;

		wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);

		spin_lock(&oom_reaper_lock);
		tsk = oom_reaper_list;
		if (tsk)
			oom_reaper_list = tsk->oom_reaper_list;
		spin_unlock(&oom_reaper_lock);

		if (tsk)
			oom_reap_task(tsk);
	}

	return 0;
}
static void wake_oom_reaper(struct task_struct *tsk)
{
/*
* Move the lock here to avoid scenario of queuing
* the same task by both OOM killer and any other SIGKILL
* path.
*/
spin_lock(&oom_reaper_lock);
/* mm is already queued? */
if (test_and_set_bit(MMF_OOM_REAP_QUEUED,
&tsk->signal->oom_mm->flags)) {
spin_unlock(&oom_reaper_lock);
return;
}
get_task_struct(tsk);
tsk->oom_reaper_list = oom_reaper_list;
oom_reaper_list = tsk;
spin_unlock(&oom_reaper_lock);
trace_wake_reaper(tsk->pid);
wake_up(&oom_reaper_wait);
}
/*
 * Start the "oom_reaper" kernel thread and remember it in oom_reaper_th.
 * Registered as a subsys initcall. Always returns 0.
 *
 * NOTE(review): the kthread_run() result is stored unchecked, so a failed
 * start is not reported here.
 */
static int __init oom_init(void)
{
oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
return 0;
}
subsys_initcall(oom_init)
#else
/* !CONFIG_MMU variant: there is no oom reaper, so this is a no-op. */
static inline void wake_oom_reaper(struct task_struct *tsk)
{
}
#endif /* CONFIG_MMU */
/*
 * Record tsk->mm as the signal struct's oom_mm if none is recorded yet. The
 * first caller to install it pins the mm with mmgrab() and flags it
 * MMF_OOM_VICTIM; later callers leave everything untouched.
 */
static void __mark_oom_victim(struct task_struct *tsk)
{
	struct mm_struct *mm = tsk->mm;

	/* A non-NULL old value means somebody else already installed oom_mm. */
	if (cmpxchg(&tsk->signal->oom_mm, NULL, mm))
		return;

	mmgrab(tsk->signal->oom_mm);
	set_bit(MMF_OOM_VICTIM, &mm->flags);
}
/**
 * mark_oom_victim - mark the given task as OOM victim
 * @tsk: task to mark
 *
 * Has to be called with oom_lock held and never after
 * oom has been disabled already.
 *
 * tsk->mm has to be non NULL and caller has to guarantee it is stable (either
 * under task_lock or operate on the current).
 *
 * Sets TIF_MEMDIE on @tsk; if the flag was already set nothing else is done.
 * Otherwise the task's mm is recorded as oom_mm, the task is thawed and the
 * count of in-flight victims is incremented.
 */
static void mark_oom_victim(struct task_struct *tsk)
{
/* Warn, but carry on, if called while the oom killer is disabled. */
WARN_ON(oom_killer_disabled);
/* OOM killer might race with memcg OOM */
if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
return;
/* oom_mm is bound to the signal struct life time. */
__mark_oom_victim(tsk);
/*
 * Make sure that the task is woken up from uninterruptible sleep
 * if it is frozen because OOM killer wouldn't be able to free
 * any memory and livelock. freezing_slow_path will tell the freezer
 * that TIF_MEMDIE tasks should be ignored.
 */
__thaw_task(tsk);
atomic_inc(&oom_victims);
trace_mark_victim(tsk->pid);
}
/**
 * exit_oom_victim - note the exit of an OOM victim
 *
 * Clears TIF_MEMDIE on the current thread and, when this was the last
 * victim in flight, wakes everybody waiting on oom_victims_wait.
 */
void exit_oom_victim(void)
{
	clear_thread_flag(TIF_MEMDIE);

	if (atomic_dec_and_test(&oom_victims))
		wake_up_all(&oom_victims_wait);
}
/**
 * oom_killer_enable - enable OOM killer
 *
 * Clears oom_killer_disabled and logs the change. No lock is taken here.
 */
void oom_killer_enable(void)
{
oom_killer_disabled = false;
pr_info("OOM killer enabled.\n");
}
/**
 * oom_killer_disable - disable OOM killer
 * @timeout: maximum timeout to wait for oom victims in jiffies
 *
 * Forces all page allocations to fail rather than trigger OOM killer.
 * Will block and wait until all OOM victims are killed or the given
 * timeout expires.
 *
 * The function cannot be called when there are runnable user tasks because
 * the userspace would see unexpected allocation failures as a result. Any
 * new usage of this function should be consulted with MM people.
 *
 * Returns true if successful and false if the OOM killer cannot be
 * disabled.
 */
bool oom_killer_disable(signed long timeout)
{
	signed long remaining;

	/*
	 * Make sure to not race with an ongoing OOM killer. Check that the
	 * current is not killed (possibly due to sharing the victim's memory).
	 */
	if (mutex_lock_killable(&oom_lock))
		return false;
	oom_killer_disabled = true;
	mutex_unlock(&oom_lock);

	remaining = wait_event_interruptible_timeout(oom_victims_wait,
						     !atomic_read(&oom_victims),
						     timeout);
	if (remaining > 0) {
		pr_info("OOM killer disabled.\n");
		return true;
	}

	/* Timed out or interrupted: undo the disable. */
	oom_killer_enable();
	return false;
}
/*
 * Return true when @task's thread group is exiting (SIGNAL_GROUP_EXIT), or
 * when @task is the only thread of its group and is itself PF_EXITING. A
 * group in the middle of a coredump never qualifies.
 */
static inline bool __task_will_free_mem(struct task_struct *task)
{
	struct signal_struct *sig = task->signal;

	/*
	 * A coredumping process may sleep for an extended period in exit_mm(),
	 * so the oom killer cannot assume that the process will promptly exit
	 * and release memory.
	 */
	if (sig->flags & SIGNAL_GROUP_COREDUMP)
		return false;

	return (sig->flags & SIGNAL_GROUP_EXIT) ||
	       (thread_group_empty(task) && (task->flags & PF_EXITING));
}
/*
 * Checks whether the given task is dying or exiting and likely to
 * release its address space. This means that all threads and processes
 * sharing the same mm have to be killed or exiting.
 * Caller has to make sure that task->mm is stable (hold task_lock or
 * it operates on the current).
 */
static bool task_will_free_mem(struct task_struct *task)
{
	struct mm_struct *mm = task->mm;
	struct task_struct *p;
	bool all_exiting = true;

	/*
	 * Skip tasks without mm because it might have passed its exit_mm and
	 * exit_oom_victim. oom_reaper could have rescued that but do not rely
	 * on that for now. We can consider find_lock_task_mm in future.
	 */
	if (!mm || !__task_will_free_mem(task))
		return false;

	/*
	 * This task has already been drained by the oom reaper so there are
	 * only small chances it will free some more
	 */
	if (test_bit(MMF_OOM_SKIP, &mm->flags))
		return false;

	/* Nobody else uses the mm: no need to look at other processes. */
	if (atomic_read(&mm->mm_users) <= 1)
		return true;

	/*
	 * Make sure that all tasks which share the mm with the given tasks
	 * are dying as well to make sure that a) nobody pins its mm and
	 * b) the task is also reapable by the oom reaper.
	 */
	rcu_read_lock();
	for_each_process(p) {
		if (!process_shares_mm(p, mm) || same_thread_group(task, p))
			continue;
		if (!__task_will_free_mem(p)) {
			all_exiting = false;
			break;
		}
	}
	rcu_read_unlock();

	return all_exiting;
}
/*
 * Kill @victim and every other user process sharing its mm.
 *
 * @victim:  task to kill; the caller's reference on it is consumed on every
 *           path (it is moved to the mm-owning thread when that differs).
 * @message: prefix for the "Killed process" log line.
 *
 * If no thread of @victim still has an mm, nothing is killed. Otherwise
 * SIGKILL is sent to the victim's thread group, the victim is marked as an
 * oom victim and SIGKILL is also sent to the other processes using the same
 * mm. The oom reaper is woken unless the global init process shares the mm.
 */
static void __oom_kill_process(struct task_struct *victim, const char *message)
{
struct task_struct *p;
struct mm_struct *mm;
bool can_oom_reap = true;
p = find_lock_task_mm(victim);
if (!p) {
put_task_struct(victim);
return;
} else if (victim != p) {
/* Switch to the thread that actually owns the mm. */
get_task_struct(p);
put_task_struct(victim);
victim = p;
}
/* Get a reference to safely compare mm after task_unlock(victim) */
mm = victim->mm;
mmgrab(mm);
/* Raise event before sending signal: task reaper must see this */
count_vm_event(OOM_KILL);
memcg_memory_event_mm(mm, MEMCG_OOM_KILL);
/*
 * We should send SIGKILL before granting access to memory reserves
 * in order to prevent the OOM victim from depleting the memory
 * reserves from the user space under its control.
 */
do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
#ifdef CONFIG_PRIORITIZE_OOM_TASKS
/* A kill happened: restart the panic deferral in check_panic_on_oom(). */
panic_on_oom_timeout = 0;
#endif
mark_oom_victim(victim);
pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u pgtables:%lukB oom_score_adj:%hd\n",
message, task_pid_nr(victim), victim->comm, K(mm->total_vm),
K(get_mm_counter(mm, MM_ANONPAGES)),
K(get_mm_counter(mm, MM_FILEPAGES)),
K(get_mm_counter(mm, MM_SHMEMPAGES)),
from_kuid(&init_user_ns, task_uid(victim)),
mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj);
task_unlock(victim);
/*
 * Kill all user processes sharing victim->mm in other thread groups, if
 * any. They don't get access to memory reserves, though, to avoid
 * depletion of all memory. This prevents mm->mmap_sem livelock when an
 * oom killed thread cannot exit because it requires the semaphore and
 * its contended by another thread trying to allocate memory itself.
 * That thread will now get access to memory reserves since it has a
 * pending fatal signal.
 */
rcu_read_lock();
for_each_process(p) {
if (!process_shares_mm(p, mm))
continue;
if (same_thread_group(p, victim))
continue;
if (is_global_init(p)) {
/* init cannot be killed, so the mm stays pinned: do not reap it. */
can_oom_reap = false;
set_bit(MMF_OOM_SKIP, &mm->flags);
pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
task_pid_nr(victim), victim->comm,
task_pid_nr(p), p->comm);
continue;
}
/*
 * No use_mm() user needs to read from the userspace so we are
 * ok to reap it.
 */
if (unlikely(p->flags & PF_KTHREAD))
continue;
do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID);
}
rcu_read_unlock();
if (can_oom_reap)
wake_oom_reaper(victim);
mmdrop(mm);
put_task_struct(victim);
}
#undef K
/*
 * Kill provided task unless it's secured by setting
 * oom_score_adj to OOM_SCORE_ADJ_MIN, or it is the global init task.
 * Always returns 0 so the memcg scan continues.
 */
static int oom_kill_memcg_member(struct task_struct *task, void *message)
{
	if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN ||
	    is_global_init(task))
		return 0;

	/* __oom_kill_process() consumes this reference. */
	get_task_struct(task);
	__oom_kill_process(task, message);

	return 0;
}
static void oom_kill_process(struct oom_control *oc, const char *message)
{
struct task_struct *victim = oc->chosen;
struct mem_cgroup *oom_group;
static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
/*
* If the task is already exiting, don't alarm the sysadmin or kill
* its children or threads, just give it access to memory reserves
* so it can die quickly
*/
task_lock(victim);
if (task_will_free_mem(victim)) {
mark_oom_victim(victim);
wake_oom_reaper(victim);
task_unlock(victim);
put_task_struct(victim);
return;
}
task_unlock(victim);
if (__ratelimit(&oom_rs)
#ifdef CONFIG_PRIORITIZE_OOM_TASKS
&& oc->min_kill_adj < CONFIG_OOM_TASK_PRIORITY_ADJ_LIMIT
#endif
)
dump_header(oc, victim);
/*
* Do we need to kill the entire memory cgroup?
* Or even one of the ancestor memory cgroups?
* Check this out before killing the victim task.
*/
oom_group = mem_cgroup_get_oom_group(victim, oc->memcg);
__oom_kill_process(victim, message);
/*
* If necessary, kill all tasks in the selected memory cgroup.
*/
if (oom_group) {
mem_cgroup_print_oom_group(oom_group);
mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member,
(void*)message);
mem_cgroup_put(oom_group);
}
}
/* How long after the first qualifying oom a panic is held back. */
#define PANIC_ON_OOM_DEFER_TIMEOUT (5*HZ)
/* Once this much time has passed after the deadline, deferral starts over. */
#define PANIC_ON_OOM_DEFER_WINDOW (20*HZ)
/*
 * Determines whether the kernel must panic because of the panic_on_oom sysctl.
 *
 * Returns normally when no panic is due. With CONFIG_PRIORITIZE_OOM_TASKS
 * the panic is deferred: while deferring, oc->chosen is set to (void *)-1UL
 * so that the caller kills nothing on this invocation.
 */
static void check_panic_on_oom(struct oom_control *oc)
{
if (likely(!sysctl_panic_on_oom))
return;
if (sysctl_panic_on_oom != 2) {
/*
 * panic_on_oom == 1 only affects CONSTRAINT_NONE, the kernel
 * does not panic for cpuset, mempolicy, or memcg allocation
 * failures.
 */
if (oc->constraint != CONSTRAINT_NONE)
return;
}
/* Do not panic for oom kills triggered by sysrq */
if (is_sysrq_oom(oc))
return;
#ifdef CONFIG_PRIORITIZE_OOM_TASKS
/*
 * No deadline armed yet, or the previous one is older than the defer
 * window: arm a new deadline and defer. Still before the deadline:
 * keep deferring. Only an oom that arrives after the deadline but
 * inside the window falls through to the panic below.
 */
if (!panic_on_oom_timeout ||
time_after_eq(jiffies, panic_on_oom_timeout +
PANIC_ON_OOM_DEFER_WINDOW)) {
panic_on_oom_timeout = jiffies + PANIC_ON_OOM_DEFER_TIMEOUT;
oc->chosen = (void *)-1UL;
return;
} else if (time_before_eq(jiffies, panic_on_oom_timeout)) {
oc->chosen = (void *)-1UL;
return;
}
#endif
dump_header(oc, NULL);
panic("Out of memory: %s panic_on_oom is enabled\n",
sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
}
/*
 * Notifier chain called by out_of_memory() for non-memcg ooms before any
 * victim is selected; callbacks report freed memory through the
 * unsigned long the chain is invoked with.
 */
static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
/*
 * Add @nb to the oom notifier chain. Returns the result of
 * blocking_notifier_chain_register().
 */
int register_oom_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_oom_notifier);
/*
 * Remove @nb from the oom notifier chain. Returns the result of
 * blocking_notifier_chain_unregister().
 */
int unregister_oom_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_unregister(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
/**
 * out_of_memory - kill the "best" process when we run out of memory
 * @oc: pointer to struct oom_control
 *
 * If we run out of memory, we have the choice between either
 * killing a random task (bad), letting the system crash (worse)
 * OR try to be smart about which process to kill. Note that we
 * don't have to be perfect here, we just have to be good.
 *
 * Return: false when the oom killer is disabled or when no victim could be
 * chosen for a sysrq or memcg oom; true otherwise, including the cases
 * where memory was recovered without a kill or the kill was deferred.
 */
bool out_of_memory(struct oom_control *oc)
{
unsigned long freed = 0;
if (oom_killer_disabled)
return false;
/* NOTE(review): try_online_one_block() is defined outside this file. */
if (try_online_one_block(numa_node_id())) {
/* Got some memory back */
WARN(1, "OOM killer had to online a memory block\n");
return true;
}
#ifdef CONFIG_PRIORITIZE_OOM_TASKS
/* Start with no oom_score_adj threshold; it is raised further down. */
oc->min_kill_adj = OOM_SCORE_ADJ_MIN;
#endif
if (!is_memcg_oom(oc)) {
blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
if (freed > 0)
/* Got some memory back in the last second. */
return true;
}
/*
 * If current has a pending SIGKILL or is exiting, then automatically
 * select it. The goal is to allow it to allocate so that it may
 * quickly exit and free its memory.
 */
if (task_will_free_mem(current)) {
mark_oom_victim(current);
wake_oom_reaper(current);
return true;
}
/*
 * The OOM killer does not compensate for IO-less reclaim.
 * pagefault_out_of_memory lost its gfp context so we have to
 * make sure exclude 0 mask - all other users should have at least
 * ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
 * invoke the OOM killer even if it is a GFP_NOFS allocation.
 */
if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
return true;
/*
 * Check if there were limitations on the allocation (only relevant for
 * NUMA and memcg) that may require different handling.
 */
oc->constraint = constrained_alloc(oc);
if (oc->constraint != CONSTRAINT_MEMORY_POLICY)
oc->nodemask = NULL;
/* oom_kill_allocating_task: kill current itself if it is eligible. */
if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
current->mm && !oom_unkillable_task(current) &&
oom_cpuset_eligible(current, oc) &&
current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
check_panic_on_oom(oc);
get_task_struct(current);
oc->chosen = current;
oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
return true;
}
#ifdef CONFIG_PRIORITIZE_OOM_TASKS
/*
 * First pass: only consider tasks whose oom_score_adj is at least
 * CONFIG_OOM_TASK_PRIORITY_ADJ_LIMIT. If none is found, restore the
 * previous threshold so the normal selection below can run.
 */
if (oc->min_kill_adj < CONFIG_OOM_TASK_PRIORITY_ADJ_LIMIT) {
short prev_min_kill_adj = oc->min_kill_adj;
oc->min_kill_adj = CONFIG_OOM_TASK_PRIORITY_ADJ_LIMIT;
select_bad_process(oc);
if (!oc->chosen) {
pr_warn_ratelimited("Could not find task with adj >= %d\n",
CONFIG_OOM_TASK_PRIORITY_ADJ_LIMIT);
oc->min_kill_adj = prev_min_kill_adj;
oc->chosen_points = 0;
if (tsk_is_oom_victim(current)) {
pr_warn_ratelimited("current killed, retry\n");
return true;
}
}
}
#endif
/*
 * check_panic_on_oom() may set oc->chosen to (void *)-1UL to defer the
 * panic, in which case the selection below is skipped as well.
 */
if (!oc->chosen)
check_panic_on_oom(oc);
if (!oc->chosen)
select_bad_process(oc);
/* Found nothing?!?! */
if (!oc->chosen) {
dump_header(oc, NULL);
pr_warn("Out of memory and no killable processes...\n");
/*
 * If we got here due to an actual allocation at the
 * system level, we cannot survive this and will enter
 * an endless loop in the allocator. Bail out now.
 */
if (!is_sysrq_oom(oc) && !is_memcg_oom(oc))
panic("System is deadlocked on memory\n");
}
/* (void *)-1UL marks an aborted or deferred selection: kill nothing. */
if (oc->chosen && oc->chosen != (void *)-1UL)
oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
"Memory cgroup out of memory");
return !!oc->chosen;
}
/*
 * The pagefault handler calls here because some allocation has failed. We have
 * to take care of the memcg OOM here because this is the only safe context without
 * any locks held but let the oom killer triggered from the allocation context care
 * about the global OOM.
 *
 * Nothing is done at all when CONFIG_HAVE_USERSPACE_LOW_MEMORY_KILLER is
 * enabled. Otherwise a pending memcg oom is handled first; if there was
 * none and current has no fatal signal pending, a rate limited warning is
 * logged.
 */
void pagefault_out_of_memory(void)
{
	static DEFINE_RATELIMIT_STATE(pfoom_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	/* Evaluated left to right; the first condition that holds ends it. */
	if (IS_ENABLED(CONFIG_HAVE_USERSPACE_LOW_MEMORY_KILLER) ||
	    mem_cgroup_oom_synchronize(true) ||
	    fatal_signal_pending(current))
		return;

	if (__ratelimit(&pfoom_rs))
		pr_warn("Huh VM_FAULT_OOM leaked out to the #PF handler. Retrying PF\n");
}
/*
 * Hand a task that is about to be killed to the oom reaper.
 *
 * @p: task being killed; the thread of its group that still owns an mm is
 *     the one actually processed.
 *
 * Does nothing when sysctl_reap_mem_on_sigkill is zero or when no thread of
 * @p has an mm. If the task is going to free its memory it is marked as an
 * oom victim and queued for the reaper. When the caller is the task named
 * ULMK_MAGIC and the target has oom_score_adj == 0, a rate limited memory
 * report is printed as well.
 */
void add_to_oom_reaper(struct task_struct *p)
{
	static DEFINE_RATELIMIT_STATE(reaper_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	if (!sysctl_reap_mem_on_sigkill)
		return;

	/* On success p is the mm-owning thread, with task_lock held. */
	p = find_lock_task_mm(p);
	if (!p)
		return;

	/* Keep p alive past task_unlock() for the checks below. */
	get_task_struct(p);
	if (task_will_free_mem(p)) {
		__mark_oom_victim(p);
#ifdef CONFIG_PRIORITIZE_OOM_TASKS
		/* A kill is under way: restart the panic deferral. */
		panic_on_oom_timeout = 0;
#endif
		wake_oom_reaper(p);
	}
	task_unlock(p);

	/*
	 * Consult the rate limiter last so that kills of tasks with a
	 * non-zero oom_score_adj neither use up the burst budget nor count
	 * as suppressed callbacks.
	 */
	if (!strcmp(current->comm, ULMK_MAGIC) &&
	    p->signal->oom_score_adj == 0 &&
	    __ratelimit(&reaper_rs)) {
		show_mem(SHOW_MEM_FILTER_NODES, NULL);
		show_mem_call_notifiers();
	}

	put_task_struct(p);
}
/*
* Should be called prior to sending sigkill. To guarantee that the
* process to-be-killed is still untouched.
*/
void check_panic_on_foreground_kill(struct task_struct *p)
{
if (unlikely(!strcmp(current->comm, ULMK_MAGIC)
&& p->signal->oom_score_adj == 0
&& panic_on_adj_zero)) {
show_mem(SHOW_MEM_FILTER_NODES, NULL);
show_mem_call_notifiers();
panic("Attempt to kill foreground task: %s", p->comm);
}
}