diff --git a/.mailmap b/.mailmap index 5ac7074c455fc..63c11ea7e35d7 100644 --- a/.mailmap +++ b/.mailmap @@ -498,7 +498,8 @@ Lior David Loic Poulain Loic Poulain Lorenzo Pieralisi -Lorenzo Stoakes +Lorenzo Stoakes +Lorenzo Stoakes Luca Ceresoli Luca Weiss Lucas De Marchi diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index e538d4850d611..64c03010e9511 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram @@ -151,11 +151,11 @@ Description: The algorithm_params file is write-only and is used to setup compression algorithm parameters. -What: /sys/block/zram/writeback_compressed +What: /sys/block/zram/compressed_writeback Date: Decemeber 2025 Contact: Richard Chang Description: - The writeback_compressed device atrribute toggles compressed + The compressed_writeback device atrribute toggles compressed writeback feature. What: /sys/block/zram/writeback_batch_size diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index 94bb7f2245eef..451fa00d3004b 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -216,7 +216,7 @@ writeback_limit WO specifies the maximum amount of write IO zram writeback_limit_enable RW show and set writeback_limit feature writeback_batch_size RW show and set maximum number of in-flight writeback operations -writeback_compressed RW show and set compressed writeback feature +compressed_writeback RW show and set compressed writeback feature comp_algorithm RW show and change the compression algorithm algorithm_params WO setup compression algorithm parameters compact WO trigger memory compaction @@ -439,11 +439,11 @@ budget in next setting is user's job. By default zram stores written back pages in decompressed (raw) form, which means that writeback operation involves decompression of the page before writing it to the backing device. This behavior can be changed by enabling -`writeback_compressed` feature, which causes zram to write compressed pages +`compressed_writeback` feature, which causes zram to write compressed pages to the backing device, thus avoiding decompression overhead. To enable this feature, execute:: - $ echo yes > /sys/block/zramX/writeback_compressed + $ echo yes > /sys/block/zramX/compressed_writeback Note that this feature should be configured before the `zramX` device is initialized. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 55ffc0f8858a7..03a550630644f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -8196,6 +8196,9 @@ Kernel parameters p = USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT (Reduce timeout of the SET_ADDRESS request from 5000 ms to 500 ms); + q = USB_QUIRK_FORCE_ONE_CONFIG (Device + claims zero configurations, + forcing to 1); Example: quirks=0781:5580:bk,0a5c:5834:gij usbhid.mousepoll= diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml index ebda78db87a68..02ddfaab5f56d 100644 --- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml +++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml @@ -253,7 +253,6 @@ allOf: enum: # these platforms support 2 streams MST on some interfaces, # others are SST only - - qcom,glymur-dp - qcom,sc8280xp-dp - qcom,x1e80100-dp then: @@ -310,6 +309,26 @@ allOf: minItems: 6 maxItems: 8 + - if: + properties: + compatible: + contains: + enum: + # these platforms support 2 streams MST on some interfaces, + # others are SST only, but all controllers have 4 ports + - qcom,glymur-dp + then: + properties: + reg: + minItems: 9 + maxItems: 9 + clocks: + minItems: 5 + maxItems: 6 + clocks-names: + minItems: 5 + maxItems: 6 + unevaluatedProperties: false examples: diff --git a/Documentation/devicetree/bindings/display/msm/qcom,glymur-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,glymur-mdss.yaml index 2329ed96e6cb3..64dde43373ac7 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,glymur-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,glymur-mdss.yaml @@ -176,13 +176,17 @@ examples: }; }; - displayport-controller@ae90000 { + displayport-controller@af54000 { compatible = "qcom,glymur-dp"; - reg = <0xae90000 0x200>, - <0xae90200 0x200>, - <0xae90400 0x600>, - <0xae91000 0x400>, - <0xae91400 0x400>; + reg = <0xaf54000 0x200>, + <0xaf54200 0x200>, + <0xaf55000 0xc00>, + <0xaf56000 0x400>, + <0xaf57000 0x400>, + <0xaf58000 0x400>, + <0xaf59000 0x400>, + <0xaf5a000 0x600>, + <0xaf5b000 0x600>; interrupt-parent = <&mdss>; interrupts = <12>; diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml index d55fda9a523e2..a38c2261ef1ac 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml @@ -10,7 +10,7 @@ maintainers: - Krzysztof Kozlowski description: - SM8650 MSM Mobile Display Subsystem(MDSS), which encapsulates sub-blocks like + SM8750 MSM Mobile Display Subsystem(MDSS), which encapsulates sub-blocks like DPU display controller, DSI and DP interfaces etc. $ref: /schemas/display/msm/mdss-common.yaml# diff --git a/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml b/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml index 9142001888095..082fdc2e69ea0 100644 --- a/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Synopsys DesignWare APB I2C Controller maintainers: - - Jarkko Nikula + - Mika Westerberg allOf: - $ref: /schemas/i2c/i2c-controller.yaml# diff --git a/Documentation/devicetree/bindings/powerpc/fsl/fsl,mpc83xx.yaml b/Documentation/devicetree/bindings/powerpc/fsl/fsl,mpc83xx.yaml new file mode 100644 index 0000000000000..9e37d155c5829 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/fsl/fsl,mpc83xx.yaml @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/powerpc/fsl/fsl,mpc83xx.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale PowerQUICC II Pro (MPC83xx) platforms + +maintainers: + - J. Neuschäfer + +properties: + $nodename: + const: '/' + compatible: + oneOf: + - description: MPC83xx Reference Design Boards + items: + - enum: + - fsl,mpc8308rdb + - fsl,mpc8315erdb + - fsl,mpc8360rdk + - fsl,mpc8377rdb + - fsl,mpc8377wlan + - fsl,mpc8378rdb + - fsl,mpc8379rdb + + - description: MPC8313E Reference Design Board + items: + - const: MPC8313ERDB + - const: MPC831xRDB + - const: MPC83xxRDB + + - description: MPC8323E Reference Design Board + items: + - const: MPC8323ERDB + - const: MPC832xRDB + - const: MPC83xxRDB + + - description: MPC8349E-mITX(-GP) Reference Design Platform + items: + - enum: + - MPC8349EMITX + - MPC8349EMITXGP + - const: MPC834xMITX + - const: MPC83xxMITX + + - description: Keymile KMETER1 board + const: keymile,KMETER1 + + - description: MPC8308 P1M board + const: denx,mpc8308_p1m + +patternProperties: + "^soc@.*$": + type: object + properties: + compatible: + oneOf: + - items: + - enum: + - fsl,mpc8315-immr + - fsl,mpc8308-immr + - const: simple-bus + - items: + - const: fsl,mpc8360-immr + - const: fsl,immr + - const: fsl,soc + - const: simple-bus + - const: simple-bus + +additionalProperties: true + +examples: + - | + / { + compatible = "fsl,mpc8315erdb"; + model = "MPC8315E-RDB"; + #address-cells = <1>; + #size-cells = <1>; + + soc@e0000000 { + compatible = "fsl,mpc8315-immr", "simple-bus"; + reg = <0xe0000000 0x00000200>; + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + ranges = <0 0xe0000000 0x00100000>; + bus-frequency = <0>; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml index a6067030c5edc..6af4ff2331586 100644 --- a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml +++ b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml @@ -6,9 +6,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Allwinner A31 SPI Controller -allOf: - - $ref: spi-controller.yaml - maintainers: - Chen-Yu Tsai - Maxime Ripard @@ -82,11 +79,11 @@ patternProperties: spi-rx-bus-width: items: - - const: 1 + enum: [0, 1, 2, 4] spi-tx-bus-width: items: - - const: 1 + enum: [0, 1, 2, 4] required: - compatible @@ -95,6 +92,28 @@ required: - clocks - clock-names +allOf: + - $ref: spi-controller.yaml + - if: + not: + properties: + compatible: + contains: + enum: + - allwinner,sun50i-r329-spi + - allwinner,sun55i-a523-spi + then: + patternProperties: + "^.*@[0-9a-f]+": + properties: + spi-rx-bus-width: + items: + enum: [0, 1] + + spi-tx-bus-width: + items: + enum: [0, 1] + unevaluatedProperties: false examples: diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst index 9e2882d937b43..d74c2c2b9ef39 100644 --- a/Documentation/scheduler/sched-ext.rst +++ b/Documentation/scheduler/sched-ext.rst @@ -43,7 +43,6 @@ options should be enabled to use sched_ext: CONFIG_DEBUG_INFO_BTF=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT_DEFAULT_ON=y - CONFIG_PAHOLE_HAS_BTF_TAG=y sched_ext is used only when the BPF scheduler is loaded and running. @@ -58,7 +57,8 @@ in ``ops->flags``, all ``SCHED_NORMAL``, ``SCHED_BATCH``, ``SCHED_IDLE``, and However, when the BPF scheduler is loaded and ``SCX_OPS_SWITCH_PARTIAL`` is set in ``ops->flags``, only tasks with the ``SCHED_EXT`` policy are scheduled by sched_ext, while tasks with ``SCHED_NORMAL``, ``SCHED_BATCH`` and -``SCHED_IDLE`` policies are scheduled by the fair-class scheduler. +``SCHED_IDLE`` policies are scheduled by the fair-class scheduler which has +higher sched_class precedence than ``SCHED_EXT``. Terminating the sched_ext scheduler program, triggering `SysRq-S`, or detection of any internal error including stalled runnable tasks aborts the @@ -345,6 +345,8 @@ Where to Look The functions prefixed with ``scx_bpf_`` can be called from the BPF scheduler. +* ``kernel/sched/ext_idle.c`` contains the built-in idle CPU selection policy. + * ``tools/sched_ext/`` hosts example BPF scheduler implementations. * ``scx_simple[.bpf].c``: Minimal global FIFO scheduler example using a @@ -353,13 +355,35 @@ Where to Look * ``scx_qmap[.bpf].c``: A multi-level FIFO scheduler supporting five levels of priority implemented with ``BPF_MAP_TYPE_QUEUE``. + * ``scx_central[.bpf].c``: A central FIFO scheduler where all scheduling + decisions are made on one CPU, demonstrating ``LOCAL_ON`` dispatching, + tickless operation, and kthread preemption. + + * ``scx_cpu0[.bpf].c``: A scheduler that queues all tasks to a shared DSQ + and only dispatches them on CPU0 in FIFO order. Useful for testing bypass + behavior. + + * ``scx_flatcg[.bpf].c``: A flattened cgroup hierarchy scheduler + implementing hierarchical weight-based cgroup CPU control by compounding + each cgroup's share at every level into a single flat scheduling layer. + + * ``scx_pair[.bpf].c``: A core-scheduling example that always makes + sibling CPU pairs execute tasks from the same CPU cgroup. + + * ``scx_sdt[.bpf].c``: A variation of ``scx_simple`` demonstrating BPF + arena memory management for per-task data. + + * ``scx_userland[.bpf].c``: A minimal scheduler demonstrating user space + scheduling. Tasks with CPU affinity are direct-dispatched in FIFO order; + all others are scheduled in user space by a simple vruntime scheduler. + ABI Instability =============== The APIs provided by sched_ext to BPF schedulers programs have no stability guarantees. This includes the ops table callbacks and constants defined in ``include/linux/sched/ext.h``, as well as the ``scx_bpf_`` kfuncs defined in -``kernel/sched/ext.c``. +``kernel/sched/ext.c`` and ``kernel/sched/ext_idle.c``. While we will attempt to provide a relatively stable API surface when possible, they are subject to change without warning between kernel diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 6f85e1b321dd3..032516783e962 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8435,115 +8435,123 @@ KVM_CHECK_EXTENSION. The valid bits in cap.args[0] are: -=================================== ============================================ - KVM_X86_QUIRK_LINT0_REENABLED By default, the reset value for the LVT - LINT0 register is 0x700 (APIC_MODE_EXTINT). - When this quirk is disabled, the reset value - is 0x10000 (APIC_LVT_MASKED). - - KVM_X86_QUIRK_CD_NW_CLEARED By default, KVM clears CR0.CD and CR0.NW on - AMD CPUs to workaround buggy guest firmware - that runs in perpetuity with CR0.CD, i.e. - with caches in "no fill" mode. - - When this quirk is disabled, KVM does not - change the value of CR0.CD and CR0.NW. - - KVM_X86_QUIRK_LAPIC_MMIO_HOLE By default, the MMIO LAPIC interface is - available even when configured for x2APIC - mode. When this quirk is disabled, KVM - disables the MMIO LAPIC interface if the - LAPIC is in x2APIC mode. - - KVM_X86_QUIRK_OUT_7E_INC_RIP By default, KVM pre-increments %rip before - exiting to userspace for an OUT instruction - to port 0x7e. When this quirk is disabled, - KVM does not pre-increment %rip before - exiting to userspace. - - KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT When this quirk is disabled, KVM sets - CPUID.01H:ECX[bit 3] (MONITOR/MWAIT) if - IA32_MISC_ENABLE[bit 18] (MWAIT) is set. - Additionally, when this quirk is disabled, - KVM clears CPUID.01H:ECX[bit 3] if - IA32_MISC_ENABLE[bit 18] is cleared. - - KVM_X86_QUIRK_FIX_HYPERCALL_INSN By default, KVM rewrites guest - VMMCALL/VMCALL instructions to match the - vendor's hypercall instruction for the - system. When this quirk is disabled, KVM - will no longer rewrite invalid guest - hypercall instructions. Executing the - incorrect hypercall instruction will - generate a #UD within the guest. - -KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS By default, KVM emulates MONITOR/MWAIT (if - they are intercepted) as NOPs regardless of - whether or not MONITOR/MWAIT are supported - according to guest CPUID. When this quirk - is disabled and KVM_X86_DISABLE_EXITS_MWAIT - is not set (MONITOR/MWAIT are intercepted), - KVM will inject a #UD on MONITOR/MWAIT if - they're unsupported per guest CPUID. Note, - KVM will modify MONITOR/MWAIT support in - guest CPUID on writes to MISC_ENABLE if - KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT is - disabled. - -KVM_X86_QUIRK_SLOT_ZAP_ALL By default, for KVM_X86_DEFAULT_VM VMs, KVM - invalidates all SPTEs in all memslots and - address spaces when a memslot is deleted or - moved. When this quirk is disabled (or the - VM type isn't KVM_X86_DEFAULT_VM), KVM only - ensures the backing memory of the deleted - or moved memslot isn't reachable, i.e KVM - _may_ invalidate only SPTEs related to the - memslot. - -KVM_X86_QUIRK_STUFF_FEATURE_MSRS By default, at vCPU creation, KVM sets the - vCPU's MSR_IA32_PERF_CAPABILITIES (0x345), - MSR_IA32_ARCH_CAPABILITIES (0x10a), - MSR_PLATFORM_INFO (0xce), and all VMX MSRs - (0x480..0x492) to the maximal capabilities - supported by KVM. KVM also sets - MSR_IA32_UCODE_REV (0x8b) to an arbitrary - value (which is different for Intel vs. - AMD). Lastly, when guest CPUID is set (by - userspace), KVM modifies select VMX MSR - fields to force consistency between guest - CPUID and L2's effective ISA. When this - quirk is disabled, KVM zeroes the vCPU's MSR - values (with two exceptions, see below), - i.e. treats the feature MSRs like CPUID - leaves and gives userspace full control of - the vCPU model definition. This quirk does - not affect VMX MSRs CR0/CR4_FIXED1 (0x487 - and 0x489), as KVM does now allow them to - be set by userspace (KVM sets them based on - guest CPUID, for safety purposes). - -KVM_X86_QUIRK_IGNORE_GUEST_PAT By default, on Intel platforms, KVM ignores - guest PAT and forces the effective memory - type to WB in EPT. The quirk is not available - on Intel platforms which are incapable of - safely honoring guest PAT (i.e., without CPU - self-snoop, KVM always ignores guest PAT and - forces effective memory type to WB). It is - also ignored on AMD platforms or, on Intel, - when a VM has non-coherent DMA devices - assigned; KVM always honors guest PAT in - such case. The quirk is needed to avoid - slowdowns on certain Intel Xeon platforms - (e.g. ICX, SPR) where self-snoop feature is - supported but UC is slow enough to cause - issues with some older guests that use - UC instead of WC to map the video RAM. - Userspace can disable the quirk to honor - guest PAT if it knows that there is no such - guest software, for example if it does not - expose a bochs graphics device (which is - known to have had a buggy driver). -=================================== ============================================ +======================================== ================================================ +KVM_X86_QUIRK_LINT0_REENABLED By default, the reset value for the LVT + LINT0 register is 0x700 (APIC_MODE_EXTINT). + When this quirk is disabled, the reset value + is 0x10000 (APIC_LVT_MASKED). + +KVM_X86_QUIRK_CD_NW_CLEARED By default, KVM clears CR0.CD and CR0.NW on + AMD CPUs to workaround buggy guest firmware + that runs in perpetuity with CR0.CD, i.e. + with caches in "no fill" mode. + + When this quirk is disabled, KVM does not + change the value of CR0.CD and CR0.NW. + +KVM_X86_QUIRK_LAPIC_MMIO_HOLE By default, the MMIO LAPIC interface is + available even when configured for x2APIC + mode. When this quirk is disabled, KVM + disables the MMIO LAPIC interface if the + LAPIC is in x2APIC mode. + +KVM_X86_QUIRK_OUT_7E_INC_RIP By default, KVM pre-increments %rip before + exiting to userspace for an OUT instruction + to port 0x7e. When this quirk is disabled, + KVM does not pre-increment %rip before + exiting to userspace. + +KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT When this quirk is disabled, KVM sets + CPUID.01H:ECX[bit 3] (MONITOR/MWAIT) if + IA32_MISC_ENABLE[bit 18] (MWAIT) is set. + Additionally, when this quirk is disabled, + KVM clears CPUID.01H:ECX[bit 3] if + IA32_MISC_ENABLE[bit 18] is cleared. + +KVM_X86_QUIRK_FIX_HYPERCALL_INSN By default, KVM rewrites guest + VMMCALL/VMCALL instructions to match the + vendor's hypercall instruction for the + system. When this quirk is disabled, KVM + will no longer rewrite invalid guest + hypercall instructions. Executing the + incorrect hypercall instruction will + generate a #UD within the guest. + +KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS By default, KVM emulates MONITOR/MWAIT (if + they are intercepted) as NOPs regardless of + whether or not MONITOR/MWAIT are supported + according to guest CPUID. When this quirk + is disabled and KVM_X86_DISABLE_EXITS_MWAIT + is not set (MONITOR/MWAIT are intercepted), + KVM will inject a #UD on MONITOR/MWAIT if + they're unsupported per guest CPUID. Note, + KVM will modify MONITOR/MWAIT support in + guest CPUID on writes to MISC_ENABLE if + KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT is + disabled. + +KVM_X86_QUIRK_SLOT_ZAP_ALL By default, for KVM_X86_DEFAULT_VM VMs, KVM + invalidates all SPTEs in all memslots and + address spaces when a memslot is deleted or + moved. When this quirk is disabled (or the + VM type isn't KVM_X86_DEFAULT_VM), KVM only + ensures the backing memory of the deleted + or moved memslot isn't reachable, i.e KVM + _may_ invalidate only SPTEs related to the + memslot. + +KVM_X86_QUIRK_STUFF_FEATURE_MSRS By default, at vCPU creation, KVM sets the + vCPU's MSR_IA32_PERF_CAPABILITIES (0x345), + MSR_IA32_ARCH_CAPABILITIES (0x10a), + MSR_PLATFORM_INFO (0xce), and all VMX MSRs + (0x480..0x492) to the maximal capabilities + supported by KVM. KVM also sets + MSR_IA32_UCODE_REV (0x8b) to an arbitrary + value (which is different for Intel vs. + AMD). Lastly, when guest CPUID is set (by + userspace), KVM modifies select VMX MSR + fields to force consistency between guest + CPUID and L2's effective ISA. When this + quirk is disabled, KVM zeroes the vCPU's MSR + values (with two exceptions, see below), + i.e. treats the feature MSRs like CPUID + leaves and gives userspace full control of + the vCPU model definition. This quirk does + not affect VMX MSRs CR0/CR4_FIXED1 (0x487 + and 0x489), as KVM does now allow them to + be set by userspace (KVM sets them based on + guest CPUID, for safety purposes). + +KVM_X86_QUIRK_IGNORE_GUEST_PAT By default, on Intel platforms, KVM ignores + guest PAT and forces the effective memory + type to WB in EPT. The quirk is not available + on Intel platforms which are incapable of + safely honoring guest PAT (i.e., without CPU + self-snoop, KVM always ignores guest PAT and + forces effective memory type to WB). It is + also ignored on AMD platforms or, on Intel, + when a VM has non-coherent DMA devices + assigned; KVM always honors guest PAT in + such case. The quirk is needed to avoid + slowdowns on certain Intel Xeon platforms + (e.g. ICX, SPR) where self-snoop feature is + supported but UC is slow enough to cause + issues with some older guests that use + UC instead of WC to map the video RAM. + Userspace can disable the quirk to honor + guest PAT if it knows that there is no such + guest software, for example if it does not + expose a bochs graphics device (which is + known to have had a buggy driver). + +KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM By default, KVM relaxes the consistency + check for GUEST_IA32_DEBUGCTL in vmcs12 + to allow FREEZE_IN_SMM to be set. When + this quirk is disabled, KVM requires this + bit to be cleared. Note that the vmcs02 + bit is still completely controlled by the + host, regardless of the quirk setting. +======================================== ================================================ 7.32 KVM_CAP_MAX_VCPU_ID ------------------------ diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index ae8bce7fecbeb..662231e958a07 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -17,6 +17,8 @@ The acquisition orders for mutexes are as follows: - kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock +- vcpu->mutex is taken outside kvm->slots_lock and kvm->slots_arch_lock + - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring them together is quite rare. diff --git a/MAINTAINERS b/MAINTAINERS index 77fdfcb55f060..96ea84948d76a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8626,9 +8626,8 @@ F: drivers/gpu/drm/lima/ F: include/uapi/drm/lima_drm.h DRM DRIVERS FOR LOONGSON -M: Sui Jingfeng L: dri-devel@lists.freedesktop.org -S: Supported +S: Orphan T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/loongson/ @@ -16358,7 +16357,6 @@ F: net/dsa/tag_mtk.c MEDIATEK T7XX 5G WWAN MODEM DRIVER M: Chandrashekar Devegowda -R: Chiranjeevi Rapolu R: Liu Haijun R: Ricardo Martinez L: netdev@vger.kernel.org @@ -16643,7 +16641,7 @@ F: mm/balloon.c MEMORY MANAGEMENT - CORE M: Andrew Morton M: David Hildenbrand -R: Lorenzo Stoakes +R: Lorenzo Stoakes R: Liam R. Howlett R: Vlastimil Babka R: Mike Rapoport @@ -16773,7 +16771,7 @@ F: mm/workingset.c MEMORY MANAGEMENT - MISC M: Andrew Morton M: David Hildenbrand -R: Lorenzo Stoakes +R: Lorenzo Stoakes R: Liam R. Howlett R: Vlastimil Babka R: Mike Rapoport @@ -16864,7 +16862,7 @@ R: David Hildenbrand R: Michal Hocko R: Qi Zheng R: Shakeel Butt -R: Lorenzo Stoakes +R: Lorenzo Stoakes L: linux-mm@kvack.org S: Maintained F: mm/vmscan.c @@ -16873,7 +16871,7 @@ F: mm/workingset.c MEMORY MANAGEMENT - RMAP (REVERSE MAPPING) M: Andrew Morton M: David Hildenbrand -M: Lorenzo Stoakes +M: Lorenzo Stoakes R: Rik van Riel R: Liam R. Howlett R: Vlastimil Babka @@ -16918,7 +16916,7 @@ F: mm/swapfile.c MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE) M: Andrew Morton M: David Hildenbrand -M: Lorenzo Stoakes +M: Lorenzo Stoakes R: Zi Yan R: Baolin Wang R: Liam R. Howlett @@ -16958,7 +16956,7 @@ F: tools/testing/selftests/mm/uffd-*.[ch] MEMORY MANAGEMENT - RUST M: Alice Ryhl -R: Lorenzo Stoakes +R: Lorenzo Stoakes R: Liam R. Howlett L: linux-mm@kvack.org L: rust-for-linux@vger.kernel.org @@ -16974,7 +16972,7 @@ F: rust/kernel/page.rs MEMORY MAPPING M: Andrew Morton M: Liam R. Howlett -M: Lorenzo Stoakes +M: Lorenzo Stoakes R: Vlastimil Babka R: Jann Horn R: Pedro Falcato @@ -17004,7 +17002,7 @@ MEMORY MAPPING - LOCKING M: Andrew Morton M: Suren Baghdasaryan M: Liam R. Howlett -M: Lorenzo Stoakes +M: Lorenzo Stoakes R: Vlastimil Babka R: Shakeel Butt L: linux-mm@kvack.org @@ -17019,7 +17017,7 @@ F: mm/mmap_lock.c MEMORY MAPPING - MADVISE (MEMORY ADVICE) M: Andrew Morton M: Liam R. Howlett -M: Lorenzo Stoakes +M: Lorenzo Stoakes M: David Hildenbrand R: Vlastimil Babka R: Jann Horn @@ -21938,7 +21936,7 @@ F: drivers/media/radio/radio-tea5777.c RADOS BLOCK DEVICE (RBD) M: Ilya Dryomov -R: Dongsheng Yang +R: Dongsheng Yang L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ @@ -22267,6 +22265,16 @@ L: linux-wireless@vger.kernel.org S: Orphan F: drivers/net/wireless/rsi/ +RELAY +M: Andrew Morton +M: Jens Axboe +M: Jason Xing +L: linux-kernel@vger.kernel.org +S: Maintained +F: Documentation/filesystems/relay.rst +F: include/linux/relay.h +F: kernel/relay.c + REGISTER MAP ABSTRACTION M: Mark Brown L: linux-kernel@vger.kernel.org @@ -23156,7 +23164,7 @@ K: \b(?i:rust)\b RUST [ALLOC] M: Danilo Krummrich -R: Lorenzo Stoakes +R: Lorenzo Stoakes R: Vlastimil Babka R: Liam R. Howlett R: Uladzislau Rezki @@ -24333,11 +24341,12 @@ F: drivers/nvmem/layouts/sl28vpd.c SLAB ALLOCATOR M: Vlastimil Babka +M: Harry Yoo M: Andrew Morton +R: Hao Li R: Christoph Lameter R: David Rientjes R: Roman Gushchin -R: Harry Yoo L: linux-mm@kvack.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git @@ -25748,6 +25757,7 @@ F: include/net/pkt_cls.h F: include/net/pkt_sched.h F: include/net/sch_priv.h F: include/net/tc_act/ +F: include/net/tc_wrapper.h F: include/uapi/linux/pkt_cls.h F: include/uapi/linux/pkt_sched.h F: include/uapi/linux/tc_act/ diff --git a/Makefile b/Makefile index 2b15f0b4a0cb5..c9b7bee102e81 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -476,6 +476,7 @@ KBUILD_USERLDFLAGS := $(USERLDFLAGS) export rust_common_flags := --edition=2021 \ -Zbinary_dep_depinfo=y \ -Astable_features \ + -Aunused_features \ -Dnon_ascii_idents \ -Dunsafe_op_in_unsafe_fn \ -Wmissing_docs \ @@ -1113,6 +1114,9 @@ KBUILD_CFLAGS += -fno-builtin-wcslen # change __FILE__ to the relative path to the source directory ifdef building_out_of_srctree KBUILD_CPPFLAGS += -fmacro-prefix-map=$(srcroot)/= +ifeq ($(call rustc-option-yn, --remap-path-scope=macro),y) +KBUILD_RUSTFLAGS += --remap-path-prefix=$(srcroot)/= --remap-path-scope=macro +endif endif # include additional Makefiles when needed diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2ca264b3db5fa..70cb9cfd760a3 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -784,6 +784,9 @@ struct kvm_host_data { /* Number of debug breakpoints/watchpoints for this CPU (minus 1) */ unsigned int debug_brps; unsigned int debug_wrps; + + /* Last vgic_irq part of the AP list recorded in an LR */ + struct vgic_irq *last_lr_irq; }; struct kvm_host_psci_config { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c31f8e17732a3..32c2dbcc0c641 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2345,6 +2345,15 @@ static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry, !is_midr_in_range_list(has_vgic_v3)) return false; + /* + * pKVM prevents late onlining of CPUs. This means that whatever + * state the capability is in after deprivilege cannot be affected + * by a new CPU booting -- this is garanteed to be a CPU we have + * already seen, and the cap is therefore unchanged. + */ + if (system_capabilities_finalized() && is_protected_kvm_enabled()) + return cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR); + if (is_kernel_in_hyp_mode()) res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2); else diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 6588ea251ed77..c5c5644b1878e 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1504,8 +1504,6 @@ int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) fail = true; } - isb(); - if (!fail) par = read_sysreg_par(); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 1c87699fd886e..332c453b87cf8 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -29,7 +29,7 @@ #include "trace.h" -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; @@ -42,7 +42,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, hvc_exit_stat), STATS_DESC_COUNTER(VCPU, wfe_exit_stat), diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 38f66a56a7665..d815265bd374f 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -518,7 +518,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) granule = kvm_granule_size(level); cur.start = ALIGN_DOWN(addr, granule); cur.end = cur.start + granule; - if (!range_included(&cur, range)) + if (!range_included(&cur, range) && level < KVM_PGTABLE_LAST_LEVEL) continue; *range = cur; return 0; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index ec2eee857208e..17d64a1e11e5c 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1751,6 +1751,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, force_pte = (max_map_size == PAGE_SIZE); vma_pagesize = min_t(long, vma_pagesize, max_map_size); + vma_shift = __ffs(vma_pagesize); } /* @@ -1837,10 +1838,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault && s2_force_noncacheable) ret = -ENOEXEC; - if (ret) { - kvm_release_page_unused(page); - return ret; - } + if (ret) + goto out_put_page; /* * Guest performs atomic/exclusive operations on memory with unsupported @@ -1850,7 +1849,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ if (esr_fsc_is_excl_atomic_fault(kvm_vcpu_get_esr(vcpu))) { kvm_inject_dabt_excl_atomic(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; + ret = 1; + goto out_put_page; } if (nested) @@ -1936,6 +1936,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, mark_page_dirty_in_slot(kvm, memslot, gfn); return ret != -EAGAIN ? ret : 0; + +out_put_page: + kvm_release_page_unused(page); + return ret; } /* Resolve the access fault by making the page young again. */ diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 12c9f6e8dfdab..2c43097248b21 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -152,31 +152,31 @@ static int get_ia_size(struct s2_walk_info *wi) return 64 - wi->t0sz; } -static int check_base_s2_limits(struct s2_walk_info *wi, +static int check_base_s2_limits(struct kvm_vcpu *vcpu, struct s2_walk_info *wi, int level, int input_size, int stride) { - int start_size, ia_size; + int start_size, pa_max; - ia_size = get_ia_size(wi); + pa_max = kvm_get_pa_bits(vcpu->kvm); /* Check translation limits */ switch (BIT(wi->pgshift)) { case SZ_64K: - if (level == 0 || (level == 1 && ia_size <= 42)) + if (level == 0 || (level == 1 && pa_max <= 42)) return -EFAULT; break; case SZ_16K: - if (level == 0 || (level == 1 && ia_size <= 40)) + if (level == 0 || (level == 1 && pa_max <= 40)) return -EFAULT; break; case SZ_4K: - if (level < 0 || (level == 0 && ia_size <= 42)) + if (level < 0 || (level == 0 && pa_max <= 42)) return -EFAULT; break; } /* Check input size limits */ - if (input_size > ia_size) + if (input_size > pa_max) return -EFAULT; /* Check number of entries in starting level table */ @@ -269,16 +269,19 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, if (input_size > 48 || input_size < 25) return -EFAULT; - ret = check_base_s2_limits(wi, level, input_size, stride); - if (WARN_ON(ret)) + ret = check_base_s2_limits(vcpu, wi, level, input_size, stride); + if (WARN_ON(ret)) { + out->esr = compute_fsc(0, ESR_ELx_FSC_FAULT); return ret; + } base_lower_bound = 3 + input_size - ((3 - level) * stride + wi->pgshift); base_addr = wi->baddr & GENMASK_ULL(47, base_lower_bound); if (check_output_size(wi, base_addr)) { - out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ); + /* R_BFHQH */ + out->esr = compute_fsc(0, ESR_ELx_FSC_ADDRSZ); return 1; } @@ -293,8 +296,10 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, paddr = base_addr | index; ret = read_guest_s2_desc(vcpu, paddr, &desc, wi); - if (ret < 0) + if (ret < 0) { + out->esr = ESR_ELx_FSC_SEA_TTW(level); return ret; + } new_desc = desc; diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 9b3091ad868cf..e9b8b5fc480c7 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -143,6 +143,21 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vgic_model = type; kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST; + kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; + + aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; + pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) { + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + } else { + INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); + aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP); + pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3); + } + + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0); + kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1); kvm_for_each_vcpu(i, vcpu, kvm) { ret = vgic_allocate_private_irqs_locked(vcpu, type); @@ -157,25 +172,10 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) vgic_cpu->private_irqs = NULL; } + kvm->arch.vgic.vgic_model = 0; goto out_unlock; } - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - - aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; - pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; - - if (type == KVM_DEV_TYPE_ARM_VGIC_V2) { - kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - } else { - INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); - aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP); - pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3); - } - - kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0); - kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1); - if (type == KVM_DEV_TYPE_ARM_VGIC_V3) kvm->arch.vgic.nassgicap = system_supports_direct_sgis(); diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 585491fbda807..cafa3cb32bda6 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -115,7 +115,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; u32 eoicount = FIELD_GET(GICH_HCR_EOICOUNT, cpuif->vgic_hcr); - struct vgic_irq *irq; + struct vgic_irq *irq = *host_data_ptr(last_lr_irq); DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); @@ -123,7 +123,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) vgic_v2_fold_lr(vcpu, cpuif->vgic_lr[lr]); /* See the GICv3 equivalent for the EOIcount handling rationale */ - list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + list_for_each_entry_continue(irq, &vgic_cpu->ap_list_head, ap_list) { u32 lr; if (!eoicount) { diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 386ddf69a9c51..6a355eca19348 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -148,7 +148,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; u32 eoicount = FIELD_GET(ICH_HCR_EL2_EOIcount, cpuif->vgic_hcr); - struct vgic_irq *irq; + struct vgic_irq *irq = *host_data_ptr(last_lr_irq); DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); @@ -158,12 +158,12 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) /* * EOIMode=0: use EOIcount to emulate deactivation. We are * guaranteed to deactivate in reverse order of the activation, so - * just pick one active interrupt after the other in the ap_list, - * and replay the deactivation as if the CPU was doing it. We also - * rely on priority drop to have taken place, and the list to be - * sorted by priority. + * just pick one active interrupt after the other in the tail part + * of the ap_list, past the LRs, and replay the deactivation as if + * the CPU was doing it. We also rely on priority drop to have taken + * place, and the list to be sorted by priority. */ - list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + list_for_each_entry_continue(irq, &vgic_cpu->ap_list_head, ap_list) { u64 lr; /* diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 430aa98888fda..e22b79cfff965 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -814,6 +814,9 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) { + if (!*host_data_ptr(last_lr_irq)) + return; + if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_fold_lr_state(vcpu); else @@ -960,10 +963,13 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) if (irqs_outside_lrs(&als)) vgic_sort_ap_list(vcpu); + *host_data_ptr(last_lr_irq) = NULL; + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { scoped_guard(raw_spinlock, &irq->irq_lock) { if (likely(vgic_target_oracle(irq) == vcpu)) { vgic_populate_lr(vcpu, irq, count++); + *host_data_ptr(last_lr_irq) = irq; } } diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 09e137f2f841f..8ffd50a470e6b 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -14,7 +14,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, int_exits), STATS_DESC_COUNTER(VCPU, idle_exits), diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 5282158b8122a..17b3d5b36cfc0 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -10,7 +10,7 @@ #include #include -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_ICOUNTER(VM, pages), STATS_DESC_ICOUNTER(VM, hugepages), diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 29d9f630edfb2..a53abbba43ea3 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -38,7 +38,7 @@ #define VECTORSPACING 0x100 /* for EI/VI mode */ #endif -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; @@ -51,7 +51,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, wait_exits), STATS_DESC_COUNTER(VCPU, cache_exits), diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ad7a2fe63a2a4..10240cb809043 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -573,8 +573,8 @@ config ARCH_USING_PATCHABLE_FUNCTION_ENTRY depends on FUNCTION_TRACER && (PPC32 || PPC64_ELF_ABI_V2) depends on $(cc-option,-fpatchable-function-entry=2) def_bool y if PPC32 - def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN - def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN + def_bool $(success,$(srctree)/arch/powerpc/tools/check-fpatchable-function-entry.sh $(CC) $(CLANG_FLAGS) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN + def_bool $(success,$(srctree)/arch/powerpc/tools/check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN config PPC_FTRACE_OUT_OF_LINE def_bool PPC64 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY diff --git a/arch/powerpc/boot/dts/asp834x-redboot.dts b/arch/powerpc/boot/dts/asp834x-redboot.dts index 33ddb17d18760..c541bd3679831 100644 --- a/arch/powerpc/boot/dts/asp834x-redboot.dts +++ b/arch/powerpc/boot/dts/asp834x-redboot.dts @@ -37,7 +37,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x8000000>; // 128MB at 0 }; diff --git a/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi b/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi deleted file mode 100644 index 9cffccf4e07e6..0000000000000 --- a/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi +++ /dev/null @@ -1,156 +0,0 @@ -/* T4240 Interlaken LAC Portal device tree stub with 24 portals. - * - * Copyright 2012 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#address-cells = <0x1>; -#size-cells = <0x1>; -compatible = "fsl,interlaken-lac-portals"; - -lportal0: lac-portal@0 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x0 0x1000>; -}; - -lportal1: lac-portal@1000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x1000 0x1000>; -}; - -lportal2: lac-portal@2000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x2000 0x1000>; -}; - -lportal3: lac-portal@3000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x3000 0x1000>; -}; - -lportal4: lac-portal@4000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x4000 0x1000>; -}; - -lportal5: lac-portal@5000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x5000 0x1000>; -}; - -lportal6: lac-portal@6000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x6000 0x1000>; -}; - -lportal7: lac-portal@7000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x7000 0x1000>; -}; - -lportal8: lac-portal@8000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x8000 0x1000>; -}; - -lportal9: lac-portal@9000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x9000 0x1000>; -}; - -lportal10: lac-portal@A000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0xA000 0x1000>; -}; - -lportal11: lac-portal@B000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0xB000 0x1000>; -}; - -lportal12: lac-portal@C000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0xC000 0x1000>; -}; - -lportal13: lac-portal@D000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0xD000 0x1000>; -}; - -lportal14: lac-portal@E000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0xE000 0x1000>; -}; - -lportal15: lac-portal@F000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0xF000 0x1000>; -}; - -lportal16: lac-portal@10000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x10000 0x1000>; -}; - -lportal17: lac-portal@11000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x11000 0x1000>; -}; - -lportal18: lac-portal@1200 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x12000 0x1000>; -}; - -lportal19: lac-portal@13000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x13000 0x1000>; -}; - -lportal20: lac-portal@14000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x14000 0x1000>; -}; - -lportal21: lac-portal@15000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x15000 0x1000>; -}; - -lportal22: lac-portal@16000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x16000 0x1000>; -}; - -lportal23: lac-portal@17000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x17000 0x1000>; -}; diff --git a/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi b/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi deleted file mode 100644 index e8208720ac0e2..0000000000000 --- a/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi +++ /dev/null @@ -1,45 +0,0 @@ -/* - * T4 Interlaken Look-aside Controller (LAC) device tree stub - * - * Copyright 2012 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -lac: lac@229000 { - compatible = "fsl,interlaken-lac"; - reg = <0x229000 0x1000>; - interrupts = <16 2 1 18>; -}; - -lac-hv@228000 { - compatible = "fsl,interlaken-lac-hv"; - reg = <0x228000 0x1000>; - fsl,non-hv-node = <&lac>; -}; diff --git a/arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi b/arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi deleted file mode 100644 index 1cf0b77b1efe6..0000000000000 --- a/arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi +++ /dev/null @@ -1,43 +0,0 @@ -/* - * PQ3 MPIC Message (Group B) device tree stub [ controller @ offset 0x42400 ] - * - * Copyright 2012 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -message@42400 { - compatible = "fsl,mpic-v3.1-msgr"; - reg = <0x42400 0x200>; - interrupts = < - 0xb4 2 0 0 - 0xb5 2 0 0 - 0xb6 2 0 0 - 0xb7 2 0 0>; -}; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi deleted file mode 100644 index 71eb75e82c2e1..0000000000000 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi +++ /dev/null @@ -1,80 +0,0 @@ -/* - * QorIQ FMan v3 1g port #1 device tree stub [ controller @ offset 0x400000 ] - * - * Copyright 2012 - 2015 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -fman@400000 { - fman0_rx_0x09: port@89000 { - cell-index = <0x9>; - compatible = "fsl,fman-v3-port-rx"; - reg = <0x89000 0x1000>; - fsl,fman-10g-port; - fsl,fman-best-effort-port; - }; - - fman0_tx_0x29: port@a9000 { - cell-index = <0x29>; - compatible = "fsl,fman-v3-port-tx"; - reg = <0xa9000 0x1000>; - fsl,fman-10g-port; - fsl,fman-best-effort-port; - }; - - ethernet@e2000 { - cell-index = <1>; - compatible = "fsl,fman-memac"; - reg = <0xe2000 0x1000>; - fsl,fman-ports = <&fman0_rx_0x09 &fman0_tx_0x29>; - ptp-timer = <&ptp_timer0>; - pcsphy-handle = <&pcsphy1>, <&qsgmiia_pcs1>; - pcs-handle-names = "sgmii", "qsgmii"; - }; - - mdio@e1000 { - qsgmiia_pcs1: ethernet-pcs@1 { - compatible = "fsl,lynx-pcs"; - reg = <1>; - }; - }; - - mdio@e3000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; - reg = <0xe3000 0x1000>; - fsl,erratum-a011043; /* must ignore read errors */ - - pcsphy1: ethernet-phy@0 { - reg = <0x0>; - }; - }; -}; diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts index 2638555afcc45..41f917f97dab8 100644 --- a/arch/powerpc/boot/dts/mpc8308_p1m.dts +++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts @@ -37,7 +37,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB at 0 }; diff --git a/arch/powerpc/boot/dts/mpc8308rdb.dts b/arch/powerpc/boot/dts/mpc8308rdb.dts index af2ed8380a867..39ed26fba4109 100644 --- a/arch/powerpc/boot/dts/mpc8308rdb.dts +++ b/arch/powerpc/boot/dts/mpc8308rdb.dts @@ -38,7 +38,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB at 0 }; diff --git a/arch/powerpc/boot/dts/mpc8313erdb.dts b/arch/powerpc/boot/dts/mpc8313erdb.dts index 09508b4c8c730..c9fe4dabc80a7 100644 --- a/arch/powerpc/boot/dts/mpc8313erdb.dts +++ b/arch/powerpc/boot/dts/mpc8313erdb.dts @@ -6,6 +6,7 @@ */ /dts-v1/; +#include / { model = "MPC8313ERDB"; @@ -38,7 +39,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB at 0 }; @@ -48,7 +49,7 @@ #size-cells = <1>; compatible = "fsl,mpc8313-elbc", "fsl,elbc", "simple-bus"; reg = <0xe0005000 0x1000>; - interrupts = <77 0x8>; + interrupts = <77 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; // CS0 and CS1 are swapped when @@ -118,7 +119,7 @@ cell-index = <0>; compatible = "fsl-i2c"; reg = <0x3000 0x100>; - interrupts = <14 0x8>; + interrupts = <14 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; dfsrr; rtc@68 { @@ -131,7 +132,7 @@ compatible = "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; reg = <0x30000 0x10000>; - interrupts = <11 0x8>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; fsl,num-channels = <1>; fsl,channel-fifo-len = <24>; @@ -146,7 +147,7 @@ cell-index = <1>; compatible = "fsl-i2c"; reg = <0x3100 0x100>; - interrupts = <15 0x8>; + interrupts = <15 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; dfsrr; }; @@ -155,7 +156,7 @@ cell-index = <0>; compatible = "fsl,spi"; reg = <0x7000 0x1000>; - interrupts = <16 0x8>; + interrupts = <16 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; mode = "cpu"; }; @@ -167,7 +168,7 @@ #address-cells = <1>; #size-cells = <0>; interrupt-parent = <&ipic>; - interrupts = <38 0x8>; + interrupts = <38 IRQ_TYPE_LEVEL_LOW>; phy_type = "utmi_wide"; sleep = <&pmc 0x00300000>; }; @@ -175,7 +176,8 @@ ptp_clock@24E00 { compatible = "fsl,etsec-ptp"; reg = <0x24E00 0xB0>; - interrupts = <12 0x8 13 0x8>; + interrupts = <12 IRQ_TYPE_LEVEL_LOW>, + <13 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = < &ipic >; fsl,tclk-period = <10>; fsl,tmr-prsc = <100>; @@ -197,7 +199,9 @@ compatible = "gianfar"; reg = <0x24000 0x1000>; local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <37 0x8 36 0x8 35 0x8>; + interrupts = <37 IRQ_TYPE_LEVEL_LOW>, + <36 IRQ_TYPE_LEVEL_LOW>, + <35 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; tbi-handle = < &tbi0 >; /* Vitesse 7385 isn't on the MDIO bus */ @@ -211,7 +215,7 @@ reg = <0x520 0x20>; phy4: ethernet-phy@4 { interrupt-parent = <&ipic>; - interrupts = <20 0x8>; + interrupts = <20 IRQ_TYPE_LEVEL_LOW>; reg = <0x4>; }; tbi0: tbi-phy@11 { @@ -231,7 +235,9 @@ reg = <0x25000 0x1000>; ranges = <0x0 0x25000 0x1000>; local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <34 0x8 33 0x8 32 0x8>; + interrupts = <34 IRQ_TYPE_LEVEL_LOW>, + <33 IRQ_TYPE_LEVEL_LOW>, + <32 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; tbi-handle = < &tbi1 >; phy-handle = < &phy4 >; @@ -259,7 +265,7 @@ compatible = "fsl,ns16550", "ns16550"; reg = <0x4500 0x100>; clock-frequency = <0>; - interrupts = <9 0x8>; + interrupts = <9 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; @@ -269,15 +275,12 @@ compatible = "fsl,ns16550", "ns16550"; reg = <0x4600 0x100>; clock-frequency = <0>; - interrupts = <10 0x8>; + interrupts = <10 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; /* IPIC - * interrupts cell = - * sense values match linux IORESOURCE_IRQ_* defines: - * sense == 8: Level, low assertion - * sense == 2: Edge, high-to-low change + * interrupts cell = */ ipic: pic@700 { interrupt-controller; @@ -290,7 +293,7 @@ pmc: power@b00 { compatible = "fsl,mpc8313-pmc", "fsl,mpc8349-pmc"; reg = <0xb00 0x100 0xa00 0x100>; - interrupts = <80 8>; + interrupts = <80 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; fsl,mpc8313-wakeup-timer = <>m1>; @@ -306,14 +309,20 @@ gtm1: timer@500 { compatible = "fsl,mpc8313-gtm", "fsl,gtm"; reg = <0x500 0x100>; - interrupts = <90 8 78 8 84 8 72 8>; + interrupts = <90 IRQ_TYPE_LEVEL_LOW>, + <78 IRQ_TYPE_LEVEL_LOW>, + <84 IRQ_TYPE_LEVEL_LOW>, + <72 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; timer@600 { compatible = "fsl,mpc8313-gtm", "fsl,gtm"; reg = <0x600 0x100>; - interrupts = <91 8 79 8 85 8 73 8>; + interrupts = <91 IRQ_TYPE_LEVEL_LOW>, + <79 IRQ_TYPE_LEVEL_LOW>, + <85 IRQ_TYPE_LEVEL_LOW>, + <73 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; }; @@ -341,7 +350,7 @@ 0x7800 0x0 0x0 0x3 &ipic 17 0x8 0x7800 0x0 0x0 0x4 &ipic 18 0x8>; interrupt-parent = <&ipic>; - interrupts = <66 0x8>; + interrupts = <66 IRQ_TYPE_LEVEL_LOW>; bus-range = <0x0 0x0>; ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000 0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000 @@ -363,14 +372,14 @@ reg = <0xe00082a8 4>; ranges = <0 0xe0008100 0x1a8>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; dma-channel@0 { compatible = "fsl,mpc8313-dma-channel", "fsl,elo-dma-channel"; reg = <0 0x28>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; cell-index = <0>; }; @@ -379,7 +388,7 @@ "fsl,elo-dma-channel"; reg = <0x80 0x28>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; cell-index = <1>; }; @@ -388,7 +397,7 @@ "fsl,elo-dma-channel"; reg = <0x100 0x28>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; cell-index = <2>; }; @@ -397,7 +406,7 @@ "fsl,elo-dma-channel"; reg = <0x180 0x28>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; cell-index = <3>; }; }; diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts index a8f68d6e50b0d..7ba1159f88031 100644 --- a/arch/powerpc/boot/dts/mpc8315erdb.dts +++ b/arch/powerpc/boot/dts/mpc8315erdb.dts @@ -40,7 +40,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB at 0 }; @@ -50,7 +50,7 @@ #size-cells = <1>; compatible = "fsl,mpc8315-elbc", "fsl,elbc", "simple-bus"; reg = <0xe0005000 0x1000>; - interrupts = <77 0x8>; + interrupts = <77 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; // CS0 and CS1 are swapped when @@ -112,7 +112,7 @@ cell-index = <0>; compatible = "fsl-i2c"; reg = <0x3000 0x100>; - interrupts = <14 0x8>; + interrupts = <14 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; dfsrr; rtc@68 { @@ -133,8 +133,10 @@ cell-index = <0>; compatible = "fsl,spi"; reg = <0x7000 0x1000>; - interrupts = <16 0x8>; + interrupts = <16 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; + #address-cells = <1>; + #size-cells = <0>; mode = "cpu"; }; @@ -145,35 +147,35 @@ reg = <0x82a8 4>; ranges = <0 0x8100 0x1a8>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; cell-index = <0>; dma-channel@0 { compatible = "fsl,mpc8315-dma-channel", "fsl,elo-dma-channel"; reg = <0 0x80>; cell-index = <0>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; }; dma-channel@80 { compatible = "fsl,mpc8315-dma-channel", "fsl,elo-dma-channel"; reg = <0x80 0x80>; cell-index = <1>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; }; dma-channel@100 { compatible = "fsl,mpc8315-dma-channel", "fsl,elo-dma-channel"; reg = <0x100 0x80>; cell-index = <2>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; }; dma-channel@180 { compatible = "fsl,mpc8315-dma-channel", "fsl,elo-dma-channel"; reg = <0x180 0x28>; cell-index = <3>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; }; }; @@ -183,7 +185,7 @@ #address-cells = <1>; #size-cells = <0>; interrupt-parent = <&ipic>; - interrupts = <38 0x8>; + interrupts = <38 IRQ_TYPE_LEVEL_LOW>; phy_type = "utmi"; }; @@ -197,7 +199,9 @@ reg = <0x24000 0x1000>; ranges = <0x0 0x24000 0x1000>; local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <32 0x8 33 0x8 34 0x8>; + interrupts = <32 IRQ_TYPE_LEVEL_LOW>, + <33 IRQ_TYPE_LEVEL_LOW>, + <34 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; tbi-handle = <&tbi0>; phy-handle = < &phy0 >; @@ -238,7 +242,9 @@ reg = <0x25000 0x1000>; ranges = <0x0 0x25000 0x1000>; local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <35 0x8 36 0x8 37 0x8>; + interrupts = <35 IRQ_TYPE_LEVEL_LOW>, + <36 IRQ_TYPE_LEVEL_LOW>, + <37 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; tbi-handle = <&tbi1>; phy-handle = < &phy1 >; @@ -263,7 +269,7 @@ compatible = "fsl,ns16550", "ns16550"; reg = <0x4500 0x100>; clock-frequency = <133333333>; - interrupts = <9 0x8>; + interrupts = <9 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; @@ -273,7 +279,7 @@ compatible = "fsl,ns16550", "ns16550"; reg = <0x4600 0x100>; clock-frequency = <133333333>; - interrupts = <10 0x8>; + interrupts = <10 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; @@ -282,7 +288,7 @@ "fsl,sec2.4", "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; reg = <0x30000 0x10000>; - interrupts = <11 0x8>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; fsl,num-channels = <4>; fsl,channel-fifo-len = <24>; @@ -294,7 +300,7 @@ compatible = "fsl,mpc8315-sata", "fsl,pq-sata"; reg = <0x18000 0x1000>; cell-index = <1>; - interrupts = <44 0x8>; + interrupts = <44 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; @@ -302,14 +308,17 @@ compatible = "fsl,mpc8315-sata", "fsl,pq-sata"; reg = <0x19000 0x1000>; cell-index = <2>; - interrupts = <45 0x8>; + interrupts = <45 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; gtm1: timer@500 { compatible = "fsl,mpc8315-gtm", "fsl,gtm"; reg = <0x500 0x100>; - interrupts = <90 8 78 8 84 8 72 8>; + interrupts = <90 IRQ_TYPE_LEVEL_LOW>, + <78 IRQ_TYPE_LEVEL_LOW>, + <84 IRQ_TYPE_LEVEL_LOW>, + <72 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; clock-frequency = <133333333>; }; @@ -317,16 +326,16 @@ timer@600 { compatible = "fsl,mpc8315-gtm", "fsl,gtm"; reg = <0x600 0x100>; - interrupts = <91 8 79 8 85 8 73 8>; + interrupts = <91 IRQ_TYPE_LEVEL_LOW>, + <79 IRQ_TYPE_LEVEL_LOW>, + <85 IRQ_TYPE_LEVEL_LOW>, + <73 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; clock-frequency = <133333333>; }; /* IPIC - * interrupts cell = - * sense values match linux IORESOURCE_IRQ_* defines: - * sense == 8: Level, low assertion - * sense == 2: Edge, high-to-low change + * interrupts cell = */ ipic: interrupt-controller@700 { interrupt-controller; @@ -340,14 +349,14 @@ compatible = "fsl,ipic-msi"; reg = <0x7c0 0x40>; msi-available-ranges = <0 0x100>; - interrupts = <0x43 0x8 - 0x4 0x8 - 0x51 0x8 - 0x52 0x8 - 0x56 0x8 - 0x57 0x8 - 0x58 0x8 - 0x59 0x8>; + interrupts = <0x43 IRQ_TYPE_LEVEL_LOW + 0x4 IRQ_TYPE_LEVEL_LOW + 0x51 IRQ_TYPE_LEVEL_LOW + 0x52 IRQ_TYPE_LEVEL_LOW + 0x56 IRQ_TYPE_LEVEL_LOW + 0x57 IRQ_TYPE_LEVEL_LOW + 0x58 IRQ_TYPE_LEVEL_LOW + 0x59 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = < &ipic >; }; @@ -355,7 +364,7 @@ compatible = "fsl,mpc8315-pmc", "fsl,mpc8313-pmc", "fsl,mpc8349-pmc"; reg = <0xb00 0x100 0xa00 0x100>; - interrupts = <80 8>; + interrupts = <80 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; fsl,mpc8313-wakeup-timer = <>m1>; }; @@ -374,24 +383,24 @@ interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < /* IDSEL 0x0E -mini PCI */ - 0x7000 0x0 0x0 0x1 &ipic 18 0x8 - 0x7000 0x0 0x0 0x2 &ipic 18 0x8 - 0x7000 0x0 0x0 0x3 &ipic 18 0x8 - 0x7000 0x0 0x0 0x4 &ipic 18 0x8 + 0x7000 0x0 0x0 0x1 &ipic 18 IRQ_TYPE_LEVEL_LOW + 0x7000 0x0 0x0 0x2 &ipic 18 IRQ_TYPE_LEVEL_LOW + 0x7000 0x0 0x0 0x3 &ipic 18 IRQ_TYPE_LEVEL_LOW + 0x7000 0x0 0x0 0x4 &ipic 18 IRQ_TYPE_LEVEL_LOW /* IDSEL 0x0F -mini PCI */ - 0x7800 0x0 0x0 0x1 &ipic 17 0x8 - 0x7800 0x0 0x0 0x2 &ipic 17 0x8 - 0x7800 0x0 0x0 0x3 &ipic 17 0x8 - 0x7800 0x0 0x0 0x4 &ipic 17 0x8 + 0x7800 0x0 0x0 0x1 &ipic 17 IRQ_TYPE_LEVEL_LOW + 0x7800 0x0 0x0 0x2 &ipic 17 IRQ_TYPE_LEVEL_LOW + 0x7800 0x0 0x0 0x3 &ipic 17 IRQ_TYPE_LEVEL_LOW + 0x7800 0x0 0x0 0x4 &ipic 17 IRQ_TYPE_LEVEL_LOW /* IDSEL 0x10 - PCI slot */ - 0x8000 0x0 0x0 0x1 &ipic 48 0x8 - 0x8000 0x0 0x0 0x2 &ipic 17 0x8 - 0x8000 0x0 0x0 0x3 &ipic 48 0x8 - 0x8000 0x0 0x0 0x4 &ipic 17 0x8>; + 0x8000 0x0 0x0 0x1 &ipic 48 IRQ_TYPE_LEVEL_LOW + 0x8000 0x0 0x0 0x2 &ipic 17 IRQ_TYPE_LEVEL_LOW + 0x8000 0x0 0x0 0x3 &ipic 48 IRQ_TYPE_LEVEL_LOW + 0x8000 0x0 0x0 0x4 &ipic 17 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; - interrupts = <66 0x8>; + interrupts = <66 IRQ_TYPE_LEVEL_LOW>; bus-range = <0x0 0x0>; ranges = <0x02000000 0 0x90000000 0x90000000 0 0x10000000 0x42000000 0 0x80000000 0x80000000 0 0x10000000 @@ -417,10 +426,10 @@ 0x01000000 0 0x00000000 0xb1000000 0 0x00800000>; bus-range = <0 255>; interrupt-map-mask = <0xf800 0 0 7>; - interrupt-map = <0 0 0 1 &ipic 1 8 - 0 0 0 2 &ipic 1 8 - 0 0 0 3 &ipic 1 8 - 0 0 0 4 &ipic 1 8>; + interrupt-map = <0 0 0 1 &ipic 1 IRQ_TYPE_LEVEL_LOW + 0 0 0 2 &ipic 1 IRQ_TYPE_LEVEL_LOW + 0 0 0 3 &ipic 1 IRQ_TYPE_LEVEL_LOW + 0 0 0 4 &ipic 1 IRQ_TYPE_LEVEL_LOW>; clock-frequency = <0>; pcie@0 { @@ -448,10 +457,10 @@ 0x01000000 0 0x00000000 0xd1000000 0 0x00800000>; bus-range = <0 255>; interrupt-map-mask = <0xf800 0 0 7>; - interrupt-map = <0 0 0 1 &ipic 2 8 - 0 0 0 2 &ipic 2 8 - 0 0 0 3 &ipic 2 8 - 0 0 0 4 &ipic 2 8>; + interrupt-map = <0 0 0 1 &ipic 2 IRQ_TYPE_LEVEL_LOW + 0 0 0 2 &ipic 2 IRQ_TYPE_LEVEL_LOW + 0 0 0 3 &ipic 2 IRQ_TYPE_LEVEL_LOW + 0 0 0 4 &ipic 2 IRQ_TYPE_LEVEL_LOW>; clock-frequency = <0>; pcie@0 { @@ -471,12 +480,12 @@ leds { compatible = "gpio-leds"; - pwr { + led-pwr { gpios = <&mcu_pio 0 0>; default-state = "on"; }; - hdd { + led-hdd { gpios = <&mcu_pio 1 0>; linux,default-trigger = "disk-activity"; }; diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts index ba7caaf98fd58..06f134490d957 100644 --- a/arch/powerpc/boot/dts/mpc832x_rdb.dts +++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts @@ -38,7 +38,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; }; diff --git a/arch/powerpc/boot/dts/mpc8349emitx.dts b/arch/powerpc/boot/dts/mpc8349emitx.dts index 13f17232ba83d..12d33cb55b72a 100644 --- a/arch/powerpc/boot/dts/mpc8349emitx.dts +++ b/arch/powerpc/boot/dts/mpc8349emitx.dts @@ -39,7 +39,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x10000000>; }; diff --git a/arch/powerpc/boot/dts/mpc8349emitxgp.dts b/arch/powerpc/boot/dts/mpc8349emitxgp.dts index eae0afd5abbc3..2998a233a790b 100644 --- a/arch/powerpc/boot/dts/mpc8349emitxgp.dts +++ b/arch/powerpc/boot/dts/mpc8349emitxgp.dts @@ -37,7 +37,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x10000000>; }; diff --git a/arch/powerpc/boot/dts/mpc8377_rdb.dts b/arch/powerpc/boot/dts/mpc8377_rdb.dts index f137ccb8cfded..fb311a7eb9f2c 100644 --- a/arch/powerpc/boot/dts/mpc8377_rdb.dts +++ b/arch/powerpc/boot/dts/mpc8377_rdb.dts @@ -39,7 +39,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x10000000>; // 256MB at 0 }; diff --git a/arch/powerpc/boot/dts/mpc8377_wlan.dts b/arch/powerpc/boot/dts/mpc8377_wlan.dts index ce254dd74dd06..f736a15cceffa 100644 --- a/arch/powerpc/boot/dts/mpc8377_wlan.dts +++ b/arch/powerpc/boot/dts/mpc8377_wlan.dts @@ -40,7 +40,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x20000000>; // 512MB at 0 }; diff --git a/arch/powerpc/boot/dts/mpc8378_rdb.dts b/arch/powerpc/boot/dts/mpc8378_rdb.dts index 19e5473d4161b..32c49622b4040 100644 --- a/arch/powerpc/boot/dts/mpc8378_rdb.dts +++ b/arch/powerpc/boot/dts/mpc8378_rdb.dts @@ -39,7 +39,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x10000000>; // 256MB at 0 }; diff --git a/arch/powerpc/boot/dts/mpc8379_rdb.dts b/arch/powerpc/boot/dts/mpc8379_rdb.dts index 61519acca2280..07deb89c5a9bd 100644 --- a/arch/powerpc/boot/dts/mpc8379_rdb.dts +++ b/arch/powerpc/boot/dts/mpc8379_rdb.dts @@ -37,7 +37,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x10000000>; // 256MB at 0 }; diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 2d71e4b7cd09c..496ecc65ac255 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -120,10 +120,8 @@ #if defined(CONFIG_44x) #include -#elif defined(CONFIG_PPC_85xx) && defined(CONFIG_PTE_64BIT) -#include #elif defined(CONFIG_PPC_85xx) -#include +#include #elif defined(CONFIG_PPC_8xx) #include #endif diff --git a/arch/powerpc/include/asm/nohash/32/pte-85xx.h b/arch/powerpc/include/asm/nohash/32/pte-85xx.h deleted file mode 100644 index 14d64b4f3f14a..0000000000000 --- a/arch/powerpc/include/asm/nohash/32/pte-85xx.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_32_PTE_85xx_H -#define _ASM_POWERPC_NOHASH_32_PTE_85xx_H -#ifdef __KERNEL__ - -/* PTE bit definitions for Freescale BookE SW loaded TLB MMU based - * processors - * - MMU Assist Register 3: - - 32 33 34 35 36 ... 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - RPN...................... 0 0 U0 U1 U2 U3 UX SX UW SW UR SR - - - PRESENT *must* be in the bottom two bits because swap PTEs use - the top 30 bits. - -*/ - -/* Definitions for FSL Book-E Cores */ -#define _PAGE_READ 0x00001 /* H: Read permission (SR) */ -#define _PAGE_PRESENT 0x00002 /* S: PTE contains a translation */ -#define _PAGE_WRITE 0x00004 /* S: Write permission (SW) */ -#define _PAGE_DIRTY 0x00008 /* S: Page dirty */ -#define _PAGE_EXEC 0x00010 /* H: SX permission */ -#define _PAGE_ACCESSED 0x00020 /* S: Page referenced */ - -#define _PAGE_ENDIAN 0x00040 /* H: E bit */ -#define _PAGE_GUARDED 0x00080 /* H: G bit */ -#define _PAGE_COHERENT 0x00100 /* H: M bit */ -#define _PAGE_NO_CACHE 0x00200 /* H: I bit */ -#define _PAGE_WRITETHRU 0x00400 /* H: W bit */ -#define _PAGE_SPECIAL 0x00800 /* S: Special page */ - -#define _PMD_PRESENT 0 -#define _PMD_PRESENT_MASK (PAGE_MASK) -#define _PMD_BAD (~PAGE_MASK) -#define _PMD_USER 0 - -#define _PTE_NONE_MASK 0 - -#define PTE_WIMGE_SHIFT (6) - -/* - * We define 2 sets of base prot bits, one for basic pages (ie, - * cacheable kernel and user pages) and one for non cacheable - * pages. We always set _PAGE_COHERENT when SMP is enabled or - * the processor might need it for DMA coherency. - */ -#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) -#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) -#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT) -#else -#define _PAGE_BASE (_PAGE_BASE_NC) -#endif - -#include - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_NOHASH_32_PTE_FSL_85xx_H */ diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index f3086e39e7d2c..e8dec885b6e47 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -49,7 +49,7 @@ static inline unsigned long pud_val(pud_t x) #endif /* CONFIG_PPC64 */ /* PGD level */ -#if defined(CONFIG_PPC_85xx) && defined(CONFIG_PTE_64BIT) +#if defined(CONFIG_PPC_85xx) typedef struct { unsigned long long pgd; } pgd_t; static inline unsigned long long pgd_val(pgd_t x) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index ba1d878c3f404..17e63244e8855 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -15,6 +15,9 @@ #define TASK_SIZE_MAX TASK_SIZE_USER64 #endif +/* Threshold above which VMX copy path is used */ +#define VMX_COPY_THRESHOLD 3328 + #include /* @@ -255,7 +258,7 @@ __gus_failed: \ ".section .fixup,\"ax\"\n" \ "4: li %0,%3\n" \ " li %1,0\n" \ - " li %1+1,0\n" \ + " li %L1,0\n" \ " b 3b\n" \ ".previous\n" \ EX_TABLE(1b, 4b) \ @@ -326,40 +329,62 @@ do { \ extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); -#ifdef __powerpc64__ -static inline unsigned long -raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) +unsigned long __copy_tofrom_user_base(void __user *to, + const void __user *from, unsigned long size); + +unsigned long __copy_tofrom_user_power7_vmx(void __user *to, + const void __user *from, unsigned long size); + +static __always_inline bool will_use_vmx(unsigned long n) +{ + return IS_ENABLED(CONFIG_ALTIVEC) && cpu_has_feature(CPU_FTR_VMX_COPY) && + n > VMX_COPY_THRESHOLD; +} + +static __always_inline unsigned long +raw_copy_tofrom_user(void __user *to, const void __user *from, + unsigned long n, unsigned long dir) { unsigned long ret; - barrier_nospec(); - allow_user_access(to, KUAP_READ_WRITE); + if (will_use_vmx(n) && enter_vmx_usercopy()) { + allow_user_access(to, dir); + ret = __copy_tofrom_user_power7_vmx(to, from, n); + prevent_user_access(dir); + exit_vmx_usercopy(); + + if (unlikely(ret)) { + allow_user_access(to, dir); + ret = __copy_tofrom_user_base(to, from, n); + prevent_user_access(dir); + } + return ret; + } + + allow_user_access(to, dir); ret = __copy_tofrom_user(to, from, n); - prevent_user_access(KUAP_READ_WRITE); + prevent_user_access(dir); return ret; } -#endif /* __powerpc64__ */ -static inline unsigned long raw_copy_from_user(void *to, - const void __user *from, unsigned long n) +#ifdef CONFIG_PPC64 +static inline unsigned long +raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { - unsigned long ret; + barrier_nospec(); + return raw_copy_tofrom_user(to, from, n, KUAP_READ_WRITE); +} +#endif /* CONFIG_PPC64 */ - allow_user_access(NULL, KUAP_READ); - ret = __copy_tofrom_user((__force void __user *)to, from, n); - prevent_user_access(KUAP_READ); - return ret; +static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return raw_copy_tofrom_user((__force void __user *)to, from, n, KUAP_READ); } static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - unsigned long ret; - - allow_user_access(to, KUAP_WRITE); - ret = __copy_tofrom_user(to, (__force const void __user *)from, n); - prevent_user_access(KUAP_WRITE); - return ret; + return raw_copy_tofrom_user(to, (__force const void __user *)from, n, KUAP_WRITE); } unsigned long __arch_clear_user(void __user *addr, unsigned long size); diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index f9a73fae64641..8867596d35adc 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -305,7 +305,6 @@ set_ivor: * r12 is pointer to the pte * r10 is the pshift from the PGD, if we're a hugepage */ -#ifdef CONFIG_PTE_64BIT #ifdef CONFIG_HUGETLB_PAGE #define FIND_PTE \ rlwinm r12, r13, 14, 18, 28; /* Compute pgdir/pmd offset */ \ @@ -329,15 +328,6 @@ set_ivor: rlwimi r12, r13, 23, 20, 28; /* Compute pte address */ \ lwz r11, 4(r12); /* Get pte entry */ #endif /* HUGEPAGE */ -#else /* !PTE_64BIT */ -#define FIND_PTE \ - rlwimi r11, r13, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ - lwz r11, 0(r11); /* Get L1 entry */ \ - rlwinm. r12, r11, 0, 0, 19; /* Extract L2 (pte) base address */ \ - beq 2f; /* Bail if no table */ \ - rlwimi r12, r13, 22, 20, 29; /* Compute PTE address */ \ - lwz r11, 0(r12); /* Get Linux PTE */ -#endif /* * Interrupt vector entry code @@ -473,21 +463,15 @@ END_BTB_FLUSH_SECTION 4: FIND_PTE -#ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT|_PAGE_BAP_SR oris r13,r13,_PAGE_ACCESSED@h -#else - li r13,_PAGE_PRESENT|_PAGE_READ|_PAGE_ACCESSED -#endif andc. r13,r13,r11 /* Check permission */ -#ifdef CONFIG_PTE_64BIT #ifdef CONFIG_SMP subf r13,r11,r12 /* create false data dep */ lwzx r13,r11,r13 /* Get upper pte bits */ #else lwz r13,0(r12) /* Get upper pte bits */ -#endif #endif bne 2f /* Bail if permission/valid mismatch */ @@ -552,12 +536,8 @@ END_BTB_FLUSH_SECTION FIND_PTE /* Make up the required permissions for kernel code */ -#ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_SX oris r13,r13,_PAGE_ACCESSED@h -#else - li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#endif b 4f /* Get the PGD for the current thread */ @@ -573,23 +553,17 @@ END_BTB_FLUSH_SECTION FIND_PTE /* Make up the required permissions for user code */ -#ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_UX oris r13,r13,_PAGE_ACCESSED@h -#else - li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#endif 4: andc. r13,r13,r11 /* Check permission */ -#ifdef CONFIG_PTE_64BIT #ifdef CONFIG_SMP subf r13,r11,r12 /* create false data dep */ lwzx r13,r11,r13 /* Get upper pte bits */ #else lwz r13,0(r12) /* Get upper pte bits */ -#endif #endif bne 2f /* Bail if permission mismatch */ @@ -683,7 +657,7 @@ interrupt_end: * r10 - tsize encoding (if HUGETLB_PAGE) or available to use * r11 - TLB (info from Linux PTE) * r12 - available to use - * r13 - upper bits of PTE (if PTE_64BIT) or available to use + * r13 - upper bits of PTE * CR5 - results of addr >= PAGE_OFFSET * MAS0, MAS1 - loaded with proper value when we get here * MAS2, MAS3 - will need additional info from Linux PTE @@ -751,7 +725,6 @@ finish_tlb_load: * here we (properly should) assume have the appropriate value. */ finish_tlb_load_cont: -#ifdef CONFIG_PTE_64BIT rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */ andi. r10, r11, _PAGE_DIRTY bne 1f @@ -764,26 +737,9 @@ BEGIN_MMU_FTR_SECTION srwi r10, r13, 12 /* grab RPN[12:31] */ mtspr SPRN_MAS7, r10 END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) -#else - li r10, (_PAGE_EXEC | _PAGE_READ) - mr r13, r11 - rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ - and r12, r11, r10 - mcrf cr0, cr5 /* Test for user page */ - slwi r10, r12, 1 - or r10, r10, r12 - rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */ - isellt r12, r10, r12 - rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ - mtspr SPRN_MAS3, r13 -#endif mfspr r12, SPRN_MAS2 -#ifdef CONFIG_PTE_64BIT rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */ -#else - rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ -#endif #ifdef CONFIG_HUGETLB_PAGE beq 6, 3f /* don't mask if page isn't huge */ li r13, 1 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0ce71310b7d91..d122e8447831c 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1159,7 +1159,7 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, struct device *dev, struct iommu_domain *old) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_domain *domain = iommu_driver_get_domain_for_dev(dev); struct iommu_table_group *table_group; struct iommu_group *grp; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 827c958677f8c..f26e80cbc6156 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2893,7 +2893,8 @@ static void __init fixup_device_tree_pmac(void) for (node = 0; prom_next_node(&node); ) { type[0] = '\0'; prom_getprop(node, "device_type", type, sizeof(type)); - if (prom_strcmp(type, "escc") && prom_strcmp(type, "i2s")) + if (prom_strcmp(type, "escc") && prom_strcmp(type, "i2s") && + prom_strcmp(type, "media-bay")) continue; if (prom_getproplen(node, "#size-cells") != PROM_ERROR) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index cb5b73adc2506..b1761909c23fe 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -995,15 +994,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(); - /* - * Reserve large chunks of memory for use by CMA for kdump, fadump, KVM and - * hugetlb. These must be called after initmem_init(), so that - * pageblock_order is initialised. - */ - fadump_cma_init(); - kdump_cma_reserve(); - kvm_cma_reserve(); - early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); if (ppc_md.setup_arch) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 841d077e28251..1b2f293e7dcb5 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -37,11 +37,29 @@ unsigned long ftrace_call_adjust(unsigned long addr) if (addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end) return 0; - if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) && - !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { - addr += MCOUNT_INSN_SIZE; - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) { + if (!IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { addr += MCOUNT_INSN_SIZE; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + addr += MCOUNT_INSN_SIZE; + } else if (IS_ENABLED(CONFIG_CC_IS_CLANG) && IS_ENABLED(CONFIG_PPC64)) { + /* + * addr points to global entry point though the NOP was emitted at local + * entry point due to https://github.com/llvm/llvm-project/issues/163706 + * Handle that here with ppc_function_entry() for kernel symbols while + * adjusting module addresses in the else case, by looking for the below + * module global entry point sequence: + * ld r2, -8(r12) + * add r2, r2, r12 + */ + if (is_kernel_text(addr) || is_kernel_inittext(addr)) + addr = ppc_function_entry((void *)addr); + else if ((ppc_inst_val(ppc_inst_read((u32 *)addr)) == + PPC_RAW_LD(_R2, _R12, -8)) && + (ppc_inst_val(ppc_inst_read((u32 *)(addr+4))) == + PPC_RAW_ADD(_R2, _R2, _R12))) + addr += 8; + } } return addr; diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 104c05520bf05..dc44f11be353e 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -23,6 +23,7 @@ #include #define cpu_to_be_ulong __PASTE(cpu_to_be, BITS_PER_LONG) +#define __be_word __PASTE(__be, BITS_PER_LONG) #ifdef CONFIG_CRASH_DUMP void machine_crash_shutdown(struct pt_regs *regs) @@ -146,25 +147,25 @@ int __init overlaps_crashkernel(unsigned long start, unsigned long size) } /* Values we need to export to the second kernel via the device tree. */ -static phys_addr_t crashk_base; -static phys_addr_t crashk_size; -static unsigned long long mem_limit; +static __be_word crashk_base; +static __be_word crashk_size; +static __be_word mem_limit; static struct property crashk_base_prop = { .name = "linux,crashkernel-base", - .length = sizeof(phys_addr_t), + .length = sizeof(__be_word), .value = &crashk_base }; static struct property crashk_size_prop = { .name = "linux,crashkernel-size", - .length = sizeof(phys_addr_t), + .length = sizeof(__be_word), .value = &crashk_size, }; static struct property memory_limit_prop = { .name = "linux,memory-limit", - .length = sizeof(unsigned long long), + .length = sizeof(__be_word), .value = &mem_limit, }; @@ -193,11 +194,11 @@ static void __init export_crashk_values(struct device_node *node) } #endif /* CONFIG_CRASH_RESERVE */ -static phys_addr_t kernel_end; +static __be_word kernel_end; static struct property kernel_end_prop = { .name = "linux,kernel-end", - .length = sizeof(phys_addr_t), + .length = sizeof(__be_word), .value = &kernel_end, }; diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index e7ef8b2a25546..5f6d50e4c3d45 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -450,6 +450,11 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) kbuf->buffer = headers; kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; kbuf->bufsz = headers_sz; + + /* + * Account for extra space required to accommodate additional memory + * ranges in elfcorehdr due to memory hotplug events. + */ kbuf->memsz = headers_sz + kdump_extra_elfcorehdr_size(cmem); kbuf->top_down = false; @@ -460,7 +465,14 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) } image->elf_load_addr = kbuf->mem; - image->elf_headers_sz = headers_sz; + + /* + * If CONFIG_CRASH_HOTPLUG is enabled, the elfcorehdr kexec segment + * memsz can be larger than bufsz. Always initialize elf_headers_sz + * with memsz. This ensures the correct size is reserved for elfcorehdr + * memory in the FDT prepared for kdump. + */ + image->elf_headers_sz = kbuf->memsz; image->elf_headers = headers; out: kfree(cmem); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d79c5d1098c05..2efbe05caed76 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -38,7 +38,7 @@ /* #define EXIT_DEBUG */ -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_ICOUNTER(VM, num_2M_pages), STATS_DESC_ICOUNTER(VM, num_1G_pages) @@ -53,7 +53,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, sum_exits), STATS_DESC_COUNTER(VCPU, mmio_exits), diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3401b96be475e..f3ddb24ece749 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -36,7 +36,7 @@ unsigned long kvmppc_booke_handlers; -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_ICOUNTER(VM, num_2M_pages), STATS_DESC_ICOUNTER(VM, num_1G_pages) @@ -51,7 +51,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, sum_exits), STATS_DESC_COUNTER(VCPU, mmio_exits), diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index f9acf866c709e..e4469ad73a2e9 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -39,15 +39,11 @@ enum vcpu_ftr { /* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */ #define E500_TLB_MAS2_ATTR (0x7f) -struct tlbe_ref { +struct tlbe_priv { kvm_pfn_t pfn; /* valid only for TLB0, except briefly */ unsigned int flags; /* E500_TLB_* */ }; -struct tlbe_priv { - struct tlbe_ref ref; -}; - #ifdef CONFIG_KVM_E500V2 struct vcpu_id_table; #endif diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 48580c85f23b0..75ed1496ead52 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -920,12 +920,12 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) vcpu_e500->gtlb_offset[0] = 0; vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; - vcpu_e500->gtlb_priv[0] = kzalloc_objs(struct tlbe_ref, + vcpu_e500->gtlb_priv[0] = kzalloc_objs(struct tlbe_priv, vcpu_e500->gtlb_params[0].entries); if (!vcpu_e500->gtlb_priv[0]) goto free_vcpu; - vcpu_e500->gtlb_priv[1] = kzalloc_objs(struct tlbe_ref, + vcpu_e500->gtlb_priv[1] = kzalloc_objs(struct tlbe_priv, vcpu_e500->gtlb_params[1].entries); if (!vcpu_e500->gtlb_priv[1]) goto free_vcpu; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 06caf8bbbe2b7..37e0d3d9e244d 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -189,16 +189,16 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, { struct kvm_book3e_206_tlb_entry *gtlbe = get_entry(vcpu_e500, tlbsel, esel); - struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref; + struct tlbe_priv *tlbe = &vcpu_e500->gtlb_priv[tlbsel][esel]; /* Don't bother with unmapped entries */ - if (!(ref->flags & E500_TLB_VALID)) { - WARN(ref->flags & (E500_TLB_BITMAP | E500_TLB_TLB0), - "%s: flags %x\n", __func__, ref->flags); + if (!(tlbe->flags & E500_TLB_VALID)) { + WARN(tlbe->flags & (E500_TLB_BITMAP | E500_TLB_TLB0), + "%s: flags %x\n", __func__, tlbe->flags); WARN_ON(tlbsel == 1 && vcpu_e500->g2h_tlb1_map[esel]); } - if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) { + if (tlbsel == 1 && tlbe->flags & E500_TLB_BITMAP) { u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; int hw_tlb_indx; unsigned long flags; @@ -216,28 +216,28 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, } mb(); vcpu_e500->g2h_tlb1_map[esel] = 0; - ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID); + tlbe->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID); local_irq_restore(flags); } - if (tlbsel == 1 && ref->flags & E500_TLB_TLB0) { + if (tlbsel == 1 && tlbe->flags & E500_TLB_TLB0) { /* * TLB1 entry is backed by 4k pages. This should happen * rarely and is not worth optimizing. Invalidate everything. */ kvmppc_e500_tlbil_all(vcpu_e500); - ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID); + tlbe->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID); } /* * If TLB entry is still valid then it's a TLB0 entry, and thus * backed by at most one host tlbe per shadow pid */ - if (ref->flags & E500_TLB_VALID) + if (tlbe->flags & E500_TLB_VALID) kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); /* Mark the TLB as not backed by the host anymore */ - ref->flags = 0; + tlbe->flags = 0; } static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) @@ -245,26 +245,26 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); } -static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, - struct kvm_book3e_206_tlb_entry *gtlbe, - kvm_pfn_t pfn, unsigned int wimg, - bool writable) +static inline void kvmppc_e500_tlbe_setup(struct tlbe_priv *tlbe, + struct kvm_book3e_206_tlb_entry *gtlbe, + kvm_pfn_t pfn, unsigned int wimg, + bool writable) { - ref->pfn = pfn; - ref->flags = E500_TLB_VALID; + tlbe->pfn = pfn; + tlbe->flags = E500_TLB_VALID; if (writable) - ref->flags |= E500_TLB_WRITABLE; + tlbe->flags |= E500_TLB_WRITABLE; /* Use guest supplied MAS2_G and MAS2_E */ - ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg; + tlbe->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg; } -static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) +static inline void kvmppc_e500_tlbe_release(struct tlbe_priv *tlbe) { - if (ref->flags & E500_TLB_VALID) { + if (tlbe->flags & E500_TLB_VALID) { /* FIXME: don't log bogus pfn for TLB1 */ - trace_kvm_booke206_ref_release(ref->pfn, ref->flags); - ref->flags = 0; + trace_kvm_booke206_ref_release(tlbe->pfn, tlbe->flags); + tlbe->flags = 0; } } @@ -284,11 +284,8 @@ static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) int i; for (tlbsel = 0; tlbsel <= 1; tlbsel++) { - for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->gtlb_priv[tlbsel][i].ref; - kvmppc_e500_ref_release(ref); - } + for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) + kvmppc_e500_tlbe_release(&vcpu_e500->gtlb_priv[tlbsel][i]); } } @@ -304,18 +301,18 @@ void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) static void kvmppc_e500_setup_stlbe( struct kvm_vcpu *vcpu, struct kvm_book3e_206_tlb_entry *gtlbe, - int tsize, struct tlbe_ref *ref, u64 gvaddr, + int tsize, struct tlbe_priv *tlbe, u64 gvaddr, struct kvm_book3e_206_tlb_entry *stlbe) { - kvm_pfn_t pfn = ref->pfn; + kvm_pfn_t pfn = tlbe->pfn; u32 pr = vcpu->arch.shared->msr & MSR_PR; - bool writable = !!(ref->flags & E500_TLB_WRITABLE); + bool writable = !!(tlbe->flags & E500_TLB_WRITABLE); - BUG_ON(!(ref->flags & E500_TLB_VALID)); + BUG_ON(!(tlbe->flags & E500_TLB_VALID)); /* Force IPROT=0 for all guest mappings. */ stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; - stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR); + stlbe->mas2 = (gvaddr & MAS2_EPN) | (tlbe->flags & E500_TLB_MAS2_ATTR); stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | e500_shadow_mas3_attrib(gtlbe->mas7_3, writable, pr); } @@ -323,7 +320,7 @@ static void kvmppc_e500_setup_stlbe( static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, - struct tlbe_ref *ref) + struct tlbe_priv *tlbe) { struct kvm_memory_slot *slot; unsigned int psize; @@ -455,9 +452,9 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, } } - kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg, writable); + kvmppc_e500_tlbe_setup(tlbe, gtlbe, pfn, wimg, writable); kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, - ref, gvaddr, stlbe); + tlbe, gvaddr, stlbe); writable = tlbe_is_writable(stlbe); /* Clear i-cache for new pages */ @@ -474,17 +471,17 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel, struct kvm_book3e_206_tlb_entry *stlbe) { struct kvm_book3e_206_tlb_entry *gtlbe; - struct tlbe_ref *ref; + struct tlbe_priv *tlbe; int stlbsel = 0; int sesel = 0; int r; gtlbe = get_entry(vcpu_e500, 0, esel); - ref = &vcpu_e500->gtlb_priv[0][esel].ref; + tlbe = &vcpu_e500->gtlb_priv[0][esel]; r = kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), get_tlb_raddr(gtlbe) >> PAGE_SHIFT, - gtlbe, 0, stlbe, ref); + gtlbe, 0, stlbe, tlbe); if (r) return r; @@ -494,7 +491,7 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel, } static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, - struct tlbe_ref *ref, + struct tlbe_priv *tlbe, int esel) { unsigned int sesel = vcpu_e500->host_tlb1_nv++; @@ -507,10 +504,10 @@ static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } - vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; + vcpu_e500->gtlb_priv[1][esel].flags |= E500_TLB_BITMAP; vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1; - WARN_ON(!(ref->flags & E500_TLB_VALID)); + WARN_ON(!(tlbe->flags & E500_TLB_VALID)); return sesel; } @@ -522,24 +519,24 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, struct kvm_book3e_206_tlb_entry *stlbe, int esel) { - struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[1][esel].ref; + struct tlbe_priv *tlbe = &vcpu_e500->gtlb_priv[1][esel]; int sesel; int r; r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, - ref); + tlbe); if (r) return r; /* Use TLB0 when we can only map a page with 4k */ if (get_tlb_tsize(stlbe) == BOOK3E_PAGESZ_4K) { - vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_TLB0; + vcpu_e500->gtlb_priv[1][esel].flags |= E500_TLB_TLB0; write_stlbe(vcpu_e500, gtlbe, stlbe, 0, 0); return 0; } /* Otherwise map into TLB1 */ - sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, ref, esel); + sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, tlbe, esel); write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel); return 0; @@ -561,11 +558,11 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; /* Triggers after clear_tlb_privs or on initial mapping */ - if (!(priv->ref.flags & E500_TLB_VALID)) { + if (!(priv->flags & E500_TLB_VALID)) { kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); } else { kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, - &priv->ref, eaddr, &stlbe); + priv, eaddr, &stlbe); write_stlbe(vcpu_e500, gtlbe, &stlbe, 0, 0); } break; diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 9af969d2cc0cb..25a99108caff4 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -562,3 +562,4 @@ exc; std r10,32(3) li r5,4096 b .Ldst_aligned EXPORT_SYMBOL(__copy_tofrom_user) +EXPORT_SYMBOL(__copy_tofrom_user_base) diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 8474c682a1784..17dbcfbae25f0 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -5,13 +5,9 @@ * * Author: Anton Blanchard */ +#include #include -#ifndef SELFTEST_CASE -/* 0 == don't use VMX, 1 == use VMX */ -#define SELFTEST_CASE 0 -#endif - #ifdef __BIG_ENDIAN__ #define LVS(VRT,RA,RB) lvsl VRT,RA,RB #define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC @@ -47,10 +43,14 @@ ld r15,STK_REG(R15)(r1) ld r14,STK_REG(R14)(r1) .Ldo_err3: - bl CFUNC(exit_vmx_usercopy) + ld r6,STK_REG(R31)(r1) /* original destination pointer */ + ld r5,STK_REG(R29)(r1) /* original number of bytes */ + subf r7,r6,r3 /* #bytes copied */ + subf r3,r7,r5 /* #bytes not copied in r3 */ ld r0,STACKFRAMESIZE+16(r1) mtlr r0 - b .Lexit + addi r1,r1,STACKFRAMESIZE + blr #endif /* CONFIG_ALTIVEC */ .Ldo_err2: @@ -74,7 +74,6 @@ _GLOBAL(__copy_tofrom_user_power7) cmpldi r5,16 - cmpldi cr1,r5,3328 std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) @@ -82,12 +81,6 @@ _GLOBAL(__copy_tofrom_user_power7) blt .Lshort_copy -#ifdef CONFIG_ALTIVEC -test_feature = SELFTEST_CASE -BEGIN_FTR_SECTION - bgt cr1,.Lvmx_copy -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif .Lnonvmx_copy: /* Get the source 8B aligned */ @@ -263,23 +256,14 @@ err1; stb r0,0(r3) 15: li r3,0 blr -.Lunwind_stack_nonvmx_copy: - addi r1,r1,STACKFRAMESIZE - b .Lnonvmx_copy - -.Lvmx_copy: #ifdef CONFIG_ALTIVEC +_GLOBAL(__copy_tofrom_user_power7_vmx) mflr r0 std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl CFUNC(enter_vmx_usercopy) - cmpwi cr1,r3,0 - ld r0,STACKFRAMESIZE+16(r1) - ld r3,STK_REG(R31)(r1) - ld r4,STK_REG(R30)(r1) - ld r5,STK_REG(R29)(r1) - mtlr r0 + std r3,STK_REG(R31)(r1) + std r5,STK_REG(R29)(r1) /* * We prefetch both the source and destination using enhanced touch * instructions. We use a stream ID of 0 for the load side and @@ -300,8 +284,6 @@ err1; stb r0,0(r3) DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8) - beq cr1,.Lunwind_stack_nonvmx_copy - /* * If source and destination are not relatively aligned we use a * slower permute loop. @@ -478,7 +460,8 @@ err3; lbz r0,0(r4) err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b CFUNC(exit_vmx_usercopy) /* tail call optimise */ + li r3,0 + blr .Lvmx_unaligned_copy: /* Get the destination 16B aligned */ @@ -681,5 +664,7 @@ err3; lbz r0,0(r4) err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b CFUNC(exit_vmx_usercopy) /* tail call optimise */ + li r3,0 + blr +EXPORT_SYMBOL(__copy_tofrom_user_power7_vmx) #endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 54340912398fd..554b248002b4f 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -27,6 +27,7 @@ int enter_vmx_usercopy(void) return 1; } +EXPORT_SYMBOL(enter_vmx_usercopy); /* * This function must return 0 because we tail call optimise when calling @@ -49,6 +50,7 @@ int exit_vmx_usercopy(void) set_dec(1); return 0; } +EXPORT_SYMBOL(exit_vmx_usercopy); int enter_vmx_ops(void) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index a985fc96b9530..b7982d0243d48 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -30,6 +30,10 @@ #include #include +#include +#include +#include + #include unsigned long long memory_limit __initdata; @@ -268,6 +272,16 @@ void __init paging_init(void) void __init arch_mm_preinit(void) { + + /* + * Reserve large chunks of memory for use by CMA for kdump, fadump, KVM + * and hugetlb. These must be called after pageblock_order is + * initialised. + */ + fadump_cma_init(); + kdump_cma_reserve(); + kvm_cma_reserve(); + /* * book3s is limited to 16 page sizes due to encoding this in * a 4-bit field for slices. diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 82bbf63f0e575..7354e1d72f794 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -81,9 +81,6 @@ #ifdef CONFIG_PPC64 -/* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (6 * 8) - /* If dummy pass (!image), account for maximum possible instructions */ #define PPC_LI64(d, i) do { \ if (!image) \ @@ -219,8 +216,6 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass, struct codegen_context *ctx, int insn_idx, int jmp_off, int dst_reg, u32 code); - -int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx); #endif #endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 52162e4a7f84f..a62a9a92b7b57 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -450,7 +450,7 @@ bool bpf_jit_supports_subprog_tailcalls(void) bool bpf_jit_supports_kfunc_call(void) { - return true; + return IS_ENABLED(CONFIG_PPC64); } bool bpf_jit_supports_arena(void) @@ -638,19 +638,12 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context * for the traced function (BPF subprog/callee) to fetch it. */ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx, - int func_frame_offset, - int bpf_dummy_frame_size, int r4_off) + int bpf_frame_size, int r4_off) { if (IS_ENABLED(CONFIG_PPC64)) { - /* See Generated stack layout */ - int tailcallinfo_offset = BPF_PPC_TAILCALL; - - /* - * func_frame_offset = ...(1) - * bpf_dummy_frame_size + trampoline_frame_size - */ - EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset)); - EMIT(PPC_RAW_LD(_R3, _R4, -tailcallinfo_offset)); + EMIT(PPC_RAW_LD(_R4, _R1, bpf_frame_size)); + /* Refer to trampoline's Generated stack layout */ + EMIT(PPC_RAW_LD(_R3, _R4, -BPF_PPC_TAILCALL)); /* * Setting the tail_call_info in trampoline's frame @@ -658,22 +651,14 @@ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_conte */ EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT)); PPC_BCC_CONST_SHORT(COND_GT, 8); - EMIT(PPC_RAW_ADDI(_R3, _R4, bpf_jit_stack_tailcallinfo_offset(ctx))); + EMIT(PPC_RAW_ADDI(_R3, _R4, -BPF_PPC_TAILCALL)); + /* - * From ...(1) above: - * trampoline_frame_bottom = ...(2) - * func_frame_offset - bpf_dummy_frame_size - * - * Using ...(2) derived above: - * trampoline_tail_call_info_offset = ...(3) - * trampoline_frame_bottom - tailcallinfo_offset - * - * From ...(3): - * Use trampoline_tail_call_info_offset to write reference of main's - * tail_call_info in trampoline frame. + * Trampoline's tail_call_info is at the same offset, as that of + * any bpf program, with reference to previous frame. Update the + * address of main's tail_call_info in trampoline frame. */ - EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset - bpf_dummy_frame_size) - - tailcallinfo_offset)); + EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size - BPF_PPC_TAILCALL)); } else { /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ EMIT(PPC_RAW_LL(_R4, _R1, r4_off)); @@ -681,7 +666,7 @@ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_conte } static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx, - int func_frame_offset, int r4_off) + int bpf_frame_size, int r4_off) { if (IS_ENABLED(CONFIG_PPC32)) { /* @@ -692,12 +677,12 @@ static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_cont } } -static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, int func_frame_offset, - int nr_regs, int regs_off) +static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, + int bpf_frame_size, int nr_regs, int regs_off) { int param_save_area_offset; - param_save_area_offset = func_frame_offset; /* the two frames we alloted */ + param_save_area_offset = bpf_frame_size; param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ for (int i = 0; i < nr_regs; i++) { @@ -720,11 +705,11 @@ static void bpf_trampoline_restore_args_regs(u32 *image, struct codegen_context /* Used when we call into the traced function. Replicate parameter save area */ static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx, - int func_frame_offset, int nr_regs, int regs_off) + int bpf_frame_size, int nr_regs, int regs_off) { int param_save_area_offset; - param_save_area_offset = func_frame_offset; /* the two frames we alloted */ + param_save_area_offset = bpf_frame_size; param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ for (int i = 8; i < nr_regs; i++) { @@ -741,10 +726,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im void *func_addr) { int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0; - int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + int i, ret, nr_regs, retaddr_off, bpf_frame_size = 0; struct codegen_context codegen_ctx, *ctx; u32 *image = (u32 *)rw_image; ppc_inst_t branch_insn; @@ -770,24 +755,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im * Generated stack layout: * * func prev back chain [ back chain ] - * [ ] - * bpf prog redzone/tailcallcnt [ ... ] 64 bytes (64-bit powerpc) - * [ ] -- - * LR save area [ r0 save (64-bit) ] | header - * [ r0 save (32-bit) ] | - * dummy frame for unwind [ back chain 1 ] -- * [ tail_call_info ] optional - 64-bit powerpc * [ padding ] align stack frame * r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc * alt_lr_off [ real lr (ool stub)] optional - actual lr + * retaddr_off [ return address ] * [ r26 ] * nvr_off [ r25 ] nvr save area * retval_off [ return value ] * [ reg argN ] * [ ... ] - * regs_off [ reg_arg1 ] prog ctx context - * nregs_off [ args count ] - * ip_off [ traced function ] + * regs_off [ reg_arg1 ] prog_ctx + * nregs_off [ args count ] ((u64 *)prog_ctx)[-1] + * ip_off [ traced function ] ((u64 *)prog_ctx)[-2] * [ ... ] * run_ctx_off [ bpf_tramp_run_ctx ] * [ reg argN ] @@ -843,6 +823,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im nvr_off = bpf_frame_size; bpf_frame_size += 2 * SZL; + /* Save area for return address */ + retaddr_off = bpf_frame_size; + bpf_frame_size += SZL; + /* Optional save area for actual LR in case of ool ftrace */ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { alt_lr_off = bpf_frame_size; @@ -869,16 +853,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* Padding to align stack frame, if any */ bpf_frame_size = round_up(bpf_frame_size, SZL * 2); - /* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */ - bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64; - - /* Offset to the traced function's stack frame */ - func_frame_offset = bpf_dummy_frame_size + bpf_frame_size; - - /* Create dummy frame for unwind, store original return value */ + /* Store original return value */ EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF)); - /* Protect red zone where tail call count goes */ - EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size)); /* Create our stack frame */ EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size)); @@ -893,34 +869,44 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2) EMIT(PPC_RAW_STL(_R4, _R1, r4_off)); - bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, regs_off); + bpf_trampoline_save_args(image, ctx, bpf_frame_size, nr_regs, regs_off); - /* Save our return address */ + /* Save our LR/return address */ EMIT(PPC_RAW_MFLR(_R3)); if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off)); else - EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off)); /* - * Save ip address of the traced function. - * We could recover this from LR, but we will need to address for OOL trampoline, - * and optional GEP area. + * Derive IP address of the traced function. + * In case of CONFIG_PPC_FTRACE_OUT_OF_LINE or BPF program, LR points to the instruction + * after the 'bl' instruction in the OOL stub. Refer to ftrace_init_ool_stub() and + * bpf_arch_text_poke() for OOL stub of kernel functions and bpf programs respectively. + * Relevant stub sequence: + * + * bl + * LR (R3) => mtlr r0 + * b + * + * Recover kernel function/bpf program address from the unconditional + * branch instruction at the end of OOL stub. */ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) || flags & BPF_TRAMP_F_IP_ARG) { EMIT(PPC_RAW_LWZ(_R4, _R3, 4)); EMIT(PPC_RAW_SLWI(_R4, _R4, 6)); EMIT(PPC_RAW_SRAWI(_R4, _R4, 6)); EMIT(PPC_RAW_ADD(_R3, _R3, _R4)); - EMIT(PPC_RAW_ADDI(_R3, _R3, 4)); } if (flags & BPF_TRAMP_F_IP_ARG) EMIT(PPC_RAW_STL(_R3, _R1, ip_off)); - if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) - /* Fake our LR for unwind */ - EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + /* Fake our LR for BPF_TRAMP_F_CALL_ORIG case */ + EMIT(PPC_RAW_ADDI(_R3, _R3, 4)); + EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off)); + } /* Save function arg count -- see bpf_get_func_arg_cnt() */ EMIT(PPC_RAW_LI(_R3, nr_regs)); @@ -958,20 +944,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* Call the traced function */ if (flags & BPF_TRAMP_F_CALL_ORIG) { /* - * The address in LR save area points to the correct point in the original function + * retaddr on trampoline stack points to the correct point in the original function * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction * sequence */ - EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + EMIT(PPC_RAW_LL(_R3, _R1, retaddr_off)); EMIT(PPC_RAW_MTCTR(_R3)); /* Replicate tail_call_cnt before calling the original BPF prog */ if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) - bpf_trampoline_setup_tail_call_info(image, ctx, func_frame_offset, - bpf_dummy_frame_size, r4_off); + bpf_trampoline_setup_tail_call_info(image, ctx, bpf_frame_size, r4_off); /* Restore args */ - bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off); + bpf_trampoline_restore_args_stack(image, ctx, bpf_frame_size, nr_regs, regs_off); /* Restore TOC for 64-bit */ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) @@ -985,7 +970,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* Restore updated tail_call_cnt */ if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) - bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off); + bpf_trampoline_restore_tail_call_cnt(image, ctx, bpf_frame_size, r4_off); /* Reserve space to patch branch instruction to skip fexit progs */ if (ro_image) /* image is NULL for dummy pass */ @@ -1037,7 +1022,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im EMIT(PPC_RAW_LD(_R2, _R1, 24)); if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* Skip the traced function and return to parent */ - EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size)); EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); EMIT(PPC_RAW_MTLR(_R0)); EMIT(PPC_RAW_BLR()); @@ -1045,13 +1030,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off)); EMIT(PPC_RAW_MTLR(_R0)); - EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size)); EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); EMIT(PPC_RAW_BLR()); } else { - EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + PPC_LR_STKOFF)); + EMIT(PPC_RAW_LL(_R0, _R1, retaddr_off)); EMIT(PPC_RAW_MTCTR(_R0)); - EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size)); EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); EMIT(PPC_RAW_MTLR(_R0)); EMIT(PPC_RAW_BCTR()); diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index b1a3945ccc9fd..c5e26d231cd51 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -32,23 +32,27 @@ * * [ prev sp ] <------------- * [ tail_call_info ] 8 | - * [ nv gpr save area ] 6*8 + (12*8) | + * [ nv gpr save area ] (6 * 8) | + * [ addl. nv gpr save area] (12 * 8) | <--- exception boundary/callback program * [ local_tmp_var ] 24 | * fp (r31) --> [ ebpf stack space ] upto 512 | * [ frame header ] 32/112 | * sp (r1) ---> [ stack pointer ] -------------- * - * Additional (12*8) in 'nv gpr save area' only in case of - * exception boundary. + * Additional (12 * 8) in 'nv gpr save area' only in case of + * exception boundary/callback. */ +/* BPF non-volatile registers save area size */ +#define BPF_PPC_STACK_SAVE (6 * 8) + /* for bpf JIT code internal usage */ #define BPF_PPC_STACK_LOCALS 24 /* * for additional non volatile registers(r14-r25) to be saved * at exception boundary */ -#define BPF_PPC_EXC_STACK_SAVE (12*8) +#define BPF_PPC_EXC_STACK_SAVE (12 * 8) /* stack frame excluding BPF stack, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ @@ -125,12 +129,13 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ ... ] | * sp (r1) ---> [ stack pointer ] -------------- * [ tail_call_info ] 8 - * [ nv gpr save area ] 6*8 + (12*8) + * [ nv gpr save area ] (6 * 8) + * [ addl. nv gpr save area] (12 * 8) <--- exception boundary/callback program * [ local_tmp_var ] 24 * [ unused red zone ] 224 * - * Additional (12*8) in 'nv gpr save area' only in case of - * exception boundary. + * Additional (12 * 8) in 'nv gpr save area' only in case of + * exception boundary/callback. */ static int bpf_jit_stack_local(struct codegen_context *ctx) { @@ -148,7 +153,7 @@ static int bpf_jit_stack_local(struct codegen_context *ctx) } } -int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx) +static int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx) { return bpf_jit_stack_local(ctx) + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE; } @@ -237,10 +242,6 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) if (bpf_has_stack_frame(ctx) && !ctx->exception_cb) { /* - * exception_cb uses boundary frame after stack walk. - * It can simply use redzone, this optimization reduces - * stack walk loop by one level. - * * We need a stack frame, but we don't necessarily need to * save/restore LR unless we call other functions */ @@ -284,6 +285,22 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * program(main prog) as third arg */ EMIT(PPC_RAW_MR(_R1, _R5)); + /* + * Exception callback reuses the stack frame of exception boundary. + * But BPF stack depth of exception callback and exception boundary + * don't have to be same. If BPF stack depth is different, adjust the + * stack frame size considering BPF stack depth of exception callback. + * The non-volatile register save area remains unchanged. These non- + * volatile registers are restored in exception callback's epilogue. + */ + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R5, 0)); + EMIT(PPC_RAW_SUB(bpf_to_ppc(TMP_REG_2), bpf_to_ppc(TMP_REG_1), _R1)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_2), bpf_to_ppc(TMP_REG_2), + -BPF_PPC_EXC_STACKFRAME)); + EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_2), ctx->stack_size)); + PPC_BCC_CONST_SHORT(COND_EQ, 12); + EMIT(PPC_RAW_MR(_R1, bpf_to_ppc(TMP_REG_1))); + EMIT(PPC_RAW_STDU(_R1, _R1, -(BPF_PPC_EXC_STACKFRAME + ctx->stack_size))); } /* @@ -482,6 +499,83 @@ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context * return 0; } +static int zero_extend(u32 *image, struct codegen_context *ctx, u32 src_reg, u32 dst_reg, u32 size) +{ + switch (size) { + case 1: + /* zero-extend 8 bits into 64 bits */ + EMIT(PPC_RAW_RLDICL(dst_reg, src_reg, 0, 56)); + return 0; + case 2: + /* zero-extend 16 bits into 64 bits */ + EMIT(PPC_RAW_RLDICL(dst_reg, src_reg, 0, 48)); + return 0; + case 4: + /* zero-extend 32 bits into 64 bits */ + EMIT(PPC_RAW_RLDICL(dst_reg, src_reg, 0, 32)); + fallthrough; + case 8: + /* Nothing to do */ + return 0; + default: + return -1; + } +} + +static int sign_extend(u32 *image, struct codegen_context *ctx, u32 src_reg, u32 dst_reg, u32 size) +{ + switch (size) { + case 1: + /* sign-extend 8 bits into 64 bits */ + EMIT(PPC_RAW_EXTSB(dst_reg, src_reg)); + return 0; + case 2: + /* sign-extend 16 bits into 64 bits */ + EMIT(PPC_RAW_EXTSH(dst_reg, src_reg)); + return 0; + case 4: + /* sign-extend 32 bits into 64 bits */ + EMIT(PPC_RAW_EXTSW(dst_reg, src_reg)); + fallthrough; + case 8: + /* Nothing to do */ + return 0; + default: + return -1; + } +} + +/* + * Handle powerpc ABI expectations from caller: + * - Unsigned arguments are zero-extended. + * - Signed arguments are sign-extended. + */ +static int prepare_for_kfunc_call(const struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, + const struct bpf_insn *insn) +{ + const struct btf_func_model *m = bpf_jit_find_kfunc_model(fp, insn); + int i; + + if (!m) + return -1; + + for (i = 0; i < m->nr_args; i++) { + /* Note that BPF ABI only allows up to 5 args for kfuncs */ + u32 reg = bpf_to_ppc(BPF_REG_1 + i), size = m->arg_size[i]; + + if (!(m->arg_flags[i] & BTF_FMODEL_SIGNED_ARG)) { + if (zero_extend(image, ctx, reg, reg, size)) + return -1; + } else { + if (sign_extend(image, ctx, reg, reg, size)) + return -1; + } + } + + return 0; +} + static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* @@ -522,9 +616,30 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* * tail_call_info++; <- Actual value of tcc here + * Writeback this updated value only if tailcall succeeds. */ EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1)); + /* prog = array->ptrs[index]; */ + EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_2), b2p_index, 8)); + EMIT(PPC_RAW_ADD(bpf_to_ppc(TMP_REG_2), bpf_to_ppc(TMP_REG_2), b2p_bpf_array)); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), bpf_to_ppc(TMP_REG_2), + offsetof(struct bpf_array, ptrs))); + + /* + * if (prog == NULL) + * goto out; + */ + EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_2), 0)); + PPC_BCC_SHORT(COND_EQ, out); + + /* goto *(prog->bpf_func + prologue_size); */ + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), bpf_to_ppc(TMP_REG_2), + offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_2), bpf_to_ppc(TMP_REG_2), + FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); + EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_2))); + /* * Before writing updated tail_call_info, distinguish if current frame * is storing a reference to tail_call_info or actual tcc value in @@ -539,24 +654,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* Writeback updated value to tail_call_info */ EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), 0)); - /* prog = array->ptrs[index]; */ - EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8)); - EMIT(PPC_RAW_ADD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), b2p_bpf_array)); - EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_array, ptrs))); - - /* - * if (prog == NULL) - * goto out; - */ - EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_1), 0)); - PPC_BCC_SHORT(COND_EQ, out); - - /* goto *(prog->bpf_func + prologue_size); */ - EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_prog, bpf_func))); - EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), - FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); - EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1))); - /* tear down stack, restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); @@ -1123,14 +1220,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code /* special mov32 for zext */ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31)); break; - } else if (off == 8) { - EMIT(PPC_RAW_EXTSB(dst_reg, src_reg)); - } else if (off == 16) { - EMIT(PPC_RAW_EXTSH(dst_reg, src_reg)); - } else if (off == 32) { - EMIT(PPC_RAW_EXTSW(dst_reg, src_reg)); - } else if (dst_reg != src_reg) - EMIT(PPC_RAW_MR(dst_reg, src_reg)); + } + if (off == 0) { + /* MOV */ + if (dst_reg != src_reg) + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + } else { + /* MOVSX: dst = (s8,s16,s32)src (off = 8,16,32) */ + if (sign_extend(image, ctx, src_reg, dst_reg, off / 8)) + return -1; + } goto bpf_alu32_trunc; case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ @@ -1598,6 +1697,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code if (ret < 0) return ret; + /* Take care of powerpc ABI requirements before kfunc call */ + if (insn[i].src_reg == BPF_PSEUDO_KFUNC_CALL) { + if (prepare_for_kfunc_call(fp, image, ctx, &insn[i])) + return -1; + } + ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr); if (ret) return ret; diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 26aa26482c9ac..992cc5c982144 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -103,6 +103,11 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + perf_callchain_store(entry, perf_arch_instruction_pointer(regs)); + + if (!current->mm) + return; + if (!is_32bit_task()) perf_callchain_user_64(entry, regs); else diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index ddcc2d8aa64a5..0de21c5d272c2 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -142,7 +142,6 @@ void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, next_ip = perf_arch_instruction_pointer(regs); lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, next_ip); while (entry->nr < entry->max_stack) { fp = (unsigned int __user *) (unsigned long) sp; diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 115d1c105e8a8..30fb61c5f0cb0 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -77,7 +77,6 @@ void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, next_ip = perf_arch_instruction_pointer(regs); lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, next_ip); while (entry->nr < entry->max_stack) { fp = (unsigned long __user *) sp; diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index 2b5d187d9b62d..9ef8fb39dd1b1 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -155,8 +155,8 @@ machine_device_initcall(mpc83xx_km, mpc83xx_declare_of_platform_devices); /* list of the supported boards */ static char *board[] __initdata = { - "Keymile,KMETER1", - "Keymile,kmpbec8321", + "keymile,KMETER1", + "keymile,kmpbec8321", NULL }; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index f399917c17bdc..bac02c83bb3e4 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -276,7 +276,7 @@ config PPC_BOOK3S config PPC_E500 select FSL_EMB_PERFMON bool - select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 + select ARCH_SUPPORTS_HUGETLBFS select PPC_SMP_MUXED_IPI select PPC_DOORBELL select PPC_KUEP @@ -337,7 +337,7 @@ config BOOKE config PTE_64BIT bool depends on 44x || PPC_E500 || PPC_86xx - default y if PHYS_64BIT + default y if PPC_E500 || PHYS_64BIT config PHYS_64BIT bool 'Large physical address support' if PPC_E500 || PPC_86xx diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 64ffc6476ad6e..8285b9a29fbfa 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -605,7 +605,7 @@ static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq &pseries_msi_irq_chip, pseries_dev); } - pseries_dev->msi_used++; + pseries_dev->msi_used += nr_irqs; return 0; out: diff --git a/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh b/arch/powerpc/tools/check-fpatchable-function-entry.sh similarity index 100% rename from arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh rename to arch/powerpc/tools/check-fpatchable-function-entry.sh diff --git a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh index bac186bdf64a7..9218d43aeb548 100755 --- a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh +++ b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh @@ -15,9 +15,9 @@ if [ -z "$is_64bit" ]; then RELOCATION=R_PPC_ADDR32 fi -num_ool_stubs_total=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | +num_ool_stubs_total=$($objdump -r -j __patchable_function_entries -d "$vmlinux_o" | grep -c "$RELOCATION") -num_ool_stubs_inittext=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | +num_ool_stubs_inittext=$($objdump -r -j __patchable_function_entries -d "$vmlinux_o" | grep -e ".init.text" -e ".text.startup" | grep -c "$RELOCATION") num_ool_stubs_text=$((num_ool_stubs_total - num_ool_stubs_inittext)) diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index cac3c2b51d724..5ec503288555d 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -182,9 +183,14 @@ int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu, unsigned long *out_val) { struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + unsigned long regs_max = sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long); - if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long)) + if (!riscv_isa_extension_available(vcpu->arch.isa, SSAIA)) return -ENOENT; + if (reg_num >= regs_max) + return -ENOENT; + + reg_num = array_index_nospec(reg_num, regs_max); *out_val = 0; if (kvm_riscv_aia_available()) @@ -198,9 +204,14 @@ int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu, unsigned long val) { struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + unsigned long regs_max = sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long); - if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long)) + if (!riscv_isa_extension_available(vcpu->arch.isa, SSAIA)) return -ENOENT; + if (reg_num >= regs_max) + return -ENOENT; + + reg_num = array_index_nospec(reg_num, regs_max); if (kvm_riscv_aia_available()) { ((unsigned long *)csr)[reg_num] = val; diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c index d1e50bf5c3512..3464f3351df72 100644 --- a/arch/riscv/kvm/aia_aplic.c +++ b/arch/riscv/kvm/aia_aplic.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -45,7 +46,7 @@ static u32 aplic_read_sourcecfg(struct aplic *aplic, u32 irq) if (!irq || aplic->nr_irqs <= irq) return 0; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); ret = irqd->sourcecfg; @@ -61,7 +62,7 @@ static void aplic_write_sourcecfg(struct aplic *aplic, u32 irq, u32 val) if (!irq || aplic->nr_irqs <= irq) return; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; if (val & APLIC_SOURCECFG_D) val = 0; @@ -81,7 +82,7 @@ static u32 aplic_read_target(struct aplic *aplic, u32 irq) if (!irq || aplic->nr_irqs <= irq) return 0; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); ret = irqd->target; @@ -97,7 +98,7 @@ static void aplic_write_target(struct aplic *aplic, u32 irq, u32 val) if (!irq || aplic->nr_irqs <= irq) return; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; val &= APLIC_TARGET_EIID_MASK | (APLIC_TARGET_HART_IDX_MASK << APLIC_TARGET_HART_IDX_SHIFT) | @@ -116,7 +117,7 @@ static bool aplic_read_pending(struct aplic *aplic, u32 irq) if (!irq || aplic->nr_irqs <= irq) return false; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); ret = (irqd->state & APLIC_IRQ_STATE_PENDING) ? true : false; @@ -132,7 +133,7 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending) if (!irq || aplic->nr_irqs <= irq) return; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); @@ -170,7 +171,7 @@ static bool aplic_read_enabled(struct aplic *aplic, u32 irq) if (!irq || aplic->nr_irqs <= irq) return false; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); ret = (irqd->state & APLIC_IRQ_STATE_ENABLED) ? true : false; @@ -186,7 +187,7 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled) if (!irq || aplic->nr_irqs <= irq) return; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); if (enabled) @@ -205,7 +206,7 @@ static bool aplic_read_input(struct aplic *aplic, u32 irq) if (!irq || aplic->nr_irqs <= irq) return false; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); @@ -254,7 +255,7 @@ static void aplic_update_irq_range(struct kvm *kvm, u32 first, u32 last) for (irq = first; irq <= last; irq++) { if (!irq || aplic->nr_irqs <= irq) continue; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); @@ -283,7 +284,7 @@ int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level) if (!aplic || !source || (aplic->nr_irqs <= source)) return -ENODEV; - irqd = &aplic->irqs[source]; + irqd = &aplic->irqs[array_index_nospec(source, aplic->nr_irqs)]; ie = (aplic->domaincfg & APLIC_DOMAINCFG_IE) ? true : false; raw_spin_lock_irqsave(&irqd->lock, flags); diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c index b195a93add1ce..49c71d3cdb007 100644 --- a/arch/riscv/kvm/aia_device.c +++ b/arch/riscv/kvm/aia_device.c @@ -11,6 +11,7 @@ #include #include #include +#include static int aia_create(struct kvm_device *dev, u32 type) { @@ -22,6 +23,9 @@ static int aia_create(struct kvm_device *dev, u32 type) if (irqchip_in_kernel(kvm)) return -EEXIST; + if (!riscv_isa_extension_available(NULL, SSAIA)) + return -ENODEV; + ret = -EBUSY; if (kvm_trylock_all_vcpus(kvm)) return ret; @@ -437,7 +441,7 @@ static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - int nr_vcpus; + int nr_vcpus, r = -ENXIO; switch (attr->group) { case KVM_DEV_RISCV_AIA_GRP_CONFIG: @@ -466,12 +470,18 @@ static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) } break; case KVM_DEV_RISCV_AIA_GRP_APLIC: - return kvm_riscv_aia_aplic_has_attr(dev->kvm, attr->attr); + mutex_lock(&dev->kvm->lock); + r = kvm_riscv_aia_aplic_has_attr(dev->kvm, attr->attr); + mutex_unlock(&dev->kvm->lock); + break; case KVM_DEV_RISCV_AIA_GRP_IMSIC: - return kvm_riscv_aia_imsic_has_attr(dev->kvm, attr->attr); + mutex_lock(&dev->kvm->lock); + r = kvm_riscv_aia_imsic_has_attr(dev->kvm, attr->attr); + mutex_unlock(&dev->kvm->lock); + break; } - return -ENXIO; + return r; } struct kvm_device_ops kvm_riscv_aia_device_ops = { diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index 06752fa247987..8786f52cf65a2 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -908,6 +908,10 @@ int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel, int r, rc = KVM_INSN_CONTINUE_NEXT_SEPC; struct imsic *imsic = vcpu->arch.aia_context.imsic_state; + /* If IMSIC vCPU state not initialized then forward to user space */ + if (!imsic) + return KVM_INSN_EXIT_TO_USER_SPACE; + if (isel == KVM_RISCV_AIA_IMSIC_TOPEI) { /* Read pending and enabled interrupt with highest priority */ topei = imsic_mrif_topei(imsic->swfile, imsic->nr_eix, diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 0b75eb2a1820e..088d33ba90eda 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -245,6 +245,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) { struct kvm_gstage gstage; + bool mmu_locked; if (!kvm->arch.pgd) return false; @@ -253,9 +254,12 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) gstage.flags = 0; gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); gstage.pgd = kvm->arch.pgd; + mmu_locked = spin_trylock(&kvm->mmu_lock); kvm_riscv_gstage_unmap_range(&gstage, range->start << PAGE_SHIFT, (range->end - range->start) << PAGE_SHIFT, range->may_block); + if (mmu_locked) + spin_unlock(&kvm->mmu_lock); return false; } @@ -535,7 +539,7 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, goto out_unlock; /* Check if we are backed by a THP and thus use block mapping if possible */ - if (vma_pagesize == PAGE_SIZE) + if (!logging && (vma_pagesize == PAGE_SIZE)) vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, &hfn, &gpa); if (writable) { diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index a55a95da54d0f..fdd99ac1e7148 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -24,7 +24,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, ecall_exit_stat), STATS_DESC_COUNTER(VCPU, wfi_exit_stat), diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c index 030904d82b583..bd5a9e7e71656 100644 --- a/arch/riscv/kvm/vcpu_fp.c +++ b/arch/riscv/kvm/vcpu_fp.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -93,9 +94,11 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu, if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr)) reg_val = &cntx->fp.f.fcsr; else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) && - reg_num <= KVM_REG_RISCV_FP_F_REG(f[31])) + reg_num <= KVM_REG_RISCV_FP_F_REG(f[31])) { + reg_num = array_index_nospec(reg_num, + ARRAY_SIZE(cntx->fp.f.f)); reg_val = &cntx->fp.f.f[reg_num]; - else + } else return -ENOENT; } else if ((rtype == KVM_REG_RISCV_FP_D) && riscv_isa_extension_available(vcpu->arch.isa, d)) { @@ -107,6 +110,8 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu, reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) { if (KVM_REG_SIZE(reg->id) != sizeof(u64)) return -EINVAL; + reg_num = array_index_nospec(reg_num, + ARRAY_SIZE(cntx->fp.d.f)); reg_val = &cntx->fp.d.f[reg_num]; } else return -ENOENT; @@ -138,9 +143,11 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu, if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr)) reg_val = &cntx->fp.f.fcsr; else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) && - reg_num <= KVM_REG_RISCV_FP_F_REG(f[31])) + reg_num <= KVM_REG_RISCV_FP_F_REG(f[31])) { + reg_num = array_index_nospec(reg_num, + ARRAY_SIZE(cntx->fp.f.f)); reg_val = &cntx->fp.f.f[reg_num]; - else + } else return -ENOENT; } else if ((rtype == KVM_REG_RISCV_FP_D) && riscv_isa_extension_available(vcpu->arch.isa, d)) { @@ -152,6 +159,8 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu, reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) { if (KVM_REG_SIZE(reg->id) != sizeof(u64)) return -EINVAL; + reg_num = array_index_nospec(reg_num, + ARRAY_SIZE(cntx->fp.d.f)); reg_val = &cntx->fp.d.f[reg_num]; } else return -ENOENT; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index e7ab6cb006461..45ecc0082e902 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -127,6 +128,7 @@ static int kvm_riscv_vcpu_isa_check_host(unsigned long kvm_ext, unsigned long *g kvm_ext >= ARRAY_SIZE(kvm_isa_ext_arr)) return -ENOENT; + kvm_ext = array_index_nospec(kvm_ext, ARRAY_SIZE(kvm_isa_ext_arr)); *guest_ext = kvm_isa_ext_arr[kvm_ext]; switch (*guest_ext) { case RISCV_ISA_EXT_SMNPM: @@ -443,13 +445,16 @@ static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu, unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_CORE); + unsigned long regs_max = sizeof(struct kvm_riscv_core) / sizeof(unsigned long); unsigned long reg_val; if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) return -EINVAL; - if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long)) + if (reg_num >= regs_max) return -ENOENT; + reg_num = array_index_nospec(reg_num, regs_max); + if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc)) reg_val = cntx->sepc; else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num && @@ -476,13 +481,16 @@ static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu, unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_CORE); + unsigned long regs_max = sizeof(struct kvm_riscv_core) / sizeof(unsigned long); unsigned long reg_val; if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) return -EINVAL; - if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long)) + if (reg_num >= regs_max) return -ENOENT; + reg_num = array_index_nospec(reg_num, regs_max); + if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -507,10 +515,13 @@ static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu, unsigned long *out_val) { struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + unsigned long regs_max = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long); - if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) + if (reg_num >= regs_max) return -ENOENT; + reg_num = array_index_nospec(reg_num, regs_max); + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { kvm_riscv_vcpu_flush_interrupts(vcpu); *out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK; @@ -526,10 +537,13 @@ static int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu, unsigned long reg_val) { struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + unsigned long regs_max = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long); - if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) + if (reg_num >= regs_max) return -ENOENT; + reg_num = array_index_nospec(reg_num, regs_max); + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { reg_val &= VSIP_VALID_MASK; reg_val <<= VSIP_TO_HVIP_SHIFT; @@ -548,10 +562,15 @@ static inline int kvm_riscv_vcpu_smstateen_set_csr(struct kvm_vcpu *vcpu, unsigned long reg_val) { struct kvm_vcpu_smstateen_csr *csr = &vcpu->arch.smstateen_csr; + unsigned long regs_max = sizeof(struct kvm_riscv_smstateen_csr) / + sizeof(unsigned long); - if (reg_num >= sizeof(struct kvm_riscv_smstateen_csr) / - sizeof(unsigned long)) - return -EINVAL; + if (!riscv_isa_extension_available(vcpu->arch.isa, SMSTATEEN)) + return -ENOENT; + if (reg_num >= regs_max) + return -ENOENT; + + reg_num = array_index_nospec(reg_num, regs_max); ((unsigned long *)csr)[reg_num] = reg_val; return 0; @@ -562,10 +581,15 @@ static int kvm_riscv_vcpu_smstateen_get_csr(struct kvm_vcpu *vcpu, unsigned long *out_val) { struct kvm_vcpu_smstateen_csr *csr = &vcpu->arch.smstateen_csr; + unsigned long regs_max = sizeof(struct kvm_riscv_smstateen_csr) / + sizeof(unsigned long); - if (reg_num >= sizeof(struct kvm_riscv_smstateen_csr) / - sizeof(unsigned long)) - return -EINVAL; + if (!riscv_isa_extension_available(vcpu->arch.isa, SMSTATEEN)) + return -ENOENT; + if (reg_num >= regs_max) + return -ENOENT; + + reg_num = array_index_nospec(reg_num, regs_max); *out_val = ((unsigned long *)csr)[reg_num]; return 0; @@ -595,10 +619,7 @@ static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu, rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, ®_val); break; case KVM_REG_RISCV_CSR_SMSTATEEN: - rc = -EINVAL; - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) - rc = kvm_riscv_vcpu_smstateen_get_csr(vcpu, reg_num, - ®_val); + rc = kvm_riscv_vcpu_smstateen_get_csr(vcpu, reg_num, ®_val); break; default: rc = -ENOENT; @@ -640,10 +661,7 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu, rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val); break; case KVM_REG_RISCV_CSR_SMSTATEEN: - rc = -EINVAL; - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) - rc = kvm_riscv_vcpu_smstateen_set_csr(vcpu, reg_num, - reg_val); + rc = kvm_riscv_vcpu_smstateen_set_csr(vcpu, reg_num, reg_val); break; default: rc = -ENOENT; diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 4d8d5e9aa53d4..e873430e596b2 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -87,7 +88,8 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code) { - return hw_event_perf_map[sbi_event_code]; + return hw_event_perf_map[array_index_nospec(sbi_event_code, + SBI_PMU_HW_GENERAL_MAX)]; } static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code) @@ -218,6 +220,7 @@ static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, return -EINVAL; } + cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS); pmc = &kvpmu->pmc[cidx]; if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW) @@ -244,6 +247,7 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, return -EINVAL; } + cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS); pmc = &kvpmu->pmc[cidx]; if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { @@ -520,11 +524,12 @@ int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx, { struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); - if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) { + if (cidx >= RISCV_KVM_MAX_COUNTERS || cidx == 1) { retdata->err_val = SBI_ERR_INVALID_PARAM; return 0; } + cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS); retdata->out_val = kvpmu->pmc[cidx].cinfo.value; return 0; @@ -559,7 +564,8 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, } /* Start the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { - pmc_index = i + ctr_base; + pmc_index = array_index_nospec(i + ctr_base, + RISCV_KVM_MAX_COUNTERS); if (!test_bit(pmc_index, kvpmu->pmc_in_use)) continue; /* The guest started the counter again. Reset the overflow status */ @@ -630,7 +636,8 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, /* Stop the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { - pmc_index = i + ctr_base; + pmc_index = array_index_nospec(i + ctr_base, + RISCV_KVM_MAX_COUNTERS); if (!test_bit(pmc_index, kvpmu->pmc_in_use)) continue; pmc = &kvpmu->pmc[pmc_index]; @@ -761,6 +768,7 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba } } + ctr_idx = array_index_nospec(ctr_idx, RISCV_KVM_MAX_COUNTERS); pmc = &kvpmu->pmc[ctr_idx]; pmc->idx = ctr_idx; diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index 58bce57dc55bf..13c63ae1a78b2 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -13,7 +13,7 @@ #include #include -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 7fdf960191d37..d10a17e6531da 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -147,10 +147,8 @@ void noinstr do_io_irq(struct pt_regs *regs) bool from_idle; from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); - if (from_idle) { + if (from_idle) update_timer_idle(); - regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); - } irq_enter_rcu(); @@ -176,6 +174,9 @@ void noinstr do_io_irq(struct pt_regs *regs) set_irq_regs(old_regs); irqentry_exit(regs, state); + + if (from_idle) + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); } void noinstr do_ext_irq(struct pt_regs *regs) @@ -185,10 +186,8 @@ void noinstr do_ext_irq(struct pt_regs *regs) bool from_idle; from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); - if (from_idle) { + if (from_idle) update_timer_idle(); - regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); - } irq_enter_rcu(); @@ -210,6 +209,9 @@ void noinstr do_ext_irq(struct pt_regs *regs) irq_exit_rcu(); set_irq_regs(old_regs); irqentry_exit(regs, state); + + if (from_idle) + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); } static void show_msi_interrupt(struct seq_file *p, int irq) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bc7d6fa66eafd..3eb60aa932ecc 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -65,7 +65,7 @@ #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ (KVM_MAX_VCPUS + LOCAL_IRQS)) -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_COUNTER(VM, inject_io), STATS_DESC_COUNTER(VM, inject_float_mchk), @@ -91,7 +91,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, exit_userspace), STATS_DESC_COUNTER(VCPU, exit_null), diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ff07c45e3c731..6e4e3ef9b8c72 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2485,7 +2485,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \ KVM_X86_QUIRK_SLOT_ZAP_ALL | \ KVM_X86_QUIRK_STUFF_FEATURE_MSRS | \ - KVM_X86_QUIRK_IGNORE_GUEST_PAT) + KVM_X86_QUIRK_IGNORE_GUEST_PAT | \ + KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM) #define KVM_X86_CONDITIONAL_QUIRKS \ (KVM_X86_QUIRK_CD_NW_CLEARED | \ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 846a63215ce14..0d4538fa6c31a 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -476,6 +476,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) #define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9) +#define KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM (1 << 10) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d93f87f29d03b..961714e6adae1 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1894,6 +1894,7 @@ void __init check_x2apic(void) static inline void try_to_enable_x2apic(int remap_mode) { } static inline void __x2apic_enable(void) { } +static inline void __x2apic_disable(void) { } #endif /* !CONFIG_X86_X2APIC */ void __init enable_IR_x2apic(void) @@ -2456,6 +2457,11 @@ static void lapic_resume(void *data) if (x2apic_mode) { __x2apic_enable(); } else { + if (x2apic_enabled()) { + pr_warn_once("x2apic: re-enabled by firmware during resume. Disabling\n"); + __x2apic_disable(); + } + /* * Make sure the APICBASE points to the right address * diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index d2486506a8086..8137927e7387e 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -776,7 +776,10 @@ do { \ #define SYNTHESIZED_F(name) \ ({ \ kvm_cpu_cap_synthesized |= feature_bit(name); \ - F(name); \ + \ + BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES); \ + if (boot_cpu_has(X86_FEATURE_##name)) \ + F(name); \ }) /* diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 30202942289ac..9b140bbdc1d83 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1981,16 +1981,17 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) goto out_flush_all; - if (is_noncanonical_invlpg_address(entries[i], vcpu)) - continue; - /* * Lower 12 bits of 'address' encode the number of additional * pages to flush. */ gva = entries[i] & PAGE_MASK; - for (j = 0; j < (entries[i] & ~PAGE_MASK) + 1; j++) + for (j = 0; j < (entries[i] & ~PAGE_MASK) + 1; j++) { + if (is_noncanonical_invlpg_address(gva + j * PAGE_SIZE, vcpu)) + continue; + kvm_x86_call(flush_tlb_gva)(vcpu, gva + j * PAGE_SIZE); + } ++vcpu->stat.tlb_flush; } diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index bb257793b6cb9..eed96ff6e7229 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -321,7 +321,8 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kimn, &ioapic->mask_notifier_list, link) + hlist_for_each_entry_srcu(kimn, &ioapic->mask_notifier_list, link, + srcu_read_lock_held(&kvm->irq_srcu)) if (kimn->irq == gsi) kimn->func(kimn, mask); srcu_read_unlock(&kvm->irq_srcu, idx); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index f92214b1a9383..f7ec7914e3c47 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -189,12 +189,12 @@ static void avic_activate_vmcb(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK); - vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK; vmcb->control.avic_physical_id |= avic_get_max_physical_id(vcpu); - vmcb->control.int_ctl |= AVIC_ENABLE_MASK; + svm_clr_intercept(svm, INTERCEPT_CR8_WRITE); + /* * Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR * accesses, while interrupt injection to a running vCPU can be @@ -226,6 +226,9 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm) vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK); vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK; + if (!sev_es_guest(svm->vcpu.kvm)) + svm_set_intercept(svm, INTERCEPT_CR8_WRITE); + /* * If running nested and the guest uses its own MSR bitmap, there * is no need to update L0's msr bitmap @@ -368,7 +371,7 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb) vmcb->control.avic_physical_id = __sme_set(__pa(kvm_svm->avic_physical_id_table)); vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE; - if (kvm_apicv_activated(svm->vcpu.kvm)) + if (kvm_vcpu_apicv_active(&svm->vcpu)) avic_activate_vmcb(svm); else avic_deactivate_vmcb(svm); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 53ab6ce3cc26d..b36c33255bed6 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -418,6 +418,15 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu) return __nested_vmcb_check_controls(vcpu, ctl); } +int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu) +{ + if (!nested_vmcb_check_save(vcpu) || + !nested_vmcb_check_controls(vcpu)) + return -EINVAL; + + return 0; +} + /* * If a feature is not advertised to L1, clear the corresponding vmcb12 * intercept. @@ -1028,8 +1037,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); - if (!nested_vmcb_check_save(vcpu) || - !nested_vmcb_check_controls(vcpu)) { + if (nested_svm_check_cached_vmcb12(vcpu) < 0) { vmcb12->control.exit_code = SVM_EXIT_ERR; vmcb12->control.exit_info_1 = 0; vmcb12->control.exit_info_2 = 0; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8f8bc863e2143..e6477affac9a0 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1077,8 +1077,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event) svm_set_intercept(svm, INTERCEPT_CR0_WRITE); svm_set_intercept(svm, INTERCEPT_CR3_WRITE); svm_set_intercept(svm, INTERCEPT_CR4_WRITE); - if (!kvm_vcpu_apicv_active(vcpu)) - svm_set_intercept(svm, INTERCEPT_CR8_WRITE); + svm_set_intercept(svm, INTERCEPT_CR8_WRITE); set_dr_intercepts(svm); @@ -1189,7 +1188,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event) if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS)) svm->vmcb->control.erap_ctl |= ERAP_CONTROL_ALLOW_LARGER_RAP; - if (kvm_vcpu_apicv_active(vcpu)) + if (enable_apicv && irqchip_in_kernel(vcpu->kvm)) avic_init_vmcb(svm, vmcb); if (vnmi) @@ -2674,9 +2673,11 @@ static int dr_interception(struct kvm_vcpu *vcpu) static int cr8_write_interception(struct kvm_vcpu *vcpu) { + u8 cr8_prev = kvm_get_cr8(vcpu); int r; - u8 cr8_prev = kvm_get_cr8(vcpu); + WARN_ON_ONCE(kvm_vcpu_apicv_active(vcpu)); + /* instruction emulation calls kvm_set_cr8() */ r = cr_interception(vcpu); if (lapic_in_kernel(vcpu)) @@ -4879,11 +4880,15 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) vmcb12 = map.hva; nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); - ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false); - if (ret) + if (nested_svm_check_cached_vmcb12(vcpu) < 0) + goto unmap_save; + + if (enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, + vmcb12, false) != 0) goto unmap_save; + ret = 0; svm->nested.nested_run_pending = 1; unmap_save: diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index ebd7b36b1ceb9..6942e6b0eda67 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -797,6 +797,7 @@ static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) int nested_svm_exit_handled(struct vcpu_svm *svm); int nested_svm_check_permissions(struct kvm_vcpu *vcpu); +int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu); int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); int nested_svm_exit_special(struct vcpu_svm *svm); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 248635da67661..937aeb474af7d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3300,10 +3300,24 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, if (CC(vmcs12->guest_cr4 & X86_CR4_CET && !(vmcs12->guest_cr0 & X86_CR0_WP))) return -EINVAL; - if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) && - (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) || - CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false)))) - return -EINVAL; + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { + u64 debugctl = vmcs12->guest_ia32_debugctl; + + /* + * FREEZE_IN_SMM is not virtualized, but allow L1 to set it in + * vmcs12's DEBUGCTL under a quirk for backwards compatibility. + * Note that the quirk only relaxes the consistency check. The + * vmcc02 bit is still under the control of the host. In + * particular, if a host administrator decides to clear the bit, + * then L1 has no say in the matter. + */ + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM)) + debugctl &= ~DEBUGCTLMSR_FREEZE_IN_SMM; + + if (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) || + CC(!vmx_is_valid_debugctl(vcpu, debugctl, false))) + return -EINVAL; + } if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && CC(!kvm_pat_valid(vmcs12->guest_ia32_pat))) @@ -6842,13 +6856,34 @@ void vmx_leave_nested(struct kvm_vcpu *vcpu) free_nested(vcpu); } +int nested_vmx_check_restored_vmcs12(struct kvm_vcpu *vcpu) +{ + enum vm_entry_failure_code ignored; + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + if (nested_cpu_has_shadow_vmcs(vmcs12) && + vmcs12->vmcs_link_pointer != INVALID_GPA) { + struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); + + if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || + !shadow_vmcs12->hdr.shadow_vmcs) + return -EINVAL; + } + + if (nested_vmx_check_controls(vcpu, vmcs12) || + nested_vmx_check_host_state(vcpu, vmcs12) || + nested_vmx_check_guest_state(vcpu, vmcs12, &ignored)) + return -EINVAL; + + return 0; +} + static int vmx_set_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, struct kvm_nested_state *kvm_state) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12; - enum vm_entry_failure_code ignored; struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = &user_kvm_nested_state->data.vmx[0]; int ret; @@ -6979,25 +7014,20 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, vmx->nested.mtf_pending = !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING); - ret = -EINVAL; if (nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != INVALID_GPA) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); + ret = -EINVAL; if (kvm_state->size < sizeof(*kvm_state) + sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12)) goto error_guest_mode; + ret = -EFAULT; if (copy_from_user(shadow_vmcs12, user_vmx_nested_state->shadow_vmcs12, - sizeof(*shadow_vmcs12))) { - ret = -EFAULT; - goto error_guest_mode; - } - - if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || - !shadow_vmcs12->hdr.shadow_vmcs) + sizeof(*shadow_vmcs12))) goto error_guest_mode; } @@ -7008,9 +7038,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, kvm_state->hdr.vmx.preemption_timer_deadline; } - if (nested_vmx_check_controls(vcpu, vmcs12) || - nested_vmx_check_host_state(vcpu, vmcs12) || - nested_vmx_check_guest_state(vcpu, vmcs12, &ignored)) + ret = nested_vmx_check_restored_vmcs12(vcpu); + if (ret < 0) goto error_guest_mode; vmx->nested.dirty_vmcs12 = true; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index b844c5d59025b..213a448104aff 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -22,6 +22,7 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps); void nested_vmx_hardware_unsetup(void); __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); void nested_vmx_set_vmcs_shadowing_bitmap(void); +int nested_vmx_check_restored_vmcs12(struct kvm_vcpu *vcpu); void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu); enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 967b58a8ab9d0..8b24e682535bf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1149,7 +1149,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, } vmx_add_auto_msr(&m->guest, msr, guest_val, VM_ENTRY_MSR_LOAD_COUNT, kvm); - vmx_add_auto_msr(&m->guest, msr, host_val, VM_EXIT_MSR_LOAD_COUNT, kvm); + vmx_add_auto_msr(&m->host, msr, host_val, VM_EXIT_MSR_LOAD_COUNT, kvm); } static bool update_transition_efer(struct vcpu_vmx *vmx) @@ -8528,9 +8528,13 @@ int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) } if (vmx->nested.smm.guest_mode) { + /* Triple fault if the state is invalid. */ + if (nested_vmx_check_restored_vmcs12(vcpu) < 0) + return 1; + ret = nested_vmx_enter_non_root_mode(vcpu, false); - if (ret) - return ret; + if (ret != NVMX_VMENTRY_SUCCESS) + return 1; vmx->nested.nested_run_pending = 1; vmx->nested.smm.guest_mode = false; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a035307957077..fd1c4a36b5936 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -243,7 +243,7 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_ipiv); bool __read_mostly enable_device_posted_irqs = true; EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_device_posted_irqs); -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_COUNTER(VM, mmu_shadow_zapped), STATS_DESC_COUNTER(VM, mmu_pte_write), @@ -269,7 +269,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, pf_taken), STATS_DESC_COUNTER(VCPU, pf_fixed), diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index afee5e667f775..c0d348884f749 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -165,7 +165,6 @@ aie2_sched_notify(struct amdxdna_sched_job *job) trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq); - amdxdna_pm_suspend_put(job->hwctx->client->xdna); job->hwctx->priv->completed++; dma_fence_signal(fence); @@ -290,19 +289,11 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence; int ret; - ret = amdxdna_pm_resume_get(hwctx->client->xdna); - if (ret) + if (!hwctx->priv->mbox_chann) return NULL; - if (!hwctx->priv->mbox_chann) { - amdxdna_pm_suspend_put(hwctx->client->xdna); - return NULL; - } - - if (!mmget_not_zero(job->mm)) { - amdxdna_pm_suspend_put(hwctx->client->xdna); + if (!mmget_not_zero(job->mm)) return ERR_PTR(-ESRCH); - } kref_get(&job->refcnt); fence = dma_fence_get(job->fence); @@ -333,7 +324,6 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) out: if (ret) { - amdxdna_pm_suspend_put(hwctx->client->xdna); dma_fence_put(job->fence); aie2_job_put(job); mmput(job->mm); diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 666dfd7b2a805..838430903a3ea 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -17,6 +17,7 @@ #include "amdxdna_ctx.h" #include "amdxdna_gem.h" #include "amdxdna_pci_drv.h" +#include "amdxdna_pm.h" #define MAX_HWCTX_ID 255 #define MAX_ARG_COUNT 4095 @@ -445,6 +446,7 @@ amdxdna_arg_bos_lookup(struct amdxdna_client *client, void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job) { trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release"); + amdxdna_pm_suspend_put(job->hwctx->client->xdna); amdxdna_arg_bos_put(job); amdxdna_gem_put_obj(job->cmd_bo); dma_fence_put(job->fence); @@ -482,6 +484,12 @@ int amdxdna_cmd_submit(struct amdxdna_client *client, goto cmd_put; } + ret = amdxdna_pm_resume_get(xdna); + if (ret) { + XDNA_ERR(xdna, "Resume failed, ret %d", ret); + goto put_bos; + } + idx = srcu_read_lock(&client->hwctx_srcu); hwctx = xa_load(&client->hwctx_xa, hwctx_hdl); if (!hwctx) { @@ -522,6 +530,8 @@ int amdxdna_cmd_submit(struct amdxdna_client *client, dma_fence_put(job->fence); unlock_srcu: srcu_read_unlock(&client->hwctx_srcu, idx); + amdxdna_pm_suspend_put(xdna); +put_bos: amdxdna_arg_bos_put(job); cmd_put: amdxdna_gem_put_obj(job->cmd_bo); diff --git a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h index 421242acb1844..fc0ee8d637f96 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h @@ -121,12 +121,6 @@ #define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY 0x0003006cu #define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY_STATUS_DLY_MASK GENMASK(7, 0) -#define VPU_40XX_HOST_SS_AON_RETENTION0 0x0003000cu -#define VPU_40XX_HOST_SS_AON_RETENTION1 0x00030010u -#define VPU_40XX_HOST_SS_AON_RETENTION2 0x00030014u -#define VPU_40XX_HOST_SS_AON_RETENTION3 0x00030018u -#define VPU_40XX_HOST_SS_AON_RETENTION4 0x0003001cu - #define VPU_40XX_HOST_SS_AON_IDLE_GEN 0x00030200u #define VPU_40XX_HOST_SS_AON_IDLE_GEN_EN_MASK BIT_MASK(0) #define VPU_40XX_HOST_SS_AON_IDLE_GEN_HW_PG_EN_MASK BIT_MASK(1) diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c index 959984c54341a..37f95a0551eda 100644 --- a/drivers/accel/ivpu/ivpu_hw_ip.c +++ b/drivers/accel/ivpu/ivpu_hw_ip.c @@ -931,7 +931,6 @@ static int soc_cpu_boot_40xx(struct ivpu_device *vdev) static int soc_cpu_boot_60xx(struct ivpu_device *vdev) { - REGV_WR64(VPU_40XX_HOST_SS_AON_RETENTION1, vdev->fw->mem_bp->vpu_addr); soc_cpu_set_entry_point_40xx(vdev, vdev->fw->cold_boot_entry_point); return 0; diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index df0ff0764d0d5..6f4b545f7377e 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -9,6 +9,7 @@ config ARCH_SUPPORTS_ACPI menuconfig ACPI bool "ACPI (Advanced Configuration and Power Interface) Support" depends on ARCH_SUPPORTS_ACPI + select AUXILIARY_BUS select PNP select NLS select CRC32 diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 64199b19ceff6..a09636a4168ec 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -135,7 +135,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev, } } - if (adev->device_type == ACPI_BUS_TYPE_DEVICE && !adev->pnp.type.backlight) { + if (adev->device_type == ACPI_BUS_TYPE_DEVICE) { LIST_HEAD(resource_list); count = acpi_dev_get_resources(adev, &resource_list, NULL, NULL); diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 3fa28f1abca38..adbaf0226c905 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "ACPI: video: " fmt +#include #include #include #include @@ -21,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -77,8 +77,9 @@ static int register_count; static DEFINE_MUTEX(register_count_mutex); static DEFINE_MUTEX(video_list_lock); static LIST_HEAD(video_bus_head); -static int acpi_video_bus_probe(struct platform_device *pdev); -static void acpi_video_bus_remove(struct platform_device *pdev); +static int acpi_video_bus_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *id); +static void acpi_video_bus_remove(struct auxiliary_device *aux); static void acpi_video_bus_notify(acpi_handle handle, u32 event, void *data); /* @@ -93,19 +94,16 @@ enum acpi_video_level_idx { ACPI_VIDEO_FIRST_LEVEL, /* actual supported levels begin here */ }; -static const struct acpi_device_id video_device_ids[] = { - {ACPI_VIDEO_HID, 0}, - {"", 0}, +static const struct auxiliary_device_id video_bus_auxiliary_id_table[] = { + { .name = "acpi.video_bus" }, + {}, }; -MODULE_DEVICE_TABLE(acpi, video_device_ids); +MODULE_DEVICE_TABLE(auxiliary, video_bus_auxiliary_id_table); -static struct platform_driver acpi_video_bus = { +static struct auxiliary_driver acpi_video_bus = { .probe = acpi_video_bus_probe, .remove = acpi_video_bus_remove, - .driver = { - .name = "acpi-video", - .acpi_match_table = video_device_ids, - }, + .id_table = video_bus_auxiliary_id_table, }; struct acpi_video_bus_flags { @@ -1885,7 +1883,7 @@ static void acpi_video_dev_add_notify_handler(struct acpi_video_device *device) } static int acpi_video_bus_add_notify_handler(struct acpi_video_bus *video, - struct platform_device *pdev) + struct device *parent) { struct input_dev *input; struct acpi_video_device *dev; @@ -1908,7 +1906,7 @@ static int acpi_video_bus_add_notify_handler(struct acpi_video_bus *video, input->phys = video->phys; input->id.bustype = BUS_HOST; input->id.product = 0x06; - input->dev.parent = &pdev->dev; + input->dev.parent = parent; input->evbit[0] = BIT(EV_KEY); set_bit(KEY_SWITCHVIDEOMODE, input->keybit); set_bit(KEY_VIDEO_NEXT, input->keybit); @@ -1980,9 +1978,10 @@ static int acpi_video_bus_put_devices(struct acpi_video_bus *video) static int instance; -static int acpi_video_bus_probe(struct platform_device *pdev) +static int acpi_video_bus_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *id_unused) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device = ACPI_COMPANION(&aux_dev->dev); struct acpi_video_bus *video; bool auto_detect; int error; @@ -2019,7 +2018,7 @@ static int acpi_video_bus_probe(struct platform_device *pdev) instance++; } - platform_set_drvdata(pdev, video); + auxiliary_set_drvdata(aux_dev, video); video->device = device; strscpy(acpi_device_name(device), ACPI_VIDEO_BUS_NAME); @@ -2068,7 +2067,7 @@ static int acpi_video_bus_probe(struct platform_device *pdev) !auto_detect) acpi_video_bus_register_backlight(video); - error = acpi_video_bus_add_notify_handler(video, pdev); + error = acpi_video_bus_add_notify_handler(video, &aux_dev->dev); if (error) goto err_del; @@ -2096,10 +2095,10 @@ static int acpi_video_bus_probe(struct platform_device *pdev) return error; } -static void acpi_video_bus_remove(struct platform_device *pdev) +static void acpi_video_bus_remove(struct auxiliary_device *aux_dev) { - struct acpi_video_bus *video = platform_get_drvdata(pdev); - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_video_bus *video = auxiliary_get_drvdata(aux_dev); + struct acpi_device *device = ACPI_COMPANION(&aux_dev->dev); acpi_dev_remove_notify_handler(device, ACPI_DEVICE_NOTIFY, acpi_video_bus_notify); @@ -2163,7 +2162,7 @@ int acpi_video_register(void) dmi_check_system(video_dmi_table); - ret = platform_driver_register(&acpi_video_bus); + ret = auxiliary_driver_register(&acpi_video_bus); if (ret) goto leave; @@ -2183,7 +2182,7 @@ void acpi_video_unregister(void) { mutex_lock(®ister_count_mutex); if (register_count) { - platform_driver_unregister(&acpi_video_bus); + auxiliary_driver_unregister(&acpi_video_bus); register_count = 0; may_report_brightness_keys = false; } diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 5b777316b9acd..62b9c83d4f20b 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -1681,7 +1681,7 @@ acpi_status __init acpi_os_initialize(void) * Use acpi_os_map_generic_address to pre-map the reset * register if it's in system memory. */ - void *rv; + void __iomem *rv; rv = acpi_os_map_generic_address(&acpi_gbl_FADT.reset_register); pr_debug("%s: Reset register mapping %s\n", __func__, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index dfdd004fb1a9c..e8cdbdb46fdb4 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) "ACPI: " fmt #include +#include #include #include #include @@ -2192,6 +2193,44 @@ static acpi_status acpi_bus_check_add_2(acpi_handle handle, u32 lvl_not_used, return acpi_bus_check_add(handle, false, (struct acpi_device **)ret_p); } +static void acpi_video_bus_device_release(struct device *dev) +{ + struct auxiliary_device *aux_dev = to_auxiliary_dev(dev); + + kfree(aux_dev); +} + +static void acpi_create_video_bus_device(struct acpi_device *adev, + struct acpi_device *parent) +{ + struct auxiliary_device *aux_dev; + static unsigned int aux_dev_id; + + aux_dev = kzalloc_obj(*aux_dev); + if (!aux_dev) + return; + + aux_dev->id = aux_dev_id++; + aux_dev->name = "video_bus"; + aux_dev->dev.parent = acpi_get_first_physical_node(parent); + if (!aux_dev->dev.parent) + goto err; + + aux_dev->dev.release = acpi_video_bus_device_release; + + if (auxiliary_device_init(aux_dev)) + goto err; + + ACPI_COMPANION_SET(&aux_dev->dev, adev); + if (__auxiliary_device_add(aux_dev, "acpi")) + auxiliary_device_uninit(aux_dev); + + return; + +err: + kfree(aux_dev); +} + struct acpi_scan_system_dev { struct list_head node; struct acpi_device *adev; @@ -2229,6 +2268,12 @@ static void acpi_default_enumeration(struct acpi_device *device) sd->adev = device; list_add_tail(&sd->node, &acpi_scan_system_dev_list); } + } else if (device->pnp.type.backlight) { + struct acpi_device *parent; + + parent = acpi_dev_parent(device); + if (parent) + acpi_create_video_bus_device(device, parent); } else { /* For a regular device object, create a platform device. */ acpi_create_platform_device(device, NULL); diff --git a/drivers/android/binder/page_range.rs b/drivers/android/binder/page_range.rs index fdd97112ef5c8..9dfc154e5dd4e 100644 --- a/drivers/android/binder/page_range.rs +++ b/drivers/android/binder/page_range.rs @@ -142,6 +142,30 @@ pub(crate) struct ShrinkablePageRange { _pin: PhantomPinned, } +// We do not define any ops. For now, used only to check identity of vmas. +static BINDER_VM_OPS: bindings::vm_operations_struct = pin_init::zeroed(); + +// To ensure that we do not accidentally install pages into or zap pages from the wrong vma, we +// check its vm_ops and private data before using it. +fn check_vma(vma: &virt::VmaRef, owner: *const ShrinkablePageRange) -> Option<&virt::VmaMixedMap> { + // SAFETY: Just reading the vm_ops pointer of any active vma is safe. + let vm_ops = unsafe { (*vma.as_ptr()).vm_ops }; + if !ptr::eq(vm_ops, &BINDER_VM_OPS) { + return None; + } + + // SAFETY: Reading the vm_private_data pointer of a binder-owned vma is safe. + let vm_private_data = unsafe { (*vma.as_ptr()).vm_private_data }; + // The ShrinkablePageRange is only dropped when the Process is dropped, which only happens once + // the file's ->release handler is invoked, which means the ShrinkablePageRange outlives any + // VMA associated with it, so there can't be any false positives due to pointer reuse here. + if !ptr::eq(vm_private_data, owner.cast()) { + return None; + } + + vma.as_mixedmap_vma() +} + struct Inner { /// Array of pages. /// @@ -308,6 +332,18 @@ impl ShrinkablePageRange { inner.size = num_pages; inner.vma_addr = vma.start(); + // This pointer is only used for comparison - it's not dereferenced. + // + // SAFETY: We own the vma, and we don't use any methods on VmaNew that rely on + // `vm_private_data`. + unsafe { + (*vma.as_ptr()).vm_private_data = ptr::from_ref(self).cast_mut().cast::() + }; + + // SAFETY: We own the vma, and we don't use any methods on VmaNew that rely on + // `vm_ops`. + unsafe { (*vma.as_ptr()).vm_ops = &BINDER_VM_OPS }; + Ok(num_pages) } @@ -399,22 +435,25 @@ impl ShrinkablePageRange { // // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a // workqueue. - MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?) - .mmap_read_lock() - .vma_lookup(vma_addr) - .ok_or(ESRCH)? - .as_mixedmap_vma() - .ok_or(ESRCH)? - .vm_insert_page(user_page_addr, &new_page) - .inspect_err(|err| { - pr_warn!( - "Failed to vm_insert_page({}): vma_addr:{} i:{} err:{:?}", - user_page_addr, - vma_addr, - i, - err - ) - })?; + let mm = MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?); + { + let vma_read; + let mmap_read; + let vma = if let Some(ret) = mm.lock_vma_under_rcu(vma_addr) { + vma_read = ret; + check_vma(&vma_read, self) + } else { + mmap_read = mm.mmap_read_lock(); + mmap_read + .vma_lookup(vma_addr) + .and_then(|vma| check_vma(vma, self)) + }; + + match vma { + Some(vma) => vma.vm_insert_page(user_page_addr, &new_page)?, + None => return Err(ESRCH), + } + } let inner = self.lock.lock(); @@ -667,12 +706,15 @@ unsafe extern "C" fn rust_shrink_free_page( let mmap_read; let mm_mutex; let vma_addr; + let range_ptr; { // CAST: The `list_head` field is first in `PageInfo`. let info = item as *mut PageInfo; // SAFETY: The `range` field of `PageInfo` is immutable. - let range = unsafe { &*((*info).range) }; + range_ptr = unsafe { (*info).range }; + // SAFETY: The `range` outlives its `PageInfo` values. + let range = unsafe { &*range_ptr }; mm = match range.mm.mmget_not_zero() { Some(mm) => MmWithUser::into_mmput_async(mm), @@ -717,9 +759,11 @@ unsafe extern "C" fn rust_shrink_free_page( // SAFETY: The lru lock is locked when this method is called. unsafe { bindings::spin_unlock(&raw mut (*lru).lock) }; - if let Some(vma) = mmap_read.vma_lookup(vma_addr) { - let user_page_addr = vma_addr + (page_index << PAGE_SHIFT); - vma.zap_page_range_single(user_page_addr, PAGE_SIZE); + if let Some(unchecked_vma) = mmap_read.vma_lookup(vma_addr) { + if let Some(vma) = check_vma(unchecked_vma, range_ptr) { + let user_page_addr = vma_addr + (page_index << PAGE_SHIFT); + vma.zap_page_range_single(user_page_addr, PAGE_SIZE); + } } drop(mmap_read); diff --git a/drivers/android/binder/process.rs b/drivers/android/binder/process.rs index 41de5593197cd..f06498129aa97 100644 --- a/drivers/android/binder/process.rs +++ b/drivers/android/binder/process.rs @@ -1295,7 +1295,8 @@ impl Process { } pub(crate) fn dead_binder_done(&self, cookie: u64, thread: &Thread) { - if let Some(death) = self.inner.lock().pull_delivered_death(cookie) { + let death = self.inner.lock().pull_delivered_death(cookie); + if let Some(death) = death { death.set_notification_done(thread); } } diff --git a/drivers/android/binder/range_alloc/array.rs b/drivers/android/binder/range_alloc/array.rs index 07e1dec2ce630..ada1d1b4302e5 100644 --- a/drivers/android/binder/range_alloc/array.rs +++ b/drivers/android/binder/range_alloc/array.rs @@ -118,7 +118,7 @@ impl ArrayRangeAllocator { size: usize, is_oneway: bool, pid: Pid, - ) -> Result { + ) -> Result<(usize, bool)> { // Compute new value of free_oneway_space, which is set only on success. let new_oneway_space = if is_oneway { match self.free_oneway_space.checked_sub(size) { @@ -146,7 +146,38 @@ impl ArrayRangeAllocator { .ok() .unwrap(); - Ok(insert_at_offset) + // Start detecting spammers once we have less than 20% + // of async space left (which is less than 10% of total + // buffer size). + // + // (This will short-circuit, so `low_oneway_space` is + // only called when necessary.) + let oneway_spam_detected = + is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid); + + Ok((insert_at_offset, oneway_spam_detected)) + } + + /// Find the amount and size of buffers allocated by the current caller. + /// + /// The idea is that once we cross the threshold, whoever is responsible + /// for the low async space is likely to try to send another async transaction, + /// and at some point we'll catch them in the act. This is more efficient + /// than keeping a map per pid. + fn low_oneway_space(&self, calling_pid: Pid) -> bool { + let mut total_alloc_size = 0; + let mut num_buffers = 0; + + // Warn if this pid has more than 50 transactions, or more than 50% of + // async space (which is 25% of total buffer size). Oneway spam is only + // detected when the threshold is exceeded. + for range in &self.ranges { + if range.state.is_oneway() && range.state.pid() == calling_pid { + total_alloc_size += range.size; + num_buffers += 1; + } + } + num_buffers > 50 || total_alloc_size > self.size / 4 } pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result { diff --git a/drivers/android/binder/range_alloc/mod.rs b/drivers/android/binder/range_alloc/mod.rs index 2301e2bc1a1fc..1f4734468ff11 100644 --- a/drivers/android/binder/range_alloc/mod.rs +++ b/drivers/android/binder/range_alloc/mod.rs @@ -188,11 +188,11 @@ impl RangeAllocator { self.reserve_new(args) } Impl::Array(array) => { - let offset = + let (offset, oneway_spam_detected) = array.reserve_new(args.debug_id, args.size, args.is_oneway, args.pid)?; Ok(ReserveNew::Success(ReserveNewSuccess { offset, - oneway_spam_detected: false, + oneway_spam_detected, _empty_array_alloc: args.empty_array_alloc, _new_tree_alloc: args.new_tree_alloc, _tree_alloc: args.tree_alloc, diff --git a/drivers/android/binder/range_alloc/tree.rs b/drivers/android/binder/range_alloc/tree.rs index 838fdd2b47ea7..48796fcdb3624 100644 --- a/drivers/android/binder/range_alloc/tree.rs +++ b/drivers/android/binder/range_alloc/tree.rs @@ -164,15 +164,6 @@ impl TreeRangeAllocator { self.free_oneway_space }; - // Start detecting spammers once we have less than 20% - // of async space left (which is less than 10% of total - // buffer size). - // - // (This will short-circut, so `low_oneway_space` is - // only called when necessary.) - let oneway_spam_detected = - is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid); - let (found_size, found_off, tree_node, free_tree_node) = match self.find_best_match(size) { None => { pr_warn!("ENOSPC from range_alloc.reserve_new - size: {}", size); @@ -203,6 +194,15 @@ impl TreeRangeAllocator { self.free_tree.insert(free_tree_node); } + // Start detecting spammers once we have less than 20% + // of async space left (which is less than 10% of total + // buffer size). + // + // (This will short-circuit, so `low_oneway_space` is + // only called when necessary.) + let oneway_spam_detected = + is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid); + Ok((found_off, oneway_spam_detected)) } diff --git a/drivers/android/binder/thread.rs b/drivers/android/binder/thread.rs index 0b62d24b2118a..c004214b16629 100644 --- a/drivers/android/binder/thread.rs +++ b/drivers/android/binder/thread.rs @@ -1015,12 +1015,9 @@ impl Thread { // Copy offsets if there are any. if offsets_size > 0 { - { - let mut reader = - UserSlice::new(UserPtr::from_addr(trd_data_ptr.offsets as _), offsets_size) - .reader(); - alloc.copy_into(&mut reader, aligned_data_size, offsets_size)?; - } + let mut offsets_reader = + UserSlice::new(UserPtr::from_addr(trd_data_ptr.offsets as _), offsets_size) + .reader(); let offsets_start = aligned_data_size; let offsets_end = aligned_data_size + offsets_size; @@ -1041,11 +1038,9 @@ impl Thread { .step_by(size_of::()) .enumerate() { - let offset: usize = view - .alloc - .read::(index_offset)? - .try_into() - .map_err(|_| EINVAL)?; + let offset = offsets_reader.read::()?; + view.alloc.write(index_offset, &offset)?; + let offset: usize = offset.try_into().map_err(|_| EINVAL)?; if offset < end_of_previous_object || !is_aligned(offset, size_of::()) { pr_warn!("Got transaction with invalid offset."); diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 71c7c56b38ca3..3ba7da94d3149 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -4449,7 +4449,9 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, /* Skip partition scan if disabled by user */ if (ub->dev_info.flags & UBLK_F_NO_AUTO_PART_SCAN) { - clear_bit(GD_SUPPRESS_PART_SCAN, &disk->state); + /* Not clear for unprivileged daemons, see comment above */ + if (!ub->unprivileged_daemons) + clear_bit(GD_SUPPRESS_PART_SCAN, &disk->state); } else { /* Schedule async partition scan for trusted daemons */ if (!ub->unprivileged_daemons) @@ -5012,15 +5014,22 @@ static int ublk_ctrl_get_features(const struct ublksrv_ctrl_cmd *header) return 0; } -static void ublk_ctrl_set_size(struct ublk_device *ub, const struct ublksrv_ctrl_cmd *header) +static int ublk_ctrl_set_size(struct ublk_device *ub, const struct ublksrv_ctrl_cmd *header) { struct ublk_param_basic *p = &ub->params.basic; u64 new_size = header->data[0]; + int ret = 0; mutex_lock(&ub->mutex); + if (!ub->ub_disk) { + ret = -ENODEV; + goto out; + } p->dev_sectors = new_size; set_capacity_and_notify(ub->ub_disk, p->dev_sectors); +out: mutex_unlock(&ub->mutex); + return ret; } struct count_busy { @@ -5341,8 +5350,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, ret = ublk_ctrl_end_recovery(ub, &header); break; case UBLK_CMD_UPDATE_SIZE: - ublk_ctrl_set_size(ub, &header); - ret = 0; + ret = ublk_ctrl_set_size(ub, &header); break; case UBLK_CMD_QUIESCE_DEV: ret = ublk_ctrl_quiesce_dev(ub, &header); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index bca33403fc8bf..a324ede6206d3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -549,7 +549,7 @@ static ssize_t bd_stat_show(struct device *dev, struct device_attribute *attr, return ret; } -static ssize_t writeback_compressed_store(struct device *dev, +static ssize_t compressed_writeback_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -564,12 +564,12 @@ static ssize_t writeback_compressed_store(struct device *dev, return -EBUSY; } - zram->wb_compressed = val; + zram->compressed_wb = val; return len; } -static ssize_t writeback_compressed_show(struct device *dev, +static ssize_t compressed_writeback_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -577,7 +577,7 @@ static ssize_t writeback_compressed_show(struct device *dev, struct zram *zram = dev_to_zram(dev); guard(rwsem_read)(&zram->dev_lock); - val = zram->wb_compressed; + val = zram->compressed_wb; return sysfs_emit(buf, "%d\n", val); } @@ -946,7 +946,7 @@ static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) goto out; } - if (zram->wb_compressed) { + if (zram->compressed_wb) { /* * ZRAM_WB slots get freed, we need to preserve data required * for read decompression. @@ -960,7 +960,7 @@ static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) set_slot_flag(zram, index, ZRAM_WB); set_slot_handle(zram, index, req->blk_idx); - if (zram->wb_compressed) { + if (zram->compressed_wb) { if (huge) set_slot_flag(zram, index, ZRAM_HUGE); set_slot_size(zram, index, size); @@ -1100,7 +1100,7 @@ static int zram_writeback_slots(struct zram *zram, */ if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) goto next; - if (zram->wb_compressed) + if (zram->compressed_wb) err = read_from_zspool_raw(zram, req->page, index); else err = read_from_zspool(zram, req->page, index); @@ -1429,7 +1429,7 @@ static void zram_async_read_endio(struct bio *bio) * * Keep the existing behavior for now. */ - if (zram->wb_compressed == false) { + if (zram->compressed_wb == false) { /* No decompression needed, complete the parent IO */ bio_endio(req->parent); bio_put(bio); @@ -1508,7 +1508,7 @@ static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index, flush_work(&req.work); destroy_work_on_stack(&req.work); - if (req.error || zram->wb_compressed == false) + if (req.error || zram->compressed_wb == false) return req.error; return decompress_bdev_page(zram, page, index); @@ -3007,7 +3007,7 @@ static DEVICE_ATTR_WO(writeback); static DEVICE_ATTR_RW(writeback_limit); static DEVICE_ATTR_RW(writeback_limit_enable); static DEVICE_ATTR_RW(writeback_batch_size); -static DEVICE_ATTR_RW(writeback_compressed); +static DEVICE_ATTR_RW(compressed_writeback); #endif #ifdef CONFIG_ZRAM_MULTI_COMP static DEVICE_ATTR_RW(recomp_algorithm); @@ -3031,7 +3031,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_writeback_limit.attr, &dev_attr_writeback_limit_enable.attr, &dev_attr_writeback_batch_size.attr, - &dev_attr_writeback_compressed.attr, + &dev_attr_compressed_writeback.attr, #endif &dev_attr_io_stat.attr, &dev_attr_mm_stat.attr, @@ -3091,7 +3091,7 @@ static int zram_add(void) init_rwsem(&zram->dev_lock); #ifdef CONFIG_ZRAM_WRITEBACK zram->wb_batch_size = 32; - zram->wb_compressed = false; + zram->compressed_wb = false; #endif /* gendisk structure */ diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 515a72d9c06f6..f0de8f8218f5c 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -133,7 +133,7 @@ struct zram { #ifdef CONFIG_ZRAM_WRITEBACK struct file *backing_dev; bool wb_limit_enable; - bool wb_compressed; + bool compressed_wb; u32 wb_batch_size; u64 bd_wb_limit; struct block_device *bdev; diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 65fbb8e807b97..c7876e9e024f9 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -359,16 +359,6 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev, int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, bool *stop_tick) { - /* - * If there is only a single idle state (or none), there is nothing - * meaningful for the governor to choose. Skip the governor and - * always use state 0 with the tick running. - */ - if (drv->state_count <= 1) { - *stop_tick = false; - return 0; - } - return cpuidle_curr_governor->select(drv, dev, stop_tick); } diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index b4f1c01e3b5b6..5d8be0ac7c5e6 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c @@ -1610,11 +1610,17 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, region_name); if (reg) { + /* + * Although we expect the underlying bus does not require + * physically-contiguous buffers, we pessimistically use + * a temporary buffer instead of trusting that the + * alignment of region->data is ok. + */ region_len = le32_to_cpu(region->len); if (region_len > buf_len) { buf_len = round_up(region_len, PAGE_SIZE); - kfree(buf); - buf = kmalloc(buf_len, GFP_KERNEL | GFP_DMA); + vfree(buf); + buf = vmalloc(buf_len); if (!buf) { ret = -ENOMEM; goto out_fw; @@ -1643,7 +1649,7 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, ret = 0; out_fw: - kfree(buf); + vfree(buf); if (ret == -EOVERFLOW) cs_dsp_err(dsp, "%s: file content overflows file data\n", file); @@ -2331,11 +2337,17 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware } if (reg) { + /* + * Although we expect the underlying bus does not require + * physically-contiguous buffers, we pessimistically use + * a temporary buffer instead of trusting that the + * alignment of blk->data is ok. + */ region_len = le32_to_cpu(blk->len); if (region_len > buf_len) { buf_len = round_up(region_len, PAGE_SIZE); - kfree(buf); - buf = kmalloc(buf_len, GFP_KERNEL | GFP_DMA); + vfree(buf); + buf = vmalloc(buf_len); if (!buf) { ret = -ENOMEM; goto out_fw; @@ -2366,7 +2378,7 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware ret = 0; out_fw: - kfree(buf); + vfree(buf); if (ret == -EOVERFLOW) cs_dsp_err(dsp, "%s: file content overflows file data\n", file); diff --git a/drivers/firmware/stratix10-rsu.c b/drivers/firmware/stratix10-rsu.c index 41da07c445a6f..e1912108a0fee 100644 --- a/drivers/firmware/stratix10-rsu.c +++ b/drivers/firmware/stratix10-rsu.c @@ -768,7 +768,9 @@ static int stratix10_rsu_probe(struct platform_device *pdev) rsu_async_status_callback); if (ret) { dev_err(dev, "Error, getting RSU status %i\n", ret); + stratix10_svc_remove_async_client(priv->chan); stratix10_svc_free_channel(priv->chan); + return ret; } /* get DCMF version from firmware */ diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 6f5c298582abd..e9e35d67ef966 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -37,15 +37,14 @@ * service layer will return error to FPGA manager when timeout occurs, * timeout is set to 30 seconds (30 * 1000) at Intel Stratix10 SoC. */ -#define SVC_NUM_DATA_IN_FIFO 32 +#define SVC_NUM_DATA_IN_FIFO 8 #define SVC_NUM_CHANNEL 4 -#define FPGA_CONFIG_DATA_CLAIM_TIMEOUT_MS 200 +#define FPGA_CONFIG_DATA_CLAIM_TIMEOUT_MS 2000 #define FPGA_CONFIG_STATUS_TIMEOUT_SEC 30 #define BYTE_TO_WORD_SIZE 4 /* stratix10 service layer clients */ #define STRATIX10_RSU "stratix10-rsu" -#define INTEL_FCS "intel-fcs" /* Maximum number of SDM client IDs. */ #define MAX_SDM_CLIENT_IDS 16 @@ -105,11 +104,9 @@ struct stratix10_svc_chan; /** * struct stratix10_svc - svc private data * @stratix10_svc_rsu: pointer to stratix10 RSU device - * @intel_svc_fcs: pointer to the FCS device */ struct stratix10_svc { struct platform_device *stratix10_svc_rsu; - struct platform_device *intel_svc_fcs; }; /** @@ -251,12 +248,10 @@ struct stratix10_async_ctrl { * @num_active_client: number of active service client * @node: list management * @genpool: memory pool pointing to the memory region - * @task: pointer to the thread task which handles SMC or HVC call - * @svc_fifo: a queue for storing service message data * @complete_status: state for completion - * @svc_fifo_lock: protect access to service message data queue * @invoke_fn: function to issue secure monitor call or hypervisor call * @svc: manages the list of client svc drivers + * @sdm_lock: only allows a single command single response to SDM * @actrl: async control structure * * This struct is used to create communication channels for service clients, to @@ -269,12 +264,10 @@ struct stratix10_svc_controller { int num_active_client; struct list_head node; struct gen_pool *genpool; - struct task_struct *task; - struct kfifo svc_fifo; struct completion complete_status; - spinlock_t svc_fifo_lock; svc_invoke_fn *invoke_fn; struct stratix10_svc *svc; + struct mutex sdm_lock; struct stratix10_async_ctrl actrl; }; @@ -283,6 +276,9 @@ struct stratix10_svc_controller { * @ctrl: pointer to service controller which is the provider of this channel * @scl: pointer to service client which owns the channel * @name: service client name associated with the channel + * @task: pointer to the thread task which handles SMC or HVC call + * @svc_fifo: a queue for storing service message data (separate fifo for every channel) + * @svc_fifo_lock: protect access to service message data queue (locking pending fifo) * @lock: protect access to the channel * @async_chan: reference to asynchronous channel object for this channel * @@ -293,6 +289,9 @@ struct stratix10_svc_chan { struct stratix10_svc_controller *ctrl; struct stratix10_svc_client *scl; char *name; + struct task_struct *task; + struct kfifo svc_fifo; + spinlock_t svc_fifo_lock; spinlock_t lock; struct stratix10_async_chan *async_chan; }; @@ -527,10 +526,10 @@ static void svc_thread_recv_status_ok(struct stratix10_svc_data *p_data, */ static int svc_normal_to_secure_thread(void *data) { - struct stratix10_svc_controller - *ctrl = (struct stratix10_svc_controller *)data; - struct stratix10_svc_data *pdata; - struct stratix10_svc_cb_data *cbdata; + struct stratix10_svc_chan *chan = (struct stratix10_svc_chan *)data; + struct stratix10_svc_controller *ctrl = chan->ctrl; + struct stratix10_svc_data *pdata = NULL; + struct stratix10_svc_cb_data *cbdata = NULL; struct arm_smccc_res res; unsigned long a0, a1, a2, a3, a4, a5, a6, a7; int ret_fifo = 0; @@ -555,12 +554,12 @@ static int svc_normal_to_secure_thread(void *data) a6 = 0; a7 = 0; - pr_debug("smc_hvc_shm_thread is running\n"); + pr_debug("%s: %s: Thread is running!\n", __func__, chan->name); while (!kthread_should_stop()) { - ret_fifo = kfifo_out_spinlocked(&ctrl->svc_fifo, + ret_fifo = kfifo_out_spinlocked(&chan->svc_fifo, pdata, sizeof(*pdata), - &ctrl->svc_fifo_lock); + &chan->svc_fifo_lock); if (!ret_fifo) continue; @@ -569,9 +568,25 @@ static int svc_normal_to_secure_thread(void *data) (unsigned int)pdata->paddr, pdata->command, (unsigned int)pdata->size); + /* SDM can only process one command at a time */ + pr_debug("%s: %s: Thread is waiting for mutex!\n", + __func__, chan->name); + if (mutex_lock_interruptible(&ctrl->sdm_lock)) { + /* item already dequeued; notify client to unblock it */ + cbdata->status = BIT(SVC_STATUS_ERROR); + cbdata->kaddr1 = NULL; + cbdata->kaddr2 = NULL; + cbdata->kaddr3 = NULL; + if (pdata->chan->scl) + pdata->chan->scl->receive_cb(pdata->chan->scl, + cbdata); + break; + } + switch (pdata->command) { case COMMAND_RECONFIG_DATA_CLAIM: svc_thread_cmd_data_claim(ctrl, pdata, cbdata); + mutex_unlock(&ctrl->sdm_lock); continue; case COMMAND_RECONFIG: a0 = INTEL_SIP_SMC_FPGA_CONFIG_START; @@ -700,10 +715,11 @@ static int svc_normal_to_secure_thread(void *data) break; default: pr_warn("it shouldn't happen\n"); - break; + mutex_unlock(&ctrl->sdm_lock); + continue; } - pr_debug("%s: before SMC call -- a0=0x%016x a1=0x%016x", - __func__, + pr_debug("%s: %s: before SMC call -- a0=0x%016x a1=0x%016x", + __func__, chan->name, (unsigned int)a0, (unsigned int)a1); pr_debug(" a2=0x%016x\n", (unsigned int)a2); @@ -712,8 +728,8 @@ static int svc_normal_to_secure_thread(void *data) pr_debug(" a5=0x%016x\n", (unsigned int)a5); ctrl->invoke_fn(a0, a1, a2, a3, a4, a5, a6, a7, &res); - pr_debug("%s: after SMC call -- res.a0=0x%016x", - __func__, (unsigned int)res.a0); + pr_debug("%s: %s: after SMC call -- res.a0=0x%016x", + __func__, chan->name, (unsigned int)res.a0); pr_debug(" res.a1=0x%016x, res.a2=0x%016x", (unsigned int)res.a1, (unsigned int)res.a2); pr_debug(" res.a3=0x%016x\n", (unsigned int)res.a3); @@ -728,6 +744,7 @@ static int svc_normal_to_secure_thread(void *data) cbdata->kaddr2 = NULL; cbdata->kaddr3 = NULL; pdata->chan->scl->receive_cb(pdata->chan->scl, cbdata); + mutex_unlock(&ctrl->sdm_lock); continue; } @@ -801,6 +818,8 @@ static int svc_normal_to_secure_thread(void *data) break; } + + mutex_unlock(&ctrl->sdm_lock); } kfree(cbdata); @@ -1696,22 +1715,33 @@ int stratix10_svc_send(struct stratix10_svc_chan *chan, void *msg) if (!p_data) return -ENOMEM; - /* first client will create kernel thread */ - if (!chan->ctrl->task) { - chan->ctrl->task = - kthread_run_on_cpu(svc_normal_to_secure_thread, - (void *)chan->ctrl, - cpu, "svc_smc_hvc_thread"); - if (IS_ERR(chan->ctrl->task)) { + /* first caller creates the per-channel kthread */ + if (!chan->task) { + struct task_struct *task; + + task = kthread_run_on_cpu(svc_normal_to_secure_thread, + (void *)chan, + cpu, "svc_smc_hvc_thread"); + if (IS_ERR(task)) { dev_err(chan->ctrl->dev, "failed to create svc_smc_hvc_thread\n"); kfree(p_data); return -EINVAL; } + + spin_lock(&chan->lock); + if (chan->task) { + /* another caller won the race; discard our thread */ + spin_unlock(&chan->lock); + kthread_stop(task); + } else { + chan->task = task; + spin_unlock(&chan->lock); + } } - pr_debug("%s: sent P-va=%p, P-com=%x, P-size=%u\n", __func__, - p_msg->payload, p_msg->command, + pr_debug("%s: %s: sent P-va=%p, P-com=%x, P-size=%u\n", __func__, + chan->name, p_msg->payload, p_msg->command, (unsigned int)p_msg->payload_length); if (list_empty(&svc_data_mem)) { @@ -1747,12 +1777,16 @@ int stratix10_svc_send(struct stratix10_svc_chan *chan, void *msg) p_data->arg[2] = p_msg->arg[2]; p_data->size = p_msg->payload_length; p_data->chan = chan; - pr_debug("%s: put to FIFO pa=0x%016x, cmd=%x, size=%u\n", __func__, - (unsigned int)p_data->paddr, p_data->command, - (unsigned int)p_data->size); - ret = kfifo_in_spinlocked(&chan->ctrl->svc_fifo, p_data, + pr_debug("%s: %s: put to FIFO pa=0x%016x, cmd=%x, size=%u\n", + __func__, + chan->name, + (unsigned int)p_data->paddr, + p_data->command, + (unsigned int)p_data->size); + + ret = kfifo_in_spinlocked(&chan->svc_fifo, p_data, sizeof(*p_data), - &chan->ctrl->svc_fifo_lock); + &chan->svc_fifo_lock); kfree(p_data); @@ -1773,11 +1807,12 @@ EXPORT_SYMBOL_GPL(stratix10_svc_send); */ void stratix10_svc_done(struct stratix10_svc_chan *chan) { - /* stop thread when thread is running AND only one active client */ - if (chan->ctrl->task && chan->ctrl->num_active_client <= 1) { - pr_debug("svc_smc_hvc_shm_thread is stopped\n"); - kthread_stop(chan->ctrl->task); - chan->ctrl->task = NULL; + /* stop thread when thread is running */ + if (chan->task) { + pr_debug("%s: %s: svc_smc_hvc_shm_thread is stopping\n", + __func__, chan->name); + kthread_stop(chan->task); + chan->task = NULL; } } EXPORT_SYMBOL_GPL(stratix10_svc_done); @@ -1817,8 +1852,8 @@ void *stratix10_svc_allocate_memory(struct stratix10_svc_chan *chan, pmem->paddr = pa; pmem->size = s; list_add_tail(&pmem->node, &svc_data_mem); - pr_debug("%s: va=%p, pa=0x%016x\n", __func__, - pmem->vaddr, (unsigned int)pmem->paddr); + pr_debug("%s: %s: va=%p, pa=0x%016x\n", __func__, + chan->name, pmem->vaddr, (unsigned int)pmem->paddr); return (void *)va; } @@ -1855,6 +1890,13 @@ static const struct of_device_id stratix10_svc_drv_match[] = { {}, }; +static const char * const chan_names[SVC_NUM_CHANNEL] = { + SVC_CLIENT_FPGA, + SVC_CLIENT_RSU, + SVC_CLIENT_FCS, + SVC_CLIENT_HWMON +}; + static int stratix10_svc_drv_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1862,11 +1904,11 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) struct stratix10_svc_chan *chans; struct gen_pool *genpool; struct stratix10_svc_sh_memory *sh_memory; - struct stratix10_svc *svc; + struct stratix10_svc *svc = NULL; svc_invoke_fn *invoke_fn; size_t fifo_size; - int ret; + int ret, i = 0; /* get SMC or HVC function */ invoke_fn = get_invoke_func(dev); @@ -1905,8 +1947,8 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) controller->num_active_client = 0; controller->chans = chans; controller->genpool = genpool; - controller->task = NULL; controller->invoke_fn = invoke_fn; + INIT_LIST_HEAD(&controller->node); init_completion(&controller->complete_status); ret = stratix10_svc_async_init(controller); @@ -1917,32 +1959,20 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) } fifo_size = sizeof(struct stratix10_svc_data) * SVC_NUM_DATA_IN_FIFO; - ret = kfifo_alloc(&controller->svc_fifo, fifo_size, GFP_KERNEL); - if (ret) { - dev_err(dev, "failed to allocate FIFO\n"); - goto err_async_exit; - } - spin_lock_init(&controller->svc_fifo_lock); - - chans[0].scl = NULL; - chans[0].ctrl = controller; - chans[0].name = SVC_CLIENT_FPGA; - spin_lock_init(&chans[0].lock); + mutex_init(&controller->sdm_lock); - chans[1].scl = NULL; - chans[1].ctrl = controller; - chans[1].name = SVC_CLIENT_RSU; - spin_lock_init(&chans[1].lock); - - chans[2].scl = NULL; - chans[2].ctrl = controller; - chans[2].name = SVC_CLIENT_FCS; - spin_lock_init(&chans[2].lock); - - chans[3].scl = NULL; - chans[3].ctrl = controller; - chans[3].name = SVC_CLIENT_HWMON; - spin_lock_init(&chans[3].lock); + for (i = 0; i < SVC_NUM_CHANNEL; i++) { + chans[i].scl = NULL; + chans[i].ctrl = controller; + chans[i].name = (char *)chan_names[i]; + spin_lock_init(&chans[i].lock); + ret = kfifo_alloc(&chans[i].svc_fifo, fifo_size, GFP_KERNEL); + if (ret) { + dev_err(dev, "failed to allocate FIFO %d\n", i); + goto err_free_fifos; + } + spin_lock_init(&chans[i].svc_fifo_lock); + } list_add_tail(&controller->node, &svc_ctrl); platform_set_drvdata(pdev, controller); @@ -1951,7 +1981,7 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) svc = devm_kzalloc(dev, sizeof(*svc), GFP_KERNEL); if (!svc) { ret = -ENOMEM; - goto err_free_kfifo; + goto err_free_fifos; } controller->svc = svc; @@ -1959,51 +1989,43 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) if (!svc->stratix10_svc_rsu) { dev_err(dev, "failed to allocate %s device\n", STRATIX10_RSU); ret = -ENOMEM; - goto err_free_kfifo; + goto err_free_fifos; } ret = platform_device_add(svc->stratix10_svc_rsu); - if (ret) { - platform_device_put(svc->stratix10_svc_rsu); - goto err_free_kfifo; - } - - svc->intel_svc_fcs = platform_device_alloc(INTEL_FCS, 1); - if (!svc->intel_svc_fcs) { - dev_err(dev, "failed to allocate %s device\n", INTEL_FCS); - ret = -ENOMEM; - goto err_unregister_rsu_dev; - } - - ret = platform_device_add(svc->intel_svc_fcs); - if (ret) { - platform_device_put(svc->intel_svc_fcs); - goto err_unregister_rsu_dev; - } + if (ret) + goto err_put_device; ret = of_platform_default_populate(dev_of_node(dev), NULL, dev); if (ret) - goto err_unregister_fcs_dev; + goto err_unregister_rsu_dev; pr_info("Intel Service Layer Driver Initialized\n"); return 0; -err_unregister_fcs_dev: - platform_device_unregister(svc->intel_svc_fcs); err_unregister_rsu_dev: platform_device_unregister(svc->stratix10_svc_rsu); -err_free_kfifo: - kfifo_free(&controller->svc_fifo); -err_async_exit: + goto err_free_fifos; +err_put_device: + platform_device_put(svc->stratix10_svc_rsu); +err_free_fifos: + /* only remove from list if list_add_tail() was reached */ + if (!list_empty(&controller->node)) + list_del(&controller->node); + /* free only the FIFOs that were successfully allocated */ + while (i--) + kfifo_free(&chans[i].svc_fifo); stratix10_svc_async_exit(controller); err_destroy_pool: gen_pool_destroy(genpool); + return ret; } static void stratix10_svc_drv_remove(struct platform_device *pdev) { + int i; struct stratix10_svc_controller *ctrl = platform_get_drvdata(pdev); struct stratix10_svc *svc = ctrl->svc; @@ -2011,14 +2033,16 @@ static void stratix10_svc_drv_remove(struct platform_device *pdev) of_platform_depopulate(ctrl->dev); - platform_device_unregister(svc->intel_svc_fcs); platform_device_unregister(svc->stratix10_svc_rsu); - kfifo_free(&ctrl->svc_fifo); - if (ctrl->task) { - kthread_stop(ctrl->task); - ctrl->task = NULL; + for (i = 0; i < SVC_NUM_CHANNEL; i++) { + if (ctrl->chans[i].task) { + kthread_stop(ctrl->chans[i].task); + ctrl->chans[i].task = NULL; + } + kfifo_free(&ctrl->chans[i].svc_fifo); } + if (ctrl->genpool) gen_pool_destroy(ctrl->genpool); list_del(&ctrl->node); diff --git a/drivers/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c b/drivers/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c index 6fc4e3452b88d..ee781d2f0b8e1 100644 --- a/drivers/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c +++ b/drivers/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c @@ -38,8 +38,10 @@ MODULE_DESCRIPTION("GPIB driver for LPVO usb devices"); /* * Table of devices that work with this driver. * - * Currently, only one device is known to be used in the - * lpvo_usb_gpib adapter (FTDI 0403:6001). + * Currently, only one device is known to be used in the lpvo_usb_gpib + * adapter (FTDI 0403:6001) but as this device id is already handled by the + * ftdi_sio USB serial driver the LPVO driver must not bind to it by default. + * * If your adapter uses a different chip, insert a line * in the following table with proper , . * @@ -50,7 +52,6 @@ MODULE_DESCRIPTION("GPIB driver for LPVO usb devices"); */ static const struct usb_device_id skel_table[] = { - { USB_DEVICE(0x0403, 0x6001) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, skel_table); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3e19b51a27638..d8296dfc5e8a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2690,8 +2690,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) break; default: r = amdgpu_discovery_set_ip_blocks(adev); - if (r) + if (r) { + adev->num_ip_blocks = 0; return r; + } break; } @@ -3247,6 +3249,8 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev, i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; if (!adev->ip_blocks[i].status.late_initialized) continue; + if (!adev->ip_blocks[i].version) + continue; /* skip CG for GFX, SDMA on S0ix */ if (adev->in_s0ix && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || @@ -3286,6 +3290,8 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev, i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; if (!adev->ip_blocks[i].status.late_initialized) continue; + if (!adev->ip_blocks[i].version) + continue; /* skip PG for GFX, SDMA on S0ix */ if (adev->in_s0ix && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || @@ -3493,6 +3499,8 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].version) + continue; if (!adev->ip_blocks[i].version->funcs->early_fini) continue; @@ -3570,6 +3578,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.sw) continue; + if (!adev->ip_blocks[i].version) + continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { amdgpu_ucode_free_bo(adev); amdgpu_free_static_csa(&adev->virt.csa_obj); @@ -3596,6 +3606,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.late_initialized) continue; + if (!adev->ip_blocks[i].version) + continue; if (adev->ip_blocks[i].version->funcs->late_fini) adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]); adev->ip_blocks[i].status.late_initialized = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 77e2133de5cf9..7f19554b9ad11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -83,7 +83,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); - if (adev == NULL) + if (adev == NULL || !adev->num_ip_blocks) return; amdgpu_unregister_gpu_instance(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index dc8d2f52c7d61..e244c12ceb238 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -368,15 +368,15 @@ struct amdgpu_mode_info { struct drm_property *plane_ctm_property; /** - * @shaper_lut_property: Plane property to set pre-blending shaper LUT - * that converts color content before 3D LUT. If - * plane_shaper_tf_property != Identity TF, AMD color module will + * @plane_shaper_lut_property: Plane property to set pre-blending + * shaper LUT that converts color content before 3D LUT. + * If plane_shaper_tf_property != Identity TF, AMD color module will * combine the user LUT values with pre-defined TF into the LUT * parameters to be programmed. */ struct drm_property *plane_shaper_lut_property; /** - * @shaper_lut_size_property: Plane property for the size of + * @plane_shaper_lut_size_property: Plane property for the size of * pre-blending shaper LUT as supported by the driver (read-only). */ struct drm_property *plane_shaper_lut_size_property; @@ -400,10 +400,10 @@ struct amdgpu_mode_info { */ struct drm_property *plane_lut3d_property; /** - * @plane_degamma_lut_size_property: Plane property to define the max - * size of 3D LUT as supported by the driver (read-only). The max size - * is the max size of one dimension and, therefore, the max number of - * entries for 3D LUT array is the 3D LUT size cubed; + * @plane_lut3d_size_property: Plane property to define the max size + * of 3D LUT as supported by the driver (read-only). The max size is + * the max size of one dimension and, therefore, the max number of + * entries for 3D LUT array is the 3D LUT size cubed. */ struct drm_property *plane_lut3d_size_property; /** diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 5bfa5d1d0b369..023c7345ea548 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -731,6 +731,9 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) int i; struct amdgpu_device *adev = mes->adev; union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; + uint32_t mes_rev = (pipe == AMDGPU_MES_SCHED_PIPE) ? + (mes->sched_version & AMDGPU_MES_VERSION_MASK) : + (mes->kiq_version & AMDGPU_MES_VERSION_MASK); memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); @@ -785,7 +788,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) * handling support, other queue will not use the oversubscribe timer. * handling mode - 0: disabled; 1: basic version; 2: basic+ version */ - mes_set_hw_res_pkt.oversubscription_timer = 50; + mes_set_hw_res_pkt.oversubscription_timer = mes_rev < 0x8b ? 0 : 50; mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; if (amdgpu_mes_log_enable) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 8ea31699d38ba..f5d2847e1cbb4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -593,6 +593,7 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, p->queue_size)) { pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n", p->queue_address, p->queue_size); + amdgpu_bo_unreserve(vm->root.bo); return -EFAULT; } diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h index 3711d400773af..4c4e61bc91b50 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h @@ -38,7 +38,11 @@ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0),\ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 1),\ SR(DISPCLK_FREQ_CHANGE_CNTL),\ - SR(DC_MEM_GLOBAL_PWR_REQ_CNTL) + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL),\ + SR(MICROSECOND_TIME_BASE_DIV),\ + SR(MILLISECOND_TIME_BASE_DIV),\ + SR(DCCG_GATE_DISABLE_CNTL),\ + SR(DCCG_GATE_DISABLE_CNTL2) #define DCCG_REG_LIST_DCN2() \ DCCG_COMMON_REG_LIST_DCN_BASE(),\ diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c index 75c69348027ea..c4d4eea140f3c 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c @@ -96,6 +96,25 @@ static void dccg21_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppcl dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; } +/* + * On DCN21 S0i3 resume, BIOS programs MICROSECOND_TIME_BASE_DIV to + * 0x00120464 as a marker that golden init has already been done. + * dcn21_s0i3_golden_init_wa() reads this marker later in bios_golden_init() + * to decide whether to skip golden init. + * + * dccg2_init() unconditionally overwrites MICROSECOND_TIME_BASE_DIV to + * 0x00120264, destroying the marker before it can be read. + * + * Guard the call: if the S0i3 marker is present, skip dccg2_init() so the + * WA can function correctly. bios_golden_init() will handle init in that case. + */ +static void dccg21_init(struct dccg *dccg) +{ + if (dccg2_is_s0i3_golden_init_wa_done(dccg)) + return; + + dccg2_init(dccg); +} static const struct dccg_funcs dccg21_funcs = { .update_dpp_dto = dccg21_update_dpp_dto, @@ -103,7 +122,7 @@ static const struct dccg_funcs dccg21_funcs = { .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en, .otg_add_pixel = dccg2_otg_add_pixel, .otg_drop_pixel = dccg2_otg_drop_pixel, - .dccg_init = dccg2_init, + .dccg_init = dccg21_init, .refclk_setup = dccg2_refclk_setup, /* Deprecated - for backward compatibility only */ .allow_clock_gating = dccg2_allow_clock_gating, .enable_memory_low_power = dccg2_enable_memory_low_power, diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h index 067e49cb238ec..e2381ca0be0b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h @@ -34,7 +34,13 @@ DCCG_SRII(DTO_PARAM, DPPCLK, 1),\ DCCG_SRII(DTO_PARAM, DPPCLK, 2),\ DCCG_SRII(DTO_PARAM, DPPCLK, 3),\ - SR(REFCLK_CNTL) + SR(REFCLK_CNTL),\ + SR(DISPCLK_FREQ_CHANGE_CNTL),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL),\ + SR(MICROSECOND_TIME_BASE_DIV),\ + SR(MILLISECOND_TIME_BASE_DIV),\ + SR(DCCG_GATE_DISABLE_CNTL),\ + SR(DCCG_GATE_DISABLE_CNTL2) #define DCCG_MASK_SH_LIST_DCN301(mask_sh) \ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 0, mask_sh),\ diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h index bf659920d4cc2..b5e3849ef12a8 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h @@ -64,9 +64,12 @@ SR(DSCCLK1_DTO_PARAM),\ SR(DSCCLK2_DTO_PARAM),\ SR(DSCCLK_DTO_CTRL),\ + SR(DCCG_GATE_DISABLE_CNTL),\ SR(DCCG_GATE_DISABLE_CNTL2),\ SR(DCCG_GATE_DISABLE_CNTL3),\ - SR(HDMISTREAMCLK0_DTO_PARAM) + SR(HDMISTREAMCLK0_DTO_PARAM),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL),\ + SR(MICROSECOND_TIME_BASE_DIV) #define DCCG_MASK_SH_LIST_DCN31(mask_sh) \ diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h index a609635f35dbd..ecbdc05f7c459 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h @@ -70,11 +70,14 @@ SR(DSCCLK2_DTO_PARAM),\ SR(DSCCLK3_DTO_PARAM),\ SR(DSCCLK_DTO_CTRL),\ + SR(DCCG_GATE_DISABLE_CNTL),\ SR(DCCG_GATE_DISABLE_CNTL2),\ SR(DCCG_GATE_DISABLE_CNTL3),\ SR(HDMISTREAMCLK0_DTO_PARAM),\ SR(OTG_PIXEL_RATE_DIV),\ - SR(DTBCLK_P_CNTL) + SR(DTBCLK_P_CNTL),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL),\ + SR(MICROSECOND_TIME_BASE_DIV) #define DCCG_MASK_SH_LIST_DCN314_COMMON(mask_sh) \ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index b32c053950c90..a8d63d4d1f6e2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2222,7 +2222,8 @@ static int smu_v13_0_0_restore_user_od_settings(struct smu_context *smu) user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | BIT(PP_OD_FEATURE_UCLK_BIT) | BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | - BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + BIT(PP_OD_FEATURE_FAN_CURVE_BIT) | + BIT(PP_OD_FEATURE_ZERO_FAN_BIT); res = smu_v13_0_0_upload_overdrive_table(smu, user_od_table); user_od_table->OverDriveTable.FeatureCtrlMask = 0; if (res == 0) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index f08cfa510a8a9..5500a0f12f0e9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2224,7 +2224,8 @@ static int smu_v13_0_7_restore_user_od_settings(struct smu_context *smu) user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | BIT(PP_OD_FEATURE_UCLK_BIT) | BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | - BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + BIT(PP_OD_FEATURE_FAN_CURVE_BIT) | + BIT(PP_OD_FEATURE_ZERO_FAN_BIT); res = smu_v13_0_7_upload_overdrive_table(smu, user_od_table); user_od_table->OverDriveTable.FeatureCtrlMask = 0; if (res == 0) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 9994d4369da8e..73762d9b5969e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -2311,7 +2311,8 @@ static int smu_v14_0_2_restore_user_od_settings(struct smu_context *smu) user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | BIT(PP_OD_FEATURE_UCLK_BIT) | BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | - BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + BIT(PP_OD_FEATURE_FAN_CURVE_BIT) | + BIT(PP_OD_FEATURE_ZERO_FAN_BIT); res = smu_v14_0_2_upload_overdrive_table(smu, user_od_table); user_od_table->OverDriveTable.FeatureCtrlMask = 0; if (res == 0) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c index f6736b4457bb9..17a885244e1e1 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c @@ -351,9 +351,9 @@ static u8 sn65dsi83_get_dsi_range(struct sn65dsi83 *ctx, * DSI_CLK = mode clock * bpp / dsi_data_lanes / 2 * the 2 is there because the bus is DDR. */ - return DIV_ROUND_UP(clamp((unsigned int)mode->clock * - mipi_dsi_pixel_format_to_bpp(ctx->dsi->format) / - ctx->dsi->lanes / 2, 40000U, 500000U), 5000U); + return clamp((unsigned int)mode->clock * + mipi_dsi_pixel_format_to_bpp(ctx->dsi->format) / + ctx->dsi->lanes / 2, 40000U, 500000U) / 5000U; } static u8 sn65dsi83_get_dsi_div(struct sn65dsi83 *ctx) @@ -517,6 +517,7 @@ static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge, struct drm_atomic_state *state) { struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge); + const unsigned int dual_factor = ctx->lvds_dual_link ? 2 : 1; const struct drm_bridge_state *bridge_state; const struct drm_crtc_state *crtc_state; const struct drm_display_mode *mode; @@ -653,18 +654,18 @@ static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge, /* 32 + 1 pixel clock to ensure proper operation */ le16val = cpu_to_le16(32 + 1); regmap_bulk_write(ctx->regmap, REG_VID_CHA_SYNC_DELAY_LOW, &le16val, 2); - le16val = cpu_to_le16(mode->hsync_end - mode->hsync_start); + le16val = cpu_to_le16((mode->hsync_end - mode->hsync_start) / dual_factor); regmap_bulk_write(ctx->regmap, REG_VID_CHA_HSYNC_PULSE_WIDTH_LOW, &le16val, 2); le16val = cpu_to_le16(mode->vsync_end - mode->vsync_start); regmap_bulk_write(ctx->regmap, REG_VID_CHA_VSYNC_PULSE_WIDTH_LOW, &le16val, 2); regmap_write(ctx->regmap, REG_VID_CHA_HORIZONTAL_BACK_PORCH, - mode->htotal - mode->hsync_end); + (mode->htotal - mode->hsync_end) / dual_factor); regmap_write(ctx->regmap, REG_VID_CHA_VERTICAL_BACK_PORCH, mode->vtotal - mode->vsync_end); regmap_write(ctx->regmap, REG_VID_CHA_HORIZONTAL_FRONT_PORCH, - mode->hsync_start - mode->hdisplay); + (mode->hsync_start - mode->hdisplay) / dual_factor); regmap_write(ctx->regmap, REG_VID_CHA_VERTICAL_FRONT_PORCH, mode->vsync_start - mode->vdisplay); regmap_write(ctx->regmap, REG_VID_CHA_TEST_PATTERN, 0x00); diff --git a/drivers/gpu/drm/gud/gud_drv.c b/drivers/gpu/drm/gud/gud_drv.c index d0122d4776100..17c2dead2c13b 100644 --- a/drivers/gpu/drm/gud/gud_drv.c +++ b/drivers/gpu/drm/gud/gud_drv.c @@ -339,7 +339,9 @@ static int gud_stats_debugfs(struct seq_file *m, void *data) } static const struct drm_crtc_helper_funcs gud_crtc_helper_funcs = { - .atomic_check = drm_crtc_helper_atomic_check + .atomic_check = drm_crtc_helper_atomic_check, + .atomic_enable = gud_crtc_atomic_enable, + .atomic_disable = gud_crtc_atomic_disable, }; static const struct drm_crtc_funcs gud_crtc_funcs = { @@ -364,6 +366,10 @@ static const struct drm_plane_funcs gud_plane_funcs = { DRM_GEM_SHADOW_PLANE_FUNCS, }; +static const struct drm_mode_config_helper_funcs gud_mode_config_helpers = { + .atomic_commit_tail = drm_atomic_helper_commit_tail_rpm, +}; + static const struct drm_mode_config_funcs gud_mode_config_funcs = { .fb_create = drm_gem_fb_create_with_dirty, .atomic_check = drm_atomic_helper_check, @@ -499,6 +505,7 @@ static int gud_probe(struct usb_interface *intf, const struct usb_device_id *id) drm->mode_config.min_height = le32_to_cpu(desc.min_height); drm->mode_config.max_height = le32_to_cpu(desc.max_height); drm->mode_config.funcs = &gud_mode_config_funcs; + drm->mode_config.helper_private = &gud_mode_config_helpers; /* Format init */ formats_dev = devm_kmalloc(dev, GUD_FORMATS_MAX_NUM, GFP_KERNEL); diff --git a/drivers/gpu/drm/gud/gud_internal.h b/drivers/gpu/drm/gud/gud_internal.h index d27c31648341c..8eec8335f5f90 100644 --- a/drivers/gpu/drm/gud/gud_internal.h +++ b/drivers/gpu/drm/gud/gud_internal.h @@ -62,6 +62,10 @@ int gud_usb_set_u8(struct gud_device *gdrm, u8 request, u8 val); void gud_clear_damage(struct gud_device *gdrm); void gud_flush_work(struct work_struct *work); +void gud_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state); +void gud_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state); int gud_plane_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state); void gud_plane_atomic_update(struct drm_plane *plane, diff --git a/drivers/gpu/drm/gud/gud_pipe.c b/drivers/gpu/drm/gud/gud_pipe.c index 4b77be94348d9..b355bf4d33895 100644 --- a/drivers/gpu/drm/gud/gud_pipe.c +++ b/drivers/gpu/drm/gud/gud_pipe.c @@ -580,6 +580,39 @@ int gud_plane_atomic_check(struct drm_plane *plane, return ret; } +void gud_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_device *drm = crtc->dev; + struct gud_device *gdrm = to_gud_device(drm); + int idx; + + if (!drm_dev_enter(drm, &idx)) + return; + + gud_usb_set_u8(gdrm, GUD_REQ_SET_CONTROLLER_ENABLE, 1); + gud_usb_set(gdrm, GUD_REQ_SET_STATE_COMMIT, 0, NULL, 0); + gud_usb_set_u8(gdrm, GUD_REQ_SET_DISPLAY_ENABLE, 1); + + drm_dev_exit(idx); +} + +void gud_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_device *drm = crtc->dev; + struct gud_device *gdrm = to_gud_device(drm); + int idx; + + if (!drm_dev_enter(drm, &idx)) + return; + + gud_usb_set_u8(gdrm, GUD_REQ_SET_DISPLAY_ENABLE, 0); + gud_usb_set_u8(gdrm, GUD_REQ_SET_CONTROLLER_ENABLE, 0); + + drm_dev_exit(idx); +} + void gud_plane_atomic_update(struct drm_plane *plane, struct drm_atomic_state *atomic_state) { @@ -607,24 +640,12 @@ void gud_plane_atomic_update(struct drm_plane *plane, mutex_unlock(&gdrm->damage_lock); } - if (!drm_dev_enter(drm, &idx)) + if (!crtc || !drm_dev_enter(drm, &idx)) return; - if (!old_state->fb) - gud_usb_set_u8(gdrm, GUD_REQ_SET_CONTROLLER_ENABLE, 1); - - if (fb && (crtc->state->mode_changed || crtc->state->connectors_changed)) - gud_usb_set(gdrm, GUD_REQ_SET_STATE_COMMIT, 0, NULL, 0); - - if (crtc->state->active_changed) - gud_usb_set_u8(gdrm, GUD_REQ_SET_DISPLAY_ENABLE, crtc->state->active); - - if (!fb) - goto ctrl_disable; - ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); if (ret) - goto ctrl_disable; + goto out; drm_atomic_helper_damage_iter_init(&iter, old_state, new_state); drm_atomic_for_each_plane_damage(&iter, &damage) @@ -632,9 +653,6 @@ void gud_plane_atomic_update(struct drm_plane *plane, drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); -ctrl_disable: - if (!crtc->state->enable) - gud_usb_set_u8(gdrm, GUD_REQ_SET_CONTROLLER_ENABLE, 0); - +out: drm_dev_exit(idx); } diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index 07ffee38974b0..f4f1b68f75435 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c @@ -43,12 +43,6 @@ bool intel_alpm_is_alpm_aux_less(struct intel_dp *intel_dp, void intel_alpm_init(struct intel_dp *intel_dp) { - u8 dpcd; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, &dpcd) < 0) - return; - - intel_dp->alpm_dpcd = dpcd; mutex_init(&intel_dp->alpm.lock); } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3b8ba8ab76a1b..c4246481fc2fe 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1614,7 +1614,6 @@ static void hsw_configure_cpu_transcoder(const struct intel_crtc_state *crtc_sta } intel_set_transcoder_timings(crtc_state); - intel_vrr_set_transcoder_timings(crtc_state); if (cpu_transcoder != TRANSCODER_EDP) intel_de_write(display, TRANS_MULT(display, cpu_transcoder), diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 559cf3bb23fdf..696edf40b2436 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4577,6 +4577,7 @@ static bool intel_edp_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector) { struct intel_display *display = to_intel_display(intel_dp); + int ret; /* this function is meant to be called only once */ drm_WARN_ON(display->drm, intel_dp->dpcd[DP_DPCD_REV] != 0); @@ -4616,6 +4617,12 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector */ intel_dp_init_source_oui(intel_dp); + /* Read the ALPM DPCD caps */ + ret = drm_dp_dpcd_read_byte(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, + &intel_dp->alpm_dpcd); + if (ret < 0) + return false; + /* * This has to be called after intel_dp->edp_dpcd is filled, PSR checks * for SET_POWER_CAPABLE bit in intel_dp->edp_dpcd[1] diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 4ce1173a2e91b..b7302a32ded46 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -2619,6 +2619,12 @@ void intel_psr2_program_trans_man_trk_ctl(struct intel_dsb *dsb, intel_de_write_dsb(display, dsb, PIPE_SRCSZ_ERLY_TPT(crtc->pipe), crtc_state->pipe_srcsz_early_tpt); + + if (!crtc_state->dsc.compression_enable) + return; + + intel_dsc_su_et_parameters_configure(dsb, encoder, crtc_state, + drm_rect_height(&crtc_state->psr2_su_area)); } static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state, @@ -2689,11 +2695,12 @@ static void clip_area_update(struct drm_rect *overlap_damage_area, overlap_damage_area->y2 = damage_area->y2; } -static void intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_state) +static bool intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; u16 y_alignment; + bool su_area_changed = false; /* ADLP aligns the SU region to vdsc slice height in case dsc is enabled */ if (crtc_state->dsc.compression_enable && @@ -2702,10 +2709,18 @@ static void intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_st else y_alignment = crtc_state->su_y_granularity; - crtc_state->psr2_su_area.y1 -= crtc_state->psr2_su_area.y1 % y_alignment; - if (crtc_state->psr2_su_area.y2 % y_alignment) + if (crtc_state->psr2_su_area.y1 % y_alignment) { + crtc_state->psr2_su_area.y1 -= crtc_state->psr2_su_area.y1 % y_alignment; + su_area_changed = true; + } + + if (crtc_state->psr2_su_area.y2 % y_alignment) { crtc_state->psr2_su_area.y2 = ((crtc_state->psr2_su_area.y2 / y_alignment) + 1) * y_alignment; + su_area_changed = true; + } + + return su_area_changed; } /* @@ -2839,7 +2854,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); struct intel_plane_state *new_plane_state, *old_plane_state; struct intel_plane *plane; - bool full_update = false, cursor_in_su_area = false; + bool full_update = false, su_area_changed; int i, ret; if (!crtc_state->enable_psr2_sel_fetch) @@ -2946,15 +2961,32 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, if (ret) return ret; - /* - * Adjust su area to cover cursor fully as necessary (early - * transport). This needs to be done after - * drm_atomic_add_affected_planes to ensure visible cursor is added into - * affected planes even when cursor is not updated by itself. - */ - intel_psr2_sel_fetch_et_alignment(state, crtc, &cursor_in_su_area); + do { + bool cursor_in_su_area; - intel_psr2_sel_fetch_pipe_alignment(crtc_state); + /* + * Adjust su area to cover cursor fully as necessary + * (early transport). This needs to be done after + * drm_atomic_add_affected_planes to ensure visible + * cursor is added into affected planes even when + * cursor is not updated by itself. + */ + intel_psr2_sel_fetch_et_alignment(state, crtc, &cursor_in_su_area); + + su_area_changed = intel_psr2_sel_fetch_pipe_alignment(crtc_state); + + /* + * If the cursor was outside the SU area before + * alignment, the alignment step (which only expands + * SU) may pull the cursor partially inside, so we + * must run ET alignment again to fully cover it. But + * if the cursor was already fully inside before + * alignment, expanding the SU area won't change that, + * so no further work is needed. + */ + if (cursor_in_su_area) + break; + } while (su_area_changed); /* * Now that we have the pipe damaged area check if it intersect with @@ -3014,6 +3046,10 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, } skip_sel_fetch_set_loop: + if (full_update) + clip_area_update(&crtc_state->psr2_su_area, &crtc_state->pipe_src, + &crtc_state->pipe_src); + psr2_man_trk_ctl_calc(crtc_state, full_update); crtc_state->pipe_srcsz_early_tpt = psr2_pipe_srcsz_early_tpt_calc(crtc_state, full_update); diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 5493082f30a75..2065dac1e3fdb 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -767,6 +767,29 @@ void intel_dsc_dp_pps_write(struct intel_encoder *encoder, sizeof(dp_dsc_pps_sdp)); } +void intel_dsc_su_et_parameters_configure(struct intel_dsb *dsb, struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, int su_lines) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; + enum pipe pipe = crtc->pipe; + int vdsc_instances_per_pipe = intel_dsc_get_vdsc_per_pipe(crtc_state); + int slice_row_per_frame = su_lines / vdsc_cfg->slice_height; + u32 val; + + drm_WARN_ON_ONCE(display->drm, su_lines % vdsc_cfg->slice_height); + drm_WARN_ON_ONCE(display->drm, vdsc_instances_per_pipe > 2); + + val = DSC_SUPS0_SU_SLICE_ROW_PER_FRAME(slice_row_per_frame); + val |= DSC_SUPS0_SU_PIC_HEIGHT(su_lines); + + intel_de_write_dsb(display, dsb, LNL_DSC0_SU_PARAMETER_SET_0(pipe), val); + + if (vdsc_instances_per_pipe == 2) + intel_de_write_dsb(display, dsb, LNL_DSC1_SU_PARAMETER_SET_0(pipe), val); +} + static i915_reg_t dss_ctl1_reg(struct intel_crtc *crtc, enum transcoder cpu_transcoder) { return is_pipe_dsc(crtc, cpu_transcoder) ? diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.h b/drivers/gpu/drm/i915/display/intel_vdsc.h index 99f64ac54b273..99bb9042592a4 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc.h @@ -13,6 +13,7 @@ struct drm_printer; enum transcoder; struct intel_crtc; struct intel_crtc_state; +struct intel_dsb; struct intel_encoder; bool intel_dsc_source_support(const struct intel_crtc_state *crtc_state); @@ -31,6 +32,8 @@ void intel_dsc_dsi_pps_write(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_dsc_dp_pps_write(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); +void intel_dsc_su_et_parameters_configure(struct intel_dsb *dsb, struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, int su_lines); void intel_vdsc_state_dump(struct drm_printer *p, int indent, const struct intel_crtc_state *crtc_state); int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h index 2d478a84b07c4..2b2e3c1b8138a 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h @@ -196,6 +196,18 @@ #define DSC_PPS18_NSL_BPG_OFFSET(offset) REG_FIELD_PREP(DSC_PPS18_NSL_BPG_OFFSET_MASK, offset) #define DSC_PPS18_SL_OFFSET_ADJ(offset) REG_FIELD_PREP(DSC_PPS18_SL_OFFSET_ADJ_MASK, offset) +#define _LNL_DSC0_SU_PARAMETER_SET_0_PA 0x78064 +#define _LNL_DSC1_SU_PARAMETER_SET_0_PA 0x78164 +#define _LNL_DSC0_SU_PARAMETER_SET_0_PB 0x78264 +#define _LNL_DSC1_SU_PARAMETER_SET_0_PB 0x78364 +#define LNL_DSC0_SU_PARAMETER_SET_0(pipe) _MMIO_PIPE((pipe), _LNL_DSC0_SU_PARAMETER_SET_0_PA, _LNL_DSC0_SU_PARAMETER_SET_0_PB) +#define LNL_DSC1_SU_PARAMETER_SET_0(pipe) _MMIO_PIPE((pipe), _LNL_DSC1_SU_PARAMETER_SET_0_PA, _LNL_DSC1_SU_PARAMETER_SET_0_PB) + +#define DSC_SUPS0_SU_SLICE_ROW_PER_FRAME_MASK REG_GENMASK(31, 20) +#define DSC_SUPS0_SU_SLICE_ROW_PER_FRAME(rows) REG_FIELD_PREP(DSC_SUPS0_SU_SLICE_ROW_PER_FRAME_MASK, (rows)) +#define DSC_SUPS0_SU_PIC_HEIGHT_MASK REG_GENMASK(15, 0) +#define DSC_SUPS0_SU_PIC_HEIGHT(h) REG_FIELD_PREP(DSC_SUPS0_SU_PIC_HEIGHT_MASK, (h)) + /* Icelake Rate Control Buffer Threshold Registers */ #define DSCA_RC_BUF_THRESH_0 _MMIO(0x6B230) #define DSCA_RC_BUF_THRESH_0_UDW _MMIO(0x6B230 + 4) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index db74744ddb31a..bea0057523271 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -597,6 +597,18 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) if (!HAS_VRR(display)) return; + /* + * Bspec says: + * "(note: VRR needs to be programmed after + * TRANS_DDI_FUNC_CTL and before TRANS_CONF)." + * + * In practice it turns out that ICL can hang if + * TRANS_VRR_VMAX/FLIPLINE are written before + * enabling TRANS_DDI_FUNC_CTL. + */ + drm_WARN_ON(display->drm, + !(intel_de_read(display, TRANS_DDI_FUNC_CTL(display, cpu_transcoder)) & TRANS_DDI_FUNC_ENABLE)); + /* * This bit seems to have two meanings depending on the platform: * TGL: generate VRR "safe window" for DSB vblank waits @@ -939,6 +951,8 @@ void intel_vrr_transcoder_enable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); + intel_vrr_set_transcoder_timings(crtc_state); + if (!intel_vrr_possible(crtc_state)) return; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index c6c64ba29bc42..720a9ad39aa2a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -153,8 +153,12 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, } } while (1); - nr_pages = min_t(unsigned long, - folio_nr_pages(folio), page_count - i); + nr_pages = min_array(((unsigned long[]) { + folio_nr_pages(folio), + page_count - i, + max_segment / PAGE_SIZE, + }), 3); + if (!i || sg->length >= max_segment || folio_pfn(folio) != next_pfn) { @@ -164,7 +168,9 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, st->nents++; sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0); } else { - /* XXX: could overflow? */ + nr_pages = min_t(unsigned long, nr_pages, + (max_segment - sg->length) / PAGE_SIZE); + sg->length += nr_pages * PAGE_SIZE; } next_pfn = folio_pfn(folio) + nr_pages; diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c index d77b4774d414f..e2225c5ba6478 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c @@ -78,7 +78,7 @@ static void a2xx_gpummu_destroy(struct msm_mmu *mmu) { struct a2xx_gpummu *gpummu = to_a2xx_gpummu(mmu); - dma_free_attrs(mmu->dev, TABLE_SIZE, gpummu->table, gpummu->pt_base, + dma_free_attrs(mmu->dev, TABLE_SIZE + 32, gpummu->table, gpummu->pt_base, DMA_ATTR_FORCE_CONTIGUOUS); kfree(gpummu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c index 550a53a7865eb..38561f26837e3 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c @@ -1759,7 +1759,7 @@ static const u32 x285_protect_regs[] = { A6XX_PROTECT_NORDWR(0x27c06, 0x0000), }; -DECLARE_ADRENO_PROTECT(x285_protect, 64); +DECLARE_ADRENO_PROTECT(x285_protect, 15); static const struct adreno_reglist_pipe a840_nonctxt_regs[] = { { REG_A8XX_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, @@ -1966,5 +1966,4 @@ static inline __always_unused void __build_asserts(void) BUILD_BUG_ON(a660_protect.count > a660_protect.count_max); BUILD_BUG_ON(a690_protect.count > a690_protect.count_max); BUILD_BUG_ON(a730_protect.count > a730_protect.count_max); - BUILD_BUG_ON(a840_protect.count > a840_protect.count_max); } diff --git a/drivers/gpu/drm/msm/adreno/a8xx_gpu.c b/drivers/gpu/drm/msm/adreno/a8xx_gpu.c index 5a320f5bde41a..b1887e0cf6983 100644 --- a/drivers/gpu/drm/msm/adreno/a8xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a8xx_gpu.c @@ -310,11 +310,21 @@ static void a8xx_set_ubwc_config(struct msm_gpu *gpu) hbb = cfg->highest_bank_bit - 13; hbb_hi = hbb >> 2; hbb_lo = hbb & 3; - a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5); - a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5); + + a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL, + hbb << 5 | + level3_swizzling_dis << 4 | + level2_swizzling_dis << 3); + + a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL, + hbb << 5 | + level3_swizzling_dis << 4 | + level2_swizzling_dis << 3); a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL, yuvnotcomptofc << 6 | + level3_swizzling_dis << 5 | + level2_swizzling_dis << 4 | hbb_hi << 3 | hbb_lo << 1); diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 554d746f115b2..4edfe80c5be7c 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -302,6 +302,7 @@ static const struct of_device_id dt_match[] = { { .compatible = "qcom,kgsl-3d0" }, {} }; +MODULE_DEVICE_TABLE(of, dt_match); static int adreno_runtime_resume(struct device *dev) { diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h index 303d33dc7783a..9f2bceca1789e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h @@ -133,7 +133,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { static const struct dpu_lm_cfg sc8280xp_lm[] = { { .name = "lm_0", .id = LM_0, - .base = 0x44000, .len = 0x320, + .base = 0x44000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_1, @@ -141,7 +141,7 @@ static const struct dpu_lm_cfg sc8280xp_lm[] = { .dspp = DSPP_0, }, { .name = "lm_1", .id = LM_1, - .base = 0x45000, .len = 0x320, + .base = 0x45000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_0, @@ -149,7 +149,7 @@ static const struct dpu_lm_cfg sc8280xp_lm[] = { .dspp = DSPP_1, }, { .name = "lm_2", .id = LM_2, - .base = 0x46000, .len = 0x320, + .base = 0x46000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, @@ -157,7 +157,7 @@ static const struct dpu_lm_cfg sc8280xp_lm[] = { .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, - .base = 0x47000, .len = 0x320, + .base = 0x47000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, @@ -165,14 +165,14 @@ static const struct dpu_lm_cfg sc8280xp_lm[] = { .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, - .base = 0x48000, .len = 0x320, + .base = 0x48000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_5, .pingpong = PINGPONG_4, }, { .name = "lm_5", .id = LM_5, - .base = 0x49000, .len = 0x320, + .base = 0x49000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_4, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h index b09a6af4c474a..04b22167f93d6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h @@ -134,7 +134,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { static const struct dpu_lm_cfg sm8450_lm[] = { { .name = "lm_0", .id = LM_0, - .base = 0x44000, .len = 0x320, + .base = 0x44000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_1, @@ -142,7 +142,7 @@ static const struct dpu_lm_cfg sm8450_lm[] = { .dspp = DSPP_0, }, { .name = "lm_1", .id = LM_1, - .base = 0x45000, .len = 0x320, + .base = 0x45000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_0, @@ -150,7 +150,7 @@ static const struct dpu_lm_cfg sm8450_lm[] = { .dspp = DSPP_1, }, { .name = "lm_2", .id = LM_2, - .base = 0x46000, .len = 0x320, + .base = 0x46000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, @@ -158,7 +158,7 @@ static const struct dpu_lm_cfg sm8450_lm[] = { .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, - .base = 0x47000, .len = 0x320, + .base = 0x47000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, @@ -166,14 +166,14 @@ static const struct dpu_lm_cfg sm8450_lm[] = { .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, - .base = 0x48000, .len = 0x320, + .base = 0x48000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_5, .pingpong = PINGPONG_4, }, { .name = "lm_5", .id = LM_5, - .base = 0x49000, .len = 0x320, + .base = 0x49000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_4, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h index 0f7b4a224e4c9..42cf3bd5a12ad 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h @@ -366,8 +366,8 @@ static const struct dpu_intf_cfg sa8775p_intf[] = { .type = INTF_NONE, .controller_id = MSM_DP_CONTROLLER_0, /* pair with intf_0 for DP MST */ .prog_fetch_lines_worst_case = 24, - .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17), - .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16), + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17), }, { .name = "intf_7", .id = INTF_7, .base = 0x3b000, .len = 0x280, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h index 465b6460f8754..4c7eb55d474c5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h @@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sm8550_sspp[] = { static const struct dpu_lm_cfg sm8550_lm[] = { { .name = "lm_0", .id = LM_0, - .base = 0x44000, .len = 0x320, + .base = 0x44000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_1, @@ -139,7 +139,7 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .dspp = DSPP_0, }, { .name = "lm_1", .id = LM_1, - .base = 0x45000, .len = 0x320, + .base = 0x45000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_0, @@ -147,7 +147,7 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .dspp = DSPP_1, }, { .name = "lm_2", .id = LM_2, - .base = 0x46000, .len = 0x320, + .base = 0x46000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, @@ -155,7 +155,7 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, - .base = 0x47000, .len = 0x320, + .base = 0x47000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, @@ -163,14 +163,14 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, - .base = 0x48000, .len = 0x320, + .base = 0x48000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_5, .pingpong = PINGPONG_4, }, { .name = "lm_5", .id = LM_5, - .base = 0x49000, .len = 0x320, + .base = 0x49000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_4, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_1_sar2130p.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_1_sar2130p.h index 6caa7d40f3688..dec83ea8167d1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_1_sar2130p.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_1_sar2130p.h @@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sar2130p_sspp[] = { static const struct dpu_lm_cfg sar2130p_lm[] = { { .name = "lm_0", .id = LM_0, - .base = 0x44000, .len = 0x320, + .base = 0x44000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_1, @@ -139,7 +139,7 @@ static const struct dpu_lm_cfg sar2130p_lm[] = { .dspp = DSPP_0, }, { .name = "lm_1", .id = LM_1, - .base = 0x45000, .len = 0x320, + .base = 0x45000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_0, @@ -147,7 +147,7 @@ static const struct dpu_lm_cfg sar2130p_lm[] = { .dspp = DSPP_1, }, { .name = "lm_2", .id = LM_2, - .base = 0x46000, .len = 0x320, + .base = 0x46000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, @@ -155,7 +155,7 @@ static const struct dpu_lm_cfg sar2130p_lm[] = { .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, - .base = 0x47000, .len = 0x320, + .base = 0x47000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, @@ -163,14 +163,14 @@ static const struct dpu_lm_cfg sar2130p_lm[] = { .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, - .base = 0x48000, .len = 0x320, + .base = 0x48000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_5, .pingpong = PINGPONG_4, }, { .name = "lm_5", .id = LM_5, - .base = 0x49000, .len = 0x320, + .base = 0x49000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_4, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h index 7243eebb85f36..52ff4baa668a4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h @@ -130,7 +130,7 @@ static const struct dpu_sspp_cfg x1e80100_sspp[] = { static const struct dpu_lm_cfg x1e80100_lm[] = { { .name = "lm_0", .id = LM_0, - .base = 0x44000, .len = 0x320, + .base = 0x44000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_1, @@ -138,7 +138,7 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .dspp = DSPP_0, }, { .name = "lm_1", .id = LM_1, - .base = 0x45000, .len = 0x320, + .base = 0x45000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_0, @@ -146,7 +146,7 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .dspp = DSPP_1, }, { .name = "lm_2", .id = LM_2, - .base = 0x46000, .len = 0x320, + .base = 0x46000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, @@ -154,7 +154,7 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, - .base = 0x47000, .len = 0x320, + .base = 0x47000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, @@ -162,14 +162,14 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, - .base = 0x48000, .len = 0x320, + .base = 0x48000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_5, .pingpong = PINGPONG_4, }, { .name = "lm_5", .id = LM_5, - .base = 0x49000, .len = 0x320, + .base = 0x49000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_4, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c index 188ee0af2c90f..23dcbe1ce1b83 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c @@ -89,7 +89,7 @@ static void dpu_setup_dspp_gc(struct dpu_hw_dspp *ctx, base = ctx->cap->sblk->gc.base; if (!base) { - DRM_ERROR("invalid ctx %pK gc base\n", ctx); + DRM_ERROR("invalid ctx %p gc base\n", ctx); return; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp_v13.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp_v13.c index e65f1fc026fdc..f8f96ad971d78 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp_v13.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp_v13.c @@ -156,11 +156,13 @@ static void dpu_hw_sspp_setup_pe_config_v13(struct dpu_hw_sspp *ctx, u8 color; u32 lr_pe[4], tb_pe[4]; const u32 bytemask = 0xff; - u32 offset = ctx->cap->sblk->sspp_rec0_blk.base; + u32 offset; if (!ctx || !pe_ext) return; + offset = ctx->cap->sblk->sspp_rec0_blk.base; + c = &ctx->hw; /* program SW pixel extension override for all pipes*/ for (color = 0; color < DPU_MAX_PLANES; color++) { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index 451a4fcf3e653..7e77d88f89592 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -350,26 +350,28 @@ static bool _dpu_rm_check_lm_and_get_connected_blks(struct dpu_rm *rm, return true; } -static bool dpu_rm_find_lms(struct dpu_rm *rm, - struct dpu_global_state *global_state, - uint32_t crtc_id, bool skip_dspp, - struct msm_display_topology *topology, - int *lm_idx, int *pp_idx, int *dspp_idx) +static int _dpu_rm_reserve_lms(struct dpu_rm *rm, + struct dpu_global_state *global_state, + uint32_t crtc_id, + struct msm_display_topology *topology) { + int lm_idx[MAX_BLOCKS]; + int pp_idx[MAX_BLOCKS]; + int dspp_idx[MAX_BLOCKS] = {0}; int i, lm_count = 0; + if (!topology->num_lm) { + DPU_ERROR("zero LMs in topology\n"); + return -EINVAL; + } + /* Find a primary mixer */ for (i = 0; i < ARRAY_SIZE(rm->mixer_blks) && lm_count < topology->num_lm; i++) { if (!rm->mixer_blks[i]) continue; - if (skip_dspp && to_dpu_hw_mixer(rm->mixer_blks[i])->cap->dspp) { - DPU_DEBUG("Skipping LM_%d, skipping LMs with DSPPs\n", i); - continue; - } - /* * Reset lm_count to an even index. This will drop the previous * primary mixer if failed to find its peer. @@ -408,38 +410,12 @@ static bool dpu_rm_find_lms(struct dpu_rm *rm, } } - return lm_count == topology->num_lm; -} - -static int _dpu_rm_reserve_lms(struct dpu_rm *rm, - struct dpu_global_state *global_state, - uint32_t crtc_id, - struct msm_display_topology *topology) - -{ - int lm_idx[MAX_BLOCKS]; - int pp_idx[MAX_BLOCKS]; - int dspp_idx[MAX_BLOCKS] = {0}; - int i; - bool found; - - if (!topology->num_lm) { - DPU_ERROR("zero LMs in topology\n"); - return -EINVAL; - } - - /* Try using non-DSPP LM blocks first */ - found = dpu_rm_find_lms(rm, global_state, crtc_id, !topology->num_dspp, - topology, lm_idx, pp_idx, dspp_idx); - if (!found && !topology->num_dspp) - found = dpu_rm_find_lms(rm, global_state, crtc_id, false, - topology, lm_idx, pp_idx, dspp_idx); - if (!found) { + if (lm_count != topology->num_lm) { DPU_DEBUG("unable to find appropriate mixers\n"); return -ENAVAIL; } - for (i = 0; i < topology->num_lm; i++) { + for (i = 0; i < lm_count; i++) { global_state->mixer_to_crtc_id[lm_idx[i]] = crtc_id; global_state->pingpong_to_crtc_id[pp_idx[i]] = crtc_id; global_state->dspp_to_crtc_id[dspp_idx[i]] = diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index e0de545d40775..db6da99375a18 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -584,13 +584,30 @@ void dsi_link_clk_disable_v2(struct msm_dsi_host *msm_host) * FIXME: Reconsider this if/when CMD mode handling is rewritten to use * transfer time and data overhead as a starting point of the calculations. */ -static unsigned long dsi_adjust_pclk_for_compression(const struct drm_display_mode *mode, - const struct drm_dsc_config *dsc) +static unsigned long +dsi_adjust_pclk_for_compression(const struct drm_display_mode *mode, + const struct drm_dsc_config *dsc, + bool is_bonded_dsi) { - int new_hdisplay = DIV_ROUND_UP(mode->hdisplay * drm_dsc_get_bpp_int(dsc), - dsc->bits_per_component * 3); + int hdisplay, new_hdisplay, new_htotal; - int new_htotal = mode->htotal - mode->hdisplay + new_hdisplay; + /* + * For bonded DSI, split hdisplay across two links and round up each + * half separately, passing the full hdisplay would only round up once. + * This also aligns with the hdisplay we program later in + * dsi_timing_setup() + */ + hdisplay = mode->hdisplay; + if (is_bonded_dsi) + hdisplay /= 2; + + new_hdisplay = DIV_ROUND_UP(hdisplay * drm_dsc_get_bpp_int(dsc), + dsc->bits_per_component * 3); + + if (is_bonded_dsi) + new_hdisplay *= 2; + + new_htotal = mode->htotal - mode->hdisplay + new_hdisplay; return mult_frac(mode->clock * 1000u, new_htotal, mode->htotal); } @@ -603,7 +620,7 @@ static unsigned long dsi_get_pclk_rate(const struct drm_display_mode *mode, pclk_rate = mode->clock * 1000u; if (dsc) - pclk_rate = dsi_adjust_pclk_for_compression(mode, dsc); + pclk_rate = dsi_adjust_pclk_for_compression(mode, dsc, is_bonded_dsi); /* * For bonded DSI mode, the current DRM mode has the complete width of the @@ -993,7 +1010,7 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi) if (msm_host->dsc) { struct drm_dsc_config *dsc = msm_host->dsc; - u32 bytes_per_pclk; + u32 bits_per_pclk; /* update dsc params with timing params */ if (!dsc || !mode->hdisplay || !mode->vdisplay) { @@ -1015,7 +1032,9 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi) /* * DPU sends 3 bytes per pclk cycle to DSI. If widebus is - * enabled, bus width is extended to 6 bytes. + * enabled, MDP always sends out 48-bit compressed data per + * pclk and on average, DSI consumes an amount of compressed + * data equivalent to the uncompressed pixel depth per pclk. * * Calculate the number of pclks needed to transmit one line of * the compressed data. @@ -1027,12 +1046,12 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi) * unused anyway. */ h_total -= hdisplay; - if (wide_bus_enabled && !(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO)) - bytes_per_pclk = 6; + if (wide_bus_enabled) + bits_per_pclk = mipi_dsi_pixel_format_to_bpp(msm_host->format); else - bytes_per_pclk = 3; + bits_per_pclk = 24; - hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc), bytes_per_pclk); + hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc) * 8, bits_per_pclk); h_total += hdisplay; ha_end = ha_start + hdisplay; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 8cb0db3a98808..01182442dfd61 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -51,8 +51,8 @@ #define DSI_PHY_7NM_QUIRK_V4_3 BIT(3) /* Hardware is V5.2 */ #define DSI_PHY_7NM_QUIRK_V5_2 BIT(4) -/* Hardware is V7.0 */ -#define DSI_PHY_7NM_QUIRK_V7_0 BIT(5) +/* Hardware is V7.2 */ +#define DSI_PHY_7NM_QUIRK_V7_2 BIT(5) struct dsi_pll_config { bool enable_ssc; @@ -143,7 +143,7 @@ static void dsi_pll_calc_dec_frac(struct dsi_pll_7nm *pll, struct dsi_pll_config if (pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_PRE_V4_1) { config->pll_clock_inverters = 0x28; - } else if ((pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0)) { + } else if ((pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2)) { if (pll_freq < 163000000ULL) config->pll_clock_inverters = 0xa0; else if (pll_freq < 175000000ULL) @@ -284,7 +284,7 @@ static void dsi_pll_config_hzindep_reg(struct dsi_pll_7nm *pll) } if ((pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V5_2) || - (pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0)) { + (pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2)) { if (pll->vco_current_rate < 1557000000ULL) vco_config_1 = 0x08; else @@ -699,7 +699,7 @@ static int dsi_7nm_set_usecase(struct msm_dsi_phy *phy) case MSM_DSI_PHY_MASTER: pll_7nm->slave = pll_7nm_list[(pll_7nm->phy->id + 1) % DSI_MAX]; /* v7.0: Enable ATB_EN0 and alternate clock output to external phy */ - if (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0) + if (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2) writel(0x07, base + REG_DSI_7nm_PHY_CMN_CTRL_5); break; case MSM_DSI_PHY_SLAVE: @@ -987,7 +987,7 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, /* Request for REFGEN READY */ if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V4_3) || (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V5_2) || - (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0)) { + (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2)) { writel(0x1, phy->base + REG_DSI_7nm_PHY_CMN_GLBL_DIGTOP_SPARE10); udelay(500); } @@ -1021,7 +1021,7 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, lane_ctrl0 = 0x1f; } - if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0)) { + if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2)) { if (phy->cphy_mode) { /* TODO: different for second phy */ vreg_ctrl_0 = 0x57; @@ -1097,7 +1097,7 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, /* program CMN_CTRL_4 for minor_ver 2 chipsets*/ if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V5_2) || - (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0) || + (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2) || (readl(base + REG_DSI_7nm_PHY_CMN_REVISION_ID0) & (0xf0)) == 0x20) writel(0x04, base + REG_DSI_7nm_PHY_CMN_CTRL_4); @@ -1213,7 +1213,7 @@ static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) /* Turn off REFGEN Vote */ if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V4_3) || (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V5_2) || - (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0)) { + (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2)) { writel(0x0, base + REG_DSI_7nm_PHY_CMN_GLBL_DIGTOP_SPARE10); wmb(); /* Delay to ensure HW removes vote before PHY shut down */ @@ -1502,7 +1502,7 @@ const struct msm_dsi_phy_cfg dsi_phy_3nm_8750_cfgs = { #endif .io_start = { 0xae95000, 0xae97000 }, .num_dsi_phy = 2, - .quirks = DSI_PHY_7NM_QUIRK_V7_0, + .quirks = DSI_PHY_7NM_QUIRK_V7_2, }; const struct msm_dsi_phy_cfg dsi_phy_3nm_kaanapali_cfgs = { @@ -1525,5 +1525,5 @@ const struct msm_dsi_phy_cfg dsi_phy_3nm_kaanapali_cfgs = { #endif .io_start = { 0x9ac1000, 0x9ac4000 }, .num_dsi_phy = 2, - .quirks = DSI_PHY_7NM_QUIRK_V7_0, + .quirks = DSI_PHY_7NM_QUIRK_V7_2, }; diff --git a/drivers/gpu/drm/sitronix/st7586.c b/drivers/gpu/drm/sitronix/st7586.c index b57ebf37a664c..16b6b4e368af8 100644 --- a/drivers/gpu/drm/sitronix/st7586.c +++ b/drivers/gpu/drm/sitronix/st7586.c @@ -347,6 +347,12 @@ static int st7586_probe(struct spi_device *spi) if (ret) return ret; + /* + * Override value set by mipi_dbi_spi_init(). This driver is a bit + * non-standard, so best to set it explicitly here. + */ + dbi->write_memory_bpw = 8; + /* Cannot read from this controller via SPI */ dbi->read_commands = NULL; @@ -356,15 +362,6 @@ static int st7586_probe(struct spi_device *spi) if (ret) return ret; - /* - * we are using 8-bit data, so we are not actually swapping anything, - * but setting mipi->swap_bytes makes mipi_dbi_typec3_command() do the - * right thing and not use 16-bit transfers (which results in swapped - * bytes on little-endian systems and causes out of order data to be - * sent to the display). - */ - dbi->swap_bytes = true; - drm_mode_config_reset(drm); ret = drm_dev_register(drm, 0); diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 174feaca0a6b9..c69adaa92bbe7 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -47,16 +47,12 @@ struct PteArray([u64; NUM_ENTRIES]); unsafe impl AsBytes for PteArray {} impl PteArray { - /// Creates a new page table array mapping `NUM_PAGES` GSP pages starting at address `start`. - fn new(start: DmaAddress) -> Result { - let mut ptes = [0u64; NUM_PAGES]; - for (i, pte) in ptes.iter_mut().enumerate() { - *pte = start - .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) - .ok_or(EOVERFLOW)?; - } - - Ok(Self(ptes)) + /// Returns the page table entry for `index`, for a mapping starting at `start`. + // TODO: Replace with `IoView` projection once available. + fn entry(start: DmaAddress, index: usize) -> Result { + start + .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT) + .ok_or(EOVERFLOW) } } @@ -86,16 +82,22 @@ impl LogBuffer { NUM_PAGES * GSP_PAGE_SIZE, GFP_KERNEL | __GFP_ZERO, )?); - let ptes = PteArray::::new(obj.0.dma_handle())?; + + let start_addr = obj.0.dma_handle(); // SAFETY: `obj` has just been created and we are its sole user. - unsafe { - // Copy the self-mapping PTE at the expected location. + let pte_region = unsafe { obj.0 - .as_slice_mut(size_of::(), size_of_val(&ptes))? - .copy_from_slice(ptes.as_bytes()) + .as_slice_mut(size_of::(), NUM_PAGES * size_of::())? }; + // Write values one by one to avoid an on-stack instance of `PteArray`. + for (i, chunk) in pte_region.chunks_exact_mut(size_of::()).enumerate() { + let pte_value = PteArray::<0>::entry(start_addr, i)?; + + chunk.copy_from_slice(&pte_value.to_ne_bytes()); + } + Ok(obj) } } @@ -143,14 +145,14 @@ impl Gsp { // _kgspInitLibosLoggingStructures (allocates memory for buffers) // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) dma_write!( - libos[0] = LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0) - )?; + libos, [0]?, LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0) + ); dma_write!( - libos[1] = LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0) - )?; - dma_write!(libos[2] = LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0))?; - dma_write!(rmargs[0].inner = fw::GspArgumentsCached::new(cmdq))?; - dma_write!(libos[3] = LibosMemoryRegionInitArgument::new("RMARGS", rmargs))?; + libos, [1]?, LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0) + ); + dma_write!(libos, [2]?, LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0)); + dma_write!(rmargs, [0]?.inner, fw::GspArgumentsCached::new(cmdq)); + dma_write!(libos, [3]?, LibosMemoryRegionInitArgument::new("RMARGS", rmargs)); }, })) }) diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index be427fe26a584..94833f7996e8a 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -157,7 +157,7 @@ impl super::Gsp { let wpr_meta = CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; - dma_write!(wpr_meta[0] = GspFwWprMeta::new(&gsp_fw, &fb_layout))?; + dma_write!(wpr_meta, [0]?, GspFwWprMeta::new(&gsp_fw, &fb_layout)); self.cmdq .send_command(bar, commands::SetSystemInfo::new(pdev))?; diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs index 46819a82a51ad..03a4f35998498 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -2,11 +2,7 @@ use core::{ cmp, - mem, - sync::atomic::{ - fence, - Ordering, // - }, // + mem, // }; use kernel::{ @@ -146,30 +142,36 @@ static_assert!(align_of::() == GSP_PAGE_SIZE); #[repr(C)] // There is no struct defined for this in the open-gpu-kernel-source headers. // Instead it is defined by code in `GspMsgQueuesInit()`. -struct Msgq { +// TODO: Revert to private once `IoView` projections replace the `gsp_mem` module. +pub(super) struct Msgq { /// Header for sending messages, including the write pointer. - tx: MsgqTxHeader, + pub(super) tx: MsgqTxHeader, /// Header for receiving messages, including the read pointer. - rx: MsgqRxHeader, + pub(super) rx: MsgqRxHeader, /// The message queue proper. msgq: MsgqData, } /// Structure shared between the driver and the GSP and containing the command and message queues. #[repr(C)] -struct GspMem { +// TODO: Revert to private once `IoView` projections replace the `gsp_mem` module. +pub(super) struct GspMem { /// Self-mapping page table entries. - ptes: PteArray<{ GSP_PAGE_SIZE / size_of::() }>, + ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>, /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the /// write and read pointers that the CPU updates. /// /// This member is read-only for the GSP. - cpuq: Msgq, + pub(super) cpuq: Msgq, /// GSP queue: the GSP writes messages here, and the driver reads them. It also contains the /// write and read pointers that the GSP updates. /// /// This member is read-only for the driver. - gspq: Msgq, + pub(super) gspq: Msgq, +} + +impl GspMem { + const PTE_ARRAY_SIZE: usize = GSP_PAGE_SIZE / size_of::(); } // SAFETY: These structs don't meet the no-padding requirements of AsBytes but @@ -201,9 +203,19 @@ impl DmaGspMem { let gsp_mem = CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; - dma_write!(gsp_mem[0].ptes = PteArray::new(gsp_mem.dma_handle())?)?; - dma_write!(gsp_mem[0].cpuq.tx = MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES))?; - dma_write!(gsp_mem[0].cpuq.rx = MsgqRxHeader::new())?; + + let start = gsp_mem.dma_handle(); + // Write values one by one to avoid an on-stack instance of `PteArray`. + for i in 0..GspMem::PTE_ARRAY_SIZE { + dma_write!(gsp_mem, [0]?.ptes.0[i], PteArray::<0>::entry(start, i)?); + } + + dma_write!( + gsp_mem, + [0]?.cpuq.tx, + MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES) + ); + dma_write!(gsp_mem, [0]?.cpuq.rx, MsgqRxHeader::new()); Ok(Self(gsp_mem)) } @@ -317,12 +329,7 @@ impl DmaGspMem { // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn gsp_write_ptr(&self) -> u32 { - let gsp_mem = self.0.start_ptr(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - (unsafe { (*gsp_mem).gspq.tx.write_ptr() } % MSGQ_NUM_PAGES) + super::fw::gsp_mem::gsp_write_ptr(&self.0) } // Returns the index of the memory page the GSP will read the next command from. @@ -331,12 +338,7 @@ impl DmaGspMem { // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn gsp_read_ptr(&self) -> u32 { - let gsp_mem = self.0.start_ptr(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - (unsafe { (*gsp_mem).gspq.rx.read_ptr() } % MSGQ_NUM_PAGES) + super::fw::gsp_mem::gsp_read_ptr(&self.0) } // Returns the index of the memory page the CPU can read the next message from. @@ -345,27 +347,12 @@ impl DmaGspMem { // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn cpu_read_ptr(&self) -> u32 { - let gsp_mem = self.0.start_ptr(); - - // SAFETY: - // - The ['CoherentAllocation'] contains at least one object. - // - By the invariants of CoherentAllocation the pointer is valid. - (unsafe { (*gsp_mem).cpuq.rx.read_ptr() } % MSGQ_NUM_PAGES) + super::fw::gsp_mem::cpu_read_ptr(&self.0) } // Informs the GSP that it can send `elem_count` new pages into the message queue. fn advance_cpu_read_ptr(&mut self, elem_count: u32) { - let rptr = self.cpu_read_ptr().wrapping_add(elem_count) % MSGQ_NUM_PAGES; - - // Ensure read pointer is properly ordered. - fence(Ordering::SeqCst); - - let gsp_mem = self.0.start_ptr_mut(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - unsafe { (*gsp_mem).cpuq.rx.set_read_ptr(rptr) }; + super::fw::gsp_mem::advance_cpu_read_ptr(&self.0, elem_count) } // Returns the index of the memory page the CPU can write the next command to. @@ -374,26 +361,12 @@ impl DmaGspMem { // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn cpu_write_ptr(&self) -> u32 { - let gsp_mem = self.0.start_ptr(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - (unsafe { (*gsp_mem).cpuq.tx.write_ptr() } % MSGQ_NUM_PAGES) + super::fw::gsp_mem::cpu_write_ptr(&self.0) } // Informs the GSP that it can process `elem_count` new pages from the command queue. fn advance_cpu_write_ptr(&mut self, elem_count: u32) { - let wptr = self.cpu_write_ptr().wrapping_add(elem_count) & MSGQ_NUM_PAGES; - let gsp_mem = self.0.start_ptr_mut(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - unsafe { (*gsp_mem).cpuq.tx.set_write_ptr(wptr) }; - - // Ensure all command data is visible before triggering the GSP read. - fence(Ordering::SeqCst); + super::fw::gsp_mem::advance_cpu_write_ptr(&self.0, elem_count) } } diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index 83ff91614e36d..040b30ec3089b 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -40,6 +40,75 @@ use crate::{ }, }; +// TODO: Replace with `IoView` projections once available; the `unwrap()` calls go away once we +// switch to the new `dma::Coherent` API. +pub(super) mod gsp_mem { + use core::sync::atomic::{ + fence, + Ordering, // + }; + + use kernel::{ + dma::CoherentAllocation, + dma_read, + dma_write, + prelude::*, // + }; + + use crate::gsp::cmdq::{ + GspMem, + MSGQ_NUM_PAGES, // + }; + + pub(in crate::gsp) fn gsp_write_ptr(qs: &CoherentAllocation) -> u32 { + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { Ok(dma_read!(qs, [0]?.gspq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap() + } + + pub(in crate::gsp) fn gsp_read_ptr(qs: &CoherentAllocation) -> u32 { + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { Ok(dma_read!(qs, [0]?.gspq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap() + } + + pub(in crate::gsp) fn cpu_read_ptr(qs: &CoherentAllocation) -> u32 { + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { Ok(dma_read!(qs, [0]?.cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap() + } + + pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &CoherentAllocation, count: u32) { + let rptr = cpu_read_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES; + + // Ensure read pointer is properly ordered. + fence(Ordering::SeqCst); + + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { + dma_write!(qs, [0]?.cpuq.rx.0.readPtr, rptr); + Ok(()) + }() + .unwrap() + } + + pub(in crate::gsp) fn cpu_write_ptr(qs: &CoherentAllocation) -> u32 { + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { Ok(dma_read!(qs, [0]?.cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap() + } + + pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &CoherentAllocation, count: u32) { + let wptr = cpu_write_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES; + + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { + dma_write!(qs, [0]?.cpuq.tx.0.writePtr, wptr); + Ok(()) + }() + .unwrap(); + + // Ensure all command data is visible before triggering the GSP read. + fence(Ordering::SeqCst); + } +} + /// Empty type to group methods related to heap parameters for running the GSP firmware. enum GspFwHeapParams {} @@ -708,22 +777,6 @@ impl MsgqTxHeader { entryOff: num::usize_into_u32::(), }) } - - /// Returns the value of the write pointer for this queue. - pub(crate) fn write_ptr(&self) -> u32 { - let ptr = core::ptr::from_ref(&self.0.writePtr); - - // SAFETY: `ptr` is a valid pointer to a `u32`. - unsafe { ptr.read_volatile() } - } - - /// Sets the value of the write pointer for this queue. - pub(crate) fn set_write_ptr(&mut self, val: u32) { - let ptr = core::ptr::from_mut(&mut self.0.writePtr); - - // SAFETY: `ptr` is a valid pointer to a `u32`. - unsafe { ptr.write_volatile(val) } - } } // SAFETY: Padding is explicit and does not contain uninitialized data. @@ -739,22 +792,6 @@ impl MsgqRxHeader { pub(crate) fn new() -> Self { Self(Default::default()) } - - /// Returns the value of the read pointer for this queue. - pub(crate) fn read_ptr(&self) -> u32 { - let ptr = core::ptr::from_ref(&self.0.readPtr); - - // SAFETY: `ptr` is a valid pointer to a `u32`. - unsafe { ptr.read_volatile() } - } - - /// Sets the value of the read pointer for this queue. - pub(crate) fn set_read_ptr(&mut self, val: u32) { - let ptr = core::ptr::from_mut(&mut self.0.readPtr); - - // SAFETY: `ptr` is a valid pointer to a `u32`. - unsafe { ptr.write_volatile(val) } - } } // SAFETY: Padding is explicit and does not contain uninitialized data. diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 486152a8ea779..328867242cb37 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1493,8 +1493,7 @@ config SENSORS_LM73 config SENSORS_LM75 tristate "National Semiconductor LM75 and compatibles" - depends on I2C - depends on I3C || !I3C + depends on I3C_OR_I2C select REGMAP_I2C select REGMAP_I3C if I3C help @@ -2382,8 +2381,7 @@ config SENSORS_TMP103 config SENSORS_TMP108 tristate "Texas Instruments TMP108" - depends on I2C - depends on I3C || !I3C + depends on I3C_OR_I2C select REGMAP_I2C select REGMAP_I3C if I3C help diff --git a/drivers/i3c/Kconfig b/drivers/i3c/Kconfig index 30a441506f61c..626c54b386d54 100644 --- a/drivers/i3c/Kconfig +++ b/drivers/i3c/Kconfig @@ -22,3 +22,15 @@ menuconfig I3C if I3C source "drivers/i3c/master/Kconfig" endif # I3C + +config I3C_OR_I2C + tristate + default m if I3C=m + default I2C + help + Device drivers using module_i3c_i2c_driver() can use either + i2c or i3c hosts, but cannot be built-in for the kernel when + CONFIG_I3C=m. + + Add 'depends on I2C_OR_I3C' in Kconfig for those drivers to + get the correct dependencies. diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index d87bde3f7700f..d6bdb32397fb9 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -1024,7 +1024,7 @@ static int dw_i3c_master_reattach_i3c_dev(struct i3c_dev_desc *dev, master->free_pos &= ~BIT(pos); } - writel(DEV_ADDR_TABLE_DYNAMIC_ADDR(dev->info.dyn_addr), + writel(DEV_ADDR_TABLE_DYNAMIC_ADDR(dev->info.dyn_addr) | DEV_ADDR_TABLE_SIR_REJECT, master->regs + DEV_ADDR_TABLE_LOC(master->datstartaddr, data->index)); @@ -1053,7 +1053,7 @@ static int dw_i3c_master_attach_i3c_dev(struct i3c_dev_desc *dev) master->free_pos &= ~BIT(pos); i3c_dev_set_master_data(dev, data); - writel(DEV_ADDR_TABLE_DYNAMIC_ADDR(master->devs[pos].addr), + writel(DEV_ADDR_TABLE_DYNAMIC_ADDR(master->devs[pos].addr) | DEV_ADDR_TABLE_SIR_REJECT, master->regs + DEV_ADDR_TABLE_LOC(master->datstartaddr, data->index)); @@ -1659,6 +1659,8 @@ int dw_i3c_common_probe(struct dw_i3c_master *master, pm_runtime_get_noresume(&pdev->dev); INIT_WORK(&master->hj_work, dw_i3c_hj_work); + + device_set_of_node_from_dev(&master->base.i2c.dev, &pdev->dev); ret = i3c_master_register(&master->base, &pdev->dev, &dw_mipi_i3c_ops, false); if (ret) diff --git a/drivers/i3c/master/mipi-i3c-hci/cmd.h b/drivers/i3c/master/mipi-i3c-hci/cmd.h index 1d6dd2c5d01a5..b1bf87daa6516 100644 --- a/drivers/i3c/master/mipi-i3c-hci/cmd.h +++ b/drivers/i3c/master/mipi-i3c-hci/cmd.h @@ -17,6 +17,7 @@ #define CMD_0_TOC W0_BIT_(31) #define CMD_0_ROC W0_BIT_(30) #define CMD_0_ATTR W0_MASK(2, 0) +#define CMD_0_TID W0_MASK(6, 3) /* * Response Descriptor Structure diff --git a/drivers/i3c/master/mipi-i3c-hci/cmd_v1.c b/drivers/i3c/master/mipi-i3c-hci/cmd_v1.c index fe260461e7e61..75d452d7f6af8 100644 --- a/drivers/i3c/master/mipi-i3c-hci/cmd_v1.c +++ b/drivers/i3c/master/mipi-i3c-hci/cmd_v1.c @@ -331,12 +331,10 @@ static int hci_cmd_v1_daa(struct i3c_hci *hci) CMD_A0_ROC | CMD_A0_TOC; xfer->cmd_desc[1] = 0; xfer->completion = &done; - hci->io->queue_xfer(hci, xfer, 1); - if (!wait_for_completion_timeout(&done, HZ) && - hci->io->dequeue_xfer(hci, xfer, 1)) { - ret = -ETIME; + xfer->timeout = HZ; + ret = i3c_hci_process_xfer(hci, xfer, 1); + if (ret) break; - } if ((RESP_STATUS(xfer->response) == RESP_ERR_ADDR_HEADER || RESP_STATUS(xfer->response) == RESP_ERR_NACK) && RESP_DATA_LENGTH(xfer->response) == 1) { diff --git a/drivers/i3c/master/mipi-i3c-hci/cmd_v2.c b/drivers/i3c/master/mipi-i3c-hci/cmd_v2.c index 3729e64195818..39eec26a363c7 100644 --- a/drivers/i3c/master/mipi-i3c-hci/cmd_v2.c +++ b/drivers/i3c/master/mipi-i3c-hci/cmd_v2.c @@ -253,6 +253,7 @@ static int hci_cmd_v2_daa(struct i3c_hci *hci) xfer[0].rnw = true; xfer[0].cmd_desc[1] = CMD_A1_DATA_LENGTH(8); xfer[1].completion = &done; + xfer[1].timeout = HZ; for (;;) { ret = i3c_master_get_free_addr(&hci->master, next_addr); @@ -272,12 +273,9 @@ static int hci_cmd_v2_daa(struct i3c_hci *hci) CMD_A0_ASSIGN_ADDRESS(next_addr) | CMD_A0_ROC | CMD_A0_TOC; - hci->io->queue_xfer(hci, xfer, 2); - if (!wait_for_completion_timeout(&done, HZ) && - hci->io->dequeue_xfer(hci, xfer, 2)) { - ret = -ETIME; + ret = i3c_hci_process_xfer(hci, xfer, 2); + if (ret) break; - } if (RESP_STATUS(xfer[0].response) != RESP_SUCCESS) { ret = 0; /* no more devices to be assigned */ break; diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c index 5879bba781649..284f3ed7af8cb 100644 --- a/drivers/i3c/master/mipi-i3c-hci/core.c +++ b/drivers/i3c/master/mipi-i3c-hci/core.c @@ -152,7 +152,11 @@ static int i3c_hci_bus_init(struct i3c_master_controller *m) if (hci->quirks & HCI_QUIRK_RESP_BUF_THLD) amd_set_resp_buf_thld(hci); - reg_set(HC_CONTROL, HC_CONTROL_BUS_ENABLE); + scoped_guard(spinlock_irqsave, &hci->lock) + hci->irq_inactive = false; + + /* Enable bus with Hot-Join disabled */ + reg_set(HC_CONTROL, HC_CONTROL_BUS_ENABLE | HC_CONTROL_HOT_JOIN_CTRL); dev_dbg(&hci->master.dev, "HC_CONTROL = %#x", reg_read(HC_CONTROL)); return 0; @@ -177,21 +181,51 @@ static int i3c_hci_bus_disable(struct i3c_hci *hci) return ret; } +static int i3c_hci_software_reset(struct i3c_hci *hci) +{ + u32 regval; + int ret; + + /* + * SOFT_RST must be clear before we write to it. + * Then we must wait until it clears again. + */ + ret = readx_poll_timeout(reg_read, RESET_CONTROL, regval, + !(regval & SOFT_RST), 0, 10 * USEC_PER_MSEC); + if (ret) { + dev_err(&hci->master.dev, "%s: Software reset stuck\n", __func__); + return ret; + } + + reg_write(RESET_CONTROL, SOFT_RST); + + ret = readx_poll_timeout(reg_read, RESET_CONTROL, regval, + !(regval & SOFT_RST), 0, 10 * USEC_PER_MSEC); + if (ret) { + dev_err(&hci->master.dev, "%s: Software reset failed\n", __func__); + return ret; + } + + return 0; +} + void i3c_hci_sync_irq_inactive(struct i3c_hci *hci) { struct platform_device *pdev = to_platform_device(hci->master.dev.parent); int irq = platform_get_irq(pdev, 0); reg_write(INTR_SIGNAL_ENABLE, 0x0); - hci->irq_inactive = true; synchronize_irq(irq); + scoped_guard(spinlock_irqsave, &hci->lock) + hci->irq_inactive = true; } static void i3c_hci_bus_cleanup(struct i3c_master_controller *m) { struct i3c_hci *hci = to_i3c_hci(m); - i3c_hci_bus_disable(hci); + if (i3c_hci_bus_disable(hci)) + i3c_hci_software_reset(hci); hci->io->cleanup(hci); } @@ -212,6 +246,36 @@ void mipi_i3c_hci_dct_index_reset(struct i3c_hci *hci) reg_write(DCT_SECTION, FIELD_PREP(DCT_TABLE_INDEX, 0)); } +int i3c_hci_process_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int n) +{ + struct completion *done = xfer[n - 1].completion; + unsigned long timeout = xfer[n - 1].timeout; + int ret; + + ret = hci->io->queue_xfer(hci, xfer, n); + if (ret) + return ret; + + if (!wait_for_completion_timeout(done, timeout)) { + if (hci->io->dequeue_xfer(hci, xfer, n)) { + dev_err(&hci->master.dev, "%s: timeout error\n", __func__); + return -ETIMEDOUT; + } + return 0; + } + + if (hci->io->handle_error) { + bool error = false; + + for (int i = 0; i < n && !error; i++) + error = RESP_STATUS(xfer[i].response); + if (error) + return hci->io->handle_error(hci, xfer, n); + } + + return 0; +} + static int i3c_hci_send_ccc_cmd(struct i3c_master_controller *m, struct i3c_ccc_cmd *ccc) { @@ -252,18 +316,14 @@ static int i3c_hci_send_ccc_cmd(struct i3c_master_controller *m, last = i - 1; xfer[last].cmd_desc[0] |= CMD_0_TOC; xfer[last].completion = &done; + xfer[last].timeout = HZ; if (prefixed) xfer--; - ret = hci->io->queue_xfer(hci, xfer, nxfers); + ret = i3c_hci_process_xfer(hci, xfer, nxfers); if (ret) goto out; - if (!wait_for_completion_timeout(&done, HZ) && - hci->io->dequeue_xfer(hci, xfer, nxfers)) { - ret = -ETIME; - goto out; - } for (i = prefixed; i < nxfers; i++) { if (ccc->rnw) ccc->dests[i - prefixed].payload.len = @@ -334,15 +394,11 @@ static int i3c_hci_i3c_xfers(struct i3c_dev_desc *dev, last = i - 1; xfer[last].cmd_desc[0] |= CMD_0_TOC; xfer[last].completion = &done; + xfer[last].timeout = HZ; - ret = hci->io->queue_xfer(hci, xfer, nxfers); + ret = i3c_hci_process_xfer(hci, xfer, nxfers); if (ret) goto out; - if (!wait_for_completion_timeout(&done, HZ) && - hci->io->dequeue_xfer(hci, xfer, nxfers)) { - ret = -ETIME; - goto out; - } for (i = 0; i < nxfers; i++) { if (i3c_xfers[i].rnw) i3c_xfers[i].len = RESP_DATA_LENGTH(xfer[i].response); @@ -382,15 +438,11 @@ static int i3c_hci_i2c_xfers(struct i2c_dev_desc *dev, last = i - 1; xfer[last].cmd_desc[0] |= CMD_0_TOC; xfer[last].completion = &done; + xfer[last].timeout = m->i2c.timeout; - ret = hci->io->queue_xfer(hci, xfer, nxfers); + ret = i3c_hci_process_xfer(hci, xfer, nxfers); if (ret) goto out; - if (!wait_for_completion_timeout(&done, m->i2c.timeout) && - hci->io->dequeue_xfer(hci, xfer, nxfers)) { - ret = -ETIME; - goto out; - } for (i = 0; i < nxfers; i++) { if (RESP_STATUS(xfer[i].response) != RESP_SUCCESS) { ret = -EIO; @@ -566,6 +618,8 @@ static irqreturn_t i3c_hci_irq_handler(int irq, void *dev_id) irqreturn_t result = IRQ_NONE; u32 val; + guard(spinlock)(&hci->lock); + /* * The IRQ can be shared, so the handler may be called when the IRQ is * due to a different device. That could happen when runtime suspended, @@ -601,34 +655,6 @@ static irqreturn_t i3c_hci_irq_handler(int irq, void *dev_id) return result; } -static int i3c_hci_software_reset(struct i3c_hci *hci) -{ - u32 regval; - int ret; - - /* - * SOFT_RST must be clear before we write to it. - * Then we must wait until it clears again. - */ - ret = readx_poll_timeout(reg_read, RESET_CONTROL, regval, - !(regval & SOFT_RST), 0, 10 * USEC_PER_MSEC); - if (ret) { - dev_err(&hci->master.dev, "%s: Software reset stuck\n", __func__); - return ret; - } - - reg_write(RESET_CONTROL, SOFT_RST); - - ret = readx_poll_timeout(reg_read, RESET_CONTROL, regval, - !(regval & SOFT_RST), 0, 10 * USEC_PER_MSEC); - if (ret) { - dev_err(&hci->master.dev, "%s: Software reset failed\n", __func__); - return ret; - } - - return 0; -} - static inline bool is_version_1_1_or_newer(struct i3c_hci *hci) { return hci->version_major > 1 || (hci->version_major == 1 && hci->version_minor > 0); @@ -739,8 +765,12 @@ static int i3c_hci_runtime_suspend(struct device *dev) int ret; ret = i3c_hci_bus_disable(hci); - if (ret) + if (ret) { + /* Fall back to software reset to disable the bus */ + ret = i3c_hci_software_reset(hci); + i3c_hci_sync_irq_inactive(hci); return ret; + } hci->io->suspend(hci); @@ -760,11 +790,13 @@ static int i3c_hci_runtime_resume(struct device *dev) mipi_i3c_hci_dat_v1.restore(hci); - hci->irq_inactive = false; - hci->io->resume(hci); - reg_set(HC_CONTROL, HC_CONTROL_BUS_ENABLE); + scoped_guard(spinlock_irqsave, &hci->lock) + hci->irq_inactive = false; + + /* Enable bus with Hot-Join disabled */ + reg_set(HC_CONTROL, HC_CONTROL_BUS_ENABLE | HC_CONTROL_HOT_JOIN_CTRL); return 0; } @@ -924,6 +956,9 @@ static int i3c_hci_probe(struct platform_device *pdev) if (!hci) return -ENOMEM; + spin_lock_init(&hci->lock); + mutex_init(&hci->control_mutex); + /* * Multi-bus instances share the same MMIO address range, but not * necessarily in separate contiguous sub-ranges. To avoid overlapping @@ -950,6 +985,8 @@ static int i3c_hci_probe(struct platform_device *pdev) if (ret) return ret; + hci->irq_inactive = true; + irq = platform_get_irq(pdev, 0); ret = devm_request_irq(&pdev->dev, irq, i3c_hci_irq_handler, IRQF_SHARED, NULL, hci); diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c index b903a2da1fd16..e487ef52f6b4e 100644 --- a/drivers/i3c/master/mipi-i3c-hci/dma.c +++ b/drivers/i3c/master/mipi-i3c-hci/dma.c @@ -129,9 +129,8 @@ struct hci_rh_data { dma_addr_t xfer_dma, resp_dma, ibi_status_dma, ibi_data_dma; unsigned int xfer_entries, ibi_status_entries, ibi_chunks_total; unsigned int xfer_struct_sz, resp_struct_sz, ibi_status_sz, ibi_chunk_sz; - unsigned int done_ptr, ibi_chunk_ptr; + unsigned int done_ptr, ibi_chunk_ptr, xfer_space; struct hci_xfer **src_xfers; - spinlock_t lock; struct completion op_done; }; @@ -261,6 +260,7 @@ static void hci_dma_init_rh(struct i3c_hci *hci, struct hci_rh_data *rh, int i) rh->done_ptr = 0; rh->ibi_chunk_ptr = 0; + rh->xfer_space = rh->xfer_entries; } static void hci_dma_init_rings(struct i3c_hci *hci) @@ -344,7 +344,6 @@ static int hci_dma_init(struct i3c_hci *hci) goto err_out; rh = &rings->headers[i]; rh->regs = hci->base_regs + offset; - spin_lock_init(&rh->lock); init_completion(&rh->op_done); rh->xfer_entries = XFER_RING_ENTRIES; @@ -439,26 +438,63 @@ static void hci_dma_unmap_xfer(struct i3c_hci *hci, } } +static struct i3c_dma *hci_dma_map_xfer(struct device *dev, struct hci_xfer *xfer) +{ + enum dma_data_direction dir = xfer->rnw ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + bool need_bounce = device_iommu_mapped(dev) && xfer->rnw && (xfer->data_len & 3); + + return i3c_master_dma_map_single(dev, xfer->data, xfer->data_len, need_bounce, dir); +} + +static int hci_dma_map_xfer_list(struct i3c_hci *hci, struct device *dev, + struct hci_xfer *xfer_list, int n) +{ + for (int i = 0; i < n; i++) { + struct hci_xfer *xfer = xfer_list + i; + + if (!xfer->data) + continue; + + xfer->dma = hci_dma_map_xfer(dev, xfer); + if (!xfer->dma) { + hci_dma_unmap_xfer(hci, xfer_list, i); + return -ENOMEM; + } + } + + return 0; +} + static int hci_dma_queue_xfer(struct i3c_hci *hci, struct hci_xfer *xfer_list, int n) { struct hci_rings_data *rings = hci->io_data; struct hci_rh_data *rh; unsigned int i, ring, enqueue_ptr; - u32 op1_val, op2_val; + u32 op1_val; + int ret; + + ret = hci_dma_map_xfer_list(hci, rings->sysdev, xfer_list, n); + if (ret) + return ret; /* For now we only use ring 0 */ ring = 0; rh = &rings->headers[ring]; + spin_lock_irq(&hci->lock); + + if (n > rh->xfer_space) { + spin_unlock_irq(&hci->lock); + hci_dma_unmap_xfer(hci, xfer_list, n); + return -EBUSY; + } + op1_val = rh_reg_read(RING_OPERATION1); enqueue_ptr = FIELD_GET(RING_OP1_CR_ENQ_PTR, op1_val); for (i = 0; i < n; i++) { struct hci_xfer *xfer = xfer_list + i; u32 *ring_data = rh->xfer + rh->xfer_struct_sz * enqueue_ptr; - enum dma_data_direction dir = xfer->rnw ? DMA_FROM_DEVICE : - DMA_TO_DEVICE; - bool need_bounce; /* store cmd descriptor */ *ring_data++ = xfer->cmd_desc[0]; @@ -477,18 +513,6 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci, /* 2nd and 3rd words of Data Buffer Descriptor Structure */ if (xfer->data) { - need_bounce = device_iommu_mapped(rings->sysdev) && - xfer->rnw && - xfer->data_len != ALIGN(xfer->data_len, 4); - xfer->dma = i3c_master_dma_map_single(rings->sysdev, - xfer->data, - xfer->data_len, - need_bounce, - dir); - if (!xfer->dma) { - hci_dma_unmap_xfer(hci, xfer_list, i); - return -ENOMEM; - } *ring_data++ = lower_32_bits(xfer->dma->addr); *ring_data++ = upper_32_bits(xfer->dma->addr); } else { @@ -503,26 +527,14 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci, xfer->ring_entry = enqueue_ptr; enqueue_ptr = (enqueue_ptr + 1) % rh->xfer_entries; - - /* - * We may update the hardware view of the enqueue pointer - * only if we didn't reach its dequeue pointer. - */ - op2_val = rh_reg_read(RING_OPERATION2); - if (enqueue_ptr == FIELD_GET(RING_OP2_CR_DEQ_PTR, op2_val)) { - /* the ring is full */ - hci_dma_unmap_xfer(hci, xfer_list, i + 1); - return -EBUSY; - } } - /* take care to update the hardware enqueue pointer atomically */ - spin_lock_irq(&rh->lock); - op1_val = rh_reg_read(RING_OPERATION1); + rh->xfer_space -= n; + op1_val &= ~RING_OP1_CR_ENQ_PTR; op1_val |= FIELD_PREP(RING_OP1_CR_ENQ_PTR, enqueue_ptr); rh_reg_write(RING_OPERATION1, op1_val); - spin_unlock_irq(&rh->lock); + spin_unlock_irq(&hci->lock); return 0; } @@ -534,18 +546,29 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci, struct hci_rh_data *rh = &rings->headers[xfer_list[0].ring_number]; unsigned int i; bool did_unqueue = false; - - /* stop the ring */ - rh_reg_write(RING_CONTROL, RING_CTRL_ABORT); - if (wait_for_completion_timeout(&rh->op_done, HZ) == 0) { - /* - * We're deep in it if ever this condition is ever met. - * Hardware might still be writing to memory, etc. - */ - dev_crit(&hci->master.dev, "unable to abort the ring\n"); - WARN_ON(1); + u32 ring_status; + + guard(mutex)(&hci->control_mutex); + + ring_status = rh_reg_read(RING_STATUS); + if (ring_status & RING_STATUS_RUNNING) { + /* stop the ring */ + reinit_completion(&rh->op_done); + rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE | RING_CTRL_ABORT); + wait_for_completion_timeout(&rh->op_done, HZ); + ring_status = rh_reg_read(RING_STATUS); + if (ring_status & RING_STATUS_RUNNING) { + /* + * We're deep in it if ever this condition is ever met. + * Hardware might still be writing to memory, etc. + */ + dev_crit(&hci->master.dev, "unable to abort the ring\n"); + WARN_ON(1); + } } + spin_lock_irq(&hci->lock); + for (i = 0; i < n; i++) { struct hci_xfer *xfer = xfer_list + i; int idx = xfer->ring_entry; @@ -559,7 +582,7 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci, u32 *ring_data = rh->xfer + rh->xfer_struct_sz * idx; /* store no-op cmd descriptor */ - *ring_data++ = FIELD_PREP(CMD_0_ATTR, 0x7); + *ring_data++ = FIELD_PREP(CMD_0_ATTR, 0x7) | FIELD_PREP(CMD_0_TID, xfer->cmd_tid); *ring_data++ = 0; if (hci->cmd == &mipi_i3c_hci_cmd_v2) { *ring_data++ = 0; @@ -577,15 +600,25 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci, } /* restart the ring */ + mipi_i3c_hci_resume(hci); rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE); + rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE | RING_CTRL_RUN_STOP); + + spin_unlock_irq(&hci->lock); return did_unqueue; } +static int hci_dma_handle_error(struct i3c_hci *hci, struct hci_xfer *xfer_list, int n) +{ + return hci_dma_dequeue_xfer(hci, xfer_list, n) ? -EIO : 0; +} + static void hci_dma_xfer_done(struct i3c_hci *hci, struct hci_rh_data *rh) { u32 op1_val, op2_val, resp, *ring_resp; unsigned int tid, done_ptr = rh->done_ptr; + unsigned int done_cnt = 0; struct hci_xfer *xfer; for (;;) { @@ -603,6 +636,7 @@ static void hci_dma_xfer_done(struct i3c_hci *hci, struct hci_rh_data *rh) dev_dbg(&hci->master.dev, "orphaned ring entry"); } else { hci_dma_unmap_xfer(hci, xfer, 1); + rh->src_xfers[done_ptr] = NULL; xfer->ring_entry = -1; xfer->response = resp; if (tid != xfer->cmd_tid) { @@ -617,15 +651,14 @@ static void hci_dma_xfer_done(struct i3c_hci *hci, struct hci_rh_data *rh) done_ptr = (done_ptr + 1) % rh->xfer_entries; rh->done_ptr = done_ptr; + done_cnt += 1; } - /* take care to update the software dequeue pointer atomically */ - spin_lock(&rh->lock); + rh->xfer_space += done_cnt; op1_val = rh_reg_read(RING_OPERATION1); op1_val &= ~RING_OP1_CR_SW_DEQ_PTR; op1_val |= FIELD_PREP(RING_OP1_CR_SW_DEQ_PTR, done_ptr); rh_reg_write(RING_OPERATION1, op1_val); - spin_unlock(&rh->lock); } static int hci_dma_request_ibi(struct i3c_hci *hci, struct i3c_dev_desc *dev, @@ -805,13 +838,10 @@ static void hci_dma_process_ibi(struct i3c_hci *hci, struct hci_rh_data *rh) i3c_master_queue_ibi(dev, slot); done: - /* take care to update the ibi dequeue pointer atomically */ - spin_lock(&rh->lock); op1_val = rh_reg_read(RING_OPERATION1); op1_val &= ~RING_OP1_IBI_DEQ_PTR; op1_val |= FIELD_PREP(RING_OP1_IBI_DEQ_PTR, deq_ptr); rh_reg_write(RING_OPERATION1, op1_val); - spin_unlock(&rh->lock); /* update the chunk pointer */ rh->ibi_chunk_ptr += ibi_chunks; @@ -845,29 +875,8 @@ static bool hci_dma_irq_handler(struct i3c_hci *hci) hci_dma_xfer_done(hci, rh); if (status & INTR_RING_OP) complete(&rh->op_done); - - if (status & INTR_TRANSFER_ABORT) { - u32 ring_status; - - dev_notice_ratelimited(&hci->master.dev, - "Ring %d: Transfer Aborted\n", i); - mipi_i3c_hci_resume(hci); - ring_status = rh_reg_read(RING_STATUS); - if (!(ring_status & RING_STATUS_RUNNING) && - status & INTR_TRANSFER_COMPLETION && - status & INTR_TRANSFER_ERR) { - /* - * Ring stop followed by run is an Intel - * specific required quirk after resuming the - * halted controller. Do it only when the ring - * is not in running state after a transfer - * error. - */ - rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE); - rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE | - RING_CTRL_RUN_STOP); - } - } + if (status & INTR_TRANSFER_ABORT) + dev_dbg(&hci->master.dev, "Ring %d: Transfer Aborted\n", i); if (status & INTR_IBI_RING_FULL) dev_err_ratelimited(&hci->master.dev, "Ring %d: IBI Ring Full Condition\n", i); @@ -883,6 +892,7 @@ const struct hci_io_ops mipi_i3c_hci_dma = { .cleanup = hci_dma_cleanup, .queue_xfer = hci_dma_queue_xfer, .dequeue_xfer = hci_dma_dequeue_xfer, + .handle_error = hci_dma_handle_error, .irq_handler = hci_dma_irq_handler, .request_ibi = hci_dma_request_ibi, .free_ibi = hci_dma_free_ibi, diff --git a/drivers/i3c/master/mipi-i3c-hci/hci.h b/drivers/i3c/master/mipi-i3c-hci/hci.h index 337b7ab1cb06e..9ac9d0e342f4f 100644 --- a/drivers/i3c/master/mipi-i3c-hci/hci.h +++ b/drivers/i3c/master/mipi-i3c-hci/hci.h @@ -50,6 +50,8 @@ struct i3c_hci { const struct hci_io_ops *io; void *io_data; const struct hci_cmd_ops *cmd; + spinlock_t lock; + struct mutex control_mutex; atomic_t next_cmd_tid; bool irq_inactive; u32 caps; @@ -87,6 +89,7 @@ struct hci_xfer { unsigned int data_len; unsigned int cmd_tid; struct completion *completion; + unsigned long timeout; union { struct { /* PIO specific */ @@ -120,6 +123,7 @@ struct hci_io_ops { bool (*irq_handler)(struct i3c_hci *hci); int (*queue_xfer)(struct i3c_hci *hci, struct hci_xfer *xfer, int n); bool (*dequeue_xfer)(struct i3c_hci *hci, struct hci_xfer *xfer, int n); + int (*handle_error)(struct i3c_hci *hci, struct hci_xfer *xfer, int n); int (*request_ibi)(struct i3c_hci *hci, struct i3c_dev_desc *dev, const struct i3c_ibi_setup *req); void (*free_ibi)(struct i3c_hci *hci, struct i3c_dev_desc *dev); @@ -154,5 +158,6 @@ void mipi_i3c_hci_dct_index_reset(struct i3c_hci *hci); void amd_set_od_pp_timing(struct i3c_hci *hci); void amd_set_resp_buf_thld(struct i3c_hci *hci); void i3c_hci_sync_irq_inactive(struct i3c_hci *hci); +int i3c_hci_process_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int n); #endif diff --git a/drivers/i3c/master/mipi-i3c-hci/pio.c b/drivers/i3c/master/mipi-i3c-hci/pio.c index f8825ac814088..8f48a81e65abe 100644 --- a/drivers/i3c/master/mipi-i3c-hci/pio.c +++ b/drivers/i3c/master/mipi-i3c-hci/pio.c @@ -123,7 +123,6 @@ struct hci_pio_ibi_data { }; struct hci_pio_data { - spinlock_t lock; struct hci_xfer *curr_xfer, *xfer_queue; struct hci_xfer *curr_rx, *rx_queue; struct hci_xfer *curr_tx, *tx_queue; @@ -212,7 +211,6 @@ static int hci_pio_init(struct i3c_hci *hci) return -ENOMEM; hci->io_data = pio; - spin_lock_init(&pio->lock); __hci_pio_init(hci, &size_val); @@ -631,7 +629,7 @@ static int hci_pio_queue_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int n) xfer[i].data_left = xfer[i].data_len; } - spin_lock_irq(&pio->lock); + spin_lock_irq(&hci->lock); prev_queue_tail = pio->xfer_queue; pio->xfer_queue = &xfer[n - 1]; if (pio->curr_xfer) { @@ -645,7 +643,7 @@ static int hci_pio_queue_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int n) pio_reg_read(INTR_STATUS), pio_reg_read(INTR_SIGNAL_ENABLE)); } - spin_unlock_irq(&pio->lock); + spin_unlock_irq(&hci->lock); return 0; } @@ -716,14 +714,14 @@ static bool hci_pio_dequeue_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int struct hci_pio_data *pio = hci->io_data; int ret; - spin_lock_irq(&pio->lock); + spin_lock_irq(&hci->lock); dev_dbg(&hci->master.dev, "n=%d status=%#x/%#x", n, pio_reg_read(INTR_STATUS), pio_reg_read(INTR_SIGNAL_ENABLE)); dev_dbg(&hci->master.dev, "main_status = %#x/%#x", readl(hci->base_regs + 0x20), readl(hci->base_regs + 0x28)); ret = hci_pio_dequeue_xfer_common(hci, pio, xfer, n); - spin_unlock_irq(&pio->lock); + spin_unlock_irq(&hci->lock); return ret; } @@ -1016,15 +1014,12 @@ static bool hci_pio_irq_handler(struct i3c_hci *hci) struct hci_pio_data *pio = hci->io_data; u32 status; - spin_lock(&pio->lock); status = pio_reg_read(INTR_STATUS); dev_dbg(&hci->master.dev, "PIO_INTR_STATUS %#x/%#x", status, pio->enabled_irqs); status &= pio->enabled_irqs | STAT_LATENCY_WARNINGS; - if (!status) { - spin_unlock(&pio->lock); + if (!status) return false; - } if (status & STAT_IBI_STATUS_THLD) hci_pio_process_ibi(hci, pio); @@ -1058,7 +1053,6 @@ static bool hci_pio_irq_handler(struct i3c_hci *hci) pio_reg_write(INTR_SIGNAL_ENABLE, pio->enabled_irqs); dev_dbg(&hci->master.dev, "PIO_INTR_STATUS %#x/%#x", pio_reg_read(INTR_STATUS), pio_reg_read(INTR_SIGNAL_ENABLE)); - spin_unlock(&pio->lock); return true; } diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index fcd8aea7152e8..e16dede687d3f 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -531,7 +531,7 @@ static int ad7768_reg_access(struct iio_dev *indio_dev, return ret; } -static void ad7768_fill_scale_tbl(struct iio_dev *dev) +static int ad7768_fill_scale_tbl(struct iio_dev *dev) { struct ad7768_state *st = iio_priv(dev); const struct iio_scan_type *scan_type; @@ -541,6 +541,11 @@ static void ad7768_fill_scale_tbl(struct iio_dev *dev) u64 tmp2; scan_type = iio_get_current_scan_type(dev, &dev->channels[0]); + if (IS_ERR(scan_type)) { + dev_err(&st->spi->dev, "Failed to get scan type.\n"); + return PTR_ERR(scan_type); + } + if (scan_type->sign == 's') val2 = scan_type->realbits - 1; else @@ -565,6 +570,8 @@ static void ad7768_fill_scale_tbl(struct iio_dev *dev) st->scale_tbl[i][0] = tmp0; /* Integer part */ st->scale_tbl[i][1] = abs(tmp1); /* Fractional part */ } + + return 0; } static int ad7768_set_sinc3_dec_rate(struct ad7768_state *st, @@ -669,7 +676,9 @@ static int ad7768_configure_dig_fil(struct iio_dev *dev, } /* Update scale table: scale values vary according to the precision */ - ad7768_fill_scale_tbl(dev); + ret = ad7768_fill_scale_tbl(dev); + if (ret) + return ret; ad7768_fill_samp_freq_tbl(st); diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index 70f81c4a96bac..24e0b59e2fdf0 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -613,7 +613,7 @@ static int bme680_wait_for_eoc(struct bme680_data *data) * + heater duration */ int wait_eoc_us = ((data->oversampling_temp + data->oversampling_press + - data->oversampling_humid) * 1936) + (477 * 4) + + data->oversampling_humid) * 1963) + (477 * 4) + (477 * 5) + 1000 + (data->heater_dur * 1000); fsleep(wait_eoc_us); diff --git a/drivers/iio/chemical/sps30_i2c.c b/drivers/iio/chemical/sps30_i2c.c index f692c089d17b4..c92f04990c34c 100644 --- a/drivers/iio/chemical/sps30_i2c.c +++ b/drivers/iio/chemical/sps30_i2c.c @@ -171,7 +171,7 @@ static int sps30_i2c_read_meas(struct sps30_state *state, __be32 *meas, size_t n if (!sps30_i2c_meas_ready(state)) return -ETIMEDOUT; - return sps30_i2c_command(state, SPS30_I2C_READ_MEAS, NULL, 0, meas, sizeof(num) * num); + return sps30_i2c_command(state, SPS30_I2C_READ_MEAS, NULL, 0, meas, sizeof(*meas) * num); } static int sps30_i2c_clean_fan(struct sps30_state *state) diff --git a/drivers/iio/chemical/sps30_serial.c b/drivers/iio/chemical/sps30_serial.c index 008bc88590f37..a5e6bc08d5fd4 100644 --- a/drivers/iio/chemical/sps30_serial.c +++ b/drivers/iio/chemical/sps30_serial.c @@ -303,7 +303,7 @@ static int sps30_serial_read_meas(struct sps30_state *state, __be32 *meas, size_ if (msleep_interruptible(1000)) return -EINTR; - ret = sps30_serial_command(state, SPS30_SERIAL_READ_MEAS, NULL, 0, meas, num * sizeof(num)); + ret = sps30_serial_command(state, SPS30_SERIAL_READ_MEAS, NULL, 0, meas, num * sizeof(*meas)); if (ret < 0) return ret; /* if measurements aren't ready sensor returns empty frame */ diff --git a/drivers/iio/dac/ds4424.c b/drivers/iio/dac/ds4424.c index 6dda8918975a6..c61868f2de316 100644 --- a/drivers/iio/dac/ds4424.c +++ b/drivers/iio/dac/ds4424.c @@ -140,7 +140,7 @@ static int ds4424_write_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - if (val < S8_MIN || val > S8_MAX) + if (val <= S8_MIN || val > S8_MAX) return -EINVAL; if (val > 0) { diff --git a/drivers/iio/frequency/adf4377.c b/drivers/iio/frequency/adf4377.c index fa686f785fa43..8e2da218d48a4 100644 --- a/drivers/iio/frequency/adf4377.c +++ b/drivers/iio/frequency/adf4377.c @@ -508,7 +508,7 @@ static int adf4377_soft_reset(struct adf4377_state *st) return ret; return regmap_read_poll_timeout(st->regmap, 0x0, read_val, - !(read_val & (ADF4377_0000_SOFT_RESET_R_MSK | + !(read_val & (ADF4377_0000_SOFT_RESET_MSK | ADF4377_0000_SOFT_RESET_R_MSK)), 200, 200 * 100); } diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index ee2fcd20545de..317e7b217ec6b 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -322,7 +322,9 @@ static int mpu3050_read_raw(struct iio_dev *indio_dev, } case IIO_CHAN_INFO_RAW: /* Resume device */ - pm_runtime_get_sync(mpu3050->dev); + ret = pm_runtime_resume_and_get(mpu3050->dev); + if (ret) + return ret; mutex_lock(&mpu3050->lock); ret = mpu3050_set_8khz_samplerate(mpu3050); @@ -647,14 +649,20 @@ static irqreturn_t mpu3050_trigger_handler(int irq, void *p) static int mpu3050_buffer_preenable(struct iio_dev *indio_dev) { struct mpu3050 *mpu3050 = iio_priv(indio_dev); + int ret; - pm_runtime_get_sync(mpu3050->dev); + ret = pm_runtime_resume_and_get(mpu3050->dev); + if (ret) + return ret; /* Unless we have OUR trigger active, run at full speed */ - if (!mpu3050->hw_irq_trigger) - return mpu3050_set_8khz_samplerate(mpu3050); + if (!mpu3050->hw_irq_trigger) { + ret = mpu3050_set_8khz_samplerate(mpu3050); + if (ret) + pm_runtime_put_autosuspend(mpu3050->dev); + } - return 0; + return ret; } static int mpu3050_buffer_postdisable(struct iio_dev *indio_dev) diff --git a/drivers/iio/gyro/mpu3050-i2c.c b/drivers/iio/gyro/mpu3050-i2c.c index 092878f2c8869..6549b22e643d8 100644 --- a/drivers/iio/gyro/mpu3050-i2c.c +++ b/drivers/iio/gyro/mpu3050-i2c.c @@ -19,8 +19,7 @@ static int mpu3050_i2c_bypass_select(struct i2c_mux_core *mux, u32 chan_id) struct mpu3050 *mpu3050 = i2c_mux_priv(mux); /* Just power up the device, that is all that is needed */ - pm_runtime_get_sync(mpu3050->dev); - return 0; + return pm_runtime_resume_and_get(mpu3050->dev); } static int mpu3050_i2c_bypass_deselect(struct i2c_mux_core *mux, u32 chan_id) diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c index d160147cce0ba..a2bc1d14ed91d 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c @@ -526,7 +526,7 @@ int adis_init(struct adis *adis, struct iio_dev *indio_dev, adis->spi = spi; adis->data = data; - if (!adis->ops->write && !adis->ops->read && !adis->ops->reset) + if (!adis->ops) adis->ops = &adis_default_ops; else if (!adis->ops->write || !adis->ops->read || !adis->ops->reset) return -EINVAL; diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c index 54760d8f92a27..0ab6eddf0543f 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c @@ -651,6 +651,8 @@ static int inv_icm42600_accel_write_odr(struct iio_dev *indio_dev, return -EINVAL; conf.odr = inv_icm42600_accel_odr_conv[idx / 2]; + if (conf.odr == st->conf.accel.odr) + return 0; pm_runtime_get_sync(dev); mutex_lock(&st->lock); diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c index ada968be954d4..68a3957580318 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c @@ -371,6 +371,8 @@ static int inv_icm42600_buffer_predisable(struct iio_dev *indio_dev) static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev) { struct inv_icm42600_state *st = iio_device_get_drvdata(indio_dev); + struct inv_icm42600_sensor_state *sensor_st = iio_priv(indio_dev); + struct inv_sensors_timestamp *ts = &sensor_st->ts; struct device *dev = regmap_get_device(st->map); unsigned int sensor; unsigned int *watermark; @@ -392,6 +394,8 @@ static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev) mutex_lock(&st->lock); + inv_sensors_timestamp_apply_odr(ts, 0, 0, 0); + ret = inv_icm42600_buffer_set_fifo_en(st, st->fifo.en & ~sensor); if (ret) goto out_unlock; diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c index 7ef0a25ec74f6..11339ddf1da36 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c @@ -358,6 +358,8 @@ static int inv_icm42600_gyro_write_odr(struct iio_dev *indio_dev, return -EINVAL; conf.odr = inv_icm42600_gyro_odr_conv[idx / 2]; + if (conf.odr == st->conf.gyro.odr) + return 0; pm_runtime_get_sync(dev); mutex_lock(&st->lock); diff --git a/drivers/iio/imu/inv_icm45600/inv_icm45600.h b/drivers/iio/imu/inv_icm45600/inv_icm45600.h index c5b5446f6c3b4..1c796d4b2a403 100644 --- a/drivers/iio/imu/inv_icm45600/inv_icm45600.h +++ b/drivers/iio/imu/inv_icm45600/inv_icm45600.h @@ -205,7 +205,7 @@ struct inv_icm45600_sensor_state { #define INV_ICM45600_SPI_SLEW_RATE_38NS 0 #define INV_ICM45600_REG_INT1_CONFIG2 0x0018 -#define INV_ICM45600_INT1_CONFIG2_PUSH_PULL BIT(2) +#define INV_ICM45600_INT1_CONFIG2_OPEN_DRAIN BIT(2) #define INV_ICM45600_INT1_CONFIG2_LATCHED BIT(1) #define INV_ICM45600_INT1_CONFIG2_ACTIVE_HIGH BIT(0) #define INV_ICM45600_INT1_CONFIG2_ACTIVE_LOW 0x00 diff --git a/drivers/iio/imu/inv_icm45600/inv_icm45600_core.c b/drivers/iio/imu/inv_icm45600/inv_icm45600_core.c index 25bd9757a594d..d49053161a657 100644 --- a/drivers/iio/imu/inv_icm45600/inv_icm45600_core.c +++ b/drivers/iio/imu/inv_icm45600/inv_icm45600_core.c @@ -637,8 +637,8 @@ static int inv_icm45600_irq_init(struct inv_icm45600_state *st, int irq, break; } - if (!open_drain) - val |= INV_ICM45600_INT1_CONFIG2_PUSH_PULL; + if (open_drain) + val |= INV_ICM45600_INT1_CONFIG2_OPEN_DRAIN; ret = regmap_write(st->map, INV_ICM45600_REG_INT1_CONFIG2, val); if (ret) @@ -744,6 +744,11 @@ int inv_icm45600_core_probe(struct regmap *regmap, const struct inv_icm45600_chi */ fsleep(5 * USEC_PER_MSEC); + /* set pm_runtime active early for disable vddio resource cleanup */ + ret = pm_runtime_set_active(dev); + if (ret) + return ret; + ret = inv_icm45600_enable_regulator_vddio(st); if (ret) return ret; @@ -776,7 +781,7 @@ int inv_icm45600_core_probe(struct regmap *regmap, const struct inv_icm45600_chi if (ret) return ret; - ret = devm_pm_runtime_set_active_enabled(dev); + ret = devm_pm_runtime_enable(dev); if (ret) return ret; diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index b2fa1f4957a5b..5796896d54cd8 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -1943,6 +1943,14 @@ int inv_mpu_core_probe(struct regmap *regmap, int irq, const char *name, irq_type); return -EINVAL; } + + /* + * Acking interrupts by status register does not work reliably + * but seem to work when this bit is set. + */ + if (st->chip_type == INV_MPU9150) + st->irq_mask |= INV_MPU6050_INT_RD_CLEAR; + device_set_wakeup_capable(dev, true); st->vdd_supply = devm_regulator_get(dev, "vdd"); diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h index 211901f8b8eb6..6239b1a803f77 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h @@ -390,6 +390,8 @@ struct inv_mpu6050_state { /* enable level triggering */ #define INV_MPU6050_LATCH_INT_EN 0x20 #define INV_MPU6050_BIT_BYPASS_EN 0x2 +/* allow acking interrupts by any register read */ +#define INV_MPU6050_INT_RD_CLEAR 0x10 /* Allowed timestamp period jitter in percent */ #define INV_MPU6050_TS_PERIOD_JITTER 4 diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c index 10a4733420759..22c1ce66f99ee 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c @@ -248,7 +248,6 @@ static irqreturn_t inv_mpu6050_interrupt_handle(int irq, void *p) switch (st->chip_type) { case INV_MPU6000: case INV_MPU6050: - case INV_MPU9150: /* * WoM is not supported and interrupt status read seems to be broken for * some chips. Since data ready is the only interrupt, bypass interrupt @@ -257,6 +256,10 @@ static irqreturn_t inv_mpu6050_interrupt_handle(int irq, void *p) wom_bits = 0; int_status = INV_MPU6050_BIT_RAW_DATA_RDY_INT; goto data_ready_interrupt; + case INV_MPU9150: + /* IRQ needs to be acked */ + wom_bits = 0; + break; case INV_MPU6500: case INV_MPU6515: case INV_MPU6880: diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index f15a180dc49ea..46f36a6ed2710 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -228,8 +228,10 @@ static ssize_t iio_buffer_write(struct file *filp, const char __user *buf, written = 0; add_wait_queue(&rb->pollq, &wait); do { - if (!indio_dev->info) - return -ENODEV; + if (!indio_dev->info) { + ret = -ENODEV; + break; + } if (!iio_buffer_space_available(rb)) { if (signal_pending(current)) { diff --git a/drivers/iio/light/bh1780.c b/drivers/iio/light/bh1780.c index 5d3c6d5276bac..a740d1f992a8a 100644 --- a/drivers/iio/light/bh1780.c +++ b/drivers/iio/light/bh1780.c @@ -109,9 +109,9 @@ static int bh1780_read_raw(struct iio_dev *indio_dev, case IIO_LIGHT: pm_runtime_get_sync(&bh1780->client->dev); value = bh1780_read_word(bh1780, BH1780_REG_DLOW); + pm_runtime_put_autosuspend(&bh1780->client->dev); if (value < 0) return value; - pm_runtime_put_autosuspend(&bh1780->client->dev); *val = value; return IIO_VAL_INT; diff --git a/drivers/iio/magnetometer/Kconfig b/drivers/iio/magnetometer/Kconfig index 9345fb6d53178..fb313e591e850 100644 --- a/drivers/iio/magnetometer/Kconfig +++ b/drivers/iio/magnetometer/Kconfig @@ -143,8 +143,7 @@ config MMC5633 tristate "MEMSIC MMC5633 3-axis magnetic sensor" select REGMAP_I2C select REGMAP_I3C if I3C - depends on I2C - depends on I3C || !I3C + depends on I3C_OR_I2C help Say yes here to build support for the MEMSIC MMC5633 3-axis magnetic sensor. diff --git a/drivers/iio/magnetometer/tlv493d.c b/drivers/iio/magnetometer/tlv493d.c index ec53fd40277b0..e5e050af2b74c 100644 --- a/drivers/iio/magnetometer/tlv493d.c +++ b/drivers/iio/magnetometer/tlv493d.c @@ -171,7 +171,7 @@ static s16 tlv493d_get_channel_data(u8 *b, enum tlv493d_channels ch) switch (ch) { case TLV493D_AXIS_X: val = FIELD_GET(TLV493D_BX_MAG_X_AXIS_MSB, b[TLV493D_RD_REG_BX]) << 4 | - FIELD_GET(TLV493D_BX2_MAG_X_AXIS_LSB, b[TLV493D_RD_REG_BX2]) >> 4; + FIELD_GET(TLV493D_BX2_MAG_X_AXIS_LSB, b[TLV493D_RD_REG_BX2]); break; case TLV493D_AXIS_Y: val = FIELD_GET(TLV493D_BY_MAG_Y_AXIS_MSB, b[TLV493D_RD_REG_BY]) << 4 | diff --git a/drivers/iio/potentiometer/mcp4131.c b/drivers/iio/potentiometer/mcp4131.c index ad082827aad5e..56c9111ef5e81 100644 --- a/drivers/iio/potentiometer/mcp4131.c +++ b/drivers/iio/potentiometer/mcp4131.c @@ -221,7 +221,7 @@ static int mcp4131_write_raw(struct iio_dev *indio_dev, mutex_lock(&data->lock); - data->buf[0] = address << MCP4131_WIPER_SHIFT; + data->buf[0] = address; data->buf[0] |= MCP4131_WRITE | (val >> 8); data->buf[1] = val & 0xFF; /* 8 bits here */ diff --git a/drivers/iio/proximity/hx9023s.c b/drivers/iio/proximity/hx9023s.c index 2918dfc0df547..17e00ee2b6f84 100644 --- a/drivers/iio/proximity/hx9023s.c +++ b/drivers/iio/proximity/hx9023s.c @@ -719,6 +719,9 @@ static int hx9023s_set_samp_freq(struct hx9023s_data *data, int val, int val2) struct device *dev = regmap_get_device(data->regmap); unsigned int i, period_ms; + if (!val && !val2) + return -EINVAL; + period_ms = div_u64(NANO, (val * MEGA + val2)); for (i = 0; i < ARRAY_SIZE(hx9023s_samp_freq_table); i++) { @@ -1034,9 +1037,8 @@ static int hx9023s_send_cfg(const struct firmware *fw, struct hx9023s_data *data if (!bin) return -ENOMEM; - memcpy(bin->data, fw->data, fw->size); - bin->fw_size = fw->size; + memcpy(bin->data, fw->data, bin->fw_size); bin->fw_ver = bin->data[FW_VER_OFFSET]; bin->reg_count = get_unaligned_le16(bin->data + FW_REG_CNT_OFFSET); diff --git a/drivers/irqchip/irq-riscv-aplic-main.c b/drivers/irqchip/irq-riscv-aplic-main.c index 4495ca26abf57..9f53979b69625 100644 --- a/drivers/irqchip/irq-riscv-aplic-main.c +++ b/drivers/irqchip/irq-riscv-aplic-main.c @@ -116,6 +116,16 @@ static struct syscore aplic_syscore = { .ops = &aplic_syscore_ops, }; +static bool aplic_syscore_registered __ro_after_init; + +static void aplic_syscore_init(void) +{ + if (!aplic_syscore_registered) { + register_syscore(&aplic_syscore); + aplic_syscore_registered = true; + } +} + static int aplic_pm_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct aplic_priv *priv = container_of(nb, struct aplic_priv, genpd_nb); @@ -372,18 +382,21 @@ static int aplic_probe(struct platform_device *pdev) rc = aplic_msi_setup(dev, regs); else rc = aplic_direct_setup(dev, regs); - if (rc) + + if (rc) { dev_err_probe(dev, rc, "failed to setup APLIC in %s mode\n", msi_mode ? "MSI" : "direct"); - else - register_syscore(&aplic_syscore); + return rc; + } + + aplic_syscore_init(); #ifdef CONFIG_ACPI if (!acpi_disabled) acpi_dev_clear_dependencies(ACPI_COMPANION(dev)); #endif - return rc; + return 0; } static const struct of_device_id aplic_match[] = { diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 64bb38c958956..6627a381f65ae 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1373,6 +1373,13 @@ static CLOSURE_CALLBACK(cached_dev_free) mutex_unlock(&bch_register_lock); + /* + * Wait for any pending sb_write to complete before free. + * The sb_bio is embedded in struct cached_dev, so we must + * ensure no I/O is in progress. + */ + closure_sync(&dc->sb_write); + if (dc->sb_disk) folio_put(virt_to_folio(dc->sb_disk)); diff --git a/drivers/misc/amd-sbi/Kconfig b/drivers/misc/amd-sbi/Kconfig index be022c71a90c1..30e7fad7356cc 100644 --- a/drivers/misc/amd-sbi/Kconfig +++ b/drivers/misc/amd-sbi/Kconfig @@ -1,10 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only config AMD_SBRMI_I2C tristate "AMD side band RMI support" - depends on I2C + depends on I3C_OR_I2C depends on ARM || ARM64 || COMPILE_TEST select REGMAP_I2C - depends on I3C || !I3C select REGMAP_I3C if I3C help Side band RMI over I2C/I3C support for AMD out of band management. diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 14ed91391fcc5..707419270ebf2 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1509,6 +1509,50 @@ static netdev_features_t bond_fix_features(struct net_device *dev, return features; } +static int bond_header_create(struct sk_buff *skb, struct net_device *bond_dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct bonding *bond = netdev_priv(bond_dev); + const struct header_ops *slave_ops; + struct slave *slave; + int ret = 0; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (slave) { + slave_ops = READ_ONCE(slave->dev->header_ops); + if (slave_ops && slave_ops->create) + ret = slave_ops->create(skb, slave->dev, + type, daddr, saddr, len); + } + rcu_read_unlock(); + return ret; +} + +static int bond_header_parse(const struct sk_buff *skb, unsigned char *haddr) +{ + struct bonding *bond = netdev_priv(skb->dev); + const struct header_ops *slave_ops; + struct slave *slave; + int ret = 0; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (slave) { + slave_ops = READ_ONCE(slave->dev->header_ops); + if (slave_ops && slave_ops->parse) + ret = slave_ops->parse(skb, haddr); + } + rcu_read_unlock(); + return ret; +} + +static const struct header_ops bond_header_ops = { + .create = bond_header_create, + .parse = bond_header_parse, +}; + static void bond_setup_by_slave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1516,7 +1560,8 @@ static void bond_setup_by_slave(struct net_device *bond_dev, dev_close(bond_dev); - bond_dev->header_ops = slave_dev->header_ops; + bond_dev->header_ops = slave_dev->header_ops ? + &bond_header_ops : NULL; bond_dev->type = slave_dev->type; bond_dev->hard_header_len = slave_dev->hard_header_len; @@ -2801,8 +2846,14 @@ static void bond_miimon_commit(struct bonding *bond) continue; + case BOND_LINK_FAIL: + case BOND_LINK_BACK: + slave_dbg(bond->dev, slave->dev, "link_new_state %d on slave\n", + slave->link_new_state); + continue; + default: - slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n", + slave_err(bond->dev, slave->dev, "invalid link_new_state %d on slave\n", slave->link_new_state); bond_propose_link_state(slave, BOND_LINK_NOCHANGE); @@ -3377,7 +3428,7 @@ int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, } else if (is_arp) { return bond_arp_rcv(skb, bond, slave); #if IS_ENABLED(CONFIG_IPV6) - } else if (is_ipv6) { + } else if (is_ipv6 && likely(ipv6_mod_enabled())) { return bond_na_rcv(skb, bond, slave); #endif } else { @@ -5069,13 +5120,18 @@ static void bond_set_slave_arr(struct bonding *bond, { struct bond_up_slave *usable, *all; - usable = rtnl_dereference(bond->usable_slaves); - rcu_assign_pointer(bond->usable_slaves, usable_slaves); - kfree_rcu(usable, rcu); - all = rtnl_dereference(bond->all_slaves); rcu_assign_pointer(bond->all_slaves, all_slaves); kfree_rcu(all, rcu); + + if (BOND_MODE(bond) == BOND_MODE_BROADCAST) { + kfree_rcu(usable_slaves, rcu); + return; + } + + usable = rtnl_dereference(bond->usable_slaves); + rcu_assign_pointer(bond->usable_slaves, usable_slaves); + kfree_rcu(usable, rcu); } static void bond_reset_slave_arr(struct bonding *bond) diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index b90890030751f..1873d8287bb9b 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -297,6 +297,7 @@ static void ser_release(struct work_struct *work) dev_close(ser->dev); unregister_netdevice(ser->dev); debugfs_deinit(ser); + tty_kref_put(tty->link); tty_kref_put(tty); } rtnl_unlock(); @@ -331,6 +332,7 @@ static int ldisc_open(struct tty_struct *tty) ser = netdev_priv(dev); ser->tty = tty_kref_get(tty); + tty_kref_get(tty->link); ser->dev = dev; debugfs_init(ser, tty); tty->receive_room = 4096; @@ -339,6 +341,7 @@ static int ldisc_open(struct tty_struct *tty) rtnl_lock(); result = register_netdevice(dev); if (result) { + tty_kref_put(tty->link); tty_kref_put(tty); rtnl_unlock(); free_netdev(dev); diff --git a/drivers/net/can/dev/calc_bittiming.c b/drivers/net/can/dev/calc_bittiming.c index cc4022241553f..42498e9d3f38d 100644 --- a/drivers/net/can/dev/calc_bittiming.c +++ b/drivers/net/can/dev/calc_bittiming.c @@ -8,7 +8,7 @@ #include #include -#define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */ +#define CAN_CALC_MAX_ERROR 500 /* max error 5% */ /* CiA recommended sample points for Non Return to Zero encoding. */ static int can_calc_sample_point_nrz(const struct can_bittiming *bt) diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index e00d3dbc4cf43..91b1fa970f8fb 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -755,7 +755,9 @@ static int hi3110_open(struct net_device *net) return ret; mutex_lock(&priv->hi3110_lock); - hi3110_power_enable(priv->transceiver, 1); + ret = hi3110_power_enable(priv->transceiver, 1); + if (ret) + goto out_close_candev; priv->force_quit = 0; priv->tx_skb = NULL; @@ -790,6 +792,7 @@ static int hi3110_open(struct net_device *net) hi3110_hw_sleep(spi); out_close: hi3110_power_enable(priv->transceiver, 0); + out_close_candev: close_candev(net); mutex_unlock(&priv->hi3110_lock); return ret; diff --git a/drivers/net/dsa/microchip/ksz_ptp.c b/drivers/net/dsa/microchip/ksz_ptp.c index 4a2cc57a628f9..8b98039320add 100644 --- a/drivers/net/dsa/microchip/ksz_ptp.c +++ b/drivers/net/dsa/microchip/ksz_ptp.c @@ -1108,6 +1108,7 @@ static int ksz_ptp_msg_irq_setup(struct ksz_port *port, u8 n) const struct ksz_dev_ops *ops = port->ksz_dev->dev_ops; struct ksz_irq *ptpirq = &port->ptpirq; struct ksz_ptp_irq *ptpmsg_irq; + int ret; ptpmsg_irq = &port->ptpmsg_irq[n]; ptpmsg_irq->num = irq_create_mapping(ptpirq->domain, n); @@ -1119,9 +1120,13 @@ static int ksz_ptp_msg_irq_setup(struct ksz_port *port, u8 n) strscpy(ptpmsg_irq->name, name[n]); - return request_threaded_irq(ptpmsg_irq->num, NULL, - ksz_ptp_msg_thread_fn, IRQF_ONESHOT, - ptpmsg_irq->name, ptpmsg_irq); + ret = request_threaded_irq(ptpmsg_irq->num, NULL, + ksz_ptp_msg_thread_fn, IRQF_ONESHOT, + ptpmsg_irq->name, ptpmsg_irq); + if (ret) + irq_dispose_mapping(ptpmsg_irq->num); + + return ret; } int ksz_ptp_irq_setup(struct dsa_switch *ds, u8 p) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.c b/drivers/net/dsa/mxl862xx/mxl862xx.c index b1e2094b58165..d7ab04f5afefd 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx.c @@ -149,7 +149,6 @@ static int mxl862xx_setup_mdio(struct dsa_switch *ds) return -ENOMEM; bus->priv = priv; - ds->user_mii_bus = bus; bus->name = KBUILD_MODNAME "-mii"; snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(dev)); bus->read_c45 = mxl862xx_phy_read_c45_mii_bus; diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c index f938a3f701cc9..31fa94dac627d 100644 --- a/drivers/net/dsa/realtek/rtl8365mb.c +++ b/drivers/net/dsa/realtek/rtl8365mb.c @@ -1480,8 +1480,7 @@ static void rtl8365mb_stats_update(struct realtek_priv *priv, int port) stats->rx_packets = cnt[RTL8365MB_MIB_ifInUcastPkts] + cnt[RTL8365MB_MIB_ifInMulticastPkts] + - cnt[RTL8365MB_MIB_ifInBroadcastPkts] - - cnt[RTL8365MB_MIB_ifOutDiscards]; + cnt[RTL8365MB_MIB_ifInBroadcastPkts]; stats->tx_packets = cnt[RTL8365MB_MIB_ifOutUcastPkts] + cnt[RTL8365MB_MIB_ifOutMulticastPkts] + diff --git a/drivers/net/dsa/realtek/rtl8366rb-leds.c b/drivers/net/dsa/realtek/rtl8366rb-leds.c index 99c890681ae60..509ffd3f8db5c 100644 --- a/drivers/net/dsa/realtek/rtl8366rb-leds.c +++ b/drivers/net/dsa/realtek/rtl8366rb-leds.c @@ -12,11 +12,11 @@ static inline u32 rtl8366rb_led_group_port_mask(u8 led_group, u8 port) case 0: return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); case 1: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + return FIELD_PREP(RTL8366RB_LED_X_1_CTRL_MASK, BIT(port)); case 2: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + return FIELD_PREP(RTL8366RB_LED_2_X_CTRL_MASK, BIT(port)); case 3: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + return FIELD_PREP(RTL8366RB_LED_X_3_CTRL_MASK, BIT(port)); default: return 0; } diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 94d17b06da404..c72c2bfdcffb2 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2339,14 +2339,13 @@ int sja1105_static_config_reload(struct sja1105_private *priv, goto out; } + rc = sja1105_reload_cbs(priv); + +out: dsa_switch_for_each_available_port(dp, ds) if (dp->pl) phylink_replay_link_end(dp->pl); - rc = sja1105_reload_cbs(priv); - if (rc < 0) - goto out; -out: mutex_unlock(&priv->mgmt_lock); mutex_unlock(&priv->fdb_lock); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 8b79d88480dbe..23beea48ae26b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1271,20 +1271,25 @@ static int xgbe_start(struct xgbe_prv_data *pdata) if (ret) goto err_napi; + /* Reset the phy settings */ + ret = xgbe_phy_reset(pdata); + if (ret) + goto err_irqs; + + /* Start the phy */ ret = phy_if->phy_start(pdata); if (ret) goto err_irqs; hw_if->enable_tx(pdata); hw_if->enable_rx(pdata); + /* Synchronize flag with hardware state after enabling TX/RX. + * This prevents stale state after device restart cycles. + */ + pdata->data_path_stopped = false; udp_tunnel_nic_reset_ntf(netdev); - /* Reset the phy settings */ - ret = xgbe_phy_reset(pdata); - if (ret) - goto err_txrx; - netif_tx_start_all_queues(netdev); xgbe_start_timers(pdata); @@ -1294,10 +1299,6 @@ static int xgbe_start(struct xgbe_prv_data *pdata) return 0; -err_txrx: - hw_if->disable_rx(pdata); - hw_if->disable_tx(pdata); - err_irqs: xgbe_free_irqs(pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index c63ddb12237ea..b8cf6ccfe6414 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -1942,7 +1942,7 @@ static void xgbe_set_rx_adap_mode(struct xgbe_prv_data *pdata, static void xgbe_rx_adaptation(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int reg; + int reg; /* step 2: force PCS to send RX_ADAPT Req to PHY */ XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_EQ_CTRL4, @@ -1964,11 +1964,20 @@ static void xgbe_rx_adaptation(struct xgbe_prv_data *pdata) /* Step 4: Check for Block lock */ - /* Link status is latched low, so read once to clear - * and then read again to get current state - */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg < 0) + goto set_mode; + + /* Link status is latched low so that momentary link drops + * can be detected. If link was already down read again + * to get the latest state. + */ + if (!pdata->phy.link && !(reg & MDIO_STAT1_LSTATUS)) { + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg < 0) + goto set_mode; + } + if (reg & MDIO_STAT1_LSTATUS) { /* If the block lock is found, update the helpers * and declare the link up @@ -2008,6 +2017,48 @@ static void xgbe_phy_rx_adaptation(struct xgbe_prv_data *pdata) xgbe_rx_adaptation(pdata); } +/* + * xgbe_phy_stop_data_path - Stop TX/RX to prevent packet corruption + * @pdata: driver private data + * + * This function stops the data path (TX and RX) to prevent packet + * corruption during critical PHY operations like RX adaptation. + * Must be called before initiating RX adaptation when link goes down. + */ +static void xgbe_phy_stop_data_path(struct xgbe_prv_data *pdata) +{ + if (pdata->data_path_stopped) + return; + + /* Stop TX/RX to prevent packet corruption during RX adaptation */ + pdata->hw_if.disable_tx(pdata); + pdata->hw_if.disable_rx(pdata); + pdata->data_path_stopped = true; + + netif_dbg(pdata, link, pdata->netdev, + "stopping data path for RX adaptation\n"); +} + +/* + * xgbe_phy_start_data_path - Re-enable TX/RX after RX adaptation + * @pdata: driver private data + * + * This function re-enables the data path (TX and RX) after RX adaptation + * has completed successfully. Only called when link is confirmed up. + */ +static void xgbe_phy_start_data_path(struct xgbe_prv_data *pdata) +{ + if (!pdata->data_path_stopped) + return; + + pdata->hw_if.enable_rx(pdata); + pdata->hw_if.enable_tx(pdata); + pdata->data_path_stopped = false; + + netif_dbg(pdata, link, pdata->netdev, + "restarting data path after RX adaptation\n"); +} + static void xgbe_phy_rx_reset(struct xgbe_prv_data *pdata) { int reg; @@ -2801,13 +2852,27 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) if (pdata->en_rx_adap) { /* if the link is available and adaptation is done, * declare link up + * + * Note: When link is up and adaptation is done, we can + * safely re-enable the data path if it was stopped + * for adaptation. */ - if ((reg & MDIO_STAT1_LSTATUS) && pdata->rx_adapt_done) + if ((reg & MDIO_STAT1_LSTATUS) && pdata->rx_adapt_done) { + xgbe_phy_start_data_path(pdata); return 1; + } /* If either link is not available or adaptation is not done, * retrigger the adaptation logic. (if the mode is not set, * then issue mailbox command first) */ + + /* CRITICAL: Stop data path BEFORE triggering RX adaptation + * to prevent CRC errors from packets corrupted during + * the adaptation process. This is especially important + * when AN is OFF in 10G KR mode. + */ + xgbe_phy_stop_data_path(pdata); + if (pdata->mode_set) { xgbe_phy_rx_adaptation(pdata); } else { @@ -2815,8 +2880,11 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) xgbe_phy_set_mode(pdata, phy_data->cur_mode); } - if (pdata->rx_adapt_done) + if (pdata->rx_adapt_done) { + /* Adaptation complete, safe to re-enable data path */ + xgbe_phy_start_data_path(pdata); return 1; + } } else if (reg & MDIO_STAT1_LSTATUS) return 1; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index e1d7d7150e16f..438033a715238 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -1243,6 +1243,10 @@ struct xgbe_prv_data { bool en_rx_adap; int rx_adapt_retries; bool rx_adapt_done; + /* Flag to track if data path (TX/RX) was stopped for RX adaptation. + * This prevents packet corruption during the adaptation window. + */ + bool data_path_stopped; bool mode_set; bool sph; }; diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 8283aeee35fb6..dde4046cbf010 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -934,6 +934,17 @@ int arc_emac_probe(struct net_device *ndev, int interface) /* Set poll rate so that it polls every 1 ms */ arc_reg_set(priv, R_POLLRATE, clock_frequency / 1000000); + /* + * Put the device into a known quiescent state before requesting + * the IRQ. Clear only EMAC interrupt status bits here; leave the + * MDIO completion bit alone and avoid writing TXPL_MASK, which is + * used to force TX polling rather than acknowledge interrupts. + */ + arc_reg_set(priv, R_ENABLE, 0); + arc_reg_set(priv, R_STATUS, RXINT_MASK | TXINT_MASK | ERR_MASK | + TXCH_MASK | MSER_MASK | RXCR_MASK | + RXFR_MASK | RXFL_MASK); + ndev->irq = irq; dev_info(dev, "IRQ is %d\n", ndev->irq); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index ba47e8294fffa..28d0ece2e7b1f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -979,8 +979,8 @@ static int bnxt_set_channels(struct net_device *dev, if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) != bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) && - netif_is_rxfh_configured(dev)) { - netdev_warn(dev, "RSS table size change required, RSS table entries must be default to proceed\n"); + (netif_is_rxfh_configured(dev) || bp->num_rss_ctx)) { + netdev_warn(dev, "RSS table size change required, RSS table entries must be default (with no additional RSS contexts present) to proceed\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index a71cd729fde69..482a31e7b72bc 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1342,8 +1342,7 @@ static void bcmgenet_get_ethtool_stats(struct net_device *dev, } } -void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, - bool tx_lpi_enabled) +void bcmgenet_eee_enable_set(struct net_device *dev, bool enable) { struct bcmgenet_priv *priv = netdev_priv(dev); u32 off = priv->hw_params->tbuf_offset + TBUF_ENERGY_CTRL; @@ -1363,7 +1362,7 @@ void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, /* Enable EEE and switch to a 27Mhz clock automatically */ reg = bcmgenet_readl(priv->base + off); - if (tx_lpi_enabled) + if (enable) reg |= TBUF_EEE_EN | TBUF_PM_EN; else reg &= ~(TBUF_EEE_EN | TBUF_PM_EN); @@ -1382,14 +1381,12 @@ void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, priv->clk_eee_enabled = false; } - priv->eee.eee_enabled = enable; - priv->eee.tx_lpi_enabled = tx_lpi_enabled; } static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_keee *e) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct ethtool_keee *p = &priv->eee; + int ret; if (GENET_IS_V1(priv)) return -EOPNOTSUPP; @@ -1397,17 +1394,21 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_keee *e) if (!dev->phydev) return -ENODEV; - e->tx_lpi_enabled = p->tx_lpi_enabled; + ret = phy_ethtool_get_eee(dev->phydev, e); + if (ret) + return ret; + + /* tx_lpi_timer is maintained by the MAC hardware register; the + * PHY-level eee_cfg timer is not set for GENET. + */ e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER); - return phy_ethtool_get_eee(dev->phydev, e); + return 0; } static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_keee *e) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct ethtool_keee *p = &priv->eee; - bool active; if (GENET_IS_V1(priv)) return -EOPNOTSUPP; @@ -1415,15 +1416,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_keee *e) if (!dev->phydev) return -ENODEV; - p->eee_enabled = e->eee_enabled; - - if (!p->eee_enabled) { - bcmgenet_eee_enable_set(dev, false, false); - } else { - active = phy_init_eee(dev->phydev, false) >= 0; - bcmgenet_umac_writel(priv, e->tx_lpi_timer, UMAC_EEE_LPI_TIMER); - bcmgenet_eee_enable_set(dev, active, e->tx_lpi_enabled); - } + bcmgenet_umac_writel(priv, e->tx_lpi_timer, UMAC_EEE_LPI_TIMER); return phy_ethtool_set_eee(dev->phydev, e); } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 5ec3979779ece..9e4110c7fdf6f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -665,8 +665,6 @@ struct bcmgenet_priv { u8 sopass[SOPASS_MAX]; struct bcmgenet_mib_counters mib; - - struct ethtool_keee eee; }; static inline bool bcmgenet_has_40bits(struct bcmgenet_priv *priv) @@ -749,7 +747,6 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, int bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, enum bcmgenet_power_mode mode); -void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, - bool tx_lpi_enabled); +void bcmgenet_eee_enable_set(struct net_device *dev, bool enable); #endif /* __BCMGENET_H__ */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 38f854b94a799..a4e0d5a682687 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -29,7 +29,6 @@ static void bcmgenet_mac_config(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); struct phy_device *phydev = dev->phydev; u32 reg, cmd_bits = 0; - bool active; /* speed */ if (phydev->speed == SPEED_1000) @@ -90,10 +89,6 @@ static void bcmgenet_mac_config(struct net_device *dev) bcmgenet_umac_writel(priv, reg, UMAC_CMD); spin_unlock_bh(&priv->reg_lock); - active = phy_init_eee(phydev, 0) >= 0; - bcmgenet_eee_enable_set(dev, - priv->eee.eee_enabled && active, - priv->eee.tx_lpi_enabled); } /* setup netdev link state when PHY link status change and @@ -113,6 +108,8 @@ void bcmgenet_mii_setup(struct net_device *dev) bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); } + bcmgenet_eee_enable_set(dev, phydev->enable_tx_lpi); + phy_print_status(phydev); } @@ -412,6 +409,9 @@ int bcmgenet_mii_probe(struct net_device *dev) /* Indicate that the MAC is responsible for PHY PM */ dev->phydev->mac_managed_pm = true; + if (!GENET_IS_V1(priv)) + phy_support_eee(dev->phydev); + return 0; } diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 5bc35f651ebd2..f290d608b4094 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "macb.h" @@ -668,6 +669,97 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode, netif_tx_stop_all_queues(ndev); } +/* Use juggling algorithm to left rotate tx ring and tx skb array */ +static void gem_shuffle_tx_one_ring(struct macb_queue *queue) +{ + unsigned int head, tail, count, ring_size, desc_size; + struct macb_tx_skb tx_skb, *skb_curr, *skb_next; + struct macb_dma_desc *desc_curr, *desc_next; + unsigned int i, cycles, shift, curr, next; + struct macb *bp = queue->bp; + unsigned char desc[24]; + unsigned long flags; + + desc_size = macb_dma_desc_get_size(bp); + + if (WARN_ON_ONCE(desc_size > ARRAY_SIZE(desc))) + return; + + spin_lock_irqsave(&queue->tx_ptr_lock, flags); + head = queue->tx_head; + tail = queue->tx_tail; + ring_size = bp->tx_ring_size; + count = CIRC_CNT(head, tail, ring_size); + + if (!(tail % ring_size)) + goto unlock; + + if (!count) { + queue->tx_head = 0; + queue->tx_tail = 0; + goto unlock; + } + + shift = tail % ring_size; + cycles = gcd(ring_size, shift); + + for (i = 0; i < cycles; i++) { + memcpy(&desc, macb_tx_desc(queue, i), desc_size); + memcpy(&tx_skb, macb_tx_skb(queue, i), + sizeof(struct macb_tx_skb)); + + curr = i; + next = (curr + shift) % ring_size; + + while (next != i) { + desc_curr = macb_tx_desc(queue, curr); + desc_next = macb_tx_desc(queue, next); + + memcpy(desc_curr, desc_next, desc_size); + + if (next == ring_size - 1) + desc_curr->ctrl &= ~MACB_BIT(TX_WRAP); + if (curr == ring_size - 1) + desc_curr->ctrl |= MACB_BIT(TX_WRAP); + + skb_curr = macb_tx_skb(queue, curr); + skb_next = macb_tx_skb(queue, next); + memcpy(skb_curr, skb_next, sizeof(struct macb_tx_skb)); + + curr = next; + next = (curr + shift) % ring_size; + } + + desc_curr = macb_tx_desc(queue, curr); + memcpy(desc_curr, &desc, desc_size); + if (i == ring_size - 1) + desc_curr->ctrl &= ~MACB_BIT(TX_WRAP); + if (curr == ring_size - 1) + desc_curr->ctrl |= MACB_BIT(TX_WRAP); + memcpy(macb_tx_skb(queue, curr), &tx_skb, + sizeof(struct macb_tx_skb)); + } + + queue->tx_head = count; + queue->tx_tail = 0; + + /* Make descriptor updates visible to hardware */ + wmb(); + +unlock: + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); +} + +/* Rotate the queue so that the tail is at index 0 */ +static void gem_shuffle_tx_rings(struct macb *bp) +{ + struct macb_queue *queue; + int q; + + for (q = 0, queue = bp->queues; q < bp->num_queues; q++, queue++) + gem_shuffle_tx_one_ring(queue); +} + static void macb_mac_link_up(struct phylink_config *config, struct phy_device *phy, unsigned int mode, phy_interface_t interface, @@ -706,8 +798,6 @@ static void macb_mac_link_up(struct phylink_config *config, ctrl |= MACB_BIT(PAE); for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - queue->tx_head = 0; - queue->tx_tail = 0; queue_writel(queue, IER, bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); } @@ -721,8 +811,10 @@ static void macb_mac_link_up(struct phylink_config *config, spin_unlock_irqrestore(&bp->lock, flags); - if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) { macb_set_tx_clk(bp, speed); + gem_shuffle_tx_rings(bp); + } /* Enable Rx and Tx; Enable PTP unicast */ ctrl = macb_readl(bp, NCR); diff --git a/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c b/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c index 7fd39f8952901..92a0f824dae7a 100644 --- a/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c +++ b/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c @@ -333,11 +333,13 @@ static int netc_get_phy_addr(struct device_node *np) mdio_node = of_get_child_by_name(np, "mdio"); if (!mdio_node) - return 0; + return -ENODEV; phy_node = of_get_next_child(mdio_node, NULL); - if (!phy_node) + if (!phy_node) { + err = -ENODEV; goto of_put_mdio_node; + } err = of_property_read_u32(phy_node, "reg", &addr); if (err) @@ -423,6 +425,9 @@ static int imx95_enetc_mdio_phyaddr_config(struct platform_device *pdev) addr = netc_get_phy_addr(gchild); if (addr < 0) { + if (addr == -ENODEV) + continue; + dev_err(dev, "Failed to get PHY address\n"); return addr; } @@ -433,12 +438,6 @@ static int imx95_enetc_mdio_phyaddr_config(struct platform_device *pdev) return -EINVAL; } - /* The default value of LaBCR[MDIO_PHYAD_PRTAD ] is - * 0, so no need to set the register. - */ - if (!addr) - continue; - switch (bus_devfn) { case IMX95_ENETC0_BUS_DEVFN: netc_reg_write(priv->ierb, IERB_LBCR(0), @@ -578,16 +577,13 @@ static int imx94_enetc_mdio_phyaddr_config(struct netc_blk_ctrl *priv, addr = netc_get_phy_addr(np); if (addr < 0) { + if (addr == -ENODEV) + return 0; + dev_err(dev, "Failed to get PHY address\n"); return addr; } - /* The default value of LaBCR[MDIO_PHYAD_PRTAD] is 0, - * so no need to set the register. - */ - if (!addr) - return 0; - if (phy_mask & BIT(addr)) { dev_err(dev, "Find same PHY address in EMDIO and ENETC node\n"); diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 3d7648a119e56..9b09eb144b811 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -2952,8 +2952,6 @@ static int e1000_tx_map(struct e1000_adapter *adapter, dma_error: dev_err(&pdev->dev, "TX DMA map failed\n"); buffer_info->dma = 0; - if (count) - count--; while (count--) { if (i == 0) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 35dab1e3132fe..9befdacd67301 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5652,8 +5652,6 @@ static int e1000_tx_map(struct e1000_ring *tx_ring, struct sk_buff *skb, dma_error: dev_err(&pdev->dev, "Tx DMA map failed\n"); buffer_info->dma = 0; - if (count) - count--; while (count--) { if (i == 0) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index fdf40f8fb2399..a26c3d47ec156 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3833,10 +3833,10 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) cfilter.n_proto = ETH_P_IP; if (mask.dst_ip[0] & tcf.dst_ip[0]) memcpy(&cfilter.ip.v4.dst_ip, tcf.dst_ip, - ARRAY_SIZE(tcf.dst_ip)); - else if (mask.src_ip[0] & tcf.dst_ip[0]) + sizeof(cfilter.ip.v4.dst_ip)); + else if (mask.src_ip[0] & tcf.src_ip[0]) memcpy(&cfilter.ip.v4.src_ip, tcf.src_ip, - ARRAY_SIZE(tcf.dst_ip)); + sizeof(cfilter.ip.v4.src_ip)); break; case VIRTCHNL_TCP_V6_FLOW: cfilter.n_proto = ETH_P_IPV6; @@ -3891,7 +3891,7 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) /* for ipv6, mask is set for all sixteen bytes (4 words) */ if (cfilter.n_proto == ETH_P_IPV6 && mask.dst_ip[3]) if (memcmp(&cfilter.ip.v6.dst_ip6, &cf->ip.v6.dst_ip6, - sizeof(cfilter.ip.v6.src_ip6))) + sizeof(cfilter.ip.v6.dst_ip6))) continue; if (mask.vlan_id) if (cfilter.vlan_id != cf->vlan_id) @@ -3979,10 +3979,10 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) cfilter->n_proto = ETH_P_IP; if (mask.dst_ip[0] & tcf.dst_ip[0]) memcpy(&cfilter->ip.v4.dst_ip, tcf.dst_ip, - ARRAY_SIZE(tcf.dst_ip)); - else if (mask.src_ip[0] & tcf.dst_ip[0]) + sizeof(cfilter->ip.v4.dst_ip)); + else if (mask.src_ip[0] & tcf.src_ip[0]) memcpy(&cfilter->ip.v4.src_ip, tcf.src_ip, - ARRAY_SIZE(tcf.dst_ip)); + sizeof(cfilter->ip.v4.src_ip)); break; case VIRTCHNL_TCP_V6_FLOW: cfilter->n_proto = ETH_P_IPV6; diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index a87e0c6d4017a..e9fb0a0919e37 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -260,7 +260,6 @@ struct iavf_adapter { struct work_struct adminq_task; struct work_struct finish_config; wait_queue_head_t down_waitqueue; - wait_queue_head_t reset_waitqueue; wait_queue_head_t vc_waitqueue; struct iavf_q_vector *q_vectors; struct list_head vlan_filter_list; @@ -626,5 +625,5 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter); void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter); struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, const u8 *macaddr); -int iavf_wait_for_reset(struct iavf_adapter *adapter); +void iavf_reset_step(struct iavf_adapter *adapter); #endif /* _IAVF_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index f3a1b2fb9bf89..ab67c709d5a0b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -492,7 +492,6 @@ static int iavf_set_ringparam(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); u32 new_rx_count, new_tx_count; - int ret = 0; if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; @@ -537,13 +536,11 @@ static int iavf_set_ringparam(struct net_device *netdev, } if (netif_running(netdev)) { - iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); - ret = iavf_wait_for_reset(adapter); - if (ret) - netdev_warn(netdev, "Changing ring parameters timeout or interrupted waiting for reset"); + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + iavf_reset_step(adapter); } - return ret; + return 0; } /** @@ -1723,7 +1720,6 @@ static int iavf_set_channels(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); u32 num_req = ch->combined_count; - int ret = 0; if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && adapter->num_tc) { @@ -1745,13 +1741,10 @@ static int iavf_set_channels(struct net_device *netdev, adapter->num_req_queues = num_req; adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; - iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + iavf_reset_step(adapter); - ret = iavf_wait_for_reset(adapter); - if (ret) - netdev_warn(netdev, "Changing channel count timeout or interrupted waiting for reset"); - - return ret; + return 0; } /** diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 86c1964f42e10..7925ee152c760 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -185,31 +185,6 @@ static bool iavf_is_reset_in_progress(struct iavf_adapter *adapter) return false; } -/** - * iavf_wait_for_reset - Wait for reset to finish. - * @adapter: board private structure - * - * Returns 0 if reset finished successfully, negative on timeout or interrupt. - */ -int iavf_wait_for_reset(struct iavf_adapter *adapter) -{ - int ret = wait_event_interruptible_timeout(adapter->reset_waitqueue, - !iavf_is_reset_in_progress(adapter), - msecs_to_jiffies(5000)); - - /* If ret < 0 then it means wait was interrupted. - * If ret == 0 then it means we got a timeout while waiting - * for reset to finish. - * If ret > 0 it means reset has finished. - */ - if (ret > 0) - return 0; - else if (ret < 0) - return -EINTR; - else - return -EBUSY; -} - /** * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure @@ -3036,6 +3011,8 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; + iavf_ptp_release(adapter); + /* We don't use netif_running() because it may be true prior to * ndo_open() returning, so we can't assume it means all our open * tasks have finished, since we're not holding the rtnl_lock here. @@ -3111,18 +3088,16 @@ static void iavf_reconfig_qs_bw(struct iavf_adapter *adapter) } /** - * iavf_reset_task - Call-back task to handle hardware reset - * @work: pointer to work_struct + * iavf_reset_step - Perform the VF reset sequence + * @adapter: board private structure * - * During reset we need to shut down and reinitialize the admin queue - * before we can use it to communicate with the PF again. We also clear - * and reinit the rings because that context is lost as well. - **/ -static void iavf_reset_task(struct work_struct *work) + * Requests a reset from PF, polls for completion, and reconfigures + * the driver. Caller must hold the netdev instance lock. + * + * This can sleep for several seconds while polling HW registers. + */ +void iavf_reset_step(struct iavf_adapter *adapter) { - struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - reset_task); struct virtchnl_vf_resource *vfres = adapter->vf_res; struct net_device *netdev = adapter->netdev; struct iavf_hw *hw = &adapter->hw; @@ -3133,7 +3108,7 @@ static void iavf_reset_task(struct work_struct *work) int i = 0, err; bool running; - netdev_lock(netdev); + netdev_assert_locked(netdev); iavf_misc_irq_disable(adapter); if (adapter->flags & IAVF_FLAG_RESET_NEEDED) { @@ -3178,7 +3153,6 @@ static void iavf_reset_task(struct work_struct *work) dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", reg_val); iavf_disable_vf(adapter); - netdev_unlock(netdev); return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -3190,7 +3164,6 @@ static void iavf_reset_task(struct work_struct *work) iavf_startup(adapter); queue_delayed_work(adapter->wq, &adapter->watchdog_task, msecs_to_jiffies(30)); - netdev_unlock(netdev); return; } @@ -3211,6 +3184,8 @@ static void iavf_reset_task(struct work_struct *work) iavf_change_state(adapter, __IAVF_RESETTING); adapter->flags &= ~IAVF_FLAG_RESET_PENDING; + iavf_ptp_release(adapter); + /* free the Tx/Rx rings and descriptors, might be better to just * re-use them sometime in the future */ @@ -3331,9 +3306,6 @@ static void iavf_reset_task(struct work_struct *work) adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; - wake_up(&adapter->reset_waitqueue); - netdev_unlock(netdev); - return; reset_err: if (running) { @@ -3342,10 +3314,21 @@ static void iavf_reset_task(struct work_struct *work) } iavf_disable_vf(adapter); - netdev_unlock(netdev); dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); } +static void iavf_reset_task(struct work_struct *work) +{ + struct iavf_adapter *adapter = container_of(work, + struct iavf_adapter, + reset_task); + struct net_device *netdev = adapter->netdev; + + netdev_lock(netdev); + iavf_reset_step(adapter); + netdev_unlock(netdev); +} + /** * iavf_adminq_task - worker thread to clean the admin queue * @work: pointer to work_struct containing our data @@ -4611,22 +4594,17 @@ static int iavf_close(struct net_device *netdev) static int iavf_change_mtu(struct net_device *netdev, int new_mtu) { struct iavf_adapter *adapter = netdev_priv(netdev); - int ret = 0; netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) { - iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); - ret = iavf_wait_for_reset(adapter); - if (ret < 0) - netdev_warn(netdev, "MTU change interrupted waiting for reset"); - else if (ret) - netdev_warn(netdev, "MTU change timed out waiting for reset"); + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + iavf_reset_step(adapter); } - return ret; + return 0; } /** @@ -5431,9 +5409,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup the wait queue for indicating transition to down status */ init_waitqueue_head(&adapter->down_waitqueue); - /* Setup the wait queue for indicating transition to running state */ - init_waitqueue_head(&adapter->reset_waitqueue); - /* Setup the wait queue for indicating virtchannel events */ init_waitqueue_head(&adapter->vc_waitqueue); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 88156082a41da..a52c100dcbc56 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -2736,7 +2736,6 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, case VIRTCHNL_OP_ENABLE_QUEUES: /* enable transmits */ iavf_irq_enable(adapter, true); - wake_up(&adapter->reset_waitqueue); adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED; break; case VIRTCHNL_OP_DISABLE_QUEUES: diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index 6c72bd15db6d7..6144cee8034d7 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -1360,7 +1360,7 @@ ice_devlink_enable_roce_get(struct devlink *devlink, u32 id, cdev = pf->cdev_info; if (!cdev) - return -ENODEV; + return -EOPNOTSUPP; ctx->val.vbool = !!(cdev->rdma_protocol & IIDC_RDMA_PROTOCOL_ROCEV2); @@ -1427,7 +1427,7 @@ ice_devlink_enable_iw_get(struct devlink *devlink, u32 id, cdev = pf->cdev_info; if (!cdev) - return -ENODEV; + return -EOPNOTSUPP; ctx->val.vbool = !!(cdev->rdma_protocol & IIDC_RDMA_PROTOCOL_IWARP); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index fb15c794efc9f..a29f1ea04c7d8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -327,10 +327,10 @@ static int rvu_nix_report_show(struct devlink_fmsg *fmsg, void *ctx, rvu_report_pair_end(fmsg); break; case NIX_AF_RVU_RAS: - intr_val = nix_event_context->nix_af_rvu_err; + intr_val = nix_event_context->nix_af_rvu_ras; rvu_report_pair_start(fmsg, "NIX_AF_RAS"); devlink_fmsg_u64_pair_put(fmsg, "\tNIX RAS Interrupt Reg ", - nix_event_context->nix_af_rvu_err); + nix_event_context->nix_af_rvu_ras); devlink_fmsg_string_put(fmsg, "\n\tPoison Data on:"); if (intr_val & BIT_ULL(34)) devlink_fmsg_string_put(fmsg, "\n\tNIX_AQ_INST_S"); @@ -475,7 +475,7 @@ static int rvu_hw_nix_ras_recover(struct devlink_health_reporter *reporter, if (blkaddr < 0) return blkaddr; - if (nix_event_ctx->nix_af_rvu_int) + if (nix_event_ctx->nix_af_rvu_ras) rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1S, ~0ULL); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 60ba840e00fa3..afdeb1b3d4256 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -47,7 +47,6 @@ static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", sq->sqn, sq->cc, sq->pc); sq->cc = 0; - sq->dma_fifo_cc = 0; sq->pc = 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 197a1c6930c0a..329608c59313b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -2912,7 +2912,7 @@ void mlx5e_ipsec_disable_events(struct mlx5e_priv *priv) goto out; peer_priv = mlx5_devcom_get_next_peer_data(priv->devcom, &tmp); - if (peer_priv) + if (peer_priv && peer_priv->ipsec) complete_all(&peer_priv->ipsec->comp); mlx5_devcom_for_each_peer_end(priv->devcom); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index efcfcddab376a..268e208847577 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1589,6 +1589,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi struct skb_shared_info *sinfo; u32 frag_consumed_bytes; struct bpf_prog *prog; + u8 nr_frags_free = 0; struct sk_buff *skb; dma_addr_t addr; u32 truesize; @@ -1631,15 +1632,13 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi prog = rcu_dereference(rq->xdp_prog); if (prog) { - u8 nr_frags_free, old_nr_frags = sinfo->nr_frags; + u8 old_nr_frags = sinfo->nr_frags; if (mlx5e_xdp_handle(rq, prog, mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_wqe_frag_info *pwi; - wi -= old_nr_frags - sinfo->nr_frags; - for (pwi = head_wi; pwi < wi; pwi++) pwi->frag_page->frags++; } @@ -1647,10 +1646,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi } nr_frags_free = old_nr_frags - sinfo->nr_frags; - if (unlikely(nr_frags_free)) { - wi -= nr_frags_free; + if (unlikely(nr_frags_free)) truesize -= nr_frags_free * frag_info->frag_stride; - } } skb = mlx5e_build_linear_skb( @@ -1666,7 +1663,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi if (xdp_buff_has_frags(&mxbuf->xdp)) { /* sinfo->nr_frags is reset by build_skb, calculate again. */ - xdp_update_skb_frags_info(skb, wi - head_wi - 1, + xdp_update_skb_frags_info(skb, wi - head_wi - nr_frags_free - 1, sinfo->xdp_frags_size, truesize, xdp_buff_get_skb_flags(&mxbuf->xdp)); @@ -1957,14 +1954,13 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w if (prog) { u8 nr_frags_free, old_nr_frags = sinfo->nr_frags; + u8 new_nr_frags; u32 len; if (mlx5e_xdp_handle(rq, prog, mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_frag_page *pfp; - frag_page -= old_nr_frags - sinfo->nr_frags; - for (pfp = head_page; pfp < frag_page; pfp++) pfp->frags++; @@ -1975,13 +1971,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w return NULL; /* page/packet was consumed by XDP */ } - nr_frags_free = old_nr_frags - sinfo->nr_frags; - if (unlikely(nr_frags_free)) { - frag_page -= nr_frags_free; + new_nr_frags = sinfo->nr_frags; + nr_frags_free = old_nr_frags - new_nr_frags; + if (unlikely(nr_frags_free)) truesize -= (nr_frags_free - 1) * PAGE_SIZE + ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); - } len = mxbuf->xdp.data_end - mxbuf->xdp.data; @@ -2003,7 +1998,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w struct mlx5e_frag_page *pagep; /* sinfo->nr_frags is reset by build_skb, calculate again. */ - xdp_update_skb_frags_info(skb, frag_page - head_page, + xdp_update_skb_frags_info(skb, new_nr_frags, sinfo->xdp_frags_size, truesize, xdp_buff_get_skb_flags(&mxbuf->xdp)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d3af87a94a187..123c96716a544 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1072,10 +1072,11 @@ static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw) static void mlx5_eswitch_event_handler_unregister(struct mlx5_eswitch *esw) { - if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) + if (esw->mode == MLX5_ESWITCH_OFFLOADS && + mlx5_eswitch_is_funcs_handler(esw->dev)) { mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); - - flush_workqueue(esw->work_queue); + atomic_inc(&esw->esw_funcs.generation); + } } static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 6841caef02d10..c2563bee74dfe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -335,10 +335,12 @@ struct esw_mc_addr { /* SRIOV only */ struct mlx5_host_work { struct work_struct work; struct mlx5_eswitch *esw; + int work_gen; }; struct mlx5_esw_functions { struct mlx5_nb nb; + atomic_t generation; bool host_funcs_disabled; u16 num_vfs; u16 num_ec_vfs; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1366f6e489bd2..7a9ee36b8dcaa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1241,21 +1241,17 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[peer_vport->index] = flow; } - if (mlx5_esw_host_functions_enabled(esw->dev)) { - mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, - mlx5_core_max_vfs(peer_dev)) { - esw_set_peer_miss_rule_source_port(esw, peer_esw, - spec, - peer_vport->vport); - - flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), - spec, &flow_act, &dest, 1); - if (IS_ERR(flow)) { - err = PTR_ERR(flow); - goto add_vf_flow_err; - } - flows[peer_vport->index] = flow; + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) { + esw_set_peer_miss_rule_source_port(esw, peer_esw, spec, + peer_vport->vport); + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_vf_flow_err; } + flows[peer_vport->index] = flow; } if (mlx5_core_ec_sriov_enabled(peer_dev)) { @@ -1347,7 +1343,8 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, mlx5_del_flow_rules(flows[peer_vport->index]); } - if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + if (mlx5_core_is_ecpf_esw_manager(peer_dev) && + mlx5_esw_host_functions_enabled(peer_dev)) { peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); mlx5_del_flow_rules(flows[peer_vport->index]); } @@ -3582,22 +3579,28 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) } static void -esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) +esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, int work_gen, + const u32 *out) { struct devlink *devlink; bool host_pf_disabled; u16 new_num_vfs; + devlink = priv_to_devlink(esw->dev); + devl_lock(devlink); + + /* Stale work from one or more mode changes ago. Bail out. */ + if (work_gen != atomic_read(&esw->esw_funcs.generation)) + goto unlock; + new_num_vfs = MLX5_GET(query_esw_functions_out, out, host_params_context.host_num_of_vfs); host_pf_disabled = MLX5_GET(query_esw_functions_out, out, host_params_context.host_pf_disabled); if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled) - return; + goto unlock; - devlink = priv_to_devlink(esw->dev); - devl_lock(devlink); /* Number of VFs can only change from "0 to x" or "x to 0". */ if (esw->esw_funcs.num_vfs > 0) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); @@ -3612,6 +3615,7 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) } } esw->esw_funcs.num_vfs = new_num_vfs; +unlock: devl_unlock(devlink); } @@ -3628,7 +3632,7 @@ static void esw_functions_changed_event_handler(struct work_struct *work) if (IS_ERR(out)) goto out; - esw_vfs_changed_event_handler(esw, out); + esw_vfs_changed_event_handler(esw, host_work->work_gen, out); kvfree(out); out: kfree(host_work); @@ -3648,6 +3652,7 @@ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs); host_work->esw = esw; + host_work->work_gen = atomic_read(&esw_funcs->generation); INIT_WORK(&host_work->work, esw_functions_changed_event_handler); queue_work(esw->work_queue, &host_work->work); diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 37d2f108a839a..786186c9a115f 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1934,6 +1934,7 @@ static int mana_gd_setup(struct pci_dev *pdev) mana_gd_remove_irqs(pdev); free_workqueue: destroy_workqueue(gc->service_wq); + gc->service_wq = NULL; dev_err(&pdev->dev, "%s failed (error %d)\n", __func__, err); return err; } diff --git a/drivers/net/ethernet/spacemit/k1_emac.c b/drivers/net/ethernet/spacemit/k1_emac.c index 338a2637b1da9..15d43e4a748bb 100644 --- a/drivers/net/ethernet/spacemit/k1_emac.c +++ b/drivers/net/ethernet/spacemit/k1_emac.c @@ -565,7 +565,9 @@ static void emac_alloc_rx_desc_buffers(struct emac_priv *priv) DMA_FROM_DEVICE); if (dma_mapping_error(&priv->pdev->dev, rx_buf->dma_addr)) { dev_err_ratelimited(&ndev->dev, "Mapping skb failed\n"); - goto err_free_skb; + dev_kfree_skb_any(skb); + rx_buf->skb = NULL; + break; } rx_desc_addr = &((struct emac_desc *)rx_ring->desc_addr)[i]; @@ -590,10 +592,6 @@ static void emac_alloc_rx_desc_buffers(struct emac_priv *priv) rx_ring->head = i; return; - -err_free_skb: - dev_kfree_skb_any(skb); - rx_buf->skb = NULL; } /* Returns number of packets received */ @@ -735,7 +733,7 @@ static void emac_tx_mem_map(struct emac_priv *priv, struct sk_buff *skb) struct emac_desc tx_desc, *tx_desc_addr; struct device *dev = &priv->pdev->dev; struct emac_tx_desc_buffer *tx_buf; - u32 head, old_head, frag_num, f; + u32 head, old_head, frag_num, f, i; bool buf_idx; frag_num = skb_shinfo(skb)->nr_frags; @@ -803,6 +801,15 @@ static void emac_tx_mem_map(struct emac_priv *priv, struct sk_buff *skb) err_free_skb: dev_dstats_tx_dropped(priv->ndev); + + i = old_head; + while (i != head) { + emac_free_tx_buf(priv, i); + + if (++i == tx_ring->total_cnt) + i = 0; + } + dev_kfree_skb_any(skb); } diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 9679180504330..265ce5479915f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1351,7 +1351,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, ndev_priv = netdev_priv(ndev); am65_cpsw_nuss_set_offload_fwd_mark(skb, ndev_priv->offload_fwd_mark); skb_put(skb, pkt_len); - if (port->rx_ts_enabled) + if (port->rx_ts_filter) am65_cpts_rx_timestamp(common->cpts, skb); skb_mark_for_recycle(skb); skb->protocol = eth_type_trans(skb, ndev); @@ -1811,11 +1811,14 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: - port->rx_ts_enabled = false; + port->rx_ts_filter = HWTSTAMP_FILTER_NONE; break; case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + port->rx_ts_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + break; case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: @@ -1825,8 +1828,8 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - port->rx_ts_enabled = true; - cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT | HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + port->rx_ts_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -1863,7 +1866,7 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, ts_ctrl |= AM65_CPSW_TS_TX_ANX_ALL_EN | AM65_CPSW_PN_TS_CTL_TX_VLAN_LT1_EN; - if (port->rx_ts_enabled) + if (port->rx_ts_filter) ts_ctrl |= AM65_CPSW_TS_RX_ANX_ALL_EN | AM65_CPSW_PN_TS_CTL_RX_VLAN_LT1_EN; @@ -1888,8 +1891,7 @@ static int am65_cpsw_nuss_hwtstamp_get(struct net_device *ndev, cfg->flags = 0; cfg->tx_type = port->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - cfg->rx_filter = port->rx_ts_enabled ? HWTSTAMP_FILTER_PTP_V2_EVENT | - HWTSTAMP_FILTER_PTP_V1_L4_EVENT : HWTSTAMP_FILTER_NONE; + cfg->rx_filter = port->rx_ts_filter; return 0; } diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 917c37e4e89bd..7750448e47468 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -52,7 +52,7 @@ struct am65_cpsw_port { bool disabled; struct am65_cpsw_slave_data slave; bool tx_ts_enabled; - bool rx_ts_enabled; + enum hwtstamp_rx_filters rx_ts_filter; struct am65_cpsw_qos qos; struct devlink_port devlink_port; struct bpf_prog *xdp_prog; diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index de6bc1736734a..15fe4d1163c1c 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -343,6 +343,7 @@ static int mctp_i2c_recv(struct mctp_i2c_dev *midev) } else { status = NET_RX_DROP; spin_unlock_irqrestore(&midev->lock, flags); + kfree_skb(skb); } if (status == NET_RX_SUCCESS) { diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c index ef860cfc629f6..3b5dff1441774 100644 --- a/drivers/net/mctp/mctp-usb.c +++ b/drivers/net/mctp/mctp-usb.c @@ -329,7 +329,7 @@ static int mctp_usb_probe(struct usb_interface *intf, SET_NETDEV_DEV(netdev, &intf->dev); dev = netdev_priv(netdev); dev->netdev = netdev; - dev->usbdev = usb_get_dev(interface_to_usbdev(intf)); + dev->usbdev = interface_to_usbdev(intf); dev->intf = intf; usb_set_intfdata(intf, dev); @@ -365,7 +365,6 @@ static void mctp_usb_disconnect(struct usb_interface *intf) mctp_unregister_netdev(dev->netdev); usb_free_urb(dev->tx_urb); usb_free_urb(dev->rx_urb); - usb_put_dev(dev->usbdev); free_netdev(dev->netdev); } diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index f4bf53da3d4fd..5db8413771990 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -367,6 +367,12 @@ static void sfp_fixup_ignore_tx_fault(struct sfp *sfp) sfp->state_ignore_mask |= SFP_F_TX_FAULT; } +static void sfp_fixup_ignore_tx_fault_and_los(struct sfp *sfp) +{ + sfp_fixup_ignore_tx_fault(sfp); + sfp_fixup_ignore_los(sfp); +} + static void sfp_fixup_ignore_hw(struct sfp *sfp, unsigned int mask) { sfp->state_hw_mask &= ~mask; @@ -530,7 +536,7 @@ static const struct sfp_quirk sfp_quirks[] = { // Huawei MA5671A can operate at 2500base-X, but report 1.2GBd NRZ in // their EEPROM SFP_QUIRK("HUAWEI", "MA5671A", sfp_quirk_2500basex, - sfp_fixup_ignore_tx_fault), + sfp_fixup_ignore_tx_fault_and_los), // Lantech 8330-262D-E and 8330-265D can operate at 2500base-X, but // incorrectly report 2500MBd NRZ in their EEPROM. diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a0021df12fde2..19cdf69fa589c 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3119,6 +3119,10 @@ static int lan78xx_init_ltm(struct lan78xx_net *dev) int ret; u32 buf; + /* LAN7850 is USB 2.0 and does not support LTM */ + if (dev->chipid == ID_REV_CHIP_ID_7850_) + return 0; + ret = lan78xx_read_reg(dev, USB_CFG1, &buf); if (ret < 0) goto init_ltm_failed; @@ -3829,6 +3833,7 @@ static void lan78xx_rx_csum_offload(struct lan78xx_net *dev, */ if (!(dev->net->features & NETIF_F_RXCSUM) || unlikely(rx_cmd_a & RX_CMD_A_ICSM_) || + unlikely(rx_cmd_a & RX_CMD_A_CSE_MASK_) || ((rx_cmd_a & RX_CMD_A_FVTG_) && !(dev->net->features & NETIF_F_HW_VLAN_CTAG_RX))) { skb->ip_summed = CHECKSUM_NONE; @@ -3901,7 +3906,8 @@ static int lan78xx_rx(struct lan78xx_net *dev, struct sk_buff *skb, return 0; } - if (unlikely(rx_cmd_a & RX_CMD_A_RED_)) { + if (unlikely(rx_cmd_a & RX_CMD_A_RED_) && + (rx_cmd_a & RX_CMD_A_RX_HARD_ERRS_MASK_)) { netif_dbg(dev, rx_err, dev->net, "Error rx_cmd_a=0x%08x", rx_cmd_a); } else { @@ -4176,7 +4182,7 @@ static struct skb_data *lan78xx_tx_buf_fill(struct lan78xx_net *dev, } tx_data += len; - entry->length += len; + entry->length += max_t(unsigned int, len, ETH_ZLEN); entry->num_of_packet += skb_shinfo(skb)->gso_segs ?: 1; dev_kfree_skb_any(skb); @@ -4546,8 +4552,6 @@ static void lan78xx_disconnect(struct usb_interface *intf) phylink_disconnect_phy(dev->phylink); rtnl_unlock(); - netif_napi_del(&dev->napi); - unregister_netdev(net); timer_shutdown_sync(&dev->stat_monitor); diff --git a/drivers/net/usb/lan78xx.h b/drivers/net/usb/lan78xx.h index 968e5e5faee0a..17a934acff3d6 100644 --- a/drivers/net/usb/lan78xx.h +++ b/drivers/net/usb/lan78xx.h @@ -74,6 +74,9 @@ #define RX_CMD_A_ICSM_ (0x00004000) #define RX_CMD_A_LEN_MASK_ (0x00003FFF) +#define RX_CMD_A_RX_HARD_ERRS_MASK_ \ + (RX_CMD_A_RX_ERRS_MASK_ & ~RX_CMD_A_CSE_MASK_) + /* Rx Command B */ #define RX_CMD_B_CSUM_SHIFT_ (16) #define RX_CMD_B_CSUM_MASK_ (0xFFFF0000) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 3a4985b582cb1..05acac10cd2ba 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -928,7 +928,7 @@ static int qmi_wwan_resume(struct usb_interface *intf) static const struct driver_info qmi_wwan_info = { .description = "WWAN/QMI device", - .flags = FLAG_WWAN | FLAG_SEND_ZLP, + .flags = FLAG_WWAN | FLAG_NOMAXMTU | FLAG_SEND_ZLP, .bind = qmi_wwan_bind, .unbind = qmi_wwan_unbind, .manage_power = qmi_wwan_manage_power, @@ -937,7 +937,7 @@ static const struct driver_info qmi_wwan_info = { static const struct driver_info qmi_wwan_info_quirk_dtr = { .description = "WWAN/QMI device", - .flags = FLAG_WWAN | FLAG_SEND_ZLP, + .flags = FLAG_WWAN | FLAG_NOMAXMTU | FLAG_SEND_ZLP, .bind = qmi_wwan_bind, .unbind = qmi_wwan_unbind, .manage_power = qmi_wwan_manage_power, diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index ed86ba87ca4e5..b72ba0803392b 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1829,11 +1829,12 @@ usbnet_probe(struct usb_interface *udev, const struct usb_device_id *prod) if ((dev->driver_info->flags & FLAG_NOARP) != 0) net->flags |= IFF_NOARP; - if (net->max_mtu > (dev->hard_mtu - net->hard_header_len)) + if ((dev->driver_info->flags & FLAG_NOMAXMTU) == 0 && + net->max_mtu > (dev->hard_mtu - net->hard_header_len)) net->max_mtu = dev->hard_mtu - net->hard_header_len; - if (net->mtu > net->max_mtu) - net->mtu = net->max_mtu; + if (net->mtu > (dev->hard_mtu - net->hard_header_len)) + net->mtu = dev->hard_mtu - net->hard_header_len; } else if (!info->in || !info->out) status = usbnet_get_endpoints(dev, udev); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b42d8768d2979..1e33af94c24b9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4865,7 +4865,6 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event); int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int cmd_size) { - struct queue_limits lim = {}; int ret; memset(set, 0, sizeof(*set)); @@ -4892,7 +4891,7 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, if (ctrl->admin_q) blk_put_queue(ctrl->admin_q); - ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL); + ctrl->admin_q = blk_mq_alloc_queue(set, NULL, NULL); if (IS_ERR(ctrl->admin_q)) { ret = PTR_ERR(ctrl->admin_q); goto out_free_tagset; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 8844bbd395159..9597a87cf05dc 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -786,10 +786,6 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) struct nvme_ctrl *ctrl = ioucmd->file->private_data; int ret; - /* IOPOLL not supported yet */ - if (issue_flags & IO_URING_F_IOPOLL) - return -EOPNOTSUPP; - ret = nvme_uring_cmd_checks(issue_flags); if (ret) return ret; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9aa19255b0417..db5fc9bf66272 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -544,7 +544,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev) /* Free memory and continue on */ nvme_dbbuf_dma_free(dev); - for (i = 1; i <= dev->online_queues; i++) + for (i = 1; i < dev->online_queues; i++) nvme_dbbuf_free(&dev->queues[i]); } } @@ -1625,14 +1625,16 @@ static irqreturn_t nvme_irq_check(int irq, void *data) static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) { struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev); + int irq; WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); - disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + irq = pci_irq_vector(pdev, nvmeq->cq_vector); + disable_irq(irq); spin_lock(&nvmeq->cq_poll_lock); nvme_poll_cq(nvmeq, NULL); spin_unlock(&nvmeq->cq_poll_lock); - enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + enable_irq(irq); } static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 3794ef2585569..e4fd1caadfb00 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -1587,7 +1587,7 @@ void nvmet_execute_async_event(struct nvmet_req *req) ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req; mutex_unlock(&ctrl->lock); - queue_work(nvmet_wq, &ctrl->async_event_work); + queue_work(nvmet_aen_wq, &ctrl->async_event_work); } void nvmet_execute_keep_alive(struct nvmet_req *req) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 03cc7d5f9683d..45f686175feaa 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -27,6 +27,8 @@ static DEFINE_IDA(cntlid_ida); struct workqueue_struct *nvmet_wq; EXPORT_SYMBOL_GPL(nvmet_wq); +struct workqueue_struct *nvmet_aen_wq; +EXPORT_SYMBOL_GPL(nvmet_aen_wq); /* * This read/write semaphore is used to synchronize access to configuration @@ -206,7 +208,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, list_add_tail(&aen->entry, &ctrl->async_events); mutex_unlock(&ctrl->lock); - queue_work(nvmet_wq, &ctrl->async_event_work); + queue_work(nvmet_aen_wq, &ctrl->async_event_work); } static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) @@ -1957,9 +1959,14 @@ static int __init nvmet_init(void) if (!nvmet_wq) goto out_free_buffered_work_queue; + nvmet_aen_wq = alloc_workqueue("nvmet-aen-wq", + WQ_MEM_RECLAIM | WQ_UNBOUND, 0); + if (!nvmet_aen_wq) + goto out_free_nvmet_work_queue; + error = nvmet_init_debugfs(); if (error) - goto out_free_nvmet_work_queue; + goto out_free_nvmet_aen_work_queue; error = nvmet_init_discovery(); if (error) @@ -1975,6 +1982,8 @@ static int __init nvmet_init(void) nvmet_exit_discovery(); out_exit_debugfs: nvmet_exit_debugfs(); +out_free_nvmet_aen_work_queue: + destroy_workqueue(nvmet_aen_wq); out_free_nvmet_work_queue: destroy_workqueue(nvmet_wq); out_free_buffered_work_queue: @@ -1992,6 +2001,7 @@ static void __exit nvmet_exit(void) nvmet_exit_discovery(); nvmet_exit_debugfs(); ida_destroy(&cntlid_ida); + destroy_workqueue(nvmet_aen_wq); destroy_workqueue(nvmet_wq); destroy_workqueue(buffered_io_wq); destroy_workqueue(zbd_wq); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 5db8f0d6e3f2c..50070cfb782ae 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -501,6 +501,7 @@ extern struct kmem_cache *nvmet_bvec_cache; extern struct workqueue_struct *buffered_io_wq; extern struct workqueue_struct *zbd_wq; extern struct workqueue_struct *nvmet_wq; +extern struct workqueue_struct *nvmet_aen_wq; static inline void nvmet_set_result(struct nvmet_req *req, u32 result) { diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 2d6eb89f98af2..e6e2c3f9afdf5 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -2087,6 +2087,7 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data mutex_unlock(&nvmet_rdma_queue_mutex); flush_workqueue(nvmet_wq); + flush_workqueue(nvmet_aen_wq); } static struct ib_client nvmet_rdma_ib_client = { diff --git a/drivers/power/sequencing/pwrseq-pcie-m2.c b/drivers/power/sequencing/pwrseq-pcie-m2.c index d31a7dd8b35c2..dadb4aad9d5d4 100644 --- a/drivers/power/sequencing/pwrseq-pcie-m2.c +++ b/drivers/power/sequencing/pwrseq-pcie-m2.c @@ -109,7 +109,7 @@ static int pwrseq_pcie_m2_probe(struct platform_device *pdev) if (!ctx) return -ENOMEM; - ctx->of_node = of_node_get(dev->of_node); + ctx->of_node = dev_of_node(dev); ctx->pdata = device_get_match_data(dev); if (!ctx->pdata) return dev_err_probe(dev, -ENODEV, diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c index 5fa8682642505..45d7dc44c2cd0 100644 --- a/drivers/regulator/pca9450-regulator.c +++ b/drivers/regulator/pca9450-regulator.c @@ -1293,6 +1293,7 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) struct regulator_dev *ldo5; struct pca9450 *pca9450; unsigned int device_id, i; + const char *type_name; int ret; pca9450 = devm_kzalloc(&i2c->dev, sizeof(struct pca9450), GFP_KERNEL); @@ -1303,15 +1304,22 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) case PCA9450_TYPE_PCA9450A: regulator_desc = pca9450a_regulators; pca9450->rcnt = ARRAY_SIZE(pca9450a_regulators); + type_name = "pca9450a"; break; case PCA9450_TYPE_PCA9450BC: regulator_desc = pca9450bc_regulators; pca9450->rcnt = ARRAY_SIZE(pca9450bc_regulators); + type_name = "pca9450bc"; break; case PCA9450_TYPE_PCA9451A: + regulator_desc = pca9451a_regulators; + pca9450->rcnt = ARRAY_SIZE(pca9451a_regulators); + type_name = "pca9451a"; + break; case PCA9450_TYPE_PCA9452: regulator_desc = pca9451a_regulators; pca9450->rcnt = ARRAY_SIZE(pca9451a_regulators); + type_name = "pca9452"; break; default: dev_err(&i2c->dev, "Unknown device type"); @@ -1369,7 +1377,7 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) if (pca9450->irq) { ret = devm_request_threaded_irq(pca9450->dev, pca9450->irq, NULL, pca9450_irq_handler, - (IRQF_TRIGGER_FALLING | IRQF_ONESHOT), + (IRQF_TRIGGER_LOW | IRQF_ONESHOT), "pca9450-irq", pca9450); if (ret != 0) return dev_err_probe(pca9450->dev, ret, "Failed to request IRQ: %d\n", @@ -1413,9 +1421,7 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) pca9450_i2c_restart_handler, pca9450)) dev_warn(&i2c->dev, "Failed to register restart handler\n"); - dev_info(&i2c->dev, "%s probed.\n", - type == PCA9450_TYPE_PCA9450A ? "pca9450a" : - (type == PCA9450_TYPE_PCA9451A ? "pca9451a" : "pca9450bc")); + dev_info(&i2c->dev, "%s probed.\n", type_name); return 0; } diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index f5f916d679051..8c8ddbf995a46 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -617,7 +617,7 @@ static int imx_rproc_prepare(struct rproc *rproc) err = of_reserved_mem_region_to_resource(np, i++, &res); if (err) - return 0; + break; /* * Ignore the first memory region which will be used vdev buffer. diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c index 4651311aeb074..bb6f6a16d8957 100644 --- a/drivers/remoteproc/mtk_scp.c +++ b/drivers/remoteproc/mtk_scp.c @@ -1592,12 +1592,51 @@ static const struct of_device_id mtk_scp_of_match[] = { }; MODULE_DEVICE_TABLE(of, mtk_scp_of_match); +static int __maybe_unused scp_suspend(struct device *dev) +{ + struct mtk_scp *scp = dev_get_drvdata(dev); + struct rproc *rproc = scp->rproc; + + /* + * Only unprepare if the SCP is running and holding the clock. + * + * Note: `scp_ops` doesn't implement .attach() callback, hence + * `rproc->state` can never be RPROC_ATTACHED. Otherwise, it + * should also be checked here. + */ + if (rproc->state == RPROC_RUNNING) + clk_unprepare(scp->clk); + return 0; +} + +static int __maybe_unused scp_resume(struct device *dev) +{ + struct mtk_scp *scp = dev_get_drvdata(dev); + struct rproc *rproc = scp->rproc; + + /* + * Only prepare if the SCP was running and holding the clock. + * + * Note: `scp_ops` doesn't implement .attach() callback, hence + * `rproc->state` can never be RPROC_ATTACHED. Otherwise, it + * should also be checked here. + */ + if (rproc->state == RPROC_RUNNING) + return clk_prepare(scp->clk); + return 0; +} + +static const struct dev_pm_ops scp_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(scp_suspend, scp_resume) +}; + static struct platform_driver mtk_scp_driver = { .probe = scp_probe, .remove = scp_remove, .driver = { .name = "mtk-scp", .of_match_table = mtk_scp_of_match, + .pm = &scp_pm_ops, }, }; diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c index cf10e8ecfb8f5..3ceec1fd6d998 100644 --- a/drivers/remoteproc/qcom_sysmon.c +++ b/drivers/remoteproc/qcom_sysmon.c @@ -203,7 +203,7 @@ static const struct qmi_elem_info ssctl_shutdown_resp_ei[] = { }; struct ssctl_subsys_event_req { - u8 subsys_name_len; + u32 subsys_name_len; char subsys_name[SSCTL_SUBSYS_NAME_LENGTH]; u32 event; u8 evt_driven_valid; diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index ee18bf2e80549..4add9037dbd5a 100644 --- a/drivers/remoteproc/qcom_wcnss.c +++ b/drivers/remoteproc/qcom_wcnss.c @@ -537,7 +537,7 @@ static int wcnss_alloc_memory_region(struct qcom_wcnss *wcnss) wcnss->mem_phys = wcnss->mem_reloc = res.start; wcnss->mem_size = resource_size(&res); - wcnss->mem_region = devm_ioremap_resource_wc(wcnss->dev, &res); + wcnss->mem_region = devm_ioremap_wc(wcnss->dev, wcnss->mem_phys, wcnss->mem_size); if (IS_ERR(wcnss->mem_region)) { dev_err(wcnss->dev, "unable to map memory region: %pR\n", &res); return PTR_ERR(wcnss->mem_region); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index cb068d5e2145e..14e58c336baab 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -6135,6 +6135,7 @@ static void copy_pair_set_active(struct dasd_copy_relation *copy, char *new_busi static int dasd_eckd_copy_pair_swap(struct dasd_device *device, char *prim_busid, char *sec_busid) { + struct dasd_eckd_private *prim_priv, *sec_priv; struct dasd_device *primary, *secondary; struct dasd_copy_relation *copy; struct dasd_block *block; @@ -6155,6 +6156,9 @@ static int dasd_eckd_copy_pair_swap(struct dasd_device *device, char *prim_busid if (!secondary) return DASD_COPYPAIRSWAP_SECONDARY; + prim_priv = primary->private; + sec_priv = secondary->private; + /* * usually the device should be quiesced for swap * for paranoia stop device and requeue requests again @@ -6182,6 +6186,18 @@ static int dasd_eckd_copy_pair_swap(struct dasd_device *device, char *prim_busid dev_name(&secondary->cdev->dev), rc); } + if (primary->stopped & DASD_STOPPED_QUIESCE) { + dasd_device_set_stop_bits(secondary, DASD_STOPPED_QUIESCE); + dasd_device_remove_stop_bits(primary, DASD_STOPPED_QUIESCE); + } + + /* + * The secondary device never got through format detection, but since it + * is a copy of the primary device, the format is exactly the same; + * therefore, the detected layout can simply be copied. + */ + sec_priv->uses_cdl = prim_priv->uses_cdl; + /* re-enable device */ dasd_device_remove_stop_bits(primary, DASD_STOPPED_PPRC); dasd_device_remove_stop_bits(secondary, DASD_STOPPED_PPRC); diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index 573bad1d6d86c..37a157a1d9691 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -1639,11 +1639,13 @@ int cca_get_info(u16 cardnr, u16 domain, struct cca_info *ci, u32 xflags) memset(ci, 0, sizeof(*ci)); - /* get first info from zcrypt device driver about this apqn */ - rc = zcrypt_device_status_ext(cardnr, domain, &devstat); - if (rc) - return rc; - ci->hwtype = devstat.hwtype; + /* if specific domain given, fetch status and hw info for this apqn */ + if (domain != AUTOSEL_DOM) { + rc = zcrypt_device_status_ext(cardnr, domain, &devstat); + if (rc) + return rc; + ci->hwtype = devstat.hwtype; + } /* * Prep memory for rule array and var array use. diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index e9a984903bffb..e7b0ed26a9ecd 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -85,8 +85,7 @@ static ssize_t cca_serialnr_show(struct device *dev, memset(&ci, 0, sizeof(ci)); - if (ap_domain_index >= 0) - cca_get_info(ac->id, ap_domain_index, &ci, 0); + cca_get_info(ac->id, AUTOSEL_DOM, &ci, 0); return sysfs_emit(buf, "%s\n", ci.serial); } diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 30a9c66126513..c2b082f1252c3 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2578,7 +2578,7 @@ int hisi_sas_probe(struct platform_device *pdev, shost->transportt = hisi_sas_stt; shost->max_id = HISI_SAS_MAX_DEVICES; shost->max_lun = ~0; - shost->max_channel = 1; + shost->max_channel = 0; shost->max_cmd_len = HISI_SAS_MAX_CDB_LEN; if (hisi_hba->hw->slot_index_alloc) { shost->can_queue = HISI_SAS_MAX_COMMANDS; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 2f9e01717ef38..f69efc6494b8e 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -4993,7 +4993,7 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) shost->transportt = hisi_sas_stt; shost->max_id = HISI_SAS_MAX_DEVICES; shost->max_lun = ~0; - shost->max_channel = 1; + shost->max_channel = 0; shost->max_cmd_len = HISI_SAS_MAX_CDB_LEN; shost->can_queue = HISI_SAS_UNRESERVED_IPTT; shost->cmd_per_lun = HISI_SAS_UNRESERVED_IPTT; diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 9038f67234448..dbe3cd4e274c8 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2751,7 +2751,6 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_logo.els_logo_pyld) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); - qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -2776,7 +2775,6 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (rval != QLA_SUCCESS) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); - qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 2cfcf1f5d6a46..7b11bc7de0e39 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -360,12 +360,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, * default device queue depth to figure out sbitmap shift * since we use this queue depth most of times. */ - if (scsi_realloc_sdev_budget_map(sdev, depth)) { - kref_put(&sdev->host->tagset_refcnt, scsi_mq_free_tags); - put_device(&starget->dev); - kfree(sdev); - goto out; - } + if (scsi_realloc_sdev_budget_map(sdev, depth)) + goto out_device_destroy; scsi_change_queue_depth(sdev, depth); diff --git a/drivers/spi/spi-amlogic-spifc-a4.c b/drivers/spi/spi-amlogic-spifc-a4.c index 2aef528cfc1ba..3956869cfec11 100644 --- a/drivers/spi/spi-amlogic-spifc-a4.c +++ b/drivers/spi/spi-amlogic-spifc-a4.c @@ -411,7 +411,7 @@ static int aml_sfc_dma_buffer_setup(struct aml_sfc *sfc, void *databuf, ret = dma_mapping_error(sfc->dev, sfc->daddr); if (ret) { dev_err(sfc->dev, "DMA mapping error\n"); - goto out_map_data; + return ret; } cmd = CMD_DATA_ADDRL(sfc->daddr); @@ -429,7 +429,6 @@ static int aml_sfc_dma_buffer_setup(struct aml_sfc *sfc, void *databuf, ret = dma_mapping_error(sfc->dev, sfc->iaddr); if (ret) { dev_err(sfc->dev, "DMA mapping error\n"); - dma_unmap_single(sfc->dev, sfc->daddr, datalen, dir); goto out_map_data; } @@ -448,7 +447,7 @@ static int aml_sfc_dma_buffer_setup(struct aml_sfc *sfc, void *databuf, return 0; out_map_info: - dma_unmap_single(sfc->dev, sfc->iaddr, datalen, dir); + dma_unmap_single(sfc->dev, sfc->iaddr, infolen, dir); out_map_data: dma_unmap_single(sfc->dev, sfc->daddr, datalen, dir); diff --git a/drivers/spi/spi-atcspi200.c b/drivers/spi/spi-atcspi200.c index 60a37ff5c6f55..2665f31a49ceb 100644 --- a/drivers/spi/spi-atcspi200.c +++ b/drivers/spi/spi-atcspi200.c @@ -195,7 +195,15 @@ static void atcspi_set_trans_ctl(struct atcspi_dev *spi, if (op->addr.buswidth > 1) tc |= TRANS_ADDR_FMT; if (op->data.nbytes) { - tc |= TRANS_DUAL_QUAD(ffs(op->data.buswidth) - 1); + unsigned int width_code; + + width_code = ffs(op->data.buswidth) - 1; + if (unlikely(width_code > 3)) { + WARN_ON_ONCE(1); + width_code = 0; + } + tc |= TRANS_DUAL_QUAD(width_code); + if (op->data.dir == SPI_MEM_DATA_IN) { if (op->dummy.nbytes) tc |= TRANS_MODE_DMY_READ | @@ -497,31 +505,17 @@ static int atcspi_init_resources(struct platform_device *pdev, static int atcspi_configure_dma(struct atcspi_dev *spi) { - struct dma_chan *dma_chan; - int ret = 0; + spi->host->dma_rx = devm_dma_request_chan(spi->dev, "rx"); + if (IS_ERR(spi->host->dma_rx)) + return PTR_ERR(spi->host->dma_rx); - dma_chan = devm_dma_request_chan(spi->dev, "rx"); - if (IS_ERR(dma_chan)) { - ret = PTR_ERR(dma_chan); - goto err_exit; - } - spi->host->dma_rx = dma_chan; + spi->host->dma_tx = devm_dma_request_chan(spi->dev, "tx"); + if (IS_ERR(spi->host->dma_tx)) + return PTR_ERR(spi->host->dma_tx); - dma_chan = devm_dma_request_chan(spi->dev, "tx"); - if (IS_ERR(dma_chan)) { - ret = PTR_ERR(dma_chan); - goto free_rx; - } - spi->host->dma_tx = dma_chan; init_completion(&spi->dma_completion); - return ret; - -free_rx: - dma_release_channel(spi->host->dma_rx); - spi->host->dma_rx = NULL; -err_exit: - return ret; + return 0; } static int atcspi_enable_clk(struct atcspi_dev *spi) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 649ff55333f05..5fb0cb07c110c 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -76,6 +76,11 @@ struct cqspi_flash_pdata { u8 cs; }; +static const struct clk_bulk_data cqspi_clks[CLK_QSPI_NUM] = { + [CLK_QSPI_APB] = { .id = "apb" }, + [CLK_QSPI_AHB] = { .id = "ahb" }, +}; + struct cqspi_st { struct platform_device *pdev; struct spi_controller *host; @@ -1823,6 +1828,7 @@ static int cqspi_probe(struct platform_device *pdev) } /* Obtain QSPI clocks. */ + memcpy(&cqspi->clks, &cqspi_clks, sizeof(cqspi->clks)); ret = devm_clk_bulk_get_optional(dev, CLK_QSPI_NUM, cqspi->clks); if (ret) return dev_err_probe(dev, ret, "Failed to get clocks\n"); diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index bce3d149bea18..d8ef8f89330ac 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -96,6 +96,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0xa324), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa3a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa823), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0xd323), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xe323), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xe423), (unsigned long)&cnl_info }, { }, diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c index 2990bf85ee478..174995042f53a 100644 --- a/drivers/spi/spi-rockchip-sfc.c +++ b/drivers/spi/spi-rockchip-sfc.c @@ -711,7 +711,7 @@ static int rockchip_sfc_probe(struct platform_device *pdev) } } - ret = devm_spi_register_controller(dev, host); + ret = spi_register_controller(host); if (ret) goto err_register; diff --git a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c index 6cf217e21593b..3e2b5e6b07f93 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c +++ b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c @@ -186,20 +186,25 @@ u8 *rtw_get_ie_ex(u8 *in_ie, uint in_len, u8 eid, u8 *oui, u8 oui_len, u8 *ie, u cnt = 0; - while (cnt < in_len) { + while (cnt + 2 <= in_len) { + u8 ie_len = in_ie[cnt + 1]; + + if (cnt + 2 + ie_len > in_len) + break; + if (eid == in_ie[cnt] - && (!oui || !memcmp(&in_ie[cnt+2], oui, oui_len))) { + && (!oui || (ie_len >= oui_len && !memcmp(&in_ie[cnt + 2], oui, oui_len)))) { target_ie = &in_ie[cnt]; if (ie) - memcpy(ie, &in_ie[cnt], in_ie[cnt+1]+2); + memcpy(ie, &in_ie[cnt], ie_len + 2); if (ielen) - *ielen = in_ie[cnt+1]+2; + *ielen = ie_len + 2; break; } - cnt += in_ie[cnt+1]+2; /* goto next */ + cnt += ie_len + 2; /* goto next */ } return target_ie; diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme.c b/drivers/staging/rtl8723bs/core/rtw_mlme.c index 7df6517083817..1ef48bf6581c4 100644 --- a/drivers/staging/rtl8723bs/core/rtw_mlme.c +++ b/drivers/staging/rtl8723bs/core/rtw_mlme.c @@ -1988,7 +1988,10 @@ int rtw_restruct_wmm_ie(struct adapter *adapter, u8 *in_ie, u8 *out_ie, uint in_ while (i < in_len) { ielength = initial_out_len; - if (in_ie[i] == 0xDD && in_ie[i + 2] == 0x00 && in_ie[i + 3] == 0x50 && in_ie[i + 4] == 0xF2 && in_ie[i + 5] == 0x02 && i + 5 < in_len) { /* WMM element ID and OUI */ + if (i + 5 < in_len && + in_ie[i] == 0xDD && in_ie[i + 2] == 0x00 && + in_ie[i + 3] == 0x50 && in_ie[i + 4] == 0xF2 && + in_ie[i + 5] == 0x02) { for (j = i; j < i + 9; j++) { out_ie[ielength] = in_ie[j]; ielength++; diff --git a/drivers/staging/sm750fb/sm750.c b/drivers/staging/sm750fb/sm750.c index dec1f6b88a7df..62f6e0cdff4d4 100644 --- a/drivers/staging/sm750fb/sm750.c +++ b/drivers/staging/sm750fb/sm750.c @@ -1123,6 +1123,7 @@ static void lynxfb_pci_remove(struct pci_dev *pdev) iounmap(sm750_dev->pvReg); iounmap(sm750_dev->pvMem); + pci_release_region(pdev, 1); kfree(g_settings); } diff --git a/drivers/staging/sm750fb/sm750_hw.c b/drivers/staging/sm750fb/sm750_hw.c index a29faee91c784..f60b152a647d3 100644 --- a/drivers/staging/sm750fb/sm750_hw.c +++ b/drivers/staging/sm750fb/sm750_hw.c @@ -36,16 +36,11 @@ int hw_sm750_map(struct sm750_dev *sm750_dev, struct pci_dev *pdev) pr_info("mmio phyAddr = %lx\n", sm750_dev->vidreg_start); - /* - * reserve the vidreg space of smi adaptor - * if you do this, you need to add release region code - * in lynxfb_remove, or memory will not be mapped again - * successfully - */ + /* reserve the vidreg space of smi adaptor */ ret = pci_request_region(pdev, 1, "sm750fb"); if (ret) { pr_err("Can not request PCI regions.\n"); - goto exit; + return ret; } /* now map mmio and vidmem */ @@ -54,7 +49,7 @@ int hw_sm750_map(struct sm750_dev *sm750_dev, struct pci_dev *pdev) if (!sm750_dev->pvReg) { pr_err("mmio failed\n"); ret = -EFAULT; - goto exit; + goto err_release_region; } pr_info("mmio virtual addr = %p\n", sm750_dev->pvReg); @@ -79,13 +74,18 @@ int hw_sm750_map(struct sm750_dev *sm750_dev, struct pci_dev *pdev) sm750_dev->pvMem = ioremap_wc(sm750_dev->vidmem_start, sm750_dev->vidmem_size); if (!sm750_dev->pvMem) { - iounmap(sm750_dev->pvReg); pr_err("Map video memory failed\n"); ret = -EFAULT; - goto exit; + goto err_unmap_reg; } pr_info("video memory vaddr = %p\n", sm750_dev->pvMem); -exit: + + return 0; + +err_unmap_reg: + iounmap(sm750_dev->pvReg); +err_release_region: + pci_release_region(pdev, 1); return ret; } diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 899e663fea6e8..9ceb6d6d479d0 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -10066,6 +10066,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) } flush_work(&hba->eeh_work); + cancel_delayed_work_sync(&hba->ufs_rtc_update_work); ret = ufshcd_vops_suspend(hba, pm_op, PRE_CHANGE); if (ret) @@ -10120,7 +10121,6 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (ret) goto set_link_active; - cancel_delayed_work_sync(&hba->ufs_rtc_update_work); goto out; set_link_active: diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index ad38c746270af..7ede29d4c7c13 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1379,6 +1379,8 @@ static int acm_probe(struct usb_interface *intf, acm->ctrl_caps = h.usb_cdc_acm_descriptor->bmCapabilities; if (quirks & NO_CAP_LINE) acm->ctrl_caps &= ~USB_CDC_CAP_LINE; + if (quirks & MISSING_CAP_BRK) + acm->ctrl_caps |= USB_CDC_CAP_BRK; acm->ctrlsize = ctrlsize; acm->readsize = readsize; acm->rx_buflimit = num_rx_buf; @@ -2002,6 +2004,9 @@ static const struct usb_device_id acm_ids[] = { .driver_info = IGNORE_DEVICE, }, + /* CH343 supports CAP_BRK, but doesn't advertise it */ + { USB_DEVICE(0x1a86, 0x55d3), .driver_info = MISSING_CAP_BRK, }, + /* control interfaces without any protocol set */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_PROTO_NONE) }, diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 759ac15631d3e..76f73853a60b6 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -113,3 +113,4 @@ struct acm { #define CLEAR_HALT_CONDITIONS BIT(5) #define SEND_ZERO_PACKET BIT(6) #define DISABLE_ECHO BIT(7) +#define MISSING_CAP_BRK BIT(8) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index f2d94cfc70af0..7556c0dac908a 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -225,7 +225,8 @@ static void wdm_in_callback(struct urb *urb) /* we may already be in overflow */ if (!test_bit(WDM_OVERFLOW, &desc->flags)) { memmove(desc->ubuf + desc->length, desc->inbuf, length); - desc->length += length; + smp_wmb(); /* against wdm_read() */ + WRITE_ONCE(desc->length, desc->length + length); } } skip_error: @@ -533,6 +534,7 @@ static ssize_t wdm_read return -ERESTARTSYS; cntr = READ_ONCE(desc->length); + smp_rmb(); /* against wdm_in_callback() */ if (cntr == 0) { desc->read = 0; retry: diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 2526a0e03cdeb..d39bbfd7fd18a 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -727,7 +727,7 @@ static int usbtmc488_ioctl_trigger(struct usbtmc_file_data *file_data) buffer[1] = data->bTag; buffer[2] = ~data->bTag; - retval = usb_bulk_msg(data->usb_dev, + retval = usb_bulk_msg_killable(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), buffer, USBTMC_HEADER_SIZE, @@ -1347,7 +1347,7 @@ static int send_request_dev_dep_msg_in(struct usbtmc_file_data *file_data, buffer[11] = 0; /* Reserved */ /* Send bulk URB */ - retval = usb_bulk_msg(data->usb_dev, + retval = usb_bulk_msg_killable(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), buffer, USBTMC_HEADER_SIZE, @@ -1419,7 +1419,7 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf, actual = 0; /* Send bulk URB */ - retval = usb_bulk_msg(data->usb_dev, + retval = usb_bulk_msg_killable(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), buffer, bufsize, &actual, diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 1cd5fa61dc76c..6a1fd967e0a64 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -927,7 +927,11 @@ int usb_get_configuration(struct usb_device *dev) dev->descriptor.bNumConfigurations = ncfg = USB_MAXCONFIG; } - if (ncfg < 1) { + if (ncfg < 1 && dev->quirks & USB_QUIRK_FORCE_ONE_CONFIG) { + dev_info(ddev, "Device claims zero configurations, forcing to 1\n"); + dev->descriptor.bNumConfigurations = 1; + ncfg = 1; + } else if (ncfg < 1) { dev_err(ddev, "no configurations\n"); return -EINVAL; } diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index ea970ddf8879f..2ab120ce2fa85 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -42,16 +42,19 @@ static void usb_api_blocking_completion(struct urb *urb) /* - * Starts urb and waits for completion or timeout. Note that this call - * is NOT interruptible. Many device driver i/o requests should be - * interruptible and therefore these drivers should implement their - * own interruptible routines. + * Starts urb and waits for completion or timeout. + * Whether or not the wait is killable depends on the flag passed in. + * For example, compare usb_bulk_msg() and usb_bulk_msg_killable(). + * + * For non-killable waits, we enforce a maximum limit on the timeout value. */ -static int usb_start_wait_urb(struct urb *urb, int timeout, int *actual_length) +static int usb_start_wait_urb(struct urb *urb, int timeout, int *actual_length, + bool killable) { struct api_context ctx; unsigned long expire; int retval; + long rc; init_completion(&ctx.done); urb->context = &ctx; @@ -60,13 +63,24 @@ static int usb_start_wait_urb(struct urb *urb, int timeout, int *actual_length) if (unlikely(retval)) goto out; - expire = timeout ? msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT; - if (!wait_for_completion_timeout(&ctx.done, expire)) { + if (!killable && (timeout <= 0 || timeout > USB_MAX_SYNCHRONOUS_TIMEOUT)) + timeout = USB_MAX_SYNCHRONOUS_TIMEOUT; + expire = (timeout > 0) ? msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT; + if (killable) + rc = wait_for_completion_killable_timeout(&ctx.done, expire); + else + rc = wait_for_completion_timeout(&ctx.done, expire); + if (rc <= 0) { usb_kill_urb(urb); - retval = (ctx.status == -ENOENT ? -ETIMEDOUT : ctx.status); + if (ctx.status != -ENOENT) + retval = ctx.status; + else if (rc == 0) + retval = -ETIMEDOUT; + else + retval = rc; dev_dbg(&urb->dev->dev, - "%s timed out on ep%d%s len=%u/%u\n", + "%s timed out or killed on ep%d%s len=%u/%u\n", current->comm, usb_endpoint_num(&urb->ep->desc), usb_urb_dir_in(urb) ? "in" : "out", @@ -100,7 +114,7 @@ static int usb_internal_control_msg(struct usb_device *usb_dev, usb_fill_control_urb(urb, usb_dev, pipe, (unsigned char *)cmd, data, len, usb_api_blocking_completion, NULL); - retv = usb_start_wait_urb(urb, timeout, &length); + retv = usb_start_wait_urb(urb, timeout, &length, false); if (retv < 0) return retv; else @@ -117,8 +131,7 @@ static int usb_internal_control_msg(struct usb_device *usb_dev, * @index: USB message index value * @data: pointer to the data to send * @size: length in bytes of the data to send - * @timeout: time in msecs to wait for the message to complete before timing - * out (if 0 the wait is forever) + * @timeout: time in msecs to wait for the message to complete before timing out * * Context: task context, might sleep. * @@ -173,8 +186,7 @@ EXPORT_SYMBOL_GPL(usb_control_msg); * @index: USB message index value * @driver_data: pointer to the data to send * @size: length in bytes of the data to send - * @timeout: time in msecs to wait for the message to complete before timing - * out (if 0 the wait is forever) + * @timeout: time in msecs to wait for the message to complete before timing out * @memflags: the flags for memory allocation for buffers * * Context: !in_interrupt () @@ -232,8 +244,7 @@ EXPORT_SYMBOL_GPL(usb_control_msg_send); * @index: USB message index value * @driver_data: pointer to the data to be filled in by the message * @size: length in bytes of the data to be received - * @timeout: time in msecs to wait for the message to complete before timing - * out (if 0 the wait is forever) + * @timeout: time in msecs to wait for the message to complete before timing out * @memflags: the flags for memory allocation for buffers * * Context: !in_interrupt () @@ -304,8 +315,7 @@ EXPORT_SYMBOL_GPL(usb_control_msg_recv); * @len: length in bytes of the data to send * @actual_length: pointer to a location to put the actual length transferred * in bytes - * @timeout: time in msecs to wait for the message to complete before - * timing out (if 0 the wait is forever) + * @timeout: time in msecs to wait for the message to complete before timing out * * Context: task context, might sleep. * @@ -337,8 +347,7 @@ EXPORT_SYMBOL_GPL(usb_interrupt_msg); * @len: length in bytes of the data to send * @actual_length: pointer to a location to put the actual length transferred * in bytes - * @timeout: time in msecs to wait for the message to complete before - * timing out (if 0 the wait is forever) + * @timeout: time in msecs to wait for the message to complete before timing out * * Context: task context, might sleep. * @@ -385,10 +394,59 @@ int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe, usb_fill_bulk_urb(urb, usb_dev, pipe, data, len, usb_api_blocking_completion, NULL); - return usb_start_wait_urb(urb, timeout, actual_length); + return usb_start_wait_urb(urb, timeout, actual_length, false); } EXPORT_SYMBOL_GPL(usb_bulk_msg); +/** + * usb_bulk_msg_killable - Builds a bulk urb, sends it off and waits for completion in a killable state + * @usb_dev: pointer to the usb device to send the message to + * @pipe: endpoint "pipe" to send the message to + * @data: pointer to the data to send + * @len: length in bytes of the data to send + * @actual_length: pointer to a location to put the actual length transferred + * in bytes + * @timeout: time in msecs to wait for the message to complete before + * timing out (if <= 0, the wait is as long as possible) + * + * Context: task context, might sleep. + * + * This function is just like usb_blk_msg(), except that it waits in a + * killable state and there is no limit on the timeout length. + * + * Return: + * If successful, 0. Otherwise a negative error number. The number of actual + * bytes transferred will be stored in the @actual_length parameter. + * + */ +int usb_bulk_msg_killable(struct usb_device *usb_dev, unsigned int pipe, + void *data, int len, int *actual_length, int timeout) +{ + struct urb *urb; + struct usb_host_endpoint *ep; + + ep = usb_pipe_endpoint(usb_dev, pipe); + if (!ep || len < 0) + return -EINVAL; + + urb = usb_alloc_urb(0, GFP_KERNEL); + if (!urb) + return -ENOMEM; + + if ((ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == + USB_ENDPOINT_XFER_INT) { + pipe = (pipe & ~(3 << 30)) | (PIPE_INTERRUPT << 30); + usb_fill_int_urb(urb, usb_dev, pipe, data, len, + usb_api_blocking_completion, NULL, + ep->desc.bInterval); + } else + usb_fill_bulk_urb(urb, usb_dev, pipe, data, len, + usb_api_blocking_completion, NULL); + + return usb_start_wait_urb(urb, timeout, actual_length, true); +} +EXPORT_SYMBOL_GPL(usb_bulk_msg_killable); + /*-------------------------------------------------------------------*/ static void sg_clean(struct usb_sg_request *io) diff --git a/drivers/usb/core/phy.c b/drivers/usb/core/phy.c index faa20054ad5a1..4bba1c2757406 100644 --- a/drivers/usb/core/phy.c +++ b/drivers/usb/core/phy.c @@ -200,16 +200,10 @@ int usb_phy_roothub_set_mode(struct usb_phy_roothub *phy_roothub, list_for_each_entry(roothub_entry, head, list) { err = phy_set_mode(roothub_entry->phy, mode); if (err) - goto err_out; + return err; } return 0; - -err_out: - list_for_each_entry_continue_reverse(roothub_entry, head, list) - phy_power_off(roothub_entry->phy); - - return err; } EXPORT_SYMBOL_GPL(usb_phy_roothub_set_mode); diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 9e7e49712739d..5523a8e290217 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -140,6 +140,8 @@ static int quirks_param_set(const char *value, const struct kernel_param *kp) case 'p': flags |= USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT; break; + case 'q': + flags |= USB_QUIRK_FORCE_ONE_CONFIG; /* Ignore unrecognized flag characters */ } } @@ -207,6 +209,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* HP v222w 16GB Mini USB Drive */ { USB_DEVICE(0x03f0, 0x3f40), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Huawei 4G LTE module ME906S */ + { USB_DEVICE(0x03f0, 0xa31d), .driver_info = + USB_QUIRK_DISCONNECT_SUSPEND }, + /* Creative SB Audigy 2 NX */ { USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -376,6 +382,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* SanDisk Extreme 55AE */ { USB_DEVICE(0x0781, 0x55ae), .driver_info = USB_QUIRK_NO_LPM }, + /* Avermedia Live Gamer Ultra 2.1 (GC553G2) - BOS descriptor fetch hangs at SuperSpeed Plus */ + { USB_DEVICE(0x07ca, 0x2553), .driver_info = USB_QUIRK_NO_BOS }, + /* Realforce 87U Keyboard */ { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM }, @@ -436,6 +445,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0b05, 0x17e0), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* ASUS TUF 4K PRO - BOS descriptor fetch hangs at SuperSpeed Plus */ + { USB_DEVICE(0x0b05, 0x1ab9), .driver_info = USB_QUIRK_NO_BOS }, + /* Realtek Semiconductor Corp. Mass Storage Device (Multicard Reader)*/ { USB_DEVICE(0x0bda, 0x0151), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, @@ -564,6 +576,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x2386, 0x350e), .driver_info = USB_QUIRK_NO_LPM }, + /* UGREEN 35871 - BOS descriptor fetch hangs at SuperSpeed Plus */ + { USB_DEVICE(0x2b89, 0x5871), .driver_info = USB_QUIRK_NO_BOS }, + /* APTIV AUTOMOTIVE HUB */ { USB_DEVICE(0x2c48, 0x0132), .driver_info = USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT }, @@ -574,12 +589,18 @@ static const struct usb_device_id usb_quirk_list[] = { /* Alcor Link AK9563 SC Reader used in 2022 Lenovo ThinkPads */ { USB_DEVICE(0x2ce3, 0x9563), .driver_info = USB_QUIRK_NO_LPM }, + /* ezcap401 - BOS descriptor fetch hangs at SuperSpeed Plus */ + { USB_DEVICE(0x32ed, 0x0401), .driver_info = USB_QUIRK_NO_BOS }, + /* DELL USB GEN2 */ { USB_DEVICE(0x413c, 0xb062), .driver_info = USB_QUIRK_NO_LPM | USB_QUIRK_RESET_RESUME }, /* VCOM device */ { USB_DEVICE(0x4296, 0x7570), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Noji-MCS SmartCard Reader */ + { USB_DEVICE(0x5131, 0x2007), .driver_info = USB_QUIRK_FORCE_ONE_CONFIG }, + /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 6ecadc81bd6ba..6c1cbb722ca85 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -56,6 +56,7 @@ #define PCI_DEVICE_ID_INTEL_CNPH 0xa36e #define PCI_DEVICE_ID_INTEL_CNPV 0xa3b0 #define PCI_DEVICE_ID_INTEL_RPL 0xa70e +#define PCI_DEVICE_ID_INTEL_NVLH 0xd37f #define PCI_DEVICE_ID_INTEL_PTLH 0xe332 #define PCI_DEVICE_ID_INTEL_PTLH_PCH 0xe37e #define PCI_DEVICE_ID_INTEL_PTLU 0xe432 @@ -447,6 +448,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, CNPH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, CNPV, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, RPL, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, NVLH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, PTLH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, PTLH_PCH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, PTLU, &dwc3_pci_intel_swnode) }, diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 8c855c00b8876..8812ebf33d14b 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -1207,9 +1207,11 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) if (!hidg->interval_user_set) { hidg_fs_in_ep_desc.bInterval = 10; hidg_hs_in_ep_desc.bInterval = 4; + hidg_ss_in_ep_desc.bInterval = 4; } else { hidg_fs_in_ep_desc.bInterval = hidg->interval; hidg_hs_in_ep_desc.bInterval = hidg->interval; + hidg_ss_in_ep_desc.bInterval = hidg->interval; } hidg_ss_out_comp_desc.wBytesPerInterval = @@ -1239,9 +1241,11 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) if (!hidg->interval_user_set) { hidg_fs_out_ep_desc.bInterval = 10; hidg_hs_out_ep_desc.bInterval = 4; + hidg_ss_out_ep_desc.bInterval = 4; } else { hidg_fs_out_ep_desc.bInterval = hidg->interval; hidg_hs_out_ep_desc.bInterval = hidg->interval; + hidg_ss_out_ep_desc.bInterval = hidg->interval; } status = usb_assign_descriptors(f, hidg_fs_descriptors_intout, diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 6af96e2b44eb9..b7b06cb79ff5e 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -180,6 +180,7 @@ #include #include #include +#include #include #include #include @@ -1853,8 +1854,15 @@ static int check_command_size_in_blocks(struct fsg_common *common, int cmnd_size, enum data_direction data_dir, unsigned int mask, int needs_medium, const char *name) { - if (common->curlun) - common->data_size_from_cmnd <<= common->curlun->blkbits; + if (common->curlun) { + if (check_shl_overflow(common->data_size_from_cmnd, + common->curlun->blkbits, + &common->data_size_from_cmnd)) { + common->phase_error = 1; + return -EINVAL; + } + } + return check_command(common, cmnd_size, data_dir, mask, needs_medium, name); } diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 14fc7dce6f395..a6fa5ed3d6cb7 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -83,11 +83,6 @@ static inline struct f_ncm *func_to_ncm(struct usb_function *f) return container_of(f, struct f_ncm, port.func); } -static inline struct f_ncm_opts *func_to_ncm_opts(struct usb_function *f) -{ - return container_of(f->fi, struct f_ncm_opts, func_inst); -} - /*-------------------------------------------------------------------------*/ /* @@ -864,7 +859,6 @@ static int ncm_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) { struct f_ncm *ncm = func_to_ncm(f); - struct f_ncm_opts *opts = func_to_ncm_opts(f); struct usb_composite_dev *cdev = f->config->cdev; /* Control interface has only altsetting 0 */ @@ -887,13 +881,12 @@ static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) if (alt > 1) goto fail; - scoped_guard(mutex, &opts->lock) - if (opts->net) { - DBG(cdev, "reset ncm\n"); - opts->net = NULL; - gether_disconnect(&ncm->port); - ncm_reset_values(ncm); - } + if (ncm->netdev) { + DBG(cdev, "reset ncm\n"); + ncm->netdev = NULL; + gether_disconnect(&ncm->port); + ncm_reset_values(ncm); + } /* * CDC Network only sends data in non-default altsettings. @@ -926,8 +919,7 @@ static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) net = gether_connect(&ncm->port); if (IS_ERR(net)) return PTR_ERR(net); - scoped_guard(mutex, &opts->lock) - opts->net = net; + ncm->netdev = net; } spin_lock(&ncm->lock); @@ -1374,16 +1366,14 @@ static int ncm_unwrap_ntb(struct gether *port, static void ncm_disable(struct usb_function *f) { struct f_ncm *ncm = func_to_ncm(f); - struct f_ncm_opts *opts = func_to_ncm_opts(f); struct usb_composite_dev *cdev = f->config->cdev; DBG(cdev, "ncm deactivated\n"); - scoped_guard(mutex, &opts->lock) - if (opts->net) { - opts->net = NULL; - gether_disconnect(&ncm->port); - } + if (ncm->netdev) { + ncm->netdev = NULL; + gether_disconnect(&ncm->port); + } if (ncm->notify->enabled) { usb_ep_disable(ncm->notify); @@ -1443,44 +1433,41 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f) { struct usb_composite_dev *cdev = c->cdev; struct f_ncm *ncm = func_to_ncm(f); - struct f_ncm_opts *ncm_opts = func_to_ncm_opts(f); struct usb_string *us; int status = 0; struct usb_ep *ep; + struct f_ncm_opts *ncm_opts; struct usb_os_desc_table *os_desc_table __free(kfree) = NULL; - struct net_device *netdev __free(free_gether_netdev) = NULL; + struct net_device *net __free(detach_gadget) = NULL; struct usb_request *request __free(free_usb_request) = NULL; if (!can_support_ecm(cdev->gadget)) return -EINVAL; + ncm_opts = container_of(f->fi, struct f_ncm_opts, func_inst); + if (cdev->use_os_string) { os_desc_table = kzalloc(sizeof(*os_desc_table), GFP_KERNEL); if (!os_desc_table) return -ENOMEM; } - netdev = gether_setup_default(); - if (IS_ERR(netdev)) - return -ENOMEM; - - scoped_guard(mutex, &ncm_opts->lock) { - gether_apply_opts(netdev, &ncm_opts->net_opts); - netdev->mtu = ncm_opts->max_segment_size - ETH_HLEN; - } - - gether_set_gadget(netdev, cdev->gadget); - status = gether_register_netdev(netdev); - if (status) - return status; + scoped_guard(mutex, &ncm_opts->lock) + if (ncm_opts->bind_count == 0) { + if (!device_is_registered(&ncm_opts->net->dev)) { + ncm_opts->net->mtu = (ncm_opts->max_segment_size - ETH_HLEN); + gether_set_gadget(ncm_opts->net, cdev->gadget); + status = gether_register_netdev(ncm_opts->net); + } else + status = gether_attach_gadget(ncm_opts->net, cdev->gadget); + + if (status) + return status; + net = ncm_opts->net; + } - /* export host's Ethernet address in CDC format */ - status = gether_get_host_addr_cdc(netdev, ncm->ethaddr, - sizeof(ncm->ethaddr)); - if (status < 12) - return -EINVAL; - ncm_string_defs[STRING_MAC_IDX].s = ncm->ethaddr; + ncm_string_defs[1].s = ncm->ethaddr; us = usb_gstrings_attach(cdev, ncm_strings, ARRAY_SIZE(ncm_string_defs)); @@ -1578,8 +1565,9 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f) f->os_desc_n = 1; } ncm->notify_req = no_free_ptr(request); - ncm->netdev = no_free_ptr(netdev); - ncm->port.ioport = netdev_priv(ncm->netdev); + + ncm_opts->bind_count++; + retain_and_null_ptr(net); DBG(cdev, "CDC Network: IN/%s OUT/%s NOTIFY/%s\n", ncm->port.in_ep->name, ncm->port.out_ep->name, @@ -1594,19 +1582,19 @@ static inline struct f_ncm_opts *to_f_ncm_opts(struct config_item *item) } /* f_ncm_item_ops */ -USB_ETHER_OPTS_ITEM(ncm); +USB_ETHERNET_CONFIGFS_ITEM(ncm); /* f_ncm_opts_dev_addr */ -USB_ETHER_OPTS_ATTR_DEV_ADDR(ncm); +USB_ETHERNET_CONFIGFS_ITEM_ATTR_DEV_ADDR(ncm); /* f_ncm_opts_host_addr */ -USB_ETHER_OPTS_ATTR_HOST_ADDR(ncm); +USB_ETHERNET_CONFIGFS_ITEM_ATTR_HOST_ADDR(ncm); /* f_ncm_opts_qmult */ -USB_ETHER_OPTS_ATTR_QMULT(ncm); +USB_ETHERNET_CONFIGFS_ITEM_ATTR_QMULT(ncm); /* f_ncm_opts_ifname */ -USB_ETHER_OPTS_ATTR_IFNAME(ncm); +USB_ETHERNET_CONFIGFS_ITEM_ATTR_IFNAME(ncm); static ssize_t ncm_opts_max_segment_size_show(struct config_item *item, char *page) @@ -1672,27 +1660,34 @@ static void ncm_free_inst(struct usb_function_instance *f) struct f_ncm_opts *opts; opts = container_of(f, struct f_ncm_opts, func_inst); + if (device_is_registered(&opts->net->dev)) + gether_cleanup(netdev_priv(opts->net)); + else + free_netdev(opts->net); kfree(opts->ncm_interf_group); kfree(opts); } static struct usb_function_instance *ncm_alloc_inst(void) { - struct usb_function_instance *ret; + struct f_ncm_opts *opts; struct usb_os_desc *descs[1]; char *names[1]; struct config_group *ncm_interf_group; - struct f_ncm_opts *opts __free(kfree) = kzalloc_obj(*opts); + opts = kzalloc_obj(*opts); if (!opts) return ERR_PTR(-ENOMEM); - - opts->net = NULL; opts->ncm_os_desc.ext_compat_id = opts->ncm_ext_compat_id; - gether_setup_opts_default(&opts->net_opts, "usb"); mutex_init(&opts->lock); opts->func_inst.free_func_inst = ncm_free_inst; + opts->net = gether_setup_default(); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } opts->max_segment_size = ETH_FRAME_LEN; INIT_LIST_HEAD(&opts->ncm_os_desc.ext_prop); @@ -1703,30 +1698,37 @@ static struct usb_function_instance *ncm_alloc_inst(void) ncm_interf_group = usb_os_desc_prepare_interf_dir(&opts->func_inst.group, 1, descs, names, THIS_MODULE); - if (IS_ERR(ncm_interf_group)) + if (IS_ERR(ncm_interf_group)) { + ncm_free_inst(&opts->func_inst); return ERR_CAST(ncm_interf_group); + } opts->ncm_interf_group = ncm_interf_group; - ret = &opts->func_inst; - retain_and_null_ptr(opts); - return ret; + return &opts->func_inst; } static void ncm_free(struct usb_function *f) { - struct f_ncm_opts *opts = func_to_ncm_opts(f); + struct f_ncm *ncm; + struct f_ncm_opts *opts; - scoped_guard(mutex, &opts->lock) - opts->refcnt--; - kfree(func_to_ncm(f)); + ncm = func_to_ncm(f); + opts = container_of(f->fi, struct f_ncm_opts, func_inst); + kfree(ncm); + mutex_lock(&opts->lock); + opts->refcnt--; + mutex_unlock(&opts->lock); } static void ncm_unbind(struct usb_configuration *c, struct usb_function *f) { struct f_ncm *ncm = func_to_ncm(f); + struct f_ncm_opts *ncm_opts; DBG(c->cdev, "ncm unbind\n"); + ncm_opts = container_of(f->fi, struct f_ncm_opts, func_inst); + hrtimer_cancel(&ncm->task_timer); kfree(f->os_desc_table); @@ -1743,14 +1745,16 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f) kfree(ncm->notify_req->buf); usb_ep_free_request(ncm->notify, ncm->notify_req); - ncm->port.ioport = NULL; - gether_cleanup(netdev_priv(ncm->netdev)); + ncm_opts->bind_count--; + if (ncm_opts->bind_count == 0) + gether_detach_gadget(ncm_opts->net); } static struct usb_function *ncm_alloc(struct usb_function_instance *fi) { struct f_ncm *ncm; struct f_ncm_opts *opts; + int status; /* allocate and initialize one new instance */ ncm = kzalloc(sizeof(*ncm), GFP_KERNEL); @@ -1758,12 +1762,22 @@ static struct usb_function *ncm_alloc(struct usb_function_instance *fi) return ERR_PTR(-ENOMEM); opts = container_of(fi, struct f_ncm_opts, func_inst); + mutex_lock(&opts->lock); + opts->refcnt++; - scoped_guard(mutex, &opts->lock) - opts->refcnt++; + /* export host's Ethernet address in CDC format */ + status = gether_get_host_addr_cdc(opts->net, ncm->ethaddr, + sizeof(ncm->ethaddr)); + if (status < 12) { /* strlen("01234567890a") */ + kfree(ncm); + mutex_unlock(&opts->lock); + return ERR_PTR(-EINVAL); + } spin_lock_init(&ncm->lock); ncm_reset_values(ncm); + ncm->port.ioport = netdev_priv(opts->net); + mutex_unlock(&opts->lock); ncm->port.is_fixed = true; ncm->port.supports_multi_frame = true; diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index ec050d8f99f14..a7853dcbb14ca 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c @@ -1222,6 +1222,13 @@ static void usbg_submit_cmd(struct usbg_cmd *cmd) se_cmd = &cmd->se_cmd; tpg = cmd->fu->tpg; tv_nexus = tpg->tpg_nexus; + if (!tv_nexus) { + struct usb_gadget *gadget = fuas_to_gadget(cmd->fu); + + dev_err(&gadget->dev, "Missing nexus, ignoring command\n"); + return; + } + dir = get_cmd_dir(cmd->cmd_buf); if (dir < 0) goto out; @@ -1483,6 +1490,13 @@ static void bot_cmd_work(struct work_struct *work) se_cmd = &cmd->se_cmd; tpg = cmd->fu->tpg; tv_nexus = tpg->tpg_nexus; + if (!tv_nexus) { + struct usb_gadget *gadget = fuas_to_gadget(cmd->fu); + + dev_err(&gadget->dev, "Missing nexus, ignoring command\n"); + return; + } + dir = get_cmd_dir(cmd->cmd_buf); if (dir < 0) goto out; diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 338f6e2a85a94..1a9e7c495e2e8 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -897,6 +897,28 @@ void gether_set_gadget(struct net_device *net, struct usb_gadget *g) } EXPORT_SYMBOL_GPL(gether_set_gadget); +int gether_attach_gadget(struct net_device *net, struct usb_gadget *g) +{ + int ret; + + ret = device_move(&net->dev, &g->dev, DPM_ORDER_DEV_AFTER_PARENT); + if (ret) + return ret; + + gether_set_gadget(net, g); + return 0; +} +EXPORT_SYMBOL_GPL(gether_attach_gadget); + +void gether_detach_gadget(struct net_device *net) +{ + struct eth_dev *dev = netdev_priv(net); + + device_move(&net->dev, NULL, DPM_ORDER_NONE); + dev->gadget = NULL; +} +EXPORT_SYMBOL_GPL(gether_detach_gadget); + int gether_set_dev_addr(struct net_device *net, const char *dev_addr) { struct eth_dev *dev; @@ -1040,36 +1062,6 @@ int gether_set_ifname(struct net_device *net, const char *name, int len) } EXPORT_SYMBOL_GPL(gether_set_ifname); -void gether_setup_opts_default(struct gether_opts *opts, const char *name) -{ - opts->qmult = QMULT_DEFAULT; - snprintf(opts->name, sizeof(opts->name), "%s%%d", name); - eth_random_addr(opts->dev_mac); - opts->addr_assign_type = NET_ADDR_RANDOM; - eth_random_addr(opts->host_mac); -} -EXPORT_SYMBOL_GPL(gether_setup_opts_default); - -void gether_apply_opts(struct net_device *net, struct gether_opts *opts) -{ - struct eth_dev *dev = netdev_priv(net); - - dev->qmult = opts->qmult; - - if (opts->ifname_set) { - strscpy(net->name, opts->name, sizeof(net->name)); - dev->ifname_set = true; - } - - memcpy(dev->host_mac, opts->host_mac, sizeof(dev->host_mac)); - - if (opts->addr_assign_type == NET_ADDR_SET) { - memcpy(dev->dev_mac, opts->dev_mac, sizeof(dev->dev_mac)); - net->addr_assign_type = opts->addr_assign_type; - } -} -EXPORT_SYMBOL_GPL(gether_apply_opts); - void gether_suspend(struct gether *link) { struct eth_dev *dev = link->ioport; @@ -1126,21 +1118,6 @@ void gether_cleanup(struct eth_dev *dev) } EXPORT_SYMBOL_GPL(gether_cleanup); -void gether_unregister_free_netdev(struct net_device *net) -{ - if (!net) - return; - - struct eth_dev *dev = netdev_priv(net); - - if (net->reg_state == NETREG_REGISTERED) { - unregister_netdev(net); - flush_work(&dev->work); - } - free_netdev(net); -} -EXPORT_SYMBOL_GPL(gether_unregister_free_netdev); - /** * gether_connect - notify network layer that USB link is active * @link: the USB link, set up with endpoints, descriptors matching diff --git a/drivers/usb/gadget/function/u_ether.h b/drivers/usb/gadget/function/u_ether.h index a212a8ec5eb1b..c85a1cf3c115d 100644 --- a/drivers/usb/gadget/function/u_ether.h +++ b/drivers/usb/gadget/function/u_ether.h @@ -38,31 +38,6 @@ struct eth_dev; -/** - * struct gether_opts - Options for Ethernet gadget function instances - * @name: Pattern for the network interface name (e.g., "usb%d"). - * Used to generate the net device name. - * @qmult: Queue length multiplier for high/super speed. - * @host_mac: The MAC address to be used by the host side. - * @dev_mac: The MAC address to be used by the device side. - * @ifname_set: True if the interface name pattern has been set by userspace. - * @addr_assign_type: The method used for assigning the device MAC address - * (e.g., NET_ADDR_RANDOM, NET_ADDR_SET). - * - * This structure caches network-related settings provided through configfs - * before the net_device is fully instantiated. This allows for early - * configuration while deferring net_device allocation until the function - * is bound. - */ -struct gether_opts { - char name[IFNAMSIZ]; - unsigned int qmult; - u8 host_mac[ETH_ALEN]; - u8 dev_mac[ETH_ALEN]; - bool ifname_set; - unsigned char addr_assign_type; -}; - /* * This represents the USB side of an "ethernet" link, managed by a USB * function which provides control and (maybe) framing. Two functions @@ -175,6 +150,32 @@ static inline struct net_device *gether_setup_default(void) */ void gether_set_gadget(struct net_device *net, struct usb_gadget *g); +/** + * gether_attach_gadget - Reparent net_device to the gadget device. + * @net: The network device to reparent. + * @g: The target USB gadget device to parent to. + * + * This function moves the network device to be a child of the USB gadget + * device in the device hierarchy. This is typically done when the function + * is bound to a configuration. + * + * Returns 0 on success, or a negative error code on failure. + */ +int gether_attach_gadget(struct net_device *net, struct usb_gadget *g); + +/** + * gether_detach_gadget - Detach net_device from its gadget parent. + * @net: The network device to detach. + * + * This function moves the network device to be a child of the virtual + * devices parent, effectively detaching it from the USB gadget device + * hierarchy. This is typically done when the function is unbound + * from a configuration but the instance is not yet freed. + */ +void gether_detach_gadget(struct net_device *net); + +DEFINE_FREE(detach_gadget, struct net_device *, if (_T) gether_detach_gadget(_T)) + /** * gether_set_dev_addr - initialize an ethernet-over-usb link with eth address * @net: device representing this link @@ -283,11 +284,6 @@ int gether_get_ifname(struct net_device *net, char *name, int len); int gether_set_ifname(struct net_device *net, const char *name, int len); void gether_cleanup(struct eth_dev *dev); -void gether_unregister_free_netdev(struct net_device *net); -DEFINE_FREE(free_gether_netdev, struct net_device *, gether_unregister_free_netdev(_T)); - -void gether_setup_opts_default(struct gether_opts *opts, const char *name); -void gether_apply_opts(struct net_device *net, struct gether_opts *opts); void gether_suspend(struct gether *link); void gether_resume(struct gether *link); diff --git a/drivers/usb/gadget/function/u_ether_configfs.h b/drivers/usb/gadget/function/u_ether_configfs.h index 217990a266b2f..51f0d79e5eca4 100644 --- a/drivers/usb/gadget/function/u_ether_configfs.h +++ b/drivers/usb/gadget/function/u_ether_configfs.h @@ -13,13 +13,6 @@ #ifndef __U_ETHER_CONFIGFS_H #define __U_ETHER_CONFIGFS_H -#include -#include -#include -#include -#include -#include - #define USB_ETHERNET_CONFIGFS_ITEM(_f_) \ static void _f_##_attr_release(struct config_item *item) \ { \ @@ -204,174 +197,4 @@ out: \ \ CONFIGFS_ATTR(_f_##_opts_, _n_) -#define USB_ETHER_OPTS_ITEM(_f_) \ - static void _f_##_attr_release(struct config_item *item) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - \ - usb_put_function_instance(&opts->func_inst); \ - } \ - \ - static struct configfs_item_operations _f_##_item_ops = { \ - .release = _f_##_attr_release, \ - } - -#define USB_ETHER_OPTS_ATTR_DEV_ADDR(_f_) \ - static ssize_t _f_##_opts_dev_addr_show(struct config_item *item, \ - char *page) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - \ - guard(mutex)(&opts->lock); \ - return sysfs_emit(page, "%pM\n", opts->net_opts.dev_mac); \ - } \ - \ - static ssize_t _f_##_opts_dev_addr_store(struct config_item *item, \ - const char *page, size_t len) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - u8 new_addr[ETH_ALEN]; \ - const char *p = page; \ - \ - guard(mutex)(&opts->lock); \ - if (opts->refcnt) \ - return -EBUSY; \ - \ - for (int i = 0; i < ETH_ALEN; i++) { \ - unsigned char num; \ - if ((*p == '.') || (*p == ':')) \ - p++; \ - num = hex_to_bin(*p++) << 4; \ - num |= hex_to_bin(*p++); \ - new_addr[i] = num; \ - } \ - if (!is_valid_ether_addr(new_addr)) \ - return -EINVAL; \ - memcpy(opts->net_opts.dev_mac, new_addr, ETH_ALEN); \ - opts->net_opts.addr_assign_type = NET_ADDR_SET; \ - return len; \ - } \ - \ - CONFIGFS_ATTR(_f_##_opts_, dev_addr) - -#define USB_ETHER_OPTS_ATTR_HOST_ADDR(_f_) \ - static ssize_t _f_##_opts_host_addr_show(struct config_item *item, \ - char *page) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - \ - guard(mutex)(&opts->lock); \ - return sysfs_emit(page, "%pM\n", opts->net_opts.host_mac); \ - } \ - \ - static ssize_t _f_##_opts_host_addr_store(struct config_item *item, \ - const char *page, size_t len) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - u8 new_addr[ETH_ALEN]; \ - const char *p = page; \ - \ - guard(mutex)(&opts->lock); \ - if (opts->refcnt) \ - return -EBUSY; \ - \ - for (int i = 0; i < ETH_ALEN; i++) { \ - unsigned char num; \ - if ((*p == '.') || (*p == ':')) \ - p++; \ - num = hex_to_bin(*p++) << 4; \ - num |= hex_to_bin(*p++); \ - new_addr[i] = num; \ - } \ - if (!is_valid_ether_addr(new_addr)) \ - return -EINVAL; \ - memcpy(opts->net_opts.host_mac, new_addr, ETH_ALEN); \ - return len; \ - } \ - \ - CONFIGFS_ATTR(_f_##_opts_, host_addr) - -#define USB_ETHER_OPTS_ATTR_QMULT(_f_) \ - static ssize_t _f_##_opts_qmult_show(struct config_item *item, \ - char *page) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - \ - guard(mutex)(&opts->lock); \ - return sysfs_emit(page, "%u\n", opts->net_opts.qmult); \ - } \ - \ - static ssize_t _f_##_opts_qmult_store(struct config_item *item, \ - const char *page, size_t len) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - u32 val; \ - int ret; \ - \ - guard(mutex)(&opts->lock); \ - if (opts->refcnt) \ - return -EBUSY; \ - \ - ret = kstrtou32(page, 0, &val); \ - if (ret) \ - return ret; \ - \ - opts->net_opts.qmult = val; \ - return len; \ - } \ - \ - CONFIGFS_ATTR(_f_##_opts_, qmult) - -#define USB_ETHER_OPTS_ATTR_IFNAME(_f_) \ - static ssize_t _f_##_opts_ifname_show(struct config_item *item, \ - char *page) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - const char *name; \ - \ - guard(mutex)(&opts->lock); \ - rtnl_lock(); \ - if (opts->net_opts.ifname_set) \ - name = opts->net_opts.name; \ - else if (opts->net) \ - name = netdev_name(opts->net); \ - else \ - name = "(inactive net_device)"; \ - rtnl_unlock(); \ - return sysfs_emit(page, "%s\n", name); \ - } \ - \ - static ssize_t _f_##_opts_ifname_store(struct config_item *item, \ - const char *page, size_t len) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - char tmp[IFNAMSIZ]; \ - const char *p; \ - size_t c_len = len; \ - \ - if (c_len > 0 && page[c_len - 1] == '\n') \ - c_len--; \ - \ - if (c_len >= sizeof(tmp)) \ - return -E2BIG; \ - \ - strscpy(tmp, page, c_len + 1); \ - if (!dev_valid_name(tmp)) \ - return -EINVAL; \ - \ - /* Require exactly one %d */ \ - p = strchr(tmp, '%'); \ - if (!p || p[1] != 'd' || strchr(p + 2, '%')) \ - return -EINVAL; \ - \ - guard(mutex)(&opts->lock); \ - if (opts->refcnt) \ - return -EBUSY; \ - strscpy(opts->net_opts.name, tmp, sizeof(opts->net_opts.name)); \ - opts->net_opts.ifname_set = true; \ - return len; \ - } \ - \ - CONFIGFS_ATTR(_f_##_opts_, ifname) - #endif /* __U_ETHER_CONFIGFS_H */ diff --git a/drivers/usb/gadget/function/u_ncm.h b/drivers/usb/gadget/function/u_ncm.h index d99330fe31e88..b1f3db8b68c15 100644 --- a/drivers/usb/gadget/function/u_ncm.h +++ b/drivers/usb/gadget/function/u_ncm.h @@ -15,13 +15,11 @@ #include -#include "u_ether.h" - struct f_ncm_opts { struct usb_function_instance func_inst; struct net_device *net; + int bind_count; - struct gether_opts net_opts; struct config_group *ncm_interf_group; struct usb_os_desc ncm_os_desc; char ncm_ext_compat_id[16]; diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 7cea641b06b41..2f9700b3f1b64 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -513,7 +513,7 @@ uvc_video_prep_requests(struct uvc_video *video) return; } - interval_duration = 2 << (video->ep->desc->bInterval - 1); + interval_duration = 1 << (video->ep->desc->bInterval - 1); if (cdev->gadget->speed < USB_SPEED_HIGH) interval_duration *= 10000; else diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index 890fc5e892f1b..ade178ab34a78 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -386,11 +386,19 @@ static const struct file_operations port_fops = { static int xhci_portli_show(struct seq_file *s, void *unused) { struct xhci_port *port = s->private; - struct xhci_hcd *xhci = hcd_to_xhci(port->rhub->hcd); + struct xhci_hcd *xhci; u32 portli; portli = readl(&port->port_reg->portli); + /* port without protocol capability isn't added to a roothub */ + if (!port->rhub) { + seq_printf(s, "0x%08x\n", portli); + return 0; + } + + xhci = hcd_to_xhci(port->rhub->hcd); + /* PORTLI fields are valid if port is a USB3 or eUSB2V2 port */ if (port->rhub == &xhci->usb3_rhub) seq_printf(s, "0x%08x LEC=%u RLC=%u TLC=%u\n", portli, diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 9315ba18310d6..1cbefee3c4cac 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3195,6 +3195,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) if (status & STS_HCE) { xhci_warn(xhci, "WARNING: Host Controller Error\n"); + xhci_halt(xhci); goto out; } diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index c36ab323d68e9..ef6d8662adecf 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4146,7 +4146,7 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) if (state == 0xffffffff || (xhci->xhc_state & XHCI_STATE_DYING) || (xhci->xhc_state & XHCI_STATE_HALTED)) { spin_unlock_irqrestore(&xhci->lock, flags); - kfree(command); + xhci_free_command(xhci, command); return -ENODEV; } @@ -4154,7 +4154,7 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) slot_id); if (ret) { spin_unlock_irqrestore(&xhci->lock, flags); - kfree(command); + xhci_free_command(xhci, command); return ret; } xhci_ring_cmd_db(xhci); diff --git a/drivers/usb/image/mdc800.c b/drivers/usb/image/mdc800.c index 8d8e79afa6004..ca287b770e8cb 100644 --- a/drivers/usb/image/mdc800.c +++ b/drivers/usb/image/mdc800.c @@ -707,7 +707,7 @@ static ssize_t mdc800_device_read (struct file *file, char __user *buf, size_t l if (signal_pending (current)) { mutex_unlock(&mdc800->io_lock); - return -EINTR; + return len == left ? -EINTR : len-left; } sts=left > (mdc800->out_count-mdc800->out_ptr)?mdc800->out_count-mdc800->out_ptr:left; @@ -730,9 +730,11 @@ static ssize_t mdc800_device_read (struct file *file, char __user *buf, size_t l mutex_unlock(&mdc800->io_lock); return len-left; } - wait_event_timeout(mdc800->download_wait, + retval = wait_event_timeout(mdc800->download_wait, mdc800->downloaded, msecs_to_jiffies(TO_DOWNLOAD_GET_READY)); + if (!retval) + usb_kill_urb(mdc800->download_urb); mdc800->downloaded = 0; if (mdc800->download_urb->status != 0) { diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index ec8bd968c4de5..a8af7615b1bfb 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -736,7 +736,7 @@ static int uss720_probe(struct usb_interface *intf, ret = get_1284_register(pp, 0, ®, GFP_KERNEL); dev_dbg(&intf->dev, "reg: %7ph\n", priv->reg); if (ret < 0) - return ret; + goto probe_abort; ret = usb_find_last_int_in_endpoint(interface, &epd); if (!ret) { diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 9189e4bb213a4..7a482cdee1e9c 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -272,6 +272,7 @@ static int yurex_probe(struct usb_interface *interface, const struct usb_device_ dev->int_buffer, YUREX_BUF_SIZE, yurex_interrupt, dev, 1); dev->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; + dev->bbu = -1; if (usb_submit_urb(dev->urb, GFP_KERNEL)) { retval = -EIO; dev_err(&interface->dev, "Could not submitting URB\n"); @@ -280,7 +281,6 @@ static int yurex_probe(struct usb_interface *interface, const struct usb_device_ /* save our data pointer in this interface device */ usb_set_intfdata(interface, dev); - dev->bbu = -1; /* we can register the device now, as it is ready */ retval = usb_register_dev(interface, &yurex_class); diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index cf4a0367d6d60..8c93bde4b8167 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -815,6 +815,15 @@ static void usbhs_remove(struct platform_device *pdev) usbhs_platform_call(priv, hardware_exit, pdev); reset_control_assert(priv->rsts); + + /* + * Explicitly free the IRQ to ensure the interrupt handler is + * disabled and synchronized before freeing resources. + * devm_free_irq() calls free_irq() which waits for any running + * ISR to complete, preventing UAF. + */ + devm_free_irq(&pdev->dev, priv->irq, priv); + usbhs_mod_remove(priv); usbhs_fifo_remove(priv); usbhs_pipe_remove(priv); diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index b8e28ceca51e1..edec139b68b52 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -139,9 +139,14 @@ static void *usb_role_switch_match(const struct fwnode_handle *fwnode, const cha static struct usb_role_switch * usb_role_switch_is_parent(struct fwnode_handle *fwnode) { - struct fwnode_handle *parent = fwnode_get_parent(fwnode); + struct fwnode_handle *parent; struct device *dev; + if (!fwnode_device_is_compatible(fwnode, "usb-b-connector")) + return NULL; + + parent = fwnode_get_parent(fwnode); + if (!fwnode_property_present(parent, "usb-role-switch")) { fwnode_handle_put(parent); return NULL; diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index d185688a16b13..35d9c30869900 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -100,9 +100,14 @@ static int dp_altmode_configure(struct dp_altmode *dp, u8 con) { u8 pin_assign = 0; u32 conf; + u32 signal; /* DP Signalling */ - conf = (dp->data.conf & DP_CONF_SIGNALLING_MASK) >> DP_CONF_SIGNALLING_SHIFT; + signal = DP_CAP_DP_SIGNALLING(dp->port->vdo) & DP_CAP_DP_SIGNALLING(dp->alt->vdo); + if (dp->plug_prime) + signal &= DP_CAP_DP_SIGNALLING(dp->plug_prime->vdo); + + conf = signal << DP_CONF_SIGNALLING_SHIFT; switch (con) { case DP_STATUS_CON_DISABLED: diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 1d2f3af034c5c..8e0e14a2704e0 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -7890,7 +7890,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) port->partner_desc.identity = &port->partner_ident; port->role_sw = fwnode_usb_role_switch_get(tcpc->fwnode); - if (IS_ERR_OR_NULL(port->role_sw)) + if (!port->role_sw) port->role_sw = usb_role_switch_get(port->dev); if (IS_ERR(port->role_sw)) { err = PTR_ERR(port->role_sw); diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index a936f9ea56108..63bf096b721aa 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -298,8 +298,8 @@ int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *alist, srx.transport.sin.sin_addr.s_addr = xdr; peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL); - if (!peer) - return -ENOMEM; + if (IS_ERR(peer)) + return PTR_ERR(peer); for (i = 0; i < alist->nr_ipv4; i++) { if (peer == alist->addrs[i].peer) { @@ -342,8 +342,8 @@ int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *alist, memcpy(&srx.transport.sin6.sin6_addr, xdr, 16); peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL); - if (!peer) - return -ENOMEM; + if (IS_ERR(peer)) + return PTR_ERR(peer); for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { if (peer == alist->addrs[i].peer) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e4fad777b034a..407830d86d0da 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3594,7 +3594,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device } } - btrfs_zoned_reserve_data_reloc_bg(fs_info); btrfs_free_zone_cache(fs_info); btrfs_check_active_zone_reservation(fs_info); @@ -3622,6 +3621,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_cleaner; } + /* + * Starts a transaction, must be called after the transaction kthread + * is initialized. + */ + btrfs_zoned_reserve_data_reloc_bg(fs_info); + ret = btrfs_read_qgroup_config(fs_info); if (ret) goto fail_trans_kthread; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 744a1fff6eefc..5f97a3d2a8d72 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4507,6 +4507,7 @@ static int try_release_subpage_extent_buffer(struct folio *folio) */ if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) { spin_unlock(&eb->refs_lock); + rcu_read_lock(); break; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a11fcc9e9f502..a6da98435ef7c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6612,6 +6612,25 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, int ret; bool xa_reserved = false; + if (!args->orphan && !args->subvol) { + /* + * Before anything else, check if we can add the name to the + * parent directory. We want to avoid a dir item overflow in + * case we have an existing dir item due to existing name + * hash collisions. We do this check here before we call + * btrfs_add_link() down below so that we can avoid a + * transaction abort (which could be exploited by malicious + * users). + * + * For subvolumes we already do this in btrfs_mksubvol(). + */ + ret = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, + btrfs_ino(BTRFS_I(dir)), + name); + if (ret < 0) + return ret; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a1fd44c44ecf7..b805dd9227efa 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -672,6 +672,13 @@ static noinline int create_subvol(struct mnt_idmap *idmap, goto out; } + /* + * Subvolumes have orphans cleaned on first dentry lookup. A new + * subvolume cannot have any orphans, so we should set the bit before we + * add the subvolume dentry to the dentry cache, so that it is in the + * same state as a subvolume after first lookup. + */ + set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &new_root->state); d_instantiate_new(dentry, new_inode_args.inode); new_inode_args.inode = NULL; @@ -3852,6 +3859,25 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, goto out; } + received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid, + BTRFS_UUID_SIZE); + + /* + * Before we attempt to add the new received uuid, check if we have room + * for it in case there's already an item. If the size of the existing + * item plus this root's ID (u64) exceeds the maximum item size, we can + * return here without the need to abort a transaction. If we don't do + * this check, the btrfs_uuid_tree_add() call below would fail with + * -EOVERFLOW and result in a transaction abort. Malicious users could + * exploit this to turn the fs into RO mode. + */ + if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) { + ret = btrfs_uuid_tree_check_overflow(fs_info, sa->uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL); + if (ret < 0) + goto out; + } + /* * 1 - root item * 2 - uuid items (received uuid + subvol uuid) @@ -3867,15 +3893,12 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, sa->rtime.sec = ct.tv_sec; sa->rtime.nsec = ct.tv_nsec; - received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid, - BTRFS_UUID_SIZE); if (received_uuid_changed && !btrfs_is_empty_uuid(root_item->received_uuid)) { ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid, BTRFS_UUID_KEY_RECEIVED_SUBVOL, btrfs_root_id(root)); if (unlikely(ret && ret != -ENOENT)) { - btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); goto out; } @@ -3890,7 +3913,8 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, ret = btrfs_update_root(trans, fs_info->tree_root, &root->root_key, &root->root_item); - if (ret < 0) { + if (unlikely(ret < 0)) { + btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); goto out; } diff --git a/fs/btrfs/messages.h b/fs/btrfs/messages.h index 943e53980945e..c8e92efce4058 100644 --- a/fs/btrfs/messages.h +++ b/fs/btrfs/messages.h @@ -28,9 +28,6 @@ void _btrfs_printk(const struct btrfs_fs_info *fs_info, unsigned int level, cons #else -#define btrfs_printk_in_rcu(fs_info, level, fmt, args...) \ - btrfs_no_printk(fs_info, fmt, ##args) - #define btrfs_printk_in_rcu(fs_info, level, fmt, args...) \ btrfs_no_printk(fs_info, fmt, ##args) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f189bf09ce6aa..b7dfe877cf8d9 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -38,6 +38,7 @@ static const struct root_name_map root_map[] = { { BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" }, { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" }, { BTRFS_RAID_STRIPE_TREE_OBJECTID, "RAID_STRIPE_TREE" }, + { BTRFS_REMAP_TREE_OBJECTID, "REMAP_TREE" }, }; const char *btrfs_root_name(const struct btrfs_key *key, char *buf) @@ -415,6 +416,9 @@ static void key_type_string(const struct btrfs_key *key, char *buf, int buf_size [BTRFS_UUID_KEY_SUBVOL] = "UUID_KEY_SUBVOL", [BTRFS_UUID_KEY_RECEIVED_SUBVOL] = "UUID_KEY_RECEIVED_SUBVOL", [BTRFS_RAID_STRIPE_KEY] = "RAID_STRIPE", + [BTRFS_IDENTITY_REMAP_KEY] = "IDENTITY_REMAP", + [BTRFS_REMAP_KEY] = "REMAP", + [BTRFS_REMAP_BACKREF_KEY] = "REMAP_BACKREF", }; if (key->type == 0 && key->objectid == BTRFS_FREE_SPACE_OBJECTID) @@ -435,6 +439,7 @@ void btrfs_print_leaf(const struct extent_buffer *l) struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; struct btrfs_dev_extent *dev_extent; + struct btrfs_remap_item *remap; struct btrfs_key key; if (!l) @@ -569,6 +574,11 @@ void btrfs_print_leaf(const struct extent_buffer *l) print_raid_stripe_key(l, btrfs_item_size(l, i), btrfs_item_ptr(l, i, struct btrfs_stripe_extent)); break; + case BTRFS_REMAP_KEY: + case BTRFS_REMAP_BACKREF_KEY: + remap = btrfs_item_ptr(l, i, struct btrfs_remap_item); + pr_info("\t\taddress %llu\n", btrfs_remap_address(l, remap)); + break; } } } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index a330d8624b837..b2343aed7a5d0 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4399,6 +4399,8 @@ static int move_existing_remaps(struct btrfs_fs_info *fs_info, leaf = path->nodes[0]; } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); } remap = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_remap_item); diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 52a267a5dd801..87cbc051cb12f 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -2194,8 +2194,11 @@ void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info) if (!btrfs_should_periodic_reclaim(space_info)) continue; for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++) { - if (do_reclaim_sweep(space_info, raid)) + if (do_reclaim_sweep(space_info, raid)) { + spin_lock(&space_info->lock); btrfs_set_periodic_reclaim_ready(space_info, false); + spin_unlock(&space_info->lock); + } } } } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7ef8c9b7dfc17..8dd77c431974d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1905,6 +1905,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_uuid_tree_add(trans, new_root_item->received_uuid, BTRFS_UUID_KEY_RECEIVED_SUBVOL, objectid); + /* + * We are creating of lot of snapshots of the same root that was + * received (has a received UUID) and reached a leaf's limit for + * an item. We can safely ignore this and avoid a transaction + * abort. A deletion of this snapshot will still work since we + * ignore if an item with a BTRFS_UUID_KEY_RECEIVED_SUBVOL key + * is missing (see btrfs_delete_subvolume()). Send/receive will + * work too since it peeks the first root id from the existing + * item (it could peek any), and in case it's missing it + * falls back to search by BTRFS_UUID_KEY_SUBVOL keys. + * Creation of a snapshot does not require CAP_SYS_ADMIN, so + * we don't want users triggering transaction aborts, either + * intentionally or not. + */ + if (ret == -EOVERFLOW) + ret = 0; if (unlikely(ret && ret != -EEXIST)) { btrfs_abort_transaction(trans, ret); goto fail; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index ac4c4573ee391..516ef62c8f43d 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1284,7 +1284,7 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key, } if (unlikely(btrfs_root_drop_level(&ri) >= BTRFS_MAX_LEVEL)) { generic_err(leaf, slot, - "invalid root level, have %u expect [0, %u]", + "invalid root drop_level, have %u expect [0, %u]", btrfs_root_drop_level(&ri), BTRFS_MAX_LEVEL - 1); return -EUCLEAN; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 780a06d592409..552fef3c385a5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6195,6 +6195,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_log_ctx *ctx) { + const bool orig_log_new_dentries = ctx->log_new_dentries; int ret = 0; /* @@ -6256,7 +6257,11 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, * dir index key range logged for the directory. So we * must make sure the deletion is recorded. */ + ctx->log_new_dentries = false; ret = btrfs_log_inode(trans, inode, LOG_INODE_ALL, ctx); + if (!ret && ctx->log_new_dentries) + ret = log_new_dir_dentries(trans, inode, ctx); + btrfs_add_delayed_iput(inode); if (ret) break; @@ -6291,6 +6296,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, break; } + ctx->log_new_dentries = orig_log_new_dentries; ctx->logging_conflict_inodes = false; if (ret) free_conflicting_inodes(ctx); diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index f24c14b9bb2fd..43c17a1d34513 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -199,6 +199,44 @@ int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8 return 0; } +/* + * Check if we can add one root ID to a UUID key. + * If the key does not yet exists, we can, otherwise only if extended item does + * not exceeds the maximum item size permitted by the leaf size. + * + * Returns 0 on success, negative value on error. + */ +int btrfs_uuid_tree_check_overflow(struct btrfs_fs_info *fs_info, + const u8 *uuid, u8 type) +{ + BTRFS_PATH_AUTO_FREE(path); + int ret; + u32 item_size; + struct btrfs_key key; + + if (WARN_ON_ONCE(!fs_info->uuid_root)) + return -EINVAL; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + btrfs_uuid_to_key(uuid, type, &key); + ret = btrfs_search_slot(NULL, fs_info->uuid_root, &key, path, 0, 0); + if (ret < 0) + return ret; + if (ret > 0) + return 0; + + item_size = btrfs_item_size(path->nodes[0], path->slots[0]); + + if (sizeof(struct btrfs_item) + item_size + sizeof(u64) > + BTRFS_LEAF_DATA_SIZE(fs_info)) + return -EOVERFLOW; + + return 0; +} + static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type, u64 subid) { diff --git a/fs/btrfs/uuid-tree.h b/fs/btrfs/uuid-tree.h index c60ad20325cce..02b235a3653f0 100644 --- a/fs/btrfs/uuid-tree.h +++ b/fs/btrfs/uuid-tree.h @@ -12,6 +12,8 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, const u8 *uuid, u8 typ u64 subid); int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type, u64 subid); +int btrfs_uuid_tree_check_overflow(struct btrfs_fs_info *fs_info, + const u8 *uuid, u8 type); int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info); int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info); int btrfs_uuid_scan_kthread(void *data); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 353c9caa8ab92..85d5cd110df93 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3587,7 +3587,7 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset, bool v /* step one, relocate all the extents inside this chunk */ btrfs_scrub_pause(fs_info); - ret = btrfs_relocate_block_group(fs_info, chunk_offset, true); + ret = btrfs_relocate_block_group(fs_info, chunk_offset, verbose); btrfs_scrub_continue(fs_info); if (ret) { /* diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 39930d99943c1..817ca4fb9efa9 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -337,7 +337,10 @@ int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info) if (!btrfs_fs_incompat(fs_info, ZONED)) return 0; - mutex_lock(&fs_devices->device_list_mutex); + /* + * No need to take the device_list mutex here, we're still in the mount + * path and devices cannot be added to or removed from the list yet. + */ list_for_each_entry(device, &fs_devices->devices, dev_list) { /* We can skip reading of zone info for missing devices */ if (!device->bdev) @@ -347,7 +350,6 @@ int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info) if (ret) break; } - mutex_unlock(&fs_devices->device_list_mutex); return ret; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e87b3bb94ee89..2090fc78529cb 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1326,7 +1326,6 @@ void ceph_process_folio_batch(struct address_space *mapping, continue; } else if (rc == -E2BIG) { folio_unlock(folio); - ceph_wbc->fbatch.folios[i] = NULL; break; } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f3fe786b4143d..7dc3077902401 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -79,7 +79,7 @@ static int mdsc_show(struct seq_file *s, void *p) if (req->r_inode) { seq_printf(s, " #%llx", ceph_ino(req->r_inode)); } else if (req->r_dentry) { - struct ceph_path_info path_info; + struct ceph_path_info path_info = {0}; path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0); if (IS_ERR(path)) path = NULL; @@ -98,7 +98,7 @@ static int mdsc_show(struct seq_file *s, void *p) } if (req->r_old_dentry) { - struct ceph_path_info path_info; + struct ceph_path_info path_info = {0}; path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &path_info, 0); if (IS_ERR(path)) path = NULL; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 86d7aa594ea99..bac9cfb6b982f 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1339,6 +1339,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = d_inode(dentry); + struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); struct dentry *dn; @@ -1363,7 +1364,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) if (!dn) { try_async = false; } else { - struct ceph_path_info path_info; + struct ceph_path_info path_info = {0}; path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0); if (IS_ERR(path)) { try_async = false; @@ -1424,7 +1425,19 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) * We have enough caps, so we assume that the unlink * will succeed. Fix up the target inode and dcache. */ - drop_nlink(inode); + + /* + * Protect the i_nlink update with i_ceph_lock + * to precent racing against ceph_fill_inode() + * handling our completion on a worker thread + * and don't decrement if i_nlink has already + * been updated to zero by this completion. + */ + spin_lock(&ci->i_ceph_lock); + if (inode->i_nlink > 0) + drop_nlink(inode); + spin_unlock(&ci->i_ceph_lock); + d_delete(dentry); } else { spin_lock(&fsc->async_unlink_conflict_lock); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 66bbf6d517a99..5e7c73a29aa3e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -397,7 +397,7 @@ int ceph_open(struct inode *inode, struct file *file) if (!dentry) { do_sync = true; } else { - struct ceph_path_info path_info; + struct ceph_path_info path_info = {0}; path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0); if (IS_ERR(path)) { do_sync = true; @@ -807,7 +807,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, if (!dn) { try_async = false; } else { - struct ceph_path_info path_info; + struct ceph_path_info path_info = {0}; path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0); if (IS_ERR(path)) { try_async = false; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index d76f9a79dc0c2..d99e12d1100b1 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2551,7 +2551,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, if (!dentry) { do_sync = true; } else { - struct ceph_path_info path_info; + struct ceph_path_info path_info = {0}; path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0); if (IS_ERR(path)) { do_sync = true; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 23b6d00643c9d..b1746273f1863 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2768,6 +2768,7 @@ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, if (ret < 0) { dput(parent); dput(cur); + __putname(path); return ERR_PTR(ret); } @@ -2777,6 +2778,7 @@ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, if (len < 0) { dput(parent); dput(cur); + __putname(path); return ERR_PTR(len); } } @@ -2813,6 +2815,7 @@ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, * cannot ever succeed. Creating paths that long is * possible with Ceph, but Linux cannot use them. */ + __putname(path); return ERR_PTR(-ENAMETOOLONG); } diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 12cb0ca738aff..6bb30543eff00 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -87,7 +87,7 @@ config NFS_V4 space programs which can be found in the Linux nfs-utils package, available from http://linux-nfs.org/. - If unsure, say Y. + If unsure, say N. config NFS_SWAP bool "Provide swap over NFS support" @@ -100,6 +100,7 @@ config NFS_SWAP config NFS_V4_0 bool "NFS client support for NFSv4.0" depends on NFS_V4 + default y help This option enables support for minor version 0 of the NFSv4 protocol (RFC 3530) in the kernel's NFS client. diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3e2de45c95fe5..be2aebf62056b 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -392,8 +392,13 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, if (status != 0) goto out_release_acls; - if (d_alias) + if (d_alias) { + if (d_is_dir(d_alias)) { + status = -EISDIR; + goto out_dput; + } dentry = d_alias; + } /* When we created the file with exclusive semantics, make * sure we set the attributes afterwards. */ diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index f4cb3018a3585..c920039d733c3 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -1489,7 +1489,7 @@ struct smb_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, struct cifsFileInfo *open_file = NULL; if (inode) - open_file = find_readable_file(CIFS_I(inode), true); + open_file = find_readable_file(CIFS_I(inode), FIND_FSUID_ONLY); if (!open_file) return get_cifs_acl_by_path(cifs_sb, path, pacllen, info); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index b6e3db993cc64..32d0305a1239a 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1269,7 +1269,7 @@ static int cifs_precopy_set_eof(struct inode *src_inode, struct cifsInodeInfo *s struct cifsFileInfo *writeable_srcfile; int rc = -EINVAL; - writeable_srcfile = find_writable_file(src_cifsi, FIND_WR_FSUID_ONLY); + writeable_srcfile = find_writable_file(src_cifsi, FIND_FSUID_ONLY); if (writeable_srcfile) { if (src_tcon->ses->server->ops->set_file_size) rc = src_tcon->ses->server->ops->set_file_size( diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 6f9b6c72962b0..7877d327dbb03 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -20,6 +20,7 @@ #include #include #include +#include #include "cifs_fs_sb.h" #include "cifsacl.h" #include @@ -1884,12 +1885,12 @@ static inline bool is_replayable_error(int error) } -/* cifs_get_writable_file() flags */ -enum cifs_writable_file_flags { - FIND_WR_ANY = 0U, - FIND_WR_FSUID_ONLY = (1U << 0), - FIND_WR_WITH_DELETE = (1U << 1), - FIND_WR_NO_PENDING_DELETE = (1U << 2), +enum cifs_find_flags { + FIND_ANY = 0U, + FIND_FSUID_ONLY = (1U << 0), + FIND_WITH_DELETE = (1U << 1), + FIND_NO_PENDING_DELETE = (1U << 2), + FIND_OPEN_FLAGS = (1U << 3), }; #define MID_FREE 0 @@ -2375,4 +2376,14 @@ static inline bool cifs_forced_shutdown(const struct cifs_sb_info *sbi) return cifs_sb_flags(sbi) & CIFS_MOUNT_SHUTDOWN; } +static inline int cifs_open_create_options(unsigned int oflags, int opts) +{ + /* O_SYNC also has bit for O_DSYNC so following check picks up either */ + if (oflags & O_SYNC) + opts |= CREATE_WRITE_THROUGH; + if (oflags & O_DIRECT) + opts |= CREATE_NO_BUFFER; + return opts; +} + #endif /* _CIFS_GLOB_H */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 800a7e418c326..884bfa1cf0b42 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -138,12 +138,14 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result); struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, int flags); -int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, - struct cifsFileInfo **ret_file); +int __cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, + unsigned int find_flags, unsigned int open_flags, + struct cifsFileInfo **ret_file); int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, int flags, struct cifsFileInfo **ret_file); -struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, - bool fsuid_only); +struct cifsFileInfo *__find_readable_file(struct cifsInodeInfo *cifs_inode, + unsigned int find_flags, + unsigned int open_flags); int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, struct cifsFileInfo **ret_file); int cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode, @@ -596,4 +598,20 @@ static inline void cifs_sg_set_buf(struct sg_table *sgtable, } } +static inline int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, + unsigned int find_flags, + struct cifsFileInfo **ret_file) +{ + find_flags &= ~FIND_OPEN_FLAGS; + return __cifs_get_writable_file(cifs_inode, find_flags, 0, ret_file); +} + +static inline struct cifsFileInfo * +find_readable_file(struct cifsInodeInfo *cinode, unsigned int find_flags) +{ + find_flags &= ~FIND_OPEN_FLAGS; + find_flags |= FIND_NO_PENDING_DELETE; + return __find_readable_file(cinode, find_flags, 0); +} + #endif /* _CIFSPROTO_H */ diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index 953f1fee8cb8c..6d2378eeb7f68 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -187,7 +187,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned const char *full_path; void *page = alloc_dentry_path(); struct inode *newinode = NULL; - unsigned int sbflags; + unsigned int sbflags = cifs_sb_flags(cifs_sb); int disposition; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; @@ -308,6 +308,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned goto out; } + create_options |= cifs_open_create_options(oflags, create_options); /* * if we're not using unix extensions, see if we need to set * ATTR_READONLY on the create call @@ -367,7 +368,6 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned * If Open reported that we actually created a file then we now have to * set the mode if possible. */ - sbflags = cifs_sb_flags(cifs_sb); if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) { struct cifs_unix_set_info_args args = { .mode = mode, diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index cffcf82c1b690..27f61fe7e4e28 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -255,7 +255,7 @@ static void cifs_begin_writeback(struct netfs_io_request *wreq) struct cifs_io_request *req = container_of(wreq, struct cifs_io_request, rreq); int ret; - ret = cifs_get_writable_file(CIFS_I(wreq->inode), FIND_WR_ANY, &req->cfile); + ret = cifs_get_writable_file(CIFS_I(wreq->inode), FIND_ANY, &req->cfile); if (ret) { cifs_dbg(VFS, "No writable handle in writepages ret=%d\n", ret); return; @@ -584,15 +584,8 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ *********************************************************************/ disposition = cifs_get_disposition(f_flags); - /* BB pass O_SYNC flag through on file attributes .. BB */ - - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ - if (f_flags & O_SYNC) - create_options |= CREATE_WRITE_THROUGH; - - if (f_flags & O_DIRECT) - create_options |= CREATE_NO_BUFFER; + create_options |= cifs_open_create_options(f_flags, create_options); retry_open: oparms = (struct cifs_open_parms) { @@ -963,7 +956,7 @@ int cifs_file_flush(const unsigned int xid, struct inode *inode, return tcon->ses->server->ops->flush(xid, tcon, &cfile->fid); } - rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); + rc = cifs_get_writable_file(CIFS_I(inode), FIND_ANY, &cfile); if (!rc) { tcon = tlink_tcon(cfile->tlink); rc = tcon->ses->server->ops->flush(xid, tcon, &cfile->fid); @@ -988,7 +981,7 @@ static int cifs_do_truncate(const unsigned int xid, struct dentry *dentry) return -ERESTARTSYS; mapping_set_error(inode->i_mapping, rc); - cfile = find_writable_file(cinode, FIND_WR_FSUID_ONLY); + cfile = find_writable_file(cinode, FIND_FSUID_ONLY); rc = cifs_file_flush(xid, inode, cfile); if (!rc) { if (cfile) { @@ -1068,32 +1061,29 @@ int cifs_open(struct inode *inode, struct file *file) /* Get the cached handle as SMB2 close is deferred */ if (OPEN_FMODE(file->f_flags) & FMODE_WRITE) { - rc = cifs_get_writable_path(tcon, full_path, - FIND_WR_FSUID_ONLY | - FIND_WR_NO_PENDING_DELETE, - &cfile); + rc = __cifs_get_writable_file(CIFS_I(inode), + FIND_FSUID_ONLY | + FIND_NO_PENDING_DELETE | + FIND_OPEN_FLAGS, + file->f_flags, &cfile); } else { - rc = cifs_get_readable_path(tcon, full_path, &cfile); + cfile = __find_readable_file(CIFS_I(inode), + FIND_NO_PENDING_DELETE | + FIND_OPEN_FLAGS, + file->f_flags); + rc = cfile ? 0 : -ENOENT; } if (rc == 0) { - unsigned int oflags = file->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); - unsigned int cflags = cfile->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); - - if (cifs_convert_flags(oflags, 0) == cifs_convert_flags(cflags, 0) && - (oflags & (O_SYNC|O_DIRECT)) == (cflags & (O_SYNC|O_DIRECT))) { - file->private_data = cfile; - spin_lock(&CIFS_I(inode)->deferred_lock); - cifs_del_deferred_close(cfile); - spin_unlock(&CIFS_I(inode)->deferred_lock); - goto use_cache; - } - _cifsFileInfo_put(cfile, true, false); - } else { - /* hard link on the defeered close file */ - rc = cifs_get_hardlink_path(tcon, inode, file); - if (rc) - cifs_close_deferred_file(CIFS_I(inode)); - } + file->private_data = cfile; + spin_lock(&CIFS_I(inode)->deferred_lock); + cifs_del_deferred_close(cfile); + spin_unlock(&CIFS_I(inode)->deferred_lock); + goto use_cache; + } + /* hard link on the deferred close file */ + rc = cifs_get_hardlink_path(tcon, inode, file); + if (rc) + cifs_close_deferred_file(CIFS_I(inode)); if (server->oplocks) oplock = REQ_OPLOCK; @@ -1314,13 +1304,8 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) rdwr_for_fscache = 1; desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache); - - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ - if (cfile->f_flags & O_SYNC) - create_options |= CREATE_WRITE_THROUGH; - - if (cfile->f_flags & O_DIRECT) - create_options |= CREATE_NO_BUFFER; + create_options |= cifs_open_create_options(cfile->f_flags, + create_options); if (server->ops->get_lease_key) server->ops->get_lease_key(inode, &cfile->fid); @@ -2524,10 +2509,33 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t netfs_write_subrequest_terminated(&wdata->subreq, result); } -struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, - bool fsuid_only) +static bool open_flags_match(struct cifsInodeInfo *cinode, + unsigned int oflags, unsigned int cflags) +{ + struct inode *inode = &cinode->netfs.inode; + int crw = 0, orw = 0; + + oflags &= ~(O_CREAT | O_EXCL | O_TRUNC); + cflags &= ~(O_CREAT | O_EXCL | O_TRUNC); + + if (cifs_fscache_enabled(inode)) { + if (OPEN_FMODE(cflags) & FMODE_WRITE) + crw = 1; + if (OPEN_FMODE(oflags) & FMODE_WRITE) + orw = 1; + } + if (cifs_convert_flags(oflags, orw) != cifs_convert_flags(cflags, crw)) + return false; + + return (oflags & (O_SYNC | O_DIRECT)) == (cflags & (O_SYNC | O_DIRECT)); +} + +struct cifsFileInfo *__find_readable_file(struct cifsInodeInfo *cifs_inode, + unsigned int find_flags, + unsigned int open_flags) { struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode); + bool fsuid_only = find_flags & FIND_FSUID_ONLY; struct cifsFileInfo *open_file = NULL; /* only filter by fsuid on multiuser mounts */ @@ -2541,6 +2549,13 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) continue; + if ((find_flags & FIND_NO_PENDING_DELETE) && + open_file->status_file_deleted) + continue; + if ((find_flags & FIND_OPEN_FLAGS) && + !open_flags_match(cifs_inode, open_flags, + open_file->f_flags)) + continue; if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { if ((!open_file->invalidHandle)) { /* found a good file */ @@ -2559,17 +2574,17 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, } /* Return -EBADF if no handle is found and general rc otherwise */ -int -cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, - struct cifsFileInfo **ret_file) +int __cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, + unsigned int find_flags, unsigned int open_flags, + struct cifsFileInfo **ret_file) { struct cifsFileInfo *open_file, *inv_file = NULL; struct cifs_sb_info *cifs_sb; bool any_available = false; int rc = -EBADF; unsigned int refind = 0; - bool fsuid_only = flags & FIND_WR_FSUID_ONLY; - bool with_delete = flags & FIND_WR_WITH_DELETE; + bool fsuid_only = find_flags & FIND_FSUID_ONLY; + bool with_delete = find_flags & FIND_WITH_DELETE; *ret_file = NULL; /* @@ -2603,9 +2618,13 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, continue; if (with_delete && !(open_file->fid.access & DELETE)) continue; - if ((flags & FIND_WR_NO_PENDING_DELETE) && + if ((find_flags & FIND_NO_PENDING_DELETE) && open_file->status_file_deleted) continue; + if ((find_flags & FIND_OPEN_FLAGS) && + !open_flags_match(cifs_inode, open_flags, + open_file->f_flags)) + continue; if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { if (!open_file->invalidHandle) { /* found a good writable file */ @@ -2722,17 +2741,7 @@ cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, cinode = CIFS_I(d_inode(cfile->dentry)); spin_unlock(&tcon->open_file_lock); free_dentry_path(page); - *ret_file = find_readable_file(cinode, 0); - if (*ret_file) { - spin_lock(&cinode->open_file_lock); - if ((*ret_file)->status_file_deleted) { - spin_unlock(&cinode->open_file_lock); - cifsFileInfo_put(*ret_file); - *ret_file = NULL; - } else { - spin_unlock(&cinode->open_file_lock); - } - } + *ret_file = find_readable_file(cinode, FIND_ANY); return *ret_file ? 0 : -ENOENT; } @@ -2804,7 +2813,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) } if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { - smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); + smbfile = find_writable_file(CIFS_I(inode), FIND_ANY); if (smbfile) { rc = server->ops->flush(xid, tcon, &smbfile->fid); cifsFileInfo_put(smbfile); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 54090739535fb..a4a7c7eee038c 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1997,7 +1997,7 @@ int smb3_init_fs_context(struct fs_context *fc) ctx->backupuid_specified = false; /* no backup intent for a user */ ctx->backupgid_specified = false; /* no backup intent for a group */ - ctx->retrans = 1; + ctx->retrans = 0; ctx->reparse_type = CIFS_REPARSE_TYPE_DEFAULT; ctx->symlink_type = CIFS_SYMLINK_TYPE_DEFAULT; ctx->nonativesocket = 0; diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 3e844c55ab8a2..143fa2e665ed4 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2997,7 +2997,7 @@ int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start, } } - cfile = find_readable_file(cifs_i, false); + cfile = find_readable_file(cifs_i, FIND_ANY); if (cfile == NULL) return -EINVAL; @@ -3050,7 +3050,7 @@ int cifs_file_set_size(const unsigned int xid, struct dentry *dentry, size, false); cifs_dbg(FYI, "%s: set_file_size: rc = %d\n", __func__, rc); } else { - open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); + open_file = find_writable_file(cifsInode, FIND_FSUID_ONLY); if (open_file) { tcon = tlink_tcon(open_file->tlink); server = tcon->ses->server; @@ -3219,7 +3219,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) open_file->fid.netfid, open_file->pid); } else { - open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); + open_file = find_writable_file(cifsInode, FIND_FSUID_ONLY); if (open_file) { pTcon = tlink_tcon(open_file->tlink); rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 9643eca0cb70f..9694117050a6c 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -960,7 +960,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, struct cifs_tcon *tcon; /* if the file is already open for write, just use that fileid */ - open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY); + open_file = find_writable_file(cinode, FIND_FSUID_ONLY); if (open_file) { fid.netfid = open_file->fid.netfid; diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 5280c5c869ad5..364bdcff9c9d5 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1156,7 +1156,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, cifs_i = CIFS_I(inode); dosattrs = cifs_i->cifsAttrs | ATTR_READONLY; data.Attributes = cpu_to_le32(dosattrs); - cifs_get_writable_path(tcon, name, FIND_WR_ANY, &cfile); + cifs_get_writable_path(tcon, name, FIND_ANY, &cfile); oparms = CIFS_OPARMS(cifs_sb, tcon, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, ACL_NO_MODE); tmprc = smb2_compound_op(xid, tcon, cifs_sb, name, @@ -1336,14 +1336,13 @@ int smb2_rename_path(const unsigned int xid, __u32 co = file_create_options(source_dentry); drop_cached_dir_by_name(xid, tcon, from_name, cifs_sb); - cifs_get_writable_path(tcon, from_name, FIND_WR_WITH_DELETE, &cfile); + cifs_get_writable_path(tcon, from_name, FIND_WITH_DELETE, &cfile); int rc = smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, co, DELETE, SMB2_OP_RENAME, cfile, source_dentry); if (rc == -EINVAL) { cifs_dbg(FYI, "invalid lease key, resending request without lease"); - cifs_get_writable_path(tcon, from_name, - FIND_WR_WITH_DELETE, &cfile); + cifs_get_writable_path(tcon, from_name, FIND_WITH_DELETE, &cfile); rc = smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, co, DELETE, SMB2_OP_RENAME, cfile, NULL); } @@ -1377,7 +1376,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, in_iov.iov_base = &eof; in_iov.iov_len = sizeof(eof); - cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_ANY, &cfile); oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE); @@ -1387,7 +1386,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, cfile, NULL, NULL, dentry); if (rc == -EINVAL) { cifs_dbg(FYI, "invalid lease key, resending request without lease"); - cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_ANY, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, &in_iov, &(int){SMB2_OP_SET_EOF}, 1, @@ -1417,7 +1416,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path, (buf->LastWriteTime == 0) && (buf->ChangeTime == 0)) { if (buf->Attributes == 0) goto out; /* would be a no op, no sense sending this */ - cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_ANY, &cfile); } oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_WRITE_ATTRIBUTES, @@ -1476,7 +1475,7 @@ struct inode *smb2_create_reparse_inode(struct cifs_open_info_data *data, if (tcon->posix_extensions) { cmds[1] = SMB2_OP_POSIX_QUERY_INFO; - cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_ANY, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, in_iov, cmds, 2, cfile, out_iov, out_buftype, NULL); if (!rc) { @@ -1485,7 +1484,7 @@ struct inode *smb2_create_reparse_inode(struct cifs_open_info_data *data, } } else { cmds[1] = SMB2_OP_QUERY_INFO; - cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_ANY, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, in_iov, cmds, 2, cfile, out_iov, out_buftype, NULL); if (!rc) { @@ -1636,13 +1635,12 @@ int smb2_rename_pending_delete(const char *full_path, iov[1].iov_base = utf16_path; iov[1].iov_len = sizeof(*utf16_path) * UniStrlen((wchar_t *)utf16_path); - cifs_get_writable_path(tcon, full_path, FIND_WR_WITH_DELETE, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_WITH_DELETE, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, iov, cmds, num_cmds, cfile, NULL, NULL, dentry); if (rc == -EINVAL) { cifs_dbg(FYI, "invalid lease key, resending request without lease\n"); - cifs_get_writable_path(tcon, full_path, - FIND_WR_WITH_DELETE, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_WITH_DELETE, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, iov, cmds, num_cmds, cfile, NULL, NULL, NULL); } diff --git a/fs/smb/client/smb2maperror.c b/fs/smb/client/smb2maperror.c index f4cff44e2796d..2b8782c4f684b 100644 --- a/fs/smb/client/smb2maperror.c +++ b/fs/smb/client/smb2maperror.c @@ -109,6 +109,9 @@ int __init smb2_init_maperror(void) } #if IS_ENABLED(CONFIG_SMB_KUNIT_TESTS) +#define EXPORT_SYMBOL_FOR_SMB_TEST(sym) \ + EXPORT_SYMBOL_FOR_MODULES(sym, "smb2maperror_test") + /* Previous prototype for eliminating the build warning. */ const struct status_to_posix_error *smb2_get_err_map_test(__u32 smb2_status); @@ -116,11 +119,11 @@ const struct status_to_posix_error *smb2_get_err_map_test(__u32 smb2_status) { return smb2_get_err_map(smb2_status); } -EXPORT_SYMBOL_GPL(smb2_get_err_map_test); +EXPORT_SYMBOL_FOR_SMB_TEST(smb2_get_err_map_test); const struct status_to_posix_error *smb2_error_map_table_test = smb2_error_map_table; -EXPORT_SYMBOL_GPL(smb2_error_map_table_test); +EXPORT_SYMBOL_FOR_SMB_TEST(smb2_error_map_table_test); unsigned int smb2_error_map_num = ARRAY_SIZE(smb2_error_map_table); -EXPORT_SYMBOL_GPL(smb2_error_map_num); +EXPORT_SYMBOL_FOR_SMB_TEST(smb2_error_map_num); #endif diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 7f2d3459cbf9a..98ac4e86bf997 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -628,6 +628,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, struct smb_sockaddr_in6 *p6; struct cifs_server_iface *info = NULL, *iface = NULL, *niface = NULL; struct cifs_server_iface tmp_iface; + __be16 port; ssize_t bytes_left; size_t next = 0; int nb_iface = 0; @@ -662,6 +663,15 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, goto out; } + spin_lock(&ses->server->srv_lock); + if (ses->server->dstaddr.ss_family == AF_INET) + port = ((struct sockaddr_in *)&ses->server->dstaddr)->sin_port; + else if (ses->server->dstaddr.ss_family == AF_INET6) + port = ((struct sockaddr_in6 *)&ses->server->dstaddr)->sin6_port; + else + port = cpu_to_be16(CIFS_PORT); + spin_unlock(&ses->server->srv_lock); + while (bytes_left >= (ssize_t)sizeof(*p)) { memset(&tmp_iface, 0, sizeof(tmp_iface)); /* default to 1Gbps when link speed is unset */ @@ -682,7 +692,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, memcpy(&addr4->sin_addr, &p4->IPv4Address, 4); /* [MS-SMB2] 2.2.32.5.1.1 Clients MUST ignore these */ - addr4->sin_port = cpu_to_be16(CIFS_PORT); + addr4->sin_port = port; cifs_dbg(FYI, "%s: ipv4 %pI4\n", __func__, &addr4->sin_addr); @@ -696,7 +706,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, /* [MS-SMB2] 2.2.32.5.1.2 Clients MUST ignore these */ addr6->sin6_flowinfo = 0; addr6->sin6_scope_id = 0; - addr6->sin6_port = cpu_to_be16(CIFS_PORT); + addr6->sin6_port = port; cifs_dbg(FYI, "%s: ipv6 %pI6\n", __func__, &addr6->sin6_addr); @@ -3352,7 +3362,7 @@ get_smb2_acl(struct cifs_sb_info *cifs_sb, struct cifsFileInfo *open_file = NULL; if (inode && !(info & SACL_SECINFO)) - open_file = find_readable_file(CIFS_I(inode), true); + open_file = find_readable_file(CIFS_I(inode), FIND_FSUID_ONLY); if (!open_file || (info & SACL_SECINFO)) return get_smb2_acl_by_path(cifs_sb, path, pacllen, info); @@ -3898,7 +3908,7 @@ static loff_t smb3_llseek(struct file *file, struct cifs_tcon *tcon, loff_t offs * some servers (Windows2016) will not reflect recent writes in * QUERY_ALLOCATED_RANGES until SMB2_flush is called. */ - wrcfile = find_writable_file(cifsi, FIND_WR_ANY); + wrcfile = find_writable_file(cifsi, FIND_ANY); if (wrcfile) { filemap_write_and_wait(inode->i_mapping); smb2_flush_file(xid, tcon, &wrcfile->fid); diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c43ca74e8704c..5188218c25be4 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -5307,7 +5307,10 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, memset(&rqst, 0, sizeof(struct smb_rqst)); rqst.rq_iov = iov; - rqst.rq_nvec = n_vec + 1; + /* iov[0] is the SMB header; move payload to rq_iter for encryption safety */ + rqst.rq_nvec = 1; + iov_iter_kvec(&rqst.rq_iter, ITER_SOURCE, &iov[1], n_vec, + io_parms->length); if (retries) { /* Back-off before retry */ diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c index 5fe8c667c6b1d..af5f403043317 100644 --- a/fs/smb/server/auth.c +++ b/fs/smb/server/auth.c @@ -589,12 +589,8 @@ static int generate_smb3signingkey(struct ksmbd_session *sess, if (!(conn->dialect >= SMB30_PROT_ID && signing->binding)) memcpy(chann->smb3signingkey, key, SMB3_SIGN_KEY_SIZE); - ksmbd_debug(AUTH, "dumping generated AES signing keys\n"); + ksmbd_debug(AUTH, "generated SMB3 signing key\n"); ksmbd_debug(AUTH, "Session Id %llu\n", sess->id); - ksmbd_debug(AUTH, "Session Key %*ph\n", - SMB2_NTLMV2_SESSKEY_SIZE, sess->sess_key); - ksmbd_debug(AUTH, "Signing Key %*ph\n", - SMB3_SIGN_KEY_SIZE, key); return 0; } @@ -652,23 +648,9 @@ static void generate_smb3encryptionkey(struct ksmbd_conn *conn, ptwin->decryption.context, sess->smb3decryptionkey, SMB3_ENC_DEC_KEY_SIZE); - ksmbd_debug(AUTH, "dumping generated AES encryption keys\n"); + ksmbd_debug(AUTH, "generated SMB3 encryption/decryption keys\n"); ksmbd_debug(AUTH, "Cipher type %d\n", conn->cipher_type); ksmbd_debug(AUTH, "Session Id %llu\n", sess->id); - ksmbd_debug(AUTH, "Session Key %*ph\n", - SMB2_NTLMV2_SESSKEY_SIZE, sess->sess_key); - if (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM || - conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM) { - ksmbd_debug(AUTH, "ServerIn Key %*ph\n", - SMB3_GCM256_CRYPTKEY_SIZE, sess->smb3encryptionkey); - ksmbd_debug(AUTH, "ServerOut Key %*ph\n", - SMB3_GCM256_CRYPTKEY_SIZE, sess->smb3decryptionkey); - } else { - ksmbd_debug(AUTH, "ServerIn Key %*ph\n", - SMB3_GCM128_CRYPTKEY_SIZE, sess->smb3encryptionkey); - ksmbd_debug(AUTH, "ServerOut Key %*ph\n", - SMB3_GCM128_CRYPTKEY_SIZE, sess->smb3decryptionkey); - } } void ksmbd_gen_smb30_encryptionkey(struct ksmbd_conn *conn, diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 09d9878db9cbf..393a4ae47cc1d 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -120,7 +120,7 @@ static void free_lease(struct oplock_info *opinfo) kfree(lease); } -static void free_opinfo(struct oplock_info *opinfo) +static void __free_opinfo(struct oplock_info *opinfo) { if (opinfo->is_lease) free_lease(opinfo); @@ -129,6 +129,18 @@ static void free_opinfo(struct oplock_info *opinfo) kfree(opinfo); } +static void free_opinfo_rcu(struct rcu_head *rcu) +{ + struct oplock_info *opinfo = container_of(rcu, struct oplock_info, rcu); + + __free_opinfo(opinfo); +} + +static void free_opinfo(struct oplock_info *opinfo) +{ + call_rcu(&opinfo->rcu, free_opinfo_rcu); +} + struct oplock_info *opinfo_get(struct ksmbd_file *fp) { struct oplock_info *opinfo; @@ -176,9 +188,9 @@ void opinfo_put(struct oplock_info *opinfo) free_opinfo(opinfo); } -static void opinfo_add(struct oplock_info *opinfo) +static void opinfo_add(struct oplock_info *opinfo, struct ksmbd_file *fp) { - struct ksmbd_inode *ci = opinfo->o_fp->f_ci; + struct ksmbd_inode *ci = fp->f_ci; down_write(&ci->m_lock); list_add(&opinfo->op_entry, &ci->m_op_list); @@ -1123,10 +1135,12 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) rcu_read_lock(); opinfo = rcu_dereference(fp->f_opinfo); - rcu_read_unlock(); - if (!opinfo || !opinfo->is_lease || opinfo->o_lease->version != 2) + if (!opinfo || !opinfo->is_lease || opinfo->o_lease->version != 2) { + rcu_read_unlock(); return; + } + rcu_read_unlock(); p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent); if (!p_ci) @@ -1277,20 +1291,21 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, set_oplock_level(opinfo, req_op_level, lctx); out: - rcu_assign_pointer(fp->f_opinfo, opinfo); - opinfo->o_fp = fp; - opinfo_count_inc(fp); - opinfo_add(opinfo); + opinfo_add(opinfo, fp); + if (opinfo->is_lease) { err = add_lease_global_list(opinfo); if (err) goto err_out; } + rcu_assign_pointer(fp->f_opinfo, opinfo); + opinfo->o_fp = fp; + return 0; err_out: - free_opinfo(opinfo); + __free_opinfo(opinfo); return err; } diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index 9a56eaadd0dd8..921e3199e4df4 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -69,8 +69,9 @@ struct oplock_info { struct lease *o_lease; struct list_head op_entry; struct list_head lease_entry; - wait_queue_head_t oplock_q; /* Other server threads */ - wait_queue_head_t oplock_brk; /* oplock breaking wait */ + wait_queue_head_t oplock_q; /* Other server threads */ + wait_queue_head_t oplock_brk; /* oplock breaking wait */ + struct rcu_head rcu; }; struct lease_break_info { diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 743c629fe7ec6..9f7ff7491e9a8 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3012,13 +3012,14 @@ int smb2_open(struct ksmbd_work *work) goto err_out2; } + fp = dh_info.fp; + if (ksmbd_override_fsids(work)) { rc = -ENOMEM; ksmbd_put_durable_fd(dh_info.fp); goto err_out2; } - fp = dh_info.fp; file_info = FILE_OPENED; rc = ksmbd_vfs_getattr(&fp->filp->f_path, &stat); @@ -3616,10 +3617,8 @@ int smb2_open(struct ksmbd_work *work) reconnected_fp: rsp->StructureSize = cpu_to_le16(89); - rcu_read_lock(); - opinfo = rcu_dereference(fp->f_opinfo); + opinfo = opinfo_get(fp); rsp->OplockLevel = opinfo != NULL ? opinfo->level : 0; - rcu_read_unlock(); rsp->Flags = 0; rsp->CreateAction = cpu_to_le32(file_info); rsp->CreationTime = cpu_to_le64(fp->create_time); @@ -3660,6 +3659,7 @@ int smb2_open(struct ksmbd_work *work) next_ptr = &lease_ccontext->Next; next_off = conn->vals->create_lease_size; } + opinfo_put(opinfo); if (maximal_access_ctxt) { struct create_context *mxac_ccontext; diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index ff4ea412d9003..168f2dd7e200b 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -87,11 +87,7 @@ static int proc_show_files(struct seq_file *m, void *v) rcu_read_lock(); opinfo = rcu_dereference(fp->f_opinfo); - rcu_read_unlock(); - - if (!opinfo) { - seq_printf(m, " %-15s", " "); - } else { + if (opinfo) { const struct ksmbd_const_name *const_names; int count; unsigned int level; @@ -105,8 +101,12 @@ static int proc_show_files(struct seq_file *m, void *v) count = ARRAY_SIZE(ksmbd_oplock_const_names); level = opinfo->level; } + rcu_read_unlock(); ksmbd_proc_show_const_name(m, " %-15s", const_names, count, level); + } else { + rcu_read_unlock(); + seq_printf(m, " %-15s", " "); } seq_printf(m, " %#010x %#010x %s\n", diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 766631f0562e1..09d4c17b3e7ba 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -2716,12 +2716,8 @@ xfs_dabuf_map( * larger one that needs to be free by the caller. */ if (nirecs > 1) { - map = kzalloc(nirecs * sizeof(struct xfs_buf_map), - GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); - if (!map) { - error = -ENOMEM; - goto out_free_irecs; - } + map = kcalloc(nirecs, sizeof(struct xfs_buf_map), + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); *mapp = map; } diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 472c261163ed4..c6909716b0415 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -809,7 +809,7 @@ xfs_defer_can_append( /* Paused items cannot absorb more work */ if (dfp->dfp_flags & XFS_DEFER_PAUSED) - return NULL; + return false; /* Already full? */ if (ops->max_items && dfp->dfp_count >= ops->max_items) diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index e8775f254c89c..b237a25d6045d 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -245,7 +245,7 @@ xfs_bmap_update_diff_items( struct xfs_bmap_intent *ba = bi_entry(a); struct xfs_bmap_intent *bb = bi_entry(b); - return ba->bi_owner->i_ino - bb->bi_owner->i_ino; + return cmp_int(ba->bi_owner->i_ino, bb->bi_owner->i_ino); } /* Log bmap updates in the intent item. */ diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 2b208e2c52644..69e9bc588c8b6 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1439,9 +1439,15 @@ xfs_qm_dqflush( return 0; out_abort: + /* + * Shut down the log before removing the dquot item from the AIL. + * Otherwise, the log tail may advance past this item's LSN while + * log writes are still in progress, making these unflushed changes + * unrecoverable on the next mount. + */ + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); dqp->q_flags &= ~XFS_DQFLAG_DIRTY; xfs_trans_ail_delete(lip, 0); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); xfs_dqfunlock(dqp); return error; } diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c index 4a06d6632f65e..26c325d34bd1a 100644 --- a/fs/xfs/xfs_healthmon.c +++ b/fs/xfs/xfs_healthmon.c @@ -141,6 +141,16 @@ xfs_healthmon_detach( hm->mount_cookie = DETACHED_MOUNT_COOKIE; spin_unlock(&xfs_healthmon_lock); + /* + * Wake up any readers that might remain. This can happen if unmount + * races with the healthmon fd owner entering ->read_iter, having + * already emptied the event queue. + * + * In the ->release case there shouldn't be any readers because the + * only users of the waiter are read and poll. + */ + wake_up_all(&hm->wait); + trace_xfs_healthmon_detach(hm); xfs_healthmon_put(hm); } @@ -1027,13 +1037,6 @@ xfs_healthmon_release( * process can create another health monitor file. */ xfs_healthmon_detach(hm); - - /* - * Wake up any readers that might be left. There shouldn't be any - * because the only users of the waiter are read and poll. - */ - wake_up_all(&hm->wait); - xfs_healthmon_put(hm); return 0; } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index a7a09e7eec815..2040a9292ee65 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -159,7 +159,6 @@ xfs_inode_free_callback( ASSERT(!test_bit(XFS_LI_IN_AIL, &ip->i_itemp->ili_item.li_flags)); xfs_inode_item_destroy(ip); - ip->i_itemp = NULL; } kmem_cache_free(xfs_inode_cache, ip); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b96f262ba1391..f807f8f4f7058 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1357,6 +1357,8 @@ xlog_alloc_log( if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1) log->l_iclog_roundoff = mp->m_sb.sb_logsunit; + else if (mp->m_sb.sb_logsectsize > 0) + log->l_iclog_roundoff = mp->m_sb.sb_logsectsize; else log->l_iclog_roundoff = BBSIZE; diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 7efeecd2d85f5..309f700985243 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -96,7 +96,6 @@ struct xfs_gc_bio { */ xfs_fsblock_t old_startblock; xfs_daddr_t new_daddr; - struct xfs_zone_scratch *scratch; /* Are we writing to a sequential write required zone? */ bool is_seq; @@ -779,7 +778,6 @@ xfs_zone_gc_split_write( ihold(VFS_I(chunk->ip)); split_chunk->ip = chunk->ip; split_chunk->is_seq = chunk->is_seq; - split_chunk->scratch = chunk->scratch; split_chunk->offset = chunk->offset; split_chunk->len = split_len; split_chunk->old_startblock = chunk->old_startblock; diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index d290060f4c73d..91013161e9db9 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -68,12 +68,12 @@ * timeout value used in Stratix10 FPGA manager driver. * timeout value used in RSU driver */ -#define SVC_RECONFIG_REQUEST_TIMEOUT_MS 300 -#define SVC_RECONFIG_BUFFER_TIMEOUT_MS 720 -#define SVC_RSU_REQUEST_TIMEOUT_MS 300 +#define SVC_RECONFIG_REQUEST_TIMEOUT_MS 5000 +#define SVC_RECONFIG_BUFFER_TIMEOUT_MS 5000 +#define SVC_RSU_REQUEST_TIMEOUT_MS 2000 #define SVC_FCS_REQUEST_TIMEOUT_MS 2000 #define SVC_COMPLETED_TIMEOUT_MS 30000 -#define SVC_HWMON_REQUEST_TIMEOUT_MS 300 +#define SVC_HWMON_REQUEST_TIMEOUT_MS 2000 struct stratix10_svc_chan; diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 331dcbefe72f1..89e1a80d9f5fb 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -78,7 +78,7 @@ void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd); /* * Select a buffer from the provided buffer group for multishot uring_cmd. - * Returns the selected buffer address and size. + * Returns the selected buffer address, size, and id. */ struct io_br_sel io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, unsigned buf_group, size_t *len, @@ -91,6 +91,21 @@ struct io_br_sel io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, struct io_br_sel *sel, unsigned int issue_flags); +int io_uring_buf_ring_pin(struct io_uring_cmd *cmd, unsigned buf_group, + unsigned issue_flags, struct io_buffer_list **out_bl); +int io_uring_buf_ring_unpin(struct io_uring_cmd *cmd, unsigned buf_group, + unsigned issue_flags); + +int io_uring_kmbuf_recycle(struct io_uring_cmd *cmd, unsigned int buf_group, + u64 addr, unsigned int len, unsigned int bid, + unsigned int issue_flags); + +bool io_uring_is_kmbuf_ring(struct io_uring_cmd *cmd, unsigned int buf_group, + unsigned int issue_flags); + +struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, + struct io_buffer_list *bl, + unsigned int issue_flags); #else static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, @@ -133,6 +148,42 @@ static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, { return true; } +static inline int io_uring_buf_ring_pin(struct io_uring_cmd *cmd, + unsigned buf_group, + unsigned issue_flags, + struct io_buffer_list **bl) +{ + return -EOPNOTSUPP; +} +static inline int io_uring_buf_ring_unpin(struct io_uring_cmd *cmd, + unsigned buf_group, + unsigned issue_flags) +{ + return -EOPNOTSUPP; +} +static inline int io_uring_kmbuf_recycle(struct io_uring_cmd *cmd, + unsigned int buf_group, u64 addr, + unsigned int len, unsigned int bid, + unsigned int issue_flags) +{ + return -EOPNOTSUPP; +} +static inline bool io_uring_is_kmbuf_ring(struct io_uring_cmd *cmd, + unsigned int buf_group, + unsigned int issue_flags) +{ + return false; +} +static inline struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, + size_t *len, + struct io_buffer_list *bl, + unsigned int issue_flags) +{ + struct io_br_sel sel = { + .val = -EOPNOTSUPP, + }; + return sel; +} #endif static inline struct io_uring_cmd *io_uring_cmd_from_tw(struct io_tw_req tw_req) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3e4a82a6f8178..619c7314ddfba 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -8,6 +8,9 @@ #include #include +struct iou_loop_params; +struct io_uring_bpf_ops; + enum { /* * A hint to not wake right away but delay until there are enough of @@ -41,6 +44,8 @@ enum io_uring_cmd_flags { IO_URING_F_COMPAT = (1 << 12), }; +struct iou_loop_params; + struct io_wq_work_node { struct io_wq_work_node *next; }; @@ -93,13 +98,15 @@ struct io_mapped_region { */ struct io_br_sel { struct io_buffer_list *buf_list; - /* - * Some selection parts return the user address, others return an error. - */ union { + /* for classic/ring provided buffers */ void __user *addr; - ssize_t val; + /* for kernel-managed buffers */ + void *kaddr; }; + ssize_t val; + /* id of the selected buffer */ + unsigned buf_id; }; @@ -268,24 +275,30 @@ struct io_alloc_cache { unsigned int init_clear; }; +enum { + IO_RING_F_DRAIN_NEXT = BIT(0), + IO_RING_F_OP_RESTRICTED = BIT(1), + IO_RING_F_REG_RESTRICTED = BIT(2), + IO_RING_F_OFF_TIMEOUT_USED = BIT(3), + IO_RING_F_DRAIN_ACTIVE = BIT(4), + IO_RING_F_HAS_EVFD = BIT(5), + /* all CQEs should be posted only by the submitter task */ + IO_RING_F_TASK_COMPLETE = BIT(6), + IO_RING_F_LOCKLESS_CQ = BIT(7), + IO_RING_F_SYSCALL_IOPOLL = BIT(8), + IO_RING_F_POLL_ACTIVATED = BIT(9), + IO_RING_F_DRAIN_DISABLED = BIT(10), + IO_RING_F_COMPAT = BIT(11), + IO_RING_F_IOWQ_LIMITS_SET = BIT(12), +}; + struct io_ring_ctx { /* const or read-mostly hot data */ struct { + /* ring setup flags */ unsigned int flags; - unsigned int drain_next: 1; - unsigned int op_restricted: 1; - unsigned int reg_restricted: 1; - unsigned int off_timeout_used: 1; - unsigned int drain_active: 1; - unsigned int has_evfd: 1; - /* all CQEs should be posted only by the submitter task */ - unsigned int task_complete: 1; - unsigned int lockless_cq: 1; - unsigned int syscall_iopoll: 1; - unsigned int poll_activated: 1; - unsigned int drain_disabled: 1; - unsigned int compat: 1; - unsigned int iowq_limits_set : 1; + /* internal state flags IO_RING_F_* flags , mostly read-only */ + unsigned int int_flags; struct task_struct *submitter_task; struct io_rings *rings; @@ -355,6 +368,9 @@ struct io_ring_ctx { struct io_alloc_cache rw_cache; struct io_alloc_cache cmd_cache; + int (*loop_step)(struct io_ring_ctx *ctx, + struct iou_loop_params *); + /* * Any cancelable uring_cmd is added to this list in * ->uring_cmd() by io_uring_cmd_insert_cancelable() @@ -388,6 +404,7 @@ struct io_ring_ctx { * regularly bounce b/w CPUs. */ struct { + struct io_rings __rcu *rings_rcu; struct llist_head work_llist; struct llist_head retry_llist; unsigned long check_cq; @@ -476,6 +493,8 @@ struct io_ring_ctx { DECLARE_HASHTABLE(napi_ht, 4); #endif + struct io_uring_bpf_ops *bpf_ops; + /* * Protection for resize vs mmap races - both the mmap and resize * side will need to grab this lock, to prevent either side from @@ -540,9 +559,11 @@ enum { REQ_F_BL_NO_RECYCLE_BIT, REQ_F_BUFFERS_COMMIT_BIT, REQ_F_BUF_NODE_BIT, + REQ_F_BUF_MORE_BIT, REQ_F_HAS_METADATA_BIT, REQ_F_IMPORT_BUFFER_BIT, REQ_F_SQE_COPIED_BIT, + REQ_F_IOPOLL_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -625,6 +646,8 @@ enum { REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT), /* buf node is valid */ REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT), + /* incremental buffer consumption, more space available */ + REQ_F_BUF_MORE = IO_REQ_FLAG(REQ_F_BUF_MORE_BIT), /* request has read/write metadata assigned */ REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT), /* @@ -634,6 +657,8 @@ enum { REQ_F_IMPORT_BUFFER = IO_REQ_FLAG(REQ_F_IMPORT_BUFFER_BIT), /* ->sqe_copy() has been called, if necessary */ REQ_F_SQE_COPIED = IO_REQ_FLAG(REQ_F_SQE_COPIED_BIT), + /* request must be iopolled to completion (set in ->issue()) */ + REQ_F_IOPOLL = IO_REQ_FLAG(REQ_F_IOPOLL_BIT), }; struct io_tw_req { diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 443053a76dcfd..a7421382a9162 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -333,7 +333,12 @@ struct tcp6_timewait_sock { }; #if IS_ENABLED(CONFIG_IPV6) -bool ipv6_mod_enabled(void); +extern int disable_ipv6_mod; + +static inline bool ipv6_mod_enabled(void) +{ + return disable_ipv6_mod == 0; +} static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 34759a262b289..6b76e7a6f4c22 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1940,56 +1940,43 @@ enum kvm_stat_kind { struct kvm_stat_data { struct kvm *kvm; - const struct _kvm_stats_desc *desc; + const struct kvm_stats_desc *desc; enum kvm_stat_kind kind; }; -struct _kvm_stats_desc { - struct kvm_stats_desc desc; - char name[KVM_STATS_NAME_SIZE]; -}; - -#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \ - .flags = type | unit | base | \ - BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \ - BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \ - BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \ - .exponent = exp, \ - .size = sz, \ +#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \ + .flags = type | unit | base | \ + BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \ + BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \ + BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \ + .exponent = exp, \ + .size = sz, \ .bucket_size = bsz -#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vm_stat, generic.stat) \ - }, \ - .name = #stat, \ - } -#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vcpu_stat, generic.stat) \ - }, \ - .name = #stat, \ - } -#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vm_stat, stat) \ - }, \ - .name = #stat, \ - } -#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vcpu_stat, stat) \ - }, \ - .name = #stat, \ - } +#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vm_stat, generic.stat), \ + .name = #stat, \ +} +#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vcpu_stat, generic.stat), \ + .name = #stat, \ +} +#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vm_stat, stat), \ + .name = #stat, \ +} +#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vcpu_stat, stat), \ + .name = #stat, \ +} /* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */ #define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz) \ SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz) @@ -2066,7 +2053,7 @@ struct _kvm_stats_desc { STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, - const struct _kvm_stats_desc *desc, + const struct kvm_stats_desc *desc, void *stats, size_t size_stats, char __user *user_buffer, size_t size, loff_t *offset); @@ -2111,9 +2098,9 @@ static inline void kvm_stats_log_hist_update(u64 *data, size_t size, u64 value) extern const struct kvm_stats_header kvm_vm_stats_header; -extern const struct _kvm_stats_desc kvm_vm_stats_desc[]; +extern const struct kvm_stats_desc kvm_vm_stats_desc[]; extern const struct kvm_stats_header kvm_vcpu_stats_header; -extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; +extern const struct kvm_stats_desc kvm_vcpu_stats_desc[]; static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 5be3d8a8f806d..abb4963c1f064 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3514,26 +3514,21 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ -static inline unsigned long ptdesc_nr_pages(const struct ptdesc *ptdesc) -{ - return compound_nr(ptdesc_page(ptdesc)); -} - static inline void __pagetable_ctor(struct ptdesc *ptdesc) { - pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); + struct folio *folio = ptdesc_folio(ptdesc); - __SetPageTable(ptdesc_page(ptdesc)); - mod_node_page_state(pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc)); + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); } static inline void pagetable_dtor(struct ptdesc *ptdesc) { - pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); + struct folio *folio = ptdesc_folio(ptdesc); ptlock_free(ptdesc); - __ClearPageTable(ptdesc_page(ptdesc)); - mod_node_page_state(pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc)); + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); } static inline void pagetable_dtor_free(struct ptdesc *ptdesc) diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 07a2bbaf86e90..8450e18a87c26 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -234,7 +234,7 @@ struct mmu_notifier { }; /** - * struct mmu_interval_notifier_ops + * struct mmu_interval_notifier_ops - callback for range notification * @invalidate: Upon return the caller must stop using any SPTEs within this * range. This function can sleep. Return false only if sleeping * was required but mmu_notifier_range_blockable(range) is false. @@ -309,8 +309,8 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub); /** * mmu_interval_set_seq - Save the invalidation sequence - * @interval_sub - The subscription passed to invalidate - * @cur_seq - The cur_seq passed to the invalidate() callback + * @interval_sub: The subscription passed to invalidate + * @cur_seq: The cur_seq passed to the invalidate() callback * * This must be called unconditionally from the invalidate callback of a * struct mmu_interval_notifier_ops under the same lock that is used to call @@ -329,8 +329,8 @@ mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub, /** * mmu_interval_read_retry - End a read side critical section against a VA range - * interval_sub: The subscription - * seq: The return of the paired mmu_interval_read_begin() + * @interval_sub: The subscription + * @seq: The return of the paired mmu_interval_read_begin() * * This MUST be called under a user provided lock that is also held * unconditionally by op->invalidate() when it calls mmu_interval_set_seq(). @@ -338,7 +338,7 @@ mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub, * Each call should be paired with a single mmu_interval_read_begin() and * should be used to conclude the read side. * - * Returns true if an invalidation collided with this critical section, and + * Returns: true if an invalidation collided with this critical section, and * the caller should retry. */ static inline bool @@ -350,20 +350,21 @@ mmu_interval_read_retry(struct mmu_interval_notifier *interval_sub, /** * mmu_interval_check_retry - Test if a collision has occurred - * interval_sub: The subscription - * seq: The return of the matching mmu_interval_read_begin() + * @interval_sub: The subscription + * @seq: The return of the matching mmu_interval_read_begin() * * This can be used in the critical section between mmu_interval_read_begin() - * and mmu_interval_read_retry(). A return of true indicates an invalidation - * has collided with this critical region and a future - * mmu_interval_read_retry() will return true. - * - * False is not reliable and only suggests a collision may not have - * occurred. It can be called many times and does not have to hold the user - * provided lock. + * and mmu_interval_read_retry(). * * This call can be used as part of loops and other expensive operations to * expedite a retry. + * It can be called many times and does not have to hold the user + * provided lock. + * + * Returns: true indicates an invalidation has collided with this critical + * region and a future mmu_interval_read_retry() will return true. + * False is not reliable and only suggests a collision may not have + * occurred. */ static inline bool mmu_interval_check_retry(struct mmu_interval_notifier *interval_sub, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67e25f6d15a47..ae269a2e7f4dd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3576,17 +3576,49 @@ struct page_pool_bh { }; DECLARE_PER_CPU(struct page_pool_bh, system_page_pool); +#define XMIT_RECURSION_LIMIT 8 + #ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) { return this_cpu_read(softnet_data.xmit.recursion); } + +static inline bool dev_xmit_recursion(void) +{ + return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > + XMIT_RECURSION_LIMIT); +} + +static inline void dev_xmit_recursion_inc(void) +{ + __this_cpu_inc(softnet_data.xmit.recursion); +} + +static inline void dev_xmit_recursion_dec(void) +{ + __this_cpu_dec(softnet_data.xmit.recursion); +} #else static inline int dev_recursion_level(void) { return current->net_xmit.recursion; } +static inline bool dev_xmit_recursion(void) +{ + return unlikely(current->net_xmit.recursion > XMIT_RECURSION_LIMIT); +} + +static inline void dev_xmit_recursion_inc(void) +{ + current->net_xmit.recursion++; +} + +static inline void dev_xmit_recursion_dec(void) +{ + current->net_xmit.recursion--; +} #endif void __netif_schedule(struct Qdisc *q); diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index 184a1f9510fad..682f810463459 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -12,7 +12,7 @@ struct nvme_dhchap_key { size_t len; u8 hash; - u8 key[]; + u8 key[] __counted_by(len); }; u32 nvme_auth_get_seqnum(void); diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h index da5fa6f402947..0b42045988db0 100644 --- a/include/linux/rseq_types.h +++ b/include/linux/rseq_types.h @@ -133,10 +133,12 @@ struct rseq_data { }; * @active: MM CID is active for the task * @cid: The CID associated to the task either permanently or * borrowed from the CPU + * @node: Queued in the per MM MMCID list */ struct sched_mm_cid { unsigned int active; unsigned int cid; + struct hlist_node node; }; /** @@ -157,6 +159,7 @@ struct mm_cid_pcpu { * @work: Regular work to handle the affinity mode change case * @lock: Spinlock to protect against affinity setting which can't take @mutex * @mutex: Mutex to serialize forks and exits related to this mm + * @user_list: List of the MM CID users of a MM * @nr_cpus_allowed: The number of CPUs in the per MM allowed CPUs map. The map * is growth only. * @users: The number of tasks sharing this MM. Separate from mm::mm_users @@ -177,13 +180,14 @@ struct mm_mm_cid { raw_spinlock_t lock; struct mutex mutex; + struct hlist_head user_list; /* Low frequency modified */ unsigned int nr_cpus_allowed; unsigned int users; unsigned int pcpu_thrs; unsigned int update_deferred; -}____cacheline_aligned_in_smp; +} ____cacheline_aligned; #else /* CONFIG_SCHED_MM_CID */ struct mm_mm_cid { }; struct sched_mm_cid { }; diff --git a/include/linux/sched.h b/include/linux/sched.h index a7b4a980eb2f0..5a5d3dbc9cdf3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2354,7 +2354,6 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo #ifdef CONFIG_SCHED_MM_CID void sched_mm_cid_before_execve(struct task_struct *t); void sched_mm_cid_after_execve(struct task_struct *t); -void sched_mm_cid_fork(struct task_struct *t); void sched_mm_cid_exit(struct task_struct *t); static __always_inline int task_mm_cid(struct task_struct *t) { @@ -2363,7 +2362,6 @@ static __always_inline int task_mm_cid(struct task_struct *t) #else static inline void sched_mm_cid_before_execve(struct task_struct *t) { } static inline void sched_mm_cid_after_execve(struct task_struct *t) { } -static inline void sched_mm_cid_fork(struct task_struct *t) { } static inline void sched_mm_cid_exit(struct task_struct *t) { } static __always_inline int task_mm_cid(struct task_struct *t) { diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 809e4f7dfdbd4..4fe63169d5a21 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -792,7 +792,7 @@ for (bool done = false; !done; done = true) \ /** * scoped_user_rw_access_size - Start a scoped user read/write access with given size - * @uptr Pointer to the user space address to read from and write to + * @uptr: Pointer to the user space address to read from and write to * @size: Size of the access starting from @uptr * @elbl: Error label to goto when the access region is rejected * @@ -803,7 +803,7 @@ for (bool done = false; !done; done = true) \ /** * scoped_user_rw_access - Start a scoped user read/write access - * @uptr Pointer to the user space address to read from and write to + * @uptr: Pointer to the user space address to read from and write to * @elbl: Error label to goto when the access region is rejected * * The size of the access starting from @uptr is determined via sizeof(*@uptr)). diff --git a/include/linux/usb.h b/include/linux/usb.h index fbfcc70b07fbe..04277af4bb9d5 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1862,14 +1862,18 @@ void usb_free_noncoherent(struct usb_device *dev, size_t size, * SYNCHRONOUS CALL SUPPORT * *-------------------------------------------------------------------*/ +/* Maximum value allowed for timeout in synchronous routines below */ +#define USB_MAX_SYNCHRONOUS_TIMEOUT 60000 /* ms */ + extern int usb_control_msg(struct usb_device *dev, unsigned int pipe, __u8 request, __u8 requesttype, __u16 value, __u16 index, void *data, __u16 size, int timeout); extern int usb_interrupt_msg(struct usb_device *usb_dev, unsigned int pipe, void *data, int len, int *actual_length, int timeout); extern int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe, - void *data, int len, int *actual_length, - int timeout); + void *data, int len, int *actual_length, int timeout); +extern int usb_bulk_msg_killable(struct usb_device *usb_dev, unsigned int pipe, + void *data, int len, int *actual_length, int timeout); /* wrappers around usb_control_msg() for the most common standard requests */ int usb_control_msg_send(struct usb_device *dev, __u8 endpoint, __u8 request, diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 2f7bd2fdc6164..b3cc7beab4a3c 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -78,4 +78,7 @@ /* skip BOS descriptor request */ #define USB_QUIRK_NO_BOS BIT(17) +/* Device claims zero configurations, forcing to 1 */ +#define USB_QUIRK_FORCE_ONE_CONFIG BIT(18) + #endif /* __LINUX_USB_QUIRKS_H */ diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index b0e84896e6aca..bbf799ccf3b30 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -132,6 +132,7 @@ struct driver_info { #define FLAG_MULTI_PACKET 0x2000 #define FLAG_RX_ASSEMBLE 0x4000 /* rx packets may span >1 frames */ #define FLAG_NOARP 0x8000 /* device can't do ARP */ +#define FLAG_NOMAXMTU 0x10000 /* allow max_mtu above hard_mtu */ /* init device ... can sleep, or cause probe() failure */ int (*bind)(struct usbnet *, struct usb_interface *); diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 120db28658112..359b595f1df93 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -156,6 +156,18 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, { int pkt_len, err; + if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) { + if (dev) { + net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", + dev->name); + DEV_STATS_INC(dev, tx_errors); + } + kfree_skb(skb); + return; + } + + dev_xmit_recursion_inc(); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); IP6CB(skb)->flags = ip6cb_flags; pkt_len = skb->len - skb_inner_network_offset(skb); @@ -166,6 +178,8 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, pkt_len = -1; iptunnel_xmit_stats(dev, pkt_len); } + + dev_xmit_recursion_dec(); } #endif #endif diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 4021e6a73e32b..80662f8120803 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -27,6 +27,13 @@ #include #endif +/* Recursion limit for tunnel xmit to detect routing loops. + * Unlike XMIT_RECURSION_LIMIT (8) used in the no-qdisc path, tunnel + * recursion involves route lookups and full IP output, consuming much + * more stack per level, so a lower limit is needed. + */ +#define IP_TUNNEL_RECURSION_LIMIT 4 + /* Keep error state on tunnel for 30 sec */ #define IPTUNNEL_ERR_TIMEO (30*HZ) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 0d453484a585f..cdd95477af7a2 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -247,7 +247,7 @@ struct page_pool { /* User-facing fields, protected by page_pools_lock */ struct { struct hlist_node list; - u64 detach_time; + ktime_t detach_time; u32 id; } user; }; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1ff16141c8a5b..c10c4827f6b00 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -10,6 +10,8 @@ #include #include +#include + /* * this file is shared with liburing and that has to autodetect * if linux/time_types.h is available or not, it can @@ -341,6 +343,10 @@ enum io_uring_op { /* * sqe->timeout_flags + * + * IORING_TIMEOUT_IMMEDIATE_ARG: If set, sqe->addr stores the timeout + * value in nanoseconds instead of + * pointing to a timespec. */ #define IORING_TIMEOUT_ABS (1U << 0) #define IORING_TIMEOUT_UPDATE (1U << 1) @@ -349,6 +355,7 @@ enum io_uring_op { #define IORING_LINK_TIMEOUT_UPDATE (1U << 4) #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) #define IORING_TIMEOUT_MULTISHOT (1U << 6) +#define IORING_TIMEOUT_IMMEDIATE_ARG (1U << 7) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) /* @@ -886,15 +893,29 @@ struct io_uring_buf_ring { * use of it will consume only as much as it needs. This * requires that both the kernel and application keep * track of where the current read/recv index is at. + * IOU_PBUF_RING_KERNEL_MANAGED: If set, kernel allocates and manages the memory + * for the ring and its buffers. The application must set + * the buffer size through reg->buf_size and the size must + * be page-aligned. When the application subsequently calls + * mmap(2) with + * IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT), + * the virtual mapping returned is a contiguous mapping of + * the buffers. If set, IOU_PBUF_RING_MMAP must be set as + * well. */ enum io_uring_register_pbuf_ring_flags { IOU_PBUF_RING_MMAP = 1, IOU_PBUF_RING_INC = 2, + IOU_PBUF_RING_KERNEL_MANAGED = 4, }; /* argument for IORING_(UN)REGISTER_PBUF_RING */ struct io_uring_buf_reg { - __u64 ring_addr; + union { + __u64 ring_addr; + /* used if reg->flags & IOU_PBUF_RING_KERNEL_MANAGED */ + __u32 buf_size; + }; __u32 ring_entries; __u16 bgid; __u16 flags; @@ -1050,100 +1071,6 @@ struct io_timespec { __u64 tv_nsec; }; -/* Zero copy receive refill queue entry */ -struct io_uring_zcrx_rqe { - __u64 off; - __u32 len; - __u32 __pad; -}; - -struct io_uring_zcrx_cqe { - __u64 off; - __u64 __pad; -}; - -/* The bit from which area id is encoded into offsets */ -#define IORING_ZCRX_AREA_SHIFT 48 -#define IORING_ZCRX_AREA_MASK (~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1)) - -struct io_uring_zcrx_offsets { - __u32 head; - __u32 tail; - __u32 rqes; - __u32 __resv2; - __u64 __resv[2]; -}; - -enum io_uring_zcrx_area_flags { - IORING_ZCRX_AREA_DMABUF = 1, -}; - -struct io_uring_zcrx_area_reg { - __u64 addr; - __u64 len; - __u64 rq_area_token; - __u32 flags; - __u32 dmabuf_fd; - __u64 __resv2[2]; -}; - -enum zcrx_reg_flags { - ZCRX_REG_IMPORT = 1, -}; - -enum zcrx_features { - /* - * The user can ask for the desired rx page size by passing the - * value in struct io_uring_zcrx_ifq_reg::rx_buf_len. - */ - ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0, -}; - -/* - * Argument for IORING_REGISTER_ZCRX_IFQ - */ -struct io_uring_zcrx_ifq_reg { - __u32 if_idx; - __u32 if_rxq; - __u32 rq_entries; - __u32 flags; - - __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */ - __u64 region_ptr; /* struct io_uring_region_desc * */ - - struct io_uring_zcrx_offsets offsets; - __u32 zcrx_id; - __u32 rx_buf_len; - __u64 __resv[3]; -}; - -enum zcrx_ctrl_op { - ZCRX_CTRL_FLUSH_RQ, - ZCRX_CTRL_EXPORT, - - __ZCRX_CTRL_LAST, -}; - -struct zcrx_ctrl_flush_rq { - __u64 __resv[6]; -}; - -struct zcrx_ctrl_export { - __u32 zcrx_fd; - __u32 __resv1[11]; -}; - -struct zcrx_ctrl { - __u32 zcrx_id; - __u32 op; /* see enum zcrx_ctrl_op */ - __u64 __resv[2]; - - union { - struct zcrx_ctrl_export zc_export; - struct zcrx_ctrl_flush_rq zc_flush; - }; -}; - #ifdef __cplusplus } #endif diff --git a/include/uapi/linux/io_uring/zcrx.h b/include/uapi/linux/io_uring/zcrx.h new file mode 100644 index 0000000000000..5ce02c7a60969 --- /dev/null +++ b/include/uapi/linux/io_uring/zcrx.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* + * Header file for the io_uring zerocopy receive (zcrx) interface. + * + * Copyright (C) 2026 Pavel Begunkov + * Copyright (C) 2026 David Wei + * Copyright (C) Meta Platforms, Inc. + */ +#ifndef LINUX_IO_ZCRX_H +#define LINUX_IO_ZCRX_H + +#include + +/* Zero copy receive refill queue entry */ +struct io_uring_zcrx_rqe { + __u64 off; + __u32 len; + __u32 __pad; +}; + +struct io_uring_zcrx_cqe { + __u64 off; + __u64 __pad; +}; + +/* The bit from which area id is encoded into offsets */ +#define IORING_ZCRX_AREA_SHIFT 48 +#define IORING_ZCRX_AREA_MASK (~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1)) + +struct io_uring_zcrx_offsets { + __u32 head; + __u32 tail; + __u32 rqes; + __u32 __resv2; + __u64 __resv[2]; +}; + +enum io_uring_zcrx_area_flags { + IORING_ZCRX_AREA_DMABUF = 1, +}; + +struct io_uring_zcrx_area_reg { + __u64 addr; + __u64 len; + __u64 rq_area_token; + __u32 flags; + __u32 dmabuf_fd; + __u64 __resv2[2]; +}; + +enum zcrx_reg_flags { + ZCRX_REG_IMPORT = 1, + + /* + * Register a zcrx instance without a net device. All data will be + * copied. The refill queue entries might not be automatically + * consumed and need to be flushed, see ZCRX_CTRL_FLUSH_RQ. + */ + ZCRX_REG_NODEV = 2, +}; + +enum zcrx_features { + /* + * The user can ask for the desired rx page size by passing the + * value in struct io_uring_zcrx_ifq_reg::rx_buf_len. + */ + ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0, +}; + +/* + * Argument for IORING_REGISTER_ZCRX_IFQ + */ +struct io_uring_zcrx_ifq_reg { + __u32 if_idx; + __u32 if_rxq; + __u32 rq_entries; + __u32 flags; + + __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */ + __u64 region_ptr; /* struct io_uring_region_desc * */ + + struct io_uring_zcrx_offsets offsets; + __u32 zcrx_id; + __u32 rx_buf_len; + __u64 __resv[3]; +}; + +enum zcrx_ctrl_op { + ZCRX_CTRL_FLUSH_RQ, + ZCRX_CTRL_EXPORT, + + __ZCRX_CTRL_LAST, +}; + +struct zcrx_ctrl_flush_rq { + __u64 __resv[6]; +}; + +struct zcrx_ctrl_export { + __u32 zcrx_fd; + __u32 __resv1[11]; +}; + +struct zcrx_ctrl { + __u32 zcrx_id; + __u32 op; /* see enum zcrx_ctrl_op */ + __u64 __resv[2]; + + union { + struct zcrx_ctrl_export zc_export; + struct zcrx_ctrl_flush_rq zc_flush; + }; +}; + +#endif /* LINUX_IO_ZCRX_H */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 65500f5db3799..80364d4dbebb0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -14,6 +14,10 @@ #include #include +#ifdef __KERNEL__ +#include +#endif + #define KVM_API_VERSION 12 /* @@ -1601,7 +1605,11 @@ struct kvm_stats_desc { __u16 size; __u32 offset; __u32 bucket_size; +#ifdef __KERNEL__ + char name[KVM_STATS_NAME_SIZE]; +#else char name[]; +#endif }; #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) diff --git a/io_uring/Kconfig b/io_uring/Kconfig index a7ae23cf10357..a283d9e53787f 100644 --- a/io_uring/Kconfig +++ b/io_uring/Kconfig @@ -14,3 +14,8 @@ config IO_URING_BPF def_bool y depends on BPF depends on NET + +config IO_URING_BPF_OPS + def_bool y + depends on IO_URING + depends on BPF_SYSCALL && BPF_JIT && DEBUG_INFO_BTF diff --git a/io_uring/Makefile b/io_uring/Makefile index 931f9156132a0..c54e328d14104 100644 --- a/io_uring/Makefile +++ b/io_uring/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \ advise.o openclose.o statx.o timeout.o \ cancel.o waitid.o register.o \ truncate.o memmap.o alloc_cache.o \ - query.o + query.o loop.o obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o obj-$(CONFIG_IO_WQ) += io-wq.o @@ -25,3 +25,4 @@ obj-$(CONFIG_NET) += net.o cmd_net.o obj-$(CONFIG_PROC_FS) += fdinfo.o obj-$(CONFIG_IO_URING_MOCK_FILE) += mock_file.o obj-$(CONFIG_IO_URING_BPF) += bpf_filter.o +obj-$(CONFIG_IO_URING_BPF_OPS) += bpf-ops.o diff --git a/io_uring/bpf-ops.c b/io_uring/bpf-ops.c new file mode 100644 index 0000000000000..e4b244337aa98 --- /dev/null +++ b/io_uring/bpf-ops.c @@ -0,0 +1,270 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include + +#include "io_uring.h" +#include "register.h" +#include "loop.h" +#include "memmap.h" +#include "bpf-ops.h" + +static DEFINE_MUTEX(io_bpf_ctrl_mutex); +static const struct btf_type *loop_params_type; + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_io_uring_submit_sqes(struct io_ring_ctx *ctx, u32 nr) +{ + return io_submit_sqes(ctx, nr); +} + +__bpf_kfunc +__u8 *bpf_io_uring_get_region(struct io_ring_ctx *ctx, __u32 region_id, + const size_t rdwr_buf_size) +{ + struct io_mapped_region *r; + + lockdep_assert_held(&ctx->uring_lock); + + switch (region_id) { + case IOU_REGION_MEM: + r = &ctx->param_region; + break; + case IOU_REGION_CQ: + r = &ctx->ring_region; + break; + case IOU_REGION_SQ: + r = &ctx->sq_region; + break; + default: + return NULL; + } + + if (unlikely(rdwr_buf_size > io_region_size(r))) + return NULL; + return io_region_get_ptr(r); +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(io_uring_kfunc_set) +BTF_ID_FLAGS(func, bpf_io_uring_submit_sqes, KF_SLEEPABLE); +BTF_ID_FLAGS(func, bpf_io_uring_get_region, KF_RET_NULL); +BTF_KFUNCS_END(io_uring_kfunc_set) + +static const struct btf_kfunc_id_set bpf_io_uring_kfunc_set = { + .owner = THIS_MODULE, + .set = &io_uring_kfunc_set, +}; + +static int io_bpf_ops__loop_step(struct io_ring_ctx *ctx, + struct iou_loop_params *lp) +{ + return IOU_LOOP_STOP; +} + +static struct io_uring_bpf_ops io_bpf_ops_stubs = { + .loop_step = io_bpf_ops__loop_step, +}; + +static bool bpf_io_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (type != BPF_READ) + return false; + if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) + return false; + if (off % size != 0) + return false; + + return btf_ctx_access(off, size, type, prog, info); +} + +static int bpf_io_btf_struct_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, int off, + int size) +{ + const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id); + + if (t == loop_params_type) { + if (off + size <= offsetofend(struct iou_loop_params, cq_wait_idx)) + return SCALAR_VALUE; + } + + return -EACCES; +} + +static const struct bpf_verifier_ops bpf_io_verifier_ops = { + .get_func_proto = bpf_base_func_proto, + .is_valid_access = bpf_io_is_valid_access, + .btf_struct_access = bpf_io_btf_struct_access, +}; + +static const struct btf_type * +io_lookup_struct_type(struct btf *btf, const char *name) +{ + s32 type_id; + + type_id = btf_find_by_name_kind(btf, name, BTF_KIND_STRUCT); + if (type_id < 0) + return NULL; + return btf_type_by_id(btf, type_id); +} + +static int bpf_io_init(struct btf *btf) +{ + int ret; + + loop_params_type = io_lookup_struct_type(btf, "iou_loop_params"); + if (!loop_params_type) { + pr_err("io_uring: Failed to locate iou_loop_params\n"); + return -EINVAL; + } + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, + &bpf_io_uring_kfunc_set); + if (ret) { + pr_err("io_uring: Failed to register kfuncs (%d)\n", ret); + return ret; + } + return 0; +} + +static int bpf_io_check_member(const struct btf_type *t, + const struct btf_member *member, + const struct bpf_prog *prog) +{ + return 0; +} + +static int bpf_io_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + u32 moff = __btf_member_bit_offset(t, member) / 8; + const struct io_uring_bpf_ops *uops = udata; + struct io_uring_bpf_ops *ops = kdata; + + switch (moff) { + case offsetof(struct io_uring_bpf_ops, ring_fd): + ops->ring_fd = uops->ring_fd; + return 1; + } + return 0; +} + +static int io_install_bpf(struct io_ring_ctx *ctx, struct io_uring_bpf_ops *ops) +{ + if (ctx->flags & (IORING_SETUP_SQPOLL | IORING_SETUP_IOPOLL)) + return -EOPNOTSUPP; + if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) + return -EOPNOTSUPP; + + if (ctx->bpf_ops) + return -EBUSY; + if (WARN_ON_ONCE(!ops->loop_step)) + return -EINVAL; + + ops->priv = ctx; + ctx->bpf_ops = ops; + ctx->loop_step = ops->loop_step; + return 0; +} + +static int bpf_io_reg(void *kdata, struct bpf_link *link) +{ + struct io_uring_bpf_ops *ops = kdata; + struct io_ring_ctx *ctx; + struct file *file; + int ret = -EBUSY; + + file = io_uring_register_get_file(ops->ring_fd, false); + if (IS_ERR(file)) + return PTR_ERR(file); + ctx = file->private_data; + + scoped_guard(mutex, &io_bpf_ctrl_mutex) { + guard(mutex)(&ctx->uring_lock); + ret = io_install_bpf(ctx, ops); + } + + fput(file); + return ret; +} + +static void io_eject_bpf(struct io_ring_ctx *ctx) +{ + struct io_uring_bpf_ops *ops = ctx->bpf_ops; + + if (WARN_ON_ONCE(!ops)) + return; + if (WARN_ON_ONCE(ops->priv != ctx)) + return; + + ops->priv = NULL; + ctx->bpf_ops = NULL; + ctx->loop_step = NULL; +} + +static void bpf_io_unreg(void *kdata, struct bpf_link *link) +{ + struct io_uring_bpf_ops *ops = kdata; + struct io_ring_ctx *ctx; + + guard(mutex)(&io_bpf_ctrl_mutex); + ctx = ops->priv; + if (ctx) { + guard(mutex)(&ctx->uring_lock); + if (WARN_ON_ONCE(ctx->bpf_ops != ops)) + return; + + io_eject_bpf(ctx); + } +} + +void io_unregister_bpf_ops(struct io_ring_ctx *ctx) +{ + /* + * ->bpf_ops is write protected by io_bpf_ctrl_mutex and uring_lock, + * and read protected by either. Try to avoid taking the global lock + * for rings that never had any bpf installed. + */ + scoped_guard(mutex, &ctx->uring_lock) { + if (!ctx->bpf_ops) + return; + } + + guard(mutex)(&io_bpf_ctrl_mutex); + guard(mutex)(&ctx->uring_lock); + if (ctx->bpf_ops) + io_eject_bpf(ctx); +} + +static struct bpf_struct_ops bpf_ring_ops = { + .verifier_ops = &bpf_io_verifier_ops, + .reg = bpf_io_reg, + .unreg = bpf_io_unreg, + .check_member = bpf_io_check_member, + .init_member = bpf_io_init_member, + .init = bpf_io_init, + .cfi_stubs = &io_bpf_ops_stubs, + .name = "io_uring_bpf_ops", + .owner = THIS_MODULE, +}; + +static int __init io_uring_bpf_init(void) +{ + int ret; + + ret = register_bpf_struct_ops(&bpf_ring_ops, io_uring_bpf_ops); + if (ret) { + pr_err("io_uring: Failed to register struct_ops (%d)\n", ret); + return ret; + } + + return 0; +} +__initcall(io_uring_bpf_init); diff --git a/io_uring/bpf-ops.h b/io_uring/bpf-ops.h new file mode 100644 index 0000000000000..b39b3fd3acdab --- /dev/null +++ b/io_uring/bpf-ops.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef IOU_BPF_OPS_H +#define IOU_BPF_OPS_H + +#include + +enum { + IOU_REGION_MEM, + IOU_REGION_CQ, + IOU_REGION_SQ, +}; + +struct io_uring_bpf_ops { + int (*loop_step)(struct io_ring_ctx *ctx, struct iou_loop_params *lp); + + __u32 ring_fd; + void *priv; +}; + +#ifdef CONFIG_IO_URING_BPF_OPS +void io_unregister_bpf_ops(struct io_ring_ctx *ctx); +#else +static inline void io_unregister_bpf_ops(struct io_ring_ctx *ctx) +{ +} +#endif + +#endif /* IOU_BPF_OPS_H */ diff --git a/io_uring/bpf_filter.c b/io_uring/bpf_filter.c index 6a98750e38b00..c0037632b7af6 100644 --- a/io_uring/bpf_filter.c +++ b/io_uring/bpf_filter.c @@ -85,7 +85,7 @@ int __io_uring_run_bpf_filters(struct io_bpf_filter __rcu **filters, do { if (filter == &dummy_filter) return -EACCES; - ret = bpf_prog_run(filter->prog, &bpf_ctx); + ret = bpf_prog_run_pin_on_cpu(filter->prog, &bpf_ctx); if (!ret) return -EACCES; filter = filter->next; diff --git a/io_uring/cmd_net.c b/io_uring/cmd_net.c index 125a81c520a6c..7cd411fc4f33e 100644 --- a/io_uring/cmd_net.c +++ b/io_uring/cmd_net.c @@ -7,6 +7,21 @@ #include "uring_cmd.h" #include "io_uring.h" +static int io_uring_cmd_get_sock_ioctl(struct socket *sock, int op) +{ + struct sock *sk = sock->sk; + struct proto *prot = READ_ONCE(sk->sk_prot); + int ret, arg = 0; + + if (!prot || !prot->ioctl) + return -EOPNOTSUPP; + + ret = prot->ioctl(sk, op, &arg); + if (ret) + return ret; + return arg; +} + static inline int io_uring_cmd_getsockopt(struct socket *sock, struct io_uring_cmd *cmd, unsigned int issue_flags) @@ -156,27 +171,12 @@ static int io_uring_cmd_getsockname(struct socket *sock, int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct socket *sock = cmd->file->private_data; - struct sock *sk = sock->sk; - struct proto *prot = READ_ONCE(sk->sk_prot); - int ret, arg = 0; switch (cmd->cmd_op) { case SOCKET_URING_OP_SIOCINQ: - if (!prot || !prot->ioctl) - return -EOPNOTSUPP; - - ret = prot->ioctl(sk, SIOCINQ, &arg); - if (ret) - return ret; - return arg; + return io_uring_cmd_get_sock_ioctl(sock, SIOCINQ); case SOCKET_URING_OP_SIOCOUTQ: - if (!prot || !prot->ioctl) - return -EOPNOTSUPP; - - ret = prot->ioctl(sk, SIOCOUTQ, &arg); - if (ret) - return ret; - return arg; + return io_uring_cmd_get_sock_ioctl(sock, SIOCOUTQ); case SOCKET_URING_OP_GETSOCKOPT: return io_uring_cmd_getsockopt(sock, cmd, issue_flags); case SOCKET_URING_OP_SETSOCKOPT: diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c index cbea1c2894857..3da028500f76a 100644 --- a/io_uring/eventfd.c +++ b/io_uring/eventfd.c @@ -76,11 +76,15 @@ void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event) { bool skip = false; struct io_ev_fd *ev_fd; - - if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) - return; + struct io_rings *rings; guard(rcu)(); + + rings = rcu_dereference(ctx->rings_rcu); + if (!rings) + return; + if (READ_ONCE(rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) + return; ev_fd = rcu_dereference(ctx->io_ev_fd); /* * Check again if ev_fd exists in case an io_eventfd_unregister call @@ -144,7 +148,7 @@ int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg, spin_unlock(&ctx->completion_lock); ev_fd->eventfd_async = eventfd_async; - ctx->has_evfd = true; + ctx->int_flags |= IO_RING_F_HAS_EVFD; refcount_set(&ev_fd->refs, 1); atomic_set(&ev_fd->ops, 0); rcu_assign_pointer(ctx->io_ev_fd, ev_fd); @@ -158,7 +162,7 @@ int io_eventfd_unregister(struct io_ring_ctx *ctx) ev_fd = rcu_dereference_protected(ctx->io_ev_fd, lockdep_is_held(&ctx->uring_lock)); if (ev_fd) { - ctx->has_evfd = false; + ctx->int_flags &= ~IO_RING_F_HAS_EVFD; rcu_assign_pointer(ctx->io_ev_fd, NULL); io_eventfd_put(ev_fd); return 0; diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 80178b69e05a2..c2d3e45544bb4 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -119,12 +119,14 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) sq_idx); break; } - if ((++sq_head & sq_mask) == 0) { + if (sq_idx == sq_mask) { seq_printf(m, "%5u: corrupted sqe, wrapping 128B entry\n", sq_idx); break; } + sq_head++; + i++; sqe128 = true; } seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, " diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index aa95703165f12..079b378358335 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -87,6 +87,7 @@ #include "msg_ring.h" #include "memmap.h" #include "zcrx.h" +#include "bpf-ops.h" #include "timeout.h" #include "poll.h" @@ -95,6 +96,7 @@ #include "eventfd.h" #include "wait.h" #include "bpf_filter.h" +#include "loop.h" #define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \ IOSQE_IO_HARDLINK | IOSQE_ASYNC) @@ -356,7 +358,6 @@ static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) static void io_prep_async_work(struct io_kiocb *req) { const struct io_issue_def *def = &io_issue_defs[req->opcode]; - struct io_ring_ctx *ctx = req->ctx; if (!(req->flags & REQ_F_CREDS)) { req->flags |= REQ_F_CREDS; @@ -378,7 +379,7 @@ static void io_prep_async_work(struct io_kiocb *req) if (should_hash && (req->file->f_flags & O_DIRECT) && (req->file->f_op->fop_flags & FOP_DIO_PARALLEL_WRITE)) should_hash = false; - if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL)) + if (should_hash || (req->flags & REQ_F_IOPOLL)) io_wq_hash_work(&req->work, file_inode(req->file)); } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) { if (def->unbound_nonreg_file) @@ -477,17 +478,17 @@ static __cold noinline void io_queue_deferred(struct io_ring_ctx *ctx) void __io_commit_cqring_flush(struct io_ring_ctx *ctx) { - if (ctx->poll_activated) + if (ctx->int_flags & IO_RING_F_POLL_ACTIVATED) io_poll_wq_wake(ctx); - if (ctx->off_timeout_used) + if (ctx->int_flags & IO_RING_F_OFF_TIMEOUT_USED) io_flush_timeouts(ctx); - if (ctx->has_evfd) + if (ctx->int_flags & IO_RING_F_HAS_EVFD) io_eventfd_signal(ctx, true); } static inline void __io_cq_lock(struct io_ring_ctx *ctx) { - if (!ctx->lockless_cq) + if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) spin_lock(&ctx->completion_lock); } @@ -500,11 +501,11 @@ static inline void io_cq_lock(struct io_ring_ctx *ctx) static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx) { io_commit_cqring(ctx); - if (!ctx->task_complete) { - if (!ctx->lockless_cq) + if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)) { + if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) spin_unlock(&ctx->completion_lock); /* IOPOLL rings only need to wake up if it's also SQPOLL */ - if (!ctx->syscall_iopoll) + if (!(ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL)) io_cqring_wake(ctx); } io_commit_cqring_flush(ctx); @@ -589,6 +590,11 @@ void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); } +void io_cqring_overflow_flush_locked(struct io_ring_ctx *ctx) +{ + __io_cqring_overflow_flush(ctx, false); +} + /* must to be called somewhat shortly after putting a request */ static inline void io_put_task(struct io_kiocb *req) { @@ -830,7 +836,7 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags) { lockdep_assert_held(&ctx->uring_lock); - lockdep_assert(ctx->lockless_cq); + lockdep_assert(ctx->int_flags & IO_RING_F_LOCKLESS_CQ); if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) { struct io_cqe cqe = io_init_cqe(user_data, res, cflags); @@ -860,7 +866,7 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags) lockdep_assert(!io_wq_current_is_worker()); lockdep_assert_held(&ctx->uring_lock); - if (!ctx->lockless_cq) { + if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) { spin_lock(&ctx->completion_lock); posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); spin_unlock(&ctx->completion_lock); @@ -885,7 +891,7 @@ bool io_req_post_cqe32(struct io_kiocb *req, struct io_uring_cqe cqe[2]) lockdep_assert_held(&ctx->uring_lock); cqe[0].user_data = req->cqe.user_data; - if (!ctx->lockless_cq) { + if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) { spin_lock(&ctx->completion_lock); posted = io_fill_cqe_aux32(ctx, cqe); spin_unlock(&ctx->completion_lock); @@ -913,7 +919,7 @@ static void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags) * Handle special CQ sync cases via task_work. DEFER_TASKRUN requires * the submitter task context, IOPOLL protects with uring_lock. */ - if (ctx->lockless_cq || (req->flags & REQ_F_REISSUE)) { + if ((ctx->int_flags & IO_RING_F_LOCKLESS_CQ) || (req->flags & REQ_F_REISSUE)) { defer_complete: req->io_task_work.func = io_req_task_complete; io_req_task_work_add(req); @@ -1067,12 +1073,14 @@ void io_queue_next(struct io_kiocb *req) static inline void io_req_put_rsrc_nodes(struct io_kiocb *req) { + struct io_ring_ctx *ctx = req->ctx; + if (req->file_node) { - io_put_rsrc_node(req->ctx, req->file_node); + io_put_rsrc_node(ctx, req->file_node); req->file_node = NULL; } if (req->flags & REQ_F_BUF_NODE) - io_put_rsrc_node(req->ctx, req->buf_node); + io_put_rsrc_node(ctx, req->buf_node); } static void io_free_batch_list(struct io_ring_ctx *ctx, @@ -1135,7 +1143,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx) */ if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) && unlikely(!io_fill_cqe_req(ctx, req))) { - if (ctx->lockless_cq) + if (ctx->int_flags & IO_RING_F_LOCKLESS_CQ) io_cqe_overflow(ctx, &req->cqe, &req->big_cqe); else io_cqe_overflow_locked(ctx, &req->cqe, &req->big_cqe); @@ -1148,7 +1156,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx) INIT_WQ_LIST(&state->compl_reqs); } - if (unlikely(ctx->drain_active)) + if (unlikely(ctx->int_flags & IO_RING_F_DRAIN_ACTIVE)) io_queue_deferred(ctx); ctx->submit_state.cq_flush = false; @@ -1187,7 +1195,6 @@ __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events) { - unsigned int nr_events = 0; unsigned long check_cq; min_events = min(min_events, ctx->cq_entries); @@ -1230,8 +1237,6 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events) * very same mutex. */ if (list_empty(&ctx->iopoll_list) || io_task_work_pending(ctx)) { - u32 tail = ctx->cached_cq_tail; - (void) io_run_local_work_locked(ctx, min_events); if (task_work_pending(current) || list_empty(&ctx->iopoll_list)) { @@ -1240,7 +1245,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events) mutex_lock(&ctx->uring_lock); } /* some requests don't go through iopoll_list */ - if (tail != ctx->cached_cq_tail || list_empty(&ctx->iopoll_list)) + if (list_empty(&ctx->iopoll_list)) break; } ret = io_do_iopoll(ctx, !min_events); @@ -1251,9 +1256,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events) return -EINTR; if (need_resched()) break; - - nr_events += ret; - } while (nr_events < min_events); + } while (io_cqring_events(ctx) < min_events); return 0; } @@ -1344,7 +1347,7 @@ static __cold void io_drain_req(struct io_kiocb *req) list_add_tail(&de->list, &ctx->defer_list); io_queue_deferred(ctx); if (!drain && list_empty(&ctx->defer_list)) - ctx->drain_active = false; + ctx->int_flags &= ~IO_RING_F_DRAIN_ACTIVE; } static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def, @@ -1418,8 +1421,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) if (ret == IOU_ISSUE_SKIP_COMPLETE) { ret = 0; - /* If the op doesn't have a file, we're not polling for it */ - if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue) + if (req->flags & REQ_F_IOPOLL) io_iopoll_req_issued(req, issue_flags); } return ret; @@ -1435,7 +1437,7 @@ int io_poll_issue(struct io_kiocb *req, io_tw_token_t tw) io_tw_lock(req->ctx, tw); WARN_ON_ONCE(!req->file); - if (WARN_ON_ONCE(req->ctx->flags & IORING_SETUP_IOPOLL)) + if (WARN_ON_ONCE(req->flags & REQ_F_IOPOLL)) return -EFAULT; ret = __io_issue_sqe(req, issue_flags, &io_issue_defs[req->opcode]); @@ -1533,7 +1535,7 @@ void io_wq_submit_work(struct io_wq_work *work) * wait for request slots on the block side. */ if (!needs_poll) { - if (!(req->ctx->flags & IORING_SETUP_IOPOLL)) + if (!(req->flags & REQ_F_IOPOLL)) break; if (io_wq_worker_stopped()) break; @@ -1655,7 +1657,7 @@ static void io_queue_sqe_fallback(struct io_kiocb *req) } else { /* can't fail with IO_URING_F_INLINE */ io_req_sqe_copy(req, IO_URING_F_INLINE); - if (unlikely(req->ctx->drain_active)) + if (unlikely(req->ctx->int_flags & IO_RING_F_DRAIN_ACTIVE)) io_drain_req(req); else io_queue_iowq(req); @@ -1671,7 +1673,7 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx, struct io_kiocb *req, unsigned int sqe_flags) { - if (!ctx->op_restricted) + if (!(ctx->int_flags & IO_RING_F_OP_RESTRICTED)) return true; if (!test_bit(req->opcode, ctx->restrictions.sqe_op)) return false; @@ -1691,7 +1693,7 @@ static void io_init_drain(struct io_ring_ctx *ctx) { struct io_kiocb *head = ctx->submit_state.link.head; - ctx->drain_active = true; + ctx->int_flags |= IO_RING_F_DRAIN_ACTIVE; if (head) { /* * If we need to drain a request in the middle of a link, drain @@ -1701,7 +1703,7 @@ static void io_init_drain(struct io_ring_ctx *ctx) * link. */ head->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC; - ctx->drain_next = true; + ctx->int_flags |= IO_RING_F_DRAIN_NEXT; } } @@ -1745,7 +1747,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, * well as 2 contiguous entries. */ if (!(ctx->flags & IORING_SETUP_SQE_MIXED) || *left < 2 || - !(ctx->cached_sq_head & (ctx->sq_entries - 1))) + (unsigned)(sqe - ctx->sq_sqes) >= ctx->sq_entries - 1) return io_init_fail_req(req, -EINVAL); /* * A 128b operation on a mixed SQ uses two entries, so we have @@ -1767,23 +1769,23 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->buf_index = READ_ONCE(sqe->buf_group); } if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS) - ctx->drain_disabled = true; + ctx->int_flags |= IO_RING_F_DRAIN_DISABLED; if (sqe_flags & IOSQE_IO_DRAIN) { - if (ctx->drain_disabled) + if (ctx->int_flags & IO_RING_F_DRAIN_DISABLED) return io_init_fail_req(req, -EOPNOTSUPP); io_init_drain(ctx); } } - if (unlikely(ctx->op_restricted || ctx->drain_active || ctx->drain_next)) { + if (unlikely(ctx->int_flags & (IO_RING_F_OP_RESTRICTED | IO_RING_F_DRAIN_ACTIVE | IO_RING_F_DRAIN_NEXT))) { if (!io_check_restriction(ctx, req, sqe_flags)) return io_init_fail_req(req, -EACCES); /* knock it to the slow queue path, will be drained there */ - if (ctx->drain_active) + if (ctx->int_flags & IO_RING_F_DRAIN_ACTIVE) req->flags |= REQ_F_FORCE_ASYNC; /* if there is no link, we're at "next" request and need to drain */ - if (unlikely(ctx->drain_next) && !ctx->submit_state.link.head) { - ctx->drain_next = false; - ctx->drain_active = true; + if (unlikely(ctx->int_flags & IO_RING_F_DRAIN_NEXT) && !ctx->submit_state.link.head) { + ctx->int_flags &= ~IO_RING_F_DRAIN_NEXT; + ctx->int_flags |= IO_RING_F_DRAIN_ACTIVE; req->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC; } } @@ -2015,7 +2017,7 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) if (ctx->flags & IORING_SETUP_SQ_REWIND) entries = ctx->sq_entries; else - entries = io_sqring_entries(ctx); + entries = __io_sqring_entries(ctx); entries = min(nr, entries); if (unlikely(!entries)) @@ -2066,6 +2068,7 @@ static void io_rings_free(struct io_ring_ctx *ctx) io_free_region(ctx->user, &ctx->sq_region); io_free_region(ctx->user, &ctx->ring_region); ctx->rings = NULL; + RCU_INIT_POINTER(ctx->rings_rcu, NULL); ctx->sq_sqes = NULL; } @@ -2147,12 +2150,13 @@ static __cold void io_req_caches_free(struct io_ring_ctx *ctx) static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) { + io_unregister_bpf_ops(ctx); io_sq_thread_finish(ctx); mutex_lock(&ctx->uring_lock); io_sqe_buffers_unregister(ctx); io_sqe_files_unregister(ctx); - io_unregister_zcrx_ifqs(ctx); + io_unregister_zcrx(ctx); io_cqring_overflow_kill(ctx); io_eventfd_unregister(ctx); io_free_alloc_caches(ctx); @@ -2203,7 +2207,7 @@ static __cold void io_activate_pollwq_cb(struct callback_head *cb) poll_wq_task_work); mutex_lock(&ctx->uring_lock); - ctx->poll_activated = true; + ctx->int_flags |= IO_RING_F_POLL_ACTIVATED; mutex_unlock(&ctx->uring_lock); /* @@ -2218,9 +2222,9 @@ __cold void io_activate_pollwq(struct io_ring_ctx *ctx) { spin_lock(&ctx->completion_lock); /* already activated or in progress */ - if (ctx->poll_activated || ctx->poll_wq_task_work.func) + if ((ctx->int_flags & IO_RING_F_POLL_ACTIVATED) || ctx->poll_wq_task_work.func) goto out; - if (WARN_ON_ONCE(!ctx->task_complete)) + if (WARN_ON_ONCE(!(ctx->int_flags & IO_RING_F_TASK_COMPLETE))) goto out; if (!ctx->submitter_task) goto out; @@ -2241,7 +2245,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) struct io_ring_ctx *ctx = file->private_data; __poll_t mask = 0; - if (unlikely(!ctx->poll_activated)) + if (unlikely(!(data_race(ctx->int_flags) & IO_RING_F_POLL_ACTIVATED))) io_activate_pollwq(ctx); /* * provides mb() which pairs with barrier from wq_has_sleeper @@ -2249,7 +2253,9 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) */ poll_wait(file, &ctx->poll_wq, wait); - if (!io_sqring_full(ctx)) + rcu_read_lock(); + + if (!__io_sqring_full(ctx)) mask |= EPOLLOUT | EPOLLWRNORM; /* @@ -2269,6 +2275,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) if (__io_cqring_events_user(ctx) || io_has_work(ctx)) mask |= EPOLLIN | EPOLLRDNORM; + rcu_read_unlock(); return mask; } @@ -2304,6 +2311,10 @@ static __cold void io_ring_exit_work(struct work_struct *work) struct io_tctx_node *node; int ret; + mutex_lock(&ctx->uring_lock); + io_terminate_zcrx(ctx); + mutex_unlock(&ctx->uring_lock); + /* * If we're doing polled IO and end up having requests being * submitted async (out-of-line), then completions can come in while @@ -2577,6 +2588,11 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, if (unlikely(smp_load_acquire(&ctx->flags) & IORING_SETUP_R_DISABLED)) goto out; + if (io_has_loop_ops(ctx)) { + ret = io_run_loop(ctx); + goto out; + } + /* * For SQ polling, the thread will do all submissions and completions. * Just return the requested submit count, and wake the thread if @@ -2606,7 +2622,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, goto out; } if (flags & IORING_ENTER_GETEVENTS) { - if (ctx->syscall_iopoll) + if (ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL) goto iopoll_locked; /* * Ignore errors, we'll soon call io_cqring_wait() and @@ -2621,7 +2637,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, if (flags & IORING_ENTER_GETEVENTS) { int ret2; - if (ctx->syscall_iopoll) { + if (ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL) { /* * We disallow the app entering submit/complete with * polling, but we still need to lock the ring to @@ -2703,6 +2719,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, if (ret) return ret; ctx->rings = rings = io_region_get_ptr(&ctx->ring_region); + rcu_assign_pointer(ctx->rings_rcu, rings); if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) ctx->sq_array = (u32 *)((char *)rings + rl->sq_array_offset); @@ -2921,9 +2938,9 @@ static void io_ctx_restriction_clone(struct io_ring_ctx *ctx, if (dst->bpf_filters) WRITE_ONCE(ctx->bpf_filters, dst->bpf_filters->filters); if (dst->op_registered) - ctx->op_restricted = 1; + ctx->int_flags |= IO_RING_F_OP_RESTRICTED; if (dst->reg_registered) - ctx->reg_restricted = 1; + ctx->int_flags |= IO_RING_F_REG_RESTRICTED; } static __cold int io_uring_create(struct io_ctx_config *config) @@ -2950,17 +2967,18 @@ static __cold int io_uring_create(struct io_ctx_config *config) if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) && !(ctx->flags & IORING_SETUP_IOPOLL)) - ctx->task_complete = true; + ctx->int_flags |= IO_RING_F_TASK_COMPLETE; - if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL)) - ctx->lockless_cq = true; + if ((ctx->int_flags & IO_RING_F_TASK_COMPLETE) || + (ctx->flags & IORING_SETUP_IOPOLL)) + ctx->int_flags |= IO_RING_F_LOCKLESS_CQ; /* * lazy poll_wq activation relies on ->task_complete for synchronisation * purposes, see io_activate_pollwq() */ - if (!ctx->task_complete) - ctx->poll_activated = true; + if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)) + ctx->int_flags |= IO_RING_F_POLL_ACTIVATED; /* * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user @@ -2970,9 +2988,10 @@ static __cold int io_uring_create(struct io_ctx_config *config) */ if (ctx->flags & IORING_SETUP_IOPOLL && !(ctx->flags & IORING_SETUP_SQPOLL)) - ctx->syscall_iopoll = 1; + ctx->int_flags |= IO_RING_F_SYSCALL_IOPOLL; - ctx->compat = in_compat_syscall(); + if (in_compat_syscall()) + ctx->int_flags |= IO_RING_F_COMPAT; if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK)) ctx->user = get_uid(current_user()); diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 0fa844faf2871..5c47ed0b4276d 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -142,16 +142,28 @@ struct io_wait_queue { #endif }; +static inline struct io_rings *io_get_rings(struct io_ring_ctx *ctx) +{ + return rcu_dereference_check(ctx->rings_rcu, + lockdep_is_held(&ctx->uring_lock) || + lockdep_is_held(&ctx->completion_lock)); +} + static inline bool io_should_wake(struct io_wait_queue *iowq) { struct io_ring_ctx *ctx = iowq->ctx; - int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail; + struct io_rings *rings; + int dist; + + guard(rcu)(); + rings = io_get_rings(ctx); /* * Wake up if we have enough events, or if a timeout occurred since we * started waiting. For timeouts, we always want to return to userspace, * regardless of event count. */ + dist = READ_ONCE(rings->cq.tail) - (int) iowq->cq_tail; return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; } @@ -211,7 +223,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) if (ctx->flags & IORING_SETUP_IOPOLL) { lockdep_assert_held(&ctx->uring_lock); - } else if (!ctx->task_complete) { + } else if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)) { lockdep_assert_held(&ctx->completion_lock); } else if (ctx->submitter_task) { /* @@ -228,7 +240,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) static inline bool io_is_compat(struct io_ring_ctx *ctx) { - return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->compat); + return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->int_flags & IO_RING_F_COMPAT); } static inline void io_submit_flush_completions(struct io_ring_ctx *ctx) @@ -431,9 +443,9 @@ static inline void io_cqring_wake(struct io_ring_ctx *ctx) __io_wq_wake(&ctx->cq_wait); } -static inline bool io_sqring_full(struct io_ring_ctx *ctx) +static inline bool __io_sqring_full(struct io_ring_ctx *ctx) { - struct io_rings *r = ctx->rings; + struct io_rings *r = io_get_rings(ctx); /* * SQPOLL must use the actual sqring head, as using the cached_sq_head @@ -445,9 +457,15 @@ static inline bool io_sqring_full(struct io_ring_ctx *ctx) return READ_ONCE(r->sq.tail) - READ_ONCE(r->sq.head) == ctx->sq_entries; } -static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) +static inline bool io_sqring_full(struct io_ring_ctx *ctx) { - struct io_rings *rings = ctx->rings; + guard(rcu)(); + return __io_sqring_full(ctx); +} + +static inline unsigned int __io_sqring_entries(struct io_ring_ctx *ctx) +{ + struct io_rings *rings = io_get_rings(ctx); unsigned int entries; /* make sure SQ entry isn't read before tail */ @@ -455,6 +473,12 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) return min(entries, ctx->sq_entries); } +static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) +{ + guard(rcu)(); + return __io_sqring_entries(ctx); +} + /* * Don't complete immediately but use deferred completion infrastructure. * Protected by ->uring_lock and can only be used either with @@ -470,10 +494,12 @@ static inline void io_req_complete_defer(struct io_kiocb *req) wq_list_add_tail(&req->comp_list, &state->compl_reqs); } +#define SHOULD_FLUSH_MASK (IO_RING_F_OFF_TIMEOUT_USED | \ + IO_RING_F_HAS_EVFD | IO_RING_F_POLL_ACTIVATED) + static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx) { - if (unlikely(ctx->off_timeout_used || - ctx->has_evfd || ctx->poll_activated)) + if (unlikely(data_race(ctx->int_flags) & SHOULD_FLUSH_MASK)) __io_commit_cqring_flush(ctx); } diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 2ffa95b1c6010..59e83cfa2e1f6 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -34,6 +35,10 @@ struct io_provide_buf { static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) { + /* No data consumed, return false early to avoid consuming the buffer */ + if (!len) + return false; + while (len) { struct io_uring_buf *buf; u32 buf_len, this_len; @@ -101,6 +106,54 @@ void io_kbuf_drop_legacy(struct io_kiocb *req) req->kbuf = NULL; } +int io_uring_kmbuf_recycle(struct io_uring_cmd *cmd, unsigned int buf_group, + u64 addr, unsigned int len, unsigned int bid, + unsigned int issue_flags) +{ + struct io_kiocb *req = cmd_to_io_kiocb(cmd); + struct io_ring_ctx *ctx = req->ctx; + struct io_uring_buf_ring *br; + struct io_uring_buf *buf; + struct io_buffer_list *bl; + unsigned int required_flags; + int ret = -EINVAL; + + if (WARN_ON_ONCE(req->flags & REQ_F_BUFFERS_COMMIT)) + return ret; + + io_ring_submit_lock(ctx, issue_flags); + + bl = io_buffer_get_list(ctx, buf_group); + + if (!bl) + goto err; + + required_flags = IOBL_BUF_RING | IOBL_KERNEL_MANAGED; + if (WARN_ON_ONCE((bl->flags & required_flags) != required_flags)) + goto err; + + br = bl->buf_ring; + + if (WARN_ON_ONCE((__u16)(br->tail - bl->head) >= bl->nr_entries)) + goto err; + + buf = &br->bufs[(br->tail) & bl->mask]; + + buf->addr = addr; + buf->len = len; + buf->bid = bid; + + req->flags &= ~REQ_F_BUFFER_RING; + + br->tail++; + ret = 0; + +err: + io_ring_submit_unlock(ctx, issue_flags); + return ret; +} +EXPORT_SYMBOL_GPL(io_uring_kmbuf_recycle); + bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; @@ -111,9 +164,18 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) buf = req->kbuf; bl = io_buffer_get_list(ctx, buf->bgid); - list_add(&buf->list, &bl->buf_list); - bl->nbufs++; + /* + * If the buffer list was upgraded to a ring-based one, or removed, + * while the request was in-flight in io-wq, drop it. + */ + if (bl && !(bl->flags & IOBL_BUF_RING)) { + list_add(&buf->list, &bl->buf_list); + bl->nbufs++; + } else { + kfree(buf); + } req->flags &= ~REQ_F_BUFFER_SELECTED; + req->kbuf = NULL; io_ring_submit_unlock(ctx, issue_flags); return true; @@ -155,7 +217,8 @@ static int io_provided_buffers_select(struct io_kiocb *req, size_t *len, return 1; } -static bool io_should_commit(struct io_kiocb *req, unsigned int issue_flags) +static bool io_should_commit(struct io_kiocb *req, struct io_buffer_list *bl, + unsigned int issue_flags) { /* * If we came in unlocked, we have no choice but to consume the @@ -170,15 +233,19 @@ static bool io_should_commit(struct io_kiocb *req, unsigned int issue_flags) if (issue_flags & IO_URING_F_UNLOCKED) return true; - /* uring_cmd commits kbuf upfront, no need to auto-commit */ + /* kernel-managed buffers are auto-committed */ + if (bl->flags & IOBL_KERNEL_MANAGED) + return true; + + /* multishot uring_cmd commits kbuf upfront, no need to auto-commit */ if (!io_file_can_poll(req) && !io_is_uring_cmd(req)) return true; return false; } -static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, - struct io_buffer_list *bl, - unsigned int issue_flags) +struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, + struct io_buffer_list *bl, + unsigned int issue_flags) { struct io_uring_buf_ring *br = bl->buf_ring; __u16 tail, head = bl->head; @@ -200,14 +267,20 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT; req->buf_index = READ_ONCE(buf->bid); sel.buf_list = bl; - sel.addr = u64_to_user_ptr(READ_ONCE(buf->addr)); + sel.buf_id = req->buf_index; + if (bl->flags & IOBL_KERNEL_MANAGED) + sel.kaddr = (void *)(uintptr_t)READ_ONCE(buf->addr); + else + sel.addr = u64_to_user_ptr(READ_ONCE(buf->addr)); - if (io_should_commit(req, issue_flags)) { - io_kbuf_commit(req, sel.buf_list, *len, 1); + if (io_should_commit(req, bl, issue_flags)) { + if (!io_kbuf_commit(req, sel.buf_list, *len, 1)) + req->flags |= REQ_F_BUF_MORE; sel.buf_list = NULL; } return sel; } +EXPORT_SYMBOL_GPL(io_ring_buffer_select); struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len, unsigned buf_group, unsigned int issue_flags) @@ -216,19 +289,73 @@ struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len, struct io_br_sel sel = { }; struct io_buffer_list *bl; - io_ring_submit_lock(req->ctx, issue_flags); + io_ring_submit_lock(ctx, issue_flags); bl = io_buffer_get_list(ctx, buf_group); if (likely(bl)) { - if (bl->flags & IOBL_BUF_RING) + if (bl->flags & IOBL_BUF_RING) { sel = io_ring_buffer_select(req, len, bl, issue_flags); - else + } else { sel.addr = io_provided_buffer_select(req, len, bl); + sel.buf_id = req->buf_index; + } } - io_ring_submit_unlock(req->ctx, issue_flags); + io_ring_submit_unlock(ctx, issue_flags); return sel; } +int io_uring_buf_ring_pin(struct io_uring_cmd *cmd, unsigned buf_group, + unsigned issue_flags, struct io_buffer_list **out_bl) +{ + struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx; + struct io_buffer_list *bl; + int ret = -EINVAL; + + io_ring_submit_lock(ctx, issue_flags); + + bl = io_buffer_get_list(ctx, buf_group); + if (!bl || !(bl->flags & IOBL_BUF_RING)) + goto err; + + if (unlikely(bl->flags & IOBL_PINNED)) { + ret = -EALREADY; + goto err; + } + + bl->flags |= IOBL_PINNED; + ret = 0; + *out_bl = bl; +err: + io_ring_submit_unlock(ctx, issue_flags); + return ret; +} +EXPORT_SYMBOL_GPL(io_uring_buf_ring_pin); + +int io_uring_buf_ring_unpin(struct io_uring_cmd *cmd, unsigned buf_group, + unsigned issue_flags) +{ + struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx; + struct io_buffer_list *bl; + unsigned int required_flags; + int ret = -EINVAL; + + io_ring_submit_lock(ctx, issue_flags); + + bl = io_buffer_get_list(ctx, buf_group); + if (!bl) + goto err; + + required_flags = IOBL_BUF_RING | IOBL_PINNED; + if ((bl->flags & required_flags) == required_flags) { + bl->flags &= ~IOBL_PINNED; + ret = 0; + } +err: + io_ring_submit_unlock(ctx, issue_flags); + return ret; +} +EXPORT_SYMBOL_GPL(io_uring_buf_ring_unpin); + /* cap it at a reasonable 256, will be one page even for 4K */ #define PEEK_MAX_IMPORT 256 @@ -336,7 +463,8 @@ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, */ if (ret > 0) { req->flags |= REQ_F_BUFFERS_COMMIT | REQ_F_BL_NO_RECYCLE; - io_kbuf_commit(req, sel->buf_list, arg->out_len, ret); + if (!io_kbuf_commit(req, sel->buf_list, arg->out_len, ret)) + req->flags |= REQ_F_BUF_MORE; } } else { ret = io_provided_buffers_select(req, &arg->out_len, sel->buf_list, arg->iovs); @@ -382,8 +510,10 @@ static inline bool __io_put_kbuf_ring(struct io_kiocb *req, if (bl) ret = io_kbuf_commit(req, bl, len, nr); + if (ret && (req->flags & REQ_F_BUF_MORE)) + ret = false; - req->flags &= ~REQ_F_BUFFER_RING; + req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUF_MORE); return ret; } @@ -427,10 +557,13 @@ static int io_remove_buffers_legacy(struct io_ring_ctx *ctx, static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) { - if (bl->flags & IOBL_BUF_RING) + if (bl->flags & IOBL_BUF_RING) { io_free_region(ctx->user, &bl->region); - else + if (bl->flags & IOBL_KERNEL_MANAGED) + kfree(bl->buf_ring); + } else { io_remove_buffers_legacy(ctx, bl, -1U); + } kfree(bl); } @@ -596,14 +729,53 @@ int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags) return IOU_COMPLETE; } +static int io_setup_kmbuf_ring(struct io_ring_ctx *ctx, + struct io_buffer_list *bl, + const struct io_uring_buf_reg *reg) +{ + struct io_uring_buf_ring *ring; + unsigned long ring_size; + void *buf_region; + unsigned int i; + int ret; + + /* allocate pages for the ring structure */ + ring_size = flex_array_size(ring, bufs, reg->ring_entries); + ring = kzalloc(ring_size, GFP_KERNEL_ACCOUNT); + if (!ring) + return -ENOMEM; + + ret = io_create_region_multi_buf(ctx, &bl->region, reg->ring_entries, + reg->buf_size); + if (ret) { + kfree(ring); + return ret; + } + + /* initialize ring buf entries to point to the buffers */ + buf_region = bl->region.ptr; + for (i = 0; i < reg->ring_entries; i++) { + struct io_uring_buf *buf = &ring->bufs[i]; + + buf->addr = (u64)(uintptr_t)buf_region; + buf->len = reg->buf_size; + buf->bid = i; + + buf_region += reg->buf_size; + } + ring->tail = reg->ring_entries; + + bl->buf_ring = ring; + bl->flags |= IOBL_KERNEL_MANAGED; + + return 0; +} + int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) { struct io_uring_buf_reg reg; struct io_buffer_list *bl; - struct io_uring_region_desc rd; struct io_uring_buf_ring *br; - unsigned long mmap_offset; - unsigned long ring_size; int ret; lockdep_assert_held(&ctx->uring_lock); @@ -612,7 +784,8 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return -EFAULT; if (!mem_is_zero(reg.resv, sizeof(reg.resv))) return -EINVAL; - if (reg.flags & ~(IOU_PBUF_RING_MMAP | IOU_PBUF_RING_INC)) + if (reg.flags & ~(IOU_PBUF_RING_MMAP | IOU_PBUF_RING_INC | + IOU_PBUF_RING_KERNEL_MANAGED)) return -EINVAL; if (!is_power_of_2(reg.ring_entries)) return -EINVAL; @@ -620,6 +793,16 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (reg.ring_entries >= 65536) return -EINVAL; + if (reg.flags & IOU_PBUF_RING_KERNEL_MANAGED) { + if (!(reg.flags & IOU_PBUF_RING_MMAP)) + return -EINVAL; + /* not yet supported */ + if (reg.flags & IOU_PBUF_RING_INC) + return -EINVAL; + if (!reg.buf_size || !PAGE_ALIGNED(reg.buf_size)) + return -EINVAL; + } + bl = io_buffer_get_list(ctx, reg.bgid); if (bl) { /* if mapped buffer ring OR classic exists, don't allow */ @@ -632,19 +815,30 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (!bl) return -ENOMEM; - mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT; - ring_size = flex_array_size(br, bufs, reg.ring_entries); + if (!(reg.flags & IOU_PBUF_RING_KERNEL_MANAGED)) { + struct io_uring_region_desc rd; + unsigned long mmap_offset; + unsigned long ring_size; + + mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT; + ring_size = flex_array_size(br, bufs, reg.ring_entries); - memset(&rd, 0, sizeof(rd)); - rd.size = PAGE_ALIGN(ring_size); - if (!(reg.flags & IOU_PBUF_RING_MMAP)) { - rd.user_addr = reg.ring_addr; - rd.flags |= IORING_MEM_REGION_TYPE_USER; + memset(&rd, 0, sizeof(rd)); + rd.size = PAGE_ALIGN(ring_size); + if (!(reg.flags & IOU_PBUF_RING_MMAP)) { + rd.user_addr = reg.ring_addr; + rd.flags |= IORING_MEM_REGION_TYPE_USER; + } + ret = io_create_region(ctx, &bl->region, &rd, mmap_offset); + if (!ret) + bl->buf_ring = io_region_get_ptr(&bl->region); + } else { + ret = io_setup_kmbuf_ring(ctx, bl, ®); } - ret = io_create_region(ctx, &bl->region, &rd, mmap_offset); if (ret) goto fail; - br = io_region_get_ptr(&bl->region); + + br = bl->buf_ring; #ifdef SHM_COLOUR /* @@ -666,7 +860,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) bl->nr_entries = reg.ring_entries; bl->mask = reg.ring_entries - 1; bl->flags |= IOBL_BUF_RING; - bl->buf_ring = br; if (reg.flags & IOU_PBUF_RING_INC) bl->flags |= IOBL_INC; ret = io_buffer_add_list(ctx, bl, reg.bgid); @@ -674,6 +867,8 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return 0; fail: io_free_region(ctx->user, &bl->region); + if (bl->flags & IOBL_KERNEL_MANAGED) + kfree(bl->buf_ring); kfree(bl); return ret; } @@ -695,6 +890,8 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return -ENOENT; if (!(bl->flags & IOBL_BUF_RING)) return -EINVAL; + if (bl->flags & IOBL_PINNED) + return -EBUSY; scoped_guard(mutex, &ctx->mmap_lock) xa_erase(&ctx->io_bl_xa, bl->bgid); @@ -738,3 +935,23 @@ struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx, return NULL; return &bl->region; } + +bool io_uring_is_kmbuf_ring(struct io_uring_cmd *cmd, unsigned int buf_group, + unsigned int issue_flags) +{ + struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx; + struct io_buffer_list *bl; + bool is_kmbuf_ring = false; + + io_ring_submit_lock(ctx, issue_flags); + + bl = io_buffer_get_list(ctx, buf_group); + if (likely(bl) && (bl->flags & IOBL_KERNEL_MANAGED)) { + WARN_ON_ONCE(!(bl->flags & IOBL_BUF_RING)); + is_kmbuf_ring = true; + } + + io_ring_submit_unlock(ctx, issue_flags); + return is_kmbuf_ring; +} +EXPORT_SYMBOL_GPL(io_uring_is_kmbuf_ring); diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index bf15e26520d38..006e8a73a1176 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -7,9 +7,16 @@ enum { /* ring mapped provided buffers */ - IOBL_BUF_RING = 1, + IOBL_BUF_RING = 1, /* buffers are consumed incrementally rather than always fully */ - IOBL_INC = 2, + IOBL_INC = 2, + /* buffers are kernel managed */ + IOBL_KERNEL_MANAGED = 4, + /* + * buffer ring is pinned and cannot be unregistered by userspace until + * it has been unpinned + */ + IOBL_PINNED = 8, }; struct io_buffer_list { diff --git a/io_uring/loop.c b/io_uring/loop.c new file mode 100644 index 0000000000000..31843cc3e4510 --- /dev/null +++ b/io_uring/loop.c @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "io_uring.h" +#include "wait.h" +#include "loop.h" + +static inline int io_loop_nr_cqes(const struct io_ring_ctx *ctx, + const struct iou_loop_params *lp) +{ + return lp->cq_wait_idx - READ_ONCE(ctx->rings->cq.tail); +} + +static inline void io_loop_wait_start(struct io_ring_ctx *ctx, unsigned nr_wait) +{ + atomic_set(&ctx->cq_wait_nr, nr_wait); + set_current_state(TASK_INTERRUPTIBLE); +} + +static inline void io_loop_wait_finish(struct io_ring_ctx *ctx) +{ + __set_current_state(TASK_RUNNING); + atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT); +} + +static void io_loop_wait(struct io_ring_ctx *ctx, struct iou_loop_params *lp, + unsigned nr_wait) +{ + io_loop_wait_start(ctx, nr_wait); + + if (unlikely(io_local_work_pending(ctx) || + io_loop_nr_cqes(ctx, lp) <= 0) || + READ_ONCE(ctx->check_cq)) { + io_loop_wait_finish(ctx); + return; + } + + mutex_unlock(&ctx->uring_lock); + schedule(); + io_loop_wait_finish(ctx); + mutex_lock(&ctx->uring_lock); +} + +static int __io_run_loop(struct io_ring_ctx *ctx) +{ + struct iou_loop_params lp = {}; + + while (true) { + int nr_wait, step_res; + + if (unlikely(!ctx->loop_step)) + return -EFAULT; + + step_res = ctx->loop_step(ctx, &lp); + if (step_res == IOU_LOOP_STOP) + break; + if (step_res != IOU_LOOP_CONTINUE) + return -EINVAL; + + nr_wait = io_loop_nr_cqes(ctx, &lp); + if (nr_wait > 0) + io_loop_wait(ctx, &lp, nr_wait); + else + nr_wait = 0; + + if (task_work_pending(current)) { + mutex_unlock(&ctx->uring_lock); + io_run_task_work(); + mutex_lock(&ctx->uring_lock); + } + if (unlikely(task_sigpending(current))) + return -EINTR; + io_run_local_work_locked(ctx, nr_wait); + + if (READ_ONCE(ctx->check_cq) & BIT(IO_CHECK_CQ_OVERFLOW_BIT)) + io_cqring_overflow_flush_locked(ctx); + } + + return 0; +} + +int io_run_loop(struct io_ring_ctx *ctx) +{ + int ret; + + if (!io_allowed_run_tw(ctx)) + return -EEXIST; + + mutex_lock(&ctx->uring_lock); + ret = __io_run_loop(ctx); + mutex_unlock(&ctx->uring_lock); + return ret; +} diff --git a/io_uring/loop.h b/io_uring/loop.h new file mode 100644 index 0000000000000..d7718b9ce61ee --- /dev/null +++ b/io_uring/loop.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef IOU_LOOP_H +#define IOU_LOOP_H + +#include + +struct iou_loop_params { + /* + * The CQE index to wait for. Only serves as a hint and can still be + * woken up earlier. + */ + __u32 cq_wait_idx; +}; + +enum { + IOU_LOOP_CONTINUE = 0, + IOU_LOOP_STOP, +}; + +static inline bool io_has_loop_ops(struct io_ring_ctx *ctx) +{ + return data_race(ctx->loop_step); +} + +int io_run_loop(struct io_ring_ctx *ctx); + +#endif diff --git a/io_uring/memmap.c b/io_uring/memmap.c index e6958968975a8..4979cbbfa27c9 100644 --- a/io_uring/memmap.c +++ b/io_uring/memmap.c @@ -15,6 +15,28 @@ #include "rsrc.h" #include "zcrx.h" +static void release_multi_buf_pages(struct page **pages, unsigned long nr_pages) +{ + struct page *page; + unsigned int nr, i = 0; + + while (nr_pages) { + page = pages[i]; + + if (!page || WARN_ON_ONCE(page != compound_head(page))) + return; + + nr = compound_nr(page); + put_page(page); + + if (WARN_ON_ONCE(nr > nr_pages)) + return; + + i += nr; + nr_pages -= nr; + } +} + static bool io_mem_alloc_compound(struct page **pages, int nr_pages, size_t size, gfp_t gfp) { @@ -86,6 +108,8 @@ enum { IO_REGION_F_USER_PROVIDED = 2, /* only the first page in the array is ref'ed */ IO_REGION_F_SINGLE_REF = 4, + /* pages in the array belong to multiple discrete allocations */ + IO_REGION_F_MULTI_BUF = 8, }; void io_free_region(struct user_struct *user, struct io_mapped_region *mr) @@ -98,6 +122,8 @@ void io_free_region(struct user_struct *user, struct io_mapped_region *mr) if (mr->flags & IO_REGION_F_USER_PROVIDED) unpin_user_pages(mr->pages, nr_refs); + else if (mr->flags & IO_REGION_F_MULTI_BUF) + release_multi_buf_pages(mr->pages, nr_refs); else release_pages(mr->pages, nr_refs); @@ -149,6 +175,54 @@ static int io_region_pin_pages(struct io_mapped_region *mr, return 0; } +static int io_region_allocate_pages_multi_buf(struct io_mapped_region *mr, + unsigned int nr_bufs, + unsigned int buf_size) +{ + gfp_t gfp = GFP_USER | __GFP_ACCOUNT | __GFP_ZERO | __GFP_NOWARN; + struct page **pages, **cur_pages; + unsigned int nr_allocated; + unsigned int buf_pages; + unsigned int i; + + if (!PAGE_ALIGNED(buf_size)) + return -EINVAL; + + buf_pages = buf_size >> PAGE_SHIFT; + + pages = kvmalloc_array(mr->nr_pages, sizeof(*pages), gfp); + if (!pages) + return -ENOMEM; + + cur_pages = pages; + + for (i = 0; i < nr_bufs; i++) { + if (io_mem_alloc_compound(cur_pages, buf_pages, buf_size, + gfp)) { + cur_pages += buf_pages; + continue; + } + + nr_allocated = alloc_pages_bulk_node(gfp, NUMA_NO_NODE, + buf_pages, cur_pages); + if (nr_allocated != buf_pages) { + unsigned int total = + (cur_pages - pages) + nr_allocated; + + release_multi_buf_pages(pages, total); + kvfree(pages); + return -ENOMEM; + } + + cur_pages += buf_pages; + } + + mr->flags |= IO_REGION_F_MULTI_BUF; + mr->pages = pages; + + return 0; +} + static int io_region_allocate_pages(struct io_mapped_region *mr, struct io_uring_region_desc *reg, unsigned long mmap_offset) @@ -181,6 +255,43 @@ static int io_region_allocate_pages(struct io_mapped_region *mr, return 0; } +int io_create_region_multi_buf(struct io_ring_ctx *ctx, + struct io_mapped_region *mr, + unsigned int nr_bufs, unsigned int buf_size) +{ + unsigned int nr_pages; + int ret; + + if (WARN_ON_ONCE(mr->pages || mr->ptr || mr->nr_pages)) + return -EFAULT; + + if (WARN_ON_ONCE(!nr_bufs || !buf_size || !PAGE_ALIGNED(buf_size))) + return -EINVAL; + + if (check_mul_overflow(buf_size >> PAGE_SHIFT, nr_bufs, &nr_pages)) + return -EINVAL; + + if (ctx->user) { + ret = __io_account_mem(ctx->user, nr_pages); + if (ret) + return ret; + } + mr->nr_pages = nr_pages; + + ret = io_region_allocate_pages_multi_buf(mr, nr_bufs, buf_size); + if (ret) + goto out_free; + + ret = io_region_init_ptr(mr); + if (ret) + goto out_free; + + return 0; +out_free: + io_free_region(ctx->user, mr); + return ret; +} + int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg, unsigned long mmap_offset) diff --git a/io_uring/memmap.h b/io_uring/memmap.h index f4cfbb6b9a1f4..3aa1167462aec 100644 --- a/io_uring/memmap.h +++ b/io_uring/memmap.h @@ -22,6 +22,10 @@ int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg, unsigned long mmap_offset); +int io_create_region_multi_buf(struct io_ring_ctx *ctx, + struct io_mapped_region *mr, + unsigned int nr_bufs, unsigned int buf_size); + static inline void *io_region_get_ptr(struct io_mapped_region *mr) { return mr->ptr; diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 57ad0085869a8..3ff9098573dbf 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -67,7 +67,7 @@ void io_msg_ring_cleanup(struct io_kiocb *req) static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx) { - return target_ctx->task_complete; + return target_ctx->int_flags & IO_RING_F_TASK_COMPLETE; } static void io_msg_tw_complete(struct io_tw_req tw_req, io_tw_token_t tw) diff --git a/io_uring/net.c b/io_uring/net.c index d27adbe3f20be..30cd22c0b934b 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -375,10 +375,13 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) kmsg->msg.msg_namelen = addr_len; } if (sr->flags & IORING_RECVSEND_FIXED_BUF) { - if (sr->flags & IORING_SEND_VECTORIZED) - return -EINVAL; - req->flags |= REQ_F_IMPORT_BUFFER; - return 0; + if (!(sr->flags & IORING_SEND_VECTORIZED)) { + req->flags |= REQ_F_IMPORT_BUFFER; + return 0; + } + + kmsg->msg.msg_iter.nr_segs = sr->len; + return io_prep_reg_iovec(req, &kmsg->vec, sr->buf, sr->len); } if (req->flags & REQ_F_BUFFER_SELECT) return 0; @@ -396,6 +399,7 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe struct user_msghdr msg; int ret; + sr->flags |= IORING_SEND_VECTORIZED; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL); if (unlikely(ret)) @@ -421,6 +425,8 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sr->done_io = 0; sr->len = READ_ONCE(sqe->len); + if (unlikely(sr->len < 0)) + return -EINVAL; sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~SENDMSG_FLAGS) return -EINVAL; @@ -791,6 +797,8 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); + if (unlikely(sr->len < 0)) + return -EINVAL; sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~RECVMSG_FLAGS) return -EINVAL; @@ -1329,11 +1337,12 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_ring_ctx *ctx = req->ctx; struct io_async_msghdr *iomsg; struct io_kiocb *notif; + u64 user_data; int ret; zc->done_io = 0; - if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))) + if (unlikely(READ_ONCE(sqe->__pad2[0]))) return -EINVAL; /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */ if (req->flags & REQ_F_CQE_SKIP) @@ -1342,7 +1351,11 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) notif = zc->notif = io_alloc_notif(ctx); if (!notif) return -ENOMEM; - notif->cqe.user_data = req->cqe.user_data; + user_data = READ_ONCE(sqe->addr3); + if (!user_data) + user_data = req->cqe.user_data; + + notif->cqe.user_data = user_data; notif->cqe.res = 0; notif->cqe.flags = IORING_CQE_F_NOTIF; req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY; @@ -1366,7 +1379,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (zc->msg_flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; - if (io_is_compat(req->ctx)) + if (io_is_compat(ctx)) zc->msg_flags |= MSG_CMSG_COMPAT; iomsg = io_msg_alloc_async(req); @@ -1441,22 +1454,39 @@ static int io_sg_from_iter(struct sk_buff *skb, return ret; } -static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags) +static int io_send_zc_import(struct io_kiocb *req, + struct io_async_msghdr *kmsg, + unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *kmsg = req->async_data; + struct io_kiocb *notif = sr->notif; + int ret; WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF)); - sr->notif->buf_index = req->buf_index; - return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter, - (u64)(uintptr_t)sr->buf, sr->len, - ITER_SOURCE, issue_flags); + notif->buf_index = req->buf_index; + + if (!(sr->flags & IORING_SEND_VECTORIZED)) { + ret = io_import_reg_buf(notif, &kmsg->msg.msg_iter, + (u64)(uintptr_t)sr->buf, sr->len, + ITER_SOURCE, issue_flags); + } else { + unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs; + + ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, + notif, &kmsg->vec, uvec_segs, + issue_flags); + } + + if (unlikely(ret)) + return ret; + req->flags &= ~REQ_F_IMPORT_BUFFER; + return 0; } -int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) +int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) { - struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_async_msghdr *kmsg = req->async_data; struct socket *sock; unsigned msg_flags; @@ -1467,106 +1497,38 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) return -ENOTSOCK; if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) return -EOPNOTSUPP; - if (!(req->flags & REQ_F_POLLED) && - (zc->flags & IORING_RECVSEND_POLL_FIRST)) + (sr->flags & IORING_RECVSEND_POLL_FIRST)) return -EAGAIN; if (req->flags & REQ_F_IMPORT_BUFFER) { - req->flags &= ~REQ_F_IMPORT_BUFFER; - ret = io_send_zc_import(req, issue_flags); + ret = io_send_zc_import(req, kmsg, issue_flags); if (unlikely(ret)) return ret; } - msg_flags = zc->msg_flags; + msg_flags = sr->msg_flags; if (issue_flags & IO_URING_F_NONBLOCK) msg_flags |= MSG_DONTWAIT; if (msg_flags & MSG_WAITALL) min_ret = iov_iter_count(&kmsg->msg.msg_iter); - msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; - - kmsg->msg.msg_flags = msg_flags; - kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg; - ret = sock_sendmsg(sock, &kmsg->msg); - - if (unlikely(ret < min_ret)) { - if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) - return -EAGAIN; - - if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) { - zc->done_io += ret; - return -EAGAIN; - } - if (ret == -ERESTARTSYS) - ret = -EINTR; - req_set_fail(req); - } - - if (ret >= 0) - ret += zc->done_io; - else if (zc->done_io) - ret = zc->done_io; - - /* - * If we're in io-wq we can't rely on tw ordering guarantees, defer - * flushing notif to io_send_zc_cleanup() - */ - if (!(issue_flags & IO_URING_F_UNLOCKED)) { - io_notif_flush(zc->notif); - zc->notif = NULL; - io_req_msg_cleanup(req, 0); - } - io_req_set_res(req, ret, IORING_CQE_F_MORE); - return IOU_COMPLETE; -} - -int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *kmsg = req->async_data; - struct socket *sock; - unsigned flags; - int ret, min_ret = 0; - if (req->flags & REQ_F_IMPORT_BUFFER) { - unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs; - int ret; + kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg; - sr->notif->buf_index = req->buf_index; - ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, - sr->notif, &kmsg->vec, uvec_segs, - issue_flags); - if (unlikely(ret)) - return ret; - req->flags &= ~REQ_F_IMPORT_BUFFER; + if (req->opcode == IORING_OP_SEND_ZC) { + msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; + kmsg->msg.msg_flags = msg_flags; + ret = sock_sendmsg(sock, &kmsg->msg); + } else { + kmsg->msg.msg_control_user = sr->msg_control; + ret = __sys_sendmsg_sock(sock, &kmsg->msg, msg_flags); } - sock = sock_from_file(req->file); - if (unlikely(!sock)) - return -ENOTSOCK; - if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) - return -EOPNOTSUPP; - - if (!(req->flags & REQ_F_POLLED) && - (sr->flags & IORING_RECVSEND_POLL_FIRST)) - return -EAGAIN; - - flags = sr->msg_flags; - if (issue_flags & IO_URING_F_NONBLOCK) - flags |= MSG_DONTWAIT; - if (flags & MSG_WAITALL) - min_ret = iov_iter_count(&kmsg->msg.msg_iter); - - kmsg->msg.msg_control_user = sr->msg_control; - kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg; - ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); - if (unlikely(ret < min_ret)) { if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) return -EAGAIN; - if (ret > 0 && io_net_retry(sock, flags)) { + if (ret > 0 && io_net_retry(sock, sr->msg_flags)) { sr->done_io += ret; return -EAGAIN; } diff --git a/io_uring/net.h b/io_uring/net.h index a862960a3bb99..d4d1ddce50e3a 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -50,7 +50,6 @@ void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req) int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_connect(struct io_kiocb *req, unsigned int issue_flags); -int io_send_zc(struct io_kiocb *req, unsigned int issue_flags); int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags); int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); void io_send_zc_cleanup(struct io_kiocb *req); diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 91a23baf415e8..c3ef52b708113 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -67,7 +67,6 @@ const struct io_issue_def io_issue_defs[] = { .audit_skip = 1, .ioprio = 1, .iopoll = 1, - .iopoll_queue = 1, .vectored = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_readv, @@ -82,7 +81,6 @@ const struct io_issue_def io_issue_defs[] = { .audit_skip = 1, .ioprio = 1, .iopoll = 1, - .iopoll_queue = 1, .vectored = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_writev, @@ -102,7 +100,6 @@ const struct io_issue_def io_issue_defs[] = { .audit_skip = 1, .ioprio = 1, .iopoll = 1, - .iopoll_queue = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_read_fixed, .issue = io_read_fixed, @@ -116,7 +113,6 @@ const struct io_issue_def io_issue_defs[] = { .audit_skip = 1, .ioprio = 1, .iopoll = 1, - .iopoll_queue = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_write_fixed, .issue = io_write_fixed, @@ -250,7 +246,6 @@ const struct io_issue_def io_issue_defs[] = { .audit_skip = 1, .ioprio = 1, .iopoll = 1, - .iopoll_queue = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_read, .issue = io_read, @@ -264,7 +259,6 @@ const struct io_issue_def io_issue_defs[] = { .audit_skip = 1, .ioprio = 1, .iopoll = 1, - .iopoll_queue = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_write, .issue = io_write, @@ -423,7 +417,6 @@ const struct io_issue_def io_issue_defs[] = { .needs_file = 1, .plug = 1, .iopoll = 1, - .iopoll_queue = 1, .async_size = sizeof(struct io_async_cmd), .prep = io_uring_cmd_prep, .issue = io_uring_cmd, @@ -437,7 +430,7 @@ const struct io_issue_def io_issue_defs[] = { #if defined(CONFIG_NET) .async_size = sizeof(struct io_async_msghdr), .prep = io_send_zc_prep, - .issue = io_send_zc, + .issue = io_sendmsg_zc, #else .prep = io_eopnotsupp_prep, #endif @@ -556,7 +549,6 @@ const struct io_issue_def io_issue_defs[] = { .audit_skip = 1, .ioprio = 1, .iopoll = 1, - .iopoll_queue = 1, .vectored = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_readv_fixed, @@ -571,7 +563,6 @@ const struct io_issue_def io_issue_defs[] = { .audit_skip = 1, .ioprio = 1, .iopoll = 1, - .iopoll_queue = 1, .vectored = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_writev_fixed, @@ -593,7 +584,6 @@ const struct io_issue_def io_issue_defs[] = { .needs_file = 1, .plug = 1, .iopoll = 1, - .iopoll_queue = 1, .is_128 = 1, .async_size = sizeof(struct io_async_cmd), .prep = io_uring_cmd_prep, diff --git a/io_uring/opdef.h b/io_uring/opdef.h index faf3955dce8b9..667f981e63b05 100644 --- a/io_uring/opdef.h +++ b/io_uring/opdef.h @@ -25,8 +25,6 @@ struct io_issue_def { unsigned poll_exclusive : 1; /* skip auditing */ unsigned audit_skip : 1; - /* have to be put into the iopoll list */ - unsigned iopoll_queue : 1; /* vectored opcode, set if 1) vectored, and 2) handler needs to know */ unsigned vectored : 1; /* set to 1 if this opcode uses 128b sqes in a mixed sq */ diff --git a/io_uring/poll.c b/io_uring/poll.c index b671b84657d9d..74eef78841596 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -272,12 +272,15 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw) atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); v &= ~IO_POLL_RETRY_FLAG; } + v &= IO_POLL_REF_MASK; } /* the mask was stashed in __io_poll_execute */ if (!req->cqe.res) { - struct poll_table_struct pt = { ._key = req->apoll_events }; - req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events; + __poll_t events = req->apoll_events; + struct poll_table_struct pt = { ._key = events }; + + req->cqe.res = vfs_poll(req->file, &pt) & events; /* * We got woken with a mask, but someone else got to * it first. The above vfs_poll() doesn't add us back @@ -286,7 +289,7 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw) */ if (unlikely(!req->cqe.res)) { /* Multishot armed need not reissue */ - if (!(req->apoll_events & EPOLLONESHOT)) + if (!(events & EPOLLONESHOT)) continue; return IOU_POLL_REISSUE; } @@ -304,8 +307,13 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw) return IOU_POLL_REMOVE_POLL_USE_RES; } } else { - int ret = io_poll_issue(req, tw); + int ret; + + /* multiple refs and HUP, ensure we loop once more */ + if ((req->cqe.res & (POLLHUP | POLLRDHUP)) && v != 1) + v--; + ret = io_poll_issue(req, tw); if (ret == IOU_COMPLETE) return IOU_POLL_REMOVE_POLL_USE_RES; else if (ret == IOU_REQUEUE) @@ -321,7 +329,6 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw) * Release all references, retry if someone tried to restart * task_work while we were executing it. */ - v &= IO_POLL_REF_MASK; } while (atomic_sub_return(v, &req->poll_refs) & IO_POLL_REF_MASK); io_napi_add(req); diff --git a/io_uring/query.c b/io_uring/query.c index 63cc30c9803d1..c1704d088374c 100644 --- a/io_uring/query.c +++ b/io_uring/query.c @@ -34,12 +34,12 @@ static ssize_t io_query_zcrx(union io_query_data *data) { struct io_uring_query_zcrx *e = &data->zcrx; - e->register_flags = ZCRX_REG_IMPORT; + e->register_flags = ZCRX_SUPPORTED_REG_FLAGS; e->area_flags = IORING_ZCRX_AREA_DMABUF; e->nr_ctrl_opcodes = __ZCRX_CTRL_LAST; e->rq_hdr_size = sizeof(struct io_uring); e->rq_hdr_alignment = L1_CACHE_BYTES; - e->features = ZCRX_FEATURE_RX_PAGE_SIZE; + e->features = ZCRX_FEATURES; e->__resv2 = 0; return sizeof(*e); } diff --git a/io_uring/register.c b/io_uring/register.c index 6015a3e9ce692..96cdb356ea29c 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -178,15 +178,23 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx, return -EBUSY; ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions); - /* Reset all restrictions if an error happened */ + /* + * Reset all restrictions if an error happened, but retain any COW'ed + * settings. + */ if (ret < 0) { + struct io_bpf_filters *bpf = ctx->restrictions.bpf_filters; + bool cowed = ctx->restrictions.bpf_filters_cow; + memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); + ctx->restrictions.bpf_filters = bpf; + ctx->restrictions.bpf_filters_cow = cowed; return ret; } if (ctx->restrictions.op_registered) - ctx->op_restricted = 1; + ctx->int_flags |= IO_RING_F_OP_RESTRICTED; if (ctx->restrictions.reg_registered) - ctx->reg_restricted = 1; + ctx->int_flags |= IO_RING_F_REG_RESTRICTED; return 0; } @@ -202,7 +210,7 @@ static int io_register_restrictions_task(void __user *arg, unsigned int nr_args) return -EPERM; /* * Similar to seccomp, disallow setting a filter if task_no_new_privs - * is true and we're not CAP_SYS_ADMIN. + * is false and we're not CAP_SYS_ADMIN. */ if (!task_no_new_privs(current) && !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) @@ -238,7 +246,7 @@ static int io_register_bpf_filter_task(void __user *arg, unsigned int nr_args) /* * Similar to seccomp, disallow setting a filter if task_no_new_privs - * is true and we're not CAP_SYS_ADMIN. + * is false and we're not CAP_SYS_ADMIN. */ if (!task_no_new_privs(current) && !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) @@ -384,7 +392,7 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, for (i = 0; i < ARRAY_SIZE(new_count); i++) if (new_count[i]) ctx->iowq_limits[i] = new_count[i]; - ctx->iowq_limits_set = true; + ctx->int_flags |= IO_RING_F_IOWQ_LIMITS_SET; if (tctx && tctx->io_wq) { ret = io_wq_max_workers(tctx->io_wq, new_count); @@ -633,7 +641,15 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) ctx->sq_entries = p->sq_entries; ctx->cq_entries = p->cq_entries; + /* + * Just mark any flag we may have missed and that the application + * should act on unconditionally. Worst case it'll be an extra + * syscall. + */ + atomic_or(IORING_SQ_TASKRUN | IORING_SQ_NEED_WAKEUP, &n.rings->sq_flags); ctx->rings = n.rings; + rcu_assign_pointer(ctx->rings_rcu, n.rings); + ctx->sq_sqes = n.sq_sqes; swap_old(ctx, o, n, ring_region); swap_old(ctx, o, n, sq_region); @@ -642,6 +658,9 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) out: spin_unlock(&ctx->completion_lock); mutex_unlock(&ctx->mmap_lock); + /* Wait for concurrent io_ctx_mark_taskrun() */ + if (to_free == &o) + synchronize_rcu_expedited(); io_register_free_rings(ctx, to_free); if (ctx->sq_data) @@ -714,7 +733,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, if (ctx->submitter_task && ctx->submitter_task != current) return -EEXIST; - if (ctx->reg_restricted && !(ctx->flags & IORING_SETUP_R_DISABLED)) { + if ((ctx->int_flags & IO_RING_F_REG_RESTRICTED) && !(ctx->flags & IORING_SETUP_R_DISABLED)) { opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); if (!test_bit(opcode, ctx->restrictions.register_op)) return -EACCES; @@ -889,7 +908,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, ret = -EINVAL; if (!arg || nr_args != 1) break; - ret = io_register_zcrx_ifq(ctx, arg); + ret = io_register_zcrx(ctx, arg); break; case IORING_REGISTER_RESIZE_RINGS: ret = -EINVAL; diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 4fa59bf89bba1..f1dd281ec733b 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -295,7 +295,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, u64 tag = 0; uvec = u64_to_user_ptr(user_data); - iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat); + iov = iovec_from_user(uvec, 1, 1, &fast_iov, io_is_compat(ctx)); if (IS_ERR(iov)) { err = PTR_ERR(iov); break; @@ -319,7 +319,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, i = array_index_nospec(up->offset + done, ctx->buf_table.nr); io_reset_rsrc_node(ctx, &ctx->buf_table, i); ctx->buf_table.nodes[i] = node; - if (ctx->compat) + if (io_is_compat(ctx)) user_data += sizeof(struct compat_iovec); else user_data += sizeof(struct iovec); @@ -883,12 +883,12 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, if (arg) { uvec = (struct iovec __user *) arg; - iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat); + iov = iovec_from_user(uvec, 1, 1, &fast_iov, io_is_compat(ctx)); if (IS_ERR(iov)) { ret = PTR_ERR(iov); break; } - if (ctx->compat) + if (io_is_compat(ctx)) arg += sizeof(struct compat_iovec); else arg += sizeof(struct iovec); @@ -961,7 +961,7 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, */ imu = io_alloc_imu(ctx, blk_rq_nr_phys_segments(rq)); if (!imu) { - kfree(node); + io_cache_free(&ctx->node_cache, node); ret = -ENOMEM; goto unlock; } @@ -1061,6 +1061,10 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, return ret; if (!(imu->dir & (1 << ddir))) return -EFAULT; + if (unlikely(!len)) { + iov_iter_bvec(iter, ddir, NULL, 0, 0); + return 0; + } offset = buf_addr - imu->ubuf; diff --git a/io_uring/rw.c b/io_uring/rw.c index 1a5f262734e8e..046f76a71b9c1 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -504,7 +504,7 @@ static bool io_rw_should_reissue(struct io_kiocb *req) if (!S_ISBLK(mode) && !S_ISREG(mode)) return false; if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() && - !(ctx->flags & IORING_SETUP_IOPOLL))) + !(req->flags & REQ_F_IOPOLL))) return false; /* * If ref is dying, we might be running poll reap from the exit work. @@ -640,7 +640,7 @@ static inline void io_rw_done(struct io_kiocb *req, ssize_t ret) } } - if (req->ctx->flags & IORING_SETUP_IOPOLL) + if (req->flags & REQ_F_IOPOLL) io_complete_rw_iopoll(&rw->kiocb, ret); else io_complete_rw(&rw->kiocb, ret); @@ -654,7 +654,7 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret, if (ret >= 0 && req->flags & REQ_F_CUR_POS) req->file->f_pos = rw->kiocb.ki_pos; - if (ret >= 0 && !(req->ctx->flags & IORING_SETUP_IOPOLL)) { + if (ret >= 0 && !(req->flags & REQ_F_IOPOLL)) { u32 cflags = 0; __io_complete_rw_common(req, ret); @@ -876,6 +876,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) if (ctx->flags & IORING_SETUP_IOPOLL) { if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) return -EOPNOTSUPP; + req->flags |= REQ_F_IOPOLL; kiocb->private = NULL; kiocb->ki_flags |= IOCB_HIPRI; req->iopoll_completed = 0; @@ -899,7 +900,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) * We have a union of meta fields with wpq used for buffered-io * in io_async_rw, so fail it here. */ - if (!(req->file->f_flags & O_DIRECT)) + if (!(file->f_flags & O_DIRECT)) return -EOPNOTSUPP; kiocb->ki_flags |= IOCB_HAS_METADATA; kiocb->private = &io->meta; @@ -963,7 +964,7 @@ static int __io_read(struct io_kiocb *req, struct io_br_sel *sel, if (io_file_can_poll(req)) return -EAGAIN; /* IOPOLL retry should happen for io-wq threads */ - if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL)) + if (!force_nonblock && !(req->flags & REQ_F_IOPOLL)) goto done; /* no retry on NONBLOCK nor RWF_NOWAIT */ if (req->flags & REQ_F_NOWAIT) @@ -1188,7 +1189,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) goto done; if (!force_nonblock || ret2 != -EAGAIN) { /* IOPOLL retry should happen for io-wq threads */ - if (ret2 == -EAGAIN && (req->ctx->flags & IORING_SETUP_IOPOLL)) + if (ret2 == -EAGAIN && (req->flags & REQ_F_IOPOLL)) goto ret_eagain; if (ret2 != req->cqe.res && ret2 >= 0 && need_complete_io(req)) { diff --git a/io_uring/tctx.c b/io_uring/tctx.c index 7cbcb82aedfb8..143de8e990eb3 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -121,7 +121,7 @@ int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) return ret; tctx = current->io_uring; - if (ctx->iowq_limits_set) { + if (ctx->int_flags & IO_RING_F_IOWQ_LIMITS_SET) { unsigned int limits[2] = { ctx->iowq_limits[0], ctx->iowq_limits[1], }; diff --git a/io_uring/timeout.c b/io_uring/timeout.c index cb61d4862fc65..579fdddac71ae 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -30,11 +30,30 @@ struct io_timeout_rem { u64 addr; /* timeout update */ - struct timespec64 ts; + ktime_t time; u32 flags; bool ltimeout; }; +static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags) +{ + struct timespec64 ts; + + if (flags & IORING_TIMEOUT_IMMEDIATE_ARG) { + *time = ns_to_ktime(arg); + if (*time < 0) + return -EINVAL; + return 0; + } + + if (get_timespec64(&ts, u64_to_user_ptr(arg))) + return -EFAULT; + if (ts.tv_sec < 0 || ts.tv_nsec < 0) + return -EINVAL; + *time = timespec64_to_ktime(ts); + return 0; +} + static struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req, struct io_kiocb *link); @@ -80,7 +99,7 @@ static void io_timeout_complete(struct io_tw_req tw_req, io_tw_token_t tw) /* re-arm timer */ raw_spin_lock_irq(&ctx->timeout_lock); list_add(&timeout->list, ctx->timeout_list.prev); - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); + hrtimer_start(&data->timer, data->time, data->mode); raw_spin_unlock_irq(&ctx->timeout_lock); return; } @@ -395,7 +414,7 @@ static clockid_t io_timeout_get_clock(struct io_timeout_data *data) } static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, - struct timespec64 *ts, enum hrtimer_mode mode) + ktime_t ts, enum hrtimer_mode mode) __must_hold(&ctx->timeout_lock) { struct io_timeout_data *io; @@ -417,12 +436,12 @@ static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, if (hrtimer_try_to_cancel(&io->timer) == -1) return -EALREADY; hrtimer_setup(&io->timer, io_link_timeout_fn, io_timeout_get_clock(io), mode); - hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode); + hrtimer_start(&io->timer, ts, mode); return 0; } static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, - struct timespec64 *ts, enum hrtimer_mode mode) + ktime_t time, enum hrtimer_mode mode) __must_hold(&ctx->timeout_lock) { struct io_cancel_data cd = { .ctx = ctx, .data = user_data, }; @@ -435,20 +454,23 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, timeout->off = 0; /* noseq */ data = req->async_data; - data->ts = *ts; + data->time = time; list_add_tail(&timeout->list, &ctx->timeout_list); hrtimer_setup(&data->timer, io_timeout_fn, io_timeout_get_clock(data), mode); - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), mode); + hrtimer_start(&data->timer, data->time, mode); return 0; } int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem); + int ret; if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) return -EINVAL; + if (sqe->addr3 || sqe->__pad2[0]) + return -EINVAL; if (sqe->buf_index || sqe->len || sqe->splice_fd_in) return -EINVAL; @@ -460,12 +482,13 @@ int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EINVAL; if (tr->flags & IORING_LINK_TIMEOUT_UPDATE) tr->ltimeout = true; - if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) - return -EINVAL; - if (get_timespec64(&tr->ts, u64_to_user_ptr(READ_ONCE(sqe->addr2)))) - return -EFAULT; - if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0) + if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK | + IORING_TIMEOUT_ABS | + IORING_TIMEOUT_IMMEDIATE_ARG)) return -EINVAL; + ret = io_parse_user_time(&tr->time, READ_ONCE(sqe->addr2), tr->flags); + if (ret) + return ret; } else if (tr->flags) { /* timeout removal doesn't support flags */ return -EINVAL; @@ -500,9 +523,9 @@ int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) raw_spin_lock_irq(&ctx->timeout_lock); if (tr->ltimeout) - ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode); + ret = io_linked_timeout_update(ctx, tr->addr, tr->time, mode); else - ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); + ret = io_timeout_update(ctx, tr->addr, tr->time, mode); raw_spin_unlock_irq(&ctx->timeout_lock); } @@ -520,7 +543,10 @@ static int __io_timeout_prep(struct io_kiocb *req, struct io_timeout_data *data; unsigned flags; u32 off = READ_ONCE(sqe->off); + int ret; + if (sqe->addr3 || sqe->__pad2[0]) + return -EINVAL; if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in) return -EINVAL; if (off && is_timeout_link) @@ -528,7 +554,8 @@ static int __io_timeout_prep(struct io_kiocb *req, flags = READ_ONCE(sqe->timeout_flags); if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK | IORING_TIMEOUT_ETIME_SUCCESS | - IORING_TIMEOUT_MULTISHOT)) + IORING_TIMEOUT_MULTISHOT | + IORING_TIMEOUT_IMMEDIATE_ARG)) return -EINVAL; /* more than one clock specified is invalid, obviously */ if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) @@ -539,8 +566,8 @@ static int __io_timeout_prep(struct io_kiocb *req, INIT_LIST_HEAD(&timeout->list); timeout->off = off; - if (unlikely(off && !req->ctx->off_timeout_used)) - req->ctx->off_timeout_used = true; + if (unlikely(off && !(req->ctx->int_flags & IO_RING_F_OFF_TIMEOUT_USED))) + req->ctx->int_flags |= IO_RING_F_OFF_TIMEOUT_USED; /* * for multishot reqs w/ fixed nr of repeats, repeats tracks the * remaining nr @@ -557,11 +584,9 @@ static int __io_timeout_prep(struct io_kiocb *req, data->req = req; data->flags = flags; - if (get_timespec64(&data->ts, u64_to_user_ptr(READ_ONCE(sqe->addr)))) - return -EFAULT; - - if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) - return -EINVAL; + ret = io_parse_user_time(&data->time, READ_ONCE(sqe->addr), flags); + if (ret) + return ret; data->mode = io_translate_timeout_mode(flags); @@ -637,7 +662,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags) } add: list_add(&timeout->list, entry); - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); + hrtimer_start(&data->timer, data->time, data->mode); raw_spin_unlock_irq(&ctx->timeout_lock); return IOU_ISSUE_SKIP_COMPLETE; } @@ -655,8 +680,7 @@ void io_queue_linked_timeout(struct io_kiocb *req) if (timeout->head) { struct io_timeout_data *data = req->async_data; - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), - data->mode); + hrtimer_start(&data->timer, data->time, data->mode); list_add_tail(&timeout->list, &ctx->ltimeout_list); } raw_spin_unlock_irq(&ctx->timeout_lock); diff --git a/io_uring/timeout.h b/io_uring/timeout.h index 2b7c9ad729925..1620f94dd45a8 100644 --- a/io_uring/timeout.h +++ b/io_uring/timeout.h @@ -3,7 +3,7 @@ struct io_timeout_data { struct io_kiocb *req; struct hrtimer timer; - struct timespec64 ts; + ktime_t time; enum hrtimer_mode mode; u32 flags; }; diff --git a/io_uring/tw.c b/io_uring/tw.c index 1ee2b8ab07c8b..fdff81eebc95c 100644 --- a/io_uring/tw.c +++ b/io_uring/tw.c @@ -152,6 +152,21 @@ void tctx_task_work(struct callback_head *cb) WARN_ON_ONCE(ret); } +/* + * Sets IORING_SQ_TASKRUN in the sq_flags shared with userspace, using the + * RCU protected rings pointer to be safe against concurrent ring resizing. + */ +static void io_ctx_mark_taskrun(struct io_ring_ctx *ctx) +{ + lockdep_assert_in_rcu_read_lock(); + + if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) { + struct io_rings *rings = rcu_dereference(ctx->rings_rcu); + + atomic_or(IORING_SQ_TASKRUN, &rings->sq_flags); + } +} + void io_req_local_work_add(struct io_kiocb *req, unsigned flags) { struct io_ring_ctx *ctx = req->ctx; @@ -206,9 +221,8 @@ void io_req_local_work_add(struct io_kiocb *req, unsigned flags) */ if (!head) { - if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) - atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); - if (ctx->has_evfd) + io_ctx_mark_taskrun(ctx); + if (data_race(ctx->int_flags) & IO_RING_F_HAS_EVFD) io_eventfd_signal(ctx, false); } @@ -231,6 +245,10 @@ void io_req_normal_work_add(struct io_kiocb *req) if (!llist_add(&req->io_task_work.node, &tctx->task_list)) return; + /* + * Doesn't need to use ->rings_rcu, as resizing isn't supported for + * !DEFER_TASKRUN. + */ if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index ee7b49f47cb55..7c23f06e76432 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -110,7 +110,7 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, * because iopoll completion data overlaps with the hash_node used * for tracking. */ - if (ctx->flags & IORING_SETUP_IOPOLL) + if (req->flags & REQ_F_IOPOLL) return; if (!(cmd->flags & IORING_URING_CMD_CANCELABLE)) { @@ -151,6 +151,7 @@ void __io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret, u64 res2, unsigned issue_flags, bool is_cqe32) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); + u32 cflags = 0; if (WARN_ON_ONCE(req->flags & REQ_F_APOLL_MULTISHOT)) return; @@ -160,14 +161,17 @@ void __io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret, u64 res2, if (ret < 0) req_set_fail(req); - io_req_set_res(req, ret, 0); + if (req->flags & (REQ_F_BUFFER_SELECTED | REQ_F_BUFFER_RING)) + cflags |= IORING_CQE_F_BUFFER | + (req->buf_index << IORING_CQE_BUFFER_SHIFT); + io_req_set_res(req, ret, cflags); if (is_cqe32) { if (req->ctx->flags & IORING_SETUP_CQE_MIXED) req->cqe.flags |= IORING_CQE_F_32; io_req_set_cqe32_extra(req, res2, 0); } io_req_uring_cleanup(req, issue_flags); - if (req->ctx->flags & IORING_SETUP_IOPOLL) { + if (req->flags & REQ_F_IOPOLL) { /* order with io_iopoll_req_issued() checking ->iopoll_complete */ smp_store_release(&req->iopoll_completed, 1); } else if (issue_flags & IO_URING_F_COMPLETE_DEFER) { @@ -257,9 +261,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) issue_flags |= IO_URING_F_CQE32; if (io_is_compat(ctx)) issue_flags |= IO_URING_F_COMPAT; - if (ctx->flags & IORING_SETUP_IOPOLL) { - if (!file->f_op->uring_cmd_iopoll) - return -EOPNOTSUPP; + if (ctx->flags & IORING_SETUP_IOPOLL && file->f_op->uring_cmd_iopoll) { + req->flags |= REQ_F_IOPOLL; issue_flags |= IO_URING_F_IOPOLL; req->iopoll_completed = 0; if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) { diff --git a/io_uring/wait.c b/io_uring/wait.c index 0581cadf20eee..c24d018d53abe 100644 --- a/io_uring/wait.c +++ b/io_uring/wait.c @@ -79,12 +79,15 @@ static enum hrtimer_restart io_cqring_min_timer_wakeup(struct hrtimer *timer) if (io_has_work(ctx)) goto out_wake; /* got events since we started waiting, min timeout is done */ - if (iowq->cq_min_tail != READ_ONCE(ctx->rings->cq.tail)) - goto out_wake; - /* if we have any events and min timeout expired, we're done */ - if (io_cqring_events(ctx)) - goto out_wake; + scoped_guard(rcu) { + struct io_rings *rings = io_get_rings(ctx); + if (iowq->cq_min_tail != READ_ONCE(rings->cq.tail)) + goto out_wake; + /* if we have any events and min timeout expired, we're done */ + if (io_cqring_events(ctx)) + goto out_wake; + } /* * If using deferred task_work running and application is waiting on * more than one request, ensure we reset it now where we are switching @@ -186,9 +189,9 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, struct ext_arg *ext_arg) { struct io_wait_queue iowq; - struct io_rings *rings = ctx->rings; + struct io_rings *rings; ktime_t start_time; - int ret; + int ret, nr_wait; min_events = min_t(int, min_events, ctx->cq_entries); @@ -201,15 +204,23 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, if (unlikely(test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))) io_cqring_do_overflow_flush(ctx); - if (__io_cqring_events_user(ctx) >= min_events) + + rcu_read_lock(); + rings = io_get_rings(ctx); + if (__io_cqring_events_user(ctx) >= min_events) { + rcu_read_unlock(); return 0; + } init_waitqueue_func_entry(&iowq.wq, io_wake_function); iowq.wq.private = current; INIT_LIST_HEAD(&iowq.wq.entry); iowq.ctx = ctx; - iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; - iowq.cq_min_tail = READ_ONCE(ctx->rings->cq.tail); + iowq.cq_tail = READ_ONCE(rings->cq.head) + min_events; + iowq.cq_min_tail = READ_ONCE(rings->cq.tail); + nr_wait = (int) iowq.cq_tail - READ_ONCE(rings->cq.tail); + rcu_read_unlock(); + rings = NULL; iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); iowq.hit_timeout = 0; iowq.min_timeout = ext_arg->min_time; @@ -240,14 +251,6 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, trace_io_uring_cqring_wait(ctx, min_events); do { unsigned long check_cq; - int nr_wait; - - /* if min timeout has been hit, don't reset wait count */ - if (!iowq.hit_timeout) - nr_wait = (int) iowq.cq_tail - - READ_ONCE(ctx->rings->cq.tail); - else - nr_wait = 1; if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) { atomic_set(&ctx->cq_wait_nr, nr_wait); @@ -298,11 +301,20 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, break; } cond_resched(); + + /* if min timeout has been hit, don't reset wait count */ + if (!iowq.hit_timeout) + scoped_guard(rcu) + nr_wait = (int) iowq.cq_tail - + READ_ONCE(ctx->rings_rcu->cq.tail); + else + nr_wait = 1; } while (1); if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) finish_wait(&ctx->cq_wait, &iowq.wq); restore_saved_sigmask_unless(ret == -EINTR); - return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; + guard(rcu)(); + return READ_ONCE(ctx->rings_rcu->cq.head) == READ_ONCE(ctx->rings_rcu->cq.tail) ? ret : 0; } diff --git a/io_uring/wait.h b/io_uring/wait.h index 5e236f74e1aff..a4274b137f817 100644 --- a/io_uring/wait.h +++ b/io_uring/wait.h @@ -25,15 +25,19 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, struct ext_arg *ext_arg); int io_run_task_work_sig(struct io_ring_ctx *ctx); void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx); +void io_cqring_overflow_flush_locked(struct io_ring_ctx *ctx); static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx) { - return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); + struct io_rings *rings = io_get_rings(ctx); + return ctx->cached_cq_tail - READ_ONCE(rings->cq.head); } static inline unsigned int __io_cqring_events_user(struct io_ring_ctx *ctx) { - return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head); + struct io_rings *rings = io_get_rings(ctx); + + return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head); } /* diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 62d693287457f..f94f74d0f5660 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -127,10 +127,10 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq, int dmabuf_fd = area_reg->dmabuf_fd; int i, ret; + if (!ifq->dev) + return -EINVAL; if (off) return -EINVAL; - if (WARN_ON_ONCE(!ifq->dev)) - return -EFAULT; if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER)) return -EINVAL; @@ -194,6 +194,7 @@ static int io_import_umem(struct io_zcrx_ifq *ifq, { struct page **pages; int nr_pages, ret; + bool mapped = false; if (area_reg->dmabuf_fd) return -EINVAL; @@ -207,22 +208,37 @@ static int io_import_umem(struct io_zcrx_ifq *ifq, ret = sg_alloc_table_from_pages(&mem->page_sg_table, pages, nr_pages, 0, (unsigned long)nr_pages << PAGE_SHIFT, GFP_KERNEL_ACCOUNT); - if (ret) { - unpin_user_pages(pages, nr_pages); - kvfree(pages); - return ret; + if (ret) + goto out_err; + + if (ifq->dev) { + ret = dma_map_sgtable(ifq->dev, &mem->page_sg_table, + DMA_FROM_DEVICE, IO_DMA_ATTR); + if (ret < 0) + goto out_err; + mapped = true; } mem->account_pages = io_count_account_pages(pages, nr_pages); ret = io_account_mem(ifq->user, ifq->mm_account, mem->account_pages); - if (ret < 0) + if (ret < 0) { mem->account_pages = 0; + goto out_err; + } mem->sgt = &mem->page_sg_table; mem->pages = pages; mem->nr_folios = nr_pages; mem->size = area_reg->len; return ret; +out_err: + if (mapped) + dma_unmap_sgtable(ifq->dev, &mem->page_sg_table, + DMA_FROM_DEVICE, IO_DMA_ATTR); + sg_free_table(&mem->page_sg_table); + unpin_user_pages(pages, nr_pages); + kvfree(pages); + return ret; } static void io_release_area_mem(struct io_zcrx_mem *mem) @@ -284,45 +300,23 @@ static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, } } -static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) -{ - int ret; - - guard(mutex)(&ifq->pp_lock); - if (area->is_mapped) - return 0; - - if (!area->mem.is_dmabuf) { - ret = dma_map_sgtable(ifq->dev, &area->mem.page_sg_table, - DMA_FROM_DEVICE, IO_DMA_ATTR); - if (ret < 0) - return ret; - } - - ret = io_populate_area_dma(ifq, area); - if (ret && !area->mem.is_dmabuf) - dma_unmap_sgtable(ifq->dev, &area->mem.page_sg_table, - DMA_FROM_DEVICE, IO_DMA_ATTR); - if (ret == 0) - area->is_mapped = true; - return ret; -} - -static void io_zcrx_sync_for_device(struct page_pool *pool, - struct net_iov *niov) +static void zcrx_sync_for_device(struct page_pool *pp, struct io_zcrx_ifq *zcrx, + netmem_ref *netmems, unsigned nr) { #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) + struct device *dev = pp->p.dev; + unsigned i, niov_size; dma_addr_t dma_addr; - unsigned niov_size; - - if (!dma_dev_need_sync(pool->p.dev)) + if (!dma_dev_need_sync(dev)) return; + niov_size = 1U << zcrx->niov_shift; - niov_size = 1U << io_pp_to_ifq(pool)->niov_shift; - dma_addr = page_pool_get_dma_addr_netmem(net_iov_to_netmem(niov)); - __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset, - niov_size, pool->p.dma_dir); + for (i = 0; i < nr; i++) { + dma_addr = page_pool_get_dma_addr_netmem(netmems[i]); + __dma_sync_single_for_device(dev, dma_addr + pp->p.offset, + niov_size, pp->p.dma_dir); + } #endif } @@ -392,22 +386,22 @@ static int io_allocate_rbuf_ring(struct io_ring_ctx *ctx, mmap_offset = IORING_MAP_OFF_ZCRX_REGION; mmap_offset += id << IORING_OFF_PBUF_SHIFT; - ret = io_create_region(ctx, &ifq->region, rd, mmap_offset); + ret = io_create_region(ctx, &ifq->rq_region, rd, mmap_offset); if (ret < 0) return ret; - ptr = io_region_get_ptr(&ifq->region); - ifq->rq_ring = (struct io_uring *)ptr; - ifq->rqes = (struct io_uring_zcrx_rqe *)(ptr + off); + ptr = io_region_get_ptr(&ifq->rq_region); + ifq->rq.ring = (struct io_uring *)ptr; + ifq->rq.rqes = (struct io_uring_zcrx_rqe *)(ptr + off); return 0; } static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq) { - io_free_region(ifq->user, &ifq->region); - ifq->rq_ring = NULL; - ifq->rqes = NULL; + io_free_region(ifq->user, &ifq->rq_region); + ifq->rq.ring = NULL; + ifq->rq.rqes = NULL; } static void io_zcrx_free_area(struct io_zcrx_ifq *ifq, @@ -429,8 +423,13 @@ static void io_zcrx_free_area(struct io_zcrx_ifq *ifq, static int io_zcrx_append_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) { - if (ifq->area) + bool kern_readable = !area->mem.is_dmabuf; + + if (WARN_ON_ONCE(ifq->area)) + return -EINVAL; + if (WARN_ON_ONCE(ifq->kern_readable != kern_readable)) return -EINVAL; + ifq->area = area; return 0; } @@ -460,6 +459,8 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, ret = io_import_area(ifq, &area->mem, area_reg); if (ret) goto err; + if (ifq->dev) + area->is_mapped = true; if (buf_size_shift > io_area_max_shift(&area->mem)) { ret = -ERANGE; @@ -495,6 +496,12 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, niov->type = NET_IOV_IOURING; } + if (ifq->dev) { + ret = io_populate_area_dma(ifq, area); + if (ret) + goto err; + } + area->free_count = nr_iovs; /* we're only supporting one area per ifq for now */ area->area_id = 0; @@ -519,7 +526,7 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) return NULL; ifq->if_rxq = -1; - spin_lock_init(&ifq->rq_lock); + spin_lock_init(&ifq->rq.lock); mutex_init(&ifq->pp_lock); refcount_set(&ifq->refs, 1); refcount_set(&ifq->user_refs, 1); @@ -586,9 +593,21 @@ static void io_zcrx_return_niov_freelist(struct net_iov *niov) { struct io_zcrx_area *area = io_zcrx_iov_to_area(niov); - spin_lock_bh(&area->freelist_lock); + guard(spinlock_bh)(&area->freelist_lock); area->freelist[area->free_count++] = net_iov_idx(niov); - spin_unlock_bh(&area->freelist_lock); +} + +static struct net_iov *zcrx_get_free_niov(struct io_zcrx_area *area) +{ + unsigned niov_idx; + + lockdep_assert_held(&area->freelist_lock); + + if (unlikely(!area->free_count)) + return NULL; + + niov_idx = area->freelist[--area->free_count]; + return &area->nia.niovs[niov_idx]; } static void io_zcrx_return_niov(struct net_iov *niov) @@ -624,12 +643,17 @@ static void io_zcrx_scrub(struct io_zcrx_ifq *ifq) } } -static void zcrx_unregister(struct io_zcrx_ifq *ifq) +static void zcrx_unregister_user(struct io_zcrx_ifq *ifq) { if (refcount_dec_and_test(&ifq->user_refs)) { io_close_queue(ifq); io_zcrx_scrub(ifq); } +} + +static void zcrx_unregister(struct io_zcrx_ifq *ifq) +{ + zcrx_unregister_user(ifq); io_put_zcrx_ifq(ifq); } @@ -640,7 +664,7 @@ struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx, lockdep_assert_held(&ctx->mmap_lock); - return ifq ? &ifq->region : NULL; + return ifq ? &ifq->rq_region : NULL; } static int zcrx_box_release(struct inode *inode, struct file *file) @@ -751,10 +775,50 @@ static int import_zcrx(struct io_ring_ctx *ctx, return ret; } -int io_register_zcrx_ifq(struct io_ring_ctx *ctx, - struct io_uring_zcrx_ifq_reg __user *arg) +static int zcrx_register_netdev(struct io_zcrx_ifq *ifq, + struct io_uring_zcrx_ifq_reg *reg, + struct io_uring_zcrx_area_reg *area) { struct pp_memory_provider_params mp_param = {}; + unsigned if_rxq = reg->if_rxq; + int ret; + + ifq->netdev = netdev_get_by_index_lock(current->nsproxy->net_ns, + reg->if_idx); + if (!ifq->netdev) + return -ENODEV; + + netdev_hold(ifq->netdev, &ifq->netdev_tracker, GFP_KERNEL); + + ifq->dev = netdev_queue_get_dma_dev(ifq->netdev, if_rxq); + if (!ifq->dev) { + ret = -EOPNOTSUPP; + goto netdev_put_unlock; + } + get_device(ifq->dev); + + ret = io_zcrx_create_area(ifq, area, reg); + if (ret) + goto netdev_put_unlock; + + if (reg->rx_buf_len) + mp_param.rx_page_size = 1U << ifq->niov_shift; + mp_param.mp_ops = &io_uring_pp_zc_ops; + mp_param.mp_priv = ifq; + ret = __net_mp_open_rxq(ifq->netdev, if_rxq, &mp_param, NULL); + if (ret) + goto netdev_put_unlock; + + ifq->if_rxq = if_rxq; + ret = 0; +netdev_put_unlock: + netdev_unlock(ifq->netdev); + return ret; +} + +int io_register_zcrx(struct io_ring_ctx *ctx, + struct io_uring_zcrx_ifq_reg __user *arg) +{ struct io_uring_zcrx_area_reg area; struct io_uring_zcrx_ifq_reg reg; struct io_uring_region_desc rd; @@ -778,11 +842,15 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, return -EFAULT; if (!mem_is_zero(®.__resv, sizeof(reg.__resv)) || reg.zcrx_id) return -EINVAL; + if (reg.flags & ~ZCRX_SUPPORTED_REG_FLAGS) + return -EINVAL; if (reg.flags & ZCRX_REG_IMPORT) return import_zcrx(ctx, arg, ®); if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd))) return -EFAULT; - if (reg.if_rxq == -1 || !reg.rq_entries || reg.flags) + if (reg.if_rxq == -1 || !reg.rq_entries) + return -EINVAL; + if ((reg.if_rxq || reg.if_idx) && (reg.flags & ZCRX_REG_NODEV)) return -EINVAL; if (reg.rq_entries > IO_RQ_MAX_ENTRIES) { if (!(ctx->flags & IORING_SETUP_CLAMP)) @@ -806,7 +874,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, mmgrab(ctx->mm_account); ifq->mm_account = ctx->mm_account; } - ifq->rq_entries = reg.rq_entries; + ifq->rq.nr_entries = reg.rq_entries; scoped_guard(mutex, &ctx->mmap_lock) { /* preallocate id */ @@ -819,33 +887,17 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, if (ret) goto err; - ifq->netdev = netdev_get_by_index_lock(current->nsproxy->net_ns, reg.if_idx); - if (!ifq->netdev) { - ret = -ENODEV; - goto err; - } - netdev_hold(ifq->netdev, &ifq->netdev_tracker, GFP_KERNEL); + ifq->kern_readable = !(area.flags & IORING_ZCRX_AREA_DMABUF); - ifq->dev = netdev_queue_get_dma_dev(ifq->netdev, reg.if_rxq); - if (!ifq->dev) { - ret = -EOPNOTSUPP; - goto netdev_put_unlock; + if (!(reg.flags & ZCRX_REG_NODEV)) { + ret = zcrx_register_netdev(ifq, ®, &area); + if (ret) + goto err; + } else { + ret = io_zcrx_create_area(ifq, &area, ®); + if (ret) + goto err; } - get_device(ifq->dev); - - ret = io_zcrx_create_area(ifq, &area, ®); - if (ret) - goto netdev_put_unlock; - - if (reg.rx_buf_len) - mp_param.rx_page_size = 1U << ifq->niov_shift; - mp_param.mp_ops = &io_uring_pp_zc_ops; - mp_param.mp_priv = ifq; - ret = __net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL); - if (ret) - goto netdev_put_unlock; - netdev_unlock(ifq->netdev); - ifq->if_rxq = reg.if_rxq; reg.zcrx_id = id; @@ -865,8 +917,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, goto err; } return 0; -netdev_put_unlock: - netdev_unlock(ifq->netdev); err: scoped_guard(mutex, &ctx->mmap_lock) xa_erase(&ctx->zcrx_ctxs, id); @@ -875,17 +925,37 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, return ret; } -static struct net_iov *__io_zcrx_get_free_niov(struct io_zcrx_area *area) +static inline bool is_zcrx_entry_marked(struct io_ring_ctx *ctx, unsigned long id) { - unsigned niov_idx; + return xa_get_mark(&ctx->zcrx_ctxs, id, XA_MARK_0); +} - lockdep_assert_held(&area->freelist_lock); +static inline void set_zcrx_entry_mark(struct io_ring_ctx *ctx, unsigned long id) +{ + xa_set_mark(&ctx->zcrx_ctxs, id, XA_MARK_0); +} - niov_idx = area->freelist[--area->free_count]; - return &area->nia.niovs[niov_idx]; +void io_terminate_zcrx(struct io_ring_ctx *ctx) +{ + struct io_zcrx_ifq *ifq; + unsigned long id = 0; + + lockdep_assert_held(&ctx->uring_lock); + + while (1) { + scoped_guard(mutex, &ctx->mmap_lock) + ifq = xa_find(&ctx->zcrx_ctxs, &id, ULONG_MAX, XA_PRESENT); + if (!ifq) + break; + if (WARN_ON_ONCE(is_zcrx_entry_marked(ctx, id))) + break; + set_zcrx_entry_mark(ctx, id); + id++; + zcrx_unregister_user(ifq); + } } -void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx) +void io_unregister_zcrx(struct io_ring_ctx *ctx) { struct io_zcrx_ifq *ifq; @@ -896,31 +966,35 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx) unsigned long id = 0; ifq = xa_find(&ctx->zcrx_ctxs, &id, ULONG_MAX, XA_PRESENT); - if (ifq) + if (ifq) { + if (WARN_ON_ONCE(!is_zcrx_entry_marked(ctx, id))) { + ifq = NULL; + break; + } xa_erase(&ctx->zcrx_ctxs, id); + } } if (!ifq) break; - zcrx_unregister(ifq); + io_put_zcrx_ifq(ifq); } xa_destroy(&ctx->zcrx_ctxs); } -static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq) +static inline u32 zcrx_rq_entries(struct zcrx_rq *rq) { u32 entries; - entries = smp_load_acquire(&ifq->rq_ring->tail) - ifq->cached_rq_head; - return min(entries, ifq->rq_entries); + entries = smp_load_acquire(&rq->ring->tail) - rq->cached_head; + return min(entries, rq->nr_entries); } -static struct io_uring_zcrx_rqe *io_zcrx_get_rqe(struct io_zcrx_ifq *ifq, - unsigned mask) +static struct io_uring_zcrx_rqe *zcrx_next_rqe(struct zcrx_rq *rq, unsigned mask) { - unsigned int idx = ifq->cached_rq_head++ & mask; + unsigned int idx = rq->cached_head++ & mask; - return &ifq->rqes[idx]; + return &rq->rqes[idx]; } static inline bool io_parse_rqe(struct io_uring_zcrx_rqe *rqe, @@ -946,21 +1020,24 @@ static inline bool io_parse_rqe(struct io_uring_zcrx_rqe *rqe, return true; } -static void io_zcrx_ring_refill(struct page_pool *pp, - struct io_zcrx_ifq *ifq) +static unsigned io_zcrx_ring_refill(struct page_pool *pp, + struct io_zcrx_ifq *ifq, + netmem_ref *netmems, unsigned to_alloc) { - unsigned int mask = ifq->rq_entries - 1; + struct zcrx_rq *rq = &ifq->rq; + unsigned int mask = rq->nr_entries - 1; unsigned int entries; + unsigned allocated = 0; - guard(spinlock_bh)(&ifq->rq_lock); + guard(spinlock_bh)(&rq->lock); - entries = io_zcrx_rqring_entries(ifq); - entries = min_t(unsigned, entries, PP_ALLOC_CACHE_REFILL); + entries = zcrx_rq_entries(rq); + entries = min_t(unsigned, entries, to_alloc); if (unlikely(!entries)) - return; + return 0; do { - struct io_uring_zcrx_rqe *rqe = io_zcrx_get_rqe(ifq, mask); + struct io_uring_zcrx_rqe *rqe = zcrx_next_rqe(rq, mask); struct net_iov *niov; netmem_ref netmem; @@ -978,46 +1055,56 @@ static void io_zcrx_ring_refill(struct page_pool *pp, continue; } - io_zcrx_sync_for_device(pp, niov); - net_mp_netmem_place_in_cache(pp, netmem); + netmems[allocated] = netmem; + allocated++; } while (--entries); - smp_store_release(&ifq->rq_ring->head, ifq->cached_rq_head); + smp_store_release(&rq->ring->head, rq->cached_head); + return allocated; } -static void io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *ifq) +static unsigned io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *ifq, + netmem_ref *netmems, unsigned to_alloc) { struct io_zcrx_area *area = ifq->area; + unsigned allocated = 0; - spin_lock_bh(&area->freelist_lock); - while (area->free_count && pp->alloc.count < PP_ALLOC_CACHE_REFILL) { - struct net_iov *niov = __io_zcrx_get_free_niov(area); - netmem_ref netmem = net_iov_to_netmem(niov); + guard(spinlock_bh)(&area->freelist_lock); + for (allocated = 0; allocated < to_alloc; allocated++) { + struct net_iov *niov = zcrx_get_free_niov(area); + + if (!niov) + break; net_mp_niov_set_page_pool(pp, niov); - io_zcrx_sync_for_device(pp, niov); - net_mp_netmem_place_in_cache(pp, netmem); + netmems[allocated] = net_iov_to_netmem(niov); } - spin_unlock_bh(&area->freelist_lock); + return allocated; } static netmem_ref io_pp_zc_alloc_netmems(struct page_pool *pp, gfp_t gfp) { struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); + netmem_ref *netmems = pp->alloc.cache; + unsigned to_alloc = PP_ALLOC_CACHE_REFILL; + unsigned allocated; /* pp should already be ensuring that */ - if (unlikely(pp->alloc.count)) - goto out_return; + if (WARN_ON_ONCE(pp->alloc.count)) + return 0; - io_zcrx_ring_refill(pp, ifq); - if (likely(pp->alloc.count)) + allocated = io_zcrx_ring_refill(pp, ifq, netmems, to_alloc); + if (likely(allocated)) goto out_return; - io_zcrx_refill_slow(pp, ifq); - if (!pp->alloc.count) + allocated = io_zcrx_refill_slow(pp, ifq, netmems, to_alloc); + if (!allocated) return 0; out_return: - return pp->alloc.cache[--pp->alloc.count]; + zcrx_sync_for_device(pp, ifq, netmems, allocated); + allocated--; + pp->alloc.count += allocated; + return netmems[allocated]; } static bool io_pp_zc_release_netmem(struct page_pool *pp, netmem_ref netmem) @@ -1036,7 +1123,6 @@ static bool io_pp_zc_release_netmem(struct page_pool *pp, netmem_ref netmem) static int io_pp_zc_init(struct page_pool *pp) { struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); - int ret; if (WARN_ON_ONCE(!ifq)) return -EINVAL; @@ -1049,10 +1135,6 @@ static int io_pp_zc_init(struct page_pool *pp) if (pp->p.dma_dir != DMA_FROM_DEVICE) return -EOPNOTSUPP; - ret = io_zcrx_map_area(ifq, ifq->area); - if (ret) - return ret; - refcount_inc(&ifq->refs); return 0; } @@ -1100,14 +1182,14 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = { }; static unsigned zcrx_parse_rq(netmem_ref *netmem_array, unsigned nr, - struct io_zcrx_ifq *zcrx) + struct io_zcrx_ifq *zcrx, struct zcrx_rq *rq) { - unsigned int mask = zcrx->rq_entries - 1; + unsigned int mask = rq->nr_entries - 1; unsigned int i; - nr = min(nr, io_zcrx_rqring_entries(zcrx)); + nr = min(nr, zcrx_rq_entries(rq)); for (i = 0; i < nr; i++) { - struct io_uring_zcrx_rqe *rqe = io_zcrx_get_rqe(zcrx, mask); + struct io_uring_zcrx_rqe *rqe = zcrx_next_rqe(rq, mask); struct net_iov *niov; if (!io_parse_rqe(rqe, zcrx, &niov)) @@ -1115,7 +1197,7 @@ static unsigned zcrx_parse_rq(netmem_ref *netmem_array, unsigned nr, netmem_array[i] = net_iov_to_netmem(niov); } - smp_store_release(&zcrx->rq_ring->head, zcrx->cached_rq_head); + smp_store_release(&rq->ring->head, rq->cached_head); return i; } @@ -1149,8 +1231,10 @@ static int zcrx_flush_rq(struct io_ring_ctx *ctx, struct io_zcrx_ifq *zcrx, return -EINVAL; do { - scoped_guard(spinlock_bh, &zcrx->rq_lock) { - nr = zcrx_parse_rq(netmems, ZCRX_FLUSH_BATCH, zcrx); + struct zcrx_rq *rq = &zcrx->rq; + + scoped_guard(spinlock_bh, &rq->lock) { + nr = zcrx_parse_rq(netmems, ZCRX_FLUSH_BATCH, zcrx, rq); zcrx_return_buffers(netmems, nr); } @@ -1159,7 +1243,7 @@ static int zcrx_flush_rq(struct io_ring_ctx *ctx, struct io_zcrx_ifq *zcrx, if (fatal_signal_pending(current)) break; cond_resched(); - } while (nr == ZCRX_FLUSH_BATCH && total < zcrx->rq_entries); + } while (nr == ZCRX_FLUSH_BATCH && total < zcrx->rq.nr_entries); return 0; } @@ -1169,6 +1253,8 @@ int io_zcrx_ctrl(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) struct zcrx_ctrl ctrl; struct io_zcrx_ifq *zcrx; + BUILD_BUG_ON(sizeof(ctrl.zc_export) != sizeof(ctrl.zc_flush)); + if (nr_args) return -EINVAL; if (copy_from_user(&ctrl, arg, sizeof(ctrl))) @@ -1221,13 +1307,11 @@ static struct net_iov *io_alloc_fallback_niov(struct io_zcrx_ifq *ifq) struct io_zcrx_area *area = ifq->area; struct net_iov *niov = NULL; - if (area->mem.is_dmabuf) + if (!ifq->kern_readable) return NULL; - spin_lock_bh(&area->freelist_lock); - if (area->free_count) - niov = __io_zcrx_get_free_niov(area); - spin_unlock_bh(&area->freelist_lock); + scoped_guard(spinlock_bh, &area->freelist_lock) + niov = zcrx_get_free_niov(area); if (niov) page_pool_fragment_netmem(net_iov_to_netmem(niov), 1); diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 32ab95b2cb811..75e0a4e6ef6e4 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -8,6 +8,9 @@ #include #include +#define ZCRX_SUPPORTED_REG_FLAGS (ZCRX_REG_IMPORT | ZCRX_REG_NODEV) +#define ZCRX_FEATURES (ZCRX_FEATURE_RX_PAGE_SIZE) + struct io_zcrx_mem { unsigned long size; bool is_dmabuf; @@ -38,17 +41,22 @@ struct io_zcrx_area { struct io_zcrx_mem mem; }; +struct zcrx_rq { + spinlock_t lock; + struct io_uring *ring; + struct io_uring_zcrx_rqe *rqes; + u32 cached_head; + u32 nr_entries; +}; + struct io_zcrx_ifq { struct io_zcrx_area *area; unsigned niov_shift; struct user_struct *user; struct mm_struct *mm_account; + bool kern_readable; - spinlock_t rq_lock ____cacheline_aligned_in_smp; - struct io_uring *rq_ring; - struct io_uring_zcrx_rqe *rqes; - u32 cached_rq_head; - u32 rq_entries; + struct zcrx_rq rq ____cacheline_aligned_in_smp; u32 if_rxq; struct device *dev; @@ -63,26 +71,30 @@ struct io_zcrx_ifq { * net stack. */ struct mutex pp_lock; - struct io_mapped_region region; + struct io_mapped_region rq_region; }; #if defined(CONFIG_IO_URING_ZCRX) int io_zcrx_ctrl(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_arg); -int io_register_zcrx_ifq(struct io_ring_ctx *ctx, +int io_register_zcrx(struct io_ring_ctx *ctx, struct io_uring_zcrx_ifq_reg __user *arg); -void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx); +void io_unregister_zcrx(struct io_ring_ctx *ctx); +void io_terminate_zcrx(struct io_ring_ctx *ctx); int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq, struct socket *sock, unsigned int flags, unsigned issue_flags, unsigned int *len); struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx, unsigned int id); #else -static inline int io_register_zcrx_ifq(struct io_ring_ctx *ctx, - struct io_uring_zcrx_ifq_reg __user *arg) +static inline int io_register_zcrx(struct io_ring_ctx *ctx, + struct io_uring_zcrx_ifq_reg __user *arg) { return -EOPNOTSUPP; } -static inline void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx) +static inline void io_unregister_zcrx(struct io_ring_ctx *ctx) +{ +} +static inline void io_terminate_zcrx(struct io_ring_ctx *ctx) { } static inline int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq, diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index be1d71dda3179..01fc2a93f3ef2 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5109,6 +5109,12 @@ static void css_task_iter_advance(struct css_task_iter *it) return; task = list_entry(it->task_pos, struct task_struct, cg_list); + /* + * Hide tasks that are exiting but not yet removed. Keep zombie + * leaders with live threads visible. + */ + if ((task->flags & PF_EXITING) && !atomic_read(&task->signal->live)) + goto repeat; if (it->flags & CSS_TASK_ITER_PROCS) { /* if PROCS, skip over tasks which aren't group leaders */ diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index e200de7c60b62..d21868455341a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -879,7 +879,7 @@ static int generate_sched_domains(cpumask_var_t **domains, /* * Cgroup v2 doesn't support domain attributes, just set all of them * to SD_ATTR_INIT. Also non-isolating partition root CPUs are a - * subset of HK_TYPE_DOMAIN housekeeping CPUs. + * subset of HK_TYPE_DOMAIN_BOOT housekeeping CPUs. */ for (i = 0; i < ndoms; i++) { /* @@ -888,7 +888,7 @@ static int generate_sched_domains(cpumask_var_t **domains, */ if (!csa || csa[i] == &top_cpuset) cpumask_and(doms[i], top_cpuset.effective_cpus, - housekeeping_cpumask(HK_TYPE_DOMAIN)); + housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT)); else cpumask_copy(doms[i], csa[i]->effective_cpus); if (dattr) @@ -1329,17 +1329,22 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus) } /* - * update_hk_sched_domains - Update HK cpumasks & rebuild sched domains + * cpuset_update_sd_hk_unlock - Rebuild sched domains, update HK & unlock * - * Update housekeeping cpumasks and rebuild sched domains if necessary. - * This should be called at the end of cpuset or hotplug actions. + * Update housekeeping cpumasks and rebuild sched domains if necessary and + * then do a cpuset_full_unlock(). + * This should be called at the end of cpuset operation. */ -static void update_hk_sched_domains(void) +static void cpuset_update_sd_hk_unlock(void) + __releases(&cpuset_mutex) + __releases(&cpuset_top_mutex) { + /* force_sd_rebuild will be cleared in rebuild_sched_domains_locked() */ + if (force_sd_rebuild) + rebuild_sched_domains_locked(); + if (update_housekeeping) { - /* Updating HK cpumasks implies rebuild sched domains */ update_housekeeping = false; - force_sd_rebuild = true; cpumask_copy(isolated_hk_cpus, isolated_cpus); /* @@ -1350,22 +1355,19 @@ static void update_hk_sched_domains(void) mutex_unlock(&cpuset_mutex); cpus_read_unlock(); WARN_ON_ONCE(housekeeping_update(isolated_hk_cpus)); - cpus_read_lock(); - mutex_lock(&cpuset_mutex); + mutex_unlock(&cpuset_top_mutex); + } else { + cpuset_full_unlock(); } - /* force_sd_rebuild will be cleared in rebuild_sched_domains_locked() */ - if (force_sd_rebuild) - rebuild_sched_domains_locked(); } /* - * Work function to invoke update_hk_sched_domains() + * Work function to invoke cpuset_update_sd_hk_unlock() */ static void hk_sd_workfn(struct work_struct *work) { cpuset_full_lock(); - update_hk_sched_domains(); - cpuset_full_unlock(); + cpuset_update_sd_hk_unlock(); } /** @@ -3230,8 +3232,7 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of, free_cpuset(trialcs); out_unlock: - update_hk_sched_domains(); - cpuset_full_unlock(); + cpuset_update_sd_hk_unlock(); if (of_cft(of)->private == FILE_MEMLIST) schedule_flush_migrate_mm(); return retval ?: nbytes; @@ -3338,8 +3339,7 @@ static ssize_t cpuset_partition_write(struct kernfs_open_file *of, char *buf, cpuset_full_lock(); if (is_cpuset_online(cs)) retval = update_prstate(cs, val); - update_hk_sched_domains(); - cpuset_full_unlock(); + cpuset_update_sd_hk_unlock(); return retval ?: nbytes; } @@ -3513,8 +3513,7 @@ static void cpuset_css_killed(struct cgroup_subsys_state *css) /* Reset valid partition back to member */ if (is_partition_valid(cs)) update_prstate(cs, PRS_MEMBER); - update_hk_sched_domains(); - cpuset_full_unlock(); + cpuset_update_sd_hk_unlock(); } static void cpuset_css_free(struct cgroup_subsys_state *css) @@ -3923,11 +3922,13 @@ static void cpuset_handle_hotplug(void) rcu_read_unlock(); } - /* - * Queue a work to call housekeeping_update() & rebuild_sched_domains() - * There will be a slight delay before the HK_TYPE_DOMAIN housekeeping - * cpumask can correctly reflect what is in isolated_cpus. + * rebuild_sched_domains() will always be called directly if needed + * to make sure that newly added or removed CPU will be reflected in + * the sched domains. However, if isolated partition invalidation + * or recreation is being done (update_housekeeping set), a work item + * will be queued to call housekeeping_update() to update the + * corresponding housekeeping cpumasks after some slight delay. * * We rely on WORK_STRUCT_PENDING_BIT to not requeue a work item that * is still pending. Before the pending bit is cleared, the work data @@ -3936,8 +3937,10 @@ static void cpuset_handle_hotplug(void) * previously queued work. Since hk_sd_workfn() doesn't use the work * item at all, this is not a problem. */ - if (update_housekeeping || force_sd_rebuild) - queue_work(system_unbound_wq, &hk_sd_work); + if (force_sd_rebuild) + rebuild_sched_domains_cpuslocked(); + if (update_housekeeping) + queue_work(system_dfl_wq, &hk_sd_work); free_tmpmasks(ptmp); } diff --git a/kernel/fork.c b/kernel/fork.c index 65113a304518a..bc2bf58b93b65 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1000,6 +1000,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) #ifdef CONFIG_SCHED_MM_CID tsk->mm_cid.cid = MM_CID_UNSET; tsk->mm_cid.active = 0; + INIT_HLIST_NODE(&tsk->mm_cid.node); #endif return tsk; @@ -1586,7 +1587,6 @@ static int copy_mm(u64 clone_flags, struct task_struct *tsk) tsk->mm = mm; tsk->active_mm = mm; - sched_mm_cid_fork(tsk); return 0; } @@ -2498,7 +2498,6 @@ __latent_entropy struct task_struct *copy_process( exit_nsproxy_namespaces(p); bad_fork_cleanup_mm: if (p->mm) { - sched_mm_cid_exit(p); mm_clear_owner(p->mm, p); mmput(p->mm); } diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ab25b4aa90953..bfc89083daa93 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1144,12 +1144,12 @@ static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, lockdep_assert_held(&kprobe_mutex); ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0); - if (WARN_ONCE(ret < 0, "Failed to arm kprobe-ftrace at %pS (error %d)\n", p->addr, ret)) + if (ret < 0) return ret; if (*cnt == 0) { ret = register_ftrace_function(ops); - if (WARN(ret < 0, "Failed to register kprobe-ftrace (error %d)\n", ret)) { + if (ret < 0) { /* * At this point, sinec ops is not registered, we should be sefe from * registering empty filter. @@ -1178,6 +1178,10 @@ static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, int ret; lockdep_assert_held(&kprobe_mutex); + if (unlikely(kprobe_ftrace_disabled)) { + /* Now ftrace is disabled forever, disarm is already done. */ + return 0; + } if (*cnt == 1) { ret = unregister_ftrace_function(ops); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b7f77c165a6e0..496dff740dcaf 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4729,8 +4729,11 @@ void sched_cancel_fork(struct task_struct *p) scx_cancel_fork(p); } +static void sched_mm_cid_fork(struct task_struct *t); + void sched_post_fork(struct task_struct *p) { + sched_mm_cid_fork(p); uclamp_post_fork(p); scx_post_fork(p); } @@ -10617,13 +10620,10 @@ static inline void mm_cid_transit_to_cpu(struct task_struct *t, struct mm_cid_pc } } -static bool mm_cid_fixup_task_to_cpu(struct task_struct *t, struct mm_struct *mm) +static void mm_cid_fixup_task_to_cpu(struct task_struct *t, struct mm_struct *mm) { /* Remote access to mm::mm_cid::pcpu requires rq_lock */ guard(task_rq_lock)(t); - /* If the task is not active it is not in the users count */ - if (!t->mm_cid.active) - return false; if (cid_on_task(t->mm_cid.cid)) { /* If running on the CPU, put the CID in transit mode, otherwise drop it */ if (task_rq(t)->curr == t) @@ -10631,69 +10631,43 @@ static bool mm_cid_fixup_task_to_cpu(struct task_struct *t, struct mm_struct *mm else mm_unset_cid_on_task(t); } - return true; } -static void mm_cid_do_fixup_tasks_to_cpus(struct mm_struct *mm) +static void mm_cid_fixup_tasks_to_cpus(void) { - struct task_struct *p, *t; - unsigned int users; + struct mm_struct *mm = current->mm; + struct task_struct *t; - /* - * This can obviously race with a concurrent affinity change, which - * increases the number of allowed CPUs for this mm, but that does - * not affect the mode and only changes the CID constraints. A - * possible switch back to per task mode happens either in the - * deferred handler function or in the next fork()/exit(). - * - * The caller has already transferred. The newly incoming task is - * already accounted for, but not yet visible. - */ - users = mm->mm_cid.users - 2; - if (!users) - return; + lockdep_assert_held(&mm->mm_cid.mutex); - guard(rcu)(); - for_other_threads(current, t) { - if (mm_cid_fixup_task_to_cpu(t, mm)) - users--; + hlist_for_each_entry(t, &mm->mm_cid.user_list, mm_cid.node) { + /* Current has already transferred before invoking the fixup. */ + if (t != current) + mm_cid_fixup_task_to_cpu(t, mm); } - if (!users) - return; - - /* Happens only for VM_CLONE processes. */ - for_each_process_thread(p, t) { - if (t == current || t->mm != mm) - continue; - if (mm_cid_fixup_task_to_cpu(t, mm)) { - if (--users == 0) - return; - } - } -} - -static void mm_cid_fixup_tasks_to_cpus(void) -{ - struct mm_struct *mm = current->mm; - - mm_cid_do_fixup_tasks_to_cpus(mm); mm_cid_complete_transit(mm, MM_CID_ONCPU); } static bool sched_mm_cid_add_user(struct task_struct *t, struct mm_struct *mm) { + lockdep_assert_held(&mm->mm_cid.lock); + t->mm_cid.active = 1; + hlist_add_head(&t->mm_cid.node, &mm->mm_cid.user_list); mm->mm_cid.users++; return mm_update_max_cids(mm); } -void sched_mm_cid_fork(struct task_struct *t) +static void sched_mm_cid_fork(struct task_struct *t) { struct mm_struct *mm = t->mm; bool percpu; - WARN_ON_ONCE(!mm || t->mm_cid.cid != MM_CID_UNSET); + if (!mm) + return; + + WARN_ON_ONCE(t->mm_cid.cid != MM_CID_UNSET); guard(mutex)(&mm->mm_cid.mutex); scoped_guard(raw_spinlock_irq, &mm->mm_cid.lock) { @@ -10732,12 +10706,13 @@ void sched_mm_cid_fork(struct task_struct *t) static bool sched_mm_cid_remove_user(struct task_struct *t) { + lockdep_assert_held(&t->mm->mm_cid.lock); + t->mm_cid.active = 0; - scoped_guard(preempt) { - /* Clear the transition bit */ - t->mm_cid.cid = cid_from_transit_cid(t->mm_cid.cid); - mm_unset_cid_on_task(t); - } + /* Clear the transition bit */ + t->mm_cid.cid = cid_from_transit_cid(t->mm_cid.cid); + mm_unset_cid_on_task(t); + hlist_del_init(&t->mm_cid.node); t->mm->mm_cid.users--; return mm_update_max_cids(t->mm); } @@ -10880,11 +10855,13 @@ void mm_init_cid(struct mm_struct *mm, struct task_struct *p) mutex_init(&mm->mm_cid.mutex); mm->mm_cid.irq_work = IRQ_WORK_INIT_HARD(mm_cid_irq_work); INIT_WORK(&mm->mm_cid.work, mm_cid_work_fn); + INIT_HLIST_HEAD(&mm->mm_cid.user_list); cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask); bitmap_zero(mm_cidmask(mm), num_possible_cpus()); } #else /* CONFIG_SCHED_MM_CID */ static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpumask *affmsk) { } +static inline void sched_mm_cid_fork(struct task_struct *t) { } #endif /* !CONFIG_SCHED_MM_CID */ static DEFINE_PER_CPU(struct sched_change_ctx, sched_change_ctx); diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 1594987d637b0..26a6ac2f88267 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1103,7 +1103,7 @@ static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq, } /* seq records the order tasks are queued, used by BPF DSQ iterator */ - dsq->seq++; + WRITE_ONCE(dsq->seq, dsq->seq + 1); p->scx.dsq_seq = dsq->seq; dsq_mod_nr(dsq, 1); @@ -1470,16 +1470,15 @@ static void clr_task_runnable(struct task_struct *p, bool reset_runnable_at) p->scx.flags |= SCX_TASK_RESET_RUNNABLE_AT; } -static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags) +static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int core_enq_flags) { struct scx_sched *sch = scx_root; int sticky_cpu = p->scx.sticky_cpu; + u64 enq_flags = core_enq_flags | rq->scx.extra_enq_flags; if (enq_flags & ENQUEUE_WAKEUP) rq->scx.flags |= SCX_RQ_IN_WAKEUP; - enq_flags |= rq->scx.extra_enq_flags; - if (sticky_cpu >= 0) p->scx.sticky_cpu = -1; @@ -3908,8 +3907,8 @@ static u32 bypass_lb_cpu(struct scx_sched *sch, struct rq *rq, * consider offloading iff the total queued duration is over the * threshold. */ - min_delta_us = scx_bypass_lb_intv_us / SCX_BYPASS_LB_MIN_DELTA_DIV; - if (delta < DIV_ROUND_UP(min_delta_us, scx_slice_bypass_us)) + min_delta_us = READ_ONCE(scx_bypass_lb_intv_us) / SCX_BYPASS_LB_MIN_DELTA_DIV; + if (delta < DIV_ROUND_UP(min_delta_us, READ_ONCE(scx_slice_bypass_us))) return 0; raw_spin_rq_lock_irq(rq); @@ -4137,7 +4136,7 @@ static void scx_bypass(bool bypass) WARN_ON_ONCE(scx_bypass_depth <= 0); if (scx_bypass_depth != 1) goto unlock; - WRITE_ONCE(scx_slice_dfl, scx_slice_bypass_us * NSEC_PER_USEC); + WRITE_ONCE(scx_slice_dfl, READ_ONCE(scx_slice_bypass_us) * NSEC_PER_USEC); bypass_timestamp = ktime_get_ns(); if (sch) scx_add_event(sch, SCX_EV_BYPASS_ACTIVATE, 1); @@ -5259,13 +5258,14 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) if (!READ_ONCE(helper)) { mutex_lock(&helper_mutex); if (!helper) { - helper = kthread_run_worker(0, "scx_enable_helper"); - if (IS_ERR_OR_NULL(helper)) { - helper = NULL; + struct kthread_worker *w = + kthread_run_worker(0, "scx_enable_helper"); + if (IS_ERR_OR_NULL(w)) { mutex_unlock(&helper_mutex); return -ENOMEM; } - sched_set_fifo(helper->task); + sched_set_fifo(w->task); + WRITE_ONCE(helper, w); } mutex_unlock(&helper_mutex); } diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h index 11ebb744d8931..00b450597f3e0 100644 --- a/kernel/sched/ext_internal.h +++ b/kernel/sched/ext_internal.h @@ -1035,26 +1035,108 @@ static const char *scx_enable_state_str[] = { }; /* - * sched_ext_entity->ops_state + * Task Ownership State Machine (sched_ext_entity->ops_state) * - * Used to track the task ownership between the SCX core and the BPF scheduler. - * State transitions look as follows: + * The sched_ext core uses this state machine to track task ownership + * between the SCX core and the BPF scheduler. This allows the BPF + * scheduler to dispatch tasks without strict ordering requirements, while + * the SCX core safely rejects invalid dispatches. * - * NONE -> QUEUEING -> QUEUED -> DISPATCHING - * ^ | | - * | v v - * \-------------------------------/ + * State Transitions * - * QUEUEING and DISPATCHING states can be waited upon. See wait_ops_state() call - * sites for explanations on the conditions being waited upon and why they are - * safe. Transitions out of them into NONE or QUEUED must store_release and the - * waiters should load_acquire. + * .------------> NONE (owned by SCX core) + * | | ^ + * | enqueue | | direct dispatch + * | v | + * | QUEUEING -------' + * | | + * | enqueue | + * | completes | + * | v + * | QUEUED (owned by BPF scheduler) + * | | + * | dispatch | + * | | + * | v + * | DISPATCHING + * | | + * | dispatch | + * | completes | + * `---------------' * - * Tracking scx_ops_state enables sched_ext core to reliably determine whether - * any given task can be dispatched by the BPF scheduler at all times and thus - * relaxes the requirements on the BPF scheduler. This allows the BPF scheduler - * to try to dispatch any task anytime regardless of its state as the SCX core - * can safely reject invalid dispatches. + * State Descriptions + * + * - %SCX_OPSS_NONE: + * Task is owned by the SCX core. It's either on a run queue, running, + * or being manipulated by the core scheduler. The BPF scheduler has no + * claim on this task. + * + * - %SCX_OPSS_QUEUEING: + * Transitional state while transferring a task from the SCX core to + * the BPF scheduler. The task's rq lock is held during this state. + * Since QUEUEING is both entered and exited under the rq lock, dequeue + * can never observe this state (it would be a BUG). When finishing a + * dispatch, if the task is still in %SCX_OPSS_QUEUEING the completion + * path busy-waits for it to leave this state (via wait_ops_state()) + * before retrying. + * + * - %SCX_OPSS_QUEUED: + * Task is owned by the BPF scheduler. It's on a DSQ (dispatch queue) + * and the BPF scheduler is responsible for dispatching it. A QSEQ + * (queue sequence number) is embedded in this state to detect + * dispatch/dequeue races: if a task is dequeued and re-enqueued, the + * QSEQ changes and any in-flight dispatch operations targeting the old + * QSEQ are safely ignored. + * + * - %SCX_OPSS_DISPATCHING: + * Transitional state while transferring a task from the BPF scheduler + * back to the SCX core. This state indicates the BPF scheduler has + * selected the task for execution. When dequeue needs to take the task + * off a DSQ and it is still in %SCX_OPSS_DISPATCHING, the dequeue path + * busy-waits for it to leave this state (via wait_ops_state()) before + * proceeding. Exits to %SCX_OPSS_NONE when dispatch completes. + * + * Memory Ordering + * + * Transitions out of %SCX_OPSS_QUEUEING and %SCX_OPSS_DISPATCHING into + * %SCX_OPSS_NONE or %SCX_OPSS_QUEUED must use atomic_long_set_release() + * and waiters must use atomic_long_read_acquire(). This ensures proper + * synchronization between concurrent operations. + * + * Cross-CPU Task Migration + * + * When moving a task in the %SCX_OPSS_DISPATCHING state, we can't simply + * grab the target CPU's rq lock because a concurrent dequeue might be + * waiting on %SCX_OPSS_DISPATCHING while holding the source rq lock + * (deadlock). + * + * The sched_ext core uses a "lock dancing" protocol coordinated by + * p->scx.holding_cpu. When moving a task to a different rq: + * + * 1. Verify task can be moved (CPU affinity, migration_disabled, etc.) + * 2. Set p->scx.holding_cpu to the current CPU + * 3. Set task state to %SCX_OPSS_NONE; dequeue waits while DISPATCHING + * is set, so clearing DISPATCHING first prevents the circular wait + * (safe to lock the rq we need) + * 4. Unlock the current CPU's rq + * 5. Lock src_rq (where the task currently lives) + * 6. Verify p->scx.holding_cpu == current CPU, if not, dequeue won the + * race (dequeue clears holding_cpu to -1 when it takes the task), in + * this case migration is aborted + * 7. If src_rq == dst_rq: clear holding_cpu and enqueue directly + * into dst_rq's local DSQ (no lock swap needed) + * 8. Otherwise: call move_remote_task_to_local_dsq(), which releases + * src_rq, locks dst_rq, and performs the deactivate/activate + * migration cycle (dst_rq is held on return) + * 9. Unlock dst_rq and re-lock the current CPU's rq to restore + * the lock state expected by the caller + * + * If any verification fails, abort the migration. + * + * This state tracking allows the BPF scheduler to try to dispatch any task + * at any time regardless of its state. The SCX core can safely + * reject/ignore invalid dispatches, simplifying the BPF scheduler + * implementation. */ enum scx_ops_state { SCX_OPSS_NONE, /* owned by the SCX core */ diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 3681b6ad9276f..b954491651227 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -221,7 +221,7 @@ static void cpuidle_idle_call(void) next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns); call_cpuidle(drv, dev, next_state); - } else { + } else if (drv->state_count > 1) { bool stop_tick = true; /* @@ -239,6 +239,15 @@ static void cpuidle_idle_call(void) * Give the governor an opportunity to reflect on the outcome */ cpuidle_reflect(dev, entered_state); + } else { + tick_nohz_idle_retain_tick(); + + /* + * If there is only a single idle state (or none), there is + * nothing meaningful for the governor to choose. Skip the + * governor and always use state 0. + */ + call_cpuidle(drv, dev, 0); } exit_idle: diff --git a/kernel/time/time.c b/kernel/time/time.c index 36fd2313ae7ee..0d832317d5766 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -697,7 +697,7 @@ EXPORT_SYMBOL(clock_t_to_jiffies); * * Return: jiffies_64 value converted to 64-bit "clock_t" (CLOCKS_PER_SEC) */ -u64 jiffies_64_to_clock_t(u64 x) +notrace u64 jiffies_64_to_clock_t(u64 x) { #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 # if HZ < USER_HZ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index aeaec79bc09c4..b77119d71641a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -190,7 +190,7 @@ struct worker_pool { int id; /* I: pool ID */ unsigned int flags; /* L: flags */ - unsigned long watchdog_ts; /* L: watchdog timestamp */ + unsigned long last_progress_ts; /* L: last forward progress timestamp */ bool cpu_stall; /* WD: stalled cpu bound pool */ /* @@ -1697,7 +1697,7 @@ static void __pwq_activate_work(struct pool_workqueue *pwq, WARN_ON_ONCE(!(*wdb & WORK_STRUCT_INACTIVE)); trace_workqueue_activate_work(work); if (list_empty(&pwq->pool->worklist)) - pwq->pool->watchdog_ts = jiffies; + pwq->pool->last_progress_ts = jiffies; move_linked_works(work, &pwq->pool->worklist, NULL); __clear_bit(WORK_STRUCT_INACTIVE_BIT, wdb); } @@ -2348,7 +2348,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, */ if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) { if (list_empty(&pool->worklist)) - pool->watchdog_ts = jiffies; + pool->last_progress_ts = jiffies; trace_workqueue_activate_work(work); insert_work(pwq, work, &pool->worklist, work_flags); @@ -3204,6 +3204,7 @@ __acquires(&pool->lock) worker->current_pwq = pwq; if (worker->task) worker->current_at = worker->task->se.sum_exec_runtime; + worker->current_start = jiffies; work_data = *work_data_bits(work); worker->current_color = get_work_color(work_data); @@ -3352,7 +3353,7 @@ static void process_scheduled_works(struct worker *worker) while ((work = list_first_entry_or_null(&worker->scheduled, struct work_struct, entry))) { if (first) { - worker->pool->watchdog_ts = jiffies; + worker->pool->last_progress_ts = jiffies; first = false; } process_one_work(worker, work); @@ -4850,7 +4851,7 @@ static int init_worker_pool(struct worker_pool *pool) pool->cpu = -1; pool->node = NUMA_NO_NODE; pool->flags |= POOL_DISASSOCIATED; - pool->watchdog_ts = jiffies; + pool->last_progress_ts = jiffies; INIT_LIST_HEAD(&pool->worklist); INIT_LIST_HEAD(&pool->idle_list); hash_init(pool->busy_hash); @@ -6274,7 +6275,7 @@ static void pr_cont_worker_id(struct worker *worker) { struct worker_pool *pool = worker->pool; - if (pool->flags & WQ_BH) + if (pool->flags & POOL_BH) pr_cont("bh%s", pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); else @@ -6359,6 +6360,8 @@ static void show_pwq(struct pool_workqueue *pwq) pr_cont(" %s", comma ? "," : ""); pr_cont_worker_id(worker); pr_cont(":%ps", worker->current_func); + pr_cont(" for %us", + jiffies_to_msecs(jiffies - worker->current_start) / 1000); list_for_each_entry(work, &worker->scheduled, entry) pr_cont_work(false, work, &pcws); pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); @@ -6462,7 +6465,7 @@ static void show_one_worker_pool(struct worker_pool *pool) /* How long the first pending work is waiting for a worker. */ if (!list_empty(&pool->worklist)) - hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000; + hung = jiffies_to_msecs(jiffies - pool->last_progress_ts) / 1000; /* * Defer printing to avoid deadlocks in console drivers that @@ -7580,11 +7583,11 @@ MODULE_PARM_DESC(panic_on_stall_time, "Panic if stall exceeds this many seconds /* * Show workers that might prevent the processing of pending work items. - * The only candidates are CPU-bound workers in the running state. - * Pending work items should be handled by another idle worker - * in all other situations. + * A busy worker that is not running on the CPU (e.g. sleeping in + * wait_event_idle() with PF_WQ_WORKER cleared) can stall the pool just as + * effectively as a CPU-bound one, so dump every in-flight worker. */ -static void show_cpu_pool_hog(struct worker_pool *pool) +static void show_cpu_pool_busy_workers(struct worker_pool *pool) { struct worker *worker; unsigned long irq_flags; @@ -7593,36 +7596,34 @@ static void show_cpu_pool_hog(struct worker_pool *pool) raw_spin_lock_irqsave(&pool->lock, irq_flags); hash_for_each(pool->busy_hash, bkt, worker, hentry) { - if (task_is_running(worker->task)) { - /* - * Defer printing to avoid deadlocks in console - * drivers that queue work while holding locks - * also taken in their write paths. - */ - printk_deferred_enter(); + /* + * Defer printing to avoid deadlocks in console + * drivers that queue work while holding locks + * also taken in their write paths. + */ + printk_deferred_enter(); - pr_info("pool %d:\n", pool->id); - sched_show_task(worker->task); + pr_info("pool %d:\n", pool->id); + sched_show_task(worker->task); - printk_deferred_exit(); - } + printk_deferred_exit(); } raw_spin_unlock_irqrestore(&pool->lock, irq_flags); } -static void show_cpu_pools_hogs(void) +static void show_cpu_pools_busy_workers(void) { struct worker_pool *pool; int pi; - pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n"); + pr_info("Showing backtraces of busy workers in stalled worker pools:\n"); rcu_read_lock(); for_each_pool(pool, pi) { if (pool->cpu_stall) - show_cpu_pool_hog(pool); + show_cpu_pool_busy_workers(pool); } @@ -7691,7 +7692,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu)); else touched = READ_ONCE(wq_watchdog_touched); - pool_ts = READ_ONCE(pool->watchdog_ts); + pool_ts = READ_ONCE(pool->last_progress_ts); if (time_after(pool_ts, touched)) ts = pool_ts; @@ -7719,7 +7720,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) show_all_workqueues(); if (cpu_pool_stall) - show_cpu_pools_hogs(); + show_cpu_pools_busy_workers(); if (lockup_detected) panic_on_wq_watchdog(max_stall_time); diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index f6275944ada77..8def1ddc5a1bf 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -32,6 +32,7 @@ struct worker { work_func_t current_func; /* K: function */ struct pool_workqueue *current_pwq; /* K: pwq */ u64 current_at; /* K: runtime at start or last wakeup */ + unsigned long current_start; /* K: start time of current work item */ unsigned int current_color; /* K: color */ int sleeping; /* S: is worker sleeping? */ diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 449369a608461..2da049216fe0e 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -316,7 +316,7 @@ int __init xbc_node_compose_key_after(struct xbc_node *root, depth ? "." : ""); if (ret < 0) return ret; - if (ret > size) { + if (ret >= size) { size = 0; } else { size -= ret; @@ -532,9 +532,9 @@ static char *skip_spaces_until_newline(char *p) static int __init __xbc_open_brace(char *p) { /* Push the last key as open brace */ - open_brace[brace_index++] = xbc_node_index(last_parent); if (brace_index >= XBC_DEPTH_MAX) return xbc_parse_error("Exceed max depth of braces", p); + open_brace[brace_index++] = xbc_node_index(last_parent); return 0; } @@ -802,7 +802,7 @@ static int __init xbc_verify_tree(void) /* Brace closing */ if (brace_index) { - n = &xbc_nodes[open_brace[brace_index]]; + n = &xbc_nodes[open_brace[brace_index - 1]]; return xbc_parse_error("Brace is not closed", xbc_node_get_data(n)); } diff --git a/mm/cma.c b/mm/cma.c index 94b5da468a7d7..15cc0ae76c8eb 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -1013,6 +1013,7 @@ bool cma_release(struct cma *cma, const struct page *pages, unsigned long count) { struct cma_memrange *cmr; + unsigned long ret = 0; unsigned long i, pfn; cmr = find_cma_memrange(cma, pages, count); @@ -1021,7 +1022,9 @@ bool cma_release(struct cma *cma, const struct page *pages, pfn = page_to_pfn(pages); for (i = 0; i < count; i++, pfn++) - VM_WARN_ON(!put_page_testzero(pfn_to_page(pfn))); + ret += !put_page_testzero(pfn_to_page(pfn)); + + WARN(ret, "%lu pages are still in use!\n", ret); __cma_release_frozen(cma, cmr, pages, count); diff --git a/mm/damon/core.c b/mm/damon/core.c index adfc52fee9dc2..c1d1091d307e4 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1562,8 +1562,13 @@ int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control) } ctx->walk_control = control; mutex_unlock(&ctx->walk_control_lock); - if (!damon_is_running(ctx)) + if (!damon_is_running(ctx)) { + mutex_lock(&ctx->walk_control_lock); + if (ctx->walk_control == control) + ctx->walk_control = NULL; + mutex_unlock(&ctx->walk_control_lock); return -EINVAL; + } wait_for_completion(&control->completion); if (control->canceled) return -ECANCELED; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8e2746ea74adf..912c248a3f7e1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3631,6 +3631,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, const bool is_anon = folio_test_anon(folio); int old_order = folio_order(folio); int start_order = split_type == SPLIT_TYPE_UNIFORM ? new_order : old_order - 1; + struct folio *old_folio = folio; int split_order; /* @@ -3651,12 +3652,16 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, * uniform split has xas_split_alloc() called before * irq is disabled to allocate enough memory, whereas * non-uniform split can handle ENOMEM. + * Use the to-be-split folio, so that a parallel + * folio_try_get() waits on it until xarray is updated + * with after-split folios and the original one is + * unfrozen. */ - if (split_type == SPLIT_TYPE_UNIFORM) - xas_split(xas, folio, old_order); - else { + if (split_type == SPLIT_TYPE_UNIFORM) { + xas_split(xas, old_folio, old_order); + } else { xas_set_order(xas, folio->index, split_order); - xas_try_split(xas, folio, old_order); + xas_try_split(xas, old_folio, old_order); if (xas_error(xas)) return xas_error(xas); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0beb6e22bc269..327eaa4074d39 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3101,7 +3101,7 @@ static __init void *alloc_bootmem(struct hstate *h, int nid, bool node_exact) * extract the actual node first. */ if (m) - listnode = early_pfn_to_nid(PHYS_PFN(virt_to_phys(m))); + listnode = early_pfn_to_nid(PHYS_PFN(__pa(m))); } if (m) { @@ -3160,7 +3160,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid) * The head struct page is used to get folio information by the HugeTLB * subsystem like zone id and node id. */ - memblock_reserved_mark_noinit(virt_to_phys((void *)m + PAGE_SIZE), + memblock_reserved_mark_noinit(__pa((void *)m + PAGE_SIZE), huge_page_size(h) - PAGE_SIZE); return 1; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a52da3a5e4fd9..772bac21d1558 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3086,7 +3086,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, if (!local_trylock(&obj_stock.lock)) { if (pgdat) - mod_objcg_mlstate(objcg, pgdat, idx, nr_bytes); + mod_objcg_mlstate(objcg, pgdat, idx, nr_acct); nr_pages = nr_bytes >> PAGE_SHIFT; nr_bytes = nr_bytes & (PAGE_SIZE - 1); atomic_add(nr_bytes, &objcg->nr_charged_bytes); diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c index e485b828d173f..b8edb9f981d7f 100644 --- a/mm/memfd_luo.c +++ b/mm/memfd_luo.c @@ -146,19 +146,56 @@ static int memfd_luo_preserve_folios(struct file *file, for (i = 0; i < nr_folios; i++) { struct memfd_luo_folio_ser *pfolio = &folios_ser[i]; struct folio *folio = folios[i]; - unsigned int flags = 0; err = kho_preserve_folio(folio); if (err) goto err_unpreserve; - if (folio_test_dirty(folio)) - flags |= MEMFD_LUO_FOLIO_DIRTY; - if (folio_test_uptodate(folio)) - flags |= MEMFD_LUO_FOLIO_UPTODATE; + folio_lock(folio); + + /* + * A dirty folio is one which has been written to. A clean folio + * is its opposite. Since a clean folio does not carry user + * data, it can be freed by page reclaim under memory pressure. + * + * Saving the dirty flag at prepare() time doesn't work since it + * can change later. Saving it at freeze() also won't work + * because the dirty bit is normally synced at unmap and there + * might still be a mapping of the file at freeze(). + * + * To see why this is a problem, say a folio is clean at + * preserve, but gets dirtied later. The pfolio flags will mark + * it as clean. After retrieve, the next kernel might try to + * reclaim this folio under memory pressure, losing user data. + * + * Unconditionally mark it dirty to avoid this problem. This + * comes at the cost of making clean folios un-reclaimable after + * live update. + */ + folio_mark_dirty(folio); + + /* + * If the folio is not uptodate, it was fallocated but never + * used. Saving this flag at prepare() doesn't work since it + * might change later when someone uses the folio. + * + * Since we have taken the performance penalty of allocating, + * zeroing, and pinning all the folios in the holes, take a bit + * more and zero all non-uptodate folios too. + * + * NOTE: For someone looking to improve preserve performance, + * this is a good place to look. + */ + if (!folio_test_uptodate(folio)) { + folio_zero_range(folio, 0, folio_size(folio)); + flush_dcache_folio(folio); + folio_mark_uptodate(folio); + } + + folio_unlock(folio); pfolio->pfn = folio_pfn(folio); - pfolio->flags = flags; + pfolio->flags = MEMFD_LUO_FOLIO_DIRTY | MEMFD_LUO_FOLIO_UPTODATE; pfolio->index = folio->index; } diff --git a/mm/slub.c b/mm/slub.c index 20cb4f3b636db..2b2d33cc735cb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2119,13 +2119,6 @@ static inline size_t obj_exts_alloc_size(struct kmem_cache *s, size_t sz = sizeof(struct slabobj_ext) * slab->objects; struct kmem_cache *obj_exts_cache; - /* - * slabobj_ext array for KMALLOC_CGROUP allocations - * are served from KMALLOC_NORMAL caches. - */ - if (!mem_alloc_profiling_enabled()) - return sz; - if (sz > KMALLOC_MAX_CACHE_SIZE) return sz; @@ -2797,6 +2790,7 @@ static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf) if (s->flags & SLAB_KMALLOC) mark_obj_codetag_empty(sheaf); + VM_WARN_ON_ONCE(sheaf->size > 0); kfree(sheaf); stat(s, SHEAF_FREE); @@ -2828,6 +2822,7 @@ static int refill_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf, return 0; } +static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf); static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp) { @@ -2837,6 +2832,7 @@ static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp) return NULL; if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC | __GFP_NOWARN)) { + sheaf_flush_unused(s, sheaf); free_empty_sheaf(s, sheaf); return NULL; } @@ -4623,6 +4619,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, * we must be very low on memory so don't bother * with the barn */ + sheaf_flush_unused(s, empty); free_empty_sheaf(s, empty); } } else { diff --git a/net/ceph/auth.c b/net/ceph/auth.c index 343c841784ce5..901b93530b214 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -205,9 +205,9 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, s32 result; u64 global_id; void *payload, *payload_end; - int payload_len; + u32 payload_len; char *result_msg; - int result_msg_len; + u32 result_msg_len; int ret = -EINVAL; mutex_lock(&ac->mutex); @@ -217,10 +217,12 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, result = ceph_decode_32(&p); global_id = ceph_decode_64(&p); payload_len = ceph_decode_32(&p); + ceph_decode_need(&p, end, payload_len, bad); payload = p; p += payload_len; ceph_decode_need(&p, end, sizeof(u32), bad); result_msg_len = ceph_decode_32(&p); + ceph_decode_need(&p, end, result_msg_len, bad); result_msg = p; p += result_msg_len; if (p != end) diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index 5ec3272cd2dd1..50f65820f623f 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -392,7 +392,7 @@ static int head_onwire_len(int ctrl_len, bool secure) int head_len; int rem_len; - BUG_ON(ctrl_len < 0 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN); + BUG_ON(ctrl_len < 1 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN); if (secure) { head_len = CEPH_PREAMBLE_SECURE_LEN; @@ -401,9 +401,7 @@ static int head_onwire_len(int ctrl_len, bool secure) head_len += padded_len(rem_len) + CEPH_GCM_TAG_LEN; } } else { - head_len = CEPH_PREAMBLE_PLAIN_LEN; - if (ctrl_len) - head_len += ctrl_len + CEPH_CRC_LEN; + head_len = CEPH_PREAMBLE_PLAIN_LEN + ctrl_len + CEPH_CRC_LEN; } return head_len; } @@ -528,11 +526,16 @@ static int decode_preamble(void *p, struct ceph_frame_desc *desc) desc->fd_aligns[i] = ceph_decode_16(&p); } - if (desc->fd_lens[0] < 0 || + /* + * This would fire for FRAME_TAG_WAIT (it has one empty + * segment), but we should never get it as client. + */ + if (desc->fd_lens[0] < 1 || desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) { pr_err("bad control segment length %d\n", desc->fd_lens[0]); return -EINVAL; } + if (desc->fd_lens[1] < 0 || desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) { pr_err("bad front segment length %d\n", desc->fd_lens[1]); @@ -549,10 +552,6 @@ static int decode_preamble(void *p, struct ceph_frame_desc *desc) return -EINVAL; } - /* - * This would fire for FRAME_TAG_WAIT (it has one empty - * segment), but we should never get it as client. - */ if (!desc->fd_lens[desc->fd_seg_cnt - 1]) { pr_err("last segment empty, segment count %d\n", desc->fd_seg_cnt); @@ -2833,12 +2832,15 @@ static int process_message_header(struct ceph_connection *con, void *p, void *end) { struct ceph_frame_desc *desc = &con->v2.in_desc; - struct ceph_msg_header2 *hdr2 = p; + struct ceph_msg_header2 *hdr2; struct ceph_msg_header hdr; int skip; int ret; u64 seq; + ceph_decode_need(&p, end, sizeof(*hdr2), bad); + hdr2 = p; + /* verify seq# */ seq = le64_to_cpu(hdr2->seq); if ((s64)seq - (s64)con->in_seq < 1) { @@ -2869,6 +2871,10 @@ static int process_message_header(struct ceph_connection *con, WARN_ON(!con->in_msg); WARN_ON(con->in_msg->con != con); return 1; + +bad: + pr_err("failed to decode message header\n"); + return -EINVAL; } static int process_message(struct ceph_connection *con) @@ -2898,6 +2904,11 @@ static int __handle_control(struct ceph_connection *con, void *p) if (con->v2.in_desc.fd_tag != FRAME_TAG_MESSAGE) return process_control(con, p, end); + if (con->state != CEPH_CON_S_OPEN) { + con->error_msg = "protocol error, unexpected message"; + return -EINVAL; + } + ret = process_message_header(con, p, end); if (ret < 0) return ret; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 5136b3766c44d..d5080530ce0cc 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -72,8 +72,8 @@ static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2) struct ceph_monmap *monmap = NULL; struct ceph_fsid fsid; u32 struct_len; - int blob_len; - int num_mon; + u32 blob_len; + u32 num_mon; u8 struct_v; u32 epoch; int ret; @@ -112,7 +112,7 @@ static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2) } ceph_decode_32_safe(p, end, num_mon, e_inval); - dout("%s fsid %pU epoch %u num_mon %d\n", __func__, &fsid, epoch, + dout("%s fsid %pU epoch %u num_mon %u\n", __func__, &fsid, epoch, num_mon); if (num_mon > CEPH_MAX_MON) goto e_inval; diff --git a/net/core/dev.h b/net/core/dev.h index 98793a738f43c..781619e76b3e8 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -366,41 +366,6 @@ static inline void napi_assert_will_not_race(const struct napi_struct *napi) void kick_defer_list_purge(unsigned int cpu); -#define XMIT_RECURSION_LIMIT 8 - -#ifndef CONFIG_PREEMPT_RT -static inline bool dev_xmit_recursion(void) -{ - return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > - XMIT_RECURSION_LIMIT); -} - -static inline void dev_xmit_recursion_inc(void) -{ - __this_cpu_inc(softnet_data.xmit.recursion); -} - -static inline void dev_xmit_recursion_dec(void) -{ - __this_cpu_dec(softnet_data.xmit.recursion); -} -#else -static inline bool dev_xmit_recursion(void) -{ - return unlikely(current->net_xmit.recursion > XMIT_RECURSION_LIMIT); -} - -static inline void dev_xmit_recursion_inc(void) -{ - current->net_xmit.recursion++; -} - -static inline void dev_xmit_recursion_dec(void) -{ - current->net_xmit.recursion--; -} -#endif - int dev_set_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack); diff --git a/net/core/filter.c b/net/core/filter.c index a77d23fe23597..78b548158fb05 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2228,6 +2228,9 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, return -ENOMEM; } + if (unlikely(!ipv6_mod_enabled())) + goto out_drop; + rcu_read_lock(); if (!nh) { dst = skb_dst(skb); @@ -2335,6 +2338,10 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); } else if (nh->nh_family == AF_INET6) { + if (unlikely(!ipv6_mod_enabled())) { + rcu_read_unlock(); + goto out_drop; + } neigh = ip_neigh_gw6(dev, &nh->ipv6_nh); is_v6gw = true; } else if (nh->nh_family == AF_INET) { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a95cfe77f7f00..c56a4e7bf790c 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -820,7 +820,8 @@ int pneigh_create(struct neigh_table *tbl, struct net *net, update: WRITE_ONCE(n->flags, flags); n->permanent = permanent; - WRITE_ONCE(n->protocol, protocol); + if (protocol) + WRITE_ONCE(n->protocol, protocol); out: mutex_unlock(&tbl->phash_lock); return err; diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index c82a95beceff8..ee5060d8eec0e 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -245,7 +245,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, goto err_cancel; if (pool->user.detach_time && nla_put_uint(rsp, NETDEV_A_PAGE_POOL_DETACH_TIME, - pool->user.detach_time)) + ktime_divns(pool->user.detach_time, NSEC_PER_SEC))) goto err_cancel; if (pool->mp_ops && pool->mp_ops->nl_fill(pool->mp_priv, rsp, NULL)) @@ -337,7 +337,7 @@ int page_pool_list(struct page_pool *pool) void page_pool_detached(struct page_pool *pool) { mutex_lock(&page_pools_lock); - pool->user.detach_time = ktime_get_boottime_seconds(); + pool->user.detach_time = ktime_get_boottime(); netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF); mutex_unlock(&page_pools_lock); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8036e76aa1e47..c7731e300a442 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -124,6 +124,12 @@ #include +/* Keep the definition of IPv6 disable here for now, to avoid annoying linker + * issues in case IPv6=m + */ +int disable_ipv6_mod; +EXPORT_SYMBOL(disable_ipv6_mod); + /* The inetsw table contains everything that inet_create needs to * build a new socket. */ diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 2e61ac1371289..5683c328990f4 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -58,6 +58,19 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, struct iphdr *iph; int err; + if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) { + if (dev) { + net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", + dev->name); + DEV_STATS_INC(dev, tx_errors); + } + ip_rt_put(rt); + kfree_skb(skb); + return; + } + + dev_xmit_recursion_inc(); + skb_scrub_packet(skb, xnet); skb_clear_hash_if_not_l4(skb); @@ -88,6 +101,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, pkt_len = 0; iptunnel_xmit_stats(dev, pkt_len); } + + dev_xmit_recursion_dec(); } EXPORT_SYMBOL_GPL(iptunnel_xmit); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 1aa2b05ee8de8..c942f12822363 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2002,7 +2002,8 @@ static void nh_hthr_group_rebalance(struct nh_group *nhg) } static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, - struct nl_info *nlinfo) + struct nl_info *nlinfo, + struct list_head *deferred_free) { struct nh_grp_entry *nhges, *new_nhges; struct nexthop *nhp = nhge->nh_parent; @@ -2062,8 +2063,8 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, rcu_assign_pointer(nhp->nh_grp, newg); list_del(&nhge->nh_list); - free_percpu(nhge->stats); nexthop_put(nhge->nh); + list_add(&nhge->nh_list, deferred_free); /* Removal of a NH from a resilient group is notified through * bucket notifications. @@ -2083,6 +2084,7 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, struct nl_info *nlinfo) { struct nh_grp_entry *nhge, *tmp; + LIST_HEAD(deferred_free); /* If there is nothing to do, let's avoid the costly call to * synchronize_net() @@ -2091,10 +2093,16 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, return; list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) - remove_nh_grp_entry(net, nhge, nlinfo); + remove_nh_grp_entry(net, nhge, nlinfo, &deferred_free); /* make sure all see the newly published array before releasing rtnl */ synchronize_net(); + + /* Now safe to free percpu stats — all RCU readers have finished */ + list_for_each_entry_safe(nhge, tmp, &deferred_free, nh_list) { + list_del(&nhge->nh_list); + free_percpu(nhge->stats); + } } static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 23cc9b4cb2f12..4cbd45b68088a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -86,8 +86,6 @@ struct ipv6_params ipv6_defaults = { .autoconf = 1, }; -static int disable_ipv6_mod; - module_param_named(disable, disable_ipv6_mod, int, 0444); MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional"); @@ -97,12 +95,6 @@ MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces"); module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444); MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces"); -bool ipv6_mod_enabled(void) -{ - return disable_ipv6_mod == 0; -} -EXPORT_SYMBOL_GPL(ipv6_mod_enabled); - static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->ipv6_pinfo_offset; diff --git a/net/mctp/route.c b/net/mctp/route.c index 0381377ab7604..59ad60b885631 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -359,6 +359,7 @@ static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) { struct mctp_sk_key *key; struct mctp_flow *flow; + unsigned long flags; flow = skb_ext_find(skb, SKB_EXT_MCTP); if (!flow) @@ -366,12 +367,14 @@ static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) key = flow->key; - if (key->dev) { + spin_lock_irqsave(&key->lock, flags); + + if (!key->dev) + mctp_dev_set_key(dev, key); + else WARN_ON(key->dev != dev); - return; - } - mctp_dev_set_key(dev, key); + spin_unlock_irqrestore(&key->lock, flags); } #else static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {} diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index 62fb1031763d1..040a31557201b 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -224,7 +224,8 @@ int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb) if (!nah) { netdev_warn(ndp->ndev.dev, "Invalid AEN (0x%x) received\n", h->type); - return -ENOENT; + ret = -ENOENT; + goto out; } ret = ncsi_validate_aen_pkt(h, nah->payload); diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 271ec6c3929e8..fbd84bc8026a3 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -1176,8 +1176,10 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, /* Find the NCSI device */ nd = ncsi_find_dev(orig_dev); ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL; - if (!ndp) - return -ENODEV; + if (!ndp) { + ret = -ENODEV; + goto err_free_skb; + } /* Check if it is AEN packet */ hdr = (struct ncsi_pkt_hdr *)skb_network_header(skb); @@ -1199,7 +1201,8 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, if (!nrh) { netdev_err(nd->dev, "Received unrecognized packet (0x%x)\n", hdr->type); - return -ENOENT; + ret = -ENOENT; + goto err_free_skb; } /* Associate with the request */ @@ -1207,7 +1210,8 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, nr = &ndp->requests[hdr->id]; if (!nr->used) { spin_unlock_irqrestore(&ndp->lock, flags); - return -ENODEV; + ret = -ENODEV; + goto err_free_skb; } nr->rsp = skb; @@ -1261,4 +1265,8 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, out: ncsi_free_request(nr); return ret; + +err_free_skb: + kfree_skb(skb); + return ret; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1862bd7fe804a..dacec5f8a11c4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -829,7 +829,6 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, nft_set_elem_change_active(ctx->net, set, ext); nft_setelem_data_deactivate(ctx->net, set, catchall->elem); - break; } } @@ -5873,7 +5872,6 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx, nft_clear(ctx->net, ext); nft_setelem_data_activate(ctx->net, set, catchall->elem); - break; } } @@ -9688,7 +9686,7 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev, break; case NETDEV_REGISTER: /* NOP if not matching or already registered */ - if (!match || (changename && ops)) + if (!match || ops) continue; ops = kzalloc_obj(struct nf_hook_ops, diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d658b1478fa05..d545fa4594558 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -601,10 +601,10 @@ nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb) goto out; } } - } - if (cb->args[1]) { - cb->args[1] = 0; - goto restart; + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } } out: rcu_read_unlock(); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7f5248b5f1ee0..47f7f62906e21 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1546,8 +1546,10 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info, if (entry->state.pf == PF_BRIDGE) { err = nfqa_parse_bridge(entry, nfqa); - if (err < 0) + if (err < 0) { + nfqnl_reinject(entry, NF_DROP); return err; + } } if (nfqa[NFQA_PAYLOAD]) { diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index b16185e9a6dd7..041426e3bdbf1 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -344,7 +344,7 @@ static int nft_netdev_event(unsigned long event, struct net_device *dev, break; case NETDEV_REGISTER: /* NOP if not matching or already registered */ - if (!match || (changename && ops)) + if (!match || ops) continue; ops = kmemdup(&basechain->ops, diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index a34632ae60480..7fd24e0cc4287 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1640,6 +1640,7 @@ static void pipapo_drop(struct nft_pipapo_match *m, int i; nft_pipapo_for_each_field(f, i, m) { + bool last = i == m->field_count - 1; int g; for (g = 0; g < f->groups; g++) { @@ -1659,7 +1660,7 @@ static void pipapo_drop(struct nft_pipapo_match *m, } pipapo_unmap(f->mt, f->rules, rulemap[i].to, rulemap[i].n, - rulemap[i + 1].n, i == m->field_count - 1); + last ? 0 : rulemap[i + 1].n, last); if (pipapo_resize(f, f->rules, f->rules - rulemap[i].n)) { /* We can ignore this, a failure to shrink tables down * doesn't make tables invalid. diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 5d93e225d0f8f..517106165ad2a 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -318,6 +318,12 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { + if (info->timer->timer_type & XT_IDLETIMER_ALARM) { + pr_debug("Adding/Replacing rule with same label and different timer type is not allowed\n"); + mutex_unlock(&list_mutex); + return -EINVAL; + } + info->timer->refcnt++; mod_timer(&info->timer->timer, secs_to_jiffies(info->timeout) + jiffies); diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index e5a13ecbe67a0..037ab93e25d0a 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -62,10 +62,10 @@ dccp_find_option(u_int8_t option, return true; } - if (op[i] < 2) + if (op[i] < 2 || i == optlen - 1) i++; else - i += op[i+1]?:1; + i += op[i + 1] ? : 1; } spin_unlock_bh(&dccp_buflock); diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index e8991130a3de0..f76cf18f1a244 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -59,8 +59,10 @@ tcp_find_option(u_int8_t option, for (i = 0; i < optlen; ) { if (op[i] == option) return !invert; - if (op[i] < 2) i++; - else i += op[i+1]?:1; + if (op[i] < 2 || i == optlen - 1) + i++; + else + i += op[i + 1] ? : 1; } return invert; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0c2c68c4b07e4..0f90272ac254b 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -267,12 +267,13 @@ static int rxrpc_listen(struct socket *sock, int backlog) * Lookup or create a remote transport endpoint record for the specified * address. * - * Return: The peer record found with a reference, %NULL if no record is found - * or a negative error code if the address is invalid or unsupported. + * Return: The peer record found with a reference or a negative error code if + * the address is invalid or unsupported. */ struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock, struct sockaddr_rxrpc *srx, gfp_t gfp) { + struct rxrpc_peer *peer; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); int ret; @@ -280,7 +281,8 @@ struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock, if (ret < 0) return ERR_PTR(ret); - return rxrpc_lookup_peer(rx->local, srx, gfp); + peer = rxrpc_lookup_peer(rx->local, srx, gfp); + return peer ?: ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(rxrpc_kernel_lookup_peer); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6e4bdaa876ed6..783300d8b0197 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -315,6 +315,7 @@ static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); + skb->dev = slave; if (!netif_xmit_frozen_or_stopped(slave_txq) && netdev_start_xmit(skb, slave, slave_txq, false) == NETDEV_TX_OK) { diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 005bfc766e22d..3fd6629cb9992 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -759,11 +759,7 @@ int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) if (ret) goto free_msg; - ret = genlmsg_reply(msg, info); - if (ret) - goto free_msg; - - return 0; + return genlmsg_reply(msg, info); free_msg: nlmsg_free(msg); @@ -1313,10 +1309,7 @@ int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info) if (ret) goto free_msg; - ret = genlmsg_reply(msg, info); - if (ret) - goto free_msg; - return 0; + return genlmsg_reply(msg, info); free_msg: nlmsg_free(msg); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 15bbf953dfadc..b51a162885bbc 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1362,7 +1362,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed) needed += RPCRDMA_MAX_RECV_BATCH; if (atomic_inc_return(&ep->re_receiving) > 1) - goto out; + goto out_dec; /* fast path: all needed reps can be found on the free list */ wr = NULL; @@ -1385,7 +1385,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed) ++count; } if (!wr) - goto out; + goto out_dec; rc = ib_post_recv(ep->re_id->qp, wr, (const struct ib_recv_wr **)&bad_wr); @@ -1400,9 +1400,10 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed) --count; } } + +out_dec: if (atomic_dec_return(&ep->re_receiving) > 0) complete(&ep->re_done); - out: trace_xprtrdma_post_recvs(r_xprt, count); ep->re_receive_count += count; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4c618c2b871db..9329919fb07f0 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2233,6 +2233,8 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb, if (skb_queue_empty(&sk->sk_write_queue)) break; get_random_bytes(&delay, 2); + if (tsk->conn_timeout < 4) + tsk->conn_timeout = 4; delay %= (tsk->conn_timeout / 4); delay = msecs_to_jiffies(delay + 100); sk_reset_timer(sk, &sk->sk_timer, jiffies + delay); diff --git a/rust/Makefile b/rust/Makefile index 629b3bdd2b206..9801af2e1e027 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -148,7 +148,8 @@ doctests_modifiers_workaround := $(rustdoc_modifiers_workaround)$(if $(call rust quiet_cmd_rustdoc = RUSTDOC $(if $(rustdoc_host),H, ) $< cmd_rustdoc = \ OBJTREE=$(abspath $(objtree)) \ - $(RUSTDOC) $(filter-out $(skip_flags) --remap-path-prefix=%,$(if $(rustdoc_host),$(rust_common_flags),$(rust_flags))) \ + $(RUSTDOC) $(filter-out $(skip_flags) --remap-path-prefix=% --remap-path-scope=%, \ + $(if $(rustdoc_host),$(rust_common_flags),$(rust_flags))) \ $(rustc_target_flags) -L$(objtree)/$(obj) \ -Zunstable-options --generate-link-to-definition \ --output $(rustdoc_output) \ @@ -334,7 +335,7 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $< rm -rf $(objtree)/$(obj)/test/doctests/kernel; \ mkdir -p $(objtree)/$(obj)/test/doctests/kernel; \ OBJTREE=$(abspath $(objtree)) \ - $(RUSTDOC) --test $(filter-out --remap-path-prefix=%,$(rust_flags)) \ + $(RUSTDOC) --test $(filter-out --remap-path-prefix=% --remap-path-scope=%,$(rust_flags)) \ -L$(objtree)/$(obj) --extern ffi --extern pin_init \ --extern kernel --extern build_error --extern macros \ --extern bindings --extern uapi \ @@ -526,11 +527,9 @@ quiet_cmd_rustc_procmacrolibrary = $(RUSTC_OR_CLIPPY_QUIET) PL $@ cmd_rustc_procmacrolibrary = \ $(if $(skip_clippy),$(RUSTC),$(RUSTC_OR_CLIPPY)) \ $(filter-out $(skip_flags),$(rust_common_flags) $(rustc_target_flags)) \ - --emit=dep-info,link --crate-type rlib -O \ + --emit=dep-info=$(depfile) --emit=link=$@ --crate-type rlib -O \ --out-dir $(objtree)/$(obj) -L$(objtree)/$(obj) \ - --crate-name $(patsubst lib%.rlib,%,$(notdir $@)) $<; \ - mv $(objtree)/$(obj)/$(patsubst lib%.rlib,%,$(notdir $@)).d $(depfile); \ - sed -i '/^\#/d' $(depfile) + --crate-name $(patsubst lib%.rlib,%,$(notdir $@)) $< $(obj)/libproc_macro2.rlib: private skip_clippy = 1 $(obj)/libproc_macro2.rlib: private rustc_target_flags = $(proc_macro2-flags) diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index 76faa1ac85017..f5adee48d40cb 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -401,6 +401,7 @@ impl TableBuilder { /// ``` /// use kernel::cpufreq::{DEFAULT_TRANSITION_LATENCY_NS, Policy}; /// +/// #[allow(clippy::double_parens, reason = "False positive before 1.92.0")] /// fn update_policy(policy: &mut Policy) { /// policy /// .set_dvfs_possible_from_any_cpu(true) diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 909d56fd5118e..a396f84357394 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -461,6 +461,19 @@ impl CoherentAllocation { self.count * core::mem::size_of::() } + /// Returns the raw pointer to the allocated region in the CPU's virtual address space. + #[inline] + pub fn as_ptr(&self) -> *const [T] { + core::ptr::slice_from_raw_parts(self.cpu_addr.as_ptr(), self.count) + } + + /// Returns the raw pointer to the allocated region in the CPU's virtual address space as + /// a mutable pointer. + #[inline] + pub fn as_mut_ptr(&self) -> *mut [T] { + core::ptr::slice_from_raw_parts_mut(self.cpu_addr.as_ptr(), self.count) + } + /// Returns the base address to the allocated region in the CPU's virtual address space. pub fn start_ptr(&self) -> *const T { self.cpu_addr.as_ptr() @@ -581,23 +594,6 @@ impl CoherentAllocation { Ok(()) } - /// Returns a pointer to an element from the region with bounds checking. `offset` is in - /// units of `T`, not the number of bytes. - /// - /// Public but hidden since it should only be used from [`dma_read`] and [`dma_write`] macros. - #[doc(hidden)] - pub fn item_from_index(&self, offset: usize) -> Result<*mut T> { - if offset >= self.count { - return Err(EINVAL); - } - // SAFETY: - // - The pointer is valid due to type invariant on `CoherentAllocation` - // and we've just checked that the range and index is within bounds. - // - `offset` can't overflow since it is smaller than `self.count` and we've checked - // that `self.count` won't overflow early in the constructor. - Ok(unsafe { self.cpu_addr.as_ptr().add(offset) }) - } - /// Reads the value of `field` and ensures that its type is [`FromBytes`]. /// /// # Safety @@ -670,6 +666,9 @@ unsafe impl Send for CoherentAllocation {} /// Reads a field of an item from an allocated region of structs. /// +/// The syntax is of the form `kernel::dma_read!(dma, proj)` where `dma` is an expression evaluating +/// to a [`CoherentAllocation`] and `proj` is a [projection specification](kernel::ptr::project!). +/// /// # Examples /// /// ``` @@ -684,36 +683,29 @@ unsafe impl Send for CoherentAllocation {} /// unsafe impl kernel::transmute::AsBytes for MyStruct{}; /// /// # fn test(alloc: &kernel::dma::CoherentAllocation) -> Result { -/// let whole = kernel::dma_read!(alloc[2]); -/// let field = kernel::dma_read!(alloc[1].field); +/// let whole = kernel::dma_read!(alloc, [2]?); +/// let field = kernel::dma_read!(alloc, [1]?.field); /// # Ok::<(), Error>(()) } /// ``` #[macro_export] macro_rules! dma_read { - ($dma:expr, $idx: expr, $($field:tt)*) => {{ - (|| -> ::core::result::Result<_, $crate::error::Error> { - let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?; - // SAFETY: `item_from_index` ensures that `item` is always a valid pointer and can be - // dereferenced. The compiler also further validates the expression on whether `field` - // is a member of `item` when expanded by the macro. - unsafe { - let ptr_field = ::core::ptr::addr_of!((*item) $($field)*); - ::core::result::Result::Ok( - $crate::dma::CoherentAllocation::field_read(&$dma, ptr_field) - ) - } - })() + ($dma:expr, $($proj:tt)*) => {{ + let dma = &$dma; + let ptr = $crate::ptr::project!( + $crate::dma::CoherentAllocation::as_ptr(dma), $($proj)* + ); + // SAFETY: The pointer created by the projection is within the DMA region. + unsafe { $crate::dma::CoherentAllocation::field_read(dma, ptr) } }}; - ($dma:ident [ $idx:expr ] $($field:tt)* ) => { - $crate::dma_read!($dma, $idx, $($field)*) - }; - ($($dma:ident).* [ $idx:expr ] $($field:tt)* ) => { - $crate::dma_read!($($dma).*, $idx, $($field)*) - }; } /// Writes to a field of an item from an allocated region of structs. /// +/// The syntax is of the form `kernel::dma_write!(dma, proj, val)` where `dma` is an expression +/// evaluating to a [`CoherentAllocation`], `proj` is a +/// [projection specification](kernel::ptr::project!), and `val` is the value to be written to the +/// projected location. +/// /// # Examples /// /// ``` @@ -728,37 +720,31 @@ macro_rules! dma_read { /// unsafe impl kernel::transmute::AsBytes for MyStruct{}; /// /// # fn test(alloc: &kernel::dma::CoherentAllocation) -> Result { -/// kernel::dma_write!(alloc[2].member = 0xf); -/// kernel::dma_write!(alloc[1] = MyStruct { member: 0xf }); +/// kernel::dma_write!(alloc, [2]?.member, 0xf); +/// kernel::dma_write!(alloc, [1]?, MyStruct { member: 0xf }); /// # Ok::<(), Error>(()) } /// ``` #[macro_export] macro_rules! dma_write { - ($dma:ident [ $idx:expr ] $($field:tt)*) => {{ - $crate::dma_write!($dma, $idx, $($field)*) - }}; - ($($dma:ident).* [ $idx:expr ] $($field:tt)* ) => {{ - $crate::dma_write!($($dma).*, $idx, $($field)*) + (@parse [$dma:expr] [$($proj:tt)*] [, $val:expr]) => {{ + let dma = &$dma; + let ptr = $crate::ptr::project!( + mut $crate::dma::CoherentAllocation::as_mut_ptr(dma), $($proj)* + ); + let val = $val; + // SAFETY: The pointer created by the projection is within the DMA region. + unsafe { $crate::dma::CoherentAllocation::field_write(dma, ptr, val) } }}; - ($dma:expr, $idx: expr, = $val:expr) => { - (|| -> ::core::result::Result<_, $crate::error::Error> { - let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?; - // SAFETY: `item_from_index` ensures that `item` is always a valid item. - unsafe { $crate::dma::CoherentAllocation::field_write(&$dma, item, $val) } - ::core::result::Result::Ok(()) - })() + (@parse [$dma:expr] [$($proj:tt)*] [.$field:tt $($rest:tt)*]) => { + $crate::dma_write!(@parse [$dma] [$($proj)* .$field] [$($rest)*]) + }; + (@parse [$dma:expr] [$($proj:tt)*] [[$index:expr]? $($rest:tt)*]) => { + $crate::dma_write!(@parse [$dma] [$($proj)* [$index]?] [$($rest)*]) + }; + (@parse [$dma:expr] [$($proj:tt)*] [[$index:expr] $($rest:tt)*]) => { + $crate::dma_write!(@parse [$dma] [$($proj)* [$index]] [$($rest)*]) }; - ($dma:expr, $idx: expr, $(.$field:ident)* = $val:expr) => { - (|| -> ::core::result::Result<_, $crate::error::Error> { - let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?; - // SAFETY: `item_from_index` ensures that `item` is always a valid pointer and can be - // dereferenced. The compiler also further validates the expression on whether `field` - // is a member of `item` when expanded by the macro. - unsafe { - let ptr_field = ::core::ptr::addr_of_mut!((*item) $(.$field)*); - $crate::dma::CoherentAllocation::field_write(&$dma, ptr_field, $val) - } - ::core::result::Result::Ok(()) - })() + ($dma:expr, $($rest:tt)*) => { + $crate::dma_write!(@parse [$dma] [] [$($rest)*]) }; } diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 3da92f18f4eed..d93292d47420f 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -20,6 +20,7 @@ #![feature(generic_nonzero)] #![feature(inline_const)] #![feature(pointer_is_aligned)] +#![feature(slice_ptr_len)] // // Stable since Rust 1.80.0. #![feature(slice_flatten)] @@ -37,6 +38,9 @@ #![feature(const_ptr_write)] #![feature(const_refs_to_cell)] // +// Stable since Rust 1.84.0. +#![feature(strict_provenance)] +// // Expected to become stable. #![feature(arbitrary_self_types)] // diff --git a/rust/kernel/ptr.rs b/rust/kernel/ptr.rs index 5b6a382637fef..bdc2d79ff6699 100644 --- a/rust/kernel/ptr.rs +++ b/rust/kernel/ptr.rs @@ -2,7 +2,13 @@ //! Types and functions to work with pointers and addresses. -use core::mem::align_of; +pub mod projection; +pub use crate::project_pointer as project; + +use core::mem::{ + align_of, + size_of, // +}; use core::num::NonZero; /// Type representing an alignment, which is always a power of two. @@ -225,3 +231,25 @@ macro_rules! impl_alignable_uint { } impl_alignable_uint!(u8, u16, u32, u64, usize); + +/// Trait to represent compile-time known size information. +/// +/// This is a generalization of [`size_of`] that works for dynamically sized types. +pub trait KnownSize { + /// Get the size of an object of this type in bytes, with the metadata of the given pointer. + fn size(p: *const Self) -> usize; +} + +impl KnownSize for T { + #[inline(always)] + fn size(_: *const Self) -> usize { + size_of::() + } +} + +impl KnownSize for [T] { + #[inline(always)] + fn size(p: *const Self) -> usize { + p.len() * size_of::() + } +} diff --git a/rust/kernel/ptr/projection.rs b/rust/kernel/ptr/projection.rs new file mode 100644 index 0000000000000..140ea8e21617d --- /dev/null +++ b/rust/kernel/ptr/projection.rs @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Infrastructure for handling projections. + +use core::{ + mem::MaybeUninit, + ops::Deref, // +}; + +use crate::prelude::*; + +/// Error raised when a projection is attempted on an array or slice out of bounds. +pub struct OutOfBound; + +impl From for Error { + #[inline(always)] + fn from(_: OutOfBound) -> Self { + ERANGE + } +} + +/// A helper trait to perform index projection. +/// +/// This is similar to [`core::slice::SliceIndex`], but operates on raw pointers safely and +/// fallibly. +/// +/// # Safety +/// +/// The implementation of `index` and `get` (if [`Some`] is returned) must ensure that, if provided +/// input pointer `slice` and returned pointer `output`, then: +/// - `output` has the same provenance as `slice`; +/// - `output.byte_offset_from(slice)` is between 0 to +/// `KnownSize::size(slice) - KnownSize::size(output)`. +/// +/// This means that if the input pointer is valid, then pointer returned by `get` or `index` is +/// also valid. +#[diagnostic::on_unimplemented(message = "`{Self}` cannot be used to index `{T}`")] +#[doc(hidden)] +pub unsafe trait ProjectIndex: Sized { + type Output: ?Sized; + + /// Returns an index-projected pointer, if in bounds. + fn get(self, slice: *mut T) -> Option<*mut Self::Output>; + + /// Returns an index-projected pointer; fail the build if it cannot be proved to be in bounds. + #[inline(always)] + fn index(self, slice: *mut T) -> *mut Self::Output { + Self::get(self, slice).unwrap_or_else(|| build_error!()) + } +} + +// Forward array impl to slice impl. +// +// SAFETY: Safety requirement guaranteed by the forwarded impl. +unsafe impl ProjectIndex<[T; N]> for I +where + I: ProjectIndex<[T]>, +{ + type Output = >::Output; + + #[inline(always)] + fn get(self, slice: *mut [T; N]) -> Option<*mut Self::Output> { + >::get(self, slice) + } + + #[inline(always)] + fn index(self, slice: *mut [T; N]) -> *mut Self::Output { + >::index(self, slice) + } +} + +// SAFETY: `get`-returned pointer has the same provenance as `slice` and the offset is checked to +// not exceed the required bound. +unsafe impl ProjectIndex<[T]> for usize { + type Output = T; + + #[inline(always)] + fn get(self, slice: *mut [T]) -> Option<*mut T> { + if self >= slice.len() { + None + } else { + Some(slice.cast::().wrapping_add(self)) + } + } +} + +// SAFETY: `get`-returned pointer has the same provenance as `slice` and the offset is checked to +// not exceed the required bound. +unsafe impl ProjectIndex<[T]> for core::ops::Range { + type Output = [T]; + + #[inline(always)] + fn get(self, slice: *mut [T]) -> Option<*mut [T]> { + let new_len = self.end.checked_sub(self.start)?; + if self.end > slice.len() { + return None; + } + Some(core::ptr::slice_from_raw_parts_mut( + slice.cast::().wrapping_add(self.start), + new_len, + )) + } +} + +// SAFETY: Safety requirement guaranteed by the forwarded impl. +unsafe impl ProjectIndex<[T]> for core::ops::RangeTo { + type Output = [T]; + + #[inline(always)] + fn get(self, slice: *mut [T]) -> Option<*mut [T]> { + (0..self.end).get(slice) + } +} + +// SAFETY: Safety requirement guaranteed by the forwarded impl. +unsafe impl ProjectIndex<[T]> for core::ops::RangeFrom { + type Output = [T]; + + #[inline(always)] + fn get(self, slice: *mut [T]) -> Option<*mut [T]> { + (self.start..slice.len()).get(slice) + } +} + +// SAFETY: `get` returned the pointer as is, so it always has the same provenance and offset of 0. +unsafe impl ProjectIndex<[T]> for core::ops::RangeFull { + type Output = [T]; + + #[inline(always)] + fn get(self, slice: *mut [T]) -> Option<*mut [T]> { + Some(slice) + } +} + +/// A helper trait to perform field projection. +/// +/// This trait has a `DEREF` generic parameter so it can be implemented twice for types that +/// implement [`Deref`]. This will cause an ambiguity error and thus block [`Deref`] types being +/// used as base of projection, as they can inject unsoundness. Users therefore must not specify +/// `DEREF` and should always leave it to be inferred. +/// +/// # Safety +/// +/// `proj` may only invoke `f` with a valid allocation, as the documentation of [`Self::proj`] +/// describes. +#[doc(hidden)] +pub unsafe trait ProjectField { + /// Project a pointer to a type to a pointer of a field. + /// + /// `f` may only be invoked with a valid allocation so it can safely obtain raw pointers to + /// fields using `&raw mut`. + /// + /// This is needed because `base` might not point to a valid allocation, while `&raw mut` + /// requires pointers to be in bounds of a valid allocation. + /// + /// # Safety + /// + /// `f` must return a pointer in bounds of the provided pointer. + unsafe fn proj(base: *mut Self, f: impl FnOnce(*mut Self) -> *mut F) -> *mut F; +} + +// NOTE: in theory, this API should work for `T: ?Sized` and `F: ?Sized`, too. However, we cannot +// currently support that as we need to obtain a valid allocation that `&raw const` can operate on. +// +// SAFETY: `proj` invokes `f` with valid allocation. +unsafe impl ProjectField for T { + #[inline(always)] + unsafe fn proj(base: *mut Self, f: impl FnOnce(*mut Self) -> *mut F) -> *mut F { + // Create a valid allocation to start projection, as `base` is not necessarily so. The + // memory is never actually used so it will be optimized out, so it should work even for + // very large `T` (`memoffset` crate also relies on this). To be extra certain, we also + // annotate `f` closure with `#[inline(always)]` in the macro. + let mut place = MaybeUninit::uninit(); + let place_base = place.as_mut_ptr(); + let field = f(place_base); + // SAFETY: `field` is in bounds from `base` per safety requirement. + let offset = unsafe { field.byte_offset_from(place_base) }; + // Use `wrapping_byte_offset` as `base` does not need to be of valid allocation. + base.wrapping_byte_offset(offset).cast() + } +} + +// SAFETY: Vacuously satisfied. +unsafe impl ProjectField for T { + #[inline(always)] + unsafe fn proj(_: *mut Self, _: impl FnOnce(*mut Self) -> *mut F) -> *mut F { + build_error!("this function is a guard against `Deref` impl and is never invoked"); + } +} + +/// Create a projection from a raw pointer. +/// +/// The projected pointer is within the memory region marked by the input pointer. There is no +/// requirement that the input raw pointer needs to be valid, so this macro may be used for +/// projecting pointers outside normal address space, e.g. I/O pointers. However, if the input +/// pointer is valid, the projected pointer is also valid. +/// +/// Supported projections include field projections and index projections. +/// It is not allowed to project into types that implement custom [`Deref`] or +/// [`Index`](core::ops::Index). +/// +/// The macro has basic syntax of `kernel::ptr::project!(ptr, projection)`, where `ptr` is an +/// expression that evaluates to a raw pointer which serves as the base of projection. `projection` +/// can be a projection expression of form `.field` (normally identifier, or numeral in case of +/// tuple structs) or of form `[index]`. +/// +/// If a mutable pointer is needed, the macro input can be prefixed with the `mut` keyword, i.e. +/// `kernel::ptr::project!(mut ptr, projection)`. By default, a const pointer is created. +/// +/// `ptr::project!` macro can perform both fallible indexing and build-time checked indexing. +/// `[index]` form performs build-time bounds checking; if compiler fails to prove `[index]` is in +/// bounds, compilation will fail. `[index]?` can be used to perform runtime bounds checking; +/// `OutOfBound` error is raised via `?` if the index is out of bounds. +/// +/// # Examples +/// +/// Field projections are performed with `.field_name`: +/// +/// ``` +/// struct MyStruct { field: u32, } +/// let ptr: *const MyStruct = core::ptr::dangling(); +/// let field_ptr: *const u32 = kernel::ptr::project!(ptr, .field); +/// +/// struct MyTupleStruct(u32, u32); +/// +/// fn proj(ptr: *const MyTupleStruct) { +/// let field_ptr: *const u32 = kernel::ptr::project!(ptr, .1); +/// } +/// ``` +/// +/// Index projections are performed with `[index]`: +/// +/// ``` +/// fn proj(ptr: *const [u8; 32]) -> Result { +/// let field_ptr: *const u8 = kernel::ptr::project!(ptr, [1]); +/// // The following invocation, if uncommented, would fail the build. +/// // +/// // kernel::ptr::project!(ptr, [128]); +/// +/// // This will raise an `OutOfBound` error (which is convertible to `ERANGE`). +/// kernel::ptr::project!(ptr, [128]?); +/// Ok(()) +/// } +/// ``` +/// +/// If you need to match on the error instead of propagate, put the invocation inside a closure: +/// +/// ``` +/// let ptr: *const [u8; 32] = core::ptr::dangling(); +/// let field_ptr: Result<*const u8> = (|| -> Result<_> { +/// Ok(kernel::ptr::project!(ptr, [128]?)) +/// })(); +/// assert!(field_ptr.is_err()); +/// ``` +/// +/// For mutable pointers, put `mut` as the first token in macro invocation. +/// +/// ``` +/// let ptr: *mut [(u8, u16); 32] = core::ptr::dangling_mut(); +/// let field_ptr: *mut u16 = kernel::ptr::project!(mut ptr, [1].1); +/// ``` +#[macro_export] +macro_rules! project_pointer { + (@gen $ptr:ident, ) => {}; + // Field projection. `$field` needs to be `tt` to support tuple index like `.0`. + (@gen $ptr:ident, .$field:tt $($rest:tt)*) => { + // SAFETY: The provided closure always returns an in-bounds pointer. + let $ptr = unsafe { + $crate::ptr::projection::ProjectField::proj($ptr, #[inline(always)] |ptr| { + // Check unaligned field. Not all users (e.g. DMA) can handle unaligned + // projections. + if false { + let _ = &(*ptr).$field; + } + // SAFETY: `$field` is in bounds, and no implicit `Deref` is possible (if the + // type implements `Deref`, Rust cannot infer the generic parameter `DEREF`). + &raw mut (*ptr).$field + }) + }; + $crate::ptr::project!(@gen $ptr, $($rest)*) + }; + // Fallible index projection. + (@gen $ptr:ident, [$index:expr]? $($rest:tt)*) => { + let $ptr = $crate::ptr::projection::ProjectIndex::get($index, $ptr) + .ok_or($crate::ptr::projection::OutOfBound)?; + $crate::ptr::project!(@gen $ptr, $($rest)*) + }; + // Build-time checked index projection. + (@gen $ptr:ident, [$index:expr] $($rest:tt)*) => { + let $ptr = $crate::ptr::projection::ProjectIndex::index($index, $ptr); + $crate::ptr::project!(@gen $ptr, $($rest)*) + }; + (mut $ptr:expr, $($proj:tt)*) => {{ + let ptr: *mut _ = $ptr; + $crate::ptr::project!(@gen ptr, $($proj)*); + ptr + }}; + ($ptr:expr, $($proj:tt)*) => {{ + let ptr = <*const _>::cast_mut($ptr); + // We currently always project using mutable pointer, as it is not decided whether `&raw + // const` allows the resulting pointer to be mutated (see documentation of `addr_of!`). + $crate::ptr::project!(@gen ptr, $($proj)*); + ptr.cast_const() + }}; +} diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index fa87779d22539..3f8918764640c 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -664,13 +664,13 @@ impl fmt::Write for Formatter<'_> { /// /// * The first byte of `buffer` is always zero. /// * The length of `buffer` is at least 1. -pub(crate) struct NullTerminatedFormatter<'a> { +pub struct NullTerminatedFormatter<'a> { buffer: &'a mut [u8], } impl<'a> NullTerminatedFormatter<'a> { /// Create a new [`Self`] instance. - pub(crate) fn new(buffer: &'a mut [u8]) -> Option> { + pub fn new(buffer: &'a mut [u8]) -> Option> { *(buffer.first_mut()?) = 0; // INVARIANT: diff --git a/rust/pin-init/internal/src/init.rs b/rust/pin-init/internal/src/init.rs index 42936f915a07a..2fe918f4d82aa 100644 --- a/rust/pin-init/internal/src/init.rs +++ b/rust/pin-init/internal/src/init.rs @@ -62,7 +62,6 @@ impl InitializerKind { enum InitializerAttribute { DefaultError(DefaultErrorAttribute), - DisableInitializedFieldAccess, } struct DefaultErrorAttribute { @@ -86,6 +85,7 @@ pub(crate) fn expand( let error = error.map_or_else( || { if let Some(default_error) = attrs.iter().fold(None, |acc, attr| { + #[expect(irrefutable_let_patterns)] if let InitializerAttribute::DefaultError(DefaultErrorAttribute { ty }) = attr { Some(ty.clone()) } else { @@ -145,22 +145,9 @@ pub(crate) fn expand( }; // `mixed_site` ensures that the data is not accessible to the user-controlled code. let data = Ident::new("__data", Span::mixed_site()); - let init_fields = init_fields( - &fields, - pinned, - !attrs - .iter() - .any(|attr| matches!(attr, InitializerAttribute::DisableInitializedFieldAccess)), - &data, - &slot, - ); + let init_fields = init_fields(&fields, pinned, &data, &slot); let field_check = make_field_check(&fields, init_kind, &path); Ok(quote! {{ - // We do not want to allow arbitrary returns, so we declare this type as the `Ok` return - // type and shadow it later when we insert the arbitrary user code. That way there will be - // no possibility of returning without `unsafe`. - struct __InitOk; - // Get the data about fields from the supplied type. // SAFETY: TODO let #data = unsafe { @@ -170,18 +157,15 @@ pub(crate) fn expand( #path::#get_data() }; // Ensure that `#data` really is of type `#data` and help with type inference: - let init = ::pin_init::__internal::#data_trait::make_closure::<_, __InitOk, #error>( + let init = ::pin_init::__internal::#data_trait::make_closure::<_, #error>( #data, move |slot| { - { - // Shadow the structure so it cannot be used to return early. - struct __InitOk; - #zeroable_check - #this - #init_fields - #field_check - } - Ok(__InitOk) + #zeroable_check + #this + #init_fields + #field_check + // SAFETY: we are the `init!` macro that is allowed to call this. + Ok(unsafe { ::pin_init::__internal::InitOk::new() }) } ); let init = move |slot| -> ::core::result::Result<(), #error> { @@ -236,7 +220,6 @@ fn get_init_kind(rest: Option<(Token![..], Expr)>, dcx: &mut DiagCtxt) -> InitKi fn init_fields( fields: &Punctuated, pinned: bool, - generate_initialized_accessors: bool, data: &Ident, slot: &Ident, ) -> TokenStream { @@ -260,6 +243,10 @@ fn init_fields( }); // Again span for better diagnostics let write = quote_spanned!(ident.span()=> ::core::ptr::write); + // NOTE: the field accessor ensures that the initialized field is properly aligned. + // Unaligned fields will cause the compiler to emit E0793. We do not support + // unaligned fields since `Init::__init` requires an aligned pointer; the call to + // `ptr::write` below has the same requirement. let accessor = if pinned { let project_ident = format_ident!("__project_{ident}"); quote! { @@ -272,13 +259,6 @@ fn init_fields( unsafe { &mut (*#slot).#ident } } }; - let accessor = generate_initialized_accessors.then(|| { - quote! { - #(#cfgs)* - #[allow(unused_variables)] - let #ident = #accessor; - } - }); quote! { #(#attrs)* { @@ -286,12 +266,18 @@ fn init_fields( // SAFETY: TODO unsafe { #write(::core::ptr::addr_of_mut!((*#slot).#ident), #value_ident) }; } - #accessor + #(#cfgs)* + #[allow(unused_variables)] + let #ident = #accessor; } } InitializerKind::Init { ident, value, .. } => { // Again span for better diagnostics let init = format_ident!("init", span = value.span()); + // NOTE: the field accessor ensures that the initialized field is properly aligned. + // Unaligned fields will cause the compiler to emit E0793. We do not support + // unaligned fields since `Init::__init` requires an aligned pointer; the call to + // `ptr::write` below has the same requirement. let (value_init, accessor) = if pinned { let project_ident = format_ident!("__project_{ident}"); ( @@ -326,20 +312,15 @@ fn init_fields( }, ) }; - let accessor = generate_initialized_accessors.then(|| { - quote! { - #(#cfgs)* - #[allow(unused_variables)] - let #ident = #accessor; - } - }); quote! { #(#attrs)* { let #init = #value; #value_init } - #accessor + #(#cfgs)* + #[allow(unused_variables)] + let #ident = #accessor; } } InitializerKind::Code { block: value, .. } => quote! { @@ -466,10 +447,6 @@ impl Parse for Initializer { if a.path().is_ident("default_error") { a.parse_args::() .map(InitializerAttribute::DefaultError) - } else if a.path().is_ident("disable_initialized_field_access") { - a.meta - .require_path_only() - .map(|_| InitializerAttribute::DisableInitializedFieldAccess) } else { Err(syn::Error::new_spanned(a, "unknown initializer attribute")) } diff --git a/rust/pin-init/src/__internal.rs b/rust/pin-init/src/__internal.rs index 90f18e9a2912c..90adbdc1893bb 100644 --- a/rust/pin-init/src/__internal.rs +++ b/rust/pin-init/src/__internal.rs @@ -46,6 +46,24 @@ where } } +/// Token type to signify successful initialization. +/// +/// Can only be constructed via the unsafe [`Self::new`] function. The initializer macros use this +/// token type to prevent returning `Ok` from an initializer without initializing all fields. +pub struct InitOk(()); + +impl InitOk { + /// Creates a new token. + /// + /// # Safety + /// + /// This function may only be called from the `init!` macro in `../internal/src/init.rs`. + #[inline(always)] + pub unsafe fn new() -> Self { + Self(()) + } +} + /// This trait is only implemented via the `#[pin_data]` proc-macro. It is used to facilitate /// the pin projections within the initializers. /// @@ -68,9 +86,10 @@ pub unsafe trait PinData: Copy { type Datee: ?Sized + HasPinData; /// Type inference helper function. - fn make_closure(self, f: F) -> F + #[inline(always)] + fn make_closure(self, f: F) -> F where - F: FnOnce(*mut Self::Datee) -> Result, + F: FnOnce(*mut Self::Datee) -> Result, { f } @@ -98,9 +117,10 @@ pub unsafe trait InitData: Copy { type Datee: ?Sized + HasInitData; /// Type inference helper function. - fn make_closure(self, f: F) -> F + #[inline(always)] + fn make_closure(self, f: F) -> F where - F: FnOnce(*mut Self::Datee) -> Result, + F: FnOnce(*mut Self::Datee) -> Result, { f } diff --git a/samples/rust/rust_dma.rs b/samples/rust/rust_dma.rs index 9c45851c876ef..ce39b55450978 100644 --- a/samples/rust/rust_dma.rs +++ b/samples/rust/rust_dma.rs @@ -68,7 +68,7 @@ impl pci::Driver for DmaSampleDriver { CoherentAllocation::alloc_coherent(pdev.as_ref(), TEST_VALUES.len(), GFP_KERNEL)?; for (i, value) in TEST_VALUES.into_iter().enumerate() { - kernel::dma_write!(ca[i] = MyStruct::new(value.0, value.1))?; + kernel::dma_write!(ca, [i]?, MyStruct::new(value.0, value.1)); } let size = 4 * page::PAGE_SIZE; @@ -85,24 +85,26 @@ impl pci::Driver for DmaSampleDriver { } } +impl DmaSampleDriver { + fn check_dma(&self) -> Result { + for (i, value) in TEST_VALUES.into_iter().enumerate() { + let val0 = kernel::dma_read!(self.ca, [i]?.h); + let val1 = kernel::dma_read!(self.ca, [i]?.b); + + assert_eq!(val0, value.0); + assert_eq!(val1, value.1); + } + + Ok(()) + } +} + #[pinned_drop] impl PinnedDrop for DmaSampleDriver { fn drop(self: Pin<&mut Self>) { dev_info!(self.pdev, "Unload DMA test driver.\n"); - for (i, value) in TEST_VALUES.into_iter().enumerate() { - let val0 = kernel::dma_read!(self.ca[i].h); - let val1 = kernel::dma_read!(self.ca[i].b); - assert!(val0.is_ok()); - assert!(val1.is_ok()); - - if let Ok(val0) = val0 { - assert_eq!(val0, value.0); - } - if let Ok(val1) = val1 { - assert_eq!(val1, value.1); - } - } + assert!(self.check_dma().is_ok()); for (i, entry) in self.sgt.iter().enumerate() { dev_info!( diff --git a/samples/workqueue/stall_detector/Makefile b/samples/workqueue/stall_detector/Makefile new file mode 100644 index 0000000000000..8849e85e95bb9 --- /dev/null +++ b/samples/workqueue/stall_detector/Makefile @@ -0,0 +1 @@ +obj-m += wq_stall.o diff --git a/samples/workqueue/stall_detector/wq_stall.c b/samples/workqueue/stall_detector/wq_stall.c new file mode 100644 index 0000000000000..6f4a497b18814 --- /dev/null +++ b/samples/workqueue/stall_detector/wq_stall.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * wq_stall - Test module for the workqueue stall detector. + * + * Deliberately creates a workqueue stall so the watchdog fires and + * prints diagnostic output. Useful for verifying that the stall + * detector correctly identifies stuck workers and produces useful + * backtraces. + * + * The stall is triggered by clearing PF_WQ_WORKER before sleeping, + * which hides the worker from the concurrency manager. A second + * work item queued on the same pool then sits in the worklist with + * no worker available to process it. + * + * After ~30s the workqueue watchdog fires: + * BUG: workqueue lockup - pool cpus=N ... + * + * Build: + * make -C M=samples/workqueue/stall_detector modules + * + * Copyright (c) 2026 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2026 Breno Leitao + */ + +#include +#include +#include +#include +#include + +static DECLARE_WAIT_QUEUE_HEAD(stall_wq_head); +static atomic_t wake_condition = ATOMIC_INIT(0); +static struct work_struct stall_work1; +static struct work_struct stall_work2; + +static void stall_work2_fn(struct work_struct *work) +{ + pr_info("wq_stall: second work item finally ran\n"); +} + +static void stall_work1_fn(struct work_struct *work) +{ + pr_info("wq_stall: first work item running on cpu %d\n", + raw_smp_processor_id()); + + /* + * Queue second item while we're still counted as running + * (pool->nr_running > 0). Since schedule_work() on a per-CPU + * workqueue targets raw_smp_processor_id(), item 2 lands on the + * same pool. __queue_work -> kick_pool -> need_more_worker() + * sees nr_running > 0 and does NOT wake a new worker. + */ + schedule_work(&stall_work2); + + /* + * Hide from the workqueue concurrency manager. Without + * PF_WQ_WORKER, schedule() won't call wq_worker_sleeping(), + * so nr_running is never decremented and no replacement + * worker is created. Item 2 stays stuck in pool->worklist. + */ + current->flags &= ~PF_WQ_WORKER; + + pr_info("wq_stall: entering wait_event_idle (PF_WQ_WORKER cleared)\n"); + pr_info("wq_stall: expect 'BUG: workqueue lockup' in ~30-60s\n"); + wait_event_idle(stall_wq_head, atomic_read(&wake_condition) != 0); + + /* Restore so process_one_work() cleanup works correctly */ + current->flags |= PF_WQ_WORKER; + pr_info("wq_stall: woke up, PF_WQ_WORKER restored\n"); +} + +static int __init wq_stall_init(void) +{ + pr_info("wq_stall: loading\n"); + + INIT_WORK(&stall_work1, stall_work1_fn); + INIT_WORK(&stall_work2, stall_work2_fn); + schedule_work(&stall_work1); + + return 0; +} + +static void __exit wq_stall_exit(void) +{ + pr_info("wq_stall: unloading\n"); + atomic_set(&wake_condition, 1); + wake_up(&stall_wq_head); + flush_work(&stall_work1); + flush_work(&stall_work2); + pr_info("wq_stall: all work flushed, module unloaded\n"); +} + +module_init(wq_stall_init); +module_exit(wq_stall_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Reproduce workqueue stall caused by PF_WQ_WORKER misuse"); +MODULE_AUTHOR("Breno Leitao "); diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 32e209bc7985c..3652b85be5459 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -310,16 +310,18 @@ $(obj)/%.lst: $(obj)/%.c FORCE # The features in this list are the ones allowed for non-`rust/` code. # +# - Stable since Rust 1.79.0: `feature(slice_ptr_len)`. # - Stable since Rust 1.81.0: `feature(lint_reasons)`. # - Stable since Rust 1.82.0: `feature(asm_const)`, # `feature(offset_of_nested)`, `feature(raw_ref_op)`. +# - Stable since Rust 1.84.0: `feature(strict_provenance)`. # - Stable since Rust 1.87.0: `feature(asm_goto)`. # - Expected to become stable: `feature(arbitrary_self_types)`. # - To be determined: `feature(used_with_arg)`. # # Please see https://github.com/Rust-for-Linux/linux/issues/2 for details on # the unstable features in use. -rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,offset_of_nested,raw_ref_op,used_with_arg +rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,offset_of_nested,raw_ref_op,slice_ptr_len,strict_provenance,used_with_arg # `--out-dir` is required to avoid temporaries being created by `rustc` in the # current working directory, which may be not accessible in the out-of-tree diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 2f84bd23edb69..242c71b3fb6ef 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -32,6 +32,7 @@ #include "include/crypto.h" #include "include/ipc.h" #include "include/label.h" +#include "include/lib.h" #include "include/policy.h" #include "include/policy_ns.h" #include "include/resource.h" @@ -62,6 +63,7 @@ * securityfs and apparmorfs filesystems. */ +#define IREF_POISON 101 /* * support fns @@ -79,7 +81,7 @@ static void rawdata_f_data_free(struct rawdata_f_data *private) if (!private) return; - aa_put_loaddata(private->loaddata); + aa_put_i_loaddata(private->loaddata); kvfree(private); } @@ -153,6 +155,71 @@ static int aafs_show_path(struct seq_file *seq, struct dentry *dentry) return 0; } +static struct aa_ns *get_ns_common_ref(struct aa_common_ref *ref) +{ + if (ref) { + struct aa_label *reflabel = container_of(ref, struct aa_label, + count); + return aa_get_ns(labels_ns(reflabel)); + } + + return NULL; +} + +static struct aa_proxy *get_proxy_common_ref(struct aa_common_ref *ref) +{ + if (ref) + return aa_get_proxy(container_of(ref, struct aa_proxy, count)); + + return NULL; +} + +static struct aa_loaddata *get_loaddata_common_ref(struct aa_common_ref *ref) +{ + if (ref) + return aa_get_i_loaddata(container_of(ref, struct aa_loaddata, + count)); + return NULL; +} + +static void aa_put_common_ref(struct aa_common_ref *ref) +{ + if (!ref) + return; + + switch (ref->reftype) { + case REF_RAWDATA: + aa_put_i_loaddata(container_of(ref, struct aa_loaddata, + count)); + break; + case REF_PROXY: + aa_put_proxy(container_of(ref, struct aa_proxy, + count)); + break; + case REF_NS: + /* ns count is held on its unconfined label */ + aa_put_ns(labels_ns(container_of(ref, struct aa_label, count))); + break; + default: + AA_BUG(true, "unknown refcount type"); + break; + } +} + +static void aa_get_common_ref(struct aa_common_ref *ref) +{ + kref_get(&ref->count); +} + +static void aafs_evict(struct inode *inode) +{ + struct aa_common_ref *ref = inode->i_private; + + clear_inode(inode); + aa_put_common_ref(ref); + inode->i_private = (void *) IREF_POISON; +} + static void aafs_free_inode(struct inode *inode) { if (S_ISLNK(inode->i_mode)) @@ -162,6 +229,7 @@ static void aafs_free_inode(struct inode *inode) static const struct super_operations aafs_super_ops = { .statfs = simple_statfs, + .evict_inode = aafs_evict, .free_inode = aafs_free_inode, .show_path = aafs_show_path, }; @@ -262,7 +330,8 @@ static int __aafs_setup_d_inode(struct inode *dir, struct dentry *dentry, * aafs_remove(). Will return ERR_PTR on failure. */ static struct dentry *aafs_create(const char *name, umode_t mode, - struct dentry *parent, void *data, void *link, + struct dentry *parent, + struct aa_common_ref *data, void *link, const struct file_operations *fops, const struct inode_operations *iops) { @@ -299,6 +368,9 @@ static struct dentry *aafs_create(const char *name, umode_t mode, goto fail_dentry; inode_unlock(dir); + if (data) + aa_get_common_ref(data); + return dentry; fail_dentry: @@ -323,7 +395,8 @@ static struct dentry *aafs_create(const char *name, umode_t mode, * see aafs_create */ static struct dentry *aafs_create_file(const char *name, umode_t mode, - struct dentry *parent, void *data, + struct dentry *parent, + struct aa_common_ref *data, const struct file_operations *fops) { return aafs_create(name, mode, parent, data, NULL, fops, NULL); @@ -409,7 +482,8 @@ static struct aa_loaddata *aa_simple_write_to_buffer(const char __user *userbuf, data->size = copy_size; if (copy_from_user(data->data, userbuf, copy_size)) { - aa_put_loaddata(data); + /* trigger free - don't need to put pcount */ + aa_put_i_loaddata(data); return ERR_PTR(-EFAULT); } @@ -417,7 +491,8 @@ static struct aa_loaddata *aa_simple_write_to_buffer(const char __user *userbuf, } static ssize_t policy_update(u32 mask, const char __user *buf, size_t size, - loff_t *pos, struct aa_ns *ns) + loff_t *pos, struct aa_ns *ns, + const struct cred *ocred) { struct aa_loaddata *data; struct aa_label *label; @@ -428,7 +503,7 @@ static ssize_t policy_update(u32 mask, const char __user *buf, size_t size, /* high level check about policy management - fine grained in * below after unpack */ - error = aa_may_manage_policy(current_cred(), label, ns, mask); + error = aa_may_manage_policy(current_cred(), label, ns, ocred, mask); if (error) goto end_section; @@ -436,7 +511,10 @@ static ssize_t policy_update(u32 mask, const char __user *buf, size_t size, error = PTR_ERR(data); if (!IS_ERR(data)) { error = aa_replace_profiles(ns, label, mask, data); - aa_put_loaddata(data); + /* put pcount, which will put count and free if no + * profiles referencing it. + */ + aa_put_profile_loaddata(data); } end_section: end_current_label_crit_section(label); @@ -448,8 +526,9 @@ static ssize_t policy_update(u32 mask, const char __user *buf, size_t size, static ssize_t profile_load(struct file *f, const char __user *buf, size_t size, loff_t *pos) { - struct aa_ns *ns = aa_get_ns(f->f_inode->i_private); - int error = policy_update(AA_MAY_LOAD_POLICY, buf, size, pos, ns); + struct aa_ns *ns = get_ns_common_ref(f->f_inode->i_private); + int error = policy_update(AA_MAY_LOAD_POLICY, buf, size, pos, ns, + f->f_cred); aa_put_ns(ns); @@ -465,9 +544,9 @@ static const struct file_operations aa_fs_profile_load = { static ssize_t profile_replace(struct file *f, const char __user *buf, size_t size, loff_t *pos) { - struct aa_ns *ns = aa_get_ns(f->f_inode->i_private); + struct aa_ns *ns = get_ns_common_ref(f->f_inode->i_private); int error = policy_update(AA_MAY_LOAD_POLICY | AA_MAY_REPLACE_POLICY, - buf, size, pos, ns); + buf, size, pos, ns, f->f_cred); aa_put_ns(ns); return error; @@ -485,14 +564,14 @@ static ssize_t profile_remove(struct file *f, const char __user *buf, struct aa_loaddata *data; struct aa_label *label; ssize_t error; - struct aa_ns *ns = aa_get_ns(f->f_inode->i_private); + struct aa_ns *ns = get_ns_common_ref(f->f_inode->i_private); label = begin_current_label_crit_section(); /* high level check about policy management - fine grained in * below after unpack */ error = aa_may_manage_policy(current_cred(), label, ns, - AA_MAY_REMOVE_POLICY); + f->f_cred, AA_MAY_REMOVE_POLICY); if (error) goto out; @@ -506,7 +585,7 @@ static ssize_t profile_remove(struct file *f, const char __user *buf, if (!IS_ERR(data)) { data->data[size] = 0; error = aa_remove_profiles(ns, label, data->data, size); - aa_put_loaddata(data); + aa_put_profile_loaddata(data); } out: end_current_label_crit_section(label); @@ -575,7 +654,7 @@ static int ns_revision_open(struct inode *inode, struct file *file) if (!rev) return -ENOMEM; - rev->ns = aa_get_ns(inode->i_private); + rev->ns = get_ns_common_ref(inode->i_private); if (!rev->ns) rev->ns = aa_get_current_ns(); file->private_data = rev; @@ -1061,7 +1140,7 @@ static const struct file_operations seq_profile_ ##NAME ##_fops = { \ static int seq_profile_open(struct inode *inode, struct file *file, int (*show)(struct seq_file *, void *)) { - struct aa_proxy *proxy = aa_get_proxy(inode->i_private); + struct aa_proxy *proxy = get_proxy_common_ref(inode->i_private); int error = single_open(file, show, proxy); if (error) { @@ -1253,18 +1332,17 @@ static const struct file_operations seq_rawdata_ ##NAME ##_fops = { \ static int seq_rawdata_open(struct inode *inode, struct file *file, int (*show)(struct seq_file *, void *)) { - struct aa_loaddata *data = __aa_get_loaddata(inode->i_private); + struct aa_loaddata *data = get_loaddata_common_ref(inode->i_private); int error; if (!data) - /* lost race this ent is being reaped */ return -ENOENT; error = single_open(file, show, data); if (error) { AA_BUG(file->private_data && ((struct seq_file *)file->private_data)->private); - aa_put_loaddata(data); + aa_put_i_loaddata(data); } return error; @@ -1275,7 +1353,7 @@ static int seq_rawdata_release(struct inode *inode, struct file *file) struct seq_file *seq = (struct seq_file *) file->private_data; if (seq) - aa_put_loaddata(seq->private); + aa_put_i_loaddata(seq->private); return single_release(inode, file); } @@ -1387,9 +1465,8 @@ static int rawdata_open(struct inode *inode, struct file *file) if (!aa_current_policy_view_capable(NULL)) return -EACCES; - loaddata = __aa_get_loaddata(inode->i_private); + loaddata = get_loaddata_common_ref(inode->i_private); if (!loaddata) - /* lost race: this entry is being reaped */ return -ENOENT; private = rawdata_f_data_alloc(loaddata->size); @@ -1414,7 +1491,7 @@ static int rawdata_open(struct inode *inode, struct file *file) return error; fail_private_alloc: - aa_put_loaddata(loaddata); + aa_put_i_loaddata(loaddata); return error; } @@ -1431,7 +1508,6 @@ static void remove_rawdata_dents(struct aa_loaddata *rawdata) for (i = 0; i < AAFS_LOADDATA_NDENTS; i++) { if (!IS_ERR_OR_NULL(rawdata->dents[i])) { - /* no refcounts on i_private */ aafs_remove(rawdata->dents[i]); rawdata->dents[i] = NULL; } @@ -1474,35 +1550,37 @@ int __aa_fs_create_rawdata(struct aa_ns *ns, struct aa_loaddata *rawdata) return PTR_ERR(dir); rawdata->dents[AAFS_LOADDATA_DIR] = dir; - dent = aafs_create_file("abi", S_IFREG | 0444, dir, rawdata, + dent = aafs_create_file("abi", S_IFREG | 0444, dir, &rawdata->count, &seq_rawdata_abi_fops); if (IS_ERR(dent)) goto fail; rawdata->dents[AAFS_LOADDATA_ABI] = dent; - dent = aafs_create_file("revision", S_IFREG | 0444, dir, rawdata, - &seq_rawdata_revision_fops); + dent = aafs_create_file("revision", S_IFREG | 0444, dir, + &rawdata->count, + &seq_rawdata_revision_fops); if (IS_ERR(dent)) goto fail; rawdata->dents[AAFS_LOADDATA_REVISION] = dent; if (aa_g_hash_policy) { dent = aafs_create_file("sha256", S_IFREG | 0444, dir, - rawdata, &seq_rawdata_hash_fops); + &rawdata->count, + &seq_rawdata_hash_fops); if (IS_ERR(dent)) goto fail; rawdata->dents[AAFS_LOADDATA_HASH] = dent; } dent = aafs_create_file("compressed_size", S_IFREG | 0444, dir, - rawdata, + &rawdata->count, &seq_rawdata_compressed_size_fops); if (IS_ERR(dent)) goto fail; rawdata->dents[AAFS_LOADDATA_COMPRESSED_SIZE] = dent; - dent = aafs_create_file("raw_data", S_IFREG | 0444, - dir, rawdata, &rawdata_fops); + dent = aafs_create_file("raw_data", S_IFREG | 0444, dir, + &rawdata->count, &rawdata_fops); if (IS_ERR(dent)) goto fail; rawdata->dents[AAFS_LOADDATA_DATA] = dent; @@ -1510,13 +1588,11 @@ int __aa_fs_create_rawdata(struct aa_ns *ns, struct aa_loaddata *rawdata) rawdata->ns = aa_get_ns(ns); list_add(&rawdata->list, &ns->rawdata_list); - /* no refcount on inode rawdata */ return 0; fail: remove_rawdata_dents(rawdata); - return PTR_ERR(dent); } #endif /* CONFIG_SECURITY_APPARMOR_EXPORT_BINARY */ @@ -1540,13 +1616,10 @@ void __aafs_profile_rmdir(struct aa_profile *profile) __aafs_profile_rmdir(child); for (i = AAFS_PROF_SIZEOF - 1; i >= 0; --i) { - struct aa_proxy *proxy; if (!profile->dents[i]) continue; - proxy = d_inode(profile->dents[i])->i_private; aafs_remove(profile->dents[i]); - aa_put_proxy(proxy); profile->dents[i] = NULL; } } @@ -1580,14 +1653,7 @@ static struct dentry *create_profile_file(struct dentry *dir, const char *name, struct aa_profile *profile, const struct file_operations *fops) { - struct aa_proxy *proxy = aa_get_proxy(profile->label.proxy); - struct dentry *dent; - - dent = aafs_create_file(name, S_IFREG | 0444, dir, proxy, fops); - if (IS_ERR(dent)) - aa_put_proxy(proxy); - - return dent; + return aafs_create_file(name, S_IFREG | 0444, dir, &profile->label.proxy->count, fops); } #ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY @@ -1637,7 +1703,8 @@ static const char *rawdata_get_link_base(struct dentry *dentry, struct delayed_call *done, const char *name) { - struct aa_proxy *proxy = inode->i_private; + struct aa_common_ref *ref = inode->i_private; + struct aa_proxy *proxy = container_of(ref, struct aa_proxy, count); struct aa_label *label; struct aa_profile *profile; char *target; @@ -1779,27 +1846,24 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent) if (profile->rawdata) { if (aa_g_hash_policy) { dent = aafs_create("raw_sha256", S_IFLNK | 0444, dir, - profile->label.proxy, NULL, NULL, - &rawdata_link_sha256_iops); + &profile->label.proxy->count, NULL, + NULL, &rawdata_link_sha256_iops); if (IS_ERR(dent)) goto fail; - aa_get_proxy(profile->label.proxy); profile->dents[AAFS_PROF_RAW_HASH] = dent; } dent = aafs_create("raw_abi", S_IFLNK | 0444, dir, - profile->label.proxy, NULL, NULL, + &profile->label.proxy->count, NULL, NULL, &rawdata_link_abi_iops); if (IS_ERR(dent)) goto fail; - aa_get_proxy(profile->label.proxy); profile->dents[AAFS_PROF_RAW_ABI] = dent; dent = aafs_create("raw_data", S_IFLNK | 0444, dir, - profile->label.proxy, NULL, NULL, + &profile->label.proxy->count, NULL, NULL, &rawdata_link_data_iops); if (IS_ERR(dent)) goto fail; - aa_get_proxy(profile->label.proxy); profile->dents[AAFS_PROF_RAW_DATA] = dent; } #endif /*CONFIG_SECURITY_APPARMOR_EXPORT_BINARY */ @@ -1830,13 +1894,13 @@ static struct dentry *ns_mkdir_op(struct mnt_idmap *idmap, struct inode *dir, int error; label = begin_current_label_crit_section(); - error = aa_may_manage_policy(current_cred(), label, NULL, + error = aa_may_manage_policy(current_cred(), label, NULL, NULL, AA_MAY_LOAD_POLICY); end_current_label_crit_section(label); if (error) return ERR_PTR(error); - parent = aa_get_ns(dir->i_private); + parent = get_ns_common_ref(dir->i_private); AA_BUG(d_inode(ns_subns_dir(parent)) != dir); /* we have to unlock and then relock to get locking order right @@ -1880,13 +1944,13 @@ static int ns_rmdir_op(struct inode *dir, struct dentry *dentry) int error; label = begin_current_label_crit_section(); - error = aa_may_manage_policy(current_cred(), label, NULL, + error = aa_may_manage_policy(current_cred(), label, NULL, NULL, AA_MAY_LOAD_POLICY); end_current_label_crit_section(label); if (error) return error; - parent = aa_get_ns(dir->i_private); + parent = get_ns_common_ref(dir->i_private); /* rmdir calls the generic securityfs functions to remove files * from the apparmor dir. It is up to the apparmor ns locking * to avoid races. @@ -1956,27 +2020,6 @@ void __aafs_ns_rmdir(struct aa_ns *ns) __aa_fs_list_remove_rawdata(ns); - if (ns_subns_dir(ns)) { - sub = d_inode(ns_subns_dir(ns))->i_private; - aa_put_ns(sub); - } - if (ns_subload(ns)) { - sub = d_inode(ns_subload(ns))->i_private; - aa_put_ns(sub); - } - if (ns_subreplace(ns)) { - sub = d_inode(ns_subreplace(ns))->i_private; - aa_put_ns(sub); - } - if (ns_subremove(ns)) { - sub = d_inode(ns_subremove(ns))->i_private; - aa_put_ns(sub); - } - if (ns_subrevision(ns)) { - sub = d_inode(ns_subrevision(ns))->i_private; - aa_put_ns(sub); - } - for (i = AAFS_NS_SIZEOF - 1; i >= 0; --i) { aafs_remove(ns->dents[i]); ns->dents[i] = NULL; @@ -2001,40 +2044,40 @@ static int __aafs_ns_mkdir_entries(struct aa_ns *ns, struct dentry *dir) return PTR_ERR(dent); ns_subdata_dir(ns) = dent; - dent = aafs_create_file("revision", 0444, dir, ns, + dent = aafs_create_file("revision", 0444, dir, + &ns->unconfined->label.count, &aa_fs_ns_revision_fops); if (IS_ERR(dent)) return PTR_ERR(dent); - aa_get_ns(ns); ns_subrevision(ns) = dent; - dent = aafs_create_file(".load", 0640, dir, ns, - &aa_fs_profile_load); + dent = aafs_create_file(".load", 0640, dir, + &ns->unconfined->label.count, + &aa_fs_profile_load); if (IS_ERR(dent)) return PTR_ERR(dent); - aa_get_ns(ns); ns_subload(ns) = dent; - dent = aafs_create_file(".replace", 0640, dir, ns, - &aa_fs_profile_replace); + dent = aafs_create_file(".replace", 0640, dir, + &ns->unconfined->label.count, + &aa_fs_profile_replace); if (IS_ERR(dent)) return PTR_ERR(dent); - aa_get_ns(ns); ns_subreplace(ns) = dent; - dent = aafs_create_file(".remove", 0640, dir, ns, - &aa_fs_profile_remove); + dent = aafs_create_file(".remove", 0640, dir, + &ns->unconfined->label.count, + &aa_fs_profile_remove); if (IS_ERR(dent)) return PTR_ERR(dent); - aa_get_ns(ns); ns_subremove(ns) = dent; /* use create_dentry so we can supply private data */ - dent = aafs_create("namespaces", S_IFDIR | 0755, dir, ns, NULL, NULL, - &ns_dir_inode_operations); + dent = aafs_create("namespaces", S_IFDIR | 0755, dir, + &ns->unconfined->label.count, + NULL, NULL, &ns_dir_inode_operations); if (IS_ERR(dent)) return PTR_ERR(dent); - aa_get_ns(ns); ns_subns_dir(ns) = dent; return 0; diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index c0812dbc1b5b0..335f21930702a 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -102,7 +102,7 @@ enum label_flags { struct aa_label; struct aa_proxy { - struct kref count; + struct aa_common_ref count; struct aa_label __rcu *label; }; @@ -125,7 +125,7 @@ struct label_it { * vec: vector of profiles comprising the compound label */ struct aa_label { - struct kref count; + struct aa_common_ref count; struct rb_node node; struct rcu_head rcu; struct aa_proxy *proxy; @@ -357,7 +357,7 @@ int aa_label_match(struct aa_profile *profile, struct aa_ruleset *rules, */ static inline struct aa_label *__aa_get_label(struct aa_label *l) { - if (l && kref_get_unless_zero(&l->count)) + if (l && kref_get_unless_zero(&l->count.count)) return l; return NULL; @@ -366,7 +366,7 @@ static inline struct aa_label *__aa_get_label(struct aa_label *l) static inline struct aa_label *aa_get_label(struct aa_label *l) { if (l) - kref_get(&(l->count)); + kref_get(&(l->count.count)); return l; } @@ -386,7 +386,7 @@ static inline struct aa_label *aa_get_label_rcu(struct aa_label __rcu **l) rcu_read_lock(); do { c = rcu_dereference(*l); - } while (c && !kref_get_unless_zero(&c->count)); + } while (c && !kref_get_unless_zero(&c->count.count)); rcu_read_unlock(); return c; @@ -426,7 +426,7 @@ static inline struct aa_label *aa_get_newest_label(struct aa_label *l) static inline void aa_put_label(struct aa_label *l) { if (l) - kref_put(&l->count, aa_label_kref); + kref_put(&l->count.count, aa_label_kref); } /* wrapper fn to indicate semantics of the check */ @@ -443,7 +443,7 @@ void aa_proxy_kref(struct kref *kref); static inline struct aa_proxy *aa_get_proxy(struct aa_proxy *proxy) { if (proxy) - kref_get(&(proxy->count)); + kref_get(&(proxy->count.count)); return proxy; } @@ -451,7 +451,7 @@ static inline struct aa_proxy *aa_get_proxy(struct aa_proxy *proxy) static inline void aa_put_proxy(struct aa_proxy *proxy) { if (proxy) - kref_put(&proxy->count, aa_proxy_kref); + kref_put(&proxy->count.count, aa_proxy_kref); } void __aa_proxy_redirect(struct aa_label *orig, struct aa_label *new); diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 1c5d1f60f6a7a..8c6ce84845526 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -102,6 +102,18 @@ void aa_info_message(const char *str); /* Security blob offsets */ extern struct lsm_blob_sizes apparmor_blob_sizes; +enum reftype { + REF_NS, + REF_PROXY, + REF_RAWDATA, +}; + +/* common reference count used by data the shows up in aafs */ +struct aa_common_ref { + struct kref count; + enum reftype reftype; +}; + /** * aa_strneq - compare null terminated @str to a non null terminated substring * @str: a null terminated string diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h index 0dde8eda3d1a5..7accb1c39849a 100644 --- a/security/apparmor/include/match.h +++ b/security/apparmor/include/match.h @@ -185,6 +185,7 @@ static inline void aa_put_dfa(struct aa_dfa *dfa) #define MATCH_FLAG_DIFF_ENCODE 0x80000000 #define MARK_DIFF_ENCODE 0x40000000 #define MATCH_FLAG_OOB_TRANSITION 0x20000000 +#define MARK_DIFF_ENCODE_VERIFIED 0x10000000 #define MATCH_FLAGS_MASK 0xff000000 #define MATCH_FLAGS_VALID (MATCH_FLAG_DIFF_ENCODE | MATCH_FLAG_OOB_TRANSITION) #define MATCH_FLAGS_INVALID (MATCH_FLAGS_MASK & ~MATCH_FLAGS_VALID) diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 5115ebae26615..3895f8774a3f9 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -379,7 +379,7 @@ static inline bool profile_mediates_safe(struct aa_profile *profile, static inline struct aa_profile *aa_get_profile(struct aa_profile *p) { if (p) - kref_get(&(p->label.count)); + kref_get(&(p->label.count.count)); return p; } @@ -393,7 +393,7 @@ static inline struct aa_profile *aa_get_profile(struct aa_profile *p) */ static inline struct aa_profile *aa_get_profile_not0(struct aa_profile *p) { - if (p && kref_get_unless_zero(&p->label.count)) + if (p && kref_get_unless_zero(&p->label.count.count)) return p; return NULL; @@ -413,7 +413,7 @@ static inline struct aa_profile *aa_get_profile_rcu(struct aa_profile __rcu **p) rcu_read_lock(); do { c = rcu_dereference(*p); - } while (c && !kref_get_unless_zero(&c->label.count)); + } while (c && !kref_get_unless_zero(&c->label.count.count)); rcu_read_unlock(); return c; @@ -426,7 +426,7 @@ static inline struct aa_profile *aa_get_profile_rcu(struct aa_profile __rcu **p) static inline void aa_put_profile(struct aa_profile *p) { if (p) - kref_put(&p->label.count, aa_label_kref); + kref_put(&p->label.count.count, aa_label_kref); } static inline int AUDIT_MODE(struct aa_profile *profile) @@ -443,7 +443,7 @@ bool aa_policy_admin_capable(const struct cred *subj_cred, struct aa_label *label, struct aa_ns *ns); int aa_may_manage_policy(const struct cred *subj_cred, struct aa_label *label, struct aa_ns *ns, - u32 mask); + const struct cred *ocred, u32 mask); bool aa_current_policy_view_capable(struct aa_ns *ns); bool aa_current_policy_admin_capable(struct aa_ns *ns); diff --git a/security/apparmor/include/policy_ns.h b/security/apparmor/include/policy_ns.h index d646070fd966b..cc6e841518120 100644 --- a/security/apparmor/include/policy_ns.h +++ b/security/apparmor/include/policy_ns.h @@ -18,6 +18,8 @@ #include "label.h" #include "policy.h" +/* Match max depth of user namespaces */ +#define MAX_NS_DEPTH 32 /* struct aa_ns_acct - accounting of profiles in namespace * @max_size: maximum space allowed for all profiles in namespace diff --git a/security/apparmor/include/policy_unpack.h b/security/apparmor/include/policy_unpack.h index a6f4611ee50cf..e5a95dc4da1f7 100644 --- a/security/apparmor/include/policy_unpack.h +++ b/security/apparmor/include/policy_unpack.h @@ -87,17 +87,29 @@ struct aa_ext { u32 version; }; -/* - * struct aa_loaddata - buffer of policy raw_data set +/* struct aa_loaddata - buffer of policy raw_data set + * @count: inode/filesystem refcount - use aa_get_i_loaddata() + * @pcount: profile refcount - use aa_get_profile_loaddata() + * @list: list the loaddata is on + * @work: used to do a delayed cleanup + * @dents: refs to dents created in aafs + * @ns: the namespace this loaddata was loaded into + * @name: + * @size: the size of the data that was loaded + * @compressed_size: the size of the data when it is compressed + * @revision: unique revision count that this data was loaded as + * @abi: the abi number the loaddata uses + * @hash: a hash of the loaddata, used to help dedup data * - * there is no loaddata ref for being on ns list, nor a ref from - * d_inode(@dentry) when grab a ref from these, @ns->lock must be held - * && __aa_get_loaddata() needs to be used, and the return value - * checked, if NULL the loaddata is already being reaped and should be - * considered dead. + * There is no loaddata ref for being on ns->rawdata_list, so + * @ns->lock must be held when walking the list. Dentries and + * inode opens hold refs on @count; profiles hold refs on @pcount. + * When the last @pcount drops, do_ploaddata_rmfs() removes the + * fs entries and drops the associated @count ref. */ struct aa_loaddata { - struct kref count; + struct aa_common_ref count; + struct kref pcount; struct list_head list; struct work_struct work; struct dentry *dents[AAFS_LOADDATA_NDENTS]; @@ -119,50 +131,53 @@ struct aa_loaddata { int aa_unpack(struct aa_loaddata *udata, struct list_head *lh, const char **ns); /** - * __aa_get_loaddata - get a reference count to uncounted data reference + * aa_get_loaddata - get a reference count from a counted data reference * @data: reference to get a count on * - * Returns: pointer to reference OR NULL if race is lost and reference is - * being repeated. - * Requires: @data->ns->lock held, and the return code MUST be checked - * - * Use only from inode->i_private and @data->list found references + * Returns: pointer to reference + * Requires: @data to have a valid reference count on it. It is a bug + * if the race to reap can be encountered when it is used. */ static inline struct aa_loaddata * -__aa_get_loaddata(struct aa_loaddata *data) +aa_get_i_loaddata(struct aa_loaddata *data) { - if (data && kref_get_unless_zero(&(data->count))) - return data; - return NULL; + if (data) + kref_get(&(data->count.count)); + return data; } + /** - * aa_get_loaddata - get a reference count from a counted data reference + * aa_get_profile_loaddata - get a profile reference count on loaddata * @data: reference to get a count on * - * Returns: point to reference - * Requires: @data to have a valid reference count on it. It is a bug - * if the race to reap can be encountered when it is used. + * Returns: pointer to reference + * Requires: @data to have a valid reference count on it. */ static inline struct aa_loaddata * -aa_get_loaddata(struct aa_loaddata *data) +aa_get_profile_loaddata(struct aa_loaddata *data) { - struct aa_loaddata *tmp = __aa_get_loaddata(data); - - AA_BUG(data && !tmp); - - return tmp; + if (data) + kref_get(&(data->pcount)); + return data; } void __aa_loaddata_update(struct aa_loaddata *data, long revision); bool aa_rawdata_eq(struct aa_loaddata *l, struct aa_loaddata *r); void aa_loaddata_kref(struct kref *kref); +void aa_ploaddata_kref(struct kref *kref); struct aa_loaddata *aa_loaddata_alloc(size_t size); -static inline void aa_put_loaddata(struct aa_loaddata *data) +static inline void aa_put_i_loaddata(struct aa_loaddata *data) +{ + if (data) + kref_put(&data->count.count, aa_loaddata_kref); +} + +static inline void aa_put_profile_loaddata(struct aa_loaddata *data) { if (data) - kref_put(&data->count, aa_loaddata_kref); + kref_put(&data->pcount, aa_ploaddata_kref); } #if IS_ENABLED(CONFIG_KUNIT) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index e478283bc514a..3a721fdf18339 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -52,7 +52,8 @@ static void free_proxy(struct aa_proxy *proxy) void aa_proxy_kref(struct kref *kref) { - struct aa_proxy *proxy = container_of(kref, struct aa_proxy, count); + struct aa_proxy *proxy = container_of(kref, struct aa_proxy, + count.count); free_proxy(proxy); } @@ -63,7 +64,8 @@ struct aa_proxy *aa_alloc_proxy(struct aa_label *label, gfp_t gfp) new = kzalloc_obj(struct aa_proxy, gfp); if (new) { - kref_init(&new->count); + kref_init(&new->count.count); + new->count.reftype = REF_PROXY; rcu_assign_pointer(new->label, aa_get_label(label)); } return new; @@ -375,7 +377,8 @@ static void label_free_rcu(struct rcu_head *head) void aa_label_kref(struct kref *kref) { - struct aa_label *label = container_of(kref, struct aa_label, count); + struct aa_label *label = container_of(kref, struct aa_label, + count.count); struct aa_ns *ns = labels_ns(label); if (!ns) { @@ -412,7 +415,8 @@ bool aa_label_init(struct aa_label *label, int size, gfp_t gfp) label->size = size; /* doesn't include null */ label->vec[size] = NULL; /* null terminate */ - kref_init(&label->count); + kref_init(&label->count.count); + label->count.reftype = REF_NS; /* for aafs purposes */ RB_CLEAR_NODE(&label->node); return true; diff --git a/security/apparmor/match.c b/security/apparmor/match.c index 8fa0a1494acdc..e9fac67e5178c 100644 --- a/security/apparmor/match.c +++ b/security/apparmor/match.c @@ -160,9 +160,10 @@ static int verify_dfa(struct aa_dfa *dfa) if (state_count == 0) goto out; for (i = 0; i < state_count; i++) { - if (!(BASE_TABLE(dfa)[i] & MATCH_FLAG_DIFF_ENCODE) && - (DEFAULT_TABLE(dfa)[i] >= state_count)) + if (DEFAULT_TABLE(dfa)[i] >= state_count) { + pr_err("AppArmor DFA default state out of bounds"); goto out; + } if (BASE_TABLE(dfa)[i] & MATCH_FLAGS_INVALID) { pr_err("AppArmor DFA state with invalid match flags"); goto out; @@ -201,16 +202,31 @@ static int verify_dfa(struct aa_dfa *dfa) size_t j, k; for (j = i; - (BASE_TABLE(dfa)[j] & MATCH_FLAG_DIFF_ENCODE) && - !(BASE_TABLE(dfa)[j] & MARK_DIFF_ENCODE); + ((BASE_TABLE(dfa)[j] & MATCH_FLAG_DIFF_ENCODE) && + !(BASE_TABLE(dfa)[j] & MARK_DIFF_ENCODE_VERIFIED)); j = k) { + if (BASE_TABLE(dfa)[j] & MARK_DIFF_ENCODE) + /* loop in current chain */ + goto out; k = DEFAULT_TABLE(dfa)[j]; if (j == k) + /* self loop */ goto out; - if (k < j) - break; /* already verified */ BASE_TABLE(dfa)[j] |= MARK_DIFF_ENCODE; } + /* move mark to verified */ + for (j = i; + (BASE_TABLE(dfa)[j] & MATCH_FLAG_DIFF_ENCODE); + j = k) { + k = DEFAULT_TABLE(dfa)[j]; + if (j < i) + /* jumps to state/chain that has been + * verified + */ + break; + BASE_TABLE(dfa)[j] &= ~MARK_DIFF_ENCODE; + BASE_TABLE(dfa)[j] |= MARK_DIFF_ENCODE_VERIFIED; + } } error = 0; @@ -463,13 +479,18 @@ aa_state_t aa_dfa_match_len(struct aa_dfa *dfa, aa_state_t start, if (dfa->tables[YYTD_ID_EC]) { /* Equivalence class table defined */ u8 *equiv = EQUIV_TABLE(dfa); - for (; len; len--) - match_char(state, def, base, next, check, - equiv[(u8) *str++]); + for (; len; len--) { + u8 c = equiv[(u8) *str]; + + match_char(state, def, base, next, check, c); + str++; + } } else { /* default is direct to next state */ - for (; len; len--) - match_char(state, def, base, next, check, (u8) *str++); + for (; len; len--) { + match_char(state, def, base, next, check, (u8) *str); + str++; + } } return state; @@ -503,13 +524,18 @@ aa_state_t aa_dfa_match(struct aa_dfa *dfa, aa_state_t start, const char *str) /* Equivalence class table defined */ u8 *equiv = EQUIV_TABLE(dfa); /* default is direct to next state */ - while (*str) - match_char(state, def, base, next, check, - equiv[(u8) *str++]); + while (*str) { + u8 c = equiv[(u8) *str]; + + match_char(state, def, base, next, check, c); + str++; + } } else { /* default is direct to next state */ - while (*str) - match_char(state, def, base, next, check, (u8) *str++); + while (*str) { + match_char(state, def, base, next, check, (u8) *str); + str++; + } } return state; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 9108d74c6b46b..b6a5eb4021dbd 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -191,19 +191,43 @@ static void __list_remove_profile(struct aa_profile *profile) } /** - * __remove_profile - remove old profile, and children - * @profile: profile to be replaced (NOT NULL) + * __remove_profile - remove profile, and children + * @profile: profile to be removed (NOT NULL) * * Requires: namespace list lock be held, or list not be shared */ static void __remove_profile(struct aa_profile *profile) { + struct aa_profile *curr, *to_remove; + AA_BUG(!profile); AA_BUG(!profile->ns); AA_BUG(!mutex_is_locked(&profile->ns->lock)); /* release any children lists first */ - __aa_profile_list_release(&profile->base.profiles); + if (!list_empty(&profile->base.profiles)) { + curr = list_first_entry(&profile->base.profiles, struct aa_profile, base.list); + + while (curr != profile) { + + while (!list_empty(&curr->base.profiles)) + curr = list_first_entry(&curr->base.profiles, + struct aa_profile, base.list); + + to_remove = curr; + if (!list_is_last(&to_remove->base.list, + &aa_deref_parent(curr)->base.profiles)) + curr = list_next_entry(to_remove, base.list); + else + curr = aa_deref_parent(curr); + + /* released by free_profile */ + aa_label_remove(&to_remove->label); + __aafs_profile_rmdir(to_remove); + __list_remove_profile(to_remove); + } + } + /* released by free_profile */ aa_label_remove(&profile->label); __aafs_profile_rmdir(profile); @@ -326,7 +350,7 @@ void aa_free_profile(struct aa_profile *profile) } kfree_sensitive(profile->hash); - aa_put_loaddata(profile->rawdata); + aa_put_profile_loaddata(profile->rawdata); aa_label_destroy(&profile->label); kfree_sensitive(profile); @@ -918,17 +942,44 @@ bool aa_current_policy_admin_capable(struct aa_ns *ns) return res; } +static bool is_subset_of_obj_privilege(const struct cred *cred, + struct aa_label *label, + const struct cred *ocred) +{ + if (cred == ocred) + return true; + + if (!aa_label_is_subset(label, cred_label(ocred))) + return false; + /* don't allow crossing userns for now */ + if (cred->user_ns != ocred->user_ns) + return false; + if (!cap_issubset(cred->cap_inheritable, ocred->cap_inheritable)) + return false; + if (!cap_issubset(cred->cap_permitted, ocred->cap_permitted)) + return false; + if (!cap_issubset(cred->cap_effective, ocred->cap_effective)) + return false; + if (!cap_issubset(cred->cap_bset, ocred->cap_bset)) + return false; + if (!cap_issubset(cred->cap_ambient, ocred->cap_ambient)) + return false; + return true; +} + + /** * aa_may_manage_policy - can the current task manage policy * @subj_cred: subjects cred * @label: label to check if it can manage policy * @ns: namespace being managed by @label (may be NULL if @label's ns) + * @ocred: object cred if request is coming from an open object * @mask: contains the policy manipulation operation being done * * Returns: 0 if the task is allowed to manipulate policy else error */ int aa_may_manage_policy(const struct cred *subj_cred, struct aa_label *label, - struct aa_ns *ns, u32 mask) + struct aa_ns *ns, const struct cred *ocred, u32 mask) { const char *op; @@ -944,6 +995,11 @@ int aa_may_manage_policy(const struct cred *subj_cred, struct aa_label *label, return audit_policy(label, op, NULL, NULL, "policy_locked", -EACCES); + if (ocred && !is_subset_of_obj_privilege(subj_cred, label, ocred)) + return audit_policy(label, op, NULL, NULL, + "not privileged for target profile", + -EACCES); + if (!aa_policy_admin_capable(subj_cred, label, ns)) return audit_policy(label, op, NULL, NULL, "not policy admin", -EACCES); @@ -1115,7 +1171,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, LIST_HEAD(lh); op = mask & AA_MAY_REPLACE_POLICY ? OP_PROF_REPL : OP_PROF_LOAD; - aa_get_loaddata(udata); + aa_get_profile_loaddata(udata); /* released below */ error = aa_unpack(udata, &lh, &ns_name); if (error) @@ -1142,6 +1198,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, goto fail; } ns_name = ent->ns_name; + ent->ns_name = NULL; } else count++; } @@ -1166,10 +1223,10 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, if (aa_rawdata_eq(rawdata_ent, udata)) { struct aa_loaddata *tmp; - tmp = __aa_get_loaddata(rawdata_ent); + tmp = aa_get_profile_loaddata(rawdata_ent); /* check we didn't fail the race */ if (tmp) { - aa_put_loaddata(udata); + aa_put_profile_loaddata(udata); udata = tmp; break; } @@ -1182,7 +1239,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, struct aa_profile *p; if (aa_g_export_binary) - ent->new->rawdata = aa_get_loaddata(udata); + ent->new->rawdata = aa_get_profile_loaddata(udata); error = __lookup_replace(ns, ent->new->base.hname, !(mask & AA_MAY_REPLACE_POLICY), &ent->old, &info); @@ -1315,7 +1372,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, out: aa_put_ns(ns); - aa_put_loaddata(udata); + aa_put_profile_loaddata(udata); kfree(ns_name); if (error) diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c index 01b653a3609bc..5a907a875d8f6 100644 --- a/security/apparmor/policy_ns.c +++ b/security/apparmor/policy_ns.c @@ -223,6 +223,8 @@ static struct aa_ns *__aa_create_ns(struct aa_ns *parent, const char *name, AA_BUG(!name); AA_BUG(!mutex_is_locked(&parent->lock)); + if (parent->level > MAX_NS_DEPTH) + return ERR_PTR(-ENOSPC); ns = alloc_ns(parent->base.hname, name); if (!ns) return ERR_PTR(-ENOMEM); diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 1769417a99624..076d3ff14da68 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -109,34 +109,48 @@ bool aa_rawdata_eq(struct aa_loaddata *l, struct aa_loaddata *r) return memcmp(l->data, r->data, r->compressed_size ?: r->size) == 0; } +static void do_loaddata_free(struct aa_loaddata *d) +{ + kfree_sensitive(d->hash); + kfree_sensitive(d->name); + kvfree(d->data); + kfree_sensitive(d); +} + +void aa_loaddata_kref(struct kref *kref) +{ + struct aa_loaddata *d = container_of(kref, struct aa_loaddata, + count.count); + + do_loaddata_free(d); +} + /* * need to take the ns mutex lock which is NOT safe most places that * put_loaddata is called, so we have to delay freeing it */ -static void do_loaddata_free(struct work_struct *work) +static void do_ploaddata_rmfs(struct work_struct *work) { struct aa_loaddata *d = container_of(work, struct aa_loaddata, work); struct aa_ns *ns = aa_get_ns(d->ns); if (ns) { mutex_lock_nested(&ns->lock, ns->level); + /* remove fs ref to loaddata */ __aa_fs_remove_rawdata(d); mutex_unlock(&ns->lock); aa_put_ns(ns); } - - kfree_sensitive(d->hash); - kfree_sensitive(d->name); - kvfree(d->data); - kfree_sensitive(d); + /* called by dropping last pcount, so drop its associated icount */ + aa_put_i_loaddata(d); } -void aa_loaddata_kref(struct kref *kref) +void aa_ploaddata_kref(struct kref *kref) { - struct aa_loaddata *d = container_of(kref, struct aa_loaddata, count); + struct aa_loaddata *d = container_of(kref, struct aa_loaddata, pcount); if (d) { - INIT_WORK(&d->work, do_loaddata_free); + INIT_WORK(&d->work, do_ploaddata_rmfs); schedule_work(&d->work); } } @@ -153,7 +167,9 @@ struct aa_loaddata *aa_loaddata_alloc(size_t size) kfree(d); return ERR_PTR(-ENOMEM); } - kref_init(&d->count); + kref_init(&d->count.count); + d->count.reftype = REF_RAWDATA; + kref_init(&d->pcount); INIT_LIST_HEAD(&d->list); return d; @@ -1010,7 +1026,17 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, if (!aa_unpack_u32(e, &pdb->start[AA_CLASS_FILE], "dfa_start")) { /* default start state for xmatch and file dfa */ pdb->start[AA_CLASS_FILE] = DFA_START; - } /* setup class index */ + } + + size_t state_count = pdb->dfa->tables[YYTD_ID_BASE]->td_lolen; + + if (pdb->start[0] >= state_count || + pdb->start[AA_CLASS_FILE] >= state_count) { + *info = "invalid dfa start state"; + goto fail; + } + + /* setup class index */ for (i = AA_CLASS_FILE + 1; i <= AA_CLASS_LAST; i++) { pdb->start[i] = aa_dfa_next(pdb->dfa, pdb->start[0], i); @@ -1409,7 +1435,6 @@ static int verify_header(struct aa_ext *e, int required, const char **ns) { int error = -EPROTONOSUPPORT; const char *name = NULL; - *ns = NULL; /* get the interface version */ if (!aa_unpack_u32(e, &e->version, "version")) { diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 67cf6a0e17ba8..5a64453da7283 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -2144,6 +2144,10 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream, for (;;) { long tout; struct snd_pcm_runtime *to_check; + unsigned int drain_rate; + snd_pcm_uframes_t drain_bufsz; + bool drain_no_period_wakeup; + if (signal_pending(current)) { result = -ERESTARTSYS; break; @@ -2163,16 +2167,25 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream, snd_pcm_group_unref(group, substream); if (!to_check) break; /* all drained */ + /* + * Cache the runtime fields needed after unlock. + * A concurrent close() on the linked stream may free + * its runtime via snd_pcm_detach_substream() once we + * release the stream lock below. + */ + drain_no_period_wakeup = to_check->no_period_wakeup; + drain_rate = to_check->rate; + drain_bufsz = to_check->buffer_size; init_waitqueue_entry(&wait, current); set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&to_check->sleep, &wait); snd_pcm_stream_unlock_irq(substream); - if (runtime->no_period_wakeup) + if (drain_no_period_wakeup) tout = MAX_SCHEDULE_TIMEOUT; else { tout = 100; - if (runtime->rate) { - long t = runtime->buffer_size * 1100 / runtime->rate; + if (drain_rate) { + long t = drain_bufsz * 1100 / drain_rate; tout = max(t, tout); } tout = msecs_to_jiffies(tout); diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 4c49f1195e1bc..ab4b22fcb72ed 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6940,6 +6940,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x89da, "HP Spectre x360 14t-ea100", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), SND_PCI_QUIRK(0x103c, 0x89e7, "HP Elite x2 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8a0f, "HP Pavilion 14-ec1xxx", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8a1f, "HP Laptop 14s-dr5xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8a20, "HP Laptop 15s-fq5xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8a25, "HP Victus 16-d1xxx (MB 8A25)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8a26, "HP Victus 16-d1xxx (MB 8A26)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), @@ -7273,6 +7274,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e93, "ASUS ExpertBook B9403CVAR", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1eb3, "ASUS Ally RCLA72", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1ed3, "ASUS HN7306W", ALC287_FIXUP_CS35L41_I2C_2), + HDA_CODEC_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC285_FIXUP_ASUS_I2C_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), @@ -7493,6 +7495,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x225d, "Thinkpad T480", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x2288, "Thinkpad X390", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x22c1, "Thinkpad P1 Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), diff --git a/sound/hda/codecs/realtek/alc662.c b/sound/hda/codecs/realtek/alc662.c index 5073165d1f3cf..3a943adf90876 100644 --- a/sound/hda/codecs/realtek/alc662.c +++ b/sound/hda/codecs/realtek/alc662.c @@ -313,6 +313,7 @@ enum { ALC897_FIXUP_HEADSET_MIC_PIN2, ALC897_FIXUP_UNIS_H3C_X500S, ALC897_FIXUP_HEADSET_MIC_PIN3, + ALC897_FIXUP_H610M_HP_PIN, }; static const struct hda_fixup alc662_fixups[] = { @@ -766,6 +767,13 @@ static const struct hda_fixup alc662_fixups[] = { { } }, }, + [ALC897_FIXUP_H610M_HP_PIN] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x0321403f }, /* HP out */ + { } + }, + }, }; static const struct hda_quirk alc662_fixup_tbl[] = { @@ -815,6 +823,7 @@ static const struct hda_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_FIXUP_ASUS_MODE2), SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), + SND_PCI_QUIRK(0x1458, 0xa194, "H610M H V2 DDR4", ALC897_FIXUP_H610M_HP_PIN), SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x1057, "Lenovo P360", ALC897_FIXUP_HEADSET_MIC_PIN), diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c index 4d99472c75baf..09f6c9a2c0410 100644 --- a/sound/soc/amd/acp/acp-mach-common.c +++ b/sound/soc/amd/acp/acp-mach-common.c @@ -127,8 +127,13 @@ static int acp_card_rt5682_init(struct snd_soc_pcm_runtime *rtd) if (drvdata->hs_codec_id != RT5682) return -EINVAL; - drvdata->wclk = clk_get(component->dev, "rt5682-dai-wclk"); - drvdata->bclk = clk_get(component->dev, "rt5682-dai-bclk"); + drvdata->wclk = devm_clk_get(component->dev, "rt5682-dai-wclk"); + if (IS_ERR(drvdata->wclk)) + return PTR_ERR(drvdata->wclk); + + drvdata->bclk = devm_clk_get(component->dev, "rt5682-dai-bclk"); + if (IS_ERR(drvdata->bclk)) + return PTR_ERR(drvdata->bclk); ret = snd_soc_dapm_new_controls(dapm, rt5682_widgets, ARRAY_SIZE(rt5682_widgets)); @@ -370,8 +375,13 @@ static int acp_card_rt5682s_init(struct snd_soc_pcm_runtime *rtd) return -EINVAL; if (!drvdata->soc_mclk) { - drvdata->wclk = clk_get(component->dev, "rt5682-dai-wclk"); - drvdata->bclk = clk_get(component->dev, "rt5682-dai-bclk"); + drvdata->wclk = devm_clk_get(component->dev, "rt5682-dai-wclk"); + if (IS_ERR(drvdata->wclk)) + return PTR_ERR(drvdata->wclk); + + drvdata->bclk = devm_clk_get(component->dev, "rt5682-dai-bclk"); + if (IS_ERR(drvdata->bclk)) + return PTR_ERR(drvdata->bclk); } ret = snd_soc_dapm_new_controls(dapm, rt5682s_widgets, diff --git a/sound/soc/amd/acp3x-rt5682-max9836.c b/sound/soc/amd/acp3x-rt5682-max9836.c index 4ca1978020a96..d1eb6f12a1830 100644 --- a/sound/soc/amd/acp3x-rt5682-max9836.c +++ b/sound/soc/amd/acp3x-rt5682-max9836.c @@ -94,8 +94,13 @@ static int acp3x_5682_init(struct snd_soc_pcm_runtime *rtd) return ret; } - rt5682_dai_wclk = clk_get(component->dev, "rt5682-dai-wclk"); - rt5682_dai_bclk = clk_get(component->dev, "rt5682-dai-bclk"); + rt5682_dai_wclk = devm_clk_get(component->dev, "rt5682-dai-wclk"); + if (IS_ERR(rt5682_dai_wclk)) + return PTR_ERR(rt5682_dai_wclk); + + rt5682_dai_bclk = devm_clk_get(component->dev, "rt5682-dai-bclk"); + if (IS_ERR(rt5682_dai_bclk)) + return PTR_ERR(rt5682_dai_bclk); ret = snd_soc_card_jack_new_pins(card, "Headset Jack", SND_JACK_HEADSET | diff --git a/sound/soc/codecs/rt1011.c b/sound/soc/codecs/rt1011.c index 9f34a6a354876..03f31d9d916e6 100644 --- a/sound/soc/codecs/rt1011.c +++ b/sound/soc/codecs/rt1011.c @@ -1047,7 +1047,7 @@ static int rt1011_recv_spk_mode_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_to_dapm(kcontrol); + struct snd_soc_dapm_context *dapm = snd_soc_component_to_dapm(component); struct rt1011_priv *rt1011 = snd_soc_component_get_drvdata(component); diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index bdc02e85b089f..9e5be0eaa77f3 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -1038,11 +1038,15 @@ int graph_util_is_ports0(struct device_node *np) else port = np; - struct device_node *ports __free(device_node) = of_get_parent(port); - struct device_node *top __free(device_node) = of_get_parent(ports); - struct device_node *ports0 __free(device_node) = of_get_child_by_name(top, "ports"); + struct device_node *ports __free(device_node) = of_get_parent(port); + const char *at = strchr(kbasename(ports->full_name), '@'); - return ports0 == ports; + /* + * Since child iteration order may differ + * between a base DT and DT overlays, + * string match "ports" or "ports@0" in the node name instead. + */ + return !at || !strcmp(at, "@0"); } EXPORT_SYMBOL_GPL(graph_util_is_ports0); diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c index de3bdac3e791c..168c166c960d7 100644 --- a/sound/soc/qcom/qdsp6/q6apm-dai.c +++ b/sound/soc/qcom/qdsp6/q6apm-dai.c @@ -838,6 +838,7 @@ static const struct snd_soc_component_driver q6apm_fe_dai_component = { .ack = q6apm_dai_ack, .compress_ops = &q6apm_dai_compress_ops, .use_dai_pcm_id = true, + .remove_order = SND_SOC_COMP_ORDER_EARLY, }; static int q6apm_dai_probe(struct platform_device *pdev) diff --git a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c index 528756f1332bc..5be37eeea329f 100644 --- a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c +++ b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c @@ -278,6 +278,7 @@ static const struct snd_soc_component_driver q6apm_lpass_dai_component = { .of_xlate_dai_name = q6dsp_audio_ports_of_xlate_dai_name, .be_pcm_base = AUDIOREACH_BE_PCM_BASE, .use_dai_pcm_id = true, + .remove_order = SND_SOC_COMP_ORDER_FIRST, }; static int q6apm_lpass_dai_dev_probe(struct platform_device *pdev) diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c index 44841fde38567..970b08c89bb35 100644 --- a/sound/soc/qcom/qdsp6/q6apm.c +++ b/sound/soc/qcom/qdsp6/q6apm.c @@ -715,6 +715,7 @@ static const struct snd_soc_component_driver q6apm_audio_component = { .name = APM_AUDIO_DRV_NAME, .probe = q6apm_audio_probe, .remove = q6apm_audio_remove, + .remove_order = SND_SOC_COMP_ORDER_LAST, }; static int apm_probe(gpr_device_t *gdev) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index d0fffef65daf5..573693e21780a 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -462,8 +462,7 @@ static void soc_free_pcm_runtime(struct snd_soc_pcm_runtime *rtd) list_del(&rtd->list); - if (delayed_work_pending(&rtd->delayed_work)) - flush_delayed_work(&rtd->delayed_work); + flush_delayed_work(&rtd->delayed_work); snd_soc_pcm_component_free(rtd); /* @@ -1864,12 +1863,15 @@ static void cleanup_dmi_name(char *name) /* * Check if a DMI field is valid, i.e. not containing any string - * in the black list. + * in the black list and not the empty string. */ static int is_dmi_valid(const char *field) { int i = 0; + if (!field[0]) + return 0; + while (dmi_blacklist[i]) { if (strstr(field, dmi_blacklist[i])) return 0; @@ -2122,6 +2124,9 @@ static void soc_cleanup_card_resources(struct snd_soc_card *card) for_each_card_rtds(card, rtd) if (rtd->initialized) snd_soc_link_exit(rtd); + /* flush delayed work before removing DAIs and DAPM widgets */ + snd_soc_flush_all_delayed_work(card); + /* remove and free each DAI */ soc_remove_link_dais(card); soc_remove_link_components(card); diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c index 94b5ab77649b3..ea10e6e8a9fe7 100644 --- a/sound/soc/tegra/tegra_audio_graph_card.c +++ b/sound/soc/tegra/tegra_audio_graph_card.c @@ -231,6 +231,15 @@ static const struct tegra_audio_cdata tegra186_data = { .plla_out0_rates[x11_RATE] = 45158400, }; +static const struct tegra_audio_cdata tegra238_data = { + /* PLLA */ + .plla_rates[x8_RATE] = 1277952000, + .plla_rates[x11_RATE] = 1264435200, + /* PLLA_OUT0 */ + .plla_out0_rates[x8_RATE] = 49152000, + .plla_out0_rates[x11_RATE] = 45158400, +}; + static const struct tegra_audio_cdata tegra264_data = { /* PLLA1 */ .plla_rates[x8_RATE] = 983040000, @@ -245,6 +254,8 @@ static const struct of_device_id graph_of_tegra_match[] = { .data = &tegra210_data }, { .compatible = "nvidia,tegra186-audio-graph-card", .data = &tegra186_data }, + { .compatible = "nvidia,tegra238-audio-graph-card", + .data = &tegra238_data }, { .compatible = "nvidia,tegra264-audio-graph-card", .data = &tegra264_data }, {}, diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c index ef3150581eabd..fd1fb668929a2 100644 --- a/sound/usb/mixer_scarlett2.c +++ b/sound/usb/mixer_scarlett2.c @@ -8251,6 +8251,8 @@ static int scarlett2_find_fc_interface(struct usb_device *dev, if (desc->bInterfaceClass != 255) continue; + if (desc->bNumEndpoints < 1) + continue; epd = get_endpoint(intf->altsetting, 0); private->bInterfaceNumber = desc->bInterfaceNumber; diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index d54a1a44a69bd..049a94079f9e9 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2243,6 +2243,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IFACE_DELAY | QUIRK_FLAG_FORCE_IFACE_RESET), DEVICE_FLG(0x0661, 0x0883, /* iBasso DC04 Ultra */ QUIRK_FLAG_DSD_RAW), + DEVICE_FLG(0x0666, 0x0880, /* SPACETOUCH USB Audio */ + QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x06f8, 0xb000, /* Hercules DJ Console (Windows Edition) */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x06f8, 0xd002, /* Hercules DJ Console (Macintosh Edition) */ diff --git a/tools/arch/x86/include/asm/amd/ibs.h b/tools/arch/x86/include/asm/amd/ibs.h index cbce54fec7b93..41e8abd72c8b8 100644 --- a/tools/arch/x86/include/asm/amd/ibs.h +++ b/tools/arch/x86/include/asm/amd/ibs.h @@ -110,7 +110,7 @@ union ibs_op_data3 { __u64 ld_op:1, /* 0: load op */ st_op:1, /* 1: store op */ dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */ - dc_l2tlb_miss:1, /* 3: data cache L2TLB hit in 2M page */ + dc_l2tlb_miss:1, /* 3: data cache L2TLB miss in 2M page */ dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */ dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */ dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index c3b53beb13007..dbe104df339b8 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -84,7 +84,7 @@ #define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ #define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_SYSFAST32 ( 3*32+15) /* sysenter/syscall in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ #define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ @@ -326,6 +326,7 @@ #define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ #define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ #define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */ +#define X86_FEATURE_MOVRS (12*32+31) /* MOVRS instructions */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ @@ -472,6 +473,7 @@ #define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */ #define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ +#define X86_FEATURE_ERAPS (20*32+24) /* Enhanced Return Address Predictor Security */ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 43adc38d31d57..da5275d8eda63 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -263,6 +263,11 @@ #define MSR_SNOOP_RSP_0 0x00001328 #define MSR_SNOOP_RSP_1 0x00001329 +#define MSR_OMR_0 0x000003e0 +#define MSR_OMR_1 0x000003e1 +#define MSR_OMR_2 0x000003e2 +#define MSR_OMR_3 0x000003e3 + #define MSR_LBR_SELECT 0x000001c8 #define MSR_LBR_TOS 0x000001c9 @@ -1219,6 +1224,7 @@ #define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e #define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 +#define MSR_CORE_PERF_GLOBAL_STATUS_SET 0x00000391 #define MSR_PERF_METRICS 0x00000329 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 7ceff65836525..846a63215ce14 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -503,6 +503,7 @@ struct kvm_sync_regs { #define KVM_X86_GRP_SEV 1 # define KVM_X86_SEV_VMSA_FEATURES 0 # define KVM_X86_SNP_POLICY_BITS 1 +# define KVM_X86_SEV_SNP_REQ_CERTS 2 struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; @@ -743,6 +744,7 @@ enum sev_cmd_id { KVM_SEV_SNP_LAUNCH_START = 100, KVM_SEV_SNP_LAUNCH_UPDATE, KVM_SEV_SNP_LAUNCH_FINISH, + KVM_SEV_SNP_ENABLE_REQ_CERTS, KVM_SEV_NR_MAX, }; @@ -914,8 +916,10 @@ struct kvm_sev_snp_launch_finish { __u64 pad1[4]; }; -#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) -#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +#define KVM_X2APIC_API_USE_32BIT_IDS _BITULL(0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK _BITULL(1) +#define KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST _BITULL(2) +#define KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST _BITULL(3) struct kvm_hyperv_eventfd { __u32 conn_id; diff --git a/tools/bootconfig/samples/bad-non-closed-brace.bconf b/tools/bootconfig/samples/bad-non-closed-brace.bconf new file mode 100644 index 0000000000000..6ed9f3363dde3 --- /dev/null +++ b/tools/bootconfig/samples/bad-non-closed-brace.bconf @@ -0,0 +1,4 @@ +foo { + bar { + buz + } diff --git a/tools/bootconfig/samples/bad-over-max-brace.bconf b/tools/bootconfig/samples/bad-over-max-brace.bconf new file mode 100644 index 0000000000000..74b5dc9e21dcc --- /dev/null +++ b/tools/bootconfig/samples/bad-over-max-brace.bconf @@ -0,0 +1,19 @@ +key1 { +key2 { +key3 { +key4 { +key5 { +key6 { +key7 { +key8 { +key9 { +key10 { +key11 { +key12 { +key13 { +key14 { +key15 { +key16 { +key17 { +}}}}}}}}}}}}}}}}} + diff --git a/tools/bootconfig/samples/exp-good-nested-brace.bconf b/tools/bootconfig/samples/exp-good-nested-brace.bconf new file mode 100644 index 0000000000000..19e0f51b45533 --- /dev/null +++ b/tools/bootconfig/samples/exp-good-nested-brace.bconf @@ -0,0 +1 @@ +key1.key2.key3.key4.key5.key6.key7.key8.key9.key10.key11.key12.key13.key14.key15.key16; diff --git a/tools/bootconfig/samples/good-nested-brace.bconf b/tools/bootconfig/samples/good-nested-brace.bconf new file mode 100644 index 0000000000000..980d094f296e6 --- /dev/null +++ b/tools/bootconfig/samples/good-nested-brace.bconf @@ -0,0 +1,18 @@ +key1 { +key2 { +key3 { +key4 { +key5 { +key6 { +key7 { +key8 { +key9 { +key10 { +key11 { +key12 { +key13 { +key14 { +key15 { +key16 { +}}}}}}}}}}}}}}}} + diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index be9bd18b1d562..fc69f815ce4af 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -171,6 +171,15 @@ $BOOTCONF $INITRD > $OUTFILE xfail grep -q 'val[[:space:]]' $OUTFILE xpass grep -q 'val2[[:space:]]' $OUTFILE +echo "Showing correct line:column of no closing brace" +cat > $TEMPCONF << EOF +foo { +bar { +} +EOF +$BOOTCONF -a $TEMPCONF $INITRD 2> $OUTFILE +xpass grep -q "1:1" $OUTFILE + echo "=== expected failure cases ===" for i in samples/bad-* ; do xfail $BOOTCONF -a $i $INITRD diff --git a/tools/build/Build.include b/tools/build/Build.include index e45b2eb0d24af..cd0baa7a168d8 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -98,6 +98,15 @@ c_flags_2 = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(c_flags_1)) c_flags = $(filter-out $(CFLAGS_REMOVE_$(obj)), $(c_flags_2)) cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj)) +### +# Rust flags to be used on rule definition, includes: +# - global $(RUST_FLAGS) +# - per target Rust flags +# - per object Rust flags +rust_flags_1 = $(RUST_FLAGS) $(RUST_FLAGS_$(basetarget).o) $(RUST_FLAGS_$(obj)) +rust_flags_2 = $(filter-out $(RUST_FLAGS_REMOVE_$(basetarget).o), $(rust_flags_1)) +rust_flags = $(filter-out $(RUST_FLAGS_REMOVE_$(obj)), $(rust_flags_2)) + ### ## HOSTCC C flags diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 60e65870eae14..ad69efdd4e85c 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -70,11 +70,13 @@ quiet_cmd_gen = GEN $@ # If there's nothing to link, create empty $@ object. quiet_cmd_ld_multi = LD $@ cmd_ld_multi = $(if $(strip $(obj-y)),\ - $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@) + printf "$(objprefix)%s " $(patsubst $(objprefix)%,%,$(filter $(obj-y),$^)) | \ + xargs $(LD) -r -o $@,rm -f $@; $(AR) rcs $@) quiet_cmd_host_ld_multi = HOSTLD $@ cmd_host_ld_multi = $(if $(strip $(obj-y)),\ - $(HOSTLD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@) + printf "$(objprefix)%s " $(patsubst $(objprefix)%,%,$(filter $(obj-y),$^)) | \ + xargs $(HOSTLD) -r -o $@,rm -f $@; $(HOSTAR) rcs $@) rust_common_cmd = \ $(RUSTC) $(rust_flags) \ diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index 89b0ac0014b0d..2e179abe472a0 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -21,30 +21,6 @@ */ #define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2)) -/* - * Below are the definition of bit offsets for perf option, and works as - * arbitrary values for all ETM versions. - * - * Most of them are orignally from ETMv3.5/PTM's ETMCR config, therefore, - * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and - * directly use below macros as config bits. - */ -#define ETM_OPT_BRANCH_BROADCAST 8 -#define ETM_OPT_CYCACC 12 -#define ETM_OPT_CTXTID 14 -#define ETM_OPT_CTXTID2 15 -#define ETM_OPT_TS 28 -#define ETM_OPT_RETSTK 29 - -/* ETMv4 CONFIGR programming bits for the ETM OPTs */ -#define ETM4_CFG_BIT_BB 3 -#define ETM4_CFG_BIT_CYCACC 4 -#define ETM4_CFG_BIT_CTXTID 6 -#define ETM4_CFG_BIT_VMID 7 -#define ETM4_CFG_BIT_TS 11 -#define ETM4_CFG_BIT_RETSTK 12 -#define ETM4_CFG_BIT_VMID_OPT 15 - /* * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. * Used to associate a CPU with the CoreSight Trace ID. diff --git a/tools/include/linux/gfp.h b/tools/include/linux/gfp.h index 6a10ff5f5be90..9e957b57b6945 100644 --- a/tools/include/linux/gfp.h +++ b/tools/include/linux/gfp.h @@ -5,6 +5,10 @@ #include #include +/* Helper macro to avoid gfp flags if they are the default one */ +#define __default_gfp(a,...) a +#define default_gfp(...) __default_gfp(__VA_ARGS__ __VA_OPT__(,) GFP_KERNEL) + static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { return !!(gfp_flags & __GFP_DIRECT_RECLAIM); diff --git a/tools/include/linux/gfp_types.h b/tools/include/linux/gfp_types.h index 3de43b12209ee..6c75df30a281d 100644 --- a/tools/include/linux/gfp_types.h +++ b/tools/include/linux/gfp_types.h @@ -139,6 +139,8 @@ enum { * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. * * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension. + * mark_obj_codetag_empty() should be called upon freeing for objects allocated + * with this flag to indicate that their NULL tags are expected and normal. */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) @@ -309,8 +311,10 @@ enum { * * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower * watermark is applied to allow access to "atomic reserves". - * The current implementation doesn't support NMI and few other strict - * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT. + * The current implementation doesn't support NMI, nor contexts that disable + * preemption under PREEMPT_RT. This includes raw_spin_lock() and plain + * preempt_disable() - see "Memory allocation" in + * Documentation/core-api/real-time/differences.rst for more info. * * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. @@ -321,6 +325,7 @@ enum { * %GFP_NOWAIT is for kernel allocations that should not stall for direct * reclaim, start physical IO or use any filesystem callback. It is very * likely to fail to allocate memory, even for very small allocations. + * The same restrictions on calling contexts apply as for %GFP_ATOMIC. * * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages * that do not require the starting of any physical IO. diff --git a/tools/include/linux/overflow.h b/tools/include/linux/overflow.h index dcb0c1bf68660..3427d78803264 100644 --- a/tools/include/linux/overflow.h +++ b/tools/include/linux/overflow.h @@ -68,6 +68,25 @@ __builtin_mul_overflow(__a, __b, __d); \ }) +/** + * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX + * @factor1: first factor + * @factor2: second factor + * + * Returns: calculate @factor1 * @factor2, both promoted to size_t, + * with any overflow causing the return value to be SIZE_MAX. The + * lvalue must be size_t to avoid implicit type conversion. + */ +static inline size_t __must_check size_mul(size_t factor1, size_t factor2) +{ + size_t bytes; + + if (check_mul_overflow(factor1, factor2, &bytes)) + return SIZE_MAX; + + return bytes; +} + /** * array_size() - Calculate size of 2-dimensional array. * diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index 94937a699402b..6d8e9413d5a4d 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h @@ -202,4 +202,13 @@ static inline unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf) return sheaf->size; } +#define __alloc_objs(KMALLOC, GFP, TYPE, COUNT) \ +({ \ + const size_t __obj_size = size_mul(sizeof(TYPE), COUNT); \ + (TYPE *)KMALLOC(__obj_size, GFP); \ +}) + +#define kzalloc_obj(P, ...) \ + __alloc_objs(kzalloc, default_gfp(__VA_ARGS__), typeof(P), 1) + #endif /* _TOOLS_SLAB_H */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 942370b3f5d25..a627acc8fb5fe 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -860,8 +860,11 @@ __SYSCALL(__NR_file_setattr, sys_file_setattr) #define __NR_listns 470 __SYSCALL(__NR_listns, sys_listns) +#define __NR_rseq_slice_yield 471 +__SYSCALL(__NR_rseq_slice_yield, sys_rseq_slice_yield) + #undef __NR_syscalls -#define __NR_syscalls 471 +#define __NR_syscalls 472 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index dddb781b0507d..65500f5db3799 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -135,6 +135,12 @@ struct kvm_xen_exit { } u; }; +struct kvm_exit_snp_req_certs { + __u64 gpa; + __u64 npages; + __u64 ret; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -180,6 +186,8 @@ struct kvm_xen_exit { #define KVM_EXIT_MEMORY_FAULT 39 #define KVM_EXIT_TDX 40 #define KVM_EXIT_ARM_SEA 41 +#define KVM_EXIT_ARM_LDST64B 42 +#define KVM_EXIT_SNP_REQ_CERTS 43 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -402,7 +410,7 @@ struct kvm_run { } eoi; /* KVM_EXIT_HYPERV */ struct kvm_hyperv_exit hyperv; - /* KVM_EXIT_ARM_NISV */ + /* KVM_EXIT_ARM_NISV / KVM_EXIT_ARM_LDST64B */ struct { __u64 esr_iss; __u64 fault_ipa; @@ -482,6 +490,8 @@ struct kvm_run { __u64 gva; __u64 gpa; } arm_sea; + /* KVM_EXIT_SNP_REQ_CERTS */ + struct kvm_exit_snp_req_certs snp_req_certs; /* Fix the size of the union. */ char padding[256]; }; @@ -974,6 +984,7 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD_FLAGS 244 #define KVM_CAP_ARM_SEA_TO_USER 245 #define KVM_CAP_S390_USER_OPEREXEC 246 +#define KVM_CAP_S390_KEYOP 247 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1219,6 +1230,16 @@ struct kvm_vfio_spapr_tce { __s32 tablefd; }; +#define KVM_S390_KEYOP_ISKE 0x01 +#define KVM_S390_KEYOP_RRBE 0x02 +#define KVM_S390_KEYOP_SSKE 0x03 +struct kvm_s390_keyop { + __u64 guest_addr; + __u8 key; + __u8 operation; + __u8 pad[6]; +}; + /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. @@ -1238,6 +1259,7 @@ struct kvm_vfio_spapr_tce { #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) +#define KVM_S390_KEYOP _IOWR(KVMIO, 0x53, struct kvm_s390_keyop) /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 76e9d0664d0c7..fd10aa8d697f2 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1396,7 +1396,7 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ #define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ -#define PERF_MEM_LVLNUM_L0 0x0007 /* L0 */ +#define PERF_MEM_LVLNUM_L0 0x0007 /* L0 */ #define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 76bcd4e85de34..b71d1886022e9 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -13,7 +13,7 @@ endif ifeq ($(ARCH_HAS_KLP),y) HAVE_XXHASH = $(shell printf "$(pound)include \nXXH3_state_t *state;int main() {}" | \ - $(HOSTCC) -xc - -o /dev/null -lxxhash 2> /dev/null && echo y || echo n) + $(HOSTCC) $(HOSTCFLAGS) -xc - -o /dev/null -lxxhash 2> /dev/null && echo y || echo n) ifeq ($(HAVE_XXHASH),y) BUILD_KLP := y LIBXXHASH_CFLAGS := $(shell $(HOSTPKG_CONFIG) libxxhash --cflags 2>/dev/null) \ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 73bfea220d1bf..c5817829cdfac 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -395,52 +395,36 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec if (!rex_w) break; - if (modrm_reg == CFI_SP) { - - if (mod_is_reg()) { - /* mov %rsp, reg */ - ADD_OP(op) { - op->src.type = OP_SRC_REG; - op->src.reg = CFI_SP; - op->dest.type = OP_DEST_REG; - op->dest.reg = modrm_rm; - } - break; - - } else { - /* skip RIP relative displacement */ - if (is_RIP()) - break; - - /* skip nontrivial SIB */ - if (have_SIB()) { - modrm_rm = sib_base; - if (sib_index != CFI_SP) - break; - } - - /* mov %rsp, disp(%reg) */ - ADD_OP(op) { - op->src.type = OP_SRC_REG; - op->src.reg = CFI_SP; - op->dest.type = OP_DEST_REG_INDIRECT; - op->dest.reg = modrm_rm; - op->dest.offset = ins.displacement.value; - } - break; + if (mod_is_reg()) { + /* mov reg, reg */ + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = modrm_reg; + op->dest.type = OP_DEST_REG; + op->dest.reg = modrm_rm; } - break; } - if (rm_is_reg(CFI_SP)) { + /* skip RIP relative displacement */ + if (is_RIP()) + break; - /* mov reg, %rsp */ + /* skip nontrivial SIB */ + if (have_SIB()) { + modrm_rm = sib_base; + if (sib_index != CFI_SP) + break; + } + + /* mov %rsp, disp(%reg) */ + if (modrm_reg == CFI_SP) { ADD_OP(op) { op->src.type = OP_SRC_REG; - op->src.reg = modrm_reg; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; + op->src.reg = CFI_SP; + op->dest.type = OP_DEST_REG_INDIRECT; + op->dest.reg = modrm_rm; + op->dest.offset = ins.displacement.value; } break; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a30379e4ff97e..91b3ff4803cf2 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3000,6 +3000,20 @@ static int update_cfi_state(struct instruction *insn, cfi->stack_size += 8; } + else if (cfi->vals[op->src.reg].base == CFI_CFA) { + /* + * Clang RSP musical chairs: + * + * mov %rsp, %rdx [handled above] + * ... + * mov %rdx, %rbx [handled here] + * ... + * mov %rbx, %rsp [handled above] + */ + cfi->vals[op->dest.reg].base = CFI_CFA; + cfi->vals[op->dest.reg].offset = cfi->vals[op->src.reg].offset; + } + break; @@ -3734,7 +3748,7 @@ static void checksum_update_insn(struct objtool_file *file, struct symbol *func, static int validate_branch(struct objtool_file *file, struct symbol *func, struct instruction *insn, struct insn_state state); static int do_validate_branch(struct objtool_file *file, struct symbol *func, - struct instruction *insn, struct insn_state state); + struct instruction *insn, struct insn_state *state); static int validate_insn(struct objtool_file *file, struct symbol *func, struct instruction *insn, struct insn_state *statep, @@ -3999,7 +4013,7 @@ static int validate_insn(struct objtool_file *file, struct symbol *func, * tools/objtool/Documentation/objtool.txt. */ static int do_validate_branch(struct objtool_file *file, struct symbol *func, - struct instruction *insn, struct insn_state state) + struct instruction *insn, struct insn_state *state) { struct instruction *next_insn, *prev_insn = NULL; bool dead_end; @@ -4030,7 +4044,7 @@ static int do_validate_branch(struct objtool_file *file, struct symbol *func, return 1; } - ret = validate_insn(file, func, insn, &state, prev_insn, next_insn, + ret = validate_insn(file, func, insn, state, prev_insn, next_insn, &dead_end); if (!insn->trace) { @@ -4041,7 +4055,7 @@ static int do_validate_branch(struct objtool_file *file, struct symbol *func, } if (!dead_end && !next_insn) { - if (state.cfi.cfa.base == CFI_UNDEFINED) + if (state->cfi.cfa.base == CFI_UNDEFINED) return 0; if (file->ignore_unreachables) return 0; @@ -4066,7 +4080,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, int ret; trace_depth_inc(); - ret = do_validate_branch(file, func, insn, state); + ret = do_validate_branch(file, func, insn, &state); trace_depth_dec(); return ret; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 2c02c7b492658..3da90686350d7 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -1375,7 +1375,7 @@ void *elf_add_data(struct elf *elf, struct section *sec, const void *data, size_ memcpy(sec->data->d_buf, data, size); sec->data->d_size = size; - sec->data->d_align = 1; + sec->data->d_align = sec->sh.sh_addralign; offset = ALIGN(sec->sh.sh_size, sec->sh.sh_addralign); sec->sh.sh_size = offset + size; diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h index 2b27b54096b86..fa8b7d292e839 100644 --- a/tools/objtool/include/objtool/warn.h +++ b/tools/objtool/include/objtool/warn.h @@ -107,7 +107,7 @@ static inline char *offstr(struct section *sec, unsigned long offset) #define ERROR_ELF(format, ...) __WARN_ELF(ERROR_STR, format, ##__VA_ARGS__) #define ERROR_GLIBC(format, ...) __WARN_GLIBC(ERROR_STR, format, ##__VA_ARGS__) #define ERROR_FUNC(sec, offset, format, ...) __WARN_FUNC(ERROR_STR, sec, offset, format, ##__VA_ARGS__) -#define ERROR_INSN(insn, format, ...) WARN_FUNC(insn->sec, insn->offset, format, ##__VA_ARGS__) +#define ERROR_INSN(insn, format, ...) ERROR_FUNC(insn->sec, insn->offset, format, ##__VA_ARGS__) extern bool debug; extern int indent; diff --git a/tools/objtool/klp-diff.c b/tools/objtool/klp-diff.c index 9f1f4011eb9cd..a3198a63c2f0d 100644 --- a/tools/objtool/klp-diff.c +++ b/tools/objtool/klp-diff.c @@ -1334,25 +1334,25 @@ static bool should_keep_special_sym(struct elf *elf, struct symbol *sym) * be applied after static branch/call init, resulting in code corruption. * * Validate a special section entry to avoid that. Note that an inert - * tracepoint is harmless enough, in that case just skip the entry and print a - * warning. Otherwise, return an error. + * tracepoint or pr_debug() is harmless enough, in that case just skip the + * entry and print a warning. Otherwise, return an error. * - * This is only a temporary limitation which will be fixed when livepatch adds - * support for submodules: fully self-contained modules which are embedded in - * the top-level livepatch module's data and which can be loaded on demand when - * their corresponding to-be-patched module gets loaded. Then klp relocs can - * be retired. + * TODO: This is only a temporary limitation which will be fixed when livepatch + * adds support for submodules: fully self-contained modules which are embedded + * in the top-level livepatch module's data and which can be loaded on demand + * when their corresponding to-be-patched module gets loaded. Then klp relocs + * can be retired. * * Return: * -1: error: validation failed - * 1: warning: tracepoint skipped + * 1: warning: disabled tracepoint or pr_debug() * 0: success */ static int validate_special_section_klp_reloc(struct elfs *e, struct symbol *sym) { bool static_branch = !strcmp(sym->sec->name, "__jump_table"); bool static_call = !strcmp(sym->sec->name, ".static_call_sites"); - struct symbol *code_sym = NULL; + const char *code_sym = NULL; unsigned long code_offset = 0; struct reloc *reloc; int ret = 0; @@ -1364,12 +1364,15 @@ static int validate_special_section_klp_reloc(struct elfs *e, struct symbol *sym const char *sym_modname; struct export *export; + if (convert_reloc_sym(e->patched, reloc)) + continue; + /* Static branch/call keys are always STT_OBJECT */ if (reloc->sym->type != STT_OBJECT) { /* Save code location which can be printed below */ if (reloc->sym->type == STT_FUNC && !code_sym) { - code_sym = reloc->sym; + code_sym = reloc->sym->name; code_offset = reloc_addend(reloc); } @@ -1392,16 +1395,26 @@ static int validate_special_section_klp_reloc(struct elfs *e, struct symbol *sym if (!strcmp(sym_modname, "vmlinux")) continue; + if (!code_sym) + code_sym = ""; + if (static_branch) { if (strstarts(reloc->sym->name, "__tracepoint_")) { WARN("%s: disabling unsupported tracepoint %s", - code_sym->name, reloc->sym->name + 13); + code_sym, reloc->sym->name + 13); + ret = 1; + continue; + } + + if (strstr(reloc->sym->name, "__UNIQUE_ID_ddebug_")) { + WARN("%s: disabling unsupported pr_debug()", + code_sym); ret = 1; continue; } ERROR("%s+0x%lx: unsupported static branch key %s. Use static_key_enabled() instead", - code_sym->name, code_offset, reloc->sym->name); + code_sym, code_offset, reloc->sym->name); return -1; } @@ -1412,7 +1425,7 @@ static int validate_special_section_klp_reloc(struct elfs *e, struct symbol *sym } ERROR("%s()+0x%lx: unsupported static call key %s. Use KLP_STATIC_CALL() instead", - code_sym->name, code_offset, reloc->sym->name); + code_sym, code_offset, reloc->sym->name); return -1; } diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a8dc72cfe48ee..15fbba9f4ca89 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1163,6 +1163,24 @@ ifndef NO_RUST CFLAGS += -DHAVE_RUST_SUPPORT $(call detected,CONFIG_RUST_SUPPORT) endif + + ifneq ($(CROSS_COMPILE),) + RUST_TARGET_FLAGS_arm := arm-unknown-linux-gnueabi + RUST_TARGET_FLAGS_arm64 := aarch64-unknown-linux-gnu + RUST_TARGET_FLAGS_m68k := m68k-unknown-linux-gnu + RUST_TARGET_FLAGS_mips := mipsel-unknown-linux-gnu + RUST_TARGET_FLAGS_powerpc := powerpc64le-unknown-linux-gnu + RUST_TARGET_FLAGS_riscv := riscv64gc-unknown-linux-gnu + RUST_TARGET_FLAGS_s390 := s390x-unknown-linux-gnu + RUST_TARGET_FLAGS_x86 := x86_64-unknown-linux-gnu + RUST_TARGET_FLAGS_x86_64 := x86_64-unknown-linux-gnu + + ifeq ($(RUST_TARGET_FLAGS_$(ARCH)),) + $(error Unknown rust cross compilation architecture $(ARCH)) + endif + + RUST_FLAGS += --target=$(RUST_TARGET_FLAGS_$(ARCH)) + endif endif # Among the variables below, these: diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 11b63bafdb232..f7b936deeaa2c 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -274,7 +274,7 @@ ifeq ($(PYLINT),1) PYLINT := $(shell which pylint 2> /dev/null) endif -export srctree OUTPUT RM CC CXX RUSTC LD AR CFLAGS CXXFLAGS V BISON FLEX AWK +export srctree OUTPUT RM CC CXX RUSTC LD AR CFLAGS CXXFLAGS RUST_FLAGS V BISON FLEX AWK export HOSTCC HOSTLD HOSTAR HOSTCFLAGS SHELLCHECK MYPY PYLINT include $(srctree)/tools/build/Makefile.include diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl index fd09afae72a24..94351e22bfcf7 100644 --- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl @@ -485,3 +485,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index dc3f4e86b075e..4418d21708d66 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -68,20 +68,6 @@ static const char * const metadata_ete_ro[] = { enum cs_etm_version { CS_NOT_PRESENT, CS_ETMV3, CS_ETMV4, CS_ETE }; -/* ETMv4 CONFIGR register bits */ -#define TRCCONFIGR_BB BIT(3) -#define TRCCONFIGR_CCI BIT(4) -#define TRCCONFIGR_CID BIT(6) -#define TRCCONFIGR_VMID BIT(7) -#define TRCCONFIGR_TS BIT(11) -#define TRCCONFIGR_RS BIT(12) -#define TRCCONFIGR_VMIDOPT BIT(15) - -/* ETMv3 ETMCR register bits */ -#define ETMCR_CYC_ACC BIT(12) -#define ETMCR_TIMESTAMP_EN BIT(28) -#define ETMCR_RETURN_STACK BIT(29) - static bool cs_etm_is_ete(struct perf_pmu *cs_etm_pmu, struct perf_cpu cpu); static int cs_etm_get_ro(struct perf_pmu *pmu, struct perf_cpu cpu, const char *path, __u64 *val); static bool cs_etm_pmu_path_exists(struct perf_pmu *pmu, struct perf_cpu cpu, const char *path); diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 9b92bddf06b57..630aab9e54259 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -385,3 +385,4 @@ 468 n64 file_getattr sys_file_getattr 469 n64 file_setattr sys_file_setattr 470 n64 listns sys_listns +471 n64 rseq_slice_yield sys_rseq_slice_yield diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index ec4458cdb97b6..4fcc7c58a105d 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -561,3 +561,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 nospu rseq_slice_yield sys_rseq_slice_yield diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 5863787ab0363..09a7ef04d9791 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -3,473 +3,398 @@ # System call table for s390 # # Format: +# # -# -# -# where can be common, 64, or 32 +# is always common. -1 common exit sys_exit sys_exit -2 common fork sys_fork sys_fork -3 common read sys_read compat_sys_s390_read -4 common write sys_write compat_sys_s390_write -5 common open sys_open compat_sys_open -6 common close sys_close sys_close -7 common restart_syscall sys_restart_syscall sys_restart_syscall -8 common creat sys_creat sys_creat -9 common link sys_link sys_link -10 common unlink sys_unlink sys_unlink -11 common execve sys_execve compat_sys_execve -12 common chdir sys_chdir sys_chdir -13 32 time - sys_time32 -14 common mknod sys_mknod sys_mknod -15 common chmod sys_chmod sys_chmod -16 32 lchown - sys_lchown16 -19 common lseek sys_lseek compat_sys_lseek -20 common getpid sys_getpid sys_getpid -21 common mount sys_mount sys_mount -22 common umount sys_oldumount sys_oldumount -23 32 setuid - sys_setuid16 -24 32 getuid - sys_getuid16 -25 32 stime - sys_stime32 -26 common ptrace sys_ptrace compat_sys_ptrace -27 common alarm sys_alarm sys_alarm -29 common pause sys_pause sys_pause -30 common utime sys_utime sys_utime32 -33 common access sys_access sys_access -34 common nice sys_nice sys_nice -36 common sync sys_sync sys_sync -37 common kill sys_kill sys_kill -38 common rename sys_rename sys_rename -39 common mkdir sys_mkdir sys_mkdir -40 common rmdir sys_rmdir sys_rmdir -41 common dup sys_dup sys_dup -42 common pipe sys_pipe sys_pipe -43 common times sys_times compat_sys_times -45 common brk sys_brk sys_brk -46 32 setgid - sys_setgid16 -47 32 getgid - sys_getgid16 -48 common signal sys_signal sys_signal -49 32 geteuid - sys_geteuid16 -50 32 getegid - sys_getegid16 -51 common acct sys_acct sys_acct -52 common umount2 sys_umount sys_umount -54 common ioctl sys_ioctl compat_sys_ioctl -55 common fcntl sys_fcntl compat_sys_fcntl -57 common setpgid sys_setpgid sys_setpgid -60 common umask sys_umask sys_umask -61 common chroot sys_chroot sys_chroot -62 common ustat sys_ustat compat_sys_ustat -63 common dup2 sys_dup2 sys_dup2 -64 common getppid sys_getppid sys_getppid -65 common getpgrp sys_getpgrp sys_getpgrp -66 common setsid sys_setsid sys_setsid -67 common sigaction sys_sigaction compat_sys_sigaction -70 32 setreuid - sys_setreuid16 -71 32 setregid - sys_setregid16 -72 common sigsuspend sys_sigsuspend sys_sigsuspend -73 common sigpending sys_sigpending compat_sys_sigpending -74 common sethostname sys_sethostname sys_sethostname -75 common setrlimit sys_setrlimit compat_sys_setrlimit -76 32 getrlimit - compat_sys_old_getrlimit -77 common getrusage sys_getrusage compat_sys_getrusage -78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday -79 common settimeofday sys_settimeofday compat_sys_settimeofday -80 32 getgroups - sys_getgroups16 -81 32 setgroups - sys_setgroups16 -83 common symlink sys_symlink sys_symlink -85 common readlink sys_readlink sys_readlink -86 common uselib sys_uselib sys_uselib -87 common swapon sys_swapon sys_swapon -88 common reboot sys_reboot sys_reboot -89 common readdir - compat_sys_old_readdir -90 common mmap sys_old_mmap compat_sys_s390_old_mmap -91 common munmap sys_munmap sys_munmap -92 common truncate sys_truncate compat_sys_truncate -93 common ftruncate sys_ftruncate compat_sys_ftruncate -94 common fchmod sys_fchmod sys_fchmod -95 32 fchown - sys_fchown16 -96 common getpriority sys_getpriority sys_getpriority -97 common setpriority sys_setpriority sys_setpriority -99 common statfs sys_statfs compat_sys_statfs -100 common fstatfs sys_fstatfs compat_sys_fstatfs -101 32 ioperm - - -102 common socketcall sys_socketcall compat_sys_socketcall -103 common syslog sys_syslog sys_syslog -104 common setitimer sys_setitimer compat_sys_setitimer -105 common getitimer sys_getitimer compat_sys_getitimer -106 common stat sys_newstat compat_sys_newstat -107 common lstat sys_newlstat compat_sys_newlstat -108 common fstat sys_newfstat compat_sys_newfstat -110 common lookup_dcookie - - -111 common vhangup sys_vhangup sys_vhangup -112 common idle - - -114 common wait4 sys_wait4 compat_sys_wait4 -115 common swapoff sys_swapoff sys_swapoff -116 common sysinfo sys_sysinfo compat_sys_sysinfo -117 common ipc sys_s390_ipc compat_sys_s390_ipc -118 common fsync sys_fsync sys_fsync -119 common sigreturn sys_sigreturn compat_sys_sigreturn -120 common clone sys_clone sys_clone -121 common setdomainname sys_setdomainname sys_setdomainname -122 common uname sys_newuname sys_newuname -124 common adjtimex sys_adjtimex sys_adjtimex_time32 -125 common mprotect sys_mprotect sys_mprotect -126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask -127 common create_module - - -128 common init_module sys_init_module sys_init_module -129 common delete_module sys_delete_module sys_delete_module -130 common get_kernel_syms - - -131 common quotactl sys_quotactl sys_quotactl -132 common getpgid sys_getpgid sys_getpgid -133 common fchdir sys_fchdir sys_fchdir -134 common bdflush sys_ni_syscall sys_ni_syscall -135 common sysfs sys_sysfs sys_sysfs -136 common personality sys_s390_personality sys_s390_personality -137 common afs_syscall - - -138 32 setfsuid - sys_setfsuid16 -139 32 setfsgid - sys_setfsgid16 -140 32 _llseek - sys_llseek -141 common getdents sys_getdents compat_sys_getdents -142 32 _newselect - compat_sys_select -142 64 select sys_select - -143 common flock sys_flock sys_flock -144 common msync sys_msync sys_msync -145 common readv sys_readv sys_readv -146 common writev sys_writev sys_writev -147 common getsid sys_getsid sys_getsid -148 common fdatasync sys_fdatasync sys_fdatasync -149 common _sysctl - - -150 common mlock sys_mlock sys_mlock -151 common munlock sys_munlock sys_munlock -152 common mlockall sys_mlockall sys_mlockall -153 common munlockall sys_munlockall sys_munlockall -154 common sched_setparam sys_sched_setparam sys_sched_setparam -155 common sched_getparam sys_sched_getparam sys_sched_getparam -156 common sched_setscheduler sys_sched_setscheduler sys_sched_setscheduler -157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler -158 common sched_yield sys_sched_yield sys_sched_yield -159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max -160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 -162 common nanosleep sys_nanosleep sys_nanosleep_time32 -163 common mremap sys_mremap sys_mremap -164 32 setresuid - sys_setresuid16 -165 32 getresuid - sys_getresuid16 -167 common query_module - - -168 common poll sys_poll sys_poll -169 common nfsservctl - - -170 32 setresgid - sys_setresgid16 -171 32 getresgid - sys_getresgid16 -172 common prctl sys_prctl sys_prctl -173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn -174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction -175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask -176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 -178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo -179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend -180 common pread64 sys_pread64 compat_sys_s390_pread64 -181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64 -182 32 chown - sys_chown16 -183 common getcwd sys_getcwd sys_getcwd -184 common capget sys_capget sys_capget -185 common capset sys_capset sys_capset -186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack -187 common sendfile sys_sendfile64 compat_sys_sendfile -188 common getpmsg - - -189 common putpmsg - - -190 common vfork sys_vfork sys_vfork -191 32 ugetrlimit - compat_sys_getrlimit -191 64 getrlimit sys_getrlimit - -192 32 mmap2 - compat_sys_s390_mmap2 -193 32 truncate64 - compat_sys_s390_truncate64 -194 32 ftruncate64 - compat_sys_s390_ftruncate64 -195 32 stat64 - compat_sys_s390_stat64 -196 32 lstat64 - compat_sys_s390_lstat64 -197 32 fstat64 - compat_sys_s390_fstat64 -198 32 lchown32 - sys_lchown -198 64 lchown sys_lchown - -199 32 getuid32 - sys_getuid -199 64 getuid sys_getuid - -200 32 getgid32 - sys_getgid -200 64 getgid sys_getgid - -201 32 geteuid32 - sys_geteuid -201 64 geteuid sys_geteuid - -202 32 getegid32 - sys_getegid -202 64 getegid sys_getegid - -203 32 setreuid32 - sys_setreuid -203 64 setreuid sys_setreuid - -204 32 setregid32 - sys_setregid -204 64 setregid sys_setregid - -205 32 getgroups32 - sys_getgroups -205 64 getgroups sys_getgroups - -206 32 setgroups32 - sys_setgroups -206 64 setgroups sys_setgroups - -207 32 fchown32 - sys_fchown -207 64 fchown sys_fchown - -208 32 setresuid32 - sys_setresuid -208 64 setresuid sys_setresuid - -209 32 getresuid32 - sys_getresuid -209 64 getresuid sys_getresuid - -210 32 setresgid32 - sys_setresgid -210 64 setresgid sys_setresgid - -211 32 getresgid32 - sys_getresgid -211 64 getresgid sys_getresgid - -212 32 chown32 - sys_chown -212 64 chown sys_chown - -213 32 setuid32 - sys_setuid -213 64 setuid sys_setuid - -214 32 setgid32 - sys_setgid -214 64 setgid sys_setgid - -215 32 setfsuid32 - sys_setfsuid -215 64 setfsuid sys_setfsuid - -216 32 setfsgid32 - sys_setfsgid -216 64 setfsgid sys_setfsgid - -217 common pivot_root sys_pivot_root sys_pivot_root -218 common mincore sys_mincore sys_mincore -219 common madvise sys_madvise sys_madvise -220 common getdents64 sys_getdents64 sys_getdents64 -221 32 fcntl64 - compat_sys_fcntl64 -222 common readahead sys_readahead compat_sys_s390_readahead -223 32 sendfile64 - compat_sys_sendfile64 -224 common setxattr sys_setxattr sys_setxattr -225 common lsetxattr sys_lsetxattr sys_lsetxattr -226 common fsetxattr sys_fsetxattr sys_fsetxattr -227 common getxattr sys_getxattr sys_getxattr -228 common lgetxattr sys_lgetxattr sys_lgetxattr -229 common fgetxattr sys_fgetxattr sys_fgetxattr -230 common listxattr sys_listxattr sys_listxattr -231 common llistxattr sys_llistxattr sys_llistxattr -232 common flistxattr sys_flistxattr sys_flistxattr -233 common removexattr sys_removexattr sys_removexattr -234 common lremovexattr sys_lremovexattr sys_lremovexattr -235 common fremovexattr sys_fremovexattr sys_fremovexattr -236 common gettid sys_gettid sys_gettid -237 common tkill sys_tkill sys_tkill -238 common futex sys_futex sys_futex_time32 -239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity -240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity -241 common tgkill sys_tgkill sys_tgkill -243 common io_setup sys_io_setup compat_sys_io_setup -244 common io_destroy sys_io_destroy sys_io_destroy -245 common io_getevents sys_io_getevents sys_io_getevents_time32 -246 common io_submit sys_io_submit compat_sys_io_submit -247 common io_cancel sys_io_cancel sys_io_cancel -248 common exit_group sys_exit_group sys_exit_group -249 common epoll_create sys_epoll_create sys_epoll_create -250 common epoll_ctl sys_epoll_ctl sys_epoll_ctl -251 common epoll_wait sys_epoll_wait sys_epoll_wait -252 common set_tid_address sys_set_tid_address sys_set_tid_address -253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 -254 common timer_create sys_timer_create compat_sys_timer_create -255 common timer_settime sys_timer_settime sys_timer_settime32 -256 common timer_gettime sys_timer_gettime sys_timer_gettime32 -257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun -258 common timer_delete sys_timer_delete sys_timer_delete -259 common clock_settime sys_clock_settime sys_clock_settime32 -260 common clock_gettime sys_clock_gettime sys_clock_gettime32 -261 common clock_getres sys_clock_getres sys_clock_getres_time32 -262 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 -264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 -265 common statfs64 sys_statfs64 compat_sys_statfs64 -266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 -267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages -268 common mbind sys_mbind sys_mbind -269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy -270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy -271 common mq_open sys_mq_open compat_sys_mq_open -272 common mq_unlink sys_mq_unlink sys_mq_unlink -273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 -274 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 -275 common mq_notify sys_mq_notify compat_sys_mq_notify -276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr -277 common kexec_load sys_kexec_load compat_sys_kexec_load -278 common add_key sys_add_key sys_add_key -279 common request_key sys_request_key sys_request_key -280 common keyctl sys_keyctl compat_sys_keyctl -281 common waitid sys_waitid compat_sys_waitid -282 common ioprio_set sys_ioprio_set sys_ioprio_set -283 common ioprio_get sys_ioprio_get sys_ioprio_get -284 common inotify_init sys_inotify_init sys_inotify_init -285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch -286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch -287 common migrate_pages sys_migrate_pages sys_migrate_pages -288 common openat sys_openat compat_sys_openat -289 common mkdirat sys_mkdirat sys_mkdirat -290 common mknodat sys_mknodat sys_mknodat -291 common fchownat sys_fchownat sys_fchownat -292 common futimesat sys_futimesat sys_futimesat_time32 -293 32 fstatat64 - compat_sys_s390_fstatat64 -293 64 newfstatat sys_newfstatat - -294 common unlinkat sys_unlinkat sys_unlinkat -295 common renameat sys_renameat sys_renameat -296 common linkat sys_linkat sys_linkat -297 common symlinkat sys_symlinkat sys_symlinkat -298 common readlinkat sys_readlinkat sys_readlinkat -299 common fchmodat sys_fchmodat sys_fchmodat -300 common faccessat sys_faccessat sys_faccessat -301 common pselect6 sys_pselect6 compat_sys_pselect6_time32 -302 common ppoll sys_ppoll compat_sys_ppoll_time32 -303 common unshare sys_unshare sys_unshare -304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list -305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list -306 common splice sys_splice sys_splice -307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range -308 common tee sys_tee sys_tee -309 common vmsplice sys_vmsplice sys_vmsplice -310 common move_pages sys_move_pages sys_move_pages -311 common getcpu sys_getcpu sys_getcpu -312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -313 common utimes sys_utimes sys_utimes_time32 -314 common fallocate sys_fallocate compat_sys_s390_fallocate -315 common utimensat sys_utimensat sys_utimensat_time32 -316 common signalfd sys_signalfd compat_sys_signalfd -317 common timerfd - - -318 common eventfd sys_eventfd sys_eventfd -319 common timerfd_create sys_timerfd_create sys_timerfd_create -320 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 -321 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 -322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 -323 common eventfd2 sys_eventfd2 sys_eventfd2 -324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 -325 common pipe2 sys_pipe2 sys_pipe2 -326 common dup3 sys_dup3 sys_dup3 -327 common epoll_create1 sys_epoll_create1 sys_epoll_create1 -328 common preadv sys_preadv compat_sys_preadv -329 common pwritev sys_pwritev compat_sys_pwritev -330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -331 common perf_event_open sys_perf_event_open sys_perf_event_open -332 common fanotify_init sys_fanotify_init sys_fanotify_init -333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark -334 common prlimit64 sys_prlimit64 sys_prlimit64 -335 common name_to_handle_at sys_name_to_handle_at sys_name_to_handle_at -336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 -338 common syncfs sys_syncfs sys_syncfs -339 common setns sys_setns sys_setns -340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv -341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev -342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr -343 common kcmp sys_kcmp sys_kcmp -344 common finit_module sys_finit_module sys_finit_module -345 common sched_setattr sys_sched_setattr sys_sched_setattr -346 common sched_getattr sys_sched_getattr sys_sched_getattr -347 common renameat2 sys_renameat2 sys_renameat2 -348 common seccomp sys_seccomp sys_seccomp -349 common getrandom sys_getrandom sys_getrandom -350 common memfd_create sys_memfd_create sys_memfd_create -351 common bpf sys_bpf sys_bpf -352 common s390_pci_mmio_write sys_s390_pci_mmio_write sys_s390_pci_mmio_write -353 common s390_pci_mmio_read sys_s390_pci_mmio_read sys_s390_pci_mmio_read -354 common execveat sys_execveat compat_sys_execveat -355 common userfaultfd sys_userfaultfd sys_userfaultfd -356 common membarrier sys_membarrier sys_membarrier -357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 -358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg -359 common socket sys_socket sys_socket -360 common socketpair sys_socketpair sys_socketpair -361 common bind sys_bind sys_bind -362 common connect sys_connect sys_connect -363 common listen sys_listen sys_listen -364 common accept4 sys_accept4 sys_accept4 -365 common getsockopt sys_getsockopt sys_getsockopt -366 common setsockopt sys_setsockopt sys_setsockopt -367 common getsockname sys_getsockname sys_getsockname -368 common getpeername sys_getpeername sys_getpeername -369 common sendto sys_sendto sys_sendto -370 common sendmsg sys_sendmsg compat_sys_sendmsg -371 common recvfrom sys_recvfrom compat_sys_recvfrom -372 common recvmsg sys_recvmsg compat_sys_recvmsg -373 common shutdown sys_shutdown sys_shutdown -374 common mlock2 sys_mlock2 sys_mlock2 -375 common copy_file_range sys_copy_file_range sys_copy_file_range -376 common preadv2 sys_preadv2 compat_sys_preadv2 -377 common pwritev2 sys_pwritev2 compat_sys_pwritev2 -378 common s390_guarded_storage sys_s390_guarded_storage sys_s390_guarded_storage -379 common statx sys_statx sys_statx -380 common s390_sthyi sys_s390_sthyi sys_s390_sthyi -381 common kexec_file_load sys_kexec_file_load sys_kexec_file_load -382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents -383 common rseq sys_rseq sys_rseq -384 common pkey_mprotect sys_pkey_mprotect sys_pkey_mprotect -385 common pkey_alloc sys_pkey_alloc sys_pkey_alloc -386 common pkey_free sys_pkey_free sys_pkey_free +1 common exit sys_exit +2 common fork sys_fork +3 common read sys_read +4 common write sys_write +5 common open sys_open +6 common close sys_close +7 common restart_syscall sys_restart_syscall +8 common creat sys_creat +9 common link sys_link +10 common unlink sys_unlink +11 common execve sys_execve +12 common chdir sys_chdir +14 common mknod sys_mknod +15 common chmod sys_chmod +19 common lseek sys_lseek +20 common getpid sys_getpid +21 common mount sys_mount +22 common umount sys_oldumount +26 common ptrace sys_ptrace +27 common alarm sys_alarm +29 common pause sys_pause +30 common utime sys_utime +33 common access sys_access +34 common nice sys_nice +36 common sync sys_sync +37 common kill sys_kill +38 common rename sys_rename +39 common mkdir sys_mkdir +40 common rmdir sys_rmdir +41 common dup sys_dup +42 common pipe sys_pipe +43 common times sys_times +45 common brk sys_brk +48 common signal sys_signal +51 common acct sys_acct +52 common umount2 sys_umount +54 common ioctl sys_ioctl +55 common fcntl sys_fcntl +57 common setpgid sys_setpgid +60 common umask sys_umask +61 common chroot sys_chroot +62 common ustat sys_ustat +63 common dup2 sys_dup2 +64 common getppid sys_getppid +65 common getpgrp sys_getpgrp +66 common setsid sys_setsid +67 common sigaction sys_sigaction +72 common sigsuspend sys_sigsuspend +73 common sigpending sys_sigpending +74 common sethostname sys_sethostname +75 common setrlimit sys_setrlimit +77 common getrusage sys_getrusage +78 common gettimeofday sys_gettimeofday +79 common settimeofday sys_settimeofday +83 common symlink sys_symlink +85 common readlink sys_readlink +86 common uselib sys_uselib +87 common swapon sys_swapon +88 common reboot sys_reboot +89 common readdir sys_ni_syscall +90 common mmap sys_old_mmap +91 common munmap sys_munmap +92 common truncate sys_truncate +93 common ftruncate sys_ftruncate +94 common fchmod sys_fchmod +96 common getpriority sys_getpriority +97 common setpriority sys_setpriority +99 common statfs sys_statfs +100 common fstatfs sys_fstatfs +102 common socketcall sys_socketcall +103 common syslog sys_syslog +104 common setitimer sys_setitimer +105 common getitimer sys_getitimer +106 common stat sys_newstat +107 common lstat sys_newlstat +108 common fstat sys_newfstat +110 common lookup_dcookie sys_ni_syscall +111 common vhangup sys_vhangup +112 common idle sys_ni_syscall +114 common wait4 sys_wait4 +115 common swapoff sys_swapoff +116 common sysinfo sys_sysinfo +117 common ipc sys_s390_ipc +118 common fsync sys_fsync +119 common sigreturn sys_sigreturn +120 common clone sys_clone +121 common setdomainname sys_setdomainname +122 common uname sys_newuname +124 common adjtimex sys_adjtimex +125 common mprotect sys_mprotect +126 common sigprocmask sys_sigprocmask +127 common create_module sys_ni_syscall +128 common init_module sys_init_module +129 common delete_module sys_delete_module +130 common get_kernel_syms sys_ni_syscall +131 common quotactl sys_quotactl +132 common getpgid sys_getpgid +133 common fchdir sys_fchdir +134 common bdflush sys_ni_syscall +135 common sysfs sys_sysfs +136 common personality sys_s390_personality +137 common afs_syscall sys_ni_syscall +141 common getdents sys_getdents +142 common select sys_select +143 common flock sys_flock +144 common msync sys_msync +145 common readv sys_readv +146 common writev sys_writev +147 common getsid sys_getsid +148 common fdatasync sys_fdatasync +149 common _sysctl sys_ni_syscall +150 common mlock sys_mlock +151 common munlock sys_munlock +152 common mlockall sys_mlockall +153 common munlockall sys_munlockall +154 common sched_setparam sys_sched_setparam +155 common sched_getparam sys_sched_getparam +156 common sched_setscheduler sys_sched_setscheduler +157 common sched_getscheduler sys_sched_getscheduler +158 common sched_yield sys_sched_yield +159 common sched_get_priority_max sys_sched_get_priority_max +160 common sched_get_priority_min sys_sched_get_priority_min +161 common sched_rr_get_interval sys_sched_rr_get_interval +162 common nanosleep sys_nanosleep +163 common mremap sys_mremap +167 common query_module sys_ni_syscall +168 common poll sys_poll +169 common nfsservctl sys_ni_syscall +172 common prctl sys_prctl +173 common rt_sigreturn sys_rt_sigreturn +174 common rt_sigaction sys_rt_sigaction +175 common rt_sigprocmask sys_rt_sigprocmask +176 common rt_sigpending sys_rt_sigpending +177 common rt_sigtimedwait sys_rt_sigtimedwait +178 common rt_sigqueueinfo sys_rt_sigqueueinfo +179 common rt_sigsuspend sys_rt_sigsuspend +180 common pread64 sys_pread64 +181 common pwrite64 sys_pwrite64 +183 common getcwd sys_getcwd +184 common capget sys_capget +185 common capset sys_capset +186 common sigaltstack sys_sigaltstack +187 common sendfile sys_sendfile64 +188 common getpmsg sys_ni_syscall +189 common putpmsg sys_ni_syscall +190 common vfork sys_vfork +191 common getrlimit sys_getrlimit +198 common lchown sys_lchown +199 common getuid sys_getuid +200 common getgid sys_getgid +201 common geteuid sys_geteuid +202 common getegid sys_getegid +203 common setreuid sys_setreuid +204 common setregid sys_setregid +205 common getgroups sys_getgroups +206 common setgroups sys_setgroups +207 common fchown sys_fchown +208 common setresuid sys_setresuid +209 common getresuid sys_getresuid +210 common setresgid sys_setresgid +211 common getresgid sys_getresgid +212 common chown sys_chown +213 common setuid sys_setuid +214 common setgid sys_setgid +215 common setfsuid sys_setfsuid +216 common setfsgid sys_setfsgid +217 common pivot_root sys_pivot_root +218 common mincore sys_mincore +219 common madvise sys_madvise +220 common getdents64 sys_getdents64 +222 common readahead sys_readahead +224 common setxattr sys_setxattr +225 common lsetxattr sys_lsetxattr +226 common fsetxattr sys_fsetxattr +227 common getxattr sys_getxattr +228 common lgetxattr sys_lgetxattr +229 common fgetxattr sys_fgetxattr +230 common listxattr sys_listxattr +231 common llistxattr sys_llistxattr +232 common flistxattr sys_flistxattr +233 common removexattr sys_removexattr +234 common lremovexattr sys_lremovexattr +235 common fremovexattr sys_fremovexattr +236 common gettid sys_gettid +237 common tkill sys_tkill +238 common futex sys_futex +239 common sched_setaffinity sys_sched_setaffinity +240 common sched_getaffinity sys_sched_getaffinity +241 common tgkill sys_tgkill +243 common io_setup sys_io_setup +244 common io_destroy sys_io_destroy +245 common io_getevents sys_io_getevents +246 common io_submit sys_io_submit +247 common io_cancel sys_io_cancel +248 common exit_group sys_exit_group +249 common epoll_create sys_epoll_create +250 common epoll_ctl sys_epoll_ctl +251 common epoll_wait sys_epoll_wait +252 common set_tid_address sys_set_tid_address +253 common fadvise64 sys_fadvise64_64 +254 common timer_create sys_timer_create +255 common timer_settime sys_timer_settime +256 common timer_gettime sys_timer_gettime +257 common timer_getoverrun sys_timer_getoverrun +258 common timer_delete sys_timer_delete +259 common clock_settime sys_clock_settime +260 common clock_gettime sys_clock_gettime +261 common clock_getres sys_clock_getres +262 common clock_nanosleep sys_clock_nanosleep +265 common statfs64 sys_statfs64 +266 common fstatfs64 sys_fstatfs64 +267 common remap_file_pages sys_remap_file_pages +268 common mbind sys_mbind +269 common get_mempolicy sys_get_mempolicy +270 common set_mempolicy sys_set_mempolicy +271 common mq_open sys_mq_open +272 common mq_unlink sys_mq_unlink +273 common mq_timedsend sys_mq_timedsend +274 common mq_timedreceive sys_mq_timedreceive +275 common mq_notify sys_mq_notify +276 common mq_getsetattr sys_mq_getsetattr +277 common kexec_load sys_kexec_load +278 common add_key sys_add_key +279 common request_key sys_request_key +280 common keyctl sys_keyctl +281 common waitid sys_waitid +282 common ioprio_set sys_ioprio_set +283 common ioprio_get sys_ioprio_get +284 common inotify_init sys_inotify_init +285 common inotify_add_watch sys_inotify_add_watch +286 common inotify_rm_watch sys_inotify_rm_watch +287 common migrate_pages sys_migrate_pages +288 common openat sys_openat +289 common mkdirat sys_mkdirat +290 common mknodat sys_mknodat +291 common fchownat sys_fchownat +292 common futimesat sys_futimesat +293 common newfstatat sys_newfstatat +294 common unlinkat sys_unlinkat +295 common renameat sys_renameat +296 common linkat sys_linkat +297 common symlinkat sys_symlinkat +298 common readlinkat sys_readlinkat +299 common fchmodat sys_fchmodat +300 common faccessat sys_faccessat +301 common pselect6 sys_pselect6 +302 common ppoll sys_ppoll +303 common unshare sys_unshare +304 common set_robust_list sys_set_robust_list +305 common get_robust_list sys_get_robust_list +306 common splice sys_splice +307 common sync_file_range sys_sync_file_range +308 common tee sys_tee +309 common vmsplice sys_vmsplice +310 common move_pages sys_move_pages +311 common getcpu sys_getcpu +312 common epoll_pwait sys_epoll_pwait +313 common utimes sys_utimes +314 common fallocate sys_fallocate +315 common utimensat sys_utimensat +316 common signalfd sys_signalfd +317 common timerfd sys_ni_syscall +318 common eventfd sys_eventfd +319 common timerfd_create sys_timerfd_create +320 common timerfd_settime sys_timerfd_settime +321 common timerfd_gettime sys_timerfd_gettime +322 common signalfd4 sys_signalfd4 +323 common eventfd2 sys_eventfd2 +324 common inotify_init1 sys_inotify_init1 +325 common pipe2 sys_pipe2 +326 common dup3 sys_dup3 +327 common epoll_create1 sys_epoll_create1 +328 common preadv sys_preadv +329 common pwritev sys_pwritev +330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +331 common perf_event_open sys_perf_event_open +332 common fanotify_init sys_fanotify_init +333 common fanotify_mark sys_fanotify_mark +334 common prlimit64 sys_prlimit64 +335 common name_to_handle_at sys_name_to_handle_at +336 common open_by_handle_at sys_open_by_handle_at +337 common clock_adjtime sys_clock_adjtime +338 common syncfs sys_syncfs +339 common setns sys_setns +340 common process_vm_readv sys_process_vm_readv +341 common process_vm_writev sys_process_vm_writev +342 common s390_runtime_instr sys_s390_runtime_instr +343 common kcmp sys_kcmp +344 common finit_module sys_finit_module +345 common sched_setattr sys_sched_setattr +346 common sched_getattr sys_sched_getattr +347 common renameat2 sys_renameat2 +348 common seccomp sys_seccomp +349 common getrandom sys_getrandom +350 common memfd_create sys_memfd_create +351 common bpf sys_bpf +352 common s390_pci_mmio_write sys_s390_pci_mmio_write +353 common s390_pci_mmio_read sys_s390_pci_mmio_read +354 common execveat sys_execveat +355 common userfaultfd sys_userfaultfd +356 common membarrier sys_membarrier +357 common recvmmsg sys_recvmmsg +358 common sendmmsg sys_sendmmsg +359 common socket sys_socket +360 common socketpair sys_socketpair +361 common bind sys_bind +362 common connect sys_connect +363 common listen sys_listen +364 common accept4 sys_accept4 +365 common getsockopt sys_getsockopt +366 common setsockopt sys_setsockopt +367 common getsockname sys_getsockname +368 common getpeername sys_getpeername +369 common sendto sys_sendto +370 common sendmsg sys_sendmsg +371 common recvfrom sys_recvfrom +372 common recvmsg sys_recvmsg +373 common shutdown sys_shutdown +374 common mlock2 sys_mlock2 +375 common copy_file_range sys_copy_file_range +376 common preadv2 sys_preadv2 +377 common pwritev2 sys_pwritev2 +378 common s390_guarded_storage sys_s390_guarded_storage +379 common statx sys_statx +380 common s390_sthyi sys_s390_sthyi +381 common kexec_file_load sys_kexec_file_load +382 common io_pgetevents sys_io_pgetevents +383 common rseq sys_rseq +384 common pkey_mprotect sys_pkey_mprotect +385 common pkey_alloc sys_pkey_alloc +386 common pkey_free sys_pkey_free # room for arch specific syscalls -392 64 semtimedop sys_semtimedop - -393 common semget sys_semget sys_semget -394 common semctl sys_semctl compat_sys_semctl -395 common shmget sys_shmget sys_shmget -396 common shmctl sys_shmctl compat_sys_shmctl -397 common shmat sys_shmat compat_sys_shmat -398 common shmdt sys_shmdt sys_shmdt -399 common msgget sys_msgget sys_msgget -400 common msgsnd sys_msgsnd compat_sys_msgsnd -401 common msgrcv sys_msgrcv compat_sys_msgrcv -402 common msgctl sys_msgctl compat_sys_msgctl -403 32 clock_gettime64 - sys_clock_gettime -404 32 clock_settime64 - sys_clock_settime -405 32 clock_adjtime64 - sys_clock_adjtime -406 32 clock_getres_time64 - sys_clock_getres -407 32 clock_nanosleep_time64 - sys_clock_nanosleep -408 32 timer_gettime64 - sys_timer_gettime -409 32 timer_settime64 - sys_timer_settime -410 32 timerfd_gettime64 - sys_timerfd_gettime -411 32 timerfd_settime64 - sys_timerfd_settime -412 32 utimensat_time64 - sys_utimensat -413 32 pselect6_time64 - compat_sys_pselect6_time64 -414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 -417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 -418 32 mq_timedsend_time64 - sys_mq_timedsend -419 32 mq_timedreceive_time64 - sys_mq_timedreceive -420 32 semtimedop_time64 - sys_semtimedop -421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64 -422 32 futex_time64 - sys_futex -423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval -424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal -425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup -426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter -427 common io_uring_register sys_io_uring_register sys_io_uring_register -428 common open_tree sys_open_tree sys_open_tree -429 common move_mount sys_move_mount sys_move_mount -430 common fsopen sys_fsopen sys_fsopen -431 common fsconfig sys_fsconfig sys_fsconfig -432 common fsmount sys_fsmount sys_fsmount -433 common fspick sys_fspick sys_fspick -434 common pidfd_open sys_pidfd_open sys_pidfd_open -435 common clone3 sys_clone3 sys_clone3 -436 common close_range sys_close_range sys_close_range -437 common openat2 sys_openat2 sys_openat2 -438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd -439 common faccessat2 sys_faccessat2 sys_faccessat2 -440 common process_madvise sys_process_madvise sys_process_madvise -441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 -442 common mount_setattr sys_mount_setattr sys_mount_setattr -443 common quotactl_fd sys_quotactl_fd sys_quotactl_fd -444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset -445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule -446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self -447 common memfd_secret sys_memfd_secret sys_memfd_secret -448 common process_mrelease sys_process_mrelease sys_process_mrelease -449 common futex_waitv sys_futex_waitv sys_futex_waitv -450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node -451 common cachestat sys_cachestat sys_cachestat -452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 -453 common map_shadow_stack sys_map_shadow_stack sys_map_shadow_stack -454 common futex_wake sys_futex_wake sys_futex_wake -455 common futex_wait sys_futex_wait sys_futex_wait -456 common futex_requeue sys_futex_requeue sys_futex_requeue -457 common statmount sys_statmount sys_statmount -458 common listmount sys_listmount sys_listmount -459 common lsm_get_self_attr sys_lsm_get_self_attr sys_lsm_get_self_attr -460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr -461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules -462 common mseal sys_mseal sys_mseal -463 common setxattrat sys_setxattrat sys_setxattrat -464 common getxattrat sys_getxattrat sys_getxattrat -465 common listxattrat sys_listxattrat sys_listxattrat -466 common removexattrat sys_removexattrat sys_removexattrat -467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr -468 common file_getattr sys_file_getattr sys_file_getattr -469 common file_setattr sys_file_setattr sys_file_setattr -470 common listns sys_listns sys_listns +392 common semtimedop sys_semtimedop +393 common semget sys_semget +394 common semctl sys_semctl +395 common shmget sys_shmget +396 common shmctl sys_shmctl +397 common shmat sys_shmat +398 common shmdt sys_shmdt +399 common msgget sys_msgget +400 common msgsnd sys_msgsnd +401 common msgrcv sys_msgrcv +402 common msgctl sys_msgctl +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register +428 common open_tree sys_open_tree +429 common move_mount sys_move_mount +430 common fsopen sys_fsopen +431 common fsconfig sys_fsconfig +432 common fsmount sys_fsmount +433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 +436 common close_range sys_close_range +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr +443 common quotactl_fd sys_quotactl_fd +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self +447 common memfd_secret sys_memfd_secret +448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 +453 common map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount +459 common lsm_get_self_attr sys_lsm_get_self_attr +460 common lsm_set_self_attr sys_lsm_set_self_attr +461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr +470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl index 969c11325adeb..70b315cbe710c 100644 --- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl @@ -474,3 +474,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl index 39aa26b6a50be..7e71bf7fcd14f 100644 --- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl @@ -480,7 +480,7 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 common clone3 __sys_clone3 436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd @@ -516,3 +516,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index e979a3eac7a35..f832ebd2d79b0 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -476,3 +476,4 @@ 468 i386 file_getattr sys_file_getattr 469 i386 file_setattr sys_file_setattr 470 i386 listns sys_listns +471 i386 rseq_slice_yield sys_rseq_slice_yield diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 8a4ac4841be6e..524155d655da1 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -395,6 +395,7 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl index 438a3b1704022..a9bca4e484dec 100644 --- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl @@ -441,3 +441,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 6b6eec65f93f5..4cc33452d79b6 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -1209,8 +1210,12 @@ static int prepare_func_profile(struct perf_ftrace *ftrace) ftrace->graph_verbose = 0; ftrace->profile_hash = hashmap__new(profile_hash, profile_equal, NULL); - if (ftrace->profile_hash == NULL) - return -ENOMEM; + if (IS_ERR(ftrace->profile_hash)) { + int err = PTR_ERR(ftrace->profile_hash); + + ftrace->profile_hash = NULL; + return err; + } return 0; } diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 63c65788d4428..dc5f94862a3bc 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -214,7 +214,8 @@ ifneq ($(strip $(ORPHAN_FILES)),) quiet_cmd_rm = RM $^ prune_orphans: $(ORPHAN_FILES) - $(Q)$(call echo-cmd,rm)rm -f $^ + # The list of files can be long. Use xargs to prevent issues. + $(Q)$(call echo-cmd,rm)echo "$^" | xargs rm -f JEVENTS_DEPS += prune_orphans endif diff --git a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h index 6e1d5b955aae4..85253fc8e3845 100644 --- a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h +++ b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h @@ -77,6 +77,7 @@ */ #define IRQ_WORK_VECTOR 0xf6 +/* IRQ vector for PMIs when running a guest with a mediated PMU. */ #define PERF_GUEST_MEDIATED_PMI_VECTOR 0xf5 #define DEFERRED_ERROR_VECTOR 0xf4 diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 66ca526cf786c..70b2b661f42cb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -253,6 +253,7 @@ struct file_attr { #define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ #define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ #define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ +#define FS_XFLAG_VERITY 0x00020000 /* fs-verity enabled */ #define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* the read-only stuff doesn't really belong here, but any other place is diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index 5d3f8c9e3a625..d9d86598d100c 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h @@ -61,7 +61,8 @@ /* * open_tree() flags. */ -#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLONE (1 << 0) /* Clone the target tree and attach the clone */ +#define OPEN_TREE_NAMESPACE (1 << 1) /* Clone the target tree into a new mount namespace */ #define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ /* @@ -197,7 +198,10 @@ struct statmount { */ struct mnt_id_req { __u32 size; - __u32 mnt_ns_fd; + union { + __u32 mnt_ns_fd; + __u32 mnt_fd; + }; __u64 mnt_id; __u64 param; __u64 mnt_ns_id; @@ -232,4 +236,9 @@ struct mnt_id_req { #define LSMT_ROOT 0xffffffffffffffff /* root mount */ #define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ +/* + * @flag bits for statmount(2) + */ +#define STATMOUNT_BY_FD 0x00000001U /* want mountinfo for given fd */ + #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 51c4e8c82b1e9..55b0446fff9d9 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -386,4 +386,41 @@ struct prctl_mm_map { # define PR_FUTEX_HASH_SET_SLOTS 1 # define PR_FUTEX_HASH_GET_SLOTS 2 +/* RSEQ time slice extensions */ +#define PR_RSEQ_SLICE_EXTENSION 79 +# define PR_RSEQ_SLICE_EXTENSION_GET 1 +# define PR_RSEQ_SLICE_EXTENSION_SET 2 +/* + * Bits for RSEQ_SLICE_EXTENSION_GET/SET + * PR_RSEQ_SLICE_EXT_ENABLE: Enable + */ +# define PR_RSEQ_SLICE_EXT_ENABLE 0x01 + +/* + * Get the current indirect branch tracking configuration for the current + * thread, this will be the value configured via PR_SET_INDIR_BR_LP_STATUS. + */ +#define PR_GET_INDIR_BR_LP_STATUS 80 + +/* + * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will + * enable cpu feature for user thread, to track all indirect branches and ensure + * they land on arch defined landing pad instruction. + * x86 - If enabled, an indirect branch must land on an ENDBRANCH instruction. + * arch64 - If enabled, an indirect branch must land on a BTI instruction. + * riscv - If enabled, an indirect branch must land on an lpad instruction. + * PR_INDIR_BR_LP_DISABLE will disable feature for user thread and indirect + * branches will no more be tracked by cpu to land on arch defined landing pad + * instruction. + */ +#define PR_SET_INDIR_BR_LP_STATUS 81 +# define PR_INDIR_BR_LP_ENABLE (1UL << 0) + +/* + * Prevent further changes to the specified indirect branch tracking + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_INDIR_BR_LP_STATUS 82 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/perf/util/annotate-arch/annotate-loongarch.c b/tools/perf/util/annotate-arch/annotate-loongarch.c index 3aeab453a059e..950f34e59e5cd 100644 --- a/tools/perf/util/annotate-arch/annotate-loongarch.c +++ b/tools/perf/util/annotate-arch/annotate-loongarch.c @@ -93,7 +93,7 @@ static int loongarch_jump__parse(const struct arch *arch, struct ins_operands *o start = map__unmap_ip(map, sym->start); end = map__unmap_ip(map, sym->end); - ops->target.outside = target.addr < start || target.addr > end; + ops->target.outside = target.addr < start || target.addr >= end; if (maps__find_ams(thread__maps(ms->thread), &target) == 0 && map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 2e3522905046c..63f0ee9d4c03c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -44,6 +44,7 @@ #include "strbuf.h" #include #include +#include #include #include #include @@ -137,8 +138,10 @@ static int annotated_source__alloc_histograms(struct annotated_source *src, return -1; src->samples = hashmap__new(sym_hist_hash, sym_hist_equal, NULL); - if (src->samples == NULL) + if (IS_ERR(src->samples)) { zfree(&src->histograms); + src->samples = NULL; + } return src->histograms ? 0 : -1; } diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 3050fe2126665..212f17a3dc72d 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -549,7 +549,7 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, /* * Process the PE_CONTEXT packets if we have a valid contextID or VMID. * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2 - * as VMID, Bit ETM_OPT_CTXTID2 is set in this case. + * as VMID, Format attribute 'contextid2' is set in this case. */ switch (cs_etm__get_pid_fmt(etmq)) { case CS_ETM_PIDFMT_CTXTID: diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 95f439c96180d..8a639d2e51a4c 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -194,7 +194,7 @@ int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu) * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced. * CS_ETM_PIDFMT_NONE: No context IDs * - * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 + * It's possible that the two format attributes 'contextid1' and 'contextid2' * are enabled at the same time when the session runs on an EL2 kernel. * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be * recorded in the trace data, the tool will selectively use @@ -210,15 +210,15 @@ static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata) if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { val = metadata[CS_ETM_ETMCR]; /* CONTEXTIDR is traced */ - if (val & BIT(ETM_OPT_CTXTID)) + if (val & ETMCR_CTXTID) return CS_ETM_PIDFMT_CTXTID; } else { val = metadata[CS_ETMV4_TRCCONFIGR]; /* CONTEXTIDR_EL2 is traced */ - if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) + if (val & (TRCCONFIGR_VMID | TRCCONFIGR_VMIDOPT)) return CS_ETM_PIDFMT_CTXTID2; /* CONTEXTIDR_EL1 is traced */ - else if (val & BIT(ETM4_CFG_BIT_CTXTID)) + else if (val & TRCCONFIGR_CID) return CS_ETM_PIDFMT_CTXTID; } @@ -2914,29 +2914,21 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, return 0; } -static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm) +static void cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm) { - struct evsel *evsel; - struct evlist *evlist = etm->session->evlist; + /* Take first ETM as all options will be the same for all ETMs */ + u64 *metadata = etm->metadata[0]; /* Override timeless mode with user input from --itrace=Z */ if (etm->synth_opts.timeless_decoding) { etm->timeless_decoding = true; - return 0; + return; } - /* - * Find the cs_etm evsel and look at what its timestamp setting was - */ - evlist__for_each_entry(evlist, evsel) - if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) { - etm->timeless_decoding = - !(evsel->core.attr.config & BIT(ETM_OPT_TS)); - return 0; - } - - pr_err("CS ETM: Couldn't find ETM evsel\n"); - return -EINVAL; + if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) + etm->timeless_decoding = !(metadata[CS_ETM_ETMCR] & ETMCR_TIMESTAMP_EN); + else + etm->timeless_decoding = !(metadata[CS_ETMV4_TRCCONFIGR] & TRCCONFIGR_TS); } /* @@ -3499,9 +3491,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace; session->auxtrace = &etm->auxtrace; - err = cs_etm__setup_timeless_decoding(etm); - if (err) - return err; + cs_etm__setup_timeless_decoding(etm); etm->tc.time_shift = tc->time_shift; etm->tc.time_mult = tc->time_mult; diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index a8caeea720aa1..aa9bb4a32ecaf 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -230,6 +230,21 @@ struct cs_etm_packet_queue { /* CoreSight trace ID is currently the bottom 7 bits of the value */ #define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0) +/* ETMv4 CONFIGR register bits */ +#define TRCCONFIGR_BB BIT(3) +#define TRCCONFIGR_CCI BIT(4) +#define TRCCONFIGR_CID BIT(6) +#define TRCCONFIGR_VMID BIT(7) +#define TRCCONFIGR_TS BIT(11) +#define TRCCONFIGR_RS BIT(12) +#define TRCCONFIGR_VMIDOPT BIT(15) + +/* ETMv3 ETMCR register bits */ +#define ETMCR_CYC_ACC BIT(12) +#define ETMCR_CTXTID BIT(14) +#define ETMCR_TIMESTAMP_EN BIT(28) +#define ETMCR_RETURN_STACK BIT(29) + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); void cs_etm_get_default_config(const struct perf_pmu *pmu, struct perf_event_attr *attr); diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index ddcc488f2e5f0..9e0420e14be19 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -384,7 +384,7 @@ static int jump__parse(const struct arch *arch, struct ins_operands *ops, struct start = map__unmap_ip(map, sym->start); end = map__unmap_ip(map, sym->end); - ops->target.outside = target.addr < start || target.addr > end; + ops->target.outside = target.addr < start || target.addr >= end; /* * FIXME: things like this in _cpp_lex_token (gcc's cc1 program): diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index ef79433ebc3a6..ddf1cbda1902c 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -703,6 +703,11 @@ static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data) memcpy(event->mmap2.filename, dso__long_name(dso), dso__long_name_len(dso) + 1); + /* Clear stale build ID from previous module iteration */ + event->mmap2.header.misc &= ~PERF_RECORD_MISC_MMAP_BUILD_ID; + memset(event->mmap2.build_id, 0, sizeof(event->mmap2.build_id)); + event->mmap2.build_id_size = 0; + perf_record_mmap2__read_build_id(&event->mmap2, args->machine, false); } else { size = PERF_ALIGN(dso__long_name_len(dso) + 1, sizeof(u64)); diff --git a/tools/power/cpupower/cpupower-service.conf b/tools/power/cpupower/cpupower-service.conf index 02eabe8e36142..abbb469675656 100644 --- a/tools/power/cpupower/cpupower-service.conf +++ b/tools/power/cpupower/cpupower-service.conf @@ -30,3 +30,8 @@ # its policy for the relative importance of performance versus energy savings to # the processor. See man CPUPOWER-SET(1) for additional details #PERF_BIAS= + +# Set the Energy Performance Preference +# Available options can be read from +# /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences +#EPP= diff --git a/tools/power/cpupower/cpupower.sh b/tools/power/cpupower/cpupower.sh index a37dd4cfdb2b9..6283e8bf275d6 100644 --- a/tools/power/cpupower/cpupower.sh +++ b/tools/power/cpupower/cpupower.sh @@ -23,4 +23,10 @@ then cpupower set -b "$PERF_BIAS" > /dev/null || ESTATUS=1 fi +# apply Energy Performance Preference +if test -n "$EPP" +then + cpupower set -e "$EPP" > /dev/null || ESTATUS=1 +fi + exit $ESTATUS diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index c2117e5650dd3..550a942e72ce7 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -124,7 +124,11 @@ int cmd_set(int argc, char **argv) } if (params.turbo_boost) { - ret = cpupower_set_turbo_boost(turbo_boost); + if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) + ret = cpupower_set_intel_turbo_boost(turbo_boost); + else + ret = cpupower_set_generic_turbo_boost(turbo_boost); + if (ret) fprintf(stderr, "Error setting turbo-boost\n"); } diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 82ea62bdf5a26..a3ad80b9c2c2b 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -104,7 +104,7 @@ extern struct cpupower_cpu_info cpupower_cpu_info; /* cpuid and cpuinfo helpers **************************/ int cpufreq_has_generic_boost_support(bool *active); -int cpupower_set_turbo_boost(int turbo_boost); +int cpupower_set_generic_turbo_boost(int turbo_boost); /* X86 ONLY ****************************************/ #if defined(__i386__) || defined(__x86_64__) @@ -143,6 +143,7 @@ extern int decode_pstates(unsigned int cpu, int boost_states, int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active, int *states); +int cpupower_set_intel_turbo_boost(int turbo_boost); /* AMD P-State stuff **************************/ bool cpupower_amd_pstate_enabled(void); @@ -189,6 +190,8 @@ static inline int cpupower_set_amd_pstate_mode(char *mode) static inline int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active, int *states) { return -1; } +static inline int cpupower_set_intel_turbo_boost(int turbo_boost) +{ return -1; } static inline bool cpupower_amd_pstate_enabled(void) { return false; } diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 166dc1e470ea6..eebfc79a48899 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -19,6 +19,9 @@ int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active, { int ret; unsigned long long val; + char linebuf[MAX_LINE_LEN]; + char path[SYSFS_PATH_MAX]; + char *endp; *support = *active = *states = 0; @@ -42,8 +45,42 @@ int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active, } } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) { amd_pstate_boost_init(cpu, support, active); - } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA) + } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA) { *support = *active = 1; + + snprintf(path, sizeof(path), PATH_TO_CPU "intel_pstate/no_turbo"); + + if (!is_valid_path(path)) + return 0; + + if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0) + return -1; + + val = strtol(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -1; + + *active = !val; + } + return 0; +} + +int cpupower_set_intel_turbo_boost(int turbo_boost) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[2] = {}; + + snprintf(path, sizeof(path), PATH_TO_CPU "intel_pstate/no_turbo"); + + /* Fallback to generic solution when intel_pstate driver not running */ + if (!is_valid_path(path)) + return cpupower_set_generic_turbo_boost(turbo_boost); + + snprintf(linebuf, sizeof(linebuf), "%d", !turbo_boost); + + if (cpupower_write_sysfs(path, linebuf, 2) <= 0) + return -1; + return 0; } @@ -274,7 +311,7 @@ void print_speed(unsigned long speed, int no_rounding) } } -int cpupower_set_turbo_boost(int turbo_boost) +int cpupower_set_generic_turbo_boost(int turbo_boost) { char path[SYSFS_PATH_MAX]; char linebuf[2] = {}; diff --git a/tools/power/cpupower/utils/powercap-info.c b/tools/power/cpupower/utils/powercap-info.c index 3ea4486f1a0e2..e53033488218e 100644 --- a/tools/power/cpupower/utils/powercap-info.c +++ b/tools/power/cpupower/utils/powercap-info.c @@ -38,11 +38,11 @@ static int powercap_print_one_zone(struct powercap_zone *zone) printf(" (%s)\n", mode ? "enabled" : "disabled"); if (zone->has_power_uw) - printf(_("%sPower can be monitored in micro Jules\n"), + printf(_("%sPower can be monitored in micro Watts\n"), pr_prefix); if (zone->has_energy_uj) - printf(_("%sPower can be monitored in micro Watts\n"), + printf(_("%sPower can be monitored in micro Jules\n"), pr_prefix); printf("\n"); diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl index e74868be513cf..7a42b32b65776 100644 --- a/tools/scripts/syscall.tbl +++ b/tools/scripts/syscall.tbl @@ -411,3 +411,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index fdec90e854671..dc68371f76a33 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -71,6 +71,7 @@ TEST_GEN_PROGS_x86 += x86/cpuid_test TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test TEST_GEN_PROGS_x86 += x86/feature_msrs_test +TEST_GEN_PROGS_x86 += x86/evmcs_smm_controls_test TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test TEST_GEN_PROGS_x86 += x86/fastops_test TEST_GEN_PROGS_x86 += x86/fix_hypercall_test diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 618c937f3c90f..cc329b57ce2e9 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -80,7 +80,7 @@ static void test_mbind(int fd, size_t total_size) { const unsigned long nodemask_0 = 1; /* nid: 0 */ unsigned long nodemask = 0; - unsigned long maxnode = 8; + unsigned long maxnode = BITS_PER_TYPE(nodemask); int policy; char *mem; int ret; diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 4ebae4269e681..469a221221575 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -557,6 +557,11 @@ static inline uint64_t get_cr0(void) return cr0; } +static inline void set_cr0(uint64_t val) +{ + __asm__ __volatile__("mov %0, %%cr0" : : "r" (val) : "memory"); +} + static inline uint64_t get_cr3(void) { uint64_t cr3; @@ -566,6 +571,11 @@ static inline uint64_t get_cr3(void) return cr3; } +static inline void set_cr3(uint64_t val) +{ + __asm__ __volatile__("mov %0, %%cr3" : : "r" (val) : "memory"); +} + static inline uint64_t get_cr4(void) { uint64_t cr4; @@ -580,6 +590,19 @@ static inline void set_cr4(uint64_t val) __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); } +static inline uint64_t get_cr8(void) +{ + uint64_t cr8; + + __asm__ __volatile__("mov %%cr8, %[cr8]" : [cr8]"=r"(cr8)); + return cr8; +} + +static inline void set_cr8(uint64_t val) +{ + __asm__ __volatile__("mov %0, %%cr8" : : "r" (val) : "memory"); +} + static inline void set_idt(const struct desc_ptr *idt_desc) { __asm__ __volatile__("lidt %0"::"m"(*idt_desc)); diff --git a/tools/testing/selftests/kvm/include/x86/smm.h b/tools/testing/selftests/kvm/include/x86/smm.h new file mode 100644 index 0000000000000..19337c34f13eb --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/smm.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef SELFTEST_KVM_SMM_H +#define SELFTEST_KVM_SMM_H + +#include "kvm_util.h" + +#define SMRAM_SIZE 65536 +#define SMRAM_MEMSLOT ((1 << 16) | 1) +#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) + +void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + uint64_t smram_gpa, + const void *smi_handler, size_t handler_size); + +void inject_smi(struct kvm_vcpu *vcpu); + +#endif /* SELFTEST_KVM_SMM_H */ diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index fab18e9be66c9..23a44941e2837 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -8,6 +8,7 @@ #include "kvm_util.h" #include "pmu.h" #include "processor.h" +#include "smm.h" #include "svm_util.h" #include "sev.h" #include "vmx.h" @@ -1444,3 +1445,28 @@ bool kvm_arch_has_default_irqchip(void) { return true; } + +void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + uint64_t smram_gpa, + const void *smi_handler, size_t handler_size) +{ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, smram_gpa, + SMRAM_MEMSLOT, SMRAM_PAGES, 0); + TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, smram_gpa, + SMRAM_MEMSLOT) == smram_gpa, + "Could not allocate guest physical addresses for SMRAM"); + + memset(addr_gpa2hva(vm, smram_gpa), 0x0, SMRAM_SIZE); + memcpy(addr_gpa2hva(vm, smram_gpa) + 0x8000, smi_handler, handler_size); + vcpu_set_msr(vcpu, MSR_IA32_SMBASE, smram_gpa); +} + +void inject_smi(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events; + + vcpu_events_get(vcpu, &events); + events.smi.pending = 1; + events.flags |= KVM_VCPUEVENT_VALID_SMM; + vcpu_events_set(vcpu, &events); +} diff --git a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c new file mode 100644 index 0000000000000..af7c901033966 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2026, Red Hat, Inc. + * + * Test that vmx_leave_smm() validates vmcs12 controls before re-entering + * nested guest mode on RSM. + */ +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "smm.h" +#include "hyperv.h" +#include "vmx.h" + +#define SMRAM_GPA 0x1000000 +#define SMRAM_STAGE 0xfe + +#define SYNC_PORT 0xe + +#define STR(x) #x +#define XSTR(s) STR(s) + +/* + * SMI handler: runs in real-address mode. + * Reports SMRAM_STAGE via port IO, then does RSM. + */ +static uint8_t smi_handler[] = { + 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */ + 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */ + 0x0f, 0xaa, /* rsm */ +}; + +static inline void sync_with_host(uint64_t phase) +{ + asm volatile("in $" XSTR(SYNC_PORT) ", %%al \n" + : "+a" (phase)); +} + +static void l2_guest_code(void) +{ + sync_with_host(1); + + /* After SMI+RSM with invalid controls, we should not reach here. */ + vmcall(); +} + +static void guest_code(struct vmx_pages *vmx_pages, + struct hyperv_test_pages *hv_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* Set up Hyper-V enlightenments and eVMCS */ + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); + evmcs_enable(); + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_evmcs(hv_pages)); + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_ASSERT(!vmlaunch()); + + /* L2 exits via vmcall if test fails */ + sync_with_host(2); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; + struct hyperv_test_pages *hv; + struct hv_enlightened_vmcs *evmcs; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_regs regs; + int stage_reported; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler)); + + vcpu_set_hv_cpuid(vcpu); + vcpu_enable_evmcs(vcpu); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + hv = vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); + vcpu_args_set(vcpu, 2, vmx_pages_gva, hv_pages_gva); + + vcpu_run(vcpu); + + /* L2 is running and syncs with host. */ + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + vcpu_regs_get(vcpu, ®s); + stage_reported = regs.rax & 0xff; + TEST_ASSERT(stage_reported == 1, + "Expected stage 1, got %d", stage_reported); + + /* Inject SMI while L2 is running. */ + inject_smi(vcpu); + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + vcpu_regs_get(vcpu, ®s); + stage_reported = regs.rax & 0xff; + TEST_ASSERT(stage_reported == SMRAM_STAGE, + "Expected SMM handler stage %#x, got %#x", + SMRAM_STAGE, stage_reported); + + /* + * Guest is now paused in the SMI handler, about to execute RSM. + * Hack the eVMCS page to set-up invalid pin-based execution + * control (PIN_BASED_VIRTUAL_NMIS without PIN_BASED_NMI_EXITING). + */ + evmcs = hv->enlightened_vmcs_hva; + evmcs->pin_based_vm_exec_control |= PIN_BASED_VIRTUAL_NMIS; + evmcs->hv_clean_fields = 0; + + /* + * Trigger copy_enlightened_to_vmcs12() via KVM_GET_NESTED_STATE, + * copying the invalid pin_based_vm_exec_control into cached_vmcs12. + */ + union { + struct kvm_nested_state state; + char state_[16384]; + } nested_state_buf; + + memset(&nested_state_buf, 0, sizeof(nested_state_buf)); + nested_state_buf.state.size = sizeof(nested_state_buf); + vcpu_nested_state_get(vcpu, &nested_state_buf.state); + + /* + * Resume the guest. The SMI handler executes RSM, which calls + * vmx_leave_smm(). nested_vmx_check_controls() should detect + * VIRTUAL_NMIS without NMI_EXITING and cause a triple fault. + */ + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index 86ad1c7d068f2..8bd37a476f159 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -13,6 +13,30 @@ #include "linux/psp-sev.h" #include "sev.h" +static void guest_sev_test_msr(uint32_t msr) +{ + uint64_t val = rdmsr(msr); + + wrmsr(msr, val); + GUEST_ASSERT(val == rdmsr(msr)); +} + +#define guest_sev_test_reg(reg) \ +do { \ + uint64_t val = get_##reg(); \ + \ + set_##reg(val); \ + GUEST_ASSERT(val == get_##reg()); \ +} while (0) + +static void guest_sev_test_regs(void) +{ + guest_sev_test_msr(MSR_EFER); + guest_sev_test_reg(cr0); + guest_sev_test_reg(cr3); + guest_sev_test_reg(cr4); + guest_sev_test_reg(cr8); +} #define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM) @@ -24,6 +48,8 @@ static void guest_snp_code(void) GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED); GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED); + guest_sev_test_regs(); + wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); vmgexit(); } @@ -34,6 +60,8 @@ static void guest_sev_es_code(void) GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED); GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED); + guest_sev_test_regs(); + /* * TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply * force "termination" to signal "done" via the GHCB MSR protocol. @@ -47,6 +75,8 @@ static void guest_sev_code(void) GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV)); GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED); + guest_sev_test_regs(); + GUEST_DONE(); } diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c index 55c88d664a945..ade8412bf94aa 100644 --- a/tools/testing/selftests/kvm/x86/smm_test.c +++ b/tools/testing/selftests/kvm/x86/smm_test.c @@ -14,13 +14,11 @@ #include "test_util.h" #include "kvm_util.h" +#include "smm.h" #include "vmx.h" #include "svm_util.h" -#define SMRAM_SIZE 65536 -#define SMRAM_MEMSLOT ((1 << 16) | 1) -#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) #define SMRAM_GPA 0x1000000 #define SMRAM_STAGE 0xfe @@ -113,18 +111,6 @@ static void guest_code(void *arg) sync_with_host(DONE); } -void inject_smi(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu_events events; - - vcpu_events_get(vcpu, &events); - - events.smi.pending = 1; - events.flags |= KVM_VCPUEVENT_VALID_SMM; - - vcpu_events_set(vcpu, &events); -} - int main(int argc, char *argv[]) { vm_vaddr_t nested_gva = 0; @@ -140,16 +126,7 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA, - SMRAM_MEMSLOT, SMRAM_PAGES, 0); - TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT) - == SMRAM_GPA, "could not allocate guest physical addresses?"); - - memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE); - memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler, - sizeof(smi_handler)); - - vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA); + setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler)); if (kvm_has_cap(KVM_CAP_NESTED_STATE)) { if (kvm_cpu_has(X86_FEATURE_SVM)) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 248c2b91fe42b..5a5ff88321d57 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -28,6 +28,7 @@ ALL_TESTS=" kci_test_fdb_get kci_test_fdb_del kci_test_neigh_get + kci_test_neigh_update kci_test_bridge_parent_id kci_test_address_proto kci_test_enslave_bonding @@ -1160,6 +1161,60 @@ kci_test_neigh_get() end_test "PASS: neigh get" } +kci_test_neigh_update() +{ + dstip=10.0.2.4 + dstmac=de:ad:be:ef:13:37 + local ret=0 + + for proxy in "" "proxy" ; do + # add a neighbour entry without any flags + run_cmd ip neigh add $proxy $dstip dev "$devdummy" lladdr $dstmac nud permanent + run_cmd_grep $dstip ip neigh show $proxy + run_cmd_grep_fail "$dstip dev $devdummy .*\(managed\|use\|router\|extern\)" ip neigh show $proxy + + # set the extern_learn flag, but no other + run_cmd ip neigh change $proxy $dstip dev "$devdummy" extern_learn + run_cmd_grep "$dstip dev $devdummy .* extern_learn" ip neigh show $proxy + run_cmd_grep_fail "$dstip dev $devdummy .* \(managed\|use\|router\)" ip neigh show $proxy + + # flags are reset when not provided + run_cmd ip neigh change $proxy $dstip dev "$devdummy" + run_cmd_grep $dstip ip neigh show $proxy + run_cmd_grep_fail "$dstip dev $devdummy .* extern_learn" ip neigh show $proxy + + # add a protocol + run_cmd ip neigh change $proxy $dstip dev "$devdummy" protocol boot + run_cmd_grep "$dstip dev $devdummy .* proto boot" ip neigh show $proxy + + # protocol is retained when not provided + run_cmd ip neigh change $proxy $dstip dev "$devdummy" + run_cmd_grep "$dstip dev $devdummy .* proto boot" ip neigh show $proxy + + # change protocol + run_cmd ip neigh change $proxy $dstip dev "$devdummy" protocol static + run_cmd_grep "$dstip dev $devdummy .* proto static" ip neigh show $proxy + + # also check an extended flag for non-proxy neighs + if [ "$proxy" = "" ]; then + run_cmd ip neigh change $proxy $dstip dev "$devdummy" managed + run_cmd_grep "$dstip dev $devdummy managed" ip neigh show $proxy + + run_cmd ip neigh change $proxy $dstip dev "$devdummy" lladdr $dstmac + run_cmd_grep_fail "$dstip dev $devdummy managed" ip neigh show $proxy + fi + + run_cmd ip neigh del $proxy $dstip dev "$devdummy" + done + + if [ $ret -ne 0 ];then + end_test "FAIL: neigh update" + return 1 + fi + + end_test "PASS: neigh update" +} + kci_test_bridge_parent_id() { local ret=0 diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index 7283e8b07b75b..80d4270a71ac2 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -2,8 +2,8 @@ copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 -copyuser_p7_t0 -copyuser_p7_t1 +copyuser_p7 +copyuser_p7_vmx memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 42940f92d8322..0c8efb0bddeb9 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ - copyuser_p7_t0 copyuser_p7_t1 \ + copyuser_p7 copyuser_p7_vmx \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 \ @@ -28,10 +28,15 @@ $(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES) -D SELFTEST_CASE=$(subst copyuser_64_t,,$(notdir $@)) \ -o $@ $^ -$(OUTPUT)/copyuser_p7_t%: copyuser_power7.S $(EXTRA_SOURCES) +$(OUTPUT)/copyuser_p7: copyuser_power7.S $(EXTRA_SOURCES) $(CC) $(CPPFLAGS) $(CFLAGS) \ -D COPY_LOOP=test___copy_tofrom_user_power7 \ - -D SELFTEST_CASE=$(subst copyuser_p7_t,,$(notdir $@)) \ + -o $@ $^ + +$(OUTPUT)/copyuser_p7_vmx: copyuser_power7.S $(EXTRA_SOURCES) ../utils.c + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D COPY_LOOP=test___copy_tofrom_user_power7_vmx \ + -D VMX_TEST \ -o $@ $^ # Strictly speaking, we only need the memcpy_64 test cases for big-endian diff --git a/tools/testing/selftests/powerpc/copyloops/stubs.S b/tools/testing/selftests/powerpc/copyloops/stubs.S index ec8bcf2bf1c27..3a9cb8c9a3eed 100644 --- a/tools/testing/selftests/powerpc/copyloops/stubs.S +++ b/tools/testing/selftests/powerpc/copyloops/stubs.S @@ -1,13 +1,5 @@ #include -FUNC_START(enter_vmx_usercopy) - li r3,1 - blr - -FUNC_START(exit_vmx_usercopy) - li r3,0 - blr - FUNC_START(enter_vmx_ops) li r3,1 blr diff --git a/tools/testing/selftests/powerpc/copyloops/validate.c b/tools/testing/selftests/powerpc/copyloops/validate.c index 0f68736185523..fb822534fbe95 100644 --- a/tools/testing/selftests/powerpc/copyloops/validate.c +++ b/tools/testing/selftests/powerpc/copyloops/validate.c @@ -12,6 +12,10 @@ #define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE) #define POISON 0xa5 +#ifdef VMX_TEST +#define VMX_COPY_THRESHOLD 3328 +#endif + unsigned long COPY_LOOP(void *to, const void *from, unsigned long size); static void do_one(char *src, char *dst, unsigned long src_off, @@ -81,8 +85,12 @@ int test_copy_loop(void) /* Fill with sequential bytes */ for (i = 0; i < BUFLEN; i++) fill[i] = i & 0xff; - +#ifdef VMX_TEST + /* Force sizes above kernel VMX threshold (3328) */ + for (len = VMX_COPY_THRESHOLD + 1; len < MAX_LEN; len++) { +#else for (len = 1; len < MAX_LEN; len++) { +#endif for (src_off = 0; src_off < MAX_OFFSET; src_off++) { for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) { do_one(src, dst, src_off, dst_off, len, @@ -96,5 +104,10 @@ int test_copy_loop(void) int main(void) { +#ifdef VMX_TEST + /* Skip if Altivec not present */ + SKIP_IF_MSG(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC), "ALTIVEC not supported"); +#endif + return test_harness(test_copy_loop, str(COPY_LOOP)); } diff --git a/tools/testing/selftests/sched_ext/util.c b/tools/testing/selftests/sched_ext/util.c index e47769c919187..2111329ed2893 100644 --- a/tools/testing/selftests/sched_ext/util.c +++ b/tools/testing/selftests/sched_ext/util.c @@ -60,11 +60,11 @@ int file_write_long(const char *path, long val) char buf[64]; int ret; - ret = sprintf(buf, "%lu", val); + ret = sprintf(buf, "%ld", val); if (ret < 0) return ret; - if (write_text(path, buf, sizeof(buf)) <= 0) + if (write_text(path, buf, ret) <= 0) return -1; return 0; diff --git a/virt/kvm/binary_stats.c b/virt/kvm/binary_stats.c index eefca6c69f519..76ce697c773bf 100644 --- a/virt/kvm/binary_stats.c +++ b/virt/kvm/binary_stats.c @@ -50,7 +50,7 @@ * Return: the number of bytes that has been successfully read */ ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, - const struct _kvm_stats_desc *desc, + const struct kvm_stats_desc *desc, void *stats, size_t size_stats, char __user *user_buffer, size_t size, loff_t *offset) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1bc1da66b4b02..9093251beb398 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -973,9 +973,9 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) kvm_free_memslot(kvm, memslot); } -static umode_t kvm_stats_debugfs_mode(const struct _kvm_stats_desc *pdesc) +static umode_t kvm_stats_debugfs_mode(const struct kvm_stats_desc *desc) { - switch (pdesc->desc.flags & KVM_STATS_TYPE_MASK) { + switch (desc->flags & KVM_STATS_TYPE_MASK) { case KVM_STATS_TYPE_INSTANT: return 0444; case KVM_STATS_TYPE_CUMULATIVE: @@ -1010,7 +1010,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) struct dentry *dent; char dir_name[ITOA_MAX_LEN * 2]; struct kvm_stat_data *stat_data; - const struct _kvm_stats_desc *pdesc; + const struct kvm_stats_desc *pdesc; int i, ret = -ENOMEM; int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + kvm_vcpu_stats_header.num_desc; @@ -6171,11 +6171,11 @@ static int kvm_stat_data_get(void *data, u64 *val) switch (stat_data->kind) { case KVM_STAT_VM: r = kvm_get_stat_per_vm(stat_data->kvm, - stat_data->desc->desc.offset, val); + stat_data->desc->offset, val); break; case KVM_STAT_VCPU: r = kvm_get_stat_per_vcpu(stat_data->kvm, - stat_data->desc->desc.offset, val); + stat_data->desc->offset, val); break; } @@ -6193,11 +6193,11 @@ static int kvm_stat_data_clear(void *data, u64 val) switch (stat_data->kind) { case KVM_STAT_VM: r = kvm_clear_stat_per_vm(stat_data->kvm, - stat_data->desc->desc.offset); + stat_data->desc->offset); break; case KVM_STAT_VCPU: r = kvm_clear_stat_per_vcpu(stat_data->kvm, - stat_data->desc->desc.offset); + stat_data->desc->offset); break; } @@ -6345,7 +6345,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) static void kvm_init_debug(void) { const struct file_operations *fops; - const struct _kvm_stats_desc *pdesc; + const struct kvm_stats_desc *pdesc; int i; kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); @@ -6358,7 +6358,7 @@ static void kvm_init_debug(void) fops = &vm_stat_readonly_fops; debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), kvm_debugfs_dir, - (void *)(long)pdesc->desc.offset, fops); + (void *)(long)pdesc->offset, fops); } for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) { @@ -6369,7 +6369,7 @@ static void kvm_init_debug(void) fops = &vcpu_stat_readonly_fops; debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), kvm_debugfs_dir, - (void *)(long)pdesc->desc.offset, fops); + (void *)(long)pdesc->offset, fops); } }