diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 9605dbd4b5b5..141a7bb58b80 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -484,6 +484,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spectre_v2 /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/l1tf + /sys/devices/system/cpu/vulnerabilities/mds Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities @@ -496,8 +497,7 @@ Description: Information about CPU vulnerabilities "Vulnerable" CPU is affected and no mitigation in effect "Mitigation: $M" CPU is affected and mitigation $M is in effect - Details about the l1tf file can be found in - Documentation/admin-guide/l1tf.rst + See also: Documentation/admin-guide/hw-vuln/index.rst What: /sys/devices/system/cpu/smt /sys/devices/system/cpu/smt/active diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 20f92c16ffbf..26b3a3e7daf6 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -177,6 +177,15 @@ cgroup v2 currently supports the following mount options. ignored on non-init namespace mounts. Please refer to the Delegation section for details. + memory_localevents + + Only populate memory.events with data for the current cgroup, + and not any subtrees. This is legacy behaviour, the default + behaviour without this option is to include subtree counts. + This option is system wide and can only be set on mount or + modified through remount from the init namespace. The mount + option is ignored on non-init namespace mounts. + Organizing Processes and Threads -------------------------------- diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst new file mode 100644 index 000000000000..ffc064c1ec68 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -0,0 +1,13 @@ +======================== +Hardware vulnerabilities +======================== + +This section describes CPU vulnerabilities and provides an overview of the +possible mitigations along with guidance for selecting mitigations if they +are configurable at compile, boot or run time. + +.. toctree:: + :maxdepth: 1 + + l1tf + mds diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst similarity index 99% rename from Documentation/admin-guide/l1tf.rst rename to Documentation/admin-guide/hw-vuln/l1tf.rst index 9af977384168..31653a9f0e1b 100644 --- a/Documentation/admin-guide/l1tf.rst +++ b/Documentation/admin-guide/hw-vuln/l1tf.rst @@ -445,6 +445,7 @@ The default is 'cond'. If 'l1tf=full,force' is given on the kernel command line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush module parameter is ignored and writes to the sysfs file are rejected. +.. _mitigation_selection: Mitigation selection guide -------------------------- diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst new file mode 100644 index 000000000000..e3a796c0d3a2 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/mds.rst @@ -0,0 +1,308 @@ +MDS - Microarchitectural Data Sampling +====================================== + +Microarchitectural Data Sampling is a hardware vulnerability which allows +unprivileged speculative access to data which is available in various CPU +internal buffers. + +Affected processors +------------------- + +This vulnerability affects a wide range of Intel processors. The +vulnerability is not present on: + + - Processors from AMD, Centaur and other non Intel vendors + + - Older processor models, where the CPU family is < 6 + + - Some Atoms (Bonnell, Saltwell, Goldmont, GoldmontPlus) + + - Intel processors which have the ARCH_CAP_MDS_NO bit set in the + IA32_ARCH_CAPABILITIES MSR. + +Whether a processor is affected or not can be read out from the MDS +vulnerability file in sysfs. See :ref:`mds_sys_info`. + +Not all processors are affected by all variants of MDS, but the mitigation +is identical for all of them so the kernel treats them as a single +vulnerability. + +Related CVEs +------------ + +The following CVE entries are related to the MDS vulnerability: + + ============== ===== =================================================== + CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling + CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling + CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling + CVE-2019-11091 MDSUM Microarchitectural Data Sampling Uncacheable Memory + ============== ===== =================================================== + +Problem +------- + +When performing store, load, L1 refill operations, processors write data +into temporary microarchitectural structures (buffers). The data in the +buffer can be forwarded to load operations as an optimization. + +Under certain conditions, usually a fault/assist caused by a load +operation, data unrelated to the load memory address can be speculatively +forwarded from the buffers. Because the load operation causes a fault or +assist and its result will be discarded, the forwarded data will not cause +incorrect program execution or state changes. But a malicious operation +may be able to forward this speculative data to a disclosure gadget which +allows in turn to infer the value via a cache side channel attack. + +Because the buffers are potentially shared between Hyper-Threads cross +Hyper-Thread attacks are possible. + +Deeper technical information is available in the MDS specific x86 +architecture section: :ref:`Documentation/x86/mds.rst `. + + +Attack scenarios +---------------- + +Attacks against the MDS vulnerabilities can be mounted from malicious non +priviledged user space applications running on hosts or guest. Malicious +guest OSes can obviously mount attacks as well. + +Contrary to other speculation based vulnerabilities the MDS vulnerability +does not allow the attacker to control the memory target address. As a +consequence the attacks are purely sampling based, but as demonstrated with +the TLBleed attack samples can be postprocessed successfully. + +Web-Browsers +^^^^^^^^^^^^ + + It's unclear whether attacks through Web-Browsers are possible at + all. The exploitation through Java-Script is considered very unlikely, + but other widely used web technologies like Webassembly could possibly be + abused. + + +.. _mds_sys_info: + +MDS system information +----------------------- + +The Linux kernel provides a sysfs interface to enumerate the current MDS +status of the system: whether the system is vulnerable, and which +mitigations are active. The relevant sysfs file is: + +/sys/devices/system/cpu/vulnerabilities/mds + +The possible values in this file are: + + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' + - The processor is vulnerable but microcode is not updated. + + The mitigation is enabled on a best effort basis. See :ref:`vmwerv` + * - 'Mitigation: Clear CPU buffers' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. + +If the processor is vulnerable then the following information is appended +to the above information: + + ======================== ============================================ + 'SMT vulnerable' SMT is enabled + 'SMT mitigated' SMT is enabled and mitigated + 'SMT disabled' SMT is disabled + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown + ======================== ============================================ + +.. _vmwerv: + +Best effort mitigation mode +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + If the processor is vulnerable, but the availability of the microcode based + mitigation mechanism is not advertised via CPUID the kernel selects a best + effort mitigation mode. This mode invokes the mitigation instructions + without a guarantee that they clear the CPU buffers. + + This is done to address virtualization scenarios where the host has the + microcode update applied, but the hypervisor is not yet updated to expose + the CPUID to the guest. If the host has updated microcode the protection + takes effect otherwise a few cpu cycles are wasted pointlessly. + + The state in the mds sysfs file reflects this situation accordingly. + + +Mitigation mechanism +------------------------- + +The kernel detects the affected CPUs and the presence of the microcode +which is required. + +If a CPU is affected and the microcode is available, then the kernel +enables the mitigation by default. The mitigation can be controlled at boot +time via a kernel command line option. See +:ref:`mds_mitigation_control_command_line`. + +.. _cpu_buffer_clear: + +CPU buffer clearing +^^^^^^^^^^^^^^^^^^^ + + The mitigation for MDS clears the affected CPU buffers on return to user + space and when entering a guest. + + If SMT is enabled it also clears the buffers on idle entry when the CPU + is only affected by MSBDS and not any other MDS variant, because the + other variants cannot be protected against cross Hyper-Thread attacks. + + For CPUs which are only affected by MSBDS the user space, guest and idle + transition mitigations are sufficient and SMT is not affected. + +.. _virt_mechanism: + +Virtualization mitigation +^^^^^^^^^^^^^^^^^^^^^^^^^ + + The protection for host to guest transition depends on the L1TF + vulnerability of the CPU: + + - CPU is affected by L1TF: + + If the L1D flush mitigation is enabled and up to date microcode is + available, the L1D flush mitigation is automatically protecting the + guest transition. + + If the L1D flush mitigation is disabled then the MDS mitigation is + invoked explicit when the host MDS mitigation is enabled. + + For details on L1TF and virtualization see: + :ref:`Documentation/admin-guide/hw-vuln//l1tf.rst `. + + - CPU is not affected by L1TF: + + CPU buffers are flushed before entering the guest when the host MDS + mitigation is enabled. + + The resulting MDS protection matrix for the host to guest transition: + + ============ ===== ============= ============ ================= + L1TF MDS VMX-L1FLUSH Host MDS MDS-State + + Don't care No Don't care N/A Not affected + + Yes Yes Disabled Off Vulnerable + + Yes Yes Disabled Full Mitigated + + Yes Yes Enabled Don't care Mitigated + + No Yes N/A Off Vulnerable + + No Yes N/A Full Mitigated + ============ ===== ============= ============ ================= + + This only covers the host to guest transition, i.e. prevents leakage from + host to guest, but does not protect the guest internally. Guests need to + have their own protections. + +.. _xeon_phi: + +XEON PHI specific considerations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The XEON PHI processor family is affected by MSBDS which can be exploited + cross Hyper-Threads when entering idle states. Some XEON PHI variants allow + to use MWAIT in user space (Ring 3) which opens an potential attack vector + for malicious user space. The exposure can be disabled on the kernel + command line with the 'ring3mwait=disable' command line option. + + XEON PHI is not affected by the other MDS variants and MSBDS is mitigated + before the CPU enters a idle state. As XEON PHI is not affected by L1TF + either disabling SMT is not required for full protection. + +.. _mds_smt_control: + +SMT control +^^^^^^^^^^^ + + All MDS variants except MSBDS can be attacked cross Hyper-Threads. That + means on CPUs which are affected by MFBDS or MLPDS it is necessary to + disable SMT for full protection. These are most of the affected CPUs; the + exception is XEON PHI, see :ref:`xeon_phi`. + + Disabling SMT can have a significant performance impact, but the impact + depends on the type of workloads. + + See the relevant chapter in the L1TF mitigation documentation for details: + :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst `. + + +.. _mds_mitigation_control_command_line: + +Mitigation control on the kernel command line +--------------------------------------------- + +The kernel command line allows to control the MDS mitigations at boot +time with the option "mds=". The valid arguments for this option are: + + ============ ============================================================= + full If the CPU is vulnerable, enable all available mitigations + for the MDS vulnerability, CPU buffer clearing on exit to + userspace and when entering a VM. Idle transitions are + protected as well if SMT is enabled. + + It does not automatically disable SMT. + + full,nosmt The same as mds=full, with SMT disabled on vulnerable + CPUs. This is the complete mitigation. + + off Disables MDS mitigations completely. + + ============ ============================================================= + +Not specifying this option is equivalent to "mds=full". + + +Mitigation selection guide +-------------------------- + +1. Trusted userspace +^^^^^^^^^^^^^^^^^^^^ + + If all userspace applications are from a trusted source and do not + execute untrusted code which is supplied externally, then the mitigation + can be disabled. + + +2. Virtualization with trusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The same considerations as above versus trusted user space apply. + +3. Virtualization with untrusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The protection depends on the state of the L1TF mitigations. + See :ref:`virt_mechanism`. + + If the MDS mitigation is enabled and SMT is disabled, guest to host and + guest to guest attacks are prevented. + +.. _mds_default_mitigations: + +Default mitigations +------------------- + + The kernel default mitigations for vulnerable processors are: + + - Enable CPU buffer clearing + + The kernel does not by default enforce the disabling of SMT, which leaves + SMT systems vulnerable when running untrusted code. The same rationale as + for L1TF applies. + See :ref:`Documentation/admin-guide/hw-vuln//l1tf.rst `. diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 0a491676685e..42247516962a 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -17,14 +17,12 @@ etc. kernel-parameters devices -This section describes CPU vulnerabilities and provides an overview of the -possible mitigations along with guidance for selecting mitigations if they -are configurable at compile, boot or run time. +This section describes CPU vulnerabilities and their mitigations. .. toctree:: :maxdepth: 1 - l1tf + hw-vuln/index Here is a set of documents aimed at users who are trying to track down problems and bugs in particular. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2b8ee90bb644..c7937f379d22 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2141,7 +2141,7 @@ Default is 'flush'. - For details see: Documentation/admin-guide/l1tf.rst + For details see: Documentation/admin-guide/hw-vuln/l1tf.rst l2cr= [PPC] @@ -2387,6 +2387,32 @@ Format: , Specifies range of consoles to be captured by the MDA. + mds= [X86,INTEL] + Control mitigation for the Micro-architectural Data + Sampling (MDS) vulnerability. + + Certain CPUs are vulnerable to an exploit against CPU + internal buffers which can forward information to a + disclosure gadget under certain conditions. + + In vulnerable processors, the speculatively + forwarded data can be used in a cache side channel + attack, to access data to which the attacker does + not have direct access. + + This parameter controls the MDS mitigation. The + options are: + + full - Enable MDS mitigation on vulnerable CPUs + full,nosmt - Enable MDS mitigation and disable + SMT on vulnerable CPUs + off - Unconditionally disable MDS mitigation + + Not specifying this option is equivalent to + mds=full. + + For details see: Documentation/admin-guide/hw-vuln/mds.rst + mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory Amount of memory to be used when the kernel is not able to see the whole system memory or for test. @@ -2544,6 +2570,40 @@ in the "bleeding edge" mini2440 support kernel at http://repo.or.cz/w/linux-2.6/mini2440.git + mitigations= + [X86,PPC,S390] Control optional mitigations for CPU + vulnerabilities. This is a set of curated, + arch-independent options, each of which is an + aggregation of existing arch-specific options. + + off + Disable all optional CPU mitigations. This + improves system performance, but it may also + expose users to several CPU vulnerabilities. + Equivalent to: nopti [X86,PPC] + nospectre_v1 [PPC] + nobp=0 [S390] + nospectre_v2 [X86,PPC,S390] + spectre_v2_user=off [X86] + spec_store_bypass_disable=off [X86,PPC] + l1tf=off [X86] + mds=off [X86] + + auto (default) + Mitigate all CPU vulnerabilities, but leave SMT + enabled, even if it's vulnerable. This is for + users who don't want to be surprised by SMT + getting disabled across kernel upgrades, or who + have other ways of avoiding SMT-based attacks. + Equivalent to: (default behavior) + + auto,nosmt + Mitigate all CPU vulnerabilities, disabling SMT + if needed. This is for users who always want to + be fully mitigated, even if it means losing SMT. + Equivalent to: l1tf=flush,nosmt [X86] + mds=full,nosmt [X86] + mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this parameter allows control of the logging verbosity for diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index d1e2bb801e1b..6e97a3f771ef 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -61,6 +61,7 @@ stable kernels. | ARM | Cortex-A76 | #1188873 | ARM64_ERRATUM_1188873 | | ARM | Cortex-A76 | #1165522 | ARM64_ERRATUM_1165522 | | ARM | Cortex-A76 | #1286807 | ARM64_ERRATUM_1286807 | +| ARM | Cortex-A76 | #1463225 | ARM64_ERRATUM_1463225 | | ARM | MMU-500 | #841119,#826419 | N/A | | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt index 98a8dd5ee385..1a0f2ac02eb6 100644 --- a/Documentation/block/bfq-iosched.txt +++ b/Documentation/block/bfq-iosched.txt @@ -20,13 +20,26 @@ for that device, by setting low_latency to 0. See Section 3 for details on how to configure BFQ for the desired tradeoff between latency and throughput, or on how to maximize throughput. -BFQ has a non-null overhead, which limits the maximum IOPS that a CPU -can process for a device scheduled with BFQ. To give an idea of the -limits on slow or average CPUs, here are, first, the limits of BFQ for -three different CPUs, on, respectively, an average laptop, an old -desktop, and a cheap embedded system, in case full hierarchical -support is enabled (i.e., CONFIG_BFQ_GROUP_IOSCHED is set), but -CONFIG_DEBUG_BLK_CGROUP is not set (Section 4-2): +As every I/O scheduler, BFQ adds some overhead to per-I/O-request +processing. To give an idea of this overhead, the total, +single-lock-protected, per-request processing time of BFQ---i.e., the +sum of the execution times of the request insertion, dispatch and +completion hooks---is, e.g., 1.9 us on an Intel Core i7-2760QM@2.40GHz +(dated CPU for notebooks; time measured with simple code +instrumentation, and using the throughput-sync.sh script of the S +suite [1], in performance-profiling mode). To put this result into +context, the total, single-lock-protected, per-request execution time +of the lightest I/O scheduler available in blk-mq, mq-deadline, is 0.7 +us (mq-deadline is ~800 LOC, against ~10500 LOC for BFQ). + +Scheduling overhead further limits the maximum IOPS that a CPU can +process (already limited by the execution of the rest of the I/O +stack). To give an idea of the limits with BFQ, on slow or average +CPUs, here are, first, the limits of BFQ for three different CPUs, on, +respectively, an average laptop, an old desktop, and a cheap embedded +system, in case full hierarchical support is enabled (i.e., +CONFIG_BFQ_GROUP_IOSCHED is set), but CONFIG_DEBUG_BLK_CGROUP is not +set (Section 4-2): - Intel i7-4850HQ: 400 KIOPS - AMD A8-3850: 250 KIOPS - ARM CortexTM-A53 Octa-core: 80 KIOPS @@ -566,3 +579,5 @@ applications. Unset this tunable if you need/want to control weights. Slightly extended version: http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite- results.pdf + +[3] https://github.com/Algodev-github/S diff --git a/Documentation/conf.py b/Documentation/conf.py index 72647a38b5c2..7ace3f8852bd 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -37,7 +37,7 @@ extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include', 'cdomain', 'kfigure', 'sphinx.ext.ifconfig'] # The name of the math extension changed on Sphinx 1.4 -if major == 1 and minor > 3: +if (major == 1 and minor > 3) or (major > 1): extensions.append("sphinx.ext.imgmath") else: extensions.append("sphinx.ext.pngmath") diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix.txt b/Documentation/devicetree/bindings/input/touchscreen/goodix.txt index 8cf0b4d38a7e..109cc0cebaa2 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/goodix.txt +++ b/Documentation/devicetree/bindings/input/touchscreen/goodix.txt @@ -3,6 +3,7 @@ Device tree bindings for Goodix GT9xx series touchscreen controller Required properties: - compatible : Should be "goodix,gt1151" + or "goodix,gt5663" or "goodix,gt5688" or "goodix,gt911" or "goodix,gt9110" diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt index cdbcfd3a4ff2..c269dbe384fe 100644 --- a/Documentation/devicetree/bindings/mmc/mmc.txt +++ b/Documentation/devicetree/bindings/mmc/mmc.txt @@ -64,6 +64,8 @@ Optional properties: whether pwrseq-simple is used. Default to 10ms if no available. - supports-cqe : The presence of this property indicates that the corresponding MMC host controller supports HW command queue feature. +- disable-cqe-dcmd: This property indicates that the MMC controller's command + queue engine (CQE) does not support direct commands (DCMDs). *NOTE* on CD and WP polarity. To use common for all SD/MMC host controllers line polarity properties, we have to fix the meaning of the "normal" and "inverted" diff --git a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt index 5d181fc3cc18..4a78ba8b85bc 100644 --- a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt +++ b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt @@ -59,7 +59,8 @@ Required properties: one for each entry in reset-names. - reset-names: "phy" for reset of phy block, "common" for phy common block reset, - "cfg" for phy's ahb cfg block reset. + "cfg" for phy's ahb cfg block reset, + "ufsphy" for the PHY reset in the UFS controller. For "qcom,ipq8074-qmp-pcie-phy" must contain: "phy", "common". @@ -74,7 +75,8 @@ Required properties: "phy", "common". For "qcom,sdm845-qmp-usb3-uni-phy" must contain: "phy", "common". - For "qcom,sdm845-qmp-ufs-phy": no resets are listed. + For "qcom,sdm845-qmp-ufs-phy": must contain: + "ufsphy". - vdda-phy-supply: Phandle to a regulator supply to PHY core block. - vdda-pll-supply: Phandle to 1.8V regulator supply to PHY refclk pll block. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index cf43bc4dbf31..a60fa516d4cb 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -638,3 +638,8 @@ in your dentry operations instead. inode to d_splice_alias() will also do the right thing (equivalent of d_add(dentry, NULL); return NULL;), so that kind of special cases also doesn't need a separate treatment. +-- +[mandatory] + DCACHE_RCUACCESS is gone; having an RCU delay on dentry freeing is the + default. DCACHE_NORCU opts out, and only d_alloc_pseudo() has any + business doing so. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 66cad5c86171..62ad2bbfd2de 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -45,6 +45,7 @@ Table of Contents 3.9 /proc//map_files - Information about memory mapped files 3.10 /proc//timerslack_ns - Task timerslack value 3.11 /proc//patch_state - Livepatch patch operation state + 3.12 /proc//ksm - Remote KSM 4 Configuring procfs 4.1 Mount options @@ -1948,6 +1949,19 @@ patched. If the patch is being enabled, then the task has already been patched. If the patch is being disabled, then the task hasn't been unpatched yet. +3.12 /proc//ksm - Remote KSM +-------------------------------------------- +This write-only file allows marking memory of another task for merging +and unmerging via KSM. + +The following actions are available: + + * mark task's memory as mergeable: + # echo merge > /proc//ksm + + * unmerging all the task's memory: + # echo unmerge > /proc//ksm + ------------------------------------------------------------------------------ Configuring procfs diff --git a/Documentation/index.rst b/Documentation/index.rst index 80a421cb935e..3511400dc092 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -102,6 +102,7 @@ implementation. :maxdepth: 2 sh/index + x86/index Filesystem Documentation ------------------------ diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index c4ac35234f05..f0d09162c7a3 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -250,6 +250,14 @@ tcp_base_mss - INTEGER Path MTU discovery (MTU probing). If MTU probing is enabled, this is the initial MSS used by the connection. +tcp_min_snd_mss - INTEGER + TCP SYN and SYNACK messages usually advertise an ADVMSS option, + as described in RFC 1122 and RFC 6691. + If this ADVMSS option is smaller than tcp_min_snd_mss, + it is silently capped to tcp_min_snd_mss. + + Default : 48 (at least 8 bytes of payload per segment) + tcp_congestion_control - STRING Set the congestion control algorithm to be used for new connections. The algorithm "reno" is always available, but diff --git a/Documentation/scheduler/sched-BMQ.txt b/Documentation/scheduler/sched-BMQ.txt new file mode 100644 index 000000000000..52a8fe57e208 --- /dev/null +++ b/Documentation/scheduler/sched-BMQ.txt @@ -0,0 +1,95 @@ + BitMap queue CPU Scheduler + -------------------------- + +CONTENT +======== + + Background + Design + Overview + Task policy + Priority management + BitMap Queue + CPU Assignment and Migration + + +Background +========== + +BitMap Queue CPU scheduler, referred to as BMQ from here on, is an evolution +of previous Priority and Deadline based Skiplist multiple queue scheduler(PDS), +and inspired by Zircon scheduler. The goal of it is to keep the scheduler code +simple, while efficiency and scalable for interactive tasks, such as desktop, +movie playback and gaming etc. + +Design +====== + +Overview +-------- + +BMQ use per CPU run queue design, each CPU(logical) has it's own run queue, +each CPU is responsible for scheduling the tasks that are putting into it's +run queue. + +The run queue is a set of priority queues. Note that these queues are fifo +queue for non-rt tasks or priority queue for rt tasks in data structure. See +BitMap Queue below for details. BMQ is optimized for non-rt tasks in the fact +that most applications are non-rt tasks. No matter the queue is fifo or +priority, In each queue is an ordered list of runnable tasks awaiting execution +and the data structures are the same. When it is time for a new task to run, +the scheduler simply looks the lowest numbered queueue that contains a task, +and runs the first task from the head of that queue. And per CPU idle task is +also in the run queue, so the scheduler can always find a task to run on from +its run queue. + +Each task will assigned the same timeslice(default 4ms) when it is picked to +start running. Task will be reinserted at the end of the appropriate priority +queue when it uses its whole timeslice. When the scheduler selects a new task +from the priority queue it sets the CPU's preemption timer for the remainder of +the previous timeslice. When that timer fires the scheduler will stop execution +on that task, select another task and start over again. + +If a task blocks waiting for a shared resource then it's taken out of its +priority queue and is placed in a wait queue for the shared resource. When it +is unblocked it will be reinserted in the appropriate priority queue of an +eligible CPU. + +Task policy +----------- + +BMQ supports DEADLINE, FIFO, RR, NORMAL, BATCH and IDLE task policy like the +mainline CFS scheduler. But BMQ is heavy optimized for non-rt task, that's +NORMAL/BATCH/IDLE policy tasks. Below is the implementation detail of each +policy. + +DEADLINE + It is squashed as priority 0 FIFO task. + +FIFO/RR + All RT tasks share one single priority queue in BMQ run queue designed. The +complexity of insert operation is O(n). BMQ is not designed for system runs +with major rt policy tasks. + +NORMAL/BATCH/IDLE + BATCH and IDLE tasks are treated as the same policy. They compete CPU with +NORMAL policy tasks, but they just don't boost. To control the priority of +NORMAL/BATCH/IDLE tasks, simply use nice level. + +ISO + ISO policy is not supported in BMQ. Please use nice level -20 NORMAL policy +task instead. + +Priority management +------------------- + +RT tasks have priority from 0-99. For non-rt tasks, there are three different +factors used to determine the effective priority of a task. The effective +priority being what is used to determine which queue it will be in. + +The first factor is simply the task’s static priority. Which is assigned from +task's nice level, within [-20, 19] in userland's point of view and [0, 39] +internally. + + +To be continued... diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py index 9d0a7f08f93b..1159405cb920 100644 --- a/Documentation/sphinx/kerneldoc.py +++ b/Documentation/sphinx/kerneldoc.py @@ -37,7 +37,19 @@ from docutils import nodes, statemachine from docutils.statemachine import ViewList from docutils.parsers.rst import directives, Directive -from sphinx.ext.autodoc import AutodocReporter + +# +# AutodocReporter is only good up to Sphinx 1.7 +# +import sphinx + +Use_SSI = sphinx.__version__[:3] >= '1.7' +if Use_SSI: + from sphinx.util.docutils import switch_source_input +else: + from sphinx.ext.autodoc import AutodocReporter + +import kernellog __version__ = '1.0' @@ -90,7 +102,8 @@ def run(self): cmd += [filename] try: - env.app.verbose('calling kernel-doc \'%s\'' % (" ".join(cmd))) + kernellog.verbose(env.app, + 'calling kernel-doc \'%s\'' % (" ".join(cmd))) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() @@ -100,7 +113,8 @@ def run(self): if p.returncode != 0: sys.stderr.write(err) - env.app.warn('kernel-doc \'%s\' failed with return code %d' % (" ".join(cmd), p.returncode)) + kernellog.warn(env.app, + 'kernel-doc \'%s\' failed with return code %d' % (" ".join(cmd), p.returncode)) return [nodes.error(None, nodes.paragraph(text = "kernel-doc missing"))] elif env.config.kerneldoc_verbosity > 0: sys.stderr.write(err) @@ -121,20 +135,28 @@ def run(self): lineoffset += 1 node = nodes.section() - buf = self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter + self.do_parse(result, node) + + return node.children + + except Exception as e: # pylint: disable=W0703 + kernellog.warn(env.app, 'kernel-doc \'%s\' processing failed with: %s' % + (" ".join(cmd), str(e))) + return [nodes.error(None, nodes.paragraph(text = "kernel-doc missing"))] + + def do_parse(self, result, node): + if Use_SSI: + with switch_source_input(self.state, result): + self.state.nested_parse(result, 0, node, match_titles=1) + else: + save = self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter self.state.memo.reporter = AutodocReporter(result, self.state.memo.reporter) self.state.memo.title_styles, self.state.memo.section_level = [], 0 try: self.state.nested_parse(result, 0, node, match_titles=1) finally: - self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter = buf + self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter = save - return node.children - - except Exception as e: # pylint: disable=W0703 - env.app.warn('kernel-doc \'%s\' processing failed with: %s' % - (" ".join(cmd), str(e))) - return [nodes.error(None, nodes.paragraph(text = "kernel-doc missing"))] def setup(app): app.add_config_value('kerneldoc_bin', None, 'env') diff --git a/Documentation/sphinx/kernellog.py b/Documentation/sphinx/kernellog.py new file mode 100644 index 000000000000..af924f51a7dc --- /dev/null +++ b/Documentation/sphinx/kernellog.py @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Sphinx has deprecated its older logging interface, but the replacement +# only goes back to 1.6. So here's a wrapper layer to keep around for +# as long as we support 1.4. +# +import sphinx + +if sphinx.__version__[:3] >= '1.6': + UseLogging = True + from sphinx.util import logging + logger = logging.getLogger('kerneldoc') +else: + UseLogging = False + +def warn(app, message): + if UseLogging: + logger.warning(message) + else: + app.warn(message) + +def verbose(app, message): + if UseLogging: + logger.verbose(message) + else: + app.verbose(message) + + diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py index b97228d2cc0e..fbfe6693bb60 100644 --- a/Documentation/sphinx/kfigure.py +++ b/Documentation/sphinx/kfigure.py @@ -60,6 +60,8 @@ from sphinx.util.nodes import clean_astext from six import iteritems +import kernellog + PY3 = sys.version_info[0] == 3 if PY3: @@ -171,20 +173,20 @@ def setupTools(app): This function is called once, when the builder is initiated. """ global dot_cmd, convert_cmd # pylint: disable=W0603 - app.verbose("kfigure: check installed tools ...") + kernellog.verbose(app, "kfigure: check installed tools ...") dot_cmd = which('dot') convert_cmd = which('convert') if dot_cmd: - app.verbose("use dot(1) from: " + dot_cmd) + kernellog.verbose(app, "use dot(1) from: " + dot_cmd) else: - app.warn("dot(1) not found, for better output quality install " - "graphviz from http://www.graphviz.org") + kernellog.warn(app, "dot(1) not found, for better output quality install " + "graphviz from http://www.graphviz.org") if convert_cmd: - app.verbose("use convert(1) from: " + convert_cmd) + kernellog.verbose(app, "use convert(1) from: " + convert_cmd) else: - app.warn( + kernellog.warn(app, "convert(1) not found, for SVG to PDF conversion install " "ImageMagick (https://www.imagemagick.org)") @@ -220,12 +222,13 @@ def convert_image(img_node, translator, src_fname=None): # in kernel builds, use 'make SPHINXOPTS=-v' to see verbose messages - app.verbose('assert best format for: ' + img_node['uri']) + kernellog.verbose(app, 'assert best format for: ' + img_node['uri']) if in_ext == '.dot': if not dot_cmd: - app.verbose("dot from graphviz not available / include DOT raw.") + kernellog.verbose(app, + "dot from graphviz not available / include DOT raw.") img_node.replace_self(file2literal(src_fname)) elif translator.builder.format == 'latex': @@ -252,7 +255,8 @@ def convert_image(img_node, translator, src_fname=None): if translator.builder.format == 'latex': if convert_cmd is None: - app.verbose("no SVG to PDF conversion available / include SVG raw.") + kernellog.verbose(app, + "no SVG to PDF conversion available / include SVG raw.") img_node.replace_self(file2literal(src_fname)) else: dst_fname = path.join(translator.builder.outdir, fname + '.pdf') @@ -265,18 +269,19 @@ def convert_image(img_node, translator, src_fname=None): _name = dst_fname[len(translator.builder.outdir) + 1:] if isNewer(dst_fname, src_fname): - app.verbose("convert: {out}/%s already exists and is newer" % _name) + kernellog.verbose(app, + "convert: {out}/%s already exists and is newer" % _name) else: ok = False mkdir(path.dirname(dst_fname)) if in_ext == '.dot': - app.verbose('convert DOT to: {out}/' + _name) + kernellog.verbose(app, 'convert DOT to: {out}/' + _name) ok = dot2format(app, src_fname, dst_fname) elif in_ext == '.svg': - app.verbose('convert SVG to: {out}/' + _name) + kernellog.verbose(app, 'convert SVG to: {out}/' + _name) ok = svg2pdf(app, src_fname, dst_fname) if not ok: @@ -305,7 +310,8 @@ def dot2format(app, dot_fname, out_fname): with open(out_fname, "w") as out: exit_code = subprocess.call(cmd, stdout = out) if exit_code != 0: - app.warn("Error #%d when calling: %s" % (exit_code, " ".join(cmd))) + kernellog.warn(app, + "Error #%d when calling: %s" % (exit_code, " ".join(cmd))) return bool(exit_code == 0) def svg2pdf(app, svg_fname, pdf_fname): @@ -322,7 +328,7 @@ def svg2pdf(app, svg_fname, pdf_fname): # use stdout and stderr from parent exit_code = subprocess.call(cmd) if exit_code != 0: - app.warn("Error #%d when calling: %s" % (exit_code, " ".join(cmd))) + kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd))) return bool(exit_code == 0) @@ -415,15 +421,15 @@ def visit_kernel_render(self, node): app = self.builder.app srclang = node.get('srclang') - app.verbose('visit kernel-render node lang: "%s"' % (srclang)) + kernellog.verbose(app, 'visit kernel-render node lang: "%s"' % (srclang)) tmp_ext = RENDER_MARKUP_EXT.get(srclang, None) if tmp_ext is None: - app.warn('kernel-render: "%s" unknown / include raw.' % (srclang)) + kernellog.warn(app, 'kernel-render: "%s" unknown / include raw.' % (srclang)) return if not dot_cmd and tmp_ext == '.dot': - app.verbose("dot from graphviz not available / include raw.") + kernellog.verbose(app, "dot from graphviz not available / include raw.") return literal_block = node[0] diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index aa058aa7bf28..f92c0ff84f95 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -100,6 +100,7 @@ show up in /proc/sys/kernel: - unknown_nmi_panic - watchdog - watchdog_thresh +- yield_type - version ============================================================== @@ -1143,3 +1144,13 @@ The softlockup threshold is (2 * watchdog_thresh). Setting this tunable to zero will disable lockup detection altogether. ============================================================== + +yield_type: (BMQ CPU scheduler only) + +This determines what type of yield calls to sched_yield will perform. + + 0 - No yield. + 1 - Yield only to better priority/deadline tasks. (default) + 2 - Expire timeslice and recalculate deadline. + +============================================================== diff --git a/Documentation/x86/conf.py b/Documentation/x86/conf.py new file mode 100644 index 000000000000..33c5c3142e20 --- /dev/null +++ b/Documentation/x86/conf.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8; mode: python -*- + +project = "X86 architecture specific documentation" + +tags.add("subproject") + +latex_documents = [ + ('index', 'x86.tex', project, + 'The kernel development community', 'manual'), +] diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst new file mode 100644 index 000000000000..ef389dcf1b1d --- /dev/null +++ b/Documentation/x86/index.rst @@ -0,0 +1,8 @@ +========================== +x86 architecture specifics +========================== + +.. toctree:: + :maxdepth: 1 + + mds diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst new file mode 100644 index 000000000000..5d4330be200f --- /dev/null +++ b/Documentation/x86/mds.rst @@ -0,0 +1,193 @@ +Microarchitectural Data Sampling (MDS) mitigation +================================================= + +.. _mds: + +Overview +-------- + +Microarchitectural Data Sampling (MDS) is a family of side channel attacks +on internal buffers in Intel CPUs. The variants are: + + - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126) + - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130) + - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127) + - Microarchitectural Data Sampling Uncacheable Memory (MDSUM) (CVE-2019-11091) + +MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a +dependent load (store-to-load forwarding) as an optimization. The forward +can also happen to a faulting or assisting load operation for a different +memory address, which can be exploited under certain conditions. Store +buffers are partitioned between Hyper-Threads so cross thread forwarding is +not possible. But if a thread enters or exits a sleep state the store +buffer is repartitioned which can expose data from one thread to the other. + +MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage +L1 miss situations and to hold data which is returned or sent in response +to a memory or I/O operation. Fill buffers can forward data to a load +operation and also write data to the cache. When the fill buffer is +deallocated it can retain the stale data of the preceding operations which +can then be forwarded to a faulting or assisting load operation, which can +be exploited under certain conditions. Fill buffers are shared between +Hyper-Threads so cross thread leakage is possible. + +MLPDS leaks Load Port Data. Load ports are used to perform load operations +from memory or I/O. The received data is then forwarded to the register +file or a subsequent operation. In some implementations the Load Port can +contain stale data from a previous operation which can be forwarded to +faulting or assisting loads under certain conditions, which again can be +exploited eventually. Load ports are shared between Hyper-Threads so cross +thread leakage is possible. + +MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from +memory that takes a fault or assist can leave data in a microarchitectural +structure that may later be observed using one of the same methods used by +MSBDS, MFBDS or MLPDS. + +Exposure assumptions +-------------------- + +It is assumed that attack code resides in user space or in a guest with one +exception. The rationale behind this assumption is that the code construct +needed for exploiting MDS requires: + + - to control the load to trigger a fault or assist + + - to have a disclosure gadget which exposes the speculatively accessed + data for consumption through a side channel. + + - to control the pointer through which the disclosure gadget exposes the + data + +The existence of such a construct in the kernel cannot be excluded with +100% certainty, but the complexity involved makes it extremly unlikely. + +There is one exception, which is untrusted BPF. The functionality of +untrusted BPF is limited, but it needs to be thoroughly investigated +whether it can be used to create such a construct. + + +Mitigation strategy +------------------- + +All variants have the same mitigation strategy at least for the single CPU +thread case (SMT off): Force the CPU to clear the affected buffers. + +This is achieved by using the otherwise unused and obsolete VERW +instruction in combination with a microcode update. The microcode clears +the affected CPU buffers when the VERW instruction is executed. + +For virtualization there are two ways to achieve CPU buffer +clearing. Either the modified VERW instruction or via the L1D Flush +command. The latter is issued when L1TF mitigation is enabled so the extra +VERW can be avoided. If the CPU is not affected by L1TF then VERW needs to +be issued. + +If the VERW instruction with the supplied segment selector argument is +executed on a CPU without the microcode update there is no side effect +other than a small number of pointlessly wasted CPU cycles. + +This does not protect against cross Hyper-Thread attacks except for MSBDS +which is only exploitable cross Hyper-thread when one of the Hyper-Threads +enters a C-state. + +The kernel provides a function to invoke the buffer clearing: + + mds_clear_cpu_buffers() + +The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state +(idle) transitions. + +As a special quirk to address virtualization scenarios where the host has +the microcode updated, but the hypervisor does not (yet) expose the +MD_CLEAR CPUID bit to guests, the kernel issues the VERW instruction in the +hope that it might actually clear the buffers. The state is reflected +accordingly. + +According to current knowledge additional mitigations inside the kernel +itself are not required because the necessary gadgets to expose the leaked +data cannot be controlled in a way which allows exploitation from malicious +user space or VM guests. + +Kernel internal mitigation modes +-------------------------------- + + ======= ============================================================ + off Mitigation is disabled. Either the CPU is not affected or + mds=off is supplied on the kernel command line + + full Mitigation is enabled. CPU is affected and MD_CLEAR is + advertised in CPUID. + + vmwerv Mitigation is enabled. CPU is affected and MD_CLEAR is not + advertised in CPUID. That is mainly for virtualization + scenarios where the host has the updated microcode but the + hypervisor does not expose MD_CLEAR in CPUID. It's a best + effort approach without guarantee. + ======= ============================================================ + +If the CPU is affected and mds=off is not supplied on the kernel command +line then the kernel selects the appropriate mitigation mode depending on +the availability of the MD_CLEAR CPUID bit. + +Mitigation points +----------------- + +1. Return to user space +^^^^^^^^^^^^^^^^^^^^^^^ + + When transitioning from kernel to user space the CPU buffers are flushed + on affected CPUs when the mitigation is not disabled on the kernel + command line. The migitation is enabled through the static key + mds_user_clear. + + The mitigation is invoked in prepare_exit_to_usermode() which covers + all but one of the kernel to user space transitions. The exception + is when we return from a Non Maskable Interrupt (NMI), which is + handled directly in do_nmi(). + + (The reason that NMI is special is that prepare_exit_to_usermode() can + enable IRQs. In NMI context, NMIs are blocked, and we don't want to + enable IRQs with NMIs blocked.) + + +2. C-State transition +^^^^^^^^^^^^^^^^^^^^^ + + When a CPU goes idle and enters a C-State the CPU buffers need to be + cleared on affected CPUs when SMT is active. This addresses the + repartitioning of the store buffer when one of the Hyper-Threads enters + a C-State. + + When SMT is inactive, i.e. either the CPU does not support it or all + sibling threads are offline CPU buffer clearing is not required. + + The idle clearing is enabled on CPUs which are only affected by MSBDS + and not by any other MDS variant. The other MDS variants cannot be + protected against cross Hyper-Thread attacks because the Fill Buffer and + the Load Ports are shared. So on CPUs affected by other variants, the + idle clearing would be a window dressing exercise and is therefore not + activated. + + The invocation is controlled by the static key mds_idle_clear which is + switched depending on the chosen mitigation mode and the SMT state of + the system. + + The buffer clear is only invoked before entering the C-State to prevent + that stale data from the idling CPU from spilling to the Hyper-Thread + sibling after the store buffer got repartitioned and all entries are + available to the non idle sibling. + + When coming out of idle the store buffer is partitioned again so each + sibling has half of it available. The back from idle CPU could be then + speculatively exposed to contents of the sibling. The buffers are + flushed either on exit to user space or on VMENTER so malicious code + in user space or the guest cannot speculatively access them. + + The mitigation is hooked into all variants of halt()/mwait(), but does + not cover the legacy ACPI IO-Port mechanism because the ACPI idle driver + has been superseded by the intel_idle driver around 2010 and is + preferred on all affected CPUs which are expected to gain the MD_CLEAR + functionality in microcode. Aside of that the IO-Port mechanism is a + legacy interface which is only used on older systems which are either + not affected or do not receive microcode updates anymore. diff --git a/Makefile b/Makefile index 26c92f892d24..07d09be0c5f9 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -pf7 NAME = Shy Crocodile # *DOCUMENTATION* @@ -636,7 +636,7 @@ ifeq ($(may-sync-config),1) # Read in dependencies to all Kconfig* files, make sure to run syncconfig if # changes are detected. This should be included after arch/$(SRCARCH)/Makefile # because some architectures define CROSS_COMPILE there. --include include/config/auto.conf.cmd +include include/config/auto.conf.cmd $(KCONFIG_CONFIG): @echo >&2 '***' @@ -683,8 +683,12 @@ KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os else +ifdef CONFIG_CC_OPTIMIZE_HARDER +KBUILD_CFLAGS += -O3 +else KBUILD_CFLAGS += -O2 endif +endif ifdef CONFIG_CC_DISABLE_WARN_MAYBE_UNINITIALIZED KBUILD_CFLAGS += -Wno-maybe-uninitialized diff --git a/arch/Kconfig b/arch/Kconfig index 33687dddd86a..9092e0ffe4d3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -764,7 +764,7 @@ config COMPAT_OLD_SIGACTION bool config 64BIT_TIME - def_bool ARCH_HAS_64BIT_TIME + def_bool y help This should be selected by all architectures that need to support new system calls with a 64-bit time_t. This is relevant on all 32-bit diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 7425bb0f2d1b..6219b372e9c1 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -187,6 +187,7 @@ interrupt-names = "macirq"; phy-mode = "rgmii"; snps,pbl = <32>; + snps,multicast-filter-bins = <256>; clocks = <&gmacclk>; clock-names = "stmmaceth"; phy-handle = <&phy0>; @@ -195,6 +196,9 @@ mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */ dma-coherent; + tx-fifo-depth = <4096>; + rx-fifo-depth = <4096>; + mdio { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index d819de1c5d10..3ea4112c8302 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -92,8 +92,11 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) #endif /* CONFIG_ARC_HAS_LLSC */ -#define cmpxchg(ptr, o, n) ((typeof(*(ptr)))__cmpxchg((ptr), \ - (unsigned long)(o), (unsigned long)(n))) +#define cmpxchg(ptr, o, n) ({ \ + (typeof(*(ptr)))__cmpxchg((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n)); \ +}) /* * atomic_cmpxchg is same as cmpxchg @@ -198,8 +201,11 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, return __xchg_bad_pointer(); } -#define xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \ - sizeof(*(ptr)))) +#define xchg(ptr, with) ({ \ + (typeof(*(ptr)))__xchg((unsigned long)(with), \ + (ptr), \ + sizeof(*(ptr))); \ +}) #endif /* CONFIG_ARC_PLAT_EZNPS */ diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 8df1638259f3..6836095251ed 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -66,7 +66,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) struct vm_area_struct *vma = NULL; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - int si_code = 0; + int si_code = SEGV_MAPERR; int ret; vm_fault_t fault; int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ @@ -81,16 +81,14 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) * only copy the information from the master page table, * nothing more. */ - if (address >= VMALLOC_START) { + if (address >= VMALLOC_START && !user_mode(regs)) { ret = handle_kernel_vaddr_fault(address); if (unlikely(ret)) - goto bad_area_nosemaphore; + goto no_context; else return; } - si_code = SEGV_MAPERR; - /* * If we're in an interrupt or have no user * context, we must not take the fault.. @@ -198,7 +196,6 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) bad_area: up_read(&mm->mmap_sem); -bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { tsk->thread.fault_address = address; diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 4097764fea23..fa18c00b0cfd 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -911,9 +911,11 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, struct pt_regs *regs) { struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; - unsigned int pd0[mmu->ways]; unsigned long flags; - int set; + int set, n_ways = mmu->ways; + + n_ways = min(n_ways, 4); + BUG_ON(mmu->ways > 4); local_irq_save(flags); @@ -921,9 +923,10 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, for (set = 0; set < mmu->sets; set++) { int is_valid, way; + unsigned int pd0[4]; /* read out all the ways of current set */ - for (way = 0, is_valid = 0; way < mmu->ways; way++) { + for (way = 0, is_valid = 0; way < n_ways; way++) { write_aux_reg(ARC_REG_TLBINDEX, SET_WAY_TO_IDX(mmu, set, way)); write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead); @@ -937,14 +940,14 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, continue; /* Scan the set for duplicate ways: needs a nested loop */ - for (way = 0; way < mmu->ways - 1; way++) { + for (way = 0; way < n_ways - 1; way++) { int n; if (!pd0[way]) continue; - for (n = way + 1; n < mmu->ways; n++) { + for (n = way + 1; n < n_ways; n++) { if (pd0[way] != pd0[n]) continue; diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi index f7bd26458915..42e433da79ec 100644 --- a/arch/arm/boot/dts/am57xx-idk-common.dtsi +++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi @@ -420,6 +420,7 @@ vqmmc-supply = <&ldo1_reg>; bus-width = <4>; cd-gpios = <&gpio6 27 GPIO_ACTIVE_LOW>; /* gpio 219 */ + no-1-8-v; }; &mmc2 { diff --git a/arch/arm/boot/dts/dra76x-mmc-iodelay.dtsi b/arch/arm/boot/dts/dra76x-mmc-iodelay.dtsi index baba7b00eca7..fdca48186916 100644 --- a/arch/arm/boot/dts/dra76x-mmc-iodelay.dtsi +++ b/arch/arm/boot/dts/dra76x-mmc-iodelay.dtsi @@ -22,7 +22,7 @@ * * Datamanual Revisions: * - * DRA76x Silicon Revision 1.0: SPRS993A, Revised July 2017 + * DRA76x Silicon Revision 1.0: SPRS993E, Revised December 2018 * */ @@ -169,25 +169,25 @@ /* Corresponds to MMC2_HS200_MANUAL1 in datamanual */ mmc2_iodelay_hs200_conf: mmc2_iodelay_hs200_conf { pinctrl-pin-array = < - 0x190 A_DELAY_PS(384) G_DELAY_PS(0) /* CFG_GPMC_A19_OEN */ - 0x194 A_DELAY_PS(0) G_DELAY_PS(174) /* CFG_GPMC_A19_OUT */ - 0x1a8 A_DELAY_PS(410) G_DELAY_PS(0) /* CFG_GPMC_A20_OEN */ - 0x1ac A_DELAY_PS(85) G_DELAY_PS(0) /* CFG_GPMC_A20_OUT */ - 0x1b4 A_DELAY_PS(468) G_DELAY_PS(0) /* CFG_GPMC_A21_OEN */ - 0x1b8 A_DELAY_PS(139) G_DELAY_PS(0) /* CFG_GPMC_A21_OUT */ - 0x1c0 A_DELAY_PS(676) G_DELAY_PS(0) /* CFG_GPMC_A22_OEN */ - 0x1c4 A_DELAY_PS(69) G_DELAY_PS(0) /* CFG_GPMC_A22_OUT */ - 0x1d0 A_DELAY_PS(1062) G_DELAY_PS(154) /* CFG_GPMC_A23_OUT */ - 0x1d8 A_DELAY_PS(640) G_DELAY_PS(0) /* CFG_GPMC_A24_OEN */ - 0x1dc A_DELAY_PS(0) G_DELAY_PS(0) /* CFG_GPMC_A24_OUT */ - 0x1e4 A_DELAY_PS(356) G_DELAY_PS(0) /* CFG_GPMC_A25_OEN */ - 0x1e8 A_DELAY_PS(0) G_DELAY_PS(0) /* CFG_GPMC_A25_OUT */ - 0x1f0 A_DELAY_PS(579) G_DELAY_PS(0) /* CFG_GPMC_A26_OEN */ - 0x1f4 A_DELAY_PS(0) G_DELAY_PS(0) /* CFG_GPMC_A26_OUT */ - 0x1fc A_DELAY_PS(435) G_DELAY_PS(0) /* CFG_GPMC_A27_OEN */ - 0x200 A_DELAY_PS(36) G_DELAY_PS(0) /* CFG_GPMC_A27_OUT */ - 0x364 A_DELAY_PS(759) G_DELAY_PS(0) /* CFG_GPMC_CS1_OEN */ - 0x368 A_DELAY_PS(72) G_DELAY_PS(0) /* CFG_GPMC_CS1_OUT */ + 0x190 A_DELAY_PS(384) G_DELAY_PS(0) /* CFG_GPMC_A19_OEN */ + 0x194 A_DELAY_PS(350) G_DELAY_PS(174) /* CFG_GPMC_A19_OUT */ + 0x1a8 A_DELAY_PS(410) G_DELAY_PS(0) /* CFG_GPMC_A20_OEN */ + 0x1ac A_DELAY_PS(335) G_DELAY_PS(0) /* CFG_GPMC_A20_OUT */ + 0x1b4 A_DELAY_PS(468) G_DELAY_PS(0) /* CFG_GPMC_A21_OEN */ + 0x1b8 A_DELAY_PS(339) G_DELAY_PS(0) /* CFG_GPMC_A21_OUT */ + 0x1c0 A_DELAY_PS(676) G_DELAY_PS(0) /* CFG_GPMC_A22_OEN */ + 0x1c4 A_DELAY_PS(219) G_DELAY_PS(0) /* CFG_GPMC_A22_OUT */ + 0x1d0 A_DELAY_PS(1062) G_DELAY_PS(154) /* CFG_GPMC_A23_OUT */ + 0x1d8 A_DELAY_PS(640) G_DELAY_PS(0) /* CFG_GPMC_A24_OEN */ + 0x1dc A_DELAY_PS(150) G_DELAY_PS(0) /* CFG_GPMC_A24_OUT */ + 0x1e4 A_DELAY_PS(356) G_DELAY_PS(0) /* CFG_GPMC_A25_OEN */ + 0x1e8 A_DELAY_PS(150) G_DELAY_PS(0) /* CFG_GPMC_A25_OUT */ + 0x1f0 A_DELAY_PS(579) G_DELAY_PS(0) /* CFG_GPMC_A26_OEN */ + 0x1f4 A_DELAY_PS(200) G_DELAY_PS(0) /* CFG_GPMC_A26_OUT */ + 0x1fc A_DELAY_PS(435) G_DELAY_PS(0) /* CFG_GPMC_A27_OEN */ + 0x200 A_DELAY_PS(236) G_DELAY_PS(0) /* CFG_GPMC_A27_OUT */ + 0x364 A_DELAY_PS(759) G_DELAY_PS(0) /* CFG_GPMC_CS1_OEN */ + 0x368 A_DELAY_PS(372) G_DELAY_PS(0) /* CFG_GPMC_CS1_OUT */ >; }; diff --git a/arch/arm/boot/dts/exynos5260.dtsi b/arch/arm/boot/dts/exynos5260.dtsi index 55167850619c..33a085ffc447 100644 --- a/arch/arm/boot/dts/exynos5260.dtsi +++ b/arch/arm/boot/dts/exynos5260.dtsi @@ -223,7 +223,7 @@ wakeup-interrupt-controller { compatible = "samsung,exynos4210-wakeup-eint"; interrupt-parent = <&gic>; - interrupts = ; + interrupts = ; }; }; diff --git a/arch/arm/boot/dts/exynos5420-arndale-octa.dts b/arch/arm/boot/dts/exynos5420-arndale-octa.dts index 3447160e1fbf..a0e27e1c0feb 100644 --- a/arch/arm/boot/dts/exynos5420-arndale-octa.dts +++ b/arch/arm/boot/dts/exynos5420-arndale-octa.dts @@ -107,6 +107,7 @@ regulator-name = "PVDD_APIO_1V8"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-always-on; }; ldo3_reg: LDO3 { @@ -145,6 +146,7 @@ regulator-name = "PVDD_ABB_1V8"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-always-on; }; ldo9_reg: LDO9 { diff --git a/arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi b/arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi index 51a843bd65ed..c3c2d85267da 100644 --- a/arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi +++ b/arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi @@ -22,11 +22,12 @@ "Headphone Jack", "HPL", "Headphone Jack", "HPR", "Headphone Jack", "MICBIAS", - "IN1", "Headphone Jack", + "IN12", "Headphone Jack", "Speakers", "SPKL", "Speakers", "SPKR", "I2S Playback", "Mixer DAI TX", - "HiFi Playback", "Mixer DAI TX"; + "HiFi Playback", "Mixer DAI TX", + "Mixer DAI RX", "HiFi Capture"; assigned-clocks = <&clock CLK_MOUT_EPLL>, <&clock CLK_MOUT_MAU_EPLL>, diff --git a/arch/arm/boot/dts/imx50.dtsi b/arch/arm/boot/dts/imx50.dtsi index ee1e3e8bf4ec..4a68e30cc668 100644 --- a/arch/arm/boot/dts/imx50.dtsi +++ b/arch/arm/boot/dts/imx50.dtsi @@ -411,7 +411,7 @@ reg = <0x63fb0000 0x4000>; interrupts = <6>; clocks = <&clks IMX5_CLK_SDMA_GATE>, - <&clks IMX5_CLK_SDMA_GATE>; + <&clks IMX5_CLK_AHB>; clock-names = "ipg", "ahb"; #dma-cells = <3>; fsl,sdma-ram-script-name = "imx/sdma/sdma-imx50.bin"; diff --git a/arch/arm/boot/dts/imx51.dtsi b/arch/arm/boot/dts/imx51.dtsi index a5ee25cedc10..0a4b9a5d9a9c 100644 --- a/arch/arm/boot/dts/imx51.dtsi +++ b/arch/arm/boot/dts/imx51.dtsi @@ -489,7 +489,7 @@ reg = <0x83fb0000 0x4000>; interrupts = <6>; clocks = <&clks IMX5_CLK_SDMA_GATE>, - <&clks IMX5_CLK_SDMA_GATE>; + <&clks IMX5_CLK_AHB>; clock-names = "ipg", "ahb"; #dma-cells = <3>; fsl,sdma-ram-script-name = "imx/sdma/sdma-imx51.bin"; diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi index b3300300aabe..9b672ed2486d 100644 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@ -702,7 +702,7 @@ reg = <0x63fb0000 0x4000>; interrupts = <6>; clocks = <&clks IMX5_CLK_SDMA_GATE>, - <&clks IMX5_CLK_SDMA_GATE>; + <&clks IMX5_CLK_AHB>; clock-names = "ipg", "ahb"; #dma-cells = <3>; fsl,sdma-ram-script-name = "imx/sdma/sdma-imx53.bin"; diff --git a/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi b/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi index fb01fa6e4224..c40a7af6ebee 100644 --- a/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi +++ b/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi @@ -88,6 +88,7 @@ regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; gpio = <&gpio7 12 GPIO_ACTIVE_HIGH>; + startup-delay-us = <70000>; enable-active-high; }; @@ -99,6 +100,7 @@ regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; gpio = <&gpio1 26 GPIO_ACTIVE_HIGH>; + startup-delay-us = <70000>; enable-active-high; regulator-always-on; }; @@ -216,7 +218,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-duration = <10>; phy-reset-gpios = <&gpio1 24 GPIO_ACTIVE_LOW>; phy-supply = <®_enet>; diff --git a/arch/arm/boot/dts/imx6dl-riotboard.dts b/arch/arm/boot/dts/imx6dl-riotboard.dts index 65c184bb8fb0..d9de49efa802 100644 --- a/arch/arm/boot/dts/imx6dl-riotboard.dts +++ b/arch/arm/boot/dts/imx6dl-riotboard.dts @@ -92,7 +92,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio3 31 GPIO_ACTIVE_LOW>; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/imx6q-ba16.dtsi b/arch/arm/boot/dts/imx6q-ba16.dtsi index adc9455e42c7..37c63402157b 100644 --- a/arch/arm/boot/dts/imx6q-ba16.dtsi +++ b/arch/arm/boot/dts/imx6q-ba16.dtsi @@ -171,7 +171,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6q-marsboard.dts b/arch/arm/boot/dts/imx6q-marsboard.dts index d8ccb533b6b7..84b30bd6908f 100644 --- a/arch/arm/boot/dts/imx6q-marsboard.dts +++ b/arch/arm/boot/dts/imx6q-marsboard.dts @@ -110,7 +110,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio3 31 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6q-tbs2910.dts b/arch/arm/boot/dts/imx6q-tbs2910.dts index 2ce8399a10ba..bfff87ce2e1f 100644 --- a/arch/arm/boot/dts/imx6q-tbs2910.dts +++ b/arch/arm/boot/dts/imx6q-tbs2910.dts @@ -98,7 +98,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-apf6.dtsi b/arch/arm/boot/dts/imx6qdl-apf6.dtsi index 1ebf29f43a24..4738c3c1ab50 100644 --- a/arch/arm/boot/dts/imx6qdl-apf6.dtsi +++ b/arch/arm/boot/dts/imx6qdl-apf6.dtsi @@ -51,7 +51,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-duration = <10>; phy-reset-gpios = <&gpio1 24 GPIO_ACTIVE_LOW>; status = "okay"; diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi index 1280de50a984..f3404dd10537 100644 --- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi @@ -292,7 +292,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi index a0705066ccba..185fb17a3500 100644 --- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi @@ -202,7 +202,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi index 4ccb7afc4b35..6d7f6b9035bc 100644 --- a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi @@ -53,7 +53,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_microsom_enet_ar8035>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-duration = <2>; phy-reset-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>; status = "okay"; diff --git a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi index b7d5fb421404..50d9a989e06a 100644 --- a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi +++ b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi @@ -224,7 +224,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio3 29 GPIO_ACTIVE_LOW>; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index fe17a3405edc..2df39c308c83 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -918,7 +918,7 @@ compatible = "fsl,imx6q-sdma", "fsl,imx35-sdma"; reg = <0x020ec000 0x4000>; interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX6QDL_CLK_SDMA>, + clocks = <&clks IMX6QDL_CLK_IPG>, <&clks IMX6QDL_CLK_SDMA>; clock-names = "ipg", "ahb"; #dma-cells = <3>; diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi index 4b4813f176cd..1f2a4ed99ed3 100644 --- a/arch/arm/boot/dts/imx6sl.dtsi +++ b/arch/arm/boot/dts/imx6sl.dtsi @@ -741,7 +741,7 @@ reg = <0x020ec000 0x4000>; interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6SL_CLK_SDMA>, - <&clks IMX6SL_CLK_SDMA>; + <&clks IMX6SL_CLK_AHB>; clock-names = "ipg", "ahb"; #dma-cells = <3>; /* imx6sl reuses imx6q sdma firmware */ diff --git a/arch/arm/boot/dts/imx6sll.dtsi b/arch/arm/boot/dts/imx6sll.dtsi index 62847c68330b..ed598d72038c 100644 --- a/arch/arm/boot/dts/imx6sll.dtsi +++ b/arch/arm/boot/dts/imx6sll.dtsi @@ -621,7 +621,7 @@ compatible = "fsl,imx6sll-sdma", "fsl,imx35-sdma"; reg = <0x020ec000 0x4000>; interrupts = ; - clocks = <&clks IMX6SLL_CLK_SDMA>, + clocks = <&clks IMX6SLL_CLK_IPG>, <&clks IMX6SLL_CLK_SDMA>; clock-names = "ipg", "ahb"; #dma-cells = <3>; diff --git a/arch/arm/boot/dts/imx6sx-sabreauto.dts b/arch/arm/boot/dts/imx6sx-sabreauto.dts index b0ee324afe58..315044ccd65f 100644 --- a/arch/arm/boot/dts/imx6sx-sabreauto.dts +++ b/arch/arm/boot/dts/imx6sx-sabreauto.dts @@ -75,7 +75,7 @@ &fec1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <ðphy1>; fsl,magic-packet; status = "okay"; diff --git a/arch/arm/boot/dts/imx6sx-sdb.dtsi b/arch/arm/boot/dts/imx6sx-sdb.dtsi index 08ede56c3f10..f6972deb5e39 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dtsi +++ b/arch/arm/boot/dts/imx6sx-sdb.dtsi @@ -191,7 +191,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet1>; phy-supply = <®_enet_3v3>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <ðphy1>; phy-reset-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>; status = "okay"; diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 5b16e65f7696..fc5a8fc74091 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -820,7 +820,7 @@ compatible = "fsl,imx6sx-sdma", "fsl,imx6q-sdma"; reg = <0x020ec000 0x4000>; interrupts = ; - clocks = <&clks IMX6SX_CLK_SDMA>, + clocks = <&clks IMX6SX_CLK_IPG>, <&clks IMX6SX_CLK_SDMA>; clock-names = "ipg", "ahb"; #dma-cells = <3>; diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index 62ed30c781ed..facd65602c2d 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -708,7 +708,7 @@ "fsl,imx35-sdma"; reg = <0x020ec000 0x4000>; interrupts = ; - clocks = <&clks IMX6UL_CLK_SDMA>, + clocks = <&clks IMX6UL_CLK_IPG>, <&clks IMX6UL_CLK_SDMA>; clock-names = "ipg", "ahb"; #dma-cells = <3>; diff --git a/arch/arm/boot/dts/imx7d-pico.dtsi b/arch/arm/boot/dts/imx7d-pico.dtsi index 3fd595a71202..6f50ebf31a0a 100644 --- a/arch/arm/boot/dts/imx7d-pico.dtsi +++ b/arch/arm/boot/dts/imx7d-pico.dtsi @@ -92,7 +92,7 @@ <&clks IMX7D_ENET1_TIME_ROOT_CLK>; assigned-clock-parents = <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>; assigned-clock-rates = <0>, <100000000>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <ðphy0>; fsl,magic-packet; phy-reset-gpios = <&gpio6 11 GPIO_ACTIVE_LOW>; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index e88f53a4c7f4..2f45ef527e6c 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1067,8 +1067,8 @@ compatible = "fsl,imx7d-sdma", "fsl,imx35-sdma"; reg = <0x30bd0000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_SDMA_CORE_CLK>, - <&clks IMX7D_AHB_CHANNEL_ROOT_CLK>; + clocks = <&clks IMX7D_IPG_ROOT_CLK>, + <&clks IMX7D_SDMA_CORE_CLK>; clock-names = "ipg", "ahb"; #dma-cells = <3>; fsl,sdma-ram-script-name = "imx/sdma/sdma-imx7d.bin"; diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi index 9e75f97770ce..1008dfbcb972 100644 --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi @@ -400,8 +400,8 @@ #address-cells = <3>; #size-cells = <2>; - ranges = <0x81000000 0 0x40200000 0x40200000 0 0x00100000 - 0x82000000 0 0x40300000 0x40300000 0 0x400000>; + ranges = <0x81000000 0 0x40200000 0x40200000 0 0x00100000>, + <0x82000000 0 0x40300000 0x40300000 0 0x00d00000>; interrupts = ; interrupt-names = "msi"; diff --git a/arch/arm/configs/mvebu_v7_defconfig b/arch/arm/configs/mvebu_v7_defconfig index 55140219ab11..001460ee519e 100644 --- a/arch/arm/configs/mvebu_v7_defconfig +++ b/arch/arm/configs/mvebu_v7_defconfig @@ -131,6 +131,7 @@ CONFIG_MV_XOR=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_MEMORY=y CONFIG_PWM=y +CONFIG_PHY_MVEBU_A38X_COMPHY=y CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index 07e31941dc67..617c2c99ebfb 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -278,6 +278,8 @@ static int __xts_crypt(struct skcipher_request *req, int err; err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv); diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h index 07e27f212dc7..d2453e2d3f1f 100644 --- a/arch/arm/include/asm/cp15.h +++ b/arch/arm/include/asm/cp15.h @@ -68,6 +68,8 @@ #define BPIALL __ACCESS_CP15(c7, 0, c5, 6) #define ICIALLU __ACCESS_CP15(c7, 0, c5, 0) +#define CNTVCT __ACCESS_CP15_64(1, c14) + extern unsigned long cr_alignment; /* defined in entry-armv.S */ static inline unsigned long get_cr(void) diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h index cba23eaa6072..7a88f160b1fb 100644 --- a/arch/arm/include/asm/hardirq.h +++ b/arch/arm/include/asm/hardirq.h @@ -6,6 +6,7 @@ #include #include +/* number of IPIS _not_ including IPI_CPU_BACKTRACE */ #define NR_IPI 7 typedef struct { diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index facd4240ca02..c93fe0f256de 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -70,6 +70,10 @@ enum ipi_msg_type { IPI_CPU_STOP, IPI_IRQ_WORK, IPI_COMPLETION, + /* + * CPU_BACKTRACE is special and not included in NR_IPI + * or tracable with trace_ipi_* + */ IPI_CPU_BACKTRACE, /* * SGI8-15 can be reserved by secure firmware, and thus may @@ -797,7 +801,7 @@ core_initcall(register_cpufreq_notifier); static void raise_nmi(cpumask_t *mask) { - smp_cross_call(mask, IPI_CPU_BACKTRACE); + __smp_cross_call(mask, IPI_CPU_BACKTRACE); } void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index d2b5ec9c4b92..ba88b1eca93c 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -11,6 +11,7 @@ CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve) obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o diff --git a/arch/arm/mach-exynos/firmware.c b/arch/arm/mach-exynos/firmware.c index d602e3bf3f96..2eaf2dbb8e81 100644 --- a/arch/arm/mach-exynos/firmware.c +++ b/arch/arm/mach-exynos/firmware.c @@ -196,6 +196,7 @@ bool __init exynos_secure_firmware_available(void) return false; addr = of_get_address(nd, 0, NULL, NULL); + of_node_put(nd); if (!addr) { pr_err("%s: No address specified.\n", __func__); return false; diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 0850505ac78b..5bc2d76deccb 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -444,8 +444,27 @@ static void exynos3250_pm_resume(void) static void exynos5420_prepare_pm_resume(void) { + unsigned int mpidr, cluster; + + mpidr = read_cpuid_mpidr(); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + if (IS_ENABLED(CONFIG_EXYNOS5420_MCPM)) WARN_ON(mcpm_cpu_powered_up()); + + if (IS_ENABLED(CONFIG_HW_PERF_EVENTS) && cluster != 0) { + /* + * When system is resumed on the LITTLE/KFC core (cluster 1), + * the DSCR is not properly updated until the power is turned + * on also for the cluster 0. Enable it for a while to + * propagate the SPNIDEN and SPIDEN signals from Secure JTAG + * block and avoid undefined instruction issue on CP14 reset. + */ + pmu_raw_writel(S5P_CORE_LOCAL_PWR_EN, + EXYNOS_COMMON_CONFIGURATION(0)); + pmu_raw_writel(0, + EXYNOS_COMMON_CONFIGURATION(0)); + } } static void exynos5420_pm_resume(void) @@ -639,8 +658,10 @@ void __init exynos_pm_init(void) if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) { pr_warn("Outdated DT detected, suspend/resume will NOT work\n"); + of_node_put(np); return; } + of_node_put(np); pm_data = (const struct exynos_pm_data *) match->data; diff --git a/arch/arm/mach-imx/cpuidle-imx6sx.c b/arch/arm/mach-imx/cpuidle-imx6sx.c index fd0053e47a15..3708a71f30e6 100644 --- a/arch/arm/mach-imx/cpuidle-imx6sx.c +++ b/arch/arm/mach-imx/cpuidle-imx6sx.c @@ -15,6 +15,7 @@ #include "common.h" #include "cpuidle.h" +#include "hardware.h" static int imx6sx_idle_finish(unsigned long val) { @@ -110,7 +111,7 @@ int __init imx6sx_cpuidle_init(void) * except for power up sw2iso which need to be * larger than LDO ramp up time. */ - imx_gpc_set_arm_power_up_timing(0xf, 1); + imx_gpc_set_arm_power_up_timing(cpu_is_imx6sx() ? 0xf : 0x2, 1); imx_gpc_set_arm_power_down_timing(1, 1); return cpuidle_register(&imx6sx_cpuidle_driver, NULL); diff --git a/arch/arm/mach-omap2/pm33xx-core.c b/arch/arm/mach-omap2/pm33xx-core.c index 724cf5774a6c..c93b6efd565f 100644 --- a/arch/arm/mach-omap2/pm33xx-core.c +++ b/arch/arm/mach-omap2/pm33xx-core.c @@ -51,10 +51,12 @@ static int amx3_common_init(void) /* CEFUSE domain can be turned off post bootup */ cefuse_pwrdm = pwrdm_lookup("cefuse_pwrdm"); - if (cefuse_pwrdm) - omap_set_pwrdm_state(cefuse_pwrdm, PWRDM_POWER_OFF); - else + if (!cefuse_pwrdm) pr_err("PM: Failed to get cefuse_pwrdm\n"); + else if (omap_type() != OMAP2_DEVICE_TYPE_GP) + pr_info("PM: Leaving EFUSE power domain active\n"); + else + omap_set_pwrdm_state(cefuse_pwrdm, PWRDM_POWER_OFF); return 0; } diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c index dc526ef2e9b3..ee949255ced3 100644 --- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c +++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * R-Car Generation 2 da9063/da9210 regulator quirk + * R-Car Generation 2 da9063(L)/da9210 regulator quirk * * Certain Gen2 development boards have an da9063 and one or more da9210 * regulators. All of these regulators have their interrupt request lines @@ -65,6 +65,7 @@ static struct i2c_msg da9210_msg = { static const struct of_device_id rcar_gen2_quirk_match[] = { { .compatible = "dlg,da9063", .data = &da9063_msg }, + { .compatible = "dlg,da9063l", .data = &da9063_msg }, { .compatible = "dlg,da9210", .data = &da9210_msg }, {}, }; @@ -147,6 +148,7 @@ static int __init rcar_gen2_regulator_quirk(void) if (!of_machine_is_compatible("renesas,koelsch") && !of_machine_is_compatible("renesas,lager") && + !of_machine_is_compatible("renesas,porter") && !of_machine_is_compatible("renesas,stout") && !of_machine_is_compatible("renesas,gose")) return -ENODEV; @@ -210,7 +212,7 @@ static int __init rcar_gen2_regulator_quirk(void) goto err_free; } - pr_info("IRQ2 is asserted, installing da9063/da9210 regulator quirk\n"); + pr_info("IRQ2 is asserted, installing regulator quirk\n"); bus_register_notifier(&i2c_bus_type, ®ulator_quirk_nb); return 0; diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c index a9dd619c6c29..7bdbf5d5c47d 100644 --- a/arch/arm/vdso/vgettimeofday.c +++ b/arch/arm/vdso/vgettimeofday.c @@ -18,9 +18,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -123,7 +123,8 @@ static notrace u64 get_ns(struct vdso_data *vdata) u64 cycle_now; u64 nsec; - cycle_now = arch_counter_get_cntvct(); + isb(); + cycle_now = read_sysreg(CNTVCT); cycle_delta = (cycle_now - vdata->cs_cycle_last) & vdata->cs_mask; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7e34b9eba5de..d218729ec852 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -517,6 +517,24 @@ config ARM64_ERRATUM_1286807 If unsure, say Y. +config ARM64_ERRATUM_1463225 + bool "Cortex-A76: Software Step might prevent interrupt recognition" + default y + help + This option adds a workaround for Arm Cortex-A76 erratum 1463225. + + On the affected Cortex-A76 cores (r0p0 to r3p1), software stepping + of a system call instruction (SVC) can prevent recognition of + subsequent interrupts when software stepping is disabled in the + exception handler of the system call and either kernel debugging + is enabled or VHE is in use. + + Work around the erratum by triggering a dummy step exception + when handling a system call from a task that is being stepped + in a VHE configuration of the kernel. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -1347,6 +1365,7 @@ config ARM64_MODULE_PLTS config ARM64_PSEUDO_NMI bool "Support for NMI-like interrupts" + depends on BROKEN # 1556553607-46531-1-git-send-email-julien.thierry@arm.com select CONFIG_ARM_GIC_V3 help Adds support for mimicking Non-Maskable Interrupts through the use of diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b025304bde46..8fbd583b18e1 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -51,6 +51,7 @@ endif KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables +KBUILD_CFLAGS += -Wno-psabi KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 9155bd4784eb..aa051af23bd0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -240,7 +240,7 @@ }; iomuxc_gpr: syscon@30340000 { - compatible = "fsl,imx8mq-iomuxc-gpr", "syscon"; + compatible = "fsl,imx8mq-iomuxc-gpr", "fsl,imx6q-iomuxc-gpr", "syscon"; reg = <0x30340000 0x10000>; }; diff --git a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi index 50b3589c7f15..536f735243d2 100644 --- a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi @@ -37,18 +37,18 @@ pms405-regulators { compatible = "qcom,rpm-pms405-regulators"; - vdd-s1-supply = <&vph_pwr>; - vdd-s2-supply = <&vph_pwr>; - vdd-s3-supply = <&vph_pwr>; - vdd-s4-supply = <&vph_pwr>; - vdd-s5-supply = <&vph_pwr>; - vdd-l1-l2-supply = <&vreg_s5_1p35>; - vdd-l3-l8-supply = <&vreg_s5_1p35>; - vdd-l4-supply = <&vreg_s5_1p35>; - vdd-l5-l6-supply = <&vreg_s4_1p8>; - vdd-l7-supply = <&vph_pwr>; - vdd-l9-supply = <&vreg_s5_1p35>; - vdd-l10-l11-l12-l13-supply = <&vph_pwr>; + vdd_s1-supply = <&vph_pwr>; + vdd_s2-supply = <&vph_pwr>; + vdd_s3-supply = <&vph_pwr>; + vdd_s4-supply = <&vph_pwr>; + vdd_s5-supply = <&vph_pwr>; + vdd_l1_l2-supply = <&vreg_s5_1p35>; + vdd_l3_l8-supply = <&vreg_s5_1p35>; + vdd_l4-supply = <&vreg_s5_1p35>; + vdd_l5_l6-supply = <&vreg_s4_1p8>; + vdd_l7-supply = <&vph_pwr>; + vdd_l9-supply = <&vreg_s5_1p35>; + vdd_l10_l11_l12_l13-supply = <&vph_pwr>; vreg_s4_1p8: s4 { regulator-min-microvolt = <1728000>; @@ -56,8 +56,8 @@ }; vreg_s5_1p35: s5 { - regulator-min-microvolt = <>; - regulator-max-microvolt = <>; + regulator-min-microvolt = <1352000>; + regulator-max-microvolt = <1352000>; }; vreg_l1_1p3: l1 { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts index 1f2394e0587d..d473ce290f0c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts @@ -504,7 +504,7 @@ status = "okay"; bt656-supply = <&vcc1v8_dvp>; - audio-supply = <&vcca1v8_codec>; + audio-supply = <&vcc_3v0>; sdmmc-supply = <&vcc_sdio>; gpio1830-supply = <&vcc_3v0>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index db9d948c0b03..1a16d6ce3ea8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -333,6 +333,7 @@ phys = <&emmc_phy>; phy-names = "phy_arasan"; power-domains = <&power RK3399_PD_EMMC>; + disable-cqe-dcmd; status = "disabled"; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 2d9c39033c1a..32fb03503b0b 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -222,10 +222,10 @@ CONFIG_BLK_DEV_SD=y CONFIG_SCSI_SAS_ATA=y CONFIG_SCSI_HISI_SAS=y CONFIG_SCSI_HISI_SAS_PCI=y -CONFIG_SCSI_UFSHCD=m -CONFIG_SCSI_UFSHCD_PLATFORM=m +CONFIG_SCSI_UFSHCD=y +CONFIG_SCSI_UFSHCD_PLATFORM=y CONFIG_SCSI_UFS_QCOM=m -CONFIG_SCSI_UFS_HISI=m +CONFIG_SCSI_UFS_HISI=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index e7a95a566462..5cc248967387 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -304,6 +304,8 @@ static int __xts_crypt(struct skcipher_request *req, int err; err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; kernel_neon_begin(); neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1); diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 791ad422c427..089b09286da7 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -473,9 +473,11 @@ static int gcm_encrypt(struct aead_request *req) put_unaligned_be32(2, iv + GCM_IV_SIZE); while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { - int blocks = walk.nbytes / AES_BLOCK_SIZE; + const int blocks = + walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; + int remaining = blocks; do { __aes_arm64_encrypt(ctx->aes_key.key_enc, @@ -485,9 +487,9 @@ static int gcm_encrypt(struct aead_request *req) dst += AES_BLOCK_SIZE; src += AES_BLOCK_SIZE; - } while (--blocks > 0); + } while (--remaining > 0); - ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg, + ghash_do_update(blocks, dg, walk.dst.virt.addr, &ctx->ghash_key, NULL, pmull_ghash_update_p64); @@ -609,7 +611,7 @@ static int gcm_decrypt(struct aead_request *req) put_unaligned_be32(2, iv + GCM_IV_SIZE); while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { - int blocks = walk.nbytes / AES_BLOCK_SIZE; + int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index f2a234d6516c..93e07512b4b6 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -148,18 +148,47 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } +/* + * Ensure that reads of the counter are treated the same as memory reads + * for the purposes of ordering by subsequent memory barriers. + * + * This insanity brought to you by speculative system register reads, + * out-of-order memory accesses, sequence locks and Thomas Gleixner. + * + * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + */ +#define arch_counter_enforce_ordering(val) do { \ + u64 tmp, _val = (val); \ + \ + asm volatile( \ + " eor %0, %1, %1\n" \ + " add %0, sp, %0\n" \ + " ldr xzr, [%0]" \ + : "=r" (tmp) : "r" (_val)); \ +} while (0) + static inline u64 arch_counter_get_cntpct(void) { + u64 cnt; + isb(); - return arch_timer_reg_read_stable(cntpct_el0); + cnt = arch_timer_reg_read_stable(cntpct_el0); + arch_counter_enforce_ordering(cnt); + return cnt; } static inline u64 arch_counter_get_cntvct(void) { + u64 cnt; + isb(); - return arch_timer_reg_read_stable(cntvct_el0); + cnt = arch_timer_reg_read_stable(cntvct_el0); + arch_counter_enforce_ordering(cnt); + return cnt; } +#undef arch_counter_enforce_ordering + static inline int arch_timer_arch_init(void) { return 0; diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index f6a76e43f39e..4389d5d0ca0f 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -61,7 +61,8 @@ #define ARM64_HAS_GENERIC_AUTH_ARCH 40 #define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41 #define ARM64_HAS_IRQ_PRIO_MASKING 42 +#define ARM64_WORKAROUND_1463225 43 -#define ARM64_NCAPS 43 +#define ARM64_NCAPS 44 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index c7e1a7837706..2d78ea6932b7 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -23,26 +23,34 @@ #include +#define FUTEX_MAX_LOOPS 128 /* What's the largest number you can think of? */ + #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ do { \ + unsigned int loops = FUTEX_MAX_LOOPS; \ + \ uaccess_enable(); \ asm volatile( \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w0, %w3, %2\n" \ -" cbnz %w0, 1b\n" \ -" dmb ish\n" \ +" cbz %w0, 3f\n" \ +" sub %w4, %w4, %w0\n" \ +" cbnz %w4, 1b\n" \ +" mov %w0, %w7\n" \ "3:\n" \ +" dmb ish\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ -"4: mov %w0, %w5\n" \ +"4: mov %w0, %w6\n" \ " b 3b\n" \ " .popsection\n" \ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ - : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ - : "r" (oparg), "Ir" (-EFAULT) \ + : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp), \ + "+r" (loops) \ + : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \ : "memory"); \ uaccess_disable(); \ } while (0) @@ -50,30 +58,30 @@ do { \ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) { - int oldval = 0, ret, tmp; + int oldval, ret, tmp; u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); pagefault_disable(); switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %w3, %w4", + __futex_atomic_op("mov %w3, %w5", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %w3, %w1, %w4", + __futex_atomic_op("add %w3, %w1, %w5", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %w3, %w1, %w4", + __futex_atomic_op("orr %w3, %w1, %w5", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %w3, %w1, %w4", + __futex_atomic_op("and %w3, %w1, %w5", ret, oldval, uaddr, tmp, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %w3, %w1, %w4", + __futex_atomic_op("eor %w3, %w1, %w5", ret, oldval, uaddr, tmp, oparg); break; default: @@ -93,6 +101,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, u32 oldval, u32 newval) { int ret = 0; + unsigned int loops = FUTEX_MAX_LOOPS; u32 val, tmp; u32 __user *uaddr; @@ -104,20 +113,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, asm volatile("// futex_atomic_cmpxchg_inatomic\n" " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" -" sub %w3, %w1, %w4\n" -" cbnz %w3, 3f\n" -"2: stlxr %w3, %w5, %2\n" -" cbnz %w3, 1b\n" -" dmb ish\n" +" sub %w3, %w1, %w5\n" +" cbnz %w3, 4f\n" +"2: stlxr %w3, %w6, %2\n" +" cbz %w3, 3f\n" +" sub %w4, %w4, %w3\n" +" cbnz %w4, 1b\n" +" mov %w0, %w8\n" "3:\n" +" dmb ish\n" +"4:\n" " .pushsection .fixup,\"ax\"\n" -"4: mov %w0, %w6\n" -" b 3b\n" +"5: mov %w0, %w7\n" +" b 4b\n" " .popsection\n" - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 4b) - : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) - : "r" (oldval), "r" (newval), "Ir" (-EFAULT) + _ASM_EXTABLE(1b, 5b) + _ASM_EXTABLE(2b, 5b) + : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops) + : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN) : "memory"); uaccess_disable(); diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index de70c1eabf33..74ebe9693714 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -478,6 +478,8 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) return __pmd_to_phys(pmd); } +static inline void pte_unmap(pte_t *pte) { } + /* Find an entry in the third-level page table. */ #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) @@ -486,7 +488,6 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) #define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5d9ce62bdebd..228af56ece48 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -57,7 +57,15 @@ #define TASK_SIZE_64 (UL(1) << vabits_user) #ifdef CONFIG_COMPAT +#ifdef CONFIG_ARM64_64K_PAGES +/* + * With CONFIG_ARM64_64K_PAGES enabled, the last page is occupied + * by the compat vectors page. + */ #define TASK_SIZE_32 UL(0x100000000) +#else +#define TASK_SIZE_32 (UL(0x100000000) - PAGE_SIZE) +#endif /* CONFIG_ARM64_64K_PAGES */ #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) #define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index a179df3674a1..6206ab9bfcfc 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -20,7 +20,7 @@ #include #include -typedef long (*syscall_fn_t)(struct pt_regs *regs); +typedef long (*syscall_fn_t)(const struct pt_regs *regs); extern const syscall_fn_t sys_call_table[]; diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index a4477e515b79..507d0ee6bc69 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -30,10 +30,10 @@ } \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) -#define COMPAT_SYSCALL_DEFINE0(sname) \ - asmlinkage long __arm64_compat_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__arm64_compat_sys_##sname, ERRNO); \ - asmlinkage long __arm64_compat_sys_##sname(void) +#define COMPAT_SYSCALL_DEFINE0(sname) \ + asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused); \ + ALLOW_ERROR_INJECTION(__arm64_compat_sys_##sname, ERRNO); \ + asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused) #define COND_SYSCALL_COMPAT(name) \ cond_syscall(__arm64_compat_sys_##name); @@ -62,11 +62,11 @@ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #ifndef SYSCALL_DEFINE0 -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __arm64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \ - asmlinkage long __arm64_sys_##sname(void) +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused); \ + ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \ + asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused) #endif #ifndef COND_SYSCALL diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h index 2b9a63771eda..f89263c8e11a 100644 --- a/arch/arm64/include/asm/vdso_datapage.h +++ b/arch/arm64/include/asm/vdso_datapage.h @@ -38,6 +38,7 @@ struct vdso_data { __u32 tz_minuteswest; /* Whacky timezone stuff */ __u32 tz_dsttime; __u32 use_syscall; + __u32 hrtimer_res; }; #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index d78623acb649..438759e7e8a7 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -65,8 +65,6 @@ #ifndef __ASSEMBLY__ -#include - /* * User structures for general purpose, floating point and debug registers. */ @@ -113,10 +111,10 @@ struct user_sve_header { /* * Common SVE_PT_* flags: - * These must be kept in sync with prctl interface in + * These must be kept in sync with prctl interface in */ -#define SVE_PT_VL_INHERIT (PR_SVE_VL_INHERIT >> 16) -#define SVE_PT_VL_ONEXEC (PR_SVE_SET_VL_ONEXEC >> 16) +#define SVE_PT_VL_INHERIT ((1 << 17) /* PR_SVE_VL_INHERIT */ >> 16) +#define SVE_PT_VL_ONEXEC ((1 << 18) /* PR_SVE_SET_VL_ONEXEC */ >> 16) /* diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 7f40dcbdd51d..e10e2a5d9ddc 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -94,7 +94,7 @@ int main(void) DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); - DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res)); DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9950bb0cbd52..87019cd73f22 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -464,6 +464,22 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, } #endif /* CONFIG_ARM64_SSBD */ +#ifdef CONFIG_ARM64_ERRATUM_1463225 +DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); + +static bool +has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry, + int scope) +{ + u32 midr = read_cpuid_id(); + /* Cortex-A76 r0p0 - r3p1 */ + struct midr_range range = MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1); + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range(midr, &range) && is_kernel_in_hyp_mode(); +} +#endif + static void __maybe_unused cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) { @@ -738,6 +754,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_1165522, ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1463225 + { + .desc = "ARM erratum 1463225", + .capability = ARM64_WORKAROUND_1463225, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_cortex_a76_erratum_1463225, + }, #endif { } diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index ea001241bdd4..00f8b8612b69 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -85,6 +85,7 @@ static const char *__init cpu_read_enable_method(int cpu) pr_err("%pOF: missing enable-method property\n", dn); } + of_node_put(dn); } else { enable_method = acpi_get_enable_method(cpu); if (!enable_method) { diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index d7bb6aefae0a..0fa6db521e44 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -135,6 +135,7 @@ NOKPROBE_SYMBOL(disable_debug_monitors); */ static int clear_os_lock(unsigned int cpu) { + write_sysreg(0, osdlr_el1); write_sysreg(0, oslar_el1); isb(); return 0; diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index b09b6f75f759..06941c1fe418 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -145,15 +145,15 @@ u64 __init kaslr_early_init(u64 dt_phys) if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { /* - * Randomize the module region over a 4 GB window covering the + * Randomize the module region over a 2 GB window covering the * kernel. This reduces the risk of modules leaking information * about the address of the kernel itself, but results in * branches between modules and the core kernel that are * resolved via PLTs. (Branches between modules will be * resolved normally.) */ - module_range = SZ_4G - (u64)(_end - _stext); - module_alloc_base = max((u64)_end + offset - SZ_4G, + module_range = SZ_2G - (u64)(_end - _stext); + module_alloc_base = max((u64)_end + offset - SZ_2G, (u64)MODULES_VADDR); } else { /* diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f713e2fc4d75..1e418e69b58c 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -56,7 +56,7 @@ void *module_alloc(unsigned long size) * can simply omit this fallback in that case. */ p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_base + SZ_4G, GFP_KERNEL, + module_alloc_base + SZ_2G, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c index 885f13e58708..52cfc6148355 100644 --- a/arch/arm64/kernel/ssbd.c +++ b/arch/arm64/kernel/ssbd.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index b44065fb1616..fe20c461582a 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -31,7 +31,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, off) + unsigned long, fd, unsigned long, off) { if (offset_in_page(off) != 0) return -EINVAL; @@ -47,22 +47,26 @@ SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) return ksys_personality(personality); } +asmlinkage long sys_ni_syscall(void); + +asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused) +{ + return sys_ni_syscall(); +} + /* * Wrappers to pass the pt_regs argument. */ -#define sys_personality sys_arm64_personality - -asmlinkage long sys_ni_syscall(const struct pt_regs *); -#define __arm64_sys_ni_syscall sys_ni_syscall +#define __arm64_sys_personality __arm64_sys_arm64_personality #undef __SYSCALL #define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *); #include #undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)__arm64_##sym, +#define __SYSCALL(nr, sym) [nr] = __arm64_##sym, const syscall_fn_t sys_call_table[__NR_syscalls] = { - [0 ... __NR_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, + [0 ... __NR_syscalls - 1] = __arm64_sys_ni_syscall, #include }; diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c index 0f8bcb7de700..3c80a40c1c9d 100644 --- a/arch/arm64/kernel/sys32.c +++ b/arch/arm64/kernel/sys32.c @@ -133,17 +133,14 @@ COMPAT_SYSCALL_DEFINE6(aarch32_fallocate, int, fd, int, mode, return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len)); } -asmlinkage long sys_ni_syscall(const struct pt_regs *); -#define __arm64_sys_ni_syscall sys_ni_syscall - #undef __SYSCALL #define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *); #include #undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)__arm64_##sym, +#define __SYSCALL(nr, sym) [nr] = __arm64_##sym, const syscall_fn_t compat_sys_call_table[__NR_compat_syscalls] = { - [0 ... __NR_compat_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, + [0 ... __NR_compat_syscalls - 1] = __arm64_sys_ni_syscall, #include }; diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 5610ac01c1ec..871c739f060a 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -60,6 +61,35 @@ static inline bool has_syscall_work(unsigned long flags) int syscall_trace_enter(struct pt_regs *regs); void syscall_trace_exit(struct pt_regs *regs); +#ifdef CONFIG_ARM64_ERRATUM_1463225 +DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); + +static void cortex_a76_erratum_1463225_svc_handler(void) +{ + u32 reg, val; + + if (!unlikely(test_thread_flag(TIF_SINGLESTEP))) + return; + + if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225))) + return; + + __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1); + reg = read_sysreg(mdscr_el1); + val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE; + write_sysreg(val, mdscr_el1); + asm volatile("msr daifclr, #8"); + isb(); + + /* We will have taken a single-step exception by this point */ + + write_sysreg(reg, mdscr_el1); + __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0); +} +#else +static void cortex_a76_erratum_1463225_svc_handler(void) { } +#endif /* CONFIG_ARM64_ERRATUM_1463225 */ + static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, const syscall_fn_t syscall_table[]) { @@ -68,6 +98,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, regs->orig_x0 = regs->regs[0]; regs->syscallno = scno; + cortex_a76_erratum_1463225_svc_handler(); local_daif_restore(DAIF_PROCCTX); user_exit(); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 29755989f616..3102685a6cd7 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -256,7 +256,10 @@ void arm64_force_sig_fault(int signo, int code, void __user *addr, const char *str) { arm64_show_signal(signo, str); - force_sig_fault(signo, code, addr, current); + if (signo == SIGKILL) + force_sig(SIGKILL, current); + else + force_sig_fault(signo, code, addr, current); } void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 2d419006ad43..ec0bb588d755 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -232,6 +232,9 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; + /* Read without the seqlock held by clock_getres() */ + WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution); + if (!use_syscall) { /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index c39872a7b03c..856fee6d3512 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -73,6 +73,13 @@ x_tmp .req x8 movn x_tmp, #0xff00, lsl #48 and \res, x_tmp, \res mul \res, \res, \mult + /* + * Fake address dependency from the value computed from the counter + * register to subsequent data page accesses so that the sequence + * locking also orders the read of the counter. + */ + and x_tmp, \res, xzr + add vdso_data, vdso_data, x_tmp .endm /* @@ -147,12 +154,12 @@ ENTRY(__kernel_gettimeofday) /* w11 = cs_mono_mult, w12 = cs_shift */ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - seqcnt_check fail=1b get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=1b get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 @@ -211,13 +218,13 @@ realtime: /* w11 = cs_mono_mult, w12 = cs_shift */ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - seqcnt_check fail=realtime /* All computations are done with left-shifted nsecs. */ get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=realtime get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 clock_gettime_return, shift=1 @@ -231,7 +238,6 @@ monotonic: ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] - seqcnt_check fail=monotonic /* All computations are done with left-shifted nsecs. */ lsl x4, x4, x12 @@ -239,6 +245,7 @@ monotonic: lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=monotonic get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 @@ -253,13 +260,13 @@ monotonic_raw: /* w11 = cs_raw_mult, w12 = cs_shift */ ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] - seqcnt_check fail=monotonic_raw /* All computations are done with left-shifted nsecs. */ get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=monotonic_raw get_ts_clock_raw res_sec=x10, res_nsec=x11, \ clock_nsec=x15, nsec_to_sec=x9 @@ -301,13 +308,14 @@ ENTRY(__kernel_clock_getres) ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne b.ne 1f - ldr x2, 5f + adr vdso_data, _vdso_data + ldr w2, [vdso_data, #CLOCK_REALTIME_RES] b 2f 1: cmp w0, #CLOCK_REALTIME_COARSE ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne b.ne 4f - ldr x2, 6f + ldr x2, 5f 2: cbz x1, 3f stp xzr, x2, [x1] @@ -321,8 +329,6 @@ ENTRY(__kernel_clock_getres) svc #0 ret 5: - .quad CLOCK_REALTIME_RES -6: .quad CLOCK_COARSE_RES .cfi_endproc ENDPROC(__kernel_clock_getres) diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 82d1904328ad..ea710f674cb6 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,6 +10,7 @@ KVM=../../../../virt/kvm obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 78c0a72f822c..674860e3e478 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -249,6 +249,11 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; + if (!is_vmalloc_addr(cpu_addr)) { + unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr)); + return __swiotlb_mmap_pfn(vma, pfn, size); + } + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { /* * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped, @@ -272,6 +277,11 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt, unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; struct vm_struct *area = find_vm_area(cpu_addr); + if (!is_vmalloc_addr(cpu_addr)) { + struct page *page = virt_to_page(cpu_addr); + return __swiotlb_get_sgtable_page(sgt, page, size); + } + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { /* * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped, diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1a7e92ab69eb..f637447e96b0 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -171,9 +171,10 @@ void show_pte(unsigned long addr) return; } - pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n", + pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp=%016lx\n", mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, - mm == &init_mm ? VA_BITS : (int) vabits_user, mm->pgd); + mm == &init_mm ? VA_BITS : (int)vabits_user, + (unsigned long)virt_to_phys(mm->pgd)); pgdp = pgd_offset(mm, addr); pgd = READ_ONCE(*pgdp); pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd)); @@ -810,14 +811,47 @@ void __init hook_debug_fault_code(int nr, debug_fault_info[nr].name = name; } +#ifdef CONFIG_ARM64_ERRATUM_1463225 +DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); + +static int __exception +cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa)) + return 0; + + /* + * We've taken a dummy step exception from the kernel to ensure + * that interrupts are re-enabled on the syscall path. Return back + * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions + * masked so that we can safely restore the mdscr and get on with + * handling the syscall. + */ + regs->pstate |= PSR_D_BIT; + return 1; +} +#else +static int __exception +cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +{ + return 0; +} +#endif /* CONFIG_ARM64_ERRATUM_1463225 */ + asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint, - unsigned int esr, - struct pt_regs *regs) + unsigned int esr, + struct pt_regs *regs) { const struct fault_info *inf = esr_to_debug_fault_info(esr); unsigned long pc = instruction_pointer(regs); int rv; + if (cortex_a76_erratum_1463225_debug_handler(regs)) + return 0; + /* * Tell lockdep we disabled irqs in entry.S. Do nothing if they were * already disabled to preserve the last enabled/disabled addresses. diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e97f018ff740..ece9490e3018 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -936,13 +936,18 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) int __init arch_ioremap_pud_supported(void) { - /* only 4k granule supports level 1 block mappings */ - return IS_ENABLED(CONFIG_ARM64_4K_PAGES); + /* + * Only 4k granule supports level 1 block mappings. + * SW table walks can't handle removal of intermediate entries. + */ + return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && + !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); } int __init arch_ioremap_pmd_supported(void) { - return 1; + /* See arch_ioremap_pud_supported() */ + return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); } int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index aa0817c9c4c3..fdd626d34274 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -65,24 +65,25 @@ ENTRY(cpu_do_suspend) mrs x2, tpidr_el0 mrs x3, tpidrro_el0 mrs x4, contextidr_el1 - mrs x5, cpacr_el1 - mrs x6, tcr_el1 - mrs x7, vbar_el1 - mrs x8, mdscr_el1 - mrs x9, oslsr_el1 - mrs x10, sctlr_el1 + mrs x5, osdlr_el1 + mrs x6, cpacr_el1 + mrs x7, tcr_el1 + mrs x8, vbar_el1 + mrs x9, mdscr_el1 + mrs x10, oslsr_el1 + mrs x11, sctlr_el1 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs x11, tpidr_el1 + mrs x12, tpidr_el1 alternative_else - mrs x11, tpidr_el2 + mrs x12, tpidr_el2 alternative_endif - mrs x12, sp_el0 + mrs x13, sp_el0 stp x2, x3, [x0] - stp x4, xzr, [x0, #16] - stp x5, x6, [x0, #32] - stp x7, x8, [x0, #48] - stp x9, x10, [x0, #64] - stp x11, x12, [x0, #80] + stp x4, x5, [x0, #16] + stp x6, x7, [x0, #32] + stp x8, x9, [x0, #48] + stp x10, x11, [x0, #64] + stp x12, x13, [x0, #80] ret ENDPROC(cpu_do_suspend) @@ -105,8 +106,8 @@ ENTRY(cpu_do_resume) msr cpacr_el1, x6 /* Don't change t0sz here, mask those bits when restoring */ - mrs x5, tcr_el1 - bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + mrs x7, tcr_el1 + bfi x8, x7, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH msr tcr_el1, x8 msr vbar_el1, x9 @@ -130,6 +131,7 @@ alternative_endif /* * Restore oslsr_el1 by writing oslar_el1 */ + msr osdlr_el1, x5 ubfx x11, x11, #1, #1 msr oslar_el1, x11 reset_pmuserenr_el0 x0 // Disable PMU access from EL0 diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 783de51a6c4e..6c881659ee8a 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -100,12 +100,6 @@ #define A64_STXR(sf, Rt, Rn, Rs) \ A64_LSX(sf, Rt, Rn, Rs, STORE_EX) -/* Prefetch */ -#define A64_PRFM(Rn, type, target, policy) \ - aarch64_insn_gen_prefetch(Rn, AARCH64_INSN_PRFM_TYPE_##type, \ - AARCH64_INSN_PRFM_TARGET_##target, \ - AARCH64_INSN_PRFM_POLICY_##policy) - /* Add/subtract (immediate) */ #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \ aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index aaddc0217e73..a1420626fca2 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -762,7 +762,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_STX | BPF_XADD | BPF_DW: emit_a64_mov_i(1, tmp, off, ctx); emit(A64_ADD(1, tmp, tmp, dst), ctx); - emit(A64_PRFM(tmp, PST, L1, STRM), ctx); emit(A64_LDXR(isdw, tmp2, tmp), ctx); emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx); diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index a03803506b0c..5e1015eb6d0d 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -55,6 +55,7 @@ paddr_to_nid(unsigned long paddr) return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0); } +EXPORT_SYMBOL(paddr_to_nid); #if defined(CONFIG_SPARSEMEM) && defined(CONFIG_NUMA) /* diff --git a/arch/mips/include/asm/ginvt.h b/arch/mips/include/asm/ginvt.h index 49c6dbe37338..6eb7c2b94dc7 100644 --- a/arch/mips/include/asm/ginvt.h +++ b/arch/mips/include/asm/ginvt.h @@ -19,7 +19,7 @@ _ASM_MACRO_1R1I(ginvt, rs, type, # define _ASM_SET_GINV #endif -static inline void ginvt(unsigned long addr, enum ginvt_type type) +static __always_inline void ginvt(unsigned long addr, enum ginvt_type type) { asm volatile( ".set push\n" diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 413863508f6f..d67fb64e908c 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -64,17 +64,11 @@ struct mips_perf_event { #define CNTR_EVEN 0x55555555 #define CNTR_ODD 0xaaaaaaaa #define CNTR_ALL 0xffffffff -#ifdef CONFIG_MIPS_MT_SMP enum { T = 0, V = 1, P = 2, } range; -#else - #define T - #define V - #define P -#endif }; static struct mips_perf_event raw_event; @@ -325,9 +319,7 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) { struct perf_event *event = container_of(evt, struct perf_event, hw); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); -#ifdef CONFIG_MIPS_MT_SMP unsigned int range = evt->event_base >> 24; -#endif /* CONFIG_MIPS_MT_SMP */ WARN_ON(idx < 0 || idx >= mipspmu.num_counters); @@ -336,21 +328,15 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) /* Make sure interrupt enabled. */ MIPS_PERFCTRL_IE; -#ifdef CONFIG_CPU_BMIPS5000 - { + if (IS_ENABLED(CONFIG_CPU_BMIPS5000)) { /* enable the counter for the calling thread */ cpuc->saved_ctrl[idx] |= (1 << (12 + vpe_id())) | BRCM_PERFCTRL_TC; - } -#else -#ifdef CONFIG_MIPS_MT_SMP - if (range > V) { + } else if (IS_ENABLED(CONFIG_MIPS_MT_SMP) && range > V) { /* The counter is processor wide. Set it up to count all TCs. */ pr_debug("Enabling perf counter for all TCs\n"); cpuc->saved_ctrl[idx] |= M_TC_EN_ALL; - } else -#endif /* CONFIG_MIPS_MT_SMP */ - { + } else { unsigned int cpu, ctrl; /* @@ -365,7 +351,6 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) cpuc->saved_ctrl[idx] |= ctrl; pr_debug("Enabling perf counter for CPU%d\n", cpu); } -#endif /* CONFIG_CPU_BMIPS5000 */ /* * We do not actually let the counter run. Leave it until start(). */ diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index 93b8e0b4332f..b9d6c6ec4177 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -41,7 +41,19 @@ char *mips_get_machine_name(void) #ifdef CONFIG_USE_OF void __init early_init_dt_add_memory_arch(u64 base, u64 size) { - return add_memory_region(base, size, BOOT_MEM_RAM); + if (base >= PHYS_ADDR_MAX) { + pr_warn("Trying to add an invalid memory region, skipped\n"); + return; + } + + /* Truncate the passed memory region instead of type casting */ + if (base + size - 1 >= PHYS_ADDR_MAX || base + size < base) { + pr_warn("Truncate memory region %llx @ %llx to size %llx\n", + size, base, PHYS_ADDR_MAX - base); + size = PHYS_ADDR_MAX - base; + } + + add_memory_region(base, size, BOOT_MEM_RAM); } int __init early_init_dt_reserve_memory_arch(phys_addr_t base, diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index 4aaff3b3175c..6dbe4eab0a0e 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -112,9 +112,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) */ aup->resume_epc = regs->cp0_epc + 4; if (insn_has_delay_slot((union mips_instruction) aup->insn[0])) { - unsigned long epc; - - epc = regs->cp0_epc; __compute_return_epc_for_insn(regs, (union mips_instruction) aup->insn[0]); aup->resume_epc = regs->cp0_epc; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 6d0517ac18e5..0369f26ab96d 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1122,6 +1122,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_MAX_VCPU_ID: + r = KVM_MAX_VCPU_ID; + break; case KVM_CAP_MIPS_FPU: /* We don't handle systems with inconsistent cpu_has_fpu */ r = !!raw_cpu_has_fpu; diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 2f616ebeb7e0..7755a1fad05a 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -203,6 +203,11 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) int __virt_addr_valid(const volatile void *kaddr) { + unsigned long vaddr = (unsigned long)vaddr; + + if ((vaddr < PAGE_OFFSET) || (vaddr >= MAP_BASE)) + return 0; + return pfn_valid(PFN_DOWN(virt_to_phys(kaddr))); } EXPORT_SYMBOL_GPL(__virt_addr_valid); diff --git a/arch/mips/pistachio/Platform b/arch/mips/pistachio/Platform index d80cd612df1f..c3592b374ad2 100644 --- a/arch/mips/pistachio/Platform +++ b/arch/mips/pistachio/Platform @@ -6,3 +6,4 @@ cflags-$(CONFIG_MACH_PISTACHIO) += \ -I$(srctree)/arch/mips/include/asm/mach-pistachio load-$(CONFIG_MACH_PISTACHIO) += 0xffffffff80400000 zload-$(CONFIG_MACH_PISTACHIO) += 0xffffffff81000000 +all-$(CONFIG_MACH_PISTACHIO) := uImage.gz diff --git a/arch/parisc/boot/compressed/head.S b/arch/parisc/boot/compressed/head.S index 5aba20fa48aa..e8b798fd0cf0 100644 --- a/arch/parisc/boot/compressed/head.S +++ b/arch/parisc/boot/compressed/head.S @@ -22,7 +22,7 @@ __HEAD ENTRY(startup) - .level LEVEL + .level PA_ASM_LEVEL #define PSW_W_SM 0x200 #define PSW_W_BIT 36 @@ -63,7 +63,7 @@ $bss_loop: load32 BOOTADDR(decompress_kernel),%r3 #ifdef CONFIG_64BIT - .level LEVEL + .level PA_ASM_LEVEL ssm PSW_W_SM, %r0 /* set W-bit */ depdi 0, 31, 32, %r3 #endif @@ -72,7 +72,7 @@ $bss_loop: startup_continue: #ifdef CONFIG_64BIT - .level LEVEL + .level PA_ASM_LEVEL rsm PSW_W_SM, %r0 /* clear W-bit */ #endif diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index c17ec0ee6e7c..d85738a7bbe6 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -61,14 +61,14 @@ #define LDCW ldcw,co #define BL b,l # ifdef CONFIG_64BIT -# define LEVEL 2.0w +# define PA_ASM_LEVEL 2.0w # else -# define LEVEL 2.0 +# define PA_ASM_LEVEL 2.0 # endif #else #define LDCW ldcw #define BL bl -#define LEVEL 1.1 +#define PA_ASM_LEVEL 1.1 #endif #ifdef __ASSEMBLY__ diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 006fb939cac8..4016fe1c65a9 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -44,22 +44,22 @@ void parisc_setup_cache_timing(void); #define pdtlb(addr) asm volatile("pdtlb 0(%%sr1,%0)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ - : : "r" (addr)) + : : "r" (addr) : "memory") #define pitlb(addr) asm volatile("pitlb 0(%%sr1,%0)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ ALTERNATIVE(ALT_COND_NO_SPLIT_TLB, INSN_NOP) \ - : : "r" (addr)) + : : "r" (addr) : "memory") #define pdtlb_kernel(addr) asm volatile("pdtlb 0(%0)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ - : : "r" (addr)) + : : "r" (addr) : "memory") #define asm_io_fdc(addr) asm volatile("fdc %%r0(%0)" \ ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \ ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) \ - : : "r" (addr)) + : : "r" (addr) : "memory") #define asm_io_sync() asm volatile("sync" \ ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \ - ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) :: ) + ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) :::"memory") #endif /* ! __ASSEMBLY__ */ diff --git a/arch/parisc/kernel/alternative.c b/arch/parisc/kernel/alternative.c index bf2274e01a96..ca1f5ca0540a 100644 --- a/arch/parisc/kernel/alternative.c +++ b/arch/parisc/kernel/alternative.c @@ -56,7 +56,8 @@ void __init_or_module apply_alternatives(struct alt_instr *start, * time IO-PDIR is changed in Ike/Astro. */ if ((cond & ALT_COND_NO_IOC_FDC) && - (boot_cpu_data.pdc.capabilities & PDC_MODEL_IOPDIR_FDC)) + ((boot_cpu_data.cpu_type <= pcxw_) || + (boot_cpu_data.pdc.capabilities & PDC_MODEL_IOPDIR_FDC))) continue; /* Want to replace pdtlb by a pdtlb,l instruction? */ diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index fbb4e43fda05..f56cbab64ac1 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -22,7 +22,7 @@ #include #include - .level LEVEL + .level PA_ASM_LEVEL __INITDATA ENTRY(boot_args) @@ -258,7 +258,7 @@ stext_pdc_ret: ldo R%PA(fault_vector_11)(%r10),%r10 $is_pa20: - .level LEVEL /* restore 1.1 || 2.0w */ + .level PA_ASM_LEVEL /* restore 1.1 || 2.0w */ #endif /*!CONFIG_64BIT*/ load32 PA(fault_vector_20),%r10 diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 841db71958cd..97c206734e24 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -193,6 +193,7 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r) */ int running_on_qemu __read_mostly; +EXPORT_SYMBOL(running_on_qemu); void __cpuidle arch_cpu_idle_dead(void) { diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 4f77bd9be66b..93cc36d98875 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -48,7 +48,7 @@ registers). */ #define KILL_INSN break 0,0 - .level LEVEL + .level PA_ASM_LEVEL .text diff --git a/arch/parisc/math-emu/cnv_float.h b/arch/parisc/math-emu/cnv_float.h index 933423fa5144..b0db61188a61 100644 --- a/arch/parisc/math-emu/cnv_float.h +++ b/arch/parisc/math-emu/cnv_float.h @@ -60,19 +60,19 @@ ((exponent < (SGL_P - 1)) ? \ (Sall(sgl_value) << (SGL_EXP_LENGTH + 1 + exponent)) : FALSE) -#define Int_isinexact_to_sgl(int_value) (int_value << 33 - SGL_EXP_LENGTH) +#define Int_isinexact_to_sgl(int_value) ((int_value << 33 - SGL_EXP_LENGTH) != 0) #define Sgl_roundnearest_from_int(int_value,sgl_value) \ if (int_value & 1<<(SGL_EXP_LENGTH - 2)) /* round bit */ \ - if ((int_value << 34 - SGL_EXP_LENGTH) || Slow(sgl_value)) \ + if (((int_value << 34 - SGL_EXP_LENGTH) != 0) || Slow(sgl_value)) \ Sall(sgl_value)++ #define Dint_isinexact_to_sgl(dint_valueA,dint_valueB) \ - ((Dintp1(dint_valueA) << 33 - SGL_EXP_LENGTH) || Dintp2(dint_valueB)) + (((Dintp1(dint_valueA) << 33 - SGL_EXP_LENGTH) != 0) || Dintp2(dint_valueB)) #define Sgl_roundnearest_from_dint(dint_valueA,dint_valueB,sgl_value) \ if (Dintp1(dint_valueA) & 1<<(SGL_EXP_LENGTH - 2)) \ - if ((Dintp1(dint_valueA) << 34 - SGL_EXP_LENGTH) || \ + if (((Dintp1(dint_valueA) << 34 - SGL_EXP_LENGTH) != 0) || \ Dintp2(dint_valueB) || Slow(sgl_value)) Sall(sgl_value)++ #define Dint_isinexact_to_dbl(dint_value) \ diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index d0b166256f1a..14147eb7a142 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -495,7 +495,7 @@ static void __init map_pages(unsigned long start_vaddr, void __init set_kernel_text_rw(int enable_read_write) { - unsigned long start = (unsigned long) _text; + unsigned long start = (unsigned long) __init_begin; unsigned long end = (unsigned long) &data_start; map_pages(start, __pa(start), end-start, diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c index 9d9f6f334d3c..3da3e2b1b51b 100644 --- a/arch/powerpc/boot/addnote.c +++ b/arch/powerpc/boot/addnote.c @@ -223,7 +223,11 @@ main(int ac, char **av) PUT_16(E_PHNUM, np + 2); /* write back */ - lseek(fd, (long) 0, SEEK_SET); + i = lseek(fd, (long) 0, SEEK_SET); + if (i < 0) { + perror("lseek"); + exit(1); + } i = write(fd, buf, n); if (i < 0) { perror("write"); diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 138bc2ecc0c4..ba188797d940 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -81,6 +81,9 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), pgtable_gfp_flags(mm, GFP_KERNEL)); + if (unlikely(!pgd)) + return pgd; + /* * Don't scan the PGD for pointers, it contains references to PUDs but * those references are not full pointers and so can't be recognised by diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 581f91be9dd4..ebc6c03d4afe 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -875,6 +875,23 @@ static inline int pmd_present(pmd_t pmd) return false; } +static inline int pmd_is_serializing(pmd_t pmd) +{ + /* + * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear + * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate). + * + * This condition may also occur when flushing a pmd while flushing + * it (see ptep_modify_prot_start), so callers must ensure this + * case is fine as well. + */ + if ((pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)) == + cpu_to_be64(_PAGE_INVALID)) + return true; + + return false; +} + static inline int pmd_bad(pmd_t pmd) { if (radix_enabled()) @@ -1090,6 +1107,19 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_access_permitted pmd_access_permitted static inline bool pmd_access_permitted(pmd_t pmd, bool write) { + /* + * pmdp_invalidate sets this combination (which is not caught by + * !pte_present() check in pte_access_permitted), to prevent + * lock-free lookups, as part of the serialize_against_pte_lookup() + * synchronisation. + * + * This also catches the case where the PTE's hardware PRESENT bit is + * cleared while TLB is flushed, which is suboptimal but should not + * be frequent. + */ + if (pmd_is_serializing(pmd)) + return false; + return pte_access_permitted(pmd_pte(pmd), write); } diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 7c1d8e74b25d..7f3279b014db 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -17,6 +17,9 @@ struct drmem_lmb { u32 drc_index; u32 aa_index; u32 flags; +#ifdef CONFIG_MEMORY_HOTPLUG + int nid; +#endif }; struct drmem_lmb_info { @@ -104,4 +107,22 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) lmb->aa_index = 0xffffffff; } +#ifdef CONFIG_MEMORY_HOTPLUG +static inline void lmb_set_nid(struct drmem_lmb *lmb) +{ + lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr); +} +static inline void lmb_clear_nid(struct drmem_lmb *lmb) +{ + lmb->nid = -1; +} +#else +static inline void lmb_set_nid(struct drmem_lmb *lmb) +{ +} +static inline void lmb_clear_nid(struct drmem_lmb *lmb) +{ +} +#endif + #endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 4a585cba1787..c68476818753 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -94,6 +94,9 @@ static inline bool kdump_in_progress(void) return crashing_cpu >= 0; } +void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer, + unsigned long start_address) __noreturn; + #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e6b5bb012ccb..1f9eb75ce95a 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -305,6 +305,7 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; struct list_head rtas_tokens; + struct mutex rtas_token_lock; DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); #endif #ifdef CONFIG_KVM_MPIC @@ -317,6 +318,7 @@ struct kvm_arch { #endif struct kvmppc_ops *kvm_ops; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + struct mutex mmu_setup_lock; /* nests inside vcpu mutexes */ u64 l1_ptcr; int max_nested_lpid; struct kvm_nested_guest *nested_guests[KVM_MAX_NESTED_GUESTS]; diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 6ee8195a2ffb..4a6dd3ba0b0b 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -237,7 +237,6 @@ extern void arch_exit_mmap(struct mm_struct *mm); #endif static inline void arch_unmap(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (start <= mm->context.vdso_base && mm->context.vdso_base < end) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 23f7ed796f38..49d65cd08ee0 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -342,6 +342,7 @@ #define PPC_INST_MADDLD 0x10000033 #define PPC_INST_DIVWU 0x7c000396 #define PPC_INST_DIVD 0x7c0003d2 +#define PPC_INST_DIVDU 0x7c000392 #define PPC_INST_RLWINM 0x54000000 #define PPC_INST_RLWINM_DOT 0x54000001 #define PPC_INST_RLWIMI 0x50000000 diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index eb2a33d5df26..e382bd6ede84 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -41,7 +41,7 @@ #if defined(CONFIG_PPC_BOOK3E_64) #define MSR_64BIT MSR_CM -#define MSR_ (MSR_ME | MSR_CE) +#define MSR_ (MSR_ME | MSR_RI | MSR_CE) #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) #define MSR_USER64 (MSR_USER32 | MSR_64BIT) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 3fad8d499767..5321a11c2835 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -968,7 +968,9 @@ start_here_multiplatform: /* Restore parameters passed from prom_init/kexec */ mr r3,r31 - bl early_setup /* also sets r13 and SPRG_PACA */ + LOAD_REG_ADDR(r12, DOTSYM(early_setup)) + mtctr r12 + bctrl /* also sets r13 and SPRG_PACA */ LOAD_REG_ADDR(r3, start_here_common) ld r4,PACAKMSR(r13) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 7f5ac2e8581b..36178000a2f2 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -170,6 +170,9 @@ core_idle_lock_held: bne- core_idle_lock_held blr +/* Reuse an unused pt_regs slot for IAMR */ +#define PNV_POWERSAVE_IAMR _DAR + /* * Pass requested state in r3: * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8 @@ -200,6 +203,12 @@ pnv_powersave_common: /* Continue saving state */ SAVE_GPR(2, r1) SAVE_NVGPRS(r1) + +BEGIN_FTR_SECTION + mfspr r5, SPRN_IAMR + std r5, PNV_POWERSAVE_IAMR(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + mfcr r5 std r5,_CCR(r1) std r1,PACAR1(r13) @@ -924,6 +933,17 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) REST_NVGPRS(r1) REST_GPR(2, r1) + +BEGIN_FTR_SECTION + /* IAMR was saved in pnv_powersave_common() */ + ld r5, PNV_POWERSAVE_IAMR(r1) + mtspr SPRN_IAMR, r5 + /* + * We don't need an isync here because the upcoming mtmsrd is + * execution synchronizing. + */ +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + ld r4,PACAKMSR(r13) ld r5,_LINK(r1) ld r6,_CCR(r1) diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c index ba4f18a43ee8..52a29fc73730 100644 --- a/arch/powerpc/kernel/kexec_elf_64.c +++ b/arch/powerpc/kernel/kexec_elf_64.c @@ -547,6 +547,7 @@ static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr, kbuf.memsz = phdr->p_memsz; kbuf.buf_align = phdr->p_align; kbuf.buf_min = phdr->p_paddr + base; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) goto out; @@ -581,7 +582,8 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, struct kexec_buf kbuf = { .image = image, .buf_min = 0, .buf_max = ppc64_rma_size }; struct kexec_buf pbuf = { .image = image, .buf_min = 0, - .buf_max = ppc64_rma_size, .top_down = true }; + .buf_max = ppc64_rma_size, .top_down = true, + .mem = KEXEC_BUF_MEM_UNKNOWN }; ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info); if (ret) @@ -606,6 +608,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, kbuf.bufsz = kbuf.memsz = initrd_len; kbuf.buf_align = PAGE_SIZE; kbuf.top_down = false; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) goto out; @@ -638,6 +641,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, kbuf.bufsz = kbuf.memsz = fdt_size; kbuf.buf_align = PAGE_SIZE; kbuf.top_down = true; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) goto out; diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c index affe5dcce7f4..2b160d68db49 100644 --- a/arch/powerpc/kernel/machine_kexec_32.c +++ b/arch/powerpc/kernel/machine_kexec_32.c @@ -30,7 +30,6 @@ typedef void (*relocate_new_kernel_t)( */ void default_machine_kexec(struct kimage *image) { - extern const unsigned char relocate_new_kernel[]; extern const unsigned int relocate_new_kernel_size; unsigned long page_list; unsigned long reboot_code_buffer, reboot_code_buffer_phys; @@ -58,6 +57,9 @@ void default_machine_kexec(struct kimage *image) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); printk(KERN_INFO "Bye!\n"); + if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x)) + relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start); + /* now call it */ rnk = (relocate_new_kernel_t) reboot_code_buffer; (*rnk)(page_list, reboot_code_buffer_phys, image->start); diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index b33bafb8fcea..70568ccbd9fd 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -57,7 +57,7 @@ void setup_barrier_nospec(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR); - if (!no_nospec) + if (!no_nospec && !cpu_mitigations_off()) enable_barrier_nospec(enable); } @@ -116,7 +116,7 @@ static int __init handle_nospectre_v2(char *p) early_param("nospectre_v2", handle_nospectre_v2); void setup_spectre_v2(void) { - if (no_spectrev2) + if (no_spectrev2 || cpu_mitigations_off()) do_btb_flush_fixups(); else btb_flush_enabled = true; @@ -300,7 +300,7 @@ void setup_stf_barrier(void) stf_enabled_flush_types = type; - if (!no_stf_barrier) + if (!no_stf_barrier && !cpu_mitigations_off()) stf_barrier_enable(enable); } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index ba404dd9ce1d..4f49e1a3594c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -932,7 +932,7 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - if (!no_rfi_flush) + if (!no_rfi_flush && !cpu_mitigations_off()) rfi_flush_enable(enable); } diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 3c6ab22a0c4e..af3c15a1d41e 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -77,7 +77,7 @@ static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */ static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */ -static DEFINE_PER_CPU(struct timer_list, wd_timer); +static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer); static DEFINE_PER_CPU(u64, wd_timer_tb); /* SMP checker bits */ @@ -293,21 +293,21 @@ void soft_nmi_interrupt(struct pt_regs *regs) nmi_exit(); } -static void wd_timer_reset(unsigned int cpu, struct timer_list *t) -{ - t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms); - if (wd_timer_period_ms > 1000) - t->expires = __round_jiffies_up(t->expires, cpu); - add_timer_on(t, cpu); -} - -static void wd_timer_fn(struct timer_list *t) +static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return HRTIMER_NORESTART; + + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return HRTIMER_NORESTART; + watchdog_timer_interrupt(cpu); - wd_timer_reset(cpu, t); + hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms)); + + return HRTIMER_RESTART; } void arch_touch_nmi_watchdog(void) @@ -323,37 +323,22 @@ void arch_touch_nmi_watchdog(void) } EXPORT_SYMBOL(arch_touch_nmi_watchdog); -static void start_watchdog_timer_on(unsigned int cpu) -{ - struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); - - per_cpu(wd_timer_tb, cpu) = get_tb(); - - timer_setup(t, wd_timer_fn, TIMER_PINNED); - wd_timer_reset(cpu, t); -} - -static void stop_watchdog_timer_on(unsigned int cpu) -{ - struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); - - del_timer_sync(t); -} - -static int start_wd_on_cpu(unsigned int cpu) +static void start_watchdog(void *arg) { + struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer); + int cpu = smp_processor_id(); unsigned long flags; if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) { WARN_ON(1); - return 0; + return; } if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - return 0; + return; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) - return 0; + return; wd_smp_lock(&flags); cpumask_set_cpu(cpu, &wd_cpus_enabled); @@ -363,27 +348,40 @@ static int start_wd_on_cpu(unsigned int cpu) } wd_smp_unlock(&flags); - start_watchdog_timer_on(cpu); + *this_cpu_ptr(&wd_timer_tb) = get_tb(); - return 0; + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = watchdog_timer_fn; + hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms), + HRTIMER_MODE_REL_PINNED); } -static int stop_wd_on_cpu(unsigned int cpu) +static int start_watchdog_on_cpu(unsigned int cpu) { + return smp_call_function_single(cpu, start_watchdog, NULL, true); +} + +static void stop_watchdog(void *arg) +{ + struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer); + int cpu = smp_processor_id(); unsigned long flags; if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) - return 0; /* Can happen in CPU unplug case */ + return; /* Can happen in CPU unplug case */ - stop_watchdog_timer_on(cpu); + hrtimer_cancel(hrtimer); wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_cpus_enabled); wd_smp_unlock(&flags); wd_smp_clear_cpu_pending(cpu, get_tb()); +} - return 0; +static int stop_watchdog_on_cpu(unsigned int cpu) +{ + return smp_call_function_single(cpu, stop_watchdog, NULL, true); } static void watchdog_calc_timeouts(void) @@ -402,7 +400,7 @@ void watchdog_nmi_stop(void) int cpu; for_each_cpu(cpu, &wd_cpus_enabled) - stop_wd_on_cpu(cpu); + stop_watchdog_on_cpu(cpu); } void watchdog_nmi_start(void) @@ -411,7 +409,7 @@ void watchdog_nmi_start(void) watchdog_calc_timeouts(); for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) - start_wd_on_cpu(cpu); + start_watchdog_on_cpu(cpu); } /* @@ -423,7 +421,8 @@ int __init watchdog_nmi_probe(void) err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online", - start_wd_on_cpu, stop_wd_on_cpu); + start_watchdog_on_cpu, + stop_watchdog_on_cpu); if (err < 0) { pr_warn("could not be initialized"); return err; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 10c5579d20ce..020304403bae 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -878,6 +878,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) #ifdef CONFIG_PPC64 INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables); INIT_LIST_HEAD(&kvm->arch.rtas_tokens); + mutex_init(&kvm->arch.rtas_token_lock); #endif return kvm->arch.kvm_ops->init_vm(kvm); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index be7bc070eae5..c1ced22455f9 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -63,7 +63,7 @@ struct kvm_resize_hpt { struct work_struct work; u32 order; - /* These fields protected by kvm->lock */ + /* These fields protected by kvm->arch.mmu_setup_lock */ /* Possible values and their usage: * <0 an error occurred during allocation, @@ -73,7 +73,7 @@ struct kvm_resize_hpt { int error; /* Private to the work thread, until error != -EBUSY, - * then protected by kvm->lock. + * then protected by kvm->arch.mmu_setup_lock. */ struct kvm_hpt_info hpt; }; @@ -139,7 +139,7 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) long err = -EBUSY; struct kvm_hpt_info info; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); if (kvm->arch.mmu_ready) { kvm->arch.mmu_ready = 0; /* order mmu_ready vs. vcpus_running */ @@ -183,7 +183,7 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) /* Ensure that each vcpu will flush its TLB on next entry. */ cpumask_setall(&kvm->arch.need_tlb_flush); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); return err; } @@ -1447,7 +1447,7 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize) static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize) { - if (WARN_ON(!mutex_is_locked(&kvm->lock))) + if (WARN_ON(!mutex_is_locked(&kvm->arch.mmu_setup_lock))) return; if (!resize) @@ -1474,14 +1474,14 @@ static void resize_hpt_prepare_work(struct work_struct *work) if (WARN_ON(resize->error != -EBUSY)) return; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); /* Request is still current? */ if (kvm->arch.resize_hpt == resize) { /* We may request large allocations here: - * do not sleep with kvm->lock held for a while. + * do not sleep with kvm->arch.mmu_setup_lock held for a while. */ - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", resize->order); @@ -1494,9 +1494,9 @@ static void resize_hpt_prepare_work(struct work_struct *work) if (WARN_ON(err == -EBUSY)) err = -EINPROGRESS; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); /* It is possible that kvm->arch.resize_hpt != resize - * after we grab kvm->lock again. + * after we grab kvm->arch.mmu_setup_lock again. */ } @@ -1505,7 +1505,7 @@ static void resize_hpt_prepare_work(struct work_struct *work) if (kvm->arch.resize_hpt != resize) resize_hpt_release(kvm, resize); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); } long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, @@ -1522,7 +1522,7 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, if (shift && ((shift < 18) || (shift > 46))) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); resize = kvm->arch.resize_hpt; @@ -1565,7 +1565,7 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, ret = 100; /* estimated time in ms */ out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); return ret; } @@ -1588,7 +1588,7 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm, if (shift && ((shift < 18) || (shift > 46))) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); resize = kvm->arch.resize_hpt; @@ -1625,7 +1625,7 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm, smp_mb(); out_no_hpt: resize_hpt_release(kvm, resize); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); return ret; } @@ -1868,7 +1868,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, return -EINVAL; /* lock out vcpus from running while we're doing this */ - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); mmu_ready = kvm->arch.mmu_ready; if (mmu_ready) { kvm->arch.mmu_ready = 0; /* temporarily */ @@ -1876,7 +1876,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, smp_mb(); if (atomic_read(&kvm->arch.vcpus_running)) { kvm->arch.mmu_ready = 1; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); return -EBUSY; } } @@ -1963,7 +1963,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, /* Order HPTE updates vs. mmu_ready */ smp_wmb(); kvm->arch.mmu_ready = mmu_ready; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); if (err) return err; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index b2b29d4f9842..6d4f0f72231f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -445,12 +445,7 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu) static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) { - struct kvm_vcpu *ret; - - mutex_lock(&kvm->lock); - ret = kvm_get_vcpu_by_id(kvm, id); - mutex_unlock(&kvm->lock); - return ret; + return kvm_get_vcpu_by_id(kvm, id); } static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) @@ -1502,7 +1497,6 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 mask; - mutex_lock(&kvm->lock); spin_lock(&vc->lock); /* * If ILE (interrupt little-endian) has changed, update the @@ -1542,7 +1536,6 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, mask &= 0xFFFFFFFF; vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); - mutex_unlock(&kvm->lock); } static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, @@ -2257,11 +2250,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, pr_devel("KVM: collision on id %u", id); vcore = NULL; } else if (!vcore) { + /* + * Take mmu_setup_lock for mutual exclusion + * with kvmppc_update_lpcr(). + */ err = -ENOMEM; vcore = kvmppc_vcore_create(kvm, id & ~(kvm->arch.smt_mode - 1)); + mutex_lock(&kvm->arch.mmu_setup_lock); kvm->arch.vcores[core] = vcore; kvm->arch.online_vcores++; + mutex_unlock(&kvm->arch.mmu_setup_lock); } } mutex_unlock(&kvm->lock); @@ -3624,6 +3623,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vc->in_guest = 0; mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb()); + mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); kvmhv_load_host_pmu(); @@ -3820,7 +3820,7 @@ static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu) int r = 0; struct kvm *kvm = vcpu->kvm; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); if (!kvm->arch.mmu_ready) { if (!kvm_is_radix(kvm)) r = kvmppc_hv_setup_htab_rma(vcpu); @@ -3830,7 +3830,7 @@ static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu) kvm->arch.mmu_ready = 1; } } - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); return r; } @@ -4048,16 +4048,20 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, if (cpu_has_feature(CPU_FTR_HVMODE)) kvmppc_radix_check_need_tlb_flush(kvm, pcpu, nested); - trace_hardirqs_on(); guest_enter_irqoff(); srcu_idx = srcu_read_lock(&kvm->srcu); this_cpu_disable_ftrace(); + /* Tell lockdep that we're about to enable interrupts */ + trace_hardirqs_on(); + trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr); vcpu->arch.trap = trap; + trace_hardirqs_off(); + this_cpu_enable_ftrace(); srcu_read_unlock(&kvm->srcu, srcu_idx); @@ -4067,7 +4071,6 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, isync(); } - trace_hardirqs_off(); set_irq_happened(trap); kvmppc_set_host_core(pcpu); @@ -4435,7 +4438,8 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, /* * Update LPCR values in kvm->arch and in vcores. - * Caller must hold kvm->lock. + * Caller must hold kvm->arch.mmu_setup_lock (for mutual exclusion + * of kvm->arch.lpcr update). */ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) { @@ -4487,7 +4491,7 @@ void kvmppc_setup_partition_table(struct kvm *kvm) /* * Set up HPT (hashed page table) and RMA (real-mode area). - * Must be called with kvm->lock held. + * Must be called with kvm->arch.mmu_setup_lock held. */ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { @@ -4575,7 +4579,10 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) goto out_srcu; } -/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */ +/* + * Must be called with kvm->arch.mmu_setup_lock held and + * mmu_ready = 0 and no vcpus running. + */ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) { if (nesting_enabled(kvm)) @@ -4592,7 +4599,10 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) return 0; } -/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */ +/* + * Must be called with kvm->arch.mmu_setup_lock held and + * mmu_ready = 0 and no vcpus running. + */ int kvmppc_switch_mmu_to_radix(struct kvm *kvm) { int err; @@ -4697,6 +4707,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) char buf[32]; int ret; + mutex_init(&kvm->arch.mmu_setup_lock); + /* Allocate the guest's logical partition ID */ lpid = kvmppc_alloc_lpid(); @@ -5222,7 +5234,7 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg) if (kvmhv_on_pseries() && !radix) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); if (radix != kvm_is_radix(kvm)) { if (kvm->arch.mmu_ready) { kvm->arch.mmu_ready = 0; @@ -5250,7 +5262,7 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg) err = 0; out_unlock: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); return err; } diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 4e178c4c1ea5..b7ae3dfbf00e 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -146,7 +146,7 @@ static int rtas_token_undefine(struct kvm *kvm, char *name) { struct rtas_token_definition *d, *tmp; - lockdep_assert_held(&kvm->lock); + lockdep_assert_held(&kvm->arch.rtas_token_lock); list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { if (rtas_name_matches(d->handler->name, name)) { @@ -167,7 +167,7 @@ static int rtas_token_define(struct kvm *kvm, char *name, u64 token) bool found; int i; - lockdep_assert_held(&kvm->lock); + lockdep_assert_held(&kvm->arch.rtas_token_lock); list_for_each_entry(d, &kvm->arch.rtas_tokens, list) { if (d->token == token) @@ -206,14 +206,14 @@ int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp) if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.rtas_token_lock); if (args.token) rc = rtas_token_define(kvm, args.name, args.token); else rc = rtas_token_undefine(kvm, args.name); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.rtas_token_lock); return rc; } @@ -245,7 +245,7 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) orig_rets = args.rets; args.rets = &args.args[be32_to_cpu(args.nargs)]; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.rtas_token_lock); rc = -ENOENT; list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { @@ -256,7 +256,7 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) } } - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.rtas_token_lock); if (rc == 0) { args.rets = orig_rets; @@ -282,8 +282,6 @@ void kvmppc_rtas_tokens_free(struct kvm *kvm) { struct rtas_token_definition *d, *tmp; - lockdep_assert_held(&kvm->lock); - list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { list_del(&d->list); kfree(d); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index f78d002f0fe0..1ad950af7b7b 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1786,7 +1786,6 @@ static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd) { xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); xive_native_configure_irq(hw_num, 0, MASKED, 0); - xive_cleanup_irq_data(xd); } static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) @@ -1800,9 +1799,10 @@ static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) continue; kvmppc_xive_cleanup_irq(state->ipi_number, &state->ipi_data); + xive_cleanup_irq_data(&state->ipi_data); xive_native_free_irq(state->ipi_number); - /* Pass-through, cleanup too */ + /* Pass-through, cleanup too but keep IRQ hw data */ if (state->pt_number) kvmppc_xive_cleanup_irq(state->pt_number, state->pt_data); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8885377ec3e0..d2e800e27f03 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -650,6 +650,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_MAX_VCPU_ID: + r = KVM_MAX_VCPU_ID; + break; #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_PPC_GET_SMMU_INFO: r = 1; diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 3f1803672c9b..641891df2046 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -366,8 +366,10 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) if (!drmem_info->lmbs) return; - for_each_drmem_lmb(lmb) + for_each_drmem_lmb(lmb) { read_drconf_v1_cell(lmb, &prop); + lmb_set_nid(lmb); + } } static void __init init_drmem_v2_lmbs(const __be32 *prop) @@ -412,6 +414,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) lmb->aa_index = dr_cell.aa_index; lmb->flags = dr_cell.flags; + + lmb_set_nid(lmb); } } } diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index a6c491f18a04..5842cfa0394d 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -539,7 +539,8 @@ _GLOBAL(flush_hash_pages) #ifdef CONFIG_SMP lis r9, (mmu_hash_lock - PAGE_OFFSET)@ha addi r9, r9, (mmu_hash_lock - PAGE_OFFSET)@l - lwz r8,TASK_CPU(r2) + tophys (r8, r2) + lwz r8, TASK_CPU(r8) oris r8,r8,9 10: lwarx r0,0,r9 cmpi 0,r0,0 diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index f720c5cc0b5e..8751ae2e2d04 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -55,14 +55,48 @@ EXPORT_SYMBOL_GPL(hash__alloc_context_id); void slb_setup_new_exec(void); +static int realloc_context_ids(mm_context_t *ctx) +{ + int i, id; + + /* + * id 0 (aka. ctx->id) is special, we always allocate a new one, even if + * there wasn't one allocated previously (which happens in the exec + * case where ctx is newly allocated). + * + * We have to be a bit careful here. We must keep the existing ids in + * the array, so that we can test if they're non-zero to decide if we + * need to allocate a new one. However in case of error we must free the + * ids we've allocated but *not* any of the existing ones (or risk a + * UAF). That's why we decrement i at the start of the error handling + * loop, to skip the id that we just tested but couldn't reallocate. + */ + for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) { + if (i == 0 || ctx->extended_id[i]) { + id = hash__alloc_context_id(); + if (id < 0) + goto error; + + ctx->extended_id[i] = id; + } + } + + /* The caller expects us to return id */ + return ctx->id; + +error: + for (i--; i >= 0; i--) { + if (ctx->extended_id[i]) + ida_free(&mmu_context_ida, ctx->extended_id[i]); + } + + return id; +} + static int hash__init_new_context(struct mm_struct *mm) { int index; - index = hash__alloc_context_id(); - if (index < 0) - return index; - /* * The old code would re-promote on fork, we don't do that when using * slices as it could cause problem promoting slices that have been @@ -80,6 +114,10 @@ static int hash__init_new_context(struct mm_struct *mm) if (mm->context.id == 0) slice_init_new_context_exec(mm); + index = realloc_context_ids(&mm->context); + if (index < 0) + return index; + subpage_prot_init_new_context(mm); pkey_mm_init(mm); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f976676004ad..48c9a97eb2c3 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1498,6 +1498,9 @@ int start_topology_update(void) { int rc = 0; + if (!topology_updates_enabled) + return 0; + if (firmware_has_feature(FW_FEATURE_PRRN)) { if (!prrn_enabled) { prrn_enabled = 1; @@ -1531,6 +1534,9 @@ int stop_topology_update(void) { int rc = 0; + if (!topology_updates_enabled) + return 0; + if (prrn_enabled) { prrn_enabled = 0; #ifdef CONFIG_SMP @@ -1588,11 +1594,13 @@ static ssize_t topology_write(struct file *file, const char __user *buf, kbuf[read_len] = '\0'; - if (!strncmp(kbuf, "on", 2)) + if (!strncmp(kbuf, "on", 2)) { + topology_updates_enabled = true; start_topology_update(); - else if (!strncmp(kbuf, "off", 3)) + } else if (!strncmp(kbuf, "off", 3)) { stop_topology_update(); - else + topology_updates_enabled = false; + } else return -EINVAL; return count; @@ -1607,9 +1615,7 @@ static const struct file_operations topology_ops = { static int topology_update_init(void) { - /* Do not poll for changes if disabled at boot */ - if (topology_updates_enabled) - start_topology_update(); + start_topology_update(); if (vphn_enabled) topology_schedule_update(); diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index a4341aba0af4..e538804d3f4a 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -116,6 +116,9 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, /* * This ensures that generic code that rely on IRQ disabling * to prevent a parallel THP split work as expected. + * + * Marking the entry with _PAGE_INVALID && ~_PAGE_PRESENT requires + * a special case check in pmd_access_permitted. */ serialize_against_pte_lookup(vma->vm_mm); return __pmd(old_pmd); diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index dcac37745b05..1e932898d430 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -116,7 +116,7 @@ ___PPC_RA(a) | IMM_L(i)) #define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_DIVD(d, a, b) EMIT(PPC_INST_DIVD | ___PPC_RT(d) | \ +#define PPC_DIVDU(d, a, b) EMIT(PPC_INST_DIVDU | ___PPC_RT(d) | \ ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \ ___PPC_RS(a) | ___PPC_RB(b)) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 21a1dcd4b156..e3fedeffe40f 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -399,12 +399,12 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ if (BPF_OP(code) == BPF_MOD) { - PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg); + PPC_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg); PPC_MULD(b2p[TMP_REG_1], src_reg, b2p[TMP_REG_1]); PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); } else - PPC_DIVD(dst_reg, dst_reg, src_reg); + PPC_DIVDU(dst_reg, dst_reg, src_reg); break; case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ @@ -432,7 +432,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, break; case BPF_ALU64: if (BPF_OP(code) == BPF_MOD) { - PPC_DIVD(b2p[TMP_REG_2], dst_reg, + PPC_DIVDU(b2p[TMP_REG_2], dst_reg, b2p[TMP_REG_1]); PPC_MULD(b2p[TMP_REG_1], b2p[TMP_REG_1], @@ -440,7 +440,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); } else - PPC_DIVD(dst_reg, dst_reg, + PPC_DIVDU(dst_reg, dst_reg, b2p[TMP_REG_1]); break; } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b0723002a396..8eb5dc5df62b 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1846,6 +1846,7 @@ static int power_pmu_event_init(struct perf_event *event) int n; int err; struct cpu_hw_events *cpuhw; + u64 bhrb_filter; if (!ppmu) return -ENOENT; @@ -1951,13 +1952,14 @@ static int power_pmu_event_init(struct perf_event *event) err = power_check_constraints(cpuhw, events, cflags, n + 1); if (has_branch_stack(event)) { - cpuhw->bhrb_filter = ppmu->bhrb_filter_map( + bhrb_filter = ppmu->bhrb_filter_map( event->attr.branch_sample_type); - if (cpuhw->bhrb_filter == -1) { + if (bhrb_filter == -1) { put_cpu_var(cpu_hw_events); return -EOPNOTSUPP; } + cpuhw->bhrb_filter = bhrb_filter; } put_cpu_var(cpu_hw_events); diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index b1c37cc3fa98..2d12f0037e3a 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -487,6 +487,11 @@ static int nest_imc_event_init(struct perf_event *event) * Get the base memory addresss for this cpu. */ chip_id = cpu_to_chip_id(event->cpu); + + /* Return, if chip_id is not valid */ + if (chip_id < 0) + return -ENODEV; + pcni = pmu->mem_info; do { if (pcni->id == chip_id) { @@ -494,7 +499,7 @@ static int nest_imc_event_init(struct perf_event *event) break; } pcni++; - } while (pcni); + } while (pcni->vbase != 0); if (!flag) return -ENODEV; diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index d12a2db26353..d10feef93b6b 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -29,6 +29,7 @@ enum { #define POWER8_MMCRA_IFM1 0x0000000040000000UL #define POWER8_MMCRA_IFM2 0x0000000080000000UL #define POWER8_MMCRA_IFM3 0x00000000C0000000UL +#define POWER8_MMCRA_BHRB_MASK 0x00000000C0000000UL /* * Raw event encoding for PowerISA v2.07 (Power8): @@ -243,6 +244,8 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type) static void power8_config_bhrb(u64 pmu_bhrb_filter) { + pmu_bhrb_filter &= POWER8_MMCRA_BHRB_MASK; + /* Enable BHRB filter in PMU */ mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter)); } diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 030544e35959..f3987915cadc 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -92,6 +92,7 @@ enum { #define POWER9_MMCRA_IFM1 0x0000000040000000UL #define POWER9_MMCRA_IFM2 0x0000000080000000UL #define POWER9_MMCRA_IFM3 0x00000000C0000000UL +#define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL /* Nasty Power9 specific hack */ #define PVR_POWER9_CUMULUS 0x00002000 @@ -300,6 +301,8 @@ static u64 power9_bhrb_filter_map(u64 branch_sample_type) static void power9_config_bhrb(u64 pmu_bhrb_filter) { + pmu_bhrb_filter &= POWER9_MMCRA_BHRB_MASK; + /* Enable BHRB filter in PMU */ mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter)); } diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 9fcccb4490b9..7f2b6c226eed 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -64,11 +64,6 @@ static struct task_struct *spusched_task; static struct timer_list spusched_timer; static struct timer_list spuloadavg_timer; -/* - * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). - */ -#define NORMAL_PRIO 120 - /* * Frequency of the spu scheduler tick. By default we do one SPU scheduler * tick for every 10 CPU scheduler ticks. diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 58a07948c76e..828f6656f8f7 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -127,7 +127,7 @@ static int imc_get_mem_addr_nest(struct device_node *node, nr_chips)) goto error; - pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info), + pmu_ptr->mem_info = kcalloc(nr_chips + 1, sizeof(*pmu_ptr->mem_info), GFP_KERNEL); if (!pmu_ptr->mem_info) goto error; @@ -161,6 +161,10 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) struct imc_pmu *pmu_ptr; u32 offset; + /* Return for unknown domain */ + if (domain < 0) + return -EINVAL; + /* memory for pmu */ pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL); if (!pmu_ptr) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index d291b618a559..47087832f8b2 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -379,7 +379,7 @@ static int dlpar_add_lmb(struct drmem_lmb *); static int dlpar_remove_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; - int nid, rc; + int rc; if (!lmb_is_removable(lmb)) return -EINVAL; @@ -389,14 +389,14 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) return rc; block_sz = pseries_memory_block_size(); - nid = memory_add_physaddr_to_nid(lmb->base_addr); - __remove_memory(nid, lmb->base_addr, block_sz); + __remove_memory(lmb->nid, lmb->base_addr, block_sz); /* Update memory regions for memory remove */ memblock_remove(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); + lmb_clear_nid(lmb); lmb->flags &= ~DRCONF_MEM_ASSIGNED; return 0; @@ -653,7 +653,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) static int dlpar_add_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; - int nid, rc; + int rc; if (lmb->flags & DRCONF_MEM_ASSIGNED) return -EINVAL; @@ -664,13 +664,11 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) return rc; } + lmb_set_nid(lmb); block_sz = memory_block_size_bytes(); - /* Find the node id for this address */ - nid = memory_add_physaddr_to_nid(lmb->base_addr); - /* Add the memory */ - rc = __add_memory(nid, lmb->base_addr, block_sz); + rc = __add_memory(lmb->nid, lmb->base_addr, block_sz); if (rc) { invalidate_lmb_associativity_index(lmb); return rc; @@ -678,8 +676,9 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - __remove_memory(nid, lmb->base_addr, block_sz); + __remove_memory(lmb->nid, lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); + lmb_clear_nid(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; } diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 88401d5125bc..523dbfbac03d 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -29,6 +29,7 @@ #include #include +#include /* * This routine handles page faults. It determines the address and the @@ -281,6 +282,18 @@ asmlinkage void do_page_fault(struct pt_regs *regs) pte_k = pte_offset_kernel(pmd_k, addr); if (!pte_present(*pte_k)) goto no_context; + + /* + * The kernel assumes that TLBs don't cache invalid + * entries, but in RISC-V, SFENCE.VMA specifies an + * ordering constraint, not a cache flush; it is + * necessary even after writing invalid entries. + * Relying on flush_tlb_fix_spurious_fault would + * suffice, but the extra traps reduce + * performance. So, eagerly SFENCE.VMA. + */ + local_flush_tlb_page(addr); + return; } } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b6e3d0653002..3c01d5946f3b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -149,6 +149,7 @@ config S390 select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS + select HAVE_GENERIC_GUP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index dd456725189f..d00f84add5f4 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -27,14 +27,14 @@ #include #include #include -#include +#include #include #include #include #include static u8 *ctrblk; -static DEFINE_SPINLOCK(ctrblk_lock); +static DEFINE_MUTEX(ctrblk_lock); static cpacf_mask_t km_functions, kmc_functions, kmctr_functions, kma_functions; @@ -698,7 +698,7 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, unsigned int n, nbytes; int ret, locked; - locked = spin_trylock(&ctrblk_lock); + locked = mutex_trylock(&ctrblk_lock); ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { @@ -716,7 +716,7 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, ret = blkcipher_walk_done(desc, walk, nbytes - n); } if (locked) - spin_unlock(&ctrblk_lock); + mutex_unlock(&ctrblk_lock); /* * final block may be < AES_BLOCK_SIZE, copy only nbytes */ @@ -826,19 +826,45 @@ static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize) return 0; } -static void gcm_sg_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg, - unsigned int len) +static void gcm_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg, + unsigned int len) { memset(gw, 0, sizeof(*gw)); gw->walk_bytes_remain = len; scatterwalk_start(&gw->walk, sg); } -static int gcm_sg_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) +static inline unsigned int _gcm_sg_clamp_and_map(struct gcm_sg_walk *gw) +{ + struct scatterlist *nextsg; + + gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain); + while (!gw->walk_bytes) { + nextsg = sg_next(gw->walk.sg); + if (!nextsg) + return 0; + scatterwalk_start(&gw->walk, nextsg); + gw->walk_bytes = scatterwalk_clamp(&gw->walk, + gw->walk_bytes_remain); + } + gw->walk_ptr = scatterwalk_map(&gw->walk); + return gw->walk_bytes; +} + +static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw, + unsigned int nbytes) +{ + gw->walk_bytes_remain -= nbytes; + scatterwalk_unmap(&gw->walk); + scatterwalk_advance(&gw->walk, nbytes); + scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); + gw->walk_ptr = NULL; +} + +static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) { int n; - /* minbytesneeded <= AES_BLOCK_SIZE */ if (gw->buf_bytes && gw->buf_bytes >= minbytesneeded) { gw->ptr = gw->buf; gw->nbytes = gw->buf_bytes; @@ -851,13 +877,11 @@ static int gcm_sg_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) goto out; } - gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain); - if (!gw->walk_bytes) { - scatterwalk_start(&gw->walk, sg_next(gw->walk.sg)); - gw->walk_bytes = scatterwalk_clamp(&gw->walk, - gw->walk_bytes_remain); + if (!_gcm_sg_clamp_and_map(gw)) { + gw->ptr = NULL; + gw->nbytes = 0; + goto out; } - gw->walk_ptr = scatterwalk_map(&gw->walk); if (!gw->buf_bytes && gw->walk_bytes >= minbytesneeded) { gw->ptr = gw->walk_ptr; @@ -869,51 +893,90 @@ static int gcm_sg_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) n = min(gw->walk_bytes, AES_BLOCK_SIZE - gw->buf_bytes); memcpy(gw->buf + gw->buf_bytes, gw->walk_ptr, n); gw->buf_bytes += n; - gw->walk_bytes_remain -= n; - scatterwalk_unmap(&gw->walk); - scatterwalk_advance(&gw->walk, n); - scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); - + _gcm_sg_unmap_and_advance(gw, n); if (gw->buf_bytes >= minbytesneeded) { gw->ptr = gw->buf; gw->nbytes = gw->buf_bytes; goto out; } - - gw->walk_bytes = scatterwalk_clamp(&gw->walk, - gw->walk_bytes_remain); - if (!gw->walk_bytes) { - scatterwalk_start(&gw->walk, sg_next(gw->walk.sg)); - gw->walk_bytes = scatterwalk_clamp(&gw->walk, - gw->walk_bytes_remain); + if (!_gcm_sg_clamp_and_map(gw)) { + gw->ptr = NULL; + gw->nbytes = 0; + goto out; } - gw->walk_ptr = scatterwalk_map(&gw->walk); } out: return gw->nbytes; } -static void gcm_sg_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) +static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) { - int n; + if (gw->walk_bytes_remain == 0) { + gw->ptr = NULL; + gw->nbytes = 0; + goto out; + } + if (!_gcm_sg_clamp_and_map(gw)) { + gw->ptr = NULL; + gw->nbytes = 0; + goto out; + } + + if (gw->walk_bytes >= minbytesneeded) { + gw->ptr = gw->walk_ptr; + gw->nbytes = gw->walk_bytes; + goto out; + } + + scatterwalk_unmap(&gw->walk); + gw->walk_ptr = NULL; + + gw->ptr = gw->buf; + gw->nbytes = sizeof(gw->buf); + +out: + return gw->nbytes; +} + +static int gcm_in_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) +{ if (gw->ptr == NULL) - return; + return 0; if (gw->ptr == gw->buf) { - n = gw->buf_bytes - bytesdone; + int n = gw->buf_bytes - bytesdone; if (n > 0) { memmove(gw->buf, gw->buf + bytesdone, n); - gw->buf_bytes -= n; + gw->buf_bytes = n; } else gw->buf_bytes = 0; - } else { - gw->walk_bytes_remain -= bytesdone; - scatterwalk_unmap(&gw->walk); - scatterwalk_advance(&gw->walk, bytesdone); - scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); - } + } else + _gcm_sg_unmap_and_advance(gw, bytesdone); + + return bytesdone; +} + +static int gcm_out_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) +{ + int i, n; + + if (gw->ptr == NULL) + return 0; + + if (gw->ptr == gw->buf) { + for (i = 0; i < bytesdone; i += n) { + if (!_gcm_sg_clamp_and_map(gw)) + return i; + n = min(gw->walk_bytes, bytesdone - i); + memcpy(gw->walk_ptr, gw->buf + i, n); + _gcm_sg_unmap_and_advance(gw, n); + } + } else + _gcm_sg_unmap_and_advance(gw, bytesdone); + + return bytesdone; } static int gcm_aes_crypt(struct aead_request *req, unsigned int flags) @@ -926,7 +989,7 @@ static int gcm_aes_crypt(struct aead_request *req, unsigned int flags) unsigned int pclen = req->cryptlen; int ret = 0; - unsigned int len, in_bytes, out_bytes, + unsigned int n, len, in_bytes, out_bytes, min_bytes, bytes, aad_bytes, pc_bytes; struct gcm_sg_walk gw_in, gw_out; u8 tag[GHASH_DIGEST_SIZE]; @@ -963,14 +1026,14 @@ static int gcm_aes_crypt(struct aead_request *req, unsigned int flags) *(u32 *)(param.j0 + ivsize) = 1; memcpy(param.k, ctx->key, ctx->key_len); - gcm_sg_walk_start(&gw_in, req->src, len); - gcm_sg_walk_start(&gw_out, req->dst, len); + gcm_walk_start(&gw_in, req->src, len); + gcm_walk_start(&gw_out, req->dst, len); do { min_bytes = min_t(unsigned int, aadlen > 0 ? aadlen : pclen, AES_BLOCK_SIZE); - in_bytes = gcm_sg_walk_go(&gw_in, min_bytes); - out_bytes = gcm_sg_walk_go(&gw_out, min_bytes); + in_bytes = gcm_in_walk_go(&gw_in, min_bytes); + out_bytes = gcm_out_walk_go(&gw_out, min_bytes); bytes = min(in_bytes, out_bytes); if (aadlen + pclen <= bytes) { @@ -997,8 +1060,11 @@ static int gcm_aes_crypt(struct aead_request *req, unsigned int flags) gw_in.ptr + aad_bytes, pc_bytes, gw_in.ptr, aad_bytes); - gcm_sg_walk_done(&gw_in, aad_bytes + pc_bytes); - gcm_sg_walk_done(&gw_out, aad_bytes + pc_bytes); + n = aad_bytes + pc_bytes; + if (gcm_in_walk_done(&gw_in, n) != n) + return -ENOMEM; + if (gcm_out_walk_done(&gw_out, n) != n) + return -ENOMEM; aadlen -= aad_bytes; pclen -= pc_bytes; } while (aadlen + pclen > 0); diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 0d15383d0ff1..d64531cc9623 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -21,7 +22,7 @@ #define DES3_KEY_SIZE (3 * DES_KEY_SIZE) static u8 *ctrblk; -static DEFINE_SPINLOCK(ctrblk_lock); +static DEFINE_MUTEX(ctrblk_lock); static cpacf_mask_t km_functions, kmc_functions, kmctr_functions; @@ -387,7 +388,7 @@ static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, unsigned int n, nbytes; int ret, locked; - locked = spin_trylock(&ctrblk_lock); + locked = mutex_trylock(&ctrblk_lock); ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { @@ -404,7 +405,7 @@ static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, ret = blkcipher_walk_done(desc, walk, nbytes - n); } if (locked) - spin_unlock(&ctrblk_lock); + mutex_unlock(&ctrblk_lock); /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr, diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index e94a0a28b5eb..aea32dda3d14 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -160,8 +160,8 @@ struct ap_config_info { unsigned char Nd; /* max # of Domains - 1 */ unsigned char _reserved3[10]; unsigned int apm[8]; /* AP ID mask */ - unsigned int aqm[8]; /* AP queue mask */ - unsigned int adm[8]; /* AP domain mask */ + unsigned int aqm[8]; /* AP (usage) queue mask */ + unsigned int adm[8]; /* AP (control) domain mask */ unsigned char _reserved4[16]; } __aligned(8); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 76dc344edb8c..394bec31cb97 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1204,42 +1204,79 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) -#define pgd_offset_k(address) pgd_offset(&init_mm, address) -#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr)) - #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) #define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN) #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) -static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +/* + * The pgd_offset function *always* adds the index for the top-level + * region/segment table. This is done to get a sequence like the + * following to work: + * pgdp = pgd_offset(current->mm, addr); + * pgd = READ_ONCE(*pgdp); + * p4dp = p4d_offset(&pgd, addr); + * ... + * The subsequent p4d_offset, pud_offset and pmd_offset functions + * only add an index if they dereferenced the pointer. + */ +static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address) { - p4d_t *p4d = (p4d_t *) pgd; + unsigned long rste; + unsigned int shift; - if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) - p4d = (p4d_t *) pgd_deref(*pgd); - return p4d + p4d_index(address); + /* Get the first entry of the top level table */ + rste = pgd_val(*pgd); + /* Pick up the shift from the table type of the first entry */ + shift = ((rste & _REGION_ENTRY_TYPE_MASK) >> 2) * 11 + 20; + return pgd + ((address >> shift) & (PTRS_PER_PGD - 1)); } -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +#define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address) +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { - pud_t *pud = (pud_t *) p4d; + if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1) + return (p4d_t *) pgd_deref(*pgd) + p4d_index(address); + return (p4d_t *) pgd; +} - if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - pud = (pud_t *) p4d_deref(*p4d); - return pud + pud_index(address); +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +{ + if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2) + return (pud_t *) p4d_deref(*p4d) + pud_index(address); + return (pud_t *) p4d; } static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { - pmd_t *pmd = (pmd_t *) pud; + if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3) + return (pmd_t *) pud_deref(*pud) + pmd_index(address); + return (pmd_t *) pud; +} - if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - pmd = (pmd_t *) pud_deref(*pud); - return pmd + pmd_index(address); +static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address) +{ + return (pte_t *) pmd_deref(*pmd) + pte_index(address); } +#define pte_offset_kernel(pmd, address) pte_offset(pmd, address) +#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) +#define pte_unmap(pte) do { } while (0) + +static inline bool gup_fast_permitted(unsigned long start, int nr_pages) +{ + unsigned long len, end; + + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (end < start) + return false; + return end <= current->mm->context.asce_limit; +} +#define gup_fast_permitted gup_fast_permitted + #define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot)) #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) #define pte_page(x) pfn_to_page(pte_pfn(x)) @@ -1249,12 +1286,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) #define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) -/* Find an entry in the lowest level page table.. */ -#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) -#define pte_offset_kernel(pmd, address) pte_offset(pmd,address) -#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) -#define pte_unmap(pte) do { } while (0) - static inline pmd_t pmd_wrprotect(pmd_t pmd) { pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE; diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 5a286b012043..602e7cc26d11 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -19,10 +19,15 @@ static int kexec_file_add_elf_kernel(struct kimage *image, struct kexec_buf buf; const Elf_Ehdr *ehdr; const Elf_Phdr *phdr; + Elf_Addr entry; int i, ret; ehdr = (Elf_Ehdr *)kernel; buf.image = image; + if (image->type == KEXEC_TYPE_CRASH) + entry = STARTUP_KDUMP_OFFSET; + else + entry = ehdr->e_entry; phdr = (void *)ehdr + ehdr->e_phoff; for (i = 0; i < ehdr->e_phnum; i++, phdr++) { @@ -35,7 +40,7 @@ static int kexec_file_add_elf_kernel(struct kimage *image, buf.mem = ALIGN(phdr->p_paddr, phdr->p_align); buf.memsz = phdr->p_memsz; - if (phdr->p_paddr == 0) { + if (entry - phdr->p_paddr < phdr->p_memsz) { data->kernel_buf = buf.buffer; data->memsz += STARTUP_NORMAL_OFFSET; diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index bdddaae96559..649135cbedd5 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include static int __init nobp_setup_early(char *str) @@ -58,7 +59,7 @@ early_param("nospectre_v2", nospectre_v2_setup_early); void __init nospec_auto_detect(void) { - if (test_facility(156)) { + if (test_facility(156) || cpu_mitigations_off()) { /* * The machine supports etokens. * Disable expolines and disable nobp. diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4638303ba6a8..ee35f1112db9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -507,6 +507,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: + case KVM_CAP_MAX_VCPU_ID: r = KVM_S390_BSCA_CPU_SLOTS; if (!kvm_s390_use_sca_entries()) r = KVM_MAX_VCPUS; @@ -4412,21 +4413,28 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { - int rc; - - /* If the basics of the memslot do not change, we do not want - * to update the gmap. Every update causes several unnecessary - * segment translation exceptions. This is usually handled just - * fine by the normal fault handler + gmap, but it will also - * cause faults on the prefix page of running guest CPUs. - */ - if (old->userspace_addr == mem->userspace_addr && - old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && - old->npages * PAGE_SIZE == mem->memory_size) - return; + int rc = 0; - rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, - mem->guest_phys_addr, mem->memory_size); + switch (change) { + case KVM_MR_DELETE: + rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, + old->npages * PAGE_SIZE); + break; + case KVM_MR_MOVE: + rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, + old->npages * PAGE_SIZE); + if (rc) + break; + /* FALLTHROUGH */ + case KVM_MR_CREATE: + rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, + mem->guest_phys_addr, mem->memory_size); + break; + case KVM_MR_FLAGS_ONLY: + break; + default: + WARN(1, "Unknown KVM MR CHANGE: %d\n", change); + } if (rc) pr_warn("failed to commit memory region\n"); return; diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index f5880bfd1b0c..3175413186b9 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -4,7 +4,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o -obj-y += page-states.o gup.o pageattr.o pgtable.o pgalloc.o +obj-y += page-states.o pageattr.o pgtable.o pgalloc.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 11613362c4e7..5f14d0df99bf 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -83,7 +83,6 @@ static inline int notify_page_fault(struct pt_regs *regs) /* * Find out which address space caused the exception. - * Access register mode is impossible, ignore space == 3. */ static inline enum fault_type get_fault_type(struct pt_regs *regs) { @@ -108,6 +107,10 @@ static inline enum fault_type get_fault_type(struct pt_regs *regs) } return VDSO_FAULT; } + if (trans_exc_code == 1) { + /* access register mode, not used in the kernel */ + return USER_FAULT; + } /* home space exception -> access via kernel ASCE */ return KERNEL_FAULT; } diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c deleted file mode 100644 index 2809d11c7a28..000000000000 --- a/arch/s390/mm/gup.c +++ /dev/null @@ -1,300 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Lockless get_user_pages_fast for s390 - * - * Copyright IBM Corp. 2010 - * Author(s): Martin Schwidefsky - */ -#include -#include -#include -#include -#include -#include -#include - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - struct page *head, *page; - unsigned long mask; - pte_t *ptep, pte; - - mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL; - - ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); - do { - pte = *ptep; - barrier(); - /* Similar to the PMD case, NUMA hinting must take slow path */ - if (pte_protnone(pte)) - return 0; - if ((pte_val(pte) & mask) != 0) - return 0; - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - head = compound_head(page); - if (!page_cache_get_speculative(head)) - return 0; - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_page(head); - return 0; - } - VM_BUG_ON_PAGE(compound_head(page) != head, page); - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - - return 1; -} - -static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - struct page *head, *page; - unsigned long mask; - int refs; - - mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID; - if ((pmd_val(pmd) & mask) != 0) - return 0; - VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); - - refs = 0; - head = pmd_page(pmd); - page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - - if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { - *nr -= refs; - while (refs--) - put_page(head); - return 0; - } - - return 1; -} - - -static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp, pmd; - - pmdp = (pmd_t *) pudp; - if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - pmdp = (pmd_t *) pud_deref(pud); - pmdp += pmd_index(addr); - do { - pmd = *pmdp; - barrier(); - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - return 0; - if (unlikely(pmd_large(pmd))) { - /* - * NUMA hinting faults need to be handled in the GUP - * slowpath for accounting purposes and so that they - * can be serialised against THP migration. - */ - if (pmd_protnone(pmd)) - return 0; - if (!gup_huge_pmd(pmdp, pmd, addr, next, - write, pages, nr)) - return 0; - } else if (!gup_pte_range(pmdp, pmd, addr, next, - write, pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - struct page *head, *page; - unsigned long mask; - int refs; - - mask = (write ? _REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID; - if ((pud_val(pud) & mask) != 0) - return 0; - VM_BUG_ON(!pfn_valid(pud_pfn(pud))); - - refs = 0; - head = pud_page(pud); - page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON_PAGE(compound_head(page) != head, page); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - - if (unlikely(pud_val(pud) != pud_val(*pudp))) { - *nr -= refs; - while (refs--) - put_page(head); - return 0; - } - - return 1; -} - -static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp, pud; - - pudp = (pud_t *) p4dp; - if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - pudp = (pud_t *) p4d_deref(p4d); - pudp += pud_index(addr); - do { - pud = *pudp; - barrier(); - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (unlikely(pud_large(pud))) { - if (!gup_huge_pud(pudp, pud, addr, next, write, pages, - nr)) - return 0; - } else if (!gup_pmd_range(pudp, pud, addr, next, write, pages, - nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long next; - p4d_t *p4dp, p4d; - - p4dp = (p4d_t *) pgdp; - if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) - p4dp = (p4d_t *) pgd_deref(pgd); - p4dp += p4d_index(addr); - do { - p4d = *p4dp; - barrier(); - next = p4d_addr_end(addr, end); - if (p4d_none(p4d)) - return 0; - if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr)) - return 0; - } while (p4dp++, addr = next, addr != end); - - return 1; -} - -/* - * Like get_user_pages_fast() except its IRQ-safe in that it won't fall - * back to the regular GUP. - * Note a difference with get_user_pages_fast: this always returns the - * number of pages pinned, 0 if no pages were pinned. - */ -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next, flags; - pgd_t *pgdp, pgd; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - if ((end <= start) || (end > mm->context.asce_limit)) - return 0; - /* - * local_irq_save() doesn't prevent pagetable teardown, but does - * prevent the pagetables from being freed on s390. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the the page and take a ref on it. - */ - local_irq_save(flags); - pgdp = pgd_offset(mm, addr); - do { - pgd = *pgdp; - barrier(); - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - local_irq_restore(flags); - - return nr; -} - -/** - * get_user_pages_fast() - pin user pages in memory - * @start: starting user address - * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to - * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. - * - * Attempt to pin user pages in memory without taking mm->mmap_sem. - * If not successful, it will fall back to taking the lock and - * calling get_user_pages(). - * - * Returns number of pages pinned. This may be fewer than the number - * requested. If nr_pages is 0 or negative, returns 0. If no pages - * were pinned, returns -errno. - */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - int nr, ret; - - might_sleep(); - start &= PAGE_MASK; - nr = __get_user_pages_fast(start, nr_pages, write, pages); - if (nr == nr_pages) - return nr; - - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - ret = get_user_pages_unlocked(start, nr_pages - nr, pages, - write ? FOLL_WRITE : 0); - /* Have to be a bit careful with return values */ - if (nr > 0) - ret = (ret < 0) ? nr : ret + nr; - return ret; -} diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 8485d6dc2754..9ebd01219812 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -410,6 +410,7 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, return old; } +#ifdef CONFIG_PGSTE static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; @@ -427,6 +428,7 @@ static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) pmd = pmd_alloc(mm, pud, addr); return pmd; } +#endif pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t new) diff --git a/arch/sh/include/cpu-sh4/cpu/sh7786.h b/arch/sh/include/cpu-sh4/cpu/sh7786.h index 8f9bfbf3cdb1..d6cce65b4871 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7786.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7786.h @@ -132,7 +132,7 @@ enum { static inline u32 sh7786_mm_sel(void) { - return __raw_readl(0xFC400020) & 0x7; + return __raw_readl((const volatile void __iomem *)0xFC400020) & 0x7; } #endif /* __CPU_SH7786_H__ */ diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 9a26b442f820..8e645ddac58e 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -356,6 +356,8 @@ static int get_vdev_port_node_info(struct mdesc_handle *md, u64 node, node_info->vdev_port.id = *idp; node_info->vdev_port.name = kstrdup_const(name, GFP_KERNEL); + if (!node_info->vdev_port.name) + return -1; node_info->vdev_port.parent_cfg_hdl = *parent_cfg_hdlp; return 0; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6de7c684c29f..a58ae9c42803 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -891,6 +891,10 @@ static int sparc_perf_event_set_period(struct perf_event *event, s64 period = hwc->sample_period; int ret = 0; + /* The period may have been changed by PERF_EVENT_IOC_PERIOD */ + if (unlikely(period != hwc->last_period)) + left = period - (hwc->last_period - left); + if (unlikely(left <= -period)) { left = period; local64_set(&hwc->period_left, left); diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index d245f89d1395..d220b6848746 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -587,7 +587,7 @@ xcall_flush_tlb_kernel_range: /* 44 insns */ sub %g7, %g1, %g3 srlx %g3, 18, %g2 brnz,pn %g2, 2f - add %g2, 1, %g2 + sethi %hi(PAGE_SIZE), %g2 sub %g3, %g2, %g3 or %g1, 0x20, %g1 ! Nucleus 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP @@ -751,7 +751,7 @@ __cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */ sub %g7, %g1, %g3 srlx %g3, 18, %g2 brnz,pn %g2, 2f - add %g2, 1, %g2 + sethi %hi(PAGE_SIZE), %g2 sub %g3, %g2, %g3 or %g1, 0x20, %g1 ! Nucleus 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index fca34b2177e2..9f4b4bb78120 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -22,7 +22,6 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) } extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long start, unsigned long end) { } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 052de4c8acb2..0c572a48158e 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -56,7 +56,7 @@ static int itimer_one_shot(struct clock_event_device *evt) static struct clock_event_device timer_clockevent = { .name = "posix-timer", .rating = 250, - .cpumask = cpu_all_mask, + .cpumask = cpu_possible_mask, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_state_shutdown = itimer_shutdown, diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 5c205a9cb5a6..9f06ea5466dd 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -88,7 +88,6 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, } static inline void arch_unmap(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long start, unsigned long end) { } diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 6adce15268bd..f8612b07d32f 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -116,6 +116,7 @@ config MPENTIUMM config MPENTIUM4 bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" depends on X86_32 + select X86_P6_NOP ---help--- Select this for Intel Pentium 4 chips. This includes the Pentium 4, Pentium D, P4-based Celeron and Xeon, and @@ -148,9 +149,8 @@ config MPENTIUM4 -Paxville -Dempsey - config MK6 - bool "K6/K6-II/K6-III" + bool "AMD K6/K6-II/K6-III" depends on X86_32 ---help--- Select this for an AMD K6-family processor. Enables use of @@ -158,7 +158,7 @@ config MK6 flags to GCC. config MK7 - bool "Athlon/Duron/K7" + bool "AMD Athlon/Duron/K7" depends on X86_32 ---help--- Select this for an AMD Athlon K7-family processor. Enables use of @@ -166,12 +166,83 @@ config MK7 flags to GCC. config MK8 - bool "Opteron/Athlon64/Hammer/K8" + bool "AMD Opteron/Athlon64/Hammer/K8" ---help--- Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. +config MK8SSE3 + bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3" + ---help--- + Select this for improved AMD Opteron or Athlon64 Hammer-family processors. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MK10 + bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" + ---help--- + Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, + Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MBARCELONA + bool "AMD Barcelona" + ---help--- + Select this for AMD Family 10h Barcelona processors. + + Enables -march=barcelona + +config MBOBCAT + bool "AMD Bobcat" + ---help--- + Select this for AMD Family 14h Bobcat processors. + + Enables -march=btver1 + +config MJAGUAR + bool "AMD Jaguar" + ---help--- + Select this for AMD Family 16h Jaguar processors. + + Enables -march=btver2 + +config MBULLDOZER + bool "AMD Bulldozer" + ---help--- + Select this for AMD Family 15h Bulldozer processors. + + Enables -march=bdver1 + +config MPILEDRIVER + bool "AMD Piledriver" + ---help--- + Select this for AMD Family 15h Piledriver processors. + + Enables -march=bdver2 + +config MSTEAMROLLER + bool "AMD Steamroller" + ---help--- + Select this for AMD Family 15h Steamroller processors. + + Enables -march=bdver3 + +config MEXCAVATOR + bool "AMD Excavator" + ---help--- + Select this for AMD Family 15h Excavator processors. + + Enables -march=bdver4 + +config MZEN + bool "AMD Zen" + ---help--- + Select this for AMD Family 17h Zen processors. + + Enables -march=znver1 + config MCRUSOE bool "Crusoe" depends on X86_32 @@ -253,6 +324,7 @@ config MVIAC7 config MPSC bool "Intel P4 / older Netburst based Xeon" + select X86_P6_NOP depends on X86_64 ---help--- Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey @@ -262,8 +334,19 @@ config MPSC using the cpu family field in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. +config MATOM + bool "Intel Atom" + select X86_P6_NOP + ---help--- + + Select this for the Intel Atom platform. Intel Atom CPUs have an + in-order pipelining architecture and thus can benefit from + accordingly optimized code. Use a recent GCC with specific Atom + support in order to fully benefit from selecting this option. + config MCORE2 - bool "Core 2/newer Xeon" + bool "Intel Core 2" + select X86_P6_NOP ---help--- Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and @@ -271,14 +354,106 @@ config MCORE2 family in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) -config MATOM - bool "Intel Atom" + Enables -march=core2 + +config MNEHALEM + bool "Intel Nehalem" + select X86_P6_NOP ---help--- - Select this for the Intel Atom platform. Intel Atom CPUs have an - in-order pipelining architecture and thus can benefit from - accordingly optimized code. Use a recent GCC with specific Atom - support in order to fully benefit from selecting this option. + Select this for 1st Gen Core processors in the Nehalem family. + + Enables -march=nehalem + +config MWESTMERE + bool "Intel Westmere" + select X86_P6_NOP + ---help--- + + Select this for the Intel Westmere formerly Nehalem-C family. + + Enables -march=westmere + +config MSILVERMONT + bool "Intel Silvermont" + select X86_P6_NOP + ---help--- + + Select this for the Intel Silvermont platform. + + Enables -march=silvermont + +config MSANDYBRIDGE + bool "Intel Sandy Bridge" + select X86_P6_NOP + ---help--- + + Select this for 2nd Gen Core processors in the Sandy Bridge family. + + Enables -march=sandybridge + +config MIVYBRIDGE + bool "Intel Ivy Bridge" + select X86_P6_NOP + ---help--- + + Select this for 3rd Gen Core processors in the Ivy Bridge family. + + Enables -march=ivybridge + +config MHASWELL + bool "Intel Haswell" + select X86_P6_NOP + ---help--- + + Select this for 4th Gen Core processors in the Haswell family. + + Enables -march=haswell + +config MBROADWELL + bool "Intel Broadwell" + select X86_P6_NOP + ---help--- + + Select this for 5th Gen Core processors in the Broadwell family. + + Enables -march=broadwell + +config MSKYLAKE + bool "Intel Skylake" + select X86_P6_NOP + ---help--- + + Select this for 6th Gen Core processors in the Skylake family. + + Enables -march=skylake + +config MSKYLAKEX + bool "Intel Skylake X" + select X86_P6_NOP + ---help--- + + Select this for 6th Gen Core processors in the Skylake X family. + + Enables -march=skylake-avx512 + +config MCANNONLAKE + bool "Intel Cannon Lake" + select X86_P6_NOP + ---help--- + + Select this for 8th Gen Core processors + + Enables -march=cannonlake + +config MICELAKE + bool "Intel Ice Lake" + select X86_P6_NOP + ---help--- + + Select this for 8th Gen Core processors in the Ice Lake family. + + Enables -march=icelake config GENERIC_CPU bool "Generic-x86-64" @@ -287,6 +462,19 @@ config GENERIC_CPU Generic x86-64 CPU. Run equally well on all x86-64 CPUs. +config MNATIVE + bool "Native optimizations autodetected by GCC" + ---help--- + + GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. -march=native + also detects and applies additional settings beyond -march specific + to your CPU, (eg. -msse4). Unless you have a specific reason not to + (e.g. distcc cross-compiling), you should probably be using + -march=native rather than anything listed below. + + Enables -march=native + endchoice config X86_GENERIC @@ -311,7 +499,7 @@ config X86_INTERNODE_CACHE_SHIFT config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU default "4" if MELAN || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX @@ -329,35 +517,36 @@ config X86_ALIGNMENT_16 config X86_INTEL_USERCOPY def_bool y - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MNATIVE config X86_USE_PPRO_CHECKSUM def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MATOM || MNATIVE config X86_USE_3DNOW def_bool y depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML -# -# P6_NOPs are a relatively minor optimization that require a family >= -# 6 processor, except that it is broken on certain VIA chips. -# Furthermore, AMD chips prefer a totally different sequence of NOPs -# (which work on all CPUs). In addition, it looks like Virtual PC -# does not understand them. -# -# As a result, disallow these if we're not compiling for X86_64 (these -# NOPs do work on all x86-64 capable chips); the list of processors in -# the right-hand clause are the cores that benefit from this optimization. -# config X86_P6_NOP - def_bool y - depends on X86_64 - depends on (MCORE2 || MPENTIUM4 || MPSC) + default n + bool "Support for P6_NOPs on Intel chips" + depends on (MCORE2 || MPENTIUM4 || MPSC || MATOM || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MNATIVE) + ---help--- + P6_NOPs are a relatively minor optimization that require a family >= + 6 processor, except that it is broken on certain VIA chips. + Furthermore, AMD chips prefer a totally different sequence of NOPs + (which work on all CPUs). In addition, it looks like Virtual PC + does not understand them. + + As a result, disallow these if we're not compiling for X86_64 (these + NOPs do work on all x86-64 capable chips); the list of processors in + the right-hand clause are the cores that benefit from this optimization. + + Say Y if you have Intel CPU newer than Pentium Pro, N otherwise. config X86_TSC def_bool y - depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MNATIVE || MATOM) || X86_64 config X86_CMPXCHG64 def_bool y @@ -367,7 +556,7 @@ config X86_CMPXCHG64 # generates cmov. config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX) config X86_MINIMUM_CPU_FAMILY int diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a587805c6687..29eb64851528 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -47,7 +47,7 @@ export REALMODE_CFLAGS export BITS ifdef CONFIG_X86_NEED_RELOCS - LDFLAGS_vmlinux := --emit-relocs + LDFLAGS_vmlinux := --emit-relocs --discard-none endif # @@ -118,13 +118,46 @@ else KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) + cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) + cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-mtune=k8) + cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10) + cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona) + cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1) + cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2) + cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1) + cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2) + cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3) + cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4) + cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) cflags-$(CONFIG_MCORE2) += \ - $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) - cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ - $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) + $(call cc-option,-march=core2,$(call cc-option,-mtune=core2)) + cflags-$(CONFIG_MNEHALEM) += \ + $(call cc-option,-march=nehalem,$(call cc-option,-mtune=nehalem)) + cflags-$(CONFIG_MWESTMERE) += \ + $(call cc-option,-march=westmere,$(call cc-option,-mtune=westmere)) + cflags-$(CONFIG_MSILVERMONT) += \ + $(call cc-option,-march=silvermont,$(call cc-option,-mtune=silvermont)) + cflags-$(CONFIG_MSANDYBRIDGE) += \ + $(call cc-option,-march=sandybridge,$(call cc-option,-mtune=sandybridge)) + cflags-$(CONFIG_MIVYBRIDGE) += \ + $(call cc-option,-march=ivybridge,$(call cc-option,-mtune=ivybridge)) + cflags-$(CONFIG_MHASWELL) += \ + $(call cc-option,-march=haswell,$(call cc-option,-mtune=haswell)) + cflags-$(CONFIG_MBROADWELL) += \ + $(call cc-option,-march=broadwell,$(call cc-option,-mtune=broadwell)) + cflags-$(CONFIG_MSKYLAKE) += \ + $(call cc-option,-march=skylake,$(call cc-option,-mtune=skylake)) + cflags-$(CONFIG_MSKYLAKEX) += \ + $(call cc-option,-march=skylake-avx512,$(call cc-option,-mtune=skylake-avx512)) + cflags-$(CONFIG_MCANNONLAKE) += \ + $(call cc-option,-march=cannonlake,$(call cc-option,-mtune=cannonlake)) + cflags-$(CONFIG_MICELAKE) += \ + $(call cc-option,-march=icelake,$(call cc-option,-mtune=icelake)) + cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) \ + $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic)) cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) KBUILD_CFLAGS += $(cflags-y) diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 1f5faf8606b4..14a6d19995cc 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -23,7 +23,18 @@ cflags-$(CONFIG_MK6) += -march=k6 # Please note, that patches that add -march=athlon-xp and friends are pointless. # They make zero difference whatsosever to performance at this time. cflags-$(CONFIG_MK7) += -march=athlon +cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) +cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-march=athlon) +cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon) +cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon) +cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon) +cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon) +cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon) +cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon) +cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3,-march=athlon) +cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4,-march=athlon) +cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1,-march=athlon) cflags-$(CONFIG_MCRUSOE) += -march=i686 -falign-functions=0 -falign-jumps=0 -falign-loops=0 cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) -falign-functions=0 -falign-jumps=0 -falign-loops=0 cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) @@ -32,8 +43,19 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) -falign-fu cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_MVIAC7) += -march=i686 cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) -cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ - $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) +cflags-$(CONFIG_MNEHALEM) += -march=i686 $(call tune,nehalem) +cflags-$(CONFIG_MWESTMERE) += -march=i686 $(call tune,westmere) +cflags-$(CONFIG_MSILVERMONT) += -march=i686 $(call tune,silvermont) +cflags-$(CONFIG_MSANDYBRIDGE) += -march=i686 $(call tune,sandybridge) +cflags-$(CONFIG_MIVYBRIDGE) += -march=i686 $(call tune,ivybridge) +cflags-$(CONFIG_MHASWELL) += -march=i686 $(call tune,haswell) +cflags-$(CONFIG_MBROADWELL) += -march=i686 $(call tune,broadwell) +cflags-$(CONFIG_MSKYLAKE) += -march=i686 $(call tune,skylake) +cflags-$(CONFIG_MSKYLAKEX) += -march=i686 $(call tune,skylake-avx512) +cflags-$(CONFIG_MCANNONLAKE) += -march=i686 $(call tune,cannonlake) +cflags-$(CONFIG_MICELAKE) += -march=i686 $(call tune,icelake) +cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell,$(call cc-option,-march=core2,-march=i686)) \ + $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic)) # AMD Elan support cflags-$(CONFIG_MELAN) += -march=i486 diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index 0e785c0b2354..b67cb4c7b2f6 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -70,15 +70,14 @@ static int chksum_final(struct shash_desc *desc, u8 *out) return 0; } -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) +static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) { if (len >= 16 && irq_fpu_usable()) { kernel_fpu_begin(); - *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); + *(__u16 *)out = crc_t10dif_pcl(crc, data, len); kernel_fpu_end(); } else - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + *(__u16 *)out = crc_t10dif_generic(crc, data, len); return 0; } @@ -87,15 +86,13 @@ static int chksum_finup(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - return __chksum_finup(&ctx->crc, data, len, out); + return __chksum_finup(ctx->crc, data, len, out); } static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) { - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, length, out); + return __chksum_finup(0, data, length, out); } static struct shash_alg alg = { diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 7bc105f47d21..19f650d729f5 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -31,6 +31,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -212,6 +213,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) #endif user_enter_irqoff(); + + mds_user_clear_cpu_buffers(); } #define SYSCALL_EXIT_WORK_FLAGS \ diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index d309f30cf7af..5fc76b755510 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -650,6 +650,7 @@ ENTRY(__switch_to_asm) pushl %ebx pushl %edi pushl %esi + pushfl /* switch stack */ movl %esp, TASK_threadsp(%eax) @@ -672,6 +673,7 @@ ENTRY(__switch_to_asm) #endif /* restore callee-saved registers */ + popfl popl %esi popl %edi popl %ebx diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1f0efdb7b629..b1d59a7c556e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -291,6 +291,7 @@ ENTRY(__switch_to_asm) pushq %r13 pushq %r14 pushq %r15 + pushfq /* switch stack */ movq %rsp, TASK_threadsp(%rdi) @@ -313,6 +314,7 @@ ENTRY(__switch_to_asm) #endif /* restore callee-saved registers */ + popfq popq %r15 popq %r14 popq %r13 @@ -879,7 +881,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * @paranoid == 2 is special: the stub will never switch stacks. This is for * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. */ -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 +.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 create_gap=0 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 @@ -899,6 +901,20 @@ ENTRY(\sym) jnz .Lfrom_usermode_switch_stack_\@ .endif + .if \create_gap == 1 + /* + * If coming from kernel space, create a 6-word gap to allow the + * int3 handler to emulate a call instruction. + */ + testb $3, CS-ORIG_RAX(%rsp) + jnz .Lfrom_usermode_no_gap_\@ + .rept 6 + pushq 5*8(%rsp) + .endr + UNWIND_HINT_IRET_REGS offset=8 +.Lfrom_usermode_no_gap_\@: + .endif + .if \paranoid call paranoid_entry .else @@ -1130,7 +1146,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ #endif /* CONFIG_HYPERV */ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry int3 do_int3 has_error_code=0 +idtentry int3 do_int3 has_error_code=0 create_gap=1 idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN_PV diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 98c7d12b945c..ed7ddee1ae69 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -128,13 +128,24 @@ notrace static inline u64 vgetcyc(int mode) { if (mode == VCLOCK_TSC) return (u64)rdtsc_ordered(); + + /* + * For any memory-mapped vclock type, we need to make sure that gcc + * doesn't cleverly hoist a load before the mode check. Otherwise we + * might end up touching the memory-mapped page even if the vclock in + * question isn't enabled, which will segfault. Hence the barriers. + */ #ifdef CONFIG_PARAVIRT_CLOCK - else if (mode == VCLOCK_PVCLOCK) + if (mode == VCLOCK_PVCLOCK) { + barrier(); return vread_pvclock(); + } #endif #ifdef CONFIG_HYPERV_TSCPAGE - else if (mode == VCLOCK_HVCLOCK) + if (mode == VCLOCK_HVCLOCK) { + barrier(); return vread_hvclock(); + } #endif return U64_MAX; } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index d35f4775d5f1..82dad001d1ea 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3189,7 +3189,7 @@ static int intel_pmu_hw_config(struct perf_event *event) return ret; if (event->attr.precise_ip) { - if (!(event->attr.freq || event->attr.wakeup_events)) { + if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & ~intel_pmu_large_pebs_flags(event))) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index d41de9af7a39..6072f92cb8ea 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -578,6 +578,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), + + X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 10c99ce1fead..b71adf603b86 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -684,7 +684,7 @@ struct event_constraint intel_core2_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01), EVENT_CONSTRAINT_END }; @@ -693,7 +693,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01), /* Allow all events as PEBS with no flags */ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), EVENT_CONSTRAINT_END @@ -701,7 +701,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { struct event_constraint intel_slm_pebs_event_constraints[] = { /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1), /* Allow all events as PEBS with no flags */ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), EVENT_CONSTRAINT_END @@ -726,7 +726,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = { INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f), EVENT_CONSTRAINT_END }; @@ -743,7 +743,7 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = { INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f), EVENT_CONSTRAINT_END }; @@ -752,7 +752,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -767,9 +767,9 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = { INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -783,9 +783,9 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ @@ -806,9 +806,9 @@ struct event_constraint intel_bdw_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ @@ -829,9 +829,9 @@ struct event_constraint intel_bdw_pebs_event_constraints[] = { struct event_constraint intel_skl_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f), INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 94dc564146ca..37ebf6fc5415 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -775,6 +775,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init), + + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, skl_rapl_init), {}, }; diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index a878e6286e4a..f3f4c2263501 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -89,6 +89,7 @@ static bool test_intel(int idx) case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_DESKTOP: + case INTEL_FAM6_ICELAKE_MOBILE: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 321fe5f5d0e9..4d5fcd47ab75 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -61,9 +61,8 @@ } while (0) #define RELOAD_SEG(seg) { \ - unsigned int pre = GET_SEG(seg); \ + unsigned int pre = (seg) | 3; \ unsigned int cur = get_user_seg(seg); \ - pre |= 3; \ if (pre != cur) \ set_user_seg(seg, pre); \ } @@ -72,6 +71,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_32 __user *sc) { unsigned int tmpflags, err = 0; + u16 gs, fs, es, ds; void __user *buf; u32 tmp; @@ -79,16 +79,10 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, current->restart_block.fn = do_no_restart_syscall; get_user_try { - /* - * Reload fs and gs if they have changed in the signal - * handler. This does not handle long fs/gs base changes in - * the handler, but does not clobber them at least in the - * normal case. - */ - RELOAD_SEG(gs); - RELOAD_SEG(fs); - RELOAD_SEG(ds); - RELOAD_SEG(es); + gs = GET_SEG(gs); + fs = GET_SEG(fs); + ds = GET_SEG(ds); + es = GET_SEG(es); COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(dx); COPY(cx); COPY(ip); COPY(ax); @@ -106,6 +100,17 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, buf = compat_ptr(tmp); } get_user_catch(err); + /* + * Reload fs and gs if they have changed in the signal + * handler. This does not handle long fs/gs base changes in + * the handler, but does not clobber them at least in the + * normal case. + */ + RELOAD_SEG(gs); + RELOAD_SEG(fs); + RELOAD_SEG(ds); + RELOAD_SEG(es); + err |= fpu__restore_sig(buf, 1); force_iret(); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 981ff9479648..75f27ee2c263 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -344,6 +344,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ @@ -382,5 +383,7 @@ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 058e40fed167..8a0e56e1dcc9 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -6,6 +6,8 @@ #ifndef __ASSEMBLY__ +#include + /* Provide __cpuidle; we can't safely include */ #define __cpuidle __attribute__((__section__(".cpuidle.text"))) @@ -54,11 +56,13 @@ static inline void native_irq_enable(void) static inline __cpuidle void native_safe_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static inline __cpuidle void native_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 19d18fae6ec6..41019af68adf 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -277,8 +277,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, mpx_mm_init(mm); } -static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline void arch_unmap(struct mm_struct *mm, unsigned long start, + unsigned long end) { /* * mpx_notify_unmap() goes and reads a rarely-hot @@ -298,7 +298,7 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, * consistently wrong. */ if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) - mpx_notify_unmap(mm, vma, start, end); + mpx_notify_unmap(mm, start, end); } /* diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 7948a17febb4..44b776297dc3 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -25,6 +25,30 @@ struct mod_arch_specific { #define MODULE_PROC_FAMILY "586MMX " #elif defined CONFIG_MCORE2 #define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_MNATIVE +#define MODULE_PROC_FAMILY "NATIVE " +#elif defined CONFIG_MNEHALEM +#define MODULE_PROC_FAMILY "NEHALEM " +#elif defined CONFIG_MWESTMERE +#define MODULE_PROC_FAMILY "WESTMERE " +#elif defined CONFIG_MSILVERMONT +#define MODULE_PROC_FAMILY "SILVERMONT " +#elif defined CONFIG_MSANDYBRIDGE +#define MODULE_PROC_FAMILY "SANDYBRIDGE " +#elif defined CONFIG_MIVYBRIDGE +#define MODULE_PROC_FAMILY "IVYBRIDGE " +#elif defined CONFIG_MHASWELL +#define MODULE_PROC_FAMILY "HASWELL " +#elif defined CONFIG_MBROADWELL +#define MODULE_PROC_FAMILY "BROADWELL " +#elif defined CONFIG_MSKYLAKE +#define MODULE_PROC_FAMILY "SKYLAKE " +#elif defined CONFIG_MSKYLAKEX +#define MODULE_PROC_FAMILY "SKYLAKEX " +#elif defined CONFIG_MCANNONLAKE +#define MODULE_PROC_FAMILY "CANNONLAKE " +#elif defined CONFIG_MICELAKE +#define MODULE_PROC_FAMILY "ICELAKE " #elif defined CONFIG_MATOM #define MODULE_PROC_FAMILY "ATOM " #elif defined CONFIG_M686 @@ -43,6 +67,26 @@ struct mod_arch_specific { #define MODULE_PROC_FAMILY "K7 " #elif defined CONFIG_MK8 #define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_MK8SSE3 +#define MODULE_PROC_FAMILY "K8SSE3 " +#elif defined CONFIG_MK10 +#define MODULE_PROC_FAMILY "K10 " +#elif defined CONFIG_MBARCELONA +#define MODULE_PROC_FAMILY "BARCELONA " +#elif defined CONFIG_MBOBCAT +#define MODULE_PROC_FAMILY "BOBCAT " +#elif defined CONFIG_MBULLDOZER +#define MODULE_PROC_FAMILY "BULLDOZER " +#elif defined CONFIG_MPILEDRIVER +#define MODULE_PROC_FAMILY "PILEDRIVER " +#elif defined CONFIG_MSTEAMROLLER +#define MODULE_PROC_FAMILY "STEAMROLLER " +#elif defined CONFIG_MJAGUAR +#define MODULE_PROC_FAMILY "JAGUAR " +#elif defined CONFIG_MEXCAVATOR +#define MODULE_PROC_FAMILY "EXCAVATOR " +#elif defined CONFIG_MZEN +#define MODULE_PROC_FAMILY "ZEN " #elif defined CONFIG_MELAN #define MODULE_PROC_FAMILY "ELAN " #elif defined CONFIG_MCRUSOE diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index d0b1434fb0b6..143a5c193ed3 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -64,12 +64,15 @@ struct mpx_fault_info { }; #ifdef CONFIG_X86_INTEL_MPX -int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs); -int mpx_handle_bd_fault(void); + +extern int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs); +extern int mpx_handle_bd_fault(void); + static inline int kernel_managing_mpx_tables(struct mm_struct *mm) { return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR); } + static inline void mpx_mm_init(struct mm_struct *mm) { /* @@ -78,11 +81,10 @@ static inline void mpx_mm_init(struct mm_struct *mm) */ mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR; } -void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long start, unsigned long end); -unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, - unsigned long flags); +extern void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, unsigned long end); +extern unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, unsigned long flags); + #else static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) { @@ -100,7 +102,6 @@ static inline void mpx_mm_init(struct mm_struct *mm) { } static inline void mpx_notify_unmap(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long start, unsigned long end) { } diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ca5bc0eacb95..20f7da552e90 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_MSR_INDEX_H #define _ASM_X86_MSR_INDEX_H +#include + /* * CPU model specific register (MSR) numbers. * @@ -40,14 +42,14 @@ /* Intel MSRs. Some also available on other CPUs */ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ -#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f @@ -69,20 +71,25 @@ #define MSR_MTRRcap 0x000000fe #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ -#define ARCH_CAP_SSB_NO (1 << 4) /* - * Not susceptible to Speculative Store Bypass - * attack, so no Speculative Store Bypass - * control required. - */ +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ +#define ARCH_CAP_MDS_NO BIT(5) /* + * Not susceptible to + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b -#define L1D_FLUSH (1 << 0) /* - * Writeback and invalidate the - * L1 data cache. - */ +#define L1D_FLUSH BIT(0) /* + * Writeback and invalidate the + * L1 data cache. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 39a2fb29378a..eb0f80ce8524 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -6,6 +6,7 @@ #include #include +#include #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf @@ -40,6 +41,8 @@ static inline void __monitorx(const void *eax, unsigned long ecx, static inline void __mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + /* "mwait %eax, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); @@ -74,6 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) static inline void __mwaitx(unsigned long eax, unsigned long ebx, unsigned long ecx) { + /* No MDS buffer clear as this is AMD/HYGON only */ + /* "mwaitx %eax, %ebx, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xfb;" :: "a" (eax), "b" (ebx), "c" (ecx)); @@ -81,6 +86,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + trace_hardirqs_on(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index dad12b767ba0..4e970390110f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -318,6 +318,56 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +DECLARE_STATIC_KEY_FALSE(mds_user_clear); +DECLARE_STATIC_KEY_FALSE(mds_idle_clear); + +#include + +/** + * mds_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * This uses the otherwise unused and obsolete VERW instruction in + * combination with microcode which triggers a CPU buffer flush when the + * instruction is executed. + */ +static inline void mds_clear_cpu_buffers(void) +{ + static const u16 ds = __KERNEL_DS; + + /* + * Has to be the memory-operand variant because only that + * guarantees the CPU buffer flush functionality according to + * documentation. The register-operand variant does not. + * Works with any segment selector, but a valid writable + * data segment is the fastest variant. + * + * "cc" clobber is required because VERW modifies ZF. + */ + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); +} + +/** + * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_user_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_user_clear)) + mds_clear_cpu_buffers(); +} + +/** + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_idle_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_idle_clear)) + mds_clear_cpu_buffers(); +} + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2bb3a648fc12..31e9895db75e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -991,4 +991,10 @@ enum l1tf_mitigations { extern enum l1tf_mitigations l1tf_mitigation; +enum mds_mitigations { + MDS_MITIGATION_OFF, + MDS_MITIGATION_FULL, + MDS_MITIGATION_VMWERV, +}; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 7cf1a270d891..157149d4129c 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -40,6 +40,7 @@ asmlinkage void ret_from_fork(void); * order of the fields must match the code in __switch_to_asm(). */ struct inactive_task_frame { + unsigned long flags; #ifdef CONFIG_X86_64 unsigned long r15; unsigned long r14; diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index e85ff65c43c3..0bbb07eaed6b 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -39,4 +39,34 @@ extern int poke_int3_handler(struct pt_regs *regs); extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); extern int after_bootmem; +#ifndef CONFIG_UML_X86 +static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) +{ + regs->ip = ip; +} + +#define INT3_INSN_SIZE 1 +#define CALL_INSN_SIZE 5 + +#ifdef CONFIG_X86_64 +static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) +{ + /* + * The int3 handler in entry_64.S adds a gap between the + * stack where the break point happened, and the saving of + * pt_regs. We can extend the original stack because of + * this gap. See the idtentry macro's create_gap option. + */ + regs->sp -= sizeof(unsigned long); + *(unsigned long *)regs->sp = val; +} + +static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func) +{ + int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); + int3_emulate_jmp(regs, func); +} +#endif /* CONFIG_X86_64 */ +#endif /* !CONFIG_UML_X86 */ + #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 1954dd5552a2..3822cc8ac9d6 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -427,10 +427,11 @@ do { \ ({ \ __label__ __pu_label; \ int __pu_err = -EFAULT; \ - __typeof__(*(ptr)) __pu_val; \ - __pu_val = x; \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(ptr) __pu_ptr = (ptr); \ + __typeof__(size) __pu_size = (size); \ __uaccess_begin(); \ - __put_user_size(__pu_val, (ptr), (size), __pu_label); \ + __put_user_size(__pu_val, __pu_ptr, __pu_size, __pu_label); \ __pu_err = 0; \ __pu_label: \ __uaccess_end(); \ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 9a79c7808f9c..d7df79fc448c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -667,15 +667,29 @@ void __init alternative_instructions(void) * handlers seeing an inconsistent instruction while you patch. */ void *__init_or_module text_poke_early(void *addr, const void *opcode, - size_t len) + size_t len) { unsigned long flags; - local_irq_save(flags); - memcpy(addr, opcode, len); - local_irq_restore(flags); - sync_core(); - /* Could also do a CLFLUSH here to speed up CPU recovery; but - that causes hangs on some VIA CPUs. */ + + if (boot_cpu_has(X86_FEATURE_NX) && + is_module_text_address((unsigned long)addr)) { + /* + * Modules text is marked initially as non-executable, so the + * code cannot be running and speculative code-fetches are + * prevented. Just change the code. + */ + memcpy(addr, opcode, len); + } else { + local_irq_save(flags); + memcpy(addr, opcode, len); + local_irq_restore(flags); + sync_core(); + + /* + * Could also do a CLFLUSH here to speed up CPU recovery; but + * that causes hangs on some VIA CPUs. + */ + } return addr; } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 01004bfb1a1b..524709dcf749 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -820,8 +820,11 @@ static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); - /* Fix erratum 1076: CPB feature bit not being set in CPUID. */ - if (!cpu_has(c, X86_FEATURE_CPB)) + /* + * Fix erratum 1076: CPB feature bit not being set in CPUID. + * Always set it, except when running under a hypervisor. + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) set_cpu_cap(c, X86_FEATURE_CPB); } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b91b3bfa5cfb..03b4cc0ec3a7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -37,6 +37,7 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); +static void __init mds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ u64 x86_spec_ctrl_base; @@ -63,6 +64,13 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +/* Control MDS CPU buffer clear before returning to user space */ +DEFINE_STATIC_KEY_FALSE(mds_user_clear); +EXPORT_SYMBOL_GPL(mds_user_clear); +/* Control MDS CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(mds_idle_clear); +EXPORT_SYMBOL_GPL(mds_idle_clear); + void __init check_bugs(void) { identify_boot_cpu(); @@ -101,6 +109,10 @@ void __init check_bugs(void) l1tf_select_mitigation(); + mds_select_mitigation(); + + arch_smt_update(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -206,6 +218,61 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +#undef pr_fmt +#define pr_fmt(fmt) "MDS: " fmt + +/* Default mitigation for MDS-affected CPUs */ +static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; +static bool mds_nosmt __ro_after_init = false; + +static const char * const mds_strings[] = { + [MDS_MITIGATION_OFF] = "Vulnerable", + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", + [MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode", +}; + +static void __init mds_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) { + mds_mitigation = MDS_MITIGATION_OFF; + return; + } + + if (mds_mitigation == MDS_MITIGATION_FULL) { + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) + mds_mitigation = MDS_MITIGATION_VMWERV; + + static_branch_enable(&mds_user_clear); + + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && + (mds_nosmt || cpu_mitigations_auto_nosmt())) + cpu_smt_disable(false); + } + + pr_info("%s\n", mds_strings[mds_mitigation]); +} + +static int __init mds_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + mds_mitigation = MDS_MITIGATION_OFF; + else if (!strcmp(str, "full")) + mds_mitigation = MDS_MITIGATION_FULL; + else if (!strcmp(str, "full,nosmt")) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_nosmt = true; + } + + return 0; +} +early_param("mds", mds_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt @@ -440,7 +507,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) char arg[20]; int ret, i; - if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") || + cpu_mitigations_off()) return SPECTRE_V2_CMD_NONE; ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); @@ -574,9 +642,6 @@ static void __init spectre_v2_select_mitigation(void) /* Set up IBPB and STIBP depending on the general spectre V2 command */ spectre_v2_user_select_mitigation(cmd); - - /* Enable STIBP if appropriate */ - arch_smt_update(); } static void update_stibp_msr(void * __unused) @@ -610,6 +675,31 @@ static void update_indir_branch_cond(void) static_branch_disable(&switch_to_cond_stibp); } +#undef pr_fmt +#define pr_fmt(fmt) fmt + +/* Update the static key controlling the MDS CPU buffer clear in idle */ +static void update_mds_branch_idle(void) +{ + /* + * Enable the idle clearing if SMT is active on CPUs which are + * affected only by MSBDS and not any other MDS variant. + * + * The other variants cannot be mitigated when SMT is enabled, so + * clearing the buffers on idle just to prevent the Store Buffer + * repartitioning leak would be a window dressing exercise. + */ + if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) + return; + + if (sched_smt_active()) + static_branch_enable(&mds_idle_clear); + else + static_branch_disable(&mds_idle_clear); +} + +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -631,6 +721,17 @@ void arch_smt_update(void) break; } + switch (mds_mitigation) { + case MDS_MITIGATION_FULL: + case MDS_MITIGATION_VMWERV: + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + pr_warn_once(MDS_MSG_SMT); + update_mds_branch_idle(); + break; + case MDS_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -672,7 +773,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) char arg[20]; int ret, i; - if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") || + cpu_mitigations_off()) { return SPEC_STORE_BYPASS_CMD_NONE; } else { ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", @@ -1008,6 +1110,11 @@ static void __init l1tf_select_mitigation(void) if (!boot_cpu_has_bug(X86_BUG_L1TF)) return; + if (cpu_mitigations_off()) + l1tf_mitigation = L1TF_MITIGATION_OFF; + else if (cpu_mitigations_auto_nosmt()) + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT; + override_cache_bits(&boot_cpu_data); switch (l1tf_mitigation) { @@ -1036,7 +1143,7 @@ static void __init l1tf_select_mitigation(void) pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", half_pa); pr_info("However, doing so will make a part of your RAM unusable.\n"); - pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n"); + pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n"); return; } @@ -1069,6 +1176,7 @@ static int __init l1tf_cmdline(char *str) early_param("l1tf", l1tf_cmdline); #undef pr_fmt +#define pr_fmt(fmt) fmt #ifdef CONFIG_SYSFS @@ -1107,6 +1215,23 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static ssize_t mds_show_state(char *buf) +{ + if (!hypervisor_is_type(X86_HYPER_NATIVE)) { + return sprintf(buf, "%s; SMT Host state unknown\n", + mds_strings[mds_mitigation]); + } + + if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : + sched_smt_active() ? "mitigated" : "disabled")); + } + + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) @@ -1173,6 +1298,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) return l1tf_show_state(buf); break; + + case X86_BUG_MDS: + return mds_show_state(buf); + default: break; } @@ -1204,4 +1333,9 @@ ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *b { return cpu_show_common(dev, attr, buf, X86_BUG_L1TF); } + +ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_MDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb28e98a0659..132a63dc5a76 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -948,61 +948,77 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -static const __initconst struct x86_cpu_id cpu_no_speculation[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_TABLET, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL_MID, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_MID, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL, X86_FEATURE_ANY }, - { X86_VENDOR_CENTAUR, 5 }, - { X86_VENDOR_INTEL, 5 }, - { X86_VENDOR_NSC, 5 }, - { X86_VENDOR_ANY, 4 }, +#define NO_SPECULATION BIT(0) +#define NO_MELTDOWN BIT(1) +#define NO_SSB BIT(2) +#define NO_L1TF BIT(3) +#define NO_MDS BIT(4) +#define MSBDS_ONLY BIT(5) + +#define VULNWL(_vendor, _family, _model, _whitelist) \ + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } + +#define VULNWL_INTEL(model, whitelist) \ + VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) + +#define VULNWL_AMD(family, whitelist) \ + VULNWL(AMD, family, X86_MODEL_ANY, whitelist) + +#define VULNWL_HYGON(family, whitelist) \ + VULNWL(HYGON, family, X86_MODEL_ANY, whitelist) + +static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), + + /* Intel Family 6 */ + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), + + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY), + + VULNWL_INTEL(CORE_YONAH, NO_SSB), + + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY), + + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF), + + /* AMD Family 0xf - 0x12 */ + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), {} }; -static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { - { X86_VENDOR_AMD }, - { X86_VENDOR_HYGON }, - {} -}; - -/* Only list CPUs which speculate but are non susceptible to SSB */ -static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, - { X86_VENDOR_AMD, 0x12, }, - { X86_VENDOR_AMD, 0x11, }, - { X86_VENDOR_AMD, 0x10, }, - { X86_VENDOR_AMD, 0xf, }, - {} -}; +static bool __init cpu_matches(unsigned long which) +{ + const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist); -static const __initconst struct x86_cpu_id cpu_no_l1tf[] = { - /* in addition to cpu_no_speculation */ - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_PLUS }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, - {} -}; + return m && !!(m->driver_data & which); +} static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; - if (x86_match_cpu(cpu_no_speculation)) + if (cpu_matches(NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); @@ -1011,15 +1027,20 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (x86_match_cpu(cpu_no_meltdown)) + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { + setup_force_cpu_bug(X86_BUG_MDS); + if (cpu_matches(MSBDS_ONLY)) + setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); + } + + if (cpu_matches(NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ @@ -1028,7 +1049,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - if (x86_match_cpu(cpu_no_l1tf)) + if (cpu_matches(NO_L1TF)) return; setup_force_cpu_bug(X86_BUG_L1TF); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index cf25405444ab..415621ddb8a2 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -19,6 +19,8 @@ #include "cpu.h" +#define APICID_SOCKET_ID_BIT 6 + /* * nodes_per_socket: Stores the number of nodes per socket. * Refer to CPUID Fn8000_001E_ECX Node Identifiers[10:8] @@ -87,6 +89,9 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) if (!err) c->x86_coreid_bits = get_count_order(c->x86_max_cores); + /* Socket ID is ApicId[6] for these processors. */ + c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; + cacheinfo_hygon_init_llc_id(c, cpu, node_id); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index e64de5149e50..d904aafe6409 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -563,33 +563,59 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, return offset; } +bool amd_filter_mce(struct mce *m) +{ + enum smca_bank_types bank_type = smca_get_bank_type(m->bank); + struct cpuinfo_x86 *c = &boot_cpu_data; + u8 xec = (m->status >> 16) & 0x3F; + + /* See Family 17h Models 10h-2Fh Erratum #1114. */ + if (c->x86 == 0x17 && + c->x86_model >= 0x10 && c->x86_model <= 0x2F && + bank_type == SMCA_IF && xec == 10) + return true; + + return false; +} + /* - * Turn off MC4_MISC thresholding banks on all family 0x15 models since - * they're not supported there. + * Turn off thresholding banks for the following conditions: + * - MC4_MISC thresholding is not supported on Family 0x15. + * - Prevent possible spurious interrupts from the IF bank on Family 0x17 + * Models 0x10-0x2F due to Erratum #1114. */ -void disable_err_thresholding(struct cpuinfo_x86 *c) +void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) { - int i; + int i, num_msrs; u64 hwcr; bool need_toggle; - u32 msrs[] = { - 0x00000413, /* MC4_MISC0 */ - 0xc0000408, /* MC4_MISC1 */ - }; + u32 msrs[NR_BLOCKS]; + + if (c->x86 == 0x15 && bank == 4) { + msrs[0] = 0x00000413; /* MC4_MISC0 */ + msrs[1] = 0xc0000408; /* MC4_MISC1 */ + num_msrs = 2; + } else if (c->x86 == 0x17 && + (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) { - if (c->x86 != 0x15) + if (smca_get_bank_type(bank) != SMCA_IF) + return; + + msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank); + num_msrs = 1; + } else { return; + } rdmsrl(MSR_K7_HWCR, hwcr); /* McStatusWrEn has to be set */ need_toggle = !(hwcr & BIT(18)); - if (need_toggle) wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); /* Clear CntP bit safely */ - for (i = 0; i < ARRAY_SIZE(msrs); i++) + for (i = 0; i < num_msrs; i++) msr_clear_bit(msrs[i], 62); /* restore old settings */ @@ -604,12 +630,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int bank, block, cpu = smp_processor_id(); int offset = -1; - disable_err_thresholding(c); - for (bank = 0; bank < mca_cfg.banks; ++bank) { if (mce_flags.smca) smca_configure(bank, cpu); + disable_err_thresholding(c, bank); + for (block = 0; block < NR_BLOCKS; ++block) { address = get_block_address(address, low, high, bank, block); if (!address) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b7fb541a4873..9e6a94c208e0 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -712,19 +712,49 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) barrier(); m.status = mce_rdmsrl(msr_ops.status(i)); + + /* If this entry is not valid, ignore it */ if (!(m.status & MCI_STATUS_VAL)) continue; /* - * Uncorrected or signalled events are handled by the exception - * handler when it is enabled, so don't process those here. - * - * TBD do the same check for MCI_STATUS_EN here? + * If we are logging everything (at CPU online) or this + * is a corrected error, then we must log it. */ - if (!(flags & MCP_UC) && - (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) - continue; + if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC)) + goto log_it; + + /* + * Newer Intel systems that support software error + * recovery need to make additional checks. Other + * CPUs should skip over uncorrected errors, but log + * everything else. + */ + if (!mca_cfg.ser) { + if (m.status & MCI_STATUS_UC) + continue; + goto log_it; + } + + /* Log "not enabled" (speculative) errors */ + if (!(m.status & MCI_STATUS_EN)) + goto log_it; + + /* + * Log UCNA (SDM: 15.6.3 "UCR Error Classification") + * UC == 1 && PCC == 0 && S == 0 + */ + if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S)) + goto log_it; + /* + * Skip anything else. Presumption is that our read of this + * bank is racing with a machine check. Leave the log alone + * for do_machine_check() to deal with it. + */ + continue; + +log_it: error_seen = true; mce_read_aux(&m, i); @@ -1451,13 +1481,12 @@ EXPORT_SYMBOL_GPL(mce_notify_irq); static int __mcheck_cpu_mce_banks_init(void) { int i; - u8 num_banks = mca_cfg.banks; - mce_banks = kcalloc(num_banks, sizeof(struct mce_bank), GFP_KERNEL); + mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); if (!mce_banks) return -ENOMEM; - for (i = 0; i < num_banks; i++) { + for (i = 0; i < MAX_NR_BANKS; i++) { struct mce_bank *b = &mce_banks[i]; b->ctl = -1ULL; @@ -1471,28 +1500,19 @@ static int __mcheck_cpu_mce_banks_init(void) */ static int __mcheck_cpu_cap_init(void) { - unsigned b; u64 cap; + u8 b; rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - if (!mca_cfg.banks) - pr_info("CPU supports %d MCE banks\n", b); - - if (b > MAX_NR_BANKS) { - pr_warn("Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); + if (WARN_ON_ONCE(b > MAX_NR_BANKS)) b = MAX_NR_BANKS; - } - /* Don't support asymmetric configurations today */ - WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks); - mca_cfg.banks = b; + mca_cfg.banks = max(mca_cfg.banks, b); if (!mce_banks) { int err = __mcheck_cpu_mce_banks_init(); - if (err) return err; } @@ -1771,6 +1791,14 @@ static void __mcheck_cpu_init_timer(void) mce_start_timer(t); } +bool filter_mce(struct mce *m) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return amd_filter_mce(m); + + return false; +} + /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) { @@ -2451,6 +2479,8 @@ EXPORT_SYMBOL_GPL(mcsafe_key); static int __init mcheck_late_init(void) { + pr_info("Using %d MCE banks\n", mca_cfg.banks); + if (mca_cfg.recovery) static_branch_inc(&mcsafe_key); diff --git a/arch/x86/kernel/cpu/mce/genpool.c b/arch/x86/kernel/cpu/mce/genpool.c index 3395549c51d3..64d1d5a00f39 100644 --- a/arch/x86/kernel/cpu/mce/genpool.c +++ b/arch/x86/kernel/cpu/mce/genpool.c @@ -99,6 +99,9 @@ int mce_gen_pool_add(struct mce *mce) { struct mce_evt_llist *node; + if (filter_mce(mce)) + return -EINVAL; + if (!mce_evt_pool) return -EINVAL; diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 8492ef7d9015..3f82afd0f46f 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -46,8 +46,6 @@ static struct mce i_mce; static struct dentry *dfs_inj; -static u8 n_banks; - #define MAX_FLAG_OPT_SIZE 4 #define NBCFG 0x44 @@ -570,9 +568,15 @@ static void do_inject(void) static int inj_bank_set(void *data, u64 val) { struct mce *m = (struct mce *)data; + u8 n_banks; + u64 cap; + + /* Get bank count on target CPU so we can handle non-uniform values. */ + rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap); + n_banks = cap & MCG_BANKCNT_MASK; if (val >= n_banks) { - pr_err("Non-existent MCE bank: %llu\n", val); + pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu); return -EINVAL; } @@ -665,10 +669,6 @@ static struct dfs_node { static int __init debugfs_init(void) { unsigned int i; - u64 cap; - - rdmsrl(MSR_IA32_MCG_CAP, cap); - n_banks = cap & MCG_BANKCNT_MASK; dfs_inj = debugfs_create_dir("mce-inject", NULL); if (!dfs_inj) diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index af5eab1e65e2..a34b55baa7aa 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -173,4 +173,13 @@ struct mca_msr_regs { extern struct mca_msr_regs msr_ops; +/* Decide whether to add MCE record to MCE event pool or filter it out. */ +extern bool filter_mce(struct mce *m); + +#ifdef CONFIG_X86_MCE_AMD +extern bool amd_filter_mce(struct mce *m); +#else +static inline bool amd_filter_mce(struct mce *m) { return false; }; +#endif + #endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 5260185cbf7b..f53658dde639 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -418,8 +418,9 @@ static int do_microcode_update(const void __user *buf, size_t size) if (ustate == UCODE_ERROR) { error = -1; break; - } else if (ustate == UCODE_OK) + } else if (ustate == UCODE_NEW) { apply_microcode_on_target(cpu); + } } return error; @@ -875,7 +876,7 @@ int __init microcode_init(void) goto out_ucode_group; register_syscore_ops(&mc_syscore_ops); - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", + cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online", mc_cpu_online, mc_cpu_down_prep); pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 1573a0a6b525..ff6e8e561405 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -368,6 +368,9 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) struct list_head *head; struct rdtgroup *entry; + if (!is_mbm_local_enabled()) + return; + r_mba = &rdt_resources_all[RDT_RESOURCE_MBA]; closid = rgrp->closid; rmid = rgrp->mon.rmid; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 85212a32b54d..c51b56e29948 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2556,7 +2556,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) if (closid_allocated(i) && i != closid) { mode = rdtgroup_mode_by_closid(i); if (mode == RDT_MODE_PSEUDO_LOCKSETUP) - break; + continue; /* * If CDP is active include peer * domain's usage to ensure there diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index ef49517f6bb2..6e0c0ed8e4bf 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef CONFIG_DYNAMIC_FTRACE @@ -231,6 +232,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } static unsigned long ftrace_update_func; +static unsigned long ftrace_update_func_call; static int update_ftrace_func(unsigned long ip, void *new) { @@ -259,6 +261,8 @@ int ftrace_update_ftrace_func(ftrace_func_t func) unsigned char *new; int ret; + ftrace_update_func_call = (unsigned long)func; + new = ftrace_call_replace(ip, (unsigned long)func); ret = update_ftrace_func(ip, new); @@ -294,13 +298,28 @@ int ftrace_int3_handler(struct pt_regs *regs) if (WARN_ON_ONCE(!regs)) return 0; - ip = regs->ip - 1; - if (!ftrace_location(ip) && !is_ftrace_caller(ip)) - return 0; + ip = regs->ip - INT3_INSN_SIZE; - regs->ip += MCOUNT_INSN_SIZE - 1; +#ifdef CONFIG_X86_64 + if (ftrace_location(ip)) { + int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); + return 1; + } else if (is_ftrace_caller(ip)) { + if (!ftrace_update_func_call) { + int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); + return 1; + } + int3_emulate_call(regs, ftrace_update_func_call); + return 1; + } +#else + if (ftrace_location(ip) || is_ftrace_caller(ip)) { + int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); + return 1; + } +#endif - return 1; + return 0; } NOKPROBE_SYMBOL(ftrace_int3_handler); @@ -730,6 +749,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned long end_offset; unsigned long op_offset; unsigned long offset; + unsigned long npages; unsigned long size; unsigned long retq; unsigned long *ptr; @@ -762,6 +782,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) return 0; *tramp_size = size + RET_SIZE + sizeof(void *); + npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE); /* Copy ftrace_caller onto the trampoline memory */ ret = probe_kernel_read(trampoline, (void *)start_offset, size); @@ -806,6 +827,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* ALLOC_TRAMP flags lets us know we created it */ ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; + /* + * Module allocation needs to be completed by making the page + * executable. The page is still writable, which is a security hazard, + * but anyhow ftrace breaks W^X completely. + */ + set_memory_x((unsigned long)trampoline, npages); return (unsigned long)trampoline; fail: tramp_free(trampoline, *tramp_size); @@ -859,6 +886,8 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) func = ftrace_ops_get_func(ops); + ftrace_update_func_call = (unsigned long)func; + /* Do a safe modify in case the trampoline is executing */ new = ftrace_call_replace(ip, (unsigned long)func); ret = update_ftrace_func(ip, new); @@ -960,6 +989,7 @@ static int ftrace_mod_jmp(unsigned long ip, void *func) { unsigned char *new; + ftrace_update_func_call = 0UL; new = ftrace_jmp_replace(ip, (unsigned long)func); return update_ftrace_func(ip, new); diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c index e47cd9390ab4..2a2e87717bad 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/arch/x86/kernel/ima_arch.c @@ -17,6 +17,11 @@ static enum efi_secureboot_mode get_sb_mode(void) size = sizeof(secboot); + if (!efi_enabled(EFI_RUNTIME_SERVICES)) { + pr_info("ima: secureboot mode unknown, no efi\n"); + return efi_secureboot_mode_unknown; + } + /* Get variable contents into buffer */ status = efi.get_variable(efi_SecureBoot_name, &efi_variable_guid, NULL, &size, &secboot); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0469cd078db1..b50ac9c7397b 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -26,9 +26,18 @@ int sysctl_panic_on_stackoverflow; /* * Probabilistic stack overflow check: * - * Only check the stack in process context, because everything else - * runs on the big interrupt stacks. Checking reliably is too expensive, - * so we just check from interrupts. + * Regular device interrupts can enter on the following stacks: + * + * - User stack + * + * - Kernel task stack + * + * - Interrupt stack if a device driver reenables interrupts + * which should only happen in really old drivers. + * + * - Debug IST stack + * + * All other contexts are invalid. */ static inline void stack_overflow_check(struct pt_regs *regs) { @@ -53,8 +62,8 @@ static inline void stack_overflow_check(struct pt_regs *regs) return; oist = this_cpu_ptr(&orig_ist); - estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN; - estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; + estack_bottom = (u64)oist->ist[DEBUG_STACK]; + estack_top = estack_bottom - DEBUG_STKSZ + STACK_TOP_MARGIN; if (regs->sp >= estack_top && regs->sp <= estack_bottom) return; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index fed46ddb1eef..06058c44ab57 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -431,8 +431,20 @@ void *alloc_insn_page(void) void *page; page = module_alloc(PAGE_SIZE); - if (page) - set_memory_ro((unsigned long)page & PAGE_MASK, 1); + if (!page) + return NULL; + + /* + * First make the page read-only, and only then make it executable to + * prevent it from being W+X in between. + */ + set_memory_ro((unsigned long)page, 1); + + /* + * TODO: Once additional kernel code protection mechanisms are set, ensure + * that the page was not maliciously altered and it is still zeroed. + */ + set_memory_x((unsigned long)page, 1); return page; } @@ -440,8 +452,12 @@ void *alloc_insn_page(void) /* Recover page to RW mode before releasing it */ void free_insn_page(void *page) { - set_memory_nx((unsigned long)page & PAGE_MASK, 1); - set_memory_rw((unsigned long)page & PAGE_MASK, 1); + /* + * First make the page non-executable, and only then make it writable to + * prevent it from being W+X in between. + */ + set_memory_nx((unsigned long)page, 1); + set_memory_rw((unsigned long)page, 1); module_memfree(page); } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b052e883dd8c..cfa3106faee4 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -87,7 +87,7 @@ void *module_alloc(unsigned long size) p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR + get_module_load_offset(), MODULES_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (p && (kasan_module_alloc(p, size) < 0)) { vfree(p); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 18bc9b51ac9b..086cf1d1d71d 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -34,6 +34,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -533,6 +534,9 @@ do_nmi(struct pt_regs *regs, long error_code) write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; + + if (user_mode(regs)) + mds_user_clear_cpu_buffers(); } NOKPROBE_SYMBOL(do_nmi); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index e471d8e6f0b2..70933193878c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -127,6 +127,13 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, struct task_struct *tsk; int err; + /* + * For a new task use the RESET flags value since there is no before. + * All the status flags are zero; DF and all the system flags must also + * be 0, specifically IF must be 0 because we context switch to the new + * task with interrupts disabled. + */ + frame->flags = X86_EFLAGS_FIXED; frame->bp = 0; frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6a62f4af9fcf..026a43be9bd1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -392,6 +392,14 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, childregs = task_pt_regs(p); fork_frame = container_of(childregs, struct fork_frame, regs); frame = &fork_frame->frame; + + /* + * For a new task use the RESET flags value since there is no before. + * All the status flags are zero; DF and all the system flags must also + * be 0, specifically IF must be 0 because we context switch to the new + * task with interrupts disabled. + */ + frame->flags = X86_EFLAGS_FIXED; frame->bp = 0; frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 08dfd4c1a4f9..c8aa58a2bab9 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -132,16 +132,6 @@ static int restore_sigcontext(struct pt_regs *regs, COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); -#ifdef CONFIG_X86_64 - /* - * Fix up SS if needed for the benefit of old DOSEMU and - * CRIU. - */ - if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && - user_64bit_mode(regs))) - force_valid_ss(regs); -#endif - get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); regs->orig_ax = -1; /* disable syscall checks */ @@ -150,6 +140,15 @@ static int restore_sigcontext(struct pt_regs *regs, buf = (void __user *)buf_val; } get_user_catch(err); +#ifdef CONFIG_X86_64 + /* + * Fix up SS if needed for the benefit of old DOSEMU and + * CRIU. + */ + if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && user_64bit_mode(regs))) + force_valid_ss(regs); +#endif + err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32)); force_iret(); @@ -461,6 +460,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, { struct rt_sigframe __user *frame; void __user *fp = NULL; + unsigned long uc_flags; int err = 0; frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp); @@ -473,9 +473,11 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, return -EFAULT; } + uc_flags = frame_uc_flags(regs); + put_user_try { /* Create the ucontext. */ - put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); + put_user_ex(uc_flags, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); save_altstack_ex(&frame->uc.uc_stack, regs->sp); @@ -541,6 +543,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, { #ifdef CONFIG_X86_X32_ABI struct rt_sigframe_x32 __user *frame; + unsigned long uc_flags; void __user *restorer; int err = 0; void __user *fpstate = NULL; @@ -555,9 +558,11 @@ static int x32_setup_rt_frame(struct ksignal *ksig, return -EFAULT; } + uc_flags = frame_uc_flags(regs); + put_user_try { /* Create the ucontext. */ - put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); + put_user_ex(uc_flags, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); put_user_ex(0, &frame->uc.uc__pad0); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index fd3951638ae4..bbbe611f0c49 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -410,7 +410,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP); + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | + F(MD_CLEAR); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index faa264822cee..007bc654f928 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -172,3 +172,10 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu) __kvm_migrate_apic_timer(vcpu); __kvm_migrate_pit_timer(vcpu); } + +bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args) +{ + bool resample = args->flags & KVM_IRQFD_FLAG_RESAMPLE; + + return resample ? irqchip_kernel(kvm) : irqchip_in_kernel(kvm); +} diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index d5005cc26521..fd210cdd4983 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -114,6 +114,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm) return mode != KVM_IRQCHIP_NONE; } +bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bd13fdddbdc4..ea188545a15c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1454,7 +1454,7 @@ static void apic_timer_expired(struct kvm_lapic *apic) if (swait_active(q)) swake_up_one(q); - if (apic_lvtt_tscdeadline(apic)) + if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) ktimer->expired_tscdeadline = ktimer->tscdeadline; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d9c7b45d231f..85438a624930 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5591,14 +5591,18 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) struct page *page; int i; - if (tdp_enabled) - return 0; - /* - * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. - * Therefore we need to allocate shadow page tables in the first - * 4GB of memory, which happens to fit the DMA32 zone. + * When using PAE paging, the four PDPTEs are treated as 'root' pages, + * while the PDP table is a per-vCPU construct that's allocated at MMU + * creation. When emulating 32-bit mode, cr3 is only 32 bits even on + * x86_64. Therefore we need to allocate the PDP table in the first + * 4GB of memory, which happens to fit the DMA32 zone. Except for + * SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can + * skip allocating the PDP table. */ + if (tdp_enabled && kvm_x86_ops->get_tdp_level(vcpu) > PT32E_ROOT_LEVEL) + return 0; + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32); if (!page) return -ENOMEM; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index e39741997893..dd745b58ffd8 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -283,7 +283,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) bool fast_mode = idx & (1u << 31); struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; - u64 ctr_val; + u64 mask = fast_mode ? ~0u : ~0ull; if (!pmu->version) return 1; @@ -291,15 +291,11 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) if (is_vmware_backdoor_pmc(idx)) return kvm_pmu_rdpmc_vmware(vcpu, idx, data); - pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx); + pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask); if (!pmc) return 1; - ctr_val = pmc_read_counter(pmc); - if (fast_mode) - ctr_val = (u32)ctr_val; - - *data = ctr_val; + *data = pmc_read_counter(pmc) & mask; return 0; } diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index ba8898e1a854..22dff661145a 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -25,7 +25,8 @@ struct kvm_pmu_ops { unsigned (*find_fixed_event)(int idx); bool (*pmc_is_enabled)(struct kvm_pmc *pmc); struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx); - struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx); + struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx, + u64 *mask); int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx); bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data); diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index 1495a735b38e..d3118088f1cd 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -186,7 +186,7 @@ static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) } /* idx is the ECX register of RDPMC instruction */ -static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx) +static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *mask) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *counters; @@ -269,10 +269,10 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; pmu->reserved_bits = 0xffffffff00200000ull; + pmu->version = 1; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->nr_arch_fixed_counters = 0; - pmu->version = 0; pmu->global_status = 0; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 406b558abfef..aa3b77acfbf9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -379,6 +379,9 @@ module_param(vgif, int, 0444); static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev, int, 0444); +static bool __read_mostly dump_invalid_vmcb = 0; +module_param(dump_invalid_vmcb, bool, 0644); + static u8 rsm_ins_bytes[] = "\x0f\xaa"; static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); @@ -2024,7 +2027,11 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (!kvm_vcpu_apicv_active(vcpu)) return; - if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT)) + /* + * Since the host physical APIC id is 8 bits, + * we can support host APIC ID upto 255. + */ + if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) return; entry = READ_ONCE(*(svm->avic_physical_id_cache)); @@ -4830,6 +4837,11 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; + if (!dump_invalid_vmcb) { + pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n"); + return; + } + pr_err("VMCB Control Area:\n"); pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); @@ -4988,7 +5000,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason = svm->vmcb->control.exit_code; - pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); dump_vmcb(vcpu); return 0; } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0c601d079cd2..5fa0c17d0b41 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2792,14 +2792,13 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) : "cc", "memory" ); - preempt_enable(); - if (vmx->msr_autoload.host.nr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); if (vmx->msr_autoload.guest.nr) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); if (vm_fail) { + preempt_enable(); WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; @@ -2811,6 +2810,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) local_irq_enable(); if (hw_breakpoint_active()) set_debugreg(__this_cpu_read(cpu_dr7), 7); + preempt_enable(); /* * A non-failing VMEntry means we somehow entered guest mode with @@ -5467,7 +5467,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, vmcs12->vmcs_link_pointer != -1ull) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); - if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12)) + if (kvm_state->size < sizeof(*kvm_state) + VMCS12_SIZE + sizeof(*vmcs12)) return -EINVAL; if (copy_from_user(shadow_vmcs12, diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 5ab4a364348e..c3f103e2b08e 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -126,7 +126,7 @@ static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) } static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, - unsigned idx) + unsigned idx, u64 *mask) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); @@ -138,6 +138,7 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, if (fixed && idx >= pmu->nr_arch_fixed_counters) return NULL; counters = fixed ? pmu->fixed_counters : pmu->gp_counters; + *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP]; return &counters[idx]; } @@ -183,9 +184,13 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) *data = pmu->global_ovf_ctrl; return 0; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || - (pmc = get_fixed_pmc(pmu, msr))) { - *data = pmc_read_counter(pmc); + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { + u64 val = pmc_read_counter(pmc); + *data = val & pmu->counter_bitmask[KVM_PMC_GP]; + return 0; + } else if ((pmc = get_fixed_pmc(pmu, msr))) { + u64 val = pmc_read_counter(pmc); + *data = val & pmu->counter_bitmask[KVM_PMC_FIXED]; return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { *data = pmc->eventsel; @@ -235,11 +240,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || - (pmc = get_fixed_pmc(pmu, msr))) { - if (!msr_info->host_initiated) - data = (s64)(s32)data; - pmc->counter += data - pmc_read_counter(pmc); + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { + if (msr_info->host_initiated) + pmc->counter = data; + else + pmc->counter = (s32)data; + return 0; + } else if ((pmc = get_fixed_pmc(pmu, msr))) { + pmc->counter = data; return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { if (data == pmc->eventsel) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 0c955bb286ff..cfb8f1ec9a0a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -114,6 +114,9 @@ static u64 __read_mostly host_xss; bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +static bool __read_mostly dump_invalid_vmcs = 0; +module_param(dump_invalid_vmcs, bool, 0644); + #define MSR_BITMAP_MODE_X2APIC 1 #define MSR_BITMAP_MODE_X2APIC_APICV 2 @@ -5605,15 +5608,24 @@ static void vmx_dump_dtsel(char *name, uint32_t limit) void dump_vmcs(void) { - u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); - u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); - u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); - u32 secondary_exec_control = 0; - unsigned long cr4 = vmcs_readl(GUEST_CR4); - u64 efer = vmcs_read64(GUEST_IA32_EFER); + u32 vmentry_ctl, vmexit_ctl; + u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control; + unsigned long cr4; + u64 efer; int i, n; + if (!dump_invalid_vmcs) { + pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n"); + return; + } + + vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); + vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); + cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); + cr4 = vmcs_readl(GUEST_CR4); + efer = vmcs_read64(GUEST_IA32_EFER); + secondary_exec_control = 0; if (cpu_has_secondary_exec_ctrls()) secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); @@ -6431,8 +6443,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) */ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); + /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); + else if (static_branch_unlikely(&mds_user_clear)) + mds_clear_cpu_buffers(); if (vcpu->arch.cr2 != read_cr2()) write_cr2(vcpu->arch.cr2); @@ -6668,8 +6683,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return ERR_PTR(err); } -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" static int vmx_vm_init(struct kvm *kvm) { @@ -6853,30 +6868,6 @@ static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) } } -static bool guest_cpuid_has_pmu(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *entry; - union cpuid10_eax eax; - - entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); - if (!entry) - return false; - - eax.full = entry->eax; - return (eax.split.version_id > 0); -} - -static void nested_vmx_procbased_ctls_update(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - bool pmu_enabled = guest_cpuid_has_pmu(vcpu); - - if (pmu_enabled) - vmx->nested.msrs.procbased_ctls_high |= CPU_BASED_RDPMC_EXITING; - else - vmx->nested.msrs.procbased_ctls_high &= ~CPU_BASED_RDPMC_EXITING; -} - static void update_intel_pt_cfg(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6965,7 +6956,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); nested_vmx_entry_exit_ctls_update(vcpu); - nested_vmx_procbased_ctls_update(vcpu); } if (boot_cpu_has(X86_FEATURE_INTEL_PT) && diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index f879529906b4..9cd72decdfe4 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -314,6 +314,7 @@ void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct vcpu_vmx *vmx); +void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); #define POSTED_INTR_ON 0 #define POSTED_INTR_SN 1 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b5edc8e3ce1d..b07868eb1656 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -143,7 +143,7 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); * tuning, i.e. allows priveleged userspace to set an exact advancement time. */ static int __read_mostly lapic_timer_advance_ns = -1; -module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); +module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR); static bool __read_mostly vector_hashing = true; module_param(vector_hashing, bool, S_IRUGO); @@ -1262,31 +1262,42 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) return 0; } -bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) +static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { - if (efer & efer_reserved_bits) - return false; - if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT)) - return false; + return false; if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) - return false; + return false; return true; + +} +bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + if (efer & efer_reserved_bits) + return false; + + return __kvm_valid_efer(vcpu, efer); } EXPORT_SYMBOL_GPL(kvm_valid_efer); -static int set_efer(struct kvm_vcpu *vcpu, u64 efer) +static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u64 old_efer = vcpu->arch.efer; + u64 efer = msr_info->data; - if (!kvm_valid_efer(vcpu, efer)) + if (efer & efer_reserved_bits) return 1; - if (is_paging(vcpu) - && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) - return 1; + if (!msr_info->host_initiated) { + if (!__kvm_valid_efer(vcpu, efer)) + return 1; + + if (is_paging(vcpu) && + (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) + return 1; + } efer &= ~EFER_LMA; efer |= vcpu->arch.efer & EFER_LMA; @@ -2456,7 +2467,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.arch_capabilities = data; break; case MSR_EFER: - return set_efer(vcpu, data); + return set_efer(vcpu, msr_info); case MSR_K7_HWCR: data &= ~(u64)0x40; /* ignore flush filter disable */ data &= ~(u64)0x100; /* ignore ignne emulation enable */ @@ -3079,6 +3090,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_MAX_VCPU_ID: + r = KVM_MAX_VCPU_ID; + break; case KVM_CAP_NR_MEMSLOTS: r = KVM_USER_MEM_SLOTS; break; diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index cf00ab6c6621..306c3a0902ba 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -557,7 +557,8 @@ static int get_reg_offset_16(struct insn *insn, struct pt_regs *regs, } /** - * get_desc() - Obtain pointer to a segment descriptor + * get_desc() - Obtain contents of a segment descriptor + * @out: Segment descriptor contents on success * @sel: Segment selector * * Given a segment selector, obtain a pointer to the segment descriptor. @@ -565,18 +566,18 @@ static int get_reg_offset_16(struct insn *insn, struct pt_regs *regs, * * Returns: * - * Pointer to segment descriptor on success. + * True on success, false on failure. * * NULL on error. */ -static struct desc_struct *get_desc(unsigned short sel) +static bool get_desc(struct desc_struct *out, unsigned short sel) { struct desc_ptr gdt_desc = {0, 0}; unsigned long desc_base; #ifdef CONFIG_MODIFY_LDT_SYSCALL if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT) { - struct desc_struct *desc = NULL; + bool success = false; struct ldt_struct *ldt; /* Bits [15:3] contain the index of the desired entry. */ @@ -584,12 +585,14 @@ static struct desc_struct *get_desc(unsigned short sel) mutex_lock(¤t->active_mm->context.lock); ldt = current->active_mm->context.ldt; - if (ldt && sel < ldt->nr_entries) - desc = &ldt->entries[sel]; + if (ldt && sel < ldt->nr_entries) { + *out = ldt->entries[sel]; + success = true; + } mutex_unlock(¤t->active_mm->context.lock); - return desc; + return success; } #endif native_store_gdt(&gdt_desc); @@ -604,9 +607,10 @@ static struct desc_struct *get_desc(unsigned short sel) desc_base = sel & ~(SEGMENT_RPL_MASK | SEGMENT_TI_MASK); if (desc_base > gdt_desc.size) - return NULL; + return false; - return (struct desc_struct *)(gdt_desc.address + desc_base); + *out = *(struct desc_struct *)(gdt_desc.address + desc_base); + return true; } /** @@ -628,7 +632,7 @@ static struct desc_struct *get_desc(unsigned short sel) */ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) { - struct desc_struct *desc; + struct desc_struct desc; short sel; sel = get_segment_selector(regs, seg_reg_idx); @@ -666,11 +670,10 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) if (!sel) return -1L; - desc = get_desc(sel); - if (!desc) + if (!get_desc(&desc, sel)) return -1L; - return get_desc_base(desc); + return get_desc_base(&desc); } /** @@ -692,7 +695,7 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) */ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx) { - struct desc_struct *desc; + struct desc_struct desc; unsigned long limit; short sel; @@ -706,8 +709,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx) if (!sel) return 0; - desc = get_desc(sel); - if (!desc) + if (!get_desc(&desc, sel)) return 0; /* @@ -716,8 +718,8 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx) * not tested when checking the segment limits. In practice, * this means that the segment ends in (limit << 12) + 0xfff. */ - limit = get_desc_limit(desc); - if (desc->g) + limit = get_desc_limit(&desc); + if (desc.g) limit = (limit << 12) + 0xfff; return limit; @@ -741,7 +743,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx) */ int insn_get_code_seg_params(struct pt_regs *regs) { - struct desc_struct *desc; + struct desc_struct desc; short sel; if (v8086_mode(regs)) @@ -752,8 +754,7 @@ int insn_get_code_seg_params(struct pt_regs *regs) if (sel < 0) return sel; - desc = get_desc(sel); - if (!desc) + if (!get_desc(&desc, sel)) return -EINVAL; /* @@ -761,10 +762,10 @@ int insn_get_code_seg_params(struct pt_regs *regs) * determines whether a segment contains data or code. If this is a data * segment, return error. */ - if (!(desc->type & BIT(3))) + if (!(desc.type & BIT(3))) return -EINVAL; - switch ((desc->l << 1) | desc->d) { + switch ((desc.l << 1) | desc.d) { case 0: /* * Legacy mode. CS.L=0, CS.D=0. Address and operand size are * both 16-bit. diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 3b24dc05251c..9d05572370ed 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -257,6 +257,7 @@ ENTRY(__memcpy_mcsafe) /* Copy successful. Return zero */ .L_done_memcpy_trap: xorl %eax, %eax +.L_done: ret ENDPROC(__memcpy_mcsafe) EXPORT_SYMBOL_GPL(__memcpy_mcsafe) @@ -273,7 +274,7 @@ EXPORT_SYMBOL_GPL(__memcpy_mcsafe) addl %edx, %ecx .E_trailing_bytes: mov %ecx, %eax - ret + jmp .L_done /* * For write fault handling, given the destination is unaligned, diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 667f1da36208..5eaf67e8314f 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -359,8 +359,6 @@ static noinline int vmalloc_fault(unsigned long address) if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; - WARN_ON_ONCE(in_nmi()); - /* * Copy kernel mappings over when needed. This can also * happen within a race in page table update. In the later diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 8dc0fc0b1382..296da58f3013 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -199,7 +199,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr) if (!pgtable_l5_enabled()) return (p4d_t *)pgd; - p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; + p4d = pgd_val(*pgd) & PTE_PFN_MASK; p4d += __START_KERNEL_map - phys_base; return (p4d_t *)p4d + p4d_index(addr); } diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index d669c5e797e0..5aef69b7ff52 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -52,7 +52,7 @@ static __initdata struct kaslr_memory_region { } kaslr_regions[] = { { &page_offset_base, 0 }, { &vmalloc_base, 0 }, - { &vmemmap_base, 1 }, + { &vmemmap_base, 0 }, }; /* Get size in bytes used by the memory region */ @@ -78,6 +78,7 @@ void __init kernel_randomize_memory(void) unsigned long rand, memory_tb; struct rnd_state rand_state; unsigned long remain_entropy; + unsigned long vmemmap_size; vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4; vaddr = vaddr_start; @@ -109,6 +110,14 @@ void __init kernel_randomize_memory(void) if (memory_tb < kaslr_regions[0].size_tb) kaslr_regions[0].size_tb = memory_tb; + /* + * Calculate the vmemmap region size in TBs, aligned to a TB + * boundary. + */ + vmemmap_size = (kaslr_regions[0].size_tb << (TB_SHIFT - PAGE_SHIFT)) * + sizeof(struct page); + kaslr_regions[2].size_tb = DIV_ROUND_UP(vmemmap_size, 1UL << TB_SHIFT); + /* Calculate entropy available between regions */ remain_entropy = vaddr_end - vaddr_start; for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c805db6236b4..7aeb9fe2955f 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -881,9 +881,10 @@ static int mpx_unmap_tables(struct mm_struct *mm, * the virtual address region start...end have already been split if * necessary, and the 'vma' is the first vma in this range (start -> end). */ -void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long start, unsigned long end) +void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, + unsigned long end) { + struct vm_area_struct *vma; int ret; /* @@ -902,11 +903,12 @@ void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, * which should not occur normally. Being strict about it here * helps ensure that we do not have an exploitable stack overflow. */ - do { + vma = find_vma(mm, start); + while (vma && vma->vm_start < end) { if (vma->vm_flags & VM_MPX) return; vma = vma->vm_next; - } while (vma && vma->vm_start < end); + } ret = mpx_unmap_tables(mm, start, end); if (ret) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 139b28a01ce4..d0255d64edce 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -115,7 +116,8 @@ void __init pti_check_boottime_disable(void) } } - if (cmdline_find_option_bool(boot_command_line, "nopti")) { + if (cmdline_find_option_bool(boot_command_line, "nopti") || + cpu_mitigations_off()) { pti_mode = PTI_FORCE_OFF; pti_print_if_insecure("disabled on command line."); return; diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 52e55108404e..d3a73f9335e1 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1119,6 +1119,8 @@ static const struct dmi_system_id pciirq_dmi_table[] __initconst = { void __init pcibios_irq_init(void) { + struct irq_routing_table *rtable = NULL; + DBG(KERN_DEBUG "PCI: IRQ init\n"); if (raw_pci_ops == NULL) @@ -1129,8 +1131,10 @@ void __init pcibios_irq_init(void) pirq_table = pirq_find_routing_table(); #ifdef CONFIG_PCI_BIOS - if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) + if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) { pirq_table = pcibios_get_irq_routing_table(); + rtable = pirq_table; + } #endif if (pirq_table) { pirq_peer_trick(); @@ -1145,8 +1149,10 @@ void __init pcibios_irq_init(void) * If we're using the I/O APIC, avoid using the PCI IRQ * routing table */ - if (io_apic_assign_pci_irqs) + if (io_apic_assign_pci_irqs) { + kfree(rtable); pirq_table = NULL; + } } x86_init.pci.fixup_irqs(); diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 62f5c7045944..1861a2ba0f2b 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -44,8 +44,6 @@ void __init __weak mem_map_via_hcall(struct boot_params *ptr __maybe_unused) static void __init init_pvh_bootparams(bool xen_guest) { - memset(&pvh_bootparams, 0, sizeof(pvh_bootparams)); - if ((pvh_start_info.version > 0) && (pvh_start_info.memmap_entries)) { struct hvm_memmap_table_entry *ep; int i; @@ -103,7 +101,7 @@ static void __init init_pvh_bootparams(bool xen_guest) * If we are trying to boot a Xen PVH guest, it is expected that the kernel * will have been configured to provide the required override for this routine. */ -void __init __weak xen_pvh_init(void) +void __init __weak xen_pvh_init(struct boot_params *boot_params) { xen_raw_printk("Error: Missing xen PVH initialization\n"); BUG(); @@ -112,7 +110,7 @@ void __init __weak xen_pvh_init(void) static void hypervisor_specific_init(bool xen_guest) { if (xen_guest) - xen_pvh_init(); + xen_pvh_init(&pvh_bootparams); } /* @@ -131,6 +129,8 @@ void __init xen_prepare_pvh(void) BUG(); } + memset(&pvh_bootparams, 0, sizeof(pvh_bootparams)); + hypervisor_specific_init(xen_guest); init_pvh_bootparams(xen_guest); diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 2c53b0f19329..1297e185b8c8 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -2133,14 +2133,19 @@ static int __init summarize_uvhub_sockets(int nuvhubs, */ static int __init init_per_cpu(int nuvhubs, int base_part_pnode) { - unsigned char *uvhub_mask; struct uvhub_desc *uvhub_descs; + unsigned char *uvhub_mask = NULL; if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub()) timeout_us = calculate_destination_timeout(); uvhub_descs = kcalloc(nuvhubs, sizeof(struct uvhub_desc), GFP_KERNEL); + if (!uvhub_descs) + goto fail; + uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); + if (!uvhub_mask) + goto fail; if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask)) goto fail; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index a7d966964c6f..513ce09e9950 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -299,7 +299,17 @@ int hibernate_resume_nonboot_cpu_disable(void) * address in its instruction pointer may not be possible to resolve * any more at that point (the page tables used by it previously may * have been overwritten by hibernate image data). + * + * First, make sure that we wake up all the potentially disabled SMT + * threads which have been initially brought up and then put into + * mwait/cpuidle sleep. + * Those will be put to proper (not interfering with hibernation + * resume) sleep afterwards, and the resumed kernel will decide itself + * what to do with them. */ + ret = cpuhp_smt_enable(); + if (ret) + return ret; smp_ops.play_dead = resume_play_dead; ret = disable_nonboot_cpus(); smp_ops.play_dead = play_dead; diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index bcddf09b5aa3..b924a81ca0df 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -246,3 +247,35 @@ int relocate_restore_code(void) __flush_tlb_all(); return 0; } + +int arch_resume_nosmt(void) +{ + int ret = 0; + /* + * We reached this while coming out of hibernation. This means + * that SMT siblings are sleeping in hlt, as mwait is not safe + * against control transition during resume (see comment in + * hibernate_resume_nonboot_cpu_disable()). + * + * If the resumed kernel has SMT disabled, we have to take all the + * SMT siblings out of hlt, and offline them again so that they + * end up in mwait proper. + * + * Called with hotplug disabled. + */ + cpu_hotplug_enable(); + if (cpu_smt_control == CPU_SMT_DISABLED || + cpu_smt_control == CPU_SMT_FORCE_DISABLED) { + enum cpuhp_smt_control old = cpu_smt_control; + + ret = cpuhp_smt_enable(); + if (ret) + goto out; + ret = cpuhp_smt_disable(old); + if (ret) + goto out; + } +out: + cpu_hotplug_disable(); + return ret; +} diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 1fbb629a9d78..0d3365cb64de 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -158,7 +158,7 @@ static enum efi_secureboot_mode xen_efi_get_secureboot(void) return efi_secureboot_mode_unknown; } -void __init xen_efi_init(void) +void __init xen_efi_init(struct boot_params *boot_params) { efi_system_table_t *efi_systab_xen; @@ -167,12 +167,12 @@ void __init xen_efi_init(void) if (efi_systab_xen == NULL) return; - strncpy((char *)&boot_params.efi_info.efi_loader_signature, "Xen", - sizeof(boot_params.efi_info.efi_loader_signature)); - boot_params.efi_info.efi_systab = (__u32)__pa(efi_systab_xen); - boot_params.efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32); + strncpy((char *)&boot_params->efi_info.efi_loader_signature, "Xen", + sizeof(boot_params->efi_info.efi_loader_signature)); + boot_params->efi_info.efi_systab = (__u32)__pa(efi_systab_xen); + boot_params->efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32); - boot_params.secure_boot = xen_efi_get_secureboot(); + boot_params->secure_boot = xen_efi_get_secureboot(); set_bit(EFI_BOOT, &efi.flags); set_bit(EFI_PARAVIRT, &efi.flags); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index c54a493e139a..4722ba2966ac 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1403,7 +1403,7 @@ asmlinkage __visible void __init xen_start_kernel(void) /* We need this for printk timestamps */ xen_setup_runstate_info(0); - xen_efi_init(); + xen_efi_init(&boot_params); /* Start the world */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 35b7599d2d0b..80a79db72fcf 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -13,6 +13,8 @@ #include +#include "xen-ops.h" + /* * PVH variables. * @@ -21,17 +23,20 @@ */ bool xen_pvh __attribute__((section(".data"))) = 0; -void __init xen_pvh_init(void) +void __init xen_pvh_init(struct boot_params *boot_params) { u32 msr; u64 pfn; xen_pvh = 1; + xen_domain_type = XEN_HVM_DOMAIN; xen_start_flags = pvh_start_info.flags; msr = cpuid_ebx(xen_cpuid_base() + 2); pfn = __pa(hypercall_page); wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + + xen_efi_init(boot_params); } void __init mem_map_via_hcall(struct boot_params *boot_params_p) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 0e60bd918695..2f111f47ba98 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -122,9 +122,9 @@ static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, void __init xen_init_apic(void); #ifdef CONFIG_XEN_EFI -extern void xen_efi_init(void); +extern void xen_efi_init(struct boot_params *boot_params); #else -static inline void __init xen_efi_init(void) +static inline void __init xen_efi_init(struct boot_params *boot_params) { } #endif diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 4ec6fbb696bf..a5139f1d9220 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -310,7 +310,8 @@ extern char _SecondaryResetVector_text_start; extern char _SecondaryResetVector_text_end; #endif -static inline int mem_reserve(unsigned long start, unsigned long end) +static inline int __init_memblock mem_reserve(unsigned long start, + unsigned long end) { return memblock_reserve(start, end - start); } diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index c6113af31960..19e61b192c2f 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -578,7 +578,8 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfqg_and_blkg_get(bfqg); if (bfq_bfqq_busy(bfqq)) { - bfq_pos_tree_add_move(bfqd, bfqq); + if (unlikely(!bfqd->nonrot_with_queueing)) + bfq_pos_tree_add_move(bfqd, bfqq); bfq_activate_bfqq(bfqd, bfqq); } @@ -1054,7 +1055,8 @@ struct blkcg_policy blkcg_policy_bfq = { struct cftype bfq_blkcg_legacy_files[] = { { .name = "bfq.weight", - .flags = CFTYPE_NOT_ON_ROOT, + .link_name = "weight", + .flags = CFTYPE_NOT_ON_ROOT | CFTYPE_SYMLINKED, .seq_show = bfq_io_show_weight, .write_u64 = bfq_io_set_weight_legacy, }, @@ -1102,7 +1104,7 @@ struct cftype bfq_blkcg_legacy_files[] = { }, #endif /* CONFIG_DEBUG_BLK_CGROUP */ - /* the same statictics which cover the bfqg and its descendants */ + /* the same statistics which cover the bfqg and its descendants */ { .name = "bfq.io_service_bytes_recursive", .private = (unsigned long)&blkcg_policy_bfq, @@ -1174,7 +1176,8 @@ struct cftype bfq_blkcg_legacy_files[] = { struct cftype bfq_blkg_files[] = { { .name = "bfq.weight", - .flags = CFTYPE_NOT_ON_ROOT, + .link_name = "weight", + .flags = CFTYPE_NOT_ON_ROOT | CFTYPE_SYMLINKED, .seq_show = bfq_io_show_weight, .write = bfq_io_set_weight, }, diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 5ba1e0d841b4..dda82620ab6f 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -166,6 +166,7 @@ BFQ_BFQQ_FNS(in_large_burst); BFQ_BFQQ_FNS(coop); BFQ_BFQQ_FNS(split_coop); BFQ_BFQQ_FNS(softrt_update); +BFQ_BFQQ_FNS(has_waker); #undef BFQ_BFQQ_FNS \ /* Expiration time of sync (0) and async (1) requests, in ns. */ @@ -189,7 +190,7 @@ static const int bfq_default_max_budget = 16 * 1024; /* * When a sync request is dispatched, the queue that contains that * request, and all the ancestor entities of that queue, are charged - * with the number of sectors of the request. In constrast, if the + * with the number of sectors of the request. In contrast, if the * request is async, then the queue and its ancestor entities are * charged with the number of sectors of the request, multiplied by * the factor below. This throttles the bandwidth for async I/O, @@ -217,7 +218,7 @@ const int bfq_timeout = HZ / 8; * queue merging. * * As can be deduced from the low time limit below, queue merging, if - * successful, happens at the very beggining of the I/O of the involved + * successful, happens at the very beginning of the I/O of the involved * cooperating processes, as a consequence of the arrival of the very * first requests from each cooperator. After that, there is very * little chance to find cooperators. @@ -242,6 +243,14 @@ static struct kmem_cache *bfq_pool; blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT)) #define BFQQ_CLOSE_THR (sector_t)(8 * 1024) #define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 19) +/* + * Sync random I/O is likely to be confused with soft real-time I/O, + * because it is characterized by limited throughput and apparently + * isochronous arrival pattern. To avoid false positives, queues + * containing only random (seeky) I/O are prevented from being tagged + * as soft real-time. + */ +#define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history == -1) /* Min number of samples required to perform peak-rate update */ #define BFQ_RATE_MIN_SAMPLES 32 @@ -433,7 +442,7 @@ void bfq_schedule_dispatch(struct bfq_data *bfqd) /* * Lifted from AS - choose which of rq1 and rq2 that is best served now. - * We choose the request that is closesr to the head right now. Distance + * We choose the request that is closer to the head right now. Distance * behind the head is penalized and only allowed to a certain extent. */ static struct request *bfq_choose_req(struct bfq_data *bfqd, @@ -595,7 +604,16 @@ static bool bfq_too_late_for_merging(struct bfq_queue *bfqq) bfq_merge_time_limit); } -void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) +/* + * The following function is not marked as __cold because it is + * actually cold, but for the same performance goal described in the + * comments on the likely() at the beginning of + * bfq_setup_cooperator(). Unexpectedly, to reach an even lower + * execution time for the case where this function is not invoked, we + * had to add an unlikely() in each involved if(). + */ +void __cold +bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct rb_node **p, *parent; struct bfq_queue *__bfqq; @@ -629,12 +647,19 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) } /* - * The following function returns true if every queue must receive the - * same share of the throughput (this condition is used when deciding - * whether idling may be disabled, see the comments in the function - * bfq_better_to_idle()). + * The following function returns false either if every active queue + * must receive the same share of the throughput (symmetric scenario), + * or, as a special case, if bfqq must receive a share of the + * throughput lower than or equal to the share that every other active + * queue must receive. If bfqq does sync I/O, then these are the only + * two cases where bfqq happens to be guaranteed its share of the + * throughput even if I/O dispatching is not plugged when bfqq remains + * temporarily empty (for more details, see the comments in the + * function bfq_better_to_idle()). For this reason, the return value + * of this function is used to check whether I/O-dispatch plugging can + * be avoided. * - * Such a scenario occurs when: + * The above first case (symmetric scenario) occurs when: * 1) all active queues have the same weight, * 2) all active queues belong to the same I/O-priority class, * 3) all active groups at the same level in the groups tree have the same @@ -654,30 +679,36 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) * support or the cgroups interface are not enabled, thus no state * needs to be maintained in this case. */ -static bool bfq_symmetric_scenario(struct bfq_data *bfqd) +static bool bfq_asymmetric_scenario(struct bfq_data *bfqd, + struct bfq_queue *bfqq) { + bool smallest_weight = bfqq && + bfqq->weight_counter && + bfqq->weight_counter == + container_of( + rb_first_cached(&bfqd->queue_weights_tree), + struct bfq_weight_counter, + weights_node); + /* * For queue weights to differ, queue_weights_tree must contain * at least two nodes. */ - bool varied_queue_weights = !RB_EMPTY_ROOT(&bfqd->queue_weights_tree) && - (bfqd->queue_weights_tree.rb_node->rb_left || - bfqd->queue_weights_tree.rb_node->rb_right); + bool varied_queue_weights = !smallest_weight && + !RB_EMPTY_ROOT(&bfqd->queue_weights_tree.rb_root) && + (bfqd->queue_weights_tree.rb_root.rb_node->rb_left || + bfqd->queue_weights_tree.rb_root.rb_node->rb_right); bool multiple_classes_busy = (bfqd->busy_queues[0] && bfqd->busy_queues[1]) || (bfqd->busy_queues[0] && bfqd->busy_queues[2]) || (bfqd->busy_queues[1] && bfqd->busy_queues[2]); - /* - * For queue weights to differ, queue_weights_tree must contain - * at least two nodes. - */ - return !(varied_queue_weights || multiple_classes_busy + return varied_queue_weights || multiple_classes_busy #ifdef CONFIG_BFQ_GROUP_IOSCHED || bfqd->num_groups_with_pending_reqs > 0 #endif - ); + ; } /* @@ -694,10 +725,11 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) * should be low too. */ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, - struct rb_root *root) + struct rb_root_cached *root) { struct bfq_entity *entity = &bfqq->entity; - struct rb_node **new = &(root->rb_node), *parent = NULL; + struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; + bool leftmost = true; /* * Do not insert if the queue is already associated with a @@ -726,8 +758,10 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, } if (entity->weight < __counter->weight) new = &((*new)->rb_left); - else + else { new = &((*new)->rb_right); + leftmost = false; + } } bfqq->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), @@ -736,7 +770,7 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, /* * In the unlucky event of an allocation failure, we just * exit. This will cause the weight of queue to not be - * considered in bfq_symmetric_scenario, which, in its turn, + * considered in bfq_asymmetric_scenario, which, in its turn, * causes the scenario to be deemed wrongly symmetric in case * bfqq's weight would have been the only weight making the * scenario asymmetric. On the bright side, no unbalance will @@ -750,7 +784,8 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfqq->weight_counter->weight = entity->weight; rb_link_node(&bfqq->weight_counter->weights_node, parent, new); - rb_insert_color(&bfqq->weight_counter->weights_node, root); + rb_insert_color_cached(&bfqq->weight_counter->weights_node, root, + leftmost); inc_counter: bfqq->weight_counter->num_active++; @@ -765,7 +800,7 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, */ void __bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_queue *bfqq, - struct rb_root *root) + struct rb_root_cached *root) { if (!bfqq->weight_counter) return; @@ -774,7 +809,7 @@ void __bfq_weights_tree_remove(struct bfq_data *bfqd, if (bfqq->weight_counter->num_active > 0) goto reset_entity_pointer; - rb_erase(&bfqq->weight_counter->weights_node, root); + rb_erase_cached(&bfqq->weight_counter->weights_node, root); kfree(bfqq->weight_counter); reset_entity_pointer: @@ -889,7 +924,7 @@ static unsigned long bfq_serv_to_charge(struct request *rq, struct bfq_queue *bfqq) { if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1 || - !bfq_symmetric_scenario(bfqq->bfqd)) + bfq_asymmetric_scenario(bfqq->bfqd, bfqq)) return blk_rq_sectors(rq); return blk_rq_sectors(rq) * bfq_async_charge_factor; @@ -955,7 +990,7 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd) * of several files * mplayer took 23 seconds to start, if constantly weight-raised. * - * As for higher values than that accomodating the above bad + * As for higher values than that accommodating the above bad * scenario, tests show that higher values would often yield * the opposite of the desired result, i.e., would worsen * responsiveness by allowing non-interactive applications to @@ -994,6 +1029,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, else bfq_clear_bfqq_IO_bound(bfqq); + bfqq->entity.new_weight = bic->saved_weight; bfqq->ttime = bic->saved_ttime; bfqq->wr_coeff = bic->saved_wr_coeff; bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt; @@ -1041,8 +1077,18 @@ static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq) hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node) hlist_del_init(&item->burst_list_node); - hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); - bfqd->burst_size = 1; + + /* + * Start the creation of a new burst list only if there is no + * active queue. See comments on the conditional invocation of + * bfq_handle_burst(). + */ + if (bfq_tot_busy_queues(bfqd) == 0) { + hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); + bfqd->burst_size = 1; + } else + bfqd->burst_size = 0; + bfqd->burst_parent_entity = bfqq->entity.parent; } @@ -1098,7 +1144,8 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) * many parallel threads/processes. Examples are systemd during boot, * or git grep. To help these processes get their job done as soon as * possible, it is usually better to not grant either weight-raising - * or device idling to their queues. + * or device idling to their queues, unless these queues must be + * protected from the I/O flowing through other active queues. * * In this comment we describe, firstly, the reasons why this fact * holds, and, secondly, the next function, which implements the main @@ -1110,7 +1157,10 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) * cumulatively served, the sooner the target job of these queues gets * completed. As a consequence, weight-raising any of these queues, * which also implies idling the device for it, is almost always - * counterproductive. In most cases it just lowers throughput. + * counterproductive, unless there are other active queues to isolate + * these new queues from. If there no other active queues, then + * weight-raising these new queues just lowers throughput in most + * cases. * * On the other hand, a burst of queue creations may be caused also by * the start of an application that does not consist of a lot of @@ -1144,14 +1194,16 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) * are very rare. They typically occur if some service happens to * start doing I/O exactly when the interactive task starts. * - * Turning back to the next function, it implements all the steps - * needed to detect the occurrence of a large burst and to properly - * mark all the queues belonging to it (so that they can then be - * treated in a different way). This goal is achieved by maintaining a - * "burst list" that holds, temporarily, the queues that belong to the - * burst in progress. The list is then used to mark these queues as - * belonging to a large burst if the burst does become large. The main - * steps are the following. + * Turning back to the next function, it is invoked only if there are + * no active queues (apart from active queues that would belong to the + * same, possible burst bfqq would belong to), and it implements all + * the steps needed to detect the occurrence of a large burst and to + * properly mark all the queues belonging to it (so that they can then + * be treated in a different way). This goal is achieved by + * maintaining a "burst list" that holds, temporarily, the queues that + * belong to the burst in progress. The list is then used to mark + * these queues as belonging to a large burst if the burst does become + * large. The main steps are the following. * * . when the very first queue is created, the queue is inserted into the * list (as it could be the first queue in a possible burst) @@ -1385,17 +1437,19 @@ static int bfq_min_budget(struct bfq_data *bfqd) * mechanism may be re-designed in such a way to make it possible to * know whether preemption is needed without needing to update service * trees). In addition, queue preemptions almost always cause random - * I/O, and thus loss of throughput. Because of these facts, the next - * function adopts the following simple scheme to avoid both costly - * operations and too frequent preemptions: it requests the expiration - * of the in-service queue (unconditionally) only for queues that need - * to recover a hole, or that either are weight-raised or deserve to - * be weight-raised. + * I/O, which may in turn cause loss of throughput. Finally, there may + * even be no in-service queue when the next function is invoked (so, + * no queue to compare timestamps with). Because of these facts, the + * next function adopts the following simple scheme to avoid costly + * operations, too frequent preemptions and too many dependencies on + * the state of the scheduler: it requests the expiration of the + * in-service queue (unconditionally) only for queues that need to + * recover a hole. Then it delegates to other parts of the code the + * responsibility of handling the above case 2. */ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd, struct bfq_queue *bfqq, - bool arrived_in_time, - bool wr_or_deserves_wr) + bool arrived_in_time) { struct bfq_entity *entity = &bfqq->entity; @@ -1450,7 +1504,7 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd, entity->budget = max_t(unsigned long, bfqq->max_budget, bfq_serv_to_charge(bfqq->next_rq, bfqq)); bfq_clear_bfqq_non_blocking_wait_rq(bfqq); - return wr_or_deserves_wr; + return false; } /* @@ -1568,6 +1622,36 @@ static bool bfq_bfqq_idle_for_long_time(struct bfq_data *bfqd, bfqd->bfq_wr_min_idle_time); } + +/* + * Return true if bfqq is in a higher priority class, or has a higher + * weight than the in-service queue. + */ +static bool bfq_bfqq_higher_class_or_weight(struct bfq_queue *bfqq, + struct bfq_queue *in_serv_bfqq) +{ + int bfqq_weight, in_serv_weight; + + if (bfqq->ioprio_class < in_serv_bfqq->ioprio_class) + return true; + + if (in_serv_bfqq->entity.parent == bfqq->entity.parent) { + bfqq_weight = bfqq->entity.weight; + in_serv_weight = in_serv_bfqq->entity.weight; + } else { + if (bfqq->entity.parent) + bfqq_weight = bfqq->entity.parent->weight; + else + bfqq_weight = bfqq->entity.weight; + if (in_serv_bfqq->entity.parent) + in_serv_weight = in_serv_bfqq->entity.parent->weight; + else + in_serv_weight = in_serv_bfqq->entity.weight; + } + + return bfqq_weight > in_serv_weight; +} + static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, struct bfq_queue *bfqq, int old_wr_coeff, @@ -1596,6 +1680,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, */ in_burst = bfq_bfqq_in_large_burst(bfqq); soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && + !BFQQ_TOTALLY_SEEKY(bfqq) && !in_burst && time_is_before_jiffies(bfqq->soft_rt_next_start) && bfqq->dispatched == 0; @@ -1611,8 +1696,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, */ bfqq_wants_to_preempt = bfq_bfqq_update_budg_for_activation(bfqd, bfqq, - arrived_in_time, - wr_or_deserves_wr); + arrived_in_time); /* * If bfqq happened to be activated in a burst, but has been @@ -1677,21 +1761,111 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, /* * Expire in-service queue only if preemption may be needed - * for guarantees. In this respect, the function - * next_queue_may_preempt just checks a simple, necessary - * condition, and not a sufficient condition based on - * timestamps. In fact, for the latter condition to be - * evaluated, timestamps would need first to be updated, and - * this operation is quite costly (see the comments on the - * function bfq_bfqq_update_budg_for_activation). + * for guarantees. In particular, we care only about two + * cases. The first is that bfqq has to recover a service + * hole, as explained in the comments on + * bfq_bfqq_update_budg_for_activation(), i.e., that + * bfqq_wants_to_preempt is true. However, if bfqq does not + * carry time-critical I/O, then bfqq's bandwidth is less + * important than that of queues that carry time-critical I/O. + * So, as a further constraint, we consider this case only if + * bfqq is at least as weight-raised, i.e., at least as time + * critical, as the in-service queue. + * + * The second case is that bfqq is in a higher priority class, + * or has a higher weight than the in-service queue. If this + * condition does not hold, we don't care because, even if + * bfqq does not start to be served immediately, the resulting + * delay for bfqq's I/O is however lower or much lower than + * the ideal completion time to be guaranteed to bfqq's I/O. + * + * In both cases, preemption is needed only if, according to + * the timestamps of both bfqq and of the in-service queue, + * bfqq actually is the next queue to serve. So, to reduce + * useless preemptions, the return value of + * next_queue_may_preempt() is considered in the next compound + * condition too. Yet next_queue_may_preempt() just checks a + * simple, necessary condition for bfqq to be the next queue + * to serve. In fact, to evaluate a sufficient condition, the + * timestamps of the in-service queue would need to be + * updated, and this operation is quite costly (see the + * comments on bfq_bfqq_update_budg_for_activation()). */ - if (bfqd->in_service_queue && bfqq_wants_to_preempt && - bfqd->in_service_queue->wr_coeff < bfqq->wr_coeff && + if (bfqd->in_service_queue && + ((bfqq_wants_to_preempt && + bfqq->wr_coeff >= bfqd->in_service_queue->wr_coeff) || + bfq_bfqq_higher_class_or_weight(bfqq, bfqd->in_service_queue)) && next_queue_may_preempt(bfqd)) bfq_bfqq_expire(bfqd, bfqd->in_service_queue, false, BFQQE_PREEMPTED); } +static void bfq_reset_inject_limit(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + /* invalidate baseline total service time */ + bfqq->last_serv_time_ns = 0; + + /* + * Reset pointer in case we are waiting for + * some request completion. + */ + bfqd->waited_rq = NULL; + + /* + * If bfqq has a short think time, then start by setting the + * inject limit to 0 prudentially, because the service time of + * an injected I/O request may be higher than the think time + * of bfqq, and therefore, if one request was injected when + * bfqq remains empty, this injected request might delay the + * service of the next I/O request for bfqq significantly. In + * case bfqq can actually tolerate some injection, then the + * adaptive update will however raise the limit soon. This + * lucky circumstance holds exactly because bfqq has a short + * think time, and thus, after remaining empty, is likely to + * get new I/O enqueued---and then completed---before being + * expired. This is the very pattern that gives the + * limit-update algorithm the chance to measure the effect of + * injection on request service times, and then to update the + * limit accordingly. + * + * However, in the following special case, the inject limit is + * left to 1 even if the think time is short: bfqq's I/O is + * synchronized with that of some other queue, i.e., bfqq may + * receive new I/O only after the I/O of the other queue is + * completed. Keeping the inject limit to 1 allows the + * blocking I/O to be served while bfqq is in service. And + * this is very convenient both for bfqq and for overall + * throughput, as explained in detail in the comments in + * bfq_update_has_short_ttime(). + * + * On the opposite end, if bfqq has a long think time, then + * start directly by 1, because: + * a) on the bright side, keeping at most one request in + * service in the drive is unlikely to cause any harm to the + * latency of bfqq's requests, as the service time of a single + * request is likely to be lower than the think time of bfqq; + * b) on the downside, after becoming empty, bfqq is likely to + * expire before getting its next request. With this request + * arrival pattern, it is very hard to sample total service + * times and update the inject limit accordingly (see comments + * on bfq_update_inject_limit()). So the limit is likely to be + * never, or at least seldom, updated. As a consequence, by + * setting the limit to 1, we avoid that no injection ever + * occurs with bfqq. On the downside, this proactive step + * further reduces chances to actually compute the baseline + * total service time. Thus it reduces chances to execute the + * limit-update algorithm and possibly raise the limit to more + * than 1. + */ + if (bfq_bfqq_has_short_ttime(bfqq)) + bfqq->inject_limit = 0; + else + bfqq->inject_limit = 1; + + bfqq->decrease_time_jif = jiffies; +} + static void bfq_add_request(struct request *rq) { struct bfq_queue *bfqq = RQ_BFQQ(rq); @@ -1704,6 +1878,165 @@ static void bfq_add_request(struct request *rq) bfqq->queued[rq_is_sync(rq)]++; bfqd->queued++; + if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) { + /* + * Detect whether bfqq's I/O seems synchronized with + * that of some other queue, i.e., whether bfqq, after + * remaining empty, happens to receive new I/O only + * right after some I/O request of the other queue has + * been completed. We call waker queue the other + * queue, and we assume, for simplicity, that bfqq may + * have at most one waker queue. + * + * A remarkable throughput boost can be reached by + * unconditionally injecting the I/O of the waker + * queue, every time a new bfq_dispatch_request + * happens to be invoked while I/O is being plugged + * for bfqq. In addition to boosting throughput, this + * unblocks bfqq's I/O, thereby improving bandwidth + * and latency for bfqq. Note that these same results + * may be achieved with the general injection + * mechanism, but less effectively. For details on + * this aspect, see the comments on the choice of the + * queue for injection in bfq_select_queue(). + * + * Turning back to the detection of a waker queue, a + * queue Q is deemed as a waker queue for bfqq if, for + * two consecutive times, bfqq happens to become non + * empty right after a request of Q has been + * completed. In particular, on the first time, Q is + * tentatively set as a candidate waker queue, while + * on the second time, the flag + * bfq_bfqq_has_waker(bfqq) is set to confirm that Q + * is a waker queue for bfqq. These detection steps + * are performed only if bfqq has a long think time, + * so as to make it more likely that bfqq's I/O is + * actually being blocked by a synchronization. This + * last filter, plus the above two-times requirement, + * make false positives less likely. + * + * NOTE + * + * The sooner a waker queue is detected, the sooner + * throughput can be boosted by injecting I/O from the + * waker queue. Fortunately, detection is likely to be + * actually fast, for the following reasons. While + * blocked by synchronization, bfqq has a long think + * time. This implies that bfqq's inject limit is at + * least equal to 1 (see the comments in + * bfq_update_inject_limit()). So, thanks to + * injection, the waker queue is likely to be served + * during the very first I/O-plugging time interval + * for bfqq. This triggers the first step of the + * detection mechanism. Thanks again to injection, the + * candidate waker queue is then likely to be + * confirmed no later than during the next + * I/O-plugging interval for bfqq. + */ + if (!bfq_bfqq_has_short_ttime(bfqq) && + ktime_get_ns() - bfqd->last_completion < + 200 * NSEC_PER_USEC) { + if (bfqd->last_completed_rq_bfqq != bfqq && + bfqd->last_completed_rq_bfqq != + bfqq->waker_bfqq) { + /* + * First synchronization detected with + * a candidate waker queue, or with a + * different candidate waker queue + * from the current one. + */ + bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq; + + /* + * If the waker queue disappears, then + * bfqq->waker_bfqq must be reset. To + * this goal, we maintain in each + * waker queue a list, woken_list, of + * all the queues that reference the + * waker queue through their + * waker_bfqq pointer. When the waker + * queue exits, the waker_bfqq pointer + * of all the queues in the woken_list + * is reset. + * + * In addition, if bfqq is already in + * the woken_list of a waker queue, + * then, before being inserted into + * the woken_list of a new waker + * queue, bfqq must be removed from + * the woken_list of the old waker + * queue. + */ + if (!hlist_unhashed(&bfqq->woken_list_node)) + hlist_del_init(&bfqq->woken_list_node); + hlist_add_head(&bfqq->woken_list_node, + &bfqd->last_completed_rq_bfqq->woken_list); + + bfq_clear_bfqq_has_waker(bfqq); + } else if (bfqd->last_completed_rq_bfqq == + bfqq->waker_bfqq && + !bfq_bfqq_has_waker(bfqq)) { + /* + * synchronization with waker_bfqq + * seen for the second time + */ + bfq_mark_bfqq_has_waker(bfqq); + } + } + + /* + * Periodically reset inject limit, to make sure that + * the latter eventually drops in case workload + * changes, see step (3) in the comments on + * bfq_update_inject_limit(). + */ + if (time_is_before_eq_jiffies(bfqq->decrease_time_jif + + msecs_to_jiffies(1000))) + bfq_reset_inject_limit(bfqd, bfqq); + + /* + * The following conditions must hold to setup a new + * sampling of total service time, and then a new + * update of the inject limit: + * - bfqq is in service, because the total service + * time is evaluated only for the I/O requests of + * the queues in service; + * - this is the right occasion to compute or to + * lower the baseline total service time, because + * there are actually no requests in the drive, + * or + * the baseline total service time is available, and + * this is the right occasion to compute the other + * quantity needed to update the inject limit, i.e., + * the total service time caused by the amount of + * injection allowed by the current value of the + * limit. It is the right occasion because injection + * has actually been performed during the service + * hole, and there are still in-flight requests, + * which are very likely to be exactly the injected + * requests, or part of them; + * - the minimum interval for sampling the total + * service time and updating the inject limit has + * elapsed. + */ + if (bfqq == bfqd->in_service_queue && + (bfqd->rq_in_driver == 0 || + (bfqq->last_serv_time_ns > 0 && + bfqd->rqs_injected && bfqd->rq_in_driver > 0)) && + time_is_before_eq_jiffies(bfqq->decrease_time_jif + + msecs_to_jiffies(100))) { + bfqd->last_empty_occupied_ns = ktime_get_ns(); + /* + * Start the state machine for measuring the + * total service time of rq: setting + * wait_dispatch will cause bfqd->waited_rq to + * be set when rq will be dispatched. + */ + bfqd->wait_dispatch = true; + bfqd->rqs_injected = false; + } + } + elv_rb_add(&bfqq->sort_list, rq); /* @@ -1715,8 +2048,9 @@ static void bfq_add_request(struct request *rq) /* * Adjust priority tree position, if next_rq changes. + * See comments on bfq_pos_tree_add_move() for the unlikely(). */ - if (prev != bfqq->next_rq) + if (unlikely(!bfqd->nonrot_with_queueing && prev != bfqq->next_rq)) bfq_pos_tree_add_move(bfqd, bfqq); if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */ @@ -1856,7 +2190,9 @@ static void bfq_remove_request(struct request_queue *q, bfqq->pos_root = NULL; } } else { - bfq_pos_tree_add_move(bfqd, bfqq); + /* see comments on bfq_pos_tree_add_move() for the unlikely() */ + if (unlikely(!bfqd->nonrot_with_queueing)) + bfq_pos_tree_add_move(bfqd, bfqq); } if (rq->cmd_flags & REQ_META) @@ -1941,7 +2277,12 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, */ if (prev != bfqq->next_rq) { bfq_updated_next_req(bfqd, bfqq); - bfq_pos_tree_add_move(bfqd, bfqq); + /* + * See comments on bfq_pos_tree_add_move() for + * the unlikely(). + */ + if (unlikely(!bfqd->nonrot_with_queueing)) + bfq_pos_tree_add_move(bfqd, bfqq); } } } @@ -2223,6 +2564,46 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_queue *in_service_bfqq, *new_bfqq; + /* + * Do not perform queue merging if the device is non + * rotational and performs internal queueing. In fact, such a + * device reaches a high speed through internal parallelism + * and pipelining. This means that, to reach a high + * throughput, it must have many requests enqueued at the same + * time. But, in this configuration, the internal scheduling + * algorithm of the device does exactly the job of queue + * merging: it reorders requests so as to obtain as much as + * possible a sequential I/O pattern. As a consequence, with + * the workload generated by processes doing interleaved I/O, + * the throughput reached by the device is likely to be the + * same, with and without queue merging. + * + * Disabling merging also provides a remarkable benefit in + * terms of throughput. Merging tends to make many workloads + * artificially more uneven, because of shared queues + * remaining non empty for incomparably more time than + * non-merged queues. This may accentuate workload + * asymmetries. For example, if one of the queues in a set of + * merged queues has a higher weight than a normal queue, then + * the shared queue may inherit such a high weight and, by + * staying almost always active, may force BFQ to perform I/O + * plugging most of the time. This evidently makes it harder + * for BFQ to let the device reach a high throughput. + * + * Finally, the likely() macro below is not used because one + * of the two branches is more likely than the other, but to + * have the code path after the following if() executed as + * fast as possible for the case of a non rotational device + * with queueing. We want it because this is the fastest kind + * of device. On the opposite end, the likely() may lengthen + * the execution time of BFQ for the case of slower devices + * (rotational or at least without queueing). But in this case + * the execution time of BFQ matters very little, if not at + * all. + */ + if (likely(bfqd->nonrot_with_queueing)) + return NULL; + /* * Prevent bfqq from being merged if it has been created too * long ago. The idea is that true cooperating processes, and @@ -2286,6 +2667,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) if (!bic) return; + bic->saved_weight = bfqq->entity.orig_weight; bic->saved_ttime = bfqq->ttime; bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq); bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); @@ -2374,6 +2756,16 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, * assignment causes no harm). */ new_bfqq->bic = NULL; + /* + * If the queue is shared, the pid is the pid of one of the associated + * processes. Which pid depends on the exact sequence of merge events + * the queue underwent. So printing such a pid is useless and confusing + * because it reports a random pid between those of the associated + * processes. + * We mark such a queue with a pid -1, and then print SHARED instead of + * a pid in logging messages. + */ + new_bfqq->pid = -1; bfqq->bic = NULL; /* release process reference to bfqq */ bfq_put_queue(bfqq); @@ -2408,8 +2800,8 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, /* * bic still points to bfqq, then it has not yet been * redirected to some other bfq_queue, and a queue - * merge beween bfqq and new_bfqq can be safely - * fulfillled, i.e., bic can be redirected to new_bfqq + * merge between bfqq and new_bfqq can be safely + * fulfilled, i.e., bic can be redirected to new_bfqq * and bfqq can be put. */ bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq, @@ -2543,10 +2935,14 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd) * queue). */ if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 && - bfq_symmetric_scenario(bfqd)) + !bfq_asymmetric_scenario(bfqd, bfqq)) sl = min_t(u64, sl, BFQ_MIN_TT); + else if (bfqq->wr_coeff > 1) + sl = max_t(u32, sl, 20ULL * NSEC_PER_MSEC); bfqd->last_idling_start = ktime_get(); + bfqd->last_idling_start_jiffies = jiffies; + hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl), HRTIMER_MODE_REL); bfqg_stats_set_start_idle_time(bfqq_group(bfqq)); @@ -2822,7 +3218,186 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) bfq_remove_request(q, rq); } -static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +/* + * There is a case where idling does not have to be performed for + * throughput concerns, but to preserve the throughput share of + * the process associated with bfqq. + * + * To introduce this case, we can note that allowing the drive + * to enqueue more than one request at a time, and hence + * delegating de facto final scheduling decisions to the + * drive's internal scheduler, entails loss of control on the + * actual request service order. In particular, the critical + * situation is when requests from different processes happen + * to be present, at the same time, in the internal queue(s) + * of the drive. In such a situation, the drive, by deciding + * the service order of the internally-queued requests, does + * determine also the actual throughput distribution among + * these processes. But the drive typically has no notion or + * concern about per-process throughput distribution, and + * makes its decisions only on a per-request basis. Therefore, + * the service distribution enforced by the drive's internal + * scheduler is likely to coincide with the desired throughput + * distribution only in a completely symmetric, or favorably + * skewed scenario where: + * (i-a) each of these processes must get the same throughput as + * the others, + * (i-b) in case (i-a) does not hold, it holds that the process + * associated with bfqq must receive a lower or equal + * throughput than any of the other processes; + * (ii) the I/O of each process has the same properties, in + * terms of locality (sequential or random), direction + * (reads or writes), request sizes, greediness + * (from I/O-bound to sporadic), and so on; + + * In fact, in such a scenario, the drive tends to treat the requests + * of each process in about the same way as the requests of the + * others, and thus to provide each of these processes with about the + * same throughput. This is exactly the desired throughput + * distribution if (i-a) holds, or, if (i-b) holds instead, this is an + * even more convenient distribution for (the process associated with) + * bfqq. + * + * In contrast, in any asymmetric or unfavorable scenario, device + * idling (I/O-dispatch plugging) is certainly needed to guarantee + * that bfqq receives its assigned fraction of the device throughput + * (see [1] for details). + * + * The problem is that idling may significantly reduce throughput with + * certain combinations of types of I/O and devices. An important + * example is sync random I/O on flash storage with command + * queueing. So, unless bfqq falls in cases where idling also boosts + * throughput, it is important to check conditions (i-a), i(-b) and + * (ii) accurately, so as to avoid idling when not strictly needed for + * service guarantees. + * + * Unfortunately, it is extremely difficult to thoroughly check + * condition (ii). And, in case there are active groups, it becomes + * very difficult to check conditions (i-a) and (i-b) too. In fact, + * if there are active groups, then, for conditions (i-a) or (i-b) to + * become false 'indirectly', it is enough that an active group + * contains more active processes or sub-groups than some other active + * group. More precisely, for conditions (i-a) or (i-b) to become + * false because of such a group, it is not even necessary that the + * group is (still) active: it is sufficient that, even if the group + * has become inactive, some of its descendant processes still have + * some request already dispatched but still waiting for + * completion. In fact, requests have still to be guaranteed their + * share of the throughput even after being dispatched. In this + * respect, it is easy to show that, if a group frequently becomes + * inactive while still having in-flight requests, and if, when this + * happens, the group is not considered in the calculation of whether + * the scenario is asymmetric, then the group may fail to be + * guaranteed its fair share of the throughput (basically because + * idling may not be performed for the descendant processes of the + * group, but it had to be). We address this issue with the following + * bi-modal behavior, implemented in the function + * bfq_asymmetric_scenario(). + * + * If there are groups with requests waiting for completion + * (as commented above, some of these groups may even be + * already inactive), then the scenario is tagged as + * asymmetric, conservatively, without checking any of the + * conditions (i-a), (i-b) or (ii). So the device is idled for bfqq. + * This behavior matches also the fact that groups are created + * exactly if controlling I/O is a primary concern (to + * preserve bandwidth and latency guarantees). + * + * On the opposite end, if there are no groups with requests waiting + * for completion, then only conditions (i-a) and (i-b) are actually + * controlled, i.e., provided that conditions (i-a) or (i-b) holds, + * idling is not performed, regardless of whether condition (ii) + * holds. In other words, only if conditions (i-a) and (i-b) do not + * hold, then idling is allowed, and the device tends to be prevented + * from queueing many requests, possibly of several processes. Since + * there are no groups with requests waiting for completion, then, to + * control conditions (i-a) and (i-b) it is enough to check just + * whether all the queues with requests waiting for completion also + * have the same weight. + * + * Not checking condition (ii) evidently exposes bfqq to the + * risk of getting less throughput than its fair share. + * However, for queues with the same weight, a further + * mechanism, preemption, mitigates or even eliminates this + * problem. And it does so without consequences on overall + * throughput. This mechanism and its benefits are explained + * in the next three paragraphs. + * + * Even if a queue, say Q, is expired when it remains idle, Q + * can still preempt the new in-service queue if the next + * request of Q arrives soon (see the comments on + * bfq_bfqq_update_budg_for_activation). If all queues and + * groups have the same weight, this form of preemption, + * combined with the hole-recovery heuristic described in the + * comments on function bfq_bfqq_update_budg_for_activation, + * are enough to preserve a correct bandwidth distribution in + * the mid term, even without idling. In fact, even if not + * idling allows the internal queues of the device to contain + * many requests, and thus to reorder requests, we can rather + * safely assume that the internal scheduler still preserves a + * minimum of mid-term fairness. + * + * More precisely, this preemption-based, idleless approach + * provides fairness in terms of IOPS, and not sectors per + * second. This can be seen with a simple example. Suppose + * that there are two queues with the same weight, but that + * the first queue receives requests of 8 sectors, while the + * second queue receives requests of 1024 sectors. In + * addition, suppose that each of the two queues contains at + * most one request at a time, which implies that each queue + * always remains idle after it is served. Finally, after + * remaining idle, each queue receives very quickly a new + * request. It follows that the two queues are served + * alternatively, preempting each other if needed. This + * implies that, although both queues have the same weight, + * the queue with large requests receives a service that is + * 1024/8 times as high as the service received by the other + * queue. + * + * The motivation for using preemption instead of idling (for + * queues with the same weight) is that, by not idling, + * service guarantees are preserved (completely or at least in + * part) without minimally sacrificing throughput. And, if + * there is no active group, then the primary expectation for + * this device is probably a high throughput. + * + * We are now left only with explaining the additional + * compound condition that is checked below for deciding + * whether the scenario is asymmetric. To explain this + * compound condition, we need to add that the function + * bfq_asymmetric_scenario checks the weights of only + * non-weight-raised queues, for efficiency reasons (see + * comments on bfq_weights_tree_add()). Then the fact that + * bfqq is weight-raised is checked explicitly here. More + * precisely, the compound condition below takes into account + * also the fact that, even if bfqq is being weight-raised, + * the scenario is still symmetric if all queues with requests + * waiting for completion happen to be + * weight-raised. Actually, we should be even more precise + * here, and differentiate between interactive weight raising + * and soft real-time weight raising. + * + * As a side note, it is worth considering that the above + * device-idling countermeasures may however fail in the + * following unlucky scenario: if idling is (correctly) + * disabled in a time period during which all symmetry + * sub-conditions hold, and hence the device is allowed to + * enqueue many requests, but at some later point in time some + * sub-condition stops to hold, then it may become impossible + * to let requests be served in the desired order until all + * the requests already queued in the device have been served. + */ +static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + return (bfqq->wr_coeff > 1 && + bfqd->wr_busy_queues < + bfq_tot_busy_queues(bfqd)) || + bfq_asymmetric_scenario(bfqd, bfqq); +} + +static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, + enum bfqq_expiration reason) { /* * If this bfqq is shared between multiple processes, check @@ -2833,7 +3408,22 @@ static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) bfq_mark_bfqq_split_coop(bfqq); - if (RB_EMPTY_ROOT(&bfqq->sort_list)) { + /* + * Consider queues with a higher finish virtual time than + * bfqq. If idling_needed_for_service_guarantees(bfqq) returns + * true, then bfqq's bandwidth would be violated if an + * uncontrolled amount of I/O from these queues were + * dispatched while bfqq is waiting for its new I/O to + * arrive. This is exactly what may happen if this is a forced + * expiration caused by a preemption attempt, and if bfqq is + * not re-scheduled. To prevent this from happening, re-queue + * bfqq if it needs I/O-dispatch plugging, even if it is + * empty. By doing so, bfqq is granted to be served before the + * above queues (provided that bfqq is of course eligible). + */ + if (RB_EMPTY_ROOT(&bfqq->sort_list) && + !(reason == BFQQE_PREEMPTED && + idling_needed_for_service_guarantees(bfqd, bfqq))) { if (bfqq->dispatched == 0) /* * Overloading budget_timeout field to store @@ -2848,8 +3438,11 @@ static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_requeue_bfqq(bfqd, bfqq, true); /* * Resort priority tree of potential close cooperators. + * See comments on bfq_pos_tree_add_move() for the unlikely(). */ - bfq_pos_tree_add_move(bfqd, bfqq); + if (unlikely(!bfqd->nonrot_with_queueing && + !RB_EMPTY_ROOT(&bfqq->sort_list))) + bfq_pos_tree_add_move(bfqd, bfqq); } /* @@ -3223,13 +3816,6 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); } -static bool bfq_bfqq_injectable(struct bfq_queue *bfqq) -{ - return BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 && - blk_queue_nonrot(bfqq->bfqd->queue) && - bfqq->bfqd->hw_tag; -} - /** * bfq_bfqq_expire - expire a queue. * @bfqd: device owning the queue. @@ -3343,17 +3929,23 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, "expire (%d, slow %d, num_disp %d, short_ttime %d)", reason, slow, bfqq->dispatched, bfq_bfqq_has_short_ttime(bfqq)); + /* + * bfqq expired, so no total service time needs to be computed + * any longer: reset state machine for measuring total service + * times. + */ + bfqd->rqs_injected = bfqd->wait_dispatch = false; + bfqd->waited_rq = NULL; + /* * Increase, decrease or leave budget unchanged according to * reason. */ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); - if (__bfq_bfqq_expire(bfqd, bfqq)) + if (__bfq_bfqq_expire(bfqd, bfqq, reason)) /* bfqq is gone, no more actions on it */ return; - bfqq->injected_service = 0; - /* mark bfqq as waiting a request only if a bic still points to it */ if (!bfq_bfqq_busy(bfqq) && reason != BFQQE_BUDGET_TIMEOUT && @@ -3496,177 +4088,6 @@ static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd, bfqd->wr_busy_queues == 0; } -/* - * There is a case where idling must be performed not for - * throughput concerns, but to preserve service guarantees. - * - * To introduce this case, we can note that allowing the drive - * to enqueue more than one request at a time, and hence - * delegating de facto final scheduling decisions to the - * drive's internal scheduler, entails loss of control on the - * actual request service order. In particular, the critical - * situation is when requests from different processes happen - * to be present, at the same time, in the internal queue(s) - * of the drive. In such a situation, the drive, by deciding - * the service order of the internally-queued requests, does - * determine also the actual throughput distribution among - * these processes. But the drive typically has no notion or - * concern about per-process throughput distribution, and - * makes its decisions only on a per-request basis. Therefore, - * the service distribution enforced by the drive's internal - * scheduler is likely to coincide with the desired - * device-throughput distribution only in a completely - * symmetric scenario where: - * (i) each of these processes must get the same throughput as - * the others; - * (ii) the I/O of each process has the same properties, in - * terms of locality (sequential or random), direction - * (reads or writes), request sizes, greediness - * (from I/O-bound to sporadic), and so on. - * In fact, in such a scenario, the drive tends to treat - * the requests of each of these processes in about the same - * way as the requests of the others, and thus to provide - * each of these processes with about the same throughput - * (which is exactly the desired throughput distribution). In - * contrast, in any asymmetric scenario, device idling is - * certainly needed to guarantee that bfqq receives its - * assigned fraction of the device throughput (see [1] for - * details). - * The problem is that idling may significantly reduce - * throughput with certain combinations of types of I/O and - * devices. An important example is sync random I/O, on flash - * storage with command queueing. So, unless bfqq falls in the - * above cases where idling also boosts throughput, it would - * be important to check conditions (i) and (ii) accurately, - * so as to avoid idling when not strictly needed for service - * guarantees. - * - * Unfortunately, it is extremely difficult to thoroughly - * check condition (ii). And, in case there are active groups, - * it becomes very difficult to check condition (i) too. In - * fact, if there are active groups, then, for condition (i) - * to become false, it is enough that an active group contains - * more active processes or sub-groups than some other active - * group. More precisely, for condition (i) to hold because of - * such a group, it is not even necessary that the group is - * (still) active: it is sufficient that, even if the group - * has become inactive, some of its descendant processes still - * have some request already dispatched but still waiting for - * completion. In fact, requests have still to be guaranteed - * their share of the throughput even after being - * dispatched. In this respect, it is easy to show that, if a - * group frequently becomes inactive while still having - * in-flight requests, and if, when this happens, the group is - * not considered in the calculation of whether the scenario - * is asymmetric, then the group may fail to be guaranteed its - * fair share of the throughput (basically because idling may - * not be performed for the descendant processes of the group, - * but it had to be). We address this issue with the - * following bi-modal behavior, implemented in the function - * bfq_symmetric_scenario(). - * - * If there are groups with requests waiting for completion - * (as commented above, some of these groups may even be - * already inactive), then the scenario is tagged as - * asymmetric, conservatively, without checking any of the - * conditions (i) and (ii). So the device is idled for bfqq. - * This behavior matches also the fact that groups are created - * exactly if controlling I/O is a primary concern (to - * preserve bandwidth and latency guarantees). - * - * On the opposite end, if there are no groups with requests - * waiting for completion, then only condition (i) is actually - * controlled, i.e., provided that condition (i) holds, idling - * is not performed, regardless of whether condition (ii) - * holds. In other words, only if condition (i) does not hold, - * then idling is allowed, and the device tends to be - * prevented from queueing many requests, possibly of several - * processes. Since there are no groups with requests waiting - * for completion, then, to control condition (i) it is enough - * to check just whether all the queues with requests waiting - * for completion also have the same weight. - * - * Not checking condition (ii) evidently exposes bfqq to the - * risk of getting less throughput than its fair share. - * However, for queues with the same weight, a further - * mechanism, preemption, mitigates or even eliminates this - * problem. And it does so without consequences on overall - * throughput. This mechanism and its benefits are explained - * in the next three paragraphs. - * - * Even if a queue, say Q, is expired when it remains idle, Q - * can still preempt the new in-service queue if the next - * request of Q arrives soon (see the comments on - * bfq_bfqq_update_budg_for_activation). If all queues and - * groups have the same weight, this form of preemption, - * combined with the hole-recovery heuristic described in the - * comments on function bfq_bfqq_update_budg_for_activation, - * are enough to preserve a correct bandwidth distribution in - * the mid term, even without idling. In fact, even if not - * idling allows the internal queues of the device to contain - * many requests, and thus to reorder requests, we can rather - * safely assume that the internal scheduler still preserves a - * minimum of mid-term fairness. - * - * More precisely, this preemption-based, idleless approach - * provides fairness in terms of IOPS, and not sectors per - * second. This can be seen with a simple example. Suppose - * that there are two queues with the same weight, but that - * the first queue receives requests of 8 sectors, while the - * second queue receives requests of 1024 sectors. In - * addition, suppose that each of the two queues contains at - * most one request at a time, which implies that each queue - * always remains idle after it is served. Finally, after - * remaining idle, each queue receives very quickly a new - * request. It follows that the two queues are served - * alternatively, preempting each other if needed. This - * implies that, although both queues have the same weight, - * the queue with large requests receives a service that is - * 1024/8 times as high as the service received by the other - * queue. - * - * The motivation for using preemption instead of idling (for - * queues with the same weight) is that, by not idling, - * service guarantees are preserved (completely or at least in - * part) without minimally sacrificing throughput. And, if - * there is no active group, then the primary expectation for - * this device is probably a high throughput. - * - * We are now left only with explaining the additional - * compound condition that is checked below for deciding - * whether the scenario is asymmetric. To explain this - * compound condition, we need to add that the function - * bfq_symmetric_scenario checks the weights of only - * non-weight-raised queues, for efficiency reasons (see - * comments on bfq_weights_tree_add()). Then the fact that - * bfqq is weight-raised is checked explicitly here. More - * precisely, the compound condition below takes into account - * also the fact that, even if bfqq is being weight-raised, - * the scenario is still symmetric if all queues with requests - * waiting for completion happen to be - * weight-raised. Actually, we should be even more precise - * here, and differentiate between interactive weight raising - * and soft real-time weight raising. - * - * As a side note, it is worth considering that the above - * device-idling countermeasures may however fail in the - * following unlucky scenario: if idling is (correctly) - * disabled in a time period during which all symmetry - * sub-conditions hold, and hence the device is allowed to - * enqueue many requests, but at some later point in time some - * sub-condition stops to hold, then it may become impossible - * to let requests be served in the desired order until all - * the requests already queued in the device have been served. - */ -static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, - struct bfq_queue *bfqq) -{ - return (bfqq->wr_coeff > 1 && - bfqd->wr_busy_queues < - bfq_tot_busy_queues(bfqd)) || - !bfq_symmetric_scenario(bfqd); -} - /* * For a queue that becomes empty, device idling is allowed only if * this function returns true for that queue. As a consequence, since @@ -3740,26 +4161,98 @@ static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_better_to_idle(bfqq); } -static struct bfq_queue *bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) +/* + * This function chooses the queue from which to pick the next extra + * I/O request to inject, if it finds a compatible queue. See the + * comments on bfq_update_inject_limit() for details on the injection + * mechanism, and for the definitions of the quantities mentioned + * below. + */ +static struct bfq_queue * +bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) { - struct bfq_queue *bfqq; + struct bfq_queue *bfqq, *in_serv_bfqq = bfqd->in_service_queue; + unsigned int limit = in_serv_bfqq->inject_limit; + /* + * If + * - bfqq is not weight-raised and therefore does not carry + * time-critical I/O, + * or + * - regardless of whether bfqq is weight-raised, bfqq has + * however a long think time, during which it can absorb the + * effect of an appropriate number of extra I/O requests + * from other queues (see bfq_update_inject_limit for + * details on the computation of this number); + * then injection can be performed without restrictions. + */ + bool in_serv_always_inject = in_serv_bfqq->wr_coeff == 1 || + !bfq_bfqq_has_short_ttime(in_serv_bfqq); /* - * A linear search; but, with a high probability, very few - * steps are needed to find a candidate queue, i.e., a queue - * with enough budget left for its next request. In fact: + * If + * - the baseline total service time could not be sampled yet, + * so the inject limit happens to be still 0, and + * - a lot of time has elapsed since the plugging of I/O + * dispatching started, so drive speed is being wasted + * significantly; + * then temporarily raise inject limit to one request. + */ + if (limit == 0 && in_serv_bfqq->last_serv_time_ns == 0 && + bfq_bfqq_wait_request(in_serv_bfqq) && + time_is_before_eq_jiffies(bfqd->last_idling_start_jiffies + + bfqd->bfq_slice_idle) + ) + limit = 1; + + if (bfqd->rq_in_driver >= limit) + return NULL; + + /* + * Linear search of the source queue for injection; but, with + * a high probability, very few steps are needed to find a + * candidate queue, i.e., a queue with enough budget left for + * its next request. In fact: * - BFQ dynamically updates the budget of every queue so as * to accommodate the expected backlog of the queue; * - if a queue gets all its requests dispatched as injected * service, then the queue is removed from the active list - * (and re-added only if it gets new requests, but with - * enough budget for its new backlog). + * (and re-added only if it gets new requests, but then it + * is assigned again enough budget for its new backlog). */ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) if (!RB_EMPTY_ROOT(&bfqq->sort_list) && + (in_serv_always_inject || bfqq->wr_coeff > 1) && bfq_serv_to_charge(bfqq->next_rq, bfqq) <= - bfq_bfqq_budget_left(bfqq)) - return bfqq; + bfq_bfqq_budget_left(bfqq)) { + /* + * Allow for only one large in-flight request + * on non-rotational devices, for the + * following reason. On non-rotationl drives, + * large requests take much longer than + * smaller requests to be served. In addition, + * the drive prefers to serve large requests + * w.r.t. to small ones, if it can choose. So, + * having more than one large requests queued + * in the drive may easily make the next first + * request of the in-service queue wait for so + * long to break bfqq's service guarantees. On + * the bright side, large requests let the + * drive reach a very high throughput, even if + * there is only one in-flight large request + * at a time. + */ + if (blk_queue_nonrot(bfqd->queue) && + blk_rq_sectors(bfqq->next_rq) >= + BFQQ_SECT_THR_NONROT) + limit = min_t(unsigned int, 1, limit); + else + limit = in_serv_bfqq->inject_limit; + + if (bfqd->rq_in_driver < limit) { + bfqd->rqs_injected = true; + return bfqq; + } + } return NULL; } @@ -3846,14 +4339,105 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) * for a new request, or has requests waiting for a completion and * may idle after their completion, then keep it anyway. * - * Yet, to boost throughput, inject service from other queues if - * possible. + * Yet, inject service from other queues if it boosts + * throughput and is possible. */ if (bfq_bfqq_wait_request(bfqq) || (bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) { - if (bfq_bfqq_injectable(bfqq) && - bfqq->injected_service * bfqq->inject_coeff < - bfqq->entity.service * 10) + struct bfq_queue *async_bfqq = + bfqq->bic && bfqq->bic->bfqq[0] && + bfq_bfqq_busy(bfqq->bic->bfqq[0]) && + bfqq->bic->bfqq[0]->next_rq ? + bfqq->bic->bfqq[0] : NULL; + + /* + * The next three mutually-exclusive ifs decide + * whether to try injection, and choose the queue to + * pick an I/O request from. + * + * The first if checks whether the process associated + * with bfqq has also async I/O pending. If so, it + * injects such I/O unconditionally. Injecting async + * I/O from the same process can cause no harm to the + * process. On the contrary, it can only increase + * bandwidth and reduce latency for the process. + * + * The second if checks whether there happens to be a + * non-empty waker queue for bfqq, i.e., a queue whose + * I/O needs to be completed for bfqq to receive new + * I/O. This happens, e.g., if bfqq is associated with + * a process that does some sync. A sync generates + * extra blocking I/O, which must be completed before + * the process associated with bfqq can go on with its + * I/O. If the I/O of the waker queue is not served, + * then bfqq remains empty, and no I/O is dispatched, + * until the idle timeout fires for bfqq. This is + * likely to result in lower bandwidth and higher + * latencies for bfqq, and in a severe loss of total + * throughput. The best action to take is therefore to + * serve the waker queue as soon as possible. So do it + * (without relying on the third alternative below for + * eventually serving waker_bfqq's I/O; see the last + * paragraph for further details). This systematic + * injection of I/O from the waker queue does not + * cause any delay to bfqq's I/O. On the contrary, + * next bfqq's I/O is brought forward dramatically, + * for it is not blocked for milliseconds. + * + * The third if checks whether bfqq is a queue for + * which it is better to avoid injection. It is so if + * bfqq delivers more throughput when served without + * any further I/O from other queues in the middle, or + * if the service times of bfqq's I/O requests both + * count more than overall throughput, and may be + * easily increased by injection (this happens if bfqq + * has a short think time). If none of these + * conditions holds, then a candidate queue for + * injection is looked for through + * bfq_choose_bfqq_for_injection(). Note that the + * latter may return NULL (for example if the inject + * limit for bfqq is currently 0). + * + * NOTE: motivation for the second alternative + * + * Thanks to the way the inject limit is updated in + * bfq_update_has_short_ttime(), it is rather likely + * that, if I/O is being plugged for bfqq and the + * waker queue has pending I/O requests that are + * blocking bfqq's I/O, then the third alternative + * above lets the waker queue get served before the + * I/O-plugging timeout fires. So one may deem the + * second alternative superfluous. It is not, because + * the third alternative may be way less effective in + * case of a synchronization. For two main + * reasons. First, throughput may be low because the + * inject limit may be too low to guarantee the same + * amount of injected I/O, from the waker queue or + * other queues, that the second alternative + * guarantees (the second alternative unconditionally + * injects a pending I/O request of the waker queue + * for each bfq_dispatch_request()). Second, with the + * third alternative, the duration of the plugging, + * i.e., the time before bfqq finally receives new I/O, + * may not be minimized, because the waker queue may + * happen to be served only after other queues. + */ + if (async_bfqq && + icq_to_bic(async_bfqq->next_rq->elv.icq) == bfqq->bic && + bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <= + bfq_bfqq_budget_left(async_bfqq)) + bfqq = bfqq->bic->bfqq[0]; + else if (bfq_bfqq_has_waker(bfqq) && + bfq_bfqq_busy(bfqq->waker_bfqq) && + bfqq->next_rq && + bfq_serv_to_charge(bfqq->waker_bfqq->next_rq, + bfqq->waker_bfqq) <= + bfq_bfqq_budget_left(bfqq->waker_bfqq) + ) + bfqq = bfqq->waker_bfqq; + else if (!idling_boosts_thr_without_issues(bfqd, bfqq) && + (bfqq->wr_coeff == 1 || bfqd->wr_busy_queues > 1 || + !bfq_bfqq_has_short_ttime(bfqq))) bfqq = bfq_choose_bfqq_for_injection(bfqd); else bfqq = NULL; @@ -3945,15 +4529,15 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd, bfq_bfqq_served(bfqq, service_to_charge); - bfq_dispatch_remove(bfqd->queue, rq); + if (bfqq == bfqd->in_service_queue && bfqd->wait_dispatch) { + bfqd->wait_dispatch = false; + bfqd->waited_rq = rq; + } - if (bfqq != bfqd->in_service_queue) { - if (likely(bfqd->in_service_queue)) - bfqd->in_service_queue->injected_service += - bfq_serv_to_charge(rq, bfqq); + bfq_dispatch_remove(bfqd->queue, rq); + if (bfqq != bfqd->in_service_queue) goto return_rq; - } /* * If weight raising has to terminate for bfqq, then next @@ -4239,8 +4823,11 @@ static void bfq_put_cooperator(struct bfq_queue *bfqq) static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) { + struct bfq_queue *item; + struct hlist_node *n; + if (bfqq == bfqd->in_service_queue) { - __bfq_bfqq_expire(bfqd, bfqq); + __bfq_bfqq_expire(bfqd, bfqq, BFQQE_BUDGET_TIMEOUT); bfq_schedule_dispatch(bfqd); } @@ -4248,6 +4835,18 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_put_cooperator(bfqq); + /* remove bfqq from woken list */ + if (!hlist_unhashed(&bfqq->woken_list_node)) + hlist_del_init(&bfqq->woken_list_node); + + /* reset waker for all queues in woken list */ + hlist_for_each_entry_safe(item, n, &bfqq->woken_list, + woken_list_node) { + item->waker_bfqq = NULL; + bfq_clear_bfqq_has_waker(item); + hlist_del_init(&item->woken_list_node); + } + bfq_put_queue(bfqq); /* release process reference */ } @@ -4366,6 +4965,8 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, RB_CLEAR_NODE(&bfqq->entity.rb_node); INIT_LIST_HEAD(&bfqq->fifo); INIT_HLIST_NODE(&bfqq->burst_list_node); + INIT_HLIST_NODE(&bfqq->woken_list_node); + INIT_HLIST_HEAD(&bfqq->woken_list); bfqq->ref = 0; bfqq->bfqd = bfqd; @@ -4384,13 +4985,6 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_mark_bfqq_has_short_ttime(bfqq); bfq_mark_bfqq_sync(bfqq); bfq_mark_bfqq_just_created(bfqq); - /* - * Aggressively inject a lot of service: up to 90%. - * This coefficient remains constant during bfqq life, - * but this behavior might be changed, after enough - * testing and tuning. - */ - bfqq->inject_coeff = 1; } else bfq_clear_bfqq_sync(bfqq); @@ -4529,13 +5123,18 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq, { bfqq->seek_history <<= 1; bfqq->seek_history |= BFQ_RQ_SEEKY(bfqd, bfqq->last_request_pos, rq); + + if (bfqq->wr_coeff > 1 && + bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time && + BFQQ_TOTALLY_SEEKY(bfqq)) + bfq_bfqq_end_wr(bfqq); } static void bfq_update_has_short_ttime(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_io_cq *bic) { - bool has_short_ttime = true; + bool has_short_ttime = true, state_changed; /* * No need to update has_short_ttime if bfqq is async or in @@ -4560,13 +5159,102 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd, bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle)) has_short_ttime = false; - bfq_log_bfqq(bfqd, bfqq, "update_has_short_ttime: has_short_ttime %d", - has_short_ttime); + state_changed = has_short_ttime != bfq_bfqq_has_short_ttime(bfqq); if (has_short_ttime) bfq_mark_bfqq_has_short_ttime(bfqq); else bfq_clear_bfqq_has_short_ttime(bfqq); + + /* + * Until the base value for the total service time gets + * finally computed for bfqq, the inject limit does depend on + * the think-time state (short|long). In particular, the limit + * is 0 or 1 if the think time is deemed, respectively, as + * short or long (details in the comments in + * bfq_update_inject_limit()). Accordingly, the next + * instructions reset the inject limit if the think-time state + * has changed and the above base value is still to be + * computed. + * + * However, the reset is performed only if more than 100 ms + * have elapsed since the last update of the inject limit, or + * (inclusive) if the change is from short to long think + * time. The reason for this waiting is as follows. + * + * bfqq may have a long think time because of a + * synchronization with some other queue, i.e., because the + * I/O of some other queue may need to be completed for bfqq + * to receive new I/O. Details in the comments on the choice + * of the queue for injection in bfq_select_queue(). + * + * As stressed in those comments, if such a synchronization is + * actually in place, then, without injection on bfqq, the + * blocking I/O cannot happen to served while bfqq is in + * service. As a consequence, if bfqq is granted + * I/O-dispatch-plugging, then bfqq remains empty, and no I/O + * is dispatched, until the idle timeout fires. This is likely + * to result in lower bandwidth and higher latencies for bfqq, + * and in a severe loss of total throughput. + * + * On the opposite end, a non-zero inject limit may allow the + * I/O that blocks bfqq to be executed soon, and therefore + * bfqq to receive new I/O soon. + * + * But, if the blocking gets actually eliminated, then the + * next think-time sample for bfqq may be very low. This in + * turn may cause bfqq's think time to be deemed + * short. Without the 100 ms barrier, this new state change + * would cause the body of the next if to be executed + * immediately. But this would set to 0 the inject + * limit. Without injection, the blocking I/O would cause the + * think time of bfqq to become long again, and therefore the + * inject limit to be raised again, and so on. The only effect + * of such a steady oscillation between the two think-time + * states would be to prevent effective injection on bfqq. + * + * In contrast, if the inject limit is not reset during such a + * long time interval as 100 ms, then the number of short + * think time samples can grow significantly before the reset + * is performed. As a consequence, the think time state can + * become stable before the reset. Therefore there will be no + * state change when the 100 ms elapse, and no reset of the + * inject limit. The inject limit remains steadily equal to 1 + * both during and after the 100 ms. So injection can be + * performed at all times, and throughput gets boosted. + * + * An inject limit equal to 1 is however in conflict, in + * general, with the fact that the think time of bfqq is + * short, because injection may be likely to delay bfqq's I/O + * (as explained in the comments in + * bfq_update_inject_limit()). But this does not happen in + * this special case, because bfqq's low think time is due to + * an effective handling of a synchronization, through + * injection. In this special case, bfqq's I/O does not get + * delayed by injection; on the contrary, bfqq's I/O is + * brought forward, because it is not blocked for + * milliseconds. + * + * In addition, serving the blocking I/O much sooner, and much + * more frequently than once per I/O-plugging timeout, makes + * it much quicker to detect a waker queue (the concept of + * waker queue is defined in the comments in + * bfq_add_request()). This makes it possible to start sooner + * to boost throughput more effectively, by injecting the I/O + * of the waker queue unconditionally on every + * bfq_dispatch_request(). + * + * One last, important benefit of not resetting the inject + * limit before 100 ms is that, during this time interval, the + * base value for the total service time is likely to get + * finally computed for bfqq, freeing the inject limit from + * its relation with the think time. + */ + if (state_changed && bfqq->last_serv_time_ns == 0 && + (time_is_before_eq_jiffies(bfqq->decrease_time_jif + + msecs_to_jiffies(100)) || + !has_short_ttime)) + bfq_reset_inject_limit(bfqd, bfqq); } /* @@ -4576,19 +5264,9 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd, static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct request *rq) { - struct bfq_io_cq *bic = RQ_BIC(rq); - if (rq->cmd_flags & REQ_META) bfqq->meta_pending++; - bfq_update_io_thinktime(bfqd, bfqq); - bfq_update_has_short_ttime(bfqd, bfqq, bic); - bfq_update_io_seektime(bfqd, bfqq, rq); - - bfq_log_bfqq(bfqd, bfqq, - "rq_enqueued: has_short_ttime=%d (seeky %d)", - bfq_bfqq_has_short_ttime(bfqq), BFQQ_SEEKY(bfqq)); - bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) { @@ -4676,6 +5354,10 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) bfqq = new_bfqq; } + bfq_update_io_thinktime(bfqd, bfqq); + bfq_update_has_short_ttime(bfqd, bfqq, RQ_BIC(rq)); + bfq_update_io_seektime(bfqd, bfqq, rq); + waiting = bfqq && bfq_bfqq_wait_request(bfqq); bfq_add_request(rq); idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq); @@ -4823,6 +5505,9 @@ static void bfq_update_hw_tag(struct bfq_data *bfqd) bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; bfqd->max_rq_in_driver = 0; bfqd->hw_tag_samples = 0; + + bfqd->nonrot_with_queueing = + blk_queue_nonrot(bfqd->queue) && bfqd->hw_tag; } static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) @@ -4878,6 +5563,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) 1UL<<(BFQ_RATE_SHIFT - 10)) bfq_update_rate_reset(bfqd, NULL); bfqd->last_completion = now_ns; + bfqd->last_completed_rq_bfqq = bfqq; /* * If we are waiting to discover whether the request pattern @@ -4949,6 +5635,164 @@ static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq) bfq_put_queue(bfqq); } +/* + * The processes associated with bfqq may happen to generate their + * cumulative I/O at a lower rate than the rate at which the device + * could serve the same I/O. This is rather probable, e.g., if only + * one process is associated with bfqq and the device is an SSD. It + * results in bfqq becoming often empty while in service. In this + * respect, if BFQ is allowed to switch to another queue when bfqq + * remains empty, then the device goes on being fed with I/O requests, + * and the throughput is not affected. In contrast, if BFQ is not + * allowed to switch to another queue---because bfqq is sync and + * I/O-dispatch needs to be plugged while bfqq is temporarily + * empty---then, during the service of bfqq, there will be frequent + * "service holes", i.e., time intervals during which bfqq gets empty + * and the device can only consume the I/O already queued in its + * hardware queues. During service holes, the device may even get to + * remaining idle. In the end, during the service of bfqq, the device + * is driven at a lower speed than the one it can reach with the kind + * of I/O flowing through bfqq. + * + * To counter this loss of throughput, BFQ implements a "request + * injection mechanism", which tries to fill the above service holes + * with I/O requests taken from other queues. The hard part in this + * mechanism is finding the right amount of I/O to inject, so as to + * both boost throughput and not break bfqq's bandwidth and latency + * guarantees. In this respect, the mechanism maintains a per-queue + * inject limit, computed as below. While bfqq is empty, the injection + * mechanism dispatches extra I/O requests only until the total number + * of I/O requests in flight---i.e., already dispatched but not yet + * completed---remains lower than this limit. + * + * A first definition comes in handy to introduce the algorithm by + * which the inject limit is computed. We define as first request for + * bfqq, an I/O request for bfqq that arrives while bfqq is in + * service, and causes bfqq to switch from empty to non-empty. The + * algorithm updates the limit as a function of the effect of + * injection on the service times of only the first requests of + * bfqq. The reason for this restriction is that these are the + * requests whose service time is affected most, because they are the + * first to arrive after injection possibly occurred. + * + * To evaluate the effect of injection, the algorithm measures the + * "total service time" of first requests. We define as total service + * time of an I/O request, the time that elapses since when the + * request is enqueued into bfqq, to when it is completed. This + * quantity allows the whole effect of injection to be measured. It is + * easy to see why. Suppose that some requests of other queues are + * actually injected while bfqq is empty, and that a new request R + * then arrives for bfqq. If the device does start to serve all or + * part of the injected requests during the service hole, then, + * because of this extra service, it may delay the next invocation of + * the dispatch hook of BFQ. Then, even after R gets eventually + * dispatched, the device may delay the actual service of R if it is + * still busy serving the extra requests, or if it decides to serve, + * before R, some extra request still present in its queues. As a + * conclusion, the cumulative extra delay caused by injection can be + * easily evaluated by just comparing the total service time of first + * requests with and without injection. + * + * The limit-update algorithm works as follows. On the arrival of a + * first request of bfqq, the algorithm measures the total time of the + * request only if one of the three cases below holds, and, for each + * case, it updates the limit as described below: + * + * (1) If there is no in-flight request. This gives a baseline for the + * total service time of the requests of bfqq. If the baseline has + * not been computed yet, then, after computing it, the limit is + * set to 1, to start boosting throughput, and to prepare the + * ground for the next case. If the baseline has already been + * computed, then it is updated, in case it results to be lower + * than the previous value. + * + * (2) If the limit is higher than 0 and there are in-flight + * requests. By comparing the total service time in this case with + * the above baseline, it is possible to know at which extent the + * current value of the limit is inflating the total service + * time. If the inflation is below a certain threshold, then bfqq + * is assumed to be suffering from no perceivable loss of its + * service guarantees, and the limit is even tentatively + * increased. If the inflation is above the threshold, then the + * limit is decreased. Due to the lack of any hysteresis, this + * logic makes the limit oscillate even in steady workload + * conditions. Yet we opted for it, because it is fast in reaching + * the best value for the limit, as a function of the current I/O + * workload. To reduce oscillations, this step is disabled for a + * short time interval after the limit happens to be decreased. + * + * (3) Periodically, after resetting the limit, to make sure that the + * limit eventually drops in case the workload changes. This is + * needed because, after the limit has gone safely up for a + * certain workload, it is impossible to guess whether the + * baseline total service time may have changed, without measuring + * it again without injection. A more effective version of this + * step might be to just sample the baseline, by interrupting + * injection only once, and then to reset/lower the limit only if + * the total service time with the current limit does happen to be + * too large. + * + * More details on each step are provided in the comments on the + * pieces of code that implement these steps: the branch handling the + * transition from empty to non empty in bfq_add_request(), the branch + * handling injection in bfq_select_queue(), and the function + * bfq_choose_bfqq_for_injection(). These comments also explain some + * exceptions, made by the injection mechanism in some special cases. + */ +static void bfq_update_inject_limit(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + u64 tot_time_ns = ktime_get_ns() - bfqd->last_empty_occupied_ns; + unsigned int old_limit = bfqq->inject_limit; + + if (bfqq->last_serv_time_ns > 0) { + u64 threshold = (bfqq->last_serv_time_ns * 3)>>1; + + if (tot_time_ns >= threshold && old_limit > 0) { + bfqq->inject_limit--; + bfqq->decrease_time_jif = jiffies; + } else if (tot_time_ns < threshold && + old_limit < bfqd->max_rq_in_driver<<1) + bfqq->inject_limit++; + } + + /* + * Either we still have to compute the base value for the + * total service time, and there seem to be the right + * conditions to do it, or we can lower the last base value + * computed. + * + * NOTE: (bfqd->rq_in_driver == 1) means that there is no I/O + * request in flight, because this function is in the code + * path that handles the completion of a request of bfqq, and, + * in particular, this function is executed before + * bfqd->rq_in_driver is decremented in such a code path. + */ + if ((bfqq->last_serv_time_ns == 0 && bfqd->rq_in_driver == 1) || + tot_time_ns < bfqq->last_serv_time_ns) { + bfqq->last_serv_time_ns = tot_time_ns; + /* + * Now we certainly have a base value: make sure we + * start trying injection. + */ + bfqq->inject_limit = max_t(unsigned int, 1, old_limit); + } else if (!bfqd->rqs_injected && bfqd->rq_in_driver == 1) + /* + * No I/O injected and no request still in service in + * the drive: these are the exact conditions for + * computing the base value of the total service time + * for bfqq. So let's update this value, because it is + * rather variable. For example, it varies if the size + * or the spatial locality of the I/O requests in bfqq + * change. + */ + bfqq->last_serv_time_ns = tot_time_ns; + + + /* update complete, not waiting for any request completion any longer */ + bfqd->waited_rq = NULL; +} + /* * Handle either a requeue or a finish for rq. The things to do are * the same in both cases: all references to rq are to be dropped. In @@ -4993,6 +5837,9 @@ static void bfq_finish_requeue_request(struct request *rq) spin_lock_irqsave(&bfqd->lock, flags); + if (rq == bfqd->waited_rq) + bfq_update_inject_limit(bfqd, bfqq); + bfq_completed_request(bfqq, bfqd); bfq_finish_requeue_request_body(bfqq); @@ -5156,7 +6003,7 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) * preparation is that, after the prepare_request hook is invoked for * rq, rq may still be transformed into a request with no icq, i.e., a * request not associated with any queue. No bfq hook is invoked to - * signal this tranformation. As a consequence, should these + * signal this transformation. As a consequence, should these * preparation operations be performed when the prepare_request hook * is invoked, and should rq be transformed one moment later, bfq * would end up in an inconsistent state, because it would have @@ -5247,7 +6094,29 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) } } - if (unlikely(bfq_bfqq_just_created(bfqq))) + /* + * Consider bfqq as possibly belonging to a burst of newly + * created queues only if: + * 1) A burst is actually happening (bfqd->burst_size > 0) + * or + * 2) There is no other active queue. In fact, if, in + * contrast, there are active queues not belonging to the + * possible burst bfqq may belong to, then there is no gain + * in considering bfqq as belonging to a burst, and + * therefore in not weight-raising bfqq. See comments on + * bfq_handle_burst(). + * + * This filtering also helps eliminating false positives, + * occurring when bfqq does not belong to an actual large + * burst, but some background task (e.g., a service) happens + * to trigger the creation of new queues very close to when + * bfqq and its possible companion queues are created. See + * comments on bfq_handle_burst() for further details also on + * this issue. + */ + if (unlikely(bfq_bfqq_just_created(bfqq) && + (bfqd->burst_size > 0 || + bfq_tot_busy_queues(bfqd) == 0))) bfq_handle_burst(bfqd, bfqq); return bfqq; @@ -5507,7 +6376,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) HRTIMER_MODE_REL); bfqd->idle_slice_timer.function = bfq_idle_slice_timer; - bfqd->queue_weights_tree = RB_ROOT; + bfqd->queue_weights_tree = RB_ROOT_CACHED; bfqd->num_groups_with_pending_reqs = 0; INIT_LIST_HEAD(&bfqd->active_list); @@ -5515,6 +6384,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) INIT_HLIST_HEAD(&bfqd->burst_list); bfqd->hw_tag = -1; + bfqd->nonrot_with_queueing = blk_queue_nonrot(bfqd->queue); bfqd->bfq_max_budget = bfq_default_max_budget; diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 86394e503ca9..0741b68dbeec 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -32,6 +32,8 @@ #define BFQ_DEFAULT_GRP_IOPRIO 0 #define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE +#define MAX_PID_STR_LENGTH 12 + /* * Soft real-time applications are extremely more latency sensitive * than interactive ones. Over-raise the weight of the former to @@ -89,7 +91,7 @@ struct bfq_service_tree { * expiration. This peculiar definition allows for the following * optimization, not yet exploited: while a given entity is still in * service, we already know which is the best candidate for next - * service among the other active entitities in the same parent + * service among the other active entities in the same parent * entity. We can then quickly compare the timestamps of the * in-service entity with those of such best candidate. * @@ -140,7 +142,7 @@ struct bfq_weight_counter { * * Unless cgroups are used, the weight value is calculated from the * ioprio to export the same interface as CFQ. When dealing with - * ``well-behaved'' queues (i.e., queues that do not spend too much + * "well-behaved" queues (i.e., queues that do not spend too much * time to consume their budget and have true sequential behavior, and * when there are no external factors breaking anticipation) the * relative weights at each level of the cgroups hierarchy should be @@ -240,6 +242,13 @@ struct bfq_queue { /* next ioprio and ioprio class if a change is in progress */ unsigned short new_ioprio, new_ioprio_class; + /* last total-service-time sample, see bfq_update_inject_limit() */ + u64 last_serv_time_ns; + /* limit for request injection */ + unsigned int inject_limit; + /* last time the inject limit has been decreased, in jiffies */ + unsigned long decrease_time_jif; + /* * Shared bfq_queue if queue is cooperating with one or more * other queues. @@ -357,29 +366,24 @@ struct bfq_queue { /* max service rate measured so far */ u32 max_service_rate; + /* - * Ratio between the service received by bfqq while it is in - * service, and the cumulative service (of requests of other - * queues) that may be injected while bfqq is empty but still - * in service. To increase precision, the coefficient is - * measured in tenths of unit. Here are some example of (1) - * ratios, (2) resulting percentages of service injected - * w.r.t. to the total service dispatched while bfqq is in - * service, and (3) corresponding values of the coefficient: - * 1 (50%) -> 10 - * 2 (33%) -> 20 - * 10 (9%) -> 100 - * 9.9 (9%) -> 99 - * 1.5 (40%) -> 15 - * 0.5 (66%) -> 5 - * 0.1 (90%) -> 1 - * - * So, if the coefficient is lower than 10, then - * injected service is more than bfqq service. + * Pointer to the waker queue for this queue, i.e., to the + * queue Q such that this queue happens to get new I/O right + * after some I/O request of Q is completed. For details, see + * the comments on the choice of the queue for injection in + * bfq_select_queue(). + */ + struct bfq_queue *waker_bfqq; + /* node for woken_list, see below */ + struct hlist_node woken_list_node; + /* + * Head of the list of the woken queues for this queue, i.e., + * of the list of the queues for which this queue is a waker + * queue. This list is used to reset the waker_bfqq pointer in + * the woken queues when this queue exits. */ - unsigned int inject_coeff; - /* amount of service injected in current service slot */ - unsigned int injected_service; + struct hlist_head woken_list; }; /** @@ -418,6 +422,15 @@ struct bfq_io_cq { */ bool was_in_burst_list; + /* + * Save the weight when a merge occurs, to be able + * to restore it in case of split. If the weight is not + * correctly resumed when the queue is recycled, + * then the weight of the recycled queue could differ + * from the weight of the original queue. + */ + unsigned int saved_weight; + /* * Similar to previous fields: save wr information. */ @@ -450,7 +463,7 @@ struct bfq_data { * weight-raised @bfq_queue (see the comments to the functions * bfq_weights_tree_[add|remove] for further details). */ - struct rb_root queue_weights_tree; + struct rb_root_cached queue_weights_tree; /* * Number of groups with at least one descendant process that @@ -513,6 +526,9 @@ struct bfq_data { /* number of requests dispatched and waiting for completion */ int rq_in_driver; + /* true if the device is non rotational and performs queueing */ + bool nonrot_with_queueing; + /* * Maximum number of requests in driver in the last * @hw_tag_samples completed requests. @@ -544,6 +560,29 @@ struct bfq_data { /* time of last request completion (ns) */ u64 last_completion; + /* bfqq owning the last completed rq */ + struct bfq_queue *last_completed_rq_bfqq; + + /* time of last transition from empty to non-empty (ns) */ + u64 last_empty_occupied_ns; + + /* + * Flag set to activate the sampling of the total service time + * of a just-arrived first I/O request (see + * bfq_update_inject_limit()). This will cause the setting of + * waited_rq when the request is finally dispatched. + */ + bool wait_dispatch; + /* + * If set, then bfq_update_inject_limit() is invoked when + * waited_rq is eventually completed. + */ + struct request *waited_rq; + /* + * True if some request has been injected during the last service hole. + */ + bool rqs_injected; + /* time of first rq dispatch in current observation interval (ns) */ u64 first_dispatch; /* time of last rq dispatch in current observation interval (ns) */ @@ -553,6 +592,7 @@ struct bfq_data { ktime_t last_budget_start; /* beginning of the last idle slice */ ktime_t last_idling_start; + unsigned long last_idling_start_jiffies; /* number of samples in current observation interval */ int peak_rate_samples; @@ -733,7 +773,8 @@ enum bfqq_state_flags { * update */ BFQQF_coop, /* bfqq is shared */ - BFQQF_split_coop /* shared bfqq will be split */ + BFQQF_split_coop, /* shared bfqq will be split */ + BFQQF_has_waker /* bfqq has a waker queue */ }; #define BFQ_BFQQ_FNS(name) \ @@ -753,6 +794,7 @@ BFQ_BFQQ_FNS(in_large_burst); BFQ_BFQQ_FNS(coop); BFQ_BFQQ_FNS(split_coop); BFQ_BFQQ_FNS(softrt_update); +BFQ_BFQQ_FNS(has_waker); #undef BFQ_BFQQ_FNS /* Expiration reasons. */ @@ -898,10 +940,10 @@ void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync); struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic); void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq); void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, - struct rb_root *root); + struct rb_root_cached *root); void __bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_queue *bfqq, - struct rb_root *root); + struct rb_root_cached *root); void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_queue *bfqq); void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, @@ -1008,13 +1050,23 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq); /* --------------- end of interface of B-WF2Q+ ---------------- */ /* Logging facilities. */ +static inline void bfq_pid_to_str(int pid, char *str, int len) +{ + if (pid != -1) + snprintf(str, len, "%d", pid); + else + snprintf(str, len, "SHARED-"); +} + #ifdef CONFIG_BFQ_GROUP_IOSCHED struct bfq_group *bfqq_group(struct bfq_queue *bfqq); #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ + char pid_str[MAX_PID_STR_LENGTH]; \ + bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \ blk_add_cgroup_trace_msg((bfqd)->queue, \ bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \ - "bfq%d%c " fmt, (bfqq)->pid, \ + "bfq%s%c " fmt, pid_str, \ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', ##args); \ } while (0) @@ -1025,10 +1077,13 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq); #else /* CONFIG_BFQ_GROUP_IOSCHED */ -#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ - blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid, \ +#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ + char pid_str[MAX_PID_STR_LENGTH]; \ + bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \ + blk_add_trace_msg((bfqd)->queue, "bfq%s%c " fmt, pid_str, \ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ - ##args) + ##args); \ +} while (0) #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) #endif /* CONFIG_BFQ_GROUP_IOSCHED */ diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index ae4d000ac0af..48d899cfbe03 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -59,7 +59,7 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service); * bfq_update_next_in_service - update sd->next_in_service * @sd: sched_data for which to perform the update. * @new_entity: if not NULL, pointer to the entity whose activation, - * requeueing or repositionig triggered the invocation of + * requeueing or repositioning triggered the invocation of * this function. * @expiration: id true, this function is being invoked after the * expiration of the in-service entity @@ -90,7 +90,7 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, /* * If this update is triggered by the activation, requeueing - * or repositiong of an entity that does not coincide with + * or repositioning of an entity that does not coincide with * sd->next_in_service, then a full lookup in the active tree * can be avoided. In fact, it is enough to check whether the * just-modified entity has the same priority as @@ -737,7 +737,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); unsigned int prev_weight, new_weight; struct bfq_data *bfqd = NULL; - struct rb_root *root; + struct rb_root_cached *root; #ifdef CONFIG_BFQ_GROUP_IOSCHED struct bfq_sched_data *sd; struct bfq_group *bfqg; @@ -1396,7 +1396,7 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st, * In this first case, update the virtual time in @st too (see the * comments on this update inside the function). * - * In constrast, if there is an in-service entity, then return the + * In contrast, if there is an in-service entity, then return the * entity that would be set in service if not only the above * conditions, but also the next one held true: the currently * in-service entity, on expiration, @@ -1479,12 +1479,12 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, * is being invoked as a part of the expiration path * of the in-service queue. In this case, even if * sd->in_service_entity is not NULL, - * sd->in_service_entiy at this point is actually not + * sd->in_service_entity at this point is actually not * in service any more, and, if needed, has already * been properly queued or requeued into the right * tree. The reason why sd->in_service_entity is still * not NULL here, even if expiration is true, is that - * sd->in_service_entiy is reset as a last step in the + * sd->in_service_entity is reset as a last step in the * expiration path. So, if expiration is true, tell * __bfq_lookup_next_entity that there is no * sd->in_service_entity. diff --git a/block/bio.c b/block/bio.c index 716510ecd7ff..a3c80a6c1fe5 100644 --- a/block/bio.c +++ b/block/bio.c @@ -776,6 +776,8 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, if (vec_end_addr + 1 != page_addr + off) return false; + if (xen_domain() && !xen_biovec_phys_mergeable(bv, page)) + return false; if (same_page && (vec_end_addr & PAGE_MASK) != page_addr) return false; diff --git a/block/blk-core.c b/block/blk-core.c index a55389ba8779..2dd94b3e9ece 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -237,7 +237,6 @@ void blk_sync_queue(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; - cancel_delayed_work_sync(&q->requeue_work); queue_for_each_hw_ctx(q, hctx, i) cancel_delayed_work_sync(&hctx->run_work); } @@ -375,7 +374,7 @@ void blk_cleanup_queue(struct request_queue *q) blk_exit_queue(q); if (queue_is_mq(q)) - blk_mq_free_queue(q); + blk_mq_exit_queue(q); percpu_ref_exit(&q->q_usage_counter); diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index aa6bc5c02643..c59babca6857 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -413,6 +413,14 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, struct list_head *list, bool run_queue_async) { struct elevator_queue *e; + struct request_queue *q = hctx->queue; + + /* + * blk_mq_sched_insert_requests() is called from flush plug + * context only, and hold one usage counter to prevent queue + * from being released. + */ + percpu_ref_get(&q->q_usage_counter); e = hctx->queue->elevator; if (e && e->type->ops.insert_requests) @@ -426,12 +434,14 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, if (!hctx->dispatch_busy && !e && !run_queue_async) { blk_mq_try_issue_list_directly(hctx, list); if (list_empty(list)) - return; + goto out; } blk_mq_insert_requests(hctx, ctx, list); } blk_mq_run_hw_queue(hctx, run_queue_async); + out: + percpu_ref_put(&q->q_usage_counter); } static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 3f9c3f4ac44c..4040e62c3737 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -10,6 +10,7 @@ #include #include +#include "blk.h" #include "blk-mq.h" #include "blk-mq-tag.h" @@ -33,6 +34,11 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj) { struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj); + + if (hctx->flags & BLK_MQ_F_BLOCKING) + cleanup_srcu_struct(hctx->srcu); + blk_free_flush_queue(hctx->fq); + sbitmap_free(&hctx->ctx_map); free_cpumask_var(hctx->cpumask); kfree(hctx->ctxs); kfree(hctx); diff --git a/block/blk-mq.c b/block/blk-mq.c index fc60ed7e940e..00b826399228 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2267,12 +2267,7 @@ static void blk_mq_exit_hctx(struct request_queue *q, if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); - if (hctx->flags & BLK_MQ_F_BLOCKING) - cleanup_srcu_struct(hctx->srcu); - blk_mq_remove_cpuhp(hctx); - blk_free_flush_queue(hctx->fq); - sbitmap_free(&hctx->ctx_map); } static void blk_mq_exit_hw_queues(struct request_queue *q, @@ -2289,15 +2284,65 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, } } +static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) +{ + int hw_ctx_size = sizeof(struct blk_mq_hw_ctx); + + BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu), + __alignof__(struct blk_mq_hw_ctx)) != + sizeof(struct blk_mq_hw_ctx)); + + if (tag_set->flags & BLK_MQ_F_BLOCKING) + hw_ctx_size += sizeof(struct srcu_struct); + + return hw_ctx_size; +} + static int blk_mq_init_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) { - int node; + hctx->queue_num = hctx_idx; - node = hctx->numa_node; + cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); + + hctx->tags = set->tags[hctx_idx]; + + if (set->ops->init_hctx && + set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) + goto unregister_cpu_notifier; + + if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, + hctx->numa_node)) + goto exit_hctx; + return 0; + + exit_hctx: + if (set->ops->exit_hctx) + set->ops->exit_hctx(hctx, hctx_idx); + unregister_cpu_notifier: + blk_mq_remove_cpuhp(hctx); + return -1; +} + +static struct blk_mq_hw_ctx * +blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, + int node) +{ + struct blk_mq_hw_ctx *hctx; + gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY; + + hctx = kzalloc_node(blk_mq_hw_ctx_size(set), gfp, node); + if (!hctx) + goto fail_alloc_hctx; + + if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node)) + goto free_hctx; + + atomic_set(&hctx->nr_active, 0); if (node == NUMA_NO_NODE) - node = hctx->numa_node = set->numa_node; + node = set->numa_node; + hctx->numa_node = node; INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); spin_lock_init(&hctx->lock); @@ -2305,58 +2350,45 @@ static int blk_mq_init_hctx(struct request_queue *q, hctx->queue = q; hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; - cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); - - hctx->tags = set->tags[hctx_idx]; - /* * Allocate space for all possible cpus to avoid allocation at * runtime */ hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), - GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node); + gfp, node); if (!hctx->ctxs) - goto unregister_cpu_notifier; + goto free_cpumask; if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), - GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node)) + gfp, node)) goto free_ctxs; - hctx->nr_ctx = 0; spin_lock_init(&hctx->dispatch_wait_lock); init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); INIT_LIST_HEAD(&hctx->dispatch_wait.entry); - if (set->ops->init_hctx && - set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) - goto free_bitmap; - hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size, - GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY); + gfp); if (!hctx->fq) - goto exit_hctx; - - if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node)) - goto free_fq; + goto free_bitmap; if (hctx->flags & BLK_MQ_F_BLOCKING) init_srcu_struct(hctx->srcu); + blk_mq_hctx_kobj_init(hctx); - return 0; + return hctx; - free_fq: - blk_free_flush_queue(hctx->fq); - exit_hctx: - if (set->ops->exit_hctx) - set->ops->exit_hctx(hctx, hctx_idx); free_bitmap: sbitmap_free(&hctx->ctx_map); free_ctxs: kfree(hctx->ctxs); - unregister_cpu_notifier: - blk_mq_remove_cpuhp(hctx); - return -1; + free_cpumask: + free_cpumask_var(hctx->cpumask); + free_hctx: + kfree(hctx); + fail_alloc_hctx: + return NULL; } static void blk_mq_init_cpu_queues(struct request_queue *q, @@ -2634,6 +2666,8 @@ void blk_mq_release(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned int i; + cancel_delayed_work_sync(&q->requeue_work); + /* hctx kobj stays in hctx */ queue_for_each_hw_ctx(q, hctx, i) { if (!hctx) @@ -2700,51 +2734,25 @@ struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, } EXPORT_SYMBOL(blk_mq_init_sq_queue); -static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) -{ - int hw_ctx_size = sizeof(struct blk_mq_hw_ctx); - - BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu), - __alignof__(struct blk_mq_hw_ctx)) != - sizeof(struct blk_mq_hw_ctx)); - - if (tag_set->flags & BLK_MQ_F_BLOCKING) - hw_ctx_size += sizeof(struct srcu_struct); - - return hw_ctx_size; -} - static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( struct blk_mq_tag_set *set, struct request_queue *q, int hctx_idx, int node) { struct blk_mq_hw_ctx *hctx; - hctx = kzalloc_node(blk_mq_hw_ctx_size(set), - GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, - node); + hctx = blk_mq_alloc_hctx(q, set, node); if (!hctx) - return NULL; - - if (!zalloc_cpumask_var_node(&hctx->cpumask, - GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, - node)) { - kfree(hctx); - return NULL; - } - - atomic_set(&hctx->nr_active, 0); - hctx->numa_node = node; - hctx->queue_num = hctx_idx; + goto fail; - if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) { - free_cpumask_var(hctx->cpumask); - kfree(hctx); - return NULL; - } - blk_mq_hctx_kobj_init(hctx); + if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) + goto free_hctx; return hctx; + + free_hctx: + kobject_put(&hctx->kobj); + fail: + return NULL; } static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, @@ -2838,7 +2846,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, goto err_exit; if (blk_mq_alloc_ctxs(q)) - goto err_exit; + goto err_poll; /* init q->mq_kobj and sw queues' kobjects */ blk_mq_sysfs_init(q); @@ -2899,13 +2907,17 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, kfree(q->queue_hw_ctx); err_sys_init: blk_mq_sysfs_deinit(q); +err_poll: + blk_stat_free_callback(q->poll_cb); + q->poll_cb = NULL; err_exit: q->mq_ops = NULL; return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(blk_mq_init_allocated_queue); -void blk_mq_free_queue(struct request_queue *q) +/* tags can _not_ be used after returning from blk_mq_exit_queue */ +void blk_mq_exit_queue(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; diff --git a/block/blk-mq.h b/block/blk-mq.h index 423ea88ab6fb..633a5a77ee8b 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -37,7 +37,7 @@ struct blk_mq_ctx { struct kobject kobj; } ____cacheline_aligned_in_smp; -void blk_mq_free_queue(struct request_queue *q); +void blk_mq_exit_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool); diff --git a/block/blk.h b/block/blk.h index 5d636ee41663..e27fd1512e4b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -75,7 +75,7 @@ static inline bool biovec_phys_mergeable(struct request_queue *q, if (addr1 + vec1->bv_len != addr2) return false; - if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2)) + if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2->bv_page)) return false; if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask)) return false; diff --git a/block/genhd.c b/block/genhd.c index 703267865f14..d8dff0b21f7d 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -531,6 +531,18 @@ void blk_free_devt(dev_t devt) } } +/** + * We invalidate devt by assigning NULL pointer for devt in idr. + */ +void blk_invalidate_devt(dev_t devt) +{ + if (MAJOR(devt) == BLOCK_EXT_MAJOR) { + spin_lock_bh(&ext_devt_lock); + idr_replace(&ext_devt_idr, NULL, blk_mangle_minor(MINOR(devt))); + spin_unlock_bh(&ext_devt_lock); + } +} + static char *bdevt_str(dev_t devt, char *buf) { if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { @@ -793,6 +805,13 @@ void del_gendisk(struct gendisk *disk) if (!(disk->flags & GENHD_FL_HIDDEN)) blk_unregister_region(disk_devt(disk), disk->minors); + /* + * Remove gendisk pointer from idr so that it cannot be looked up + * while RCU period before freeing gendisk is running to prevent + * use-after-free issues. Note that the device number stays + * "in-use" until we really free the gendisk. + */ + blk_invalidate_devt(disk_devt(disk)); kobject_put(disk->part0.holder_dir); kobject_put(disk->slave_dir); diff --git a/block/partition-generic.c b/block/partition-generic.c index 8e596a8dff32..aee643ce13d1 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -285,6 +285,13 @@ void delete_partition(struct gendisk *disk, int partno) kobject_put(part->holder_dir); device_del(part_to_dev(part)); + /* + * Remove gendisk pointer from idr so that it cannot be looked up + * while RCU period before freeing gendisk is running to prevent + * use-after-free issues. Note that the device number stays + * "in-use" until we really free the gendisk. + */ + blk_invalidate_devt(part_devt(part)); hd_struct_kill(part); } diff --git a/block/sed-opal.c b/block/sed-opal.c index e0de4dd448b3..119640897293 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -2095,13 +2095,16 @@ static int opal_erase_locking_range(struct opal_dev *dev, static int opal_enable_disable_shadow_mbr(struct opal_dev *dev, struct opal_mbr_data *opal_mbr) { + u8 enable_disable = opal_mbr->enable_disable == OPAL_MBR_ENABLE ? + OPAL_TRUE : OPAL_FALSE; + const struct opal_step mbr_steps[] = { { opal_discovery0, }, { start_admin1LSP_opal_session, &opal_mbr->key }, - { set_mbr_done, &opal_mbr->enable_disable }, + { set_mbr_done, &enable_disable }, { end_opal_session, }, { start_admin1LSP_opal_session, &opal_mbr->key }, - { set_mbr_enable_disable, &opal_mbr->enable_disable }, + { set_mbr_enable_disable, &enable_disable }, { end_opal_session, }, { NULL, } }; @@ -2221,7 +2224,7 @@ static int __opal_lock_unlock(struct opal_dev *dev, static int __opal_set_mbr_done(struct opal_dev *dev, struct opal_key *key) { - u8 mbr_done_tf = 1; + u8 mbr_done_tf = OPAL_TRUE; const struct opal_step mbrdone_step [] = { { opal_discovery0, }, { start_admin1LSP_opal_session, key }, diff --git a/crypto/ccm.c b/crypto/ccm.c index 50df8f001c1c..f4caa149b9d2 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -458,7 +458,6 @@ static void crypto_ccm_free(struct aead_instance *inst) static int crypto_ccm_create_common(struct crypto_template *tmpl, struct rtattr **tb, - const char *full_name, const char *ctr_name, const char *mac_name) { @@ -486,7 +485,8 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, mac = __crypto_hash_alg_common(mac_alg); err = -EINVAL; - if (mac->digestsize != 16) + if (strncmp(mac->base.cra_name, "cbcmac(", 7) != 0 || + mac->digestsize != 16) goto out_put_mac; inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL); @@ -509,23 +509,27 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, ctr = crypto_spawn_skcipher_alg(&ictx->ctr); - /* Not a stream cipher? */ + /* The skcipher algorithm must be CTR mode, using 16-byte blocks. */ err = -EINVAL; - if (ctr->base.cra_blocksize != 1) + if (strncmp(ctr->base.cra_name, "ctr(", 4) != 0 || + crypto_skcipher_alg_ivsize(ctr) != 16 || + ctr->base.cra_blocksize != 1) goto err_drop_ctr; - /* We want the real thing! */ - if (crypto_skcipher_alg_ivsize(ctr) != 16) + /* ctr and cbcmac must use the same underlying block cipher. */ + if (strcmp(ctr->base.cra_name + 4, mac->base.cra_name + 7) != 0) goto err_drop_ctr; err = -ENAMETOOLONG; + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "ccm(%s", ctr->base.cra_name + 4) >= CRYPTO_MAX_ALG_NAME) + goto err_drop_ctr; + if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "ccm_base(%s,%s)", ctr->base.cra_driver_name, mac->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_ctr; - memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME); - inst->alg.base.cra_flags = ctr->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = (mac->base.cra_priority + ctr->base.cra_priority) / 2; @@ -567,7 +571,6 @@ static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb) const char *cipher_name; char ctr_name[CRYPTO_MAX_ALG_NAME]; char mac_name[CRYPTO_MAX_ALG_NAME]; - char full_name[CRYPTO_MAX_ALG_NAME]; cipher_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(cipher_name)) @@ -581,35 +584,24 @@ static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb) cipher_name) >= CRYPTO_MAX_ALG_NAME) return -ENAMETOOLONG; - if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm(%s)", cipher_name) >= - CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - return crypto_ccm_create_common(tmpl, tb, full_name, ctr_name, - mac_name); + return crypto_ccm_create_common(tmpl, tb, ctr_name, mac_name); } static int crypto_ccm_base_create(struct crypto_template *tmpl, struct rtattr **tb) { const char *ctr_name; - const char *cipher_name; - char full_name[CRYPTO_MAX_ALG_NAME]; + const char *mac_name; ctr_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(ctr_name)) return PTR_ERR(ctr_name); - cipher_name = crypto_attr_alg_name(tb[2]); - if (IS_ERR(cipher_name)) - return PTR_ERR(cipher_name); - - if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm_base(%s,%s)", - ctr_name, cipher_name) >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; + mac_name = crypto_attr_alg_name(tb[2]); + if (IS_ERR(mac_name)) + return PTR_ERR(mac_name); - return crypto_ccm_create_common(tmpl, tb, full_name, ctr_name, - cipher_name); + return crypto_ccm_create_common(tmpl, tb, ctr_name, mac_name); } static int crypto_rfc4309_setkey(struct crypto_aead *parent, const u8 *key, diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index ed2e12e26dd8..279d816ab51d 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -645,8 +645,8 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, - "%s(%s,%s)", name, chacha_name, - poly_name) >= CRYPTO_MAX_ALG_NAME) + "%s(%s,%s)", name, chacha->base.cra_name, + poly->cra_name) >= CRYPTO_MAX_ALG_NAME) goto out_drop_chacha; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s,%s)", name, chacha->base.cra_driver_name, diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c index 35b583101f4f..90ec0ec1b4f7 100644 --- a/crypto/chacha_generic.c +++ b/crypto/chacha_generic.c @@ -52,7 +52,7 @@ static int chacha_stream_xor(struct skcipher_request *req, unsigned int nbytes = walk.nbytes; if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); + nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE); chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, nbytes, ctx->nrounds); diff --git a/crypto/crct10dif_generic.c b/crypto/crct10dif_generic.c index 8e94e29dc6fc..d08048ae5552 100644 --- a/crypto/crct10dif_generic.c +++ b/crypto/crct10dif_generic.c @@ -65,10 +65,9 @@ static int chksum_final(struct shash_desc *desc, u8 *out) return 0; } -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) +static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) { - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + *(__u16 *)out = crc_t10dif_generic(crc, data, len); return 0; } @@ -77,15 +76,13 @@ static int chksum_finup(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - return __chksum_finup(&ctx->crc, data, len, out); + return __chksum_finup(ctx->crc, data, len, out); } static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) { - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, length, out); + return __chksum_finup(0, data, length, out); } static struct shash_alg alg = { diff --git a/crypto/gcm.c b/crypto/gcm.c index e1a11f529d25..eea16e726ede 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -597,7 +597,6 @@ static void crypto_gcm_free(struct aead_instance *inst) static int crypto_gcm_create_common(struct crypto_template *tmpl, struct rtattr **tb, - const char *full_name, const char *ctr_name, const char *ghash_name) { @@ -638,7 +637,8 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, goto err_free_inst; err = -EINVAL; - if (ghash->digestsize != 16) + if (strcmp(ghash->base.cra_name, "ghash") != 0 || + ghash->digestsize != 16) goto err_drop_ghash; crypto_set_skcipher_spawn(&ctx->ctr, aead_crypto_instance(inst)); @@ -650,24 +650,24 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, ctr = crypto_spawn_skcipher_alg(&ctx->ctr); - /* We only support 16-byte blocks. */ + /* The skcipher algorithm must be CTR mode, using 16-byte blocks. */ err = -EINVAL; - if (crypto_skcipher_alg_ivsize(ctr) != 16) + if (strncmp(ctr->base.cra_name, "ctr(", 4) != 0 || + crypto_skcipher_alg_ivsize(ctr) != 16 || + ctr->base.cra_blocksize != 1) goto out_put_ctr; - /* Not a stream cipher? */ - if (ctr->base.cra_blocksize != 1) + err = -ENAMETOOLONG; + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "gcm(%s", ctr->base.cra_name + 4) >= CRYPTO_MAX_ALG_NAME) goto out_put_ctr; - err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "gcm_base(%s,%s)", ctr->base.cra_driver_name, ghash_alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto out_put_ctr; - memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME); - inst->alg.base.cra_flags = (ghash->base.cra_flags | ctr->base.cra_flags) & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = (ghash->base.cra_priority + @@ -709,7 +709,6 @@ static int crypto_gcm_create(struct crypto_template *tmpl, struct rtattr **tb) { const char *cipher_name; char ctr_name[CRYPTO_MAX_ALG_NAME]; - char full_name[CRYPTO_MAX_ALG_NAME]; cipher_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(cipher_name)) @@ -719,12 +718,7 @@ static int crypto_gcm_create(struct crypto_template *tmpl, struct rtattr **tb) CRYPTO_MAX_ALG_NAME) return -ENAMETOOLONG; - if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "gcm(%s)", cipher_name) >= - CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - return crypto_gcm_create_common(tmpl, tb, full_name, - ctr_name, "ghash"); + return crypto_gcm_create_common(tmpl, tb, ctr_name, "ghash"); } static int crypto_gcm_base_create(struct crypto_template *tmpl, @@ -732,7 +726,6 @@ static int crypto_gcm_base_create(struct crypto_template *tmpl, { const char *ctr_name; const char *ghash_name; - char full_name[CRYPTO_MAX_ALG_NAME]; ctr_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(ctr_name)) @@ -742,12 +735,7 @@ static int crypto_gcm_base_create(struct crypto_template *tmpl, if (IS_ERR(ghash_name)) return PTR_ERR(ghash_name); - if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "gcm_base(%s,%s)", - ctr_name, ghash_name) >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - return crypto_gcm_create_common(tmpl, tb, full_name, - ctr_name, ghash_name); + return crypto_gcm_create_common(tmpl, tb, ctr_name, ghash_name); } static int crypto_rfc4106_setkey(struct crypto_aead *parent, const u8 *key, diff --git a/crypto/hmac.c b/crypto/hmac.c index e74730224f0a..c623778b36ba 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -168,6 +168,10 @@ static int hmac_init_tfm(struct crypto_tfm *tfm) parent->descsize = sizeof(struct shash_desc) + crypto_shash_descsize(hash); + if (WARN_ON(parent->descsize > HASH_MAX_DESCSIZE)) { + crypto_free_shash(hash); + return -EINVAL; + } ctx->hash = hash; return 0; diff --git a/crypto/lrw.c b/crypto/lrw.c index 08a0e458bc3e..cc5c89246193 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -162,8 +162,10 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass) } err = skcipher_walk_virt(&w, req, false); - iv = (__be32 *)w.iv; + if (err) + return err; + iv = (__be32 *)w.iv; counter[0] = be32_to_cpu(iv[3]); counter[1] = be32_to_cpu(iv[2]); counter[2] = be32_to_cpu(iv[1]); diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c index 00fce32ae17a..1d7ad0256fd3 100644 --- a/crypto/salsa20_generic.c +++ b/crypto/salsa20_generic.c @@ -161,7 +161,7 @@ static int salsa20_crypt(struct skcipher_request *req) err = skcipher_walk_virt(&walk, req, false); - salsa20_init(state, ctx, walk.iv); + salsa20_init(state, ctx, req->iv); while (walk.nbytes > 0) { unsigned int nbytes = walk.nbytes; diff --git a/crypto/skcipher.c b/crypto/skcipher.c index bcf13d95f54a..2e66f312e2c4 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -131,8 +131,13 @@ int skcipher_walk_done(struct skcipher_walk *walk, int err) memcpy(walk->dst.virt.addr, walk->page, n); skcipher_unmap_dst(walk); } else if (unlikely(walk->flags & SKCIPHER_WALK_SLOW)) { - if (WARN_ON(err)) { - /* unexpected case; didn't process all bytes */ + if (err) { + /* + * Didn't process all bytes. Either the algorithm is + * broken, or this was the last step and it turned out + * the message wasn't evenly divisible into blocks but + * the algorithm requires it. + */ err = -EINVAL; goto finish; } diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 1e2a10a06b9d..cf768608437e 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -1142,8 +1142,8 @@ static struct dev_pm_domain acpi_lpss_pm_domain = { .thaw_noirq = acpi_subsys_thaw_noirq, .poweroff = acpi_subsys_suspend, .poweroff_late = acpi_lpss_suspend_late, - .poweroff_noirq = acpi_subsys_suspend_noirq, - .restore_noirq = acpi_subsys_resume_noirq, + .poweroff_noirq = acpi_lpss_suspend_noirq, + .restore_noirq = acpi_lpss_resume_noirq, .restore_early = acpi_lpss_resume_early, #endif .runtime_suspend = acpi_lpss_runtime_suspend, diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index e48894e002ba..a46c2c162c03 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1232,18 +1232,24 @@ static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node) /* * set numa proximity domain for smmuv3 device */ -static void __init arm_smmu_v3_set_proximity(struct device *dev, +static int __init arm_smmu_v3_set_proximity(struct device *dev, struct acpi_iort_node *node) { struct acpi_iort_smmu_v3 *smmu; smmu = (struct acpi_iort_smmu_v3 *)node->node_data; if (smmu->flags & ACPI_IORT_SMMU_V3_PXM_VALID) { - set_dev_node(dev, acpi_map_pxm_to_node(smmu->pxm)); + int node = acpi_map_pxm_to_node(smmu->pxm); + + if (node != NUMA_NO_NODE && !node_online(node)) + return -EINVAL; + + set_dev_node(dev, node); pr_info("SMMU-v3[%llx] Mapped to Proximity domain %d\n", smmu->base_address, smmu->pxm); } + return 0; } #else #define arm_smmu_v3_set_proximity NULL @@ -1318,7 +1324,7 @@ struct iort_dev_config { int (*dev_count_resources)(struct acpi_iort_node *node); void (*dev_init_resources)(struct resource *res, struct acpi_iort_node *node); - void (*dev_set_proximity)(struct device *dev, + int (*dev_set_proximity)(struct device *dev, struct acpi_iort_node *node); }; @@ -1369,8 +1375,11 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node, if (!pdev) return -ENOMEM; - if (ops->dev_set_proximity) - ops->dev_set_proximity(&pdev->dev, node); + if (ops->dev_set_proximity) { + ret = ops->dev_set_proximity(&pdev->dev, node); + if (ret) + goto dev_put; + } count = ops->dev_count_resources(node); diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 824ae985ad93..ccb59768b1f3 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -949,8 +949,8 @@ static bool acpi_dev_needs_resume(struct device *dev, struct acpi_device *adev) u32 sys_target = acpi_target_system_state(); int ret, state; - if (!pm_runtime_suspended(dev) || !adev || - device_may_wakeup(dev) != !!adev->wakeup.prepare_count) + if (!pm_runtime_suspended(dev) || !adev || (adev->wakeup.flags.valid && + device_may_wakeup(dev) != !!adev->wakeup.prepare_count)) return true; if (sys_target == ACPI_STATE_S0) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 77abe0ec4043..bd533f68b1de 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1031,6 +1031,14 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, const struct acpi_data_node *data = to_acpi_data_node(fwnode); struct acpi_data_node *dn; + /* + * We can have a combination of device and data nodes, e.g. with + * hierarchical _DSD properties. Make sure the adev pointer is + * restored before going through data nodes, otherwise we will + * be looking for data_nodes below the last device found instead + * of the common fwnode shared by device_nodes and data_nodes. + */ + adev = to_acpi_device_node(fwnode); if (adev) head = &adev->data.subnodes; else if (data) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 403c4ff15349..e52f1238d2d6 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -977,6 +977,8 @@ static int acpi_s2idle_prepare(void) if (acpi_sci_irq_valid()) enable_irq_wake(acpi_sci_irq); + acpi_enable_wakeup_devices(ACPI_STATE_S0); + /* Change the configuration of GPEs to avoid spurious wakeup. */ acpi_enable_all_wakeup_gpes(); acpi_os_wait_events_complete(); @@ -1027,6 +1029,8 @@ static void acpi_s2idle_restore(void) { acpi_enable_all_runtime_gpes(); + acpi_disable_wakeup_devices(ACPI_STATE_S0); + if (acpi_sci_irq_valid()) disable_irq_wake(acpi_sci_irq); diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 4b9c7ca492e6..cc19d91c1688 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1950,8 +1950,18 @@ static void binder_free_txn_fixups(struct binder_transaction *t) static void binder_free_transaction(struct binder_transaction *t) { - if (t->buffer) - t->buffer->transaction = NULL; + struct binder_proc *target_proc = t->to_proc; + + if (target_proc) { + binder_inner_proc_lock(target_proc); + if (t->buffer) + t->buffer->transaction = NULL; + binder_inner_proc_unlock(target_proc); + } + /* + * If the transaction has no target_proc, then + * t->buffer->transaction has already been cleared. + */ binder_free_txn_fixups(t); kfree(t); binder_stats_deleted(BINDER_STAT_TRANSACTION); @@ -3550,10 +3560,12 @@ static void binder_transaction(struct binder_proc *proc, static void binder_free_buf(struct binder_proc *proc, struct binder_buffer *buffer) { + binder_inner_proc_lock(proc); if (buffer->transaction) { buffer->transaction->buffer = NULL; buffer->transaction = NULL; } + binder_inner_proc_unlock(proc); if (buffer->async_transaction && buffer->target_node) { struct binder_node *buf_node; struct binder_work *w; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index adf28788cab5..133fed8e4a8b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4476,9 +4476,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "ST3320[68]13AS", "SD1[5-9]", ATA_HORKAGE_NONCQ | ATA_HORKAGE_FIRMWARE_WARN }, - /* drives which fail FPDMA_AA activation (some may freeze afterwards) */ - { "ST1000LM024 HN-M101MBB", "2AR10001", ATA_HORKAGE_BROKEN_FPDMA_AA }, - { "ST1000LM024 HN-M101MBB", "2BA30001", ATA_HORKAGE_BROKEN_FPDMA_AA }, + /* drives which fail FPDMA_AA activation (some may freeze afterwards) + the ST disks also have LPM issues */ + { "ST1000LM024 HN-M101MBB", "2AR10001", ATA_HORKAGE_BROKEN_FPDMA_AA | + ATA_HORKAGE_NOLPM, }, + { "ST1000LM024 HN-M101MBB", "2BA30001", ATA_HORKAGE_BROKEN_FPDMA_AA | + ATA_HORKAGE_NOLPM, }, { "VB0250EAVER", "HPG7", ATA_HORKAGE_BROKEN_FPDMA_AA }, /* Blacklist entries taken from Silicon Image 3124/3132 diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 668139cfa664..cc37511de866 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -548,11 +548,18 @@ ssize_t __weak cpu_show_l1tf(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_mds(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); +static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -560,6 +567,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_spectre_v2.attr, &dev_attr_spec_store_bypass.attr, &dev_attr_l1tf.attr, + &dev_attr_mds.attr, NULL }; diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a823f469e53f..0df9b4461766 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -490,7 +490,7 @@ static int really_probe(struct device *dev, struct device_driver *drv) if (dev->bus->dma_configure) { ret = dev->bus->dma_configure(dev); if (ret) - goto dma_failed; + goto probe_failed; } if (driver_sysfs_add(dev)) { @@ -546,14 +546,13 @@ static int really_probe(struct device *dev, struct device_driver *drv) goto done; probe_failed: - arch_teardown_dma_ops(dev); -dma_failed: if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); pinctrl_bind_failed: device_links_no_driver(dev); devres_release_all(dev); + arch_teardown_dma_ops(dev); driver_sysfs_remove(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f80d298de3fa..8ad20ed0cb7c 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1747,6 +1747,10 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (dev->power.syscore) goto Complete; + /* Avoid direct_complete to let wakeup_path propagate. */ + if (device_may_wakeup(dev) || dev->power.wakeup_path) + dev->power.direct_complete = false; + if (dev->power.direct_complete) { if (pm_runtime_status_suspended(dev)) { pm_runtime_disable(dev); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index c18586fccb6f..17defbf4f332 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -96,13 +96,8 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) /* * Must use NOIO because we don't want to recurse back into the * block or filesystem layers from page reclaim. - * - * Cannot support DAX and highmem, because our ->direct_access - * routine for DAX must return memory that is always addressable. - * If DAX was reworked to use pfns and kmap throughout, this - * restriction might be able to be lifted. */ - gfp_flags = GFP_NOIO | __GFP_ZERO; + gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM; page = alloc_page(gfp_flags); if (!page) return NULL; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index d43a5677ccbc..a74d03913822 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1310,11 +1310,11 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) } free_shadow: - kfree(rinfo->shadow[i].grants_used); + kvfree(rinfo->shadow[i].grants_used); rinfo->shadow[i].grants_used = NULL; - kfree(rinfo->shadow[i].indirect_grants); + kvfree(rinfo->shadow[i].indirect_grants); rinfo->shadow[i].indirect_grants = NULL; - kfree(rinfo->shadow[i].sg); + kvfree(rinfo->shadow[i].sg); rinfo->shadow[i].sg = NULL; } @@ -1353,7 +1353,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) for (i = 0; i < info->nr_rings; i++) blkif_free_ring(&info->rinfo[i]); - kfree(info->rinfo); + kvfree(info->rinfo); info->rinfo = NULL; info->nr_rings = 0; } @@ -1914,9 +1914,9 @@ static int negotiate_mq(struct blkfront_info *info) if (!info->nr_rings) info->nr_rings = 1; - info->rinfo = kcalloc(info->nr_rings, - sizeof(struct blkfront_ring_info), - GFP_KERNEL); + info->rinfo = kvcalloc(info->nr_rings, + sizeof(struct blkfront_ring_info), + GFP_KERNEL); if (!info->rinfo) { xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure"); info->nr_rings = 0; @@ -2232,17 +2232,17 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) for (i = 0; i < BLK_RING_SIZE(info); i++) { rinfo->shadow[i].grants_used = - kcalloc(grants, - sizeof(rinfo->shadow[i].grants_used[0]), - GFP_NOIO); - rinfo->shadow[i].sg = kcalloc(psegs, - sizeof(rinfo->shadow[i].sg[0]), - GFP_NOIO); + kvcalloc(grants, + sizeof(rinfo->shadow[i].grants_used[0]), + GFP_NOIO); + rinfo->shadow[i].sg = kvcalloc(psegs, + sizeof(rinfo->shadow[i].sg[0]), + GFP_NOIO); if (info->max_indirect_segments) rinfo->shadow[i].indirect_grants = - kcalloc(INDIRECT_GREFS(grants), - sizeof(rinfo->shadow[i].indirect_grants[0]), - GFP_NOIO); + kvcalloc(INDIRECT_GREFS(grants), + sizeof(rinfo->shadow[i].indirect_grants[0]), + GFP_NOIO); if ((rinfo->shadow[i].grants_used == NULL) || (rinfo->shadow[i].sg == NULL) || (info->max_indirect_segments && @@ -2256,11 +2256,11 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) out_of_memory: for (i = 0; i < BLK_RING_SIZE(info); i++) { - kfree(rinfo->shadow[i].grants_used); + kvfree(rinfo->shadow[i].grants_used); rinfo->shadow[i].grants_used = NULL; - kfree(rinfo->shadow[i].sg); + kvfree(rinfo->shadow[i].sg); rinfo->shadow[i].sg = NULL; - kfree(rinfo->shadow[i].indirect_grants); + kvfree(rinfo->shadow[i].indirect_grants); rinfo->shadow[i].indirect_grants = NULL; } if (!list_empty(&rinfo->indirect_pages)) { diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index d5d6e6e5da3b..62d3aa2b26f6 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -37,6 +37,7 @@ #define BDADDR_BCM43430A0 (&(bdaddr_t) {{0xac, 0x1f, 0x12, 0xa0, 0x43, 0x43}}) #define BDADDR_BCM4324B3 (&(bdaddr_t) {{0x00, 0x00, 0x00, 0xb3, 0x24, 0x43}}) #define BDADDR_BCM4330B1 (&(bdaddr_t) {{0x00, 0x00, 0x00, 0xb1, 0x30, 0x43}}) +#define BDADDR_BCM43341B (&(bdaddr_t) {{0xac, 0x1f, 0x00, 0x1b, 0x34, 0x43}}) int btbcm_check_bdaddr(struct hci_dev *hdev) { @@ -82,7 +83,8 @@ int btbcm_check_bdaddr(struct hci_dev *hdev) !bacmp(&bda->bdaddr, BDADDR_BCM20702A1) || !bacmp(&bda->bdaddr, BDADDR_BCM4324B3) || !bacmp(&bda->bdaddr, BDADDR_BCM4330B1) || - !bacmp(&bda->bdaddr, BDADDR_BCM43430A0)) { + !bacmp(&bda->bdaddr, BDADDR_BCM43430A0) || + !bacmp(&bda->bdaddr, BDADDR_BCM43341B)) { bt_dev_info(hdev, "BCM: Using default device address (%pMR)", &bda->bdaddr); set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index b0b680dd69f4..f5dbeec8e274 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -661,7 +661,7 @@ static int btmtkuart_change_baudrate(struct hci_dev *hdev) { struct btmtkuart_dev *bdev = hci_get_drvdata(hdev); struct btmtk_hci_wmt_params wmt_params; - u32 baudrate; + __le32 baudrate; u8 param; int err; diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index ddbe518c3e5b..b5d31d583d60 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -228,9 +228,15 @@ static int bcm_gpio_set_power(struct bcm_device *dev, bool powered) int err; if (powered && !dev->res_enabled) { - err = regulator_bulk_enable(BCM_NUM_SUPPLIES, dev->supplies); - if (err) - return err; + /* Intel Macs use bcm_apple_get_resources() and don't + * have regulator supplies configured. + */ + if (dev->supplies[0].supply) { + err = regulator_bulk_enable(BCM_NUM_SUPPLIES, + dev->supplies); + if (err) + return err; + } /* LPO clock needs to be 32.768 kHz */ err = clk_set_rate(dev->lpo_clk, 32768); @@ -259,7 +265,13 @@ static int bcm_gpio_set_power(struct bcm_device *dev, bool powered) if (!powered && dev->res_enabled) { clk_disable_unprepare(dev->txco_clk); clk_disable_unprepare(dev->lpo_clk); - regulator_bulk_disable(BCM_NUM_SUPPLIES, dev->supplies); + + /* Intel Macs use bcm_apple_get_resources() and don't + * have regulator supplies configured. + */ + if (dev->supplies[0].supply) + regulator_bulk_disable(BCM_NUM_SUPPLIES, + dev->supplies); } /* wait for device to power on and come out of reset */ diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 237aea34b69f..d3b467792eb3 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -508,6 +508,8 @@ static int qca_open(struct hci_uart *hu) qcadev = serdev_device_get_drvdata(hu->serdev); if (qcadev->btsoc_type != QCA_WCN3990) { gpiod_set_value_cansleep(qcadev->bt_en, 1); + /* Controller needs time to bootup. */ + msleep(150); } else { hu->init_speed = qcadev->init_speed; hu->oper_speed = qcadev->oper_speed; @@ -992,7 +994,8 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) while (!skb_queue_empty(&qca->txq)) usleep_range(100, 200); - serdev_device_wait_until_sent(hu->serdev, + if (hu->serdev) + serdev_device_wait_until_sent(hu->serdev, msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); /* Give the controller time to process the request */ diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index b65ff6962899..e9b6ac61fb7f 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -443,6 +443,7 @@ static int omap_rng_probe(struct platform_device *pdev) priv->rng.read = omap_rng_do_read; priv->rng.init = omap_rng_init; priv->rng.cleanup = omap_rng_cleanup; + priv->rng.quality = 900; priv->rng.priv = (unsigned long)priv; platform_set_drvdata(pdev, priv); diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c index f2411468f33f..f38e651dd1b5 100644 --- a/drivers/char/ipmi/ipmi_dmi.c +++ b/drivers/char/ipmi/ipmi_dmi.c @@ -47,9 +47,11 @@ static void __init dmi_add_platform_ipmi(unsigned long base_addr, memset(&p, 0, sizeof(p)); name = "dmi-ipmi-si"; + p.iftype = IPMI_PLAT_IF_SI; switch (type) { case IPMI_DMI_TYPE_SSIF: name = "dmi-ipmi-ssif"; + p.iftype = IPMI_PLAT_IF_SSIF; p.type = SI_TYPE_INVALID; break; case IPMI_DMI_TYPE_BT: diff --git a/drivers/char/ipmi/ipmi_plat_data.c b/drivers/char/ipmi/ipmi_plat_data.c index 8f0ca2a848eb..28471ff2a3a3 100644 --- a/drivers/char/ipmi/ipmi_plat_data.c +++ b/drivers/char/ipmi/ipmi_plat_data.c @@ -12,7 +12,7 @@ struct platform_device *ipmi_platform_add(const char *name, unsigned int inst, struct ipmi_plat_data *p) { struct platform_device *pdev; - unsigned int num_r = 1, size, pidx = 0; + unsigned int num_r = 1, size = 0, pidx = 0; struct resource r[4]; struct property_entry pr[6]; u32 flags; @@ -21,19 +21,22 @@ struct platform_device *ipmi_platform_add(const char *name, unsigned int inst, memset(pr, 0, sizeof(pr)); memset(r, 0, sizeof(r)); - if (p->type == SI_BT) - size = 3; - else if (p->type == SI_TYPE_INVALID) - size = 0; - else - size = 2; + if (p->iftype == IPMI_PLAT_IF_SI) { + if (p->type == SI_BT) + size = 3; + else if (p->type != SI_TYPE_INVALID) + size = 2; + + if (p->regsize == 0) + p->regsize = DEFAULT_REGSIZE; + if (p->regspacing == 0) + p->regspacing = p->regsize; - if (p->regsize == 0) - p->regsize = DEFAULT_REGSIZE; - if (p->regspacing == 0) - p->regspacing = p->regsize; + pr[pidx++] = PROPERTY_ENTRY_U8("ipmi-type", p->type); + } else if (p->iftype == IPMI_PLAT_IF_SSIF) { + pr[pidx++] = PROPERTY_ENTRY_U16("i2c-addr", p->addr); + } - pr[pidx++] = PROPERTY_ENTRY_U8("ipmi-type", p->type); if (p->slave_addr) pr[pidx++] = PROPERTY_ENTRY_U8("slave-addr", p->slave_addr); pr[pidx++] = PROPERTY_ENTRY_U8("addr-source", p->addr_source); diff --git a/drivers/char/ipmi/ipmi_plat_data.h b/drivers/char/ipmi/ipmi_plat_data.h index 567cfcec8ada..9ba744ea9571 100644 --- a/drivers/char/ipmi/ipmi_plat_data.h +++ b/drivers/char/ipmi/ipmi_plat_data.h @@ -6,7 +6,10 @@ #include +enum ipmi_plat_interface_type { IPMI_PLAT_IF_SI, IPMI_PLAT_IF_SSIF }; + struct ipmi_plat_data { + enum ipmi_plat_interface_type iftype; unsigned int type; /* si_type for si, SI_INVALID for others */ unsigned int space; /* addr_space for si, intf# for ssif. */ unsigned long addr; diff --git a/drivers/char/ipmi/ipmi_si_hardcode.c b/drivers/char/ipmi/ipmi_si_hardcode.c index 682221eebd66..f6ece7569504 100644 --- a/drivers/char/ipmi/ipmi_si_hardcode.c +++ b/drivers/char/ipmi/ipmi_si_hardcode.c @@ -83,6 +83,7 @@ static void __init ipmi_hardcode_init_one(const char *si_type_str, memset(&p, 0, sizeof(p)); + p.iftype = IPMI_PLAT_IF_SI; if (!si_type_str || !*si_type_str || strcmp(si_type_str, "kcs") == 0) { p.type = SI_KCS; } else if (strcmp(si_type_str, "smic") == 0) { diff --git a/drivers/char/ipmi/ipmi_si_hotmod.c b/drivers/char/ipmi/ipmi_si_hotmod.c index 03140f6cdf6f..42a925f8cf69 100644 --- a/drivers/char/ipmi/ipmi_si_hotmod.c +++ b/drivers/char/ipmi/ipmi_si_hotmod.c @@ -108,6 +108,7 @@ static int parse_hotmod_str(const char *curr, enum hotmod_op *op, int rv; unsigned int ival; + h->iftype = IPMI_PLAT_IF_SI; rv = parse_str(hotmod_ops, &ival, "operation", &curr); if (rv) return rv; diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 8b5aec5430f1..aaccb0ff1ea6 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -727,12 +727,16 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, /* End of read */ len = ssif_info->multi_len; data = ssif_info->data; - } else if (blocknum != ssif_info->multi_pos) { + } else if (blocknum + 1 != ssif_info->multi_pos) { /* * Out of sequence block, just abort. Block * numbers start at zero for the second block, * but multi_pos starts at one, so the +1. */ + if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) + dev_dbg(&ssif_info->client->dev, + "Received message out of sequence, expected %u, got %u\n", + ssif_info->multi_pos - 1, blocknum); result = -EIO; } else { ssif_inc_stat(ssif_info, received_message_parts); diff --git a/drivers/char/random.c b/drivers/char/random.c index 38c6d1af6d1c..af6e240f98ff 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -777,6 +777,7 @@ static struct crng_state **crng_node_pool __read_mostly; #endif static void invalidate_batched_entropy(void); +static void numa_crng_init(void); static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); static int __init parse_trust_cpu(char *arg) @@ -805,7 +806,9 @@ static void crng_initialize(struct crng_state *crng) } crng->state[i] ^= rv; } - if (trust_cpu && arch_init) { + if (trust_cpu && arch_init && crng == &primary_crng) { + invalidate_batched_entropy(); + numa_crng_init(); crng_init = 2; pr_notice("random: crng done (trusting CPU's manufacturer)\n"); } @@ -2211,8 +2214,8 @@ struct batched_entropy { u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)]; }; unsigned int position; + spinlock_t batch_lock; }; -static rwlock_t batched_entropy_reset_lock = __RW_LOCK_UNLOCKED(batched_entropy_reset_lock); /* * Get a random word for internal kernel use only. The quality of the random @@ -2222,12 +2225,14 @@ static rwlock_t batched_entropy_reset_lock = __RW_LOCK_UNLOCKED(batched_entropy_ * wait_for_random_bytes() should be called and return 0 at least once * at any point prior. */ -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64); +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { + .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), +}; + u64 get_random_u64(void) { u64 ret; - bool use_lock; - unsigned long flags = 0; + unsigned long flags; struct batched_entropy *batch; static void *previous; @@ -2242,28 +2247,25 @@ u64 get_random_u64(void) warn_unseeded_randomness(&previous); - use_lock = READ_ONCE(crng_init) < 2; - batch = &get_cpu_var(batched_entropy_u64); - if (use_lock) - read_lock_irqsave(&batched_entropy_reset_lock, flags); + batch = raw_cpu_ptr(&batched_entropy_u64); + spin_lock_irqsave(&batch->batch_lock, flags); if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { extract_crng((u8 *)batch->entropy_u64); batch->position = 0; } ret = batch->entropy_u64[batch->position++]; - if (use_lock) - read_unlock_irqrestore(&batched_entropy_reset_lock, flags); - put_cpu_var(batched_entropy_u64); + spin_unlock_irqrestore(&batch->batch_lock, flags); return ret; } EXPORT_SYMBOL(get_random_u64); -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32); +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { + .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), +}; u32 get_random_u32(void) { u32 ret; - bool use_lock; - unsigned long flags = 0; + unsigned long flags; struct batched_entropy *batch; static void *previous; @@ -2272,18 +2274,14 @@ u32 get_random_u32(void) warn_unseeded_randomness(&previous); - use_lock = READ_ONCE(crng_init) < 2; - batch = &get_cpu_var(batched_entropy_u32); - if (use_lock) - read_lock_irqsave(&batched_entropy_reset_lock, flags); + batch = raw_cpu_ptr(&batched_entropy_u32); + spin_lock_irqsave(&batch->batch_lock, flags); if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { extract_crng((u8 *)batch->entropy_u32); batch->position = 0; } ret = batch->entropy_u32[batch->position++]; - if (use_lock) - read_unlock_irqrestore(&batched_entropy_reset_lock, flags); - put_cpu_var(batched_entropy_u32); + spin_unlock_irqrestore(&batch->batch_lock, flags); return ret; } EXPORT_SYMBOL(get_random_u32); @@ -2297,12 +2295,19 @@ static void invalidate_batched_entropy(void) int cpu; unsigned long flags; - write_lock_irqsave(&batched_entropy_reset_lock, flags); for_each_possible_cpu (cpu) { - per_cpu_ptr(&batched_entropy_u32, cpu)->position = 0; - per_cpu_ptr(&batched_entropy_u64, cpu)->position = 0; + struct batched_entropy *batched_entropy; + + batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu); + spin_lock_irqsave(&batched_entropy->batch_lock, flags); + batched_entropy->position = 0; + spin_unlock(&batched_entropy->batch_lock); + + batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu); + spin_lock(&batched_entropy->batch_lock); + batched_entropy->position = 0; + spin_unlock_irqrestore(&batched_entropy->batch_lock, flags); } - write_unlock_irqrestore(&batched_entropy_reset_lock, flags); } /** diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index fbeb71953526..05dbfdb9f4af 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -75,7 +75,7 @@ struct ports_driver_data { /* All the console devices handled by this driver */ struct list_head consoles; }; -static struct ports_driver_data pdrvdata; +static struct ports_driver_data pdrvdata = { .next_vtermno = 1}; static DEFINE_SPINLOCK(pdrvdata_lock); static DECLARE_COMPLETION(early_console_added); @@ -1394,6 +1394,7 @@ static int add_port(struct ports_device *portdev, u32 id) port->async_queue = NULL; port->cons.ws.ws_row = port->cons.ws.ws_col = 0; + port->cons.vtermno = 0; port->host_connected = port->guest_connected = false; port->stats = (struct port_stats) { 0 }; diff --git a/drivers/clk/hisilicon/clk-hi3660.c b/drivers/clk/hisilicon/clk-hi3660.c index f40419959656..794eeff0d5d2 100644 --- a/drivers/clk/hisilicon/clk-hi3660.c +++ b/drivers/clk/hisilicon/clk-hi3660.c @@ -163,8 +163,12 @@ static const struct hisi_gate_clock hi3660_crgctrl_gate_sep_clks[] = { "clk_isp_snclk_mux", CLK_SET_RATE_PARENT, 0x50, 17, 0, }, { HI3660_CLK_GATE_ISP_SNCLK2, "clk_gate_isp_snclk2", "clk_isp_snclk_mux", CLK_SET_RATE_PARENT, 0x50, 18, 0, }, + /* + * clk_gate_ufs_subsys is a system bus clock, mark it as critical + * clock and keep it on for system suspend and resume. + */ { HI3660_CLK_GATE_UFS_SUBSYS, "clk_gate_ufs_subsys", "clk_div_sysbus", - CLK_SET_RATE_PARENT, 0x50, 21, 0, }, + CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, 0x50, 21, 0, }, { HI3660_PCLK_GATE_DSI0, "pclk_gate_dsi0", "clk_div_cfgbus", CLK_SET_RATE_PARENT, 0x50, 28, 0, }, { HI3660_PCLK_GATE_DSI1, "pclk_gate_dsi1", "clk_div_cfgbus", diff --git a/drivers/clk/imx/clk-imx8mm.c b/drivers/clk/imx/clk-imx8mm.c index 1ef8438e3d6d..122a81ab8e48 100644 --- a/drivers/clk/imx/clk-imx8mm.c +++ b/drivers/clk/imx/clk-imx8mm.c @@ -449,12 +449,12 @@ static int __init imx8mm_clocks_init(struct device_node *ccm_node) clks[IMX8MM_AUDIO_PLL2_OUT] = imx_clk_gate("audio_pll2_out", "audio_pll2_bypass", base + 0x14, 13); clks[IMX8MM_VIDEO_PLL1_OUT] = imx_clk_gate("video_pll1_out", "video_pll1_bypass", base + 0x28, 13); clks[IMX8MM_DRAM_PLL_OUT] = imx_clk_gate("dram_pll_out", "dram_pll_bypass", base + 0x50, 13); - clks[IMX8MM_GPU_PLL_OUT] = imx_clk_gate("gpu_pll_out", "gpu_pll_bypass", base + 0x64, 13); - clks[IMX8MM_VPU_PLL_OUT] = imx_clk_gate("vpu_pll_out", "vpu_pll_bypass", base + 0x74, 13); - clks[IMX8MM_ARM_PLL_OUT] = imx_clk_gate("arm_pll_out", "arm_pll_bypass", base + 0x84, 13); - clks[IMX8MM_SYS_PLL1_OUT] = imx_clk_gate("sys_pll1_out", "sys_pll1_bypass", base + 0x94, 13); - clks[IMX8MM_SYS_PLL2_OUT] = imx_clk_gate("sys_pll2_out", "sys_pll2_bypass", base + 0x104, 13); - clks[IMX8MM_SYS_PLL3_OUT] = imx_clk_gate("sys_pll3_out", "sys_pll3_bypass", base + 0x114, 13); + clks[IMX8MM_GPU_PLL_OUT] = imx_clk_gate("gpu_pll_out", "gpu_pll_bypass", base + 0x64, 11); + clks[IMX8MM_VPU_PLL_OUT] = imx_clk_gate("vpu_pll_out", "vpu_pll_bypass", base + 0x74, 11); + clks[IMX8MM_ARM_PLL_OUT] = imx_clk_gate("arm_pll_out", "arm_pll_bypass", base + 0x84, 11); + clks[IMX8MM_SYS_PLL1_OUT] = imx_clk_gate("sys_pll1_out", "sys_pll1_bypass", base + 0x94, 11); + clks[IMX8MM_SYS_PLL2_OUT] = imx_clk_gate("sys_pll2_out", "sys_pll2_bypass", base + 0x104, 11); + clks[IMX8MM_SYS_PLL3_OUT] = imx_clk_gate("sys_pll3_out", "sys_pll3_bypass", base + 0x114, 11); /* SYS PLL fixed output */ clks[IMX8MM_SYS_PLL1_40M] = imx_clk_fixed_factor("sys_pll1_40m", "sys_pll1_out", 1, 20); diff --git a/drivers/clk/mediatek/clk-pll.c b/drivers/clk/mediatek/clk-pll.c index f54e4015b0b1..18842d660317 100644 --- a/drivers/clk/mediatek/clk-pll.c +++ b/drivers/clk/mediatek/clk-pll.c @@ -88,6 +88,32 @@ static unsigned long __mtk_pll_recalc_rate(struct mtk_clk_pll *pll, u32 fin, return ((unsigned long)vco + postdiv - 1) / postdiv; } +static void __mtk_pll_tuner_enable(struct mtk_clk_pll *pll) +{ + u32 r; + + if (pll->tuner_en_addr) { + r = readl(pll->tuner_en_addr) | BIT(pll->data->tuner_en_bit); + writel(r, pll->tuner_en_addr); + } else if (pll->tuner_addr) { + r = readl(pll->tuner_addr) | AUDPLL_TUNER_EN; + writel(r, pll->tuner_addr); + } +} + +static void __mtk_pll_tuner_disable(struct mtk_clk_pll *pll) +{ + u32 r; + + if (pll->tuner_en_addr) { + r = readl(pll->tuner_en_addr) & ~BIT(pll->data->tuner_en_bit); + writel(r, pll->tuner_en_addr); + } else if (pll->tuner_addr) { + r = readl(pll->tuner_addr) & ~AUDPLL_TUNER_EN; + writel(r, pll->tuner_addr); + } +} + static void mtk_pll_set_rate_regs(struct mtk_clk_pll *pll, u32 pcw, int postdiv) { @@ -96,6 +122,9 @@ static void mtk_pll_set_rate_regs(struct mtk_clk_pll *pll, u32 pcw, pll_en = readl(pll->base_addr + REG_CON0) & CON0_BASE_EN; + /* disable tuner */ + __mtk_pll_tuner_disable(pll); + /* set postdiv */ val = readl(pll->pd_addr); val &= ~(POSTDIV_MASK << pll->data->pd_shift); @@ -122,6 +151,9 @@ static void mtk_pll_set_rate_regs(struct mtk_clk_pll *pll, u32 pcw, if (pll->tuner_addr) writel(con1 + 1, pll->tuner_addr); + /* restore tuner_en */ + __mtk_pll_tuner_enable(pll); + if (pll_en) udelay(20); } @@ -228,13 +260,7 @@ static int mtk_pll_prepare(struct clk_hw *hw) r |= pll->data->en_mask; writel(r, pll->base_addr + REG_CON0); - if (pll->tuner_en_addr) { - r = readl(pll->tuner_en_addr) | BIT(pll->data->tuner_en_bit); - writel(r, pll->tuner_en_addr); - } else if (pll->tuner_addr) { - r = readl(pll->tuner_addr) | AUDPLL_TUNER_EN; - writel(r, pll->tuner_addr); - } + __mtk_pll_tuner_enable(pll); udelay(20); @@ -258,13 +284,7 @@ static void mtk_pll_unprepare(struct clk_hw *hw) writel(r, pll->base_addr + REG_CON0); } - if (pll->tuner_en_addr) { - r = readl(pll->tuner_en_addr) & ~BIT(pll->data->tuner_en_bit); - writel(r, pll->tuner_en_addr); - } else if (pll->tuner_addr) { - r = readl(pll->tuner_addr) & ~AUDPLL_TUNER_EN; - writel(r, pll->tuner_addr); - } + __mtk_pll_tuner_disable(pll); r = readl(pll->base_addr + REG_CON0); r &= ~CON0_BASE_EN; diff --git a/drivers/clk/renesas/r8a774a1-cpg-mssr.c b/drivers/clk/renesas/r8a774a1-cpg-mssr.c index 4d92b27a6153..7a4c5957939a 100644 --- a/drivers/clk/renesas/r8a774a1-cpg-mssr.c +++ b/drivers/clk/renesas/r8a774a1-cpg-mssr.c @@ -123,8 +123,8 @@ static const struct mssr_mod_clk r8a774a1_mod_clks[] __initconst = { DEF_MOD("msiof2", 209, R8A774A1_CLK_MSO), DEF_MOD("msiof1", 210, R8A774A1_CLK_MSO), DEF_MOD("msiof0", 211, R8A774A1_CLK_MSO), - DEF_MOD("sys-dmac2", 217, R8A774A1_CLK_S0D3), - DEF_MOD("sys-dmac1", 218, R8A774A1_CLK_S0D3), + DEF_MOD("sys-dmac2", 217, R8A774A1_CLK_S3D1), + DEF_MOD("sys-dmac1", 218, R8A774A1_CLK_S3D1), DEF_MOD("sys-dmac0", 219, R8A774A1_CLK_S0D3), DEF_MOD("cmt3", 300, R8A774A1_CLK_R), DEF_MOD("cmt2", 301, R8A774A1_CLK_R), @@ -143,8 +143,8 @@ static const struct mssr_mod_clk r8a774a1_mod_clks[] __initconst = { DEF_MOD("rwdt", 402, R8A774A1_CLK_R), DEF_MOD("intc-ex", 407, R8A774A1_CLK_CP), DEF_MOD("intc-ap", 408, R8A774A1_CLK_S0D3), - DEF_MOD("audmac1", 501, R8A774A1_CLK_S0D3), - DEF_MOD("audmac0", 502, R8A774A1_CLK_S0D3), + DEF_MOD("audmac1", 501, R8A774A1_CLK_S1D2), + DEF_MOD("audmac0", 502, R8A774A1_CLK_S1D2), DEF_MOD("hscif4", 516, R8A774A1_CLK_S3D1), DEF_MOD("hscif3", 517, R8A774A1_CLK_S3D1), DEF_MOD("hscif2", 518, R8A774A1_CLK_S3D1), diff --git a/drivers/clk/renesas/r8a774c0-cpg-mssr.c b/drivers/clk/renesas/r8a774c0-cpg-mssr.c index 34e274f2a273..93dacd826fd0 100644 --- a/drivers/clk/renesas/r8a774c0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a774c0-cpg-mssr.c @@ -157,7 +157,7 @@ static const struct mssr_mod_clk r8a774c0_mod_clks[] __initconst = { DEF_MOD("intc-ex", 407, R8A774C0_CLK_CP), DEF_MOD("intc-ap", 408, R8A774C0_CLK_S0D3), - DEF_MOD("audmac0", 502, R8A774C0_CLK_S3D4), + DEF_MOD("audmac0", 502, R8A774C0_CLK_S1D2), DEF_MOD("hscif4", 516, R8A774C0_CLK_S3D1C), DEF_MOD("hscif3", 517, R8A774C0_CLK_S3D1C), DEF_MOD("hscif2", 518, R8A774C0_CLK_S3D1C), diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c index 86842c9fd314..0825cd0ff286 100644 --- a/drivers/clk/renesas/r8a7795-cpg-mssr.c +++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c @@ -129,8 +129,8 @@ static struct mssr_mod_clk r8a7795_mod_clks[] __initdata = { DEF_MOD("msiof2", 209, R8A7795_CLK_MSO), DEF_MOD("msiof1", 210, R8A7795_CLK_MSO), DEF_MOD("msiof0", 211, R8A7795_CLK_MSO), - DEF_MOD("sys-dmac2", 217, R8A7795_CLK_S0D3), - DEF_MOD("sys-dmac1", 218, R8A7795_CLK_S0D3), + DEF_MOD("sys-dmac2", 217, R8A7795_CLK_S3D1), + DEF_MOD("sys-dmac1", 218, R8A7795_CLK_S3D1), DEF_MOD("sys-dmac0", 219, R8A7795_CLK_S0D3), DEF_MOD("sceg-pub", 229, R8A7795_CLK_CR), DEF_MOD("cmt3", 300, R8A7795_CLK_R), @@ -153,8 +153,8 @@ static struct mssr_mod_clk r8a7795_mod_clks[] __initdata = { DEF_MOD("rwdt", 402, R8A7795_CLK_R), DEF_MOD("intc-ex", 407, R8A7795_CLK_CP), DEF_MOD("intc-ap", 408, R8A7795_CLK_S0D3), - DEF_MOD("audmac1", 501, R8A7795_CLK_S0D3), - DEF_MOD("audmac0", 502, R8A7795_CLK_S0D3), + DEF_MOD("audmac1", 501, R8A7795_CLK_S1D2), + DEF_MOD("audmac0", 502, R8A7795_CLK_S1D2), DEF_MOD("drif7", 508, R8A7795_CLK_S3D2), DEF_MOD("drif6", 509, R8A7795_CLK_S3D2), DEF_MOD("drif5", 510, R8A7795_CLK_S3D2), diff --git a/drivers/clk/renesas/r8a7796-cpg-mssr.c b/drivers/clk/renesas/r8a7796-cpg-mssr.c index 12c455859f2c..997cd956f12b 100644 --- a/drivers/clk/renesas/r8a7796-cpg-mssr.c +++ b/drivers/clk/renesas/r8a7796-cpg-mssr.c @@ -126,8 +126,8 @@ static const struct mssr_mod_clk r8a7796_mod_clks[] __initconst = { DEF_MOD("msiof2", 209, R8A7796_CLK_MSO), DEF_MOD("msiof1", 210, R8A7796_CLK_MSO), DEF_MOD("msiof0", 211, R8A7796_CLK_MSO), - DEF_MOD("sys-dmac2", 217, R8A7796_CLK_S0D3), - DEF_MOD("sys-dmac1", 218, R8A7796_CLK_S0D3), + DEF_MOD("sys-dmac2", 217, R8A7796_CLK_S3D1), + DEF_MOD("sys-dmac1", 218, R8A7796_CLK_S3D1), DEF_MOD("sys-dmac0", 219, R8A7796_CLK_S0D3), DEF_MOD("cmt3", 300, R8A7796_CLK_R), DEF_MOD("cmt2", 301, R8A7796_CLK_R), @@ -146,8 +146,8 @@ static const struct mssr_mod_clk r8a7796_mod_clks[] __initconst = { DEF_MOD("rwdt", 402, R8A7796_CLK_R), DEF_MOD("intc-ex", 407, R8A7796_CLK_CP), DEF_MOD("intc-ap", 408, R8A7796_CLK_S0D3), - DEF_MOD("audmac1", 501, R8A7796_CLK_S0D3), - DEF_MOD("audmac0", 502, R8A7796_CLK_S0D3), + DEF_MOD("audmac1", 501, R8A7796_CLK_S1D2), + DEF_MOD("audmac0", 502, R8A7796_CLK_S1D2), DEF_MOD("drif7", 508, R8A7796_CLK_S3D2), DEF_MOD("drif6", 509, R8A7796_CLK_S3D2), DEF_MOD("drif5", 510, R8A7796_CLK_S3D2), diff --git a/drivers/clk/renesas/r8a77965-cpg-mssr.c b/drivers/clk/renesas/r8a77965-cpg-mssr.c index eb1cca58a1e1..afc9c72fa094 100644 --- a/drivers/clk/renesas/r8a77965-cpg-mssr.c +++ b/drivers/clk/renesas/r8a77965-cpg-mssr.c @@ -123,8 +123,8 @@ static const struct mssr_mod_clk r8a77965_mod_clks[] __initconst = { DEF_MOD("msiof2", 209, R8A77965_CLK_MSO), DEF_MOD("msiof1", 210, R8A77965_CLK_MSO), DEF_MOD("msiof0", 211, R8A77965_CLK_MSO), - DEF_MOD("sys-dmac2", 217, R8A77965_CLK_S0D3), - DEF_MOD("sys-dmac1", 218, R8A77965_CLK_S0D3), + DEF_MOD("sys-dmac2", 217, R8A77965_CLK_S3D1), + DEF_MOD("sys-dmac1", 218, R8A77965_CLK_S3D1), DEF_MOD("sys-dmac0", 219, R8A77965_CLK_S0D3), DEF_MOD("cmt3", 300, R8A77965_CLK_R), @@ -146,8 +146,8 @@ static const struct mssr_mod_clk r8a77965_mod_clks[] __initconst = { DEF_MOD("intc-ex", 407, R8A77965_CLK_CP), DEF_MOD("intc-ap", 408, R8A77965_CLK_S0D3), - DEF_MOD("audmac1", 501, R8A77965_CLK_S0D3), - DEF_MOD("audmac0", 502, R8A77965_CLK_S0D3), + DEF_MOD("audmac1", 501, R8A77965_CLK_S1D2), + DEF_MOD("audmac0", 502, R8A77965_CLK_S1D2), DEF_MOD("drif7", 508, R8A77965_CLK_S3D2), DEF_MOD("drif6", 509, R8A77965_CLK_S3D2), DEF_MOD("drif5", 510, R8A77965_CLK_S3D2), diff --git a/drivers/clk/renesas/r8a77990-cpg-mssr.c b/drivers/clk/renesas/r8a77990-cpg-mssr.c index 9a278c75c918..03f445d47ef6 100644 --- a/drivers/clk/renesas/r8a77990-cpg-mssr.c +++ b/drivers/clk/renesas/r8a77990-cpg-mssr.c @@ -152,7 +152,7 @@ static const struct mssr_mod_clk r8a77990_mod_clks[] __initconst = { DEF_MOD("intc-ex", 407, R8A77990_CLK_CP), DEF_MOD("intc-ap", 408, R8A77990_CLK_S0D3), - DEF_MOD("audmac0", 502, R8A77990_CLK_S3D4), + DEF_MOD("audmac0", 502, R8A77990_CLK_S1D2), DEF_MOD("drif7", 508, R8A77990_CLK_S3D2), DEF_MOD("drif6", 509, R8A77990_CLK_S3D2), DEF_MOD("drif5", 510, R8A77990_CLK_S3D2), diff --git a/drivers/clk/renesas/r8a77995-cpg-mssr.c b/drivers/clk/renesas/r8a77995-cpg-mssr.c index eee3874865a9..68707277b17b 100644 --- a/drivers/clk/renesas/r8a77995-cpg-mssr.c +++ b/drivers/clk/renesas/r8a77995-cpg-mssr.c @@ -133,7 +133,7 @@ static const struct mssr_mod_clk r8a77995_mod_clks[] __initconst = { DEF_MOD("rwdt", 402, R8A77995_CLK_R), DEF_MOD("intc-ex", 407, R8A77995_CLK_CP), DEF_MOD("intc-ap", 408, R8A77995_CLK_S1D2), - DEF_MOD("audmac0", 502, R8A77995_CLK_S3D1), + DEF_MOD("audmac0", 502, R8A77995_CLK_S1D2), DEF_MOD("hscif3", 517, R8A77995_CLK_S3D1C), DEF_MOD("hscif0", 520, R8A77995_CLK_S3D1C), DEF_MOD("thermal", 522, R8A77995_CLK_CP), diff --git a/drivers/clk/rockchip/clk-rk3288.c b/drivers/clk/rockchip/clk-rk3288.c index 5a67b7869960..18460e0993bc 100644 --- a/drivers/clk/rockchip/clk-rk3288.c +++ b/drivers/clk/rockchip/clk-rk3288.c @@ -219,7 +219,7 @@ PNAME(mux_hsadcout_p) = { "hsadc_src", "ext_hsadc" }; PNAME(mux_edp_24m_p) = { "ext_edp_24m", "xin24m" }; PNAME(mux_tspout_p) = { "cpll", "gpll", "npll", "xin27m" }; -PNAME(mux_aclk_vcodec_pre_p) = { "aclk_vepu", "aclk_vdpu" }; +PNAME(mux_aclk_vcodec_pre_p) = { "aclk_vdpu", "aclk_vepu" }; PNAME(mux_usbphy480m_p) = { "sclk_otgphy1_480m", "sclk_otgphy2_480m", "sclk_otgphy0_480m" }; PNAME(mux_hsicphy480m_p) = { "cpll", "gpll", "usbphy480m_src" }; @@ -313,13 +313,13 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { COMPOSITE_NOMUX(0, "aclk_core_mp", "armclk", CLK_IGNORE_UNUSED, RK3288_CLKSEL_CON(0), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, RK3288_CLKGATE_CON(12), 6, GFLAGS), - COMPOSITE_NOMUX(0, "atclk", "armclk", CLK_IGNORE_UNUSED, + COMPOSITE_NOMUX(0, "atclk", "armclk", 0, RK3288_CLKSEL_CON(37), 4, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, RK3288_CLKGATE_CON(12), 7, GFLAGS), COMPOSITE_NOMUX(0, "pclk_dbg_pre", "armclk", CLK_IGNORE_UNUSED, RK3288_CLKSEL_CON(37), 9, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, RK3288_CLKGATE_CON(12), 8, GFLAGS), - GATE(0, "pclk_dbg", "pclk_dbg_pre", CLK_IGNORE_UNUSED, + GATE(0, "pclk_dbg", "pclk_dbg_pre", 0, RK3288_CLKGATE_CON(12), 9, GFLAGS), GATE(0, "cs_dbg", "pclk_dbg_pre", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(12), 10, GFLAGS), @@ -420,7 +420,7 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_usb480m_p, 0, RK3288_CLKSEL_CON(32), 14, 2, MFLAGS, 8, 5, DFLAGS, RK3288_CLKGATE_CON(3), 11, GFLAGS), - MUXGRF(0, "aclk_vcodec_pre", mux_aclk_vcodec_pre_p, 0, + MUXGRF(0, "aclk_vcodec_pre", mux_aclk_vcodec_pre_p, CLK_SET_RATE_PARENT, RK3288_GRF_SOC_CON(0), 7, 1, MFLAGS), GATE(ACLK_VCODEC, "aclk_vcodec", "aclk_vcodec_pre", 0, RK3288_CLKGATE_CON(9), 0, GFLAGS), @@ -647,7 +647,7 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { INVERTER(SCLK_HSADC, "sclk_hsadc", "sclk_hsadc_out", RK3288_CLKSEL_CON(22), 7, IFLAGS), - GATE(0, "jtag", "ext_jtag", CLK_IGNORE_UNUSED, + GATE(0, "jtag", "ext_jtag", 0, RK3288_CLKGATE_CON(4), 14, GFLAGS), COMPOSITE_NODIV(SCLK_USBPHY480M_SRC, "usbphy480m_src", mux_usbphy480m_p, 0, @@ -656,7 +656,7 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { COMPOSITE_NODIV(SCLK_HSICPHY480M, "sclk_hsicphy480m", mux_hsicphy480m_p, 0, RK3288_CLKSEL_CON(29), 0, 2, MFLAGS, RK3288_CLKGATE_CON(3), 6, GFLAGS), - GATE(0, "hsicphy12m_xin12m", "xin12m", CLK_IGNORE_UNUSED, + GATE(0, "hsicphy12m_xin12m", "xin12m", 0, RK3288_CLKGATE_CON(13), 9, GFLAGS), DIV(0, "hsicphy12m_usbphy", "sclk_hsicphy480m", 0, RK3288_CLKSEL_CON(11), 8, 6, DFLAGS), @@ -697,7 +697,7 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { GATE(PCLK_TZPC, "pclk_tzpc", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 3, GFLAGS), GATE(PCLK_UART2, "pclk_uart2", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 9, GFLAGS), GATE(PCLK_EFUSE256, "pclk_efuse_256", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 10, GFLAGS), - GATE(PCLK_RKPWM, "pclk_rkpwm", "pclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(11), 11, GFLAGS), + GATE(PCLK_RKPWM, "pclk_rkpwm", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 11, GFLAGS), /* ddrctrl [DDR Controller PHY clock] gates */ GATE(0, "nclk_ddrupctl0", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(11), 4, GFLAGS), @@ -837,12 +837,9 @@ static const char *const rk3288_critical_clocks[] __initconst = { "pclk_alive_niu", "pclk_pd_pmu", "pclk_pmu_niu", - "pclk_core_niu", - "pclk_ddrupctl0", - "pclk_publ0", - "pclk_ddrupctl1", - "pclk_publ1", "pmu_hclk_otg0", + /* pwm-regulators on some boards, so handoff-critical later */ + "pclk_rkpwm", }; static void __iomem *rk3288_cru_base; @@ -859,6 +856,9 @@ static const int rk3288_saved_cru_reg_ids[] = { RK3288_CLKSEL_CON(10), RK3288_CLKSEL_CON(33), RK3288_CLKSEL_CON(37), + + /* We turn aclk_dmac1 on for suspend; this will restore it */ + RK3288_CLKGATE_CON(10), }; static u32 rk3288_saved_cru_regs[ARRAY_SIZE(rk3288_saved_cru_reg_ids)]; @@ -874,6 +874,14 @@ static int rk3288_clk_suspend(void) readl_relaxed(rk3288_cru_base + reg_id); } + /* + * Going into deep sleep (specifically setting PMU_CLR_DMA in + * RK3288_PMU_PWRMODE_CON1) appears to fail unless + * "aclk_dmac1" is on. + */ + writel_relaxed(1 << (12 + 16), + rk3288_cru_base + RK3288_CLKGATE_CON(10)); + /* * Switch PLLs other than DPLL (for SDRAM) to slow mode to * avoid crashes on resume. The Mask ROM on the system will diff --git a/drivers/clk/rockchip/clk-rk3328.c b/drivers/clk/rockchip/clk-rk3328.c index 65ab5c2f48b0..f12142d9cea2 100644 --- a/drivers/clk/rockchip/clk-rk3328.c +++ b/drivers/clk/rockchip/clk-rk3328.c @@ -458,7 +458,7 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = { RK3328_CLKSEL_CON(35), 15, 1, MFLAGS, 8, 7, DFLAGS, RK3328_CLKGATE_CON(2), 12, GFLAGS), COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_2plls_p, 0, - RK3328_CLKSEL_CON(20), 7, 1, MFLAGS, 0, 7, DFLAGS, + RK3328_CLKSEL_CON(20), 7, 1, MFLAGS, 0, 5, DFLAGS, RK3328_CLKGATE_CON(2), 4, GFLAGS), COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "clk_24m", 0, RK3328_CLKSEL_CON(22), 0, 10, DFLAGS, @@ -550,15 +550,15 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = { GATE(0, "hclk_rkvenc_niu", "hclk_rkvenc", 0, RK3328_CLKGATE_CON(25), 1, GFLAGS), GATE(ACLK_H265, "aclk_h265", "aclk_rkvenc", 0, - RK3328_CLKGATE_CON(25), 0, GFLAGS), + RK3328_CLKGATE_CON(25), 2, GFLAGS), GATE(PCLK_H265, "pclk_h265", "hclk_rkvenc", 0, - RK3328_CLKGATE_CON(25), 1, GFLAGS), + RK3328_CLKGATE_CON(25), 3, GFLAGS), GATE(ACLK_H264, "aclk_h264", "aclk_rkvenc", 0, - RK3328_CLKGATE_CON(25), 0, GFLAGS), + RK3328_CLKGATE_CON(25), 4, GFLAGS), GATE(HCLK_H264, "hclk_h264", "hclk_rkvenc", 0, - RK3328_CLKGATE_CON(25), 1, GFLAGS), + RK3328_CLKGATE_CON(25), 5, GFLAGS), GATE(ACLK_AXISRAM, "aclk_axisram", "aclk_rkvenc", CLK_IGNORE_UNUSED, - RK3328_CLKGATE_CON(25), 0, GFLAGS), + RK3328_CLKGATE_CON(25), 6, GFLAGS), COMPOSITE(SCLK_VENC_CORE, "sclk_venc_core", mux_4plls_p, 0, RK3328_CLKSEL_CON(51), 14, 2, MFLAGS, 8, 5, DFLAGS, @@ -663,7 +663,7 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = { /* PD_GMAC */ COMPOSITE(ACLK_GMAC, "aclk_gmac", mux_2plls_hdmiphy_p, 0, - RK3328_CLKSEL_CON(35), 6, 2, MFLAGS, 0, 5, DFLAGS, + RK3328_CLKSEL_CON(25), 6, 2, MFLAGS, 0, 5, DFLAGS, RK3328_CLKGATE_CON(3), 2, GFLAGS), COMPOSITE_NOMUX(PCLK_GMAC, "pclk_gmac", "aclk_gmac", 0, RK3328_CLKSEL_CON(25), 8, 3, DFLAGS, @@ -733,7 +733,7 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = { /* PD_PERI */ GATE(0, "aclk_peri_noc", "aclk_peri", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(19), 11, GFLAGS), - GATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_peri", 0, RK3328_CLKGATE_CON(19), 4, GFLAGS), + GATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_peri", 0, RK3328_CLKGATE_CON(19), 14, GFLAGS), GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 0, GFLAGS), GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 1, GFLAGS), @@ -913,7 +913,7 @@ static void __init rk3328_clk_init(struct device_node *np) &rk3328_cpuclk_data, rk3328_cpuclk_rates, ARRAY_SIZE(rk3328_cpuclk_rates)); - rockchip_register_softrst(np, 11, reg_base + RK3328_SOFTRST_CON(0), + rockchip_register_softrst(np, 12, reg_base + RK3328_SOFTRST_CON(0), ROCKCHIP_SOFTRST_HIWORD_MASK); rockchip_register_restart_notifier(ctx, RK3328_GLB_SRST_FST, NULL); diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c index b50b7460014b..3e67cbcd80da 100644 --- a/drivers/clk/tegra/clk-pll.c +++ b/drivers/clk/tegra/clk-pll.c @@ -663,8 +663,8 @@ static void _update_pll_mnp(struct tegra_clk_pll *pll, pll_override_writel(val, params->pmc_divp_reg, pll); val = pll_override_readl(params->pmc_divnm_reg, pll); - val &= ~(divm_mask(pll) << div_nmp->override_divm_shift) | - ~(divn_mask(pll) << div_nmp->override_divn_shift); + val &= ~((divm_mask(pll) << div_nmp->override_divm_shift) | + (divn_mask(pll) << div_nmp->override_divn_shift)); val |= (cfg->m << div_nmp->override_divm_shift) | (cfg->n << div_nmp->override_divn_shift); pll_override_writel(val, params->pmc_divnm_reg, pll); diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c index 639f515e08f0..3325ee43bcc1 100644 --- a/drivers/clk/ti/clkctrl.c +++ b/drivers/clk/ti/clkctrl.c @@ -137,9 +137,6 @@ static int _omap4_clkctrl_clk_enable(struct clk_hw *hw) int ret; union omap4_timeout timeout = { 0 }; - if (!clk->enable_bit) - return 0; - if (clk->clkdm) { ret = ti_clk_ll_ops->clkdm_clk_enable(clk->clkdm, hw->clk); if (ret) { @@ -151,6 +148,9 @@ static int _omap4_clkctrl_clk_enable(struct clk_hw *hw) } } + if (!clk->enable_bit) + return 0; + val = ti_clk_ll_ops->clk_readl(&clk->enable_reg); val &= ~OMAP4_MODULEMODE_MASK; @@ -179,7 +179,7 @@ static void _omap4_clkctrl_clk_disable(struct clk_hw *hw) union omap4_timeout timeout = { 0 }; if (!clk->enable_bit) - return; + goto exit; val = ti_clk_ll_ops->clk_readl(&clk->enable_reg); diff --git a/drivers/clk/zynqmp/divider.c b/drivers/clk/zynqmp/divider.c index a371c66e72ef..bd9b5fbc443b 100644 --- a/drivers/clk/zynqmp/divider.c +++ b/drivers/clk/zynqmp/divider.c @@ -31,12 +31,14 @@ * struct zynqmp_clk_divider - adjustable divider clock * @hw: handle between common and hardware-specific interfaces * @flags: Hardware specific flags + * @is_frac: The divider is a fractional divider * @clk_id: Id of clock * @div_type: divisor type (TYPE_DIV1 or TYPE_DIV2) */ struct zynqmp_clk_divider { struct clk_hw hw; u8 flags; + bool is_frac; u32 clk_id; u32 div_type; }; @@ -116,8 +118,7 @@ static long zynqmp_clk_divider_round_rate(struct clk_hw *hw, bestdiv = zynqmp_divider_get_val(*prate, rate); - if ((clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT) && - (divider->flags & CLK_FRAC)) + if ((clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT) && divider->is_frac) bestdiv = rate % *prate ? 1 : bestdiv; *prate = rate * bestdiv; @@ -195,11 +196,13 @@ struct clk_hw *zynqmp_clk_register_divider(const char *name, init.name = name; init.ops = &zynqmp_clk_divider_ops; - init.flags = nodes->flag; + /* CLK_FRAC is not defined in the common clk framework */ + init.flags = nodes->flag & ~CLK_FRAC; init.parent_names = parents; init.num_parents = 1; /* struct clk_divider assignments */ + div->is_frac = !!(nodes->flag & CLK_FRAC); div->flags = nodes->type_flag; div->hw.init = &init; div->clk_id = clk_id; diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index 75491fc841a6..0df16eb1eb3c 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -359,11 +359,11 @@ static int __init armada37xx_cpufreq_driver_init(void) struct armada_37xx_dvfs *dvfs; struct platform_device *pdev; unsigned long freq; - unsigned int cur_frequency; + unsigned int cur_frequency, base_frequency; struct regmap *nb_pm_base, *avs_base; struct device *cpu_dev; int load_lvl, ret; - struct clk *clk; + struct clk *clk, *parent; nb_pm_base = syscon_regmap_lookup_by_compatible("marvell,armada-3700-nb-pm"); @@ -399,6 +399,22 @@ static int __init armada37xx_cpufreq_driver_init(void) return PTR_ERR(clk); } + parent = clk_get_parent(clk); + if (IS_ERR(parent)) { + dev_err(cpu_dev, "Cannot get parent clock for CPU0\n"); + clk_put(clk); + return PTR_ERR(parent); + } + + /* Get parent CPU frequency */ + base_frequency = clk_get_rate(parent); + + if (!base_frequency) { + dev_err(cpu_dev, "Failed to get parent clock rate for CPU\n"); + clk_put(clk); + return -EINVAL; + } + /* Get nominal (current) CPU frequency */ cur_frequency = clk_get_rate(clk); if (!cur_frequency) { @@ -431,7 +447,7 @@ static int __init armada37xx_cpufreq_driver_init(void) for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR; load_lvl++) { unsigned long u_volt = avs_map[dvfs->avs[load_lvl]] * 1000; - freq = cur_frequency / dvfs->divider[load_lvl]; + freq = base_frequency / dvfs->divider[load_lvl]; ret = dev_pm_opp_add(cpu_dev, freq, u_volt); if (ret) goto remove_opp; diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c index b3f4bd647e9b..988ebc326bdb 100644 --- a/drivers/cpufreq/armada-8k-cpufreq.c +++ b/drivers/cpufreq/armada-8k-cpufreq.c @@ -132,6 +132,7 @@ static int __init armada_8k_cpufreq_init(void) of_node_put(node); return -ENODEV; } + of_node_put(node); nb_cpus = num_possible_cpus(); freq_tables = kcalloc(nb_cpus, sizeof(*freq_tables), GFP_KERNEL); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e10922709d13..bbf79544d0ad 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1098,6 +1098,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) cpufreq_global_kobject, "policy%u", cpu); if (ret) { pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); + kobject_put(&policy->kobj); goto err_free_real_cpus; } diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 4268f87e99fc..47fa3a161770 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -31,8 +31,8 @@ struct cs_dbs_tuners { }; /* Conservative governor macros */ -#define DEF_FREQUENCY_UP_THRESHOLD (80) -#define DEF_FREQUENCY_DOWN_THRESHOLD (20) +#define DEF_FREQUENCY_UP_THRESHOLD (63) +#define DEF_FREQUENCY_DOWN_THRESHOLD (26) #define DEF_FREQUENCY_STEP (5) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (10) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index ffa9adeaba31..9d1d9bf02710 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -459,6 +459,8 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) /* Failure, so roll back. */ pr_err("initialization failed (dbs_data kobject init error %d)\n", ret); + kobject_put(&dbs_data->attr_set.kobj); + policy->governor_data = NULL; if (!have_governor_per_policy()) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 6b423eebfd5d..e8c8aff4cba4 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -21,7 +21,7 @@ #include "cpufreq_ondemand.h" /* On-demand governor macros */ -#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_UP_THRESHOLD (63) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_UP_THRESHOLD (95) @@ -130,7 +130,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) } /* - * Every sampling_rate, we check, if current idle time is less than 20% + * Every sampling_rate, we check, if current idle time is less than 37% * (default), then we try to increase frequency. Else, we adjust the frequency * proportional to load. */ diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index a4ff09f91c8f..3e17560b1efe 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -388,11 +388,11 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) ret = imx6ul_opp_check_speed_grading(cpu_dev); if (ret) { if (ret == -EPROBE_DEFER) - return ret; + goto put_node; dev_err(cpu_dev, "failed to read ocotp: %d\n", ret); - return ret; + goto put_node; } } else { imx6q_opp_check_speed_grading(cpu_dev); diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index c2dd43f3f5d8..8d63a6dc8383 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -124,13 +124,14 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk"); if (IS_ERR(priv.cpu_clk)) { dev_err(priv.dev, "Unable to get cpuclk\n"); - return PTR_ERR(priv.cpu_clk); + err = PTR_ERR(priv.cpu_clk); + goto out_node; } err = clk_prepare_enable(priv.cpu_clk); if (err) { dev_err(priv.dev, "Unable to prepare cpuclk\n"); - return err; + goto out_node; } kirkwood_freq_table[0].frequency = clk_get_rate(priv.cpu_clk) / 1000; @@ -161,20 +162,22 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) goto out_ddr; } - of_node_put(np); - np = NULL; - err = cpufreq_register_driver(&kirkwood_cpufreq_driver); - if (!err) - return 0; + if (err) { + dev_err(priv.dev, "Failed to register cpufreq driver\n"); + goto out_powersave; + } - dev_err(priv.dev, "Failed to register cpufreq driver\n"); + of_node_put(np); + return 0; +out_powersave: clk_disable_unprepare(priv.powersave_clk); out_ddr: clk_disable_unprepare(priv.ddr_clk); out_cpu: clk_disable_unprepare(priv.cpu_clk); +out_node: of_node_put(np); return err; diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index 75dfbd2a58ea..c7710c149de8 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -146,6 +146,7 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu = of_get_cpu_node(policy->cpu, NULL); + of_node_put(cpu); if (!cpu) goto out; diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 52f0d91d30c1..9b4ce2eb8222 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -552,6 +552,7 @@ static int pmac_cpufreq_init_7447A(struct device_node *cpunode) volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); if (volt_gpio_np) voltage_gpio = read_gpio(volt_gpio_np); + of_node_put(volt_gpio_np); if (!voltage_gpio){ pr_err("missing cpu-vcore-select gpio\n"); return 1; @@ -588,6 +589,7 @@ static int pmac_cpufreq_init_750FX(struct device_node *cpunode) if (volt_gpio_np) voltage_gpio = read_gpio(volt_gpio_np); + of_node_put(volt_gpio_np); pvr = mfspr(SPRN_PVR); has_cpu_l2lve = !((pvr & 0xf00) == 0x100); diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index 41a0f0be3f9f..8414c3a4ea08 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -86,6 +86,7 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) if (!cbe_get_cpu_pmd_regs(policy->cpu) || !cbe_get_cpu_mic_tm_regs(policy->cpu)) { pr_info("invalid CBE regs pointers for cpufreq\n"); + of_node_put(cpu); return -EINVAL; } diff --git a/drivers/crypto/amcc/crypto4xx_alg.c b/drivers/crypto/amcc/crypto4xx_alg.c index 4092c2aad8e2..3458c5a085d9 100644 --- a/drivers/crypto/amcc/crypto4xx_alg.c +++ b/drivers/crypto/amcc/crypto4xx_alg.c @@ -141,9 +141,10 @@ static int crypto4xx_setkey_aes(struct crypto_skcipher *cipher, /* Setup SA */ sa = ctx->sa_in; - set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, (cm == CRYPTO_MODE_CBC ? - SA_SAVE_IV : SA_NOT_SAVE_IV), - SA_LOAD_HASH_FROM_SA, SA_LOAD_IV_FROM_STATE, + set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, (cm == CRYPTO_MODE_ECB ? + SA_NOT_SAVE_IV : SA_SAVE_IV), + SA_NOT_LOAD_HASH, (cm == CRYPTO_MODE_ECB ? + SA_LOAD_IV_FROM_SA : SA_LOAD_IV_FROM_STATE), SA_NO_HEADER_PROC, SA_HASH_ALG_NULL, SA_CIPHER_ALG_AES, SA_PAD_TYPE_ZERO, SA_OP_GROUP_BASIC, SA_OPCODE_DECRYPT, @@ -162,6 +163,11 @@ static int crypto4xx_setkey_aes(struct crypto_skcipher *cipher, memcpy(ctx->sa_out, ctx->sa_in, ctx->sa_len * 4); sa = ctx->sa_out; sa->sa_command_0.bf.dir = DIR_OUTBOUND; + /* + * SA_OPCODE_ENCRYPT is the same value as SA_OPCODE_DECRYPT. + * it's the DIR_(IN|OUT)BOUND that matters + */ + sa->sa_command_0.bf.opcode = SA_OPCODE_ENCRYPT; return 0; } diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index 06574a884715..920bd5e720b2 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -714,7 +714,23 @@ int crypto4xx_build_pd(struct crypto_async_request *req, size_t offset_to_sr_ptr; u32 gd_idx = 0; int tmp; - bool is_busy; + bool is_busy, force_sd; + + /* + * There's a very subtile/disguised "bug" in the hardware that + * gets indirectly mentioned in 18.1.3.5 Encryption/Decryption + * of the hardware spec: + * *drum roll* the AES/(T)DES OFB and CFB modes are listed as + * operation modes for >>> "Block ciphers" <<<. + * + * To workaround this issue and stop the hardware from causing + * "overran dst buffer" on crypttexts that are not a multiple + * of 16 (AES_BLOCK_SIZE), we force the driver to use the + * scatter buffers. + */ + force_sd = (req_sa->sa_command_1.bf.crypto_mode9_8 == CRYPTO_MODE_CFB + || req_sa->sa_command_1.bf.crypto_mode9_8 == CRYPTO_MODE_OFB) + && (datalen % AES_BLOCK_SIZE); /* figure how many gd are needed */ tmp = sg_nents_for_len(src, assoclen + datalen); @@ -732,7 +748,7 @@ int crypto4xx_build_pd(struct crypto_async_request *req, } /* figure how many sd are needed */ - if (sg_is_last(dst)) { + if (sg_is_last(dst) && force_sd == false) { num_sd = 0; } else { if (datalen > PPC4XX_SD_BUFFER_SIZE) { @@ -807,9 +823,10 @@ int crypto4xx_build_pd(struct crypto_async_request *req, pd->sa_len = sa_len; pd_uinfo = &dev->pdr_uinfo[pd_entry]; - pd_uinfo->async_req = req; pd_uinfo->num_gd = num_gd; pd_uinfo->num_sd = num_sd; + pd_uinfo->dest_va = dst; + pd_uinfo->async_req = req; if (iv_len) memcpy(pd_uinfo->sr_va->save_iv, iv, iv_len); @@ -828,7 +845,6 @@ int crypto4xx_build_pd(struct crypto_async_request *req, /* get first gd we are going to use */ gd_idx = fst_gd; pd_uinfo->first_gd = fst_gd; - pd_uinfo->num_gd = num_gd; gd = crypto4xx_get_gdp(dev, &gd_dma, gd_idx); pd->src = gd_dma; /* enable gather */ @@ -865,17 +881,14 @@ int crypto4xx_build_pd(struct crypto_async_request *req, * Indicate gather array is not used */ pd_uinfo->first_gd = 0xffffffff; - pd_uinfo->num_gd = 0; } - if (sg_is_last(dst)) { + if (!num_sd) { /* * we know application give us dst a whole piece of memory * no need to use scatter ring. */ pd_uinfo->using_sd = 0; pd_uinfo->first_sd = 0xffffffff; - pd_uinfo->num_sd = 0; - pd_uinfo->dest_va = dst; sa->sa_command_0.bf.scatter = 0; pd->dest = (u32)dma_map_page(dev->core_dev->device, sg_page(dst), dst->offset, @@ -889,9 +902,7 @@ int crypto4xx_build_pd(struct crypto_async_request *req, nbytes = datalen; sa->sa_command_0.bf.scatter = 1; pd_uinfo->using_sd = 1; - pd_uinfo->dest_va = dst; pd_uinfo->first_sd = fst_sd; - pd_uinfo->num_sd = num_sd; sd = crypto4xx_get_sdp(dev, &sd_dma, sd_idx); pd->dest = sd_dma; /* setup scatter descriptor */ diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index c2c1abc68f81..0a72c96708c4 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -2854,6 +2854,7 @@ struct caam_hash_state { struct caam_request caam_req; dma_addr_t buf_dma; dma_addr_t ctx_dma; + int ctx_dma_len; u8 buf_0[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned; int buflen_0; u8 buf_1[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned; @@ -2927,6 +2928,7 @@ static inline int ctx_map_to_qm_sg(struct device *dev, struct caam_hash_state *state, int ctx_len, struct dpaa2_sg_entry *qm_sg, u32 flag) { + state->ctx_dma_len = ctx_len; state->ctx_dma = dma_map_single(dev, state->caam_ctx, ctx_len, flag); if (dma_mapping_error(dev, state->ctx_dma)) { dev_err(dev, "unable to map ctx\n"); @@ -3018,13 +3020,13 @@ static void split_key_sh_done(void *cbk_ctx, u32 err) } /* Digest hash size if it is too large */ -static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, - u32 *keylen, u8 *key_out, u32 digestsize) +static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, + u32 digestsize) { struct caam_request *req_ctx; u32 *desc; struct split_key_sh_result result; - dma_addr_t src_dma, dst_dma; + dma_addr_t key_dma; struct caam_flc *flc; dma_addr_t flc_dma; int ret = -ENOMEM; @@ -3041,17 +3043,10 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, if (!flc) goto err_flc; - src_dma = dma_map_single(ctx->dev, (void *)key_in, *keylen, - DMA_TO_DEVICE); - if (dma_mapping_error(ctx->dev, src_dma)) { - dev_err(ctx->dev, "unable to map key input memory\n"); - goto err_src_dma; - } - dst_dma = dma_map_single(ctx->dev, (void *)key_out, digestsize, - DMA_FROM_DEVICE); - if (dma_mapping_error(ctx->dev, dst_dma)) { - dev_err(ctx->dev, "unable to map key output memory\n"); - goto err_dst_dma; + key_dma = dma_map_single(ctx->dev, key, *keylen, DMA_BIDIRECTIONAL); + if (dma_mapping_error(ctx->dev, key_dma)) { + dev_err(ctx->dev, "unable to map key memory\n"); + goto err_key_dma; } desc = flc->sh_desc; @@ -3076,14 +3071,14 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, dpaa2_fl_set_final(in_fle, true); dpaa2_fl_set_format(in_fle, dpaa2_fl_single); - dpaa2_fl_set_addr(in_fle, src_dma); + dpaa2_fl_set_addr(in_fle, key_dma); dpaa2_fl_set_len(in_fle, *keylen); dpaa2_fl_set_format(out_fle, dpaa2_fl_single); - dpaa2_fl_set_addr(out_fle, dst_dma); + dpaa2_fl_set_addr(out_fle, key_dma); dpaa2_fl_set_len(out_fle, digestsize); print_hex_dump_debug("key_in@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, key_in, *keylen, 1); + DUMP_PREFIX_ADDRESS, 16, 4, key, *keylen, 1); print_hex_dump_debug("shdesc@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); @@ -3103,17 +3098,15 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, wait_for_completion(&result.completion); ret = result.err; print_hex_dump_debug("digested key@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, key_in, + DUMP_PREFIX_ADDRESS, 16, 4, key, digestsize, 1); } dma_unmap_single(ctx->dev, flc_dma, sizeof(flc->flc) + desc_bytes(desc), DMA_TO_DEVICE); err_flc_dma: - dma_unmap_single(ctx->dev, dst_dma, digestsize, DMA_FROM_DEVICE); -err_dst_dma: - dma_unmap_single(ctx->dev, src_dma, *keylen, DMA_TO_DEVICE); -err_src_dma: + dma_unmap_single(ctx->dev, key_dma, *keylen, DMA_BIDIRECTIONAL); +err_key_dma: kfree(flc); err_flc: kfree(req_ctx); @@ -3135,12 +3128,10 @@ static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, dev_dbg(ctx->dev, "keylen %d blocksize %d\n", keylen, blocksize); if (keylen > blocksize) { - hashed_key = kmalloc_array(digestsize, sizeof(*hashed_key), - GFP_KERNEL | GFP_DMA); + hashed_key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA); if (!hashed_key) return -ENOMEM; - ret = hash_digest_key(ctx, key, &keylen, hashed_key, - digestsize); + ret = hash_digest_key(ctx, &keylen, hashed_key, digestsize); if (ret) goto bad_free_key; key = hashed_key; @@ -3165,14 +3156,12 @@ static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, } static inline void ahash_unmap(struct device *dev, struct ahash_edesc *edesc, - struct ahash_request *req, int dst_len) + struct ahash_request *req) { struct caam_hash_state *state = ahash_request_ctx(req); if (edesc->src_nents) dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); - if (edesc->dst_dma) - dma_unmap_single(dev, edesc->dst_dma, dst_len, DMA_FROM_DEVICE); if (edesc->qm_sg_bytes) dma_unmap_single(dev, edesc->qm_sg_dma, edesc->qm_sg_bytes, @@ -3187,18 +3176,15 @@ static inline void ahash_unmap(struct device *dev, struct ahash_edesc *edesc, static inline void ahash_unmap_ctx(struct device *dev, struct ahash_edesc *edesc, - struct ahash_request *req, int dst_len, - u32 flag) + struct ahash_request *req, u32 flag) { - struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); if (state->ctx_dma) { - dma_unmap_single(dev, state->ctx_dma, ctx->ctx_len, flag); + dma_unmap_single(dev, state->ctx_dma, state->ctx_dma_len, flag); state->ctx_dma = 0; } - ahash_unmap(dev, edesc, req, dst_len); + ahash_unmap(dev, edesc, req); } static void ahash_done(void *cbk_ctx, u32 status) @@ -3219,16 +3205,13 @@ static void ahash_done(void *cbk_ctx, u32 status) ecode = -EIO; } - ahash_unmap(ctx->dev, edesc, req, digestsize); + ahash_unmap_ctx(ctx->dev, edesc, req, DMA_FROM_DEVICE); + memcpy(req->result, state->caam_ctx, digestsize); qi_cache_free(edesc); print_hex_dump_debug("ctx@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, ctx->ctx_len, 1); - if (req->result) - print_hex_dump_debug("result@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->result, - digestsize, 1); req->base.complete(&req->base, ecode); } @@ -3250,7 +3233,7 @@ static void ahash_done_bi(void *cbk_ctx, u32 status) ecode = -EIO; } - ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL); + ahash_unmap_ctx(ctx->dev, edesc, req, DMA_BIDIRECTIONAL); switch_buf(state); qi_cache_free(edesc); @@ -3283,16 +3266,13 @@ static void ahash_done_ctx_src(void *cbk_ctx, u32 status) ecode = -EIO; } - ahash_unmap_ctx(ctx->dev, edesc, req, digestsize, DMA_TO_DEVICE); + ahash_unmap_ctx(ctx->dev, edesc, req, DMA_BIDIRECTIONAL); + memcpy(req->result, state->caam_ctx, digestsize); qi_cache_free(edesc); print_hex_dump_debug("ctx@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, ctx->ctx_len, 1); - if (req->result) - print_hex_dump_debug("result@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->result, - digestsize, 1); req->base.complete(&req->base, ecode); } @@ -3314,7 +3294,7 @@ static void ahash_done_ctx_dst(void *cbk_ctx, u32 status) ecode = -EIO; } - ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE); + ahash_unmap_ctx(ctx->dev, edesc, req, DMA_FROM_DEVICE); switch_buf(state); qi_cache_free(edesc); @@ -3452,7 +3432,7 @@ static int ahash_update_ctx(struct ahash_request *req) return ret; unmap_ctx: - ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL); + ahash_unmap_ctx(ctx->dev, edesc, req, DMA_BIDIRECTIONAL); qi_cache_free(edesc); return ret; } @@ -3484,7 +3464,7 @@ static int ahash_final_ctx(struct ahash_request *req) sg_table = &edesc->sgt[0]; ret = ctx_map_to_qm_sg(ctx->dev, state, ctx->ctx_len, sg_table, - DMA_TO_DEVICE); + DMA_BIDIRECTIONAL); if (ret) goto unmap_ctx; @@ -3503,22 +3483,13 @@ static int ahash_final_ctx(struct ahash_request *req) } edesc->qm_sg_bytes = qm_sg_bytes; - edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize, - DMA_FROM_DEVICE); - if (dma_mapping_error(ctx->dev, edesc->dst_dma)) { - dev_err(ctx->dev, "unable to map dst\n"); - edesc->dst_dma = 0; - ret = -ENOMEM; - goto unmap_ctx; - } - memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt)); dpaa2_fl_set_final(in_fle, true); dpaa2_fl_set_format(in_fle, dpaa2_fl_sg); dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma); dpaa2_fl_set_len(in_fle, ctx->ctx_len + buflen); dpaa2_fl_set_format(out_fle, dpaa2_fl_single); - dpaa2_fl_set_addr(out_fle, edesc->dst_dma); + dpaa2_fl_set_addr(out_fle, state->ctx_dma); dpaa2_fl_set_len(out_fle, digestsize); req_ctx->flc = &ctx->flc[FINALIZE]; @@ -3533,7 +3504,7 @@ static int ahash_final_ctx(struct ahash_request *req) return ret; unmap_ctx: - ahash_unmap_ctx(ctx->dev, edesc, req, digestsize, DMA_FROM_DEVICE); + ahash_unmap_ctx(ctx->dev, edesc, req, DMA_BIDIRECTIONAL); qi_cache_free(edesc); return ret; } @@ -3586,7 +3557,7 @@ static int ahash_finup_ctx(struct ahash_request *req) sg_table = &edesc->sgt[0]; ret = ctx_map_to_qm_sg(ctx->dev, state, ctx->ctx_len, sg_table, - DMA_TO_DEVICE); + DMA_BIDIRECTIONAL); if (ret) goto unmap_ctx; @@ -3605,22 +3576,13 @@ static int ahash_finup_ctx(struct ahash_request *req) } edesc->qm_sg_bytes = qm_sg_bytes; - edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize, - DMA_FROM_DEVICE); - if (dma_mapping_error(ctx->dev, edesc->dst_dma)) { - dev_err(ctx->dev, "unable to map dst\n"); - edesc->dst_dma = 0; - ret = -ENOMEM; - goto unmap_ctx; - } - memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt)); dpaa2_fl_set_final(in_fle, true); dpaa2_fl_set_format(in_fle, dpaa2_fl_sg); dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma); dpaa2_fl_set_len(in_fle, ctx->ctx_len + buflen + req->nbytes); dpaa2_fl_set_format(out_fle, dpaa2_fl_single); - dpaa2_fl_set_addr(out_fle, edesc->dst_dma); + dpaa2_fl_set_addr(out_fle, state->ctx_dma); dpaa2_fl_set_len(out_fle, digestsize); req_ctx->flc = &ctx->flc[FINALIZE]; @@ -3635,7 +3597,7 @@ static int ahash_finup_ctx(struct ahash_request *req) return ret; unmap_ctx: - ahash_unmap_ctx(ctx->dev, edesc, req, digestsize, DMA_FROM_DEVICE); + ahash_unmap_ctx(ctx->dev, edesc, req, DMA_BIDIRECTIONAL); qi_cache_free(edesc); return ret; } @@ -3704,18 +3666,19 @@ static int ahash_digest(struct ahash_request *req) dpaa2_fl_set_addr(in_fle, sg_dma_address(req->src)); } - edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize, + state->ctx_dma_len = digestsize; + state->ctx_dma = dma_map_single(ctx->dev, state->caam_ctx, digestsize, DMA_FROM_DEVICE); - if (dma_mapping_error(ctx->dev, edesc->dst_dma)) { - dev_err(ctx->dev, "unable to map dst\n"); - edesc->dst_dma = 0; + if (dma_mapping_error(ctx->dev, state->ctx_dma)) { + dev_err(ctx->dev, "unable to map ctx\n"); + state->ctx_dma = 0; goto unmap; } dpaa2_fl_set_final(in_fle, true); dpaa2_fl_set_len(in_fle, req->nbytes); dpaa2_fl_set_format(out_fle, dpaa2_fl_single); - dpaa2_fl_set_addr(out_fle, edesc->dst_dma); + dpaa2_fl_set_addr(out_fle, state->ctx_dma); dpaa2_fl_set_len(out_fle, digestsize); req_ctx->flc = &ctx->flc[DIGEST]; @@ -3729,7 +3692,7 @@ static int ahash_digest(struct ahash_request *req) return ret; unmap: - ahash_unmap(ctx->dev, edesc, req, digestsize); + ahash_unmap_ctx(ctx->dev, edesc, req, DMA_FROM_DEVICE); qi_cache_free(edesc); return ret; } @@ -3755,27 +3718,39 @@ static int ahash_final_no_ctx(struct ahash_request *req) if (!edesc) return ret; - state->buf_dma = dma_map_single(ctx->dev, buf, buflen, DMA_TO_DEVICE); - if (dma_mapping_error(ctx->dev, state->buf_dma)) { - dev_err(ctx->dev, "unable to map src\n"); - goto unmap; + if (buflen) { + state->buf_dma = dma_map_single(ctx->dev, buf, buflen, + DMA_TO_DEVICE); + if (dma_mapping_error(ctx->dev, state->buf_dma)) { + dev_err(ctx->dev, "unable to map src\n"); + goto unmap; + } } - edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize, + state->ctx_dma_len = digestsize; + state->ctx_dma = dma_map_single(ctx->dev, state->caam_ctx, digestsize, DMA_FROM_DEVICE); - if (dma_mapping_error(ctx->dev, edesc->dst_dma)) { - dev_err(ctx->dev, "unable to map dst\n"); - edesc->dst_dma = 0; + if (dma_mapping_error(ctx->dev, state->ctx_dma)) { + dev_err(ctx->dev, "unable to map ctx\n"); + state->ctx_dma = 0; goto unmap; } memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt)); dpaa2_fl_set_final(in_fle, true); - dpaa2_fl_set_format(in_fle, dpaa2_fl_single); - dpaa2_fl_set_addr(in_fle, state->buf_dma); - dpaa2_fl_set_len(in_fle, buflen); + /* + * crypto engine requires the input entry to be present when + * "frame list" FD is used. + * Since engine does not support FMT=2'b11 (unused entry type), leaving + * in_fle zeroized (except for "Final" flag) is the best option. + */ + if (buflen) { + dpaa2_fl_set_format(in_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(in_fle, state->buf_dma); + dpaa2_fl_set_len(in_fle, buflen); + } dpaa2_fl_set_format(out_fle, dpaa2_fl_single); - dpaa2_fl_set_addr(out_fle, edesc->dst_dma); + dpaa2_fl_set_addr(out_fle, state->ctx_dma); dpaa2_fl_set_len(out_fle, digestsize); req_ctx->flc = &ctx->flc[DIGEST]; @@ -3790,7 +3765,7 @@ static int ahash_final_no_ctx(struct ahash_request *req) return ret; unmap: - ahash_unmap(ctx->dev, edesc, req, digestsize); + ahash_unmap_ctx(ctx->dev, edesc, req, DMA_FROM_DEVICE); qi_cache_free(edesc); return ret; } @@ -3870,6 +3845,7 @@ static int ahash_update_no_ctx(struct ahash_request *req) } edesc->qm_sg_bytes = qm_sg_bytes; + state->ctx_dma_len = ctx->ctx_len; state->ctx_dma = dma_map_single(ctx->dev, state->caam_ctx, ctx->ctx_len, DMA_FROM_DEVICE); if (dma_mapping_error(ctx->dev, state->ctx_dma)) { @@ -3918,7 +3894,7 @@ static int ahash_update_no_ctx(struct ahash_request *req) return ret; unmap_ctx: - ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE); + ahash_unmap_ctx(ctx->dev, edesc, req, DMA_TO_DEVICE); qi_cache_free(edesc); return ret; } @@ -3983,11 +3959,12 @@ static int ahash_finup_no_ctx(struct ahash_request *req) } edesc->qm_sg_bytes = qm_sg_bytes; - edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize, + state->ctx_dma_len = digestsize; + state->ctx_dma = dma_map_single(ctx->dev, state->caam_ctx, digestsize, DMA_FROM_DEVICE); - if (dma_mapping_error(ctx->dev, edesc->dst_dma)) { - dev_err(ctx->dev, "unable to map dst\n"); - edesc->dst_dma = 0; + if (dma_mapping_error(ctx->dev, state->ctx_dma)) { + dev_err(ctx->dev, "unable to map ctx\n"); + state->ctx_dma = 0; ret = -ENOMEM; goto unmap; } @@ -3998,7 +3975,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req) dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma); dpaa2_fl_set_len(in_fle, buflen + req->nbytes); dpaa2_fl_set_format(out_fle, dpaa2_fl_single); - dpaa2_fl_set_addr(out_fle, edesc->dst_dma); + dpaa2_fl_set_addr(out_fle, state->ctx_dma); dpaa2_fl_set_len(out_fle, digestsize); req_ctx->flc = &ctx->flc[DIGEST]; @@ -4013,7 +3990,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req) return ret; unmap: - ahash_unmap(ctx->dev, edesc, req, digestsize); + ahash_unmap_ctx(ctx->dev, edesc, req, DMA_FROM_DEVICE); qi_cache_free(edesc); return -ENOMEM; } @@ -4100,6 +4077,7 @@ static int ahash_update_first(struct ahash_request *req) scatterwalk_map_and_copy(next_buf, req->src, to_hash, *next_buflen, 0); + state->ctx_dma_len = ctx->ctx_len; state->ctx_dma = dma_map_single(ctx->dev, state->caam_ctx, ctx->ctx_len, DMA_FROM_DEVICE); if (dma_mapping_error(ctx->dev, state->ctx_dma)) { @@ -4143,7 +4121,7 @@ static int ahash_update_first(struct ahash_request *req) return ret; unmap_ctx: - ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE); + ahash_unmap_ctx(ctx->dev, edesc, req, DMA_TO_DEVICE); qi_cache_free(edesc); return ret; } @@ -4162,6 +4140,7 @@ static int ahash_init(struct ahash_request *req) state->final = ahash_final_no_ctx; state->ctx_dma = 0; + state->ctx_dma_len = 0; state->current_buf = 0; state->buf_dma = 0; state->buflen_0 = 0; diff --git a/drivers/crypto/caam/caamalg_qi2.h b/drivers/crypto/caam/caamalg_qi2.h index 20890780fb82..be5085451053 100644 --- a/drivers/crypto/caam/caamalg_qi2.h +++ b/drivers/crypto/caam/caamalg_qi2.h @@ -162,14 +162,12 @@ struct skcipher_edesc { /* * ahash_edesc - s/w-extended ahash descriptor - * @dst_dma: I/O virtual address of req->result * @qm_sg_dma: I/O virtual address of h/w link table * @src_nents: number of segments in input scatterlist * @qm_sg_bytes: length of dma mapped qm_sg space * @sgt: pointer to h/w link table */ struct ahash_edesc { - dma_addr_t dst_dma; dma_addr_t qm_sg_dma; int src_nents; int qm_sg_bytes; diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index fadf859a14b8..59c4796300e8 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -997,7 +997,7 @@ void psp_pci_init(void) rc = sev_platform_init(&error); if (rc) { dev_err(sp->dev, "SEV: failed to INIT error %#x\n", error); - goto err; + return; } dev_info(sp->dev, "SEV API:%d.%d build:%d\n", psp_master->api_major, diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index a3527c00b29a..009ce649ff25 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -424,7 +424,7 @@ static int validate_keys_sizes(struct cc_aead_ctx *ctx) /* This function prepers the user key so it can pass to the hmac processing * (copy to intenral buffer or hash in case of key longer than block */ -static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *key, +static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, unsigned int keylen) { dma_addr_t key_dma_addr = 0; @@ -437,6 +437,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *key, unsigned int hashmode; unsigned int idx = 0; int rc = 0; + u8 *key = NULL; struct cc_hw_desc desc[MAX_AEAD_SETKEY_SEQ]; dma_addr_t padded_authkey_dma_addr = ctx->auth_state.hmac.padded_authkey_dma_addr; @@ -455,11 +456,17 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *key, } if (keylen != 0) { + + key = kmemdup(authkey, keylen, GFP_KERNEL); + if (!key) + return -ENOMEM; + key_dma_addr = dma_map_single(dev, (void *)key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(dev, key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", key, keylen); + kzfree(key); return -ENOMEM; } if (keylen > blocksize) { @@ -542,6 +549,8 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *key, if (key_dma_addr) dma_unmap_single(dev, key_dma_addr, keylen, DMA_TO_DEVICE); + kzfree(key); + return rc; } diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 0ee1c52da0a4..583737e50611 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -83,24 +83,17 @@ static void cc_copy_mac(struct device *dev, struct aead_request *req, */ static unsigned int cc_get_sgl_nents(struct device *dev, struct scatterlist *sg_list, - unsigned int nbytes, u32 *lbytes, - bool *is_chained) + unsigned int nbytes, u32 *lbytes) { unsigned int nents = 0; while (nbytes && sg_list) { - if (sg_list->length) { - nents++; - /* get the number of bytes in the last entry */ - *lbytes = nbytes; - nbytes -= (sg_list->length > nbytes) ? - nbytes : sg_list->length; - sg_list = sg_next(sg_list); - } else { - sg_list = (struct scatterlist *)sg_page(sg_list); - if (is_chained) - *is_chained = true; - } + nents++; + /* get the number of bytes in the last entry */ + *lbytes = nbytes; + nbytes -= (sg_list->length > nbytes) ? + nbytes : sg_list->length; + sg_list = sg_next(sg_list); } dev_dbg(dev, "nents %d last bytes %d\n", nents, *lbytes); return nents; @@ -142,7 +135,7 @@ void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg, { u32 nents, lbytes; - nents = cc_get_sgl_nents(dev, sg, end, &lbytes, NULL); + nents = cc_get_sgl_nents(dev, sg, end, &lbytes); sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip + 1), to_skip, (direct == CC_SG_TO_BUF)); } @@ -314,40 +307,10 @@ static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data, sgl_data->num_of_buffers++; } -static int cc_dma_map_sg(struct device *dev, struct scatterlist *sg, u32 nents, - enum dma_data_direction direction) -{ - u32 i, j; - struct scatterlist *l_sg = sg; - - for (i = 0; i < nents; i++) { - if (!l_sg) - break; - if (dma_map_sg(dev, l_sg, 1, direction) != 1) { - dev_err(dev, "dma_map_page() sg buffer failed\n"); - goto err; - } - l_sg = sg_next(l_sg); - } - return nents; - -err: - /* Restore mapped parts */ - for (j = 0; j < i; j++) { - if (!sg) - break; - dma_unmap_sg(dev, sg, 1, direction); - sg = sg_next(sg); - } - return 0; -} - static int cc_map_sg(struct device *dev, struct scatterlist *sg, unsigned int nbytes, int direction, u32 *nents, u32 max_sg_nents, u32 *lbytes, u32 *mapped_nents) { - bool is_chained = false; - if (sg_is_last(sg)) { /* One entry only case -set to DLLI */ if (dma_map_sg(dev, sg, 1, direction) != 1) { @@ -361,35 +324,21 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg, *nents = 1; *mapped_nents = 1; } else { /*sg_is_last*/ - *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes, - &is_chained); + *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); if (*nents > max_sg_nents) { *nents = 0; dev_err(dev, "Too many fragments. current %d max %d\n", *nents, max_sg_nents); return -ENOMEM; } - if (!is_chained) { - /* In case of mmu the number of mapped nents might - * be changed from the original sgl nents - */ - *mapped_nents = dma_map_sg(dev, sg, *nents, direction); - if (*mapped_nents == 0) { - *nents = 0; - dev_err(dev, "dma_map_sg() sg buffer failed\n"); - return -ENOMEM; - } - } else { - /*In this case the driver maps entry by entry so it - * must have the same nents before and after map - */ - *mapped_nents = cc_dma_map_sg(dev, sg, *nents, - direction); - if (*mapped_nents != *nents) { - *nents = *mapped_nents; - dev_err(dev, "dma_map_sg() sg buffer failed\n"); - return -ENOMEM; - } + /* In case of mmu the number of mapped nents might + * be changed from the original sgl nents + */ + *mapped_nents = dma_map_sg(dev, sg, *nents, direction); + if (*mapped_nents == 0) { + *nents = 0; + dev_err(dev, "dma_map_sg() sg buffer failed\n"); + return -ENOMEM; } } @@ -571,7 +520,6 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_drvdata *drvdata = dev_get_drvdata(dev); u32 dummy; - bool chained; u32 size_to_unmap = 0; if (areq_ctx->mac_buf_dma_addr) { @@ -612,6 +560,7 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) if (areq_ctx->gen_ctx.iv_dma_addr) { dma_unmap_single(dev, areq_ctx->gen_ctx.iv_dma_addr, hw_iv_size, DMA_BIDIRECTIONAL); + kzfree(areq_ctx->gen_ctx.iv); } /* Release pool */ @@ -636,15 +585,14 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) size_to_unmap += crypto_aead_ivsize(tfm); dma_unmap_sg(dev, req->src, - cc_get_sgl_nents(dev, req->src, size_to_unmap, - &dummy, &chained), + cc_get_sgl_nents(dev, req->src, size_to_unmap, &dummy), DMA_BIDIRECTIONAL); if (req->src != req->dst) { dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n", sg_virt(req->dst)); dma_unmap_sg(dev, req->dst, cc_get_sgl_nents(dev, req->dst, size_to_unmap, - &dummy, &chained), + &dummy), DMA_BIDIRECTIONAL); } if (drvdata->coherent && @@ -717,19 +665,27 @@ static int cc_aead_chain_iv(struct cc_drvdata *drvdata, struct aead_req_ctx *areq_ctx = aead_request_ctx(req); unsigned int hw_iv_size = areq_ctx->hw_iv_size; struct device *dev = drvdata_to_dev(drvdata); + gfp_t flags = cc_gfp_flags(&req->base); int rc = 0; if (!req->iv) { areq_ctx->gen_ctx.iv_dma_addr = 0; + areq_ctx->gen_ctx.iv = NULL; goto chain_iv_exit; } - areq_ctx->gen_ctx.iv_dma_addr = dma_map_single(dev, req->iv, - hw_iv_size, - DMA_BIDIRECTIONAL); + areq_ctx->gen_ctx.iv = kmemdup(req->iv, hw_iv_size, flags); + if (!areq_ctx->gen_ctx.iv) + return -ENOMEM; + + areq_ctx->gen_ctx.iv_dma_addr = + dma_map_single(dev, areq_ctx->gen_ctx.iv, hw_iv_size, + DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, areq_ctx->gen_ctx.iv_dma_addr)) { dev_err(dev, "Mapping iv %u B at va=%pK for DMA failed\n", hw_iv_size, req->iv); + kzfree(areq_ctx->gen_ctx.iv); + areq_ctx->gen_ctx.iv = NULL; rc = -ENOMEM; goto chain_iv_exit; } @@ -1022,7 +978,6 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, unsigned int size_for_map = req->assoclen + req->cryptlen; struct crypto_aead *tfm = crypto_aead_reqtfm(req); u32 sg_index = 0; - bool chained = false; bool is_gcm4543 = areq_ctx->is_gcm4543; u32 size_to_skip = req->assoclen; @@ -1043,7 +998,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ? authsize : 0; src_mapped_nents = cc_get_sgl_nents(dev, req->src, size_for_map, - &src_last_bytes, &chained); + &src_last_bytes); sg_index = areq_ctx->src_sgl->length; //check where the data starts while (sg_index <= size_to_skip) { @@ -1083,7 +1038,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, } dst_mapped_nents = cc_get_sgl_nents(dev, req->dst, size_for_map, - &dst_last_bytes, &chained); + &dst_last_bytes); sg_index = areq_ctx->dst_sgl->length; offset = size_to_skip; @@ -1484,7 +1439,7 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, dev_dbg(dev, " less than one block: curr_buff=%pK *curr_buff_cnt=0x%X copy_to=%pK\n", curr_buff, *curr_buff_cnt, &curr_buff[*curr_buff_cnt]); areq_ctx->in_nents = - cc_get_sgl_nents(dev, src, nbytes, &dummy, NULL); + cc_get_sgl_nents(dev, src, nbytes, &dummy); sg_copy_to_buffer(src, areq_ctx->in_nents, &curr_buff[*curr_buff_cnt], nbytes); *curr_buff_cnt += nbytes; diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index 33dbf3e6d15d..a6c500dca529 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -168,6 +168,7 @@ struct cc_alg_template { struct async_gen_req_ctx { dma_addr_t iv_dma_addr; + u8 *iv; enum drv_crypto_direction op_type; }; diff --git a/drivers/crypto/ccree/cc_fips.c b/drivers/crypto/ccree/cc_fips.c index b4d0a6d983e0..09f708f6418e 100644 --- a/drivers/crypto/ccree/cc_fips.c +++ b/drivers/crypto/ccree/cc_fips.c @@ -72,20 +72,28 @@ static inline void tee_fips_error(struct device *dev) dev_err(dev, "TEE reported error!\n"); } +/* + * This function check if cryptocell tee fips error occurred + * and in such case triggers system error + */ +void cc_tee_handle_fips_error(struct cc_drvdata *p_drvdata) +{ + struct device *dev = drvdata_to_dev(p_drvdata); + + if (!cc_get_tee_fips_status(p_drvdata)) + tee_fips_error(dev); +} + /* Deferred service handler, run as interrupt-fired tasklet */ static void fips_dsr(unsigned long devarg) { struct cc_drvdata *drvdata = (struct cc_drvdata *)devarg; - struct device *dev = drvdata_to_dev(drvdata); - u32 irq, state, val; + u32 irq, val; irq = (drvdata->irq & (CC_GPR0_IRQ_MASK)); if (irq) { - state = cc_ioread(drvdata, CC_REG(GPR_HOST)); - - if (state != (CC_FIPS_SYNC_TEE_STATUS | CC_FIPS_SYNC_MODULE_OK)) - tee_fips_error(dev); + cc_tee_handle_fips_error(drvdata); } /* after verifing that there is nothing to do, @@ -113,8 +121,7 @@ int cc_fips_init(struct cc_drvdata *p_drvdata) dev_dbg(dev, "Initializing fips tasklet\n"); tasklet_init(&fips_h->tasklet, fips_dsr, (unsigned long)p_drvdata); - if (!cc_get_tee_fips_status(p_drvdata)) - tee_fips_error(dev); + cc_tee_handle_fips_error(p_drvdata); return 0; } diff --git a/drivers/crypto/ccree/cc_fips.h b/drivers/crypto/ccree/cc_fips.h index 645e096a7a82..67d5fbfa09b5 100644 --- a/drivers/crypto/ccree/cc_fips.h +++ b/drivers/crypto/ccree/cc_fips.h @@ -18,6 +18,7 @@ int cc_fips_init(struct cc_drvdata *p_drvdata); void cc_fips_fini(struct cc_drvdata *drvdata); void fips_handler(struct cc_drvdata *drvdata); void cc_set_ree_fips_status(struct cc_drvdata *drvdata, bool ok); +void cc_tee_handle_fips_error(struct cc_drvdata *p_drvdata); #else /* CONFIG_CRYPTO_FIPS */ @@ -30,6 +31,7 @@ static inline void cc_fips_fini(struct cc_drvdata *drvdata) {} static inline void cc_set_ree_fips_status(struct cc_drvdata *drvdata, bool ok) {} static inline void fips_handler(struct cc_drvdata *drvdata) {} +static inline void cc_tee_handle_fips_error(struct cc_drvdata *p_drvdata) {} #endif /* CONFIG_CRYPTO_FIPS */ diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 2c4ddc8fb76b..e44cbf173606 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -69,6 +69,7 @@ struct cc_hash_alg { struct hash_key_req_ctx { u32 keylen; dma_addr_t key_dma_addr; + u8 *key; }; /* hash per-session context */ @@ -730,13 +731,20 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, ctx->key_params.keylen = keylen; ctx->key_params.key_dma_addr = 0; ctx->is_hmac = true; + ctx->key_params.key = NULL; if (keylen) { + ctx->key_params.key = kmemdup(key, keylen, GFP_KERNEL); + if (!ctx->key_params.key) + return -ENOMEM; + ctx->key_params.key_dma_addr = - dma_map_single(dev, (void *)key, keylen, DMA_TO_DEVICE); + dma_map_single(dev, (void *)ctx->key_params.key, keylen, + DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", - key, keylen); + ctx->key_params.key, keylen); + kzfree(ctx->key_params.key); return -ENOMEM; } dev_dbg(dev, "mapping key-buffer: key_dma_addr=%pad keylen=%u\n", @@ -887,6 +895,9 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, dev_dbg(dev, "Unmapped key-buffer: key_dma_addr=%pad keylen=%u\n", &ctx->key_params.key_dma_addr, ctx->key_params.keylen); } + + kzfree(ctx->key_params.key); + return rc; } @@ -913,11 +924,16 @@ static int cc_xcbc_setkey(struct crypto_ahash *ahash, ctx->key_params.keylen = keylen; + ctx->key_params.key = kmemdup(key, keylen, GFP_KERNEL); + if (!ctx->key_params.key) + return -ENOMEM; + ctx->key_params.key_dma_addr = - dma_map_single(dev, (void *)key, keylen, DMA_TO_DEVICE); + dma_map_single(dev, ctx->key_params.key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", key, keylen); + kzfree(ctx->key_params.key); return -ENOMEM; } dev_dbg(dev, "mapping key-buffer: key_dma_addr=%pad keylen=%u\n", @@ -969,6 +985,8 @@ static int cc_xcbc_setkey(struct crypto_ahash *ahash, dev_dbg(dev, "Unmapped key-buffer: key_dma_addr=%pad keylen=%u\n", &ctx->key_params.key_dma_addr, ctx->key_params.keylen); + kzfree(ctx->key_params.key); + return rc; } @@ -1621,7 +1639,7 @@ static struct cc_hash_template driver_hash[] = { .setkey = cc_hash_setkey, .halg = { .digestsize = SHA224_DIGEST_SIZE, - .statesize = CC_STATE_SIZE(SHA224_DIGEST_SIZE), + .statesize = CC_STATE_SIZE(SHA256_DIGEST_SIZE), }, }, .hash_mode = DRV_HASH_SHA224, @@ -1648,7 +1666,7 @@ static struct cc_hash_template driver_hash[] = { .setkey = cc_hash_setkey, .halg = { .digestsize = SHA384_DIGEST_SIZE, - .statesize = CC_STATE_SIZE(SHA384_DIGEST_SIZE), + .statesize = CC_STATE_SIZE(SHA512_DIGEST_SIZE), }, }, .hash_mode = DRV_HASH_SHA384, diff --git a/drivers/crypto/ccree/cc_ivgen.c b/drivers/crypto/ccree/cc_ivgen.c index 769458323394..1abec3896a78 100644 --- a/drivers/crypto/ccree/cc_ivgen.c +++ b/drivers/crypto/ccree/cc_ivgen.c @@ -154,9 +154,6 @@ void cc_ivgen_fini(struct cc_drvdata *drvdata) } ivgen_ctx->pool = NULL_SRAM_ADDR; - - /* release "this" context */ - kfree(ivgen_ctx); } /*! @@ -174,10 +171,12 @@ int cc_ivgen_init(struct cc_drvdata *drvdata) int rc; /* Allocate "this" context */ - ivgen_ctx = kzalloc(sizeof(*ivgen_ctx), GFP_KERNEL); + ivgen_ctx = devm_kzalloc(device, sizeof(*ivgen_ctx), GFP_KERNEL); if (!ivgen_ctx) return -ENOMEM; + drvdata->ivgen_handle = ivgen_ctx; + /* Allocate pool's header for initial enc. key/IV */ ivgen_ctx->pool_meta = dma_alloc_coherent(device, CC_IVPOOL_META_SIZE, &ivgen_ctx->pool_meta_dma, @@ -196,8 +195,6 @@ int cc_ivgen_init(struct cc_drvdata *drvdata) goto out; } - drvdata->ivgen_handle = ivgen_ctx; - return cc_init_iv_sram(drvdata); out: diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index 6ff7e75ad90e..638082dff183 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -11,6 +11,7 @@ #include "cc_ivgen.h" #include "cc_hash.h" #include "cc_pm.h" +#include "cc_fips.h" #define POWER_DOWN_ENABLE 0x01 #define POWER_DOWN_DISABLE 0x00 @@ -25,13 +26,13 @@ int cc_pm_suspend(struct device *dev) int rc; dev_dbg(dev, "set HOST_POWER_DOWN_EN\n"); - cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); rc = cc_suspend_req_queue(drvdata); if (rc) { dev_err(dev, "cc_suspend_req_queue (%x)\n", rc); return rc; } fini_cc_regs(drvdata); + cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); cc_clk_off(drvdata); return 0; } @@ -42,19 +43,21 @@ int cc_pm_resume(struct device *dev) struct cc_drvdata *drvdata = dev_get_drvdata(dev); dev_dbg(dev, "unset HOST_POWER_DOWN_EN\n"); - cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_DISABLE); - + /* Enables the device source clk */ rc = cc_clk_on(drvdata); if (rc) { dev_err(dev, "failed getting clock back on. We're toast.\n"); return rc; } + cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_DISABLE); rc = init_cc_regs(drvdata, false); if (rc) { dev_err(dev, "init_cc_regs (%x)\n", rc); return rc; } + /* check if tee fips error occurred during power down */ + cc_tee_handle_fips_error(drvdata); rc = cc_resume_req_queue(drvdata); if (rc) { diff --git a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c index 02dac6ae7e53..7564b4c41afc 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c @@ -250,9 +250,14 @@ static int rk_set_data_start(struct rk_crypto_info *dev) u8 *src_last_blk = page_address(sg_page(dev->sg_src)) + dev->sg_src->offset + dev->sg_src->length - ivsize; - /* store the iv that need to be updated in chain mode */ - if (ctx->mode & RK_CRYPTO_DEC) + /* Store the iv that need to be updated in chain mode. + * And update the IV buffer to contain the next IV for decryption mode. + */ + if (ctx->mode & RK_CRYPTO_DEC) { memcpy(ctx->iv, src_last_blk, ivsize); + sg_pcopy_to_buffer(dev->first, dev->src_nents, req->info, + ivsize, dev->total - ivsize); + } err = dev->load_data(dev, dev->sg_src, dev->sg_dst); if (!err) @@ -288,13 +293,19 @@ static void rk_iv_copyback(struct rk_crypto_info *dev) struct ablkcipher_request *req = ablkcipher_request_cast(dev->async_req); struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); u32 ivsize = crypto_ablkcipher_ivsize(tfm); - if (ivsize == DES_BLOCK_SIZE) - memcpy_fromio(req->info, dev->reg + RK_CRYPTO_TDES_IV_0, - ivsize); - else if (ivsize == AES_BLOCK_SIZE) - memcpy_fromio(req->info, dev->reg + RK_CRYPTO_AES_IV_0, ivsize); + /* Update the IV buffer to contain the next IV for encryption mode. */ + if (!(ctx->mode & RK_CRYPTO_DEC)) { + if (dev->aligned) { + memcpy(req->info, sg_virt(dev->sg_dst) + + dev->sg_dst->length - ivsize, ivsize); + } else { + memcpy(req->info, dev->addr_vir + + dev->count - ivsize, ivsize); + } + } } static void rk_update_iv(struct rk_crypto_info *dev) diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c index a4b5ff2b72f8..f6936bb3b7be 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c @@ -240,7 +240,10 @@ static int sun4i_hash(struct ahash_request *areq) } } else { /* Since we have the flag final, we can go up to modulo 4 */ - end = ((areq->nbytes + op->len) / 4) * 4 - op->len; + if (areq->nbytes < 4) + end = 0; + else + end = ((areq->nbytes + op->len) / 4) * 4 - op->len; } /* TODO if SGlen % 4 and !op->len then DMA */ diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl index d6a9f63d65ba..9c6b5c1d6a1a 100644 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ b/drivers/crypto/vmx/aesp8-ppc.pl @@ -1357,7 +1357,7 @@ () addi $idx,$idx,16 bdnz Loop_ctr32_enc - vadduwm $ivec,$ivec,$one + vadduqm $ivec,$ivec,$one vmr $dat,$inptail lvx $inptail,0,$inp addi $inp,$inp,16 @@ -1854,7 +1854,7 @@ () stvx_u $out1,$x10,$out stvx_u $out2,$x20,$out addi $out,$out,0x30 - b Lcbc_dec8x_done + b Lctr32_enc8x_done .align 5 Lctr32_enc8x_two: @@ -1866,7 +1866,7 @@ () stvx_u $out0,$x00,$out stvx_u $out1,$x10,$out addi $out,$out,0x20 - b Lcbc_dec8x_done + b Lctr32_enc8x_done .align 5 Lctr32_enc8x_one: diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c index dd8b8716467a..2d1a8cd35509 100644 --- a/drivers/crypto/vmx/ghash.c +++ b/drivers/crypto/vmx/ghash.c @@ -1,22 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 /** * GHASH routines supporting VMX instructions on the Power 8 * - * Copyright (C) 2015 International Business Machines Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 only. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * Copyright (C) 2015, 2019 International Business Machines Inc. * * Author: Marcelo Henrique Cerri + * + * Extended by Daniel Axtens to replace the fallback + * mechanism. The new approach is based on arm64 code, which is: + * Copyright (C) 2014 - 2018 Linaro Ltd. */ #include @@ -39,71 +31,25 @@ void gcm_ghash_p8(u64 Xi[2], const u128 htable[16], const u8 *in, size_t len); struct p8_ghash_ctx { + /* key used by vector asm */ u128 htable[16]; - struct crypto_shash *fallback; + /* key used by software fallback */ + be128 key; }; struct p8_ghash_desc_ctx { u64 shash[2]; u8 buffer[GHASH_DIGEST_SIZE]; int bytes; - struct shash_desc fallback_desc; }; -static int p8_ghash_init_tfm(struct crypto_tfm *tfm) -{ - const char *alg = "ghash-generic"; - struct crypto_shash *fallback; - struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm); - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm); - - fallback = crypto_alloc_shash(alg, 0, CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(fallback)) { - printk(KERN_ERR - "Failed to allocate transformation for '%s': %ld\n", - alg, PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - - crypto_shash_set_flags(fallback, - crypto_shash_get_flags((struct crypto_shash - *) tfm)); - - /* Check if the descsize defined in the algorithm is still enough. */ - if (shash_tfm->descsize < sizeof(struct p8_ghash_desc_ctx) - + crypto_shash_descsize(fallback)) { - printk(KERN_ERR - "Desc size of the fallback implementation (%s) does not match the expected value: %lu vs %u\n", - alg, - shash_tfm->descsize - sizeof(struct p8_ghash_desc_ctx), - crypto_shash_descsize(fallback)); - return -EINVAL; - } - ctx->fallback = fallback; - - return 0; -} - -static void p8_ghash_exit_tfm(struct crypto_tfm *tfm) -{ - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm); - - if (ctx->fallback) { - crypto_free_shash(ctx->fallback); - ctx->fallback = NULL; - } -} - static int p8_ghash_init(struct shash_desc *desc) { - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); dctx->bytes = 0; memset(dctx->shash, 0, GHASH_DIGEST_SIZE); - dctx->fallback_desc.tfm = ctx->fallback; - dctx->fallback_desc.flags = desc->flags; - return crypto_shash_init(&dctx->fallback_desc); + return 0; } static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key, @@ -121,7 +67,51 @@ static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key, disable_kernel_vsx(); pagefault_enable(); preempt_enable(); - return crypto_shash_setkey(ctx->fallback, key, keylen); + + memcpy(&ctx->key, key, GHASH_BLOCK_SIZE); + + return 0; +} + +static inline void __ghash_block(struct p8_ghash_ctx *ctx, + struct p8_ghash_desc_ctx *dctx) +{ + if (!IN_INTERRUPT) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); + gcm_ghash_p8(dctx->shash, ctx->htable, + dctx->buffer, GHASH_DIGEST_SIZE); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + } else { + crypto_xor((u8 *)dctx->shash, dctx->buffer, GHASH_BLOCK_SIZE); + gf128mul_lle((be128 *)dctx->shash, &ctx->key); + } +} + +static inline void __ghash_blocks(struct p8_ghash_ctx *ctx, + struct p8_ghash_desc_ctx *dctx, + const u8 *src, unsigned int srclen) +{ + if (!IN_INTERRUPT) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); + gcm_ghash_p8(dctx->shash, ctx->htable, + src, srclen); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + } else { + while (srclen >= GHASH_BLOCK_SIZE) { + crypto_xor((u8 *)dctx->shash, src, GHASH_BLOCK_SIZE); + gf128mul_lle((be128 *)dctx->shash, &ctx->key); + srclen -= GHASH_BLOCK_SIZE; + src += GHASH_BLOCK_SIZE; + } + } } static int p8_ghash_update(struct shash_desc *desc, @@ -131,49 +121,33 @@ static int p8_ghash_update(struct shash_desc *desc, struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - if (IN_INTERRUPT) { - return crypto_shash_update(&dctx->fallback_desc, src, - srclen); - } else { - if (dctx->bytes) { - if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) { - memcpy(dctx->buffer + dctx->bytes, src, - srclen); - dctx->bytes += srclen; - return 0; - } + if (dctx->bytes) { + if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) { memcpy(dctx->buffer + dctx->bytes, src, - GHASH_DIGEST_SIZE - dctx->bytes); - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - gcm_ghash_p8(dctx->shash, ctx->htable, - dctx->buffer, GHASH_DIGEST_SIZE); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - src += GHASH_DIGEST_SIZE - dctx->bytes; - srclen -= GHASH_DIGEST_SIZE - dctx->bytes; - dctx->bytes = 0; - } - len = srclen & ~(GHASH_DIGEST_SIZE - 1); - if (len) { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - gcm_ghash_p8(dctx->shash, ctx->htable, src, len); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - src += len; - srclen -= len; - } - if (srclen) { - memcpy(dctx->buffer, src, srclen); - dctx->bytes = srclen; + srclen); + dctx->bytes += srclen; + return 0; } - return 0; + memcpy(dctx->buffer + dctx->bytes, src, + GHASH_DIGEST_SIZE - dctx->bytes); + + __ghash_block(ctx, dctx); + + src += GHASH_DIGEST_SIZE - dctx->bytes; + srclen -= GHASH_DIGEST_SIZE - dctx->bytes; + dctx->bytes = 0; + } + len = srclen & ~(GHASH_DIGEST_SIZE - 1); + if (len) { + __ghash_blocks(ctx, dctx, src, len); + src += len; + srclen -= len; } + if (srclen) { + memcpy(dctx->buffer, src, srclen); + dctx->bytes = srclen; + } + return 0; } static int p8_ghash_final(struct shash_desc *desc, u8 *out) @@ -182,25 +156,14 @@ static int p8_ghash_final(struct shash_desc *desc, u8 *out) struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - if (IN_INTERRUPT) { - return crypto_shash_final(&dctx->fallback_desc, out); - } else { - if (dctx->bytes) { - for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++) - dctx->buffer[i] = 0; - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - gcm_ghash_p8(dctx->shash, ctx->htable, - dctx->buffer, GHASH_DIGEST_SIZE); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - dctx->bytes = 0; - } - memcpy(out, dctx->shash, GHASH_DIGEST_SIZE); - return 0; + if (dctx->bytes) { + for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++) + dctx->buffer[i] = 0; + __ghash_block(ctx, dctx); + dctx->bytes = 0; } + memcpy(out, dctx->shash, GHASH_DIGEST_SIZE); + return 0; } struct shash_alg p8_ghash_alg = { @@ -215,11 +178,8 @@ struct shash_alg p8_ghash_alg = { .cra_name = "ghash", .cra_driver_name = "p8_ghash", .cra_priority = 1000, - .cra_flags = CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = GHASH_BLOCK_SIZE, .cra_ctxsize = sizeof(struct p8_ghash_ctx), .cra_module = THIS_MODULE, - .cra_init = p8_ghash_init_tfm, - .cra_exit = p8_ghash_exit_tfm, }, }; diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index 5ef624fe3934..a59f338f520f 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -23,7 +23,6 @@ config DEV_DAX config DEV_DAX_PMEM tristate "PMEM DAX: direct access to persistent memory" depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX - depends on m # until we can kill DEV_DAX_PMEM_COMPAT default DEV_DAX help Support raw access to persistent memory. Note that this @@ -50,7 +49,7 @@ config DEV_DAX_KMEM config DEV_DAX_PMEM_COMPAT tristate "PMEM DAX: support the deprecated /sys/class/dax interface" - depends on DEV_DAX_PMEM + depends on m && DEV_DAX_PMEM=m default DEV_DAX_PMEM help Older versions of the libdaxctl library expect to find all diff --git a/drivers/dax/device.c b/drivers/dax/device.c index e428468ab661..996d68ff992a 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -184,8 +184,7 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); - return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, *pfn, - vmf->flags & FAULT_FLAG_WRITE); + return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD @@ -235,8 +234,7 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); - return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, *pfn, - vmf->flags & FAULT_FLAG_WRITE); + return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); } #else static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 0a339b85133e..d7f2257f2568 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -73,22 +73,12 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); #endif -/** - * __bdev_dax_supported() - Check if the device supports dax for filesystem - * @bdev: block device to check - * @blocksize: The block size of the device - * - * This is a library function for filesystems to check if the block device - * can be mounted with dax option. - * - * Return: true if supported, false if unsupported - */ -bool __bdev_dax_supported(struct block_device *bdev, int blocksize) +bool __generic_fsdax_supported(struct dax_device *dax_dev, + struct block_device *bdev, int blocksize, sector_t start, + sector_t sectors) { - struct dax_device *dax_dev; bool dax_enabled = false; pgoff_t pgoff, pgoff_end; - struct request_queue *q; char buf[BDEVNAME_SIZE]; void *kaddr, *end_kaddr; pfn_t pfn, end_pfn; @@ -102,21 +92,14 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) return false; } - q = bdev_get_queue(bdev); - if (!q || !blk_queue_dax(q)) { - pr_debug("%s: error: request queue doesn't support dax\n", - bdevname(bdev, buf)); - return false; - } - - err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff); + err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff); if (err) { pr_debug("%s: error: unaligned partition for dax\n", bdevname(bdev, buf)); return false; } - last_page = PFN_DOWN(i_size_read(bdev->bd_inode) - 1) * 8; + last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512; err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end); if (err) { pr_debug("%s: error: unaligned partition for dax\n", @@ -124,20 +107,11 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) return false; } - dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); - if (!dax_dev) { - pr_debug("%s: error: device does not support dax\n", - bdevname(bdev, buf)); - return false; - } - id = dax_read_lock(); len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn); len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn); dax_read_unlock(id); - put_dax(dax_dev); - if (len < 1 || len2 < 1) { pr_debug("%s: error: dax access failed (%ld)\n", bdevname(bdev, buf), len < 1 ? len : len2); @@ -178,6 +152,49 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) } return true; } +EXPORT_SYMBOL_GPL(__generic_fsdax_supported); + +/** + * __bdev_dax_supported() - Check if the device supports dax for filesystem + * @bdev: block device to check + * @blocksize: The block size of the device + * + * This is a library function for filesystems to check if the block device + * can be mounted with dax option. + * + * Return: true if supported, false if unsupported + */ +bool __bdev_dax_supported(struct block_device *bdev, int blocksize) +{ + struct dax_device *dax_dev; + struct request_queue *q; + char buf[BDEVNAME_SIZE]; + bool ret; + int id; + + q = bdev_get_queue(bdev); + if (!q || !blk_queue_dax(q)) { + pr_debug("%s: error: request queue doesn't support dax\n", + bdevname(bdev, buf)); + return false; + } + + dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); + if (!dax_dev) { + pr_debug("%s: error: device does not support dax\n", + bdevname(bdev, buf)); + return false; + } + + id = dax_read_lock(); + ret = dax_supported(dax_dev, bdev, blocksize, 0, + i_size_read(bdev->bd_inode) / 512); + dax_read_unlock(id); + + put_dax(dax_dev); + + return ret; +} EXPORT_SYMBOL_GPL(__bdev_dax_supported); #endif @@ -303,6 +320,15 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, } EXPORT_SYMBOL_GPL(dax_direct_access); +bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, + int blocksize, sector_t start, sector_t len) +{ + if (!dax_alive(dax_dev)) + return false; + + return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len); +} + size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 0ae3de76833b..839621b044f4 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -228,7 +228,7 @@ static struct devfreq_governor *find_devfreq_governor(const char *name) * if is not found. This can happen when both drivers (the governor driver * and the driver that call devfreq_add_device) are built as modules. * devfreq_list_lock should be held by the caller. Returns the matched - * governor's pointer. + * governor's pointer or an error pointer. */ static struct devfreq_governor *try_then_request_governor(const char *name) { @@ -254,7 +254,7 @@ static struct devfreq_governor *try_then_request_governor(const char *name) /* Restore previous state before return */ mutex_lock(&devfreq_list_lock); if (err) - return NULL; + return ERR_PTR(err); governor = find_devfreq_governor(name); } diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index cd57747286f2..9635897458a0 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -77,6 +77,7 @@ static void unmap_udmabuf(struct dma_buf_attachment *at, struct sg_table *sg, enum dma_data_direction direction) { + dma_unmap_sg(at->dev, sg->sgl, sg->nents, direction); sg_free_table(sg); kfree(sg); } diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index fe69dccfa0c0..37a269420435 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1606,7 +1606,11 @@ static void at_xdmac_tasklet(unsigned long data) struct at_xdmac_desc, xfer_node); dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc); - BUG_ON(!desc->active_xfer); + if (!desc->active_xfer) { + dev_err(chan2dev(&atchan->chan), "Xfer not active: exiting"); + spin_unlock_bh(&atchan->lock); + return; + } txd = &desc->tx_dma_desc; diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 9ce0a386225b..f49534019d37 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -666,10 +666,11 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, return status; } -static void jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma, - struct jz4780_dma_chan *jzchan) +static bool jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma, + struct jz4780_dma_chan *jzchan) { uint32_t dcs; + bool ack = true; spin_lock(&jzchan->vchan.lock); @@ -692,12 +693,20 @@ static void jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma, if ((dcs & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) == 0) { if (jzchan->desc->type == DMA_CYCLIC) { vchan_cyclic_callback(&jzchan->desc->vdesc); - } else { + + jz4780_dma_begin(jzchan); + } else if (dcs & JZ_DMA_DCS_TT) { vchan_cookie_complete(&jzchan->desc->vdesc); jzchan->desc = NULL; - } - jz4780_dma_begin(jzchan); + jz4780_dma_begin(jzchan); + } else { + /* False positive - continue the transfer */ + ack = false; + jz4780_dma_chn_writel(jzdma, jzchan->id, + JZ_DMA_REG_DCS, + JZ_DMA_DCS_CTE); + } } } else { dev_err(&jzchan->vchan.chan.dev->device, @@ -705,21 +714,22 @@ static void jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma, } spin_unlock(&jzchan->vchan.lock); + + return ack; } static irqreturn_t jz4780_dma_irq_handler(int irq, void *data) { struct jz4780_dma_dev *jzdma = data; + unsigned int nb_channels = jzdma->soc_data->nb_channels; uint32_t pending, dmac; int i; pending = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DIRQP); - for (i = 0; i < jzdma->soc_data->nb_channels; i++) { - if (!(pending & (1<chan[i]); + for_each_set_bit(i, (unsigned long *)&pending, nb_channels) { + if (jz4780_dma_chan_irq(jzdma, &jzdma->chan[i])) + pending &= ~BIT(i); } /* Clear halt and address error status of all channels. */ @@ -728,7 +738,7 @@ static irqreturn_t jz4780_dma_irq_handler(int irq, void *data) jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DMAC, dmac); /* Clear interrupt pending status. */ - jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DIRQP, 0); + jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DIRQP, pending); return IRQ_HANDLED; } diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c index b2ac1d2c5b86..a1ce307c502f 100644 --- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c @@ -512,7 +512,8 @@ dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr, return vchan_tx_prep(&chan->vc, &first->vd, flags); err_desc_get: - axi_desc_put(first); + if (first) + axi_desc_put(first); return NULL; } diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c index 0baf9797cc09..83796a33dc16 100644 --- a/drivers/dma/idma64.c +++ b/drivers/dma/idma64.c @@ -592,7 +592,7 @@ static int idma64_probe(struct idma64_chip *chip) idma64->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); idma64->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; - idma64->dma.dev = chip->dev; + idma64->dma.dev = chip->sysdev; dma_set_max_seg_size(idma64->dma.dev, IDMA64C_CTLH_BLOCK_TS_MASK); @@ -632,6 +632,7 @@ static int idma64_platform_probe(struct platform_device *pdev) { struct idma64_chip *chip; struct device *dev = &pdev->dev; + struct device *sysdev = dev->parent; struct resource *mem; int ret; @@ -648,11 +649,12 @@ static int idma64_platform_probe(struct platform_device *pdev) if (IS_ERR(chip->regs)) return PTR_ERR(chip->regs); - ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + ret = dma_coerce_mask_and_coherent(sysdev, DMA_BIT_MASK(64)); if (ret) return ret; chip->dev = dev; + chip->sysdev = sysdev; ret = idma64_probe(chip); if (ret) diff --git a/drivers/dma/idma64.h b/drivers/dma/idma64.h index 6b816878e5e7..baa32e1425de 100644 --- a/drivers/dma/idma64.h +++ b/drivers/dma/idma64.h @@ -216,12 +216,14 @@ static inline void idma64_writel(struct idma64 *idma64, int offset, u32 value) /** * struct idma64_chip - representation of iDMA 64-bit controller hardware * @dev: struct device of the DMA controller + * @sysdev: struct device of the physical device that does DMA * @irq: irq line * @regs: memory mapped I/O space * @idma64: struct idma64 that is filed by idma64_probe() */ struct idma64_chip { struct device *dev; + struct device *sysdev; int irq; void __iomem *regs; struct idma64 *idma64; diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 5f3c1378b90e..99d9f431ae2c 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -419,6 +419,7 @@ struct sdma_driver_data { int chnenbl0; int num_events; struct sdma_script_start_addrs *script_addrs; + bool check_ratio; }; struct sdma_engine { @@ -557,6 +558,13 @@ static struct sdma_driver_data sdma_imx7d = { .script_addrs = &sdma_script_imx7d, }; +static struct sdma_driver_data sdma_imx8mq = { + .chnenbl0 = SDMA_CHNENBL0_IMX35, + .num_events = 48, + .script_addrs = &sdma_script_imx7d, + .check_ratio = 1, +}; + static const struct platform_device_id sdma_devtypes[] = { { .name = "imx25-sdma", @@ -579,6 +587,9 @@ static const struct platform_device_id sdma_devtypes[] = { }, { .name = "imx7d-sdma", .driver_data = (unsigned long)&sdma_imx7d, + }, { + .name = "imx8mq-sdma", + .driver_data = (unsigned long)&sdma_imx8mq, }, { /* sentinel */ } @@ -593,6 +604,7 @@ static const struct of_device_id sdma_dt_ids[] = { { .compatible = "fsl,imx31-sdma", .data = &sdma_imx31, }, { .compatible = "fsl,imx25-sdma", .data = &sdma_imx25, }, { .compatible = "fsl,imx7d-sdma", .data = &sdma_imx7d, }, + { .compatible = "fsl,imx8mq-sdma", .data = &sdma_imx8mq, }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sdma_dt_ids); @@ -1852,7 +1864,8 @@ static int sdma_init(struct sdma_engine *sdma) if (ret) goto disable_clk_ipg; - if (clk_get_rate(sdma->clk_ahb) == clk_get_rate(sdma->clk_ipg)) + if (sdma->drvdata->check_ratio && + (clk_get_rate(sdma->clk_ahb) == clk_get_rate(sdma->clk_ipg))) sdma->clk_ratio = 1; /* Be sure SDMA has not started yet */ diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c index 814853842e29..723b11c190b3 100644 --- a/drivers/dma/mediatek/mtk-cqdma.c +++ b/drivers/dma/mediatek/mtk-cqdma.c @@ -225,7 +225,7 @@ static int mtk_cqdma_hard_reset(struct mtk_cqdma_pchan *pc) mtk_dma_set(pc, MTK_CQDMA_RESET, MTK_CQDMA_HARD_RST_BIT); mtk_dma_clr(pc, MTK_CQDMA_RESET, MTK_CQDMA_HARD_RST_BIT); - return mtk_cqdma_poll_engine_done(pc, false); + return mtk_cqdma_poll_engine_done(pc, true); } static void mtk_cqdma_start(struct mtk_cqdma_pchan *pc, @@ -671,7 +671,7 @@ static void mtk_cqdma_free_chan_resources(struct dma_chan *c) mtk_dma_set(cvc->pc, MTK_CQDMA_FLUSH, MTK_CQDMA_FLUSH_BIT); /* wait for the completion of flush operation */ - if (mtk_cqdma_poll_engine_done(cvc->pc, false) < 0) + if (mtk_cqdma_poll_engine_done(cvc->pc, true) < 0) dev_err(cqdma2dev(to_cqdma_dev(c)), "cqdma flush timeout\n"); /* clear the flush bit and interrupt flag */ diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index eec79fdf27a5..56695ffb5d37 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -966,6 +966,7 @@ static void _stop(struct pl330_thread *thrd) { void __iomem *regs = thrd->dmac->base; u8 insn[6] = {0, 0, 0, 0, 0, 0}; + u32 inten = readl(regs + INTEN); if (_state(thrd) == PL330_STATE_FAULT_COMPLETING) UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING); @@ -978,10 +979,13 @@ static void _stop(struct pl330_thread *thrd) _emit_KILL(0, insn); - /* Stop generating interrupts for SEV */ - writel(readl(regs + INTEN) & ~(1 << thrd->ev), regs + INTEN); - _execute_DBGINSN(thrd, insn, is_manager(thrd)); + + /* clear the event */ + if (inten & (1 << thrd->ev)) + writel(1 << thrd->ev, regs + INTCLR); + /* Stop generating interrupts for SEV */ + writel(inten & ~(1 << thrd->ev), regs + INTEN); } /* Start doing req 'idx' of thread 'thrd' */ diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c index 48431e2da987..01abed5cde49 100644 --- a/drivers/dma/sprd-dma.c +++ b/drivers/dma/sprd-dma.c @@ -510,7 +510,9 @@ static void sprd_dma_start(struct sprd_dma_chn *schan) sprd_dma_set_uid(schan); sprd_dma_enable_chn(schan); - if (schan->dev_id == SPRD_DMA_SOFTWARE_UID) + if (schan->dev_id == SPRD_DMA_SOFTWARE_UID && + schan->chn_mode != SPRD_DMA_DST_CHN0 && + schan->chn_mode != SPRD_DMA_DST_CHN1) sprd_dma_soft_request(schan); } @@ -552,12 +554,17 @@ static irqreturn_t dma_irq_handle(int irq, void *dev_id) schan = &sdev->channels[i]; spin_lock(&schan->vc.lock); + + sdesc = schan->cur_desc; + if (!sdesc) { + spin_unlock(&schan->vc.lock); + return IRQ_HANDLED; + } + int_type = sprd_dma_get_int_type(schan); req_type = sprd_dma_get_req_type(schan); sprd_dma_clear_int(schan); - sdesc = schan->cur_desc; - /* cyclic mode schedule callback */ cyclic = schan->linklist.phy_addr ? true : false; if (cyclic == true) { @@ -625,7 +632,7 @@ static enum dma_status sprd_dma_tx_status(struct dma_chan *chan, else pos = 0; } else if (schan->cur_desc && schan->cur_desc->vd.tx.cookie == cookie) { - struct sprd_dma_desc *sdesc = to_sprd_dma_desc(vd); + struct sprd_dma_desc *sdesc = schan->cur_desc; if (sdesc->dir == DMA_DEV_TO_MEM) pos = sprd_dma_get_dst_addr(schan); @@ -771,7 +778,7 @@ static int sprd_dma_fill_desc(struct dma_chan *chan, temp |= slave_cfg->src_maxburst & SPRD_DMA_FRG_LEN_MASK; hw->frg_len = temp; - hw->blk_len = len & SPRD_DMA_BLK_LEN_MASK; + hw->blk_len = slave_cfg->src_maxburst & SPRD_DMA_BLK_LEN_MASK; hw->trsc_len = len & SPRD_DMA_TRSC_LEN_MASK; temp = (dst_step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_DEST_TRSF_STEP_OFFSET; @@ -904,6 +911,12 @@ sprd_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, schan->linklist.virt_addr = 0; } + /* Set channel mode and trigger mode for 2-stage transfer */ + schan->chn_mode = + (flags >> SPRD_DMA_CHN_MODE_SHIFT) & SPRD_DMA_CHN_MODE_MASK; + schan->trg_mode = + (flags >> SPRD_DMA_TRG_MODE_SHIFT) & SPRD_DMA_TRG_MODE_MASK; + sdesc = kzalloc(sizeof(*sdesc), GFP_NOWAIT); if (!sdesc) return NULL; @@ -937,12 +950,6 @@ sprd_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } } - /* Set channel mode and trigger mode for 2-stage transfer */ - schan->chn_mode = - (flags >> SPRD_DMA_CHN_MODE_SHIFT) & SPRD_DMA_CHN_MODE_MASK; - schan->trg_mode = - (flags >> SPRD_DMA_TRG_MODE_SHIFT) & SPRD_DMA_TRG_MODE_MASK; - ret = sprd_dma_fill_desc(chan, &sdesc->chn_hw, 0, 0, src, dst, len, dir, flags, slave_cfg); if (ret) { diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index 5ec0dd97b397..1477cce33dbe 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -141,6 +140,7 @@ struct tegra_adma { struct dma_device dma_dev; struct device *dev; void __iomem *base_addr; + struct clk *ahub_clk; unsigned int nr_channels; unsigned long rx_requests_reserved; unsigned long tx_requests_reserved; @@ -637,8 +637,9 @@ static int tegra_adma_runtime_suspend(struct device *dev) struct tegra_adma *tdma = dev_get_drvdata(dev); tdma->global_cmd = tdma_read(tdma, ADMA_GLOBAL_CMD); + clk_disable_unprepare(tdma->ahub_clk); - return pm_clk_suspend(dev); + return 0; } static int tegra_adma_runtime_resume(struct device *dev) @@ -646,10 +647,11 @@ static int tegra_adma_runtime_resume(struct device *dev) struct tegra_adma *tdma = dev_get_drvdata(dev); int ret; - ret = pm_clk_resume(dev); - if (ret) + ret = clk_prepare_enable(tdma->ahub_clk); + if (ret) { + dev_err(dev, "ahub clk_enable failed: %d\n", ret); return ret; - + } tdma_write(tdma, ADMA_GLOBAL_CMD, tdma->global_cmd); return 0; @@ -693,13 +695,11 @@ static int tegra_adma_probe(struct platform_device *pdev) if (IS_ERR(tdma->base_addr)) return PTR_ERR(tdma->base_addr); - ret = pm_clk_create(&pdev->dev); - if (ret) - return ret; - - ret = of_pm_clk_add_clk(&pdev->dev, "d_audio"); - if (ret) - goto clk_destroy; + tdma->ahub_clk = devm_clk_get(&pdev->dev, "d_audio"); + if (IS_ERR(tdma->ahub_clk)) { + dev_err(&pdev->dev, "Error: Missing ahub controller clock\n"); + return PTR_ERR(tdma->ahub_clk); + } pm_runtime_enable(&pdev->dev); @@ -776,8 +776,6 @@ static int tegra_adma_probe(struct platform_device *pdev) pm_runtime_put_sync(&pdev->dev); rpm_disable: pm_runtime_disable(&pdev->dev); -clk_destroy: - pm_clk_destroy(&pdev->dev); return ret; } @@ -787,6 +785,7 @@ static int tegra_adma_remove(struct platform_device *pdev) struct tegra_adma *tdma = platform_get_drvdata(pdev); int i; + of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&tdma->dma_dev); for (i = 0; i < tdma->nr_channels; ++i) @@ -794,7 +793,6 @@ static int tegra_adma_remove(struct platform_device *pdev) pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - pm_clk_destroy(&pdev->dev); return 0; } diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 47eb4d13ed5f..5e2e0348d460 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -263,8 +263,8 @@ config EDAC_PND2 micro-server but may appear on others in the future. config EDAC_MPC85XX - tristate "Freescale MPC83xx / MPC85xx" - depends on FSL_SOC + bool "Freescale MPC83xx / MPC85xx" + depends on FSL_SOC && EDAC=y help Support for error detection and correction on the Freescale MPC8349, MPC8560, MPC8540, MPC8548, T4240 diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 0a1814dad6cf..bb0202ad7a13 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1004,7 +1004,7 @@ static inline void amd_decode_err_code(u16 ec) /* * Filter out unwanted MCE signatures here. */ -static bool amd_filter_mce(struct mce *m) +static bool ignore_mce(struct mce *m) { /* * NB GART TLB error reporting is disabled by default. @@ -1038,7 +1038,7 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) unsigned int fam = x86_family(m->cpuid); int ecc; - if (amd_filter_mce(m)) + if (ignore_mce(m)) return NOTIFY_STOP; pr_emerg(HW_ERR "%s\n", decode_error_status(m)); diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig index 540e8cd16ee6..db3bcf96b98f 100644 --- a/drivers/extcon/Kconfig +++ b/drivers/extcon/Kconfig @@ -30,7 +30,7 @@ config EXTCON_ARIZONA config EXTCON_AXP288 tristate "X-Power AXP288 EXTCON support" - depends on MFD_AXP20X && USB_SUPPORT && X86 + depends on MFD_AXP20X && USB_SUPPORT && X86 && ACPI select USB_ROLE_SWITCH help Say Y here to enable support for USB peripheral detection diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c index da0e9bc4262f..9327479c719c 100644 --- a/drivers/extcon/extcon-arizona.c +++ b/drivers/extcon/extcon-arizona.c @@ -1726,6 +1726,16 @@ static int arizona_extcon_remove(struct platform_device *pdev) struct arizona_extcon_info *info = platform_get_drvdata(pdev); struct arizona *arizona = info->arizona; int jack_irq_rise, jack_irq_fall; + bool change; + + regmap_update_bits_check(arizona->regmap, ARIZONA_MIC_DETECT_1, + ARIZONA_MICD_ENA, 0, + &change); + + if (change) { + regulator_disable(info->micvdd); + pm_runtime_put(info->dev); + } gpiod_put(info->micd_pol_gpio); diff --git a/drivers/fpga/dfl-afu-dma-region.c b/drivers/fpga/dfl-afu-dma-region.c index e18a786fc943..cd68002ac097 100644 --- a/drivers/fpga/dfl-afu-dma-region.c +++ b/drivers/fpga/dfl-afu-dma-region.c @@ -399,7 +399,7 @@ int afu_dma_map_region(struct dfl_feature_platform_data *pdata, region->pages[0], 0, region->length, DMA_BIDIRECTIONAL); - if (dma_mapping_error(&pdata->dev->dev, region->iova)) { + if (dma_mapping_error(dfl_fpga_pdata_to_parent(pdata), region->iova)) { dev_err(&pdata->dev->dev, "failed to map for dma\n"); ret = -EFAULT; goto unpin_pages; diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c index 2c09e502e721..c25217cde5ca 100644 --- a/drivers/fpga/dfl.c +++ b/drivers/fpga/dfl.c @@ -40,6 +40,13 @@ enum dfl_fpga_devt_type { DFL_FPGA_DEVT_MAX, }; +static struct lock_class_key dfl_pdata_keys[DFL_ID_MAX]; + +static const char *dfl_pdata_key_strings[DFL_ID_MAX] = { + "dfl-fme-pdata", + "dfl-port-pdata", +}; + /** * dfl_dev_info - dfl feature device information. * @name: name string of the feature platform device. @@ -443,11 +450,16 @@ static int build_info_commit_dev(struct build_feature_devs_info *binfo) struct platform_device *fdev = binfo->feature_dev; struct dfl_feature_platform_data *pdata; struct dfl_feature_info *finfo, *p; + enum dfl_id_type type; int ret, index = 0; if (!fdev) return 0; + type = feature_dev_id_type(fdev); + if (WARN_ON_ONCE(type >= DFL_ID_MAX)) + return -EINVAL; + /* * we do not need to care for the memory which is associated with * the platform device. After calling platform_device_unregister(), @@ -463,6 +475,8 @@ static int build_info_commit_dev(struct build_feature_devs_info *binfo) pdata->num = binfo->feature_num; pdata->dfl_cdev = binfo->cdev; mutex_init(&pdata->lock); + lockdep_set_class_and_name(&pdata->lock, &dfl_pdata_keys[type], + dfl_pdata_key_strings[type]); /* * the count should be initialized to 0 to make sure @@ -497,7 +511,7 @@ static int build_info_commit_dev(struct build_feature_devs_info *binfo) ret = platform_device_add(binfo->feature_dev); if (!ret) { - if (feature_dev_id_type(binfo->feature_dev) == PORT_ID) + if (type == PORT_ID) dfl_fpga_cdev_add_port_dev(binfo->cdev, binfo->feature_dev); else diff --git a/drivers/fpga/stratix10-soc.c b/drivers/fpga/stratix10-soc.c index 13851b3d1c56..215d33789c74 100644 --- a/drivers/fpga/stratix10-soc.c +++ b/drivers/fpga/stratix10-soc.c @@ -507,12 +507,16 @@ static int __init s10_init(void) if (!fw_np) return -ENODEV; + of_node_get(fw_np); np = of_find_matching_node(fw_np, s10_of_match); - if (!np) + if (!np) { + of_node_put(fw_np); return -ENODEV; + } of_node_put(np); ret = of_platform_populate(fw_np, s10_of_match, NULL, NULL); + of_node_put(fw_np); if (ret) return ret; diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 3f50526a771f..864a1ba7aa3a 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -824,6 +824,7 @@ config GPIO_ADP5588 config GPIO_ADP5588_IRQ bool "Interrupt controller support for ADP5588" depends on GPIO_ADP5588=y + select GPIOLIB_IRQCHIP help Say yes here to enable the adp5588 to be used as an interrupt controller. It requires the driver to be built in the kernel. diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 7f33024b6d83..fafd79438bbf 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -353,6 +353,22 @@ static void omap_clear_gpio_debounce(struct gpio_bank *bank, unsigned offset) } } +/* + * Off mode wake-up capable GPIOs in bank(s) that are in the wakeup domain. + * See TRM section for GPIO for "Wake-Up Generation" for the list of GPIOs + * in wakeup domain. If bank->non_wakeup_gpios is not configured, assume none + * are capable waking up the system from off mode. + */ +static bool omap_gpio_is_off_wakeup_capable(struct gpio_bank *bank, u32 gpio_mask) +{ + u32 no_wake = bank->non_wakeup_gpios; + + if (no_wake) + return !!(~no_wake & gpio_mask); + + return false; +} + static inline void omap_set_gpio_trigger(struct gpio_bank *bank, int gpio, unsigned trigger) { @@ -384,13 +400,7 @@ static inline void omap_set_gpio_trigger(struct gpio_bank *bank, int gpio, } /* This part needs to be executed always for OMAP{34xx, 44xx} */ - if (!bank->regs->irqctrl) { - /* On omap24xx proceed only when valid GPIO bit is set */ - if (bank->non_wakeup_gpios) { - if (!(bank->non_wakeup_gpios & gpio_bit)) - goto exit; - } - + if (!bank->regs->irqctrl && !omap_gpio_is_off_wakeup_capable(bank, gpio)) { /* * Log the edge gpio and manually trigger the IRQ * after resume if the input level changes @@ -403,7 +413,6 @@ static inline void omap_set_gpio_trigger(struct gpio_bank *bank, int gpio, bank->enabled_non_wakeup_gpios &= ~gpio_bit; } -exit: bank->level_mask = readl_relaxed(bank->base + bank->regs->leveldetect0) | readl_relaxed(bank->base + bank->regs->leveldetect1); @@ -1444,7 +1453,10 @@ static void omap_gpio_restore_context(struct gpio_bank *bank); static void omap_gpio_idle(struct gpio_bank *bank, bool may_lose_context) { struct device *dev = bank->chip.parent; - u32 l1 = 0, l2 = 0; + void __iomem *base = bank->base; + u32 nowake; + + bank->saved_datain = readl_relaxed(base + bank->regs->datain); if (bank->funcs.idle_enable_level_quirk) bank->funcs.idle_enable_level_quirk(bank); @@ -1456,20 +1468,15 @@ static void omap_gpio_idle(struct gpio_bank *bank, bool may_lose_context) goto update_gpio_context_count; /* - * If going to OFF, remove triggering for all + * If going to OFF, remove triggering for all wkup domain * non-wakeup GPIOs. Otherwise spurious IRQs will be * generated. See OMAP2420 Errata item 1.101. */ - bank->saved_datain = readl_relaxed(bank->base + - bank->regs->datain); - l1 = bank->context.fallingdetect; - l2 = bank->context.risingdetect; - - l1 &= ~bank->enabled_non_wakeup_gpios; - l2 &= ~bank->enabled_non_wakeup_gpios; - - writel_relaxed(l1, bank->base + bank->regs->fallingdetect); - writel_relaxed(l2, bank->base + bank->regs->risingdetect); + if (!bank->loses_context && bank->enabled_non_wakeup_gpios) { + nowake = bank->enabled_non_wakeup_gpios; + omap_gpio_rmw(base, bank->regs->fallingdetect, nowake, ~nowake); + omap_gpio_rmw(base, bank->regs->risingdetect, nowake, ~nowake); + } bank->workaround_enabled = true; @@ -1518,6 +1525,12 @@ static void omap_gpio_unidle(struct gpio_bank *bank) return; } } + } else { + /* Restore changes done for OMAP2420 errata 1.101 */ + writel_relaxed(bank->context.fallingdetect, + bank->base + bank->regs->fallingdetect); + writel_relaxed(bank->context.risingdetect, + bank->base + bank->regs->risingdetect); } if (!bank->workaround_enabled) diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 541fa6ac399d..7e9451f47efe 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -29,6 +29,7 @@ struct fsl_gpio_soc_data { struct vf610_gpio_port { struct gpio_chip gc; + struct irq_chip ic; void __iomem *base; void __iomem *gpio_base; const struct fsl_gpio_soc_data *sdata; @@ -60,8 +61,6 @@ struct vf610_gpio_port { #define PORT_INT_EITHER_EDGE 0xb #define PORT_INT_LOGIC_ONE 0xc -static struct irq_chip vf610_gpio_irq_chip; - static const struct fsl_gpio_soc_data imx_data = { .have_paddr = true, }; @@ -237,15 +236,6 @@ static int vf610_gpio_irq_set_wake(struct irq_data *d, u32 enable) return 0; } -static struct irq_chip vf610_gpio_irq_chip = { - .name = "gpio-vf610", - .irq_ack = vf610_gpio_irq_ack, - .irq_mask = vf610_gpio_irq_mask, - .irq_unmask = vf610_gpio_irq_unmask, - .irq_set_type = vf610_gpio_irq_set_type, - .irq_set_wake = vf610_gpio_irq_set_wake, -}; - static int vf610_gpio_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -253,6 +243,7 @@ static int vf610_gpio_probe(struct platform_device *pdev) struct vf610_gpio_port *port; struct resource *iores; struct gpio_chip *gc; + struct irq_chip *ic; int i; int ret; @@ -316,6 +307,14 @@ static int vf610_gpio_probe(struct platform_device *pdev) gc->direction_output = vf610_gpio_direction_output; gc->set = vf610_gpio_set; + ic = &port->ic; + ic->name = "gpio-vf610"; + ic->irq_ack = vf610_gpio_irq_ack; + ic->irq_mask = vf610_gpio_irq_mask; + ic->irq_unmask = vf610_gpio_irq_unmask; + ic->irq_set_type = vf610_gpio_irq_set_type; + ic->irq_set_wake = vf610_gpio_irq_set_wake; + ret = gpiochip_add_data(gc, port); if (ret < 0) return ret; @@ -327,14 +326,13 @@ static int vf610_gpio_probe(struct platform_device *pdev) /* Clear the interrupt status register for all GPIO's */ vf610_gpio_writel(~0, port->base + PORT_ISFR); - ret = gpiochip_irqchip_add(gc, &vf610_gpio_irq_chip, 0, - handle_edge_irq, IRQ_TYPE_NONE); + ret = gpiochip_irqchip_add(gc, ic, 0, handle_edge_irq, IRQ_TYPE_NONE); if (ret) { dev_err(dev, "failed to add irqchip\n"); gpiochip_remove(gc); return ret; } - gpiochip_set_chained_irqchip(gc, &vf610_gpio_irq_chip, port->irq, + gpiochip_set_chained_irqchip(gc, ic, port->irq, vf610_gpio_irq_handler); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 466da5954a68..62bf9da25e4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -23,7 +23,7 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -FULL_AMD_PATH=$(src)/.. +FULL_AMD_PATH=$(srctree)/$(src)/.. DISPLAY_FOLDER_NAME=display FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 4376b17ca594..56f8ca2a3bb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -464,8 +464,7 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, } } if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { - if ((adev->flags & AMD_IS_PX) && - amdgpu_atpx_dgpu_req_power_for_displays()) { + if (adev->flags & AMD_IS_PX) { pm_runtime_get_sync(adev->ddev->dev); /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(adev->ddev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index ee47c11e92ce..4dee2326b29c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -136,8 +136,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, { struct amdgpu_device *adev = ring->adev; struct amdgpu_fence *fence; - struct dma_fence *old, **ptr; + struct dma_fence __rcu **ptr; uint32_t seq; + int r; fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); if (fence == NULL) @@ -153,15 +154,24 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, seq, flags | AMDGPU_FENCE_FLAG_INT); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; + if (unlikely(rcu_dereference_protected(*ptr, 1))) { + struct dma_fence *old; + + rcu_read_lock(); + old = dma_fence_get_rcu_safe(ptr); + rcu_read_unlock(); + + if (old) { + r = dma_fence_wait(old, false); + dma_fence_put(old); + if (r) + return r; + } + } + /* This function can't be called concurrently anyway, otherwise * emitting the fence would mess up the hardware ring buffer. */ - old = rcu_dereference_protected(*ptr, 1); - if (old && !dma_fence_is_signaled(old)) { - DRM_INFO("rcu slot is busy\n"); - dma_fence_wait(old, false); - } - rcu_assign_pointer(*ptr, dma_fence_get(&fence->base)); *f = &fence->base; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 3091488cd8cc..e0877fd0c051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -38,18 +38,10 @@ static void psp_set_funcs(struct amdgpu_device *adev); static int psp_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; psp_set_funcs(adev); - return 0; -} - -static int psp_sw_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct psp_context *psp = &adev->psp; - int ret; - switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: @@ -67,6 +59,15 @@ static int psp_sw_init(void *handle) psp->adev = adev; + return 0; +} + +static int psp_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; + int ret; + ret = psp_init_microcode(psp); if (ret) { DRM_ERROR("Failed to load psp firmware!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index c021b114c8a4..f7189e22f6b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -1072,7 +1072,7 @@ void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t rptr = amdgpu_ring_get_rptr(ring); + uint32_t rptr; unsigned i; int r, timeout = adev->usec_timeout; @@ -1084,6 +1084,8 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) if (r) return r; + rptr = amdgpu_ring_get_rptr(ring); + amdgpu_ring_write(ring, VCE_CMD_END); amdgpu_ring_commit(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index ecf6f96df2ad..e6b07ece3910 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -594,7 +594,7 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t rptr = amdgpu_ring_get_rptr(ring); + uint32_t rptr; unsigned i; int r; @@ -602,6 +602,8 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) if (r) return r; + rptr = amdgpu_ring_get_rptr(ring); + amdgpu_ring_write(ring, VCN_ENC_CMD_END); amdgpu_ring_commit(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2fe8397241ea..1611bef19a2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -715,6 +715,7 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev) case CHIP_VEGA10: return true; case CHIP_RAVEN: + return (adev->pdev->device == 0x15d8); case CHIP_VEGA12: case CHIP_VEGA20: default: diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index ed89a101f73f..3f38fae08ff8 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -713,6 +713,11 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev) { u32 sol_reg; + /* Just return false for soc15 GPUs. Reset does not seem to + * be necessary. + */ + return false; + if (adev->flags & AMD_IS_APU) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index c9edddf9f88a..be70e6e5f9df 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -170,13 +170,16 @@ static void uvd_v6_0_enc_ring_set_wptr(struct amdgpu_ring *ring) static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t rptr = amdgpu_ring_get_rptr(ring); + uint32_t rptr; unsigned i; int r; r = amdgpu_ring_alloc(ring, 16); if (r) return r; + + rptr = amdgpu_ring_get_rptr(ring); + amdgpu_ring_write(ring, HEVC_ENC_CMD_END); amdgpu_ring_commit(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index dc461df48da0..9682f39d57c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -175,7 +175,7 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t rptr = amdgpu_ring_get_rptr(ring); + uint32_t rptr; unsigned i; int r; @@ -185,6 +185,9 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 16); if (r) return r; + + rptr = amdgpu_ring_get_rptr(ring); + amdgpu_ring_write(ring, HEVC_ENC_CMD_END); amdgpu_ring_commit(ring); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3082b55b1e77..bfb65e8c728f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3587,6 +3587,8 @@ static void dm_drm_plane_reset(struct drm_plane *plane) plane->state = &amdgpu_state->base; plane->state->plane = plane; plane->state->rotation = DRM_MODE_ROTATE_0; + plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE; + plane->state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI; } } @@ -3787,8 +3789,7 @@ static void dm_plane_atomic_async_update(struct drm_plane *plane, struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(new_state->state, plane); - if (plane->state->fb != new_state->fb) - drm_atomic_set_fb_for_plane(plane->state, new_state->fb); + swap(plane->state->fb, new_state->fb); plane->state->src_x = new_state->src_x; plane->state->src_y = new_state->src_y; @@ -4953,8 +4954,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, static void amdgpu_dm_crtc_copy_transient_flags(struct drm_crtc_state *crtc_state, struct dc_stream_state *stream_state) { - stream_state->mode_changed = - crtc_state->mode_changed || crtc_state->active_changed; + stream_state->mode_changed = drm_atomic_crtc_needs_modeset(crtc_state); } static int amdgpu_dm_atomic_commit(struct drm_device *dev, @@ -5661,6 +5661,9 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, update_stream_scaling_settings( &new_crtc_state->mode, dm_new_conn_state, dm_new_crtc_state->stream); + /* ABM settings */ + dm_new_crtc_state->abm_level = dm_new_conn_state->abm_level; + /* * Color management settings. We also update color properties * when a modeset is needed, to ensure it gets reprogrammed. @@ -5858,7 +5861,9 @@ dm_determine_update_type_for_commit(struct dc *dc, } for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - struct dc_stream_update stream_update = { 0 }; + struct dc_stream_update stream_update; + + memset(&stream_update, 0, sizeof(stream_update)); new_dm_crtc_state = to_dm_crtc_state(new_crtc_state); old_dm_crtc_state = to_dm_crtc_state(old_crtc_state); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index a6cda201c964..88fe4fb43bfd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -524,6 +524,14 @@ void dc_link_set_preferred_link_settings(struct dc *dc, struct dc_stream_state *link_stream; struct dc_link_settings store_settings = *link_setting; + link->preferred_link_setting = store_settings; + + /* Retrain with preferred link settings only relevant for + * DP signal type + */ + if (!dc_is_dp_signal(link->connector_signal)) + return; + for (i = 0; i < MAX_PIPES; i++) { pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->stream && pipe->stream->link) { @@ -538,7 +546,10 @@ void dc_link_set_preferred_link_settings(struct dc *dc, link_stream = link->dc->current_state->res_ctx.pipe_ctx[i].stream; - link->preferred_link_setting = store_settings; + /* Cannot retrain link if backend is off */ + if (link_stream->dpms_off) + return; + if (link_stream) decide_link_settings(link_stream, &store_settings); @@ -1666,6 +1677,7 @@ static void commit_planes_do_stream_update(struct dc *dc, continue; if (stream_update->dpms_off) { + dc->hwss.pipe_control_lock(dc, pipe_ctx, true); if (*stream_update->dpms_off) { core_link_disable_stream(pipe_ctx, KEEP_ACQUIRED_RESOURCE); dc->hwss.optimize_bandwidth(dc, dc->current_state); @@ -1673,6 +1685,7 @@ static void commit_planes_do_stream_update(struct dc *dc, dc->hwss.prepare_bandwidth(dc, dc->current_state); core_link_enable_stream(dc->current_state, pipe_ctx); } + dc->hwss.pipe_control_lock(dc, pipe_ctx, false); } if (stream_update->abm_level && pipe_ctx->stream_res.abm) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index ea18e9c2d8ce..6072636da388 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1399,6 +1399,15 @@ static enum dc_status enable_link_dp( /* get link settings for video mode timing */ decide_link_settings(stream, &link_settings); + /* If link settings are different than current and link already enabled + * then need to disable before programming to new rate. + */ + if (link->link_status.link_active && + (link->cur_link_settings.lane_count != link_settings.lane_count || + link->cur_link_settings.link_rate != link_settings.link_rate)) { + dp_disable_link_phy(link, pipe_ctx->stream->signal); + } + pipe_ctx->stream_res.pix_clk_params.requested_sym_clk = link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ; state->dccg->funcs->update_clocks(state->dccg, state, false); @@ -2074,11 +2083,28 @@ static void disable_link(struct dc_link *link, enum signal_type signal) } } +static uint32_t get_timing_pixel_clock_100hz(const struct dc_crtc_timing *timing) +{ + + uint32_t pxl_clk = timing->pix_clk_100hz; + + if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) + pxl_clk /= 2; + else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + pxl_clk = pxl_clk * 2 / 3; + + if (timing->display_color_depth == COLOR_DEPTH_101010) + pxl_clk = pxl_clk * 10 / 8; + else if (timing->display_color_depth == COLOR_DEPTH_121212) + pxl_clk = pxl_clk * 12 / 8; + + return pxl_clk; +} + static bool dp_active_dongle_validate_timing( const struct dc_crtc_timing *timing, const struct dpcd_caps *dpcd_caps) { - unsigned int required_pix_clk_100hz = timing->pix_clk_100hz; const struct dc_dongle_caps *dongle_caps = &dpcd_caps->dongle_caps; switch (dpcd_caps->dongle_type) { @@ -2115,13 +2141,6 @@ static bool dp_active_dongle_validate_timing( return false; } - - /* Check Color Depth and Pixel Clock */ - if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) - required_pix_clk_100hz /= 2; - else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) - required_pix_clk_100hz = required_pix_clk_100hz * 2 / 3; - switch (timing->display_color_depth) { case COLOR_DEPTH_666: case COLOR_DEPTH_888: @@ -2130,14 +2149,11 @@ static bool dp_active_dongle_validate_timing( case COLOR_DEPTH_101010: if (dongle_caps->dp_hdmi_max_bpc < 10) return false; - required_pix_clk_100hz = required_pix_clk_100hz * 10 / 8; break; case COLOR_DEPTH_121212: if (dongle_caps->dp_hdmi_max_bpc < 12) return false; - required_pix_clk_100hz = required_pix_clk_100hz * 12 / 8; break; - case COLOR_DEPTH_141414: case COLOR_DEPTH_161616: default: @@ -2145,7 +2161,7 @@ static bool dp_active_dongle_validate_timing( return false; } - if (required_pix_clk_100hz > (dongle_caps->dp_hdmi_max_pixel_clk * 10)) + if (get_timing_pixel_clock_100hz(timing) > (dongle_caps->dp_hdmi_max_pixel_clk * 10)) return false; return true; @@ -2166,7 +2182,7 @@ enum dc_status dc_link_validate_mode_timing( return DC_OK; /* Passive Dongle */ - if (0 != max_pix_clk && timing->pix_clk_100hz > max_pix_clk) + if (max_pix_clk != 0 && get_timing_pixel_clock_100hz(timing) > max_pix_clk) return DC_EXCEED_DONGLE_CAP; /* Active Dongle*/ @@ -2316,7 +2332,7 @@ static struct fixed31_32 get_pbn_from_timing(struct pipe_ctx *pipe_ctx) uint32_t denominator; bpc = get_color_depth(pipe_ctx->stream_res.pix_clk_params.color_depth); - kbps = pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz / 10 * bpc * 3; + kbps = dc_bandwidth_in_kbps_from_timing(&pipe_ctx->stream->timing); /* * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006 @@ -2736,3 +2752,49 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable) } } +uint32_t dc_bandwidth_in_kbps_from_timing( + const struct dc_crtc_timing *timing) +{ + uint32_t bits_per_channel = 0; + uint32_t kbps; + + switch (timing->display_color_depth) { + case COLOR_DEPTH_666: + bits_per_channel = 6; + break; + case COLOR_DEPTH_888: + bits_per_channel = 8; + break; + case COLOR_DEPTH_101010: + bits_per_channel = 10; + break; + case COLOR_DEPTH_121212: + bits_per_channel = 12; + break; + case COLOR_DEPTH_141414: + bits_per_channel = 14; + break; + case COLOR_DEPTH_161616: + bits_per_channel = 16; + break; + default: + break; + } + + ASSERT(bits_per_channel != 0); + + kbps = timing->pix_clk_100hz / 10; + kbps *= bits_per_channel; + + if (timing->flags.Y_ONLY != 1) { + /*Only YOnly make reduce bandwidth by 1/3 compares to RGB*/ + kbps *= 3; + if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) + kbps /= 2; + else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + kbps = kbps * 2 / 3; + } + + return kbps; + +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 09d301216076..6809932e80be 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1520,53 +1520,6 @@ static bool decide_fallback_link_setting( return true; } -static uint32_t bandwidth_in_kbps_from_timing( - const struct dc_crtc_timing *timing) -{ - uint32_t bits_per_channel = 0; - uint32_t kbps; - - switch (timing->display_color_depth) { - case COLOR_DEPTH_666: - bits_per_channel = 6; - break; - case COLOR_DEPTH_888: - bits_per_channel = 8; - break; - case COLOR_DEPTH_101010: - bits_per_channel = 10; - break; - case COLOR_DEPTH_121212: - bits_per_channel = 12; - break; - case COLOR_DEPTH_141414: - bits_per_channel = 14; - break; - case COLOR_DEPTH_161616: - bits_per_channel = 16; - break; - default: - break; - } - - ASSERT(bits_per_channel != 0); - - kbps = timing->pix_clk_100hz / 10; - kbps *= bits_per_channel; - - if (timing->flags.Y_ONLY != 1) { - /*Only YOnly make reduce bandwidth by 1/3 compares to RGB*/ - kbps *= 3; - if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) - kbps /= 2; - else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) - kbps = kbps * 2 / 3; - } - - return kbps; - -} - static uint32_t bandwidth_in_kbps_from_link_settings( const struct dc_link_settings *link_setting) { @@ -1607,7 +1560,7 @@ bool dp_validate_mode_timing( link_setting = &link->verified_link_cap; */ - req_bw = bandwidth_in_kbps_from_timing(timing); + req_bw = dc_bandwidth_in_kbps_from_timing(timing); max_bw = bandwidth_in_kbps_from_link_settings(link_setting); if (req_bw <= max_bw) { @@ -1641,7 +1594,7 @@ void decide_link_settings(struct dc_stream_state *stream, uint32_t req_bw; uint32_t link_bw; - req_bw = bandwidth_in_kbps_from_timing(&stream->timing); + req_bw = dc_bandwidth_in_kbps_from_timing(&stream->timing); link = stream->link; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 349ab8017776..4c06eb52ab73 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1266,10 +1266,12 @@ bool dc_remove_plane_from_context( * For head pipe detach surfaces from pipe for tail * pipe just zero it out */ - if (!pipe_ctx->top_pipe) { + if (!pipe_ctx->top_pipe || + (!pipe_ctx->top_pipe->top_pipe && + pipe_ctx->top_pipe->stream_res.opp != pipe_ctx->stream_res.opp)) { pipe_ctx->plane_state = NULL; pipe_ctx->bottom_pipe = NULL; - } else { + } else { memset(pipe_ctx, 0, sizeof(*pipe_ctx)); } } diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 8fc223defed4..a83e1c60f9db 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -252,4 +252,6 @@ bool dc_submit_i2c( uint32_t link_index, struct i2c_command *cmd); +uint32_t dc_bandwidth_in_kbps_from_timing( + const struct dc_crtc_timing *timing); #endif /* DC_LINK_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 4fe3664fb495..5ecfcb9ee8a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -377,7 +377,6 @@ static bool acquire( struct dce_aux *engine, struct ddc *ddc) { - enum gpio_result result; if (!is_engine_available(engine)) @@ -458,7 +457,8 @@ int dce_aux_transfer(struct ddc_service *ddc, memset(&aux_rep, 0, sizeof(aux_rep)); aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]; - acquire(aux_engine, ddc_pin); + if (!acquire(aux_engine, ddc_pin)) + return -1; if (payload->i2c_over_aux) aux_req.type = AUX_TRANSACTION_TYPE_I2C; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index cd1ebe57ed59..1951f9276e41 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -392,6 +392,10 @@ void dpp1_cnv_setup ( default: break; } + + /* Set default color space based on format if none is given. */ + color_space = input_color_space ? input_color_space : color_space; + REG_SET(CNVC_SURFACE_PIXEL_FORMAT, 0, CNVC_SURFACE_PIXEL_FORMAT, pixel_format); REG_UPDATE(FORMAT_CONTROL, FORMAT_CONTROL__ALPHA_EN, alpha_en); @@ -403,7 +407,7 @@ void dpp1_cnv_setup ( for (i = 0; i < 12; i++) tbl_entry.regval[i] = input_csc_color_matrix.matrix[i]; - tbl_entry.color_space = input_color_space; + tbl_entry.color_space = color_space; if (color_space >= COLOR_SPACE_YCBCR601) select = INPUT_CSC_SELECT_ICSC; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c index c7642e748297..ce21a290bf3e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c @@ -406,15 +406,25 @@ void dpp1_dscl_calc_lb_num_partitions( int *num_part_y, int *num_part_c) { + int lb_memory_size, lb_memory_size_c, lb_memory_size_a, num_partitions_a, + lb_bpc, memory_line_size_y, memory_line_size_c, memory_line_size_a; + int line_size = scl_data->viewport.width < scl_data->recout.width ? scl_data->viewport.width : scl_data->recout.width; int line_size_c = scl_data->viewport_c.width < scl_data->recout.width ? scl_data->viewport_c.width : scl_data->recout.width; - int lb_bpc = dpp1_dscl_get_lb_depth_bpc(scl_data->lb_params.depth); - int memory_line_size_y = (line_size * lb_bpc + 71) / 72; /* +71 to ceil */ - int memory_line_size_c = (line_size_c * lb_bpc + 71) / 72; /* +71 to ceil */ - int memory_line_size_a = (line_size + 5) / 6; /* +5 to ceil */ - int lb_memory_size, lb_memory_size_c, lb_memory_size_a, num_partitions_a; + + if (line_size == 0) + line_size = 1; + + if (line_size_c == 0) + line_size_c = 1; + + + lb_bpc = dpp1_dscl_get_lb_depth_bpc(scl_data->lb_params.depth); + memory_line_size_y = (line_size * lb_bpc + 71) / 72; /* +71 to ceil */ + memory_line_size_c = (line_size_c * lb_bpc + 71) / 72; /* +71 to ceil */ + memory_line_size_a = (line_size + 5) / 6; /* +5 to ceil */ if (lb_config == LB_MEMORY_CONFIG_1) { lb_memory_size = 816; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index d1a8f1c302a9..1fac86d3032d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1008,9 +1008,14 @@ static void dcn10_init_pipes(struct dc *dc, struct dc_state *context) * to non-preferred front end. If pipe_ctx->stream is not NULL, * we will use the pipe, so don't disable */ - if (pipe_ctx->stream != NULL) + if (pipe_ctx->stream != NULL && + pipe_ctx->stream_res.tg->funcs->is_tg_enabled( + pipe_ctx->stream_res.tg)) continue; + /* Disable on the current state so the new one isn't cleared. */ + pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + dpp->funcs->dpp_reset(dpp); pipe_ctx->stream_res.tg = tg; @@ -1931,7 +1936,7 @@ static void update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state) plane_state->format, EXPANSION_MODE_ZERO, plane_state->input_csc_color_matrix, - COLOR_SPACE_YCBCR601_LIMITED); + plane_state->color_space); //set scale and bias registers build_prescale_params(&bns_params, plane_state); @@ -2692,9 +2697,15 @@ static void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) .rotation = pipe_ctx->plane_state->rotation, .mirror = pipe_ctx->plane_state->horizontal_mirror }; - - pos_cpy.x_hotspot += pipe_ctx->plane_state->dst_rect.x; - pos_cpy.y_hotspot += pipe_ctx->plane_state->dst_rect.y; + uint32_t x_plane = pipe_ctx->plane_state->dst_rect.x; + uint32_t y_plane = pipe_ctx->plane_state->dst_rect.y; + uint32_t x_offset = min(x_plane, pos_cpy.x); + uint32_t y_offset = min(y_plane, pos_cpy.y); + + pos_cpy.x -= x_offset; + pos_cpy.y -= y_offset; + pos_cpy.x_hotspot += (x_plane - x_offset); + pos_cpy.y_hotspot += (y_plane - y_offset); if (pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 34d6fdcb32e2..4c8ce7938f01 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -138,13 +138,14 @@ #endif #define RAVEN_UNKNOWN 0xFF -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) -#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0)) -#endif /* DCN1_01 */ #define ASIC_REV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN) #define RAVEN1_F0 0xF0 #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN)) +#if defined(CONFIG_DRM_AMD_DC_DCN1_01) +#define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0)) +#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0)) +#endif /* DCN1_01 */ #define FAMILY_RV 142 /* DCN 1*/ diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 0fbc8fbc3541..a1055413bade 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1854,6 +1854,8 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, coordinates_x, axis_x, curve, MAX_HW_POINTS, tf_pts, mapUserRamp && ramp && ramp->type == GAMMA_RGB_256); + if (ramp->type == GAMMA_CUSTOM) + apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts); ret = true; diff --git a/drivers/gpu/drm/arm/display/komeda/Makefile b/drivers/gpu/drm/arm/display/komeda/Makefile index 1b875e5dc0f6..a72e30c0e03d 100644 --- a/drivers/gpu/drm/arm/display/komeda/Makefile +++ b/drivers/gpu/drm/arm/display/komeda/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 ccflags-y := \ - -I$(src)/../include \ - -I$(src) + -I $(srctree)/$(src)/../include \ + -I $(srctree)/$(src) komeda-y := \ komeda_drv.o \ diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c index 0b2b62f8fa3c..a3efa28436ea 100644 --- a/drivers/gpu/drm/arm/hdlcd_crtc.c +++ b/drivers/gpu/drm/arm/hdlcd_crtc.c @@ -186,20 +186,20 @@ static void hdlcd_crtc_atomic_disable(struct drm_crtc *crtc, clk_disable_unprepare(hdlcd->clk); } -static int hdlcd_crtc_atomic_check(struct drm_crtc *crtc, - struct drm_crtc_state *state) +static enum drm_mode_status hdlcd_crtc_mode_valid(struct drm_crtc *crtc, + const struct drm_display_mode *mode) { struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc); - struct drm_display_mode *mode = &state->adjusted_mode; long rate, clk_rate = mode->clock * 1000; rate = clk_round_rate(hdlcd->clk, clk_rate); - if (rate != clk_rate) { + /* 0.1% seems a close enough tolerance for the TDA19988 on Juno */ + if (abs(rate - clk_rate) * 1000 > clk_rate) { /* clock required by mode not supported by hardware */ - return -EINVAL; + return MODE_NOCLOCK; } - return 0; + return MODE_OK; } static void hdlcd_crtc_atomic_begin(struct drm_crtc *crtc, @@ -220,7 +220,7 @@ static void hdlcd_crtc_atomic_begin(struct drm_crtc *crtc, } static const struct drm_crtc_helper_funcs hdlcd_crtc_helper_funcs = { - .atomic_check = hdlcd_crtc_atomic_check, + .mode_valid = hdlcd_crtc_mode_valid, .atomic_begin = hdlcd_crtc_atomic_begin, .atomic_enable = hdlcd_crtc_atomic_enable, .atomic_disable = hdlcd_crtc_atomic_disable, diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index ab50ad06e271..64da56f4b0cf 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -192,6 +192,7 @@ static void malidp_atomic_commit_hw_done(struct drm_atomic_state *state) { struct drm_device *drm = state->dev; struct malidp_drm *malidp = drm->dev_private; + int loop = 5; malidp->event = malidp->crtc.state->event; malidp->crtc.state->event = NULL; @@ -206,8 +207,18 @@ static void malidp_atomic_commit_hw_done(struct drm_atomic_state *state) drm_crtc_vblank_get(&malidp->crtc); /* only set config_valid if the CRTC is enabled */ - if (malidp_set_and_wait_config_valid(drm) < 0) + if (malidp_set_and_wait_config_valid(drm) < 0) { + /* + * make a loop around the second CVAL setting and + * try 5 times before giving up. + */ + while (loop--) { + if (!malidp_set_and_wait_config_valid(drm)) + break; + } DRM_DEBUG_DRIVER("timed out waiting for updated configuration\n"); + } + } else if (malidp->event) { /* CRTC inactive means vblank IRQ is disabled, send event directly */ spin_lock_irq(&drm->event_lock); diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index ec2ca71e1323..c532e9c9e491 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -748,11 +748,11 @@ static void adv7511_mode_set(struct adv7511 *adv7511, vsync_polarity = 1; } - if (mode->vrefresh <= 24000) + if (drm_mode_vrefresh(mode) <= 24) low_refresh_rate = ADV7511_LOW_REFRESH_RATE_24HZ; - else if (mode->vrefresh <= 25000) + else if (drm_mode_vrefresh(mode) <= 25) low_refresh_rate = ADV7511_LOW_REFRESH_RATE_25HZ; - else if (mode->vrefresh <= 30000) + else if (drm_mode_vrefresh(mode) <= 30) low_refresh_rate = ADV7511_LOW_REFRESH_RATE_30HZ; else low_refresh_rate = ADV7511_LOW_REFRESH_RATE_NONE; diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index fbb76332cc9f..9a80ed005d1f 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1607,15 +1607,6 @@ int drm_atomic_helper_async_check(struct drm_device *dev, old_plane_state->crtc != new_plane_state->crtc) return -EINVAL; - /* - * FIXME: Since prepare_fb and cleanup_fb are always called on - * the new_plane_state for async updates we need to block framebuffer - * changes. This prevents use of a fb that's been cleaned up and - * double cleanups from occuring. - */ - if (old_plane_state->fb != new_plane_state->fb) - return -EINVAL; - funcs = plane->helper_private; if (!funcs->atomic_async_update) return -EINVAL; @@ -1646,6 +1637,8 @@ EXPORT_SYMBOL(drm_atomic_helper_async_check); * drm_atomic_async_check() succeeds. Async commits are not supposed to swap * the states like normal sync commits, but just do in-place changes on the * current state. + * + * TODO: Implement full swap instead of doing in-place changes. */ void drm_atomic_helper_async_commit(struct drm_device *dev, struct drm_atomic_state *state) @@ -1656,6 +1649,9 @@ void drm_atomic_helper_async_commit(struct drm_device *dev, int i; for_each_new_plane_in_state(state, plane, plane_state, i) { + struct drm_framebuffer *new_fb = plane_state->fb; + struct drm_framebuffer *old_fb = plane->state->fb; + funcs = plane->helper_private; funcs->atomic_async_update(plane, plane_state); @@ -1664,11 +1660,17 @@ void drm_atomic_helper_async_commit(struct drm_device *dev, * plane->state in-place, make sure at least common * properties have been properly updated. */ - WARN_ON_ONCE(plane->state->fb != plane_state->fb); + WARN_ON_ONCE(plane->state->fb != new_fb); WARN_ON_ONCE(plane->state->crtc_x != plane_state->crtc_x); WARN_ON_ONCE(plane->state->crtc_y != plane_state->crtc_y); WARN_ON_ONCE(plane->state->src_x != plane_state->src_x); WARN_ON_ONCE(plane->state->src_y != plane_state->src_y); + + /* + * Make sure the FBs have been swapped so that cleanups in the + * new_state performs a cleanup in the old FB. + */ + WARN_ON_ONCE(plane_state->fb != old_fb); } } EXPORT_SYMBOL(drm_atomic_helper_async_commit); diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 4985384e51f6..59ffb6b9c745 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -412,6 +413,9 @@ __drm_atomic_helper_connector_destroy_state(struct drm_connector_state *state) if (state->commit) drm_crtc_commit_put(state->commit); + + if (state->writeback_job) + drm_writeback_cleanup_job(state->writeback_job); } EXPORT_SYMBOL(__drm_atomic_helper_connector_destroy_state); diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 0aabd401d3ca..0ceda7c14915 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -512,8 +512,8 @@ drm_atomic_crtc_get_property(struct drm_crtc *crtc, } static int drm_atomic_plane_set_property(struct drm_plane *plane, - struct drm_plane_state *state, struct drm_property *property, - uint64_t val) + struct drm_plane_state *state, struct drm_file *file_priv, + struct drm_property *property, uint64_t val) { struct drm_device *dev = plane->dev; struct drm_mode_config *config = &dev->mode_config; @@ -521,7 +521,8 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, int ret; if (property == config->prop_fb_id) { - struct drm_framebuffer *fb = drm_framebuffer_lookup(dev, NULL, val); + struct drm_framebuffer *fb; + fb = drm_framebuffer_lookup(dev, file_priv, val); drm_atomic_set_fb_for_plane(state, fb); if (fb) drm_framebuffer_put(fb); @@ -537,7 +538,7 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, return -EINVAL; } else if (property == config->prop_crtc_id) { - struct drm_crtc *crtc = drm_crtc_find(dev, NULL, val); + struct drm_crtc *crtc = drm_crtc_find(dev, file_priv, val); return drm_atomic_set_crtc_for_plane(state, crtc); } else if (property == config->prop_crtc_x) { state->crtc_x = U642I64(val); @@ -681,14 +682,14 @@ static int drm_atomic_set_writeback_fb_for_connector( } static int drm_atomic_connector_set_property(struct drm_connector *connector, - struct drm_connector_state *state, struct drm_property *property, - uint64_t val) + struct drm_connector_state *state, struct drm_file *file_priv, + struct drm_property *property, uint64_t val) { struct drm_device *dev = connector->dev; struct drm_mode_config *config = &dev->mode_config; if (property == config->prop_crtc_id) { - struct drm_crtc *crtc = drm_crtc_find(dev, NULL, val); + struct drm_crtc *crtc = drm_crtc_find(dev, file_priv, val); return drm_atomic_set_crtc_for_connector(state, crtc); } else if (property == config->dpms_property) { /* setting DPMS property requires special handling, which @@ -747,8 +748,10 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector, } state->content_protection = val; } else if (property == config->writeback_fb_id_property) { - struct drm_framebuffer *fb = drm_framebuffer_lookup(dev, NULL, val); - int ret = drm_atomic_set_writeback_fb_for_connector(state, fb); + struct drm_framebuffer *fb; + int ret; + fb = drm_framebuffer_lookup(dev, file_priv, val); + ret = drm_atomic_set_writeback_fb_for_connector(state, fb); if (fb) drm_framebuffer_put(fb); return ret; @@ -943,6 +946,7 @@ int drm_atomic_connector_commit_dpms(struct drm_atomic_state *state, } int drm_atomic_set_property(struct drm_atomic_state *state, + struct drm_file *file_priv, struct drm_mode_object *obj, struct drm_property *prop, uint64_t prop_value) @@ -965,7 +969,8 @@ int drm_atomic_set_property(struct drm_atomic_state *state, } ret = drm_atomic_connector_set_property(connector, - connector_state, prop, prop_value); + connector_state, file_priv, + prop, prop_value); break; } case DRM_MODE_OBJECT_CRTC: { @@ -993,7 +998,8 @@ int drm_atomic_set_property(struct drm_atomic_state *state, } ret = drm_atomic_plane_set_property(plane, - plane_state, prop, prop_value); + plane_state, file_priv, + prop, prop_value); break; } default: @@ -1365,8 +1371,8 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, goto out; } - ret = drm_atomic_set_property(state, obj, prop, - prop_value); + ret = drm_atomic_set_property(state, file_priv, + obj, prop, prop_value); if (ret) { drm_mode_object_put(obj); goto out; diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index dd40eff0911c..160edeafa6d6 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1385,12 +1385,6 @@ EXPORT_SYMBOL(drm_mode_create_scaling_mode_property); * * The driver may place further restrictions within these minimum * and maximum bounds. - * - * The semantics for the vertical blank timestamp differ when - * variable refresh rate is active. The vertical blank timestamp - * is defined to be an estimate using the current mode's fixed - * refresh rate timings. The semantics for the page-flip event - * timestamp remain the same. */ /** diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 7dabbaf033a1..790ba5941954 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -559,6 +559,10 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, plane = crtc->primary; + /* allow disabling with the primary plane leased */ + if (crtc_req->mode_valid && !drm_lease_held(file_priv, plane->base.id)) + return -EACCES; + mutex_lock(&crtc->dev->mode_config.mutex); DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE, ret); diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h index 216f2a9ee3d4..0719a235d6cc 100644 --- a/drivers/gpu/drm/drm_crtc_internal.h +++ b/drivers/gpu/drm/drm_crtc_internal.h @@ -214,6 +214,7 @@ int drm_atomic_connector_commit_dpms(struct drm_atomic_state *state, struct drm_connector *connector, int mode); int drm_atomic_set_property(struct drm_atomic_state *state, + struct drm_file *file_priv, struct drm_mode_object *obj, struct drm_property *prop, uint64_t prop_value); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 05bbc2b622fc..04aa6ccdfb24 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -497,7 +497,7 @@ int drm_dev_init(struct drm_device *dev, BUG_ON(!parent); kref_init(&dev->ref); - dev->dev = parent; + dev->dev = get_device(parent); dev->driver = driver; /* no per-device feature limits by default */ @@ -567,6 +567,7 @@ int drm_dev_init(struct drm_device *dev, drm_minor_free(dev, DRM_MINOR_RENDER); drm_fs_inode_free(dev->anon_inode); err_free: + put_device(dev->dev); mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->ctxlist_mutex); mutex_destroy(&dev->clientlist_mutex); @@ -602,6 +603,8 @@ void drm_dev_fini(struct drm_device *dev) drm_minor_free(dev, DRM_MINOR_PRIMARY); drm_minor_free(dev, DRM_MINOR_RENDER); + put_device(dev->dev); + mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->ctxlist_mutex); mutex_destroy(&dev->clientlist_mutex); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 990b1909f9d7..d94778915312 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -172,6 +172,25 @@ static const struct edid_quirk { /* Rotel RSX-1058 forwards sink's EDID but only does HDMI 1.1*/ { "ETR", 13896, EDID_QUIRK_FORCE_8BPC }, + /* Valve Index Headset */ + { "VLV", 0x91a8, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91b0, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91b1, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91b2, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91b3, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91b4, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91b5, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91b6, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91b7, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91b8, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91b9, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91ba, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91bb, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91bc, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91bd, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91be, EDID_QUIRK_NON_DESKTOP }, + { "VLV", 0x91bf, EDID_QUIRK_NON_DESKTOP }, + /* HTC Vive and Vive Pro VR Headsets */ { "HVR", 0xaa01, EDID_QUIRK_NON_DESKTOP }, { "HVR", 0xaa02, EDID_QUIRK_NON_DESKTOP }, @@ -193,6 +212,12 @@ static const struct edid_quirk { /* Sony PlayStation VR Headset */ { "SNY", 0x0704, EDID_QUIRK_NON_DESKTOP }, + + /* Sensics VR Headsets */ + { "SEN", 0x1019, EDID_QUIRK_NON_DESKTOP }, + + /* OSVR HDK and HDK2 VR Headsets */ + { "SVR", 0x1019, EDID_QUIRK_NON_DESKTOP }, }; /* @@ -1555,6 +1580,50 @@ static void connector_bad_edid(struct drm_connector *connector, } } +/* Get override or firmware EDID */ +static struct edid *drm_get_override_edid(struct drm_connector *connector) +{ + struct edid *override = NULL; + + if (connector->override_edid) + override = drm_edid_duplicate(connector->edid_blob_ptr->data); + + if (!override) + override = drm_load_edid_firmware(connector); + + return IS_ERR(override) ? NULL : override; +} + +/** + * drm_add_override_edid_modes - add modes from override/firmware EDID + * @connector: connector we're probing + * + * Add modes from the override/firmware EDID, if available. Only to be used from + * drm_helper_probe_single_connector_modes() as a fallback for when DDC probe + * failed during drm_get_edid() and caused the override/firmware EDID to be + * skipped. + * + * Return: The number of modes added or 0 if we couldn't find any. + */ +int drm_add_override_edid_modes(struct drm_connector *connector) +{ + struct edid *override; + int num_modes = 0; + + override = drm_get_override_edid(connector); + if (override) { + drm_connector_update_edid_property(connector, override); + num_modes = drm_add_edid_modes(connector, override); + kfree(override); + + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] adding %d modes via fallback override/firmware EDID\n", + connector->base.id, connector->name, num_modes); + } + + return num_modes; +} +EXPORT_SYMBOL(drm_add_override_edid_modes); + /** * drm_do_get_edid - get EDID data using a custom EDID block read function * @connector: connector we're probing @@ -1582,15 +1651,10 @@ struct edid *drm_do_get_edid(struct drm_connector *connector, { int i, j = 0, valid_extensions = 0; u8 *edid, *new; - struct edid *override = NULL; - - if (connector->override_edid) - override = drm_edid_duplicate(connector->edid_blob_ptr->data); - - if (!override) - override = drm_load_edid_firmware(connector); + struct edid *override; - if (!IS_ERR_OR_NULL(override)) + override = drm_get_override_edid(connector); + if (override) return override; if ((edid = kmalloc(EDID_LENGTH, GFP_KERNEL)) == NULL) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index af2ab640cadb..4a5b2d2bc508 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -3317,8 +3317,6 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp) return ret; } - drm_client_add(&fb_helper->client); - if (!preferred_bpp) preferred_bpp = dev->mode_config.preferred_depth; if (!preferred_bpp) @@ -3329,6 +3327,8 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp) if (ret) DRM_DEV_DEBUG(dev->dev, "client hotplug ret=%d\n", ret); + drm_client_add(&fb_helper->client); + return 0; } EXPORT_SYMBOL(drm_fbdev_generic_setup); diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 7caa3c7ed978..9701469a6e93 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -577,6 +577,7 @@ ssize_t drm_read(struct file *filp, char __user *buffer, file_priv->event_space -= length; list_add(&e->link, &file_priv->event_list); spin_unlock_irq(&dev->event_lock); + wake_up_interruptible(&file_priv->event_wait); break; } diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index cc26625b4b33..e01ceed09e67 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -186,13 +186,13 @@ void drm_gem_cma_free_object(struct drm_gem_object *gem_obj) cma_obj = to_drm_gem_cma_obj(gem_obj); - if (cma_obj->vaddr) { - dma_free_wc(gem_obj->dev->dev, cma_obj->base.size, - cma_obj->vaddr, cma_obj->paddr); - } else if (gem_obj->import_attach) { + if (gem_obj->import_attach) { if (cma_obj->vaddr) dma_buf_vunmap(gem_obj->import_attach->dmabuf, cma_obj->vaddr); drm_prime_gem_destroy(gem_obj, cma_obj->sgt); + } else if (cma_obj->vaddr) { + dma_free_wc(gem_obj->dev->dev, cma_obj->base.size, + cma_obj->vaddr, cma_obj->paddr); } drm_gem_object_release(gem_obj); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 687943df58e1..ab5692104ea0 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -508,13 +508,6 @@ int drm_version(struct drm_device *dev, void *data, return err; } -static inline bool -drm_render_driver_and_ioctl(const struct drm_device *dev, u32 flags) -{ - return drm_core_check_feature(dev, DRIVER_RENDER) && - (flags & DRM_RENDER_ALLOW); -} - /** * drm_ioctl_permit - Check ioctl permissions against caller * @@ -529,19 +522,14 @@ drm_render_driver_and_ioctl(const struct drm_device *dev, u32 flags) */ int drm_ioctl_permit(u32 flags, struct drm_file *file_priv) { - const struct drm_device *dev = file_priv->minor->dev; - /* ROOT_ONLY is only for CAP_SYS_ADMIN */ if (unlikely((flags & DRM_ROOT_ONLY) && !capable(CAP_SYS_ADMIN))) return -EACCES; - /* AUTH is only for master ... */ - if (unlikely((flags & DRM_AUTH) && drm_is_primary_client(file_priv))) { - /* authenticated ones, or render capable on DRM_RENDER_ALLOW. */ - if (!file_priv->authenticated && - !drm_render_driver_and_ioctl(dev, flags)) - return -EACCES; - } + /* AUTH is only for authenticated or render client */ + if (unlikely((flags & DRM_AUTH) && !drm_is_render_client(file_priv) && + !file_priv->authenticated)) + return -EACCES; /* MASTER is only for master or control clients */ if (unlikely((flags & DRM_MASTER) && diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c index 4a1c2023ccf0..1a346ae1599d 100644 --- a/drivers/gpu/drm/drm_mode_config.c +++ b/drivers/gpu/drm/drm_mode_config.c @@ -297,8 +297,9 @@ static int drm_mode_create_standard_properties(struct drm_device *dev) return -ENOMEM; dev->mode_config.prop_crtc_id = prop; - prop = drm_property_create(dev, DRM_MODE_PROP_BLOB, "FB_DAMAGE_CLIPS", - 0); + prop = drm_property_create(dev, + DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_BLOB, + "FB_DAMAGE_CLIPS", 0); if (!prop) return -ENOMEM; dev->mode_config.prop_fb_damage_clips = prop; diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c index a9005c1c2384..f32507e65b79 100644 --- a/drivers/gpu/drm/drm_mode_object.c +++ b/drivers/gpu/drm/drm_mode_object.c @@ -451,6 +451,7 @@ static int set_property_legacy(struct drm_mode_object *obj, } static int set_property_atomic(struct drm_mode_object *obj, + struct drm_file *file_priv, struct drm_property *prop, uint64_t prop_value) { @@ -477,7 +478,7 @@ static int set_property_atomic(struct drm_mode_object *obj, obj_to_connector(obj), prop_value); } else { - ret = drm_atomic_set_property(state, obj, prop, prop_value); + ret = drm_atomic_set_property(state, file_priv, obj, prop, prop_value); if (ret) goto out; ret = drm_atomic_commit(state); @@ -520,7 +521,7 @@ int drm_mode_obj_set_property_ioctl(struct drm_device *dev, void *data, goto out_unref; if (drm_drv_uses_atomic_modeset(property->dev)) - ret = set_property_atomic(arg_obj, property, arg->value); + ret = set_property_atomic(arg_obj, file_priv, property, arg->value); else ret = set_property_legacy(arg_obj, property, arg->value); diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 4cfb56893b7f..d6ad60ab0d38 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -960,6 +960,11 @@ static int drm_mode_cursor_common(struct drm_device *dev, if (ret) goto out; + if (!drm_lease_held(file_priv, crtc->cursor->base.id)) { + ret = -EACCES; + goto out; + } + ret = drm_mode_cursor_universal(crtc, req, file_priv, &ctx); goto out; } @@ -1062,6 +1067,9 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, plane = crtc->primary; + if (!drm_lease_held(file_priv, plane->base.id)) + return -EACCES; + if (crtc->funcs->page_flip_target) { u32 current_vblank; int r; diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 6fd08e04b323..dd427c7ff967 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -479,6 +479,13 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, count = (*connector_funcs->get_modes)(connector); + /* + * Fallback for when DDC probe failed in drm_get_edid() and thus skipped + * override/firmware EDID. + */ + if (count == 0 && connector->status == connector_status_connected) + count = drm_add_override_edid_modes(connector); + if (count == 0 && connector->status == connector_status_connected) count = drm_add_modes_noedid(connector, 1024, 768); count += drm_helper_probe_add_cmdline_mode(connector); diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c index c20e6fe00cb3..2d75032f8159 100644 --- a/drivers/gpu/drm/drm_writeback.c +++ b/drivers/gpu/drm/drm_writeback.c @@ -268,6 +268,15 @@ void drm_writeback_queue_job(struct drm_writeback_connector *wb_connector, } EXPORT_SYMBOL(drm_writeback_queue_job); +void drm_writeback_cleanup_job(struct drm_writeback_job *job) +{ + if (job->fb) + drm_framebuffer_put(job->fb); + + kfree(job); +} +EXPORT_SYMBOL(drm_writeback_cleanup_job); + /* * @cleanup_work: deferred cleanup of a writeback job * @@ -280,10 +289,9 @@ static void cleanup_work(struct work_struct *work) struct drm_writeback_job *job = container_of(work, struct drm_writeback_job, cleanup_work); - drm_framebuffer_put(job->fb); - kfree(job); -} + drm_writeback_cleanup_job(job); +} /** * drm_writeback_signal_completion - Signal the completion of a writeback job diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 18c27f795cf6..3156450723ba 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -515,6 +515,9 @@ static int etnaviv_bind(struct device *dev) } drm->dev_private = priv; + dev->dma_parms = &priv->dma_parms; + dma_set_max_seg_size(dev, SZ_2G); + mutex_init(&priv->gem_lock); INIT_LIST_HEAD(&priv->gem_list); priv->num_gpus = 0; @@ -552,6 +555,8 @@ static void etnaviv_unbind(struct device *dev) component_unbind_all(dev, drm); + dev->dma_parms = NULL; + drm->dev_private = NULL; kfree(priv); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h index a6a7ded37ef1..6a4ea127c4f1 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h @@ -42,6 +42,7 @@ struct etnaviv_file_private { struct etnaviv_drm_private { int num_gpus; + struct device_dma_parameters dma_parms; struct etnaviv_gpu *gpu[ETNA_MAX_PIPES]; /* list of GEM objects: */ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index 33854c94cb85..515515ef24f9 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -125,6 +125,8 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) return; etnaviv_dump_core = false; + mutex_lock(&gpu->mmu->lock); + mmu_size = etnaviv_iommu_dump_size(gpu->mmu); /* We always dump registers, mmu, ring and end marker */ @@ -167,6 +169,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, PAGE_KERNEL); if (!iter.start) { + mutex_unlock(&gpu->mmu->lock); dev_warn(gpu->dev, "failed to allocate devcoredump file\n"); return; } @@ -234,6 +237,8 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) obj->base.size); } + mutex_unlock(&gpu->mmu->lock); + etnaviv_core_dump_header(&iter, ETDUMP_BUF_END, iter.data); dev_coredumpv(gpu->dev, iter.start, iter.data - iter.start, GFP_KERNEL); diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c index de9531caaca0..9c8446184b17 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c +++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c @@ -594,6 +594,9 @@ void cdv_intel_lvds_init(struct drm_device *dev, int pipe; u8 pin; + if (!dev_priv->lvds_enabled_in_vbt) + return; + pin = GMBUS_PORT_PANEL; if (!lvds_is_present_in_vbt(dev, &pin)) { DRM_DEBUG_KMS("LVDS is not present in VBT\n"); diff --git a/drivers/gpu/drm/gma500/intel_bios.c b/drivers/gpu/drm/gma500/intel_bios.c index 63bde4e86c6a..e019ea271ffc 100644 --- a/drivers/gpu/drm/gma500/intel_bios.c +++ b/drivers/gpu/drm/gma500/intel_bios.c @@ -436,6 +436,9 @@ parse_driver_features(struct drm_psb_private *dev_priv, if (driver->lvds_config == BDB_DRIVER_FEATURE_EDP) dev_priv->edp.support = 1; + dev_priv->lvds_enabled_in_vbt = driver->lvds_config != 0; + DRM_DEBUG_KMS("LVDS VBT config bits: 0x%x\n", driver->lvds_config); + /* This bit means to use 96Mhz for DPLL_A or not */ if (driver->primary_lfp_id) dev_priv->dplla_96mhz = true; diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h index 941b238bdcc9..bc608ddc3bd1 100644 --- a/drivers/gpu/drm/gma500/psb_drv.h +++ b/drivers/gpu/drm/gma500/psb_drv.h @@ -537,6 +537,7 @@ struct drm_psb_private { int lvds_ssc_freq; bool is_lvds_on; bool is_mipi_on; + bool lvds_enabled_in_vbt; u32 mipi_ctrl_display; unsigned int core_freq; diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index 271fb46d4dd0..ea8324abc784 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -5,5 +5,5 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ execlist.o scheduler.o sched_policy.o mmio_context.o cmd_parser.o debugfs.o \ fb_decoder.o dmabuf.o page_track.o -ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) +ccflags-y += -I $(srctree)/$(src) -I $(srctree)/$(src)/$(GVT_DIR)/ i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 9814773882ec..3090a3862668 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2178,7 +2178,8 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; unsigned long g_gtt_index = off >> info->gtt_entry_size_shift; unsigned long gma, gfn; - struct intel_gvt_gtt_entry e, m; + struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE}; + struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE}; dma_addr_t dma_addr; int ret; struct intel_gvt_partial_pte *partial_pte, *pos, *n; @@ -2245,7 +2246,8 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, if (!partial_update && (ops->test_present(&e))) { gfn = ops->get_pfn(&e); - m = e; + m.val64 = e.val64; + m.type = e.type; /* one PTE update may be issued in multiple writes and the * first write may not construct a valid gfn diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 05b953793316..5df868eb3dc6 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -298,12 +298,31 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) struct i915_request *req = workload->req; void *shadow_ring_buffer_va; u32 *cs; + int err; if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915) || IS_COFFEELAKE(req->i915)) && is_inhibit_context(req->hw_context)) intel_vgpu_restore_inhibit_context(vgpu, req); + /* + * To track whether a request has started on HW, we can emit a + * breadcrumb at the beginning of the request and check its + * timeline's HWSP to see if the breadcrumb has advanced past the + * start of this request. Actually, the request must have the + * init_breadcrumb if its timeline set has_init_bread_crumb, or the + * scheduler might get a wrong state of it during reset. Since the + * requests from gvt always set the has_init_breadcrumb flag, here + * need to do the emit_init_breadcrumb for all the requests. + */ + if (req->engine->emit_init_breadcrumb) { + err = req->engine->emit_init_breadcrumb(req); + if (err) { + gvt_vgpu_err("fail to emit init breadcrumb\n"); + return err; + } + } + /* allocate shadow ring buffer */ cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); if (IS_ERR(cs)) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 047855dd8c6b..328823ab6f60 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -32,7 +32,7 @@ * macros. Do **not** mass change existing definitions just to update the style. * * Layout - * '''''' + * ~~~~~~ * * Keep helper macros near the top. For example, _PIPE() and friends. * @@ -78,7 +78,7 @@ * style. Use lower case in hexadecimal values. * * Naming - * '''''' + * ~~~~~~ * * Try to name registers according to the specs. If the register name changes in * the specs from platform to another, stick to the original name. @@ -96,7 +96,7 @@ * suffix to the name. For example, ``_SKL`` or ``_GEN8``. * * Examples - * '''''''' + * ~~~~~~~~ * * (Note that the values in the example are indented using spaces instead of * TABs to avoid misalignment in generated documentation. Use TABs in the diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index e8ac04c33e29..b552d9d28127 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -300,10 +300,17 @@ static u32 *parse_csr_fw(struct drm_i915_private *dev_priv, u32 dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes; u32 i; u32 *dmc_payload; + size_t fsize; if (!fw) return NULL; + fsize = sizeof(struct intel_css_header) + + sizeof(struct intel_package_header) + + sizeof(struct intel_dmc_header); + if (fsize > fw->size) + goto error_truncated; + /* Extract CSS Header information*/ css_header = (struct intel_css_header *)fw->data; if (sizeof(struct intel_css_header) != @@ -363,6 +370,9 @@ static u32 *parse_csr_fw(struct drm_i915_private *dev_priv, /* Convert dmc_offset into number of bytes. By default it is in dwords*/ dmc_offset *= 4; readcount += dmc_offset; + fsize += dmc_offset; + if (fsize > fw->size) + goto error_truncated; /* Extract dmc_header information. */ dmc_header = (struct intel_dmc_header *)&fw->data[readcount]; @@ -394,6 +404,10 @@ static u32 *parse_csr_fw(struct drm_i915_private *dev_priv, /* fw_size is in dwords, so multiplied by 4 to convert into bytes. */ nbytes = dmc_header->fw_size * 4; + fsize += nbytes; + if (fsize > fw->size) + goto error_truncated; + if (nbytes > csr->max_fw_size) { DRM_ERROR("DMC FW too big (%u bytes)\n", nbytes); return NULL; @@ -407,6 +421,10 @@ static u32 *parse_csr_fw(struct drm_i915_private *dev_priv, } return memcpy(dmc_payload, &fw->data[readcount], nbytes); + +error_truncated: + DRM_ERROR("Truncated DMC firmware, rejecting.\n"); + return NULL; } static void intel_csr_runtime_pm_get(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 421aac80a838..be4024f0e3a8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2444,10 +2444,14 @@ static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) * main surface. */ static const struct drm_format_info ccs_formats[] = { - { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, }, - { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, }, - { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, }, - { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, }, + { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, + .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, }, + { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, + .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, }, + { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2, + .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, .has_alpha = true, }, + { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, + .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, .has_alpha = true, }, }; static const struct drm_format_info * @@ -11757,7 +11761,7 @@ intel_modeset_pipe_config(struct drm_crtc *crtc, return 0; } -static bool intel_fuzzy_clock_check(int clock1, int clock2) +bool intel_fuzzy_clock_check(int clock1, int clock2) { int diff; @@ -11816,9 +11820,6 @@ intel_compare_link_m_n(const struct intel_link_m_n *m_n, m2_n2->gmch_m, m2_n2->gmch_n, !adjust) && intel_compare_m_n(m_n->link_m, m_n->link_n, m2_n2->link_m, m2_n2->link_n, !adjust)) { - if (adjust) - *m2_n2 = *m_n; - return true; } @@ -12851,6 +12852,33 @@ static int calc_watermark_data(struct intel_atomic_state *state) return 0; } +static void intel_crtc_check_fastset(struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *dev_priv = + to_i915(new_crtc_state->base.crtc->dev); + + if (!intel_pipe_config_compare(dev_priv, old_crtc_state, + new_crtc_state, true)) + return; + + new_crtc_state->base.mode_changed = false; + new_crtc_state->update_pipe = true; + + /* + * If we're not doing the full modeset we want to + * keep the current M/N values as they may be + * sufficiently different to the computed values + * to cause problems. + * + * FIXME: should really copy more fuzzy state here + */ + new_crtc_state->fdi_m_n = old_crtc_state->fdi_m_n; + new_crtc_state->dp_m_n = old_crtc_state->dp_m_n; + new_crtc_state->dp_m2_n2 = old_crtc_state->dp_m2_n2; + new_crtc_state->has_drrs = old_crtc_state->has_drrs; +} + /** * intel_atomic_check - validate state object * @dev: drm device @@ -12899,12 +12927,8 @@ static int intel_atomic_check(struct drm_device *dev, return ret; } - if (intel_pipe_config_compare(dev_priv, - to_intel_crtc_state(old_crtc_state), - pipe_config, true)) { - crtc_state->mode_changed = false; - pipe_config->update_pipe = true; - } + intel_crtc_check_fastset(to_intel_crtc_state(old_crtc_state), + pipe_config); if (needs_modeset(crtc_state)) any_ms = true; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index d5660ac1b0d6..18e17b422701 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1707,6 +1707,7 @@ int vlv_force_pll_on(struct drm_i915_private *dev_priv, enum pipe pipe, const struct dpll *dpll); void vlv_force_pll_off(struct drm_i915_private *dev_priv, enum pipe pipe); int lpt_get_iclkip(struct drm_i915_private *dev_priv); +bool intel_fuzzy_clock_check(int clock1, int clock2); /* modesetting asserts */ void assert_panel_unlocked(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 06a11c35a784..2ec792ce49a5 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -871,6 +871,17 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) if (mipi_config->target_burst_mode_freq) { u32 bitrate = intel_dsi_bitrate(intel_dsi); + /* + * Sometimes the VBT contains a slightly lower clock, + * then the bitrate we have calculated, in this case + * just replace it with the calculated bitrate. + */ + if (mipi_config->target_burst_mode_freq < bitrate && + intel_fuzzy_clock_check( + mipi_config->target_burst_mode_freq, + bitrate)) + mipi_config->target_burst_mode_freq = bitrate; + if (mipi_config->target_burst_mode_freq < bitrate) { DRM_ERROR("Burst mode freq is less than computed\n"); return false; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 656e684e7c9a..fc018f3f53a1 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -1278,6 +1278,10 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) if (!HAS_FBC(dev_priv)) return 0; + /* https://bugs.freedesktop.org/show_bug.cgi?id=108085 */ + if (IS_GEMINILAKE(dev_priv)) + return 0; + if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) return 1; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index e7b0884ba5a5..366d5554e8e9 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -909,6 +909,13 @@ static bool intel_sdvo_set_colorimetry(struct intel_sdvo *intel_sdvo, return intel_sdvo_set_value(intel_sdvo, SDVO_CMD_SET_COLORIMETRY, &mode, 1); } +static bool intel_sdvo_set_audio_state(struct intel_sdvo *intel_sdvo, + u8 audio_state) +{ + return intel_sdvo_set_value(intel_sdvo, SDVO_CMD_SET_AUDIO_STAT, + &audio_state, 1); +} + #if 0 static void intel_sdvo_dump_hdmi_buf(struct intel_sdvo *intel_sdvo) { @@ -1366,11 +1373,6 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder, else sdvox |= SDVO_PIPE_SEL(crtc->pipe); - if (crtc_state->has_audio) { - WARN_ON_ONCE(INTEL_GEN(dev_priv) < 4); - sdvox |= SDVO_AUDIO_ENABLE; - } - if (INTEL_GEN(dev_priv) >= 4) { /* done in crtc_mode_set as the dpll_md reg must be written early */ } else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv) || @@ -1510,8 +1512,13 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder, if (sdvox & HDMI_COLOR_RANGE_16_235) pipe_config->limited_color_range = true; - if (sdvox & SDVO_AUDIO_ENABLE) - pipe_config->has_audio = true; + if (intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_AUDIO_STAT, + &val, 1)) { + u8 mask = SDVO_AUDIO_ELD_VALID | SDVO_AUDIO_PRESENCE_DETECT; + + if ((val & mask) == mask) + pipe_config->has_audio = true; + } if (intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_ENCODE, &val, 1)) { @@ -1524,6 +1531,32 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder, pipe_config->pixel_multiplier, encoder_pixel_multiplier); } +static void intel_sdvo_disable_audio(struct intel_sdvo *intel_sdvo) +{ + intel_sdvo_set_audio_state(intel_sdvo, 0); +} + +static void intel_sdvo_enable_audio(struct intel_sdvo *intel_sdvo, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + struct drm_connector *connector = conn_state->connector; + u8 *eld = connector->eld; + + eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2; + + intel_sdvo_set_audio_state(intel_sdvo, 0); + + intel_sdvo_write_infoframe(intel_sdvo, SDVO_HBUF_INDEX_ELD, + SDVO_HBUF_TX_DISABLED, + eld, drm_eld_size(eld)); + + intel_sdvo_set_audio_state(intel_sdvo, SDVO_AUDIO_ELD_VALID | + SDVO_AUDIO_PRESENCE_DETECT); +} + static void intel_disable_sdvo(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *conn_state) @@ -1533,6 +1566,9 @@ static void intel_disable_sdvo(struct intel_encoder *encoder, struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); u32 temp; + if (old_crtc_state->has_audio) + intel_sdvo_disable_audio(intel_sdvo); + intel_sdvo_set_active_outputs(intel_sdvo, 0); if (0) intel_sdvo_set_encoder_power_state(intel_sdvo, @@ -1618,6 +1654,9 @@ static void intel_enable_sdvo(struct intel_encoder *encoder, intel_sdvo_set_encoder_power_state(intel_sdvo, DRM_MODE_DPMS_ON); intel_sdvo_set_active_outputs(intel_sdvo, intel_sdvo->attached_output); + + if (pipe_config->has_audio) + intel_sdvo_enable_audio(intel_sdvo, pipe_config, conn_state); } static enum drm_mode_status @@ -2480,7 +2519,6 @@ static bool intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) { struct drm_encoder *encoder = &intel_sdvo->base.base; - struct drm_i915_private *dev_priv = to_i915(encoder->dev); struct drm_connector *connector; struct intel_encoder *intel_encoder = to_intel_encoder(encoder); struct intel_connector *intel_connector; @@ -2517,9 +2555,7 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) encoder->encoder_type = DRM_MODE_ENCODER_TMDS; connector->connector_type = DRM_MODE_CONNECTOR_DVID; - /* gen3 doesn't do the hdmi bits in the SDVO register */ - if (INTEL_GEN(dev_priv) >= 4 && - intel_sdvo_is_hdmi_connector(intel_sdvo, device)) { + if (intel_sdvo_is_hdmi_connector(intel_sdvo, device)) { connector->connector_type = DRM_MODE_CONNECTOR_HDMIA; intel_sdvo_connector->is_hdmi = true; } diff --git a/drivers/gpu/drm/i915/intel_sdvo_regs.h b/drivers/gpu/drm/i915/intel_sdvo_regs.h index db0ed499268a..e9ba3b047f93 100644 --- a/drivers/gpu/drm/i915/intel_sdvo_regs.h +++ b/drivers/gpu/drm/i915/intel_sdvo_regs.h @@ -707,6 +707,9 @@ struct intel_sdvo_enhancements_arg { #define SDVO_CMD_GET_AUDIO_ENCRYPT_PREFER 0x90 #define SDVO_CMD_SET_AUDIO_STAT 0x91 #define SDVO_CMD_GET_AUDIO_STAT 0x92 + #define SDVO_AUDIO_ELD_VALID (1 << 0) + #define SDVO_AUDIO_PRESENCE_DETECT (1 << 1) + #define SDVO_AUDIO_CP_READY (1 << 2) #define SDVO_CMD_SET_HBUF_INDEX 0x93 #define SDVO_HBUF_INDEX_ELD 0 #define SDVO_HBUF_INDEX_AVI_IF 1 diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 15f4a6dee5aa..3899e1f67ebc 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -37,7 +37,7 @@ * costly and simplifies things. We can revisit this in the future. * * Layout - * '''''' + * ~~~~~~ * * Keep things in this file ordered by WA type, as per the above (context, GT, * display, register whitelist, batchbuffer). Then, inside each type, keep the diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index d7a727a6e3d7..91edfe2498a6 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -605,7 +605,6 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, active = ipu_idmac_get_current_buffer(ipu_plane->ipu_ch); ipu_cpmem_set_buffer(ipu_plane->ipu_ch, !active, eba); ipu_idmac_select_buffer(ipu_plane->ipu_ch, !active); - ipu_plane->next_buf = !active; if (ipu_plane_separate_alpha(ipu_plane)) { active = ipu_idmac_get_current_buffer(ipu_plane->alpha_ch); ipu_cpmem_set_buffer(ipu_plane->alpha_ch, !active, @@ -710,7 +709,6 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, ipu_cpmem_set_buffer(ipu_plane->ipu_ch, 1, eba); ipu_idmac_lock_enable(ipu_plane->ipu_ch, num_bursts); ipu_plane_enable(ipu_plane); - ipu_plane->next_buf = -1; } static const struct drm_plane_helper_funcs ipu_plane_helper_funcs = { @@ -732,10 +730,15 @@ bool ipu_plane_atomic_update_pending(struct drm_plane *plane) if (ipu_state->use_pre) return ipu_prg_channel_configure_pending(ipu_plane->ipu_ch); - else if (ipu_plane->next_buf >= 0) - return ipu_idmac_get_current_buffer(ipu_plane->ipu_ch) != - ipu_plane->next_buf; + /* + * Pretend no update is pending in the non-PRE/PRG case. For this to + * happen, an atomic update would have to be deferred until after the + * start of the next frame and simultaneously interrupt latency would + * have to be high enough to let the atomic update finish and issue an + * event before the previous end of frame interrupt handler can be + * executed. + */ return false; } int ipu_planes_assign_pre(struct drm_device *dev, diff --git a/drivers/gpu/drm/imx/ipuv3-plane.h b/drivers/gpu/drm/imx/ipuv3-plane.h index 15e85e15d35c..ffacbcdd2f98 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.h +++ b/drivers/gpu/drm/imx/ipuv3-plane.h @@ -27,7 +27,6 @@ struct ipu_plane { int dp_flow; bool disabling; - int next_buf; }; struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu, diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 56a70c74af4e..b7b1ebdc8190 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-y := -Idrivers/gpu/drm/msm -ccflags-y += -Idrivers/gpu/drm/msm/disp/dpu1 -ccflags-$(CONFIG_DRM_MSM_DSI) += -Idrivers/gpu/drm/msm/dsi +ccflags-y := -I $(srctree)/$(src) +ccflags-y += -I $(srctree)/$(src)/disp/dpu1 +ccflags-$(CONFIG_DRM_MSM_DSI) += -I $(srctree)/$(src)/dsi msm-y := \ adreno/adreno_device.o \ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index d5f5e56422f5..270da14cba67 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -34,7 +34,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname) { struct device *dev = &gpu->pdev->dev; const struct firmware *fw; - struct device_node *np; + struct device_node *np, *mem_np; struct resource r; phys_addr_t mem_phys; ssize_t mem_size; @@ -48,11 +48,13 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname) if (!np) return -ENODEV; - np = of_parse_phandle(np, "memory-region", 0); - if (!np) + mem_np = of_parse_phandle(np, "memory-region", 0); + of_node_put(np); + if (!mem_np) return -EINVAL; - ret = of_address_to_resource(np, 0, &r); + ret = of_address_to_resource(mem_np, 0, &r); + of_node_put(mem_np); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 5aa3307f3f0c..dd2c4d11d0e1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1023,13 +1023,13 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc, if (!dpu_enc->hw_pp[i]) { DPU_ERROR_ENC(dpu_enc, "no pp block assigned" "at idx: %d\n", i); - return; + goto error; } if (!hw_ctl[i]) { DPU_ERROR_ENC(dpu_enc, "no ctl block assigned" "at idx: %d\n", i); - return; + goto error; } phys->hw_pp = dpu_enc->hw_pp[i]; @@ -1042,6 +1042,9 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc, } dpu_enc->mode_set_complete = true; + +error: + dpu_rm_release(&dpu_kms->rm, drm_enc); } static void _dpu_encoder_virt_enable_helper(struct drm_encoder *drm_enc) @@ -1547,8 +1550,14 @@ static void _dpu_encoder_kickoff_phys(struct dpu_encoder_virt *dpu_enc, if (!ctl) continue; - if (phys->split_role != ENC_ROLE_SLAVE) + /* + * This is cleared in frame_done worker, which isn't invoked + * for async commits. So don't set this for async, since it'll + * roll over to the next commit. + */ + if (!async && phys->split_role != ENC_ROLE_SLAVE) set_bit(i, dpu_enc->frame_busy_mask); + if (!phys->ops.needs_single_flush || !phys->ops.needs_single_flush(phys)) _dpu_encoder_trigger_flush(&dpu_enc->base, phys, 0x0, diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c index be13140967b4..b854f471e9e5 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c @@ -502,6 +502,8 @@ static int mdp5_plane_atomic_async_check(struct drm_plane *plane, static void mdp5_plane_atomic_async_update(struct drm_plane *plane, struct drm_plane_state *new_state) { + struct drm_framebuffer *old_fb = plane->state->fb; + plane->state->src_x = new_state->src_x; plane->state->src_y = new_state->src_y; plane->state->crtc_x = new_state->crtc_x; @@ -524,6 +526,8 @@ static void mdp5_plane_atomic_async_update(struct drm_plane *plane, *to_mdp5_plane_state(plane->state) = *to_mdp5_plane_state(new_state); + + new_state->fb = old_fb; } static const struct drm_plane_helper_funcs mdp5_plane_helper_funcs = { diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 18ca651ab942..23de4d1b7b1a 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -805,7 +805,8 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) seq_puts(m, " vmas:"); list_for_each_entry(vma, &msm_obj->vmas, list) - seq_printf(m, " [%s: %08llx,%s,inuse=%d]", vma->aspace->name, + seq_printf(m, " [%s: %08llx,%s,inuse=%d]", + vma->aspace != NULL ? vma->aspace->name : NULL, vma->iova, vma->mapped ? "mapped" : "unmapped", vma->inuse); diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 49c04829cf34..fcf7a83f0e6f 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -85,7 +85,7 @@ msm_gem_map_vma(struct msm_gem_address_space *aspace, vma->mapped = true; - if (aspace->mmu) + if (aspace && aspace->mmu) ret = aspace->mmu->funcs->map(aspace->mmu, vma->iova, sgt, size, prot); diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild index 581404e6544d..378c5dd692b0 100644 --- a/drivers/gpu/drm/nouveau/Kbuild +++ b/drivers/gpu/drm/nouveau/Kbuild @@ -1,7 +1,7 @@ -ccflags-y += -I$(src)/include -ccflags-y += -I$(src)/include/nvkm -ccflags-y += -I$(src)/nvkm -ccflags-y += -I$(src) +ccflags-y += -I $(srctree)/$(src)/include +ccflags-y += -I $(srctree)/$(src)/include/nvkm +ccflags-y += -I $(srctree)/$(src)/nvkm +ccflags-y += -I $(srctree)/$(src) # NVKM - HW resource manager #- code also used by various userspace tools/tests diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 00cd9ab8948d..db28012dbf54 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -17,10 +17,21 @@ config DRM_NOUVEAU select INPUT if ACPI && X86 select THERMAL if ACPI && X86 select ACPI_VIDEO if ACPI && X86 - select DRM_VM help Choose this option for open-source NVIDIA support. +config NOUVEAU_LEGACY_CTX_SUPPORT + bool "Nouveau legacy context support" + depends on DRM_NOUVEAU + select DRM_VM + default y + help + There was a version of the nouveau DDX that relied on legacy + ctx ioctls not erroring out. But that was back in time a long + ways, so offer a way to disable it now. For uapi compat with + old nouveau ddx this should be on by default, but modern distros + should consider turning it off. + config NOUVEAU_PLATFORM_DRIVER bool "Nouveau (NVIDIA) SoC GPUs" depends on DRM_NOUVEAU && ARCH_TEGRA diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.h b/drivers/gpu/drm/nouveau/dispnv50/disp.h index 2216c58620c2..7c41b0599d1a 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.h +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.h @@ -41,6 +41,7 @@ struct nv50_disp_interlock { NV50_DISP_INTERLOCK__SIZE } type; u32 data; + u32 wimm; }; void corec37d_ntfy_init(struct nouveau_bo *, u32); diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c index 2e7a0c347ddb..06ee23823a68 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head.c +++ b/drivers/gpu/drm/nouveau/dispnv50/head.c @@ -306,7 +306,7 @@ nv50_head_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state) asyh->set.or = head->func->or != NULL; } - if (asyh->state.mode_changed) + if (asyh->state.mode_changed || asyh->state.connectors_changed) nv50_head_atomic_check_mode(head, asyh); if (asyh->state.color_mgmt_changed || @@ -413,6 +413,7 @@ nv50_head_atomic_duplicate_state(struct drm_crtc *crtc) asyh->ovly = armh->ovly; asyh->dither = armh->dither; asyh->procamp = armh->procamp; + asyh->or = armh->or; asyh->dp = armh->dp; asyh->clr.mask = 0; asyh->set.mask = 0; diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c index 9103b8494279..f7dbd965e4e7 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c @@ -75,6 +75,7 @@ wimmc37b_init_(const struct nv50_wimm_func *func, struct nouveau_drm *drm, return ret; } + wndw->interlock.wimm = wndw->interlock.data; wndw->immd = func; return 0; } diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index b95181027b31..471a39a077e5 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -149,7 +149,7 @@ nv50_wndw_flush_set(struct nv50_wndw *wndw, u32 *interlock, if (asyw->set.point) { if (asyw->set.point = false, asyw->set.mask) interlock[wndw->interlock.type] |= wndw->interlock.data; - interlock[NV50_DISP_INTERLOCK_WIMM] |= wndw->interlock.data; + interlock[NV50_DISP_INTERLOCK_WIMM] |= wndw->interlock.wimm; wndw->immd->point(wndw, asyw); wndw->immd->update(wndw, interlock); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h index eef54e9b5d77..7957eafa5f0e 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h @@ -38,6 +38,7 @@ struct nvkm_i2c_bus { struct mutex mutex; struct list_head head; struct i2c_adapter i2c; + u8 enabled; }; int nvkm_i2c_bus_acquire(struct nvkm_i2c_bus *); @@ -57,6 +58,7 @@ struct nvkm_i2c_aux { struct mutex mutex; struct list_head head; struct i2c_adapter i2c; + u8 enabled; u32 intr; }; diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 5020265bfbd9..6ab9033f49da 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1094,8 +1094,11 @@ nouveau_driver_fops = { static struct drm_driver driver_stub = { .driver_features = - DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_RENDER | - DRIVER_KMS_LEGACY_CONTEXT, + DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_RENDER +#if defined(CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT) + | DRIVER_KMS_LEGACY_CONTEXT +#endif + , .open = nouveau_drm_open, .postclose = nouveau_drm_postclose, diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 1543c2f8d3d3..05d513d54555 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -169,7 +169,11 @@ nouveau_ttm_mmap(struct file *filp, struct vm_area_struct *vma) struct nouveau_drm *drm = nouveau_drm(file_priv->minor->dev); if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET)) +#if defined(CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT) return drm_legacy_mmap(filp, vma); +#else + return -EINVAL; +#endif return ttm_bo_mmap(filp, vma, &drm->ttm.bdev); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index 5f301e632599..818d21bd28d3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -365,8 +365,15 @@ nvkm_dp_train(struct nvkm_dp *dp, u32 dataKBps) * and it's better to have a failed modeset than that. */ for (cfg = nvkm_dp_rates; cfg->rate; cfg++) { - if (cfg->nr <= outp_nr && cfg->nr <= outp_bw) - failsafe = cfg; + if (cfg->nr <= outp_nr && cfg->nr <= outp_bw) { + /* Try to respect sink limits too when selecting + * lowest link configuration. + */ + if (!failsafe || + (cfg->nr <= sink_nr && cfg->bw <= sink_bw)) + failsafe = cfg; + } + if (failsafe && cfg[1].rate < dataKBps) break; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c index 157b076a1272..38c9c086754b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c @@ -109,7 +109,7 @@ nv50_bar_oneinit(struct nvkm_bar *base) struct nvkm_device *device = bar->base.subdev.device; static struct lock_class_key bar1_lock; static struct lock_class_key bar2_lock; - u64 start, limit; + u64 start, limit, size; int ret; ret = nvkm_gpuobj_new(device, 0x20000, 0, false, NULL, &bar->mem); @@ -127,7 +127,10 @@ nv50_bar_oneinit(struct nvkm_bar *base) /* BAR2 */ start = 0x0100000000ULL; - limit = start + device->func->resource_size(device, 3); + size = device->func->resource_size(device, 3); + if (!size) + return -ENOMEM; + limit = start + size; ret = nvkm_vmm_new(device, start, limit-- - start, NULL, 0, &bar2_lock, "bar2", &bar->bar2_vmm); @@ -164,7 +167,10 @@ nv50_bar_oneinit(struct nvkm_bar *base) /* BAR1 */ start = 0x0000000000ULL; - limit = start + device->func->resource_size(device, 1); + size = device->func->resource_size(device, 1); + if (!size) + return -ENOMEM; + limit = start + size; ret = nvkm_vmm_new(device, start, limit-- - start, NULL, 0, &bar1_lock, "bar1", &bar->bar1_vmm); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c index 4c1f547da463..b4e7404fe660 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c @@ -105,9 +105,15 @@ nvkm_i2c_aux_acquire(struct nvkm_i2c_aux *aux) { struct nvkm_i2c_pad *pad = aux->pad; int ret; + AUX_TRACE(aux, "acquire"); mutex_lock(&aux->mutex); - ret = nvkm_i2c_pad_acquire(pad, NVKM_I2C_PAD_AUX); + + if (aux->enabled) + ret = nvkm_i2c_pad_acquire(pad, NVKM_I2C_PAD_AUX); + else + ret = -EIO; + if (ret) mutex_unlock(&aux->mutex); return ret; @@ -145,6 +151,24 @@ nvkm_i2c_aux_del(struct nvkm_i2c_aux **paux) } } +void +nvkm_i2c_aux_init(struct nvkm_i2c_aux *aux) +{ + AUX_TRACE(aux, "init"); + mutex_lock(&aux->mutex); + aux->enabled = true; + mutex_unlock(&aux->mutex); +} + +void +nvkm_i2c_aux_fini(struct nvkm_i2c_aux *aux) +{ + AUX_TRACE(aux, "fini"); + mutex_lock(&aux->mutex); + aux->enabled = false; + mutex_unlock(&aux->mutex); +} + int nvkm_i2c_aux_ctor(const struct nvkm_i2c_aux_func *func, struct nvkm_i2c_pad *pad, int id, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h index 7d56c4ba693c..08f6b2ee64ab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h @@ -16,6 +16,8 @@ int nvkm_i2c_aux_ctor(const struct nvkm_i2c_aux_func *, struct nvkm_i2c_pad *, int nvkm_i2c_aux_new_(const struct nvkm_i2c_aux_func *, struct nvkm_i2c_pad *, int id, struct nvkm_i2c_aux **); void nvkm_i2c_aux_del(struct nvkm_i2c_aux **); +void nvkm_i2c_aux_init(struct nvkm_i2c_aux *); +void nvkm_i2c_aux_fini(struct nvkm_i2c_aux *); int nvkm_i2c_aux_xfer(struct nvkm_i2c_aux *, bool retry, u8 type, u32 addr, u8 *data, u8 *size); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c index 4f197b15acf6..ecacb22834d7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c @@ -160,8 +160,18 @@ nvkm_i2c_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_i2c *i2c = nvkm_i2c(subdev); struct nvkm_i2c_pad *pad; + struct nvkm_i2c_bus *bus; + struct nvkm_i2c_aux *aux; u32 mask; + list_for_each_entry(aux, &i2c->aux, head) { + nvkm_i2c_aux_fini(aux); + } + + list_for_each_entry(bus, &i2c->bus, head) { + nvkm_i2c_bus_fini(bus); + } + if ((mask = (1 << i2c->func->aux) - 1), i2c->func->aux_stat) { i2c->func->aux_mask(i2c, NVKM_I2C_ANY, mask, 0); i2c->func->aux_stat(i2c, &mask, &mask, &mask, &mask); @@ -180,6 +190,7 @@ nvkm_i2c_init(struct nvkm_subdev *subdev) struct nvkm_i2c *i2c = nvkm_i2c(subdev); struct nvkm_i2c_bus *bus; struct nvkm_i2c_pad *pad; + struct nvkm_i2c_aux *aux; list_for_each_entry(pad, &i2c->pad, head) { nvkm_i2c_pad_init(pad); @@ -189,6 +200,10 @@ nvkm_i2c_init(struct nvkm_subdev *subdev) nvkm_i2c_bus_init(bus); } + list_for_each_entry(aux, &i2c->aux, head) { + nvkm_i2c_aux_init(aux); + } + return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.c index 807a2b67bd64..ed50cc3736b9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.c @@ -110,6 +110,19 @@ nvkm_i2c_bus_init(struct nvkm_i2c_bus *bus) BUS_TRACE(bus, "init"); if (bus->func->init) bus->func->init(bus); + + mutex_lock(&bus->mutex); + bus->enabled = true; + mutex_unlock(&bus->mutex); +} + +void +nvkm_i2c_bus_fini(struct nvkm_i2c_bus *bus) +{ + BUS_TRACE(bus, "fini"); + mutex_lock(&bus->mutex); + bus->enabled = false; + mutex_unlock(&bus->mutex); } void @@ -126,9 +139,15 @@ nvkm_i2c_bus_acquire(struct nvkm_i2c_bus *bus) { struct nvkm_i2c_pad *pad = bus->pad; int ret; + BUS_TRACE(bus, "acquire"); mutex_lock(&bus->mutex); - ret = nvkm_i2c_pad_acquire(pad, NVKM_I2C_PAD_I2C); + + if (bus->enabled) + ret = nvkm_i2c_pad_acquire(pad, NVKM_I2C_PAD_I2C); + else + ret = -EIO; + if (ret) mutex_unlock(&bus->mutex); return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.h index bea0dd33961e..465464bba58b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.h @@ -18,6 +18,7 @@ int nvkm_i2c_bus_new_(const struct nvkm_i2c_bus_func *, struct nvkm_i2c_pad *, int id, struct nvkm_i2c_bus **); void nvkm_i2c_bus_del(struct nvkm_i2c_bus **); void nvkm_i2c_bus_init(struct nvkm_i2c_bus *); +void nvkm_i2c_bus_fini(struct nvkm_i2c_bus *); int nvkm_i2c_bit_xfer(struct nvkm_i2c_bus *, struct i2c_msg *, int); diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 64fb788b6647..f0fe975ed46c 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -1342,12 +1342,9 @@ static int dsi_pll_enable(struct dss_pll *pll) */ dsi_enable_scp_clk(dsi); - if (!dsi->vdds_dsi_enabled) { - r = regulator_enable(dsi->vdds_dsi_reg); - if (r) - goto err0; - dsi->vdds_dsi_enabled = true; - } + r = regulator_enable(dsi->vdds_dsi_reg); + if (r) + goto err0; /* XXX PLL does not come out of reset without this... */ dispc_pck_free_enable(dsi->dss->dispc, 1); @@ -1372,36 +1369,25 @@ static int dsi_pll_enable(struct dss_pll *pll) return 0; err1: - if (dsi->vdds_dsi_enabled) { - regulator_disable(dsi->vdds_dsi_reg); - dsi->vdds_dsi_enabled = false; - } + regulator_disable(dsi->vdds_dsi_reg); err0: dsi_disable_scp_clk(dsi); dsi_runtime_put(dsi); return r; } -static void dsi_pll_uninit(struct dsi_data *dsi, bool disconnect_lanes) +static void dsi_pll_disable(struct dss_pll *pll) { + struct dsi_data *dsi = container_of(pll, struct dsi_data, pll); + dsi_pll_power(dsi, DSI_PLL_POWER_OFF); - if (disconnect_lanes) { - WARN_ON(!dsi->vdds_dsi_enabled); - regulator_disable(dsi->vdds_dsi_reg); - dsi->vdds_dsi_enabled = false; - } + + regulator_disable(dsi->vdds_dsi_reg); dsi_disable_scp_clk(dsi); dsi_runtime_put(dsi); - DSSDBG("PLL uninit done\n"); -} - -static void dsi_pll_disable(struct dss_pll *pll) -{ - struct dsi_data *dsi = container_of(pll, struct dsi_data, pll); - - dsi_pll_uninit(dsi, true); + DSSDBG("PLL disable done\n"); } static int dsi_dump_dsi_clocks(struct seq_file *s, void *p) @@ -4096,11 +4082,11 @@ static int dsi_display_init_dsi(struct dsi_data *dsi) r = dss_pll_enable(&dsi->pll); if (r) - goto err0; + return r; r = dsi_configure_dsi_clocks(dsi); if (r) - goto err1; + goto err0; dss_select_dsi_clk_source(dsi->dss, dsi->module_id, dsi->module_id == 0 ? @@ -4108,6 +4094,14 @@ static int dsi_display_init_dsi(struct dsi_data *dsi) DSSDBG("PLL OK\n"); + if (!dsi->vdds_dsi_enabled) { + r = regulator_enable(dsi->vdds_dsi_reg); + if (r) + goto err1; + + dsi->vdds_dsi_enabled = true; + } + r = dsi_cio_init(dsi); if (r) goto err2; @@ -4136,10 +4130,13 @@ static int dsi_display_init_dsi(struct dsi_data *dsi) err3: dsi_cio_uninit(dsi); err2: - dss_select_dsi_clk_source(dsi->dss, dsi->module_id, DSS_CLK_SRC_FCK); + regulator_disable(dsi->vdds_dsi_reg); + dsi->vdds_dsi_enabled = false; err1: - dss_pll_disable(&dsi->pll); + dss_select_dsi_clk_source(dsi->dss, dsi->module_id, DSS_CLK_SRC_FCK); err0: + dss_pll_disable(&dsi->pll); + return r; } @@ -4158,7 +4155,12 @@ static void dsi_display_uninit_dsi(struct dsi_data *dsi, bool disconnect_lanes, dss_select_dsi_clk_source(dsi->dss, dsi->module_id, DSS_CLK_SRC_FCK); dsi_cio_uninit(dsi); - dsi_pll_uninit(dsi, disconnect_lanes); + dss_pll_disable(&dsi->pll); + + if (disconnect_lanes) { + regulator_disable(dsi->vdds_dsi_reg); + dsi->vdds_dsi_enabled = false; + } } static int dsi_display_enable(struct omap_dss_device *dssdev) diff --git a/drivers/gpu/drm/omapdrm/omap_connector.c b/drivers/gpu/drm/omapdrm/omap_connector.c index 9da94d10782a..d37e3c001e24 100644 --- a/drivers/gpu/drm/omapdrm/omap_connector.c +++ b/drivers/gpu/drm/omapdrm/omap_connector.c @@ -36,18 +36,22 @@ struct omap_connector { }; static void omap_connector_hpd_notify(struct drm_connector *connector, - struct omap_dss_device *src, enum drm_connector_status status) { - if (status == connector_status_disconnected) { - /* - * If the source is an HDMI encoder, notify it of disconnection. - * This is required to let the HDMI encoder reset any internal - * state related to connection status, such as the CEC address. - */ - if (src && src->type == OMAP_DISPLAY_TYPE_HDMI && - src->ops->hdmi.lost_hotplug) - src->ops->hdmi.lost_hotplug(src); + struct omap_connector *omap_connector = to_omap_connector(connector); + struct omap_dss_device *dssdev; + + if (status != connector_status_disconnected) + return; + + /* + * Notify all devics in the pipeline of disconnection. This is required + * to let the HDMI encoders reset their internal state related to + * connection status, such as the CEC address. + */ + for (dssdev = omap_connector->output; dssdev; dssdev = dssdev->next) { + if (dssdev->ops && dssdev->ops->hdmi.lost_hotplug) + dssdev->ops->hdmi.lost_hotplug(dssdev); } } @@ -67,7 +71,7 @@ static void omap_connector_hpd_cb(void *cb_data, if (old_status == status) return; - omap_connector_hpd_notify(connector, omap_connector->hpd, status); + omap_connector_hpd_notify(connector, status); drm_kms_helper_hotplug_event(dev); } @@ -128,7 +132,7 @@ static enum drm_connector_status omap_connector_detect( ? connector_status_connected : connector_status_disconnected; - omap_connector_hpd_notify(connector, dssdev->src, status); + omap_connector_hpd_notify(connector, status); } else { switch (omap_connector->display->type) { case OMAP_DISPLAY_TYPE_DPI: diff --git a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c index 87fa316e1d7b..58ccf648b70f 100644 --- a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c +++ b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c @@ -248,6 +248,9 @@ static int otm8009a_init_sequence(struct otm8009a *ctx) /* Send Command GRAM memory write (no parameters) */ dcs_write_seq(ctx, MIPI_DCS_WRITE_MEMORY_START); + /* Wait a short while to let the panel be ready before the 1st frame */ + mdelay(10); + return 0; } diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c index 754f6b25f265..6d9f78612dee 100644 --- a/drivers/gpu/drm/pl111/pl111_display.c +++ b/drivers/gpu/drm/pl111/pl111_display.c @@ -531,14 +531,15 @@ pl111_init_clock_divider(struct drm_device *drm) dev_err(drm->dev, "CLCD: unable to get clcdclk.\n"); return PTR_ERR(parent); } + + spin_lock_init(&priv->tim2_lock); + /* If the clock divider is broken, use the parent directly */ if (priv->variant->broken_clockdivider) { priv->clk = parent; return 0; } parent_name = __clk_get_name(parent); - - spin_lock_init(&priv->tim2_lock); div->init = &init; ret = devm_clk_hw_register(drm->dev, div); diff --git a/drivers/gpu/drm/pl111/pl111_versatile.c b/drivers/gpu/drm/pl111/pl111_versatile.c index b9baefdba38a..1c318ad32a8c 100644 --- a/drivers/gpu/drm/pl111/pl111_versatile.c +++ b/drivers/gpu/drm/pl111/pl111_versatile.c @@ -330,6 +330,7 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) ret = vexpress_muxfpga_init(); if (ret) { dev_err(dev, "unable to initialize muxfpga driver\n"); + of_node_put(np); return ret; } @@ -337,17 +338,20 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) pdev = of_find_device_by_node(np); if (!pdev) { dev_err(dev, "can't find the sysreg device, deferring\n"); + of_node_put(np); return -EPROBE_DEFER; } map = dev_get_drvdata(&pdev->dev); if (!map) { dev_err(dev, "sysreg has not yet probed\n"); platform_device_put(pdev); + of_node_put(np); return -EPROBE_DEFER; } } else { map = syscon_node_to_regmap(np); } + of_node_put(np); if (IS_ERR(map)) { dev_err(dev, "no Versatile syscon regmap\n"); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index aa898c699101..433df7036f96 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -922,12 +922,12 @@ static void avivo_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_div, ref_div_max = max(min(100 / post_div, ref_div_max), 1u); /* get matching reference and feedback divider */ - *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max); + *ref_div = min(max(den/post_div, 1u), ref_div_max); *fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den); /* limit fb divider to its maximum */ if (*fb_div > fb_div_max) { - *ref_div = DIV_ROUND_CLOSEST(*ref_div * fb_div_max, *fb_div); + *ref_div = (*ref_div * fb_div_max)/(*fb_div); *fb_div = fb_div_max; } } diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c index 7ef97b2a6eda..033f44e46daf 100644 --- a/drivers/gpu/drm/rcar-du/rcar_lvds.c +++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c @@ -283,7 +283,7 @@ static void rcar_lvds_d3_e3_pll_calc(struct rcar_lvds *lvds, struct clk *clk, * divider. */ fout = fvco / (1 << e) / div7; - div = DIV_ROUND_CLOSEST(fout, target); + div = max(1UL, DIV_ROUND_CLOSEST(fout, target)); diff = abs(fout / div - target); if (diff < pll->diff) { @@ -485,9 +485,13 @@ static void rcar_lvds_enable(struct drm_bridge *bridge) } if (lvds->info->quirks & RCAR_LVDS_QUIRK_GEN3_LVEN) { - /* Turn on the LVDS PHY. */ + /* + * Turn on the LVDS PHY. On D3, the LVEN and LVRES bit must be + * set at the same time, so don't write the register yet. + */ lvdcr0 |= LVDCR0_LVEN; - rcar_lvds_write(lvds, LVDCR0, lvdcr0); + if (!(lvds->info->quirks & RCAR_LVDS_QUIRK_PWD)) + rcar_lvds_write(lvds, LVDCR0, lvdcr0); } if (!(lvds->info->quirks & RCAR_LVDS_QUIRK_EXT_PLL)) { diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index d7fa17f12769..8ddb0c0e4bff 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -448,6 +448,14 @@ static int rockchip_drm_platform_remove(struct platform_device *pdev) return 0; } +static void rockchip_drm_platform_shutdown(struct platform_device *pdev) +{ + struct drm_device *drm = platform_get_drvdata(pdev); + + if (drm) + drm_atomic_helper_shutdown(drm); +} + static const struct of_device_id rockchip_drm_dt_ids[] = { { .compatible = "rockchip,display-subsystem", }, { /* sentinel */ }, @@ -457,6 +465,7 @@ MODULE_DEVICE_TABLE(of, rockchip_drm_dt_ids); static struct platform_driver rockchip_drm_platform_driver = { .probe = rockchip_drm_platform_probe, .remove = rockchip_drm_platform_remove, + .shutdown = rockchip_drm_platform_shutdown, .driver = { .name = "rockchip-drm", .of_match_table = rockchip_drm_dt_ids, diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 0d4ade9d4722..cd58dc81ccf3 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -924,29 +924,17 @@ static void vop_plane_atomic_async_update(struct drm_plane *plane, struct drm_plane_state *new_state) { struct vop *vop = to_vop(plane->state->crtc); - struct drm_plane_state *plane_state; - - plane_state = plane->funcs->atomic_duplicate_state(plane); - plane_state->crtc_x = new_state->crtc_x; - plane_state->crtc_y = new_state->crtc_y; - plane_state->crtc_h = new_state->crtc_h; - plane_state->crtc_w = new_state->crtc_w; - plane_state->src_x = new_state->src_x; - plane_state->src_y = new_state->src_y; - plane_state->src_h = new_state->src_h; - plane_state->src_w = new_state->src_w; - - if (plane_state->fb != new_state->fb) - drm_atomic_set_fb_for_plane(plane_state, new_state->fb); - - swap(plane_state, plane->state); - - if (plane->state->fb && plane->state->fb != new_state->fb) { - drm_framebuffer_get(plane->state->fb); - WARN_ON(drm_crtc_vblank_get(plane->state->crtc) != 0); - drm_flip_work_queue(&vop->fb_unref_work, plane->state->fb); - set_bit(VOP_PENDING_FB_UNREF, &vop->pending); - } + struct drm_framebuffer *old_fb = plane->state->fb; + + plane->state->crtc_x = new_state->crtc_x; + plane->state->crtc_y = new_state->crtc_y; + plane->state->crtc_h = new_state->crtc_h; + plane->state->crtc_w = new_state->crtc_w; + plane->state->src_x = new_state->src_x; + plane->state->src_y = new_state->src_y; + plane->state->src_h = new_state->src_h; + plane->state->src_w = new_state->src_w; + swap(plane->state->fb, new_state->fb); if (vop->is_enabled) { rockchip_drm_psr_inhibit_get_state(new_state->state); @@ -955,9 +943,22 @@ static void vop_plane_atomic_async_update(struct drm_plane *plane, vop_cfg_done(vop); spin_unlock(&vop->reg_lock); rockchip_drm_psr_inhibit_put_state(new_state->state); - } - plane->funcs->atomic_destroy_state(plane, plane_state); + /* + * A scanout can still be occurring, so we can't drop the + * reference to the old framebuffer. To solve this we get a + * reference to old_fb and set a worker to release it later. + * FIXME: if we perform 500 async_update calls before the + * vblank, then we can have 500 different framebuffers waiting + * to be released. + */ + if (old_fb && plane->state->fb != old_fb) { + drm_framebuffer_get(old_fb); + WARN_ON(drm_crtc_vblank_get(plane->state->crtc) != 0); + drm_flip_work_queue(&vop->fb_unref_work, old_fb); + set_bit(VOP_PENDING_FB_UNREF, &vop->pending); + } + } } static const struct drm_plane_helper_funcs plane_helper_funcs = { diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 7136fc91c603..e75f77ff8e0f 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -341,8 +341,8 @@ static void sun4i_tcon0_mode_set_cpu(struct sun4i_tcon *tcon, u32 block_space, start_delay; u32 tcon_div; - tcon->dclk_min_div = 4; - tcon->dclk_max_div = 127; + tcon->dclk_min_div = SUN6I_DSI_TCON_DIV; + tcon->dclk_max_div = SUN6I_DSI_TCON_DIV; sun4i_tcon0_mode_set_common(tcon, mode); diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c index 318994cd1b85..869e0aedf343 100644 --- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c +++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c @@ -358,7 +358,13 @@ static void sun6i_dsi_inst_init(struct sun6i_dsi *dsi, static u16 sun6i_dsi_get_video_start_delay(struct sun6i_dsi *dsi, struct drm_display_mode *mode) { - return mode->vtotal - (mode->vsync_end - mode->vdisplay) + 1; + u16 start = clamp(mode->vtotal - mode->vdisplay - 10, 8, 100); + u16 delay = mode->vtotal - (mode->vsync_end - mode->vdisplay) + start; + + if (delay > mode->vtotal) + delay = delay % mode->vtotal; + + return max_t(u16, delay, 1); } static void sun6i_dsi_setup_burst(struct sun6i_dsi *dsi, diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h index a07090579f84..5c3ad5be0690 100644 --- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h +++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h @@ -13,6 +13,8 @@ #include #include +#define SUN6I_DSI_TCON_DIV 4 + struct sun6i_dsi { struct drm_connector connector; struct drm_encoder encoder; diff --git a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c index 66ea3a902e36..43643ad31730 100644 --- a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c +++ b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c @@ -293,7 +293,8 @@ static int sun8i_hdmi_phy_config_h3(struct dw_hdmi *hdmi, SUN8I_HDMI_PHY_ANA_CFG2_REG_BIGSW | SUN8I_HDMI_PHY_ANA_CFG2_REG_SLV(4); ana_cfg3_init |= SUN8I_HDMI_PHY_ANA_CFG3_REG_AMPCK(9) | - SUN8I_HDMI_PHY_ANA_CFG3_REG_AMP(13); + SUN8I_HDMI_PHY_ANA_CFG3_REG_AMP(13) | + SUN8I_HDMI_PHY_ANA_CFG3_REG_EMP(3); } regmap_update_bits(phy->regs, SUN8I_HDMI_PHY_ANA_CFG1_REG, @@ -672,22 +673,13 @@ int sun8i_hdmi_phy_probe(struct sun8i_dw_hdmi *hdmi, struct device_node *node) goto err_put_clk_pll0; } } - - ret = sun8i_phy_clk_create(phy, dev, - phy->variant->has_second_pll); - if (ret) { - dev_err(dev, "Couldn't create the PHY clock\n"); - goto err_put_clk_pll1; - } - - clk_prepare_enable(phy->clk_phy); } phy->rst_phy = of_reset_control_get_shared(node, "phy"); if (IS_ERR(phy->rst_phy)) { dev_err(dev, "Could not get phy reset control\n"); ret = PTR_ERR(phy->rst_phy); - goto err_disable_clk_phy; + goto err_put_clk_pll1; } ret = reset_control_deassert(phy->rst_phy); @@ -708,18 +700,29 @@ int sun8i_hdmi_phy_probe(struct sun8i_dw_hdmi *hdmi, struct device_node *node) goto err_disable_clk_bus; } + if (phy->variant->has_phy_clk) { + ret = sun8i_phy_clk_create(phy, dev, + phy->variant->has_second_pll); + if (ret) { + dev_err(dev, "Couldn't create the PHY clock\n"); + goto err_disable_clk_mod; + } + + clk_prepare_enable(phy->clk_phy); + } + hdmi->phy = phy; return 0; +err_disable_clk_mod: + clk_disable_unprepare(phy->clk_mod); err_disable_clk_bus: clk_disable_unprepare(phy->clk_bus); err_deassert_rst_phy: reset_control_assert(phy->rst_phy); err_put_rst_phy: reset_control_put(phy->rst_phy); -err_disable_clk_phy: - clk_disable_unprepare(phy->clk_phy); err_put_clk_pll1: clk_put(phy->clk_pll1); err_put_clk_pll0: diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 4f80100ff5f3..4cce11fd8836 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -204,7 +204,7 @@ static void tegra_bo_free(struct drm_device *drm, struct tegra_bo *bo) { if (bo->pages) { dma_unmap_sg(drm->dev, bo->sgt->sgl, bo->sgt->nents, - DMA_BIDIRECTIONAL); + DMA_FROM_DEVICE); drm_gem_put_pages(&bo->gem, bo->pages, true, true); sg_free_table(bo->sgt); kfree(bo->sgt); @@ -230,7 +230,7 @@ static int tegra_bo_get_pages(struct drm_device *drm, struct tegra_bo *bo) } err = dma_map_sg(drm->dev, bo->sgt->sgl, bo->sgt->nents, - DMA_BIDIRECTIONAL); + DMA_FROM_DEVICE); if (err == 0) { err = -EFAULT; goto free_sgt; diff --git a/drivers/gpu/drm/tinydrm/ili9225.c b/drivers/gpu/drm/tinydrm/ili9225.c index 43a3b68d90a2..998d75be9e16 100644 --- a/drivers/gpu/drm/tinydrm/ili9225.c +++ b/drivers/gpu/drm/tinydrm/ili9225.c @@ -301,7 +301,7 @@ static void ili9225_pipe_disable(struct drm_simple_display_pipe *pipe) mipi->enabled = false; } -static int ili9225_dbi_command(struct mipi_dbi *mipi, u8 cmd, u8 *par, +static int ili9225_dbi_command(struct mipi_dbi *mipi, u8 *cmd, u8 *par, size_t num) { struct spi_device *spi = mipi->spi; @@ -311,11 +311,11 @@ static int ili9225_dbi_command(struct mipi_dbi *mipi, u8 cmd, u8 *par, gpiod_set_value_cansleep(mipi->dc, 0); speed_hz = mipi_dbi_spi_cmd_max_speed(spi, 1); - ret = tinydrm_spi_transfer(spi, speed_hz, NULL, 8, &cmd, 1); + ret = tinydrm_spi_transfer(spi, speed_hz, NULL, 8, cmd, 1); if (ret || !num) return ret; - if (cmd == ILI9225_WRITE_DATA_TO_GRAM && !mipi->swap_bytes) + if (*cmd == ILI9225_WRITE_DATA_TO_GRAM && !mipi->swap_bytes) bpw = 16; gpiod_set_value_cansleep(mipi->dc, 1); diff --git a/drivers/gpu/drm/tinydrm/mipi-dbi.c b/drivers/gpu/drm/tinydrm/mipi-dbi.c index 918f77c7de34..295cbcbc2bb6 100644 --- a/drivers/gpu/drm/tinydrm/mipi-dbi.c +++ b/drivers/gpu/drm/tinydrm/mipi-dbi.c @@ -153,16 +153,42 @@ EXPORT_SYMBOL(mipi_dbi_command_read); */ int mipi_dbi_command_buf(struct mipi_dbi *mipi, u8 cmd, u8 *data, size_t len) { + u8 *cmdbuf; int ret; + /* SPI requires dma-safe buffers */ + cmdbuf = kmemdup(&cmd, 1, GFP_KERNEL); + if (!cmdbuf) + return -ENOMEM; + mutex_lock(&mipi->cmdlock); - ret = mipi->command(mipi, cmd, data, len); + ret = mipi->command(mipi, cmdbuf, data, len); mutex_unlock(&mipi->cmdlock); + kfree(cmdbuf); + return ret; } EXPORT_SYMBOL(mipi_dbi_command_buf); +/* This should only be used by mipi_dbi_command() */ +int mipi_dbi_command_stackbuf(struct mipi_dbi *mipi, u8 cmd, u8 *data, size_t len) +{ + u8 *buf; + int ret; + + buf = kmemdup(data, len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = mipi_dbi_command_buf(mipi, cmd, buf, len); + + kfree(buf); + + return ret; +} +EXPORT_SYMBOL(mipi_dbi_command_stackbuf); + /** * mipi_dbi_buf_copy - Copy a framebuffer, transforming it if necessary * @dst: The destination buffer @@ -774,18 +800,18 @@ static int mipi_dbi_spi1_transfer(struct mipi_dbi *mipi, int dc, return 0; } -static int mipi_dbi_typec1_command(struct mipi_dbi *mipi, u8 cmd, +static int mipi_dbi_typec1_command(struct mipi_dbi *mipi, u8 *cmd, u8 *parameters, size_t num) { - unsigned int bpw = (cmd == MIPI_DCS_WRITE_MEMORY_START) ? 16 : 8; + unsigned int bpw = (*cmd == MIPI_DCS_WRITE_MEMORY_START) ? 16 : 8; int ret; - if (mipi_dbi_command_is_read(mipi, cmd)) + if (mipi_dbi_command_is_read(mipi, *cmd)) return -ENOTSUPP; - MIPI_DBI_DEBUG_COMMAND(cmd, parameters, num); + MIPI_DBI_DEBUG_COMMAND(*cmd, parameters, num); - ret = mipi_dbi_spi1_transfer(mipi, 0, &cmd, 1, 8); + ret = mipi_dbi_spi1_transfer(mipi, 0, cmd, 1, 8); if (ret || !num) return ret; @@ -794,7 +820,7 @@ static int mipi_dbi_typec1_command(struct mipi_dbi *mipi, u8 cmd, /* MIPI DBI Type C Option 3 */ -static int mipi_dbi_typec3_command_read(struct mipi_dbi *mipi, u8 cmd, +static int mipi_dbi_typec3_command_read(struct mipi_dbi *mipi, u8 *cmd, u8 *data, size_t len) { struct spi_device *spi = mipi->spi; @@ -803,7 +829,7 @@ static int mipi_dbi_typec3_command_read(struct mipi_dbi *mipi, u8 cmd, struct spi_transfer tr[2] = { { .speed_hz = speed_hz, - .tx_buf = &cmd, + .tx_buf = cmd, .len = 1, }, { .speed_hz = speed_hz, @@ -821,8 +847,8 @@ static int mipi_dbi_typec3_command_read(struct mipi_dbi *mipi, u8 cmd, * Support non-standard 24-bit and 32-bit Nokia read commands which * start with a dummy clock, so we need to read an extra byte. */ - if (cmd == MIPI_DCS_GET_DISPLAY_ID || - cmd == MIPI_DCS_GET_DISPLAY_STATUS) { + if (*cmd == MIPI_DCS_GET_DISPLAY_ID || + *cmd == MIPI_DCS_GET_DISPLAY_STATUS) { if (!(len == 3 || len == 4)) return -EINVAL; @@ -852,7 +878,7 @@ static int mipi_dbi_typec3_command_read(struct mipi_dbi *mipi, u8 cmd, data[i] = (buf[i] << 1) | !!(buf[i + 1] & BIT(7)); } - MIPI_DBI_DEBUG_COMMAND(cmd, data, len); + MIPI_DBI_DEBUG_COMMAND(*cmd, data, len); err_free: kfree(buf); @@ -860,7 +886,7 @@ static int mipi_dbi_typec3_command_read(struct mipi_dbi *mipi, u8 cmd, return ret; } -static int mipi_dbi_typec3_command(struct mipi_dbi *mipi, u8 cmd, +static int mipi_dbi_typec3_command(struct mipi_dbi *mipi, u8 *cmd, u8 *par, size_t num) { struct spi_device *spi = mipi->spi; @@ -868,18 +894,18 @@ static int mipi_dbi_typec3_command(struct mipi_dbi *mipi, u8 cmd, u32 speed_hz; int ret; - if (mipi_dbi_command_is_read(mipi, cmd)) + if (mipi_dbi_command_is_read(mipi, *cmd)) return mipi_dbi_typec3_command_read(mipi, cmd, par, num); - MIPI_DBI_DEBUG_COMMAND(cmd, par, num); + MIPI_DBI_DEBUG_COMMAND(*cmd, par, num); gpiod_set_value_cansleep(mipi->dc, 0); speed_hz = mipi_dbi_spi_cmd_max_speed(spi, 1); - ret = tinydrm_spi_transfer(spi, speed_hz, NULL, 8, &cmd, 1); + ret = tinydrm_spi_transfer(spi, speed_hz, NULL, 8, cmd, 1); if (ret || !num) return ret; - if (cmd == MIPI_DCS_WRITE_MEMORY_START && !mipi->swap_bytes) + if (*cmd == MIPI_DCS_WRITE_MEMORY_START && !mipi->swap_bytes) bpw = 16; gpiod_set_value_cansleep(mipi->dc, 1); diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index f0afcec72c34..30ae1c74edaa 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -312,14 +312,18 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) if (ret) goto dev_destroy; - v3d_irq_init(v3d); + ret = v3d_irq_init(v3d); + if (ret) + goto gem_destroy; ret = drm_dev_register(drm, 0); if (ret) - goto gem_destroy; + goto irq_disable; return 0; +irq_disable: + v3d_irq_disable(v3d); gem_destroy: v3d_gem_destroy(drm); dev_destroy: diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index fdda3037f7af..2fdb456b72d3 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -310,7 +310,7 @@ void v3d_reset(struct v3d_dev *v3d); void v3d_invalidate_caches(struct v3d_dev *v3d); /* v3d_irq.c */ -void v3d_irq_init(struct v3d_dev *v3d); +int v3d_irq_init(struct v3d_dev *v3d); void v3d_irq_enable(struct v3d_dev *v3d); void v3d_irq_disable(struct v3d_dev *v3d); void v3d_irq_reset(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 69338da70ddc..29d746cfce57 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -156,7 +156,7 @@ v3d_hub_irq(int irq, void *arg) return status; } -void +int v3d_irq_init(struct v3d_dev *v3d) { int ret, core; @@ -173,13 +173,22 @@ v3d_irq_init(struct v3d_dev *v3d) ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 0), v3d_hub_irq, IRQF_SHARED, "v3d_hub", v3d); + if (ret) + goto fail; + ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 1), v3d_irq, IRQF_SHARED, "v3d_core0", v3d); if (ret) - dev_err(v3d->dev, "IRQ setup failed: %d\n", ret); + goto fail; v3d_irq_enable(v3d); + return 0; + +fail: + if (ret != -EPROBE_DEFER) + dev_err(v3d->dev, "IRQ setup failed: %d\n", ret); + return ret; } void diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index d098337c10e9..7dad38e554a5 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -968,7 +968,7 @@ static void vc4_plane_atomic_async_update(struct drm_plane *plane, { struct vc4_plane_state *vc4_state, *new_vc4_state; - drm_atomic_set_fb_for_plane(plane->state, state->fb); + swap(plane->state->fb, state->fb); plane->state->crtc_x = state->crtc_x; plane->state->crtc_y = state->crtc_y; plane->state->crtc_w = state->crtc_w; diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index 36990b80e790..16077785ad47 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -174,7 +174,7 @@ int ttm_base_object_init(struct ttm_object_file *tfile, kref_init(&base->refcount); idr_preload(GFP_KERNEL); spin_lock(&tdev->object_lock); - ret = idr_alloc(&tdev->idr, base, 0, 0, GFP_NOWAIT); + ret = idr_alloc(&tdev->idr, base, 1, 0, GFP_NOWAIT); spin_unlock(&tdev->object_lock); idr_preload_end(); if (ret < 0) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 1bfa353d995c..9d2e1ce5c0a6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1240,7 +1240,13 @@ static int vmw_master_set(struct drm_device *dev, } dev_priv->active_master = vmaster; - drm_sysfs_hotplug_event(dev); + + /* + * Inform a new master that the layout may have changed while + * it was gone. + */ + if (!from_open) + drm_sysfs_hotplug_event(dev); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 88b8178d4687..6dc7d90a12b4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2129,6 +2129,11 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, return 0; if (cmd->body.shid != SVGA3D_INVALID_ID) { + /* + * This is the compat shader path - Per device guest-backed + * shaders, but user-space thinks it's per context host- + * backed shaders. + */ res = vmw_shader_lookup(vmw_context_res_man(ctx), cmd->body.shid, cmd->body.type); @@ -2137,6 +2142,14 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, ret = vmw_execbuf_res_noctx_val_add(sw_context, res); if (unlikely(ret != 0)) return ret; + + ret = vmw_resource_relocation_add + (sw_context, res, + vmw_ptr_diff(sw_context->buf_start, + &cmd->body.shid), + vmw_res_rel_normal); + if (unlikely(ret != 0)) + return ret; } } @@ -2338,7 +2351,8 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv, cmd = container_of(header, typeof(*cmd), header); - if (cmd->body.type >= SVGA3D_SHADERTYPE_DX10_MAX) { + if (cmd->body.type >= SVGA3D_SHADERTYPE_DX10_MAX || + cmd->body.type < SVGA3D_SHADERTYPE_MIN) { DRM_ERROR("Illegal shader type %u.\n", (unsigned) cmd->body.type); return -EINVAL; @@ -2574,6 +2588,10 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv, if (view_type == vmw_view_max) return -EINVAL; cmd = container_of(header, typeof(*cmd), header); + if (unlikely(cmd->sid == SVGA3D_INVALID_ID)) { + DRM_ERROR("Invalid surface id.\n"); + return -EINVAL; + } ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, user_surface_converter, &cmd->sid, &srf); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index 8b9270f31409..e4e09d47c5c0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -136,6 +136,114 @@ static int vmw_close_channel(struct rpc_channel *channel) return 0; } +/** + * vmw_port_hb_out - Send the message payload either through the + * high-bandwidth port if available, or through the backdoor otherwise. + * @channel: The rpc channel. + * @msg: NULL-terminated message. + * @hb: Whether the high-bandwidth port is available. + * + * Return: The port status. + */ +static unsigned long vmw_port_hb_out(struct rpc_channel *channel, + const char *msg, bool hb) +{ + unsigned long si, di, eax, ebx, ecx, edx; + unsigned long msg_len = strlen(msg); + + if (hb) { + unsigned long bp = channel->cookie_high; + + si = (uintptr_t) msg; + di = channel->cookie_low; + + VMW_PORT_HB_OUT( + (MESSAGE_STATUS_SUCCESS << 16) | VMW_PORT_CMD_HB_MSG, + msg_len, si, di, + VMW_HYPERVISOR_HB_PORT | (channel->channel_id << 16), + VMW_HYPERVISOR_MAGIC, bp, + eax, ebx, ecx, edx, si, di); + + return ebx; + } + + /* HB port not available. Send the message 4 bytes at a time. */ + ecx = MESSAGE_STATUS_SUCCESS << 16; + while (msg_len && (HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS)) { + unsigned int bytes = min_t(size_t, msg_len, 4); + unsigned long word = 0; + + memcpy(&word, msg, bytes); + msg_len -= bytes; + msg += bytes; + si = channel->cookie_high; + di = channel->cookie_low; + + VMW_PORT(VMW_PORT_CMD_MSG | (MSG_TYPE_SENDPAYLOAD << 16), + word, si, di, + VMW_HYPERVISOR_PORT | (channel->channel_id << 16), + VMW_HYPERVISOR_MAGIC, + eax, ebx, ecx, edx, si, di); + } + + return ecx; +} + +/** + * vmw_port_hb_in - Receive the message payload either through the + * high-bandwidth port if available, or through the backdoor otherwise. + * @channel: The rpc channel. + * @reply: Pointer to buffer holding reply. + * @reply_len: Length of the reply. + * @hb: Whether the high-bandwidth port is available. + * + * Return: The port status. + */ +static unsigned long vmw_port_hb_in(struct rpc_channel *channel, char *reply, + unsigned long reply_len, bool hb) +{ + unsigned long si, di, eax, ebx, ecx, edx; + + if (hb) { + unsigned long bp = channel->cookie_low; + + si = channel->cookie_high; + di = (uintptr_t) reply; + + VMW_PORT_HB_IN( + (MESSAGE_STATUS_SUCCESS << 16) | VMW_PORT_CMD_HB_MSG, + reply_len, si, di, + VMW_HYPERVISOR_HB_PORT | (channel->channel_id << 16), + VMW_HYPERVISOR_MAGIC, bp, + eax, ebx, ecx, edx, si, di); + + return ebx; + } + + /* HB port not available. Retrieve the message 4 bytes at a time. */ + ecx = MESSAGE_STATUS_SUCCESS << 16; + while (reply_len) { + unsigned int bytes = min_t(unsigned long, reply_len, 4); + + si = channel->cookie_high; + di = channel->cookie_low; + + VMW_PORT(VMW_PORT_CMD_MSG | (MSG_TYPE_RECVPAYLOAD << 16), + MESSAGE_STATUS_SUCCESS, si, di, + VMW_HYPERVISOR_PORT | (channel->channel_id << 16), + VMW_HYPERVISOR_MAGIC, + eax, ebx, ecx, edx, si, di); + + if ((HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS) == 0) + break; + + memcpy(reply, &ebx, bytes); + reply_len -= bytes; + reply += bytes; + } + + return ecx; +} /** @@ -148,11 +256,10 @@ static int vmw_close_channel(struct rpc_channel *channel) */ static int vmw_send_msg(struct rpc_channel *channel, const char *msg) { - unsigned long eax, ebx, ecx, edx, si, di, bp; + unsigned long eax, ebx, ecx, edx, si, di; size_t msg_len = strlen(msg); int retries = 0; - while (retries < RETRIES) { retries++; @@ -166,23 +273,14 @@ static int vmw_send_msg(struct rpc_channel *channel, const char *msg) VMW_HYPERVISOR_MAGIC, eax, ebx, ecx, edx, si, di); - if ((HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS) == 0 || - (HIGH_WORD(ecx) & MESSAGE_STATUS_HB) == 0) { - /* Expected success + high-bandwidth. Give up. */ + if ((HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS) == 0) { + /* Expected success. Give up. */ return -EINVAL; } /* Send msg */ - si = (uintptr_t) msg; - di = channel->cookie_low; - bp = channel->cookie_high; - - VMW_PORT_HB_OUT( - (MESSAGE_STATUS_SUCCESS << 16) | VMW_PORT_CMD_HB_MSG, - msg_len, si, di, - VMW_HYPERVISOR_HB_PORT | (channel->channel_id << 16), - VMW_HYPERVISOR_MAGIC, bp, - eax, ebx, ecx, edx, si, di); + ebx = vmw_port_hb_out(channel, msg, + !!(HIGH_WORD(ecx) & MESSAGE_STATUS_HB)); if ((HIGH_WORD(ebx) & MESSAGE_STATUS_SUCCESS) != 0) { return 0; @@ -211,7 +309,7 @@ STACK_FRAME_NON_STANDARD(vmw_send_msg); static int vmw_recv_msg(struct rpc_channel *channel, void **msg, size_t *msg_len) { - unsigned long eax, ebx, ecx, edx, si, di, bp; + unsigned long eax, ebx, ecx, edx, si, di; char *reply; size_t reply_len; int retries = 0; @@ -233,8 +331,7 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, VMW_HYPERVISOR_MAGIC, eax, ebx, ecx, edx, si, di); - if ((HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS) == 0 || - (HIGH_WORD(ecx) & MESSAGE_STATUS_HB) == 0) { + if ((HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS) == 0) { DRM_ERROR("Failed to get reply size for host message.\n"); return -EINVAL; } @@ -252,17 +349,8 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, /* Receive buffer */ - si = channel->cookie_high; - di = (uintptr_t) reply; - bp = channel->cookie_low; - - VMW_PORT_HB_IN( - (MESSAGE_STATUS_SUCCESS << 16) | VMW_PORT_CMD_HB_MSG, - reply_len, si, di, - VMW_HYPERVISOR_HB_PORT | (channel->channel_id << 16), - VMW_HYPERVISOR_MAGIC, bp, - eax, ebx, ecx, edx, si, di); - + ebx = vmw_port_hb_in(channel, reply, reply_len, + !!(HIGH_WORD(ecx) & MESSAGE_STATUS_HB)); if ((HIGH_WORD(ebx) & MESSAGE_STATUS_SUCCESS) == 0) { kfree(reply); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 860e21ec6a49..39eba8106d40 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -218,13 +218,14 @@ static unsigned hid_lookup_collection(struct hid_parser *parser, unsigned type) * Add a usage to the temporary parser table. */ -static int hid_add_usage(struct hid_parser *parser, unsigned usage) +static int hid_add_usage(struct hid_parser *parser, unsigned usage, u8 size) { if (parser->local.usage_index >= HID_MAX_USAGES) { hid_err(parser->device, "usage index exceeded\n"); return -1; } parser->local.usage[parser->local.usage_index] = usage; + parser->local.usage_size[parser->local.usage_index] = size; parser->local.collection_index[parser->local.usage_index] = parser->collection_stack_ptr ? parser->collection_stack[parser->collection_stack_ptr - 1] : 0; @@ -486,10 +487,7 @@ static int hid_parser_local(struct hid_parser *parser, struct hid_item *item) return 0; } - if (item->size <= 2) - data = (parser->global.usage_page << 16) + data; - - return hid_add_usage(parser, data); + return hid_add_usage(parser, data, item->size); case HID_LOCAL_ITEM_TAG_USAGE_MINIMUM: @@ -498,9 +496,6 @@ static int hid_parser_local(struct hid_parser *parser, struct hid_item *item) return 0; } - if (item->size <= 2) - data = (parser->global.usage_page << 16) + data; - parser->local.usage_minimum = data; return 0; @@ -511,9 +506,6 @@ static int hid_parser_local(struct hid_parser *parser, struct hid_item *item) return 0; } - if (item->size <= 2) - data = (parser->global.usage_page << 16) + data; - count = data - parser->local.usage_minimum; if (count + parser->local.usage_index >= HID_MAX_USAGES) { /* @@ -533,7 +525,7 @@ static int hid_parser_local(struct hid_parser *parser, struct hid_item *item) } for (n = parser->local.usage_minimum; n <= data; n++) - if (hid_add_usage(parser, n)) { + if (hid_add_usage(parser, n, item->size)) { dbg_hid("hid_add_usage failed\n"); return -1; } @@ -547,6 +539,22 @@ static int hid_parser_local(struct hid_parser *parser, struct hid_item *item) return 0; } +/* + * Concatenate Usage Pages into Usages where relevant: + * As per specification, 6.2.2.8: "When the parser encounters a main item it + * concatenates the last declared Usage Page with a Usage to form a complete + * usage value." + */ + +static void hid_concatenate_usage_page(struct hid_parser *parser) +{ + int i; + + for (i = 0; i < parser->local.usage_index; i++) + if (parser->local.usage_size[i] <= 2) + parser->local.usage[i] += parser->global.usage_page << 16; +} + /* * Process a main item. */ @@ -556,6 +564,8 @@ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item) __u32 data; int ret; + hid_concatenate_usage_page(parser); + data = item_udata(item); switch (item->tag) { @@ -765,6 +775,8 @@ static int hid_scan_main(struct hid_parser *parser, struct hid_item *item) __u32 data; int i; + hid_concatenate_usage_page(parser); + data = item_udata(item); switch (item->tag) { @@ -1301,10 +1313,10 @@ static u32 __extract(u8 *report, unsigned offset, int n) u32 hid_field_extract(const struct hid_device *hid, u8 *report, unsigned offset, unsigned n) { - if (n > 256) { - hid_warn(hid, "hid_field_extract() called with n (%d) > 256! (%s)\n", + if (n > 32) { + hid_warn(hid, "hid_field_extract() called with n (%d) > 32! (%s)\n", n, current->comm); - n = 256; + n = 32; } return __extract(report, offset, n); @@ -1624,7 +1636,7 @@ static struct hid_report *hid_get_report(struct hid_report_enum *report_enum, * Implement a generic .request() callback, using .raw_request() * DO NOT USE in hid drivers directly, but through hid_hw_request instead. */ -void __hid_request(struct hid_device *hid, struct hid_report *report, +int __hid_request(struct hid_device *hid, struct hid_report *report, int reqtype) { char *buf; @@ -1633,7 +1645,7 @@ void __hid_request(struct hid_device *hid, struct hid_report *report, buf = hid_alloc_report_buf(report, GFP_KERNEL); if (!buf) - return; + return -ENOMEM; len = hid_report_len(report); @@ -1650,8 +1662,11 @@ void __hid_request(struct hid_device *hid, struct hid_report *report, if (reqtype == HID_REQ_GET_REPORT) hid_input_report(hid, report->type, buf, ret, 0); + ret = 0; + out: kfree(buf); + return ret; } EXPORT_SYMBOL_GPL(__hid_request); diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index b607286a0bc8..46c6efea1404 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1557,52 +1557,71 @@ static void hidinput_close(struct input_dev *dev) hid_hw_close(hid); } -static void hidinput_change_resolution_multipliers(struct hid_device *hid) +static bool __hidinput_change_resolution_multipliers(struct hid_device *hid, + struct hid_report *report, bool use_logical_max) { - struct hid_report_enum *rep_enum; - struct hid_report *rep; struct hid_usage *usage; + bool update_needed = false; int i, j; - rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; - list_for_each_entry(rep, &rep_enum->report_list, list) { - bool update_needed = false; + if (report->maxfield == 0) + return false; - if (rep->maxfield == 0) - continue; + /* + * If we have more than one feature within this report we + * need to fill in the bits from the others before we can + * overwrite the ones for the Resolution Multiplier. + */ + if (report->maxfield > 1) { + hid_hw_request(hid, report, HID_REQ_GET_REPORT); + hid_hw_wait(hid); + } - /* - * If we have more than one feature within this report we - * need to fill in the bits from the others before we can - * overwrite the ones for the Resolution Multiplier. + for (i = 0; i < report->maxfield; i++) { + __s32 value = use_logical_max ? + report->field[i]->logical_maximum : + report->field[i]->logical_minimum; + + /* There is no good reason for a Resolution + * Multiplier to have a count other than 1. + * Ignore that case. */ - if (rep->maxfield > 1) { - hid_hw_request(hid, rep, HID_REQ_GET_REPORT); - hid_hw_wait(hid); - } + if (report->field[i]->report_count != 1) + continue; - for (i = 0; i < rep->maxfield; i++) { - __s32 logical_max = rep->field[i]->logical_maximum; + for (j = 0; j < report->field[i]->maxusage; j++) { + usage = &report->field[i]->usage[j]; - /* There is no good reason for a Resolution - * Multiplier to have a count other than 1. - * Ignore that case. - */ - if (rep->field[i]->report_count != 1) + if (usage->hid != HID_GD_RESOLUTION_MULTIPLIER) continue; - for (j = 0; j < rep->field[i]->maxusage; j++) { - usage = &rep->field[i]->usage[j]; + report->field[i]->value[j] = value; + update_needed = true; + } + } + + return update_needed; +} - if (usage->hid != HID_GD_RESOLUTION_MULTIPLIER) - continue; +static void hidinput_change_resolution_multipliers(struct hid_device *hid) +{ + struct hid_report_enum *rep_enum; + struct hid_report *rep; + int ret; - *rep->field[i]->value = logical_max; - update_needed = true; + rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; + list_for_each_entry(rep, &rep_enum->report_list, list) { + bool update_needed = __hidinput_change_resolution_multipliers(hid, + rep, true); + + if (update_needed) { + ret = __hid_request(hid, rep, HID_REQ_SET_REPORT); + if (ret) { + __hidinput_change_resolution_multipliers(hid, + rep, false); + return; } } - if (update_needed) - hid_hw_request(hid, rep, HID_REQ_SET_REPORT); } /* refresh our structs */ diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 199cc256e9d9..e74fa990ba13 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -836,13 +836,16 @@ static int hidpp_root_get_feature(struct hidpp_device *hidpp, u16 feature, static int hidpp_root_get_protocol_version(struct hidpp_device *hidpp) { + const u8 ping_byte = 0x5a; + u8 ping_data[3] = { 0, 0, ping_byte }; struct hidpp_report response; int ret; - ret = hidpp_send_fap_command_sync(hidpp, + ret = hidpp_send_rap_command_sync(hidpp, + REPORT_ID_HIDPP_SHORT, HIDPP_PAGE_ROOT_IDX, CMD_ROOT_GET_PROTOCOL_VERSION, - NULL, 0, &response); + ping_data, sizeof(ping_data), &response); if (ret == HIDPP_ERROR_INVALID_SUBID) { hidpp->protocol_major = 1; @@ -862,8 +865,14 @@ static int hidpp_root_get_protocol_version(struct hidpp_device *hidpp) if (ret) return ret; - hidpp->protocol_major = response.fap.params[0]; - hidpp->protocol_minor = response.fap.params[1]; + if (response.rap.params[2] != ping_byte) { + hid_err(hidpp->hid_dev, "%s: ping mismatch 0x%02x != 0x%02x\n", + __func__, response.rap.params[2], ping_byte); + return -EPROTO; + } + + hidpp->protocol_major = response.rap.params[0]; + hidpp->protocol_minor = response.rap.params[1]; return ret; } @@ -1012,7 +1021,11 @@ static int hidpp_map_battery_level(int capacity) { if (capacity < 11) return POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; - else if (capacity < 31) + /* + * The spec says this should be < 31 but some devices report 30 + * with brand new batteries and Windows reports 30 as "Good". + */ + else if (capacity < 30) return POWER_SUPPLY_CAPACITY_LEVEL_LOW; else if (capacity < 81) return POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index c02d4cad1893..1565a307170a 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -641,6 +641,13 @@ static void mt_store_field(struct hid_device *hdev, if (*target != DEFAULT_TRUE && *target != DEFAULT_FALSE && *target != DEFAULT_ZERO) { + if (usage->contactid == DEFAULT_ZERO || + usage->x == DEFAULT_ZERO || + usage->y == DEFAULT_ZERO) { + hid_dbg(hdev, + "ignoring duplicate usage on incomplete"); + return; + } usage = mt_allocate_usage(hdev, application); if (!usage) return; diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 747730d32ab6..09b8e4aac82f 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1236,13 +1236,13 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) /* Add back in missing bits of ID for non-USI pens */ wacom->id[0] |= (wacom->serial[0] >> 32) & 0xFFFFF; } - wacom->tool[0] = wacom_intuos_get_tool_type(wacom_intuos_id_mangle(wacom->id[0])); for (i = 0; i < pen_frames; i++) { unsigned char *frame = &data[i*pen_frame_len + 1]; bool valid = frame[0] & 0x80; bool prox = frame[0] & 0x40; bool range = frame[0] & 0x20; + bool invert = frame[0] & 0x10; if (!valid) continue; @@ -1251,9 +1251,24 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) wacom->shared->stylus_in_proximity = false; wacom_exit_report(wacom); input_sync(pen_input); + + wacom->tool[0] = 0; + wacom->id[0] = 0; + wacom->serial[0] = 0; return; } + if (range) { + if (!wacom->tool[0]) { /* first in range */ + /* Going into range select tool */ + if (invert) + wacom->tool[0] = BTN_TOOL_RUBBER; + else if (wacom->id[0]) + wacom->tool[0] = wacom_intuos_get_tool_type(wacom->id[0]); + else + wacom->tool[0] = BTN_TOOL_PEN; + } + input_report_abs(pen_input, ABS_X, get_unaligned_le16(&frame[1])); input_report_abs(pen_input, ABS_Y, get_unaligned_le16(&frame[3])); @@ -1275,23 +1290,26 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) get_unaligned_le16(&frame[11])); } } - input_report_abs(pen_input, ABS_PRESSURE, get_unaligned_le16(&frame[5])); - if (wacom->features.type == INTUOSP2_BT) { - input_report_abs(pen_input, ABS_DISTANCE, - range ? frame[13] : wacom->features.distance_max); - } else { - input_report_abs(pen_input, ABS_DISTANCE, - range ? frame[7] : wacom->features.distance_max); - } - input_report_key(pen_input, BTN_TOUCH, frame[0] & 0x01); - input_report_key(pen_input, BTN_STYLUS, frame[0] & 0x02); - input_report_key(pen_input, BTN_STYLUS2, frame[0] & 0x04); + if (wacom->tool[0]) { + input_report_abs(pen_input, ABS_PRESSURE, get_unaligned_le16(&frame[5])); + if (wacom->features.type == INTUOSP2_BT) { + input_report_abs(pen_input, ABS_DISTANCE, + range ? frame[13] : wacom->features.distance_max); + } else { + input_report_abs(pen_input, ABS_DISTANCE, + range ? frame[7] : wacom->features.distance_max); + } - input_report_key(pen_input, wacom->tool[0], prox); - input_event(pen_input, EV_MSC, MSC_SERIAL, wacom->serial[0]); - input_report_abs(pen_input, ABS_MISC, - wacom_intuos_id_mangle(wacom->id[0])); /* report tool id */ + input_report_key(pen_input, BTN_TOUCH, frame[0] & 0x09); + input_report_key(pen_input, BTN_STYLUS, frame[0] & 0x02); + input_report_key(pen_input, BTN_STYLUS2, frame[0] & 0x04); + + input_report_key(pen_input, wacom->tool[0], prox); + input_event(pen_input, EV_MSC, MSC_SERIAL, wacom->serial[0]); + input_report_abs(pen_input, ABS_MISC, + wacom_intuos_id_mangle(wacom->id[0])); /* report tool id */ + } wacom->shared->stylus_in_proximity = prox; @@ -1353,11 +1371,17 @@ static void wacom_intuos_pro2_bt_touch(struct wacom_wac *wacom) if (wacom->num_contacts_left <= 0) { wacom->num_contacts_left = 0; wacom->shared->touch_down = wacom_wac_finger_count_touches(wacom); + input_sync(touch_input); } } - input_report_switch(touch_input, SW_MUTE_DEVICE, !(data[281] >> 7)); - input_sync(touch_input); + if (wacom->num_contacts_left == 0) { + // Be careful that we don't accidentally call input_sync with + // only a partial set of fingers of processed + input_report_switch(touch_input, SW_MUTE_DEVICE, !(data[281] >> 7)); + input_sync(touch_input); + } + } static void wacom_intuos_pro2_bt_pad(struct wacom_wac *wacom) @@ -1365,7 +1389,7 @@ static void wacom_intuos_pro2_bt_pad(struct wacom_wac *wacom) struct input_dev *pad_input = wacom->pad_input; unsigned char *data = wacom->data; - int buttons = (data[282] << 1) | ((data[281] >> 6) & 0x01); + int buttons = data[282] | ((data[281] & 0x40) << 2); int ring = data[285] & 0x7F; bool ringstatus = data[285] & 0x80; bool prox = buttons || ringstatus; @@ -3814,7 +3838,7 @@ static void wacom_24hd_update_leds(struct wacom *wacom, int mask, int group) static bool wacom_is_led_toggled(struct wacom *wacom, int button_count, int mask, int group) { - int button_per_group; + int group_button; /* * 21UX2 has LED group 1 to the left and LED group 0 @@ -3824,9 +3848,12 @@ static bool wacom_is_led_toggled(struct wacom *wacom, int button_count, if (wacom->wacom_wac.features.type == WACOM_21UX2) group = 1 - group; - button_per_group = button_count/wacom->led.count; + group_button = group * (button_count/wacom->led.count); + + if (wacom->wacom_wac.features.type == INTUOSP2_BT) + group_button = 8; - return mask & (1 << (group * button_per_group)); + return mask & (1 << group_button); } static void wacom_update_led(struct wacom *wacom, int button_count, int mask, diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 632d25674e7f..45653029ee18 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -408,7 +408,6 @@ int hv_synic_cleanup(unsigned int cpu) clockevents_unbind_device(hv_cpu->clk_evt, cpu); hv_ce_shutdown(hv_cpu->clk_evt); - put_cpu_ptr(hv_cpu); } hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); diff --git a/drivers/hwmon/f71805f.c b/drivers/hwmon/f71805f.c index 73c681162653..623736d2a7c1 100644 --- a/drivers/hwmon/f71805f.c +++ b/drivers/hwmon/f71805f.c @@ -96,17 +96,23 @@ superio_select(int base, int ld) outb(ld, base + 1); } -static inline void +static inline int superio_enter(int base) { + if (!request_muxed_region(base, 2, DRVNAME)) + return -EBUSY; + outb(0x87, base); outb(0x87, base); + + return 0; } static inline void superio_exit(int base) { outb(0xaa, base); + release_region(base, 2); } /* @@ -1561,7 +1567,7 @@ static int __init f71805f_device_add(unsigned short address, static int __init f71805f_find(int sioaddr, unsigned short *address, struct f71805f_sio_data *sio_data) { - int err = -ENODEV; + int err; u16 devid; static const char * const names[] = { @@ -1569,8 +1575,11 @@ static int __init f71805f_find(int sioaddr, unsigned short *address, "F71872F/FG or F71806F/FG", }; - superio_enter(sioaddr); + err = superio_enter(sioaddr); + if (err) + return err; + err = -ENODEV; devid = superio_inw(sioaddr, SIO_REG_MANID); if (devid != SIO_FINTEK_ID) goto exit; diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index c22dc1e07911..c38883f748a1 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -633,7 +633,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, if (err) goto free_hwmon; - if (dev && chip && chip->ops->read && + if (dev && dev->of_node && chip && chip->ops->read && chip->info[0]->type == hwmon_chip && (chip->info[0]->config[0] & HWMON_C_REGISTER_TZ)) { const struct hwmon_channel_info **info = chip->info; diff --git a/drivers/hwmon/occ/sysfs.c b/drivers/hwmon/occ/sysfs.c index fe3d15e416e7..a71ca94c789f 100644 --- a/drivers/hwmon/occ/sysfs.c +++ b/drivers/hwmon/occ/sysfs.c @@ -42,16 +42,16 @@ static ssize_t occ_sysfs_show(struct device *dev, val = !!(header->status & OCC_STAT_ACTIVE); break; case 2: - val = !!(header->status & OCC_EXT_STAT_DVFS_OT); + val = !!(header->ext_status & OCC_EXT_STAT_DVFS_OT); break; case 3: - val = !!(header->status & OCC_EXT_STAT_DVFS_POWER); + val = !!(header->ext_status & OCC_EXT_STAT_DVFS_POWER); break; case 4: - val = !!(header->status & OCC_EXT_STAT_MEM_THROTTLE); + val = !!(header->ext_status & OCC_EXT_STAT_MEM_THROTTLE); break; case 5: - val = !!(header->status & OCC_EXT_STAT_QUICK_DROP); + val = !!(header->ext_status & OCC_EXT_STAT_QUICK_DROP); break; case 6: val = header->occ_state; diff --git a/drivers/hwmon/pc87427.c b/drivers/hwmon/pc87427.c index d1a3f2040c00..58eee8fa3e6d 100644 --- a/drivers/hwmon/pc87427.c +++ b/drivers/hwmon/pc87427.c @@ -106,6 +106,13 @@ static const char *logdev_str[2] = { DRVNAME " FMC", DRVNAME " HMC" }; #define LD_IN 1 #define LD_TEMP 1 +static inline int superio_enter(int sioaddr) +{ + if (!request_muxed_region(sioaddr, 2, DRVNAME)) + return -EBUSY; + return 0; +} + static inline void superio_outb(int sioaddr, int reg, int val) { outb(reg, sioaddr); @@ -122,6 +129,7 @@ static inline void superio_exit(int sioaddr) { outb(0x02, sioaddr); outb(0x02, sioaddr + 1); + release_region(sioaddr, 2); } /* @@ -1195,7 +1203,11 @@ static int __init pc87427_find(int sioaddr, struct pc87427_sio_data *sio_data) { u16 val; u8 cfg, cfg_b; - int i, err = 0; + int i, err; + + err = superio_enter(sioaddr); + if (err) + return err; /* Identify device */ val = force_id ? force_id : superio_inb(sioaddr, SIOREG_DEVID); diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 2e2b5851139c..cd24b375df1e 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -1230,7 +1230,8 @@ static int pmbus_add_sensor_attrs_one(struct i2c_client *client, const struct pmbus_driver_info *info, const char *name, int index, int page, - const struct pmbus_sensor_attr *attr) + const struct pmbus_sensor_attr *attr, + bool paged) { struct pmbus_sensor *base; bool upper = !!(attr->gbit & 0xff00); /* need to check STATUS_WORD */ @@ -1238,7 +1239,7 @@ static int pmbus_add_sensor_attrs_one(struct i2c_client *client, if (attr->label) { ret = pmbus_add_label(data, name, index, attr->label, - attr->paged ? page + 1 : 0); + paged ? page + 1 : 0); if (ret) return ret; } @@ -1271,6 +1272,30 @@ static int pmbus_add_sensor_attrs_one(struct i2c_client *client, return 0; } +static bool pmbus_sensor_is_paged(const struct pmbus_driver_info *info, + const struct pmbus_sensor_attr *attr) +{ + int p; + + if (attr->paged) + return true; + + /* + * Some attributes may be present on more than one page despite + * not being marked with the paged attribute. If that is the case, + * then treat the sensor as being paged and add the page suffix to the + * attribute name. + * We don't just add the paged attribute to all such attributes, in + * order to maintain the un-suffixed labels in the case where the + * attribute is only on page 0. + */ + for (p = 1; p < info->pages; p++) { + if (info->func[p] & attr->func) + return true; + } + return false; +} + static int pmbus_add_sensor_attrs(struct i2c_client *client, struct pmbus_data *data, const char *name, @@ -1284,14 +1309,15 @@ static int pmbus_add_sensor_attrs(struct i2c_client *client, index = 1; for (i = 0; i < nattrs; i++) { int page, pages; + bool paged = pmbus_sensor_is_paged(info, attrs); - pages = attrs->paged ? info->pages : 1; + pages = paged ? info->pages : 1; for (page = 0; page < pages; page++) { if (!(info->func[page] & attrs->func)) continue; ret = pmbus_add_sensor_attrs_one(client, data, info, name, index, page, - attrs); + attrs, paged); if (ret) return ret; index++; diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 167221c7628a..e4c5197417a8 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -271,7 +271,7 @@ static int pwm_fan_probe(struct platform_device *pdev) ret = pwm_fan_of_get_cooling_data(&pdev->dev, ctx); if (ret) - return ret; + goto err_pwm_disable; ctx->pwm_fan_state = ctx->pwm_fan_max_state; if (IS_ENABLED(CONFIG_THERMAL)) { diff --git a/drivers/hwmon/smsc47b397.c b/drivers/hwmon/smsc47b397.c index c0775084dde0..60e193f2e970 100644 --- a/drivers/hwmon/smsc47b397.c +++ b/drivers/hwmon/smsc47b397.c @@ -72,14 +72,19 @@ static inline void superio_select(int ld) superio_outb(0x07, ld); } -static inline void superio_enter(void) +static inline int superio_enter(void) { + if (!request_muxed_region(REG, 2, DRVNAME)) + return -EBUSY; + outb(0x55, REG); + return 0; } static inline void superio_exit(void) { outb(0xAA, REG); + release_region(REG, 2); } #define SUPERIO_REG_DEVID 0x20 @@ -300,8 +305,12 @@ static int __init smsc47b397_find(void) u8 id, rev; char *name; unsigned short addr; + int err; + + err = superio_enter(); + if (err) + return err; - superio_enter(); id = force_id ? force_id : superio_inb(SUPERIO_REG_DEVID); switch (id) { diff --git a/drivers/hwmon/smsc47m1.c b/drivers/hwmon/smsc47m1.c index c7b6a425e2c0..5eeac9853d0a 100644 --- a/drivers/hwmon/smsc47m1.c +++ b/drivers/hwmon/smsc47m1.c @@ -73,16 +73,21 @@ superio_inb(int reg) /* logical device for fans is 0x0A */ #define superio_select() superio_outb(0x07, 0x0A) -static inline void +static inline int superio_enter(void) { + if (!request_muxed_region(REG, 2, DRVNAME)) + return -EBUSY; + outb(0x55, REG); + return 0; } static inline void superio_exit(void) { outb(0xAA, REG); + release_region(REG, 2); } #define SUPERIO_REG_ACT 0x30 @@ -531,8 +536,12 @@ static int __init smsc47m1_find(struct smsc47m1_sio_data *sio_data) { u8 val; unsigned short addr; + int err; + + err = superio_enter(); + if (err) + return err; - superio_enter(); val = force_id ? force_id : superio_inb(SUPERIO_REG_DEVID); /* @@ -608,13 +617,14 @@ static int __init smsc47m1_find(struct smsc47m1_sio_data *sio_data) static void smsc47m1_restore(const struct smsc47m1_sio_data *sio_data) { if ((sio_data->activate & 0x01) == 0) { - superio_enter(); - superio_select(); - - pr_info("Disabling device\n"); - superio_outb(SUPERIO_REG_ACT, sio_data->activate); - - superio_exit(); + if (!superio_enter()) { + superio_select(); + pr_info("Disabling device\n"); + superio_outb(SUPERIO_REG_ACT, sio_data->activate); + superio_exit(); + } else { + pr_warn("Failed to disable device\n"); + } } } diff --git a/drivers/hwmon/vt1211.c b/drivers/hwmon/vt1211.c index 3a6bfa51cb94..95d5e8ec8b7f 100644 --- a/drivers/hwmon/vt1211.c +++ b/drivers/hwmon/vt1211.c @@ -226,15 +226,21 @@ static inline void superio_select(int sio_cip, int ldn) outb(ldn, sio_cip + 1); } -static inline void superio_enter(int sio_cip) +static inline int superio_enter(int sio_cip) { + if (!request_muxed_region(sio_cip, 2, DRVNAME)) + return -EBUSY; + outb(0x87, sio_cip); outb(0x87, sio_cip); + + return 0; } static inline void superio_exit(int sio_cip) { outb(0xaa, sio_cip); + release_region(sio_cip, 2); } /* --------------------------------------------------------------------- @@ -1282,11 +1288,14 @@ static int __init vt1211_device_add(unsigned short address) static int __init vt1211_find(int sio_cip, unsigned short *address) { - int err = -ENODEV; + int err; int devid; - superio_enter(sio_cip); + err = superio_enter(sio_cip); + if (err) + return err; + err = -ENODEV; devid = force_id ? force_id : superio_inb(sio_cip, SIO_VT1211_DEVID); if (devid != SIO_VT1211_ID) goto EXIT; diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index ba7aaf421f36..8ff326c0c406 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -84,6 +84,7 @@ struct msc_iter { * @reg_base: register window base address * @thdev: intel_th_device pointer * @win_list: list of windows in multiblock mode + * @single_sgt: single mode buffer * @nr_pages: total number of pages allocated for this buffer * @single_sz: amount of data in single mode * @single_wrap: single mode wrap occurred @@ -104,6 +105,7 @@ struct msc { struct intel_th_device *thdev; struct list_head win_list; + struct sg_table single_sgt; unsigned long nr_pages; unsigned long single_sz; unsigned int single_wrap : 1; @@ -617,22 +619,45 @@ static void intel_th_msc_deactivate(struct intel_th_device *thdev) */ static int msc_buffer_contig_alloc(struct msc *msc, unsigned long size) { + unsigned long nr_pages = size >> PAGE_SHIFT; unsigned int order = get_order(size); struct page *page; + int ret; if (!size) return 0; + ret = sg_alloc_table(&msc->single_sgt, 1, GFP_KERNEL); + if (ret) + goto err_out; + + ret = -ENOMEM; page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); if (!page) - return -ENOMEM; + goto err_free_sgt; split_page(page, order); - msc->nr_pages = size >> PAGE_SHIFT; + sg_set_buf(msc->single_sgt.sgl, page_address(page), size); + + ret = dma_map_sg(msc_dev(msc)->parent->parent, msc->single_sgt.sgl, 1, + DMA_FROM_DEVICE); + if (ret < 0) + goto err_free_pages; + + msc->nr_pages = nr_pages; msc->base = page_address(page); - msc->base_addr = page_to_phys(page); + msc->base_addr = sg_dma_address(msc->single_sgt.sgl); return 0; + +err_free_pages: + __free_pages(page, order); + +err_free_sgt: + sg_free_table(&msc->single_sgt); + +err_out: + return ret; } /** @@ -643,6 +668,10 @@ static void msc_buffer_contig_free(struct msc *msc) { unsigned long off; + dma_unmap_sg(msc_dev(msc)->parent->parent, msc->single_sgt.sgl, + 1, DMA_FROM_DEVICE); + sg_free_table(&msc->single_sgt); + for (off = 0; off < msc->nr_pages << PAGE_SHIFT; off += PAGE_SIZE) { struct page *page = virt_to_page(msc->base + off); diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 1cf6290d6435..70f2cb90adc5 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -165,6 +165,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x34a6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Comet Lake */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x02a6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { 0 }, }; diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index c7ba8acfd4d5..e55b902560de 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -166,11 +166,10 @@ stm_master(struct stm_device *stm, unsigned int idx) static int stp_master_alloc(struct stm_device *stm, unsigned int idx) { struct stp_master *master; - size_t size; - size = ALIGN(stm->data->sw_nchannels, 8) / 8; - size += sizeof(struct stp_master); - master = kzalloc(size, GFP_ATOMIC); + master = kzalloc(struct_size(master, chan_map, + BITS_TO_LONGS(stm->data->sw_nchannels)), + GFP_ATOMIC); if (!master) return -ENOMEM; @@ -218,8 +217,8 @@ stm_output_disclaim(struct stm_device *stm, struct stm_output *output) bitmap_release_region(&master->chan_map[0], output->channel, ilog2(output->nr_chans)); - output->nr_chans = 0; master->nr_free += output->nr_chans; + output->nr_chans = 0; } /* diff --git a/drivers/i2c/busses/i2c-acorn.c b/drivers/i2c/busses/i2c-acorn.c index f4a5ae69bf6a..fa3763e4b3ee 100644 --- a/drivers/i2c/busses/i2c-acorn.c +++ b/drivers/i2c/busses/i2c-acorn.c @@ -81,6 +81,7 @@ static struct i2c_algo_bit_data ioc_data = { static struct i2c_adapter ioc_ops = { .nr = 0, + .name = "ioc", .algo_data = &ioc_data, }; diff --git a/drivers/i2c/busses/i2c-mlxcpld.c b/drivers/i2c/busses/i2c-mlxcpld.c index 745ed43a22d6..2fd717d8dd30 100644 --- a/drivers/i2c/busses/i2c-mlxcpld.c +++ b/drivers/i2c/busses/i2c-mlxcpld.c @@ -503,6 +503,7 @@ static int mlxcpld_i2c_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); priv->dev = &pdev->dev; + priv->base_addr = MLXPLAT_CPLD_LPC_I2C_BASE_ADDR; /* Register with i2c layer */ mlxcpld_i2c_adapter.timeout = usecs_to_jiffies(MLXCPLD_I2C_XFER_TO); @@ -518,7 +519,6 @@ static int mlxcpld_i2c_probe(struct platform_device *pdev) mlxcpld_i2c_adapter.nr = pdev->id; priv->adap = mlxcpld_i2c_adapter; priv->adap.dev.parent = &pdev->dev; - priv->base_addr = MLXPLAT_CPLD_LPC_I2C_BASE_ADDR; i2c_set_adapdata(&priv->adap, priv); err = i2c_add_numbered_adapter(&priv->adap); diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c index f14d4b3fab44..f724c8e6b360 100644 --- a/drivers/i2c/busses/i2c-synquacer.c +++ b/drivers/i2c/busses/i2c-synquacer.c @@ -351,7 +351,7 @@ static int synquacer_i2c_doxfer(struct synquacer_i2c *i2c, /* wait 2 clock periods to ensure the stop has been through the bus */ udelay(DIV_ROUND_UP(2 * 1000, i2c->speed_khz)); - return 0; + return ret; } static irqreturn_t synquacer_i2c_isr(int irq, void *dev_id) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 0c51c0ffdda9..8d6b6eeef71c 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -718,11 +718,16 @@ static const struct i2c_algorithm xiic_algorithm = { .functionality = xiic_func, }; +static const struct i2c_adapter_quirks xiic_quirks = { + .max_read_len = 255, +}; + static const struct i2c_adapter xiic_adapter = { .owner = THIS_MODULE, .name = DRIVER_NAME, .class = I2C_CLASS_DEPRECATED, .algo = &xiic_algorithm, + .quirks = &xiic_quirks, }; diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 688aa3b5f3ac..2f0f88b79c4b 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1871,8 +1871,11 @@ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) if (WARN_ON(!msgs || num < 1)) return -EINVAL; - if (WARN_ON(test_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags))) + if (test_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags)) { + if (!test_and_set_bit(I2C_ALF_SUSPEND_REPORTED, &adap->locked_flags)) + dev_WARN(&adap->dev, "Transfer while suspended\n"); return -ESHUTDOWN; + } if (adap->quirks && i2c_check_for_quirks(adap, msgs, num)) return -EOPNOTSUPP; diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 3f7b9af11137..776f36690448 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -283,6 +283,7 @@ static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, msgs[i].len < 1 || msgs[i].buf[0] < 1 || msgs[i].len < msgs[i].buf[0] + I2C_SMBUS_BLOCK_MAX) { + i++; res = -EINVAL; break; } diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 1412abcff010..5f4bd52121fe 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -385,8 +385,9 @@ static void i3c_bus_set_addr_slot_status(struct i3c_bus *bus, u16 addr, return; ptr = bus->addrslots + (bitpos / BITS_PER_LONG); - *ptr &= ~(I3C_ADDR_SLOT_STATUS_MASK << (bitpos % BITS_PER_LONG)); - *ptr |= status << (bitpos % BITS_PER_LONG); + *ptr &= ~((unsigned long)I3C_ADDR_SLOT_STATUS_MASK << + (bitpos % BITS_PER_LONG)); + *ptr |= (unsigned long)status << (bitpos % BITS_PER_LONG); } static bool i3c_bus_dev_addr_is_avail(struct i3c_bus *bus, u8 addr) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 76db6e5cc296..9ca21a8dfcd7 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -809,6 +809,7 @@ config STM32_DFSDM_ADC depends on (ARCH_STM32 && OF) || COMPILE_TEST select STM32_DFSDM_CORE select REGMAP_MMIO + select IIO_BUFFER select IIO_BUFFER_HW_CONSUMER help Select this option to support ADCSigma delta modulator for diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index 54d9978b2740..a4310600a853 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -62,7 +62,7 @@ int ad_sd_write_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, struct spi_transfer t = { .tx_buf = data, .len = size + 1, - .cs_change = sigma_delta->bus_locked, + .cs_change = sigma_delta->keep_cs_asserted, }; struct spi_message m; int ret; @@ -218,6 +218,7 @@ static int ad_sd_calibrate(struct ad_sigma_delta *sigma_delta, spi_bus_lock(sigma_delta->spi->master); sigma_delta->bus_locked = true; + sigma_delta->keep_cs_asserted = true; reinit_completion(&sigma_delta->completion); ret = ad_sigma_delta_set_mode(sigma_delta, mode); @@ -235,9 +236,10 @@ static int ad_sd_calibrate(struct ad_sigma_delta *sigma_delta, ret = 0; } out: + sigma_delta->keep_cs_asserted = false; + ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_IDLE); sigma_delta->bus_locked = false; spi_bus_unlock(sigma_delta->spi->master); - ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_IDLE); return ret; } @@ -290,6 +292,7 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, spi_bus_lock(sigma_delta->spi->master); sigma_delta->bus_locked = true; + sigma_delta->keep_cs_asserted = true; reinit_completion(&sigma_delta->completion); ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_SINGLE); @@ -299,9 +302,6 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, ret = wait_for_completion_interruptible_timeout( &sigma_delta->completion, HZ); - sigma_delta->bus_locked = false; - spi_bus_unlock(sigma_delta->spi->master); - if (ret == 0) ret = -EIO; if (ret < 0) @@ -322,7 +322,10 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, sigma_delta->irq_dis = true; } + sigma_delta->keep_cs_asserted = false; ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_IDLE); + sigma_delta->bus_locked = false; + spi_bus_unlock(sigma_delta->spi->master); mutex_unlock(&indio_dev->mlock); if (ret) @@ -359,6 +362,8 @@ static int ad_sd_buffer_postenable(struct iio_dev *indio_dev) spi_bus_lock(sigma_delta->spi->master); sigma_delta->bus_locked = true; + sigma_delta->keep_cs_asserted = true; + ret = ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_CONTINUOUS); if (ret) goto err_unlock; @@ -387,6 +392,7 @@ static int ad_sd_buffer_postdisable(struct iio_dev *indio_dev) sigma_delta->irq_dis = true; } + sigma_delta->keep_cs_asserted = false; ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_IDLE); sigma_delta->bus_locked = false; diff --git a/drivers/iio/adc/npcm_adc.c b/drivers/iio/adc/npcm_adc.c index 9e25bbec9c70..193b3b81de4d 100644 --- a/drivers/iio/adc/npcm_adc.c +++ b/drivers/iio/adc/npcm_adc.c @@ -149,7 +149,7 @@ static int npcm_adc_read_raw(struct iio_dev *indio_dev, } return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: - if (info->vref) { + if (!IS_ERR(info->vref)) { vref_uv = regulator_get_voltage(info->vref); *val = vref_uv / 1000; } else { diff --git a/drivers/iio/adc/qcom-spmi-adc5.c b/drivers/iio/adc/qcom-spmi-adc5.c index 6a866cc187f7..21fdcde77883 100644 --- a/drivers/iio/adc/qcom-spmi-adc5.c +++ b/drivers/iio/adc/qcom-spmi-adc5.c @@ -664,6 +664,7 @@ static const struct of_device_id adc5_match_table[] = { }, { } }; +MODULE_DEVICE_TABLE(of, adc5_match_table); static int adc5_get_dt_data(struct adc5_chip *adc, struct device_node *node) { diff --git a/drivers/iio/adc/ti-ads124s08.c b/drivers/iio/adc/ti-ads124s08.c index 53f17e4f2f23..552c2be8d87a 100644 --- a/drivers/iio/adc/ti-ads124s08.c +++ b/drivers/iio/adc/ti-ads124s08.c @@ -202,7 +202,7 @@ static int ads124s_read(struct iio_dev *indio_dev, unsigned int chan) }; priv->data[0] = ADS124S08_CMD_RDATA; - memset(&priv->data[1], ADS124S08_CMD_NOP, sizeof(priv->data)); + memset(&priv->data[1], ADS124S08_CMD_NOP, sizeof(priv->data) - 1); ret = spi_sync_transfer(priv->spi, t, ARRAY_SIZE(t)); if (ret < 0) diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c index 0ad63592cc3c..1e47bef72bb7 100644 --- a/drivers/iio/adc/ti-ads7950.c +++ b/drivers/iio/adc/ti-ads7950.c @@ -56,6 +56,9 @@ struct ti_ads7950_state { struct spi_message ring_msg; struct spi_message scan_single_msg; + /* Lock to protect the spi xfer buffers */ + struct mutex slock; + struct regulator *reg; unsigned int vref_mv; @@ -268,6 +271,7 @@ static irqreturn_t ti_ads7950_trigger_handler(int irq, void *p) struct ti_ads7950_state *st = iio_priv(indio_dev); int ret; + mutex_lock(&st->slock); ret = spi_sync(st->spi, &st->ring_msg); if (ret < 0) goto out; @@ -276,6 +280,7 @@ static irqreturn_t ti_ads7950_trigger_handler(int irq, void *p) iio_get_time_ns(indio_dev)); out: + mutex_unlock(&st->slock); iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; @@ -286,7 +291,7 @@ static int ti_ads7950_scan_direct(struct iio_dev *indio_dev, unsigned int ch) struct ti_ads7950_state *st = iio_priv(indio_dev); int ret, cmd; - mutex_lock(&indio_dev->mlock); + mutex_lock(&st->slock); cmd = TI_ADS7950_CR_WRITE | TI_ADS7950_CR_CHAN(ch) | st->settings; st->single_tx = cmd; @@ -298,7 +303,7 @@ static int ti_ads7950_scan_direct(struct iio_dev *indio_dev, unsigned int ch) ret = st->single_rx; out: - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&st->slock); return ret; } @@ -432,16 +437,19 @@ static int ti_ads7950_probe(struct spi_device *spi) if (ACPI_COMPANION(&spi->dev)) st->vref_mv = TI_ADS7950_VA_MV_ACPI_DEFAULT; + mutex_init(&st->slock); + st->reg = devm_regulator_get(&spi->dev, "vref"); if (IS_ERR(st->reg)) { dev_err(&spi->dev, "Failed get get regulator \"vref\"\n"); - return PTR_ERR(st->reg); + ret = PTR_ERR(st->reg); + goto error_destroy_mutex; } ret = regulator_enable(st->reg); if (ret) { dev_err(&spi->dev, "Failed to enable regulator \"vref\"\n"); - return ret; + goto error_destroy_mutex; } ret = iio_triggered_buffer_setup(indio_dev, NULL, @@ -463,6 +471,8 @@ static int ti_ads7950_probe(struct spi_device *spi) iio_triggered_buffer_cleanup(indio_dev); error_disable_reg: regulator_disable(st->reg); +error_destroy_mutex: + mutex_destroy(&st->slock); return ret; } @@ -475,6 +485,7 @@ static int ti_ads7950_remove(struct spi_device *spi) iio_device_unregister(indio_dev); iio_triggered_buffer_cleanup(indio_dev); regulator_disable(st->reg); + mutex_destroy(&st->slock); return 0; } diff --git a/drivers/iio/adc/ti-ads8688.c b/drivers/iio/adc/ti-ads8688.c index 8b4568edd5cb..7f16c77b99fb 100644 --- a/drivers/iio/adc/ti-ads8688.c +++ b/drivers/iio/adc/ti-ads8688.c @@ -397,7 +397,7 @@ static irqreturn_t ads8688_trigger_handler(int irq, void *p) } iio_push_to_buffers_with_timestamp(indio_dev, buffer, - pf->timestamp); + iio_get_time_ns(indio_dev)); iio_trigger_notify_done(indio_dev->trig); diff --git a/drivers/iio/common/ssp_sensors/ssp_iio.c b/drivers/iio/common/ssp_sensors/ssp_iio.c index 645f2e3975db..e38f704d88b7 100644 --- a/drivers/iio/common/ssp_sensors/ssp_iio.c +++ b/drivers/iio/common/ssp_sensors/ssp_iio.c @@ -81,7 +81,7 @@ int ssp_common_process_data(struct iio_dev *indio_dev, void *buf, unsigned int len, int64_t timestamp) { __le32 time; - int64_t calculated_time; + int64_t calculated_time = 0; struct ssp_sensor_data *spd = iio_priv(indio_dev); if (indio_dev->scan_bytes == 0) diff --git a/drivers/iio/dac/ds4424.c b/drivers/iio/dac/ds4424.c index 883a47562055..714a97f91319 100644 --- a/drivers/iio/dac/ds4424.c +++ b/drivers/iio/dac/ds4424.c @@ -166,7 +166,7 @@ static int ds4424_verify_chip(struct iio_dev *indio_dev) { int ret, val; - ret = ds4424_get_value(indio_dev, &val, DS4424_DAC_ADDR(0)); + ret = ds4424_get_value(indio_dev, &val, 0); if (ret < 0) dev_err(&indio_dev->dev, "%s failed. ret: %d\n", __func__, ret); diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index 650de0fefb7b..385f14a4d5a7 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -471,7 +471,10 @@ inv_mpu6050_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT_PLUS_MICRO; case IIO_TEMP: *val = 0; - *val2 = INV_MPU6050_TEMP_SCALE; + if (st->chip_type == INV_ICM20602) + *val2 = INV_ICM20602_TEMP_SCALE; + else + *val2 = INV_MPU6050_TEMP_SCALE; return IIO_VAL_INT_PLUS_MICRO; default: @@ -480,7 +483,10 @@ inv_mpu6050_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_OFFSET: switch (chan->type) { case IIO_TEMP: - *val = INV_MPU6050_TEMP_OFFSET; + if (st->chip_type == INV_ICM20602) + *val = INV_ICM20602_TEMP_OFFSET; + else + *val = INV_MPU6050_TEMP_OFFSET; return IIO_VAL_INT; default: @@ -845,6 +851,32 @@ static const struct iio_chan_spec inv_mpu_channels[] = { INV_MPU6050_CHAN(IIO_ACCEL, IIO_MOD_Z, INV_MPU6050_SCAN_ACCL_Z), }; +static const struct iio_chan_spec inv_icm20602_channels[] = { + IIO_CHAN_SOFT_TIMESTAMP(INV_ICM20602_SCAN_TIMESTAMP), + { + .type = IIO_TEMP, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) + | BIT(IIO_CHAN_INFO_OFFSET) + | BIT(IIO_CHAN_INFO_SCALE), + .scan_index = INV_ICM20602_SCAN_TEMP, + .scan_type = { + .sign = 's', + .realbits = 16, + .storagebits = 16, + .shift = 0, + .endianness = IIO_BE, + }, + }, + + INV_MPU6050_CHAN(IIO_ANGL_VEL, IIO_MOD_X, INV_ICM20602_SCAN_GYRO_X), + INV_MPU6050_CHAN(IIO_ANGL_VEL, IIO_MOD_Y, INV_ICM20602_SCAN_GYRO_Y), + INV_MPU6050_CHAN(IIO_ANGL_VEL, IIO_MOD_Z, INV_ICM20602_SCAN_GYRO_Z), + + INV_MPU6050_CHAN(IIO_ACCEL, IIO_MOD_Y, INV_ICM20602_SCAN_ACCL_Y), + INV_MPU6050_CHAN(IIO_ACCEL, IIO_MOD_X, INV_ICM20602_SCAN_ACCL_X), + INV_MPU6050_CHAN(IIO_ACCEL, IIO_MOD_Z, INV_ICM20602_SCAN_ACCL_Z), +}; + /* * The user can choose any frequency between INV_MPU6050_MIN_FIFO_RATE and * INV_MPU6050_MAX_FIFO_RATE, but only these frequencies are matched by the @@ -1100,8 +1132,14 @@ int inv_mpu_core_probe(struct regmap *regmap, int irq, const char *name, indio_dev->name = name; else indio_dev->name = dev_name(dev); - indio_dev->channels = inv_mpu_channels; - indio_dev->num_channels = ARRAY_SIZE(inv_mpu_channels); + + if (chip_type == INV_ICM20602) { + indio_dev->channels = inv_icm20602_channels; + indio_dev->num_channels = ARRAY_SIZE(inv_icm20602_channels); + } else { + indio_dev->channels = inv_mpu_channels; + indio_dev->num_channels = ARRAY_SIZE(inv_mpu_channels); + } indio_dev->info = &mpu_info; indio_dev->modes = INDIO_BUFFER_TRIGGERED; diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h index 325afd9f5f61..3d5fe4474378 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h @@ -208,6 +208,9 @@ struct inv_mpu6050_state { #define INV_MPU6050_BYTES_PER_3AXIS_SENSOR 6 #define INV_MPU6050_FIFO_COUNT_BYTE 2 +/* ICM20602 FIFO samples include temperature readings */ +#define INV_ICM20602_BYTES_PER_TEMP_SENSOR 2 + /* mpu6500 registers */ #define INV_MPU6500_REG_ACCEL_CONFIG_2 0x1D #define INV_MPU6500_REG_ACCEL_OFFSET 0x77 @@ -229,6 +232,9 @@ struct inv_mpu6050_state { #define INV_MPU6050_GYRO_CONFIG_FSR_SHIFT 3 #define INV_MPU6050_ACCL_CONFIG_FSR_SHIFT 3 +#define INV_ICM20602_TEMP_OFFSET 8170 +#define INV_ICM20602_TEMP_SCALE 3060 + /* 6 + 6 round up and plus 8 */ #define INV_MPU6050_OUTPUT_DATA_SIZE 24 @@ -270,7 +276,7 @@ struct inv_mpu6050_state { #define INV_ICM20608_WHOAMI_VALUE 0xAF #define INV_ICM20602_WHOAMI_VALUE 0x12 -/* scan element definition */ +/* scan element definition for generic MPU6xxx devices */ enum inv_mpu6050_scan { INV_MPU6050_SCAN_ACCL_X, INV_MPU6050_SCAN_ACCL_Y, @@ -281,6 +287,18 @@ enum inv_mpu6050_scan { INV_MPU6050_SCAN_TIMESTAMP, }; +/* scan element definition for ICM20602, which includes temperature */ +enum inv_icm20602_scan { + INV_ICM20602_SCAN_ACCL_X, + INV_ICM20602_SCAN_ACCL_Y, + INV_ICM20602_SCAN_ACCL_Z, + INV_ICM20602_SCAN_TEMP, + INV_ICM20602_SCAN_GYRO_X, + INV_ICM20602_SCAN_GYRO_Y, + INV_ICM20602_SCAN_GYRO_Z, + INV_ICM20602_SCAN_TIMESTAMP, +}; + enum inv_mpu6050_filter_e { INV_MPU6050_FILTER_256HZ_NOLPF2 = 0, INV_MPU6050_FILTER_188HZ, diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c index 548e042f7b5b..57bd11bde56b 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c @@ -207,6 +207,9 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p) if (st->chip_config.gyro_fifo_enable) bytes_per_datum += INV_MPU6050_BYTES_PER_3AXIS_SENSOR; + if (st->chip_type == INV_ICM20602) + bytes_per_datum += INV_ICM20602_BYTES_PER_TEMP_SENSOR; + /* * read fifo_count register to know how many bytes are inside the FIFO * right now diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h index d1d8d07a0714..83425b7b580c 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h @@ -265,6 +265,7 @@ struct st_lsm6dsx_sensor { * @conf_lock: Mutex to prevent concurrent FIFO configuration update. * @page_lock: Mutex to prevent concurrent memory page configuration. * @fifo_mode: FIFO operating mode supported by the device. + * @suspend_mask: Suspended sensor bitmask. * @enable_mask: Enabled sensor bitmask. * @ts_sip: Total number of timestamp samples in a given pattern. * @sip: Total number of samples (acc/gyro/ts) in a given pattern. @@ -282,6 +283,7 @@ struct st_lsm6dsx_hw { struct mutex page_lock; enum st_lsm6dsx_fifo_mode fifo_mode; + u8 suspend_mask; u8 enable_mask; u8 ts_sip; u8 sip; diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 12e29dda9b98..96986d84e418 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -1023,8 +1023,6 @@ static int __maybe_unused st_lsm6dsx_suspend(struct device *dev) { struct st_lsm6dsx_hw *hw = dev_get_drvdata(dev); struct st_lsm6dsx_sensor *sensor; - const struct st_lsm6dsx_reg *reg; - unsigned int data; int i, err = 0; for (i = 0; i < ST_LSM6DSX_ID_MAX; i++) { @@ -1035,12 +1033,16 @@ static int __maybe_unused st_lsm6dsx_suspend(struct device *dev) if (!(hw->enable_mask & BIT(sensor->id))) continue; - reg = &st_lsm6dsx_odr_table[sensor->id].reg; - data = ST_LSM6DSX_SHIFT_VAL(0, reg->mask); - err = st_lsm6dsx_update_bits_locked(hw, reg->addr, reg->mask, - data); + if (sensor->id == ST_LSM6DSX_ID_EXT0 || + sensor->id == ST_LSM6DSX_ID_EXT1 || + sensor->id == ST_LSM6DSX_ID_EXT2) + err = st_lsm6dsx_shub_set_enable(sensor, false); + else + err = st_lsm6dsx_sensor_set_enable(sensor, false); if (err < 0) return err; + + hw->suspend_mask |= BIT(sensor->id); } if (hw->fifo_mode != ST_LSM6DSX_FIFO_BYPASS) @@ -1060,12 +1062,19 @@ static int __maybe_unused st_lsm6dsx_resume(struct device *dev) continue; sensor = iio_priv(hw->iio_devs[i]); - if (!(hw->enable_mask & BIT(sensor->id))) + if (!(hw->suspend_mask & BIT(sensor->id))) continue; - err = st_lsm6dsx_set_odr(sensor, sensor->odr); + if (sensor->id == ST_LSM6DSX_ID_EXT0 || + sensor->id == ST_LSM6DSX_ID_EXT1 || + sensor->id == ST_LSM6DSX_ID_EXT2) + err = st_lsm6dsx_shub_set_enable(sensor, true); + else + err = st_lsm6dsx_sensor_set_enable(sensor, true); if (err < 0) return err; + + hw->suspend_mask &= ~BIT(sensor->id); } if (hw->enable_mask) diff --git a/drivers/iio/magnetometer/hmc5843_i2c.c b/drivers/iio/magnetometer/hmc5843_i2c.c index 3de7f4426ac4..86abba5827a2 100644 --- a/drivers/iio/magnetometer/hmc5843_i2c.c +++ b/drivers/iio/magnetometer/hmc5843_i2c.c @@ -58,8 +58,13 @@ static const struct regmap_config hmc5843_i2c_regmap_config = { static int hmc5843_i2c_probe(struct i2c_client *cli, const struct i2c_device_id *id) { + struct regmap *regmap = devm_regmap_init_i2c(cli, + &hmc5843_i2c_regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + return hmc5843_common_probe(&cli->dev, - devm_regmap_init_i2c(cli, &hmc5843_i2c_regmap_config), + regmap, id->driver_data, id->name); } diff --git a/drivers/iio/magnetometer/hmc5843_spi.c b/drivers/iio/magnetometer/hmc5843_spi.c index 535f03a70d63..79b2b707f90e 100644 --- a/drivers/iio/magnetometer/hmc5843_spi.c +++ b/drivers/iio/magnetometer/hmc5843_spi.c @@ -58,6 +58,7 @@ static const struct regmap_config hmc5843_spi_regmap_config = { static int hmc5843_spi_probe(struct spi_device *spi) { int ret; + struct regmap *regmap; const struct spi_device_id *id = spi_get_device_id(spi); spi->mode = SPI_MODE_3; @@ -67,8 +68,12 @@ static int hmc5843_spi_probe(struct spi_device *spi) if (ret) return ret; + regmap = devm_regmap_init_spi(spi, &hmc5843_spi_regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + return hmc5843_common_probe(&spi->dev, - devm_regmap_init_spi(spi, &hmc5843_spi_regmap_config), + regmap, id->driver_data, id->name); } diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c index be03be719efe..f71918430f95 100644 --- a/drivers/iio/temperature/mlx90632.c +++ b/drivers/iio/temperature/mlx90632.c @@ -81,6 +81,8 @@ /* Magic constants */ #define MLX90632_ID_MEDICAL 0x0105 /* EEPROM DSPv5 Medical device id */ #define MLX90632_ID_CONSUMER 0x0205 /* EEPROM DSPv5 Consumer device id */ +#define MLX90632_DSP_VERSION 5 /* DSP version */ +#define MLX90632_DSP_MASK GENMASK(7, 0) /* DSP version in EE_VERSION */ #define MLX90632_RESET_CMD 0x0006 /* Reset sensor (address or global) */ #define MLX90632_REF_12 12LL /**< ResCtrlRef value of Ch 1 or Ch 2 */ #define MLX90632_REF_3 12LL /**< ResCtrlRef value of Channel 3 */ @@ -667,10 +669,13 @@ static int mlx90632_probe(struct i2c_client *client, } else if (read == MLX90632_ID_CONSUMER) { dev_dbg(&client->dev, "Detected Consumer EEPROM calibration %x\n", read); + } else if ((read & MLX90632_DSP_MASK) == MLX90632_DSP_VERSION) { + dev_dbg(&client->dev, + "Detected Unknown EEPROM calibration %x\n", read); } else { dev_err(&client->dev, - "EEPROM version mismatch %x (expected %x or %x)\n", - read, MLX90632_ID_CONSUMER, MLX90632_ID_MEDICAL); + "Wrong DSP version %x (expected %x)\n", + read, MLX90632_DSP_VERSION); return -EPROTONOSUPPORT; } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 68c997be2429..c54da16df0be 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1173,18 +1173,31 @@ static inline bool cma_any_addr(const struct sockaddr *addr) return cma_zero_addr(addr) || cma_loopback_addr(addr); } -static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) +static int cma_addr_cmp(const struct sockaddr *src, const struct sockaddr *dst) { if (src->sa_family != dst->sa_family) return -1; switch (src->sa_family) { case AF_INET: - return ((struct sockaddr_in *) src)->sin_addr.s_addr != - ((struct sockaddr_in *) dst)->sin_addr.s_addr; - case AF_INET6: - return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, - &((struct sockaddr_in6 *) dst)->sin6_addr); + return ((struct sockaddr_in *)src)->sin_addr.s_addr != + ((struct sockaddr_in *)dst)->sin_addr.s_addr; + case AF_INET6: { + struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *)src; + struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst; + bool link_local; + + if (ipv6_addr_cmp(&src_addr6->sin6_addr, + &dst_addr6->sin6_addr)) + return 1; + link_local = ipv6_addr_type(&dst_addr6->sin6_addr) & + IPV6_ADDR_LINKLOCAL; + /* Link local must match their scope_ids */ + return link_local ? (src_addr6->sin6_scope_id != + dst_addr6->sin6_scope_id) : + 0; + } + default: return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, &((struct sockaddr_ib *) dst)->sib_addr); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 4d232bdf9e97..689ba6bc2ca9 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -457,6 +457,8 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) skb_reset_transport_header(skb); } else { skb = alloc_skb(len, gfp); + if (!skb) + return NULL; } t4_set_arp_err_handler(skb, NULL, NULL); return skb; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9784c6c0d2ec..597f2f02f3a8 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9848,6 +9848,7 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) /* disable the port */ clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); + cancel_work_sync(&ppd->freeze_work); } static inline int init_cpu_counters(struct hfi1_devdata *dd) @@ -14027,6 +14028,19 @@ static void init_kdeth_qp(struct hfi1_devdata *dd) RCV_BTH_QP_KDETH_QP_SHIFT); } +/** + * hfi1_get_qp_map + * @dd: device data + * @idx: index to read + */ +u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx) +{ + u64 reg = read_csr(dd, RCV_QP_MAP_TABLE + (idx / 8) * 8); + + reg >>= (idx % 8) * 8; + return reg; +} + /** * init_qpmap_table * @dd - device data diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 6c27c1c6a868..a5c61400b295 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1442,6 +1442,7 @@ void clear_all_interrupts(struct hfi1_devdata *dd); void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr); void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr); void reset_interrupts(struct hfi1_devdata *dd); +u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx); /* * Interrupt source table. diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index 3fd3315d0fb0..93613e5def9b 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -153,6 +153,7 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, char *dash; unsigned long range_start, range_end, i; bool remove = false; + unsigned long bound = 1U << BITS_PER_BYTE; end = strchr(ptr, ','); if (end) @@ -178,6 +179,10 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, BITS_PER_BYTE); break; } + /* Check the inputs */ + if (range_start >= bound || range_end >= bound) + break; + for (i = range_start; i <= range_end; i++) { if (remove) clear_bit(i, fault->opcodes); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index faaaac8fbc55..3af5eb10a5ff 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -805,7 +805,8 @@ static int create_workqueues(struct hfi1_devdata *dd) ppd->hfi1_wq = alloc_workqueue( "hfi%d_%d", - WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE, + WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | + WQ_MEM_RECLAIM, HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES, dd->unit, pidx); if (!ppd->hfi1_wq) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index b0110728f541..70828de7436b 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -410,10 +410,7 @@ static void sdma_flush(struct sdma_engine *sde) sdma_flush_descq(sde); spin_lock_irqsave(&sde->flushlist_lock, flags); /* copy flush list */ - list_for_each_entry_safe(txp, txp_next, &sde->flushlist, list) { - list_del_init(&txp->list); - list_add_tail(&txp->list, &flushlist); - } + list_splice_init(&sde->flushlist, &flushlist); spin_unlock_irqrestore(&sde->flushlist_lock, flags); /* flush from flush list */ list_for_each_entry_safe(txp, txp_next, &flushlist, list) @@ -2413,7 +2410,7 @@ int sdma_send_txreq(struct sdma_engine *sde, list_add_tail(&tx->list, &sde->flushlist); spin_unlock(&sde->flushlist_lock); iowait_inc_wait_count(wait, tx->num_desc); - schedule_work(&sde->flush_worker); + queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker); ret = -ECOMM; goto unlock; nodesc: @@ -2511,7 +2508,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait, iowait_inc_wait_count(wait, tx->num_desc); } spin_unlock(&sde->flushlist_lock); - schedule_work(&sde->flush_worker); + queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker); ret = -ECOMM; goto update_tail; nodesc: diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 43cbce7a19ea..e0851f01a804 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -305,9 +305,7 @@ static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi, if (qp->ibqp.qp_num == 0) ctxt = 0; else - ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) % - (dd->n_krcv_queues - 1)) + 1; - + ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift); return dd->rcd[ctxt]; } diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 0cd71ce7cc71..3592a9ec155e 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -324,6 +324,9 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, u32 *tidlist = NULL; struct tid_user_buf *tidbuf; + if (!PAGE_ALIGNED(tinfo->vaddr)) + return -EINVAL; + tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); if (!tidbuf) return -ENOMEM; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8bfbc6d7ea34..fd754a16475a 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -130,20 +130,16 @@ static int defer_packet_queue( { struct hfi1_user_sdma_pkt_q *pq = container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy); - struct user_sdma_txreq *tx = - container_of(txreq, struct user_sdma_txreq, txreq); - if (sdma_progress(sde, seq, txreq)) { - if (tx->busycount++ < MAX_DEFER_RETRY_COUNT) - goto eagain; - } + write_seqlock(&sde->waitlock); + if (sdma_progress(sde, seq, txreq)) + goto eagain; /* * We are assuming that if the list is enqueued somewhere, it * is to the dmawait list since that is the only place where * it is supposed to be enqueued. */ xchg(&pq->state, SDMA_PKT_Q_DEFERRED); - write_seqlock(&sde->waitlock); if (list_empty(&pq->busy.list)) { iowait_get_priority(&pq->busy); iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); @@ -151,6 +147,7 @@ static int defer_packet_queue( write_sequnlock(&sde->waitlock); return -EBUSY; eagain: + write_sequnlock(&sde->waitlock); return -EAGAIN; } @@ -804,7 +801,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts) tx->flags = 0; tx->req = req; - tx->busycount = 0; INIT_LIST_HEAD(&tx->list); /* diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 14dfd757dafd..4d8510b0fc38 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -245,7 +245,6 @@ struct user_sdma_txreq { struct list_head list; struct user_sdma_request *req; u16 flags; - unsigned int busycount; u16 seqnum; }; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 55a56b3d7f83..ea68eeba3f22 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1355,8 +1355,6 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) rdi->dparms.props.max_cq = hfi1_max_cqs; rdi->dparms.props.max_ah = hfi1_max_ahs; rdi->dparms.props.max_cqe = hfi1_max_cqes; - rdi->dparms.props.max_mr = rdi->lkey_table.max; - rdi->dparms.props.max_fmr = rdi->lkey_table.max; rdi->dparms.props.max_map_per_fmr = 32767; rdi->dparms.props.max_pd = hfi1_max_pds; rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index c4ab2d5b4502..8f766dd3f61c 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -100,7 +100,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { struct hfi1_qp_priv *priv; - tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC); + tx = kmem_cache_alloc(dev->verbs_txreq_cache, VERBS_TXREQ_GFP); if (tx) goto out; priv = qp->priv; diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index b002e96eb335..bfa6e081cb56 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -72,6 +72,7 @@ struct hfi1_ibdev; struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp); +#define VERBS_TXREQ_GFP (GFP_ATOMIC | __GFP_NOWARN) static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp) __must_hold(&qp->slock) @@ -79,7 +80,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, struct verbs_txreq *tx; struct hfi1_qp_priv *priv = qp->priv; - tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC); + tx = kmem_cache_alloc(dev->verbs_txreq_cache, VERBS_TXREQ_GFP); if (unlikely(!tx)) { /* call slow path to get the lock */ tx = __get_txreq(dev, qp); diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index b3c8c45ec1e3..64e0c69b69c5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -70,7 +70,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, HNS_ROCE_VLAN_SL_BIT_MASK) << HNS_ROCE_VLAN_SL_SHIFT; - ah->av.port_pd = cpu_to_be32(to_hr_pd(ibpd)->pdn | + ah->av.port_pd = cpu_to_le32(to_hr_pd(ibpd)->pdn | (rdma_ah_get_port_num(ah_attr) << HNS_ROCE_PORT_NUM_SHIFT)); ah->av.gid_index = grh->sgid_index; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d3dd290ae1b1..da81402992bc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2070,11 +2070,12 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, return -EPERM; vma->vm_flags &= ~VM_MAYWRITE; - if (!dev->mdev->clock_info_page) + if (!dev->mdev->clock_info) return -EOPNOTSUPP; return rdma_user_mmap_page(&context->ibucontext, vma, - dev->mdev->clock_info_page, PAGE_SIZE); + virt_to_page(dev->mdev->clock_info), + PAGE_SIZE); } static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 0aa10ebda5d9..91669e35c6ca 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -711,6 +711,15 @@ struct pf_frame { int depth; }; +static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key) +{ + if (!mmkey) + return false; + if (mmkey->type == MLX5_MKEY_MW) + return mlx5_base_mkey(mmkey->key) == mlx5_base_mkey(key); + return mmkey->key == key; +} + static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey) { struct mlx5_ib_mw *mw; @@ -760,7 +769,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, next_mr: mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key)); - if (!mmkey || mmkey->key != key) { + if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; goto srcu_unlock; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 5ff32d32c61c..2c4e569ce438 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1459,8 +1459,6 @@ static void qib_fill_device_attr(struct qib_devdata *dd) rdi->dparms.props.max_cq = ib_qib_max_cqs; rdi->dparms.props.max_cqe = ib_qib_max_cqes; rdi->dparms.props.max_ah = ib_qib_max_ahs; - rdi->dparms.props.max_mr = rdi->lkey_table.max; - rdi->dparms.props.max_fmr = rdi->lkey_table.max; rdi->dparms.props.max_map_per_fmr = 32767; rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC; rdi->dparms.props.max_qp_init_rd_atom = 255; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 0bb6e39dd03a..b04d2173e3f4 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -96,6 +96,8 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) for (i = 0; i < rdi->lkey_table.max; i++) RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL); + rdi->dparms.props.max_mr = rdi->lkey_table.max; + rdi->dparms.props.max_fmr = rdi->lkey_table.max; return 0; } diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index a34b9a2a32b6..a77436ee5ff7 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -594,7 +594,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, offset = qpt->incr | ((offset & 1) ^ 1); } /* there can be no set bits in low-order QoS bits */ - WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1)); + WARN_ON(rdi->dparms.qos_shift > 1 && + offset & ((BIT(rdi->dparms.qos_shift - 1) - 1) << 1)); qpn = mk_qpn(qpt, map, offset); } diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 42f0f25e396c..ec89fbd06c53 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -199,6 +199,12 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, buf = map[0]->buf; for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + if (num_buf >= RXE_BUF_PER_MAP) { + map++; + buf = map[0]->buf; + num_buf = 0; + } + vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { pr_warn("null vaddr\n"); @@ -211,11 +217,6 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, num_buf++; buf++; - if (num_buf >= RXE_BUF_PER_MAP) { - map++; - buf = map[0]->buf; - num_buf = 0; - } } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 48eda16db1a7..9b5e11d3fb85 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2402,7 +2402,18 @@ static ssize_t dev_id_show(struct device *dev, { struct net_device *ndev = to_net_dev(dev); - if (ndev->dev_id == ndev->dev_port) + /* + * ndev->dev_port will be equal to 0 in old kernel prior to commit + * 9b8b2a323008 ("IB/ipoib: Use dev_port to expose network interface + * port numbers") Zero was chosen as special case for user space + * applications to fallback and query dev_id to check if it has + * different value or not. + * + * Don't print warning in such scenario. + * + * https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L358 + */ + if (ndev->dev_port && ndev->dev_id == ndev->dev_port) netdev_info_once(ndev, "\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n", current->comm); diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 26ec603fe220..83d1499fe021 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -1051,13 +1051,31 @@ static long uinput_ioctl(struct file *file, unsigned int cmd, unsigned long arg) #ifdef CONFIG_COMPAT -#define UI_SET_PHYS_COMPAT _IOW(UINPUT_IOCTL_BASE, 108, compat_uptr_t) +/* + * These IOCTLs change their size and thus their numbers between + * 32 and 64 bits. + */ +#define UI_SET_PHYS_COMPAT \ + _IOW(UINPUT_IOCTL_BASE, 108, compat_uptr_t) +#define UI_BEGIN_FF_UPLOAD_COMPAT \ + _IOWR(UINPUT_IOCTL_BASE, 200, struct uinput_ff_upload_compat) +#define UI_END_FF_UPLOAD_COMPAT \ + _IOW(UINPUT_IOCTL_BASE, 201, struct uinput_ff_upload_compat) static long uinput_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - if (cmd == UI_SET_PHYS_COMPAT) + switch (cmd) { + case UI_SET_PHYS_COMPAT: cmd = UI_SET_PHYS; + break; + case UI_BEGIN_FF_UPLOAD_COMPAT: + cmd = UI_BEGIN_FF_UPLOAD; + break; + case UI_END_FF_UPLOAD_COMPAT: + cmd = UI_END_FF_UPLOAD; + break; + } return uinput_ioctl_handler(file, cmd, arg, compat_ptr(arg)); } diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index b6da0c1267e3..8e6077d8e434 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -179,6 +179,8 @@ static const char * const smbus_pnp_ids[] = { "LEN0096", /* X280 */ "LEN0097", /* X280 -> ALPS trackpoint */ "LEN200f", /* T450s */ + "LEN2054", /* E480 */ + "LEN2055", /* E580 */ "SYN3052", /* HP EliteBook 840 G4 */ "SYN3221", /* HP 15-ay000 */ NULL diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index f57d82220a88..dd029e689903 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -216,6 +216,7 @@ static const struct goodix_chip_data *goodix_get_chip_data(u16 id) { switch (id) { case 1151: + case 5663: case 5688: return >1x_chip_data; @@ -945,6 +946,7 @@ MODULE_DEVICE_TABLE(acpi, goodix_acpi_match); #ifdef CONFIG_OF static const struct of_device_id goodix_of_match[] = { { .compatible = "goodix,gt1151" }, + { .compatible = "goodix,gt5663" }, { .compatible = "goodix,gt5688" }, { .compatible = "goodix,gt911" }, { .compatible = "goodix,gt9110" }, diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index 09241d4cdebc..06f0eb04a8fd 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -617,6 +617,7 @@ static const struct acpi_device_id silead_ts_acpi_match[] = { { "MSSL1680", 0 }, { "MSSL0001", 0 }, { "MSSL0002", 0 }, + { "MSSL0017", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, silead_ts_acpi_match); diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index d3880010c6cf..d8b73da6447d 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2454,13 +2454,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) /* Clear CR0 and sync (disables SMMU and queue processing) */ reg = readl_relaxed(smmu->base + ARM_SMMU_CR0); if (reg & CR0_SMMUEN) { - if (is_kdump_kernel()) { - arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0); - arm_smmu_device_disable(smmu); - return -EBUSY; - } - dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n"); + WARN_ON(is_kdump_kernel() && !disable_bypass); + arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0); } ret = arm_smmu_device_disable(smmu); @@ -2553,6 +2549,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) return ret; } + if (is_kdump_kernel()) + enables &= ~(CR0_EVTQEN | CR0_PRIQEN); /* Enable the SMMU interface, or ensure bypass */ if (!bypass || disable_bypass) { diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 045d93884164..7e63a15fa724 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -59,6 +59,15 @@ #include "arm-smmu-regs.h" +/* + * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU + * global register space are still, in fact, using a hypervisor to mediate it + * by trapping and emulating register accesses. Sadly, some deployed versions + * of said trapping code have bugs wherein they go horribly wrong for stores + * using r31 (i.e. XZR/WZR) as the source register. + */ +#define QCOM_DUMMY_VAL -1 + #define ARM_MMU500_ACTLR_CPRE (1 << 1) #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26) @@ -422,7 +431,7 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, { unsigned int spin_cnt, delay; - writel_relaxed(0, sync); + writel_relaxed(QCOM_DUMMY_VAL, sync); for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE)) @@ -1760,8 +1769,8 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) } /* Invalidate the TLB, just in case */ - writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH); - writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH); + writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH); + writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH); reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 28cb713d728c..0feb3f70da16 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3496,7 +3496,13 @@ static int __init init_dmars(void) #ifdef CONFIG_INTEL_IOMMU_SVM if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { + /* + * Call dmar_alloc_hwirq() with dmar_global_lock held, + * could cause possible lock race condition. + */ + up_write(&dmar_global_lock); ret = intel_svm_enable_prq(iommu); + down_write(&dmar_global_lock); if (ret) goto free_iommu; } @@ -3730,6 +3736,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) unsigned long iova_pfn; struct intel_iommu *iommu; struct page *freelist; + struct pci_dev *pdev = NULL; if (iommu_no_mapping(dev)) return; @@ -3745,11 +3752,14 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) start_pfn = mm_to_dma_pfn(iova_pfn); last_pfn = start_pfn + nrpages - 1; + if (dev_is_pci(dev)) + pdev = to_pci_dev(dev); + dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn); freelist = domain_unmap(domain, start_pfn, last_pfn); - if (intel_iommu_strict) { + if (intel_iommu_strict || (pdev && pdev->untrusted)) { iommu_flush_iotlb_psi(iommu, domain, start_pfn, nrpages, !freelist, 0); /* free iova */ @@ -4055,9 +4065,7 @@ static void __init init_no_remapping_devices(void) /* This IOMMU has *only* gfx devices. Either bypass it or set the gfx_mapped flag, as appropriate */ - if (dmar_map_gfx) { - intel_iommu_gfx_mapped = 1; - } else { + if (!dmar_map_gfx) { drhd->ignored = 1; for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) @@ -4896,6 +4904,9 @@ int __init intel_iommu_init(void) goto out_free_reserved_range; } + if (dmar_map_gfx) + intel_iommu_gfx_mapped = 1; + init_no_remapping_devices(); ret = init_dmars(); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 5182c7d6171e..8d30653cd13a 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -102,7 +102,6 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) #define SMMU_TLB_FLUSH_VA_MATCH_ALL (0 << 0) #define SMMU_TLB_FLUSH_VA_MATCH_SECTION (2 << 0) #define SMMU_TLB_FLUSH_VA_MATCH_GROUP (3 << 0) -#define SMMU_TLB_FLUSH_ASID(x) (((x) & 0x7f) << 24) #define SMMU_TLB_FLUSH_VA_SECTION(addr) ((((addr) & 0xffc00000) >> 12) | \ SMMU_TLB_FLUSH_VA_MATCH_SECTION) #define SMMU_TLB_FLUSH_VA_GROUP(addr) ((((addr) & 0xffffc000) >> 12) | \ @@ -205,8 +204,12 @@ static inline void smmu_flush_tlb_asid(struct tegra_smmu *smmu, { u32 value; - value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) | - SMMU_TLB_FLUSH_VA_MATCH_ALL; + if (smmu->soc->num_asids == 4) + value = (asid & 0x3) << 29; + else + value = (asid & 0x7f) << 24; + + value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_MATCH_ALL; smmu_writel(smmu, value, SMMU_TLB_FLUSH); } @@ -216,8 +219,12 @@ static inline void smmu_flush_tlb_section(struct tegra_smmu *smmu, { u32 value; - value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) | - SMMU_TLB_FLUSH_VA_SECTION(iova); + if (smmu->soc->num_asids == 4) + value = (asid & 0x3) << 29; + else + value = (asid & 0x7f) << 24; + + value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_SECTION(iova); smmu_writel(smmu, value, SMMU_TLB_FLUSH); } @@ -227,8 +234,12 @@ static inline void smmu_flush_tlb_group(struct tegra_smmu *smmu, { u32 value; - value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) | - SMMU_TLB_FLUSH_VA_GROUP(iova); + if (smmu->soc->num_asids == 4) + value = (asid & 0x3) << 29; + else + value = (asid & 0x7f) << 24; + + value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_GROUP(iova); smmu_writel(smmu, value, SMMU_TLB_FLUSH); } diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index ecdeb89645d0..149b1aca52a2 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -958,6 +958,7 @@ static void write_iso_callback(struct urb *urb) */ static int starturbs(struct bc_state *bcs) { + struct usb_device *udev = bcs->cs->hw.bas->udev; struct bas_bc_state *ubc = bcs->hw.bas; struct urb *urb; int j, k; @@ -975,8 +976,8 @@ static int starturbs(struct bc_state *bcs) rc = -EFAULT; goto error; } - usb_fill_int_urb(urb, bcs->cs->hw.bas->udev, - usb_rcvisocpipe(urb->dev, 3 + 2 * bcs->channel), + usb_fill_int_urb(urb, udev, + usb_rcvisocpipe(udev, 3 + 2 * bcs->channel), ubc->isoinbuf + k * BAS_INBUFSIZE, BAS_INBUFSIZE, read_iso_callback, bcs, BAS_FRAMETIME); @@ -1006,8 +1007,8 @@ static int starturbs(struct bc_state *bcs) rc = -EFAULT; goto error; } - usb_fill_int_urb(urb, bcs->cs->hw.bas->udev, - usb_sndisocpipe(urb->dev, 4 + 2 * bcs->channel), + usb_fill_int_urb(urb, udev, + usb_sndisocpipe(udev, 4 + 2 * bcs->channel), ubc->isooutbuf->data, sizeof(ubc->isooutbuf->data), write_iso_callback, &ubc->isoouturbs[k], diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index a14e35d40538..84e1d4c2db66 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -393,7 +393,7 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) memcpy(di.channelmap, dev->channelmap, sizeof(di.channelmap)); di.nrbchan = dev->nrbchan; - strcpy(di.name, dev_name(&dev->dev)); + strscpy(di.name, dev_name(&dev->dev), sizeof(di.name)); if (copy_to_user((void __user *)arg, &di, sizeof(di))) err = -EFAULT; } else @@ -676,7 +676,7 @@ base_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) memcpy(di.channelmap, dev->channelmap, sizeof(di.channelmap)); di.nrbchan = dev->nrbchan; - strcpy(di.name, dev_name(&dev->dev)); + strscpy(di.name, dev_name(&dev->dev), sizeof(di.name)); if (copy_to_user((void __user *)arg, &di, sizeof(di))) err = -EFAULT; } else @@ -690,6 +690,7 @@ base_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = -EFAULT; break; } + dn.name[sizeof(dn.name) - 1] = '\0'; dev = get_mdevice(dn.id); if (dev) err = device_rename(&dev->dev, dn.name); diff --git a/drivers/mailbox/stm32-ipcc.c b/drivers/mailbox/stm32-ipcc.c index 210fe504f5ae..f91dfb1327c7 100644 --- a/drivers/mailbox/stm32-ipcc.c +++ b/drivers/mailbox/stm32-ipcc.c @@ -8,9 +8,9 @@ #include #include #include +#include #include #include -#include #include #include @@ -240,9 +240,11 @@ static int stm32_ipcc_probe(struct platform_device *pdev) /* irq */ for (i = 0; i < IPCC_IRQ_NUM; i++) { - ipcc->irqs[i] = of_irq_get_byname(dev->of_node, irq_name[i]); + ipcc->irqs[i] = platform_get_irq_byname(pdev, irq_name[i]); if (ipcc->irqs[i] < 0) { - dev_err(dev, "no IRQ specified %s\n", irq_name[i]); + if (ipcc->irqs[i] != -EPROBE_DEFER) + dev_err(dev, "no IRQ specified %s\n", + irq_name[i]); ret = ipcc->irqs[i]; goto err_clk; } @@ -263,9 +265,10 @@ static int stm32_ipcc_probe(struct platform_device *pdev) /* wakeup */ if (of_property_read_bool(np, "wakeup-source")) { - ipcc->wkp = of_irq_get_byname(dev->of_node, "wakeup"); + ipcc->wkp = platform_get_irq_byname(pdev, "wakeup"); if (ipcc->wkp < 0) { - dev_err(dev, "could not get wakeup IRQ\n"); + if (ipcc->wkp != -EPROBE_DEFER) + dev_err(dev, "could not get wakeup IRQ\n"); ret = ipcc->wkp; goto err_clk; } diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 5002838ea476..f8986effcb50 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -327,10 +327,11 @@ static int bch_allocator_thread(void *arg) * possibly issue discards to them, then we add the bucket to * the free list: */ - while (!fifo_empty(&ca->free_inc)) { + while (1) { long bucket; - fifo_pop(&ca->free_inc, bucket); + if (!fifo_pop(&ca->free_inc, bucket)) + break; if (ca->discard) { mutex_unlock(&ca->set->bucket_lock); diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 8f07fa6e1739..268f1b685084 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, struct bset *i = bset_tree_last(b)->data; struct bkey *m, *prev = NULL; struct btree_iter iter; + struct bkey preceding_key_on_stack = ZERO_KEY; + struct bkey *preceding_key_p = &preceding_key_on_stack; BUG_ON(b->ops->is_extents && !KEY_SIZE(k)); - m = bch_btree_iter_init(b, &iter, b->ops->is_extents - ? PRECEDING_KEY(&START_KEY(k)) - : PRECEDING_KEY(k)); + /* + * If k has preceding key, preceding_key_p will be set to address + * of k's preceding key; otherwise preceding_key_p will be set + * to NULL inside preceding_key(). + */ + if (b->ops->is_extents) + preceding_key(&START_KEY(k), &preceding_key_p); + else + preceding_key(k, &preceding_key_p); + + m = bch_btree_iter_init(b, &iter, preceding_key_p); if (b->ops->insert_fixup(b, k, &iter, replace_key)) return status; diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index bac76aabca6d..c71365e7c1fa 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k) return __bch_cut_back(where, k); } -#define PRECEDING_KEY(_k) \ -({ \ - struct bkey *_ret = NULL; \ - \ - if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \ - _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \ - \ - if (!_ret->low) \ - _ret->high--; \ - _ret->low--; \ - } \ - \ - _ret; \ -}) +/* + * Pointer '*preceding_key_p' points to a memory object to store preceding + * key of k. If the preceding key does not exist, set '*preceding_key_p' to + * NULL. So the caller of preceding_key() needs to take care of memory + * which '*preceding_key_p' pointed to before calling preceding_key(). + * Currently the only caller of preceding_key() is bch_btree_insert_key(), + * and it points to an on-stack variable, so the memory release is handled + * by stackframe itself. + */ +static inline void preceding_key(struct bkey *k, struct bkey **preceding_key_p) +{ + if (KEY_INODE(k) || KEY_OFFSET(k)) { + (**preceding_key_p) = KEY(KEY_INODE(k), KEY_OFFSET(k), 0); + if (!(*preceding_key_p)->low) + (*preceding_key_p)->high--; + (*preceding_key_p)->low--; + } else { + (*preceding_key_p) = NULL; + } +} static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k) { diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index b2fd412715b1..6c94fa007796 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -317,6 +317,18 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list) } } +bool is_discard_enabled(struct cache_set *s) +{ + struct cache *ca; + unsigned int i; + + for_each_cache(ca, s, i) + if (ca->discard) + return true; + + return false; +} + int bch_journal_replay(struct cache_set *s, struct list_head *list) { int ret = 0, keys = 0, entries = 0; @@ -330,9 +342,17 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) list_for_each_entry(i, list, list) { BUG_ON(i->pin && atomic_read(i->pin) != 1); - cache_set_err_on(n != i->j.seq, s, -"bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)", - n, i->j.seq - 1, start, end); + if (n != i->j.seq) { + if (n == start && is_discard_enabled(s)) + pr_info("bcache: journal entries %llu-%llu may be discarded! (replaying %llu-%llu)", + n, i->j.seq - 1, start, end); + else { + pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)", + n, i->j.seq - 1, start, end); + ret = -EIO; + goto err; + } + } for (k = i->j.start; k < bset_bkey_last(&i->j); @@ -540,11 +560,11 @@ static void journal_reclaim(struct cache_set *c) ca->sb.nr_this_dev); } - bkey_init(k); - SET_KEY_PTRS(k, n); - - if (n) + if (n) { + bkey_init(k); + SET_KEY_PTRS(k, n); c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; + } out: if (!journal_full(&c->journal)) __closure_wake_up(&c->journal.wait); @@ -671,6 +691,9 @@ static void journal_write_unlocked(struct closure *cl) ca->journal.seq[ca->journal.cur_idx] = w->data->seq; } + /* If KEY_PTRS(k) == 0, this jset gets lost in air */ + BUG_ON(i == 0); + atomic_dec_bug(&fifo_back(&c->journal.pin)); bch_journal_next(&c->journal); journal_reclaim(c); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a697a3a923cd..e489d2459569 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1516,6 +1516,7 @@ static void cache_set_free(struct closure *cl) bch_btree_cache_free(c); bch_journal_free(c); + mutex_lock(&bch_register_lock); for_each_cache(ca, c, i) if (ca) { ca->set = NULL; @@ -1534,7 +1535,6 @@ static void cache_set_free(struct closure *cl) mempool_exit(&c->search); kfree(c->devices); - mutex_lock(&bch_register_lock); list_del(&c->list); mutex_unlock(&bch_register_lock); @@ -1775,13 +1775,15 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) return NULL; } -static void run_cache_set(struct cache_set *c) +static int run_cache_set(struct cache_set *c) { const char *err = "cannot allocate memory"; struct cached_dev *dc, *t; struct cache *ca; struct closure cl; unsigned int i; + LIST_HEAD(journal); + struct journal_replay *l; closure_init_stack(&cl); @@ -1869,7 +1871,9 @@ static void run_cache_set(struct cache_set *c) if (j->version < BCACHE_JSET_VERSION_UUID) __uuid_write(c); - bch_journal_replay(c, &journal); + err = "bcache: replay journal failed"; + if (bch_journal_replay(c, &journal)) + goto err; } else { pr_notice("invalidating existing data"); @@ -1937,11 +1941,19 @@ static void run_cache_set(struct cache_set *c) flash_devs_run(c); set_bit(CACHE_SET_RUNNING, &c->flags); - return; + return 0; err: + while (!list_empty(&journal)) { + l = list_first_entry(&journal, struct journal_replay, list); + list_del(&l->list); + kfree(l); + } + closure_sync(&cl); /* XXX: test this, it's broken */ bch_cache_set_error(c, "%s", err); + + return -EIO; } static bool can_attach_cache(struct cache *ca, struct cache_set *c) @@ -2005,8 +2017,11 @@ static const char *register_cache_set(struct cache *ca) ca->set->cache[ca->sb.nr_this_dev] = ca; c->cache_by_alloc[c->caches_loaded++] = ca; - if (c->caches_loaded == c->sb.nr_in_set) - run_cache_set(c); + if (c->caches_loaded == c->sb.nr_in_set) { + err = "failed to run cache set"; + if (run_cache_set(c) < 0) + goto err; + } return NULL; err: diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 17bae9c14ca0..eb42dcf52277 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -431,8 +431,13 @@ STORE(bch_cached_dev) bch_writeback_queue(dc); } + /* + * Only set BCACHE_DEV_WB_RUNNING when cached device attached to + * a cache set, otherwise it doesn't make sense. + */ if (attr == &sysfs_writeback_percent) - if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) + if ((dc->disk.c != NULL) && + (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))) schedule_delayed_work(&dc->writeback_rate_update, dc->writeback_rate_update_seconds * HZ); diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 6fc93834da44..151aa95775be 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1167,11 +1167,18 @@ static int __load_discards(struct dm_cache_metadata *cmd, if (r) return r; - for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) { + for (b = 0; ; b++) { r = fn(context, cmd->discard_block_size, to_dblock(b), dm_bitset_cursor_get_value(&c)); if (r) break; + + if (b >= (from_dblock(cmd->discard_nr_blocks) - 1)) + break; + + r = dm_bitset_cursor_next(&c); + if (r) + break; } dm_bitset_cursor_end(&c); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index dd6565798778..86fd2d0fa975 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -949,6 +949,7 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti) { #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity *bi = blk_get_integrity(cc->dev->bdev->bd_disk); + struct mapped_device *md = dm_table_get_md(ti->table); /* From now we require underlying device with our integrity profile */ if (!bi || strcasecmp(bi->profile->name, "DM-DIF-EXT-TAG")) { @@ -968,7 +969,7 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti) if (crypt_integrity_aead(cc)) { cc->integrity_tag_size = cc->on_disk_tag_size - cc->integrity_iv_size; - DMINFO("Integrity AEAD, tag size %u, IV size %u.", + DMDEBUG("%s: Integrity AEAD, tag size %u, IV size %u.", dm_device_name(md), cc->integrity_tag_size, cc->integrity_iv_size); if (crypto_aead_setauthsize(any_tfm_aead(cc), cc->integrity_tag_size)) { @@ -976,7 +977,7 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti) return -EINVAL; } } else if (cc->integrity_iv_size) - DMINFO("Additional per-sector space %u bytes for IV.", + DMDEBUG("%s: Additional per-sector space %u bytes for IV.", dm_device_name(md), cc->integrity_iv_size); if ((cc->integrity_tag_size + cc->integrity_iv_size) != bi->tag_size) { @@ -1891,7 +1892,7 @@ static int crypt_alloc_tfms_skcipher(struct crypt_config *cc, char *ciphermode) * algorithm implementation is used. Help people debug performance * problems by logging the ->cra_driver_name. */ - DMINFO("%s using implementation \"%s\"", ciphermode, + DMDEBUG_LIMIT("%s using implementation \"%s\"", ciphermode, crypto_skcipher_alg(any_tfm(cc))->base.cra_driver_name); return 0; } @@ -1911,7 +1912,7 @@ static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode) return err; } - DMINFO("%s using implementation \"%s\"", ciphermode, + DMDEBUG_LIMIT("%s using implementation \"%s\"", ciphermode, crypto_aead_alg(any_tfm_aead(cc))->base.cra_driver_name); return 0; } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index fddffe251bf6..f496213f8b67 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -121,7 +121,8 @@ static void delay_dtr(struct dm_target *ti) { struct delay_c *dc = ti->private; - destroy_workqueue(dc->kdelayd_wq); + if (dc->kdelayd_wq) + destroy_workqueue(dc->kdelayd_wq); if (dc->read.dev) dm_put_device(ti, dc->read.dev); diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index 4b76f84424c3..352e803f566e 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -160,7 +160,7 @@ static int __init dm_parse_table(struct dm_device *dev, char *str) while (table_entry) { DMDEBUG("parsing table \"%s\"", str); - if (++dev->dmi.target_count >= DM_MAX_TARGETS) { + if (++dev->dmi.target_count > DM_MAX_TARGETS) { DMERR("too many targets %u > %d", dev->dmi.target_count, DM_MAX_TARGETS); return -EINVAL; @@ -242,9 +242,9 @@ static int __init dm_parse_devices(struct list_head *devices, char *str) return -ENOMEM; list_add_tail(&dev->list, devices); - if (++ndev >= DM_MAX_DEVICES) { - DMERR("too many targets %u > %d", - dev->dmi.target_count, DM_MAX_TARGETS); + if (++ndev > DM_MAX_DEVICES) { + DMERR("too many devices %lu > %d", + ndev, DM_MAX_DEVICES); return -EINVAL; } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 7c678f50aaa3..7848ef019880 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2568,7 +2568,7 @@ static int calculate_device_limits(struct dm_integrity_c *ic) if (last_sector < ic->start || last_sector >= ic->meta_device_sectors) return -EINVAL; } else { - __u64 meta_size = ic->provided_data_sectors * ic->tag_size; + __u64 meta_size = (ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size; meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1)) >> (ic->log2_buffer_sectors + SECTOR_SHIFT); meta_size <<= ic->log2_buffer_sectors; @@ -3439,7 +3439,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections)); DEBUG_print(" journal_entries %u\n", ic->journal_entries); DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors); - DEBUG_print(" device_sectors 0x%llx\n", (unsigned long long)ic->device_sectors); + DEBUG_print(" data_device_sectors 0x%llx\n", (unsigned long long)ic->data_device_sectors); DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors); DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run); DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index c740153b4e52..1e03bc89e20f 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -2069,7 +2069,7 @@ int __init dm_early_create(struct dm_ioctl *dmi, /* alloc table */ r = dm_table_create(&t, get_mode(dmi), dmi->target_count, md); if (r) - goto err_destroy_dm; + goto err_hash_remove; /* add targets */ for (i = 0; i < dmi->target_count; i++) { @@ -2116,6 +2116,10 @@ int __init dm_early_create(struct dm_ioctl *dmi, err_destroy_table: dm_table_destroy(t); +err_hash_remove: + (void) __hash_remove(__get_name_cell(dmi->name)); + /* release reference from __get_name_cell */ + dm_put(md); err_destroy_dm: dm_put(md); dm_destroy(md); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 2ee5e357a0a7..cc5173dfd466 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -882,6 +882,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps if (attached_handler_name || m->hw_handler_name) { INIT_DELAYED_WORK(&p->activate_path, activate_path_work); r = setup_scsi_dh(p->path.dev->bdev, m, &attached_handler_name, &ti->error); + kfree(attached_handler_name); if (r) { dm_put_device(ti, p->path.dev); goto bad; @@ -896,7 +897,6 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps return p; bad: - kfree(attached_handler_name); free_pgpath(p); return ERR_PTR(r); } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index cde3b49b2a91..350cf0451456 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -880,13 +880,17 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type) } EXPORT_SYMBOL_GPL(dm_table_set_type); +/* validate the dax capability of the target device span */ static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) + sector_t start, sector_t len, void *data) { - return bdev_dax_supported(dev->bdev, PAGE_SIZE); + int blocksize = *(int *) data; + + return generic_fsdax_supported(dev->dax_dev, dev->bdev, blocksize, + start, len); } -static bool dm_table_supports_dax(struct dm_table *t) +bool dm_table_supports_dax(struct dm_table *t, int blocksize) { struct dm_target *ti; unsigned i; @@ -899,7 +903,8 @@ static bool dm_table_supports_dax(struct dm_table *t) return false; if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, device_supports_dax, NULL)) + !ti->type->iterate_devices(ti, device_supports_dax, + &blocksize)) return false; } @@ -979,7 +984,7 @@ static int dm_table_determine_type(struct dm_table *t) verify_bio_based: /* We must use this table as bio-based */ t->type = DM_TYPE_BIO_BASED; - if (dm_table_supports_dax(t) || + if (dm_table_supports_dax(t, PAGE_SIZE) || (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) { t->type = DM_TYPE_DAX_BIO_BASED; } else { @@ -1905,7 +1910,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } blk_queue_write_cache(q, wc, fua); - if (dm_table_supports_dax(t)) + if (dm_table_supports_dax(t, PAGE_SIZE)) blk_queue_flag_set(QUEUE_FLAG_DAX, q); else blk_queue_flag_clear(QUEUE_FLAG_DAX, q); diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index fa68336560c3..d8334cd45d7c 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1169,6 +1169,9 @@ static int dmz_init_zones(struct dmz_metadata *zmd) goto out; } + if (!nr_blkz) + break; + /* Process report */ for (i = 0; i < nr_blkz; i++) { ret = dmz_init_zone(zmd, zone, &blkz[i]); @@ -1204,6 +1207,8 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) /* Get zone information from disk */ ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone), &blkz, &nr_blkz, GFP_NOIO); + if (!nr_blkz) + ret = -EIO; if (ret) { dmz_dev_err(zmd->dev, "Get zone %u report failed", dmz_id(zmd, zone)); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 043f0761e4a0..1cacf02633ec 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1105,6 +1105,25 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } +static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev, + int blocksize, sector_t start, sector_t len) +{ + struct mapped_device *md = dax_get_private(dax_dev); + struct dm_table *map; + int srcu_idx; + bool ret; + + map = dm_get_live_table(md, &srcu_idx); + if (!map) + return false; + + ret = dm_table_supports_dax(map, blocksize); + + dm_put_live_table(md, srcu_idx); + + return ret; +} + static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { @@ -1467,7 +1486,7 @@ static unsigned get_num_write_zeroes_bios(struct dm_target *ti) static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, unsigned num_bios) { - unsigned len = ci->sector_count; + unsigned len; /* * Even though the device advertised support for this type of @@ -1478,6 +1497,8 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target * if (!num_bios) return -EOPNOTSUPP; + len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); + __send_duplicate_bios(ci, ti, num_bios, &len); ci->sector += len; @@ -3192,6 +3213,7 @@ static const struct block_device_operations dm_blk_dops = { static const struct dax_operations dm_dax_ops = { .direct_access = dm_dax_direct_access, + .dax_supported = dm_dax_supported, .copy_from_iter = dm_dax_copy_from_iter, .copy_to_iter = dm_dax_copy_to_iter, }; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 2d539b82ec08..17e3db54404c 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -72,6 +72,7 @@ bool dm_table_bio_based(struct dm_table *t); bool dm_table_request_based(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); +bool dm_table_supports_dax(struct dm_table *t, int blocksize); void dm_lock_md_type(struct mapped_device *md); void dm_unlock_md_type(struct mapped_device *md); diff --git a/drivers/md/md.c b/drivers/md/md.c index 05ffffb8b769..295ff09cff4c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -132,24 +132,6 @@ static inline int speed_max(struct mddev *mddev) mddev->sync_speed_max : sysctl_speed_limit_max; } -static void * flush_info_alloc(gfp_t gfp_flags, void *data) -{ - return kzalloc(sizeof(struct flush_info), gfp_flags); -} -static void flush_info_free(void *flush_info, void *data) -{ - kfree(flush_info); -} - -static void * flush_bio_alloc(gfp_t gfp_flags, void *data) -{ - return kzalloc(sizeof(struct flush_bio), gfp_flags); -} -static void flush_bio_free(void *flush_bio, void *data) -{ - kfree(flush_bio); -} - static struct ctl_table_header *raid_table_header; static struct ctl_table raid_table[] = { @@ -423,54 +405,31 @@ static int md_congested(void *data, int bits) /* * Generic flush handling for md */ -static void submit_flushes(struct work_struct *ws) -{ - struct flush_info *fi = container_of(ws, struct flush_info, flush_work); - struct mddev *mddev = fi->mddev; - struct bio *bio = fi->bio; - - bio->bi_opf &= ~REQ_PREFLUSH; - md_handle_request(mddev, bio); - - mempool_free(fi, mddev->flush_pool); -} -static void md_end_flush(struct bio *fbio) +static void md_end_flush(struct bio *bio) { - struct flush_bio *fb = fbio->bi_private; - struct md_rdev *rdev = fb->rdev; - struct flush_info *fi = fb->fi; - struct bio *bio = fi->bio; - struct mddev *mddev = fi->mddev; + struct md_rdev *rdev = bio->bi_private; + struct mddev *mddev = rdev->mddev; rdev_dec_pending(rdev, mddev); - if (atomic_dec_and_test(&fi->flush_pending)) { - if (bio->bi_iter.bi_size == 0) { - /* an empty barrier - all done */ - bio_endio(bio); - mempool_free(fi, mddev->flush_pool); - } else { - INIT_WORK(&fi->flush_work, submit_flushes); - queue_work(md_wq, &fi->flush_work); - } + if (atomic_dec_and_test(&mddev->flush_pending)) { + /* The pre-request flush has finished */ + queue_work(md_wq, &mddev->flush_work); } - - mempool_free(fb, mddev->flush_bio_pool); - bio_put(fbio); + bio_put(bio); } -void md_flush_request(struct mddev *mddev, struct bio *bio) +static void md_submit_flush_data(struct work_struct *ws); + +static void submit_flushes(struct work_struct *ws) { + struct mddev *mddev = container_of(ws, struct mddev, flush_work); struct md_rdev *rdev; - struct flush_info *fi; - - fi = mempool_alloc(mddev->flush_pool, GFP_NOIO); - - fi->bio = bio; - fi->mddev = mddev; - atomic_set(&fi->flush_pending, 1); + mddev->start_flush = ktime_get_boottime(); + INIT_WORK(&mddev->flush_work, md_submit_flush_data); + atomic_set(&mddev->flush_pending, 1); rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) if (rdev->raid_disk >= 0 && @@ -480,37 +439,74 @@ void md_flush_request(struct mddev *mddev, struct bio *bio) * we reclaim rcu_read_lock */ struct bio *bi; - struct flush_bio *fb; atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending); rcu_read_unlock(); - - fb = mempool_alloc(mddev->flush_bio_pool, GFP_NOIO); - fb->fi = fi; - fb->rdev = rdev; - bi = bio_alloc_mddev(GFP_NOIO, 0, mddev); - bio_set_dev(bi, rdev->bdev); bi->bi_end_io = md_end_flush; - bi->bi_private = fb; + bi->bi_private = rdev; + bio_set_dev(bi, rdev->bdev); bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - - atomic_inc(&fi->flush_pending); + atomic_inc(&mddev->flush_pending); submit_bio(bi); - rcu_read_lock(); rdev_dec_pending(rdev, mddev); } rcu_read_unlock(); + if (atomic_dec_and_test(&mddev->flush_pending)) + queue_work(md_wq, &mddev->flush_work); +} + +static void md_submit_flush_data(struct work_struct *ws) +{ + struct mddev *mddev = container_of(ws, struct mddev, flush_work); + struct bio *bio = mddev->flush_bio; + + /* + * must reset flush_bio before calling into md_handle_request to avoid a + * deadlock, because other bios passed md_handle_request suspend check + * could wait for this and below md_handle_request could wait for those + * bios because of suspend check + */ + mddev->last_flush = mddev->start_flush; + mddev->flush_bio = NULL; + wake_up(&mddev->sb_wait); + + if (bio->bi_iter.bi_size == 0) { + /* an empty barrier - all done */ + bio_endio(bio); + } else { + bio->bi_opf &= ~REQ_PREFLUSH; + md_handle_request(mddev, bio); + } +} - if (atomic_dec_and_test(&fi->flush_pending)) { - if (bio->bi_iter.bi_size == 0) { +void md_flush_request(struct mddev *mddev, struct bio *bio) +{ + ktime_t start = ktime_get_boottime(); + spin_lock_irq(&mddev->lock); + wait_event_lock_irq(mddev->sb_wait, + !mddev->flush_bio || + ktime_after(mddev->last_flush, start), + mddev->lock); + if (!ktime_after(mddev->last_flush, start)) { + WARN_ON(mddev->flush_bio); + mddev->flush_bio = bio; + bio = NULL; + } + spin_unlock_irq(&mddev->lock); + + if (!bio) { + INIT_WORK(&mddev->flush_work, submit_flushes); + queue_work(md_wq, &mddev->flush_work); + } else { + /* flush was performed for some other bio while we waited. */ + if (bio->bi_iter.bi_size == 0) /* an empty barrier - all done */ bio_endio(bio); - mempool_free(fi, mddev->flush_pool); - } else { - INIT_WORK(&fi->flush_work, submit_flushes); - queue_work(md_wq, &fi->flush_work); + else { + bio->bi_opf &= ~REQ_PREFLUSH; + mddev->pers->make_request(mddev, bio); } } } @@ -560,6 +556,7 @@ void mddev_init(struct mddev *mddev) atomic_set(&mddev->openers, 0); atomic_set(&mddev->active_io, 0); spin_lock_init(&mddev->lock); + atomic_set(&mddev->flush_pending, 0); init_waitqueue_head(&mddev->sb_wait); init_waitqueue_head(&mddev->recovery_wait); mddev->reshape_position = MaxSector; @@ -2855,8 +2852,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) err = 0; } } else if (cmd_match(buf, "re-add")) { - if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) && - rdev->saved_raid_disk >= 0) { + if (!rdev->mddev->pers) + err = -EINVAL; + else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) && + rdev->saved_raid_disk >= 0) { /* clear_bit is performed _after_ all the devices * have their local Faulty bit cleared. If any writes * happen in the meantime in the local node, they @@ -5511,22 +5510,6 @@ int md_run(struct mddev *mddev) if (err) return err; } - if (mddev->flush_pool == NULL) { - mddev->flush_pool = mempool_create(NR_FLUSH_INFOS, flush_info_alloc, - flush_info_free, mddev); - if (!mddev->flush_pool) { - err = -ENOMEM; - goto abort; - } - } - if (mddev->flush_bio_pool == NULL) { - mddev->flush_bio_pool = mempool_create(NR_FLUSH_BIOS, flush_bio_alloc, - flush_bio_free, mddev); - if (!mddev->flush_bio_pool) { - err = -ENOMEM; - goto abort; - } - } spin_lock(&pers_lock); pers = find_pers(mddev->level, mddev->clevel); @@ -5686,11 +5669,8 @@ int md_run(struct mddev *mddev) return 0; abort: - mempool_destroy(mddev->flush_bio_pool); - mddev->flush_bio_pool = NULL; - mempool_destroy(mddev->flush_pool); - mddev->flush_pool = NULL; - + bioset_exit(&mddev->bio_set); + bioset_exit(&mddev->sync_set); return err; } EXPORT_SYMBOL_GPL(md_run); @@ -5894,14 +5874,6 @@ static void __md_stop(struct mddev *mddev) mddev->to_remove = &md_redundancy_group; module_put(pers->owner); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - if (mddev->flush_bio_pool) { - mempool_destroy(mddev->flush_bio_pool); - mddev->flush_bio_pool = NULL; - } - if (mddev->flush_pool) { - mempool_destroy(mddev->flush_pool); - mddev->flush_pool = NULL; - } } void md_stop(struct mddev *mddev) @@ -9257,7 +9229,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) * reshape is happening in the remote node, we need to * update reshape_position and call start_reshape. */ - mddev->reshape_position = sb->reshape_position; + mddev->reshape_position = le64_to_cpu(sb->reshape_position); if (mddev->pers->update_reshape_pos) mddev->pers->update_reshape_pos(mddev); if (mddev->pers->start_reshape) diff --git a/drivers/md/md.h b/drivers/md/md.h index c52afb52c776..257cb4c9e22b 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -252,19 +252,6 @@ enum mddev_sb_flags { MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */ }; -#define NR_FLUSH_INFOS 8 -#define NR_FLUSH_BIOS 64 -struct flush_info { - struct bio *bio; - struct mddev *mddev; - struct work_struct flush_work; - atomic_t flush_pending; -}; -struct flush_bio { - struct flush_info *fi; - struct md_rdev *rdev; -}; - struct mddev { void *private; struct md_personality *pers; @@ -470,8 +457,16 @@ struct mddev { * metadata and bitmap writes */ - mempool_t *flush_pool; - mempool_t *flush_bio_pool; + /* Generic flush handling. + * The last to finish preflush schedules a worker to submit + * the rest of the request (without the REQ_PREFLUSH flag). + */ + struct bio *flush_bio; + atomic_t flush_pending; + ktime_t start_flush, last_flush; /* last_flush is when the last completed + * flush was started. + */ + struct work_struct flush_work; struct work_struct event_work; /* used by dm to report failure event */ void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); struct md_cluster_info *cluster_info; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c033bfcb209e..a4d2f552c8ab 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4187,7 +4187,7 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, /* now write out any block on a failed drive, * or P or Q if they were recomputed */ - BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */ + dev = NULL; if (s->failed == 2) { dev = &sh->dev[s->failed_num[1]]; s->locked++; @@ -4212,6 +4212,14 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); } + if (WARN_ONCE(dev && !test_bit(R5_UPTODATE, &dev->flags), + "%s: disk%td not up to date\n", + mdname(conf->mddev), + dev - (struct r5dev *) &sh->dev)) { + clear_bit(R5_LOCKED, &dev->flags); + clear_bit(R5_Wantwrite, &dev->flags); + s->locked--; + } clear_bit(STRIPE_DEGRADED, &sh->state); set_bit(STRIPE_INSYNC, &sh->state); diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index 15b6b9c0a2e4..9c163f658aaf 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -672,6 +672,11 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, return -EBUSY; } + if (q->waiting_in_dqbuf && *count) { + dprintk(1, "another dup()ped fd is waiting for a buffer\n"); + return -EBUSY; + } + if (*count == 0 || q->num_buffers != 0 || (q->memory != VB2_MEMORY_UNKNOWN && q->memory != memory)) { /* @@ -807,6 +812,10 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, } if (!q->num_buffers) { + if (q->waiting_in_dqbuf && *count) { + dprintk(1, "another dup()ped fd is waiting for a buffer\n"); + return -EBUSY; + } memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); q->memory = memory; q->waiting_for_buffers = !q->is_output; @@ -1659,6 +1668,11 @@ static int __vb2_wait_for_done_vb(struct vb2_queue *q, int nonblocking) for (;;) { int ret; + if (q->waiting_in_dqbuf) { + dprintk(1, "another dup()ped fd is waiting for a buffer\n"); + return -EBUSY; + } + if (!q->streaming) { dprintk(1, "streaming off, will not wait for buffers\n"); return -EINVAL; @@ -1686,6 +1700,7 @@ static int __vb2_wait_for_done_vb(struct vb2_queue *q, int nonblocking) return -EAGAIN; } + q->waiting_in_dqbuf = 1; /* * We are streaming and blocking, wait for another buffer to * become ready or for streamoff. Driver's lock is released to @@ -1706,6 +1721,7 @@ static int __vb2_wait_for_done_vb(struct vb2_queue *q, int nonblocking) * the locks or return an error if one occurred. */ call_void_qop(q, wait_finish, q); + q->waiting_in_dqbuf = 0; if (ret) { dprintk(1, "sleep was interrupted\n"); return ret; @@ -2585,6 +2601,12 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_ if (!data) return -EINVAL; + if (q->waiting_in_dqbuf) { + dprintk(3, "another dup()ped fd is %s\n", + read ? "reading" : "writing"); + return -EBUSY; + } + /* * Initialize emulator on first call. */ diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index fbdb4ecc7c50..7402c9834189 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -917,7 +917,7 @@ static void dvb_frontend_get_frequency_limits(struct dvb_frontend *fe, "DVB: adapter %i frontend %u frequency limits undefined - fix the driver\n", fe->dvb->num, fe->id); - dprintk("frequency interval: tuner: %u...%u, frontend: %u...%u", + dev_dbg(fe->dvb->device, "frequency interval: tuner: %u...%u, frontend: %u...%u", tuner_min, tuner_max, frontend_min, frontend_max); /* If the standard is for satellite, convert frequencies to kHz */ diff --git a/drivers/media/dvb-frontends/m88ds3103.c b/drivers/media/dvb-frontends/m88ds3103.c index 123f2a33738b..403f42806455 100644 --- a/drivers/media/dvb-frontends/m88ds3103.c +++ b/drivers/media/dvb-frontends/m88ds3103.c @@ -309,6 +309,9 @@ static int m88ds3103_set_frontend(struct dvb_frontend *fe) u16 u16tmp; u32 tuner_frequency_khz, target_mclk; s32 s32tmp; + static const struct reg_sequence reset_buf[] = { + {0x07, 0x80}, {0x07, 0x00} + }; dev_dbg(&client->dev, "delivery_system=%d modulation=%d frequency=%u symbol_rate=%d inversion=%d pilot=%d rolloff=%d\n", @@ -321,11 +324,7 @@ static int m88ds3103_set_frontend(struct dvb_frontend *fe) } /* reset */ - ret = regmap_write(dev->regmap, 0x07, 0x80); - if (ret) - goto err; - - ret = regmap_write(dev->regmap, 0x07, 0x00); + ret = regmap_multi_reg_write(dev->regmap, reset_buf, 2); if (ret) goto err; diff --git a/drivers/media/dvb-frontends/si2165.c b/drivers/media/dvb-frontends/si2165.c index feacd8da421d..d55d8f169dca 100644 --- a/drivers/media/dvb-frontends/si2165.c +++ b/drivers/media/dvb-frontends/si2165.c @@ -275,18 +275,20 @@ static u32 si2165_get_fe_clk(struct si2165_state *state) static int si2165_wait_init_done(struct si2165_state *state) { - int ret = -EINVAL; + int ret; u8 val = 0; int i; for (i = 0; i < 3; ++i) { - si2165_readreg8(state, REG_INIT_DONE, &val); + ret = si2165_readreg8(state, REG_INIT_DONE, &val); + if (ret < 0) + return ret; if (val == 0x01) return 0; usleep_range(1000, 50000); } dev_err(&state->client->dev, "init_done was not set\n"); - return ret; + return -EINVAL; } static int si2165_upload_firmware_block(struct si2165_state *state, diff --git a/drivers/media/i2c/ov2659.c b/drivers/media/i2c/ov2659.c index 799acce803fe..a1e9a980a445 100644 --- a/drivers/media/i2c/ov2659.c +++ b/drivers/media/i2c/ov2659.c @@ -1117,8 +1117,10 @@ static int ov2659_set_fmt(struct v4l2_subdev *sd, if (ov2659_formats[index].code == mf->code) break; - if (index < 0) - return -EINVAL; + if (index < 0) { + index = 0; + mf->code = ov2659_formats[index].code; + } mf->colorspace = V4L2_COLORSPACE_SRGB; mf->field = V4L2_FIELD_NONE; diff --git a/drivers/media/i2c/ov6650.c b/drivers/media/i2c/ov6650.c index c33fd584cb44..de7d9790f054 100644 --- a/drivers/media/i2c/ov6650.c +++ b/drivers/media/i2c/ov6650.c @@ -810,9 +810,18 @@ static int ov6650_video_probe(struct i2c_client *client) u8 pidh, pidl, midh, midl; int ret; + priv->clk = v4l2_clk_get(&client->dev, NULL); + if (IS_ERR(priv->clk)) { + ret = PTR_ERR(priv->clk); + dev_err(&client->dev, "v4l2_clk request err: %d\n", ret); + return ret; + } + ret = ov6650_s_power(&priv->subdev, 1); if (ret < 0) - return ret; + goto eclkput; + + msleep(20); /* * check and show product ID and manufacturer ID @@ -847,6 +856,11 @@ static int ov6650_video_probe(struct i2c_client *client) done: ov6650_s_power(&priv->subdev, 0); + if (!ret) + return 0; +eclkput: + v4l2_clk_put(priv->clk); + return ret; } @@ -989,18 +1003,9 @@ static int ov6650_probe(struct i2c_client *client, priv->code = MEDIA_BUS_FMT_YUYV8_2X8; priv->colorspace = V4L2_COLORSPACE_JPEG; - priv->clk = v4l2_clk_get(&client->dev, NULL); - if (IS_ERR(priv->clk)) { - ret = PTR_ERR(priv->clk); - goto eclkget; - } - ret = ov6650_video_probe(client); - if (ret) { - v4l2_clk_put(priv->clk); -eclkget: + if (ret) v4l2_ctrl_handler_free(&priv->hdl); - } return ret; } diff --git a/drivers/media/i2c/ov7670.c b/drivers/media/i2c/ov7670.c index a7d26b294eb5..e65693c2aad5 100644 --- a/drivers/media/i2c/ov7670.c +++ b/drivers/media/i2c/ov7670.c @@ -1664,6 +1664,7 @@ static int ov7670_s_power(struct v4l2_subdev *sd, int on) if (on) { ov7670_power_on (sd); + ov7670_init(sd, 0); ov7670_apply_fmt(sd); ov7675_apply_framerate(sd); v4l2_ctrl_handler_setup(&info->hdl); diff --git a/drivers/media/pci/saa7146/hexium_gemini.c b/drivers/media/pci/saa7146/hexium_gemini.c index 5817d9cde4d0..6d8e4afe9673 100644 --- a/drivers/media/pci/saa7146/hexium_gemini.c +++ b/drivers/media/pci/saa7146/hexium_gemini.c @@ -270,9 +270,8 @@ static int hexium_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_d /* enable i2c-port pins */ saa7146_write(dev, MC1, (MASK_08 | MASK_24 | MASK_10 | MASK_26)); - hexium->i2c_adapter = (struct i2c_adapter) { - .name = "hexium gemini", - }; + strscpy(hexium->i2c_adapter.name, "hexium gemini", + sizeof(hexium->i2c_adapter.name)); saa7146_i2c_adapter_prepare(dev, &hexium->i2c_adapter, SAA7146_I2C_BUS_BIT_RATE_480); if (i2c_add_adapter(&hexium->i2c_adapter) < 0) { DEB_S("cannot register i2c-device. skipping.\n"); diff --git a/drivers/media/pci/saa7146/hexium_orion.c b/drivers/media/pci/saa7146/hexium_orion.c index 0a05176c18ab..a794f9e5f990 100644 --- a/drivers/media/pci/saa7146/hexium_orion.c +++ b/drivers/media/pci/saa7146/hexium_orion.c @@ -231,9 +231,8 @@ static int hexium_probe(struct saa7146_dev *dev) saa7146_write(dev, DD1_STREAM_B, 0x00000000); saa7146_write(dev, MC2, (MASK_09 | MASK_25 | MASK_10 | MASK_26)); - hexium->i2c_adapter = (struct i2c_adapter) { - .name = "hexium orion", - }; + strscpy(hexium->i2c_adapter.name, "hexium orion", + sizeof(hexium->i2c_adapter.name)); saa7146_i2c_adapter_prepare(dev, &hexium->i2c_adapter, SAA7146_I2C_BUS_BIT_RATE_480); if (i2c_add_adapter(&hexium->i2c_adapter) < 0) { DEB_S("cannot register i2c-device. skipping.\n"); diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index 4acbed189644..67e48ff10532 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -649,7 +649,7 @@ config VIDEO_SECO_CEC config VIDEO_SECO_RC bool "SECO Boards IR RC5 support" depends on VIDEO_SECO_CEC - depends on RC_CORE + depends on RC_CORE=y || RC_CORE = VIDEO_SECO_CEC help If you say yes here you will get support for the SECO Boards Consumer-IR in seco-cec driver. diff --git a/drivers/media/platform/atmel/atmel-isc.c b/drivers/media/platform/atmel/atmel-isc.c index 50178968b8a6..5b0e96bd8c7e 100644 --- a/drivers/media/platform/atmel/atmel-isc.c +++ b/drivers/media/platform/atmel/atmel-isc.c @@ -2065,8 +2065,11 @@ static int isc_parse_dt(struct device *dev, struct isc_device *isc) break; } - subdev_entity->asd = devm_kzalloc(dev, - sizeof(*subdev_entity->asd), GFP_KERNEL); + /* asd will be freed by the subsystem once it's added to the + * notifier list + */ + subdev_entity->asd = kzalloc(sizeof(*subdev_entity->asd), + GFP_KERNEL); if (!subdev_entity->asd) { of_node_put(rem); ret = -ENOMEM; @@ -2210,6 +2213,7 @@ static int atmel_isc_probe(struct platform_device *pdev) subdev_entity->asd); if (ret) { fwnode_handle_put(subdev_entity->asd->match.fwnode); + kfree(subdev_entity->asd); goto cleanup_subdev; } diff --git a/drivers/media/platform/coda/coda-bit.c b/drivers/media/platform/coda/coda-bit.c index b4f396c2e72c..eaa86737fa04 100644 --- a/drivers/media/platform/coda/coda-bit.c +++ b/drivers/media/platform/coda/coda-bit.c @@ -2010,6 +2010,9 @@ static int coda_prepare_decode(struct coda_ctx *ctx) /* Clear decode success flag */ coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS); + /* Clear error return value */ + coda_write(dev, 0, CODA_RET_DEC_PIC_ERR_MB); + trace_coda_dec_pic_run(ctx, meta); coda_command_async(ctx, CODA_COMMAND_PIC_RUN); diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c index d022c65bb34c..e20b340855e7 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c @@ -388,7 +388,7 @@ static void mtk_vdec_worker(struct work_struct *work) } buf.va = vb2_plane_vaddr(&src_buf->vb2_buf, 0); buf.dma_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); - buf.size = (size_t)src_buf->planes[0].bytesused; + buf.size = (size_t)src_buf->vb2_buf.planes[0].bytesused; if (!buf.va) { v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx); mtk_v4l2_err("[%d] id=%d src_addr is NULL!!", @@ -1155,10 +1155,10 @@ static void vb2ops_vdec_buf_queue(struct vb2_buffer *vb) src_mem.va = vb2_plane_vaddr(&src_buf->vb2_buf, 0); src_mem.dma_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); - src_mem.size = (size_t)src_buf->planes[0].bytesused; + src_mem.size = (size_t)src_buf->vb2_buf.planes[0].bytesused; mtk_v4l2_debug(2, "[%d] buf id=%d va=%p dma=%pad size=%zx", - ctx->id, src_buf->index, + ctx->id, src_buf->vb2_buf.index, src_mem.va, &src_mem.dma_addr, src_mem.size); @@ -1182,7 +1182,7 @@ static void vb2ops_vdec_buf_queue(struct vb2_buffer *vb) } mtk_v4l2_debug(ret ? 0 : 1, "[%d] vdec_if_decode() src_buf=%d, size=%zu, fail=%d, res_chg=%d", - ctx->id, src_buf->index, + ctx->id, src_buf->vb2_buf.index, src_mem.size, ret, res_chg); return; } diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index c6b48b5925fb..50351adafc47 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -894,7 +894,7 @@ static void vb2ops_venc_stop_streaming(struct vb2_queue *q) if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) { while ((dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx))) { - dst_buf->planes[0].bytesused = 0; + dst_buf->vb2_buf.planes[0].bytesused = 0; v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR); } } else { @@ -947,7 +947,7 @@ static int mtk_venc_encode_header(void *priv) bs_buf.va = vb2_plane_vaddr(&dst_buf->vb2_buf, 0); bs_buf.dma_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); - bs_buf.size = (size_t)dst_buf->planes[0].length; + bs_buf.size = (size_t)dst_buf->vb2_buf.planes[0].length; mtk_v4l2_debug(1, "[%d] buf id=%d va=0x%p dma_addr=0x%llx size=%zu", @@ -976,7 +976,7 @@ static int mtk_venc_encode_header(void *priv) } ctx->state = MTK_STATE_HEADER; - dst_buf->planes[0].bytesused = enc_result.bs_size; + dst_buf->vb2_buf.planes[0].bytesused = enc_result.bs_size; v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_DONE); return 0; @@ -1107,12 +1107,12 @@ static void mtk_venc_worker(struct work_struct *work) if (ret) { v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); - dst_buf->planes[0].bytesused = 0; + dst_buf->vb2_buf.planes[0].bytesused = 0; v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR); mtk_v4l2_err("venc_if_encode failed=%d", ret); } else { v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); - dst_buf->planes[0].bytesused = enc_result.bs_size; + dst_buf->vb2_buf.planes[0].bytesused = enc_result.bs_size; v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_DONE); mtk_v4l2_debug(2, "venc_if_encode bs size=%d", enc_result.bs_size); diff --git a/drivers/media/platform/stm32/stm32-dcmi.c b/drivers/media/platform/stm32/stm32-dcmi.c index 5fe5b38fa901..922855b6025c 100644 --- a/drivers/media/platform/stm32/stm32-dcmi.c +++ b/drivers/media/platform/stm32/stm32-dcmi.c @@ -811,6 +811,9 @@ static int dcmi_try_fmt(struct stm32_dcmi *dcmi, struct v4l2_format *f, sd_fmt = find_format_by_fourcc(dcmi, pix->pixelformat); if (!sd_fmt) { + if (!dcmi->num_of_sd_formats) + return -ENODATA; + sd_fmt = dcmi->sd_formats[dcmi->num_of_sd_formats - 1]; pix->pixelformat = sd_fmt->fourcc; } @@ -989,6 +992,9 @@ static int dcmi_set_sensor_format(struct stm32_dcmi *dcmi, sd_fmt = find_format_by_fourcc(dcmi, pix->pixelformat); if (!sd_fmt) { + if (!dcmi->num_of_sd_formats) + return -ENODATA; + sd_fmt = dcmi->sd_formats[dcmi->num_of_sd_formats - 1]; pix->pixelformat = sd_fmt->fourcc; } @@ -1645,7 +1651,7 @@ static int dcmi_probe(struct platform_device *pdev) dcmi->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL); if (IS_ERR(dcmi->rstc)) { dev_err(&pdev->dev, "Could not get reset control\n"); - return -ENODEV; + return PTR_ERR(dcmi->rstc); } /* Get bus characteristics from devicetree */ @@ -1660,7 +1666,7 @@ static int dcmi_probe(struct platform_device *pdev) of_node_put(np); if (ret) { dev_err(&pdev->dev, "Could not parse the endpoint\n"); - return -ENODEV; + return ret; } if (ep.bus_type == V4L2_MBUS_CSI2_DPHY) { @@ -1673,8 +1679,9 @@ static int dcmi_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq <= 0) { - dev_err(&pdev->dev, "Could not get irq\n"); - return -ENODEV; + if (irq != -EPROBE_DEFER) + dev_err(&pdev->dev, "Could not get irq\n"); + return irq; } dcmi->res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1694,12 +1701,13 @@ static int dcmi_probe(struct platform_device *pdev) dev_name(&pdev->dev), dcmi); if (ret) { dev_err(&pdev->dev, "Unable to request irq %d\n", irq); - return -ENODEV; + return ret; } mclk = devm_clk_get(&pdev->dev, "mclk"); if (IS_ERR(mclk)) { - dev_err(&pdev->dev, "Unable to get mclk\n"); + if (PTR_ERR(mclk) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Unable to get mclk\n"); return PTR_ERR(mclk); } diff --git a/drivers/media/platform/vicodec/codec-fwht.c b/drivers/media/platform/vicodec/codec-fwht.c index d1d6085da9f1..cf469a1191aa 100644 --- a/drivers/media/platform/vicodec/codec-fwht.c +++ b/drivers/media/platform/vicodec/codec-fwht.c @@ -46,8 +46,12 @@ static const uint8_t zigzag[64] = { 63, }; - -static int rlc(const s16 *in, __be16 *output, int blocktype) +/* + * noinline_for_stack to work around + * https://bugs.llvm.org/show_bug.cgi?id=38809 + */ +static int noinline_for_stack +rlc(const s16 *in, __be16 *output, int blocktype) { s16 block[8 * 8]; s16 *wp = block; @@ -106,8 +110,8 @@ static int rlc(const s16 *in, __be16 *output, int blocktype) * This function will worst-case increase rlc_in by 65*2 bytes: * one s16 value for the header and 8 * 8 coefficients of type s16. */ -static u16 derlc(const __be16 **rlc_in, s16 *dwht_out, - const __be16 *end_of_input) +static noinline_for_stack u16 +derlc(const __be16 **rlc_in, s16 *dwht_out, const __be16 *end_of_input) { /* header */ const __be16 *input = *rlc_in; @@ -240,8 +244,9 @@ static void dequantize_inter(s16 *coeff) *coeff <<= *quant; } -static void fwht(const u8 *block, s16 *output_block, unsigned int stride, - unsigned int input_step, bool intra) +static void noinline_for_stack fwht(const u8 *block, s16 *output_block, + unsigned int stride, + unsigned int input_step, bool intra) { /* we'll need more than 8 bits for the transformed coefficients */ s32 workspace1[8], workspace2[8]; @@ -373,7 +378,8 @@ static void fwht(const u8 *block, s16 *output_block, unsigned int stride, * Furthermore values can be negative... This is just a version that * works with 16 signed data */ -static void fwht16(const s16 *block, s16 *output_block, int stride, int intra) +static void noinline_for_stack +fwht16(const s16 *block, s16 *output_block, int stride, int intra) { /* we'll need more than 8 bits for the transformed coefficients */ s32 workspace1[8], workspace2[8]; @@ -456,7 +462,8 @@ static void fwht16(const s16 *block, s16 *output_block, int stride, int intra) } } -static void ifwht(const s16 *block, s16 *output_block, int intra) +static noinline_for_stack void +ifwht(const s16 *block, s16 *output_block, int intra) { /* * we'll need more than 8 bits for the transformed coefficients @@ -604,9 +611,9 @@ static int var_inter(const s16 *old, const s16 *new) return ret; } -static int decide_blocktype(const u8 *cur, const u8 *reference, - s16 *deltablock, unsigned int stride, - unsigned int input_step) +static noinline_for_stack int +decide_blocktype(const u8 *cur, const u8 *reference, s16 *deltablock, + unsigned int stride, unsigned int input_step) { s16 tmp[64]; s16 old[64]; diff --git a/drivers/media/platform/vicodec/vicodec-core.c b/drivers/media/platform/vicodec/vicodec-core.c index d7636fe9e174..8788369e59a0 100644 --- a/drivers/media/platform/vicodec/vicodec-core.c +++ b/drivers/media/platform/vicodec/vicodec-core.c @@ -159,12 +159,10 @@ static int device_process(struct vicodec_ctx *ctx, struct vb2_v4l2_buffer *dst_vb) { struct vicodec_dev *dev = ctx->dev; - struct vicodec_q_data *q_dst; struct v4l2_fwht_state *state = &ctx->state; u8 *p_src, *p_dst; int ret; - q_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); if (ctx->is_enc) p_src = vb2_plane_vaddr(&src_vb->vb2_buf, 0); else @@ -186,8 +184,10 @@ static int device_process(struct vicodec_ctx *ctx, return ret; vb2_set_plane_payload(&dst_vb->vb2_buf, 0, ret); } else { + struct vicodec_q_data *q_dst; unsigned int comp_frame_size = ntohl(ctx->state.header.size); + q_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); if (comp_frame_size > ctx->comp_max_size) return -EINVAL; state->info = q_dst->info; @@ -196,11 +196,6 @@ static int device_process(struct vicodec_ctx *ctx, return ret; vb2_set_plane_payload(&dst_vb->vb2_buf, 0, q_dst->sizeimage); } - - dst_vb->sequence = q_dst->sequence++; - dst_vb->flags &= ~V4L2_BUF_FLAG_LAST; - v4l2_m2m_buf_copy_metadata(src_vb, dst_vb, !ctx->is_enc); - return 0; } @@ -274,16 +269,22 @@ static void device_run(void *priv) struct vicodec_ctx *ctx = priv; struct vicodec_dev *dev = ctx->dev; struct vb2_v4l2_buffer *src_buf, *dst_buf; - struct vicodec_q_data *q_src; + struct vicodec_q_data *q_src, *q_dst; u32 state; src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); q_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); + q_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); state = VB2_BUF_STATE_DONE; if (device_process(ctx, src_buf, dst_buf)) state = VB2_BUF_STATE_ERROR; + else + dst_buf->sequence = q_dst->sequence++; + dst_buf->flags &= ~V4L2_BUF_FLAG_LAST; + v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, !ctx->is_enc); + ctx->last_dst_buf = dst_buf; spin_lock(ctx->lock); @@ -1338,8 +1339,11 @@ static int vicodec_start_streaming(struct vb2_queue *q, chroma_div = info->width_div * info->height_div; q_data->sequence = 0; - ctx->last_src_buf = NULL; - ctx->last_dst_buf = NULL; + if (V4L2_TYPE_IS_OUTPUT(q->type)) + ctx->last_src_buf = NULL; + else + ctx->last_dst_buf = NULL; + state->gop_cnt = 0; if ((V4L2_TYPE_IS_OUTPUT(q->type) && !ctx->is_enc) || diff --git a/drivers/media/platform/video-mux.c b/drivers/media/platform/video-mux.c index 0ba30756e1e4..d8cd5f5cb10d 100644 --- a/drivers/media/platform/video-mux.c +++ b/drivers/media/platform/video-mux.c @@ -419,9 +419,14 @@ static int video_mux_probe(struct platform_device *pdev) vmux->active = -1; vmux->pads = devm_kcalloc(dev, num_pads, sizeof(*vmux->pads), GFP_KERNEL); + if (!vmux->pads) + return -ENOMEM; + vmux->format_mbus = devm_kcalloc(dev, num_pads, sizeof(*vmux->format_mbus), GFP_KERNEL); + if (!vmux->format_mbus) + return -ENOMEM; for (i = 0; i < num_pads; i++) { vmux->pads[i].flags = (i < num_pads - 1) ? MEDIA_PAD_FL_SINK diff --git a/drivers/media/platform/vim2m.c b/drivers/media/platform/vim2m.c index 34dcaca45d8b..dd47821fc661 100644 --- a/drivers/media/platform/vim2m.c +++ b/drivers/media/platform/vim2m.c @@ -1262,6 +1262,15 @@ static int vim2m_release(struct file *file) return 0; } +static void vim2m_device_release(struct video_device *vdev) +{ + struct vim2m_dev *dev = container_of(vdev, struct vim2m_dev, vfd); + + v4l2_device_unregister(&dev->v4l2_dev); + v4l2_m2m_release(dev->m2m_dev); + kfree(dev); +} + static const struct v4l2_file_operations vim2m_fops = { .owner = THIS_MODULE, .open = vim2m_open, @@ -1277,7 +1286,7 @@ static const struct video_device vim2m_videodev = { .fops = &vim2m_fops, .ioctl_ops = &vim2m_ioctl_ops, .minor = -1, - .release = video_device_release_empty, + .release = vim2m_device_release, .device_caps = V4L2_CAP_VIDEO_M2M | V4L2_CAP_STREAMING, }; @@ -1298,13 +1307,13 @@ static int vim2m_probe(struct platform_device *pdev) struct video_device *vfd; int ret; - dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev); if (ret) - return ret; + goto error_free; atomic_set(&dev->num_inst, 0); mutex_init(&dev->dev_mutex); @@ -1317,7 +1326,7 @@ static int vim2m_probe(struct platform_device *pdev) ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0); if (ret) { v4l2_err(&dev->v4l2_dev, "Failed to register video device\n"); - goto unreg_v4l2; + goto error_v4l2; } video_set_drvdata(vfd, dev); @@ -1330,7 +1339,7 @@ static int vim2m_probe(struct platform_device *pdev) if (IS_ERR(dev->m2m_dev)) { v4l2_err(&dev->v4l2_dev, "Failed to init mem2mem device\n"); ret = PTR_ERR(dev->m2m_dev); - goto unreg_dev; + goto error_dev; } #ifdef CONFIG_MEDIA_CONTROLLER @@ -1346,27 +1355,29 @@ static int vim2m_probe(struct platform_device *pdev) MEDIA_ENT_F_PROC_VIDEO_SCALER); if (ret) { v4l2_err(&dev->v4l2_dev, "Failed to init mem2mem media controller\n"); - goto unreg_m2m; + goto error_m2m; } ret = media_device_register(&dev->mdev); if (ret) { v4l2_err(&dev->v4l2_dev, "Failed to register mem2mem media device\n"); - goto unreg_m2m_mc; + goto error_m2m_mc; } #endif return 0; #ifdef CONFIG_MEDIA_CONTROLLER -unreg_m2m_mc: +error_m2m_mc: v4l2_m2m_unregister_media_controller(dev->m2m_dev); -unreg_m2m: +error_m2m: v4l2_m2m_release(dev->m2m_dev); #endif -unreg_dev: +error_dev: video_unregister_device(&dev->vfd); -unreg_v4l2: +error_v4l2: v4l2_device_unregister(&dev->v4l2_dev); +error_free: + kfree(dev); return ret; } @@ -1382,9 +1393,7 @@ static int vim2m_remove(struct platform_device *pdev) v4l2_m2m_unregister_media_controller(dev->m2m_dev); media_device_cleanup(&dev->mdev); #endif - v4l2_m2m_release(dev->m2m_dev); video_unregister_device(&dev->vfd); - v4l2_device_unregister(&dev->v4l2_dev); return 0; } diff --git a/drivers/media/platform/vimc/vimc-core.c b/drivers/media/platform/vimc/vimc-core.c index 0fbb7914098f..3aa62d7e3d0e 100644 --- a/drivers/media/platform/vimc/vimc-core.c +++ b/drivers/media/platform/vimc/vimc-core.c @@ -304,6 +304,8 @@ static int vimc_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "probe"); + memset(&vimc->mdev, 0, sizeof(vimc->mdev)); + /* Create platform_device for each entity in the topology*/ vimc->subdevs = devm_kcalloc(&vimc->pdev.dev, vimc->pipe_cfg->num_ents, sizeof(*vimc->subdevs), GFP_KERNEL); diff --git a/drivers/media/platform/vimc/vimc-streamer.c b/drivers/media/platform/vimc/vimc-streamer.c index fcc897fb247b..392754c18046 100644 --- a/drivers/media/platform/vimc/vimc-streamer.c +++ b/drivers/media/platform/vimc/vimc-streamer.c @@ -120,7 +120,6 @@ static int vimc_streamer_thread(void *data) int i; set_freezable(); - set_current_state(TASK_UNINTERRUPTIBLE); for (;;) { try_to_freeze(); @@ -137,6 +136,7 @@ static int vimc_streamer_thread(void *data) break; } //wait for 60hz + set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(HZ / 60); } diff --git a/drivers/media/platform/vivid/vivid-vid-cap.c b/drivers/media/platform/vivid/vivid-vid-cap.c index 52eeda624d7e..530ac8decb25 100644 --- a/drivers/media/platform/vivid/vivid-vid-cap.c +++ b/drivers/media/platform/vivid/vivid-vid-cap.c @@ -1007,7 +1007,7 @@ int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection v4l2_rect_map_inside(&s->r, &dev->fmt_cap_rect); if (dev->bitmap_cap && (compose->width != s->r.width || compose->height != s->r.height)) { - kfree(dev->bitmap_cap); + vfree(dev->bitmap_cap); dev->bitmap_cap = NULL; } *compose = s->r; diff --git a/drivers/media/radio/wl128x/fmdrv_common.c b/drivers/media/radio/wl128x/fmdrv_common.c index 3c8987af3772..ac5706b4cab8 100644 --- a/drivers/media/radio/wl128x/fmdrv_common.c +++ b/drivers/media/radio/wl128x/fmdrv_common.c @@ -489,7 +489,8 @@ int fmc_send_cmd(struct fmdev *fmdev, u8 fm_op, u16 type, void *payload, return -EIO; } /* Send response data to caller */ - if (response != NULL && response_len != NULL && evt_hdr->dlen) { + if (response != NULL && response_len != NULL && evt_hdr->dlen && + evt_hdr->dlen <= payload_len) { /* Skip header info and copy only response data */ skb_pull(skb, sizeof(struct fm_event_msg_hdr)); memcpy(response, skb->data, evt_hdr->dlen); @@ -583,6 +584,8 @@ static void fm_irq_handle_flag_getcmd_resp(struct fmdev *fmdev) return; fm_evt_hdr = (void *)skb->data; + if (fm_evt_hdr->dlen > sizeof(fmdev->irq_info.flag)) + return; /* Skip header info and copy only response data */ skb_pull(skb, sizeof(struct fm_event_msg_hdr)); @@ -1308,7 +1311,7 @@ static int load_default_rx_configuration(struct fmdev *fmdev) static int fm_power_up(struct fmdev *fmdev, u8 mode) { u16 payload; - __be16 asic_id, asic_ver; + __be16 asic_id = 0, asic_ver = 0; int resp_len, ret; u8 fw_name[50]; diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c index ffe2c672d105..3998ba29beb6 100644 --- a/drivers/media/rc/serial_ir.c +++ b/drivers/media/rc/serial_ir.c @@ -773,8 +773,6 @@ static void serial_ir_exit(void) static int __init serial_ir_init_module(void) { - int result; - switch (type) { case IR_HOMEBREW: case IR_IRDEO: @@ -802,12 +800,7 @@ static int __init serial_ir_init_module(void) if (sense != -1) sense = !!sense; - result = serial_ir_init(); - if (!result) - return 0; - - serial_ir_exit(); - return result; + return serial_ir_init(); } static void __exit serial_ir_exit_module(void) diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c index 7876c897cc1d..222723d946e4 100644 --- a/drivers/media/usb/au0828/au0828-video.c +++ b/drivers/media/usb/au0828/au0828-video.c @@ -758,6 +758,9 @@ static int au0828_analog_stream_enable(struct au0828_dev *d) dprintk(1, "au0828_analog_stream_enable called\n"); + if (test_bit(DEV_DISCONNECTED, &d->dev_state)) + return -ENODEV; + iface = usb_ifnum_to_if(d->usbdev, 0); if (iface && iface->cur_altsetting->desc.bAlternateSetting != 5) { dprintk(1, "Changing intf#0 to alt 5\n"); @@ -839,9 +842,9 @@ int au0828_start_analog_streaming(struct vb2_queue *vq, unsigned int count) return rc; } + v4l2_device_call_all(&dev->v4l2_dev, 0, video, s_stream, 1); + if (vq->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) { - v4l2_device_call_all(&dev->v4l2_dev, 0, video, - s_stream, 1); dev->vid_timeout_running = 1; mod_timer(&dev->vid_timeout, jiffies + (HZ / 10)); } else if (vq->type == V4L2_BUF_TYPE_VBI_CAPTURE) { @@ -861,10 +864,11 @@ static void au0828_stop_streaming(struct vb2_queue *vq) dprintk(1, "au0828_stop_streaming called %d\n", dev->streaming_users); - if (dev->streaming_users-- == 1) + if (dev->streaming_users-- == 1) { au0828_uninit_isoc(dev); + v4l2_device_call_all(&dev->v4l2_dev, 0, video, s_stream, 0); + } - v4l2_device_call_all(&dev->v4l2_dev, 0, video, s_stream, 0); dev->vid_timeout_running = 0; del_timer_sync(&dev->vid_timeout); @@ -893,8 +897,10 @@ void au0828_stop_vbi_streaming(struct vb2_queue *vq) dprintk(1, "au0828_stop_vbi_streaming called %d\n", dev->streaming_users); - if (dev->streaming_users-- == 1) + if (dev->streaming_users-- == 1) { au0828_uninit_isoc(dev); + v4l2_device_call_all(&dev->v4l2_dev, 0, video, s_stream, 0); + } spin_lock_irqsave(&dev->slock, flags); if (dev->isoc_ctl.vbi_buf != NULL) { diff --git a/drivers/media/usb/cpia2/cpia2_v4l.c b/drivers/media/usb/cpia2/cpia2_v4l.c index 95c0bd4a19dc..45caf78119c4 100644 --- a/drivers/media/usb/cpia2/cpia2_v4l.c +++ b/drivers/media/usb/cpia2/cpia2_v4l.c @@ -1240,8 +1240,7 @@ static int __init cpia2_init(void) LOG("%s v%s\n", ABOUT, CPIA_VERSION); check_parameters(); - cpia2_usb_init(); - return 0; + return cpia2_usb_init(); } diff --git a/drivers/media/usb/dvb-usb-v2/dvbsky.c b/drivers/media/usb/dvb-usb-v2/dvbsky.c index e28bd8836751..ae0814dd202a 100644 --- a/drivers/media/usb/dvb-usb-v2/dvbsky.c +++ b/drivers/media/usb/dvb-usb-v2/dvbsky.c @@ -615,16 +615,18 @@ static int dvbsky_init(struct dvb_usb_device *d) return 0; } -static void dvbsky_exit(struct dvb_usb_device *d) +static int dvbsky_frontend_detach(struct dvb_usb_adapter *adap) { + struct dvb_usb_device *d = adap_to_d(adap); struct dvbsky_state *state = d_to_priv(d); - struct dvb_usb_adapter *adap = &d->adapter[0]; + + dev_dbg(&d->udev->dev, "%s: adap=%d\n", __func__, adap->id); dvb_module_release(state->i2c_client_tuner); dvb_module_release(state->i2c_client_demod); dvb_module_release(state->i2c_client_ci); - adap->fe[0] = NULL; + return 0; } /* DVB USB Driver stuff */ @@ -640,11 +642,11 @@ static struct dvb_usb_device_properties dvbsky_s960_props = { .i2c_algo = &dvbsky_i2c_algo, .frontend_attach = dvbsky_s960_attach, + .frontend_detach = dvbsky_frontend_detach, .init = dvbsky_init, .get_rc_config = dvbsky_get_rc_config, .streaming_ctrl = dvbsky_streaming_ctrl, .identify_state = dvbsky_identify_state, - .exit = dvbsky_exit, .read_mac_address = dvbsky_read_mac_addr, .num_adapters = 1, @@ -667,11 +669,11 @@ static struct dvb_usb_device_properties dvbsky_s960c_props = { .i2c_algo = &dvbsky_i2c_algo, .frontend_attach = dvbsky_s960c_attach, + .frontend_detach = dvbsky_frontend_detach, .init = dvbsky_init, .get_rc_config = dvbsky_get_rc_config, .streaming_ctrl = dvbsky_streaming_ctrl, .identify_state = dvbsky_identify_state, - .exit = dvbsky_exit, .read_mac_address = dvbsky_read_mac_addr, .num_adapters = 1, @@ -694,11 +696,11 @@ static struct dvb_usb_device_properties dvbsky_t680c_props = { .i2c_algo = &dvbsky_i2c_algo, .frontend_attach = dvbsky_t680c_attach, + .frontend_detach = dvbsky_frontend_detach, .init = dvbsky_init, .get_rc_config = dvbsky_get_rc_config, .streaming_ctrl = dvbsky_streaming_ctrl, .identify_state = dvbsky_identify_state, - .exit = dvbsky_exit, .read_mac_address = dvbsky_read_mac_addr, .num_adapters = 1, @@ -721,11 +723,11 @@ static struct dvb_usb_device_properties dvbsky_t330_props = { .i2c_algo = &dvbsky_i2c_algo, .frontend_attach = dvbsky_t330_attach, + .frontend_detach = dvbsky_frontend_detach, .init = dvbsky_init, .get_rc_config = dvbsky_get_rc_config, .streaming_ctrl = dvbsky_streaming_ctrl, .identify_state = dvbsky_identify_state, - .exit = dvbsky_exit, .read_mac_address = dvbsky_read_mac_addr, .num_adapters = 1, @@ -748,11 +750,11 @@ static struct dvb_usb_device_properties mygica_t230c_props = { .i2c_algo = &dvbsky_i2c_algo, .frontend_attach = dvbsky_mygica_t230c_attach, + .frontend_detach = dvbsky_frontend_detach, .init = dvbsky_init, .get_rc_config = dvbsky_get_rc_config, .streaming_ctrl = dvbsky_streaming_ctrl, .identify_state = dvbsky_identify_state, - .exit = dvbsky_exit, .num_adapters = 1, .adapter = { diff --git a/drivers/media/usb/go7007/go7007-fw.c b/drivers/media/usb/go7007/go7007-fw.c index 24f5b615dc7a..dfa9f899d0c2 100644 --- a/drivers/media/usb/go7007/go7007-fw.c +++ b/drivers/media/usb/go7007/go7007-fw.c @@ -1499,8 +1499,8 @@ static int modet_to_package(struct go7007 *go, __le16 *code, int space) return cnt; } -static int do_special(struct go7007 *go, u16 type, __le16 *code, int space, - int *framelen) +static noinline_for_stack int do_special(struct go7007 *go, u16 type, + __le16 *code, int space, int *framelen) { switch (type) { case SPECIAL_FRM_HEAD: diff --git a/drivers/media/usb/gspca/gspca.c b/drivers/media/usb/gspca/gspca.c index ac70b36d67b7..4d7517411cc2 100644 --- a/drivers/media/usb/gspca/gspca.c +++ b/drivers/media/usb/gspca/gspca.c @@ -294,7 +294,7 @@ static void fill_frame(struct gspca_dev *gspca_dev, /* check the packet status and length */ st = urb->iso_frame_desc[i].status; if (st) { - pr_err("ISOC data error: [%d] len=%d, status=%d\n", + gspca_dbg(gspca_dev, D_PACK, "ISOC data error: [%d] len=%d, status=%d\n", i, len, st); gspca_dev->last_packet_type = DISCARD_PACKET; continue; @@ -314,6 +314,8 @@ static void fill_frame(struct gspca_dev *gspca_dev, } resubmit: + if (!gspca_dev->streaming) + return; /* resubmit the URB */ st = usb_submit_urb(urb, GFP_ATOMIC); if (st < 0) @@ -330,7 +332,7 @@ static void isoc_irq(struct urb *urb) struct gspca_dev *gspca_dev = (struct gspca_dev *) urb->context; gspca_dbg(gspca_dev, D_PACK, "isoc irq\n"); - if (!vb2_start_streaming_called(&gspca_dev->queue)) + if (!gspca_dev->streaming) return; fill_frame(gspca_dev, urb); } @@ -344,7 +346,7 @@ static void bulk_irq(struct urb *urb) int st; gspca_dbg(gspca_dev, D_PACK, "bulk irq\n"); - if (!vb2_start_streaming_called(&gspca_dev->queue)) + if (!gspca_dev->streaming) return; switch (urb->status) { case 0: @@ -367,6 +369,8 @@ static void bulk_irq(struct urb *urb) urb->actual_length); resubmit: + if (!gspca_dev->streaming) + return; /* resubmit the URB */ if (gspca_dev->cam.bulk_nurbs != 0) { st = usb_submit_urb(urb, GFP_ATOMIC); @@ -1638,6 +1642,8 @@ void gspca_disconnect(struct usb_interface *intf) mutex_lock(&gspca_dev->usb_lock); gspca_dev->present = false; + destroy_urbs(gspca_dev); + gspca_input_destroy_urb(gspca_dev); vb2_queue_error(&gspca_dev->queue); diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c index 446a999dd2ce..2bab4713bc5b 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c @@ -666,6 +666,8 @@ static int ctrl_get_input(struct pvr2_ctrl *cptr,int *vp) static int ctrl_check_input(struct pvr2_ctrl *cptr,int v) { + if (v < 0 || v > PVR2_CVAL_INPUT_MAX) + return 0; return ((1 << v) & cptr->hdw->input_allowed_mask) != 0; } diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.h b/drivers/media/usb/pvrusb2/pvrusb2-hdw.h index 25648add77e5..bd2b7a67b732 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.h +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.h @@ -50,6 +50,7 @@ #define PVR2_CVAL_INPUT_COMPOSITE 2 #define PVR2_CVAL_INPUT_SVIDEO 3 #define PVR2_CVAL_INPUT_RADIO 4 +#define PVR2_CVAL_INPUT_MAX PVR2_CVAL_INPUT_RADIO enum pvr2_config { pvr2_config_empty, /* No configuration */ diff --git a/drivers/media/usb/siano/smsusb.c b/drivers/media/usb/siano/smsusb.c index 4fc03ec8a4f1..e39f3f40dfdd 100644 --- a/drivers/media/usb/siano/smsusb.c +++ b/drivers/media/usb/siano/smsusb.c @@ -400,6 +400,7 @@ static int smsusb_init_device(struct usb_interface *intf, int board_id) struct smsusb_device_t *dev; void *mdev; int i, rc; + int align = 0; /* create device object */ dev = kzalloc(sizeof(struct smsusb_device_t), GFP_KERNEL); @@ -411,6 +412,24 @@ static int smsusb_init_device(struct usb_interface *intf, int board_id) dev->udev = interface_to_usbdev(intf); dev->state = SMSUSB_DISCONNECTED; + for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { + struct usb_endpoint_descriptor *desc = + &intf->cur_altsetting->endpoint[i].desc; + + if (desc->bEndpointAddress & USB_DIR_IN) { + dev->in_ep = desc->bEndpointAddress; + align = usb_endpoint_maxp(desc) - sizeof(struct sms_msg_hdr); + } else { + dev->out_ep = desc->bEndpointAddress; + } + } + + pr_debug("in_ep = %02x, out_ep = %02x\n", dev->in_ep, dev->out_ep); + if (!dev->in_ep || !dev->out_ep || align < 0) { /* Missing endpoints? */ + smsusb_term_device(intf); + return -ENODEV; + } + params.device_type = sms_get_board(board_id)->type; switch (params.device_type) { @@ -425,24 +444,12 @@ static int smsusb_init_device(struct usb_interface *intf, int board_id) /* fall-thru */ default: dev->buffer_size = USB2_BUFFER_SIZE; - dev->response_alignment = - le16_to_cpu(dev->udev->ep_in[1]->desc.wMaxPacketSize) - - sizeof(struct sms_msg_hdr); + dev->response_alignment = align; params.flags |= SMS_DEVICE_FAMILY2; break; } - for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { - if (intf->cur_altsetting->endpoint[i].desc. bEndpointAddress & USB_DIR_IN) - dev->in_ep = intf->cur_altsetting->endpoint[i].desc.bEndpointAddress; - else - dev->out_ep = intf->cur_altsetting->endpoint[i].desc.bEndpointAddress; - } - - pr_debug("in_ep = %02x, out_ep = %02x\n", - dev->in_ep, dev->out_ep); - params.device = &dev->udev->dev; params.usb_device = dev->udev; params.buffer_size = dev->buffer_size; diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index b79d3bbd8350..54d66dbc2a31 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -3899,18 +3899,19 @@ void v4l2_ctrl_request_complete(struct media_request *req, } EXPORT_SYMBOL(v4l2_ctrl_request_complete); -void v4l2_ctrl_request_setup(struct media_request *req, +int v4l2_ctrl_request_setup(struct media_request *req, struct v4l2_ctrl_handler *main_hdl) { struct media_request_object *obj; struct v4l2_ctrl_handler *hdl; struct v4l2_ctrl_ref *ref; + int ret = 0; if (!req || !main_hdl) - return; + return 0; if (WARN_ON(req->state != MEDIA_REQUEST_STATE_QUEUED)) - return; + return -EBUSY; /* * Note that it is valid if nothing was found. It means @@ -3919,10 +3920,10 @@ void v4l2_ctrl_request_setup(struct media_request *req, */ obj = media_request_object_find(req, &req_ops, main_hdl); if (!obj) - return; + return 0; if (obj->completed) { media_request_object_put(obj); - return; + return -EBUSY; } hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj); @@ -3990,12 +3991,15 @@ void v4l2_ctrl_request_setup(struct media_request *req, update_from_auto_cluster(master); } - try_or_set_cluster(NULL, master, true, 0); - + ret = try_or_set_cluster(NULL, master, true, 0); v4l2_ctrl_unlock(master); + + if (ret) + break; } media_request_object_put(obj); + return ret; } EXPORT_SYMBOL(v4l2_ctrl_request_setup); diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c index 20571846e636..ccefa55813ad 100644 --- a/drivers/media/v4l2-core/v4l2-fwnode.c +++ b/drivers/media/v4l2-core/v4l2-fwnode.c @@ -163,7 +163,7 @@ static int v4l2_fwnode_endpoint_parse_csi2_bus(struct fwnode_handle *fwnode, } if (use_default_lane_mapping) - pr_debug("using default lane mapping\n"); + pr_debug("no lane mapping given, using defaults\n"); } rval = fwnode_property_read_u32_array(fwnode, "data-lanes", NULL, 0); @@ -175,6 +175,10 @@ static int v4l2_fwnode_endpoint_parse_csi2_bus(struct fwnode_handle *fwnode, num_data_lanes); have_data_lanes = true; + if (use_default_lane_mapping) { + pr_debug("data-lanes property exists; disabling default mapping\n"); + use_default_lane_mapping = false; + } } for (i = 0; i < num_data_lanes; i++) { @@ -225,6 +229,10 @@ static int v4l2_fwnode_endpoint_parse_csi2_bus(struct fwnode_handle *fwnode, if (bus_type == V4L2_MBUS_CSI2_DPHY || bus_type == V4L2_MBUS_CSI2_CPHY || lanes_used || have_clk_lane || (flags & ~V4L2_MBUS_CSI2_CONTINUOUS_CLOCK)) { + /* Only D-PHY has a clock lane. */ + unsigned int dfl_data_lane_index = + bus_type == V4L2_MBUS_CSI2_DPHY; + bus->flags = flags; if (bus_type == V4L2_MBUS_UNKNOWN) vep->bus_type = V4L2_MBUS_CSI2_DPHY; @@ -233,7 +241,7 @@ static int v4l2_fwnode_endpoint_parse_csi2_bus(struct fwnode_handle *fwnode, if (use_default_lane_mapping) { bus->clock_lane = 0; for (i = 0; i < num_data_lanes; i++) - bus->data_lanes[i] = 1 + i; + bus->data_lanes[i] = dfl_data_lane_index + i; } else { bus->clock_lane = clock_lane; for (i = 0; i < num_data_lanes; i++) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 0a53598d982f..5bd8df926052 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -282,7 +282,7 @@ static int tegra_mc_setup_latency_allowance(struct tegra_mc *mc) u32 value; /* compute the number of MC clock cycles per tick */ - tick = mc->tick * clk_get_rate(mc->clk); + tick = (unsigned long long)mc->tick * clk_get_rate(mc->clk); do_div(tick, NSEC_PER_SEC); value = readl(mc->regs + MC_EMEM_ARB_CFG); diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index aba50ec98b4d..9545e87b6085 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -694,13 +694,13 @@ static void h_mspro_block_setup_cmd(struct memstick_dev *card, u64 offset, /*** Data transfer ***/ -static int mspro_block_issue_req(struct memstick_dev *card, bool chunk) +static int mspro_block_issue_req(struct memstick_dev *card) { struct mspro_block_data *msb = memstick_get_drvdata(card); u64 t_off; unsigned int count; - while (chunk) { + while (true) { msb->current_page = 0; msb->current_seg = 0; msb->seg_count = blk_rq_map_sg(msb->block_req->q, @@ -709,6 +709,7 @@ static int mspro_block_issue_req(struct memstick_dev *card, bool chunk) if (!msb->seg_count) { unsigned int bytes = blk_rq_cur_bytes(msb->block_req); + bool chunk; chunk = blk_update_request(msb->block_req, BLK_STS_RESOURCE, @@ -718,7 +719,7 @@ static int mspro_block_issue_req(struct memstick_dev *card, bool chunk) __blk_mq_end_request(msb->block_req, BLK_STS_RESOURCE); msb->block_req = NULL; - break; + return -EAGAIN; } t_off = blk_rq_pos(msb->block_req); @@ -735,8 +736,6 @@ static int mspro_block_issue_req(struct memstick_dev *card, bool chunk) memstick_new_req(card->host); return 0; } - - return 1; } static int mspro_block_complete_req(struct memstick_dev *card, int error) @@ -779,7 +778,7 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error) chunk = blk_update_request(msb->block_req, errno_to_blk_status(error), t_len); if (chunk) { - error = mspro_block_issue_req(card, chunk); + error = mspro_block_issue_req(card); if (!error) goto out; } else { @@ -849,7 +848,7 @@ static blk_status_t mspro_queue_rq(struct blk_mq_hw_ctx *hctx, msb->block_req = bd->rq; blk_mq_start_request(bd->rq); - if (mspro_block_issue_req(card, true)) + if (mspro_block_issue_req(card)) msb->block_req = NULL; spin_unlock_irq(&msb->q_lock); diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c index 50bffc3382d7..ff3fba16e735 100644 --- a/drivers/mfd/intel-lpss.c +++ b/drivers/mfd/intel-lpss.c @@ -273,6 +273,9 @@ static void intel_lpss_init_dev(const struct intel_lpss *lpss) { u32 value = LPSS_PRIV_SSP_REG_DIS_DMA_FIN; + /* Set the device in reset state */ + writel(0, lpss->priv + LPSS_PRIV_RESETS); + intel_lpss_deassert_reset(lpss); intel_lpss_set_remap_addr(lpss); diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c index 3bd75061f777..f78be039e463 100644 --- a/drivers/mfd/tps65912-spi.c +++ b/drivers/mfd/tps65912-spi.c @@ -27,6 +27,7 @@ static const struct of_device_id tps65912_spi_of_match_table[] = { { .compatible = "ti,tps65912", }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, tps65912_spi_of_match_table); static int tps65912_spi_probe(struct spi_device *spi) { diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c index 7c3c5fd5fcd0..86052c5c6069 100644 --- a/drivers/mfd/twl6040.c +++ b/drivers/mfd/twl6040.c @@ -322,8 +322,19 @@ int twl6040_power(struct twl6040 *twl6040, int on) } } + /* + * Register access can produce errors after power-up unless we + * wait at least 8ms based on measurements on duovero. + */ + usleep_range(10000, 12000); + /* Sync with the HW */ - regcache_sync(twl6040->regmap); + ret = regcache_sync(twl6040->regmap); + if (ret) { + dev_err(twl6040->dev, "Failed to sync with the HW: %i\n", + ret); + goto out; + } /* Default PLL configuration after power up */ twl6040->pll = TWL6040_SYSCLK_SEL_LPPLL; diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 36d0d5c9cfba..35be1cc11dd8 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -667,8 +667,16 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) pages[i].size = roundup(len, PAGE_SIZE); if (ctx->maps[i]) { + struct vm_area_struct *vma = NULL; + rpra[i].pv = (u64) ctx->args[i].ptr; pages[i].addr = ctx->maps[i]->phys; + + vma = find_vma(current->mm, ctx->args[i].ptr); + if (vma) + pages[i].addr += ctx->args[i].ptr - + vma->vm_start; + } else { rlen -= ALIGN(args, FASTRPC_ALIGN) - args; args = ALIGN(args, FASTRPC_ALIGN); @@ -782,6 +790,9 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, if (err) goto bail; } + + /* make sure that all CPU memory writes are seen by DSP */ + dma_wmb(); /* Send invoke buffer to remote dsp */ err = fastrpc_invoke_send(fl->sctx, ctx, kernel, handle); if (err) @@ -798,6 +809,8 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, goto bail; if (ctx->nscalars) { + /* make sure that all memory writes by DSP are seen by CPU */ + dma_rmb(); /* populate all the output buffers with results */ err = fastrpc_put_args(ctx, kernel); if (err) @@ -843,12 +856,12 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl, if (copy_from_user(&init, argp, sizeof(init))) { err = -EFAULT; - goto bail; + goto err; } if (init.filelen > INIT_FILELEN_MAX) { err = -EINVAL; - goto bail; + goto err; } inbuf.pgid = fl->tgid; @@ -862,17 +875,15 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl, if (init.filelen && init.filefd) { err = fastrpc_map_create(fl, init.filefd, init.filelen, &map); if (err) - goto bail; + goto err; } memlen = ALIGN(max(INIT_FILELEN_MAX, (int)init.filelen * 4), 1024 * 1024); err = fastrpc_buf_alloc(fl, fl->sctx->dev, memlen, &imem); - if (err) { - fastrpc_map_put(map); - goto bail; - } + if (err) + goto err_alloc; fl->init_mem = imem; args[0].ptr = (u64)(uintptr_t)&inbuf; @@ -908,13 +919,24 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl, err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, args); + if (err) + goto err_invoke; - if (err) { + kfree(args); + + return 0; + +err_invoke: + fl->init_mem = NULL; + fastrpc_buf_free(imem); +err_alloc: + if (map) { + spin_lock(&fl->lock); + list_del(&map->node); + spin_unlock(&fl->lock); fastrpc_map_put(map); - fastrpc_buf_free(imem); } - -bail: +err: kfree(args); return err; diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index 8c1b63a4337b..d2098b4d2945 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -780,6 +780,8 @@ static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) if ((m->addr == 0x0) || (m->size == 0)) return -EINVAL; + if (m->size > ULONG_MAX - PAGE_SIZE - (m->addr & ~PAGE_MASK)) + return -EINVAL; map_addr = (m->addr & PAGE_MASK); map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE); diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index 25265fd0fd6e..459cdbd94302 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -586,6 +586,10 @@ int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, /* determine space needed for page_list. */ data = (unsigned long)uaddr; offs = offset_in_page(data); + if (size > ULONG_MAX - PAGE_SIZE - offs) { + m->size = 0; /* mark unused and not added */ + return -EINVAL; + } m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE); m->page_list = kcalloc(m->nr_pages, diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 974a87789bd8..17ba26422b29 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -459,41 +459,31 @@ static ssize_t mmu_write(struct file *file, const char __user *buf, struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; - char kbuf[MMU_KBUF_SIZE], asid_kbuf[MMU_ASID_BUF_SIZE], - addr_kbuf[MMU_ADDR_BUF_SIZE]; + char kbuf[MMU_KBUF_SIZE]; char *c; ssize_t rc; if (!hdev->mmu_enable) return count; - memset(kbuf, 0, sizeof(kbuf)); - memset(asid_kbuf, 0, sizeof(asid_kbuf)); - memset(addr_kbuf, 0, sizeof(addr_kbuf)); - + if (count > sizeof(kbuf) - 1) + goto err; if (copy_from_user(kbuf, buf, count)) goto err; - - kbuf[MMU_KBUF_SIZE - 1] = 0; + kbuf[count] = 0; c = strchr(kbuf, ' '); if (!c) goto err; + *c = '\0'; - memcpy(asid_kbuf, kbuf, c - kbuf); - - rc = kstrtouint(asid_kbuf, 10, &dev_entry->mmu_asid); + rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid); if (rc) goto err; - c = strstr(kbuf, " 0x"); - if (!c) + if (strncmp(c+1, "0x", 2)) goto err; - - c += 3; - memcpy(addr_kbuf, c, (kbuf + count) - c); - - rc = kstrtoull(addr_kbuf, 16, &dev_entry->mmu_addr); + rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr); if (rc) goto err; @@ -525,10 +515,8 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, } sprintf(tmp_buf, "0x%08x\n", val); - rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, - strlen(tmp_buf) + 1); - - return rc; + return simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); } static ssize_t hl_data_write32(struct file *f, const char __user *buf, @@ -559,7 +547,6 @@ static ssize_t hl_get_power_state(struct file *f, char __user *buf, struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; char tmp_buf[200]; - ssize_t rc; int i; if (*ppos) @@ -574,10 +561,8 @@ static ssize_t hl_get_power_state(struct file *f, char __user *buf, sprintf(tmp_buf, "current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i); - rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, - strlen(tmp_buf) + 1); - - return rc; + return simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); } static ssize_t hl_set_power_state(struct file *f, const char __user *buf, @@ -630,8 +615,8 @@ static ssize_t hl_i2c_data_read(struct file *f, char __user *buf, } sprintf(tmp_buf, "0x%02x\n", val); - rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, - strlen(tmp_buf) + 1); + rc = simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); return rc; } @@ -720,18 +705,9 @@ static ssize_t hl_led2_write(struct file *f, const char __user *buf, static ssize_t hl_device_read(struct file *f, char __user *buf, size_t count, loff_t *ppos) { - char tmp_buf[200]; - ssize_t rc; - - if (*ppos) - return 0; - - sprintf(tmp_buf, - "Valid values: disable, enable, suspend, resume, cpu_timeout\n"); - rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, - strlen(tmp_buf) + 1); - - return rc; + static const char *help = + "Valid values: disable, enable, suspend, resume, cpu_timeout\n"; + return simple_read_from_buffer(buf, count, ppos, help, strlen(help)); } static ssize_t hl_device_write(struct file *f, const char __user *buf, @@ -739,7 +715,7 @@ static ssize_t hl_device_write(struct file *f, const char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - char data[30]; + char data[30] = {0}; /* don't allow partial writes */ if (*ppos != 0) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 77d51be66c7e..652c8edb2164 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -498,11 +498,8 @@ int hl_device_resume(struct hl_device *hdev) return rc; } -static void hl_device_hard_reset_pending(struct work_struct *work) +static void device_kill_open_processes(struct hl_device *hdev) { - struct hl_device_reset_work *device_reset_work = - container_of(work, struct hl_device_reset_work, reset_work); - struct hl_device *hdev = device_reset_work->hdev; u16 pending_total, pending_cnt; struct task_struct *task = NULL; @@ -537,6 +534,12 @@ static void hl_device_hard_reset_pending(struct work_struct *work) } } + /* We killed the open users, but because the driver cleans up after the + * user contexts are closed (e.g. mmu mappings), we need to wait again + * to make sure the cleaning phase is finished before continuing with + * the reset + */ + pending_cnt = pending_total; while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) { @@ -552,6 +555,16 @@ static void hl_device_hard_reset_pending(struct work_struct *work) mutex_unlock(&hdev->fd_open_cnt_lock); +} + +static void device_hard_reset_pending(struct work_struct *work) +{ + struct hl_device_reset_work *device_reset_work = + container_of(work, struct hl_device_reset_work, reset_work); + struct hl_device *hdev = device_reset_work->hdev; + + device_kill_open_processes(hdev); + hl_device_reset(hdev, true, true); kfree(device_reset_work); @@ -635,7 +648,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, * from a dedicated work */ INIT_WORK(&device_reset_work->reset_work, - hl_device_hard_reset_pending); + device_hard_reset_pending); device_reset_work->hdev = hdev; schedule_work(&device_reset_work->reset_work); @@ -1035,6 +1048,15 @@ void hl_device_fini(struct hl_device *hdev) /* Mark device as disabled */ hdev->disabled = true; + /* + * Flush anyone that is inside the critical section of enqueue + * jobs to the H/W + */ + hdev->asic_funcs->hw_queues_lock(hdev); + hdev->asic_funcs->hw_queues_unlock(hdev); + + device_kill_open_processes(hdev); + hl_hwmon_fini(hdev); device_late_fini(hdev); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3c509e19d69d..1533cb320540 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4407,6 +4407,9 @@ static u64 goya_read_pte(struct hl_device *hdev, u64 addr) { struct goya_device *goya = hdev->asic_specific; + if (hdev->hard_reset_pending) + return U64_MAX; + return readq(hdev->pcie_bar[DDR_BAR_ID] + (addr - goya->ddr_bar_cur_addr)); } @@ -4415,6 +4418,9 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val) { struct goya_device *goya = hdev->asic_specific; + if (hdev->hard_reset_pending) + return; + writeq(val, hdev->pcie_bar[DDR_BAR_ID] + (addr - goya->ddr_bar_cur_addr)); } diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index ce1fda40a8b8..425442819d31 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -675,11 +675,6 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, total_npages += npages; - if (first) { - first = false; - dma_addr &= PAGE_MASK_2MB; - } - if ((npages % PGS_IN_2MB_PAGE) || (dma_addr & (PAGE_SIZE_2MB - 1))) is_huge_page_opt = false; @@ -704,7 +699,6 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, phys_pg_pack->total_size = total_npages * page_size; j = 0; - first = true; for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { npages = get_sg_info(sg, &dma_addr); @@ -1046,10 +1040,17 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) mutex_lock(&ctx->mmu_lock); - for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) + for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { if (hl_mmu_unmap(ctx, next_vaddr, page_size)) dev_warn_ratelimited(hdev->dev, - "unmap failed for vaddr: 0x%llx\n", next_vaddr); + "unmap failed for vaddr: 0x%llx\n", next_vaddr); + + /* unmapping on Palladium can be really long, so avoid a CPU + * soft lockup bug by sleeping a little between unmapping pages + */ + if (hdev->pldm) + usleep_range(500, 1000); + } hdev->asic_funcs->mmu_invalidate_cache(hdev, true); diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index de20bdaa148d..8b01257783dd 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -1135,7 +1135,7 @@ static void kgdbts_put_char(u8 chr) static int param_set_kgdbts_var(const char *kmessage, const struct kernel_param *kp) { - int len = strlen(kmessage); + size_t len = strlen(kmessage); if (len >= MAX_CONFIG_LEN) { printk(KERN_ERR "kgdbts: config string too long\n"); @@ -1155,7 +1155,7 @@ static int param_set_kgdbts_var(const char *kmessage, strcpy(config, kmessage); /* Chop out \n char as a result of echo */ - if (config[len - 1] == '\n') + if (len && config[len - 1] == '\n') config[len - 1] = '\0'; /* Go and configure with the new params. */ diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c index d5a0e7f1813b..e172719dd86d 100644 --- a/drivers/misc/lkdtm/usercopy.c +++ b/drivers/misc/lkdtm/usercopy.c @@ -324,14 +324,16 @@ void lkdtm_USERCOPY_KERNEL(void) void lkdtm_USERCOPY_KERNEL_DS(void) { - char __user *user_ptr = (char __user *)ERR_PTR(-EINVAL); + char __user *user_ptr = + (char __user *)(0xFUL << (sizeof(unsigned long) * 8 - 4)); mm_segment_t old_fs = get_fs(); char buf[10] = {0}; - pr_info("attempting copy_to_user on unmapped kernel address\n"); + pr_info("attempting copy_to_user() to noncanonical address: %px\n", + user_ptr); set_fs(KERNEL_DS); - if (copy_to_user(user_ptr, buf, sizeof(buf))) - pr_info("copy_to_user un unmapped kernel address failed\n"); + if (copy_to_user(user_ptr, buf, sizeof(buf)) == 0) + pr_err("copy_to_user() to noncanonical address succeeded!?\n"); set_fs(old_fs); } diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 29582fe57151..733274a061dc 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -662,6 +662,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, data = (struct pci_endpoint_test_data *)ent->driver_data; if (data) { test_reg_bar = data->test_reg_bar; + test->test_reg_bar = test_reg_bar; test->alignment = data->alignment; irq_type = data->irq_type; } diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 6db36dc870b5..9020cb2490f7 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -144,8 +144,9 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq) int err = cmd->error; /* Flag re-tuning needed on CRC errors */ - if ((cmd->opcode != MMC_SEND_TUNING_BLOCK && - cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) && + if (cmd->opcode != MMC_SEND_TUNING_BLOCK && + cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200 && + !host->retune_crc_disable && (err == -EILSEQ || (mrq->sbc && mrq->sbc->error == -EILSEQ) || (mrq->data && mrq->data->error == -EILSEQ) || (mrq->stop && mrq->stop->error == -EILSEQ))) diff --git a/drivers/mmc/core/pwrseq_emmc.c b/drivers/mmc/core/pwrseq_emmc.c index efb8a7965dd4..154f4204d58c 100644 --- a/drivers/mmc/core/pwrseq_emmc.c +++ b/drivers/mmc/core/pwrseq_emmc.c @@ -30,19 +30,14 @@ struct mmc_pwrseq_emmc { #define to_pwrseq_emmc(p) container_of(p, struct mmc_pwrseq_emmc, pwrseq) -static void __mmc_pwrseq_emmc_reset(struct mmc_pwrseq_emmc *pwrseq) -{ - gpiod_set_value(pwrseq->reset_gpio, 1); - udelay(1); - gpiod_set_value(pwrseq->reset_gpio, 0); - udelay(200); -} - static void mmc_pwrseq_emmc_reset(struct mmc_host *host) { struct mmc_pwrseq_emmc *pwrseq = to_pwrseq_emmc(host->pwrseq); - __mmc_pwrseq_emmc_reset(pwrseq); + gpiod_set_value_cansleep(pwrseq->reset_gpio, 1); + udelay(1); + gpiod_set_value_cansleep(pwrseq->reset_gpio, 0); + udelay(200); } static int mmc_pwrseq_emmc_reset_nb(struct notifier_block *this, @@ -50,8 +45,11 @@ static int mmc_pwrseq_emmc_reset_nb(struct notifier_block *this, { struct mmc_pwrseq_emmc *pwrseq = container_of(this, struct mmc_pwrseq_emmc, reset_nb); + gpiod_set_value(pwrseq->reset_gpio, 1); + udelay(1); + gpiod_set_value(pwrseq->reset_gpio, 0); + udelay(200); - __mmc_pwrseq_emmc_reset(pwrseq); return NOTIFY_DONE; } @@ -72,14 +70,18 @@ static int mmc_pwrseq_emmc_probe(struct platform_device *pdev) if (IS_ERR(pwrseq->reset_gpio)) return PTR_ERR(pwrseq->reset_gpio); - /* - * register reset handler to ensure emmc reset also from - * emergency_reboot(), priority 255 is the highest priority - * so it will be executed before any system reboot handler. - */ - pwrseq->reset_nb.notifier_call = mmc_pwrseq_emmc_reset_nb; - pwrseq->reset_nb.priority = 255; - register_restart_handler(&pwrseq->reset_nb); + if (!gpiod_cansleep(pwrseq->reset_gpio)) { + /* + * register reset handler to ensure emmc reset also from + * emergency_reboot(), priority 255 is the highest priority + * so it will be executed before any system reboot handler. + */ + pwrseq->reset_nb.notifier_call = mmc_pwrseq_emmc_reset_nb; + pwrseq->reset_nb.priority = 255; + register_restart_handler(&pwrseq->reset_nb); + } else { + dev_notice(dev, "EMMC reset pin tied to a sleepy GPIO driver; reset on emergency-reboot disabled\n"); + } pwrseq->pwrseq.ops = &mmc_pwrseq_emmc_ops; pwrseq->pwrseq.dev = dev; diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 7c364a9c4eeb..b5b9c6142f08 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -472,6 +472,7 @@ void mmc_cleanup_queue(struct mmc_queue *mq) blk_mq_unquiesce_queue(q); blk_cleanup_queue(q); + blk_mq_free_tag_set(&mq->tag_set); /* * A request can be completed before the next request, potentially diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 265e1aeeb9d8..d3d32f9a2cb1 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -221,6 +221,14 @@ static int mmc_decode_scr(struct mmc_card *card) if (scr->sda_spec3) scr->cmds = UNSTUFF_BITS(resp, 32, 2); + + /* SD Spec says: any SD Card shall set at least bits 0 and 2 */ + if (!(scr->bus_widths & SD_SCR_BUS_WIDTH_1) || + !(scr->bus_widths & SD_SCR_BUS_WIDTH_4)) { + pr_err("%s: invalid bus width\n", mmc_hostname(card->host)); + return -EINVAL; + } + return 0; } diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 6718fc8bb40f..0f51e774183e 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -941,6 +941,10 @@ static int mmc_sdio_pre_suspend(struct mmc_host *host) */ static int mmc_sdio_suspend(struct mmc_host *host) { + /* Prevent processing of SDIO IRQs in suspended state. */ + mmc_card_set_suspended(host->card); + cancel_delayed_work_sync(&host->sdio_irq_work); + mmc_claim_host(host); if (mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) @@ -989,13 +993,20 @@ static int mmc_sdio_resume(struct mmc_host *host) err = sdio_enable_4bit_bus(host->card); } - if (!err && host->sdio_irqs) { + if (err) + goto out; + + /* Allow SDIO IRQs to be processed again. */ + mmc_card_clr_suspended(host->card); + + if (host->sdio_irqs) { if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) wake_up_process(host->sdio_irq_thread); else if (host->caps & MMC_CAP_SDIO_IRQ) host->ops->enable_sdio_irq(host, 1); } +out: mmc_release_host(host); host->pm_flags &= ~MMC_PM_KEEP_POWER; diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c index 3f67fbbe0d75..df887ced9666 100644 --- a/drivers/mmc/core/sdio_io.c +++ b/drivers/mmc/core/sdio_io.c @@ -19,6 +19,7 @@ #include "sdio_ops.h" #include "core.h" #include "card.h" +#include "host.h" /** * sdio_claim_host - exclusively claim a bus for a certain SDIO function @@ -738,3 +739,79 @@ int sdio_set_host_pm_flags(struct sdio_func *func, mmc_pm_flag_t flags) return 0; } EXPORT_SYMBOL_GPL(sdio_set_host_pm_flags); + +/** + * sdio_retune_crc_disable - temporarily disable retuning on CRC errors + * @func: SDIO function attached to host + * + * If the SDIO card is known to be in a state where it might produce + * CRC errors on the bus in response to commands (like if we know it is + * transitioning between power states), an SDIO function driver can + * call this function to temporarily disable the SD/MMC core behavior of + * triggering an automatic retuning. + * + * This function should be called while the host is claimed and the host + * should remain claimed until sdio_retune_crc_enable() is called. + * Specifically, the expected sequence of calls is: + * - sdio_claim_host() + * - sdio_retune_crc_disable() + * - some number of calls like sdio_writeb() and sdio_readb() + * - sdio_retune_crc_enable() + * - sdio_release_host() + */ +void sdio_retune_crc_disable(struct sdio_func *func) +{ + func->card->host->retune_crc_disable = true; +} +EXPORT_SYMBOL_GPL(sdio_retune_crc_disable); + +/** + * sdio_retune_crc_enable - re-enable retuning on CRC errors + * @func: SDIO function attached to host + * + * This is the compement to sdio_retune_crc_disable(). + */ +void sdio_retune_crc_enable(struct sdio_func *func) +{ + func->card->host->retune_crc_disable = false; +} +EXPORT_SYMBOL_GPL(sdio_retune_crc_enable); + +/** + * sdio_retune_hold_now - start deferring retuning requests till release + * @func: SDIO function attached to host + * + * This function can be called if it's currently a bad time to do + * a retune of the SDIO card. Retune requests made during this time + * will be held and we'll actually do the retune sometime after the + * release. + * + * This function could be useful if an SDIO card is in a power state + * where it can respond to a small subset of commands that doesn't + * include the retuning command. Care should be taken when using + * this function since (presumably) the retuning request we might be + * deferring was made for a good reason. + * + * This function should be called while the host is claimed. + */ +void sdio_retune_hold_now(struct sdio_func *func) +{ + mmc_retune_hold_now(func->card->host); +} +EXPORT_SYMBOL_GPL(sdio_retune_hold_now); + +/** + * sdio_retune_release - signal that it's OK to retune now + * @func: SDIO function attached to host + * + * This is the complement to sdio_retune_hold_now(). Calling this + * function won't make a retune happen right away but will allow + * them to be scheduled normally. + * + * This function should be called while the host is claimed. + */ +void sdio_retune_release(struct sdio_func *func) +{ + mmc_retune_release(func->card->host); +} +EXPORT_SYMBOL_GPL(sdio_retune_release); diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c index 7ca7b99413f0..b299a24d33f9 100644 --- a/drivers/mmc/core/sdio_irq.c +++ b/drivers/mmc/core/sdio_irq.c @@ -38,6 +38,10 @@ static int process_sdio_pending_irqs(struct mmc_host *host) unsigned char pending; struct sdio_func *func; + /* Don't process SDIO IRQs if the card is suspended. */ + if (mmc_card_suspended(card)) + return 0; + /* * Optimization, if there is only 1 function interrupt registered * and we know an IRQ was signaled then call irq handler directly. diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 28fcd8f580a1..94032939cbbd 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -92,6 +92,7 @@ config MMC_SDHCI_PCI tristate "SDHCI support on PCI bus" depends on MMC_SDHCI && PCI select MMC_CQHCI + select IOSF_MBI if X86 help This selects the PCI Secure Digital Host Controller Interface. Most controllers found today are PCI devices. diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 1b1498805972..a3533935e282 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -819,6 +819,10 @@ mmc_spi_readblock(struct mmc_spi_host *host, struct spi_transfer *t, } status = spi_sync_locked(spi, &host->m); + if (status < 0) { + dev_dbg(&spi->dev, "read error %d\n", status); + return status; + } if (host->dma_dev) { dma_sync_single_for_cpu(host->dma_dev, diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 387ff14587b8..e27978c47db7 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1550,9 +1550,10 @@ static irqreturn_t mmci_irq(int irq, void *dev_id) } /* - * Don't poll for busy completion in irq context. + * Busy detection has been handled by mmci_cmd_irq() above. + * Clear the status bit to prevent polling in IRQ context. */ - if (host->variant->busy_detect && host->busy_status) + if (host->variant->busy_detect_flag) status &= ~host->variant->busy_detect_flag; ret = 1; diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 833ef0590af8..b33f2c90d8d8 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -1003,6 +1003,8 @@ static void msdc_request_done(struct msdc_host *host, struct mmc_request *mrq) msdc_track_cmd_data(host, mrq->cmd, mrq->data); if (mrq->data) msdc_unprepare_data(host, mrq); + if (host->error) + msdc_reset_hw(host); mmc_request_done(host->mmc, mrq); } @@ -1355,24 +1357,25 @@ static void msdc_request_timeout(struct work_struct *work) } } -static void __msdc_enable_sdio_irq(struct mmc_host *mmc, int enb) +static void __msdc_enable_sdio_irq(struct msdc_host *host, int enb) { - unsigned long flags; - struct msdc_host *host = mmc_priv(mmc); - - spin_lock_irqsave(&host->lock, flags); - if (enb) + if (enb) { sdr_set_bits(host->base + MSDC_INTEN, MSDC_INTEN_SDIOIRQ); - else + sdr_set_bits(host->base + SDC_CFG, SDC_CFG_SDIOIDE); + } else { sdr_clr_bits(host->base + MSDC_INTEN, MSDC_INTEN_SDIOIRQ); - spin_unlock_irqrestore(&host->lock, flags); + sdr_clr_bits(host->base + SDC_CFG, SDC_CFG_SDIOIDE); + } } static void msdc_enable_sdio_irq(struct mmc_host *mmc, int enb) { + unsigned long flags; struct msdc_host *host = mmc_priv(mmc); - __msdc_enable_sdio_irq(mmc, enb); + spin_lock_irqsave(&host->lock, flags); + __msdc_enable_sdio_irq(host, enb); + spin_unlock_irqrestore(&host->lock, flags); if (enb) pm_runtime_get_noresume(host->dev); @@ -1394,6 +1397,8 @@ static irqreturn_t msdc_irq(int irq, void *dev_id) spin_lock_irqsave(&host->lock, flags); events = readl(host->base + MSDC_INT); event_mask = readl(host->base + MSDC_INTEN); + if ((events & event_mask) & MSDC_INT_SDIOIRQ) + __msdc_enable_sdio_irq(host, 0); /* clear interrupts */ writel(events & event_mask, host->base + MSDC_INT); @@ -1402,10 +1407,8 @@ static irqreturn_t msdc_irq(int irq, void *dev_id) data = host->data; spin_unlock_irqrestore(&host->lock, flags); - if ((events & event_mask) & MSDC_INT_SDIOIRQ) { - __msdc_enable_sdio_irq(host->mmc, 0); + if ((events & event_mask) & MSDC_INT_SDIOIRQ) sdio_signal_irq(host->mmc); - } if (!(events & (event_mask & ~MSDC_INT_SDIOIRQ))) break; @@ -1528,10 +1531,7 @@ static void msdc_init_hw(struct msdc_host *host) sdr_set_bits(host->base + SDC_CFG, SDC_CFG_SDIO); /* Config SDIO device detect interrupt function */ - if (host->mmc->caps & MMC_CAP_SDIO_IRQ) - sdr_set_bits(host->base + SDC_CFG, SDC_CFG_SDIOIDE); - else - sdr_clr_bits(host->base + SDC_CFG, SDC_CFG_SDIOIDE); + sdr_clr_bits(host->base + SDC_CFG, SDC_CFG_SDIOIDE); /* Configure to default data timeout */ sdr_set_field(host->base + SDC_CFG, SDC_CFG_DTOC, 3); @@ -2052,7 +2052,12 @@ static void msdc_hw_reset(struct mmc_host *mmc) static void msdc_ack_sdio_irq(struct mmc_host *mmc) { - __msdc_enable_sdio_irq(mmc, 1); + unsigned long flags; + struct msdc_host *host = mmc_priv(mmc); + + spin_lock_irqsave(&host->lock, flags); + __msdc_enable_sdio_irq(host, 1); + spin_unlock_irqrestore(&host->lock, flags); } static const struct mmc_host_ops mt_msdc_ops = { diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 8742e27e4e8b..c42e442ba7ab 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -620,11 +620,16 @@ static const struct renesas_sdhi_quirks sdhi_quirks_h3_es2 = { .hs400_4taps = true, }; +static const struct renesas_sdhi_quirks sdhi_quirks_nohs400 = { + .hs400_disabled = true, +}; + static const struct soc_device_attribute sdhi_quirks_match[] = { { .soc_id = "r8a7795", .revision = "ES1.*", .data = &sdhi_quirks_h3_m3w_es1 }, { .soc_id = "r8a7795", .revision = "ES2.0", .data = &sdhi_quirks_h3_es2 }, - { .soc_id = "r8a7796", .revision = "ES1.0", .data = &sdhi_quirks_h3_m3w_es1 }, - { .soc_id = "r8a7796", .revision = "ES1.1", .data = &sdhi_quirks_h3_m3w_es1 }, + { .soc_id = "r8a7796", .revision = "ES1.[012]", .data = &sdhi_quirks_h3_m3w_es1 }, + { .soc_id = "r8a774a1", .revision = "ES1.[012]", .data = &sdhi_quirks_h3_m3w_es1 }, + { .soc_id = "r8a77980", .data = &sdhi_quirks_nohs400 }, { /* Sentinel. */ }, }; diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 9d12c06c7fd6..2feb4ef32035 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -196,7 +196,8 @@ static const struct sdhci_ops sdhci_iproc_32only_ops = { }; static const struct sdhci_pltfm_data sdhci_iproc_cygnus_pltfm_data = { - .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, + .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | + SDHCI_QUIRK_NO_HISPD_BIT, .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN | SDHCI_QUIRK2_HOST_OFF_CARD_ON, .ops = &sdhci_iproc_32only_ops, }; @@ -219,7 +220,8 @@ static const struct sdhci_iproc_data iproc_cygnus_data = { static const struct sdhci_pltfm_data sdhci_iproc_pltfm_data = { .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | - SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, + SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 | + SDHCI_QUIRK_NO_HISPD_BIT, .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN, .ops = &sdhci_iproc_ops, }; diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index c9e3e050ccc8..88dc3f00a5be 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -832,7 +832,10 @@ static int sdhci_arasan_probe(struct platform_device *pdev) host->mmc_host_ops.start_signal_voltage_switch = sdhci_arasan_voltage_switch; sdhci_arasan->has_cqe = true; - host->mmc->caps2 |= MMC_CAP2_CQE | MMC_CAP2_CQE_DCMD; + host->mmc->caps2 |= MMC_CAP2_CQE; + + if (!of_property_read_bool(np, "disable-cqe-dcmd")) + host->mmc->caps2 |= MMC_CAP2_CQE_DCMD; } ret = sdhci_arasan_add_host(sdhci_arasan); diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 4e669b4edfc1..7e0eae8dafae 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -694,6 +694,9 @@ static void esdhc_reset(struct sdhci_host *host, u8 mask) sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); + if (of_find_compatible_node(NULL, NULL, "fsl,p2020-esdhc")) + mdelay(5); + if (mask & SDHCI_RESET_ALL) { val = sdhci_readl(host, ESDHC_TBCTL); val &= ~ESDHC_TB_EN; @@ -1074,6 +1077,11 @@ static int sdhci_esdhc_probe(struct platform_device *pdev) if (esdhc->vendor_ver > VENDOR_V_22) host->quirks &= ~SDHCI_QUIRK_NO_BUSY_IRQ; + if (of_find_compatible_node(NULL, NULL, "fsl,p2020-esdhc")) { + host->quirks2 |= SDHCI_QUIRK_RESET_AFTER_REQUEST; + host->quirks2 |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + } + if (of_device_is_compatible(np, "fsl,p5040-esdhc") || of_device_is_compatible(np, "fsl,p5020-esdhc") || of_device_is_compatible(np, "fsl,p4080-esdhc") || diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 99b0fec2836b..7bd27198178d 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -31,6 +31,10 @@ #include #include +#ifdef CONFIG_X86 +#include +#endif + #include "cqhci.h" #include "sdhci.h" @@ -451,6 +455,50 @@ static const struct sdhci_pci_fixes sdhci_intel_pch_sdio = { .probe_slot = pch_hc_probe_slot, }; +#ifdef CONFIG_X86 + +#define BYT_IOSF_SCCEP 0x63 +#define BYT_IOSF_OCP_NETCTRL0 0x1078 +#define BYT_IOSF_OCP_TIMEOUT_BASE GENMASK(10, 8) + +static void byt_ocp_setting(struct pci_dev *pdev) +{ + u32 val = 0; + + if (pdev->device != PCI_DEVICE_ID_INTEL_BYT_EMMC && + pdev->device != PCI_DEVICE_ID_INTEL_BYT_SDIO && + pdev->device != PCI_DEVICE_ID_INTEL_BYT_SD && + pdev->device != PCI_DEVICE_ID_INTEL_BYT_EMMC2) + return; + + if (iosf_mbi_read(BYT_IOSF_SCCEP, MBI_CR_READ, BYT_IOSF_OCP_NETCTRL0, + &val)) { + dev_err(&pdev->dev, "%s read error\n", __func__); + return; + } + + if (!(val & BYT_IOSF_OCP_TIMEOUT_BASE)) + return; + + val &= ~BYT_IOSF_OCP_TIMEOUT_BASE; + + if (iosf_mbi_write(BYT_IOSF_SCCEP, MBI_CR_WRITE, BYT_IOSF_OCP_NETCTRL0, + val)) { + dev_err(&pdev->dev, "%s write error\n", __func__); + return; + } + + dev_dbg(&pdev->dev, "%s completed\n", __func__); +} + +#else + +static inline void byt_ocp_setting(struct pci_dev *pdev) +{ +} + +#endif + enum { INTEL_DSM_FNS = 0, INTEL_DSM_V18_SWITCH = 3, @@ -715,6 +763,8 @@ static void byt_probe_slot(struct sdhci_pci_slot *slot) byt_read_dsm(slot); + byt_ocp_setting(slot->chip->pdev); + ops->execute_tuning = intel_execute_tuning; ops->start_signal_voltage_switch = intel_start_signal_voltage_switch; @@ -938,7 +988,35 @@ static int byt_sd_probe_slot(struct sdhci_pci_slot *slot) return 0; } +#ifdef CONFIG_PM_SLEEP + +static int byt_resume(struct sdhci_pci_chip *chip) +{ + byt_ocp_setting(chip->pdev); + + return sdhci_pci_resume_host(chip); +} + +#endif + +#ifdef CONFIG_PM + +static int byt_runtime_resume(struct sdhci_pci_chip *chip) +{ + byt_ocp_setting(chip->pdev); + + return sdhci_pci_runtime_resume_host(chip); +} + +#endif + static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = { +#ifdef CONFIG_PM_SLEEP + .resume = byt_resume, +#endif +#ifdef CONFIG_PM + .runtime_resume = byt_runtime_resume, +#endif .allow_runtime_pm = true, .probe_slot = byt_emmc_probe_slot, .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | @@ -972,6 +1050,12 @@ static const struct sdhci_pci_fixes sdhci_intel_glk_emmc = { }; static const struct sdhci_pci_fixes sdhci_ni_byt_sdio = { +#ifdef CONFIG_PM_SLEEP + .resume = byt_resume, +#endif +#ifdef CONFIG_PM + .runtime_resume = byt_runtime_resume, +#endif .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | SDHCI_QUIRK_NO_LED, .quirks2 = SDHCI_QUIRK2_HOST_OFF_CARD_ON | @@ -983,6 +1067,12 @@ static const struct sdhci_pci_fixes sdhci_ni_byt_sdio = { }; static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = { +#ifdef CONFIG_PM_SLEEP + .resume = byt_resume, +#endif +#ifdef CONFIG_PM + .runtime_resume = byt_runtime_resume, +#endif .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | SDHCI_QUIRK_NO_LED, .quirks2 = SDHCI_QUIRK2_HOST_OFF_CARD_ON | @@ -994,6 +1084,12 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = { }; static const struct sdhci_pci_fixes sdhci_intel_byt_sd = { +#ifdef CONFIG_PM_SLEEP + .resume = byt_resume, +#endif +#ifdef CONFIG_PM + .runtime_resume = byt_runtime_resume, +#endif .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | SDHCI_QUIRK_NO_LED, .quirks2 = SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON | diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index 05a012a694b2..423c3339c03b 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -124,6 +124,7 @@ static int sdhci_o2_execute_tuning(struct mmc_host *mmc, u32 opcode) */ if (mmc->ios.bus_width == MMC_BUS_WIDTH_8) { current_bus_width = mmc->ios.bus_width; + mmc->ios.bus_width = MMC_BUS_WIDTH_4; sdhci_set_bus_width(host, MMC_BUS_WIDTH_4); } @@ -135,8 +136,10 @@ static int sdhci_o2_execute_tuning(struct mmc_host *mmc, u32 opcode) sdhci_end_tuning(host); - if (current_bus_width == MMC_BUS_WIDTH_8) + if (current_bus_width == MMC_BUS_WIDTH_8) { + mmc->ios.bus_width = MMC_BUS_WIDTH_8; sdhci_set_bus_width(host, current_bus_width); + } host->flags &= ~SDHCI_HS400_TUNING; return 0; diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 32e62904c0d3..46086dd43bfb 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -779,6 +779,7 @@ static void tegra_sdhci_set_uhs_signaling(struct sdhci_host *host, bool set_dqs_trim = false; bool do_hs400_dll_cal = false; + tegra_host->ddr_signaling = false; switch (timing) { case MMC_TIMING_UHS_SDR50: case MMC_TIMING_UHS_SDR104: diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index eea183e90f1b..f427f04991a6 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -209,7 +209,7 @@ static int sdhci_am654_init(struct sdhci_host *host) ctl_cfg_2 = SLOTTYPE_EMBEDDED; regmap_update_bits(sdhci_am654->base, CTL_CFG_2, - ctl_cfg_2, SLOTTYPE_MASK); + SLOTTYPE_MASK, ctl_cfg_2); return sdhci_add_host(host); } diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 595949f1f001..78cc2a928efe 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -842,8 +842,9 @@ static void tmio_mmc_finish_request(struct tmio_mmc_host *host) if (mrq->cmd->error || (mrq->data && mrq->data->error)) tmio_mmc_abort_dma(host); + /* SCC error means retune, but executed command was still successful */ if (host->check_scc_error && host->check_scc_error(host)) - mrq->cmd->error = -EILSEQ; + mmc_retune_needed(host->mmc); /* If SET_BLOCK_COUNT, continue with main command */ if (host->mrq && !mrq->cmd->error) { diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index e0cf869c8544..544ed1931843 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -10,7 +10,7 @@ config MTD_COMPLEX_MAPPINGS config MTD_PHYSMAP tristate "Flash device in physical memory map" - depends on MTD_CFI || MTD_JEDECPROBE || MTD_ROM || MTD_LPDDR + depends on MTD_CFI || MTD_JEDECPROBE || MTD_ROM || MTD_RAM || MTD_LPDDR help This provides a 'mapping' driver which allows the NOR Flash and ROM driver code to communicate with chips which are mapped diff --git a/drivers/mtd/maps/physmap-core.c b/drivers/mtd/maps/physmap-core.c index d9a3e4bebe5d..21b556afc305 100644 --- a/drivers/mtd/maps/physmap-core.c +++ b/drivers/mtd/maps/physmap-core.c @@ -132,6 +132,8 @@ static void physmap_set_addr_gpios(struct physmap_flash_info *info, gpiod_set_value(info->gpios->desc[i], !!(BIT(i) & ofs)); } + + info->gpio_values = ofs; } #define win_mask(order) (BIT(order) - 1) diff --git a/drivers/mtd/spi-nor/intel-spi.c b/drivers/mtd/spi-nor/intel-spi.c index af0a22019516..d60cbf23d9aa 100644 --- a/drivers/mtd/spi-nor/intel-spi.c +++ b/drivers/mtd/spi-nor/intel-spi.c @@ -632,6 +632,10 @@ static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len, while (len > 0) { block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ); + /* Read cannot cross 4K boundary */ + block_size = min_t(loff_t, from + block_size, + round_up(from + 1, SZ_4K)) - from; + writel(from, ispi->base + FADDR); val = readl(ispi->base + HSFSTS_CTL); @@ -685,6 +689,10 @@ static ssize_t intel_spi_write(struct spi_nor *nor, loff_t to, size_t len, while (len > 0) { block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ); + /* Write cannot cross 4K boundary */ + block_size = min_t(loff_t, to + block_size, + round_up(to + 1, SZ_4K)) - to; + writel(to, ispi->base + FADDR); val = readl(ispi->base + HSFSTS_CTL); diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 21cde7e78621..0d3ba056cda3 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_ARCNET) += arcnet/ obj-$(CONFIG_DEV_APPLETALK) += appletalk/ obj-$(CONFIG_CAIF) += caif/ obj-$(CONFIG_CAN) += can/ -obj-$(CONFIG_NET_DSA) += dsa/ +obj-y += dsa/ obj-$(CONFIG_ETHERNET) += ethernet/ obj-$(CONFIG_FDDI) += fddi/ obj-$(CONFIG_HIPPI) += hippi/ diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ee610721098e..f96efa363d34 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3122,13 +3122,18 @@ static int bond_slave_netdev_event(unsigned long event, case NETDEV_CHANGE: /* For 802.3ad mode only: * Getting invalid Speed/Duplex values here will put slave - * in weird state. So mark it as link-fail for the time - * being and let link-monitoring (miimon) set it right when - * correct speeds/duplex are available. + * in weird state. Mark it as link-fail if the link was + * previously up or link-down if it hasn't yet come up, and + * let link-monitoring (miimon) set it right when correct + * speeds/duplex are available. */ if (bond_update_speed_duplex(slave) && - BOND_MODE(bond) == BOND_MODE_8023AD) - slave->link = BOND_LINK_FAIL; + BOND_MODE(bond) == BOND_MODE_8023AD) { + if (slave->last_link_up) + slave->link = BOND_LINK_FAIL; + else + slave->link = BOND_LINK_DOWN; + } if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_adapter_speed_duplex_changed(slave); diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index da1fc17295d9..b996967af8d9 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1098,13 +1098,6 @@ static int bond_option_arp_validate_set(struct bonding *bond, { netdev_dbg(bond->dev, "Setting arp_validate to %s (%llu)\n", newval->string, newval->value); - - if (bond->dev->flags & IFF_UP) { - if (!newval->value) - bond->recv_probe = NULL; - else if (bond->params.arp_interval) - bond->recv_probe = bond_arp_rcv; - } bond->params.arp_validate = newval->value; return 0; diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 1c66fb2ad76b..f97c628eb2ad 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -166,7 +166,7 @@ #define FLEXCAN_MB_CNT_LENGTH(x) (((x) & 0xf) << 16) #define FLEXCAN_MB_CNT_TIMESTAMP(x) ((x) & 0xffff) -#define FLEXCAN_TIMEOUT_US (50) +#define FLEXCAN_TIMEOUT_US (250) /* FLEXCAN hardware feature flags * diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 97d0933d9bd9..e4a5038eba9a 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -1443,7 +1443,7 @@ static const struct xcan_devtype_data xcan_canfd_data = { XCAN_FLAG_RXMNF | XCAN_FLAG_TX_MAILBOXES | XCAN_FLAG_RX_FIFO_MULTI, - .bittiming_const = &xcan_bittiming_const, + .bittiming_const = &xcan_bittiming_const_canfd, .btr_ts2_shift = XCAN_BTR_TS2_SHIFT_CANFD, .btr_sjw_shift = XCAN_BTR_SJW_SHIFT_CANFD, .bus_clk_name = "s_axi_aclk", diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 39dace8e3512..f46086fa9064 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -83,6 +83,9 @@ static void ksz_mib_read_work(struct work_struct *work) int i; for (i = 0; i < dev->mib_port_cnt; i++) { + if (dsa_is_unused_port(dev->ds, i)) + continue; + p = &dev->ports[i]; mib = &p->mib; mutex_lock(&mib->cnt_mutex); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index f4e2db44ad91..ae750ab9a4d7 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -910,7 +910,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, err = mv88e6xxx_port_read(chip, port, s->reg + 1, ®); if (err) return U64_MAX; - high = reg; + low |= ((u32)reg) << 16; } break; case STATS_TYPE_BANK1: @@ -1517,7 +1517,7 @@ static int mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, int err; if (!vid) - return -EINVAL; + return -EOPNOTSUPP; entry->vid = vid - 1; entry->valid = false; diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c index 6dedd43442cc..35b767baf21f 100644 --- a/drivers/net/dsa/rtl8366.c +++ b/drivers/net/dsa/rtl8366.c @@ -307,7 +307,8 @@ int rtl8366_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) struct rtl8366_vlan_4k vlan4k; int ret; - if (!smi->ops->is_vlan_valid(smi, port)) + /* Use VLAN nr port + 1 since VLAN0 is not valid */ + if (!smi->ops->is_vlan_valid(smi, port + 1)) return -EINVAL; dev_info(smi->dev, "%s filtering on port %d\n", @@ -318,12 +319,12 @@ int rtl8366_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) * The hardware support filter ID (FID) 0..7, I have no clue how to * support this in the driver when the callback only says on/off. */ - ret = smi->ops->get_vlan_4k(smi, port, &vlan4k); + ret = smi->ops->get_vlan_4k(smi, port + 1, &vlan4k); if (ret) return ret; /* Just set the filter to FID 1 for now then */ - ret = rtl8366_set_vlan(smi, port, + ret = rtl8366_set_vlan(smi, port + 1, vlan4k.member, vlan4k.untag, 1); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a6eacf2099c3..9b03d7e404f8 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -224,28 +224,23 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) if (!tx_ring->tx_buffer_info) { tx_ring->tx_buffer_info = vzalloc(size); if (!tx_ring->tx_buffer_info) - return -ENOMEM; + goto err_tx_buffer_info; } size = sizeof(u16) * tx_ring->ring_size; tx_ring->free_tx_ids = vzalloc_node(size, node); if (!tx_ring->free_tx_ids) { tx_ring->free_tx_ids = vzalloc(size); - if (!tx_ring->free_tx_ids) { - vfree(tx_ring->tx_buffer_info); - return -ENOMEM; - } + if (!tx_ring->free_tx_ids) + goto err_free_tx_ids; } size = tx_ring->tx_max_header_size; tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node); if (!tx_ring->push_buf_intermediate_buf) { tx_ring->push_buf_intermediate_buf = vzalloc(size); - if (!tx_ring->push_buf_intermediate_buf) { - vfree(tx_ring->tx_buffer_info); - vfree(tx_ring->free_tx_ids); - return -ENOMEM; - } + if (!tx_ring->push_buf_intermediate_buf) + goto err_push_buf_intermediate_buf; } /* Req id ring for TX out of order completions */ @@ -259,6 +254,15 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) tx_ring->next_to_clean = 0; tx_ring->cpu = ena_irq->cpu; return 0; + +err_push_buf_intermediate_buf: + vfree(tx_ring->free_tx_ids); + tx_ring->free_tx_ids = NULL; +err_free_tx_ids: + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; +err_tx_buffer_info: + return -ENOMEM; } /* ena_free_tx_resources - Free I/O Tx Resources per Queue @@ -378,6 +382,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, rx_ring->free_rx_ids = vzalloc(size); if (!rx_ring->free_rx_ids) { vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; return -ENOMEM; } } @@ -2292,7 +2297,7 @@ static void ena_config_host_info(struct ena_com_dev *ena_dev, host_info->bdf = (pdev->bus->number << 8) | pdev->devfn; host_info->os_type = ENA_ADMIN_OS_LINUX; host_info->kernel_ver = LINUX_VERSION_CODE; - strncpy(host_info->kernel_ver_str, utsname()->version, + strlcpy(host_info->kernel_ver_str, utsname()->version, sizeof(host_info->kernel_ver_str) - 1); host_info->os_dist = 0; strncpy(host_info->os_dist_str, utsname()->release, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index e2ffb159cbe2..bf4aa7060f1a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -139,10 +139,10 @@ void aq_ring_queue_stop(struct aq_ring_s *ring) bool aq_ring_tx_clean(struct aq_ring_s *self) { struct device *dev = aq_nic_get_dev(self->aq_nic); - unsigned int budget = AQ_CFG_TX_CLEAN_BUDGET; + unsigned int budget; - for (; self->sw_head != self->hw_head && budget--; - self->sw_head = aq_ring_next_dx(self, self->sw_head)) { + for (budget = AQ_CFG_TX_CLEAN_BUDGET; + budget && self->sw_head != self->hw_head; budget--) { struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head]; if (likely(buff->is_mapped)) { @@ -167,6 +167,7 @@ bool aq_ring_tx_clean(struct aq_ring_s *self) buff->pa = 0U; buff->eop_index = 0xffffU; + self->sw_head = aq_ring_next_dx(self, self->sw_head); } return !!budget; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index b31dba1b1a55..ec302fdfec63 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -702,38 +702,41 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, if ((rx_stat & BIT(0)) || rxd_wb->type & 0x1000U) { /* MAC error or DMA error */ buff->is_error = 1U; - } else { - if (self->aq_nic_cfg->is_rss) { - /* last 4 byte */ - u16 rss_type = rxd_wb->type & 0xFU; - - if (rss_type && rss_type < 0x8U) { - buff->is_hash_l4 = (rss_type == 0x4 || - rss_type == 0x5); - buff->rss_hash = rxd_wb->rss_hash; - } + } + if (self->aq_nic_cfg->is_rss) { + /* last 4 byte */ + u16 rss_type = rxd_wb->type & 0xFU; + + if (rss_type && rss_type < 0x8U) { + buff->is_hash_l4 = (rss_type == 0x4 || + rss_type == 0x5); + buff->rss_hash = rxd_wb->rss_hash; } + } - if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) { - buff->len = rxd_wb->pkt_len % - AQ_CFG_RX_FRAME_MAX; - buff->len = buff->len ? - buff->len : AQ_CFG_RX_FRAME_MAX; - buff->next = 0U; - buff->is_eop = 1U; + if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) { + buff->len = rxd_wb->pkt_len % + AQ_CFG_RX_FRAME_MAX; + buff->len = buff->len ? + buff->len : AQ_CFG_RX_FRAME_MAX; + buff->next = 0U; + buff->is_eop = 1U; + } else { + buff->len = + rxd_wb->pkt_len > AQ_CFG_RX_FRAME_MAX ? + AQ_CFG_RX_FRAME_MAX : rxd_wb->pkt_len; + + if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT & + rxd_wb->status) { + /* LRO */ + buff->next = rxd_wb->next_desc_ptr; + ++ring->stats.rx.lro_packets; } else { - if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT & - rxd_wb->status) { - /* LRO */ - buff->next = rxd_wb->next_desc_ptr; - ++ring->stats.rx.lro_packets; - } else { - /* jumbo */ - buff->next = - aq_ring_next_dx(ring, - ring->hw_head); - ++ring->stats.rx.jumbo_packets; - } + /* jumbo */ + buff->next = + aq_ring_next_dx(ring, + ring->hw_head); + ++ring->stats.rx.jumbo_packets; } } } diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index eb4b99d56081..33d3c3789209 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -335,13 +335,13 @@ static int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p, { u32 val; int err = 0; - bool is_locked; - is_locked = hw_atl_sem_ram_get(self); - if (!is_locked) { - err = -ETIME; + err = readx_poll_timeout_atomic(hw_atl_sem_ram_get, self, + val, val == 1U, + 10U, 100000U); + if (err < 0) goto err_exit; - } + if (IS_CHIP_FEATURE(REVISION_B1)) { u32 offset = 0; @@ -353,8 +353,8 @@ static int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p, /* 1000 times by 10us = 10ms */ err = readx_poll_timeout_atomic(hw_atl_scrpad12_get, self, val, - (val & 0xF0000000) == - 0x80000000, + (val & 0xF0000000) != + 0x80000000, 10U, 10000U); } } else { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index fe6c5658e016..9d0292aa071d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -349,7 +349,7 @@ static int aq_fw2x_set_sleep_proxy(struct aq_hw_s *self, u8 *mac) err = readx_poll_timeout_atomic(aq_fw2x_state2_get, self, val, val & HW_ATL_FW2X_CTRL_SLEEP_PROXY, - 1U, 10000U); + 1U, 100000U); err_exit: return err; @@ -369,6 +369,8 @@ static int aq_fw2x_set_wol_params(struct aq_hw_s *self, u8 *mac) msg = (struct fw2x_msg_wol *)rpc; + memset(msg, 0, sizeof(*msg)); + msg->msg_id = HAL_ATLANTIC_UTILS_FW2X_MSG_WOL; msg->magic_packet_enabled = true; memcpy(msg->hw_addr, mac, ETH_ALEN); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 52ade133b57c..30cafe4cdb6e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1640,6 +1640,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr); bnxt_reuse_rx_data(rxr, cons, data); if (!skb) { + if (agg_bufs) + bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs); rc = -ENOMEM; goto next_rx; } @@ -6340,7 +6342,7 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp) if (!ctx || (ctx->flags & BNXT_CTX_FLAG_INITED)) return 0; - if (bp->flags & BNXT_FLAG_ROCE_CAP) { + if ((bp->flags & BNXT_FLAG_ROCE_CAP) && !is_kdump_kernel()) { pg_lvl = 2; extra_qps = 65536; extra_srqs = 8192; @@ -7512,22 +7514,23 @@ static void bnxt_clear_int_mode(struct bnxt *bp) bp->flags &= ~BNXT_FLAG_USING_MSIX; } -int bnxt_reserve_rings(struct bnxt *bp) +int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) { int tcs = netdev_get_num_tc(bp->dev); - bool reinit_irq = false; + bool irq_cleared = false; int rc; if (!bnxt_need_reserve_rings(bp)) return 0; - if (BNXT_NEW_RM(bp) && (bnxt_get_num_msix(bp) != bp->total_irqs)) { + if (irq_re_init && BNXT_NEW_RM(bp) && + bnxt_get_num_msix(bp) != bp->total_irqs) { bnxt_ulp_irq_stop(bp); bnxt_clear_int_mode(bp); - reinit_irq = true; + irq_cleared = true; } rc = __bnxt_reserve_rings(bp); - if (reinit_irq) { + if (irq_cleared) { if (!rc) rc = bnxt_init_int_mode(bp); bnxt_ulp_irq_restart(bp, rc); @@ -8426,7 +8429,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) return rc; } } - rc = bnxt_reserve_rings(bp); + rc = bnxt_reserve_rings(bp, irq_re_init); if (rc) return rc; if ((bp->flags & BNXT_FLAG_RFS) && @@ -10337,7 +10340,7 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) if (sh) bp->flags |= BNXT_FLAG_SHARED_RINGS; - dflt_rings = netif_get_num_default_rss_queues(); + dflt_rings = is_kdump_kernel() ? 1 : netif_get_num_default_rss_queues(); /* Reduce default rings on multi-port cards so that total default * rings do not exceed CPU count. */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index cf81ace7a6e6..0fb93280ad4e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -1367,7 +1368,8 @@ struct bnxt { #define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0) #define BNXT_RX_PAGE_MODE(bp) ((bp)->flags & BNXT_FLAG_RX_PAGE_MODE) #define BNXT_SUPPORTS_TPA(bp) (!BNXT_CHIP_TYPE_NITRO_A0(bp) && \ - !(bp->flags & BNXT_FLAG_CHIP_P5)) + !(bp->flags & BNXT_FLAG_CHIP_P5) && \ + !is_kdump_kernel()) /* Chip class phase 5 */ #define BNXT_CHIP_P5(bp) \ @@ -1778,7 +1780,7 @@ unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp); unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp); unsigned int bnxt_get_avail_cp_rings_for_en(struct bnxt *bp); int bnxt_get_avail_msix(struct bnxt *bp, int num); -int bnxt_reserve_rings(struct bnxt *bp); +int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init); void bnxt_tx_disable(struct bnxt *bp); void bnxt_tx_enable(struct bnxt *bp); int bnxt_hwrm_set_pause(struct bnxt *); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index adabbe94a259..e1460e391952 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -788,7 +788,7 @@ static int bnxt_set_channels(struct net_device *dev, */ } } else { - rc = bnxt_reserve_rings(bp); + rc = bnxt_reserve_rings(bp, true); } return rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index cf475873ce81..bfa342a98d08 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -147,7 +147,7 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id, bnxt_close_nic(bp, true, false); rc = bnxt_open_nic(bp, true, false); } else { - rc = bnxt_reserve_rings(bp); + rc = bnxt_reserve_rings(bp, true); } } if (rc) { diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 3da2795e2486..d005ed12b4d1 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2461,12 +2461,12 @@ static int macb_open(struct net_device *dev) goto pm_exit; } - bp->macbgem_ops.mog_init_rings(bp); - macb_init_hw(bp); - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) napi_enable(&queue->napi); + bp->macbgem_ops.mog_init_rings(bp); + macb_init_hw(bp); + /* schedule a link state check */ phy_start(dev->phydev); @@ -3377,7 +3377,7 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, if (!err) err = -ENODEV; - dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to get macb_clk (%d)\n", err); return err; } @@ -3386,7 +3386,7 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, if (!err) err = -ENODEV; - dev_err(&pdev->dev, "failed to get hclk (%u)\n", err); + dev_err(&pdev->dev, "failed to get hclk (%d)\n", err); return err; } @@ -3404,31 +3404,31 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, err = clk_prepare_enable(*pclk); if (err) { - dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable pclk (%d)\n", err); return err; } err = clk_prepare_enable(*hclk); if (err) { - dev_err(&pdev->dev, "failed to enable hclk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable hclk (%d)\n", err); goto err_disable_pclk; } err = clk_prepare_enable(*tx_clk); if (err) { - dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err); goto err_disable_hclk; } err = clk_prepare_enable(*rx_clk); if (err) { - dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err); goto err_disable_txclk; } err = clk_prepare_enable(*tsu_clk); if (err) { - dev_err(&pdev->dev, "failed to enable tsu_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable tsu_clk (%d)\n", err); goto err_disable_rxclk; } @@ -3902,7 +3902,7 @@ static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk, err = clk_prepare_enable(*pclk); if (err) { - dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable pclk (%d)\n", err); return err; } diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.h b/drivers/net/ethernet/chelsio/cxgb3/l2t.h index c2fd323c4078..ea75f275023f 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/l2t.h +++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.h @@ -75,8 +75,8 @@ struct l2t_data { struct l2t_entry *rover; /* starting point for next allocation */ atomic_t nfree; /* number of free entries */ rwlock_t lock; - struct l2t_entry l2tab[0]; struct rcu_head rcu_head; /* to handle rcu cleanup */ + struct l2t_entry l2tab[]; }; typedef void (*arp_failure_handler_func)(struct t3cdev * dev, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 89179e316687..4bc0c357cb8e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -6161,15 +6161,24 @@ static int __init cxgb4_init_module(void) ret = pci_register_driver(&cxgb4_driver); if (ret < 0) - debugfs_remove(cxgb4_debugfs_root); + goto err_pci; #if IS_ENABLED(CONFIG_IPV6) if (!inet6addr_registered) { - register_inet6addr_notifier(&cxgb4_inet6addr_notifier); - inet6addr_registered = true; + ret = register_inet6addr_notifier(&cxgb4_inet6addr_notifier); + if (ret) + pci_unregister_driver(&cxgb4_driver); + else + inet6addr_registered = true; } #endif + if (ret == 0) + return ret; + +err_pci: + debugfs_remove(cxgb4_debugfs_root); + return ret; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 82a8d1970060..35462bccd91a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -197,6 +197,9 @@ static void cxgb4_process_flow_match(struct net_device *dev, fs->val.ivlan = vlan_tci; fs->mask.ivlan = vlan_tci_mask; + fs->val.ivlan_vld = 1; + fs->mask.ivlan_vld = 1; + /* Chelsio adapters use ivlan_vld bit to match vlan packets * as 802.1Q. Also, when vlan tag is present in packets, * ethtype match is used then to match on ethtype of inner @@ -207,8 +210,6 @@ static void cxgb4_process_flow_match(struct net_device *dev, * ethtype value with ethtype of inner header. */ if (fs->val.ethtype == ETH_P_8021Q) { - fs->val.ivlan_vld = 1; - fs->mask.ivlan_vld = 1; fs->val.ethtype = 0; fs->mask.ethtype = 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index a3544041ad32..8d63eed628d7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -7206,10 +7206,21 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size, unsigned int cache_line_size) { unsigned int page_shift = fls(page_size) - 1; + unsigned int sge_hps = page_shift - 10; unsigned int stat_len = cache_line_size > 64 ? 128 : 64; unsigned int fl_align = cache_line_size < 32 ? 32 : cache_line_size; unsigned int fl_align_log = fls(fl_align) - 1; + t4_write_reg(adap, SGE_HOST_PAGE_SIZE_A, + HOSTPAGESIZEPF0_V(sge_hps) | + HOSTPAGESIZEPF1_V(sge_hps) | + HOSTPAGESIZEPF2_V(sge_hps) | + HOSTPAGESIZEPF3_V(sge_hps) | + HOSTPAGESIZEPF4_V(sge_hps) | + HOSTPAGESIZEPF5_V(sge_hps) | + HOSTPAGESIZEPF6_V(sge_hps) | + HOSTPAGESIZEPF7_V(sge_hps)); + if (is_t4(adap->params.chip)) { t4_set_reg_field(adap, SGE_CONTROL_A, INGPADBOUNDARY_V(INGPADBOUNDARY_M) | diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 66535d1653f6..f16853c3c851 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -2107,7 +2107,6 @@ static struct eisa_driver de4x5_eisa_driver = { .remove = de4x5_eisa_remove, } }; -MODULE_DEVICE_TABLE(eisa, de4x5_eisa_ids); #endif #ifdef CONFIG_PCI diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 4c218341c51b..6e635debc7fd 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1105,7 +1105,7 @@ static int be_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, cmd->data = be_get_rss_hash_opts(adapter, cmd->flow_type); break; case ETHTOOL_GRXRINGS: - cmd->data = adapter->num_rx_qs - 1; + cmd->data = adapter->num_rx_qs; break; default: return -EINVAL; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index dfebc30c4841..f38c3fa7d705 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -780,7 +780,7 @@ static void dpaa_eth_add_channel(u16 channel) struct qman_portal *portal; int cpu; - for_each_cpu(cpu, cpus) { + for_each_cpu_and(cpu, cpus, cpu_online_mask) { portal = qman_get_affine_portal(cpu); qman_p_static_dequeue_add(portal, pool); } @@ -896,7 +896,7 @@ static void dpaa_fq_setup(struct dpaa_priv *priv, u16 channels[NR_CPUS]; struct dpaa_fq *fq; - for_each_cpu(cpu, affine_cpus) + for_each_cpu_and(cpu, affine_cpus, cpu_online_mask) channels[num_portals++] = qman_affine_channel(cpu); if (num_portals == 0) @@ -1648,7 +1648,7 @@ static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv, qm_sg_entry_get_len(&sgt[0]), dma_dir); /* remaining pages were mapped with skb_frag_dma_map() */ - for (i = 1; i < nr_frags; i++) { + for (i = 1; i <= nr_frags; i++) { WARN_ON(qm_sg_entry_is_ext(&sgt[i])); dma_unmap_page(dev, qm_sg_addr(&sgt[i]), @@ -2174,7 +2174,6 @@ static int dpaa_eth_poll(struct napi_struct *napi, int budget) if (cleaned < budget) { napi_complete_done(napi, cleaned); qman_p_irqsource_add(np->p, QM_PIRQ_DQRI); - } else if (np->down) { qman_p_irqsource_add(np->p, QM_PIRQ_DQRI); } @@ -2448,7 +2447,7 @@ static void dpaa_eth_napi_enable(struct dpaa_priv *priv) struct dpaa_percpu_priv *percpu_priv; int i; - for_each_possible_cpu(i) { + for_each_online_cpu(i) { percpu_priv = per_cpu_ptr(priv->percpu_priv, i); percpu_priv->np.down = 0; @@ -2461,7 +2460,7 @@ static void dpaa_eth_napi_disable(struct dpaa_priv *priv) struct dpaa_percpu_priv *percpu_priv; int i; - for_each_possible_cpu(i) { + for_each_online_cpu(i) { percpu_priv = per_cpu_ptr(priv->percpu_priv, i); percpu_priv->np.down = 1; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index bdee441bc3b7..7ce2e99b594d 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -569,7 +569,7 @@ static int dpaa_set_coalesce(struct net_device *dev, qman_dqrr_get_ithresh(portal, &prev_thresh); /* set new values */ - for_each_cpu(cpu, cpus) { + for_each_cpu_and(cpu, cpus, cpu_online_mask) { portal = qman_get_affine_portal(cpu); res = qman_portal_set_iperiod(portal, period); if (res) @@ -586,7 +586,7 @@ static int dpaa_set_coalesce(struct net_device *dev, revert_values: /* restore previous values */ - for_each_cpu(cpu, cpus) { + for_each_cpu_and(cpu, cpus, cpu_online_mask) { if (!needs_revert[cpu]) continue; portal = qman_get_affine_portal(cpu); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index dc339dc1adb2..df371c81a706 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1966,7 +1966,7 @@ alloc_channel(struct dpaa2_eth_priv *priv) channel->dpcon = setup_dpcon(priv); if (IS_ERR_OR_NULL(channel->dpcon)) { - err = PTR_ERR(channel->dpcon); + err = PTR_ERR_OR_ZERO(channel->dpcon); goto err_setup; } @@ -2022,7 +2022,7 @@ static int setup_dpio(struct dpaa2_eth_priv *priv) /* Try to allocate a channel */ channel = alloc_channel(priv); if (IS_ERR_OR_NULL(channel)) { - err = PTR_ERR(channel); + err = PTR_ERR_OR_ZERO(channel); if (err != -EPROBE_DEFER) dev_info(dev, "No affine channel for cpu %d and above\n", i); @@ -2796,6 +2796,7 @@ int dpaa2_eth_set_hash(struct net_device *net_dev, u64 flags) static int dpaa2_eth_set_cls(struct dpaa2_eth_priv *priv) { struct device *dev = priv->net_dev->dev.parent; + int err; /* Check if we actually support Rx flow classification */ if (dpaa2_eth_has_legacy_dist(priv)) { @@ -2814,9 +2815,13 @@ static int dpaa2_eth_set_cls(struct dpaa2_eth_priv *priv) return -EOPNOTSUPP; } + err = dpaa2_eth_set_dist_key(priv->net_dev, DPAA2_ETH_RX_DIST_CLS, 0); + if (err) + return err; + priv->rx_cls_enabled = 1; - return dpaa2_eth_set_dist_key(priv->net_dev, DPAA2_ETH_RX_DIST_CLS, 0); + return 0; } /* Bind the DPNI to its needed objects and resources: buffer pool, DPIOs, diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 591dfcf76adb..0610fc0bebc2 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -4,6 +4,7 @@ */ #include +#include #include "dpni.h" /* DPNI_LINK_OPT_* */ #include "dpaa2-eth.h" @@ -589,6 +590,8 @@ static int dpaa2_eth_get_rxnfc(struct net_device *net_dev, case ETHTOOL_GRXCLSRULE: if (rxnfc->fs.location >= max_rules) return -EINVAL; + rxnfc->fs.location = array_index_nospec(rxnfc->fs.location, + max_rules); if (!priv->cls_rules[rxnfc->fs.location].in_use) return -EINVAL; rxnfc->fs = priv->cls_rules[rxnfc->fs.location].fs; diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 5bb9eb35d76d..491475d87736 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -313,7 +313,9 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) { bool is_eof = !!tx_swbd->skb; - enetc_unmap_tx_buff(tx_ring, tx_swbd); + if (likely(tx_swbd->dma)) + enetc_unmap_tx_buff(tx_ring, tx_swbd); + if (is_eof) { napi_consume_skb(tx_swbd->skb, napi_budget); tx_swbd->skb = NULL; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a96ad20ee484..878ccce1dfcd 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3556,7 +3556,7 @@ fec_probe(struct platform_device *pdev) if (fep->reg_phy) regulator_disable(fep->reg_phy); failed_reset: - pm_runtime_put(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); failed_regulator: clk_disable_unprepare(fep->clk_ahb); diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c index 0beee2cc2ddd..722b6de24816 100644 --- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c +++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c @@ -252,14 +252,12 @@ uec_set_ringparam(struct net_device *netdev, return -EINVAL; } + if (netif_running(netdev)) + return -EBUSY; + ug_info->bdRingLenRx[queue] = ring->rx_pending; ug_info->bdRingLenTx[queue] = ring->tx_pending; - if (netif_running(netdev)) { - /* FIXME: restart automatically */ - netdev_info(netdev, "Please re-open the interface\n"); - } - return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index ce15d2350db9..188c3f6791b5 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -339,6 +339,7 @@ static int __lb_setup(struct net_device *ndev, static int __lb_up(struct net_device *ndev, enum hnae_loop loop_mode) { +#define NIC_LB_TEST_WAIT_PHY_LINK_TIME 300 struct hns_nic_priv *priv = netdev_priv(ndev); struct hnae_handle *h = priv->ae_handle; int speed, duplex; @@ -365,6 +366,9 @@ static int __lb_up(struct net_device *ndev, h->dev->ops->adjust_link(h, speed, duplex); + /* wait adjust link done and phy ready */ + msleep(NIC_LB_TEST_WAIT_PHY_LINK_TIME); + return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 299b277bc7ae..589b7ee32bff 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -107,7 +107,7 @@ struct hclgevf_mbx_arq_ring { struct hclgevf_dev *hdev; u32 head; u32 tail; - u32 count; + atomic_t count; u16 msg_q[HCLGE_MBX_MAX_ARQ_MSG_NUM][HCLGE_MBX_MAX_ARQ_MSG_SIZE]; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 162cb9afa0e7..c7d310903319 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2705,7 +2705,7 @@ int hns3_clean_rx_ring( #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16 struct net_device *netdev = ring->tqp->handle->kinfo.netdev; int recv_pkts, recv_bds, clean_count, err; - int unused_count = hns3_desc_unused(ring) - ring->pending_buf; + int unused_count = hns3_desc_unused(ring); struct sk_buff *skb = ring->skb; int num; @@ -2714,6 +2714,7 @@ int hns3_clean_rx_ring( recv_pkts = 0, recv_bds = 0, clean_count = 0; num -= unused_count; + unused_count -= ring->pending_buf; while (recv_pkts < budget && recv_bds < num) { /* Reuse or realloc buffers */ @@ -3773,12 +3774,13 @@ static int hns3_recover_hw_addr(struct net_device *ndev) struct netdev_hw_addr *ha, *tmp; int ret = 0; + netif_addr_lock_bh(ndev); /* go through and sync uc_addr entries to the device */ list = &ndev->uc; list_for_each_entry_safe(ha, tmp, &list->list, list) { ret = hns3_nic_uc_sync(ndev, ha->addr); if (ret) - return ret; + goto out; } /* go through and sync mc_addr entries to the device */ @@ -3786,9 +3788,11 @@ static int hns3_recover_hw_addr(struct net_device *ndev) list_for_each_entry_safe(ha, tmp, &list->list, list) { ret = hns3_nic_mc_sync(ndev, ha->addr); if (ret) - return ret; + goto out; } +out: + netif_addr_unlock_bh(ndev); return ret; } @@ -3799,6 +3803,7 @@ static void hns3_remove_hw_addr(struct net_device *netdev) hns3_nic_uc_unsync(netdev, netdev->dev_addr); + netif_addr_lock_bh(netdev); /* go through and unsync uc_addr entries to the device */ list = &netdev->uc; list_for_each_entry_safe(ha, tmp, &list->list, list) @@ -3809,6 +3814,8 @@ static void hns3_remove_hw_addr(struct net_device *netdev) list_for_each_entry_safe(ha, tmp, &list->list, list) if (ha->refcount > 1) hns3_nic_mc_unsync(netdev, ha->addr); + + netif_addr_unlock_bh(netdev); } static void hns3_clear_tx_ring(struct hns3_enet_ring *ring) @@ -3850,6 +3857,13 @@ static int hns3_clear_rx_ring(struct hns3_enet_ring *ring) ring_ptr_move_fw(ring, next_to_use); } + /* Free the pending skb in rx ring */ + if (ring->skb) { + dev_kfree_skb_any(ring->skb); + ring->skb = NULL; + ring->pending_buf = 0; + } + return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 359d4731fb2d..ea94b5152963 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -483,6 +483,11 @@ static void hns3_get_stats(struct net_device *netdev, struct hnae3_handle *h = hns3_get_handle(netdev); u64 *p = data; + if (hns3_nic_resetting(netdev)) { + netdev_err(netdev, "dev resetting, could not get stats\n"); + return; + } + if (!h->ae_algo->ops->get_stats || !h->ae_algo->ops->update_stats) { netdev_err(netdev, "could not get any statistics\n"); return; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 3a093a92eac5..d92e4af11b1f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -373,21 +373,26 @@ int hclge_cmd_init(struct hclge_dev *hdev) * reset may happen when lower level reset is being processed. */ if ((hclge_is_reset_pending(hdev))) { - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); - return -EBUSY; + ret = -EBUSY; + goto err_cmd_init; } ret = hclge_cmd_query_firmware_version(&hdev->hw, &version); if (ret) { dev_err(&hdev->pdev->dev, "firmware version query failed %d\n", ret); - return ret; + goto err_cmd_init; } hdev->fw_version = version; dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version); return 0; + +err_cmd_init: + set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + + return ret; } static void hclge_cmd_uninit_regs(struct hclge_hw *hw) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index deda606c51e7..6d4d5a470163 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -5942,8 +5942,11 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport, } /* check if we just hit the duplicate */ - if (!ret) - ret = -EINVAL; + if (!ret) { + dev_warn(&hdev->pdev->dev, "VF %d mac(%pM) exists\n", + vport->vport_id, addr); + return 0; + } dev_err(&hdev->pdev->dev, "PF failed to add unicast entry(%pM) in the MAC table\n", diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index aafc69f4bfdd..a7bbb6d3091a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -1331,8 +1331,11 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev, bool init) ret = hclge_pfc_setup_hw(hdev); if (init && ret == -EOPNOTSUPP) dev_warn(&hdev->pdev->dev, "GE MAC does not support pfc\n"); - else + else if (ret) { + dev_err(&hdev->pdev->dev, "config pfc failed! ret = %d\n", + ret); return ret; + } return hclge_tm_bp_setup(hdev); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index 9441b453d38d..382ecb15e743 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -327,7 +327,7 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev) hdev->arq.hdev = hdev; hdev->arq.head = 0; hdev->arq.tail = 0; - hdev->arq.count = 0; + atomic_set(&hdev->arq.count, 0); hdev->hw.cmq.csq.next_to_clean = 0; hdev->hw.cmq.csq.next_to_use = 0; hdev->hw.cmq.crq.next_to_clean = 0; @@ -344,8 +344,8 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev) * reset may happen when lower level reset is being processed. */ if (hclgevf_is_reset_pending(hdev)) { - set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); - return -EBUSY; + ret = -EBUSY; + goto err_cmd_init; } /* get firmware version */ @@ -353,13 +353,18 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev) if (ret) { dev_err(&hdev->pdev->dev, "failed(%d) to query firmware version\n", ret); - return ret; + goto err_cmd_init; } hdev->fw_version = version; dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version); return 0; + +err_cmd_init: + set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); + + return ret; } static void hclgevf_cmd_uninit_regs(struct hclgevf_hw *hw) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 8bc28e6f465f..8dd7fef863f6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2007,9 +2007,15 @@ static int hclgevf_set_alive(struct hnae3_handle *handle, bool alive) static int hclgevf_client_start(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + int ret; + + ret = hclgevf_set_alive(handle, true); + if (ret) + return ret; mod_timer(&hdev->keep_alive_timer, jiffies + 2 * HZ); - return hclgevf_set_alive(handle, true); + + return 0; } static void hclgevf_client_stop(struct hnae3_handle *handle) @@ -2051,6 +2057,10 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev) { set_bit(HCLGEVF_STATE_DOWN, &hdev->state); + if (hdev->keep_alive_timer.function) + del_timer_sync(&hdev->keep_alive_timer); + if (hdev->keep_alive_task.func) + cancel_work_sync(&hdev->keep_alive_task); if (hdev->service_timer.function) del_timer_sync(&hdev->service_timer); if (hdev->service_task.func) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index 7dc3c9f79169..4f2c77283cb4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -208,7 +208,8 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) /* we will drop the async msg if we find ARQ as full * and continue with next message */ - if (hdev->arq.count >= HCLGE_MBX_MAX_ARQ_MSG_NUM) { + if (atomic_read(&hdev->arq.count) >= + HCLGE_MBX_MAX_ARQ_MSG_NUM) { dev_warn(&hdev->pdev->dev, "Async Q full, dropping msg(%d)\n", req->msg[1]); @@ -220,7 +221,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) memcpy(&msg_q[0], req->msg, HCLGE_MBX_MAX_ARQ_MSG_SIZE * sizeof(u16)); hclge_mbx_tail_ptr_move_arq(hdev->arq); - hdev->arq.count++; + atomic_inc(&hdev->arq.count); hclgevf_mbx_task_schedule(hdev); @@ -308,7 +309,7 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev) } hclge_mbx_head_ptr_move_arq(hdev->arq); - hdev->arq.count--; + atomic_dec(&hdev->arq.count); msg_q = hdev->arq.msg_q[hdev->arq.head]; } } diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 7acc61e4f645..f7fa1448af15 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4209,7 +4209,7 @@ void e1000e_up(struct e1000_adapter *adapter) e1000_configure_msix(adapter); e1000_irq_enable(adapter); - netif_start_queue(adapter->netdev); + /* tx queue started by watchdog timer when link is up */ e1000e_trigger_lsc(adapter); } @@ -4607,6 +4607,7 @@ int e1000e_open(struct net_device *netdev) pm_runtime_get_sync(&pdev->dev); netif_carrier_off(netdev); + netif_stop_queue(netdev); /* allocate transmit descriptors */ err = e1000e_setup_tx_resources(adapter->tx_ring); @@ -4667,7 +4668,6 @@ int e1000e_open(struct net_device *netdev) e1000_irq_enable(adapter); adapter->tx_hang_recheck = false; - netif_start_queue(netdev); hw->mac.get_link_status = true; pm_runtime_put(&pdev->dev); @@ -5289,6 +5289,7 @@ static void e1000_watchdog_task(struct work_struct *work) if (phy->ops.cfg_on_link_up) phy->ops.cfg_on_link_up(hw); + netif_wake_queue(netdev); netif_carrier_on(netdev); if (!test_bit(__E1000_DOWN, &adapter->state)) @@ -5302,6 +5303,7 @@ static void e1000_watchdog_task(struct work_struct *work) /* Link status message must follow this format */ pr_info("%s NIC Link is Down\n", adapter->netdev->name); netif_carrier_off(netdev); + netif_stop_queue(netdev); if (!test_bit(__E1000_DOWN, &adapter->state)) mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ)); @@ -5309,13 +5311,8 @@ static void e1000_watchdog_task(struct work_struct *work) /* 8000ES2LAN requires a Rx packet buffer work-around * on link down event; reset the controller to flush * the Rx packet buffer. - * - * If the link is lost the controller stops DMA, but - * if there is queued Tx work it cannot be done. So - * reset the controller to flush the Tx packet buffers. */ - if ((adapter->flags & FLAG_RX_NEEDS_RESTART) || - e1000_desc_unused(tx_ring) + 1 < tx_ring->count) + if (adapter->flags & FLAG_RX_NEEDS_RESTART) adapter->flags |= FLAG_RESTART_NOW; else pm_schedule_suspend(netdev->dev.parent, @@ -5338,6 +5335,14 @@ static void e1000_watchdog_task(struct work_struct *work) adapter->gotc_old = adapter->stats.gotc; spin_unlock(&adapter->stats64_lock); + /* If the link is lost the controller stops DMA, but + * if there is queued Tx work it cannot be done. So + * reset the controller to flush the Tx packet buffers. + */ + if (!netif_carrier_ok(netdev) && + (e1000_desc_unused(tx_ring) + 1 < tx_ring->count)) + adapter->flags |= FLAG_RESTART_NOW; + /* If reset is necessary, do it outside of interrupt context. */ if (adapter->flags & FLAG_RESTART_NOW) { schedule_work(&adapter->reset_task); @@ -7350,7 +7355,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP); - if (pci_dev_run_wake(pdev)) + if (pci_dev_run_wake(pdev) && hw->mac.type < e1000_pch_cnp) pm_runtime_put_noidle(&pdev->dev); return 0; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b1c265012c8a..133f5e008822 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2654,6 +2654,10 @@ void i40e_vlan_stripping_enable(struct i40e_vsi *vsi) struct i40e_vsi_context ctxt; i40e_status ret; + /* Don't modify stripping options if a port VLAN is active */ + if (vsi->info.pvid) + return; + if ((vsi->info.valid_sections & cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) && ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_MODE_MASK) == 0)) @@ -2684,6 +2688,10 @@ void i40e_vlan_stripping_disable(struct i40e_vsi *vsi) struct i40e_vsi_context ctxt; i40e_status ret; + /* Don't modify stripping options if a port VLAN is active */ + if (vsi->info.pvid) + return; + if ((vsi->info.valid_sections & cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) && ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_EMOD_MASK) == @@ -6846,10 +6854,12 @@ static int i40e_setup_tc(struct net_device *netdev, void *type_data) struct i40e_pf *pf = vsi->back; u8 enabled_tc = 0, num_tc, hw; bool need_reset = false; + int old_queue_pairs; int ret = -EINVAL; u16 mode; int i; + old_queue_pairs = vsi->num_queue_pairs; num_tc = mqprio_qopt->qopt.num_tc; hw = mqprio_qopt->qopt.hw; mode = mqprio_qopt->mode; @@ -6950,6 +6960,7 @@ static int i40e_setup_tc(struct net_device *netdev, void *type_data) } ret = i40e_configure_queue_channels(vsi); if (ret) { + vsi->num_queue_pairs = old_queue_pairs; netdev_info(netdev, "Failed configuring queue channels\n"); need_reset = true; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 831d52bc3c9a..2b0362c827e9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -181,7 +181,7 @@ static inline bool i40e_vc_isvalid_vsi_id(struct i40e_vf *vf, u16 vsi_id) * check for the valid queue id **/ static inline bool i40e_vc_isvalid_queue_id(struct i40e_vf *vf, u16 vsi_id, - u8 qid) + u16 qid) { struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = i40e_find_vsi_from_id(pf, vsi_id); @@ -2454,8 +2454,10 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg) (u8 *)&stats, sizeof(stats)); } -/* If the VF is not trusted restrict the number of MAC/VLAN it can program */ -#define I40E_VC_MAX_MAC_ADDR_PER_VF 12 +/* If the VF is not trusted restrict the number of MAC/VLAN it can program + * MAC filters: 16 for multicast, 1 for MAC, 1 for broadcast + */ +#define I40E_VC_MAX_MAC_ADDR_PER_VF (16 + 1 + 1) #define I40E_VC_MAX_VLAN_PER_VF 8 /** @@ -3374,7 +3376,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; - goto err; + goto err_out; } if (!vf->adq_enabled) { @@ -3382,7 +3384,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) "VF %d: ADq is not enabled, can't apply cloud filter\n", vf->vf_id); aq_ret = I40E_ERR_PARAM; - goto err; + goto err_out; } if (i40e_validate_cloud_filter(vf, vcf)) { @@ -3390,7 +3392,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) "VF %d: Invalid input/s, can't apply cloud filter\n", vf->vf_id); aq_ret = I40E_ERR_PARAM; - goto err; + goto err_out; } cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL); @@ -3451,13 +3453,17 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) "VF %d: Failed to add cloud filter, err %s aq_err %s\n", vf->vf_id, i40e_stat_str(&pf->hw, ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - goto err; + goto err_free; } INIT_HLIST_NODE(&cfilter->cloud_node); hlist_add_head(&cfilter->cloud_node, &vf->cloud_filter_list); + /* release the pointer passing it to the collection */ + cfilter = NULL; vf->num_cloud_filters++; -err: +err_free: + kfree(cfilter); +err_out: return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_CLOUD_FILTER, aq_ret); } diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 89440775aea1..6af5bd5883ca 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -277,6 +277,7 @@ struct ice_q_vector { * value to the device */ u8 intrl; + u8 itr_countdown; /* when 0 should adjust adaptive ITR */ } ____cacheline_internodealigned_in_smp; enum ice_pf_flags { diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index fa61203bee26..b710545cf7d1 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1848,6 +1848,10 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) */ ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL; + /* Preserve existing VLAN strip setting */ + ctxt->info.vlan_flags |= (vsi->info.vlan_flags & + ICE_AQ_VSI_VLAN_EMOD_M); + ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 47cc3f905b7f..b562476b1251 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -342,6 +342,10 @@ ice_prepare_for_reset(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; + /* already prepared for reset */ + if (test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) + return; + /* Notify VFs of impending reset */ if (ice_check_sq_alive(hw, &hw->mailboxq)) ice_vc_notify_reset(pf); @@ -416,10 +420,15 @@ static void ice_reset_subtask(struct ice_pf *pf) * for the reset now), poll for reset done, rebuild and return. */ if (test_bit(__ICE_RESET_OICR_RECV, pf->state)) { - clear_bit(__ICE_GLOBR_RECV, pf->state); - clear_bit(__ICE_CORER_RECV, pf->state); - if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) - ice_prepare_for_reset(pf); + /* Perform the largest reset requested */ + if (test_and_clear_bit(__ICE_CORER_RECV, pf->state)) + reset_type = ICE_RESET_CORER; + if (test_and_clear_bit(__ICE_GLOBR_RECV, pf->state)) + reset_type = ICE_RESET_GLOBR; + /* return if no valid reset type requested */ + if (reset_type == ICE_RESET_INVAL) + return; + ice_prepare_for_reset(pf); /* make sure we are ready to rebuild */ if (ice_check_reset(&pf->hw)) { @@ -519,6 +528,9 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) case ICE_FC_RX_PAUSE: fc = "RX"; break; + case ICE_FC_NONE: + fc = "None"; + break; default: fc = "Unknown"; break; @@ -2545,6 +2557,9 @@ static int ice_set_features(struct net_device *netdev, struct ice_vsi *vsi = np->vsi; int ret = 0; + /* Multiple features can be changed in one call so keep features in + * separate if/else statements to guarantee each feature is checked + */ if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH)) ret = ice_vsi_manage_rss_lut(vsi, true); else if (!(features & NETIF_F_RXHASH) && @@ -2557,8 +2572,9 @@ static int ice_set_features(struct net_device *netdev, else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) && (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) ret = ice_vsi_manage_vlan_stripping(vsi, false); - else if ((features & NETIF_F_HW_VLAN_CTAG_TX) && - !(netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) + + if ((features & NETIF_F_HW_VLAN_CTAG_TX) && + !(netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) ret = ice_vsi_manage_vlan_insertion(vsi); else if (!(features & NETIF_F_HW_VLAN_CTAG_TX) && (netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 09d1c314b68f..c5f6cfecc042 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -643,21 +643,37 @@ static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *fi) fi->fltr_act == ICE_FWD_TO_VSI_LIST || fi->fltr_act == ICE_FWD_TO_Q || fi->fltr_act == ICE_FWD_TO_QGRP)) { - fi->lb_en = true; - /* Do not set lan_en to TRUE if + /* Setting LB for prune actions will result in replicated + * packets to the internal switch that will be dropped. + */ + if (fi->lkup_type != ICE_SW_LKUP_VLAN) + fi->lb_en = true; + + /* Set lan_en to TRUE if * 1. The switch is a VEB AND * 2 - * 2.1 The lookup is MAC with unicast addr for MAC, OR - * 2.2 The lookup is MAC_VLAN with unicast addr for MAC + * 2.1 The lookup is VLAN, OR + * 2.2 The lookup is default port mode, OR + * 2.3 The lookup is MAC with mcast or bcast addr for MAC, OR + * 2.4 The lookup is MAC_VLAN with mcast or bcast addr for MAC. + * + * OR * - * In all other cases, the LAN enable has to be set to true. + * The switch is a VEPA. + * + * In all other cases, the LAN enable has to be set to false. */ - if (!(hw->evb_veb && - ((fi->lkup_type == ICE_SW_LKUP_MAC && - is_unicast_ether_addr(fi->l_data.mac.mac_addr)) || - (fi->lkup_type == ICE_SW_LKUP_MAC_VLAN && - is_unicast_ether_addr(fi->l_data.mac_vlan.mac_addr))))) + if (hw->evb_veb) { + if (fi->lkup_type == ICE_SW_LKUP_VLAN || + fi->lkup_type == ICE_SW_LKUP_DFLT || + (fi->lkup_type == ICE_SW_LKUP_MAC && + !is_unicast_ether_addr(fi->l_data.mac.mac_addr)) || + (fi->lkup_type == ICE_SW_LKUP_MAC_VLAN && + !is_unicast_ether_addr(fi->l_data.mac.mac_addr))) + fi->lan_en = true; + } else { fi->lan_en = true; + } } } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index c289d97f477d..851030ad5016 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1048,18 +1048,257 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) return failure ? budget : (int)total_rx_pkts; } +static unsigned int ice_itr_divisor(struct ice_port_info *pi) +{ + switch (pi->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_40GB: + return ICE_ITR_ADAPTIVE_MIN_INC * 1024; + case ICE_AQ_LINK_SPEED_25GB: + case ICE_AQ_LINK_SPEED_20GB: + return ICE_ITR_ADAPTIVE_MIN_INC * 512; + case ICE_AQ_LINK_SPEED_100MB: + return ICE_ITR_ADAPTIVE_MIN_INC * 32; + default: + return ICE_ITR_ADAPTIVE_MIN_INC * 256; + } +} + +/** + * ice_update_itr - update the adaptive ITR value based on statistics + * @q_vector: structure containing interrupt and ring information + * @rc: structure containing ring performance data + * + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + */ +static void +ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc) +{ + unsigned int avg_wire_size, packets, bytes, itr; + unsigned long next_update = jiffies; + bool container_is_rx; + + if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting)) + return; + + /* If itr_countdown is set it means we programmed an ITR within + * the last 4 interrupt cycles. This has a side effect of us + * potentially firing an early interrupt. In order to work around + * this we need to throw out any data received for a few + * interrupts following the update. + */ + if (q_vector->itr_countdown) { + itr = rc->target_itr; + goto clear_counts; + } + + container_is_rx = (&q_vector->rx == rc); + /* For Rx we want to push the delay up and default to low latency. + * for Tx we want to pull the delay down and default to high latency. + */ + itr = container_is_rx ? + ICE_ITR_ADAPTIVE_MIN_USECS | ICE_ITR_ADAPTIVE_LATENCY : + ICE_ITR_ADAPTIVE_MAX_USECS | ICE_ITR_ADAPTIVE_LATENCY; + + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, rc->next_update)) + goto clear_counts; + + packets = rc->total_pkts; + bytes = rc->total_bytes; + + if (container_is_rx) { + /* If Rx there are 1 to 4 packets and bytes are less than + * 9000 assume insufficient data to use bulk rate limiting + * approach unless Tx is already in bulk rate limiting. We + * are likely latency driven. + */ + if (packets && packets < 4 && bytes < 9000 && + (q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) { + itr = ICE_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + } else if (packets < 4) { + /* If we have Tx and Rx ITR maxed and Tx ITR is running in + * bulk mode and we are receiving 4 or fewer packets just + * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so + * that the Rx can relax. + */ + if (rc->target_itr == ICE_ITR_ADAPTIVE_MAX_USECS && + (q_vector->rx.target_itr & ICE_ITR_MASK) == + ICE_ITR_ADAPTIVE_MAX_USECS) + goto clear_counts; + } else if (packets > 32) { + /* If we have processed over 32 packets in a single interrupt + * for Tx assume we need to switch over to "bulk" mode. + */ + rc->target_itr &= ~ICE_ITR_ADAPTIVE_LATENCY; + } + + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * Between 4 and 56 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. + */ + if (packets < 56) { + itr = rc->target_itr + ICE_ITR_ADAPTIVE_MIN_INC; + if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) { + itr &= ICE_ITR_ADAPTIVE_LATENCY; + itr += ICE_ITR_ADAPTIVE_MAX_USECS; + } + goto clear_counts; + } + + if (packets <= 256) { + itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); + itr &= ICE_ITR_MASK; + + /* Between 56 and 112 is our "goldilocks" zone where we are + * working out "just right". Just report that our current + * ITR is good for us. + */ + if (packets <= 112) + goto clear_counts; + + /* If packet count is 128 or greater we are likely looking + * at a slight overrun of the delay we want. Try halving + * our delay to see if that will cut the number of packets + * in half per interrupt. + */ + itr >>= 1; + itr &= ICE_ITR_MASK; + if (itr < ICE_ITR_ADAPTIVE_MIN_USECS) + itr = ICE_ITR_ADAPTIVE_MIN_USECS; + + goto clear_counts; + } + + /* The paths below assume we are dealing with a bulk ITR since + * number of packets is greater than 256. We are just going to have + * to compute a value and try to bring the count under control, + * though for smaller packet sizes there isn't much we can do as + * NAPI polling will likely be kicking in sooner rather than later. + */ + itr = ICE_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * gives the current packet size of the frame. + */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value + * + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. + */ + if (avg_wire_size <= 60) { + /* Start at 250k ints/sec */ + avg_wire_size = 4096; + } else if (avg_wire_size <= 380) { + /* 250K ints/sec to 60K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 1696; + } else if (avg_wire_size <= 1084) { + /* 60K ints/sec to 36K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1980) { + /* 36K ints/sec to 30K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 30K ints/sec */ + avg_wire_size = 32256; + } + + /* If we are in low latency mode halve our delay which doubles the + * rate to somewhere between 100K to 16K ints/sec + */ + if (itr & ICE_ITR_ADAPTIVE_LATENCY) + avg_wire_size >>= 1; + + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. + */ + itr += DIV_ROUND_UP(avg_wire_size, + ice_itr_divisor(q_vector->vsi->port_info)) * + ICE_ITR_ADAPTIVE_MIN_INC; + + if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) { + itr &= ICE_ITR_ADAPTIVE_LATENCY; + itr += ICE_ITR_ADAPTIVE_MAX_USECS; + } + +clear_counts: + /* write back value */ + rc->target_itr = itr; + + /* next update should occur within next jiffy */ + rc->next_update = next_update + 1; + + rc->total_bytes = 0; + rc->total_pkts = 0; +} + /** * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register * @itr_idx: interrupt throttling index - * @reg_itr: interrupt throttling value adjusted based on ITR granularity + * @itr: interrupt throttling value in usecs */ -static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr) +static u32 ice_buildreg_itr(int itr_idx, u16 itr) { + /* The itr value is reported in microseconds, and the register value is + * recorded in 2 microsecond units. For this reason we only need to + * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this + * granularity as a shift instead of division. The mask makes sure the + * ITR value is never odd so we don't accidentally write into the field + * prior to the ITR field. + */ + itr &= ICE_ITR_MASK; + return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | (itr_idx << GLINT_DYN_CTL_ITR_INDX_S) | - (reg_itr << GLINT_DYN_CTL_INTERVAL_S); + (itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S)); } +/* The act of updating the ITR will cause it to immediately trigger. In order + * to prevent this from throwing off adaptive update statistics we defer the + * update so that it can only happen so often. So after either Tx or Rx are + * updated we make the adaptive scheme wait until either the ITR completely + * expires via the next_update expiration or we have been through at least + * 3 interrupts. + */ +#define ITR_COUNTDOWN_START 3 + /** * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt * @vsi: the VSI associated with the q_vector @@ -1068,10 +1307,14 @@ static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr) static void ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector) { - struct ice_hw *hw = &vsi->back->hw; - struct ice_ring_container *rc; + struct ice_ring_container *tx = &q_vector->tx; + struct ice_ring_container *rx = &q_vector->rx; u32 itr_val; + /* This will do nothing if dynamic updates are not enabled */ + ice_update_itr(q_vector, tx); + ice_update_itr(q_vector, rx); + /* This block of logic allows us to get away with only updating * one ITR value with each interrupt. The idea is to perform a * pseudo-lazy update with the following criteria. @@ -1080,35 +1323,36 @@ ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector) * 2. If we must reduce an ITR that is given highest priority. * 3. We then give priority to increasing ITR based on amount. */ - if (q_vector->rx.target_itr < q_vector->rx.current_itr) { - rc = &q_vector->rx; + if (rx->target_itr < rx->current_itr) { /* Rx ITR needs to be reduced, this is highest priority */ - itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr); - rc->current_itr = rc->target_itr; - } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || - ((q_vector->rx.target_itr - q_vector->rx.current_itr) < - (q_vector->tx.target_itr - q_vector->tx.current_itr))) { - rc = &q_vector->tx; + itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr); + rx->current_itr = rx->target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if ((tx->target_itr < tx->current_itr) || + ((rx->target_itr - rx->current_itr) < + (tx->target_itr - tx->current_itr))) { /* Tx ITR needs to be reduced, this is second priority * Tx ITR needs to be increased more than Rx, fourth priority */ - itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr); - rc->current_itr = rc->target_itr; - } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { - rc = &q_vector->rx; + itr_val = ice_buildreg_itr(tx->itr_idx, tx->target_itr); + tx->current_itr = tx->target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if (rx->current_itr != rx->target_itr) { /* Rx ITR needs to be increased, third priority */ - itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr); - rc->current_itr = rc->target_itr; + itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr); + rx->current_itr = rx->target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; } else { /* Still have to re-enable the interrupts */ itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); + if (q_vector->itr_countdown) + q_vector->itr_countdown--; } - if (!test_bit(__ICE_DOWN, vsi->state)) { - int vector = vsi->hw_base_vector + q_vector->v_idx; - - wr32(hw, GLINT_DYN_CTL(vector), itr_val); - } + if (!test_bit(__ICE_DOWN, vsi->state)) + wr32(&vsi->back->hw, + GLINT_DYN_CTL(vsi->hw_base_vector + q_vector->v_idx), + itr_val); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index fc358ea81816..74a031fbd732 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -128,6 +128,12 @@ enum ice_rx_dtype { #define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */ #define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~ICE_ITR_MASK) +#define ICE_ITR_ADAPTIVE_MIN_INC 0x0002 +#define ICE_ITR_ADAPTIVE_MIN_USECS 0x0002 +#define ICE_ITR_ADAPTIVE_MAX_USECS 0x00FA +#define ICE_ITR_ADAPTIVE_LATENCY 0x8000 +#define ICE_ITR_ADAPTIVE_BULK 0x0000 + #define ICE_DFLT_INTRL 0 /* Legacy or Advanced Mode Queue */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 57155b4a59dc..8b1ee9f3a39d 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -764,6 +764,7 @@ static void ice_cleanup_and_realloc_vf(struct ice_vf *vf) bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) { struct ice_hw *hw = &pf->hw; + struct ice_vf *vf; int v, i; /* If we don't have any VFs, then there is nothing to reset */ @@ -778,12 +779,17 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) for (v = 0; v < pf->num_alloc_vfs; v++) ice_trigger_vf_reset(&pf->vf[v], is_vflr); - /* Call Disable LAN Tx queue AQ call with VFR bit set and 0 - * queues to inform Firmware about VF reset. - */ - for (v = 0; v < pf->num_alloc_vfs; v++) - ice_dis_vsi_txq(pf->vsi[0]->port_info, 0, NULL, NULL, - ICE_VF_RESET, v, NULL); + for (v = 0; v < pf->num_alloc_vfs; v++) { + struct ice_vsi *vsi; + + vf = &pf->vf[v]; + vsi = pf->vsi[vf->lan_vsi_idx]; + if (test_bit(ICE_VF_STATE_ENA, vf->vf_states)) { + ice_vsi_stop_lan_tx_rings(vsi, ICE_VF_RESET, vf->vf_id); + ice_vsi_stop_rx_rings(vsi); + clear_bit(ICE_VF_STATE_ENA, vf->vf_states); + } + } /* HW requires some time to make sure it can flush the FIFO for a VF * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in @@ -796,9 +802,9 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) /* Check each VF in sequence */ while (v < pf->num_alloc_vfs) { - struct ice_vf *vf = &pf->vf[v]; u32 reg; + vf = &pf->vf[v]; reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id)); if (!(reg & VPGEN_VFRSTAT_VFRD_M)) break; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 3269d8e94744..580d14b49fda 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3452,6 +3452,9 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) break; } } + + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP); + pm_runtime_put_noidle(&pdev->dev); return 0; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c0a3718b2e2a..c7f4b72b3c07 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4674,7 +4674,7 @@ static int mvneta_probe(struct platform_device *pdev) err = register_netdev(dev); if (err < 0) { dev_err(&pdev->dev, "failed to register\n"); - goto err_free_stats; + goto err_netdev; } netdev_info(dev, "Using %s mac address %pM\n", mac_from, @@ -4685,14 +4685,12 @@ static int mvneta_probe(struct platform_device *pdev) return 0; err_netdev: - unregister_netdev(dev); if (pp->bm_priv) { mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id); mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, 1 << pp->id); mvneta_bm_put(pp->bm_priv); } -err_free_stats: free_percpu(pp->stats); err_free_ports: free_percpu(pp->ports); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 25fbed2b8d94..906f080d9559 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1304,8 +1304,8 @@ static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset, int i; for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) - memcpy(data + i * ETH_GSTRING_LEN, - &mvpp2_ethtool_regs[i].string, ETH_GSTRING_LEN); + strscpy(data + i * ETH_GSTRING_LEN, + mvpp2_ethtool_regs[i].string, ETH_GSTRING_LEN); } } @@ -1455,7 +1455,7 @@ static inline void mvpp2_xlg_max_rx_size_set(struct mvpp2_port *port) /* Set defaults to the MVPP2 port */ static void mvpp2_defaults_set(struct mvpp2_port *port) { - int tx_port_num, val, queue, ptxq, lrxq; + int tx_port_num, val, queue, lrxq; if (port->priv->hw_version == MVPP21) { /* Update TX FIFO MIN Threshold */ @@ -1476,11 +1476,9 @@ static void mvpp2_defaults_set(struct mvpp2_port *port) mvpp2_write(port->priv, MVPP2_TXP_SCHED_FIXED_PRIO_REG, 0); /* Close bandwidth for all queues */ - for (queue = 0; queue < MVPP2_MAX_TXQ; queue++) { - ptxq = mvpp2_txq_phys(port->id, queue); + for (queue = 0; queue < MVPP2_MAX_TXQ; queue++) mvpp2_write(port->priv, - MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(ptxq), 0); - } + MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(queue), 0); /* Set refill period to 1 usec, refill tokens * and bucket size to maximum @@ -2336,7 +2334,7 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port, txq->descs_dma = 0; /* Set minimum bandwidth for disabled TXQs */ - mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(txq->id), 0); + mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(txq->log_id), 0); /* Set Tx descriptors queue starting address and size */ thread = mvpp2_cpu_to_thread(port->priv, get_cpu()); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index 392fd895f278..ae2240074d8e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -1905,8 +1905,7 @@ static int mvpp2_prs_ip6_init(struct mvpp2 *priv) } /* Find tcam entry with matched pair */ -static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid, - u16 mask) +static int mvpp2_prs_vid_range_find(struct mvpp2_port *port, u16 vid, u16 mask) { unsigned char byte[2], enable[2]; struct mvpp2_prs_entry pe; @@ -1914,13 +1913,13 @@ static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid, int tid; /* Go through the all entries with MVPP2_PRS_LU_VID */ - for (tid = MVPP2_PE_VID_FILT_RANGE_START; - tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) { - if (!priv->prs_shadow[tid].valid || - priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID) + for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id); + tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) { + if (!port->priv->prs_shadow[tid].valid || + port->priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid); + mvpp2_prs_init_from_hw(port->priv, &pe, tid); mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]); mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]); @@ -1950,7 +1949,7 @@ int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid) memset(&pe, 0, sizeof(pe)); /* Scan TCAM and see if entry with this already exist */ - tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask); + tid = mvpp2_prs_vid_range_find(port, vid, mask); reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id)); if (reg_val & MVPP2_DSA_EXTENDED) @@ -2008,7 +2007,7 @@ void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid) int tid; /* Scan TCAM and see if entry with this already exist */ - tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff); + tid = mvpp2_prs_vid_range_find(port, vid, 0xfff); /* No such entry */ if (tid < 0) @@ -2026,8 +2025,10 @@ void mvpp2_prs_vid_remove_all(struct mvpp2_port *port) for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id); tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) { - if (priv->prs_shadow[tid].valid) - mvpp2_prs_vid_entry_remove(port, tid); + if (priv->prs_shadow[tid].valid) { + mvpp2_prs_hw_inv(priv, tid); + priv->prs_shadow[tid].valid = false; + } } } diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 549d36497b8c..f3f7551162a9 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1777,6 +1777,7 @@ static void mtk_poll_controller(struct net_device *dev) static int mtk_start_dma(struct mtk_eth *eth) { + u32 rx_2b_offset = (NET_IP_ALIGN == 2) ? MTK_RX_2B_OFFSET : 0; int err; err = mtk_dma_init(eth); @@ -1793,7 +1794,7 @@ static int mtk_start_dma(struct mtk_eth *eth) MTK_QDMA_GLO_CFG); mtk_w32(eth, - MTK_RX_DMA_EN | MTK_RX_2B_OFFSET | + MTK_RX_DMA_EN | rx_2b_offset | MTK_RX_BT_32DWORDS | MTK_MULTI_EN, MTK_PDMA_GLO_CFG); @@ -2297,13 +2298,13 @@ static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, switch (cmd->cmd) { case ETHTOOL_GRXRINGS: - if (dev->features & NETIF_F_LRO) { + if (dev->hw_features & NETIF_F_LRO) { cmd->data = MTK_MAX_RX_RING_NUM; ret = 0; } break; case ETHTOOL_GRXCLSRLCNT: - if (dev->features & NETIF_F_LRO) { + if (dev->hw_features & NETIF_F_LRO) { struct mtk_mac *mac = netdev_priv(dev); cmd->rule_cnt = mac->hwlro_ip_cnt; @@ -2311,11 +2312,11 @@ static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, } break; case ETHTOOL_GRXCLSRULE: - if (dev->features & NETIF_F_LRO) + if (dev->hw_features & NETIF_F_LRO) ret = mtk_hwlro_get_fdir_entry(dev, cmd); break; case ETHTOOL_GRXCLSRLALL: - if (dev->features & NETIF_F_LRO) + if (dev->hw_features & NETIF_F_LRO) ret = mtk_hwlro_get_fdir_all(dev, cmd, rule_locs); break; @@ -2332,11 +2333,11 @@ static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) switch (cmd->cmd) { case ETHTOOL_SRXCLSRLINS: - if (dev->features & NETIF_F_LRO) + if (dev->hw_features & NETIF_F_LRO) ret = mtk_hwlro_add_ipaddr(dev, cmd); break; case ETHTOOL_SRXCLSRLDEL: - if (dev->features & NETIF_F_LRO) + if (dev->hw_features & NETIF_F_LRO) ret = mtk_hwlro_del_ipaddr(dev, cmd); break; default: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index d290f0787dfb..94c59939a8cf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -2010,6 +2010,8 @@ static int mlx4_en_set_tunable(struct net_device *dev, return ret; } +#define MLX4_EEPROM_PAGE_LEN 256 + static int mlx4_en_get_module_info(struct net_device *dev, struct ethtool_modinfo *modinfo) { @@ -2044,7 +2046,7 @@ static int mlx4_en_get_module_info(struct net_device *dev, break; case MLX4_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + modinfo->eeprom_len = MLX4_EEPROM_PAGE_LEN; break; default: return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index ffed2d4c9403..9c481823b3e8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1492,7 +1492,7 @@ int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, rule.port = port; rule.qpn = qpn; INIT_LIST_HEAD(&rule.list); - mlx4_err(dev, "going promisc on %x\n", port); + mlx4_info(dev, "going promisc on %x\n", port); return mlx4_flow_attach(dev, &rule, regid_p); } diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 10fcc22f4590..ba6ac31a339d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -2077,11 +2077,6 @@ int mlx4_get_module_info(struct mlx4_dev *dev, u8 port, size -= offset + size - I2C_PAGE_SIZE; i2c_addr = I2C_ADDR_LOW; - if (offset >= I2C_PAGE_SIZE) { - /* Reset offset to high page */ - i2c_addr = I2C_ADDR_HIGH; - offset -= I2C_PAGE_SIZE; - } cable_info = (struct mlx4_cable_info *)inmad->data; cable_info->dev_mem_address = cpu_to_be16(offset); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 6debffb8336b..430c2eab6fc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -7,6 +7,7 @@ config MLX5_CORE depends on PCI imply PTP_1588_CLOCK imply VXLAN + imply MLXFW default n ---help--- Core driver for low level functionality of the ConnectX-4 and diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index be48c6440251..c205a80abdec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -441,6 +441,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_CREATE_UCTX: + case MLX5_CMD_OP_DESTROY_UCTX: + case MLX5_CMD_OP_CREATE_UMEM: + case MLX5_CMD_OP_DESTROY_UMEM: case MLX5_CMD_OP_ALLOC_MEMIC: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; @@ -629,6 +633,10 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); + MLX5_COMMAND_STR_CASE(CREATE_UCTX); + MLX5_COMMAND_STR_CASE(DESTROY_UCTX); + MLX5_COMMAND_STR_CASE(CREATE_UMEM); + MLX5_COMMAND_STR_CASE(DESTROY_UMEM); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ebc046fa97d3..f6b1da99e6c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -248,11 +248,32 @@ void mlx5_unregister_interface(struct mlx5_interface *intf) } EXPORT_SYMBOL(mlx5_unregister_interface); +/* Must be called with intf_mutex held */ +static bool mlx5_has_added_dev_by_protocol(struct mlx5_core_dev *mdev, int protocol) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_interface *intf; + bool found = false; + + list_for_each_entry(intf, &intf_list, list) { + if (intf->protocol == protocol) { + dev_ctx = mlx5_get_device(intf, &mdev->priv); + if (dev_ctx && test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + found = true; + break; + } + } + + return found; +} + void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol) { mutex_lock(&mlx5_intf_mutex); - mlx5_remove_dev_by_protocol(mdev, protocol); - mlx5_add_dev_by_protocol(mdev, protocol); + if (mlx5_has_added_dev_by_protocol(mdev, protocol)) { + mlx5_remove_dev_by_protocol(mdev, protocol); + mlx5_add_dev_by_protocol(mdev, protocol); + } mutex_unlock(&mlx5_intf_mutex); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 4746f2d28fb6..0ccd6d40baf7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -26,7 +26,7 @@ static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev) MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); MLX5_SET(disable_hca_in, in, function_id, 0); - MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + MLX5_SET(disable_hca_in, in, embedded_cpu_function, 0); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index d3eaf2ceaa39..a80031b2cfaf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1059,6 +1059,7 @@ void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti) netdev_features_t mlx5e_features_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features); +int mlx5e_set_features(struct net_device *netdev, netdev_features_t features); #ifdef CONFIG_MLX5_ESWITCH int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac); int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index eec07b34b4ad..5efe9b5d9086 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -11,24 +11,25 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, struct net_device **route_dev, struct net_device **out_dev) { + struct net_device *uplink_dev, *uplink_upper, *real_dev; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_dev, *uplink_upper; bool dst_is_lag_dev; + real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev; uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); uplink_upper = netdev_master_upper_dev_get(uplink_dev); dst_is_lag_dev = (uplink_upper && netif_is_lag_master(uplink_upper) && - dev == uplink_upper && + real_dev == uplink_upper && mlx5_lag_is_sriov(priv->mdev)); /* if the egress device isn't on the same HW e-switch or * it's a LAG device, use the uplink */ - if (!netdev_port_same_parent_id(priv->netdev, dev) || + if (!netdev_port_same_parent_id(priv->netdev, real_dev) || dst_is_lag_dev) { - *route_dev = uplink_dev; - *out_dev = *route_dev; + *route_dev = dev; + *out_dev = uplink_dev; } else { *route_dev = dev; if (is_vlan_dev(*route_dev)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 78dc8fe2a83c..2821208119c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1901,6 +1901,22 @@ static int mlx5e_flash_device(struct net_device *dev, return mlx5e_ethtool_flash_device(priv, flash); } +#ifndef CONFIG_MLX5_EN_RXNFC +/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS + * otherwise this function will be defined from en_fs_ethtool.c + */ +static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (info->cmd != ETHTOOL_GRXRINGS) + return -EOPNOTSUPP; + /* ring_count is needed by ethtool -x */ + info->data = priv->channels.params.num_channels; + return 0; +} +#endif + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1919,8 +1935,8 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, -#ifdef CONFIG_MLX5_EN_RXNFC .get_rxnfc = mlx5e_get_rxnfc, +#ifdef CONFIG_MLX5_EN_RXNFC .set_rxnfc = mlx5e_set_rxnfc, #endif .flash_device = mlx5e_flash_device, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 46157e2a1e5a..6a8dc73855c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3698,8 +3698,7 @@ static int mlx5e_handle_feature(struct net_device *netdev, return 0; } -static int mlx5e_set_features(struct net_device *netdev, - netdev_features_t features) +int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { netdev_features_t oper_features = netdev->features; int err = 0; @@ -3750,6 +3749,12 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); } + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + features &= ~NETIF_F_RXHASH; + if (netdev->features & NETIF_F_RXHASH) + netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); + } + mutex_unlock(&priv->state_lock); return features; @@ -3875,6 +3880,9 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) memcpy(&priv->tstamp, &config, sizeof(config)); mutex_unlock(&priv->state_lock); + /* might need to fix some features */ + netdev_update_features(priv->netdev); + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; } @@ -4734,6 +4742,10 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (!priv->channels.params.scatter_fcs_en) netdev->features &= ~NETIF_F_RXFCS; + /* prefere CQE compression over rxhash */ + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) + netdev->features &= ~NETIF_F_RXHASH; + #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) if (FT_CAP(flow_modify_en) && FT_CAP(modify_root) && @@ -5153,6 +5165,11 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) struct mlx5e_priv *priv = vpriv; struct net_device *netdev = priv->netdev; +#ifdef CONFIG_MLX5_ESWITCH + if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev) + return; +#endif + if (!netif_device_present(netdev)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index a66b6ed80b30..fd8cede040b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -65,9 +65,26 @@ static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, static void mlx5e_rep_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + strlcpy(drvinfo->driver, mlx5e_rep_driver_name, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", + fw_rev_maj(mdev), fw_rev_min(mdev), + fw_rev_sub(mdev), mdev->board_id); +} + +static void mlx5e_uplink_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_rep_get_drvinfo(dev, drvinfo); + strlcpy(drvinfo->bus_info, pci_name(priv->mdev->pdev), + sizeof(drvinfo->bus_info)); } static const struct counter_desc sw_rep_stats_desc[] = { @@ -363,7 +380,7 @@ static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = { }; static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { - .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_drvinfo = mlx5e_uplink_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, .get_sset_count = mlx5e_rep_get_sset_count, @@ -1333,6 +1350,7 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, + .ndo_set_features = mlx5e_set_features, }; bool mlx5e_eswitch_rep(struct net_device *netdev) @@ -1406,10 +1424,9 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->watchdog_timeo = 15 * HZ; + netdev->features |= NETIF_F_NETNS_LOCAL; - netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; - netdev->hw_features |= NETIF_F_HW_TC; - + netdev->hw_features |= NETIF_F_HW_TC; netdev->hw_features |= NETIF_F_SG; netdev->hw_features |= NETIF_F_IP_CSUM; netdev->hw_features |= NETIF_F_IPV6_CSUM; @@ -1418,7 +1435,9 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM; - if (rep->vport != MLX5_VPORT_UPLINK) + if (rep->vport == MLX5_VPORT_UPLINK) + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + else netdev->features |= NETIF_F_VLAN_CHALLENGED; netdev->features |= netdev->hw_features; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d75dc44eb2ff..a43ddfc0ff0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1561,7 +1561,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { struct flow_match_vlan match; - flow_rule_match_vlan(rule, &match); + flow_rule_match_cvlan(rule, &match); if (match.mask->vlan_id || match.mask->vlan_priority || match.mask->vlan_tpid) { @@ -2572,9 +2572,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (!flow_action_has_entries(flow_action)) return -EINVAL; - attr->in_rep = rpriv->rep; - attr->in_mdev = priv->mdev; - flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_DROP: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 3f3cd32ae60a..e0ba59b5296f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -431,6 +431,9 @@ static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, return index; } +/* TODO: This mlx5e_tc function shouldn't be called by eswitch */ +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9b2d78ee22b8..a97ffd0dbf01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -363,7 +363,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { rep = &esw->offloads.vport_reps[vf_vport]; - if (rep->rep_if[REP_ETH].state != REP_LOADED) + if (atomic_read(&rep->rep_if[REP_ETH].state) != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -1306,7 +1306,8 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) - rep->rep_if[rep_type].state = REP_UNREGISTERED; + atomic_set(&rep->rep_if[rep_type].state, + REP_UNREGISTERED); } return 0; @@ -1315,11 +1316,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (rep->rep_if[rep_type].state != REP_LOADED) - return; - - rep->rep_if[rep_type].unload(rep); - rep->rep_if[rep_type].state = REP_REGISTERED; + if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + REP_LOADED, REP_REGISTERED) == REP_LOADED) + rep->rep_if[rep_type].unload(rep); } static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) @@ -1380,16 +1379,15 @@ static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, { int err = 0; - if (rep->rep_if[rep_type].state != REP_REGISTERED) - return 0; - - err = rep->rep_if[rep_type].load(esw->dev, rep); - if (err) - return err; - - rep->rep_if[rep_type].state = REP_LOADED; + if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { + err = rep->rep_if[rep_type].load(esw->dev, rep); + if (err) + atomic_set(&rep->rep_if[rep_type].state, + REP_REGISTERED); + } - return 0; + return err; } static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) @@ -1523,8 +1521,6 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, return 0; } -void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); - static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) { mlx5e_tc_clean_fdb_peer_flows(esw); @@ -2076,7 +2072,7 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, rep_if->get_proto_dev = __rep_if->get_proto_dev; rep_if->priv = __rep_if->priv; - rep_if->state = REP_REGISTERED; + atomic_set(&rep_if->state, REP_REGISTERED); } } EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); @@ -2091,7 +2087,7 @@ void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) __unload_reps_all_vport(esw, max_vf, rep_type); mlx5_esw_for_all_reps(esw, i, rep) - rep->rep_if[rep_type].state = REP_UNREGISTERED; + atomic_set(&rep->rep_if[rep_type].state, REP_UNREGISTERED); } EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); @@ -2111,7 +2107,7 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, rep = mlx5_eswitch_get_rep(esw, vport); - if (rep->rep_if[rep_type].state == REP_LOADED && + if (atomic_read(&rep->rep_if[rep_type].state) == REP_LOADED && rep->rep_if[rep_type].get_proto_dev) return rep->rep_if[rep_type].get_proto_dev(rep); return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 0be3eb86dd84..e29e5beb239d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1386,6 +1386,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && d1->vport.num == d2->vport.num && d1->vport.flags == d2->vport.flags && + ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ? + (d1->vport.vhca_id == d2->vport.vhca_id) : true) && ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ? (d1->vport.reformat_id == d2->vport.reformat_id) : true)) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && @@ -2284,7 +2286,7 @@ static struct mlx5_flow_root_namespace cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type); /* Create the root namespace */ - root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL); + root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL); if (!root_ns) return NULL; @@ -2427,6 +2429,7 @@ static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev) cleanup_root_ns(steering->esw_egress_root_ns[i]); kfree(steering->esw_egress_root_ns); + steering->esw_egress_root_ns = NULL; } static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev) @@ -2441,6 +2444,7 @@ static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev) cleanup_root_ns(steering->esw_ingress_root_ns[i]); kfree(steering->esw_ingress_root_ns); + steering->esw_ingress_root_ns = NULL; } void mlx5_cleanup_fs(struct mlx5_core_dev *dev) @@ -2609,6 +2613,7 @@ static int init_egress_acls_root_ns(struct mlx5_core_dev *dev) for (i--; i >= 0; i--) cleanup_root_ns(steering->esw_egress_root_ns[i]); kfree(steering->esw_egress_root_ns); + steering->esw_egress_root_ns = NULL; return err; } @@ -2636,6 +2641,7 @@ static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev) for (i--; i >= 0; i--) cleanup_root_ns(steering->esw_ingress_root_ns[i]); kfree(steering->esw_ingress_root_ns); + steering->esw_ingress_root_ns = NULL; return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index ca0ee9916e9e..0059b290e095 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -535,23 +535,16 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) do_div(ns, NSEC_PER_SEC / HZ); clock->overflow_period = ns; - mdev->clock_info_page = alloc_page(GFP_KERNEL); - if (mdev->clock_info_page) { - mdev->clock_info = kmap(mdev->clock_info_page); - if (!mdev->clock_info) { - __free_page(mdev->clock_info_page); - mlx5_core_warn(mdev, "failed to map clock page\n"); - } else { - mdev->clock_info->sign = 0; - mdev->clock_info->nsec = clock->tc.nsec; - mdev->clock_info->cycles = clock->tc.cycle_last; - mdev->clock_info->mask = clock->cycles.mask; - mdev->clock_info->mult = clock->nominal_c_mult; - mdev->clock_info->shift = clock->cycles.shift; - mdev->clock_info->frac = clock->tc.frac; - mdev->clock_info->overflow_period = - clock->overflow_period; - } + mdev->clock_info = + (struct mlx5_ib_clock_info *)get_zeroed_page(GFP_KERNEL); + if (mdev->clock_info) { + mdev->clock_info->nsec = clock->tc.nsec; + mdev->clock_info->cycles = clock->tc.cycle_last; + mdev->clock_info->mask = clock->cycles.mask; + mdev->clock_info->mult = clock->nominal_c_mult; + mdev->clock_info->shift = clock->cycles.shift; + mdev->clock_info->frac = clock->tc.frac; + mdev->clock_info->overflow_period = clock->overflow_period; } INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); @@ -599,8 +592,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) cancel_delayed_work_sync(&clock->overflow_work); if (mdev->clock_info) { - kunmap(mdev->clock_info_page); - __free_page(mdev->clock_info_page); + free_page((unsigned long)mdev->clock_info); mdev->clock_info = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index f26a4ca29363..0b56291d22c6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -122,6 +122,12 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_driver_priv); +bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->driver->res_query_enabled; +} +EXPORT_SYMBOL(mlxsw_core_res_query_enabled); + struct mlxsw_rx_listener_item { struct list_head list; struct mlxsw_rx_listener rxl; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 8ec53f027575..62b8de9305af 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -28,6 +28,8 @@ unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core); void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); +bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core); + int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver); void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index c1c1965d7acc..72539a9a3847 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -3,6 +3,7 @@ #include #include +#include #include "core.h" #include "core_env.h" @@ -162,7 +163,7 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, { u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE]; u16 offset = MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE; - u8 module_rev_id, module_id; + u8 module_rev_id, module_id, diag_mon; unsigned int read_size; int err; @@ -195,8 +196,21 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, } break; case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP: + /* Verify if transceiver provides diagnostic monitoring page */ + err = mlxsw_env_query_module_eeprom(mlxsw_core, module, + SFP_DIAGMON, 1, &diag_mon, + &read_size); + if (err) + return err; + + if (read_size < 1) + return -EIO; + modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + if (diag_mon) + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + else + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN / 2; break; default: return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 6956bbebe2f1..496dc904c5ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -518,6 +518,9 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) u8 width; int err; + if (!mlxsw_core_res_query_enabled(mlxsw_hwmon->core)) + return 0; + /* Add extra attributes for module temperature. Sensor index is * assigned to sensor_count value, while all indexed before * sensor_count are already utilized by the sensors connected through diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 472f63f9fac5..d3e851e7ca72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -740,6 +740,9 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal_module *module_tz; int i, err; + if (!mlxsw_core_res_query_enabled(core)) + return 0; + thermal->tz_module_arr = kcalloc(module_count, sizeof(*thermal->tz_module_arr), GFP_KERNEL); @@ -776,6 +779,9 @@ mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) unsigned int module_count = mlxsw_core_max_ports(thermal->core); int i; + if (!mlxsw_core_res_query_enabled(thermal->core)) + return; + for (i = module_count - 1; i >= 0; i--) mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); kfree(thermal->tz_module_arr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 6b8aa3761899..f4acb38569e1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3110,6 +3110,10 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, ð_proto_cap, NULL, NULL); autoneg = cmd->base.autoneg == AUTONEG_ENABLE; + if (!autoneg && cmd->base.speed == SPEED_56000) { + netdev_err(dev, "56G not supported with autoneg off\n"); + return -EINVAL; + } eth_proto_new = autoneg ? ops->to_ptys_advert_link(mlxsw_sp, cmd) : ops->to_ptys_speed(mlxsw_sp, cmd->base.speed); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c index c1a9cc9a3292..4c98950380d5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -1171,13 +1171,12 @@ mlxsw_sp_acl_erp_delta_fill(const struct mlxsw_sp_acl_erp_key *parent_key, return -EINVAL; } if (si == -1) { - /* The masks are the same, this cannot happen. - * That means the caller is broken. + /* The masks are the same, this can happen in case eRPs with + * the same mask were created in both A-TCAM and C-TCAM. + * The only possible condition under which this can happen + * is identical rule insertion. Delta is not possible here. */ - WARN_ON(1); - *delta_start = 0; - *delta_mask = 0; - return 0; + return -EINVAL; } pmask = (unsigned char) parent_key->mask[__MASK_IDX(si)]; mask = (unsigned char) key->mask[__MASK_IDX(si)]; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index d633bef5f105..77fe3ed38d1b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -411,9 +411,9 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = { MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI), }; -#define MLXSW_SP2_SB_PR_INGRESS_SIZE 40960000 +#define MLXSW_SP2_SB_PR_INGRESS_SIZE 38128752 +#define MLXSW_SP2_SB_PR_EGRESS_SIZE 38128752 #define MLXSW_SP2_SB_PR_INGRESS_MNG_SIZE (200 * 1000) -#define MLXSW_SP2_SB_PR_EGRESS_SIZE 40960000 static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = { /* Ingress pools. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 15f804453cd6..96b23c856f4d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -247,8 +247,8 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, match.mask->tos & 0x3); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP, - match.key->tos >> 6, - match.mask->tos >> 6); + match.key->tos >> 2, + match.mask->tos >> 2); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 902e766a8ed3..18d29b8f763f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2363,7 +2363,7 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) static void mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, - bool removing); + bool removing, bool dead); static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding) { @@ -2494,7 +2494,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) memcpy(neigh_entry->ha, ha, ETH_ALEN); mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected); - mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected); + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected, + dead); if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); @@ -3458,13 +3459,79 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, nh->update = 1; } +static int +mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n, *old_n = neigh_entry->key.n; + struct mlxsw_sp_nexthop *nh; + bool entry_connected; + u8 nud_state, dead; + int err; + + nh = list_first_entry(&neigh_entry->nexthop_list, + struct mlxsw_sp_nexthop, neigh_list_node); + + n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev); + if (!n) { + n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr, + nh->rif->dev); + if (IS_ERR(n)) + return PTR_ERR(n); + neigh_event_send(n, NULL); + } + + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + neigh_entry->key.n = n; + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + if (err) + goto err_neigh_entry_insert; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + entry_connected = nud_state & NUD_VALID && !dead; + + list_for_each_entry(nh, &neigh_entry->nexthop_list, + neigh_list_node) { + neigh_release(old_n); + neigh_clone(n); + __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + } + + neigh_release(n); + + return 0; + +err_neigh_entry_insert: + neigh_entry->key.n = old_n; + mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + neigh_release(n); + return err; +} + static void mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, - bool removing) + bool removing, bool dead) { struct mlxsw_sp_nexthop *nh; + if (list_empty(&neigh_entry->nexthop_list)) + return; + + if (dead) { + int err; + + err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp, + neigh_entry); + if (err) + dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n"); + return; + } + list_for_each_entry(nh, &neigh_entry->nexthop_list, neigh_list_node) { __mlxsw_sp_nexthop_neigh_update(nh, removing); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index f272247d1708..d4bf0e694541 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -328,7 +328,18 @@ __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, return; } - if (sc == SHF_SC_L_SHF) + /* NFP shift instruction has something special. If shift direction is + * left then shift amount of 1 to 31 is specified as 32 minus the amount + * to shift. + * + * But no need to do this for indirect shift which has shift amount be + * 0. Even after we do this subtraction, shift amount 0 will be turned + * into 32 which will eventually be encoded the same as 0 because only + * low 5 bits are encoded, but shift amount be 32 will fail the + * FIELD_PREP check done later on shift mask (0x1f), due to 32 is out of + * mask range. + */ + if (sc == SHF_SC_L_SHF && shift) shift = 32 - shift; insn = OP_SHF_BASE | diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 4d78be4ec4e9..843ddf548f26 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -168,6 +168,7 @@ void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) return; } + rcu_read_lock(); for (i = 0; i < count; i++) { ipv4_addr = payload->tun_info[i].ipv4; port = be32_to_cpu(payload->tun_info[i].egress_port); @@ -183,6 +184,7 @@ void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) neigh_event_send(n, NULL); neigh_release(n); } + rcu_read_unlock(); } static int @@ -366,9 +368,10 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) payload = nfp_flower_cmsg_get_data(skb); + rcu_read_lock(); netdev = nfp_app_repr_get(app, be32_to_cpu(payload->ingress_port)); if (!netdev) - goto route_fail_warning; + goto fail_rcu_unlock; flow.daddr = payload->ipv4_addr; flow.flowi4_proto = IPPROTO_UDP; @@ -378,21 +381,23 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) rt = ip_route_output_key(dev_net(netdev), &flow); err = PTR_ERR_OR_ZERO(rt); if (err) - goto route_fail_warning; + goto fail_rcu_unlock; #else - goto route_fail_warning; + goto fail_rcu_unlock; #endif /* Get the neighbour entry for the lookup */ n = dst_neigh_lookup(&rt->dst, &flow.daddr); ip_rt_put(rt); if (!n) - goto route_fail_warning; - nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_KERNEL); + goto fail_rcu_unlock; + nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC); neigh_release(n); + rcu_read_unlock(); return; -route_fail_warning: +fail_rcu_unlock: + rcu_read_unlock(); nfp_flower_cmsg_warn(app, "Requested route not found.\n"); } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index ed651dde6ef9..6d176be51a6b 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6914,6 +6914,8 @@ static int rtl8169_resume(struct device *device) struct net_device *dev = dev_get_drvdata(device); struct rtl8169_private *tp = netdev_priv(dev); + rtl_rar_set(tp, dev->dev_addr); + clk_prepare_enable(tp->clk); if (netif_running(dev)) @@ -6947,6 +6949,7 @@ static int rtl8169_runtime_resume(struct device *device) { struct net_device *dev = dev_get_drvdata(device); struct rtl8169_private *tp = netdev_priv(dev); + rtl_rar_set(tp, dev->dev_addr); if (!tp->TxDescArray) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index e33af371b169..48967dd27bbf 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1594,6 +1594,10 @@ static void sh_eth_dev_exit(struct net_device *ndev) sh_eth_get_stats(ndev); mdp->cd->soft_reset(ndev); + /* Set the RMII mode again if required */ + if (mdp->cd->rmiimode) + sh_eth_write(ndev, 0x1, RMIIMODE); + /* Set MAC address again */ update_mac_address(ndev); } diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 70cce63a6081..696037d5ac3d 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -735,6 +735,7 @@ static int sgiseeq_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, dev); + SET_NETDEV_DEV(dev, &pdev->dev); sp = netdev_priv(dev); /* Make private data page aligned */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index bf2562995fc8..126b66bb73a6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -346,8 +346,6 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) return PTR_ERR(plat_dat); plat_dat->interface = priv_plat->phy_mode; - /* clk_csr_i = 250-300MHz & MDC = clk_csr_i/124 */ - plat_dat->clk_csr = 5; plat_dat->has_gmac4 = 1; plat_dat->has_gmac = 0; plat_dat->pmt = 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 195669f550f0..ba124a4da793 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1015,6 +1015,8 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv) mac->mac = &sun8i_dwmac_ops; mac->dma = &sun8i_dwmac_dma_ops; + priv->dev->priv_flags |= IFF_UNICAST_FLT; + /* The loopback bit seems to be re-set when link change * Simply mask it each time * Speed 10/100/1000 are set in BIT(2)/BIT(3) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 3c749c327cbd..e09522c5509a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -460,7 +460,7 @@ stmmac_get_pauseparam(struct net_device *netdev, } else { if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, netdev->phydev->supported) || - linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + !linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, netdev->phydev->supported)) return; } @@ -491,7 +491,7 @@ stmmac_set_pauseparam(struct net_device *netdev, } else { if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phy->supported) || - linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + !linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phy->supported)) return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 48712437d0da..635d88d82610 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2208,6 +2208,10 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) if (priv->plat->axi) stmmac_axi(priv, priv->ioaddr, priv->plat->axi); + /* DMA CSR Channel configuration */ + for (chan = 0; chan < dma_csr_ch; chan++) + stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan); + /* DMA RX Channel Configuration */ for (chan = 0; chan < rx_channels_count; chan++) { rx_q = &priv->rx_queue[chan]; @@ -2233,10 +2237,6 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) tx_q->tx_tail_addr, chan); } - /* DMA CSR Channel configuration */ - for (chan = 0; chan < dma_csr_ch; chan++) - stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan); - return ret; } @@ -3338,6 +3338,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) entry = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE); } rx_q->dirty_rx = entry; + stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue); } /** @@ -4379,10 +4380,10 @@ int stmmac_dvr_probe(struct device *device, * set the MDC clock dynamically according to the csr actual * clock input. */ - if (!priv->plat->clk_csr) - stmmac_clk_csr_set(priv); - else + if (priv->plat->clk_csr >= 0) priv->clk_csr = priv->plat->clk_csr; + else + stmmac_clk_csr_set(priv); stmmac_check_pcs_mode(priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index bdd351597b55..093a223fe408 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -267,7 +267,8 @@ int stmmac_mdio_reset(struct mii_bus *bus) of_property_read_u32_array(np, "snps,reset-delays-us", data->delays, 3); - if (gpio_request(data->reset_gpio, "mdio-reset")) + if (devm_gpio_request(priv->device, data->reset_gpio, + "mdio-reset")) return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 3031f2bf15d6..f45bfbef97d0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -408,7 +408,10 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) /* Default to phy auto-detection */ plat->phy_addr = -1; - /* Get clk_csr from device tree */ + /* Default to get clk_csr from stmmac_clk_crs_set(), + * or get clk_csr from device tree. + */ + plat->clk_csr = -1; of_property_read_u32(np, "clk_csr", &plat->clk_csr); /* "snps,phy-addr" is not a standard property. Mark it as deprecated diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index a591583d120e..1285f282d3ac 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -800,12 +800,17 @@ static int cpsw_purge_all_mc(struct net_device *ndev, const u8 *addr, int num) static void cpsw_ndo_set_rx_mode(struct net_device *ndev) { - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; + int slave_port = -1; + + if (cpsw->data.dual_emac) + slave_port = priv->emac_port + 1; if (ndev->flags & IFF_PROMISC) { /* Enable promiscuous mode */ cpsw_set_promiscious(ndev, true); - cpsw_ale_set_allmulti(cpsw->ale, IFF_ALLMULTI); + cpsw_ale_set_allmulti(cpsw->ale, IFF_ALLMULTI, slave_port); return; } else { /* Disable promiscuous mode */ @@ -813,7 +818,8 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) } /* Restore allmulti on vlans if necessary */ - cpsw_ale_set_allmulti(cpsw->ale, ndev->flags & IFF_ALLMULTI); + cpsw_ale_set_allmulti(cpsw->ale, + ndev->flags & IFF_ALLMULTI, slave_port); /* add/remove mcast address either for real netdev or for vlan */ __hw_addr_ref_sync_dev(&ndev->mc, ndev, cpsw_add_mc_addr, @@ -3124,6 +3130,7 @@ static void cpsw_get_ringparam(struct net_device *ndev, struct cpsw_common *cpsw = priv->cpsw; /* not supported */ + ering->tx_max_pending = descs_pool_size - CPSW_MAX_QUEUES; ering->tx_max_pending = 0; ering->tx_pending = cpdma_get_num_tx_descs(cpsw->dma); ering->rx_max_pending = descs_pool_size - CPSW_MAX_QUEUES; diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 798c989d5d93..b3d9591b4824 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -482,24 +482,25 @@ int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port_mask) } EXPORT_SYMBOL_GPL(cpsw_ale_del_vlan); -void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti) +void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti, int port) { u32 ale_entry[ALE_ENTRY_WORDS]; - int type, idx; int unreg_mcast = 0; - - /* Only bother doing the work if the setting is actually changing */ - if (ale->allmulti == allmulti) - return; - - /* Remember the new setting to check against next time */ - ale->allmulti = allmulti; + int type, idx; for (idx = 0; idx < ale->params.ale_entries; idx++) { + int vlan_members; + cpsw_ale_read(ale, idx, ale_entry); type = cpsw_ale_get_entry_type(ale_entry); if (type != ALE_TYPE_VLAN) continue; + vlan_members = + cpsw_ale_get_vlan_member_list(ale_entry, + ale->vlan_field_bits); + + if (port != -1 && !(vlan_members & BIT(port))) + continue; unreg_mcast = cpsw_ale_get_vlan_unreg_mcast(ale_entry, diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h index cd07a3e96d57..1fe196d8a5e4 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.h +++ b/drivers/net/ethernet/ti/cpsw_ale.h @@ -37,7 +37,6 @@ struct cpsw_ale { struct cpsw_ale_params params; struct timer_list timer; unsigned long ageout; - int al