From ed404b2b80a1acb4e09c7aedcbc593f54791bf7e Mon Sep 17 00:00:00 2001 From: Vaishnavi AVS Date: Mon, 30 Jan 2023 11:06:27 +0530 Subject: [PATCH 01/26] i2c-msm-geni: KASAN: use-after-free in __list_add_valid+0x2c/0xc4 This UAF issue is seen when driver is removed and inserted. During driver removal, pm runtime resume callback invoked in which as part of clock, ab/ib nodes are added in common struct geni_se_dev. As part of driver exit, we are not removing the ab/ib list from common structure list due to which the issue is seen when driver is loaded. As part of driver removal, checking the status of runtime suspend if it is not suspended, invoke geni suspend call otherwise ignore. So by suspend call ensured that ab/ib are removed from lists, so that UAF will not be encountered when next load of driver. Change-Id: I1f0c7a29c5e268a1ab5c017e271ad0484dcab24f Signed-off-by: Praveen Talari Signed-off-by: Vaishnavi AVS --- drivers/i2c/busses/i2c-msm-geni.c | 55 ++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-msm-geni.c b/drivers/i2c/busses/i2c-msm-geni.c index 3a1964dfaac1..473ff18b0f2e 100644 --- a/drivers/i2c/busses/i2c-msm-geni.c +++ b/drivers/i2c/busses/i2c-msm-geni.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -136,10 +137,12 @@ struct geni_i2c_dev { bool disable_dma_mode; bool prev_cancel_pending; //Halt cancel till IOS in good state bool is_i2c_rtl_based; /* doing pending cancel only for rtl based SE's */ + atomic_t is_xfer_in_progress; /* Used to maintain xfer inprogress status */ }; static struct geni_i2c_dev *gi2c_dev_dbg[MAX_SE]; static int arr_idx; +static int geni_i2c_runtime_suspend(struct device *dev); struct geni_i2c_err_log { int err; @@ -1052,11 +1055,13 @@ static int geni_i2c_xfer(struct i2c_adapter *adap, int i, ret = 0, timeout = 0; gi2c->err = 0; + atomic_set(&gi2c->is_xfer_in_progress, 1); /* Client to respect system suspend */ if (!pm_runtime_enabled(gi2c->dev)) { GENI_SE_ERR(gi2c->ipcl, false, gi2c->dev, "%s: System suspended\n", __func__); + atomic_set(&gi2c->is_xfer_in_progress, 0); return -EACCES; } @@ -1068,6 +1073,7 @@ static int geni_i2c_xfer(struct i2c_adapter *adap, pm_runtime_put_noidle(gi2c->dev); /* Set device in suspended since resume failed */ pm_runtime_set_suspended(gi2c->dev); + atomic_set(&gi2c->is_xfer_in_progress, 0); return ret; } } @@ -1078,12 +1084,13 @@ static int geni_i2c_xfer(struct i2c_adapter *adap, if (ret) { pm_runtime_mark_last_busy(gi2c->dev); pm_runtime_put_autosuspend(gi2c->dev); + atomic_set(&gi2c->is_xfer_in_progress, 0); return ret; //Don't perform xfer is cancel failed } } GENI_SE_DBG(gi2c->ipcl, false, gi2c->dev, - "n:%d addr:0x%x\n", num, msgs[0].addr); + "n:%d addr:0x%x\n", num, msgs[0].addr); gi2c->dbg_num = num; kfree(gi2c->dbg_buf_ptr); @@ -1268,7 +1275,7 @@ geni_i2c_txn_ret: pm_runtime_mark_last_busy(gi2c->dev); pm_runtime_put_autosuspend(gi2c->dev); } - + atomic_set(&gi2c->is_xfer_in_progress, 0); gi2c->cur = NULL; GENI_SE_DBG(gi2c->ipcl, false, gi2c->dev, "i2c txn ret:%d, num:%d, err:%d\n", ret, num, gi2c->err); @@ -1476,10 +1483,10 @@ static int geni_i2c_probe(struct platform_device *pdev) return ret; } + atomic_set(&gi2c->is_xfer_in_progress, 0); snprintf(boot_marker, sizeof(boot_marker), - "M - DRIVER GENI_I2C_%d Ready", gi2c->adap.nr); + "M - DRIVER GENI_I2C_%d Ready", 
gi2c->adap.nr); place_marker(boot_marker); - dev_info(gi2c->dev, "I2C probed\n"); return 0; } @@ -1489,6 +1496,33 @@ static int geni_i2c_remove(struct platform_device *pdev) struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev); int i; + if (atomic_read(&gi2c->is_xfer_in_progress)) { + GENI_SE_ERR(gi2c->ipcl, true, gi2c->dev, + "%s: Xfer is in progress\n", __func__); + return -EBUSY; + } + + if (!pm_runtime_status_suspended(gi2c->dev)) { + if (geni_i2c_runtime_suspend(gi2c->dev)) + GENI_SE_ERR(gi2c->ipcl, true, gi2c->dev, + "%s: runtime suspend failed\n", __func__); + } + + if (gi2c->se_mode == GSI_ONLY) { + if (gi2c->tx_c) { + GENI_SE_ERR(gi2c->ipcl, true, gi2c->dev, + "%s: clearing tx dma resource\n", __func__); + dma_release_channel(gi2c->tx_c); + } + if (gi2c->rx_c) { + GENI_SE_ERR(gi2c->ipcl, true, gi2c->dev, + "%s: clearing rx dma resource\n", __func__); + dma_release_channel(gi2c->rx_c); + } + } + + pm_runtime_put_noidle(gi2c->dev); + pm_runtime_set_suspended(gi2c->dev); pm_runtime_disable(gi2c->dev); i2c_del_adapter(&gi2c->adap); @@ -1594,6 +1628,19 @@ static int geni_i2c_suspend_late(struct device *device) int ret; GENI_SE_DBG(gi2c->ipcl, false, gi2c->dev, "%s\n", __func__); + + if (atomic_read(&gi2c->is_xfer_in_progress)) { + if (!pm_runtime_status_suspended(gi2c->dev)) { + GENI_SE_ERR(gi2c->ipcl, true, gi2c->dev, + ":%s: runtime PM is active\n", __func__); + return -EBUSY; + } + GENI_SE_ERR(gi2c->ipcl, true, gi2c->dev, + "%s System suspend not allowed while xfer in progress\n", + __func__); + return -EBUSY; + } + /* Make sure no transactions are pending */ ret = i2c_trylock_bus(&gi2c->adap, I2C_LOCK_SEGMENT); if (!ret) { From b12d6d9506435b10f588075a8bdf537004d1afbd Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 18 Nov 2022 15:05:48 -0800 Subject: [PATCH 02/26] ANDROID: mm: skip pte_alloc during speculative page fault Speculative page fault checks pmd to be valid before starting to handle the page fault and pte_alloc() should do nothing if pmd stays valid. If pmd gets changed during speculative page fault, we will detect the change later and retry with mmap_lock. Therefore pte_alloc() can be safely skipped and this prevents the racy pmd_lock() call which can access pmd->ptl after pmd was cleared. Bug: 257443051 Change-Id: Iec57df5530dba6e0e0bdf9f7500f910851c3d3fd Signed-off-by: Suren Baghdasaryan Git-commit: 1169f70f8f15ea4378ecadb9baba8791824c8b2a Git-repo: https://android.googlesource.com/kernel/common/ Signed-off-by: Srinivasarao Pathipati --- mm/memory.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 159418393187..fdc4b4dd1ffa 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3296,6 +3296,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (vmf->vma_flags & VM_SHARED) return VM_FAULT_SIGBUS; + /* Do not check unstable pmd, if it's changed will retry later */ + if (vmf->flags & FAULT_FLAG_SPECULATIVE) + goto skip_pmd_checks; + /* * Use pte_alloc() instead of pte_alloc_map(). 
We can't run * pte_offset_map() on pmds where a huge pmd might be created @@ -3313,6 +3317,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (unlikely(pmd_trans_unstable(vmf->pmd))) return 0; +skip_pmd_checks: /* Use the zero-page for reads */ if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm)) { From cb68c255f8dc8e960ccd9b61ff584ba419923880 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 18 Nov 2022 15:23:53 -0800 Subject: [PATCH 03/26] ANDROID: mm: prevent speculative page fault handling for in do_swap_page() do_swap_page() uses migration_entry_wait() which operates on page tables without protection. Disable speculative page fault handling. Bug: 257443051 Change-Id: I677eb1ee85707dce533d5d811dcde5f5dabcfdf3 Signed-off-by: Suren Baghdasaryan Git-commit: 4b388752aca20f2588212251ad59d80a2cc5d214 Git-repo: https://android.googlesource.com/kernel/common/ Signed-off-by: Srinivasarao Pathipati --- mm/memory.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index fdc4b4dd1ffa..93fdb73bf0ca 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3050,6 +3050,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) int exclusive = 0; vm_fault_t ret; + if (vmf->flags & FAULT_FLAG_SPECULATIVE) { + pte_unmap(vmf->pte); + return VM_FAULT_RETRY; + } + ret = pte_unmap_same(vmf); if (ret) { /* From f87e6b8d4578ec17a96571b76f1296d93c5972bd Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 18 Nov 2022 15:36:13 -0800 Subject: [PATCH 04/26] ANDROID: mm: prevent reads of unstable pmd during speculation Checks of pmd during speculative page fault handling are racy because pmd is unprotected and might be modified or cleared. This might cause use-after-free reads from speculative path, therefore prevent such checks. At the beginning of speculation pmd is checked to be valid and if it's changed before page fault is handled, the change will be detected and page fault will be retried under mmap_lock protection. 
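Condensed from the hunks below, the pattern being applied (an illustrative fragment, not a literal copy; skip_pmd_checks is the label this patch introduces):

        /* Before: the speculative path still executed pmd_none(*vmf->pmd).
         * Nothing pins the page that vmf->pmd points into on this path, so
         * even a read-only test can be a use-after-free read. */
        if (unlikely(pmd_none(*vmf->pmd))) {
                ...
        }

        /* After: skip every pmd dereference while speculating.  The pmd was
         * validated when speculation started; if it has changed since, the
         * later revalidation fails and the fault is retried under mmap_lock. */
        if (vmf->flags & FAULT_FLAG_SPECULATIVE)
                goto skip_pmd_checks;
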
Bug: 257443051 Change-Id: I0cbd3b0b44e8296cf0d6cb298fae48c696580068 Signed-off-by: Suren Baghdasaryan Git-commit: 2bb39b912175c3c087978ae5547e277a8422c601 Git-repo: https://android.googlesource.com/kernel/common/ [quic_c_spathi@quicinc.com: resolve merge conflicts] Signed-off-by: Srinivasarao Pathipati --- mm/memory.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 93fdb73bf0ca..50dc21038c09 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3427,6 +3427,10 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; vm_fault_t ret; + /* Do not check unstable pmd, if it's changed will retry later */ + if (vmf->flags & FAULT_FLAG_SPECULATIVE) + goto skip_pmd_checks; + /* * Preallocate pte before we take page_lock because this might lead to * deadlocks for memcg reclaim which waits for pages under writeback: @@ -3449,6 +3453,7 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) smp_wmb(); /* See comment in __pte_alloc() */ } +skip_pmd_checks: ret = vma->vm_ops->fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | VM_FAULT_DONE_COW))) @@ -3822,7 +3827,8 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf) end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, start_pgoff + nr_pages - 1); - if (pmd_none(*vmf->pmd)) { + if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) && + pmd_none(*vmf->pmd)) { vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); if (!vmf->prealloc_pte) goto out; @@ -4189,16 +4195,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) pte_t entry; vm_fault_t ret = 0; + /* Do not check unstable pmd, if it's changed will retry later */ + if (vmf->flags & FAULT_FLAG_SPECULATIVE) + goto skip_pmd_checks; + if (unlikely(pmd_none(*vmf->pmd))) { - /* - * In the case of the speculative page fault handler we abort - * the speculative path immediately as the pmd is probably - * in the way to be converted in a huge one. We will try - * again holding the mmap_sem (which implies that the collapse - * operation is done). - */ - if (vmf->flags & FAULT_FLAG_SPECULATIVE) - return VM_FAULT_RETRY; /* * Leave __pte_alloc() until later: because vm_ops->fault may * want to allocate huge page, and if we expose page table @@ -4206,8 +4207,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) * concurrent faults and from rmap lookups. */ vmf->pte = NULL; - } else if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) { - /* See comment in pte_alloc_one_map() */ + } else { if (pmd_devmap_trans_unstable(vmf->pmd)) return 0; /* @@ -4237,6 +4237,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) } } +skip_pmd_checks: if (!vmf->pte) { if (vma_is_anonymous(vmf->vma)) return do_anonymous_page(vmf); From 5b5bd362f1c527e1844c3c57f54fcb0736f998f9 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Thu, 29 Apr 2021 10:28:25 -0700 Subject: [PATCH 05/26] BACKPORT: FROMLIST: mm: implement speculative handling in filemap_fault() Extend filemap_fault() to handle speculative faults. In the speculative case, we will only be fishing existing pages out of the page cache. The logic we use mirrors what is done in the non-speculative case, assuming that pages are found in the page cache, are up to date and not already locked, and that readahead is not necessary at this time. In all other cases, the fault is aborted to be handled non-speculatively. 
Signed-off-by: Michel Lespinasse Link: https://lore.kernel.org/all/20210407014502.24091-26-michel@lespinasse.org/ Conflicts: mm/filemap.c 1. Added back file_ra_state variable used by SPF path. 2. Updated comment for filemap_fault to reflect SPF locking rules. Bug: 161210518 Signed-off-by: Suren Baghdasaryan Change-Id: I82eba7fcfc81876245c2e65bc5ae3d33ddfcc368 Git-commit: 59d4d125b7d0108b54860ea8584679d514ef07b0 Git-repo: https://android.googlesource.com/kernel/common/ [quic_c_spathi@quicinc.com: resolve trivial merge conflicts] Signed-off-by: Srinivasarao Pathipati --- mm/filemap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index bf097b21ce0b..2afaf23c13c0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2495,7 +2495,9 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. * - * vma->vm_mm->mmap_sem must be held on entry (except FAULT_FLAG_SPECULATIVE). + * If FAULT_FLAG_SPECULATIVE is set, this function runs with elevated vma + * refcount and with mmap lock not held. + * Otherwise, vma->vm_mm->mmap_sem must be held on entry. * * If our return value has VM_FAULT_RETRY set, it's because the mmap_sem * may be dropped before doing I/O or by lock_page_maybe_drop_mmap(). @@ -2520,6 +2522,47 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) struct page *page; vm_fault_t ret = 0; + if (vmf->flags & FAULT_FLAG_SPECULATIVE) { + page = find_get_page(mapping, offset); + if (unlikely(!page) || unlikely(PageReadahead(page))) + return VM_FAULT_RETRY; + + if (!trylock_page(page)) + return VM_FAULT_RETRY; + + if (unlikely(compound_head(page)->mapping != mapping)) + goto page_unlock; + VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page); + if (unlikely(!PageUptodate(page))) + goto page_unlock; + + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= max_off)) + goto page_unlock; + + /* + * Update readahead mmap_miss statistic. + * + * Note that we are not sure if finish_fault() will + * manage to complete the transaction. If it fails, + * we'll come back to filemap_fault() non-speculative + * case which will update mmap_miss a second time. + * This is not ideal, we would prefer to guarantee the + * update will happen exactly once. + */ + if (!(vmf->vma->vm_flags & VM_RAND_READ) && ra->ra_pages) { + unsigned int mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss) + WRITE_ONCE(ra->mmap_miss, --mmap_miss); + } + + vmf->page = page; + return VM_FAULT_LOCKED; +page_unlock: + unlock_page(page); + return VM_FAULT_RETRY; + } + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (unlikely(offset >= max_off)) return VM_FAULT_SIGBUS; From 365a5b7af5fa6b3706aa8204772921172addc714 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 21 Nov 2022 12:15:43 -0800 Subject: [PATCH 06/26] ANDROID: mm/khugepaged: add missing vm_write_{begin|end} Speculative page fault handler needs to detect concurrent pmd changes and relies on vma seqcount for that. pmdp_collapse_flush(), set_huge_pmd() and collapse_and_free_pmd() can modify a pmd. vm_write_{begin|end} are needed in the paths which can call these functions for page fault handler to detect pmd changes. 
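The hunks below only show the writer side. For reference, a minimal user-space model of the whole protocol (illustration only: struct vma_model, collapse_pmd() and speculative_read() are names invented for this sketch, and the kernel uses its seqcount API rather than C11 atomics):

        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stdio.h>

        struct vma_model {
                atomic_uint vm_sequence;        /* stands in for the vma sequence count */
                unsigned long pmd;              /* stands in for a page-table entry */
        };

        /* Writer: what vm_write_begin()/vm_write_end() provide around
         * pmdp_collapse_flush() in this patch (exclusive mmap_lock held). */
        static void collapse_pmd(struct vma_model *v)
        {
                atomic_fetch_add(&v->vm_sequence, 1);   /* odd: write in progress */
                v->pmd = 0;                             /* modify the entry */
                atomic_fetch_add(&v->vm_sequence, 1);   /* even: stable again */
        }

        /* Reader: a lockless fault attempt that detects a concurrent writer. */
        static bool speculative_read(struct vma_model *v, unsigned long *out)
        {
                unsigned int seq = atomic_load(&v->vm_sequence);

                if (seq & 1)
                        return false;   /* writer active: fall back to mmap_lock */
                *out = v->pmd;          /* walk the tables without mmap_lock */
                return atomic_load(&v->vm_sequence) == seq;     /* else retry */
        }

        int main(void)
        {
                struct vma_model v = { .pmd = 0x1234 };
                unsigned long val = 0;
                bool ok;

                collapse_pmd(&v);
                ok = speculative_read(&v, &val);
                printf("read ok=%d val=%#lx\n", ok, val);
                return 0;
        }

In the kernel, a failed re-check sends the fault back to the conventional, mmap_lock-protected path rather than looping.
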
Bug: 257443051 Change-Id: Ieb784b5f44901b66a594f61b9e7c91190ff97f80 Signed-off-by: Suren Baghdasaryan Git-commit: 5ed391bd8ad8481d82c1bbb05a35f5538966dce9 Git-repo: https://android.googlesource.com/kernel/common/ Signed-off-by: Srinivasarao Pathipati --- mm/khugepaged.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index bd034e57b11e..2aa19037c4ab 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1343,6 +1343,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) if (!pmd) goto drop_hpage; + vm_write_begin(vma); start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); /* step 1: check all mapped PTEs are to the right huge page */ @@ -1392,6 +1393,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) ptl = pmd_lock(vma->vm_mm, pmd); _pmd = pmdp_collapse_flush(vma, haddr, pmd); spin_unlock(ptl); + vm_write_end(vma); mm_dec_nr_ptes(mm); pte_free(mm, pmd_pgtable(_pmd)); @@ -1402,6 +1404,7 @@ drop_hpage: abort: pte_unmap_unlock(start_pte, ptl); + vm_write_end(vma); goto drop_hpage; } @@ -1473,10 +1476,12 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) */ if (down_write_trylock(&mm->mmap_sem)) { if (!khugepaged_test_exit(mm)) { + vm_write_begin(vma); spinlock_t *ptl = pmd_lock(mm, pmd); /* assume page table is clear */ _pmd = pmdp_collapse_flush(vma, addr, pmd); spin_unlock(ptl); + vm_write_end(vma); mm_dec_nr_ptes(mm); pte_free(mm, pmd_pgtable(_pmd)); } From ad939deb18f94f32016e156aff54c84d1519d1b5 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 15 Nov 2022 10:38:43 -0800 Subject: [PATCH 07/26] ANDROID: mm: remove sequence counting when mmap_lock is not exclusively owned In a number of cases vm_write_{begin|end} is called while mmap_lock is not owned exclusively. This is unnecessary and can affect correctness of the sequence counting protecting speculative page fault handlers. Remove extra calls. 
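To make the correctness concern concrete, one interleaving that the removed calls made possible (a sketch; it relies on the plain seqcount write primitives using non-atomic increments and therefore assuming serialized writers):

        initial vm_sequence = 0 (even)

        CPU0 (madvise path, mmap_lock held for read):   vm_write_begin()  0 -> 1 (odd)
        CPU1 (another such path, read lock only):       vm_write_begin()  1 -> 2 (even again)
        CPU2 (speculative fault): samples vm_sequence = 2, sees an even "stable"
              count and walks the page tables without mmap_lock
        CPU0/CPU1: still modifying the range while CPU2 is reading it
        CPU2: re-checks vm_sequence, still 2, so the race goes undetected
        CPU0: vm_write_end()  2 -> 3
        CPU1: vm_write_end()  3 -> 4

With only exclusively locked callers left, two writer sections can never overlap and the odd/even protocol holds; the next patch adds an assert to enforce it.
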
Bug: 257443051 Change-Id: I1278638a0794448e22fbdab5601212b3b2eaebdc Signed-off-by: Suren Baghdasaryan Git-commit: bfdcf47ca34dc3b7b63ca16b0a1856e57c57ee47 Git-repo: https://android.googlesource.com/kernel/common/ [quic_c_spathi@quicinc.com: resolve trivial merge conflicts] Signed-off-by: Srinivasarao Pathipati --- mm/madvise.c | 6 ------ mm/memory.c | 2 -- mm/mempolicy.c | 2 -- 3 files changed, 10 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 5f38b9faeb27..c87d4c43b885 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -500,11 +500,9 @@ static void madvise_cold_page_range(struct mmu_gather *tlb, .target_task = task, }; - vm_write_begin(vma); tlb_start_vma(tlb, vma); walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); tlb_end_vma(tlb, vma); - vm_write_end(vma); } static long madvise_cold(struct task_struct *task, @@ -538,11 +536,9 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb, .target_task = task, }; - vm_write_begin(vma); tlb_start_vma(tlb, vma); walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); tlb_end_vma(tlb, vma); - vm_write_end(vma); } static inline bool can_do_pageout(struct vm_area_struct *vma) @@ -745,12 +741,10 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(&range); - vm_write_begin(vma); tlb_start_vma(&tlb, vma); walk_page_range(vma->vm_mm, range.start, range.end, &madvise_free_walk_ops, &tlb); tlb_end_vma(&tlb, vma); - vm_write_end(vma); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb, range.start, range.end); diff --git a/mm/memory.c b/mm/memory.c index 50dc21038c09..7dd589edcc2b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1292,7 +1292,6 @@ void unmap_page_range(struct mmu_gather *tlb, unsigned long next; BUG_ON(addr >= end); - vm_write_begin(vma); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); do { @@ -1302,7 +1301,6 @@ void unmap_page_range(struct mmu_gather *tlb, next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); - vm_write_end(vma); } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 85b97f3471f4..54dd6c5bb913 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -599,11 +599,9 @@ unsigned long change_prot_numa(struct vm_area_struct *vma, { int nr_updated; - vm_write_begin(vma); nr_updated = change_protection(vma, addr, end, PAGE_NONE, 0, 1); if (nr_updated) count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated); - vm_write_end(vma); return nr_updated; } From 51cfccaecd1813aff19b15f0ca65af12ddf36809 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 15 Nov 2022 10:40:41 -0800 Subject: [PATCH 08/26] ANDROID: mm: assert that mmap_lock is taken exclusively in vm_write_begin vm_write_{begin|end} has to be called when mmap_lock is taken exlusively. Add an assert statement in vm_write_begin to enforce that. free_pgtables can free page tables without exclusive mmap_lock if the vma was isolated, therefore avoid assertions in such cases. 
Bug: 257443051 Change-Id: Ie81aefe025c743cda6f66717d2f08f4d78a55608 Signed-off-by: Suren Baghdasaryan Git-commit: d65d4a0538c3511eb02fed4b628e3588715c90d8 Git-repo: https://android.googlesource.com/kernel/common/ Signed-off-by: Srinivasarao Pathipati --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index de6db27e26b2..a54aca9bba65 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1542,6 +1542,12 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, #ifdef CONFIG_SPECULATIVE_PAGE_FAULT static inline void vm_write_begin(struct vm_area_struct *vma) { + /* + * Isolated vma might be freed without exclusive mmap_lock but + * speculative page fault handler still needs to know it was changed. + */ + if (!RB_EMPTY_NODE(&vma->vm_rb)) + WARN_ON_ONCE(!rwsem_is_locked(&(vma->vm_mm)->mmap_sem)); /* * The reads never spins and preemption * disablement is not required. From 78035f7a502da2bff28626bd891527aa6cbb55ce Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 18 Nov 2022 17:06:03 -0800 Subject: [PATCH 09/26] ANDROID: disable page table moves when speculative page faults are enabled move_page_tables() can move entire pmd or pud without locking individual ptes. This is problematic for speculative page faults which do not take mmap_lock because they rely on ptl lock when writing new pte value. To avoid possible race, disable move_page_tables() optimization when CONFIG_SPECULATIVE_PAGE_FAULT is enabled. Bug: 257443051 Change-Id: Ib48dda08ecad1abc60d08fc089a6566a63393c13 Signed-off-by: Suren Baghdasaryan Git-commit: 0f43357d37e4451cdc12a60895b6e4929a87adf7 Git-repo: https://android.googlesource.com/kernel/common/ [quic_c_spathi@quicinc.com: resolve trivial merge conflicts] Signed-off-by: Srinivasarao Pathipati --- mm/mremap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/mremap.c b/mm/mremap.c index f7c278e65a5d..0a026571645e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -191,7 +191,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, drop_rmap_locks(vma); } -#ifdef CONFIG_HAVE_MOVE_PMD +/* + * Speculative page fault handlers will not detect page table changes done + * without ptl locking. + */ +#if defined(CONFIG_HAVE_MOVE_PMD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT) static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, pmd_t *old_pmd, pmd_t *new_pmd) From 3aa1fadec50385b8f7d0c43aadf7ce70a55e6330 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 22 Nov 2022 10:51:25 -0800 Subject: [PATCH 10/26] ANDROID: mm: fix invalid backport in speculative page fault path Invalid condition was introduced when porting the original SPF patch which would affect NUMA mode. 
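Reduced to its control flow, the condition removed by the hunk below could never fire, while the restored one matches the original SPF intent:

        /* before this patch: the inner test only runs when pol is NULL,
         * so it can never be true and MPOL_INTERLEAVE vmas were not sent
         * back to the non-speculative path */
        if (!pol)
                if (pol && pol->mode == MPOL_INTERLEAVE)
                        return VM_FAULT_RETRY;

        /* after this patch */
        if (pol && pol->mode == MPOL_INTERLEAVE)
                return VM_FAULT_RETRY;
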
Fixes: 736ae8bde8da3 ("FROMLIST: mm: adding speculative page fault failure trace events") Bug: 257443051 Change-Id: Ib20c625615b279dc467588933a1f598dc179861b Signed-off-by: Suren Baghdasaryan Git-commit: 1900436df5d947c2ee74bd78cde1366556c93b51 Git-repo: https://android.googlesource.com/kernel/common/ [quic_c_spathi@quicinc.com: resolve trivial merge conflicts] Signed-off-by: Srinivasarao Pathipati --- mm/memory.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 7dd589edcc2b..47c12f886c0d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4474,9 +4474,8 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, pol = __get_vma_policy(vmf.vma, address); if (!pol) pol = get_task_policy(current); - if (!pol) - if (pol && pol->mode == MPOL_INTERLEAVE) - return VM_FAULT_RETRY; + if (pol && pol->mode == MPOL_INTERLEAVE) + return VM_FAULT_RETRY; #endif /* From ea5f9d7e7ebd9b098d253f1f3bd20d9f335d9ab5 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Mon, 19 Dec 2022 21:07:49 -0800 Subject: [PATCH 11/26] ANDROID: Re-enable fast mremap and fix UAF with SPF SPF attempts page faults without taking the mmap lock, but takes the PTL. If there is a concurrent fast mremap (at PMD/PUD level), this can lead to a UAF as fast mremap will only take the PTL locks at the PMD/PUD level. SPF cannot take the PTL locks at the larger subtree granularity since this introduces much contention in the page fault paths. To address the race: 1) Only try fast mremaps if there are no users of the VMA. Android is concerned with this optimization in the context of GC stop-the-world pause. So there are no other threads active and this should almost always succeed. 2) Speculative faults detect ongoing fast mremaps and fallback to conventional fault handling (taking mmap read lock). Bug: 263177905 Change-Id: I23917e493ddc8576de19883cac053dfde9982b7f Signed-off-by: Kalesh Singh Git-commit: 529351c4c8202aa7f5bc4a8a100e583a70ab6110 Git-repo: https://android.googlesource.com/kernel/common/ [quic_c_spathi@quicinc.com: resolve merge conflicts] Signed-off-by: Srinivasarao Pathipati --- mm/mmap.c | 18 ++++++++++++++++-- mm/mremap.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 24fb2d87142d..56ba432e7f1f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2344,8 +2344,22 @@ struct vm_area_struct *get_vma(struct mm_struct *mm, unsigned long addr) read_lock(&mm->mm_rb_lock); vma = __find_vma(mm, addr); - if (vma) - atomic_inc(&vma->vm_ref_count); + + /* + * If there is a concurrent fast mremap, bail out since the entire + * PMD/PUD subtree may have been remapped. + * + * This is usually safe for conventional mremap since it takes the + * PTE locks as does SPF. However fast mremap only takes the lock + * at the PMD/PUD level which is ok as it is done with the mmap + * write lock held. But since SPF, as the term implies forgoes, + * taking the mmap read lock and also cannot take PTL lock at the + * larger PMD/PUD granualrity, since it would introduce huge + * contention in the page fault path; fall back to regular fault + * handling. 
+ */ + if (vma && !atomic_inc_unless_negative(&vma->vm_ref_count)) + vma = NULL; read_unlock(&mm->mm_rb_lock); return vma; diff --git a/mm/mremap.c b/mm/mremap.c index 0a026571645e..559255d32273 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -191,11 +191,39 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, drop_rmap_locks(vma); } +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT +static inline bool trylock_vma_ref_count(struct vm_area_struct *vma) +{ + /* + * If we have the only reference, swap the refcount to -1. This + * will prevent other concurrent references by get_vma() for SPFs. + */ + return atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1; +} + /* - * Speculative page fault handlers will not detect page table changes done - * without ptl locking. + * Restore the VMA reference count to 1 after a fast mremap. */ -#if defined(CONFIG_HAVE_MOVE_PMD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT) +static inline void unlock_vma_ref_count(struct vm_area_struct *vma) +{ + /* + * This should only be called after a corresponding, + * successful trylock_vma_ref_count(). + */ + VM_BUG_ON_VMA(atomic_cmpxchg(&vma->vm_ref_count, -1, 1) != -1, + vma); +} +#else /* !CONFIG_SPECULATIVE_PAGE_FAULT */ +static inline bool trylock_vma_ref_count(struct vm_area_struct *vma) +{ + return true; +} +static inline void unlock_vma_ref_count(struct vm_area_struct *vma) +{ +} +#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ + +#ifdef CONFIG_HAVE_MOVE_PMD static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, pmd_t *old_pmd, pmd_t *new_pmd) @@ -215,6 +243,14 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, if (WARN_ON(!pmd_none(*new_pmd))) return false; + /* + * We hold both exclusive mmap_lock and rmap_lock at this point and + * cannot block. If we cannot immediately take exclusive ownership + * of the VMA fallback to the move_ptes(). + */ + if (!trylock_vma_ref_count(vma)) + return false; + /* * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_sem prevents deadlock. @@ -237,6 +273,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, spin_unlock(new_ptl); spin_unlock(old_ptl); + unlock_vma_ref_count(vma); return true; } #endif From 7e67432a10a46717338f9a771774af7c7d589fe7 Mon Sep 17 00:00:00 2001 From: Murali Nalajala Date: Wed, 7 Sep 2022 17:38:11 -0700 Subject: [PATCH 12/26] mdt_loader: check for overflow before allocating memory Memory allocation is happening without checking the overflow. This could lead to an unexpected results. Check for overflow before allocating memory. 
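For reference, a stand-alone user-space sketch of the same guard (alloc_blob() and the sample sizes are invented for this example; the driver performs the identical test before its kmalloc()):

        #include <stdint.h>
        #include <stdio.h>
        #include <stdlib.h>

        /* Refuse the allocation when ehdr_size + hash_size would wrap. */
        static void *alloc_blob(size_t ehdr_size, size_t hash_size)
        {
                if (ehdr_size > SIZE_MAX - hash_size)   /* sum would overflow */
                        return NULL;
                return malloc(ehdr_size + hash_size);
        }

        int main(void)
        {
                void *ok  = alloc_blob(16, 32);                 /* normal case */
                void *bad = alloc_blob(SIZE_MAX - 8, 64);       /* rejected: NULL */

                printf("ok=%p bad=%p\n", ok, bad);
                free(ok);
                return 0;
        }

Without the check, untrusted sizes could wrap to a small value, the allocation would succeed, and the copies that follow would overflow the undersized buffer.
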
Change-Id: Icb513ebd8030976e3f0970e9542596f9c5917843 Signed-off-by: Murali Nalajala Signed-off-by: Srinivasarao Pathipati --- drivers/soc/qcom/mdt_loader.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/soc/qcom/mdt_loader.c b/drivers/soc/qcom/mdt_loader.c index 6034cd8992b0..83c2679b6fcf 100644 --- a/drivers/soc/qcom/mdt_loader.c +++ b/drivers/soc/qcom/mdt_loader.c @@ -107,6 +107,10 @@ void *qcom_mdt_read_metadata(const struct firmware *fw, size_t *data_len) ehdr_size = phdrs[0].p_filesz; hash_size = phdrs[1].p_filesz; + /* Overflow check */ + if (ehdr_size > SIZE_MAX - hash_size) + return ERR_PTR(-ENOMEM); + data = kmalloc(ehdr_size + hash_size, GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); From eb0c95250b047ab894ee8b6fbe8fa82167e0884a Mon Sep 17 00:00:00 2001 From: Mohana Basava Tejesh Reddy Mareddy Date: Fri, 24 Feb 2023 00:31:17 -0800 Subject: [PATCH 13/26] msm: mhi_dev: Update msi_disable on fetching MSI config Fix for the timing difference between host writing into MSI capability register and MHI MMIO init where the first instance of setting or resetting the msi_disable flag. Hence, updating the msi_disable flag on fetching the MSI config from PCIe. This will ensure to update the flag value in the case of EP reading the MSI capability register before the host sets it. Change-Id: I508a85e7107ee5a575fc03d54ab3e445049d73b2 Signed-off-by: Mohana Basava Tejesh Reddy Mareddy --- drivers/platform/msm/mhi_dev/mhi.c | 94 +++++++++++++++++------------- 1 file changed, 53 insertions(+), 41 deletions(-) diff --git a/drivers/platform/msm/mhi_dev/mhi.c b/drivers/platform/msm/mhi_dev/mhi.c index d00c3e6b0ac9..3720f4830682 100644 --- a/drivers/platform/msm/mhi_dev/mhi.c +++ b/drivers/platform/msm/mhi_dev/mhi.c @@ -103,6 +103,38 @@ static DECLARE_COMPLETION(write_to_host); static DECLARE_COMPLETION(transfer_host_to_device); static DECLARE_COMPLETION(transfer_device_to_host); +/* + * mhi_dev_get_msi_config () - Fetch the MSI config from + * PCIe and set the msi_disable flag accordingly + * + * @phandle : phandle structure + * @cfg : PCIe MSI config structure + */ +static int mhi_dev_get_msi_config(struct ep_pcie_hw *phandle, + struct ep_pcie_msi_config *cfg) +{ + int rc; + + /* + * Fetching MSI config to read the MSI capability and setting the + * msi_disable flag based on it. 
+ */ + rc = ep_pcie_get_msi_config(phandle, cfg); + if (rc == -EOPNOTSUPP) { + mhi_log(MHI_MSG_VERBOSE, "MSI is disabled\n"); + mhi_ctx->msi_disable = true; + } else if (!rc) { + mhi_ctx->msi_disable = false; + } else { + mhi_log(MHI_MSG_ERROR, + "Error retrieving pcie msi logic\n"); + return rc; + } + + mhi_log(MHI_MSG_VERBOSE, "msi_disable = %d\n", mhi_ctx->msi_disable); + return 0; +} + /* * mhi_dev_ring_cache_completion_cb () - Call back function called * by IPA driver when ring element cache is done @@ -280,16 +312,16 @@ static int mhi_dev_schedule_msi_ipa(struct mhi_dev *mhi, struct event_req *ereq) union mhi_dev_ring_ctx *ctx; int rc; - rc = ep_pcie_get_msi_config(mhi->phandle, &cfg); - if (rc == -EOPNOTSUPP) { - mhi_log(MHI_MSG_VERBOSE, "MSI is disabled\n"); - mhi_ctx->msi_disable = true; - return 0; - } else if (rc) { + rc = mhi_dev_get_msi_config(mhi->phandle, &cfg); + if (rc) { mhi_log(MHI_MSG_ERROR, "Error retrieving pcie msi logic\n"); return rc; } + /* If MSI is disabled, bailing out */ + if (mhi_ctx->msi_disable) + return 0; + ctx = (union mhi_dev_ring_ctx *)&mhi->ev_ctx_cache[ereq->event_ring]; msi_addr.size = sizeof(uint32_t); @@ -436,17 +468,16 @@ static int mhi_trigger_msi_edma(struct mhi_dev_ring *ring, u32 idx) unsigned long flags; if (!mhi_ctx->msi_lower) { - rc = ep_pcie_get_msi_config(mhi_ctx->phandle, &cfg); - if (rc == -EOPNOTSUPP) { - mhi_log(MHI_MSG_VERBOSE, "MSI is disabled\n"); - mhi_ctx->msi_disable = true; - return 0; - } else if (rc) { - mhi_log(MHI_MSG_ERROR, - "Error retrieving pcie msi logic\n"); + rc = mhi_dev_get_msi_config(mhi_ctx->phandle, &cfg); + if (rc) { + mhi_log(MHI_MSG_ERROR, "Error retrieving pcie msi logic\n"); return rc; } + /* If MSI is disabled, bailing out */ + if (mhi_ctx->msi_disable) + return 0; + mhi_ctx->msi_data = cfg.data; mhi_ctx->msi_lower = cfg.lower; } @@ -1450,13 +1481,9 @@ static int mhi_hwc_init(struct mhi_dev *mhi) } /* Call IPA HW_ACC Init with MSI Address and db routing info */ - rc = ep_pcie_get_msi_config(mhi_ctx->phandle, &cfg); - if (rc == -EOPNOTSUPP) { - mhi_log(MHI_MSG_VERBOSE, "MSI is disabled\n"); - mhi_ctx->msi_disable = true; - } else if (rc) { - mhi_log(MHI_MSG_ERROR, - "Error retrieving pcie msi logic\n"); + rc = mhi_dev_get_msi_config(mhi_ctx->phandle, &cfg); + if (rc) { + mhi_log(MHI_MSG_ERROR, "Error retrieving pcie msi logic\n"); return rc; } @@ -1671,11 +1698,8 @@ int mhi_dev_send_event(struct mhi_dev *mhi, int evnt_ring, struct ep_pcie_msi_config cfg; struct mhi_addr transfer_addr; - rc = ep_pcie_get_msi_config(mhi->phandle, &cfg); - if (rc == -EOPNOTSUPP) { - mhi_log(MHI_MSG_VERBOSE, "MSI is disabled\n"); - mhi_ctx->msi_disable = true; - } else if (rc) { + rc = mhi_dev_get_msi_config(mhi->phandle, &cfg); + if (rc) { mhi_log(MHI_MSG_ERROR, "Error retrieving pcie msi logic\n"); return rc; } @@ -4398,24 +4422,12 @@ static int mhi_dev_resume_mmio_mhi_init(struct mhi_dev *mhi_ctx) return -EINVAL; } - /* - * Fetching MSI config to read the MSI capability and setting the - * msi_disable flag based on it. 
- */ - rc = ep_pcie_get_msi_config(mhi_ctx->phandle, &cfg); - if (rc == -EOPNOTSUPP) { - mhi_log(MHI_MSG_VERBOSE, "MSI is disabled\n"); - mhi_ctx->msi_disable = true; - } else if (!rc) { - mhi_ctx->msi_disable = false; - } else { - mhi_log(MHI_MSG_ERROR, - "Error retrieving pcie msi logic\n"); + rc = mhi_dev_get_msi_config(mhi_ctx->phandle, &cfg); + if (rc) { + mhi_log(MHI_MSG_ERROR, "Error retrieving pcie msi logic\n"); return rc; } - mhi_log(MHI_MSG_VERBOSE, "msi_disable = %d\n", mhi_ctx->msi_disable); - rc = mhi_dev_recover(mhi_ctx); if (rc) { mhi_log(MHI_MSG_ERROR, "get mhi state failed\n"); From d37fe0868ccc3d36d39f8d3dc377a62edc8e5682 Mon Sep 17 00:00:00 2001 From: Jyothi Kumar Seerapu Date: Thu, 23 Feb 2023 12:06:58 +0530 Subject: [PATCH 14/26] pci: msm: Flush workqueue and destroy it in mhi controller unregister In mhi_unregister_mhi_controller function, flush the work queue and then destroy it for releasing the memory of unused workqueues. When multiple mhi-based WLAN endpoints are attached directly or over switch, then there can be multiple mhi controllers and so CNSS driver calls for mhi controller registration multiple times. Each time invoking mhi controller register creates another set of workqueues and previous workqueues become stale. By doing so, it will consume the system memory. So, better to release the memory assigned to workqueue by destroying the work queue in mhi_unregister_mhi_controller function. Change-Id: I838371e9c00969a64e658e6175115363ccb916bf Signed-off-by: Jyothi Kumar Seerapu --- drivers/bus/mhi/core/mhi_init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bus/mhi/core/mhi_init.c b/drivers/bus/mhi/core/mhi_init.c index 60ae6c51143c..e2827e4e1498 100644 --- a/drivers/bus/mhi/core/mhi_init.c +++ b/drivers/bus/mhi/core/mhi_init.c @@ -1743,6 +1743,8 @@ void mhi_unregister_mhi_controller(struct mhi_controller *mhi_cntrl) struct mhi_device *mhi_dev = mhi_cntrl->mhi_dev; struct mhi_sfr_info *sfr_info = mhi_cntrl->mhi_sfr; + destroy_workqueue(mhi_cntrl->wq); + kfree(mhi_cntrl->mhi_cmd); kfree(mhi_cntrl->mhi_event); vfree(mhi_cntrl->mhi_chan); From fd63157b5e6433e545876010a35d84667e14f86f Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Thu, 27 Oct 2022 18:49:34 +0530 Subject: [PATCH 15/26] usb: pd: Send extcon notification as soon as APSD detection is done In some targets, it is observed that the time difference between vbus being provided by exerciser and the terminations being applied is more than 1 second causing failures of Type-C compliance testcases 4.10.2 and 4.10.3 When policy engine's psy changed work gets kicked in first time from vbus present interrupt callback of charger driver, we kick in usb pd sm work and it keeps running. Since apsd is not yet done, we don't queue peripheral work. When apsd is done and charger driver invokes power supply changed work, policy engine bails out as sm work is already running although the charger type is detected as SDP/CDP and were supposed to send an extcon notification. As a result the extcon is sent when the sm work hits enter snk startup call and it recognises that apsd is done and charger type is SDP or CDP and sends extcon. This is results in a delay of roughly 1.3 seconds from the moment vbus got detected to the moment we provide extcon notification to dwc3-msm. To avoid this, check for charger type and provide extcon if haven't done already in the psy_changed_notifier_work. This reduces the time delay to around 0.5 seconds helping resolve compliance issue. 
Change-Id: I02c9a4a6b21ca75d43fd68f2447a7388210a4856 Signed-off-by: Krishna Kurapati --- drivers/usb/pd/policy_engine.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/usb/pd/policy_engine.c b/drivers/usb/pd/policy_engine.c index 99f271551ecc..2839abb1d86c 100644 --- a/drivers/usb/pd/policy_engine.c +++ b/drivers/usb/pd/policy_engine.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2016-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -3724,6 +3725,7 @@ static void psy_changed_notifier_work(struct work_struct *w) union power_supply_propval val; enum power_supply_typec_mode typec_mode; int ret; + int usb_extcon_state; ret = usbpd_get_psy_iio_property(pd, POWER_SUPPLY_PROP_TYPEC_MODE, &val); @@ -3794,8 +3796,28 @@ static void psy_changed_notifier_work(struct work_struct *w) return; } - if (pd->typec_mode == typec_mode) + if (pd->typec_mode == typec_mode) { + if (!((pd->current_dr == DR_NONE) || (pd->current_dr == DR_UFP))) + return; + + usb_extcon_state = extcon_get_state(pd->extcon, EXTCON_USB); + + if (usb_extcon_state == 0) { + ret = usbpd_get_psy_iio_property(pd, POWER_SUPPLY_PROP_REAL_TYPE, + &val); + if (ret) { + usbpd_err(&pd->dev, "Unable to read USB PROP_REAL_TYPE: %d\n", + ret); + return; + } + + if (val.intval == POWER_SUPPLY_TYPE_USB || + val.intval == POWER_SUPPLY_TYPE_USB_CDP || + val.intval == QTI_POWER_SUPPLY_TYPE_USB_FLOAT) + queue_work(pd->wq, &pd->start_periph_work); + } return; + } pd->typec_mode = typec_mode; From 064252c4f285b1babdf607b0b56cd2ce6885fa94 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Jun 2022 10:36:35 +0530 Subject: [PATCH 16/26] BACKPORT: FROMGIT: cgroup: Use separate src/dst nodes when preloading css_sets for migration Each cset (css_set) is pinned by its tasks. When we're moving tasks around across csets for a migration, we need to hold the source and destination csets to ensure that they don't go away while we're moving tasks about. This is done by linking cset->mg_preload_node on either the mgctx->preloaded_dst_csets or mgctx->preloaded_dst_csets list. Using the same cset->mg_preload_node for both the src and dst lists was deemed okay as a cset can't be both the source and destination at the same time. Unfortunately, this overloading becomes problematic when multiple tasks are involved in a migration and some of them are identity noop migrations while others are actually moving across cgroups. For example, this can happen with the following sequence on cgroup1: #1> mkdir -p /sys/fs/cgroup/misc/a/b #2> echo $$ > /sys/fs/cgroup/misc/a/cgroup.procs #3> RUN_A_COMMAND_WHICH_CREATES_MULTIPLE_THREADS & #4> PID=$! #5> echo $PID > /sys/fs/cgroup/misc/a/b/tasks #6> echo $PID > /sys/fs/cgroup/misc/a/cgroup.procs the process including the group leader back into a. In this final migration, non-leader threads would be doing identity migration while the group leader is doing an actual one. After #3, let's say the whole process was in cset A, and that after #4, the leader moves to cset B. Then, during #6, the following happens: 1. cgroup_migrate_add_src() is called on B for the leader. 2. cgroup_migrate_add_src() is called on A for the other threads. 3. cgroup_migrate_prepare_dst() is called. It scans the src list. 3. It notices that B wants to migrate to A, so it tries to A to the dst list but realizes that its ->mg_preload_node is already busy. 4. 
and then it notices A wants to migrate to A as it's an identity migration, it culls it by list_del_init()'ing its ->mg_preload_node and putting references accordingly. 5. The rest of migration takes place with B on the src list but nothing on the dst list. This means that A isn't held while migration is in progress. If all tasks leave A before the migration finishes and the incoming task pins it, the cset will be destroyed leading to use-after-free. This is caused by overloading cset->mg_preload_node for both src and dst preload lists. We wanted to exclude the cset from the src list but ended up inadvertently excluding it from the dst list too. This patch fixes the issue by separating out cset->mg_preload_node into ->mg_src_preload_node and ->mg_dst_preload_node, so that the src and dst preloadings don't interfere with each other. Bug: 236582926 Change-Id: Ieaf1c0c8fc23753570897fd6e48a54335ab939ce Signed-off-by: Tejun Heo Reported-by: Mukesh Ojha Reported-by: shisiyuan Link: http://lkml.kernel.org/r/1654187688-27411-1-git-send-email-shisiyuan@xiaomi.com Link: https://lore.kernel.org/lkml/Yh+RGIJ0f3nrqIiN@slm.duckdns.org/#t Fixes: f817de98513d ("cgroup: prepare migration path for unified hierarchy") Cc: stable@vger.kernel.org # v3.16+ (cherry picked from commit 07fd5b6cdf3cc30bfde8fe0f644771688be04447 https://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git for-5.19-fixes) Signed-off-by: Elliot Berman Signed-off-by: Mukesh Ojha [mojha: Move the two new list heads into a wrapper ext_css_set struct to ensure ABI doesn't break and also defined a macro init_css_set which will be replaced with init_ext_css_set.cset to avoid too much code changes] Git-commit: e8fce594347a77af0481c18b6b56509b954fa771 Git-repo: https://android.googlesource.com/kernel/common/ Signed-off-by: Srinivasarao Pathipati --- include/linux/cgroup-defs.h | 7 +++ include/linux/cgroup.h | 3 +- kernel/cgroup/cgroup.c | 103 +++++++++++++++++++++--------------- 3 files changed, 70 insertions(+), 43 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8bef92bc3e8e..b46705a495aa 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -277,6 +277,13 @@ struct css_set { struct rcu_head rcu_head; }; +struct ext_css_set { + struct css_set cset; + + struct list_head mg_src_preload_node; + struct list_head mg_dst_preload_node; +}; + struct cgroup_base_stat { struct task_cputime cputime; }; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8a0e1bd77a4f..ff39d08cd175 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -70,7 +70,8 @@ struct css_task_iter { }; extern struct cgroup_root cgrp_dfl_root; -extern struct css_set init_css_set; +extern struct ext_css_set init_ext_css_set; +#define init_css_set init_ext_css_set.cset #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a6df9e6f4b52..a3f9fab44993 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -751,25 +751,28 @@ EXPORT_SYMBOL_GPL(of_css); * reference-counted, to improve performance when child cgroups * haven't been created. 
*/ -struct css_set init_css_set = { - .refcount = REFCOUNT_INIT(1), - .dom_cset = &init_css_set, - .tasks = LIST_HEAD_INIT(init_css_set.tasks), - .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), - .dying_tasks = LIST_HEAD_INIT(init_css_set.dying_tasks), - .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), - .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets), - .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), - .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), - .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), - - /* - * The following field is re-initialized when this cset gets linked - * in cgroup_init(). However, let's initialize the field - * statically too so that the default cgroup can be accessed safely - * early during boot. - */ - .dfl_cgrp = &cgrp_dfl_root.cgrp, +struct ext_css_set init_ext_css_set = { + .cset = { + .refcount = REFCOUNT_INIT(1), + .dom_cset = &init_css_set, + .tasks = LIST_HEAD_INIT(init_css_set.tasks), + .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), + .dying_tasks = LIST_HEAD_INIT(init_css_set.dying_tasks), + .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), + .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets), + .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), + .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), + .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), + /* + * The following field is re-initialized when this cset gets linked + * in cgroup_init(). However, let's initialize the field + * statically too so that the default cgroup can be accessed safely + * early during boot. + */ + .dfl_cgrp = &cgrp_dfl_root.cgrp, + }, + .mg_src_preload_node = LIST_HEAD_INIT(init_ext_css_set.mg_src_preload_node), + .mg_dst_preload_node = LIST_HEAD_INIT(init_ext_css_set.mg_dst_preload_node), }; static int css_set_count = 1; /* 1 for init_css_set */ @@ -1197,6 +1200,7 @@ static struct css_set *find_css_set(struct css_set *old_cset, struct cgroup *cgrp) { struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { }; + struct ext_css_set *ext_cset; struct css_set *cset; struct list_head tmp_links; struct cgrp_cset_link *link; @@ -1217,9 +1221,10 @@ static struct css_set *find_css_set(struct css_set *old_cset, if (cset) return cset; - cset = kzalloc(sizeof(*cset), GFP_KERNEL); - if (!cset) + ext_cset = kzalloc(sizeof(*ext_cset), GFP_KERNEL); + if (!ext_cset) return NULL; + cset = &ext_cset->cset; /* Allocate all the cgrp_cset_link objects that we'll need */ if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) { @@ -1237,6 +1242,8 @@ static struct css_set *find_css_set(struct css_set *old_cset, INIT_HLIST_NODE(&cset->hlist); INIT_LIST_HEAD(&cset->cgrp_links); INIT_LIST_HEAD(&cset->mg_preload_node); + INIT_LIST_HEAD(&ext_cset->mg_src_preload_node); + INIT_LIST_HEAD(&ext_cset->mg_dst_preload_node); INIT_LIST_HEAD(&cset->mg_node); /* Copy the set of subsystem state objects generated in @@ -2687,22 +2694,28 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp) */ void cgroup_migrate_finish(struct cgroup_mgctx *mgctx) { - LIST_HEAD(preloaded); - struct css_set *cset, *tmp_cset; + struct ext_css_set *cset, *tmp_cset; lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); - list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded); - list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded); + list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets, + mg_src_preload_node) { + cset->cset.mg_src_cgrp = NULL; + cset->cset.mg_dst_cgrp = NULL; + 
cset->cset.mg_dst_cset = NULL; + list_del_init(&cset->mg_src_preload_node); + put_css_set_locked(&cset->cset); + } - list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) { - cset->mg_src_cgrp = NULL; - cset->mg_dst_cgrp = NULL; - cset->mg_dst_cset = NULL; - list_del_init(&cset->mg_preload_node); - put_css_set_locked(cset); + list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets, + mg_dst_preload_node) { + cset->cset.mg_src_cgrp = NULL; + cset->cset.mg_dst_cgrp = NULL; + cset->cset.mg_dst_cset = NULL; + list_del_init(&cset->mg_dst_preload_node); + put_css_set_locked(&cset->cset); } spin_unlock_irq(&css_set_lock); @@ -2729,6 +2742,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup_mgctx *mgctx) { struct cgroup *src_cgrp; + struct ext_css_set *ext_src_cset; lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&css_set_lock); @@ -2742,8 +2756,9 @@ void cgroup_migrate_add_src(struct css_set *src_cset, return; src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); + ext_src_cset = container_of(src_cset, struct ext_css_set, cset); - if (!list_empty(&src_cset->mg_preload_node)) + if (!list_empty(&ext_src_cset->mg_src_preload_node)) return; WARN_ON(src_cset->mg_src_cgrp); @@ -2754,7 +2769,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, src_cset->mg_src_cgrp = src_cgrp; src_cset->mg_dst_cgrp = dst_cgrp; get_css_set(src_cset); - list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets); + list_add_tail(&ext_src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets); } /** @@ -2773,20 +2788,23 @@ void cgroup_migrate_add_src(struct css_set *src_cset, */ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) { - struct css_set *src_cset, *tmp_cset; + struct ext_css_set *ext_src_set, *tmp_cset; lockdep_assert_held(&cgroup_mutex); /* look up the dst cset for each src cset and link it to src */ - list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets, - mg_preload_node) { + list_for_each_entry_safe(ext_src_set, tmp_cset, &mgctx->preloaded_src_csets, + mg_src_preload_node) { + struct css_set *src_cset = &ext_src_set->cset; struct css_set *dst_cset; + struct ext_css_set *ext_dst_cset; struct cgroup_subsys *ss; int ssid; dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp); if (!dst_cset) return -ENOMEM; + ext_dst_cset = container_of(dst_cset, struct ext_css_set, cset); WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset); @@ -2798,7 +2816,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) if (src_cset == dst_cset) { src_cset->mg_src_cgrp = NULL; src_cset->mg_dst_cgrp = NULL; - list_del_init(&src_cset->mg_preload_node); + list_del_init(&ext_src_set->mg_src_preload_node); put_css_set(src_cset); put_css_set(dst_cset); continue; @@ -2806,8 +2824,8 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) src_cset->mg_dst_cset = dst_cset; - if (list_empty(&dst_cset->mg_preload_node)) - list_add_tail(&dst_cset->mg_preload_node, + if (list_empty(&ext_dst_cset->mg_dst_preload_node)) + list_add_tail(&ext_dst_cset->mg_dst_preload_node, &mgctx->preloaded_dst_csets); else put_css_set(dst_cset); @@ -3026,8 +3044,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) DEFINE_CGROUP_MGCTX(mgctx); struct cgroup_subsys_state *d_css; struct cgroup *dsct; - struct css_set *src_cset; bool has_tasks; + struct ext_css_set *ext_src_set; int ret; lockdep_assert_held(&cgroup_mutex); @@ -3057,11 +3075,12 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) goto out_finish; 
spin_lock_irq(&css_set_lock); - list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) { + list_for_each_entry(ext_src_set, &mgctx.preloaded_src_csets, + mg_src_preload_node) { struct task_struct *task, *ntask; /* all tasks in src_csets need to be migrated */ - list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list) + list_for_each_entry_safe(task, ntask, &ext_src_set->cset.tasks, cg_list) cgroup_migrate_add_task(task, &mgctx); } spin_unlock_irq(&css_set_lock); From 290d702383e50bf509c89a7aeff60f2525c6cc86 Mon Sep 17 00:00:00 2001 From: Patrick Daly Date: Mon, 10 Oct 2022 19:25:27 -0700 Subject: [PATCH 17/26] ANDROID: mm/filemap: Fix missing put_page() for speculative page fault find_get_page() returns a page with increased refcount, assuming a page exists at the given index. Ensure this refcount is dropped on error. Bug: 271079833 Fixes: 59d4d125 ("BACKPORT: FROMLIST: mm: implement speculative handling in filemap_fault()") Change-Id: Idc7b9e3f11f32a02bed4c6f4e11cec9200a5c790 Signed-off-by: Patrick Daly (cherry picked from commit 6232eecfa7ca0d8d0ca088da6d0edb2c3a879ff9) Signed-off-by: Zhenhua Huang Git-commit: 1d05213028b6dbdb8801e20f29b6a6f91c216033 Git-repo: https://android.googlesource.com/kernel/common/ Signed-off-by: Srinivasarao Pathipati --- mm/filemap.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 2afaf23c13c0..a7a79261d008 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2524,11 +2524,14 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) if (vmf->flags & FAULT_FLAG_SPECULATIVE) { page = find_get_page(mapping, offset); - if (unlikely(!page) || unlikely(PageReadahead(page))) + if (unlikely(!page)) return VM_FAULT_RETRY; + if (unlikely(PageReadahead(page))) + goto page_put; + if (!trylock_page(page)) - return VM_FAULT_RETRY; + goto page_put; if (unlikely(compound_head(page)->mapping != mapping)) goto page_unlock; @@ -2560,6 +2563,8 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) return VM_FAULT_LOCKED; page_unlock: unlock_page(page); +page_put: + put_page(page); return VM_FAULT_RETRY; } From 529c059a8bcb5fd4b1b4fb89cf2e59587df0c669 Mon Sep 17 00:00:00 2001 From: Ram Nagesh Date: Mon, 13 Mar 2023 16:00:34 +0530 Subject: [PATCH 18/26] msm: synx: Check for zero before reducing bind handles Suppose user has sent invalid external fence to bind API. Now, while binding, if synx signal comes in parallel, it will set number of bound synxs as 0 after signal. Further reduction on that number(num_bound_synxs) (in case of callback registration failure) would make it wrap around. So, now num_bound_synxs is large value and abrupt close on synx fd will lead to synx_util_object_destroy. Here, the for loop on num_bound_synxs would lead to invalid memory access. This change decrements num_bound_synxs only if not zero. 
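A minimal user-space illustration of the wrap-around this guards against (the 32-bit unsigned counter is an assumption made for the example, not code lifted from the synx driver):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint32_t num_bound_synxs = 0;

                /* unguarded decrement of an already-zero unsigned counter wraps */
                num_bound_synxs--;
                printf("unguarded: %u\n", num_bound_synxs);     /* 4294967295 */

                /* the fix: only decrement when there is something to undo */
                num_bound_synxs = 0;
                if (num_bound_synxs)
                        num_bound_synxs--;
                printf("guarded:   %u\n", num_bound_synxs);     /* 0 */
                return 0;
        }

The huge wrapped value is what later drives the loop in synx_util_object_destroy() past the end of the bound_synxs array.
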
Change-Id: I0cfffc90d4164b149c87545818ae4dcf57fc4c46 Signed-off-by: Ram Nagesh --- drivers/media/platform/msm/synx/synx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/msm/synx/synx.c b/drivers/media/platform/msm/synx/synx.c index 8043cc1489ed..21c9cce7a958 100644 --- a/drivers/media/platform/msm/synx/synx.c +++ b/drivers/media/platform/msm/synx/synx.c @@ -870,7 +870,8 @@ int synx_bind(struct synx_session session_id, mutex_lock(&synx_obj->obj_lock); memset(&synx_obj->bound_synxs[bound_idx], 0, sizeof(struct synx_external_desc)); - synx_obj->num_bound_synxs--; + if (synx_obj->num_bound_synxs) + synx_obj->num_bound_synxs--; goto free; } From e80691d8244d11c80a40f46bde0f16d2eb43f22c Mon Sep 17 00:00:00 2001 From: Raihan Haider Date: Wed, 1 Mar 2023 14:23:35 +0530 Subject: [PATCH 19/26] msm: Add config option for Realtek R8168 driver Add kernel config option for Realtek R8168 driver Change-Id: I81a7b1f3585d69d550109d00d09944242c921728 Signed-off-by: Raihan Haider --- drivers/platform/msm/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/platform/msm/Kconfig b/drivers/platform/msm/Kconfig index ecd74be324fe..98d2ae6d8f16 100644 --- a/drivers/platform/msm/Kconfig +++ b/drivers/platform/msm/Kconfig @@ -168,6 +168,16 @@ config R8125 To compile this driver as a module, choose M here: the module will be called r8125. +config R8168 + tristate "Realtek R8168 driver" + depends on PCI + help + This is a 1Gbps ethernet driver for the PCI network cards based on + the Realtek RTL8111K chip. If you have one of those, say Y here. + + To compile this driver as a module, choose M here: the module + will be called r8168. + config R8125_IOSS tristate "Realtek R8125 IOSS glue driver" depends on R8125 From 930876955fbb997bf840b784c441078964b84d47 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 27 Jan 2023 09:01:10 +0530 Subject: [PATCH 20/26] clk: qcom: gcc: Add support for EDP Ref clock Yupik EDP reference clock for Yupik is required by EDP consumer. Change-Id: I981bbaa789cdce86a140d17b81d46d590cc7d980 Signed-off-by: Taniya Das --- include/dt-bindings/clock/qcom,gcc-yupik.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/dt-bindings/clock/qcom,gcc-yupik.h b/include/dt-bindings/clock/qcom,gcc-yupik.h index 5d1b744abbbf..190d9b7d1169 100644 --- a/include/dt-bindings/clock/qcom,gcc-yupik.h +++ b/include/dt-bindings/clock/qcom,gcc-yupik.h @@ -177,6 +177,7 @@ #define GCC_AGGRE_NOC_PCIE_CENTER_SF_AXI_CLK 167 #define GCC_AGGRE_NOC_PCIE_TBU_CLK 168 #define GCC_PCIE_CLKREF_EN 169 +#define GCC_EDP_CLKREF_EN 170 /* GCC power domains */ #define GCC_PCIE_0_GDSC 0 From f73dcc175fd168341d9cfb0ddc929e03a4a66857 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 27 Jan 2023 09:03:25 +0530 Subject: [PATCH 21/26] clk: qcom: gcc: Add support for edp ref clock for Yupik EDP clock is required for edp consumer. 
Change-Id: I06a537f06dd95af67db2679f5d16620eef624a60 Signed-off-by: Taniya Das --- drivers/clk/qcom/gcc-yupik.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/clk/qcom/gcc-yupik.c b/drivers/clk/qcom/gcc-yupik.c index 696ec3760a24..622c52df92ad 100644 --- a/drivers/clk/qcom/gcc-yupik.c +++ b/drivers/clk/qcom/gcc-yupik.c @@ -2173,6 +2173,19 @@ static struct clk_branch gcc_pcie_clkref_en = { }, }; +static struct clk_branch gcc_edp_clkref_en = { + .halt_reg = 0x8c008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x8c008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_edp_clkref_en", + .ops = &clk_branch2_ops, + }, + }, +}; + static struct clk_branch gcc_pcie_throttle_core_clk = { .halt_reg = 0x90018, .halt_check = BRANCH_HALT_SKIP, @@ -3510,6 +3523,7 @@ static struct clk_regmap *gcc_yupik_clocks[] = { [GCC_VIDEO_AXI0_CLK] = &gcc_video_axi0_clk.clkr, [GCC_VIDEO_MVP_THROTTLE_CORE_CLK] = &gcc_video_mvp_throttle_core_clk.clkr, + [GCC_EDP_CLKREF_EN] = &gcc_edp_clkref_en.clkr, }; static const struct qcom_reset_map gcc_yupik_resets[] = { From 6380631f287ef87a12c1d494e24200dce6a02954 Mon Sep 17 00:00:00 2001 From: rakegand Date: Tue, 7 Mar 2023 11:53:20 +0530 Subject: [PATCH 22/26] soc: spcom: Addressing KASAN issue slab-out-of-bounds This change blocks access to channel name string, in case channel name string length is more than permissible limits. Change-Id: I2fe0b32498bc74011b1d42bb3c056c7e174494ca Signed-off-by: rakegand --- drivers/soc/qcom/spcom.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/soc/qcom/spcom.c b/drivers/soc/qcom/spcom.c index a966ba98c30a..da3d5d352cc1 100644 --- a/drivers/soc/qcom/spcom.c +++ b/drivers/soc/qcom/spcom.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */ /* @@ -631,8 +632,12 @@ static int spcom_handle_create_channel_command(void *cmd_buf, int cmd_size) mutex_lock(&spcom_dev->chdev_count_lock); ret = spcom_create_channel_chardev(cmd->ch_name, cmd->is_sharable); mutex_unlock(&spcom_dev->chdev_count_lock); - if (ret) - spcom_pr_err("failed to create ch[%s], ret [%d]\n", cmd->ch_name, ret); + if (ret) { + if (-EINVAL == ret) + spcom_pr_err("failed to create channel, ret [%d]\n", ret); + else + spcom_pr_err("failed to create ch[%s], ret [%d]\n", cmd->ch_name, ret); + } return ret; } From f3dae06c15296e35890e10f8f355b31ac740b30a Mon Sep 17 00:00:00 2001 From: AKASH KUMAR Date: Mon, 13 Mar 2023 13:43:08 +0530 Subject: [PATCH 23/26] dwc3: Add check for sg queued trbs while reclaiming If we're in the middle of series of chained TRBs, DWC3 will avoid clearing HWO and SW has to do it manually. We are doing it while reclaiming trbs for sg transfers. Add check for sg queued trb and reclaim it as DWC3 skips clearing HWO bit during sg transfers. Change-Id: I200254728c0549da6534aea51daad94be6b6295e Signed-off-by: AKASH KUMAR --- drivers/usb/dwc3/gadget.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f25bf76dafe3..a8caaab0eb52 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3277,7 +3277,13 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, * processed by the core. Hence do not reclaim it until * it is processed by the core. 
*/ - if (req->trb->ctrl & DWC3_TRB_CTRL_HWO) { + /* + * If sg transfer are in progress, avoid checking + * HWO bit here as these will get cleared during + * ep reclaim. + */ + if ((req->trb->ctrl & DWC3_TRB_CTRL_HWO) + && (req->num_queued_sgs == 0)) { dbg_event(0xFF, "PEND TRB", dep->number); return 1; } From 1f7f93764874f77efd2c81863dfdbf7ee45c6ef6 Mon Sep 17 00:00:00 2001 From: Krishna Nagaraja Date: Mon, 21 Nov 2022 14:21:04 +0530 Subject: [PATCH 24/26] msm: ipa3: add ioctl interface for dual backhaul Add the ioctl interface to indicate Dual backhaul info, using QMI message. Change-Id: I5fa944c61e1745fe71c7ddc8bf48e0001c19e520 Signed-off-by: Krishna Nagaraja --- include/uapi/linux/rmnet_ipa_fd_ioctl.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/uapi/linux/rmnet_ipa_fd_ioctl.h b/include/uapi/linux/rmnet_ipa_fd_ioctl.h index a8e40ef427b1..3ae29b4740e6 100644 --- a/include/uapi/linux/rmnet_ipa_fd_ioctl.h +++ b/include/uapi/linux/rmnet_ipa_fd_ioctl.h @@ -37,6 +37,7 @@ #define WAN_IOCTL_RMV_OFFLOAD_CONNECTION 19 #define WAN_IOCTL_GET_WAN_MTU 20 #define WAN_IOCTL_NOTIFY_NAT_MOVE_RES 21 +#define WAN_IOCTL_NOTIFY_DUAL_BACKHAUL_INFO 22 /* User space may not have this defined. */ #ifndef IFNAMSIZ @@ -195,6 +196,10 @@ struct wan_ioctl_query_per_client_stats { WAN_IOCTL_ADD_FLT_RULE, \ struct ipa_install_fltr_rule_req_msg_v01 *) +#define WAN_IOC_NOTIFY_DUAL_BACKHAUL_INFO _IOWR(WAN_IOC_MAGIC, \ + WAN_IOCTL_NOTIFY_DUAL_BACKHAUL_INFO, \ + struct ipa_eth_backhaul_info_req_msg_v01 *) + #define WAN_IOC_ADD_FLT_RULE_INDEX _IOWR(WAN_IOC_MAGIC, \ WAN_IOCTL_ADD_FLT_INDEX, \ struct ipa_fltr_installed_notif_req_msg_v01 *) From 046f27cfe248fe48c31fc10a595c5da6df3f2c45 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 3 Feb 2023 15:05:04 +0530 Subject: [PATCH 25/26] msm: kgsl: Check user generated timestamp before queuing drawobjs In ioctls like kgsl_ioctl_submit_commands(), if both syncobj type and cmd/marker/sparseobj type are submitted, the syncobj is queued first followed by the other obj type. After syncobj is successfully queued, in case of failure in get_timestamp while queuing the other obj, both the command objs are destroyed. As sync obj is already queued, accessing this later would cause a crash. Compare the user generated timestamp with the drawctxt timestamp and return early in case of error. This avoids unnecessary queuing of drawobjs. Change-Id: Iedebd480bc18cd74d2f69d24a9dc1032fab01cdb Signed-off-by: Kamal Agrawal --- drivers/gpu/msm/adreno_hwsched.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/msm/adreno_hwsched.c b/drivers/gpu/msm/adreno_hwsched.c index 6eb466919833..3e8fc6e9ca80 100644 --- a/drivers/gpu/msm/adreno_hwsched.c +++ b/drivers/gpu/msm/adreno_hwsched.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -902,6 +903,23 @@ int adreno_hwsched_queue_cmds(struct kgsl_device_private *dev_priv, user_ts = *timestamp; + /* + * If there is only one drawobj in the array and it is of + * type SYNCOBJ_TYPE, skip comparing user_ts as it can be 0 + */ + if (!(count == 1 && drawobj[0]->type == SYNCOBJ_TYPE) && + (drawctxt->base.flags & KGSL_CONTEXT_USER_GENERATED_TS)) { + /* + * User specified timestamps need to be greater than the last + * issued timestamp in the context + */ + if (timestamp_cmp(drawctxt->timestamp, user_ts) >= 0) { + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return -ERANGE; + } + } + for (i = 0; i < count; i++) { switch (drawobj[i]->type) { From 128731bb62e9076b9d2905d8f801fa87546ce328 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Tue, 28 Mar 2023 20:01:44 +0530 Subject: [PATCH 26/26] msm: kgsl: Keep postamble packets in a privileged buffer Postamble packets are executed in privileged mode by gpu. So we should keep them in a privileged scratch buffer to block userspace access. For targets with APRIV feature support, we can mark the preemption scratch buffer as privileged too to avoid similar issues in future. Change-Id: Ifda360dda251083f38dfde80ce1b5dc83daae902 Signed-off-by: Akhil P Oommen Signed-off-by: Kaushal Sanadhya --- drivers/gpu/msm/adreno.h | 5 +---- drivers/gpu/msm/adreno_a6xx_preempt.c | 17 +++++++++-------- drivers/gpu/msm/kgsl.h | 6 ++++++ 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index ac685b24144b..ec29d1d40c36 100644 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_H #define __ADRENO_H @@ -16,9 +16,6 @@ #include "adreno_ringbuffer.h" #include "kgsl_sharedmem.h" -/* Index to preemption scratch buffer to store KMD postamble */ -#define KMD_POSTAMBLE_IDX 100 - /* ADRENO_DEVICE - Given a kgsl_device return the adreno device struct */ #define ADRENO_DEVICE(device) \ container_of(device, struct adreno_device, dev) diff --git a/drivers/gpu/msm/adreno_a6xx_preempt.c b/drivers/gpu/msm/adreno_a6xx_preempt.c index f0c5cf5a4869..cc5b11d30c4b 100644 --- a/drivers/gpu/msm/adreno_a6xx_preempt.c +++ b/drivers/gpu/msm/adreno_a6xx_preempt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -553,8 +553,7 @@ unsigned int a6xx_preemption_pre_ibsubmit( /* Add a KMD post amble to clear the perf counters during preemption */ if (!adreno_dev->perfcounter) { - u64 kmd_postamble_addr = - PREEMPT_SCRATCH_ADDR(adreno_dev, KMD_POSTAMBLE_IDX); + u64 kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev)); *cmds++ = cp_type7_packet(CP_SET_AMBLE, 3); *cmds++ = lower_32_bits(kmd_postamble_addr); @@ -695,6 +694,7 @@ static int a6xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev, int a6xx_preemption_init(struct adreno_device *adreno_dev) { + u32 flags = ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? 
KGSL_MEMDESC_PRIVILEGED : 0; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device); struct adreno_preemption *preempt = &adreno_dev->preempt; @@ -717,7 +717,7 @@ int a6xx_preemption_init(struct adreno_device *adreno_dev) if (IS_ERR_OR_NULL(preempt->scratch)) { preempt->scratch = kgsl_allocate_global(device, PAGE_SIZE, - 0, 0, 0, "preempt_scratch"); + 0, 0, flags, "preempt_scratch"); if (IS_ERR(preempt->scratch)) return PTR_ERR(preempt->scratch); } @@ -733,12 +733,13 @@ int a6xx_preemption_init(struct adreno_device *adreno_dev) return ret; /* - * First 8 dwords of the preemption scratch buffer is used to store the address for CP - * to save/restore VPC data. Reserve 11 dwords in the preemption scratch buffer from - * index KMD_POSTAMBLE_IDX for KMD postamble pm4 packets + * First 28 dwords of the device scratch buffer are used to store shadow rb data. + * Reserve 11 dwords in the device scratch buffer from SCRATCH_POSTAMBLE_OFFSET for + * KMD postamble pm4 packets. This should be in *device->scratch* so that userspace + * cannot access it. */ if (!adreno_dev->perfcounter) { - u32 *postamble = preempt->scratch->hostptr + (KMD_POSTAMBLE_IDX * sizeof(u64)); + u32 *postamble = device->scratch->hostptr + SCRATCH_POSTAMBLE_OFFSET; u32 count = 0; postamble[count++] = cp_type7_packet(CP_REG_RMW, 3); diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index 69fdf288fa68..0f0721522574 100644 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __KGSL_H #define __KGSL_H @@ -71,6 +72,11 @@ #define SCRATCH_RPTR_GPU_ADDR(dev, id) \ ((dev)->scratch->gpuaddr + SCRATCH_RPTR_OFFSET(id)) +/* OFFSET to KMD postamble packets in scratch buffer */ +#define SCRATCH_POSTAMBLE_OFFSET (100 * sizeof(u64)) +#define SCRATCH_POSTAMBLE_ADDR(dev) \ + ((dev)->scratch->gpuaddr + SCRATCH_POSTAMBLE_OFFSET) + /* Timestamp window used to detect rollovers (half of integer range) */ #define KGSL_TIMESTAMP_WINDOW 0x80000000
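
As a standalone illustration of the layout introduced in the final hunk, the following sketch mirrors the SCRATCH_POSTAMBLE_OFFSET / SCRATCH_POSTAMBLE_ADDR arithmetic with reduced stand-in structures; these are not the real kgsl definitions, only the address math is the same:

    #include <stdint.h>
    #include <stdio.h>

    /* Reduced stand-in for the kgsl scratch memdesc: only the GPU base
     * address matters for this illustration. */
    struct fake_scratch {
    	uint64_t gpuaddr;
    };

    struct fake_device {
    	struct fake_scratch *scratch;
    };

    /* Mirrors the kgsl.h macros above: the KMD postamble lives 100 u64
     * slots (800 bytes) into the privileged device scratch buffer, past
     * the per-ringbuffer RPTR shadow region at the start of the buffer. */
    #define POSTAMBLE_OFFSET	(100 * sizeof(uint64_t))
    #define POSTAMBLE_ADDR(dev)	((dev)->scratch->gpuaddr + POSTAMBLE_OFFSET)

    int main(void)
    {
    	struct fake_scratch scratch = { .gpuaddr = 0x100000000ULL };
    	struct fake_device dev = { .scratch = &scratch };

    	printf("postamble at 0x%llx (offset %zu bytes)\n",
    	       (unsigned long long)POSTAMBLE_ADDR(&dev), POSTAMBLE_OFFSET);
    	return 0;
    }

Keeping the postamble in the device scratch buffer (marked privileged) rather than the preemption scratch buffer is what blocks userspace from tampering with packets that the GPU executes in privileged mode.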