From 8b6880fcb83220d1e466385b5b572eb7ae6be546 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 May 2024 11:43:53 +0000 Subject: [PATCH 1/4] BACKPORT: net: fix __dst_negative_advice() race __dst_negative_advice() does not enforce proper RCU rules when sk->dst_cache must be cleared, leading to possible UAF. RCU rules are that we must first clear sk->sk_dst_cache, then call dst_release(old_dst). Note that sk_dst_reset(sk) is implementing this protocol correctly, while __dst_negative_advice() uses the wrong order. Given that ip6_negative_advice() has special logic against RTF_CACHE, this means each of the three ->negative_advice() existing methods must perform the sk_dst_reset() themselves. Note the check against NULL dst is centralized in __dst_negative_advice(), there is no need to duplicate it in various callbacks. Many thanks to Clement Lecigne for tracking this issue. This old bug became visible after the blamed commit, using UDP sockets. Bug: 343727534 Fixes: a87cb3e48ee8 ("net: Facility to report route quality of connected sockets") Reported-by: Clement Lecigne Diagnosed-by: Clement Lecigne Signed-off-by: Eric Dumazet Cc: Tom Herbert Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240528114353.1794151-1-edumazet@google.com Signed-off-by: Jakub Kicinski (cherry picked from commit 92f1655aa2b2294d0b49925f3b875a634bd3b59e) [Lee: Trivial/unrelated conflict - no change to the patch] Signed-off-by: Lee Jones Change-Id: I293734dca1b81fcb712e1de294f51e96a405f7e4 Signed-off-by: Greg Kroah-Hartman --- include/net/dst_ops.h | 2 +- include/net/sock.h | 13 +++---------- net/ipv4/route.c | 22 ++++++++-------------- net/ipv6/route.c | 29 +++++++++++++++-------------- net/xfrm/xfrm_policy.c | 11 +++-------- 5 files changed, 30 insertions(+), 47 deletions(-) diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 88ff7bb2bb9b..dd7c0b37da38 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -24,7 +24,7 @@ struct dst_ops { void (*destroy)(struct dst_entry *); void (*ifdown)(struct dst_entry *, struct net_device *dev, int how); - struct dst_entry * (*negative_advice)(struct dst_entry *); + void (*negative_advice)(struct sock *sk, struct dst_entry *); void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, diff --git a/include/net/sock.h b/include/net/sock.h index e44d45781eef..b101012a348f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1955,19 +1955,12 @@ sk_dst_get(struct sock *sk) static inline void dst_negative_advice(struct sock *sk) { - struct dst_entry *ndst, *dst = __sk_dst_get(sk); + struct dst_entry *dst = __sk_dst_get(sk); sk_rethink_txhash(sk); - if (dst && dst->ops->negative_advice) { - ndst = dst->ops->negative_advice(dst); - - if (ndst != dst) { - rcu_assign_pointer(sk->sk_dst_cache, ndst); - sk_tx_queue_clear(sk); - WRITE_ONCE(sk->sk_dst_pending_confirm, 0); - } - } + if (dst && dst->ops->negative_advice) + dst->ops->negative_advice(sk, dst); } static inline void diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 902296ef3e5a..63453c254dcb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -137,7 +137,8 @@ static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); static unsigned int ipv4_mtu(const struct dst_entry *dst); -static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); +static void ipv4_negative_advice(struct sock *sk, + struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, @@ -856,22 +857,15 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf __ip_do_redirect(rt, skb, &fl4, true); } -static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) +static void ipv4_negative_advice(struct sock *sk, + struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; - struct dst_entry *ret = dst; - if (rt) { - if (dst->obsolete > 0) { - ip_rt_put(rt); - ret = NULL; - } else if ((rt->rt_flags & RTCF_REDIRECTED) || - rt->dst.expires) { - ip_rt_put(rt); - ret = NULL; - } - } - return ret; + if ((dst->obsolete > 0) || + (rt->rt_flags & RTCF_REDIRECTED) || + rt->dst.expires) + sk_dst_reset(sk); } /* diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4c51cabc866c..23cfce690345 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -84,7 +84,8 @@ enum rt6_nud_state { static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); static unsigned int ip6_mtu(const struct dst_entry *dst); -static struct dst_entry *ip6_negative_advice(struct dst_entry *); +static void ip6_negative_advice(struct sock *sk, + struct dst_entry *dst); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); @@ -2658,24 +2659,24 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) return dst_ret; } -static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) +static void ip6_negative_advice(struct sock *sk, + struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *) dst; - if (rt) { - if (rt->rt6i_flags & RTF_CACHE) { - rcu_read_lock(); - if (rt6_check_expired(rt)) { - rt6_remove_exception_rt(rt); - dst = NULL; - } - rcu_read_unlock(); - } else { - dst_release(dst); - dst = NULL; + if (rt->rt6i_flags & RTF_CACHE) { + rcu_read_lock(); + if (rt6_check_expired(rt)) { + /* counteract the dst_release() in sk_dst_reset() */ + dst_hold(dst); + sk_dst_reset(sk); + + rt6_remove_exception_rt(rt); } + rcu_read_unlock(); + return; } - return dst; + sk_dst_reset(sk); } static void ip6_link_failure(struct sk_buff *skb) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8632776ae4da..cbf9387b1650 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3778,15 +3778,10 @@ static void xfrm_link_failure(struct sk_buff *skb) /* Impossible. Such dst must be popped before reaches point of failure. */ } -static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) +static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst) { - if (dst) { - if (dst->obsolete) { - dst_release(dst); - dst = NULL; - } - } - return dst; + if (dst->obsolete) + sk_dst_reset(sk); } static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr) From ff29a6cf6e84f4ccbab2d0853cf20e6c2821ef38 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 1 Jun 2024 09:33:51 +0000 Subject: [PATCH 2/4] ANDROID: ABI fixup for abi break in struct dst_ops In commit 92f1655aa2b2 ("net: fix __dst_negative_advice() race") the struct dst_ops callback negative_advice is callback changes function parameters. But as this pointer is part of a structure that is tracked in the ABI checker, the tool triggers when this is changed. However, the callback pointer is internal to the networking stack, so changing the function type is safe, so needing to preserve this is not required. To do so, switch the function pointer type back to the old one so that the checking tools pass, AND then do a hard cast of the function pointer to the new type when assigning and calling the function. Bug: 343727534 Fixes: 92f1655aa2b2 ("net: fix __dst_negative_advice() race") Change-Id: I48d4ab4bbd29f8edc8fbd7923828b7f78a23e12e Signed-off-by: Greg Kroah-Hartman --- include/net/dst_ops.h | 12 +++++++++++- include/net/sock.h | 12 ++++++++++-- net/ipv4/route.c | 2 +- net/ipv6/route.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 5 files changed, 24 insertions(+), 6 deletions(-) diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index dd7c0b37da38..382af5f36e7f 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -12,6 +12,16 @@ struct sk_buff; struct sock; struct net; +/* *** ANDROID FIXUP *** + * These typedefs are used to help fixup the ABI break caused by commit + * 92f1655aa2b2 ("net: fix __dst_negative_advice() race") where the + * negative_advice callback changed function signatures. + * See b/343727534 for more details. + * *** ANDROID FIXUP *** + */ +typedef void (*android_dst_ops_negative_advice_new_t)(struct sock *sk, struct dst_entry *); +typedef struct dst_entry * (*android_dst_ops_negative_advice_old_t)(struct dst_entry *); + struct dst_ops { unsigned short family; unsigned int gc_thresh; @@ -24,7 +34,7 @@ struct dst_ops { void (*destroy)(struct dst_entry *); void (*ifdown)(struct dst_entry *, struct net_device *dev, int how); - void (*negative_advice)(struct sock *sk, struct dst_entry *); + struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, diff --git a/include/net/sock.h b/include/net/sock.h index b101012a348f..7189de533bd4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1955,12 +1955,20 @@ sk_dst_get(struct sock *sk) static inline void dst_negative_advice(struct sock *sk) { + /* *** ANDROID FIXUP *** + * See b/343727534 for more details why this typedef is needed here. + * *** ANDROID FIXUP *** + */ + android_dst_ops_negative_advice_new_t negative_advice; + struct dst_entry *dst = __sk_dst_get(sk); sk_rethink_txhash(sk); - if (dst && dst->ops->negative_advice) - dst->ops->negative_advice(sk, dst); + if (dst && dst->ops->negative_advice) { + negative_advice = (android_dst_ops_negative_advice_new_t)dst->ops->negative_advice; + negative_advice(sk, dst); + } } static inline void diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 63453c254dcb..0cf4c49b1150 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -165,7 +165,7 @@ static struct dst_ops ipv4_dst_ops = { .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, .destroy = ipv4_dst_destroy, - .negative_advice = ipv4_negative_advice, + .negative_advice = (android_dst_ops_negative_advice_old_t)ipv4_negative_advice, .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, .redirect = ip_do_redirect, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 23cfce690345..379b62d87bc1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -250,7 +250,7 @@ static struct dst_ops ip6_dst_ops_template = { .cow_metrics = dst_cow_metrics_generic, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, - .negative_advice = ip6_negative_advice, + .negative_advice = (android_dst_ops_negative_advice_old_t)ip6_negative_advice, .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, .redirect = rt6_do_redirect, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cbf9387b1650..0fe167dca7c2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3949,7 +3949,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam if (likely(dst_ops->mtu == NULL)) dst_ops->mtu = xfrm_mtu; if (likely(dst_ops->negative_advice == NULL)) - dst_ops->negative_advice = xfrm_negative_advice; + dst_ops->negative_advice = (android_dst_ops_negative_advice_old_t)xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) dst_ops->link_failure = xfrm_link_failure; if (likely(dst_ops->neigh_lookup == NULL)) From 73b793dd7d4876d09ef0e420c145fab7782f199c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:53 -0800 Subject: [PATCH 3/4] UPSTREAM: af_unix: Do not use atomic ops for unix_sk(sk)->inflight. [ Upstream commit 97af84a6bba2ab2b9c704c08e67de3b5ea551bb2 ] When touching unix_sk(sk)->inflight, we are always under spin_lock(&unix_gc_lock). Let's convert unix_sk(sk)->inflight to the normal unsigned long. Bug: 336226035 Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123170856.41348-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Stable-dep-of: 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()") Signed-off-by: Sasha Levin (cherry picked from commit 301fdbaa0bba4653570f07789909939f977a7620) Signed-off-by: Lee Jones Change-Id: I0d965d5f2a863d798c06de9f21d0467f256b538e --- include/net/af_unix.h | 2 +- net/unix/af_unix.c | 4 ++-- net/unix/garbage.c | 17 ++++++++--------- net/unix/scm.c | 8 +++++--- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 6cb5026cf727..e7612295b262 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -52,7 +52,7 @@ struct unix_sock { struct mutex iolock, bindlock; struct sock *peer; struct list_head link; - atomic_long_t inflight; + unsigned long inflight; spinlock_t lock; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9b1dd845bca1..53335989a6f0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -809,11 +809,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; - u = unix_sk(sk); + u = unix_sk(sk); + u->inflight = 0; u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); - atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 9121a4d5436d..675fbe594dbb 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), static void dec_inflight(struct unix_sock *usk) { - atomic_long_dec(&usk->inflight); + usk->inflight--; } static void inc_inflight(struct unix_sock *usk) { - atomic_long_inc(&usk->inflight); + usk->inflight++; } static void inc_inflight_move_tail(struct unix_sock *u) { - atomic_long_inc(&u->inflight); + u->inflight++; + /* If this still might be part of a cycle, move it to the end * of the list, so that it's checked even if it was already * passed over @@ -237,14 +238,12 @@ void unix_gc(void) */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { long total_refs; - long inflight_refs; total_refs = file_count(u->sk.sk_socket->file); - inflight_refs = atomic_long_read(&u->inflight); - BUG_ON(inflight_refs < 1); - BUG_ON(total_refs < inflight_refs); - if (total_refs == inflight_refs) { + BUG_ON(!u->inflight); + BUG_ON(total_refs < u->inflight); + if (total_refs == u->inflight) { list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); @@ -271,7 +270,7 @@ void unix_gc(void) /* Move cursor to after the current position. */ list_move(&cursor, &u->link); - if (atomic_long_read(&u->inflight) > 0) { + if (u->inflight) { list_move_tail(&u->link, ¬_cycle_list); __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); scan_children(&u->sk, inc_inflight_move_tail, NULL); diff --git a/net/unix/scm.c b/net/unix/scm.c index 51b623de3be5..785e8c4669e2 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -51,12 +51,13 @@ void unix_inflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - if (atomic_long_inc_return(&u->inflight) == 1) { + if (!u->inflight) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { BUG_ON(list_empty(&u->link)); } + u->inflight++; /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } @@ -73,10 +74,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - BUG_ON(!atomic_long_read(&u->inflight)); + BUG_ON(!u->inflight); BUG_ON(list_empty(&u->link)); - if (atomic_long_dec_and_test(&u->inflight)) + u->inflight--; + if (!u->inflight) list_del_init(&u->link); /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); From 564901bd7f5ddd5e4464d6d5fe5d4d9158375840 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Tue, 11 Jun 2024 15:28:24 -0700 Subject: [PATCH 4/4] ANDROID: 16K: Only check basename of linker context Depending on the platform binary being executed, the linker (interpreter) requested can be one of: 1) /system/bin/bootstrap/linker64 2) /system/bin/linker64 3) /apex/com.android.runtime/bin/linker64 Relax the check to the basename (linker64), instead of the path. Bug: 330767927 Bug: 335584973 Change-Id: I4a1f95b7cecd126f85ad8cefd9ff10d272947f9e Signed-off-by: Kalesh Singh --- mm/pgsize_migration.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index 9efadd1412a3..1c36fcc6fa63 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -19,6 +19,7 @@ #include #include #include +#include #include typedef void (*show_pad_maps_fn) (struct seq_file *m, struct vm_area_struct *vma); @@ -183,7 +184,15 @@ static inline bool linker_ctx(void) memset(buf, 0, bufsize); path = d_path(&file->f_path, buf, bufsize); - if (!strcmp(path, "/system/bin/linker64")) + /* + * Depending on interpreter requested, valid paths could be any of: + * 1. /system/bin/bootstrap/linker64 + * 2. /system/bin/linker64 + * 3. /apex/com.android.runtime/bin/linker64 + * + * Check the base name (linker64). + */ + if (!strcmp(kbasename(path), "linker64")) return true; }