From 39666daae5c52137ea5fc9374853a449d64aa6a1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 9 Nov 2024 14:29:34 +0000 Subject: [PATCH] Revert "inet: inet_defrag: prevent sk release while still in use" This reverts commit 1b6de5e6575b56502665c65cf93b0ae6aa0f51ab which is commit 18685451fc4e546fc0e718580d32df3c0e5c8272 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: Ifbb679aeb516ec0b393a1996c0e3f51db2ae2ab5 Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 5 +- net/core/sock_destructor.h | 12 ----- net/ipv4/inet_fragment.c | 70 +++++-------------------- net/ipv4/ip_fragment.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- 5 files changed, 19 insertions(+), 72 deletions(-) delete mode 100644 net/core/sock_destructor.h diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7c4a3942cede..f6a06cc2ba90 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -710,7 +710,10 @@ struct sk_buff { struct list_head list; }; - struct sock *sk; + union { + struct sock *sk; + int ip_defrag_offset; + }; union { ktime_t tstamp; diff --git a/net/core/sock_destructor.h b/net/core/sock_destructor.h deleted file mode 100644 index 2f396e6bfba5..000000000000 --- a/net/core/sock_destructor.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _NET_CORE_SOCK_DESTRUCTOR_H -#define _NET_CORE_SOCK_DESTRUCTOR_H -#include - -static inline bool is_skb_wmem(const struct sk_buff *skb) -{ - return skb->destructor == sock_wfree || - skb->destructor == __sock_wfree || - (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree); -} -#endif diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 12ef3cb26676..e0e8a65d561e 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,8 +24,6 @@ #include #include -#include "../core/sock_destructor.h" - /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue. Nodes in the rb-tree queue will * contain "runs" of one or more adjacent fragments. @@ -41,7 +39,6 @@ struct ipfrag_skb_cb { }; struct sk_buff *next_frag; int frag_run_len; - int ip_defrag_offset; }; #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) @@ -362,12 +359,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, */ if (!last) fragrun_create(q, skb); /* First fragment. */ - else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { + else if (last->ip_defrag_offset + last->len < end) { /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. */ - if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) + if (offset < last->ip_defrag_offset + last->len) return IPFRAG_OVERLAP; - if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) + if (offset == last->ip_defrag_offset + last->len) fragrun_append_to_last(q, skb); else fragrun_create(q, skb); @@ -384,13 +381,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, parent = *rbn; curr = rb_to_skb(parent); - curr_run_end = FRAG_CB(curr)->ip_defrag_offset + + curr_run_end = curr->ip_defrag_offset + FRAG_CB(curr)->frag_run_len; - if (end <= FRAG_CB(curr)->ip_defrag_offset) + if (end <= curr->ip_defrag_offset) rbn = &parent->rb_left; else if (offset >= curr_run_end) rbn = &parent->rb_right; - else if (offset >= FRAG_CB(curr)->ip_defrag_offset && + else if (offset >= curr->ip_defrag_offset && end <= curr_run_end) return IPFRAG_DUP; else @@ -404,7 +401,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, rb_insert_color(&skb->rbnode, &q->rb_fragments); } - FRAG_CB(skb)->ip_defrag_offset = offset; + skb->ip_defrag_offset = offset; return IPFRAG_OK; } @@ -414,28 +411,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent) { struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); - void (*destructor)(struct sk_buff *); - unsigned int orig_truesize = 0; - struct sk_buff **nextp = NULL; - struct sock *sk = skb->sk; + struct sk_buff **nextp; int delta; - if (sk && is_skb_wmem(skb)) { - /* TX: skb->sk might have been passed as argument to - * dst->output and must remain valid until tx completes. - * - * Move sk to reassembled skb and fix up wmem accounting. - */ - orig_truesize = skb->truesize; - destructor = skb->destructor; - } - if (head != skb) { fp = skb_clone(skb, GFP_ATOMIC); - if (!fp) { - head = skb; - goto out_restore_sk; - } + if (!fp) + return NULL; FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; if (RB_EMPTY_NODE(&skb->rbnode)) FRAG_CB(parent)->next_frag = fp; @@ -444,12 +426,6 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, &q->rb_fragments); if (q->fragments_tail == skb) q->fragments_tail = fp; - - if (orig_truesize) { - /* prevent skb_morph from releasing sk */ - skb->sk = NULL; - skb->destructor = NULL; - } skb_morph(skb, head); FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, @@ -457,13 +433,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, consume_skb(head); head = skb; } - WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); + WARN_ON(head->ip_defrag_offset != 0); delta = -head->truesize; /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) - goto out_restore_sk; + return NULL; delta += head->truesize; if (delta) @@ -479,7 +455,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, clone = alloc_skb(0, GFP_ATOMIC); if (!clone) - goto out_restore_sk; + return NULL; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) @@ -496,21 +472,6 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, nextp = &skb_shinfo(head)->frag_list; } -out_restore_sk: - if (orig_truesize) { - int ts_delta = head->truesize - orig_truesize; - - /* if this reassembled skb is fragmented later, - * fraglist skbs will get skb->sk assigned from head->sk, - * and each frag skb will be released via sock_wfree. - * - * Update sk_wmem_alloc. - */ - head->sk = sk; - head->destructor = destructor; - refcount_add(ts_delta, &sk->sk_wmem_alloc); - } - return nextp; } EXPORT_SYMBOL(inet_frag_reasm_prepare); @@ -518,8 +479,6 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { - struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; - const unsigned int head_truesize = head->truesize; struct sk_buff **nextp = (struct sk_buff **)reasm_data; struct rb_node *rbn; struct sk_buff *fp; @@ -582,9 +541,6 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, skb_mark_not_on_list(head); head->prev = NULL; head->tstamp = q->stamp; - - if (sk) - refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(inet_frag_reasm_finish); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index ec2264adf2a6..fad803d2d711 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -377,7 +377,6 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } skb_dst_drop(skb); - skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -480,6 +479,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) struct ipq *qp; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); + skb_orphan(skb); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index cab68c63ea65..fed9666a2f7d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -296,7 +296,6 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, } skb_dst_drop(skb); - skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -462,6 +461,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); + skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); if (fq == NULL) {