PlatformLab · axkum10 · Apr 2, 2026
diff --git a/homa_offload.c b/homa_offload.c
@@ -133,11 +133,11 @@ void homa_gro_unhook_tcp(void)
  * homa_tcp_gro_receive() - Invoked instead of TCP's normal gro_receive function
  * when hooking is enabled. Identifies Homa-over-TCP packets and passes them
  * to Homa; sends real TCP packets to TCP's gro_receive function.
- * @held_list:  Pointer to header for list of packets that are being
+ * @gro_list:   Pointer to header for list of packets that are being
  *              held for possible GRO merging.
  * @skb:        The newly arrived packet.
  */
-struct sk_buff *homa_tcp_gro_receive(struct list_head *held_list,
+struct sk_buff *homa_tcp_gro_receive(struct list_head *gro_list,
 				      struct sk_buff *skb)
 {
 	struct homa_common_hdr *h = (struct homa_common_hdr *)
@@ -148,7 +148,7 @@ struct sk_buff *homa_tcp_gro_receive(struct list_head *held_list,
 	//		ntohs(h->urgent), homa_local_id(h->sender_id));
 	if (h->flags != HOMA_TCP_FLAGS ||
 	    ntohs(h->urgent) != HOMA_TCP_URGENT)
-		return tcp_net_offload->callbacks.gro_receive(held_list, skb);
+		return tcp_net_offload->callbacks.gro_receive(gro_list, skb);
 
 	/* Change the packet's IP protocol to Homa so that it will get
 	 * dispatched directly to Homa in the future.
@@ -161,7 +161,7 @@ struct sk_buff *homa_tcp_gro_receive(struct list_head *held_list,
 						 htons(IPPROTO_HOMA));
 		ip_hdr(skb)->protocol = IPPROTO_HOMA;
 	}
-	return homa_gro_receive(held_list, skb);
+	return homa_gro_receive(gro_list, skb);
 }
 
 /**
@@ -267,16 +267,15 @@ struct sk_buff *homa_gso_segment(struct sk_buff *skb,
  * unusual way: it simply aggregates all packets targeted to a particular
  * destination port, so that the entire bundle can get through the networking
  * stack in a single traversal.
- * @held_list:  Pointer to header for list of packets that are being
- *              held for possible GRO merging. Note: this list contains
- *              only packets matching a given hash.
+ * @gro_list:   Pointer to header for list of packets that are being
+ *              held for possible GRO merging.
  * @skb:        The newly arrived packet.
  *
  * Return: If the return value is non-NULL, it refers to an skb in
  * gro_list. The skb will be removed from the list by the caller and
  * passed up the stack immediately.
  */
-struct sk_buff *homa_gro_receive(struct list_head *held_list,
+struct sk_buff *homa_gro_receive(struct list_head *gro_list,
 				 struct sk_buff *skb)
 {
 	/* This function will do one of the following things:
@@ -296,10 +295,10 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list,
 	struct homa_data_hdr *h_new;
 	u64 *softirq_cycles_metric;
 	struct sk_buff *held_skb;
+	struct sk_buff *p;
 	u64 now = homa_clock();
 	int priority;
 	u32 saddr;
-	u32 hash;
 	int busy;
 
 	if (!homa_make_header_avl(skb))
@@ -361,97 +360,53 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list,
 #endif /* See strip.py */
 	}
 
-	/* The GRO mechanism tries to separate packets onto different
-	 * gro_lists by hash. This is bad for us, because we want to batch
-	 * packets together regardless of their RPCs. So, instead of
-	 * checking the list they gave us, check the last list where this
-	 * core added a Homa packet (if there is such a list).
+	h_new->common.gro_count = 1;
+
+	/* On newer kernels (5.0+), the GRO mechanism separates packets
+	 * into per-hash buckets (GRO_HASH_BUCKETS / struct gro_list /
+	 * napi->gro_hash[]). On EL8 (kernel 4.18), none of that exists;
+	 * napi_struct has a single gro_list. So instead of reverse-
+	 * engineering napi via container_of on hash buckets, we simply
+	 * iterate the provided gro_list to find an existing Homa packet
+	 * to batch with.
 	 */
-	hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
-	if (offload_core->held_skb) {
-		/* Reverse-engineer the location of the gro_node, so we
-		 * can verify that held_skb is still valid.
-		 */
-		struct gro_list *gro_list = container_of(held_list,
-				struct gro_list, list);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 15, 0)
-		struct napi_struct *napi = container_of(gro_list,
-				struct napi_struct, gro_hash[hash]);
-#else
-		struct gro_node *gro_node = container_of(gro_list,
-				struct gro_node, hash[hash]);
-#endif
+	list_for_each_entry(p, gro_list, list) {
+		struct homa_common_hdr *h_held;
+		int protocol;
+
+		held_skb = p;
+		h_held = (struct homa_common_hdr *)
+				skb_transport_header(held_skb);
+
+		if (skb_is_ipv6(held_skb))
+			protocol = ipv6_hdr(held_skb)->nexthdr;
+		else
+			protocol = ip_hdr(held_skb)->protocol;
+		if (protocol != IPPROTO_HOMA)
+			continue;
 
-		/* Must verify that offload_core->held_skb points to a packet on
-		 * the list, and that the packet is a Homa packet.
-		 * homa_gro_complete isn't always invoked before removing
-		 * packets from the list, so offload_core->held_skb could be a
-		 * dangling pointer (or the skb could have been reused for
-		 * some other protocol).
+		/* Aggregate skb into held_skb. We don't update the
+		 * length of held_skb because we'll eventually split
+		 * it up and process each skb independently.
 		 */
-		list_for_each_entry(held_skb,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 15, 0)
-				    &napi->gro_hash[offload_core->held_bucket].list,
-#else
-				    &gro_node->hash[offload_core->held_bucket].list,
-#endif
-				    list) {
-			int protocol;
-
-			if (held_skb != offload_core->held_skb)
-				continue;
-			if (skb_is_ipv6(held_skb))
-				protocol = ipv6_hdr(held_skb)->nexthdr;
-			else
-				protocol = ip_hdr(held_skb)->protocol;
-			if (protocol != IPPROTO_HOMA) {
-				tt_record3("homa_gro_receive held_skb 0x%0x%0x isn't Homa: protocol %d",
-					   tt_hi(held_skb), tt_lo(held_skb),
-					   protocol);
-				continue;
-			}
+		if (NAPI_GRO_CB(held_skb)->last == held_skb)
+			skb_shinfo(held_skb)->frag_list = skb;
+		else
+			NAPI_GRO_CB(held_skb)->last->next = skb;
+		NAPI_GRO_CB(held_skb)->last = skb;
+		skb->next = NULL;
+		NAPI_GRO_CB(skb)->same_flow = 1;
+		NAPI_GRO_CB(held_skb)->count++;
+		h_held->gro_count++;
+
+		/* When the batch is full, return the held_skb to the
+		 * caller; the GRO framework will remove it from the
+		 * gro_list and pass it up through the SoftIRQ layer.
+		 */
+		if (h_held->gro_count >= homa->max_gro_skbs)
+			result = held_skb;
 
-			/* Aggregate skb into held_skb. We don't update the
-			 * length of held_skb because we'll eventually split
-			 * it up and process each skb independently.
-			 */
-			if (NAPI_GRO_CB(held_skb)->last == held_skb)
-				skb_shinfo(held_skb)->frag_list = skb;
-			else
-				NAPI_GRO_CB(held_skb)->last->next = skb;
-			NAPI_GRO_CB(held_skb)->last = skb;
-			skb->next = NULL;
-			NAPI_GRO_CB(skb)->same_flow = 1;
-			NAPI_GRO_CB(held_skb)->count++;
-			if (NAPI_GRO_CB(held_skb)->count >= homa->max_gro_skbs) {
-				/* Push this batch up through the SoftIRQ
-				 * layer. This code is a hack, needed because
-				 * returning skb as result is no longer
-				 * sufficient (as of 5.4.80) to push it up
-				 * the stack; the packet just gets queued on
-				 * gro_node->rx_list. This code basically steals
-				 * the packet from dev_gro_receive and
-				 * pushes it upward.
-				 */
-				skb_list_del_init(held_skb);
-				homa_gro_complete(held_skb, 0);
-				netif_receive_skb(held_skb);
-				homa_send_ipis();
-#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 15, 0)
-				napi->gro_hash[offload_core->held_bucket].count--;
-				if (napi->gro_hash[offload_core->held_bucket].count == 0)
-					__clear_bit(offload_core->held_bucket,
-						    &napi->gro_bitmask);
-#else
-				gro_node->hash[offload_core->held_bucket].count--;
-				if (gro_node->hash[offload_core->held_bucket].count == 0)
-					__clear_bit(offload_core->held_bucket,
-						    &gro_node->bitmask);
-#endif
-				result = ERR_PTR(-EINPROGRESS);
-			}
-			goto done;
-		}
+		goto done;
 	}
 
 	/* There was no existing Homa packet that this packet could be
@@ -462,8 +417,6 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list,
 	 * means we aren't heavily loaded; if batching does occur,
 	 * homa_gro_complete will pick a different core).
 	 */
-	offload_core->held_skb = skb;
-	offload_core->held_bucket = hash;
 	if (likely(homa->gro_policy & HOMA_GRO_SAME_CORE))
 		homa_set_softirq_cpu(skb, smp_processor_id());
 

diff --git a/homa_offload.h b/homa_offload.h
@@ -88,7 +88,7 @@ int      homa_offload_init(void);
 void     homa_send_ipis(void);
 void     homa_set_softirq_cpu(struct sk_buff *skb, int cpu);
 #ifndef __STRIP__ /* See strip.py */
-struct sk_buff *homa_tcp_gro_receive(struct list_head *held_list,
+struct sk_buff *homa_tcp_gro_receive(struct list_head *gro_list,
 				      struct sk_buff *skb);
 #endif /* See strip.py */