Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 51 additions & 98 deletions homa_offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,11 @@ void homa_gro_unhook_tcp(void)
* homa_tcp_gro_receive() - Invoked instead of TCP's normal gro_receive function
* when hooking is enabled. Identifies Homa-over-TCP packets and passes them
* to Homa; sends real TCP packets to TCP's gro_receive function.
* @held_list: Pointer to header for list of packets that are being
* @gro_list: Pointer to header for list of packets that are being
* held for possible GRO merging.
* @skb: The newly arrived packet.
*/
struct sk_buff *homa_tcp_gro_receive(struct list_head *held_list,
struct sk_buff *homa_tcp_gro_receive(struct list_head *gro_list,
struct sk_buff *skb)
{
struct homa_common_hdr *h = (struct homa_common_hdr *)
Expand All @@ -148,7 +148,7 @@ struct sk_buff *homa_tcp_gro_receive(struct list_head *held_list,
// ntohs(h->urgent), homa_local_id(h->sender_id));
if (h->flags != HOMA_TCP_FLAGS ||
ntohs(h->urgent) != HOMA_TCP_URGENT)
return tcp_net_offload->callbacks.gro_receive(held_list, skb);
return tcp_net_offload->callbacks.gro_receive(gro_list, skb);

/* Change the packet's IP protocol to Homa so that it will get
* dispatched directly to Homa in the future.
Expand All @@ -161,7 +161,7 @@ struct sk_buff *homa_tcp_gro_receive(struct list_head *held_list,
htons(IPPROTO_HOMA));
ip_hdr(skb)->protocol = IPPROTO_HOMA;
}
return homa_gro_receive(held_list, skb);
return homa_gro_receive(gro_list, skb);
}

/**
Expand Down Expand Up @@ -267,16 +267,15 @@ struct sk_buff *homa_gso_segment(struct sk_buff *skb,
* unusual way: it simply aggregates all packets targeted to a particular
* destination port, so that the entire bundle can get through the networking
* stack in a single traversal.
* @held_list: Pointer to header for list of packets that are being
* held for possible GRO merging. Note: this list contains
* only packets matching a given hash.
* @gro_list: Pointer to header for list of packets that are being
* held for possible GRO merging.
* @skb: The newly arrived packet.
*
* Return: If the return value is non-NULL, it refers to an skb in
* gro_list. The skb will be removed from the list by the caller and
* passed up the stack immediately.
*/
struct sk_buff *homa_gro_receive(struct list_head *held_list,
struct sk_buff *homa_gro_receive(struct list_head *gro_list,
struct sk_buff *skb)
{
/* This function will do one of the following things:
Expand All @@ -296,10 +295,10 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list,
struct homa_data_hdr *h_new;
u64 *softirq_cycles_metric;
struct sk_buff *held_skb;
struct sk_buff *p;
u64 now = homa_clock();
int priority;
u32 saddr;
u32 hash;
int busy;

if (!homa_make_header_avl(skb))
Expand Down Expand Up @@ -361,97 +360,53 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list,
#endif /* See strip.py */
}

/* The GRO mechanism tries to separate packets onto different
* gro_lists by hash. This is bad for us, because we want to batch
* packets together regardless of their RPCs. So, instead of
* checking the list they gave us, check the last list where this
* core added a Homa packet (if there is such a list).
h_new->common.gro_count = 1;

/* On newer upstream kernels (4.19+), the GRO mechanism separates
* packets into per-hash buckets (GRO_HASH_BUCKETS / struct gro_list /
* napi->gro_hash[]). On EL8 (kernel 4.18), none of that exists;
* napi_struct has a single gro_list. So, instead of reverse-engineering
* the napi_struct via container_of on a hash bucket, we simply iterate
* over the gro_list passed in by the caller to find an existing Homa
* packet to batch with.
*/
hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
if (offload_core->held_skb) {
/* Reverse-engineer the location of the gro_node, so we
* can verify that held_skb is still valid.
*/
struct gro_list *gro_list = container_of(held_list,
struct gro_list, list);
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 15, 0)
struct napi_struct *napi = container_of(gro_list,
struct napi_struct, gro_hash[hash]);
#else
struct gro_node *gro_node = container_of(gro_list,
struct gro_node, hash[hash]);
#endif
list_for_each_entry(p, gro_list, list) {
struct homa_common_hdr *h_held;
int protocol;

held_skb = p;
h_held = (struct homa_common_hdr *)
skb_transport_header(held_skb);

if (skb_is_ipv6(held_skb))
protocol = ipv6_hdr(held_skb)->nexthdr;
else
protocol = ip_hdr(held_skb)->protocol;
if (protocol != IPPROTO_HOMA)
continue;

/* Must verify that offload_core->held_skb points to a packet on
* the list, and that the packet is a Homa packet.
* homa_gro_complete isn't always invoked before removing
* packets from the list, so offload_core->held_skb could be a
* dangling pointer (or the skb could have been reused for
* some other protocol).
/* Aggregate skb into held_skb. We don't update the
* length of held_skb because we'll eventually split
* it up and process each skb independently.
*/
list_for_each_entry(held_skb,
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 15, 0)
&napi->gro_hash[offload_core->held_bucket].list,
#else
&gro_node->hash[offload_core->held_bucket].list,
#endif
list) {
int protocol;

if (held_skb != offload_core->held_skb)
continue;
if (skb_is_ipv6(held_skb))
protocol = ipv6_hdr(held_skb)->nexthdr;
else
protocol = ip_hdr(held_skb)->protocol;
if (protocol != IPPROTO_HOMA) {
tt_record3("homa_gro_receive held_skb 0x%0x%0x isn't Homa: protocol %d",
tt_hi(held_skb), tt_lo(held_skb),
protocol);
continue;
}
if (NAPI_GRO_CB(held_skb)->last == held_skb)
skb_shinfo(held_skb)->frag_list = skb;
else
NAPI_GRO_CB(held_skb)->last->next = skb;
NAPI_GRO_CB(held_skb)->last = skb;
skb->next = NULL;
NAPI_GRO_CB(skb)->same_flow = 1;
NAPI_GRO_CB(held_skb)->count++;
h_held->gro_count++;

/* When the batch is full, return the held_skb to the
* caller; the GRO framework will remove it from the
* gro_list and pass it up through the SoftIRQ layer.
*/
if (h_held->gro_count >= homa->max_gro_skbs)
result = held_skb;

/* Aggregate skb into held_skb. We don't update the
* length of held_skb because we'll eventually split
* it up and process each skb independently.
*/
if (NAPI_GRO_CB(held_skb)->last == held_skb)
skb_shinfo(held_skb)->frag_list = skb;
else
NAPI_GRO_CB(held_skb)->last->next = skb;
NAPI_GRO_CB(held_skb)->last = skb;
skb->next = NULL;
NAPI_GRO_CB(skb)->same_flow = 1;
NAPI_GRO_CB(held_skb)->count++;
if (NAPI_GRO_CB(held_skb)->count >= homa->max_gro_skbs) {
/* Push this batch up through the SoftIRQ
* layer. This code is a hack, needed because
* returning skb as result is no longer
* sufficient (as of 5.4.80) to push it up
* the stack; the packet just gets queued on
* gro_node->rx_list. This code basically steals
* the packet from dev_gro_receive and
* pushes it upward.
*/
skb_list_del_init(held_skb);
homa_gro_complete(held_skb, 0);
netif_receive_skb(held_skb);
homa_send_ipis();
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 15, 0)
napi->gro_hash[offload_core->held_bucket].count--;
if (napi->gro_hash[offload_core->held_bucket].count == 0)
__clear_bit(offload_core->held_bucket,
&napi->gro_bitmask);
#else
gro_node->hash[offload_core->held_bucket].count--;
if (gro_node->hash[offload_core->held_bucket].count == 0)
__clear_bit(offload_core->held_bucket,
&gro_node->bitmask);
#endif
result = ERR_PTR(-EINPROGRESS);
}
goto done;
}
goto done;
}

/* There was no existing Homa packet that this packet could be
Expand All @@ -462,8 +417,6 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list,
* means we aren't heavily loaded; if batching does occur,
* homa_gro_complete will pick a different core).
*/
offload_core->held_skb = skb;
offload_core->held_bucket = hash;
if (likely(homa->gro_policy & HOMA_GRO_SAME_CORE))
homa_set_softirq_cpu(skb, smp_processor_id());

Expand Down
2 changes: 1 addition & 1 deletion homa_offload.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ int homa_offload_init(void);
void homa_send_ipis(void);
void homa_set_softirq_cpu(struct sk_buff *skb, int cpu);
#ifndef __STRIP__ /* See strip.py */
struct sk_buff *homa_tcp_gro_receive(struct list_head *held_list,
struct sk_buff *homa_tcp_gro_receive(struct list_head *gro_list,
struct sk_buff *skb);
#endif /* See strip.py */

Expand Down