I'm not receiving packets using XDP_TX

// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
// Copyright (c) 2018 Netronome Systems, Inc.
#define BPF_NO_GLOBAL_DATA

#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <linux/bpf.h>
#include <linux/icmp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ipv6.h>
#include <endian.h>

#include "bpf_endian.h"
#include "bpf_helpers.h"
#include "jhash.h"
#include "common.h"
#include "parsing_helpers.h"

#include <stdint.h>

/*
 * Fold a wide one's-complement accumulator and return the bitwise complement
 * of the result, truncated to 16 bits.
 *
 * At most four passes are made. Each pass adds everything above bit 15 back
 * into the low 16 bits; a pass is a no-op once nothing remains above bit 15.
 */
__attribute__((__always_inline__))
static inline __u16 csum_fold_helper(__u64 csum) {
    int pass;
#pragma unroll
    for (pass = 0; pass < 4; ++pass) {
        __u64 carry = csum >> 16;
        if (carry != 0) {
            csum = carry + (csum & 0xffff);
        }
    }
    return (__u16)~csum;
}

/*
 * Checksum `data_size` bytes starting at `data_start`.
 *
 * The running sum is obtained from bpf_csum_diff(), seeded with the value
 * currently stored in *csum, and is then folded with csum_fold_helper().
 * The folded result is written back to *csum.
 */
__attribute__((__always_inline__))
static inline void ipv4_csum(void* data_start, int data_size, __u64* csum) {
    __u64 sum = bpf_csum_diff(0, 0, data_start, data_size, *csum);

    *csum = csum_fold_helper(sum);
}

/*
 * Compute a layer-4 checksum for the `data_size` bytes at `data_start`,
 * including the IPv4 addresses, protocol and length taken from `iph`.
 *
 * Every contribution is accumulated through bpf_csum_diff(), starting from
 * the seed in *csum. The folded result is stored back into *csum.
 *
 * The protocol and length words are byte-swapped unconditionally with
 * __builtin_bswap32() before being summed.
 * NOTE(review): that presumably assumes a little-endian target - confirm.
 */
__attribute__((__always_inline__))
static inline void ipv4_l4_csum(void* data_start, __u32 data_size,
    __u64* csum, struct iphdr* iph) {
    __u64 sum = *csum;
    __u32 word;

    /* Source and destination addresses. */
    sum = bpf_csum_diff(0, 0, &iph->saddr, sizeof(__be32), sum);
    sum = bpf_csum_diff(0, 0, &iph->daddr, sizeof(__be32), sum);

    /* Protocol number, widened to 32 bits and byte-swapped. */
    word = __builtin_bswap32((__u32)(iph->protocol));
    sum = bpf_csum_diff(0, 0, &word, sizeof(__u32), sum);

    /* Segment length, byte-swapped. */
    word = __builtin_bswap32((__u32)(data_size));
    sum = bpf_csum_diff(0, 0, &word, sizeof(__u32), sum);

    /* The segment itself (header plus payload). */
    sum = bpf_csum_diff(0, 0, data_start, data_size, sum);

    *csum = csum_fold_helper(sum);
}

/*
 * XDP entry point.
 *
 * - Frames that are too short, are not IPv4, or do not carry TCP are passed
 *   to the stack unchanged (XDP_PASS).
 * - TCP segments whose computed length exceeds 2000 bytes are dropped.
 * - TCP segments addressed to port 65535 are rewritten and sent back out
 *   (XDP_TX): the Ethernet source becomes the frame's original destination,
 *   the Ethernet destination becomes the hard-coded gateway MAC, the IP
 *   source becomes the original IP destination, the IP destination is
 *   replaced, and the IP and TCP checksums are recomputed.
 *
 * Packets with IPv4 options (ihl != 5) are passed through untouched: the
 * TCP header is located at a fixed sizeof(struct iphdr) offset and the IP
 * checksum below covers exactly sizeof(struct iphdr) bytes, so both would be
 * wrong for a longer IP header.
 */
SEC("prog")
int xdp_drop_benchmark_traffic(struct xdp_md* ctx)
{
    void* data_end = (void*)(long)ctx->data_end;
    void* data = (void*)(long)ctx->data;
    struct ethhdr* eth = data;

    if (data + sizeof(*eth) > data_end) {
        return XDP_PASS;
    }

    if (eth->h_proto != htons(ETH_P_IP)) {
        return XDP_PASS;
    }

    struct iphdr* iph = data + sizeof(*eth);
    if (data + sizeof(*eth) + sizeof(*iph) > data_end) {
        return XDP_PASS;
    }

    if (iph->protocol != IPPROTO_TCP) {
        return XDP_PASS;
    }

    struct tcphdr* tcph = data + sizeof(*eth) + sizeof(*iph);
    if (data + sizeof(*eth) + sizeof(*iph) + sizeof(*tcph) > data_end) {
        return XDP_PASS;
    }

    /* tot_len is in network byte order; the 16-bit swap is symmetric. */
    __u16 tcp_len = htons(iph->tot_len) - (iph->ihl << 2);

    if (tcp_len > 2000) {
        return XDP_DROP;
    }

    /*
     * With IP options present, tcph above does not point at the TCP header,
     * so neither the port test nor the rewrite below would be valid.
     */
    if (iph->ihl != 5) {
        return XDP_PASS;
    }

    if (tcph->dest != htons(65535)) {
        return XDP_PASS;
    }

    /* Next-hop MAC, filled byte by byte (92:10:95:86:26:bf). */
    unsigned char gateway[ETH_ALEN];
    gateway[0] = 0x92; gateway[1] = 0x10;
    gateway[2] = 0x95; gateway[3] = 0x86;
    gateway[4] = 0x26; gateway[5] = 0xbf;

    /* Transmit with our own MAC (the frame's old destination) as source. */
    __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
    __builtin_memcpy(eth->h_dest, gateway, ETH_ALEN);

    bpf_debug("MAC Source: %i:%i:%i", eth->h_source[0], eth->h_source[1], eth->h_source[2]);
    bpf_debug("%i:%i:%i\n", eth->h_source[3], eth->h_source[4], eth->h_source[5]);

    bpf_debug("MAC Destin: %i:%i:%i", eth->h_dest[0], eth->h_dest[1], eth->h_dest[2]);
    bpf_debug("%i:%i:%i\n", eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]);

    iph->saddr = iph->daddr;
    /*
     * NOTE(review): 4266428307 is 0xFE4C8793. If htonl() here is the usual
     * host-to-network conversion, the bytes written are FE 4C 87 93, i.e.
     * 254.76.135.147 rather than 147.135.76.254 - verify the byte order of
     * this constant.
     */
    iph->daddr = htonl(4266428307);

    /* The checksum field must be zero while the sum is being computed. */
    __u64 csum = 0;
    iph->check = 0;
    ipv4_csum(iph, sizeof(struct iphdr), &csum);
    iph->check = csum;

    csum = 0;
    tcph->check = 0;
    ipv4_l4_csum(tcph, tcp_len, &csum, iph);
    tcph->check = csum;

    bpf_debug("Checksum New: %i | %i\n", iph->check, tcph->check);

    return XDP_TX;
}

/*
 * License string for this object, tagged with SEC("license").
 * NOTE(review): presumably read by the BPF loader to decide whether
 * GPL-only helpers may be used - confirm against the loader in use.
 */
char _license[] SEC("license") = "GPL";

Strangely, I'm not receiving the TCP packets on my destination server (147.135.76.254 | 4266428307), and on the local server the packet doesn't appear in tcpdump -i eth0 dst port 65535 either.

Am I doing something wrong? If so, how can I fix the code?

The purpose is: Receive packet on server port LOCAL:65535, and redirect to DESTINATION:65535


Updates:

  1. The program seems to work correctly, I'm getting the message "Received!" in cat /sys/kernel/debug/tracing/trace_pipe

  2. The packet did not reach the destination server

  3. On the destination server (Windows), I'm running Wireshark with TCP checksum verification disabled. This shows that there is no filter in between that blocks packets with an invalid checksum, for example:

    On the source server (Linux), I send hping3 147.135.76.254 -p 65535 --badcksum and receive all packets (with invalid checksums) in Wireshark (destination server)

  4. On the source server (XDP), when I execute tcpdump -i eth0 tcp port 65535, I'm not receiving any packets.

    Note: I used ethtool -K eth0 tx off to disable offload / chksum

  5. If I change XDP_TX to XDP_PASS, I can see this packet in tcpdump (command above):
    14:38:10.964195 IP d2-2-us-east-va-1.39698 > 147.135.76.254.65535: Flags [S], seq 3962643128, win 14600, options [mss 1460,sackOK,TS val 2414158903 ecr 0,nop,wscale 9], length 0

  6. In ethtool -S eth0 shows rx/tx correctly

  7. If I change from "XDP_TX" to "XDP_PASS", I get this output in tcpdump: 135.148.232.155.58062 > 147.135.76.254.65535: Flags [S], cksum 0x41c3 (correct), seq 2996104997, win 14600, options [mss 1460,sackOK,TS val 2614058020 ecr 0,nop,wscale 9], length 0. That means my TCP and IP checksum calculation is working correctly.

  8. I updated the code to set the MAC addresses correctly, and the problem continues.


The commands executed:

[root@d2-2-us-east-va-1 ~]# arp -a
_gateway (135.148.232.1) at 92:10:95:86:26:bf [ether] on eth0

[root@d2-2-us-east-va-1 ~]# cat /sys/class/net/eth0/address
fa:16:3e:0e:cf:a4

[root@d2-2-us-east-va-1 ~]# ifconfig ... (to confirm mac address)



Solution 1:[1]

You are not updating the source and destination MAC addresses of the Ethernet layer. Some NICs, especially virtualized ones, will drop outgoing packets if the source MAC does not match the configured MAC of the NIC.

If you are reflecting the packets back to the same physical machine on your local network (local machine or router/default gateway), you can simply swap the src and dst addresses.

I updated your code to add this:

// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
// Copyright (c) 2018 Netronome Systems, Inc.
#define BPF_NO_GLOBAL_DATA

#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <linux/bpf.h>
#include <linux/icmp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ipv6.h>
#include <endian.h>

#include "bpf_endian.h"
#include "bpf_helpers.h"
#include "jhash.h"
#include "common.h"
#include "parsing_helpers.h"

#include <stdint.h>

/*
 * Reduce a wide one's-complement sum and return its 16-bit complement.
 *
 * The bits above bit 15 are added back into the low 16 bits, at most four
 * times; a round changes nothing once those upper bits are all zero.
 */
__attribute__((__always_inline__)) static inline __u16 csum_fold_helper(__u64 csum)
{
    int round;
#pragma unroll
    for (round = 0; round < 4; round++)
    {
        __u64 high = csum >> 16;
        if (high)
        {
            csum = (csum & 0xffff) + high;
        }
    }
    return (__u16)~csum;
}

/*
 * Checksum `data_size` bytes at `data_start`: sum them with bpf_csum_diff()
 * using *csum as the seed, fold the sum with csum_fold_helper(), and store
 * the folded value back in *csum.
 */
__attribute__((__always_inline__)) static inline void ipv4_csum(void *data_start, int data_size, __u64 *csum)
{
    __u64 running = bpf_csum_diff(0, 0, data_start, data_size, *csum);

    *csum = csum_fold_helper(running);
}

/*
 * Layer-4 checksum over the `data_size` bytes at `data_start`, preceded by
 * the addresses, protocol and length taken from `iph`.
 *
 * All parts are accumulated with bpf_csum_diff(), seeded from *csum; the
 * folded result replaces *csum.
 *
 * Protocol and length are byte-swapped unconditionally via
 * __builtin_bswap32().
 * NOTE(review): presumably this assumes a little-endian target - confirm.
 */
__attribute__((__always_inline__)) static inline void ipv4_l4_csum(void *data_start, __u32 data_size,
                                                                   __u64 *csum, struct iphdr *iph)
{
    __u64 acc = *csum;
    __u32 field;

    /* Addresses. */
    acc = bpf_csum_diff(0, 0, &iph->saddr, sizeof(__be32), acc);
    acc = bpf_csum_diff(0, 0, &iph->daddr, sizeof(__be32), acc);

    /* Protocol, then length, each as a swapped 32-bit word. */
    field = __builtin_bswap32((__u32)(iph->protocol));
    acc = bpf_csum_diff(0, 0, &field, sizeof(__u32), acc);
    field = __builtin_bswap32((__u32)(data_size));
    acc = bpf_csum_diff(0, 0, &field, sizeof(__u32), acc);

    /* Segment bytes. */
    acc = bpf_csum_diff(0, 0, data_start, data_size, acc);

    *csum = csum_fold_helper(acc);
}

/*
 * XDP entry point.
 *
 * - Frames that are too short, are not IPv4, or do not carry TCP are passed
 *   to the stack unchanged (XDP_PASS).
 * - TCP segments whose computed length exceeds 2000 bytes are dropped.
 * - TCP segments addressed to port 65535 get their Ethernet source and
 *   destination addresses swapped, their IP source set to the original IP
 *   destination, a new IP destination, freshly computed IP and TCP
 *   checksums, and are sent back out (XDP_TX).
 *
 * Packets with IPv4 options (ihl != 5) are passed through untouched: the
 * TCP header is located at a fixed sizeof(struct iphdr) offset and the IP
 * checksum below covers exactly sizeof(struct iphdr) bytes, so both would be
 * wrong for a longer IP header.
 */
SEC("prog")
int xdp_drop_benchmark_traffic(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;
    struct ethhdr *eth = data;

    if (data + sizeof(*eth) > data_end)
    {
        return XDP_PASS;
    }

    if (eth->h_proto != htons(ETH_P_IP))
    {
        return XDP_PASS;
    }

    struct iphdr *iph = data + sizeof(*eth);
    if (data + sizeof(*eth) + sizeof(*iph) > data_end)
    {
        return XDP_PASS;
    }

    if (iph->protocol != IPPROTO_TCP)
    {
        return XDP_PASS;
    }

    struct tcphdr *tcph = data + sizeof(*eth) + sizeof(*iph);
    if (data + sizeof(*eth) + sizeof(*iph) + sizeof(*tcph) > data_end)
    {
        return XDP_PASS;
    }

    /* tot_len is in network byte order; the 16-bit swap is symmetric. */
    __u16 tcp_len = htons(iph->tot_len) - (iph->ihl << 2);

    if (tcp_len > 2000)
    {
        return XDP_DROP;
    }

    /*
     * With IP options present, tcph above does not point at the TCP header,
     * so neither the port test nor the rewrite below would be valid.
     */
    if (iph->ihl != 5)
    {
        return XDP_PASS;
    }

    if (tcph->dest != htons(65535))
    {
        return XDP_PASS;
    }

    bpf_debug("Checksum Old: %i | %i\n", iph->check, tcph->check);

    /* Swap the Ethernet addresses so the frame goes back to its sender. */
    unsigned char tmp[ETH_ALEN];
    __builtin_memcpy(tmp, eth->h_source, ETH_ALEN);
    __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
    __builtin_memcpy(eth->h_dest, tmp, ETH_ALEN);

    iph->saddr = iph->daddr;
    /*
     * NOTE(review): 4266428307 is 0xFE4C8793. If htonl() here is the usual
     * host-to-network conversion, the bytes written are FE 4C 87 93, i.e.
     * 254.76.135.147 rather than 147.135.76.254 - verify the byte order of
     * this constant.
     */
    iph->daddr = htonl(4266428307);

    /* The checksum field must be zero while the sum is being computed. */
    __u64 csum = 0;
    iph->check = 0;
    ipv4_csum(iph, sizeof(struct iphdr), &csum);
    iph->check = csum;

    csum = 0;
    tcph->check = 0;
    ipv4_l4_csum(tcph, tcp_len, &csum, iph);
    tcph->check = csum;

    bpf_debug("Checksum New: %i | %i\n", iph->check, tcph->check);

    return XDP_TX;
}

/*
 * License string for this object, tagged with SEC("license").
 * NOTE(review): presumably read by the BPF loader to decide whether
 * GPL-only helpers may be used - confirm against the loader in use.
 */
char _license[] SEC("license") = "GPL";

The changed lines being:

/* Swap the Ethernet source and destination addresses through a temporary. */
unsigned char tmp[ETH_ALEN];
__builtin_memcpy(tmp, eth->h_source, ETH_ALEN);
__builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
__builtin_memcpy(eth->h_dest, tmp, ETH_ALEN);

Please note that this does not work if you are switching the packet (sending it to a different physical device), in which case you need to use ARP to get the correct MAC or hardcode it.

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Dylan Reimerink