Fixed race condition detecting reflections when threaded reading and writing is enabled.

Fixed handling of Jumbo Packets and LSO (Large Send Offload) behaviors to:
1) Avoid truncation of very large sends
2) Handle the case where the host network stack may not populate the IP header length for a large send.
This commit is contained in:
Mark Pizzolato 2011-08-12 17:53:32 -07:00
parent 724cdfcfb1
commit 015f2bb82e
2 changed files with 63 additions and 28 deletions

View file

@ -1069,7 +1069,7 @@ static int pcap_mac_if_win32(char *AdapterName, UCHAR MACAddress[6])
OidData->Length = 6; OidData->Length = 6;
memset(OidData->Data, 0, 6); memset(OidData->Data, 0, 6);
Status = p_PacketRequest(lpAdapter, FALSE, OidData); Status = p_PacketRequest(lpAdapter, FALSE, OidData);
if(Status) { if(Status) {
memcpy(MACAddress, OidData->Data, 6); memcpy(MACAddress, OidData->Data, 6);
@ -1203,7 +1203,7 @@ else
len = read(dev->fd_handle, buf, sizeof(buf)); len = read(dev->fd_handle, buf, sizeof(buf));
if (len > 0) { if (len > 0) {
status = 1; status = 1;
header.len = len; header.caplen = header.len = len;
_eth_callback((u_char *)dev, &header, buf); _eth_callback((u_char *)dev, &header, buf);
} else { } else {
status = 0; status = 0;
@ -1611,8 +1611,22 @@ t_stat _eth_write(ETH_DEV* dev, ETH_PACK* packet, ETH_PCALLBACK routine)
/* make sure packet is acceptable length */ /* make sure packet is acceptable length */
if ((packet->len >= ETH_MIN_PACKET) && (packet->len <= ETH_MAX_PACKET)) { if ((packet->len >= ETH_MIN_PACKET) && (packet->len <= ETH_MAX_PACKET)) {
int loopback_self_frame = LOOPBACK_SELF_FRAME(dev->physical_addr, packet->msg);
eth_packet_trace (dev, packet->msg, packet->len, "writing"); eth_packet_trace (dev, packet->msg, packet->len, "writing");
/* record sending of loopback packet (done before actual send to avoid race conditions with receiver) */
if (loopback_self_frame) {
#ifdef USE_READER_THREAD
pthread_mutex_lock (&dev->self_lock);
#endif
dev->loopback_self_sent += dev->reflections;
dev->loopback_self_sent_total++;
#ifdef USE_READER_THREAD
pthread_mutex_unlock (&dev->self_lock);
#endif
}
/* dispatch write request (synchronous; no need to save write info to dev) */ /* dispatch write request (synchronous; no need to save write info to dev) */
if (dev->pcap_mode) if (dev->pcap_mode)
status = pcap_sendpacket((pcap_t*)dev->handle, (u_char*)packet->msg, packet->len); status = pcap_sendpacket((pcap_t*)dev->handle, (u_char*)packet->msg, packet->len);
@ -1621,13 +1635,13 @@ t_stat _eth_write(ETH_DEV* dev, ETH_PACK* packet, ETH_PCALLBACK routine)
status = ((packet->len == write(dev->fd_handle, (void *)packet->msg, packet->len)) ? 0 : -1); status = ((packet->len == write(dev->fd_handle, (void *)packet->msg, packet->len)) ? 0 : -1);
#endif #endif
/* detect sending of loopback packet */ /* On error, correct loopback bookkeeping */
if ((status == 0) && (LOOPBACK_SELF_FRAME(dev->physical_addr, packet->msg))) { if ((status != 0) && loopback_self_frame) {
#ifdef USE_READER_THREAD #ifdef USE_READER_THREAD
pthread_mutex_lock (&dev->self_lock); pthread_mutex_lock (&dev->self_lock);
#endif #endif
dev->loopback_self_sent += dev->reflections; dev->loopback_self_sent -= dev->reflections;
dev->loopback_self_sent_total++; dev->loopback_self_sent_total--;
#ifdef USE_READER_THREAD #ifdef USE_READER_THREAD
pthread_mutex_unlock (&dev->self_lock); pthread_mutex_unlock (&dev->self_lock);
#endif #endif
@ -1808,7 +1822,7 @@ struct TCPHeader {
#define TCP_RST_FLAG (0x04) #define TCP_RST_FLAG (0x04)
#define TCP_SYN_FLAG (0x02) #define TCP_SYN_FLAG (0x02)
#define TCP_FIN_FLAG (0x01) #define TCP_FIN_FLAG (0x01)
#define TCP_FLAGS_MASK (0xFF) #define TCP_FLAGS_MASK (0xFFF)
uint16 window; uint16 window;
uint16 checksum; uint16 checksum;
uint16 urgent; uint16 urgent;
@ -1889,7 +1903,7 @@ _eth_fix_ip_jumbo_offload(ETH_DEV* dev, const u_char* msg, int len)
uint16 ip_flags; uint16 ip_flags;
uint16 frag_offset; uint16 frag_offset;
struct pcap_pkthdr header; struct pcap_pkthdr header;
uint16 tcp_flags; uint16 orig_tcp_flags;
/* Only interested in IP frames */ /* Only interested in IP frames */
if (ntohs(*proto) != 0x0800) { if (ntohs(*proto) != 0x0800) {
@ -1917,7 +1931,7 @@ _eth_fix_ip_jumbo_offload(ETH_DEV* dev, const u_char* msg, int len)
return; return;
} }
if (UDP->checksum == 0) if (UDP->checksum == 0)
break; /* UDP Cghecksums are disabled */ break; /* UDP Checksums are disabled */
orig_checksum = UDP->checksum; orig_checksum = UDP->checksum;
UDP->checksum = 0; UDP->checksum = 0;
UDP->checksum = pseudo_checksum(ntohs(UDP->length), IPPROTO_UDP, (uint16 *)(&IP->source_ip), (uint16 *)(&IP->dest_ip), (uint8 *)UDP); UDP->checksum = pseudo_checksum(ntohs(UDP->length), IPPROTO_UDP, (uint16 *)(&IP->source_ip), (uint16 *)(&IP->dest_ip), (uint8 *)UDP);
@ -1971,10 +1985,16 @@ _eth_fix_ip_jumbo_offload(ETH_DEV* dev, const u_char* msg, int len)
IP->flags = htons(ip_flags); IP->flags = htons(ip_flags);
IP->checksum = 0; IP->checksum = 0;
IP->checksum = ip_checksum((uint16 *)IP, IP_HLEN(IP)); IP->checksum = ip_checksum((uint16 *)IP, IP_HLEN(IP));
header.len = 14 + ntohs(IP->total_len); header.caplen = header.len = 14 + ntohs(IP->total_len);
eth_packet_trace (dev, ((u_char *)IP)-14, header.len, "reading Datagram fragment"); eth_packet_trace (dev, ((u_char *)IP)-14, header.len, "reading Datagram fragment");
#if ETH_MIN_JUMBO_FRAME < ETH_MAX_PACKET #if ETH_MIN_JUMBO_FRAME < ETH_MAX_PACKET
{ /* Debugging is easier it we read packets directly with pcap */ { /* Debugging is easier if we read packets directly with pcap
(i.e. we can use Wireshark to verify packet contents)
we don't want to do this all the time for 2 reasons:
1) sending through pcap involves kernel transitions and
2) if the current system reflects sent packets, the
receiving side will receive and process 2 copies of
any packets sent this way. */
ETH_PACK pkt; ETH_PACK pkt;
memset(&pkt, 0, sizeof(pkt)); memset(&pkt, 0, sizeof(pkt));
@ -1996,26 +2016,35 @@ _eth_fix_ip_jumbo_offload(ETH_DEV* dev, const u_char* msg, int len)
break; break;
case IPPROTO_TCP: case IPPROTO_TCP:
++dev->jumbo_fragmented; ++dev->jumbo_fragmented;
eth_packet_trace_ex (dev, ((u_char *)IP)-14, len, "Fragmenting Jumbo TCP segment", 1, dev->dbit);
TCP = (struct TCPHeader *)(((char *)IP)+IP_HLEN(IP)); TCP = (struct TCPHeader *)(((char *)IP)+IP_HLEN(IP));
tcp_flags = ntohs(TCP->data_offset_and_flags)&TCP_FLAGS_MASK; orig_tcp_flags = ntohs(TCP->data_offset_and_flags);
TCP->data_offset_and_flags = htons(((TCP_DATA_OFFSET(TCP)>>2)<<12)|TCP_ACK_FLAG); if (0 == ntohs(IP->total_len)) /* Sometimes the IP header indicates a packet size of 0 */
payload_len = ntohs(IP->total_len) - IP_HLEN(IP) - TCP_DATA_OFFSET(TCP); IP->total_len = htons(len-14); /* use the captured frame size to compute the IP length */
payload_len = ntohs(IP->total_len) - (IP_HLEN(IP) + TCP_DATA_OFFSET(TCP));
mtu_payload = ETH_MIN_JUMBO_FRAME - 14 - IP_HLEN(IP) - TCP_DATA_OFFSET(TCP); mtu_payload = ETH_MIN_JUMBO_FRAME - 14 - IP_HLEN(IP) - TCP_DATA_OFFSET(TCP);
while (payload_len > 0) { while (payload_len > 0) {
if (payload_len > mtu_payload) { if (payload_len > mtu_payload) {
TCP->data_offset_and_flags = htons(orig_tcp_flags&~(TCP_PSH_FLAG|TCP_FIN_FLAG|TCP_RST_FLAG));
IP->total_len = htons(mtu_payload + IP_HLEN(IP) + TCP_DATA_OFFSET(TCP)); IP->total_len = htons(mtu_payload + IP_HLEN(IP) + TCP_DATA_OFFSET(TCP));
} else { } else {
TCP->data_offset_and_flags = htons(ntohs(TCP->data_offset_and_flags)|(tcp_flags&(TCP_PSH_FLAG|TCP_ACK_FLAG|TCP_FIN_FLAG|TCP_RST_FLAG))); TCP->data_offset_and_flags = htons(orig_tcp_flags);
IP->total_len = htons(payload_len + IP_HLEN(IP) + TCP_DATA_OFFSET(TCP)); IP->total_len = htons(payload_len + IP_HLEN(IP) + TCP_DATA_OFFSET(TCP));
} }
IP->checksum = 0; IP->checksum = 0;
IP->checksum = ip_checksum((uint16 *)IP, IP_HLEN(IP)); IP->checksum = ip_checksum((uint16 *)IP, IP_HLEN(IP));
TCP->checksum = 0; TCP->checksum = 0;
TCP->checksum = pseudo_checksum(ntohs(IP->total_len)-IP_HLEN(IP), IPPROTO_TCP, (uint16 *)(&IP->source_ip), (uint16 *)(&IP->dest_ip), (uint8 *)TCP); TCP->checksum = pseudo_checksum(ntohs(IP->total_len)-IP_HLEN(IP), IPPROTO_TCP, (uint16 *)(&IP->source_ip), (uint16 *)(&IP->dest_ip), (uint8 *)TCP);
header.len = 14 + ntohs(IP->total_len); header.caplen = header.len = 14 + ntohs(IP->total_len);
eth_packet_trace_ex (dev, ((u_char *)IP)-14, header.len, "reading TCP segment", 1, dev->dbit); eth_packet_trace_ex (dev, ((u_char *)IP)-14, header.len, "reading TCP segment", 1, dev->dbit);
#if ETH_MIN_JUMBO_FRAME < ETH_MAX_PACKET #if ETH_MIN_JUMBO_FRAME < ETH_MAX_PACKET
{ /* Debugging is easier it we read packets directly with pcap */ { /* Debugging is easier if we read packets directly with pcap
(i.e. we can use Wireshark to verify packet contents)
we don't want to do this all the time for 2 reasons:
1) sending through pcap involves kernel transitions and
2) if the current system reflects sent packets, the
receiving side will receive and process 2 copies of
any packets sent this way. */
ETH_PACK pkt; ETH_PACK pkt;
memset(&pkt, 0, sizeof(pkt)); memset(&pkt, 0, sizeof(pkt));
@ -2073,7 +2102,7 @@ _eth_fix_ip_xsum_offload(ETH_DEV* dev, u_char* msg, int len)
if (ntohs(UDP->length) > (len-IP_HLEN(IP))) if (ntohs(UDP->length) > (len-IP_HLEN(IP)))
return; /* packet contained length exceeds packet size */ return; /* packet contained length exceeds packet size */
if (UDP->checksum == 0) if (UDP->checksum == 0)
return; /* UDP Cghecksums are disabled */ return; /* UDP Checksums are disabled */
orig_checksum = UDP->checksum; orig_checksum = UDP->checksum;
UDP->checksum = 0; UDP->checksum = 0;
UDP->checksum = pseudo_checksum(ntohs(UDP->length), IPPROTO_UDP, (uint16 *)(&IP->source_ip), (uint16 *)(&IP->dest_ip), (uint8 *)UDP); UDP->checksum = pseudo_checksum(ntohs(UDP->length), IPPROTO_UDP, (uint16 *)(&IP->source_ip), (uint16 *)(&IP->dest_ip), (uint8 *)UDP);
@ -2115,7 +2144,7 @@ _eth_callback(u_char* info, const struct pcap_pkthdr* header, const u_char* data
int i; int i;
eth_packet_trace (dev, data, header->len, "received"); eth_packet_trace (dev, data, header->len, "received");
f
for (i = 0; i < dev->addr_count; i++) { for (i = 0; i < dev->addr_count; i++) {
if (memcmp(data, dev->filter_address[i], 6) == 0) to_me = 1; if (memcmp(data, dev->filter_address[i], 6) == 0) to_me = 1;
if (memcmp(&data[6], dev->filter_address[i], 6) == 0) from_me = 1; if (memcmp(&data[6], dev->filter_address[i], 6) == 0) from_me = 1;
@ -2159,7 +2188,10 @@ _eth_callback(u_char* info, const struct pcap_pkthdr* header, const u_char* data
if (to_me && !from_me) { if (to_me && !from_me) {
#endif #endif
if (header->len > ETH_MIN_JUMBO_FRAME) { if (header->len > ETH_MIN_JUMBO_FRAME) {
_eth_fix_ip_jumbo_offload(dev, data, header->len); if (header->len <= header->caplen) /* Whole Frame captured? */
_eth_fix_ip_jumbo_offload(dev, data, header->len);
else
++dev->jumbo_truncated;
return; return;
} }
#if defined (USE_READER_THREAD) #if defined (USE_READER_THREAD)
@ -2255,7 +2287,7 @@ int eth_read(ETH_DEV* dev, ETH_PACK* packet, ETH_PCALLBACK routine)
len = read(dev->fd_handle, buf, sizeof(buf)); len = read(dev->fd_handle, buf, sizeof(buf));
if (len > 0) { if (len > 0) {
status = 1; status = 1;
header.len = len; header.caplen = header.len = len;
_eth_callback((u_char *)dev, &header, buf); _eth_callback((u_char *)dev, &header, buf);
} else { } else {
status = 0; status = 0;
@ -2435,11 +2467,6 @@ t_stat eth_filter_hash(ETH_DEV* dev, int addr_count, ETH_MAC* const addresses,
#ifdef USE_BPF #ifdef USE_BPF
if (dev->pcap_mode) { if (dev->pcap_mode) {
#ifdef USE_READER_THREAD
pthread_mutex_lock (&dev->lock);
ethq_clear (&dev->read_queue); /* Empty FIFO Queue when filter list changes */
pthread_mutex_unlock (&dev->lock);
#endif
/* compile filter string */ /* compile filter string */
if ((status = pcap_compile(dev->handle, &bpf, buf, 1, bpf_netmask)) < 0) { if ((status = pcap_compile(dev->handle, &bpf, buf, 1, bpf_netmask)) < 0) {
sprintf(errbuf, "%s", pcap_geterr(dev->handle)); sprintf(errbuf, "%s", pcap_geterr(dev->handle));
@ -2467,6 +2494,11 @@ t_stat eth_filter_hash(ETH_DEV* dev, int addr_count, ETH_MAC* const addresses,
} }
pcap_freecode(&bpf); pcap_freecode(&bpf);
} }
#ifdef USE_READER_THREAD
pthread_mutex_lock (&dev->lock);
ethq_clear (&dev->read_queue); /* Empty FIFO Queue when filter list changes */
pthread_mutex_unlock (&dev->lock);
#endif
} }
#endif /* USE_BPF */ #endif /* USE_BPF */
@ -2627,6 +2659,8 @@ void eth_show_dev (FILE *st, ETH_DEV* dev)
fprintf(st, " Jumbo Dropped: %d\n", dev->jumbo_dropped); fprintf(st, " Jumbo Dropped: %d\n", dev->jumbo_dropped);
if (dev->jumbo_fragmented) if (dev->jumbo_fragmented)
fprintf(st, " Jumbo Fragmented: %d\n", dev->jumbo_fragmented); fprintf(st, " Jumbo Fragmented: %d\n", dev->jumbo_fragmented);
if (dev->jumbo_truncated)
fprintf(st, " Jumbo Truncated: %d\n", dev->jumbo_truncated);
#if defined(USE_READER_THREAD) #if defined(USE_READER_THREAD)
fprintf(st, " Asynch Interrupts: %s\n", dev->asynch_io?"Enabled":"Disabled"); fprintf(st, " Asynch Interrupts: %s\n", dev->asynch_io?"Enabled":"Disabled");
if (dev->asynch_io) if (dev->asynch_io)

View file

@ -118,7 +118,7 @@
#define ETH_DEV_DESC_MAX 256 /* maximum device description size */ #define ETH_DEV_DESC_MAX 256 /* maximum device description size */
#define ETH_MIN_PACKET 60 /* minimum ethernet packet size */ #define ETH_MIN_PACKET 60 /* minimum ethernet packet size */
#define ETH_MAX_PACKET 1514 /* maximum ethernet packet size */ #define ETH_MAX_PACKET 1514 /* maximum ethernet packet size */
#define ETH_MAX_JUMBO_FRAME 16384 /* maximum ethernet jumbo frame size */ #define ETH_MAX_JUMBO_FRAME 65536 /* maximum ethernet jumbo frame size (or Offload Segment Size) */
#define ETH_MAX_DEVICE 10 /* maximum ethernet devices */ #define ETH_MAX_DEVICE 10 /* maximum ethernet devices */
#define ETH_CRC_SIZE 4 /* ethernet CRC size */ #define ETH_CRC_SIZE 4 /* ethernet CRC size */
#define ETH_FRAME_SIZE (ETH_MAX_PACKET+ETH_CRC_SIZE) /* ethernet maximum frame size */ #define ETH_FRAME_SIZE (ETH_MAX_PACKET+ETH_CRC_SIZE) /* ethernet maximum frame size */
@ -193,10 +193,11 @@ struct eth_device {
ETH_MAC host_nic_phy_hw_addr; /* MAC address of the attached NIC */ ETH_MAC host_nic_phy_hw_addr; /* MAC address of the attached NIC */
uint32 jumbo_fragmented; /* Giant IPv4 Frames Fragmented */ uint32 jumbo_fragmented; /* Giant IPv4 Frames Fragmented */
uint32 jumbo_dropped; /* Giant Frames Dropped */ uint32 jumbo_dropped; /* Giant Frames Dropped */
uint32 jumbo_truncated; /* Giant Frames too big for capture buffer - Dropped */
DEVICE* dptr; /* device ethernet is attached to */ DEVICE* dptr; /* device ethernet is attached to */
uint32 dbit; /* debugging bit */ uint32 dbit; /* debugging bit */
int reflections; /* packet reflections on interface */ int reflections; /* packet reflections on interface */
int need_crc; /* device needs CRC (Cyclic Redundancy Check) */ int need_crc; /* device needs CRC (Cyclic Redundancy Check) */
#if defined (USE_READER_THREAD) #if defined (USE_READER_THREAD)
int asynch_io; /* Asynchronous Interrupt scheduling enabled */ int asynch_io; /* Asynchronous Interrupt scheduling enabled */
int asynch_io_latency; /* instructions to delay pending interrupt */ int asynch_io_latency; /* instructions to delay pending interrupt */