[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] Adding support for Stateless Static NAT for TAP devices
From: |
John Basila |
Subject: |
[Qemu-devel] Adding support for Stateless Static NAT for TAP devices |
Date: |
Thu, 30 Aug 2012 09:12:19 +0300 |
When running multiple instances of QEMU from the same image file
(using -snapshot) and connecting each instance to a dedicated TAP
device, the Guest OSes will most likely not be able to communicate
with the outside world: every guest sends its packets from the
same IP address, so the Host OS cannot tell which TAP device/Guest
OS a reply should be returned to. Stateless Static Network Address
Translation (SSNAT) lets QEMU map the network of the Guest OS onto
the network of the TAP device, giving each Guest OS a unique IP
address and thereby resolving this case.
The only mandatory argument to SSNAT is the Guest OS network IP;
the rest is derived from the underlying TAP device.
Signed-off-by: John Basila <address@hidden>
---
net/tap.c | 369 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
qapi-schema.json | 5 +-
qemu-options.hx | 10 ++-
3 files changed, 381 insertions(+), 3 deletions(-)
diff --git a/net/tap.c b/net/tap.c
index 1971525..2408a49 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -39,16 +39,88 @@
#include "qemu-char.h"
#include "qemu-common.h"
#include "qemu-error.h"
+#include "qemu_socket.h"
#include "net/tap-linux.h"
#include "hw/vhost_net.h"
+#include "checksum.h"
+
+#define ETH_P_ARP 0x0806 /* Address Resolution packet */
+#define ETH_P_IP 0x0800 /* Internet Protocol packet */
+#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */
+
+#define ETH_ALEN 6
+
+struct ethhdr {
+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
+ unsigned char h_source[ETH_ALEN]; /* source ether addr */
+ unsigned short h_proto; /* packet type ID field */
+};
+
+#define IP_PROTO_TCP 6
+#define IP_PROTO_UDP 17
+#define IPV4_ADRESS_LENGTH 4
+
+struct arphdr {
+ unsigned short ar_hrd; /* format of hardware address */
+ unsigned short ar_pro; /* format of protocol address */
+ unsigned char ar_hln; /* length of hardware address */
+ unsigned char ar_pln; /* length of protocol address */
+ unsigned short ar_op; /* ARP opcode (command) */
+
+ /*
+ * Ethernet looks like this : This bit is variable sized however...
+ */
+ unsigned char ar_sha[ETH_ALEN]; /* sender hardware
address */
+ unsigned char ar_sip[IPV4_ADRESS_LENGTH]; /* sender IP address */
+ unsigned char ar_tha[ETH_ALEN]; /* target hardware
address */
+ unsigned char ar_tip[IPV4_ADRESS_LENGTH]; /* target IP address */
+};
+
+#define IP_HEADER_LENGTH(ip) (((ip->ip_hlv)&0xf) << 2)
+
+/** An IPv4 packet header */
+struct iphdr {
+ uint8_t ip_hlv; /**< Header length and version of the header
*/
+ uint8_t ip_tos; /**< Type of Service
*/
+ uint16_t ip_len; /**< Length in octets, incl. this header and data
*/
+ uint16_t ip_id; /**< ID is used to aid in assembling fragments
*/
+ uint16_t ip_off; /**< Info about fragmentation (control, offset)
*/
+ uint8_t ip_ttl; /**< Time to Live
*/
+ uint8_t ip_p; /**< Next level protocol type
*/
+ uint16_t ip_sum; /**< Header checksum
*/
+ uint32_t ip_src; /**< Source IP address
*/
+ uint32_t ip_dst; /**< Destination IP address
*/
+};
+
+/** UDP packet header */
+typedef struct udphdr {
+ uint16_t uh_sport; /* source port */
+ uint16_t uh_dport; /* destination port */
+ uint16_t uh_ulen; /* udp length */
+ uint16_t uh_chksum;/* udp checksum */
+} udp_header;
+
+
/* Maximum GSO packet size (64k) plus plenty of room for
* the ethernet and virtio_net headers
*/
#define TAP_BUFSIZE (4096 + 65536)
+typedef struct SSNATInfo { /* per-TAP stateless static NAT configuration */
+ unsigned int ssnat_active : 1; /* when set, tap_ssnat_reveive()/tap_ssnat_send() translate packets */
+
+ struct in_addr ssnat_ifaddr; /* TAP interface address, read via SIOCGIFADDR */
+ struct in_addr ssnat_ifmask; /* TAP interface netmask, read via SIOCGIFNETMASK */
+ uint8_t ssnat_hwaddr[ETH_ALEN]; /* TAP interface MAC, read via SIOCGIFHWADDR */
+
+ struct in_addr ssnat_guest; /* guest-side network from the ssnat rule */
+ struct in_addr ssnat_host; /* host-side network (rule value or ssnat_ifaddr) */
+ struct in_addr ssnat_mask; /* mask selecting the network bits (rule value or ssnat_ifmask) */
+} SSNATInfo;
+
typedef struct TAPState {
NetClientState nc;
int fd;
@@ -59,6 +131,9 @@ typedef struct TAPState {
unsigned int write_poll : 1;
unsigned int using_vnet_hdr : 1;
unsigned int has_ufo: 1;
+
+ SSNATInfo ssnat_info;
+
VHostNetState *vhost_net;
unsigned host_vnet_hdr_len;
} TAPState;
@@ -154,11 +229,154 @@ static ssize_t tap_receive_raw(NetClientState *nc, const
uint8_t *buf, size_t si
return tap_write_packet(s, iov, iovcnt);
}
+/*
+ * Stateless NAT address helpers.  Every argument is a struct in_addr
+ * (network byte order); arguments are parenthesized so that arbitrary
+ * expressions, e.g. *ptr or s->field, expand correctly.
+ *
+ * SSNAT_MAP_IP:   keep the host bits of _orig and take the network bits
+ *                 (those selected by _mask) from _to.
+ * SSNAT_IS_MATCH: non-zero when _orig and _from share the same network
+ *                 bits under _mask.
+ */
+#define SSNAT_MAP_IP(_orig, _to, _mask) \
+    ((((_orig).s_addr) & ~((_mask).s_addr)) | \
+     (((_to).s_addr) & ((_mask).s_addr)))
+#define SSNAT_IS_MATCH(_orig, _from, _mask) \
+    ((((_orig).s_addr) & ((_mask).s_addr)) == \
+     (((_from).s_addr) & ((_mask).s_addr)))
+
+/*
+ * Rewrite the sender and target IPv4 addresses of an ARP payload.
+ *
+ * buf points just past the Ethernet header and size is the number of
+ * bytes available there.  An address whose network bits (under mask)
+ * equal those of 'from' gets its network bits replaced by those of 'to'.
+ * Payloads shorter than struct arphdr are left untouched.
+ */
+static void tap_ssnat_translate_arp(uint8_t* buf, size_t size,
+                                    const struct in_addr from,
+                                    const struct in_addr to,
+                                    const struct in_addr mask)
+{
+    struct arphdr *arp = (struct arphdr *)buf;
+    struct in_addr sender;
+    struct in_addr target;
+
+    if (size < sizeof(struct arphdr)) {
+        return;
+    }
+
+    /* The addresses are byte arrays; copy them out before inspecting. */
+    memcpy(&sender, arp->ar_sip, sizeof(sender));
+    if (SSNAT_IS_MATCH(sender, from, mask)) {
+        sender.s_addr = SSNAT_MAP_IP(sender, to, mask);
+        memcpy(arp->ar_sip, &sender, sizeof(sender));
+    }
+
+    memcpy(&target, arp->ar_tip, sizeof(target));
+    if (SSNAT_IS_MATCH(target, from, mask)) {
+        target.s_addr = SSNAT_MAP_IP(target, to, mask);
+        memcpy(arp->ar_tip, &target, sizeof(target));
+    }
+}
+
+/*
+ * Recompute the checksums of an IPv4 packet whose addresses were rewritten.
+ *
+ * pBuffer points at the IPv4 header and size is the number of bytes
+ * available from there.  The IPv4 header checksum is regenerated; the TCP
+ * or UDP checksum (which covers the addresses through the pseudo-header)
+ * is regenerated for unfragmented TCP/UDP packets only.
+ *
+ * A packet whose header length or total length is inconsistent with size
+ * is left untouched, so nothing outside the buffer is read or written.
+ */
+static void tap_ssnat_adjust_ip_checksums(uint8_t* pBuffer, const size_t size)
+{
+    struct iphdr *ip = (struct iphdr *)pBuffer;
+    uint16_t *csum_field;
+    uint16_t csum;
+    uint32_t sum;
+    size_t hdr_len;
+    size_t total_len;
+    size_t l4_len;
+    size_t l4_min;
+
+    if (size < sizeof(struct iphdr)) {
+        return;
+    }
+
+    hdr_len = IP_HEADER_LENGTH(ip);
+    total_len = ntohs(ip->ip_len);
+    /* ip_len comes from the wire: never trust it beyond the buffer. */
+    if (hdr_len < sizeof(struct iphdr) || hdr_len > total_len ||
+        total_len > size) {
+        return;
+    }
+
+    /* The header checksum is computed with its own field set to zero. */
+    ip->ip_sum = 0;
+    sum = net_checksum_add(hdr_len, pBuffer);
+    ip->ip_sum = htons(net_checksum_finish(sum));
+
+    /*
+     * More-fragments flag (0x2000) or a non-zero fragment offset (0x1fff):
+     * the transport checksum spans the whole datagram and, for non-first
+     * fragments, the bytes after the IP header are payload rather than a
+     * TCP/UDP header, so they must not be modified.
+     */
+    if (ntohs(ip->ip_off) & 0x3fff) {
+        return;
+    }
+
+    switch (ip->ip_p) {
+    case IP_PROTO_TCP:
+        l4_min = sizeof(struct tcphdr);
+        csum_field = &((struct tcphdr *)(pBuffer + hdr_len))->check;
+        break;
+
+    case IP_PROTO_UDP:
+        l4_min = sizeof(struct udphdr);
+        csum_field = &((struct udphdr *)(pBuffer + hdr_len))->uh_chksum;
+        break;
+
+    default:
+        return;
+    }
+
+    l4_len = total_len - hdr_len;
+    if (l4_len < l4_min) {
+        /* Truncated transport header: the checksum field is not there. */
+        return;
+    }
+
+    if (ip->ip_p == IP_PROTO_UDP && *csum_field == 0) {
+        /* A zero UDP checksum means the sender did not generate one. */
+        return;
+    }
+
+    *csum_field = 0;
+    sum = net_checksum_add(l4_len, pBuffer + hdr_len);
+    /* Pseudo-header: source + destination address, protocol, length. */
+    sum += net_checksum_add(sizeof(ip->ip_src) + sizeof(ip->ip_dst),
+                            (uint8_t *)&ip->ip_src);
+    sum += (uint32_t)(ip->ip_p + l4_len);
+    csum = net_checksum_finish(sum);
+    if (csum == 0 && ip->ip_p == IP_PROTO_UDP) {
+        /* For UDP a computed zero is transmitted as all ones. */
+        csum = 0xffff;
+    }
+    *csum_field = htons(csum);
+}
+
+/*
+ * Rewrite the source and destination addresses of an IPv4 packet.
+ *
+ * buf points at the IPv4 header and size is the number of bytes available
+ * there.  An address whose network bits (under mask) equal those of 'from'
+ * gets its network bits replaced by those of 'to'.  When at least one
+ * address changed, the checksums are recomputed by
+ * tap_ssnat_adjust_ip_checksums().
+ */
+static void tap_ssnat_translate_ip(uint8_t* buf, size_t size,
+                                   const struct in_addr from,
+                                   const struct in_addr to,
+                                   const struct in_addr mask)
+{
+    struct iphdr *ip = (struct iphdr *)buf;
+    struct in_addr src;
+    struct in_addr dst;
+    int rewritten = 0;
+
+    if (size < sizeof(struct iphdr)) {
+        return;
+    }
+
+    /* Snapshot both addresses before either one is modified. */
+    src.s_addr = ip->ip_src;
+    dst.s_addr = ip->ip_dst;
+
+    if (SSNAT_IS_MATCH(src, from, mask)) {
+        ip->ip_src = SSNAT_MAP_IP(src, to, mask);
+        rewritten = 1;
+    }
+
+    if (SSNAT_IS_MATCH(dst, from, mask)) {
+        ip->ip_dst = SSNAT_MAP_IP(dst, to, mask);
+        rewritten = 1;
+    }
+
+    if (rewritten) {
+        tap_ssnat_adjust_ip_checksums(buf, size);
+    }
+}
+
+/*
+ * Translate the addresses carried by one Ethernet frame.
+ *
+ * buf/size describe the frame starting at the Ethernet header.  ARP and
+ * IPv4 payloads are passed to their translators; every other EtherType,
+ * and frames shorter than an Ethernet header, are left untouched.
+ */
+static void tap_ssnat_translate(uint8_t* buf, size_t size,
+                                const struct in_addr from,
+                                const struct in_addr to,
+                                const struct in_addr mask)
+{
+    const struct ethhdr *eth = (const struct ethhdr *)buf;
+    uint8_t *payload;
+    size_t payload_len;
+    unsigned int proto;
+
+    if (size < sizeof(struct ethhdr)) {
+        return;
+    }
+
+    payload = buf + sizeof(struct ethhdr);
+    payload_len = size - sizeof(struct ethhdr);
+
+    /* h_proto is in network byte order on the wire. */
+    proto = ntohs(eth->h_proto);
+    if (proto == ETH_P_ARP) {
+        tap_ssnat_translate_arp(payload, payload_len, from, to, mask);
+    } else if (proto == ETH_P_IP) {
+        tap_ssnat_translate_ip(payload, payload_len, from, to, mask);
+    }
+}
+
+/*
+ * Called from tap_receive() before the frame is passed on: when SSNAT is
+ * active, map addresses in the ssnat_guest network onto the ssnat_host
+ * network.  The frame in buf is modified in place.
+ */
+static void tap_ssnat_reveive(TAPState *s, uint8_t* buf, size_t size)
+{
+    const SSNATInfo *info = &s->ssnat_info;
+
+    if (!info->ssnat_active) {
+        return;
+    }
+
+    tap_ssnat_translate(buf, size, info->ssnat_guest, info->ssnat_host,
+                        info->ssnat_mask);
+}
+
static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
struct iovec iov[1];
+ tap_ssnat_reveive(s, (uint8_t*)buf, size);
if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
return tap_receive_raw(nc, buf, size);
}
@@ -189,6 +407,14 @@ static void tap_send_completed(NetClientState *nc, ssize_t
len)
tap_read_poll(s, 1);
}
+/*
+ * Called from tap_send() before the frame is queued with
+ * qemu_send_packet_async(): when SSNAT is active, map addresses in the
+ * ssnat_host network back onto the ssnat_guest network.  The frame in buf
+ * is modified in place.
+ */
+static void tap_ssnat_send(TAPState *s, uint8_t *buf, size_t size)
+{
+    const SSNATInfo *info = &s->ssnat_info;
+
+    if (!info->ssnat_active) {
+        return;
+    }
+
+    tap_ssnat_translate(buf, size, info->ssnat_host, info->ssnat_guest,
+                        info->ssnat_mask);
+}
+
static void tap_send(void *opaque)
{
TAPState *s = opaque;
@@ -207,6 +433,8 @@ static void tap_send(void *opaque)
size -= s->host_vnet_hdr_len;
}
+ tap_ssnat_send(s, buf, size);
+
size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
if (size == 0) {
tap_read_poll(s, 0);
@@ -586,6 +814,145 @@ static int net_tap_init(const NetdevTapOptions *tap, int
*vnet_hdr,
return fd;
}
+/*
+ * Copy the text of *pp up to (not including) the first occurrence of sep
+ * into buf, truncated to buf_size - 1 characters and NUL-terminated, then
+ * advance *pp to the character following the separator.
+ *
+ * Returns 0 on success.  Returns -1, leaving buf and *pp untouched, when
+ * sep does not occur in *pp.  Nothing is copied when buf_size <= 0.
+ */
+static int get_str_sep(char *buf, int buf_size, const char **pp, int sep)
+{
+    const char *start = *pp;
+    const char *hit = strchr(start, sep);
+    int n;
+
+    if (hit == NULL) {
+        return -1;
+    }
+
+    if (buf_size > 0) {
+        n = hit - start;
+        if (n >= buf_size) {
+            n = buf_size - 1;
+        }
+        memcpy(buf, start, n);
+        buf[n] = '\0';
+    }
+
+    *pp = hit + 1;
+    return 0;
+}
+
+/*
+ * Query interface 'ifname' for its hardware address, IPv4 address and
+ * netmask and store them in s->ssnat_info (ssnat_hwaddr, ssnat_ifaddr,
+ * ssnat_ifmask).
+ *
+ * Returns 0 when all three values were read, -1 otherwise; every failure
+ * is reported through error_report().
+ */
+static int ssnat_net_tap_set_ifinfo(const char* ifname, TAPState* s)
+{
+    SSNATInfo *info = &s->ssnat_info;
+    struct ifreq ifr;
+    int ret = -1;
+    int fd;
+
+    fd = socket(AF_INET, SOCK_DGRAM, 0);
+    if (fd < 0) {
+        /* Only negative values signal failure; 0 is a valid descriptor. */
+        error_report("failed to open a socket to query interface '%s'",
+                     ifname);
+        return -1;
+    }
+
+    /* Start from a clean request so the name is always NUL-terminated. */
+    memset(&ifr, 0, sizeof(ifr));
+    pstrcpy(ifr.ifr_name, sizeof(ifr.ifr_name), ifname);
+
+    if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) {
+        error_report("failed to fetch the hardware address of interface '%s'",
+                     ifname);
+        goto out;
+    }
+    memcpy(info->ssnat_hwaddr, ifr.ifr_hwaddr.sa_data,
+           sizeof(info->ssnat_hwaddr));
+
+    if (ioctl(fd, SIOCGIFADDR, &ifr) != 0) {
+        error_report("failed to fetch the ip address of interface '%s'",
+                     ifname);
+        goto out;
+    }
+    memcpy(&info->ssnat_ifaddr,
+           &((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr,
+           sizeof(info->ssnat_ifaddr));
+
+    if (ioctl(fd, SIOCGIFNETMASK, &ifr) != 0) {
+        error_report("failed to fetch the netmask of interface '%s'", ifname);
+        goto out;
+    }
+    /* The netmask is read back through the same sockaddr slot of ifr. */
+    memcpy(&info->ssnat_ifmask,
+           &((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr,
+           sizeof(info->ssnat_ifmask));
+
+    ret = 0;
+
+out:
+    close(fd);
+    return ret;
+}
+
+/*
+ * Parse an SSNAT rule of the form
+ *
+ *     <guest-side-ip>[:<netmask>:<host-side-ip>]
+ *
+ * into info->ssnat_guest, info->ssnat_mask and info->ssnat_host.  In the
+ * short form (no ':'), the mask and host-side address are taken from
+ * info->ssnat_ifmask and info->ssnat_ifaddr, which must already be set.
+ *
+ * Returns 0 on success, -1 after reporting the error otherwise.
+ */
+static int ssnat_parse_rule(const char *rule, SSNATInfo *info)
+{
+    char field[1024];
+    const char *p = rule;
+
+    if (get_str_sep(field, sizeof(field), &p, ':') < 0) {
+        /* Short form: only the guest-side network is given. */
+        if (!inet_aton(rule, &info->ssnat_guest)) {
+            error_report("invalid stateless static nat rule '%s'", rule);
+            return -1;
+        }
+        info->ssnat_host = info->ssnat_ifaddr;
+        info->ssnat_mask = info->ssnat_ifmask;
+        return 0;
+    }
+
+    if (!field[0]) {
+        error_report("invalid stateless static nat rule '%s', "
+                     "empty guest-side-ip", rule);
+        return -1;
+    }
+    if (!inet_aton(field, &info->ssnat_guest)) {
+        error_report("invalid stateless static nat rule '%s', "
+                     "invalid guest-side-ip", rule);
+        return -1;
+    }
+
+    if (get_str_sep(field, sizeof(field), &p, ':') < 0) {
+        error_report("invalid stateless static nat rule '%s', "
+                     "incomplete rule", rule);
+        return -1;
+    }
+    if (!field[0]) {
+        error_report("invalid stateless static nat rule '%s', "
+                     "empty netmask length", rule);
+        return -1;
+    }
+    if (!inet_aton(field, &info->ssnat_mask)) {
+        error_report("invalid stateless static nat rule '%s', "
+                     "invalid netmask", rule);
+        return -1;
+    }
+
+    /* Whatever follows the second ':' is the host-side address. */
+    if (!p[0]) {
+        error_report("invalid stateless static nat rule '%s', "
+                     "empty host-side-ip", rule);
+        return -1;
+    }
+    if (!inet_aton(p, &info->ssnat_host)) {
+        error_report("invalid stateless static nat rule '%s', "
+                     "invalid host-side-ip", rule);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Configure stateless static NAT for TAP state 's' from the 'ssnat'
+ * option of 'tap'.
+ *
+ * Without an 'ssnat' option this is a no-op that returns 0.  Otherwise the
+ * interface named by tap->ifname is queried, the rule is parsed and
+ * translation is switched on.  Returns 0 on success, -1 after reporting
+ * the error otherwise.
+ */
+static int ssnat_net_tap_init(TAPState* s, const NetdevTapOptions *tap)
+{
+    if (!tap->has_ssnat) {
+        return 0;
+    }
+
+    if (!tap->ifname) {
+        error_report("could not retrieve ifname attribute");
+        return -1;
+    }
+
+    if (ssnat_net_tap_set_ifinfo(tap->ifname, s) < 0) {
+        error_report("failed to fetch the interface '%s' information",
+                     tap->ifname);
+        return -1;
+    }
+
+    if (ssnat_parse_rule(tap->ssnat, &s->ssnat_info) < 0) {
+        return -1;
+    }
+
+    /*
+     * Enable translation for both rule forms; previously only the short
+     * form set this flag, so a full guest:mask:host rule was accepted but
+     * never applied.
+     */
+    s->ssnat_info.ssnat_active = 1;
+    return 0;
+}
+
int net_init_tap(const NetClientOptions *opts, const char *name,
NetClientState *peer)
{
@@ -705,7 +1072,7 @@ int net_init_tap(const NetClientOptions *opts, const char
*name,
return -1;
}
- return 0;
+ return ssnat_net_tap_init(s, tap);
}
VHostNetState *tap_get_vhost_net(NetClientState *nc)
diff --git a/qapi-schema.json b/qapi-schema.json
index bd8ad74..59aa127 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2105,6 +2105,8 @@
#
# @vhostforce: #optional vhost on for non-MSIX virtio guests
#
+# @ssnat: #optional stateless static nat
+#
# Since 1.2
##
{ 'type': 'NetdevTapOptions',
@@ -2118,7 +2120,8 @@
'*vnet_hdr': 'bool',
'*vhost': 'bool',
'*vhostfd': 'str',
- '*vhostforce': 'bool' } }
+ '*vhostforce': 'bool',
+ '*ssnat': 'str' } }
##
# @NetdevSocketOptions
diff --git a/qemu-options.hx b/qemu-options.hx
index 3c411c4..c0aa852 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1268,7 +1268,9 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
"-net tap[,vlan=n][,name=str],ifname=name\n"
" connect the host TAP network interface to VLAN 'n'\n"
#else
- "-net
tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h][,vhostforce=on|off]\n"
+ "-net
tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile]\n"
+ "
[,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h]\n"
+ " [,vhostforce=on|off][,ssnat=rule]\n"
" connect the host TAP network interface to VLAN 'n' \n"
" use network scripts 'file' (default="
DEFAULT_NETWORK_SCRIPT ")\n"
" to configure it and 'dfile' (default="
DEFAULT_NETWORK_DOWN_SCRIPT ")\n"
@@ -1285,6 +1287,12 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
" (only has effect for virtio guests which use MSIX)\n"
" use vhostforce=on to force vhost on for non-MSIX virtio
guests\n"
" use 'vhostfd=h' to connect to an already opened vhost net
device\n"
+ " use 'ssnat=rule' to create stateless static nat\n"
+ " rule: <guest-side-ip>[:<netmask>:<host-side-ip>]\n"
+ " for example:
'ssnat=172.16.0.0:255.255.255.0:192.168.1.0' will result in translating\n"
+ " the Guest machine IP addresses from
172.16.0.x to 192.168.1.x\n"
+ " If only the guest-side-ip parameter is passed, the
netmask and host-side-ip\n"
+ " will be taken from the interface passed via
ifname\n"
"-net bridge[,vlan=n][,name=str][,br=bridge][,helper=helper]\n"
" connects a host TAP network interface to a host bridge
device 'br'\n"
" (default=" DEFAULT_BRIDGE_INTERFACE ") using the program
'helper'\n"
--
1.7.2.5
- [Qemu-devel] Adding support for Stateless Static NAT for TAP devices,
John Basila <=
- Re: [Qemu-devel] Adding support for Stateless Static NAT for TAP devices, Stefan Hajnoczi, 2012/08/30
- Re: [Qemu-devel] Adding support for Stateless Static NAT for TAP devices, John Basila, 2012/08/30
- Re: [Qemu-devel] Adding support for Stateless Static NAT for TAP devices, Stefan Hajnoczi, 2012/08/30
- Re: [Qemu-devel] Adding support for Stateless Static NAT for TAP devices, John Basila, 2012/08/30
- Re: [Qemu-devel] Adding support for Stateless Static NAT for TAP devices, Ivan Shmakov, 2012/08/30
- Re: [Qemu-devel] Adding support for Stateless Static NAT for TAP devices, Dennis Jacobfeuerborn, 2012/08/30
- Re: [Qemu-devel] Adding support for Stateless Static NAT for TAP devices, John Basila, 2012/08/30