[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC PATCH V3 2/4] colo-compare: track connection and enqueue packet
From: |
Jason Wang |
Subject: |
Re: [Qemu-devel] [RFC PATCH V3 2/4] colo-compare: track connection and enqueue packet |
Date: |
Thu, 28 Apr 2016 15:47:47 +0800 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.6.0 |
On 04/18/2016 07:11 PM, Zhang Chen wrote:
> In this patch we use kernel jhash table to track
> connection, and then enqueue net packet like this:
>
> + CompareState ++
> | |
> +---------------+ +---------------+ +---------------+
> |conn list +--->conn +--------->conn |
> +---------------+ +---------------+ +---------------+
> | | | | | |
> +---------------+ +---v----+ +---v----+ +---v----+ +---v----+
> |primary | |secondary |primary | |secondary
> |packet | |packet + |packet | |packet +
> +--------+ +--------+ +--------+ +--------+
> | | | |
> +---v----+ +---v----+ +---v----+ +---v----+
> |primary | |secondary |primary | |secondary
> |packet | |packet + |packet | |packet +
> +--------+ +--------+ +--------+ +--------+
> | | | |
> +---v----+ +---v----+ +---v----+ +---v----+
> |primary | |secondary |primary | |secondary
> |packet | |packet + |packet | |packet +
> +--------+ +--------+ +--------+ +--------+
>
> Signed-off-by: Zhang Chen <address@hidden>
> Signed-off-by: Li Zhijian <address@hidden>
> Signed-off-by: Wen Congyang <address@hidden>
> ---
> include/qemu/jhash.h | 59 ++++++++++
> net/colo-compare.c | 303
> ++++++++++++++++++++++++++++++++++++++++++++++++++-
> trace-events | 3 +
> 3 files changed, 360 insertions(+), 5 deletions(-)
> create mode 100644 include/qemu/jhash.h
>
> diff --git a/include/qemu/jhash.h b/include/qemu/jhash.h
> new file mode 100644
> index 0000000..8a8ff0f
> --- /dev/null
> +++ b/include/qemu/jhash.h
> @@ -0,0 +1,59 @@
> +/* jhash.h: Jenkins hash support.
> + *
> + * Copyright (C) 2006. Bob Jenkins (address@hidden)
> + *
> + * http://burtleburtle.net/bob/hash/
> + *
> + * These are the credits from Bob's sources:
> + *
> + * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
> + *
> + * These are functions for producing 32-bit hashes for hash table lookup.
> + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
> + * are externally useful functions. Routines to test the hash are included
> + * if SELF_TEST is defined. You can use this free for any purpose.It's in
> + * the public domain. It has no warranty.
> + *
> + * Copyright (C) 2009-2010 Jozsef Kadlecsik (address@hidden)
> + *
> + * I've modified Bob's hash to be useful in the Linux kernel, and
> + * any bugs present are my fault.
> + * Jozsef
> + */
> +
> +#ifndef QEMU_JHASH_H__
> +#define QEMU_JHASH_H__
> +
> +#include "qemu/bitops.h"
> +
> +/*
> + * hashtable related is copied from linux kernel jhash
> + */
> +
> +/* __jhash_mix -- mix 3 32-bit values reversibly. */
> +#define __jhash_mix(a, b, c) \
> +{ \
> + a -= c; a ^= rol32(c, 4); c += b; \
> + b -= a; b ^= rol32(a, 6); a += c; \
> + c -= b; c ^= rol32(b, 8); b += a; \
> + a -= c; a ^= rol32(c, 16); c += b; \
> + b -= a; b ^= rol32(a, 19); a += c; \
> + c -= b; c ^= rol32(b, 4); b += a; \
> +}
> +
> +/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
> +#define __jhash_final(a, b, c) \
> +{ \
> + c ^= b; c -= rol32(b, 14); \
> + a ^= c; a -= rol32(c, 11); \
> + b ^= a; b -= rol32(a, 25); \
> + c ^= b; c -= rol32(b, 16); \
> + a ^= c; a -= rol32(c, 4); \
> + b ^= a; b -= rol32(a, 14); \
> + c ^= b; c -= rol32(b, 24); \
> +}
> +
> +/* An arbitrary initial parameter */
> +#define JHASH_INITVAL 0xdeadbeef
> +
> +#endif /* QEMU_JHASH_H__ */
> diff --git a/net/colo-compare.c b/net/colo-compare.c
> index c45b132..dc57eac 100644
> --- a/net/colo-compare.c
> +++ b/net/colo-compare.c
> @@ -22,12 +22,16 @@
> #include "qemu/sockets.h"
> #include "qapi-visit.h"
> #include "trace.h"
> +#include "slirp/slirp.h"
> +#include "qemu/jhash.h"
> +#include "net/eth.h"
>
> #define TYPE_COLO_COMPARE "colo-compare"
> #define COLO_COMPARE(obj) \
> OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE)
>
> #define COMPARE_READ_LEN_MAX NET_BUFSIZE
> +#define HASHTABLE_MAX_SIZE 16384
>
> static QTAILQ_HEAD(, CompareState) net_compares =
> QTAILQ_HEAD_INITIALIZER(net_compares);
> @@ -39,6 +43,28 @@ typedef struct ReadState {
> uint8_t buf[COMPARE_READ_LEN_MAX];
> } ReadState;
>
> +/*
> + + CompareState ++
> + | |
> + +---------------+ +---------------+ +---------------+
> + |conn list +--->conn +--------->conn |
> + +---------------+ +---------------+ +---------------+
> + | | | | | |
> + +---------------+ +---v----+ +---v----+ +---v----+ +---v----+
> + |primary | |secondary |primary | |secondary
> + |packet | |packet + |packet | |packet +
> + +--------+ +--------+ +--------+ +--------+
> + | | | |
> + +---v----+ +---v----+ +---v----+ +---v----+
> + |primary | |secondary |primary | |secondary
> + |packet | |packet + |packet | |packet +
> + +--------+ +--------+ +--------+ +--------+
> + | | | |
> + +---v----+ +---v----+ +---v----+ +---v----+
> + |primary | |secondary |primary | |secondary
> + |packet | |packet + |packet | |packet +
> + +--------+ +--------+ +--------+ +--------+
> +*/
> typedef struct CompareState {
> Object parent;
>
> @@ -51,12 +77,265 @@ typedef struct CompareState {
> QTAILQ_ENTRY(CompareState) next;
> ReadState pri_rs;
> ReadState sec_rs;
> +
> + /* connection list: the connections belonged to this NIC could be found
> + * in this list.
> + * element type: Connection
> + */
> + GQueue conn_list;
> + QemuMutex conn_list_lock; /* to protect conn_list */
> + /* hashtable to save connection */
> + GHashTable *connection_track_table;
> + /* to save unprocessed_connections */
> + GQueue unprocessed_connections;
> + /* proxy current hash size */
> + uint32_t hashtable_size;
> } CompareState;
>
> typedef struct CompareClass {
> ObjectClass parent_class;
> } CompareClass;
>
> +typedef struct Packet {
> + void *data;
> + union {
> + uint8_t *network_layer;
> + struct ip *ip;
Does this mean ipv6 is not supported?
> + };
> + uint8_t *transport_layer;
> + int size;
> + CompareState *s;
> +} Packet;
> +
> +typedef struct ConnectionKey {
> + /* (src, dst) must be grouped, in the same way than in IP header */
> + struct in_addr src;
> + struct in_addr dst;
> + uint16_t src_port;
> + uint16_t dst_port;
> + uint8_t ip_proto;
> +} QEMU_PACKED ConnectionKey;
> +
> +typedef struct Connection {
> + QemuMutex list_lock;
> + /* connection primary send queue: element type: Packet */
> + GQueue primary_list;
> + /* connection secondary send queue: element type: Packet */
> + GQueue secondary_list;
> + /* flag to enqueue unprocessed_connections */
> + bool processing;
> + uint8_t ip_proto;
> +} Connection;
> +
> +enum {
> + PRIMARY_IN = 0,
> + SECONDARY_IN,
> +};
> +
> +static void packet_destroy(void *opaque, void *user_data);
> +static int compare_chr_send(CharDriverState *out,
> + const uint8_t *buf,
> + uint32_t size);
> +
> +static uint32_t connection_key_hash(const void *opaque)
> +{
> + const ConnectionKey *key = opaque;
> + uint32_t a, b, c;
> +
> + /* Jenkins hash */
> + a = b = c = JHASH_INITVAL + sizeof(*key);
> + a += key->src.s_addr;
> + b += key->dst.s_addr;
> + c += (key->src_port | key->dst_port << 16);
> + __jhash_mix(a, b, c);
> +
> + a += key->ip_proto;
> + __jhash_final(a, b, c);
> +
> + return c;
> +}
> +
> +static int connection_key_equal(const void *opaque1, const void *opaque2)
> +{
> + return memcmp(opaque1, opaque2, sizeof(ConnectionKey)) == 0;
So why not use ConnectionKey * here, considering we're sure of the type?
> +}
> +
> +/*
> + * initialize connecon_key for packet
> + * Return 0 on success, if return 1 the pkt will be sent later
> + */
> +static int connection_key_init(Packet *pkt, ConnectionKey *key)
> +{
> + int network_length;
> + uint8_t *data = pkt->data;
> + uint16_t l3_proto;
> + uint32_t tmp_ports;
> + ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
> +
> + pkt->network_layer = data + ETH_HLEN;
Can the length of data be shorter than ETH_HLEN?
> + l3_proto = eth_get_l3_proto(data, l2hdr_len);
> + if (l3_proto != ETH_P_IP) {
> + return 1;
> + }
> +
> + network_length = pkt->ip->ip_hl * 4;
> + pkt->transport_layer = pkt->network_layer + network_length;
Do we need a sanity check to make sure there's no evil network_length here?
> + if (!pkt->transport_layer) {
> + error_report("pkt->transport_layer is valid");
invalid? And if this is caused by the bad packet itself, there's no
need for an error_report.
> + return 1;
> + }
> + key->ip_proto = pkt->ip->ip_p;
> + key->src = pkt->ip->ip_src;
> + key->dst = pkt->ip->ip_dst;
> +
> + switch (key->ip_proto) {
> + case IPPROTO_TCP:
> + case IPPROTO_UDP:
> + case IPPROTO_DCCP:
> + case IPPROTO_ESP:
> + case IPPROTO_SCTP:
> + case IPPROTO_UDPLITE:
> + tmp_ports = *(uint32_t *)(pkt->transport_layer);
> + key->src_port = ntohs(tmp_ports & 0xffff);
> + key->dst_port = ntohs(tmp_ports >> 16);
> + break;
> + case IPPROTO_AH:
> + tmp_ports = *(uint32_t *)(pkt->transport_layer + 4);
> + key->src_port = ntohs(tmp_ports & 0xffff);
> + key->dst_port = ntohs(tmp_ports >> 16);
> + break;
> + default:
> + key->src_port = 0;
> + key->dst_port = 0;
> + break;
> + }
> +
> + return 0;
> +}
> +
> +static Connection *connection_new(ConnectionKey *key)
> +{
> + Connection *conn = g_slice_new(Connection);
> +
> + qemu_mutex_init(&conn->list_lock);
> + conn->ip_proto = key->ip_proto;
> + conn->processing = false;
> + g_queue_init(&conn->primary_list);
> + g_queue_init(&conn->secondary_list);
> +
> + return conn;
> +}
> +
> +/*
> + * Clear hashtable, stop this hash growing really huge
> + */
> +static void connection_hashtable_reset(CompareState *s)
> +{
> + s->hashtable_size = 0;
> + g_hash_table_remove_all(s->connection_track_table);
> +}
> +
> +/* if not found, create a new connection and add to hash table */
> +static Connection *connection_get(CompareState *s, ConnectionKey *key)
> +{
> + /* FIXME: protect connection_track_table */
> + Connection *conn = g_hash_table_lookup(s->connection_track_table, key);
> +
> + if (conn == NULL) {
> + ConnectionKey *new_key = g_memdup(key, sizeof(*key));
> +
> + conn = connection_new(key);
> +
> + s->hashtable_size++;
> + if (s->hashtable_size > HASHTABLE_MAX_SIZE) {
> + error_report("colo proxy connection hashtable full, clear it");
> + connection_hashtable_reset(s);
> + /* TODO:clear conn_list */
> + }
> +
> + g_hash_table_insert(s->connection_track_table, new_key, conn);
> + }
> +
> + return conn;
> +}
> +
> +static void connection_destroy(void *opaque)
> +{
> + Connection *conn = opaque;
> +
> + qemu_mutex_lock(&conn->list_lock);
Like I said in the previous patch, if you do all the processing in the colo
compare thread, you can avoid almost all synchronization (e.g. mutex).
> + g_queue_foreach(&conn->primary_list, packet_destroy, NULL);
> + g_queue_free(&conn->primary_list);
> + g_queue_foreach(&conn->secondary_list, packet_destroy, NULL);
> + g_queue_free(&conn->secondary_list);
> + qemu_mutex_unlock(&conn->list_lock);
> + qemu_mutex_destroy(&conn->list_lock);
> + g_slice_free(Connection, conn);
> +}
> +
> +static Packet *packet_new(CompareState *s, const void *data,
> + int size, ConnectionKey *key)
> +{
> + Packet *pkt = g_slice_new(Packet);
> +
> + pkt->data = g_memdup(data, size);
> + pkt->size = size;
> + pkt->s = s;
> +
> + if (connection_key_init(pkt, key)) {
> + packet_destroy(pkt, NULL);
> + pkt = NULL;
> + }
Can we do connection_key_init() first? This can avoid packet_destroy()
if it fails.
> +
> + return pkt;
> +}
> +
> +/*
> + * Return 0 on success, if return -1 means the pkt
> + * is unsupported(arp and ipv6) and will be sent later
> + */
> +static int packet_enqueue(CompareState *s, int mode)
> +{
> + ConnectionKey key = {{ 0 } };
> + Packet *pkt = NULL;
> + Connection *conn;
> +
> + if (mode == PRIMARY_IN) {
> + pkt = packet_new(s, s->pri_rs.buf, s->pri_rs.packet_len, &key);
> + } else {
> + pkt = packet_new(s, s->sec_rs.buf, s->sec_rs.packet_len, &key);
> + }
> + if (!pkt) {
> + return -1;
> + }
> +
> + conn = connection_get(s, &key);
> + if (!conn->processing) {
> + qemu_mutex_lock(&s->conn_list_lock);
> + g_queue_push_tail(&s->conn_list, conn);
> + qemu_mutex_unlock(&s->conn_list_lock);
> + conn->processing = true;
> + }
> +
> + qemu_mutex_lock(&conn->list_lock);
> + if (mode == PRIMARY_IN) {
> + g_queue_push_tail(&conn->primary_list, pkt);
> + } else {
> + g_queue_push_tail(&conn->secondary_list, pkt);
> + }
> + qemu_mutex_unlock(&conn->list_lock);
> +
> + return 0;
> +}
> +
> +static void packet_destroy(void *opaque, void *user_data)
> +{
> + Packet *pkt = opaque;
> +
> + g_free(pkt->data);
> + g_slice_free(Packet, pkt);
> +}
> +
> static int compare_chr_send(CharDriverState *out,
> const uint8_t *buf,
> uint32_t size)
> @@ -158,8 +437,10 @@ static void compare_pri_chr_in(void *opaque, const
> uint8_t *buf, int size)
>
> ret = compare_chr_fill_rstate(&s->pri_rs, buf, size);
> if (ret == 1) {
> - /* FIXME: enqueue to primary packet list */
> - compare_chr_send(s->chr_out, s->pri_rs.buf, s->pri_rs.packet_len);
> + if (packet_enqueue(s, PRIMARY_IN)) {
> + trace_colo_compare_main("primary: unsupported packet in");
> + compare_chr_send(s->chr_out, s->pri_rs.buf,
> s->pri_rs.packet_len);
Looks like if a packet was not recognized by connection_key_init(), it
will be sent directly without comparing it with the packet sent from
secondary? Is this expected?
> + }
> } else if (ret == -1) {
> qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL);
> }
> @@ -176,9 +457,11 @@ static void compare_sec_chr_in(void *opaque, const
> uint8_t *buf, int size)
>
> ret = compare_chr_fill_rstate(&s->sec_rs, buf, size);
> if (ret == 1) {
> - /* TODO: enqueue to secondary packet list*/
> - /* should we send sec arp pkt? */
> - compare_chr_send(s->chr_out, s->sec_rs.buf, s->sec_rs.packet_len);
> + if (packet_enqueue(s, SECONDARY_IN)) {
> + trace_colo_compare_main("secondary: unsupported packet in");
> + /* should we send sec arp pkt? */
> + compare_chr_send(s->chr_out, s->sec_rs.buf,
> s->sec_rs.packet_len);
> + }
> } else if (ret == -1) {
> qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL);
> }
> @@ -280,6 +563,15 @@ static void colo_compare_complete(UserCreatable *uc,
> Error **errp)
> qemu_chr_fe_claim_no_fail(s->chr_out);
> QTAILQ_INSERT_TAIL(&net_compares, s, next);
>
> + g_queue_init(&s->conn_list);
> + qemu_mutex_init(&s->conn_list_lock);
> + s->hashtable_size = 0;
> +
> + s->connection_track_table = g_hash_table_new_full(connection_key_hash,
> + connection_key_equal,
> + g_free,
> + connection_destroy);
> +
> return;
> }
>
> @@ -314,6 +606,7 @@ static void colo_compare_class_finalize(ObjectClass *oc,
> void *data)
> if (!QTAILQ_EMPTY(&net_compares)) {
> QTAILQ_REMOVE(&net_compares, s, next);
> }
> + qemu_mutex_destroy(&s->conn_list_lock);
> }
>
> static void colo_compare_init(Object *obj)
> diff --git a/trace-events b/trace-events
> index ca7211b..8862288 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -1916,3 +1916,6 @@ aspeed_vic_update_fiq(int flags) "Raising FIQ: %d"
> aspeed_vic_update_irq(int flags) "Raising IRQ: %d"
> aspeed_vic_read(uint64_t offset, unsigned size, uint32_t value) "From 0x%"
> PRIx64 " of size %u: 0x%" PRIx32
> aspeed_vic_write(uint64_t offset, unsigned size, uint32_t data) "To 0x%"
> PRIx64 " of size %u: 0x%" PRIx32
> +
> +# net/colo-compare.c
> +colo_compare_main(const char *chr) "chr: %s"
- Re: [Qemu-devel] [RFC PATCH V3 1/4] colo-compare: introduce colo compare initlization, (continued)
- Re: [Qemu-devel] [RFC PATCH V3 1/4] colo-compare: introduce colo compare initlization, Zhang Chen, 2016/04/28
- Re: [Qemu-devel] [RFC PATCH V3 1/4] colo-compare: introduce colo compare initlization, Jason Wang, 2016/04/28
- Re: [Qemu-devel] [RFC PATCH V3 1/4] colo-compare: introduce colo compare initlization, Zhang Chen, 2016/04/28
- Re: [Qemu-devel] [RFC PATCH V3 1/4] colo-compare: introduce colo compare initlization, Jason Wang, 2016/04/28
- Re: [Qemu-devel] [RFC PATCH V3 1/4] colo-compare: introduce colo compare initlization, Zhang Chen, 2016/04/28
- Re: [Qemu-devel] [RFC PATCH V3 1/4] colo-compare: introduce colo compare initlization, Zhang Chen, 2016/04/28
- Re: [Qemu-devel] [RFC PATCH V3 1/4] colo-compare: introduce colo compare initlization, Jason Wang, 2016/04/28
- Re: [Qemu-devel] [RFC PATCH V3 1/4] colo-compare: introduce colo compare initlization, Eric Blake, 2016/04/28
- [Qemu-devel] [RFC PATCH V3 2/4] colo-compare: track connection and enqueue packet, Zhang Chen, 2016/04/18
- Re: [Qemu-devel] [RFC PATCH V3 2/4] colo-compare: track connection and enqueue packet,
Jason Wang <=
- Re: [Qemu-devel] [RFC PATCH V3 0/4] Introduce COLO-compare, Zhang Chen, 2016/04/27