300字范文,内容丰富有趣,生活中的好帮手!
300字范文 > linux 虚拟网卡 源码 Linux的虚拟网卡TUN和TAP

linux 虚拟网卡 源码 Linux的虚拟网卡TUN和TAP

时间:2022-12-08 05:09:33

相关推荐

linux 虚拟网卡 源码 Linux的虚拟网卡TUN和TAP

TUN/TAP 驱动为用户空间程序提供数据包的接收和发送功能,可以把它看作一个简单的点对点设备或以太网设备。它不是从物理介质接收数据包,而是从用户空间程序接收;它发送数据包时也不经过物理介质,而是把数据包交给用户空间程序。

为了使用这个驱动,应用程序需要先打开 /dev/net/tun 设备(字符设备),然后发出相应的控制调用(ioctl)来注册一个网络设备。根据所设置的标志位,该网络设备将被命名为 tunXX 或 tapXX。当应用程序关闭文件描述符时,该网络设备以及所有相关的路由都会消失。

根据所选择的设备类型,用户空间的应用程序需要读写 IP 包(使用 tun 设备)或以太网帧(使用 tap 设备)。具体使用哪一种设备,取决于传递给 ioctl 函数的标志参数。

Tun/tap 设备的源码包地址是 http://vtun.sourceforge.net/tun

源码包中包含两个简单的例子,用于演示如何使用 tun 设备和 tap 设备。这两个程序的作用都相当于两个网络设备接口之间的网桥。

br_select.c - bridge based on select system call.

br_sigio.c - bridge based on async io and SIGIO signal.

当然,最好的例子是 VTun :))

/* Register tun_init as the module's initialisation routine. */
module_init(tun_init);

/* Register tun_cleanup as the module's exit routine. */
module_exit(tun_cleanup);

/* Network device part of the driver */

/*
 * List head named tun_dev_list. Presumably it links the driver's tun_struct
 * instances through their `list` member -- TODO confirm against the code
 * that adds entries to it.
 */
static LIST_HEAD(tun_dev_list);

/* Forward declaration of the driver's ethtool operations table. */
static const struct ethtool_ops tun_ethtool_ops;

主要的数据结构

struct miscdevice

/*
 * Descriptor handed to misc_register() to register one miscellaneous
 * character device.
 */
struct miscdevice {
	int				minor;		/* minor number, or MISC_DYNAMIC_MINOR to have one picked */
	const char			*name;		/* name used when the device node is created */
	const struct file_operations	*fops;		/* file operations for the device */
	struct list_head		list;		/* link in the global misc_list */
	struct device			*parent;	/* parent passed to device_create() */
	struct device			*this_device;	/* device returned by device_create() */
};

struct tun_struct

struct tun_struct {

struct list_head list;

unsigned long flags;// //区分tun 和tap 设备

int attached;

uid_t owner;

wait_queue_head_t read_wait;// //等待队列

struct sk_buff_head readq; // //网络缓冲区队列

struct net_device *dev; // //linux 抽象网络设备结构(结构是linux 内核提供的

统一网络设备结构,定义了系统统一的访问接口。)

struct net_device_stats stats; // //网卡状态信息结构

struct fasync_struct *fasync;// //文件异步通知结构

unsigned long if_flags;

u8 dev_addr[ETH_ALEN];

u32 chr_filter[2];

u32 net_filter[2];

#ifdef TUN_DEBUG

int debug;

#endif

};

struct ifreq

/*
 * Interface request structure used for socket ioctl's.
 *
 * Every interface ioctl must use a parameter layout that begins with
 * ifr_name; what follows the name may be interface specific.
 */
struct ifreq
{
#define IFHWADDRLEN 6
	/* Interface name part. */
	union
	{
		char	ifrn_name[IFNAMSIZ];	/* if name, e.g. "en0" */
	} ifr_ifrn;

	/* Request payload; which member is meaningful depends on the ioctl. */
	union {
		struct	sockaddr ifru_addr;
		struct	sockaddr ifru_dstaddr;
		struct	sockaddr ifru_broadaddr;
		struct	sockaddr ifru_netmask;
		struct	sockaddr ifru_hwaddr;
		short	ifru_flags;
		int	ifru_ivalue;
		int	ifru_mtu;
		struct	ifmap ifru_map;
		char	ifru_slave[IFNAMSIZ];	/* Just fits the size */
		char	ifru_newname[IFNAMSIZ];
		void __user *	ifru_data;
		struct	if_settings ifru_settings;
	} ifr_ifru;
};

模块的初始化(tun_init)

/*
 * Module initialisation: log the driver banner and register the tun
 * character device with the misc subsystem.
 *
 * Returns 0 on success, or the error code returned by misc_register().
 */
static int __init tun_init(void)
{
	int ret = 0;

	/* The format strings must end in a real newline escape ("\n"). */
	printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
	printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT);

	ret = misc_register(&tun_miscdev);
	if (ret)
		printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR);
	return ret;
}

/* Misc-device descriptor for the tun driver; registered by tun_init(). */
static struct miscdevice tun_miscdev = {
	.minor	= TUN_MINOR,	/* fixed minor number of the tun device */
	.name	= "tun",
	.fops	= &tun_fops,	/* character-device entry points */
};

/*
 * File operations of the tun character device. The tun_chr_* handlers are
 * the driver's own entry points; plain read/write use the generic
 * do_sync_read/do_sync_write helpers.
 */
static const struct file_operations tun_fops = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.read		= do_sync_read,
	.aio_read	= tun_chr_aio_read,
	.write		= do_sync_write,
	.aio_write	= tun_chr_aio_write,
	.poll		= tun_chr_poll,
	.ioctl		= tun_chr_ioctl,
	.open		= tun_chr_open,
	.release	= tun_chr_close,
	.fasync		= tun_chr_fasync
};

misc_register

// 在内核中,驱动利用 misc_register() 函数把自己注册为非标准(misc,杂项)字符设备驱动,从而具备字符设备所具有的各种程序接口。

int misc_register(struct miscdevice * misc)

{

struct miscdevice *c;

dev_t dev;

int err = 0;

INIT_LIST_HEAD(&misc‐>list);

mutex_lock(&misc_mtx);

list_for_each_entry(c, &misc_list, list) {

if (c‐>minor == misc‐>minor) {

mutex_unlock(&misc_mtx);

return ‐EBUSY;

}

}

if (misc‐>minor == MISC_DYNAMIC_MINOR) {

int i = DYNAMIC_MINORS;

while (‐‐i >= 0)

if ( (misc_minors[i>>3] & (1 << (i&7))) == 0)

break;

if (i<0) {

mutex_unlock(&misc_mtx);

return ‐EBUSY;

}

misc‐>minor = i;

}

if (misc‐>minor < DYNAMIC_MINORS)

misc_minors[misc‐>minor >> 3] |= 1 << (misc‐>minor & 7);

dev = MKDEV(MISC_MAJOR, misc‐>minor);

misc‐>this_device = device_create(misc_class, misc‐>parent, dev,

"%s", misc‐>name);

if (IS_ERR(misc‐>this_device)) {

err = PTR_ERR(misc‐>this_device);

goto out;

}

/*

* Add it to the front, so that later devices can "override"

* earlier defaults

*/

list_add(&misc‐>list, &misc_list);

out:

mutex_unlock(&misc_mtx);

return err;

}

tun 设备的操作(系统调用)

tun_chr_open(打开设备时调用)

当打开一个 tun/tap 设备时,open 调用最终会执行 tun_chr_open() 函数,完成打开阶段的初始化。在下面列出的这个版本中,该函数只是把 file->private_data 置为 NULL;网络缓冲区队列和等待队列的初始化并不在这个函数里完成(应当是在之后通过 TUNSETIFF 创建网络设备时进行,请对照源码确认)。

static int tun_chr_open(struct inode *inode, struct file * file)

{

DBG1(KERN_INFO "tunX: tun_chr_open/n");

file‐>private_data = NULL;//初始化设备文件的内容

return 0;

}

tun_chr_ioctl(设备的控制调用接口)

控制调用接口:

Cmd=

.. TUNSETIFF

.. _IOC_TYPE(cmd) == 0x89

.. TUNSETNOCSUM

.. TUNSETPERSIST

.. TUNSETOWNER

.. TUNSETLINK

.. TUNSETDEBUG

.. SIOCGIFFLAGS

.. SIOCSIFFLAGS

.. SIOCGIFHWADDR

.. SIOCSIFHWADDR

.. SIOCADDMULTI

.. SIOCDELMULTI

在 Tun/tap 驱动中,网卡的注册被嵌入到字符设备驱动的 ioctl 例程里:应用程序对字符设备的文件描述符发出自定义的 ioctl 命令 TUNSETIFF,即可完成网卡的注册。

static int tun_chr_ioctl(struct inode *inode, struct file *file,unsigned int cmd, unsigned long arg)

{

struct tun_struct *tun = file‐>private_data;

void __user* argp = (void __user*)arg;

struct ifreq ifr;

if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)

if (copy_from_user(&ifr, argp, sizeof ifr))//拷贝用户区的网络设备配置。在用户区已

经分配了ifreq 结构的值和配置值,

return ‐EFAULT;

if (cmd == TUNSETIFF && !tun) {//字符设备文件的数据不是空的则

int err;

ifr.ifr_name[IFNAMSIZ‐ ] = '/0';

rtnl_lock();//在中定义

err = tun_set_iff(file, &ifr);

rtnl_unlock();

if (err)

return err;

if (copy_to_user(argp, &ifr, sizeof(ifr)))//把配置数据拷贝到用户区

return ‐EFAULT;

return 0;

}

if (!tun)//tun 设备错误

return ‐EBADFD;

DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d/n", tun‐>dev‐>name, cmd);

switch (cmd) {

case TUNSETNOCSUM:

/* Disable/Enable checksum */

if (arg)

tun‐>flags |= TUN_NOCHECKSUM;

else

tun‐>flags &= ~TUN_NOCHECKSUM;

DBG(KERN_INFO "%s: checksum %s/n",

tun‐>dev‐>name, arg ? "disabled" : "enabled");

break;

case TUNSETPERSIST:

/* Disable/Enable persist mode */

if (arg)

tun‐>flags |= TUN_PERSIST;

else

tun‐>flags &= ~TUN_PERSIST;

DBG(KERN_INFO "%s: persist %s/n",

tun‐>dev‐>name, arg ? "disabled" : "enabled");

break;

case TUNSETOWNER:

/* Set owner of the device */

tun‐>owner = (uid_t) arg;

DBG(KERN_INFO "%s: owner set to %d/n", tun‐>dev‐>name, tun‐>owner);

break;

case TUNSETLINK:

/* Only allow setting the type when the interface is down */

if (tun‐>dev‐>flags & IFF_UP) {

DBG(KERN_INFO "%s: Linktype set failed because interface is up/n",

tun‐>dev‐>name);

return ‐EBUSY;

} else {

tun‐>dev‐>type = (int) arg;

DBG(KERN_INFO "%s: linktype set to %d/n", tun‐>dev‐>name, tun‐>dev‐>type);

}

break;

#ifdef TUN_DEBUG

case TUNSETDEBUG:

tun‐>debug = arg;

break;

#endif

case SIOCGIFFLAGS:

ifr.ifr_flags = tun‐>if_flags;

if (copy_to_user( argp, &ifr, sizeof ifr))

return ‐EFAULT;

return 0;

case SIOCSIFFLAGS:

/** Set the character device's interface flags. Currently only

* IFF_PROMISC and IFF_ALLMULTI are used. */

tun‐>if_flags = ifr.ifr_flags;

DBG(KERN_INFO "%s: interface flags 0x%lx/n",

tun‐>dev‐>name, tun‐>if_flags);

return 0;

case SIOCGIFHWADDR:

/* Note: the actual net device's address may be different */

memcpy(ifr.ifr_hwaddr.sa_data, tun‐>dev_addr,

min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun‐>dev_addr));

if (copy_to_user( argp, &ifr, sizeof ifr))

return ‐EFAULT;

return 0;

case SIOCSIFHWADDR:

{

/* try to set the actual net device's hw address */

int ret = dev_set_mac_address(tun‐>dev, &ifr.ifr_hwaddr);

if (ret == 0) {

/** Set the character device's hardware address. This is used when

* filtering packets being sent from the network device to the character

* device. */

memcpy(tun‐>dev_addr, ifr.ifr_hwaddr.sa_data,

min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun‐>dev_addr));

DBG(KERN_DEBUG "%s: set hardware address: %x:%x:%x:%x:%x:%x/n",

tun‐>dev‐>name,

tun‐>dev_addr[0], tun‐>dev_addr[1], tun‐>dev_addr[2],

tun‐>dev_addr[3], tun‐>dev_addr[4], tun‐>dev_addr[5]);

}

return ret;

}

case SIOCADDMULTI:

/** Add the specified group to the character device's multicast filter

* list. */

add_multi(tun‐>chr_filter, ifr.ifr_hwaddr.sa_data);

DBG(KERN_DEBUG "%s: add multi: %x:%x:%x:%x:%x:%x/n",

tun‐>dev‐>name,

(u8)ifr.ifr_hwaddr.sa_data[0], (u8)ifr.ifr_hwaddr.sa_data[1],

(u8)ifr.ifr_hwaddr.sa_data[2], (u8)ifr.ifr_hwaddr.sa_data[3],

(u8)ifr.ifr_hwaddr.sa_data[4], (u8)ifr.ifr_hwaddr.sa_data[5]);

return 0;

case SIOCDELMULTI:

/** Remove the specified group from the character device's multicast

* filter list. */

del_multi(tun‐>chr_filter, ifr.ifr_hwaddr.sa_data);

DBG(KERN_DEBUG "%s: del multi: %x:%x:%x:%x:%x:%x/n",

tun‐>dev‐>name,

(u8)ifr.ifr_hwaddr.sa_data[0], (u8)ifr.ifr_hwaddr.sa_data[1],

(u8)ifr.ifr_hwaddr.sa_data[2], (u8)ifr.ifr_hwaddr.sa_data[3],

(u8)ifr.ifr_hwaddr.sa_data[4], (u8)ifr.ifr_hwaddr.sa_data[5]);

return 0;

default:

return ‐EINVAL;

};

return 0;

}

tun_chr_aio_read(异步读)(从tun 设备中读取数据)

/*
 * Read one packet from the tun device into the caller's iovec.
 *
 * @iocb:  I/O control block; iocb->ki_filp is the open device file
 * @iv:    destination I/O vector
 * @count: number of segments in @iv
 * @pos:   file position (unused here)
 *
 * Dequeues packets from tun->readq until one passes the address filter,
 * copies it to user space with tun_put_user() and returns. Packets that
 * fail the filter are dropped. When the queue is empty the call sleeps,
 * unless the file is O_NONBLOCK or a signal is pending.
 *
 * Returns the result of tun_put_user() for the delivered packet, 0 if the
 * iovec has zero total length, -EBADFD if no device is attached, -EINVAL if
 * the iovec length is negative, -EAGAIN for a non-blocking read on an empty
 * queue, or -ERESTARTSYS when interrupted by a signal.
 */
static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
			    unsigned long count, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct tun_struct *tun = file->private_data;
	DECLARE_WAITQUEUE(wait, current);
	struct sk_buff *skb;
	ssize_t len, ret = 0;

	if (!tun)
		return -EBADFD;

	DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);

	len = iov_total(iv, count);
	if (len < 0)
		return -EINVAL;

	add_wait_queue(&tun->read_wait, &wait);
	while (len) {
		const u8 ones[ ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
		u8 addr[ ETH_ALEN];
		int bit_nr;

		/*
		 * Mark ourselves as sleeping before checking the queue, so a
		 * wake-up that arrives between the check and schedule() is
		 * not lost.
		 */
		current->state = TASK_INTERRUPTIBLE;

		/* Read frames from the queue */
		if (!(skb=skb_dequeue(&tun->readq))) {
			if (file->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				ret = -ERESTARTSYS;
				break;
			}

			/* Nothing to read, let's sleep */
			schedule();
			continue;
		}
		/* A slot was freed in readq; let the net device transmit again. */
		netif_wake_queue(tun->dev);

		/** Decide whether to accept this packet. This code is designed to
		 * behave identically to an Ethernet interface. Accept the packet if
		 * - we are promiscuous.
		 * - the packet is addressed to us.
		 * - the packet is broadcast.
		 * - the packet is multicast and
		 *   - we are multicast promiscous.
		 *   - we belong to the multicast group.
		 */
		/*
		 * NOTE(review): when skb->len < ETH_ALEN only part of addr is
		 * filled in and the rest is used uninitialised below.
		 */
		skb_copy_from_linear_data(skb, addr, min_t(size_t, sizeof addr,
							   skb->len));
		/* The top 6 bits of the CRC select one of 64 filter bits. */
		bit_nr = ether_crc(sizeof addr, addr) >> 26;
		if ((tun->if_flags & IFF_PROMISC) ||
				memcmp(addr, tun->dev_addr, sizeof addr) == 0 ||
				memcmp(addr, ones, sizeof addr) == 0 ||
				(((addr[0] == 1 && addr[1] == 0 && addr[2] == 0x5e) ||
				  (addr[0] == 0x33 && addr[1] == 0x33)) &&
				 ((tun->if_flags & IFF_ALLMULTI) ||
				  (tun->chr_filter[bit_nr >> 5] & (1 << (bit_nr & 31)))))) {
			DBG(KERN_DEBUG "%s: tun_chr_readv: accepted: %x:%x:%x:%x:%x:%x\n",
					tun->dev->name, addr[0], addr[1], addr[2],
					addr[3], addr[4], addr[5]);
			ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
			kfree_skb(skb);
			break;
		} else {
			DBG(KERN_DEBUG "%s: tun_chr_readv: rejected: %x:%x:%x:%x:%x:%x\n",
					tun->dev->name, addr[0], addr[1], addr[2],
					addr[3], addr[4], addr[5]);
			kfree_skb(skb);
			continue;
		}
	}

	current->state = TASK_RUNNING;
	remove_wait_queue(&tun->read_wait, &wait);

	return ret;
}

skb_dequeue(src/net/core/skbuff.c)

/**

* skb_dequeue ‐ remove from the head of the queue

* @list : list to dequeue from

*

* Remove the head of the list. The list lock is taken so the function

* may be used safely with other locking list functions. The head item is

* returned or %NULL if the list is empty.

*/

struct sk_buff *skb_dequeue(struct sk_buff_head *list)

{

unsigned long flags;

struct sk_buff *result;

spin_lock_irqsave(&list‐>lock, flags);

result = __skb_dequeue(list);

spin_unlock_irqrestore(&list‐>lock, flags);

return result;

}

__skb_dequeue

/**

* __skb_dequeue ‐ remove from the head of the queue

* @list : list to dequeue from

*

* Remove the head of the list. This function does not take any locks

* so must be used with appropriate locks held only. The head item is

* returned or %NULL if the list is empty.

*/

extern struct sk_buff *skb_dequeue(struct sk_buff_head *list);

static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)

{

struct sk_buff *next, *prev, *result;

prev = (struct sk_buff *) list;

next = prev‐>next;

result = NULL;

if (next != prev) {

result = next;

next = next‐>next;

list‐>qlen‐‐;

next‐>prev = prev;

prev‐>next = next;

result‐>next = result‐>prev = NULL;

}

return result;

}

tun_put_user

/* Put packet to the user space buffer */

static __inline__ ssize_t tun_put_user(struct tun_struct *tun,

struct sk_buff *skb,

struct iovec *iv, int len)

{

struct tun_pi pi = { 0, skb‐>protocol };

ssize_t total = 0;

if (!(tun‐>flags & TUN_NO_PI)) {

if ((len ‐= sizeof(pi)) < 0)

return ‐EINVAL;

if (len < skb‐>len) {

/* Packet will be striped */

pi.flags |= TUN_PKT_STRIP;

}

if (memcpy_toiovec(iv, (void *) &pi, sizeof(pi)))

return ‐EFAULT;

total += sizeof(pi);

}

len = min_t(int, skb‐>len, len);

skb_copy_datagram_iovec(skb, 0, iv, len);

total += len;

tun‐>stats.tx_packets++;

tun‐>stats.tx_bytes += len;

return total;

}

tun_chr_aio_write(把数据写入到tun 设备中)

/*
 * Write one packet from the caller's iovec into the tun device.
 *
 * @iocb:  I/O control block; iocb->ki_filp is the open device file
 * @iv:    source I/O vector
 * @count: number of segments in @iv
 * @pos:   file position (unused here)
 *
 * Returns the result of tun_get_user(), or -EBADFD if no device is attached
 * to the file.
 */
static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
			      unsigned long count, loff_t pos)
{
	struct tun_struct *tun = iocb->ki_filp->private_data;

	if (!tun)
		return -EBADFD;

	DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);

	return tun_get_user(tun, (struct iovec *) iv, iov_total(iv, count));
}

tun_get_user

/*
 * Get packet from user space buffer and feed it into the network stack.
 *
 * @tun:   device the packet is injected through
 * @iv:    source I/O vector
 * @count: total number of bytes available in @iv
 *
 * Unless TUN_NO_PI is set, a struct tun_pi header is read first and its
 * protocol field is used for tun-type devices. The payload is copied into a
 * newly allocated skb and passed to netif_rx_ni().
 *
 * Returns @count on success, -EINVAL if @count is smaller than the header,
 * -EFAULT if user memory cannot be copied, or -ENOMEM if no skb could be
 * allocated.
 */
static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count)
{
	struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
	struct sk_buff *skb;
	size_t len = count, align = 0;

	if (!(tun->flags & TUN_NO_PI)) {
		/* len is unsigned: a wrap past zero shows up as len > count. */
		if ((len -= sizeof(pi)) > count)
			return -EINVAL;

		if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
			return -EFAULT;
	}

	/* For tap devices, reserve NET_IP_ALIGN bytes in front of the frame. */
	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV)
		align = NET_IP_ALIGN;

	if (!(skb = alloc_skb(len + align, GFP_KERNEL))) {
		tun->stats.rx_dropped++;
		return -ENOMEM;
	}

	if (align)
		skb_reserve(skb, align);
	if (memcpy_fromiovec(skb_put(skb, len), iv, len)) {
		tun->stats.rx_dropped++;
		kfree_skb(skb);
		return -EFAULT;
	}

	switch (tun->flags & TUN_TYPE_MASK) {
	case TUN_TUN_DEV:
		/* tun: no link-layer header; protocol comes from the pi header. */
		skb_reset_mac_header(skb);
		skb->protocol = pi.proto;
		skb->dev = tun->dev;
		break;
	case TUN_TAP_DEV:
		/* tap: protocol is derived from the Ethernet header. */
		skb->protocol = eth_type_trans(skb, tun->dev);
		break;
	};

	if (tun->flags & TUN_NOCHECKSUM)
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	netif_rx_ni(skb);
	tun->dev->last_rx = jiffies;

	tun->stats.rx_packets++;
	tun->stats.rx_bytes += len;

	return count;
}

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。