积累系统性知识
积聚技术精华
  首页    个人中心    撰写积文    建立课题    订立目标    整理积文    管理课题    管理目标    技能Get    代码积累 
Linux内核VPN实现源码分析(三)
error997 (error997)    2014-11-20 20:14:34      目标    课题
   上篇简单介绍了一下ipip模块和隧道的初始化相关内容,现在开始转入正题,先介绍ipip协议收发包的过程。
   在讲解收发包之前,有一个问题值得注意,就是mtu的问题,由于数据包经过ipip协议后,会在原始的长度上加一个ipip包头的长度,所以使用ipip协议的mtu必须减少一个ipip包头的长度,代码如下。

切换到: 纯代码  
   
/*
 * ipip_tunnel_change_mtu - validate and apply a new MTU on a tunnel device.
 * @dev:     device whose mtu field is updated
 * @new_mtu: requested MTU
 *
 * The accepted range is 68 .. 0xFFF8 - sizeof(struct iphdr); the upper
 * bound is reduced by the size of one IP header.
 *
 * Returns 0 after storing the value in dev->mtu, or -EINVAL when the
 * request is outside the accepted range (dev->mtu is left untouched).
 */
static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
    if (new_mtu >= 68 && new_mtu <= 0xFFF8 - sizeof(struct iphdr)) {
        dev->mtu = new_mtu;
        return 0;
    }

    return -EINVAL;
}


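   new_mtu < 68是因为IPv4规定的最小MTU为68字节(RFC 791:最长60字节的IP头加上8字节的最小分片数据),小于68的设置会被拒绝;这与以太网帧不足60字节时需要在后面用0填充不是一回事。上限0xFFF8 - sizeof(struct iphdr)则是为外层IP头预留空间。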
   接收过程函数,过程比较清晰简单。

切换到: 纯代码  
   
/*
 * ipip_rcv - receive handler for an encapsulated packet.
 * @skb: received buffer; ip_hdr(skb) is the outer IP header on entry
 *
 * Looks up the tunnel matching the outer source/destination addresses
 * while holding ipip_lock for reading.
 *
 * Returns -1 when no tunnel matches (the skb is not touched), otherwise 0.
 * On a match the skb is either freed (xfrm policy check failed) or
 * re-targeted at the tunnel device and passed to netif_rx().
 */
static int ipip_rcv(struct sk_buff *skb)
{
    const struct iphdr *outer = ip_hdr(skb);
    struct ip_tunnel *tunnel;

    read_lock(&ipip_lock);

    tunnel = ipip_tunnel_lookup(dev_net(skb->dev), outer->saddr, outer->daddr);
    if (tunnel == NULL) {
        read_unlock(&ipip_lock);
        return -1;
    }

    if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
        read_unlock(&ipip_lock);
        kfree_skb(skb);
        return 0;
    }

    secpath_reset(skb);

    /* The old network header position becomes the MAC header position. */
    skb->mac_header = skb->network_header;
    skb_reset_network_header(skb);
    skb->protocol = htons(ETH_P_IP);
    skb->pkt_type = PACKET_HOST;

    /* Account the packet to the tunnel device and hand it over. */
    tunnel->dev->stats.rx_packets++;
    tunnel->dev->stats.rx_bytes += skb->len;
    skb->dev = tunnel->dev;

    skb_dst_drop(skb);
    nf_reset(skb);
    ipip_ecn_decapsulate(outer, skb);
    netif_rx(skb);

    read_unlock(&ipip_lock);
    return 0;
}
/


   首先是利用ipip_tunnel_lookup函数根据源目的地址等信息查找所属隧道,如果查不到就bypass过去。然后就是剥去IPIP头,但IPIP并未真正剥去,只是调用skb->mac_header = skb->network_header;skb_reset_network_header(skb);“逻辑”上剥去了,剥去后设置成普通的IP协议,接下来调用netif_rx函数提交给上层的IP层处理。至此接收过程结束。
   查找隧道的函数ipip_tunnel_lookup 函数实现如下:

切换到: 纯代码  
   
/*
 * ipip_tunnel_lookup - find the tunnel that should handle a packet.
 * @net:    network namespace whose per-net IPIP tables are searched
 * @remote: address compared against each tunnel's configured daddr
 * @local:  address compared against each tunnel's configured saddr
 *
 * Only tunnels whose device has IFF_UP set are considered.  The tables
 * are searched in this order, first hit wins:
 *   1. tunnels_r_l - both remote and local must match
 *   2. tunnels_r   - remote must match
 *   3. tunnels_l   - local must match
 *   4. tunnels_wc[0] - taken without any address comparison
 *
 * Returns the matching tunnel, or NULL when none qualifies.
 */
static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
        __be32 remote, __be32 local)
{
    struct ipip_net *ipn = net_generic(net, ipip_net_id);
    unsigned remote_hash = HASH(remote);
    unsigned local_hash = HASH(local);
    struct ip_tunnel *t;

    for (t = ipn->tunnels_r_l[remote_hash ^ local_hash]; t != NULL; t = t->next) {
        if (local != t->parms.iph.saddr)
            continue;
        if (remote != t->parms.iph.daddr)
            continue;
        if (t->dev->flags & IFF_UP)
            return t;
    }

    for (t = ipn->tunnels_r[remote_hash]; t != NULL; t = t->next) {
        if (remote != t->parms.iph.daddr)
            continue;
        if (t->dev->flags & IFF_UP)
            return t;
    }

    for (t = ipn->tunnels_l[local_hash]; t != NULL; t = t->next) {
        if (local != t->parms.iph.saddr)
            continue;
        if (t->dev->flags & IFF_UP)
            return t;
    }

    t = ipn->tunnels_wc[0];
    if (t != NULL && (t->dev->flags & IFF_UP))
        return t;

    return NULL;
}


   其实就是一个根据隧道ip地址查找hash的过程(隧道ip就是没剥去IPIP头的源目的ip地址,不是原来未封装的数据的源目的IP地址)。
   下面是数据发送函数。

切换到: 纯代码  
   
/*
 * ipip_tunnel_xmit - transmit handler of the tunnel device.
 * @skb: packet to send; must carry an IPv4 packet (ETH_P_IP)
 * @dev: the tunnel device the packet was queued on
 *
 * Routes the packet towards the tunnel endpoint, makes sure there is
 * enough headroom, prepends a new outer IP header with protocol
 * IPPROTO_IPIP and passes the result to IPTUNNEL_XMIT().
 *
 * Always returns 0.  On every failure path the skb is freed here and a
 * device statistic is bumped (tx_errors, or tx_dropped when the headroom
 * reallocation fails).
 */
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
    struct ip_tunnel *tunnel = netdev_priv(dev);
    struct net_device_stats *stats = &tunnel->dev->stats;
    struct iphdr  *tiph = &tunnel->parms.iph;
    u8     tos = tunnel->parms.iph.tos;
    __be16 df = tiph->frag_off;
    struct rtable *rt;              /* Route to the other host */
    struct net_device *tdev;            /* Device to other host */
    struct iphdr  *old_iph = ip_hdr(skb);
    struct iphdr  *iph;         /* Our new IP header */
    unsigned int max_headroom;      /* The extra header space needed */
    __be32 dst = tiph->daddr;
    int    mtu;
    /* A non-zero counter means this function is already active on the
     * tunnel; count it as a collision and drop.  Every exit path below
     * decrements the counter again. */
    if (tunnel->recursion++) {
        stats->collisions++;
        goto tx_error;
    }
    /* Only IPv4 payloads are encapsulated. */
    if (skb->protocol != htons(ETH_P_IP))
        goto tx_error;
    /* Low bit of the configured TOS set: take the TOS of the inner packet. */
    if (tos&1)
        tos = old_iph->tos;
    /* No destination configured on the tunnel: use the gateway of the
     * route already attached to the skb. */
    if (!dst) {
        /* NBMA tunnel */
        if ((rt = skb_rtable(skb)) == NULL) {
            stats->tx_fifo_errors++;
            goto tx_error;
        }
        if ((dst = rt->rt_gateway) == 0)
            goto tx_error_icmp;
    }
    /* Look up the route for the outer packet. */
    {
        struct flowi fl = { .oif = tunnel->parms.link,
                    .nl_u = { .ip4_u =
                          { .daddr = dst,
                        .saddr = tiph->saddr,
                        .tos = RT_TOS(tos) } },
                    .proto = IPPROTO_IPIP };
        if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
            stats->tx_carrier_errors++;
            goto tx_error_icmp;
        }
    }
    tdev = rt->u.dst.dev;
    /* The route leads back to this tunnel device: drop as a collision. */
    if (tdev == dev) {
        ip_rt_put(rt);
        stats->collisions++;
        goto tx_error;
    }
    /* With frag_off set in the tunnel template the usable MTU is the
     * route MTU minus the outer header; otherwise fall back to the MTU
     * of the skb's own dst, or of the tunnel device. */
    if (tiph->frag_off)
        mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
    else
        mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
    if (mtu < 68) {
        stats->collisions++;
        ip_rt_put(rt);
        goto tx_error;
    }
    if (skb_dst(skb))
        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
    /* Carry the DF bit of the inner header over to the outer header. */
    df |= (old_iph->frag_off&htons(IP_DF));
    /* Inner packet has DF set and is larger than the MTU: report
     * ICMP_FRAG_NEEDED to the sender and drop. */
    if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
        ip_rt_put(rt);
        goto tx_error;
    }
    /* While errors are pending and err_time is less than
     * IPTUNNEL_ERR_TIMEO in the past, signal a link failure for this
     * skb (transmission still continues); otherwise clear the count. */
    if (tunnel->err_count > 0) {
        if (time_before(jiffies,
                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
            tunnel->err_count--;
            dst_link_failure(skb);
        } else
            tunnel->err_count = 0;
    }
    /*
     * Okay, now see if we can stuff it in the buffer as-is.
     */
    max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
    /* Reallocate when headroom is too small, or the skb is shared, or it
     * is a clone that is not writable. */
    if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
        (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
        struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
        if (!new_skb) {
            ip_rt_put(rt);
            stats->tx_dropped++;
            dev_kfree_skb(skb);
            tunnel->recursion--;
            return 0;
        }
        if (skb->sk)
            skb_set_owner_w(new_skb, skb->sk);
        dev_kfree_skb(skb);
        skb = new_skb;
        /* The header now lives in the new buffer; refresh the pointer. */
        old_iph = ip_hdr(skb);
    }
    /* The inner IP header becomes the transport header; then make room
     * for the outer header and mark it as the network header. */
    skb->transport_header = skb->network_header;
    skb_push(skb, sizeof(struct iphdr));
    skb_reset_network_header(skb);
    memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
    IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                  IPSKB_REROUTED);
    /* Replace the skb's dst with the route found for the outer packet. */
    skb_dst_drop(skb);
    skb_dst_set(skb, &rt->u.dst);
    /*
     *  Push down and install the IPIP header.
     */
    iph             =   ip_hdr(skb);
    iph->version     =   4;
    iph->ihl     =   sizeof(struct iphdr)>>2;
    iph->frag_off        =   df;
    iph->protocol        =   IPPROTO_IPIP;
    iph->tos     =   INET_ECN_encapsulate(tos, old_iph->tos);
    iph->daddr       =   rt->rt_dst;
    iph->saddr       =   rt->rt_src;
    /* Configured TTL of 0 means: copy the TTL of the inner header. */
    if ((iph->ttl = tiph->ttl) == 0)
        iph->ttl =   old_iph->ttl;
    nf_reset(skb);
    /* NOTE(review): IPTUNNEL_XMIT() is a macro defined outside this
     * listing; presumably it completes the header and sends the packet —
     * confirm against its definition. */
    IPTUNNEL_XMIT();
    tunnel->recursion--;
    return 0;
tx_error_icmp:
    dst_link_failure(skb);
    /* falls through to the common error exit */
tx_error:
    stats->tx_errors++;
    dev_kfree_skb(skb);
    tunnel->recursion--;
    return 0;
}
sta


   总的来看,发送过程主要包含两个方面,一是查找路由,另外一个是按照IPIP协议构造新的IP包。
   查找路由的过程这里就不详细介绍了,构造新的IPIP包首先使用skb_headroom检查剩余的skb buff空间是否能容纳IPIP头,不能容纳的话,重新使用skb_realloc_headroom分配一个buf空间,其实这个也可以用skb_copy_expand函数搞定。分配完毕后填充IPIP头字段即可。
   至此,IPIP协议大部分逻辑已经描述清楚,下面还有一个函数值得注意。

切换到: 纯代码  
   
/*
 * ipip_tunnel_ioctl - tunnel management ioctl of the tunnel device.
 * @dev: device the ioctl was issued on
 * @ifr: request; ifr->ifr_ifru.ifru_data points to a user-space
 *       struct ip_tunnel_parm that is read and/or written
 * @cmd: SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL or SIOCDELTUNNEL
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EPERM (missing CAP_NET_ADMIN, or attempt to delete the fallback
 * device), -EINVAL (bad parameters or unknown cmd), -EEXIST, -ENOBUFS,
 * -ENOENT.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
    int err = 0;
    struct ip_tunnel_parm p;
    struct ip_tunnel *t;
    struct net *net = dev_net(dev);
    struct ipip_net *ipn = net_generic(net, ipip_net_id);
    switch (cmd) {
    case SIOCGETTUNNEL:
        /* On the fallback device the caller names the tunnel through the
         * supplied parameters; on any other device, or when nothing is
         * located, the device's own tunnel is reported. */
        t = NULL;
        if (dev == ipn->fb_tunnel_dev) {
            if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                err = -EFAULT;
                break;
            }
            t = ipip_tunnel_locate(net, &p, 0);
        }
        if (t == NULL)
            t = netdev_priv(dev);
        memcpy(&p, &t->parms, sizeof(p));
        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
            err = -EFAULT;
        break;
    case SIOCADDTUNNEL:
    case SIOCCHGTUNNEL:
        /* err is preset before each check so that "goto done" reports
         * the error belonging to the check that failed. */
        err = -EPERM;
        if (!capable(CAP_NET_ADMIN))
            goto done;
        err = -EFAULT;
        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
            goto done;
        /* Header template must be IPv4, protocol IPIP, ihl 5, and may
         * have no frag_off bit set other than DF. */
        err = -EINVAL;
        if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
            p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
            goto done;
        /* A non-zero TTL forces the DF bit on. */
        if (p.iph.ttl)
            p.iph.frag_off |= htons(IP_DF);
        /* Third argument is non-zero only for SIOCADDTUNNEL
         * (presumably "create if missing" — confirm against
         * ipip_tunnel_locate). */
        t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
        if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
            if (t != NULL) {
                /* The requested parameters already belong to a tunnel
                 * on a different device. */
                if (t->dev != dev) {
                    err = -EEXIST;
                    break;
                }
            } else {
                /* IFF_POINTOPOINT must be set exactly when a
                 * destination address is given. */
                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
                    err = -EINVAL;
                    break;
                }
                /* Change the endpoints of this device's own tunnel:
                 * unlink it, update the addresses, link it again. */
                t = netdev_priv(dev);
                ipip_tunnel_unlink(ipn, t);
                t->parms.iph.saddr = p.iph.saddr;
                t->parms.iph.daddr = p.iph.daddr;
                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                memcpy(dev->broadcast, &p.iph.daddr, 4);
                ipip_tunnel_link(ipn, t);
                netdev_state_change(dev);
            }
        }
        if (t) {
            err = 0;
            if (cmd == SIOCCHGTUNNEL) {
                t->parms.iph.ttl = p.iph.ttl;
                t->parms.iph.tos = p.iph.tos;
                t->parms.iph.frag_off = p.iph.frag_off;
                /* Underlying link changed: bind again. */
                if (t->parms.link != p.link) {
                    t->parms.link = p.link;
                    ipip_tunnel_bind_dev(dev);
                    netdev_state_change(dev);
                }
            }
            /* Report the resulting tunnel parameters to the caller. */
            if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                err = -EFAULT;
        } else
            err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
        break;
    case SIOCDELTUNNEL:
        err = -EPERM;
        if (!capable(CAP_NET_ADMIN))
            goto done;
        /* On the fallback device the tunnel to delete is named by the
         * supplied parameters; the fallback device itself is refused. */
        if (dev == ipn->fb_tunnel_dev) {
            err = -EFAULT;
            if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                goto done;
            err = -ENOENT;
            if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
                goto done;
            err = -EPERM;
            if (t->dev == ipn->fb_tunnel_dev)
                goto done;
            dev = t->dev;
        }
        unregister_netdevice(dev);
        err = 0;
        break;
    default:
        err = -EINVAL;
    }
done:
    return err;
}


   这个函数就是与应用程序交互的地方,说白了,就是与linux系统命令ifconfig、ip等命令对接的接口,创建隧道,删除隧道,改变ip地址,等等,本文就不详细介绍了。
   IPIP协议的所有内容已经介绍完毕,原理是不是很简单呢?其实IP_GRE协议与IPIP协议差不多,最先是由思科公司发明的一种协议,在linux中的实现代码在ip_gre.c中,具体实现与IPIP协议极其类似,这里也不多说了。至于open_swan 的IPSec协议实现稍稍复杂一些,多了一些加密过程,以及前面的密钥交换过程,大家可以仔细研究。《Linux内核VPN实现源码分析》就此完结,欢迎大家关注后续的内容。

转自 http://blog.csdn.net/y___y/article/details/5669994
(+0)技能Get

建议楼主:搜索关键字 |参考其他资源 |回复 |追问
  error997(error997):   个人中心    课题    目标    代码积累