0


iptables与内核的交互

关于iptables的工作原理,主要分为三个方面:用户程序对规则的处理,内核对用户命令的处理,内核中netfilter对数据包的过滤(Ref:netfilter分析3-钩子函数执行流程)。
 本文大致分析iptables用户态程序如何解析规则,并将规则配置到内核中。以如下命令为例:

iptables -A INPUT -i eth0 -p tcp -s 192.168.100.0/24 --dport 22 -m state --state NEW,ESTABLISHED -j ACCEPT
iptables -A OUTPUT -o eth0 -p tcp --sport 22 -m state --state ESTABLISHED -j ACCEPT

主要分析第一句:

iptables -A INPUT -i eth0 -p tcp -s 192.168.100.0/24 --dport 22 -m state --state NEW,ESTABLISHED -j ACCEPT

用户空间

代码版本:iptables-1.8.7。
 iptables的客户端和内核共享一些数据结构。例如:
ipt_entry 、xt_entry_match、xt_tcp。

structipt_entry{structipt_ip ip;/* Mark with fields that we care about. */unsignedint nfcache;/* Size of ipt_entry + matches */
    __u16 target_offset;/* Size of ipt_entry + matches + target */
    __u16 next_offset;/* Back pointer */unsignedint comefrom;/* Packet and byte counters. */structxt_counters counters;/* The matches (if any), then the target. */unsignedchar elems[0];};structxt_entry_match{union{struct{
            __u16 match_size;/* Used by userspace */char name[XT_EXTENSION_MAXNAMELEN];
            __u8 revision;} user;struct{
            __u16 match_size;/* Used inside the kernel */structxt_match*match;} kernel;/* Total length */
        __u16 match_size;} u;unsignedchar data[0];};structxt_tcp{
    __u16 spts[2];/* Source port range. */
    __u16 dpts[2];/* Destination port range. */
    __u8 option;/* TCP Option iff non-zero*/
    __u8 flg_mask;/* TCP flags mask byte */
    __u8 flg_cmp;/* TCP flags compare byte */
    __u8 invflags;/* Inverse flags */};

主函数为iptables_main(iptables-standalone.c)。

/*
 * Entry point shown for the standalone iptables binary: runs do_command4()
 * on the command line with "filter" as the initial table name and, when it
 * returns non-zero, commits the handle with iptc_commit() and frees it.
 */
int iptables_main(int argc, char *argv[])
{
    char *table = "filter";
    struct xtc_handle *handle = NULL;

    /* ... declaration of ret and other setup elided in this excerpt ... */

    ret = do_command4(argc, argv, &table, &handle, false);
    if (ret) {
        ret = iptc_commit(handle);
        iptc_free(handle);
    }

    /* ... remainder of the function elided in this excerpt ... */
}

-A INPUT的解析代码:

intdo_command4(int argc,char*argv[],char**table,structxtc_handle**handle, bool restore){case'A':add_command(&command, CMD_APPEND, CMD_NONE,
                    cs.invert);
            chain = optarg;break;}

-i eth0的解析代码:

intdo_command4(int argc,char*argv[],char**table,structxtc_handle**handle, bool restore){case'i':if(*optarg =='\0')xtables_error(PARAMETER_PROBLEM,"Empty interface is likely to be ""undesired");set_option(&cs.options, OPT_VIANAMEIN,&cs.fw.ip.invflags,
                   cs.invert);xtables_parse_interface(optarg,
                    cs.fw.ip.iniface,
                    cs.fw.ip.iniface_mask);break;}

-p tcp -s 192.168.100.0/24 --dport 22
 ip段(192.168.100.0/24)的解析:

intdo_command4(int argc,char*argv[],char**table,structxtc_handle**handle, bool restore){if(shostnetworkmask)xtables_ipparse_multiple(shostnetworkmask,&saddrs,&smasks,&nsaddrs);if(dhostnetworkmask)xtables_ipparse_multiple(dhostnetworkmask,&daddrs,&dmasks,&ndaddrs);}

--dport 22的参数解析,需要tcp_match模块,命令中已经指定了协议(-p tcp)。

staticstructxtables_match tcp_match ={.family     = NFPROTO_UNSPEC,.name       ="tcp",.version    = XTABLES_VERSION,.size       =XT_ALIGN(sizeof(structxt_tcp)),.userspacesize  =XT_ALIGN(sizeof(structxt_tcp)),.help       = tcp_help,.init       = tcp_init,.parse      = tcp_parse,.print      = tcp_print,.save       = tcp_save,.extra_opts = tcp_opts,.xlate      = tcp_xlate,};

相应的解析函数:

intcommand_default(structiptables_command_state*cs,structxtables_globals*gl){if(cs->target !=NULL&&(cs->target->parse !=NULL|| cs->target->x6_parse !=NULL)&&
        cs->c >= cs->target->option_offset &&
        cs->c < cs->target->option_offset + XT_OPTION_OFFSET_SCALE){xtables_option_tpcall(cs->c, cs->argv, cs->invert,
                      cs->target,&cs->fw);return0;}for(matchp = cs->matches; matchp; matchp = matchp->next){
        m = matchp->match;if(matchp->completed ||(m->x6_parse ==NULL&& m->parse ==NULL))continue;if(cs->c < matchp->match->option_offset ||
            cs->c >= matchp->match->option_offset + XT_OPTION_OFFSET_SCALE)continue;xtables_option_mpcall(cs->c, cs->argv, cs->invert, m,&cs->fw);return0;}/* Try loading protocol */
    m =load_proto(cs);if(m !=NULL){size_t size;

        cs->proto_used =1;

        size =XT_ALIGN(sizeof(structxt_entry_match))+ m->size;

        m->m =xtables_calloc(1, size);
        m->m->u.match_size = size;strcpy(m->m->u.user.name, m->name);
        m->m->u.user.revision = m->revision;xs_init_match(m);if(m->x6_options !=NULL)
            gl->opts =xtables_options_xfrm(gl->orig_opts,
                            gl->opts,
                            m->x6_options,&m->option_offset);else
            gl->opts =xtables_merge_options(gl->orig_opts,
                             gl->opts,
                             m->extra_opts,&m->option_offset);if(gl->opts ==NULL)xtables_error(OTHER_PROBLEM,"can't alloc memory!");
        optind--;/* Indicate to rerun getopt *immediately* */return1;}}voidxtables_option_mpcall(unsignedint c,char**argv, bool invert,structxtables_match*m,void*fw){if(m->x6_parse ==NULL){if(m->parse !=NULL)
        m->parse(c - m->option_offset, argv, invert,&m->mflags, fw,&m->m);return;}}

tcp_parse会将端口数据写入struct xt_tcp中。
 load_proto会按照protocol查找并加载对应的xtables_match。

structxtables_match*load_proto(structiptables_command_state*cs){if(!should_load_proto(cs))returnNULL;returnfind_proto(cs->protocol, XTF_TRY_LOAD,
              cs->options & OPT_NUMERIC,&cs->matches);}staticstructxtables_match*find_proto(constchar*pname,enumxtables_tryload tryload,int nolookup,structxtables_rule_match**matches){returnxtables_find_match(pname, tryload, matches);}

命令行中的数据会加载到struct xt_entry_match。之后被复制到struct ipt_entry中。

staticstructipt_entry*generate_entry(conststructipt_entry*fw,structxtables_rule_match*matches,structxt_entry_target*target){unsignedint size;structxtables_rule_match*matchp;structipt_entry*e;

    size =sizeof(structipt_entry);for(matchp = matches; matchp; matchp = matchp->next)
        size += matchp->match->m->u.match_size;

    e =xtables_malloc(size + target->u.target_size);*e =*fw;
    e->target_offset = size;
    e->next_offset = size + target->u.target_size;

    size =0;for(matchp = matches; matchp; matchp = matchp->next){//复制match中的数据memcpy(e->elems + size, matchp->match->m, matchp->match->m->u.match_size);
        size += matchp->match->m->u.match_size;}memcpy(e->elems + size, target, target->u.target_size);return e;}

数据复制。

staticintappend_entry(const xt_chainlabel chain,structipt_entry*fw,unsignedint nsaddrs,conststructin_addr saddrs[],conststructin_addr smasks[],unsignedint ndaddrs,conststructin_addr daddrs[],conststructin_addr dmasks[],int verbose,structxtc_handle*handle){for(i =0; i < nsaddrs; i++){
        fw->ip.src.s_addr = saddrs[i].s_addr;
        fw->ip.smsk.s_addr = smasks[i].s_addr;for(j =0; j < ndaddrs; j++){
            fw->ip.dst.s_addr = daddrs[j].s_addr;
            fw->ip.dmsk.s_addr = dmasks[j].s_addr;if(verbose)print_firewall_line(fw, handle);
            ret &=iptc_append_entry(chain, fw, handle);}}return ret;}iptc_append_entry(const IPT_CHAINLABEL chain,const STRUCT_ENTRY *e,structxtc_handle*handle){if(!(r =iptcc_alloc_rule(c, e->next_offset))){DEBUGP("unable to allocate rule for chain `%s'\n", chain);
        errno = ENOMEM;return0;}memcpy(r->entry, e, e->next_offset);}/* allocate and initialize a new rule for the cache */staticstructrule_head*iptcc_alloc_rule(structchain_head*c,unsignedint size){
    r->chain = c;
    r->size = size;return r;}

解析action,-j ACCEPT。

intdo_command4(int argc,char*argv[],char**table,structxtc_handle**handle, bool restore){case'j':set_option(&cs.options, OPT_JUMP,&cs.fw.ip.invflags,
                   cs.invert);command_jump(&cs, optarg);break;}voidcommand_jump(structiptables_command_state*cs,constchar*jumpto){
    cs->jumpto =xt_parse_target(jumpto);/* TRY_LOAD (may be chain name) */
    cs->target =xtables_find_target(cs->jumpto, XTF_TRY_LOAD);if(cs->target ==NULL)return;

    size =XT_ALIGN(sizeof(structxt_entry_target))+ cs->target->size;

    cs->target->t =xtables_calloc(1, size);
    cs->target->t->u.target_size = size;}

ACCEPT,DROP,QUEUE,RETURN对应的是standard target。

staticstructxtables_target standard_target ={.family     = NFPROTO_UNSPEC,.name       ="standard",.version    = XTABLES_VERSION,.size       =XT_ALIGN(sizeof(int)),.userspacesize  =XT_ALIGN(sizeof(int)),.help       = standard_help,};

xt_entry_target分配的大小:

size =XT_ALIGN(sizeof(structxt_entry_target))+ cs->target->size;
cs->target->t =xtables_calloc(1, size);

standard target的target->size大小为XT_ALIGN(sizeof(int))。最终分配的结构体为xt_standard_target 。

structxt_standard_target{structxt_entry_target target;int verdict;};

整理成内核需要的格式,向内核提交:

intTC_COMMIT(structxtc_handle*handle){/* Replace, then map back the counters. */
    STRUCT_REPLACE *repl;
    new_number =iptcc_compile_table_prep(handle,&new_size);
    ret =iptcc_compile_table(handle, repl);
    ret =setsockopt(handle->sockfd, TC_IPPROTO, SO_SET_REPLACE, repl,sizeof(*repl)+ repl->size);}

内核空间

staticintdo_ipt_set_ctl(structsock*sk,int cmd,void __user *user,unsignedint len){switch(cmd){case IPT_SO_SET_REPLACE:
        ret =do_replace(sock_net(sk), user, len);break;default:
        ret =-EINVAL;}return ret;}staticintdo_replace(structnet*net,constvoid __user *user,unsignedint len){
    newinfo =xt_alloc_table_info(tmp.size);if(!newinfo)return-ENOMEM;

    loc_cpu_entry = newinfo->entries;if(copy_from_user(loc_cpu_entry, user +sizeof(tmp),
               tmp.size)!=0){
        ret =-EFAULT;goto free_newinfo;}
    ret =translate_table(net, newinfo, loc_cpu_entry,&tmp);if(ret !=0)goto free_newinfo;
    
    ret =__do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
            tmp.num_counters, tmp.counters);}staticint__do_replace(structnet*net,constchar*name,unsignedint valid_hooks,structxt_table_info*newinfo,unsignedint num_counters,void __user *counters_ptr){structxt_table*t;
    t =xt_request_find_table_lock(net, AF_INET, name);
    oldinfo =xt_replace_table(t, num_counters, newinfo,&ret);}structxt_table_info*xt_replace_table(structxt_table*table,unsignedint num_counters,structxt_table_info*newinfo,int*error){
    table->private = newinfo;}

原有规则的处理

用户层调用setsockopt将数据配置到内核。do_replace函数会重新配置规则。但是用户可以多次配置iptables。这里就引入一个问题:之前内核中的iptables规则到哪里去了呢?难道被冲掉了吗?
 iptables在重新解析规则时,会调用getsockopt将内核中的规则拷贝出来,然后重新配置。

intdo_command4(int argc,char*argv[],char**table,structxtc_handle**handle, bool restore){/* only allocate handle if we weren't called with a handle */if(!*handle)*handle =iptc_init(*table);}structxtc_handle*iptc_init(constchar*tablename){strcpy(info.name, tablename);//获取entry的大小信息。if(getsockopt(sockfd, TC_IPPROTO, SO_GET_INFO,&info,&s)<0){close(sockfd);returnNULL;}
    h =alloc_handle(&info);/* Initialize current state */
    h->sockfd = sockfd;
    h->info = info;

    h->entries->size = h->info.size;

    tmp =sizeof(STRUCT_GET_ENTRIES)+ h->info.size;if(getsockopt(h->sockfd, TC_IPPROTO, SO_GET_ENTRIES, h->entries,&tmp)<0)goto error;}

读取规则信息之后,iptables重新处理数据:

/* parse an iptables blob into it's pieces */staticintparse_table(structxtc_handle*h){/* First pass: over ruleset blob */ENTRY_ITERATE(h->entries->entrytable, h->entries->size,
            cache_add_entry, h,&prev,&num);}/* main parser function: add an entry from the blob to the cache */staticintcache_add_entry(STRUCT_ENTRY *e,structxtc_handle*h,
               STRUCT_ENTRY **prev,unsignedint*num){elseif((builtin =iptcb_ent_is_hook_entry(e, h))!=0){structchain_head*c =iptcc_alloc_chain_head((char*)hooknames[builtin-1],
                        builtin);DEBUGP_C("%u:%u new builtin chain: %p (rules=%p)\n",*num, offset, c,&c->rules);if(!c){
            errno =-ENOMEM;return-1;}

        c->hooknum = builtin;__iptcc_p_add_chain(h, c, offset, num);/* FIXME: this is ugly. */goto new_rule;}}

内核中在初始化table的时候,会配置chain。博客——netfilter分析2-表在内核的初始化——有更详尽的分析。
 以filter表为例:

staticint __net_init iptable_filter_table_init(structnet*net){
    repl =ipt_alloc_initial_table(&packet_filter);}void*ipt_alloc_initial_table(conststructxt_table*info){returnxt_alloc_initial_table(ipt, IPT);}#definext_alloc_initial_table(type, typ2)({\struct{\structtype##_replace repl;\structtype##_standard entries[];\}*tbl;\structtype##_error *term;\size_t term_offset =(offsetof(typeof(*tbl), entries[nhooks])+\__alignof__(*term)-1)&~(__alignof__(*term)-1);\tbl =kzalloc(term_offset +sizeof(*term), GFP_KERNEL);\for(; hook_mask !=0; hook_mask >>=1,++hooknum){\if(!(hook_mask &1))\continue;\tbl->repl.hook_entry[hooknum]= bytes;\tbl->repl.underflow[hooknum]= bytes;\tbl->entries[i++]=(structtype##_standard)\typ2##_STANDARD_INIT(NF_ACCEPT);\bytes +=sizeof(structtype##_standard);\}\tbl;\})

链接来源:
https://www.jianshu.com/p/ec04b7c73cfa#

标签: 网络 tcp/ip c语言

本文转载自: https://blog.csdn.net/buhuidage/article/details/136440650
版权归原作者 不悔哥 所有, 如有侵权,请联系我们删除。

“iptables与内核的交互”的评论:

还没有评论