关于iptables的工作原理,主要分为三个方面:用户程序对规则的处理,内核对用户命令的处理,内核中netfilter对数据包的过滤(Ref:netfilter分析3-钩子函数执行流程)。
本文大致分析iptables用户态程序如何解析规则,并将规则配置到内核中。以如下命令为例:
iptables -A INPUT -i eth0 -p tcp -s 192.168.100.0/24 --dport 22 -m state --state NEW,ESTABLISHED -j ACCEPT
iptables -A OUTPUT -o eth0 -p tcp --sport 22 -m state --state ESTABLISHED -j ACCEPT
主要分析第一句:
iptables -A INPUT -i eth0 -p tcp -s 192.168.100.0/24 --dport 22 -m state --state NEW,ESTABLISHED -j ACCEPT
用户空间
代码版本:iptables-1.8.7。
iptables的客户端和内核共享一些数据结构。例如:
ipt_entry 、xt_entry_match、xt_tcp。
structipt_entry{structipt_ip ip;/* Mark with fields that we care about. */unsignedint nfcache;/* Size of ipt_entry + matches */
__u16 target_offset;/* Size of ipt_entry + matches + target */
__u16 next_offset;/* Back pointer */unsignedint comefrom;/* Packet and byte counters. */structxt_counters counters;/* The matches (if any), then the target. */unsignedchar elems[0];};structxt_entry_match{union{struct{
__u16 match_size;/* Used by userspace */char name[XT_EXTENSION_MAXNAMELEN];
__u8 revision;} user;struct{
__u16 match_size;/* Used inside the kernel */structxt_match*match;} kernel;/* Total length */
__u16 match_size;} u;unsignedchar data[0];};structxt_tcp{
__u16 spts[2];/* Source port range. */
__u16 dpts[2];/* Destination port range. */
__u8 option;/* TCP Option iff non-zero*/
__u8 flg_mask;/* TCP flags mask byte */
__u8 flg_cmp;/* TCP flags compare byte */
__u8 invflags;/* Inverse flags */};
主函数为iptables_main(iptables-standalone.c)。
/*
 * Entry point of the standalone iptables binary (excerpt; the
 * declaration of ret and the final return are not shown in the article).
 *
 * Parses the command line against the "filter" table by default and,
 * when parsing succeeds, commits the handle and frees it.
 */
int iptables_main(int argc, char *argv[])
{
	char *table = "filter";
	struct xtc_handle *handle = NULL;

	ret = do_command4(argc, argv, &table, &handle, false);
	if (ret) {
		ret = iptc_commit(handle);
		iptc_free(handle);
	}
}
-A INPUT的解析代码:
/*
 * do_command4 (excerpt): handling of "-A <chain>".
 * Only the relevant case label of the option switch is shown.
 */
int do_command4(int argc, char *argv[], char **table,
		struct xtc_handle **handle, bool restore)
{
	case 'A':
		/* Record the append command and remember the chain name. */
		add_command(&command, CMD_APPEND, CMD_NONE,
			    cs.invert);
		chain = optarg;
		break;
}
-i eth0的解析代码:
/*
 * do_command4 (excerpt): handling of "-i <interface>".
 * Only the relevant case label of the option switch is shown.
 */
int do_command4(int argc, char *argv[], char **table,
		struct xtc_handle **handle, bool restore)
{
	case 'i':
		/* An empty interface name is rejected as a parameter problem. */
		if (*optarg == '\0')
			xtables_error(PARAMETER_PROBLEM,
				      "Empty interface is likely to be "
				      "undesired");
		set_option(&cs.options, OPT_VIANAMEIN, &cs.fw.ip.invflags,
			   cs.invert);
		/* Fill the input-interface name and mask of the rule being built. */
		xtables_parse_interface(optarg,
					cs.fw.ip.iniface,
					cs.fw.ip.iniface_mask);
		break;
}
-p tcp -s 192.168.100.0/24 --dport 22
ip段(192.168.100.0/24)的解析:
/*
 * do_command4 (excerpt): parsing of the source and destination
 * address/mask strings into address and mask arrays.
 */
int do_command4(int argc, char *argv[], char **table,
		struct xtc_handle **handle, bool restore)
{
	if (shostnetworkmask)
		xtables_ipparse_multiple(shostnetworkmask,
					 &saddrs, &smasks, &nsaddrs);
	if (dhostnetworkmask)
		xtables_ipparse_multiple(dhostnetworkmask,
					 &daddrs, &dmasks, &ndaddrs);
}
--dport 22的参数解析,需要tcp_match模块,命令中已经指定了协议(-p tcp)。
staticstructxtables_match tcp_match ={.family = NFPROTO_UNSPEC,.name ="tcp",.version = XTABLES_VERSION,.size =XT_ALIGN(sizeof(structxt_tcp)),.userspacesize =XT_ALIGN(sizeof(structxt_tcp)),.help = tcp_help,.init = tcp_init,.parse = tcp_parse,.print = tcp_print,.save = tcp_save,.extra_opts = tcp_opts,.xlate = tcp_xlate,};
相应的解析函数:
intcommand_default(structiptables_command_state*cs,structxtables_globals*gl){if(cs->target !=NULL&&(cs->target->parse !=NULL|| cs->target->x6_parse !=NULL)&&
cs->c >= cs->target->option_offset &&
cs->c < cs->target->option_offset + XT_OPTION_OFFSET_SCALE){xtables_option_tpcall(cs->c, cs->argv, cs->invert,
cs->target,&cs->fw);return0;}for(matchp = cs->matches; matchp; matchp = matchp->next){
m = matchp->match;if(matchp->completed ||(m->x6_parse ==NULL&& m->parse ==NULL))continue;if(cs->c < matchp->match->option_offset ||
cs->c >= matchp->match->option_offset + XT_OPTION_OFFSET_SCALE)continue;xtables_option_mpcall(cs->c, cs->argv, cs->invert, m,&cs->fw);return0;}/* Try loading protocol */
m =load_proto(cs);if(m !=NULL){size_t size;
cs->proto_used =1;
size =XT_ALIGN(sizeof(structxt_entry_match))+ m->size;
m->m =xtables_calloc(1, size);
m->m->u.match_size = size;strcpy(m->m->u.user.name, m->name);
m->m->u.user.revision = m->revision;xs_init_match(m);if(m->x6_options !=NULL)
gl->opts =xtables_options_xfrm(gl->orig_opts,
gl->opts,
m->x6_options,&m->option_offset);else
gl->opts =xtables_merge_options(gl->orig_opts,
gl->opts,
m->extra_opts,&m->option_offset);if(gl->opts ==NULL)xtables_error(OTHER_PROBLEM,"can't alloc memory!");
optind--;/* Indicate to rerun getopt *immediately* */return1;}}voidxtables_option_mpcall(unsignedint c,char**argv, bool invert,structxtables_match*m,void*fw){if(m->x6_parse ==NULL){if(m->parse !=NULL)
m->parse(c - m->option_offset, argv, invert,&m->mflags, fw,&m->m);return;}}
tcp_parse会将端口数据写入struct xt_tcp中。
load_proto会按照protocol查找并加载对应的xtables_match。
structxtables_match*load_proto(structiptables_command_state*cs){if(!should_load_proto(cs))returnNULL;returnfind_proto(cs->protocol, XTF_TRY_LOAD,
cs->options & OPT_NUMERIC,&cs->matches);}staticstructxtables_match*find_proto(constchar*pname,enumxtables_tryload tryload,int nolookup,structxtables_rule_match**matches){returnxtables_find_match(pname, tryload, matches);}
命令行中的数据会加载到struct xt_entry_match。之后被复制到struct ipt_entry中。
staticstructipt_entry*generate_entry(conststructipt_entry*fw,structxtables_rule_match*matches,structxt_entry_target*target){unsignedint size;structxtables_rule_match*matchp;structipt_entry*e;
size =sizeof(structipt_entry);for(matchp = matches; matchp; matchp = matchp->next)
size += matchp->match->m->u.match_size;
e =xtables_malloc(size + target->u.target_size);*e =*fw;
e->target_offset = size;
e->next_offset = size + target->u.target_size;
size =0;for(matchp = matches; matchp; matchp = matchp->next){//复制match中的数据memcpy(e->elems + size, matchp->match->m, matchp->match->m->u.match_size);
size += matchp->match->m->u.match_size;}memcpy(e->elems + size, target, target->u.target_size);return e;}
数据复制。
staticintappend_entry(const xt_chainlabel chain,structipt_entry*fw,unsignedint nsaddrs,conststructin_addr saddrs[],conststructin_addr smasks[],unsignedint ndaddrs,conststructin_addr daddrs[],conststructin_addr dmasks[],int verbose,structxtc_handle*handle){for(i =0; i < nsaddrs; i++){
fw->ip.src.s_addr = saddrs[i].s_addr;
fw->ip.smsk.s_addr = smasks[i].s_addr;for(j =0; j < ndaddrs; j++){
fw->ip.dst.s_addr = daddrs[j].s_addr;
fw->ip.dmsk.s_addr = dmasks[j].s_addr;if(verbose)print_firewall_line(fw, handle);
ret &=iptc_append_entry(chain, fw, handle);}}return ret;}iptc_append_entry(const IPT_CHAINLABEL chain,const STRUCT_ENTRY *e,structxtc_handle*handle){if(!(r =iptcc_alloc_rule(c, e->next_offset))){DEBUGP("unable to allocate rule for chain `%s'\n", chain);
errno = ENOMEM;return0;}memcpy(r->entry, e, e->next_offset);}/* allocate and initialize a new rule for the cache */staticstructrule_head*iptcc_alloc_rule(structchain_head*c,unsignedint size){
r->chain = c;
r->size = size;return r;}
解析action,-j ACCEPT。
intdo_command4(int argc,char*argv[],char**table,structxtc_handle**handle, bool restore){case'j':set_option(&cs.options, OPT_JUMP,&cs.fw.ip.invflags,
cs.invert);command_jump(&cs, optarg);break;}voidcommand_jump(structiptables_command_state*cs,constchar*jumpto){
cs->jumpto =xt_parse_target(jumpto);/* TRY_LOAD (may be chain name) */
cs->target =xtables_find_target(cs->jumpto, XTF_TRY_LOAD);if(cs->target ==NULL)return;
size =XT_ALIGN(sizeof(structxt_entry_target))+ cs->target->size;
cs->target->t =xtables_calloc(1, size);
cs->target->t->u.target_size = size;}
ACCEPT,DROP,QUEUE,RETURN对应的是standard target。
staticstructxtables_target standard_target ={.family = NFPROTO_UNSPEC,.name ="standard",.version = XTABLES_VERSION,.size =XT_ALIGN(sizeof(int)),.userspacesize =XT_ALIGN(sizeof(int)),.help = standard_help,};
xt_entry_target分配的大小:
size =XT_ALIGN(sizeof(structxt_entry_target))+ cs->target->size;
cs->target->t =xtables_calloc(1, size);
standard target的target->size大小为XT_ALIGN(sizeof(int))。最终分配的结构体为xt_standard_target。
structxt_standard_target{structxt_entry_target target;int verdict;};
整理成内核需要的格式,向内核提交:
intTC_COMMIT(structxtc_handle*handle){/* Replace, then map back the counters. */
STRUCT_REPLACE *repl;
new_number =iptcc_compile_table_prep(handle,&new_size);
ret =iptcc_compile_table(handle, repl);
ret =setsockopt(handle->sockfd, TC_IPPROTO, SO_SET_REPLACE, repl,sizeof(*repl)+ repl->size);}
内核空间
staticintdo_ipt_set_ctl(structsock*sk,int cmd,void __user *user,unsignedint len){switch(cmd){case IPT_SO_SET_REPLACE:
ret =do_replace(sock_net(sk), user, len);break;default:
ret =-EINVAL;}return ret;}staticintdo_replace(structnet*net,constvoid __user *user,unsignedint len){
newinfo =xt_alloc_table_info(tmp.size);if(!newinfo)return-ENOMEM;
loc_cpu_entry = newinfo->entries;if(copy_from_user(loc_cpu_entry, user +sizeof(tmp),
tmp.size)!=0){
ret =-EFAULT;goto free_newinfo;}
ret =translate_table(net, newinfo, loc_cpu_entry,&tmp);if(ret !=0)goto free_newinfo;
ret =__do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, tmp.counters);}staticint__do_replace(structnet*net,constchar*name,unsignedint valid_hooks,structxt_table_info*newinfo,unsignedint num_counters,void __user *counters_ptr){structxt_table*t;
t =xt_request_find_table_lock(net, AF_INET, name);
oldinfo =xt_replace_table(t, num_counters, newinfo,&ret);}structxt_table_info*xt_replace_table(structxt_table*table,unsignedint num_counters,structxt_table_info*newinfo,int*error){
table->private = newinfo;}
原有规则的处理
用户层调用setsockopt将数据配置到内核。do_replace函数会重新配置规则。但是用户可以多次配置iptables。这里就引入一个问题:之前内核中的iptables规则到哪里去了呢?难道被冲掉了吗?
iptables在重新解析规则时,会调用getsockopt将内核中的规则拷贝出来,然后重新配置。
intdo_command4(int argc,char*argv[],char**table,structxtc_handle**handle, bool restore){/* only allocate handle if we weren't called with a handle */if(!*handle)*handle =iptc_init(*table);}structxtc_handle*iptc_init(constchar*tablename){strcpy(info.name, tablename);//获取entry的大小信息。if(getsockopt(sockfd, TC_IPPROTO, SO_GET_INFO,&info,&s)<0){close(sockfd);returnNULL;}
h =alloc_handle(&info);/* Initialize current state */
h->sockfd = sockfd;
h->info = info;
h->entries->size = h->info.size;
tmp =sizeof(STRUCT_GET_ENTRIES)+ h->info.size;if(getsockopt(h->sockfd, TC_IPPROTO, SO_GET_ENTRIES, h->entries,&tmp)<0)goto error;}
读取规则信息之后,iptables重新处理数据:
/* parse an iptables blob into it's pieces */staticintparse_table(structxtc_handle*h){/* First pass: over ruleset blob */ENTRY_ITERATE(h->entries->entrytable, h->entries->size,
cache_add_entry, h,&prev,&num);}/* main parser function: add an entry from the blob to the cache */staticintcache_add_entry(STRUCT_ENTRY *e,structxtc_handle*h,
STRUCT_ENTRY **prev,unsignedint*num){elseif((builtin =iptcb_ent_is_hook_entry(e, h))!=0){structchain_head*c =iptcc_alloc_chain_head((char*)hooknames[builtin-1],
builtin);DEBUGP_C("%u:%u new builtin chain: %p (rules=%p)\n",*num, offset, c,&c->rules);if(!c){
errno =-ENOMEM;return-1;}
c->hooknum = builtin;__iptcc_p_add_chain(h, c, offset, num);/* FIXME: this is ugly. */goto new_rule;}}
内核中在初始化table的时候,会配置chain。博客——netfilter分析2-表在内核的初始化——有更详尽的分析。
以filter表为例:
staticint __net_init iptable_filter_table_init(structnet*net){
repl =ipt_alloc_initial_table(&packet_filter);}void*ipt_alloc_initial_table(conststructxt_table*info){returnxt_alloc_initial_table(ipt, IPT);}#definext_alloc_initial_table(type, typ2)({\struct{\structtype##_replace repl;\structtype##_standard entries[];\}*tbl;\structtype##_error *term;\size_t term_offset =(offsetof(typeof(*tbl), entries[nhooks])+\__alignof__(*term)-1)&~(__alignof__(*term)-1);\tbl =kzalloc(term_offset +sizeof(*term), GFP_KERNEL);\for(; hook_mask !=0; hook_mask >>=1,++hooknum){\if(!(hook_mask &1))\continue;\tbl->repl.hook_entry[hooknum]= bytes;\tbl->repl.underflow[hooknum]= bytes;\tbl->entries[i++]=(structtype##_standard)\typ2##_STANDARD_INIT(NF_ACCEPT);\bytes +=sizeof(structtype##_standard);\}\tbl;\})
版权归原作者 不悔哥 所有, 如有侵权,请联系我们删除。