概述

本文主要对filter表的初始化流程,以及钩子函数的规则match流程的源码进行分析;

源码分析

所在钩子点:

1 /* Bitmask of the hook points the filter table works on: LOCAL_IN, FORWARD and LOCAL_OUT */
2 #define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
3                 (1 << NF_INET_FORWARD) | \
4                 (1 << NF_INET_LOCAL_OUT))

 

filter表信息:

1 /* Descriptor of the filter table */
2 static const struct xt_table packet_filter = {
3     .name        = "filter",
4     .valid_hooks    = FILTER_VALID_HOOKS,  /* hook points the filter table works on */
5     .me        = THIS_MODULE,
6     .af        = NFPROTO_IPV4,
7     .priority    = NF_IP_PRI_FILTER,
8     .table_init    = iptable_filter_table_init,
9 };

 

初始化:

 1 static int __net_init iptable_filter_table_init(struct net *net)
 2 {
 3     struct ipt_replace *repl;
 4     int err;
 5 
 6     /* The filter table is already initialized for this net: nothing to do */
 7     if (net->ipv4.iptable_filter)
 8         return 0;
 9 
10     /* Allocate the initial table template used for the registration below */
11     repl = ipt_alloc_initial_table(&packet_filter);
12     if (repl == NULL)
13         return -ENOMEM;
14     /* Entry 1 is the FORWARD hook */
15     /* Its verdict is the encoded ACCEPT when `forward` is true, the encoded DROP otherwise (`forward` is defined outside this listing) */
16     ((struct ipt_standard *)repl->entries)[1].target.verdict =
17         forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
18 
19     /* Register the filter table; ipt_register_table stores the new table in ipv4.iptable_filter */
20     err = ipt_register_table(net, &packet_filter, repl, filter_ops,
21                  &net->ipv4.iptable_filter);
22 
23     /* Free the initial table template */
24     kfree(repl);
25     return err;
26 }

 

分配用于初始化的table结构,其中的xt_alloc_initial_table是以宏的形式存在的;

1 void *ipt_alloc_initial_table(const struct xt_table *info)
2 {
3     return xt_alloc_initial_table(ipt, IPT);  /* macro: its expansion reads `info` from this scope */
4 }

 

为了便于阅读,下面给出将xt_alloc_initial_table宏(以ipt、IPT为参数)展开之后的函数;

 1 void * ipt_alloc_initial_table(const struct xt_table *info) {
 2     /* Build the initial table template for `info`; start from its mask of valid hooks */
 3     unsigned int hook_mask = info->valid_hooks;
 4     /* Number of valid hooks (bits set in the mask) */
 5     unsigned int nhooks = hweight32(hook_mask);
 6     unsigned int bytes = 0, hooknum = 0, i = 0;
 7     /* Layout of the template: replace header followed by one standard entry per hook */
 8     struct {
 9         struct ipt_replace repl;
10         struct ipt_standard entries[];
11     } *tbl;
12 
13     struct ipt_error *term;
14 
15     /* Offset of the trailing error entry: end of entries[nhooks], rounded up to the alignment of *term */
16     size_t term_offset = (offsetof(typeof(*tbl), entries[nhooks]) +
17         __alignof__(*term) - 1) & ~(__alignof__(*term) - 1);
18     /* Allocate zeroed memory for header, entries and the error entry */
19     tbl = kzalloc(term_offset + sizeof(*term), GFP_KERNEL);
20     if (tbl == NULL)
21         return NULL;
22     /* Locate the error entry inside the allocation */
23     term = (struct ipt_error *)&(((char *)tbl)[term_offset]);
24     /* Copy the table name */
25     strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name));
26     /* Initialize the error entry */
27     *term = (struct ipt_error)IPT_ERROR_INIT;
28     /* Record the hook mask, the entry count (hooks plus the error entry) and the total entry size */
29     tbl->repl.valid_hooks = hook_mask;
30     tbl->repl.num_entries = nhooks + 1;
31     tbl->repl.size = nhooks * sizeof(struct ipt_standard) +
32              sizeof(struct ipt_error);
33     /* For every valid hook: record its entry/underflow offset and emit one standard ACCEPT entry */
34     for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) {
35         if (!(hook_mask & 1))
36             continue;
37         tbl->repl.hook_entry[hooknum] = bytes;
38         tbl->repl.underflow[hooknum]  = bytes;
39         tbl->entries[i++] = (struct ipt_standard)
40             IPT_STANDARD_INIT(NF_ACCEPT);
41         bytes += sizeof(struct ipt_standard);
42     }
43     /* Return the template; the caller is responsible for freeing it */
44     return tbl;
45 }

 

ipt_register_table完成表注册流程,其中包括了分配table_info结构,并且与table->private进行关联,table中规则的合法性检查,以及调用nf_register_net_hooks进行钩子函数的注册;

 1 /* Register a table: build its xt_table_info, register the table, then register its hooks */
 2 int ipt_register_table(struct net *net, const struct xt_table *table,
 3                const struct ipt_replace *repl,
 4                const struct nf_hook_ops *ops, struct xt_table **res)
 5 {
 6     int ret;
 7     struct xt_table_info *newinfo;
 8     struct xt_table_info bootstrap = {0};
 9     void *loc_cpu_entry;
10     struct xt_table *new_table;
11 
12     /* Allocate the xt_table_info structure */
13     newinfo = xt_alloc_table_info(repl->size);
14     if (!newinfo)
15         return -ENOMEM;
16 
17     /* Copy the entries into the table_info */
18     loc_cpu_entry = newinfo->entries;
19     memcpy(loc_cpu_entry, repl->entries, repl->size);
20 
21     /* Validity check of the copied rules */
22     ret = translate_table(net, newinfo, loc_cpu_entry, repl);
23     if (ret != 0)
24         goto out_free;
25 
26     /* Create the new table and attach newinfo as its private data */
27     new_table = xt_register_table(net, table, &bootstrap, newinfo);
28     if (IS_ERR(new_table)) {
29         ret = PTR_ERR(new_table);
30         goto out_free;
31     }
32 
33     /* set res now, will see skbs right after nf_register_net_hooks */
34     /* Point the caller's result at the new table */
35     WRITE_ONCE(*res, new_table);
36 
37     /* Register the hook functions, one per valid hook */
38     ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
39     if (ret != 0) {
40         __ipt_unregister_table(net, new_table);
41         *res = NULL;
42     }
43 
44     return ret;
45 
46 out_free:
47     xt_free_table_info(newinfo);
48     return ret;
49 }

 

xt_register_table建立新表,将xt_table_info与表进行关联,并将表加入到net->xt.tables[table->af]链表;

 1 struct xt_table *xt_register_table(struct net *net,
 2                    const struct xt_table *input_table,
 3                    struct xt_table_info *bootstrap,
 4                    struct xt_table_info *newinfo)
 5 {
 6     int ret;
 7     struct xt_table_info *private;
 8     struct xt_table *t, *table;
 9 
10     /* Don't add one object to multiple lists. */
11     /* Create the new table as a copy of input_table */
12     table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
13     if (!table) {
14         ret = -ENOMEM;
15         goto out;
16     }
17 
18     mutex_lock(&xt[table->af].mutex);
19     /* Don't autoload: we'd eat our tail... */
20     /* Fail with -EEXIST if a table with the same name is already registered */
21     list_for_each_entry(t, &net->xt.tables[table->af], list) {
22         if (strcmp(t->name, table->name) == 0) {
23             ret = -EEXIST;
24             goto unlock;
25         }
26     }
27 
28     /* Simplifies replace_table code. */
29     table->private = bootstrap;
30 
31     /* Install newinfo as the table's private data */
32     if (!xt_replace_table(table, 0, newinfo, &ret))
33         goto unlock;
34 
35     private = table->private;
36     pr_debug("table->private->number = %u\n", private->number);
37 
38     /* save number of initial entries */
39     private->initial_entries = private->number;
40 
41     /* Add the table to the xt.tables list of its address family */
42     list_add(&table->list, &net->xt.tables[table->af]);
43     mutex_unlock(&xt[table->af].mutex);
44 
45     /* Return the new table */
46     return table;
47 
48 unlock:
49     mutex_unlock(&xt[table->af].mutex);
50     kfree(table);
51 out:
52     return ERR_PTR(ret);
53 }

 

钩子函数iptable_filter_hook,该函数主要调用ipt_do_table函数进行规则的匹配;

 1 static unsigned int
 2 iptable_filter_hook(void *priv, struct sk_buff *skb,
 3             const struct nf_hook_state *state)
 4 {
 5     /* LOCAL_OUT && (packet shorter than an IP header || IP header length below the minimum): accept without consulting the table */
 6     if (state->hook == NF_INET_LOCAL_OUT &&
 7         (skb->len < sizeof(struct iphdr) ||
 8          ip_hdrlen(skb) < sizeof(struct iphdr)))
 9         /* root is playing with raw sockets. */
10         return NF_ACCEPT;
11 
12     /* Core rule-matching path against this net's filter table */
13     return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
14 }

 

ipt_do_table是核心的规则匹配流程,其中包括了标准match,扩展match,标准target,扩展target的相关处理;

  1 /* Walk the rules on the hook's chain, run the standard and extension matches, and execute the matching rule's target */
  2 unsigned int
  3 ipt_do_table(struct sk_buff *skb,
  4          const struct nf_hook_state *state,
  5          struct xt_table *table)
  6 {
  7     unsigned int hook = state->hook;
  8     static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
  9     const struct iphdr *ip;
 10     /* Initializing verdict to NF_DROP keeps gcc happy. */
 11     unsigned int verdict = NF_DROP;
 12     const char *indev, *outdev;
 13     const void *table_base;
 14     struct ipt_entry *e, **jumpstack;
 15     unsigned int stackidx, cpu;
 16     const struct xt_table_info *private;
 17     struct xt_action_param acpar;
 18     unsigned int addend;
 19 
 20     /* Initialization */
 21     stackidx = 0;
 22     ip = ip_hdr(skb);
 23     indev = state->in ? state->in->name : nulldevname;
 24     outdev = state->out ? state->out->name : nulldevname;
 25     /* We handle fragments by dealing with the first fragment as
 26      * if it was a normal packet.  All other fragments are treated
 27      * normally, except that they will NEVER match rules that ask
 28      * things we don't know, ie. tcp syn flag or ports).  If the
 29      * rule is also a fragment-specific rule, non-fragments won't
 30      * match it. */
 31     acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 32     acpar.thoff   = ip_hdrlen(skb);
 33     acpar.hotdrop = false;
 34     acpar.state   = state;
 35 
 36     IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 37     local_bh_disable();
 38     addend = xt_write_recseq_begin();
 39     private = table->private;
 40     cpu        = smp_processor_id();
 41     /*
 42      * Ensure we load private-> members after we've fetched the base
 43      * pointer.
 44      */
 45     smp_read_barrier_depends();
 46     /* Base address of the rule entries (address of the first rule) */
 47     table_base = private->entries;
 48     jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
 49 
 50     /* Switch to alternate jumpstack if we're being invoked via TEE.
 51      * TEE issues XT_CONTINUE verdict on original skb so we must not
 52      * clobber the jumpstack.
 53      *
 54      * For recursion via REJECT or SYNPROXY the stack will be clobbered
 55      * but it is no problem since absolute verdict is issued by these.
 56      */
 57     if (static_key_false(&xt_tee_enabled))
 58         jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
 59 
 60     /* Fetch the first rule of the chain attached to this hook */
 61     e = get_entry(table_base, private->hook_entry[hook]);
 62 
 63     do {
 64         const struct xt_entry_target *t;
 65         const struct xt_entry_match *ematch;
 66         struct xt_counters *counter;
 67 
 68         IP_NF_ASSERT(e);
 69         /* Standard match on the IP header and the in/out device names */
 70         if (!ip_packet_match(ip, indev, outdev,
 71             &e->ip, acpar.fragoff)) {
 72  no_match:
 73              /* No match: move on to the next rule */
 74             e = ipt_next_entry(e);
 75             continue;
 76         }
 77 
 78         /* Extension matches */
 79         xt_ematch_foreach(ematch, e) {
 80             acpar.match     = ematch->u.kernel.match;
 81             acpar.matchinfo = ematch->data;
 82             /* A single failing extension match means the rule does not match */
 83             if (!acpar.match->match(skb, &acpar))
 84                 goto no_match;
 85         }
 86 
 87         counter = xt_get_this_cpu_counter(&e->counters);
 88         ADD_COUNTER(*counter, skb->len, 1);
 89 
 90         /* Both the standard match and all extension matches succeeded */
 91 
 92         /* Fetch the rule's target */
 93         t = ipt_get_target(e);
 94         IP_NF_ASSERT(t->u.kernel.target);
 95 
 96 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 97         /* The packet is traced: log it */
 98         if (unlikely(skb->nf_trace))
 99             trace_packet(state->net, skb, hook, state->in,
100                      state->out, table->name, private, e);
101 #endif
102         /* Standard target? */
103         /* Standard target: it has no target callback */
104         if (!t->u.kernel.target->target) {
105             int v;
106 
107             v = ((struct xt_standard_target *)t)->verdict;
108             /* Negative value: an encoded verdict, not a jump to another rule offset */
109             if (v < 0) {
110                 /* Pop from stack? */
111                 /* Not XT_RETURN: decode the verdict and leave the loop */
112                 if (v != XT_RETURN) {
113                     verdict = (unsigned int)(-v) - 1;
114                     break;
115                 }
116 
117                 /* XT_RETURN: with an empty stack go to the hook's underflow entry, otherwise resume after the saved rule */
118                 if (stackidx == 0) {
119                     e = get_entry(table_base,
120                         private->underflow[hook]);
121                 } else {
122                     e = jumpstack[--stackidx];
123                     e = ipt_next_entry(e);
124                 }
125                 continue;
126             }
127 
128             /* Save the jumping rule so a later return can continue with the rule after it (skipped for fall-through and IPT_F_GOTO) */
129             if (table_base + v != ipt_next_entry(e) &&
130                 !(e->ip.flags & IPT_F_GOTO))
131                 jumpstack[stackidx++] = e;
132 
133             /* Continue at the rule located at offset v */
134             e = get_entry(table_base, v);
135             continue;
136         }
137 
138         /* Extension target: invoke the target callback */
139 
140         acpar.target   = t->u.kernel.target;
141         acpar.targinfo = t->data;
142 
143         verdict = t->u.kernel.target->target(skb, &acpar);
144         /* Target might have changed stuff. */
145         ip = ip_hdr(skb);
146 
147         /* XT_CONTINUE: keep going with the next rule */
148         if (verdict == XT_CONTINUE)
149             e = ipt_next_entry(e);
150         /* Any other verdict ends the traversal */
151         else
152             /* Verdict */
153             break;
154     /* Keep looping as long as hotdrop has not been set */
155     } while (!acpar.hotdrop);
156 
157     xt_write_recseq_end(addend);
158     local_bh_enable();
159 
160     /* hotdrop was set: drop the packet */
161     if (acpar.hotdrop)
162         return NF_DROP;
163     /* Otherwise return the verdict that ended the traversal */
164     else return verdict;
165 }

 

相关文章:

  • 2021-10-01
  • 2021-08-19
  • 2021-07-18
  • 2022-01-13
  • 2021-08-23
  • 2022-12-23
猜你喜欢
  • 2022-12-23
  • 2021-08-01
  • 2021-05-13
  • 2021-08-28
  • 2022-01-12
  • 2021-05-06
  • 2021-08-21
相关资源
相似解决方案