PATCH出来了. 是针对2.6.13-15-smp的.
将代码存到文件seeker中, 放在linux source根下, 然后patch -p1 < seeker.
然后编译KERNEL.启动.
我初步测试网络下载的速度
在双CPU机器上, 在IPTABLES 的INPUT链安上2400行IP和PORT匹配(目的是故意模拟
高负载的情况). 有PATCH, 网络下载速度比没有可以高出一倍. 因为利用的双CPU.
这个有可能是最好的解决方案. 如果4个CPU,可能将负载能力提高4倍(至少2倍).
IRQBALANCE不需要了.
NAT情况我限于条件,测试的十分不完全.
欢迎测试.
以后我还会给出module, 不用编译KERNEL就可以测试使用了.
--- old/net/ipv4/ip_input.c 2007-09-20 20:50:31.000000000 +0800
+++ new/net/ipv4/ip_input.c 2007-09-21 05:52:40.000000000 +0800
@@ -362,6 +362,198 @@
return NET_RX_DROP;
}
+
+#define CONFIG_BOTTOM_SOFTIRQ_SMP		/* compile in the bottom-softirq (BS) code below */
+#define CONFIG_BOTTOM_SOFTIRQ_SMP_SYSCTL	/* companion switch; net/sysctl_net.c defines its own copy */
+
+#ifdef CONFIG_BOTTOM_SOFTIRQ_SMP
+
+/*
+ *
+Bottom Softirq Implementation. John Ye, 2007.08.27
+
+Why this patch:
+Make the kernel able to concurrently execute softirq's net code on SMP systems.
+Takes full advantage of SMP to handle more packets and greatly raises NIC throughput.
+The current kernel's net packet processing logic is:
+1) The CPU which handles a hardirq must be executing its related softirq.
+2) One softirq instance (irqs handled by 1 CPU) can't be executed on more than one CPU
+at the same time.
+These limitations make it hard for kernel networking to take advantage of SMP.
+
+How this patch:
+It splits the current softirq code into 2 parts: the cpu-sensitive top half,
+and the cpu-insensitive bottom half, then makes the bottom half (called BS) be
+executed on SMP concurrently.
+The two parts are not equal in terms of size and load. Top part has constant code
+size (mainly, in net/core/dev.c and NIC drivers), while bottom part involves
+netfilter (iptables) whose load varies very much. An iptables with 1000 rules to match
+will make the bottom part's load very high. So, if the bottom part softirq
+can be randomly distributed to processors and run concurrently on them, the network will
+gain much more packet handling capacity, and network throughput will be increased
+remarkably.
+
+Where useful:
+It's useful on SMP machines that meet the following 2 conditions:
+1) have high kernel network load (for example, running iptables with thousands of rules, etc).
+2) have more CPUs than active NICs (e.g. a 4 CPU machine with 2 NICs).
+On these systems, with the increase of softirq load, some CPUs will be idle
+while others (number is equal to # of NICs) keep busy.
+IRQBALANCE will help, but it only shifts IRQs among CPUs, and gives no softirq concurrency.
+Balancing the load of each CPU will not remarkably increase network speed.
+
+Where NOT useful:
+If the bottom half of softirq is too small (without running iptables), or the network
+is too idle, the BS patch will not be seen to have a visible effect. But it has no
+negative effect either.
+User can turn on/off BS functionality by the /proc/sys/net/bs_enable switch.
+
+How to test:
+On a linux box, run iptables, add 2000 rules to table filter & table nat to simulate huge
+softirq load. Then, open 20 ftp sessions to download a big file. On another machine (which
+uses this test machine as gateway), open 20 more ftp download sessions. Compare the speed,
+without BS enabled, and with BS enabled.
+cat /proc/sys/net/bs_enable. this is a switch to turn on/off BS
+cat /proc/sys/net/bs_status. this shows the usage of each CPU
+Tests showed that when bottom softirq load is high, the network throughput can be nearly
+doubled on a 2 CPU machine. hopefully it may be quadrupled on a 4 CPU linux box.
+
+Bugs:
+It will NOT allow hotplug CPU.
+It only allows incremental CPU ids, starting from 0 to num_online_cpus().
+for example, 0,1,2,3 is OK. 0,1,8,9 is not OK.
+
+Some considerations in the future:
+1) With BS patch, the irq balance code in arch/i386/kernel/io_apic.c seems not needed any more,
+at least not for network irq.
+2) Softirq load will become very small. It only runs the top half of old softirq, which
+is much less expensive than bottom half---the netfilter program.
+To let top softirq process more packets, can't these 3 network parameters be enlarged?
+extern int netdev_max_backlog = 1000;
+extern int netdev_budget = 300;
+extern int weight_p = 64;
+3) Now, BS is running on the built-in keventd thread; can we create new workqueues to let it run on?
+
+Signed-off-by: John Ye (Seeker) <johnye@webizmail.com>
+ *
+ */
+
+#define BS_USE_PERCPU_DATA	/* select the DEFINE_PER_CPU storage variant below */
+
+struct cpu_stat {		/* per-CPU counters, exported through the bs_status sysctl */
+	unsigned long irqs;	/* packets that reached the BS dispatch code in ip_rcv() on this CPU */
+	unsigned long dids;	/* packets this CPU passed to ip_rcv1() itself */
+	unsigned long others;	/* packets bs_func() processed on this CPU */
+	unsigned long works;	/* work items queued for this CPU */
+};
+#define BS_CPU_STAT_DEFINED
+
+static int nr_cpus = 0;		/* cached num_online_cpus(); filled in lazily by ip_rcv() */
+
+#ifdef BS_USE_PERCPU_DATA
+static DEFINE_PER_CPU(struct sk_buff_head, bs_cpu_queues); // cacheline_aligned_in_smp;
+static DEFINE_PER_CPU(struct work_struct, bs_works);
+struct cpu_stat bs_cpu_status[NR_CPUS];	/* non-static: net/sysctl_net.c declares it extern */
+#else
+/* NR_CPUS is already in scope here (used without a local define elsewhere in this file); do not redefine it */
+static struct sk_buff_head bs_cpu_queues[NR_CPUS];
+static struct work_struct bs_works[NR_CPUS];
+struct cpu_stat bs_cpu_status[NR_CPUS];	/* non-static: net/sysctl_net.c declares it extern */
+#endif
+
+int bs_enable = 1;		/* runtime on/off switch, exported as the bs_enable sysctl */
+/* Run the PRE_ROUTING netfilter hook for @skb and continue in ip_rcv_finish(); returns the hook's verdict. */
+static int ip_rcv1(struct sk_buff *skb, struct net_device *dev)
+{
+	return NF_HOOK_COND(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish, nf_hook_input_cond(skb));
+}
+
+/* Work handler queued by ip_rcv(): feed every skb queued for this CPU to ip_rcv1(); @data is unused. */
+static void bs_func(void *data)
+{
+	unsigned long flags;		/* spin_lock_irqsave() needs an unsigned long, not int */
+	int num, cpu;
+	struct sk_buff *skb;
+	struct work_struct *work;	/* not named bs_works: that would shadow the array in the #else branch */
+	struct sk_buff_head *q;
+	cpu = smp_processor_id();
+
+#ifdef BS_USE_PERCPU_DATA
+	work = &per_cpu(bs_works, cpu);
+	q = &per_cpu(bs_cpu_queues, cpu);
+#else
+	work = &bs_works[cpu];
+	q = &bs_cpu_queues[cpu];
+#endif
+
+	local_bh_disable();
+restart:
+	num = 0;
+	while(1) {
+		/* q->lock is also taken by ip_rcv() when it enqueues, so dequeue under the lock */
+		spin_lock_irqsave(&q->lock, flags);
+		skb = __skb_dequeue(q);
+		spin_unlock_irqrestore(&q->lock, flags);
+		if(!skb) break;
+		num++;
+		//local_bh_disable();
+		ip_rcv1(skb, skb->dev);
+		//__local_bh_enable(); //sub_preempt_count(SOFTIRQ_OFFSET - 1);
+	}
+
+	bs_cpu_status[cpu].others += num;
+	if(num > 0) { goto restart; }	/* rescan until one full pass finds the queue empty */
+
+	__local_bh_enable(); //sub_preempt_count(SOFTIRQ_OFFSET - 1);
+	work->func = NULL;		/* ip_rcv() tests !func before queueing this work again */
+
+	return;
+}
+
+/* COPY_IN_START_FROM kernel/workqueue.c -- private layouts duplicated here; keep identical to that file */
+struct cpu_workqueue_struct {
+
+	spinlock_t lock;
+
+	long remove_sequence; /* Least-recently added (next to run) */
+	long insert_sequence; /* Next to add */
+
+	struct list_head worklist;
+	wait_queue_head_t more_work;
+	wait_queue_head_t work_done;
+
+	struct workqueue_struct *wq;
+	task_t *thread;
+
+	int run_depth; /* Detect run_workqueue() recursion depth */
+} ____cacheline_aligned;
+
+
+struct workqueue_struct {
+	struct cpu_workqueue_struct cpu_wq[NR_CPUS];
+	const char *name;
+	struct list_head list; /* Empty if single thread */
+};
+/* COPY_IN_END_FROM kernel/workqueue.c */
+
+extern struct workqueue_struct *keventd_wq;	/* made non-static by the kernel/workqueue.c hunk of this patch */
+
+/* Preem.pt must be disabled. *./[成人用品]
+static void __queue._work(struct c.pu_workqueue_struct *cwq,.
+. struct w.ork_struct *work) 外汇
+{
+ uns.igned long flags; 杀毒
+
+ sp.in_.lock_irqsave(&cwq->lock, flags);--- 印刷
+ work-.>wq_data = cwq;(广告)
+ list._add_tail(&work->entry, &cwq->workli.st);.
+ cwq->insert._sequence++; 电子
+ wake_up(&cwq.->more_wo.rk);.
+ spin_unlock_irqrestore(&c.wq->lock, flags.); 婚庆
+}
+.#en.dif //CONFIG_BOTTOM_SOFTIRQ_SMP 建材
+
+
/*
* Main IP Receive routine.
*/
@@ -424,8 +616,73 @@
}
}
+#ifdef CONFIG_BOTTOM_SOFTIRQ_SMP
+	if(!nr_cpus)
+		nr_cpus = num_online_cpus();
+
+	if(bs_enable && nr_cpus > 1 && iph->protocol != IPPROTO_ICMP) {
+	//if(bs_enable && iph->protocol == IPPROTO_ICMP) { //test on icmp first
+		unsigned long flags;		/* spin_lock_irqsave() needs an unsigned long */
+		unsigned int cur, cpu;
+		struct work_struct *work;	/* not named bs_works: that would shadow the array in the #else branch */
+		struct sk_buff_head *q;
+
+		cur = smp_processor_id();
+
+		bs_cpu_status[cur].irqs++;
+
+		/* round-robin target choice driven by this CPU's packet counter */
+		cpu = (bs_cpu_status[cur].irqs % nr_cpus);
+		if(cpu == cur) {
+			bs_cpu_status[cpu].dids++;
+			return ip_rcv1(skb, dev);
+		}
+
+#ifdef BS_USE_PERCPU_DATA
+		q = &per_cpu(bs_cpu_queues, cpu);
+#else
+		q = &bs_cpu_queues[cpu];
+#endif
+
+		if(!q->next) { // || skb_queue_len(q) == 0 ) {
+			skb_queue_head_init(q);	/* lazy first-use init of the target queue */
+		}
+
+#ifdef BS_USE_PERCPU_DATA
+		work = &per_cpu(bs_works, cpu);
+#else
+		work = &bs_works[cpu];
+#endif
+		/*
+		local_irq_save(flags);
+		SKB_CB(skb)->dev = dev;
+		SKB_CB(skb)->ptype = pt;
+		*/
+		spin_lock_irqsave(&q->lock, flags);
+		__skb_queue_tail(q, skb);
+		spin_unlock_irqrestore(&q->lock, flags);
+		//if(net_ratelimit()) printk("qlen %d\n", q->qlen);
+
+		//local_irq_restore(flags);
+		if (!work->func) {		/* bs_func() clears func when it has drained the queue */
+			INIT_WORK(work, bs_func, q);
+			bs_cpu_status[cpu].works++;
+			preempt_disable();
+			__queue_work(keventd_wq->cpu_wq + cpu, work);
+			preempt_enable();
+		}
+	} else {
+		int cpu = smp_processor_id();
+		bs_cpu_status[cpu].irqs++;
+		bs_cpu_status[cpu].dids++;
+		return ip_rcv1(skb, dev);
+	}
+	return 0;
+#else
return NF_HOOK_COND(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
-	ip_rcv_finish, nf_hook_input_cond(skb));
+	ip_rcv_finish, nf_hook_input_cond(skb));
+#endif //CONFIG_BOTTOM_SOFTIRQ_SMP
+
inhdr_error:
IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
--- old/net/sysctl_net.c 2007-09-20 23:30:29.000000000 +0800
+++ new/net/sysctl_net.c 2007-09-20 23:28:06.000000000 +0800
@@ -30,6 +30,22 @@
extern struct ctl_table tr_table[];
#endif
+
+#define CONFIG_BOTTOM_SOFTIRQ_SMP_SYSCTL
+#ifdef CONFIG_BOTTOM_SOFTIRQ_SMP_SYSCTL
+#if !defined(BS_CPU_STAT_DEFINED)
+struct cpu_stat {		/* must mirror struct cpu_stat in net/ipv4/ip_input.c */
+	unsigned long irqs; //total irqs
+	unsigned long dids; //I did,
+	unsigned long others;
+	unsigned long works;
+};
+#endif
+extern struct cpu_stat bs_cpu_status[NR_CPUS];	/* defined in net/ipv4/ip_input.c */
+
+extern int bs_enable;				/* defined in net/ipv4/ip_input.c */
+#endif
+
struct ctl_table net_table[] = {
{
.ctl_name = NET_CORE,
@@ -61,5 +77,26 @@
.child = tr_table,
},
#endif
+
+#ifdef CONFIG_BOTTOM_SOFTIRQ_SMP_SYSCTL
+	{
+		.ctl_name = 99,
+		.procname = "bs_status",
+		.data = &bs_cpu_status,
+		.maxlen = sizeof(bs_cpu_status),
+		.mode = 0644,
+		.proc_handler = &proc_doulongvec_minmax,	/* counters are unsigned long, not int */
+	},
+
+	{
+		.ctl_name = 100,	/* was 99 too; binary sysctl ids must be unique within a table */
+		.procname = "bs_enable",
+		.data = &bs_enable,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+	},
+#endif
+
{ 0 },
};
--- old/kernel/workqueue.c 2007-09-21 04:48:13.000000000 +0800
+++ new/kernel/workqueue.c 2007-09-21 04:47:49.000000000 +0800
@@ -384,7 +384,11 @@
kfree(wq);
}
+/* file-local definition disabled; the global one below lets net/ipv4/ip_input.c queue work directly
static struct workqueue_struct *keventd_wq;
+*/
+struct workqueue_struct *keventd_wq;
+EXPORT_SYMBOL(keventd_wq);
int fastcall schedule_work(struct work_struct *work)
{
复制代码
[ 本帖最后由 思一克 于 2007-9-20 22:16 编辑 ]