1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/sched.h>
20 #include <linux/string.h>
21 #include <linux/mm.h>
22 #include <linux/socket.h>
23 #include <linux/sockios.h>
24 #include <linux/in.h>
25 #include <linux/errno.h>
26 #include <linux/interrupt.h>
27 #include <linux/if_ether.h>
28 #include <linux/inet.h>
29 #include <linux/netdevice.h>
30 #include <linux/etherdevice.h>
31 #include <linux/notifier.h>
32 #include <net/ip.h>
33 #include <net/route.h>
34 #include <linux/skbuff.h>
35 #include <net/sock.h>
36 #include <net/pkt_sched.h>
37
38
39 /*      Class-Based Queueing (CBQ) algorithm.
40         =======================================
41
42         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
43                  Management Models for Packet Networks",
44                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45
46                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
47
48                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
49                  Parameters", 1996
50
51                  [4] Sally Floyd and Michael Speer, "Experimental Results
52                  for Class-Based Queueing", 1998, not published.
53
54         -----------------------------------------------------------------------
55
56         The algorithm skeleton was taken from the NS simulator's cbq.cc.
57         Anyone wanting to check this code against the LBL version
58         should take into account that ONLY the skeleton was borrowed;
59         the implementation is different. Particularly:
60
61         --- The WRR algorithm is different. Our version looks more
62         reasonable (I hope) and works when quanta are allowed to be
63         less than MTU, which is always the case when real time classes
64         have small rates. Note that the statement in [3] is
65         incomplete: delay may actually be estimated even if class
66         per-round allotment is less than MTU. Namely, if per-round
67         allotment is W*r_i, and r_1+...+r_k = r < 1
68
69         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70
71         In the worst case we have IntServ estimate with D = W*r+k*MTU
72         and C = MTU*r. The proof (if correct at all) is trivial.
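        (One way to read this bound, offered as an interpretive note rather
        than part of the original argument: [MTU/(W*r_i)] is roughly the
        number of WRR rounds class i needs to accumulate one MTU worth of
        allotment, each round serves about W*r bytes of quanta across all
        classes, every class may overrun its allotment by up to one MTU,
        and everything is divided by the link bandwidth B.)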
73
74
75         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
76         interpret some places, which look like incorrect translations
77         from NS. Anyone is welcome to find these differences
78         and explain to me why I am wrong 8).
79
80         --- Linux has no EOI event, so we cannot estimate true class
81         idle time. The workaround is to consider the next dequeue event
82         as a sign that the previous packet has finished. This is wrong because of
83         internal device queueing, but on a permanently loaded link it holds.
84         Moreover, combined with clock integrator, this scheme looks
85         very close to an ideal solution.  */
86
87 struct cbq_sched_data;
88
89
90 struct cbq_class
91 {
92         struct cbq_class        *next;          /* hash table link */
93         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
94
95 /* Parameters */
96         u32                     classid;
97         unsigned char           priority;       /* class priority */
98         unsigned char           priority2;      /* priority to be used after overlimit */
99         unsigned char           ewma_log;       /* time constant for idle time calculation */
100         unsigned char           ovl_strategy;
101 #ifdef CONFIG_NET_CLS_POLICE
102         unsigned char           police;
103 #endif
104
105         u32                     defmap;
106
107         /* Link-sharing scheduler parameters */
108         long                    maxidle;        /* Class parameters: see below. */
109         long                    offtime;
110         long                    minidle;
111         u32                     avpkt;
112         struct qdisc_rate_table *R_tab;
113
114         /* Overlimit strategy parameters */
115         void                    (*overlimit)(struct cbq_class *cl);
116         long                    penalty;
117
118         /* General scheduler (WRR) parameters */
119         long                    allot;
120         long                    quantum;        /* Allotment per WRR round */
121         long                    weight;         /* Relative allotment: see below */
122
123         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
124         struct cbq_class        *split;         /* Ptr to split node */
125         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
126         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
127         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
128                                                    parent otherwise */
129         struct cbq_class        *sibling;       /* Sibling chain */
130         struct cbq_class        *children;      /* Pointer to children chain */
131
132         struct Qdisc            *q;             /* Elementary queueing discipline */
133
134
135 /* Variables */
136         unsigned char           cpriority;      /* Effective priority */
137         unsigned char           delayed;
138         unsigned char           level;          /* level of the class in hierarchy:
139                                                    0 for leaf classes, and maximal
140                                                    level of children + 1 for nodes.
141                                                  */
142
143         psched_time_t           last;           /* Last end of service */
144         psched_time_t           undertime;
145         long                    avgidle;
146         long                    deficit;        /* Saved deficit for WRR */
147         unsigned long           penalized;
148         struct gnet_stats_basic bstats;
149         struct gnet_stats_queue qstats;
150         struct gnet_stats_rate_est rate_est;
151         spinlock_t              *stats_lock;
152         struct tc_cbq_xstats    xstats;
153
154         struct tcf_proto        *filter_list;
155
156         int                     refcnt;
157         int                     filters;
158
159         struct cbq_class        *defaults[TC_PRIO_MAX+1];
160 };
161
162 struct cbq_sched_data
163 {
164         struct cbq_class        *classes[16];           /* Hash table of all classes */
165         int                     nclasses[TC_CBQ_MAXPRIO+1];
166         unsigned                quanta[TC_CBQ_MAXPRIO+1];
167
168         struct cbq_class        link;
169
170         unsigned                activemask;
171         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
172                                                                    with backlog */
173
174 #ifdef CONFIG_NET_CLS_POLICE
175         struct cbq_class        *rx_class;
176 #endif
177         struct cbq_class        *tx_class;
178         struct cbq_class        *tx_borrowed;
179         int                     tx_len;
180         psched_time_t           now;            /* Cached timestamp */
181         psched_time_t           now_rt;         /* Cached real time */
182         unsigned                pmask;
183
184         struct timer_list       delay_timer;
185         struct timer_list       wd_timer;       /* Watchdog timer,
186                                                    started when CBQ has
187                                                    backlog, but cannot
188                                                    transmit just now */
189         long                    wd_expires;
190         int                     toplevel;
191         u32                     hgenerator;
192 };
193
194
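/*
 * L2T ("length to time") looks up the transmission time of a len-byte
 * packet in the class's precomputed rate table; the table is indexed in
 * cells of 2^cell_log bytes, hence the shift.
 */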
195 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
196
197
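/*
 * cbq_hash() folds the low 16 bits of the classid into one of the 16
 * hash buckets by XORing its four nibbles together.
 */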
198 static __inline__ unsigned cbq_hash(u32 h)
199 {
200         h ^= h>>8;
201         h ^= h>>4;
202         return h&0xF;
203 }
204
205 static __inline__ struct cbq_class *
206 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207 {
208         struct cbq_class *cl;
209
210         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
211                 if (cl->classid == classid)
212                         return cl;
213         return NULL;
214 }
215
216 #ifdef CONFIG_NET_CLS_POLICE
217
218 static struct cbq_class *
219 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220 {
221         struct cbq_class *cl, *new;
222
223         for (cl = this->tparent; cl; cl = cl->tparent)
224                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
225                         return new;
226
227         return NULL;
228 }
229
230 #endif
231
232 /* Classify packet. The procedure is pretty complicated, but
233    it allows us to combine link sharing and priority scheduling
234    transparently.
235
236    Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
237    so that classification resolves to split nodes. Then packets are classified
238    by logical priority, or a more specific classifier may be attached
239    to the split node.
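   (Interpretive note: step 1 below trusts skb->priority when it already
    names one of our classes; the loop then walks filter lists from the
    root towards a leaf, falling back to the split node's defmap when a
    filter returns only a priority.)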
240  */
241
242 static struct cbq_class *
243 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244 {
245         struct cbq_sched_data *q = qdisc_priv(sch);
246         struct cbq_class *head = &q->link;
247         struct cbq_class **defmap;
248         struct cbq_class *cl = NULL;
249         u32 prio = skb->priority;
250         struct tcf_result res;
251
252         /*
253          *  Step 1. If skb->priority points to one of our classes, use it.
254          */
255         if (TC_H_MAJ(prio^sch->handle) == 0 &&
256             (cl = cbq_class_lookup(q, prio)) != NULL)
257                 return cl;
258
259         *qerr = NET_XMIT_BYPASS;
260         for (;;) {
261                 int result = 0;
262                 defmap = head->defaults;
263
264                 /*
265                  * Step 2+n. Apply classifier.
266                  */
267                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268                         goto fallback;
269
270                 if ((cl = (void*)res.class) == NULL) {
271                         if (TC_H_MAJ(res.classid))
272                                 cl = cbq_class_lookup(q, res.classid);
273                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
274                                 cl = defmap[TC_PRIO_BESTEFFORT];
275
276                         if (cl == NULL || cl->level >= head->level)
277                                 goto fallback;
278                 }
279
280 #ifdef CONFIG_NET_CLS_ACT
281                 switch (result) {
282                 case TC_ACT_QUEUED:
283                 case TC_ACT_STOLEN:
284                         *qerr = NET_XMIT_SUCCESS;
285                 case TC_ACT_SHOT:
286                         return NULL;
287                 }
288 #elif defined(CONFIG_NET_CLS_POLICE)
289                 switch (result) {
290                 case TC_POLICE_RECLASSIFY:
291                         return cbq_reclassify(skb, cl);
292                 case TC_POLICE_SHOT:
293                         return NULL;
294                 default:
295                         break;
296                 }
297 #endif
298                 if (cl->level == 0)
299                         return cl;
300
301                 /*
302                  * Step 3+n. If classifier selected a link sharing class,
303                  *         apply the agency-specific classifier.
304                  *         Repeat this procedure until we hit a leaf node.
305                  */
306                 head = cl;
307         }
308
309 fallback:
310         cl = head;
311
312         /*
313          * Step 4. No success...
314          */
315         if (TC_H_MAJ(prio) == 0 &&
316             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
317             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
318                 return head;
319
320         return cl;
321 }
322
323 /*
324    A packet has just been enqueued on an empty class.
325    cbq_activate_class adds the class to the tail of the active class list
326    of its priority band.
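   The list is circular and singly linked; q->active[prio] always points
   at its tail, so both appending here and starting the round-robin scan
   in cbq_dequeue_prio() are O(1).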
327  */
328
329 static __inline__ void cbq_activate_class(struct cbq_class *cl)
330 {
331         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
332         int prio = cl->cpriority;
333         struct cbq_class *cl_tail;
334
335         cl_tail = q->active[prio];
336         q->active[prio] = cl;
337
338         if (cl_tail != NULL) {
339                 cl->next_alive = cl_tail->next_alive;
340                 cl_tail->next_alive = cl;
341         } else {
342                 cl->next_alive = cl;
343                 q->activemask |= (1<<prio);
344         }
345 }
346
347 /*
348    Unlink class from active chain.
349    Note that the same unlinking is done directly in cbq_dequeue*
350    during the round-robin procedure.
351  */
352
353 static void cbq_deactivate_class(struct cbq_class *this)
354 {
355         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
356         int prio = this->cpriority;
357         struct cbq_class *cl;
358         struct cbq_class *cl_prev = q->active[prio];
359
360         do {
361                 cl = cl_prev->next_alive;
362                 if (cl == this) {
363                         cl_prev->next_alive = cl->next_alive;
364                         cl->next_alive = NULL;
365
366                         if (cl == q->active[prio]) {
367                                 q->active[prio] = cl_prev;
368                                 if (cl == q->active[prio]) {
369                                         q->active[prio] = NULL;
370                                         q->activemask &= ~(1<<prio);
371                                         return;
372                                 }
373                         }
374                         return;
375                 }
376         } while ((cl_prev = cl) != q->active[prio]);
377 }
378
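/*
 * A note on q->toplevel (roughly, the "top-level" link-sharing guideline):
 * cbq_mark_toplevel() lowers q->toplevel to the level of the first
 * underlimit class on the borrow chain of the class that just received a
 * packet; cbq_under_limit() then refuses borrowing from classes above
 * that level.
 */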
379 static void
380 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
381 {
382         int toplevel = q->toplevel;
383
384         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
385                 psched_time_t now;
386                 psched_tdiff_t incr;
387
388                 PSCHED_GET_TIME(now);
389                 incr = PSCHED_TDIFF(now, q->now_rt);
390                 PSCHED_TADD2(q->now, incr, now);
391
392                 do {
393                         if (PSCHED_TLESS(cl->undertime, now)) {
394                                 q->toplevel = cl->level;
395                                 return;
396                         }
397                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
398         }
399 }
400
401 static int
402 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
403 {
404         struct cbq_sched_data *q = qdisc_priv(sch);
405         int len = skb->len;
406         int ret;
407         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
408
409 #ifdef CONFIG_NET_CLS_POLICE
410         q->rx_class = cl;
411 #endif
412         if (cl == NULL) {
413                 if (ret == NET_XMIT_BYPASS)
414                         sch->qstats.drops++;
415                 kfree_skb(skb);
416                 return ret;
417         }
418
419 #ifdef CONFIG_NET_CLS_POLICE
420         cl->q->__parent = sch;
421 #endif
422         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
423                 sch->q.qlen++;
424                 sch->bstats.packets++;
425                 sch->bstats.bytes+=len;
426                 cbq_mark_toplevel(q, cl);
427                 if (!cl->next_alive)
428                         cbq_activate_class(cl);
429                 return ret;
430         }
431
432         sch->qstats.drops++;
433         cbq_mark_toplevel(q, cl);
434         cl->qstats.drops++;
435         return ret;
436 }
437
438 static int
439 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
440 {
441         struct cbq_sched_data *q = qdisc_priv(sch);
442         struct cbq_class *cl;
443         int ret;
444
445         if ((cl = q->tx_class) == NULL) {
446                 kfree_skb(skb);
447                 sch->qstats.drops++;
448                 return NET_XMIT_CN;
449         }
450         q->tx_class = NULL;
451
452         cbq_mark_toplevel(q, cl);
453
454 #ifdef CONFIG_NET_CLS_POLICE
455         q->rx_class = cl;
456         cl->q->__parent = sch;
457 #endif
458         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
459                 sch->q.qlen++;
460                 sch->qstats.requeues++;
461                 if (!cl->next_alive)
462                         cbq_activate_class(cl);
463                 return 0;
464         }
465         sch->qstats.drops++;
466         cl->qstats.drops++;
467         return ret;
468 }
469
470 /* Overlimit actions */
471
472 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473
474 static void cbq_ovl_classic(struct cbq_class *cl)
475 {
476         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
477         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
478
479         if (!cl->delayed) {
480                 delay += cl->offtime;
481
482                 /*
483                    Class goes to sleep, so that it will have no
484                    chance to rebuild avgidle. Let's forgive it 8)
485
486                    BTW cbq-2.0 has a bug in this
487                    place: apparently they forgot to shift it by cl->ewma_log.
488                  */
489                 if (cl->avgidle < 0)
490                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
491                 if (cl->avgidle < cl->minidle)
492                         cl->avgidle = cl->minidle;
493                 if (delay <= 0)
494                         delay = 1;
495                 PSCHED_TADD2(q->now, delay, cl->undertime);
496
497                 cl->xstats.overactions++;
498                 cl->delayed = 1;
499         }
500         if (q->wd_expires == 0 || q->wd_expires > delay)
501                 q->wd_expires = delay;
502
503         /* Dirty work! We must schedule wakeups based on
504            real available rate, rather than leaf rate,
505            which may be tiny (even zero).
506          */
507         if (q->toplevel == TC_CBQ_MAXLEVEL) {
508                 struct cbq_class *b;
509                 psched_tdiff_t base_delay = q->wd_expires;
510
511                 for (b = cl->borrow; b; b = b->borrow) {
512                         delay = PSCHED_TDIFF(b->undertime, q->now);
513                         if (delay < base_delay) {
514                                 if (delay <= 0)
515                                         delay = 1;
516                                 base_delay = delay;
517                         }
518                 }
519
520                 q->wd_expires = base_delay;
521         }
522 }
523
524 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
525    they go overlimit
526  */
527
528 static void cbq_ovl_rclassic(struct cbq_class *cl)
529 {
530         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
531         struct cbq_class *this = cl;
532
533         do {
534                 if (cl->level > q->toplevel) {
535                         cl = NULL;
536                         break;
537                 }
538         } while ((cl = cl->borrow) != NULL);
539
540         if (cl == NULL)
541                 cl = this;
542         cbq_ovl_classic(cl);
543 }
544
545 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
546
547 static void cbq_ovl_delay(struct cbq_class *cl)
548 {
549         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
550         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
551
552         if (!cl->delayed) {
553                 unsigned long sched = jiffies;
554
555                 delay += cl->offtime;
556                 if (cl->avgidle < 0)
557                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
558                 if (cl->avgidle < cl->minidle)
559                         cl->avgidle = cl->minidle;
560                 PSCHED_TADD2(q->now, delay, cl->undertime);
561
562                 if (delay > 0) {
563                         sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
564                         cl->penalized = sched;
565                         cl->cpriority = TC_CBQ_MAXPRIO;
566                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
567                         if (del_timer(&q->delay_timer) &&
568                             (long)(q->delay_timer.expires - sched) > 0)
569                                 q->delay_timer.expires = sched;
570                         add_timer(&q->delay_timer);
571                         cl->delayed = 1;
572                         cl->xstats.overactions++;
573                         return;
574                 }
575                 delay = 1;
576         }
577         if (q->wd_expires == 0 || q->wd_expires > delay)
578                 q->wd_expires = delay;
579 }
580
581 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
582
583 static void cbq_ovl_lowprio(struct cbq_class *cl)
584 {
585         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
586
587         cl->penalized = jiffies + cl->penalty;
588
589         if (cl->cpriority != cl->priority2) {
590                 cl->cpriority = cl->priority2;
591                 q->pmask |= (1<<cl->cpriority);
592                 cl->xstats.overactions++;
593         }
594         cbq_ovl_classic(cl);
595 }
596
597 /* TC_CBQ_OVL_DROP: penalize class by dropping */
598
599 static void cbq_ovl_drop(struct cbq_class *cl)
600 {
601         if (cl->q->ops->drop)
602                 if (cl->q->ops->drop(cl->q))
603                         cl->qdisc->q.qlen--;
604         cl->xstats.overactions++;
605         cbq_ovl_classic(cl);
606 }
607
608 static void cbq_watchdog(unsigned long arg)
609 {
610         struct Qdisc *sch = (struct Qdisc*)arg;
611
612         sch->flags &= ~TCQ_F_THROTTLED;
613         netif_schedule(sch->dev);
614 }
615
616 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
617 {
618         struct cbq_class *cl;
619         struct cbq_class *cl_prev = q->active[prio];
620         unsigned long now = jiffies;
621         unsigned long sched = now;
622
623         if (cl_prev == NULL)
624                 return now;
625
626         do {
627                 cl = cl_prev->next_alive;
628                 if ((long)(now - cl->penalized) > 0) {
629                         cl_prev->next_alive = cl->next_alive;
630                         cl->next_alive = NULL;
631                         cl->cpriority = cl->priority;
632                         cl->delayed = 0;
633                         cbq_activate_class(cl);
634
635                         if (cl == q->active[prio]) {
636                                 q->active[prio] = cl_prev;
637                                 if (cl == q->active[prio]) {
638                                         q->active[prio] = NULL;
639                                         return 0;
640                                 }
641                         }
642
643                         cl = cl_prev->next_alive;
644                 } else if ((long)(sched - cl->penalized) > 0)
645                         sched = cl->penalized;
646         } while ((cl_prev = cl) != q->active[prio]);
647
648         return (long)(sched - now);
649 }
650
651 static void cbq_undelay(unsigned long arg)
652 {
653         struct Qdisc *sch = (struct Qdisc*)arg;
654         struct cbq_sched_data *q = qdisc_priv(sch);
655         long delay = 0;
656         unsigned pmask;
657
658         pmask = q->pmask;
659         q->pmask = 0;
660
661         while (pmask) {
662                 int prio = ffz(~pmask);
663                 long tmp;
664
665                 pmask &= ~(1<<prio);
666
667                 tmp = cbq_undelay_prio(q, prio);
668                 if (tmp > 0) {
669                         q->pmask |= 1<<prio;
670                         if (tmp < delay || delay == 0)
671                                 delay = tmp;
672                 }
673         }
674
675         if (delay) {
676                 q->delay_timer.expires = jiffies + delay;
677                 add_timer(&q->delay_timer);
678         }
679
680         sch->flags &= ~TCQ_F_THROTTLED;
681         netif_schedule(sch->dev);
682 }
683
684
685 #ifdef CONFIG_NET_CLS_POLICE
686
687 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
688 {
689         int len = skb->len;
690         struct Qdisc *sch = child->__parent;
691         struct cbq_sched_data *q = qdisc_priv(sch);
692         struct cbq_class *cl = q->rx_class;
693
694         q->rx_class = NULL;
695
696         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
697
698                 cbq_mark_toplevel(q, cl);
699
700                 q->rx_class = cl;
701                 cl->q->__parent = sch;
702
703                 if (cl->q->enqueue(skb, cl->q) == 0) {
704                         sch->q.qlen++;
705                         sch->bstats.packets++;
706                         sch->bstats.bytes+=len;
707                         if (!cl->next_alive)
708                                 cbq_activate_class(cl);
709                         return 0;
710                 }
711                 sch->qstats.drops++;
712                 return 0;
713         }
714
715         sch->qstats.drops++;
716         return -1;
717 }
718 #endif
719
720 /*
721    This is a mission-critical procedure.
722
723    We "regenerate" the toplevel cutoff if the transmitting class
724    has backlog and is not regulated. This is not part of the
725    original CBQ description, but looks more reasonable.
726    Probably it is wrong; this question needs further investigation.
727 */
728
729 static __inline__ void
730 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
731                     struct cbq_class *borrowed)
732 {
733         if (cl && q->toplevel >= borrowed->level) {
734                 if (cl->q->q.qlen > 1) {
735                         do {
736                                 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
737                                         q->toplevel = borrowed->level;
738                                         return;
739                                 }
740                         } while ((borrowed=borrowed->borrow) != NULL);
741                 }
742 #if 0
743         /* It is not necessary now. Uncommenting it
744            will save CPU cycles, but decrease fairness.
745          */
746                 q->toplevel = TC_CBQ_MAXLEVEL;
747 #endif
748         }
749 }
750
751 static void
752 cbq_update(struct cbq_sched_data *q)
753 {
754         struct cbq_class *this = q->tx_class;
755         struct cbq_class *cl = this;
756         int len = q->tx_len;
757
758         q->tx_class = NULL;
759
760         for ( ; cl; cl = cl->share) {
761                 long avgidle = cl->avgidle;
762                 long idle;
763
764                 cl->bstats.packets++;
765                 cl->bstats.bytes += len;
766
767                 /*
768                    (now - last) is total time between packet right edges.
769                    (last_pktlen/rate) is "virtual" busy time, so that
770
771                          idle = (now - last) - last_pktlen/rate
772                  */
773
774                 idle = PSCHED_TDIFF(q->now, cl->last);
775                 if ((unsigned long)idle > 128*1024*1024) {
776                         avgidle = cl->maxidle;
777                 } else {
778                         idle -= L2T(cl, len);
779
780                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
781                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
782                    cl->avgidle == true_avgidle/W,
783                    hence:
784                  */
785                         avgidle += idle - (avgidle>>cl->ewma_log);
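                        /* Illustrative check with made-up numbers (not
                         * from the original): for ewma_log=5 (W=1/32),
                         * avgidle=640 and idle=4 this gives
                         * 640 + 4 - (640>>5) = 624, i.e. the scaled
                         * average decays by 1/32 of itself and absorbs
                         * the new sample at full weight.
                         */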
786                 }
787
788                 if (avgidle <= 0) {
789                         /* Overlimit or at-limit */
790
791                         if (avgidle < cl->minidle)
792                                 avgidle = cl->minidle;
793
794                         cl->avgidle = avgidle;
795
796                         /* Calculate expected time, when this class
797                            will be allowed to send.
798                            It will occur, when:
799                            (1-W)*true_avgidle + W*idle = 0, i.e.
800                            idle = (1/W - 1)*(-true_avgidle)
801                            or
802                            idle = (1 - W)*(-cl->avgidle);
803                          */
804                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
805
806                         /*
807                            That is not all.
808                            To maintain the rate allocated to the class,
809                            we add to undertime virtual clock,
810                            necessary to complete transmitted packet.
811                            (len/phys_bandwidth has been already passed
812                            to the moment of cbq_update)
813                          */
814
815                         idle -= L2T(&q->link, len);
816                         idle += L2T(cl, len);
817
818                         PSCHED_AUDIT_TDIFF(idle);
819
820                         PSCHED_TADD2(q->now, idle, cl->undertime);
821                 } else {
822                         /* Underlimit */
823
824                         PSCHED_SET_PASTPERFECT(cl->undertime);
825                         if (avgidle > cl->maxidle)
826                                 cl->avgidle = cl->maxidle;
827                         else
828                                 cl->avgidle = avgidle;
829                 }
830                 cl->last = q->now;
831         }
832
833         cbq_update_toplevel(q, this, q->tx_borrowed);
834 }
835
836 static __inline__ struct cbq_class *
837 cbq_under_limit(struct cbq_class *cl)
838 {
839         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
840         struct cbq_class *this_cl = cl;
841
842         if (cl->tparent == NULL)
843                 return cl;
844
845         if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
846             !PSCHED_TLESS(q->now, cl->undertime)) {
847                 cl->delayed = 0;
848                 return cl;
849         }
850
851         do {
852                 /* This is a very suspicious place. Currently the overlimit
853                    action is generated for non-bounded classes
854                    only if the link is completely congested.
855                    Though this agrees with the ancestor-only paradigm,
856                    it looks very stupid. In particular,
857                    it means that this chunk of code will either
858                    never be called or will strongly amplify
859                    burstiness. Dangerous, silly, and yet
860                    no other solution exists.
861                  */
862                 if ((cl = cl->borrow) == NULL) {
863                         this_cl->qstats.overlimits++;
864                         this_cl->overlimit(this_cl);
865                         return NULL;
866                 }
867                 if (cl->level > q->toplevel)
868                         return NULL;
869         } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
870                  PSCHED_TLESS(q->now, cl->undertime));
871
872         cl->delayed = 0;
873         return cl;
874 }
875
876 static __inline__ struct sk_buff *
877 cbq_dequeue_prio(struct Qdisc *sch, int prio)
878 {
879         struct cbq_sched_data *q = qdisc_priv(sch);
880         struct cbq_class *cl_tail, *cl_prev, *cl;
881         struct sk_buff *skb;
882         int deficit;
883
884         cl_tail = cl_prev = q->active[prio];
885         cl = cl_prev->next_alive;
886
887         do {
888                 deficit = 0;
889
890                 /* Start round */
891                 do {
892                         struct cbq_class *borrow = cl;
893
894                         if (cl->q->q.qlen &&
895                             (borrow = cbq_under_limit(cl)) == NULL)
896                                 goto skip_class;
897
898                         if (cl->deficit <= 0) {
899                                 /* Class exhausted its allotment per
900                                    this round. Switch to the next one.
901                                  */
902                                 deficit = 1;
903                                 cl->deficit += cl->quantum;
904                                 goto next_class;
905                         }
906
907                         skb = cl->q->dequeue(cl->q);
908
909                         /* Class did not give us any skb :-(
910                            This can occur even if cl->q->q.qlen != 0,
911                            e.g. if cl->q == "tbf"
912                          */
913                         if (skb == NULL)
914                                 goto skip_class;
915
916                         cl->deficit -= skb->len;
917                         q->tx_class = cl;
918                         q->tx_borrowed = borrow;
919                         if (borrow != cl) {
920 #ifndef CBQ_XSTATS_BORROWS_BYTES
921                                 borrow->xstats.borrows++;
922                                 cl->xstats.borrows++;
923 #else
924                                 borrow->xstats.borrows += skb->len;
925                                 cl->xstats.borrows += skb->len;
926 #endif
927                         }
928                         q->tx_len = skb->len;
929
930                         if (cl->deficit <= 0) {
931                                 q->active[prio] = cl;
932                                 cl = cl->next_alive;
933                                 cl->deficit += cl->quantum;
934                         }
935                         return skb;
936
937 skip_class:
938                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
939                                 /* Class is empty or penalized.
940                                    Unlink it from active chain.
941                                  */
942                                 cl_prev->next_alive = cl->next_alive;
943                                 cl->next_alive = NULL;
944
945                                 /* Did cl_tail point to it? */
946                                 if (cl == cl_tail) {
947                                         /* Repair it! */
948                                         cl_tail = cl_prev;
949
950                                         /* Was it the last class in this band? */
951                                         if (cl == cl_tail) {
952                                                 /* Kill the band! */
953                                                 q->active[prio] = NULL;
954                                                 q->activemask &= ~(1<<prio);
955                                                 if (cl->q->q.qlen)
956                                                         cbq_activate_class(cl);
957                                                 return NULL;
958                                         }
959
960                                         q->active[prio] = cl_tail;
961                                 }
962                                 if (cl->q->q.qlen)
963                                         cbq_activate_class(cl);
964
965                                 cl = cl_prev;
966                         }
967
968 next_class:
969                         cl_prev = cl;
970                         cl = cl->next_alive;
971                 } while (cl_prev != cl_tail);
972         } while (deficit);
973
974         q->active[prio] = cl_prev;
975
976         return NULL;
977 }
978
979 static __inline__ struct sk_buff *
980 cbq_dequeue_1(struct Qdisc *sch)
981 {
982         struct cbq_sched_data *q = qdisc_priv(sch);
983         struct sk_buff *skb;
984         unsigned activemask;
985
986         activemask = q->activemask&0xFF;
987         while (activemask) {
988                 int prio = ffz(~activemask);
989                 activemask &= ~(1<<prio);
990                 skb = cbq_dequeue_prio(sch, prio);
991                 if (skb)
992                         return skb;
993         }
994         return NULL;
995 }
996
997 static struct sk_buff *
998 cbq_dequeue(struct Qdisc *sch)
999 {
1000         struct sk_buff *skb;
1001         struct cbq_sched_data *q = qdisc_priv(sch);
1002         psched_time_t now;
1003         psched_tdiff_t incr;
1004
1005         PSCHED_GET_TIME(now);
1006         incr = PSCHED_TDIFF(now, q->now_rt);
1007
1008         if (q->tx_class) {
1009                 psched_tdiff_t incr2;
1010                 /* Time integrator. We calculate EOS time
1011                    by adding expected packet transmission time.
1012                    If real time is greater, we warp artificial clock,
1013                    so that:
1014
1015                    cbq_time = max(real_time, work);
1016                  */
1017                 incr2 = L2T(&q->link, q->tx_len);
1018                 PSCHED_TADD(q->now, incr2);
1019                 cbq_update(q);
1020                 if ((incr -= incr2) < 0)
1021                         incr = 0;
1022         }
1023         PSCHED_TADD(q->now, incr);
1024         q->now_rt = now;
1025
1026         for (;;) {
1027                 q->wd_expires = 0;
1028
1029                 skb = cbq_dequeue_1(sch);
1030                 if (skb) {
1031                         sch->q.qlen--;
1032                         sch->flags &= ~TCQ_F_THROTTLED;
1033                         return skb;
1034                 }
1035
1036                 /* All the classes are overlimit.
1037
1038                    This is possible if:
1039
1040                    1. The scheduler is empty.
1041                    2. The toplevel cutoff inhibited borrowing.
1042                    3. The root class is overlimit.
1043
1044                    Reset the 2nd and 3rd conditions and retry.
1045
1046                    Note that NS and cbq-2.0 are buggy here: peeking
1047                    an arbitrary class is appropriate for ancestor-only
1048                    sharing, but not for the toplevel algorithm.
1049
1050                    Our version is better but slower, because it requires
1051                    two passes; this is unavoidable with top-level sharing.
1052                 */
1053
1054                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1055                     PSCHED_IS_PASTPERFECT(q->link.undertime))
1056                         break;
1057
1058                 q->toplevel = TC_CBQ_MAXLEVEL;
1059                 PSCHED_SET_PASTPERFECT(q->link.undertime);
1060         }
1061
1062         /* No packets in the scheduler, or nobody wants to give them to us :-(
1063            Sigh... start the watchdog timer in the latter case. */
1064
1065         if (sch->q.qlen) {
1066                 sch->qstats.overlimits++;
1067                 if (q->wd_expires) {
1068                         long delay = PSCHED_US2JIFFIE(q->wd_expires);
1069                         if (delay <= 0)
1070                                 delay = 1;
1071                         mod_timer(&q->wd_timer, jiffies + delay);
1072                         sch->flags |= TCQ_F_THROTTLED;
1073                 }
1074         }
1075         return NULL;
1076 }
1077
1078 /* CBQ class maintenance routines */
1079
1080 static void cbq_adjust_levels(struct cbq_class *this)
1081 {
1082         if (this == NULL)
1083                 return;
1084
1085         do {
1086                 int level = 0;
1087                 struct cbq_class *cl;
1088
1089                 if ((cl = this->children) != NULL) {
1090                         do {
1091                                 if (cl->level > level)
1092                                         level = cl->level;
1093                         } while ((cl = cl->sibling) != this->children);
1094                 }
1095                 this->level = level+1;
1096         } while ((this = this->tparent) != NULL);
1097 }
1098
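/*
 * cbq_normalize_quanta(): q->quanta[prio] is the sum of weights in the
 * band and q->nclasses[prio] the number of classes there, so each class
 * ends up with quantum = allot * weight / mean_weight of its band.
 */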
1099 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1100 {
1101         struct cbq_class *cl;
1102         unsigned h;
1103
1104         if (q->quanta[prio] == 0)
1105                 return;
1106
1107         for (h=0; h<16; h++) {
1108                 for (cl = q->classes[h]; cl; cl = cl->next) {
1109                         /* BUGGGG... Beware! This expression suffers from
1110                            arithmetic overflows!
1111                          */
1112                         if (cl->priority == prio) {
1113                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1114                                         q->quanta[prio];
1115                         }
1116                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1117                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1118                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1119                         }
1120                 }
1121         }
1122 }
1123
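/*
 * cbq_sync_defmap(): cl->defmap is a bitmap over TC_PRIO_* values that
 * the class claims at its split node. Slots of split->defaults[] left
 * without an owner are repopulated with the lowest-level class attached
 * to that split node which claims the corresponding bit.
 */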
1124 static void cbq_sync_defmap(struct cbq_class *cl)
1125 {
1126         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1127         struct cbq_class *split = cl->split;
1128         unsigned h;
1129         int i;
1130
1131         if (split == NULL)
1132                 return;
1133
1134         for (i=0; i<=TC_PRIO_MAX; i++) {
1135                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1136                         split->defaults[i] = NULL;
1137         }
1138
1139         for (i=0; i<=TC_PRIO_MAX; i++) {
1140                 int level = split->level;
1141
1142                 if (split->defaults[i])
1143                         continue;
1144
1145                 for (h=0; h<16; h++) {
1146                         struct cbq_class *c;
1147
1148                         for (c = q->classes[h]; c; c = c->next) {
1149                                 if (c->split == split && c->level < level &&
1150                                     c->defmap&(1<<i)) {
1151                                         split->defaults[i] = c;
1152                                         level = c->level;
1153                                 }
1154                         }
1155                 }
1156         }
1157 }
1158
1159 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1160 {
1161         struct cbq_class *split = NULL;
1162
1163         if (splitid == 0) {
1164                 if ((split = cl->split) == NULL)
1165                         return;
1166                 splitid = split->classid;
1167         }
1168
1169         if (split == NULL || split->classid != splitid) {
1170                 for (split = cl->tparent; split; split = split->tparent)
1171                         if (split->classid == splitid)
1172                                 break;
1173         }
1174
1175         if (split == NULL)
1176                 return;
1177
1178         if (cl->split != split) {
1179                 cl->defmap = 0;
1180                 cbq_sync_defmap(cl);
1181                 cl->split = split;
1182                 cl->defmap = def&mask;
1183         } else
1184                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1185
1186         cbq_sync_defmap(cl);
1187 }
1188
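/*
 * cbq_unlink_class(): remove the class from its hash chain and from its
 * parent's circular sibling ring; if it was the parent's only child, the
 * parent's children pointer becomes NULL.
 */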
1189 static void cbq_unlink_class(struct cbq_class *this)
1190 {
1191         struct cbq_class *cl, **clp;
1192         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1193
1194         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1195                 if (cl == this) {
1196                         *clp = cl->next;
1197                         cl->next = NULL;
1198                         break;
1199                 }
1200         }
1201
1202         if (this->tparent) {
1203                 clp=&this->sibling;
1204                 cl = *clp;
1205                 do {
1206                         if (cl == this) {
1207                                 *clp = cl->sibling;
1208                                 break;
1209                         }
1210                         clp = &cl->sibling;
1211                 } while ((cl = *clp) != this->sibling);
1212
1213                 if (this->tparent->children == this) {
1214                         this->tparent->children = this->sibling;
1215                         if (this->sibling == this)
1216                                 this->tparent->children = NULL;
1217                 }
1218         } else {
1219                 BUG_TRAP(this->sibling == this);
1220         }
1221 }
1222
1223 static void cbq_link_class(struct cbq_class *this)
1224 {
1225         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1226         unsigned h = cbq_hash(this->classid);
1227         struct cbq_class *parent = this->tparent;
1228
1229         this->sibling = this;
1230         this->next = q->classes[h];
1231         q->classes[h] = this;
1232
1233         if (parent == NULL)
1234                 return;
1235
1236         if (parent->children == NULL) {
1237                 parent->children = this;
1238         } else {
1239                 this->sibling = parent->children->sibling;
1240                 parent->children->sibling = this;
1241         }
1242 }
1243
1244 static unsigned int cbq_drop(struct Qdisc* sch)
1245 {
1246         struct cbq_sched_data *q = qdisc_priv(sch);
1247         struct cbq_class *cl, *cl_head;
1248         int prio;
1249         unsigned int len;
1250
1251         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1252                 if ((cl_head = q->active[prio]) == NULL)
1253                         continue;
1254
1255                 cl = cl_head;
1256                 do {
1257                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1258                                 sch->q.qlen--;
1259                                 if (!cl->q->q.qlen)
1260                                         cbq_deactivate_class(cl);
1261                                 return len;
1262                         }
1263                 } while ((cl = cl->next_alive) != cl_head);
1264         }
1265         return 0;
1266 }
1267
1268 static void
1269 cbq_reset(struct Qdisc* sch)
1270 {
1271         struct cbq_sched_data *q = qdisc_priv(sch);
1272         struct cbq_class *cl;
1273         int prio;
1274         unsigned h;
1275
1276         q->activemask = 0;
1277         q->pmask = 0;
1278         q->tx_class = NULL;
1279         q->tx_borrowed = NULL;
1280         del_timer(&q->wd_timer);
1281         del_timer(&q->delay_timer);
1282         q->toplevel = TC_CBQ_MAXLEVEL;
1283         PSCHED_GET_TIME(q->now);
1284         q->now_rt = q->now;
1285
1286         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1287                 q->active[prio] = NULL;
1288
1289         for (h = 0; h < 16; h++) {
1290                 for (cl = q->classes[h]; cl; cl = cl->next) {
1291                         qdisc_reset(cl->q);
1292
1293                         cl->next_alive = NULL;
1294                         PSCHED_SET_PASTPERFECT(cl->undertime);
1295                         cl->avgidle = cl->maxidle;
1296                         cl->deficit = cl->quantum;
1297                         cl->cpriority = cl->priority;
1298                 }
1299         }
1300         sch->q.qlen = 0;
1301 }
1302
1303
1304 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1305 {
1306         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1307                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1308                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1309         }
1310         if (lss->change&TCF_CBQ_LSS_EWMA)
1311                 cl->ewma_log = lss->ewma_log;
1312         if (lss->change&TCF_CBQ_LSS_AVPKT)
1313                 cl->avpkt = lss->avpkt;
1314         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1315                 cl->minidle = -(long)lss->minidle;
1316         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1317                 cl->maxidle = lss->maxidle;
1318                 cl->avgidle = lss->maxidle;
1319         }
1320         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1321                 cl->offtime = lss->offtime;
1322         return 0;
1323 }
1324
1325 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1326 {
1327         q->nclasses[cl->priority]--;
1328         q->quanta[cl->priority] -= cl->weight;
1329         cbq_normalize_quanta(q, cl->priority);
1330 }
1331
1332 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1333 {
1334         q->nclasses[cl->priority]++;
1335         q->quanta[cl->priority] += cl->weight;
1336         cbq_normalize_quanta(q, cl->priority);
1337 }
1338
1339 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1340 {
1341         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1342
1343         if (wrr->allot)
1344                 cl->allot = wrr->allot;
1345         if (wrr->weight)
1346                 cl->weight = wrr->weight;
1347         if (wrr->priority) {
1348                 cl->priority = wrr->priority-1;
1349                 cl->cpriority = cl->priority;
1350                 if (cl->priority >= cl->priority2)
1351                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1352         }
1353
1354         cbq_addprio(q, cl);
1355         return 0;
1356 }
1357
1358 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1359 {
1360         switch (ovl->strategy) {
1361         case TC_CBQ_OVL_CLASSIC:
1362                 cl->overlimit = cbq_ovl_classic;
1363                 break;
1364         case TC_CBQ_OVL_DELAY:
1365                 cl->overlimit = cbq_ovl_delay;
1366                 break;
1367         case TC_CBQ_OVL_LOWPRIO:
1368                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1369                     ovl->priority2-1 <= cl->priority)
1370                         return -EINVAL;
1371                 cl->priority2 = ovl->priority2-1;
1372                 cl->overlimit = cbq_ovl_lowprio;
1373                 break;
1374         case TC_CBQ_OVL_DROP:
1375                 cl->overlimit = cbq_ovl_drop;
1376                 break;
1377         case TC_CBQ_OVL_RCLASSIC:
1378                 cl->overlimit = cbq_ovl_rclassic;
1379                 break;
1380         default:
1381                 return -EINVAL;
1382         }
1383         cl->penalty = (ovl->penalty*HZ)/1000;
1384         return 0;
1385 }
1386
1387 #ifdef CONFIG_NET_CLS_POLICE
1388 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1389 {
1390         cl->police = p->police;
1391
1392         if (cl->q->handle) {
1393                 if (p->police == TC_POLICE_RECLASSIFY)
1394                         cl->q->reshape_fail = cbq_reshape_fail;
1395                 else
1396                         cl->q->reshape_fail = NULL;
1397         }
1398         return 0;
1399 }
1400 #endif
1401
1402 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1403 {
1404         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1405         return 0;
1406 }
1407
1408 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1409 {
1410         struct cbq_sched_data *q = qdisc_priv(sch);
1411         struct rtattr *tb[TCA_CBQ_MAX];
1412         struct tc_ratespec *r;
1413
1414         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1415             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1416             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1417                 return -EINVAL;
1418
1419         if (tb[TCA_CBQ_LSSOPT-1] &&
1420             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1421                 return -EINVAL;
1422
1423         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1424
1425         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1426                 return -EINVAL;
1427
1428         q->link.refcnt = 1;
1429         q->link.sibling = &q->link;
1430         q->link.classid = sch->handle;
1431         q->link.qdisc = sch;
1432         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1433                                             sch->handle)))
1434                 q->link.q = &noop_qdisc;
1435
1436         q->link.priority = TC_CBQ_MAXPRIO-1;
1437         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1438         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1439         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1440         q->link.overlimit = cbq_ovl_classic;
1441         q->link.allot = psched_mtu(sch->dev);
1442         q->link.quantum = q->link.allot;
1443         q->link.weight = q->link.R_tab->rate.rate;
1444
1445         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1446         q->link.avpkt = q->link.allot/2;
1447         q->link.minidle = -0x7FFFFFFF;
1448         q->link.stats_lock = &sch->dev->queue_lock;
1449
1450         init_timer(&q->wd_timer);
1451         q->wd_timer.data = (unsigned long)sch;
1452         q->wd_timer.function = cbq_watchdog;
1453         init_timer(&q->delay_timer);
1454         q->delay_timer.data = (unsigned long)sch;
1455         q->delay_timer.function = cbq_undelay;
1456         q->toplevel = TC_CBQ_MAXLEVEL;
1457         PSCHED_GET_TIME(q->now);
1458         q->now_rt = q->now;
1459
1460         cbq_link_class(&q->link);
1461
1462         if (tb[TCA_CBQ_LSSOPT-1])
1463                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1464
1465         cbq_addprio(q, &q->link);
1466         return 0;
1467 }
1468
1469 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1470 {
1471         unsigned char    *b = skb->tail;
1472
1473         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1474         return skb->len;
1475
1476 rtattr_failure:
1477         skb_trim(skb, b - skb->data);
1478         return -1;
1479 }
1480
1481 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1482 {
1483         unsigned char    *b = skb->tail;
1484         struct tc_cbq_lssopt opt;
1485
1486         opt.flags = 0;
1487         if (cl->borrow == NULL)
1488                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1489         if (cl->share == NULL)
1490                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1491         opt.ewma_log = cl->ewma_log;
1492         opt.level = cl->level;
1493         opt.avpkt = cl->avpkt;
1494         opt.maxidle = cl->maxidle;
1495         opt.minidle = (u32)(-cl->minidle);
1496         opt.offtime = cl->offtime;
1497         opt.change = ~0;
1498         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1499         return skb->len;
1500
1501 rtattr_failure:
1502         skb_trim(skb, b - skb->data);
1503         return -1;
1504 }
1505
1506 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1507 {
1508         unsigned char    *b = skb->tail;
1509         struct tc_cbq_wrropt opt;
1510
1511         opt.flags = 0;
1512         opt.allot = cl->allot;
1513         opt.priority = cl->priority+1;
1514         opt.cpriority = cl->cpriority+1;
1515         opt.weight = cl->weight;
1516         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1517         return skb->len;
1518
1519 rtattr_failure:
1520         skb_trim(skb, b - skb->data);
1521         return -1;
1522 }
1523
1524 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1525 {
1526         unsigned char    *b = skb->tail;
1527         struct tc_cbq_ovl opt;
1528
1529         opt.strategy = cl->ovl_strategy;
1530         opt.priority2 = cl->priority2+1;
1531         opt.pad = 0;
1532         opt.penalty = (cl->penalty*1000)/HZ;
1533         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1534         return skb->len;
1535
1536 rtattr_failure:
1537         skb_trim(skb, b - skb->data);
1538         return -1;
1539 }
1540
1541 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1542 {
1543         unsigned char    *b = skb->tail;
1544         struct tc_cbq_fopt opt;
1545
1546         if (cl->split || cl->defmap) {
1547                 opt.split = cl->split ? cl->split->classid : 0;
1548                 opt.defmap = cl->defmap;
1549                 opt.defchange = ~0;
1550                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1551         }
1552         return skb->len;
1553
1554 rtattr_failure:
1555         skb_trim(skb, b - skb->data);
1556         return -1;
1557 }
1558
1559 #ifdef CONFIG_NET_CLS_POLICE
1560 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1561 {
1562         unsigned char    *b = skb->tail;
1563         struct tc_cbq_police opt;
1564
1565         if (cl->police) {
1566                 opt.police = cl->police;
1567                 opt.__res1 = 0;
1568                 opt.__res2 = 0;
1569                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1570         }
1571         return skb->len;
1572
1573 rtattr_failure:
1574         skb_trim(skb, b - skb->data);
1575         return -1;
1576 }
1577 #endif
1578
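/*
 * Emit the full set of per-class attributes (LSS, rate, WRR, overlimit
 * strategy, optional policing and filter options) for one class.
 */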
1579 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1580 {
1581         if (cbq_dump_lss(skb, cl) < 0 ||
1582             cbq_dump_rate(skb, cl) < 0 ||
1583             cbq_dump_wrr(skb, cl) < 0 ||
1584             cbq_dump_ovl(skb, cl) < 0 ||
1585 #ifdef CONFIG_NET_CLS_POLICE
1586             cbq_dump_police(skb, cl) < 0 ||
1587 #endif
1588             cbq_dump_fopt(skb, cl) < 0)
1589                 return -1;
1590         return 0;
1591 }
1592
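/*
 * Dump qdisc-level options: the root "link" class carries the scheduler
 * wide parameters, so its attributes are nested under TCA_OPTIONS.
 */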
1593 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1594 {
1595         struct cbq_sched_data *q = qdisc_priv(sch);
1596         unsigned char    *b = skb->tail;
1597         struct rtattr *rta;
1598
1599         rta = (struct rtattr*)b;
1600         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1601         if (cbq_dump_attr(skb, &q->link) < 0)
1602                 goto rtattr_failure;
1603         rta->rta_len = skb->tail - b;
1604         return skb->len;
1605
1606 rtattr_failure:
1607         skb_trim(skb, b - skb->data);
1608         return -1;
1609 }
1610
1611 static int
1612 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1613 {
1614         struct cbq_sched_data *q = qdisc_priv(sch);
1615
1616         q->link.xstats.avgidle = q->link.avgidle;
1617         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1618 }
1619
1620 static int
1621 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1622                struct sk_buff *skb, struct tcmsg *tcm)
1623 {
1624         struct cbq_class *cl = (struct cbq_class*)arg;
1625         unsigned char    *b = skb->tail;
1626         struct rtattr *rta;
1627
1628         if (cl->tparent)
1629                 tcm->tcm_parent = cl->tparent->classid;
1630         else
1631                 tcm->tcm_parent = TC_H_ROOT;
1632         tcm->tcm_handle = cl->classid;
1633         tcm->tcm_info = cl->q->handle;
1634
1635         rta = (struct rtattr*)b;
1636         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1637         if (cbq_dump_attr(skb, cl) < 0)
1638                 goto rtattr_failure;
1639         rta->rta_len = skb->tail - b;
1640         return skb->len;
1641
1642 rtattr_failure:
1643         skb_trim(skb, b - skb->data);
1644         return -1;
1645 }
1646
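/*
 * Copy per-class statistics: the generic basic and queue counters (and
 * the rate estimator when configured) plus the CBQ-specific xstats, i.e.
 * the current avgidle and how long until the class goes underlimit again.
 */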
1647 static int
1648 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1649         struct gnet_dump *d)
1650 {
1651         struct cbq_sched_data *q = qdisc_priv(sch);
1652         struct cbq_class *cl = (struct cbq_class*)arg;
1653
1654         cl->qstats.qlen = cl->q->q.qlen;
1655         cl->xstats.avgidle = cl->avgidle;
1656         cl->xstats.undertime = 0;
1657
1658         if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1659                 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1660
1661         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1662 #ifdef CONFIG_NET_ESTIMATOR
1663             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1664 #endif
1665             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1666                 return -1;
1667
1668         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1669 }
1670
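/*
 * Replace the child qdisc of a class.  A NULL replacement falls back to a
 * default pfifo; the old qdisc is drained and reset under the tree lock
 * before being handed back to the caller for destruction.
 */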
1671 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1672                      struct Qdisc **old)
1673 {
1674         struct cbq_class *cl = (struct cbq_class*)arg;
1675
1676         if (cl) {
1677                 if (new == NULL) {
1678                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1679                                                      cl->classid)) == NULL)
1680                                 return -ENOBUFS;
1681                 } else {
1682 #ifdef CONFIG_NET_CLS_POLICE
1683                         if (cl->police == TC_POLICE_RECLASSIFY)
1684                                 new->reshape_fail = cbq_reshape_fail;
1685 #endif
1686                 }
1687                 sch_tree_lock(sch);
1688                 *old = xchg(&cl->q, new);
1689                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1690                 qdisc_reset(*old);
1691                 sch_tree_unlock(sch);
1692
1693                 return 0;
1694         }
1695         return -ENOENT;
1696 }
1697
1698 static struct Qdisc *
1699 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1700 {
1701         struct cbq_class *cl = (struct cbq_class*)arg;
1702
1703         return cl ? cl->q : NULL;
1704 }
1705
1706 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1707 {
1708         struct cbq_class *cl = (struct cbq_class *)arg;
1709
1710         if (cl->q->q.qlen == 0)
1711                 cbq_deactivate_class(cl);
1712 }
1713
1714 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1715 {
1716         struct cbq_sched_data *q = qdisc_priv(sch);
1717         struct cbq_class *cl = cbq_class_lookup(q, classid);
1718
1719         if (cl) {
1720                 cl->refcnt++;
1721                 return (unsigned long)cl;
1722         }
1723         return 0;
1724 }
1725
1726 static void cbq_destroy_filters(struct cbq_class *cl)
1727 {
1728         struct tcf_proto *tp;
1729
1730         while ((tp = cl->filter_list) != NULL) {
1731                 cl->filter_list = tp->next;
1732                 tcf_destroy(tp);
1733         }
1734 }
1735
1736 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1737 {
1738         struct cbq_sched_data *q = qdisc_priv(sch);
1739
1740         BUG_TRAP(!cl->filters);
1741
1742         cbq_destroy_filters(cl);
1743         qdisc_destroy(cl->q);
1744         qdisc_put_rtab(cl->R_tab);
1745 #ifdef CONFIG_NET_ESTIMATOR
1746         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1747 #endif
1748         if (cl != &q->link)
1749                 kfree(cl);
1750 }
1751
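/*
 * Tear down the whole qdisc: drop every class in the hash table,
 * including the embedded root link class, which is freed together with
 * the qdisc private area rather than by kfree().
 */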
1752 static void
1753 cbq_destroy(struct Qdisc* sch)
1754 {
1755         struct cbq_sched_data *q = qdisc_priv(sch);
1756         struct cbq_class *cl;
1757         unsigned h;
1758
1759 #ifdef CONFIG_NET_CLS_POLICE
1760         q->rx_class = NULL;
1761 #endif
1762         /*
1763          * Filters must be destroyed first because we don't destroy the
1764          * classes from root to leaves, which means that filters can still

1765          * be bound to classes which have been destroyed already. --TGR '04
1766          */
1767         for (h = 0; h < 16; h++)
1768                 for (cl = q->classes[h]; cl; cl = cl->next)
1769                         cbq_destroy_filters(cl);
1770
1771         for (h = 0; h < 16; h++) {
1772                 struct cbq_class *next;
1773
1774                 for (cl = q->classes[h]; cl; cl = next) {
1775                         next = cl->next;
1776                         cbq_destroy_class(sch, cl);
1777                 }
1778         }
1779 }
1780
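/*
 * Drop a class reference taken by cbq_get().  On the last reference the
 * class is destroyed; rx_class is cleared under the device queue lock so
 * the policing path cannot keep using a stale pointer.
 */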
1781 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1782 {
1783         struct cbq_class *cl = (struct cbq_class*)arg;
1784
1785         if (--cl->refcnt == 0) {
1786 #ifdef CONFIG_NET_CLS_POLICE
1787                 struct cbq_sched_data *q = qdisc_priv(sch);
1788
1789                 spin_lock_bh(&sch->dev->queue_lock);
1790                 if (q->rx_class == cl)
1791                         q->rx_class = NULL;
1792                 spin_unlock_bh(&sch->dev->queue_lock);
1793 #endif
1794
1795                 cbq_destroy_class(sch, cl);
1796         }
1797 }
1798
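/*
 * Create a new class or change an existing one (*arg != 0).  All supplied
 * attributes are length-checked first; creating a class requires at least
 * TCA_CBQ_RATE, TCA_CBQ_WRROPT and TCA_CBQ_LSSOPT, and a class id is
 * auto-allocated from the 0x8000 range when none is given.
 */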
1799 static int
1800 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1801                  unsigned long *arg)
1802 {
1803         int err;
1804         struct cbq_sched_data *q = qdisc_priv(sch);
1805         struct cbq_class *cl = (struct cbq_class*)*arg;
1806         struct rtattr *opt = tca[TCA_OPTIONS-1];
1807         struct rtattr *tb[TCA_CBQ_MAX];
1808         struct cbq_class *parent;
1809         struct qdisc_rate_table *rtab = NULL;
1810
1811         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1812                 return -EINVAL;
1813
1814         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1815             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1816                 return -EINVAL;
1817
1818         if (tb[TCA_CBQ_FOPT-1] &&
1819             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1820                 return -EINVAL;
1821
1822         if (tb[TCA_CBQ_RATE-1] &&
1823             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1824                 return -EINVAL;
1825
1826         if (tb[TCA_CBQ_LSSOPT-1] &&
1827             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1828                 return -EINVAL;
1829
1830         if (tb[TCA_CBQ_WRROPT-1] &&
1831             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1832                 return -EINVAL;
1833
1834 #ifdef CONFIG_NET_CLS_POLICE
1835         if (tb[TCA_CBQ_POLICE-1] &&
1836             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1837                 return -EINVAL;
1838 #endif
1839
1840         if (cl) {
1841                 /* Check parent */
1842                 if (parentid) {
1843                         if (cl->tparent && cl->tparent->classid != parentid)
1844                                 return -EINVAL;
1845                         if (!cl->tparent && parentid != TC_H_ROOT)
1846                                 return -EINVAL;
1847                 }
1848
1849                 if (tb[TCA_CBQ_RATE-1]) {
1850                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1851                         if (rtab == NULL)
1852                                 return -EINVAL;
1853                 }
1854
1855                 /* Change class parameters */
1856                 sch_tree_lock(sch);
1857
1858                 if (cl->next_alive != NULL)
1859                         cbq_deactivate_class(cl);
1860
1861                 if (rtab) {
1862                         rtab = xchg(&cl->R_tab, rtab);
1863                         qdisc_put_rtab(rtab);
1864                 }
1865
1866                 if (tb[TCA_CBQ_LSSOPT-1])
1867                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1868
1869                 if (tb[TCA_CBQ_WRROPT-1]) {
1870                         cbq_rmprio(q, cl);
1871                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1872                 }
1873
1874                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1875                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1876
1877 #ifdef CONFIG_NET_CLS_POLICE
1878                 if (tb[TCA_CBQ_POLICE-1])
1879                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1880 #endif
1881
1882                 if (tb[TCA_CBQ_FOPT-1])
1883                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1884
1885                 if (cl->q->q.qlen)
1886                         cbq_activate_class(cl);
1887
1888                 sch_tree_unlock(sch);
1889
1890 #ifdef CONFIG_NET_ESTIMATOR
1891                 if (tca[TCA_RATE-1])
1892                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1893                                 cl->stats_lock, tca[TCA_RATE-1]);
1894 #endif
1895                 return 0;
1896         }
1897
1898         if (parentid == TC_H_ROOT)
1899                 return -EINVAL;
1900
1901         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1902             tb[TCA_CBQ_LSSOPT-1] == NULL)
1903                 return -EINVAL;
1904
1905         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1906         if (rtab == NULL)
1907                 return -EINVAL;
1908
1909         if (classid) {
1910                 err = -EINVAL;
1911                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1912                         goto failure;
1913         } else {
1914                 int i;
1915                 classid = TC_H_MAKE(sch->handle,0x8000);
1916
1917                 for (i=0; i<0x8000; i++) {
1918                         if (++q->hgenerator >= 0x8000)
1919                                 q->hgenerator = 1;
1920                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1921                                 break;
1922                 }
1923                 err = -ENOSR;
1924                 if (i >= 0x8000)
1925                         goto failure;
1926                 classid = classid|q->hgenerator;
1927         }
1928
1929         parent = &q->link;
1930         if (parentid) {
1931                 parent = cbq_class_lookup(q, parentid);
1932                 err = -EINVAL;
1933                 if (parent == NULL)
1934                         goto failure;
1935         }
1936
1937         err = -ENOBUFS;
1938         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1939         if (cl == NULL)
1940                 goto failure;
1941         cl->R_tab = rtab;
1942         rtab = NULL;
1943         cl->refcnt = 1;
1944         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1945                 cl->q = &noop_qdisc;
1946         cl->classid = classid;
1947         cl->tparent = parent;
1948         cl->qdisc = sch;
1949         cl->allot = parent->allot;
1950         cl->quantum = cl->allot;
1951         cl->weight = cl->R_tab->rate.rate;
1952         cl->stats_lock = &sch->dev->queue_lock;
1953
1954         sch_tree_lock(sch);
1955         cbq_link_class(cl);
1956         cl->borrow = cl->tparent;
1957         if (cl->tparent != &q->link)
1958                 cl->share = cl->tparent;
1959         cbq_adjust_levels(parent);
1960         cl->minidle = -0x7FFFFFFF;
1961         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1962         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1963         if (cl->ewma_log==0)
1964                 cl->ewma_log = q->link.ewma_log;
1965         if (cl->maxidle==0)
1966                 cl->maxidle = q->link.maxidle;
1967         if (cl->avpkt==0)
1968                 cl->avpkt = q->link.avpkt;
1969         cl->overlimit = cbq_ovl_classic;
1970         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1971                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1972 #ifdef CONFIG_NET_CLS_POLICE
1973         if (tb[TCA_CBQ_POLICE-1])
1974                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1975 #endif
1976         if (tb[TCA_CBQ_FOPT-1])
1977                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1978         sch_tree_unlock(sch);
1979
1980 #ifdef CONFIG_NET_ESTIMATOR
1981         if (tca[TCA_RATE-1])
1982                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1983                         cl->stats_lock, tca[TCA_RATE-1]);
1984 #endif
1985
1986         *arg = (unsigned long)cl;
1987         return 0;
1988
1989 failure:
1990         qdisc_put_rtab(rtab);
1991         return err;
1992 }
1993
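/*
 * Delete a class.  Refused while filters are still bound, while child
 * classes exist, or for the root link class; otherwise the class queue is
 * purged, the class is unlinked from the tree and the caller's reference
 * is dropped.
 */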
1994 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1995 {
1996         struct cbq_sched_data *q = qdisc_priv(sch);
1997         struct cbq_class *cl = (struct cbq_class*)arg;
1998         unsigned int qlen;
1999
2000         if (cl->filters || cl->children || cl == &q->link)
2001                 return -EBUSY;
2002
2003         sch_tree_lock(sch);
2004
2005         qlen = cl->q->q.qlen;
2006         qdisc_reset(cl->q);
2007         qdisc_tree_decrease_qlen(cl->q, qlen);
2008
2009         if (cl->next_alive)
2010                 cbq_deactivate_class(cl);
2011
2012         if (q->tx_borrowed == cl)
2013                 q->tx_borrowed = q->tx_class;
2014         if (q->tx_class == cl) {
2015                 q->tx_class = NULL;
2016                 q->tx_borrowed = NULL;
2017         }
2018 #ifdef CONFIG_NET_CLS_POLICE
2019         if (q->rx_class == cl)
2020                 q->rx_class = NULL;
2021 #endif
2022
2023         cbq_unlink_class(cl);
2024         cbq_adjust_levels(cl->tparent);
2025         cl->defmap = 0;
2026         cbq_sync_defmap(cl);
2027
2028         cbq_rmprio(q, cl);
2029         sch_tree_unlock(sch);
2030
2031         if (--cl->refcnt == 0)
2032                 cbq_destroy_class(sch, cl);
2033
2034         return 0;
2035 }
2036
2037 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2038 {
2039         struct cbq_sched_data *q = qdisc_priv(sch);
2040         struct cbq_class *cl = (struct cbq_class *)arg;
2041
2042         if (cl == NULL)
2043                 cl = &q->link;
2044
2045         return &cl->filter_list;
2046 }
2047
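/*
 * Filter binding: a filter attached at class 'parent' may only point to
 * classes at a strictly lower level in the hierarchy, and each bound
 * filter pins the target class via the filters counter that cbq_delete()
 * checks.
 */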
2048 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2049                                      u32 classid)
2050 {
2051         struct cbq_sched_data *q = qdisc_priv(sch);
2052         struct cbq_class *p = (struct cbq_class*)parent;
2053         struct cbq_class *cl = cbq_class_lookup(q, classid);
2054
2055         if (cl) {
2056                 if (p && p->level <= cl->level)
2057                         return 0;
2058                 cl->filters++;
2059                 return (unsigned long)cl;
2060         }
2061         return 0;
2062 }
2063
2064 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2065 {
2066         struct cbq_class *cl = (struct cbq_class*)arg;
2067
2068         cl->filters--;
2069 }
2070
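/*
 * Walk all classes in hash order, honouring the generic walker protocol:
 * skip the first 'skip' entries and stop as soon as the callback returns
 * an error.
 */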
2071 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2072 {
2073         struct cbq_sched_data *q = qdisc_priv(sch);
2074         unsigned h;
2075
2076         if (arg->stop)
2077                 return;
2078
2079         for (h = 0; h < 16; h++) {
2080                 struct cbq_class *cl;
2081
2082                 for (cl = q->classes[h]; cl; cl = cl->next) {
2083                         if (arg->count < arg->skip) {
2084                                 arg->count++;
2085                                 continue;
2086                         }
2087                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2088                                 arg->stop = 1;
2089                                 return;
2090                         }
2091                         arg->count++;
2092                 }
2093         }
2094 }
2095
2096 static struct Qdisc_class_ops cbq_class_ops = {
2097         .graft          =       cbq_graft,
2098         .leaf           =       cbq_leaf,
2099         .qlen_notify    =       cbq_qlen_notify,
2100         .get            =       cbq_get,
2101         .put            =       cbq_put,
2102         .change         =       cbq_change_class,
2103         .delete         =       cbq_delete,
2104         .walk           =       cbq_walk,
2105         .tcf_chain      =       cbq_find_tcf,
2106         .bind_tcf       =       cbq_bind_filter,
2107         .unbind_tcf     =       cbq_unbind_filter,
2108         .dump           =       cbq_dump_class,
2109         .dump_stats     =       cbq_dump_class_stats,
2110 };
2111
2112 static struct Qdisc_ops cbq_qdisc_ops = {
2113         .next           =       NULL,
2114         .cl_ops         =       &cbq_class_ops,
2115         .id             =       "cbq",
2116         .priv_size      =       sizeof(struct cbq_sched_data),
2117         .enqueue        =       cbq_enqueue,
2118         .dequeue        =       cbq_dequeue,
2119         .requeue        =       cbq_requeue,
2120         .drop           =       cbq_drop,
2121         .init           =       cbq_init,
2122         .reset          =       cbq_reset,
2123         .destroy        =       cbq_destroy,
2124         .change         =       NULL,
2125         .dump           =       cbq_dump,
2126         .dump_stats     =       cbq_dump_stats,
2127         .owner          =       THIS_MODULE,
2128 };
2129
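/*
 * For orientation only - a rough sketch of how this qdisc is typically
 * configured from userspace with the tc utility (exact option spelling
 * depends on the iproute2 version in use):
 *
 *   tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit avpkt 1000
 *   tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 100Mbit \
 *           rate 6Mbit allot 1514 prio 5 avpkt 1000 bounded
 *
 * tc translates these options into the TCA_CBQ_* attributes parsed by
 * cbq_init() and cbq_change_class() above.
 */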
2130 static int __init cbq_module_init(void)
2131 {
2132         return register_qdisc(&cbq_qdisc_ops);
2133 }
2134 static void __exit cbq_module_exit(void)
2135 {
2136         unregister_qdisc(&cbq_qdisc_ops);
2137 }
2138 module_init(cbq_module_init)
2139 module_exit(cbq_module_exit)
2140 MODULE_LICENSE("GPL");