[LARTC] [PATCH 2/2] NET: Accurate packet scheduling for ATM/ADSL
(userspace)
Jesper Dangaard Brouer
hawk at diku.dk
Fri Jun 16 10:27:09 CEST 2006
(Resend message bounced to LARTC)
The Linux traffic's control engine inaccurately calculates
transmission times for packets sent over ADSL links. For
some packet sizes the error rises to over 50%. This occurs
because ADSL uses ATM as its link layer transport, and ATM
transmits packets in fixed sized 53 byte cells.
This changes the userspace tool iproute2/tc by adding an
option to calculate traffic transmission times (rate table)
over all ATM links, including ADSL, with perfect accuracy.
A longer presentation of the patch, its rational, what it
does and how to use it can be found here:
http://www.stuart.id.au/russell/files/tc/tc-atm/
A earlier version of the patch, and a _detailed_ empirical
investigation of its effects can be found here:
http://www.adsl-optimizer.dk/
Signed-off-by: Jesper Dangaard Brouer <hawk at comx.dk>
Signed-off-by: Russell Stuart <russell-tcatm at stuart.id.au>
---
diff -Nurp iproute2.orig/include/linux/pkt_sched.h iproute2/include/linux/pkt_sched.h
--- iproute2.orig/include/linux/pkt_sched.h 2005-12-10 09:27:44.000000000 +1000
+++ iproute2/include/linux/pkt_sched.h 2006-06-13 11:53:27.000000000 +1000
@@ -77,8 +77,9 @@ struct tc_ratespec
{
unsigned char cell_log;
unsigned char __reserved;
- unsigned short feature;
- short addend;
+ unsigned short feature; /* Always 0 in pre-atm patch kernels */
+ char cell_align; /* Always 0 in pre-atm patch kernels */
+ unsigned char __unused;
unsigned short mpu;
__u32 rate;
};
diff -Nurp iproute2.orig/tc/m_police.c iproute2/tc/m_police.c
--- iproute2.orig/tc/m_police.c 2005-01-19 08:11:58.000000000 +1000
+++ iproute2/tc/m_police.c 2006-06-13 11:53:27.000000000 +1000
@@ -35,7 +35,7 @@ struct action_util police_action_util =
static void explain(void)
{
fprintf(stderr, "Usage: ... police rate BPS burst BYTES[/BYTES] [ mtu BYTES[/BYTES] ]\n");
- fprintf(stderr, " [ peakrate BPS ] [ avrate BPS ]\n");
+ fprintf(stderr, " [ peakrate BPS ] [ avrate BPS ] [ overhead OVERHEAD ] [ atm ]\n");
fprintf(stderr, " [ ACTIONTERM ]\n");
fprintf(stderr, "Old Syntax ACTIONTERM := action <EXCEEDACT>[/NOTEXCEEDACT] \n");
fprintf(stderr, "New Syntax ACTIONTERM := conform-exceed <EXCEEDACT>[/NOTEXCEEDACT] \n");
@@ -134,7 +134,10 @@ int act_parse_police(struct action_util
__u32 ptab[256];
__u32 avrate = 0;
int presult = 0;
- unsigned buffer=0, mtu=0, mpu=0;
+ unsigned buffer=0, mtu=0;
+ __u8 mpu=0;
+ __s8 overhead=0;
+ int atm=0;
int Rcell_log=-1, Pcell_log = -1;
struct rtattr *tail;
@@ -184,7 +187,7 @@ int act_parse_police(struct action_util
fprintf(stderr, "Double \"mpu\" spec\n");
return -1;
}
- if (get_size(&mpu, *argv)) {
+ if (get_u8(&mpu, *argv, 10)) {
explain1("mpu");
return -1;
}
@@ -198,6 +201,18 @@ int act_parse_police(struct action_util
explain1("rate");
return -1;
}
+ } else if (strcmp(*argv, "overhead") == 0) {
+ NEXT_ARG();
+ if (p.rate.rate) {
+ fprintf(stderr, "Double \"overhead\" spec\n");
+ return -1;
+ }
+ if (get_s8(&overhead, *argv, 10)) {
+ explain1("overhead");
+ return -1;
+ }
+ } else if (strcmp(*argv, "atm") == 0) {
+ atm = 1;
} else if (strcmp(*argv, "avrate") == 0) {
NEXT_ARG();
if (avrate) {
@@ -264,22 +279,12 @@ int act_parse_police(struct action_util
}
if (p.rate.rate) {
- if ((Rcell_log = tc_calc_rtable(p.rate.rate, rtab, Rcell_log, mtu, mpu)) < 0) {
- fprintf(stderr, "TBF: failed to calculate rate table.\n");
- return -1;
- }
+ tc_calc_ratespec(&p.rate, rtab, p.rate.rate, Rcell_log, mtu, mpu, atm, overhead);
p.burst = tc_calc_xmittime(p.rate.rate, buffer);
- p.rate.cell_log = Rcell_log;
- p.rate.mpu = mpu;
}
p.mtu = mtu;
if (p.peakrate.rate) {
- if ((Pcell_log = tc_calc_rtable(p.peakrate.rate, ptab, Pcell_log, mtu, mpu)) < 0) {
- fprintf(stderr, "POLICE: failed to calculate peak rate table.\n");
- return -1;
- }
- p.peakrate.cell_log = Pcell_log;
- p.peakrate.mpu = mpu;
+ tc_calc_ratespec(&p.peakrate, ptab, p.peakrate.rate, Pcell_log, mtu, mpu, atm, overhead);
}
tail = NLMSG_TAIL(n);
diff -Nurp iproute2.orig/tc/q_cbq.c iproute2/tc/q_cbq.c
--- iproute2.orig/tc/q_cbq.c 2005-07-06 08:37:15.000000000 +1000
+++ iproute2/tc/q_cbq.c 2006-06-13 11:53:27.000000000 +1000
@@ -32,6 +32,7 @@ static void explain_class(void)
fprintf(stderr, " [ prio NUMBER ] [ cell BYTES ] [ ewma LOG ]\n");
fprintf(stderr, " [ estimator INTERVAL TIME_CONSTANT ]\n");
fprintf(stderr, " [ split CLASSID ] [ defmap MASK/CHANGE ]\n");
+ fprintf(stderr, " [ overhead BYTES ] [ atm ]\n");
}
static void explain(void)
@@ -52,7 +53,10 @@ static int cbq_parse_opt(struct qdisc_ut
struct tc_ratespec r;
struct tc_cbq_lssopt lss;
__u32 rtab[256];
- unsigned mpu=0, avpkt=0, allot=0;
+ unsigned avpkt=0, allot=0;
+ __u8 mpu=0;
+ __s8 overhead=0;
+ int atm=0;
int cell_log=-1;
int ewma_log=-1;
struct rtattr *tail;
@@ -102,7 +106,7 @@ static int cbq_parse_opt(struct qdisc_ut
}
} else if (strcmp(*argv, "mpu") == 0) {
NEXT_ARG();
- if (get_size(&mpu, *argv)) {
+ if (get_u8(&mpu, *argv, 10)) {
explain1("mpu");
return -1;
}
@@ -113,6 +117,14 @@ static int cbq_parse_opt(struct qdisc_ut
explain1("allot");
return -1;
}
+ } else if (strcmp(*argv, "overhead") == 0) {
+ NEXT_ARG();
+ if (get_s8(&overhead, *argv, 10)) {
+ explain1("overhead");
+ return -1;
+ }
+ } else if (strcmp(*argv, "atm") == 0) {
+ atm = 1;
} else if (strcmp(*argv, "help") == 0) {
explain();
return -1;
@@ -137,12 +149,7 @@ static int cbq_parse_opt(struct qdisc_ut
if (allot < (avpkt*3)/2)
allot = (avpkt*3)/2;
- if ((cell_log = tc_calc_rtable(r.rate, rtab, cell_log, allot, mpu)) < 0) {
- fprintf(stderr, "CBQ: failed to calculate rate table.\n");
- return -1;
- }
- r.cell_log = cell_log;
- r.mpu = mpu;
+ tc_calc_ratespec(&r, rtab, r.rate, cell_log, allot, mpu, atm, overhead);
if (ewma_log < 0)
ewma_log = TC_CBQ_DEF_EWMA;
@@ -175,7 +182,9 @@ static int cbq_parse_class_opt(struct qd
struct tc_cbq_fopt fopt;
struct tc_cbq_ovl ovl;
__u32 rtab[256];
- unsigned mpu=0;
+ __u8 mpu=0;
+ __s8 overhead = 0;
+ int atm = 0;
int cell_log=-1;
int ewma_log=-1;
unsigned bndw = 0;
@@ -289,10 +298,18 @@ static int cbq_parse_class_opt(struct qd
lss.change |= TCF_CBQ_LSS_AVPKT;
} else if (strcmp(*argv, "mpu") == 0) {
NEXT_ARG();
- if (get_size(&mpu, *argv)) {
+ if (get_u8(&mpu, *argv, 10)) {
explain1("mpu");
return -1;
}
+ } else if (strcmp(*argv, "overhead") == 0) {
+ NEXT_ARG();
+ if (get_s8(&overhead, *argv, 10)) {
+ explain1("overhead");
+ return -1;
+ }
+ } else if (strcmp(*argv, "atm") == 0) {
+ atm = 1;
} else if (strcmp(*argv, "weight") == 0) {
NEXT_ARG();
if (get_size(&wrr.weight, *argv)) {
@@ -336,12 +353,7 @@ static int cbq_parse_class_opt(struct qd
unsigned pktsize = wrr.allot;
if (wrr.allot < (lss.avpkt*3)/2)
wrr.allot = (lss.avpkt*3)/2;
- if ((cell_log = tc_calc_rtable(r.rate, rtab, cell_log, pktsize, mpu)) < 0) {
- fprintf(stderr, "CBQ: failed to calculate rate table.\n");
- return -1;
- }
- r.cell_log = cell_log;
- r.mpu = mpu;
+ tc_calc_ratespec(&r, rtab, r.rate, cell_log, pktsize, mpu, atm, overhead);
}
if (ewma_log < 0)
ewma_log = TC_CBQ_DEF_EWMA;
@@ -463,8 +475,12 @@ static int cbq_print_opt(struct qdisc_ut
fprintf(f, "rate %s ", buf);
if (show_details) {
fprintf(f, "cell %ub ", 1<<r->cell_log);
- if (r->mpu)
- fprintf(f, "mpu %ub ", r->mpu);
+ if (r->mpu & 0xff)
+ fprintf(f, "mpu %ub ", (__u8)r->mpu);
+ if ((r->mpu >> 8))
+ fprintf(f, "overhead %db ", (__s8)(r->mpu >> 8));
+ if (r->feature & 0x0001)
+ fprintf(f, "atm ");
}
}
if (lss && lss->flags) {
diff -Nurp iproute2.orig/tc/q_htb.c iproute2/tc/q_htb.c
--- iproute2.orig/tc/q_htb.c 2005-01-19 08:11:58.000000000 +1000
+++ iproute2/tc/q_htb.c 2006-06-13 11:53:27.000000000 +1000
@@ -34,14 +34,14 @@ static void explain(void)
" default minor id of class to which unclassified packets are sent {0}\n"
" r2q DRR quantums are computed as rate in Bps/r2q {10}\n"
" debug string of 16 numbers each 0-3 {0}\n\n"
- "... class add ... htb rate R1 [burst B1] [mpu B] [overhead O]\n"
+ "... class add ... htb rate R1 [burst B1] [mpu B] [overhead O] [atm]\n"
" [prio P] [slot S] [pslot PS]\n"
" [ceil R2] [cburst B2] [mtu MTU] [quantum Q]\n"
" rate rate allocated to this class (class can still borrow)\n"
" burst max bytes burst which can be accumulated during idle period {computed}\n"
" mpu minimum packet size used in rate computations\n"
" overhead per-packet size overhead used in rate computations\n"
-
+ " atm include atm cell tax in rate computations\n"
" ceil definite upper class rate (no borrows) {rate}\n"
" cburst burst but for ceil {computed}\n"
" mtu max packet size we create rate map for {1600}\n"
@@ -107,8 +107,10 @@ static int htb_parse_class_opt(struct qd
__u32 rtab[256],ctab[256];
unsigned buffer=0,cbuffer=0;
int cell_log=-1,ccell_log = -1;
- unsigned mtu, mpu;
- unsigned char mpu8 = 0, overhead = 0;
+ unsigned mtu;
+ __u8 mpu8=0;
+ __s8 overhead=0;
+ int atm=0;
struct rtattr *tail;
memset(&opt, 0, sizeof(opt)); mtu = 1600; /* eth packet len */
@@ -132,9 +134,11 @@ static int htb_parse_class_opt(struct qd
}
} else if (matches(*argv, "overhead") == 0) {
NEXT_ARG();
- if (get_u8(&overhead, *argv, 10)) {
+ if (get_s8(&overhead, *argv, 10)) {
explain1("overhead"); return -1;
}
+ } else if (matches(*argv, "atm") == 0) {
+ atm = 1;
} else if (matches(*argv, "quantum") == 0) {
NEXT_ARG();
if (get_u32(&opt.quantum, *argv, 10)) {
@@ -206,23 +210,11 @@ static int htb_parse_class_opt(struct qd
if (!buffer) buffer = opt.rate.rate / get_hz() + mtu;
if (!cbuffer) cbuffer = opt.ceil.rate / get_hz() + mtu;
-/* encode overhead and mpu, 8 bits each, into lower 16 bits */
- mpu = (unsigned)mpu8 | (unsigned)overhead << 8;
- opt.ceil.mpu = mpu; opt.rate.mpu = mpu;
-
- if ((cell_log = tc_calc_rtable(opt.rate.rate, rtab, cell_log, mtu, mpu)) < 0) {
- fprintf(stderr, "htb: failed to calculate rate table.\n");
- return -1;
- }
+ /* encode overhead and mpu, 8 bits each, into lower 16 bits */
+ tc_calc_ratespec(&opt.rate, rtab, opt.rate.rate, cell_log, mtu, mpu8, atm, overhead);
+ tc_calc_ratespec(&opt.ceil, ctab, opt.ceil.rate, cell_log, mtu, mpu8, atm, overhead);
opt.buffer = tc_calc_xmittime(opt.rate.rate, buffer);
- opt.rate.cell_log = cell_log;
-
- if ((ccell_log = tc_calc_rtable(opt.ceil.rate, ctab, cell_log, mtu, mpu)) < 0) {
- fprintf(stderr, "htb: failed to calculate ceil rate table.\n");
- return -1;
- }
opt.cbuffer = tc_calc_xmittime(opt.ceil.rate, cbuffer);
- opt.ceil.cell_log = ccell_log;
tail = NLMSG_TAIL(n);
addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
@@ -267,12 +259,16 @@ static int htb_print_opt(struct qdisc_ut
sprint_size(buffer, b1),
1<<hopt->rate.cell_log,
sprint_size(hopt->rate.mpu&0xFF, b2),
- sprint_size((hopt->rate.mpu>>8)&0xFF, b3));
+ sprint_size((__s8)(hopt->rate.mpu>>8), b3));
+ if (hopt->rate.feature & 0x0001)
+ fprintf(f, "atm ");
fprintf(f, "cburst %s/%u mpu %s overhead %s ",
sprint_size(cbuffer, b1),
1<<hopt->ceil.cell_log,
sprint_size(hopt->ceil.mpu&0xFF, b2),
- sprint_size((hopt->ceil.mpu>>8)&0xFF, b3));
+ sprint_size((__s8)(hopt->ceil.mpu>>8), b3));
+ if (hopt->ceil.feature & 0x0001)
+ fprintf(f, "atm ");
fprintf(f, "level %d ", (int)hopt->level);
} else {
fprintf(f, "burst %s ", sprint_size(buffer, b1));
diff -Nurp iproute2.orig/tc/q_tbf.c iproute2/tc/q_tbf.c
--- iproute2.orig/tc/q_tbf.c 2005-01-19 08:11:58.000000000 +1000
+++ iproute2/tc/q_tbf.c 2006-06-13 11:53:27.000000000 +1000
@@ -26,7 +26,7 @@
static void explain(void)
{
fprintf(stderr, "Usage: ... tbf limit BYTES burst BYTES[/BYTES] rate KBPS [ mtu BYTES[/BYTES] ]\n");
- fprintf(stderr, " [ peakrate KBPS ] [ latency TIME ]\n");
+ fprintf(stderr, " [ peakrate KBPS ] [ latency TIME ] [ overhead OVERHEAD ] [ atm ]\n");
}
static void explain1(char *arg)
@@ -43,7 +43,10 @@ static int tbf_parse_opt(struct qdisc_ut
struct tc_tbf_qopt opt;
__u32 rtab[256];
__u32 ptab[256];
- unsigned buffer=0, mtu=0, mpu=0, latency=0;
+ unsigned buffer=0, mtu=0, latency=0;
+ __u8 mpu=0;
+ __s8 overhead=0;
+ int atm=0;
int Rcell_log=-1, Pcell_log = -1;
struct rtattr *tail;
@@ -103,7 +106,7 @@ static int tbf_parse_opt(struct qdisc_ut
fprintf(stderr, "Double \"mpu\" spec\n");
return -1;
}
- if (get_size(&mpu, *argv)) {
+ if (get_u8(&mpu, *argv, 10)) {
explain1("mpu");
return -1;
}
@@ -119,6 +122,20 @@ static int tbf_parse_opt(struct qdisc_ut
return -1;
}
ok++;
+ } else if (strcmp(*argv, "overhead") == 0) {
+ NEXT_ARG();
+ if (overhead) {
+ fprintf(stderr, "Double \"overhead\" spec\n");
+ return -1;
+ }
+ if (get_s8(&overhead, *argv, 10)) {
+ explain1("overhead");
+ return -1;
+ }
+ ok++;
+ } else if (strcmp(*argv, "atm") == 0) {
+ atm = 1;
+ ok++;
} else if (matches(*argv, "peakrate") == 0) {
NEXT_ARG();
if (opt.peakrate.rate) {
@@ -170,21 +187,11 @@ static int tbf_parse_opt(struct qdisc_ut
opt.limit = lim;
}
- if ((Rcell_log = tc_calc_rtable(opt.rate.rate, rtab, Rcell_log, mtu, mpu)) < 0) {
- fprintf(stderr, "TBF: failed to calculate rate table.\n");
- return -1;
- }
+ tc_calc_ratespec(&opt.rate, rtab, opt.rate.rate, Rcell_log, mtu, mpu, atm, overhead);
opt.buffer = tc_calc_xmittime(opt.rate.rate, buffer);
- opt.rate.cell_log = Rcell_log;
- opt.rate.mpu = mpu;
if (opt.peakrate.rate) {
- if ((Pcell_log = tc_calc_rtable(opt.peakrate.rate, ptab, Pcell_log, mtu, mpu)) < 0) {
- fprintf(stderr, "TBF: failed to calculate peak rate table.\n");
- return -1;
- }
+ tc_calc_ratespec(&opt.peakrate, ptab, opt.peakrate.rate, Pcell_log, mtu, mpu, atm, overhead);
opt.mtu = tc_calc_xmittime(opt.peakrate.rate, mtu);
- opt.peakrate.cell_log = Pcell_log;
- opt.peakrate.mpu = mpu;
}
tail = NLMSG_TAIL(n);
@@ -220,8 +227,12 @@ static int tbf_print_opt(struct qdisc_ut
fprintf(f, "rate %s ", sprint_rate(qopt->rate.rate, b1));
buffer = ((double)qopt->rate.rate*tc_core_tick2usec(qopt->buffer))/1000000;
if (show_details) {
- fprintf(f, "burst %s/%u mpu %s ", sprint_size(buffer, b1),
- 1<<qopt->rate.cell_log, sprint_size(qopt->rate.mpu, b2));
+ fprintf(f, "burst %s/%u mpu %s overhead %d ", sprint_size(buffer, b1),
+ 1<<qopt->rate.cell_log,
+ sprint_size(qopt->rate.mpu & 0xFF, b2),
+ (__s8)(qopt->rate.mpu >> 8));
+ if (qopt->rate.feature & 0x0001)
+ fprintf(f, "atm ");
} else {
fprintf(f, "burst %s ", sprint_size(buffer, b1));
}
@@ -232,8 +243,12 @@ static int tbf_print_opt(struct qdisc_ut
if (qopt->mtu || qopt->peakrate.mpu) {
mtu = ((double)qopt->peakrate.rate*tc_core_tick2usec(qopt->mtu))/1000000;
if (show_details) {
- fprintf(f, "mtu %s/%u mpu %s ", sprint_size(mtu, b1),
- 1<<qopt->peakrate.cell_log, sprint_size(qopt->peakrate.mpu, b2));
+ fprintf(f, "mtu %s/%u mpu %s overhead %d ", sprint_size(mtu, b1),
+ 1<<qopt->peakrate.cell_log,
+ sprint_size(qopt->peakrate.mpu & 0xFF, b2),
+ (__s8)(qopt->peakrate.mpu >> 8));
+ if (qopt->peakrate.feature & 0x0001)
+ fprintf(f, "atm ");
} else {
fprintf(f, "minburst %s ", sprint_size(mtu, b1));
}
diff -Nurp iproute2.orig/tc/tc_core.c iproute2/tc/tc_core.c
--- iproute2.orig/tc/tc_core.c 2004-07-31 06:26:15.000000000 +1000
+++ iproute2/tc/tc_core.c 2006-06-13 11:53:27.000000000 +1000
@@ -23,6 +23,9 @@
#include "tc_core.h"
+#define ATM_CELL_SIZE 53
+#define ATM_CELL_PAYLOAD 48
+
static __u32 t2us=1;
static __u32 us2t=1;
static double tick_in_usec = 1;
@@ -43,33 +46,124 @@ unsigned tc_calc_xmittime(unsigned rate,
}
/*
- rtab[pkt_len>>cell_log] = pkt_xmit_time
+ * Calculate the ATM cell overhead. ATM sends each packet in 48 byte
+ * chunks, the last chunk being padded if necessary. Each chunk carries
+ * an additional 5 byte overhead - the ATM header.
*/
+static int tc_align_to_cells(int size)
+{
+ int cells;
+
+ cells = size / ATM_CELL_PAYLOAD;
+ if (size % ATM_CELL_PAYLOAD != 0)
+ cells++;
+ return cells * ATM_CELL_SIZE;
+}
-int tc_calc_rtable(unsigned bps, __u32 *rtab, int cell_log, unsigned mtu,
- unsigned mpu)
+/*
+ * The number this function calculates is subtle. Ignore it and just believe
+ * it works if you have a choice, otherwise ..
+ *
+ * If there we are calculating the ATM cell overhead the kernel calculations
+ * will be out sometimes if the range of packet sizes spanned by one
+ * rate table element crosses an ATM cell boundary. Consider these three
+ * senarios:
+ * (a) the packet is sent across the ATM link without addition
+ * overheads the kernel doesn't know about, and
+ * (b) a packet that has 1 byte of additional overhead the kernel
+ * doesn't know about. Here
+ * (c) a packet that has 2 bytes of additional overhead the
+ * kernel doesn't know about.
+ * The table below presents what happens. Each row is for a single rate
+ * table element. The "Sizes" column shows what packet sizes the rate table
+ * element will be used for. This packet size includes the "unknown to
+ * kernel" overhead, but does not include overhead incurred by breaking the
+ * packet up into ATM cells. This ATM cell overhead consists of the 5 byte
+ * header per ATM cell, plus the padding in the last cell. The "ATM" column
+ * shows how many bytes are actually sent across the ATM link, ie it does
+ * include the ATM cell overhead.
+ *
+ * RateTable Entry Sizes(a) ATM(a) Sizes(b) ATM(b) Sizes(c) ATM(c)
+ * ratetable[0] 0..7 53 1..8 53 2..9 53
+ * ratetable[1] 8..15 53 9..16 53 2..17 53
+ * ratetable[2] 16..23 53 17..24 53 18..25 53
+ * ratetable[3] 24..31 53 25..32 53 26..33 53
+ * ratetable[4] 32..39 53 33..40 53 34..41 53
+ * ratetable[5] 40..47 53 41..48 53 42..49 53,106
+ * ratetable[6] 48..55 53,106 49..56 106 50..57 106
+ *
+ * For senario (a), the ratetable[6] entry covers two cases: one were a single
+ * ATM cell is needed to transmit the data, and one where two ATM cells are
+ * required. It can't be right for both. Unfortunately the error is large.
+ * The same problem arises in senario (c) for ratetable[5]. The problem
+ * doesn't happen for senario (b), because the boundary between rate table
+ * entries happens to match the boundary between ATM cells.
+ *
+ * What we would like to do is ensure that ratetable boundaries always match
+ * the ATM cells. If we do this the error goes away. The solution is to make
+ * the kernel add a small bias to the packet size. (Small because the bias
+ * will always be smaller than cell_log.) Adding this small bias will in
+ * effect slide the ratetable along a bit, so the boundaries match. The code
+ * below calculates that bias. Provided the MTU is less than 4092, doing
+ * this can always eliminate the error.
+ *
+ * Old kernels won't add this bias, so they will have the error described above
+ * in most cases. In the worst case senario, considering all possible ATM cell
+ * sizes (1..48), for 7 of these sizes the old kernel will calculate the rate
+ * wrongly - ie, be out by 53 bytes.
+ */
+static int tc_calc_cell_align(int atm_cell_tax, char overhead, int cell_log)
+{
+ int cell_size;
+
+ if (!atm_cell_tax)
+ return 0;
+ cell_size = 1 << cell_log;
+ return (overhead + cell_size - 2) % cell_size - cell_size + 1;
+}
+
+
+/*
+ * A constructor for a tc_ratespec.
+ */
+void tc_calc_ratespec(struct tc_ratespec* spec, __u32* rtab, unsigned bps,
+ int cell_log, unsigned mtu, unsigned char mpu, int atm_cell_tax,
+ char overhead)
{
int i;
- unsigned overhead = (mpu >> 8) & 0xFF;
- mpu = mpu & 0xFF;
if (mtu == 0)
mtu = 2047;
+ /* rtab[pkt_len>>cell_log] = pkt_xmit_time */
if (cell_log < 0) {
cell_log = 0;
while ((mtu>>cell_log) > 255)
cell_log++;
}
+
for (i=0; i<256; i++) {
- unsigned sz = (i<<cell_log);
- if (overhead)
- sz += overhead;
+ /*
+ * sz is the length of packet we will use for this ratetable
+ * entry. The time taken to send a packet of this length will
+ * be used for all packet lengths this ratetable entry applies
+ * to. As underestimating how long it will take to transmit a
+ * packet is a worse error than overestimating it, the longest
+ * packet this rate table entry applies to is used.
+ */
+ int sz = ((i+1)<<cell_log) - 1 + overhead;
if (sz < mpu)
sz = mpu;
- rtab[i] = tc_core_usec2tick(1000000*((double)sz/bps));
+ if (atm_cell_tax)
+ sz = tc_align_to_cells(sz);
+ rtab[i] = tc_calc_xmittime(bps, sz);
}
- return cell_log;
+
+ spec->cell_align = tc_calc_cell_align(atm_cell_tax, overhead, cell_log);
+ spec->cell_log = cell_log;
+ spec->feature = 0x8000 | (atm_cell_tax ? 1 : 0);
+ spec->mpu = mpu | (unsigned)(overhead << 8);
+ spec->rate = bps;
}
int tc_core_init()
diff -Nurp iproute2.orig/tc/tc_core.h iproute2/tc/tc_core.h
--- iproute2.orig/tc/tc_core.h 2005-03-15 05:02:41.000000000 +1000
+++ iproute2/tc/tc_core.h 2006-06-13 11:53:27.000000000 +1000
@@ -7,7 +7,9 @@
long tc_core_usec2tick(long usec);
long tc_core_tick2usec(long tick);
unsigned tc_calc_xmittime(unsigned rate, unsigned size);
-int tc_calc_rtable(unsigned bps, __u32 *rtab, int cell_log, unsigned mtu, unsigned mpu);
+void tc_calc_ratespec(struct tc_ratespec* spec, __u32* rtab, unsigned bps,
+ int cell_log, unsigned mtu, unsigned char mpu, int atm_cell_tax,
+ char overhead);
int tc_setup_estimator(unsigned A, unsigned time_const, struct tc_estimator *est);
More information about the LARTC
mailing list