From c16d1dabf4756f1c504d2d92f7b2971dd7a91fde Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Tue, 12 Apr 2022 21:58:07 +0100 Subject: [PATCH] dvd-sector-copy.c: Package up the moving-average machinery. Now we can have more than one. --- dvd-sector-copy.c | 184 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 101 insertions(+), 83 deletions(-) diff --git a/dvd-sector-copy.c b/dvd-sector-copy.c index 3583770..acdde73 100644 --- a/dvd-sector-copy.c +++ b/dvd-sector-copy.c @@ -451,6 +451,101 @@ static void put_title(dvd_reader_t *dvd, unsigned title) start[0], start[npart - 1] + SECTORS(len[npart - 1])); } +/*----- Moving average machinery ------------------------------------------* + * + * We're using an exponential moving average with a weighting factor of α + * (`alpha', above); larger values are more sensitive to recent changes. If + * the old average was v_1, and the measurement in the current interval is x, + * then the new average after this interval is + * + * v = α x + (1 − α) v_1 . + * + * Write β = 1 − α; so + * + * v = α x + β v_1 . + * + * Let x_0 = x, let x_1 be the measurement from the previous interval, and, + * in general, let x_i be the measurement from i intervals ago. Then another + * way to write the above would be + * + * v = α (x_0 + β x_1 + ⋯ + β^i x_i + ⋯) . + * + * Alas, our time intervals are not regular. Suppose that we get our next + * measurement after a gap of t intervals, for some integer t. We can + * compensate approximately by pretending that all of the missed intervals -- + * and our new one -- had the same mean rate. Then we'd have calculated + * + * v = α (x + β x + ⋯ + β^{t−1} x) + β^t v_1 + * + * 1 − β^t + * = α x ------- + β^t v_1 + * 1 − β + * + * = x (1 − β^t) + β^t v_1 (since α = 1 − β) + * + * = x + β^t (v_1 − x) . + * + * Does this work in general? It's clearly correct in the case t = 1. + * + * Suppose the old average was v_2, and that over a period of t intervals + * (where t is not necessarily an integer) we measured a mean rate of x, and + * then after u intervals we measured a mean rate of x /again/. Then we'd + * firstly determine + * + * v_1 = x + β^t (v_2 − x) + * + * and then + * + * v = x + β^u (v_1 − x) + * + * = x + β^u (x + β^t (v_2 − x) − x) + * + * = x + β^{t+u} (v_2 − x) , + * + * which is exactly what we'd have done if we'd calculated the same mean rate + * over the combined span of t + u intervals. + * + * One final wrinkle, in case that wasn't enough. There's a problem with the + * initial setup of an exponential moving average. Apparently + * (https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) + * explains that we can do this better by calculating the average after k + * intervals as + * + * x_0 + β x_1 + β^2 x_2 + ⋯ + β^{k−1} x_{k−1} + * v′ = ------------------------------------------- . + * 1 + β + β^2 + ⋯ + β^{k−1} + * + * The numerator is our existing v/α; the denominator is (1 − β^k)/α; the + * factors of α cancel, and we find that v′ = v/(1 − β^k). This still holds + * in our situation, where k may not be an integer. + * + * To apply all of this: + * + * * we maintain the moving average v in `avg'; + * + * * we maintain the total β^k in `corr'; and + * + * * we compute v′ = v/(1 − β^k) on demand up in `render_perfstats'. + */ + +struct avg { + double avg, corr; +}; +#define AVG_INIT { 0.0, 1.0 } + +static double alpha = 0.1; /* weighting factor for average */ + +static void update_avg(struct avg *a, double t, double n) +{ + double rate = n/t, beta_t = pow(1 - alpha, t); + + a->avg = rate + beta_t*(a->avg - rate); + a->corr *= beta_t; +} + +static inline double current_avg(const struct avg *a) + { return (a->avg/(1 - a->corr)); } + /*----- Common variables used by the copying machinery --------------------*/ /* General reading state. */ @@ -470,8 +565,7 @@ static unsigned retry, max_retries = 4; /* retry state */ static secaddr nsectors, ndone; /* number of sectors done/to do */ static secaddr last_pos; /* position last time we updated */ static struct timeval last_time; /* time last time we updated */ -static double alpha = 0.1; /* weighting factor for average */ -static double avg = 0.0, corr = 1.0; /* exponential moving average */ +static struct avg avg_rate = AVG_INIT; static int bad_err; /* most recent error code */ static const char throbber[] = "|<-<|>->"; /* throbber pattern */ @@ -523,11 +617,12 @@ static void render_perfstats(struct progress_render_state *render) const char *unit; /* If there's no average computed yet, then use some placeholder values. */ - rate = avg/(1 - corr); eta = (int)((nsectors - ndone)/rate + 0.5); + rate = current_avg(&avg_rate); eta = (int)((nsectors - ndone)/rate + 0.5); /* Write out the statistics. */ rate = scale_bytes(rate*SECTORSZ, &unit); - progress_putright(render, "ETA %s ", avg ? fmttime(eta, timebuf) : "???"); + progress_putright(render, "ETA %s ", + avg_rate.avg ? fmttime(eta, timebuf) : "???"); progress_putright(render, "%.1f %sB/s, ", rate, unit); } @@ -608,83 +703,7 @@ static void update_progress(secaddr pos) */ { struct timeval now; - double t, beta_t, rate; - - /* We're using an exponential moving average with a weighting factor of α - * (`alpha', above); larger values are more sensitive to recent changes. - * If the old average was v_1, and the measurement in the current interval - * is x, then the new average after this interval is - * - * v = α x + (1 − α) v_1 . - * - * Write β = 1 − α; so - * - * v = α x + β v_1 . - * - * Let x_0 = x, let x_1 be the measurement from the previous interval, and, - * in general, let x_i be the measurement from i intervals ago. Then - * another way to write the above would be - * - * v = α (x_0 + β x_1 + ⋯ + β^i x_i + ⋯) . - * - * Alas, our time intervals are not regular. Suppose that we get our next - * measurement after a gap of t intervals, for some integer t. We can - * compensate approximately by pretending that all of the missed intervals - * -- and our new one -- had the same mean rate. Then we'd have - * calculated - * - * v = α (x + β x + ⋯ + β^{t−1} x) + β^t v_1 - * - * 1 − β^t - * = α x ------- + β^t v_1 - * 1 − β - * - * = x (1 − β^t) + β^t v_1 (since α = 1 − β) - * - * = x + β^t (v_1 − x) . - * - * Does this work in general? It's clearly correct in the case t = 1. - * - * Suppose the old average was v_2, and that over a period of t intervals - * (where t is not necessarily an integer) we measured a mean rate of x, - * and then after u intervals we measured a mean rate of x /again/. Then - * we'd firstly determine - * - * v_1 = x + β^t (v_2 − x) - * - * and then - * - * v = x + β^u (v_1 − x) - * - * = x + β^u (x + β^t (v_2 − x) − x) - * - * = x + β^{t+u} (v_2 − x) , - * - * which is exactly what we'd have done if we'd calculated the same mean - * rate over the combined span of t + u intervals. - * - * One final wrinkle, in case that wasn't enough. There's a problem with - * the initial setup of an exponential moving average. Apparently - * (https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) - * explains that we can do this better by calculating the average after k - * intervals as - * - * x_0 + β x_1 + β^2 x_2 + ⋯ + β^{k−1} x_{k−1} - * v′ = ------------------------------------------- . - * 1 + β + β^2 + ⋯ + β^{k−1} - * - * The numerator is our existing v/α; the denominator is (1 − β^k)/α; the - * factors of α cancel, and we find that v′ = v/(1 − β^k). This still - * holds in our situation, where k may not be an integer. - * - * To apply all of this: - * - * * we maintain the moving average v in `avg'; - * - * * we maintain the total β^k in `corr'; and - * - * * we compute v′ = v/(1 − β^k) on demand up in `render_perfstats'. - */ + double t; /* Find the current time and the delta since the last time we updated. * This will be the length of the current interval. @@ -699,8 +718,7 @@ static void update_progress(secaddr pos) * interval. */ - rate = (pos - last_pos)/t; beta_t = pow(1 - alpha, t); - avg = rate + beta_t*(avg - rate); corr *= beta_t; + update_avg(&avg_rate, t, pos - last_pos); ndone += pos - last_pos; last_time = now; last_pos = pos; } -- 2.11.0