dvd-sector-copy.c: Package up the moving-average machinery.

author Mark Wooding <mdw@distorted.org.uk>

Tue, 12 Apr 2022 20:58:07 +0000 (21:58 +0100)

committer Mark Wooding <mdw@distorted.org.uk>

Tue, 12 Apr 2022 20:58:07 +0000 (21:58 +0100)
author Mark Wooding <mdw@distorted.org.uk>
Tue, 12 Apr 2022 20:58:07 +0000 (21:58 +0100)
committer Mark Wooding <mdw@distorted.org.uk>
Tue, 12 Apr 2022 20:58:07 +0000 (21:58 +0100)
diff --git a/dvd-sector-copy.c b/dvd-sector-copy.c

index 3583770..acdde73 100644 (file)
--- a/dvd-sector-copy.c
+++ b/dvd-sector-copy.c
@@ -451,6 +451,101 @@ static void put_title(dvd_reader_t *dvd, unsigned title)
            start[0], start[npart - 1] + SECTORS(len[npart - 1]));
  }
  
+/*----- Moving average machinery ------------------------------------------*
+ *
+ * We're using an exponential moving average with a weighting factor of α
+ * (`alpha', above); larger values are more sensitive to recent changes.  If
+ * the old average was v_1, and the measurement in the current interval is x,
+ * then the new average after this interval is
+ *
+ *          v = α x + (1 − α) v_1 .
+ *
+ * Write β = 1 − α; so
+ *
+ *          v = α x + β v_1 .
+ *
+ * Let x_0 = x, let x_1 be the measurement from the previous interval, and,
+ * in general, let x_i be the measurement from i intervals ago.  Then another
+ * way to write the above would be
+ *
+ *          v = α (x_0 + β x_1 + ⋯ + β^i x_i + ⋯) .
+ *
+ * Alas, our time intervals are not regular.  Suppose that we get our next
+ * measurement after a gap of t intervals, for some integer t.  We can
+ * compensate approximately by pretending that all of the missed intervals --
+ * and our new one -- had the same mean rate.  Then we'd have calculated
+ *
+ *          v = α (x + β x + ⋯ + β^{t−1} x) + β^t v_1
+ *
+ *                  1 − β^t
+ *            = α x ------- + β^t v_1
+ *                   1 − β
+ *
+ *            = x (1 − β^t) + β^t v_1              (since α = 1 − β)
+ *
+ *            = x + β^t (v_1 − x) .
+ *
+ * Does this work in general?  It's clearly correct in the case t = 1.
+ *
+ * Suppose the old average was v_2, and that over a period of t intervals
+ * (where t is not necessarily an integer) we measured a mean rate of x, and
+ * then after u intervals we measured a mean rate of x /again/.  Then we'd
+ * firstly determine
+ *
+ *          v_1 = x + β^t (v_2 − x)
+ *
+ * and then
+ *
+ *          v = x + β^u (v_1 − x)
+ *
+ *            = x + β^u (x + β^t (v_2 − x) − x)
+ *
+ *            = x + β^{t+u} (v_2 − x) ,
+ *
+ * which is exactly what we'd have done if we'd calculated the same mean rate
+ * over the combined span of t + u intervals.
+ *
+ * One final wrinkle, in case that wasn't enough.  There's a problem with the
+ * initial setup of an exponential moving average.  Apparently
+ * (https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average)
+ * explains that we can do this better by calculating the average after k
+ * intervals as
+ *
+ *               x_0 + β x_1 + β^2 x_2 + ⋯ + β^{k−1} x_{k−1}
+ *          v′ = ------------------------------------------- .
+ *                        1 + β + β^2 + ⋯ + β^{k−1}
+ *
+ * The numerator is our existing v/α; the denominator is (1 − β^k)/α; the
+ * factors of α cancel, and we find that v′ = v/(1 − β^k).  This still holds
+ * in our situation, where k may not be an integer.
+ *
+ * To apply all of this:
+ *
+ *   * we maintain the moving average v in `avg';
+ *
+ *   * we maintain the total β^k in `corr'; and
+ *
+ *   * we compute v′ = v/(1 − β^k) on demand up in `render_perfstats'.
+ */
+
+struct avg {
+  double avg, corr;
+};
+#define AVG_INIT { 0.0, 1.0 }
+
+static double alpha = 0.1;             /* weighting factor for average */
+
+static void update_avg(struct avg *a, double t, double n)
+{
+  double rate = n/t, beta_t = pow(1 - alpha, t);
+
+  a->avg = rate + beta_t*(a->avg - rate);
+  a->corr *= beta_t;
+}
+
+static inline double current_avg(const struct avg *a)
+  { return (a->avg/(1 - a->corr)); }
+
  /*----- Common variables used by the copying machinery --------------------*/
  
  /* General reading state. */
@@ -470,8 +565,7 @@ static unsigned retry, max_retries = 4;     /* retry state */
  static secaddr nsectors, ndone;                /* number of sectors done/to do */
  static secaddr last_pos;               /* position last time we updated */
  static struct timeval last_time;       /* time last time we updated */
-static double alpha = 0.1;             /* weighting factor for average */
-static double avg = 0.0, corr = 1.0;   /* exponential moving average */
+static struct avg avg_rate = AVG_INIT;
  static int bad_err;                    /* most recent error code */
  
  static const char throbber[] = "|<-<|>->"; /* throbber pattern */
@@ -523,11 +617,12 @@ static void render_perfstats(struct progress_render_state *render)
    const char *unit;
  
    /* If there's no average computed yet, then use some placeholder values. */
-  rate = avg/(1 - corr); eta = (int)((nsectors - ndone)/rate + 0.5);
+  rate = current_avg(&avg_rate); eta = (int)((nsectors - ndone)/rate + 0.5);
  
    /* Write out the statistics. */
    rate = scale_bytes(rate*SECTORSZ, &unit);
-  progress_putright(render, "ETA %s ", avg ? fmttime(eta, timebuf) : "???");
+  progress_putright(render, "ETA %s ",
+                   avg_rate.avg ? fmttime(eta, timebuf) : "???");
    progress_putright(render, "%.1f %sB/s, ", rate, unit);
  }
  
@@ -608,83 +703,7 @@ static void update_progress(secaddr pos)
          */
  {
    struct timeval now;
-  double t, beta_t, rate;
-
-  /* We're using an exponential moving average with a weighting factor of α
-   * (`alpha', above); larger values are more sensitive to recent changes.
-   * If the old average was v_1, and the measurement in the current interval
-   * is x, then the new average after this interval is
-   *
-   *        v = α x + (1 − α) v_1 .
-   *
-   * Write β = 1 − α; so
-   *
-   *        v = α x + β v_1 .
-   *
-   * Let x_0 = x, let x_1 be the measurement from the previous interval, and,
-   * in general, let x_i be the measurement from i intervals ago.  Then
-   * another way to write the above would be
-   *
-   *        v = α (x_0 + β x_1 + ⋯ + β^i x_i + ⋯) .
-   *
-   * Alas, our time intervals are not regular.  Suppose that we get our next
-   * measurement after a gap of t intervals, for some integer t.  We can
-   * compensate approximately by pretending that all of the missed intervals
-   * -- and our new one -- had the same mean rate.  Then we'd have
-   * calculated
-   *
-   *        v = α (x + β x + ⋯ + β^{t−1} x) + β^t v_1
-   *
-   *                1 − β^t
-   *          = α x ------- + β^t v_1
-   *                 1 − β
-   *
-   *          = x (1 − β^t) + β^t v_1              (since α = 1 − β)
-   *
-   *          = x + β^t (v_1 − x) .
-   *
-   * Does this work in general?  It's clearly correct in the case t = 1.
-   *
-   * Suppose the old average was v_2, and that over a period of t intervals
-   * (where t is not necessarily an integer) we measured a mean rate of x,
-   * and then after u intervals we measured a mean rate of x /again/.  Then
-   * we'd firstly determine
-   *
-   *        v_1 = x + β^t (v_2 − x)
-   *
-   * and then
-   *
-   *        v = x + β^u (v_1 − x)
-   *
-   *          = x + β^u (x + β^t (v_2 − x) − x)
-   *
-   *          = x + β^{t+u} (v_2 − x) ,
-   *
-   * which is exactly what we'd have done if we'd calculated the same mean
-   * rate over the combined span of t + u intervals.
-   *
-   * One final wrinkle, in case that wasn't enough.  There's a problem with
-   * the initial setup of an exponential moving average.  Apparently
-   * (https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average)
-   * explains that we can do this better by calculating the average after k
-   * intervals as
-   *
-   *             x_0 + β x_1 + β^2 x_2 + ⋯ + β^{k−1} x_{k−1}
-   *        v′ = ------------------------------------------- .
-   *                      1 + β + β^2 + ⋯ + β^{k−1}
-   *
-   * The numerator is our existing v/α; the denominator is (1 − β^k)/α; the
-   * factors of α cancel, and we find that v′ = v/(1 − β^k).  This still
-   * holds in our situation, where k may not be an integer.
-   *
-   * To apply all of this:
-   *
-   *   * we maintain the moving average v in `avg';
-   *
-   *   * we maintain the total β^k in `corr'; and
-   *
-   *   * we compute v′ = v/(1 − β^k) on demand up in `render_perfstats'.
-   */
+  double t;
  
    /* Find the current time and the delta since the last time we updated.
     * This will be the length of the current interval.
@@ -699,8 +718,7 @@ static void update_progress(secaddr pos)
       * interval.
       */
  
-    rate = (pos - last_pos)/t; beta_t = pow(1 - alpha, t);
-    avg = rate + beta_t*(avg - rate); corr *= beta_t;
+    update_avg(&avg_rate, t, pos - last_pos);
      ndone += pos - last_pos; last_time = now; last_pos = pos;
    }
author	Mark Wooding <mdw@distorted.org.uk>
	Tue, 12 Apr 2022 20:58:07 +0000 (21:58 +0100)
committer	Mark Wooding <mdw@distorted.org.uk>
	Tue, 12 Apr 2022 20:58:07 +0000 (21:58 +0100)