#include "lib.h"
+#ifdef __linux__
+# include <linux/cdrom.h>
+#endif
+
/*----- Program usage summary ---------------------------------------------*/
static void usage(FILE *fp)
start[0], start[npart - 1] + SECTORS(len[npart - 1]));
}
+/*----- Moving average machinery ------------------------------------------*
+ *
+ * We're using an exponential moving average with a weighting factor of α
+ * (`alpha', above); larger values are more sensitive to recent changes. If
+ * the old average was v_1, and the measurement in the current interval is x,
+ * then the new average after this interval is
+ *
+ * v = α x + (1 − α) v_1 .
+ *
+ * Write β = 1 − α; so
+ *
+ * v = α x + β v_1 .
+ *
+ * Let x_0 = x, let x_1 be the measurement from the previous interval, and,
+ * in general, let x_i be the measurement from i intervals ago. Then another
+ * way to write the above would be
+ *
+ * v = α (x_0 + β x_1 + ⋯ + β^i x_i + ⋯) .
+ *
+ * Alas, our time intervals are not regular. Suppose that we get our next
+ * measurement after a gap of t intervals, for some integer t. We can
+ * compensate approximately by pretending that all of the missed intervals --
+ * and our new one -- had the same mean rate. Then we'd have calculated
+ *
+ * v = α (x + β x + ⋯ + β^{t−1} x) + β^t v_1
+ *
+ * 1 − β^t
+ * = α x ------- + β^t v_1
+ * 1 − β
+ *
+ * = x (1 − β^t) + β^t v_1 (since α = 1 − β)
+ *
+ * = x + β^t (v_1 − x) .
+ *
+ * Does this work in general? It's clearly correct in the case t = 1.
+ *
+ * Suppose the old average was v_2, and that over a period of t intervals
+ * (where t is not necessarily an integer) we measured a mean rate of x, and
+ * then after u intervals we measured a mean rate of x /again/. Then we'd
+ * firstly determine
+ *
+ * v_1 = x + β^t (v_2 − x)
+ *
+ * and then
+ *
+ * v = x + β^u (v_1 − x)
+ *
+ * = x + β^u (x + β^t (v_2 − x) − x)
+ *
+ * = x + β^{t+u} (v_2 − x) ,
+ *
+ * which is exactly what we'd have done if we'd calculated the same mean rate
+ * over the combined span of t + u intervals.
+ *
+ * One final wrinkle, in case that wasn't enough. There's a problem with the
+ * initial setup of an exponential moving average. Apparently
+ * (https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average)
+ * explains that we can do this better by calculating the average after k
+ * intervals as
+ *
+ * x_0 + β x_1 + β^2 x_2 + ⋯ + β^{k−1} x_{k−1}
+ * v′ = ------------------------------------------- .
+ * 1 + β + β^2 + ⋯ + β^{k−1}
+ *
+ * The numerator is our existing v/α; the denominator is (1 − β^k)/α; the
+ * factors of α cancel, and we find that v′ = v/(1 − β^k). This still holds
+ * in our situation, where k may not be an integer.
+ *
+ * To apply all of this:
+ *
+ * * we maintain the moving average v in `avg';
+ *
+ * * we maintain the total β^k in `corr'; and
+ *
+ * * we compute v′ = v/(1 − β^k) on demand up in `render_perfstats'.
+ */
+
+struct avg {
+ double avg, corr;
+};
+#define AVG_INIT { 0.0, 1.0 }
+
+static double alpha = 0.1; /* weighting factor for average */
+
+static void update_avg(struct avg *a, double t, double n)
+{
+ double rate = n/t, beta_t = pow(1 - alpha, t);
+
+ a->avg = rate + beta_t*(a->avg - rate);
+ a->corr *= beta_t;
+}
+
+static inline double current_avg(const struct avg *a)
+ { return (a->avg/(1 - a->corr)); }
+
+/*----- The nonlinear progress model --------------------------------------*/
+
+/* The recorded portion of a single-layer DVD (i.e., DVD-5) can hold 2298496
+ * sectors of user data. This is preceded by 0x30000 = 196608 sectors of
+ * lead-in information, for a totoal of 2495104 sectors.
+ *
+ * The readable portion of a disc is an annulus with respective internal and
+ * external diameters of 44 mm and 117 mm. This annulus has an area of
+ * 9230.8 mm^2, so DVD has a storage density of about 270.3 sectors/mm^2. If
+ * the interior of the annulus were used for data storage rather than leaving
+ * a hole for a spindle and a clamping area, then it would be 10751 mm^2 and
+ * could store 2906107 sectors. (That means that the portion of the disc
+ * that's actually used to make it spin could have stored an additional
+ * 411003 sectors.)
+ *
+ * Sectors aren't stored on the surface willy-nilly, but arranged into a
+ * single archimedean spiral; bits are stored along this spiral at a
+ * more-or-less constant pitch. We are therefore led into an investigation
+ * of the arc-length of archimedean spirals.
+ *
+ * It's best to start with the polar equation of the spiral, which is simply
+ *
+ * r = k θ
+ *
+ * for a given constant k. The arc length of a curve expressed using polar
+ * coordinates is given by
+ *
+ * s = ∫ √(r^2 + (dr/dθ)^2) dθ
+ *
+ * = ∫ √(k^2 θ^2 + k^2) dθ
+ *
+ * = k ∫ √(1 + θ^2) dθ
+ *
+ * k
+ * = - [ θ √(1 + θ^2) + log(θ + √(1 + θ^2)) ] - s_0.
+ * 2
+ *
+ * We're assuming that the sectors are spaced out at a constant linear
+ * density along the spiral. We don't know the units for s, but there's some
+ * constant L such that A = s/L; so
+ *
+ * k
+ * A = --- [ θ √(1 + θ^2) + log(θ + √(1 + θ^2)) ] - A_0
+ * 2 L
+ *
+ * for some suitable constant A_0.
+ *
+ * Finally, we're assuming that the disc is spinning with some approximately
+ * constant angular velocity ω, so θ = ω t, giving
+ *
+ * k
+ * A = --- [ ω t √(1 + ω^2 t^2) + log(ω + √(1 + ω^2 t^2)) ] + A_0 .
+ * 2 L
+ *
+ * We can calculate approximate values for k/(2 L) and A_0. As stated above,
+ * the track pitch is about 0.75 µm; our inside and outside diameters of
+ * 44 mm and 117 mm correspond to angles of 184306 and 490088 radians
+ * respectively. Feeding those into the above equation for s gives arc
+ * lengths of 16984492558 and 120093346360 respectively, in unknown units.
+ * The difference is 103108853802, which should correspond to 2495104
+ * sectors, giving us 41324 arc-length units per sector. As a cross-check,
+ * the arc length corresponding to the inside diameter yields 411003 sectors,
+ * which is the same as we calculated above. This will be our A_0
+ */
+
+#ifdef unusef
+static double archimedes_arclen(double t)
+ /* Given an angle T, return the arc length of the canonical
+ * archimedean spiral r = θ, from θ = 0 up to θ = T.
+ */
+{
+ double u;
+
+ u = sqrt(1 + t*t);
+ return (t*u + log(t + u))/2;
+}
+#endif
+
+static double inv_archimedes_arclen(double s)
+ /* Given an arc length S, return the angle T such that the arc length
+ * of the canonical archimedean spiral r = θ, from θ = 0 up to θ = T,
+ * is equal to S.
+ */
+{
+ /* There is no closed-form solution, so we're going to invert the arc-
+ * length formula above numerically, using the Newton--Raphson method.
+ *
+ * Given an incorrect guess x_0 of a zero of some function f, we refine the
+ * guess by approximating f by its tangent at the point (x_0, f(x_0)).
+ * This will be a line with an equation like
+ *
+ * y = f'(x_0) x + c .
+ *
+ * We know that y = f(x_0) when x = x_0, so we can calculate
+ *
+ * c = f(x_0) - f'(x_0) x_0 .
+ *
+ * This will be zero when
+ *
+ * y = f'(x_0) x + f(x_0) - f'(x_0) x_0 = 0
+ *
+ * hwnce
+ *
+ * f'(x_0) x_0) - f(x_0) f(x_0)
+ * x = --------------------- = x_0 - ------- .
+ * f'(x_0) f'(x_0)
+ */
+
+ double t, ss, u, e;
+
+ /* We need to choose an initial estimate. This seems to work well in
+ * practice.
+ */
+ t = 1.5*sqrt(s);
+
+ for (;;) {
+ /* Compute s' = f(t). We open-code this calculation because the
+ * intermediate value √(1 + t^2) is also the gradient.
+ */
+ u = sqrt(1 + t*t);
+ ss = (t*u + log(t + u))/2;
+
+ /* Determine the error in f(t). We don't actually need much precision
+ * here, but 2 ulp seems achievable in practice with minimal cost: the
+ * usually sequence converges after only five iterations.
+ */
+ e = fabs(s/ss - 1);
+ if (e <= 2*DBL_EPSILON) return (t);
+
+ /* Not good enough. Refine the guess and go around again. */
+ t -= (ss - s)/u;
+ }
+}
+
+static double sectors_to_angle(secaddr base, secaddr low, secaddr high)
+ /* Return the angle, in radians, subtended by the range LOW up to
+ * HIGH of user sector addresses, given the physical sector address
+ * BASE of the first user-data sectors.
+ */
+{
+#define A0 411003.262489
+#define K 41324.4713654
+
+ return (inv_archimedes_arclen(K*(A0 + base + high)) -
+ inv_archimedes_arclen(K*(A0 + base + low)));
+
+#undef A0
+#undef K
+}
+
+enum {
+ FLAT, /* not actually a real DVD */
+ SINGLE, /* disc with only one layer */
+ PTP, /* two layers, parallel track path */
+ OTP /* two layers, opposite track path */
+};
+
+struct geometry {
+ unsigned shape; /* one of the four codes above */
+ secaddr start0, start1; /* initial physical sector */
+ secaddr midpoint; /* sector address of layer switch */
+};
+
+#define GF_BLKDEV 1u
+static void setup_geometry(struct geometry *g, int fd, unsigned f,
+ secaddr sz)
+ /* Initialize G with information about the disc structure. FD is a
+ * file descriptor for the device; SZ is the size of the disc in
+ * sectors. If `GF_BLKDEV' is clear in F then assume that FD refers
+ * to a regular file; G is populated with a `FLAT' performance model.
+ * If `GF_BLKDEV' is set, then FD refers to a block device, so try to
+ * retreive detailed structure information from the drive.
+ */
+{
+#ifdef __linux__
+ dvd_struct ds;
+ const struct dvd_layer *ly;
+#endif
+ secaddr t;
+
+#define LAYER_LIMIT 2298496 /* maximum (user) sectors on layer */
+#define DVDROM_OFFSET 0x30000 /* usual initial physical sector */
+
+ if (!(f&GF_BLKDEV)) {
+ /* We're reading from a regular file. Assume that progress will be
+ * linear.
+ */
+
+ g->shape = FLAT;
+ g->midpoint = SECLIMIT;
+ return;
+ }
+
+#ifdef __linux__
+ /* We have Linux and its DVD ioctl(2) calls. Interrogate the disc to
+ * discover its structure.
+ */
+
+ ds.type = DVD_STRUCT_PHYSICAL;
+ ds.physical.layer_num = 0;
+ if (ioctl(fd, DVD_READ_STRUCT, &ds)) {
+ moan_syserr(errno, "failed to read physical disc structure");
+ goto guess_structure;
+ }
+ ly = &ds.physical.layer[0];
+ switch (ly->nlayers) {
+ case 0:
+ g->shape = SINGLE;
+ g->start0 = g->start1 = 0;
+ g->midpoint = SECLIMIT;
+ break;
+ case 1:
+ g->start0 = ly->start_sector;
+ if (ly->track_path) {
+ g->shape = OTP;
+ g->start1 = 0;
+ g->midpoint = ly->end_sector_l0 - ly->start_sector + 1;
+ } else {
+ g->shape = PTP;
+ g->midpoint = ly->end_sector - ly->start_sector + 1;
+ ds.physical.layer_num = 1;
+ if (ioctl(fd, DVD_READ_STRUCT, &ds)) {
+ moan_syserr(errno, "failed to read layer 1 physical structure");
+ goto guess_structure;
+ }
+ g->start1 = ly->start_sector;
+ }
+ break;
+ default:
+ moan("unexpected layer count %d", ly->nlayers + 1);
+ goto guess_structure;
+ }
+ return;
+guess_structure:
+#endif
+
+ /* Either we don't have Linux, or we found something confusing. Let's try
+ * to guess at what's going on.
+ *
+ * If the volume size is small enough to fit on a single layer then assume
+ * that's what's happened; otherwise assume opposite track path with a cut
+ * at the midpoint, rounded up to an ECC block (16 sectors).
+ */
+ g->start0 = DVDROM_OFFSET; g->start1 = 0;
+ if (sz <= LAYER_LIMIT) {
+ g->shape = SINGLE;
+ g->midpoint = SECLIMIT;
+ } else {
+ g->shape = OTP;
+ t = (sz + DVDROM_OFFSET)/2;
+ t += 15; t &= -16;
+ t -= DVDROM_OFFSET;
+ g->midpoint = t;
+ }
+
+#undef LAYER_LIMIT
+#undef DVDROM_OFFSET
+}
+
+static double linear_progress(const struct geometry *g,
+ secaddr a0, secaddr a1)
+ /* Convert the sector range from A0 to A1 into a progress measurement
+ * which is, by intention, approximately linearly related to time,
+ * given a geometry description G.
+ */
+{
+ double theta = 0.0;
+
+ switch (g->shape) {
+ case FLAT:
+ theta = a1 - a0;
+ break;
+ case SINGLE:
+ theta = sectors_to_angle(g->start0, a0, a1);
+ break;
+ case PTP:
+ if (a0 < g->midpoint)
+ theta += sectors_to_angle(g->start0,
+ a0, a1 < g->midpoint ? a1 : g->midpoint);
+ if (a1 > g->midpoint)
+ theta += sectors_to_angle(g->start1,
+ a0 > g->midpoint ? a0 : g->midpoint, a1);
+ break;
+ case OTP:
+ if (a0 < g->midpoint)
+ theta += sectors_to_angle(g->start0,
+ a0, a1 < g->midpoint ? a1 : g->midpoint);
+ if (a1 > g->midpoint)
+ theta += sectors_to_angle(g->start0,
+ 2*g->midpoint - a1,
+ a0 > g->midpoint ?
+ 2*g->midpoint - a0 : g->midpoint);
+ break;
+ default:
+ abort();
+ }
+ return (theta);
+}
+
/*----- Common variables used by the copying machinery --------------------*/
/* General reading state. */
/*----- Progress reporting ------------------------------------------------*/
static secaddr nsectors, ndone; /* number of sectors done/to do */
+static double total_linear, done_linear; /* linear progress tracking */
static secaddr last_pos; /* position last time we updated */
static struct timeval last_time; /* time last time we updated */
-static double alpha = 0.1; /* weighting factor for average */
-static double avg = 0.0, corr = 1.0; /* exponential moving average */
+static struct geometry geom; /* disc geometry for progress */
+static struct avg avg_rate = AVG_INIT, avg_linear = AVG_INIT;
static int bad_err; /* most recent error code */
+static FILE *progressfp; /* file on which to trace progress data */
static const char throbber[] = "|<-<|>->"; /* throbber pattern */
static unsigned throbix = 0; /* current throbber index */
{
int eta;
char timebuf[TIMESTRMAX];
- double rate;
+ double rate, linrate;
const char *unit;
/* If there's no average computed yet, then use some placeholder values. */
- rate = avg/(1 - corr); eta = (int)((nsectors - ndone)/rate + 0.5);
+ rate = current_avg(&avg_rate);
+ linrate = current_avg(&avg_linear);
+ eta = (int)((total_linear - done_linear)/linrate + 0.5);
/* Write out the statistics. */
rate = scale_bytes(rate*SECTORSZ, &unit);
- progress_putright(render, "ETA %s ", avg ? fmttime(eta, timebuf) : "???");
+ progress_putright(render, "ETA %s ",
+ avg_linear.avg ? fmttime(eta, timebuf) : "???");
progress_putright(render, "%.1f %sB/s, ", rate, unit);
}
*/
{
struct timeval now;
- double t, beta_t, rate;
-
- /* We're using an exponential moving average with a weighting factor of α
- * (`alpha', above); larger values are more sensitive to recent changes.
- * If the old average was v_1, and the measurement in the current interval
- * is x, then the new average after this interval is
- *
- * v = α x + (1 − α) v_1 .
- *
- * Write β = 1 − α; so
- *
- * v = α x + β v_1 .
- *
- * Let x_0 = x, let x_1 be the measurement from the previous interval, and,
- * in general, let x_i be the measurement from i intervals ago. Then
- * another way to write the above would be
- *
- * v = α (x_0 + β x_1 + ⋯ + β^i x_i + ⋯) .
- *
- * Alas, our time intervals are not regular. Suppose that we get our next
- * measurement after a gap of t intervals, for some integer t. We can
- * compensate approximately by pretending that all of the missed intervals
- * -- and our new one -- had the same mean rate. Then we'd have
- * calculated
- *
- * v = α (x + β x + ⋯ + β^{t−1} x) + β^t v_1
- *
- * 1 − β^t
- * = α x ------- + β^t v_1
- * 1 − β
- *
- * = x (1 − β^t) + β^t v_1 (since α = 1 − β)
- *
- * = x + β^t (v_1 − x) .
- *
- * Does this work in general? It's clearly correct in the case t = 1.
- *
- * Suppose the old average was v_2, and that over a period of t intervals
- * (where t is not necessarily an integer) we measured a mean rate of x,
- * and then after u intervals we measured a mean rate of x /again/. Then
- * we'd firstly determine
- *
- * v_1 = x + β^t (v_2 − x)
- *
- * and then
- *
- * v = x + β^u (v_1 − x)
- *
- * = x + β^u (x + β^t (v_2 − x) − x)
- *
- * = x + β^{t+u} (v_2 − x) ,
- *
- * which is exactly what we'd have done if we'd calculated the same mean
- * rate over the combined span of t + u intervals.
- *
- * One final wrinkle, in case that wasn't enough. There's a problem with
- * the initial setup of an exponential moving average. Apparently
- * (https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average)
- * explains that we can do this better by calculating the average after k
- * intervals as
- *
- * x_0 + β x_1 + β^2 x_2 + ⋯ + β^{k−1} x_{k−1}
- * v′ = ------------------------------------------- .
- * 1 + β + β^2 + ⋯ + β^{k−1}
- *
- * The numerator is our existing v/α; the denominator is (1 − β^k)/α; the
- * factors of α cancel, and we find that v′ = v/(1 − β^k). This still
- * holds in our situation, where k may not be an integer.
- *
- * To apply all of this:
- *
- * * we maintain the moving average v in `avg';
- *
- * * we maintain the total β^k in `corr'; and
- *
- * * we compute v′ = v/(1 − β^k) on demand up in `render_perfstats'.
- */
+ double t, delta_r;
/* Find the current time and the delta since the last time we updated.
* This will be the length of the current interval.
* interval.
*/
- rate = (pos - last_pos)/t; beta_t = pow(1 - alpha, t);
- avg = rate + beta_t*(avg - rate); corr *= beta_t;
- ndone += pos - last_pos; last_time = now; last_pos = pos;
+ delta_r = linear_progress(&geom, last_pos, pos);
+ update_avg(&avg_rate, t, pos - last_pos);
+ update_avg(&avg_linear, t, delta_r);
+ ndone += pos - last_pos; done_linear += delta_r;
+ last_time = now; last_pos = pos;
+ }
+
+ /* Trace progress state if requested. */
+ if (progressfp) {
+ fprintf(progressfp, "%10ju.%06ld %"PRIuSEC" %f %f\n",
+ (uintmax_t)now.tv_sec, now.tv_usec,
+ ndone, done_linear,
+ (total_linear - done_linear)/current_avg(&avg_linear));
+ check_write(progressfp, "progress trace file");
}
/* Advance the throbber character. */
/* Put the progress display back, if we took it away, and show the file
* progress bar if we added one.
*/
- progress_update(&progress);
+ update_progress(start);
/* Read the input region and copy it to the disc. */
pos = start;
#define f_checkid 16u
#define f_retry 32u
#define f_write 256u
+#define f_file 512u
set_prog(argv[0]);
/* First up, handle the command-line options. */
for (;;) {
- opt = getopt(argc, argv, "hB:E:FR:X:b:cir:s"); if (opt < 0) break;
+ opt = getopt(argc, argv, "hB:E:FP:R:X:b:cir:s"); if (opt < 0) break;
switch (opt) {
/* `-h': Help. */
*/
case 'F': f |= f_fixup; break;
+ /* `-P FILE' (undocumented): trace progress state to FILE. */
+ case 'P':
+ if (progressfp) bail("progress trace file already set");
+ progressfp = fopen(optarg, "w");
+ if (!progressfp)
+ bail_syserr(errno, "failed to open progress trace file `%s'",
+ optarg);
+ break;
+
/* `-R FILE': Read ranges to retry from FILE. Retry ranges are
* converted into `EV_WRITE' and `EV_STOP' events.
*/
if (open_dvd(device, O_RDONLY, &dvdfd, &dvd)) exit(2);
/* Determine the size of the input device and check the sector size. */
- blksz = SECTORSZ; volsz = device_size(dvdfd, device, &blksz);
- if (blksz != SECTORSZ)
+ blksz = -1; volsz = device_size(dvdfd, device, &blksz);
+ if (blksz == -1)
+ { blksz = SECTORSZ; f |= f_file; }
+ else if (blksz != SECTORSZ)
bail("device `%s' block size %d /= %d", device, blksz, SECTORSZ);
if (volsz%SECTORSZ)
bail("device `%s' volume size %"PRIu64" not a multiple of %d",
device, volsz, SECTORSZ);
+ setup_geometry(&geom, dvdfd, f&f_file ? 0 : GF_BLKDEV, volsz/blksz);
+
+ if (progressfp) {
+ switch (geom.shape) {
+ case FLAT:
+ fprintf(progressfp, ":model flat-model\n");
+ break;
+ case SINGLE:
+ fprintf(progressfp, ":model single-layer-model :start %"PRIuSEC"\n",
+ geom.start0);
+ break;
+ case PTP:
+ fprintf(progressfp,
+ ":model parallel-track-path-model "
+ ":start0 %"PRIuSEC" :start1 %"PRIuSEC" "
+ ":midpoint %"PRIuSEC"\n",
+ geom.start0, geom.start1, geom.midpoint);
+ break;
+ case OTP:
+ fprintf(progressfp,
+ ":model opposite-track-path-model "
+ ":start %"PRIuSEC" :midpoint %"PRIuSEC"\n",
+ geom.start0, geom.midpoint);
+ break;
+ default:
+ abort();
+ }
+ }
+
/* Maybe check that we're copying from the right disc. This is intended to
* help avoid image corruption by from the wrong disc, but it obviously
* only works if the output file is mostly there.
/* If we're supposed to stop writing here, then add the size of the
* most recent range onto our running total.
*/
- if (ev->ev == EV_STOP) { nsectors += ev->pos - start; f &= ~f_write; }
+ if (ev->ev == EV_STOP) {
+ nsectors += ev->pos - start;
+ total_linear += linear_progress(&geom, start, ev->pos);
+ f &= ~f_write;
+ }
/* If we're fixing up images affected by the old early-stop bug, then
* remember this position.
*/
if (f&f_write) {
nsectors += limit - start;
+ total_linear += linear_progress(&geom, start, limit);
put_event(EV_STOP, 0, limit);
}
#ifdef DEBUG
*/
copy_progress.render = render_copy_progress;
progress_additem(&progress, ©_progress);
- if (nsectors == limit - start)
- { ndone = start; nsectors = limit; }
+ if (nsectors == limit - start) {
+ ndone = start; nsectors = limit;
+ done_linear = linear_progress(&geom, 0, start);
+ total_linear += done_linear;
+ }
else {
disc_progress.render = render_disc_progress;
progress_additem(&progress, &disc_progress);
if (outfd >= 0) close(outfd);
carefully_fclose(mapfp, "bad-sector region map");
carefully_fclose(errfp, "bad-sector error log");
+ carefully_fclose(progressfp, "progress trace file");
progress_free(&progress);
/* We're done! */