From 4d69e9430e4971eb923d87f31dcf5b089159a4fa Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Tue, 12 Apr 2022 22:05:24 +0100 Subject: [PATCH] dvd-sector-copy.c: Add nonlinear progress models. The details are rather complicated, so they're explained in the commentary. It seems to work pretty well in practice, settling in within 5% within a minute or two of starting to copy an OTP disc. --- dvd-sector-copy.c | 375 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 361 insertions(+), 14 deletions(-) diff --git a/dvd-sector-copy.c b/dvd-sector-copy.c index 5ccc755..019e382 100644 --- a/dvd-sector-copy.c +++ b/dvd-sector-copy.c @@ -27,6 +27,10 @@ #include "lib.h" +#ifdef __linux__ +# include +#endif + /*----- Program usage summary ---------------------------------------------*/ static void usage(FILE *fp) @@ -546,6 +550,302 @@ static void update_avg(struct avg *a, double t, double n) static inline double current_avg(const struct avg *a) { return (a->avg/(1 - a->corr)); } +/*----- The nonlinear progress model --------------------------------------*/ + +/* The recorded portion of a single-layer DVD (i.e., DVD-5) can hold 2298496 + * sectors of user data. This is preceded by 0x30000 = 196608 sectors of + * lead-in information, for a totoal of 2495104 sectors. + * + * The readable portion of a disc is an annulus with respective internal and + * external diameters of 44 mm and 117 mm. This annulus has an area of + * 9230.8 mm^2, so DVD has a storage density of about 270.3 sectors/mm^2. If + * the interior of the annulus were used for data storage rather than leaving + * a hole for a spindle and a clamping area, then it would be 10751 mm^2 and + * could store 2906107 sectors. (That means that the portion of the disc + * that's actually used to make it spin could have stored an additional + * 411003 sectors.) + * + * Sectors aren't stored on the surface willy-nilly, but arranged into a + * single archimedean spiral; bits are stored along this spiral at a + * more-or-less constant pitch. We are therefore led into an investigation + * of the arc-length of archimedean spirals. + * + * It's best to start with the polar equation of the spiral, which is simply + * + * r = k θ + * + * for a given constant k. The arc length of a curve expressed using polar + * coordinates is given by + * + * s = ∫ √(r^2 + (dr/dθ)^2) dθ + * + * = ∫ √(k^2 θ^2 + k^2) dθ + * + * = k ∫ √(1 + θ^2) dθ + * + * k + * = - [ θ √(1 + θ^2) + log(θ + √(1 + θ^2)) ] - s_0. + * 2 + * + * We're assuming that the sectors are spaced out at a constant linear + * density along the spiral. We don't know the units for s, but there's some + * constant L such that A = s/L; so + * + * k + * A = --- [ θ √(1 + θ^2) + log(θ + √(1 + θ^2)) ] - A_0 + * 2 L + * + * for some suitable constant A_0. + * + * Finally, we're assuming that the disc is spinning with some approximately + * constant angular velocity ω, so θ = ω t, giving + * + * k + * A = --- [ ω t √(1 + ω^2 t^2) + log(ω + √(1 + ω^2 t^2)) ] + A_0 . + * 2 L + * + * We can calculate approximate values for k/(2 L) and A_0. As stated above, + * the track pitch is about 0.75 µm; our inside and outside diameters of + * 44 mm and 117 mm correspond to angles of 184306 and 490088 radians + * respectively. Feeding those into the above equation for s gives arc + * lengths of 16984492558 and 120093346360 respectively, in unknown units. + * The difference is 103108853802, which should correspond to 2495104 + * sectors, giving us 41324 arc-length units per sector. As a cross-check, + * the arc length corresponding to the inside diameter yields 411003 sectors, + * which is the same as we calculated above. This will be our A_0 + */ + +#ifdef unusef +static double archimedes_arclen(double t) + /* Given an angle T, return the arc length of the canonical + * archimedean spiral r = θ, from θ = 0 up to θ = T. + */ +{ + double u; + + u = sqrt(1 + t*t); + return (t*u + log(t + u))/2; +} +#endif + +static double inv_archimedes_arclen(double s) + /* Given an arc length S, return the angle T such that the arc length + * of the canonical archimedean spiral r = θ, from θ = 0 up to θ = T, + * is equal to S. + */ +{ + /* There is no closed-form solution, so we're going to invert the arc- + * length formula above numerically, using the Newton--Raphson method. + * + * Given an incorrect guess x_0 of a zero of some function f, we refine the + * guess by approximating f by its tangent at the point (x_0, f(x_0)). + * This will be a line with an equation like + * + * y = f'(x_0) x + c . + * + * We know that y = f(x_0) when x = x_0, so we can calculate + * + * c = f(x_0) - f'(x_0) x_0 . + * + * This will be zero when + * + * y = f'(x_0) x + f(x_0) - f'(x_0) x_0 = 0 + * + * hwnce + * + * f'(x_0) x_0) - f(x_0) f(x_0) + * x = --------------------- = x_0 - ------- . + * f'(x_0) f'(x_0) + */ + + double t, ss, u, e; + + /* We need to choose an initial estimate. This seems to work well in + * practice. + */ + t = 1.5*sqrt(s); + + for (;;) { + /* Compute s' = f(t). We open-code this calculation because the + * intermediate value √(1 + t^2) is also the gradient. + */ + u = sqrt(1 + t*t); + ss = (t*u + log(t + u))/2; + + /* Determine the error in f(t). We don't actually need much precision + * here, but 2 ulp seems achievable in practice with minimal cost: the + * usually sequence converges after only five iterations. + */ + e = fabs(s/ss - 1); + if (e <= 2*DBL_EPSILON) return (t); + + /* Not good enough. Refine the guess and go around again. */ + t -= (ss - s)/u; + } +} + +static double sectors_to_angle(secaddr base, secaddr low, secaddr high) + /* Return the angle, in radians, subtended by the range LOW up to + * HIGH of user sector addresses, given the physical sector address + * BASE of the first user-data sectors. + */ +{ +#define A0 411003.262489 +#define K 41324.4713654 + + return (inv_archimedes_arclen(K*(A0 + base + high)) - + inv_archimedes_arclen(K*(A0 + base + low))); + +#undef A0 +#undef K +} + +#define LAYER_LIMIT 2298496 /* maximum (user) sectors on layer */ +#define DVDROM_OFFSET 0x30000 /* usual initial physical sector */ + +enum { + FLAT, /* not actually a real DVD */ + SINGLE, /* disc with only one layer */ + PTP, /* two layers, parallel track path */ + OTP /* two layers, opposite track path */ +}; + +struct geometry { + unsigned shape; /* one of the four codes above */ + secaddr start0, start1; /* initial physical sector */ + secaddr midpoint; /* sector address of layer switch */ +}; + +#define GF_BLKDEV 1u +static void setup_geometry(struct geometry *g, int fd, unsigned f, + secaddr sz) + /* Initialize G with information about the disc structure. FD is a + * file descriptor for the device; SZ is the size of the disc in + * sectors. If `GF_BLKDEV' is clear in F then assume that FD refers + * to a regular file; G is populated with a `FLAT' performance model. + * If `GF_BLKDEV' is set, then FD refers to a block device, so try to + * retreive detailed structure information from the drive. + */ +{ +#ifdef __linux__ + dvd_struct ds; + const struct dvd_layer *ly; +#endif + secaddr t; + + if (!(f&GF_BLKDEV)) { + /* We're reading from a regular file. Assume that progress will be + * linear. + */ + + g->shape = FLAT; + g->midpoint = SECLIMIT; + return; + } + +#ifdef __linux__ + /* We have Linux and its DVD ioctl(2) calls. Interrogate the disc to + * discover its structure. + */ + + ds.type = DVD_STRUCT_PHYSICAL; + ds.physical.layer_num = 0; + if (ioctl(fd, DVD_READ_STRUCT, &ds)) { + moan_syserr(errno, "failed to read physical disc structure"); + goto guess_structure; + } + ly = &ds.physical.layer[0]; + switch (ly->nlayers) { + case 0: + g->shape = SINGLE; + g->start0 = g->start1 = 0; + g->midpoint = SECLIMIT; + break; + case 1: + g->start0 = ly->start_sector; + if (ly->track_path) { + g->shape = OTP; + g->start1 = 0; + g->midpoint = ly->end_sector_l0 - ly->start_sector + 1; + } else { + g->shape = PTP; + g->midpoint = ly->end_sector - ly->start_sector + 1; + ds.physical.layer_num = 1; + if (ioctl(fd, DVD_READ_STRUCT, &ds)) { + moan_syserr(errno, "failed to read layer 1 physical structure"); + goto guess_structure; + } + g->start1 = ly->start_sector; + } + break; + default: + moan("unexpected layer count %d", ly->nlayers + 1); + goto guess_structure; + } + return; +guess_structure: +#endif + + /* Either we don't have Linux, or we found something confusing. Let's try + * to guess at what's going on. + * + * If the volume size is small enough to fit on a single layer then assume + * that's what's happened; otherwise assume opposite track path with a cut + * at the midpoint, rounded up to an ECC block (16 sectors). + */ + g->start0 = DVDROM_OFFSET; g->start1 = 0; + if (sz <= LAYER_LIMIT) { + g->shape = SINGLE; + g->midpoint = SECLIMIT; + } else { + g->shape = OTP; + t = (sz + DVDROM_OFFSET)/2; + t += 15; t &= -16; + t -= DVDROM_OFFSET; + g->midpoint = t; + } +} + +static double linear_progress(const struct geometry *g, + secaddr a0, secaddr a1) + /* Convert the sector range from A0 to A1 into a progress measurement + * which is, by intention, approximately linearly related to time, + * given a geometry description G. + */ +{ + double theta = 0.0; + + switch (g->shape) { + case FLAT: + theta = a1 - a0; + break; + case SINGLE: + theta = sectors_to_angle(g->start0, a0, a1); + break; + case PTP: + if (a0 < g->midpoint) + theta += sectors_to_angle(g->start0, + a0, a1 < g->midpoint ? a1 : g->midpoint); + if (a1 > g->midpoint) + theta += sectors_to_angle(g->start1, + a0 > g->midpoint ? a0 : g->midpoint, a1); + break; + case OTP: + if (a0 < g->midpoint) + theta += sectors_to_angle(g->start0, + a0, a1 < g->midpoint ? a1 : g->midpoint); + if (a1 > g->midpoint) + theta += sectors_to_angle(g->start0, + 2*g->midpoint - a1, + a0 > g->midpoint ? + 2*g->midpoint - a0 : g->midpoint); + break; + default: + abort(); + } + return (theta); +} + /*----- Common variables used by the copying machinery --------------------*/ /* General reading state. */ @@ -563,9 +863,11 @@ static unsigned retry, max_retries = 4; /* retry state */ /*----- Progress reporting ------------------------------------------------*/ static secaddr nsectors, ndone; /* number of sectors done/to do */ +static double total_linear, done_linear; /* linear progress tracking */ static secaddr last_pos; /* position last time we updated */ static struct timeval last_time; /* time last time we updated */ -static struct avg avg_rate = AVG_INIT; +static struct geometry geom; /* disc geometry for progress */ +static struct avg avg_rate = AVG_INIT, avg_linear = AVG_INIT; static int bad_err; /* most recent error code */ static FILE *progressfp; /* file on which to trace progress data */ @@ -614,16 +916,18 @@ static void render_perfstats(struct progress_render_state *render) { int eta; char timebuf[TIMESTRMAX]; - double rate; + double rate, linrate; const char *unit; /* If there's no average computed yet, then use some placeholder values. */ - rate = current_avg(&avg_rate); eta = (int)((nsectors - ndone)/rate + 0.5); + rate = current_avg(&avg_rate); + linrate = current_avg(&avg_linear); + eta = (int)((total_linear - done_linear)/linrate + 0.5); /* Write out the statistics. */ rate = scale_bytes(rate*SECTORSZ, &unit); progress_putright(render, "ETA %s ", - avg_rate.avg ? fmttime(eta, timebuf) : "???"); + avg_linear.avg ? fmttime(eta, timebuf) : "???"); progress_putright(render, "%.1f %sB/s, ", rate, unit); } @@ -704,7 +1008,7 @@ static void update_progress(secaddr pos) */ { struct timeval now; - double t; + double t, delta_r; /* Find the current time and the delta since the last time we updated. * This will be the length of the current interval. @@ -719,16 +1023,19 @@ static void update_progress(secaddr pos) * interval. */ + delta_r = linear_progress(&geom, last_pos, pos); update_avg(&avg_rate, t, pos - last_pos); - ndone += pos - last_pos; last_time = now; last_pos = pos; + update_avg(&avg_linear, t, delta_r); + ndone += pos - last_pos; done_linear += delta_r; + last_time = now; last_pos = pos; } /* Trace progress state if requested. */ if (progressfp) { - fprintf(progressfp, "%10ju.%06ld %"PRIuSEC" %f\n", + fprintf(progressfp, "%10ju.%06ld %"PRIuSEC" %f %f\n", (uintmax_t)now.tv_sec, now.tv_usec, - ndone, - (nsectors - ndone)/current_avg(&avg_rate)); + ndone, done_linear, + (total_linear - done_linear)/current_avg(&avg_linear)); check_write(progressfp, "progress trace file"); } @@ -1837,6 +2144,7 @@ int main(int argc, char *argv[]) #define f_checkid 16u #define f_retry 32u #define f_write 256u +#define f_file 512u set_prog(argv[0]); @@ -2088,13 +2396,44 @@ int main(int argc, char *argv[]) if (open_dvd(device, O_RDONLY, &dvdfd, &dvd)) exit(2); /* Determine the size of the input device and check the sector size. */ - blksz = SECTORSZ; volsz = device_size(dvdfd, device, &blksz); - if (blksz != SECTORSZ) + blksz = -1; volsz = device_size(dvdfd, device, &blksz); + if (blksz == -1) + { blksz = SECTORSZ; f |= f_file; } + else if (blksz != SECTORSZ) bail("device `%s' block size %d /= %d", device, blksz, SECTORSZ); if (volsz%SECTORSZ) bail("device `%s' volume size %"PRIu64" not a multiple of %d", device, volsz, SECTORSZ); + setup_geometry(&geom, dvdfd, f&f_file ? 0 : GF_BLKDEV, volsz/blksz); + + if (progressfp) { + switch (geom.shape) { + case FLAT: + fprintf(progressfp, ":model flat-model\n"); + break; + case SINGLE: + fprintf(progressfp, ":model single-layer-model :start %"PRIuSEC"\n", + geom.start0); + break; + case PTP: + fprintf(progressfp, + ":model parallel-track-path-model " + ":start0 %"PRIuSEC" :start1 %"PRIuSEC" " + ":midpoint %"PRIuSEC"\n", + geom.start0, geom.start1, geom.midpoint); + break; + case OTP: + fprintf(progressfp, + ":model opposite-track-path-model " + ":start %"PRIuSEC" :midpoint %"PRIuSEC"\n", + geom.start0, geom.midpoint); + break; + default: + abort(); + } + } + /* Maybe check that we're copying from the right disc. This is intended to * help avoid image corruption by from the wrong disc, but it obviously * only works if the output file is mostly there. @@ -2242,7 +2581,11 @@ int main(int argc, char *argv[]) /* If we're supposed to stop writing here, then add the size of the * most recent range onto our running total. */ - if (ev->ev == EV_STOP) { nsectors += ev->pos - start; f &= ~f_write; } + if (ev->ev == EV_STOP) { + nsectors += ev->pos - start; + total_linear += linear_progress(&geom, start, ev->pos); + f &= ~f_write; + } /* If we're fixing up images affected by the old early-stop bug, then * remember this position. @@ -2276,6 +2619,7 @@ int main(int argc, char *argv[]) */ if (f&f_write) { nsectors += limit - start; + total_linear += linear_progress(&geom, start, limit); put_event(EV_STOP, 0, limit); } #ifdef DEBUG @@ -2292,8 +2636,11 @@ int main(int argc, char *argv[]) */ copy_progress.render = render_copy_progress; progress_additem(&progress, ©_progress); - if (nsectors == limit - start) - { ndone = start; nsectors = limit; } + if (nsectors == limit - start) { + ndone = start; nsectors = limit; + done_linear = linear_progress(&geom, 0, start); + total_linear += done_linear; + } else { disc_progress.render = render_disc_progress; progress_additem(&progress, &disc_progress); -- 2.11.0