X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/tweak/blobdiff_plain/a63728c704b14d40a3229f66dbf0088fbe5d95af..HEAD:/main.c diff --git a/main.c b/main.c index 2e7d179..9478f9e 100644 --- a/main.c +++ b/main.c @@ -1,9 +1,8 @@ /* - * TODO possibly after that: + * Potential future TODO items. Points marked ISSUE need to be + * resolved one way or another, with good justification for the + * decision made, before implementation begins. * - * - Need to handle >2Gb files! Up the `filesize' type to long - * long, and use it everywhere (not just in buffer.c). - * * - Multiple buffers, multiple on-screen windows. * + ^X^F to open new file * + ^X^R to open new file RO @@ -18,17 +17,16 @@ * + hex-editor-style minibuffer for entering search terms, * rather than the current rather crap one; in particular * this enables pasting into the search string. - * + er, how exactly do we deal with the problem of saving over - * a file which we're maintaining references to in another - * buffer? The _current_ buffer can at least be sorted out by - * replacing it with a fresh tree containing a single - * file-data block, but other buffers are in trouble. - * * if we can rely on Unix fd semantics, this isn't too - * bad; we can just keep the fd open on the original file, - * and then the data stays around even after we rename(2) - * our new version over the top. Disk space usage gets - * silly after a few iterations, but it's better than - * nothing. + * + ISSUE: how exactly do we deal with the problem of saving + * over a file which we're maintaining references to in + * another buffer? The _current_ buffer can at least be + * sorted out by replacing it with a fresh tree containing a + * single file-data block, but other buffers are in trouble. + * * if we can rely on Unix fd semantics, one option is just + * to keep the fd open on the original file, and then the + * data stays around even after we rename(2) our new + * version over the top. 
Disk space usage gets silly after + * a few iterations, but it's better than nothing. * * - Undo! * + this actually doesn't seem _too_ horrid. For a start, one @@ -47,36 +45,129 @@ * shouldn't be one. Sort that out. * * - In-place editing. - * + this is an extra option when running in Fix mode. It - * causes a change of semantics when saving: instead of - * constructing a new backup file and writing it over the old - * one, we simply seek within the original file and write out - * all the pieces that have changed. - * + Primarily useful for editing disk devices directly - * (yikes!), or other situations where you actually cannot - * create a fresh copy of the file and rename(2) it into - * place. - * + I had intended to suggest that in Fix mode this would be - * nice and easy, since every element of the buffer tree is - * either a literal block (needs writing) or a from-file - * block denoting the same file _in the same position_. - * However, this is not in fact the case because you can cut - * and paste, so it's not that easy. - * + So I'm forced to the conclusion that when operating in - * this mode, it becomes illegal to cut and paste from-file - * blocks: they must be loaded in full at some point. - * * Thinking ahead about multiple-buffer operation: it - * would be a bad idea to keep a from-file block - * referencing /dev/hda and paste it into another ordinary - * buffer. But _also_ it would be a bad idea to paste a - * from-file block referencing a file stored _on_ /dev/hda - * into the in-place buffer dealing with /dev/hda itself. - * * So I'm forced to another odd conclusion, which is that - * from-file blocks must be eliminated in _two_ places: - * when copying a cut buffer _from_ an in-place buffer, - * _and_ when pasting a cut buffer _into_ one. + * + this is an extra option useful for editing disk devices + * directly (!), or other situations in which it's impossible + * or impractical to rename(2) your new file over the old + * one. 
It causes a change of semantics when saving: instead + * of constructing a new backup file and writing it over the + * old one, we simply seek within the original file and write + * out all the pieces that have changed. + * + Saving the file involves identifying the bits of the file + * that need to change, and changing them. A piece of file + * can be discarded as `no change required' if it's + * represented in the buffer by a from-file block whose file + * offset is equal to its offset in the buffer. + * * Once we have identified all the bits that do need to + * change, we have to draw up a dependency graph to + * indicate which bits want to be copied from which other + * bits. (You don't want to overwrite a piece of file if + * you still have from-file blocks pointing at that + * piece.) This is a directed graph with nodes + * corresponding to intervals of the file, and edges + * indicating that the source node's interval is intended + * to end up containing the data from the target node's + * interval in the original file. Another node type is + * `literal data', which can be the target of an edge but + * never the source. + * - note that this means any two nodes connected by an + * edge must represent intervals of the same length. + * Sometimes this means that an interval must be split + * into pieces even though it is represented in the + * buffer by a single large from-file block (if + * from-file blocks copying _from_ it don't cover the + * whole of it). I suspect the simplest approach here + * is just to start by making a B-tree of division + * points in the file: every from-file block adds four + * division points (for start and end of both source + * and dest interval), and once the tree is complete, + * each graph node represents the interval between two + * adjacent division points. + * - ISSUE: actually, that strategy is inadequate: + * consider a large from-file block displaced by only + * one byte from its source location. 
The above + * strategy gives division points at x, x+1, x+y, + * x+y+1, but the interval [x,x+1] actually wants to + * point to [x+1,x+2] and we don't have a division + * point for that. Worse still, finding a way to add + * the remaining division points is also undesirable + * because there'd be so many of them. Needs design + * changes. + * * Then, any node which is not the target of any edge + * represents a piece of file which it's safe to write + * over, so we do so and throw away the node. + * * If we run out of such nodes and the graph is still + * non-empty, it's because all remaining nodes are part of + * loops. A loop must represent a set of disjoint + * intervals in the file, all the same length, which need + * to be permuted cyclically. So we deal with such a loop + * by reading a chunk of data from the start of one of the + * intervals and holding it, then copying from the next + * interval to that one, and so on until we've gone round + * the loop. + * + the intervals in the loop might be far too big to + * hold an entire interval's worth of real data in + * memory, so we might have to do it piecewise. + * + ISSUE: I wonder if a warning of some sort might be in + * order for if you accidentally request most of the file be + * moved about. This sort of trickery is really intended for + * small changes to a large file; if you (say) enable insert + * mode while editing a hard disk and accidentally leave + * everything one byte further up, you _really_ don't want to + * hit Save. The semantics of the warning are difficult, + * though. + * + * - Custom display and/or input formats? + * + for example, Zap on RISC OS is able to display a binary + * file at 4 bytes per line and show the ARM disassembly of + * each word. For added credit, ability to type an ARM + * instruction back _in_ and have it reassembled into binary + * would be even better. 
+ * + a simpler example is that sometimes you want to view a + * file as a sequence of little-endian 32-bit words rather + * than single bytes. + * + this would have to involve some sort of scripting or + * internal API. I'd really rather the interface was nailed + * down very early on and people were then free to develop + * custom formats without my involvement; I might be + * persuaded to keep a library of them or a list of + * hyperlinks or something, but actually _maintaining_ them + * is more effort than I want. + * + ARM assembler is all very well, but what about x86, with + * its variable instruction length? You can start + * disassembling from any byte position and work forwards + * unambiguously, but going backwards or jumping to an + * arbitrary byte position is much harder. You might have to + * shift your current file view back or forward by one byte + * to resynchronise, and the semantics of insert mode become + * generally confused, and even trying to _predict_ what a + * sensible synchronisation point would be when jumping to a + * bit of the file you've never seen before ... yuck. + * * The key thing that makes this horrid is that the custom + * display mode looks at the file _contents_, not merely + * its length, when deciding how many bytes per line to + * display. File-position-dependent number of bytes per + * line is fine, but _data_ dependency is doom. + * * So I think that in the interests of not causing tension + * between random things people would like in _some_ hex + * editor and what makes Tweak Tweak, I am going to put my + * foot down and say that I will not implement any + * mechanism which permits a data-dependent number of + * bytes per line. Anything short of that, fine, send me a + * patch or a detailed and well thought out design and + * I'll consider it on its merits. + * * I don't, OTOH, see any reason why a custom display + * function couldn't be permitted to see data before or + * after the current lineful if it wanted to. 
So x86 + * disassembly could be done in a one-byte-per-line sort + * of fashion in which each line shows the machine + * instruction which the CPU would see if it started + * executing at that byte, and also gave its length. Then + * you could pick out the sequence of instructions you + * were interested in from the various out-of-sync ones. */ +#include "tweak.h" + #include #include #include @@ -92,8 +183,6 @@ #include #endif -#include "tweak.h" - static void init(void); static void done(void); static void load_file (char *); @@ -101,10 +190,10 @@ static void load_file (char *); char toprint[256]; /* LUT: printable versions of chars */ char hex[256][3]; /* LUT: binary to hex, 1 byte */ -char message[80]; +char message[512]; -char decstatus[] = "%s TWEAK "VER": %-18.18s %s posn=%-10d size=%-10d"; -char hexstatus[] = "%s TWEAK "VER": %-18.18s %s posn=0x%-8X size=0x%-8X"; +char decstatus[] = "%s TWEAK "VER": %-18.18s %s posn=%-10"OFF"d size=%-10"OFF"d"; +char hexstatus[] = "%s TWEAK "VER": %-18.18s %s posn=0x%-8"OFF"X size=0x%-8"OFF"X"; char *statfmt = hexstatus; char last_char; @@ -121,12 +210,12 @@ int marking = FALSE; int modified = FALSE; int new_file = FALSE; /* shouldn't need initialisation - * but let's not take chances :-) */ -int width = 16; -int realoffset = 0, offset = 16; +fileoffset_t width = 16; +fileoffset_t realoffset = 0, offset = 16; int ascii_enabled = TRUE; -long file_size = 0, top_pos = 0, cur_pos = 0, mark_point = 0; +fileoffset_t file_size = 0, top_pos = 0, cur_pos = 0, mark_point = 0; int scrlines; @@ -134,7 +223,7 @@ int scrlines; * Main program */ int main(int argc, char **argv) { - int newoffset = -1, newwidth = -1; + fileoffset_t newoffset = -1, newwidth = -1; /* * Parse command line arguments @@ -170,10 +259,10 @@ int main(int argc, char **argv) { } switch (c) { case 'o': case 'O': - newoffset = strtol(value, NULL, 0); /* allow `0xXX' */ + newoffset = parse_num(value, NULL); break; case 'w': case 'W': - newwidth = strtol(value, NULL, 0); 
+ newwidth = parse_num(value, NULL); break; } break; @@ -229,7 +318,7 @@ int main(int argc, char **argv) { void fix_offset(void) { if (3*width+11 > display_cols) { width = (display_cols-11) / 3; - sprintf (message, "Width reduced to %d to fit on the screen", width); + sprintf (message, "Width reduced to %"OFF"d to fit on the screen", width); } if (4*width+14 > display_cols) { ascii_enabled = FALSE; @@ -251,11 +340,11 @@ static void init(void) { display_setup(); - display_define_colour(COL_BUFFER, 7, 0); - display_define_colour(COL_SELECT, 0, 7); - display_define_colour(COL_STATUS, 11, 4); - display_define_colour(COL_ESCAPE, 9, 0); - display_define_colour(COL_INVALID, 11, 0); + display_define_colour(COL_BUFFER, -1, -1, FALSE); + display_define_colour(COL_SELECT, 0, 7, TRUE); + display_define_colour(COL_STATUS, 11, 4, TRUE); + display_define_colour(COL_ESCAPE, 9, 0, FALSE); + display_define_colour(COL_INVALID, 11, 0, FALSE); for (i=0; i<256; i++) { sprintf(hex[i], "%02X", i); @@ -279,7 +368,7 @@ static void load_file (char *fname) { file_size = 0; if ( (fp = fopen (fname, "rb")) ) { if (eager_mode) { - long len; + size_t len; static char buffer[4096]; filedata = buf_new_empty(); @@ -295,12 +384,12 @@ static void load_file (char *fname) { } fclose (fp); assert(file_size == buf_length(filedata)); - sprintf(message, "loaded %s (size %ld == 0x%lX).", + sprintf(message, "loaded %s (size %"OFF"d == 0x%"OFF"X).", fname, file_size, file_size); } else { filedata = buf_new_from_file(fp); file_size = buf_length(filedata); - sprintf(message, "opened %s (size %ld == 0x%lX).", + sprintf(message, "opened %s (size %"OFF"d == 0x%"OFF"X).", fname, file_size, file_size); } new_file = FALSE; @@ -321,7 +410,7 @@ static void load_file (char *fname) { */ int save_file (void) { FILE *fp; - long pos = 0; + fileoffset_t pos = 0; if (look_mode) return FALSE; /* do nothing! 
*/ @@ -330,7 +419,7 @@ int save_file (void) { static char buffer[SAVE_BLKSIZ]; while (pos < file_size) { - long size = file_size - pos; + fileoffset_t size = file_size - pos; if (size > SAVE_BLKSIZ) size = SAVE_BLKSIZ; @@ -380,30 +469,30 @@ int backup_file (void) { } static unsigned char *scrbuf = NULL; -static int scrbuflines = 0; +static int scrbufsize = 0; /* * Draw the screen, for normal usage. */ void draw_scr (void) { int scrsize, scroff, llen, i, j; - long currpos; - int marktop, markbot, mark; + fileoffset_t currpos; + fileoffset_t marktop, markbot; + int mark; char *p; unsigned char c, *q; char *linebuf; scrlines = display_rows - 2; - if (scrlines > scrbuflines) { - scrbuf = (scrbuf ? - realloc(scrbuf, scrlines*width) : - malloc(scrlines*width)); + scrsize = scrlines * width; + if (scrsize > scrbufsize) { + scrbuf = (scrbuf ? realloc(scrbuf, scrsize) : malloc(scrsize)); if (!scrbuf) { done(); fprintf(stderr, "%s: out of memory!\n", pname); exit (2); } - scrbuflines = scrlines; + scrbufsize = scrsize; } linebuf = malloc(width*4+20); @@ -419,7 +508,8 @@ void draw_scr (void) { scroff = width - offset; else scroff = 0; - scrsize = scrlines * width - scroff; + + scrsize -= scroff; if (scrsize > file_size - top_pos) scrsize = file_size - top_pos; @@ -476,9 +566,10 @@ void draw_scr (void) { * requiring highlighting: a hex bit and an ascii * bit. */ - int localstart= (currposmarkbot ? markbot : - currpos+llen) - currpos; + fileoffset_t localstart= (currposmarkbot ? markbot : + currpos+llen) - currpos; localstart += width-llen; localstop += width-llen; display_write_chars(linebuf, 11+3*localstart); @@ -499,9 +590,11 @@ void draw_scr (void) { display_write_chars(linebuf+10+3*localstop, 2+3*width-3*localstop); } - } else + } else { + display_set_colour(COL_BUFFER); display_write_chars(linebuf, ascii_enabled ? 13+4*width : 10+3*width); + } } currpos += (currpos ? 
width : offset); display_clear_to_eol(); @@ -544,6 +637,9 @@ void draw_scr (void) { display_refresh (); } +volatile int safe_update, update_required; +void update (void); + /* * Get a string, in the "minibuffer". Return TRUE on success, FALSE * on break. Possibly syntax-highlight the entered string for @@ -566,9 +662,9 @@ int get_str (char *prompt, char *buf, int highlight) { p++; if (p=r || !isxdigit (*p)) + else if (p>=r || !isxdigit ((unsigned char)*p)) display_set_colour(COL_INVALID); - else if (p+1>=r || !isxdigit (p[1])) + else if (p+1>=r || !isxdigit ((unsigned char)p[1])) p++, display_set_colour(COL_INVALID); else p+=2, display_set_colour(COL_ESCAPE); @@ -636,7 +732,8 @@ int parse_quoted (char *buffer) { p++; if (*p == '\\') *q++ = *p++; - else if (p[1] && isxdigit(*p) && isxdigit(p[1])) { + else if (p[1] && isxdigit((unsigned char)*p) && + isxdigit((unsigned char)p[1])) { char buf[3]; buf[0] = *p++; buf[1] = *p++; @@ -667,8 +764,6 @@ void suspend(void) { #endif } -volatile int safe_update, update_required; - void update (void) { display_recheck_size(); fix_offset (); @@ -682,18 +777,18 @@ void schedule_update(void) { update_required = TRUE; } -long parse_num (char *buffer, int *error) { +fileoffset_t parse_num (char *buffer, int *error) { if (error) *error = FALSE; if (!buffer[strspn(buffer, "0123456789")]) { /* interpret as decimal */ - return atoi(buffer); + return ATOOFF(buffer); } else if (buffer[0]=='0' && (buffer[1]=='X' || buffer[1]=='x') && !buffer[2+strspn(buffer+2,"0123456789ABCDEFabcdef")]) { - return strtol(buffer+2, NULL, 16); + return STRTOOFF(buffer+2, NULL, 16); } else if (buffer[0]=='$' && !buffer[1+strspn(buffer+1,"0123456789ABCDEFabcdef")]) { - return strtol(buffer+1, NULL, 16); + return STRTOOFF(buffer+1, NULL, 16); } else { return 0; if (error)