X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/tweak/blobdiff_plain/a63728c704b14d40a3229f66dbf0088fbe5d95af..HEAD:/main.c

diff --git a/main.c b/main.c
index 2e7d179..9478f9e 100644
--- a/main.c
+++ b/main.c
@@ -1,9 +1,8 @@
 /*
- * TODO possibly after that:
+ * Potential future TODO items. Points marked ISSUE need to be
+ * resolved one way or another, with good justification for the
+ * decision made, before implementation begins.
  * 
- *  - Need to handle >2Gb files! Up the `filesize' type to long
- *    long, and use it everywhere (not just in buffer.c).
- *
  *  - Multiple buffers, multiple on-screen windows.
  *     + ^X^F to open new file
  *     + ^X^R to open new file RO
@@ -18,17 +17,16 @@
  *     + hex-editor-style minibuffer for entering search terms,
  * 	 rather than the current rather crap one; in particular
  * 	 this enables pasting into the search string.
- *     + er, how exactly do we deal with the problem of saving over
- * 	 a file which we're maintaining references to in another
- * 	 buffer? The _current_ buffer can at least be sorted out by
- * 	 replacing it with a fresh tree containing a single
- * 	 file-data block, but other buffers are in trouble.
- * 	  * if we can rely on Unix fd semantics, this isn't too
- * 	    bad; we can just keep the fd open on the original file,
- * 	    and then the data stays around even after we rename(2)
- * 	    our new version over the top. Disk space usage gets
- * 	    silly after a few iterations, but it's better than
- * 	    nothing.
+ *     + ISSUE: how exactly do we deal with the problem of saving
+ * 	 over a file which we're maintaining references to in
+ * 	 another buffer? The _current_ buffer can at least be
+ * 	 sorted out by replacing it with a fresh tree containing a
+ * 	 single file-data block, but other buffers are in trouble.
+ * 	  * if we can rely on Unix fd semantics, one option is just
+ * 	    to keep the fd open on the original file, and then the
+ * 	    data stays around even after we rename(2) our new
+ * 	    version over the top. Disk space usage gets silly after
+ * 	    a few iterations, but it's better than nothing.
  * 
  *  - Undo!
  *     + this actually doesn't seem _too_ horrid. For a start, one
@@ -47,36 +45,129 @@
  * 	 shouldn't be one. Sort that out.
  * 
  *  - In-place editing.
- *     + this is an extra option when running in Fix mode. It
- * 	 causes a change of semantics when saving: instead of
- * 	 constructing a new backup file and writing it over the old
- * 	 one, we simply seek within the original file and write out
- * 	 all the pieces that have changed.
- *     + Primarily useful for editing disk devices directly
- * 	 (yikes!), or other situations where you actually cannot
- * 	 create a fresh copy of the file and rename(2) it into
- * 	 place.
- *     + I had intended to suggest that in Fix mode this would be
- * 	 nice and easy, since every element of the buffer tree is
- * 	 either a literal block (needs writing) or a from-file
- * 	 block denoting the same file _in the same position_.
- * 	 However, this is not in fact the case because you can cut
- * 	 and paste, so it's not that easy.
- *     + So I'm forced to the conclusion that when operating in
- * 	 this mode, it becomes illegal to cut and paste from-file
- * 	 blocks: they must be loaded in full at some point.
- * 	  * Thinking ahead about multiple-buffer operation: it
- * 	    would be a bad idea to keep a from-file block
- * 	    referencing /dev/hda and paste it into another ordinary
- * 	    buffer. But _also_ it would be a bad idea to paste a
- * 	    from-file block referencing a file stored _on_ /dev/hda
- * 	    into the in-place buffer dealing with /dev/hda itself.
- * 	  * So I'm forced to another odd conclusion, which is that
- * 	    from-file blocks must be eliminated in _two_ places:
- * 	    when copying a cut buffer _from_ an in-place buffer,
- * 	    _and_ when pasting a cut buffer _into_ one.
+ *     + this is an extra option useful for editing disk devices
+ * 	 directly (!), or other situations in which it's impossible
+ * 	 or impractical to rename(2) your new file over the old
+ * 	 one. It causes a change of semantics when saving: instead
+ * 	 of constructing a new backup file and writing it over the
+ * 	 old one, we simply seek within the original file and write
+ * 	 out all the pieces that have changed.
+ *     + Saving the file involves identifying the bits of the file
+ * 	 that need to change, and changing them. A piece of file
+ * 	 can be discarded as `no change required' if it's
+ * 	 represented in the buffer by a from-file block whose file
+ * 	 offset is equal to its offset in the buffer.
+ * 	  * Once we have identified all the bits that do need to
+ * 	    change, we have to draw up a dependency graph to
+ * 	    indicate which bits want to be copied from which other
+ * 	    bits. (You don't want to overwrite a piece of file if
+ * 	    you still have from-file blocks pointing at that
+ * 	    piece.) This is a directed graph with nodes
+ * 	    corresponding to intervals of the file, and edges
+ * 	    indicating that the source node's interval is intended
+ * 	    to end up containing the data from the target node's
+ * 	    interval in the original file. Another node type is
+ * 	    `literal data', which can be the target of an edge but
+ * 	    never the source.
+ * 	     - note that this means any two nodes connected by an
+ * 	       edge must represent intervals of the same length.
+ * 	       Sometimes this means that an interval must be split
+ * 	       into pieces even though it is represented in the
+ * 	       buffer by a single large from-file block (if
+ * 	       from-file blocks copying _from_ it don't cover the
+ * 	       whole of it). I suspect the simplest approach here
+ * 	       is just to start by making a B-tree of division
+ * 	       points in the file: every from-file block adds four
+ * 	       division points (for start and end of both source
+ * 	       and dest interval), and once the tree is complete,
+ * 	       each graph node represents the interval between two
+ * 	       adjacent division points.
+ * 	     - ISSUE: actually, that strategy is inadequate:
+ * 	       consider a large from-file block displaced by only
+ * 	       one byte from its source location. The above
+ * 	       strategy gives division points at x, x+1, x+y,
+ * 	       x+y+1, but the interval [x,x+1] actually wants to
+ * 	       point to [x+1,x+2] and we don't have a division
+ * 	       point for that. Worse still, finding a way to add
+ * 	       the remaining division points is also undesirable
+ * 	       because there'd be so many of them. Needs design
+ * 	       changes.
+ * 	  * Then, any node which is not the target of any edge
+ * 	    represents a piece of file which it's safe to write
+ * 	    over, so we do so and throw away the node.
+ * 	  * If we run out of such nodes and the graph is still
+ * 	    non-empty, it's because all remaining nodes are part of
+ * 	    loops. A loop must represent a set of disjoint
+ * 	    intervals in the file, all the same length, which need
+ * 	    to be permuted cyclically. So we deal with such a loop
+ * 	    by reading a chunk of data from the start of one of the
+ * 	    intervals and holding it, then copying from the next
+ * 	    interval to that one, and so on until we've gone round
+ * 	    the loop.
+ * 	     - the intervals in the loop might be far too big to
+ * 	       hold an entire interval's worth of real data in
+ * 	       memory, so we might have to do it piecewise.
+ *     + ISSUE: I wonder if a warning of some sort might be in
+ * 	 order if you accidentally request that most of the file be
+ * 	 moved about. This sort of trickery is really intended for
+ * 	 small changes to a large file; if you (say) enable insert
+ * 	 mode while editing a hard disk and accidentally leave
+ * 	 everything one byte further up, you _really_ don't want to
+ * 	 hit Save. The semantics of the warning are difficult,
+ * 	 though.
+ *
+ *  - Custom display and/or input formats?
+ *     + for example, Zap on RISC OS is able to display a binary
+ * 	 file at 4 bytes per line and show the ARM disassembly of
+ * 	 each word. For added credit, ability to type an ARM
+ * 	 instruction back _in_ and have it reassembled into binary
+ * 	 would be even better.
+ *     + a simpler example is that sometimes you want to view a
+ * 	 file as a sequence of little-endian 32-bit words rather
+ * 	 than single bytes.
+ *     + this would have to involve some sort of scripting or
+ * 	 internal API. I'd really rather the interface was nailed
+ * 	 down very early on and people were then free to develop
+ * 	 custom formats without my involvement; I might be
+ * 	 persuaded to keep a library of them or a list of
+ * 	 hyperlinks or something, but actually _maintaining_ them
+ * 	 is more effort than I want.
+ *     + ARM assembler is all very well, but what about x86, with
+ * 	 its variable instruction length? You can start
+ * 	 disassembling from any byte position and work forwards
+ * 	 unambiguously, but going backwards or jumping to an
+ * 	 arbitrary byte position is much harder. You might have to
+ * 	 shift your current file view back or forward by one byte
+ * 	 to resynchronise, and the semantics of insert mode become
+ * 	 generally confused, and even trying to _predict_ what a
+ * 	 sensible synchronisation point would be when jumping to a
+ * 	 bit of the file you've never seen before ... yuck.
+ * 	  * The key thing that makes this horrid is that the custom
+ * 	    display mode looks at the file _contents_, not merely
+ * 	    its length, when deciding how many bytes per line to
+ * 	    display. File-position-dependent number of bytes per
+ * 	    line is fine, but _data_ dependency is doom.
+ * 	  * So I think that in the interests of not causing tension
+ * 	    between random things people would like in _some_ hex
+ * 	    editor and what makes Tweak Tweak, I am going to put my
+ * 	    foot down and say that I will not implement any
+ * 	    mechanism which permits a data-dependent number of
+ * 	    bytes per line. Anything short of that, fine, send me a
+ * 	    patch or a detailed and well thought out design and
+ * 	    I'll consider it on its merits.
+ * 	  * I don't, OTOH, see any reason why a custom display
+ * 	    function couldn't be permitted to see data before or
+ * 	    after the current lineful if it wanted to. So x86
+ * 	    disassembly could be done in a one-byte-per-line sort
+ * 	    of fashion in which each line shows the machine
+ * 	    instruction which the CPU would see if it started
+ * 	    executing at that byte, and also gives its length. Then
+ * 	    you could pick out the sequence of instructions you
+ * 	    were interested in from the various out-of-sync ones.
  */
 
+#include "tweak.h"
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -92,8 +183,6 @@
 #include <process.h>
 #endif
 
-#include "tweak.h"
-
 static void init(void);
 static void done(void);
 static void load_file (char *);
@@ -101,10 +190,10 @@ static void load_file (char *);
 char toprint[256];		       /* LUT: printable versions of chars */
 char hex[256][3];		       /* LUT: binary to hex, 1 byte */
 
-char message[80];
+char message[512];
 
-char decstatus[] = "%s TWEAK "VER": %-18.18s %s posn=%-10d size=%-10d";
-char hexstatus[] = "%s TWEAK "VER": %-18.18s %s posn=0x%-8X size=0x%-8X";
+char decstatus[] = "%s TWEAK "VER": %-18.18s %s posn=%-10"OFF"d size=%-10"OFF"d";
+char hexstatus[] = "%s TWEAK "VER": %-18.18s %s posn=0x%-8"OFF"X size=0x%-8"OFF"X";
 char *statfmt = hexstatus;
 
 char last_char;
@@ -121,12 +210,12 @@ int marking = FALSE;
 int modified = FALSE;
 int new_file = FALSE;		       /* shouldn't need initialisation -
 					* but let's not take chances :-) */
-int width = 16;
-int realoffset = 0, offset = 16;
+fileoffset_t width = 16;
+fileoffset_t realoffset = 0, offset = 16;
 
 int ascii_enabled = TRUE;
 
-long file_size = 0, top_pos = 0, cur_pos = 0, mark_point = 0;
+fileoffset_t file_size = 0, top_pos = 0, cur_pos = 0, mark_point = 0;
 
 int scrlines;
 
@@ -134,7 +223,7 @@ int scrlines;
  * Main program
  */
 int main(int argc, char **argv) {
-    int newoffset = -1, newwidth = -1;
+    fileoffset_t newoffset = -1, newwidth = -1;
 
     /*
      * Parse command line arguments
@@ -170,10 +259,10 @@ int main(int argc, char **argv) {
 		}
 		switch (c) {
 		  case 'o': case 'O':
-		    newoffset = strtol(value, NULL, 0);   /* allow `0xXX' */
+		    newoffset = parse_num(value, NULL);
 		    break;
 		  case 'w': case 'W':
-		    newwidth = strtol(value, NULL, 0);
+		    newwidth = parse_num(value, NULL);
 		    break;
 		}
 		break;
@@ -229,7 +318,7 @@ int main(int argc, char **argv) {
 void fix_offset(void) {
     if (3*width+11 > display_cols) {
 	width = (display_cols-11) / 3;
-	sprintf (message, "Width reduced to %d to fit on the screen", width);
+	sprintf (message, "Width reduced to %"OFF"d to fit on the screen", width);
     }
     if (4*width+14 > display_cols) {
 	ascii_enabled = FALSE;
@@ -251,11 +340,11 @@ static void init(void) {
 
     display_setup();
 
-    display_define_colour(COL_BUFFER, 7, 0);
-    display_define_colour(COL_SELECT, 0, 7);
-    display_define_colour(COL_STATUS, 11, 4);
-    display_define_colour(COL_ESCAPE, 9, 0);
-    display_define_colour(COL_INVALID, 11, 0);
+    display_define_colour(COL_BUFFER, -1, -1, FALSE);
+    display_define_colour(COL_SELECT, 0, 7, TRUE);
+    display_define_colour(COL_STATUS, 11, 4, TRUE);
+    display_define_colour(COL_ESCAPE, 9, 0, FALSE);
+    display_define_colour(COL_INVALID, 11, 0, FALSE);
 
     for (i=0; i<256; i++) {
 	sprintf(hex[i], "%02X", i);
@@ -279,7 +368,7 @@ static void load_file (char *fname) {
     file_size = 0;
     if ( (fp = fopen (fname, "rb")) ) {
 	if (eager_mode) {
-	    long len;
+	    size_t len;
 	    static char buffer[4096];
 
 	    filedata = buf_new_empty();
@@ -295,12 +384,12 @@ static void load_file (char *fname) {
 	    }
 	    fclose (fp);
 	    assert(file_size == buf_length(filedata));
-	    sprintf(message, "loaded %s (size %ld == 0x%lX).",
+	    sprintf(message, "loaded %s (size %"OFF"d == 0x%"OFF"X).",
 		    fname, file_size, file_size);
 	} else {
 	    filedata = buf_new_from_file(fp);
 	    file_size = buf_length(filedata);
-	    sprintf(message, "opened %s (size %ld == 0x%lX).",
+	    sprintf(message, "opened %s (size %"OFF"d == 0x%"OFF"X).",
 		    fname, file_size, file_size);
 	}
 	new_file = FALSE;
@@ -321,7 +410,7 @@ static void load_file (char *fname) {
  */
 int save_file (void) {
     FILE *fp;
-    long pos = 0;
+    fileoffset_t pos = 0;
 
     if (look_mode)
 	return FALSE;		       /* do nothing! */
@@ -330,7 +419,7 @@ int save_file (void) {
 	static char buffer[SAVE_BLKSIZ];
 
 	while (pos < file_size) {
-	    long size = file_size - pos;
+	    fileoffset_t size = file_size - pos;
 	    if (size > SAVE_BLKSIZ)
 		size = SAVE_BLKSIZ;
 
@@ -380,30 +469,30 @@ int backup_file (void) {
 }
 
 static unsigned char *scrbuf = NULL;
-static int scrbuflines = 0;
+static int scrbufsize = 0;
 
 /*
  * Draw the screen, for normal usage.
  */
 void draw_scr (void) {
     int scrsize, scroff, llen, i, j;
-    long currpos;
-    int marktop, markbot, mark;
+    fileoffset_t currpos;
+    fileoffset_t marktop, markbot;
+    int mark;
     char *p;
     unsigned char c, *q;
     char *linebuf;
 
     scrlines = display_rows - 2;
-    if (scrlines > scrbuflines) {
-	scrbuf = (scrbuf ?
-		  realloc(scrbuf, scrlines*width) :
-		  malloc(scrlines*width));
+    scrsize = scrlines * width;
+    if (scrsize > scrbufsize) {
+	scrbuf = (scrbuf ? realloc(scrbuf, scrsize) : malloc(scrsize));
 	if (!scrbuf) {
 	    done();
 	    fprintf(stderr, "%s: out of memory!\n", pname);
 	    exit (2);
 	}
-	scrbuflines = scrlines;
+	scrbufsize = scrsize;
     }
 
     linebuf = malloc(width*4+20);
@@ -419,7 +508,8 @@ void draw_scr (void) {
 	scroff = width - offset;
     else
 	scroff = 0;
-    scrsize = scrlines * width - scroff;
+
+    scrsize -= scroff;
     if (scrsize > file_size - top_pos)
 	scrsize = file_size - top_pos;
 
@@ -476,9 +566,10 @@ void draw_scr (void) {
 		 * requiring highlighting: a hex bit and an ascii
 		 * bit.
 		 */
-		int localstart= (currpos<marktop?marktop:currpos) - currpos;
-		int localstop = (currpos+llen>markbot ? markbot :
-				 currpos+llen) - currpos;
+		fileoffset_t localstart= (currpos<marktop ? marktop :
+                                          currpos) - currpos;
+		fileoffset_t localstop = (currpos+llen>markbot ? markbot :
+                                          currpos+llen) - currpos;
 		localstart += width-llen;
 		localstop += width-llen;
 		display_write_chars(linebuf, 11+3*localstart);
@@ -499,9 +590,11 @@ void draw_scr (void) {
 		    display_write_chars(linebuf+10+3*localstop,
 				       2+3*width-3*localstop);
 		}
-	    } else
+	    } else {
+                display_set_colour(COL_BUFFER);
 		display_write_chars(linebuf,
 				   ascii_enabled ? 13+4*width : 10+3*width);
+            }
 	}
 	currpos += (currpos ? width : offset);
 	display_clear_to_eol();
@@ -544,6 +637,9 @@ void draw_scr (void) {
     display_refresh ();
 }
 
+volatile int safe_update, update_required;
+void update (void);
+
 /*
  * Get a string, in the "minibuffer". Return TRUE on success, FALSE
  * on break. Possibly syntax-highlight the entered string for
@@ -566,9 +662,9 @@ int get_str (char *prompt, char *buf, int highlight) {
 		    p++;
 		    if (p<r && *p == '\\')
 			p++, display_set_colour(COL_ESCAPE);
-		    else if (p>=r || !isxdigit (*p))
+		    else if (p>=r || !isxdigit ((unsigned char)*p))
 			display_set_colour(COL_INVALID);
-		    else if (p+1>=r || !isxdigit (p[1]))
+		    else if (p+1>=r || !isxdigit ((unsigned char)p[1]))
 			p++, display_set_colour(COL_INVALID);
 		    else
 			p+=2, display_set_colour(COL_ESCAPE);
@@ -636,7 +732,8 @@ int parse_quoted (char *buffer) {
 	    p++;
 	    if (*p == '\\')
 		*q++ = *p++;
-	    else if (p[1] && isxdigit(*p) && isxdigit(p[1])) {
+	    else if (p[1] && isxdigit((unsigned char)*p) &&
+		     isxdigit((unsigned char)p[1])) {
 		char buf[3];
 		buf[0] = *p++;
 		buf[1] = *p++;
@@ -667,8 +764,6 @@ void suspend(void) {
 #endif
 }
 
-volatile int safe_update, update_required;
-
 void update (void) {
     display_recheck_size();
     fix_offset ();
@@ -682,18 +777,18 @@ void schedule_update(void) {
 	update_required = TRUE;
 }
 
-long parse_num (char *buffer, int *error) {
+fileoffset_t parse_num (char *buffer, int *error) {
     if (error)
 	*error = FALSE;
     if (!buffer[strspn(buffer, "0123456789")]) {
 	/* interpret as decimal */
-	return atoi(buffer);
+	return ATOOFF(buffer);
     } else if (buffer[0]=='0' && (buffer[1]=='X' || buffer[1]=='x') &&
 	       !buffer[2+strspn(buffer+2,"0123456789ABCDEFabcdef")]) {
-	return strtol(buffer+2, NULL, 16);
+	return STRTOOFF(buffer+2, NULL, 16);
     } else if (buffer[0]=='$' &&
 	       !buffer[1+strspn(buffer+1,"0123456789ABCDEFabcdef")]) {
-	return strtol(buffer+1, NULL, 16);
+	return STRTOOFF(buffer+1, NULL, 16);
     } else {
 	return 0;
 	if (error)