Increase the size of the 'message' buffer, which is currently 80 bytes, to 512 bytes.

[sgt/tweak] / main.c
diff --git a/main.c b/main.c

index 9875d5a..9478f9e 100644 (file)
--- a/main.c
+++ b/main.c
@@ -1,9 +1,8 @@
  /*
- * TODO possibly after that:
+ * Potential future TODO items. Points marked ISSUE need to be
+ * resolved one way or another, with good justification for the
+ * decision made, before implementation begins.
   * 
- *  - Need to handle >2Gb files! Up the `filesize' type to long
- *    long, and use it everywhere (not just in buffer.c).
- *
   *  - Multiple buffers, multiple on-screen windows.
   *     + ^X^F to open new file
   *     + ^X^R to open new file RO
@@ -18,17 +17,16 @@
   *     + hex-editor-style minibuffer for entering search terms,
   *      rather than the current rather crap one; in particular
   *      this enables pasting into the search string.
- *     + er, how exactly do we deal with the problem of saving over
- *      a file which we're maintaining references to in another
- *      buffer? The _current_ buffer can at least be sorted out by
- *      replacing it with a fresh tree containing a single
- *      file-data block, but other buffers are in trouble.
- *       * if we can rely on Unix fd semantics, this isn't too
- *         bad; we can just keep the fd open on the original file,
- *         and then the data stays around even after we rename(2)
- *         our new version over the top. Disk space usage gets
- *         silly after a few iterations, but it's better than
- *         nothing.
+ *     + ISSUE: how exactly do we deal with the problem of saving
+ *      over a file which we're maintaining references to in
+ *      another buffer? The _current_ buffer can at least be
+ *      sorted out by replacing it with a fresh tree containing a
+ *      single file-data block, but other buffers are in trouble.
+ *       * if we can rely on Unix fd semantics, one option is just
+ *         to keep the fd open on the original file, and then the
+ *         data stays around even after we rename(2) our new
+ *         version over the top. Disk space usage gets silly after
+ *         a few iterations, but it's better than nothing.
   * 
   *  - Undo!
   *     + this actually doesn't seem _too_ horrid. For a start, one
@@ -47,36 +45,129 @@
   *      shouldn't be one. Sort that out.
   * 
   *  - In-place editing.
- *     + this is an extra option when running in Fix mode. It
- *      causes a change of semantics when saving: instead of
- *      constructing a new backup file and writing it over the old
- *      one, we simply seek within the original file and write out
- *      all the pieces that have changed.
- *     + Primarily useful for editing disk devices directly
- *      (yikes!), or other situations where you actually cannot
- *      create a fresh copy of the file and rename(2) it into
- *      place.
- *     + I had intended to suggest that in Fix mode this would be
- *      nice and easy, since every element of the buffer tree is
- *      either a literal block (needs writing) or a from-file
- *      block denoting the same file _in the same position_.
- *      However, this is not in fact the case because you can cut
- *      and paste, so it's not that easy.
- *     + So I'm forced to the conclusion that when operating in
- *      this mode, it becomes illegal to cut and paste from-file
- *      blocks: they must be loaded in full at some point.
- *       * Thinking ahead about multiple-buffer operation: it
- *         would be a bad idea to keep a from-file block
- *         referencing /dev/hda and paste it into another ordinary
- *         buffer. But _also_ it would be a bad idea to paste a
- *         from-file block referencing a file stored _on_ /dev/hda
- *         into the in-place buffer dealing with /dev/hda itself.
- *       * So I'm forced to another odd conclusion, which is that
- *         from-file blocks must be eliminated in _two_ places:
- *         when copying a cut buffer _from_ an in-place buffer,
- *         _and_ when pasting a cut buffer _into_ one.
+ *     + this is an extra option useful for editing disk devices
+ *      directly (!), or other situations in which it's impossible
+ *      or impractical to rename(2) your new file over the old
+ *      one. It causes a change of semantics when saving: instead
+ *      of constructing a new backup file and writing it over the
+ *      old one, we simply seek within the original file and write
+ *      out all the pieces that have changed.
+ *     + Saving the file involves identifying the bits of the file
+ *      that need to change, and changing them. A piece of file
+ *      can be discarded as `no change required' if it's
+ *      represented in the buffer by a from-file block whose file
+ *      offset is equal to its offset in the buffer.
+ *       * Once we have identified all the bits that do need to
+ *         change, we have to draw up a dependency graph to
+ *         indicate which bits want to be copied from which other
+ *         bits. (You don't want to overwrite a piece of file if
+ *         you still have from-file blocks pointing at that
+ *         piece.) This is a directed graph with nodes
+ *         corresponding to intervals of the file, and edges
+ *         indicating that the source node's interval is intended
+ *         to end up containing the data from the target node's
+ *         interval in the original file. Another node type is
+ *         `literal data', which can be the target of an edge but
+ *         never the source.
+ *          - note that this means any two nodes connected by an
+ *            edge must represent intervals of the same length.
+ *            Sometimes this means that an interval must be split
+ *            into pieces even though it is represented in the
+ *            buffer by a single large from-file block (if
+ *            from-file blocks copying _from_ it don't cover the
+ *            whole of it). I suspect the simplest approach here
+ *            is just to start by making a B-tree of division
+ *            points in the file: every from-file block adds four
+ *            division points (for start and end of both source
+ *            and dest interval), and once the tree is complete,
+ *            each graph node represents the interval between two
+ *            adjacent division points.
+ *          - ISSUE: actually, that strategy is inadequate:
+ *            consider a large from-file block displaced by only
+ *            one byte from its source location. The above
+ *            strategy gives division points at x, x+1, x+y,
+ *            x+y+1, but the interval [x,x+1] actually wants to
+ *            point to [x+1,x+2] and we don't have a division
+ *            point for that. Worse still, finding a way to add
+ *            the remaining division points is also undesirable
+ *            because there'd be so many of them. Needs design
+ *            changes.
+ *       * Then, any node which is not the target of any edge
+ *         represents a piece of file which it's safe to write
+ *         over, so we do so and throw away the node.
+ *       * If we run out of such nodes and the graph is still
+ *         non-empty, it's because all remaining nodes are part of
+ *         loops. A loop must represent a set of disjoint
+ *         intervals in the file, all the same length, which need
+ *         to be permuted cyclically. So we deal with such a loop
+ *         by reading a chunk of data from the start of one of the
+ *         intervals and holding it, then copying from the next
+ *         interval to that one, and so on until we've gone round
+ *         the loop.
+ *          - the intervals in the loop might be far too big to
+ *            hold an entire interval's worth of real data in
+ *            memory, so we might have to do it piecewise.
+ *     + ISSUE: I wonder if a warning of some sort might be in
+ *      order if you accidentally request that most of the file be
+ *      moved about. This sort of trickery is really intended for
+ *      small changes to a large file; if you (say) enable insert
+ *      mode while editing a hard disk and accidentally leave
+ *      everything one byte further up, you _really_ don't want to
+ *      hit Save. The semantics of the warning are difficult,
+ *      though.
+ *
+ *  - Custom display and/or input formats?
+ *     + for example, Zap on RISC OS is able to display a binary
+ *      file at 4 bytes per line and show the ARM disassembly of
+ *      each word. For added credit, ability to type an ARM
+ *      instruction back _in_ and have it reassembled into binary
+ *      would be even better.
+ *     + a simpler example is that sometimes you want to view a
+ *      file as a sequence of little-endian 32-bit words rather
+ *      than single bytes.
+ *     + this would have to involve some sort of scripting or
+ *      internal API. I'd really rather the interface was nailed
+ *      down very early on and people were then free to develop
+ *      custom formats without my involvement; I might be
+ *      persuaded to keep a library of them or a list of
+ *      hyperlinks or something, but actually _maintaining_ them
+ *      is more effort than I want.
+ *     + ARM assembler is all very well, but what about x86, with
+ *      its variable instruction length? You can start
+ *      disassembling from any byte position and work forwards
+ *      unambiguously, but going backwards or jumping to an
+ *      arbitrary byte position is much harder. You might have to
+ *      shift your current file view back or forward by one byte
+ *      to resynchronise, and the semantics of insert mode become
+ *      generally confused, and even trying to _predict_ what a
+ *      sensible synchronisation point would be when jumping to a
+ *      bit of the file you've never seen before ... yuck.
+ *       * The key thing that makes this horrid is that the custom
+ *         display mode looks at the file _contents_, not merely
+ *         its length, when deciding how many bytes per line to
+ *         display. File-position-dependent number of bytes per
+ *         line is fine, but _data_ dependency is doom.
+ *       * So I think that in the interests of not causing tension
+ *         between random things people would like in _some_ hex
+ *         editor and what makes Tweak Tweak, I am going to put my
+ *         foot down and say that I will not implement any
+ *         mechanism which permits a data-dependent number of
+ *         bytes per line. Anything short of that, fine, send me a
+ *         patch or a detailed and well thought out design and
+ *         I'll consider it on its merits.
+ *       * I don't, OTOH, see any reason why a custom display
+ *         function couldn't be permitted to see data before or
+ *         after the current lineful if it wanted to. So x86
+ *         disassembly could be done in a one-byte-per-line sort
+ *         of fashion in which each line shows the machine
+ *         instruction which the CPU would see if it started
+ *         executing at that byte, and also gave its length. Then
+ *         you could pick out the sequence of instructions you
+ *         were interested in from the various out-of-sync ones.
   */
  
+#include "tweak.h"
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
@@ -92,8 +183,6 @@
  #include <process.h>
  #endif
  
-#include "tweak.h"
-
  static void init(void);
  static void done(void);
  static void load_file (char *);
@@ -101,10 +190,10 @@ static void load_file (char *);
  char toprint[256];                    /* LUT: printable versions of chars */
  char hex[256][3];                     /* LUT: binary to hex, 1 byte */
  
-char message[80];
+char message[512];
  
-char decstatus[] = "%s TWEAK "VER": %-18.18s %s posn=%-10d size=%-10d";
-char hexstatus[] = "%s TWEAK "VER": %-18.18s %s posn=0x%-8X size=0x%-8X";
+char decstatus[] = "%s TWEAK "VER": %-18.18s %s posn=%-10"OFF"d size=%-10"OFF"d";
+char hexstatus[] = "%s TWEAK "VER": %-18.18s %s posn=0x%-8"OFF"X size=0x%-8"OFF"X";
  char *statfmt = hexstatus;
  
  char last_char;
@@ -121,12 +210,12 @@ int marking = FALSE;
  int modified = FALSE;
  int new_file = FALSE;                 /* shouldn't need initialisation -
                                         * but let's not take chances :-) */
-int width = 16;
-int realoffset = 0, offset = 16;
+fileoffset_t width = 16;
+fileoffset_t realoffset = 0, offset = 16;
  
  int ascii_enabled = TRUE;
  
-long file_size = 0, top_pos = 0, cur_pos = 0, mark_point = 0;
+fileoffset_t file_size = 0, top_pos = 0, cur_pos = 0, mark_point = 0;
  
  int scrlines;
  
@@ -134,7 +223,7 @@ int scrlines;
   * Main program
   */
  int main(int argc, char **argv) {
-    int newoffset = -1, newwidth = -1;
+    fileoffset_t newoffset = -1, newwidth = -1;
  
      /*
       * Parse command line arguments
@@ -170,10 +259,10 @@ int main(int argc, char **argv) {
                 }
                 switch (c) {
                   case 'o': case 'O':
-                   newoffset = strtol(value, NULL, 0);   /* allow `0xXX' */
+                   newoffset = parse_num(value, NULL);
                     break;
                   case 'w': case 'W':
-                   newwidth = strtol(value, NULL, 0);
+                   newwidth = parse_num(value, NULL);
                     break;
                 }
                 break;
@@ -229,7 +318,7 @@ int main(int argc, char **argv) {
  void fix_offset(void) {
      if (3*width+11 > display_cols) {
         width = (display_cols-11) / 3;
-       sprintf (message, "Width reduced to %d to fit on the screen", width);
+       sprintf (message, "Width reduced to %"OFF"d to fit on the screen", width);
      }
      if (4*width+14 > display_cols) {
         ascii_enabled = FALSE;
@@ -279,7 +368,7 @@ static void load_file (char *fname) {
      file_size = 0;
      if ( (fp = fopen (fname, "rb")) ) {
         if (eager_mode) {
-           long len;
+           size_t len;
             static char buffer[4096];
  
             filedata = buf_new_empty();
@@ -295,12 +384,12 @@ static void load_file (char *fname) {
             }
             fclose (fp);
             assert(file_size == buf_length(filedata));
-           sprintf(message, "loaded %s (size %ld == 0x%lX).",
+           sprintf(message, "loaded %s (size %"OFF"d == 0x%"OFF"X).",
                     fname, file_size, file_size);
         } else {
             filedata = buf_new_from_file(fp);
             file_size = buf_length(filedata);
-           sprintf(message, "opened %s (size %ld == 0x%lX).",
+           sprintf(message, "opened %s (size %"OFF"d == 0x%"OFF"X).",
                     fname, file_size, file_size);
         }
         new_file = FALSE;
@@ -321,7 +410,7 @@ static void load_file (char *fname) {
   */
  int save_file (void) {
      FILE *fp;
-    long pos = 0;
+    fileoffset_t pos = 0;
  
      if (look_mode)
         return FALSE;                  /* do nothing! */
@@ -330,7 +419,7 @@ int save_file (void) {
         static char buffer[SAVE_BLKSIZ];
  
         while (pos < file_size) {
-           long size = file_size - pos;
+           fileoffset_t size = file_size - pos;
             if (size > SAVE_BLKSIZ)
                 size = SAVE_BLKSIZ;
  
@@ -380,30 +469,30 @@ int backup_file (void) {
  }
  
  static unsigned char *scrbuf = NULL;
-static int scrbuflines = 0;
+static int scrbufsize = 0;
  
  /*
   * Draw the screen, for normal usage.
   */
  void draw_scr (void) {
      int scrsize, scroff, llen, i, j;
-    long currpos;
-    int marktop, markbot, mark;
+    fileoffset_t currpos;
+    fileoffset_t marktop, markbot;
+    int mark;
      char *p;
      unsigned char c, *q;
      char *linebuf;
  
      scrlines = display_rows - 2;
-    if (scrlines > scrbuflines) {
-       scrbuf = (scrbuf ?
-                 realloc(scrbuf, scrlines*width) :
-                 malloc(scrlines*width));
+    scrsize = scrlines * width;
+    if (scrsize > scrbufsize) {
+       scrbuf = (scrbuf ? realloc(scrbuf, scrsize) : malloc(scrsize));
         if (!scrbuf) {
             done();
             fprintf(stderr, "%s: out of memory!\n", pname);
             exit (2);
         }
-       scrbuflines = scrlines;
+       scrbufsize = scrsize;
      }
  
      linebuf = malloc(width*4+20);
@@ -419,7 +508,8 @@ void draw_scr (void) {
         scroff = width - offset;
      else
         scroff = 0;
-    scrsize = scrlines * width - scroff;
+
+    scrsize -= scroff;
      if (scrsize > file_size - top_pos)
         scrsize = file_size - top_pos;
  
@@ -476,9 +566,10 @@ void draw_scr (void) {
                  * requiring highlighting: a hex bit and an ascii
                  * bit.
                  */
-               int localstart= (currpos<marktop?marktop:currpos) - currpos;
-               int localstop = (currpos+llen>markbot ? markbot :
-                                currpos+llen) - currpos;
+               fileoffset_t localstart= (currpos<marktop ? marktop :
+                                          currpos) - currpos;
+               fileoffset_t localstop = (currpos+llen>markbot ? markbot :
+                                          currpos+llen) - currpos;
                 localstart += width-llen;
                 localstop += width-llen;
                 display_write_chars(linebuf, 11+3*localstart);
@@ -571,9 +662,9 @@ int get_str (char *prompt, char *buf, int highlight) {
                     p++;
                     if (p<r && *p == '\\')
                         p++, display_set_colour(COL_ESCAPE);
-                   else if (p>=r || !isxdigit (*p))
+                   else if (p>=r || !isxdigit ((unsigned char)*p))
                         display_set_colour(COL_INVALID);
-                   else if (p+1>=r || !isxdigit (p[1]))
+                   else if (p+1>=r || !isxdigit ((unsigned char)p[1]))
                         p++, display_set_colour(COL_INVALID);
                     else
                         p+=2, display_set_colour(COL_ESCAPE);
@@ -641,7 +732,8 @@ int parse_quoted (char *buffer) {
             p++;
             if (*p == '\\')
                 *q++ = *p++;
-           else if (p[1] && isxdigit(*p) && isxdigit(p[1])) {
+           else if (p[1] && isxdigit((unsigned char)*p) &&
+                    isxdigit((unsigned char)p[1])) {
                 char buf[3];
                 buf[0] = *p++;
                 buf[1] = *p++;
@@ -685,18 +777,18 @@ void schedule_update(void) {
         update_required = TRUE;
  }
  
-long parse_num (char *buffer, int *error) {
+fileoffset_t parse_num (char *buffer, int *error) {
      if (error)
         *error = FALSE;
      if (!buffer[strspn(buffer, "0123456789")]) {
         /* interpret as decimal */
-       return atoi(buffer);
+       return ATOOFF(buffer);
      } else if (buffer[0]=='0' && (buffer[1]=='X' || buffer[1]=='x') &&
                !buffer[2+strspn(buffer+2,"0123456789ABCDEFabcdef")]) {
-       return strtol(buffer+2, NULL, 16);
+       return STRTOOFF(buffer+2, NULL, 16);
      } else if (buffer[0]=='$' &&
                !buffer[1+strspn(buffer+1,"0123456789ABCDEFabcdef")]) {
-       return strtol(buffer+1, NULL, 16);
+       return STRTOOFF(buffer+1, NULL, 16);
      } else {
         return 0;
         if (error)