diff mbox

gas crash on unreasonably long lines

Message ID 9a2cae6b-0678-9731-68a1-3e9bd8cdb36c@acm.org
State New
Headers show

Commit Message

Nathan Sidwell Nov. 2, 2016, 10:38 p.m. UTC
We encountered a gas segfault when assembling some C++ code that had a 
monstrous mangled name.  While I convinced the author to change their 
C++, it'd be nice if the assembler didn't blow up.

g++ was generating a mangled name of some 590K characters, and thus 
ending up with source lines of that order of magnitude.  However:

1) the buffer reading code presumed size_t, int and unsigned were all 
interchangeable types.  Fixed by using size_t consistently.

2) the 'buffer_length' global var was used for two different purposes:
   (a) holding the length of (approximately) half of the input buffer
   (b) indicating how many chars file_give_next_buffer would read.
But F_G_N_B (now?) always reads input_file_buffer_size() bytes (32K), 
and the former meaning is just crazy.  Fixed by having it hold the 
buffer length sans BEFORE and AFTER counts.

3) The buffer expansion code was just confused.  It would add another 
input buffer's worth of chars, but then double that.  This eventually 
led to the observed segfault when doubling 2GB to zero (remember, it was 
incorrectly using a plain int).  We were calling realloc every time 
round the 'read more chars' loop - even if it was already long enough. 
When the seg fault happened, the input line was 'only' around 500K. 
Fixed by using the usual doubling idiom, and only when there's fewer 
than I_F_B_S() bytes available.  So we call realloc much less often.

4) to top it all off, the code that scanned the just-read block looking 
for a newline, actually scanned the whole input buffer.  Even though a 
\n could only be in the newly read bit.  Fixed this O(N^2) behaviour by 
just scanning the newly read chars.

I attempted to reduce the 6GB testcase, but could only get it down to 
about 2GB.  I think this impractical for the testsuite.

ok?

nathan
-- 
Nathan Sidwell

Comments

Nick Clifton Nov. 3, 2016, 12:08 p.m. UTC | #1
Hi Nathan,

> ok?


Yes - please apply.

Cheers
  Nick
diff mbox

Patch

2016-10-25  Nathan Sidwell  <nathan@acm.org>

	gas/
	* input-scrub.c (partial_size): Make size_t.
	(buffer_length): Likewise.  Adjust meaning.
	(struct input_save): Adjust partial_size type.
	(input_scrub_reinit): New.
	(input_scrub_push, input_scrub_begin): Use it.
	(input_scrub_next_buffer): Fix buffer extension logic. Only scan
	newly read buffer for newline.

diff --git a/gas/input-scrub.c b/gas/input-scrub.c
index 1de5e03..f5f2af2 100644
--- a/gas/input-scrub.c
+++ b/gas/input-scrub.c
@@ -61,16 +61,17 @@ 
 
 static char *buffer_start;	/*->1st char of full buffer area.  */
 static char *partial_where;	/*->after last full line in buffer.  */
-static int partial_size;	/* >=0. Number of chars in partial line in buffer.  */
+static size_t partial_size;	/* >=0. Number of chars in partial line in buffer.  */
 
 /* Because we need AFTER_STRING just after last full line, it clobbers
    1st part of partial line. So we preserve 1st part of partial line
    here.  */
 static char save_source[AFTER_SIZE];
 
-/* What is the largest size buffer that input_file_give_next_buffer()
-   could return to us?  */
-static unsigned int buffer_length;
+/* The size of the input buffer we concatenate
+   input_file_give_next_buffer chunks into.  Excludes the BEFORE and
+   AFTER counts.  */
+static size_t buffer_length;
 
 /* The index into an sb structure we are reading from.  -1 if none.  */
 static size_t sb_index = -1;
@@ -107,7 +108,7 @@  static int logical_input_line;
 struct input_save {
   char *              buffer_start;
   char *              partial_where;
-  int                 partial_size;
+  size_t              partial_size;
   char                save_source[AFTER_SIZE];
   size_t              buffer_length;
   const char *              physical_input_file;
@@ -130,6 +131,20 @@  static char *input_scrub_pop (struct input_save *arg);
 
 static struct input_save *next_saved_file;
 
+/* Initialize input buffering.  */
+
+static void
+input_scrub_reinit (void)
+{
+  input_file_begin ();		/* Reinitialize! */
+  logical_input_line = -1;
+  logical_input_file = NULL;
+
+  buffer_length = input_file_buffer_size () * 2;
+  buffer_start = XNEWVEC (char, BEFORE_SIZE + AFTER_SIZE + 1 + buffer_length);
+  memcpy (buffer_start, BEFORE_STRING, (int) BEFORE_SIZE);
+}
+
 /* Push the state of input reading and scrubbing so that we can #include.
    The return value is a 'void *' (fudged for old compilers) to a save
    area, which can be restored by passing it to input_scrub_pop().  */
@@ -157,15 +172,9 @@  input_scrub_push (char *saved_position)
   saved->next_saved_file = next_saved_file;
   saved->input_file_save = input_file_push ();
 
-  input_file_begin ();		/* Reinitialize! */
-  logical_input_line = -1;
-  logical_input_file = NULL;
-  buffer_length = input_file_buffer_size ();
   sb_index = -1;
 
-  buffer_start = XNEWVEC (char, (BEFORE_SIZE + buffer_length
-				 + buffer_length + AFTER_SIZE + 1));
-  memcpy (buffer_start, BEFORE_STRING, (int) BEFORE_SIZE);
+  input_scrub_reinit ();
 
   return saved;
 }
@@ -204,19 +213,9 @@  input_scrub_begin (void)
   know (strlen (AFTER_STRING) == AFTER_SIZE
 	|| (AFTER_STRING[0] == '\0' && AFTER_SIZE == 1));
 
-  input_file_begin ();
-
-  buffer_length = input_file_buffer_size ();
-
-  buffer_start = XNEWVEC (char, (BEFORE_SIZE + buffer_length
-				 + buffer_length + AFTER_SIZE + 1));
-  memcpy (buffer_start, BEFORE_STRING, (int) BEFORE_SIZE);
-
-  /* Line number things.  */
-  logical_input_line = -1;
-  logical_input_file = NULL;
   physical_input_file = NULL;	/* No file read yet.  */
   next_saved_file = NULL;	/* At EOF, don't pop to any other file */
+  input_scrub_reinit ();
   do_scrub_begin (flag_m68k_mri);
 }
 
@@ -344,22 +343,21 @@  input_scrub_next_buffer (char **bufp)
 
   if (partial_size)
     {
-      memmove (buffer_start + BEFORE_SIZE, partial_where,
-	       (unsigned int) partial_size);
+      memmove (buffer_start + BEFORE_SIZE, partial_where, partial_size);
       memcpy (buffer_start + BEFORE_SIZE, save_source, AFTER_SIZE);
     }
 
   while (1)
     {
       char *p;
+      char *start = buffer_start + BEFORE_SIZE + partial_size;
 
       *bufp = buffer_start + BEFORE_SIZE;
-      limit = input_file_give_next_buffer (buffer_start
-					   + BEFORE_SIZE
-					   + partial_size);
+      limit = input_file_give_next_buffer (start);
       if (!limit)
 	{
-	  if (partial_size == 0)
+	  if (!partial_size)
+	    /* End of this file.  */
 	    break;
 
 	  as_warn (_("end of file not at end of a line; newline inserted"));
@@ -374,25 +372,32 @@  input_scrub_next_buffer (char **bufp)
 
 	  /* Find last newline.  */
 	  for (p = limit - 1; *p != '\n' || TC_EOL_IN_INSN (p); --p)
-	    ;
+	    if (p < start)
+	      goto read_more;
 	  ++p;
 	}
 
-      if (p != buffer_start + BEFORE_SIZE)
-	{
-	  partial_where = p;
-	  partial_size = limit - p;
-	  memcpy (save_source, partial_where, (int) AFTER_SIZE);
-	  memcpy (partial_where, AFTER_STRING, (int) AFTER_SIZE);
-	  return partial_where;
-	}
+      /* We found a newline in the newly read chars.  */
+      partial_where = p;
+      partial_size = limit - p;
+
+      /* Save the fragment after that last newline.  */
+      memcpy (save_source, partial_where, (int) AFTER_SIZE);
+      memcpy (partial_where, AFTER_STRING, (int) AFTER_SIZE);
+      return partial_where;
 
+    read_more:
+      /* Didn't find a newline.  Read more text.  */
       partial_size = limit - (buffer_start + BEFORE_SIZE);
-      buffer_length += input_file_buffer_size ();
-      buffer_start = XRESIZEVEC (char, buffer_start,
-				 (BEFORE_SIZE
-				  + 2 * buffer_length
-				  + AFTER_SIZE + 1));
+      if (buffer_length - input_file_buffer_size () < partial_size)
+	{
+	  /* Increase the buffer when it doesn't have room for the
+	     next block of input.  */
+	  buffer_length *= 2;
+	  buffer_start = XRESIZEVEC (char, buffer_start,
+				     (buffer_length
+				      + BEFORE_SIZE + AFTER_SIZE + 1));
+	}
     }
 
   /* Tell the listing we've finished the file.  */