Back to index

cell-binutils  2.17cvs20070401
strings.c
Go to the documentation of this file.
00001 /* strings -- print the strings of printable characters in files
00002    Copyright 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
00003    2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
00004 
00005    This program is free software; you can redistribute it and/or modify
00006    it under the terms of the GNU General Public License as published by
00007    the Free Software Foundation; either version 2, or (at your option)
00008    any later version.
00009 
00010    This program is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013    GNU General Public License for more details.
00014 
00015    You should have received a copy of the GNU General Public License
00016    along with this program; if not, write to the Free Software
00017    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
00018    02110-1301, USA.  */
00019 
00020 /* Usage: strings [options] file...
00021 
00022    Options:
00023    --all
00024    -a
00025    -          Do not scan only the initialized data section of object files.
00026 
00027    --print-file-name
00028    -f         Print the name of the file before each string.
00029 
00030    --bytes=min-len
00031    -n min-len
00032    -min-len   Print graphic char sequences, MIN-LEN or more bytes long,
00033               that are followed by a NUL or a newline.  Default is 4.
00034 
00035    --radix={o,x,d}
00036    -t {o,x,d} Print the offset within the file before each string,
00037               in octal/hex/decimal.
00038 
00039    -o         Like -to.  (Some other implementations have -o like -to,
00040               others like -td.  We chose one arbitrarily.)
00041 
00042    --encoding={s,S,b,l,B,L}
00043    -e {s,S,b,l,B,L}
00044               Select character encoding: 7-bit-character, 8-bit-character,
00045               bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
00046               littleendian 32-bit.
00047 
00048    --target=BFDNAME
00049    -T {bfdname}
00050               Specify a non-default object file format.
00051 
00052    --help
00053    -h         Print the usage message on the standard output.
00054 
00055    --version
00056    -v         Print the program version number.
00057 
00058    Written by Richard Stallman <rms@gnu.ai.mit.edu>
00059    and David MacKenzie <djm@gnu.ai.mit.edu>.  */
00060 
00061 #ifdef HAVE_CONFIG_H
00062 #include "config.h"
00063 #endif
00064 #include "bfd.h"
00065 #include <stdio.h>
00066 #include "getopt.h"
00067 #include <errno.h>
00068 #include "bucomm.h"
00069 #include "libiberty.h"
00070 #include "safe-ctype.h"
00071 #include <sys/stat.h>
00072 
/* Some platforms need to put stdin into binary mode, to read
   binary files.

   SET_BINARY (f) is defined only when HAVE_SETMODE is defined and
   O_BINARY ends up nonzero.  It switches file descriptor F to binary
   mode unless F refers to a terminal.  Because the macro may be left
   undefined, every use must be guarded with #ifdef SET_BINARY.  */
#ifdef HAVE_SETMODE
#ifndef O_BINARY
#ifdef _O_BINARY
/* Only the underscore-prefixed spellings are available; map the plain
   names onto them.  */
#define O_BINARY _O_BINARY
#define setmode _setmode
#else
/* No binary-mode flag is known; 0 keeps SET_BINARY undefined below.  */
#define O_BINARY 0
#endif
#endif
#if O_BINARY
#include <io.h>
#define SET_BINARY(f) do { if (!isatty (f)) setmode (f,O_BINARY); } while (0)
#endif
#endif
00089 
/* Nonzero if C may be part of a printed string: C must lie in the
   range 0..255 and be a tab, satisfy ISPRINT, or - only when the
   global `encoding' is 'S' - be greater than 127.

   C is evaluated several times, so the argument must not have side
   effects.  */
#define STRING_ISGRAPHIC(c) \
      (   (c) >= 0 \
       && (c) <= 255 \
       && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127)))
00094 
00095 #ifndef errno
00096 extern int errno;
00097 #endif
00098 
00099 /* The BFD section flags that identify an initialized data section.  */
00100 #define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
00101 
/* file_off is the type used for offsets within a file and
   file_open (s, m) opens a stream on file S with mode M.  When
   HAVE_FOPEN64 is defined these are off64_t and fopen64, otherwise
   off_t and fopen.  */
#ifdef HAVE_FOPEN64
typedef off64_t file_off;
#define file_open(s,m) fopen64(s, m)
#else
typedef off_t file_off;
#define file_open(s,m) fopen(s, m)
#endif
/* statbuf is the buffer type filled in by file_stat (f, s).  When
   HAVE_STAT64 is defined these are struct stat64 and stat64, otherwise
   struct stat and stat.  */
#ifdef HAVE_STAT64
typedef struct stat64 statbuf;
#define file_stat(f,s) stat64(f, s)
#else
typedef struct stat statbuf;
#define file_stat(f,s) stat(f, s)
#endif
00116 
/* Radix for printing addresses (must be 8, 10 or 16).  Set by the -o
   and -t options and only consulted when print_addresses is TRUE.  */
static int address_radix;

/* Minimum length of sequence of graphic chars to trigger output.
   main () holds -1 here while no length has been given on the command
   line and substitutes the default of 4 afterwards.  */
static int string_min;

/* TRUE means print address within file for each string.  */
static bfd_boolean print_addresses;

/* TRUE means print filename for each string.  */
static bfd_boolean print_filenames;

/* TRUE means for object files scan only the data section.  Cleared by
   -a, by a "-" argument, and when standard input is read.  */
static bfd_boolean datasection_only;

/* TRUE if we found an initialized data section in the current file.
   Reset by strings_object_file () and set by strings_a_section ().  */
static bfd_boolean got_a_section;

/* The BFD object file format given with -T; NULL if -T was not used.  */
static char *target;

/* The character encoding format: one of 's', 'S', 'b', 'l', 'B', 'L'
   (main () rejects anything else).  */
static char encoding;
/* Number of bytes that make up one character in that encoding:
   1, 2 or 4.  */
static int encoding_bytes;
00141 
/* Long options accepted by getopt_long () in main ().  Every entry has
   a NULL flag pointer and yields the character of the equivalent short
   option, so long and short spellings share one `case' in main ().
   The all-zero entry terminates the table.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
00154 
/* Records the size of a named file so that we
   do not repeatedly run bfd_stat() on it.  A pointer to one of these
   is handed to strings_a_section () for every section of a file.  */

typedef struct
{
  /* Name of the file being scanned, as printed before each string.  */
  const char *  filename;
  /* Size of that file, or 0 while the size has not been looked up.  */
  bfd_size_type filesize;
} filename_and_size_t;
00163 
00164 static void strings_a_section (bfd *, asection *, void *);
00165 static bfd_boolean strings_object_file (const char *);
00166 static bfd_boolean strings_file (char *file);
00167 static int integer_arg (char *s);
00168 static void print_strings (const char *, FILE *, file_off, int, int, char *);
00169 static void usage (FILE *, int);
00170 static long get_char (FILE *, file_off *, int *, char **);
00171 
00172 int main (int, char **);
00173 
00174 int
00175 main (int argc, char **argv)
00176 {
00177   int optc;
00178   int exit_status = 0;
00179   bfd_boolean files_given = FALSE;
00180 
00181 #if defined (HAVE_SETLOCALE)
00182   setlocale (LC_ALL, "");
00183 #endif
00184   bindtextdomain (PACKAGE, LOCALEDIR);
00185   textdomain (PACKAGE);
00186 
00187   program_name = argv[0];
00188   xmalloc_set_program_name (program_name);
00189 
00190   expandargv (&argc, &argv);
00191 
00192   string_min = -1;
00193   print_addresses = FALSE;
00194   print_filenames = FALSE;
00195   datasection_only = TRUE;
00196   target = NULL;
00197   encoding = 's';
00198 
00199   while ((optc = getopt_long (argc, argv, "afhHn:ot:e:T:Vv0123456789",
00200                            long_options, (int *) 0)) != EOF)
00201     {
00202       switch (optc)
00203        {
00204        case 'a':
00205          datasection_only = FALSE;
00206          break;
00207 
00208        case 'f':
00209          print_filenames = TRUE;
00210          break;
00211 
00212        case 'H':
00213        case 'h':
00214          usage (stdout, 0);
00215 
00216        case 'n':
00217          string_min = integer_arg (optarg);
00218          if (string_min < 1)
00219            fatal (_("invalid number %s"), optarg);
00220          break;
00221 
00222        case 'o':
00223          print_addresses = TRUE;
00224          address_radix = 8;
00225          break;
00226 
00227        case 't':
00228          print_addresses = TRUE;
00229          if (optarg[1] != '\0')
00230            usage (stderr, 1);
00231          switch (optarg[0])
00232            {
00233            case 'o':
00234              address_radix = 8;
00235              break;
00236 
00237            case 'd':
00238              address_radix = 10;
00239              break;
00240 
00241            case 'x':
00242              address_radix = 16;
00243              break;
00244 
00245            default:
00246              usage (stderr, 1);
00247            }
00248          break;
00249 
00250        case 'T':
00251          target = optarg;
00252          break;
00253 
00254        case 'e':
00255          if (optarg[1] != '\0')
00256            usage (stderr, 1);
00257          encoding = optarg[0];
00258          break;
00259 
00260        case 'V':
00261        case 'v':
00262          print_version ("strings");
00263          break;
00264 
00265        case '?':
00266          usage (stderr, 1);
00267 
00268        default:
00269          if (string_min < 0)
00270            string_min = optc - '0';
00271          else
00272            string_min = string_min * 10 + optc - '0';
00273          break;
00274        }
00275     }
00276 
00277   if (string_min < 0)
00278     string_min = 4;
00279 
00280   switch (encoding)
00281     {
00282     case 'S':
00283     case 's':
00284       encoding_bytes = 1;
00285       break;
00286     case 'b':
00287     case 'l':
00288       encoding_bytes = 2;
00289       break;
00290     case 'B':
00291     case 'L':
00292       encoding_bytes = 4;
00293       break;
00294     default:
00295       usage (stderr, 1);
00296     }
00297 
00298   bfd_init ();
00299   set_default_bfd_target ();
00300 
00301   if (optind >= argc)
00302     {
00303       datasection_only = FALSE;
00304 #ifdef SET_BINARY
00305       SET_BINARY (fileno (stdin));
00306 #endif
00307       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
00308       files_given = TRUE;
00309     }
00310   else
00311     {
00312       for (; optind < argc; ++optind)
00313        {
00314          if (strcmp (argv[optind], "-") == 0)
00315            datasection_only = FALSE;
00316          else
00317            {
00318              files_given = TRUE;
00319              exit_status |= strings_file (argv[optind]) == FALSE;
00320            }
00321        }
00322     }
00323 
00324   if (!files_given)
00325     usage (stderr, 1);
00326 
00327   return (exit_status);
00328 }
00329 
00330 /* Scan section SECT of the file ABFD, whose printable name is in
00331    ARG->filename and whose size might be in ARG->filesize.  If it
00332    contains initialized data set `got_a_section' and print the
00333    strings in it.
00334 
00335    FIXME: We ought to be able to return error codes/messages for
00336    certain conditions.  */
00337 
00338 static void
00339 strings_a_section (bfd *abfd, asection *sect, void *arg)
00340 {
00341   filename_and_size_t * filename_and_sizep;
00342   bfd_size_type *filesizep;
00343   bfd_size_type sectsize;
00344   void *mem;
00345      
00346   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
00347     return;
00348 
00349   sectsize = bfd_get_section_size (sect);
00350      
00351   if (sectsize <= 0)
00352     return;
00353 
00354   /* Get the size of the file.  This might have been cached for us.  */
00355   filename_and_sizep = (filename_and_size_t *) arg;
00356   filesizep = & filename_and_sizep->filesize;
00357 
00358   if (*filesizep == 0)
00359     {
00360       struct stat st;
00361       
00362       if (bfd_stat (abfd, &st))
00363        return;
00364 
00365       /* Cache the result so that we do not repeatedly stat this file.  */
00366       *filesizep = st.st_size;
00367     }
00368 
00369   /* Compare the size of the section against the size of the file.
00370      If the section is bigger then the file must be corrupt and
00371      we should not try dumping it.  */
00372   if (sectsize >= *filesizep)
00373     return;
00374 
00375   mem = xmalloc (sectsize);
00376 
00377   if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
00378     {
00379       got_a_section = TRUE;
00380 
00381       print_strings (filename_and_sizep->filename, NULL, sect->filepos,
00382                    0, sectsize, mem);
00383     }
00384 
00385   free (mem);
00386 }
00387 
00388 /* Scan all of the sections in FILE, and print the strings
00389    in the initialized data section(s).
00390 
00391    Return TRUE if successful,
00392    FALSE if not (such as if FILE is not an object file).  */
00393 
00394 static bfd_boolean
00395 strings_object_file (const char *file)
00396 {
00397   filename_and_size_t filename_and_size;
00398   bfd *abfd;
00399 
00400   abfd = bfd_openr (file, target);
00401 
00402   if (abfd == NULL)
00403     /* Treat the file as a non-object file.  */
00404     return FALSE;
00405 
00406   /* This call is mainly for its side effect of reading in the sections.
00407      We follow the traditional behavior of `strings' in that we don't
00408      complain if we don't recognize a file to be an object file.  */
00409   if (!bfd_check_format (abfd, bfd_object))
00410     {
00411       bfd_close (abfd);
00412       return FALSE;
00413     }
00414 
00415   got_a_section = FALSE;
00416   filename_and_size.filename = file;
00417   filename_and_size.filesize = 0;
00418   bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
00419 
00420   if (!bfd_close (abfd))
00421     {
00422       bfd_nonfatal (file);
00423       return FALSE;
00424     }
00425 
00426   return got_a_section;
00427 }
00428 
00429 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
00430 
00431 static bfd_boolean
00432 strings_file (char *file)
00433 {
00434   statbuf st;
00435 
00436   if (file_stat (file, &st) < 0)
00437     {
00438       if (errno == ENOENT)
00439        non_fatal (_("'%s': No such file"), file);
00440       else
00441        non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
00442                  file, strerror (errno));
00443       return FALSE;
00444     }
00445 
00446   /* If we weren't told to scan the whole file,
00447      try to open it as an object file and only look at
00448      initialized data sections.  If that fails, fall back to the
00449      whole file.  */
00450   if (!datasection_only || !strings_object_file (file))
00451     {
00452       FILE *stream;
00453 
00454       stream = file_open (file, FOPEN_RB);
00455       if (stream == NULL)
00456        {
00457          fprintf (stderr, "%s: ", program_name);
00458          perror (file);
00459          return FALSE;
00460        }
00461 
00462       print_strings (file, stream, (file_off) 0, 0, 0, (char *) 0);
00463 
00464       if (fclose (stream) == EOF)
00465        {
00466          fprintf (stderr, "%s: ", program_name);
00467          perror (file);
00468          return FALSE;
00469        }
00470     }
00471 
00472   return TRUE;
00473 }
00474 
00475 /* Read the next character, return EOF if none available.
00476    Assume that STREAM is positioned so that the next byte read
00477    is at address ADDRESS in the file.
00478 
00479    If STREAM is NULL, do not read from it.
00480    The caller can supply a buffer of characters
00481    to be processed before the data in STREAM.
00482    MAGIC is the address of the buffer and
00483    MAGICCOUNT is how many characters are in it.  */
00484 
00485 static long
00486 get_char (FILE *stream, file_off *address, int *magiccount, char **magic)
00487 {
00488   int c, i;
00489   long r = EOF;
00490   unsigned char buf[4];
00491 
00492   for (i = 0; i < encoding_bytes; i++)
00493     {
00494       if (*magiccount)
00495        {
00496          (*magiccount)--;
00497          c = *(*magic)++;
00498        }
00499       else
00500        {
00501          if (stream == NULL)
00502            return EOF;
00503 
00504          /* Only use getc_unlocked if we found a declaration for it.
00505             Otherwise, libc is not thread safe by default, and we
00506             should not use it.  */
00507 
00508 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
00509          c = getc_unlocked (stream);
00510 #else
00511          c = getc (stream);
00512 #endif
00513          if (c == EOF)
00514            return EOF;
00515        }
00516 
00517       (*address)++;
00518       buf[i] = c;
00519     }
00520 
00521   switch (encoding)
00522     {
00523     case 'S':
00524     case 's':
00525       r = buf[0];
00526       break;
00527     case 'b':
00528       r = (buf[0] << 8) | buf[1];
00529       break;
00530     case 'l':
00531       r = buf[0] | (buf[1] << 8);
00532       break;
00533     case 'B':
00534       r = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
00535        ((long) buf[2] << 8) | buf[3];
00536       break;
00537     case 'L':
00538       r = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
00539        ((long) buf[3] << 24);
00540       break;
00541     }
00542 
00543   if (r == EOF)
00544     return 0;
00545 
00546   return r;
00547 }
00548 
00549 /* Find the strings in file FILENAME, read from STREAM.
00550    Assume that STREAM is positioned so that the next byte read
00551    is at address ADDRESS in the file.
00552    Stop reading at address STOP_POINT in the file, if nonzero.
00553 
00554    If STREAM is NULL, do not read from it.
00555    The caller can supply a buffer of characters
00556    to be processed before the data in STREAM.
00557    MAGIC is the address of the buffer and
00558    MAGICCOUNT is how many characters are in it.
00559    Those characters come at address ADDRESS and the data in STREAM follow.  */
00560 
00561 static void
00562 print_strings (const char *filename, FILE *stream, file_off address,
00563               int stop_point, int magiccount, char *magic)
00564 {
00565   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
00566 
00567   while (1)
00568     {
00569       file_off start;
00570       int i;
00571       long c;
00572 
00573       /* See if the next `string_min' chars are all graphic chars.  */
00574     tryline:
00575       if (stop_point && address >= stop_point)
00576        break;
00577       start = address;
00578       for (i = 0; i < string_min; i++)
00579        {
00580          c = get_char (stream, &address, &magiccount, &magic);
00581          if (c == EOF)
00582            return;
00583          if (! STRING_ISGRAPHIC (c))
00584            /* Found a non-graphic.  Try again starting with next char.  */
00585            goto tryline;
00586          buf[i] = c;
00587        }
00588 
00589       /* We found a run of `string_min' graphic characters.  Print up
00590         to the next non-graphic character.  */
00591 
00592       if (print_filenames)
00593        printf ("%s: ", filename);
00594       if (print_addresses)
00595        switch (address_radix)
00596          {
00597          case 8:
00598 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
00599            if (sizeof (start) > sizeof (long))
00600              printf ("%7Lo ", (unsigned long long) start);
00601            else
00602 #else
00603 # if !BFD_HOST_64BIT_LONG
00604            if (start != (unsigned long) start)
00605              printf ("++%7lo ", (unsigned long) start);
00606            else
00607 # endif
00608 #endif
00609              printf ("%7lo ", (unsigned long) start);
00610            break;
00611 
00612          case 10:
00613 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
00614            if (sizeof (start) > sizeof (long))
00615              printf ("%7Ld ", (unsigned long long) start);
00616            else
00617 #else
00618 # if !BFD_HOST_64BIT_LONG
00619            if (start != (unsigned long) start)
00620              printf ("++%7ld ", (unsigned long) start);
00621            else
00622 # endif
00623 #endif
00624              printf ("%7ld ", (long) start);
00625            break;
00626 
00627          case 16:
00628 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
00629            if (sizeof (start) > sizeof (long))
00630              printf ("%7Lx ", (unsigned long long) start);
00631            else
00632 #else
00633 # if !BFD_HOST_64BIT_LONG
00634            if (start != (unsigned long) start)
00635              printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
00636                     (unsigned long) (start & 0xffffffff));
00637            else
00638 # endif
00639 #endif
00640              printf ("%7lx ", (unsigned long) start);
00641            break;
00642          }
00643 
00644       buf[i] = '\0';
00645       fputs (buf, stdout);
00646 
00647       while (1)
00648        {
00649          c = get_char (stream, &address, &magiccount, &magic);
00650          if (c == EOF)
00651            break;
00652          if (! STRING_ISGRAPHIC (c))
00653            break;
00654          putchar (c);
00655        }
00656 
00657       putchar ('\n');
00658     }
00659 }
00660 
/* Parse string S as a non-negative integer, using decimal radix by
   default, but allowing octal ("0" prefix) and hex ("0x" or "0X"
   prefix) numbers as in C.

   A decimal or octal number may be followed by a single `b', which
   multiplies the value by 512, or `B', which multiplies it by 1024.
   In hex both letters are digits, so no suffix is possible there.

   Returns the parsed value.  Any other trailing character, including
   a digit that is not valid in the selected radix, is reported through
   fatal ().  */

static int
integer_arg (char *s)
{
  int value = 0;
  int radix = 10;
  char *p = s;
  int c;

  if (*p == '0')
    {
      p++;
      if (*p == 'x' || *p == 'X')
        {
          radix = 16;
          p++;
        }
      else
        radix = 8;
    }

  /* Accumulate digits.  On exit C holds the first character that is
     not a digit of RADIX and P points just past it.  */
  for (;;)
    {
      int digit;

      c = *p++;
      if (c >= '0' && c <= '9')
        digit = c - '0';
      else if (radix == 16 && c >= 'a' && c <= 'f')
        digit = c - 'a' + 10;
      else if (radix == 16 && c >= 'A' && c <= 'F')
        digit = c - 'A' + 10;
      else
        break;

      /* E.g. `8' or `9' in an octal number; rejected below.  */
      if (digit >= radix)
        break;

      value = value * radix + digit;
    }

  if (c == 'b')
    value *= 512;
  else if (c == 'B')
    value *= 1024;
  else
    /* Not a suffix: step back so the check below looks at it.  */
    p--;

  /* Anything left over means S was not a well-formed number.  */
  if (*p)
    fatal (_("invalid integer argument %s"), s);

  return value;
}
00705 
00706 static void
00707 usage (FILE *stream, int status)
00708 {
00709   fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
00710   fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
00711   fprintf (stream, _(" The options are:\n\
00712   -a - --all                Scan the entire file, not just the data section\n\
00713   -f --print-file-name      Print the name of the file before each string\n\
00714   -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
00715   -<number>                 least [number] characters (default 4).\n\
00716   -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
00717   -o                        An alias for --radix=o\n\
00718   -T --target=<BFDNAME>     Specify the binary file format\n\
00719   -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
00720                             s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
00721   @<file>                   Read options from <file>\n\
00722   -h --help                 Display this information\n\
00723   -v --version              Print the program's version number\n"));
00724   list_supported_targets (program_name, stream);
00725   if (REPORT_BUGS_TO[0] && status == 0)
00726     fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
00727   exit (status);
00728 }