PROXY  WHOIS  RQUOTE  TEXTS  SOFT  FOREX  BBOARD
 Music  Philosophy  Code  Literature  Russian

= ROOT|Technical|Code_Examples|C|grep-2.5|src|dosbuf.c =

page 1 of 2



/* Messy DOS-specific code for correctly treating binary, Unix text
   and DOS text files.

   This has several aspects:

     * Guessing the file type (unless the user tells us);
     * Stripping CR characters from DOS text files (otherwise regex
       functions won't work correctly);
     * Reporting correct byte count with -b for any kind of file.

*/

/* How the data of an input file is classified.  */
typedef enum
{
  /* Not classified yet; the type is guessed from the file data.  */
  UNKNOWN,
  /* guess_type reports this when the data contains a NUL byte.  */
  DOS_BINARY,
  /* guess_type reports this when there is no NUL byte and at least
     one CR is directly followed by LF.  */
  DOS_TEXT,
  /* guess_type reports this when there is neither a NUL byte nor a
     CR-LF pair.  */
  UNIX_TEXT
} File_type;

/* One entry of the character position mapping table (dos_pos_map).  */
struct dos_map
{
  /* Position in the buffer passed to the matcher.  */
  off_t pos;

  /* How much to add to that position when reporting it.  */
  off_t add;
};

/* When nonzero, undossify_input does not count the CR characters it
   strips (the count is only kept if out_byte is set and this is 0).  */
static int dos_report_unix_offset = 0;

/* Type of the file currently being read.  Reset from
   dos_use_file_type at the start of each file, then guessed from the
   data for as long as it is still UNKNOWN.  */
static File_type dos_file_type = UNKNOWN;

/* Value dos_file_type is reset to at the start of each file.
   NOTE(review): presumably set from a user option -- confirm.  */
static File_type dos_use_file_type = UNKNOWN;

/* Running count of CR characters stripped; zeroed for each new file.  */
static off_t dos_stripped_crs = 0;

/* Character position mapping table.  dos_pos_map_used is zeroed for
   each new file.  NOTE(review): dos_pos_map_size is presumably the
   table's capacity -- its maintenance is not visible here.  */
static struct dos_map *dos_pos_map;
static int dos_pos_map_size = 0;
static int dos_pos_map_used = 0;

/* Indices into the mapping table; reset to 0 and 1 respectively at
   the start of each file.  */
static int inp_map_idx = 0;
static int out_map_idx = 1;

/* Classify a chunk of file data by inspecting its bytes.

   BUF points to BUFLEN bytes to examine.  Returns DOS_BINARY as soon
   as a NUL byte is found; otherwise DOS_TEXT if a CR directly followed
   by LF occurs anywhere in the buffer, and UNIX_TEXT if not.  An empty
   buffer yields UNIX_TEXT.  */
static inline File_type
guess_type (char *buf, register size_t buflen)
{
  File_type verdict = UNIX_TEXT;
  size_t i;

  for (i = 0; i < buflen; i++)
    {
      /* A NUL byte anywhere settles it: the data is binary no matter
         which line endings were seen before it.  */
      if (buf[i] == '\0')
        return DOS_BINARY;

      /* Peek at the following byte only when it lies inside the
         buffer; a CR in the very last position is not counted.  */
      if (buf[i] == '\r' && i + 1 < buflen && buf[i + 1] == '\n')
        verdict = DOS_TEXT;
    }

  return verdict;
}

/* Convert external DOS file representation to internal.
   Return the count of characters left in the buffer.
   Build table to map character positions when reporting byte counts.  */
static inline int
undossify_input (register char *buf, size_t buflen)
{
  int chars_left = 0;

  if (totalcc == 0)
    {
      /* New file: forget everything we knew about character
         position mapping table and file type.  */
      inp_map_idx = 0;
      out_map_idx = 1;
      dos_pos_map_used = 0;
      dos_stripped_crs = 0;
      dos_file_type = dos_use_file_type;
    }

  /* Guess if this file is binary, unless we already know that.  */
  if (dos_file_type == UNKNOWN)
    dos_file_type = guess_type(buf, buflen);

  /* If this file is to be treated as DOS Text, strip the CR characters
     and maybe build the table for character position mapping on output.  */
  if (dos_file_type == DOS_TEXT)
    {
      char   *destp   = buf;

      while (buflen--)
        {
          if (*buf != '\r')
            {
              *destp++ = *buf++;
              chars_left++;
            }
          else
            {
              buf++;
              if (out_byte && !dos_report_unix_offset)
                {
                  dos_stripped_crs++;
                  while (buflen && *buf == '\r')
                    {
                      dos_stripped_crs++;
=1=

= PAGE 1 = NEXT > |2

UP TO ROOT | UP TO DIR

Google
 


E-mail Facebook Google Digg del.icio.us BlinkList Fark Furl Ma.gnolia Netscape NewsVine Reddit Slashdot Spurl StumbleUpon Technorati YahooMyWeb LiveJournal Blogmarks TwitThis Live News2.ru BobrDobr.ru Memori.ru MoeMesto.ru

0.021111 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU)