/* Messy DOS-specific code for correctly treating binary, Unix text
and DOS text files.
This has several aspects:
* Guessing the file type (unless the user tells us);
* Stripping CR characters from DOS text files (otherwise regex
functions won't work correctly);
* Reporting correct byte count with -b for any kind of file.
*/
typedef enum {
UNKNOWN, DOS_BINARY, DOS_TEXT, UNIX_TEXT
} File_type;
struct dos_map {
off_t pos; /* position in buffer passed to matcher */
off_t add; /* how much to add when reporting char position */
};
static int dos_report_unix_offset = 0;
static File_type dos_file_type = UNKNOWN;
static File_type dos_use_file_type = UNKNOWN;
static off_t dos_stripped_crs = 0;
static struct dos_map *dos_pos_map;
static int dos_pos_map_size = 0;
static int dos_pos_map_used = 0;
static int inp_map_idx = 0, out_map_idx = 1;
/* Guess DOS file type by looking at its contents. */
static inline File_type
guess_type (char *buf, register size_t buflen)
{
int crlf_seen = 0;
register char *bp = buf;
while (buflen--)
{
/* Treat a file as binary if it has a NUL character. */
if (!*bp)
return DOS_BINARY;
/* CR before LF means DOS text file (unless we later see
binary characters). */
else if (*bp == '\r' && buflen && bp[1] == '\n')
crlf_seen = 1;
bp++;
}
return crlf_seen ? DOS_TEXT : UNIX_TEXT;
}
/* Convert external DOS file representation to internal.
Return the count of characters left in the buffer.
Build table to map character positions when reporting byte counts. */
static inline int
undossify_input (register char *buf, size_t buflen)
{
int chars_left = 0;
if (totalcc == 0)
{
/* New file: forget everything we knew about character
position mapping table and file type. */
inp_map_idx = 0;
out_map_idx = 1;
dos_pos_map_used = 0;
dos_stripped_crs = 0;
dos_file_type = dos_use_file_type;
}
/* Guess if this file is binary, unless we already know that. */
if (dos_file_type == UNKNOWN)
dos_file_type = guess_type(buf, buflen);
/* If this file is to be treated as DOS Text, strip the CR characters
and maybe build the table for character position mapping on output. */
if (dos_file_type == DOS_TEXT)
{
char *destp = buf;
while (buflen--)
{
if (*buf != '\r')
{
*destp++ = *buf++;
chars_left++;
}
else
{
buf++;
if (out_byte && !dos_report_unix_offset)
{
dos_stripped_crs++;
while (buflen && *buf == '\r')
{
dos_stripped_crs++;
=1= |