| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541 | /**   cfile_tools.c   A library to deal transparently with possibly compressed files.    Documentation in the function headers and in cfile_tools.h  Copyright (C) 2004 by Arno Wagner <arno.wagner@acm.org>   Distributed under the Gnu Public License version 2 or the modified  BSD license (see file COPYING)  Support for gzip added by Bernhard Tellenbach <bernhard.tellenbach@gmail.com>   */#define _GNU_SOURCE#define _FILE_OFFSET_BITS 64//#ifndef DONT_HAVE_BZ2//#include <bzlib.h>//#endif#include <stdlib.h>#include <string.h>#include <errno.h>#include <assert.h>#include "cfile_tools.h"// Concrete formats. remember to adjust CFR_NUM_FORMATS if changed!// Note: 0, 1 are special entries.const char * cfr_formats[CFR_NUM_FORMATS] = {  "not open",     //  0  "uncompressed", //  1  "bzip2",         //  2  "gzip",         //  3};const char * cfr_extensions[CFR_NUM_FORMATS] = {  "",             //  0  "",             //  1  ".bz2",          //  2  ".gz"          //  3};// Prototypes of non API functions (don't use these from outside this file)const char * _cfr_compressor_strerror(int format, int err);const char * _bz2_strerror(int err);// API Functions CFRFILE *cfr_open(const char *path) {  /*******************************/  // Analog to 'fopen'. Error in result has to be tested using  // 'cfr_error' on the result!  // Note: The user needs to free the reurn value!  // Opens a possibly compressed file for reading.  // File type is determined by file name ending  int format, ext_len, name_len;  CFRFILE * retval = NULL;  // determine file format  name_len = strlen(path);  format = 2;  // skip specials 0, 1   // Do action dependent on file format  retval = (CFRFILE *) calloc(1,sizeof(CFRFILE));  if(retval == NULL)	return (NULL);      retval->eof = 0;  retval->error1 = 0;  retval->error2 = 0;  if((path == NULL) || (strcmp(path, "-") == 0)) {	/* dump from stdin */	gzFile f;	while (format < CFR_NUM_FORMATS) {		if (strcmp(cfr_extensions[format], ".gz") == 0)        		break;    		format ++;  	}	f = gzdopen(0, "r");	if(f == NULL) {		free(retval);		return (NULL);    	}        retval->data2 = f;	retval->format = format;	return (retval);  }  while (format < CFR_NUM_FORMATS) {    ext_len = strlen(cfr_extensions[format]);    if (strncmp(cfr_extensions[format],                path+(name_len-ext_len),                ext_len) == 0        ) break;    format ++;  }  if (format >= CFR_NUM_FORMATS) 	format = 1;  // uncompressed   retval->format = format;  switch (format) {  case 1:  // uncompressed    {       FILE * in;      in = fopen(path,"r");      if (in == NULL) { 	free(retval);        return(NULL);      }      retval->data1 = in;      return(retval);    }    break;  case 2:  // bzip2    {       int bzerror;      BZFILE * bzin;      FILE * in;            retval->bz2_stream_end = 0;            // get file      in = fopen(path,"r");      if (in == NULL) {         free(retval);        return(NULL);      }      retval->data1 = in;            // bzip2ify file      bzin = BZ2_bzReadOpen( &bzerror, in, 0, 0, NULL, 0);       if (bzerror != BZ_OK) {        errno = bzerror;        BZ2_bzReadClose( &bzerror, bzin);        fclose(in);	free(retval);        return(NULL);      }      retval->data2 = bzin;      return(retval);    }    break;  case 3:  // gzip    { 	gzFile f;      	// get file 	f = gzopen(path, "r");	if(f == NULL) {		free(retval);		return (NULL);    	}        retval->data2 = f;	return (retval);   }   break;  default:  // this is an internal error, no diag yet.    fprintf(stderr,"illegal format '%d' in cfr_open!\n", format);    exit(1);  }  return NULL;} int cfr_close(CFRFILE *stream) {  /**************************/  // Analog to 'fclose'.  // FIXME - why is stream->* set, then freed?  if (stream == NULL || stream->closed) {      errno = EBADF;      return -1;  }      int retval = -1;    switch (stream->format) {  case 1:  // uncompressed      retval = fclose((FILE *)(stream->data1));      stream->error1 = retval;      break;  case 2: // bzip2      BZ2_bzReadClose( &stream->error2, (BZFILE *)stream->data2);      stream->error1 = retval = fclose((FILE *)(stream->data1));      break;  case 3:  // gzip      if(stream->data2!=NULL)          retval = gzclose(stream->data2);      stream->error2 = retval;      break;    default:  // internal error          assert("illegal stream->format" && 0);  }  free(stream);  return(retval);}size_t cfr_read_n(CFRFILE *stream, void *ptr, size_t bytes) {  /******************************************************************/  // Wrapper, will return either 'bytes' (the number of bytes to read) or 0   return(cfr_read(ptr, bytes, 1, stream)*bytes);}size_t cfr_read(void *ptr, size_t size, size_t nmemb, CFRFILE *stream) {  /******************************************************************/  // Analog to 'fread'. Will not return with partial elements, only  // full ones. Hence calling this function with one large element  // size will result in a complete or no read.    size_t retval = 0;  if (stream == NULL) return(0);  // shortcut  if (stream->eof) return(0);  switch (stream->format) {  case 1:  // uncompressed    {       FILE * in;      in = (FILE *)(stream->data1);      retval = fread(ptr, size, nmemb, in);      if (retval != nmemb) {        // fprintf(stderr,"short read!!!\n");        stream->eof = feof(in);        stream->error1 = ferror(in);	retval = 0;      }      return (retval);    }    break;   case 2:  // bzip2    {       BZFILE * bzin;       int bzerror;      int buffsize;      if (stream->bz2_stream_end == 1) {        // feof-behaviour: Last read did consume last byte but not more        stream->eof = 1;        return(0);      }      bzerror = BZ_OK;      bzin = (BZFILE *) (stream->data2);      buffsize = size * nmemb;      retval = BZ2_bzRead(&bzerror, bzin, ptr, buffsize);      if (bzerror == BZ_STREAM_END ) {        stream->bz2_stream_end = 1;        stream->error2 = bzerror;        if (retval == buffsize) {          // feof-behaviour: no eof yet        } else {          // feof-behaviour: read past end, set eof          stream->eof = 1;	  retval = 0;        }        return(retval/size);      }      if (bzerror == BZ_OK) {        // Normal case, no error.        // A short read here is an error, so catch it        if (retval == buffsize) {          return(retval/size);        }      }      // Other error...      stream->error2 = bzerror;      BZ2_bzReadClose( &bzerror, bzin );      if (bzerror != BZ_OK) {        stream->error2 = bzerror;      }      retval = fclose((FILE *)(stream->data1));      stream->error1 = retval;      stream->closed = 1;      return(0);    }    break;  case 3:  // gzip    {       gzFile in;      in = (gzFile)(stream->data2);      retval = gzread(in, ptr, size*nmemb);      if (retval != nmemb*size) {        // fprintf(stderr,"short read!!!\n");        stream->eof = gzeof(in);        stream->error2 = errno;	retval = 0;      }      return (retval/size);    }    break;  default:  // this is an internal error, no diag yet.    fprintf(stderr,"illegal format '%d' in cfr_read!\n",stream->format);    exit(1);  }} ssize_t cfr_getline(char **lineptr, size_t *n, CFRFILE *stream) {  /************************************************************/  // May not be very efficient, since it uses single-char reads  // for formats where there is no native getline in the library.  // For bzip2 the speedup for additional buffering was only 5%  // so I dropped it.  // Returns -1 in case of an error.  char *tmp;  if (stream == NULL) return(-1);    switch (stream->format) {  case 1:  // uncompressed    {       if (fgets(*lineptr, *n, (FILE *)(stream->data1)) == NULL) {        stream->error1 = errno;	return -1;      }      return 0;    }    break;  case 2:  // bzip2      {                      size_t count;      char c;      size_t ret;	  //bzin = (BZFILE *) (stream->data2);      // allocate initial buffer if none was passed or size was zero      if (*lineptr == NULL) {        *lineptr = (char *) calloc(120, 1);        if(*lineptr == NULL) {            stream->error1 = errno;            return(-1);        }        *n = 120;      }      if (*n == 0) {        *n = 120;        tmp = (char *) realloc(*lineptr, *n); // to avoid memory-leaks        if(tmp == NULL) {            stream->error1 = errno;            return(-1);        }        *lineptr = tmp;      }      count = 0;      // read until '\n'      do {        ret = cfr_read(&c, 1, 1, stream);        if (ret != 1) {          return(-1);        }        count ++;        if (count >= *n) {          *n = 2 * *n;          tmp = (char *) realloc(*lineptr, *n);          if (tmp == NULL) {            stream->error1 = errno;            return(-1);          }          *lineptr = tmp;        }        (*lineptr)[count-1] = c;      } while (c != '\n');      (*lineptr)[count] = 0;      return(count);    }    break;  case 3:  // gzip    {       char * return_ptr = gzgets((gzFile)(stream->data2), *lineptr, *n );      if (return_ptr == Z_NULL) {        stream->error2 = errno;        return(-1);      }      return *n;      }    break;  default:  // this is an internal error, no diag yet.    fprintf(stderr,"illegal format '%d' in cfr_getline!\n",stream->format);    exit(1);    return(-1);  }     }int cfr_eof(CFRFILE *stream) {  // Returns true on end of file/end of compressed data.  // The end of the compressed data is regarded as end of file  // in this library, embedded or multiple compressed data per   // file is not supported by this library.  //  // Note: The sematics is that cfr_eof is true only after  // the first byte after the end of file was read. Some compressors  // report EOF already when the last availale character has been   // read (far more sensible IMO), but for consistency we follow the  // convention of the standard c library here.  return(stream->eof);}int cfr_error(CFRFILE *stream) {  // Returns true on error.  // Errors can be ordinary errors from fopen.fclose/fread  // or can originate from the underlying compression.  // This function just returns 0 when there is no error or  // 1 in case of error.  // To get a more detailed report cfr_strerror will try to  // come up with a description of the whole situation.  // For numeric details, more query functions would need to be  // implemented.    if (stream == NULL) return(1);  return(stream->error1 || stream->error2);}char * cfr_strerror(CFRFILE *stream) {  // Result is "stream-i/o: <stream-error> <compressor>[: <compressor error>]"  // Do not modify result.   // Result may change on subsequent call to this function.  static char res[120];  int ret;  char * msg, * msg2;  if (stream == NULL) {    snprintf(res, sizeof(res), "%s", "Error: stream is NULL, i.e. not opened");    return(res);  }  ret = asprintf(&msg,           "stream-i/o: %s, %s  [%s]",           stream->eof?"EOF":"",           strerror(stream->error1),           cfr_compressor_str(stream));  if (ret == -1)    goto oom;  if (stream->format == 2) {    ret = asprintf(&msg2,              "%s: %s",             msg,              _cfr_compressor_strerror(stream->format, stream->error2));    free(msg);    if (ret == -1)      goto oom;    msg = msg2;  }   if (stream->format == 3) {    ret = asprintf(&msg2,              "%s: %s",             msg,              gzerror((gzFile)(stream->data2), &(stream->error2)));    free(msg);    if (ret == -1)      goto oom;    msg = msg2;  }  snprintf(res, sizeof(res), "%s", msg);  free(msg);   return(res);oom:  snprintf(res, sizeof(res), "%s", "Error: asprintf: out of memory");  return(res);}const char * cfr_compressor_str(CFRFILE *stream) {  // Returns the name of the compressor used  if ((stream->format < 0) || (stream->format >= CFR_NUM_FORMATS)) {    return("undefined compression type");  } else {    return (cfr_formats[stream->format]);  }}// Utility functions for compressor errors. // * Not part of the API, do not call directly as they may change! *const char * _cfr_compressor_strerror(int format, int err) {  // Transforms error code to string for all compressors    switch (format) {  case 0:     return("file not open");    break;  case 1:    return("file not compressed");    break;      case 2:    return(_bz2_strerror(err));    break;  case 3:    return NULL;    break;  default:    return("unknowen compressor code");  }  }    const char * _bz2_strerror(int err) {  // Since bzlib does not have strerror, we do it here manually.  // This works for version 1.0 of 21 March 2000 of bzlib.h    switch (err) {  case BZ_OK: return("BZ_OK");  case BZ_RUN_OK: return("BZ_RUN_OK");  case BZ_FLUSH_OK: return("BZ_FLUSH_OK");  case BZ_FINISH_OK: return("BZ_FINISH_OK");  case BZ_STREAM_END: return("BZ_STREAM_END");  case BZ_SEQUENCE_ERROR: return("BZ_SEQUENCE_ERROR");  case BZ_PARAM_ERROR: return("BZ_PARAM_ERROR");  case BZ_MEM_ERROR: return("BZ_MEM_ERROR");  case BZ_DATA_ERROR: return("BZ_DATA_ERROR");  case BZ_DATA_ERROR_MAGIC: return("BZ_DATA_ERROR_MAGIC");  case BZ_IO_ERROR: return("BZ_IO_ERROR");  case BZ_UNEXPECTED_EOF: return("BZ_UNEXPECTED_EOF");  case BZ_OUTBUFF_FULL: return("BZ_OUTBUFF_FULL");  case BZ_CONFIG_ERROR: return("BZ_CONFIG_ERROR");  default: return("unknowen bzip2 error code");  }}    
 |