| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541 | 
							- /** 
 
-   cfile_tools.c
 
-  
 
-   A library to deal transparently with possibly compressed files.  
 
-   Documentation in the function headers and in cfile_tools.h
 
-   Copyright (C) 2004 by Arno Wagner <arno.wagner@acm.org> 
 
-   Distributed under the Gnu Public License version 2 or the modified
 
-   BSD license (see file COPYING)
 
-   Support for gzip added by Bernhard Tellenbach <bernhard.tellenbach@gmail.com>   
 
- */
 
- #define _GNU_SOURCE
 
- #define _FILE_OFFSET_BITS 64
 
- //#ifndef DONT_HAVE_BZ2
 
- //#include <bzlib.h>
 
- //#endif
 
- #include <stdlib.h>
 
- #include <string.h>
 
- #include <errno.h>
 
- #include <assert.h>
 
- #include "cfile_tools.h"
 
- // Concrete formats. remember to adjust CFR_NUM_FORMATS if changed!
 
- // Note: 0, 1 are special entries.
 
- const char * cfr_formats[CFR_NUM_FORMATS] = {
 
-   "not open",     //  0
 
-   "uncompressed", //  1
 
-   "bzip2",         //  2
 
-   "gzip",         //  3
 
- };
 
- const char * cfr_extensions[CFR_NUM_FORMATS] = {
 
-   "",             //  0
 
-   "",             //  1
 
-   ".bz2",          //  2
 
-   ".gz"          //  3
 
- };
 
- // Prototypes of non API functions (don't use these from outside this file)
 
- const char * _cfr_compressor_strerror(int format, int err);
 
- const char * _bz2_strerror(int err);
 
- // API Functions 
 
- CFRFILE *cfr_open(const char *path) {
 
-   /*******************************/
 
-   // Analog to 'fopen'. Error in result has to be tested using
 
-   // 'cfr_error' on the result!
 
-   // Note: The user needs to free the reurn value!
 
-   // Opens a possibly compressed file for reading.
 
-   // File type is determined by file name ending
 
-   int format, ext_len, name_len;
 
-   CFRFILE * retval = NULL;
 
-   // determine file format
 
-   name_len = strlen(path);
 
-   format = 2;  // skip specials 0, 1 
 
-   // Do action dependent on file format
 
-   retval = (CFRFILE *) calloc(1,sizeof(CFRFILE));
 
-   if(retval == NULL)
 
- 	return (NULL);    
 
-   retval->eof = 0;
 
-   retval->error1 = 0;
 
-   retval->error2 = 0;
 
-   if((path == NULL) || (strcmp(path, "-") == 0)) {
 
- 	/* dump from stdin */
 
- 	gzFile f;
 
- 	while (format < CFR_NUM_FORMATS) {
 
- 		if (strcmp(cfr_extensions[format], ".gz") == 0)
 
-         		break;
 
-     		format ++;
 
-   	}
 
- 	f = gzdopen(0, "r");
 
- 	if(f == NULL) {
 
- 		free(retval);
 
- 		return (NULL);
 
-     	}
 
-         retval->data2 = f;
 
- 	retval->format = format;
 
- 	return (retval);
 
-   }
 
-   while (format < CFR_NUM_FORMATS) {
 
-     ext_len = strlen(cfr_extensions[format]);
 
-     if (strncmp(cfr_extensions[format],
 
-                 path+(name_len-ext_len),
 
-                 ext_len) == 0
 
-         ) break;
 
-     format ++;
 
-   }
 
-   if (format >= CFR_NUM_FORMATS) 
 
- 	format = 1;  // uncompressed 
 
-   retval->format = format;
 
-   switch (format) {
 
-   case 1:  // uncompressed
 
-     { 
 
-       FILE * in;
 
-       in = fopen(path,"r");
 
-       if (in == NULL) { 
 
- 	free(retval);
 
-         return(NULL);
 
-       }
 
-       retval->data1 = in;
 
-       return(retval);
 
-     }
 
-     break;
 
-   case 2:  // bzip2
 
-     { 
 
-       int bzerror;
 
-       BZFILE * bzin;
 
-       FILE * in;
 
-       
 
-       retval->bz2_stream_end = 0;
 
-       
 
-       // get file
 
-       in = fopen(path,"r");
 
-       if (in == NULL) { 
 
-         free(retval);
 
-         return(NULL);
 
-       }
 
-       retval->data1 = in;
 
-       
 
-       // bzip2ify file
 
-       bzin = BZ2_bzReadOpen( &bzerror, in, 0, 0, NULL, 0); 
 
-       if (bzerror != BZ_OK) {
 
-         errno = bzerror;
 
-         BZ2_bzReadClose( &bzerror, bzin);
 
-         fclose(in);
 
- 	free(retval);
 
-         return(NULL);
 
-       }
 
-       retval->data2 = bzin;
 
-       return(retval);
 
-     }
 
-     break;
 
-   case 3:  // gzip
 
-     { 
 
- 	gzFile f;
 
-       	// get file 
 
- 	f = gzopen(path, "r");
 
- 	if(f == NULL) {
 
- 		free(retval);
 
- 		return (NULL);
 
-     	}
 
-         retval->data2 = f;
 
- 	return (retval);
 
-    }
 
-    break;
 
-   default:  // this is an internal error, no diag yet.
 
-     fprintf(stderr,"illegal format '%d' in cfr_open!\n", format);
 
-     exit(1);
 
-   }
 
-   return NULL;
 
- } 
 
- int cfr_close(CFRFILE *stream) {
 
-   /**************************/
 
-   // Analog to 'fclose'.
 
-   // FIXME - why is stream->* set, then freed?
 
-   if (stream == NULL || stream->closed) {
 
-       errno = EBADF;
 
-       return -1;
 
-   }
 
-     
 
-   int retval = -1;
 
-   
 
-   switch (stream->format) {
 
-   case 1:  // uncompressed
 
-       retval = fclose((FILE *)(stream->data1));
 
-       stream->error1 = retval;
 
-       break;
 
-   case 2: // bzip2
 
-       BZ2_bzReadClose( &stream->error2, (BZFILE *)stream->data2);
 
-       stream->error1 = retval = fclose((FILE *)(stream->data1));
 
-       break;
 
-   case 3:  // gzip
 
-       if(stream->data2!=NULL)
 
-           retval = gzclose(stream->data2);
 
-       stream->error2 = retval;
 
-       break;
 
-     default:  // internal error
 
-           assert("illegal stream->format" && 0);
 
-   }
 
-   free(stream);
 
-   return(retval);
 
- }
 
- size_t cfr_read_n(CFRFILE *stream, void *ptr, size_t bytes) {
 
-   /******************************************************************/
 
-   // Wrapper, will return either 'bytes' (the number of bytes to read) or 0 
 
-   return(cfr_read(ptr, bytes, 1, stream)*bytes);
 
- }
 
- size_t cfr_read(void *ptr, size_t size, size_t nmemb, CFRFILE *stream) {
 
-   /******************************************************************/
 
-   // Analog to 'fread'. Will not return with partial elements, only
 
-   // full ones. Hence calling this function with one large element
 
-   // size will result in a complete or no read.
 
-   
 
-   size_t retval = 0;
 
-   if (stream == NULL) return(0);
 
-   // shortcut
 
-   if (stream->eof) return(0);
 
-   switch (stream->format) {
 
-   case 1:  // uncompressed
 
-     { 
 
-       FILE * in;
 
-       in = (FILE *)(stream->data1);
 
-       retval = fread(ptr, size, nmemb, in);
 
-       if (retval != nmemb) {
 
-         // fprintf(stderr,"short read!!!\n");
 
-         stream->eof = feof(in);
 
-         stream->error1 = ferror(in);
 
- 	retval = 0;
 
-       }
 
-       return (retval);
 
-     }
 
-     break;
 
-    case 2:  // bzip2
 
-     { 
 
-       BZFILE * bzin; 
 
-       int bzerror;
 
-       int buffsize;
 
-       if (stream->bz2_stream_end == 1) {
 
-         // feof-behaviour: Last read did consume last byte but not more
 
-         stream->eof = 1;
 
-         return(0);
 
-       }
 
-       bzerror = BZ_OK;
 
-       bzin = (BZFILE *) (stream->data2);
 
-       buffsize = size * nmemb;
 
-       retval = BZ2_bzRead(&bzerror, bzin, ptr, buffsize);
 
-       if (bzerror == BZ_STREAM_END ) {
 
-         stream->bz2_stream_end = 1;
 
-         stream->error2 = bzerror;
 
-         if (retval == buffsize) {
 
-           // feof-behaviour: no eof yet
 
-         } else {
 
-           // feof-behaviour: read past end, set eof
 
-           stream->eof = 1;
 
- 	  retval = 0;
 
-         }
 
-         return(retval/size);
 
-       }
 
-       if (bzerror == BZ_OK) {
 
-         // Normal case, no error.
 
-         // A short read here is an error, so catch it
 
-         if (retval == buffsize) {
 
-           return(retval/size);
 
-         }
 
-       }
 
-       // Other error...
 
-       stream->error2 = bzerror;
 
-       BZ2_bzReadClose( &bzerror, bzin );
 
-       if (bzerror != BZ_OK) {
 
-         stream->error2 = bzerror;
 
-       }
 
-       retval = fclose((FILE *)(stream->data1));
 
-       stream->error1 = retval;
 
-       stream->closed = 1;
 
-       return(0);
 
-     }
 
-     break;
 
-   case 3:  // gzip
 
-     { 
 
-       gzFile in;
 
-       in = (gzFile)(stream->data2);
 
-       retval = gzread(in, ptr, size*nmemb);
 
-       if (retval != nmemb*size) {
 
-         // fprintf(stderr,"short read!!!\n");
 
-         stream->eof = gzeof(in);
 
-         stream->error2 = errno;
 
- 	retval = 0;
 
-       }
 
-       return (retval/size);
 
-     }
 
-     break;
 
-   default:  // this is an internal error, no diag yet.
 
-     fprintf(stderr,"illegal format '%d' in cfr_read!\n",stream->format);
 
-     exit(1);
 
-   }
 
- } 
 
- ssize_t cfr_getline(char **lineptr, size_t *n, CFRFILE *stream) {
 
-   /************************************************************/
 
-   // May not be very efficient, since it uses single-char reads
 
-   // for formats where there is no native getline in the library.
 
-   // For bzip2 the speedup for additional buffering was only 5%
 
-   // so I dropped it.
 
-   // Returns -1 in case of an error.
 
-   char *tmp;
 
-   if (stream == NULL) return(-1);  
 
-   switch (stream->format) {
 
-   case 1:  // uncompressed
 
-     { 
 
-       if (fgets(*lineptr, *n, (FILE *)(stream->data1)) == NULL) {
 
-         stream->error1 = errno;
 
- 	return -1;
 
-       }
 
-       return 0;
 
-     }
 
-     break;
 
-   case 2:  // bzip2  
 
-     {                
 
-       size_t count;
 
-       char c;
 
-       size_t ret;
 
- 	  //bzin = (BZFILE *) (stream->data2);
 
-       // allocate initial buffer if none was passed or size was zero
 
-       if (*lineptr == NULL) {
 
-         *lineptr = (char *) calloc(120, 1);
 
-         if(*lineptr == NULL) {
 
-             stream->error1 = errno;
 
-             return(-1);
 
-         }
 
-         *n = 120;
 
-       }
 
-       if (*n == 0) {
 
-         *n = 120;
 
-         tmp = (char *) realloc(*lineptr, *n); // to avoid memory-leaks
 
-         if(tmp == NULL) {
 
-             stream->error1 = errno;
 
-             return(-1);
 
-         }
 
-         *lineptr = tmp;
 
-       }
 
-       count = 0;
 
-       // read until '\n'
 
-       do {
 
-         ret = cfr_read(&c, 1, 1, stream);
 
-         if (ret != 1) {
 
-           return(-1);
 
-         }
 
-         count ++;
 
-         if (count >= *n) {
 
-           *n = 2 * *n;
 
-           tmp = (char *) realloc(*lineptr, *n);
 
-           if (tmp == NULL) {
 
-             stream->error1 = errno;
 
-             return(-1);
 
-           }
 
-           *lineptr = tmp;
 
-         }
 
-         (*lineptr)[count-1] = c;
 
-       } while (c != '\n');
 
-       (*lineptr)[count] = 0;
 
-       return(count);
 
-     }
 
-     break;
 
-   case 3:  // gzip
 
-     { 
 
-       char * return_ptr = gzgets((gzFile)(stream->data2), *lineptr, *n );
 
-       if (return_ptr == Z_NULL) {
 
-         stream->error2 = errno;
 
-         return(-1);
 
-       }
 
-       return *n;
 
-   
 
-     }
 
-     break;
 
-   default:  // this is an internal error, no diag yet.
 
-     fprintf(stderr,"illegal format '%d' in cfr_getline!\n",stream->format);
 
-     exit(1);
 
-     return(-1);
 
-   }     
 
- }
 
- int cfr_eof(CFRFILE *stream) {
 
-   // Returns true on end of file/end of compressed data.
 
-   // The end of the compressed data is regarded as end of file
 
-   // in this library, embedded or multiple compressed data per 
 
-   // file is not supported by this library.
 
-   //
 
-   // Note: The sematics is that cfr_eof is true only after
 
-   // the first byte after the end of file was read. Some compressors
 
-   // report EOF already when the last availale character has been 
 
-   // read (far more sensible IMO), but for consistency we follow the
 
-   // convention of the standard c library here.
 
-   return(stream->eof);
 
- }
 
- int cfr_error(CFRFILE *stream) {
 
-   // Returns true on error.
 
-   // Errors can be ordinary errors from fopen.fclose/fread
 
-   // or can originate from the underlying compression.
 
-   // This function just returns 0 when there is no error or
 
-   // 1 in case of error.
 
-   // To get a more detailed report cfr_strerror will try to
 
-   // come up with a description of the whole situation.
 
-   // For numeric details, more query functions would need to be
 
-   // implemented.
 
-   
 
-   if (stream == NULL) return(1);
 
-   return(stream->error1 || stream->error2);
 
- }
 
- char * cfr_strerror(CFRFILE *stream) {
 
-   // Result is "stream-i/o: <stream-error> <compressor>[: <compressor error>]"
 
-   // Do not modify result. 
 
-   // Result may change on subsequent call to this function.
 
-   static char res[120];
 
-   int ret;
 
-   char * msg, * msg2;
 
-   if (stream == NULL) {
 
-     snprintf(res, sizeof(res), "%s", "Error: stream is NULL, i.e. not opened");
 
-     return(res);
 
-   }
 
-   ret = asprintf(&msg,
 
-            "stream-i/o: %s, %s  [%s]",
 
-            stream->eof?"EOF":"",
 
-            strerror(stream->error1),
 
-            cfr_compressor_str(stream));
 
-   if (ret == -1)
 
-     goto oom;
 
-   if (stream->format == 2) {
 
-     ret = asprintf(&msg2, 
 
-              "%s: %s",
 
-              msg, 
 
-              _cfr_compressor_strerror(stream->format, stream->error2));
 
-     free(msg);
 
-     if (ret == -1)
 
-       goto oom;
 
-     msg = msg2;
 
-   } 
 
-   if (stream->format == 3) {
 
-     ret = asprintf(&msg2, 
 
-              "%s: %s",
 
-              msg, 
 
-              gzerror((gzFile)(stream->data2), &(stream->error2)));
 
-     free(msg);
 
-     if (ret == -1)
 
-       goto oom;
 
-     msg = msg2;
 
-   }
 
-   snprintf(res, sizeof(res), "%s", msg);
 
-   free(msg); 
 
-   return(res);
 
- oom:
 
-   snprintf(res, sizeof(res), "%s", "Error: asprintf: out of memory");
 
-   return(res);
 
- }
 
- const char * cfr_compressor_str(CFRFILE *stream) {
 
-   // Returns the name of the compressor used
 
-   if ((stream->format < 0) || (stream->format >= CFR_NUM_FORMATS)) {
 
-     return("undefined compression type");
 
-   } else {
 
-     return (cfr_formats[stream->format]);
 
-   }
 
- }
 
- // Utility functions for compressor errors. 
 
- // * Not part of the API, do not call directly as they may change! *
 
- const char * _cfr_compressor_strerror(int format, int err) {
 
-   // Transforms error code to string for all compressors
 
-   
 
-   switch (format) {
 
-   case 0: 
 
-     return("file not open");
 
-     break;
 
-   case 1:
 
-     return("file not compressed");
 
-     break;
 
-     
 
-   case 2:
 
-     return(_bz2_strerror(err));
 
-     break;
 
-   case 3:
 
-     return NULL;
 
-     break;
 
-   default:
 
-     return("unknowen compressor code");
 
-   }  
 
- }
 
-     
 
- const char * _bz2_strerror(int err) {
 
-   // Since bzlib does not have strerror, we do it here manually.
 
-   // This works for version 1.0 of 21 March 2000 of bzlib.h
 
-   
 
-   switch (err) {
 
-   case BZ_OK: return("BZ_OK");
 
-   case BZ_RUN_OK: return("BZ_RUN_OK");
 
-   case BZ_FLUSH_OK: return("BZ_FLUSH_OK");
 
-   case BZ_FINISH_OK: return("BZ_FINISH_OK");
 
-   case BZ_STREAM_END: return("BZ_STREAM_END");
 
-   case BZ_SEQUENCE_ERROR: return("BZ_SEQUENCE_ERROR");
 
-   case BZ_PARAM_ERROR: return("BZ_PARAM_ERROR");
 
-   case BZ_MEM_ERROR: return("BZ_MEM_ERROR");
 
-   case BZ_DATA_ERROR: return("BZ_DATA_ERROR");
 
-   case BZ_DATA_ERROR_MAGIC: return("BZ_DATA_ERROR_MAGIC");
 
-   case BZ_IO_ERROR: return("BZ_IO_ERROR");
 
-   case BZ_UNEXPECTED_EOF: return("BZ_UNEXPECTED_EOF");
 
-   case BZ_OUTBUFF_FULL: return("BZ_OUTBUFF_FULL");
 
-   case BZ_CONFIG_ERROR: return("BZ_CONFIG_ERROR");
 
-   default: return("unknowen bzip2 error code");
 
-   }
 
- }
 
-     
 
 
  |