123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541 |
- /**
- cfile_tools.c
-
- A library to deal transparently with possibly compressed files.
- Documentation in the function headers and in cfile_tools.h
- Copyright (C) 2004 by Arno Wagner <arno.wagner@acm.org>
- Distributed under the Gnu Public License version 2 or the modified
- BSD license (see file COPYING)
- Support for gzip added by Bernhard Tellenbach <bernhard.tellenbach@gmail.com>
- */
- #define _GNU_SOURCE
- #define _FILE_OFFSET_BITS 64
- //#ifndef DONT_HAVE_BZ2
- //#include <bzlib.h>
- //#endif
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "cfile_tools.h"
- // Concrete formats. remember to adjust CFR_NUM_FORMATS if changed!
- // Note: 0, 1 are special entries.
- const char * cfr_formats[CFR_NUM_FORMATS] = {
- "not open", // 0
- "uncompressed", // 1
- "bzip2", // 2
- "gzip", // 3
- };
- const char * cfr_extensions[CFR_NUM_FORMATS] = {
- "", // 0
- "", // 1
- ".bz2", // 2
- ".gz" // 3
- };
- // Prototypes of non API functions (don't use these from outside this file)
- const char * _cfr_compressor_strerror(int format, int err);
- const char * _bz2_strerror(int err);
- // API Functions
- CFRFILE *cfr_open(const char *path) {
- /*******************************/
- // Analog to 'fopen'. Error in result has to be tested using
- // 'cfr_error' on the result!
- // Note: The user needs to free the reurn value!
- // Opens a possibly compressed file for reading.
- // File type is determined by file name ending
- int format, ext_len, name_len;
- CFRFILE * retval = NULL;
- // determine file format
- name_len = strlen(path);
- format = 2; // skip specials 0, 1
- // Do action dependent on file format
- retval = (CFRFILE *) calloc(1,sizeof(CFRFILE));
- if(retval == NULL)
- return (NULL);
- retval->eof = 0;
- retval->error1 = 0;
- retval->error2 = 0;
- if((path == NULL) || (strcmp(path, "-") == 0)) {
- /* dump from stdin */
- gzFile f;
- while (format < CFR_NUM_FORMATS) {
- if (strcmp(cfr_extensions[format], ".gz") == 0)
- break;
- format ++;
- }
- f = gzdopen(0, "r");
- if(f == NULL) {
- free(retval);
- return (NULL);
- }
- retval->data2 = f;
- retval->format = format;
- return (retval);
- }
- while (format < CFR_NUM_FORMATS) {
- ext_len = strlen(cfr_extensions[format]);
- if (strncmp(cfr_extensions[format],
- path+(name_len-ext_len),
- ext_len) == 0
- ) break;
- format ++;
- }
- if (format >= CFR_NUM_FORMATS)
- format = 1; // uncompressed
- retval->format = format;
- switch (format) {
- case 1: // uncompressed
- {
- FILE * in;
- in = fopen(path,"r");
- if (in == NULL) {
- free(retval);
- return(NULL);
- }
- retval->data1 = in;
- return(retval);
- }
- break;
- case 2: // bzip2
- {
- int bzerror;
- BZFILE * bzin;
- FILE * in;
-
- retval->bz2_stream_end = 0;
-
- // get file
- in = fopen(path,"r");
- if (in == NULL) {
- free(retval);
- return(NULL);
- }
- retval->data1 = in;
-
- // bzip2ify file
- bzin = BZ2_bzReadOpen( &bzerror, in, 0, 0, NULL, 0);
- if (bzerror != BZ_OK) {
- errno = bzerror;
- BZ2_bzReadClose( &bzerror, bzin);
- fclose(in);
- free(retval);
- return(NULL);
- }
- retval->data2 = bzin;
- return(retval);
- }
- break;
- case 3: // gzip
- {
- gzFile f;
- // get file
- f = gzopen(path, "r");
- if(f == NULL) {
- free(retval);
- return (NULL);
- }
- retval->data2 = f;
- return (retval);
- }
- break;
- default: // this is an internal error, no diag yet.
- fprintf(stderr,"illegal format '%d' in cfr_open!\n", format);
- exit(1);
- }
- return NULL;
- }
- int cfr_close(CFRFILE *stream) {
- /**************************/
- // Analog to 'fclose'.
- // FIXME - why is stream->* set, then freed?
- if (stream == NULL || stream->closed) {
- errno = EBADF;
- return -1;
- }
-
- int retval = -1;
-
- switch (stream->format) {
- case 1: // uncompressed
- retval = fclose((FILE *)(stream->data1));
- stream->error1 = retval;
- break;
- case 2: // bzip2
- BZ2_bzReadClose( &stream->error2, (BZFILE *)stream->data2);
- stream->error1 = retval = fclose((FILE *)(stream->data1));
- break;
- case 3: // gzip
- if(stream->data2!=NULL)
- retval = gzclose(stream->data2);
- stream->error2 = retval;
- break;
- default: // internal error
- assert("illegal stream->format" && 0);
- }
- free(stream);
- return(retval);
- }
- size_t cfr_read_n(CFRFILE *stream, void *ptr, size_t bytes) {
- /******************************************************************/
- // Wrapper, will return either 'bytes' (the number of bytes to read) or 0
- return(cfr_read(ptr, bytes, 1, stream)*bytes);
- }
- size_t cfr_read(void *ptr, size_t size, size_t nmemb, CFRFILE *stream) {
- /******************************************************************/
- // Analog to 'fread'. Will not return with partial elements, only
- // full ones. Hence calling this function with one large element
- // size will result in a complete or no read.
-
- size_t retval = 0;
- if (stream == NULL) return(0);
- // shortcut
- if (stream->eof) return(0);
- switch (stream->format) {
- case 1: // uncompressed
- {
- FILE * in;
- in = (FILE *)(stream->data1);
- retval = fread(ptr, size, nmemb, in);
- if (retval != nmemb) {
- // fprintf(stderr,"short read!!!\n");
- stream->eof = feof(in);
- stream->error1 = ferror(in);
- retval = 0;
- }
- return (retval);
- }
- break;
- case 2: // bzip2
- {
- BZFILE * bzin;
- int bzerror;
- int buffsize;
- if (stream->bz2_stream_end == 1) {
- // feof-behaviour: Last read did consume last byte but not more
- stream->eof = 1;
- return(0);
- }
- bzerror = BZ_OK;
- bzin = (BZFILE *) (stream->data2);
- buffsize = size * nmemb;
- retval = BZ2_bzRead(&bzerror, bzin, ptr, buffsize);
- if (bzerror == BZ_STREAM_END ) {
- stream->bz2_stream_end = 1;
- stream->error2 = bzerror;
- if (retval == buffsize) {
- // feof-behaviour: no eof yet
- } else {
- // feof-behaviour: read past end, set eof
- stream->eof = 1;
- retval = 0;
- }
- return(retval/size);
- }
- if (bzerror == BZ_OK) {
- // Normal case, no error.
- // A short read here is an error, so catch it
- if (retval == buffsize) {
- return(retval/size);
- }
- }
- // Other error...
- stream->error2 = bzerror;
- BZ2_bzReadClose( &bzerror, bzin );
- if (bzerror != BZ_OK) {
- stream->error2 = bzerror;
- }
- retval = fclose((FILE *)(stream->data1));
- stream->error1 = retval;
- stream->closed = 1;
- return(0);
- }
- break;
- case 3: // gzip
- {
- gzFile in;
- in = (gzFile)(stream->data2);
- retval = gzread(in, ptr, size*nmemb);
- if (retval != nmemb*size) {
- // fprintf(stderr,"short read!!!\n");
- stream->eof = gzeof(in);
- stream->error2 = errno;
- retval = 0;
- }
- return (retval/size);
- }
- break;
- default: // this is an internal error, no diag yet.
- fprintf(stderr,"illegal format '%d' in cfr_read!\n",stream->format);
- exit(1);
- }
- }
- ssize_t cfr_getline(char **lineptr, size_t *n, CFRFILE *stream) {
- /************************************************************/
- // May not be very efficient, since it uses single-char reads
- // for formats where there is no native getline in the library.
- // For bzip2 the speedup for additional buffering was only 5%
- // so I dropped it.
- // Returns -1 in case of an error.
- char *tmp;
- if (stream == NULL) return(-1);
- switch (stream->format) {
- case 1: // uncompressed
- {
- if (fgets(*lineptr, *n, (FILE *)(stream->data1)) == NULL) {
- stream->error1 = errno;
- return -1;
- }
- return 0;
- }
- break;
- case 2: // bzip2
- {
- size_t count;
- char c;
- size_t ret;
- //bzin = (BZFILE *) (stream->data2);
- // allocate initial buffer if none was passed or size was zero
- if (*lineptr == NULL) {
- *lineptr = (char *) calloc(120, 1);
- if(*lineptr == NULL) {
- stream->error1 = errno;
- return(-1);
- }
- *n = 120;
- }
- if (*n == 0) {
- *n = 120;
- tmp = (char *) realloc(*lineptr, *n); // to avoid memory-leaks
- if(tmp == NULL) {
- stream->error1 = errno;
- return(-1);
- }
- *lineptr = tmp;
- }
- count = 0;
- // read until '\n'
- do {
- ret = cfr_read(&c, 1, 1, stream);
- if (ret != 1) {
- return(-1);
- }
- count ++;
- if (count >= *n) {
- *n = 2 * *n;
- tmp = (char *) realloc(*lineptr, *n);
- if (tmp == NULL) {
- stream->error1 = errno;
- return(-1);
- }
- *lineptr = tmp;
- }
- (*lineptr)[count-1] = c;
- } while (c != '\n');
- (*lineptr)[count] = 0;
- return(count);
- }
- break;
- case 3: // gzip
- {
- char * return_ptr = gzgets((gzFile)(stream->data2), *lineptr, *n );
- if (return_ptr == Z_NULL) {
- stream->error2 = errno;
- return(-1);
- }
- return *n;
-
- }
- break;
- default: // this is an internal error, no diag yet.
- fprintf(stderr,"illegal format '%d' in cfr_getline!\n",stream->format);
- exit(1);
- return(-1);
- }
- }
- int cfr_eof(CFRFILE *stream) {
- // Returns true on end of file/end of compressed data.
- // The end of the compressed data is regarded as end of file
- // in this library, embedded or multiple compressed data per
- // file is not supported by this library.
- //
- // Note: The sematics is that cfr_eof is true only after
- // the first byte after the end of file was read. Some compressors
- // report EOF already when the last availale character has been
- // read (far more sensible IMO), but for consistency we follow the
- // convention of the standard c library here.
- return(stream->eof);
- }
- int cfr_error(CFRFILE *stream) {
- // Returns true on error.
- // Errors can be ordinary errors from fopen.fclose/fread
- // or can originate from the underlying compression.
- // This function just returns 0 when there is no error or
- // 1 in case of error.
- // To get a more detailed report cfr_strerror will try to
- // come up with a description of the whole situation.
- // For numeric details, more query functions would need to be
- // implemented.
-
- if (stream == NULL) return(1);
- return(stream->error1 || stream->error2);
- }
- char * cfr_strerror(CFRFILE *stream) {
- // Result is "stream-i/o: <stream-error> <compressor>[: <compressor error>]"
- // Do not modify result.
- // Result may change on subsequent call to this function.
- static char res[120];
- int ret;
- char * msg, * msg2;
- if (stream == NULL) {
- snprintf(res, sizeof(res), "%s", "Error: stream is NULL, i.e. not opened");
- return(res);
- }
- ret = asprintf(&msg,
- "stream-i/o: %s, %s [%s]",
- stream->eof?"EOF":"",
- strerror(stream->error1),
- cfr_compressor_str(stream));
- if (ret == -1)
- goto oom;
- if (stream->format == 2) {
- ret = asprintf(&msg2,
- "%s: %s",
- msg,
- _cfr_compressor_strerror(stream->format, stream->error2));
- free(msg);
- if (ret == -1)
- goto oom;
- msg = msg2;
- }
- if (stream->format == 3) {
- ret = asprintf(&msg2,
- "%s: %s",
- msg,
- gzerror((gzFile)(stream->data2), &(stream->error2)));
- free(msg);
- if (ret == -1)
- goto oom;
- msg = msg2;
- }
- snprintf(res, sizeof(res), "%s", msg);
- free(msg);
- return(res);
- oom:
- snprintf(res, sizeof(res), "%s", "Error: asprintf: out of memory");
- return(res);
- }
- const char * cfr_compressor_str(CFRFILE *stream) {
- // Returns the name of the compressor used
- if ((stream->format < 0) || (stream->format >= CFR_NUM_FORMATS)) {
- return("undefined compression type");
- } else {
- return (cfr_formats[stream->format]);
- }
- }
- // Utility functions for compressor errors.
- // * Not part of the API, do not call directly as they may change! *
const char * _cfr_compressor_strerror(int format, int err) {
  // Maps a compressor error code to a constant string.
  // format: format code of the stream (index into cfr_formats)
  // err:    error code reported by that compressor
  // Only bzip2 codes are translated here; for gzip (format 3) the
  // result is NULL.
  const char *text;

  switch (format) {
  case 0:
    text = "file not open";
    break;
  case 1:
    text = "file not compressed";
    break;
  case 2:
    text = _bz2_strerror(err);
    break;
  case 3:
    text = NULL;
    break;
  default:
    text = "unknowen compressor code";
    break;
  }
  return text;
}
-
- const char * _bz2_strerror(int err) {
- // Since bzlib does not have strerror, we do it here manually.
- // This works for version 1.0 of 21 March 2000 of bzlib.h
-
- switch (err) {
- case BZ_OK: return("BZ_OK");
- case BZ_RUN_OK: return("BZ_RUN_OK");
- case BZ_FLUSH_OK: return("BZ_FLUSH_OK");
- case BZ_FINISH_OK: return("BZ_FINISH_OK");
- case BZ_STREAM_END: return("BZ_STREAM_END");
- case BZ_SEQUENCE_ERROR: return("BZ_SEQUENCE_ERROR");
- case BZ_PARAM_ERROR: return("BZ_PARAM_ERROR");
- case BZ_MEM_ERROR: return("BZ_MEM_ERROR");
- case BZ_DATA_ERROR: return("BZ_DATA_ERROR");
- case BZ_DATA_ERROR_MAGIC: return("BZ_DATA_ERROR_MAGIC");
- case BZ_IO_ERROR: return("BZ_IO_ERROR");
- case BZ_UNEXPECTED_EOF: return("BZ_UNEXPECTED_EOF");
- case BZ_OUTBUFF_FULL: return("BZ_OUTBUFF_FULL");
- case BZ_CONFIG_ERROR: return("BZ_CONFIG_ERROR");
- default: return("unknowen bzip2 error code");
- }
- }
-
|