Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .ci/create_test_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
# Abort on the first failing command, and echo each command as it runs.
set -e
set -x

pip install --upgrade pip

# First argument: the environment directory.
envdir="$1"
# Absolute path of the directory containing this script. The inner command
# substitution is quoted so that paths containing whitespace survive intact.
thisdir=$(cd "$(dirname "$0")" && pwd)

Expand Down
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
# `indexed_gzip` changelog


## 1.8.0 (Under development)


* Changes to allow linking against
[zlib-ng](https://github.com/zlib-ng/zlib-ng/) (#107).
* Added a simplified C interface, `cindexed_gzip`, and the build machinery
  for compiling it as a static or shared C library (#107).


## 1.7.1 (March 31st 2023)


* Small change to the `IndexedGzipFile` class so that it accepts file-likes
which do not implement `fileno()` (#118).



## 1.7.0 (September 12th 2022)


Expand Down
63 changes: 63 additions & 0 deletions cindexed_gzip/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Build the cindexed_gzip C library.
#
# The library can be built in one of the following ways:
#   - Linked against the system zlib installation (default)
#   - Linked against a zlib build at $ZLIB_HOME
#   - Linked against a zlib-ng build at $ZLIB_NG_HOME
#
# Targets:
#   all      - build the shared and static libraries (default)
#   clean    - remove all build products, including the generated header
#   install  - copy libraries and headers beneath $PREFIX

# These targets do not produce files with the same name; declaring them
# phony stops a stray file called "all", "clean" or "install" from
# short-circuiting the rule.
.PHONY: all clean install

all: libcindexed_gzip.so libcindexed_gzip_static.a

AR     ?= ar
CC     ?= gcc
PYTHON ?= python
OBJFILES = zran.o zran_file_util.o cindexed_gzip.o

# Evaluated once (:=) rather than on every expansion, so that the python
# interpreter and the shell pipeline are only run a single time per make.
PYTHON_INCLUDE_DIRECTORY := $(shell ${PYTHON} -c "from sysconfig import get_paths; print(get_paths()['include'])")
INDEXED_GZIP_VERSION     := $(shell cat ../indexed_gzip/__init__.py | grep __version__ | cut -d ' ' -f 3 | tr -d "'")

# -fPIC is needed because the object files are linked into a shared library.
CFLAGS += -fPIC -I${PYTHON_INCLUDE_DIRECTORY} -I..

# link against built zlib
ifdef ZLIB_HOME
LDFLAGS += -L${ZLIB_HOME} -lz
CFLAGS  += -I${ZLIB_HOME}
# link against built zlib-ng
else ifdef ZLIB_NG_HOME
LDFLAGS += -L${ZLIB_NG_HOME} -lz-ng
CFLAGS  += -I${ZLIB_NG_HOME} -DZRAN_USE_ZLIB_NG=1
# link against system zlib
else
LDFLAGS += -lz
endif


clean:
	rm -f *.o *.so *.a cindexed_gzip.h


# Generate the public header from its template, filling in the version string.
cindexed_gzip.h: cindexed_gzip.h.in
	sed -e "s/@INDEXED_GZIP_VERSION@/${INDEXED_GZIP_VERSION}/" $< > $@


# Every object depends on the generated header so that it is created before
# any source file is compiled.
%.o: %.c cindexed_gzip.h
	${CC} ${CFLAGS} -c -o $@ $<


libcindexed_gzip.so: ${OBJFILES}
	${CC} ${CFLAGS} -shared -o $@ $^ ${LDFLAGS}


libcindexed_gzip_static.a: ${OBJFILES}
	${AR} -r $@ $^


# Depends on "all" so that the files being copied are guaranteed to exist.
install: all
ifndef PREFIX
	@echo "PREFIX not set - aborting"
	exit 1
endif
	mkdir -p ${PREFIX}/lib
	mkdir -p ${PREFIX}/include/cindexed_gzip
	cp libcindexed_gzip.so       ${PREFIX}/lib/
	cp libcindexed_gzip_static.a ${PREFIX}/lib/
	cp cindexed_gzip.h           ${PREFIX}/include/cindexed_gzip/
	cp ccindexed_gzip.h          ${PREFIX}/include/cindexed_gzip/
	cp zran.h                    ${PREFIX}/include/cindexed_gzip/
8 changes: 8 additions & 0 deletions cindexed_gzip/ccindexed_gzip.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/*
 * C++ wrapper around cindexed_gzip.h which places its declarations inside
 * the cindexed_gzip namespace.
 */
#ifndef CCINDEXED_GZIP_H
#define CCINDEXED_GZIP_H

/*
 * cindexed_gzip.h includes <stdint.h> itself. Including it here first, at
 * global scope, means that the later include inside the namespace is a no-op
 * and the fixed-width integer types are not declared as members of the
 * namespace.
 *
 * NOTE(review): cindexed_gzip.h also includes zran.h from inside the
 * namespace; any standard headers which zran.h pulls in may need the same
 * treatment - confirm against zran.h.
 */
#include <stdint.h>

namespace cindexed_gzip {
#include "cindexed_gzip/cindexed_gzip.h"
}

#endif
178 changes: 178 additions & 0 deletions cindexed_gzip/cindexed_gzip.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
/*
* Simplified C interface for reading GZIP files with indexed_gzip.
*
* Both GZIP and other files may be loaded via this interface; GZIP files will
* be read via the zran module, and other files will read normally.
*/
#include <string.h>
#include <stdint.h>

#include "cindexed_gzip/zran.h"
#include "cindexed_gzip/cindexed_gzip.h"


/*
 * igz_file struct, for reading from a GZIP or other file. No file handle is
 * kept open between calls: igz_open closes the file before returning, and
 * igz_read opens and closes it again on every call.
 */
struct _igz_file {
/* zran index; only initialised (and later freed) when compressed is set. */
zran_index_t index;
/* Non-zero if igz_open detected the file as GZIP, 0 otherwise. */
int compressed;
/* Heap-allocated copy of the path given to igz_open; freed by igz_close. */
char *filepath;
};


/*
 * Returns 1 if the given file is a GZIP file, 0 if it is not, -1 if an error
 * occurs.
 *
 * A file is considered to be GZIP if its first two bytes are the GZIP magic
 * number (0x1f 0x8b). A file which is too short to contain the magic number
 * is reported as "not GZIP" (0) rather than as an error. The file position
 * is reset to the beginning of the file before returning.
 *
 * https://www.rfc-editor.org/rfc/rfc1952#section-2.3.1
 */
static int is_gzip_file(FILE *f) {

  uint8_t magic[2];
  size_t  nread;

  if (f == NULL)                  { return -1; }
  if (fseek(f, 0, SEEK_SET) != 0) { return -1; }

  /* A short read is only an error if the stream says so - otherwise the
     file simply contains fewer than two bytes. */
  nread = fread(magic, 1, sizeof(magic), f);
  if (nread != sizeof(magic) && ferror(f)) { return -1; }

  /* Rewinding also clears the EOF indicator left behind by a short read. */
  if (fseek(f, 0, SEEK_SET) != 0) { return -1; }

  if (nread == sizeof(magic) && magic[0] == 0x1f && magic[1] == 0x8b) {
    return 1;
  }
  return 0;
}


/*
 * Returns the size of the given file in bytes, or (uint64_t)-1 if an error
 * occurs. The file position is reset to the beginning of the file before
 * returning.
 */
static uint64_t file_size(FILE *f) {

  /* Signed, so that a negative (failure) result from ftello is detectable. */
  int64_t size;

  if (f == NULL)                  { goto fail; }
  if (fseek(f, 0, SEEK_END) != 0) { goto fail; }

  size = (int64_t)ftello(f);

  if (size < 0)                   { goto fail; }
  if (fseek(f, 0, SEEK_SET) != 0) { goto fail; }

  return (uint64_t)size;

fail:
  return (uint64_t)-1;
}


/*
 * Open a file for reading. GZIP files will be loaded via the zran module.
 * Other files will be read normally (via fseek/fread). The igz_file must be
 * passed to igz_close when it is no longer needed.
 *
 * The file is only opened for long enough to determine its type and, for
 * GZIP files, to initialise the zran index; it is closed again before this
 * function returns.
 *
 * Returns a newly allocated igz_file, or NULL if filepath is NULL, the file
 * cannot be opened, memory cannot be allocated, the file type cannot be
 * determined, or the zran index cannot be initialised.
 */
igz_file * igz_open(const char *filepath) {

  igz_file *gzf     = NULL;
  FILE     *f       = NULL;
  size_t    namelen = 0;

  if (filepath == NULL) {
    goto fail;
  }

  f = fopen(filepath, "rb");
  if (f == NULL) {
    goto fail;
  }

  gzf = calloc(1, sizeof(igz_file));
  if (gzf == NULL) {
    goto fail;
  }

  /* Keep a private copy of the path - igz_read re-opens the file by name. */
  namelen       = strlen(filepath);
  gzf->filepath = malloc(namelen + 1);
  if (gzf->filepath == NULL) {
    goto fail;
  }
  strcpy(gzf->filepath, filepath);

  gzf->compressed = is_gzip_file(f);
  if (gzf->compressed == -1) {
    goto fail;
  }

  if (gzf->compressed) {
    /* NOTE(review): the numeric arguments are presumably the index point
       spacing, window size and read buffer size - confirm against zran.h. */
    if (zran_init(&(gzf->index), f, NULL, 4194304,
                  32768, 1048576, ZRAN_AUTO_BUILD) != 0) {
      goto fail;
    }
    /* f is closed below, so do not leave a stale handle in the index. */
    gzf->index.fd = NULL;
  }

  fclose(f);

  return gzf;

fail:
  if (gzf != NULL) {
    /* gzf was calloc'd, so filepath is either NULL or a valid allocation. */
    free(gzf->filepath);
    free(gzf);
  }
  if (f != NULL) {
    fclose(f);
  }
  return NULL;
}


/*
 * Free resources associated with the igz_file. Must only be called once for
 * a given igz_file. Passing NULL is a no-op.
 *
 * Always returns 0.
 */
int igz_close(igz_file *gzf) {

  if (gzf == NULL) {
    return 0;
  }

  /* The zran index is only initialised for GZIP files. */
  if (gzf->compressed) {
    zran_free(&(gzf->index));
  }

  free(gzf->filepath);
  free(gzf);

  return 0;
}

/*
 * Read up to len bytes from the given igz_file, starting from off. The bytes
 * are copied into buf.
 *
 * The underlying file is opened at the start of every call and closed again
 * before returning.
 *
 * Returns the number of bytes that were read, 0 if the read started at the
 * end of the file, or a negative value if an error occurred.
 *
 * NOTE(review): for GZIP files, any result from zran_seek other than
 * ZRAN_SEEK_OK is reported as an error. If zran_seek signals an offset at or
 * past the end of the data with a distinct code, that case should probably
 * return 0 instead - confirm against zran.h.
 */
int64_t igz_read(igz_file *gzf, void *buf, uint64_t len, uint64_t off) {

  FILE    *f          = NULL;
  int64_t  bytes_read = 0;

  if (gzf == NULL) {
    return -1;
  }

  /* Both values are converted to signed 64 bit quantities further down. */
  if (len > INT64_MAX || off > INT64_MAX) {
    goto fail;
  }
  if (buf == NULL && len > 0) {
    goto fail;
  }

  f = fopen(gzf->filepath, "rb");
  if (f == NULL) {
    goto fail;
  }

  if (gzf->compressed) {
    /* Lend the freshly opened handle to the index for this call only. */
    gzf->index.fd = f;
    if (zran_seek(&(gzf->index), (int64_t)off, SEEK_SET, NULL) != ZRAN_SEEK_OK) {
      goto fail;
    }

    bytes_read = zran_read(&(gzf->index), buf, len);
    if (bytes_read == ZRAN_READ_EOF) {
      /* Reading at EOF is not an error - report that no bytes were read. */
      bytes_read = 0;
    }
    else if (bytes_read < 0) {
      goto fail;
    }
  }
  else {
    /* NOTE(review): fseek takes a long, so on platforms where long is 32
       bits, offsets beyond LONG_MAX cannot be represented here. */
    if (fseek(f, (long)off, SEEK_SET) != 0) {
      goto fail;
    }
    bytes_read = (int64_t)fread(buf, 1, len, f);
    /* A short read is normal at EOF, but not if the stream reports an error. */
    if ((uint64_t)bytes_read < len && ferror(f)) {
      goto fail;
    }
  }

  /* Do not leave the index pointing at a handle which is about to close. */
  gzf->index.fd = NULL;
  fclose(f);

  return bytes_read;

fail:
  if (f != NULL) {
    fclose(f);
  }
  gzf->index.fd = NULL;

  return -1;
}
60 changes: 60 additions & 0 deletions cindexed_gzip/cindexed_gzip.h.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
 * Simplified C interface for reading GZIP files with indexed_gzip.
 *
 * Both GZIP and other files may be loaded via this interface; GZIP files will
 * be read via the zran module, and other files will be read normally.
 *
 * This file is a template - cindexed_gzip/Makefile generates cindexed_gzip.h
 * from it by filling in the version placeholder below.
 */
#ifndef __CINDEXED_GZIP_H__
#define __CINDEXED_GZIP_H__

/* Give the declarations below C linkage when compiled as C++. */
#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>

#include "zran.h"

/*
 * Version string, defined in indexed_gzip/__init__.py (see
 * cindexed_gzip/Makefile).
 */
#define CINDEXED_GZIP_VERSION "@INDEXED_GZIP_VERSION@"


/*
 * Struct representing a file which has been loaded via igz_open. The type is
 * only forward-declared here; its members are defined in cindexed_gzip.c.
 */
struct _igz_file;
typedef struct _igz_file igz_file;


/*
 * Open a file for reading. GZIP files will be loaded via the zran module.
 * Other files will be read normally (via fseek/fread). The igz_file must be
 * passed to igz_close when it is no longer needed.
 *
 * Returns NULL if the file could not be opened.
 */
igz_file * igz_open(const char *filepath);


/*
 * Free resources associated with the igz_file. Must only be called once for
 * a given igz_file.
 */
int igz_close(igz_file *gzf);


/*
 * Read up to len bytes from the given igz_file, starting from off. The bytes
 * are copied into buf.
 *
 * Returns the number of bytes that were read, 0 if off == EOF, or a negative
 * value if an error occurred.
 */
int64_t igz_read(igz_file *gzf, void *buf, uint64_t len, uint64_t off);

#ifdef __cplusplus
}
#endif

#endif /* __CINDEXED_GZIP_H__ */
Loading