Skip to content

Commit

Permalink
Add streaming filter API
Browse files Browse the repository at this point in the history
This introduces an API to plug custom filters to an input stream.

The caller calls get_stream_filter("path") to obtain an appropriate
filter for the path, and then passes it when opening an input stream
via open_istream().  After that, the caller can read from the stream
with read_istream(), and close it with close_istream(), just like an
unfiltered stream.

This only adds a "null" filter that is a pass-thru filter, but later
changes can add LF-to-CRLF and other filters, and the callers of the
streaming API do not have to change.

Signed-off-by: Junio C Hamano <[email protected]>
  • Loading branch information
gitster committed May 26, 2011
1 parent d1bf0e0 commit b669109
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 16 deletions.
84 changes: 77 additions & 7 deletions convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -814,12 +814,69 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
return ret | convert_to_git(path, src, len, dst, 0);
}

/*****************************************************************
*
* Streaming conversion support
*
*****************************************************************/

/*
 * Conversion method of a filter.  On entry *isize_p is the number of
 * bytes available at input and *osize_p is the room available at
 * output; the method updates both through the pointers.  Returns 0 on
 * success, non-zero on error.
 */
typedef int (*filter_fn)(struct stream_filter *,
const char *input, size_t *isize_p,
char *output, size_t *osize_p);
/* Method that releases a filter instance. */
typedef void (*free_fn)(struct stream_filter *);

/* Per-filter-type method table; dispatched through struct stream_filter. */
struct stream_filter_vtbl {
filter_fn filter;
free_fn free;
};

/*
 * A filter instance.  It only carries the pointer to its method
 * table; stream_filter() and free_stream_filter() call through it.
 */
struct stream_filter {
struct stream_filter_vtbl *vtbl;
};

/*
 * Pass-through filter method: copy as many bytes from input to output
 * as both buffers allow (the smaller of *isize_p and *osize_p), then
 * subtract the copied amount from both counters so they hold what is
 * left over.  The filter argument is not used.  Always returns 0.
 */
static int null_filter_fn(struct stream_filter *filter,
			  const char *input, size_t *isize_p,
			  char *output, size_t *osize_p)
{
	size_t n = (*osize_p < *isize_p) ? *osize_p : *isize_p;

	/* nothing to move when either side has no room */
	if (!n)
		return 0;

	memmove(output, input, n);
	*isize_p -= n;
	*osize_p -= n;
	return 0;
}

/*
 * Free method of the pass-through filter.  Deliberately does nothing:
 * null filter instances are shared, so there is nothing to release.
 */
static void null_free_fn(struct stream_filter *filter)
{
	(void)filter; /* nothing -- null instances are shared */
}

/* Method table of the pass-through ("null") filter. */
static struct stream_filter_vtbl null_vtbl = {
null_filter_fn,
null_free_fn,
};

/*
 * The one shared instance of the pass-through filter; its address is
 * what is_null_stream_filter() compares against.
 */
static struct stream_filter null_filter_singleton = {
&null_vtbl,
};

/*
 * Return 1 when filter is the shared pass-through filter instance,
 * 0 for any other pointer.
 */
int is_null_stream_filter(struct stream_filter *filter)
{
	if (filter == &null_filter_singleton)
		return 1;
	return 0;
}

/*
* You would be crazy to set CRLF, smuge/clean or ident to
* a large binary blob you would want us not to slurp into
* the memory!
* Return an appropriately constructed filter for the path, or NULL if
* the contents cannot be filtered without reading the whole thing
* in-core.
*
* Note that you would be crazy to set CRLF, smudge/clean or ident to a
* large binary blob you would want us not to slurp into the memory!
*/
int can_bypass_conversion(const char *path)
struct stream_filter *get_stream_filter(const char *path, const unsigned char *sha1)
{
struct conv_attrs ca;
enum crlf_action crlf_action;
Expand All @@ -828,11 +885,24 @@ int can_bypass_conversion(const char *path)

if (ca.ident ||
(ca.drv && (ca.drv->smudge || ca.drv->clean)))
return 0;
return NULL;

crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) ||
(crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE))
return 1;
return 0;
return &null_filter_singleton;

return NULL;
}

void free_stream_filter(struct stream_filter *filter)
{
filter->vtbl->free(filter);
}

int stream_filter(struct stream_filter *filter,
const char *input, size_t *isize_p,
char *output, size_t *osize_p)
{
return filter->vtbl->filter(filter, input, isize_p, output, osize_p);
}
23 changes: 22 additions & 1 deletion convert.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,26 @@ extern int convert_to_working_tree(const char *path, const char *src,
size_t len, struct strbuf *dst);
extern int renormalize_buffer(const char *path, const char *src, size_t len,
struct strbuf *dst);
extern int can_bypass_conversion(const char *path);

/*****************************************************************
*
* Streaming conversion support
*
*****************************************************************/

struct stream_filter; /* opaque */

/*
 * get_stream_filter() returns a filter for the path, or NULL if the
 * contents cannot be filtered without reading the whole thing in-core.
 * free_stream_filter() releases a filter through its own free method.
 * is_null_stream_filter() returns non-zero when the filter is the
 * shared pass-through filter.
 */
extern struct stream_filter *get_stream_filter(const char *path, const unsigned char *);
extern void free_stream_filter(struct stream_filter *);
extern int is_null_stream_filter(struct stream_filter *);

/*
 * Use as much input up to *isize_p and fill output up to *osize_p.
 * On return *isize_p holds the number of input bytes that were NOT
 * consumed and *osize_p holds the output space that was NOT filled;
 * the caller derives the consumed/filled amounts by subtracting from
 * the values it passed in. Return 0 on success, non-zero on error.
 */
extern int stream_filter(struct stream_filter *,
const char *input, size_t *isize_p,
char *output, size_t *osize_p);

#endif /* CONVERT_H */
16 changes: 10 additions & 6 deletions entry.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ static int fstat_output(int fd, const struct checkout *state, struct stat *st)
}

static int streaming_write_entry(struct cache_entry *ce, char *path,
struct stream_filter *filter,
const struct checkout *state, int to_tempfile,
int *fstat_done, struct stat *statbuf)
{
Expand All @@ -126,7 +127,7 @@ static int streaming_write_entry(struct cache_entry *ce, char *path,
ssize_t kept = 0;
int fd = -1;

st = open_istream(ce->sha1, &type, &sz);
st = open_istream(ce->sha1, &type, &sz, filter);
if (!st)
return -1;
if (type != OBJ_BLOB)
Expand Down Expand Up @@ -186,11 +187,14 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
size_t wrote, newsize = 0;
struct stat st;

if ((ce_mode_s_ifmt == S_IFREG) &&
can_bypass_conversion(path) &&
!streaming_write_entry(ce, path, state, to_tempfile,
&fstat_done, &st))
goto finish;
if (ce_mode_s_ifmt == S_IFREG) {
struct stream_filter *filter = get_stream_filter(path, ce->sha1);
if (filter &&
!streaming_write_entry(ce, path, filter,
state, to_tempfile,
&fstat_done, &st))
goto finish;
}

switch (ce_mode_s_ifmt) {
case S_IFREG:
Expand Down
100 changes: 99 additions & 1 deletion streaming.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,27 @@ struct stream_vtbl {
static open_method_decl(incore);
static open_method_decl(loose);
static open_method_decl(pack_non_delta);
static struct git_istream *attach_stream_filter(struct git_istream *st,
struct stream_filter *filter);


static open_istream_fn open_istream_tbl[] = {
open_istream_incore,
open_istream_loose,
open_istream_pack_non_delta,
};

/* Size in bytes of each of the two staging buffers of a filtered stream. */
#define FILTER_BUFFER (1024*16)

/*
 * State of a stream whose contents pass through a stream_filter.
 *
 * Data is read from upstream into ibuf, run through filter into obuf,
 * and handed to the reader from obuf.  The bytes not yet fed to the
 * filter are ibuf[i_ptr..i_end), and the filtered bytes not yet handed
 * to the reader are obuf[o_ptr..o_end).
 */
struct filtered_istream {
struct git_istream *upstream;
struct stream_filter *filter;
char ibuf[FILTER_BUFFER];
char obuf[FILTER_BUFFER];
int i_end, i_ptr;
int o_end, o_ptr;
};

struct git_istream {
const struct stream_vtbl *vtbl;
unsigned long size; /* inflated size of full object */
Expand All @@ -72,6 +86,8 @@ struct git_istream {
struct packed_git *pack;
off_t pos;
} in_pack;

struct filtered_istream filtered;
} u;
};

Expand Down Expand Up @@ -112,7 +128,8 @@ static enum input_source istream_source(const unsigned char *sha1,

struct git_istream *open_istream(const unsigned char *sha1,
enum object_type *type,
unsigned long *size)
unsigned long *size,
struct stream_filter *filter)
{
struct git_istream *st;
struct object_info oi;
Expand All @@ -129,6 +146,14 @@ struct git_istream *open_istream(const unsigned char *sha1,
return NULL;
}
}
if (st && filter) {
/* Add "&& !is_null_stream_filter(filter)" for performance */
struct git_istream *nst = attach_stream_filter(st, filter);
if (!nst)
close_istream(st);
st = nst;
}

*size = st->size;
return st;
}
Expand All @@ -147,6 +172,79 @@ static void close_deflated_stream(struct git_istream *st)
}


/*****************************************************************
*
* Filtered stream
*
*****************************************************************/

static close_method_decl(filtered)
{
free_stream_filter(st->u.filtered.filter);
return close_istream(st->u.filtered.upstream);
}

/*
 * Read method of a filtered stream: fill buf with up to sz bytes of
 * filtered data and return the number of bytes stored.
 *
 * Each iteration does the first applicable step:
 *   1. hand already-filtered bytes from obuf to the caller;
 *   2. run pending bytes in ibuf through the filter into obuf;
 *   3. refill ibuf from the upstream stream.
 *
 * Returns -1 when the filter reports an error or when reading from
 * the upstream stream fails; a return of 0 from the upstream read
 * ends the loop and the bytes collected so far are returned.
 *
 * (read_method_decl is defined outside this view; the body relies on
 * it naming the arguments "st", "buf" and "sz".)
 */
static read_method_decl(filtered)
{
	struct filtered_istream *fs = &(st->u.filtered);
	size_t filled = 0;

	while (sz) {
		/* do we already have filtered output? */
		if (fs->o_ptr < fs->o_end) {
			size_t to_move = fs->o_end - fs->o_ptr;
			if (sz < to_move)
				to_move = sz;
			memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move);
			fs->o_ptr += to_move;
			sz -= to_move;
			filled += to_move;
			continue;
		}
		fs->o_end = fs->o_ptr = 0;

		/* do we have anything to feed the filter with? */
		if (fs->i_ptr < fs->i_end) {
			size_t to_feed = fs->i_end - fs->i_ptr;
			size_t to_receive = FILTER_BUFFER;
			if (stream_filter(fs->filter,
					  fs->ibuf + fs->i_ptr, &to_feed,
					  fs->obuf, &to_receive))
				return -1;
			/* the filter leaves the unused amounts in both counters */
			fs->i_ptr = fs->i_end - to_feed;
			fs->o_end = FILTER_BUFFER - to_receive;
			continue;
		}
		fs->i_end = fs->i_ptr = 0;

		/* refill the input from the upstream */
		fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER);
		if (fs->i_end < 0) {
			/*
			 * Upstream read failed: report it instead of
			 * treating it as end of input, and leave the
			 * input window empty.
			 */
			fs->i_end = 0;
			return -1;
		}
		if (!fs->i_end)
			break; /* upstream is exhausted */
	}
	return filled;
}

/*
 * Method table of filtered streams; attach_stream_filter() installs
 * it on the wrapper stream it creates.
 */
static struct stream_vtbl filtered_vtbl = {
close_istream_filtered,
read_istream_filtered,
};

static struct git_istream *attach_stream_filter(struct git_istream *st,
struct stream_filter *filter)
{
struct git_istream *ifs = xmalloc(sizeof(*ifs));
struct filtered_istream *fs = &(ifs->u.filtered);

ifs->vtbl = &filtered_vtbl;
fs->upstream = st;
fs->filter = filter;
fs->i_end = fs->i_ptr = 0;
fs->o_end = fs->o_ptr = 0;
ifs->size = -1; /* unknown */
return ifs;
}

/*****************************************************************
*
* Loose object stream
Expand Down
2 changes: 1 addition & 1 deletion streaming.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
/* opaque */
struct git_istream;

extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *);
extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *, struct stream_filter *);
extern int close_istream(struct git_istream *);
extern ssize_t read_istream(struct git_istream *, char *, size_t);

Expand Down

0 comments on commit b669109

Please sign in to comment.