forked from hauke/wireshark
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfile_wrappers.c
1783 lines (1595 loc) · 51.4 KB
/
file_wrappers.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* file_wrappers.c
*
* Wiretap Library
* Copyright (c) 1998 by Gilbert Ramirez <[email protected]>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
/* file_access interface based heavily on zlib gzread.c and gzlib.c from zlib
* Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
* under licence:
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
#include "config.h"
#include <errno.h>
#include <string.h>
#include "wtap-int.h"
#include "file_wrappers.h"
#include <wsutil/ws_diag_control.h>
#include <wsutil/file_util.h>
#ifdef HAVE_LIBZ
#define ZLIB_CONST
#include <zlib.h>
#endif /* HAVE_LIBZ */
/*
* See RFC 1952 for a description of the gzip file format.
*
* Some other compressed file formats we might want to support:
*
* XZ format: http://tukaani.org/xz/
*
* Bzip2 format: http://bzip.org/
*/
/*
* List of extensions for compressed files.
*
* Each entry is an extension without the leading dot, and the table is
* terminated by a NULL entry. "gz" is only present when built with
* zlib support (HAVE_LIBZ); without it the table holds just the
* terminator.
*
* If we add support for more compressed file types, this table
* might be expanded to include routines to handle the various
* compression types.
*/
static const char *compressed_file_extensions[] = {
#ifdef HAVE_LIBZ
"gz",
#endif
NULL
};
/*
* Return a GSList of all the compressed file extensions.
* The data pointers all point to items in compressed_file_extensions[],
* so the GSList can just be freed with g_slist_free().
*/
GSList *
wtap_get_compressed_file_extensions(void)
{
const char **extension;
GSList *extensions;
extensions = NULL;
for (extension = &compressed_file_extensions[0]; *extension != NULL;
extension++)
extensions = g_slist_append(extensions, (gpointer)(*extension));
return extensions;
}
/*
* Default size, in bytes, of the input buffer allocated by file_fdopen();
* the output buffer is allocated at twice this size.
*/
/* #define GZBUFSIZE 8192 */
#define GZBUFSIZE 4096
/*
* Values for wtap_reader compression; also stored in each
* struct fast_seek_point to say how data at that point is encoded.
*/
typedef enum {
UNKNOWN, /* unknown - look for a gzip header */
UNCOMPRESSED, /* uncompressed - copy input directly */
#ifdef HAVE_LIBZ
ZLIB, /* decompress a zlib stream */
/* seek-point marker recorded by gz_head() for the position just past a gzip header */
GZIP_AFTER_HEADER
#endif
} compression_t;
/*
* State of one file opened for reading; allocated and initialized by
* file_fdopen().
*
* Raw bytes are read from fd into the "in" buffer (next_in/avail_in track
* what has not been consumed yet); data ready for the caller sits in the
* "out" buffer (next/have track what has not been delivered yet).
*/
struct wtap_reader {
int fd; /* file descriptor */
gint64 raw_pos; /* current position in file (just to not call lseek()) */
gint64 pos; /* current position in uncompressed data */
guint size; /* buffer size */
unsigned char *in; /* input buffer */
unsigned char *out; /* output buffer (double-sized when reading) */
unsigned char *next; /* next output data to deliver or write */
guint have; /* amount of output data unused at next */
gboolean eof; /* TRUE if end of input file reached */
gint64 start; /* where the gzip data started, for rewinding */
gint64 raw; /* where the raw data started, for seeking */
compression_t compression; /* type of compression, if any */
gboolean is_compressed; /* FALSE if completely uncompressed, TRUE otherwise */
/* seek request */
gint64 skip; /* amount to skip (already rewound if backwards) */
gboolean seek_pending; /* TRUE if seek request pending */
/* error information */
int err; /* error code */
const char *err_info; /* additional error information string for some errors */
guint avail_in; /* number of bytes available at next_in */
unsigned char *next_in; /* next input byte */
#ifdef HAVE_LIBZ
/* zlib inflate stream */
z_stream strm; /* stream structure in-place (not a pointer) */
gboolean dont_check_crc; /* TRUE if we aren't supposed to check the CRC */
#endif
/* fast seeking */
/* array of struct fast_seek_point *, set by file_set_random_access(); NULL if fast seeking is not in use */
GPtrArray *fast_seek;
/* struct zlib_cur_seek_point * for the gzip stream currently being inflated, or NULL */
void *fast_seek_cur;
};
/*
 * Read up to "count" bytes from state->fd into "buf", repeating short
 * reads until the request is filled, end of file is reached, or
 * ws_read() fails.
 *
 * *have receives the number of bytes stored in "buf", and
 * state->raw_pos is advanced by the same amount.
 *
 * Returns 0 on success, setting state->eof when ws_read() reported end
 * of file.  Returns -1 on a read error, with errno saved in state->err
 * and state->err_info cleared; *have then still holds the number of
 * bytes read before the failure.
 */
static int /* gz_load */
raw_read(FILE_T state, unsigned char *buf, unsigned int count, guint *have)
{
    ssize_t nread;

    *have = 0;
    for (;;) {
        nread = ws_read(state->fd, buf + *have, count - *have);
        if (nread < 0) {
            /* read error */
            state->err = errno;
            state->err_info = NULL;
            return -1;
        }
        if (nread == 0) {
            /* nothing more to read */
            state->eof = TRUE;
            return 0;
        }
        *have += (unsigned)nread;
        state->raw_pos += nread;
        if (*have >= count)
            return 0;
    }
}
/*
 * Refill the input buffer (state->in) from the file.
 *
 * Does nothing but report failure if an error is already recorded in
 * state->err, so errors are sticky.  If end of file was already seen,
 * succeeds without reading, leaving state->avail_in as it was.
 *
 * On a successful read, state->next_in points at the start of the
 * buffer and state->avail_in is the number of bytes read (0 at EOF).
 *
 * On a read error, state->avail_in is forced to 0.  raw_read() may have
 * stored a partial byte count there before failing, while
 * state->next_in still refers to the previously consumed data; leaving
 * that count in place would let a later GZ_GETC() walk through stale
 * (possibly out-of-bounds) bytes instead of reporting the error.
 *
 * Returns 0 on success, -1 on error.
 */
static int /* gz_avail */
fill_in_buffer(FILE_T state)
{
    if (state->err)
        return -1;
    if (state->eof)
        return 0;

    if (raw_read(state, state->in, state->size, &(state->avail_in)) == -1) {
        /* Keep avail_in/next_in consistent: nothing is available. */
        state->avail_in = 0;
        return -1;
    }
    state->next_in = state->in;
    return 0;
}
/* Size, in bytes, of the window of uncompressed data saved with each zlib seek point. */
#define ZLIB_WINSIZE 32768
/*
* One entry in a file's fast-seek index (the GPtrArray in
* wtap_reader.fast_seek): a position at which reading can be resumed.
* Entries are appended in increasing "out" order, which is what lets
* fast_seek_find() binary-search them.
*/
struct fast_seek_point {
gint64 out; /* corresponding offset in uncompressed data */
gint64 in; /* offset in input file of first full byte */
compression_t compression;
union {
/* filled in only for entries added by zlib_fast_seek_add() (compression == ZLIB) */
struct {
#ifdef HAVE_INFLATEPRIME
int bits; /* number of bits (1-7) from byte at in - 1, or 0 */
#endif
unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
/* be gentle with Z_STREAM_END, 8 bytes more... Another solution would be to comment checks out */
guint32 adler; /* low 32 bits of strm.adler at this point */
guint32 total_out; /* low 32 bits of strm.total_out at this point */
} zlib;
} data;
};
/*
* Rolling record of the most recent uncompressed output of the gzip
* stream currently being inflated. zlib_read() writes into "window"
* circularly, and zlib_fast_seek_add() unrolls it into a
* struct fast_seek_point.
*/
struct zlib_cur_seek_point {
unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
unsigned int pos; /* index in window where the next byte will be written */
unsigned int have; /* number of valid bytes in window, capped at ZLIB_WINSIZE */
};
/*
* Spacing of zlib seek points: zlib_fast_seek_add() only adds a point when
* it is more than SPAN uncompressed bytes past the previous one.
*/
#define SPAN G_GINT64_CONSTANT(1048576)
/*
 * Binary-search the file's fast-seek index for the entry to start from
 * when seeking to uncompressed offset "pos".
 *
 * Returns the entry whose "out" offset equals pos if the search lands on
 * one, otherwise the last probed entry whose "out" offset is below pos.
 * Returns NULL when the file has no fast-seek index or no such entry
 * was found.
 */
static struct fast_seek_point *
fast_seek_find(FILE_T file, gint64 pos)
{
    struct fast_seek_point *best = NULL;
    guint lo, hi;

    if (!file->fast_seek)
        return NULL;

    lo = 0;
    hi = file->fast_seek->len;
    while (lo < hi) {
        guint mid = (lo + hi) / 2;
        struct fast_seek_point *probe = (struct fast_seek_point *)file->fast_seek->pdata[mid];

        if (pos < probe->out) {
            /* target lies before this entry */
            hi = mid;
            continue;
        }
        if (pos == probe->out)
            return probe;

        /* this entry precedes the target; remember it and look further right */
        best = probe;
        lo = mid + 1;
    }
    return best;
}
/*
 * Append a seek point to the file's fast-seek index, recording that
 * input offset "in_pos" corresponds to uncompressed offset "out_pos"
 * and is encoded as "compression".
 *
 * Nothing is added unless the index is empty or its last entry lies
 * strictly before out_pos, so the index stays sorted by "out".  The
 * caller must have checked that file->fast_seek is non-NULL.
 */
static void
fast_seek_header(FILE_T file, gint64 in_pos, gint64 out_pos,
                 compression_t compression)
{
    GPtrArray *points = file->fast_seek;
    struct fast_seek_point *entry;

    if (points->len != 0) {
        struct fast_seek_point *last = (struct fast_seek_point *)points->pdata[points->len - 1];

        if (last != NULL && !(last->out < out_pos))
            return;
    }

    entry = g_new(struct fast_seek_point,1);
    entry->in = in_pos;
    entry->out = out_pos;
    entry->compression = compression;
    g_ptr_array_add(points, entry);
}
/*
 * Forget the uncompressed data remembered for the zlib stream currently
 * being read, by marking its rolling window as empty.  Does nothing if
 * the stream is not a zlib stream, if no window is being kept, or if
 * built without zlib.
 */
static void
fast_seek_reset(FILE_T state _U_)
{
#ifdef HAVE_LIBZ
    struct zlib_cur_seek_point *window;

    if (state->compression != ZLIB)
        return;

    window = (struct zlib_cur_seek_point *) state->fast_seek_cur;
    if (window)
        window->have = 0;
#endif
}
#ifdef HAVE_LIBZ
/* Get next byte from input, or -1 if end or error.
*
* The macro operates on a FILE_T variable named "state" that must be in
* scope at the point of use. When the input buffer is empty it calls
* fill_in_buffer() to refill it; otherwise it consumes one byte through
* state->next_in and decrements state->avail_in.
*
* Note:
*
* 1) errors from raw_read(), and thus from fill_in_buffer(), are
* "sticky", and fill_in_buffer() won't do any reading if there's
* an error;
*
* 2) GZ_GETC() returns -1 on an EOF;
*
* so it's safe to make multiple GZ_GETC() calls and only check the
* last one for an error.
*
* NOTE(review): that reasoning relies on state->avail_in being 0
* whenever fill_in_buffer() has failed - confirm. */
#define GZ_GETC() ((state->avail_in == 0 && fill_in_buffer(state) == -1) ? -1 : \
(state->avail_in == 0 ? -1 : \
(state->avail_in--, *(state->next_in)++)))
/* Read a single byte from the input.
   On success, store it in *ret and return 0.
   On end of input or error, return -1 and leave *ret untouched;
   state->err is set to WTAP_ERR_SHORT_READ if no error had been
   recorded yet (i.e. the failure was an EOF). */
static int
gz_next1(FILE_T state, guint8 *ret)
{
    int byte = GZ_GETC();

    if (byte != -1) {
        *ret = byte;
        return 0;
    }

    if (state->err == 0) {
        /* ran out of input */
        state->err = WTAP_ERR_SHORT_READ;
        state->err_info = NULL;
    }
    return -1;
}
/* Read a two-byte little-endian integer from the input.
   On success, store it in *ret and return 0.
   On end of input or error, return -1 and leave *ret untouched;
   state->err is set to WTAP_ERR_SHORT_READ if no error had been
   recorded yet.  Only the second byte read is checked for failure. */
static int
gz_next2(FILE_T state, guint16 *ret)
{
    guint16 low;
    int high;

    low = GZ_GETC();
    high = GZ_GETC();
    if (high != -1) {
        *ret = (guint16)(low + ((guint16)high << 8));
        return 0;
    }

    if (state->err == 0) {
        /* ran out of input */
        state->err = WTAP_ERR_SHORT_READ;
        state->err_info = NULL;
    }
    return -1;
}
/* Read a four-byte little-endian integer from the input.
   On success, store it in *ret and return 0.
   On end of input or error, return -1 and leave *ret untouched;
   state->err is set to WTAP_ERR_SHORT_READ if no error had been
   recorded yet.  Only the last byte read is checked for failure. */
static int
gz_next4(FILE_T state, guint32 *ret)
{
    int b0, b1, b2, b3;

    /* least significant byte comes first */
    b0 = GZ_GETC();
    b1 = GZ_GETC();
    b2 = GZ_GETC();
    b3 = GZ_GETC();
    if (b3 == -1) {
        if (state->err == 0) {
            /* ran out of input */
            state->err = WTAP_ERR_SHORT_READ;
            state->err_info = NULL;
        }
        return -1;
    }

    *ret = (guint32)b0
         + ((unsigned)b1 << 8)
         + ((guint32)b2 << 16)
         + ((guint32)b3 << 24);
    return 0;
}
/* Consume and discard n bytes of input.
   Returns 0 once all n bytes have been skipped.  Returns -1 if the
   input ends or an error occurs first; state->err is set to
   WTAP_ERR_SHORT_READ if no error had been recorded yet. */
static int
gz_skipn(FILE_T state, size_t n)
{
    size_t remaining;

    for (remaining = n; remaining != 0; remaining--) {
        if (GZ_GETC() != -1)
            continue;

        if (state->err == 0) {
            /* ran out of input */
            state->err = WTAP_ERR_SHORT_READ;
            state->err_info = NULL;
        }
        return -1;
    }
    return 0;
}
/* Consume input up to and including the next zero byte, i.e. skip a
   null-terminated string.
   Returns 0 once the terminator has been consumed.  Returns -1 if the
   input ends or an error occurs first; state->err is set to
   WTAP_ERR_SHORT_READ if no error had been recorded yet. */
static int
gz_skipzstr(FILE_T state)
{
    int byte;

    /* Keep reading until we see the terminating 0 or a failure (-1). */
    do {
        byte = GZ_GETC();
    } while (byte > 0);

    if (byte != -1)
        return 0;

    if (state->err == 0) {
        /* ran out of input */
        state->err = WTAP_ERR_SHORT_READ;
        state->err_info = NULL;
    }
    return -1;
}
/*
 * Possibly append a zlib seek point to the file's fast-seek index.
 *
 * "point" is the rolling window of recent uncompressed output, "bits"
 * the bit count taken from the inflate state by the caller, and
 * "in_pos"/"out_pos" the corresponding offsets in the input file and
 * in the uncompressed data.
 *
 * A point is added only if it lies more than SPAN uncompressed bytes
 * past the last entry in the index.  Without inflatePrime() support
 * (HAVE_INFLATEPRIME not defined), points with a non-zero bit count
 * are not recorded.
 *
 * New entries only ever go at the end of the array: inserting into the
 * middle of a sorted array is expensive, and the first pass over a file
 * doesn't usually involve seeking.
 */
static void
zlib_fast_seek_add(FILE_T file, struct zlib_cur_seek_point *point, int bits, gint64 in_pos, gint64 out_pos)
{
    /* We're past a gzip header, so the index has at least one entry. */
    GPtrArray *points = file->fast_seek;
    struct fast_seek_point *last = (struct fast_seek_point *)points->pdata[points->len - 1];
    struct fast_seek_point *entry;

#ifndef HAVE_INFLATEPRIME
    if (bits)
        return;
#endif

    if (!(last->out + SPAN < out_pos))
        return;

    entry = g_new(struct fast_seek_point,1);
    entry->in = in_pos;
    entry->out = out_pos;
    entry->compression = ZLIB;
#ifdef HAVE_INFLATEPRIME
    entry->data.zlib.bits = bits;
#endif

    /* Unroll the circular window so the saved copy starts with its oldest byte. */
    if (point->pos == 0) {
        memcpy(entry->data.zlib.window, point->window, ZLIB_WINSIZE);
    } else {
        unsigned int tail = ZLIB_WINSIZE - point->pos;

        memcpy(entry->data.zlib.window, point->window + point->pos, tail);
        memcpy(entry->data.zlib.window + tail, point->window, point->pos);
    }

    /*
     * strm.adler and strm.total_out are uLong in at least some
     * versions of zlib, which can be 64 bits wide on LP64 platforms.
     * The values stored in the file are only 32 bits, so we keep
     * just the low 32 bits; the casts squelch truncation warnings.
     */
    entry->data.zlib.adler = (guint32) file->strm.adler;
    entry->data.zlib.total_out = (guint32) file->strm.total_out;

    g_ptr_array_add(points, entry);
}
/*
* Inflate data from the input file into "buf", stopping when "count"
* bytes have been produced, the deflate stream ends, or an error occurs.
*
* On return state->next points at "buf" and state->have is the number
* of bytes produced. Failures are not returned; they are recorded in
* state->err / state->err_info so that the data produced before the
* failure can still be delivered.
*
* When the end of the deflate stream is reached, the 8-byte gzip trailer
* (CRC-32 and length) is read and checked, state->compression goes back
* to UNKNOWN so that a following stream can be detected, and the
* fast-seek window for this stream is freed.
*/
static void /* gz_decomp */
zlib_read(FILE_T state, unsigned char *buf, unsigned int count)
{
int ret = 0; /* XXX */
guint32 crc, len;
z_streamp strm = &(state->strm);
/* buf2/count2: start and size of the part of buf not yet filled before each inflate() call */
unsigned char *buf2 = buf;
unsigned int count2 = count;
strm->avail_out = count;
strm->next_out = buf;
/* fill output buffer up to end of deflate stream or error */
do {
/* get more input for inflate() */
if (state->avail_in == 0 && fill_in_buffer(state) == -1)
break;
if (state->avail_in == 0) {
/* EOF */
state->err = WTAP_ERR_SHORT_READ;
state->err_info = NULL;
break;
}
strm->avail_in = state->avail_in;
strm->next_in = state->next_in;
/* decompress and handle errors */
/* with Z_BLOCK available, ask inflate() to stop at block boundaries, where seek points can be taken */
#ifdef Z_BLOCK
ret = inflate(strm, Z_BLOCK);
#else
ret = inflate(strm, Z_NO_FLUSH);
#endif
/* copy back how much input inflate() left unconsumed */
state->avail_in = strm->avail_in;
#ifdef z_const
DIAG_OFF(cast-qual)
state->next_in = (unsigned char *)strm->next_in;
DIAG_ON(cast-qual)
#else
state->next_in = strm->next_in;
#endif
if (ret == Z_STREAM_ERROR) {
state->err = WTAP_ERR_DECOMPRESS;
state->err_info = strm->msg;
break;
}
if (ret == Z_NEED_DICT) {
state->err = WTAP_ERR_DECOMPRESS;
state->err_info = "preset dictionary needed";
break;
}
if (ret == Z_MEM_ERROR) {
/* This means "not enough memory". */
state->err = ENOMEM;
state->err_info = NULL;
break;
}
if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
state->err = WTAP_ERR_DECOMPRESS;
state->err_info = strm->msg;
break;
}
/*
* XXX - Z_BUF_ERROR?
*/
/* fold the bytes just produced into the running CRC-32, which is kept in strm->adler */
strm->adler = crc32(strm->adler, buf2, count2 - strm->avail_out);
#ifdef Z_BLOCK
if (state->fast_seek_cur) {
struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
/* number of bytes this inflate() call produced */
unsigned int ready = count2 - strm->avail_out;
if (ready < ZLIB_WINSIZE) {
/* append the new bytes to the circular window, wrapping at its end */
guint left = ZLIB_WINSIZE - cur->pos;
if (ready >= left) {
memcpy(cur->window + cur->pos, buf2, left);
if (ready != left)
memcpy(cur->window, buf2 + left, ready - left);
cur->pos = ready - left;
cur->have += ready;
} else {
memcpy(cur->window + cur->pos, buf2, ready);
cur->pos += ready;
cur->have += ready;
}
if (cur->have >= ZLIB_WINSIZE)
cur->have = ZLIB_WINSIZE;
} else {
/* a full window or more was produced: keep only the last ZLIB_WINSIZE bytes */
memcpy(cur->window, buf2 + (ready - ZLIB_WINSIZE), ZLIB_WINSIZE);
cur->pos = 0;
cur->have = ZLIB_WINSIZE;
}
/*
* Offer a seek point once a full window is available and the stream
* has not ended. Per the zlib manual, data_type bit 128 means
* inflate() stopped right after a block boundary, bit 64 means it is
* decoding the last block, and the low bits give the number of unused
* bits in the last input byte.
*/
if (cur->have >= ZLIB_WINSIZE && ret != Z_STREAM_END && (strm->data_type & 128) && !(strm->data_type & 64))
zlib_fast_seek_add(state, cur, (strm->data_type & 7), state->raw_pos - strm->avail_in, state->pos + (count - strm->avail_out));
}
#endif
/* advance past the bytes just produced */
buf2 = (buf2 + count2 - strm->avail_out);
count2 = strm->avail_out;
} while (strm->avail_out && ret != Z_STREAM_END);
/* update available output and crc check value */
state->next = buf;
state->have = count - strm->avail_out;
/* Check gzip trailer if at end of deflate stream.
We don't fail immediately here, we just set an error
indication, so that we try to process what data we
got before the error. The next attempt to read
something past that data will get the error. */
if (ret == Z_STREAM_END) {
/* if the trailer can't be read, gz_next4() has already recorded the error */
if (gz_next4(state, &crc) != -1 &&
gz_next4(state, &len) != -1) {
if (crc != strm->adler && !state->dont_check_crc) {
state->err = WTAP_ERR_DECOMPRESS;
state->err_info = "bad CRC";
} else if (len != (strm->total_out & 0xffffffffUL)) {
state->err = WTAP_ERR_DECOMPRESS;
state->err_info = "length field wrong";
}
}
state->compression = UNKNOWN; /* ready for next stream, once have is 0 */
g_free(state->fast_seek_cur);
state->fast_seek_cur = NULL;
}
}
#endif
/*
* Work out how the data at the current input position is encoded.
*
* If it starts with a gzip header (and zlib support is built in), the
* header is parsed and skipped, the inflate stream is reset, and
* state->compression becomes ZLIB. Otherwise state->compression becomes
* UNCOMPRESSED and whatever input is already buffered is moved to the
* output buffer, so state->have may be non-zero on return.
*
* When fast seeking is enabled, a seek point is recorded for the
* position found.
*
* Returns 0 on success - including the case where there is no more
* input at all, in which case nothing is changed - and -1 on error,
* with state->err set.
*/
static int
gz_head(FILE_T state)
{
/* get some data in the input buffer */
if (state->avail_in == 0) {
if (fill_in_buffer(state) == -1)
return -1;
if (state->avail_in == 0)
return 0;
}
/* look for the gzip magic header bytes 31 and 139 */
#ifdef HAVE_LIBZ
if (state->next_in[0] == 31) {
state->avail_in--;
state->next_in++;
/* the second magic byte may be in the next buffer-load */
if (state->avail_in == 0 && fill_in_buffer(state) == -1)
return -1;
if (state->avail_in && state->next_in[0] == 139) {
guint8 cm;
guint8 flags;
guint16 len;
guint16 hcrc;
/* we have a gzip header, woo hoo! */
state->avail_in--;
state->next_in++;
/* read rest of header */
/* compression method (CM) */
if (gz_next1(state, &cm) == -1)
return -1;
/* 8 is the only compression method accepted here */
if (cm != 8) {
state->err = WTAP_ERR_DECOMPRESS;
state->err_info = "unknown compression method";
return -1;
}
/* flags (FLG) */
if (gz_next1(state, &flags) == -1)
return -1;
if (flags & 0xe0) { /* reserved flag bits */
state->err = WTAP_ERR_DECOMPRESS;
state->err_info = "reserved flag bits set";
return -1;
}
/* modification time (MTIME) */
if (gz_skipn(state, 4) == -1)
return -1;
/* extra flags (XFL) */
if (gz_skipn(state, 1) == -1)
return -1;
/* operating system (OS) */
if (gz_skipn(state, 1) == -1)
return -1;
/* optional header fields, each present only if its flag bit is set */
if (flags & 4) {
/* extra field - get XLEN */
if (gz_next2(state, &len) == -1)
return -1;
/* skip the extra field */
if (gz_skipn(state, len) == -1)
return -1;
}
if (flags & 8) {
/* file name */
if (gz_skipzstr(state) == -1)
return -1;
}
if (flags & 16) {
/* comment */
if (gz_skipzstr(state) == -1)
return -1;
}
if (flags & 2) {
/* header crc */
if (gz_next2(state, &hcrc) == -1)
return -1;
/* XXX - check the CRC? */
}
/* set up for decompression */
inflateReset(&(state->strm));
/* strm.adler holds the running CRC-32 of the output; start it at the initial CRC value */
state->strm.adler = crc32(0L, Z_NULL, 0);
state->compression = ZLIB;
state->is_compressed = TRUE;
#ifdef Z_BLOCK
if (state->fast_seek) {
/* start a fresh, empty window for this stream and record where its deflate data begins */
struct zlib_cur_seek_point *cur = g_new(struct zlib_cur_seek_point,1);
cur->pos = cur->have = 0;
g_free(state->fast_seek_cur);
state->fast_seek_cur = cur;
fast_seek_header(state, state->raw_pos - state->avail_in, state->pos, GZIP_AFTER_HEADER);
}
#endif
return 0;
}
else {
/* not a gzip file -- save first byte (31) and fall to raw i/o */
state->out[0] = 31;
state->have = 1;
}
}
#endif
#ifdef HAVE_LIBXZ
/* { 0xFD, '7', 'z', 'X', 'Z', 0x00 } */
/* FD 37 7A 58 5A 00 */
#endif
/* file offset of the first raw byte: what has been read, minus what is still buffered (including a saved 31) */
if (state->fast_seek)
fast_seek_header(state, state->raw_pos - state->avail_in - state->have, state->pos, UNCOMPRESSED);
/* doing raw i/o, save start of raw data for seeking, copy any leftover
input to output -- this assumes that the output buffer is larger than
the input buffer, which also assures space for gzungetc() */
state->raw = state->pos;
state->next = state->out;
if (state->avail_in) {
memcpy(state->next + state->have, state->next_in, state->avail_in);
state->have += state->avail_in;
state->avail_in = 0;
}
state->compression = UNCOMPRESSED;
return 0;
}
/*
 * Produce more data in the output buffer (state->out), setting
 * state->next and state->have to describe it.
 *
 * If the encoding of the upcoming data isn't known yet, gz_head() is
 * asked to work it out; if that already yields output, we're done.
 * Uncompressed data is then read straight into the output buffer, and
 * gzipped data is inflated into it.
 *
 * Returns 0 on success and -1 on error.  Note that zlib_read() records
 * decompression errors in state->err without failing this call, so that
 * data produced before the error is still delivered.
 */
static int /* gz_make */
fill_out_buffer(FILE_T state)
{
    if (state->compression == UNKNOWN) { /* look for gzip header */
        if (gz_head(state) == -1)
            return -1;
        if (state->have) /* got some data from gz_head() */
            return 0;
    }
    if (state->compression == UNCOMPRESSED) { /* straight copy */
        /*
         * Point "next" at the buffer before reading: raw_read() can
         * fail after storing some bytes and a byte count in
         * state->have, and that count must describe the data just
         * read rather than whatever "next" referred to previously.
         */
        state->next = state->out;
        if (raw_read(state, state->out, state->size /* << 1 */, &(state->have)) == -1)
            return -1;
    }
#ifdef HAVE_LIBZ
    else if (state->compression == ZLIB) { /* decompress */
        zlib_read(state, state->out, state->size << 1);
    }
#endif
    return 0;
}
/*
 * Discard the next "len" bytes of uncompressed data, or everything up
 * to end of file if that comes first, advancing state->pos by the
 * number of bytes discarded.
 *
 * Returns 0 on success (including stopping early at end of file) and
 * -1 if an error prevents more data from being produced.
 */
static int
gz_skip(FILE_T state, gint64 len)
{
    while (len) {
        if (state->have) {
            /* Output is buffered: drop as much of it as we need. */
            guint chunk;

            if ((gint64)state->have > len)
                chunk = (unsigned)len;
            else
                chunk = state->have;
            state->have -= chunk;
            state->next += chunk;
            state->pos += chunk;
            len -= chunk;
            continue;
        }

        /* The output buffer is empty from here on. */
        if (state->err) {
            /* A possibly unreported error means no more data can be
               generated; report it. */
            return -1;
        }
        if (state->eof && state->avail_in == 0) {
            /* Input is exhausted too; nothing left to skip. */
            break;
        }

        /* More data can be generated; get it, looking for a header
           if required. */
        if (fill_out_buffer(state) == -1)
            return -1;
    }
    return 0;
}
/*
 * Put the reader back into its "nothing read yet" state: empty input
 * and output buffers, uncompressed position 0, encoding not yet
 * determined, and no pending seek, EOF or error.  The file descriptor
 * and the buffers themselves are left alone.
 */
static void
gz_reset(FILE_T state)
{
    /* buffers and position */
    state->avail_in = 0;             /* no input data yet */
    state->have = 0;                 /* no output data available */
    state->pos = 0;                  /* no uncompressed data yet */

    /* stream status */
    state->compression = UNKNOWN;    /* look for gzip header */
    state->eof = FALSE;              /* not at end of file */
    state->seek_pending = FALSE;     /* no seek request pending */

    /* error status */
    state->err = 0;                  /* clear error */
    state->err_info = NULL;
}
FILE_T
file_fdopen(int fd)
{
#ifdef _STATBUF_ST_BLKSIZE /* XXX, _STATBUF_ST_BLKSIZE portable? */
ws_statb64 st;
#endif
int want = GZBUFSIZE;
FILE_T state;
if (fd == -1)
return NULL;
/* allocate FILE_T structure to return */
state = (FILE_T)g_try_malloc(sizeof *state);
if (state == NULL)
return NULL;
state->fast_seek_cur = NULL;
state->fast_seek = NULL;
/* open the file with the appropriate mode (or just use fd) */
state->fd = fd;
/* we don't yet know whether it's compressed */
state->is_compressed = FALSE;
/* save the current position for rewinding (only if reading) */
state->start = ws_lseek64(state->fd, 0, SEEK_CUR);
if (state->start == -1) state->start = 0;
state->raw_pos = state->start;
/* initialize stream */
gz_reset(state);
#ifdef _STATBUF_ST_BLKSIZE
if (ws_fstat64(fd, &st) >= 0) {
/*
* Yes, st_blksize can be bigger than an int; apparently,
* it's a long on LP64 Linux, for example.
*
* If the value is too big to fit into an int, just
* use the default.
*/
if (st.st_blksize <= G_MAXINT)
want = (int)st.st_blksize;
/* XXX, verify result? */
}
#endif
/* allocate buffers */
state->in = (unsigned char *)g_try_malloc(want);
state->out = (unsigned char *)g_try_malloc(want << 1);
state->size = want;
if (state->in == NULL || state->out == NULL) {
g_free(state->out);
g_free(state->in);
g_free(state);
errno = ENOMEM;
return NULL;
}
#ifdef HAVE_LIBZ
/* allocate inflate memory */
state->strm.zalloc = Z_NULL;
state->strm.zfree = Z_NULL;
state->strm.opaque = Z_NULL;
state->strm.avail_in = 0;
state->strm.next_in = Z_NULL;
if (inflateInit2(&(state->strm), -15) != Z_OK) { /* raw inflate */
g_free(state->out);
g_free(state->in);
g_free(state);
errno = ENOMEM;
return NULL;
}
/* for now, assume we should check the crc */
state->dont_check_crc = FALSE;
#endif
/* return stream */
return state;
}
/*
 * Open the file at "path" for reading and wrap it in a FILE_T.
 *
 * Returns the new FILE_T, or NULL if the file could not be opened or
 * the reader could not be set up.  On failure the descriptor is closed
 * and errno is left as set by the call that failed.
 */
FILE_T
file_open(const char *path)
{
    int fd;
    FILE_T ft;
#ifdef HAVE_LIBZ
    const char *suffixp;
#endif

    /* open file and do correct filename conversions.

       XXX - do we need O_LARGEFILE?  On UN*X, if we need to do
       something special to get large file support, the configure
       script should have set us up with the appropriate #defines,
       so we should be getting a large-file-enabled file descriptor
       here.  Pre-Large File Summit UN*Xes, and possibly even some
       post-LFS UN*Xes, might require O_LARGEFILE here, though.
       If so, we should probably handle that in ws_open(). */
    if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1)
        return NULL;

    /* open file handle */
    ft = file_fdopen(fd);
    if (ft == NULL) {
        /* Don't let closing the descriptor disturb the errno that
           describes why file_fdopen() failed. */
        int saved_errno = errno;

        ws_close(fd);
        errno = saved_errno;
        return NULL;
    }

#ifdef HAVE_LIBZ
    /*
     * If this file's name ends in ".caz", it's probably a compressed
     * Windows Sniffer file.  The compression is gzip, but if we
     * process the CRC as specified by RFC 1952, the computed CRC
     * doesn't match the stored CRC.
     *
     * Compressed Windows Sniffer files don't all have the same CRC
     * value; is it just random crap, or are they running the CRC on
     * a different set of data than you're supposed to (e.g., not
     * CRCing some of the data), or something such as that?
     *
     * For now, we just set a flag to ignore CRC errors.
     */
    suffixp = strrchr(path, '.');
    if (suffixp != NULL) {
        if (g_ascii_strcasecmp(suffixp, ".caz") == 0)
            ft->dont_check_crc = TRUE;
    }
#endif

    return ft;
}
/*
 * Attach the fast-seek index "seek" to the stream; a NULL index means
 * no fast-seek data is kept for it.  The array is only referenced, not
 * copied.  "random_flag" is currently ignored.
 */
void
file_set_random_access(FILE_T stream,
                       gboolean random_flag _U_,
                       GPtrArray *seek)
{
    stream->fast_seek = seek;
}
gint64
file_seek(FILE_T file, gint64 offset, int whence, int *err)
{
struct fast_seek_point *here;
guint n;
if (whence != SEEK_SET && whence != SEEK_CUR && whence != SEEK_END) {
g_assert_not_reached();
/*
*err = EINVAL;
return -1;
*/
}
/* Normalize offset to a SEEK_CUR specification */
if (whence == SEEK_END) {
/* Try skip until end-of-file */
if (gz_skip(file, G_MAXINT64) == -1) {
*err = file->err;
return -1;
}
if (offset == 0) {
/* We are done */
return file->pos;
}
} else if (whence == SEEK_SET)
offset -= file->pos;
else if (file->seek_pending)
offset += file->skip;
file->seek_pending = FALSE;
/*
* Are we seeking backwards and, if so, do we have data in the buffer?
*/
if (offset < 0 && file->next) {
/*
* Yes.
*
* This is guaranteed to fit in an unsigned int.
* To squelch compiler warnings, we cast the
* result.
*/
guint had = (unsigned)(file->next - file->out);
/*
* Do we have enough data before the current position in
* the buffer that we can seek backwards within the buffer?
*/
if (-offset <= had) {
/*
* Yes.
*
* Offset is negative, so -offset is
* non-negative, and -offset is
* <= an unsigned and thus fits in an
* unsigned. Get that value and
* adjust appropriately.
*
* (Casting offset to unsigned makes
* it positive, which is not what we
* would want, so we cast -offset
* instead.)
*/
guint adjustment = (unsigned)(-offset);
file->have += adjustment;
file->next -= adjustment;
file->pos -= adjustment;
return file->pos;
}
}
/*
* No. Do we have "fast seek" data for the location to which we
* will be seeking?
*
* XXX, profile
*/
if ((here = fast_seek_find(file, file->pos + offset)) && (offset < 0 || offset > SPAN || here->compression == UNCOMPRESSED)) {
gint64 off, off2;