forked from QuantConnect/Lean
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCompression.cs
514 lines (471 loc) · 19.3 KB
/
Compression.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
/*
* QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
* Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.IO;
using System.Collections.Generic;
using System.Text;
using ICSharpCode.SharpZipLib.Zip;
using ICSharpCode.SharpZipLib.Tar;
using ICSharpCode.SharpZipLib.GZip;
using ICSharpCode.SharpZipLib.Core;
using QuantConnect.Logging;
namespace QuantConnect
{
/// <summary>
/// Compression class manages the opening and extraction of compressed files (zip, tar, tar.gz).
/// </summary>
/// <remarks>QuantConnect's data library is stored in zip format locally on the hard drive.</remarks>
public static class Compression
{
/// <summary>
/// Writes a new zip archive at <paramref name="zipPath"/> holding one entry per dictionary item:
/// the key becomes the entry name and the string value becomes the entry contents.
/// </summary>
/// <param name="zipPath">Output location to save the file.</param>
/// <param name="filenamesAndData">Entry names mapped to their text contents.</param>
/// <returns>True when the archive was written; false when an exception was caught and logged.</returns>
public static bool ZipData(string zipPath, Dictionary<string, string> filenamesAndData)
{
    var chunk = new byte[4096];
    try
    {
        using (var archive = new ZipOutputStream(File.Create(zipPath)))
        {
            foreach (var pair in filenamesAndData)
            {
                var zipEntry = new ZipEntry(pair.Key);

                // The text is converted to bytes with the platform default encoding.
                var payload = Encoding.Default.GetBytes(pair.Value);
                archive.PutNextEntry(zipEntry);

                using (var source = new MemoryStream(payload))
                {
                    // Copy in buffer-sized chunks; the final pass writes zero bytes and ends the loop.
                    while (true)
                    {
                        var count = source.Read(chunk, 0, chunk.Length);
                        archive.Write(chunk, 0, count);
                        if (count <= 0)
                        {
                            break;
                        }
                    }
                }
            }

            // Complete the archive before the using block disposes it.
            archive.Finish();
            archive.Close();
        }
        return true;
    }
    catch (Exception err)
    {
        Log.Error("QC.Data.ZipData(): " + err.Message);
        return false;
    }
}
/// <summary>
/// Writes a new zip archive at <paramref name="zipPath"/> holding one entry per dictionary item:
/// the key becomes the entry name and the byte array becomes the entry contents.
/// </summary>
/// <param name="zipPath">Output location to save the file.</param>
/// <param name="filenamesAndData">Entry names mapped to their raw contents.</param>
/// <returns>True when the archive was saved; false when an exception was caught and logged.</returns>
public static bool ZipData(string zipPath, IReadOnlyDictionary<string, byte[]> filenamesAndData)
{
    var chunk = new byte[4096];
    bool written;
    try
    {
        using (var archive = new ZipOutputStream(File.Create(zipPath)))
        {
            foreach (var name in filenamesAndData.Keys)
            {
                var zipEntry = new ZipEntry(name);
                var payload = filenamesAndData[name];
                archive.PutNextEntry(zipEntry);

                using (var source = new MemoryStream(payload))
                {
                    // The first chunk is always written, then the loop continues while data keeps arriving.
                    var count = source.Read(chunk, 0, chunk.Length);
                    archive.Write(chunk, 0, count);
                    while (count > 0)
                    {
                        count = source.Read(chunk, 0, chunk.Length);
                        archive.Write(chunk, 0, count);
                    }
                }
            }

            // Complete the archive before the using block disposes it.
            archive.Finish();
            archive.Close();
        }
        written = true;
    }
    catch (Exception err)
    {
        Log.Error("QC.Data.ZipData(): " + err.Message);
        written = false;
    }
    return written;
}
/// <summary>
/// Uncompress zip data held in a byte array into a dictionary of entry name to entry contents.
/// </summary>
/// <remarks>
/// Any exception raised while reading is logged and swallowed; entries decoded before the
/// failure are still returned.
/// </remarks>
/// <param name="zipData">Byte data array of zip compressed information</param>
/// <returns>Dictionary keyed by entry name holding each entry's contents as a string</returns>
public static Dictionary<string, string> UnzipData(byte[] zipData)
{
    var data = new Dictionary<string, string>();
    try
    {
        using (var ms = new MemoryStream(zipData))
        using (var zipStream = new ZipInputStream(ms))
        {
            var buffer = new byte[4096];
            ZipEntry entry;
            while ((entry = zipStream.GetNextEntry()) != null)
            {
                using (var contents = new MemoryStream())
                {
                    // Read until the entry is exhausted: a single Read call is allowed to return
                    // fewer bytes than requested, and entry.Size may be unknown (negative) for
                    // streamed archives, so the declared size cannot be trusted for allocation.
                    int read;
                    while ((read = zipStream.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        contents.Write(buffer, 0, read);
                    }

                    data.Add(entry.Name, contents.ToArray().GetString());
                }
            }
        }
    }
    catch (Exception err)
    {
        Log.Error("Data.UnzipData(): " + err.Message);
    }
    return data;
}
/// <summary>
/// Compress a given file into a single-entry zip archive and optionally delete the original file.
/// The archive path is the source path with ".csv"/".txt" replaced by ".zip"; when the source
/// path contains neither, ".zip" is appended so the archive never overwrites its own source.
/// </summary>
/// <param name="textPath">Path of the original file</param>
/// <param name="deleteOriginal">Boolean flag to delete the original file after completion</param>
/// <returns>
/// String path for the new zip file. Failures are logged rather than thrown, so the returned
/// path may not point at a complete archive (it is empty when the path could not be computed).
/// </returns>
public static string Zip(string textPath, bool deleteOriginal = true)
{
    var zipPath = "";
    try
    {
        zipPath = textPath.Replace(".csv", ".zip").Replace(".txt", ".zip");
        if (zipPath == textPath)
        {
            // Nothing was replaced: creating the archive at this path would truncate the
            // very file we are about to read, so pick a distinct name instead.
            zipPath = textPath + ".zip";
        }

        var buffer = new byte[4096];

        // Open the source first so a missing input does not leave an empty archive behind.
        using (var fs = File.OpenRead(textPath))
        using (var stream = new ZipOutputStream(File.Create(zipPath)))
        {
            stream.PutNextEntry(new ZipEntry(Path.GetFileName(textPath)));
            StreamUtils.Copy(fs, stream, buffer);
        }

        // Only reached when the archive was written without error.
        if (deleteOriginal)
        {
            File.Delete(textPath);
        }
    }
    catch (Exception err)
    {
        Log.Error("QC.Data.Zip(): " + err.Message);
    }
    return zipPath;
} // End Zip:
/// <summary>
/// Creates a zip archive at <paramref name="zipPath"/> containing a single entry named
/// <paramref name="zipEntry"/> whose contents are the supplied string.
/// </summary>
/// <remarks>Exceptions are not caught here; they propagate to the caller.</remarks>
/// <param name="data">Text to store in the entry; converted with the platform default encoding.</param>
/// <param name="zipPath">Output location of the zip file.</param>
/// <param name="zipEntry">Name of the entry inside the archive.</param>
public static void Zip(string data, string zipPath, string zipEntry)
{
    using (var archive = new ZipOutputStream(File.Create(zipPath)))
    {
        archive.PutNextEntry(new ZipEntry(zipEntry));

        var chunk = new byte[4096];
        using (var source = new MemoryStream(Encoding.Default.GetBytes(data)))
        {
            // Copy in buffer-sized chunks; the final pass writes zero bytes and ends the loop.
            while (true)
            {
                var count = source.Read(chunk, 0, chunk.Length);
                archive.Write(chunk, 0, count);
                if (count <= 0)
                {
                    break;
                }
            }
        }
    }
}
/// <summary>
/// Zips all files specified to a new zip at the destination path.
/// Each file is stored under its file name only; paths that do not exist are traced and skipped.
/// </summary>
/// <param name="destination">Path of the zip file to create.</param>
/// <param name="files">Paths of the files to add to the archive.</param>
public static void ZipFiles(string destination, IEnumerable<string> files)
{
    try
    {
        using (var archive = new ZipOutputStream(File.Create(destination)))
        {
            var copyBuffer = new byte[4096];
            foreach (var path in files)
            {
                if (File.Exists(path))
                {
                    archive.PutNextEntry(new ZipEntry(Path.GetFileName(path)));
                    using (var input = File.OpenRead(path))
                    {
                        StreamUtils.Copy(input, archive, copyBuffer);
                    }
                }
                else
                {
                    Log.Trace("ZipFiles(): File does not exist: " + path);
                }
            }
        }
    }
    catch (Exception failure)
    {
        // Failures are logged, not rethrown.
        Log.Error(failure);
    }
}
/// <summary>
/// Opens a local zip file and returns a stream reader over its first entry.
/// Important: the caller must call Dispose() on the returned ZipFile instance.
/// </summary>
/// <remarks>
/// Errors are logged rather than thrown. When opening the archive or its first entry fails,
/// the ZipFile is disposed here but <paramref name="zip"/> still references that instance.
/// </remarks>
/// <param name="filename">Location of the original zip file</param>
/// <param name="zip">The ZipFile instance handed back to the caller; null when the file does not exist</param>
/// <returns>Stream reader of the first file contents in the zip file, or null on failure</returns>
public static StreamReader Unzip(string filename, out Ionic.Zip.ZipFile zip)
{
    zip = null;
    StreamReader firstEntryReader = null;
    try
    {
        if (!File.Exists(filename))
        {
            Log.Error("Data.UnZip(2): File doesn't exist: " + filename);
        }
        else
        {
            try
            {
                zip = new Ionic.Zip.ZipFile(filename);
                firstEntryReader = new StreamReader(zip[0].OpenReader());
            }
            catch (Exception openError)
            {
                Log.Error("QC.Data.Unzip(1): " + openError.Message);

                // Release whatever was opened before the failure.
                if (zip != null)
                {
                    zip.Dispose();
                }
                if (firstEntryReader != null)
                {
                    firstEntryReader.Close();
                }
            }
        }
    }
    catch (Exception err)
    {
        Log.Error("Data.UnZip(3): " + filename + " >> " + err.Message);
    }
    return firstEntryReader;
} // End UnZip
/// <summary>
/// Streams each line from the first zip entry in the specified zip file
/// </summary>
/// <remarks>
/// The returned sequence is lazy: the archive is only opened once enumeration starts, so any
/// error opening or reading it is thrown to the code that enumerates the result, not here.
/// </remarks>
/// <param name="filename">The zip file path to stream</param>
/// <returns>
/// An enumerable containing each line from the first unzipped entry,
/// or null when <paramref name="filename"/> does not exist
/// </returns>
public static IEnumerable<string> ReadLines(string filename)
{
    if (!File.Exists(filename))
    {
        Log.Error("Compression.ReadLines(): File does not exist: " + filename);
        return null;
    }

    // No try/catch here: calling an iterator method only creates the enumerator and runs none
    // of its body, so a catch around this call could never observe an extraction failure.
    return ReadLinesImpl(filename);
}
/// <summary>
/// Iterator that opens the zip file and yields the first entry's text one line at a time.
/// The archive and reader are disposed when enumeration finishes or the enumerator is disposed.
/// </summary>
/// <param name="filename">The zip file path to stream</param>
/// <returns>Each line of the first entry in the archive</returns>
private static IEnumerable<string> ReadLinesImpl(string filename)
{
    using (var archive = Ionic.Zip.ZipFile.Read(filename))
    using (var lineReader = new StreamReader(archive[0].OpenReader()))
    {
        string line;
        while ((line = lineReader.ReadLine()) != null)
        {
            yield return line;
        }
    }
}
/// <summary>
/// Reads the first entry of a zip archive supplied as a stream and returns its contents
/// via a stream reader backed by an in-memory copy.
/// </summary>
/// <remarks>
/// Failures (including an archive with no entries) are logged and reported by returning null.
/// NOTE(review): disposing the ZipInputStream presumably also closes the supplied stream
/// (SharpZipLib stream-ownership default) — confirm before reusing the stream afterwards.
/// </remarks>
/// <param name="zipstream">Stream positioned at the start of the zip data</param>
/// <returns>Stream reader over the first entry's contents, or null on failure</returns>
public static StreamReader UnzipStream(Stream zipstream)
{
    StreamReader reader = null;
    try
    {
        var file = new MemoryStream();
        using (var archive = new ZipInputStream(zipstream))
        {
            var entry = archive.GetNextEntry();
            if (entry == null)
            {
                // Report an empty archive explicitly instead of via a NullReferenceException.
                throw new InvalidDataException("The zip stream does not contain any entries.");
            }

            // Read until the entry is exhausted: a single Read call is allowed to return fewer
            // bytes than requested, and entry.Size may be unknown (negative) for streamed
            // archives, so the declared size cannot be trusted for allocation.
            var buffer = new byte[4096];
            int read;
            while ((read = archive.Read(buffer, 0, buffer.Length)) > 0)
            {
                file.Write(buffer, 0, read);
            }
        }

        // Rewind so the reader starts at the first byte of the entry.
        file.Position = 0;
        reader = new StreamReader(file);
    }
    catch (Exception err)
    {
        Log.Error(err, "Data.UnZip(): Stream >> " + err.Message);
    }
    return reader;
} // End UnZip
/// <summary>
/// Extracts every file entry of a zip archive into the folder that contains the archive.
/// Folder information stored in the entry names is discarded, so all files land directly
/// in that folder.
/// </summary>
/// <remarks>Exceptions are not caught here; they propagate to the caller.</remarks>
/// <param name="zipFile">Location of the zip on the HD</param>
/// <returns>List of unzipped file names</returns>
public static List<string> UnzipToFolder(string zipFile)
{
    var files = new List<string>();

    // Path.GetDirectoryName understands both separator characters and root-level files;
    // it yields an empty string for a bare file name and null for a root path.
    var outFolder = Path.GetDirectoryName(zipFile) ?? "";

    // The using block guarantees the file handle is released even when the ZipFile
    // constructor throws (e.g. the file is not a valid archive).
    using (var fs = File.OpenRead(zipFile))
    {
        ZipFile zf = null;
        try
        {
            zf = new ZipFile(fs);
            var buffer = new byte[4096];

            foreach (ZipEntry zipEntry in zf)
            {
                // Ignore directories
                if (!zipEntry.IsFile)
                {
                    continue;
                }

                // Remove the folder from the entry
                var entryFileName = Path.GetFileName(zipEntry.Name);
                if (entryFileName == null)
                {
                    continue;
                }

                var fullZipToPath = Path.Combine(outFolder, entryFileName);

                // Save the file name for later
                files.Add(fullZipToPath);

                // Copy the data in buffer chunks, releasing both streams per entry.
                using (var zipStream = zf.GetInputStream(zipEntry))
                using (var streamWriter = File.Create(fullZipToPath))
                {
                    StreamUtils.Copy(zipStream, streamWriter, buffer);
                }
            }
        }
        finally
        {
            if (zf != null)
            {
                zf.IsStreamOwner = true; // Makes close also shut the underlying stream
                zf.Close(); // Ensure we release resources
            }
        }
    }
    return files;
} // End UnZip
/// <summary>
/// Extracts all files from a tar archive into a destination folder.
/// </summary>
/// <remarks>Exceptions are not caught here; they propagate to the caller.</remarks>
/// <param name="source">The source tar file.</param>
/// <param name="destination">The destination folder to extract the files to.</param>
public static void UnTarFiles(string source, string destination)
{
    // using/finally make sure the file handle and archive are released even when extraction throws.
    using (var inStream = File.OpenRead(source))
    {
        var tarArchive = TarArchive.CreateInputTarArchive(inStream);
        try
        {
            tarArchive.ExtractContents(destination);
        }
        finally
        {
            tarArchive.Close();
        }
    }
}
/// <summary>
/// Extract tar.gz files to disk
/// </summary>
/// <remarks>Exceptions are not caught here; they propagate to the caller.</remarks>
/// <param name="source">Tar.gz source file</param>
/// <param name="destination">Location folder to unzip to</param>
public static void UnTarGzFiles(string source, string destination)
{
    // using/finally make sure the file handle, gzip stream and archive are released
    // even when extraction throws.
    using (var inStream = File.OpenRead(source))
    using (var gzipStream = new GZipInputStream(inStream))
    {
        var tarArchive = TarArchive.CreateInputTarArchive(gzipStream);
        try
        {
            tarArchive.ExtractContents(destination);
        }
        finally
        {
            tarArchive.Close();
        }
    }
}
/// <summary>
/// Creates the entry name for a QC zip data file
/// </summary>
/// <remarks>
/// Hour and daily resolutions use "{symbol}.csv" with the symbol unchanged. Every other resolution
/// uses "{date}_{symbol}_{resolution}_{quote|trade}.csv" with symbol and resolution lower-cased and
/// the date formatted with DateFormat.EightCharacter; Forex gets "quote", everything else "trade".
/// </remarks>
/// <param name="symbol">Symbol the data belongs to</param>
/// <param name="securityType">Security type; selects the quote/trade suffix</param>
/// <param name="date">Date of the data</param>
/// <param name="resolution">Resolution of the data</param>
/// <returns>The zip entry name</returns>
public static string CreateZipEntryName(string symbol, SecurityType securityType, DateTime date, Resolution resolution)
{
    if (resolution == Resolution.Hour || resolution == Resolution.Daily)
    {
        return symbol + ".csv";
    }

    // File names are machine-readable: format and lower-case with the invariant culture so the
    // result does not depend on the current thread's culture (calendar, casing rules).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var tickType = securityType == SecurityType.Forex ? "quote" : "trade";

    return String.Format(invariant, "{0}_{1}_{2}_{3}.csv",
        date.ToString(DateFormat.EightCharacter, invariant),
        symbol.ToLowerInvariant(),
        resolution.ToString().ToLowerInvariant(),
        tickType);
}
/// <summary>
/// Creates the zip file name for a QC zip data file
/// </summary>
/// <remarks>
/// Hour and daily resolutions use "{symbol}.zip". Every other resolution uses the date formatted
/// with DateFormat.EightCharacter followed by "_quote.zip" for Forex or "_trade.zip" otherwise.
/// </remarks>
/// <param name="symbol">Symbol the data belongs to</param>
/// <param name="securityType">Security type; selects the quote/trade suffix</param>
/// <param name="date">Date of the data</param>
/// <param name="resolution">Resolution of the data</param>
/// <returns>The zip file name</returns>
public static string CreateZipFileName(string symbol, SecurityType securityType, DateTime date, Resolution resolution)
{
    if (resolution == Resolution.Hour || resolution == Resolution.Daily)
    {
        return symbol + ".zip";
    }

    // File names are machine-readable: format the date with the invariant culture so the
    // result does not depend on the current thread's culture or calendar.
    var datePart = date.ToString(DateFormat.EightCharacter, System.Globalization.CultureInfo.InvariantCulture);
    var suffix = securityType == SecurityType.Forex ? "_quote.zip" : "_trade.zip";
    return datePart + suffix;
}
} // End OS Class
} // End QC Namespace