Skip to content

Commit d371ffd

Browse files
author
Ivan Gavryliuk
committed
more fluent reader with options
1 parent 67c33ca commit d371ffd

File tree

8 files changed

+30
-105
lines changed

8 files changed

+30
-105
lines changed

src/DataFrame.Formats/Csv/CsvFormatReader.cs

+8-32
Original file line number | Diff line number | Diff line change
@@ -9,25 +9,20 @@
99

1010
namespace DataFrame.Formats.Csv
1111
{
12-
class CsvFormatReader : IFormatReader
12+
static class CsvFormatReader
1313
{
1414
private static readonly Dictionary<Type, Type> _inferredTypeToParquetType = new Dictionary<Type, Type>
1515
{
1616
{ typeof(byte), typeof(int) }
1717
};
1818

19-
public Frame FromStream(Stream inputStream)
20-
{
21-
throw new NotImplementedException();
22-
}
23-
2419
/// <summary>
2520
/// Reads csv stream into dataset
2621
/// </summary>
2722
/// <param name="csvStream">CSV stream</param>
2823
/// <param name="options">Options for reader, optional</param>
2924
/// <returns>Correct dataset</returns>
30-
public static Matrix<object> ReadToDataSet(Stream csvStream, CsvOptions options = null)
25+
public static Frame ReadToFrame(Stream csvStream, CsvOptions options = null)
3126
{
3227
if (csvStream == null) throw new ArgumentNullException(nameof(csvStream));
3328

@@ -75,49 +70,30 @@ public static Matrix<object> ReadToDataSet(Stream csvStream, CsvOptions options
7570
rowCount += 1;
7671
}
7772

78-
Matrix<object> result;
79-
8073
//set schema
8174
if (options.InferSchema)
8275
{
83-
result = InferSchema(headers, rowCount, columnValues);
84-
}
85-
else
86-
{
87-
var schema = headers.Select(h => new ColumnSchema<string>(h)).ToList();
88-
result = new Matrix(schema, rowCount);
76+
return InferSchema(headers, rowCount, columnValues);
8977
}
9078

91-
//assign values
92-
foreach(KeyValuePair<int, IList> pair in columnValues)
93-
{
94-
int r = 0;
95-
foreach(object value in pair.Value)
96-
{
97-
result[pair.Key, r++] = value;
98-
}
99-
}
100-
101-
return result;
79+
return new Frame(headers.Select((name, i) => new Series<string>(name, (List<string>)columnValues[i])));
10280
}
10381

10482

105-
private static Matrix InferSchema(string[] headers, int rowCount, Dictionary<int, IList> columnValues)
83+
private static Frame InferSchema(string[] headers, int rowCount, IReadOnlyDictionary<int, IList> columnValues)
10684
{
107-
var elements = new List<ColumnSchema>();
85+
var series = new List<Series>();
10886
for (int i = 0; i < headers.Length; i++)
10987
{
11088
IList cv = columnValues[i];
11189
Type columnType = cv.Cast<string>().ToArray().InferType(out IList typedValues);
11290

11391
Type ct;
11492
if (!_inferredTypeToParquetType.TryGetValue(columnType, out ct)) ct = columnType;
115-
elements.Add(new ColumnSchema(headers[i], ct));
116-
117-
columnValues[i] = typedValues;
93+
series.Add(new Series(ct, headers[i], typedValues));
11894
}
11995

120-
return new Matrix(elements, rowCount);
96+
return new Frame(series);
12197
}
12298
}
12399
}
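(file path header missing from this capture; the section below shows the diff for the file that declares the `FluentExtensions` class)

+15-4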
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,24 @@
1-
using DataFrame.Formats.Parquet;
1+
using System.IO;
2+
using DataFrame.Formats.Csv;
3+
using DataFrame.Formats.Parquet;
24
using DataFrame.Math.Data;
5+
using Parquet;
6+
using Parquet.Data;
37

4-
namespace DataFrame.Formats
8+
namespace DataFrame.Math.Data
59
{
610
public static class FluentExtensions
711
{
8-
public static IFormatReader Parquet(this FluentReader entry)
12+
public static Frame Parquet(this FluentReader reader, Stream inputStream, ParquetOptions parquetOptions = null, ReaderOptions readerOptions = null)
913
{
10-
return new ParquetFormatReader();
14+
DataSet ds = ParquetReader.Read(inputStream, parquetOptions, readerOptions);
15+
16+
return ParquetConverter.ConvertFromParquet(ds);
17+
}
18+
19+
public static Frame Csv(this FluentReader reader, Stream inputStream, CsvOptions options = null)
20+
{
21+
return CsvFormatReader.ReadToFrame(inputStream, options);
1122
}
1223
}
1324
}

src/DataFrame.Formats/IFormatReader.cs

-10
This file was deleted.

src/DataFrame.Formats/Parquet/ParquetFormatReader.cs

-17
This file was deleted.

src/DataFrame.Math/Data/ColumnSchema.cs

-24
This file was deleted.

src/DataFrame.Math/Data/Frame.cs

+6
Original file line number | Diff line number | Diff line change
@@ -35,5 +35,11 @@ public IReadOnlyCollection<object> GetRow(int i)
3535

3636
return f;
3737
}
38+
39+
#region [ Fluent Extensions ]
40+
41+
public static FluentReader Read => new FluentReader();
42+
43+
#endregion
3844
}
3945
}

src/DataFrame.Math/Data/Matrix.cs

-17
Original file line number | Diff line number | Diff line change
@@ -4,17 +4,9 @@
44

55
namespace DataFrame.Math.Data
66
{
7-
public class Matrix : Matrix<object>
8-
{
9-
public Matrix(IReadOnlyCollection<ColumnSchema> schema, int rowCount) : base(schema.Count, rowCount)
10-
{
11-
}
12-
}
13-
147
public class Matrix<T>
158
{
169
private readonly MatrixStorage<T> _storage;
17-
private readonly List<ColumnSchema> _schema = new List<ColumnSchema>();
1810

1911
public Matrix(int columns, int rows) : this(columns, rows, new SimpleMatrixStorage<T>(columns, rows))
2012
{
@@ -51,15 +43,6 @@ private void ValidateIndexInRange(int column, int row)
5143
//todo:
5244
}
5345

54-
#region [ Fluent Extensions ]
55-
56-
public static FluentReader Read()
57-
{
58-
return new FluentReader();
59-
}
60-
61-
#endregion
62-
6346
public override string ToString()
6447
{
6548
var sb = new StringBuilder();

src/Tests/DataFrame.Formats.Tests/FormatReadingTest.cs

+1-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ public void Read_parquet_file()
1111
{
1212
using (FileStream fs = File.OpenRead(GetDataFilePath("alltypes.snappy.parquet")))
1313
{
14-
Frame frame = Matrix.Read().Parquet().FromStream(fs);
14+
Frame frame = Frame.Read.Parquet(fs);
1515
}
1616
}
1717

0 commit comments

Comments
 (0)