forked from paillave/Etl.Net
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Stephane Royer
committed
Jan 25, 2022
1 parent
3521f52
commit ee8b530
Showing
45 changed files
with
1,381 additions
and
237 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
<Project Sdk="Microsoft.NET.Sdk">
  <!-- NuGet package metadata. GeneratePackageOnBuild makes every build also produce the package. -->
  <PropertyGroup>
    <PackageId>Paillave.EtlNet.Bloomberg</PackageId>
    <Version>2.0.5</Version>
    <Authors>Stéphane Royer</Authors>
    <Company></Company>
    <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
    <PackageLicenseExpression>MIT</PackageLicenseExpression>
    <PackageProjectUrl>https://paillave.github.io/Etl.Net/</PackageProjectUrl>
    <PackageTags>ETL .net core SSIS reactive text file bloomberg</PackageTags>
    <Product>ETL.net bloomberg files extensions</Product>
    <PackageIcon>NugetIcon.png</PackageIcon>
    <Description>Extensions for Etl.Net to read bloomberg response files</Description>
    <LangVersion>latest</LangVersion>
    <PackageReadmeFile>README.md</PackageReadmeFile>
  </PropertyGroup>
  <!-- Files packed at the package root; they back PackageReadmeFile and PackageIcon above. -->
  <ItemGroup>
    <None Include="../../README.md" Pack="true" PackagePath=""/>
    <None Include="../NugetIcon.png" Pack="true" Visible="false" PackagePath="" />
  </ItemGroup>

  <PropertyGroup>
    <TargetFramework>net6.0</TargetFramework>
  </PropertyGroup>

  <!-- Sibling projects this extension is built on. -->
  <ItemGroup>
    <ProjectReference Include="..\Paillave.Etl\Paillave.Etl.csproj" />
    <ProjectReference Include="..\Paillave.Etl.TextFile\Paillave.Etl.TextFile.csproj" />
  </ItemGroup>

</Project>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
<Project Sdk="Microsoft.NET.Sdk">
  <!-- NuGet package metadata. GeneratePackageOnBuild makes every build also produce the package. -->
  <PropertyGroup>
    <PackageId>Paillave.EtlNet.Pdf</PackageId>
    <Version>2.0.5</Version>
    <Authors>Stéphane Royer</Authors>
    <Company></Company>
    <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
    <PackageLicenseExpression>MIT</PackageLicenseExpression>
    <PackageProjectUrl>https://paillave.github.io/Etl.Net/</PackageProjectUrl>
    <PackageTags>ETL .net core SSIS reactive Pdf file</PackageTags>
    <Product>ETL.net PDF files extensions</Product>
    <PackageIcon>NugetIcon.png</PackageIcon>
    <Description>Pdf files extensions for Etl.Net</Description>
    <PackageReleaseNotes>
      extensions for Etl.Net to deal with PDF files
    </PackageReleaseNotes>
  </PropertyGroup>
  <!-- Packed at the package root; backs PackageIcon above. -->
  <ItemGroup>
    <None Include="../NugetIcon.png" Pack="true" Visible="false" PackagePath="" />
  </ItemGroup>

  <PropertyGroup>
    <TargetFramework>net6.0</TargetFramework>
    <LangVersion>latest</LangVersion>
  </PropertyGroup>

  <!-- Third-party PDF library referenced directly by this project. -->
  <ItemGroup>
    <PackageReference Include="PdfPig" Version="0.1.5" />
  </ItemGroup>

  <!-- Sibling projects this extension is built on. -->
  <ItemGroup>
    <ProjectReference Include="..\Paillave.Pdf\Paillave.Pdf.csproj" />
    <ProjectReference Include="..\Paillave.Etl\Paillave.Etl.csproj" />
  </ItemGroup>

</Project>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
using Paillave.Etl.Core; | ||
using System; | ||
|
||
namespace Paillave.Etl.Pdf | ||
{ | ||
/// <summary>
/// Stream extensions that expose the content of PDF files.
/// </summary>
public static class PdfFileEx
{
    /// <summary>
    /// Cross-applies a <see cref="PdfRowsValuesProvider"/> on every file of <paramref name="stream"/>,
    /// producing a stream of <see cref="PdfContent"/> items.
    /// </summary>
    /// <param name="stream">Stream of files to read.</param>
    /// <param name="name">Name given to the cross-apply operator.</param>
    /// <param name="argsBuilder">
    /// Receives a fresh <see cref="PdfRowsValuesProviderArgs"/> and returns the instance
    /// that configures the provider.
    /// </param>
    /// <param name="noParallelisation">Forwarded as-is to <c>CrossApply</c>.</param>
    /// <returns>The stream returned by <c>CrossApply</c>.</returns>
    public static IStream<PdfContent> CrossApplyPdfContent(this IStream<IFileValue> stream, string name, Func<PdfRowsValuesProviderArgs, PdfRowsValuesProviderArgs> argsBuilder, bool noParallelisation = false)
    {
        var blankArgs = new PdfRowsValuesProviderArgs();
        var configuredArgs = argsBuilder(blankArgs);
        var provider = new PdfRowsValuesProvider(configuredArgs);
        return stream.CrossApply(name, provider, noParallelisation);
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
using System; | ||
using Paillave.Etl.Core; | ||
using System.Collections.Generic; | ||
using System.Threading; | ||
// using Paillave.Etl.ValuesProviders; | ||
using Paillave.Pdf; | ||
|
||
namespace Paillave.Etl.Pdf | ||
{ | ||
/// <summary>
/// Fluent configuration object holding the ignore templates and headers setups
/// collected through <see cref="AddIgnore"/> and <see cref="AddHeadersSetup"/>.
/// </summary>
public class PdfRowsValuesProviderArgs
{
    /// <summary>Templates registered through <see cref="AddIgnore"/>.</summary>
    public IList<TextTemplate> PatternsToIgnore { get; } = new List<TextTemplate>();

    /// <summary>Headers setups registered through <see cref="AddHeadersSetup"/>.</summary>
    public IList<HeadersSetup> HeadersSetups { get; } = new List<HeadersSetup>();

    /// <summary>Appends <paramref name="headersSetup"/> to <see cref="HeadersSetups"/>.</summary>
    /// <param name="headersSetup">The setup to register.</param>
    /// <returns>This instance, so calls can be chained.</returns>
    public PdfRowsValuesProviderArgs AddHeadersSetup(HeadersSetup headersSetup)
    {
        HeadersSetups.Add(headersSetup);
        return this;
    }

    /// <summary>
    /// Builds a template from a fresh <see cref="TextTemplate"/> and appends it to
    /// <see cref="PatternsToIgnore"/>.
    /// </summary>
    /// <param name="templateBuilder">Receives a new <see cref="TextTemplate"/> and returns the template to register.</param>
    /// <returns>This instance, so calls can be chained.</returns>
    public PdfRowsValuesProviderArgs AddIgnore(Func<TextTemplate, TextTemplate> templateBuilder)
    {
        var template = templateBuilder(new TextTemplate());
        PatternsToIgnore.Add(template);
        return this;
    }
}
/// <summary>
/// Base type of every item produced from a PDF file: it carries the section,
/// the page number and the file the item comes from.
/// </summary>
public abstract class PdfContent
{
    /// <summary>Stores the three values shared by all kinds of PDF content.</summary>
    /// <param name="section">Section the content belongs to.</param>
    /// <param name="pageNumber">Page number of the content.</param>
    /// <param name="fileValue">File the content was read from.</param>
    protected PdfContent(List<string> section, int pageNumber, IFileValue fileValue)
    {
        Section = section;
        PageNumber = pageNumber;
        FileValue = fileValue;
    }

    /// <summary>Section given at construction.</summary>
    public List<string> Section { get; }

    /// <summary>Page number given at construction.</summary>
    public int PageNumber { get; }

    /// <summary>File given at construction.</summary>
    public IFileValue FileValue { get; }
}
/// <summary>
/// PDF content item that carries nothing beyond the common
/// <see cref="PdfContent"/> values (section, page number, file).
/// </summary>
public class PdfHeader : PdfContent
{
    /// <summary>Creates a header item for the given file, section and page.</summary>
    /// <param name="fileValue">File the header was read from.</param>
    /// <param name="section">Section associated with the header.</param>
    /// <param name="pageNumber">Page number of the header.</param>
    public PdfHeader(IFileValue fileValue, List<string> section, int pageNumber)
        : base(section, pageNumber, fileValue)
    {
    }
}
/// <summary>
/// PDF content item holding a table as nested lists of strings.
/// </summary>
public class PdfTable : PdfContent
{
    /// <summary>
    /// Table data exactly as given at construction.
    /// NOTE(review): presumably rows, then cells, then lines of text per cell; confirm against the PDF reader.
    /// </summary>
    public List<List<List<string>>> Table { get; }

    /// <summary>Creates a table item for the given file, section and page.</summary>
    /// <param name="fileValue">File the table was read from.</param>
    /// <param name="section">Section the table belongs to.</param>
    /// <param name="pageNumber">Page number of the table.</param>
    /// <param name="table">Table data to expose through <see cref="Table"/>.</param>
    public PdfTable(IFileValue fileValue, List<string> section, int pageNumber, List<List<List<string>>> table)
        : base(section, pageNumber, fileValue)
    {
        Table = table;
    }
}
/// <summary>
/// PDF content item holding one line of text together with its line number.
/// </summary>
public class PdfTextLine : PdfContent
{
    /// <summary>Text given at construction.</summary>
    public string Text { get; }

    /// <summary>Line number given at construction.</summary>
    public int LineNumber { get; }

    /// <summary>Creates a text line item for the given file, section and page.</summary>
    /// <param name="fileValue">File the line was read from.</param>
    /// <param name="section">Section the line belongs to.</param>
    /// <param name="pageNumber">Page number of the line.</param>
    /// <param name="lineNumber">Number of the line.</param>
    /// <param name="text">Content of the line.</param>
    public PdfTextLine(IFileValue fileValue, List<string> section, int pageNumber, int lineNumber, string text)
        : base(section, pageNumber, fileValue)
    {
        Text = text;
        LineNumber = lineNumber;
    }
}
/// <summary>
/// Values provider that reads the content of a file with <see cref="PdfReader"/> and pushes
/// one <see cref="PdfContent"/> per header, table and text line reported by the reader.
/// </summary>
public class PdfRowsValuesProvider : ValuesProviderBase<IFileValue, PdfContent>
{
    private readonly PdfRowsValuesProviderArgs _args;

    /// <summary>Creates the provider with the ignore templates and headers setups held by <paramref name="args"/>.</summary>
    /// <param name="args">Configuration handed over to the PDF reader.</param>
    public PdfRowsValuesProvider(PdfRowsValuesProviderArgs args) => _args = args;

    /// <summary>Declared as <see cref="ProcessImpact.Heavy"/>.</summary>
    public override ProcessImpact PerformanceImpact => ProcessImpact.Heavy;

    /// <summary>Declared as <see cref="ProcessImpact.Heavy"/>.</summary>
    public override ProcessImpact MemoryFootPrint => ProcessImpact.Heavy;

    /// <summary>
    /// Reads <paramref name="input"/> from its beginning and pushes every item the PDF reader reports.
    /// Nothing is read if cancellation was already requested, and items reported after a
    /// cancellation request are not forwarded to <paramref name="push"/>.
    /// </summary>
    /// <param name="input">File whose content is parsed as a PDF.</param>
    /// <param name="push">Callback receiving each produced <see cref="PdfContent"/>.</param>
    /// <param name="cancellationToken">Token observed before reading and before each push.</param>
    /// <param name="resolver">Not used by this provider.</param>
    /// <param name="invoker">Not used by this provider.</param>
    public override void PushValues(IFileValue input, Action<PdfContent> push, CancellationToken cancellationToken, IDependencyResolver resolver, IInvoker invoker)
    {
        // Do not even open the file when the process is already being cancelled.
        if (cancellationToken.IsCancellationRequested)
        {
            return;
        }

        // NOTE(review): the stream returned by GetContent is not disposed here; its ownership
        // is not visible from this file, so the original lifetime is left untouched. Confirm.
        var stream = input.GetContent();
        stream.Seek(0, System.IO.SeekOrigin.Begin);

        // The reader offers no cancellation hook in this file, so the token is honoured by
        // dropping items once cancellation has been requested.
        void PushUnlessCancelled(PdfContent content)
        {
            if (!cancellationToken.IsCancellationRequested)
            {
                push(content);
            }
        }

        using (var pdfReader = new PdfReader(stream, this._args.PatternsToIgnore, this._args.HeadersSetups))
        {
            pdfReader.Read(new PdfProcessor(PushUnlessCancelled, input));
        }
    }

    /// <summary>
    /// Adapter turning the reader callbacks into <see cref="PdfContent"/> items
    /// that all reference the same source file.
    /// </summary>
    private class PdfProcessor : IPdfProcessor
    {
        private readonly Action<PdfContent> _push;
        private readonly IFileValue _fileValue;

        public PdfProcessor(Action<PdfContent> push, IFileValue fileValue) => (_push, _fileValue) = (push, fileValue);

        // Only lineNumber is kept; lineNumberInParagraph and lineNumberInPage are not forwarded.
        public void ProcessLine(string text, int pageNumber, int lineNumber, int lineNumberInParagraph, int lineNumberInPage, List<string> section)
            => _push(new PdfTextLine(_fileValue, section, pageNumber, lineNumber, text));

        public void ProcessTable(List<List<List<string>>> table, int pageNumber, List<string> section)
            => _push(new PdfTable(_fileValue, section, pageNumber, table));

        public void ProcessHeader(List<string> section, int pageNumber)
            => _push(new PdfHeader(_fileValue, section, pageNumber));
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.