Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a parsing step by comma for servers still sending back CSV for … #23

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Added unit tests to cookie parser logic, added regex for comma separated parsing, PR #23
  • Loading branch information
johnrey1 committed Jun 8, 2020
commit ebe55e98114443b9e963f695db024ddad6acae7b
1 change: 1 addition & 0 deletions ScrapySharp.Tests/Network/Cookie.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
key1=204=eBL9WYALz-YPXedwGc-utNT3YXlx7moCeN-dvDDl-7xYtHIZtPEr0OZ2bKWtEJWInrKuEnlR_-JFXZ4mMYIVQrPcTxGCZCCHoeUpdz1kU3cMa38TGrn-uaB6gH7D7A_XKa5bJvjbkatI3mvnPNgjJfd4QrHeDu8hms-c9b6to04; key2=this is a test value; expires=Tue, 08-Dec-2020 01:44:28 GMT; path=/; domain=.localhost.fakedomain; HttpOnly
14 changes: 14 additions & 0 deletions ScrapySharp.Tests/ScrapySharp.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
<PropertyGroup>
<TargetFrameworks>netcoreapp2.0</TargetFrameworks>
</PropertyGroup>
<ItemGroup>
<Compile Remove="When_build_HtmlDom.cs" />
</ItemGroup>
<ItemGroup>
<None Remove="Network\Cookie.txt" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="HtmlAgilityPack" Version="1.7.*" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.7.0" />
Expand Down Expand Up @@ -41,5 +47,13 @@
<Content Include="Html\Page1.htm">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</Content>
<Content Include="When_build_HtmlDom.cs">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<Content Include="Network\Cookie.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</Content>
</ItemGroup>
</Project>
56 changes: 56 additions & 0 deletions ScrapySharp.Tests/When_parse_cookies.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// ReSharper disable InconsistentNaming

using System.IO;
using NUnit.Framework;
using ScrapySharp.Html.Dom;
using ScrapySharp.Html.Parsing;
using System.Linq;
using ScrapySharp.Network;
using System.Net;

namespace ScrapySharp.Tests
{
    /// <summary>
    /// Exercises <see cref="CookiesParser.ParseCookies"/> against a cookie header stored in
    /// <c>Network/Cookie.txt</c>, both in its original semicolon-separated form and in
    /// variants where the separators have been rewritten.
    /// </summary>
    [TestFixture]
    public class When_parse_cookies
    {
        // Default domain handed to every parser instance created by these tests.
        private const string DefaultDomain = ".localhost.fakedomain";

        /// <summary>
        /// The unmodified, semicolon-separated fixture must produce exactly two cookies.
        /// </summary>
        [Test]
        public void When_parse_standard_cookie()
        {
            var cookie = GetCookie();
            var parser = new CookiesParser(DefaultDomain);

            var cookieList = parser.ParseCookies(cookie);

            Assert.AreEqual(2, cookieList.Count);
        }

        /// <summary>
        /// The same fixture with every ';' replaced by ',' (comma-separated form) must
        /// still produce exactly two cookies.
        /// </summary>
        [Test]
        public void When_parse_csv_cookie()
        {
            var csvCookie = GetCookie().Replace(";", ",");
            var parser = new CookiesParser(DefaultDomain);

            var cookieList = parser.ParseCookies(csvCookie);

            Assert.AreEqual(2, cookieList.Count);
        }

        /// <summary>
        /// A header in which every ';' is replaced by ";," (mixed separators) is expected
        /// to be rejected with a <see cref="CookieException"/>.
        /// </summary>
        [Test]
        public void When_parse_csv_invalid_cookie()
        {
            var invalidCookie = GetCookie().Replace(";", ";,");
            var parser = new CookiesParser(DefaultDomain);

            Assert.Throws<CookieException>(() => parser.ParseCookies(invalidCookie));
        }

        /// <summary>
        /// Reads the raw cookie header used as the fixture for all tests in this class.
        /// </summary>
        /// <returns>The full text content of <c>Network/Cookie.txt</c>.</returns>
        private static string GetCookie()
        {
            // Resolve the fixture against the directory that contains the test assembly
            // rather than the process working directory: the working directory is not
            // guaranteed to be the build output folder the file is copied to.
            var cookiePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Network", "Cookie.txt");

            return File.ReadAllText(cookiePath);
        }
    }
}

// ReSharper restore InconsistentNaming
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
// ReSharper disable InconsistentNaming

using System.Linq;
using HtmlAgilityPack;
using NUnit.Framework;
using ScrapySharp.Core;
using ScrapySharp.Extensions;
using ScrapySharp.Core;
using System.Linq;

namespace ScrapySharp.Tests
{
Expand Down
30 changes: 15 additions & 15 deletions ScrapySharp/Network/CookiesParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ namespace ScrapySharp.Network
public class CookiesParser
{
private readonly string defaultDomain;
private static readonly Regex splitCookiesRegex = new Regex(@"\s*(?<name>[^=]+)=(?<val>[^;]+)?[,;]+", RegexOptions.Compiled);

private static readonly Regex splitCookiesRegex = new Regex(@"\s*(?<name>[^=]+)=(?<val>[^;]+)?[;]+", RegexOptions.Compiled);
private static readonly Regex splitCookiesCsvRegex = new Regex(@"\s*(?<name>[^=]+)=(?<val>.*?),(?=[^,]+?(?:=|$))+", RegexOptions.Compiled);

public CookiesParser(string defaultDomain)
{
this.defaultDomain = defaultDomain;
Expand All @@ -19,24 +20,23 @@ public List<KeyValuePair<string, string>> ParseValuePairs(string cookiesExpressi
{
var list = new List<KeyValuePair<string, string>>();

var cookiesArr = cookiesExpression.Split(",".ToCharArray());
Match match;
if (cookiesExpression.Contains(";"))
match = splitCookiesRegex.Match(cookiesExpression);
else
match = splitCookiesCsvRegex.Match(cookiesExpression);

foreach (var cookieString in cookiesArr)
while (match.Success)
{
var match = splitCookiesRegex.Match(cookieString);

while (match.Success)
if (match.Groups["name"].Success && match.Groups["val"].Success)
{
if (match.Groups["name"].Success && match.Groups["val"].Success)
try
{
try
{
list.Add(new KeyValuePair<string, string>(match.Groups["name"].Value, match.Groups["val"].Value));
}
catch (CookieException) { }
list.Add(new KeyValuePair<string, string>(match.Groups["name"].Value, match.Groups["val"].Value));
}
match = match.NextMatch();
catch (CookieException) { }
}
match = match.NextMatch();
}

return list;
Expand All @@ -50,7 +50,7 @@ public List<Cookie> ParseCookies(string cookiesExpression)
for (int i = 0; i < keyValuePairs.Count; i++)
{
var pair = keyValuePairs[i];
if (pair.Key.Equals("path", StringComparison.InvariantCultureIgnoreCase)
if (pair.Key.Equals("path", StringComparison.InvariantCultureIgnoreCase)
|| pair.Key.Equals("domain", StringComparison.InvariantCultureIgnoreCase)
|| pair.Key.Equals("expires", StringComparison.InvariantCultureIgnoreCase))
continue;
Expand Down