forked from Azure/usql
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding SQL Konferenz 2017 Keynote example scripts
- Loading branch information
Showing
13 changed files
with
390 additions
and
0 deletions.
There are no files selected for viewing
34 changes: 34 additions & 0 deletions
34
Examples/SQLKonferenz2017-Keynote/SQLKonferenz2017-Keynote.sln
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio 2013 | ||
VisualStudioVersion = 12.0.31101.0 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{182E2583-ECAD-465B-BB50-91101D7C24CE}") = "SQLKonferenz2017-Keynote", "SQLKonferenz2017-Keynote\SQLKonferenz2017-Keynote.usqlproj", "{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Debug|x64 = Debug|x64 | ||
Debug|x86 = Debug|x86 | ||
Release|Any CPU = Release|Any CPU | ||
Release|x64 = Release|x64 | ||
Release|x86 = Release|x86 | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}.Debug|x64.ActiveCfg = Debug|x64 | ||
{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}.Debug|x64.Build.0 = Debug|x64 | ||
{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}.Debug|x86.ActiveCfg = Debug|x86 | ||
{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}.Debug|x86.Build.0 = Debug|x86 | ||
{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}.Release|Any CPU.Build.0 = Release|Any CPU | ||
{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}.Release|x64.ActiveCfg = Release|x64 | ||
{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}.Release|x64.Build.0 = Release|x64 | ||
{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}.Release|x86.ActiveCfg = Release|x86 | ||
{FD4830ED-213B-4C23-BE5D-4B9D3957CCC2}.Release|x86.Build.0 = Release|x86 | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
EndGlobal |
8 changes: 8 additions & 0 deletions
8
Examples/SQLKonferenz2017-Keynote/SQLKonferenz2017-Keynote/1.1-Files.usql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
@data = EXTRACT date string, | ||
time string, | ||
author string, | ||
tweet string | ||
FROM "/Samples/Data/Tweets/MikeDoesBigDataTweets.csv" | ||
USING Extractors.Csv(); | ||
|
||
OUTPUT @data TO "/Output/sqlkonf.csv" USING Outputters.Csv(); |
6 changes: 6 additions & 0 deletions
6
Examples/SQLKonferenz2017-Keynote/SQLKonferenz2017-Keynote/1.1-Fileset.usql.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
using Microsoft.Analytics.Interfaces; | ||
using Microsoft.Analytics.Types; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Text; |
9 changes: 9 additions & 0 deletions
9
Examples/SQLKonferenz2017-Keynote/SQLKonferenz2017-Keynote/1.2-Fileset.usql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
@data = EXTRACT date string, | ||
time string, | ||
author string, | ||
tweet string, | ||
origin string | ||
FROM "/Samples/Data/Tweets/{origin}Tweets.csv" | ||
USING Extractors.Csv(); | ||
|
||
OUTPUT @data TO "/Output/sqlkonf.csv" USING Outputters.Csv(); |
25 changes: 25 additions & 0 deletions
25
Examples/SQLKonferenz2017-Keynote/SQLKonferenz2017-Keynote/2-Table.usql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
DROP TABLE IF EXISTS TweetData; | ||
CREATE TABLE TweetData ( | ||
date string, | ||
time string, | ||
author string, | ||
tweet string, | ||
origin SqlArray<string>, | ||
INDEX idx CLUSTERED (author) | ||
DISTRIBUTED BY HASH(author) INTO 2 | ||
); | ||
|
||
@data = EXTRACT date string, | ||
time string, | ||
author string, | ||
tweet string, | ||
origin string | ||
FROM "/Samples/Data/Tweets/{origin}Tweets.csv" | ||
USING Extractors.Csv(); | ||
|
||
INSERT INTO TweetData | ||
SELECT date, time, author, tweet, ARRAY_AGG(origin.ToLowerInvariant()) AS origin | ||
FROM @data | ||
|
||
WHERE tweet.EndsWith("stop") | ||
GROUP BY date, time, author, tweet; |
26 changes: 26 additions & 0 deletions
26
Examples/SQLKonferenz2017-Keynote/SQLKonferenz2017-Keynote/3.1-Familiar-CSharp.usql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
| ||
@m = SELECT TweetAnalysis.Udfs.get_mentions(tweet) AS mentions | ||
FROM TweetData; | ||
|
||
@m = SELECT m.Substring(1) AS m | ||
, "mention" AS category | ||
FROM @m CROSS APPLY EXPLODE(mentions) AS t(m) | ||
WHERE m != "@"; | ||
|
||
@t = | ||
SELECT author, "author" AS category | ||
FROM TweetData | ||
UNION ALL | ||
SELECT * | ||
FROM @m; | ||
|
||
@res = SELECT author.ToLowerInvariant() AS author | ||
, category | ||
, COUNT( * ) AS tweetcount | ||
FROM @t | ||
GROUP BY author.ToLowerInvariant(), category; | ||
|
||
OUTPUT @res | ||
TO "/Output/TweetAnalysis/MyTwitterAnalysis.csv" | ||
ORDER BY tweetcount DESC | ||
USING Outputters.Csv(); |
27 changes: 27 additions & 0 deletions
27
Examples/SQLKonferenz2017-Keynote/SQLKonferenz2017-Keynote/3.1-Familiar-CSharp.usql.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
using Microsoft.Analytics.Interfaces; | ||
using Microsoft.Analytics.Types.Sql; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Text; | ||
using System.Linq; | ||
|
||
// TweetAnalysis Code Behind | ||
// Show the use of a U-SQL user-defined function (UDF) | ||
// | ||
namespace TweetAnalysis
{
    // Code-behind helpers for the tweet analysis U-SQL scripts.
    public class Udfs
    {
        // Characters that terminate a token when a tweet is split into words.
        // Hoisted into a static field so the array is not re-allocated per row.
        private static readonly char[] Separators = { ' ', ',', '.', ':', '!', ';', '"', '“' };

        // SqlArray<string> get_mentions(string tweet)
        //
        // Returns a U-SQL array of strings containing the twitter handles that were mentioned inside the tweet.
        // Each returned element still carries its leading '@'; a lone "@" token is returned as-is,
        // so callers that strip the prefix are expected to filter it out.
        //
        // tweet: the tweet text; may be null, in which case an empty array is returned
        //        instead of throwing a NullReferenceException.
        //
        public static SqlArray<string> get_mentions(string tweet)
        {
            if (tweet == null)
            {
                return new SqlArray<string>(Enumerable.Empty<string>());
            }

            // Ordinal comparison: "@" is a plain marker character, so culture-sensitive
            // matching (which can ignore zero-width characters) is not wanted here.
            return new SqlArray<string>(
                tweet.Split(Separators).Where(x => x.StartsWith("@", StringComparison.Ordinal))
            );
        }
    }
}
69 changes: 69 additions & 0 deletions
69
Examples/SQLKonferenz2017-Keynote/SQLKonferenz2017-Keynote/3.2-Familiar-SQL.usql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
// Requires 4-USQL-ObjectModel.usql to be executed to create the TVF | ||
|
||
// Analyzes cooked TweetAuthorsAndMentions | ||
// | ||
// Does some analytics using windowing expressions. | ||
// | ||
// 1. Shows use of windowing expressions | ||
// 2. Constant table with VALUES | ||
// 3. ARRAY Contains | ||
// 4. Joins | ||
|
||
// For each author and category with tweetcount >= 50, get their tweetcount, the median tweetcount in the category, their percentile and absolute rank in rank order | ||
@res = | ||
SELECT DISTINCT | ||
author, category, tweetcount | ||
, PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY tweetcount ASC) | ||
OVER (PARTITION BY category) AS median_tweetcount_perhandle_category | ||
, PERCENT_RANK() OVER | ||
(PARTITION BY category ORDER BY tweetcount ASC) AS relative_rank | ||
, ROW_NUMBER() OVER | ||
(PARTITION BY category ORDER BY tweetcount DESC) AS absolute_rank | ||
FROM TweetAndMentionsTVF(DEFAULT) AS t | ||
WHERE tweetcount >= 50; | ||
|
||
OUTPUT @res | ||
TO "/Output/TweetAnalysis/MyTwitterAnalysis6.csv" | ||
ORDER BY absolute_rank, category ASC | ||
USING Outputters.Csv(); | ||
|
||
// For each author who provided their tweet feed (to be fair), provide their influence measured as mentioned/authored | ||
// Account for changes to tweet handles with a constant lookup table | ||
// (could also be done as a standard table, or as a file that gets deployed as resource and looked up with a UDF) | ||
|
||
@tweet_handle_mapping = SELECT * FROM (VALUES ("sqlservermike","mikedoesbigdata")) AS T(old_handle, new_handle); | ||
|
||
@t = | ||
SELECT n.new_handle ?? t.author AS author, | ||
category, | ||
tweetcount, | ||
file_origin | ||
FROM TweetAndMentionsTVF(DEFAULT) AS t | ||
LEFT OUTER JOIN | ||
@tweet_handle_mapping AS n | ||
ON t.author == n.old_handle; | ||
|
||
@t = | ||
SELECT author, | ||
category, | ||
SUM(tweetcount) AS tweetcount | ||
FROM @t | ||
WHERE file_origin != null && file_origin.Contains(author) | ||
GROUP BY author, | ||
category; | ||
|
||
@res = | ||
SELECT m.author, | ||
a.tweetcount AS authored_count, | ||
m.tweetcount AS mentioned_count, | ||
(double) m.tweetcount / (double) a.tweetcount AS influence | ||
FROM @t AS a | ||
JOIN | ||
@t AS m | ||
ON a.author == m.author | ||
WHERE a.category == "author" AND m.category == "mention"; | ||
|
||
OUTPUT @res | ||
TO "/Output/TweetAnalysis/influencer.csv" | ||
ORDER BY influence DESC | ||
USING Outputters.Csv(); |
19 changes: 19 additions & 0 deletions
19
Examples/SQLKonferenz2017-Keynote/SQLKonferenz2017-Keynote/3.3-Unfamiliar-SQL.usql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
@left = | ||
SELECT * | ||
FROM (VALUES ( 1, "x", (int?) 50 ), | ||
( 1, "y", (int?) 60 ) | ||
) AS L(K, A, C); | ||
|
||
@right = | ||
SELECT * | ||
FROM (VALUES ( 5, "x", 1 ), | ||
( 6, "x", 2 ), | ||
(10, "y", 3 ) | ||
) AS R(B, A, K); | ||
|
||
@res = | ||
SELECT * FROM @left | ||
OUTER UNION BY NAME ON (A, K) | ||
SELECT * FROM @right; | ||
|
||
OUTPUT @res TO "/output/docsamples/outerunion.csv" USING Outputters.Csv(); |
46 changes: 46 additions & 0 deletions
46
Examples/SQLKonferenz2017-Keynote/SQLKonferenz2017-Keynote/4-USQL-ObjectModel.usql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
DROP FUNCTION IF EXISTS TweetAndMentionsTVF; | ||
|
||
CREATE FUNCTION TweetAndMentionsTVF(@origin string = null) | ||
RETURNS @res | ||
AS | ||
BEGIN | ||
|
||
// In order to see the user-code inside the TVF, you have to reference the assembly inside the TVF body. | ||
REFERENCE ASSEMBLY TweetAnalysis; // Generated from TweetAnalysis Example | ||
|
||
// Extract mentions | ||
@m = | ||
SELECT origin, | ||
TweetAnalysis.Udfs.get_mentions(tweet) AS mentions, | ||
author AS mentioned_by | ||
FROM TweetData | ||
WHERE String.IsNullOrEmpty(@origin) || origin.Contains(@origin); | ||
|
||
// Combine authors and mentions | ||
@t = | ||
SELECT origin, | ||
author, | ||
"author" AS category, | ||
"" AS mentioned_by | ||
FROM TweetData | ||
UNION ALL | ||
SELECT origin, | ||
m.Substring(1) AS m, | ||
"mention" AS category, | ||
mentioned_by | ||
FROM @m | ||
CROSS APPLY | ||
EXPLODE(mentions) AS t(m) | ||
WHERE m != "@"; | ||
|
||
// Count authors and mentions | ||
@res = | ||
SELECT author.ToLowerInvariant() AS author, | ||
category, | ||
COUNT( * ) AS tweetcount, | ||
new SQL.ARRAY<string>(ARRAY_AGG(origin).SelectMany(x => x).Distinct()) AS file_origin, | ||
ARRAY_AGG(DISTINCT mentioned_by) AS mentioned_by | ||
FROM @t | ||
GROUP BY author.ToLowerInvariant(), | ||
category; | ||
END; |
16 changes: 16 additions & 0 deletions
16
Examples/SQLKonferenz2017-Keynote/SQLKonferenz2017-Keynote/5-USQL-UDOs.usql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
@in = EXTRACT begin DateTime, end DateTime, user string | ||
FROM "/Samples/Blogs/MRys/Ranges/ranges.txt" | ||
USING Extractors.Text(delimiter:'-'); | ||
|
||
@r = REDUCE @in PRESORT begin | ||
ON user | ||
PRODUCE begin DateTime, end DateTime, user string | ||
READONLY user | ||
USING new ReduceSample.RangeReducer(); | ||
|
||
// Remove comment to show filter push through the reducer | ||
// @r = SELECT * FROM @r WHERE user == "ABC"; | ||
|
||
OUTPUT @r | ||
TO "/temp/result.csv" | ||
USING Outputters.Csv(); |
63 changes: 63 additions & 0 deletions
63
Examples/SQLKonferenz2017-Keynote/SQLKonferenz2017-Keynote/5-USQL-UDOs.usql.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
using Microsoft.Analytics.Interfaces; | ||
using Microsoft.Analytics.Types.Sql; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
|
||
namespace ReduceSample
{
    // Reducer that merges overlapping [begin, end] ranges within one reduce group.
    //
    // Requires that the input is PRESORTed on "begin" and that the reduce key is READONLY,
    // so only the "begin" and "end" columns are written on the output row.
    [SqlUserDefinedReducer(IsRecursive = true)] // not sure if it can run recursive yet. Need to test with large data sets.
    public class RangeReducer : IReducer
    {
        // Walks the rows in "begin" order and yields one output row per merged interval.
        //
        // input:  rows of the current reduce group, read via the "begin" and "end" DateTime columns.
        // output: reusable output row; "begin" and "end" are set before each yield.
        // Yields nothing when the group contains no rows.
        public override IEnumerable<IRow> Reduce(IRowset input, IUpdatableRow output)
        {
            // Init aggregation values
            bool first_row_processed = false;
            var begin = DateTime.MaxValue; // Dummy value to make compiler happy
            var end = DateTime.MinValue; // Dummy value to make compiler happy

            foreach (var row in input.Rows)
            {
                var b = row.Get<DateTime>("begin");
                var e = row.Get<DateTime>("end");
                // If the end is just a time and not a date, it can be earlier than the begin,
                // indicating it is on the next day. Fix up the end to the next day in that case.
                if (e < b) { e = e.AddDays(1); }

                if (!first_row_processed)
                {
                    // The first row opens the first interval.
                    first_row_processed = true;
                    begin = b;
                    end = e;
                }
                else if (b <= end)
                {
                    // The row starts inside the current interval: extend it if the row ends later.
                    if (e > end) { end = e; }
                }
                else
                {
                    // Gap found: output the previous interval and start a new one.
                    output.Set<DateTime>("begin", begin);
                    output.Set<DateTime>("end", end);
                    yield return output.AsReadOnly();
                    begin = b;
                    end = e;
                }
            } // foreach

            // Output the last interval, but only if at least one row was seen;
            // otherwise the dummy MaxValue/MinValue pair would be emitted as a bogus range.
            if (first_row_processed)
            {
                output.Set<DateTime>("begin", begin);
                output.Set<DateTime>("end", end);
                yield return output.AsReadOnly();
            }
        } // Reduce

    } // RangeReducer
} // ReduceSample
Oops, something went wrong.