forked from Azure/usql
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Checking in sample code to process JSON files using U-SQL.
- Loading branch information
Rukmani Gopalan
committed
May 17, 2016
1 parent
b12549c
commit bd2f7af
Showing
10 changed files
with
684 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
############################################################################### | ||
# Set default behavior to automatically normalize line endings. | ||
############################################################################### | ||
* text=auto | ||
|
||
############################################################################### | ||
# Set default behavior for command prompt diff. | ||
# | ||
# This is need for earlier builds of msysgit that does not have it on by | ||
# default for csharp files. | ||
# Note: This is only used by command line | ||
############################################################################### | ||
#*.cs diff=csharp | ||
|
||
############################################################################### | ||
# Set the merge driver for project and solution files | ||
# | ||
# Merging from the command prompt will add diff markers to the files if there | ||
# are conflicts (Merging from VS is not affected by the settings below, in VS | ||
# the diff markers are never inserted). Diff markers may cause the following | ||
# file extensions to fail to load in VS. An alternative would be to treat | ||
# these files as binary and thus will always conflict and require user | ||
# intervention with every merge. To do so, just uncomment the entries below | ||
############################################################################### | ||
#*.sln merge=binary | ||
#*.csproj merge=binary | ||
#*.vbproj merge=binary | ||
#*.vcxproj merge=binary | ||
#*.vcproj merge=binary | ||
#*.dbproj merge=binary | ||
#*.fsproj merge=binary | ||
#*.lsproj merge=binary | ||
#*.wixproj merge=binary | ||
#*.modelproj merge=binary | ||
#*.sqlproj merge=binary | ||
#*.wwaproj merge=binary | ||
|
||
############################################################################### | ||
# behavior for image files | ||
# | ||
# image files are treated as binary by default. | ||
############################################################################### | ||
#*.jpg binary | ||
#*.png binary | ||
#*.gif binary | ||
|
||
############################################################################### | ||
# diff behavior for common document formats | ||
# | ||
# Convert binary document formats to text before diffing them. This feature | ||
# is only available from the command line. Turn it on by uncommenting the | ||
# entries below. | ||
############################################################################### | ||
#*.doc diff=astextplain | ||
#*.DOC diff=astextplain | ||
#*.docx diff=astextplain | ||
#*.DOCX diff=astextplain | ||
#*.dot diff=astextplain | ||
#*.DOT diff=astextplain | ||
#*.pdf diff=astextplain | ||
#*.PDF diff=astextplain | ||
#*.rtf diff=astextplain | ||
#*.RTF diff=astextplain |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
## Ignore Visual Studio temporary files, build results, and | ||
## files generated by popular Visual Studio add-ons. | ||
|
||
# User-specific files | ||
*.suo | ||
*.user | ||
*.userosscache | ||
*.sln.docstates | ||
|
||
# User-specific files (MonoDevelop/Xamarin Studio) | ||
*.userprefs | ||
|
||
# Build results | ||
[Dd]ebug/ | ||
[Dd]ebugPublic/ | ||
[Rr]elease/ | ||
[Rr]eleases/ | ||
x64/ | ||
x86/ | ||
build/ | ||
bld/ | ||
[Bb]in/ | ||
[Oo]bj/ | ||
|
||
# Visual Studio 2015 cache/options directory | ||
.vs/ | ||
|
||
# MSTest test Results | ||
[Tt]est[Rr]esult*/ | ||
[Bb]uild[Ll]og.* | ||
|
||
# NUNIT | ||
*.VisualState.xml | ||
TestResult.xml | ||
|
||
# Build Results of an ATL Project | ||
[Dd]ebugPS/ | ||
[Rr]eleasePS/ | ||
dlldata.c | ||
|
||
# DNX | ||
project.lock.json | ||
artifacts/ | ||
|
||
*_i.c | ||
*_p.c | ||
*_i.h | ||
*.ilk | ||
*.meta | ||
*.obj | ||
*.pch | ||
*.pdb | ||
*.pgc | ||
*.pgd | ||
*.rsp | ||
*.sbr | ||
*.tlb | ||
*.tli | ||
*.tlh | ||
*.tmp | ||
*.tmp_proj | ||
*.log | ||
*.vspscc | ||
*.vssscc | ||
.builds | ||
*.pidb | ||
*.svclog | ||
*.scc | ||
|
||
# Chutzpah Test files | ||
_Chutzpah* | ||
|
||
# Visual C++ cache files | ||
ipch/ | ||
*.aps | ||
*.ncb | ||
*.opensdf | ||
*.sdf | ||
*.cachefile | ||
|
||
# Visual Studio profiler | ||
*.psess | ||
*.vsp | ||
*.vspx | ||
|
||
# TFS 2012 Local Workspace | ||
$tf/ | ||
|
||
# Guidance Automation Toolkit | ||
*.gpState | ||
|
||
# ReSharper is a .NET coding add-in | ||
_ReSharper*/ | ||
*.[Rr]e[Ss]harper | ||
*.DotSettings.user | ||
|
||
# JustCode is a .NET coding add-in | ||
.JustCode | ||
|
||
# TeamCity is a build add-in | ||
_TeamCity* | ||
|
||
# DotCover is a Code Coverage Tool | ||
*.dotCover | ||
|
||
# NCrunch | ||
_NCrunch_* | ||
.*crunch*.local.xml | ||
|
||
# MightyMoose | ||
*.mm.* | ||
AutoTest.Net/ | ||
|
||
# Web workbench (sass) | ||
.sass-cache/ | ||
|
||
# Installshield output folder | ||
[Ee]xpress/ | ||
|
||
# DocProject is a documentation generator add-in | ||
DocProject/buildhelp/ | ||
DocProject/Help/*.HxT | ||
DocProject/Help/*.HxC | ||
DocProject/Help/*.hhc | ||
DocProject/Help/*.hhk | ||
DocProject/Help/*.hhp | ||
DocProject/Help/Html2 | ||
DocProject/Help/html | ||
|
||
# Click-Once directory | ||
publish/ | ||
|
||
# Publish Web Output | ||
*.[Pp]ublish.xml | ||
*.azurePubxml | ||
## TODO: Comment the next line if you want to checkin your | ||
## web deploy settings but do note that will include unencrypted | ||
## passwords | ||
#*.pubxml | ||
|
||
*.publishproj | ||
|
||
# NuGet Packages | ||
*.nupkg | ||
# The packages folder can be ignored because of Package Restore | ||
**/packages/* | ||
# except build/, which is used as an MSBuild target. | ||
!**/packages/build/ | ||
# Uncomment if necessary however generally it will be regenerated when needed | ||
#!**/packages/repositories.config | ||
|
||
# Windows Azure Build Output | ||
csx/ | ||
*.build.csdef | ||
|
||
# Windows Store app package directory | ||
AppPackages/ | ||
|
||
# Visual Studio cache files | ||
# files ending in .cache can be ignored | ||
*.[Cc]ache | ||
# but keep track of directories ending in .cache | ||
!*.[Cc]ache/ | ||
|
||
# Others | ||
ClientBin/ | ||
[Ss]tyle[Cc]op.* | ||
~$* | ||
*~ | ||
*.dbmdl | ||
*.dbproj.schemaview | ||
*.pfx | ||
*.publishsettings | ||
node_modules/ | ||
orleans.codegen.cs | ||
|
||
# RIA/Silverlight projects | ||
Generated_Code/ | ||
|
||
# Backup & report files from converting an old project file | ||
# to a newer Visual Studio version. Backup files are not needed, | ||
# because we have git ;-) | ||
_UpgradeReport_Files/ | ||
Backup*/ | ||
UpgradeLog*.XML | ||
UpgradeLog*.htm | ||
|
||
# SQL Server files | ||
*.mdf | ||
*.ldf | ||
|
||
# Business Intelligence projects | ||
*.rdl.data | ||
*.bim.layout | ||
*.bim_*.settings | ||
|
||
# Microsoft Fakes | ||
FakesAssemblies/ | ||
|
||
# Node.js Tools for Visual Studio | ||
.ntvs_analysis.dat | ||
|
||
# Visual Studio 6 build log | ||
*.plg | ||
|
||
# Visual Studio 6 workspace options file | ||
*.opt | ||
|
||
# LightSwitch generated files | ||
GeneratedArtifacts/ | ||
_Pvt_Extensions/ | ||
ModelManifest.xml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio 14 | ||
VisualStudioVersion = 14.0.24720.0 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{182E2583-ECAD-465B-BB50-91101D7C24CE}") = "JsonSample", "JsonSample\JsonSample.usqlproj", "{F4368386-85B9-437B-ACBE-12CC6031FB3A}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Debug|x64 = Debug|x64 | ||
Debug|x86 = Debug|x86 | ||
Release|Any CPU = Release|Any CPU | ||
Release|x64 = Release|x64 | ||
Release|x86 = Release|x86 | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Debug|x64.ActiveCfg = Debug|x64 | ||
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Debug|x64.Build.0 = Debug|x64 | ||
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Debug|x86.ActiveCfg = Debug|x86 | ||
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Debug|x86.Build.0 = Debug|x86 | ||
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Release|Any CPU.Build.0 = Release|Any CPU | ||
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Release|x64.ActiveCfg = Release|x64 | ||
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Release|x64.Build.0 = Release|x64 | ||
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Release|x86.ActiveCfg = Release|x86 | ||
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Release|x86.Build.0 = Release|x86 | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
EndGlobal |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
CREATE ASSEMBLY IF NOT EXISTS [Newtonsoft.Json] FROM "assemblies/Newtonsoft.Json.dll"; | ||
CREATE ASSEMBLY IF NOT EXISTS [Microsoft.Analytics.Samples.Formats] FROM "assemblies/Microsoft.Analytics.Samples.Formats.dll"; | ||
|
||
REFERENCE ASSEMBLY [Newtonsoft.Json]; | ||
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats]; | ||
|
||
//Extract the Json string using a default Text extractor. This is ideal if you file size is <128 KB. | ||
@json = | ||
EXTRACT jsonString string FROM @"Samples/Data/json/radiowebsite/{*}.json" USING Extractors.Text(delimiter:'\b', quoting:false); | ||
|
||
//Use the JsonTuple function to get the Json Token of the string so it can be parsed later with Json .NET functions | ||
@jsonify = SELECT Microsoft.Analytics.Samples.Formats.Json.JsonFunctions.JsonTuple(jsonString) AS rec FROM @json; | ||
|
||
//Extract the fields you want from the Json object. | ||
@columnized = SELECT | ||
rec["ts"] AS ts, | ||
rec["userId"] AS userId, | ||
rec["sessionid"] AS sessionId, | ||
rec["page"] AS page, | ||
rec["auth"] AS auth, | ||
rec["method"] AS method, | ||
rec["status"] AS status, | ||
rec["level"] AS level, | ||
rec["itemInSession"] AS itemInSession, | ||
rec["location"] AS location, | ||
rec["lastName"] AS lastName, | ||
rec["firstName"] AS firstName, | ||
rec["registration"] AS registration, | ||
rec["gender"] AS gender, | ||
rec["artist"] AS artist, | ||
rec["song"] AS song, | ||
Double.Parse((rec["length"] ?? "0")) AS length | ||
FROM @jsonify; | ||
|
||
//Output the file to a tool of your choice. | ||
OUTPUT @columnized | ||
TO "/Samples/Output/columnized.txt" | ||
USING Outputters.Text(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
using Microsoft.Analytics.Interfaces; | ||
using Microsoft.Analytics.Types.Sql; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
|
||
namespace JsonSample | ||
{ | ||
|
||
} |
Oops, something went wrong.