Skip to content

Commit

Permalink
Checking in sample code to process JSON files using U-SQL.
Browse files Browse the repository at this point in the history
  • Loading branch information
Rukmani Gopalan committed May 17, 2016
1 parent b12549c commit bd2f7af
Show file tree
Hide file tree
Showing 10 changed files with 684 additions and 0 deletions.
63 changes: 63 additions & 0 deletions Examples/JsonSample/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
###############################################################################
# Set default behavior to automatically normalize line endings.
###############################################################################
* text=auto

###############################################################################
# Set default behavior for command prompt diff.
#
# This is needed for earlier builds of msysgit that do not have it on by
# default for csharp files.
# Note: This is only used by the command line
###############################################################################
#*.cs diff=csharp

###############################################################################
# Set the merge driver for project and solution files
#
# Merging from the command prompt will add diff markers to the files if there
# are conflicts (Merging from VS is not affected by the settings below, in VS
# the diff markers are never inserted). Diff markers may cause the following
# file extensions to fail to load in VS. An alternative would be to treat
# these files as binary, so that they always conflict and require user
# intervention with every merge. To do so, just uncomment the entries below.
###############################################################################
#*.sln merge=binary
#*.csproj merge=binary
#*.vbproj merge=binary
#*.vcxproj merge=binary
#*.vcproj merge=binary
#*.dbproj merge=binary
#*.fsproj merge=binary
#*.lsproj merge=binary
#*.wixproj merge=binary
#*.modelproj merge=binary
#*.sqlproj merge=binary
#*.wwaproj merge=binary

###############################################################################
# behavior for image files
#
# image files are treated as binary by default.
###############################################################################
#*.jpg binary
#*.png binary
#*.gif binary

###############################################################################
# diff behavior for common document formats
#
# Convert binary document formats to text before diffing them. This feature
# is only available from the command line. Turn it on by uncommenting the
# entries below.
###############################################################################
#*.doc diff=astextplain
#*.DOC diff=astextplain
#*.docx diff=astextplain
#*.DOCX diff=astextplain
#*.dot diff=astextplain
#*.DOT diff=astextplain
#*.pdf diff=astextplain
#*.PDF diff=astextplain
#*.rtf diff=astextplain
#*.RTF diff=astextplain
212 changes: 212 additions & 0 deletions Examples/JsonSample/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.

# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates

# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs

# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
build/
bld/
[Bb]in/
[Oo]bj/

# Visual Studio 2015 cache/options directory
.vs/

# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*

# NUNIT
*.VisualState.xml
TestResult.xml

# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c

# DNX
project.lock.json
artifacts/

*_i.c
*_p.c
*_i.h
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc

# Chutzpah Test files
_Chutzpah*

# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opensdf
*.sdf
*.cachefile

# Visual Studio profiler
*.psess
*.vsp
*.vspx

# TFS 2012 Local Workspace
$tf/

# Guidance Automation Toolkit
*.gpState

# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user

# JustCode is a .NET coding add-in
.JustCode

# TeamCity is a build add-in
_TeamCity*

# DotCover is a Code Coverage Tool
*.dotCover

# NCrunch
_NCrunch_*
.*crunch*.local.xml

# MightyMoose
*.mm.*
AutoTest.Net/

# Web workbench (sass)
.sass-cache/

# Installshield output folder
[Ee]xpress/

# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html

# Click-Once directory
publish/

# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
## NOTE: The next line is commented out, so web deploy settings (*.pubxml)
## are checked in; be aware that they may include unencrypted passwords.
## Uncomment the next line to ignore them instead.
#*.pubxml

*.publishproj

# NuGet Packages
*.nupkg
# The packages folder can be ignored because of Package Restore
**/packages/*
# except build/, which is used as an MSBuild target.
!**/packages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/packages/repositories.config

# Windows Azure Build Output
csx/
*.build.csdef

# Windows Store app package directory
AppPackages/

# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/

# Others
ClientBin/
[Ss]tyle[Cc]op.*
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.pfx
*.publishsettings
node_modules/
orleans.codegen.cs

# RIA/Silverlight projects
Generated_Code/

# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm

# SQL Server files
*.mdf
*.ldf

# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings

# Microsoft Fakes
FakesAssemblies/

# Node.js Tools for Visual Studio
.ntvs_analysis.dat

# Visual Studio 6 build log
*.plg

# Visual Studio 6 workspace options file
*.opt

# LightSwitch generated files
GeneratedArtifacts/
_Pvt_Extensions/
ModelManifest.xml
34 changes: 34 additions & 0 deletions Examples/JsonSample/JsonSample.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.24720.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{182E2583-ECAD-465B-BB50-91101D7C24CE}") = "JsonSample", "JsonSample\JsonSample.usqlproj", "{F4368386-85B9-437B-ACBE-12CC6031FB3A}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|Any CPU = Release|Any CPU
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Debug|x64.ActiveCfg = Debug|x64
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Debug|x64.Build.0 = Debug|x64
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Debug|x86.ActiveCfg = Debug|x86
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Debug|x86.Build.0 = Debug|x86
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Release|Any CPU.Build.0 = Release|Any CPU
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Release|x64.ActiveCfg = Release|x64
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Release|x64.Build.0 = Release|x64
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Release|x86.ActiveCfg = Release|x86
{F4368386-85B9-437B-ACBE-12CC6031FB3A}.Release|x86.Build.0 = Release|x86
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
38 changes: 38 additions & 0 deletions Examples/JsonSample/JsonSample/JsonParsing.usql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// JsonParsing.usql
// Reads JSON documents as raw strings, converts each one into a key/value map
// with the sample JsonTuple function, projects selected keys into columns and
// writes the result as delimited text.

// Register the JSON helper assemblies in the current database (skipped if they
// already exist), then reference them so the script can call into them.
CREATE ASSEMBLY IF NOT EXISTS [Newtonsoft.Json] FROM "assemblies/Newtonsoft.Json.dll";
CREATE ASSEMBLY IF NOT EXISTS [Microsoft.Analytics.Samples.Formats] FROM "assemblies/Microsoft.Analytics.Samples.Formats.dll";

REFERENCE ASSEMBLY [Newtonsoft.Json];
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats];

// Extract the JSON string using the default Text extractor. This is ideal if
// your file size is < 128 KB.
// The '\b' delimiter with quoting disabled yields a single string column per
// row (presumably chosen because backspace does not occur in the input data).
@json =
    EXTRACT jsonString string
    FROM @"Samples/Data/json/radiowebsite/{*}.json"
    USING Extractors.Text(delimiter:'\b', quoting:false);

// Use the JsonTuple function to turn the JSON string into a map that can be
// indexed by key below.
@jsonify =
    SELECT Microsoft.Analytics.Samples.Formats.Json.JsonFunctions.JsonTuple(jsonString) AS rec
    FROM @json;

// Extract the fields you want from the JSON object.
// NOTE(review): "sessionid" is the only all-lowercase key in this list; confirm
// that it matches the key casing used in the input data.
@columnized =
    SELECT rec["ts"] AS ts,
           rec["userId"] AS userId,
           rec["sessionid"] AS sessionId,
           rec["page"] AS page,
           rec["auth"] AS auth,
           rec["method"] AS method,
           rec["status"] AS status,
           rec["level"] AS level,
           rec["itemInSession"] AS itemInSession,
           rec["location"] AS location,
           rec["lastName"] AS lastName,
           rec["firstName"] AS firstName,
           rec["registration"] AS registration,
           rec["gender"] AS gender,
           rec["artist"] AS artist,
           rec["song"] AS song,
           // A missing "length" falls back to "0". Parse with the invariant
           // culture so that the '.' decimal separator used by JSON numbers is
           // interpreted the same way regardless of the ambient culture.
           Double.Parse((rec["length"] ?? "0"), System.Globalization.CultureInfo.InvariantCulture) AS length
    FROM @jsonify;

// Output the rowset as a text file for consumption by a tool of your choice.
OUTPUT @columnized
TO "/Samples/Output/columnized.txt"
USING Outputters.Text();
11 changes: 11 additions & 0 deletions Examples/JsonSample/JsonSample/JsonParsing.usql.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
using Microsoft.Analytics.Interfaces;
using Microsoft.Analytics.Types.Sql;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

// Companion C# file for JsonParsing.usql.
// The namespace is intentionally empty here: the script calls only into the
// assemblies it references and defines no C# helpers of its own.
namespace JsonSample
{

}
Loading

0 comments on commit bd2f7af

Please sign in to comment.