-
Notifications
You must be signed in to change notification settings - Fork 87
/
Copy pathTransformDataSet.R
74 lines (59 loc) · 2.8 KB
/
TransformDataSet.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Author: Jitender Aswani, Co-Founder @datadolph.in
# Date: 3/15/2013
# Copyright (c) 2011, under the Creative Commons Attribution-NonCommercial 3.0 Unported (CC BY-NC 3.0) License
# For more information see: https://creativecommons.org/licenses/by-nc/3.0/
# All rights reserved.
# Remove every object that ls() reports in the current environment so the
# script starts from an empty workspace (attached packages are not touched).
rm (list = ls())
# Switch to the project directory; the relative "datasets/..." and "pads/..."
# paths used further down are resolved against it.
# NOTE(review): this is a machine-specific path - confirm it exists before running.
setwd("~/Toga/Alto")
library("chron")
library("plyr")
library("RJSONIO")
library("data.table")
library("ggplot2")
# ---- File locations (relative to the working directory) ----
in.file.name <- "datasets/BollywoodCinema-1940-2008.csv"
out.file.name <- "datasets/masterblaster/TransformedSachinTestRecords.csv"
out.file.JSON <- "datasets/masterblaster/SachinTestRecords.JSON"
out.file.R.dataframes <- "datasets/masterblaster/MB.RData"

# Read the input CSV into a data.table: "-" is treated as NA, character
# columns are kept as character, and unquoted fields are whitespace-trimmed.
dt <- data.table(
  read.csv(
    file = in.file.name,
    header = TRUE,
    na.strings = "-",
    strip.white = TRUE,
    as.is = TRUE,
    stringsAsFactors = FALSE
  )
)

# Lookup vectors of English month and weekday names.
# `month.name` is the base R constant holding January..December.
lMonths <- month.name
lDays <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
# ---- Plot experiments ----

# `totmidc` is not created anywhere in this script, so the two plots below
# are only drawn when the session already provides it.
# NOTE(review): expected to be a long-format table with `variable` and
# `value` columns - confirm against wherever it is built.
if (exists("totmidc")) {
  # print() is required here: inside a braced block only the last value
  # would be auto-printed, and a ggplot is drawn by printing it.
  # Line plot
  print(
    ggplot(totmidc, aes(variable, value)) + geom_line() + xlab("") + ylab("")
  )
  # Bar plot
  # Note that the parameter stat = "identity" is passed to geom_bar() so the
  # bar height is taken from `value` instead of a row count.
  print(
    ggplot(totmidc, aes(x = variable, y = value)) +
      geom_bar(stat = "identity") + xlab("") + ylab("")
  )
}

# For each year, count the rows whose Cast matches each family pattern
# (case-insensitive).
d <- dt[,
  list(
    Deols = sum(grepl("deol|Dharmendra", Cast, ignore.case = TRUE)),
    Kapoors = sum(grepl("kapoor", Cast, ignore.case = TRUE)),
    Khans = sum(grepl("khan", Cast, ignore.case = TRUE)),
    Bachchans = sum(grepl("bachchan", Cast, ignore.case = TRUE))
  ),
  by = year
]

# Long format: one row per (year, family); the family name lands in
# `variable` and the count in `value`. `id.vars` is spelled out in full
# rather than relying on partial argument matching.
d1 <- melt(d, id.vars = "year")

# Stacked bars, one colour per family.
ggplot(d1) +
  geom_bar(aes(x = year, y = value, fill = variable), stat = "identity")

# One panel per family. Faceting has to use the long table: the wide table
# `d` has no single column that identifies the family.
ggplot(d1, aes(x = year, y = value)) +
  geom_bar(stat = "identity") +
  facet_wrap(~variable)

# Restrict the comparison to a chosen subset of families. The subset is
# taken from the long table so every aesthetic has one value per row
# (concatenating two columns of `d` would be twice as long as the data).
n <- c("Deols", "Kapoors")
ggplot(d1[variable %in% n], aes(x = factor(year), y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("") + ylab("Count")
ggplot(d1[variable %in% n], aes(x = year, y = value)) +
  geom_bar(stat = "identity") +
  facet_wrap(~variable)

# Minimal stacked-bar example on toy data. Built as a data.table because
# this script loads data.table (not reshape2) for melt().
dat <- data.table(
  num = 1:3,
  usage = c(4, 2, 5),
  cap = c(10, 20, 10),
  diff = c(6, 18, 5)
)
dat.melt <- melt(dat, id.vars = c("num", "cap"))
ggplot(dat.melt) +
  geom_bar(aes(x = num, y = value, fill = variable), stat = "identity")
# ---- IPL data set ----
# Re-points `in.file.name` and `dt` at the IPL pad, replacing the values
# loaded earlier in the script.
# The original line carried a stray bare token after the string literal,
# which is a parse error; it is kept here as a comment.
in.file.name <- "pads/data/pad56638.csv" # stray token in original: pad89706
dt <- data.table(
  read.csv(
    in.file.name,
    na.strings = "-",
    as.is = TRUE,
    header = TRUE,
    stringsAsFactors = FALSE,
    strip.white = TRUE
  )
)
# Sixes grouped by Player, then sorted with the largest values first.
dt[, Sixes, by = Player][order(-Sixes)]
# Evaluate an unquoted expression per group on a data.table and print it.
#
# Arguments:
#   dat  - table to query; indexed as dat[, j, by = ...], so it must
#          support data.table-style `[`.
#   expr - unquoted expression to evaluate in `j` (e.g. sum(Sixes)).
#   gby  - unquoted grouping column (e.g. Player).
#
# Returns whatever print() returns for the grouped result.
getCount <- function(dat, expr, gby) {
  # Capture both arguments unevaluated so they can be resolved against the
  # columns of `dat` instead of the caller's variables.
  j_expr <- substitute(expr)
  by_expr <- substitute(gby)
  grouped <- dat[, eval(j_expr), by = by_expr]
  print(grouped)
}
# Total sixes per player through the helper (prints the grouped table).
getCount(dt, sum(Sixes), Player)

# Same kind of query driven by pre-quoted column names. The quoted symbol
# has to be wrapped in eval() inside `j`; passing the variable itself would
# hand data.table a symbol object rather than the column it names.
q <- quote(Sixes)
q1 <- quote(Player)
# Grouped by player, then sorted with the largest values first
# (the original line had an unbalanced ")" and did not parse).
dt[, eval(q), by = q1][order(-eval(q))]
# Grouped by player, unsorted.
dt[, eval(q), by = q1]
# ---- Second pad (pad89706) ----
# NOTE(review): the original header here read "IPL DS", but the table is
# stored as `mbdt` and has a Test_Runs column - confirm which data set this is.
in.file.name <- "pads/data/pad89706.csv"
mbdt <- data.table(
  read.csv(
    file = in.file.name,
    header = TRUE,
    na.strings = "-",
    strip.white = TRUE,
    as.is = TRUE,
    stringsAsFactors = FALSE
  )
)
# Per Year: coerce Test_Runs to integer and sum it with NAs dropped,
# then list the years in ascending order.
mbdt[,
  list(sum(as.integer(Test_Runs), na.rm = TRUE)),
  by = Year
][order(Year)]