-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyserecent.R
126 lines (77 loc) · 3.49 KB
/
analyserecent.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# view / analyse recent plays
library(plyr)
library(ggplot2)
# Switch to the project directory so the relative path to getPlays.R resolves.
# NOTE(review): this changes the working directory for the rest of the session.
setwd("~/GitHub/Rtraining")
# Load the helper script; getPlays() is expected to be defined there.
source("getPlays.R")
# Fetch the play history that every section below analyses.
# NOTE(review): presumably one row per play, with the columns used below
# (artist, year, month, monthOnly, hourOnly, time) -- confirm against getPlays().
data <- getPlays()
# Most popular artists: dot plot of the 20 artists with the highest counts.
# Count rows per artist (presumably one row of `data` is one play -- confirm
# against getPlays()).
z <- ddply(data, "artist", summarise, freq = length(artist))
# Sort ascending so that tail() keeps the 20 largest counts.
z <- z[order(z$freq), ]
z <- tail(z, n = 20)
# reorder() sorts the y axis by count. The former `origin = 0` mapping was
# dropped: `origin` is not an aesthetic, so it had no effect on geom_point().
p <- ggplot(data = z, aes(x = freq, y = reorder(artist, freq)))
p +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )
# End: most popular artists
# The 100 most popular artists.
# plyr::count() returns one row per artist with the number of rows in `freq`.
y <- count(data, "artist")
# Plot the 100 artists with the highest counts. A stand-alone order() call
# used to sit here; its result was never assigned (it only printed an index
# vector), so it was removed.
ggplot(
  data = head(y[order(y$freq, decreasing = TRUE), ], 100),
  aes(x = freq, y = reorder(artist, freq))
) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )
# End: the 100 most popular artists
# When was a given artist popular?
# The section was first written for Lars Winnerbäck, but that subset was
# immediately overwritten; only the Sigrid Moldestad filter ever took effect,
# so the dead assignment has been removed.
y1 <- subset(data, artist == "Sigrid Moldestad")
# Plays per month, filled by year.
# NOTE(review): geom_histogram() bins a continuous x; if `month` is a string
# or factor, geom_bar() may be what is needed -- confirm the column type.
ggplot(y1, aes(x = month, fill = year)) + geom_histogram()
# Density of play times: the default curve plus a red one with the bandwidth
# adjustment set to 0.5. Columns are referenced by bare name inside aes().
ggplot(y1, aes(x = time)) +
  geom_density() +
  geom_line(stat = "density", adjust = 0.5, colour = "red")
# End: when was the artist popular
# Use ddply to count the length of the artist vector per artist, year and
# month, then plot the monthly counts for one artist as a line.
z <- ddply(
  data,
  c("artist", "year", "month"),
  summarise,
  freq = length(artist)
)
z <- subset(z, artist == "Lars Winnerbäck")
z$year <- factor(z$year)
# Turn each month into a Date on the first day of that month.
# assumes `month` looks like "YYYY-MM" so that appending "-1" parses -- TODO confirm
z <- transform(z, date = as.Date(paste(month, "-1", sep = "")))
# Columns are referenced by bare name inside aes(); `z$date` / `z$freq`
# bypassed ggplot2's data masking and produced "z$date"-style axis labels.
ggplot(z, aes(x = date, y = freq)) + geom_line()
# End: use ddply to count the length of the vector
# Try out different plots of the distribution of per-artist counts.
y <- count(data, "artist")
# Keep artists that occur more than once, then those below 500.
y <- subset(y, freq > 1)
lt500 <- subset(y, freq < 500)
# Histogram on the density scale with a density curve on top.
# Columns are referenced by bare name inside aes() instead of `lt500$freq`.
# NOTE(review): `origin` and `..density..` are older ggplot2 spellings; newer
# releases prefer `boundary` and `after_stat(density)` -- confirm the
# installed version before changing them.
ggplot(lt500, aes(x = freq, y = ..density..)) +
  geom_histogram(binwidth = 50, origin = 30) +
  geom_density()
# The same distribution as a filled density and as a plain density line.
ggplot(lt500, aes(x = freq)) + geom_density()
ggplot(lt500, aes(x = freq)) + geom_line(stat = "density")
# End: try out different plots
# Which month do I listen to the most music?
y <- count(data, c("artist", "year", "monthOnly"))
y$year <- factor(y$year)
# Counts per month, coloured by year.
ggplot(y, aes(x = monthOnly, y = freq, colour = year)) +
  geom_bar(stat = "identity")
# `y` has no `month` column: the earlier `y$month` only worked because `$`
# partially matched it to `monthOnly`. The real column name is used now.
# NOTE(review): stat = "density" and geom_histogram() expect a continuous x --
# confirm the type of `monthOnly`.
ggplot(y, aes(x = monthOnly, colour = year)) + geom_bar(stat = "density")
ggplot(y, aes(x = monthOnly, fill = year)) + geom_histogram()
# End: which month do I listen to the most music
# Most popular artists within a single year.
y <- count(data, c("artist", "year"))
# Order by year, then by descending count within each year.
y <- y[order(y$year, -y$freq), ]
y$year <- factor(y$year)
# First 100 rows for 2013 (already sorted by descending count).
y.df <- head(y[which(y$year == "2013"), ], 100)
# First ten rows of every year; kept for inspection, not plotted.
tmp <- by(y, y$year, function(rows) head(rows, n = 10))
ggplot(data = y.df, aes(x = freq, y = reorder(artist, freq))) +
  geom_point(aes(colour = year), size = 3) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )
# End: most popular artists within a single year
# Order the artist factor levels by year, then by descending count.
yearorder <- y$artist[order(y$year, -y$freq)]
# An artist appears once per year it occurs in, so `yearorder` contains
# repeats. factor() rejects duplicated levels, so keep only the first
# occurrence of each artist.
y$artist <- factor(y$artist, levels = unique(as.character(yearorder)))
# Make sure year is a factor (a no-op if it already is one).
y$year <- factor(y$year)
# Bar chart of how many rows of `data` fall in each hour of the day.
data.timeofday <- count(data, vars = "hourOnly")
ggplot(data.timeofday, aes(x = hourOnly, y = freq)) +
  geom_bar(stat = "identity")
# End: plays per hour of the day
#-------
# What music do I play at a given hour? Top ten artists for that hour.
# NOTE(review): the original heading said five o'clock and the variable is
# named data.atfive, but the filter selects hourOnly == "22" -- confirm which
# hour is intended.
data.atfive <- subset(data, hourOnly == "22")
y <- count(data.atfive, "artist")
# Sort by descending count and keep the first ten rows.
y <- y[order(-y$freq), ]
y <- head(y, 10)
ggplot(data = y, aes(x = freq, y = reorder(artist, freq))) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )