Maria@70
|
1 library(rworldmap)
|
Maria@70
|
2 library(ggplot2)
|
Maria@70
|
3
|
Maria@70
|
# Horizontal stacked bar chart of the number of rows per value of `cat`,
# with each bar filled by the value of `ordercat`.
#
# Args:
#   df:        data frame containing the columns named by `cat` and `ordercat`.
#   cat:       name of the column whose values become the bars.
#   ordercat:  name of the column used both to order the bars and to fill them.
#   mincount:  only categories with strictly more than `mincount` rows are kept.
#   legend:    if TRUE the fill legend is shown, titled with `ordercat`.
#   color_plt: ColorBrewer palette name passed to scale_fill_brewer().
#
# Returns:
#   A ggplot object.
PlotBarChart <- function(df, cat = "Language", ordercat = "REGION",
                         mincount = 10, legend = TRUE, color_plt = "Paired") {
  # Fail fast with a readable message instead of an obscure indexing error.
  missing_cols <- setdiff(c(cat, ordercat), colnames(df))
  if (length(missing_cols) > 0) {
    stop("PlotBarChart: column(s) not found in df: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # Drop rows whose category is empty (or NA).
  cat_values <- as.character(df[[cat]])
  dfsub <- df[!is.na(cat_values) & cat_values != "", , drop = FALSE]

  # Keep only categories that occur more than `mincount` times.
  cat_values <- as.character(dfsub[[cat]])
  cat_counts <- table(cat_values)
  frequent <- names(cat_counts)[cat_counts > mincount]
  dfsub <- dfsub[cat_values %in% frequent, , drop = FALSE]

  # Sort by the ordering column, then freeze that order in the factor levels
  # so bars belonging to the same `ordercat` value sit next to each other.
  dfsub <- dfsub[order(dfsub[[ordercat]]), , drop = FALSE]
  cat_values <- as.character(dfsub[[cat]])
  plot_df <- data.frame(
    category = factor(cat_values, levels = unique(cat_values)),
    group = dfsub[[ordercat]]
  )

  # The y scale is capped at 200; ggplot2 drops (with a warning) any bar
  # segment that falls outside these limits.
  # theme_bw() is a complete theme, so no theme() tweak is placed before it
  # (it would be discarded).
  g <- ggplot(plot_df, aes(x = category, fill = group)) +
    geom_bar() +
    scale_y_continuous(limits = c(0, 200), breaks = seq(0, 200, 40)) +
    scale_fill_brewer(palette = color_plt) +
    labs(y = "Counts", x = cat) +
    coord_flip() +
    theme_bw()

  if (legend) {
    g <- g + guides(fill = guide_legend(title = ordercat))
  } else {
    g <- g + guides(fill = "none")
  }
  g
}
|
Maria@70
|
25
|
Maria@70
|
# World map that colours each country by whether its number of rows in `df`
# is below `mincount` or not.
#
# Args:
#   df:       data frame with a `Country` column.
#   mincount: threshold separating the two colour classes.
#
# Returns:
#   The value returned by addMapLegendBoxes().
PlotCountryNCounts <- function(df, mincount=10){
  per_country <- data.frame(table(df$Country))
  names(per_country) <- c("Country", "Counts")

  # Class code per country: 1 = fewer than mincount rows, 2 = otherwise.
  per_country$NCounts <- 2 - as.numeric(per_country$Counts < mincount)

  world <- joinCountryData2Map(per_country,
                               joinCode = "NAME",
                               nameCountryColumn = "Country",
                               nameJoinColumn = "Country")

  # NOTE(review): the title says ">" while the classes are "<" / ">=".
  map_title <- paste("Country sample size, n_recordings>", mincount)
  mapParams <- mapCountryData(world,
                              nameColumnToPlot = "NCounts",
                              catMethod = 'categorical',
                              missingCountryCol = "grey",
                              oceanCol = "lightblue",
                              colourPalette = 'heat',
                              mapTitle = map_title,
                              addLegend = FALSE)

  # Replace the default legend labels with readable ones.
  # NOTE(review): assumes both classes occur in the data - confirm.
  mapParams$legendText <- c(paste('<', mincount), paste('>=', mincount), 'na')
  legend_args <- c(mapParams, x = 'bottomleft')
  do.call(addMapLegendBoxes, legend_args)
}
|
Maria@70
|
38
|
Maria@70
|
# World map (Antarctica removed) of the number of rows per country, binned
# in steps of 10 between 10 and 100, with a hand-built legend.
#
# Args:
#   df:     data frame with a `Country` column.
#   output: if TRUE the map is additionally written to "countrycounts.pdf".
#
# Returns:
#   Invisibly, the list returned by mapCountryData() for the on-screen map.
PlotCountryCounts <- function(df, output = FALSE) {
  dd <- data.frame(table(df$Country))
  names(dd) <- c("Country", "Counts")
  spdf <- joinCountryData2Map(dd,
                              joinCode = "NAME",
                              nameCountryColumn = "Country",
                              nameJoinColumn = "Country")

  # Select the rows to keep rather than negating which(): `x[-which(cond), ]`
  # returns zero rows when nothing matches.
  spdf <- spdf[which(!(spdf$ADMIN %in% "Antarctica")), ]

  # Draws the map and its legend on the current graphics device.
  draw_map <- function() {
    params <- mapCountryData(spdf,
                             nameColumnToPlot = "Counts",
                             catMethod = seq(10, 100, 10),
                             missingCountryCol = "grey",
                             borderCol = "black",
                             oceanCol = "white",
                             colourPalette = "heat",
                             mapTitle = "",
                             addLegend = FALSE)
    # Nine bins listed from "90 - 100" down to "10 - 20", plus the NA colour.
    legend("left",
           legend = c(paste(seq(90, 1, -10), "-", seq(100, 11, -10)), "NA"),
           fill = c(heat.colors(9, alpha = 1), "grey"),
           cex = 0.56, bty = "o", bg = "white",
           box.lwd = 0, box.col = "white")
    params
  }

  mapParams <- draw_map()

  if (output) {
    pdf(file = "countrycounts.pdf")
    # Close the PDF device even if drawing fails.
    on.exit(dev.off(), add = TRUE)
    draw_map()
  }
  invisible(mapParams)
}
|
Maria@70
|
58
|
Maria@70
|
# Histogram of recording years, one bin per year from 1895 to 2015.
# The plot is printed, and optionally saved to 'yeardistribution.pdf'.
#
# Args:
#   df:     data frame with a `Year` column (converted to numeric here).
#   output: if TRUE the plot is also saved with ggsave().
PlotYearDistribution <- function(df, output=F){
  # Going through character first handles a factor-coded Year column.
  df$Year <- as.numeric(as.character(df$Year))

  year_breaks <- seq(1895, 2015, by = 1)
  year_hist <- ggplot(df, aes(x = Year, y = ..count..)) +
    geom_histogram(breaks = year_breaks) +
    theme_bw() +
    labs(x = 'Year', y = 'Count')

  print(year_hist)
  if (output) {
    ggsave('yeardistribution.pdf', plot = year_hist)
  }
}
|
Maria@70
|
71
|
Maria@70
|
# Bar chart of the number of rows per country, with vertical x-axis labels.
#
# Args:
#   df: data frame with a `Country` column.
#
# Returns:
#   A ggplot object, returned visibly so that calling the function at top
#   level draws the plot.
PlotCountryDistribution <- function(df) {
  g <- ggplot(df, aes(x = Country)) + geom_bar()
  # Do not end on an assignment: that would return the plot invisibly and
  # nothing would be drawn.
  g + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
}
|
Maria@70
|
79
|
Maria@70
|
80 #PlotCultureDistribution <- function(df){
|
Maria@70
|
81 # g = ggplot(df,aes(x=Culture))+geom_bar()
|
Maria@70
|
82 # g+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
|
Maria@70
|
83 #}
|
Maria@70
|
84
|
Maria@70
|
# Base-graphics bar plot of the number of rows per language.
#
# Args:
#   df:       data frame with a `Language` column.
#   mincount: languages with fewer than `mincount` rows are left out.
#
# Returns:
#   The bar midpoints returned by barplot().
PlotLanguageDistribution <- function(df, mincount = 1) {
  if (!("Language" %in% colnames(df))) {
    stop("PlotLanguageDistribution: df has no 'Language' column",
         call. = FALSE)
  }
  # Ignore recordings without language information.
  dfsubset <- subset(df, df$Language != "")
  # Tabulate the Language column (the same column the filter uses).
  languagecounts <- table(dfsubset$Language)
  languagecounts <- languagecounts[languagecounts >= mincount]
  barplot(languagecounts, las = 2, cex.names = 0.2)
}
|
Maria@70
|
93
|
Maria@70
|
# Base-graphics bar plot of the number of rows per value of one column.
#
# Args:
#   df:       data frame.
#   cat:      name of the column to tabulate.
#   mincount: values with fewer than `mincount` rows are left out.
#
# Returns:
#   The bar midpoints returned by barplot().
PlotBarForCategory <- function(df, cat = "Language", mincount = 1) {
  if (!(cat %in% colnames(df))) {
    stop("PlotBarForCategory: column not found in df: ", cat, call. = FALSE)
  }
  values <- df[[cat]]
  # Ignore rows with an empty (or NA) value.
  keep <- !is.na(values) & values != ""
  counts <- table(values[keep])
  counts <- counts[counts >= mincount]
  barplot(counts, las = 2, cex.names = 0.2)
}
|
Maria@70
|
103
|
Maria@70
|
# World map of the number of rows per country, restricted to rows whose
# year is strictly before `cutoffyear`.
#
# Args:
#   df:         data frame with `Year` and `Country` columns.
#   cutoffyear: rows with Year < cutoffyear are counted.
#
# Returns:
#   The value returned by addMapLegend().
PlotCountryYearCutoff <- function(df, cutoffyear = 1960) {
  # Coerce through character so a factor- or string-coded Year compares
  # numerically (same conversion as PlotYearDistribution).
  years <- as.numeric(as.character(df$Year))
  # which() drops rows whose year is NA.
  dfsubset <- df[which(years < cutoffyear), , drop = FALSE]

  dd <- data.frame(table(dfsubset$Country))
  names(dd) <- c("Country", "Counts")
  spdf <- joinCountryData2Map(dd,
                              joinCode = "NAME",
                              nameCountryColumn = "Country",
                              nameJoinColumn = "Country")

  # One colour break per distinct count value present in the joined data.
  count_breaks <- as.numeric(levels(as.factor(spdf$Counts)))
  mapParams <- mapCountryData(spdf,
                              nameColumnToPlot = "Counts",
                              catMethod = count_breaks,
                              missingCountryCol = "grey",
                              oceanCol = "lightblue",
                              colourPalette = "heat",
                              mapTitle = paste("Country sample size, year<",
                                               cutoffyear),
                              addLegend = FALSE)
  do.call(addMapLegend,
          c(mapParams, labelFontSize = 0.3, legendWidth = 0.5, tcl = -0.3,
            legendMar = 4, legendLabels = "all", horizontal = FALSE,
            legendIntervals = "page"))
}
|
Maria@70
|
114
|
Maria@70
|
115 #dftemp$Decade<-floor(df$Year/10)*10
|
Maria@70
|
116 #yearcounts = sapply(levels(dftemp$Country),function(x)table(subset(dftemp,Country==x)$Before1960))
|
Maria@70
|
117 #decadecounts = sapply(levels(df$Country),function(x)table(subset(dftemp,Country==x & Before1960=="TRUE")$Before1960))
|
Maria@70
|
118 #print(yearcounts)
|
Maria@70
|
119
|
Maria@70
|
120 #metadata we are interested in:
|
Maria@70
|
121 #Artist,AlbumTitle,Culture, Language/Language_Album, Subject_Album
|
Maria@70
|
# World map of the number of rows per country, counting only rows that have
# a non-empty `Culture` value. Colour classes use the "logFixedWidth" method.
#
# Args:
#   df: data frame with `Culture` and `Country` columns.
#
# Returns:
#   The value returned by addMapLegend().
PlotCountryCulture <- function(df){
  has_culture <- df$Culture != ""
  with_culture <- df[has_culture & !is.na(has_culture), , drop = FALSE]

  per_country <- setNames(data.frame(table(with_culture$Country)),
                          c("Country", "Counts"))

  world <- joinCountryData2Map(per_country,
                               joinCode = "NAME",
                               nameCountryColumn = "Country",
                               nameJoinColumn = "Country")

  map_args <- mapCountryData(
    world,
    nameColumnToPlot = "Counts",
    catMethod = "logFixedWidth",
    missingCountryCol = 'grey',
    oceanCol = "lightblue",
    colourPalette = 'heat',
    mapTitle = "Number of recordings with culture information",
    addLegend = FALSE
  )

  # The legend is drawn separately so its size and labels can be tuned.
  legend_args <- c(map_args,
                   list(labelFontSize = 0.3,
                        legendWidth = 0.5,
                        tcl = -0.3,
                        legendMar = 4,
                        legendLabels = "all",
                        horizontal = FALSE,
                        legendIntervals = "page"))
  do.call(addMapLegend, legend_args)
}
|
Maria@70
|
132
|
Maria@70
|
# Base-graphics bar plot of the number of rows per culture.
#
# Args:
#   df: data frame with a `Culture` column.
#
# Returns:
#   The bar midpoints returned by barplot().
PlotCultureDistribution <- function(df){
  # Leave out recordings that have no culture information.
  has_culture <- df$Culture != ""
  with_culture <- df[has_culture & !is.na(has_culture), , drop = FALSE]
  barplot(table(with_culture$Culture), las = 2, cex.names = 0.2)
}
|
Maria@70
|
140
|
Maria@70
|
# Exploratory summary of cultures per country, using rows with a non-empty
# `Culture`:
#   1. prints the countries whose total exceeds 1000, 500 and 100 rows;
#   2. draws a mosaic plot of the first (up to) five culture columns for
#      countries with more than 500 rows;
#   3. returns a faceted point plot of culture counts (> 20) for Canada,
#      United Kingdom and United States of America.
#
# Args:
#   df: data frame with `Country` and `Culture` columns.
#
# Returns:
#   A ggplot object.
PlotCountryNCulture <- function(df) {
  dfsubset <- subset(df, df$Culture != "")
  countrycounts <- table(dfsubset$Country, dfsubset$Culture)

  # Adds a "Total" row and a "Total" column; the last column holds the
  # per-country totals.
  aa <- addmargins(countrycounts, FUN = list(Total = sum), quiet = TRUE)
  totals <- aa[, ncol(aa)]
  for (threshold in c(1000, 500, 100)) {
    print(paste(paste0(">", threshold), row.names(aa)[totals > threshold]))
  }

  inds <- which(totals > 500 & row.names(aa) != "Total")
  # Use at most five culture columns, and skip the mosaic plot when there is
  # nothing to draw, instead of failing with a subscript error.
  n_cultures <- min(5, ncol(countrycounts))
  if (length(inds) > 0 && n_cultures > 0) {
    mosaicplot(aa[inds, seq_len(n_cultures)])
  }

  dd <- data.frame(countrycounts)
  names(dd) <- c("Country", "Culture", "Counts")
  focus_countries <- c("Canada", "United Kingdom", "United States of America")
  ddsub <- subset(dd, Country %in% focus_countries & Counts > 20)

  g <- ggplot(ddsub, aes(x = Culture, y = Counts)) +
    geom_point() +
    facet_wrap(~Country, scales = "free")
  g + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
}
|
Maria@70
|
158
|
Maria@70
|
# Faceted point plot (one panel per country, free scales) of the number of
# rows per culture, keeping only country/culture pairs with more than
# `mincount` rows.
#
# Args:
#   df:       data frame with `Country` and `Culture` columns.
#   mincount: pairs must have strictly more rows than this to be shown.
#
# Returns:
#   A ggplot object.
PlotCountryCultureNcounts <- function(df,mincount=50){
  # Rows without culture information are left out.
  has_culture <- df$Culture != ""
  with_culture <- df[has_culture & !is.na(has_culture), , drop = FALSE]

  pair_counts <- setNames(
    data.frame(table(with_culture$Country, with_culture$Culture)),
    c("Country", "Culture", "Counts")
  )
  frequent_pairs <- pair_counts[which(pair_counts$Counts > mincount), ]

  vertical_labels <- theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )
  ggplot(frequent_pairs, aes(x = Culture, y = Counts)) +
    geom_point() +
    facet_wrap(~Country, scales = "free") +
    vertical_labels
}
|
Maria@70
|
169
|
Maria@70
|
# Faceted point plot (one panel per country, free scales) of the number of
# rows per language, keeping only country/language pairs with more than
# `mincount` rows.
#
# Args:
#   df:       data frame with `Country` and `Language` columns.
#   mincount: pairs must have strictly more rows than this to be shown.
#
# Returns:
#   A ggplot object.
PlotCountryLanguageNcounts <- function(df,mincount=50){
  # Both the empty string and a single space count as "no language".
  has_language <- df$Language != "" & df$Language != " "
  with_language <- df[has_language & !is.na(has_language), , drop = FALSE]

  pair_counts <- setNames(
    data.frame(table(with_language$Country, with_language$Language)),
    c("Country", "Language", "Counts")
  )
  frequent_pairs <- pair_counts[which(pair_counts$Counts > mincount), ]

  vertical_labels <- theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )
  ggplot(frequent_pairs, aes(x = Language, y = Counts)) +
    geom_point() +
    facet_wrap(~Country, scales = "free") +
    vertical_labels
}
|
Maria@70
|
179
|
Maria@70
|
# Faceted point plot of the co-occurrence counts of two categorical columns:
# one panel per value of `cat1`, one point per value of `cat2`, keeping only
# pairs with more than `mincount` rows.
#
# Args:
#   df:       data frame containing the columns named by `cat1` and `cat2`.
#   cat1:     name of the column used for the facets.
#   cat2:     name of the column shown along the (flipped) axis.
#   mincount: pairs must have strictly more rows than this to be shown.
#   figname:  if non-empty, the plot is saved to this file with ggsave().
#
# Returns:
#   The ggplot object when `figname` is "", otherwise the value of ggsave().
PlotNxNcounts <- function(df, cat1 = "Country", cat2 = "Culture",
                          mincount = 50, figname = "") {
  missing_cols <- setdiff(c(cat1, cat2), colnames(df))
  if (length(missing_cols) > 0) {
    stop("PlotNxNcounts: column(s) not found in df: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  values1 <- df[[cat1]]
  values2 <- df[[cat2]]
  # Leave out rows where either value is empty; which() also drops NA.
  keep <- which(values1 != "" & values2 != "")
  NNcounts <- table(values1[keep], values2[keep])

  dd <- data.frame(NNcounts)
  names(dd) <- c("Cat1", "Cat2", "Counts")
  ddsub <- subset(dd, Counts > mincount)

  g <- ggplot(ddsub, aes(x = Cat2, y = Counts)) +
    geom_point() +
    facet_wrap(~Cat1) +
    coord_flip() +
    theme(axis.text.y = element_text(hjust = 1, vjust = 0.5, size = 5)) +
    labs(y = "Counts", x = cat2)

  if (figname == "") {
    g
  } else {
    # Pass the plot explicitly: ggsave() defaults to the last plot that was
    # displayed, which is not necessarily this one.
    ggsave(figname, plot = g)
  }
}
|
Maria@70
|
197
|
Maria@70
|
# Word cloud of the values of one column, sized by how often each occurs.
#
# Args:
#   df:     data frame.
#   cat:    name of the column whose values are the words.
#   output: if TRUE the cloud is additionally written to "wordcloud.pdf".
#
# Returns:
#   NULL, invisibly.
Wordcloud <- function(df, cat = "Language", output = FALSE) {
  # Stop with a clear message when an optional package is missing.
  for (pkg in c("wordcloud", "RColorBrewer")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Wordcloud: package '", pkg, "' is required but not installed",
           call. = FALSE)
    }
  }
  if (!(cat %in% colnames(df))) {
    stop("Wordcloud: column not found in df: ", cat, call. = FALSE)
  }

  dd <- data.frame(table(df[[cat]]))
  names(dd) <- c("words", "freq")
  pal2 <- RColorBrewer::brewer.pal(8, "Dark2")

  # Draws the cloud on the current graphics device.
  draw_cloud <- function() {
    wordcloud::wordcloud(dd$words, dd$freq, random.order = FALSE,
                         colors = pal2)
  }

  draw_cloud()
  if (output) {
    pdf("wordcloud.pdf")
    # Close the PDF device even if drawing fails.
    on.exit(dev.off(), add = TRUE)
    draw_cloud()
  }
  invisible(NULL)
}
|
Maria@70
|
# World map (Antarctica removed) of the `Outliers` column, binned in steps
# of 0.1 between 0 and 1, with a hand-built percentage legend.
#
# Args:
#   df:     data frame with `Country` and `Outliers` columns.
#   output: if non-empty, path of a PDF file the map is additionally
#           written to.
#
# Returns:
#   The list returned by mapCountryData() for the on-screen map (invisibly
#   when a PDF is written).
PlotCountryOutliers <- function(df, output = '') {
  # NOTE(review): the margins are changed on the current device and not
  # restored afterwards; the PDF device opened below uses its own defaults.
  par(mar = rep(2, 4))
  spdf <- joinCountryData2Map(df,
                              joinCode = "NAME",
                              nameCountryColumn = "Country",
                              nameJoinColumn = "Country")

  # Select the rows to keep rather than negating which(): `x[-which(cond), ]`
  # returns zero rows when nothing matches.
  spdf <- spdf[which(!(spdf$ADMIN %in% "Antarctica")), ]

  # Draws the map and its legend on the current graphics device.
  draw_map <- function() {
    params <- mapCountryData(spdf,
                             nameColumnToPlot = "Outliers",
                             catMethod = seq(0, 1, 0.1),
                             missingCountryCol = "grey",
                             colourPalette = "heat",
                             mapTitle = "",
                             addLegend = FALSE)
    # Ten bins listed from "90 - 100 %" down to "0 - 10 %", plus NA.
    legend("left",
           legend = c(paste(seq(90, 0, -10), "-", seq(100, 10, -10), "%"),
                      "NA"),
           fill = c(heat.colors(10, alpha = 1), "grey"),
           cex = 0.56, bty = "o", bg = "white",
           box.lwd = 0, box.col = "white")
    params
  }

  mapParams <- draw_map()

  if (output != '') {
    pdf(output)
    # Close the PDF device even if drawing fails.
    on.exit(dev.off(), add = TRUE)
    draw_map()
    return(invisible(mapParams))
  }
  mapParams
}