Mercurial > hg > plosone_underreview
comparison scripts_R/MetadataPlots.R @ 76:d17833be50ca branch-tests
merged
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Fri, 22 Sep 2017 16:30:36 +0100 |
parents | cc028157502a |
children | bde45ce0eeab |
comparison
equal
deleted
inserted
replaced
75:02faad4a996b | 76:d17833be50ca |
---|---|
1 library(rworldmap) | |
2 library(ggplot2) | |
3 | |
4 PlotBarChart<- function(df, cat="Language", ordercat="REGION", mincount=10, legend=T, color_plt="Paired"){ | |
5 idx_cat = which(colnames(df)==cat) | |
6 idx_ordercat = which(colnames(df)==ordercat) | |
7 dfsub <- subset(df, df[,idx_cat]!="") | |
8 dfsub <- dfsub[ dfsub[,idx_cat] %in% names(table(dfsub[,idx_cat]))[table(dfsub[,idx_cat]) >mincount] , ] | |
9 #dfsub <- dfsub[order(dfsub$REGION.y),] | |
10 dfsub <- dfsub[order(dfsub[,idx_ordercat]),] | |
11 dfsub[,idx_cat] <- factor(dfsub[,idx_cat], levels=unique(dfsub[,idx_cat])) | |
12 g = ggplot(dfsub,aes(dfsub[,idx_cat], fill=dfsub[,idx_ordercat], order=-as.numeric(dfsub[,idx_ordercat])))+geom_bar() | |
13 #g = g+ylim("0", "100")#+scale_y_discrete(breaks=c("100"),labels=c("100+")) | |
14 g=g+scale_y_continuous(limits=c(0, 200), breaks=seq(0,200,40)) | |
15 g=g+scale_fill_brewer(palette=color_plt) | |
16 g=g+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) | |
17 g=g+labs(y="Counts",x=cat)+coord_flip()+theme_bw() | |
18 if (legend){ | |
19 g=g+guides(fill = guide_legend(title = ordercat))} | |
20 else{ | |
21 g=g+guides(fill="none") | |
22 } | |
23 return(g) | |
24 } | |
25 | |
26 PlotCountryNCounts <- function(df, mincount=10){ | |
27 countrycounts = table(df$Country) | |
28 dd=data.frame(countrycounts) | |
29 names(dd)=c("Country","Counts") | |
30 cols <- rep(2, dim(dd)[1]) | |
31 cols[dd$Counts<mincount]=1 | |
32 dd$NCounts = cols | |
33 spdf<-joinCountryData2Map(dd,joinCode="NAME",nameCountryColumn="Country",nameJoinColumn="Country") | |
34 mapParams <- mapCountryData(spdf, nameColumnToPlot="NCounts",catMethod='categorical',missingCountryCol="grey",oceanCol="lightblue",colourPalette='heat', mapTitle=paste("Country sample size, n_recordings>",mincount),addLegend=F) | |
35 mapParams$legendText <- c(paste('<',mincount),paste('>=',mincount),'na') | |
36 do.call(addMapLegendBoxes, c(mapParams,x='bottomleft')) | |
37 } | |
38 | |
39 PlotCountryCounts <- function(df, output=F){ | |
40 countrycounts = table(df$Country) | |
41 dd=data.frame(countrycounts) | |
42 names(dd)=c("Country","Counts") | |
43 spdf<-joinCountryData2Map(dd,joinCode="NAME",nameCountryColumn="Country",nameJoinColumn="Country") | |
44 spdf<-spdf[-which(spdf$ADMIN=='Antarctica'),] | |
45 #mapParams <- mapCountryData(spdf, nameColumnToPlot="Counts",catMethod=as.numeric(levels(as.factor(spdf$Counts))),missingCountryCol='grey',borderCol='black',oceanCol="white",colourPalette='heat', mapTitle="",addLegend=F) | |
46 mapParams <- mapCountryData(spdf, nameColumnToPlot="Counts",catMethod=seq(10,100,10),missingCountryCol='grey',borderCol='black',oceanCol="white",colourPalette="heat", mapTitle="",addLegend=F) | |
47 #do.call( addMapLegend, c(mapParams, labelFontSize=0.7, legendShrink=0.5,legendWidth=0.5, tcl=0.3, legendMar = 7, legendLabels="all",horizontal=T, legendIntervals="page")) | |
48 legend("left", legend = c(paste(seq(90,1,-10),'-',seq(100,11,-10)), 'NA'), fill = c(heat.colors(9, alpha = 1), 'grey'), cex = 0.56, bty = "o",bg="white",box.lwd=0,box.col="white") | |
49 if (output){ | |
50 pdf(file="countrycounts.pdf") | |
51 mapParams <- mapCountryData(spdf, nameColumnToPlot="Counts",catMethod=seq(10,100,10),missingCountryCol='grey',borderCol='black',oceanCol="white",colourPalette="heat", mapTitle="",addLegend=F) | |
52 #mapParams <- mapCountryData(spdf, nameColumnToPlot="Counts",catMethod=as.numeric(levels(as.factor(spdf$Counts))),missingCountryCol='grey',borderCol='black',oceanCol="lightblue",colourPalette='heat', mapTitle="",addLegend=F) | |
53 #do.call( addMapLegend, c(mapParams, labelFontSize=0.7, legendShrink=0.5,legendWidth=0.5, tcl=0.3, legendMar = 7, legendLabels="all",horizontal=T, legendIntervals="page")) | |
54 legend("left", legend = c(paste(seq(90,1,-10),'-',seq(100,11,-10)), 'NA'), fill = c(heat.colors(9, alpha = 1), 'grey'), cex = 0.56, bty = "o",bg="white",box.lwd=0,box.col="white") | |
55 dev.off() | |
56 } | |
57 } | |
58 | |
59 PlotYearDistribution <- function(df, output=F){ | |
60 df$Year<-as.numeric(as.character(df$Year)) | |
61 g = ggplot(df,aes(x=Year,y=..count..))+geom_histogram(breaks=seq(1895, 2015, by = 1)) | |
62 #g = ggplot(df,aes(x=Year,y=..count..))+geom_bar()+geom_density(alpha=.3, fill="grey") | |
63 #g = g+scale_x_continuous(breaks = pretty(df$Year, n=10)) | |
64 g = g+theme_bw()+labs(x ='Year', y ='Count') | |
65 #g = ggplot(df,aes(x=Year,y=..count..))+geom_histogram()+theme_bw() | |
66 print(g) | |
67 if (output){ | |
68 ggsave('yeardistribution.pdf',plot=g) | |
69 } | |
70 } | |
71 | |
72 PlotCountryDistribution <- function(df){ | |
73 #countrycounts = table(df$Country) | |
74 #dd=data.frame(countrycounts) | |
75 #names(dd)=c("Country","Counts") | |
76 g = ggplot(df,aes(x=Country))+geom_bar() | |
77 g=g+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) | |
78 } | |
79 | |
80 #PlotCultureDistribution <- function(df){ | |
81 # g = ggplot(df,aes(x=Culture))+geom_bar() | |
82 # g+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) | |
83 #} | |
84 | |
85 PlotLanguageDistribution <- function(df, mincount=1){ | |
86 dfsubset <- subset(df, df$Language!="") # ignore the recordings culture info | |
87 culturecounts = table(dfsubset$Culture) | |
88 culturecounts = culturecounts[culturecounts>=mincount] | |
89 barplot(culturecounts, las=2, cex.names=0.2) | |
90 #g = ggplot(df,aes(x=Language))+geom_bar() | |
91 #g+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) | |
92 } | |
93 | |
94 PlotBarForCategory <- function(df, cat="Language", mincount=1){ | |
95 idx_cat = which(colnames(df)==cat) | |
96 dfsubset <- subset(df, df[,idx_cat]!="") | |
97 counts = table(dfsubset[,idx_cat]) | |
98 counts = counts[counts>=mincount] | |
99 barplot(counts, las=2, cex.names=0.2) | |
100 #g = ggplot(df,aes(x=Language))+geom_bar() | |
101 #g+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) | |
102 } | |
103 | |
104 PlotCountryYearCutoff <- function(df, cutoffyear=1960){ | |
105 df$BeforeYear = df$Year<cutoffyear | |
106 dfsubset <- subset(df, BeforeYear==T) | |
107 countrycounts = table(dfsubset$Country) | |
108 dd=data.frame(countrycounts) | |
109 names(dd)=c("Country","Counts") | |
110 spdf<-joinCountryData2Map(dd,joinCode="NAME",nameCountryColumn="Country",nameJoinColumn="Country") | |
111 mapParams <- mapCountryData(spdf, nameColumnToPlot="Counts",catMethod=as.numeric(levels(as.factor(spdf$Counts))),missingCountryCol='grey',oceanCol="lightblue",colourPalette='heat', mapTitle=paste("Country sample size, year<",cutoffyear),addLegend=F) | |
112 do.call( addMapLegend, c(mapParams, labelFontSize=0.3, legendWidth=0.5, tcl=-0.3, legendMar = 4, legendLabels="all",horizontal=F, legendIntervals="page")) | |
113 } | |
114 | |
115 #dftemp$Decade<-floor(df$Year/10)*10 | |
116 #yearcounts = sapply(levels(dftemp$Country),function(x)table(subset(dftemp,Country==x)$Before1960)) | |
117 #decadecounts = sapply(levels(df$Country),function(x)table(subset(dftemp,Country==x & Before1960=="TRUE")$Before1960)) | |
118 #print(yearcounts) | |
119 | |
120 #metadata we are interested in: | |
121 #Artist,AlbumTitle,Culture, Language/Language_Album, Subject_Album | |
122 PlotCountryCulture <- function(df){ | |
123 dfsubset <- subset(df, df$Culture!="") | |
124 countrycounts = table(dfsubset$Country) | |
125 dd=data.frame(countrycounts) | |
126 names(dd)=c("Country","Counts") | |
127 spdf<-joinCountryData2Map(dd,joinCode="NAME",nameCountryColumn="Country",nameJoinColumn="Country") | |
128 #mapParams <- mapCountryData(spdf, nameColumnToPlot="Counts",catMethod=as.numeric(levels(as.factor(spdf$Counts))),missingCountryCol='grey',oceanCol="lightblue",colourPalette='heat', mapTitle="Recordings with culture information",addLegend=F) | |
129 mapParams <- mapCountryData(spdf, nameColumnToPlot="Counts",catMethod="logFixedWidth",missingCountryCol='grey',oceanCol="lightblue",colourPalette='heat', mapTitle="Number of recordings with culture information",addLegend=F) | |
130 do.call( addMapLegend, c(mapParams, labelFontSize=0.3, legendWidth=0.5, tcl=-0.3, legendMar = 4, legendLabels="all",horizontal=F, legendIntervals="page")) | |
131 } | |
132 | |
133 PlotCultureDistribution <- function(df){ | |
134 dfsubset <- subset(df, df$Culture!="") # ignore the recordings culture info | |
135 culturecounts = table(dfsubset$Culture) | |
136 barplot(culturecounts, las=2, cex.names=0.2) | |
137 #g = ggplot(df,aes(x=Year,y=..count..))+geom_histogram(breaks=seq(1875, 2015, by = 1)) | |
138 #print(g) | |
139 } | |
140 | |
141 PlotCountryNCulture <- function(df){ | |
142 dfsubset <- subset(df, df$Culture!="") | |
143 #culturecounts = table(dfsubset$Culture) | |
144 countrycounts = table(dfsubset$Country, dfsubset$Culture) | |
145 aa <- addmargins(countrycounts, FUN = list(Total = sum), quiet = TRUE) | |
146 print(paste(">1000",row.names(aa)[aa[,dim(aa)[2]]>1000])) | |
147 print(paste(">500",row.names(aa)[aa[,dim(aa)[2]]>500])) | |
148 print(paste(">100",row.names(aa)[aa[,dim(aa)[2]]>100])) | |
149 inds = which(aa[,dim(aa)[2]]>500 & row.names(aa)!="Total") | |
150 mosaicplot(aa[inds,1:5]) | |
151 dd=data.frame(countrycounts[inds,]) | |
152 dd=data.frame(countrycounts) | |
153 names(dd)=c("Country","Culture","Counts") | |
154 ddsub <- subset(dd, (Country=="Canada"| Country=="United Kingdom" | Country=="United States of America") & Counts>20) | |
155 g=ggplot(ddsub, aes(x=Culture,y=Counts))+geom_point()+facet_wrap(~Country, scales = "free") | |
156 g+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) | |
157 } | |
158 | |
159 PlotCountryCultureNcounts <- function(df,mincount=50){ | |
160 dfsubset <- subset(df, df$Culture!="") | |
161 #culturecounts = table(dfsubset$Culture) | |
162 countrycounts = table(dfsubset$Country, dfsubset$Culture) | |
163 dd=data.frame(countrycounts) | |
164 names(dd)=c("Country","Culture","Counts") | |
165 ddsub <- subset(dd, Counts>mincount) | |
166 g=ggplot(ddsub, aes(x=Culture,y=Counts))+geom_point()+facet_wrap(~Country, scales = "free") | |
167 g+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))#+title(main=paste("Cultures per country, count>",mincount)) | |
168 } | |
169 | |
170 PlotCountryLanguageNcounts <- function(df,mincount=50){ | |
171 dfsubset <- subset(df, df$Language!="" & df$Language!=" ") | |
172 countrycounts = table(dfsubset$Country, dfsubset$Language) | |
173 dd=data.frame(countrycounts) | |
174 names(dd)=c("Country","Language","Counts") | |
175 ddsub <- subset(dd, Counts>mincount) | |
176 g=ggplot(ddsub, aes(x=Language,y=Counts))+geom_point()+facet_wrap(~Country, scales = "free") | |
177 g+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))#+title(main=paste("Cultures per country, count>",mincount)) | |
178 } | |
179 | |
180 PlotNxNcounts <- function(df, cat1="Country", cat2="Culture", mincount=50, figname=""){ | |
181 indcat1 = which(colnames(df)==cat1) | |
182 indcat2 = which(colnames(df)==cat2) | |
183 dfsubset <- subset(df, df[,indcat1]!="" & df[,indcat2]!="") # avoid nan values | |
184 NNcounts <- table(dfsubset[,indcat1], dfsubset[,indcat2]) | |
185 dd=data.frame(NNcounts) | |
186 names(dd) <- c("Cat1","Cat2","Counts") | |
187 ddsub <- subset(dd, Counts>mincount) | |
188 if (figname==""){ | |
189 g=ggplot(ddsub, aes(x=Cat2,y=Counts))+geom_point()+facet_wrap(~Cat1) | |
190 g+coord_flip()+theme(axis.text.y=element_text(hjust=1,vjust=0.5,size=5))+labs(y="Counts",x=cat2) | |
191 }else{ | |
192 g=ggplot(ddsub, aes(x=Cat2,y=Counts))+geom_point()+facet_wrap(~Cat1) | |
193 g+coord_flip()+theme(axis.text.y=element_text(hjust=1,vjust=0.5,size=5))+labs(y="Counts",x=cat2) | |
194 ggsave(figname) | |
195 } | |
196 } | |
197 | |
198 Wordcloud<- function(df, cat="Language", output=F){ | |
199 require(wordcloud) | |
200 require(RColorBrewer) | |
201 ind_cat = which(colnames(df)==cat) | |
202 counts <- table(df[,ind_cat]) | |
203 dd=data.frame(counts) | |
204 names(dd) <- c("words","freq") | |
205 pal2 <- brewer.pal(8,"Dark2") | |
206 wordcloud(dd$words,dd$freq,random.order=FALSE, colors=pal2) | |
207 if (output){ | |
208 pdf("wordcloud.pdf") | |
209 wordcloud(dd$words,dd$freq,random.order=FALSE, colors=pal2) | |
210 dev.off() | |
211 } | |
212 } | |
213 PlotCountryOutliers <- function(df, output=''){ | |
214 par(mar = rep(2, 4)) | |
215 spdf<-joinCountryData2Map(df,joinCode="NAME",nameCountryColumn="Country",nameJoinColumn="Country") | |
216 spdf<-spdf[-which(spdf$ADMIN=='Antarctica'),] | |
217 #mapParams <- mapCountryData(spdf, nameColumnToPlot="Outliers",catMethod=seq(0,70,5),missingCountryCol='grey',colourPalette='heat', mapTitle="", addLegend=FALSE) | |
218 mapParams <- mapCountryData(spdf, nameColumnToPlot="Outliers", catMethod=seq(0,1,0.1), missingCountryCol='grey',colourPalette='heat', mapTitle="", addLegend=FALSE) | |
219 # avoid antarctica | |
220 #mapParams <- mapCountryData(spdf, nameColumnToPlot="Outliers", ylim=c(-60,90), catMethod=seq(0,1,0.1), missingCountryCol='grey',colourPalette='heat', mapTitle="", addLegend=FALSE) | |
221 #do.call( addMapLegend, c(mapParams, labelFontSize=0.7, legendWidth=0.5, tcl=0.3, legendMar = 7, legendLabels="all",horizontal=T, legendIntervals="page")) | |
222 legend("left", legend = c(paste(seq(90,0,-10),'-',seq(100,10,-10),'%'), 'NA'), fill = c(heat.colors(10, alpha = 1), 'grey'), cex = 0.56, bty = "o",bg="white",box.lwd=0,box.col="white") | |
223 if (output!=''){ | |
224 pdf(output) | |
225 #mapParams <- mapCountryData(spdf, nameColumnToPlot="Outliers",catMethod=seq(0,70,5),missingCountryCol='grey',colourPalette='heat', mapTitle="", addLegend=FALSE) | |
226 mapParams <- mapCountryData(spdf, nameColumnToPlot="Outliers", catMethod=seq(0,1,0.1), missingCountryCol='grey',colourPalette='heat', mapTitle="", addLegend=FALSE) | |
227 #mapParams <- mapCountryData(spdf, nameColumnToPlot="Outliers", ylim=c(-60,90), catMethod=seq(0,1,0.1), missingCountryCol='grey',colourPalette='heat', mapTitle="", addLegend=FALSE) | |
228 #do.call( addMapLegend, c(mapParams, labelFontSize=0.7, legendWidth=0.5, tcl=0.3, legendMar=7, legendLabels="all",horizontal=T, legendIntervals="page")) | |
229 legend("left", legend = c(paste(seq(90,0,-10),'-',seq(100,10,-10),'%'), 'NA'), fill = c(heat.colors(10, alpha = 1), 'grey'), cex = 0.56, bty = "o",bg="white",box.lwd=0,box.col="white") | |
230 dev.off() | |
231 } | |
232 else { | |
233 return(mapParams) | |
234 } | |
235 } |