library(rworldmap)
library(ggplot2)

# Horizontal stacked bar chart of row counts per level of column `cat`,
# with bars filled by column `ordercat`.
#
# Args:
#   df:        data frame holding the columns named by `cat` and `ordercat`.
#   cat:       name of the column that gets one bar per level.
#   ordercat:  name of the column used to sort the rows and to fill the bars.
#   mincount:  levels of `cat` are kept only when they occur strictly more
#              than `mincount` times.
#   legend:    TRUE shows the fill legend titled `ordercat`; FALSE hides it.
#   color_plt: palette name handed to scale_fill_brewer().
# Returns: the ggplot object (it is not printed here).
PlotBarChart <- function(df, cat = "Language", ordercat = "REGION",
                         mincount = 10, legend = TRUE, color_plt = "Paired") {
  missing_cols <- setdiff(c(cat, ordercat), colnames(df))
  if (length(missing_cols) > 0) {
    stop("PlotBarChart: column(s) not found in df: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }
  idx_cat <- match(cat, colnames(df))
  idx_ordercat <- match(ordercat, colnames(df))

  # Drop rows whose category is empty or NA, then drop the rare categories.
  has_cat <- !is.na(df[, idx_cat]) & df[, idx_cat] != ""
  dfsub <- df[has_cat, ]
  level_counts <- table(dfsub[, idx_cat])
  frequent <- names(level_counts)[level_counts > mincount]
  dfsub <- dfsub[dfsub[, idx_cat] %in% frequent, ]

  # Sort rows by `ordercat` and freeze that order into the factor levels so
  # the bars appear grouped by `ordercat`.
  dfsub <- dfsub[order(dfsub[, idx_ordercat]), ]
  dfsub[, idx_cat] <- factor(dfsub[, idx_cat], levels = unique(dfsub[, idx_cat]))

  # NOTE(review): the `order` aesthetic is believed to be deprecated since
  # ggplot2 2.0.0 and is kept only to preserve the original call; confirm
  # whether it can be dropped.
  # NOTE(review): theme_bw() is added after the axis.text.x tweak; as a
  # complete theme it likely overrides that tweak. The original order is kept
  # so the rendered figure does not change; confirm which look is intended.
  g <- ggplot(
    dfsub,
    aes(dfsub[, idx_cat],
        fill = dfsub[, idx_ordercat],
        order = -as.numeric(dfsub[, idx_ordercat]))
  ) +
    geom_bar() +
    # Count axis is fixed to 0-200 with a tick every 40.
    scale_y_continuous(limits = c(0, 200), breaks = seq(0, 200, 40)) +
    scale_fill_brewer(palette = color_plt) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
    labs(y = "Counts", x = cat) +
    coord_flip() +
    theme_bw()

  if (legend) {
    g <- g + guides(fill = guide_legend(title = ordercat))
  } else {
    g <- g + guides(fill = "none")
  }
  g
}

# NOTE(review): the body of PlotCountryNCounts is truncated in the text this
# file was recovered from (the span between a `<` and the following `>`
# appears to be missing), so it cannot be restored faithfully here. The
# surviving fragment is kept verbatim below; restore the full definition from
# version control.
#
#   countrycounts = table(df$Country)
#   dd=data.frame(countrycounts)
#   names(dd)=c("Country","Counts")
#   cols <- rep(2, dim(dd)[1])
#   cols[dd$Counts",mincount),addLegend=F)
#   mapParams$legendText <- c(paste('<',mincount),paste('>=',mincount),'na')
#   do.call(addMapLegendBoxes, c(mapParams,x='bottomleft'))
PlotCountryNCounts <- function(df, mincount = 10) {
  stop("PlotCountryNCounts: definition is incomplete in the recovered source",
       call. = FALSE)
}

# World map coloured by the number of rows per country in `df$Country`.
#
# Args:
#   df:     data frame with a `Country` column of country names.
#   output: if TRUE the map is additionally written to "countrycounts.pdf".
# Returns: NULL, invisibly; called for its plotting side effect.
PlotCountryCounts <- function(df, output = FALSE) {
  dd <- data.frame(table(df$Country))
  names(dd) <- c("Country", "Counts")
  spdf <- joinCountryData2Map(dd, joinCode = "NAME",
                              nameCountryColumn = "Country",
                              nameJoinColumn = "Country")
  # Positive selection: `spdf[-which(cond), ]` would drop every row when no
  # row matches, because -integer(0) selects nothing.
  spdf <- spdf[which(spdf$ADMIN != "Antarctica"), ]

  # Draws the map and a hand-built legend on the current graphics device.
  # Bins are 10-wide from 10 to 100; legend entries are listed high to low.
  draw_map <- function() {
    mapCountryData(spdf, nameColumnToPlot = "Counts",
                   catMethod = seq(10, 100, 10), missingCountryCol = "grey",
                   borderCol = "black", oceanCol = "white",
                   colourPalette = "heat", mapTitle = "", addLegend = FALSE)
    legend("left",
           legend = c(paste(seq(90, 1, -10), "-", seq(100, 11, -10)), "NA"),
           fill = c(heat.colors(9, alpha = 1), "grey"),
           cex = 0.56, bty = "o", bg = "white", box.lwd = 0,
           box.col = "white")
  }

  draw_map()
  if (output) {
    pdf(file = "countrycounts.pdf")
    # Close the PDF device even if drawing fails.
    on.exit(dev.off(), add = TRUE)
    draw_map()
  }
  invisible(NULL)
}

# Histogram of `df$Year` with one-year bins from 1895 to 2015.
#
# Args:
#   df:     data frame with a `Year` column (converted via character to
#           numeric, so factor years are handled).
#   output: if TRUE the plot is also saved as "yeardistribution.pdf".
# The plot is printed as a side effect.
PlotYearDistribution <- function(df, output = FALSE) {
  df$Year <- as.numeric(as.character(df$Year))
  g <- ggplot(df, aes(x = Year, y = ..count..)) +
    geom_histogram(breaks = seq(1895, 2015, by = 1)) +
    theme_bw() +
    labs(x = "Year", y = "Count")
  print(g)
  if (output) {
    ggsave("yeardistribution.pdf", plot = g)
  }
}

# Bar chart of the number of rows per country, with vertical x-axis labels.
#
# Args:
#   df: data frame with a `Country` column.
# Returns: the ggplot object, visibly, so it auto-prints at the console like
#   the other ggplot-returning helpers in this file.
PlotCountryDistribution <- function(df) {
  ggplot(df, aes(x = Country)) +
    geom_bar() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
}

#PlotCultureDistribution <- function(df){
#  g = ggplot(df,aes(x=Culture))+geom_bar()
#  g+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
#}

# Base-graphics bar plot of `Culture` counts for rows with a non-empty
# `Language`.
#
# NOTE(review): rows are filtered on `Language` but counted by `Culture`;
# given the function name this may be unintended. Behaviour is kept as is;
# confirm with the author.
#
# Args:
#   df:       data frame with `Language` and `Culture` columns.
#   mincount: cultures with fewer than `mincount` rows are omitted.
# Returns: the value of barplot().
PlotLanguageDistribution <- function(df, mincount = 1) {
  has_language <- !is.na(df$Language) & df$Language != ""
  dfsubset <- df[has_language, ]
  culturecounts <- table(dfsubset$Culture)
  culturecounts <- culturecounts[culturecounts >= mincount]
  barplot(culturecounts, las = 2, cex.names = 0.2)
}

# Base-graphics bar plot of the counts of each non-empty value of column
# `cat`.
#
# Args:
#   df:       data frame.
#   cat:      name of the column to tabulate.
#   mincount: values occurring fewer than `mincount` times are omitted.
# Returns: the value of barplot().
PlotBarForCategory <- function(df, cat = "Language", mincount = 1) {
  idx_cat <- match(cat, colnames(df))
  if (is.na(idx_cat)) {
    stop("PlotBarForCategory: column not found in df: ", cat, call. = FALSE)
  }
  values <- df[, idx_cat]
  values <- values[!is.na(values) & values != ""]
  counts <- table(values)
  counts <- counts[counts >= mincount]
  barplot(counts, las = 2, cex.names = 0.2)
}

# NOTE(review): the text this file was recovered from is truncated here: the
# span between `df$Year<` and the next `>` appears to be missing. That span
# held the rest of PlotCountryYearCutoff and the header plus the opening
# statements of the function that followed it. Neither can be restored
# faithfully; the surviving fragment is kept verbatim below. Restore both
# definitions from version control.
#
#   PlotCountryYearCutoff <- function(df, cutoffyear=1960){
#   df$BeforeYear = df$Year1000",row.names(aa)[aa[,dim(aa)[2]]>1000]))
#   print(paste(">500",row.names(aa)[aa[,dim(aa)[2]]>500]))
#   print(paste(">100",row.names(aa)[aa[,dim(aa)[2]]>100]))
#   inds = which(aa[,dim(aa)[2]]>500 & row.names(aa)!="Total")
#   mosaicplot(aa[inds,1:5])
#   dd=data.frame(countrycounts[inds,])
#   dd=data.frame(countrycounts)
#   names(dd)=c("Country","Culture","Counts")
#   ddsub <- subset(dd, (Country=="Canada"| Country=="United Kingdom" | Country=="United States of America") & Counts>20)
#   g=ggplot(ddsub, aes(x=Culture,y=Counts))+geom_point()+facet_wrap(~Country, scales = "free")
#   g+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
#   }
PlotCountryYearCutoff <- function(df, cutoffyear = 1960) {
  stop("PlotCountryYearCutoff: definition is incomplete in the recovered source",
       call. = FALSE)
}

# Point plot of culture counts, one facet per country, keeping only
# (country, culture) pairs that occur more than `mincount` times.
#
# Args:
#   df:       data frame with `Country` and `Culture` columns.
#   mincount: pairs with `Counts <= mincount` are dropped.
# Returns: the ggplot object.
PlotCountryCultureNcounts <- function(df, mincount = 50) {
  has_culture <- !is.na(df$Culture) & df$Culture != ""
  dfsubset <- df[has_culture, ]
  dd <- data.frame(table(dfsubset$Country, dfsubset$Culture))
  names(dd) <- c("Country", "Culture", "Counts")
  ddsub <- dd[dd$Counts > mincount, ]
  ggplot(ddsub, aes(x = Culture, y = Counts)) +
    geom_point() +
    facet_wrap(~Country, scales = "free") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
}

# Point plot of language counts, one facet per country, keeping only
# (country, language) pairs that occur more than `mincount` times. Rows whose
# language is "" or a single space are ignored.
#
# Args:
#   df:       data frame with `Country` and `Language` columns.
#   mincount: pairs with `Counts <= mincount` are dropped.
# Returns: the ggplot object.
PlotCountryLanguageNcounts <- function(df, mincount = 50) {
  has_language <- !is.na(df$Language) & df$Language != "" & df$Language != " "
  dfsubset <- df[has_language, ]
  dd <- data.frame(table(dfsubset$Country, dfsubset$Language))
  names(dd) <- c("Country", "Language", "Counts")
  ddsub <- dd[dd$Counts > mincount, ]
  ggplot(ddsub, aes(x = Language, y = Counts)) +
    geom_point() +
    facet_wrap(~Country, scales = "free") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
}

# Point plot of the cross-tabulated counts of two columns: `cat2` values on
# the (flipped) axis, one facet per `cat1` value.
#
# Args:
#   df:       data frame.
#   cat1:     name of the faceting column.
#   cat2:     name of the column shown along the category axis.
#   mincount: pairs with `Counts <= mincount` are dropped.
#   figname:  "" returns the plot; any other value is used as the file name
#             the plot is saved to with ggsave().
# Returns: the ggplot object when `figname` is "", otherwise the value of
#   ggsave().
PlotNxNcounts <- function(df, cat1 = "Country", cat2 = "Culture",
                          mincount = 50, figname = "") {
  missing_cols <- setdiff(c(cat1, cat2), colnames(df))
  if (length(missing_cols) > 0) {
    stop("PlotNxNcounts: column(s) not found in df: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }
  indcat1 <- match(cat1, colnames(df))
  indcat2 <- match(cat2, colnames(df))

  # Keep rows where both categories are present (neither empty nor NA).
  first <- df[, indcat1]
  second <- df[, indcat2]
  complete <- !is.na(first) & !is.na(second) & first != "" & second != ""
  dfsubset <- df[complete, ]

  dd <- data.frame(table(dfsubset[, indcat1], dfsubset[, indcat2]))
  names(dd) <- c("Cat1", "Cat2", "Counts")
  ddsub <- dd[dd$Counts > mincount, ]

  g <- ggplot(ddsub, aes(x = Cat2, y = Counts)) +
    geom_point() +
    facet_wrap(~Cat1) +
    coord_flip() +
    theme(axis.text.y = element_text(hjust = 1, vjust = 0.5, size = 5)) +
    labs(y = "Counts", x = cat2)

  if (figname == "") {
    return(g)
  }
  # Pass the plot explicitly instead of relying on ggsave()'s default of the
  # most recent plot.
  ggsave(figname, plot = g)
}

# Word cloud of the values in column `cat`, sized by how often each occurs.
#
# Args:
#   df:     data frame.
#   cat:    name of the column whose values become the words.
#   output: if TRUE the cloud is additionally written to "wordcloud.pdf".
# Returns: NULL, invisibly; called for its plotting side effect.
Wordcloud <- function(df, cat = "Language", output = FALSE) {
  # require() only returns FALSE when a package is missing; fail explicitly.
  for (pkg in c("wordcloud", "RColorBrewer")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Wordcloud: package '", pkg, "' is required", call. = FALSE)
    }
  }
  ind_cat <- match(cat, colnames(df))
  if (is.na(ind_cat)) {
    stop("Wordcloud: column not found in df: ", cat, call. = FALSE)
  }

  dd <- data.frame(table(df[, ind_cat]))
  names(dd) <- c("words", "freq")
  pal2 <- RColorBrewer::brewer.pal(8, "Dark2")

  # Draws the cloud on the current graphics device.
  draw_cloud <- function() {
    wordcloud::wordcloud(dd$words, dd$freq, random.order = FALSE,
                         colors = pal2)
  }

  draw_cloud()
  if (output) {
    pdf("wordcloud.pdf")
    # Close the PDF device even if drawing fails.
    on.exit(dev.off(), add = TRUE)
    draw_cloud()
  }
  invisible(NULL)
}

# World map coloured by the `Outliers` column of `df`, binned in steps of 0.1
# between 0 and 1, with a hand-built percentage legend.
#
# Args:
#   df:     data frame with a `Country` column of country names and an
#           `Outliers` column (presumably a fraction in [0, 1], given the
#           bins; confirm against the caller).
#   output: "" draws on the current device only; any other value is used as
#           the PDF file name the map is additionally written to.
# Returns: the mapCountryData() result of the on-screen map; invisibly when a
#   PDF was written.
PlotCountryOutliers <- function(df, output = "") {
  # Margins are intentionally not restored on exit: the caller may keep
  # drawing on the returned map, which needs the same plot region.
  par(mar = rep(2, 4))
  spdf <- joinCountryData2Map(df, joinCode = "NAME",
                              nameCountryColumn = "Country",
                              nameJoinColumn = "Country")
  # Positive selection: `spdf[-which(cond), ]` would drop every row when no
  # row matches, because -integer(0) selects nothing.
  spdf <- spdf[which(spdf$ADMIN != "Antarctica"), ]

  # Draws the map and legend on the current device; returns the map params.
  draw_map <- function() {
    params <- mapCountryData(spdf, nameColumnToPlot = "Outliers",
                             catMethod = seq(0, 1, 0.1),
                             missingCountryCol = "grey",
                             colourPalette = "heat", mapTitle = "",
                             addLegend = FALSE)
    legend("left",
           legend = c(paste(seq(90, 0, -10), "-", seq(100, 10, -10), "%"),
                      "NA"),
           fill = c(heat.colors(10, alpha = 1), "grey"),
           cex = 0.56, bty = "o", bg = "white", box.lwd = 0,
           box.col = "white")
    params
  }

  mapParams <- draw_map()
  if (output != "") {
    pdf(output)
    # Close the PDF device even if drawing fails.
    tryCatch(draw_map(), finally = dev.off())
    return(invisible(mapParams))
  }
  mapParams
}