# Metadata and cluster figures ----
# Reads the clustered data frame, then writes the country, year and language
# distribution figures and the per-cluster bar chart, each as PDF and EPS,
# under ../data/results/.

df <- read.csv("../data/results/df_and_clusters.csv", header = TRUE)

# Provides PlotCountryCounts(), PlotYearDistribution() and PlotBarChart().
# NOTE(review): ggplot() is called below before any library(ggplot2) in this
# file; presumably MetadataPlots.R attaches ggplot2 -- confirm.
source("MetadataPlots.R")

## for plos use arial
#install.packages("extrafont")
library(extrafont)
# NOTE(review): font_import() is re-run on every execution of this script;
# presumably it only needs to run once per machine -- confirm before removing.
font_import()
loadfonts()
Arial <- Type1Font(
  family = "Arial",
  metrics = c(
    "ArialMT.afm", "arial-BoldMT.afm",
    "Arial-ItalicMT.afm", "Arial-BoldItalicMT.afm"
  )
)
postscriptFonts(Arial = Arial)
par(family = "Arial")

# Country distribution ----
#pdf(file="data/country_distribution_BL.pdf")
pdf(
  file = "../data/results/country_distribution.pdf",
  pointsize = 12, width = 6, height = 4
)
PlotCountryCounts(df)
dev.off()
postscript(
  file = "../data/results/country_distribution.eps",
  pointsize = 12, width = 6, height = 4
)
PlotCountryCounts(df)
dev.off()

# Year distribution ----
pdf(
  file = "../data/results/year_distribution.pdf",
  width = 6, height = 4, pointsize = 12
)
PlotYearDistribution(df)
dev.off()
postscript("../data/results/year_distribution.eps", width = 10, pointsize = 12)
PlotYearDistribution(df)
dev.off()
#PlotBarChart(df, cat="Year", ordercat="REGION", mincount=10)

# Language distribution ----
#pdf(file="data/language_distribution_BL.pdf")
# Shorten two long language names. Both the factor levels and the values are
# rewritten; presumably so the rename works whether Language was read as a
# factor or as character -- verify against the R version in use.
levels(df$Language)[
  which(levels(df$Language) == "Southwestern Caribbean Creole English")
] <- "SouthW Carib. Creole English"
df$Language[
  which(df$Language == "Southwestern Caribbean Creole English")
] <- "SouthW Carib. Creole English"
levels(df$Language)[
  which(levels(df$Language) == "Lesser Antillean Creole French")
] <- "Lesser Antil. Creole French"
df$Language[
  which(df$Language == "Lesser Antillean Creole French")
] <- "Lesser Antil. Creole French"
df$REGION[which(df$Country == "French Guiana")] <- "South America"
pdf(file = "../data/results/language_distribution.pdf", pointsize = 12)
PlotBarChart(df, cat = "Language", ordercat = "Region", mincount = 10)
dev.off()
postscript(
  "../data/results/language_distribution.eps",
  width = 8, height = 10, pointsize = 12
)
PlotBarChart(df, cat = "Language", ordercat = "Region", mincount = 10)
dev.off()

# Cluster composition ----
# Reload the data so the language renames above do not carry over.
df <- read.csv("../data/results/df_and_clusters.csv", header = TRUE)
#PlotBarChart(df, cat="Clusters", ordercat="CountryLang", mincount=1,legend=F)
df$REGION[which(df$Country == "French Guiana")] <- "South America"
# The plot object built here was overwritten further down before ever being
# used, so it is kept only as a comment:
#g = ggplot(df,aes(df$Clusters, fill=df$REGION))+geom_bar()
levels(df$REGION)[which(levels(df$REGION) == "South America")] <- "S. America"
levels(df$REGION)[which(levels(df$REGION) == "North America")] <- "N. America"

#library(rworldmap)
#wrld = getMap()
#regiondata<-wrld@data[,c("ADMIN","GEO3", "Stern")]
#df<-merge(df,regiondata,by.x="Country",by.y="ADMIN",all.x=T)

#cluster_labels_df = read.csv('data/clusters_top3_labels.csv')
cluster_labels_df <- read.csv("../data/results/clusters_top3_countries.csv")
# Concatenate the first three columns into one raw label per row.
cluster_labels <- paste0(
  cluster_labels_df[, 1], cluster_labels_df[, 2], cluster_labels_df[, 3]
)
#df$CountryLang = as.factor(paste(df$Country, df$Language, sep="-"))

# Hierarchical clustering of the cluster-by-country count table.
countrycounts <- table(df$Clusters, df$Country)
library(cluster)
library(ape)
library(gridExtra)
library(ggdendro)
library(dendextend)
hc <- hclust(dist(countrycounts), method = "average")
hc2 <- hc
#hc2$labels = as.character(1:length(cluster_labels))
hc2$labels <- ""
#dhc <- as.dendrogram(hc2)
# library(dynamicTreeCut)
# clusters <- cutreeDynamic(hc2, minClusterSize = k_clust,method = "tree")
# clusters <- clusters[order.dendrogram(dhc)]
# clusters_numbers <- unique(clusters) - (0 %in% clusters)
# n_clusters <- length(clusters_numbers)
# library(colorspace)
# cols <- rainbow_hcl(n_clusters)
# dhc <- hc2 %>% as.dendrogram %>%
#   set("branches_k_color", k=k_clust) %>% branches_attr_by_clusters(clusters, values = cols)
k_clust <- 5
dhc <- hc2 %>%
  as.dendrogram() %>%
  set("branches_k_color", k = k_clust) %>%
  set("branches_lwd", 0.7) %>%
  set("labels_cex", 0.6) %>%
  set("labels_colors", k = k_clust) %>%
  set("leaves_pch", 19) %>%
  set("leaves_cex", 0.5)
#ddata <- dendro_data(dhc, type = "rectangle")
ddata <- as.ggdend(dhc)
# NOTE(review): p is built but never saved or printed in this file.
p <- ggplot(ddata) + coord_flip()
#p <- ggplot(segment(ddata)) +
#  geom_segment(aes(x = x, y = y, xend = xend, yend = yend, colour=ddata$segments$col)) +
#  coord_flip() + theme_dendro() + theme(legend.position="none") +
#  geom_text(aes(x = x, y = y, label = label, angle = -90, hjust = 0.5, vjust=1.3, colour=ddata$labels$col), data= label(ddata))

#dend <- hc2 %>% as.dendrogram %>%
#  set("branches_k_color", k = 5) %>% set("branches_lwd", 0.7) %>%
#  set("labels_cex", 0.6) %>% set("labels_colors", k = 5) %>%
#  set("leaves_pch", 19) %>% set("leaves_cex", 0.5)
#ggd1 <- as.ggdend(dend)
#pp <- ggplot(ggd1, horiz = TRUE)

# Clean up the raw labels. str_replace_all() is vectorised over its input, so
# the whole vector is processed at once; the replacements run in the same
# order as before, and an empty label vector no longer gains a stray NA entry.
library(stringr)
cluster_labels <- str_replace_all(cluster_labels, "[(']", "")
cluster_labels <- str_replace_all(cluster_labels, "[|]", "-")
cluster_labels <- str_replace_all(cluster_labels, ", ", " (")
cluster_labels <- str_replace_all(cluster_labels, "[)]", "), ")
cluster_labels <- str_replace_all(cluster_labels, "nan", "NA")
#cl = paste(cl, "cluster",i)
#cluster_idx = paste("cluster",1:length(cluster_labels))
#df$Clusters = as.factor(df$Clusters)

# Order the cluster factor and its labels by the dendrogram leaf order.
df$Clusters <- factor(x = df$Clusters, levels = hc$labels[hc$order])
cluster_labels <- cluster_labels[hc$order]
#g = ggplot(df,aes(as.factor(df$Clusters), fill=df$CountryLang))+geom_bar()
#g = ggplot(df,aes(Clusters, fill=REGION))+geom_bar()+facet_grid(~REGION,space="free",scales="free")#,scales="free")
# NOTE(review): the fill uses df$Region, while the edits above modify
# df$REGION -- confirm which column is intended.
g <- ggplot(df, aes(as.factor(df$Clusters), fill = df$Region)) + geom_bar()
#g = ggplot(df,aes(as.factor(df$Clusters), fill=df$REGION))+geom_bar()
g <- g + scale_x_discrete(labels = cluster_labels)
#g = g+scale_y_continuous(position="right")
#g = g+scale_fill_brewer(palette="Paired")#+scale_fill_grey()
g <- g + scale_fill_brewer(palette = "Paired") #+scale_fill_grey()
#g = g+labs(y="Counts", x="Top 3 country-language tags in each cluster")+coord_flip()+theme_bw()#+guides(fill="none")
g <- g + labs(y = "Counts", x = "Clusters") + coord_flip() + theme_bw() #+guides(fill="none")
#g = g+labs(y="Counts", x="Clusters")+coord_flip()+theme_bw()#+guides(fill="none")
#g = g+guides(fill = guide_legend(title = "Region"))+theme(legend.position=c(.9,.8),legend.margin = unit(0, "cm"),legend.key.size = unit(0.3, "cm"),legend.title = element_text(size=10),legend.text = element_text(size=10))
#g = g+guides(fill = guide_legend(title = "Region"))+theme(legend.position="left",legend.margin = unit(0, "cm"),legend.key.size = unit(0.3, "cm"),legend.title = element_text(size=9),legend.text = element_text(size=9))
g <- g +
  guides(fill = guide_legend(title = "Region")) +
  theme(
    legend.position = "top",
    legend.title = element_text(size = 9),
    legend.text = element_text(size = 9)
  )
g <- g +
  theme(
    panel.border = element_rect(colour = "white"),
    strip.background = element_rect(fill = "white"),
    strip.text.x = element_blank()
  )
#g = g+theme(axis.text.y = element_text(colour = ddata$labels$col))
ggsave("../data/results/clusters_top3.pdf", plot = g, pointsize = 12)
ggsave("../data/results/clusters_top3.eps", plot = g, pointsize = 12)