# Load the per-record table that carries the cluster assignments.
# stringsAsFactors = TRUE is spelled out because later steps of this script
# edit factor levels (levels(df$Language), levels(df$REGION)); since R 4.0
# read.csv() would otherwise return text columns as plain character vectors.
df <- read.csv("../data/results/df_and_clusters.csv",
               header = TRUE, stringsAsFactors = TRUE)

# Plot helpers called below (PlotCountryCounts, PlotYearDistribution,
# PlotBarChart) are presumably defined in this file -- confirm.
# NOTE(review): ggplot() is used later without a library(ggplot2) call in this
# script; presumably MetadataPlots.R attaches it -- confirm.
source("MetadataPlots.R")

## For PLOS, use Arial.
# install.packages("extrafont")
library(extrafont)

# font_import() scans the system fonts and asks for confirmation; it only has
# to succeed once per machine, so skip it when Arial is already registered in
# the extrafont database.
if (!("Arial" %in% fonts())) {
  font_import()
}
loadfonts()

# Register an Arial Type 1 family for postscript() devices.
# NOTE(review): "arial-BoldMT.afm" is lower-cased unlike the other three metric
# files -- confirm the file name on case-sensitive file systems.
Arial <- Type1Font(
  family = "Arial",
  metrics = c("ArialMT.afm", "arial-BoldMT.afm", "Arial-ItalicMT.afm",
              "Arial-BoldItalicMT.afm")
)
postscriptFonts(Arial = Arial)

# NOTE(review): par() settings belong to a single graphics device. This call
# runs before any pdf()/postscript() device is opened, so it presumably does
# not carry over to them -- confirm the saved figures really use Arial.
par(family = "Arial")

# Country distribution figure, written once as PDF and once as EPS (6 x 4 in).
# Earlier output path: "data/country_distribution_BL.pdf"
pdf(file = "../data/results/country_distribution.pdf",
    width = 6, height = 4, pointsize = 12)
PlotCountryCounts(df)
dev.off()

# horizontal / onefile / paper are the settings ?postscript documents for
# producing EPS; with the device defaults the plot is placed in landscape
# orientation on a default-size paper page instead.
postscript(file = "../data/results/country_distribution.eps",
           width = 6, height = 4, pointsize = 12,
           horizontal = FALSE, onefile = FALSE, paper = "special")
PlotCountryCounts(df)
dev.off()

# Year distribution figure, written once as PDF and once as EPS.
pdf(file = "../data/results/year_distribution.pdf",
    width = 6, height = 4, pointsize = 12)
PlotYearDistribution(df)
dev.off()

# EPS settings as documented in ?postscript (horizontal / onefile / paper).
# paper = "special" takes the page size from width and height, so the height
# that was previously left to the paper default is now explicit; it keeps the
# 6:4 aspect ratio of the PDF at the original 10 in width.
# NOTE(review): confirm the intended EPS size.
postscript("../data/results/year_distribution.eps",
           width = 10, height = 10 * 4 / 6, pointsize = 12,
           horizontal = FALSE, onefile = FALSE, paper = "special")
PlotYearDistribution(df)
dev.off()
# PlotBarChart(df, cat = "Year", ordercat = "REGION", mincount = 10)

# Earlier output path: "data/language_distribution_BL.pdf"

# Rename one value of a factor or character vector; other values are kept.
# For a factor the level itself is renamed, for anything else the matching
# elements are overwritten, so the result is the same whether read.csv()
# produced factors or strings.
rename_value <- function(values, old, new) {
  if (is.factor(values)) {
    levels(values)[levels(values) == old] <- new
  } else {
    values[which(values == old)] <- new
  }
  values
}

# Abbreviate the two long creole language names before plotting.
df$Language <- rename_value(df$Language,
                            "Southwestern Caribbean Creole English",
                            "SouthW Carib. Creole English")
df$Language <- rename_value(df$Language,
                            "Lesser Antillean Creole French",
                            "Lesser Antil. Creole French")

# Records from French Guiana are assigned to the "South America" region.
df$REGION[which(df$Country == "French Guiana")] <- "South America"

# NOTE(review): ordercat is "Region" here while the assignment above uses the
# REGION column -- confirm both columns exist in the data.
pdf(file = "../data/results/language_distribution.pdf", pointsize = 12)
PlotBarChart(df, cat = "Language", ordercat = "Region", mincount = 10)
dev.off()

# EPS settings as documented in ?postscript; paper = "special" makes the page
# exactly the requested 8 x 10 in instead of a default-size landscape page.
postscript("../data/results/language_distribution.eps",
           width = 8, height = 10, pointsize = 12,
           horizontal = FALSE, onefile = FALSE, paper = "special")
PlotBarChart(df, cat = "Language", ordercat = "Region", mincount = 10)
dev.off()

# Re-read the table from disk, which discards the in-memory edits made to df
# above. Factors are requested explicitly because the levels() renames below
# have no effect on character columns (the read.csv() default since R 4.0).
df <- read.csv("../data/results/df_and_clusters.csv",
               header = TRUE, stringsAsFactors = TRUE)
# PlotBarChart(df, cat="Clusters", ordercat="CountryLang", mincount=1, legend=F)

# Records from French Guiana are assigned to the "South America" region.
df$REGION[which(df$Country == "French Guiana")] <- "South America"

# A ggplot object `g` used to be built at this point; it was dropped because
# `g` is reassigned further down before it is ever drawn or saved.

# Abbreviate the two America region names.
levels(df$REGION)[levels(df$REGION) == "South America"] <- "S. America"
levels(df$REGION)[levels(df$REGION) == "North America"] <- "N. America"

# Unused alternative: take regions from rworldmap.
# library(rworldmap)
# wrld = getMap()
# regiondata <- wrld@data[, c("ADMIN", "GEO3", "Stern")]
# df <- merge(df, regiondata, by.x = "Country", by.y = "ADMIN", all.x = T)

# Earlier label source: 'data/clusters_top3_labels.csv'
cluster_labels_df <- read.csv("../data/results/clusters_top3_countries.csv")

# One label per row: the first three columns concatenated without a separator.
cluster_labels <- paste0(
  cluster_labels_df[[1]],
  cluster_labels_df[[2]],
  cluster_labels_df[[3]]
)
# df$CountryLang = as.factor(paste(df$Country, df$Language, sep = "-"))

library(cluster)
library(ape)
library(gridExtra)
library(ggdendro)
library(dendextend)

# Cluster-by-country contingency table; its rows (clusters) are what gets
# hierarchically clustered, using average linkage on dist()'s default metric.
countrycounts <- table(df$Clusters, df$Country)
hc <- hclust(dist(countrycounts), method = "average")

# Copy used only for drawing; `hc` keeps the real labels for later steps.
# NOTE(review): labels is set to a single "" rather than one entry per leaf --
# confirm that as.dendrogram()/dendextend handle this as intended.
hc2 <- hc
# hc2$labels = as.character(1:length(cluster_labels))
hc2$labels <- ""

# Unused alternative: colour branches from a dynamicTreeCut partition.
# dhc <- as.dendrogram(hc2)
# library(dynamicTreeCut)
# clusters <- cutreeDynamic(hc2, minClusterSize = k_clust, method = "tree")
# clusters <- clusters[order.dendrogram(dhc)]
# clusters_numbers <- unique(clusters) - (0 %in% clusters)
# n_clusters <- length(clusters_numbers)
# library(colorspace)
# cols <- rainbow_hcl(n_clusters)
# dhc <- hc2 %>% as.dendrogram %>%
#   set("branches_k_color", k = k_clust) %>%
#   branches_attr_by_clusters(clusters, values = cols)

# Number of groups used to colour branches and labels.
k_clust <- 5

# Apply the dendextend styling one setting at a time.
dhc <- as.dendrogram(hc2)
dhc <- set(dhc, "branches_k_color", k = k_clust)
dhc <- set(dhc, "branches_lwd", 0.7)
dhc <- set(dhc, "labels_cex", 0.6)
dhc <- set(dhc, "labels_colors", k = k_clust)
dhc <- set(dhc, "leaves_pch", 19)
dhc <- set(dhc, "leaves_cex", 0.5)

# ggplot rendering of the styled dendrogram, with the axes flipped.
# NOTE(review): `p` is not used again in the visible part of this script.
# ddata <- dendro_data(dhc, type = "rectangle")
ddata <- as.ggdend(dhc)
p <- ggplot(ddata) + coord_flip()

# Unused alternative: draw the segments and labels by hand with ggdendro.
# p <- ggplot(segment(ddata)) +
#   geom_segment(aes(x = x, y = y, xend = xend, yend = yend, colour = ddata$segments$col)) +
#   coord_flip() + theme_dendro() + theme(legend.position = "none") +
#   geom_text(aes(x = x, y = y, label = label, angle = -90, hjust = 0.5, vjust = 1.3, colour = ddata$labels$col), data = label(ddata))

# Unused alternative: the same styling with k fixed at 5.
# dend <- hc2 %>% as.dendrogram %>%
#   set("branches_k_color", k = 5) %>% set("branches_lwd", 0.7) %>%
#   set("labels_cex", 0.6) %>% set("labels_colors", k = 5) %>%
#   set("leaves_pch", 19) %>% set("leaves_cex", 0.5)
# ggd1 <- as.ggdend(dend)
# pp <- ggplot(ggd1, horiz = TRUE)

library(stringr)

# Clean up the raw label strings. str_replace_all() is vectorised, so every
# label is processed in one pass; this also behaves correctly for an empty
# vector, where the former 1:length() loop appended an NA element.
# The substitutions run in this order:
#   1. drop "(" and "'"
#   2. "|"  -> "-"
#   3. ", " -> " ("
#   4. ")"  -> "), "
#   5. "nan" -> "NA"
cluster_labels <- str_replace_all(cluster_labels, "[(']", "")
cluster_labels <- str_replace_all(cluster_labels, "[|]", "-")
cluster_labels <- str_replace_all(cluster_labels, ", ", " (")
cluster_labels <- str_replace_all(cluster_labels, "[)]", "), ")
cluster_labels <- str_replace_all(cluster_labels, "nan", "NA")
# cluster_labels <- paste(cluster_labels, "cluster", seq_along(cluster_labels))
# cluster_idx = paste("cluster", 1:length(cluster_labels))

# Put clusters and their labels in dendrogram leaf order.
# NOTE(review): indexing cluster_labels by hc$order assumes the rows of the
# labels file are in the same order as hc$labels -- confirm.
# df$Clusters = as.factor(df$Clusters)
df$Clusters <- factor(df$Clusters, levels = hc$labels[hc$order])
cluster_labels <- cluster_labels[hc$order]
# Bar chart of record counts per cluster, filled by region, drawn with the
# axes flipped and the cleaned cluster labels on the cluster axis.
#
# Variants tried previously:
#   fill = df$CountryLang or df$REGION instead of df$Region
#   ggplot(df, aes(Clusters, fill = REGION)) + geom_bar() +
#     facet_grid(~REGION, space = "free", scales = "free")
#   scale_y_continuous(position = "right"); scale_fill_grey()
#   labs(x = "Top 3 country-language tags in each cluster")
#   legend.position = c(.9, .8) or "left", with legend.margin = unit(0, "cm")
#     and legend.key.size = unit(0.3, "cm")
#   theme(axis.text.y = element_text(colour = ddata$labels$col))
#
# NOTE(review): fill uses df$Region while other parts of this script use
# df$REGION -- confirm the Region column exists (a missing column is NULL
# here, which would silently drop the fill).
g <- ggplot(df, aes(as.factor(df$Clusters), fill = df$Region)) +
  geom_bar() +
  scale_x_discrete(labels = cluster_labels) +
  scale_fill_brewer(palette = "Paired") +
  labs(y = "Counts", x = "Clusters") +
  coord_flip() +
  theme_bw() +
  guides(fill = guide_legend(title = "Region")) +
  theme(
    legend.position = "top",
    legend.title = element_text(size = 9),
    legend.text = element_text(size = 9)
  ) +
  theme(
    panel.border = element_rect(colour = "white"),
    strip.background = element_rect(fill = "white"),
    strip.text.x = element_blank()
  )

# Save the same plot as PDF and as EPS.
for (figure_path in c("../data/results/clusters_top3.pdf",
                      "../data/results/clusters_top3.eps")) {
  ggsave(figure_path, plot = g, pointsize = 12)
}