Mercurial > hg > plosone_underreview
Comparison of scripts_R/Metadata_subsetBLSM.R @ 77:bde45ce0eeab [branch-tests]
plots and figures for results
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Fri, 22 Sep 2017 18:02:59 +0100 |
parents | cc028157502a |
children | d3e05cd49feb |
comparison (legend: equal, deleted, inserted, replaced)
76:d17833be50ca | 77:bde45ce0eeab |
---|---|
1 #df = read.csv("/Users/mariapanteli/Documents/2014-2015/Python/pythoncode/MergeBL-Smith/data/df_BLSM.csv",header=TRUE) | 1 df = read.csv('../data/results/df_and_clusters.csv', header=T) |
2 #df = read.csv("data/df_subset_remove.csv",header=TRUE) | |
3 #df = read.csv("/Users/mariapanteli/Documents/2014-2015/Python/pythoncode/CodeForBL/data/metadataBL_new.csv",header=TRUE) | |
4 #df = read.csv("/Users/mariapanteli/Documents/2014-2015/Python/pythoncode/MergeBL-Smith/data/metadata_BLSM.csv",header=TRUE) | |
5 #df = df[1:29182,] # BL data | |
6 df = read.csv('data/df_and_clusters.csv', header=T) | |
7 | 2 |
8 source("MetadataPlots.R") | 3 source("MetadataPlots.R") |
9 | 4 |
10 ## for plos use arial | 5 ## for plos use arial |
11 #install.packages("extrafont") | 6 #install.packages("extrafont") |
15 Arial <- Type1Font(family="Arial", metrics=c("ArialMT.afm","arial-BoldMT.afm","Arial-ItalicMT.afm", "Arial-BoldItalicMT.afm")) | 10 Arial <- Type1Font(family="Arial", metrics=c("ArialMT.afm","arial-BoldMT.afm","Arial-ItalicMT.afm", "Arial-BoldItalicMT.afm")) |
16 postscriptFonts(Arial=Arial) | 11 postscriptFonts(Arial=Arial) |
17 par(family="Arial") | 12 par(family="Arial") |
18 | 13 |
19 #pdf(file="data/country_distribution_BL.pdf") | 14 #pdf(file="data/country_distribution_BL.pdf") |
20 pdf(file="data/country_distribution.pdf") | 15 pdf(file="../data/results/country_distribution.pdf") |
21 PlotCountryCounts(df) | 16 PlotCountryCounts(df) |
22 dev.off() | 17 dev.off() |
23 postscript(file="data/country_distribution.eps") | 18 postscript(file="../data/results/country_distribution.eps") |
24 PlotCountryCounts(df) | 19 PlotCountryCounts(df) |
25 dev.off() | 20 dev.off() |
26 | 21 |
27 pdf(file="data/year_distribution.pdf", width=6, height=4) | 22 pdf(file="../data/results/year_distribution.pdf", width=6, height=4) |
28 PlotYearDistribution(df) | 23 PlotYearDistribution(df) |
29 dev.off() | 24 dev.off() |
30 postscript("data/year_distribution.eps", width=10) | 25 postscript("../data/results/year_distribution.eps", width=10) |
31 PlotYearDistribution(df) | 26 PlotYearDistribution(df) |
32 dev.off() | 27 dev.off() |
33 #PlotBarChart(df, cat="Year", ordercat="REGION", mincount=10) | 28 #PlotBarChart(df, cat="Year", ordercat="REGION", mincount=10) |
34 | 29 |
35 #pdf(file="data/language_distribution_BL.pdf") | 30 #pdf(file="data/language_distribution_BL.pdf") |
36 levels(df$Language)[which(levels(df$Language)=="Southwestern Caribbean Creole English")]="SouthW Carib. Creole English" | 31 levels(df$Language)[which(levels(df$Language)=="Southwestern Caribbean Creole English")]="SouthW Carib. Creole English" |
37 df$Language[which(df$Language=="Southwestern Caribbean Creole English")] = "SouthW Carib. Creole English" | 32 df$Language[which(df$Language=="Southwestern Caribbean Creole English")] = "SouthW Carib. Creole English" |
38 levels(df$Language)[which(levels(df$Language)=="Lesser Antillean Creole French")]="Lesser Antil. Creole French" | 33 levels(df$Language)[which(levels(df$Language)=="Lesser Antillean Creole French")]="Lesser Antil. Creole French" |
39 df$Language[which(df$Language=="Lesser Antillean Creole French")] = "Lesser Antil. Creole French" | 34 df$Language[which(df$Language=="Lesser Antillean Creole French")] = "Lesser Antil. Creole French" |
40 df$REGION[which(df$Country=="French Guiana")] = "South America" | 35 df$REGION[which(df$Country=="French Guiana")] = "South America" |
41 pdf(file="data/language_distribution.pdf") | 36 pdf(file="../data/results/language_distribution.pdf") |
42 PlotBarChart(df, cat="Language", ordercat="Region", mincount=10) | 37 PlotBarChart(df, cat="Language", ordercat="Region", mincount=10) |
43 dev.off() | 38 dev.off() |
44 postscript("data/language_distribution.eps", width=8, height=10) | 39 postscript("../data/results/language_distribution.eps", width=8, height=10) |
45 PlotBarChart(df, cat="Language", ordercat="Region", mincount=10) | 40 PlotBarChart(df, cat="Language", ordercat="Region", mincount=10) |
46 dev.off() | 41 dev.off() |
47 | 42 |
48 #language phylogeny | 43 df = read.csv('../data/results/df_and_clusters.csv', header=T) |
49 df = read.csv('data/metadata_BLSM_language.csv', header=T) | |
50 pdf(file="data/language_iso3_iso1.pdf") | |
51 PlotBarChart(df, cat="Language_iso3", ordercat="Language_iso1", mincount=10) | |
52 dev.off() | |
53 | |
54 # PlotCountryCounts(df) | |
55 # PlotCountryCultureNcounts(df, mincount=20) | |
56 # PlotCountryLanguageNcounts(df, mincount=20) | |
57 # PlotYearDistribution(df) | |
58 # PlotLanguageDistribution(df) | |
59 # PlotCultureDistribution(df) | |
60 # PlotNxNcounts(df, cat1="Country", cat2="Genre_Album", mincount=20) | |
61 | |
62 df = read.csv('data/df_and_clusters.csv', header=T) | |
63 #PlotBarChart(df, cat="Clusters", ordercat="CountryLang", mincount=1,legend=F) | 44 #PlotBarChart(df, cat="Clusters", ordercat="CountryLang", mincount=1,legend=F) |
64 df$REGION[which(df$Country=="French Guiana")] = "South America" | 45 df$REGION[which(df$Country=="French Guiana")] = "South America" |
65 g = ggplot(df,aes(df$Clusters, fill=df$REGION))+geom_bar() | 46 g = ggplot(df,aes(df$Clusters, fill=df$REGION))+geom_bar() |
66 levels(df$REGION)[which(levels(df$REGION)=="South America")]="S. America" | 47 levels(df$REGION)[which(levels(df$REGION)=="South America")]="S. America" |
67 levels(df$REGION)[which(levels(df$REGION)=="North America")]="N. America" | 48 levels(df$REGION)[which(levels(df$REGION)=="North America")]="N. America" |
70 #wrld = getMap() | 51 #wrld = getMap() |
71 #regiondata<-wrld@data[,c("ADMIN","GEO3", "Stern")] | 52 #regiondata<-wrld@data[,c("ADMIN","GEO3", "Stern")] |
72 #df<-merge(df,regiondata,by.x="Country",by.y="ADMIN",all.x=T) | 53 #df<-merge(df,regiondata,by.x="Country",by.y="ADMIN",all.x=T) |
73 | 54 |
74 #cluster_labels_df = read.csv('data/clusters_top3_labels.csv') | 55 #cluster_labels_df = read.csv('data/clusters_top3_labels.csv') |
75 cluster_labels_df = read.csv('data/clusters_top3_countries.csv') | 56 cluster_labels_df = read.csv('../data/results/clusters_top3_countries.csv') |
76 cluster_labels = paste(cluster_labels_df[,1],cluster_labels_df[,2],cluster_labels_df[,3],sep="") | 57 cluster_labels = paste(cluster_labels_df[,1],cluster_labels_df[,2],cluster_labels_df[,3],sep="") |
77 #df$CountryLang = as.factor(paste(df$Country, df$Language, sep="-")) | 58 #df$CountryLang = as.factor(paste(df$Country, df$Language, sep="-")) |
78 | 59 |
79 countrycounts = table(df$Clusters,df$Country) | 60 countrycounts = table(df$Clusters,df$Country) |
80 library(cluster) | 61 library(cluster) |
145 #g = g+guides(fill = guide_legend(title = "Region"))+theme(legend.position=c(.9,.8),legend.margin = unit(0, "cm"),legend.key.size = unit(0.3, "cm"),legend.title = element_text(size=10),legend.text = element_text(size=10)) | 126 #g = g+guides(fill = guide_legend(title = "Region"))+theme(legend.position=c(.9,.8),legend.margin = unit(0, "cm"),legend.key.size = unit(0.3, "cm"),legend.title = element_text(size=10),legend.text = element_text(size=10)) |
146 #g = g+guides(fill = guide_legend(title = "Region"))+theme(legend.position="left",legend.margin = unit(0, "cm"),legend.key.size = unit(0.3, "cm"),legend.title = element_text(size=9),legend.text = element_text(size=9)) | 127 #g = g+guides(fill = guide_legend(title = "Region"))+theme(legend.position="left",legend.margin = unit(0, "cm"),legend.key.size = unit(0.3, "cm"),legend.title = element_text(size=9),legend.text = element_text(size=9)) |
147 g = g+guides(fill = guide_legend(title = "Region"))+theme(legend.position="top",legend.title = element_text(size=9),legend.text = element_text(size=9)) | 128 g = g+guides(fill = guide_legend(title = "Region"))+theme(legend.position="top",legend.title = element_text(size=9),legend.text = element_text(size=9)) |
148 g = g+theme(panel.border = element_rect(colour = "white"),strip.background=element_rect(fill="white"),strip.text.x = element_blank()) | 129 g = g+theme(panel.border = element_rect(colour = "white"),strip.background=element_rect(fill="white"),strip.text.x = element_blank()) |
149 #g = g+theme(axis.text.y = element_text(colour = ddata$labels$col)) | 130 #g = g+theme(axis.text.y = element_text(colour = ddata$labels$col)) |
150 ggsave('data/clusters_top3.pdf',plot=g) | 131 ggsave('../data/results/clusters_top3.pdf',plot=g) |
151 ggsave('data/clusters_top3.eps',plot=g) | 132 ggsave('../data/results/clusters_top3.eps',plot=g) |
152 | |
153 #g_legend<-function(a.gplot){ | |
154 # tmp <- ggplot_gtable(ggplot_build(a.gplot)) | |
155 # leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box") | |
156 # legend <- tmp$grobs[[leg]] | |
157 # return(legend)} | |
158 #mylegend<-g_legend(g) | |
159 | |
160 #pdf(file="data/clusters_top3_hclust.pdf", width=12, height=5) | |
161 #grid.arrange(arrangeGrob(g + theme(legend.position="none"),p + theme(legend.position="none"),nrow=1, widths=c(4,1)),mylegend, nrow=2,heights=c(10, 1)) | |
162 #dev.off() | |
163 | |
164 #grid.arrange(arrangeGrob(g,p,nrow=1, ncol=2)) | |
165 #ggsave('data/clusters_top3_hclust.pdf',plot=g_comb) | |
166 #g=g+annotate(x=20, y=1:18, label=cluster_idx)+geom_text(aes(x=20,y=1:18,label=cluster_idx)) | |
167 #+guides(fill = guide_legend(title = "Region")) | |
168 #grid.draw(cbind(ggplotGrob(g), ggplotGrob(pp), size = "last")) |