diff scripts_R/Metadata_subsetBLSM.R @ 77:bde45ce0eeab branch-tests

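plots and figures for results: read inputs from and write figures to ../data/results/, and remove the language-phylogeny plot and commented-out code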
author Maria Panteli <m.x.panteli@gmail.com>
date Fri, 22 Sep 2017 18:02:59 +0100
parents cc028157502a
children d3e05cd49feb
line wrap: on
line diff
--- a/scripts_R/Metadata_subsetBLSM.R	Fri Sep 22 16:30:36 2017 +0100
+++ b/scripts_R/Metadata_subsetBLSM.R	Fri Sep 22 18:02:59 2017 +0100
@@ -1,9 +1,4 @@
-#df = read.csv("/Users/mariapanteli/Documents/2014-2015/Python/pythoncode/MergeBL-Smith/data/df_BLSM.csv",header=TRUE)
-#df = read.csv("data/df_subset_remove.csv",header=TRUE)
-#df = read.csv("/Users/mariapanteli/Documents/2014-2015/Python/pythoncode/CodeForBL/data/metadataBL_new.csv",header=TRUE)
-#df = read.csv("/Users/mariapanteli/Documents/2014-2015/Python/pythoncode/MergeBL-Smith/data/metadata_BLSM.csv",header=TRUE)
-#df = df[1:29182,] # BL data
-df = read.csv('data/df_and_clusters.csv', header=T)
+df = read.csv('../data/results/df_and_clusters.csv', header=T)
 
 source("MetadataPlots.R")
 
@@ -17,17 +12,17 @@
 par(family="Arial")
 
 #pdf(file="data/country_distribution_BL.pdf")
-pdf(file="data/country_distribution.pdf")
+pdf(file="../data/results/country_distribution.pdf")
 PlotCountryCounts(df)
 dev.off()
-postscript(file="data/country_distribution.eps")
+postscript(file="../data/results/country_distribution.eps")
 PlotCountryCounts(df)
 dev.off()
 
-pdf(file="data/year_distribution.pdf", width=6, height=4)
+pdf(file="../data/results/year_distribution.pdf", width=6, height=4)
 PlotYearDistribution(df)
 dev.off()
-postscript("data/year_distribution.eps", width=10)
+postscript("../data/results/year_distribution.eps", width=10)
 PlotYearDistribution(df)
 dev.off()
 #PlotBarChart(df, cat="Year", ordercat="REGION", mincount=10)
@@ -38,28 +33,14 @@
 levels(df$Language)[which(levels(df$Language)=="Lesser Antillean Creole French")]="Lesser Antil. Creole French"
 df$Language[which(df$Language=="Lesser Antillean Creole French")] = "Lesser Antil. Creole French"
 df$REGION[which(df$Country=="French Guiana")] = "South America"
-pdf(file="data/language_distribution.pdf")
+pdf(file="../data/results/language_distribution.pdf")
 PlotBarChart(df, cat="Language", ordercat="Region", mincount=10)
 dev.off()
-postscript("data/language_distribution.eps", width=8, height=10)
+postscript("../data/results/language_distribution.eps", width=8, height=10)
 PlotBarChart(df, cat="Language", ordercat="Region", mincount=10)
 dev.off()
 
-#language phylogeny
-df = read.csv('data/metadata_BLSM_language.csv', header=T)
-pdf(file="data/language_iso3_iso1.pdf")
-PlotBarChart(df, cat="Language_iso3", ordercat="Language_iso1", mincount=10)
-dev.off()
-
-# PlotCountryCounts(df)
-# PlotCountryCultureNcounts(df, mincount=20)
-# PlotCountryLanguageNcounts(df, mincount=20)
-# PlotYearDistribution(df)
-# PlotLanguageDistribution(df)
-# PlotCultureDistribution(df)
-# PlotNxNcounts(df, cat1="Country", cat2="Genre_Album", mincount=20)
-
-df = read.csv('data/df_and_clusters.csv', header=T)
+df = read.csv('../data/results/df_and_clusters.csv', header=T)
 #PlotBarChart(df, cat="Clusters", ordercat="CountryLang", mincount=1,legend=F)
 df$REGION[which(df$Country=="French Guiana")] = "South America"
 g = ggplot(df,aes(df$Clusters, fill=df$REGION))+geom_bar()
@@ -72,7 +53,7 @@
 #df<-merge(df,regiondata,by.x="Country",by.y="ADMIN",all.x=T)
 
 #cluster_labels_df = read.csv('data/clusters_top3_labels.csv')
-cluster_labels_df = read.csv('data/clusters_top3_countries.csv')
+cluster_labels_df = read.csv('../data/results/clusters_top3_countries.csv')
 cluster_labels = paste(cluster_labels_df[,1],cluster_labels_df[,2],cluster_labels_df[,3],sep="")
 #df$CountryLang = as.factor(paste(df$Country, df$Language, sep="-"))
 
@@ -147,22 +128,5 @@
 g = g+guides(fill = guide_legend(title = "Region"))+theme(legend.position="top",legend.title = element_text(size=9),legend.text = element_text(size=9))
 g = g+theme(panel.border = element_rect(colour = "white"),strip.background=element_rect(fill="white"),strip.text.x = element_blank())
 #g = g+theme(axis.text.y = element_text(colour = ddata$labels$col))
-ggsave('data/clusters_top3.pdf',plot=g)
-ggsave('data/clusters_top3.eps',plot=g)
-
-#g_legend<-function(a.gplot){
-#  tmp <- ggplot_gtable(ggplot_build(a.gplot))
-#  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
-#  legend <- tmp$grobs[[leg]]
-#  return(legend)}
-#mylegend<-g_legend(g)
-
-#pdf(file="data/clusters_top3_hclust.pdf", width=12, height=5)
-#grid.arrange(arrangeGrob(g + theme(legend.position="none"),p + theme(legend.position="none"),nrow=1, widths=c(4,1)),mylegend, nrow=2,heights=c(10, 1))
-#dev.off()
-
-#grid.arrange(arrangeGrob(g,p,nrow=1, ncol=2))
-#ggsave('data/clusters_top3_hclust.pdf',plot=g_comb)
-#g=g+annotate(x=20, y=1:18, label=cluster_idx)+geom_text(aes(x=20,y=1:18,label=cluster_idx))
-#+guides(fill = guide_legend(title = "Region"))
-#grid.draw(cbind(ggplotGrob(g), ggplotGrob(pp), size = "last"))
+ggsave('../data/results/clusters_top3.pdf',plot=g)
+ggsave('../data/results/clusters_top3.eps',plot=g)