Mercurial > hg > confint
diff analysis/analyse_sets.Rmd @ 0:205974c9568c tip
Initial commit. Predictions not included for lack of space.
author | franrodalg <f.rodriguezalgarra@qmul.ac.uk> |
---|---|
date | Sat, 29 Jun 2019 18:45:50 +0100 |
parents | |
children |
line wrap: on
line diff
---
title: "Analyse Sets"
output: html_notebook
---

```{r}
source('../db/access_db.R')
library(tidyverse)
```

```{r helpers}
# Read the CSV of one collection ('train', 'test' or 'filt') from ../sets and
# tag every row with the collection name in a `set` column.
read_set <- function(set_name) {
  read.csv(paste0('../sets/', set_name, '_fixed.csv')) %>%
    # `!!` forces the function argument, so a CSV column that happened to be
    # called `set_name` could not shadow it.
    mutate(set = !!set_name)
}

# Turn the raw `set` codes of a counts table into an ordered factor with
# display labels (Train, Test, Pr. Test) and attach the class names.
#
# counts: table with at least the columns `set` and `class_id`.
# Returns `counts` with `set` relabelled, inner-joined with the result of
# get_class_names() on `class_id`.
label_sets <- function(counts) {
  # `levels` fixes the plotting order, `labels` the displayed names; codes
  # outside `levels` become NA.
  counts$set <- factor(counts$set,
                       levels = c('train', 'test', 'filt'),
                       labels = c('Train', 'Test', 'Pr. Test'))
  counts %>%
    # presumably get_class_names() provides the `class` column used for
    # faceting in plot_set_counts() -- defined in ../db/access_db.R, confirm
    inner_join(get_class_names(), by = c('class_id'))
}

# Violin plot of one count column per collection, one facet per class.
#
# counts:    labelled counts table (see label_sets()).
# count_col: name (string) of the column mapped to the y axis.
# y_label:   title of the y axis.
# y_max:     upper limit of the y axis (the lower limit is 0).
# Returns the ggplot object.
plot_set_counts <- function(counts, count_col, y_label, y_max) {
  ggplot(counts) +
    geom_violin(aes(x = set,
                    y = .data[[count_col]],
                    color = set, fill = set),
                alpha = 0.6, size = 1,
                # draw a line at the 0.5 quantile of each violin
                draw_quantiles = c(0.5)) +
    theme_bw() +
    # NOTE: with ggplot2's default out-of-bounds handling, values outside
    # `limits` are dropped before the violins are computed.
    scale_y_continuous(name = y_label,
                       limits = c(0, y_max)) +
    xlab('Collection') +
    theme(axis.title.x = element_text(size = 12),
          axis.title.y = element_text(size = 11),
          axis.text = element_text(size = 11),
          axis.text.x = element_text(
            angle = 45, hjust = 1, vjust = 1),
          legend.position = 'none',
          strip.text.x = element_text(size = 11)) +
    facet_grid(. ~ class)
}
```

```{r}
excerpts_classes <- get_excerpts_classes()

excerpts_artist <- get_excerpts_artists()

# Excerpts with their artist and class name. Not referenced by the chunks
# below; kept so the object is still available in the notebook session.
classes_artists <- excerpts_classes %>%
  inner_join(excerpts_artist, by = c('ex_id')) %>%
  inner_join(get_class_names(), by = c('class_id'))
```

## Excerpts

```{r}
# Stack the three collections and attach the class of every excerpt.
sets_excerpts <- rbind(
  read_set('train'),
  read_set('test'),
  read_set('filt')) %>%
  inner_join(excerpts_classes, by = c('ex_id'))

# Number of distinct rows per class, collection and iteration.
# The outer parentheses print the table.
(sets_excerpts_table <-
  sets_excerpts %>%
  group_by(class_id, set, iter) %>%
  distinct() %>%
  summarise(num_excerpts = n()) %>%
  ungroup())
```

```{r}
sets_excerpts_table <- label_sets(sets_excerpts_table)
```

```{r fig.height = 3, fig.width = 6}
excerpts_p <- plot_set_counts(sets_excerpts_table,
                              count_col = 'num_excerpts',
                              y_label = 'Unique Excerpts',
                              y_max = 80)
# Print the plot so the chunk actually renders the figure.
excerpts_p
```

## Artists

```{r}
sets_artists <- sets_excerpts %>%
  inner_join(excerpts_artist, by = c('ex_id'))

# Number of distinct artists per class, collection and iteration.
# The outer parentheses print the table.
(sets_artists_table <-
  sets_artists %>%
  group_by(class_id, set, iter) %>%
  summarise(num_artists = n_distinct(artist_id)) %>%
  ungroup())
```

```{r}
sets_artists_table <- label_sets(sets_artists_table)
```

```{r fig.height = 3, fig.width = 6}
artists_p <- plot_set_counts(sets_artists_table,
                             count_col = 'num_artists',
                             y_label = 'Unique Artists',
                             y_max = 60)
# Print the plot so the chunk actually renders the figure.
artists_p
```