Mercurial > hg > confint
view analysis/analyse_sets.Rmd @ 0:205974c9568c tip
Initial commit. Predictions not included for lack of space.
author | franrodalg <f.rodriguezalgarra@qmul.ac.uk> |
---|---|
date | Sat, 29 Jun 2019 18:45:50 +0100 |
parents | |
children |
line wrap: on
line source
---
title: "Analyse Sets"
output: html_notebook
---

```{r}
source('../db/access_db.R')
library(tidyverse)
```

```{r}
# Lookups returned by the helpers sourced from access_db.R.
excerpts_classes <- get_excerpts_classes()
excerpts_artist <- get_excerpts_artists()

# Excerpt classes joined with artists (on ex_id) and class names (on class_id).
classes_artists <- excerpts_classes %>%
  inner_join(excerpts_artist, by = c('ex_id')) %>%
  inner_join(get_class_names(), by = c('class_id'))
```

## Excerpts

```{r}
# Stack the three set files, tagging every row with the set it was read from,
# then attach the class information of each excerpt.
sets_excerpts <- rbind(
  read.csv('../sets/train_fixed.csv') %>% mutate(set = 'train'),
  read.csv('../sets/test_fixed.csv') %>% mutate(set = 'test'),
  read.csv('../sets/filt_fixed.csv') %>% mutate(set = 'filt')
) %>%
  inner_join(excerpts_classes, by = c('ex_id'))

# Number of distinct rows per class_id, set and iter.
# The outer parentheses print the resulting table.
(sets_excerpts_table <- sets_excerpts %>%
  group_by(class_id, set, iter) %>%
  distinct() %>%
  summarise(num_excerpts = n()) %>%
  ungroup())
```

```{r }
# Fix the set order (train, test, filt) and assign the display labels in one
# factor() call; this avoids depending on the plyr package, which is never
# loaded by this notebook.
sets_excerpts_table$set <- factor(
  sets_excerpts_table$set,
  levels = c('train', 'test', 'filt'),
  labels = c('Train', 'Test', 'Pr. Test')
)

# Add the class names used for faceting below.
sets_excerpts_table <- sets_excerpts_table %>%
  inner_join(get_class_names(), by = c('class_id'))
```

```{r fig.height = 3, fig.width = 6}
# Violin plot of num_excerpts per set with the 0.5 quantile drawn.
# Faceting uses a `class` column, presumably supplied by get_class_names()
# (TODO confirm against access_db.R).
# The plot is only assigned here; this chunk does not print it.
excerpts_p <- ggplot(sets_excerpts_table) +
  geom_violin(
    aes(x = set, y = num_excerpts, color = set, fill = set),
    alpha = 0.6,
    size = 1,
    draw_quantiles = c(0.5)
  ) +
  theme_bw() +
  scale_y_continuous(name = 'Unique Excerpts', limits = c(0, 80)) +
  xlab('Collection') +
  theme(
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    legend.position = 'none',
    strip.text.x = element_text(size = 11)
  ) +
  facet_grid(. ~ class)
```

## Artists

```{r}
# Attach the artist of every excerpt to the per-set rows.
sets_artists <- sets_excerpts %>%
  inner_join(excerpts_artist, by = c('ex_id'))

# Number of distinct artist_id values per class_id, set and iter.
# The outer parentheses print the resulting table.
(sets_artists_table <- sets_artists %>%
  group_by(class_id, set, iter) %>%
  summarise(num_artists = n_distinct(artist_id)) %>%
  ungroup())
```

```{r }
# Same ordering and display labels as for the excerpts table, again without
# relying on plyr.
sets_artists_table$set <- factor(
  sets_artists_table$set,
  levels = c('train', 'test', 'filt'),
  labels = c('Train', 'Test', 'Pr. Test')
)

# Add the class names used for faceting below.
sets_artists_table <- sets_artists_table %>%
  inner_join(get_class_names(), by = c('class_id'))
```

```{r fig.height = 3, fig.width = 6}
# Violin plot of num_artists per set with the 0.5 quantile drawn, faceted by
# the `class` column. The plot is only assigned here; this chunk does not
# print it.
artists_p <- ggplot(sets_artists_table) +
  geom_violin(
    aes(x = set, y = num_artists, color = set, fill = set),
    alpha = 0.6,
    size = 1,
    draw_quantiles = c(0.5)
  ) +
  theme_bw() +
  scale_y_continuous(name = 'Unique Artists', limits = c(0, 60)) +
  xlab('Collection') +
  theme(
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    legend.position = 'none',
    strip.text.x = element_text(size = 11)
  ) +
  facet_grid(. ~ class)
```