---
title: "Analyse Sets"
output: html_notebook
---

```{r}
source('../db/access_db.R')
library(tidyverse)

# Collection identifiers in display order, and the axis label shown for each.
# The line break in the last label is written as an escape so that it cannot
# be altered by editors or reformatters.
set_levels <- c('train', 'test', 'filt')
set_labels <- c('Train', 'Test', 'Pr. \nTest')

# Prepare a per-class summary table for plotting.
#
# Turns the `set` column into a factor ordered as `set_levels` and labelled
# with `set_labels`, then attaches the columns returned by get_class_names()
# (matched on `class_id`).
#
# tbl: data frame with (at least) the columns `set` and `class_id`.
# Returns the relabelled, joined data frame.
#
# NOTE: not idempotent - calling it twice on the same table turns every `set`
# value into NA, because the new labels no longer match `set_levels`.
label_sets <- function(tbl) {
  tbl$set <- factor(tbl$set, levels = set_levels, labels = set_labels)
  tbl %>%
    inner_join(get_class_names(), by = c('class_id'))
}

# Build the violin plot shared by the Excerpts and Artists sections.
#
# tbl:     table prepared by label_sets(); it is facetted by a `class` column,
#          presumably supplied by get_class_names() - TODO confirm.
# mapping: aes() mapping passed to geom_violin().
# y_name:  title of the y axis.
# y_max:   upper limit of the y axis (the lower limit is fixed at 0).
# Returns the ggplot object without drawing it.
plot_set_violins <- function(tbl, mapping, y_name, y_max) {
  ggplot(tbl) +
    geom_violin(mapping,
                alpha = 0.6, size = 1,
                draw_quantiles = c(0.5)) +  # line at the median
    theme_bw() +
    scale_y_continuous(name = y_name,
                       limits = c(0, y_max)) +
    xlab('Collection') +
    theme(axis.title.x = element_text(size = 12),
          axis.title.y = element_text(size = 11),
          axis.text = element_text(size = 11),
          axis.text.x = element_text(
            angle = 45, hjust = 1, vjust = 1),
          legend.position = 'none',
          strip.text.x = element_text(size = 11)) +
    facet_grid(. ~ class)
}
```


```{r}
# Lookup tables provided by ../db/access_db.R.
excerpts_classes <- get_excerpts_classes()

excerpts_artist <- get_excerpts_artists()

# Excerpt/class pairs joined to their artists and to the class names.
classes_artists <- excerpts_classes %>%
  inner_join(excerpts_artist, by = c('ex_id')) %>%
  inner_join(get_class_names(), by = c('class_id'))

```

## Excerpts

```{r}
# Stack the three collection files, tagging every row with the collection it
# came from, and keep only the rows whose ex_id has a class assignment.
sets_excerpts <- rbind(
  read.csv('../sets/train_fixed.csv') %>% mutate(set = 'train'),
  read.csv('../sets/test_fixed.csv') %>% mutate(set = 'test'),
  read.csv('../sets/filt_fixed.csv') %>% mutate(set = 'filt')) %>%
  inner_join(excerpts_classes, by = c('ex_id'))

# Number of distinct rows per class, collection and `iter`
# (`iter` is presumably a column of the set CSVs - TODO confirm).
# The outer parentheses print the table in the notebook.
(sets_excerpts_table <-
  sets_excerpts %>%
  group_by(class_id, set, iter) %>%
  unique() %>%
  summarise(num_excerpts = n()) %>%
  ungroup())
```


```{r }

sets_excerpts_table <- label_sets(sets_excerpts_table)
```

```{r fig.height = 3, fig.width = 6}
# The plot is only stored here; nothing is drawn by this chunk.
excerpts_p <- plot_set_violins(
  sets_excerpts_table,
  aes(x = set,
      y = num_excerpts,
      color = set, fill = set),
  y_name = 'Unique Excerpts',
  y_max = 80)
```

## Artists

```{r}
# Attach the artist of every excerpt to the per-collection rows.
sets_artists <- sets_excerpts %>%
  inner_join(excerpts_artist, by = c('ex_id'))

# Number of distinct artists per class, collection and `iter`.
# The outer parentheses print the table in the notebook.
(sets_artists_table <-
  sets_artists %>%
  group_by(class_id, set, iter) %>%
  summarise(num_artists = n_distinct(artist_id)) %>%
  ungroup())
```


```{r }

sets_artists_table <- label_sets(sets_artists_table)
```

```{r fig.height = 3, fig.width = 6}
# The plot is only stored here; nothing is drawn by this chunk.
artists_p <- plot_set_violins(
  sets_artists_table,
  aes(x = set,
      y = num_artists,
      color = set, fill = set),
  y_name = 'Unique Artists',
  y_max = 60)
```