annotate analysis/analyse_sets.Rmd @ 0:205974c9568c tip

Initial commit. Predictions not included for lack of space.
author franrodalg <f.rodriguezalgarra@qmul.ac.uk>
date Sat, 29 Jun 2019 18:45:50 +0100
parents
children
rev   line source
f@0 1 ---
f@0 2 title: "Analyse Sets"
f@0 3 output: html_notebook
f@0 4 ---
f@0 5
f@0 6 ```{r}
f@0 7 source('../db/access_db.R')
f@0 8 library(tidyverse)
f@0 9 ```
f@0 10
f@0 11
f@0 12 ```{r}
# Lookup tables loaded through project helpers (presumably defined in the
# sourced ../db/access_db.R -- confirm there).
excerpts_classes <- get_excerpts_classes()
excerpts_artist <- get_excerpts_artists()

# Pair each excerpt's class with its artist (matched on ex_id), then attach
# the class name columns (matched on class_id). Inner joins drop any row
# without a match on the join key.
classes_artists <- inner_join(
  inner_join(excerpts_classes, excerpts_artist, by = 'ex_id'),
  get_class_names(),
  by = 'class_id'
)
f@0 20
f@0 21 ```
f@0 22
f@0 23 ## Excerpts
f@0 24
f@0 25 ```{r}
# Stack the three collection files, tagging every row with the collection it
# was read from, and attach the class of each excerpt (matched on ex_id).
sets_excerpts <- rbind(
  read.csv('../sets/train_fixed.csv') %>% mutate(set = 'train'),
  read.csv('../sets/test_fixed.csv') %>% mutate(set = 'test'),
  read.csv('../sets/filt_fixed.csv') %>% mutate(set = 'filt')
) %>%
  inner_join(excerpts_classes, by = 'ex_id')

# Number of distinct excerpts per class, collection and iteration.
# The count is keyed on ex_id rather than on whole-row uniqueness: unique()
# compares every column, so any additional per-row column coming from the CSV
# files would keep repeated excerpts from collapsing. Keying on the id also
# mirrors how the artist counts are computed with n_distinct(artist_id).
# NOTE(review): assumes the CSV files provide the `iter` column -- confirm.
sets_excerpts_table <- sets_excerpts %>%
  group_by(class_id, set, iter) %>%
  summarise(num_excerpts = n_distinct(ex_id)) %>%
  ungroup()

# Show the table in the notebook output.
sets_excerpts_table
f@0 38 ```
f@0 39
f@0 40
f@0 41 ```{r }
f@0 42
# Turn the collection code into a factor ordered train, test, filt and
# relabel the levels for display in that same order.
sets_excerpts_table$set <- factor(
  sets_excerpts_table$set,
  levels = c('train', 'test', 'filt'),
  labels = c('Train', 'Test', 'Pr. Test')
)

# Attach the class name columns (matched on class_id).
sets_excerpts_table <- inner_join(
  sets_excerpts_table,
  get_class_names(),
  by = 'class_id'
)
f@0 53 ```
f@0 54
f@0 55 ```{r fig.height = 3, fig.width = 6}
# Violin plot of the excerpt counts per collection, one panel per value of
# `class`. The plot object is only stored here; this chunk does not print it.
excerpts_p <- ggplot(data = sets_excerpts_table) +
  geom_violin(
    mapping = aes(x = set, y = num_excerpts, color = set, fill = set),
    draw_quantiles = 0.5,  # horizontal line at the median
    size = 1,
    alpha = 0.6
  ) +
  # theme_bw() has to precede theme() so the tweaks below are not overwritten.
  theme_bw() +
  theme(
    legend.position = 'none',
    strip.text.x = element_text(size = 11),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  ) +
  labs(x = 'Collection') +
  scale_y_continuous(name = 'Unique Excerpts', limits = c(0, 80)) +
  facet_grid(. ~ class)
f@0 74 ```
f@0 75
f@0 76 ## Artists
f@0 77
f@0 78 ```{r}
# Attach the artist of every excerpt in the collections (matched on ex_id).
sets_artists <- inner_join(sets_excerpts, excerpts_artist, by = 'ex_id')

# Number of distinct artists per class, collection and iteration.
sets_artists_table <- sets_artists %>%
  group_by(class_id, set, iter) %>%
  summarise(num_artists = n_distinct(artist_id)) %>%
  ungroup()

# Show the table in the notebook output.
sets_artists_table
f@0 87 ```
f@0 88
f@0 89
f@0 90 ```{r }
f@0 91
# Turn the collection code into a factor ordered train, test, filt and
# relabel the levels for display in that same order.
sets_artists_table$set <- factor(
  sets_artists_table$set,
  levels = c('train', 'test', 'filt'),
  labels = c('Train', 'Test', 'Pr. Test')
)

# Attach the class name columns (matched on class_id).
sets_artists_table <- inner_join(
  sets_artists_table,
  get_class_names(),
  by = 'class_id'
)
f@0 102 ```
f@0 103
f@0 104 ```{r fig.height = 3, fig.width = 6}
# Violin plot of the artist counts per collection, one panel per value of
# `class`. The plot object is only stored here; this chunk does not print it.
artists_p <- ggplot(data = sets_artists_table) +
  geom_violin(
    mapping = aes(x = set, y = num_artists, color = set, fill = set),
    draw_quantiles = 0.5,  # horizontal line at the median
    size = 1,
    alpha = 0.6
  ) +
  # theme_bw() has to precede theme() so the tweaks below are not overwritten.
  theme_bw() +
  theme(
    legend.position = 'none',
    strip.text.x = element_text(size = 11),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  ) +
  labs(x = 'Collection') +
  scale_y_continuous(name = 'Unique Artists', limits = c(0, 60)) +
  facet_grid(. ~ class)
f@0 123 ```