comparison analysis/analyse_sets.Rmd @ 0:205974c9568c tip

Initial commit. Predictions not included for lack of space.
author franrodalg <f.rodriguezalgarra@qmul.ac.uk>
date Sat, 29 Jun 2019 18:45:50 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:205974c9568c
1 ---
2 title: "Analyse Sets"
3 output: html_notebook
4 ---
5
6 ```{r}
7 source('../db/access_db.R')
8 library(tidyverse)
9 ```
10
11
12 ```{r}
# Lookup tables returned by helpers that are not defined in this file
# (presumably provided by the sourced ../db/access_db.R -- confirm there).
excerpts_classes <- get_excerpts_classes()
excerpts_artist <- get_excerpts_artists()

# Excerpt/class rows matched to their artist rows on ex_id, then extended
# with the columns of get_class_names() matched on class_id.
classes_artists <- inner_join(
  inner_join(excerpts_classes, excerpts_artist, by = c('ex_id')),
  get_class_names(),
  by = c('class_id')
)
20
21 ```
22
23 ## Excerpts
24
25 ```{r}
# Stack the three per-set CSV listings, tagging every row with the name of
# the set it was read from, and keep only the rows whose ex_id is present
# in excerpts_classes (the join also brings in that table's columns).
sets_excerpts <- do.call(
  rbind,
  list(
    mutate(read.csv('../sets/train_fixed.csv'), set = 'train'),
    mutate(read.csv('../sets/test_fixed.csv'), set = 'test'),
    mutate(read.csv('../sets/filt_fixed.csv'), set = 'filt')
  )
) %>%
  inner_join(excerpts_classes, by = c('ex_id'))

# Number of rows per (class_id, set, iter) once fully duplicated rows have
# been dropped.
sets_excerpts_table <- sets_excerpts %>%
  group_by(class_id, set, iter) %>%
  unique() %>%
  summarise(num_excerpts = n()) %>%
  ungroup()

# Show the summary in the notebook output.
sets_excerpts_table
38 ```
39
40
41 ```{r }
42
# Turn `set` into a factor ordered train < test < filt and give the levels
# their display labels in one step, then attach the columns returned by
# get_class_names() (matched on class_id).
sets_excerpts_table <- sets_excerpts_table %>%
  mutate(
    set = factor(
      set,
      levels = c('train', 'test', 'filt'),
      labels = c('Train', 'Test', 'Pr. Test')
    )
  ) %>%
  inner_join(get_class_names(), by = c('class_id'))
53 ```
54
55 ```{r fig.height = 3, fig.width = 6}
# Violin plot of num_excerpts (one value per class_id/set/iter row of
# sets_excerpts_table) for each collection, with one facet column per value
# of `class`. The `class` column is assumed to come from the
# get_class_names() join -- confirm against that helper.
excerpts_p <- ggplot(sets_excerpts_table) +
  geom_violin(
    aes(x = set, y = num_excerpts, color = set, fill = set),
    alpha = 0.6,
    size = 1,
    draw_quantiles = c(0.5)  # mark the median inside each violin
  ) +
  theme_bw() +
  scale_y_continuous(name = 'Unique Excerpts', limits = c(0, 80)) +
  xlab('Collection') +
  theme(
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    # colour/fill only repeat the x axis, so the legend is redundant
    legend.position = 'none',
    strip.text.x = element_text(size = 11)
  ) +
  facet_grid(. ~ class)

# An assignment alone draws nothing; print the plot so the chunk actually
# renders a figure at the fig.height/fig.width set in its header.
excerpts_p
74 ```
75
76 ## Artists
77
78 ```{r}
# Attach the artist information to every set/excerpt row (matched on ex_id).
sets_artists <- inner_join(sets_excerpts, excerpts_artist, by = c('ex_id'))

# Number of different artist_id values per (class_id, set, iter).
sets_artists_table <- sets_artists %>%
  group_by(class_id, set, iter) %>%
  summarise(num_artists = length(unique(artist_id))) %>%
  ungroup()

# Show the summary in the notebook output.
sets_artists_table
87 ```
88
89
90 ```{r }
91
# Turn `set` into a factor ordered train < test < filt and give the levels
# their display labels in one step, then attach the columns returned by
# get_class_names() (matched on class_id).
sets_artists_table <- sets_artists_table %>%
  mutate(
    set = factor(
      set,
      levels = c('train', 'test', 'filt'),
      labels = c('Train', 'Test', 'Pr. Test')
    )
  ) %>%
  inner_join(get_class_names(), by = c('class_id'))
102 ```
103
104 ```{r fig.height = 3, fig.width = 6}
# Violin plot of num_artists (one value per class_id/set/iter row of
# sets_artists_table) for each collection, with one facet column per value
# of `class`. The `class` column is assumed to come from the
# get_class_names() join -- confirm against that helper.
artists_p <- ggplot(sets_artists_table) +
  geom_violin(
    aes(x = set, y = num_artists, color = set, fill = set),
    alpha = 0.6,
    size = 1,
    draw_quantiles = c(0.5)  # mark the median inside each violin
  ) +
  theme_bw() +
  scale_y_continuous(name = 'Unique Artists', limits = c(0, 60)) +
  xlab('Collection') +
  theme(
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    # colour/fill only repeat the x axis, so the legend is redundant
    legend.position = 'none',
    strip.text.x = element_text(size = 11)
  ) +
  facet_grid(. ~ class)

# An assignment alone draws nothing; print the plot so the chunk actually
# renders a figure at the fig.height/fig.width set in its header.
artists_p
123 ```