f@0
|
1 ---
|
f@0
|
2 title: "Analyse Sets"
|
f@0
|
3 output: html_notebook
|
f@0
|
4 ---
|
f@0
|
5
|
f@0
|
6 ```{r}
|
f@0
|
7 source('../db/access_db.R')
|
f@0
|
8 library(tidyverse)
|
f@0
|
9 ```
|
f@0
|
10
|
f@0
|
11
|
f@0
|
12 ```{r}
|
f@0
|
# Load the excerpt -> class and excerpt -> artist tables through the helpers
# sourced from ../db/access_db.R.
excerpts_classes <- get_excerpts_classes()
excerpts_artist <- get_excerpts_artists()

# Pair every excerpt's class with its artist (matched on `ex_id`), then attach
# the class names (matched on `class_id`).
classes_artists <- inner_join(excerpts_classes, excerpts_artist, by = 'ex_id')
classes_artists <- inner_join(classes_artists, get_class_names(), by = 'class_id')
|
f@0
|
20
|
f@0
|
21 ```
|
f@0
|
22
|
f@0
|
23 ## Excerpts
|
f@0
|
24
|
f@0
|
25 ```{r}
|
f@0
|
# Stack the three split files, tagging each row with the set it was read
# from, then attach every excerpt's class via `ex_id`. The per-file frames
# live inside `local()` so they do not leak into the notebook environment.
sets_excerpts <- local({
  train_rows <- mutate(read.csv('../sets/train_fixed.csv'), set = 'train')
  test_rows <- mutate(read.csv('../sets/test_fixed.csv'), set = 'test')
  filt_rows <- mutate(read.csv('../sets/filt_fixed.csv'), set = 'filt')

  inner_join(
    rbind(train_rows, test_rows, filt_rows),
    excerpts_classes,
    by = 'ex_id'
  )
})

# Drop fully duplicated rows, then count the remaining rows for every
# class / set / iter combination.
# NOTE(review): `iter` is presumably a column of the split CSVs -- confirm.
sets_excerpts_table <- unique(sets_excerpts) %>%
  group_by(class_id, set, iter) %>%
  summarise(num_excerpts = n()) %>%
  ungroup()

# Show the resulting table in the notebook output.
sets_excerpts_table
|
f@0
|
38 ```
|
f@0
|
39
|
f@0
|
40
|
f@0
|
41 ```{r }
|
f@0
|
42
|
f@0
|
# Fix the display order of the sets (train, test, filt) and give them their
# plot labels in a single step. `levels` and `labels` are matched by position,
# so the mapping train -> 'Train', test -> 'Test', filt -> 'Pr. Test' is
# readable at a glance and no separate plyr::mapvalues() pass is required.
sets_excerpts_table$set <- factor(
  sets_excerpts_table$set,
  levels = c('train', 'test', 'filt'),
  labels = c('Train', 'Test', 'Pr. Test')
)

# Attach the class names (matched on `class_id`).
sets_excerpts_table <- inner_join(
  sets_excerpts_table,
  get_class_names(),
  by = 'class_id'
)
|
f@0
|
53 ```
|
f@0
|
54
|
f@0
|
55 ```{r fig.height = 3, fig.width = 6}
|
f@0
|
# Violin plot of the per-(class, set, iter) excerpt counts: one violin per
# set with the median marked, one facet column per class.
# NOTE(review): the `class` column is presumably supplied by the join with
# get_class_names() -- confirm.
excerpts_p <-
  ggplot(sets_excerpts_table) +
  geom_violin(
    aes(x = set, y = num_excerpts, color = set, fill = set),
    alpha = 0.6,
    size = 1,
    draw_quantiles = c(0.5)  # horizontal line at the median
  ) +
  facet_grid(. ~ class) +
  scale_y_continuous(name = 'Unique Excerpts', limits = c(0, 80)) +
  labs(x = 'Collection') +
  theme_bw() +
  # Tweaks applied on top of theme_bw(): font sizes, slanted x tick labels,
  # and no legend (the x axis already names the sets).
  theme(
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    legend.position = 'none',
    strip.text.x = element_text(size = 11)
  )
|
f@0
|
74 ```
|
f@0
|
75
|
f@0
|
76 ## Artists
|
f@0
|
77
|
f@0
|
78 ```{r}
|
f@0
|
# Attach the artist of every excerpt in the sets (matched on `ex_id`).
sets_artists <- inner_join(sets_excerpts, excerpts_artist, by = 'ex_id')

# Count the distinct artists for every class / set / iter combination.
sets_artists_table <- sets_artists %>%
  group_by(class_id, set, iter) %>%
  summarise(num_artists = n_distinct(artist_id)) %>%
  ungroup()

# Show the resulting table in the notebook output.
sets_artists_table
|
f@0
|
87 ```
|
f@0
|
88
|
f@0
|
89
|
f@0
|
90 ```{r }
|
f@0
|
91
|
f@0
|
# Fix the display order of the sets (train, test, filt) and give them their
# plot labels in a single step. `levels` and `labels` are matched by position,
# so the mapping train -> 'Train', test -> 'Test', filt -> 'Pr. Test' is
# readable at a glance and no separate plyr::mapvalues() pass is required.
sets_artists_table$set <- factor(
  sets_artists_table$set,
  levels = c('train', 'test', 'filt'),
  labels = c('Train', 'Test', 'Pr. Test')
)

# Attach the class names (matched on `class_id`).
sets_artists_table <- inner_join(
  sets_artists_table,
  get_class_names(),
  by = 'class_id'
)
|
f@0
|
102 ```
|
f@0
|
103
|
f@0
|
104 ```{r fig.height = 3, fig.width = 6}
|
f@0
|
# Violin plot of the per-(class, set, iter) distinct-artist counts: one violin
# per set with the median marked, one facet column per class.
# NOTE(review): the `class` column is presumably supplied by the join with
# get_class_names() -- confirm.
artists_p <-
  ggplot(sets_artists_table) +
  geom_violin(
    aes(x = set, y = num_artists, color = set, fill = set),
    alpha = 0.6,
    size = 1,
    draw_quantiles = c(0.5)  # horizontal line at the median
  ) +
  facet_grid(. ~ class) +
  scale_y_continuous(name = 'Unique Artists', limits = c(0, 60)) +
  labs(x = 'Collection') +
  theme_bw() +
  # Tweaks applied on top of theme_bw(): font sizes, slanted x tick labels,
  # and no legend (the x axis already names the sets).
  theme(
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    legend.position = 'none',
    strip.text.x = element_text(size = 11)
  )
|
f@0
|
123 ```
|