Mercurial > hg > confint
comparison analysis/analyse_sets.Rmd @ 0:205974c9568c tip
Initial commit. Predictions not included for lack of space.
author | franrodalg <f.rodriguezalgarra@qmul.ac.uk> |
---|---|
date | Sat, 29 Jun 2019 18:45:50 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:205974c9568c |
---|---|
1 --- | |
2 title: "Analyse Sets" | |
3 output: html_notebook | |
4 --- | |
5 | |
6 ```{r} | |
7 source('../db/access_db.R') | |
8 library(tidyverse) | |
9 ``` | |
10 | |
11 | |
12 ```{r} | |
# Fetch the excerpt/class and excerpt/artist tables (the accessors are
# presumably provided by '../db/access_db.R' -- confirm).
excerpts_classes <- get_excerpts_classes()
excerpts_artist <- get_excerpts_artists()

# Keep only excerpts present in both tables, then join the result of
# get_class_names() on class_id.
classes_artists <- inner_join(excerpts_classes, excerpts_artist,
                              by = c('ex_id')) %>%
  inner_join(get_class_names(), by = c('class_id'))
20 | |
21 ``` | |
22 | |
23 ## Excerpts | |
24 | |
25 ```{r} | |
# Stack the three set listings, tagging every row with the set it was read
# from, and attach the class of each excerpt.
sets_excerpts <- inner_join(
  rbind(
    mutate(read.csv('../sets/train_fixed.csv'), set = 'train'),
    mutate(read.csv('../sets/test_fixed.csv'), set = 'test'),
    mutate(read.csv('../sets/filt_fixed.csv'), set = 'filt')
  ),
  excerpts_classes,
  by = c('ex_id')
)

# Number of distinct rows in every class_id/set/iter combination.
sets_excerpts_table <- sets_excerpts %>%
  group_by(class_id, set, iter) %>%
  unique() %>%
  summarise(num_excerpts = n()) %>%
  ungroup()

# Show the table in the chunk output.
sets_excerpts_table
38 ``` | |
39 | |
40 | |
41 ```{r } | |
42 | |
# Turn `set` into a factor ordered train, test, filt and give each level
# its display label ('filt' is shown as 'Pr. Test'); then join the result
# of get_class_names() on class_id.
sets_excerpts_table <- sets_excerpts_table %>%
  mutate(set = factor(set,
                      levels = c('train', 'test', 'filt'),
                      labels = c('Train', 'Test', 'Pr. Test'))) %>%
  inner_join(get_class_names(), by = c('class_id'))
53 ``` | |
54 | |
55 ```{r fig.height = 3, fig.width = 6} | |
# Violin plot of the per-group excerpt counts for each collection, one
# facet per class. The line drawn inside each violin is the 0.5 quantile.
excerpts_p <- ggplot(sets_excerpts_table,
                     aes(x = set, y = num_excerpts,
                         color = set, fill = set)) +
  geom_violin(alpha = 0.6, size = 1, draw_quantiles = c(0.5)) +
  scale_y_continuous(name = 'Unique Excerpts', limits = c(0, 80)) +
  xlab('Collection') +
  facet_grid(. ~ class) +
  theme_bw() +
  theme(axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 11),
        axis.text = element_text(size = 11),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        legend.position = 'none',
        strip.text.x = element_text(size = 11))

# An assignment alone renders nothing; print the plot so the chunk's
# fig.height / fig.width options actually produce a figure.
excerpts_p
74 ``` | |
75 | |
76 ## Artists | |
77 | |
78 ```{r} | |
# Attach the artist of every excerpt to the per-set listings.
sets_artists <- inner_join(sets_excerpts, excerpts_artist, by = c('ex_id'))

# Number of distinct artist ids in every class_id/set/iter combination.
sets_artists_table <- sets_artists %>%
  group_by(class_id, set, iter) %>%
  summarise(num_artists = n_distinct(artist_id)) %>%
  ungroup()

# Show the table in the chunk output.
sets_artists_table
87 ``` | |
88 | |
89 | |
90 ```{r } | |
91 | |
# Turn `set` into a factor ordered train, test, filt and give each level
# its display label ('filt' is shown as 'Pr. Test'); then join the result
# of get_class_names() on class_id.
sets_artists_table <- sets_artists_table %>%
  mutate(set = factor(set,
                      levels = c('train', 'test', 'filt'),
                      labels = c('Train', 'Test', 'Pr. Test'))) %>%
  inner_join(get_class_names(), by = c('class_id'))
102 ``` | |
103 | |
104 ```{r fig.height = 3, fig.width = 6} | |
# Violin plot of the per-group artist counts for each collection, one
# facet per class. The line drawn inside each violin is the 0.5 quantile.
artists_p <- ggplot(sets_artists_table,
                    aes(x = set, y = num_artists,
                        color = set, fill = set)) +
  geom_violin(alpha = 0.6, size = 1, draw_quantiles = c(0.5)) +
  scale_y_continuous(name = 'Unique Artists', limits = c(0, 60)) +
  xlab('Collection') +
  facet_grid(. ~ class) +
  theme_bw() +
  theme(axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 11),
        axis.text = element_text(size = 11),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        legend.position = 'none',
        strip.text.x = element_text(size = 11))

# An assignment alone renders nothing; print the plot so the chunk's
# fig.height / fig.width options actually produce a figure.
artists_p
123 ``` |