view analysis/analyse_sets.Rmd @ 0:205974c9568c tip

Initial commit. Predictions not included for lack of space.
author franrodalg <f.rodriguezalgarra@qmul.ac.uk>
date Sat, 29 Jun 2019 18:45:50 +0100
parents
children
line wrap: on
line source
---
title: "Analyse Sets"
output: html_notebook
---

```{r}
source('../db/access_db.R')
library(tidyverse)
```


```{r}
# Per-excerpt lookups fetched through helpers that are not defined in this
# notebook (presumably they come from the sourced ../db/access_db.R).
excerpts_classes <- get_excerpts_classes()
excerpts_artist <- get_excerpts_artists()

# Attach artists to the excerpt/class rows (key: ex_id), then attach the
# output of get_class_names() (key: class_id). Inner joins drop any row
# without a match on either key.
classes_artists <- inner_join(
  inner_join(excerpts_classes, excerpts_artist, by = c('ex_id')),
  get_class_names(),
  by = c('class_id')
)

```

## Excerpts

```{r}
# Stack the three per-set excerpt listings, tagging every row with the set
# it was read from, then attach each excerpt's class via ex_id.
sets_excerpts <- rbind(
  read.csv('../sets/train_fixed.csv') %>% mutate(set = 'train'),
  read.csv('../sets/test_fixed.csv') %>% mutate(set = 'test'),
  read.csv('../sets/filt_fixed.csv') %>% mutate(set = 'filt')) %>%
  inner_join(excerpts_classes, by = c('ex_id'))

# Number of distinct excerpts in every class_id / set / iter group.
# Counting distinct ex_id values directly (rather than de-duplicating whole
# rows and then counting) keeps the result independent of whatever other
# columns the CSVs or excerpts_classes carry, and mirrors how artists are
# counted further down with n_distinct(artist_id).
# The outer parentheses make the notebook print the table after assignment.
(sets_excerpts_table <-
  sets_excerpts %>%
  group_by(class_id, set, iter) %>%
  summarise(num_excerpts = n_distinct(ex_id)) %>%
  ungroup())
```


```{r }

# Convert `set` to a factor whose level order (Train, Test, Pr. Test) fixes
# the order of the violins on the x axis, and relabel the raw set codes for
# display in a single step. `levels` and `labels` are matched by position,
# so each code sits right above its label.
# Then attach the output of get_class_names() by class_id; the plot below
# facets on a `class` column, presumably supplied by this join.
# NOTE: this overwrites sets_excerpts_table in place, so re-running the chunk
# without rebuilding the table first would turn every `set` value into NA.
sets_excerpts_table <- sets_excerpts_table %>%
  mutate(set = factor(set,
                      levels = c('train', 'test', 'filt'),
                      labels = c('Train', 'Test', 'Pr. Test'))) %>%
  inner_join(get_class_names(), by = c('class_id'))
```

```{r fig.height = 3, fig.width = 6}
# Violin plot of the number of unique excerpts per collection (Train / Test /
# Pr. Test), one facet column per value of `class`. Each violin summarises
# the num_excerpts values of one set within one class.
excerpts_p <- ggplot(sets_excerpts_table) +
  geom_violin(aes(x = set,
                   y = num_excerpts,
                   color = set, fill = set),
              alpha = 0.6, size = 1,
              # 0.5 quantile: draw a line at the median of each violin
              draw_quantiles = c(0.5)) +
  theme_bw() +
  # NOTE(review): with ggplot2's default out-of-bounds handling, counts
  # outside 0-80 are dropped before the density is estimated; confirm the
  # data never exceeds this range.
  scale_y_continuous(name = 'Unique Excerpts',
                     limits=c(0, 80)) +
  xlab('Collection') +
  theme(axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 11),
        axis.text = element_text(size = 11),
        # slanted tick labels so the collection names do not overlap
        axis.text.x = element_text(
          angle = 45, hjust = 1, vjust = 1),
        # colour duplicates the x axis, so the legend is redundant
        legend.position = 'none', 
        strip.text.x = element_text(size = 11)) +
  facet_grid(.~class)

# Render the figure: without this the plot object is only stored and the
# chunk's fig.height / fig.width options never take effect.
excerpts_p
```

## Artists

```{r}
# Extend every set/excerpt row with the artist information keyed by ex_id.
sets_artists <- inner_join(sets_excerpts, excerpts_artist, by = c('ex_id'))

# Distinct artists found in each class_id / set / iter group.
sets_artists_table <- ungroup(
  summarise(
    group_by(sets_artists, class_id, set, iter),
    num_artists = n_distinct(artist_id)
  )
)

# Show the resulting table in the notebook output.
sets_artists_table
```


```{r }

# Convert `set` to a factor whose level order (Train, Test, Pr. Test) fixes
# the order of the violins on the x axis, and relabel the raw set codes for
# display in a single step. `levels` and `labels` are matched by position,
# so each code sits right above its label.
# Then attach the output of get_class_names() by class_id; the plot below
# facets on a `class` column, presumably supplied by this join.
# NOTE: this overwrites sets_artists_table in place, so re-running the chunk
# without rebuilding the table first would turn every `set` value into NA.
sets_artists_table <- sets_artists_table %>%
  mutate(set = factor(set,
                      levels = c('train', 'test', 'filt'),
                      labels = c('Train', 'Test', 'Pr. Test'))) %>%
  inner_join(get_class_names(), by = c('class_id'))
```

```{r fig.height = 3, fig.width = 6}
# Violin plot of the number of unique artists per collection (Train / Test /
# Pr. Test), one facet column per value of `class`. Each violin summarises
# the num_artists values of one set within one class.
artists_p <- ggplot(sets_artists_table) +
  geom_violin(aes(x = set,
                   y = num_artists,
                   color = set, fill = set),
              alpha = 0.6, size = 1,
              # 0.5 quantile: draw a line at the median of each violin
              draw_quantiles = c(0.5)) +
  theme_bw() +
  # NOTE(review): with ggplot2's default out-of-bounds handling, counts
  # outside 0-60 are dropped before the density is estimated; confirm the
  # data never exceeds this range.
  scale_y_continuous(name = 'Unique Artists',
                     limits=c(0, 60)) +
  xlab('Collection') +
  theme(axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 11),
        axis.text = element_text(size = 11),
        # slanted tick labels so the collection names do not overlap
        axis.text.x = element_text(
          angle = 45, hjust = 1, vjust = 1),
        # colour duplicates the x axis, so the legend is redundant
        legend.position = 'none', 
        strip.text.x = element_text(size = 11)) +
  facet_grid(.~class)

# Render the figure: without this the plot object is only stored and the
# chunk's fig.height / fig.width options never take effect.
artists_p
```