annotate analysis/simulate_sets.R @ 0:205974c9568c tip

Initial commit. Predictions not included for lack of space.
author franrodalg <f.rodriguezalgarra@qmul.ac.uk>
date Sat, 29 Jun 2019 18:45:50 +0100
parents
children
rev   line source
f@0 1
f@0 2
f@0 3
f@0 4 extract_sizes <- function(preds, sets) {
f@0 5 preds %>%
f@0 6 inner_join(sets, by = c('iteration', 'ex_id')) %>%
f@0 7 select(iteration, ex_id, gt) %>% unique() %>%
f@0 8 group_by(iteration, gt) %>%
f@0 9 summarise(num_ex = n())
f@0 10 }
f@0 11
f@0 12 class_names <- function(preds, sets) {
f@0 13 preds %>%
f@0 14 inner_join(sets, by = c('iteration', 'ex_id')) %>%
f@0 15 select(ex_id, gt) %>% unique()
f@0 16 }
f@0 17
f@0 18 names <- class_names(preds_orig, test_sets)
f@0 19
f@0 20 sizes <- extract_sizes(preds_orig, reg_sets)
f@0 21
f@0 22 simulate_set <- function(x, sizes) {
f@0 23 y <- data.frame()
f@0 24 for (i in 1:nrow(sizes)) {
f@0 25 y <- rbind(
f@0 26 y,
f@0 27 x %>%
f@0 28 filter(gt == sizes[i,]$gt) %>%
f@0 29 sample_n(sizes[i,]$num_ex)
f@0 30 )
f@0 31 }
f@0 32 y
f@0 33 }
f@0 34
f@0 35 simulate_sets <- function(x, names, sizes, num_simul = 10) {
f@0 36 y <- data.frame()
f@0 37 for (s in 1:num_simul) {
f@0 38 for (it in unique(x$iteration)) {
f@0 39 y <- rbind(
f@0 40 y,
f@0 41 simulate_set(x %>%
f@0 42 filter(iteration == it) %>%
f@0 43 inner_join(names, by = c('ex_id')),
f@0 44 sizes %>%
f@0 45 filter(iteration == it)) %>%
f@0 46 mutate(iteration = it, simul = s))
f@0 47 }}
f@0 48 y
f@0 49 }
f@0 50
f@0 51 set.seed(1986)
f@0 52
f@0 53 simul_sets <- simulate_sets(test_sets, names, sizes, num_simul = 100)
f@0 54
f@0 55
f@0 56 simulate_foms <- function(preds, simul_sets) {
f@0 57 foms <- data.frame()
f@0 58 for (i in unique(simul_sets$simul)) {
f@0 59 print(i)
f@0 60 foms <- rbind(
f@0 61 foms,
f@0 62 get_fom_table(preds %>%
f@0 63 inner_join(simul_sets %>%
f@0 64 filter(simul == i) %>%
f@0 65 select(-simul),
f@0 66 by = c('iteration', 'ex_id', 'gt'))) %>%
f@0 67 ungroup() %>%
f@0 68 mutate(simul = i)
f@0 69 )
f@0 70 }
f@0 71 foms
f@0 72
f@0 73 }
f@0 74
f@0 75 simul_foms <- simulate_foms(preds_orig, simul_sets)
f@0 76
f@0 77 compare <- function(x, y) {
f@0 78 x %>% inner_join(
f@0 79 y, by = c('iteration', 'alg', 'feat_col', 'feat_set')) %>%
f@0 80 ungroup() %>%
f@0 81 mutate(loe = mean_recall.x >= mean_recall.y) %>%
f@0 82 summarise(sum(loe) / n()) %>% pull()
f@0 83 }
f@0 84
f@0 85 simulate_comparisons <- function(foms, simul_foms, test=F) {
f@0 86 if (!test) {
f@0 87 sapply(X = unique(simul_foms$simul),
f@0 88 FUN = function(i) {
f@0 89 compare(foms, simul_foms %>% filter(simul == i))})
f@0 90 } else {
f@0 91 sapply(X = unique(simul_foms$simul),
f@0 92 FUN = function(i) {
f@0 93 compare(simul_foms %>% filter(simul == i), foms)})
f@0 94 }
f@0 95 }
f@0 96
f@0 97
f@0 98 reg_comparisons <- simulate_comparisons(reg_fom_iter_orig, simul_foms)
f@0 99
f@0 100 reg_test <- compare(reg_fom_iter_orig, test_fom_iter_orig)
f@0 101
f@0 102
f@0 103 test_comparisons <- simulate_comparisons(test_fom_iter_orig, simul_foms, test=T)
f@0 104
f@0 105 simulate_all_comparisons <- function(simul_foms) {
f@0 106 all_comp <- data.frame()
f@0 107 for (i in unique(simul_foms$simul)) {
f@0 108 all_comp <- rbind(
f@0 109 all_comp,
f@0 110 data.frame(
f@0 111 simul = i,
f@0 112 simul_comp = unique(simul_foms$simul)[-i],
f@0 113 prop = simulate_comparisons(
f@0 114 simul_foms %>% filter(simul == i),
f@0 115 simul_foms %>% filter(simul != i))))
f@0 116 }
f@0 117 all_comp
f@0 118 }
f@0 119
f@0 120 all_comparisons <- simulate_all_comparisons(simul_foms)
f@0 121
f@0 122 ggplot() +
f@0 123 geom_density(data=all_comparisons,
f@0 124 aes(x=prop, group=simul),
f@0 125 color='grey85') +
f@0 126 geom_density(data=all_comparisons %>%
f@0 127 group_by(simul) %>%
f@0 128 summarise(av_prop = mean(prop)),
f@0 129 aes(x=av_prop), linetype='dashed') +
f@0 130 geom_density(data=data.frame(prop=reg_comparisons),
f@0 131 aes(x=prop)) +
f@0 132 xlim(c(0, 1)) +
f@0 133 theme_bw() +
f@0 134 xlab('Proportion of measurements') +
f@0 135 ylab('Density')