Mercurial > hg > confint
diff analysis/simulate_sets.R @ 0:205974c9568c tip
Initial commit. Predictions not included for lack of space.
author | franrodalg <f.rodriguezalgarra@qmul.ac.uk> |
---|---|
date | Sat, 29 Jun 2019 18:45:50 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/analysis/simulate_sets.R Sat Jun 29 18:45:50 2019 +0100 @@ -0,0 +1,135 @@ + + + +extract_sizes <- function(preds, sets) { + preds %>% + inner_join(sets, by = c('iteration', 'ex_id')) %>% + select(iteration, ex_id, gt) %>% unique() %>% + group_by(iteration, gt) %>% + summarise(num_ex = n()) +} + +class_names <- function(preds, sets) { + preds %>% + inner_join(sets, by = c('iteration', 'ex_id')) %>% + select(ex_id, gt) %>% unique() +} + +names <- class_names(preds_orig, test_sets) + +sizes <- extract_sizes(preds_orig, reg_sets) + +simulate_set <- function(x, sizes) { + y <- data.frame() + for (i in 1:nrow(sizes)) { + y <- rbind( + y, + x %>% + filter(gt == sizes[i,]$gt) %>% + sample_n(sizes[i,]$num_ex) + ) + } + y +} + +simulate_sets <- function(x, names, sizes, num_simul = 10) { + y <- data.frame() + for (s in 1:num_simul) { + for (it in unique(x$iteration)) { + y <- rbind( + y, + simulate_set(x %>% + filter(iteration == it) %>% + inner_join(names, by = c('ex_id')), + sizes %>% + filter(iteration == it)) %>% + mutate(iteration = it, simul = s)) + }} + y +} + +set.seed(1986) + +simul_sets <- simulate_sets(test_sets, names, sizes, num_simul = 100) + + +simulate_foms <- function(preds, simul_sets) { + foms <- data.frame() + for (i in unique(simul_sets$simul)) { + print(i) + foms <- rbind( + foms, + get_fom_table(preds %>% + inner_join(simul_sets %>% + filter(simul == i) %>% + select(-simul), + by = c('iteration', 'ex_id', 'gt'))) %>% + ungroup() %>% + mutate(simul = i) + ) + } + foms + +} + +simul_foms <- simulate_foms(preds_orig, simul_sets) + +compare <- function(x, y) { + x %>% inner_join( + y, by = c('iteration', 'alg', 'feat_col', 'feat_set')) %>% + ungroup() %>% + mutate(loe = mean_recall.x >= mean_recall.y) %>% + summarise(sum(loe) / n()) %>% pull() +} + +simulate_comparisons <- function(foms, simul_foms, test=F) { + if (!test) { + sapply(X = unique(simul_foms$simul), + FUN = function(i) { + compare(foms, simul_foms %>% filter(simul == i))}) + } else { + sapply(X = unique(simul_foms$simul), + FUN = function(i) { + compare(simul_foms %>% filter(simul == i), foms)}) + } +} + + +reg_comparisons <- simulate_comparisons(reg_fom_iter_orig, simul_foms) + +reg_test <- compare(reg_fom_iter_orig, test_fom_iter_orig) + + +test_comparisons <- simulate_comparisons(test_fom_iter_orig, simul_foms, test=T) + +simulate_all_comparisons <- function(simul_foms) { + all_comp <- data.frame() + for (i in unique(simul_foms$simul)) { + all_comp <- rbind( + all_comp, + data.frame( + simul = i, + simul_comp = unique(simul_foms$simul)[-i], + prop = simulate_comparisons( + simul_foms %>% filter(simul == i), + simul_foms %>% filter(simul != i)))) + } + all_comp +} + +all_comparisons <- simulate_all_comparisons(simul_foms) + +ggplot() + + geom_density(data=all_comparisons, + aes(x=prop, group=simul), + color='grey85') + + geom_density(data=all_comparisons %>% + group_by(simul) %>% + summarise(av_prop = mean(prop)), + aes(x=av_prop), linetype='dashed') + + geom_density(data=data.frame(prop=reg_comparisons), + aes(x=prop)) + + xlim(c(0, 1)) + + theme_bw() + + xlab('Proportion of measurements') + + ylab('Density')