Mercurial > hg > confint
view analysis/simulate_sets.R @ 0:205974c9568c tip
Initial commit. Predictions not included for lack of space.
author | franrodalg <f.rodriguezalgarra@qmul.ac.uk> |
---|---|
date | Sat, 29 Jun 2019 18:45:50 +0100 |
parents | |
children |
line wrap: on
line source
extract_sizes <- function(preds, sets) { preds %>% inner_join(sets, by = c('iteration', 'ex_id')) %>% select(iteration, ex_id, gt) %>% unique() %>% group_by(iteration, gt) %>% summarise(num_ex = n()) } class_names <- function(preds, sets) { preds %>% inner_join(sets, by = c('iteration', 'ex_id')) %>% select(ex_id, gt) %>% unique() } names <- class_names(preds_orig, test_sets) sizes <- extract_sizes(preds_orig, reg_sets) simulate_set <- function(x, sizes) { y <- data.frame() for (i in 1:nrow(sizes)) { y <- rbind( y, x %>% filter(gt == sizes[i,]$gt) %>% sample_n(sizes[i,]$num_ex) ) } y } simulate_sets <- function(x, names, sizes, num_simul = 10) { y <- data.frame() for (s in 1:num_simul) { for (it in unique(x$iteration)) { y <- rbind( y, simulate_set(x %>% filter(iteration == it) %>% inner_join(names, by = c('ex_id')), sizes %>% filter(iteration == it)) %>% mutate(iteration = it, simul = s)) }} y } set.seed(1986) simul_sets <- simulate_sets(test_sets, names, sizes, num_simul = 100) simulate_foms <- function(preds, simul_sets) { foms <- data.frame() for (i in unique(simul_sets$simul)) { print(i) foms <- rbind( foms, get_fom_table(preds %>% inner_join(simul_sets %>% filter(simul == i) %>% select(-simul), by = c('iteration', 'ex_id', 'gt'))) %>% ungroup() %>% mutate(simul = i) ) } foms } simul_foms <- simulate_foms(preds_orig, simul_sets) compare <- function(x, y) { x %>% inner_join( y, by = c('iteration', 'alg', 'feat_col', 'feat_set')) %>% ungroup() %>% mutate(loe = mean_recall.x >= mean_recall.y) %>% summarise(sum(loe) / n()) %>% pull() } simulate_comparisons <- function(foms, simul_foms, test=F) { if (!test) { sapply(X = unique(simul_foms$simul), FUN = function(i) { compare(foms, simul_foms %>% filter(simul == i))}) } else { sapply(X = unique(simul_foms$simul), FUN = function(i) { compare(simul_foms %>% filter(simul == i), foms)}) } } reg_comparisons <- simulate_comparisons(reg_fom_iter_orig, simul_foms) reg_test <- compare(reg_fom_iter_orig, test_fom_iter_orig) test_comparisons <- simulate_comparisons(test_fom_iter_orig, simul_foms, test=T) simulate_all_comparisons <- function(simul_foms) { all_comp <- data.frame() for (i in unique(simul_foms$simul)) { all_comp <- rbind( all_comp, data.frame( simul = i, simul_comp = unique(simul_foms$simul)[-i], prop = simulate_comparisons( simul_foms %>% filter(simul == i), simul_foms %>% filter(simul != i)))) } all_comp } all_comparisons <- simulate_all_comparisons(simul_foms) ggplot() + geom_density(data=all_comparisons, aes(x=prop, group=simul), color='grey85') + geom_density(data=all_comparisons %>% group_by(simul) %>% summarise(av_prop = mean(prop)), aes(x=av_prop), linetype='dashed') + geom_density(data=data.frame(prop=reg_comparisons), aes(x=prop)) + xlim(c(0, 1)) + theme_bw() + xlab('Proportion of measurements') + ylab('Density')