f@0: f@0: f@0: f@0: extract_sizes <- function(preds, sets) { f@0: preds %>% f@0: inner_join(sets, by = c('iteration', 'ex_id')) %>% f@0: select(iteration, ex_id, gt) %>% unique() %>% f@0: group_by(iteration, gt) %>% f@0: summarise(num_ex = n()) f@0: } f@0: f@0: class_names <- function(preds, sets) { f@0: preds %>% f@0: inner_join(sets, by = c('iteration', 'ex_id')) %>% f@0: select(ex_id, gt) %>% unique() f@0: } f@0: f@0: names <- class_names(preds_orig, test_sets) f@0: f@0: sizes <- extract_sizes(preds_orig, reg_sets) f@0: f@0: simulate_set <- function(x, sizes) { f@0: y <- data.frame() f@0: for (i in 1:nrow(sizes)) { f@0: y <- rbind( f@0: y, f@0: x %>% f@0: filter(gt == sizes[i,]$gt) %>% f@0: sample_n(sizes[i,]$num_ex) f@0: ) f@0: } f@0: y f@0: } f@0: f@0: simulate_sets <- function(x, names, sizes, num_simul = 10) { f@0: y <- data.frame() f@0: for (s in 1:num_simul) { f@0: for (it in unique(x$iteration)) { f@0: y <- rbind( f@0: y, f@0: simulate_set(x %>% f@0: filter(iteration == it) %>% f@0: inner_join(names, by = c('ex_id')), f@0: sizes %>% f@0: filter(iteration == it)) %>% f@0: mutate(iteration = it, simul = s)) f@0: }} f@0: y f@0: } f@0: f@0: set.seed(1986) f@0: f@0: simul_sets <- simulate_sets(test_sets, names, sizes, num_simul = 100) f@0: f@0: f@0: simulate_foms <- function(preds, simul_sets) { f@0: foms <- data.frame() f@0: for (i in unique(simul_sets$simul)) { f@0: print(i) f@0: foms <- rbind( f@0: foms, f@0: get_fom_table(preds %>% f@0: inner_join(simul_sets %>% f@0: filter(simul == i) %>% f@0: select(-simul), f@0: by = c('iteration', 'ex_id', 'gt'))) %>% f@0: ungroup() %>% f@0: mutate(simul = i) f@0: ) f@0: } f@0: foms f@0: f@0: } f@0: f@0: simul_foms <- simulate_foms(preds_orig, simul_sets) f@0: f@0: compare <- function(x, y) { f@0: x %>% inner_join( f@0: y, by = c('iteration', 'alg', 'feat_col', 'feat_set')) %>% f@0: ungroup() %>% f@0: mutate(loe = mean_recall.x >= mean_recall.y) %>% f@0: summarise(sum(loe) / n()) %>% pull() f@0: } f@0: f@0: simulate_comparisons <- function(foms, simul_foms, test=F) { f@0: if (!test) { f@0: sapply(X = unique(simul_foms$simul), f@0: FUN = function(i) { f@0: compare(foms, simul_foms %>% filter(simul == i))}) f@0: } else { f@0: sapply(X = unique(simul_foms$simul), f@0: FUN = function(i) { f@0: compare(simul_foms %>% filter(simul == i), foms)}) f@0: } f@0: } f@0: f@0: f@0: reg_comparisons <- simulate_comparisons(reg_fom_iter_orig, simul_foms) f@0: f@0: reg_test <- compare(reg_fom_iter_orig, test_fom_iter_orig) f@0: f@0: f@0: test_comparisons <- simulate_comparisons(test_fom_iter_orig, simul_foms, test=T) f@0: f@0: simulate_all_comparisons <- function(simul_foms) { f@0: all_comp <- data.frame() f@0: for (i in unique(simul_foms$simul)) { f@0: all_comp <- rbind( f@0: all_comp, f@0: data.frame( f@0: simul = i, f@0: simul_comp = unique(simul_foms$simul)[-i], f@0: prop = simulate_comparisons( f@0: simul_foms %>% filter(simul == i), f@0: simul_foms %>% filter(simul != i)))) f@0: } f@0: all_comp f@0: } f@0: f@0: all_comparisons <- simulate_all_comparisons(simul_foms) f@0: f@0: ggplot() + f@0: geom_density(data=all_comparisons, f@0: aes(x=prop, group=simul), f@0: color='grey85') + f@0: geom_density(data=all_comparisons %>% f@0: group_by(simul) %>% f@0: summarise(av_prop = mean(prop)), f@0: aes(x=av_prop), linetype='dashed') + f@0: geom_density(data=data.frame(prop=reg_comparisons), f@0: aes(x=prop)) + f@0: xlim(c(0, 1)) + f@0: theme_bw() + f@0: xlab('Proportion of measurements') + f@0: ylab('Density')