f@0
|
1
|
f@0
|
2
|
f@0
|
3
|
f@0
|
4 extract_sizes <- function(preds, sets) {
|
f@0
|
5 preds %>%
|
f@0
|
6 inner_join(sets, by = c('iteration', 'ex_id')) %>%
|
f@0
|
7 select(iteration, ex_id, gt) %>% unique() %>%
|
f@0
|
8 group_by(iteration, gt) %>%
|
f@0
|
9 summarise(num_ex = n())
|
f@0
|
10 }
|
f@0
|
11
|
f@0
|
12 class_names <- function(preds, sets) {
|
f@0
|
13 preds %>%
|
f@0
|
14 inner_join(sets, by = c('iteration', 'ex_id')) %>%
|
f@0
|
15 select(ex_id, gt) %>% unique()
|
f@0
|
16 }
|
f@0
|
17
|
f@0
|
18 names <- class_names(preds_orig, test_sets)
|
f@0
|
19
|
f@0
|
20 sizes <- extract_sizes(preds_orig, reg_sets)
|
f@0
|
21
|
f@0
|
22 simulate_set <- function(x, sizes) {
|
f@0
|
23 y <- data.frame()
|
f@0
|
24 for (i in 1:nrow(sizes)) {
|
f@0
|
25 y <- rbind(
|
f@0
|
26 y,
|
f@0
|
27 x %>%
|
f@0
|
28 filter(gt == sizes[i,]$gt) %>%
|
f@0
|
29 sample_n(sizes[i,]$num_ex)
|
f@0
|
30 )
|
f@0
|
31 }
|
f@0
|
32 y
|
f@0
|
33 }
|
f@0
|
34
|
f@0
|
35 simulate_sets <- function(x, names, sizes, num_simul = 10) {
|
f@0
|
36 y <- data.frame()
|
f@0
|
37 for (s in 1:num_simul) {
|
f@0
|
38 for (it in unique(x$iteration)) {
|
f@0
|
39 y <- rbind(
|
f@0
|
40 y,
|
f@0
|
41 simulate_set(x %>%
|
f@0
|
42 filter(iteration == it) %>%
|
f@0
|
43 inner_join(names, by = c('ex_id')),
|
f@0
|
44 sizes %>%
|
f@0
|
45 filter(iteration == it)) %>%
|
f@0
|
46 mutate(iteration = it, simul = s))
|
f@0
|
47 }}
|
f@0
|
48 y
|
f@0
|
49 }
|
f@0
|
50
|
f@0
|
51 set.seed(1986)
|
f@0
|
52
|
f@0
|
53 simul_sets <- simulate_sets(test_sets, names, sizes, num_simul = 100)
|
f@0
|
54
|
f@0
|
55
|
f@0
|
56 simulate_foms <- function(preds, simul_sets) {
|
f@0
|
57 foms <- data.frame()
|
f@0
|
58 for (i in unique(simul_sets$simul)) {
|
f@0
|
59 print(i)
|
f@0
|
60 foms <- rbind(
|
f@0
|
61 foms,
|
f@0
|
62 get_fom_table(preds %>%
|
f@0
|
63 inner_join(simul_sets %>%
|
f@0
|
64 filter(simul == i) %>%
|
f@0
|
65 select(-simul),
|
f@0
|
66 by = c('iteration', 'ex_id', 'gt'))) %>%
|
f@0
|
67 ungroup() %>%
|
f@0
|
68 mutate(simul = i)
|
f@0
|
69 )
|
f@0
|
70 }
|
f@0
|
71 foms
|
f@0
|
72
|
f@0
|
73 }
|
f@0
|
74
|
f@0
|
75 simul_foms <- simulate_foms(preds_orig, simul_sets)
|
f@0
|
76
|
f@0
|
77 compare <- function(x, y) {
|
f@0
|
78 x %>% inner_join(
|
f@0
|
79 y, by = c('iteration', 'alg', 'feat_col', 'feat_set')) %>%
|
f@0
|
80 ungroup() %>%
|
f@0
|
81 mutate(loe = mean_recall.x >= mean_recall.y) %>%
|
f@0
|
82 summarise(sum(loe) / n()) %>% pull()
|
f@0
|
83 }
|
f@0
|
84
|
f@0
|
85 simulate_comparisons <- function(foms, simul_foms, test=F) {
|
f@0
|
86 if (!test) {
|
f@0
|
87 sapply(X = unique(simul_foms$simul),
|
f@0
|
88 FUN = function(i) {
|
f@0
|
89 compare(foms, simul_foms %>% filter(simul == i))})
|
f@0
|
90 } else {
|
f@0
|
91 sapply(X = unique(simul_foms$simul),
|
f@0
|
92 FUN = function(i) {
|
f@0
|
93 compare(simul_foms %>% filter(simul == i), foms)})
|
f@0
|
94 }
|
f@0
|
95 }
|
f@0
|
96
|
f@0
|
97
|
f@0
|
98 reg_comparisons <- simulate_comparisons(reg_fom_iter_orig, simul_foms)
|
f@0
|
99
|
f@0
|
100 reg_test <- compare(reg_fom_iter_orig, test_fom_iter_orig)
|
f@0
|
101
|
f@0
|
102
|
f@0
|
103 test_comparisons <- simulate_comparisons(test_fom_iter_orig, simul_foms, test=T)
|
f@0
|
104
|
f@0
|
105 simulate_all_comparisons <- function(simul_foms) {
|
f@0
|
106 all_comp <- data.frame()
|
f@0
|
107 for (i in unique(simul_foms$simul)) {
|
f@0
|
108 all_comp <- rbind(
|
f@0
|
109 all_comp,
|
f@0
|
110 data.frame(
|
f@0
|
111 simul = i,
|
f@0
|
112 simul_comp = unique(simul_foms$simul)[-i],
|
f@0
|
113 prop = simulate_comparisons(
|
f@0
|
114 simul_foms %>% filter(simul == i),
|
f@0
|
115 simul_foms %>% filter(simul != i))))
|
f@0
|
116 }
|
f@0
|
117 all_comp
|
f@0
|
118 }
|
f@0
|
119
|
f@0
|
120 all_comparisons <- simulate_all_comparisons(simul_foms)
|
f@0
|
121
|
f@0
|
122 ggplot() +
|
f@0
|
123 geom_density(data=all_comparisons,
|
f@0
|
124 aes(x=prop, group=simul),
|
f@0
|
125 color='grey85') +
|
f@0
|
126 geom_density(data=all_comparisons %>%
|
f@0
|
127 group_by(simul) %>%
|
f@0
|
128 summarise(av_prop = mean(prop)),
|
f@0
|
129 aes(x=av_prop), linetype='dashed') +
|
f@0
|
130 geom_density(data=data.frame(prop=reg_comparisons),
|
f@0
|
131 aes(x=prop)) +
|
f@0
|
132 xlim(c(0, 1)) +
|
f@0
|
133 theme_bw() +
|
f@0
|
134 xlab('Proportion of measurements') +
|
f@0
|
135 ylab('Density')
|