Mercurial > hg > confint
comparison analysis/simulate_sets.R @ 0:205974c9568c tip
Initial commit. Predictions not included for lack of space.
author | franrodalg <f.rodriguezalgarra@qmul.ac.uk> |
---|---|
date | Sat, 29 Jun 2019 18:45:50 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:205974c9568c |
---|---|
1 | |
2 | |
3 | |
4 extract_sizes <- function(preds, sets) { | |
5 preds %>% | |
6 inner_join(sets, by = c('iteration', 'ex_id')) %>% | |
7 select(iteration, ex_id, gt) %>% unique() %>% | |
8 group_by(iteration, gt) %>% | |
9 summarise(num_ex = n()) | |
10 } | |
11 | |
12 class_names <- function(preds, sets) { | |
13 preds %>% | |
14 inner_join(sets, by = c('iteration', 'ex_id')) %>% | |
15 select(ex_id, gt) %>% unique() | |
16 } | |
17 | |
18 names <- class_names(preds_orig, test_sets) | |
19 | |
20 sizes <- extract_sizes(preds_orig, reg_sets) | |
21 | |
22 simulate_set <- function(x, sizes) { | |
23 y <- data.frame() | |
24 for (i in 1:nrow(sizes)) { | |
25 y <- rbind( | |
26 y, | |
27 x %>% | |
28 filter(gt == sizes[i,]$gt) %>% | |
29 sample_n(sizes[i,]$num_ex) | |
30 ) | |
31 } | |
32 y | |
33 } | |
34 | |
35 simulate_sets <- function(x, names, sizes, num_simul = 10) { | |
36 y <- data.frame() | |
37 for (s in 1:num_simul) { | |
38 for (it in unique(x$iteration)) { | |
39 y <- rbind( | |
40 y, | |
41 simulate_set(x %>% | |
42 filter(iteration == it) %>% | |
43 inner_join(names, by = c('ex_id')), | |
44 sizes %>% | |
45 filter(iteration == it)) %>% | |
46 mutate(iteration = it, simul = s)) | |
47 }} | |
48 y | |
49 } | |
50 | |
51 set.seed(1986) | |
52 | |
53 simul_sets <- simulate_sets(test_sets, names, sizes, num_simul = 100) | |
54 | |
55 | |
56 simulate_foms <- function(preds, simul_sets) { | |
57 foms <- data.frame() | |
58 for (i in unique(simul_sets$simul)) { | |
59 print(i) | |
60 foms <- rbind( | |
61 foms, | |
62 get_fom_table(preds %>% | |
63 inner_join(simul_sets %>% | |
64 filter(simul == i) %>% | |
65 select(-simul), | |
66 by = c('iteration', 'ex_id', 'gt'))) %>% | |
67 ungroup() %>% | |
68 mutate(simul = i) | |
69 ) | |
70 } | |
71 foms | |
72 | |
73 } | |
74 | |
75 simul_foms <- simulate_foms(preds_orig, simul_sets) | |
76 | |
77 compare <- function(x, y) { | |
78 x %>% inner_join( | |
79 y, by = c('iteration', 'alg', 'feat_col', 'feat_set')) %>% | |
80 ungroup() %>% | |
81 mutate(loe = mean_recall.x >= mean_recall.y) %>% | |
82 summarise(sum(loe) / n()) %>% pull() | |
83 } | |
84 | |
85 simulate_comparisons <- function(foms, simul_foms, test=F) { | |
86 if (!test) { | |
87 sapply(X = unique(simul_foms$simul), | |
88 FUN = function(i) { | |
89 compare(foms, simul_foms %>% filter(simul == i))}) | |
90 } else { | |
91 sapply(X = unique(simul_foms$simul), | |
92 FUN = function(i) { | |
93 compare(simul_foms %>% filter(simul == i), foms)}) | |
94 } | |
95 } | |
96 | |
97 | |
98 reg_comparisons <- simulate_comparisons(reg_fom_iter_orig, simul_foms) | |
99 | |
100 reg_test <- compare(reg_fom_iter_orig, test_fom_iter_orig) | |
101 | |
102 | |
103 test_comparisons <- simulate_comparisons(test_fom_iter_orig, simul_foms, test=T) | |
104 | |
105 simulate_all_comparisons <- function(simul_foms) { | |
106 all_comp <- data.frame() | |
107 for (i in unique(simul_foms$simul)) { | |
108 all_comp <- rbind( | |
109 all_comp, | |
110 data.frame( | |
111 simul = i, | |
112 simul_comp = unique(simul_foms$simul)[-i], | |
113 prop = simulate_comparisons( | |
114 simul_foms %>% filter(simul == i), | |
115 simul_foms %>% filter(simul != i)))) | |
116 } | |
117 all_comp | |
118 } | |
119 | |
120 all_comparisons <- simulate_all_comparisons(simul_foms) | |
121 | |
122 ggplot() + | |
123 geom_density(data=all_comparisons, | |
124 aes(x=prop, group=simul), | |
125 color='grey85') + | |
126 geom_density(data=all_comparisons %>% | |
127 group_by(simul) %>% | |
128 summarise(av_prop = mean(prop)), | |
129 aes(x=av_prop), linetype='dashed') + | |
130 geom_density(data=data.frame(prop=reg_comparisons), | |
131 aes(x=prop)) + | |
132 xlim(c(0, 1)) + | |
133 theme_bw() + | |
134 xlab('Proportion of measurements') + | |
135 ylab('Density') |