comparison analysis/simulate_sets.R @ 0:205974c9568c tip

Initial commit. Predictions not included for lack of space.
author franrodalg <f.rodriguezalgarra@qmul.ac.uk>
date Sat, 29 Jun 2019 18:45:50 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:205974c9568c
1
2
3
4 extract_sizes <- function(preds, sets) {
5 preds %>%
6 inner_join(sets, by = c('iteration', 'ex_id')) %>%
7 select(iteration, ex_id, gt) %>% unique() %>%
8 group_by(iteration, gt) %>%
9 summarise(num_ex = n())
10 }
11
12 class_names <- function(preds, sets) {
13 preds %>%
14 inner_join(sets, by = c('iteration', 'ex_id')) %>%
15 select(ex_id, gt) %>% unique()
16 }
17
18 names <- class_names(preds_orig, test_sets)
19
20 sizes <- extract_sizes(preds_orig, reg_sets)
21
22 simulate_set <- function(x, sizes) {
23 y <- data.frame()
24 for (i in 1:nrow(sizes)) {
25 y <- rbind(
26 y,
27 x %>%
28 filter(gt == sizes[i,]$gt) %>%
29 sample_n(sizes[i,]$num_ex)
30 )
31 }
32 y
33 }
34
35 simulate_sets <- function(x, names, sizes, num_simul = 10) {
36 y <- data.frame()
37 for (s in 1:num_simul) {
38 for (it in unique(x$iteration)) {
39 y <- rbind(
40 y,
41 simulate_set(x %>%
42 filter(iteration == it) %>%
43 inner_join(names, by = c('ex_id')),
44 sizes %>%
45 filter(iteration == it)) %>%
46 mutate(iteration = it, simul = s))
47 }}
48 y
49 }
50
51 set.seed(1986)
52
53 simul_sets <- simulate_sets(test_sets, names, sizes, num_simul = 100)
54
55
56 simulate_foms <- function(preds, simul_sets) {
57 foms <- data.frame()
58 for (i in unique(simul_sets$simul)) {
59 print(i)
60 foms <- rbind(
61 foms,
62 get_fom_table(preds %>%
63 inner_join(simul_sets %>%
64 filter(simul == i) %>%
65 select(-simul),
66 by = c('iteration', 'ex_id', 'gt'))) %>%
67 ungroup() %>%
68 mutate(simul = i)
69 )
70 }
71 foms
72
73 }
74
75 simul_foms <- simulate_foms(preds_orig, simul_sets)
76
77 compare <- function(x, y) {
78 x %>% inner_join(
79 y, by = c('iteration', 'alg', 'feat_col', 'feat_set')) %>%
80 ungroup() %>%
81 mutate(loe = mean_recall.x >= mean_recall.y) %>%
82 summarise(sum(loe) / n()) %>% pull()
83 }
84
85 simulate_comparisons <- function(foms, simul_foms, test=F) {
86 if (!test) {
87 sapply(X = unique(simul_foms$simul),
88 FUN = function(i) {
89 compare(foms, simul_foms %>% filter(simul == i))})
90 } else {
91 sapply(X = unique(simul_foms$simul),
92 FUN = function(i) {
93 compare(simul_foms %>% filter(simul == i), foms)})
94 }
95 }
96
97
98 reg_comparisons <- simulate_comparisons(reg_fom_iter_orig, simul_foms)
99
100 reg_test <- compare(reg_fom_iter_orig, test_fom_iter_orig)
101
102
103 test_comparisons <- simulate_comparisons(test_fom_iter_orig, simul_foms, test=T)
104
105 simulate_all_comparisons <- function(simul_foms) {
106 all_comp <- data.frame()
107 for (i in unique(simul_foms$simul)) {
108 all_comp <- rbind(
109 all_comp,
110 data.frame(
111 simul = i,
112 simul_comp = unique(simul_foms$simul)[-i],
113 prop = simulate_comparisons(
114 simul_foms %>% filter(simul == i),
115 simul_foms %>% filter(simul != i))))
116 }
117 all_comp
118 }
119
120 all_comparisons <- simulate_all_comparisons(simul_foms)
121
122 ggplot() +
123 geom_density(data=all_comparisons,
124 aes(x=prop, group=simul),
125 color='grey85') +
126 geom_density(data=all_comparisons %>%
127 group_by(simul) %>%
128 summarise(av_prop = mean(prop)),
129 aes(x=av_prop), linetype='dashed') +
130 geom_density(data=data.frame(prop=reg_comparisons),
131 aes(x=prop)) +
132 xlim(c(0, 1)) +
133 theme_bw() +
134 xlab('Proportion of measurements') +
135 ylab('Density')