diff analysis/simulate_sets.R @ 0:205974c9568c tip

Initial commit. Predictions not included for lack of space.
author franrodalg <f.rodriguezalgarra@qmul.ac.uk>
date Sat, 29 Jun 2019 18:45:50 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/analysis/simulate_sets.R	Sat Jun 29 18:45:50 2019 +0100
@@ -0,0 +1,135 @@
+
+
+
+extract_sizes <- function(preds, sets) {
+  preds %>%
+    inner_join(sets, by = c('iteration', 'ex_id')) %>%
+    select(iteration, ex_id, gt) %>% unique() %>%
+    group_by(iteration, gt) %>%
+    summarise(num_ex = n())
+}
+
+class_names <- function(preds, sets) {
+  preds %>%
+    inner_join(sets, by = c('iteration', 'ex_id')) %>%
+    select(ex_id, gt) %>% unique()
+}
+
+names <- class_names(preds_orig, test_sets)
+
+sizes <- extract_sizes(preds_orig, reg_sets)
+
+simulate_set <- function(x, sizes) {
+  y <- data.frame()
+  for (i in 1:nrow(sizes)) {
+    y <- rbind(
+      y,
+      x %>%
+        filter(gt == sizes[i,]$gt) %>%
+        sample_n(sizes[i,]$num_ex)
+    )
+  }
+  y
+}
+
+simulate_sets <- function(x, names, sizes, num_simul = 10) {
+  y <- data.frame()
+  for (s in 1:num_simul) {
+    for (it in unique(x$iteration)) {
+      y <- rbind(
+        y,
+        simulate_set(x %>%
+                       filter(iteration == it) %>%
+                       inner_join(names, by = c('ex_id')),
+                     sizes %>%
+                       filter(iteration == it)) %>%
+          mutate(iteration = it, simul = s))
+    }}
+  y
+}
+
+set.seed(1986)
+
+simul_sets <- simulate_sets(test_sets, names, sizes, num_simul = 100)
+
+
+simulate_foms <- function(preds, simul_sets) {
+  foms <- data.frame()
+  for (i in unique(simul_sets$simul)) {
+    print(i)
+    foms <- rbind(
+      foms,
+      get_fom_table(preds %>%
+                      inner_join(simul_sets %>%
+                                   filter(simul == i) %>%
+                                   select(-simul),
+                                 by = c('iteration', 'ex_id', 'gt'))) %>%
+        ungroup() %>%
+        mutate(simul = i) 
+    )
+  }
+  foms
+  
+}
+
+simul_foms <- simulate_foms(preds_orig, simul_sets)
+
+compare <- function(x, y) {
+  x %>% inner_join(
+    y, by = c('iteration', 'alg', 'feat_col', 'feat_set')) %>%
+    ungroup() %>%
+    mutate(loe = mean_recall.x >= mean_recall.y) %>%
+    summarise(sum(loe) / n()) %>% pull()
+}
+
+simulate_comparisons <- function(foms, simul_foms, test=F) {
+  if (!test) {
+      sapply(X = unique(simul_foms$simul),
+         FUN = function(i) {
+           compare(foms, simul_foms %>% filter(simul == i))}) 
+  } else {
+      sapply(X = unique(simul_foms$simul),
+           FUN = function(i) {
+             compare(simul_foms %>% filter(simul == i), foms)}) 
+  }
+}
+
+
+reg_comparisons <- simulate_comparisons(reg_fom_iter_orig, simul_foms)
+
+reg_test <- compare(reg_fom_iter_orig, test_fom_iter_orig)
+
+
+test_comparisons <- simulate_comparisons(test_fom_iter_orig, simul_foms, test=T)
+
+simulate_all_comparisons <- function(simul_foms) {
+  all_comp <- data.frame()
+  for (i in unique(simul_foms$simul)) {
+    all_comp <- rbind(
+      all_comp,
+      data.frame(
+        simul = i,
+        simul_comp = unique(simul_foms$simul)[-i],
+        prop = simulate_comparisons(
+          simul_foms %>% filter(simul == i),
+          simul_foms %>% filter(simul != i))))
+  }
+  all_comp
+}
+
+all_comparisons <- simulate_all_comparisons(simul_foms)
+
+ggplot() +
+  geom_density(data=all_comparisons,
+               aes(x=prop, group=simul),
+               color='grey85') +
+  geom_density(data=all_comparisons %>%
+                 group_by(simul) %>%
+                 summarise(av_prop = mean(prop)),
+               aes(x=av_prop), linetype='dashed') +
+  geom_density(data=data.frame(prop=reg_comparisons),
+               aes(x=prop)) +
+  xlim(c(0, 1)) +
+  theme_bw() +
+  xlab('Proportion of measurements') +
+  ylab('Density')