f@0: ## Generic sampling function f@0: get_samples <- function(num_iter = 1, mode = 'cv', f@0: N = 0, num_folds = 4, f@0: stratified = F, keep_prop = F, f@0: db = "../db/gtzan.db", classes = NULL){ f@0: f@0: if (mode == 'cv') f@0: get_cv_samples( f@0: num_iter = num_iter, num_folds = num_folds, f@0: stratified = stratified, db = db, classes = classes) f@0: else f@0: get_bs_samples( f@0: N = N, num_iter = num_iter, stratified = stratified, f@0: keep_prop = keep_prop, db = db, classes = classes) f@0: f@0: } f@0: f@0: # CROSS-VALIDATION f@0: f@0: ## Cross-Validation sampling f@0: get_cv_samples <- function(num_iter = 1, num_folds = 4, stratified = F, f@0: db = "../db/gtzan.db", classes = NULL){ f@0: f@0: train <- data.frame() f@0: test <- data.frame() f@0: f@0: for (i in 1:num_iter){ f@0: folds <- f@0: get_folds(num_folds = num_folds, stratified = stratified, f@0: db = db, classes = classes) f@0: f@0: for (j in 1:num_folds){ f@0: f@0: train <- rbind(train, f@0: data.frame(ex_id = folds[[j]]$train, f@0: iter = i, fold = j, f@0: mode = 'cv', f@0: stratified = stratified)) f@0: test <- rbind(test, f@0: data.frame(ex_id = folds[[j]]$test, f@0: iter = i, fold = j, f@0: mode = 'cv', f@0: stratified = stratified)) f@0: } f@0: f@0: } f@0: f@0: return(list(train = train, test = test)) f@0: f@0: } f@0: f@0: f@0: # Random assignment into folds f@0: .get_assignment <- function(N, num_folds){ f@0: f@0: sample( f@0: as.numeric( f@0: sapply(1:num_folds, f@0: function(x) rep(x, N/num_folds)))) f@0: f@0: } f@0: f@0: # Obtain folds for Cross-Validation f@0: get_folds <- function(num_folds = 4, stratified = F, f@0: db = "../db/gtzan.db", classes = NULL){ f@0: f@0: res <- get_excerpts_classes(db = db, classes) %>% f@0: arrange(class_id, ex_id_class) f@0: f@0: if(stratified){ f@0: assignment <- numeric(0) f@0: for(i in unique(res$class_id)){ f@0: N_class <- f@0: filter(res, class_id == i) %>% f@0: nrow() f@0: assignment <- f@0: c(assignment, .get_assignment(N_class, num_folds)) f@0: } f@0: } f@0: else f@0: assignment <- .get_assignment(nrow(res), num_folds) f@0: f@0: folds <- vector("list", num_folds) f@0: aux <- res$ex_id f@0: f@0: for (i in 1:num_folds){ f@0: folds[[i]][['test']] <- f@0: aux[which(assignment == i)] f@0: folds[[i]][['train']] <- f@0: aux[which(assignment != i)] f@0: } f@0: f@0: folds f@0: f@0: } f@0: f@0: f@0: # BOOTSTRAP f@0: f@0: ## Boostrap sampling f@0: get_bs_samples <- function(N, num_iter, stratified = F, keep_prop = F, f@0: db = "../db/gtzan.db", classes = NULL, f@0: hold_out_ex_ids = NULL){ f@0: f@0: res <- get_excerpts_classes(db, classes) f@0: f@0: if (N == 0 | is.null(N)) f@0: N <- nrow(res) f@0: f@0: train <- data.frame() f@0: test <- data.frame() f@0: f@0: for (i in 1:num_iter){ f@0: train_test <- f@0: get_bs_sample(N, stratified, keep_prop, db, classes, res, f@0: hold_out_ex_ids) f@0: train <- rbind(train, f@0: data.frame(ex_id = train_test$train, f@0: iter = i, f@0: mode = 'bs', f@0: stratified = stratified, f@0: keep_prop = keep_prop)) f@0: test <- rbind(test, f@0: data.frame(ex_id = train_test$test, f@0: iter = i, f@0: mode = 'bs', f@0: stratified = stratified, f@0: keep_prop = keep_prop)) f@0: } f@0: f@0: return(list(train = train, test = test)) f@0: f@0: } f@0: f@0: ## Correct proportions for test set stratification f@0: .correct_prop <- function(samples, reference_samples, f@0: db = "../db/gtzan.db"){ f@0: f@0: ref <- as.factor( f@0: get_classes(reference_samples, unique_classes = F, db = db)) f@0: classes <- factor( f@0: get_classes(samples, unique_classes = F, db = db), f@0: levels = levels(ref)) f@0: f@0: ref_counts <- table(ref) f@0: ref_prop <- ref_counts / sum(ref_counts) f@0: f@0: prior_counts <- table(classes) f@0: post_counts <- round(ref_prop * min(table(classes) / ref_prop)) f@0: f@0: post_samples <- numeric(0) f@0: for (class in levels(classes)){ f@0: post_samples <- f@0: c(post_samples, f@0: sample(samples[which(classes == class)], f@0: size = post_counts[[class]])) f@0: } f@0: f@0: post_samples f@0: f@0: } f@0: f@0: ## Bootstrap single train/test pair f@0: get_bs_sample <- function(N, stratified = F, keep_prop = F, f@0: db = "../db/gtzan.db", classes = NULL, f@0: df = NULL, f@0: hold_out_ex_ids = NULL f@0: ){ f@0: f@0: if(is.null(df)) f@0: df <- get_excerpts_classes(db, classes) f@0: f@0: n_ex_class <- table(df$class_id) f@0: print(n_ex_class) f@0: f@0: if(!is.null(hold_out_ex_ids)) f@0: df <- df %>% filter(! (ex_id %in% hold_out_ex_ids)) f@0: f@0: train <- numeric(0) f@0: f@0: if(stratified){ f@0: for(j in unique(df$class_id)){ f@0: ex_class <- filter(df, class_id == j) %>% f@0: select(ex_id) %>% f@0: unlist() %>% f@0: unname() f@0: train <- c(train, f@0: sample(ex_class, f@0: size = n_ex_class[which(names(n_ex_class) == j)], f@0: replace = T)) f@0: } f@0: } f@0: else{ f@0: train <- sample(df$ex_id, size = N, replace = T) f@0: } f@0: f@0: test <- sort(c(df$ex_id[!(df$ex_id %in% train)], hold_out_ex_ids)) f@0: if(keep_prop){ f@0: test <- .correct_prop(test, reference = train) f@0: } f@0: f@0: return(list(train = train, test = test)) f@0: f@0: }