f@0
|
1 ## Generic sampling function
|
f@0
|
2 get_samples <- function(num_iter = 1, mode = 'cv',
|
f@0
|
3 N = 0, num_folds = 4,
|
f@0
|
4 stratified = F, keep_prop = F,
|
f@0
|
5 db = "../db/gtzan.db", classes = NULL){
|
f@0
|
6
|
f@0
|
7 if (mode == 'cv')
|
f@0
|
8 get_cv_samples(
|
f@0
|
9 num_iter = num_iter, num_folds = num_folds,
|
f@0
|
10 stratified = stratified, db = db, classes = classes)
|
f@0
|
11 else
|
f@0
|
12 get_bs_samples(
|
f@0
|
13 N = N, num_iter = num_iter, stratified = stratified,
|
f@0
|
14 keep_prop = keep_prop, db = db, classes = classes)
|
f@0
|
15
|
f@0
|
16 }
|
f@0
|
17
|
f@0
|
18 # CROSS-VALIDATION
|
f@0
|
19
|
f@0
|
20 ## Cross-Validation sampling
|
f@0
|
21 get_cv_samples <- function(num_iter = 1, num_folds = 4, stratified = F,
|
f@0
|
22 db = "../db/gtzan.db", classes = NULL){
|
f@0
|
23
|
f@0
|
24 train <- data.frame()
|
f@0
|
25 test <- data.frame()
|
f@0
|
26
|
f@0
|
27 for (i in 1:num_iter){
|
f@0
|
28 folds <-
|
f@0
|
29 get_folds(num_folds = num_folds, stratified = stratified,
|
f@0
|
30 db = db, classes = classes)
|
f@0
|
31
|
f@0
|
32 for (j in 1:num_folds){
|
f@0
|
33
|
f@0
|
34 train <- rbind(train,
|
f@0
|
35 data.frame(ex_id = folds[[j]]$train,
|
f@0
|
36 iter = i, fold = j,
|
f@0
|
37 mode = 'cv',
|
f@0
|
38 stratified = stratified))
|
f@0
|
39 test <- rbind(test,
|
f@0
|
40 data.frame(ex_id = folds[[j]]$test,
|
f@0
|
41 iter = i, fold = j,
|
f@0
|
42 mode = 'cv',
|
f@0
|
43 stratified = stratified))
|
f@0
|
44 }
|
f@0
|
45
|
f@0
|
46 }
|
f@0
|
47
|
f@0
|
48 return(list(train = train, test = test))
|
f@0
|
49
|
f@0
|
50 }
|
f@0
|
51
|
f@0
|
52
|
f@0
|
53 # Random assignment into folds
|
f@0
|
54 .get_assignment <- function(N, num_folds){
|
f@0
|
55
|
f@0
|
56 sample(
|
f@0
|
57 as.numeric(
|
f@0
|
58 sapply(1:num_folds,
|
f@0
|
59 function(x) rep(x, N/num_folds))))
|
f@0
|
60
|
f@0
|
61 }
|
f@0
|
62
|
f@0
|
63 # Obtain folds for Cross-Validation
|
f@0
|
64 get_folds <- function(num_folds = 4, stratified = F,
|
f@0
|
65 db = "../db/gtzan.db", classes = NULL){
|
f@0
|
66
|
f@0
|
67 res <- get_excerpts_classes(db = db, classes) %>%
|
f@0
|
68 arrange(class_id, ex_id_class)
|
f@0
|
69
|
f@0
|
70 if(stratified){
|
f@0
|
71 assignment <- numeric(0)
|
f@0
|
72 for(i in unique(res$class_id)){
|
f@0
|
73 N_class <-
|
f@0
|
74 filter(res, class_id == i) %>%
|
f@0
|
75 nrow()
|
f@0
|
76 assignment <-
|
f@0
|
77 c(assignment, .get_assignment(N_class, num_folds))
|
f@0
|
78 }
|
f@0
|
79 }
|
f@0
|
80 else
|
f@0
|
81 assignment <- .get_assignment(nrow(res), num_folds)
|
f@0
|
82
|
f@0
|
83 folds <- vector("list", num_folds)
|
f@0
|
84 aux <- res$ex_id
|
f@0
|
85
|
f@0
|
86 for (i in 1:num_folds){
|
f@0
|
87 folds[[i]][['test']] <-
|
f@0
|
88 aux[which(assignment == i)]
|
f@0
|
89 folds[[i]][['train']] <-
|
f@0
|
90 aux[which(assignment != i)]
|
f@0
|
91 }
|
f@0
|
92
|
f@0
|
93 folds
|
f@0
|
94
|
f@0
|
95 }
|
f@0
|
96
|
f@0
|
97
|
f@0
|
98 # BOOTSTRAP
|
f@0
|
99
|
f@0
|
100 ## Boostrap sampling
|
f@0
|
101 get_bs_samples <- function(N, num_iter, stratified = F, keep_prop = F,
|
f@0
|
102 db = "../db/gtzan.db", classes = NULL,
|
f@0
|
103 hold_out_ex_ids = NULL){
|
f@0
|
104
|
f@0
|
105 res <- get_excerpts_classes(db, classes)
|
f@0
|
106
|
f@0
|
107 if (N == 0 | is.null(N))
|
f@0
|
108 N <- nrow(res)
|
f@0
|
109
|
f@0
|
110 train <- data.frame()
|
f@0
|
111 test <- data.frame()
|
f@0
|
112
|
f@0
|
113 for (i in 1:num_iter){
|
f@0
|
114 train_test <-
|
f@0
|
115 get_bs_sample(N, stratified, keep_prop, db, classes, res,
|
f@0
|
116 hold_out_ex_ids)
|
f@0
|
117 train <- rbind(train,
|
f@0
|
118 data.frame(ex_id = train_test$train,
|
f@0
|
119 iter = i,
|
f@0
|
120 mode = 'bs',
|
f@0
|
121 stratified = stratified,
|
f@0
|
122 keep_prop = keep_prop))
|
f@0
|
123 test <- rbind(test,
|
f@0
|
124 data.frame(ex_id = train_test$test,
|
f@0
|
125 iter = i,
|
f@0
|
126 mode = 'bs',
|
f@0
|
127 stratified = stratified,
|
f@0
|
128 keep_prop = keep_prop))
|
f@0
|
129 }
|
f@0
|
130
|
f@0
|
131 return(list(train = train, test = test))
|
f@0
|
132
|
f@0
|
133 }
|
f@0
|
134
|
f@0
|
135 ## Correct proportions for test set stratification
|
f@0
|
136 .correct_prop <- function(samples, reference_samples,
|
f@0
|
137 db = "../db/gtzan.db"){
|
f@0
|
138
|
f@0
|
139 ref <- as.factor(
|
f@0
|
140 get_classes(reference_samples, unique_classes = F, db = db))
|
f@0
|
141 classes <- factor(
|
f@0
|
142 get_classes(samples, unique_classes = F, db = db),
|
f@0
|
143 levels = levels(ref))
|
f@0
|
144
|
f@0
|
145 ref_counts <- table(ref)
|
f@0
|
146 ref_prop <- ref_counts / sum(ref_counts)
|
f@0
|
147
|
f@0
|
148 prior_counts <- table(classes)
|
f@0
|
149 post_counts <- round(ref_prop * min(table(classes) / ref_prop))
|
f@0
|
150
|
f@0
|
151 post_samples <- numeric(0)
|
f@0
|
152 for (class in levels(classes)){
|
f@0
|
153 post_samples <-
|
f@0
|
154 c(post_samples,
|
f@0
|
155 sample(samples[which(classes == class)],
|
f@0
|
156 size = post_counts[[class]]))
|
f@0
|
157 }
|
f@0
|
158
|
f@0
|
159 post_samples
|
f@0
|
160
|
f@0
|
161 }
|
f@0
|
162
|
f@0
|
163 ## Bootstrap single train/test pair
|
f@0
|
164 get_bs_sample <- function(N, stratified = F, keep_prop = F,
|
f@0
|
165 db = "../db/gtzan.db", classes = NULL,
|
f@0
|
166 df = NULL,
|
f@0
|
167 hold_out_ex_ids = NULL
|
f@0
|
168 ){
|
f@0
|
169
|
f@0
|
170 if(is.null(df))
|
f@0
|
171 df <- get_excerpts_classes(db, classes)
|
f@0
|
172
|
f@0
|
173 n_ex_class <- table(df$class_id)
|
f@0
|
174 print(n_ex_class)
|
f@0
|
175
|
f@0
|
176 if(!is.null(hold_out_ex_ids))
|
f@0
|
177 df <- df %>% filter(! (ex_id %in% hold_out_ex_ids))
|
f@0
|
178
|
f@0
|
179 train <- numeric(0)
|
f@0
|
180
|
f@0
|
181 if(stratified){
|
f@0
|
182 for(j in unique(df$class_id)){
|
f@0
|
183 ex_class <- filter(df, class_id == j) %>%
|
f@0
|
184 select(ex_id) %>%
|
f@0
|
185 unlist() %>%
|
f@0
|
186 unname()
|
f@0
|
187 train <- c(train,
|
f@0
|
188 sample(ex_class,
|
f@0
|
189 size = n_ex_class[which(names(n_ex_class) == j)],
|
f@0
|
190 replace = T))
|
f@0
|
191 }
|
f@0
|
192 }
|
f@0
|
193 else{
|
f@0
|
194 train <- sample(df$ex_id, size = N, replace = T)
|
f@0
|
195 }
|
f@0
|
196
|
f@0
|
197 test <- sort(c(df$ex_id[!(df$ex_id %in% train)], hold_out_ex_ids))
|
f@0
|
198 if(keep_prop){
|
f@0
|
199 test <- .correct_prop(test, reference = train)
|
f@0
|
200 }
|
f@0
|
201
|
f@0
|
202 return(list(train = train, test = test))
|
f@0
|
203
|
f@0
|
204 }
|