f@0: --- f@0: title: "Create Database" f@0: output: html_notebook f@0: --- f@0: f@0: ```{r} f@0: library(tidyverse) f@0: if(!require("RSQLite")){ f@0: install.packages("RSQLite") f@0: library("RSQLite") f@0: } f@0: ``` f@0: f@0: ```{r read_csv} f@0: filename <- '../metadata/GTZANindex.csv' f@0: data <- read.csv(filename, stringsAsFactors = F) f@0: ``` f@0: f@0: ```{r fill_empty} f@0: data <- f@0: data %>% f@0: mutate( f@0: artist_list = ifelse( f@0: artist_list == '', f@0: paste0("Unknown Artist (", class, "_", ex_id_class, ")"), f@0: artist_list), f@0: track_name = ifelse( f@0: track_name == '', f@0: paste0("Unknown Track (", class, "_", ex_id_class, ")"), f@0: track_name) f@0: ) f@0: ``` f@0: f@0: ```{r reconstruct_file} f@0: ext <- 'au' f@0: f@0: get_filename <- function(class, ex_id_class, ext = 'wav'){ f@0: paste0(class, '.', f@0: ifelse(ex_id_class < 10, "0000", "000"), f@0: ex_id_class, '.', ext) f@0: } f@0: ``` f@0: f@0: ```{r create_rec_id} f@0: rep <- read.csv(file = 'rep.csv') f@0: f@0: data$rec_id <- f@0: sapply(1:max(data$ex_id), f@0: function(x) ifelse(x %in% rep$ex_id, { f@0: i <- rep[which(x == rep$ex_id), 'rep_id'] f@0: min(rep[which(rep$rep_id == i), 'ex_id'])}, f@0: x)) f@0: ``` f@0: f@0: f@0: ## Create Tables f@0: f@0: ```{r excerpts} f@0: excerpts <- data %>% select(ex_id, rec_id) f@0: ``` f@0: ```{r recordings} f@0: recordings <- data %>% f@0: select(rec_id, track_name) %>% f@0: rename(title = track_name) %>% f@0: unique() f@0: ``` f@0: ```{r artists} f@0: # Find and split individual artists from artist lists f@0: artist_list <- data %>% select(artist_list) %>% unlist() f@0: artists <- data.frame(artist_id = numeric(), f@0: artist_name = character(), f@0: stringsAsFactors = F) f@0: recordings_artists <- data.frame(rec_id = numeric(), f@0: artist_id = numeric()) f@0: f@0: for (i in 1:nrow(data)){ f@0: aux <- unname( f@0: sapply( f@0: unlist(strsplit(data[i, 'artist_list'], split = ';')), trimws)) f@0: f@0: for (j in 1:length(aux)){ f@0: if (any(aux[j] %in% artists$artist_name)){ f@0: artist_id <- as.numeric( f@0: artists[artists$artist_name == aux[j], 'artist_id']) f@0: } f@0: else { f@0: artist_id <- ifelse(nrow(artists) < 1, 1, f@0: max(artists$artist_id) + 1) f@0: artists <- f@0: rbind(artists, f@0: data.frame(artist_id = artist_id, f@0: artist_name = unname(aux[j]), f@0: stringsAsFactors = F)) f@0: } f@0: recordings_artists <- f@0: rbind(recordings_artists, f@0: data.frame(rec_id = as.numeric(data[i, 'rec_id']), f@0: artist_id = artist_id)) f@0: } f@0: f@0: } f@0: f@0: recordings_artists <- unique(recordings_artists) f@0: ``` f@0: f@0: ```{r classes} f@0: class_names <- unique(data$class) f@0: classes <- data.frame(class_id = 1:length(class_names), f@0: class = class_names, stringsAsFactors = F) f@0: excerpts_classes <- data %>% f@0: select(ex_id, class, ex_id_class) %>% f@0: inner_join(classes, by = c('class')) %>% f@0: select(ex_id, class_id, ex_id_class) f@0: ``` f@0: f@0: f@0: f@0: ```{r create_db} f@0: f@0: tables <- c( f@0: "excerpts", "recordings", "artists", "recordings_artists", f@0: "classes", "excerpts_classes" f@0: ) f@0: f@0: con <- dbConnect(SQLite(), "gtzan.db") f@0: f@0: sapply( f@0: X = tables, f@0: FUN = function(x) f@0: dbWriteTable(con, name = x, value = get(x), overwrite = T) f@0: ) f@0: f@0: dbDisconnect(con) f@0: ``` f@0: