f@0
|
1 ---
|
f@0
|
2 title: "Create Database"
|
f@0
|
3 output: html_notebook
|
f@0
|
4 ---
|
f@0
|
5
|
f@0
|
6 ```{r}
|
f@0
|
7 library(tidyverse)
|
f@0
|
8 if(!require("RSQLite")){
|
f@0
|
9 install.packages("RSQLite")
|
f@0
|
10 library("RSQLite")
|
f@0
|
11 }
|
f@0
|
12 ```
|
f@0
|
13
|
f@0
|
14 ```{r read_csv}
|
f@0
|
15 filename <- '../metadata/GTZANindex.csv'
|
f@0
|
16 data <- read.csv(filename, stringsAsFactors = F)
|
f@0
|
17 ```
|
f@0
|
18
|
f@0
|
19 ```{r fill_empty}
|
f@0
|
20 data <-
|
f@0
|
21 data %>%
|
f@0
|
22 mutate(
|
f@0
|
23 artist_list = ifelse(
|
f@0
|
24 artist_list == '',
|
f@0
|
25 paste0("Unknown Artist (", class, "_", ex_id_class, ")"),
|
f@0
|
26 artist_list),
|
f@0
|
27 track_name = ifelse(
|
f@0
|
28 track_name == '',
|
f@0
|
29 paste0("Unknown Track (", class, "_", ex_id_class, ")"),
|
f@0
|
30 track_name)
|
f@0
|
31 )
|
f@0
|
32 ```
|
f@0
|
33
|
f@0
|
34 ```{r reconstruct_file}
|
f@0
|
35 ext <- 'au'
|
f@0
|
36
|
f@0
|
37 get_filename <- function(class, ex_id_class, ext = 'wav'){
|
f@0
|
38 paste0(class, '.',
|
f@0
|
39 ifelse(ex_id_class < 10, "0000", "000"),
|
f@0
|
40 ex_id_class, '.', ext)
|
f@0
|
41 }
|
f@0
|
42 ```
|
f@0
|
43
|
f@0
|
44 ```{r create_rec_id}
|
f@0
|
45 rep <- read.csv(file = 'rep.csv')
|
f@0
|
46
|
f@0
|
47 data$rec_id <-
|
f@0
|
48 sapply(1:max(data$ex_id),
|
f@0
|
49 function(x) ifelse(x %in% rep$ex_id, {
|
f@0
|
50 i <- rep[which(x == rep$ex_id), 'rep_id']
|
f@0
|
51 min(rep[which(rep$rep_id == i), 'ex_id'])},
|
f@0
|
52 x))
|
f@0
|
53 ```
|
f@0
|
54
|
f@0
|
55
|
f@0
|
56 ## Create Tables
|
f@0
|
57
|
f@0
|
58 ```{r excerpts}
|
f@0
|
59 excerpts <- data %>% select(ex_id, rec_id)
|
f@0
|
60 ```
|
f@0
|
61 ```{r recordings}
|
f@0
|
62 recordings <- data %>%
|
f@0
|
63 select(rec_id, track_name) %>%
|
f@0
|
64 rename(title = track_name) %>%
|
f@0
|
65 unique()
|
f@0
|
66 ```
|
f@0
|
67 ```{r artists}
|
f@0
|
68 # Find and split individual artists from artist lists
|
f@0
|
69 artist_list <- data %>% select(artist_list) %>% unlist()
|
f@0
|
70 artists <- data.frame(artist_id = numeric(),
|
f@0
|
71 artist_name = character(),
|
f@0
|
72 stringsAsFactors = F)
|
f@0
|
73 recordings_artists <- data.frame(rec_id = numeric(),
|
f@0
|
74 artist_id = numeric())
|
f@0
|
75
|
f@0
|
76 for (i in 1:nrow(data)){
|
f@0
|
77 aux <- unname(
|
f@0
|
78 sapply(
|
f@0
|
79 unlist(strsplit(data[i, 'artist_list'], split = ';')), trimws))
|
f@0
|
80
|
f@0
|
81 for (j in 1:length(aux)){
|
f@0
|
82 if (any(aux[j] %in% artists$artist_name)){
|
f@0
|
83 artist_id <- as.numeric(
|
f@0
|
84 artists[artists$artist_name == aux[j], 'artist_id'])
|
f@0
|
85 }
|
f@0
|
86 else {
|
f@0
|
87 artist_id <- ifelse(nrow(artists) < 1, 1,
|
f@0
|
88 max(artists$artist_id) + 1)
|
f@0
|
89 artists <-
|
f@0
|
90 rbind(artists,
|
f@0
|
91 data.frame(artist_id = artist_id,
|
f@0
|
92 artist_name = unname(aux[j]),
|
f@0
|
93 stringsAsFactors = F))
|
f@0
|
94 }
|
f@0
|
95 recordings_artists <-
|
f@0
|
96 rbind(recordings_artists,
|
f@0
|
97 data.frame(rec_id = as.numeric(data[i, 'rec_id']),
|
f@0
|
98 artist_id = artist_id))
|
f@0
|
99 }
|
f@0
|
100
|
f@0
|
101 }
|
f@0
|
102
|
f@0
|
103 recordings_artists <- unique(recordings_artists)
|
f@0
|
104 ```
|
f@0
|
105
|
f@0
|
106 ```{r classes}
|
f@0
|
107 class_names <- unique(data$class)
|
f@0
|
108 classes <- data.frame(class_id = 1:length(class_names),
|
f@0
|
109 class = class_names, stringsAsFactors = F)
|
f@0
|
110 excerpts_classes <- data %>%
|
f@0
|
111 select(ex_id, class, ex_id_class) %>%
|
f@0
|
112 inner_join(classes, by = c('class')) %>%
|
f@0
|
113 select(ex_id, class_id, ex_id_class)
|
f@0
|
114 ```
|
f@0
|
115
|
f@0
|
116
|
f@0
|
117
|
f@0
|
118 ```{r create_db}
|
f@0
|
119
|
f@0
|
120 tables <- c(
|
f@0
|
121 "excerpts", "recordings", "artists", "recordings_artists",
|
f@0
|
122 "classes", "excerpts_classes"
|
f@0
|
123 )
|
f@0
|
124
|
f@0
|
125 con <- dbConnect(SQLite(), "gtzan.db")
|
f@0
|
126
|
f@0
|
127 sapply(
|
f@0
|
128 X = tables,
|
f@0
|
129 FUN = function(x)
|
f@0
|
130 dbWriteTable(con, name = x, value = get(x), overwrite = T)
|
f@0
|
131 )
|
f@0
|
132
|
f@0
|
133 dbDisconnect(con)
|
f@0
|
134 ```
|
f@0
|
135
|