Mercurial > hg > confint
comparison db/create_db.Rmd @ 0:205974c9568c tip
Initial commit. Predictions not included for lack of space.
author | franrodalg <f.rodriguezalgarra@qmul.ac.uk> |
---|---|
date | Sat, 29 Jun 2019 18:45:50 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:205974c9568c |
---|---|
1 --- | |
2 title: "Create Database" | |
3 output: html_notebook | |
4 --- | |
5 | |
6 ```{r} | |
7 library(tidyverse) | |
8 if(!require("RSQLite")){ | |
9 install.packages("RSQLite") | |
10 library("RSQLite") | |
11 } | |
12 ``` | |
13 | |
14 ```{r read_csv} | |
15 filename <- '../metadata/GTZANindex.csv' | |
16 data <- read.csv(filename, stringsAsFactors = F) | |
17 ``` | |
18 | |
19 ```{r fill_empty} | |
20 data <- | |
21 data %>% | |
22 mutate( | |
23 artist_list = ifelse( | |
24 artist_list == '', | |
25 paste0("Unknown Artist (", class, "_", ex_id_class, ")"), | |
26 artist_list), | |
27 track_name = ifelse( | |
28 track_name == '', | |
29 paste0("Unknown Track (", class, "_", ex_id_class, ")"), | |
30 track_name) | |
31 ) | |
32 ``` | |
33 | |
34 ```{r reconstruct_file} | |
35 ext <- 'au' | |
36 | |
37 get_filename <- function(class, ex_id_class, ext = 'wav'){ | |
38 paste0(class, '.', | |
39 ifelse(ex_id_class < 10, "0000", "000"), | |
40 ex_id_class, '.', ext) | |
41 } | |
42 ``` | |
43 | |
44 ```{r create_rec_id} | |
45 rep <- read.csv(file = 'rep.csv') | |
46 | |
47 data$rec_id <- | |
48 sapply(1:max(data$ex_id), | |
49 function(x) ifelse(x %in% rep$ex_id, { | |
50 i <- rep[which(x == rep$ex_id), 'rep_id'] | |
51 min(rep[which(rep$rep_id == i), 'ex_id'])}, | |
52 x)) | |
53 ``` | |
54 | |
55 | |
56 ## Create Tables | |
57 | |
58 ```{r excerpts} | |
59 excerpts <- data %>% select(ex_id, rec_id) | |
60 ``` | |
61 ```{r recordings} | |
62 recordings <- data %>% | |
63 select(rec_id, track_name) %>% | |
64 rename(title = track_name) %>% | |
65 unique() | |
66 ``` | |
67 ```{r artists} | |
68 # Find and split individual artists from artist lists | |
69 artist_list <- data %>% select(artist_list) %>% unlist() | |
70 artists <- data.frame(artist_id = numeric(), | |
71 artist_name = character(), | |
72 stringsAsFactors = F) | |
73 recordings_artists <- data.frame(rec_id = numeric(), | |
74 artist_id = numeric()) | |
75 | |
76 for (i in 1:nrow(data)){ | |
77 aux <- unname( | |
78 sapply( | |
79 unlist(strsplit(data[i, 'artist_list'], split = ';')), trimws)) | |
80 | |
81 for (j in 1:length(aux)){ | |
82 if (any(aux[j] %in% artists$artist_name)){ | |
83 artist_id <- as.numeric( | |
84 artists[artists$artist_name == aux[j], 'artist_id']) | |
85 } | |
86 else { | |
87 artist_id <- ifelse(nrow(artists) < 1, 1, | |
88 max(artists$artist_id) + 1) | |
89 artists <- | |
90 rbind(artists, | |
91 data.frame(artist_id = artist_id, | |
92 artist_name = unname(aux[j]), | |
93 stringsAsFactors = F)) | |
94 } | |
95 recordings_artists <- | |
96 rbind(recordings_artists, | |
97 data.frame(rec_id = as.numeric(data[i, 'rec_id']), | |
98 artist_id = artist_id)) | |
99 } | |
100 | |
101 } | |
102 | |
103 recordings_artists <- unique(recordings_artists) | |
104 ``` | |
105 | |
106 ```{r classes} | |
107 class_names <- unique(data$class) | |
108 classes <- data.frame(class_id = 1:length(class_names), | |
109 class = class_names, stringsAsFactors = F) | |
110 excerpts_classes <- data %>% | |
111 select(ex_id, class, ex_id_class) %>% | |
112 inner_join(classes, by = c('class')) %>% | |
113 select(ex_id, class_id, ex_id_class) | |
114 ``` | |
115 | |
116 | |
117 | |
118 ```{r create_db} | |
119 | |
120 tables <- c( | |
121 "excerpts", "recordings", "artists", "recordings_artists", | |
122 "classes", "excerpts_classes" | |
123 ) | |
124 | |
125 con <- dbConnect(SQLite(), "gtzan.db") | |
126 | |
127 sapply( | |
128 X = tables, | |
129 FUN = function(x) | |
130 dbWriteTable(con, name = x, value = get(x), overwrite = T) | |
131 ) | |
132 | |
133 dbDisconnect(con) | |
134 ``` | |
135 |