Mercurial > hg > confint
diff metadata/parse_file.Rmd @ 0:205974c9568c tip
Initial commit. Predictions not included for lack of space.
author | franrodalg <f.rodriguezalgarra@qmul.ac.uk> |
---|---|
date | Sat, 29 Jun 2019 18:45:50 +0100 |
parents | |
children |
line wrap: on
line diff
---
title: "GTZAN index parsing"
output: html_notebook
---

```{r libraries}
library(tidyverse)
```

## Reading the file

```{r read_file_functions}
# Read a text file into a character vector, one element per line.
#
# filename: path of the file to read.
# Returns the lines of the file as a character vector.
read_text <- function(filename) {
  # Given a path, readLines() opens and closes the connection itself, so
  # nothing is left open if reading fails part-way.
  readLines(filename)
}

# Drop every line that contains the comment marker.
#
# text: character vector of lines.
# comment_char: comment marker; it is pasted into a regular expression, so
#   regex metacharacters in it are interpreted as such.
# Returns the elements of `text` in which the marker does not occur.
#
# NOTE(review): the second alternative of the pattern matches the marker
# anywhere in the line, so a data line with a trailing comment (or with the
# marker inside a field) is discarded entirely, not truncated. Confirm that
# this is the intended behaviour for the index file.
remove_comments <- function(text, comment_char) {
  comment_regex <- paste0("^", comment_char, "|^.*", comment_char)
  text[!grepl(comment_regex, text)]
}

# Split separator-delimited lines into a data frame of character columns.
#
# lines: character vector, one record per element.
# separator: field separator, passed to strsplit() as a regular expression.
# Returns a data frame with one row per line and columns X1, X2, ...;
# records with fewer fields than the longest one are padded with NA.
# An empty `lines` yields an empty data frame.
parse_long_sep <- function(lines, separator) {
  if (length(lines) == 0L) {
    return(data.frame())
  }

  # lapply() always yields a list, whether or not every line has the same
  # number of fields (sapply() would simplify the uniform case to a matrix).
  records <- lapply(unname(strsplit(lines, split = separator)), trimws)

  # Pad short records with NA so that all rows have the same width.
  n_fields <- max(lengths(records))
  records <- lapply(records, `length<-`, n_fields)

  fields <- do.call(rbind, records)
  df <- data.frame(fields, stringsAsFactors = FALSE)
  # Name the columns explicitly so the single-column case is also X1.
  names(df) <- paste0("X", seq_len(ncol(df)))
  rownames(df) <- seq_len(nrow(df))
  df
}

# Read a separator-delimited text file, skipping commented lines.
#
# filename: path of the file to read.
# separator: field separator (regular expression).
# comment_char: comment marker (see remove_comments()).
# Returns a data frame of character columns in which missing trailing
# fields are empty strings.
parse_file <- function(filename, separator, comment_char) {
  lines <- read_text(filename)
  lines <- remove_comments(lines, comment_char)
  data <- parse_long_sep(lines, separator)
  # Fields missing from short records were padded with NA; store "" instead.
  data[is.na(data)] <- ""
  data
}
```

```{r read_file}

# Build the index data frame from `<file>.txt`.
#
# file: base name (without extension) of the index file.
# Returns a data frame with columns ex_id, class, ex_id_class, artist_list
# and track_name. The file is expected to hold three ":::"-separated fields
# per record; "#" marks comment lines.
create_data_frame <- function(file = "GTZANindex") {
  index_file <- paste0(file, ".txt")

  data <- parse_file(index_file, separator = ":::", comment_char = "#")
  colnames(data) <- c("file_name", "artist_list", "track_name")

  # Running identifier over all records, in file order.
  data$ex_id <- seq_len(nrow(data))
  data <- data %>%
    select(ex_id, everything()) %>%
    # Keep the first two pieces of the file name as class and per-class id;
    # any further pieces are dropped (extra = "drop").
    separate(file_name, c("class", "ex_id_class"),
             remove = TRUE, extra = "drop")
  data$ex_id_class <- as.numeric(data$ex_id_class)
  data
}

```


```{r bob}
# Parse the index and save it as a CSV next to the source file.
filename <- "GTZANindex"
data_1 <- create_data_frame(filename)
write.csv(data_1, file = paste0(filename, ".csv"), row.names = FALSE)
```