confint: metadata/parse_file.Rmd annotate

annotate metadata/parse_file.Rmd @ 0:205974c9568c tip

Initial commit. Predictions not included for lack of space.

author	franrodalg <f.rodriguezalgarra@qmul.ac.uk>
date	Sat, 29 Jun 2019 18:45:50 +0100
parents
children

rev	line source
f@0	1 ---
f@0	2 title: "GTZAN index parsing"
f@0	3 output: html_notebook
f@0	4 ---
f@0	5
f@0	6 ```{r libraries}
f@0	7 library(tidyverse)
f@0	8 ```
f@0	9
f@0	10 ## Reading the file
f@0	11
f@0	12 ```{r read_file_functions}
# Read a text file into a character vector, one element per line.
#
# filename: path of the file to read.
#
# Returns the character vector produced by readLines(). Any error raised
# while opening or reading the file is propagated to the caller.
read_text <- function(filename) {
  con <- file(filename)
  # Register the cleanup before reading so the connection is released even
  # when readLines() fails (e.g. the file does not exist).
  on.exit(close(con), add = TRUE)
  readLines(con)
}
f@0	18
# Drop every line that contains the comment marker.
#
# text:         character vector of lines.
# comment_char: comment marker; it is pasted into a regular expression, so
#               regex metacharacters in it keep their special meaning.
#
# Returns the elements of `text` that do not match. A line is removed as a
# whole when the marker appears anywhere in it: trailing comments are not
# stripped, the entire line is discarded.
remove_comments <- function(text, comment_char) {
  # Two alternatives joined by a plain "|": the marker at the start of the
  # line, or the marker preceded by any text. ("\|" is not a valid escape in
  # an R string literal and would not act as alternation in a regex either.)
  comment_regex <- paste0("^", comment_char, "|^.*", comment_char)
  text[!grepl(comment_regex, text)]
}
f@0	24
# Split delimited lines into a data frame of trimmed character fields.
#
# lines:     character vector, one record per element.
# separator: field separator, passed to strsplit() as a regular expression.
#
# Returns a data frame with one row per line and as many columns as the
# longest record has fields (default names X1, X2, ...). Shorter records are
# padded with NA on the right. Empty input gives a data frame with no rows.
parse_long_sep <- function(lines, separator) {
  # Always work with a list of character vectors. sapply() would simplify to
  # a matrix whenever all records have the same number of fields, which
  # collapsed the result into a single row.
  fields <- lapply(strsplit(lines, split = separator), trimws)
  n_cols <- max(lengths(fields), 0L)

  # Indexing past the end of a character vector yields NA, which pads every
  # record to the common width.
  padded <- lapply(fields, `[`, seq_len(n_cols))
  cells <- as.character(unlist(padded))

  df <- data.frame(
    matrix(cells, nrow = length(fields), ncol = n_cols, byrow = TRUE),
    stringsAsFactors = FALSE
  )
  rownames(df) <- NULL
  df
}
f@0	34
# Load a delimited text file as a data frame of character fields.
#
# filename:     path of the file to read.
# separator:    field separator handed to parse_long_sep().
# comment_char: comment marker handed to remove_comments().
#
# The file is read, comment lines are discarded, the remaining lines are
# split into fields, and missing fields are returned as empty strings
# instead of NA.
parse_file <- function(filename, separator, comment_char) {
  parsed <- parse_long_sep(
    remove_comments(read_text(filename), comment_char),
    separator
  )
  parsed[is.na(parsed)] <- ""
  parsed
}
f@0	42 ```
f@0	43
f@0	44 ```{r read_file}
f@0	45
# Build the index table from "<file>.txt".
#
# file: base name of the index file, without the ".txt" extension.
#
# The file is parsed with ":::" as field separator and "#" as comment
# marker; the parsed table must have exactly three columns, which are named
# file_name, artist_list and track_name. A running ex_id is added as the
# first column, and file_name is replaced by its first two pieces (class,
# ex_id_class) as produced by tidyr::separate() with its default separator;
# ex_id_class is converted to numeric.
# NOTE(review): presumably file names look like "<class>.<number>.<ext>" -
# confirm against the index file.
create_data_frame <- function(file = "GTZANindex") {
  index_file <- paste0(file, ".txt")

  data <- parse_file(index_file, separator = ":::", comment_char = "#")
  colnames(data) <- c("file_name", "artist_list", "track_name")

  # seq_len() gives an empty sequence for an empty table, whereas
  # 1:nrow(data) would give c(1, 0) and fail on assignment.
  data$ex_id <- seq_len(nrow(data))
  data <- data %>%
    select(ex_id, everything()) %>%
    separate(file_name, c("class", "ex_id_class"),
             remove = TRUE, extra = "drop")
  data$ex_id_class <- as.numeric(data$ex_id_class)
  data
}
f@0	62
f@0	63 ```
f@0	64
f@0	65
f@0	66 ```{r bob}
# Parse "<filename>.txt" and export the resulting table as "<filename>.csv".
filename <- "GTZANindex"
data_1 <- create_data_frame(filename)
# Spell out FALSE: the shorthand F is an ordinary variable that can be
# reassigned.
write.csv(data_1, file = paste0(filename, ".csv"), row.names = FALSE)
f@0	70 ```

Mercurial > hg > confint

annotate metadata/parse_file.Rmd @ 0:205974c9568c tip