Mercurial > hg > pitch-accuracy-and-interaction-in-unaccompanied-duet-singing
view latex/scripts/data_import.R @ 4:3e666d0329b5 tip
updated code
author | Jiajie Dai <daijiajie1@gmail.com> |
---|---|
date | Sat, 06 Jan 2018 12:20:49 +0000 |
parents | 6531169e6866 |
children |
line wrap: on
line source
cortable <- function(df) { nm <- names(df) # cm <- cor(df, method="spearman") for (ii in 1:ncol(df)) { cat(" ") if (ii>1) cat(rep("&", ii-1)) cat(nm[ii]) if (ii < ncol(df)) { for (jj in (ii+1):ncol(df)) { test <- cor.test(df[,ii], df[,jj], method="spearman") significant <- test$p.value < 0.01 # cat(sprintf("\n\n%i %i -- %s -- %s -- %0.3f -- %g -- %s\n\n", ii, jj, nm[ii], nm[jj], test$estimate, test$p.value, significant)) if (significant) cat("&", sprintf("\\textbf{%5.2f}",test$estimate)) else cat("&", sprintf("%5.2f",test$estimate)) } } cat("\\\\\n") } } makeDriftGraph <- function(data, singermode) { subdata <- data[data$singermode == singermode, ] meanpitch <- median(subdata$pitchmedian-subdata$semitone) # print(meanpitch) plot(I(subdata$pitchmedian - subdata$semitone) ~ I((subdata$run-1) * 25 + subdata$noteid), pch = "+", ylim = meanpitch + c(-2,2), xlab = "note number", ylab = "pitch after score subtraction") fit <- lm(I(subdata$pitchmedian - subdata$semitone) ~ I((subdata$run-1) * 25 + subdata$noteid)) abline(fit) } hz2pitch <- function(f) { 69 + 12 * log2(f / 440) } diff_1_3 <- function(sgr, md) { singermodeind <- data$singer == sgr & data$mode == md firstrun <- singermodeind & data$run == 1 thirdrun <- singermodeind & data$run == 3 firstdata <- data.frame(noteid = data$noteid[firstrun], newmedian=data$newmedian[firstrun]) thirddata <- data.frame(noteid = data$noteid[thirdrun], newmedian=data$newmedian[thirdrun]) comparedata <- merge(firstdata, thirddata, by="noteid") return(comparedata) } # estimate.pitch <- function(x) # { # if (length(x) > 0) # { # w <- 1./rowSums(rdist(x)) # w <- w/sum(w) # return(sum(w*x)) # } else { # return(NA) # } # } datadir <- '../data/filtered_pitchtracks/' ############################################################################## # LOAD MAIN DATA AND SOME HELPER STUFF ############################################################################## cat("\nMAIN PAPER EVALUATION\n\n", file = stderr()) cat("\nLoading note data...", file = stderr()) data <- read.csv(file = "../data/all_final_note_tracks_klaus.csv", header = TRUE, stringsAsFactors = T) singers <- levels(data$singer) poor.singers <- c("mf", "mw", "sr", "hl") nSinger <- length(singers) conditions <- c("normal", "masked", "imagined") nCondition <- 3 # flag poor singers data$ispoor <- data$singer %in% poor.singers data$singer <- factor(data$singer) # make a singermode variable to uniquely identify all recordings data$singermode <- as.factor(sprintf("%s%s", data$singer, data$mode)) # make a serial note number (essentially from 1 to 75) data$notenumber <- data$noteid + 25 * (data$run-1) cat(" done.\n\n", file = stderr()) ############################################################################## # EXTRACT PITCH ESTIMATES FROM PITCH TRACKS ############################################################################## cat("Extracting pitches etc. from pitch tracks (patience!) ...", file = stderr()) # get pitch data and extract medians data$newmedian <- data$pitchmedian * NA data$newsd <- data$pitchmedian * NA data$newn <- data$pitchmedian * 0 data$slope <- data$pitchmedian * NA count <- 0 cat(" ", file = stderr()) for (iSinger in 1:nSinger) { for (iCondition in 1:nCondition) { count <- count + 1 cat(sprintf("\b\b\b\b\b\b\b\b%2i of %2i", count, nSinger*nCondition), file = stderr()) yindata <- read.table( sprintf('%s/happy_birthday_%s_%s_vamp_yintony_yintony_f0.csv', datadir, singers[iSinger], conditions[iCondition]), header=F, sep=",") dataind <- which(data$singer == singers[iSinger] & data$mode == conditions[iCondition]) nNote <- length(dataind) for (iNote in 1:nNote) { yinind <- yindata[,1] >= data$onset[dataind[iNote]] & yindata[,1] <= (data$onset[dataind[iNote]] + data$duration[dataind[iNote]]) curryindata <- hz2pitch(yindata[yinind,2]) currmedian <- median(curryindata) data$newmedian[dataind[iNote]] <- currmedian around.median <- which(abs(curryindata-currmedian) < 3) currn <- length(around.median) if (currn > 0) { data$newsd[dataind[iNote]] <- sd(curryindata[around.median]) data$slope[dataind[iNote]] <- lm(curryindata[around.median]~around.median)$coefficients[2] data$newn[dataind[iNote]] <- sum(currn) } } } } # find which notes are grosslly erroneous in the newly extracted medians toCorrect <- is.na(data$newmedian) | abs(data$newmedian-data$pitchmedian) >= 1 # and set them to NA data$newmedian[toCorrect] <- NA cat(" done.\n\n", file = stderr()) ############################################################################## # LOAD MELODY GROUND TRUTH DATA ############################################################################## cat("Loading melody ground truth...", file = stderr()) gtdata <- read.csv2(file = "../data/happy_birthday_GT.csv", header = TRUE, stringsAsFactors = FALSE) # and weave it into the main data frame for (iNote in 1:25) { data$semitone[data$noteid == iNote] <- gtdata$pitch_ion_cents_et[iNote] / 100 } cat(" done.\n\n", file = stderr()) ############################################################################## # LOADING SURVEY DATA ############################################################################## cat("Loading survey data...", file = stderr()) meta <- read.csv(file = "../data/metadata.csv", header = TRUE, stringsAsFactors = T) # re-order ordinal variables meta$sing.ability <- relevel(meta$sing.ability, ref="veryhigh") meta$sing.ability <- relevel(meta$sing.ability, ref="high") meta$sing.ability <- relevel(meta$sing.ability, ref="medium") meta$sing.ability <- relevel(meta$sing.ability, ref="low") meta$sing.ability <- relevel(meta$sing.ability, ref="poor") meta$sing.experience <- relevel(meta$sing.experience, ref="professional") meta$sing.experience <- relevel(meta$sing.experience, ref="alot") meta$sing.experience <- relevel(meta$sing.experience, ref="some") meta$sing.experience <- relevel(meta$sing.experience, ref="none") meta$sing.experience <- relevel(meta$sing.experience, ref="notanswered") meta$musical.background <- relevel(meta$musical.background, ref="professional") meta$musical.background <- relevel(meta$musical.background, ref="semiprofessional") meta$musical.background <- relevel(meta$musical.background, ref="amateur") meta$musical.background <- relevel(meta$musical.background, ref="none") meta$choir.experience <- relevel(meta$choir.experience, ref="active") meta$choir.experience <- relevel(meta$choir.experience, ref="sangbutnolongeractive") meta$choir.experience <- relevel(meta$choir.experience, ref="childneverafter") meta$choir.experience <- relevel(meta$choir.experience, ref="none") meta$instudy <- meta$id %in% singers & !(meta$id %in% poor.singers) cat(" done.\n\n", file = stderr()) ############################################################################## # DRIFT CALCULATIONS ############################################################################## cat("Calculating drift...", file = stderr()) drifts <- data.frame(singer=c(), mode=c(), singermode =c(), noteid=c(), diffs=c()) for (iSinger in 1:nSinger) { singerind <- data$singer == singers[iSinger] for (iCondition in 1:nCondition) { recordingind <- singerind & data$mode == conditions[iCondition] diffs <- rep(NA, 25) for (iNote in 1:25) { ind1 <- recordingind & data$notenumber == iNote ind2 <- recordingind & data$notenumber == iNote + 50 if (sum(ind1) > 0 & sum(ind2) > 0) { diffs[iNote] <- data$newmedian[ind2] - data$newmedian[ind1] } } # cat(sum(is.na(diffs))) tempdf = data.frame(singer = rep(singers[iSinger], 25), mode = rep(conditions[iCondition], 25), singermode = rep(sprintf("%s%s", singers[iSinger], conditions[iCondition]), 25), noteid = 1:25, diffs = diffs) drifts = rbind(drifts, tempdf) } } cat(" done.\n\n", file = stderr()) ############################################################################## # PITCH ERROR CALCULATIONS ############################################################################## cat("Calculating pitch errors...", file = stderr()) justintonation_scale <- c(0,11.7,3.9,15.6,-13.7,-2,-9.8,2,13.7,15.6,17.6,-11.7) data$justintonation <- data$newmedian * 0 data$jipitcherror <- data$pitcherror for (i in 0:11) { ind <- data$semitone %% 12 == i data$justintonation[ind] <- justintonation_scale[i+1]/100 } data$pitcherror <- data$newmedian * 0 data$jipitcherror <- data$newmedian * 0 for (sm in unique(data$singermode)) { for (r in 1:3) { logicalind <- data$singermode == sm & data$run == r & !is.na(data$newmedian) if (sum(logicalind) > 0) { data$pitcherror[logicalind] <- lm(I(newmedian-semitone) ~ noteid, data = data, subset = logicalind)$residuals data$jipitcherror[logicalind] <- lm(I(newmedian-semitone-justintonation) ~ noteid, data = data, subset = logicalind)$residuals } } } cat(" done.\n\n", file = stderr()) ############################################################################## # INTERVAL ERROR CALCULATIONS ############################################################################## cat("Calculating interval errors (patience!) ... ", file = stderr()) data$expectedinterval <- data$semitone * NA data$expectedfutureinterval <- data$semitone * NA data$realisedinterval <- data$semitone * NA count <- 0 cat(" of ", file = stderr()) for (singermode in sort(unique(data$singermode))) { count <- count + 1 cat(sprintf("\b\b\b\b\b\b\b\b%2i of %2i", count, nSinger*nCondition), file = stderr()) singermodeIdx <- which(data$singermode == singermode) notenumbers <- data$notenumber[singermodeIdx] for (notenumber in notenumbers) { currNoteIdx <- singermodeIdx[which(notenumbers==notenumber)] previousNoteIdx <- singermodeIdx[which(notenumbers==(notenumber-1))] nextNoteIdx <- singermodeIdx[which(notenumbers==(notenumber+1))] if (length(previousNoteIdx) == 1) { data$expectedinterval[currNoteIdx] <- data$semitone[currNoteIdx] - data$semitone[previousNoteIdx] data$realisedinterval[currNoteIdx] <- data$newmedian[currNoteIdx] - data$newmedian[previousNoteIdx] } if (length(nextNoteIdx) == 1) { data$expectedfutureinterval[currNoteIdx] <- data$semitone[nextNoteIdx] - data$semitone[currNoteIdx] } } } data$intervalerror <- data$realisedinterval - data$expectedinterval cat(" done.\n\n", file=stderr()) ############################################################################## # MEMORY MODEL STUFF ############################################################################## cat("Calculating different memory models (patience!) ... \n\n", file = stderr()) # mus <- 1 - seq(1,0,length.out=nmu)^2 mus <- seq(0, 1, by = 0.01) nmu <- length(mus) burn.in <- 6 md <- with(data, data.frame(singer=singer, mode=mode, noteid=noteid, ispoor=ispoor, singermode=singermode, notenumber=notenumber, p=newmedian, s=semitone)) mr <- data.frame(with(md, aggregate(data.frame(ispoor=ispoor), list(singer=singer,mode=mode), min))) mr$singermode <- as.character(mr$singer) mr$tstart <- mat.or.vec(nrow(mr),1) for (iRow in 1:nrow(mr)) { mr$singermode[iRow]<-sprintf("%s%s", mr$singer[iRow], mr$mode[iRow]) ind <- mr$singermode[iRow] == md$singermode & md$notenumber <= burn.in mr$tstart[iRow] <- mean(md$p[ind] - md$s[ind], na.rm=T) } cat(" ", file = stderr()) mdmatrix <- mat.or.vec(nrow(md), nmu) * NA colnames(mdmatrix) <- sprintf("err%04.0f", mus*1000) for (imu in 1:nmu) { mu <- mus[imu] muname <- sprintf("err%04.0f", mu*1000) # md[[muname]] <- md$p * NA # cat(sprintf("\b\b\b\b\b\b\b\b\b\bmu = %0.3f", mu), file = stderr()) ptm <- proc.time() for (sm in levels(md$singermode)) { mrind <- mr$singermode == sm mdind <- which(md$singermode == sm & md$notenumber > burn.in) r <- mr$tstart[mrind] indices <- c(mdind, mdind[1]-md$notenumber[mdind[1]]+76) # cat(length(indices), "\n") for (ii in indices) { if (!is.na(md$p[ii-1])) { eminus1 <- md$p[ii-1] - md$s[ii-1] - r # if (abs(eminus1)<100) # how hacky is this? # { r <- r + (1-mu) * eminus1 mdmatrix[ii-1,imu] <- eminus1 # } } } } cat(proc.time() - ptm, "\n") } md <- cbind(md, mdmatrix) cat(" done.\n\n", file = stderr()) ############################################################################## # AGGREGATED METRICS ############################################################################## cat("Calculating aggregated metrics ... ", file = stderr()) agg.drifts <- with(drifts[!(drifts$singer %in% poor.singers),], aggregate(data.frame(diffs=diffs), list(singer = singer, mode = mode), function(x){mean(x, na.rm=T)})) singeragg.drifts <- with(drifts[!(drifts$singer %in% poor.singers),], aggregate(data.frame(diffs=diffs), list(singer = singer), function(x){mean(x, na.rm=T)})) agg.drifts.p <- with(drifts[!(drifts$singer %in% poor.singers),], aggregate(data.frame(p=diffs), list(singer = singer, mode = mode), function(x) {t.test(x)$p.value})) agg.drifts.sd <- with(drifts[!(drifts$singer %in% poor.singers),], aggregate(data.frame(sd=diffs), list(singer = singer, mode = mode), sd)) agg.drifts.all <- with(drifts, aggregate(data.frame(diffs=diffs), list(singer = singer, mode = mode), function(x){mean(x, na.rm=T)})) agg.mape <- with(data[!data$ispoor,], aggregate(data.frame(mape=abs(pitcherror)), list(singer = singer, mode = mode), function(x){mean(x, na.rm=T)})) singeragg.mape <- with(data[!data$ispoor,], aggregate(data.frame(mape=abs(pitcherror)), list(singer = singer), function(x){mean(x, na.rm=T)})) agg.ib <- with(data[!data$ispoor,], aggregate(data.frame(ib=intervalerror), list(singer = singer, mode = mode), function(x) {mean(x, na.rm=T)})) agg.maie <- with(data[!data$ispoor,], aggregate(data.frame(maie=abs(intervalerror)), list(singer = singer, mode = mode), function(x) {mean(x, na.rm=T)})) agg.rise <- with(data[!data$ispoor,], aggregate(data.frame(rise=duration * abs(slope) * 44100/1024), list(singer = singer, mode = mode), function(x) {mean(x, na.rm=T)})) singeragg.maie <- with(data[!data$ispoor,], aggregate(data.frame(maie=abs(intervalerror)), list(singer = singer), function(x) {mean(x, na.rm=T)})) singeragg.ratioout <- with(data[!data$ispoor,], aggregate(data.frame(ratioout=abs(intervalerror)), list(singer = singer), function(x) {mean(x>1, na.rm=T)})) singeragg.ib <- with(data[!data$ispoor,], aggregate(data.frame(ib=intervalerror), list(singer = singer), function(x) {mean(x, na.rm=T)})) # linear.drift.all <- with(data, # aggregate(data.frame(lineardrift=1:nrow(data)), list(singer=data$singer, mode=data$mode), function(ind) {lm(newmedian-semitone~notenumber, data=data[ind,])$coefficients[2]})) linear.drift.all <- with(data, aggregate(data.frame(est=1:nrow(data)), list(singer=data$singer, mode=data$mode), function(ind) {summary(lm(newmedian-semitone~notenumber, data=data[ind,]))$coefficients[2,c(1,4)]})) linear.drift.all <- data.frame(singer=linear.drift.all$singer, mode=linear.drift.all$mode, lineardrift=linear.drift.all[,3][,1], p.value=linear.drift.all[,3][,2]) linear.drift <- linear.drift.all[!linear.drift.all$singer %in% poor.singers,] # metadata versus stats merged.singeragg <- merge(singeragg.mape, meta, by.x = "singer", by.y = "id", all.y = F) merged.singeragg <- merge(merged.singeragg, singeragg.drifts, by.x = "singer", by.y = "singer", all.y = F) merged.singeragg <- merge(merged.singeragg, singeragg.maie, by.x = "singer", by.y = "singer", all.y = F) # merged.singeragg <- merge(merged.singeragg, singeragg.ib, by.x = "singer", by.y = "singer", all.y = F) merged.agg <- merge(agg.mape, meta, by.x = c("singer"), by.y = "id", all.y = F) merged.agg <- merge(merged.agg, agg.drifts, by = c("singer","mode")) merged.agg <- merge(merged.agg, agg.maie, by = c("singer","mode")) merged.agg <- merge(merged.agg, agg.ib, by = c("singer","mode")) merged.agg <- merge(merged.agg, agg.rise, by = c("singer","mode")) merged.agg <- merge(merged.agg, linear.drift, by = c("singer","mode")) cat(" done.\n\n", file = stderr()) ## VARIANCE CALCULATION library(MASS) burnin <- 6 data$relativetonic <- data$newmedian * NA for (iRow in 1:nrow(mr)) { refind <- data$singermode == mr$singermode[iRow] & data$notenumber <= burnin ind <- data$singermode == mr$singermode[iRow] & data$notenumber > burnin data$relativetonic[ind] <- data$newmedian[ind] - data$semitone[ind] - median(data$newmedian[refind] - data$semitone[refind]) }