Mercurial > hg > pitch-accuracy-and-interaction-in-unaccompanied-duet-singing
view latex/scripts/results_and_figures.R @ 4:3e666d0329b5 tip
updated code
author | Jiajie Dai <daijiajie1@gmail.com> |
---|---|
date | Sat, 06 Jan 2018 12:20:49 +0000 |
parents | 6531169e6866 |
children |
line wrap: on
line source
cat("\n____________________________\n") cat("\n____ MAIN PAPER RESULTS ____\n") cat("\n____________________________\n\n") cat("This script assumes that the script data_import.R has been run.\n") cat("This script generates statistics and figures used in the JASA submission.\n\n") ############################################################################## # METHOD SECTION OUTPUT ############################################################################## cat("\n____ METHOD SECTION ____\n\n") cat("- data for self-assessment table:\n") print(with(meta[meta$instudy,], summary(data.frame(age, musical.background, sing.ability, sing.experience, choir.experience)))) cat("\n____ ACCURACY SECTION ____\n\n") cat("- Interval error significance tests:\n\n") significance.level <- 0.0001 res <- with(data[data$ispoor == F, ], t.test(intervalerror[ expectedinterval == 8 ])) cat( " - Upward minor sixth is flat.\n") cat(sprintf(" Is the difference significant at significance level %0.4f? %s (p = %g, df=%g, t = %g)\n\n", significance.level, res$p.value < significance.level, res$p.value, res$parameter, res$statistic)) res <- with(data[data$ispoor == F, ], t.test(intervalerror[ expectedinterval == 12 ])) cat( " - Upward octave is flat.\n") cat(sprintf(" Is the difference significant at significance level %0.4f? %s (p = %g, df=%g, t = %g)\n\n", significance.level, res$p.value < significance.level, res$p.value, res$parameter, res$statistic)) res <- with(data[data$ispoor == F, ], t.test(intervalerror[ expectedinterval == 0 ])) cat(sprintf(" - Prime (0 interval) is sharp by %0.2f.\n", res$estimate)) cat(sprintf(" Is the difference significant at significance level %0.4f? %s (p = %g, df=%g, t = %g)\n\n", significance.level, res$p.value < significance.level, res$p.value, res$parameter, res$statistic)) cat("- explaining pitch error by following interval or beginning of phrase\n\n") is.beginning.of.phrase <- I(data$noteid[!data$ispoor] %in% c(1,7,13,20)) res <- with(data[!data$ispoor, ], summary(lm(pitcherror ~ is.beginning.of.phrase + expectedfutureinterval))) cat(sprintf(" - p: %0.3g, %0.3g\n",res$coefficients[3,4],res$coefficients[2,4])) cat(sprintf(" - F = %g\n",res$fstatistic[1])) cat(sprintf(" - DF: %0.0f\n",res$fstatistic[3])) cat(sprintf(" - Variance explained: %0.1f%s\n",100*res$r.squared,"%")) cat(sprintf(" - Sharpening in cents per note in interval: %0.1f\n\n",100*res$coefficients[3,1])) res <- with(data[!data$ispoor, ], summary(lm(pitcherror ~ is.beginning.of.phrase))) cat(sprintf(" - Variance explained only by intercept and following interval: %0.1f%s\n",100*res$r.squared,"%")) cat(sprintf(" - p: %0.3g\n",res$coefficients[2,4])) cat(sprintf(" - F = %g\n",res$fstatistic[1])) cat(sprintf(" - DF: %0.0f\n\n",res$fstatistic[3])) # cat(sprintf(" - Variance explained: %0.1f%s\n\n",100*res$r.squared,"%")) res <- with(data[!data$ispoor, ], summary(lm(pitcherror ~ expectedfutureinterval))) cat(sprintf(" - Variance explained only by intercept and beginning of phrase: %0.1f%s\n",100*res$r.squared,"%")) cat(sprintf(" - p: %0.3g\n",res$coefficients[2,4])) cat(sprintf(" - F = %g\n",res$fstatistic[1])) cat(sprintf(" - DF: %0.0f\n\n",res$fstatistic[3])) # cat(sprintf(" - Variance explained: %0.1f%s\n\n",100*res$r.squared,"%")) et.ji.test <- with(data[!data$ispoor,], t.test(abs(pitcherror), abs(jipitcherror), paired=T)) etMAPE <- with(data[!data$ispoor,], mean(abs(pitcherror), na.rm=T)) jiMAPE <- with(data[!data$ispoor,], mean(abs(jipitcherror), na.rm=T)) cat("- Error difference between ET and JI error:\n") cat(" Is it significant? Paired t-test says:") cat(sprintf("%s (p = %g, df: %0.0f, t = %g)\n\n", et.ji.test$p.value < 0.01, et.ji.test$p.value, et.ji.test$parameter, et.ji.test$statistic), "\n") cat(sprintf("ET (MAPE: %g) fits better than JI (MAPE: %g), by %g\n\n", etMAPE, jiMAPE, et.ji.test$estimate)) ############################################################################## # RESULTS SECTION OUTPUT ############################################################################## cat("\n____ RESULTS SECTION ____\n\n") cat("Basic metrics statistics.\n") cat(sprintf(" - MAPE. Mean: %0.3f; median: %0.3f; sd: %0.3f\n", mean(agg.mape$mape), median(agg.mape$mape), sd(agg.mape$mape))) cat(sprintf(" - MAIE. Mean: %0.3f; median: %0.3f; sd: %0.3f\n", mean(agg.maie$maie), median(agg.maie$maie), sd(agg.maie$maie))) # cat(sprintf(" - IB. Mean: %0.3f; median: %0.3f; sd: %0.3f\n", mean(agg.ib$ib), median(agg.ib$ib), sd(agg.ib$ib))) cat(sprintf(" - D. Mean: %0.3f; median: %0.3f; sd: %0.3f\n", mean(agg.drifts$diffs), median(agg.drifts$diffs), sd(agg.drifts$diffs))) cat(sprintf(" - |D|. Mean: %0.3f; median: %0.3f; sd: %0.3f\n\n", mean(abs(agg.drifts$diffs)), median(abs(agg.drifts$diffs)), sd(abs(agg.drifts$diffs)))) cat(sprintf(" - lin D. in cents: Mean: %0.3f; median: %0.3f; sd: %0.3f\n", 100*mean(linear.drift$lineardrift), 100*median(linear.drift$lineardrift), 100*sd(linear.drift$lineardrift))) cat(sprintf("- Significance of drift:")) cat(sprintf(" - significant drifts: %i of %i (%0.0f%s)\n", sum(agg.drifts.p$p<0.01), length(agg.drifts.p$p), 100*mean(agg.drifts.p$p<0.01), "%")) with(agg.drifts.p[agg.drifts$diffs>0,], cat(sprintf(" - number of upward drifts: %i, of which %i significantly so.\n", length(p), sum(p<0.01)))) with(agg.drifts.p[agg.drifts$diffs<0,], cat(sprintf(" - number of downward drifts: %i, of which %i significantly so.\n\n", length(p), sum(p<0.01)))) cat(sprintf("- Significance of linear drift")) cat(sprintf(" - significant drifts: %i of %i (%0.0f%%) at conf. level 0.01, %i of %i (%0.0f%%) at conf. level 0.05 \n", sum(linear.drift$p.value<0.01), length(linear.drift$p.value), 100*mean(linear.drift$p.value<0.01), sum(linear.drift$p.value<0.05), length(linear.drift$p.value), 100*mean(linear.drift$p.value<0.05))) with(linear.drift[linear.drift$lineardrift>0,], cat(sprintf(" - number of upward drifts: %i, of which %i significantly so at 0.01.\n", length(p.value), sum(p.value<0.01)))) with(linear.drift[linear.drift$lineardrift<0,], cat(sprintf(" - number of downward drifts: %i, of which %i significantly so at 0.01.\n\n", length(p.value), sum(p.value<0.01)))) with(linear.drift[linear.drift$lineardrift>0,], cat(sprintf(" - number of upward drifts: %i, of which %i significantly so at 0.05.\n", length(p.value), sum(p.value<0.05)))) with(linear.drift[linear.drift$lineardrift<0,], cat(sprintf(" - number of downward drifts: %i, of which %i significantly so at 0.05.\n\n", length(p.value), sum(p.value<0.05)))) cat("- Correlation table (LaTeX syntax)\n") # cor.df <- data.frame(sg.abl=as.numeric(merged.agg$sing.ability), sg.exp=as.numeric(merged.agg$sing.experience), mus.bg=as.numeric(merged.agg$musical.background), ch.exp=as.numeric(merged.agg$choir.experience), MAIE=merged.agg$maie, MAPE=merged.agg$mape, IB=merged.agg$ib, D=merged.agg$diffs, abs.D=abs(merged.agg$diffs)) cor.df <- data.frame(sg.abl=as.numeric(merged.agg$sing.ability), sg.exp=as.numeric(merged.agg$sing.experience), mus.bg=as.numeric(merged.agg$musical.background), ch.exp=as.numeric(merged.agg$choir.experience), MAIE=merged.agg$maie, MAPE=merged.agg$mape, lin.D=merged.agg$lineardrift, abs.D=abs(merged.agg$diffs), D=merged.agg$diffs) # cortable(data.frame(sg.abl=as.numeric(merged.agg$sing.ability), mus.bg=as.numeric(merged.agg$musical.background), ch.exp=as.numeric(merged.agg$choir.experience), MAIE=merged.agg$maie, ib=merged.agg$ib, MAPE=merged.agg$mape, D=merged.agg$diffs, abs.D=abs(merged.agg$diffs))) cortable(cor.df) cat("\n\n") # testing singer predicting MAPE, MAIE, and drift cat("- Variance explained by singer for different measures\n\n") res <- summary(lm(mape~singer, data=agg.mape)) pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE) df <- res$fstatistic[2] cat(sprintf(" - MAPE: %0.2f%%, p-value: %0.5g, df = %g, F = %g\n", res$r.squared*100, pval, df, res$fstatistic[1])) res <- summary(lm(maie~singer, data=agg.maie)) pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE) df <- res$fstatistic[2] cat(sprintf(" - MAIE: %0.2f%%, p-value: %0.5g, df = %g, F = %g\n", res$r.squared * 100, pval, df, res$fstatistic[1])) # res <- summary(lm(ib~singer, data=agg.ib)) # pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE) # cat(sprintf(" - IB: %0.2f%%, p-value: %0.5g\n", res$r.squared * 100, pval)) res <- summary(lm(lineardrift~singer, data=linear.drift)) pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE) df <- res$fstatistic[2] cat(sprintf(" - lin.D: %0.2f%%, p-value: %0.5g, df = %g, F = %g\n", res$r.squared * 100, pval, df, res$fstatistic[1])) res <- summary(lm(diffs~singer, data=agg.drifts)) pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE) df <- res$fstatistic[2] cat(sprintf(" - D: %0.2f%%, p-value: %0.5g, df = %g, F = %g\n", res$r.squared * 100, pval, df, res$fstatistic[1])) res <- summary(lm(abs(diffs)~singer, data=agg.drifts)) pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE) df <- res$fstatistic[2] cat(sprintf(" - |D|: %0.2f%%, p-value: %0.5g, df = %g, F = %g\n\n", res$r.squared * 100, pval, df, res$fstatistic[1])) # testing influence of condition cat("- Testing influence of singing condition (Normal/Masked/Imagined) via Kruskal-Wallis tests (sign. level 0.01):\n\n") res <- kruskal.test(mape ~ mode, data = agg.mape) cat(sprintf(" - MAPE significant difference due to condition? %s (p-value: %0.5g, df = %g, chisquared = %g)\n", res$p.value<0.01, res$p.value, res$parameter, res$statistic)) res <- kruskal.test(maie ~ mode, data = agg.maie) cat(sprintf(" - MAIE significant difference due to condition? %s (p-value: %0.5g, df = %g, chisquared = %g)\n", res$p.value<0.01, res$p.value, res$parameter, res$statistic)) # res <- kruskal.test(ib ~ mode, data = agg.ib) # cat(sprintf(" - IB significant difference due to condition? %s (p-value: %0.5g)\n", res$p.value<0.01, res$p.value)) res <- kruskal.test(lineardrift ~ mode, data = linear.drift) cat(sprintf(" - lin. D significant difference due to condition? %s (p-value: %0.5g, df = %g, chisquared = %g)\n", res$p.value<0.01, res$p.value, res$parameter, res$statistic)) res <- kruskal.test(diffs ~ mode, data = agg.drifts) cat(sprintf(" - D significant difference due to condition? %s (p-value: %0.5g, df = %g, chisquared = %g)\n", res$p.value<0.01, res$p.value, res$parameter, res$statistic)) res <- kruskal.test(abs(diffs) ~ mode, data = agg.drifts) cat(sprintf(" - |D| significant difference due to condition? %s (p-value: %0.5g, df = %g, chisquared = %g)\n", res$p.value<0.01, res$p.value, res$parameter, res$statistic)) # cat("- \"Flat\" singer: \n\n") # "hl" # with(agg.drifts.all, cat(sprintf(" - drift in normal condition: %0.2f semitones\n", diffs[singer=="hl" & mode=="normal"]))) # with(linear.drift.all, cat(sprintf(" - linear drift in normal condition: %0.2f cents per note\n", 100 * lineardrift[singer=="hl" & mode=="normal"]))) # with(agg.drifts.all, cat(sprintf(" - drift in masked condition: %0.2f semitones\n", diffs[singer=="hl" & mode=="masked"]))) # with(linear.drift.all, cat(sprintf(" - linear drift in masked condition: %0.2f cents per note\n", 100 * lineardrift[singer=="hl" & mode=="masked"]))) # with(agg.drifts.all, cat(sprintf(" - drift in imagined condition: %0.2f semitones\n", diffs[singer=="hl" & mode=="imagined"]))) # with(linear.drift.all, cat(sprintf(" - linear drift in imagined condition: %0.2f cents per note\n\n", 100 * lineardrift[singer=="hl" & mode=="imagined"]))) ############################################################################## # MODEL SECTION OUTPUT ############################################################################## cat("\n____ MODEL SECTION ____\n\n") # notevar <- with(mdr[!mdr$ispoor,], aggregate(I(p-s-tstart), list(notenumber=notenumber), function(x)var(x,na.rm=T))[,2]) # notevar2 <- with(data[!data$ispoor,], aggregate(pitcherror, list(notenumber=notenumber), function(x)var(x,na.rm=T))[,2]) # driftvar <- notevar-notevar2 # driftvar.fit <- lm(driftvar[7:75]~I(7:75)) # sqrt(driftvar.fit$coefficients[2]*50) cat("- existence of 'spread'\n") var.by.notenumber <- aggregate(data$relativetonic[!data$ispoor], list(data$notenumber[!data$ispoor]), function(x) {var(x, na.rm=T)}) var2.by.notenumber <- aggregate(data$pitcherror[!data$ispoor], list(data$notenumber[!data$ispoor]), function(x) {var(x, na.rm=T)}) counts.by.notenumber <- aggregate(data$relativetonic[!data$ispoor], list(data$notenumber[!data$ispoor]), function(x) {sum(!is.na(x))}) dist.by.notenumber <- aggregate(abs(data$relativetonic[!data$ispoor]), list(data$notenumber[!data$ispoor]), function(x) {mean(x, na.rm=T)}) fit1 <- lm(var.by.notenumber[,2]~var2.by.notenumber[,2], weights=counts.by.notenumber[,2]) fit2 <- lm(fit1$residuals~var.by.notenumber[(burnin+1):75,1], weights=counts.by.notenumber[(burnin+1):75,2]) summary(fit2) res <- summary(fit2) pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE) cat(sprintf(" - notenumber: %0.2f%%, p-value: %0.5g, df = %g, F = %g\n\n", res$r.squared * 100, pval, res$fstatistic[1], res$fstatistic[3])) plot(names(fit1$residuals), fit1$residuals) abline(fit2) intvar <- var(data$intervalerror[!data$ispoor], na.rm=T) expected.D.var <- 50 * intvar expected.D.sd <- expected.D.var^0.5 pitch.err.sd <- sd(data$pitcherror[!data$ispoor], na.rm=T) cat(sprintf("- inadequacy of mu = 0 model:\n\n")) cat(sprintf(" - interval (error) variance: %0.3f\n", intvar)) cat(sprintf(" - expected variance of D (interval variance summed over 50 notes): %0.3f\n", expected.D.var)) cat(sprintf(" - ... and the corresponding standard deviation: %0.3f\n\n", expected.D.sd)) cat(sprintf(" - In comparison the standard deviation of pitch erros is: %0.3f\n\n", pitch.err.sd)) model.mape <- apply(md[!md$ispoor,(1:nmu)+8], 2, function(x) {mean(abs(x),na.rm=T)}) model.mape.singerwise <- aggregate(md[!md$ispoor,(1:nmu)+8], list(md$singer[!md$ispoor]), function(x) {mean(abs(x),na.rm=T)}) model.mape.recordingwise <- aggregate(md[!md$ispoor,(1:nmu)+8], list(md$singer[!md$ispoor],md$mode[!md$ispoor]), function(x) {mean(abs(x),na.rm=T)}) recording.mus <- data.frame(singer = model.mape.recordingwise[,1], mu = mus[apply(model.mape.recordingwise[,-(1:2)], 1, which.min)]) # singer.mus <- data.frame(singer = model.mape.singerwise[,1], mu = mus[apply(model.mape.singerwise[,-1], 1, which.min)]) singer.mus <- aggregate(recording.mus[,2], list(recording.mus[,1]),mean) cat(sprintf("- Better parameter for mu, firstly via 'model MAPE':\n\n")) cat(sprintf(" - the smallest error of %0.2f is achieved with model '%s'\n", min(model.mape), names(which.min(model.mape)))) cat(sprintf(" - error at mu = 0: %0.2f\n", model.mape[1])) cat(sprintf(" - error at mu = 1: %0.2f\n\n", model.mape[length(model.mape)])) #------------------------------- mdr <- merge(md, mr[,c(1,2,5)], by=c("singer", "mode")) # plot(I(p-s-tstart)~factor(notenumber), data=mdr) sig2 <- var(data$pitcherror[!data$ispoor], na.rm=T) sig2int <- var(data$intervalerror[!data$ispoor], na.rm=T) n <- 50 # vd <- var(drifts[!drifts$singer %in% poor.singers,4], na.rm=T) # vd <- var(drifts$diffs[!drifts$singer %in% poor.singers], na.rm=T) vd <- var(agg.drifts$diffs) est.mu <- 1-sqrt(vd/(n*sig2)) cat("- Estimate according to drift variance formula:\n\n") cat(sprintf(" - overall pitch error variance: %0.3f\n", sig2)) # cat(sprintf(" - overall interval error variance: %0.3f", sig2int)) cat(sprintf(" - observed variance over 50-note gap: %0.3f\n", vd)) cat(sprintf(" - estimated mu based on these: %0.3f\n\n", est.mu)) cat("- Stats for mu by singer:\n") cat(sprintf(" - range: %0.3f to %0.3f\n", range(singer.mus[,2])[1], range(singer.mus[,2])[2])) cat(sprintf(" - mean: %0.3f, median: %0.3f, sd: %0.3f\n\n", mean(singer.mus[,2]), median(singer.mus[,2]), sd(singer.mus[,2]))) # n <- 50 # mu <- 0.916 # sig2 <- var(data$pitcherror[ !data$ispoor ], na.rm=T) # vv <- var(drifts$diffs[!drifts$singer %in% poor.singers], na.rm=T) # vv <- var(drifts$diffs[!drifts$singer %in% poor.singers & drifts$noteid ==iNote], na.rm=T) # n <- 50 # sig2 <- var(data$pitcherror[ !data$ispoor ], na.rm=T) # vv <- var(agg.drifts$diffs) # est.mu <- 1-sqrt(vv/(n*sig2)) # # vv <- var(agg.drifts$diffs) # for (mu in c(0,0.8,0.9,est.mu, 0.95, 1)) # { # dvar <- (1-mu)^2 * n * sig2 # cat(sprintf("Example: mu = %g, sigma^2 = %g, n = %g\n", mu, sig2, n)) # cat(sprintf(" ... var = %g, sd = %g\n", dvar, sqrt(dvar))) # } # CHECK!!!!! # singermodemu <- c() # for (singer in singers[!singers %in% poor.singers]) # { # for (mode in c("normal","imagined","masked")) # { # sig2 <- var(data$pitcherror[drifts$singer==singer & drifts$mode==mode], na.rm=T) # # sig2int <- var(data$intervalerror[data$singermode==singermode], na.rm=T) # n <- 50 # vd <- var(drifts$diffs[drifts$singer==singer & drifts$mode==mode], na.rm=T) # est.mu <- 1-sqrt(vd/(n*sig2)) # # cat(sprintf("%s: %0.3f\n", singer, est.mu)) # singermode <- sprintf("%s%s", singer, mode) # singermodemu <- rbind(singermodemu, data.frame(singermode=singermode, singer=singer, mode=mode, mu=est.mu)) # } # } # hist(singermodemu$mu, col="gray") # make drift figures for (singermode in sort(unique(data$singermode))) { pdf(sprintf("../figures/drift_figure_%s.pdf", singermode), width = 3.5, height = 3.5) makeDriftGraph(data, singermode) dev.off() } for (singermode in sort(unique(data$singermode))) { pdf(sprintf("../figures/drift_figure_model0850_%s.pdf", singermode), width = 5.5, height = 4.5) with(data[data$singermode==singermode,], plot(notenumber, newmedian-semitone, pch="+", ylab="pitch", xlab="note")) with(md[md$singermode==singermode,], points(notenumber, p-s-err0850, pch=20, type='o')) dev.off() } countsByInterval <- with(data[!data$ispoor,], aggregate(data.frame(count=data$intervalerror), list(interval=data$expectedinterval), function(x){sum(!is.na(x))})) pdf("../figures/interval_error_by_interval_boxplot.pdf", width = 5.5, height = 4) par(mar=c(2,4,1,.5)) plot(c(1,18), c(-1.6,1.6), type='n', xaxt='n', xlab = '', ylab='', yaxt='n') abline(h=0) abline(h=-1, lty=2) abline(h=1, lty=2) plot(intervalerror ~ factor(expectedinterval, levels=-5:12), data=data, subset = !ispoor, pch=20, col=rgb(0.9,0.9,0.9), ylim = c(-2,2), outline=F, xaxt='n', yaxt='n', add=T, ylab="interval error (with respect to ET)", xlab="interval in semitones", cex.axis=0.8, lty=1) posi <- c(1:6, 8, 11, 13, 14, 18) axis(2, at=c(-1,0,1), cex.axis=0.8) axis(1, at=posi, labels=posi-6, cex.axis=0.8) # text(counts$interval + 6, rep(2,11), counts$count, cex=0.7) mtext(c("n=", countsByInterval$count), at=c(-.1,countsByInterval$interval + 6), cex=0.7) dev.off() countsByNoteID <- with(data[!data$ispoor,], aggregate(data.frame(count=data$intervalerror), list(noteid=data$noteid), function(x){sum(!is.na(x))})) pdf("../figures/interval_error_by_notenumber_boxplot.pdf", width = 7, height = 4) par(mar=c(2,.5,1,1)) plot(c(1,25), c(-1.6,1.6), type='n', xaxt='n', xlab = '', ylab='', yaxt='n') abline(h=0) abline(h=-1, lty=2) abline(h=1, lty=2) plot(intervalerror ~ factor(noteid, levels=1:75), data=data, subset = !ispoor, pch=20, col=rgb(0.9,0.9,0.9), ylim = c(-2,2), outline=F, add=T, ylab="interval error (with respect to ET)", xlab="interval in semitones", cex.axis=0.8, lty=1, yaxt='n', boxwex=0.6) # posi <- c(1:6, 8, 11, 13, 14, 18) # axis(1, at=posi, labels=posi-6, cex.axis=0.8) # text(countsByNoteID$noteid, rep(2,11), countsByNoteID$count, cex=0.7) mtext(c(countsByNoteID$count), at=c(countsByNoteID$noteid), cex=0.7) text(1:25, rep(-1.5,11), c("(-5)", gtdata$interval_ST[-25]), cex=0.7) text(2.7, -1.35 , "intervals in semitones", cex=0.7) text(1-.5, 1.3, "phrase 1", pos=4) abline(v=6.5, lty=1, col='gray') text(7-.5, 1.3, "phrase 2", pos=4) abline(v=12.5, lty=1, col='gray') text(13-.5, 1.3, "phrase 3", pos=4) abline(v=19.5, lty=1, col='gray') text(20-.5, 1.3, "phrase 4", pos=4) dev.off() pdf("../figures/pitch_error_by_interval_boxplot.pdf", width = 5.5, height = 4) par(mar=c(2,4,1,.5)) plot(c(1,18), c(-1.6,1.6), type='n', xaxt='n', xlab = '', ylab='', yaxt='n') abline(h=0) abline(h=-1, lty=2) abline(h=1, lty=2) plot(pitcherror ~ factor(expectedinterval, levels=-5:12), data=data, subset = !ispoor, pch=20, col=rgb(0.9,0.9,0.9), ylim = c(-2,2), outline=F, xaxt='n', yaxt='n', add=T, ylab="pitch error (with respect to ET)", xlab="interval in semitones", cex.axis=0.8, lty=1) posi <- c(1:6, 8, 11, 13, 14, 18) axis(2, at=c(-1,0,1), cex.axis=0.8) axis(1, at=posi, labels=posi-6, cex.axis=0.8) # text(counts$interval + 6, rep(2,11), counts$count, cex=0.7) # mtext(c("n=", countsByInterval$count), at=c(-.1,countsByInterval$interval + 6), cex=0.7) dev.off() countsBySemitone <- with(data[!data$ispoor,], aggregate(data.frame(count=data$pitcherror), list(interval=data$semitone), function(x){sum(!is.na(x))})) pdf("../figures/pitch_error_by_semitone_boxplot.pdf", width = 4.5, height = 4) par(mar=c(2,4,1,.5)) plot(c(.5,13.5), c(-1.6,1.6), type='n', xaxt='n', xlab = '', ylab='', yaxt='n') abline(h=0) abline(h=-1, lty=2) abline(h=1, lty=2) plot(pitcherror ~ factor(semitone, levels=-5:7), data=data, subset = !ispoor, pch=20, col=rgb(0.9,0.9,0.9), ylim = c(-2,2), outline=F, xaxt='n', yaxt='n', add=T, ylab="pitch error (with respect to ET)", xlab="interval in semitones", cex.axis=0.8, lty=1) posi <- c(1, 3, 5, 6, 8, 10, 11, 13) axis(2, at=c(-1,0,1), cex.axis=0.8) axis(1, at=posi, labels=posi-6, cex.axis=0.8) # text(counts$interval + 6, rep(2,11), counts$count, cex=0.7) # mtext(c("n=", countsByInterval$count), at=c(-.1,countsByInterval$interval + 6), cex=0.7) dev.off() pdf("../figures/pitch_error_by_notenumber_boxplot.pdf", width = 7, height = 4) par(mar=c(2,.5,1,1)) plot(c(1,25), c(-1.6,1.6), type='n', xaxt='n', xlab = '', ylab='', yaxt='n') abline(h=0) abline(h=-1, lty=2) abline(h=1, lty=2) plot(pitcherror ~ factor(noteid, levels=1:75), data=data, subset = !ispoor, pch=20, col=rgb(0.9,0.9,0.9), ylim = c(-2,2), outline=F, add=T, ylab="pitch error (with respect to ET)", xlab="interval in semitones", cex.axis=0.8, lty=1, yaxt='n', boxwex=0.6) # posi <- c(1:6, 8, 11, 13, 14, 18) # axis(1, at=posi, labels=posi-6, cex.axis=0.8) # text(countsByNoteID$noteid, rep(2,11), countsByNoteID$count, cex=0.7) # mtext(c(countsByNoteID$count), at=c(countsByNoteID$noteid), cex=0.7) text(1:25, rep(-1.5,11), gtdata$pitch_ion_cents_et/100, cex=0.7) text(2.7, -1.35 , "pitch relative to tonic", cex=0.7) text(1-.5, 1.3, "phrase 1", pos=4) abline(v=6.5, lty=1, col='gray') text(7-.5, 1.3, "phrase 2", pos=4) abline(v=12.5, lty=1, col='gray') text(13-.5, 1.3, "phrase 3", pos=4) abline(v=19.5, lty=1, col='gray') text(20-.5, 1.3, "phrase 4", pos=4) dev.off() # linear fit example plot pdf("../figures/pitch_error_example.pdf", width = 6, height = 5) par(mar=c(4,4,.1,.1)) ind <- which(data$singer == "jp" & data$mode == "imagined" & data$run == 1) plot(data$newmedian[ind], pch = 5, xlab="note number", ylab="pitch", xlim = c(0,25), las = 1) points((data$newmedian-data$semitone)[ind], pch=18) fit <- lm(I(newmedian-semitone)~notenumber, data = data[ind,]) for (ii in 1:25) { lines(c(ii,ii), c(fit$fitted.values[ii], (data$newmedian-data$semitone)[ind[ii]])) } abline(a=fit$coefficients[1], b=fit$coefficients[2]) text(0, fit$coefficients[1]+0.3, 0, cex = 0.7) for (offs in 1:8) { abline(a=fit$coefficients[1]+offs, b=fit$coefficients[2], lty = 2) abline(a=fit$coefficients[1]-offs, b=fit$coefficients[2], lty = 2) text(0, fit$coefficients[1]+offs+0.3, +offs, cex = 0.7) text(0, fit$coefficients[1]-offs+0.3, -offs, cex = 0.7) } dev.off() # drift histogram pdf("../figures/drift_histogram.pdf", width = 6, height = 4) par(mar=c(4,4,.3,.5)) hist(agg.drifts$diffs, seq(-1.5,1.5,by=0.2), col="dark gray", border="black", xlab = "drift", ylab = "number of recordings", main = "") dev.off() # mean absolute pitch error histogram pdf("../figures/mape_histogram.pdf", width = 3.3, height = 4) par(mar=c(4,4,.3,.5)) hist(agg.mape$mape, seq(0,0.4,by=0.05), col="dark gray", border="black", xlab = "semitones", ylab = "number of recordings", main = "", ylim=c(0,24)) abline(h=5, col = "gray") abline(h=10, col = "gray") abline(h=15, col = "gray") abline(h=20, col = "gray") hist(agg.mape$mape, seq(0,0.4,by=0.05), col="dark gray", border="black", xlab = "semitones", ylab = "number of recordings", main = "", ylim=c(0,24), add = T) dev.off() # mean absolute interval error histogram pdf("../figures/maie_histogram.pdf", width = 3.3, height = 4) par(mar=c(4,1,.3,.5)) hist(agg.maie$maie, seq(0,.5,by=0.05), col="dark gray", border="black", xlab = "semitones", ylab = "number of recordings", main = "", ylim=c(0,24), yaxt="n") abline(h=5, col = "gray") abline(h=10, col = "gray") abline(h=15, col = "gray") abline(h=20, col = "gray") hist(agg.maie$maie, seq(0,.5,by=0.05), col="dark gray", border="black", xlab = "semitones", ylab = "number of recordings", main = "", ylim=c(0,24), yaxt="n", add = T) dev.off() mean(agg.maie$maie) median(agg.maie$maie) sd(agg.maie$maie) # MAR pdf("../figures/mar_histogram.pdf", width = 3.3, height = 4) par(mar=c(4,1,.3,.5)) hist(agg.rise$rise, seq(0,.5,by=0.05), col="dark gray", border="black", xlab = "semitones", ylab = "number of recordings", main = "", ylim=c(0,24), yaxt="n") abline(h=5, col = "gray") abline(h=10, col = "gray") abline(h=15, col = "gray") abline(h=20, col = "gray") hist(agg.rise$rise, seq(0,.5,by=0.05), col="dark gray", border="black", xlab = "semitones", ylab = "number of recordings", main = "", ylim=c(0,24), yaxt="n", add = T) dev.off() mean(agg.rise$rise) median(agg.rise$rise) sd(agg.rise$rise) # drift pdf("../figures/d_histogram.pdf", width = 3.3, height = 4) par(mar=c(4,1,.3,.5)) hist(agg.drifts$diffs, seq(-.6,.6,by=0.1), col="dark gray", border="black", xlab = "semitones", ylab = "number of recordings", main = "", ylim=c(0,24), yaxt="n") abline(h=5, col = "gray") abline(h=10, col = "gray") abline(h=15, col = "gray") abline(h=20, col = "gray") hist(agg.drifts$diffs, seq(-.6,.6,by=0.1), col="dark gray", border="black", xlab = "semitones", ylab = "number of recordings", main = "", ylim=c(0,24), yaxt="n", add = T) dev.off() # absolute drift pdf("../figures/abs_d_histogram.pdf", width = 3.3, height = 4) par(mar=c(4,1,.3,.5)) hist(abs(agg.drifts$diffs), seq(0,.6,by=0.05), col="dark gray", border="black", xlab = "semitones", ylab = "number of recordings", main = "", ylim=c(0,24), yaxt="n") abline(h=5, col = "gray") abline(h=10, col = "gray") abline(h=15, col = "gray") abline(h=20, col = "gray") hist(abs(agg.drifts$diffs), seq(0,.6,by=0.05), col="dark gray", border="black", xlab = "semitones", ylab = "number of recordings", main = "", ylim=c(0,24), yaxt="n", add = T) dev.off() # linear drift pdf("../figures/lineard_histogram.pdf", width = 3.3, height = 4) par(mar=c(4,1,.3,.5)) hist(100*linear.drift$lineardrift, seq(-1.2,1.2,by=0.2), col="dark gray", border="black", xlab = "cents", ylab = "number of recordings", main = "", ylim=c(0,24), yaxt="n") abline(h=5, col = "gray") abline(h=10, col = "gray") abline(h=15, col = "gray") abline(h=20, col = "gray") hist(100*linear.drift$lineardrift, seq(-1.2,1.2,by=0.2), col="dark gray", border="black", xlab = "cents", ylab = "number of recordings", main = "", ylim=c(0,24), yaxt="n", add = T) dev.off() # mu pdf("../figures/mu_histogram.pdf", width = 4.3, height = 3) par(mar=c(4,4,.3,.5)) hist(singer.mus[,2], seq(0.6, 1.0, 0.4/5), col="dark gray", border="black", xlab = expression(mu), ylab = "number of singers", main = "", add = F) dev.off() # drift significance plot pdf("../figures/drift_significance_plot.pdf", width = 6, height = 3.5) plot(agg.drifts$diffs, agg.drifts.p$p, log="y", col = rgb(0,0,0,0.4), pch=19, yaxt = "n", ylab = "p-value ", xlab = "drift magnitude in semitones", xlim=c(-1,1)) axis(2, at = 10^(-(1:4)), label = c("0.1","0.01","0.001","0.0001"),las=1) abline(h=0.01, lty = 2) abline(v=0.0) text(0.75,0.0000005,"significant\nupward\ndrift") text(-0.75,0.0000005,"significant\ndownward\ndrift") dev.off() # drift significance plot pdf("../figures/lineardrift_significance_plot.pdf", width = 6, height = 4.8) plot(linear.drift$lineardrift, linear.drift$p.value, log="y", col = rgb(0,0,0,0.4), pch=19, yaxt = "n", ylab = "p-value ", xlab = "linear drift per note in semitones") axis(2, at = 10^(-(1:4)), label = c("0.1","0.01","0.001","0.0001"),las=1) abline(h=0.01, lty = 2) abline(v=0.0) text(0.0035,0.0000005,"significant\nupward\ndrift") text(-0.0035,0.0000005,"significant\ndownward\ndrift") dev.off() # how many are significantly adrift? # sum(agg.drifts.p$p<0.01) # mean(agg.drifts.p$p<0.01) # drift vs mape pdf("../figures/drift_vs_mape.pdf", width = 5.5, height = 4) par(mar=c(4,4,.3,.5)) outl <- agg.mape$mape>0.35 fit1 <- lm(agg.mape$mape~agg.drifts$diffs) fit2 <- lm(agg.mape$mape~agg.drifts$diffs, subset=!outl) plot(agg.drifts$diffs, agg.mape$mape, type="n", xlab="drift magnitude in semitones", ylab="mean absolute pitch error") points(agg.drifts$diffs[outl], agg.mape$mape[outl], pch=19) points(agg.drifts$diffs[!outl], agg.mape$mape[!outl], pch=21) abline(fit1, lty=2) abline(fit2) dev.off() # drift vs maie pdf("../figures/drift_vs_maie.pdf", width = 5.5, height = 4) par(mar=c(4,4,.3,.5)) outl <- agg.mape$mape>0.35 fit1 <- lm(agg.maie$maie~agg.drifts$diffs) fit2 <- lm(agg.maie$maie~agg.drifts$diffs, subset=!outl) plot(agg.drifts$diffs, agg.maie$maie, type="n", xlab="drift magnitude in semitones", ylab="mean absolute pitch error") points(agg.drifts$diffs[outl], agg.maie$maie[outl], pch=19) points(agg.drifts$diffs[!outl], agg.maie$maie[!outl], pch=21) abline(fit1, lty=2) abline(fit2) dev.off() # drift vs IB pdf("../figures/drift_vs_ib.pdf", width = 5.5, height = 4) par(mar=c(4,4,.3,.5)) fit1 <- lm(agg.ib$ib~agg.drifts$diffs) plot(agg.drifts$diffs, agg.ib$ib, type="n", xlab="drift magnitude in semitones", ylab="mean interval error") points(agg.drifts$diffs, agg.ib$ib, pch=21) abline(fit1, lty=2) dev.off() # cor(merged.singeragg$maie, merged.singeragg$mape) pdf("../figures/mape_maie_drift.pdf", width = 5, height = 5) # par(mar=c(1,1,1,1)) plot(data.frame(MAPE=agg.mape$mape, MAIE=agg.maie$maie, Drift=agg.drifts$diffs)) dev.off() # and their correlations cor.test(agg.mape$mape, agg.maie$maie) cor.test(agg.mape$mape, agg.drifts$diffs) cor.test(agg.maie$maie, agg.drifts$diffs) # corrgram(data.frame(merged.singeragg$maie, merged.singeragg$mape, merged.singeragg$diffs, as.numeric(merged.singeragg$sing.ability), as.numeric(merged.singeragg$musical.background), as.numeric(merged.singeragg$choir.experience))) # corrgram(data.frame(merged.agg$maie, merged.agg$mape, merged.agg$diffs, as.numeric(merged.agg$sing.ability), as.numeric(merged.agg$musical.background), as.numeric(merged.agg$choir.experience))) # pdf("../figures/corrgram.pdf", width = 5, height = 5) # corrgram(data.frame(MAIE=merged.agg$maie, ib=merged.agg$ib, MAPE=merged.agg$mape, D=merged.agg$diffs, singing.ability=as.numeric(merged.agg$sing.ability), musical.bg=as.numeric(merged.agg$musical.background), choir.exp=as.numeric(merged.agg$choir.experience)), lower.panel=panel.conf, upper.panel=panel.pie) # dev.off() # slope figure pdf("../figures/rise_by_notenumber_boxplot.pdf", width = 5.5, height = 4) par(mar=c(2,3.8,.1,.1)) plot(c(1,25), c(-1.6,1.6), type='n', xaxt='n', xlab = '', ylab='', yaxt='n') abline(h=0) abline(h=-1, lty=2) abline(h=1, lty=2) plot(I(slope * duration * 44100/1024) ~ factor(noteid, levels=1:75), data=data, subset = !ispoor, pch=20, col=rgb(0.9,0.9,0.9), ylim = c(-2,2), outline=F, add=T, ylab="rise (semitones)", xlab="interval in semitones", cex.axis=0.8, lty=1, boxwex=0.6) # posi <- c(1:6, 8, 11, 13, 14, 18) # axis(1, at=posi, labels=posi-6, cex.axis=0.8) # text(countsByNoteID$noteid, rep(2,11), countsByNoteID$count, cex=0.7) mtext(c(countsByNoteID$count), at=c(countsByNoteID$noteid), cex=0.7) # text(1:25, rep(-1.5,11), c("(-5)", gtdata$interval_ST[-25]), cex=0.7) # text(2.7, -1.35 , "intervals in semitones", cex=0.7) text(1-.5, 1.3, "phrase 1", pos=4) abline(v=6.5, lty=1, col='gray') text(7-.5, 1.3, "phrase 2", pos=4) abline(v=12.5, lty=1, col='gray') text(13-.5, 1.3, "phrase 3", pos=4) abline(v=19.5, lty=1, col='gray') text(20-.5, 1.3, "phrase 4", pos=4) dev.off() # mu versus model MAPE pdf("../figures/memory_vs_model_mape.pdf", width = 4.3, height = 3) par(mar=c(4,4,.3,.5)) plot(mus, apply(md[!md$ispoor,(1:nmu)+8], 2, function(x) {mean(abs(x),na.rm=T)}), type='l', pch=20, xlab=expression(mu), ylab="mean absolute pitch error", ylim=c(0.18,0.30)) abline(h=mean(abs(data$pitcherror[!data$ispoor]), na.rm=T), lty=2) dev.off()