view latex/scripts/results_and_figures.R @ 4:3e666d0329b5 tip

updated code
author Jiajie Dai <daijiajie1@gmail.com>
date Sat, 06 Jan 2018 12:20:49 +0000
parents 6531169e6866
children
line wrap: on
line source
cat("\n____________________________\n")
cat("\n____ MAIN PAPER RESULTS ____\n")
cat("\n____________________________\n\n")
cat("This script assumes that the script data_import.R has been run.\n")
cat("This script generates statistics and figures used in the JASA submission.\n\n")

##############################################################################
# METHOD SECTION OUTPUT
##############################################################################

cat("\n____ METHOD SECTION ____\n\n")
cat("- data for self-assessment table:\n")
print(with(meta[meta$instudy,], summary(data.frame(age, musical.background, sing.ability, sing.experience, choir.experience))))

cat("\n____ ACCURACY SECTION ____\n\n")

cat("- Interval error significance tests:\n\n")
significance.level <- 0.0001
res <- with(data[data$ispoor == F, ],
    t.test(intervalerror[ expectedinterval == 8 ]))
cat(        "  - Upward minor sixth is flat.\n")
cat(sprintf("    Is the difference significant at significance level %0.4f? %s (p = %g, df=%g, t = %g)\n\n", significance.level, res$p.value < significance.level, res$p.value, res$parameter, res$statistic))

res <- with(data[data$ispoor == F, ],
    t.test(intervalerror[ expectedinterval == 12 ]))
cat(        "  - Upward octave is flat.\n")
cat(sprintf("    Is the difference significant at significance level %0.4f? %s (p = %g, df=%g, t = %g)\n\n", significance.level, res$p.value < significance.level, res$p.value, res$parameter, res$statistic))

res <- with(data[data$ispoor == F, ],
    t.test(intervalerror[ expectedinterval == 0 ]))
cat(sprintf("  - Prime (0 interval) is sharp by %0.2f.\n", res$estimate))
cat(sprintf("    Is the difference significant at significance level %0.4f? %s (p = %g, df=%g, t = %g)\n\n", significance.level, res$p.value < significance.level, res$p.value, res$parameter, res$statistic))

cat("- explaining pitch error by following interval or beginning of phrase\n\n")
is.beginning.of.phrase <- I(data$noteid[!data$ispoor] %in% c(1,7,13,20))
res <- with(data[!data$ispoor, ],
    summary(lm(pitcherror ~ is.beginning.of.phrase + expectedfutureinterval)))
cat(sprintf("  - p: %0.3g, %0.3g\n",res$coefficients[3,4],res$coefficients[2,4]))
cat(sprintf("  - F = %g\n",res$fstatistic[1]))
cat(sprintf("  - DF: %0.0f\n",res$fstatistic[3]))
cat(sprintf("  - Variance explained: %0.1f%s\n",100*res$r.squared,"%"))
cat(sprintf("  - Sharpening in cents per note in interval: %0.1f\n\n",100*res$coefficients[3,1]))

res <- with(data[!data$ispoor, ],
    summary(lm(pitcherror ~ is.beginning.of.phrase)))
cat(sprintf("  - Variance explained only by intercept and following interval: %0.1f%s\n",100*res$r.squared,"%"))
cat(sprintf("  - p: %0.3g\n",res$coefficients[2,4]))
cat(sprintf("  - F = %g\n",res$fstatistic[1]))
cat(sprintf("  - DF: %0.0f\n\n",res$fstatistic[3]))
# cat(sprintf("  - Variance explained: %0.1f%s\n\n",100*res$r.squared,"%"))

res <- with(data[!data$ispoor, ],
    summary(lm(pitcherror ~ expectedfutureinterval)))
cat(sprintf("  - Variance explained only by intercept and beginning of phrase: %0.1f%s\n",100*res$r.squared,"%"))
cat(sprintf("  - p: %0.3g\n",res$coefficients[2,4]))
cat(sprintf("  - F = %g\n",res$fstatistic[1]))
cat(sprintf("  - DF: %0.0f\n\n",res$fstatistic[3]))
# cat(sprintf("  - Variance explained: %0.1f%s\n\n",100*res$r.squared,"%"))

et.ji.test <- with(data[!data$ispoor,], t.test(abs(pitcherror), abs(jipitcherror), paired=T))
etMAPE <- with(data[!data$ispoor,], mean(abs(pitcherror), na.rm=T))
jiMAPE <- with(data[!data$ispoor,], mean(abs(jipitcherror), na.rm=T))
cat("- Error difference between ET and JI error:\n")
cat("  Is it significant? Paired t-test says:")
cat(sprintf("%s (p = %g, df: %0.0f, t = %g)\n\n", et.ji.test$p.value < 0.01, et.ji.test$p.value, et.ji.test$parameter, et.ji.test$statistic), "\n")
cat(sprintf("ET (MAPE: %g) fits better than JI (MAPE: %g), by %g\n\n", etMAPE, jiMAPE, et.ji.test$estimate))

##############################################################################
# RESULTS SECTION OUTPUT
##############################################################################
cat("\n____ RESULTS SECTION ____\n\n")
cat("Basic metrics statistics.\n")
cat(sprintf("  - MAPE. Mean: %0.3f;  median: %0.3f;  sd: %0.3f\n", mean(agg.mape$mape), median(agg.mape$mape), sd(agg.mape$mape)))
cat(sprintf("  - MAIE. Mean: %0.3f;  median: %0.3f;  sd: %0.3f\n", mean(agg.maie$maie), median(agg.maie$maie), sd(agg.maie$maie)))
# cat(sprintf("  - IB. Mean: %0.3f;  median: %0.3f;  sd: %0.3f\n", mean(agg.ib$ib), median(agg.ib$ib), sd(agg.ib$ib)))
cat(sprintf("  - D. Mean: %0.3f;  median: %0.3f;  sd: %0.3f\n", mean(agg.drifts$diffs), median(agg.drifts$diffs), sd(agg.drifts$diffs)))
cat(sprintf("  - |D|. Mean: %0.3f;  median: %0.3f;  sd: %0.3f\n\n", mean(abs(agg.drifts$diffs)), median(abs(agg.drifts$diffs)), sd(abs(agg.drifts$diffs))))
cat(sprintf("  - lin D. in cents: Mean: %0.3f;  median: %0.3f;  sd: %0.3f\n", 100*mean(linear.drift$lineardrift), 100*median(linear.drift$lineardrift), 100*sd(linear.drift$lineardrift)))

cat(sprintf("- Significance of drift:"))
cat(sprintf("  - significant drifts: %i of %i (%0.0f%s)\n", sum(agg.drifts.p$p<0.01), length(agg.drifts.p$p), 100*mean(agg.drifts.p$p<0.01), "%"))
with(agg.drifts.p[agg.drifts$diffs>0,], cat(sprintf("  - number of upward drifts: %i, of which %i significantly so.\n", length(p), sum(p<0.01))))
with(agg.drifts.p[agg.drifts$diffs<0,], cat(sprintf("  - number of downward drifts: %i, of which %i significantly so.\n\n", length(p), sum(p<0.01))))

cat(sprintf("- Significance of linear drift"))
cat(sprintf("  - significant drifts: %i of %i (%0.0f%%) at conf. level 0.01, %i of %i (%0.0f%%) at conf. level 0.05 \n", sum(linear.drift$p.value<0.01), length(linear.drift$p.value), 100*mean(linear.drift$p.value<0.01), sum(linear.drift$p.value<0.05), length(linear.drift$p.value), 100*mean(linear.drift$p.value<0.05)))
with(linear.drift[linear.drift$lineardrift>0,], cat(sprintf("  - number of upward drifts: %i, of which %i significantly so at 0.01.\n", length(p.value), sum(p.value<0.01))))
with(linear.drift[linear.drift$lineardrift<0,], cat(sprintf("  - number of downward drifts: %i, of which %i significantly so at 0.01.\n\n", length(p.value), sum(p.value<0.01))))
with(linear.drift[linear.drift$lineardrift>0,], cat(sprintf("  - number of upward drifts: %i, of which %i significantly so at 0.05.\n", length(p.value), sum(p.value<0.05))))
with(linear.drift[linear.drift$lineardrift<0,], cat(sprintf("  - number of downward drifts: %i, of which %i significantly so at 0.05.\n\n", length(p.value), sum(p.value<0.05))))


cat("- Correlation table (LaTeX syntax)\n")
# cor.df <- data.frame(sg.abl=as.numeric(merged.agg$sing.ability), sg.exp=as.numeric(merged.agg$sing.experience), mus.bg=as.numeric(merged.agg$musical.background), ch.exp=as.numeric(merged.agg$choir.experience), MAIE=merged.agg$maie, MAPE=merged.agg$mape, IB=merged.agg$ib, D=merged.agg$diffs, abs.D=abs(merged.agg$diffs))
cor.df <- data.frame(sg.abl=as.numeric(merged.agg$sing.ability), sg.exp=as.numeric(merged.agg$sing.experience), mus.bg=as.numeric(merged.agg$musical.background), ch.exp=as.numeric(merged.agg$choir.experience), MAIE=merged.agg$maie, MAPE=merged.agg$mape, lin.D=merged.agg$lineardrift, abs.D=abs(merged.agg$diffs), D=merged.agg$diffs)
# cortable(data.frame(sg.abl=as.numeric(merged.agg$sing.ability), mus.bg=as.numeric(merged.agg$musical.background), ch.exp=as.numeric(merged.agg$choir.experience), MAIE=merged.agg$maie, ib=merged.agg$ib, MAPE=merged.agg$mape, D=merged.agg$diffs, abs.D=abs(merged.agg$diffs)))
cortable(cor.df)
cat("\n\n")

# testing singer predicting MAPE, MAIE, and drift
cat("- Variance explained by singer for different measures\n\n")
res <- summary(lm(mape~singer, data=agg.mape))
pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE)
df <- res$fstatistic[2]
cat(sprintf("  - MAPE: %0.2f%%, p-value: %0.5g, df = %g, F = %g\n", res$r.squared*100, pval, df, res$fstatistic[1]))
res <- summary(lm(maie~singer, data=agg.maie))
pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE)
df <- res$fstatistic[2]
cat(sprintf("  - MAIE: %0.2f%%, p-value: %0.5g, df = %g, F = %g\n", res$r.squared * 100, pval, df, res$fstatistic[1]))
# res <- summary(lm(ib~singer, data=agg.ib))
# pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE)
# cat(sprintf("  - IB: %0.2f%%, p-value: %0.5g\n", res$r.squared * 100, pval))
res <- summary(lm(lineardrift~singer, data=linear.drift))
pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE)
df <- res$fstatistic[2]
cat(sprintf("  - lin.D: %0.2f%%, p-value: %0.5g, df = %g, F = %g\n", res$r.squared * 100, pval, df, res$fstatistic[1]))
res <- summary(lm(diffs~singer, data=agg.drifts))
pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE)
df <- res$fstatistic[2]
cat(sprintf("  - D: %0.2f%%, p-value: %0.5g, df = %g, F = %g\n", res$r.squared * 100, pval, df, res$fstatistic[1]))
res <- summary(lm(abs(diffs)~singer, data=agg.drifts))
pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE)
df <- res$fstatistic[2]
cat(sprintf("  - |D|: %0.2f%%, p-value: %0.5g, df = %g, F = %g\n\n", res$r.squared * 100, pval, df, res$fstatistic[1]))


# testing influence of condition
cat("- Testing influence of singing condition (Normal/Masked/Imagined) via Kruskal-Wallis tests (sign. level 0.01):\n\n")
res <- kruskal.test(mape ~ mode, data = agg.mape)
cat(sprintf("  - MAPE significant difference due to condition? %s (p-value: %0.5g, df = %g, chisquared = %g)\n", res$p.value<0.01, res$p.value, res$parameter, res$statistic))
res <- kruskal.test(maie ~ mode, data = agg.maie)
cat(sprintf("  - MAIE significant difference due to condition? %s (p-value: %0.5g, df = %g, chisquared = %g)\n", res$p.value<0.01, res$p.value, res$parameter, res$statistic))
# res <- kruskal.test(ib ~ mode, data = agg.ib)
# cat(sprintf("  - IB significant difference due to condition? %s (p-value: %0.5g)\n", res$p.value<0.01, res$p.value))
res <- kruskal.test(lineardrift ~ mode, data = linear.drift)
cat(sprintf("  - lin. D significant difference due to condition? %s (p-value: %0.5g, df = %g, chisquared = %g)\n", res$p.value<0.01, res$p.value, res$parameter, res$statistic))
res <- kruskal.test(diffs ~ mode, data = agg.drifts)
cat(sprintf("  - D significant difference due to condition? %s (p-value: %0.5g, df = %g, chisquared = %g)\n", res$p.value<0.01, res$p.value, res$parameter, res$statistic))
res <- kruskal.test(abs(diffs) ~ mode, data = agg.drifts)
cat(sprintf("  - |D| significant difference due to condition? %s (p-value: %0.5g, df = %g, chisquared = %g)\n", res$p.value<0.01, res$p.value, res$parameter, res$statistic))

# cat("- \"Flat\" singer: \n\n")
# "hl"
# with(agg.drifts.all, cat(sprintf("  - drift in normal condition: %0.2f semitones\n", diffs[singer=="hl" & mode=="normal"])))
# with(linear.drift.all, cat(sprintf("  - linear drift in normal condition: %0.2f cents per note\n", 100 * lineardrift[singer=="hl" & mode=="normal"])))
# with(agg.drifts.all, cat(sprintf("  - drift in masked condition: %0.2f semitones\n", diffs[singer=="hl" & mode=="masked"])))
# with(linear.drift.all, cat(sprintf("  - linear drift in masked condition: %0.2f cents per note\n", 100 * lineardrift[singer=="hl" & mode=="masked"])))
# with(agg.drifts.all, cat(sprintf("  - drift in imagined condition: %0.2f semitones\n", diffs[singer=="hl" & mode=="imagined"])))
# with(linear.drift.all, cat(sprintf("  - linear drift in imagined condition: %0.2f cents per note\n\n", 100 * lineardrift[singer=="hl" & mode=="imagined"])))

##############################################################################
# MODEL SECTION OUTPUT
##############################################################################
cat("\n____ MODEL SECTION ____\n\n")

# notevar <- with(mdr[!mdr$ispoor,], aggregate(I(p-s-tstart), list(notenumber=notenumber), function(x)var(x,na.rm=T))[,2])
# notevar2 <- with(data[!data$ispoor,], aggregate(pitcherror, list(notenumber=notenumber), function(x)var(x,na.rm=T))[,2])
# driftvar <- notevar-notevar2
# driftvar.fit <- lm(driftvar[7:75]~I(7:75))
# sqrt(driftvar.fit$coefficients[2]*50)

cat("- existence of 'spread'\n")
var.by.notenumber <- aggregate(data$relativetonic[!data$ispoor], list(data$notenumber[!data$ispoor]), function(x) {var(x, na.rm=T)})
var2.by.notenumber <- aggregate(data$pitcherror[!data$ispoor], list(data$notenumber[!data$ispoor]), function(x) {var(x, na.rm=T)})
counts.by.notenumber <- aggregate(data$relativetonic[!data$ispoor], list(data$notenumber[!data$ispoor]), function(x) {sum(!is.na(x))})
dist.by.notenumber <- aggregate(abs(data$relativetonic[!data$ispoor]), list(data$notenumber[!data$ispoor]), function(x) {mean(x, na.rm=T)})
fit1 <- lm(var.by.notenumber[,2]~var2.by.notenumber[,2], weights=counts.by.notenumber[,2])
fit2 <- lm(fit1$residuals~var.by.notenumber[(burnin+1):75,1], weights=counts.by.notenumber[(burnin+1):75,2])
summary(fit2)
res <- summary(fit2)
pval <- pf(res$fstatistic[1],res$fstatistic[2],res$fstatistic[3],lower.tail=FALSE)
cat(sprintf("  - notenumber: %0.2f%%, p-value: %0.5g, df = %g, F = %g\n\n", res$r.squared * 100, pval, res$fstatistic[1], res$fstatistic[3]))
plot(names(fit1$residuals), fit1$residuals)
abline(fit2)

intvar <- var(data$intervalerror[!data$ispoor], na.rm=T)
expected.D.var <- 50 * intvar
expected.D.sd <- expected.D.var^0.5
pitch.err.sd <- sd(data$pitcherror[!data$ispoor], na.rm=T)
cat(sprintf("- inadequacy of mu = 0 model:\n\n"))
cat(sprintf("  - interval (error) variance: %0.3f\n", intvar))
cat(sprintf("  - expected variance of D (interval variance summed over 50 notes): %0.3f\n", expected.D.var))
cat(sprintf("  - ... and the corresponding standard deviation: %0.3f\n\n", expected.D.sd))
cat(sprintf("  - In comparison the standard deviation of pitch erros is: %0.3f\n\n", pitch.err.sd))

model.mape <- apply(md[!md$ispoor,(1:nmu)+8], 2, function(x) {mean(abs(x),na.rm=T)})
model.mape.singerwise <- aggregate(md[!md$ispoor,(1:nmu)+8], list(md$singer[!md$ispoor]), function(x) {mean(abs(x),na.rm=T)})
model.mape.recordingwise <- aggregate(md[!md$ispoor,(1:nmu)+8], list(md$singer[!md$ispoor],md$mode[!md$ispoor]), function(x) {mean(abs(x),na.rm=T)})

recording.mus <- data.frame(singer = model.mape.recordingwise[,1], mu = mus[apply(model.mape.recordingwise[,-(1:2)], 1, which.min)])
# singer.mus <- data.frame(singer = model.mape.singerwise[,1], mu = mus[apply(model.mape.singerwise[,-1], 1, which.min)])
singer.mus <- aggregate(recording.mus[,2], list(recording.mus[,1]),mean)

cat(sprintf("- Better parameter for mu, firstly via 'model MAPE':\n\n"))
cat(sprintf("  - the smallest error of %0.2f is achieved with model '%s'\n", min(model.mape), names(which.min(model.mape))))
cat(sprintf("  - error at mu = 0: %0.2f\n", model.mape[1]))
cat(sprintf("  - error at mu = 1: %0.2f\n\n", model.mape[length(model.mape)]))

#-------------------------------
mdr <- merge(md, mr[,c(1,2,5)], by=c("singer", "mode"))
# plot(I(p-s-tstart)~factor(notenumber), data=mdr)

sig2 <- var(data$pitcherror[!data$ispoor], na.rm=T)
sig2int <- var(data$intervalerror[!data$ispoor], na.rm=T)
n <- 50
# vd <- var(drifts[!drifts$singer %in% poor.singers,4], na.rm=T)
# vd <- var(drifts$diffs[!drifts$singer %in% poor.singers], na.rm=T)
vd <- var(agg.drifts$diffs)
est.mu <- 1-sqrt(vd/(n*sig2))
  
cat("- Estimate according to drift variance formula:\n\n")
cat(sprintf("  - overall pitch error variance: %0.3f\n", sig2))
# cat(sprintf("  - overall interval error variance: %0.3f", sig2int))
cat(sprintf("  - observed variance over 50-note gap: %0.3f\n", vd))
cat(sprintf("  - estimated mu based on these: %0.3f\n\n", est.mu))

cat("- Stats for mu by singer:\n")
cat(sprintf("  - range: %0.3f to %0.3f\n", range(singer.mus[,2])[1], range(singer.mus[,2])[2]))
cat(sprintf("  - mean: %0.3f, median: %0.3f, sd: %0.3f\n\n", mean(singer.mus[,2]), median(singer.mus[,2]), sd(singer.mus[,2])))


# n <- 50
# mu <- 0.916
# sig2 <- var(data$pitcherror[ !data$ispoor ], na.rm=T)
# vv <- var(drifts$diffs[!drifts$singer %in% poor.singers], na.rm=T)
# vv <- var(drifts$diffs[!drifts$singer %in% poor.singers & drifts$noteid ==iNote], na.rm=T)

# n <- 50
# sig2 <- var(data$pitcherror[ !data$ispoor ], na.rm=T)
# vv <- var(agg.drifts$diffs)
# est.mu <- 1-sqrt(vv/(n*sig2))
# 
# vv <- var(agg.drifts$diffs)
# for (mu in c(0,0.8,0.9,est.mu,  0.95, 1))
# {
#     dvar <- (1-mu)^2 * n * sig2
#     cat(sprintf("Example: mu = %g, sigma^2 = %g, n = %g\n", mu, sig2, n))
#     cat(sprintf(" ... var = %g, sd = %g\n", dvar, sqrt(dvar)))
# }
# CHECK!!!!!

# singermodemu <- c()
# for (singer in singers[!singers %in% poor.singers])
# {
#     for (mode in c("normal","imagined","masked"))
#     {
#         sig2 <- var(data$pitcherror[drifts$singer==singer & drifts$mode==mode], na.rm=T)
#         # sig2int <- var(data$intervalerror[data$singermode==singermode], na.rm=T)
#         n <- 50
#         vd <- var(drifts$diffs[drifts$singer==singer & drifts$mode==mode], na.rm=T)
#         est.mu <- 1-sqrt(vd/(n*sig2))
#         # cat(sprintf("%s: %0.3f\n", singer, est.mu))
#         singermode <- sprintf("%s%s", singer, mode)
#         singermodemu <- rbind(singermodemu, data.frame(singermode=singermode, singer=singer, mode=mode, mu=est.mu))
#     }
# }
# hist(singermodemu$mu, col="gray")

# make drift figures
for (singermode in sort(unique(data$singermode)))
{
	pdf(sprintf("../figures/drift_figure_%s.pdf", singermode), width = 3.5, height = 3.5)
	makeDriftGraph(data, singermode)
	dev.off()
}

for (singermode in sort(unique(data$singermode)))
{
	pdf(sprintf("../figures/drift_figure_model0850_%s.pdf", singermode), width = 5.5, height = 4.5)
	with(data[data$singermode==singermode,], plot(notenumber, newmedian-semitone, pch="+", ylab="pitch", xlab="note"))
	with(md[md$singermode==singermode,], points(notenumber, p-s-err0850, pch=20, type='o'))
	dev.off()
}

countsByInterval <- with(data[!data$ispoor,], aggregate(data.frame(count=data$intervalerror), list(interval=data$expectedinterval), function(x){sum(!is.na(x))})) 

pdf("../figures/interval_error_by_interval_boxplot.pdf", width = 5.5, height = 4)
par(mar=c(2,4,1,.5))
plot(c(1,18), c(-1.6,1.6), type='n', xaxt='n', xlab = '', ylab='', yaxt='n')
abline(h=0)
abline(h=-1, lty=2)
abline(h=1, lty=2)
plot(intervalerror ~ factor(expectedinterval, levels=-5:12), data=data, subset = !ispoor,
	pch=20, col=rgb(0.9,0.9,0.9), ylim = c(-2,2), outline=F, xaxt='n', yaxt='n', add=T,
	ylab="interval error (with respect to ET)", xlab="interval in semitones",
	cex.axis=0.8, lty=1)
posi <- c(1:6, 8, 11, 13, 14, 18)
axis(2, at=c(-1,0,1), cex.axis=0.8)
axis(1, at=posi, labels=posi-6, cex.axis=0.8)
# text(counts$interval + 6, rep(2,11), counts$count, cex=0.7)
mtext(c("n=", countsByInterval$count), at=c(-.1,countsByInterval$interval + 6), cex=0.7)
dev.off()

countsByNoteID <- with(data[!data$ispoor,], aggregate(data.frame(count=data$intervalerror), list(noteid=data$noteid), function(x){sum(!is.na(x))}))

pdf("../figures/interval_error_by_notenumber_boxplot.pdf", width = 7, height = 4)
par(mar=c(2,.5,1,1))	
plot(c(1,25), c(-1.6,1.6), type='n', xaxt='n', xlab = '', ylab='', yaxt='n')
abline(h=0)
abline(h=-1, lty=2)
abline(h=1, lty=2)
plot(intervalerror ~ factor(noteid, levels=1:75), data=data, subset = !ispoor,
	pch=20, col=rgb(0.9,0.9,0.9), ylim = c(-2,2), outline=F, add=T,
	ylab="interval error (with respect to ET)", xlab="interval in semitones",
	cex.axis=0.8, lty=1, yaxt='n', boxwex=0.6)
# posi <- c(1:6, 8, 11, 13, 14, 18)
# axis(1, at=posi, labels=posi-6, cex.axis=0.8)
# text(countsByNoteID$noteid, rep(2,11), countsByNoteID$count, cex=0.7)
mtext(c(countsByNoteID$count), at=c(countsByNoteID$noteid), cex=0.7)
text(1:25, rep(-1.5,11), c("(-5)", gtdata$interval_ST[-25]), cex=0.7)
text(2.7, -1.35 , "intervals in semitones", cex=0.7)
text(1-.5, 1.3, "phrase 1", pos=4)
abline(v=6.5, lty=1, col='gray')
text(7-.5, 1.3, "phrase 2", pos=4)
abline(v=12.5, lty=1, col='gray')
text(13-.5, 1.3, "phrase 3", pos=4)
abline(v=19.5, lty=1, col='gray')
text(20-.5, 1.3, "phrase 4", pos=4)
dev.off()

pdf("../figures/pitch_error_by_interval_boxplot.pdf", width = 5.5, height = 4)
par(mar=c(2,4,1,.5))
plot(c(1,18), c(-1.6,1.6), type='n', xaxt='n', xlab = '', ylab='', yaxt='n')
abline(h=0)
abline(h=-1, lty=2)
abline(h=1, lty=2)
plot(pitcherror ~ factor(expectedinterval, levels=-5:12), data=data, subset = !ispoor,
	pch=20, col=rgb(0.9,0.9,0.9), ylim = c(-2,2), outline=F, xaxt='n', yaxt='n', add=T,
	ylab="pitch error (with respect to ET)", xlab="interval in semitones",
	cex.axis=0.8, lty=1)
posi <- c(1:6, 8, 11, 13, 14, 18)
axis(2, at=c(-1,0,1), cex.axis=0.8)
axis(1, at=posi, labels=posi-6, cex.axis=0.8)
# text(counts$interval + 6, rep(2,11), counts$count, cex=0.7)
# mtext(c("n=", countsByInterval$count), at=c(-.1,countsByInterval$interval + 6), cex=0.7)
dev.off()

countsBySemitone <- with(data[!data$ispoor,], aggregate(data.frame(count=data$pitcherror), list(interval=data$semitone), function(x){sum(!is.na(x))})) 
pdf("../figures/pitch_error_by_semitone_boxplot.pdf", width = 4.5, height = 4)
par(mar=c(2,4,1,.5))
plot(c(.5,13.5), c(-1.6,1.6), type='n', xaxt='n', xlab = '', ylab='', yaxt='n')
abline(h=0)
abline(h=-1, lty=2)
abline(h=1, lty=2)
plot(pitcherror ~ factor(semitone, levels=-5:7), data=data, subset = !ispoor,
	pch=20, col=rgb(0.9,0.9,0.9), ylim = c(-2,2), outline=F, xaxt='n', yaxt='n', add=T,
	ylab="pitch error (with respect to ET)", xlab="interval in semitones",
	cex.axis=0.8, lty=1)
posi <- c(1, 3, 5, 6, 8, 10, 11, 13)
axis(2, at=c(-1,0,1), cex.axis=0.8)
axis(1, at=posi, labels=posi-6, cex.axis=0.8)
# text(counts$interval + 6, rep(2,11), counts$count, cex=0.7)
# mtext(c("n=", countsByInterval$count), at=c(-.1,countsByInterval$interval + 6), cex=0.7)
dev.off()

pdf("../figures/pitch_error_by_notenumber_boxplot.pdf", width = 7, height = 4)
par(mar=c(2,.5,1,1))	
plot(c(1,25), c(-1.6,1.6), type='n', xaxt='n', xlab = '', ylab='', yaxt='n')
abline(h=0)
abline(h=-1, lty=2)
abline(h=1, lty=2)
plot(pitcherror ~ factor(noteid, levels=1:75), data=data, subset = !ispoor,
	pch=20, col=rgb(0.9,0.9,0.9), ylim = c(-2,2), outline=F, add=T,
	ylab="pitch error (with respect to ET)", xlab="interval in semitones",
	cex.axis=0.8, lty=1, yaxt='n', boxwex=0.6)
# posi <- c(1:6, 8, 11, 13, 14, 18)
# axis(1, at=posi, labels=posi-6, cex.axis=0.8)
# text(countsByNoteID$noteid, rep(2,11), countsByNoteID$count, cex=0.7)
# mtext(c(countsByNoteID$count), at=c(countsByNoteID$noteid), cex=0.7)
text(1:25, rep(-1.5,11), gtdata$pitch_ion_cents_et/100, cex=0.7)
text(2.7, -1.35 , "pitch relative to tonic", cex=0.7)
text(1-.5, 1.3, "phrase 1", pos=4)
abline(v=6.5, lty=1, col='gray')
text(7-.5, 1.3, "phrase 2", pos=4)
abline(v=12.5, lty=1, col='gray')
text(13-.5, 1.3, "phrase 3", pos=4)
abline(v=19.5, lty=1, col='gray')
text(20-.5, 1.3, "phrase 4", pos=4)
dev.off()

# linear fit example plot
pdf("../figures/pitch_error_example.pdf", width = 6, height = 5)
par(mar=c(4,4,.1,.1))
ind <- which(data$singer == "jp" & data$mode == "imagined" & data$run == 1)
plot(data$newmedian[ind], pch = 5,
	xlab="note number", ylab="pitch",
	xlim = c(0,25), las = 1)
points((data$newmedian-data$semitone)[ind], pch=18)
fit <- lm(I(newmedian-semitone)~notenumber, data = data[ind,])
for (ii in 1:25)
{
	lines(c(ii,ii), c(fit$fitted.values[ii], (data$newmedian-data$semitone)[ind[ii]]))
}
abline(a=fit$coefficients[1], b=fit$coefficients[2])
text(0, fit$coefficients[1]+0.3, 0, cex = 0.7)
for (offs in 1:8)
{
	abline(a=fit$coefficients[1]+offs, b=fit$coefficients[2], lty = 2)
	abline(a=fit$coefficients[1]-offs, b=fit$coefficients[2], lty = 2)
	text(0, fit$coefficients[1]+offs+0.3, +offs, cex = 0.7)
	text(0, fit$coefficients[1]-offs+0.3, -offs, cex = 0.7)
}
dev.off()

# drift histogram
pdf("../figures/drift_histogram.pdf", width = 6, height = 4)
par(mar=c(4,4,.3,.5))
hist(agg.drifts$diffs, seq(-1.5,1.5,by=0.2), col="dark gray", border="black", 
	xlab = "drift", ylab = "number of recordings", main = "")
dev.off()

# mean absolute pitch error histogram
pdf("../figures/mape_histogram.pdf", width = 3.3, height = 4)
par(mar=c(4,4,.3,.5))
hist(agg.mape$mape, seq(0,0.4,by=0.05), col="dark gray", border="black", 
	xlab = "semitones", ylab = "number of recordings", main = "",
	ylim=c(0,24))
abline(h=5, col = "gray")
abline(h=10, col = "gray")
abline(h=15, col = "gray")
abline(h=20, col = "gray")
hist(agg.mape$mape, seq(0,0.4,by=0.05), col="dark gray", border="black", 
	xlab = "semitones", ylab = "number of recordings", main = "",
	ylim=c(0,24), add = T)
dev.off()

# mean absolute interval error histogram
pdf("../figures/maie_histogram.pdf", width = 3.3, height = 4)
par(mar=c(4,1,.3,.5))
hist(agg.maie$maie, seq(0,.5,by=0.05), col="dark gray", border="black", 
	xlab = "semitones", ylab = "number of recordings", main = "",
	ylim=c(0,24), yaxt="n")
abline(h=5, col = "gray")
abline(h=10, col = "gray")
abline(h=15, col = "gray")
abline(h=20, col = "gray")
hist(agg.maie$maie, seq(0,.5,by=0.05), col="dark gray", border="black", 
	xlab = "semitones", ylab = "number of recordings", main = "",
	ylim=c(0,24), yaxt="n", add = T)
dev.off()
mean(agg.maie$maie)
median(agg.maie$maie)
sd(agg.maie$maie)

# MAR
pdf("../figures/mar_histogram.pdf", width = 3.3, height = 4)
par(mar=c(4,1,.3,.5))
hist(agg.rise$rise, seq(0,.5,by=0.05), col="dark gray", border="black", 
	xlab = "semitones", ylab = "number of recordings", main = "",
	ylim=c(0,24), yaxt="n")
abline(h=5, col = "gray")
abline(h=10, col = "gray")
abline(h=15, col = "gray")
abline(h=20, col = "gray")
hist(agg.rise$rise, seq(0,.5,by=0.05), col="dark gray", border="black", 
	xlab = "semitones", ylab = "number of recordings", main = "",
	ylim=c(0,24), yaxt="n", add = T)
dev.off()
mean(agg.rise$rise)
median(agg.rise$rise)
sd(agg.rise$rise)

# drift
pdf("../figures/d_histogram.pdf", width = 3.3, height = 4)
par(mar=c(4,1,.3,.5))
hist(agg.drifts$diffs, seq(-.6,.6,by=0.1), col="dark gray", border="black", 
	xlab = "semitones", ylab = "number of recordings", main = "",
	ylim=c(0,24), yaxt="n")
abline(h=5, col = "gray")
abline(h=10, col = "gray")
abline(h=15, col = "gray")
abline(h=20, col = "gray")
hist(agg.drifts$diffs, seq(-.6,.6,by=0.1), col="dark gray", border="black", 
	xlab = "semitones", ylab = "number of recordings", main = "",
	ylim=c(0,24), yaxt="n", add = T)
dev.off()

# absolute drift
pdf("../figures/abs_d_histogram.pdf", width = 3.3, height = 4)
par(mar=c(4,1,.3,.5))
hist(abs(agg.drifts$diffs), seq(0,.6,by=0.05), col="dark gray", border="black", 
	xlab = "semitones", ylab = "number of recordings", main = "",
	ylim=c(0,24), yaxt="n")
abline(h=5, col = "gray")
abline(h=10, col = "gray")
abline(h=15, col = "gray")
abline(h=20, col = "gray")
hist(abs(agg.drifts$diffs), seq(0,.6,by=0.05), col="dark gray", border="black", 
	xlab = "semitones", ylab = "number of recordings", main = "",
	ylim=c(0,24), yaxt="n", add = T)
dev.off()

# linear drift
pdf("../figures/lineard_histogram.pdf", width = 3.3, height = 4)
par(mar=c(4,1,.3,.5))
hist(100*linear.drift$lineardrift, seq(-1.2,1.2,by=0.2), col="dark gray", border="black", 
	xlab = "cents", ylab = "number of recordings", main = "",
	ylim=c(0,24), yaxt="n")
abline(h=5, col = "gray")
abline(h=10, col = "gray")
abline(h=15, col = "gray")
abline(h=20, col = "gray")
hist(100*linear.drift$lineardrift, seq(-1.2,1.2,by=0.2), col="dark gray", border="black", 
	xlab = "cents", ylab = "number of recordings", main = "",
	ylim=c(0,24), yaxt="n", add = T)
dev.off()

# mu
pdf("../figures/mu_histogram.pdf", width = 4.3, height = 3)
par(mar=c(4,4,.3,.5))
hist(singer.mus[,2], seq(0.6, 1.0, 0.4/5), col="dark gray", border="black", 
	xlab = expression(mu), ylab = "number of singers", main = "",
	add = F)
dev.off()



# drift significance plot
pdf("../figures/drift_significance_plot.pdf", width = 6, height = 3.5)
plot(agg.drifts$diffs, agg.drifts.p$p, log="y", col = rgb(0,0,0,0.4), 
	pch=19, yaxt = "n", ylab = "p-value                  ", 
	xlab = "drift magnitude in semitones",
	xlim=c(-1,1))
axis(2, at = 10^(-(1:4)), label = c("0.1","0.01","0.001","0.0001"),las=1)
abline(h=0.01, lty = 2)
abline(v=0.0)
text(0.75,0.0000005,"significant\nupward\ndrift")
text(-0.75,0.0000005,"significant\ndownward\ndrift")
dev.off()

# drift significance plot
pdf("../figures/lineardrift_significance_plot.pdf", width = 6, height = 4.8)
plot(linear.drift$lineardrift, linear.drift$p.value, log="y", col = rgb(0,0,0,0.4), 
	pch=19, yaxt = "n", ylab = "p-value                  ", 
	xlab = "linear drift per note in semitones")
axis(2, at = 10^(-(1:4)), label = c("0.1","0.01","0.001","0.0001"),las=1)
abline(h=0.01, lty = 2)
abline(v=0.0)
text(0.0035,0.0000005,"significant\nupward\ndrift")
text(-0.0035,0.0000005,"significant\ndownward\ndrift")
dev.off()


# how many are significantly adrift?
# sum(agg.drifts.p$p<0.01)
# mean(agg.drifts.p$p<0.01)

# drift vs mape
pdf("../figures/drift_vs_mape.pdf", width = 5.5, height = 4)
par(mar=c(4,4,.3,.5))
outl <- agg.mape$mape>0.35
fit1 <- lm(agg.mape$mape~agg.drifts$diffs)
fit2 <- lm(agg.mape$mape~agg.drifts$diffs, subset=!outl)
plot(agg.drifts$diffs, agg.mape$mape, type="n", 
	xlab="drift magnitude in semitones",
	ylab="mean absolute pitch error")
points(agg.drifts$diffs[outl], agg.mape$mape[outl], pch=19)
points(agg.drifts$diffs[!outl], agg.mape$mape[!outl], pch=21)
abline(fit1, lty=2)
abline(fit2)
dev.off()

# drift vs maie
pdf("../figures/drift_vs_maie.pdf", width = 5.5, height = 4)
par(mar=c(4,4,.3,.5))
outl <- agg.mape$mape>0.35
fit1 <- lm(agg.maie$maie~agg.drifts$diffs)
fit2 <- lm(agg.maie$maie~agg.drifts$diffs, subset=!outl)
plot(agg.drifts$diffs, agg.maie$maie, type="n", 
	xlab="drift magnitude in semitones",
	ylab="mean absolute pitch error")
points(agg.drifts$diffs[outl], agg.maie$maie[outl], pch=19)
points(agg.drifts$diffs[!outl], agg.maie$maie[!outl], pch=21)
abline(fit1, lty=2)
abline(fit2)
dev.off()

# drift vs IB
pdf("../figures/drift_vs_ib.pdf", width = 5.5, height = 4)
par(mar=c(4,4,.3,.5))
fit1 <- lm(agg.ib$ib~agg.drifts$diffs)
plot(agg.drifts$diffs, agg.ib$ib, type="n", 
	xlab="drift magnitude in semitones",
	ylab="mean interval error")
points(agg.drifts$diffs, agg.ib$ib, pch=21)
abline(fit1, lty=2)
dev.off()


# cor(merged.singeragg$maie, merged.singeragg$mape)

pdf("../figures/mape_maie_drift.pdf", width = 5, height = 5)
# par(mar=c(1,1,1,1))
plot(data.frame(MAPE=agg.mape$mape, MAIE=agg.maie$maie, Drift=agg.drifts$diffs))
dev.off()
# and their correlations
cor.test(agg.mape$mape, agg.maie$maie)
cor.test(agg.mape$mape, agg.drifts$diffs)
cor.test(agg.maie$maie, agg.drifts$diffs)

# corrgram(data.frame(merged.singeragg$maie, merged.singeragg$mape, merged.singeragg$diffs, as.numeric(merged.singeragg$sing.ability), as.numeric(merged.singeragg$musical.background), as.numeric(merged.singeragg$choir.experience)))
# corrgram(data.frame(merged.agg$maie, merged.agg$mape, merged.agg$diffs, as.numeric(merged.agg$sing.ability), as.numeric(merged.agg$musical.background), as.numeric(merged.agg$choir.experience)))

# pdf("../figures/corrgram.pdf", width = 5, height = 5)
# corrgram(data.frame(MAIE=merged.agg$maie, ib=merged.agg$ib, MAPE=merged.agg$mape, D=merged.agg$diffs, singing.ability=as.numeric(merged.agg$sing.ability), musical.bg=as.numeric(merged.agg$musical.background), choir.exp=as.numeric(merged.agg$choir.experience)), lower.panel=panel.conf, upper.panel=panel.pie)
# dev.off()

# slope figure
pdf("../figures/rise_by_notenumber_boxplot.pdf", width = 5.5, height = 4)
par(mar=c(2,3.8,.1,.1))	
plot(c(1,25), c(-1.6,1.6), type='n', xaxt='n', xlab = '', ylab='', yaxt='n')
abline(h=0)
abline(h=-1, lty=2)
abline(h=1, lty=2)
plot(I(slope * duration * 44100/1024) ~ factor(noteid, levels=1:75), data=data, subset = !ispoor,
	pch=20, col=rgb(0.9,0.9,0.9), ylim = c(-2,2), outline=F, add=T,
	ylab="rise (semitones)", xlab="interval in semitones",
	cex.axis=0.8, lty=1, boxwex=0.6)
# posi <- c(1:6, 8, 11, 13, 14, 18)
# axis(1, at=posi, labels=posi-6, cex.axis=0.8)
# text(countsByNoteID$noteid, rep(2,11), countsByNoteID$count, cex=0.7)
mtext(c(countsByNoteID$count), at=c(countsByNoteID$noteid), cex=0.7)
# text(1:25, rep(-1.5,11), c("(-5)", gtdata$interval_ST[-25]), cex=0.7)
# text(2.7, -1.35 , "intervals in semitones", cex=0.7)
text(1-.5, 1.3, "phrase 1", pos=4)
abline(v=6.5, lty=1, col='gray')
text(7-.5, 1.3, "phrase 2", pos=4)
abline(v=12.5, lty=1, col='gray')
text(13-.5, 1.3, "phrase 3", pos=4)
abline(v=19.5, lty=1, col='gray')
text(20-.5, 1.3, "phrase 4", pos=4)
dev.off()

# mu versus model MAPE
pdf("../figures/memory_vs_model_mape.pdf", width = 4.3, height = 3)
par(mar=c(4,4,.3,.5))
plot(mus,
     apply(md[!md$ispoor,(1:nmu)+8], 2, function(x) {mean(abs(x),na.rm=T)}), 
     type='l', 
     pch=20,
     xlab=expression(mu),
     ylab="mean absolute pitch error",
     ylim=c(0.18,0.30))
abline(h=mean(abs(data$pitcherror[!data$ispoor]), na.rm=T), lty=2)
dev.off()