view latex/scripts/annotator_comparison.R @ 4:3e666d0329b5 tip

updated code
author Jiajie Dai <daijiajie1@gmail.com>
date Sat, 06 Jan 2018 12:20:49 +0000
parents 6531169e6866
children
line wrap: on
line source
# This script is independent of the main data processing because it would
# have become too confusing (I reckon).
# What it is about:
# * all three authors have annotated segmentation on a subset of the recordings
# * this script 
#   * loads the pitch track for each note segment and annotation
#   * compares the results regarding pitch and rise

# Convert a frequency in Hz to a (fractional) pitch number on a semitone
# scale where 440 Hz maps to 69 and each doubling of frequency adds 12.
# Vectorised: `f` may be a numeric vector, the result has the same length.
hz2pitch <- function(f)
{
    semitones.from.440 <- 12 * log2(f / 440)
    69 + semitones.from.440
}

# Directory holding the per-recording pitch track CSV files.
datadir <- "../data/filtered_pitchtracks/"

# Banner announcing this stage of the evaluation on standard output.
cat("\n____________________________\n",
    "\n____ PRE-EVALUATION     ____\n",
    "\n____________________________\n\n",
    sep = "")

cat("\ncompare different annotators\n\n")

##############################################################################
# LOAD ANNOTATIONS
##############################################################################

cat("Getting annotations from three annotators ... ", file = stderr())

# One row per annotated note segment; the CSV has no header line, so the
# column names are assigned here.
d <- read.table("../data/final_compare.csv", header = FALSE, sep = ",")
colnames(d) <- c("singer", "mode", "annotator", "onset", "duration", "run", "noteid")

# Combined singer/mode label as a factor.
d$singermode <- as.factor(sprintf("%s%s", d$singer, d$mode))

# Total number of annotated note segments.
n <- nrow(d)

# Distinct (singer, mode, annotator) combinations present in the data; each
# row is one item of work for the pitch extraction further down.
singer.mode <- unique(d[, c("singer", "mode", "annotator")])
nRecording <- nrow(singer.mode)

cat(" done.\n\n", file = stderr())

##############################################################################
# EXTRACT PITCHES
##############################################################################

cat("Extracting pitches etc. from pitch tracks (patience!) ...", file = stderr())

# Per-note results, filled in below. Entries stay NA for notes for which no
# pitch-track frame lies close enough to the note's median pitch.
#   median : median pitch (semitones) of the frames inside the note segment
#   sd     : standard deviation of the frames within 3 semitones of the median
#   rise   : slope of a linear fit of pitch against frame index (restricted to
#            those frames), multiplied by the note duration
#   n      : number of frames within 3 semitones of the median
d$median <- rep(NA_real_, n)
d$sd <- rep(NA_real_, n)
d$rise <- rep(NA_real_, n)
d$n <- rep(NA_real_, n)

cat("          ", file = stderr())

# Each row of singer.mode is one (singer, mode, annotator) combination.
for (iRecording in seq_len(nRecording))
{
    cat(sprintf("\b\b\b\b\b\b\b\b%2i of %2i", iRecording, nRecording), file = stderr())

    currsinger <- as.character(singer.mode[iRecording, 1])
    currmode <- as.character(singer.mode[iRecording, 2])
    currannotator <- as.character(singer.mode[iRecording, 3])

    # Pitch track of the recording: column 1 is compared against note
    # onset/offset times, column 2 is converted from Hz to pitch.
    yindata <- read.table(
        sprintf('%s/happy_birthday_%s_%s_vamp_yintony_yintony_f0.csv', datadir, currsinger, currmode),
        header = FALSE, sep = ",")

    # Rows of d annotated by this annotator for this recording. The filter
    # must test d$annotator row by row: comparing two scalars instead would
    # select the rows of every annotator and reprocess them repeatedly.
    dataind <- which(d$singer == currsinger & d$mode == currmode & d$annotator == currannotator)

    # Iterating over the indices themselves is safe when dataind is empty
    # (unlike 1:length(dataind), which would yield c(1, 0)).
    for (iData in dataind)
    {
        noteonset <- d$onset[iData]
        noteoffset <- noteonset + d$duration[iData]

        yinind <- yindata[,1] >= noteonset & yindata[,1] <= noteoffset
        curryindata <- hz2pitch(yindata[yinind,2])

        currmedian <- median(curryindata)
        d$median[iData] <- currmedian

        # Only frames within 3 semitones of the median contribute to the
        # spread and slope estimates.
        around.median <- which(abs(curryindata - currmedian) < 3)
        currn <- length(around.median)
        if (currn > 0)
        {
            d$sd[iData] <- sd(curryindata[around.median])
            d$rise[iData] <- lm(curryindata[around.median] ~ around.median)$coefficients[2] * d$duration[iData]
            d$n[iData] <- currn
        }
    }
}

# Line up the three annotators' results note by note. Columns coming from
# "mm" and "kf" get the suffixes ".mm" / ".kf"; the columns from "sd" do not
# clash with any of those and therefore keep their plain names (e.g. `median`).
note.keys <- c("singer", "mode", "run", "noteid")
notes.mm <- d[d$annotator == "mm", ]
notes.kf <- d[d$annotator == "kf", ]
notes.sd <- d[d$annotator == "sd", ]
mergedd <- merge(notes.mm, notes.kf, by = note.keys, suffixes = c(".mm", ".kf"))
mergedd <- merge(mergedd, notes.sd, by = note.keys, suffixes = c("", "sd"))

# One-sample t tests on the per-note differences in median pitch between each
# pair of annotators. `mergedd$median` (no suffix) holds the "sd" values.
median.sd <- mergedd$median
sd.kf.test <- t.test(median.sd - mergedd$median.kf)
sd.mm.test <- t.test(median.sd - mergedd$median.mm)
mm.kf.test <- t.test(mergedd$median.mm - mergedd$median.kf)

significance.level <- 0.01

# Print the mean difference for one annotator pair and whether its p-value
# falls below the significance level.
report.pair <- function(pair, test)
{
    cat(sprintf("* Mean difference between %s in semitones: %0.3f\n", pair, test$estimate))
    cat(sprintf("  Is significant: %s\n", test$p.value < significance.level))
}

cat(sprintf("\nSignificance of differences of pitch estimates obtained from different \nnote segmentation annotators (t test p<%0.2f).\n\n", significance.level))
report.pair("sd, kf", sd.kf.test)
report.pair("sd, mm", sd.mm.test)
report.pair("mm, kf", mm.kf.test)