# Mercurial > hg > dml-open-backendtools

# Part of DML (Digital Music Laboratory)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

# -*- coding: utf-8 -*-
__author__="Daniel Wolff"

import codecs
import warnings
import numpy
import csv
from n3Parser import uri2path


def get_array_from_csv(input_f_file):
    """Read a csv file into a list of rows with a float time stamp.

    Every accepted row is returned as a list whose first element (the
    "time" column) is converted to float; all remaining fields are left
    as the strings produced by the csv reader.

    Typical column layouts are, for example:
        silvet pitch output:
            ['time', 'duration', 'pitch', 'velocity', 'label']
        qm_vamp_key_standard output:
            ['time', 'keynr', 'label']
        qm_vamp_key_standard_tonic output:
            ['time', 'keynr', 'label']

    For a five-column file the result can be traversed by unpacking:
        for time, duration, pitch, velocity, label in \
                get_array_from_csv(input_f_file):
            ...

    The first non-empty row fixes the expected number of columns. Rows
    with fewer fields (blank lines included) are skipped, and a single
    warning reports how many were dropped. Rows with more fields than
    the first row are kept in full.

    Args:
        input_f_file: location of the csv file. It is passed through
            uri2path() before being opened (presumably a URI-to-path
            conversion -- see n3Parser).

    Returns:
        A list of lists: [float(time), str, str, ...] per accepted row.

    Raises:
        ValueError: if the first field of an accepted row cannot be
            parsed as a float (e.g. a header line).
    """
    output = []
    badcount = 0

    # column count of the first non-empty row; 0 until one has been seen
    ncols = 0

    # NOTE(review): 'rb' is the Python 2 convention for the csv module.
    # Under Python 3 the csv module needs a text-mode file opened with
    # newline='' -- revisit when this module is ported.
    with open(uri2path(input_f_file), 'rb') as csvfile:
        contents = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in contents:
            if not row:
                # A blank line has no time field. Count it as incomplete
                # instead of letting row[0] fail when it precedes the
                # first data row.
                badcount += 1
                continue

            if ncols == 0:
                ncols = len(row)

            if len(row) >= ncols:
                # we assume format time , ...
                output.append([float(row[0])] + row[1:])
            else:
                badcount += 1

    if badcount > 0:
        warnings.warn("Incomplete csv file, ignoring " + str(badcount) + " entries")

    return output


def get_dict_from_csv(input_f_file, columtype=None):
    """Read a csv file into a list of dictionaries keyed by column name.

    The first value of each row (time) is converted to float; all other
    fields are left as the strings produced by the csv reader.

    Example calls:
        silvet pitch output:
            get_dict_from_csv(input_f_file, columtype=[
                'time', 'duration', 'pitch', 'velocity', 'label'])
        qm_vamp_key_standard output:
            get_dict_from_csv(input_f_file,
                              columtype=['time', 'keynr', 'label'])
        qm_vamp_key_standard_tonic output:
            get_dict_from_csv(input_f_file,
                              columtype=['time', 'keynr', 'label'])

    The first non-empty row fixes the expected number of columns. Rows
    with a different number of fields (blank lines included) are
    skipped, and a single warning reports how many were dropped.

    Args:
        input_f_file: location of the csv file. It is passed through
            uri2path() before being opened (presumably a URI-to-path
            conversion -- see n3Parser).
        columtype: names for the columns, in file order. Defaults to
            ['time']. If fewer names than columns are given, a warning
            is issued and the missing ones are generated as
            'data<k>', where k is the 1-based column position. The
            passed list is never modified.

    Returns:
        A list with one dict per accepted row, mapping column name to
        value.

    Raises:
        ValueError: if the first field of an accepted row cannot be
            parsed as a float (e.g. a header line).
        IndexError: if columtype holds more names than the file has
            columns.
    """
    # Work on a private copy. The list is extended below, and extending
    # a shared default (or the caller's own list) would leak the
    # generated names into later calls.
    if columtype is None:
        columns = ['time']
    else:
        columns = list(columtype)

    output = []
    badcount = 0

    # column count of the first non-empty row; 0 until one has been seen
    ncols = 0

    # NOTE(review): 'rb' is the Python 2 convention for the csv module.
    # Under Python 3 the csv module needs a text-mode file opened with
    # newline='' -- revisit when this module is ported.
    with open(uri2path(input_f_file), 'rb') as csvfile:
        contents = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in contents:
            if not row:
                # A blank line carries no data. Count it as incomplete
                # instead of letting row[0] fail when it precedes the
                # first data row.
                badcount += 1
                continue

            # initialise the column names from the first data row
            if ncols == 0:
                ncols = len(row)

                # get number of descriptors, and append if left empty
                ncoldescr = len(columns)
                if ncoldescr < ncols:
                    warnings.warn("Column types missing")
                    columns.extend(['data' + str(i)
                                    for i in range(ncoldescr + 1, ncols + 1)])

            if len(row) == ncols:
                # parse the csv data into dict
                rowdict = {}
                for i, col in enumerate(columns):
                    # first value (time) is transformed to float
                    if i == 0:
                        rowdict[col] = float(row[i])
                    else:
                        rowdict[col] = row[i]

                # append dictionary to output
                output.append(rowdict)
            else:
                badcount += 1

    if badcount > 0:
        warnings.warn("Incomplete csv file, ignoring " + str(badcount) + " entries")

    return output
# author	Daniel Wolff
# date	Sat, 20 Feb 2016 18:14:24 +0100
# parents
# children