view src/org/qmul/eecs/c4dm/sia/midi/MidiParser.java @ 83:4ef262740ceb

can be run with a specified maximum number of Datapoints and for a specified midiFileName
author stevenh
date Fri, 30 Aug 2013 16:12:28 +0100
parents 39106212a3c6
children
line wrap: on
line source
package org.qmul.eecs.c4dm.sia.midi;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Vector;

import javax.sound.midi.InvalidMidiDataException;
import javax.sound.midi.MetaMessage;
import javax.sound.midi.MidiEvent;
import javax.sound.midi.MidiMessage;
import javax.sound.midi.MidiSystem;
import javax.sound.midi.Sequence;
import javax.sound.midi.ShortMessage;
import javax.sound.midi.SysexMessage;
import javax.sound.midi.Track;

import org.qmul.eecs.c4dm.sia.SiaMain;
import org.qmul.eecs.c4dm.sia.model.Datapoint;
import org.qmul.eecs.c4dm.sia.model.DimensionValue;
import org.qmul.eecs.c4dm.sia.model.MemberOfDataset;
import org.qmul.eecs.c4dm.sia.model.SiaDataset;
import org.qmul.eecs.c4dm.sia.model.SiaVector;
import org.qmul.eecs.c4dm.sia.rdf.Namespaces;

import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.ontology.OntClass;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ReadWrite;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.AnonId;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.sparql.core.Var;
import com.hp.hpl.jena.sparql.syntax.ElementGroup;
import com.hp.hpl.jena.sparql.syntax.ElementTriplesBlock;
import com.hp.hpl.jena.tdb.TDBFactory;
import com.hp.hpl.jena.vocabulary.RDF;
import com.sun.media.sound.MidiUtils;

public class MidiParser {
	
	// Dimension numbers stored in each DimensionValue of a datapoint:
	// note onset time, MIDI pitch (data1 of the NOTE_ON) and the per-track
	// index produced by getTrackToDimensionIndexMap.
	public static final int TIME_DIMENSION = 1;
	public static final int PITCH_DIMENSION = 2;
	public static final int CHANNEL_DIMENSION = 3;

	// Ontology properties and classes. These are (re)assigned from the
	// ontology model at the start of every midiToRdf call and are read by
	// the static findOrCreate* helpers.
	private static Property siaDimValProperty;
	private static Property siaDimensionProperty;
	private static Property siaValueProperty;
	private static Property siaMemberOfDatasetProperty;
	private static OntClass datapointClass;
	private static OntClass datasetClass;
	private static OntClass siaVectorClass;
	private static Property siaVectorProperty;
	
	// The ontology (N3) read into the model before any datapoints are added
	private static final String ontology = "file:src/rdf/siaDatapointOntology.n3";

	// Base name of the output files; ".rdf" and ".n3" are appended to it
	private static final String finalModelFileName = "src/rdf/midiModel";

	// The input midi file used when the class is run through main
	private static final String midiFileName = "/Volumes/USB_DISK/portable/ewerts/Cantata_16_no_5-mids/score.mid";

	/**
	 * Command-line entry point: converts the default midi file, keeping at
	 * most six datapoints.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args)
	{
		final int maxDatapoints = 6;
		new MidiParser().midiToRdf(maxDatapoints, midiFileName);
	}
	
	/** Meta message type of a "set tempo" event (0x51). */
	private static final int META_TYPE_SET_TEMPO = 81;

	/**
	 * Reads a midi file, turns its note onsets into SIA datapoints and
	 * stores the resulting RDF model.
	 * <p>
	 * Every NOTE_ON event with a non-zero velocity yields one datapoint with
	 * three dimension values: onset time in seconds, pitch (data1) and the
	 * index assigned to the event's track by
	 * {@link #getTrackToDimensionIndexMap(Sequence)}. Only the first
	 * <code>maxDatapoints</code> onsets are added to the model; later events
	 * are still logged. The model is written to
	 * <code>finalModelFileName</code> + ".rdf" / ".n3" and then replaces the
	 * named model <code>SiaMain.graph</code> in the TDB dataset assembled
	 * from <code>SiaMain.assemblerFile</code>.
	 * <p>
	 * Any failure (unreadable midi file, no track containing a NOTE_ON,
	 * tempo meta event in a non-PPQ sequence, unwritable output file) prints
	 * a stack trace and terminates the JVM with exit status 1.
	 *
	 * @param maxDatapoints maximum number of datapoints added to the model
	 * @param midiFileName  path of the midi file to parse
	 */
	public void midiToRdf(int maxDatapoints, String midiFileName)
	{
		int numDatapoints = 0;
		
		// First create a Jena ontology model
		OntModel model = ModelFactory
				.createOntologyModel(); // OntModelSpec.OWL_MEM

		// Then read the data from the file into the ontology model
		model.read(ontology, "N3");
		
		datapointClass = model.getOntClass(Datapoint.RESOURCE_URI);
		datasetClass = model.getOntClass(SiaDataset.RESOURCE_URI);
		siaVectorClass = model.getOntClass(SiaVector.RESOURCE_URI);
		Resource datapointResource = model.getOntResource(datapointClass);
		Resource datasetResource = model.getOntResource(datasetClass);
		siaDimValProperty = model.createProperty(DimensionValue.PROPERTY_URI);
		siaDimensionProperty = model.createProperty(DimensionValue.DIMENSION_URI);
		siaValueProperty = model.createProperty(DimensionValue.VALUE_URI);
		siaMemberOfDatasetProperty = model.createProperty(MemberOfDataset.PROPERTY_URI);
		siaVectorProperty = model.createProperty(SiaVector.PROPERTY_URI);
		
		Sequence sequence = null;
		try {
			sequence = MidiSystem.getSequence(new File(midiFileName));
		} catch (InvalidMidiDataException e) {
			exitWithError(e);
		} catch (IOException e) {
			exitWithError(e);
		}
		
		MidiUtils.TempoCache tempoCache = new MidiUtils.TempoCache(sequence);

		Track[] tracks = sequence.getTracks();
		int numTracks = tracks.length;
		float divisionType = sequence.getDivisionType();
		List<Datapoint> datapoints = new ArrayList<Datapoint>();
		
		HashMap<Integer, Integer> trackDimensionMap = getTrackToDimensionIndexMap(sequence);
		
		if (trackDimensionMap.isEmpty())
		{
			exitWithError(new Exception("Couldn't find any track to dimension mappings"));
		}
		
		// Add a dataset, represented by a blank node, to the model
		Resource datasetBnode = model.createResource(AnonId.create());
		model.add(datasetBnode, RDF.type, datasetResource);
		
		long maxTick = 0;
				
		for (int trackIdx = 0; trackIdx < numTracks; trackIdx++)
		{
			System.out.println("Track " + trackIdx + ":");
			Track track = tracks[trackIdx];
			int numEvents = track.size();
			for (int eventIdx = 0; eventIdx < numEvents; eventIdx++)
			{
				MidiEvent event = track.get(eventIdx);
				long tick = event.getTick();
				MidiMessage midiMessage = event.getMessage();
				
				int midiMessageLength = midiMessage.getLength();
				byte[] messageBytes = midiMessage.getMessage();
				int status = midiMessage.getStatus();
				System.out.println("tick = " + tick + " midiMessageLength = " + midiMessageLength + " status byte = " + status);

				int s = status & 0x80;
				if (s == 0x80)
				{
					System.out.println("STATUS MESSAGE (s = " + s + ")");
				}
				else
				{
					System.out.println("not a status message (s = " + s + ")");
				}
					
				// Determine the type of this message (short, sysex or meta)
				if (midiMessage instanceof ShortMessage)
				{
					System.out.print("ShortMessage ");
										
					ShortMessage shortMessage = (ShortMessage)midiMessage;
					// getCommand() strips the low nibble of the status byte, so every
					// system message (status 0xF0-0xFF) would be reported as 0xF0 and
					// never equal ACTIVE_SENSING, CONTINUE etc. Those constants are
					// full status bytes, so compare the full status for them.
					int messageCommand = (status >= 0xF0) ? status : shortMessage.getCommand();
					int channel = shortMessage.getChannel();
					int data1 = shortMessage.getData1();
					int data2 = shortMessage.getData2();
					String ignoredName = ignoredCommandName(messageCommand);

					if (messageCommand == ShortMessage.NOTE_ON && data2 > 0)
					{
						if (tick > maxTick)
						{
							maxTick = tick;
						}
						System.out.println("NOTE_ON");
						double microsecs = MidiUtils.tick2microsecond(sequence, tick, tempoCache);
						double secs = microsecs/1000000;
						System.out.println(" secs = " + secs);
						
						// The track is guaranteed to be in the map: it contains this NOTE_ON
						Integer trackDimension = trackDimensionMap.get(trackIdx);
						Datapoint datapoint = createDatapoint(secs, data1, trackDimension);
						
						if (numDatapoints < maxDatapoints)
						{
							datapoints.add(datapoint);
							numDatapoints++;
							addDatapointToModel(model, datapointResource, datasetBnode,
									secs, data1, trackDimension);
						}
					}
					else if (messageCommand == ShortMessage.NOTE_OFF
							|| messageCommand == ShortMessage.NOTE_ON)
					{
						// A NOTE_ON with velocity 0 is the MIDI shorthand for a note-off,
						// so it must not produce an onset datapoint.
						System.out.println(messageCommand == ShortMessage.NOTE_OFF
								? "NOTE_OFF" : "NOTE_ON (velocity 0, treated as NOTE_OFF)");
						long microsecs = MidiUtils.tick2microsecond(sequence, tick, tempoCache);
						System.out.println(" microsecs = " + microsecs);
					}
					else if (ignoredName != null)
					{
						System.out.print("ignoring " + ignoredName);
					}
					else
					{
						System.out.print("unrecognised midi message command (" + messageCommand + ")");
					}
					System.out.print(", channel " + channel + ", data1 = [" + data1 + "], data2 = [" + data2 + "]");
					System.out.println();
				}
				else if (midiMessage instanceof MetaMessage)
				{
					System.out.println("MetaMessage");
					
					MetaMessage metaMessage = (MetaMessage)midiMessage;
					byte[] metaMessageData = metaMessage.getData();
					int metaMessageLength = metaMessage.getLength();
					int metaMessageType = metaMessage.getType();
					System.out.println("metaMessageType = " + metaMessageType + ", metaMessageLength = " + metaMessageLength);
					
					// PPQ tempo data is handled through the tempo cache; tempo
					// events in any other division type are not supported.
					if (metaMessageType == META_TYPE_SET_TEMPO && divisionType != Sequence.PPQ)
					{
						exitWithError(new Exception("Not yet implemented SMPTE tempo metadata"));
					}

					// Meta types 0x01-0x0F are the text events of the Standard MIDI
					// File format; only their payload is shown as characters. The
					// value is converted to a String explicitly because a ?: mixing
					// byte and char operands is promoted to int and would print
					// numbers in both cases.
					boolean isTextMeta = metaMessageType >= 0x01 && metaMessageType <= 0x0F;
					for (int dataIdx = 0; dataIdx < metaMessageData.length; dataIdx++)
					{
						byte dataByte = metaMessageData[dataIdx];
						String shown = isTextMeta
								? String.valueOf((char)(dataByte & 0xFF))
								: String.valueOf(dataByte);
						System.out.println("\tmetaMessageData[" + dataIdx + "] = " + shown);
					}

				}
				else if (midiMessage instanceof SysexMessage)
				{
					// We can safely ignore these messages
					System.out.println("ignoring SysexMessage");
				}
				else
				{
					System.out.println("Unknown MidiMessage type (" + midiMessage.getClass().toString() + ")");
				}
				
				for (int byteIdx = 0; byteIdx < midiMessageLength; byteIdx++)
				{
					byte messageByte = messageBytes[byteIdx];
					System.out.println("\tbyte[" + byteIdx + "] = " + messageByte);
				}
			}
		}

		System.out.println("done");
	    	    
		// Print out what we've got now
		System.out.println("------------------");
		StmtIterator stmtIterator = model.listStatements();
		printStmts(stmtIterator);

		// Write the model out in both RDF/XML and N3
		File outFileRdf = new File(finalModelFileName + ".rdf");
		File outFileN3 = new File(finalModelFileName + ".n3");
		writeModelToFile(model, outFileRdf, "RDF/XML");
		writeModelToFile(model, outFileN3, "N3");

		System.out.println("Model written to files: "
				+ outFileRdf.getAbsolutePath() + " and " + outFileN3.getAbsolutePath());
		
		// Obtain a dataset context and replace the named graph inside a
		// write transaction; the dataset is closed even if that fails.
		Dataset dataset = TDBFactory.assembleDataset(SiaMain.assemblerFile);
		try {
			dataset.begin(ReadWrite.WRITE) ;
			try {
				dataset.replaceNamedModel(SiaMain.graph, model);
				dataset.commit();
				System.out.println("dataset.commit() done");
			} finally {
				dataset.end();
				System.out.println("dataset.end() done");
			}
		} finally {
			dataset.close();
		}
		System.out.println("dataset.close() done");

		System.out.println("max tick: " + maxTick);
		System.out.println("Number of Datapoints (n) = " + datapoints.size());
		
	}

	/** Prints the stack trace of the given exception and terminates the JVM with status 1. */
	private static void exitWithError(Exception e)
	{
		e.printStackTrace();
		System.exit(1);
	}

	/** Returns the name of a short-message command that is deliberately ignored, or null for any other command. */
	private static String ignoredCommandName(int messageCommand)
	{
		switch (messageCommand)
		{
			case ShortMessage.ACTIVE_SENSING:        return "ACTIVE_SENSING";
			case ShortMessage.CHANNEL_PRESSURE:      return "CHANNEL_PRESSURE";
			case ShortMessage.CONTINUE:              return "CONTINUE";
			case ShortMessage.CONTROL_CHANGE:        return "CONTROL_CHANGE";
			case ShortMessage.END_OF_EXCLUSIVE:      return "END_OF_EXCLUSIVE";
			case ShortMessage.MIDI_TIME_CODE:        return "MIDI_TIME_CODE";
			case ShortMessage.PITCH_BEND:            return "PITCH_BEND";
			case ShortMessage.POLY_PRESSURE:         return "POLY_PRESSURE";
			case ShortMessage.PROGRAM_CHANGE:        return "PROGRAM_CHANGE";
			case ShortMessage.SONG_POSITION_POINTER: return "SONG_POSITION_POINTER";
			default:                                 return null;
		}
	}

	/** Builds the in-memory Datapoint (time, pitch, track index) for one note onset. */
	private static Datapoint createDatapoint(double secs, int pitch, Integer trackDimension)
	{
		DimensionValue timeDimVal = new DimensionValue();
		timeDimVal.setDimension(TIME_DIMENSION);
		timeDimVal.setValue(secs);
		
		DimensionValue pitchDimVal = new DimensionValue();
		pitchDimVal.setDimension(PITCH_DIMENSION);
		pitchDimVal.setValue(pitch);
		
		DimensionValue channelDimVal = new DimensionValue();
		channelDimVal.setDimension(CHANNEL_DIMENSION);
		channelDimVal.setValue(trackDimension);

		Datapoint datapoint = new Datapoint();
		Vector<DimensionValue> dimVals = new Vector<DimensionValue>();
		dimVals.add(timeDimVal);
		dimVals.add(pitchDimVal);
		dimVals.add(channelDimVal);
		datapoint.setDimensionValues(dimVals);
		return datapoint;
	}

	/** Adds one datapoint blank node, linked to its (found or created) vector and to the dataset, to the model. */
	private static void addDatapointToModel(OntModel model, Resource datapointResource,
			Resource datasetBnode, double secs, int pitch, Integer trackDimension)
	{
		Resource datapointBnode = model.createResource(AnonId.create());
		model.add(datapointBnode, RDF.type, datapointResource);

		// Find or create a DimVal for each of the TIME, PITCH and CHANNEL dimensions
		Resource timeDimValBnode = findOrCreateDimValBNode(model, TIME_DIMENSION, secs);
		Resource pitchDimValBnode = findOrCreateDimValBNode(model, PITCH_DIMENSION, pitch);
		Resource channelDimValBnode = findOrCreateDimValBNode(model, CHANNEL_DIMENSION, trackDimension);
		
		// Find or create a Vector for these three DimVals
		Resource vectorBnode = findOrCreateVectorBNode(model, timeDimValBnode, secs,
				pitchDimValBnode, pitch, channelDimValBnode, trackDimension);
								
		model.add(datapointBnode, siaVectorProperty, vectorBnode);
		model.add(datapointBnode, siaMemberOfDatasetProperty, datasetBnode);
	}

	/** Serialises the whole model to the given file in the given syntax, always closing the stream; exits the JVM on failure. */
	private static void writeModelToFile(OntModel model, File outFile, String syntax)
	{
		FileOutputStream out = null;
		try {
			out = new FileOutputStream(outFile);
			model.writeAll(out, syntax, null);
		} catch (FileNotFoundException e) {
			System.out.println("Unable to write to file: "
					+ outFile.getAbsolutePath());
			e.printStackTrace();
			System.exit(1);
		} finally {
			if (out != null)
			{
				try {
					out.close();
				} catch (IOException e1) {
					e1.printStackTrace();
					System.exit(1);
				}
			}
		}
	}

	/**
	 * Returns the vector resource in the model whose three DimVals carry the
	 * given time, pitch and channel values, creating it if none exists.
	 * <p>
	 * The lookup is a SELECT DISTINCT query over the model. When no vector
	 * matches, a new blank node typed as <code>siaVectorClass</code> is added
	 * and linked to the three supplied DimVal nodes.
	 *
	 * @param model              model that is searched and, if necessary, extended
	 * @param timeDimValBnode    DimVal node to attach for the time dimension when creating
	 * @param timeVal            time value to match
	 * @param pitchDimValBnode   DimVal node to attach for the pitch dimension when creating
	 * @param pitchVal           pitch value to match
	 * @param channelDimValBnode DimVal node to attach for the channel dimension when creating
	 * @param channelVal         channel value to match
	 * @return the matching or newly created vector resource
	 */
	private static Resource findOrCreateVectorBNode(OntModel model, Resource timeDimValBnode,
			double timeVal, Resource pitchDimValBnode, double pitchVal, Resource channelDimValBnode,
			double channelVal) {

		String vectorVarStr = "vector";
		Var vectorVar = Var.alloc(vectorVarStr);

		Query query = QueryFactory.make();
		query.setPrefix("sia", Namespaces.SIA_NS_URI);
		query.setQuerySelectType();
		query.setDistinct(true);
		query.addResultVar(vectorVar);

		ElementTriplesBlock etp = new ElementTriplesBlock();
		etp.addTriple(new Triple(vectorVar, RDF.type.asNode(), siaVectorClass.asNode()));
		addDimValPattern(etp, vectorVar, Var.alloc("dimVal1"), TIME_DIMENSION, timeVal);
		addDimValPattern(etp, vectorVar, Var.alloc("dimVal2"), PITCH_DIMENSION, pitchVal);
		addDimValPattern(etp, vectorVar, Var.alloc("dimVal3"), CHANNEL_DIMENSION, channelVal);
				
		ElementGroup body = new ElementGroup();
		body.addElement(etp);
		query.setQueryPattern(body);
				
		// The query execution is closed before the model is modified and
		// regardless of how the lookup ends, so it is never left open.
		QueryExecution qe = QueryExecutionFactory.create(query, model);
		try {
			ResultSet rs = qe.execSelect();
			if (rs.hasNext())
			{
				QuerySolution querySolution = rs.next();
				return querySolution.get(vectorVarStr).asResource();
			}
		} finally {
			qe.close();
		}

		Resource bnode = model.createResource(AnonId.create());							
		model.add(bnode, RDF.type, siaVectorClass);
		model.add(bnode, siaDimValProperty, timeDimValBnode);
		model.add(bnode, siaDimValProperty, pitchDimValBnode);
		model.add(bnode, siaDimValProperty, channelDimValBnode);
		return bnode;
	}

	/** Appends the three triple patterns "vector has a DimVal with this dimension and this value" to the block. */
	private static void addDimValPattern(ElementTriplesBlock pattern, Var vectorVar, Var dimValVar,
			int dimension, double value) {
		Node dimensionLiteralNode = Node.createUncachedLiteral(dimension, XSDDatatype.XSDinteger);
		Node valueLiteralNode = Node.createUncachedLiteral(value, XSDDatatype.XSDdouble);

		pattern.addTriple(new Triple(vectorVar, siaDimValProperty.asNode(), dimValVar));
		pattern.addTriple(new Triple(dimValVar, siaDimensionProperty.asNode(), dimensionLiteralNode));
		pattern.addTriple(new Triple(dimValVar, siaValueProperty.asNode(), valueLiteralNode));
	}

	/**
	 * Returns the DimVal resource in the model that has the given dimension
	 * and value, creating it if none exists.
	 * <p>
	 * The lookup is a SELECT DISTINCT query matching the dimension as an
	 * xsd:integer literal and the value as an xsd:double literal. When
	 * nothing matches, a new blank node carrying both literals is added.
	 *
	 * @param model     model that is searched and, if necessary, extended
	 * @param dimension dimension number of the DimVal
	 * @param value     value of the DimVal
	 * @return the matching or newly created DimVal resource
	 */
	private static Resource findOrCreateDimValBNode(OntModel model, int dimension, double value) {
		String dimValVarStr = "dimVal";
		Var dimValVar = Var.alloc(dimValVarStr);

		Query query = QueryFactory.make();
		query.setPrefix("sia", Namespaces.SIA_NS_URI);
		query.setQuerySelectType();
		query.setDistinct(true);
		query.addResultVar(dimValVar);

		Node dimensionLiteralNode = Node.createUncachedLiteral(dimension, XSDDatatype.XSDinteger);
		Node valueLiteralNode = Node.createUncachedLiteral(value, XSDDatatype.XSDdouble);

		ElementTriplesBlock etp = new ElementTriplesBlock();
		etp.addTriple(new Triple(dimValVar, siaDimensionProperty.asNode(), dimensionLiteralNode));
		etp.addTriple(new Triple(dimValVar, siaValueProperty.asNode(), valueLiteralNode));
		query.setQueryPattern(etp);
		
		// The query execution is closed before the model is modified and
		// regardless of how the lookup ends, so it is never left open.
		QueryExecution qe = QueryExecutionFactory.create(query, model);
		try {
			ResultSet rs = qe.execSelect();
			if (rs.hasNext())
			{
				QuerySolution querySolution = rs.next();
				return querySolution.get(dimValVarStr).asResource();
			}
		} finally {
			qe.close();
		}

		Resource bnode = model.createResource(AnonId.create());							
		model.addLiteral(bnode, siaDimensionProperty, dimension);
		model.addLiteral(bnode, siaValueProperty, value);
		return bnode;
	}

	/**
	 * Assigns a dimension index to every track of the sequence that contains
	 * at least one NOTE_ON short message.
	 * <p>
	 * Indices are handed out in track order, starting at 2 (dimension 1 is
	 * reserved for time). Tracks without any NOTE_ON get no entry.
	 *
	 * @param sequence the midi sequence whose tracks are inspected
	 * @return map from track index to assigned dimension index; empty when
	 *         no track contains a NOTE_ON
	 */
	private static HashMap<Integer, Integer> getTrackToDimensionIndexMap(Sequence sequence) {
		HashMap<Integer, Integer> dimensionByTrack = new HashMap<Integer, Integer>();
		Track[] allTracks = sequence.getTracks();

		// Dimension 1 is reserved for time
		int nextDimension = 2;

		for (int t = 0; t < allTracks.length; t++)
		{
			if (!containsNoteOn(allTracks[t]))
			{
				continue;
			}
			dimensionByTrack.put(t, nextDimension++);
		}

		return dimensionByTrack;
	}

	/** Tells whether the track holds at least one short message whose command is NOTE_ON. */
	private static boolean containsNoteOn(Track track) {
		for (int e = 0, count = track.size(); e < count; e++)
		{
			MidiMessage message = track.get(e).getMessage();

			// Only short messages carry commands; sysex and meta messages are skipped
			if (message instanceof ShortMessage
					&& ((ShortMessage)message).getCommand() == ShortMessage.NOTE_ON)
			{
				return true;
			}
		}
		return false;
	}

	/**
	 * Prints every remaining statement of the iterator to standard output,
	 * one "subject | predicate | object" row per line, followed by a blank
	 * line.
	 *
	 * @param iter iterator over the statements to print; it is consumed
	 */
	private static void printStmts(StmtIterator iter) {
		for (; iter.hasNext(); ) {
			Statement current = iter.nextStatement();

			StringBuilder row = new StringBuilder();
			row.append(" | <").append(current.getSubject());
			row.append("> | <").append(current.getPredicate());
			row.append("> | <").append(current.getObject());
			row.append("> | ");
			System.out.println(row.toString());
		}

		// Trailing empty line separates the listing from later output
		System.out.println();
	}
}