view yetilab/matrix/matrix.yeti @ 254:5eb57c649de0 sparse

Using hashes is simpler, but turns out to be mostly no faster and sometimes much slower. Not one to merge back.
author Chris Cannam
date Tue, 21 May 2013 17:40:33 +0100
parents efdb1aee9d21
children
line wrap: on
line source

module yetilab.matrix.matrix;

// A matrix is an array of vectors.

// A matrix can be stored in either column-major (the default) or
// row-major format. Storage order is an efficiency concern only:
// every API function operating on matrix objects will return the same
// result regardless of storage order.  (The transpose function just
// switches the row/column order without moving the elements.)

//!!! check that we are not unnecessarily copying in the transform functions

vec = load yetilab.vector.vector;
bf = load yetilab.vector.blockfuncs;

load yetilab.vector.vectortype;
load yetilab.matrix.matrixtype;

size m =
    case m of
    DenseRows r:
        major = length r;
        { 
            rows = major, 
            columns = if major > 0 then vec.length r[0] else 0 fi,
        };
    DenseCols c:
        major = length c;
        { 
            rows = if major > 0 then vec.length c[0] else 0 fi,
            columns = major, 
        };
    SparseRows { size, data }: size;
    SparseCols { size, data }: size;
    esac;

width m = (size m).columns;
height m = (size m).rows;

nonZeroValues m =
   (nzDense d =
        sum
           (map do v:
                sum (map do n: if n == 0 then 0 else 1 fi done (vec.list v))
                done d);
    nzSparse d =
        sum (map do k:
                 length (keys d.data[k])
                 done (keys d.data));
    case m of 
    DenseRows d: nzDense d;
    DenseCols d: nzDense d;
    SparseRows d: nzSparse d;
    SparseCols d: nzSparse d;
    esac);

density m =
   ({ rows, columns } = size m;
    cells = rows * columns;
    (nonZeroValues m) / cells);

fromSlice n m { data } =
    if n in data and m in data[n] then data[n][m] else 0 fi;

filledSlice sz n { data } =
   (slice = new double[sz];
    if n in data then
        h = data[n];
        for (keys h) do k: slice[k] := h[k] done;
    fi;
    vec.vector slice);

getAt row col m =
    case m of
    DenseRows rows: r = rows[row]; vec.at col r;
    DenseCols cols: c = cols[col]; vec.at row c;
    SparseRows data: fromSlice row col data;
    SparseCols data: fromSlice col row data;
    esac;

getColumn j m =
    case m of
    DenseCols cols: cols[j];
    SparseCols data: filledSlice data.size.rows j data;
    _: vec.fromList (map do i: getAt i j m done [0..height m - 1]);
    esac;

getRow i m =
    case m of
    DenseRows rows: rows[i];
    SparseRows data: filledSlice data.size.columns i data; 
    _: vec.fromList (map do j: getAt i j m done [0..width m - 1]);
    esac;

isRowMajor? m =
    case m of
    DenseRows _: true;
    DenseCols _: false;
    SparseRows _: true;
    SparseCols _: false;
    esac;

isSparse? m =
    case m of
    DenseRows _: false;
    DenseCols _: false;
    SparseRows _: true;
    SparseCols _: true;
    esac;

newColumnMajorStorage { rows, columns } = 
    if rows < 1 then array []
    else array (map \(vec.zeros rows) [1..columns])
    fi;

zeroMatrix { rows, columns } = 
    DenseCols (newColumnMajorStorage { rows, columns });

zeroMatrixWithTypeOf m { rows, columns } = 
    if isRowMajor? m then
        DenseRows (newColumnMajorStorage { rows = columns, columns = rows });
    else
        DenseCols (newColumnMajorStorage { rows, columns });
    fi;

zeroSizeMatrix () = zeroMatrix { rows = 0, columns = 0 };

generate f { rows, columns } =
    if rows < 1 or columns < 1 then zeroSizeMatrix ()
    else
        m = array (map \(new double[rows]) [1..columns]);
        for [0..columns-1] do col:
            for [0..rows-1] do row:
                m[col][row] := f row col;
            done;
        done;
        DenseCols (array (map vec.vector m))
    fi;

enumerateSparse m =
   (enumerate { size, data } =
        concat
           (map do i:
                map do j:
                    { i, j, v = data[i][j] }
                    done (keys data[i])
                done (keys data));
    case m of
    SparseCols d: 
        map do { i, j, v }: { i = j, j = i, v } done (enumerate d);
    SparseRows d:
        enumerate d;
     _: [];
    esac);

makeSparse type size entries =
   (isRow = case type of RowMajor (): true; ColumnMajor (): false; esac;
    data = [:];
    preprocess = 
        if isRow then id
        else do { i, j, v }: { i = j, j = i, v } done
        fi;
    for (map preprocess entries) do { i, j, v }:
        if not (i in data) then data[i] := [:] fi;
        data[i][j] := v;
    done;
    if isRow then SparseRows else SparseCols fi { size, data });

toSparse m =
    if isSparse? m then m
    else
        { rows, columns } = size m;
        enumerate m ii jj =
            case ii of
            i::irest:
                case jj of
                j::rest:
                    v = getAt i j m;
                    if v != 0 then { i, j, v } :. \(enumerate m ii rest)
                    else enumerate m ii rest
                    fi;
                 _: enumerate m irest [0..columns-1];
                esac;
             _: [];
            esac;
        makeSparse 
            if isRowMajor? m then RowMajor () else ColumnMajor () fi
               (size m)
               (enumerate m [0..rows-1] [0..columns-1]);
    fi;

toDense m =
    if not (isSparse? m) then m
    elif isRowMajor? m then
        DenseRows (array (map do row: getRow row m done [0..height m - 1]));
    else
        DenseCols (array (map do col: getColumn col m done [0..width m - 1]));
    fi;

constMatrix n = generate do row col: n done;
randomMatrix = generate do row col: Math#random() done;
identityMatrix = constMatrix 1;

transposed m =
    case m of
    DenseRows d: DenseCols d;
    DenseCols d: DenseRows d;
    SparseRows { data, size }: SparseCols
        { data, size = { rows = size.columns, columns = size.rows } }; 
    SparseCols { data, size }: SparseRows
        { data, size = { rows = size.columns, columns = size.rows } }; 
    esac;

sparseFlipped m =
   ({ tagger, data } = 
        case m of
        SparseCols { data }: { tagger = SparseRows, data };
        SparseRows { data }: { tagger = SparseCols, data };
        _: failWith "sparseFlipped called for non-sparse matrix";
        esac;
    data' = [:];
    for (keys data) do i:
        for (keys data[i]) do j:
            if not (j in data') then data'[j] := [:] fi;
            data'[j][i] := data[i][j];
        done
    done;
    tagger { size = size m, data = data' });

flipped m =
    if isSparse? m then
        sparseFlipped m
    else
        if isRowMajor? m then
            generate do row col: getAt row col m done (size m);
        else
            transposed
               (generate do row col: getAt col row m done
                { rows = (width m), columns = (height m) });
        fi
    fi;

toRowMajor m =
    if isRowMajor? m then m else flipped m fi;

toColumnMajor m =
    if not isRowMajor? m then m else flipped m fi;

equal'' comparator vecComparator m1 m2 =
    // Prerequisite: m1 and m2 have same size, sparse-p, and storage order
   (compareVecLists vv1 vv2 = all id (map2 vecComparator vv1 vv2);
    compareSparse d1 d2 =
       (data1 = d1.data;
        data2 = d2.data;
        keys data1 == keys data2 and
            all id
               (map do i: 
                    keys data1[i] == keys data2[i] and
                    all id
                       (map do j:
                            comparator data1[i][j] data2[i][j]
                            done (keys data1[i]))
                    done (keys data1)));
    case m1 of
    DenseRows d1:
        case m2 of DenseRows d2: compareVecLists d1 d2; _: false; esac;
    DenseCols d1:
        case m2 of DenseCols d2: compareVecLists d1 d2; _: false; esac;
    SparseRows d1:
        case m2 of SparseRows d2: compareSparse d1 d2; _: false; esac;
    SparseCols d1:
        case m2 of SparseCols d2: compareSparse d1 d2; _: false; esac;
    esac);

equal' comparator vecComparator m1 m2 =
    if size m1 != size m2 then 
        false
    elif isRowMajor? m1 != isRowMajor? m2 then
        equal' comparator vecComparator (flipped m1) m2;
    elif isSparse? m1 != isSparse? m2 then
        if isSparse? m1 then
            equal' comparator vecComparator m1 (toSparse m2)
        else
            equal' comparator vecComparator (toSparse m1) m2
        fi
    else
        equal'' comparator vecComparator m1 m2
    fi;

// Compare matrices using the given comparator for individual cells.
// Note that matrices with different storage order but the same
// contents are equal, although comparing them is slow.
//!!! Document the fact that sparse matrices can only be equal if they
// have the same set of non-zero cells (regardless of comparator used)
equalUnder comparator =
    equal' comparator (vec.equalUnder comparator);

equal =
    equal' (==) vec.equal;

newMatrix type data = //!!! NB does not copy data
   (tagger = case type of RowMajor (): DenseRows; ColumnMajor (): DenseCols esac;
    if empty? data or vec.empty? (head data)
    then zeroSizeMatrix ()
    else tagger (array data)
    fi);

newRowVector data = //!!! NB does not copy data
    DenseRows (array [data]);

newColumnVector data = //!!! NB does not copy data
    DenseCols (array [data]);

scaled factor m = //!!! v inefficient
    generate do row col: factor * (getAt row col m) done (size m);

thresholded threshold m = //!!! v inefficient; and should take a threshold function?
    generate do row col:
        v = getAt row col m; if (abs v) > threshold then v else 0 fi
    done (size m);

sum' m1 m2 =
    if (size m1) != (size m2)
    then failWith "Matrices are not the same size: \(size m1), \(size m2)";
    else
        generate do row col: getAt row col m1 + getAt row col m2 done (size m1);
    fi;

difference m1 m2 = //!!! doc: m1 - m2, not m2 - m1
    if (size m1) != (size m2)
    then failWith "Matrices are not the same size: \(size m1), \(size m2)";
    else
        generate do row col: getAt row col m1 - getAt row col m2 done (size m1);
    fi;

abs' m =
    generate do row col: abs (getAt row col m) done (size m);

sparseProductLeft size m1 m2 =
   (e = enumerateSparse m1;
    data = array (map \(new double[size.rows]) [1..size.columns]);
    for [0..size.columns - 1] do j':
        c = getColumn j' m2;
        for e do { v, i, j }:
            data[j'][i] := data[j'][i] + v * (vec.at j c);
        done;
    done;
    DenseCols (array (map vec.vector (list data))));

sparseProductRight size m1 m2 =
   (e = enumerateSparse m2;
    data = array (map \(new double[size.columns]) [1..size.rows]);
    for [0..size.rows - 1] do i':
        r = getRow i' m1;
        for e do { v, i, j }:
            data[i'][j] := data[i'][j] + v * (vec.at i r);
        done;
    done;
    DenseRows (array (map vec.vector (list data))));

sparseProduct size m1 m2 =
    case m2 of
    SparseCols d:
       (e = enumerateSparse m1;
        out = [:];
        for (keys d.data) do j':
            h = [:];
            for e do { v, i, j }:
                if j in d.data[j'] then
                    if not (i in h) then h[i] := 0 fi;
                    h[i] := h[i] + v * d.data[j'][j];
                fi;
            done;
            out[j'] := h;
        done;
        SparseCols { size, data = out });
    SparseRows _:
        sparseProduct size m1 (flipped m2);
     _: failWith "sparseProduct called for non-sparse matrices";
    esac;

denseProduct size m1 m2 =
   (data = array (map \(new double[size.rows]) [1..size.columns]);
    for [0..size.rows - 1] do i:
        row = getRow i m1;
        for [0..size.columns - 1] do j:
            data[j][i] := bf.sum (bf.multiply row (getColumn j m2));
        done;
    done;
    DenseCols (array (map vec.vector (list data))));

product m1 m2 =
    if (size m1).columns != (size m2).rows
    then failWith "Matrix dimensions incompatible: \(size m1), \(size m2) (\((size m1).columns) != \((size m2).rows))";
    else 
        size = { rows = (size m1).rows, columns = (size m2).columns };
        if isSparse? m1 then
            if isSparse? m2 then
                sparseProduct size m1 m2
            else
                sparseProductLeft size m1 m2
            fi
        elif isSparse? m2 then
            sparseProductRight size m1 m2
        else
            denseProduct size m1 m2
        fi;
    fi;

asRows m =
    map do i: getRow i m done [0 .. (height m) - 1];

asColumns m =
    map do i: getColumn i m done [0 .. (width m) - 1];

concatAgainstGrain tagger getter counter mm =
   (n = counter (size (head mm));
    tagger (array
       (map do i:
           vec.concat (map (getter i) mm)
           done [0..n-1])));

concatWithGrain tagger getter counter mm =
    tagger (array
       (concat
           (map do m:
               n = counter (size m);
               map do i: getter i m done [0..n-1]
               done mm)));

checkDimensionsFor direction first mm =
   (counter = if direction == Horizontal () then (.rows) else (.columns) fi;
    n = counter (size first);
    if not (all id (map do m: counter (size m) == n done mm)) then
        failWith "Matrix dimensions incompatible for concat (found \(map do m: counter (size m) done mm) not all of which are \(n))";
    fi);

concat direction mm = //!!! doc: storage order is taken from first matrix in sequence
    //!!! would this be better as separate concatHorizontal/concatVertical functions?
    case mm of
    first::rest: 
        checkDimensionsFor direction first mm;
        row = isRowMajor? first;
        // horizontal, row-major: against grain with rows
        // horizontal, col-major: with grain with cols
        // vertical, row-major: with grain with rows
        // vertical, col-major: against grain with cols
        case direction of
        Horizontal ():
            if row then concatAgainstGrain DenseRows getRow (.rows) mm;
            else concatWithGrain DenseCols getColumn (.columns) mm;
            fi;
        Vertical ():
            if row then concatWithGrain DenseRows getRow (.rows) mm;
            else concatAgainstGrain DenseCols getColumn (.columns) mm;
            fi;
        esac;
    [single]: single;
    _: zeroSizeMatrix ();
    esac;

//!!! inconsistent with std.slice which has start..end not start+count (see also vec.slice/rangeOf)
rowSlice start count m = //!!! doc: storage order same as input
    if isRowMajor? m then
        DenseRows (array (map ((flip getRow) m) [start .. start + count - 1]))
    else 
        DenseCols (array (map (vec.rangeOf start count) (asColumns m)))
    fi;

columnSlice start count m = //!!! doc: storage order same as input
    if not isRowMajor? m then
        DenseCols (array (map ((flip getColumn) m) [start .. start + count - 1]))
    else 
        DenseRows (array (map (vec.rangeOf start count) (asRows m)))
    fi;

resizedTo newsize m =
   (if newsize == (size m) then
        m
    elif (height m) == 0 or (width m) == 0 then
        zeroMatrixWithTypeOf m newsize;
    else
        growrows = newsize.rows - (height m);
        growcols = newsize.columns - (width m);
        rowm = isRowMajor? m;
        resizedTo newsize
            if rowm and growrows < 0 then
                rowSlice 0 newsize.rows m
            elif (not rowm) and growcols < 0 then 
                columnSlice 0 newsize.columns m
            elif growrows < 0 then 
                rowSlice 0 newsize.rows m
            elif growcols < 0 then 
                columnSlice 0 newsize.columns m
            else
                if growrows > 0 then
                    concat (Vertical ())
                       [m, zeroMatrixWithTypeOf m ((size m) with { rows = growrows })]
                else
                    concat (Horizontal ())
                       [m, zeroMatrixWithTypeOf m ((size m) with { columns = growcols })]
                fi
            fi
    fi);

{
    size,
    width,
    height,
    density,
    nonZeroValues,
    getAt,
    getColumn,
    getRow,
    isRowMajor?,
    isSparse?,
    generate,
    constMatrix,
    randomMatrix,
    zeroMatrix,
    identityMatrix,
    zeroSizeMatrix,
    equal,
    equalUnder,
    transposed,
    flipped,
    toRowMajor,
    toColumnMajor,
    toSparse,
    toDense,
    scaled,
    thresholded,
    resizedTo,
    asRows,
    asColumns,
    sum = sum',
    difference,
    abs = abs',
    product,
    concat,
    rowSlice,
    columnSlice,
    newMatrix,
    newRowVector,
    newColumnVector,
    newSparseMatrix = makeSparse
}
as
{
//!!! check whether these are right to be .selector rather than just selector

    size is matrix -> { .rows is number, .columns is number },
    width is matrix -> number,
    height is matrix -> number,
    density is matrix -> number,
    nonZeroValues is matrix -> number,
    getAt is number -> number -> matrix -> number,
    getColumn is number -> matrix -> vector,
    getRow is number -> matrix -> vector,
    isRowMajor? is matrix -> boolean,
    isSparse? is matrix -> boolean,
    generate is (number -> number -> number) -> { .rows is number, .columns is number } -> matrix,
    constMatrix is number -> { .rows is number, .columns is number } -> matrix,
    randomMatrix is { .rows is number, .columns is number } -> matrix,
    zeroMatrix is { .rows is number, .columns is number } -> matrix, 
    identityMatrix is { .rows is number, .columns is number } -> matrix, 
    zeroSizeMatrix is () -> matrix,
    equal is matrix -> matrix -> boolean,
    equalUnder is (number -> number -> boolean) -> matrix -> matrix -> boolean,
    transposed is matrix -> matrix,
    flipped is matrix -> matrix, 
    toRowMajor is matrix -> matrix, 
    toColumnMajor is matrix -> matrix,
    toSparse is matrix -> matrix,
    toDense is matrix -> matrix,
    scaled is number -> matrix -> matrix,
    thresholded is number -> matrix -> matrix,
    resizedTo is { .rows is number, .columns is number } -> matrix -> matrix,
    asRows is matrix -> list<vector>, 
    asColumns is matrix -> list<vector>,
    sum is matrix -> matrix -> matrix,
    difference is matrix -> matrix -> matrix,
    abs is matrix -> matrix,
    product is matrix -> matrix -> matrix,
    concat is (Horizontal () | Vertical ()) -> list<matrix> -> matrix,
    rowSlice is number -> number -> matrix -> matrix, 
    columnSlice is number -> number -> matrix -> matrix,
    newMatrix is (ColumnMajor () | RowMajor ()) -> list<vector> -> matrix, 
    newRowVector is vector -> matrix, 
    newColumnVector is vector -> matrix,
    newSparseMatrix is (ColumnMajor () | RowMajor ()) -> { .rows is number, .columns is number } -> list<{ .i is number, .j is number, .v is number }> -> matrix
}