nikcleju@2: """Orthogonal matching pursuit algorithms nikcleju@2: """ nikcleju@2: nikcleju@2: # Author: Vlad Niculae nikcleju@2: # nikcleju@2: # License: BSD Style. nikcleju@2: nikcleju@2: from warnings import warn nikcleju@2: nikcleju@2: import numpy as np nikcleju@2: from scipy import linalg nikcleju@2: from scipy.linalg.lapack import get_lapack_funcs nikcleju@2: nikcleju@2: #from .base import LinearModel nikcleju@2: #from ..utils.arrayfuncs import solve_triangular nikcleju@2: from sklearn.linear_model.base import LinearModel nikcleju@2: from sklearn.utils.arrayfuncs import solve_triangular nikcleju@2: nikcleju@2: premature = """ Orthogonal matching pursuit ended prematurely due to linear nikcleju@2: dependence in the dictionary. The requested precision might not have been met. nikcleju@2: """ nikcleju@2: nikcleju@2: nikcleju@2: def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True): nikcleju@2: """Orthogonal Matching Pursuit step using the Cholesky decomposition. nikcleju@2: nikcleju@2: Parameters: nikcleju@2: ----------- nikcleju@2: X: array, shape = (n_samples, n_features) nikcleju@2: Input dictionary. Columns are assumed to have unit norm. nikcleju@2: nikcleju@2: y: array, shape = (n_samples,) nikcleju@2: Input targets nikcleju@2: nikcleju@2: n_nonzero_coefs: int nikcleju@2: Targeted number of non-zero elements nikcleju@2: nikcleju@2: tol: float nikcleju@2: Targeted squared error, if not None overrides n_nonzero_coefs. nikcleju@2: nikcleju@2: copy_X: bool, optional nikcleju@2: Whether the design matrix X must be copied by the algorithm. A false nikcleju@2: value is only helpful if X is already Fortran-ordered, otherwise a nikcleju@2: copy is made anyway. nikcleju@2: nikcleju@2: Returns: nikcleju@2: -------- nikcleju@2: gamma: array, shape = (n_nonzero_coefs,) nikcleju@2: Non-zero elements of the solution nikcleju@2: nikcleju@2: idx: array, shape = (n_nonzero_coefs,) nikcleju@2: Indices of the positions of the elements in gamma within the solution nikcleju@2: vector nikcleju@2: nikcleju@2: """ nikcleju@2: if copy_X: nikcleju@2: X = X.copy('F') nikcleju@2: else: # even if we are allowed to overwrite, still copy it if bad order nikcleju@2: X = np.asfortranarray(X) nikcleju@2: nikcleju@2: min_float = np.finfo(X.dtype).eps nikcleju@2: nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (X,)) nikcleju@2: potrs, = get_lapack_funcs(('potrs',), (X,)) nikcleju@2: nikcleju@2: alpha = np.dot(X.T, y) nikcleju@2: residual = y nikcleju@2: n_active = 0 nikcleju@2: indices = range(X.shape[1]) # keeping track of swapping nikcleju@2: nikcleju@2: #max_features = X.shape[1] if tol is not None else n_nonzero_coefs nikcleju@2: # Nic: tol not None should not overide n_nonzero_coefs, but act together nikcleju@2: max_features = n_nonzero_coefs nikcleju@2: nikcleju@2: L = np.empty((max_features, max_features), dtype=X.dtype) nikcleju@2: L[0, 0] = 1. nikcleju@2: nikcleju@2: while True: nikcleju@2: lam = np.argmax(np.abs(np.dot(X.T, residual))) nikcleju@2: if lam < n_active or alpha[lam] ** 2 < min_float: nikcleju@2: # atom already selected or inner product too small nikcleju@2: warn(premature) nikcleju@2: break nikcleju@2: if n_active > 0: nikcleju@2: # Updates the Cholesky decomposition of X' X nikcleju@2: L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam]) nikcleju@2: solve_triangular(L[:n_active, :n_active], L[n_active, :n_active]) nikcleju@2: v = nrm2(L[n_active, :n_active]) ** 2 nikcleju@2: if 1 - v <= min_float: # selected atoms are dependent nikcleju@2: warn(premature) nikcleju@2: break nikcleju@2: L[n_active, n_active] = np.sqrt(1 - v) nikcleju@2: X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam]) nikcleju@2: alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active] nikcleju@2: indices[n_active], indices[lam] = indices[lam], indices[n_active] nikcleju@2: n_active += 1 nikcleju@2: # solves LL'x = y as a composition of two triangular systems nikcleju@2: gamma, _ = potrs(L[:n_active, :n_active], alpha[:n_active], lower=True, nikcleju@2: overwrite_b=False) nikcleju@2: nikcleju@2: residual = y - np.dot(X[:, :n_active], gamma) nikcleju@2: if tol is not None and nrm2(residual) ** 2 <= tol: nikcleju@2: break nikcleju@2: #elif n_active == max_features: nikcleju@2: # Nic: tol not None should not overide n_nonzero_coefs, but act together nikcleju@2: if n_active == max_features: nikcleju@2: break nikcleju@2: nikcleju@2: return gamma, indices[:n_active] nikcleju@2: nikcleju@2: nikcleju@2: def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None, nikcleju@2: copy_Gram=True, copy_Xy=True): nikcleju@2: """Orthogonal Matching Pursuit step on a precomputed Gram matrix. nikcleju@2: nikcleju@2: This function uses the the Cholesky decomposition method. nikcleju@2: nikcleju@2: Parameters: nikcleju@2: ----------- nikcleju@2: Gram: array, shape = (n_features, n_features) nikcleju@2: Gram matrix of the input data matrix nikcleju@2: nikcleju@2: Xy: array, shape = (n_features,) nikcleju@2: Input targets nikcleju@2: nikcleju@2: n_nonzero_coefs: int nikcleju@2: Targeted number of non-zero elements nikcleju@2: nikcleju@2: tol_0: float nikcleju@2: Squared norm of y, required if tol is not None. nikcleju@2: nikcleju@2: tol: float nikcleju@2: Targeted squared error, if not None overrides n_nonzero_coefs. nikcleju@2: nikcleju@2: copy_Gram: bool, optional nikcleju@2: Whether the gram matrix must be copied by the algorithm. A false nikcleju@2: value is only helpful if it is already Fortran-ordered, otherwise a nikcleju@2: copy is made anyway. nikcleju@2: nikcleju@2: copy_Xy: bool, optional nikcleju@2: Whether the covariance vector Xy must be copied by the algorithm. nikcleju@2: If False, it may be overwritten. nikcleju@2: nikcleju@2: Returns: nikcleju@2: -------- nikcleju@2: gamma: array, shape = (n_nonzero_coefs,) nikcleju@2: Non-zero elements of the solution nikcleju@2: nikcleju@2: idx: array, shape = (n_nonzero_coefs,) nikcleju@2: Indices of the positions of the elements in gamma within the solution nikcleju@2: vector nikcleju@2: nikcleju@2: """ nikcleju@2: Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram) nikcleju@2: nikcleju@2: if copy_Xy: nikcleju@2: Xy = Xy.copy() nikcleju@2: nikcleju@2: min_float = np.finfo(Gram.dtype).eps nikcleju@2: nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram,)) nikcleju@2: potrs, = get_lapack_funcs(('potrs',), (Gram,)) nikcleju@2: nikcleju@2: indices = range(len(Gram)) # keeping track of swapping nikcleju@2: alpha = Xy nikcleju@2: tol_curr = tol_0 nikcleju@2: delta = 0 nikcleju@2: n_active = 0 nikcleju@2: nikcleju@2: max_features = len(Gram) if tol is not None else n_nonzero_coefs nikcleju@2: L = np.empty((max_features, max_features), dtype=Gram.dtype) nikcleju@2: L[0, 0] = 1. nikcleju@2: nikcleju@2: while True: nikcleju@2: lam = np.argmax(np.abs(alpha)) nikcleju@2: if lam < n_active or alpha[lam] ** 2 < min_float: nikcleju@2: # selected same atom twice, or inner product too small nikcleju@2: warn(premature) nikcleju@2: break nikcleju@2: if n_active > 0: nikcleju@2: L[n_active, :n_active] = Gram[lam, :n_active] nikcleju@2: solve_triangular(L[:n_active, :n_active], L[n_active, :n_active]) nikcleju@2: v = nrm2(L[n_active, :n_active]) ** 2 nikcleju@2: if 1 - v <= min_float: # selected atoms are dependent nikcleju@2: warn(premature) nikcleju@2: break nikcleju@2: L[n_active, n_active] = np.sqrt(1 - v) nikcleju@2: Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam]) nikcleju@2: Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam]) nikcleju@2: indices[n_active], indices[lam] = indices[lam], indices[n_active] nikcleju@2: Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active] nikcleju@2: n_active += 1 nikcleju@2: # solves LL'x = y as a composition of two triangular systems nikcleju@2: gamma, _ = potrs(L[:n_active, :n_active], Xy[:n_active], lower=True, nikcleju@2: overwrite_b=False) nikcleju@2: nikcleju@2: beta = np.dot(Gram[:, :n_active], gamma) nikcleju@2: alpha = Xy - beta nikcleju@2: if tol is not None: nikcleju@2: tol_curr += delta nikcleju@2: delta = np.inner(gamma, beta[:n_active]) nikcleju@2: tol_curr -= delta nikcleju@2: if tol_curr <= tol: nikcleju@2: break nikcleju@2: elif n_active == max_features: nikcleju@2: break nikcleju@2: nikcleju@2: return gamma, indices[:n_active] nikcleju@2: nikcleju@2: nikcleju@2: def orthogonal_mp(X, y, n_nonzero_coefs=None, tol=None, precompute_gram=False, nikcleju@2: copy_X=True): nikcleju@2: """Orthogonal Matching Pursuit (OMP) nikcleju@2: nikcleju@2: Solves n_targets Orthogonal Matching Pursuit problems. nikcleju@2: An instance of the problem has the form: nikcleju@2: nikcleju@2: When parametrized by the number of non-zero coefficients using nikcleju@2: `n_nonzero_coefs`: nikcleju@2: argmin ||y - X\gamma||^2 subject to ||\gamma||_0 <= n_{nonzero coefs} nikcleju@2: nikcleju@2: When parametrized by error using the parameter `tol`: nikcleju@2: argmin ||\gamma||_0 subject to ||y - X\gamma||^2 <= tol nikcleju@2: nikcleju@2: Parameters nikcleju@2: ---------- nikcleju@2: X: array, shape = (n_samples, n_features) nikcleju@2: Input data. Columns are assumed to have unit norm. nikcleju@2: nikcleju@2: y: array, shape = (n_samples,) or (n_samples, n_targets) nikcleju@2: Input targets nikcleju@2: nikcleju@2: n_nonzero_coefs: int nikcleju@2: Desired number of non-zero entries in the solution. If None (by nikcleju@2: default) this value is set to 10% of n_features. nikcleju@2: nikcleju@2: tol: float nikcleju@2: Maximum norm of the residual. If not None, overrides n_nonzero_coefs. nikcleju@2: nikcleju@2: Nic: tol does not oveeride n_nonzero_coefs, the wto conditions act jointly! nikcleju@2: nikcleju@2: precompute_gram: {True, False, 'auto'}, nikcleju@2: Whether to perform precomputations. Improves performance when n_targets nikcleju@2: or n_samples is very large. nikcleju@2: nikcleju@2: copy_X: bool, optional nikcleju@2: Whether the design matrix X must be copied by the algorithm. A false nikcleju@2: value is only helpful if X is already Fortran-ordered, otherwise a nikcleju@2: copy is made anyway. nikcleju@2: nikcleju@2: Returns nikcleju@2: ------- nikcleju@2: coef: array, shape = (n_features,) or (n_features, n_targets) nikcleju@2: Coefficients of the OMP solution nikcleju@2: nikcleju@2: See also nikcleju@2: -------- nikcleju@2: OrthogonalMatchingPursuit nikcleju@2: orthogonal_mp_gram nikcleju@2: lars_path nikcleju@2: decomposition.sparse_encode nikcleju@2: decomposition.sparse_encode_parallel nikcleju@2: nikcleju@2: Notes nikcleju@2: ----- nikcleju@2: Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang, nikcleju@2: Matching pursuits with time-frequency dictionaries, IEEE Transactions on nikcleju@2: Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415. nikcleju@2: (http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf) nikcleju@2: nikcleju@2: This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad, nikcleju@2: M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal nikcleju@2: Matching Pursuit Technical Report - CS Technion, April 2008. nikcleju@2: http://www.cs.technion.ac.il/~ronrubin/Publications/KSVX-OMP-v2.pdf nikcleju@2: nikcleju@2: """ nikcleju@2: X, y = map(np.asanyarray, (X, y)) nikcleju@2: if y.ndim == 1: nikcleju@2: y = y[:, np.newaxis] nikcleju@2: if copy_X: nikcleju@2: X = X.copy('F') nikcleju@2: copy_X = False nikcleju@2: else: nikcleju@2: X = np.asfortranarray(X) nikcleju@2: if y.shape[1] > 1: # subsequent targets will be affected nikcleju@2: copy_X = True nikcleju@2: if n_nonzero_coefs == None and tol == None: nikcleju@2: n_nonzero_coefs = int(0.1 * X.shape[1]) nikcleju@2: if tol is not None and tol < 0: nikcleju@2: raise ValueError("Epsilon cannot be negative") nikcleju@2: if tol is None and n_nonzero_coefs <= 0: nikcleju@2: raise ValueError("The number of atoms must be positive") nikcleju@2: if tol is None and n_nonzero_coefs > X.shape[1]: nikcleju@2: raise ValueError("The number of atoms cannot be more than the number \ nikcleju@2: of features") nikcleju@2: if precompute_gram == 'auto': nikcleju@2: precompute_gram = X.shape[0] > X.shape[1] nikcleju@2: if precompute_gram: nikcleju@2: G = np.dot(X.T, X) nikcleju@2: G = np.asfortranarray(G) nikcleju@2: Xy = np.dot(X.T, y) nikcleju@2: if tol is not None: nikcleju@2: norms_squared = np.sum((y ** 2), axis=0) nikcleju@2: else: nikcleju@2: norms_squared = None nikcleju@2: return orthogonal_mp_gram(G, Xy, n_nonzero_coefs, tol, norms_squared, nikcleju@2: copy_Gram=copy_X, copy_Xy=False) nikcleju@2: nikcleju@2: coef = np.zeros((X.shape[1], y.shape[1])) nikcleju@2: for k in xrange(y.shape[1]): nikcleju@2: x, idx = _cholesky_omp(X, y[:, k], n_nonzero_coefs, tol, nikcleju@2: copy_X=copy_X) nikcleju@2: coef[idx, k] = x nikcleju@2: return np.squeeze(coef) nikcleju@2: nikcleju@2: nikcleju@2: def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, tol=None, nikcleju@2: norms_squared=None, copy_Gram=True, nikcleju@2: copy_Xy=True): nikcleju@2: """Gram Orthogonal Matching Pursuit (OMP) nikcleju@2: nikcleju@2: Solves n_targets Orthogonal Matching Pursuit problems using only nikcleju@2: the Gram matrix X.T * X and the product X.T * y. nikcleju@2: nikcleju@2: Parameters nikcleju@2: ---------- nikcleju@2: Gram: array, shape = (n_features, n_features) nikcleju@2: Gram matrix of the input data: X.T * X nikcleju@2: nikcleju@2: Xy: array, shape = (n_features,) or (n_features, n_targets) nikcleju@2: Input targets multiplied by X: X.T * y nikcleju@2: nikcleju@2: n_nonzero_coefs: int nikcleju@2: Desired number of non-zero entries in the solution. If None (by nikcleju@2: default) this value is set to 10% of n_features. nikcleju@2: nikcleju@2: tol: float nikcleju@2: Maximum norm of the residual. If not None, overrides n_nonzero_coefs. nikcleju@2: nikcleju@2: norms_squared: array-like, shape = (n_targets,) nikcleju@2: Squared L2 norms of the lines of y. Required if tol is not None. nikcleju@2: nikcleju@2: copy_Gram: bool, optional nikcleju@2: Whether the gram matrix must be copied by the algorithm. A false nikcleju@2: value is only helpful if it is already Fortran-ordered, otherwise a nikcleju@2: copy is made anyway. nikcleju@2: nikcleju@2: copy_Xy: bool, optional nikcleju@2: Whether the covariance vector Xy must be copied by the algorithm. nikcleju@2: If False, it may be overwritten. nikcleju@2: nikcleju@2: Returns nikcleju@2: ------- nikcleju@2: coef: array, shape = (n_features,) or (n_features, n_targets) nikcleju@2: Coefficients of the OMP solution nikcleju@2: nikcleju@2: See also nikcleju@2: -------- nikcleju@2: OrthogonalMatchingPursuit nikcleju@2: orthogonal_mp nikcleju@2: lars_path nikcleju@2: decomposition.sparse_encode nikcleju@2: decomposition.sparse_encode_parallel nikcleju@2: nikcleju@2: Notes nikcleju@2: ----- nikcleju@2: Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang, nikcleju@2: Matching pursuits with time-frequency dictionaries, IEEE Transactions on nikcleju@2: Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415. nikcleju@2: (http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf) nikcleju@2: nikcleju@2: This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad, nikcleju@2: M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal nikcleju@2: Matching Pursuit Technical Report - CS Technion, April 2008. nikcleju@2: http://www.cs.technion.ac.il/~ronrubin/Publications/KSVX-OMP-v2.pdf nikcleju@2: nikcleju@2: """ nikcleju@2: Gram, Xy = map(np.asanyarray, (Gram, Xy)) nikcleju@2: if Xy.ndim == 1: nikcleju@2: Xy = Xy[:, np.newaxis] nikcleju@2: if tol is not None: nikcleju@2: norms_squared = [norms_squared] nikcleju@2: nikcleju@2: if n_nonzero_coefs == None and tol is None: nikcleju@2: n_nonzero_coefs = int(0.1 * len(Gram)) nikcleju@2: if tol is not None and norms_squared == None: nikcleju@2: raise ValueError('Gram OMP needs the precomputed norms in order \ nikcleju@2: to evaluate the error sum of squares.') nikcleju@2: if tol is not None and tol < 0: nikcleju@2: raise ValueError("Epsilon cennot be negative") nikcleju@2: if tol is None and n_nonzero_coefs <= 0: nikcleju@2: raise ValueError("The number of atoms must be positive") nikcleju@2: if tol is None and n_nonzero_coefs > len(Gram): nikcleju@2: raise ValueError("The number of atoms cannot be more than the number \ nikcleju@2: of features") nikcleju@2: coef = np.zeros((len(Gram), Xy.shape[1])) nikcleju@2: for k in range(Xy.shape[1]): nikcleju@2: x, idx = _gram_omp(Gram, Xy[:, k], n_nonzero_coefs, nikcleju@2: norms_squared[k] if tol is not None else None, tol, nikcleju@2: copy_Gram=copy_Gram, copy_Xy=copy_Xy) nikcleju@2: coef[idx, k] = x nikcleju@2: return np.squeeze(coef) nikcleju@2: nikcleju@2: nikcleju@2: class OrthogonalMatchingPursuit(LinearModel): nikcleju@2: """Orthogonal Mathching Pursuit model (OMP) nikcleju@2: nikcleju@2: Parameters nikcleju@2: ---------- nikcleju@2: n_nonzero_coefs: int, optional nikcleju@2: Desired number of non-zero entries in the solution. If None (by nikcleju@2: default) this value is set to 10% of n_features. nikcleju@2: nikcleju@2: tol: float, optional nikcleju@2: Maximum norm of the residual. If not None, overrides n_nonzero_coefs. nikcleju@2: nikcleju@2: fit_intercept: boolean, optional nikcleju@2: whether to calculate the intercept for this model. If set nikcleju@2: to false, no intercept will be used in calculations nikcleju@2: (e.g. data is expected to be already centered). nikcleju@2: nikcleju@2: normalize: boolean, optional nikcleju@2: If False, the regressors X are assumed to be already normalized. nikcleju@2: nikcleju@2: precompute_gram: {True, False, 'auto'}, nikcleju@2: Whether to use a precomputed Gram and Xy matrix to speed up nikcleju@2: calculations. Improves performance when `n_targets` or `n_samples` is nikcleju@2: very large. Note that if you already have such matrices, you can pass nikcleju@2: them directly to the fit method. nikcleju@2: nikcleju@2: copy_X: bool, optional nikcleju@2: Whether the design matrix X must be copied by the algorithm. A false nikcleju@2: value is only helpful if X is already Fortran-ordered, otherwise a nikcleju@2: copy is made anyway. nikcleju@2: nikcleju@2: copy_Gram: bool, optional nikcleju@2: Whether the gram matrix must be copied by the algorithm. A false nikcleju@2: value is only helpful if X is already Fortran-ordered, otherwise a nikcleju@2: copy is made anyway. nikcleju@2: nikcleju@2: copy_Xy: bool, optional nikcleju@2: Whether the covariance vector Xy must be copied by the algorithm. nikcleju@2: If False, it may be overwritten. nikcleju@2: nikcleju@2: nikcleju@2: Attributes nikcleju@2: ---------- nikcleju@2: coef_: array, shape = (n_features,) or (n_features, n_targets) nikcleju@2: parameter vector (w in the fomulation formula) nikcleju@2: nikcleju@2: intercept_: float or array, shape =(n_targets,) nikcleju@2: independent term in decision function. nikcleju@2: nikcleju@2: Notes nikcleju@2: ----- nikcleju@2: Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang, nikcleju@2: Matching pursuits with time-frequency dictionaries, IEEE Transactions on nikcleju@2: Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415. nikcleju@2: (http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf) nikcleju@2: nikcleju@2: This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad, nikcleju@2: M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal nikcleju@2: Matching Pursuit Technical Report - CS Technion, April 2008. nikcleju@2: http://www.cs.technion.ac.il/~ronrubin/Publications/KSVX-OMP-v2.pdf nikcleju@2: nikcleju@2: See also nikcleju@2: -------- nikcleju@2: orthogonal_mp nikcleju@2: orthogonal_mp_gram nikcleju@2: lars_path nikcleju@2: Lars nikcleju@2: LassoLars nikcleju@2: decomposition.sparse_encode nikcleju@2: decomposition.sparse_encode_parallel nikcleju@2: nikcleju@2: """ nikcleju@2: def __init__(self, copy_X=True, copy_Gram=True, nikcleju@2: copy_Xy=True, n_nonzero_coefs=None, tol=None, nikcleju@2: fit_intercept=True, normalize=True, precompute_gram=False): nikcleju@2: self.n_nonzero_coefs = n_nonzero_coefs nikcleju@2: self.tol = tol nikcleju@2: self.fit_intercept = fit_intercept nikcleju@2: self.normalize = normalize nikcleju@2: self.precompute_gram = precompute_gram nikcleju@2: self.copy_Gram = copy_Gram nikcleju@2: self.copy_Xy = copy_Xy nikcleju@2: self.copy_X = copy_X nikcleju@2: nikcleju@2: def fit(self, X, y, Gram=None, Xy=None): nikcleju@2: """Fit the model using X, y as training data. nikcleju@2: nikcleju@2: Parameters nikcleju@2: ---------- nikcleju@2: X: array-like, shape = (n_samples, n_features) nikcleju@2: Training data. nikcleju@2: nikcleju@2: y: array-like, shape = (n_samples,) or (n_samples, n_targets) nikcleju@2: Target values. nikcleju@2: nikcleju@2: Gram: array-like, shape = (n_features, n_features) (optional) nikcleju@2: Gram matrix of the input data: X.T * X nikcleju@2: nikcleju@2: Xy: array-like, shape = (n_features,) or (n_features, n_targets) nikcleju@2: (optional) nikcleju@2: Input targets multiplied by X: X.T * y nikcleju@2: nikcleju@2: nikcleju@2: Returns nikcleju@2: ------- nikcleju@2: self: object nikcleju@2: returns an instance of self. nikcleju@2: """ nikcleju@2: X = np.atleast_2d(X) nikcleju@2: y = np.atleast_1d(y) nikcleju@2: n_features = X.shape[1] nikcleju@2: nikcleju@2: X, y, X_mean, y_mean, X_std = self._center_data(X, y, nikcleju@2: self.fit_intercept, nikcleju@2: self.normalize, nikcleju@2: self.copy_X) nikcleju@2: nikcleju@2: if self.n_nonzero_coefs == None and self.tol is None: nikcleju@2: self.n_nonzero_coefs = int(0.1 * n_features) nikcleju@2: nikcleju@2: if Gram is not None: nikcleju@2: Gram = np.atleast_2d(Gram) nikcleju@2: nikcleju@2: if self.copy_Gram: nikcleju@2: copy_Gram = False nikcleju@2: Gram = Gram.copy('F') nikcleju@2: else: nikcleju@2: Gram = np.asfortranarray(Gram) nikcleju@2: nikcleju@2: copy_Gram = self.copy_Gram nikcleju@2: if y.shape[1] > 1: # subsequent targets will be affected nikcleju@2: copy_Gram = True nikcleju@2: nikcleju@2: if Xy is None: nikcleju@2: Xy = np.dot(X.T, y) nikcleju@2: else: nikcleju@2: if self.copy_Xy: nikcleju@2: Xy = Xy.copy() nikcleju@2: if self.normalize: nikcleju@2: if len(Xy.shape) == 1: nikcleju@2: Xy /= X_std nikcleju@2: else: nikcleju@2: Xy /= X_std[:, np.newaxis] nikcleju@2: nikcleju@2: if self.normalize: nikcleju@2: Gram /= X_std nikcleju@2: Gram /= X_std[:, np.newaxis] nikcleju@2: nikcleju@2: norms_sq = np.sum(y ** 2, axis=0) if self.tol is not None else None nikcleju@2: self.coef_ = orthogonal_mp_gram(Gram, Xy, self.n_nonzero_coefs, nikcleju@2: self.tol, norms_sq, nikcleju@2: copy_Gram, True).T nikcleju@2: else: nikcleju@2: precompute_gram = self.precompute_gram nikcleju@2: if precompute_gram == 'auto': nikcleju@2: precompute_gram = X.shape[0] > X.shape[1] nikcleju@2: self.coef_ = orthogonal_mp(X, y, self.n_nonzero_coefs, self.tol, nikcleju@2: precompute_gram=self.precompute_gram, nikcleju@2: copy_X=self.copy_X).T nikcleju@2: nikcleju@2: self._set_intercept(X_mean, y_mean, X_std) nikcleju@2: return self