# -*- coding: utf-8 -*-
"""
Created on Fri Sep 1 19:11:52 2017

@author: mariapanteli

Unit tests for the helper functions in ``scripts.utils``.

Every test seeds NumPy's global random generator so that the synthetic
data, and therefore the asserted outcome, is reproducible.
"""

import pytest

import numpy as np
import pandas as pd
import pickle
import os

import scripts.utils as utils


def test_get_outliers():
    """Check that ``utils.get_outliers`` flags the shifted samples."""
    np.random.seed(1)
    X = np.random.randn(100, 3)
    # create outliers by shifting the entries of the last 5 samples
    X[-5:, :] = X[-5:, :] + 10
    threshold, y_pred, MD = utils.get_outliers(X)
    # expect that the 5 shifted samples are detected as outliers
    assert np.array_equal(y_pred[-5:], np.ones(5))


def test_get_outliers_Mahal():
    """Check that ``utils.get_outliers_Mahal`` flags the shifted samples.

    This test used to be named ``test_get_outliers`` as well, which rebound
    the name and prevented the test above from ever being collected.
    """
    np.random.seed(1)
    X = np.random.randn(100, 3)
    # create outliers by shifting the entries of the last 5 samples
    X[-5:, :] = X[-5:, :] + 10
    threshold, y_pred, MD = utils.get_outliers_Mahal(X)
    # expect that the 5 shifted samples are detected as outliers
    assert np.array_equal(y_pred[-5:], np.ones(5))


def test_pca_data():
    """Check that ``utils.pca_data`` returns fewer components than features."""
    np.random.seed(1)
    X = np.random.randn(100, 3)
    X[-5:, :] = X[-5:, :] + 10
    X_pca, n_pc = utils.pca_data(X, min_variance=0.8)
    assert n_pc < X.shape[1]


def test_get_local_outliers_from_neighbors_dict():
    """Check local outlier detection driven by a neighbours dictionary.

    Five labels of 20 samples each are used; the last ``n_outliers`` samples
    (which carry the label 'l') are shifted away from the rest.
    """
    np.random.seed(1)
    X = np.random.randn(100, 3)
    n_outliers = 3
    X[-n_outliers:, :] = X[-n_outliers:, :] + 10
    Y = np.concatenate([np.repeat('a', 20), np.repeat('b', 20),
                        np.repeat('c', 20), np.repeat('k', 20),
                        np.repeat('l', 20)])
    # maps each label to the labels treated as its neighbours
    w_dict = {'a': ['b', 'c'], 'b': ['a', 'c'], 'c': ['b', 'a'],
              'k': ['l'], 'l': ['k']}
    spatial_outliers = utils.get_local_outliers_from_neighbors_dict(
        X, Y, w_dict)
    # last n samples of 'l' country must be outliers
    # NOTE(review): assumes the last result entry belongs to 'l' and that
    # index 3 of an entry holds its per-sample predictions - confirm in utils
    assert np.array_equal(spatial_outliers[-1][3][-n_outliers:],
                          np.ones(n_outliers))


def test_best_n_clusters_silhouette():
    """Check that three shifted groups yield a best cluster count of 3."""
    np.random.seed(1)
    X = np.random.randn(100, 3)
    # shift the first and last 30 samples to form three separated groups
    X[:30, :] = X[:30, :] + 10
    X[-30:, :] = X[-30:, :] + 20
    bestncl, _ = utils.best_n_clusters_silhouette(X, max_ncl=10)
    assert bestncl == 3