Mercurial > hg > plosone_underreview
comparison tests/test_utils.py @ 30:e8084526f7e5 branch-tests
additional test functions
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Wed, 13 Sep 2017 19:57:49 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
29:6aa08c9c95e9 | 30:e8084526f7e5 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 Created on Fri Sep 1 19:11:52 2017 | |
4 | |
5 @author: mariapanteli | |
6 """ | |
7 | |
8 import pytest | |
9 | |
10 import numpy as np | |
11 import pandas as pd | |
12 import pickle | |
13 import os | |
14 | |
15 import scripts.utils as utils | |
16 | |
17 | |
def test_get_outliers():
    """Check that ``utils.get_outliers`` flags strongly shifted samples.

    The last 5 of 100 random 3-dimensional samples are shifted by +10 in
    every dimension; the predicted labels for those 5 rows are expected to
    all equal 1.
    """
    np.random.seed(1)  # fixed seed keeps the random data reproducible
    X = np.random.randn(100, 3)
    # create outliers by shifting the entries of the last 5 samples
    X[-5:, :] = X[-5:, :] + 10
    # only the predicted labels (second returned value) are checked here
    _, y_pred, _ = utils.get_outliers(X)
    # expect that the shifted samples are detected as outliers
    assert np.array_equal(y_pred[-5:], np.ones(5))
27 | |
28 | |
def test_get_outliers_mahal():
    """Check that ``utils.get_outliers_Mahal`` flags strongly shifted samples.

    The last 5 of 100 random 3-dimensional samples are shifted by +10 in
    every dimension; the predicted labels for those 5 rows are expected to
    all equal 1.

    This test has its own name on purpose: a second module-level function
    called ``test_get_outliers`` would rebind that name at import time, so
    the test for ``utils.get_outliers`` would never be collected.
    """
    np.random.seed(1)  # fixed seed keeps the random data reproducible
    X = np.random.randn(100, 3)
    # create outliers by shifting the entries of the last 5 samples
    X[-5:, :] = X[-5:, :] + 10
    # only the predicted labels (second returned value) are checked here
    _, y_pred, _ = utils.get_outliers_Mahal(X)
    # expect that the shifted samples are detected as outliers
    assert np.array_equal(y_pred[-5:], np.ones(5))
38 | |
39 | |
def test_pca_data():
    """Check that ``utils.pca_data`` keeps fewer components than features.

    Uses 100 random 3-dimensional samples whose last 5 rows are shifted by
    +10, and requests ``min_variance=0.8``.
    """
    np.random.seed(1)
    samples = np.random.randn(100, 3)
    # shift the last 5 rows in place
    samples[-5:] += 10
    n_features = samples.shape[1]
    _, n_components = utils.pca_data(samples, min_variance=0.8)
    # the number of retained components must be below the input dimension
    assert n_components < n_features
46 | |
47 | |
def test_get_local_outliers_from_neighbors_dict():
    """Check local outlier detection for the last labelled group.

    100 random 3-dimensional samples are labelled in five groups of 20
    ('a', 'b', 'c', 'k', 'l'); the last ``n_shifted`` samples (all in
    group 'l') are shifted by +10. The test expects the final entries of
    ``result[-1][3]`` to be flagged with 1.
    """
    n_shifted = 3
    np.random.seed(1)
    data = np.random.randn(100, 3)
    data[-n_shifted:] += 10
    # 20 consecutive samples per label, in the order a, b, c, k, l
    labels = np.repeat(['a', 'b', 'c', 'k', 'l'], 20)
    neighbors = {
        'a': ['b', 'c'],
        'b': ['a', 'c'],
        'c': ['b', 'a'],
        'k': ['l'],
        'l': ['k'],
    }
    result = utils.get_local_outliers_from_neighbors_dict(data, labels, neighbors)
    # last n samples of 'l' country must be outliers
    flagged_tail = result[-1][3][-n_shifted:]
    assert np.array_equal(flagged_tail, np.ones(n_shifted))
59 | |
60 | |
def test_best_n_clusters_silhouette():
    """Check that three shifted groups give a best cluster count of 3.

    Of 100 random 3-dimensional samples, the first 30 are shifted by +10
    and the last 30 by +20, leaving the middle 40 unshifted.
    """
    np.random.seed(1)
    data = np.random.randn(100, 3)
    # shift the two outer groups in place
    data[:30] += 10
    data[-30:] += 20
    # only the first of the two returned values is checked
    n_best, _ = utils.best_n_clusters_silhouette(data, max_ncl=10)
    assert n_best == 3
68 |