Maria@18
|
1 # -*- coding: utf-8 -*-
|
Maria@18
|
2 """
|
Maria@18
|
3 Created on Fri Sep 1 19:11:52 2017
|
Maria@18
|
4
|
Maria@18
|
5 @author: mariapanteli
|
Maria@18
|
6 """
|
Maria@18
|
7
|
Maria@18
|
8 import pytest
|
Maria@18
|
9
|
Maria@18
|
10 import numpy as np
|
Maria@18
|
11
|
Maria@18
|
12 import scripts.outliers as outliers
|
Maria@18
|
13
|
Maria@18
|
14
|
Maria@18
|
def test_country_outlier_df():
    """Check the normalised 'Outliers' column built by country_outlier_df.

    With outlier counts {'a': 2, 'b': 3} and labels holding four 'a' and
    three 'b' entries, the test expects the normalised values 2/4 = 0.5
    and 3/3 = 1.0, in that order.
    """
    counts = {'a': 2, 'b': 3}
    labels = np.array(['a', 'a', 'a', 'a', 'b', 'b', 'b'])
    df = outliers.country_outlier_df(counts, labels, normalize=True)
    # Series.get_values() was removed in pandas 1.0; .values is available
    # on both old and new pandas versions.
    assert np.array_equal(df['Outliers'].values, np.array([0.5, 1.0]))
|
Maria@18
|
20
|
Maria@18
|
21
|
Maria@18
|
def test_normalize_outlier_counts():
    """Check that outlier counts are divided by the per-country totals.

    Expected result for counts {'a': 2, 'b': 3} and totals {'a': 4, 'b': 3}
    is {'a': 0.5, 'b': 1.0}; both quotients are exactly representable as
    floats, so exact equality is safe here.
    """
    outlier_counts = {'a': 2, 'b': 3}
    country_counts = {'a': 4, 'b': 3}
    normalized = outliers.normalize_outlier_counts(outlier_counts,
                                                   country_counts)
    expected = {'a': .5, 'b': 1.}
    # Compare the mappings directly: np.array_equal on dicts only works by
    # wrapping each dict in a 0-d object array, and a plain == lets pytest
    # report a key-by-key diff on failure.
    assert normalized == expected
|
Maria@18
|
28
|
Maria@18
|
29
|
Maria@18
|
def test_get_outliers_df():
    """Check that strongly shifted samples are reported as outliers.

    Builds 100 random 3-dimensional samples, shifts the last five by +10
    and labels them 'b' (the other 95 are 'a'). The test expects the
    'Outliers' column to be 0.0 for 'a' and 1.0 for 'b'.
    """
    np.random.seed(1)  # fixed seed keeps the generated data reproducible
    X = np.random.randn(100, 3)
    # create outliers by shifting the entries of the last 5 samples
    X[-5:, :] = X[-5:, :] + 10
    Y = np.concatenate([np.repeat('a', 95), np.repeat('b', 5)])
    # the second and third return values are not checked by this test
    df, _threshold, _md = outliers.get_outliers_df(X, Y)
    # expect that items from country 'b' are detected as outliers
    # (Series.get_values() was removed in pandas 1.0; use .values instead)
    assert np.array_equal(df['Outliers'].values, np.array([0., 1.0]))
|
m@20
|
39
|