Maria@18
|
1 # -*- coding: utf-8 -*-
|
Maria@18
|
2 """
|
Maria@18
|
3 Created on Fri Sep 1 19:11:52 2017
|
Maria@18
|
4
|
Maria@18
|
5 @author: mariapanteli
|
Maria@18
|
6 """
|
Maria@18
|
7
|
Maria@18
|
8 import pytest
|
Maria@18
|
9
|
Maria@18
|
10 import numpy as np
|
Maria@18
|
11 import pandas as pd
|
m@20
|
12 import pickle
|
m@20
|
13 import os
|
Maria@18
|
14
|
Maria@18
|
15 import scripts.outliers as outliers
|
Maria@18
|
16
|
Maria@18
|
17
|
Maria@18
|
def test_country_outlier_df():
    """Check the normalized 'Outliers' column of country_outlier_df.

    With 2 outliers out of 4 items labelled 'a' and 3 outliers out of 3
    items labelled 'b', the test expects the values 0.5 and 1.0, in that
    order, when ``normalize=True`` is passed.
    """
    counts = {'a': 2, 'b': 3}
    labels = np.array(['a', 'a', 'a', 'a', 'b', 'b', 'b'])
    df = outliers.country_outlier_df(counts, labels, normalize=True)
    # Series.get_values() was removed in pandas 1.0; np.asarray works on
    # every pandas version.
    observed = np.asarray(df['Outliers'], dtype=float)
    # assert_allclose checks shape as well as values and tolerates
    # floating-point rounding in the normalized ratios.
    np.testing.assert_allclose(observed, np.array([0.5, 1.0]))
|
Maria@18
|
23
|
Maria@18
|
24
|
Maria@18
|
def test_normalize_outlier_counts():
    """Check that outlier counts are divided by the per-country totals.

    The test expects 2 outliers out of 4 items to give 0.5 for 'a' and
    3 out of 3 to give 1.0 for 'b'.
    """
    outlier_counts = {'a': 2, 'b': 3}
    country_counts = {'a': 4, 'b': 3}
    normalized = outliers.normalize_outlier_counts(outlier_counts,
                                                   country_counts)
    expected = {'a': .5, 'b': 1.}
    # Compare the mappings directly: np.array_equal on dicts only works by
    # wrapping each dict in a 0-d object array and gives no useful
    # diagnostics on failure.
    assert set(normalized) == set(expected)
    for country, fraction in expected.items():
        assert np.isclose(normalized[country], fraction)
|
Maria@18
|
31
|
Maria@18
|
32
|
Maria@18
|
def test_get_outliers_df():
    """Check that shifted samples are reported as outliers by country.

    Builds 100 random 3-dimensional samples, shifts the last 5 by +10 and
    labels them 'b' (the other 95 are labelled 'a'). The test expects the
    'Outliers' column to be 0.0 for 'a' and 1.0 for 'b'.
    """
    np.random.seed(1)  # fixed seed keeps the generated data reproducible
    X = np.random.randn(100, 3)
    # create outliers by shifting the entries of the last 5 samples
    X[-5:, :] = X[-5:, :] + 10
    Y = np.concatenate([np.repeat('a', 95), np.repeat('b', 5)])
    # the threshold and distance outputs are not checked by this test
    df, _threshold, _md = outliers.get_outliers_df(X, Y)
    # expect that items from country 'b' are detected as outliers
    # Series.get_values() was removed in pandas 1.0; np.asarray works on
    # every pandas version.
    observed = np.asarray(df['Outliers'], dtype=float)
    np.testing.assert_allclose(observed, np.array([0., 1.0]))
|
m@20
|
42
|