# array_unit_tests.py
# Source: repository forked from penn-cnt/RNS_STIM-MORPHOLOGY.
import numpy as np
import pandas as PD
from tabulate import tabulate
class TestArrayProperties:
def __init__(self,in_Arr,verbose=True):
self.in_Arr = in_Arr
self.verbose = verbose
def test_dim(self,M,N):
"""
Confirm shape of array is correct.
Parameters
----------
M : Integer
Expected number of rows. -1 indicates that we should skip this check.
N : Integer
Exepected number of columns.
Returns
-------
Assertion.
"""
if M==-1:M=self.in_Arr.shape[0]
try:
assert(self.in_Arr.shape==(M,N))
if self.verbose: print("Array Shape Correct.")
return True
except AssertionError:
print("Input Array is of shape (%d,%d). Expected shape is (%d,%d)." %(self.in_Arr.shape[0],self.in_Arr.shape[1],M,N))
return False
def test_type(self,dtype):
"""
Confirm the type of the array is correct.
Parameters
----------
dtype : TYPE
Expected data type.
Returns
-------
Assertion.
"""
if isinstance(self.in_Arr,np.ndarray):
try:
assert(self.in_Arr.dtype==dtype)
if self.verbose: print("Array Type Correct.")
return True
except AssertionError:
print("Input Array is of type %s. Expected type is %s." %(self.in_Arr.dtype,dtype))
return False
elif isinstance(self.in_Arr,PD.DataFrame):
try:
assert((self.in_Arr.dtypes.values==dtype).all())
if self.verbose: print("Dataframe Type Correct.")
return True
except AssertionError:
print("Dataframe has a mismatched type to expected input.")
return False
def test_inf(self):
"""
Check for inf.
Returns
-------
None.
"""
if isinstance(self.in_Arr,np.ndarray):
try:
assert(np.isinf(self.in_Arr).any()==False)
if self.verbose: print("No INFs found.")
return True
except AssertionError:
print("Infinity found within input array.")
return False
elif isinstance(self.in_Arr,PD.DataFrame):
try:
assert(np.isinf(self.in_Arr.values).any()==False)
if self.verbose: print("No INFs found.")
return True
except AssertionError:
print("Infinity found within input array.")
return False
def test_nan(self):
"""
Check for nan.
Returns
-------
None.
"""
if isinstance(self.in_Arr,np.ndarray):
try:
assert(np.isnan(self.in_Arr).any()==False)
if self.verbose: print("No NaNs found.")
return True
except AssertionError:
print("NaNs found within input array.")
return False
elif isinstance(self.in_Arr,PD.DataFrame):
try:
assert(np.isnan(self.in_Arr.values).any()==False)
if self.verbose: print("No NaNs found.")
return True
except AssertionError:
print("NaNs found within input array.")
return False
class TestArraySimilarity:
    """
    Summarise how closely an input array/dataframe follows a reference one.

    Parameters
    ----------
    in_Arr : numpy.ndarray or pandas.DataFrame
        Data under test.
    ref_Arr : numpy.ndarray or pandas.DataFrame
        Reference data. Columns are paired with ``in_Arr`` by position.
    """

    # Row labels of the numeric summary table, in display order.
    _STAT_ROWS = ('Minimum Expected','Maximum Expected','Median Expected', 'Median Absolute Deviation')

    def __init__(self,in_Arr,ref_Arr):
        self.in_Arr  = in_Arr
        self.ref_Arr = ref_Arr

    def _numeric_summary(self,ref,inp,labels):
        """Build the per-column min/max/median/median-absolute-deviation table for two aligned 2-D arrays."""
        ref   = np.asarray(ref)
        inp   = np.asarray(inp)
        stats = np.vstack([np.min(ref,axis=0),
                           np.max(ref,axis=0),
                           np.median(ref,axis=0),
                           np.median(np.fabs(ref-inp),axis=0)])
        return PD.DataFrame(stats,index=list(self._STAT_ROWS),columns=labels)

    def _dataframe_summaries(self):
        """Return the (numeric, categorical) summary tables for dataframe inputs."""
        ref   = self.ref_Arr
        inp   = self.in_Arr
        ncols = ref.shape[1]

        # Classify columns by POSITION. Labels are only used for display, so
        # arbitrary (string, unsorted, non-contiguous) column names work and
        # the original column order is kept.
        numeric_pos     = [pos for pos in range(ncols) if ref.iloc[:,[pos]].select_dtypes(include=np.number).shape[1]]
        numeric_set     = set(numeric_pos)
        categorical_pos = [pos for pos in range(ncols) if pos not in numeric_set]

        # Cast the numeric slice to float explicitly: in a mixed-dtype frame
        # the raw values are an object array, which np.fabs cannot handle.
        numeric_DF = self._numeric_summary(ref.iloc[:,numeric_pos].to_numpy(dtype=float),
                                           inp.iloc[:,numeric_pos].to_numpy(dtype=float),
                                           [ref.columns[pos] for pos in numeric_pos])

        matches = []
        for pos in categorical_pos:
            ref_col = ref.iloc[:,pos].to_numpy()
            inp_col = inp.iloc[:,pos].to_numpy()
            matches.append(100*(ref_col==inp_col).sum()/ref_col.size)
        categorical_DF = PD.DataFrame(np.asarray(matches,dtype=float).reshape(1,-1),
                                      index=['Percent Match'],
                                      columns=[ref.columns[pos] for pos in categorical_pos])
        return numeric_DF,categorical_DF

    def deviations(self):
        """
        Calculate deviations between input and reference array structure.

        NOTE: Due to differences in libraries and seeds, exact matches may not be expected.
              This is to provide the user with a quick approximation that their results are within
              some bound of confidence.

        For arrays, one table of per-column statistics is printed. For
        dataframes, numeric columns get the same statistics and all remaining
        columns get the percentage of rows that match the reference exactly.
        Inputs of any other type print nothing.

        Returns
        -------
        On-screen display of statistics.
        """

        if isinstance(self.in_Arr,np.ndarray):
            print("Array comparison")
            ncols = self.ref_Arr.shape[1]
            # Only the columns present in the reference are compared.
            DF = self._numeric_summary(self.ref_Arr,self.in_Arr[:,:ncols],np.arange(ncols))
            print(tabulate(DF,headers='keys',tablefmt='psql'))
        elif isinstance(self.in_Arr,PD.DataFrame):
            print("Dataframe comparison")
            numeric_DF,categorical_DF = self._dataframe_summaries()
            print("Numerical Columns:")
            print(tabulate(numeric_DF,headers='keys',tablefmt='psql'))
            print("Categorical Columns:")
            print(tabulate(categorical_DF,headers='keys',tablefmt='psql'))
class TestTransformation:
    """
    Checks on the values of a transformed array.

    Parameters
    ----------
    in_Arr : array-like
        Data to validate; it is reduced with ``np.amin`` / ``np.amax``.
    """

    def __init__(self,in_Arr):
        self.in_Arr = in_Arr

    def array_bounding(self,arr_min,arr_max):
        """
        Check whether the array lies within the expected bounds.

        A message is printed for each bound that is violated. The checks are
        plain comparisons rather than ``assert`` statements, so they still run
        under ``python -O``. A NaN extreme compares false against any bound
        and is therefore reported as out of bounds.

        Parameters
        ----------
        arr_min : numeric type
            Minimum expected value.
        arr_max : numeric type
            Maximum expected value.

        Returns
        -------
        bool
            True if ``arr_min <= min(in_Arr)`` and ``max(in_Arr) <= arr_max``,
            False otherwise.
        """

        imin = np.amin(self.in_Arr)
        imax = np.amax(self.in_Arr)

        min_ok = bool(imin>=arr_min)
        # The upper bound must be tested against the array MAXIMUM.
        max_ok = bool(imax<=arr_max)

        if not min_ok:
            print("Input array minimum of %3.2e less than expected minimum %3.2e" %(imin,arr_min))
        if not max_ok:
            print("Input array maximum of %3.2e greater than expected maximum %3.2e" %(imax,arr_max))

        return min_ok and max_ok
if __name__ == '__main__':

    # Fixed seed so the demo output is reproducible.
    np.random.seed(42)

    # Create dummy arrays and dataframes for unit test development
    M    = 40
    N    = 20
    arr1 = 100*np.random.random(size=(M,N))
    arr2 = np.random.normal(arr1)       # arr1 plus unit-variance noise
    arr3 = np.copy(arr2)
    arr4 = np.copy(arr2)
    arr3[0,0] = np.nan                  # NaN failure case
    arr4[0,0] = np.inf                  # inf failure case
    DF1  = PD.DataFrame(arr1,columns=np.arange(N))
    DF2  = PD.DataFrame(arr2,columns=np.arange(N))

    # Dataframe whose first column has an integer dtype, used as the failure
    # case of the dataframe type test. The column is replaced by label:
    # assigning a whole column through .iloc writes into the existing float64
    # column on recent pandas releases, which would leave the dtype unchanged.
    DF3    = DF2.copy().apply(np.floor)
    DF3[0] = PD.to_numeric(DF3.values[:,0],downcast='integer')

    # Some reference variables for tests
    min1   = np.amin(arr1)
    max1   = np.amax(arr1)
    dtype  = arr1.dtype
    dtypes = DF1.dtypes

    ### Run each unit test. When applicable, pair-wise, with success then failure.

    # Dimension test
    TAP = TestArrayProperties(arr1)
    TAP.test_dim(M, N)        # Success
    TAP.test_dim(M+1,N+1)     # Failure
    print("===")

    # Type test
    TAP.test_type(dtype)      # Success
    TAP.test_type('str')      # Failure
    print("===")
    TAP = TestArrayProperties(DF1)
    TAP.test_type(dtypes)     # Success
    TAP = TestArrayProperties(DF3)
    TAP.test_type(dtypes)     # Failure
    print("===")

    # Undefined type tests
    TAP = TestArrayProperties(arr1)
    TAP.test_nan()            # Success
    TAP = TestArrayProperties(arr3)
    TAP.test_nan()            # Failure
    print("===")
    TAP = TestArrayProperties(arr1)
    TAP.test_inf()            # Success
    TAP = TestArrayProperties(arr4)
    TAP.test_inf()            # Failure

    # Array and dataframe comparison
    TAS = TestArraySimilarity(arr1, arr2)
    TAS.deviations()
    TAS = TestArraySimilarity(DF1, DF2)
    TAS.deviations()
    print("===")

    # Bounding test
    TT = TestTransformation(arr1)
    TT.array_bounding(min1, max1)         # Success (bounds taken from the data itself)
    TT.array_bounding(min1+1, max1-1)     # Failure (bounds tighter than the data)