-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlpcmci.py
3224 lines (2358 loc) · 149 KB
/
lpcmci.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import numpy as np
from itertools import product, combinations
import os
class LPCMCI():
r"""
This class implements the LPCMCI algorithm for constraint-based causal discovery on stationary times series with latent confounders and without selection variables, which we introduce in the main text of this submission.
Parameters passed to the constructor:
- dataframe:
Tigramite dataframe object that contains the time series dataset \bold{X}
- cond_ind_test:
A conditional independence test object that specifies which conditional independence test CI is to be used
Parameters passed to self.run_lpcmci():
Note: Not all parameters are used for the simulation studies. Some are temporary and might be removed in future versions
- tau_max:
The maximum considered time lag tau_max
- pc_alpha:
The significance level \alpha of conditional independence tests
- n_preliminary_iterations:
Determines the number of iterations in the preliminary phase of LPCMCI. In the paper this corresponds to the 'k' in LPCMCI(k)
- max_cond_px:
Consider a pair of variables (X^i_{t-\tau}, X^j_t) with \tau > 0. In Algorithm S2 (here this is self._run_ancestral_removal_phase()), the algorithm does not test for conditional independence given subsets of apds_t(X^i_{t-\tau}, X^j_t, C(G)) of cardinality higher than max_cond_px. In Algorithm S3 (here this is self._run_non_ancestral_removal_phase()), the algorithm does not test for conditional independence given subsets of napds_t(X^i_{t-\tau}, X^j_t, C(G)) of cardinality higher than max_cond_px.
- max_p_global:
Restricts all conditional independence tests to conditioning sets with cardinality smaller or equal to max_p_global
- max_p_non_ancestral:
Restricts all conditional independence tests in the second removal phase (here this is self._run_dsep_removal_phase()) to conditioning sets with cardinality smaller or equal to max_p_non_ancestral
- max_q_global:
For each ordered pair (X^i_{t-\tau}, X^j_t) of adjacent variables and for each cardinality of the conditioning sets test at most max_q_global many conditioning sets (when summing over all tested cardinalities more than max_q_global tests may be made)
- max_pds_set:
In Algorithm S3 (here this is self._run_non_ancestral_removal_phase()), the algorithm tests for conditional independence given subsets of the relevant napds_t sets. If for a given link the set napds_t(X^j_t, X^i_{t-\tau}, C(G)) has more than max_pds_set many elements (or, if the link is also tested in the opposite directed, if napds_t(X^i_{t-\tau}, X^j_t, C(G)) has more than max_pds_set elements), this link is not tested.
- prelim_with_collider_rules:
If True: As in pseudocode
If False: Line 22 of Algorithm S2 is replaced by line 18 of Algorithm S2 when Algorithm S2 is called from the preliminary phase (not in the last application of Algorithm S2 directly before Algorithm S3 is applied)
- parents_of_lagged:
If True: As in pseudocode
If False: The default conditioning set is pa(X^j_t, C(G)) rather than pa({X^j_t, X^i_{t-\tau}, C(G)) for tau > 0
- prelim_only:
If True, stop after the preliminary phase. Can be used for detailed performance analysis
- break_once_separated:
If True: As in pseudocode
If False: The break commands are removed from Algorithms S2 and S3
- no_non_ancestral_phase:
If True, do not execute Algorithm S3. Can be used for detailed performance analysis
- use_a_pds_t_for_majority:
If True: As in pseudocode
If False: The search for separating sets instructed by the majority rule is made given subsets adj(X^j_t, C(G)) rather than subsets of apds_t(X^j_t, X^i_{t-\tau}, C(G))
- orient_contemp:
If orient_contemp == 1: As in pseudocode of Algorithm S2
If orient_contemp == 2: Also orient contemporaneous links in line 18 of Algorithm S2
If orient_contemp == 0: Do not orient contemporaneous links in line 22 of Algorithm S2 either
- update_middle_marks:
If True: As in pseudocode of Algorithms S2 and S3
If False: The MMR rule is not applied
- prelim_rules:
If prelim_rules == 1: As in pseudocode of Algorithm S2
If prelim_rules == 0: Exclude rules R9^prime and R10^\prime from line 18 in Algorithm S2
- fix_all_edges_before_final_orientation:
When one of max_p_global, max_p_non_ancestral, max_q_global or max_pds_set is not np.inf, the algorithm may terminate although not all middle marks are empty. All orientation rules are nevertheless sound, since the rules always check for the appropriate middle marks. If fix_all_edges_before_final_orientation is True, all middle marks are set to the empty middle mark by force, followed by another application of the rules.
- auto_first:
If True: As in pseudocode of Algorithms S2 and S3
If False: Autodependency links are not prioritized even before contemporaneous links
- remember_only_parents:
If True: As in pseudocode of Algorithm 1
If False: If X^i_{t-\tau} has been marked as ancestor of X^j_t at any point of a preliminary iteration but the link between X^i_{t-\tau} and X^j_t was removed later, the link is nevertheless initialized with a tail at X^i_{t-\tau} in the re-initialization
- no_apr:
If no_apr == 0: As in pseudocode of Algorithms S2 and S3
If no_apr == 1: The APR is not applied by Algorithm S2, except in line 22 of its last call directly before the call of Algorithm S3
If no_apr == 2: The APR is never applied
- verbosity:
Controls the verbose output self.run_lpcmci() and the function it calls.
Return value of self.run_lpcmci():
The estimated graph in form of a link matrix. This is a numpy array of shape (self.N, self.N, self.tau_max + 1), where the entry array[i, j, \tau] is a string that visualizes the estimated link from X^i_{t-\tau} to X^j_t. For example, if array[0, 2, 1] = 'o->', then the estimated graph contains the link X^i_{t-1} o-> X^j_t. This numpy array is also saved as instance attribute self.graph. Note that self.N is the number of observed time series and self.tau_max the maximal considered time lag.
A note on middle marks:
For convenience (to have strings of the same lengths) we here internally denote the empty middle mark by '-'. For post-processing purposes all middle marks are set to the empty middle mark (here '-') in line 224 (there can be non-empty middle marks only when one of max_p_global, max_p_non_ancestral, max_q_global or max_pds_set is not np.inf), but if verbosity >= 1 a graph with the middle marks will be printed out before.
A note on wildcards:
The middle mark wildcard \ast and the edge mark wildcard are here represented as *, the edge mark wildcard \star as +
"""
def __init__(self, dataframe, cond_ind_test):
"""Class constructor. Store:
i) data
ii) conditional independence test object
iii) some instance attributes"""
# Save the time series data that the algorithm operates on
self.dataframe = dataframe
# Set the conditional independence test to be used
self.cond_ind_test = cond_ind_test
self.cond_ind_test.set_dataframe(self.dataframe)
# Store the shape of the data in the T and N variables
self.T, self.N = self.dataframe.values.shape
def run_lpcmci(self,
    tau_max = 1,
    pc_alpha = 0.05,
    n_preliminary_iterations = 1,
    max_cond_px = 0,
    max_p_global = np.inf,
    max_p_non_ancestral = np.inf,
    max_q_global = np.inf,
    max_pds_set = np.inf,
    prelim_with_collider_rules = True,
    parents_of_lagged = True,
    prelim_only = False,
    break_once_separated = True,
    no_non_ancestral_phase = False,
    use_a_pds_t_for_majority = True,
    orient_contemp = 1,
    update_middle_marks = True,
    prelim_rules = 1,
    fix_all_edges_before_final_orientation = True,
    auto_first = True,
    remember_only_parents = True,
    no_apr = 0,
    verbosity = 0):
    """Run LPCMCI on the dataset and with the conditional independence test passed
    to the class constructor, using the options passed to this function.

    The algorithm proceeds in four steps:
      Step 0: save options and initialize the run memory (graph, sepsets, ...)
      Step 1: n_preliminary_iterations preliminary ancestral phases (Algorithm S2),
              each followed by a re-initialization that keeps the learned
              ancestorships (or only parentships, if remember_only_parents)
      Step 2: one full ancestral phase (Algorithm S2)
      Step 3: the non-ancestral phase (Algorithm S3) plus final orientations

    See the class docstring for the meaning of all parameters.

    Returns:
        The estimated graph as a numpy array of link strings with shape
        (self.N, self.N, self.tau_max + 1); also stored as self.graph.
        Side effects: also sets self.val_min_matrix and self.cardinality_matrix.
    """
    #######################################################################################################################
    #######################################################################################################################
    # Step 0: Initializations
    # Saves all options as instance attributes and builds the rule lists and run memory
    self._initialize(tau_max, pc_alpha, n_preliminary_iterations, max_cond_px, max_p_global, max_p_non_ancestral, max_q_global, max_pds_set, prelim_with_collider_rules, parents_of_lagged, prelim_only, break_once_separated, no_non_ancestral_phase, use_a_pds_t_for_majority, orient_contemp, update_middle_marks, prelim_rules, fix_all_edges_before_final_orientation, auto_first, remember_only_parents, no_apr, verbosity)

    #######################################################################################################################
    #######################################################################################################################
    # Step 1: Preliminary phases
    for i in range(self.n_preliminary_iterations):

        # Verbose output
        if self.verbosity >= 1:
            print("\n=======================================================")
            print("=======================================================")
            print("Starting preliminary phase {:2}".format(i + 1))

        # In the preliminary phases, auto-lag links are tested with first priority. Among the auto-lag links, different lags are not distinguished. All other links have lower priority, among which those with shorter lags have higher priority
        self._run_ancestral_removal_phase(prelim = True)

        # Verbose output
        if self.verbosity >= 1:
            print("\nPreliminary phase {:2} complete".format(i + 1))
            print("\nGraph:\n--------------------------------")
            self._print_graph_dict()
            print("--------------------------------")

        # When the option self.prelim_only is chosen, do not re-initialize in the last iteration
        if i == self.n_preliminary_iterations - 1 and self.prelim_only:
            break

        # Remember ancestorships, re-initialize and re-apply the remembered ancestorships
        def_ancs = self.def_ancs

        if self.remember_only_parents:
            # Keep only those ancestors that are still adjacent to their descendant, i.e. the parents
            smaller_def_ancs = dict()
            for j in range(self.N):
                smaller_def_ancs[j] = {(i, lag_i) for (i, lag_i) in def_ancs[j] if self._get_link((i, lag_i), (j, 0)) != ""}
            def_ancs = smaller_def_ancs

        # Wipe the run memory, then seed the fresh graph with the remembered ancestral information
        self._initialize_run_memory()
        self._apply_new_ancestral_information(None, def_ancs)

    #######################################################################################################################
    #######################################################################################################################
    # Step 2: Full ancestral phase
    if not self.prelim_only:

        # Verbose output
        if self.verbosity >= 1:
            print("\n=======================================================")
            print("=======================================================")
            print("Starting final ancestral phase")

        # In the standard ancestral phase, links are prioritized in the same way as in the preliminary phases
        self._run_ancestral_removal_phase()

        # Verbose output
        if self.verbosity >= 1:
            print("\nFinal ancestral phase complete")
            print("\nGraph:\n--------------------------------")
            self._print_graph_dict()
            print("--------------------------------")

    #######################################################################################################################
    #######################################################################################################################
    # Step 3: Non-ancestral phase
    if (not self.prelim_only) and (not self.no_non_ancestral_phase):

        # Verbose output
        if self.verbosity >= 1:
            print("\n=======================================================")
            print("=======================================================")
            print("Starting non-ancestral phase")

        # In the non-ancestral phase, large lags are prioritized
        self._run_non_ancestral_removal_phase()

        # Verbose output
        if self.verbosity >= 1:
            print("\nNon-ancestral phase complete")
            print("\nGraph:\n--------------------------------")
            self._print_graph_dict()
            print("--------------------------------")

        # Optionally force all middle marks to be empty and re-run the full rule set once more
        if self.fix_all_edges_before_final_orientation:
            self._fix_all_edges()
            self._run_orientation_phase(rule_list = self._rules_all, only_lagged = False)

    #######################################################################################################################
    #######################################################################################################################
    # Verbose output
    if self.verbosity >= 1:
        print("\nLPCMCI has converged")
        print("\nFinal graph:\n--------------------------------")
        print("--------------------------------")
        self._print_graph_dict()
        print("--------------------------------")
        print("--------------------------------\n")
        print("Max search set: {}".format(self.max_na_search_set_found))
        print("Max na-pds set: {}\n".format(self.max_na_pds_set_found))

    # Post processing: empty all middle marks, convert the graph dictionary to the
    # array representation, and build the test-statistic and cardinality matrices
    self._fix_all_edges()
    self.graph = self._dict2graph()
    self.val_min_matrix = self._dict_to_matrix(self.val_min, self.tau_max, self.N, default = 0)
    self.cardinality_matrix = self._dict_to_matrix(self.max_cardinality, self.tau_max, self.N, default = 0)

    # Return the estimated graph
    return self.graph
def _initialize(self, tau_max, pc_alpha, n_preliminary_iterations, max_cond_px, max_p_global, max_p_non_ancestral, max_q_global, max_pds_set, prelim_with_collider_rules, parents_of_lagged, prelim_only, break_once_separated, no_non_ancestral_phase, use_a_pds_t_for_majority, orient_contemp, update_middle_marks, prelim_rules, fix_all_edges_before_final_orientation, auto_first, remember_only_parents, no_apr, verbosity):
"""Function for
i) saving the arguments passed to self.run_lpcmci() as instance attributes
ii) initializing various memory variables for storing the current graph, sepsets etc.
"""
# Save the arguments passed to self.run_lpcmci()
self.tau_max = tau_max
self.pc_alpha = pc_alpha
self.n_preliminary_iterations = n_preliminary_iterations
self.max_cond_px = max_cond_px
self.max_p_global = max_p_global
self.max_p_non_ancestral = max_p_non_ancestral
self.max_q_global = max_q_global
self.max_pds_set = max_pds_set
self.prelim_with_collider_rules = prelim_with_collider_rules
self.parents_of_lagged = parents_of_lagged
self.prelim_only = prelim_only
self.break_once_separated = break_once_separated
self.no_non_ancestral_phase = no_non_ancestral_phase
self.use_a_pds_t_for_majority = use_a_pds_t_for_majority
self.orient_contemp = orient_contemp
self.update_middle_marks = update_middle_marks
self.prelim_rules = prelim_rules
self.fix_all_edges_before_final_orientation = fix_all_edges_before_final_orientation
self.auto_first = auto_first
self.remember_only_parents = remember_only_parents
self.no_apr = no_apr
self.verbosity = verbosity
# Rules to be executed at the end of a preliminary phase
self._rules_prelim_final= [["APR"], ["ER-08"], ["ER-02"], ["ER-01"], ["ER-09"], ["ER-10"]]
# Rules to be executed within the while loop of a preliminary phase
self._rules_prelim = [["APR"], ["ER-08"], ["ER-02"], ["ER-01"]] if self.prelim_rules == 0 else self._rules_prelim_final
# Full list of all rules
self._rules_all = [["APR"], ["ER-08"], ["ER-02"], ["ER-01"], ["ER-00-d"], ["ER-00-c"], ["ER-03"], ["R-04"], ["ER-09"], ["ER-10"], ["ER-00-b"], ["ER-00-a"]]
# Initialize various memory variables for storing the current graph, sepsets etc.
self._initialize_run_memory()
# Return
return True
def _initialize_run_memory(self):
"""Function for initializing various memory variables for storing the current graph, sepsets etc."""
# Initialize the nested dictionary for storing the current graph.
# Syntax: self.graph_dict[j][(i, -tau)] gives the string representing the link from X^i_{t-tau} to X^j_t
self.graph_dict = {}
for j in range(self.N):
self.graph_dict[j] = {(i, 0): "o?o" for i in range(self.N) if j != i}
if self.max_cond_px == 0 and self.update_middle_marks:
self.graph_dict[j].update({(i, -tau): "oL>" for i in range(self.N) for tau in range(1, self.tau_max + 1)})
else:
self.graph_dict[j].update({(i, -tau): "o?>" for i in range(self.N) for tau in range(1, self.tau_max + 1)})
# Initialize the nested dictionary for storing separating sets
# Syntax: self.sepsets[j][(i, -tau)] stores separating sets of X^i_{t-tau} to X^j_t. For tau = 0, i < j.
self.sepsets = {j: {(i, -tau): set() for i in range(self.N) for tau in range(self.tau_max + 1) if (tau > 0 or i < j)} for j in range(self.N)}
# Initialize dictionaries for storing known ancestorships, non-ancestorships, and ambiguous ancestorships
# Syntax: self.def_ancs[j] contains the set of all known ancestors of X^j_t. Equivalently for the others
self.def_ancs = {j: set() for j in range(self.N)}
self.def_non_ancs = {j: set() for j in range(self.N)}
self.ambiguous_ancestorships = {j: set() for j in range(self.N)}
# Initialize nested dictionaries for saving the minimum test statistic among all conditional independence tests of a given pair of variables, the maximum p-values, as well as the maximal cardinality of the known separating sets.
# Syntax: As for self.sepsets
self.val_min = {j: {(i, -tau): float("inf") for i in range(self.N) for tau in range(self.tau_max + 1) if (tau > 0 or i < j)} for j in range(self.N)}
self.pval_max = {j: {(i, -tau): 0 for i in range(self.N) for tau in range(self.tau_max + 1) if (tau > 0 or i < j)} for j in range(self.N)}
self.max_cardinality = {j: {(i, -tau): 0 for i in range(self.N) for tau in range(self.tau_max + 1) if (tau > 0 or i < j)} for j in range(self.N)}
# Initialize a nested dictionary for caching na-pds-sets
# Syntax: As for self.sepsets
self._na_pds_t = {(j, -tau_j): {} for j in range(self.N) for tau_j in range(self.tau_max + 1)}
# Initialize a variable for remembering the maximal cardinality among all calculated na-pds-sets, as well as the maximial cardinality of any search set in the non-ancestral phase
self.max_na_search_set_found = -1
self.max_na_pds_set_found = -1
# Return
return True
def _run_ancestral_removal_phase(self, prelim = False):
    """Run an ancestral edge removal phase, this is Algorithm S2.

    Parameters:
    - prelim: if True, this call is one of the preliminary phases; this
      restricts the orientation rules applied at the end of the phase
      (unless self.prelim_with_collider_rules is True).

    Returns True. Side effects: removes edges from self.graph_dict, updates
    middle marks, sepsets, and the val_min/pval_max/max_cardinality records.
    """
    # Iterate until convergence
    # p_pc is the cardinality of the non-default part of the conditioning sets. The full conditioning sets may have higher cardinality due to default conditioning on known parents
    p_pc = 0
    while_broken = False
    while True:

        ##########################################################################################################
        ### Run the next removal iteration #######################################################################

        # Force-quit the while loop when p_pc exceeds the limit put by self.max_p_global
        if p_pc > self.max_p_global:
            while_broken = True
            break

        # Verbose output
        if self.verbosity >= 1:
            if p_pc == 0:
                print("\nStarting test phase\n")
            print("p = {}".format(p_pc))

        # Variables to memorize the occurrence and absence of certain events in the below edge removal phase
        has_converged = True
        any_removal = False

        # Remember edges for which the separating set search is aborted due to max_q_global
        self._cannot_mark = set()

        # Generate the prioritized link list. If self.auto_first, all auto-lag links come first (as a single group,
        # lags not distinguished); all other links follow, grouped by lag with shorter lags first
        if self.auto_first:
            link_list = [product(range(self.N), range(-self.tau_max, 0))]
            link_list = link_list + [product(range(self.N), range(self.N), range(-lag, -lag + 1)) for lag in range(0, self.tau_max + 1)]
        else:
            link_list = [product(range(self.N), range(self.N), range(-lag, -lag + 1)) for lag in range(0, self.tau_max + 1)]

        # Run through all elements of link_list. Each element of link_list specifies ordered pairs of variables whose connecting edges are then subjected to conditional independence tests
        for links in link_list:

            # Memory variables for storing edges that are marked for removal
            to_remove = {j: {} for j in range(self.N)}

            # Iterate through all edges specified by links. Note that since the variable pairs are ordered, (A, B) and (B, A) are seen as different pairs.
            for pair in links:

                # Decode the elements of links into pairs of variables (X, Y).
                # Auto-lag groups produce 2-tuples (variable, lag); all other groups produce 3-tuples (i, j, lag)
                if len(pair) == 2:
                    X = (pair[0], pair[1])
                    Y = (pair[0], 0)
                else:
                    X = (pair[0], pair[2])
                    Y = (pair[1], 0)

                # Do not test auto-links twice (they are already covered by the dedicated auto-lag group)
                if self.auto_first and X[0] == Y[0]:
                    continue

                ######################################################################################################
                ### Exclusion of links ###############################################################################

                # Exclude the current link if ...
                # ... X = Y
                if X[1] == 0 and X[0] == Y[0]:
                    continue

                # ... X > Y (each unordered pair is only handled once, from its smaller endpoint)
                if self._is_smaller(Y, X):
                    continue

                # Get the current link
                link = self._get_link(X, Y)

                # Moreover exclude the current link if ...
                # ... X and Y are not adjacent anymore
                if link == "":
                    continue

                # ... the link is definitely part of G (empty middle mark)
                if link[1] == "-":
                    continue

                ######################################################################################################
                ### Determine which tests the link will be subjected to  #############################################

                # Depending on the middle mark on the link between X and Y as well as on some global options, we may not need to search for separating sets among the potential parents of Y and/or X
                test_Y = True if link[1] not in ["R", "!"] else False
                test_X = True if (link[1] not in ["L", "!"] and (X[1] == 0 or (self.max_cond_px > 0 and self.max_cond_px >= p_pc))) else False

                ######################################################################################################
                ### Prepare the PC search sets and default conditioning sets ########################################

                if test_Y:
                    S_default_YX, S_search_YX = self._get_default_and_search_sets(Y, X, "ancestral")

                if test_X:
                    S_default_XY, S_search_XY = self._get_default_and_search_sets(X, Y, "ancestral")

                ######################################################################################################
                ### Middle mark updates ##############################################################################

                # NOTE(review): this flag is never read or written again below — appears to be vestigial
                any_middle_mark_update = False

                # Note: Updating the middle marks here, within the for-loop, does not spoil order independence. In fact, this update does not influence the flow of the for-loop at all
                if test_Y:
                    if len(S_search_YX) < p_pc:

                        # Note that X is smaller than Y. If S_search_YX exists and has fewer than p elements, X and Y are not d-separated by S \subset Par(Y). Therefore, the middle mark on the edge between X and Y can be updated with 'R'
                        if (X, Y) not in self._cannot_mark:
                            self._apply_middle_mark(X, Y, "R")

                    else:
                        # Since S_search_YX exists and has at least p_pc elements, the link between X and Y will be subjected to conditional independence tests. Therefore, the algorithm has not converged yet.
                        has_converged = False

                if test_X:
                    if len(S_search_XY) < p_pc:

                        # Note that X is smaller than Y. If S_search_XY exists and has fewer than p elements, X and Y are not d-separated by S \subset Par(X). Therefore, the middle mark on the edge between X and Y can be updated with 'L'
                        if (X, Y) not in self._cannot_mark:
                            self._apply_middle_mark(X, Y, "L")

                    else:
                        # Since S_search_XY exists and has at least p_pc elements, the link between X and Y will be subjected to conditional independence tests. Therefore, the algorithm has not converged yet.
                        has_converged = False

                ######################################################################################################
                ######################################################################################################
                ### Tests for conditional independence ###############################################################

                # If option self.break_once_separated is True, the below for-loops will be broken immediately once a separating set has been found. In conjunction with the modified majority rule employed for orienting links, order independence (with respect to the index 'i' on X^i_t) then requires that the tested conditioning sets are ordered in an order independent way. Here, the minimal effect size of previous conditional independence tests serves as an order independent order criterion.
                if self.break_once_separated or not np.isinf(self.max_q_global):
                    if test_Y:
                        S_search_YX = self._sort_search_set(S_search_YX, Y)
                    if test_X:
                        S_search_XY = self._sort_search_set(S_search_XY, X)

                # Run through all cardinality p_pc subsets of S_search_YX
                if test_Y:

                    q_count = 0
                    for S_pc in combinations(S_search_YX, p_pc):

                        # Abort the search for this edge once more than max_q_global sets have been tried
                        q_count = q_count + 1
                        if q_count > self.max_q_global:
                            self._cannot_mark.add((X, Y))
                            break

                        # Build the full conditioning set: tested subset plus default conditioning set
                        Z = set(S_pc)
                        Z = Z.union(S_default_YX)

                        # Test conditional independence of X and Y given Z
                        val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max)

                        if self.verbosity >= 2:
                            print("ANC(Y): %s _|_ %s  |  S_def = %s, S_pc = %s: val = %.2f / pval = % .4f" %
                                (X, Y, ' '.join([str(z) for z in S_default_YX]), ' '.join([str(z) for z in S_pc]), val, pval))

                        # Accordingly update dictionaries that keep track of the minimal test statistic, the maximal p-value, and the maximal cardinality of conditioning sets
                        self._update_val_min(X, Y, val)
                        self._update_pval_max(X, Y, pval)
                        self._update_cardinality(X, Y, len(Z))

                        # Check whether test result was significant
                        if pval > self.pc_alpha:

                            # Mark the edge from X to Y for removal and save sepset
                            to_remove[Y[0]][X] = True
                            self._save_sepset(X, Y, (frozenset(Z), "wm"))

                            # Verbose output
                            if self.verbosity >= 1:
                                print("({},{:2}) {:11} {} given {} union {}".format(X[0], X[1], "independent", Y, S_pc, S_default_YX))

                            if self.break_once_separated:
                                break

                # Run through all cardinality p_pc subsets of S_search_XY
                if test_X:

                    q_count = 0
                    for S_pc in combinations(S_search_XY, p_pc):

                        # Abort the search for this edge once more than max_q_global sets have been tried
                        q_count = q_count + 1
                        if q_count > self.max_q_global:
                            self._cannot_mark.add((X, Y))
                            break

                        # Build the full conditioning set: tested subset plus default conditioning set
                        Z = set(S_pc)
                        Z = Z.union(S_default_XY)

                        # Test conditional independence of X and Y given Z
                        val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max)

                        if self.verbosity >= 2:
                            print("ANC(X): %s _|_ %s  |  S_def = %s, S_pc = %s: val = %.2f / pval = % .4f" %
                                (X, Y, ' '.join([str(z) for z in S_default_XY]), ' '.join([str(z) for z in S_pc]), val, pval))

                        # Accordingly update dictionaries that keep track of the minimal test statistic, the maximal p-value, and the maximal cardinality of conditioning sets
                        self._update_val_min(X, Y, val)
                        self._update_pval_max(X, Y, pval)
                        self._update_cardinality(X, Y, len(Z))

                        # Check whether test result was significant
                        if pval > self.pc_alpha:

                            # Mark the edge from X to Y for removal and save sepset
                            to_remove[Y[0]][X] = True
                            self._save_sepset(X, Y, (frozenset(Z), "wm"))

                            # Verbose output
                            if self.verbosity >= 1:
                                print("({},{:2}) {:11} {} given {} union {}".format(X[0], X[1], "independent", Y, S_pc, S_default_XY))

                            if self.break_once_separated:
                                break

            # end for pair in links

            ##########################################################################################################
            ### Remove edges marked for removal in to_remove #########################################################

            # Run through all of the nested dictionary. Removal is deferred to here (after the full pass over this
            # link group) to keep the result independent of the iteration order
            for j in range(self.N):
                for (i, lag_i) in to_remove[j].keys():

                    # Remember that at least one edge has been removed, remove the edge
                    any_removal = True
                    self._write_link((i, lag_i), (j, 0), "", verbosity = self.verbosity)

        # end for links in link_list

        # Verbose output
        if self.verbosity >= 1:
            print("\nTest phase complete")

        ##############################################################################################################
        ### Orientations and next step ###############################################################################

        if any_removal:

            # At least one edge was removed or at least one middle mark has been updated. Therefore: i) apply the restricted set of orientation rules, ii) restart the while loop at p_pc = 0, unless all edges have converged, then break the while loop
            only_lagged = False if self.orient_contemp == 2 else True
            any_update = self._run_orientation_phase(rule_list = self._rules_prelim, only_lagged = only_lagged)

            # If the orientation phase made a non-trivial update, then restart the while loop. Else increase p_pc by one
            if any_update:
                if self.max_cond_px == 0 and self.update_middle_marks:
                    self._update_middle_marks()
                p_pc = 0

            else:
                p_pc = p_pc + 1

        else:
            # The graph has not changed at all in this iteration of the while loop. Therefore, if all edges have converged, break the while loop. If at least one edge has not yet converged, increase p_pc by one.
            if has_converged:
                break
            else:
                p_pc = p_pc + 1

    # end while True

    ##################################################################################################################
    ### Consistency test and middle mark update ######################################################################

    # Run through the entire graph
    for j in range(self.N):
        for (i, lag_i) in self.graph_dict[j].keys():

            X = (i, lag_i)
            Y = (j, 0)

            # Each unordered pair is handled once, from its smaller endpoint
            if self._is_smaller(Y, X):
                continue

            # Consider only those links that are still part of G
            link = self._get_link((i, lag_i), (j, 0))
            if len(link) > 0:

                # Consistency check: middle marks must have been resolved, unless the while loop was force-quit or the separating set search for this edge was aborted due to max_q_global
                if not while_broken and (X, Y) not in self._cannot_mark:
                    assert link[1] != "?"
                    assert link[1] != "R" or (lag_i < 0 and (self.max_cond_px > 0 or not self.update_middle_marks))

                # Update all middle marks to '!'
                if link[1] not in ["-", "!"]:
                    self._write_link((i, lag_i), (j, 0), link[0] + "!" + link[2])

    ##################################################################################################################
    ### Final rule applications ######################################################################################

    # Apply the full rule set (final and non-preliminary calls) or the restricted preliminary rule set
    if not prelim or self.prelim_with_collider_rules:

        # Decrement no_apr so that no_apr == 1 suppresses the APR only until the last call before Algorithm S3
        if not prelim:
            self.no_apr = self.no_apr - 1

        any_update = self._run_orientation_phase(rule_list = self._rules_all, only_lagged = False)

        if self.max_cond_px == 0 and self.update_middle_marks and any_update:
            self._update_middle_marks()

    else:
        only_lagged = False if self.orient_contemp >= 1 else True
        any_update = self._run_orientation_phase(rule_list = self._rules_prelim_final, only_lagged = only_lagged)

        if self.max_cond_px == 0 and self.update_middle_marks and any_update:
            self._update_middle_marks()

    # Return
    return True
def _run_non_ancestral_removal_phase(self):
    """Run the non-ancestral edge removal phase, this is Algorithm S3.

    Iteratively tests the remaining edges for conditional independence given
    cardinality-p_pc subsets of the na-pds-t search sets (unioned with default
    conditioning sets of known parents), removes edges found independent, and
    re-applies the orientation rules until all edges have converged or the
    cardinality bounds (self.max_p_global / self.max_p_non_ancestral) are
    exceeded.

    Fixes relative to the previous revision:
    - ``S_seach_XY`` (undefined name, NameError whenever a link with middle
      mark 'L' was tested with test_X True) corrected to ``S_search_XY``.
    - The verbose output of the X-search branch printed ``S_default_YX``
      although the conditioning set actually used ``S_default_XY``.

    Returns
    -------
    bool
        Always True, signalling completion to the caller.
    """
    # Update of middle marks
    self._update_middle_marks()

    # This function initializes self._graph_full_dict, a nested dictionary representing the graph including links that are forward in time. This will make the calculation of na-pds-t sets easier.
    self._initialize_full_graph()

    # Iterate until convergence. Here, p_pc is the cardinality of the non-default part of the conditioning sets. The full conditioning sets may have higher cardinality due to default conditioning on known parents
    p_pc = 0
    while True:

        ##########################################################################################################
        ### Run the next removal iteration #######################################################################

        # Force-quit while loop when p_pc exceeds the limit put by self.max_p_global or self.max_p_non_ancestral
        if p_pc > self.max_p_global or p_pc > self.max_p_non_ancestral:
            break

        # Verbose output
        if self.verbosity >= 1:
            if p_pc == 0:
                print("\nStarting test phase\n")
            print("p = {}".format(p_pc))

        # Variables to memorize the occurrence and absence of certain events in the below edge removal phase
        has_converged = True
        any_removal = False

        # Remember edges for which the separating set search is aborted due to max_q_global
        self._cannot_mark = set()

        # Generate the prioritized link list
        if self.auto_first:
            link_list = [product(range(self.N), range(-self.tau_max, 0))]
            link_list = link_list + [product(range(self.N), range(self.N), range(-lag, -lag + 1)) for lag in range(0, self.tau_max + 1)]
        else:
            link_list = [product(range(self.N), range(self.N), range(-lag, -lag + 1)) for lag in range(0, self.tau_max + 1)]

        # Run through all elements of link_list. Each element of link_list specifies ordered pairs of variables whose connecting edges are then subjected to conditional independence tests
        for links in link_list:

            # Memory variables for storing edges that are marked for removal
            to_remove = {j: {} for j in range(self.N)}

            # Iterate through all edges specified by links. Note that since the variable pairs are ordered, (A, B) and (B, A) are seen as different pairs.
            for pair in links:

                # Decode the pair into (X, Y) with Y at lag 0. Auto-link
                # pairs carry two entries, cross-link pairs carry three.
                if len(pair) == 2:
                    X = (pair[0], pair[1])
                    Y = (pair[0], 0)
                else:
                    X = (pair[0], pair[2])
                    Y = (pair[1], 0)

                # Do not test auto-links twice
                if self.auto_first and X[0] == Y[0]:
                    continue

                ######################################################################################################
                ### Exclusion of links ###############################################################################

                # Exclude the current link if ...
                # ... X = Y
                if X[1] == 0 and X[0] == Y[0]:
                    continue
                # ... X > Y
                if self._is_smaller(Y, X):
                    continue

                # Get the current link
                link = self._get_link(X, Y)

                # Exclude the current link if ...
                if link == "":
                    continue
                # ... the link is definitely part of G
                if link[1] == "-":
                    continue

                ######################################################################################################
                ### Determine which tests the link will be subjected to  #############################################

                # The algorithm always searches for separating sets in na-pds-t(Y, X). Depending on whether X and Y are contemporaneous and on some global options, the algorithm may also search for separating sets in na-pds-t(X, Y)
                test_X = True if (X[1] == 0 or (self.max_cond_px > 0 and self.max_cond_px >= p_pc)) else False

                ######################################################################################################
                ### Preparation of default conditioning sets and PC search sets ######################################

                # Verbose output
                if self.verbosity >= 2:
                    print("_get_na_pds_t ")

                S_default_YX, S_search_YX = self._get_default_and_search_sets(Y, X, "non-ancestral")
                self.max_na_search_set_found = max(self.max_na_search_set_found, len(S_search_YX))

                if test_X:
                    S_default_XY, S_search_XY = self._get_default_and_search_sets(X, Y, "non-ancestral")
                    self.max_na_search_set_found = max(self.max_na_search_set_found, len(S_search_XY))

                # If the search set exceeds the specified bounds, do not test this link
                if len(S_search_YX) > self.max_pds_set or (test_X and len(S_search_XY) > self.max_pds_set):
                    self._cannot_mark.add((X, Y))
                    continue

                ######################################################################################################
                ### Middle mark updates ##############################################################################

                # Note: Updating the middle marks here, within the for-loop, does not spoil order independence. In fact, this update does not influence the flow of the for-loop at all
                if link[1] == "!":
                    if len(S_search_YX) < p_pc or (test_X and len(S_search_XY) < p_pc):
                        # Mark the link from X to Y as converged, remember the fixation, then continue
                        if (X, Y) not in self._cannot_mark:
                            self._write_link(X, Y, link[0] + "-" + link[2], verbosity = self.verbosity)
                        continue
                    else:
                        has_converged = False

                elif link[1] == "R":
                    if len(S_search_YX) < p_pc:
                        # Mark the link from X to Y as converged, remember the fixation, then continue
                        if (X, Y) not in self._cannot_mark:
                            self._write_link(X, Y, link[0] + "-" + link[2], verbosity = self.verbosity)
                        continue
                    elif (test_X and len(S_search_XY) >= p_pc):
                        has_converged = False

                elif link[1] == "L":
                    # BUGFIX: previously referenced the undefined name 'S_seach_XY'
                    if test_X and len(S_search_XY) < p_pc:
                        # Mark the link from X to Y as converged, remember the fixation, then continue
                        if (X, Y) not in self._cannot_mark:
                            self._write_link(X, Y, link[0] + "-" + link[2], verbosity = self.verbosity)
                        continue
                    elif len(S_search_YX) >= p_pc:
                        has_converged = False

                else:
                    if len(S_search_YX) < p_pc and (not test_X or len(S_search_XY) < p_pc):
                        # Mark the link from X to Y as converged, remember the fixation, then continue
                        if (X, Y) not in self._cannot_mark:
                            self._write_link(X, Y, link[0] + "-" + link[2], verbosity = self.verbosity)
                        continue
                    else:
                        has_converged = False

                ######################################################################################################
                ### Tests for conditional independence ###############################################################

                # If option self.break_once_separated is True, the below for-loops will be broken immediately once a separating set has been found. In conjunction with the modified majority rule employed for orienting links, order independence (with respect to the index 'i' on X^i_t) then requires that the tested conditioning sets are ordered in an order independent way. Here, the minimal effect size of previous conditional independence tests serves as an order independent order criterion.
                if self.break_once_separated or not np.isinf(self.max_q_global):
                    S_search_YX = self._sort_search_set(S_search_YX, Y)
                    if test_X:
                        S_search_XY = self._sort_search_set(S_search_XY, X)

                # Verbose output
                if self.verbosity >= 2:
                    print("for S_pc in combinations(S_search_YX, p_pc)")

                # Run through all cardinality p_pc subsets of S_search_YX
                q_count = 0
                for S_pc in combinations(S_search_YX, p_pc):
                    q_count = q_count + 1
                    if q_count > self.max_q_global:
                        self._cannot_mark.add((X, Y))
                        break

                    # Build the full conditioning set
                    Z = set(S_pc)
                    Z = Z.union(S_default_YX)

                    # Test conditional independence of X and Y given Z
                    val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max)

                    if self.verbosity >= 2:
                        print("Non-ANC(Y):    %s _|_ %s  |  S_def = %s, S_pc = %s: val = %.2f / pval = % .4f" %
                            (X, Y, ' '.join([str(z) for z in S_default_YX]), ' '.join([str(z) for z in S_pc]), val, pval))

                    # Accordingly update dictionaries that keep track of the test statistic, the corresponding p-value and the cardinality of conditioning sets
                    self._update_val_min(X, Y, val)
                    self._update_pval_max(X, Y, pval)
                    self._update_cardinality(X, Y, len(Z))

                    # Check whether test result was significant
                    if pval > self.pc_alpha:

                        # Mark the edge from X to Y for removal and save sepset
                        to_remove[Y[0]][X] = True
                        self._save_sepset(X, Y, (frozenset(Z), "wm"))

                        # Verbose output
                        if self.verbosity >= 1:
                            print("({},{:2}) {:11} {} given {} union {}".format(X[0], X[1], "independent", Y, S_pc, S_default_YX))

                        if self.break_once_separated:
                            break

                if test_X:

                    # Verbose output
                    if self.verbosity >= 2:
                        print("for S_pc in combinations(S_search_XY, p_pc)")

                    # Run through all cardinality p_pc subsets of S_search_XY
                    q_count = 0
                    for S_pc in combinations(S_search_XY, p_pc):
                        q_count = q_count + 1
                        if q_count > self.max_q_global:
                            self._cannot_mark.add((X, Y))
                            break

                        # Build the full conditioning set
                        Z = set(S_pc)
                        Z = Z.union(S_default_XY)

                        # Test conditional independence of X and Y given Z
                        val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max)

                        if self.verbosity >= 2:
                            print("Non-ANC(X):    %s _|_ %s  |  S_def = %s, S_pc = %s: val = %.2f / pval = % .4f" %
                                (X, Y, ' '.join([str(z) for z in S_default_XY]), ' '.join([str(z) for z in S_pc]), val, pval))

                        # Accordingly update dictionaries that keep track of the test statistic, the corresponding p-value and the cardinality of conditioning sets
                        self._update_val_min(X, Y, val)
                        self._update_pval_max(X, Y, pval)
                        self._update_cardinality(X, Y, len(Z))

                        # Check whether test result was significant
                        if pval > self.pc_alpha:

                            # Mark the edge from X to Y for removal and save sepset
                            to_remove[Y[0]][X] = True
                            self._save_sepset(X, Y, (frozenset(Z), "wm"))

                            # Verbose output
                            # BUGFIX: previously printed S_default_YX although S_default_XY was conditioned on
                            if self.verbosity >= 1:
                                print("({},{:2}) {:11} {} given {} union {}".format(X[0], X[1], "independent", Y, S_pc, S_default_XY))

                            if self.break_once_separated:
                                break

            # end for pair in links

            ##########################################################################################################
            ### Remove edges marked for removal in to_remove #########################################################

            # Check whether there is any removal at all
            any_removal_this = False

            # Run through all of the nested dictionary
            for j in range(self.N):
                for (i, lag_i) in to_remove[j].keys():

                    # Remember that at least one edge has been removed, remove the edge
                    any_removal = True
                    any_removal_this = True
                    self._write_link((i, lag_i), (j, 0), "", verbosity = self.verbosity)

            # If any_removal_this = True, we need to recalculate full graph dict
            if any_removal_this:
                self._initialize_full_graph()
                self._na_pds_t = {(j, -tau_j): {} for j in range(self.N) for tau_j in range(self.tau_max + 1)}

        # end for links in link_list

        # Verbose output
        if self.verbosity >= 1:
            print("\nTest phase complete")

        ##############################################################################################################
        ### Orientations and next step ###############################################################################
        if any_removal:
            # At least one edge was removed or at least one middle mark has been updated. Therefore: i) apply the full set of orientation rules, ii) restart the while loop at p_pc = 0, unless all edges have converged, then break the while loop
            any_update = self._run_orientation_phase(rule_list = self._rules_all, only_lagged = False)

            if any_update:
                self._initialize_full_graph()
                self._na_pds_t = {(j, -tau_j): {} for j in range(self.N) for tau_j in range(self.tau_max + 1)}
                p_pc = 0
            else:
                p_pc = p_pc + 1

        else:
            # The graph has not changed at all in this iteration of the while loop. Therefore, if all edges have converged, break the while loop. If at least one edge has not yet converged, increase p_pc by one.
            if has_converged:
                break
            else:
                p_pc = p_pc + 1

    # end while True

    ##################################################################################################################
    ### Final rule applications ######################################################################################
    self._run_orientation_phase(rule_list = self._rules_all, only_lagged = False)

    # Return
    return True
def _run_orientation_phase(self, rule_list, only_lagged = False):
"""Exhaustively apply the rules specified by rule_list, this is Algorithm S4"""
# Verbose output
if self.verbosity >= 1:
print("\nStarting orientation phase")
print("with rule list: ", rule_list)
# Remember whether this call to _run_orientation_phase has made any update to G
restarted_once = False
# Run through all priority levels of rule_list
idx = 0
while idx <= len(rule_list) - 1:
# Some rule require self._graph_full_dict. Therefore, it is initialized once the while loop (re)-starts at the first prioprity level
if idx == 0:
self._initialize_full_graph()
# Remember whether G will be updated with new useful information ('x' marks are considered not useful)
restart = False
###########################################################################################################
### Rule application ######################################################################################
# Get the current rules
current_rules = rule_list[idx]
# Prepare a list to remember marked orientations
to_orient = []
# Run through all current rules