-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreading.py
110 lines (99 loc) · 3.78 KB
/
reading.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""Input/Output Module
Helper functions for reading and writing files.
"""
from collections import Counter
import numpy as np
def read_seed(filename, labels, dimensions, D):
    """Build or load the seed matrix, one row per node.

    ``filename`` selects one of three modes:

    * ``None`` -- no seed; ``None`` is returned.
    * a value that ``float()`` accepts (e.g. ``"0.1"``) -- a "percentage
      seed": every node is independently selected with that probability and
      a label-based seed matrix is generated (see below).
    * anything else -- a path to a text file where each line holds the
      whitespace-separated seed values of the corresponding node.

    Args:
        filename: ``None``, a number / numeric string, or a path (``str`` or
            path-like object).
        labels: per-node labels. In percentage mode they are used directly
            as column indices of the seed matrix.
        dimensions: number of seed columns expected; in percentage mode it
            must equal the number of distinct labels.
        D: accepted for interface compatibility; not used by this function.

    Returns:
        ``None`` or a 2-D ``numpy`` array with one row per node.

    Raises:
        ValueError: in percentage mode, when the number of distinct labels
            differs from ``dimensions``.
    """
    if filename is None:
        return None

    # A path-like object makes float() raise TypeError rather than
    # ValueError; both simply mean "this is not a percentage".
    try:
        p = float(filename)
    except (TypeError, ValueError):
        p = None

    if p is None:
        with open(filename) as f:
            rows = [[float(tok) for tok in line.split()] for line in f]
        # Trailing blank lines would add empty rows and make the array
        # ragged. Interior blank lines are kept so that a malformed file is
        # not silently re-aligned against the node order.
        while rows and not rows[-1]:
            rows.pop()
        return np.array(rows)

    n = len(labels)
    distinct_labels = set(labels)
    number_labels = len(distinct_labels)
    if number_labels != dimensions:
        raise ValueError(f'When using percentage seed, dimension space should equal number of labels. Labels: {number_labels}. Dimensions: {dimensions}.')

    # Each node is selected independently with probability p.
    ch = np.random.rand(n) < p
    idx = np.where(ch)[0]
    train_examples = ch.sum()

    # Selected rows start at -1 everywhere; unselected rows stay 0.
    s = np.zeros([n, number_labels])
    s[ch] = -1
    label_counter = Counter(labels[i] for i in idx)

    # Scale the negative entries of column l by the number of selected
    # nodes that do NOT carry label l.
    for l in distinct_labels:
        others = train_examples - label_counter[l]
        # others == 0 means every selected node has label l, so every entry
        # of s[ch, l] is overwritten by the positive assignment below;
        # skipping the division only avoids a divide-by-zero warning.
        if others:
            s[ch, l] /= others

    # The entry for a selected node's own label is 1 / (selected nodes with
    # that label).
    for i in idx:
        s[i, labels[i]] = 1 / label_counter[labels[i]]

    # Subtract the column means.
    s -= s.sum(axis=0) / len(s)

    # Per-column summary: label count, negative mass, positive mass, total.
    for i in range(dimensions):
        si = s[:, i]
        print(f'{i} {label_counter[i]:4d} {si[si < 0].sum():7.4f} {si[si > 0].sum():7.3f} {si.sum():7.4f}')
    return s
def read_hypergraph(filename):
    """Read a hypergraph from a text file.

    The first line is ``m n [fmt]``: the number of hyperedges, the number of
    nodes, and an optional format code whose decimal digits switch optional
    fields on (a digit equal to 1 enables the field):

    * units     -- each hyperedge line starts with a hyperedge weight
    * tens      -- ``n`` node-weight lines follow the hyperedges
    * hundreds  -- hyperedge lines alternate ``node weight node weight ...``
    * thousands -- each hyperedge line carries a center node id (after the
      hyperedge weight, if present)

    Node ids in the file are 1-based and are converted to 0-based.

    Args:
        filename: path of the hypergraph file.

    Returns:
        A tuple ``(n, m, node_weights, hypergraph, weights, center_id,
        hypergraph_node_weights)``. ``hypergraph`` is a list of node tuples.
        ``weights``, ``center_id`` and ``hypergraph_node_weights`` are dicts
        keyed by the hyperedge's node tuple, and ``node_weights`` is a list
        of floats; each is ``None`` when the format code does not enable it.

    Raises:
        ValueError: if the header has fewer than two integers, or the file
            ends before ``m`` hyperedge lines have been read.
    """
    with open(filename) as f:
        header = [int(tok) for tok in f.readline().split()]
        if len(header) < 2:
            raise ValueError(
                f'{filename}: header must contain the number of hyperedges and nodes.'
            )
        m, n = header[:2]
        fmt = header[2] if len(header) > 2 else 0

        has_edge_weights = fmt % 10 == 1
        has_node_weights = (fmt // 10) % 10 == 1
        hyperedge_has_node_weights = (fmt // 100) % 10 == 1
        has_hyperedge_centers = (fmt // 1000) % 10 == 1

        node_weights = None
        weights = {} if has_edge_weights else None
        center_id = {} if has_hyperedge_centers else None
        hypergraph_node_weights = {} if hyperedge_has_node_weights else None

        hypergraph = []
        for edge_number in range(m):
            raw = f.readline()
            # readline() returns '' only at end of file. Without this check
            # a truncated file would silently yield empty hyperedges.
            if not raw:
                raise ValueError(
                    f'{filename}: expected {m} hyperedges but the file ended after {edge_number}.'
                )
            line = raw.split()

            start = 0
            if has_edge_weights:
                w = float(line[start])
                start += 1
            if has_hyperedge_centers:
                c_ind = int(line[start]) - 1
                start += 1

            if hyperedge_has_node_weights:
                # Remaining tokens alternate: node id, weight, node id, ...
                nodes = tuple(int(tok) - 1 for tok in line[start::2])
                hypergraph_node_weights[nodes] = [
                    float(tok) for tok in line[start + 1::2]
                ]
            else:
                nodes = tuple(int(tok) - 1 for tok in line[start:])

            hypergraph.append(nodes)
            if has_edge_weights:
                weights[nodes] = w
            if has_hyperedge_centers:
                center_id[nodes] = c_ind

        if has_node_weights:
            node_weights = [float(f.readline()) for _ in range(n)]
    return n, m, node_weights, hypergraph, weights, center_id, hypergraph_node_weights
def read_labels(filename):
    """Read ground-truth community labels.

    The first line of the file holds the whitespace-separated label names;
    every following line holds one integer label.

    Args:
        filename: path of the labels file, or ``None``.

    Returns:
        ``(label_names, labels)``: a ``numpy`` array of the names from the
        first line and a list of ints. ``([], [])`` when ``filename`` is
        ``None``.

    Raises:
        ValueError: if a label line (other than trailing blank lines) is not
            an integer.
    """
    if filename is None:
        return [], []
    with open(filename) as f:
        label_names = np.array(f.readline().split())
        entries = [line.strip() for line in f]
    # A trailing newline or blank line at the end of the file is not a label.
    # Interior blank lines are kept so they still raise instead of silently
    # shifting every later label up by one position.
    while entries and not entries[-1]:
        entries.pop()
    labels = [int(entry) for entry in entries]
    return label_names, labels
def read_positions(filename, n, dimensions):
    """Load manually specified positions.

    Args:
        filename: ``None`` for no positions, the string ``'0'`` for an
            all-zero matrix, or the path of a text file in which each line
            holds whitespace-separated float values.
        n: number of rows of the all-zero matrix (used only for ``'0'``).
        dimensions: number of columns of the all-zero matrix (used only for
            ``'0'``).

    Returns:
        ``None``, an ``(n, dimensions)`` zero array, or a ``numpy`` array
        with one row per line of the file.
    """
    if filename is None:
        return None
    if filename == '0':
        return np.zeros((n, dimensions))
    with open(filename) as f:
        rows = [[float(tok) for tok in line.split()] for line in f]
    # Trailing blank lines would add empty rows and make the array ragged.
    # Interior blank lines are left alone so a malformed file is not silently
    # re-aligned.
    while rows and not rows[-1]:
        rows.pop()
    return np.array(rows)