-
Notifications
You must be signed in to change notification settings - Fork 0
/
LammpsH5MD.py
405 lines (336 loc) · 14.9 KB
/
LammpsH5MD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
import numpy as np
import h5py
import sys
import time
import os
from tqdm import tqdm
__all__ = ['LammpsH5MD']
# =====================================================================
# PRIVATE function
# ---------------------------------------------------------------------
# DEFINE OPTIMAL ROTATION FUNCTION
def optimal_rotate(P,Q):
# P and Q are two sets of vectors
P = np.matrix(P)
Q = np.matrix(Q)
assert P.shape == Q.shape
Qc = np.mean(Q,axis=0)
P = P - np.mean(P,axis=0)
Q = Q - np.mean(Q,axis=0)
# calculate covariance matrix A = (P^T)Q
A = P.T * Q
# SVD for matrix A
V, S, Wt = np.linalg.svd(A)
# correct rotation matrix to ensure a right-handed system if necessary
d = (np.linalg.det(V) * np.linalg.det(Wt)) < 0.0
if d:
S[-1] = -S[-1]
V[:,-1] = -V[:,-1]
# calculate the final rotation matrix U
U = V * Wt
return np.array(P * U + Qc)
# =====================================================================
# =====================================================================
# funtion used to calculate mean and variance one-pass algorithm
class OnlineVariance:
"""
Welford's algorithm to computes the sample mean, variance in a stream
"""
def __init__(self, dim):
if dim == 1:
self.mean, self.var, self.S = 0,0,0
else:
self.mean, self.var, self.S = np.zeros(dim), np.zeros(dim), np.zeros(dim)
self.n = 0
def stream(self, data_point):
self.n += 1
self.delta = data_point - self.mean
self.mean += self.delta / self.n
self.S += self.delta * (data_point - self.mean)
self.var = self.S / (self.n - 1.0)
return self.mean, self.var
# =====================================================================
class LammpsH5MD:
def __init__(self):
self.file = self.filename = None
self.frame_number = None
self.atoms_number = None
self.mols_number = None
self.box = None
def load(self, finname):
# load the trajectory
try:
self.filename = finname
self.file = h5py.File(finname, 'r')
except:
raise
self.get_framenumber()
self.get_atomnumber()
self.get_molnumber()
self.get_box()
self.get_species()
self.get_mol_index()
def get_species(self):
try:
self.species = self.file['particles/all/species'][:]
except:
raise
def get_box(self):
try:
self.box = self.file['particles/all/box/edges/value'][0]
except:
raise
def get_framenumber(self):
# get the total number of snapshots stored in file
try:
self.frame_number = self.file['particles/all/position/value'].shape[0]
except:
raise
def get_atomnumber(self):
# get the total number of atoms/particles stored in file
try:
self.atoms_number = self.file['particles/all/position/value'].shape[1]
except:
raise
def get_mol_index(self):
try:
self.mol_index = np.int_(self.file['particles/all/mol/value'][0])
except:
raise
def get_molnumber(self):
try:
self.mols_number = int(self.file['particles/all/mol/value'][0].max())
except:
raise
def get_frame(self,t):
# get the frame provided the index of snapshot
return self.file['particles/all/position/value'][t]
def cal_twotime(self, func_lst, t0freq=10, dtnumber=100, start=0, end=None,
align=False, mode='log', screen_info=True):
# This method calculate any two-time quantity. Like MSD, ISF ...
# And return the full list of data
#
# explain each argument:
# func_lst: list of functions to calculate the quantity.
# can specify multiple functions. E.g [msd,isf]
# if only one function, no need to use [ ]
# t0freq: take initial start time t0 for every this many frames
# dtnumber: length of dt_lst
# start: the index of first frame you want to analyze. Default value=0
# end: the index of last frame you want to analyze. Default value:
# last frame of file
# align: enable/disable the trajectory alignment. If enable, specify the
# index of reference snapshot. Ex. align=0. Default value: False
#
# mode: specify the method to distribute the dt. Default value: log
# log: make the dt list logrithm
# linear: linear distributedt
#
# return: quantity list calculated. the order of list is the same as
# the func_lst.
# all get_timesteps() if no self.frame_number
if not self.frame_number:
self.get_framenumber()
if end == None:
end = self.frame_number
else:
assert type(end) == int
assert type(t0freq) == int
assert type(dtnumber) == int
assert type(start) == int
if type(func_lst) != list:
func_lst = [func_lst]
# create the initial time array.
t0_lst = np.arange(start, end, t0freq)
# create the delta t array. Two possible modes
# linear mode
if mode == 'linear':
dt_lst = np.unique(np.int_(np.linspace(0, end-start-1, dtnumber)))
elif mode == 'log':
dt_lst = np.unique(np.int_(np.power(10, np.linspace(0, np.log10(end-start-1), dtnumber))))
dt_lst = np.insert(dt_lst, 0, 0)
else:
raise ValueError("Error: value of argument 'mode' not recognized...\n")
# start the calculation
# initialize the quantity dictionary
# E.g corr = {msd: []
# isf: []}
twotime = {}
for f in func_lst:
twotime[f] = []
# initial configuration. used for alignment if enabled
if align is not False:
frame_start = self.file['particles/all/position/value'][align]
t_start = time.time()
for t0 in tqdm(t0_lst):
if not screen_info:
sys.stdout = open(os.devnull, 'w')
sys.stdout.write('Initial time {} analyzed.\n'.format(t0))
sys.stdout.flush()
for index, dt in enumerate(dt_lst[np.where(dt_lst <= (end-t0-1))]):
if align is not False:
frame_t1 = self.get_frame(t0+dt)
frame_t2 = self.get_frame(t0)
frame_t1 = optimal_rotate(frame_t1, frame_start)
frame_t2 = optimal_rotate(frame_t2, frame_start)
else:
frame_t1 = self.get_frame(t0+dt)
frame_t2 = self.get_frame(t0)
for func in func_lst:
twotime_temp = func(frame_t1, frame_t2) # t1 > t2
if isinstance(twotime_temp, (list, tuple, np.ndarray)):
twotime[func].append(np.concatenate(([t0, t0+dt], twotime_temp)))
else:
twotime[func].append([t0, t0+dt, twotime_temp]) # store the full information
t_end = time.time()
t_cost = t_end - t_start
sys.stdout.write('\nTotal time cost: {:.2f} mins\n'.format(t_cost/60.0))
# convert the python array to numpy array
for key in twotime:
twotime[key] = np.array(twotime[key])
return twotime # return the dictionary
def cal_onetime(self, func_lst, tfreq=1, start=0, end=None, align=False, reduce='sum', screen_info=True):
# This method calculate any static(one-time) quantity. Like Energy ...
# And return the full list of data
#
# explain each argument:
# func_lst: list of functions to calculate the quantity.
# can specify multiple functions. E.g [msd,isf]
# if only one function, no need to use [ ]
# tfreq: calculate the quantity for every this many frames
# start: the index of first frame you want to analyze. Default value=0
# end: the index of last frame you want to analyze. Default value:
# last frame of file
# align: enable/disable the trajectory alignment. If enable, specify the
# index of reference snapshot. E.g. align=0. Default value: False
# reduce: reduce the result. E.g. reduce=sum will sum all the quantity together
#
# return: a python dictionary whose keys are the object of function and values
# are the quantity associated to that function
# all get_timesteps() if no self.frame_number
if not self.frame_number:
self.get_framenumber()
if end == None:
end = self.frame_number
else:
assert type(end) == int
assert type(tfreq) == int
assert type(start) == int
if type(func_lst) != list:
func_lst = [func_lst]
# create the initial time array.
t_lst = np.arange(start, end, tfreq)
# start the calculation
# initialize the quantity dictionary
# E.g corr = {msd: []
# isf: []}
onetime = {}
for f in func_lst:
onetime[f] = []
onetime['time list'] = t_lst
# initial configuration. used for alignment if enabled
if align is not False:
frame_start = self.file['particles/all/position/value'][align]
t_start = time.time()
temp = {}
for t in tqdm(t_lst):
if not screen_info:
sys.stdout = open(os.devnull, 'w')
sys.stdout.write('Initial time {} analyzed.\n'.format(t))
sys.stdout.flush()
if align is not False:
frame_t = self.get_frame(t)
frame_t = optimal_rotate(frame_t, frame_start)
else:
frame_t = self.get_frame(t)
for func in func_lst:
onetime_temp = func(frame_t)
if reduce == 'sum':
try:
onetime[func] += onetime_temp
except ValueError:
onetime[func] = onetime_temp
elif reduce == 'mean':
try:
temp[func].stream(onetime_temp)
onetime[func] = temp[func].mean
except:
temp[func] = OnlineVariance(dim=onetime_temp.shape)
temp[func].stream(onetime_temp)
onetime[func] = temp[func].mean
elif reduce == 'var':
try:
temp[func].stream(onetime_temp)
onetime[func] = temp[func].var
except:
temp[func] = OnlineVariance(dim=onetime_temp.shape)
temp[func].stream(onetime_temp)
onetime[func] = temp[func].var
elif reduce == 'None' or reduce == 'none':
onetime[func].append(onetime_temp)
t_end = time.time()
t_cost = t_end - t_start
sys.stdout.write('\nTotal time cost: {:.2f} mins\n'.format(t_cost/60.0))
# convert the python array to numpy array if necessary
for key in onetime:
onetime[key] = np.array(onetime[key])
return onetime # return the dictionary
def info(self):
sys.stdout.write('File Loaded: {}\n'.format(self.filename))
sys.stdout.write('Toal Number of Frames: {}\n'.format(self.frame_number))
def extract_traj(self, foutname, stride, start=0, end=None):
# all get_timesteps() if no self.frame_number
if not self.frame_number:
self.get_framenumber()
if not self.atoms_number:
self.get_atomnumber()
if end == None:
end = self.frame_number
else:
assert type(end) == int
sys.stdout.write('Initialize the new H5MD file\n')
new_traj = h5py.File(foutname, 'w') # create a new file
# first get the dimension of dataset in original trajectory file
nframes = self.frame_number
natoms = self.atoms_number
new_nframes = np.arange(nframes)[start:end:stride].shape[0]
# copy h5md, observables, parameters group
self.file.copy('/h5md', new_traj)
self.file.copy('/observables', new_traj)
self.file.copy('/parameters', new_traj)
# create particles/all group
new_traj.create_group('/particles/all')
# create all the group in particles/all/
new_traj.create_group('/particles/all/position')
new_traj.create_group('/particles/all/box/edges')
new_traj.create_group('/particles/all/velocity')
# create dataset with new dimension position and velocity
for key in ['position','velocity']:
new_traj.create_dataset('particles/all/' + key + '/time', (new_nframes, ), dtype='float64')
new_traj.create_dataset('particles/all/' + key + '/step', (new_nframes, ), dtype='int32')
new_traj.create_dataset('particles/all/' + key + '/value', (new_nframes, natoms, 3), dtype='float64')
# create box dataset
new_traj.create_dataset('particles/all/box/edges/time', (new_nframes, ), dtype='float64')
new_traj.create_dataset('particles/all/box/edges/step', (new_nframes, ), dtype='int32')
new_traj.create_dataset('particles/all/box/edges/value', (new_nframes, 3), dtype='float64')
# loop throught the original trajectory file
# store the frames into our new file
t_start = time.time()
for index, t in enumerate(np.arange(nframes)[start:end:stride]):
sys.stdout.write('Process timestep {}\n'.format(t))
sys.stdout.flush()
for key in ['position', 'velocity']:
new_traj['particles/all/' + key + '/time'][index] = np.copy(self.file['particles/all/' + key + '/time'][t])
new_traj['particles/all/' + key + '/step'][index] = np.copy(self.file['particles/all/' + key + '/step'][t])
new_traj['particles/all/' + key + '/value'][index] = np.copy(self.file['particles/all/' + key + '/value'][t])
new_traj['particles/all/box/edges/time'][index] = np.copy(self.file['particles/all/box/edges/time'][t])
new_traj['particles/all/box/edges/step'][index] = np.copy(self.file['particles/all/box/edges/step'][t])
new_traj['particles/all/box/edges/value'][index] = np.copy(self.file['particles/all/box/edges/value'][t])
new_traj.flush()
new_traj.close()
self.file.close()
t_end = time.time()
t_cost = t_end - t_start
sys.stdout.write('\nTotal time cost: {:.2f} mins\n'.format(t_cost/60.0))
sys.stdout.flush()
return