-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind_files_jsc.py
100 lines (78 loc) · 3.19 KB
/
find_files_jsc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
File Name : find_files_jsc.py
Author: Ruth Lorenz ([email protected])
Created: 19-08-2022
Modified:
Purpose: find all files for CORDEX-FPSCONV data
'''
import glob
import logging
import os
import shutil

import filefinder
from cdo import Cdo
# CDO bindings are instantiated at import time; `cdo` is not referenced in
# the visible part of this file.
cdo = Cdo()
### Define logger
# Module-wide logging: level, timestamp and message, INFO and above.
logging.basicConfig(format='%(levelname)s %(asctime)s: %(message)s',
level=logging.INFO)
logger = logging.getLogger(__name__)
####################
### Define input ###
####################
# Root directory that all search path patterns in main() start from.
INPUT_PATH = '/home/rlorenz/fpscpcm/CORDEX-FPSCONV/output'
# Domain name: sub-directory of INPUT_PATH, part of OUTPUT_PATH and of the
# renamed output files.
DOMAIN = 'ALP-3'
# Not referenced in the visible part of this file.
SCENARIOS = ['historical', 'rcp85', 'evaluation']
# Variables to search for; main() loops over this list.
# NOTE(review): the land area fraction variable is usually spelled 'sftlf'
# in CORDEX/CMIP data - confirm that 'sftls' is intended.
VARIABLES = ['orog', 'sftls']
# Time frequency per variable, matched to VARIABLES by position (same index).
TIME_RES = ['fx', 'fx']
# Copies are written below this directory, in <t_freq>/<variable> sub-folders.
OUTPUT_PATH = f'/home/rlorenz/fpscpcm/tmp/rlorenz/data/{DOMAIN}'
def find_files(path_pattern, file_pattern, varn, t_freq):
    '''
    Find files and copy/rename using filefinder class

    The output folder ``{OUTPUT_PATH}/{t_freq}/{varn}`` is created if needed.
    Every path returned by ``FileFinder.find_paths`` is globbed, and each
    regular file ending in ``.nc`` is copied into the output folder.

    The copy is named
    ``{varn}_{DOMAIN}_{gcm}_{scenario}_{ensemble}_{rcm}_{nesting}_{t_freq}.nc``
    when the metadata of the match provides all of these keys; otherwise the
    original file name is kept.

    Parameters
    ----------
    path_pattern : str
        Directory pattern with filefinder placeholders (e.g. ``{gcm}``).
    file_pattern : str
        File name pattern handed to ``filefinder.FileFinder``.
    varn : str
        Variable name, passed to the search as ``variable``.
    t_freq : str
        Time frequency, passed to the search as ``t_freq``.

    Returns
    -------
    None
        Returns early (after a warning) if the search raises ``ValueError``.
        A failed copy is logged and does not abort the remaining files.
    '''
    outpath_varn = f'{OUTPUT_PATH}/{t_freq}/{varn}'
    # exist_ok avoids the race of a separate existence check before creating
    os.makedirs(outpath_varn, exist_ok=True)

    ff = filefinder.FileFinder(path_pattern, file_pattern)
    try:
        files = ff.find_paths(variable=varn, t_freq=t_freq)
    except ValueError:
        logger.warning('No files found for path %s', path_pattern)
        return
    logger.info('All files found are %s.', files)

    # NOTE(review): find_paths presumably matches path_pattern only (i.e.
    # directories), so glob(path) may never yield regular files - confirm
    # whether FileFinder.find_files was intended here.
    for path, meta in files:
        logger.info(meta)
        for ifile in sorted(glob.glob(path)):
            if not os.path.isfile(ifile):
                logger.warning('Not file but %s found', ifile)
                continue
            if not ifile.endswith('.nc'):
                logger.warning('File found is not netcdf but %s', ifile)
                continue

            try:
                new_name = (f'{varn}_{DOMAIN}_{meta["gcm"]}_{meta["scenario"]}_'
                            f'{meta["ensemble"]}_{meta["rcm"]}_{meta["nesting"]}_'
                            f'{t_freq}.nc')
            except KeyError:
                # Pattern without the full set of keys: keep the source name
                new_name = os.path.basename(ifile)
            logger.info('New filename is %s', new_name)

            target = f'{outpath_varn}/{new_name}'
            # Copy without a shell so that spaces or special characters in
            # paths cannot break (or inject into) the command.
            try:
                shutil.copy(ifile, target)
            except OSError:
                logger.exception('Could not copy %s to %s', ifile, target)
def main():
    '''
    Search every variable under each known directory layout and copy/rename
    the matches to the output folder
    '''
    root = '%s/%s' % (INPUT_PATH, DOMAIN)
    file_pattern = '*{variable}*.nc'
    # Directory layouts below the domain folder, tried in this order for each
    # variable. Braces are filefinder placeholders, hence no f-strings here.
    layouts = (
        '/{institut}/{gcm}/{scenario}/{ensemble}/{rcm}/{nesting}/{t_freq}/{variable}/',
        '/{institut}/{gcm}/{t_freq}/',
        '/{institut}/{gcm}/{scenario}/{ensemble}/{rcm}/{nesting}/*/',
        '/{institut}/{gcm}/{scenario}/{ensemble}/{rcm}/{nesting}/{t_freq}/{variable}/latest/',
    )
    for v_ind, varn in enumerate(VARIABLES):
        # TIME_RES is matched to VARIABLES by position
        t_freq = TIME_RES[v_ind]
        for layout in layouts:
            find_files(root + layout, file_pattern, varn, t_freq)
# Run the search only when executed as a script, not when imported
if __name__ == "__main__":
    main()