-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyze.py
executable file
·164 lines (142 loc) · 4.46 KB
/
analyze.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env python
# DOENDO - A Python-Based Fortran Refactoring Tool
# Copyright (C) 2011 Felipe H. da Jornada <[email protected]>
import re
import xml.dom.minidom
from common import *
def analyze(name, data):
    '''Parse a Fortran source file and return an XML document describing it.

    name: name of the file; used as the tag name of the root element
    data: string containing the whole file

    The root element holds one nested <block> element per recognised
    program/module/subroutine/type. Each block carries:
      start, end: 0-based line numbers counted in the original file
                  (i.e. before lines ending with "&" were joined)
      type:       index into the start patterns
                  (0=program, 1=module, 2=subroutine, 3=type)
      name:       last word of the matched start statement
    Every line containing "::" adds one <var> element (attributes `type`
    and `name`) per comma-separated name to the node open at that point.

    NOTE: matching is done on lower-case keywords followed by a single
    space, so e.g. "type :: foo" is not recognised as a block start.
    '''
    def find_token(_cmp, line):
        '''Return the text matched by compiled regex `_cmp` in `line`, or None.'''
        res = _cmp.search(line)
        if res is None:
            return None
        return res.group(0)

    doc = xml.dom.minidom.Document()
    root_node = doc.createElement(name)
    doc.appendChild(root_node)
    cur_node = root_node

    # used to match the "blocks"; the position inside re_start is what gets
    # stored in the block's `type` attribute
    re_start = (re.compile(r'^program (\w+)'),
                re.compile(r'^module (\w+)'),
                re.compile(r'^subroutine (\w+)'),
                re.compile(r'^type (\w+)'))
    re_end = (re.compile(r'end program (\w+)'),
              re.compile(r'end module (\w+)'),
              re.compile(r'end subroutine (\w+)'),
              re.compile(r'end type (\w+)'))

    # a trailing "&" continues the statement on the next line
    re_continuation = re.compile(r'&[ ]*\n')

    # remember which original lines were continued, so that line numbers can
    # be corrected after joining (a set keeps the lookup below cheap)
    offsets = set(data.count('\n', 0, m.start())
                  for m in re_continuation.finditer(data))

    # Join *all* continued lines. The flags must not be passed positionally
    # to re.sub: its 4th positional parameter is `count`, which used to limit
    # the joining to the first 8 continuation lines.
    no_multi_lines = re_continuation.sub('', data)

    line_n = -1
    root_node.setAttribute('start', '0')
    for line in no_multi_lines.split('\n'):
        line_n += 1
        # correct line number: skip the original lines that were merged
        # into the previous (joined) line
        while line_n - 1 in offsets:
            line_n += 1

        # clean-up useless spaces and comments
        line = re.sub(r'!.*', '', line)
        line = re.sub(r'[ ]+', ' ', line)
        line = re.sub(r'[ ]*::[ ]*', '::', line)
        line = re.sub(r'[ ]*,[ ]*', ',', line)
        line = re.sub(r'^ ', '', line)
        line = re.sub(r' $', '', line)

        if not line:
            continue

        # determine if we are starting/closing a block
        # ending program|module|subroutine|type ?
        token = None
        for re_obj in re_end:
            token = find_token(re_obj, line)
            if token:
                break

        if token is not None:
            # An "end ..." without a matching open block would otherwise move
            # cur_node above the root element and break every later
            # setAttribute/appendChild call, so it is ignored at root level.
            if cur_node is not root_node:
                cur_node.setAttribute('end', str(line_n))
                cur_node = cur_node.parentNode
        else:
            # starting program|module|subroutine|type ?
            for i, re_obj in enumerate(re_start):
                token = find_token(re_obj, line)
                if token:
                    break
            if token is not None:
                tmp_node = doc.createElement('block')
                cur_node.appendChild(tmp_node)
                cur_node = tmp_node
                cur_node.setAttribute('start', str(line_n))
                cur_node.setAttribute('type', str(i))
                cur_node.setAttribute('name', token.split(' ')[-1])

        # now, search for vars
        if '::' in line:
            col_pos = line.index('::')
            var_type = line[:col_pos]
            var_str = line[col_pos + 2:]
            # throwing away array indices
            # TODO: store this information as a suffix
            var_str = re.sub(r'\([^\)]*\)', '', var_str)
            for v in var_str.split(','):
                var_node = doc.createElement('var')
                var_node.setAttribute('type', var_type)
                var_node.setAttribute('name', v)
                cur_node.appendChild(var_node)

    # close whatever node is still open with the last line of the file
    cur_node.setAttribute('end', str(line_n - 1))

    return doc
def get_small_vars(doc, ignore_types=True, len_small=1):
    '''Return the <var> elements whose name has at most `len_small` characters.

    doc: XML document to search
    ignore_types: when true, skip variables whose parent node has its
                  `type` attribute equal to '3'
    len_small: longest name length that still counts as "small"
    '''
    def _is_wanted(node):
        # too long -> never reported
        if len(node.getAttribute('name')) > len_small:
            return False
        # if appropriate, ignore vars inside types
        return node.parentNode.getAttribute('type') != '3' or not ignore_types

    return [node for node in doc.getElementsByTagName('var')
            if _is_wanted(node)]
def check_var_free(elem, name):
    '''Check if `name` is not a variable in the scope of `elem` or its parents.

    elem: node to start from; None is accepted and yields True
    name: variable name to look for

    Returns False as soon as `elem` or one of its ancestors has a direct
    <var> child whose `name` attribute equals `name`, True otherwise.
    '''
    el = elem
    while el is not None:
        for child in el.childNodes:
            if child.nodeName == 'var' and child.getAttribute('name') == name:
                return False
        # Stop climbing when there is no parent; getattr tolerates objects
        # without a parentNode attribute without hiding unrelated errors
        # the way a bare "except:" would.
        el = getattr(el, 'parentNode', None)
    return True
def print_info(doc):
    '''Print a short summary of an analysed document to standard output.

    doc: XML document whose first child is the root element named after
         the source file

    Returns the list of <var> elements reported by get_small_vars() with
    len_small=1.

    The print calls use a single pre-formatted argument so the output is
    the same under Python 2 and Python 3.
    '''
    print('')
    print('Some useful info:')

    is_prog = False
    node0 = doc.childNodes[0]
    print(' - Source file name: %s' % (node0.nodeName,))
    if len(node0.childNodes):
        # the file counts as a program when its first node has type '0'
        is_prog = node0.childNodes[0].getAttribute('type') == '0'
    print(' - Is this a program? %s' % (is_prog,))

    # get_elements_with_attrib comes from `common`; presumably it returns the
    # <block> elements whose `type` attribute equals the given value
    # (verify against common.py)
    modules = get_elements_with_attrib(doc, 'block', 'type', '1')
    print(' - Number of modules: %s' % (len(modules),))
    subs = get_elements_with_attrib(doc, 'block', 'type', '2')
    print(' - Number of subroutines: %s' % (len(subs),))
    types = get_elements_with_attrib(doc, 'block', 'type', '3')
    print(' - Number of types: %s' % (len(types),))

    all_vars = doc.getElementsByTagName('var')
    print(' - Number of variables: %s' % (len(all_vars),))

    # pass the length by keyword: a positional 1 would bind to `ignore_types`
    small_vars = get_small_vars(doc, len_small=1)
    print(' - Single letter vars: %s' % (len(small_vars),))

    return small_vars
# Command-line entry point: analyse the Fortran file given as first argument
# and dump the resulting XML document to standard output.
if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        sys.exit('usage: %s <fortran-source-file>' % sys.argv[0])

    fname = sys.argv[1]
    # read the whole file; the with-block closes the handle afterwards
    with open(fname) as src_file:
        lines = src_file.read()

    doc = analyze(fname, lines)

    # single-argument print calls behave the same on Python 2 and Python 3
    print('')
    print(doc.toprettyxml())