forked from naturalog/tauchain
-
Notifications
You must be signed in to change notification settings - Fork 0
/
n3.natural3
267 lines (201 loc) · 8.75 KB
/
n3.natural3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# Notation3 in Notation3
# Context Free Grammar without tokenization
#
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ...
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>...
@prefix cfg: <http://www.w3.org/2000/10/swap/grammar/bnf#>...
@prefix rul: <http://www.w3.org/2000/10/swap/grammar/bnf-rules#>...
@prefix : <http://www.w3.org/2000/10/swap/grammar/n3#>...
@prefix n3: <http://www.w3.org/2000/10/swap/grammar/n3#>...
@prefix list: <http://www.w3.org/2000/10/swap/list#>...
@prefix string: <http://www.w3.org/2000/10/swap/string#>...
@keywords a, is, of...
# Issues:
# - string token regexp not right FIXED
# - tokenizing rules in general: whitespace are not defined in n3.n3
# and it would be nice for the *entire* syntax description to be in RDF.
# - encoding really needs specifying
# - @keywords affects tokenizing
# - comments (tokenizer deals with)
# - We assume ASCII, in fact should use not notNameChars for i18n
# tokenizing:
# Absorb anything until end of regexp, then strip white space
# period followed IMMEDIATELY by an opener or name char is taken as "!".
# Except after a "." used instead of in those circumstances,
# ws may be inserted between tokens.
# WS MUST be inserted between tokens where ambiguity would arise.
# (possible ending characters of one and beginning characters overlap)
#
#<> cfg:syntaxFor [ cfg:internetMediaType
# <http://www.w3.org/2003/mediatypes/text/n3>]...
# __________________________________________________________________
#
# The N3 Full Grammar
# Root description of the language: declares it a cfg:Language, names
# `document` as its document production, and gives the whitespace pattern.
# NOTE(review): "@@@@@" looks like a placeholder for the real whitespace
# regexp -- confirm.
language
    a cfg:Language;
    cfg:document document;
    cfg:whiteSpace "@@@@@"...
# Start production: a document is the (possibly empty) run of statements
# followed by end of file. The commented-out entries are kept from the
# original for reference.
document
    a rul:Used;
    cfg:mustBeOneSequence (
        (
            # [ cfg:zeroOrMore declaration ]
            # [ cfg:zeroOrMore universal ]
            # [ cfg:zeroOrMore existential ]
            statements_optional
            cfg:eof
        )
    )...
# Either nothing, or one statement followed by "." and then more
# optional statements (right-recursive list; every statement is terminated).
statements_optional cfg:mustBeOneSequence (
    ( )
    ( statement "." statements_optional )
)...
# Content of a "{ ... }" formula. Unlike the top-level document, the last
# statement of a formula does NOT need a trailing period, hence the
# separate statementlist/statementtail productions.
formulacontent cfg:mustBeOneSequence (
    ( statementlist )
)...
# Either empty, or a statement followed by a statementtail.
statementlist cfg:mustBeOneSequence (
    ( )
    ( statement statementtail )
)...
# Either empty, or a "." separator followed by a further statementlist
# (which may itself be empty, so a trailing "." is allowed but optional).
statementtail cfg:mustBeOneSequence (
    ( )
    ( "." statementlist )
)...
# A statement is exactly one of: a declaration, a universal or existential
# quantification, or a simple subject/property-list statement.
statement cfg:mustBeOneSequence (
    ( declaration )
    ( universal )
    ( existential )
    ( simpleStatement )
)...
# Universal quantification: "@forAll" followed by a comma-separated list
# of symbols.
universal cfg:mustBeOneSequence (
    ( "@forAll" [ cfg:commaSeparatedListOf symbol ] )
)...
# Existential quantification: "@forSome" followed by a comma-separated
# list of symbols.
existential cfg:mustBeOneSequence (
    ( "@forSome" [ cfg:commaSeparatedListOf symbol ] )
)...
# Directives:
#   @base     <uri>
#   @prefix   prefix <uri>
#   @keywords comma-separated list of barenames
declaration cfg:mustBeOneSequence (
    ( "@base" explicituri )
    ( "@prefix" prefix explicituri )
    ( "@keywords" [ cfg:commaSeparatedListOf barename ] )
)...
# A subject followed by its (possibly empty) property list.
simpleStatement cfg:mustBeOneSequence (
    ( subject propertylist )
)...
# Either empty, or: a predicate, one object, any further ","-separated
# objects, and any further ";"-separated predicate groups.
propertylist cfg:mustBeOneSequence (
    ( )
    ( predicate object objecttail propertylisttail )
)...
# Either empty, or ";" followed by another propertylist (which may be
# empty, so a trailing ";" is accepted).
propertylisttail cfg:mustBeOneSequence (
    ( )
    ( ";" propertylist )
)...
# Either empty, or "," followed by another object and a further objecttail.
objecttail cfg:mustBeOneSequence (
    ( )
    ( "," object objecttail )
)...
# Predicate forms:
#   expression               plain predicate
#   "@has" expression        explicit "has" form
#   "@is" expression "@of"   "is ... of" form
#   "@a" "=" "=>" "<="       keyword / punctuation shorthands
predicate cfg:mustBeOneSequence (
    ( expression )
    ( "@has" expression )
    ( "@is" expression "@of" )
    ( "@a" )
    ( "=" )
    ( "=>" )
    ( "<=" )
)...
# A subject is any expression.
subject cfg:mustBeOneSequence (
    ( expression )
)...
# An object is any expression.
object cfg:mustBeOneSequence (
    ( expression )
)...
# An expression is a path item followed by an optional path continuation.
expression cfg:mustBeOneSequence (
    ( pathitem pathtail )
)...
# Path continuation: empty, or a "!" or "^" operator followed by a
# further expression.
pathtail cfg:mustBeOneSequence (
    ( )
    ( "!" expression )
    ( "^" expression )
)...
# Atomic terms of an expression: a symbol, a "{ }" formula, a ?variable,
# a numeric or string literal, a "[ ]" property list, a "( )" path list,
# or a boolean. The "@this" form is kept commented out as in the original.
pathitem cfg:mustBeOneSequence (
    ( symbol )
    ( "{" formulacontent "}" )
    ( quickvariable )
    ( numericliteral )
    ( literal )
    ( "[" propertylist "]" )
    ( "(" pathlist ")" )
    ( boolean )
    # ( "@this" ) # Deprecated... Was allowed for this log:forAll x
)...
# Boolean keywords: "@true" or "@false".
boolean cfg:mustBeOneSequence (
    ( "@true" )
    ( "@false" )
)...
# Contents of a "( ... )" list: zero or more expressions with no
# separator token between them.
pathlist cfg:mustBeOneSequence (
    ( )
    ( expression pathlist )
)...
# A symbol is either an explicit <uri> or a qname.
symbol cfg:mustBeOneSequence (
    ( explicituri )
    ( qname )
)...
# Numeric literal alternatives, in the original order: integer, rational,
# double, decimal.
numericliteral cfg:mustBeOneSequence (
    ( integer )
    ( rational )
    ( double )
    ( decimal )
)...
# A rational is an integer, a "/" token, and an unsigned integer.
rational cfg:mustBeOneSequence (
    ( integer "/" unsignedint )
)...
# A literal is a string followed by an optional datatype/language suffix.
literal cfg:mustBeOneSequence (
    ( string dtlang )
)...
# Literal suffix: nothing, "@" plus a language code, or "^^" plus a
# datatype symbol.
dtlang cfg:mustBeOneSequence (
    ( )
    ( "@" langcode )
    ( "^^" symbol )
)...
#______________________________________________________________________
#
# TERMINALS
#
# "canStartWith" actually gives "a" for the whole class of alpha characters
# and "0" for any of the digits 0-9... This is used to build the branching
# tables...
#
# Integer terminal: an optional "+" or "-" sign followed by one or more
# ASCII digits. In canStartWith, "0" stands for any digit.
integer cfg:matches """[-+]?[0-9]+""";
cfg:canStartWith "0", "-", "+"...
# Unsigned integer terminal: one or more ASCII digits, no sign.
# Used as the denominator of `rational`.
unsignedint cfg:matches """[0-9]+""";
cfg:canStartWith "0"...
# Double terminal: optional sign, one or more digits, an optional
# fractional part (which needs at least one digit after the point), and a
# MANDATORY exponent ("e" or "E", optional sign, one or more digits).
double cfg:matches """[-+]?[0-9]+(\\.[0-9]+)?([eE][-+]?[0-9]+)""";
cfg:canStartWith "0", "-", "+"...
# Decimal terminal: optional sign, one or more digits, a mandatory decimal
# point, then zero or more digits (so "1." matches, ".5" does not).
decimal cfg:matches """[-+]?[0-9]+\\.[0-9]*""";
cfg:canStartWith "0", "-", "+"...
# Date terminal: four-digit year, two-digit month, two-digit day, and an
# optional trailing "Z" (YYYY-MM-DD[Z]).
# The year was previously matched with only two digits, which disagreed
# with the four-digit year of the dateTime terminal and with the usual
# ISO 8601 / xsd:date lexical form.
# NOTE(review): no production visible in this file references `date`;
# confirm no external tokenizer relied on the two-digit form.
date cfg:matches """[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]Z?""";
cfg:canStartWith "0"...
# Date-time terminal: YYYY-MM-DD, optionally followed by "T" hh:mm, with
# optional ":ss" and optional fractional seconds ("." plus zero or more
# digits), and an optional trailing "Z". The time part as a whole is
# optional, so a bare four-digit-year date also matches.
dateTime cfg:matches """[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9](T[0-9][0-9]:[0-9][0-9](:[0-9][0-9](\\.[0-9]*)?)?)?Z?""";
cfg:canStartWith "0"...
#numericliteral cfg:matches """[-+]?[0-9]+(\\.[0-9]+)?(e[-+]?[0-9]+)?""";
# cfg:canStartWith "0", "-", "+"...
# Explicit URI terminal: "<", any run of characters other than ">",
# then the closing ">". The content is not otherwise validated.
explicituri cfg:matches "<[^>]*>";
cfg:canStartWith "<"...
# Prefix terminal (as used after "@prefix"): an optional name followed by a
# mandatory ":". The name is one name-start character (letters, "_" and
# the listed Unicode ranges) followed by zero or more name characters
# (additionally "-", digits, U+00B7 and a few more ranges). A bare ":"
# therefore matches as the empty prefix.
# In canStartWith, "a" stands for the whole class of alpha characters.
prefix cfg:matches "([A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\U00010000-\U000effff][\\-0-9A-Z_a-z\u00b7\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u037d\u037f-\u1fff\u200c-\u200d\u203f-\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\U00010000-\U000effff]*)?:";
cfg:canStartWith "a", "_", ":"... # @@ etc unicode
# Qualified-name terminal: an optional "prefix:" part (where the prefix
# name itself may be empty, leaving just ":") followed by a MANDATORY local
# name. Both names use the same shape: one name-start character, then
# zero or more name characters. A name with no colon at all also matches.
# In canStartWith, "a" stands for the whole class of alpha characters.
qname cfg:matches "(([A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\U00010000-\U000effff][\\-0-9A-Z_a-z\u00b7\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u037d\u037f-\u1fff\u200c-\u200d\u203f-\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\U00010000-\U000effff]*)?:)?[A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\U00010000-\U000effff][\\-0-9A-Z_a-z\u00b7\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u037d\u037f-\u1fff\u200c-\u200d\u203f-\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\U00010000-\U000effff]*";
cfg:canStartWith "a", "_", ":"... # @@ etc unicode
# ASCII version:
#barename cfg:matches "[a-zA-Z_][a-zA-Z0-9_]*"; # subset of qname
# cfg:canStartWith "a", "_"... # @@ etc
# This is the XML 1.1 version:
# Bare-name terminal (used in the "@keywords" list): one name-start
# character followed by zero or more name characters, with no colon.
# The character classes are the same ones used for the local-name part of
# qname. In canStartWith, "a" stands for the whole class of alpha characters.
barename cfg:matches "[A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\U00010000-\U000effff][\\-0-9A-Z_a-z\u00b7\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u037d\u037f-\u1fff\u200c-\u200d\u203f-\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\U00010000-\U000effff]*";
cfg:canStartWith "a", "_"... # @@ etc ...
# as far as I can tell, the regexp should be
# barename cfg:matches "[A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\U00010000-\U000effff][\\-0-9A-Z_a-z\u00b7\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u037d\u037f-\u1fff\u200c-\u200d\u203f-\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\U00010000-\U000effff]*" ...
#
# Quick-variable terminal: a literal "?" immediately followed by a name of
# the same shape as barename (one name-start character, then zero or more
# name characters).
quickvariable cfg:matches "\\?[A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\U00010000-\U000effff][\\-0-9A-Z_a-z\u00b7\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u037d\u037f-\u1fff\u200c-\u200d\u203f-\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\U00010000-\U000effff]*"; # ? barename
cfg:canStartWith "?"... #
# Maybe dtlang should just be part of string regexp?
# Whitespace is not allowed
# was: "[a-zA-Z][a-zA-Z0-9]*(-[a-zA-Z0-9]+)?";
# Language-code terminal (follows "@" in dtlang): one or more lowercase
# ASCII letters, then zero or more "-"-prefixed groups of lowercase
# letters and digits. Uppercase is not accepted.
langcode cfg:matches "[a-z]+(-[a-z0-9]+)*"; # http://www.w3.org/TR/rdf-testcases/#language
cfg:canStartWith "a"...
# raw regexp single quoted would be "([^"]|(\\"))*"
# See:
# $ PYTHONPATH=$SWAP python
# >>> import tokenize
# >>> import notation3
# >>> print notation3.stringToN3(tokenize.Double3)
# "[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\""
# >>> print notation3.stringToN3(tokenize.Double)
# "[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\""
# After that we have to prefix with one or three opening \" which
# the python regexp doesn't have.
#
# string3 cfg:matches "\"\"\"[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\"".
# string1 cfg:matches "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"".
# String terminal, two alternatives tried in this order:
#   1. triple-quoted: opens and closes with three double quotes; the body
#      may contain backslash escapes and lone double quotes, but not a run
#      of three double quotes (enforced by the negative lookahead);
#   2. single-quoted: opens and closes with one double quote; the body may
#      contain backslash escapes but no unescaped double quote.
# Every backslash is doubled once more here because the regexp is itself
# written inside an N3 string literal.
string cfg:matches "(\"\"\"[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\")|(\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\")";
cfg:canStartWith "\""...
#ends