-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.rb
143 lines (129 loc) · 4.42 KB
/
test.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env ruby
# frozen_string_literal: true
require './lib/bootstrap'
# For every datasource id in the list (currently only 'wos'): look up the
# provider, switch on its verbose flag, import one hard-coded identifier and
# print the first imported item as pretty-printed JSON. Array values are cut
# down to their first four elements to keep the output short.
def test1
  %w[wos].each do |source_id|
    source = ::Datasource.by_id(source_id)
    source.verbose = true
    imported = source.import_items(['10.1111/1467-6478.00033']).first
    # Only arrays are shortened; every other value is printed unchanged.
    preview = imported.to_h.transform_values do |value|
      value.is_a?(Array) ? value[..3] : value
    end
    puts(JSON.pretty_generate(preview))
  end
end
# Builds a dataset from the CSL JSON files found in Workflow::Path.csl.
#
# The ids are the basenames (without ".json") of those files. At most `limit`
# ids are handed to Workflow::Dataset#import; the imported items are written
# as JSON to tmp/test3.json and the dataset is exported through
# Export::WebOfScience to tmp/test3.txt.
def test3
  ids = Dir.glob(File.join(Workflow::Path.csl, '*.json')).map { |f| File.basename(f, '.json') }
  # NOTE(review): machine-specific absolute path; adjust before running elsewhere.
  text_dir = '/mnt/c/Users/Boulanger/ownCloud/Langfristvorhaben/Legal-Theory-Graph/Data/FULLTEXTS/JLS/jls-txt'
  stopword_files = ['data/0-metadata/summarize-ignore.txt']
  authors_ignore_list = ['see']
  affiliation_ignore_list = ['108 Cowley Road']
  options = Workflow::Dataset::Options.new(
    verbose: false,
    text_dir:,
    stopword_files:,
    authors_ignore_list:,
    affiliation_ignore_list:,
    cache_file_prefix: 'test-'
  )
  limit = 1000
  dataset = Workflow::Dataset.new(options:)
  # `ids[..limit]` is an inclusive range and would pass limit + 1 ids;
  # `first(limit)` passes exactly `limit` (or fewer when not enough exist).
  items = dataset.import(ids.first(limit), limit:)
  File.write('tmp/test3.json', JSON.pretty_generate(items.map(&:to_h)))
  dataset.export(Export::WebOfScience.new('tmp/test3.txt'))
end
# Loads the Python module "iso4" through PyCall and prints the result of
# `abbreviate` for two sample titles: once with no options and once with
# `periods: false`.
def test4
  abbreviator = PyCall.import_module('iso4')
  default_form = abbreviator.abbreviate('Recent Advances in Studies on Cardiac Structure and Metabolism')
  puts default_form
  undotted_form = abbreviator.abbreviate('Journal of the American Academy of Dermatology', periods: false)
  puts undotted_form
end
# Connects to a Neo4j server over Bolt, counts all nodes in a read
# transaction and prints the count (or the string 'nil' when the query
# returns no record).
#
# Credentials come from the NEO4J_USERNAME / NEO4J_PASSWORD environment
# variables. On any StandardError during the query a red error message is
# printed and the process exits with status 1. The session and the driver are
# always closed before the method returns or exits.
def test5
  require 'neo4j-ruby-driver' # loaded lazily; only this experiment needs it
  url = 'bolt://172.31.240.1:7687' # hard-coded for now; could come from ENV['NEO4J_URL']
  auth = ::Neo4j::Driver::AuthTokens.basic(ENV['NEO4J_USERNAME'], ENV['NEO4J_PASSWORD'])
  # NOTE(review): @verbose is not assigned anywhere in this file, so this line
  # only prints if something else sets it.
  puts "Connecting to Neo4J on #{url}..." if @verbose
  @driver = ::Neo4j::Driver::GraphDatabase.driver(url, auth, encryption: false)
  session = nil
  begin
    session = @driver.session
    node_count = session.read_transaction do |tx|
      records = tx.run('match (n) return count(n)')
      records.has_next? ? records.single.first : 'nil'
    end
    puts(node_count)
  rescue StandardError => e
    puts "Cannot connect to server: #{e}".colorize(:red)
    exit(1)
  ensure
    # Release the connection even on the exit(1) path; ensure also runs on SystemExit.
    session&.close
    @driver.close
  end
end
# Tries out fuzzy text matching in SQLite: opens an in-memory database, loads
# the spellfix extension from `ext_dir`, creates a spellfix1 virtual table and
# a small sample table, then prints the rows whose description has an
# editdist3 score below 600 against a misspelled query string.
def test6
  require 'sqlite3' # loaded lazily; only this experiment needs it
  ext_dir = '/opt/sqlite-extensions' # or wherever you keep the extensions
  db = SQLite3::Database.new(':memory:')
  begin
    db.enable_load_extension(true)
    db.load_extension(File.join(ext_dir, 'spellfix.o'))
    db.execute('CREATE VIRTUAL TABLE demo USING spellfix1;')
    db.execute('CREATE TABLE mytable (id integer, description text)')
    # Values are bound rather than embedded: double-quoted text is an
    # identifier in standard SQL and is accepted as a string literal by
    # SQLite only as a legacy quirk.
    sample_rows = [[1, 'hello world, guys'], [2, 'hello there everybody']]
    sample_rows.each { |row| db.execute('INSERT INTO mytable VALUES (?, ?)', row) }
    matches = db.execute('SELECT * FROM mytable WHERE editdist3(description, ?) < 600', ['hel o wrold guy'])
    puts matches
  ensure
    db.close
  end
end
# Asks Datasource::ZoteroSqlite for items similar to a query item (title and
# date only), converts the hits to compact hashes, then prints the number of
# hits followed by the first hit as JSON.
def test7
  query = Model::Zotero::Item.new({ title: 'Rechtssoziologie von Sinz', date: '2015' })
  hits = Datasource::ZoteroSqlite.find_similar_items(query)
  hit_hashes = hits.map { |hit| hit.to_h(compact: true) }
  puts "#{hit_hashes.length} hits, first one is:"
  puts JSON.dump(hit_hashes.first)
end
# Same lookup as a plain similarity search, but with an explicit
# `edit_distance: 600` passed to Datasource::ZoteroSqlite.find_similar_items.
# Prints the number of hits and the first hit (compact hash) as JSON.
def test8
  # The title contains misspellings ("Rechtssoziologfe", "Sinzhelmer") --
  # presumably on purpose, to exercise the fuzzy matching; keep it verbatim.
  query = Model::Zotero::Item.new({ title: 'Die Rechtssoziologfe von Hugo Sinzhelmer - eine Annäherung', date: '2015' })
  hits = Datasource::ZoteroSqlite.find_similar_items(query, edit_distance: 600)
  hit_hashes = hits.map { |hit| hit.to_h(compact: true) }
  puts "#{hit_hashes.length} hits, first one is:"
  puts JSON.dump(hit_hashes.first)
end
# Builds a CSL item (one author family name, a title and an issue year),
# turns on verbose mode for Datasource::ZoteroSqlite and prints the result of
# its `lookup` for that item as JSON.
def test9
  csl_fields = {
    author: [{ family: 'Boulanger' }],
    title: 'Zur Aktualität der Todesstrafe',
    issued: { 'date-parts': [[2002]] }
  }
  query = Format::CSL::Item.new(csl_fields)
  Datasource::ZoteroSqlite.verbose = true
  found = Datasource::ZoteroSqlite.lookup(query)
  puts JSON.dump(found)
end
# Builds a CSL item (one author family name, a title and an issue year),
# turns on verbose mode for Datasource::Lobid and prints the result of its
# `lookup` for that item as JSON.
def test10
  csl_fields = {
    author: [{ family: 'Boulanger' }],
    title: 'Zur Aktualität der Todesstrafe',
    issued: { 'date-parts': [[2002]] }
  }
  query = Format::CSL::Item.new(csl_fields)
  Datasource::Lobid.verbose = true
  found = Datasource::Lobid.lookup(query)
  puts JSON.dump(found)
end
# Turns on verbose mode for Datasource::WosKb and pretty-prints whatever
# `items_by_autor` returns for the name "Ehrlich, Eugen".
def test11
  Datasource::WosKb.verbose = true
  works = Datasource::WosKb.items_by_autor('Ehrlich, Eugen')
  pp(works)
end
# Sends a PDF through Service::GoogleDocumentAi and appends each text chunk it
# returns, followed by a newline, to tmp/raiser-1987.txt. Before each append a
# progress line with the running page number (first_page + index) is printed.
def test12
  require './lib/service/google-document-ai' # loaded lazily; only this experiment needs it
  first_page = 348
  service = Service::GoogleDocumentAi.new
  pages = service.process('data/1-pdf/Raiser (1987) Rechtssoziologie.pdf', first_page:)
  pages.each_with_index do |text, i|
    puts "Saving page #{first_page + i}"
    # File.open instead of Kernel#open: the latter treats paths starting with
    # "|" as commands to run. The file is reopened in append mode per page so
    # every page is on disk as soon as it has been handled.
    File.open('tmp/raiser-1987.txt', 'a') { |f| f << "#{text}\n" }
  end
end
# Script entry point: runs test12 when the file is executed. Replace the call
# below to run a different experiment.
test12