-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlnrs_3nf_o1.sql
653 lines (535 loc) · 18.5 KB
/
lnrs_3nf_o1.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
-- bash
-- https://www.perplexity.ai/search/system-you-are-an-advanced-ana-FmQBBisrSx._7unn8VwhmQ
-- break down the big table into smaller tables into 3NF
-- The aim is to develop a neater process to edit and update source data, ie. add a new measure type, or stakeholder
-- and recreate
-- rm data/lnrs_3nf_o1.duckdb
duckdb
ATTACH 'data/lnrs_3nf_o1.duckdb' AS lnrs;
USE lnrs;
INSTALL HTTPFS;
LOAD HTTPFS;
INSTALL SPATIAL;
LOAD SPATIAL;
-- get the main table for the app data
CREATE OR REPLACE TABLE source_table AS
SELECT *
FROM read_parquet('data/area-measures-tbl.parquet');
-- get tables for species and how they relate to areas and priorities
-- CREATE TABLES --
------------------------------------------------------------------
-- 1) MEASURE
------------------------------------------------------------------
CREATE OR REPLACE TABLE measure (
measure_id INTEGER NOT NULL PRIMARY KEY,
measure VARCHAR,
other_priorities_delivered VARCHAR,
core_supplementary VARCHAR,
mapped_unmapped VARCHAR,
relevant_map_layer VARCHAR,
link_to_further_guidance VARCHAR
);
------------------------------------------------------------------
-- 2) MEASURE_TYPE
------------------------------------------------------------------
CREATE OR REPLACE SEQUENCE seq_measure_type_id START 1;
CREATE OR REPLACE TABLE measure_type (
measure_type_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('seq_measure_type_id'),
measure_type VARCHAR
);
------------------------------------------------------------------
-- 3) STAKEHOLDER
------------------------------------------------------------------
CREATE OR REPLACE SEQUENCE seq_stakeholder_id START 1;
CREATE OR REPLACE TABLE stakeholder (
stakeholder_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('seq_stakeholder_id'),
stakeholder VARCHAR
);
------------------------------------------------------------------
-- 4) MEASURE_HAS_TYPE (bridge)
------------------------------------------------------------------
CREATE OR REPLACE TABLE measure_has_type (
measure_id INTEGER NOT NULL,
measure_type_id INTEGER NOT NULL,
PRIMARY KEY (measure_id, measure_type_id),
FOREIGN KEY (measure_id) REFERENCES measure(measure_id),
FOREIGN KEY (measure_type_id) REFERENCES measure_type(measure_type_id)
);
------------------------------------------------------------------
-- 5) MEASURE_HAS_STAKEHOLDER (bridge)
------------------------------------------------------------------
CREATE OR REPLACE TABLE measure_has_stakeholder (
measure_id INTEGER NOT NULL,
stakeholder_id INTEGER NOT NULL,
PRIMARY KEY (measure_id, stakeholder_id),
FOREIGN KEY (measure_id) REFERENCES measure(measure_id),
FOREIGN KEY (stakeholder_id) REFERENCES stakeholder(stakeholder_id)
);
------------------------------------------------------------------
-- 6) AREA
------------------------------------------------------------------
CREATE OR REPLACE TABLE area (
area_id INTEGER NOT NULL PRIMARY KEY,
area_name VARCHAR,
area_description VARCHAR,
area_link VARCHAR,
bng_hab_mgt VARCHAR,
bng_hab_creation VARCHAR,
local_funding_schemes VARCHAR
);
------------------------------------------------------------------
-- 7) PRIORITY
------------------------------------------------------------------
CREATE OR REPLACE TABLE priority (
priority_id INTEGER NOT NULL PRIMARY KEY,
biodiversity_priority VARCHAR,
simplified_biodiversity_priority VARCHAR,
theme VARCHAR
);
------------------------------------------------------------------
-- 8) SPECIES
------------------------------------------------------------------
CREATE OR REPLACE TABLE species(
taxa VARCHAR,
common_name VARCHAR,
assemblage VARCHAR,
linnaean_name VARCHAR,
species_id INTEGER PRIMARY KEY,
usage_key VARCHAR,
scientific_name VARCHAR,
status VARCHAR,
kingdom VARCHAR,
phylum VARCHAR,
"order" VARCHAR,
"family" VARCHAR,
genus VARCHAR,
species VARCHAR,
kingdom_key VARCHAR,
phylum_key VARCHAR,
class_key VARCHAR,
order_key VARCHAR,
family_key VARCHAR,
genus_key VARCHAR,
species_key VARCHAR,
"synonym" VARCHAR,
"class" VARCHAR,
accepted_usage_key VARCHAR,
verbatim_name VARCHAR,
verbatim_index BIGINT,
gbif_species_url VARCHAR,
image_url VARCHAR,
license VARCHAR,
attribution VARCHAR,
photo_url VARCHAR);
------------------------------------------------------------------
-- 9) GRANT_TABLE (renamed to avoid reserved keyword)
------------------------------------------------------------------
CREATE OR REPLACE TABLE grant_table (
grant_id VARCHAR NOT NULL PRIMARY KEY,
grant_name VARCHAR,
grant_scheme VARCHAR,
url VARCHAR,
summary_wrapped VARCHAR
);
------------------------------------------------------------------
-- 10) MEASURE_AREA_PRIORITY
------------------------------------------------------------------
CREATE OR REPLACE TABLE measure_area_priority (
measure_id INTEGER NOT NULL,
area_id INTEGER NOT NULL,
priority_id INTEGER NOT NULL,
PRIMARY KEY (measure_id, area_id, priority_id),
FOREIGN KEY (measure_id) REFERENCES measure(measure_id),
FOREIGN KEY (area_id) REFERENCES area(area_id),
FOREIGN KEY (priority_id) REFERENCES priority(priority_id)
);
------------------------------------------------------------------
-- 11) MEASURE_AREA_PRIORITY_GRANT
------------------------------------------------------------------
CREATE OR REPLACE TABLE measure_area_priority_grant (
measure_id INTEGER NOT NULL,
area_id INTEGER NOT NULL,
priority_id INTEGER NOT NULL,
grant_id VARCHAR NOT NULL,
PRIMARY KEY (measure_id, area_id, priority_id, grant_id),
FOREIGN KEY (measure_id, area_id, priority_id)
REFERENCES measure_area_priority (measure_id, area_id, priority_id),
FOREIGN KEY (grant_id) REFERENCES grant_table(grant_id)
);
------------------------------------------------------------------
-- 12) SPECIES PRIORITY AREA
------------------------------------------------------------------
CREATE OR REPLACE TABLE species_area_priority(
species_id INTEGER NOT NULL,
area_id INTEGER NOT NULL,
priority_id INTEGER NOT NULL,
PRIMARY KEY (species_id, area_id, priority_id),
FOREIGN KEY (species_id) REFERENCES species(species_id),
FOREIGN KEY (area_id) REFERENCES area(area_id),
FOREIGN KEY (priority_id) REFERENCES priority(priority_id));
------------------------------------------------------------------
-- 13) ARE GEOM TABLE - the 694 polygons
------------------------------------------------------------------
CREATE OR REPLACE TABLE area_geom(
geo_point_2d GEOMETRY,
geo_shape GEOMETRY,
area_id INTEGER
);
.tables
----------------------------------------------------------------------------
-- INSERT STATEMENTS --
----------------------------------------------------------------------------
-- 1) Insert into measure
-- revised to get all measures
INSERT INTO measure (
measure_id,
measure,
--other_priorities_delivered,
core_supplementary,
mapped_unmapped,
--relevant_map_layer,
link_to_further_guidance
)
SELECT DISTINCT
measure_id,
measure,
--other_priorities_delivered,
core_supplementary,
mapped_unmapped,
--relevant_map_layer,
link_to_further_guidance
FROM read_csv('https://opendata.westofengland-ca.gov.uk/api/explore/v2.1/catalog/datasets/lnrs-measures/exports/csv?lang=en&timezone=Europe%2FLondon&use_labels=false&delimiter=%2C');
FROM measure;
INSERT INTO area (
area_id,
area_name,
area_description,
area_link,
bng_hab_mgt,
bng_hab_creation,
local_funding_schemes
)
SELECT DISTINCT
area_id,
area_name,
area_description,
area_link,
bng_hab_mgt,
bng_hab_creation,
local_funding_schemes
FROM source_table
WHERE area_id IS NOT NULL;
-- priority insert
INSERT INTO priority
BY NAME
SELECT *
FROM read_parquet('https://opendata.westofengland-ca.gov.uk/api/explore/v2.1/catalog/datasets/priorities-grouped-tbl/exports/parquet');
-- grant insert
INSERT INTO grant_table (
grant_id,
grant_name,
grant_scheme,
"url",
summary_wrapped
)
SELECT DISTINCT
grant_id,
grant_name,
grant_scheme,
"url",
summary_wrapped
FROM source_table
WHERE grant_id IS NOT NULL;
-- measure types
INSERT INTO measure_type BY NAME
(SELECT DISTINCT
measure_type
FROM source_table);
-- stakeholder insert
INSERT INTO stakeholder BY NAME
(SELECT DISTINCT
stakeholder
FROM source_table);
FROM stakeholder;
-- has measure type insert
INSERT INTO measure_has_type (measure_id, measure_type_id)
SELECT DISTINCT
s.measure_id,
mt.measure_type_id
FROM source_table s
JOIN measure m
ON s.measure_id = m.measure_id
JOIN measure_type mt
ON s.measure_type = mt.measure_type
WHERE s.measure_id IS NOT NULL
AND s.measure_type IS NOT NULL;
-- measure has stakeholder insert
INSERT INTO measure_has_stakeholder (measure_id, stakeholder_id)
SELECT DISTINCT
s.measure_id,
stkh.stakeholder_id
FROM source_table s
JOIN measure m
ON s.measure_id = m.measure_id
JOIN stakeholder stkh
ON s.stakeholder = stkh.stakeholder
WHERE s.measure_id IS NOT NULL
AND s.stakeholder IS NOT NULL;
-- measure_area_priority insert
-- Each row in source_table links a specific
-- (measure_id, area_id, priority_id). We capture it here, ignoring duplicates:
INSERT INTO measure_area_priority (
measure_id,
area_id,
priority_id
)
SELECT DISTINCT
measure_id,
area_id,
priority_id
FROM source_table
WHERE measure_id IS NOT NULL
AND area_id IS NOT NULL
AND priority_id IS NOT NULL;
-- 6) Insert into measure_area_priority_grant (bridge for grants)
-- Some measure–area–priority combos have an associated grant_id. Insert them here:
INSERT INTO measure_area_priority_grant (
measure_id,
area_id,
priority_id,
grant_id
)
SELECT DISTINCT
measure_id,
area_id,
priority_id,
grant_id
FROM source_table
WHERE measure_id IS NOT NULL
AND area_id IS NOT NULL
AND priority_id IS NOT NULL
AND grant_id IS NOT NULL;
-- 7) Insert into species
INSERT INTO species
BY NAME
SELECT *
FROM read_parquet('https://opendata.westofengland-ca.gov.uk/api/explore/v2.1/catalog/datasets/lnrs-species-tbl/exports/parquet');
-- 8) Insert into species_area_priority
INSERT INTO species_area_priority
BY NAME
(SELECT species_id, area_id, priority_id
FROM read_parquet('https://opendata.westofengland-ca.gov.uk/api/explore/v2.1/catalog/datasets/species-area-tbl/exports/parquet')
LEFT JOIN
(SELECT species_id, priority_id
FROM read_parquet('https://opendata.westofengland-ca.gov.uk/api/explore/v2.1/catalog/datasets/species-priority-tbl/exports/parquet'))
USING (species_id));
-- 9) Insert into area_geom
INSERT INTO area_geom(
geo_point_2d,
geo_shape,
area_id
)
SELECT
geo_point_2d,
geo_shape,
id area_id
FROM read_parquet('data/lnrs-sub-areas.parquet');
-- Recreate source_table with a Single Query
-- If you wish to see all of the columns in a single result set (mirroring source_table),
-- you can do so with the following join query.
-- The many-to-many relationship to grants is handled by left-joining on the
-- measure_area_priority_grant table, as some measure–area–priority rows may not have an associated grant.
CREATE OR REPLACE VIEW source_table_recreated_vw AS
SELECT
/* Measures */
m.measure_id,
m.measure,
--m.other_priorities_delivered,
m.core_supplementary,
m.mapped_unmapped,
-- m.relevant_map_layer,
m.link_to_further_guidance,
/* Measure Types (one row per measure_type if multiple exist) */
mt.measure_type,
/* Stakeholders (one row per stakeholder if multiple exist) */
stkh.stakeholder,
/* Area */
map.area_id,
a.area_name,
a.area_description,
a.area_link,
a.bng_hab_mgt,
a.bng_hab_creation,
a.local_funding_schemes,
/* Priority */
map.priority_id,
p.biodiversity_priority,
p.simplified_biodiversity_priority,
p.theme,
/* Grant */
mag.grant_id,
g.grant_name,
g.grant_scheme,
g.summary_wrapped,
g.url
FROM measure_area_priority AS map
JOIN measure AS m
ON map.measure_id = m.measure_id
JOIN area AS a
ON map.area_id = a.area_id
JOIN priority AS p
ON map.priority_id = p.priority_id
-- Many-to-many from measure -> measure_type
LEFT JOIN measure_has_type AS mht
ON m.measure_id = mht.measure_id
LEFT JOIN measure_type AS mt
ON mht.measure_type_id = mt.measure_type_id
-- Many-to-many from measure -> stakeholder
LEFT JOIN measure_has_stakeholder AS mhs
ON m.measure_id = mhs.measure_id
LEFT JOIN stakeholder AS stkh
ON mhs.stakeholder_id = stkh.stakeholder_id
-- Potential multiple grants per measure–area–priority
LEFT JOIN measure_area_priority_grant AS mag
ON map.measure_id = mag.measure_id
AND map.area_id = mag.area_id
AND map.priority_id = mag.priority_id
LEFT JOIN grant_table AS g
ON mag.grant_id = g.grant_id;
DESCRIBE FROM source_table;
-- testing why there are fewer rows in the recreated table
-- compared to the source table
-- it is because the recreated table elides rows where a grant ID
-- is NULL but all other unique identifiers exist
-- needs to be tested in the TEST app
-- also lets try removing all unused fields from the dataset as it is only used for the app
CREATE OR REPLACE VIEW source_table_distinct_vw AS
SELECT measure_id, priority_id, grant_id, measure_type, stakeholder
FROM source_table st
-- INNER JOIN source_table_recreated str
-- USING (measure_id, priority_id, area_id)
WHERE st.area_id = 15;
CREATE OR REPLACE VIEW source_table_recreated_distinct_vw AS
SELECT measure_id, priority_id, grant_id, measure_type, stakeholder
FROM source_table_recreated_vw str
-- INNER JOIN source_table_recreated str
-- USING (measure_id, priority_id, area_id)
WHERE str.area_id = 15;
SELECT * FROM source_table_distinct_vw;
( SELECT * FROM source_table_distinct_vw
EXCEPT
SELECT * FROM source_table_recreated_distinct_vw)
UNION ALL
( SELECT * FROM source_table_recreated_distinct_vw
EXCEPT
SELECT * FROM source_table_distinct_vw);
-- a revised version of the area-measures-tbl which keeps only the fields
-- necessary for the app
CREATE OR REPLACE VIEW apmg_slim_vw AS
SELECT
core_supplementary
, measure_type
, stakeholder
, area_name
, grant_id
, priority_id
, biodiversity_priority
, measure
, measure_id
, link_to_further_guidance
, grant_name
, "url"
FROM source_table_recreated_vw;
FROM apmg_slim_vw;
-- try JSON as CSV is outputting invalid encoding of non alphanumeric characters
COPY apmg_slim_vw TO 'data/apmg_slim_ods.json' (ARRAY true);
-- Now we need a process to update (edit) the values in the individual tables
-- and then update the source_table_recreated view
----------------------------------------------------------------
----------------------------------------------------------------
-- CRUD OPEARATION ON THE TABLES
----------------------------------------------------------------
-- 1. Update Operations
-- a. Update a record in the "measure" table
UPDATE measure
SET measure = 'New Measure Description'
WHERE measure_id = 1;
-- b. Update a record in the "measure_type" table
UPDATE measure_type
SET measure_type = 'New Type'
WHERE measure_type_id = 1;
-- c. Update a record in the "stakeholder" table
UPDATE stakeholder
SET stakeholder = 'New Stakeholder Name'
WHERE stakeholder_id = 1;
-- d. Update a record in the "grant_table" table
UPDATE grant_table
SET grant_name = 'New Grant Name'
WHERE grant_id = 'GRANT123';
-- e. Update a record in the "priority" table
UPDATE priority
SET biodiversity_priority = 'New Priority'
WHERE priority_id = 1;
-- f. Update a record in the "area" table
UPDATE area
SET area_name = 'New Area Name'
WHERE area_id = 1;
-- 2. Create Operations
-- a. Add a record to the "measure" table
-- generate a new incrnemnted measure_id
CREATE MACRO max_meas() AS (SELECT MAX(measure_id) + 1 as max_measure_id FROM measure);
SELECT max_meas();
INSERT INTO measure (measure_id, measure, other_priorities_delivered, core_supplementary, mapped_unmapped, relevant_map_layer, link_to_further_guidance)
VALUES (max_meas(), 'New Measure', 'Priority A', 'Core', 'Mapped', 'Layer 1', 'https://example.com');
SELECT * FROM measure WHERE measure_id >= 780;
-- b. Add a record to the "area" table
INSERT INTO area (area_id, area_name, area_description, area_link, bng_hab_mgt, bng_hab_creation, local_funding_schemes)
VALUES (80, 'New Area', 'Area Description', 'https://example.com', 'Management Plan', 'Creation Plan', 'Scheme A');
-- c. Add a record to the "measure_type" table
INSERT INTO measure_type (measure_type)
VALUES ('New Type');
-- d. Add a record to the "stakeholder" table
INSERT INTO stakeholder (stakeholder)
VALUES ('New Stakeholder');
-- e. Add a record to the "grant_table" table
INSERT INTO grant_table (grant_id, grant_name, grant_scheme, url, summary_wrapped)
VALUES ('GRANT456', 'New Grant', 'Scheme X', 'https://example.com', 'Grant Summary');
-- f. Add a record to the "priority" table
INSERT INTO priority (priority_id, biodiversity_priority, simplified_biodiversity_priority, theme)
VALUES (788, 'New Priority', 'Simplified Priority', 'Theme A');
-- 3. Delete Operations
-- a. Delete a record from the "measure" table
-------------------------------------------------------------------------
-- Need to delete the rows with the measure_id from all linked tables first
DELETE FROM measure_has_type
WHERE measure_type_id = (SELECT measure_type_id FROM measure_has_type
WHERE measure_id = 1);
DELETE FROM measure_area_priority_grant
WHERE measure_id = 1;
DELETE FROM measure_area_priority
WHERE measure_id = 1;
-- stakeholder delete
DELETE FROM measure_has_stakeholder
WHERE stakeholder_id = 1;
DELETE FROM stakeholder
WHERE stakeholder_id = 1;
-- d. Delete a record from the "grant_table" table
DELETE FROM grant_table
WHERE grant_id = 'GRANT123';
-- e. Delete a record from the "priority" table
-------------------------------------------------------------------------
DELETE FROM measure_area_priority_grant
WHERE priority_id = 1;
DELETE FROM measure_area_priority
WHERE priority_id = 1;
DELETE FROM priority
WHERE priority_id = 1;
-- f. Delete a record from the "area" table
DELETE FROM measure_area_priority_grant
WHERE area_id = 1;
DELETE FROM measure_area_priority
WHERE area_id = 1;
DELETE FROM area
WHERE area_id = 1;
.help
.quit