Skip to content

Commit

Permalink
gcmd keyword revamp
Browse files Browse the repository at this point in the history
  • Loading branch information
Brian Duggan committed Nov 20, 2013
1 parent 5d6cf2a commit ed69f2f
Show file tree
Hide file tree
Showing 13 changed files with 446 additions and 3 deletions.
41 changes: 41 additions & 0 deletions db/patches/0530_gcmd.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- One row per GCMD keyword.  The hierarchy is stored as a self-reference:
-- parent_identifier points at the identifier of the enclosing keyword.
CREATE TABLE gcmd_keyword (
    identifier        VARCHAR NOT NULL PRIMARY KEY,
    -- Deferrable and initially deferred: the self-reference is checked at
    -- commit time instead of after every statement.
    parent_identifier VARCHAR
        CONSTRAINT fk_parent
        REFERENCES gcmd_keyword (identifier)
        DEFERRABLE INITIALLY DEFERRED,
    label             VARCHAR,
    definition        VARCHAR
);

-- Flattened view of the keyword tree below the "Science Keywords" wrapper
-- node.  Each level is attached with a LEFT JOIN, so a row follows one chain
-- category > topic > term > level1 .. level4 as deep as it goes and leaves
-- the columns below the end of the chain NULL.  "identifier" is the
-- identifier of the deepest keyword present in the row.
CREATE VIEW vw_gcmd_keyword AS
SELECT
    COALESCE(
        level4.identifier,
        level3.identifier,
        level2.identifier,
        level1.identifier,
        term.identifier,
        topic.identifier,
        category.identifier
    )              AS identifier,
    category.label AS category,
    topic.label    AS topic,
    term.label     AS term,
    level1.label   AS level1,
    level2.label   AS level2,
    level3.label   AS level3,
    level4.label   AS level4
FROM gcmd_keyword wrapper
    LEFT JOIN gcmd_keyword category ON category.parent_identifier = wrapper.identifier
    LEFT JOIN gcmd_keyword topic    ON topic.parent_identifier    = category.identifier
    LEFT JOIN gcmd_keyword term     ON term.parent_identifier     = topic.identifier
    LEFT JOIN gcmd_keyword level1   ON level1.parent_identifier   = term.identifier
    LEFT JOIN gcmd_keyword level2   ON level2.parent_identifier   = level1.identifier
    LEFT JOIN gcmd_keyword level3   ON level3.parent_identifier   = level2.identifier
    LEFT JOIN gcmd_keyword level4   ON level4.parent_identifier   = level3.identifier
WHERE wrapper.identifier = '1eb0ea0a-312c-4d74-8d42-6f1ad758f999'
  AND wrapper.label = 'Science Keywords';

-- Many-to-many link between publications and GCMD keywords.  Both foreign
-- keys cascade on delete and on update, and a given (publication, keyword)
-- pair can be stored only once.
CREATE TABLE publication_gcmd_keyword_map (
    publication_id          INTEGER NOT NULL
        REFERENCES publication (id)
        ON DELETE CASCADE ON UPDATE CASCADE,
    gcmd_keyword_identifier VARCHAR NOT NULL
        REFERENCES gcmd_keyword (identifier)
        ON DELETE CASCADE ON UPDATE CASCADE,
    PRIMARY KEY (publication_id, gcmd_keyword_identifier)
);

125 changes: 125 additions & 0 deletions db/patches/0540_gcmd_view.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
-- Rebuild vw_gcmd_keyword so that every keyword below the "Science Keywords"
-- wrapper node gets a row of its own, not only the deepest keyword of each
-- chain: one SELECT per depth (level4 down to category), combined with UNION.
-- Columns for levels deeper than the row's own keyword are NULL.
--
-- "create or replace" is needed because patch 0530 already creates a view
-- with this name, so a plain "create view" fails once 0530 has been applied.
-- The column names and their order match the 0530 definition.
--
-- Every join is an inner join and gcmd_keyword.identifier is a NOT NULL
-- primary key, so the deepest joined alias always supplies the identifier.
create or replace view vw_gcmd_keyword as
-- depth 7: category > topic > term > level1 > level2 > level3 > level4
select
    level4.identifier as identifier,
    category.label as category,
    topic.label as topic,
    term.label as term,
    level1.label as level1,
    level2.label as level2,
    level3.label as level3,
    level4.label as level4
from gcmd_keyword wrapper
    inner join gcmd_keyword category on category.parent_identifier = wrapper.identifier
    inner join gcmd_keyword topic on topic.parent_identifier = category.identifier
    inner join gcmd_keyword term on term.parent_identifier = topic.identifier
    inner join gcmd_keyword level1 on level1.parent_identifier = term.identifier
    inner join gcmd_keyword level2 on level2.parent_identifier = level1.identifier
    inner join gcmd_keyword level3 on level3.parent_identifier = level2.identifier
    inner join gcmd_keyword level4 on level4.parent_identifier = level3.identifier
where wrapper.identifier = '1eb0ea0a-312c-4d74-8d42-6f1ad758f999'
  and wrapper.label = 'Science Keywords'
union
-- depth 6: ... > level3
select
    level3.identifier as identifier,
    category.label as category,
    topic.label as topic,
    term.label as term,
    level1.label as level1,
    level2.label as level2,
    level3.label as level3,
    NULL as level4
from gcmd_keyword wrapper
    inner join gcmd_keyword category on category.parent_identifier = wrapper.identifier
    inner join gcmd_keyword topic on topic.parent_identifier = category.identifier
    inner join gcmd_keyword term on term.parent_identifier = topic.identifier
    inner join gcmd_keyword level1 on level1.parent_identifier = term.identifier
    inner join gcmd_keyword level2 on level2.parent_identifier = level1.identifier
    inner join gcmd_keyword level3 on level3.parent_identifier = level2.identifier
where wrapper.identifier = '1eb0ea0a-312c-4d74-8d42-6f1ad758f999'
  and wrapper.label = 'Science Keywords'
union
-- depth 5: ... > level2
select
    level2.identifier as identifier,
    category.label as category,
    topic.label as topic,
    term.label as term,
    level1.label as level1,
    level2.label as level2,
    NULL as level3,
    NULL as level4
from gcmd_keyword wrapper
    inner join gcmd_keyword category on category.parent_identifier = wrapper.identifier
    inner join gcmd_keyword topic on topic.parent_identifier = category.identifier
    inner join gcmd_keyword term on term.parent_identifier = topic.identifier
    inner join gcmd_keyword level1 on level1.parent_identifier = term.identifier
    inner join gcmd_keyword level2 on level2.parent_identifier = level1.identifier
where wrapper.identifier = '1eb0ea0a-312c-4d74-8d42-6f1ad758f999'
  and wrapper.label = 'Science Keywords'
union
-- depth 4: ... > level1
select
    level1.identifier as identifier,
    category.label as category,
    topic.label as topic,
    term.label as term,
    level1.label as level1,
    NULL as level2,
    NULL as level3,
    NULL as level4
from gcmd_keyword wrapper
    inner join gcmd_keyword category on category.parent_identifier = wrapper.identifier
    inner join gcmd_keyword topic on topic.parent_identifier = category.identifier
    inner join gcmd_keyword term on term.parent_identifier = topic.identifier
    inner join gcmd_keyword level1 on level1.parent_identifier = term.identifier
where wrapper.identifier = '1eb0ea0a-312c-4d74-8d42-6f1ad758f999'
  and wrapper.label = 'Science Keywords'
union
-- depth 3: category > topic > term
select
    term.identifier as identifier,
    category.label as category,
    topic.label as topic,
    term.label as term,
    NULL as level1,
    NULL as level2,
    NULL as level3,
    NULL as level4
from gcmd_keyword wrapper
    inner join gcmd_keyword category on category.parent_identifier = wrapper.identifier
    inner join gcmd_keyword topic on topic.parent_identifier = category.identifier
    inner join gcmd_keyword term on term.parent_identifier = topic.identifier
where wrapper.identifier = '1eb0ea0a-312c-4d74-8d42-6f1ad758f999'
  and wrapper.label = 'Science Keywords'
union
-- depth 2: category > topic
select
    topic.identifier as identifier,
    category.label as category,
    topic.label as topic,
    NULL as term,
    NULL as level1,
    NULL as level2,
    NULL as level3,
    NULL as level4
from gcmd_keyword wrapper
    inner join gcmd_keyword category on category.parent_identifier = wrapper.identifier
    inner join gcmd_keyword topic on topic.parent_identifier = category.identifier
where wrapper.identifier = '1eb0ea0a-312c-4d74-8d42-6f1ad758f999'
  and wrapper.label = 'Science Keywords'
union
-- depth 1: category only
select
    category.identifier as identifier,
    category.label as category,
    NULL as topic,
    NULL as term,
    NULL as level1,
    NULL as level2,
    NULL as level3,
    NULL as level4
from gcmd_keyword wrapper
    inner join gcmd_keyword category on category.parent_identifier = wrapper.identifier
where wrapper.identifier = '1eb0ea0a-312c-4d74-8d42-6f1ad758f999'
  and wrapper.label = 'Science Keywords'
;
47 changes: 47 additions & 0 deletions eg/import_gcmd_keywords
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env perl

# Import the GCMD science keywords from the NASA KMS RDF file into the
# gcmd_keyword table: one row is created or updated per skos:Concept, all
# inside a single database transaction.

use v5.14;
use strict;
use warnings;

use Mojo::UserAgent;
use Tuba::DB::Objects qw/-nicknames -autoconnect/;
use HTML::Entities qw/decode_entities/;

my $me  = $ENV{USER} || 'unknown';
my $ua  = Mojo::UserAgent->new();
my $url = q[http://gcmdservices.gsfc.nasa.gov/static/kms/sciencekeywords/sciencekeywords.rdf];
my $tx  = $ua->get($url);

# Without this check a failed download yields an empty DOM and the script
# would finish "successfully" without importing anything.
my $status = $tx->res->code // 0;
die "could not fetch $url (HTTP status $status)\n" unless $status == 200;

# http://gcmdservices.gsfc.nasa.gov/static/kms/concept/536a86bd-3dd1-4f4a-9b4a-222a12746db5

my $db = GcmdKeyword->meta->db;

my $committed = $db->do_transaction(sub {
    # Parents may be referenced before their own concept has been processed,
    # so the self-referencing foreign key is only checked at commit.
    $db->dbh->do('set constraints all deferred;');
    $tx->res->dom->find('rdf\:RDF > skos\:Concept')->each(sub {
        my $concept    = shift;
        my $identifier = $concept->attr('rdf:about');

        # A concept without a prefLabel is stored with a NULL label.
        my ($pref_label) = $concept->find('skos\:prefLabel')->each;
        my $label = $pref_label ? $pref_label->text : undef;

        my @broader = $concept->find('skos\:broader')->each;
        die "not a tree" if @broader > 1;
        my $parent;
        if (@broader) {
            $parent = $broader[0]->attr('rdf:resource');
            # Make sure the parent row exists (as a stub if necessary); its
            # label and definition are filled in when its own concept is seen.
            my $gk = GcmdKeyword->new(identifier => $parent);
            unless ($gk->load(speculative => 1)) {
                $gk->save(audit_user => $me) or die $gk->error;
            }
        }

        my ($definition_node) = $concept->find('skos\:definition')->each;
        my $definition = $definition_node ? $definition_node->text : undef;
        # NOTE(review): as written this replaces a space with a space; it
        # presumably targeted a non-breaking space or an entity - confirm.
        $definition =~ s/ / /g if defined $definition;

        my $kw = GcmdKeyword->new( identifier => $identifier );
        $kw->load(speculative => 1);
        $kw->parent($parent);
        $kw->label($label || undef);
        $kw->definition($definition);
        $kw->save(audit_user => $me) or die $kw->error;
    });
});

# Assumes $db is a Rose::DB handle, whose do_transaction traps exceptions
# from the callback, rolls back and returns false - TODO confirm.  The result
# has to be checked, otherwise every "die" above goes unnoticed.
$committed
    or die "import failed, transaction rolled back: " . ($db->error // 'unknown error') . "\n";



2 changes: 2 additions & 0 deletions eg/import_keywords
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use Smart::Comments;

use Tuba::DB::Objects qw/-nicknames -autoconnect/;

# TODO use http://gcmdservices.gsfc.nasa.gov/static/kms/sciencekeywords/sciencekeywords.rdf

my $me = $ENV{USER} || 'unknown';
my $ua = Mojo::UserAgent->new();
my $content = $ua->get(q[http://gcmdservices.gsfc.nasa.gov/static/kms/sciencekeywords/sciencekeywords.csv])->res->body;
Expand Down
17 changes: 16 additions & 1 deletion lib/Tuba.pm
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,15 @@ sub startup {
my @restrict = $opts->{restrict_identifier} ? ( $identifier => $opts->{restrict_identifier} ) : ();
my %defaults = $opts->{defaults} ? %{ $opts->{defaults} } : ();
if ($opts->{wildcard}) {
my $reserved = q[^(?:form/update(?:_prov|_rel|_files)?|form/create|update(?:_rel|files|prov)?|put_files|history/)];
my $reserved = qr[^(?:form/update
(?:_prov|_rel|_files)?
|form/create
|update
(?:_prov|_rel|_files)?
|put_files
|history
)
]x;
for my $format (@supported_formats) {
$resource->get("*$identifier.$format" => \@restrict => { format => $format } )
->over(not_match => { $identifier => $reserved })
Expand All @@ -330,26 +338,30 @@ sub startup {
$authed->get("/form/update/*$identifier" => \%defaults) ->to("$cname#update_form")->name("update_form_$name");
$authed->get("/form/update_prov/*$identifier" => \%defaults) ->to("$cname#update_prov_form")->name("update_prov_form_$name");
$authed->get("/form/update_rel/*$identifier" => \%defaults) ->to("$cname#update_rel_form")->name("update_rel_form_$name");
$authed->get("/form/update_keywords/*$identifier" => \%defaults) ->to("$cname#update_keywords_form")->name("update_keywords_form_$name");
$authed->get("/form/update_files/*$identifier" => \%defaults)->to("$cname#update_files_form")->name("update_files_form_$name");
$authed->get("/history/*$identifier" => \%defaults) ->to("$cname#history") ->name("history_$name");
$authed->delete("*$identifier" => \%defaults) ->to("$cname#remove") ->name("remove_$name");
# Catch-all POST for wildcard identifiers.  It is declared before the
# /prov, /rel, /keywords and /files POST routes below, so identifiers that
# start with one of those prefixes must be excluded here; otherwise this
# route would capture e.g. "keywords/<id>" as the identifier and the
# dedicated handler would never be reached.
# (presumably the not_match condition rejects identifiers matching the
# pattern - verify against its definition)
$authed->post("*$identifier" => \%defaults)->over(not_match => { $identifier => qr[^(?:prov|rel|keywords|files)/] })
->to("$cname#update") ->name("update_$name");
$authed->post("/prov/*$identifier") ->to("$cname#update_prov")->name("update_prov_$name");
$authed->post("/rel/*$identifier") ->to("$cname#update_rel")->name("update_rel_$name");
$authed->post("/keywords/*$identifier") ->to("$cname#update_keywords")->name("update_keywords_$name");
$authed->post("/files/*$identifier") ->to("$cname#update_files")->name("update_files_$name");
$authed->put("/files/*$identifier/#filename") # a default filename for PUTs would be ambiguous.
->to("$cname#put_files")->name("put_files_$name");
} else {
$authed->get("/form/update/:$identifier") ->to("$cname#update_form")->name("update_form_$name");
$authed->get("/form/update_prov/:$identifier" => \%defaults) ->to("$cname#update_prov_form")->name("update_prov_form_$name");
$authed->get("/form/update_rel/:$identifier" => \%defaults) ->to("$cname#update_rel_form")->name("update_rel_form_$name");
$authed->get("/form/update_keywords/:$identifier" => \%defaults) ->to("$cname#update_keywords_form")->name("update_keywords_form_$name");
$authed->get("/form/update_files/:$identifier" => \%defaults)->to("$cname#update_files_form")->name("update_files_form_$name");
$authed->get("/history/:$identifier" => \%defaults) ->to("$cname#history") ->name("history_$name");
$authed->delete(":$identifier" => \%defaults) ->to("$cname#remove") ->name("remove_$name");
$authed->post(":$identifier" => \%defaults) ->to("$cname#update") ->name("update_$name");
$authed->post("/prov/:$identifier" => \%defaults) ->to("$cname#update_prov")->name("update_prov_$name");
$authed->post("/rel/:$identifier" => \%defaults) ->to("$cname#update_rel")->name("update_rel_$name");
$authed->post("/keywords/:$identifier" => \%defaults) ->to("$cname#update_keywords")->name("update_keywords_$name");
$authed->post("/files/:$identifier" => \%defaults) ->to("$cname#update_files")->name("update_files_$name");
$authed->put("/files/:$identifier/#filename" => {filename => 'unnamed', %defaults })
->to("$cname#put_files")->name("put_files_$name");
Expand Down Expand Up @@ -430,6 +442,9 @@ sub startup {
$r->resource(person => { restrict_identifier => qr/\d+/ } );
$r->get('/person/:name')->to('person#redirect_by_name');
# GcmdKeyword
$r->resource('gcmd_keyword');
# Others, some of which aren't yet implemented.
$r->resource($_) for qw/dataset model software algorithm activity
instrument platform
Expand Down
37 changes: 37 additions & 0 deletions lib/Tuba/Controller.pm
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,43 @@ sub put_files {
$c->render(text => "ok");
}

=head2 update_keywords

Assign GCMD keywords to a resource.

The request body is JSON: either a single hash or an array of hashes, each
describing one keyword.  A hash with an C<identifier> key is used to build
the keyword directly; any other hash is passed to
C<< GcmdKeyword->new_from_flat >>.  Every keyword must already exist in the
database.

When a single hash is sent, a true C<_delete_extra> entry additionally
removes the publication's existing keyword assignments that were not named
in the request.

Renders the JSON string C<ok> on success, or
C<< { error => { data => ..., msg => ... } } >> for the first entry that is
not a hash or cannot be found.  Non-JSON requests are not implemented yet.

=cut

sub update_keywords {
    my $c = shift;
    my $obj = $c->_this_object or return $c->render_not_found;
    my $pub = $obj->get_publication(autocreate => 1);
    # A freshly autocreated publication has no id yet and must be stored
    # before keywords can be attached to it.
    $pub->save(audit_user => $c->user) unless $pub->id;

    my $json = $c->req->json
        or return $c->render(text => "html not implemented"); # TODO

    # _delete_extra can only be carried by a top-level hash; looking it up
    # on an array (or scalar) payload would die.
    my $delete_extra = ref($json) eq 'HASH' ? delete $json->{_delete_extra} : undef;
    my @entries = ref($json) eq 'ARRAY' ? @$json : ($json);

    # Start with every currently assigned keyword; the ones named in the
    # request are removed from this set as they are processed.
    my %to_delete = map { ($_->identifier => 1) } @{ $pub->gcmd_keywords };

    for my $k (@entries) {
        ref $k eq 'HASH' or return $c->render(json => { error => { data => $k, msg => "not a hash" }} );
        my $kw = exists $k->{identifier} ? GcmdKeyword->new(%$k) : GcmdKeyword->new_from_flat(%$k);
        # new_from_flat returns nothing when no keyword matches.
        unless ($kw and $kw->load(speculative => 1)) {
            return $c->render(json => { error => { data => $k, msg => 'not found' }} );
        }
        $pub->add_gcmd_keywords($kw);
        delete $to_delete{$kw->identifier};
    }
    $pub->save(audit_user => $c->user);

    if ($delete_extra) {
        for my $extra (keys %to_delete) {
            PublicationGcmdKeywordMap->new(
                publication => $pub->id,
                gcmd_keyword_identifier => $extra
            )->delete;
        }
    }
    return $c->render(json => 'ok');
}

=head2 update_rel
Update the relationships.
Expand Down
46 changes: 46 additions & 0 deletions lib/Tuba/DB/Mixin/Object/GcmdKeyword.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package Tuba::DB::Object::GcmdKeyword;
# Tuba::DB::Mixin::Object::GcmdKeyword;
#
# Extra methods for GCMD keyword objects: a human-readable representation
# and a lookup by the flattened category/topic/term/level columns.
use strict;
use warnings;
use Tuba::Log;
use Data::Dumper;

# stringify(%args)
#
# Returns the keyword's label.  Unless a true "short" argument is given and
# if the keyword has a parent, the parent's label is prepended, joined with
# '>'.
sub stringify {
    my $self = shift;
    my %args = @_;
    return $self->label if $args{short};
    my $parent = $self->parent or return $self->label;
    # A missing label is rendered as an empty string.
    return join '>', map { $_ // '' } $parent->label, $self->label;
}

# new_from_flat(%h)
#
# Class method.  Looks up a keyword in vw_gcmd_keyword by its flattened
# columns and returns a new (not yet loaded) object carrying the matching
# identifier, or nothing when no row matches.
#
# Example :
#   {
#     'id' => '5286',
#     'category' => 'EARTH SCIENCE',
#     'topic' => 'HUMAN DIMENSIONS',
#     'term' => 'ENVIRONMENTAL IMPACTS',
#     'level1' => 'FOSSIL FUEL BURNING'
#     'level2' => undef,
#     'level3' => undef,
#   };
#
# Keys other than the columns listed below (such as 'id') are ignored.
sub new_from_flat {
    my $class = shift;
    my %h = @_;
    # Every level column takes part in the lookup, including the ones the
    # caller left out: those are passed as undef, which presumably becomes an
    # "IS NULL" condition (DBIx::Simple builds the WHERE clause with
    # SQL::Abstract - confirm).  Constraining level4 as well keeps a level3
    # keyword from also matching the rows of its level4 children.
    my @cols = qw/category topic term level1 level2 level3 level4/;
    my %where;
    @where{@cols} = @h{@cols};
    my $ds = DBIx::Simple->new(Tuba::Plugin::Db->connection->dbh);
    my @rows = $ds->select('vw_gcmd_keyword', '*', \%where)->hashes;
    return unless @rows;
    unless (@rows == 1) {
        # Ambiguous match: report it and fall back to the first row.
        logger()->warn("we got ".@rows." rows for ".Dumper(\%h));
    }
    return $class->new(identifier => $rows[0]{identifier});
}

1;

2 changes: 2 additions & 0 deletions lib/Tuba/DB/Object.pm
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ information :
- a list of parent publications
- a list of files
- a list of gcmd keywords
The parameter 'c' should have a controller object
(so that we can look up a URL for an object).
Expand Down Expand Up @@ -257,6 +258,7 @@ sub as_tree {
};
}
$tree->{files} = [ map $_->as_tree(@_), $pub->files ];
$tree->{gcmd_keywords} = [ map $_->as_tree(@_), $pub->gcmd_keywords ];
}
$tree->{uri} //= $s->uri($c);
}
Expand Down
Loading

0 comments on commit ed69f2f

Please sign in to comment.