=0,S=R?"width":"height",V=J(t,{placement:B,boundary:p,rootBoundary:u,altBoundary:l,padding:c}),q=R?T?L:P:T?A:D;w[S]>x[S]&&(q=fe(q));var N=fe(q),I=[];if(i&&I.push(V[H]<=0),s&&I.push(V[q]<=0,V[N]<=0),I.every((function(e){return e}))){E=B,j=!1;break}O.set(B,I)}if(j)for(var _=function(e){var t=b.find((function(t){var n=O.get(t);if(n)return n.slice(0,e).every((function(e){return e}))}));if(t)return E=t,"break"},F=h?3:1;F>0;F--){if("break"===_(F))break}t.placement!==E&&(t.modifiersData[r]._skip=!0,t.placement=E,t.reset=!0)}},requiresIfExists:["offset"],data:{_skip:!1}};function de(e,t,n){return i(e,a(t,n))}var he={name:"preventOverflow",enabled:!0,phase:"main",fn:function(e){var t=e.state,n=e.options,r=e.name,o=n.mainAxis,s=void 0===o||o,f=n.altAxis,c=void 0!==f&&f,p=n.boundary,u=n.rootBoundary,l=n.altBoundary,d=n.padding,h=n.tether,m=void 0===h||h,v=n.tetherOffset,y=void 0===v?0:v,b=J(t,{boundary:p,rootBoundary:u,padding:d,altBoundary:l}),w=C(t.placement),x=U(t.placement),O=!x,j=z(w),M="x"===j?"y":"x",k=t.modifiersData.popperOffsets,B=t.rects.reference,H=t.rects.popper,T="function"==typeof y?y(Object.assign({},t.rects,{placement:t.placement})):y,R="number"==typeof T?{mainAxis:T,altAxis:T}:Object.assign({mainAxis:0,altAxis:0},T),S=t.modifiersData.offset?t.modifiersData.offset[t.placement]:null,V={x:0,y:0};if(k){if(s){var q,N="y"===j?D:P,I="y"===j?A:L,_="y"===j?"height":"width",F=k[j],X=F+b[N],Y=F-b[I],G=m?-H[_]/2:0,K=x===W?B[_]:H[_],Q=x===W?-H[_]:-B[_],Z=t.elements.arrow,$=m&&Z?g(Z):{width:0,height:0},ee=t.modifiersData["arrow#persistent"]?t.modifiersData["arrow#persistent"].padding:{top:0,right:0,bottom:0,left:0},te=ee[N],ne=ee[I],re=de(0,B[_],$[_]),oe=O?B[_]/2-G-re-te-R.mainAxis:K-re-te-R.mainAxis,ie=O?-B[_]/2+G+re+ne+R.mainAxis:Q+re+ne+R.mainAxis,ae=t.elements.arrow&&E(t.elements.arrow),se=ae?"y"===j?ae.clientTop||0:ae.clientLeft||0:0,fe=null!=(q=null==S?void 0:S[j])?q:0,ce=F+ie-fe,pe=de(m?a(X,F+oe-fe-se):X,F,m?i(Y,ce):Y);k[j]=pe,V[j]=pe-F}if(c){var 
ue,le="x"===j?D:P,he="x"===j?A:L,me=k[M],ve="y"===M?"height":"width",ye=me+b[le],ge=me-b[he],be=-1!==[D,P].indexOf(w),we=null!=(ue=null==S?void 0:S[M])?ue:0,xe=be?ye:me-B[ve]-H[ve]-we+R.altAxis,Oe=be?me+B[ve]+H[ve]-we-R.altAxis:ge,je=m&&be?function(e,t,n){var r=de(e,t,n);return r>n?n:r}(xe,me,Oe):de(m?xe:ye,me,m?Oe:ge);k[M]=je,V[M]=je-me}t.modifiersData[r]=V}},requiresIfExists:["offset"]};var me={name:"arrow",enabled:!0,phase:"main",fn:function(e){var t,n=e.state,r=e.name,o=e.options,i=n.elements.arrow,a=n.modifiersData.popperOffsets,s=C(n.placement),f=z(s),c=[P,L].indexOf(s)>=0?"height":"width";if(i&&a){var p=function(e,t){return Y("number"!=typeof(e="function"==typeof e?e(Object.assign({},t.rects,{placement:t.placement})):e)?e:G(e,k))}(o.padding,n),u=g(i),l="y"===f?D:P,d="y"===f?A:L,h=n.rects.reference[c]+n.rects.reference[f]-a[f]-n.rects.popper[c],m=a[f]-n.rects.reference[f],v=E(i),y=v?"y"===f?v.clientHeight||0:v.clientWidth||0:0,b=h/2-m/2,w=p[l],x=y-u[c]-p[d],O=y/2-u[c]/2+b,j=de(w,O,x),M=f;n.modifiersData[r]=((t={})[M]=j,t.centerOffset=j-O,t)}},effect:function(e){var t=e.state,n=e.options.element,r=void 0===n?"[data-popper-arrow]":n;null!=r&&("string"!=typeof r||(r=t.elements.popper.querySelector(r)))&&N(t.elements.popper,r)&&(t.elements.arrow=r)},requires:["popperOffsets"],requiresIfExists:["preventOverflow"]};function ve(e,t,n){return void 0===n&&(n={x:0,y:0}),{top:e.top-t.height-n.y,right:e.right-t.width+n.x,bottom:e.bottom-t.height+n.y,left:e.left-t.width-n.x}}function ye(e){return[D,L,A,P].some((function(t){return e[t]>=0}))}var ge={name:"hide",enabled:!0,phase:"main",requiresIfExists:["preventOverflow"],fn:function(e){var 
t=e.state,n=e.name,r=t.rects.reference,o=t.rects.popper,i=t.modifiersData.preventOverflow,a=J(t,{elementContext:"reference"}),s=J(t,{altBoundary:!0}),f=ve(a,r),c=ve(s,o,i),p=ye(f),u=ye(c);t.modifiersData[n]={referenceClippingOffsets:f,popperEscapeOffsets:c,isReferenceHidden:p,hasPopperEscaped:u},t.attributes.popper=Object.assign({},t.attributes.popper,{"data-popper-reference-hidden":p,"data-popper-escaped":u})}},be=Z({defaultModifiers:[ee,te,oe,ie]}),we=[ee,te,oe,ie,ae,le,he,me,ge],xe=Z({defaultModifiers:we});e.applyStyles=ie,e.arrow=me,e.computeStyles=oe,e.createPopper=xe,e.createPopperLite=be,e.defaultModifiers=we,e.detectOverflow=J,e.eventListeners=ee,e.flip=le,e.hide=ge,e.offset=ae,e.popperGenerator=Z,e.popperOffsets=te,e.preventOverflow=he,Object.defineProperty(e,"__esModule",{value:!0})}));
+//# sourceMappingURL=popper.min.js.map
diff --git a/publications/tasks.py b/publications/tasks.py
index a0c3e16..c27f94e 100644
--- a/publications/tasks.py
+++ b/publications/tasks.py
@@ -1,114 +1,106 @@
+import logging
+logger = logging.getLogger(__name__)
+
from django_q.models import Schedule
from publications.models import Publication
from bs4 import BeautifulSoup
import json
import xml.dom.minidom
-from django.contrib.gis.geos import GEOSGeometry
+from django.contrib.gis.geos import GEOSGeometry
import requests
-def get_geom(url):
- req= requests.get(url)
- soup = BeautifulSoup(req.content, 'html.parser')
- geom = parse_html(soup)
- geom_object = None
- if geom :
- geom_data = geom["features"][0]["geometry"]
- # preparing geometry data in accordance to geosAPI fields
- type_geom= {'type': 'GeometryCollection'}
- geom_content = {"geometries" : [geom_data]}
- type_geom.update(geom_content)
- geom_data_string= json.dumps(type_geom)
- try :
- geom_object = GEOSGeometry(geom_data_string) #GeometryCollection object
- except :
- print("Invalid Geometry")
-
- return geom_object
-
-def parse_html(content):
- json_object = {}
+def extract_geometry_from_html(content):
for tag in content.find_all("meta"):
if tag.get("name", None) == "DC.SpatialCoverage":
data = tag.get("content", None)
try:
- json_object = json.loads(data)
+ geom = json.loads(data)
+
+ geom_data = geom["features"][0]["geometry"]
+ # preparing geometry data in accordance with the GEOS API fields
+ type_geom= {'type': 'GeometryCollection'}
+ geom_content = {"geometries" : [geom_data]}
+ type_geom.update(geom_content)
+ geom_data_string= json.dumps(type_geom)
+ try :
+ geom_object = GEOSGeometry(geom_data_string) # GeometryCollection object
+ logging.debug('Found geometry: %s', geom_object)
+ return geom_object
+ except :
+ print("Invalid Geometry")
except ValueError as e:
print("Not a valid GeoJSON")
- return json_object
-
-
-def get_timeperiod(url):
- req= requests.get(url)
- soup = BeautifulSoup(req.content, 'html.parser')
- return extract_timeperiod_from_html(soup)
def extract_timeperiod_from_html(content):
- tp_start = []
- tp_end = []
+ period = [None, None]
for tag in content.find_all("meta"):
- if tag.get("name", None) == "DC.temporal":
+ if tag.get("name", None) in ['DC.temporal', 'DC.PeriodOfTime']:
data = tag.get("content", None)
period = data.split("/")
- period1 = period[0]
- period2 = period[1]
- tp_start.append(period1)
- tp_end.append(period2)
-
- return tp_start,tp_end
-
+ logging.debug('Found time period: %s', period)
+ break;
+ # return start and end each wrapped in a list, for the array fields in the DB
+ return [period[0]], [period[1]]
-def parse_xml(content):
-
+def parse_oai_xml_and_save_publications(content):
DOMTree = xml.dom.minidom.parseString(content)
collection = DOMTree.documentElement # pass DOMTree as argument
- articles = collection.getElementsByTagName("dc:identifier")
- articles_count_in_journal = len(articles) # number of articles in journal
+ articles = collection.getElementsByTagName("dc:identifier")
+ articles_count_in_journal = len(articles)
for i in range(articles_count_in_journal):
identifier = collection.getElementsByTagName("dc:identifier")
identifier_value = identifier[i].firstChild.nodeValue
if identifier_value.startswith('http'):
- link_value = identifier_value
- #get geometry from html
- geom_object = get_geom(link_value)
- #get Timeperiod from html
- period = get_timeperiod(link_value)
- period_start = period[0]
- period_end = period[1]
+
+ with requests.get(identifier_value) as response:
+ soup = BeautifulSoup(response.content, 'html.parser')
+
+ geom_object = extract_geometry_from_html(soup)
+ period_start, period_end = extract_timeperiod_from_html(soup)
+
else:
- link_value = None
geom_object = None
period_start = []
period_end = []
-
+
title = collection.getElementsByTagName("dc:title")
if title:
title_value = title[0].firstChild.nodeValue
else :
title_value = None
- abstract = collection.getElementsByTagName("dc:description")
+ abstract = collection.getElementsByTagName("dc:description")
if abstract:
abstract_text = abstract[0].firstChild.nodeValue
else:
- abstract_text = None
+ abstract_text = None
journal = collection.getElementsByTagName("dc:publisher")
if journal:
journal_value = journal[0].firstChild.nodeValue
- else:
+ else:
journal_value = None
date = collection.getElementsByTagName("dc:date")
if date:
date_value = date[0].firstChild.nodeValue
else:
- date_value = None
- publication = Publication(title = title_value,abstract = abstract_text,publicationDate = date_value, url = link_value , journal = journal_value, geometry = geom_object, timeperiod_startdate = period_start,timeperiod_enddate = period_end)
+ date_value = None
+
+ publication = Publication(
+ title = title_value,
+ abstract = abstract_text,
+ publicationDate = date_value,
+ url = identifier_value,
+ journal = journal_value,
+ geometry = geom_object,
+ timeperiod_startdate = period_start,
+ timeperiod_enddate = period_end)
publication.save()
-
+ logger.info('Saved new publication for %s: %s', identifier_value, publication)
-def harvest_data(url):
+def harvest_oai_endpoint(url):
try:
- response = requests.get(url)
- parse_xml(response.content)
- except requests.exceptions.RequestException as e:
+ with requests.Session() as s:
+ response = s.get(url)
+ parse_oai_xml_and_save_publications(response.content)
+ except requests.exceptions.RequestException as e:
print ("The requested URL is invalid or has bad connection.Please change the URL")
-
diff --git a/publications/templates/admin/base_site.html b/publications/templates/admin/base_site.html
new file mode 100644
index 0000000..9f8a684
--- /dev/null
+++ b/publications/templates/admin/base_site.html
@@ -0,0 +1,17 @@
+{% extends "admin/base_site.html" %}
+{% load static %}
+
+{% comment %}
+See https://github.com/django/django/blob/main/django/contrib/admin/templates/admin/base_site.html
+{% endcomment %}
+
+{% block extrahead %}
+
+{% endblock %}
+
+{% block branding %}
+
+{% if user.is_anonymous %}
+ {% include "admin/color_theme_toggle.html" %}
+{% endif %}
+{% endblock %}
diff --git a/publications/templates/base.html b/publications/templates/base.html
index abcf397..c451ef7 100644
--- a/publications/templates/base.html
+++ b/publications/templates/base.html
@@ -25,11 +25,13 @@