diff --git a/Dockerfile b/Dockerfile index 3951348..687f374 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,9 +9,11 @@ RUN wget -qO - https://www.mongodb.org/static/pgp/server-3.4.asc | apt-key add - RUN echo "deb [ arch=amd64,arm64 ] http://repo.mongodb.org/apt/ubuntu xenial/mongodb-org/3.4 multiverse" | tee /etc/apt/sources.list.d/mongodb-org-3.4.list RUN apt-get update && apt-get install -y --no-install-recommends \ - python3.6 \ + python3.8 \ python3-pip \ - libpython3.6 \ + python3.8-dev \ + libpython3.8 \ + libpython3.8-dev \ jq \ mongodb-org \ locales \ @@ -25,6 +27,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ apt-get clean && \ rm -rf /var/lib/apt/lists/* +RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3.8 get-pip.py + WORKDIR /src ENV LC_ALL en_US.UTF-8 diff --git a/project/server/main/aurehal.py b/project/server/main/aurehal.py index 960fb7b..b4fd187 100644 --- a/project/server/main/aurehal.py +++ b/project/server/main/aurehal.py @@ -151,21 +151,23 @@ def harvest_and_save_aurehal(collection_name, aurehal_type): for vip in vips: orcid, id_hal_i, id_hal_s = None, None, None idref = vip['id'] - for ext in vip.get('externalIds', []): - if 'id_hal_i' in ext['type']: - id_hal_i = ext['id'] - if 'id_hal_s' in ext['type']: - id_hal_s = ext['id'] - if 'orcid' in ext['type']: - orcid = ext['id'] - if id_hal_i: - hal_idref[id_hal_i] = {'idref': idref.replace('idref', '')} - if orcid: - hal_idref[id_hal_i]['orcid'] = orcid - if id_hal_s: - hal_idref[id_hal_s] = {'idref': idref.replace('idref', '')} - if orcid: - hal_idref[id_hal_s]['orcid'] = orcid + externalIds = vip.get('externalIds', []) + if isinstance(externalIds, list): + for ext in vip.get('externalIds', []): + if 'id_hal_i' in ext['type']: + id_hal_i = ext['id'] + if 'id_hal_s' in ext['type']: + id_hal_s = ext['id'] + if 'orcid' in ext['type']: + orcid = ext['id'] + if id_hal_i: + hal_idref[id_hal_i] = {'idref': idref.replace('idref', '')} + if orcid: + hal_idref[id_hal_i]['orcid'] = orcid + if id_hal_s: + hal_idref[id_hal_s] = {'idref': idref.replace('idref', '')} + if orcid: + hal_idref[id_hal_s]['orcid'] = orcid #parsed data parsed_data, docid_map = create_docid_map(data, aurehal_type, hal_idref) current_file = f'aurehal_{aurehal_type}.json' diff --git a/project/server/main/idref.py b/project/server/main/idref.py index 6527bf7..03f40ca 100644 --- a/project/server/main/idref.py +++ b/project/server/main/idref.py @@ -46,6 +46,8 @@ def add_data(data, uri_prefix, target_index, target_id): continue if idref not in data: data[idref] = {'id':idref, 'externalIds':[]} + if not isinstance(data[idref].get('externalIds'), list): + data[idref]['externalIds'] = [] externalIds = data[idref]['externalIds'] has_ext = False for e in externalIds: diff --git a/requirements.txt b/requirements.txt index 15ce63d..0b9c6bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ -Flask==1.1.1 +Flask==2.1.0 Flask-Bootstrap==3.3.7.1 Flask-Testing==0.7.1 Flask-WTF==0.14.2 gunicorn==20.0.4 pymongo==3.8.0 -pandas==0.25.3 +pandas==1.2.5 python-dateutil~=2.8.1 python-keystoneclient==4.0.0 python-swiftclient==3.9.0