Skip to content

Commit

Permalink
support jsonfeed references
Browse files Browse the repository at this point in the history
  • Loading branch information
mfenner committed Jan 13, 2025
1 parent 80ccc01 commit 3efd0e4
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 47 deletions.
83 changes: 38 additions & 45 deletions api/blogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ async def extract_single_blog(slug: str):
feed_format = parse_feed_format(feed) or config["feed_format"]
title = feed.get("title", None) or config["title"]
generator_raw = config["generator_raw"] or (
parse_generator(feed.get("generator_detail", None) or feed.get("generator"))
parse_generator(feed.get("generator_detail", None) or feed.get("generator"))
or "Other"
)
generator = re.split(" ", generator_raw)[0]
Expand All @@ -136,7 +136,7 @@ async def extract_single_blog(slug: str):
# ignore the default favicons
if favicon in ["https://s0.wp.com/i/buttonw-com.png"]:
favicon = None
language = config["language"] or feed.get("language", None)
language = config["language"] or feed.get("language", None)
if language:
language = language.split("-")[0]
except Exception as error:
Expand Down Expand Up @@ -184,7 +184,7 @@ async def extract_single_blog(slug: str):
"feed_format": config["feed_format"],
}
update_single_blog(blog)

# update InvenioRDM blog community if blog is active, expired or archived
if config["status"] in ["active", "expired", "archived"]:
r = upsert_blog_community(blog)
Expand Down Expand Up @@ -316,12 +316,9 @@ def update_single_blog(blog):
def push_blog_community_id(slug):
"""Get InvenioRDM blog community id and store in blog."""
try:
context = ssl.create_default_context()
if is_local():
context = False
url = f"{environ['QUART_INVENIORDM_API']}/api/communities?q=slug:{slug}"
headers = {"Authorization": f"Bearer {environ['QUART_INVENIORDM_TOKEN']}"}
response = httpx.get(url, headers=headers, timeout=10,verify=context)
response = httpx.get(url, headers=headers, timeout=10)
result = response.json()
if py_.get(result, "hits.total") != 1:
return result
Expand Down Expand Up @@ -355,9 +352,6 @@ def upsert_blog_community(blog):
def create_blog_community(blog):
"""Create an InvenioRDM blog community."""
try:
context = ssl.create_default_context()
if is_local():
context = False
url = f"{environ['QUART_INVENIORDM_API']}/api/communities"
headers = {"Authorization": f"Bearer {environ['QUART_INVENIORDM_TOKEN']}"}
metadata = {
Expand All @@ -369,17 +363,21 @@ def create_blog_community(blog):
metadata["description"] = py_.truncate(
blog.get("description", None), 250, omission="", separator=" "
)
custom_fields = compact({
"rs:feed_url": blog.get("feed_url"),
"rs:feed_format": blog.get("feed_format"),
"rs:generator": blog.get("generator"),
"rs:license": blog.get("license"),
"rs:issn": blog.get("issn"),
"rs:prefix": blog.get("prefix"),
"rs:joined": format_datetime(get_date_from_unix_timestamp(blog.get("created_at", 0)), "en"),
"rs:language": get_language(blog.get("language"), format="name"),
"rs:category": FOS_MAPPINGS.get(blog.get("category"), None),
})
custom_fields = compact(
{
"rs:feed_url": blog.get("feed_url"),
"rs:feed_format": blog.get("feed_format"),
"rs:generator": blog.get("generator"),
"rs:license": blog.get("license"),
"rs:issn": blog.get("issn"),
"rs:prefix": blog.get("prefix"),
"rs:joined": format_datetime(
get_date_from_unix_timestamp(blog.get("created_at", 0)), "en"
),
"rs:language": get_language(blog.get("language"), format="name"),
"rs:category": FOS_MAPPINGS.get(blog.get("category"), None),
}
)
data = {
"access": {
"visibility": "public",
Expand All @@ -391,7 +389,7 @@ def create_blog_community(blog):
"metadata": metadata,
"custom_fields": custom_fields,
}
response = httpx.post(url, headers=headers, json=data, timeout=10,verify=context)
response = httpx.post(url, headers=headers, json=data, timeout=10)
return response
except Exception as error:
print(error)
Expand All @@ -401,9 +399,6 @@ def create_blog_community(blog):
def update_blog_community(blog):
"""Update an InvenioRDM blog community."""
try:
context = ssl.create_default_context()
if is_local():
context = False
url = f"{environ['QUART_INVENIORDM_API']}/api/communities/{blog.get('slug')}"
headers = {"Authorization": f"Bearer {environ['QUART_INVENIORDM_TOKEN']}"}
metadata = {
Expand All @@ -415,17 +410,21 @@ def update_blog_community(blog):
metadata["description"] = py_.truncate(
blog.get("description", None), 250, omission="", separator=" "
)
custom_fields = compact({
"rs:feed_url": blog.get("feed_url"),
"rs:feed_format": blog.get("feed_format"),
"rs:generator": blog.get("generator"),
"rs:license": blog.get("license"),
"rs:issn": blog.get("issn"),
"rs:prefix": blog.get("prefix"),
"rs:joined": format_datetime(get_date_from_unix_timestamp(blog.get("created_at", 0)), "en"),
"rs:language": get_language(blog.get("language"), format="name"),
"rs:category": FOS_MAPPINGS.get(blog.get("category"), None),
})
custom_fields = compact(
{
"rs:feed_url": blog.get("feed_url"),
"rs:feed_format": blog.get("feed_format"),
"rs:generator": blog.get("generator"),
"rs:license": blog.get("license"),
"rs:issn": blog.get("issn"),
"rs:prefix": blog.get("prefix"),
"rs:joined": format_datetime(
get_date_from_unix_timestamp(blog.get("created_at", 0)), "en"
),
"rs:language": get_language(blog.get("language"), format="name"),
"rs:category": FOS_MAPPINGS.get(blog.get("category"), None),
}
)
data = {
"access": {
"visibility": "public",
Expand All @@ -437,7 +436,7 @@ def update_blog_community(blog):
"metadata": metadata,
"custom_fields": custom_fields,
}
response = httpx.put(url, headers=headers, json=data, timeout=10,verify=context)
response = httpx.put(url, headers=headers, json=data, timeout=10)
if response.status_code >= 400:
print(response.json())
return response
Expand All @@ -451,9 +450,6 @@ def upload_blog_logo(blog):
if blog.get("favicon", None) is None:
return None
try:
context = ssl.create_default_context()
if is_local():
context = False
url = (
f"{environ['QUART_INVENIORDM_API']}/api/communities/{blog.get('slug')}/logo"
)
Expand All @@ -464,7 +460,7 @@ def upload_blog_logo(blog):
content = httpx.get(
blog.get("favicon"), timeout=10, follow_redirects=True
).content
response = httpx.put(url, headers=headers, content=content, timeout=10,verify=context)
response = httpx.put(url, headers=headers, content=content, timeout=10)
return response
except Exception as error:
print(error)
Expand Down Expand Up @@ -500,14 +496,11 @@ def upload_blog_logo(blog):
def feature_community(id):
"""Feature an InvenioRDM community by id."""
try:
context = ssl.create_default_context()
if is_local():
context = False
url = f"{environ['QUART_INVENIORDM_API']}/api/communities/{id}/featured"
headers = {"Authorization": f"Bearer {environ['QUART_INVENIORDM_TOKEN']}"}
now = datetime.datetime.now().isoformat()
data = {"start_date": now}
response = httpx.post(url, headers=headers, json=data, timeout=10,verify=context)
response = httpx.post(url, headers=headers, json=data, timeout=10)
return response
except Exception as error:
print(error)
Expand Down
18 changes: 16 additions & 2 deletions api/posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,8 +936,10 @@ def format_author(author, published_at):
content_html = post.get("content_html", "")
content_text = get_markdown(content_html)
summary = get_summary(content_html)
abstract = None
reference = await get_references(content_html)
abstract = None
reference = await get_jsonfeed_references(post.get("_references", []))
if len(reference) == 0:
reference = await get_references(content_html)
relationships = get_relationships(content_html)
url = normalize_url(post.get("url", None), secure=blog.get("secure", True))
archive_url = (
Expand Down Expand Up @@ -1698,6 +1700,18 @@ async def get_references(content_html: str):
return formatted_references


async def get_jsonfeed_references(references: list):
    """Extract references from the JSON Feed ``_references`` field.

    Each entry's ``url`` value is passed to ``format_reference`` together with
    the entry's position in ``references``. All entries are formatted via a
    single ``asyncio.gather`` call and falsy results are dropped.

    Args:
        references: List of reference objects exposing ``.get("url")``.
            ``None`` or an empty list means "no references"; ``None`` can
            occur when the feed item carries an explicit null
            ``_references`` value.

    Returns:
        List of formatted references; empty when there is nothing to format.
    """
    # Guard against None (and skip the gather round-trip for empty input).
    if not references:
        return []

    # Entries lacking a "url" key are forwarded as None, and every entry keeps
    # its original position as the index handed to format_reference.
    urls = [ref.get("url", None) for ref in references]
    tasks = [format_reference(url, index) for index, url in enumerate(urls)]
    return py_.compact(await asyncio.gather(*tasks))


async def format_reference(url, index):
"""Format reference."""
if validate_url(normalize_id(url)) in ["DOI", "URL"]:
Expand Down

0 comments on commit 3efd0e4

Please sign in to comment.