Skip to content

Commit

Permalink
rebuild project
Browse files Browse the repository at this point in the history
  • Loading branch information
suqingdong committed Jan 11, 2023
1 parent 85d1c7d commit 94815d2
Show file tree
Hide file tree
Showing 29 changed files with 440 additions and 420 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
*pyc
*__pycache__
.vscode
__pycache__
build
dist
test
tests
*.egg-info
4 changes: 2 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include requirements.txt
include impact_factor/version/version.json
include impact_factor/version.json
recursive-include impact_factor/data/ *
exclude test
exclude tests
51 changes: 47 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,59 @@
# Impact Factor Toolkits

## Installation
```base
pip install impact_factor
```bash
python3 -m pip install -U impact_factor
```

## Usage
## Use in CMD
### `help`
```bash
IF -h

# or
impact_factor -h
```

### `build`
> build/update the database
```bash
# optional, only required when you need to build or update the database
IF build
```

### `search`
> search with `journal`, `journal_abbr`, `issn`, `eissn` or `nlm_id`
```bash
IF search nature # search journal
IF search 'nature c%' # pattern (LIKE) search on journal
IF search 0028-0836 # search ISSN
IF search 1476-4687 # search eISSN
IF search 0410462 # search nlm_id
IF search nature --color # colorful output
```

### `filter`
> filter `factor` with `min_value` and `max_value`
```bash
IF filter -m 100 -M 200 --color

# output in PubMed filter format
IF filter -m 100 -M 200 --pubmed-filter
```

## Use in Python
```python
from impact_factor.core import Factor

fa = Factor()

print(fa.dbfile)

fa.search('nature')
fa.search('nature c%')

fa.filter(min_value=100, max_value=200)
fa.filter(min_value=100, max_value=200, pubmed_filter=True)
```

## Documents
https://impact-factor.readthedocs.io
1 change: 1 addition & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ rm -rf dist build *egg-info

python3 setup.py sdist bdist_wheel

rm -rf build *egg-info
4 changes: 2 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@
# -- Project information -----------------------------------------------------

project = 'impact_factor'
copyright = '2020, suqingdong'
copyright = '2022, suqingdong'
author = 'suqingdong'

# The full version, including alpha/beta/rc tags
release = '1.0.0'
release = '1.1.0'


# -- General configuration ---------------------------------------------------
Expand Down
9 changes: 0 additions & 9 deletions help.md

This file was deleted.

121 changes: 5 additions & 116 deletions impact_factor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,123 +1,12 @@
"""
Impact Factor Toolkits
"""
import os
import sys
import json
import datetime
import textwrap
from functools import partial
from pathlib import Path

from multiprocessing.dummy import Pool as ThreadPool

import click
BASE_DIR = Path(__file__).resolve().parent

from impact_factor import util
from impact_factor.util.factor import fetch_factor
from impact_factor.util.journal import parse_journal
from impact_factor.db.manager import Manager, Factor, FactorVersion
DEFAULT_DB = BASE_DIR.joinpath('data', 'impact_factor.sqlite3')
DEFAULT_EXCEL = BASE_DIR.joinpath('data', '2022_JCR_IF.xlsx')

BASE_DIR = os.path.dirname(os.path.realpath(__file__))
DEFAULT_DB = os.path.join(BASE_DIR, 'data', 'impact_factor.db')
version_info = json.load(BASE_DIR.joinpath('version.json').open())

version_info = json.load(open(os.path.join(BASE_DIR, 'version', 'version.json')))
__version__ = version_info['version']
__author__ = version_info['author']
__author_email__ = version_info['author_email']


# Entry-point class: wraps a Manager opened on `dbfile` and exposes version
# reporting, journal lookup, PubMed filter export and database (re)building.
# NOTE(review): leading indentation is not preserved in this view, so the
# statement nesting described in the comments below is inferred -- confirm
# against the real file.
class ImpactFactor(object):
# dbfile: database path handed to Manager (defaults to DEFAULT_DB).
# echo: forwarded to Manager. Extra keyword arguments are accepted and unused.
def __init__(self, dbfile=DEFAULT_DB, echo=False, **kwargs):
self.dbfile = dbfile
self.manager = Manager(dbfile, echo=echo)

# Print a bold green banner with the program version, database
# version/datetime, total and indexed journal counts, and the database path.
def check_version(self):
# presumably a dict-like record exposing 'version' and 'datetime' (both are
# consumed by the template below) -- verify against Manager.query
context = self.manager.query(FactorVersion)
res = self.manager.count(Factor.nlm_id)
context['total_count'] = res.scalar()
context['indexed_count'] = res.filter(Factor.indexed == True).scalar()

click.secho(textwrap.dedent('''
==========================================================
program version:\t{__version__}
database version:\t{version} [{datetime}]
total journals:\t\t{total_count}
indexed journals:\t{indexed_count}
database filepath:\t{dbfile}
==========================================================
''').format(__version__=__version__, dbfile=self.dbfile, **context), fg='green', bold=True)

# Look up `value` in `field` when one is given; otherwise try issn, e_issn,
# journal, med_abbr and nlm_id in that order. `like` is forwarded to
# Manager.query.
# NOTE(review): appears to return the first truthy result and fall through to
# an implicit None when nothing matches -- confirm the nesting of `return`.
def search(self, value, field=None, like=True):
fields = [field] if field else ['issn', 'e_issn', 'journal', 'med_abbr', 'nlm_id']

for key in fields:
context = self.manager.query(Factor, key, value, like=like)
if context:
# 'factor_history' is stored as a JSON object; decode it to
# {int(key): float(value)} and drop entries whose value is falsy.
factor_history = json.loads(context['factor_history'])
context['factor_history'] = {int(k): float(v) for k, v in factor_history.items() if v}
return context

# Build a '|'-joined journal list for the rows matching the optional filters,
# print a summary line, then save the list to `outfile` or print it.
# min_value is inclusive (>=), max_value is exclusive (<); a filter left as
# None is skipped. Extra keyword arguments are accepted and unused.
def pubmed_filter(self, min_value=None, max_value=None, indexed=None, outfile=None, **kwargs):
res = self.manager.session.query(Factor)
if indexed is not None:
res = res.filter(Factor.indexed == indexed)
if min_value is not None:
res = res.filter(Factor.factor >= min_value)
if max_value is not None:
res = res.filter(Factor.factor < max_value)

# each journal contributes its issn, or '"<med_abbr>"[Journal]' when the
# issn is falsy
issn_list = '|'.join(each.issn or '"{}"[Journal]'.format(each.med_abbr) for each in res)

# 4000 is the length above which the summary is flagged as too long
# (presumably a PubMed query-length limit -- TODO confirm).
# **locals() supplies min_value and max_value to the templates.
if len(issn_list) > 4000:
print('total {n} journals with IF: {min_value} - {max_value} (exceed 4000 characters)'.format(n=res.count(), **locals()))
else:
print('{n} journals with IF: {min_value} - {max_value}'.format(n=res.count(), **locals()))
if outfile:
with util.safe_open(outfile, 'w') as out:
out.write(issn_list)
print('save file: {}'.format(outfile))
else:
print(issn_list)

# Write one JSON line to `out` that merges `context` with `data` (keys in
# `data` win on collision); when `data` is falsy, only report the miss.
# Used by build() as the callback that receives fetch_factor's result.
def save_json(self, out, context, data):
if data:
data = dict(context, **data)
print('>>> save nlm_id: {nlm_id}'.format(**context))
out.write(json.dumps(data) + '\n')
else:
print('<<< no factor for nlm_id: {nlm_id}'.format(**context))

# Rebuild the database: fetch factors for the journals parsed from
# `entrez_file` using `threads` worker threads, stage the results as JSON
# lines in `tmpfile`, then load them through the manager and record the
# database version.
def build(self, entrez_file, medline_file, threads=4, tmpfile=None):

# default staging file: entrez_file up to its last '.', plus '.jl'
tmpfile = tmpfile or entrez_file.rsplit('.', 1)[0] + '.jl'

with util.safe_open(tmpfile, 'w') as out:
pool = ThreadPool(threads)
for context in parse_journal(entrez_file):
# fetch_factor is called as fetch_factor(issn, e_issn); journals with
# neither identifier are skipped
kws = (context.get('issn'), context.get('e_issn'))
if any(kws):
pool.apply_async(fetch_factor,
args=kws,
callback=partial(self.save_json, out, context))
pool.close()
pool.join()

# presumably drops and recreates the tables -- verify Manager.create_table
self.manager.create_table(drop=True)

# nlm_ids parsed from medline_file; only membership is used, the values
# are placeholders
indexed_ids = {each['nlm_id']: 1 for each in parse_journal(medline_file)}

with util.safe_open(tmpfile) as f:
for line in f:
data = json.loads(line.strip())
data['indexed'] = True if data['nlm_id'] in indexed_ids else False
self.manager.upsert(Factor, 'nlm_id', Factor(**data))

# the database version is hard-coded to 2020 here
self.manager.upsert(FactorVersion,
None,
FactorVersion(version=2020, datetime=datetime.datetime.now()))
self.manager.close()


if __name__ == '__main__':
    # Running the module directly prints the version/database summary
    # for the default database.
    ImpactFactor().check_version()
42 changes: 42 additions & 0 deletions impact_factor/bin/_build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import click

from impact_factor import util, DEFAULT_EXCEL
from impact_factor.core import NlmCatalog


@click.command(
    name='build',
    help=click.style('build/update the database', italic=True, fg='green'),
)
@click.option('-i', '--excel', help='the excel file with IF', default=DEFAULT_EXCEL, show_default=True)
@click.option('-u', '--update', help='update all records', is_flag=True)
@click.pass_context
def main(ctx, **kwargs):
    """Build or update the database from the excel file of impact factors.

    Every row yielded by ``util.parse_excel`` is looked up in the database
    by journal name. Rows already present are skipped unless ``--update``
    is given. For the remaining rows ``NlmCatalog.search`` is tried with the
    eISSN, then the ISSN, then the journal name; the first truthy result is
    merged into the row. The row is inserted either way, and a warning is
    logged when no lookup succeeded.
    """
    with ctx.obj['manager'] as manager:
        for row in util.parse_excel(kwargs['excel']):
            issn = row['issn']
            eissn = row['eissn']
            journal = row['journal']

            existing = manager.query('journal', journal).first()

            # nothing to do for known journals unless a full update is forced
            if existing is not None and not kwargs['update']:
                continue

            # candidate queries in priority order: eISSN, ISSN, journal name
            queries = []
            if eissn:
                queries.append(f'{eissn}[ISSN]')
            if issn:
                queries.append(f'{issn}[ISSN]')
            queries.append(journal)

            match = None
            for term in queries:
                match = NlmCatalog.search(term)
                if match:
                    break

            if match:
                row.update(match)
            else:
                manager.logger.warning(f'no result for: {row}')

            manager.insert(row, key='journal')

29 changes: 29 additions & 0 deletions impact_factor/bin/_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import json

import click

from impact_factor import util
from impact_factor.core import Factor


@click.command(
    name='filter',
    help=click.style('filter according to factor', italic=True, fg='cyan'),
)
@click.option('-m', '--min-value', help='the min factor', type=float)
@click.option('-M', '--max-value', help='the max factor', type=float)
@click.option('-C', '--color', help='colorful output', is_flag=True)
@click.option('-P', '--pubmed-filter', help='output pubmed filter format', is_flag=True)
@click.pass_context
def main(ctx, **kwargs):
    """Print the journals selected by ``Factor.filter``.

    All parsed options (including ``color``) are forwarded to
    ``Factor.filter`` as keyword arguments. With ``--pubmed-filter`` the
    result is printed as returned; otherwise ``--color`` switches the output
    to syntax-highlighted, 2-space-indented JSON.
    """
    factor = Factor(ctx.obj['dbfile'])
    result = factor.filter(**kwargs)

    # highlighting only applies to the regular (non-PubMed) output
    if not kwargs['pubmed_filter'] and kwargs['color']:
        result = util.highlight_json(json.dumps(result, indent=2))

    print(result)
26 changes: 26 additions & 0 deletions impact_factor/bin/_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import json

import click

from impact_factor import util
from impact_factor.core import Factor


@click.command(
    name='search',
    help=click.style('search record from database', italic=True, fg='magenta'),
)
@click.argument('value')
@click.option('-f', '--field', help='specify a field to search')
@click.option('-C', '--color', help='colorful output', is_flag=True)
@click.pass_context
def main(ctx, **kwargs):
    """Search the database for ``value`` and print the result.

    With ``--color`` the result is printed as syntax-highlighted,
    2-space-indented JSON; otherwise it is printed as returned by
    ``Factor.search``.
    """
    # NOTE(review): --field is parsed but not forwarded to Factor.search
    # here -- confirm whether that is intended.
    record = Factor(ctx.obj['dbfile']).search(kwargs['value'])

    if kwargs['color']:
        output = util.highlight_json(json.dumps(record, indent=2))
    else:
        output = record

    print(output)
Loading

0 comments on commit 94815d2

Please sign in to comment.