rebuild project

suqingdong · Jan 11, 2023 · 94815d2 · 94815d2
1 parent 85d1c7d
commit 94815d2
Show file tree

Hide file tree

Showing 29 changed files with 440 additions and 420 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,7 +1,7 @@
 *pyc
+*__pycache__
 .vscode
-__pycache__
 build
 dist
-test
+tests
 *.egg-info
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,4 +1,4 @@
 include requirements.txt
-include impact_factor/version/version.json
+include impact_factor/version.json
 recursive-include impact_factor/data/ *
-exclude test
+exclude tests
diff --git a/README.md b/README.md
@@ -8,16 +8,59 @@
 # Impact Factor Toolkits
 
 ## Installation
-```base
-pip install impact_factor
+```bash
+python3 -m pip install -U impact_factor
 ```
 
-## Usage
+## Use in CMD
+### `help`
 ```bash
 IF -h
-
+# or
 impact_factor -h
 ```
 
+### `build`
+> build/update the database
+```bash
+# optional, only required when you need to build or update the database
+IF build
+```
+
+### `search`
+> search with `journal`, `journal_abbr`, `issn`, `eissn` or `nlm_id`
+```bash
+IF search nature         # search journal
+IF search 'nature c%'    # LIKE-pattern search on journal
+IF search 0028-0836      # search ISSN
+IF search 1476-4687      # search eISSN
+IF search 0410462        # search nlm_id
+IF search nature --color # colorful output
+```
+
+### `filter`
+> filter `factor` with `min_value` and `max_value`
+```bash
+IF filter -m 100 -M 200 --color
+
+# output in PubMed filter format
+IF filter -m 100 -M 200 --pubmed-filter
+```
+
+## Use in Python
+```python
+from impact_factor.core import Factor
+
+fa = Factor()
+
+print(fa.dbfile)
+
+fa.search('nature')
+fa.search('nature c%')
+
+fa.filter(min_value=100, max_value=200)
+fa.filter(min_value=100, max_value=200, pubmed_filter=True)
+```
+
 ## Documents
 https://impact-factor.readthedocs.io
diff --git a/build.sh b/build.sh
@@ -2,3 +2,4 @@ rm -rf dist build *egg-info
 
 python3 setup.py sdist bdist_wheel
 
+rm -rf build *egg-info
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -28,11 +28,11 @@
 # -- Project information -----------------------------------------------------
 
 project = 'impact_factor'
-copyright = '2020, suqingdong'
+copyright = '2022, suqingdong'
 author = 'suqingdong'
 
 # The full version, including alpha/beta/rc tags
-release = '1.0.0'
+release = '1.1.0'
 
 
 # -- General configuration ---------------------------------------------------

diff --git a/help.md b/help.md
diff --git a/impact_factor/__init__.py b/impact_factor/__init__.py
@@ -1,123 +1,12 @@
-"""
-    Impact Factor Toolkits
-"""
-import os
-import sys
 import json
-import datetime
-import textwrap
-from functools import partial
+from pathlib import Path
 
-from multiprocessing.dummy import Pool as ThreadPool
 
-import click
+BASE_DIR = Path(__file__).resolve().parent
 
-from impact_factor import util
-from impact_factor.util.factor import fetch_factor
-from impact_factor.util.journal import parse_journal
-from impact_factor.db.manager import Manager, Factor, FactorVersion
+DEFAULT_DB = BASE_DIR.joinpath('data', 'impact_factor.sqlite3')
+DEFAULT_EXCEL = BASE_DIR.joinpath('data', '2022_JCR_IF.xlsx')
 
-BASE_DIR = os.path.dirname(os.path.realpath(__file__))
-DEFAULT_DB = os.path.join(BASE_DIR, 'data', 'impact_factor.db')
+version_info = json.load(BASE_DIR.joinpath('version.json').open())
 
-version_info = json.load(open(os.path.join(BASE_DIR, 'version', 'version.json')))
 __version__ = version_info['version']
-__author__ = version_info['author']
-__author_email__ = version_info['author_email']
-
-
-class ImpactFactor(object):
-    def __init__(self, dbfile=DEFAULT_DB, echo=False, **kwargs):
-        self.dbfile = dbfile
-        self.manager = Manager(dbfile, echo=echo)
-
-    def check_version(self):
-        context = self.manager.query(FactorVersion)
-        res = self.manager.count(Factor.nlm_id)
-        context['total_count'] = res.scalar()
-        context['indexed_count'] = res.filter(Factor.indexed == True).scalar()
-
-        click.secho(textwrap.dedent('''
-            ==========================================================
-            program version:\t{__version__}
-            database version:\t{version} [{datetime}]
-            total journals:\t\t{total_count}
-            indexed journals:\t{indexed_count}
-            database filepath:\t{dbfile}
-            ==========================================================
-        ''').format(__version__=__version__, dbfile=self.dbfile, **context), fg='green', bold=True)
-
-    def search(self, value, field=None, like=True):
-        fields = [field] if field else ['issn', 'e_issn', 'journal', 'med_abbr', 'nlm_id']
-
-        for key in fields:
-            context = self.manager.query(Factor, key, value, like=like)
-            if context:
-                factor_history = json.loads(context['factor_history'])
-                context['factor_history'] = {int(k): float(v) for k, v in factor_history.items() if v}
-                return context
-
-    def pubmed_filter(self, min_value=None, max_value=None, indexed=None, outfile=None, **kwargs):
-        res = self.manager.session.query(Factor)
-        if indexed is not None:
-            res = res.filter(Factor.indexed == indexed)
-        if min_value is not None:
-            res = res.filter(Factor.factor >= min_value)
-        if max_value is not None:
-            res = res.filter(Factor.factor < max_value)
-
-        issn_list = '|'.join(each.issn or '"{}"[Journal]'.format(each.med_abbr) for each in res)
-
-        if len(issn_list) > 4000:
-            print('total {n} journals with IF: {min_value} - {max_value} (exceed 4000 characters)'.format(n=res.count(), **locals()))
-        else:
-            print('{n} journals with IF: {min_value} - {max_value}'.format(n=res.count(), **locals()))
-            if outfile:
-                with util.safe_open(outfile, 'w') as out:
-                    out.write(issn_list)
-                print('save file: {}'.format(outfile))
-            else:
-                print(issn_list)
-
-    def save_json(self, out,  context, data):
-        if data:
-            data =  dict(context, **data)
-            print('>>> save nlm_id: {nlm_id}'.format(**context))
-            out.write(json.dumps(data) + '\n')
-        else:
-            print('<<< no factor for nlm_id: {nlm_id}'.format(**context))
-
-    def build(self, entrez_file, medline_file, threads=4, tmpfile=None):
-
-        tmpfile = tmpfile or entrez_file.rsplit('.', 1)[0] + '.jl'
-
-        with util.safe_open(tmpfile, 'w') as out:
-            pool = ThreadPool(threads)
-            for context in parse_journal(entrez_file):
-                kws = (context.get('issn'), context.get('e_issn'))
-                if any(kws):
-                    pool.apply_async(fetch_factor,
-                                     args=kws,
-                                     callback=partial(self.save_json, out, context))
-            pool.close()
-            pool.join()
-
-        self.manager.create_table(drop=True)
-
-        indexed_ids = {each['nlm_id']: 1 for each in parse_journal(medline_file)}
-
-        with util.safe_open(tmpfile) as f:
-            for line in f:
-                data = json.loads(line.strip())
-                data['indexed'] = True if data['nlm_id'] in indexed_ids else False
-                self.manager.upsert(Factor, 'nlm_id', Factor(**data))
-
-        self.manager.upsert(FactorVersion,
-                            None,
-                            FactorVersion(version=2020, datetime=datetime.datetime.now()))
-        self.manager.close()
-
-
-if __name__ == '__main__':
-    IF = ImpactFactor()
-    IF.check_version()
diff --git a/impact_factor/bin/_build.py b/impact_factor/bin/_build.py
@@ -0,0 +1,42 @@
+import click
+
+from impact_factor import util, DEFAULT_EXCEL
+from impact_factor.core import NlmCatalog
+
+
+@click.command(
+    name='build',
+    help=click.style('build/update the database', italic=True, fg='green'),
+)
+@click.option('-i', '--excel', help='the excel file with IF', default=DEFAULT_EXCEL, show_default=True)
+@click.option('-u', '--update', help='update all records', is_flag=True)
+@click.pass_context
+def main(ctx, **kwargs):
+
+    with ctx.obj['manager'] as manager:
+
+        for context in util.parse_excel(kwargs['excel']):
+            issn = context['issn']
+            eissn = context['eissn']
+            journal = context['journal']
+
+            record = manager.query('journal', journal).first()
+
+            # update when record is not in database, or force update
+            if record is None or kwargs['update']:
+
+                res = None
+                if eissn:
+                    res = NlmCatalog.search(f'{eissn}[ISSN]')
+                if not res and issn:
+                    res = NlmCatalog.search(f'{issn}[ISSN]')
+                if not res:
+                    res = NlmCatalog.search(journal)
+
+                if res:
+                    context.update(res)
+                else:
+                    manager.logger.warning(f'no result for: {context}')
+
+                manager.insert(context, key='journal')
+
diff --git a/impact_factor/bin/_filter.py b/impact_factor/bin/_filter.py
@@ -0,0 +1,29 @@
+import json
+
+import click
+
+from impact_factor import util
+from impact_factor.core import Factor
+
+
+@click.command(
+    name='filter',
+    help=click.style('filter according to factor', italic=True, fg='cyan'),
+)
+@click.option('-m', '--min-value', help='the min factor', type=float)
+@click.option('-M', '--max-value', help='the max factor', type=float)
+@click.option('-C', '--color', help='colorful output', is_flag=True)
+@click.option('-P', '--pubmed-filter', help='output pubmed filter format', is_flag=True)
+@click.pass_context
+def main(ctx, **kwargs):
+
+    fa = Factor(ctx.obj['dbfile'])
+
+    res = fa.filter(**kwargs)
+
+    if kwargs['pubmed_filter']:
+        print(res)
+    else:
+        if kwargs['color']:
+            res = util.highlight_json(json.dumps(res, indent=2))
+        print(res)
diff --git a/impact_factor/bin/_query.py b/impact_factor/bin/_query.py
@@ -0,0 +1,26 @@
+import json
+
+import click
+
+from impact_factor import util
+from impact_factor.core import Factor
+
+
+@click.command(
+    name='search',
+    help=click.style('search record from database', italic=True, fg='magenta'),
+)
+@click.argument('value')
+@click.option('-f', '--field', help='specify a field to search')
+@click.option('-C', '--color', help='colorful output', is_flag=True)
+@click.pass_context
+def main(ctx, **kwargs):
+
+    fa = Factor(ctx.obj['dbfile'])
+
+    res = fa.search(kwargs['value'])
+
+    if kwargs['color']:
+        res = util.highlight_json(json.dumps(res, indent=2))
+
+    print(res)
diff --git a/build.sh b/build.sh
@@ -2,3 +2,4 @@ rm -rf dist build *egg-info
 
 python3 setup.py sdist bdist_wheel
 
+rm -rf build *egg-info