diff --git a/docs/pl_training.html b/docs/pl_training.html
index 95bd7f4..5b8198c 100644
--- a/docs/pl_training.html
+++ b/docs/pl_training.html
@@ -33,6 +33,13 @@
+
+{% endraw %}
+
+{% raw %}
+
 {% endraw %}
@@ -71,7 +78,7 @@

 Load model and tokenizer

-ner_model_from[source]
-ner_model_from(name:str, dataset:IOBES)
+ner_model_from[source]
+ner_model_from(name:str, dataset:IOBES)

 name: from_pretrained(name)

@@ -96,7 +103,7 @@

 ner_model_from

-ner_tokenizer_from[source]
-ner_tokenizer_from(name:str)
+ner_tokenizer_from[source]
+ner_tokenizer_from(name:str)
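
The two loaders above pair a token-classification model with its tokenizer. A minimal sketch, assuming `ds` is an already-built IOBES dataset and using a stand-in checkpoint tag:

    from forgebox.hf.train import ner_model_from, ner_tokenizer_from

    # "bert-base-cased" is a placeholder checkpoint tag; `ds` is assumed to be
    # an existing IOBES dataset (it supplies the entity label set).
    model = ner_model_from("bert-base-cased", ds)
    tokenizer = ner_tokenizer_from("bert-base-cased")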

@@ -133,7 +140,7 @@

 Lightning data module

-class NERDataModule[source]
-NERDataModule(*args:Any, **kwargs:Any) :: LightningDataModule
+class NERDataModule[source]
+NERDataModule(*args:Any, **kwargs:Any) :: LightningDataModule

 A DataModule standardizes the training, val, test splits, data preparation and transforms. The main advantage is consistent data splits, data preparation and transforms across models.

@@ -202,7 +209,7 @@

 class NERDataModule

-class NERModule[source]
-NERModule(model) :: LightningModule
+class NERModule[source]
+NERModule(model) :: LightningModule

 PyTorch Lightning module for training an NER model
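
A hedged sketch of where the two classes sit in a Lightning training run; NERDataModule's constructor arguments are not pinned down by the `(*args, **kwargs)` signature above, so those shown here are assumptions:

    import pytorch_lightning as pl

    # NERDataModule's real constructor arguments are not documented here,
    # so the ones below are assumptions for illustration only.
    dm = NERDataModule(ds, tokenizer, batch_size=32)
    module = NERModule(model)              # signature documented above
    trainer = pl.Trainer(max_epochs=3)     # standard Lightning training loop
    trainer.fit(module, datamodule=dm)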

@@ -240,7 +247,7 @@

 Enhance pipeline

-clean_ner_output[source]
-clean_ner_output(outputs)
+clean_ner_output[source]
+clean_ner_output(outputs)

 Cleaning output for NER task

@@ -258,47 +265,6 @@

 clean_ner_output

-predict_ner_table[source]
-predict_ner_table(pipeline_kw)
-
-{% endraw %}
-
-{% raw %}
-
-refined_ner_pipeline[source]
-refined_ner_pipeline(model, tokenizer, **pipeline_kw)
-
 {% endraw %}
@@ -313,8 +279,11 @@

 refined_ner_pipeline

-refined_ner_from_pretrained[source]
-refined_ner_from_pretrained(pretrained, **pipeline_kw)
+class NERInference[source]
+NERInference(model, tokenizer, name=None)
+
+NER Inference pipeline
+    ner = NERInference.from_pretrained('xxxx/xxxx')
+    ner.predict(['text1','text2'])
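
As documented above, the three removed pipeline helpers are replaced by a single NERInference class. A minimal usage sketch — the checkpoint tag is a placeholder, and the selected columns are the fields the prediction DataFrame carries:

    from torch import device
    from forgebox.hf.train import NERInference

    # any token-classification checkpoint works here; this tag is a placeholder
    ner = NERInference.from_pretrained("your-org/your-ner-model")
    df = ner.predict(
        ["First paragraph ...", "Second paragraph ..."],
        dev=device("cpu"),   # pass device("cuda") to run the forward pass on GPU
        batch_size=32,
    )
    # one row per decoded entity span; text_id maps each row back to its input
    df[["text_id", "word", "entity", "start", "end"]]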

diff --git a/forgebox/__init__.py b/forgebox/__init__.py
index 58ce5cd..ac15521 100644
--- a/forgebox/__init__.py
+++ b/forgebox/__init__.py
@@ -1 +1 @@
-__version__ = "0.4.11"
+__version__ = "0.4.17"
diff --git a/forgebox/_nbdev.py b/forgebox/_nbdev.py
index 8041416..7eb9377 100644
--- a/forgebox/_nbdev.py
+++ b/forgebox/_nbdev.py
@@ -114,9 +114,7 @@
          "NERDataModule": "72_pl_training.ipynb",
          "NERModule": "72_pl_training.ipynb",
          "clean_ner_output": "72_pl_training.ipynb",
-         "predict_ner_table": "72_pl_training.ipynb",
-         "refined_ner_pipeline": "72_pl_training.ipynb",
-         "refined_ner_from_pretrained": "72_pl_training.ipynb",
+         "NERInference": "72_pl_training.ipynb",
          "CudaDevice": "CUDA_GPU_Management.ipynb",
          "CudaHandler": "CUDA_GPU_Management.ipynb",
          "MLMVisualizer": "bert_visualize.ipynb",
diff --git a/forgebox/hf/train.py b/forgebox/hf/train.py
index d307835..e797c5b 100644
--- a/forgebox/hf/train.py
+++ b/forgebox/hf/train.py
@@ -1,18 +1,29 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/72_pl_training.ipynb (unless otherwise specified).

-__all__ = ['ner_model_from', 'ner_tokenizer_from', 'NERDataModule', 'NERModule', 'clean_ner_output',
-           'predict_ner_table', 'refined_ner_pipeline', 'refined_ner_from_pretrained']
+__all__ = ['ner_model_from', 'ner_tokenizer_from', 'NERDataModule', 'NERModule', 'clean_ner_output', 'NERInference']

 # Cell
 from .data import IOBES
 from ..imports import *
+from ..loop import chunkify
 import pytorch_lightning as pl
 from transformers import (
     AutoModelForTokenClassification,
     AutoTokenizer,
     pipeline
 )
-from typing import Callable
+from tqdm.notebook import tqdm
+from typing import Callable, List
+from torch import device
+
+# Cell
+try:
+    ishell = get_ipython()
+    IS_JUPYTER = True
+    from tqdm.notebook import tqdm
+except NameError:
+    IS_JUPYTER = False
+    from tqdm import tqdm

 # Cell
@@ -113,12 +124,12 @@ def clean_ner_output(self, outputs):
     last_idx = 0
     # make to sub group by position
     for output in outputs:
-        if output["index"]-1 == last_idx:
+        if output["start"] in [last_idx, last_idx-1]:
             current.append(output)
         else:
             results.append(current)
             current = [output, ]
-        last_idx = output["index"]
+        last_idx = output["end"]
     if len(current) > 0:
         results.append(current)
@@ -139,30 +150,177 @@ def clean_ner_output(self, outputs):
             word=new_str,
             start=min(starts),
             end=max(ends),
-            entity=c[0]['entity']
+            entity=c[0]['entity_group']
         ))
     return strings

-def predict_ner_table(pipeline_kw):
-    def predict_ner_table_(self, text: str) -> pd.DataFrame:
-        return pd.DataFrame(
-            self.clean_output(
-                self(text, **pipeline_kw)
-            )
-        )
-    return predict_ner_table_
-
-def refined_ner_pipeline(model, tokenizer, **pipeline_kw):
-    if "aggregation_strategy" not in pipeline_kw:
-        pipeline_kw["aggregation_strategy"] = "first"
-
-    ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
-
-    ner_pipeline.__class__.clean_output = clean_ner_output
-    ner_pipeline.__class__.predict_table = predict_ner_table(pipeline_kw)
-    return ner_pipeline
-
-def refined_ner_from_pretrained(pretrained, **pipeline_kw):
-    model = AutoModelForTokenClassification.from_pretrained(pretrained)
-    tokenizer = AutoTokenizer.from_pretrained(pretrained)
-    return refined_ner_pipeline(model, tokenizer, **pipeline_kw)
\ No newline at end of file
+# Cell
+class NERInference:
+    """
+    NER Inference pipeline
+    ner = NERInference.from_pretrained('xxxx/xxxx')
+    ner.predict(['text1','text2'])
+    """
+
+    def __init__(self, model, tokenizer, name=None):
+        super().__init__()
+        self.model = model.eval()
+        self.tokenizer = tokenizer
+        self.name = name if name else "NER model"
+
+    def __repr__(self):
+        return f"[NERInference on {self.name}]"
+
+    def to(self, device_str):
+        self.model = self.model.to(device(device_str))
+        return self
+
+    @classmethod
+    def from_pretrained(cls, tag):
+        """
+        Load from pretrained model and tokenizer
+        """
+        model = AutoModelForTokenClassification.from_pretrained(tag)
+        tokenizer = AutoTokenizer.from_pretrained(tag)
+        return cls(model=model, tokenizer=tokenizer, name=model.config._name_or_path)
+
+    def __call__(self, data, batch_size=32, dev=device("cpu")):
+        if type(data) == str:
+            return self.batch_predict([data, ])
+        else:
+            return self.predict(data, dev=dev, batch_size=batch_size)
+
+    def predict(
+        self,
+        texts: List[str],
+        dev=device("cpu"),
+        batch_size: int = 32,
+        progress_bar: bool = True
+    ) -> pd.DataFrame:
+        """
+        Predict a list of sentences / paragraphs
+        """
+        # move the model to the target device
+        self.model = self.model.to(dev)
+        iterator = list(enumerate(chunkify(texts, bs=batch_size)))
+        if progress_bar:
+            iterator = tqdm(iterator, leave=False)
+
+        # run through the iterator
+        all_dfs = []
+        for i, text_b in iterator:
+            # batch-by-batch prediction
+            batch_df = self.batch_predict(text_b)
+            if len(batch_df) > 0:
+                # calculate the row number
+                batch_df['text_id'] = batch_df.apply(
+                    lambda row: i*batch_size+row.batch_row_sn, axis=1)
+                all_dfs.append(batch_df)
+
+        # move the model back to CPU
+        self.model = self.model.to("cpu")
+        return pd.concat(all_dfs).reset_index(drop=True)
+
+    def tokenizing(self, texts):
+        inputs = self.tokenizer(
+            texts,
+            padding="max_length",
+            max_length=self.tokenizer.model_max_length,
+            return_attention_mask=True,
+            return_tensors='pt', truncation=True, return_offsets_mapping=True
+        ).to(self.model.device)
+        return inputs
+
+    def batch_predict(self, texts: List[str]) -> pd.DataFrame:
+        """
+        Predict a single batch of sentences
+        """
+        id2label = self.model.config.id2label
+        inputs = self.tokenizing(texts)
+
+        with torch.no_grad():
+            outputs = self.model(input_ids=inputs.input_ids,
+                                 attention_mask=inputs.attention_mask)
+        inputs = inputs.to(device('cpu'))
+
+        pred_idx = outputs.logits.argmax(-1).to(device("cpu"))
+        batch_size = pred_idx.size(0)
+        offsets = inputs.offset_mapping
+        results = []
+        for bi in range(batch_size):
+            text = texts[bi]
+            input_ids = inputs.input_ids[bi]
+            word_ids = inputs.word_ids(bi)
+            pred_ids = pred_idx[bi]
+            # initial values for the row
+            last_pos = 0
+            previous_has_positive = False
+            current_start = 0
+            current_index = 0
+            current_id = 0
+            line = []
+            for ti in range(1, len(input_ids)):
+                if input_ids[ti] == self.tokenizer.sep_token_id:
+                    break
+                # is the current token an appending sub-word?
+                if word_ids[ti] == last_pos:
+                    pass
+                # is the current token predicted negative (label 0)?
+                elif pred_ids[ti].item() == 0:
+                    # store the previous hanging prediction
+                    if previous_has_positive:
+                        start = current_start
+                        end = offsets[bi, ti, 0].item()
+                        line.append({
+                            "start": start, "end": end,
+                            "entity": id2label[current_id],
+                            "word": text[start:end],
+                            "index": current_index,
+                        })
+
+                    current_start = offsets[bi, ti, 0].item()
+                    previous_has_positive = False
+                    current_id = 0
+                    current_index = ti
+                # has a positive prediction index, other than zero
+                else:
+                    if previous_has_positive:
+                        # different entity than the previous token
+                        if current_id != pred_ids[ti].item():
+                            start = current_start
+                            end = offsets[bi, ti, 0].item()
+                            line.append({
+                                "start": start,
+                                "end": end,
+                                "entity": id2label[current_id],
+                                "word": text[start:end],
+                                "index": current_index,
+                            })
+                            current_start = offsets[bi, ti, 0].item()
+                    # this is the first positive prediction in a while
+                    else:
+                        current_start = offsets[bi, ti, 0].item()
+                    previous_has_positive = True
+                    current_index = ti
+                    current_id = pred_ids[ti].item()
+
+                last_pos = word_ids[ti]
+            if previous_has_positive:
+                start = current_start
+                end = offsets[bi, ti, 1].item()
+                line.append({
+                    "start": start,
+                    "end": end,
+                    "entity": id2label[current_id],
+                    "word": text[start:end],
+                    "index": current_index,
+                })
+
+            results.append(line)
+        all_dfs = []
+        for i, res in enumerate(results):
+            sub_df = pd.DataFrame(res)
+            sub_df["batch_row_sn"] = i
+            all_dfs.append(sub_df)
+        return pd.concat(all_dfs)
\ No newline at end of file
diff --git a/nbs/72_pl_training.ipynb b/nbs/72_pl_training.ipynb
index be82bc7..3ce4854 100644
--- a/nbs/72_pl_training.ipynb
+++ b/nbs/72_pl_training.ipynb
[Cell-level changes mirror forgebox/hf/train.py above: the new imports and tqdm
guard, the offset-based clean_ner_output, and the NERInference class replacing
the removed pipeline helpers; the remainder of the notebook diff is
execution-count churn plus one new empty trailing cell.]
"execution_count": 281, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "class NERInference:\n", + " \"\"\"\n", + " NER Inference pipeline\n", + " ner = NERInference.from_pretrained('xxxx/xxxx')\n", + " ner.predict(['text1','text2'])\n", + " \"\"\"\n", + "\n", + " def __init__(self, model, tokenizer, name=None):\n", + " super().__init__()\n", + " self.model = model.eval()\n", + " self.tokenizer = tokenizer\n", + " self.name = name if name else \"NER model\"\n", + "\n", + " def __repr__(self):\n", + " return f\"[NERInference on {self.name}]\"\n", "\n", - "def predict_ner_table(pipeline_kw):\n", - " def predict_ner_table_(self, text: str) -> pd.DataFrame:\n", - " return pd.DataFrame(\n", - " self.clean_output(\n", - " self(text, **pipeline_kw)\n", - " )\n", - " )\n", - " return predict_ner_table_\n", + " def to(self, device_str):\n", + " self.model = self.model.to(device(device_str))\n", + " return self\n", "\n", - "def refined_ner_pipeline(model, tokenizer, **pipeline_kw):\n", - " if \"aggregation_strategy\" not in pipeline_kw:\n", - " pipeline_kw[\"aggregation_strategy\"] = \"first\"\n", + " @classmethod\n", + " def from_pretrained(cls, tag):\n", + " \"\"\"\n", + " Load from pretrained model and tokenizer\n", + " \"\"\"\n", + " model = AutoModelForTokenClassification.from_pretrained(tag)\n", + " tokenizer = AutoTokenizer.from_pretrained(tag)\n", + " return cls(model=model, tokenizer=tokenizer, name=model.config._name_or_path)\n", + " \n", + " def __call__(self, data, batch_size=32, dev=device(\"cpu\")):\n", + " if type(data) == str:\n", + " return self.batch_predict([data,])\n", + " else:\n", + " return self.predict(data, dev=dev, batch_size=batch_size)\n", "\n", - " ner_pipeline = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", + " def predict(\n", + " self,\n", + " texts: List[str],\n", + " dev=device(\"cpu\"),\n", + " batch_size: int = 32,\n", + " progress_bar: bool = True\n", + " ) -> pd.DataFrame:\n", + " \"\"\"\n", + " Predict a list of sentences/ paragraphs\n", + " \"\"\"\n", + " # place the model into device\n", + " self.model = self.model.to(dev)\n", + " iterator = list(enumerate(chunkify(texts, bs=batch_size)))\n", + " if progress_bar:\n", + " iterator = tqdm(iterator, leave=False)\n", + "\n", + " # run through iterator\n", + " all_dfs = []\n", + " for i, text_b in iterator:\n", + " # by batch prediction\n", + " batch_df = self.batch_predict(text_b)\n", + " if len(batch_df) > 0:\n", + " # calculate the row number\n", + " batch_df['text_id'] = batch_df.apply(\n", + " lambda row: i*batch_size+row.batch_row_sn, axis=1)\n", + " all_dfs.append(batch_df)\n", + "\n", + " # place the model back to cpu\n", + " self.model = self.model.to(\"cpu\")\n", + " return pd.concat(all_dfs).reset_index(drop=True)\n", " \n", - " ner_pipeline.__class__.clean_output = clean_ner_output\n", - " ner_pipeline.__class__.predict_table = predict_ner_table(pipeline_kw)\n", - " return ner_pipeline\n", + " def tokenizing(self, texts):\n", + " inputs = self.tokenizer(\n", + " texts,\n", + " padding=\"max_length\",\n", + " max_length=self.tokenizer.model_max_length,\n", + " return_attention_mask=True,\n", + " return_tensors='pt', truncation=True, return_offsets_mapping=True\n", + " ).to(self.model.device)\n", + " return inputs\n", + "\n", + "\n", + " def batch_predict(self, texts:List[str])-> pd.DataFrame:\n", + " \"\"\"\n", + " Predict a single batch of sentences\n", + " \"\"\"\n", + " id2label = self.model.config.id2label\n", + " inputs = self.tokenizing(texts)\n", + "\n", + " with 
torch.no_grad():\n", + " outputs = self.model(input_ids=inputs.input_ids,\n", + " attention_mask=inputs.attention_mask)\n", + " inputs = inputs.to(device('cpu'))\n", + "\n", + " pred_idx = outputs.logits.argmax(-1).to(device(\"cpu\"))\n", + " batch_size = pred_idx.size(0)\n", + " offsets = inputs.offset_mapping\n", + " results = []\n", + " for bi in range(batch_size):\n", + " text = texts[bi]\n", + " input_ids = inputs.input_ids[bi]\n", + " word_ids = inputs.word_ids(bi)\n", + " pred_ids = pred_idx[bi]\n", + " # initial values for the row\n", + " last_pos = 0\n", + " previous_has_positive = False\n", + " current_start = 0\n", + " current_index = 0\n", + " current_id = 0\n", + " line = []\n", + " for ti in range(1, len(input_ids)):\n", + " if input_ids[ti] == self.tokenizer.sep_token_id:\n", + " break\n", + " # is the current token an appending sub-word?\n", + " if word_ids[ti] == last_pos:\n", + " pass\n", + " # is current token negative\n", + " elif pred_ids[ti].item() == 0:\n", + " # store the previous hanging prediction\n", + " if previous_has_positive:\n", + " start = current_start\n", + " end = offsets[bi, ti, 0].item()\n", + " line.append({\n", + " \"start\": start, \"end\": end,\n", + " \"entity\": id2label[current_id],\n", + " \"word\": text[start:end],\n", + " \"index\": current_index,\n", + " })\n", "\n", - "def refined_ner_from_pretrained(pretrained, **pipeline_kw):\n", - " model = AutoModelForTokenClassification.from_pretrained(pretrained)\n", - " tokenizer = AutoTokenizer.from_pretrained(pretrained)\n", - " return refined_ner_pipeline(model, tokenizer, **pipeline_kw)" + " current_start = offsets[bi, ti, 0].item()\n", + " previous_has_positive = False\n", + " current_id = 0\n", + " current_index = ti\n", + " # has positive prediction index, other than zero\n", + " else:\n", + " if previous_has_positive:\n", + " # different than the previous\n", + " if current_id != pred_ids[ti].item():\n", + " start = current_start\n", + " end = offsets[bi, ti, 0].item()\n", + " line.append({\n", + " \"start\": start,\n", + " \"end\": end,\n", + " \"entity\": id2label[current_id],\n", + " \"word\": text[start:end],\n", + " \"index\": current_index,\n", + " })\n", + " current_start = offsets[bi, ti, 0].item()\n", + " # this is the 1st postive predict for a while\n", + " else:\n", + " current_start = offsets[bi, ti, 0].item()\n", + " previous_has_positive = True\n", + " current_index = ti\n", + " current_id = pred_ids[ti].item()\n", + "\n", + " last_pos = word_ids[ti]\n", + " if previous_has_positive:\n", + " start = current_start\n", + " end = offsets[bi, ti, 1].item()\n", + " line.append({\n", + " \"start\": start,\n", + " \"end\": end,\n", + " \"entity\": id2label[current_id],\n", + " \"word\": text[start:end],\n", + " \"index\": current_index,\n", + " })\n", + "\n", + " results.append(line)\n", + " all_dfs = []\n", + " for i, res in enumerate(results):\n", + " sub_df = pd.DataFrame(res)\n", + " sub_df[\"batch_row_sn\"] = i\n", + " all_dfs.append(sub_df)\n", + " return pd.concat(all_dfs)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/settings.ini b/settings.ini index 79bf464..b416459 100644 --- a/settings.ini +++ b/settings.ini @@ -7,7 +7,7 @@ author = xiaochen(ray) zhang author_email = b2ray2c@gmail.com copyright = xiaochen(ray) zhang branch = master -version = 0.4.11 +version = 0.4.17 min_python = 3.6 audience = Developers language = English
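
For reference, the reworked grouping in clean_ner_output above walks character offsets rather than token indices. A small worked example, with hand-made dicts shaped like aggregated pipeline output (hence the entity_group key):

    outputs = [
        {"word": "Hong", "start": 0, "end": 4, "entity_group": "LOC"},
        # start == previous end -> appended to the current group
        {"word": "Kong", "start": 4, "end": 8, "entity_group": "LOC"},
        # start is past the previous end -> a new group begins
        {"word": "Kowloon", "start": 12, "end": 19, "entity_group": "LOC"},
    ]
    # grouping yields [["Hong", "Kong"], ["Kowloon"]]; each group is merged into
    # one span with start=min(starts), end=max(ends), and the first member's
    # entity_group as its label.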