diff --git a/docs/_data/sidebars/home_sidebar.yml b/docs/_data/sidebars/home_sidebar.yml index 449d4f7..d50c1ff 100644 --- a/docs/_data/sidebars/home_sidebar.yml +++ b/docs/_data/sidebars/home_sidebar.yml @@ -78,6 +78,12 @@ entries: - output: web,pdf title: Lightning Callbacks url: thunder_callbacks.html + - output: web,pdf + title: Data parts for hf transformers + url: hf_transformer_data.html + - output: web,pdf + title: PyTorch Lightning training + url: pl_training.html - output: web,pdf title: CUDA GPU Management url: CUDA_GPU_Management.html diff --git a/docs/hf_transformer_data.html b/docs/hf_transformer_data.html new file mode 100644 index 0000000..1d333a1 --- /dev/null +++ b/docs/hf_transformer_data.html @@ -0,0 +1,462 @@ +--- + +title: Data parts for hf transformers + + +keywords: fastai +sidebar: home_sidebar + + + +nb_path: "nbs/70_hf_transformer_data.ipynb" +--- + +
Process IOBES files
convert_iob2_file_to_iobes[source]
convert_iob2_file_to_iobes(file_path, result_path)
Convert IOB2 file to IOBES
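A rough illustration of the two tagging schemes (the rows below are invented for illustration, not taken from the repository's data): IOB2 marks the first token of an entity with B- and every following token with I-, while IOBES additionally marks the last token of a multi-token entity with E- and a single-token entity with S-.

IOB2                          IOBES
T            B-cell_type      T            B-cell_type
lymphocytes  I-cell_type      lymphocytes  E-cell_type
IL-2         B-protein        IL-2         S-protein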
conbine_iobes_file[source]
conbine_iobes_file(file_paths:List[Path], new_file_path:Path)
Combine multiple IOBES files into one IOBES file
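A minimal usage sketch; the file paths are placeholders, not files shipped with this repository.

from pathlib import Path
from forgebox.hf.data import conbine_iobes_file

# merge several annotation files into a single file for training
conbine_iobes_file(
    file_paths=[Path("train_part1.iobes"), Path("train_part2.iobes")],
    new_file_path=Path("train_all.iobes"),
)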
Dataset
class IOBES[source]
IOBES(*args, **kwds) :: Dataset
Load iobes file for NER training task
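A sketch of building a training and a validation dataset that share one label category set, which is how the constructor's category argument is meant to be used; the file paths are placeholders, and the checkpoint name is the one used further down this page.

from transformers import AutoTokenizer
from forgebox.hf.data import IOBES

tokenizer = AutoTokenizer.from_pretrained("raynardj/roberta-pubmed", add_prefix_space=True)
train_ds = IOBES("train.iobes", tokenizer, max_len=128)
# reuse the label categories discovered on the training split
valid_ds = IOBES("valid.iobes", tokenizer, max_len=128, category=train_ds.cates)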
clean_output[source]
clean_output(outputs)
Cleaning output for NER task
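clean_output works on the per-token prediction dicts that a HuggingFace token-classification pipeline returns. A hedged illustration of that shape; the values are invented, and the joining step assumes a tokenizer is available to merge sub-word tokens back into strings.

outputs = [
    {"index": 1, "word": "recept", "start": 120, "end": 126, "entity": "protein"},
    {"index": 2, "word": "ors", "start": 126, "end": 129, "entity": "protein"},
]
# consecutive indices are grouped into one span, so the cleaned result would be
# roughly [{"word": "receptors", "start": 120, "end": 129, "entity": "protein"}]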
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("raynardj/roberta-pubmed", add_prefix_space=True)
dataset = IOBES("/Users/xiaochen.zhang/data/valid.iobes", tokenizer)
for w,l in zip(*dataset[2]):
+    print(f"{w}-{l}")
in-O
+blood-O
+;-O
+content-O
+of-O
+cAMP-O
+was-O
+also-O
+decreased-O
+in-O
+lymphocytes-O
+by-O
+33-O
+%-O
+.-O
+At-O
+the-O
+same-O
+time-O
+,-O
+total-O
+content-O
+of-O
+T-cell_type
+lymphocytes-cell_type
+was-O
+decreased-O
+1.5-fold-O
+in-O
+peripheric-O
+blood-O
+.-O
+Treatment-O
+with-O
+I-hydroxyvitamin-O
+D3-O
+(-O
+1-1.5-O
+mg-O
+daily-O
+,-O
+within-O
+4-O
+weeks-O
+)-O
+led-O
+to-O
+normalization-O
+of-O
+total-O
+and-O
+ionized-O
+form-O
+of-O
+Ca2+-O
+and-O
+of-O
+25-O
+(-O
+OH-O
+)-O
+D-O
+,-O
+but-O
+did-O
+not-O
+affect-O
+the-O
+PTH-O
+content-O
+in-O
+blood-O
+.-O
+Concentration-O
+of-O
+the-O
+receptors-protein
+to-O
+1.25-O
+(-O
+OH-O
+)-O
+2D3-O
+was-O
+elevated-O
+up-O
+to-O
+39.7-O
+fmole/mg-O
+after-O
+I-O
+week-O
+of-O
+the-O
+treatment-O
+,-O
+whereas-O
+it-O
+was-O
+decreased-O
+to-O
+the-O
+initial-O
+level-O
+24.8-O
+fmole/mg-O
+within-O
+4-O
+weeks-O
+;-O
+simultaneous-O
+alteration-O
+in-O
dataset.one_batch()
{'input_ids': tensor([[   19,  3741,  2603,  ...,  1417,  2617, 11576],
+        [ 4590,  2156,   255,  ...,   405,  1182,  6608],
+        [ 6214, 25683,  3809,  ...,    11,     5,  8151],
+        ...,
+        [13998, 25326,  2413,  ...,     5,  2199,    21],
+        [11299,   705, 24811,  ...,   134,  1589,  2032],
+        [ 5804,   924,    14,  ...,   366,  1168,     9]]), 'attention_mask': tensor([[1, 1, 1,  ..., 1, 1, 1],
+        [1, 1, 1,  ..., 1, 1, 1],
+        [1, 1, 1,  ..., 1, 1, 1],
+        ...,
+        [1, 1, 1,  ..., 1, 1, 1],
+        [1, 1, 1,  ..., 1, 1, 1],
+        [1, 1, 1,  ..., 1, 1, 1]]), 'offset_mapping': tensor([[[ 1,  4],
+         [ 1,  2],
+         [ 2,  5],
+         ...,
+         [ 3,  5],
+         [ 5,  8],
+         [ 1,  6]],
+
+        [[ 1,  5],
+         [ 1,  1],
+         [ 1,  1],
+         ...,
+         [ 5,  7],
+         [ 7,  9],
+         [ 9, 14]],
+
+        [[ 1,  5],
+         [ 5,  8],
+         [ 8, 10],
+         ...,
+         [ 1,  2],
+         [ 1,  3],
+         [ 1, 10]],
+
+        ...,
+
+        [[ 1,  5],
+         [ 5,  8],
+         [ 8, 10],
+         ...,
+         [ 1,  3],
+         [ 1,  7],
+         [ 1,  3]],
+
+        [[ 1,  5],
+         [ 5,  6],
+         [ 6, 10],
+         ...,
+         [ 2,  3],
+         [ 1,  1],
+         [ 1,  2]],
+
+        [[ 1,  7],
+         [ 1,  5],
+         [ 1,  4],
+         ...,
+         [ 3,  5],
+         [ 5,  7],
+         [ 1,  2]]]), 'labels': tensor([[0, 1, 1,  ..., 0, 0, 0],
+        [2, 0, 2,  ..., 0, 0, 0],
+        [0, 0, 0,  ..., 0, 0, 0],
+        ...,
+        [1, 1, 1,  ..., 0, 0, 0],
+        [0, 0, 0,  ..., 2, 0, 2],
+        [0, 0, 0,  ..., 0, 0, 0]])}
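To see how tokens, label ids and the original words line up, the dataset can be rebuilt with return_string=True and inspected through visualize_batch. A debugging sketch, keeping the docstring's caveat in mind that the extra string fields cannot be moved to CUDA:

debug_ds = IOBES("/Users/xiaochen.zhang/data/valid.iobes", tokenizer, return_string=True)
batch = debug_ds.one_batch(batch_size=4)
# each row: (token, label id, text label, original word, offset mapping)
debug_ds.visualize_batch(batch, row_idx=0)[:5]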
diff --git a/docs/pl_training.html b/docs/pl_training.html new file mode 100644 index 0000000..864d669 --- /dev/null +++ b/docs/pl_training.html @@ -0,0 +1,228 @@ +--- + +title: PyTorch Lightning training + + +keywords: fastai +sidebar: home_sidebar + +summary: "on huggingface transformers" +description: "on huggingface transformers" +nb_path: "nbs/72_pl_training.ipynb" +--- + +
# !pip install pytorch-lightning==1.3.8
+# !pip install tensorflow==2.2.0
Load model and tokenizer
ner_model_from[source]
ner_model_from(name:str, dataset:IOBES)
name: the checkpoint name passed to AutoModelForTokenClassification.from_pretrained(name)
ner_tokenizer_from[source]
ner_tokenizer_from(name:str)
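A loading sketch: the tokenizer and dataset are built first so that ner_model_from can size the token-classification head from the dataset's categories and write them back into the model config via set_hfconfig. The file path is a placeholder; the checkpoint name is the one used on the data page.

from forgebox.hf.data import IOBES
from forgebox.hf.train import ner_model_from, ner_tokenizer_from

tokenizer = ner_tokenizer_from("raynardj/roberta-pubmed")
train_ds = IOBES("train.iobes", tokenizer)
model = ner_model_from("raynardj/roberta-pubmed", train_ds)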
Lightning data module
class NERDataModule[source]
NERDataModule(*args:Any, **kwargs:Any) :: LightningDataModule
A DataModule standardizes the training, val, test splits, data preparation and transforms. The main advantage is consistent data splits, data preparation and transforms across models.
Example::
class MyDataModule(LightningDataModule):
+    def __init__(self):
+        super().__init__()
+    def prepare_data(self):
+        # download, split, etc...
+        # only called on 1 GPU/TPU in distributed
+    def setup(self):
+        # make assignments here (val/train/test split)
+        # called on every process in DDP
+    def train_dataloader(self):
+        train_split = Dataset(...)
+        return DataLoader(train_split)
+    def val_dataloader(self):
+        val_split = Dataset(...)
+        return DataLoader(val_split)
+    def test_dataloader(self):
+        test_split = Dataset(...)
+        return DataLoader(test_split)
+    def teardown(self):
+        # clean up after fit or test
+        # called on every process in DDP
A DataModule implements 6 key methods:
  • prepare_data (things to do on 1 GPU/TPU, not on every GPU/TPU in distributed mode).
  • setup (things to do on every accelerator in distributed mode).
  • train_dataloader: the training dataloader.
  • val_dataloader: the val dataloader(s).
  • test_dataloader: the test dataloader(s).
  • teardown (things to do on every accelerator in distributed mode when finished).
This allows you to share a full dataset without explaining how to download, split, transform and process the data.
class NERModule[source]
NERModule(model) :: LightningModule
PyTorch lightning module for training ner model
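A minimal end-to-end sketch wiring the pieces above into a pytorch-lightning run; the Trainer arguments are illustrative and not taken from the repository (pytorch-lightning 1.3.8 is the version pinned earlier on this page). Note that NERModule.configure_optimizers assumes a RoBERTa-style model, since it builds its parameter groups from model.roberta and model.classifier.

import pytorch_lightning as pl

data_module = NERDataModule(train_ds, valid_ds, batch_size=32)  # datasets from the IOBES sketches above
module = NERModule(model)                                       # model from ner_model_from above
trainer = pl.Trainer(max_epochs=3, gpus=1)
trainer.fit(module, datamodule=data_module)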
+ + diff --git a/docs/sidebar.json b/docs/sidebar.json index 9e2b594..cff08b1 100644 --- a/docs/sidebar.json +++ b/docs/sidebar.json @@ -24,6 +24,8 @@ "Categorical Transformation for DL": "category.html", "Cosine ": "cosine_search.html", "Lightning Callbacks": "thunder_callbacks.html", + "Data parts for hf transformers": "hf_transformer_data.html", + "Pytorch Lighting training": "pl_training.html", "CUDA GPU Management": "CUDA_GPU_Management.html", "Bert Visualize": "bert_visualize.html", "NLP data": "bilstm-based-search-on-netflix-data.html", diff --git a/forgebox/__init__.py b/forgebox/__init__.py index a34b2f6..574c066 100644 --- a/forgebox/__init__.py +++ b/forgebox/__init__.py @@ -1 +1 @@ -__version__ = "0.4.7" +__version__ = "0.4.9" diff --git a/forgebox/_nbdev.py b/forgebox/_nbdev.py index e8c03ee..1566d36 100644 --- a/forgebox/_nbdev.py +++ b/forgebox/_nbdev.py @@ -106,6 +106,14 @@ "UnfreezeScheduler": "61_thunder_callbacks.ipynb", "nn.Module.unfreeze": "61_thunder_callbacks.ipynb", "nn.Module.freeze": "61_thunder_callbacks.ipynb", + "convert_iob2_file_to_iobes": "70_hf_transformer_data.ipynb", + "conbine_iobes_file": "70_hf_transformer_data.ipynb", + "IOBES": "70_hf_transformer_data.ipynb", + "clean_output": "70_hf_transformer_data.ipynb", + "ner_model_from": "72_pl_training.ipynb", + "ner_tokenizer_from": "72_pl_training.ipynb", + "NERDataModule": "72_pl_training.ipynb", + "NERModule": "72_pl_training.ipynb", "CudaDevice": "CUDA_GPU_Management.ipynb", "CudaHandler": "CUDA_GPU_Management.ipynb", "MLMVisualizer": "bert_visualize.ipynb", @@ -164,6 +172,8 @@ "category.py", "cosine.py", "thunder/callbacks.py", + "hf/data.py", + "hf/train.py", "ftorch/cuda.py", "bert_visualize.py", "data/nlp.py", diff --git a/forgebox/hf/__init__.py b/forgebox/hf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/forgebox/hf/data.py b/forgebox/hf/data.py new file mode 100644 index 0000000..3287221 --- /dev/null +++ b/forgebox/hf/data.py @@ -0,0 +1,257 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/70_hf_transformer_data.ipynb (unless otherwise specified). + +__all__ = ['convert_iob2_file_to_iobes', 'conbine_iobes_file', 'IOBES', 'clean_output'] + +# Cell +from ..imports import * +from ..category import Category +from typing import List, Dict, Callable, Any, Tuple + +# Cell +def convert_iob2_file_to_iobes(file_path, result_path): + """ + Convert IOB2 file to IOBES + """ + with open(file_path, 'r') as f: + lines = f.readlines() + with open(result_path, 'w') as f: + for line in lines: + line = line.strip() + if line == '': + f.write('\n') + continue + line = line.split() + if line[-1] == 'O': + f.write(' '.join(line) + '\n') + else: + f.write(' '.join(line[:-1]) + ' ' + line[-1] + '\n') + + +def conbine_iobes_file( + file_paths: List[Path], + new_file_path: Path +): + """ + Conbine from multiple IOBES files + into IOBES files + """ + with open(new_file_path, 'w') as new_file: + for file_path in file_paths: + with open(file_path, 'r') as file: + for line in file: + new_file.write(line) + +# Cell +class IOBES(Dataset): + """ + Load iobes file for NER training task + """ + + def __init__( + self, + file_path, + tokenizer, + max_len=128, + save_buffer: int = 15, + category: Category = None, + return_string: bool = False, + use_frag: bool = False, + ): + """ + file_path, + tokenizer, + max_len=128, + save_buffer: int = 15, + category: Category = None, + label categories, if set to None, will be figured out + automatically. 
+ You can set this to None for train dataset, but for valid + dataset: + valid_ds = IOBES(...,category=train_ds.cates) + return_string: bool = False, do we return original string + for tokenizer output, this option is good for debuging + but the data won't pass into cuda if choose so + use_frag: bool = False, do we use prepend like 'I-','B-' + """ + self.file_path = file_path + self.max_len = max_len + self.pairs = [] + self.list_of_words = [] + self.list_of_labels = [] + self.tokenizer = tokenizer + self.cates = category + self.return_string = return_string + self.use_frag = use_frag + self.load_data(save_buffer) + + def load_data(self, save_buffer: int = 15): + """ + Load file in to object structure + """ + with open(self.file_path, 'r') as f: + for line in f: + line = line.strip() + if line: + splited = line.split() + if len(splited) != 2: + continue + word, label = splited + # do we use 'I-', 'B-' etc + if self.use_frag is False: + if "-" in label: + label = label.split('-')[1] + self.pairs.append([word, label]) + + self.pairs = np.array(self.pairs) + + if self.cates is None: + labels_df = pd.DataFrame({"label": self.pairs[:, 1]}) + self.cates = Category(list(labels_df.vc("label").index)) + + self.batching_words(save_buffer) + + def batching_words(self, save_buffer: int = 15): + """ + batching self.words into self.list_of_words + by self.max_len -15 + """ + for i in range(0, len(self.pairs), self.max_len-save_buffer): + chunk_slice = slice(i, i+self.max_len-save_buffer) + self.list_of_words.append(self.pairs[chunk_slice, 0]) + self.list_of_labels.append(self.pairs[chunk_slice, 1]) + + def __len__(self) -> int: + return len(self.list_of_words) + + def __getitem__(self, idx: int) -> Tuple[List[str]]: + return list(self.list_of_words[idx]), list(self.list_of_labels[idx]) + + def __repr__(self): + return f"""NER dataset using IOBES annotation + {len(self)} sentences, + Labels: + {list(self.cates.i2c)} + """ + + def collate_fn(self, data): + """ + data: list of tuple + """ + words, text_labels = zip(*data) + + inputs = self.tokenizer( + list(words), + return_tensors='pt', + padding=True, + truncation=True, + max_length=self.max_len, + is_split_into_words=True, + return_offsets_mapping=True, + add_special_tokens=False, + ) + return self.align_offsets(inputs, text_labels, words) + + def align_offsets( + self, + inputs, + text_labels: List[List[str]], + words: List[List[str]] + ): + """ + inputs: output if tokenizer + text_labels: labels in form of list of list of strings + words: words in form of list of list of strings + """ + labels = torch.zeros_like(inputs.input_ids).long() + labels -= 100 + text_lables_array = np.empty(labels.shape, dtype=object) + words_array = np.empty(labels.shape, dtype=object) + max_len = inputs.input_ids.shape[1] + + for row_id, input_ids in enumerate(inputs.input_ids): + word_pos = inputs.word_ids(row_id) + for idx, pos in enumerate(word_pos): + if pos is None: + continue + if pos <= max_len: + labels[row_id, idx] = self.cates.c2i[text_labels[row_id][pos]] + if self.return_string: + text_lables_array[row_id, + idx] = text_labels[row_id][pos] + words_array[row_id, idx] = words[row_id][pos] + + inputs['labels'] = labels + if self.return_string: + inputs['text_labels'] = text_lables_array.tolist() + inputs['word'] = words_array.tolist() + return inputs + + def dataloader(self, batch_size: int = 32, shuffle: bool = True): + """ + Create dataloader + """ + return DataLoader( + self, + batch_size=batch_size, + shuffle=shuffle, + collate_fn=self.collate_fn, + ) + + def 
one_batch(self, batch_size: int = 32, shuffle: bool = True): + return next(iter(self.dataloader(batch_size, shuffle))) + + def visualize_batch(self, batch, row_idx=0): + return list(zip(self.tokenizer.convert_ids_to_tokens(batch.input_ids[row_idx]), + batch.labels[row_idx].numpy(), + batch.text_labels[row_idx], + batch.word[row_idx], + batch.offset_mapping[row_idx].numpy(), + )) + + def set_hfconfig(self, config): + """ + set the category information to huggingface config + """ + config.num_labels = len(self.cates) + config.id2label = {i: label for i, label in enumerate(self.cates.i2c)} + config.label2id = {label: i for i, label in enumerate(self.cates.i2c)} + + +def clean_output(outputs): + """ + Cleaning output for NER task + """ + results = [] + current = [] + last_idx = 0 + # make to sub group by position + for output in outputs: + if output["index"]-1 == last_idx: + current.append(output) + else: + results.append(current) + current = [output, ] + last_idx = output["index"] + if len(current) > 0: + results.append(current) + + # from tokens to string + strings = [] + for c in results: + tokens = [] + starts = [] + ends = [] + for o in c: + tokens.append(o['word']) + starts.append(o['start']) + ends.append(o['end']) + + new_str = tokenizer.convert_tokens_to_string(tokens) + if new_str != '': + strings.append(dict( + word=new_str, + start=min(starts), + end=max(ends), + entity=c[0]['entity'] + )) + return strings \ No newline at end of file diff --git a/forgebox/hf/train.py b/forgebox/hf/train.py new file mode 100644 index 0000000..0cae4f6 --- /dev/null +++ b/forgebox/hf/train.py @@ -0,0 +1,98 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/72_pl_training.ipynb (unless otherwise specified). + +__all__ = ['ner_model_from', 'ner_tokenizer_from', 'NERDataModule', 'NERModule'] + +# Cell +from .data import IOBES +from ..imports import * +import pytorch_lightning as pl +from transformers import AutoModelForTokenClassification, AutoTokenizer + +# Cell + +# ner model and tokenizer +def ner_model_from( + name:str, dataset: IOBES +): + """ + name: from_pretrain(name) + """ + model = AutoModelForTokenClassification.from_pretrained( + name, + num_labels=len(dataset.cates), + ) + dataset.set_hfconfig(model.config) + return model + +def ner_tokenizer_from( + name: str +): + return AutoTokenizer.from_pretrained( + name, add_prefix_space=True) + +# Cell + +# ner data module +class NERDataModule(pl.LightningDataModule): + def __init__(self, train_ds, val_ds, batch_size=32): + super().__init__() + self.train_ds = train_ds + self.val_ds = val_ds + self.batch_size = batch_size + + def train_dataloader(self): + return self.train_ds.dataloader(batch_size=self.batch_size, shuffle=True) + + def val_dataloader(self): + return self.val_ds.dataloader(batch_size=self.batch_size*2, shuffle=False) + +# Cell + +# ner module +class NERModule(pl.LightningModule): + """ + PyTorch lightning module for training ner model + """ + def __init__( + self, model, + ): + """ + model: huggingface transformer model for ner + """ + super().__init__() + self.model = model + + def forward(self, batch): + return self.model( + input_ids=batch['input_ids'], + attention_mask=batch['attention_mask'], + labels=batch['labels']) + + def training_step(self, batch, batch_idx): + outputs = self(batch) + loss = outputs.loss + self.log("loss", loss) + self.log("acc", self.calcualte_acc(outputs, batch.labels)) + return loss + + def validation_step(self, batch, batch_idx): + outputs = self(batch) + loss = outputs.loss + self.log("val_loss", loss) 
+ self.log("val_acc", self.calcualte_acc(outputs, batch.labels)) + return loss + + def calcualte_acc(self, outputs, labels): + pred_idx = outputs.logits.argmax(-1) + mask = torch.ones_like(pred_idx) + mask[labels==-100]=False + return (pred_idx[mask]==labels[mask]).float().mean() + + def configure_optimizers(self): + # discriminative learning rate + param_groups = [ + {'params': self.model.roberta.parameters(), 'lr': 5e-6}, + {'params': self.model.classifier.parameters(), 'lr': 1e-3}, + ] + optimizer = torch.optim.Adam(param_groups, lr=1e-3) + return optimizer \ No newline at end of file diff --git a/nbs/70_hf_transformer_data.ipynb b/nbs/70_hf_transformer_data.ipynb new file mode 100644 index 0000000..fac2f7d --- /dev/null +++ b/nbs/70_hf_transformer_data.ipynb @@ -0,0 +1,593 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data parts for hf transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# default_exp hf.data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "from forgebox.imports import *\n", + "from forgebox.category import Category\n", + "from typing import List, Dict, Callable, Any, Tuple" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Process IOBES files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "def convert_iob2_file_to_iobes(file_path, result_path):\n", + " \"\"\"\n", + " Convert IOB2 file to IOBES\n", + " \"\"\"\n", + " with open(file_path, 'r') as f:\n", + " lines = f.readlines()\n", + " with open(result_path, 'w') as f:\n", + " for line in lines:\n", + " line = line.strip()\n", + " if line == '':\n", + " f.write('\\n')\n", + " continue\n", + " line = line.split()\n", + " if line[-1] == 'O':\n", + " f.write(' '.join(line) + '\\n')\n", + " else:\n", + " f.write(' '.join(line[:-1]) + ' ' + line[-1] + '\\n')\n", + "\n", + "\n", + "def conbine_iobes_file(\n", + " file_paths: List[Path],\n", + " new_file_path: Path\n", + "):\n", + " \"\"\"\n", + " Conbine from multiple IOBES files\n", + " into IOBES files\n", + " \"\"\"\n", + " with open(new_file_path, 'w') as new_file:\n", + " for file_path in file_paths:\n", + " with open(file_path, 'r') as file:\n", + " for line in file:\n", + " new_file.write(line)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "class IOBES(Dataset):\n", + " \"\"\"\n", + " Load iobes file for NER training task\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " file_path,\n", + " tokenizer,\n", + " max_len=128,\n", + " save_buffer: int = 15,\n", + " category: Category = None,\n", + " return_string: bool = False,\n", + " use_frag: bool = False,\n", + " ):\n", + " \"\"\"\n", + " file_path,\n", + " tokenizer,\n", + " max_len=128,\n", + " save_buffer: int = 15,\n", + " category: Category = None,\n", + " label categories, if set to None, will be figured out\n", + " automatically.\n", + " You can set this to None for train dataset, but for valid\n", + " dataset:\n", + " valid_ds = IOBES(...,category=train_ds.cates)\n", + " return_string: bool = False, do we return original string\n", + " for tokenizer output, this option is good for debuging\n", + " but the data won't pass into 
cuda if choose so\n", + " use_frag: bool = False, do we use prepend like 'I-','B-'\n", + " \"\"\"\n", + " self.file_path = file_path\n", + " self.max_len = max_len\n", + " self.pairs = []\n", + " self.list_of_words = []\n", + " self.list_of_labels = []\n", + " self.tokenizer = tokenizer\n", + " self.cates = category\n", + " self.return_string = return_string\n", + " self.use_frag = use_frag\n", + " self.load_data(save_buffer)\n", + "\n", + " def load_data(self, save_buffer: int = 15):\n", + " \"\"\"\n", + " Load file in to object structure\n", + " \"\"\"\n", + " with open(self.file_path, 'r') as f:\n", + " for line in f:\n", + " line = line.strip()\n", + " if line:\n", + " splited = line.split()\n", + " if len(splited) != 2:\n", + " continue\n", + " word, label = splited\n", + " # do we use 'I-', 'B-' etc\n", + " if self.use_frag is False:\n", + " if \"-\" in label:\n", + " label = label.split('-')[1]\n", + " self.pairs.append([word, label])\n", + "\n", + " self.pairs = np.array(self.pairs)\n", + "\n", + " if self.cates is None:\n", + " labels_df = pd.DataFrame({\"label\": self.pairs[:, 1]})\n", + " self.cates = Category(list(labels_df.vc(\"label\").index))\n", + "\n", + " self.batching_words(save_buffer)\n", + "\n", + " def batching_words(self, save_buffer: int = 15):\n", + " \"\"\"\n", + " batching self.words into self.list_of_words\n", + " by self.max_len -15\n", + " \"\"\"\n", + " for i in range(0, len(self.pairs), self.max_len-save_buffer):\n", + " chunk_slice = slice(i, i+self.max_len-save_buffer)\n", + " self.list_of_words.append(self.pairs[chunk_slice, 0])\n", + " self.list_of_labels.append(self.pairs[chunk_slice, 1])\n", + "\n", + " def __len__(self) -> int:\n", + " return len(self.list_of_words)\n", + "\n", + " def __getitem__(self, idx: int) -> Tuple[List[str]]:\n", + " return list(self.list_of_words[idx]), list(self.list_of_labels[idx])\n", + "\n", + " def __repr__(self):\n", + " return f\"\"\"NER dataset using IOBES annotation\n", + " {len(self)} sentences,\n", + " Labels:\n", + " {list(self.cates.i2c)}\n", + " \"\"\"\n", + "\n", + " def collate_fn(self, data):\n", + " \"\"\"\n", + " data: list of tuple\n", + " \"\"\"\n", + " words, text_labels = zip(*data)\n", + "\n", + " inputs = self.tokenizer(\n", + " list(words),\n", + " return_tensors='pt',\n", + " padding=True,\n", + " truncation=True,\n", + " max_length=self.max_len,\n", + " is_split_into_words=True,\n", + " return_offsets_mapping=True,\n", + " add_special_tokens=False,\n", + " )\n", + " return self.align_offsets(inputs, text_labels, words)\n", + "\n", + " def align_offsets(\n", + " self,\n", + " inputs,\n", + " text_labels: List[List[str]],\n", + " words: List[List[str]]\n", + " ):\n", + " \"\"\"\n", + " inputs: output if tokenizer\n", + " text_labels: labels in form of list of list of strings\n", + " words: words in form of list of list of strings\n", + " \"\"\"\n", + " labels = torch.zeros_like(inputs.input_ids).long()\n", + " labels -= 100\n", + " text_lables_array = np.empty(labels.shape, dtype=object)\n", + " words_array = np.empty(labels.shape, dtype=object)\n", + " max_len = inputs.input_ids.shape[1]\n", + "\n", + " for row_id, input_ids in enumerate(inputs.input_ids):\n", + " word_pos = inputs.word_ids(row_id)\n", + " for idx, pos in enumerate(word_pos):\n", + " if pos is None:\n", + " continue\n", + " if pos <= max_len:\n", + " labels[row_id, idx] = self.cates.c2i[text_labels[row_id][pos]]\n", + " if self.return_string:\n", + " text_lables_array[row_id,\n", + " idx] = text_labels[row_id][pos]\n", + " 
words_array[row_id, idx] = words[row_id][pos]\n", + "\n", + " inputs['labels'] = labels\n", + " if self.return_string:\n", + " inputs['text_labels'] = text_lables_array.tolist()\n", + " inputs['word'] = words_array.tolist()\n", + " return inputs\n", + "\n", + " def dataloader(self, batch_size: int = 32, shuffle: bool = True):\n", + " \"\"\"\n", + " Create dataloader\n", + " \"\"\"\n", + " return DataLoader(\n", + " self,\n", + " batch_size=batch_size,\n", + " shuffle=shuffle,\n", + " collate_fn=self.collate_fn,\n", + " )\n", + "\n", + " def one_batch(self, batch_size: int = 32, shuffle: bool = True):\n", + " return next(iter(self.dataloader(batch_size, shuffle)))\n", + "\n", + " def visualize_batch(self, batch, row_idx=0):\n", + " return list(zip(self.tokenizer.convert_ids_to_tokens(batch.input_ids[row_idx]),\n", + " batch.labels[row_idx].numpy(),\n", + " batch.text_labels[row_idx],\n", + " batch.word[row_idx],\n", + " batch.offset_mapping[row_idx].numpy(),\n", + " ))\n", + "\n", + " def set_hfconfig(self, config):\n", + " \"\"\"\n", + " set the category information to huggingface config\n", + " \"\"\"\n", + " config.num_labels = len(self.cates)\n", + " config.id2label = {i: label for i, label in enumerate(self.cates.i2c)}\n", + " config.label2id = {label: i for i, label in enumerate(self.cates.i2c)}\n", + "\n", + "\n", + "def clean_output(outputs):\n", + " \"\"\"\n", + " Cleaning output for NER task\n", + " \"\"\"\n", + " results = []\n", + " current = []\n", + " last_idx = 0\n", + " # make to sub group by position\n", + " for output in outputs:\n", + " if output[\"index\"]-1 == last_idx:\n", + " current.append(output)\n", + " else:\n", + " results.append(current)\n", + " current = [output, ]\n", + " last_idx = output[\"index\"]\n", + " if len(current) > 0:\n", + " results.append(current)\n", + "\n", + " # from tokens to string\n", + " strings = []\n", + " for c in results:\n", + " tokens = []\n", + " starts = []\n", + " ends = []\n", + " for o in c:\n", + " tokens.append(o['word'])\n", + " starts.append(o['start'])\n", + " ends.append(o['end'])\n", + "\n", + " new_str = tokenizer.convert_tokens_to_string(tokens)\n", + " if new_str != '':\n", + " strings.append(dict(\n", + " word=new_str,\n", + " start=min(starts),\n", + " end=max(ends),\n", + " entity=c[0]['entity']\n", + " ))\n", + " return strings" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AutoTokenizer" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "tokenizer = AutoTokenizer.from_pretrained(\"raynardj/roberta-pubmed\", add_prefix_space=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = IOBES(\"/Users/xiaochen.zhang/data/valid.iobes\", tokenizer)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "in-O\n", + "blood-O\n", + ";-O\n", + "content-O\n", + "of-O\n", + "cAMP-O\n", + "was-O\n", + "also-O\n", + "decreased-O\n", + "in-O\n", + "lymphocytes-O\n", + "by-O\n", + "33-O\n", + "%-O\n", + ".-O\n", + "At-O\n", + "the-O\n", + "same-O\n", + "time-O\n", + ",-O\n", + "total-O\n", + "content-O\n", + "of-O\n", + "T-cell_type\n", + "lymphocytes-cell_type\n", + "was-O\n", + "decreased-O\n", + "1.5-fold-O\n", + "in-O\n", + "peripheric-O\n", + "blood-O\n", + ".-O\n", + "Treatment-O\n", + "with-O\n", + 
"I-hydroxyvitamin-O\n", + "D3-O\n", + "(-O\n", + "1-1.5-O\n", + "mg-O\n", + "daily-O\n", + ",-O\n", + "within-O\n", + "4-O\n", + "weeks-O\n", + ")-O\n", + "led-O\n", + "to-O\n", + "normalization-O\n", + "of-O\n", + "total-O\n", + "and-O\n", + "ionized-O\n", + "form-O\n", + "of-O\n", + "Ca2+-O\n", + "and-O\n", + "of-O\n", + "25-O\n", + "(-O\n", + "OH-O\n", + ")-O\n", + "D-O\n", + ",-O\n", + "but-O\n", + "did-O\n", + "not-O\n", + "affect-O\n", + "the-O\n", + "PTH-O\n", + "content-O\n", + "in-O\n", + "blood-O\n", + ".-O\n", + "Concentration-O\n", + "of-O\n", + "the-O\n", + "receptors-protein\n", + "to-O\n", + "1.25-O\n", + "(-O\n", + "OH-O\n", + ")-O\n", + "2D3-O\n", + "was-O\n", + "elevated-O\n", + "up-O\n", + "to-O\n", + "39.7-O\n", + "fmole/mg-O\n", + "after-O\n", + "I-O\n", + "week-O\n", + "of-O\n", + "the-O\n", + "treatment-O\n", + ",-O\n", + "whereas-O\n", + "it-O\n", + "was-O\n", + "decreased-O\n", + "to-O\n", + "the-O\n", + "initial-O\n", + "level-O\n", + "24.8-O\n", + "fmole/mg-O\n", + "within-O\n", + "4-O\n", + "weeks-O\n", + ";-O\n", + "simultaneous-O\n", + "alteration-O\n", + "in-O\n" + ] + } + ], + "source": [ + "for w,l in zip(*dataset[2]):\n", + " print(f\"{w}-{l}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input_ids': tensor([[ 19, 3741, 2603, ..., 1417, 2617, 11576],\n", + " [ 4590, 2156, 255, ..., 405, 1182, 6608],\n", + " [ 6214, 25683, 3809, ..., 11, 5, 8151],\n", + " ...,\n", + " [13998, 25326, 2413, ..., 5, 2199, 21],\n", + " [11299, 705, 24811, ..., 134, 1589, 2032],\n", + " [ 5804, 924, 14, ..., 366, 1168, 9]]), 'attention_mask': tensor([[1, 1, 1, ..., 1, 1, 1],\n", + " [1, 1, 1, ..., 1, 1, 1],\n", + " [1, 1, 1, ..., 1, 1, 1],\n", + " ...,\n", + " [1, 1, 1, ..., 1, 1, 1],\n", + " [1, 1, 1, ..., 1, 1, 1],\n", + " [1, 1, 1, ..., 1, 1, 1]]), 'offset_mapping': tensor([[[ 1, 4],\n", + " [ 1, 2],\n", + " [ 2, 5],\n", + " ...,\n", + " [ 3, 5],\n", + " [ 5, 8],\n", + " [ 1, 6]],\n", + "\n", + " [[ 1, 5],\n", + " [ 1, 1],\n", + " [ 1, 1],\n", + " ...,\n", + " [ 5, 7],\n", + " [ 7, 9],\n", + " [ 9, 14]],\n", + "\n", + " [[ 1, 5],\n", + " [ 5, 8],\n", + " [ 8, 10],\n", + " ...,\n", + " [ 1, 2],\n", + " [ 1, 3],\n", + " [ 1, 10]],\n", + "\n", + " ...,\n", + "\n", + " [[ 1, 5],\n", + " [ 5, 8],\n", + " [ 8, 10],\n", + " ...,\n", + " [ 1, 3],\n", + " [ 1, 7],\n", + " [ 1, 3]],\n", + "\n", + " [[ 1, 5],\n", + " [ 5, 6],\n", + " [ 6, 10],\n", + " ...,\n", + " [ 2, 3],\n", + " [ 1, 1],\n", + " [ 1, 2]],\n", + "\n", + " [[ 1, 7],\n", + " [ 1, 5],\n", + " [ 1, 4],\n", + " ...,\n", + " [ 3, 5],\n", + " [ 5, 7],\n", + " [ 1, 2]]]), 'labels': tensor([[0, 1, 1, ..., 0, 0, 0],\n", + " [2, 0, 2, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " ...,\n", + " [1, 1, 1, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 2, 0, 2],\n", + " [0, 0, 0, ..., 0, 0, 0]])}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset.one_batch()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "toc": { + "base_numbering": 1, + 
"nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/nbs/72_pl_training.ipynb b/nbs/72_pl_training.ipynb new file mode 100644 index 0000000..82a4985 --- /dev/null +++ b/nbs/72_pl_training.ipynb @@ -0,0 +1,210 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pytorch Lighting training\n", + "> on huggingface transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# default_exp hf.train" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "from forgebox.hf.data import IOBES\n", + "from forgebox.imports import *\n", + "import pytorch_lightning as pl\n", + "from transformers import AutoModelForTokenClassification, AutoTokenizer" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install transformers==4.9.1\n", + "# !pip install pytorch-lightning==1.3.8\n", + "# !pip install tensorflow==2.2.0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load model and tokenizer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "\n", + "# ner model and tokenizer\n", + "def ner_model_from(\n", + " name:str, dataset: IOBES\n", + "):\n", + " \"\"\"\n", + " name: from_pretrain(name)\n", + " \"\"\"\n", + " model = AutoModelForTokenClassification.from_pretrained(\n", + " name,\n", + " num_labels=len(dataset.cates),\n", + " )\n", + " dataset.set_hfconfig(model.config)\n", + " return model\n", + "\n", + "def ner_tokenizer_from(\n", + " name: str\n", + "):\n", + " return AutoTokenizer.from_pretrained(\n", + " name, add_prefix_space=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lightning data module" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "\n", + "# ner data module\n", + "class NERDataModule(pl.LightningDataModule):\n", + " def __init__(self, train_ds, val_ds, batch_size=32):\n", + " super().__init__()\n", + " self.train_ds = train_ds\n", + " self.val_ds = val_ds\n", + " self.batch_size = batch_size\n", + "\n", + " def train_dataloader(self):\n", + " return self.train_ds.dataloader(batch_size=self.batch_size, shuffle=True)\n", + "\n", + " def val_dataloader(self):\n", + " return self.val_ds.dataloader(batch_size=self.batch_size*2, shuffle=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "\n", + "# ner module\n", + "class NERModule(pl.LightningModule):\n", + " \"\"\"\n", + " PyTorch lightning module for training ner model\n", + " \"\"\"\n", + " def __init__(\n", + " self, model,\n", + " ):\n", + " \"\"\"\n", + " model: huggingface transformer model for ner\n", + " \"\"\"\n", + " super().__init__()\n", + " self.model = model\n", + "\n", + " def forward(self, batch):\n", + " return self.model(\n", + " input_ids=batch['input_ids'],\n", + " attention_mask=batch['attention_mask'],\n", + " labels=batch['labels'])\n", + " \n", + " def training_step(self, batch, batch_idx):\n", + " outputs = 
self(batch)\n", + " loss = outputs.loss\n", + " self.log(\"loss\", loss)\n", + " self.log(\"acc\", self.calcualte_acc(outputs, batch.labels))\n", + " return loss\n", + "\n", + " def validation_step(self, batch, batch_idx):\n", + " outputs = self(batch)\n", + " loss = outputs.loss\n", + " self.log(\"val_loss\", loss)\n", + " self.log(\"val_acc\", self.calcualte_acc(outputs, batch.labels))\n", + " return loss\n", + " \n", + " def calcualte_acc(self, outputs, labels):\n", + " pred_idx = outputs.logits.argmax(-1)\n", + " mask = torch.ones_like(pred_idx)\n", + " mask[labels==-100]=False\n", + " return (pred_idx[mask]==labels[mask]).float().mean()\n", + " \n", + " def configure_optimizers(self):\n", + " # discriminative learning rate\n", + " param_groups = [\n", + " {'params': self.model.roberta.parameters(), 'lr': 5e-6},\n", + " {'params': self.model.classifier.parameters(), 'lr': 1e-3},\n", + " ]\n", + " optimizer = torch.optim.Adam(param_groups, lr=1e-3)\n", + " return optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/settings.ini b/settings.ini index d6fe5ad..bcdd2b7 100644 --- a/settings.ini +++ b/settings.ini @@ -7,7 +7,7 @@ author = xiaochen(ray) zhang author_email = b2ray2c@gmail.com copyright = xiaochen(ray) zhang branch = master -version = 0.4.7 +version = 0.4.9 min_python = 3.6 audience = Developers language = English