diff --git a/docs/_data/sidebars/home_sidebar.yml b/docs/_data/sidebars/home_sidebar.yml index 449d4f7..d50c1ff 100644 --- a/docs/_data/sidebars/home_sidebar.yml +++ b/docs/_data/sidebars/home_sidebar.yml @@ -78,6 +78,12 @@ entries: - output: web,pdf title: Lightning Callbacks url: thunder_callbacks.html + - output: web,pdf + title: Data parts for hf transformers + url: hf_transformer_data.html + - output: web,pdf + title: PyTorch Lightning training + url: pl_training.html - output: web,pdf title: CUDA GPU Management url: CUDA_GPU_Management.html diff --git a/docs/hf_transformer_data.html b/docs/hf_transformer_data.html new file mode 100644 index 0000000..1d333a1 --- /dev/null +++ b/docs/hf_transformer_data.html @@ -0,0 +1,462 @@ +--- + +title: Data parts for hf transformers + + +keywords: fastai +sidebar: home_sidebar + + + +nb_path: "nbs/70_hf_transformer_data.ipynb" +--- + + +
from transformers import AutoTokenizer
+
tokenizer = AutoTokenizer.from_pretrained("raynardj/roberta-pubmed", add_prefix_space=True)
+
dataset = IOBES("/Users/xiaochen.zhang/data/valid.iobes", tokenizer)
+
for w,l in zip(*dataset[2]):
+ print(f"{w}-{l}")
+
dataset.one_batch()
+
# !pip install pytorch-lightning==1.3.8
+# !pip install tensorflow==2.2.0
+