"""Tokenize two sample sentences with a pretrained DistilBERT tokenizer.

The tokenizer files are downloaded through the Hugging Face mirror
https://hf-mirror.com instead of the default hub endpoint.
"""
import os

# Hugging Face mirror site: https://hf-mirror.com
# NOTE: the environment variable must be set at the very top of the script,
# BEFORE `transformers` is imported, otherwise the mirror is not picked up.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

from transformers import AutoTokenizer  # noqa: E402  (must follow HF_ENDPOINT)

# The model config is requested from:
#   /distilbert-base-uncased-finetuned-sst-2-english/resolve/main/config.json
check_point = 'distilbert-base-uncased-finetuned-sst-2-english'
tokenizer = AutoTokenizer.from_pretrained(check_point)

inputs = ['I‘m hunger', 'nice to meet you, lisi']

# Pad the shorter sentence to the longest one in the batch, truncate anything
# over the model limit, and return PyTorch tensors.
# The result is bound to `encoded` rather than `input` so the builtin
# `input()` is not shadowed.
encoded = tokenizer(inputs, padding=True, truncation=True, return_tensors='pt')
print('input: ', encoded)

# Example output:
# input: {'input_ids': tensor([[ 101, 1045, 1520, 1049, 9012, 102, 0, 0, 0],
#                              [ 101, 3835, 2000, 3113, 2017, 1010, 5622, 5332, 102]]),
#         'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0],
#                                   [1, 1, 1, 1, 1, 1, 1, 1, 1]])}
#
# Properties worth inspecting on each returned tensor:
# values (.data), shape (.shape), data type (.dtype), gradient (.grad).