1、安装命令如下:
pip install keras-bert -i https://pypi.tuna.tsinghua.edu.cn/simple/
2、使用案例:
"""Minimal keras-bert usage example.

Loads the pretrained Chinese BERT model (chinese_L-12_H-768_A-12),
tokenizes a sentence, and prints the per-token embeddings produced by
the model. Requires the checkpoint files to exist on disk at
``pretrained_path``.
"""

import os

import numpy as np
from keras_bert import (
    Tokenizer,
    load_trained_model_from_checkpoint,
    load_vocabulary,
)

# Directory containing the Google-released pretrained Chinese BERT files.
# Adjust this path to wherever the checkpoint was unpacked.
pretrained_path = '/path/to/chinese_L-12_H-768_A-12'
config_path = os.path.join(pretrained_path, 'bert_config.json')
checkpoint_path = os.path.join(pretrained_path, 'bert_model.ckpt')
vocab_path = os.path.join(pretrained_path, 'vocab.txt')

# Build the tokenizer from the checkpoint's vocabulary and restore the
# model weights. Loading the model reads the checkpoint from disk.
token_dict = load_vocabulary(vocab_path)
tokenizer = Tokenizer(token_dict)
model = load_trained_model_from_checkpoint(config_path, checkpoint_path)

first_sentence = "我爱中国"

# Tokenization wraps the sentence in the [CLS] ... [SEP] markers.
tokens = tokenizer.tokenize(first_sentence)
print(tokens)
# ['[CLS]', '我', '爱', '中', '国', '[SEP]']

# encode() returns (token ids, segment ids), zero-padded to max_len.
# For a sentence pair, pass the second sentence as ``second=...``.
ids, segments = tokenizer.encode(first=first_sentence, max_len=512)
print(ids[:10], segments[:10])
# [101, 2769, 4263, 704, 1744, 102, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

# Model input is a batch of (ids, segments); output [0] selects the
# per-token embedding matrix for the single sentence in the batch.
predict = model.predict([np.array([ids]), np.array([segments])])[0]
for i, token in enumerate(tokens):
    print(token, predict[i].tolist()[:4])
# [CLS] [-0.2284594178199768, 0.4080664813518524, -0.5485220551490784, -0.35724306106567383]
# 我 [0.32827311754226685, 0.4299146234989166, -0.3868843615055084, -0.6989549994468689]
# 爱 [0.8267912864685059, 0.11296054720878601, -2.1068904399871826, -1.4290785789489746]
# 中 [0.40148067474365234, 0.6819553971290588, -1.6997158527374268, 0.1521463841199875]
# 国 [0.017748408019542694, 0.2129848748445511, -1.2955209016799927, -1.2657432556152344]
# [SEP] [0.43678218126296997, 0.3476751446723938, -0.41245850920677185, 0.18876412510871887]