在 NLTK 中使用 Stanford NLP 工具包完成命名实体识别(NER)和词性标注(POS)任务。
安装环境
- 下载
1
http://nlp.stanford.edu/software/CRF-NER.html
2
http://nlp.stanford.edu/software/tagger.html
- 解压
1
unzip stanford-ner-2018-10-16.zip
2
unzip stanford-postagger-full-2018-10-16.zip
- 添加 CLASSPATH,修改 .bashrc 文件:
1
export STANFORD_NLTK_PATH=/home/haha/stanford_nltk
2
export STANFORD_NER_PATH=$STANFORD_NLTK_PATH/stanford-ner-2018-10-16
3
export STANFORD_POS_PATH=$STANFORD_NLTK_PATH/stanford-postagger-full-2018-10-16
4
export JAVA_HOME=/home/haha/java/jdk-13.0.1
5
export PATH=$JAVA_HOME/bin:$PATH
6
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$STANFORD_NER_PATH/stanford-ner.jar:$STANFORD_POS_PATH/stanford-postagger.jar
- 添加 STANFORD_MODELS,修改 .bashrc 文件:
1
export STANFORD_MODELS=$STANFORD_NER_PATH/classifiers:$STANFORD_POS_PATH/models
函数使用
1 | from nltk.tag import StanfordNERTagger |
2 | from nltk.tag import StanfordPOSTagger |
3 | |
4 | pos_tagger = StanfordPOSTagger('english-bidirectional-distsim.tagger') |
5 | ner_tagger = StanfordNERTagger('english.all.3class.distsim.crf.ser.gz') |
6 | tokens = ['The', 'suspect', 'dumped', 'the', 'dead', 'body', 'into', 'a', 'local', 'reservoir', '.'] |
7 | pos = [each[1] for each in pos_tagger.tag(tokens)] |
8 | ner = [each[1] for each in ner_tagger.tag(tokens)] |