
Change model to models

tags/v0.2.0
yh_cc committed 5 years ago · commit dc0124cf02
3 changed files with 14 additions and 7 deletions
  1. +0 -0   reproduction/chinese_word_segment/models/__init__.py
  2. +0 -0   reproduction/chinese_word_segment/models/cws_model.py
  3. +14 -7  reproduction/chinese_word_segment/train_context.py

reproduction/chinese_word_segment/model/__init__.py → reproduction/chinese_word_segment/models/__init__.py


reproduction/chinese_word_segment/model/cws_model.py → reproduction/chinese_word_segment/models/cws_model.py


+14 -7  reproduction/chinese_word_segment/train_context.py

@@ -3,11 +3,17 @@ from fastNLP.core.instance import Instance
 from fastNLP.core.dataset import DataSet
 from fastNLP.api.pipeline import Pipeline
 from fastNLP.api.processor import FullSpaceToHalfSpaceProcessor

-from reproduction.chinese_word_segment.process.cws_processor import *
-from reproduction.chinese_word_segment.process.span_converter import AlphaSpanConverter, DigitSpanConverter
+from fastNLP.api.processor import IndexerProcessor
+from reproduction.chinese_word_segment.process.cws_processor import SpeicalSpanProcessor
+from reproduction.chinese_word_segment.process.cws_processor import CWSCharSegProcessor
+from reproduction.chinese_word_segment.process.cws_processor import CWSSegAppTagProcessor
+from reproduction.chinese_word_segment.process.cws_processor import Pre2Post2BigramProcessor
+from reproduction.chinese_word_segment.process.cws_processor import VocabProcessor
+
+from reproduction.chinese_word_segment.process.span_converter import AlphaSpanConverter
+from reproduction.chinese_word_segment.process.span_converter import DigitSpanConverter
 from reproduction.chinese_word_segment.io.cws_reader import NaiveCWSReader
+
+from reproduction.chinese_word_segment.models.cws_model import CWSBiLSTMSegApp


 tr_filename = ''
 dev_filename = ''
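The unchanged middle of train_context.py is not shown in this diff. As a rough illustration of how the imports above are presumably wired together there, a minimal sketch follows. The method names not visible in this diff (NaiveCWSReader.load, SpeicalSpanProcessor.add_span_converter) and the field names are assumptions, not facts established by this commit:

# Sketch only: assumed preprocessing flow around the imports added above.
from reproduction.chinese_word_segment.io.cws_reader import NaiveCWSReader
from reproduction.chinese_word_segment.process.cws_processor import SpeicalSpanProcessor
from reproduction.chinese_word_segment.process.span_converter import AlphaSpanConverter, DigitSpanConverter

tr_filename = 'path/to/train.txt'    # placeholder paths; the script leaves them empty
dev_filename = 'path/to/dev.txt'

reader = NaiveCWSReader()
tr_dataset = reader.load(tr_filename)      # assumed API: load() returns a DataSet
dev_dataset = reader.load(dev_filename)

# Normalize alphabetic and digit spans before character-level segmentation.
# Field names 'raw_sentence' / 'sentence' are assumptions for illustration.
sp_proc = SpeicalSpanProcessor('raw_sentence', 'sentence')
sp_proc.add_span_converter(AlphaSpanConverter())   # assumed API
sp_proc.add_span_converter(DigitSpanConverter())
sp_proc(tr_dataset)
sp_proc(dev_dataset)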
@@ -60,8 +66,8 @@ bigram_proc(tr_dataset)
 char_vocab_proc(tr_dataset)
 bigram_vocab_proc(tr_dataset)


-char_index_proc = IndexProcessor(char_vocab_proc.get_vocab(), 'char_list')
-bigram_index_proc = IndexProcessor(bigram_vocab_proc.get_vocab(), 'bigram_list')
+char_index_proc = IndexerProcessor(char_vocab_proc.get_vocab(), 'chars_list', 'indexed_chars_list')
+bigram_index_proc = IndexerProcessor(bigram_vocab_proc.get_vocab(), 'bigrams_list', 'indexed_bigrams_list')


 char_index_proc(tr_dataset)
 bigram_index_proc(tr_dataset)
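This hunk replaces the misspelled IndexProcessor with fastNLP's IndexerProcessor and names the output fields explicitly. A short sketch of how this indexing step fits the rest of the script is below; it reuses the vocab processors and datasets defined earlier in train_context.py, and the Pipeline.add_processor call is an assumption about the fastNLP API of this era rather than something shown in the diff:

# Sketch: map the string fields produced by the vocab processors to index fields.
from fastNLP.api.processor import IndexerProcessor
from fastNLP.api.pipeline import Pipeline

char_index_proc = IndexerProcessor(char_vocab_proc.get_vocab(), 'chars_list', 'indexed_chars_list')
bigram_index_proc = IndexerProcessor(bigram_vocab_proc.get_vocab(), 'bigrams_list', 'indexed_bigrams_list')

# Apply to both splits so train and dev share the same vocabulary mapping.
for ds in (tr_dataset, dev_dataset):
    char_index_proc(ds)
    bigram_index_proc(ds)

# Optionally bundle the preprocessing chain for reuse at inference time
# (add_processor is an assumed method name).
pp = Pipeline()
pp.add_processor(char_index_proc)
pp.add_processor(bigram_index_proc)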
@@ -81,7 +87,8 @@ bigram_index_proc(dev_dataset)


 # 3. The dataset can now be used for training
 # TODO how are the pretrained embeddings handled?

+cws_model = CWSBiLSTMSegApp(vocab_num, embed_dim=100, bigram_vocab_num=None, bigram_embed_dim=100, num_bigram_per_char=None,
+                            hidden_size=200, bidirectional=True, embed_drop_p=None, num_layers=1, tag_size=2)
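The added constructor call above is the only place this diff touches the model; the implementation lives in the renamed models/cws_model.py. As a hypothetical illustration of what the constructor arguments correspond to (a per-character 2-tag BiLSTM tagger over char and optional bigram embeddings), a rough PyTorch skeleton is sketched below. It is not the repository's actual CWSBiLSTMSegApp code:

# Hypothetical skeleton only -- NOT the repository's CWSBiLSTMSegApp.
import torch
import torch.nn as nn

class BiLSTMSegAppSketch(nn.Module):
    def __init__(self, vocab_num, embed_dim=100, bigram_vocab_num=None, bigram_embed_dim=100,
                 num_bigram_per_char=None, hidden_size=200, bidirectional=True,
                 embed_drop_p=None, num_layers=1, tag_size=2):
        super().__init__()
        self.char_embed = nn.Embedding(vocab_num, embed_dim)
        input_dim = embed_dim
        self.bigram_embed = None
        if bigram_vocab_num is not None:
            # Each character is paired with num_bigram_per_char surrounding bigrams,
            # whose embeddings are concatenated onto the char embedding.
            self.bigram_embed = nn.Embedding(bigram_vocab_num, bigram_embed_dim)
            input_dim += bigram_embed_dim * (num_bigram_per_char or 1)
        self.dropout = nn.Dropout(embed_drop_p) if embed_drop_p else nn.Identity()
        self.lstm = nn.LSTM(input_dim, hidden_size, num_layers=num_layers,
                            batch_first=True, bidirectional=bidirectional)
        out_dim = hidden_size * (2 if bidirectional else 1)
        self.fc = nn.Linear(out_dim, tag_size)   # per-character tag scores

    def forward(self, chars, bigrams=None):
        # chars: (batch, seq_len); bigrams: (batch, seq_len, num_bigram_per_char)
        x = self.char_embed(chars)
        if self.bigram_embed is not None and bigrams is not None:
            b = self.bigram_embed(bigrams)            # (B, L, K, bigram_embed_dim)
            x = torch.cat([x, b.flatten(2)], dim=-1)  # concatenate bigram features
        x = self.dropout(x)
        out, _ = self.lstm(x)
        return self.fc(out)                           # (B, L, tag_size)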