---
# Configuration for a PyTorch text-classification pipeline.
#
# In the current version many arguments are not yet read by the pipelines,
# so the tag `[being used]` marks the arguments that are actually used.
version: v0.1
framework: pytorch
task: text-classification

model:
  path: bert-base-sst2
  backbone:
    # NOTE(review): `type`/`prefix` say bert while `model_type` says roberta;
    # values are kept exactly as found -- confirm this mix is intended.
    type: bert
    prefix: bert
    attention_probs_dropout_prob: 0.1
    bos_token_id: 0
    eos_token_id: 2
    hidden_act: elu
    hidden_dropout_prob: 0.1
    hidden_size: 768
    initializer_range: 0.02
    intermediate_size: 3072
    # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
    # resolve a bare `1e-05` as a string, not a float.
    layer_norm_eps: 1.0e-05
    max_position_embeddings: 514
    model_type: roberta
    num_attention_heads: 12
    num_hidden_layers: 12
    pad_token_id: 1
    type_vocab_size: 1
    vocab_size: 50265
    # NOTE(review): the original nesting level of this key could not be
    # recovered; it is kept with the backbone settings it follows -- confirm.
    num_classes: 5

# Column positions shared by every dataset split through the merge keys below.
col_index: &col_indexs
  text_col: 0
  label_col: 1

# One entry per split; each merges the shared column indexes and names a file.
# Only `valid` points at a file; `train` and `test` are null.
dataset:
  train:
    <<: *col_indexs
    file: null
  valid:
    <<: *col_indexs
    file: glue/sst2  # [being used]
  test:
    <<: *col_indexs
    file: null

preprocessor:
  type: Tokenize
  tokenizer_name: /workspace/bert-base-sst2

train:
  batch_size: 256
  learning_rate: 0.00001
  lr_scheduler_type: cosine
  num_steps: 100000

evaluation:  # [being used]
  model_path: .cache/easynlp/
  max_sequence_length: 128
  batch_size: 32
  metrics:
    - accuracy
    - f1