'''
BERT Config:
--------------------------------------------------------------------------------------------------'''
class BertConfig(object):
    """Configuration class to store the configuration of a `BertModel`.
    """

    def __init__(self,
                 vocab_size,
                 hidden_size=768,
                 num_hidden_layers=12,
                 num_attention_heads=12,
                 intermediate_size=3072,
                 hidden_act="relu",
                 hidden_dropout_prob=0.1,
                 attention_probs_dropout_prob=0.1,
                 max_position_embeddings=512,
                 type_vocab_size=2,
                 initializer_range=0.02,
                 output_hidden_states=False,
                 batch_size=100,
                 ):
| """Constructs BertConfig. | |||
| Args: | |||
| vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. | |||
| hidden_size: Size of the encoder layers and the pooler layer. | |||
| num_hidden_layers: Number of hidden layers in the Transformer encoder. | |||
| num_attention_heads: Number of attention heads for each attention layer in | |||
| the Transformer encoder. | |||
| intermediate_size: The size of the "intermediate" (i.e., feed-forward) | |||
| layer in the Transformer encoder. | |||
| hidden_act: The non-linear activation function (function or string) in the | |||
| encoder and pooler. If string, "gelu", "relu" and "swish" are supported. | |||
| hidden_dropout_prob: The dropout probabilitiy for all fully connected | |||
| layers in the embeddings, encoder, and pooler. | |||
| attention_probs_dropout_prob: The dropout ratio for the attention | |||
| probabilities. | |||
| max_position_embeddings: The maximum sequence length that this model might | |||
| ever be used with. Typically set this to something large just in case | |||
| (e.g., 512 or 1024 or 2048). | |||
| type_vocab_size: The vocabulary size of the `token_type_ids` passed into | |||
| `BertModel`. | |||
| initializer_range: The sttdev of the truncated_normal_initializer for | |||
| initializing all weight matrices. | |||
| """ | |||
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.output_hidden_states = output_hidden_states
        self.batch_size = batch_size
'''-----------------------------------------------------------------------------------------------'''
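
'''
Usage example (illustrative):
--------------------------------------------------------------------------------------------------'''
# A minimal sketch of how this config might be consumed, assuming the
# surrounding repo uses PyTorch. `gelu` and `ACT2FN` below are hypothetical
# helpers added here for illustration; they are not defined in this file.
import math

import torch


def gelu(x):
    # Gaussian Error Linear Unit, the activation BERT typically uses.
    return 0.5 * x * (1.0 + torch.erf(x / math.sqrt(2.0)))


# String-to-function lookup mirroring the docstring's claim that "gelu",
# "relu" and "swish" are supported when `hidden_act` is a string.
ACT2FN = {
    "gelu": gelu,
    "relu": torch.nn.functional.relu,
    "swish": lambda x: x * torch.sigmoid(x),
}

if __name__ == "__main__":
    # Only `vocab_size` is required; 30522 is the bert-base-uncased vocabulary
    # size, and the remaining defaults already match BERT-Base.
    config = BertConfig(vocab_size=30522, hidden_act="gelu")
    act_fn = ACT2FN[config.hidden_act]
    print(config.hidden_size, config.num_hidden_layers)   # 768 12
    print(act_fn(torch.zeros(2)))                         # tensor([0., 0.])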