You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

caffe.proto 74 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821
  1. syntax = "proto2";
  2. package domi.caffe;
  // Shape of a Blob, expressed as the list of its dimension sizes.
  message BlobShape {
    // One entry per dimension; encoded packed on the wire.
    repeated int64 dim = 1 [packed = true];
  }
  // Serialized Blob: an optional shape plus typed value arrays.
  // NOTE(review): `*data` / `*diff` presumably hold values and gradients
  // respectively -- confirm against the code that reads this message.
  message BlobProto {
    // Preferred way to describe the blob's dimensions.
    optional BlobShape shape = 7;

    // Single-precision payload.
    repeated float data = 5 [packed = true];
    repeated float diff = 6 [packed = true];

    // Double-precision payload.
    repeated double double_data = 8 [packed = true];
    repeated double double_diff = 9 [packed = true];

    // Payloads of other element types.
    optional bytes int8_data = 10;
    repeated int32 int32_data = 11 [packed = true];
    repeated uint64 uint64_data = 12 [packed = true];

    // Legacy 4D dimensions. Deprecated: use "shape" instead.
    optional int32 num = 1 [default = 0];
    optional int32 channels = 2 [default = 0];
    optional int32 height = 3 [default = 0];
    optional int32 width = 4 [default = 0];
  }
  // Container used to pass several BlobProto instances around together.
  message BlobProtoVector {
    // The wrapped blobs.
    repeated BlobProto blobs = 1;
  }
  // A single data item: image dimensions, payload and label.
  message Datum {
    // Dimensions of the item.
    optional int32 channels = 1;
    optional int32 height = 2;
    optional int32 width = 3;

    // The actual image data, as raw bytes.
    optional bytes data = 4;

    optional int32 label = 5;

    // Alternative payload: the datum may carry float data as well.
    repeated float float_data = 6;

    // When true, `data` holds an encoded image that needs to be decoded.
    optional bool encoded = 7 [default = false];
  }
  // Settings for a filler, selected by `type`.
  message FillerParameter {
    // How the filler variance is normalized: by fan_in, by fan_out, or by
    // their average. Applies to the 'xavier' and 'msra' fillers.
    enum VarianceNorm {
      FAN_IN = 0;
      FAN_OUT = 1;
      AVERAGE = 2;
    }

    // The filler type.
    optional string type = 1 [default = "constant"];

    // Value used by the constant filler.
    optional float value = 2 [default = 0];

    // Lower and upper bounds used by the uniform filler.
    optional float min = 3 [default = 0];
    optional float max = 4 [default = 1];

    // Mean and standard deviation used by the Gaussian filler.
    optional float mean = 5 [default = 0];
    optional float std = 6 [default = 1];

    // Gaussian filler only: expected number of non-zero output weights for a
    // given input. The default of -1 disables sparsification.
    optional int32 sparse = 7 [default = -1];

    // See VarianceNorm.
    optional VarianceNorm variance_norm = 8 [default = FAN_IN];
  }
  // Top-level description of a network.
  message NetParameter {
    // Optional, but consider giving the network a name.
    optional string name = 1;

    // DEPRECATED (see InputParameter): the input blobs to the network.
    repeated string input = 3;

    // DEPRECATED (see InputParameter): the shape of the input blobs.
    repeated BlobShape input_shape = 8;

    // DEPRECATED (use "input_shape" instead): 4D input dimensions.
    // When given, every input blob needs four values -- num, channels, height
    // and width -- for a total of (4 * #input) numbers.
    repeated int32 input_dim = 4;

    // Whether every layer is forced to carry out the backward operation.
    // When false, the need for backward is determined automatically from the
    // net structure and the learning rates.
    optional bool force_backward = 5 [default = false];

    // Current "state" of the network: phase, level and stage. Layers may be
    // included or excluded depending on this state and on the states named in
    // their include and exclude fields.
    optional NetState state = 6;

    // Print debugging information about results while running Net::Forward,
    // Net::Backward, and Net::Update.
    optional bool debug_info = 7 [default = false];

    // The layers making up the net. Each one's configuration, including its
    // connectivity and behavior, is given as a LayerParameter.
    // ID 100 so layers are printed last.
    repeated LayerParameter layer = 100;

    // DEPRECATED: use 'layer' instead.
    repeated V1LayerParameter layers = 2;
  }
  // Solver configuration: the nets to train and test, plus the optimization,
  // snapshot and device settings.
  //
  // NOTE
  // Keep the bookkeeping line below up to date whenever a field is added.
  //
  // SolverParameter next available ID: 42 (last added: layer_wise_reduce)
  message SolverParameter {
    // On-disk format of a snapshot.
    enum SnapshotFormat {
      HDF5 = 0;
      BINARYPROTO = 1;
    }

    // Device mode the solver runs in.
    enum SolverMode {
      CPU = 0;
      GPU = 1;
    }

    // DEPRECATED: old solver enum types, use the string `type` instead.
    enum SolverType {
      SGD = 0;
      NESTEROV = 1;
      ADAGRAD = 2;
      RMSPROP = 3;
      ADADELTA = 4;
      ADAM = 5;
    }

    ////////////////////////////////////////////////////////////////////////////
    // Train and test networks
    //
    // Exactly one train net must be given, through one of:
    //     train_net_param, train_net, net_param, net
    // One or more test nets may be given, through any of:
    //     test_net_param, test_net, net_param, net
    // When several test net fields are set (e.g. both net and test_net), they
    // are evaluated in this field order:
    //     (1) test_net_param, (2) test_net, (3) net_param/net.
    // Every test_net needs a test_iter, and may additionally have a test_level
    // and/or a test_stage.
    ////////////////////////////////////////////////////////////////////////////

    // Proto filename for the train net, possibly combined with one or more
    // test nets.
    optional string net = 24;

    // Inline train net param, possibly combined with one or more test nets.
    optional NetParameter net_param = 25;

    // Proto filename for the train net.
    optional string train_net = 1;

    // Proto filenames for the test nets.
    repeated string test_net = 2;

    // Inline train net params.
    optional NetParameter train_net_param = 21;

    // Inline test net params.
    repeated NetParameter test_net_param = 22;

    // States for the train/test nets. Each must be left unspecified or be
    // specified once per net.
    //
    // By default, all states will have solver = true;
    // train_state will have phase = TRAIN,
    // and all test_state's will have phase = TEST.
    // Remaining defaults come from the NetState defaults.
    optional NetState train_state = 26;
    repeated NetState test_state = 27;

    // Number of iterations to run for each test net.
    repeated int32 test_iter = 3;

    // Number of iterations between two testing phases.
    optional int32 test_interval = 4 [default = 0];

    optional bool test_compute_loss = 19 [default = false];

    // When true, an initial test pass runs before the first iteration, which
    // ensures memory availability and prints the starting value of the loss.
    optional bool test_initialization = 32 [default = true];

    // The base learning rate.
    optional float base_lr = 5;

    // Number of iterations between displaying info. With display = 0, no info
    // is displayed.
    optional int32 display = 6;

    // Display the loss averaged over the last average_loss iterations.
    optional int32 average_loss = 33 [default = 1];

    // The maximum number of iterations.
    optional int32 max_iter = 7;

    // Accumulate gradients over `iter_size` x `batch_size` instances.
    optional int32 iter_size = 36 [default = 1];

    // The learning rate decay policy. The currently implemented policies are:
    //    - fixed: always return base_lr.
    //    - step: return base_lr * gamma ^ (floor(iter / step))
    //    - exp: return base_lr * gamma ^ iter
    //    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
    //    - multistep: similar to step but it allows non uniform steps defined
    //      by stepvalue
    //    - poly: the effective learning rate follows a polynomial decay, to be
    //      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
    //    - sigmoid: the effective learning rate follows a sigmoid decay
    //      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
    //
    // base_lr, max_iter, gamma, step, stepvalue and power are defined in the
    // solver parameter protocol buffer; iter is the current iteration.
    optional string lr_policy = 8;

    // Parameters used to compute the learning rate.
    optional float gamma = 9;
    optional float power = 10;

    // The momentum value.
    optional float momentum = 11;

    // The weight decay.
    optional float weight_decay = 12;

    // Regularization type, controlled by weight_decay. Supported: L1 and L2.
    optional string regularization_type = 29 [default = "L2"];

    // The stepsize for learning rate policy "step".
    optional int32 stepsize = 13;

    // The stepsize for learning rate policy "multistep".
    repeated int32 stepvalue = 34;

    // Set to a value >= 0 to clip parameter gradients to that L2 norm whenever
    // their actual L2 norm is larger.
    optional float clip_gradients = 35 [default = -1];

    // The snapshot interval.
    optional int32 snapshot = 14 [default = 0];

    // The prefix for the snapshot.
    optional string snapshot_prefix = 15;

    // Whether diff is included in the snapshot. Doing so helps debugging, but
    // the final protocol buffer becomes much larger.
    optional bool snapshot_diff = 16 [default = false];

    optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];

    // Mode the solver will use: 0 for CPU and 1 for GPU. GPU is the default.
    optional SolverMode solver_mode = 17 [default = GPU];

    // The device_id that will be used in GPU mode. Defaults to device_id = 0.
    optional int32 device_id = 18 [default = 0];

    // If non-negative, the seed with which the Solver initializes the Caffe
    // random number generator -- useful for reproducible results. Otherwise
    // (and by default) the seed is derived from the system clock.
    optional int64 random_seed = 20 [default = -1];

    // Type of the solver.
    optional string type = 40 [default = "SGD"];

    // Numerical stability term for RMSProp, AdaGrad, AdaDelta and Adam.
    optional float delta = 31 [default = 1e-8];

    // Parameter for the Adam solver.
    optional float momentum2 = 39 [default = 0.999];

    // RMSProp decay value:
    // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
    optional float rms_decay = 38 [default = 0.99];

    // When true, print information about the state of the net that may help
    // with debugging learning problems.
    optional bool debug_info = 23 [default = false];

    // When false, no snapshot is saved after training finishes.
    optional bool snapshot_after_train = 28 [default = true];

    // DEPRECATED: use type instead of solver_type.
    optional SolverType solver_type = 30 [default = SGD];

    // Overlap compute and communication for data parallel training.
    optional bool layer_wise_reduce = 41 [default = true];
  }
  // Stores a solver snapshot.
  message SolverState {
    // The current iteration.
    optional int32 iter = 1;

    // The file that stores the learned net.
    optional string learned_net = 2;

    // The history for sgd solvers.
    repeated BlobProto history = 3;

    // The current step for learning rate.
    optional int32 current_step = 4 [default = 0];
  }
  // Phase a network or layer operates in.
  enum Phase {
    TRAIN = 0;
    TEST = 1;
  }
  // State of a network: its phase, level and stages.
  message NetState {
    // Note that the declared default is TEST, not the zero value TRAIN.
    optional Phase phase = 1 [default = TEST];

    optional int32 level = 2 [default = 0];

    repeated string stage = 3;
  }
  // A rule that a NetState either meets or does not meet.
  message NetStateRule {
    // When set, the NetState must have this phase (TRAIN or TEST) to meet the
    // rule.
    optional Phase phase = 1;

    // Minimum and/or maximum levels in which the layer should be used.
    // Leave undefined to meet the rule regardless of level.
    optional int32 min_level = 2;
    optional int32 max_level = 3;

    // Customizable sets of stages to include or exclude.
    // To meet the rule, the net must have ALL of the listed stages and NONE of
    // the listed "not_stage"s.
    // (Use multiple NetStateRules to specify conjunctions of stages.)
    repeated string stage = 4;
    repeated string not_stage = 5;
  }
  // Training settings for one parameter: multipliers on the global learning
  // constants, plus the name and other settings used for weight sharing.
  message ParamSpec {
    // How strictly the shapes of shared weights are compared.
    enum DimCheckMode {
      // STRICT (default) requires that num, channels, height, width each match.
      STRICT = 0;
      // PERMISSIVE requires only the count (num*channels*height*width) to
      // match.
      PERMISSIVE = 1;
    }

    // Name of the parameter blob. Useful for sharing parameters among layers
    // and never required otherwise: to share a parameter between two layers,
    // give it a (non-empty) name.
    optional string name = 1;

    // Whether shared weights must have the same shape or only the same count.
    // Defaults to STRICT if unspecified.
    optional DimCheckMode share_mode = 2;

    // Multiplier on the global learning rate for this parameter.
    optional float lr_mult = 3 [default = 1.0];

    // Multiplier on the global weight decay for this parameter.
    optional float decay_mult = 4 [default = 1.0];
  }
  // Describes one layer of a net: its identity, connectivity, training
  // settings, and the type-specific parameter message for its layer type.
  //
  // NOTE
  // Field IDs identify fields on the wire: never renumber or reuse one.
  //
  // NOTE(review): this comment used to say "next available layer-specific ID:
  // 151 (last added: smooth_l1_loss_param)". That is stale -- 151-161, 199-204,
  // 206-208, 210, 222, 232-235, 5000 and 5001 are all assigned below, so
  // picking 151 would collide with freespace_extract_param. Confirm the ID
  // allocation scheme with the schema owners and check this message for
  // collisions before adding a field.
  message LayerParameter {
    // The layer name.
    optional string name = 1;

    // The layer type.
    optional string type = 2;

    // The name of each bottom blob.
    repeated string bottom = 3;

    // The name of each top blob.
    repeated string top = 4;

    // The train / test phase for computation.
    optional Phase phase = 10;

    // The amount of weight to assign each top blob in the objective.
    // Each layer assigns a default value, usually of either 0 or 1,
    // to each top blob.
    repeated float loss_weight = 5;

    // Specifies training parameters (multipliers on global learning constants,
    // and the name and other settings used for weight sharing).
    repeated ParamSpec param = 6;

    // The blobs containing the numeric parameters of the layer.
    repeated BlobProto blobs = 7;

    // Specifies whether to backpropagate to each bottom. If unspecified,
    // Caffe will automatically infer whether each input needs backpropagation
    // to compute parameter gradients. If set to true for some inputs,
    // backpropagation to those inputs is forced; if set false for some inputs,
    // backpropagation to those inputs is skipped.
    //
    // The size must be either 0 or equal to the number of bottoms.
    repeated bool propagate_down = 11;

    // Rules controlling whether and when a layer is included in the network,
    // based on the current NetState. You may specify a non-zero number of rules
    // to include OR exclude, but not both. If no include or exclude rules are
    // specified, the layer is always included. If the current NetState meets
    // ANY (i.e., one or more) of the specified rules, the layer is
    // included/excluded.
    repeated NetStateRule include = 8;
    repeated NetStateRule exclude = 9;

    // Parameters for data pre-processing.
    optional TransformationParameter transform_param = 100;

    // Parameters shared by loss layers.
    optional LossParameter loss_param = 101;

    // Layer type-specific parameters.
    //
    // Note: certain layers may have more than one computational engine
    // for their implementation. These layers include an Engine type and
    // engine parameter for selecting the implementation.
    // The default for the engine is set by the ENGINE switch at compile-time.
    optional AccuracyParameter accuracy_param = 102;
    optional ArgMaxParameter argmax_param = 103;
    optional BatchNormParameter batch_norm_param = 139;
    optional BiasParameter bias_param = 141;
    optional ConcatParameter concat_param = 104;
    optional ContrastiveLossParameter contrastive_loss_param = 105;
    optional ConvolutionParameter convolution_param = 106;
    optional CropParameter crop_param = 144;
    optional DataParameter data_param = 107;
    optional DetectionOutputParameter detection_output_param = 150;
    optional DropoutParameter dropout_param = 108;
    optional DummyDataParameter dummy_data_param = 109;
    optional EltwiseParameter eltwise_param = 110;
    optional ELUParameter elu_param = 140;
    optional EmbedParameter embed_param = 137;
    optional ExpParameter exp_param = 111;
    optional FlattenParameter flatten_param = 135;
    optional HDF5DataParameter hdf5_data_param = 112;
    optional HDF5OutputParameter hdf5_output_param = 113;
    optional HingeLossParameter hinge_loss_param = 114;
    optional ImageDataParameter image_data_param = 115;
    optional InfogainLossParameter infogain_loss_param = 116;
    optional InnerProductParameter inner_product_param = 117;
    optional InputParameter input_param = 143;
    optional LogParameter log_param = 134;
    optional LRNParameter lrn_param = 118;
    optional MemoryDataParameter memory_data_param = 119;
    optional MVNParameter mvn_param = 120;
    optional ParameterParameter parameter_param = 145;
    optional PoolingParameter pooling_param = 121;
    optional PowerParameter power_param = 122;
    optional PReLUParameter prelu_param = 131;
    optional PythonParameter python_param = 130;
    optional RecurrentParameter recurrent_param = 146;
    optional ReductionParameter reduction_param = 136;
    optional ReLUParameter relu_param = 123;
    optional ReshapeParameter reshape_param = 133;
    optional ScaleParameter scale_param = 142;
    optional SigmoidParameter sigmoid_param = 124;
    optional SmoothL1LossParameter smooth_l1_loss_param = 148;
    optional SoftmaxParameter softmax_param = 125;
    optional SPPParameter spp_param = 132;
    optional SliceParameter slice_param = 126;
    optional TanHParameter tanh_param = 127;
    optional ThresholdParameter threshold_param = 128;
    optional TileParameter tile_param = 138;
    optional WindowDataParameter window_data_param = 129;

    // Type-specific parameters whose IDs fall outside the contiguous
    // 100-150 range used above. They are kept in their original declaration
    // order.
    optional PermuteParameter permute_param = 202;
    optional PriorBoxParameter prior_box_param = 203;
    optional NormalizeParameter norm_param = 206;
    optional PSROIPoolingParameter psroi_pooling_param = 207;
    optional FreespaceExtractParameter freespace_extract_param = 151;
    optional PostprocessParameter postprocess_param = 152;
    optional SpatialTransformParameter spatial_transform_param = 153;
    optional ROIAlignParameter roi_align_param = 154;
    optional ReorgParameter reorg_param = 155;
    optional RegionParameter region_param = 156;
    optional ReverseParameter reverse_param = 157;
    optional InterpParameter interp_param = 158;
    optional ShuffleChannelParameter shuffle_channel_param = 159;
    optional UpsampleParameter upsample_param = 160;
    optional ROIPoolingParameter roi_pooling_param = 161;
    optional YoloParameter yolo_param = 199;
    optional YoloV3DetectionOutputParameter yolov3_detection_output_param = 200;
    optional ProposalParameter proposal_param = 201;
    optional FSRDetectionOutputParameter fsrdetectionoutput_param = 222;
    optional SSDDetectionOutputParameter ssddetectionoutput_param = 232;
    optional YoloV2DetectionOutputParameter yolov2_detection_output_param = 204;
    optional QuantParameter quant_param = 208;
    optional CondTakeParameter condtake_param = 233;
    optional MatrixInverseParameter matrix_inverse_param = 210;
    optional WarpPerspectiveParameter warp_perspective_param = 234;
    optional BatchMatMulParameter batch_matmul_param = 235;
    optional SpatialTransformerParameter st_param = 5000;
    optional YoloV3DetectionOutputV2Parameter yolov3_detection_output_v2_param = 5001;
  }
  // Parameters describing the transformation applied to the data layer's
  // data.
  message TransformationParameter {
    // Pre-processing can apply simple scaling and subtract the data mean, if
    // one is provided. The mean subtraction is always carried out before
    // scaling.
    optional float scale = 1 [default = 1];

    // Whether to randomly mirror the data.
    optional bool mirror = 2 [default = false];

    // Whether to randomly crop an image.
    optional uint32 crop_size = 3 [default = 0];

    // mean_file and mean_value cannot be specified at the same time.
    optional string mean_file = 4;

    // May be given once (the value is subtracted from all the channels) or
    // once per channel (each value is subtracted from the corresponding
    // channel).
    repeated float mean_value = 5;

    // Force the decoded image to have 3 color channels.
    optional bool force_color = 6 [default = false];

    // Force the decoded image to have 1 color channel.
    optional bool force_gray = 7 [default = false];
  }
  // Parameters shared by loss layers.
  message LossParameter {
    // How the loss is normalized for loss layers that aggregate across
    // batches, spatial dimensions, or other dimensions. Currently only
    // implemented in SoftmaxWithLoss and SigmoidCrossEntropyLoss layers.
    enum NormalizationMode {
      // Divide by the number of examples in the batch times spatial
      // dimensions. Outputs that receive the ignore label will NOT be ignored
      // when computing the normalization factor.
      FULL = 0;
      // Divide by the total number of output locations that do not take the
      // ignore_label. Behaves like FULL when ignore_label is not set.
      VALID = 1;
      // Divide by the batch size.
      BATCH_SIZE = 2;
      // Do not normalize the loss.
      NONE = 3;
    }

    // If specified, ignore instances with the given label.
    optional int32 ignore_label = 1;

    // For historical reasons, the default normalization for
    // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID.
    optional NormalizationMode normalization = 3 [default = VALID];

    // Deprecated. Ignored if normalization is specified. If normalization is
    // not specified, setting this to false is equivalent to
    // normalization = BATCH_SIZE, for consistency with previous behavior.
    optional bool normalize = 2;
  }
  444. // Messages that store parameters used by individual layer types follow, in
  445. // alphabetical order.
  // Parameters for accuracy computation.
  message AccuracyParameter {
    // A prediction counts as correct when the true label is among the top k
    // scoring classes. By default only the top scoring class (i.e. argmax) is
    // compared.
    optional uint32 top_k = 1 [default = 1];

    // The "label" axis of the prediction blob, whose argmax corresponds to the
    // predicted label. May be negative to index from the end (e.g., -1 for the
    // last axis). For example, if axis == 1 and the predictions are
    // (N x C x H x W), the label blob is expected to contain N*H*W ground truth
    // labels with integer values in {0, 1, ..., C-1}.
    optional int32 axis = 2 [default = 1];

    // If specified, ignore instances with the given label.
    optional int32 ignore_label = 3;
  }
  // Parameters for ArgMaxLayer.
  message ArgMaxParameter {
    // If true, produce pairs (argmax, maxval).
    optional bool out_max_val = 1 [default = false];

    optional uint32 top_k = 2 [default = 1];

    // The axis along which to maximise. May be negative to index from the end
    // (e.g., -1 for the last axis).
    // By default ArgMaxLayer maximizes over the flattened trailing dimensions
    // for each index of the first / num dimension.
    optional int32 axis = 3;
  }
  // Parameters for ConcatLayer.
  message ConcatParameter {
    // The axis along which to concatenate. May be negative to index from the
    // end (e.g., -1 for the last axis). All other axes must have the same
    // dimension for every bottom blob.
    // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
    optional int32 axis = 2 [default = 1];

    // DEPRECATED: alias for "axis" -- does not support negative indexing.
    optional uint32 concat_dim = 1 [default = 1];
  }
  // Parameters for batch normalization.
  message BatchNormParameter {
    // If false, normalization is performed over the current mini-batch and
    // global statistics are accumulated (but not yet used) by a moving
    // average.
    // If true, those accumulated mean and variance values are used for the
    // normalization.
    // By default, this is false when the network is in the training phase and
    // true when the network is in the testing phase.
    optional bool use_global_stats = 1;

    // What fraction of the moving average remains each iteration?
    // Smaller values make the moving average decay faster, giving more
    // weight to the recent values.
    // Each iteration updates the moving average @f$S_{t-1}@f$ with the
    // current mean @f$ Y_t @f$ by
    // @f$ S_t = (1-\beta)Y_t + \beta \cdot S_{t-1} @f$, where @f$ \beta @f$
    // is the moving_average_fraction parameter.
    optional float moving_average_fraction = 2 [default = 0.999];

    // Small value added to the variance estimate so that we don't divide by
    // zero.
    optional float eps = 3 [default = 1e-5];
  }
  // Options for adding a bias (see BiasLayer below).
  message BiasParameter {
    // First axis of bottom[0] (the first input Blob) along which bottom[1]
    // (the second input Blob) is applied. Negative values index from the end
    // (-1 is the last axis).
    //
    // Example: for a 4D bottom[0] of shape 100x3x40x60, top[0] has that same
    // shape, and bottom[1] may take any of these shapes for the given axis:
    //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
    //    (axis == 1 == -3)          3;     3x40;     3x40x60
    //    (axis == 2 == -2)                   40;       40x60
    //    (axis == 3 == -1)                                60
    // Independently of "axis", bottom[1] may also have the empty shape,
    // i.e. be a scalar bias.
    optional int32 axis = 1 [default = 1];

    // Number of axes of the input (bottom[0]) covered by the bias parameter;
    // -1 covers every axis of bottom[0] starting from `axis`, and 0 adds a
    // zero-axis Blob: a scalar.
    // (Only honoured when a single bottom is given and the bias is a learned
    // parameter of the layer. Otherwise the number of axes of the second
    // bottom determines num_axes.)
    optional int32 num_axes = 2 [default = 1];

    // Initialization for the learned bias parameter. The default is the zero
    // (0) initialization, so the BiasLayer initially performs the identity
    // operation.
    // (Only honoured when a single bottom is given and the bias is a learned
    // parameter of the layer.)
    optional FillerParameter filler = 3;

    // NOTE(review): undocumented in this file; presumably selects whether the
    // bias is taken from a blob -- confirm against the layer implementation.
    optional bool bias_from_blob = 4 [default = true];
  }
  // Options for the contrastive loss.
  message ContrastiveLossParameter {
    // Margin for dissimilar pairs.
    optional float margin = 1 [default = 1.0];

    // Selects between two formulations of the cost:
    //   false (default): (margin - d)^2, as proposed in Hadsell et al 2006.
    //                    New models should probably use this version.
    //   true:            (margin - d^2), the first implementation, which did
    //                    not exactly match the Hadsell paper. Retained to
    //                    support / reproduce existing models and results.
    optional bool legacy_version = 2 [default = false];
  }
  // Options for convolution.
  message ConvolutionParameter {
    // Number of outputs for the layer.
    optional uint32 num_output = 1;
    // Whether to have bias terms.
    optional bool bias_term = 2 [default = true];

    // pad, kernel_size and stride are each given either as a single value
    // (equal in all spatial dimensions) or once per spatial dimension.

    // Padding size; defaults to 0.
    repeated uint32 pad = 3;
    // Kernel size.
    repeated uint32 kernel_size = 4;
    // Stride; defaults to 1.
    repeated uint32 stride = 6;
    // Dilation; defaults to 1. This is the factor used to dilate the kernel,
    // (implicitly) zero-filling the resulting holes. (Kernel dilation is
    // sometimes referred to by its use in the algorithme à trous from
    // Holschneider et al. 1987.)
    repeated uint32 dilation = 18;

    // For 2D convolution only, the *_h and *_w variants below may be used
    // instead to specify both spatial dimensions.

    // Padding height (2D only).
    optional uint32 pad_h = 9 [default = 0];
    // Padding width (2D only).
    optional uint32 pad_w = 10 [default = 0];
    // Kernel height (2D only).
    optional uint32 kernel_h = 11;
    // Kernel width (2D only).
    optional uint32 kernel_w = 12;
    // Stride height (2D only).
    optional uint32 stride_h = 13;
    // Stride width (2D only).
    optional uint32 stride_w = 14;

    // Group size for group convolution.
    optional uint32 group = 5 [default = 1];

    // Filler for the weight.
    optional FillerParameter weight_filler = 7;
    // Filler for the bias.
    optional FillerParameter bias_filler = 8;

    enum Engine {
      DEFAULT = 0;
      CAFFE = 1;
      CUDNN = 2;
    }
    optional Engine engine = 15 [default = DEFAULT];

    // Axis interpreted as "channels" during convolution. Dimensions before it
    // are treated as independent inputs; dimensions after it are treated as
    // "spatial".
    //   * (N, C, H, W) inputs with axis == 1 (the default): N independent 2D
    //     convolutions, sliding C-channel (or (C/g)-channels, for groups
    //     g>1) filters across the spatial axes (H, W) of the input.
    //   * (N, C, D, H, W) inputs with axis == 1: N independent 3D
    //     convolutions, sliding (C/g)-channels filters across the spatial
    //     axes (D, H, W) of the input.
    optional int32 axis = 16 [default = 1];

    // Forces the general ND convolution even when a specific implementation
    // exists for blobs with the given number of spatial dimensions.
    // (Currently only a 2D-specific convolution implementation exists; for
    // input blobs with num_axes != 2 this option is ignored and the ND
    // implementation is used.)
    optional bool force_nd_im2col = 17 [default = false];
  }
  // Options for cropping the first bottom to the dimensions of the second
  // (reference) bottom.
  message CropParameter {
    // Cropping selects elements of the first bottom so that they fit the
    // dimensions of the second, reference bottom. Two settings configure it:
    //   - `axis` picks the dimensions that are cropped: every dimension up to
    //     but excluding `axis` is preserved, while `axis` itself and all
    //     trailing dimensions are cropped.
    //   - `offset` sets the shift for all/each dimension to align the cropped
    //     bottom with the reference bottom. A single `offset` applies to all
    //     cropped dimensions; otherwise the number of offsets must equal the
    //     number of cropped axes, shifting the crop in each dimension
    //     accordingly.
    // Note: standard dimensions are N,C,H,W, so the default is a spatial
    // crop. `axis` may be negative to index from the end (-1 is the last
    // axis).
    optional int32 axis = 1 [default = 2];
    repeated uint32 offset = 2;
  }
  // Options for the database-backed data layer.
  message DataParameter {
    enum DB {
      LEVELDB = 0;
      LMDB = 1;
    }

    // The data source.
    optional string source = 1;

    // The batch size.
    optional uint32 batch_size = 4;

    // DEPRECATED. Each solver accesses a different subset of the database.
    // rand_skip lets the data layer skip a few data points so that
    // asynchronous sgd clients do not all start at the same point. The skip
    // point would be set as rand_skip * rand(0,1). rand_skip should not be
    // larger than the number of keys in the database.
    optional uint32 rand_skip = 7 [default = 0];

    optional DB backend = 8 [default = LEVELDB];

    // DEPRECATED. See TransformationParameter. Simple pre-processing: scaling
    // and subtraction of the data mean, if provided. The mean subtraction is
    // always carried out before scaling.
    optional float scale = 2 [default = 1];
    optional string mean_file = 3;

    // DEPRECATED. See TransformationParameter. Whether to randomly crop an
    // image.
    optional uint32 crop_size = 5 [default = 0];

    // DEPRECATED. See TransformationParameter. Whether to randomly mirror
    // data.
    optional bool mirror = 6 [default = false];

    // Force the encoded image to have 3 color channels.
    optional bool force_encoded_color = 9 [default = false];

    // Prefetch queue. (Increase if data feeding bandwidth varies, within the
    // limit of device memory for GPU training.)
    optional uint32 prefetch = 10 [default = 4];
  }
  // Options for dropout.
  message DropoutParameter {
    // Dropout ratio.
    optional float dropout_ratio = 1 [default = 0.5];

    // Scale train or test phase.
    // NOTE(review): presumably true scales in the train phase and false in
    // the test phase -- confirm against the layer implementation.
    optional bool scale_train = 2 [default = true];
  }
  // Parameters of DummyDataLayer, which fills any number of arbitrarily
  // shaped blobs with random (or constant) data generated by "Fillers" (see
  // "message FillerParameter").
  message DummyDataParameter {
    // The layer produces N >= 1 top blobs. This message must specify 1 or N
    // shape fields, and 0, 1 or N data_fillers:
    //   0 data_fillers: ConstantFiller with a value of 0 is used.
    //   1 data_filler:  it is applied to all top blobs.
    //   N data_fillers: the ith is applied to the ith top blob.
    repeated FillerParameter data_filler = 1;
    repeated BlobShape shape = 6;

    // 4D dimensions -- deprecated. Use "shape" instead.
    repeated uint32 num = 2;
    repeated uint32 channels = 3;
    repeated uint32 height = 4;
    repeated uint32 width = 5;
  }
  // Options for element-wise operations.
  message EltwiseParameter {
    enum EltwiseOp {
      PROD = 0;
      SUM = 1;
      MAX = 2;
    }

    // Element-wise operation to perform.
    optional EltwiseOp operation = 1 [default = SUM];

    // Blob-wise coefficient for the SUM operation.
    repeated float coeff = 2;

    // Whether the gradient of the PROD operation is computed with a method
    // that is asymptotically slower (for >2 inputs) but stabler.
    // (No effect for SUM op.)
    optional bool stable_prod_grad = 3 [default = true];
  }
  // Parameters used by ELULayer.
  message ELUParameter {
    // Described in:
    //   Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and
    //   Accurate Deep Network Learning by Exponential Linear Units (ELUs).
    //   arXiv
    optional float alpha = 1 [default = 1];
  }
  // Parameters used by EmbedLayer.
  message EmbedParameter {
    // Number of outputs for the layer.
    optional uint32 num_output = 1;

    // The input is given as integers to be interpreted as one-hot vector
    // indices with dimension input_dim. Hence input_dim should be 1 greater
    // than the maximum possible input value.
    optional uint32 input_dim = 2;

    // Whether to use a bias term.
    optional bool bias_term = 3 [default = true];

    // Filler for the weight.
    optional FillerParameter weight_filler = 4;

    // Filler for the bias.
    optional FillerParameter bias_filler = 5;
  }
  // Parameters used by ExpLayer.
  message ExpParameter {
    // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
    // Leaving base at its default (-1) sets base to e, giving
    // y = exp(shift + scale * x).
    optional float base = 1 [default = -1.0];
    optional float scale = 2 [default = 1.0];
    optional float shift = 3 [default = 0.0];
  }
  // Parameters used by FlattenLayer.
  message FlattenParameter {
    // First axis to flatten; every preceding axis is retained in the output.
    // Negative values index from the end (-1 is the last axis).
    optional int32 axis = 1 [default = 1];

    // Last axis to flatten; every following axis is retained in the output.
    // Negative values index from the end (the default, -1, is the last axis).
    optional int32 end_axis = 2 [default = -1];
  }
  // Parameters used by HDF5DataLayer.
  message HDF5DataParameter {
    // The data source.
    optional string source = 1;

    // The batch size.
    optional uint32 batch_size = 2;

    // Whether to shuffle the data. With shuffle == true both the ordering of
    // the HDF5 files and the ordering of data within any given HDF5 file are
    // shuffled, but data from different files are not interleaved: all of a
    // file's data are output (in a random order) before moving onto another
    // file.
    optional bool shuffle = 3 [default = false];
  }
  // Options for HDF5 output; carries only a file name.
  message HDF5OutputParameter {
    optional string file_name = 1;
  }
  // Options for the hinge loss.
  message HingeLossParameter {
    enum Norm {
      L1 = 1;
      L2 = 2;
    }

    // The Norm to use: L1 or L2.
    optional Norm norm = 1 [default = L1];
  }
  // Options for the image data layer.
  message ImageDataParameter {
    // The data source.
    optional string source = 1;

    // The batch size.
    optional uint32 batch_size = 4 [default = 1];

    // Lets the data layer skip a few data points so that asynchronous sgd
    // clients do not all start at the same point. The skip point would be set
    // as rand_skip * rand(0,1). rand_skip should not be larger than the
    // number of keys in the database.
    optional uint32 rand_skip = 7 [default = 0];

    // Whether or not ImageLayer should shuffle the list of files at every
    // epoch.
    optional bool shuffle = 8 [default = false];

    // Images are also resized if new_height or new_width are not zero.
    optional uint32 new_height = 9 [default = 0];
    optional uint32 new_width = 10 [default = 0];

    // Whether the images are color or gray.
    optional bool is_color = 11 [default = true];

    // DEPRECATED. See TransformationParameter. Simple pre-processing: scaling
    // and subtraction of the data mean, if provided. The mean subtraction is
    // always carried out before scaling.
    optional float scale = 2 [default = 1];
    optional string mean_file = 3;

    // DEPRECATED. See TransformationParameter. Whether to randomly crop an
    // image.
    optional uint32 crop_size = 5 [default = 0];

    // DEPRECATED. See TransformationParameter. Whether to randomly mirror
    // data.
    optional bool mirror = 6 [default = false];

    optional string root_folder = 12 [default = ""];
  }
  // Options for the infogain loss.
  message InfogainLossParameter {
    // Source of the infogain matrix.
    optional string source = 1;

    // Axis of prob.
    optional int32 axis = 2 [default = 1];
  }
  // Options for the inner product computation.
  message InnerProductParameter {
    // Number of outputs for the layer.
    optional uint32 num_output = 1;

    // Whether to have bias terms.
    optional bool bias_term = 2 [default = true];

    // Filler for the weight.
    optional FillerParameter weight_filler = 3;

    // Filler for the bias.
    optional FillerParameter bias_filler = 4;

    // First axis to be lumped into a single inner product computation; every
    // preceding axis is retained in the output. Negative values index from
    // the end (-1 is the last axis).
    optional int32 axis = 5 [default = 1];

    // Whether to transpose the weight matrix. With transpose == true, any
    // operations will be performed on the transpose of the weight matrix. The
    // weight matrix itself is not going to be transposed; rather, the
    // transfer flag of operations will be toggled accordingly.
    optional bool transpose = 6 [default = false];
  }
  // Options for the input layer, which produces N >= 1 top blob(s) to be
  // assigned manually.
  message InputParameter {
    // Shape(s) of the top blob(s):
    //   N shapes: one shape per top.
    //   1 shape:  the same shape for every top.
    //   0 shapes: defer to reshaping manually.
    repeated BlobShape shape = 1;
  }
  // Parameters used by LogLayer.
  message LogParameter {
    // LogLayer computes outputs y = log_base(shift + scale * x), for
    // base > 0. Leaving base at its default (-1) sets base to e, giving
    // y = ln(shift + scale * x) = log_e(shift + scale * x).
    optional float base = 1 [default = -1.0];
    optional float scale = 2 [default = 1.0];
    optional float shift = 3 [default = 0.0];
  }
  // Parameters used by LRNLayer.
  message LRNParameter {
    enum NormRegion {
      ACROSS_CHANNELS = 0;
      WITHIN_CHANNEL = 1;
    }

    enum Engine {
      DEFAULT = 0;
      CAFFE = 1;
      CUDNN = 2;
    }

    optional uint32 local_size = 1 [default = 5];
    optional float alpha = 2 [default = 1.];
    optional float beta = 3 [default = 0.75];
    optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
    optional float k = 5 [default = 1.];
    optional Engine engine = 6 [default = DEFAULT];
  }
  // Options for the in-memory data layer: batch size plus channels, height
  // and width.
  message MemoryDataParameter {
    optional uint32 batch_size = 1;
    optional uint32 channels = 2;
    optional uint32 height = 3;
    optional uint32 width = 4;
  }
  // Options for mean-variance normalization (MVN).
  message MVNParameter {
    // Set to false to normalize the mean only.
    optional bool normalize_variance = 1 [default = true];

    // Set to true to perform DNN-like MVN.
    optional bool across_channels = 2 [default = false];

    // Epsilon that guards against dividing by zero while normalizing
    // variance.
    optional float eps = 3 [default = 1e-9];
  }
  // Carries a single optional blob shape.
  message ParameterParameter {
    optional BlobShape shape = 1;
  }
  // Options for pooling.
  message PoolingParameter {
    enum PoolMethod {
      MAX = 0;
      AVE = 1;
      STOCHASTIC = 2;
    }

    // The pooling method.
    optional PoolMethod pool = 1 [default = MAX];

    // Pad, kernel size, and stride are each given either as a single value
    // (equal in height and width) or as Y, X pairs.

    // Padding size (equal in Y, X).
    optional uint32 pad = 4 [default = 0];
    // Padding height.
    optional uint32 pad_h = 9 [default = 0];
    // Padding width.
    optional uint32 pad_w = 10 [default = 0];

    // Kernel size (square).
    optional uint32 kernel_size = 2;
    // Kernel height.
    optional uint32 kernel_h = 5;
    // Kernel width.
    optional uint32 kernel_w = 6;

    // Stride (equal in Y, X).
    optional uint32 stride = 3 [default = 1];
    // Stride height.
    optional uint32 stride_h = 7;
    // Stride width.
    optional uint32 stride_w = 8;

    enum Engine {
      DEFAULT = 0;
      CAFFE = 1;
      CUDNN = 2;
    }
    optional Engine engine = 11 [default = DEFAULT];

    // With global_pooling the layer pools over the size of the bottom by
    // doing kernel_h = bottom->height and kernel_w = bottom->width.
    optional bool global_pooling = 12 [default = false];

    // NOTE(review): undocumented in this file; presumably a boolean
    // counterpart of round_mode below -- confirm how the two interact.
    optional bool ceil_mode = 13 [default = true];

    // How to calculate the output size - using ceil (default) or floor
    // rounding.
    enum RoundMode {
      CEIL = 0;
      FLOOR = 1;
    }
    optional RoundMode round_mode = 14 [default = CEIL];
  }
  // Options for PowerLayer, which computes outputs
  // y = (shift + scale * x) ^ power.
  message PowerParameter {
    optional float power = 1 [default = 1.0];
    optional float scale = 2 [default = 1.0];
    optional float shift = 3 [default = 0.0];
  }
  // Options for a Python-implemented layer.
  message PythonParameter {
    optional string module = 1;
    optional string layer = 2;

    // Assigned to the attribute `param_str` of the `PythonLayer` object in
    // Python before the `setup()` method is called. The value could be a
    // number, string, dictionary in Python dict format, JSON, etc. You may
    // parse this string in the `setup` method and use it in `forward` and
    // `backward`.
    optional string param_str = 3 [default = ''];

    // Whether this PythonLayer is shared among worker solvers during data
    // parallelism. If true, each worker solver sequentially runs forward from
    // this layer. Should be set true if you are using it as a data layer.
    optional bool share_in_parallel = 4 [default = false];
  }
  // Parameters used by RecurrentLayer.
  message RecurrentParameter {
    // Dimension of the output (and usually hidden state) representation --
    // must be explicitly set to non-zero.
    optional uint32 num_output = 1 [default = 0];

    // Filler for the weight.
    optional FillerParameter weight_filler = 2;

    // Filler for the bias.
    optional FillerParameter bias_filler = 3;

    // Whether to enable displaying debug_info in the unrolled recurrent net.
    optional bool debug_info = 4 [default = false];

    // Whether the initial hidden state blobs are added as additional inputs
    // (bottoms) and the final timestep hidden state blobs as additional
    // outputs (tops). How many additional bottom/top blobs are required
    // depends on the recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs.
    optional bool expose_hidden = 5 [default = false];
  }
  // Parameters used by ReductionLayer.
  message ReductionParameter {
    enum ReductionOp {
      SUM = 1;
      ASUM = 2;
      SUMSQ = 3;
      MEAN = 4;
    }

    // Reduction operation to perform.
    optional ReductionOp operation = 1 [default = SUM];

    // First axis to reduce to a scalar. Negative values index from the end
    // (-1 is the last axis).
    // (Currently, only reduction along ALL "tail" axes is supported;
    // reduction of axis M through N, where N < num_axes - 1, is unsupported.)
    //
    // Given an n-axis bottom Blob with shape
    //     (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)),
    // axis == m yields an output Blob with shape
    //     (d0, d1, d2, ..., d(m-1)),
    // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1))
    // times, each including (dm * d(m+1) * ... * d(n-1)) individual data.
    //
    // With axis == 0 (the default) the output Blob always has the empty shape
    // (count 1), reducing across the entire input -- often useful for
    // creating new loss functions.
    optional int32 axis = 2 [default = 0];

    // Coefficient for the output.
    optional float coeff = 3 [default = 1.0];
  }
  // Parameters used by ReLULayer.
  message ReLUParameter {
    // Slope applied to negative inputs; a non-zero slope is allowed to speed
    // up optimization. Described in:
    //   Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier
    //   nonlinearities improve neural network acoustic models. In ICML
    //   Workshop on Deep Learning for Audio, Speech, and Language Processing.
    optional float negative_slope = 1 [default = 0];

    enum Engine {
      DEFAULT = 0;
      CAFFE = 1;
      CUDNN = 2;
    }
    optional Engine engine = 2 [default = DEFAULT];
  }
  // Options for reshaping a blob (ReshapeLayer).
  message ReshapeParameter {
    // Specify the output dimensions. If some of the dimensions are set to 0,
    // the corresponding dimension from the bottom layer is used (unchanged).
    // One dimension (and only one) may be set to -1, in which case its value
    // is inferred from the count of the bottom blob and the remaining
    // dimensions. For example, suppose we want to reshape a 2D blob "input"
    // with shape 2 x 8:
    //
    //   layer {
    //     type: "Reshape" bottom: "input" top: "output"
    //     reshape_param { ... }
    //   }
    //
    // If "input" is 2D with shape 2 x 8, then the following reshape_param
    // specifications are all equivalent, producing a 3D blob "output" with
    // shape 2 x 2 x 4:
    //
    //   reshape_param { shape { dim:  2  dim: 2  dim:  4 } }
    //   reshape_param { shape { dim:  0  dim: 2  dim:  4 } }
    //   reshape_param { shape { dim:  0  dim: 2  dim: -1 } }
    //   reshape_param { shape { dim:  0  dim:-1  dim:  4 } }
    //
    optional BlobShape shape = 1;

    // axis and num_axes control the portion of the bottom blob's shape that
    // is replaced by (included in) the reshape. By default (axis == 0 and
    // num_axes == -1), the entire bottom blob shape is included in the
    // reshape, and hence the shape field must specify the entire output
    // shape.
    //
    // axis may be non-zero to retain some portion of the beginning of the
    // input shape (and may be negative to index from the end; e.g., -1 to
    // begin the reshape after the last axis, including nothing in the
    // reshape, -2 to include only the last axis, etc.).
    //
    // For example, suppose "input" is a 2D blob with shape 2 x 8.
    // Then the following ReshapeLayer specifications are all equivalent,
    // producing a blob "output" with shape 2 x 2 x 4:
    //
    //   reshape_param { shape { dim: 2  dim: 2  dim: 4 } }
    //   reshape_param { shape { dim: 2  dim: 4 } axis:  1 }
    //   reshape_param { shape { dim: 2  dim: 4 } axis: -2 }
    //
    // (For this 2D input, -2 -- "include only the last axis" -- is the
    // negative index equivalent to axis: 1.)
    //
    // num_axes specifies the extent of the reshape.
    // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
    // input axes in the half-open range [axis, axis+num_axes), so
    // num_axes == 0 replaces no input axis and only inserts new ones.
    // num_axes may also be -1, the default, to include all remaining axes
    // (starting from axis).
    //
    // For example, suppose "input" is a 2D blob with shape 2 x 8.
    // Then the following ReshapeLayer specifications are equivalent,
    // producing a blob "output" with shape 1 x 2 x 8.
    //
    //   reshape_param { shape { dim: 1  dim: 2  dim: 8 } }
    //   reshape_param { shape { dim: 1  dim: 2 } num_axes: 1 }
    //   reshape_param { shape { dim: 1 } num_axes: 0 }
    //
    // On the other hand, these would produce output blob shape 2 x 1 x 8:
    //
    //   reshape_param { shape { dim: 2  dim: 1  dim: 8 } }
    //   reshape_param { shape { dim: 1 } axis: 1  num_axes: 0 }
    //
    optional int32 axis = 2 [default = 0];
    optional int32 num_axes = 3 [default = -1];
  }
  // Options for scaling (see ScaleLayer below).
  message ScaleParameter {
    // First axis of bottom[0] (the first input Blob) along which bottom[1]
    // (the second input Blob) is applied. Negative values index from the end
    // (-1 is the last axis).
    //
    // Example: for a 4D bottom[0] of shape 100x3x40x60, top[0] has that same
    // shape, and bottom[1] may take any of these shapes for the given axis:
    //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
    //    (axis == 1 == -3)          3;     3x40;     3x40x60
    //    (axis == 2 == -2)                   40;       40x60
    //    (axis == 3 == -1)                                60
    // Independently of "axis", bottom[1] may also have the empty shape,
    // i.e. be a scalar multiplier.
    optional int32 axis = 1 [default = 1];

    // Number of axes of the input (bottom[0]) covered by the scale parameter;
    // -1 covers every axis of bottom[0] starting from `axis`, and 0
    // multiplies with a zero-axis Blob: a scalar.
    // (Only honoured when a single bottom is given and the scale is a learned
    // parameter of the layer. Otherwise the number of axes of the second
    // bottom determines num_axes.)
    optional int32 num_axes = 2 [default = 1];

    // Initialization for the learned scale parameter. The default is the unit
    // (1) initialization, so the ScaleLayer initially performs the identity
    // operation.
    // (Only honoured when a single bottom is given and the scale is a learned
    // parameter of the layer.)
    optional FillerParameter filler = 3;

    // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
    // may be more efficient). The bias is initialized with bias_filler
    // (defaults to 0).
    optional bool bias_term = 4 [default = false];
    optional FillerParameter bias_filler = 5;

    // NOTE(review): undocumented in this file; presumably selects whether the
    // scale is taken from a blob -- confirm against the layer implementation.
    optional bool scale_from_blob = 6 [default = true];
  }
  // Options for the sigmoid; only an engine choice is configurable.
  message SigmoidParameter {
    enum Engine {
      DEFAULT = 0;
      CAFFE = 1;
      CUDNN = 2;
    }

    optional Engine engine = 1 [default = DEFAULT];
  }
  // Options for slicing (SliceLayer).
  message SliceParameter {
    // The axis along which to slice -- may be negative to index from the end
    // (e.g., -1 for the last axis).
    // By default, SliceLayer slices blobs along the "channels" axis (1).
    optional int32 axis = 3 [default = 1];

    repeated uint32 slice_point = 2;

    // DEPRECATED: alias for "axis" -- does not support negative indexing.
    optional uint32 slice_dim = 1 [default = 1];
  }
  // Options for the smooth L1 loss.
  message SmoothL1LossParameter {
    // Shapes the loss, which is defined piecewise as
    //   SmoothL1Loss(x) =
    //     0.5 * (sigma * x) ** 2    -- if x < 1.0 / sigma / sigma
    //     |x| - 0.5 / sigma / sigma -- otherwise
    optional float sigma = 1 [default = 1];
  }
  // Parameters used by SoftmaxLayer and SoftmaxWithLossLayer.
  message SoftmaxParameter {
    enum Engine {
      DEFAULT = 0;
      CAFFE = 1;
      CUDNN = 2;
    }

    optional Engine engine = 1 [default = DEFAULT];

    // Axis along which the softmax is performed. Negative values index from
    // the end (-1 is the last axis). Any other axes will be evaluated as
    // independent softmaxes.
    optional int32 axis = 2 [default = 1];
  }
  // Options for TanH; only an engine choice is configurable.
  message TanHParameter {
    enum Engine {
      DEFAULT = 0;
      CAFFE = 1;
      CUDNN = 2;
    }

    optional Engine engine = 1 [default = DEFAULT];
  }
  // Parameters used by TileLayer.
  message TileParameter {
    // Index of the axis to tile.
    optional int32 axis = 1 [default = 1];

    // Number of copies (tiles) of the blob to output.
    optional int32 tiles = 2;
  }
  // Parameters used by ThresholdLayer.
  message ThresholdParameter {
    // Strictly positive values.
    // NOTE(review): the original one-line remark is ambiguous alongside a
    // default of 0 -- confirm the intended range against ThresholdLayer.
    optional float threshold = 1 [default = 0];
  }
  // Options for the window data layer.
  message WindowDataParameter {
    // The data source.
    optional string source = 1;

    // Simple pre-processing: scaling and subtraction of the data mean, if
    // provided. The mean subtraction is always carried out before scaling.
    optional float scale = 2 [default = 1];
    optional string mean_file = 3;

    // The batch size.
    optional uint32 batch_size = 4;

    // Whether to randomly crop an image.
    optional uint32 crop_size = 5 [default = 0];

    // Whether to randomly mirror data.
    optional bool mirror = 6 [default = false];

    // Foreground (object) overlap threshold.
    optional float fg_threshold = 7 [default = 0.5];

    // Background (non-object) overlap threshold.
    optional float bg_threshold = 8 [default = 0.5];

    // Fraction of the batch that should be foreground objects.
    optional float fg_fraction = 9 [default = 0.25];

    // Amount of contextual padding to add around a window
    // (used only by the window_data_layer).
    optional uint32 context_pad = 10 [default = 0];

    // Mode for cropping out a detection window:
    //   warp:   cropped window is warped to a fixed size and aspect ratio
    //   square: the tightest square around the window is cropped
    optional string crop_mode = 11 [default = "warp"];

    // cache_images: will load all images in memory for faster access.
    optional bool cache_images = 12 [default = false];

    // Append root_folder to locate images.
    optional string root_folder = 13 [default = ""];
  }
  // SPP parameters: pyramid height, pooling method and engine selection.
  message SPPParameter {
    // Available pooling methods.
    enum PoolMethod {
      MAX = 0;
      AVE = 1;
      STOCHASTIC = 2;
    }

    // Engine choices; the `engine` field below defaults to DEFAULT.
    enum Engine {
      DEFAULT = 0;
      CAFFE = 1;
      CUDNN = 2;
    }

    optional uint32 pyramid_height = 1;

    // The pooling method; MAX when unset.
    optional PoolMethod pool = 2 [default = MAX];

    // Note that this field uses number 6; numbers 3 to 5 are not declared in
    // this message.
    optional Engine engine = 6 [default = DEFAULT];
  }
  // DEPRECATED: use LayerParameter instead.
  message V1LayerParameter {
    // Layer types of this (deprecated) format. Values are listed in their
    // original declaration order; the numbers are not sequential.
    enum LayerType {
      NONE = 0;
      ABSVAL = 35;
      ACCURACY = 1;
      ARGMAX = 30;
      BNLL = 2;
      CONCAT = 3;
      CONTRASTIVE_LOSS = 37;
      CONVOLUTION = 4;
      DATA = 5;
      DECONVOLUTION = 39;
      DROPOUT = 6;
      DUMMY_DATA = 32;
      EUCLIDEAN_LOSS = 7;
      ELTWISE = 25;
      EXP = 38;
      FLATTEN = 8;
      HDF5_DATA = 9;
      HDF5_OUTPUT = 10;
      HINGE_LOSS = 28;
      IM2COL = 11;
      IMAGE_DATA = 12;
      INFOGAIN_LOSS = 13;
      INNER_PRODUCT = 14;
      LRN = 15;
      MEMORY_DATA = 29;
      MULTINOMIAL_LOGISTIC_LOSS = 16;
      MVN = 34;
      POOLING = 17;
      POWER = 26;
      RELU = 18;
      SIGMOID = 19;
      SIGMOID_CROSS_ENTROPY_LOSS = 27;
      SILENCE = 36;
      SOFTMAX = 20;
      SOFTMAX_LOSS = 21;
      SPLIT = 22;
      SLICE = 33;
      TANH = 23;
      WINDOW_DATA = 24;
      THRESHOLD = 31;
      QUANT = 208;
      DEQUANT = 209;
    }

    // Value type of the repeated `blob_share_mode` field.
    enum DimCheckMode {
      STRICT = 0;
      PERMISSIVE = 1;
    }

    // Blob names, the layer name, and include/exclude rules.
    repeated string bottom = 2;
    repeated string top = 3;
    optional string name = 4;
    repeated NetStateRule include = 32;
    repeated NetStateRule exclude = 33;

    // Layer type and the blobs attached to the layer.
    optional LayerType type = 5;
    repeated BlobProto blobs = 6;

    repeated string param = 1001;
    repeated DimCheckMode blob_share_mode = 1002;

    repeated float blobs_lr = 7;
    repeated float weight_decay = 8;
    repeated float loss_weight = 35;

    // Per-layer-type parameter messages, in their original declaration
    // order.
    optional AccuracyParameter accuracy_param = 27;
    optional ArgMaxParameter argmax_param = 23;
    optional ConcatParameter concat_param = 9;
    optional ContrastiveLossParameter contrastive_loss_param = 40;
    optional ConvolutionParameter convolution_param = 10;
    optional DataParameter data_param = 11;
    optional DropoutParameter dropout_param = 12;
    optional DummyDataParameter dummy_data_param = 26;
    optional EltwiseParameter eltwise_param = 24;
    optional ExpParameter exp_param = 41;
    optional HDF5DataParameter hdf5_data_param = 13;
    optional HDF5OutputParameter hdf5_output_param = 14;
    optional HingeLossParameter hinge_loss_param = 29;
    optional ImageDataParameter image_data_param = 15;
    optional InfogainLossParameter infogain_loss_param = 16;
    optional InnerProductParameter inner_product_param = 17;
    optional LRNParameter lrn_param = 18;
    optional MemoryDataParameter memory_data_param = 22;
    optional MVNParameter mvn_param = 34;
    optional PoolingParameter pooling_param = 19;
    optional PowerParameter power_param = 21;
    optional ReLUParameter relu_param = 30;
    optional SigmoidParameter sigmoid_param = 38;
    optional SoftmaxParameter softmax_param = 39;
    optional SliceParameter slice_param = 31;
    optional TanHParameter tanh_param = 37;
    optional ThresholdParameter threshold_param = 25;
    optional WindowDataParameter window_data_param = 20;
    optional TransformationParameter transform_param = 36;
    optional LossParameter loss_param = 42;

    // Embedded V0LayerParameter, the older layer format.
    optional V0LayerParameter layer = 1;
  }
  // DEPRECATED: V0LayerParameter is the old way of specifying layer
  // parameters in Caffe. The message type is kept for legacy support only.
  message V0LayerParameter {
    // Pooling methods selectable through `pool`.
    enum PoolMethod {
      MAX = 0;
      AVE = 1;
      STOCHASTIC = 2;
    }

    // The layer name.
    optional string name = 1;
    // String that specifies the layer type.
    optional string type = 2;

    // Parameters for layers with inner products.
    // Number of outputs of the layer.
    optional uint32 num_output = 3;
    // Whether the layer has bias terms.
    optional bool biasterm = 4 [default = true];
    // Filler for the weight.
    optional FillerParameter weight_filler = 5;
    // Filler for the bias.
    optional FillerParameter bias_filler = 6;

    // Padding size.
    optional uint32 pad = 7 [default = 0];
    // Kernel size.
    optional uint32 kernelsize = 8;
    // Group size for group convolution.
    optional uint32 group = 9 [default = 1];
    // Stride.
    optional uint32 stride = 10 [default = 1];

    // The pooling method; MAX when unset.
    optional PoolMethod pool = 11 [default = MAX];
    // Dropout ratio.
    optional float dropout_ratio = 12 [default = 0.5];

    // Local response normalization settings (`k` carries no note of its own
    // in the original).
    optional uint32 local_size = 13 [default = 5];
    optional float alpha = 14 [default = 1.0];
    optional float beta = 15 [default = 0.75];
    optional float k = 22 [default = 1.0];

    // Data layers: the data source.
    optional string source = 16;

    // Simple pre-processing: scaling plus subtraction of the data mean when
    // one is provided. The mean is always subtracted before the scale is
    // applied.
    optional float scale = 17 [default = 1];
    optional string meanfile = 18;

    // Data layers: the batch size.
    optional uint32 batchsize = 19;

    // Data layers: set this to randomly crop an image.
    optional uint32 cropsize = 20 [default = 0];

    // Data layers: set this to randomly mirror the data.
    optional bool mirror = 21 [default = false];

    // Blobs holding the numeric parameters of the layer.
    repeated BlobProto blobs = 50;

    // Ratio multiplied onto the global learning rate. Setting it for one
    // blob means it has to be set for all blobs.
    repeated float blobs_lr = 51;

    // Weight decay multiplied onto the global weight decay.
    repeated float weight_decay = 52;

    // Lets the data layer skip a few data points so that asynchronous SGD
    // clients do not all start at the same point. The skip point is set to
    // rand_skip * rand(0,1). rand_skip should not exceed the number of keys
    // in the database.
    optional uint32 rand_skip = 53 [default = 0];

    // Detection-related fields (det_*).
    // Overlap threshold for foreground (object) windows.
    optional float det_fg_threshold = 54 [default = 0.5];
    // Overlap threshold for background (non-object) windows.
    optional float det_bg_threshold = 55 [default = 0.5];
    // Share of each batch that should consist of foreground objects.
    optional float det_fg_fraction = 56 [default = 0.25];

    // Field number 57 appears only in this commented-out declaration:
    // optional bool OBSOLETE_can_clobber = 57 [default = true];

    // Contextual padding added around a window (used only by the
    // window_data_layer).
    optional uint32 det_context_pad = 58 [default = 0];

    // How a detection window is cropped out:
    //   warp:   the cropped window is warped to a fixed size and aspect ratio
    //   square: the tightest square around the window is cropped
    optional string det_crop_mode = 59 [default = "warp"];

    // ReshapeLayer: the new dimensions.
    optional int32 new_num = 60 [default = 0];
    optional int32 new_channels = 61 [default = 0];
    optional int32 new_height = 62 [default = 0];
    optional int32 new_width = 63 [default = 0];

    // Whether ImageLayer shuffles the list of files at every epoch. Images
    // are also resized when new_height or new_width is non-zero.
    optional bool shuffle_images = 64 [default = false];

    // ConcatLayer: the dimension to concatenate along; all other dimensions
    // must match across the bottom blobs. The default concatenates along the
    // channels dimension.
    optional uint32 concat_dim = 65 [default = 1];

    optional HDF5OutputParameter hdf5_output_param = 1001;
  }
  // Parametric ReLU, as described in K. He et al., "Delving Deep into
  // Rectifiers: Surpassing Human-Level Performance on ImageNet
  // Classification", 2015.
  message PReLUParameter {
    // Initial value of a_i. Per the original note, the default is a_i = 0.25
    // for all i.
    optional FillerParameter filler = 1;

    // Whether the slope parameters are shared across channels.
    optional bool channel_shared = 2 [default = false];
  }
  1320. // Message that stores parameters used by DetectionOutputLayer
  1321. //message DetectionOutputParameter {
  1322. // optional int32 num_classes = 1 [default = 21];
  1323. // optional float nms_threshold = 2 [default = 0.3];
  1324. // optional int32 top_k = 3;
  1325. // optional float confidence_threshold = 4 [default = 0.8];
  1326. //}
  // Parameters consumed by PriorBoxLayer.
  message PriorBoxParameter {
    // Encode/decode type. There is no zero value; numbering starts at 1.
    enum CodeType {
      CORNER = 1;
      CENTER_SIZE = 2;
      CORNER_SIZE = 3;
    }

    // Minimum box size, in pixels. The original note marks this as required,
    // although the schema declares the field as `repeated`.
    repeated float min_size = 1;

    // Maximum box size, in pixels. The original note marks this as required,
    // although the schema declares the field as `repeated`.
    repeated float max_size = 2;

    // Aspect ratios. Duplicates are ignored; if none is given, the default
    // ratio 1 is used.
    repeated float aspect_ratio = 3;

    // When true, each aspect ratio is flipped: for a ratio "r", the ratio
    // "1.0/r" is generated as well.
    optional bool flip = 4 [default = true];

    // When true, the prior is clipped so that it lies within [0, 1].
    optional bool clip = 5 [default = false];

    // Variance used to adjust the prior bboxes.
    repeated float variance = 6;

    // img_height, img_width, step_x and step_y are by default calculated
    // from bottom[0] (feat) and bottom[1] (img), unless the values below are
    // provided explicitly.

    // Explicit image size.
    optional uint32 img_size = 7;
    // Specify either img_size or img_h/img_w, not both.
    optional uint32 img_h = 8;
    optional uint32 img_w = 9;

    // Explicit step size.
    optional float step = 10;
    // Specify either step or step_h/step_w, not both.
    optional float step_h = 11;
    optional float step_w = 12;

    // Offset to the top-left corner of each cell.
    optional float offset = 13 [default = 0.5];
  }
  // Parameters consumed by PermuteLayer.
  message PermuteParameter {
    // New order of the data axes. Indices start at 0, must stay within the
    // range of the input data's axes, and must not repeat.
    repeated uint32 order = 1;
  }
  // Normalize parameters.
  message NormalizeParameter {
    optional bool across_spatial = 1 [default = true];

    // Filler for the initial scale value. Per the original note, the default
    // is 1.0 for all.
    optional FillerParameter scale_filler = 2;

    // Whether the scale parameters are shared across channels.
    optional bool channel_shared = 3 [default = true];

    // Epsilon that prevents division by zero while normalizing variance.
    optional float eps = 4 [default = 1e-10];
  }
  // Needed by SSD.
  message SaveOutputParameter {
    // Output directory; results are saved when it is not empty.
    optional string output_directory = 1;

    // Prefix for output names.
    optional string output_name_prefix = 2;

    // Output format:
    //   VOC  - PASCAL VOC output format.
    //   COCO - MS COCO output format.
    optional string output_format = 3;

    // The next two files must be provided for results to be written;
    // without them, saving results is skipped.

    // Label map file.
    optional string label_map_file = 4;

    // File listing names and sizes in the same order as the input DB, in the
    // following format:
    //   name height width
    //   ...
    optional string name_size_file = 5;

    // Number of test images. May be smaller than the number of lines in
    // name_size_file, for example to evaluate on only part of the test
    // images.
    optional uint32 num_test_image = 6;

    // The resize parameter used in saving the data (commented out in the
    // original):
    // optional ResizeParameter resize_param = 7;
  }
  // Non-maximum suppression (NMS) settings.
  message NonMaximumSuppressionParameter {
    // Threshold used in NMS.
    optional float nms_threshold = 1 [default = 0.3];

    // Maximum number of results to keep.
    optional int32 top_k = 2;

    // Parameter for adaptive NMS.
    optional float eta = 3 [default = 1.0];
  }
  // General NMS settings. The original carries no per-field notes; the
  // defaults are the ones declared below.
  message GeneralNmsParameter {
    optional int32 post_top_k = 1;
    optional float nms_threshold = 2 [default = 0];
    optional float iou_threshold_decay = 3 [default = 1.0];
    optional float coor_scale_factor = 4 [default = 1.0];
  }
  // Parameters consumed by DetectionOutputLayer (ssd/fasterRcnn).
  message DetectionOutputParameter {
    optional int32 num_classes = 1;
    optional bool share_location = 2 [default = true];
    optional int32 background_label_id = 3 [default = 0];

    // Nested NMS and output-saving settings.
    optional NonMaximumSuppressionParameter nms_param = 4;
    optional SaveOutputParameter save_output_param = 5;

    // Code type from PriorBoxParameter; CENTER_SIZE when unset.
    optional PriorBoxParameter.CodeType code_type = 6
        [default = CENTER_SIZE];

    // Field 8 is declared ahead of field 7 in the original; that order is
    // kept here.
    optional bool variance_encoded_in_target = 8 [default = true];
    optional int32 keep_top_k = 7;
    optional float confidence_threshold = 9;

    // Numbers 10-12 and 16 are not declared in this message.
    optional float nms_threshold = 13;
    optional int32 top_k = 14;
    optional int32 boxes = 15 [default = 1];
    optional bool relative = 17 [default = true];
    optional float objectness_threshold = 18 [default = 0.5];
    optional float class_threshold = 19 [default = 0.5];
    repeated float biases = 20;
    optional GeneralNmsParameter general_nms_param = 21;
    optional float objectness_score = 22;
  }
  // PSROI pooling parameters; all three fields are `required`.
  message PSROIPoolingParameter {
    required float spatial_scale = 1;

    // Output channel number.
    required int32 output_dim = 2;

    // Number of groups used to encode position-sensitive score maps.
    required int32 group_size = 3;
  }
  // Parameters consumed by FreespaceExtractLayer.
  message FreespaceExtractParameter {
    optional float org_height = 1;
  }
  // Parameters consumed by DetectpostprocessLayer.
  message PostprocessParameter {
    optional float nms_thresh = 1 [default = 0.3];
    optional float conf_thresh = 2 [default = 0.5];
    optional uint32 post_nms_topn = 3 [default = 100];
    optional uint32 cls_num = 4 [default = 12];
    repeated float bbox_reg_weights = 5;
  }
  // Parameters consumed by SpatialTransformLayer.
  message SpatialTransformParameter {
    // Engine choices; the `engine` field below defaults to DEFAULT.
    enum Engine {
      DEFAULT = 0;
      CAFFE = 1;
      CUDNN = 2;
    }

    optional uint32 output_h = 1 [default = 0];
    optional uint32 output_w = 2 [default = 0];
    optional float border_value = 3 [default = 0];
    repeated float affine_transform = 4;

    // Note that this field uses number 15; numbers 5 to 14 are not declared
    // in this message.
    optional Engine engine = 15 [default = DEFAULT];
  }
  // ROI align parameters.
  message ROIAlignParameter {
    // Pooled output height.
    optional uint32 pooled_h = 1 [default = 0];

    // Pooled output width.
    optional uint32 pooled_w = 2 [default = 0];

    // Multiplicative spatial scale factor that translates ROI coordinates
    // from their input scale to the scale used when pooling.
    optional float spatial_scale = 3 [default = 1];

    optional int32 sampling_ratio = 4 [default = -1];
    optional int32 roi_end_mode = 5 [default = 0];
  }
  // Region parameters.
  message RegionParameter {
    // Category of classification.
    optional uint32 classes = 1 [default = 20];

    // Coordinates of box.
    optional uint32 coords = 2 [default = 4];

    // Number of boxes predicted per grid.
    optional uint32 boxes = 3 [default = 1];

    optional uint32 softmax = 4 [default = 0];
    optional string softmax_tree = 5 [default = ""];
    optional uint32 background = 6 [default = 0];
  }
  // Reorg parameters. `stride` (number 2) is declared ahead of `reverse`
  // (number 1) in the original; that order is kept here.
  message ReorgParameter {
    optional uint32 stride = 2 [default = 2];
    optional bool reverse = 1 [default = false];
  }
  // Reverse parameters: a repeated list of axis values.
  message ReverseParameter {
    repeated int32 axis = 1;
  }
  // Interp parameters.
  message InterpParameter {
    // Height of the output.
    optional int32 height = 1 [default = 0];

    // Width of the output.
    optional int32 width = 2 [default = 0];

    // Zoom factor.
    optional int32 zoom_factor = 3 [default = 1];

    // Shrink factor.
    optional int32 shrink_factor = 4 [default = 1];

    // Padding at the beginning of the input.
    optional int32 pad_beg = 5 [default = 0];

    // Padding at the end of the input.
    optional int32 pad_end = 6 [default = 0];
  }
  // Shuffle-channel parameters.
  message ShuffleChannelParameter {
    // The number of groups.
    optional uint32 group = 1 [default = 1];
  }
  // Upsample parameters: a scale plus a stride that can also be given
  // separately as stride_h and stride_w.
  message UpsampleParameter {
    optional float scale = 1 [default = 1];
    optional int32 stride = 2 [default = 2];
    optional int32 stride_h = 3 [default = 2];
    optional int32 stride_w = 4 [default = 2];
  }
  // ROI pooling parameters. The pooled sizes are `required`; the spatial
  // scale fields are optional.
  message ROIPoolingParameter {
    required int32 pooled_h = 1;
    required int32 pooled_w = 2;
    optional float spatial_scale = 3 [default = 0.0625];
    optional float spatial_scale_h = 4;
    optional float spatial_scale_w = 5;
  }
  // Yolo parameters. The original carries no per-field notes; the defaults
  // are the ones declared below.
  message YoloParameter {
    optional int32 boxes = 1 [default = 3];
    optional int32 coords = 2 [default = 4];
    optional int32 classes = 3 [default = 80];
    optional string yolo_version = 4 [default = "V3"];
    optional bool softmax = 5 [default = false];
    optional bool background = 6 [default = false];
    optional bool softmaxtree = 7 [default = false];
  }
  // YoloV3 detection output parameters. The original carries no per-field
  // notes; the defaults are the ones declared below.
  message YoloV3DetectionOutputParameter {
    optional int32 boxes = 1 [default = 3];
    optional int32 classes = 2 [default = 80];
    optional bool relative = 3 [default = true];

    // Thresholds.
    optional float obj_threshold = 4 [default = 0.5];
    optional float score_threshold = 5 [default = 0.5];
    optional float iou_threshold = 6 [default = 0.45];

    // Top-N limits before and after NMS.
    optional int32 pre_nms_topn = 7 [default = 512];
    optional int32 post_nms_topn = 8 [default = 1024];

    // Bias lists (high / mid / low).
    repeated float biases_high = 9;
    repeated float biases_mid = 10;
    repeated float biases_low = 11;

    optional int32 coords = 12 [default = 4];
    repeated float biases = 13;
    optional bool resize_origin_img_to_net = 14 [default = false];
  }
  // YoloV3 detection output parameters, V2. Fields 1-14 have the same names,
  // numbers and defaults as YoloV3DetectionOutputParameter; field 15
  // (out_box_dim) exists only here.
  message YoloV3DetectionOutputV2Parameter {
    optional int32 boxes = 1 [default = 3];
    optional int32 classes = 2 [default = 80];
    optional bool relative = 3 [default = true];

    // Thresholds.
    optional float obj_threshold = 4 [default = 0.5];
    optional float score_threshold = 5 [default = 0.5];
    optional float iou_threshold = 6 [default = 0.45];

    // Top-N limits before and after NMS.
    optional int32 pre_nms_topn = 7 [default = 512];
    optional int32 post_nms_topn = 8 [default = 1024];

    // Bias lists (high / mid / low).
    repeated float biases_high = 9;
    repeated float biases_mid = 10;
    repeated float biases_low = 11;

    optional int32 coords = 12 [default = 4];
    repeated float biases = 13;
    optional bool resize_origin_img_to_net = 14 [default = false];
    optional int32 out_box_dim = 15 [default = 3];
  }
  // Proposal parameters. The original carries no per-field notes; the
  // defaults are the ones declared below.
  message ProposalParameter {
    optional float feat_stride = 1 [default = 16];
    optional float base_size = 2 [default = 16];
    optional float min_size = 3 [default = 16];

    repeated float ratio = 4;
    repeated float scale = 5;

    // Top-N limits before and after NMS.
    optional int32 pre_nms_topn = 6 [default = 3000];
    optional int32 post_nms_topn = 7 [default = 304];

    optional float iou_threshold = 8 [default = 0.7];
    optional bool output_actual_rois_num = 9 [default = false];
  }
  // FSR detection output parameters. The class count and both thresholds
  // are `required`; batch_rois is optional and defaults to 1.
  message FSRDetectionOutputParameter {
    required int32 num_classes = 1;
    required float score_threshold = 2;
    required float iou_threshold = 3;
    optional int32 batch_rois = 4 [default = 1];
  }
  // SSD detection output parameters.
  message SSDDetectionOutputParameter {
    // Number of classes; 2 when unset.
    // This field used to be `required` while also declaring a default. The
    // two contradict each other: a message without a required field is
    // rejected as uninitialized, so the default could never apply. It is now
    // `optional`, which keeps the wire format and the generated accessors,
    // still accepts every message that was valid before, and lets the
    // declared default take effect. This also matches
    // DetectionOutputParameter.num_classes, which is optional.
    optional int32 num_classes = 1 [default = 2];

    optional bool share_location = 2 [default = true];
    optional int32 background_label_id = 3 [default = 0];
    optional float iou_threshold = 4 [default = 0.3];
    optional int32 top_k = 5 [default = 200];
    optional float eta = 6 [default = 1.0];
    optional bool variance_encoded_in_target = 7 [default = false];

    // Declared as a plain int32 here, not as PriorBoxParameter.CodeType.
    optional int32 code_type = 8 [default = 1];

    optional int32 keep_top_k = 9 [default = -1];
    optional float confidence_threshold = 10 [default = 0.0];
  }
  // YoloV2 detection output parameters. The original carries no per-field
  // notes; the defaults are the ones declared below.
  message YoloV2DetectionOutputParameter {
    optional int32 boxes = 1 [default = 5];
    optional int32 classes = 2 [default = 80];
    optional bool relative = 3 [default = true];

    // Thresholds.
    optional float obj_threshold = 4 [default = 0.5];
    optional float score_threshold = 5 [default = 0.5];
    optional float iou_threshold = 6 [default = 0.45];

    // Top-N limits before and after NMS.
    optional int32 pre_nms_topn = 7 [default = 512];
    optional int32 post_nms_topn = 8 [default = 1024];

    repeated float biases = 9;
    optional int32 coords = 10 [default = 4];
    optional bool resize_origin_img_to_net = 11 [default = false];
  }
  // Quant parameters. Field numbering starts at 2; number 1 is not declared
  // in this message.
  message QuantParameter {
    optional float scale = 2;

    // Offset, stored as raw bytes.
    optional bytes offset = 3;
  }
  // BatchMatMul parameters: two boolean flags, both false when unset.
  message BatchMatMulParameter {
    optional bool adj_x1 = 1 [default = false];
    optional bool adj_x2 = 2 [default = false];
  }
  // CondTake parameters. `mode` and `val` are `required`; `eps` is optional
  // and defaults to 1e-06.
  message CondTakeParameter {
    required string mode = 1;
    required float val = 2;
    optional float eps = 3 [default = 1e-06];
  }
  // MatrixInverse parameters: a single boolean flag, false when unset.
  message MatrixInverseParameter {
    optional bool adjoint = 1 [default = false];
  }
  // WarpPerspective parameters. The output height and width are `required`.
  message WarpPerspectiveParameter {
    required int32 out_height = 1;
    required int32 out_width = 2;
    optional float constant = 3;

    // Border type name; "BORDER_CONSTANT" when unset.
    optional string border_type = 4 [default = "BORDER_CONSTANT"];
  }
  // Spatial transformer parameters.
  message SpatialTransformerParameter {
    // How the parameter passed by the localisation network is used.
    optional string transform_type = 1 [default = "affine"];

    // The sampling technique.
    optional string sampler_type = 2 [default = "bilinear"];

    // Output size. When not set, it stays the same as the input dimensions
    // H and W.
    optional int32 output_H = 3;
    optional int32 output_W = 4;

    // When false, only dTheta is computed and dU is NOT computed.
    optional bool to_compute_dU = 5 [default = true];

    // Default values for some parameters (theta_<row>_<col>).
    optional double theta_1_1 = 6;
    optional double theta_1_2 = 7;
    optional double theta_1_3 = 8;
    optional double theta_2_1 = 9;
    optional double theta_2_2 = 10;
    optional double theta_2_3 = 11;
  }

图引擎模块（GE）是 MindSpore 的一个子模块，其代码由 C++ 实现，位于前端模块 ME 和底层硬件之间，起到承接作用。图引擎模块以 ME 下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE 针对昇腾 AI 处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾 AI 处理器的强大算力。在进行模型训练/推理时，GE 会被自动调用，用户对此并无感知。GE 主要由 GE API 和 GE Core 两部分组成，详细的架构图如下所示。