// hummingbird/Dubhe

 
			
							/* Copyright 2021 Tianshu AI Platform. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ============================================================= */

syntax = "proto3";

package oneflow;

// An allocation/de-allocation operation performed by the allocator.
message AllocationRecord {
  // The timestamp of the operation.
  // NOTE(review): the `_micros` suffix suggests microseconds; the clock/epoch
  // it is measured against is not stated in this file -- confirm with the
  // producer.
  int64 alloc_micros = 1;
  // Number of bytes allocated, or de-allocated if negative.
  int64 alloc_bytes = 2;
}

// Memory usage reported for a single allocator, identified by
// `allocator_name`. Holds per-node stats plus a snapshot of the allocator's
// overall usage.
message AllocatorMemoryUsed {
  // Name of the allocator these stats belong to.
  string allocator_name = 1;
  // These are per-node allocator memory stats.
  int64 total_bytes = 2;
  int64 peak_bytes = 3;
  // The bytes that are not deallocated.
  int64 live_bytes = 4;
  // The allocation and deallocation timeline.
  // Declared ahead of field 5, presumably to keep it grouped with the
  // per-node stats. Field numbers identify fields on the wire, so do not
  // renumber 5 and 6 to make them sequential.
  repeated AllocationRecord allocation_records = 6;

  // These are snapshots of the overall allocator memory stats.
  // The number of live bytes currently allocated by the allocator.
  int64 allocator_bytes_in_use = 5;
}

// Output sizes recorded for a single execution of a graph node.
message NodeOutput {
  // Field number 3 belongs to the disabled `tensor_description` field below.
  // Keep it reserved so no new field can claim the number while that field is
  // commented out; drop this line if the field is ever re-enabled.
  reserved 3;

  // NOTE(review): presumably the index of the node output this record
  // describes -- confirm against the writer.
  int32 slot = 1;
  // TensorDescription tensor_description = 3;
}

// For memory tracking.
message MemoryStats {
  // NOTE(review): the `_size` fields are presumably byte counts -- the unit is
  // not stated in this file; confirm.
  int64 temp_memory_size = 1;
  int64 persistent_memory_size = 3;
  // NOTE(review): presumably allocation ids of persistent tensors; what the
  // ids refer to is not visible in this file.
  repeated int64 persistent_tensor_alloc_ids = 5;

  // Deprecated device-specific variants. No replacement is named here; the
  // un-prefixed fields above look like the successors -- TODO confirm.
  // They remain declared, so field numbers 2, 4 and 6 are still occupied.
  int64 device_temp_memory_size = 2 [deprecated = true];
  int64 device_persistent_memory_size = 4 [deprecated = true];
  repeated int64 device_persistent_tensor_alloc_ids = 6 [deprecated = true];
}

// Time/size stats recorded for a single execution of a graph node.
message NodeExecStats {
  // Field number 11 belongs to the disabled `referenced_tensor` field below.
  // Keep it reserved so no new field can claim the number while that field is
  // commented out; drop this line if the field is ever re-enabled.
  reserved 11;

  // TODO(tucker): Use some more compact form of node identity than
  // the full string name.  Either all processes should agree on a
  // global id (cost_id?) for each node, or we should use a hash of
  // the name.
  string node_name = 1;

  // Timing fields.
  // NOTE(review): the `_micros` suffix suggests microseconds, and the `_rel_`
  // fields are presumably offsets relative to `all_start_micros` -- neither
  // is stated in this file; confirm against the writer.
  int64 all_start_micros = 2;
  int64 op_start_rel_micros = 3;
  int64 op_end_rel_micros = 4;
  int64 all_end_rel_micros = 5;

  // Per-allocator memory usage recorded for this execution.
  repeated AllocatorMemoryUsed memory = 6;
  // Output records for this execution.
  repeated NodeOutput output = 7;
  // NOTE(review): presumably a display label for timeline visualisation --
  // confirm.
  string timeline_label = 8;
  int64 scheduled_micros = 9;
  uint32 thread_id = 10;
  // repeated AllocationDescription referenced_tensor = 11;
  MemoryStats memory_stats = 12;
}

// Execution stats of the graph nodes grouped under one device.
message DeviceStepStats {
  // Identifies the device.
  // NOTE(review): the expected format of this string (name, id, path) is not
  // visible in this file -- confirm.
  string device = 1;
  // One entry per recorded node execution.
  repeated NodeExecStats node_stats = 2;
}

// Top-level container of step statistics: one DeviceStepStats entry per
// reported device.
message StepStats {
  // Per-device stats.
  repeated DeviceStepStats dev_stats = 1;
}


// lanpa, copied from config.proto
// Metadata output (i.e., non-Tensor) for a single Run() call.
message RunMetadata {
  // Field numbers 2 and 3 belong to the disabled `cost_graph` and
  // `partition_graphs` fields below. Keep them reserved so no new field can
  // claim those numbers while the fields are commented out; drop the matching
  // number if a field is ever re-enabled.
  reserved 2, 3;

  // Statistics traced for this step. Populated if tracing is turned on via the
  // "RunOptions" proto.
  // EXPERIMENTAL: The format and set of events may change in future versions.
  StepStats step_stats = 1;

  // The cost graph for the computation defined by the run call.
  // CostGraphDef cost_graph = 2;

  // Graphs of the partitions executed by executors.
  // repeated GraphDef partition_graphs = 3;
}