Browse Source

updata example

pull/1/head
fanchao yue (i25168) 2 months ago
parent
commit
0c5e03345a
16 changed files with 2 additions and 284 deletions
  1. +0
    -26
      example/001-example/reduce_sum_performance.yaml
  2. +0
    -46
      example/001-example/sort_pair_performance.yaml
  3. +0
    -210
      example/001-example/topk_pair_performance.yaml
  4. +0
    -0
      example/README.md
  5. +0
    -0
      example/build_and_run.sh
  6. +0
    -0
      example/build_and_run_reduce_sum.sh
  7. +0
    -0
      example/build_and_run_sort_pair.sh
  8. +0
    -0
      example/build_and_run_topk_pair.sh
  9. +2
    -2
      example/build_common.sh
  10. +0
    -0
      example/competition_parallel_algorithms.md
  11. +0
    -0
      example/performance_utils.h
  12. +0
    -0
      example/reduce_sum_algorithm.maca
  13. +0
    -0
      example/sort_pair_algorithm.maca
  14. +0
    -0
      example/test_utils.h
  15. +0
    -0
      example/topk_pair_algorithm.maca
  16. +0
    -0
      example/yaml_reporter.h

+ 0
- 26
example/001-example/reduce_sum_performance.yaml View File

@@ -1,26 +0,0 @@
# ReduceSum算法性能测试结果
# 生成时间: 2025-08-19 15:42:19

algorithm: "ReduceSum"
data_types:
input: "float"
output: "float"
formulas:
throughput: "elements / time(s) / 1e9 (G/s)"
performance_data:
- data_size: 1000000
time_ms: 0.059418
throughput_gps: 16.830029
data_type: "float"
- data_size: 134217728
time_ms: 0.884915
throughput_gps: 151.672977
data_type: "float"
- data_size: 536870912
time_ms: 2.811878
throughput_gps: 190.929630
data_type: "float"
- data_size: 1073741824
time_ms: 5.376102
throughput_gps: 199.724956
data_type: "float"

+ 0
- 46
example/001-example/sort_pair_performance.yaml View File

@@ -1,46 +0,0 @@
# SortPair算法性能测试结果
# 生成时间: 2025-08-19 15:44:51

algorithm: "SortPair"
data_types:
key_type: "float"
value_type: "uint32_t"
formulas:
throughput: "elements / time(s) / 1e9 (G/s)"
performance_data:
- data_size: 1000000
ascending:
time_ms: 0.645709
throughput_gps: 1.548686
descending:
time_ms: 0.643405
throughput_gps: 1.554231
key_type: "float"
value_type: "uint32_t"
- data_size: 134217728
ascending:
time_ms: 48.160717
throughput_gps: 2.786871
descending:
time_ms: 48.198193
throughput_gps: 2.784705
key_type: "float"
value_type: "uint32_t"
- data_size: 536870912
ascending:
time_ms: 192.608109
throughput_gps: 2.787374
descending:
time_ms: 192.794830
throughput_gps: 2.784675
key_type: "float"
value_type: "uint32_t"
- data_size: 1073741824
ascending:
time_ms: 391.377167
throughput_gps: 2.743496
descending:
time_ms: 391.900879
throughput_gps: 2.739830
key_type: "float"
value_type: "uint32_t"

+ 0
- 210
example/001-example/topk_pair_performance.yaml View File

@@ -1,210 +0,0 @@
# TopkPair算法性能测试结果
# 生成时间: 2025-08-19 15:48:37

algorithm: "TopkPair"
data_types:
key_type: "float"
value_type: "uint32_t"
formulas:
throughput: "elements / time(s) / 1e9 (G/s)"
performance_data:
- data_size: 1000000
k_value: 32
ascending:
time_ms: 0.678861
throughput_gps: 1.473056
descending:
time_ms: 0.686054
throughput_gps: 1.457610
key_type: "float"
value_type: "uint32_t"
- data_size: 1000000
k_value: 50
ascending:
time_ms: 0.707123
throughput_gps: 1.414181
descending:
time_ms: 0.679578
throughput_gps: 1.471502
key_type: "float"
value_type: "uint32_t"
- data_size: 1000000
k_value: 100
ascending:
time_ms: 0.700877
throughput_gps: 1.426784
descending:
time_ms: 0.671693
throughput_gps: 1.488776
key_type: "float"
value_type: "uint32_t"
- data_size: 1000000
k_value: 256
ascending:
time_ms: 0.706432
throughput_gps: 1.415564
descending:
time_ms: 0.689306
throughput_gps: 1.450735
key_type: "float"
value_type: "uint32_t"
- data_size: 1000000
k_value: 1024
ascending:
time_ms: 0.663194
throughput_gps: 1.507855
descending:
time_ms: 0.660531
throughput_gps: 1.513933
key_type: "float"
value_type: "uint32_t"
- data_size: 134217728
k_value: 32
ascending:
time_ms: 48.147915
throughput_gps: 2.787612
descending:
time_ms: 48.289310
throughput_gps: 2.779450
key_type: "float"
value_type: "uint32_t"
- data_size: 134217728
k_value: 50
ascending:
time_ms: 48.196999
throughput_gps: 2.784774
descending:
time_ms: 48.226074
throughput_gps: 2.783095
key_type: "float"
value_type: "uint32_t"
- data_size: 134217728
k_value: 100
ascending:
time_ms: 48.210712
throughput_gps: 2.783981
descending:
time_ms: 48.247169
throughput_gps: 2.781878
key_type: "float"
value_type: "uint32_t"
- data_size: 134217728
k_value: 256
ascending:
time_ms: 48.231018
throughput_gps: 2.782809
descending:
time_ms: 48.235191
throughput_gps: 2.782569
key_type: "float"
value_type: "uint32_t"
- data_size: 134217728
k_value: 1024
ascending:
time_ms: 48.243660
throughput_gps: 2.782080
descending:
time_ms: 48.287209
throughput_gps: 2.779571
key_type: "float"
value_type: "uint32_t"
- data_size: 536870912
k_value: 32
ascending:
time_ms: 192.694534
throughput_gps: 2.786124
descending:
time_ms: 192.709503
throughput_gps: 2.785908
key_type: "float"
value_type: "uint32_t"
- data_size: 536870912
k_value: 50
ascending:
time_ms: 192.567810
throughput_gps: 2.787958
descending:
time_ms: 192.672104
throughput_gps: 2.786449
key_type: "float"
value_type: "uint32_t"
- data_size: 536870912
k_value: 100
ascending:
time_ms: 192.633591
throughput_gps: 2.787006
descending:
time_ms: 192.675278
throughput_gps: 2.786403
key_type: "float"
value_type: "uint32_t"
- data_size: 536870912
k_value: 256
ascending:
time_ms: 192.612732
throughput_gps: 2.787307
descending:
time_ms: 192.753647
throughput_gps: 2.785270
key_type: "float"
value_type: "uint32_t"
- data_size: 536870912
k_value: 1024
ascending:
time_ms: 192.627899
throughput_gps: 2.787088
descending:
time_ms: 192.446716
throughput_gps: 2.789712
key_type: "float"
value_type: "uint32_t"
- data_size: 1073741824
k_value: 32
ascending:
time_ms: 392.105988
throughput_gps: 2.738397
descending:
time_ms: 392.092041
throughput_gps: 2.738494
key_type: "float"
value_type: "uint32_t"
- data_size: 1073741824
k_value: 50
ascending:
time_ms: 391.682678
throughput_gps: 2.741356
descending:
time_ms: 392.191681
throughput_gps: 2.737799
key_type: "float"
value_type: "uint32_t"
- data_size: 1073741824
k_value: 100
ascending:
time_ms: 391.761108
throughput_gps: 2.740808
descending:
time_ms: 392.304626
throughput_gps: 2.737010
key_type: "float"
value_type: "uint32_t"
- data_size: 1073741824
k_value: 256
ascending:
time_ms: 391.693726
throughput_gps: 2.741279
descending:
time_ms: 392.074707
throughput_gps: 2.738615
key_type: "float"
value_type: "uint32_t"
- data_size: 1073741824
k_value: 1024
ascending:
time_ms: 391.690765
throughput_gps: 2.741300
descending:
time_ms: 392.122711
throughput_gps: 2.738280
key_type: "float"
value_type: "uint32_t"

example/a guide to GPUKernelContest/README.md → example/README.md View File


example/a guide to GPUKernelContest/build_and_run.sh → example/build_and_run.sh View File


example/a guide to GPUKernelContest/build_and_run_reduce_sum.sh → example/build_and_run_reduce_sum.sh View File


example/a guide to GPUKernelContest/build_and_run_sort_pair.sh → example/build_and_run_sort_pair.sh View File


example/a guide to GPUKernelContest/build_and_run_topk_pair.sh → example/build_and_run_topk_pair.sh View File


example/a guide to GPUKernelContest/build_common.sh → example/build_common.sh View File

@@ -33,7 +33,7 @@ COMPILER=${COMPILER:-mxcc}
#COMPILER_FLAGS=${COMPILER_FLAGS:--O3 -std=c++17 --extended-lambda} # not run all test for easy debug #COMPILER_FLAGS=${COMPILER_FLAGS:--O3 -std=c++17 --extended-lambda} # not run all test for easy debug
COMPILER_FLAGS=${COMPILER_FLAGS:--O3 -std=c++17 --extended-lambda -DRUN_FULL_TEST} COMPILER_FLAGS=${COMPILER_FLAGS:--O3 -std=c++17 --extended-lambda -DRUN_FULL_TEST}


INCLUDE_DIR=${INCLUDE_DIR:-src}
INCLUDE_DIR=${INCLUDE_DIR:-}
BUILD_DIR=${BUILD_DIR:-build} BUILD_DIR=${BUILD_DIR:-build}


# 编译单个算法的通用函数 # 编译单个算法的通用函数
@@ -49,7 +49,7 @@ compile_algorithm() {
mkdir -p "$BUILD_DIR" mkdir -p "$BUILD_DIR"
# 编译命令 # 编译命令
local compile_cmd="$COMPILER $COMPILER_FLAGS -I$INCLUDE_DIR src/$source_file -o $target_file"
local compile_cmd="$COMPILER $COMPILER_FLAGS -I$INCLUDE_DIR $source_file -o $target_file"
print_info "执行: $compile_cmd" print_info "执行: $compile_cmd"

example/a guide to GPUKernelContest/competition_parallel_algorithms.md → example/competition_parallel_algorithms.md View File


example/a guide to GPUKernelContest/src/performance_utils.h → example/performance_utils.h View File


example/a guide to GPUKernelContest/src/reduce_sum_algorithm.maca → example/reduce_sum_algorithm.maca View File


example/a guide to GPUKernelContest/src/sort_pair_algorithm.maca → example/sort_pair_algorithm.maca View File


example/a guide to GPUKernelContest/src/test_utils.h → example/test_utils.h View File


example/a guide to GPUKernelContest/src/topk_pair_algorithm.maca → example/topk_pair_algorithm.maca View File


example/a guide to GPUKernelContest/src/yaml_reporter.h → example/yaml_reporter.h View File


Loading…
Cancel
Save