Skip to content

Commit 542d998

Browse files
Code review changes.
1 parent 4ba3716 commit 542d998

14 files changed

+41 -37 lines changed

benchmark/generate_chunk_auto_batchsize_benchmarks.sh

+3 -3
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ echo "#include \"${gpu_name}.h\"
6363
6464
namespace dorado::basecall {
6565
66-
void Add${gpu_name_no_dashes}Benchmarks(std::map<std::tuple<std::string, std::string>, std::map<int, float>>& chunk_benchmarks) {" >> ${gpu_name}.cpp
66+
void Add${gpu_name_no_dashes}Benchmarks(std::map<std::pair<std::string, std::string>, std::unordered_map<int, float>>& chunk_benchmarks) {" >> ${gpu_name}.cpp
6767

6868
# Add the chunk benchmarks for every model
6969
cat chunk_benchmarks__*.txt >> ${gpu_name}.cpp
@@ -77,9 +77,9 @@ echo "#pragma once
7777
7878
#include <map>
7979
#include <string>
80-
#include <tuple>
80+
#include <unordered_map>
8181
8282
namespace dorado::basecall {
83-
void Add${gpu_name_no_dashes}Benchmarks(std::map<std::tuple<std::string, std::string>, std::map<int, float>>& chunk_benchmarks);
83+
void Add${gpu_name_no_dashes}Benchmarks(std::map<std::pair<std::string, std::string>, std::unordered_map<int, float>>& chunk_benchmarks);
8484
} // namespace dorado::basecall
8585
" >> ${gpu_name}.h

dorado/basecall/CudaCaller.cpp

+3 -2
Original file line number | Diff line number | Diff line change
@@ -309,6 +309,7 @@ void CudaCaller::determine_batch_dims(const BasecallerCreationParams &params) {
309309

310310
float best_time = std::numeric_limits<float>::max();
311311

312+
assert(m_batch_dims.size() > 0);
312313
int chunk_size = m_batch_dims.back().T_in;
313314
// We limit the maximum when doing benchmarking to avoid excessive startup time.
314315
// The limit for transformer models should be increased at a later time.
@@ -319,8 +320,8 @@ void CudaCaller::determine_batch_dims(const BasecallerCreationParams &params) {
319320
if (params.emit_batchsize_benchmarks) {
320321
// When we are emitting benchmarks, prefer accuracy over speed of benchmark generation, so run the benchmarks
321322
// at full chunk size. We must round down the requested chunk size to a multiple of the minimum granularity.
322-
size_t chunk_granularity = params.model_config.chunk_size_granularity();
323-
chunk_size = int((chunk_size / chunk_granularity) * chunk_granularity);
323+
const size_t chunk_granularity = params.model_config.chunk_size_granularity();
324+
chunk_size = static_cast<int>((chunk_size / chunk_granularity) * chunk_granularity);
324325
} else {
325326
// 288 * stride (much shorter than the default chunk size of 10k) is a somewhat arbitrary
326327
// trade-off between getting more accurate measurements and avoiding excessive startup time,

dorado/basecall/CudaChunkBenchmarks.cpp

+7 -5
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,9 @@ std::optional<const CudaChunkBenchmarks::ChunkTimings> CudaChunkBenchmarks::get_
2525
ModelName model_name = std::filesystem::path(model_path).filename().string();
2626

2727
// Try looking up the specified gpu name directly
28-
if (m_chunk_benchmarks.find({gpu_name, model_name}) != m_chunk_benchmarks.end()) {
29-
return m_chunk_benchmarks.at({gpu_name, model_name});
28+
auto iter = m_chunk_benchmarks.find({gpu_name, model_name});
29+
if (iter != m_chunk_benchmarks.cend()) {
30+
return iter->second;
3031
}
3132

3233
// If the direct lookup fails, try looking up via an alias
@@ -35,10 +36,11 @@ std::optional<const CudaChunkBenchmarks::ChunkTimings> CudaChunkBenchmarks::get_
3536
{"NVIDIA A800 80GB PCIe", "NVIDIA A100 80GB PCIe"},
3637
};
3738

38-
if (gpu_name_alias.find(gpu_name) != gpu_name_alias.end()) {
39+
if (gpu_name_alias.find(gpu_name) != gpu_name_alias.cend()) {
3940
gpu_name = gpu_name_alias[gpu_name];
40-
if (m_chunk_benchmarks.find({gpu_name, model_name}) != m_chunk_benchmarks.end()) {
41-
return m_chunk_benchmarks.at({gpu_name, model_name});
41+
iter = m_chunk_benchmarks.find({gpu_name, model_name});
42+
if (iter != m_chunk_benchmarks.cend()) {
43+
return iter->second;
4244
}
4345
}
4446

dorado/basecall/CudaChunkBenchmarks.h

+3 -3
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
#include <map>
44
#include <optional>
55
#include <string>
6-
#include <tuple>
6+
#include <unordered_map>
77
#include <utility>
88
#include <vector>
99

@@ -12,10 +12,10 @@ namespace dorado::basecall {
1212
class CudaChunkBenchmarks final {
1313
private:
1414
CudaChunkBenchmarks();
15-
using ChunkTimings = std::map<int, float>;
15+
using ChunkTimings = std::unordered_map<int, float>;
1616
using ModelName = std::string;
1717
using GPUName = std::string;
18-
std::map<std::tuple<GPUName, ModelName>, ChunkTimings> m_chunk_benchmarks;
18+
std::map<std::pair<GPUName, ModelName>, ChunkTimings> m_chunk_benchmarks;
1919

2020
public:
2121
static CudaChunkBenchmarks& instance() {

dorado/basecall/benchmarks/NVIDIA_A100_80GB_PCIe.cpp

+2 -1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,8 @@
33
namespace dorado::basecall {
44

55
void AddNVIDIA_A100_80GB_PCIeBenchmarks(
6-
std::map<std::tuple<std::string, std::string>, std::map<int, float>>& chunk_benchmarks) {
6+
std::map<std::pair<std::string, std::string>, std::unordered_map<int, float>>&
7+
chunk_benchmarks) {
78
chunk_benchmarks[{"NVIDIA A100 80GB PCIe", "[email protected]"}] = {
89
{64, 0.098176f}, {128, 0.054744f}, {192, 0.038688f}, {256, 0.032228f},
910
{320, 0.027174f}, {384, 0.024059f}, {640, 0.022741f}, {704, 0.021700f},

dorado/basecall/benchmarks/NVIDIA_A100_80GB_PCIe.h

+3 -3
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,9 @@
22

33
#include <map>
44
#include <string>
5-
#include <tuple>
5+
#include <unordered_map>
66

77
namespace dorado::basecall {
8-
void AddNVIDIA_A100_80GB_PCIeBenchmarks(
9-
std::map<std::tuple<std::string, std::string>, std::map<int, float>>& chunk_benchmarks);
8+
void AddNVIDIA_A100_80GB_PCIeBenchmarks(std::map<std::pair<std::string, std::string>,
9+
std::unordered_map<int, float>>& chunk_benchmarks);
1010
} // namespace dorado::basecall

dorado/basecall/benchmarks/NVIDIA_H100_PCIe.cpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22

33
namespace dorado::basecall {
44

5-
void AddNVIDIA_H100_PCIeBenchmarks(
6-
std::map<std::tuple<std::string, std::string>, std::map<int, float>>& chunk_benchmarks) {
5+
void AddNVIDIA_H100_PCIeBenchmarks(std::map<std::pair<std::string, std::string>,
6+
std::unordered_map<int, float>>& chunk_benchmarks) {
77
chunk_benchmarks[{"NVIDIA H100 PCIe", "[email protected]"}] = {
88
{64, 0.078797f}, {128, 0.043509f}, {192, 0.030815f}, {256, 0.025591f},
99
{320, 0.021523f}, {384, 0.019044f}, {448, 0.017026f}, {640, 0.016945f},

dorado/basecall/benchmarks/NVIDIA_H100_PCIe.h

+3 -3
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,9 @@
22

33
#include <map>
44
#include <string>
5-
#include <tuple>
5+
#include <unordered_map>
66

77
namespace dorado::basecall {
8-
void AddNVIDIA_H100_PCIeBenchmarks(
9-
std::map<std::tuple<std::string, std::string>, std::map<int, float>>& chunk_benchmarks);
8+
void AddNVIDIA_H100_PCIeBenchmarks(std::map<std::pair<std::string, std::string>,
9+
std::unordered_map<int, float>>& chunk_benchmarks);
1010
} // namespace dorado::basecall

dorado/basecall/benchmarks/NVIDIA_RTX_A6000.cpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22

33
namespace dorado::basecall {
44

5-
void AddNVIDIA_RTX_A6000Benchmarks(
6-
std::map<std::tuple<std::string, std::string>, std::map<int, float>>& chunk_benchmarks) {
5+
void AddNVIDIA_RTX_A6000Benchmarks(std::map<std::pair<std::string, std::string>,
6+
std::unordered_map<int, float>>& chunk_benchmarks) {
77
chunk_benchmarks[{"NVIDIA RTX A6000", "[email protected]"}] = {
88
{64, 0.081234f}, {128, 0.047160f}, {192, 0.036165f}, {256, 0.031301f},
99
{320, 0.027645f}, {576, 0.026394f}, {640, 0.025800f}, {1344, 0.025478f},

dorado/basecall/benchmarks/NVIDIA_RTX_A6000.h

+3 -3
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,9 @@
22

33
#include <map>
44
#include <string>
5-
#include <tuple>
5+
#include <unordered_map>
66

77
namespace dorado::basecall {
8-
void AddNVIDIA_RTX_A6000Benchmarks(
9-
std::map<std::tuple<std::string, std::string>, std::map<int, float>>& chunk_benchmarks);
8+
void AddNVIDIA_RTX_A6000Benchmarks(std::map<std::pair<std::string, std::string>,
9+
std::unordered_map<int, float>>& chunk_benchmarks);
1010
} // namespace dorado::basecall

dorado/basecall/benchmarks/Quadro_GV100.cpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22

33
namespace dorado::basecall {
44

5-
void AddQuadro_GV100Benchmarks(
6-
std::map<std::tuple<std::string, std::string>, std::map<int, float>>& chunk_benchmarks) {
5+
void AddQuadro_GV100Benchmarks(std::map<std::pair<std::string, std::string>,
6+
std::unordered_map<int, float>>& chunk_benchmarks) {
77
chunk_benchmarks[{"Quadro GV100", "[email protected]"}] = {
88
{64, 0.133248f}, {128, 0.075928f}, {192, 0.059051f}, {256, 0.048244f},
99
{320, 0.041699f}, {448, 0.040158f}, {512, 0.038198f}, {576, 0.033886f},

dorado/basecall/benchmarks/Quadro_GV100.h

+3 -3
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,9 @@
22

33
#include <map>
44
#include <string>
5-
#include <tuple>
5+
#include <unordered_map>
66

77
namespace dorado::basecall {
8-
void AddQuadro_GV100Benchmarks(
9-
std::map<std::tuple<std::string, std::string>, std::map<int, float>>& chunk_benchmarks);
8+
void AddQuadro_GV100Benchmarks(std::map<std::pair<std::string, std::string>,
9+
std::unordered_map<int, float>>& chunk_benchmarks);
1010
} // namespace dorado::basecall

dorado/basecall/benchmarks/Tesla_V100-PCIE-16GB.cpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22

33
namespace dorado::basecall {
44

5-
void AddTesla_V100_PCIE_16GBBenchmarks(
6-
std::map<std::tuple<std::string, std::string>, std::map<int, float>>& chunk_benchmarks) {
5+
void AddTesla_V100_PCIE_16GBBenchmarks(std::map<std::pair<std::string, std::string>,
6+
std::unordered_map<int, float>>& chunk_benchmarks) {
77
chunk_benchmarks[{"Tesla V100-PCIE-16GB", "[email protected]"}] = {
88
{64, 0.107872f}, {128, 0.060736f}, {192, 0.046971f}, {256, 0.038268f},
99
{320, 0.032979f}, {512, 0.032126f}, {576, 0.029948f}, {640, 0.028114f},

dorado/basecall/benchmarks/Tesla_V100-PCIE-16GB.h

+3 -3
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,9 @@
22

33
#include <map>
44
#include <string>
5-
#include <tuple>
5+
#include <unordered_map>
66

77
namespace dorado::basecall {
8-
void AddTesla_V100_PCIE_16GBBenchmarks(
9-
std::map<std::tuple<std::string, std::string>, std::map<int, float>>& chunk_benchmarks);
8+
void AddTesla_V100_PCIE_16GBBenchmarks(std::map<std::pair<std::string, std::string>,
9+
std::unordered_map<int, float>>& chunk_benchmarks);
1010
} // namespace dorado::basecall

0 commit comments

Comments
 (0)