
Commit bd08a55
Revert "ShardSpec: Remove designated initializers"
This reverts commit bbb9b68.
TT-billteng committed Feb 6, 2024
1 parent 2c7499c commit bd08a55
Showing 9 changed files with 15 additions and 15 deletions.
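For context on the change itself: every hunk below replaces a positional aggregate initialization of ShardSpec with the C++20 designated-initializer form that this revert restores. The sketch that follows is only an illustration of the two styles, not the real tt-metal code; ShardSpecSketch, its uint32_t grid stand-in, and the main() driver are hypothetical, with member names borrowed from the hunks below.

// Minimal, self-contained sketch of positional vs. designated aggregate
// initialization. ShardSpecSketch is a hypothetical stand-in; the real
// ShardSpec takes a CoreRangeSet grid and lives in tt-metal. Needs -std=c++20.
#include <array>
#include <cstdint>

enum class ShardOrientation { ROW_MAJOR, COL_MAJOR };

struct ShardSpecSketch {
    uint32_t grid;                            // stand-in for CoreRangeSet
    std::array<uint32_t, 2> shape;            // shard {height, width}
    ShardOrientation orientation = ShardOrientation::ROW_MAJOR;
    bool halo = false;                        // named only at the max_pool call site
};

int main() {
    // Positional form (what the reverted commit had switched to): meaning
    // depends on remembering member order, and the trailing bool gives no
    // hint that it is the halo flag.
    ShardSpecSketch positional{4, {64, 32}, ShardOrientation::ROW_MAJOR, false};

    // Designated form (what this revert restores): each field is named at
    // the call site, matching the .grid/.shape/.orientation spellings below.
    ShardSpecSketch designated{
        .grid = 4,
        .shape = {64, 32},
        .orientation = ShardOrientation::ROW_MAJOR,
        .halo = false};

    return positional.grid == designated.grid ? 0 : 1;
}

One detail visible in the diff itself: most call sites use .grid/.shape/.orientation, while max_pool.cpp and sharded_op.hpp spell the members .shard_grid/.shard_shape/.shard_orientation (max_pool also names .halo), so the exact field names depend on the ShardSpec definition at this point in the history.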
6 changes: 3 additions & 3 deletions tt_eager/tt_dnn/op_library/bmm/bmm_op.cpp
@@ -790,7 +790,7 @@ std::vector<Tensor> Matmul::create_output_tensors(const std::vector<Tensor>& inp
uint32_t num_blocks_total = num_blocks_y * num_blocks_x;
uint32_t num_cores = num_blocks_x * num_blocks_y;
CoreRangeSet all_cores = num_cores_to_corerange_set(num_cores, program_config.compute_with_storage_grid_size, true);
- ShardSpec shard_spec = ShardSpec{all_cores, {per_core_M * TILE_HEIGHT, per_core_N * TILE_WIDTH}, ShardOrientation::ROW_MAJOR};
+ ShardSpec shard_spec = ShardSpec{.grid=all_cores, .shape={per_core_M * TILE_HEIGHT, per_core_N * TILE_WIDTH}, .orientation=ShardOrientation::ROW_MAJOR};
auto mem_config = this->output_mem_config;
mem_config.shard_spec = shard_spec;
return {create_sharded_device_tensor(this->compute_output_shapes(input_tensors).at(0), this->output_dtype, Layout::TILE, input_tensor_a.device(), mem_config)};
@@ -815,7 +815,7 @@ std::vector<Tensor> Matmul::create_output_tensors(const std::vector<Tensor>& inp
all_cores = CoreRangeSet({CoreRange{{0, 0}, {num_blocks_x - 1, num_blocks_y - 1}}});
shard_orientation = ShardOrientation::ROW_MAJOR;
}
- ShardSpec shard_spec = ShardSpec{all_cores, {per_core_M * TILE_HEIGHT, per_core_N * TILE_WIDTH}, shard_orientation};
+ ShardSpec shard_spec = ShardSpec{.grid=all_cores, .shape={per_core_M * TILE_HEIGHT, per_core_N * TILE_WIDTH}, .orientation=shard_orientation};
auto mem_config = this->output_mem_config;
mem_config.shard_spec = shard_spec;
return {create_sharded_device_tensor(this->compute_output_shapes(input_tensors).at(0), this->output_dtype, Layout::TILE, input_tensor_a.device(), mem_config)};
@@ -839,7 +839,7 @@ std::vector<Tensor> Matmul::create_output_tensors(const std::vector<Tensor>& inp
}

CoreRangeSet all_cores = num_cores_to_corerange_set(num_cores, program_config.compute_with_storage_grid_size, shard_orientation==ShardOrientation::ROW_MAJOR);
- ShardSpec shard_spec = ShardSpec{all_cores, {per_core_M * TILE_HEIGHT, per_core_N * TILE_WIDTH}, shard_orientation};
+ ShardSpec shard_spec = ShardSpec{.grid=all_cores, .shape={per_core_M * TILE_HEIGHT, per_core_N * TILE_WIDTH}, .orientation=shard_orientation};
auto mem_config = this->output_mem_config;
mem_config.shard_spec = shard_spec;
return {create_sharded_device_tensor(this->compute_output_shapes(input_tensors).at(0), this->output_dtype, Layout::TILE, input_tensor_a.device(), mem_config)};
4 changes: 2 additions & 2 deletions tt_eager/tt_dnn/op_library/conv/optimized_conv_op.cpp
@@ -161,7 +161,7 @@ std::vector<Tensor> OptimizedConv::create_output_tensors(const std::vector<Tenso
CoreRangeSet shard_grid = num_cores_to_corerange_set(num_cores, this->parallelization_config.grid_size, true);

std::array<uint32_t, 2> shard_shape = {this->parallelization_config.per_core_out_matrix_height_ntiles * TILE_HEIGHT, output_shape[-1]};
- auto shard_spec = ShardSpec{shard_grid, shard_shape, ShardOrientation::ROW_MAJOR};
+ auto shard_spec = ShardSpec{.grid=shard_grid, .shape=shard_shape, .orientation=ShardOrientation::ROW_MAJOR};
auto mem_config = this->output_mem_config;
mem_config.shard_spec = shard_spec;
return {create_sharded_device_tensor(output_shape, this->output_dtype, output_layout, input_tensor.device(), mem_config)};
@@ -176,7 +176,7 @@ std::vector<Tensor> OptimizedConv::create_output_tensors(const std::vector<Tenso
uint32_t total_active_num_cores = total_active_num_cores_per_weight_slice * num_weight_slices_width;
CoreRangeSet shard_grid = num_cores_to_corerange_set(total_active_num_cores, this->parallelization_config.grid_size, true);
std::array<uint32_t, 2> shard_shape = {this->parallelization_config.per_core_out_matrix_height_ntiles * TILE_HEIGHT, this->parallelization_config.per_core_weight_matrix_width_ntiles * TILE_WIDTH};
- auto shard_spec = ShardSpec{shard_grid, shard_shape, ShardOrientation::COL_MAJOR};
+ auto shard_spec = ShardSpec{.grid=shard_grid, .shape=shard_shape, .orientation=ShardOrientation::COL_MAJOR};
auto mem_config = this->output_mem_config;
mem_config.shard_spec = shard_spec;
return {create_sharded_device_tensor(output_shape, this->output_dtype, output_layout, input_tensor.device(), mem_config)};
@@ -168,7 +168,7 @@ std::vector<Tensor> EltwiseBinary::create_output_tensors(
return {};
}
if (this->output_mem_config.is_sharded()) {
- ShardSpec shard_spec{CoreRangeSet({}), {0, 0}};
+ ShardSpec shard_spec{.grid=CoreRangeSet({}), .shape={0, 0}};
if (input_tensor_a.memory_config().is_sharded()) {
shard_spec = input_tensor_a.shard_spec().value();
} else if (input_tensor_b.memory_config().is_sharded()) {
4 changes: 2 additions & 2 deletions tt_eager/tt_dnn/op_library/nlp_tms/nlp_tms.cpp
@@ -131,11 +131,11 @@ std::vector<Tensor> NlpCreateHeads::create_output_tensors(const std::vector<Tens
if (this->output_mem_config.is_sharded()) {
auto core_grid = input_tensor.device()->compute_with_storage_grid_size();
auto q_shard_grid = num_cores_to_corerange_set(this->num_q_heads, core_grid, true);
- ShardSpec q_shard_spec{q_shard_grid, {TILE_HEIGHT, this->head_dim}};
+ ShardSpec q_shard_spec{.grid = q_shard_grid, .shape = {TILE_HEIGHT, this->head_dim}};
auto q_mem_config = this->output_mem_config;
q_mem_config.shard_spec = q_shard_spec;
auto kv_shard_grid = num_cores_to_corerange_set(this->num_kv_heads, core_grid, true);
- ShardSpec kv_shard_spec{kv_shard_grid, {TILE_HEIGHT, this->head_dim}};
+ ShardSpec kv_shard_spec{.grid = kv_shard_grid, .shape = {TILE_HEIGHT, this->head_dim}};
auto kv_mem_config = this->output_mem_config;
kv_mem_config.shard_spec = kv_shard_spec;
auto output_shapes = this->compute_output_shapes(input_tensors);
2 changes: 1 addition & 1 deletion tt_eager/tt_dnn/op_library/pool/max_pool.cpp
@@ -88,7 +88,7 @@ std::vector<Tensor> MaxPool::create_output_tensors(const std::vector<Tensor> &in
uint32_t out_nhw_per_core = out_nhw / ncores;
CoreRangeSet shard_grid = num_cores_to_corerange_set(ncores, input.device()->compute_with_storage_grid_size(), true);
std::array<uint32_t, 2> shard_shape = {out_nhw_per_core, input.shape()[-1]};
- auto shard_spec = ShardSpec{shard_grid, shard_shape, ShardOrientation::ROW_MAJOR, false};
+ auto shard_spec = ShardSpec{.shard_grid=shard_grid, .shard_shape=shard_shape, .shard_orientation=ShardOrientation::ROW_MAJOR, .halo = false};
auto mem_config = this->out_mem_config_;
mem_config.shard_spec = shard_spec;
return {create_sharded_device_tensor(output_shape, input.dtype(), input.layout(), input.device(), mem_config)};
@@ -70,7 +70,7 @@ std::vector<Tensor> RotaryEmbedding::create_output_tensors(const std::vector<Ten
const auto& input_tensor = input_tensors.at(0);
auto output_shape = this->compute_output_shapes(input_tensors)[0];
if (this->output_mem_config.is_sharded()) {
- ShardSpec shard_spec{CoreRangeSet({}), {0, 0}};
+ ShardSpec shard_spec{.grid = CoreRangeSet({}), .shape = {0, 0}};
if (input_tensor.is_sharded()) {
shard_spec = input_tensor.shard_spec().value();
} else {
2 changes: 1 addition & 1 deletion tt_eager/tt_dnn/op_library/sharded/sharded_op.hpp
@@ -71,7 +71,7 @@ inline Tensor interleaved_to_sharded(
}
bool row_wise = shard_orientation == ShardOrientation::ROW_MAJOR;
CoreRangeSet grid = num_cores_to_corerange_set(num_cores, grid_size, row_wise);
- auto shard_spec = ShardSpec{grid, shard_shape, shard_orientation};
+ auto shard_spec = ShardSpec{.shard_grid = grid, .shard_shape = shard_shape, .shard_orientation = shard_orientation};
MemoryConfig sharded_mem_config = MemoryConfig{.memory_layout = shard_scheme, .buffer_type = BufferType::L1};
return operation::run(
Sharded{
@@ -54,10 +54,10 @@ std::vector<Tensor> SplitFusedQKVAndSplitHeads::create_output_tensors(const std:
// shard spec
uint32_t per_core_M_qv = (num_heads / num_cores_y) * M; // 768
uint32_t per_core_N_qv = K; // 64
- ShardSpec shard_spec_qv = ShardSpec{all_cores, {per_core_M_qv, per_core_N_qv}, ShardOrientation::COL_MAJOR};
+ ShardSpec shard_spec_qv = ShardSpec{.grid=all_cores, .shape={per_core_M_qv, per_core_N_qv}, .orientation=ShardOrientation::COL_MAJOR};
uint32_t per_core_M_k = (num_heads / num_cores_y) * K; // 128
uint32_t per_core_N_k = M; // 384
- ShardSpec shard_spec_k = ShardSpec{all_cores, {per_core_M_k, per_core_N_k}, ShardOrientation::COL_MAJOR};
+ ShardSpec shard_spec_k = ShardSpec{.grid=all_cores, .shape={per_core_M_k, per_core_N_k}, .orientation=ShardOrientation::COL_MAJOR};
// create sharded tensors
auto mem_config_qv = this->output_mem_config;
mem_config_qv.shard_spec = shard_spec_qv;
@@ -344,7 +344,7 @@ std::vector<Tensor> GroupAttnMatmul::create_output_tensors(const std::vector<Ten
CoreRangeSet all_cores = num_cores_to_corerange_set(num_cores, this->compute_with_storage_grid_size, this->row_major);

ShardOrientation shard_orientation = this->row_major ? ShardOrientation::ROW_MAJOR : ShardOrientation::COL_MAJOR;
- ShardSpec shard_spec = ShardSpec{all_cores, {output_shape[2], output_shape[3]}, shard_orientation};
+ ShardSpec shard_spec = ShardSpec{.grid=all_cores, .shape={output_shape[2], output_shape[3]}, .orientation=shard_orientation};
output_mem_config.shard_spec = shard_spec;
}
return {create_sharded_device_tensor(this->compute_output_shapes(input_tensors).at(0), this->output_dtype, Layout::TILE, input_tensor_a.device(), output_mem_config)};
2 changes: 1 addition & 1 deletion tt_eager/tt_dnn/op_library/untilize/untilize_op.cpp
@@ -72,7 +72,7 @@ std::vector<Tensor> Untilize::create_output_tensors(const std::vector<Tensor> &i
auto shard_grid = num_cores_to_corerange_set(num_cores, input_tensor.device()->compute_with_storage_grid_size(), true);
uint32_t fused_height = input_tensor.volume() / input_tensor.shape()[-1];
std::array<uint32_t, 2> shard_shape = {fused_height / num_cores, input_tensor.shape()[-1]};
- ShardSpec shard_spec{shard_grid, shard_shape, ShardOrientation::ROW_MAJOR};
+ ShardSpec shard_spec{.grid=shard_grid, .shape=shard_shape, .orientation=ShardOrientation::ROW_MAJOR};
auto mem_config = this->output_mem_config;
mem_config.shard_spec = shard_spec;
return {create_sharded_device_tensor(this->compute_output_shapes(input_tensors).at(0), output_dtype, Layout::ROW_MAJOR, input_tensor.device(), mem_config)};
