Skip to content

Commit

Permalink
drm/amd/display: Move insert entry table to the FPU code
Browse files Browse the repository at this point in the history
The insert_entry_into_table_sorted function uses FPU operations and
calls other static helper functions. This commit moves the insert entry
function, together with all the structs and static functions it
requires, to the FPU file.

Tested-by: Daniel Wheeler <[email protected]>
Reviewed-by: Harry Wentland <[email protected]>
Signed-off-by: Rodrigo Siqueira <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
  • Loading branch information
rodrigosiqueira authored and alexdeucher committed Jul 25, 2022
1 parent 44998fb commit 34a1b0f
Show file tree
Hide file tree
Showing 4 changed files with 208 additions and 178 deletions.
188 changes: 10 additions & 178 deletions drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,137 +115,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C

#define DC_LOGGER_INIT(logger)

/*
 * DET / compressed-buffer size constants for dcn3_2.
 * DCN3_2_DEFAULT_DET_SIZE is assigned to dcn3_2_ip.det_buffer_size_kbytes,
 * so it is expressed in kbytes; the MAX/MIN DET values are presumably in the
 * same unit (TODO confirm against their users).
 */
#define DCN3_2_DEFAULT_DET_SIZE 256
#define DCN3_2_MAX_DET_SIZE 1152
#define DCN3_2_MIN_DET_SIZE 128
#define DCN3_2_MIN_COMPBUF_SIZE_KB 128

/*
 * dcn3_2 IP parameter table (struct _vcs_dpi_ip_params_st).
 *
 * Defined at file scope without 'static'; dcn32_resource.h carries a matching
 * extern declaration so other translation units can reference it.
 * The DET buffer size is initialised to DCN3_2_DEFAULT_DET_SIZE.
 */
struct _vcs_dpi_ip_params_st dcn3_2_ip = {
.gpuvm_enable = 0,
.gpuvm_max_page_table_levels = 4,
.hostvm_enable = 0,
.rob_buffer_size_kbytes = 128,
.det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE,
.config_return_buffer_size_in_kbytes = 1280,
.compressed_buffer_segment_size_in_kbytes = 64,
.meta_fifo_size_in_kentries = 22,
.zero_size_buffer_entries = 512,
.compbuf_reserved_space_64b = 256,
.compbuf_reserved_space_zs = 64,
.dpp_output_buffer_pixels = 2560,
.opp_output_buffer_lines = 1,
.pixel_chunk_size_kbytes = 8,
.alpha_pixel_chunk_size_kbytes = 4, // not appearing in spreadsheet, match c code from hw team
.min_pixel_chunk_size_bytes = 1024,
.dcc_meta_buffer_size_bytes = 6272,
.meta_chunk_size_kbytes = 2,
.min_meta_chunk_size_bytes = 256,
.writeback_chunk_size_kbytes = 8,
.ptoi_supported = false,
.num_dsc = 4,
.maximum_dsc_bits_per_component = 12,
.maximum_pixels_per_line_per_dsc_unit = 6016,
.dsc422_native_support = true,
.is_line_buffer_bpp_fixed = true,
.line_buffer_fixed_bpp = 57,
.line_buffer_size_bits = 1171920, //DPP doc, DCN3_2_DisplayMode_73.xlsm still shows as 986880 bits with 48 bpp
.max_line_buffer_lines = 32,
.writeback_interface_buffer_size_kbytes = 90,
.max_num_dpp = 4,
.max_num_otg = 4,
.max_num_hdmi_frl_outputs = 1,
.max_num_wb = 1,
.max_dchub_pscl_bw_pix_per_clk = 4,
.max_pscl_lb_bw_pix_per_clk = 2,
.max_lb_vscl_bw_pix_per_clk = 4,
.max_vscl_hscl_bw_pix_per_clk = 4,
.max_hscl_ratio = 6,
.max_vscl_ratio = 6,
.max_hscl_taps = 8,
.max_vscl_taps = 8,
.dpte_buffer_size_in_pte_reqs_luma = 64,
.dpte_buffer_size_in_pte_reqs_chroma = 34,
.dispclk_ramp_margin_percent = 1,
.max_inter_dcn_tile_repeaters = 8,
.cursor_buffer_size = 16,
.cursor_chunk_size = 2,
.writeback_line_buffer_buffer_size = 0,
.writeback_min_hscl_ratio = 1,
.writeback_min_vscl_ratio = 1,
.writeback_max_hscl_ratio = 1,
.writeback_max_vscl_ratio = 1,
.writeback_max_hscl_taps = 1,
.writeback_max_vscl_taps = 1,
.dppclk_delay_subtotal = 47,
.dppclk_delay_scl = 50,
.dppclk_delay_scl_lb_only = 16,
.dppclk_delay_cnvc_formatter = 28,
.dppclk_delay_cnvc_cursor = 6,
.dispclk_delay_subtotal = 125,
.dynamic_metadata_vm_enabled = false,
.odm_combine_4to1_supported = false,
.dcc_supported = true,
.max_num_dp2p0_outputs = 2,
.max_num_dp2p0_streams = 4,
};

/*
 * dcn3_2 SoC bounding box (struct _vcs_dpi_soc_bounding_box_st).
 *
 * Holds a single clock_limits state (num_states = 1) followed by latency,
 * bandwidth-percentage and memory-channel parameters.
 * calculate_net_bw_in_kbytes_sec() reads num_chans, dram_channel_width_bytes,
 * return_bus_width_bytes and the pct_ideal_*_after_urgent fields from here.
 * Defined at file scope without 'static'; dcn32_resource.h carries a matching
 * extern declaration.
 */
struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = {
.clock_limits = {
{
.state = 0,
.dcfclk_mhz = 1564.0,
.fabricclk_mhz = 400.0,
.dispclk_mhz = 2150.0,
.dppclk_mhz = 2150.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
.phyclk_d32_mhz = 625.0,
.socclk_mhz = 1200.0,
.dscclk_mhz = 716.667,
.dram_speed_mts = 1600.0,
.dtbclk_mhz = 1564.0,
},
},
.num_states = 1,
.sr_exit_time_us = 5.20,
.sr_enter_plus_exit_time_us = 9.60,
.sr_exit_z8_time_us = 285.0,
.sr_enter_plus_exit_z8_time_us = 320,
.writeback_latency_us = 12.0,
.round_trip_ping_latency_dcfclk_cycles = 263,
.urgent_latency_pixel_data_only_us = 4.0,
.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
.urgent_latency_vm_data_only_us = 4.0,
.fclk_change_latency_us = 20,
.usr_retraining_latency_us = 2,
.smn_latency_us = 2,
.mall_allocated_for_dcn_mbytes = 64,
.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
.pct_ideal_sdp_bw_after_urgent = 100.0,
.pct_ideal_fabric_bw_after_urgent = 67.0,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented
.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented
.pct_ideal_dram_bw_after_urgent_strobe = 67.0,
.max_avg_sdp_bw_use_normal_percent = 80.0,
.max_avg_fabric_bw_use_normal_percent = 60.0,
.max_avg_dram_bw_use_normal_strobe_percent = 50.0,
.max_avg_dram_bw_use_normal_percent = 15.0,
.num_chans = 8,
.dram_channel_width_bytes = 2,
.fabric_datapath_to_dcn_data_return_bytes = 64,
.return_bus_width_bytes = 64,
.downspread_percent = 0.38,
.dcn_downspread_percent = 0.5,
.dram_clock_change_latency_us = 400,
.dispclk_dppclk_vco_speed_mhz = 4300.0,
.do_urgent_latency_adjustment = true,
.urgent_latency_adjustment_fabric_clock_component_us = 1.0,
.urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
};

enum dcn32_clk_src_array_id {
DCN32_CLK_SRC_PLL0,
DCN32_CLK_SRC_PLL1,
Expand Down Expand Up @@ -3454,53 +3323,6 @@ static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
}
}

/*
 * Compute the net bandwidth available for a voltage-scaling state.
 *
 * Three bandwidth figures are derived from @entry's clocks and the dcn3_2_soc
 * bounding box (DRAM, fabric and SDP), each scaled by its
 * "pct_ideal_*_after_urgent" percentage. The smallest of the three is the
 * limiting bandwidth and is returned.
 *
 * Uses floating point, so callers must be inside an FPU-enabled section.
 */
static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry)
{
	float dram_bw = entry->dram_speed_mts * dcn3_2_soc.num_chans *
			dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
	float fabric_bw = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100);
	float sdp_bw = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100);
	float net_bw = dram_bw;

	/* Clamp down to whichever path is the bottleneck. */
	if (fabric_bw < net_bw) {
		net_bw = fabric_bw;
	}
	if (sdp_bw < net_bw) {
		net_bw = sdp_bw;
	}

	return net_bw;
}

/*
 * Insert @entry into @table, keeping the table ordered by ascending net
 * bandwidth as reported by calculate_net_bw_in_kbytes_sec().
 *
 * @table:       array of states; the caller must provide room for one more
 *               element than *@num_entries (no capacity check is done here).
 * @num_entries: in/out count of valid elements; incremented by one.
 * @entry:       state to copy into the table.
 *
 * The new entry is placed in front of the first existing element whose net
 * bandwidth is not smaller than its own, or at the end if there is none.
 */
static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
		struct _vcs_dpi_voltage_scaling_st *entry)
{
	unsigned int count = *num_entries;
	unsigned int slot = 0;
	unsigned int pos;

	/* An empty table needs no search: the entry simply lands in slot 0. */
	if (count > 0) {
		float new_bw = calculate_net_bw_in_kbytes_sec(entry);

		while (slot < count && new_bw > calculate_net_bw_in_kbytes_sec(&table[slot]))
			slot++;
	}

	/* Shift the tail up by one to open a gap at 'slot'. */
	for (pos = count; pos > slot; pos--)
		table[pos] = table[pos - 1];

	table[slot] = *entry;
	*num_entries = count + 1;
}

static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
unsigned int index)
{
Expand Down Expand Up @@ -3585,7 +3407,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
entry.dram_speed_mts = 0;

get_optimal_ntuple(&entry);
DC_FP_START();
insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
}

// Insert the max DCFCLK
Expand All @@ -3594,7 +3418,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
entry.dram_speed_mts = 0;

get_optimal_ntuple(&entry);
DC_FP_START();
insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();

// Insert the UCLK DPMS
for (i = 0; i < num_uclk_dpms; i++) {
Expand All @@ -3603,7 +3429,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16;

get_optimal_ntuple(&entry);
DC_FP_START();
insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
}

// If FCLK is coarse grained, insert individual DPMs.
Expand All @@ -3614,7 +3442,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
entry.dram_speed_mts = 0;

get_optimal_ntuple(&entry);
DC_FP_START();
insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
}
}
// If FCLK fine grained, only insert max
Expand All @@ -3624,7 +3454,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
entry.dram_speed_mts = 0;

get_optimal_ntuple(&entry);
DC_FP_START();
insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
}

// At this point, the table contains all "points of interest" based on
Expand Down
3 changes: 3 additions & 0 deletions drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
#define TO_DCN32_RES_POOL(pool)\
container_of(pool, struct dcn32_resource_pool, base)

extern struct _vcs_dpi_ip_params_st dcn3_2_ip;
extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc;

/*
 * dcn32 resource pool: embeds the generic struct resource_pool as 'base'.
 * TO_DCN32_RES_POOL() uses container_of() on 'base' to get back to this
 * struct from a struct resource_pool pointer.
 */
struct dcn32_resource_pool {
struct resource_pool base;
};
Expand Down
Loading

0 comments on commit 34a1b0f

Please sign in to comment.