Skip to content

Commit

Permalink
drm/i915: Rewrite VLV/CHV watermark code
Browse files Browse the repository at this point in the history
Assuming the PND deadline mechanism works reasonably we should do
memory requests as early as possible so that PND has schedule the
requests more intelligently. Currently we're still calculating
the watermarks as if VLV/CHV are identical to g4x, which isn't
the case.

The current code also seems to calculate insufficient watermarks
and hence we're seeing some underruns, especially on high resolution
displays.

To fix it just rip out the current code and replace is with something
that tries to utilize PND as efficiently as possible.

We now calculate the WM watermark to trigger when the FIFO still has
256us worth of data. 256us is the maximum deadline value supoorted by
PND, so issuing memory requests earlier would mean we probably couldn't
utilize the full FIFO as PND would attempt to return the data at
least in at least 256us. We also clamp the watermark to at least 8
cachelines as that's the magic watermark that enabling trickle feed
would also impose. I'm assuming it matches some burst size.

In theory we could just enable trickle feed and ignore the WM values,
except trickle feed doesn't work with max fifo mode anyway, so we'd
still need to calculate the SR watermarks. It seems cleaner to just
disable trickle feed and calculate all watermarks the same way. Also
trickle feed wouldn't account for the 256us max deadline value, thoguh
that may be a moot point in non-max fifo mode sicne the FIFOs are fairly
small.

On VLV max fifo mode can be used with either primary or sprite planes.
So the code now also checks all the planes (apart from the cursor)
when calculating the SR plane watermark.

We don't have to worry about the WM1 watermarks since we're using the
PND deadline scheme which means the hardware ignores WM1 values.

v2: Use plane->state->fb instead of plane->fb

Signed-off-by: Ville Syrjälä <[email protected]>
Reviewed-by: Jesse Barnes <[email protected]>
Signed-off-by: Daniel Vetter <[email protected]>
  • Loading branch information
vsyrjala authored and danvet committed Mar 17, 2015
1 parent 03e515f commit ae80152
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 142 deletions.
11 changes: 11 additions & 0 deletions drivers/gpu/drm/i915/i915_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -1408,6 +1408,17 @@ struct ilk_wm_values {
};

struct vlv_wm_values {
struct {
uint16_t primary;
uint16_t sprite[2];
uint8_t cursor;
} pipe[3];

struct {
uint16_t plane;
uint8_t cursor;
} sr;

struct {
uint8_t cursor;
uint8_t sprite[2];
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/i915/i915_reg.h
Original file line number Diff line number Diff line change
Expand Up @@ -4172,7 +4172,7 @@ enum skl_disp_power_wells {
/* vlv/chv high order bits */
#define DSPHOWM (VLV_DISPLAY_BASE + 0x70064)
#define DSPFW_SR_HI_SHIFT 24
#define DSPFW_SR_HI_MASK (1<<24)
#define DSPFW_SR_HI_MASK (3<<24) /* 2 bits for chv, 1 for vlv */
#define DSPFW_SPRITEF_HI_SHIFT 23
#define DSPFW_SPRITEF_HI_MASK (1<<23)
#define DSPFW_SPRITEE_HI_SHIFT 22
Expand All @@ -4193,7 +4193,7 @@ enum skl_disp_power_wells {
#define DSPFW_PLANEA_HI_MASK (1<<0)
#define DSPHOWM1 (VLV_DISPLAY_BASE + 0x70068)
#define DSPFW_SR_WM1_HI_SHIFT 24
#define DSPFW_SR_WM1_HI_MASK (1<<24)
#define DSPFW_SR_WM1_HI_MASK (3<<24) /* 2 bits for chv, 1 for vlv */
#define DSPFW_SPRITEF_WM1_HI_SHIFT 23
#define DSPFW_SPRITEF_WM1_HI_MASK (1<<23)
#define DSPFW_SPRITEE_WM1_HI_SHIFT 22
Expand Down
300 changes: 160 additions & 140 deletions drivers/gpu/drm/i915/intel_pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,55 @@ static void vlv_write_wm_values(struct intel_crtc *crtc,
(wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) |
(wm->ddl[pipe].primary << DDL_PLANE_SHIFT));

I915_WRITE(DSPFW1,
((wm->sr.plane << DSPFW_SR_SHIFT) & DSPFW_SR_MASK) |
((wm->pipe[PIPE_B].cursor << DSPFW_CURSORB_SHIFT) & DSPFW_CURSORB_MASK) |
((wm->pipe[PIPE_B].primary << DSPFW_PLANEB_SHIFT) & DSPFW_PLANEB_MASK_VLV) |
((wm->pipe[PIPE_A].primary << DSPFW_PLANEA_SHIFT) & DSPFW_PLANEA_MASK_VLV));
I915_WRITE(DSPFW2,
((wm->pipe[PIPE_A].sprite[1] << DSPFW_SPRITEB_SHIFT) & DSPFW_SPRITEB_MASK_VLV) |
((wm->pipe[PIPE_A].cursor << DSPFW_CURSORA_SHIFT) & DSPFW_CURSORA_MASK) |
((wm->pipe[PIPE_A].sprite[0] << DSPFW_SPRITEA_SHIFT) & DSPFW_SPRITEA_MASK_VLV));
I915_WRITE(DSPFW3,
((wm->sr.cursor << DSPFW_CURSOR_SR_SHIFT) & DSPFW_CURSOR_SR_MASK));

if (IS_CHERRYVIEW(dev_priv)) {
I915_WRITE(DSPFW7_CHV,
((wm->pipe[PIPE_B].sprite[1] << DSPFW_SPRITED_SHIFT) & DSPFW_SPRITED_MASK) |
((wm->pipe[PIPE_B].sprite[0] << DSPFW_SPRITEC_SHIFT) & DSPFW_SPRITEC_MASK));
I915_WRITE(DSPFW8_CHV,
((wm->pipe[PIPE_C].sprite[1] << DSPFW_SPRITEF_SHIFT) & DSPFW_SPRITEF_MASK) |
((wm->pipe[PIPE_C].sprite[0] << DSPFW_SPRITEE_SHIFT) & DSPFW_SPRITEE_MASK));
I915_WRITE(DSPFW9_CHV,
((wm->pipe[PIPE_C].primary << DSPFW_PLANEC_SHIFT) & DSPFW_PLANEC_MASK) |
((wm->pipe[PIPE_C].cursor << DSPFW_CURSORC_SHIFT) & DSPFW_CURSORC_MASK));
I915_WRITE(DSPHOWM,
(((wm->sr.plane >> 9) << DSPFW_SR_HI_SHIFT) & DSPFW_SR_HI_MASK) |
(((wm->pipe[PIPE_C].sprite[1] >> 8) << DSPFW_SPRITEF_HI_SHIFT) & DSPFW_SPRITEF_HI_MASK) |
(((wm->pipe[PIPE_C].sprite[0] >> 8) << DSPFW_SPRITEE_HI_SHIFT) & DSPFW_SPRITEE_HI_MASK) |
(((wm->pipe[PIPE_C].primary >> 8) << DSPFW_PLANEC_HI_SHIFT) & DSPFW_PLANEC_HI_MASK) |
(((wm->pipe[PIPE_B].sprite[1] >> 8) << DSPFW_SPRITED_HI_SHIFT) & DSPFW_SPRITED_HI_MASK) |
(((wm->pipe[PIPE_B].sprite[0] >> 8) << DSPFW_SPRITEC_HI_SHIFT) & DSPFW_SPRITEC_HI_MASK) |
(((wm->pipe[PIPE_B].primary >> 8) << DSPFW_PLANEB_HI_SHIFT) & DSPFW_PLANEB_HI_MASK) |
(((wm->pipe[PIPE_A].sprite[1] >> 8) << DSPFW_SPRITEB_HI_SHIFT) & DSPFW_SPRITEB_HI_MASK) |
(((wm->pipe[PIPE_A].sprite[0] >> 8) << DSPFW_SPRITEA_HI_SHIFT) & DSPFW_SPRITEA_HI_MASK) |
(((wm->pipe[PIPE_A].primary >> 8) << DSPFW_PLANEA_HI_SHIFT) & DSPFW_PLANEA_HI_MASK));
} else {
I915_WRITE(DSPFW7,
((wm->pipe[PIPE_B].sprite[1] << DSPFW_SPRITED_SHIFT) & DSPFW_SPRITED_MASK) |
((wm->pipe[PIPE_B].sprite[0] << DSPFW_SPRITEC_SHIFT) & DSPFW_SPRITEC_MASK));
I915_WRITE(DSPHOWM,
(((wm->sr.plane >> 9) << DSPFW_SR_HI_SHIFT) & DSPFW_SR_HI_MASK) |
(((wm->pipe[PIPE_B].sprite[1] >> 8) << DSPFW_SPRITED_HI_SHIFT) & DSPFW_SPRITED_HI_MASK) |
(((wm->pipe[PIPE_B].sprite[0] >> 8) << DSPFW_SPRITEC_HI_SHIFT) & DSPFW_SPRITEC_HI_MASK) |
(((wm->pipe[PIPE_B].primary >> 8) << DSPFW_PLANEB_HI_SHIFT) & DSPFW_PLANEB_HI_MASK) |
(((wm->pipe[PIPE_A].sprite[1] >> 8) << DSPFW_SPRITEB_HI_SHIFT) & DSPFW_SPRITEB_HI_MASK) |
(((wm->pipe[PIPE_A].sprite[0] >> 8) << DSPFW_SPRITEA_HI_SHIFT) & DSPFW_SPRITEA_HI_MASK) |
(((wm->pipe[PIPE_A].primary >> 8) << DSPFW_PLANEA_HI_SHIFT) & DSPFW_PLANEA_HI_MASK));
}

POSTING_READ(DSPFW1);

dev_priv->wm.vlv = *wm;
}

Expand Down Expand Up @@ -850,169 +899,113 @@ static uint8_t vlv_compute_drain_latency(struct drm_crtc *crtc,
DDL_PRECISION_HIGH : DDL_PRECISION_LOW);
}

/*
* Update drain latency registers of memory arbiter
*
* Valleyview SoC has a new memory arbiter and needs drain latency registers
* to be programmed. Each plane has a drain latency multiplier and a drain
* latency value.
*/

static void vlv_update_drain_latency(struct drm_crtc *crtc)
static int vlv_compute_wm(struct intel_crtc *crtc,
struct intel_plane *plane,
int fifo_size)
{
struct drm_device *dev = crtc->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
enum pipe pipe = intel_crtc->pipe;
struct vlv_wm_values wm = dev_priv->wm.vlv;
int clock, entries, pixel_size;

wm.ddl[pipe].primary = vlv_compute_drain_latency(crtc, crtc->primary);
wm.ddl[pipe].cursor = vlv_compute_drain_latency(crtc, crtc->cursor);
/*
* FIXME the plane might have an fb
* but be invisible (eg. due to clipping)
*/
if (!crtc->active || !plane->base.state->fb)
return 0;

vlv_write_wm_values(intel_crtc, &wm);
}
pixel_size = drm_format_plane_cpp(plane->base.state->fb->pixel_format, 0);
clock = crtc->config->base.adjusted_mode.crtc_clock;

#define single_plane_enabled(mask) is_power_of_2(mask)
entries = DIV_ROUND_UP(clock, 1000) * pixel_size;

static void valleyview_update_wm(struct drm_crtc *crtc)
/*
* Set up the watermark such that we don't start issuing memory
* requests until we are within PND's max deadline value (256us).
* Idea being to be idle as long as possible while still taking
* advatange of PND's deadline scheduling. The limit of 8
* cachelines (used when the FIFO will anyway drain in less time
* than 256us) should match what we would be done if trickle
* feed were enabled.
*/
return fifo_size - clamp(DIV_ROUND_UP(256 * entries, 64), 0, fifo_size - 8);
}

static bool vlv_compute_sr_wm(struct drm_device *dev,
struct vlv_wm_values *wm)
{
struct drm_device *dev = crtc->dev;
static const int sr_latency_ns = 12000;
struct drm_i915_private *dev_priv = dev->dev_private;
int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
int plane_sr, cursor_sr;
int ignore_plane_sr, ignore_cursor_sr;
unsigned int enabled = 0;
bool cxsr_enabled;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_crtc *crtc;
enum pipe pipe = INVALID_PIPE;
int num_planes = 0;
int fifo_size = 0;
struct intel_plane *plane;

vlv_update_drain_latency(crtc);
wm->sr.cursor = wm->sr.plane = 0;

if (g4x_compute_wm0(dev, PIPE_A,
&valleyview_wm_info, pessimal_latency_ns,
&valleyview_cursor_wm_info, pessimal_latency_ns,
&planea_wm, &cursora_wm))
enabled |= 1 << PIPE_A;
crtc = single_enabled_crtc(dev);
/* maxfifo not supported on pipe C */
if (crtc && to_intel_crtc(crtc)->pipe != PIPE_C) {
pipe = to_intel_crtc(crtc)->pipe;
num_planes = !!wm->pipe[pipe].primary +
!!wm->pipe[pipe].sprite[0] +
!!wm->pipe[pipe].sprite[1];
fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
}

if (g4x_compute_wm0(dev, PIPE_B,
&valleyview_wm_info, pessimal_latency_ns,
&valleyview_cursor_wm_info, pessimal_latency_ns,
&planeb_wm, &cursorb_wm))
enabled |= 1 << PIPE_B;
if (fifo_size == 0 || num_planes > 1)
return false;

if (single_plane_enabled(enabled) &&
g4x_compute_srwm(dev, ffs(enabled) - 1,
sr_latency_ns,
&valleyview_wm_info,
&valleyview_cursor_wm_info,
&plane_sr, &ignore_cursor_sr) &&
g4x_compute_srwm(dev, ffs(enabled) - 1,
2*sr_latency_ns,
&valleyview_wm_info,
&valleyview_cursor_wm_info,
&ignore_plane_sr, &cursor_sr)) {
cxsr_enabled = true;
} else {
cxsr_enabled = false;
intel_set_memory_cxsr(dev_priv, false);
plane_sr = cursor_sr = 0;
}
wm->sr.cursor = vlv_compute_wm(to_intel_crtc(crtc),
to_intel_plane(crtc->cursor), 0x3f);

DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
"B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
planea_wm, cursora_wm,
planeb_wm, cursorb_wm,
plane_sr, cursor_sr);
list_for_each_entry(plane, &dev->mode_config.plane_list, base.head) {
if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
continue;

I915_WRITE(DSPFW1,
(plane_sr << DSPFW_SR_SHIFT) |
(cursorb_wm << DSPFW_CURSORB_SHIFT) |
(planeb_wm << DSPFW_PLANEB_SHIFT) |
(planea_wm << DSPFW_PLANEA_SHIFT));
I915_WRITE(DSPFW2,
(I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
(cursora_wm << DSPFW_CURSORA_SHIFT));
I915_WRITE(DSPFW3,
(I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
(cursor_sr << DSPFW_CURSOR_SR_SHIFT));
if (plane->pipe != pipe)
continue;

if (cxsr_enabled)
intel_set_memory_cxsr(dev_priv, true);
wm->sr.plane = vlv_compute_wm(to_intel_crtc(crtc),
plane, fifo_size);
if (wm->sr.plane != 0)
break;
}

return true;
}

static void cherryview_update_wm(struct drm_crtc *crtc)
static void valleyview_update_wm(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
static const int sr_latency_ns = 12000;
struct drm_i915_private *dev_priv = dev->dev_private;
int planea_wm, planeb_wm, planec_wm;
int cursora_wm, cursorb_wm, cursorc_wm;
int plane_sr, cursor_sr;
int ignore_plane_sr, ignore_cursor_sr;
unsigned int enabled = 0;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
enum pipe pipe = intel_crtc->pipe;
bool cxsr_enabled;
struct vlv_wm_values wm = dev_priv->wm.vlv;

vlv_update_drain_latency(crtc);
wm.ddl[pipe].primary = vlv_compute_drain_latency(crtc, crtc->primary);
wm.pipe[pipe].primary = vlv_compute_wm(intel_crtc,
to_intel_plane(crtc->primary),
vlv_get_fifo_size(dev, pipe, 0));

if (g4x_compute_wm0(dev, PIPE_A,
&valleyview_wm_info, pessimal_latency_ns,
&valleyview_cursor_wm_info, pessimal_latency_ns,
&planea_wm, &cursora_wm))
enabled |= 1 << PIPE_A;
wm.ddl[pipe].cursor = vlv_compute_drain_latency(crtc, crtc->cursor);
wm.pipe[pipe].cursor = vlv_compute_wm(intel_crtc,
to_intel_plane(crtc->cursor),
0x3f);

if (g4x_compute_wm0(dev, PIPE_B,
&valleyview_wm_info, pessimal_latency_ns,
&valleyview_cursor_wm_info, pessimal_latency_ns,
&planeb_wm, &cursorb_wm))
enabled |= 1 << PIPE_B;
cxsr_enabled = vlv_compute_sr_wm(dev, &wm);

if (g4x_compute_wm0(dev, PIPE_C,
&valleyview_wm_info, pessimal_latency_ns,
&valleyview_cursor_wm_info, pessimal_latency_ns,
&planec_wm, &cursorc_wm))
enabled |= 1 << PIPE_C;
if (memcmp(&wm, &dev_priv->wm.vlv, sizeof(wm)) == 0)
return;

if (single_plane_enabled(enabled) &&
g4x_compute_srwm(dev, ffs(enabled) - 1,
sr_latency_ns,
&valleyview_wm_info,
&valleyview_cursor_wm_info,
&plane_sr, &ignore_cursor_sr) &&
g4x_compute_srwm(dev, ffs(enabled) - 1,
2*sr_latency_ns,
&valleyview_wm_info,
&valleyview_cursor_wm_info,
&ignore_plane_sr, &cursor_sr)) {
cxsr_enabled = true;
} else {
cxsr_enabled = false;
intel_set_memory_cxsr(dev_priv, false);
plane_sr = cursor_sr = 0;
}
DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
"SR: plane=%d, cursor=%d\n", pipe_name(pipe),
wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
wm.sr.plane, wm.sr.cursor);

DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
"B: plane=%d, cursor=%d, C: plane=%d, cursor=%d, "
"SR: plane=%d, cursor=%d\n",
planea_wm, cursora_wm,
planeb_wm, cursorb_wm,
planec_wm, cursorc_wm,
plane_sr, cursor_sr);
if (!cxsr_enabled)
intel_set_memory_cxsr(dev_priv, false);

I915_WRITE(DSPFW1,
(plane_sr << DSPFW_SR_SHIFT) |
(cursorb_wm << DSPFW_CURSORB_SHIFT) |
(planeb_wm << DSPFW_PLANEB_SHIFT) |
(planea_wm << DSPFW_PLANEA_SHIFT));
I915_WRITE(DSPFW2,
(I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
(cursora_wm << DSPFW_CURSORA_SHIFT));
I915_WRITE(DSPFW3,
(I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
(cursor_sr << DSPFW_CURSOR_SR_SHIFT));
I915_WRITE(DSPFW9_CHV,
(I915_READ(DSPFW9_CHV) & ~(DSPFW_PLANEC_MASK |
DSPFW_CURSORC_MASK)) |
(planec_wm << DSPFW_PLANEC_SHIFT) |
(cursorc_wm << DSPFW_CURSORC_SHIFT));
vlv_write_wm_values(intel_crtc, &wm);

if (cxsr_enabled)
intel_set_memory_cxsr(dev_priv, true);
Expand All @@ -1030,17 +1023,44 @@ static void valleyview_update_sprite_wm(struct drm_plane *plane,
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
enum pipe pipe = intel_crtc->pipe;
int sprite = to_intel_plane(plane)->plane;
bool cxsr_enabled;
struct vlv_wm_values wm = dev_priv->wm.vlv;

if (enabled)
if (enabled) {
wm.ddl[pipe].sprite[sprite] =
vlv_compute_drain_latency(crtc, plane);
else

wm.pipe[pipe].sprite[sprite] =
vlv_compute_wm(intel_crtc,
to_intel_plane(plane),
vlv_get_fifo_size(dev, pipe, sprite+1));
} else {
wm.ddl[pipe].sprite[sprite] = 0;
wm.pipe[pipe].sprite[sprite] = 0;
}

cxsr_enabled = vlv_compute_sr_wm(dev, &wm);

if (memcmp(&wm, &dev_priv->wm.vlv, sizeof(wm)) == 0)
return;

DRM_DEBUG_KMS("Setting FIFO watermarks - %c: sprite %c=%d, "
"SR: plane=%d, cursor=%d\n", pipe_name(pipe),
sprite_name(pipe, sprite),
wm.pipe[pipe].sprite[sprite],
wm.sr.plane, wm.sr.cursor);

if (!cxsr_enabled)
intel_set_memory_cxsr(dev_priv, false);

vlv_write_wm_values(intel_crtc, &wm);

if (cxsr_enabled)
intel_set_memory_cxsr(dev_priv, true);
}

#define single_plane_enabled(mask) is_power_of_2(mask)

static void g4x_update_wm(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
Expand Down Expand Up @@ -6430,7 +6450,7 @@ void intel_init_pm(struct drm_device *dev)
else if (INTEL_INFO(dev)->gen == 8)
dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
} else if (IS_CHERRYVIEW(dev)) {
dev_priv->display.update_wm = cherryview_update_wm;
dev_priv->display.update_wm = valleyview_update_wm;
dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
dev_priv->display.init_clock_gating =
cherryview_init_clock_gating;
Expand Down

0 comments on commit ae80152

Please sign in to comment.