Skip to content

Commit

Permalink
Reduce InstanceData footprint
Browse files Browse the repository at this point in the history
The InstanceData type in the rendering subsystem was previously 16
floats which occupied a total of 64 bytes per instance. This meant that
for every character or background cell drawn, 64 bytes were sent to the
GPU. In the case of a 400x100 cell grid, a total of 2.5MB would be sent.

This patch reduces InstanceData's size to 26 bytes, a 60% improvement!
Using the above example for comparison, a worst case of 1MB would be
transferred.

The motivation for this patch comes from macOS. Once the terminal grid
reached a certain size, rendering performance dropped sharply (render
times went from ~3ms to ~16ms). I don't want to speculate too much on
the underlying issue, but suffice it to say that this patch alleviates
the problem in my testing.
  • Loading branch information
jwilm committed Jul 1, 2017
1 parent 5a220b7 commit 3cdba29
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 50 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ clap = "2.20"
fnv = "1.0.5"
unicode-width = "0.1.4"
arraydeque = "0.2"
half = "1.0"
clippy = { version = "0.0.104", optional = true }

[target.'cfg(any(target_os = "linux", target_os = "freebsd", target_os="dragonfly", target_os="openbsd"))'.dependencies]
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ extern crate errno;
extern crate fnv;
extern crate font;
extern crate glutin;
extern crate half;
extern crate libc;
extern crate mio;
extern crate notify;
Expand Down
115 changes: 65 additions & 50 deletions src/renderer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use gl::types::*;
use gl;
use index::{Line, Column, RangeInclusive};
use notify::{Watcher as WatcherApi, RecommendedWatcher as Watcher, op};
use half::f16;

use config::{self, Config, Delta};
use term::{self, cell, RenderableCell};
Expand Down Expand Up @@ -125,14 +126,14 @@ pub struct ShaderProgram {
#[derive(Debug, Clone)]
pub struct Glyph {
tex_id: GLuint,
top: f32,
left: f32,
width: f32,
height: f32,
uv_bot: f32,
uv_left: f32,
uv_width: f32,
uv_height: f32,
top: i16,
left: i16,
width: i16,
height: i16,
uv_bot: f16,
uv_left: f16,
uv_width: f16,
uv_height: f16,
}

/// Naïve glyph cache
Expand Down Expand Up @@ -281,29 +282,33 @@ impl GlyphCache {
#[derive(Debug)]
#[repr(C)]
struct InstanceData {
// coords
col: f32,
row: f32,
// Grid coordinates of the cell
//
// Storing these as u16 limits each terminal dimension to the maximum u16
// value (65535). Practically speaking, this shouldn't be a problem.
col: u16,
row: u16,
// glyph offset
left: f32,
top: f32,
left: i16,
top: i16,
// glyph scale
width: f32,
height: f32,
width: i16,
height: i16,
// uv offset
uv_left: f32,
uv_bot: f32,
uv_left: f16,
uv_bot: f16,
// uv scale
uv_width: f32,
uv_height: f32,
uv_width: f16,
uv_height: f16,
// color
r: f32,
g: f32,
b: f32,
r: u8,
g: u8,
b: u8,
// background color
bg_r: f32,
bg_g: f32,
bg_b: f32,
bg_r: u8,
bg_g: u8,
bg_b: u8,
}

#[derive(Debug)]
Expand Down Expand Up @@ -366,8 +371,8 @@ impl Batch {
}

self.instances.push(InstanceData {
col: cell.column.0 as f32,
row: cell.line.0 as f32,
col: cell.column.0 as u16,
row: cell.line.0 as u16,

top: glyph.top,
left: glyph.left,
Expand All @@ -379,13 +384,13 @@ impl Batch {
uv_width: glyph.uv_width,
uv_height: glyph.uv_height,

r: cell.fg.r as f32,
g: cell.fg.g as f32,
b: cell.fg.b as f32,
r: cell.fg.r,
g: cell.fg.g,
b: cell.fg.b,

bg_r: cell.bg.r as f32,
bg_g: cell.bg.g as f32,
bg_b: cell.bg.b as f32,
bg_r: cell.bg.r,
bg_g: cell.bg.g,
bg_b: cell.bg.b,
});
}

Expand Down Expand Up @@ -490,38 +495,48 @@ impl QuadRenderer {
(BATCH_MAX * size_of::<InstanceData>()) as isize,
ptr::null(), gl::STREAM_DRAW);
// coords
let mut size = 0;
gl::VertexAttribPointer(1, 2,
gl::FLOAT, gl::FALSE,
gl::UNSIGNED_SHORT, gl::FALSE,
size_of::<InstanceData>() as i32,
ptr::null());
gl::EnableVertexAttribArray(1);
gl::VertexAttribDivisor(1, 1);
size += 2 * size_of::<u16>();


// glyphoffset
gl::VertexAttribPointer(2, 4,
gl::FLOAT, gl::FALSE,
gl::SHORT, gl::FALSE,
size_of::<InstanceData>() as i32,
(2 * size_of::<f32>()) as *const _);
size as *const _);
gl::EnableVertexAttribArray(2);
gl::VertexAttribDivisor(2, 1);
size += 4 * size_of::<i16>();

// uv
gl::VertexAttribPointer(3, 4,
gl::FLOAT, gl::FALSE,
gl::HALF_FLOAT, gl::FALSE,
size_of::<InstanceData>() as i32,
(6 * size_of::<f32>()) as *const _);
size as *const _);
gl::EnableVertexAttribArray(3);
gl::VertexAttribDivisor(3, 1);
size += 4 * size_of::<f16>();

// color
gl::VertexAttribPointer(4, 3,
gl::FLOAT, gl::FALSE,
gl::UNSIGNED_BYTE, gl::FALSE,
size_of::<InstanceData>() as i32,
(10 * size_of::<f32>()) as *const _);
size as *const _);
gl::EnableVertexAttribArray(4);
gl::VertexAttribDivisor(4, 1);
size += 3 * size_of::<u8>();

// background color
gl::VertexAttribPointer(5, 3,
gl::FLOAT, gl::FALSE,
gl::UNSIGNED_BYTE, gl::FALSE,
size_of::<InstanceData>() as i32,
(13 * size_of::<f32>()) as *const _);
size as *const _);
gl::EnableVertexAttribArray(5);
gl::VertexAttribDivisor(5, 1);

Expand Down Expand Up @@ -1319,14 +1334,14 @@ impl Atlas {

Glyph {
tex_id: self.id,
top: glyph.top as f32,
width: width as f32,
height: height as f32,
left: glyph.left as f32,
uv_bot: uv_bot,
uv_left: uv_left,
uv_width: uv_width,
uv_height: uv_height,
top: glyph.top as i16,
width: width as i16,
height: height as i16,
left: glyph.left as i16,
uv_bot: f16::from_f32(uv_bot),
uv_left: f16::from_f32(uv_left),
uv_width: f16::from_f32(uv_width),
uv_height: f16::from_f32(uv_height),
}
}

Expand Down

0 comments on commit 3cdba29

Please sign in to comment.