Skip to content

Commit

Permalink
Revert "blend: improve precision"
Browse files Browse the repository at this point in the history
This reverts commit 8b3adf4.
  • Loading branch information
oov committed Nov 14, 2018
1 parent 57e427b commit d45a460
Show file tree
Hide file tree
Showing 9 changed files with 67,583 additions and 46 deletions.
7 changes: 7 additions & 0 deletions blend/decl_amd64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package blend

// drawNormalNRGBAToNRGBAFast is declared here without a body; the
// implementation is the assembly routine of the same name (TEXT
// ·drawNormalNRGBAToNRGBAFast in normal_amd64.s). Its parameter list mirrors
// the generated draw{{.Name}}NRGBAToNRGBA function in genblend.go.
//
// NOTE(review): the assembly never reads dx0, dx1 or dxDelta and addresses
// dest with the source offset — confirm callers always pass matching
// source/destination x ranges.
func drawNormalNRGBAToNRGBAFast(dest []byte, src []byte, alpha uint32, d0, s0, y int, sx0 int, sx1 int, sxDelta int, syDelta int, dx0 int, dx1 int, dxDelta int, dyDelta int)

// init points drawNormalNRGBAToNRGBA at the assembly-backed
// drawNormalNRGBAToNRGBAFast when this file is part of the build.
// NOTE(review): presumably this replaces the pure-Go function value generated
// by genblend.go and the _amd64 file suffix limits it to amd64 — confirm.
func init() {
drawNormalNRGBAToNRGBA = drawNormalNRGBAToNRGBAFast
}
61 changes: 57 additions & 4 deletions blend/genblend.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,59 @@ func (d {{.Name.Lower}}) drawRGBAToRGBAUniform(dst *image.RGBA, r image.Rectangl
}
var draw{{.Name}}NRGBAToNRGBA drawFunc = func(dest []byte, src []byte, alpha uint32, d0, s0, y int, sx0 int, sx1 int, sxDelta int, syDelta int, dx0 int, dx1 int, dxDelta int, dyDelta int) {
alpha *= 32897
for ; y > 0; y-- {
dpix := dest[d0:]
spix := src[s0:]
for i, j := sx0, dx0; i != sx1; i, j = i+sxDelta, j+dxDelta {
sa := uint32(spix[i+3])
sb := uint32(spix[i+2])
sg := uint32(spix[i+1])
sr := uint32(spix[i])
tmp := (sa * alpha >> 23) * 32897
if tmp == 0 {
continue
}
da := uint32(dpix[j+3])
db := uint32(dpix[j+2])
dg := uint32(dpix[j+1])
dr := uint32(dpix[j])
a1 := (tmp * da) >> 23
a2 := (tmp * (255 - da)) >> 23
a3 := ((8388735 - tmp) * da) >> 23
a := a1 + a2 + a3
if a == 0 {
continue
}
var r, g, b uint32
{{if .CodePerChannel}}
{{.CodePerChannel.To8.Channel "r"}}
{{.CodePerChannel.To8.Channel "g"}}
{{.CodePerChannel.To8.Channel "b"}}
{{else if .Code}}
{{.Code.To8}}
{{else if .CodePerChannel16}}
{{.CodePerChannel16.To8.Channel "r"}}
{{.CodePerChannel16.To8.Channel "g"}}
{{.CodePerChannel16.To8.Channel "b"}}
{{else if .Code16}}
{{.Code16.To8}}
{{end}}
{{if .OverMax}}
dpix[j+3] = uint8(a)
dpix[j+2] = uint8(clip8((b*a1 + sb*a2 + db*a3) / a))
dpix[j+1] = uint8(clip8((g*a1 + sg*a2 + dg*a3) / a))
dpix[j+0] = uint8(clip8((r*a1 + sr*a2 + dr*a3) / a))
{{else}}
dpix[j+3] = uint8(a)
dpix[j+2] = uint8((b*a1 + sb*a2 + db*a3) / a)
dpix[j+1] = uint8((g*a1 + sg*a2 + dg*a3) / a)
dpix[j+0] = uint8((r*a1 + sr*a2 + dr*a3) / a)
{{end}}
}
d0 += dyDelta
s0 += syDelta
}
{{define "drawMain1"}}
alpha *= 0x0101
for ; y > 0; y-- {
Expand Down Expand Up @@ -571,10 +624,10 @@ var draw{{.Name}}NRGBAToNRGBA drawFunc = func(dest []byte, src []byte, alpha uin
s0 += syDelta
}
{{end}}
{{template "drawMain1" .}}
{{template "drawMain2" .}}
{{template "drawMain2_SetNRGBA" .}}
{{template "drawMain3" .}}
{{template "drawMain1" .}}
{{template "drawMain2" .}}
{{template "drawMain2_SetNRGBA" .}}
{{template "drawMain3" .}}
}
var draw{{.Name}}RGBAToNRGBA drawFunc = func(dest []byte, src []byte, alpha uint32, d0, s0, y int, sx0 int, sx1 int, sxDelta int, syDelta int, dx0 int, dx1 int, dxDelta int, dyDelta int) {
Expand Down
47 changes: 47 additions & 0 deletions blend/gendivtable.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// +build ignore

package main

import (
"bytes"
"go/format"
"html/template"
"log"
"os"
)

// source is the template for the generated file: a [256]uint32 array literal
// named divTable in package blend. Each element of .N is emitted as one
// 8-digit hex entry followed by a comment carrying its index.
var source = `// Code generated by gendivtable.go. DO NOT EDIT.
package blend
var divTable = [256]uint32{
{{range $i, $v := .N}}{{printf "0x%08x, // %d\n" $v $i}}{{end}}}
`

// main renders the divTable template, runs the result through go/format and
// writes it to zdivtable.go in the current directory. Any failure aborts the
// program via log.Fatal.
//
// NOTE(review): this file imports html/template; text/template is the package
// intended for generating source code. The current output contains no
// characters that HTML escaping would alter, so the result is the same today.
func main() {
	t := template.Must(template.New("").Parse(source))

	// n[i] = 0xffffffff / i (integer division). n[0] is left at zero because
	// there is no reciprocal for a zero divisor.
	n := make([]int64, 256)
	for i := int64(1); i < 256; i++ {
		n[i] = 0xffffffff / i
	}

	var b bytes.Buffer
	if err := t.Execute(&b, map[string]interface{}{
		"N": n,
	}); err != nil {
		log.Fatal(err)
	}
	buf, err := format.Source(b.Bytes())
	if err != nil {
		log.Fatal(err)
	}
	f, err := os.Create("zdivtable.go")
	if err != nil {
		log.Fatal(err)
	}
	// Close explicitly rather than with defer: log.Fatal exits through
	// os.Exit, which skips deferred calls, and a failing Close on a file we
	// just wrote means the generated source may be incomplete.
	if _, err = f.Write(buf); err != nil {
		_ = f.Close() // best effort; the write error is the one reported
		log.Fatal(err)
	}
	if err = f.Close(); err != nil {
		log.Fatal(err)
	}
}
49 changes: 49 additions & 0 deletions blend/genrgba2nrgba.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// +build ignore

package main

import (
"bytes"
"fmt"
"go/format"
"html/template"
"log"
"os"
)

// source is the template for the generated file: a [65536]uint16 array
// literal named rgbaToNRGBATable in package blend. Each element of .N is a
// pre-formatted "value, // comment" string emitted on its own line.
var source = `// Code generated by genrgba2nrgba.go. DO NOT EDIT.
package blend
var rgbaToNRGBATable = [65536]uint16{
{{range $i, $v := .N}}{{printf "%s\n" $v}}{{end}}}
`

// main renders the rgbaToNRGBATable template, runs the result through
// go/format and writes it to zrgba2nrgbatable.go in the current directory.
// Any failure aborts the program via log.Fatal.
//
// NOTE(review): this file imports html/template; text/template is the package
// intended for generating source code. The current output contains no
// characters that HTML escaping would alter, so the result is the same today.
func main() {
	t := template.Must(template.New("").Parse(source))

	// Entry (i<<8)+j holds i*255/j (integer division); the j == 0 column is
	// emitted as 0 to avoid dividing by zero.
	n := make([]string, 65536)
	for i := 0; i < 256; i++ {
		n[i<<8] = fmt.Sprintf("0, // %d * 255 / 0", i)
		for j := 1; j < 256; j++ {
			n[(i<<8)+j] = fmt.Sprintf("%d, // %d * 255 / %d", i*255/j, i, j)
		}
	}

	var b bytes.Buffer
	if err := t.Execute(&b, map[string]interface{}{
		"N": n,
	}); err != nil {
		log.Fatal(err)
	}
	buf, err := format.Source(b.Bytes())
	if err != nil {
		log.Fatal(err)
	}
	f, err := os.Create("zrgba2nrgbatable.go")
	if err != nil {
		log.Fatal(err)
	}
	// Close explicitly rather than with defer: log.Fatal exits through
	// os.Exit, which skips deferred calls, and a failing Close on a file we
	// just wrote means the generated source may be incomplete.
	if _, err = f.Write(buf); err != nil {
		_ = f.Close() // best effort; the write error is the one reported
		log.Fatal(err)
	}
	if err = f.Close(); err != nil {
		log.Fatal(err)
	}
}
139 changes: 139 additions & 0 deletions blend/normal_amd64.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#include "textflag.h"

// func drawNormalNRGBAToNRGBAFast(dest []byte, src []byte, alpha uint32, d0, s0, y int, sx0 int, sx1 int, sxDelta int, syDelta int, dx0 int, dx1 int, dxDelta int, dyDelta int)
//
// SSE implementation of the normal-blend NRGBA-over-NRGBA row loop. For each
// of y rows it steps a byte offset from sx0 by sxDelta until it equals sx1,
// blends the 4-byte src pixel at that offset into the 4-byte dest pixel and
// stores the result back to dest.
//
// NOTE(review): dx0, dx1 and dxDelta are never read; dest is addressed with
// the same offset (R15) as src. Confirm callers always pass matching ranges.
// NOTE(review): both loops test their exit condition only after the first
// iteration, so y == 0 or sx0 == sx1 is not treated as an empty range here.
TEXT ·drawNormalNRGBAToNRGBAFast(SB),0,$0-144
// alpha *= 0x8081 (32897); written back to the argument slot so the pixel
// loop can use it as the MULL operand.
MOVL alpha+48(FP), AX
IMULL $0x00008081, AX
MOVL AX, alpha+48(FP)

// SI = address of src[s0], DI = address of dest[d0], BX = remaining rows.
MOVQ src+24(FP), SI
ADDQ s0+64(FP), SI
MOVQ dest+0(FP), DI
ADDQ d0+56(FP), DI
MOVQ y+72(FP), BX

// Per-row and per-pixel strides kept in registers.
MOVQ dyDelta+136(FP), R12
MOVQ syDelta+104(FP), R13
MOVQ sxDelta+96(FP), R14

// X14 = 0x8081 in the three low 16-bit lanes; multiplier for PMULHUW below.
MOVQ $0x0000808180818081, AX
MOVQ AX, X14

// X15 = 0, used as the zero operand when widening bytes to words.
PXOR X15, X15

LOOPY:
// R15 = byte offset of the current pixel within the row, starting at sx0.
MOVQ sx0+80(FP), R15

LOOPX:
// AX = top byte of the 32-bit source pixel (source alpha); skip if zero.
MOVL (SI)(R15*1), AX
SHRL $24, AX
CMPB AX, $0
JZ NEXTX

// AX = (sa * alpha) >> 23; skip the pixel when the low byte is zero.
// MULL also overwrites DX, which holds nothing live at this point.
ANDQ $0xff, AX
MULL alpha+48(FP)
SHRQ $23, AX
CMPB AX, $0
JZ NEXTX

// X0 = scaled source alpha, X1 = dest pixel shifted right 3 bytes so the
// dest alpha byte is lowest, X2 = 0xff.
MOVL AX, X0
MOVL (DI)(R15*1), X1
PSRLDQ $3, X1
MOVQ $0xff, AX
MOVQ AX, X2

PUNPCKLBW X0, X0
PUNPCKLWL X0, X0 // sa sa sa sa
PXOR X2, X0 // sa sa sa 255-sa

PUNPCKLBW X1, X1
PUNPCKLWL X1, X1 // da da da da
PSLLDQ $1, X2
PXOR X2, X1 // 00 da 255-da da

// Widen both to 16-bit lanes and multiply lane-wise to get the three
// alpha products.
PUNPCKLBW X15, X0
PUNPCKLBW X15, X1
PMULLW X1, X0

MOVL (SI)(R15*1), X2 // sa sr sg sb
MOVL (DI)(R15*1), X3 // da dr dg db
MOVOA X2, X4 // 00 r g b

// High 16 bits of product * 0x8081; with PSRLW $7 below this is
// (x * 0x8081) >> 23.
PMULHUW X14, X0

PUNPCKLBW X2, X3 // sa da sr dr sg dg sb db
PUNPCKLBW X15, X4 // 00 00 00 r 00 g 00 b
PUNPCKLWL X4, X3 // 00 00 sa da 00 r sr dr 00 g sg dg 00 b sb db
MOVOA X3, X2
PSRLDQ $8, X2

PSRLW $7, X0 // 0000 sa*da/255 sa*(255-da)/255 (255-sa)*da/255

// Sum the three weight lanes into the low word of X1 (result alpha).
MOVOA X0, X1
MOVOA X0, X5
PSRLDQ $2, X5
PADDW X5, X1
PSRLDQ $2, X5
PADDW X5, X1 // a

// Widen the channel bytes to words and multiply by the weights, which
// PUNPCKLQDQ duplicates into both 64-bit halves of X0.
PUNPCKLBW X15, X2 // 00 00 00 00 00 sa 00 da 00 00 00 r 00 sr 00 dr
PUNPCKLBW X15, X3 // 00 00 00 g 00 sg 00 dg 00 00 00 b 00 sb 00 db
PUNPCKLQDQ X0, X0
PMULLW X0, X2
PMULLW X0, X3

// Add the three adjacent word products of each channel into its lowest word.
MOVOA X2, X0
PSRLDQ $2, X0
PADDW X0, X2
PSRLDQ $2, X0
PADDW X0, X2 // 00 00 00 00 00 sa 00 da 00 00 00 r

MOVOA X3, X0
PSRLDQ $2, X0
PADDW X0, X3
PSRLDQ $2, X0
PADDW X0, X3 // 00 00 00 g 00 sg 00 dg 00 00 00 b

// X5 = 0xffff (mask), X4 = 0x10000 (added to each product before the shift).
// DX = a; when a < 2 the reciprocal multiply is skipped entirely.
MOVQ $0xffff, AX
MOVQ AX, X5
INCL AX
MOVQ AX, X4
PAND X5, X1
MOVL X1, DX
CMPL DX, $2
JB DIVEND
// X0 = divTable[a]; gendivtable.go fills that table with 0xffffffff / i.
LEAQ ·divTable(SB), AX
MOVL (AX)(DX*4), X0

// Broadcast the reciprocal, the addend and the mask to both 64-bit halves.
PUNPCKLQDQ X0, X0
PUNPCKLQDQ X4, X4
PUNPCKLQDQ X5, X5

// Per 64-bit half: (sum & 0xffff) * divTable[a] + 0x10000, then the whole
// register is shifted right by 4 bytes.
// NOTE(review): presumably this approximates sum / a — confirm rounding
// against the Go version.
PAND X5, X2
PMULDQ X0, X2
PADDQ X4, X2
PSRLDQ $4, X2

PAND X5, X3
PMULDQ X0, X3
PADDQ X4, X3
PSRLDQ $4, X3

DIVEND:
// Repack the four result bytes and store them as one 32-bit dest pixel.
MOVOA X3, X4
PSRLDQ $8, X3

PUNPCKLBW X3, X4 // g b
PUNPCKLBW X1, X2 // a r
PUNPCKLWL X2, X4 // a r g b
MOVL X4, (DI)(R15*1)

NEXTX:
// Advance by sxDelta; the row ends when the offset equals sx1 exactly.
ADDQ R14, R15
CMPQ R15, sx1+88(FP)
JNE LOOPX

// Next row: dest += dyDelta, src += syDelta, one fewer row to go.
ADDQ R12, DI
ADDQ R13, SI
DECQ BX
JNZ LOOPY
RET
2 changes: 2 additions & 0 deletions blend/util.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//go:generate go run gendivtable.go
//go:generate go run genrgba2nrgba.go
//go:generate go run genblend.go
//go:generate go run genporterduff.go

Expand Down
Loading

0 comments on commit d45a460

Please sign in to comment.