android_system_core/libpixelflinger/codeflinger/GGLAssembler.cpp
Ashok Bhat bfc6dc4ca8 Pixelflinger: Support for handling 64-bit addresses in GGL Assembler
GGLAssembler assumes addresses to be 32-bit and uses ARM 32-bit
instructions to load/store/manipulate addresses. To support 64-bit
architectures, the following changes have been made:

1. ARMAssemblerInterface has been extended to support four new
   operations ADDR_LDR, ADDR_STR, ADDR_SUB, ADDR_ADD. Base class
   implements these virtual functions using their 32-bit equivalent
   functions. This spares existing 32-bit assembler backend
   implementations, like ARMAssembler and MIPSAssembler, from having
   to map the new functions to existing equivalent routines.
   This also allows 64-bit Architectures like AArch64 to override
   the function in their assembler backend implementations.

2. GGLAssembler code (spread over GGLAssembler.cpp, GGLAssembler.h
   and texturing.cpp) has been changed to use the new operations
   for address operations.

Change-Id: I3d7eace4691e3e47cef737d97ac67ce6ef4fb18d
Signed-off-by: Ashok Bhat <ashok.bhat@arm.com>
2013-12-12 17:30:13 +00:00

1190 lines
39 KiB
C++

/* libs/pixelflinger/codeflinger/GGLAssembler.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#define LOG_TAG "GGLAssembler"
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <cutils/log.h>
#include "GGLAssembler.h"
namespace android {
// ----------------------------------------------------------------------------
// Constructs the assembler on top of |target|, which is handed to the
// ARMAssemblerProxy base. The register allocator is set up for the codegen
// architecture reported by the proxy, and the optimization level starts at 7.
GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
    : ARMAssemblerProxy(target),
      RegisterAllocator(ARMAssemblerProxy::getCodegenArch()),
      mOptLevel(7)
{
}
// Destructor: this class has nothing of its own to release here.
GGLAssembler::~GGLAssembler() {}
// Emits the routine prolog by delegating to the proxy base class.
void GGLAssembler::prolog() {
    ARMAssemblerProxy::prolog();
}
// Emits the routine epilog by delegating to the proxy base class.
// |touched| is forwarded unchanged.
void GGLAssembler::epilog(uint32_t touched) {
    ARMAssemblerProxy::epilog(touched);
}
// Resets both base classes (the proxied assembler, then the register
// allocator) and records |opt_level| as the current optimization level.
void GGLAssembler::reset(int opt_level) {
    ARMAssemblerProxy::reset();
    RegisterAllocator::reset();

    mOptLevel = opt_level;
}
// ---------------------------------------------------------------------------
// Builds the scanline routine described by |needs| for context |c|.
//
// Code generation is attempted at the current optimization level and, each
// time scanline_core() reports an error, retried one level lower until
// level 0 has been tried.
//
// Returns the result of generate() on success, or -1 (after logging and
// disassembling what was produced) if every optimization level failed.
int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
{
    int err = 0;
    for (int opt_level = mOptLevel; opt_level >= 0; opt_level--) {
        reset(opt_level);
        err = scanline_core(needs, c);
        if (err == 0)
            break;
    }

    // XXX: in theory, pcForLabel is not valid before generate()
    uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
    uint32_t* fragment_end_pc = pcForLabel("epilog");
    const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);

    // Build a name for our pipeline. The worst-case formatted length
    // (an 11-character "%3d" field) fills this buffer exactly, so use the
    // bounded snprintf() to guarantee it can never be overrun.
    char name[64];
    snprintf(name, sizeof(name),
            "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
            needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);

    if (err) {
        // Note: the adjacent string literals concatenate, so the name is
        // printed without surrounding quotes.
        ALOGE("Error while generating ""%s""\n", name);
        disassemble(name);
        return -1;
    }

    return generate(name);
}
// Emits the code for one scanline routine at the current optimization level.
//
// The function first decodes |needs| into member state (blending, masking,
// alpha/depth test, fog, per-component info), then emits, in order: the
// prolog, the scanline prolog, the "fragment_loop" body, the loop branch, the
// "epilog" label and epilog, and finally (when an alpha or depth test is
// enabled) the discard paths that skip a fragment but still advance the
// iterators.
//
// Returns registerFile().status(): zero when no allocation problem was
// recorded, non-zero otherwise (several early returns propagate it).
int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
// NOTE(review): 'duration' is assigned here but not read anywhere in this
// function.
int64_t duration = ggl_system_time();
mBlendFactorCached = 0;
mBlending = 0;
mMasking = 0;
mAA = GGL_READ_NEEDS(P_AA, needs.p);
mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
// the test functions are stored in the needs as an offset from GGL_NEVER
mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0;
mBuilderContext.needs = needs;
mBuilderContext.c = c;
mBuilderContext.Rctx = reserveReg(R0); // context always in R0
mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];
// ------------------------------------------------------------------------
decodeLogicOpNeeds(needs);
decodeTMUNeeds(needs, c);
mBlendSrc = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
mBlendDst = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));
// When the color-buffer format's ALPHA component has h == 0 (presumably: the
// format has no alpha channel), every blend factor that depends on the
// destination alpha is replaced with GGL_ONE.
if (!mCbFormat.c[GGLFormat::ALPHA].h) {
if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
(mBlendSrc == GGL_DST_ALPHA)) {
mBlendSrc = GGL_ONE;
}
if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
(mBlendSrcA == GGL_DST_ALPHA)) {
mBlendSrcA = GGL_ONE;
}
if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
(mBlendDst == GGL_DST_ALPHA)) {
mBlendDst = GGL_ONE;
}
if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
(mBlendDstA == GGL_DST_ALPHA)) {
mBlendDstA = GGL_ONE;
}
}
// if we need the framebuffer, read it now
const int blending = blending_codes(mBlendSrc, mBlendDst) |
blending_codes(mBlendSrcA, mBlendDstA);
// XXX: handle special cases, destination not modified...
// NOTE(review): both branches below are currently empty placeholders; they
// emit nothing and change no state.
if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
(mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
// Destination unmodified (beware of logic ops)
} else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
(mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
// Destination is zero (beware of logic ops)
}
// Fill in the per-component info (mInfo[]) and accumulate the bitmasks of
// components that need blending (mBlending), that are masked and present in
// the color buffer (mMasking), and that are present at all (fbComponents).
int fbComponents = 0;
const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
for (int i=0 ; i<4 ; i++) {
const int mask = 1<<i;
component_info_t& info = mInfo[i];
int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
fs = GGL_ONE;
info.masked = !!(masking & mask);
info.inDest = !info.masked && mCbFormat.c[i].h &&
((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
// for formats whose 'components' value is >= GGL_LUMINANCE, GREEN and BLUE
// are never written to the destination
if (mCbFormat.components >= GGL_LUMINANCE &&
(i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
info.inDest = false;
}
info.needed = (i==GGLFormat::ALPHA) &&
(isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
info.replaced = !!(mTextureMachine.replaced & mask);
info.iterated = (!info.replaced && (info.inDest || info.needed));
info.smooth = mSmooth && info.iterated;
info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA);
info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));
mBlending |= (info.blend ? mask : 0);
mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
fbComponents |= mCbFormat.c[i].h ? mask : 0;
}
// if every component present in the color buffer is masked, nothing will be
// stored, so dithering is turned off
mAllMasked = (mMasking == fbComponents);
if (mAllMasked) {
mDithering = 0;
}
fragment_parts_t parts;
// ------------------------------------------------------------------------
prolog();
// ------------------------------------------------------------------------
build_scanline_prolog(parts, needs);
if (registerFile().status())
return registerFile().status();
// ------------------------------------------------------------------------
label("fragment_loop");
// ------------------------------------------------------------------------
{
Scratch regs(registerFile());
if (mDithering) {
// update the dither index.
// (rotate the counter, add 1 in the top bits, rotate it back)
MOV(AL, 0, parts.count.reg,
reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
ADD(AL, 0, parts.count.reg, parts.count.reg,
imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
MOV(AL, 0, parts.count.reg,
reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
}
// XXX: could we do an early alpha-test here in some cases?
// It would probably be used only with smooth-alpha and no texture
// (or no alpha component in the texture).
// Early z-test
if (mAlphaTest==GGL_ALWAYS) {
build_depth_test(parts, Z_TEST|Z_WRITE);
} else {
// we cannot do the z-write here, because
// it might be killed by the alpha-test later
build_depth_test(parts, Z_TEST);
}
{ // texture coordinates
Scratch scratches(registerFile());
// texel generation
build_textures(parts, regs);
if (registerFile().status())
return registerFile().status();
}
if ((blending & (FACTOR_DST|BLEND_DST)) ||
(mMasking && !mAllMasked) ||
(mLogicOp & LOGIC_OP_DST))
{
// blending / logic_op / masking need the framebuffer
mDstPixel.setTo(regs.obtain(), &mCbFormat);
// load the framebuffer pixel
comment("fetch color-buffer");
load(parts.cbPtr, mDstPixel);
}
if (registerFile().status())
return registerFile().status();
pixel_t pixel;
int directTex = mTextureMachine.directTexture;
if (directTex | parts.packed) {
// note: we can't have both here
// iterated color or direct texture
pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
pixel.flags &= ~CORRUPTIBLE;
} else {
if (mDithering) {
// load the dither value for this pixel: index the context's
// ditherMatrix with the low bits of the counter
const int ctxtReg = mBuilderContext.Rctx;
const int mask = GGL_DITHER_SIZE-1;
parts.dither = reg_t(regs.obtain());
AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg);
LDRB(AL, parts.dither.reg, parts.dither.reg,
immed12_pre(GGL_OFFSETOF(ditherMatrix)));
}
// allocate a register for the resulting pixel
pixel.setTo(regs.obtain(), &mCbFormat, FIRST);
// alpha is built first: the alpha-test inside it may discard the fragment
build_component(pixel, parts, GGLFormat::ALPHA, regs);
if (mAlphaTest!=GGL_ALWAYS) {
// only handle the z-write part here. We know z-test
// was successful, as well as alpha-test.
build_depth_test(parts, Z_WRITE);
}
build_component(pixel, parts, GGLFormat::RED, regs);
build_component(pixel, parts, GGLFormat::GREEN, regs);
build_component(pixel, parts, GGLFormat::BLUE, regs);
pixel.flags |= CORRUPTIBLE;
}
if (registerFile().status())
return registerFile().status();
if (pixel.reg == -1) {
// be defensive here. if we're here it's probably
// that this whole fragment is a no-op.
pixel = mDstPixel;
}
if (!mAllMasked) {
// logic operation
build_logic_op(pixel, regs);
// masking
build_masking(pixel, regs);
comment("store");
store(parts.cbPtr, pixel, WRITE_BACK);
}
}
if (registerFile().status())
return registerFile().status();
// update the iterated color...
if (parts.reload != 3) {
build_smooth_shade(parts);
}
// update iterated z
build_iterate_z(parts);
// update iterated fog
build_iterate_f(parts);
// decrement the pixel counter held in the upper 16 bits of count and loop
// while the result is still positive or zero
SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
B(PL, "fragment_loop");
label("epilog");
epilog(registerFile().touched());
// Discard paths: reached by branches emitted from the depth test
// ("discard_before_textures") and the alpha test ("discard_after_textures").
// They advance the iterators (and the color-buffer pointer) without storing
// a pixel, then re-enter the loop or fall into a second epilog.
if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
if (mDepthTest!=GGL_ALWAYS) {
label("discard_before_textures");
build_iterate_texture_coordinates(parts);
}
label("discard_after_textures");
build_smooth_shade(parts);
build_iterate_z(parts);
build_iterate_f(parts);
if (!mAllMasked) {
// skip one pixel: cbPtr.size is in bits, so >>3 gives bytes
ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
}
SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
B(PL, "fragment_loop");
epilog(registerFile().touched());
}
return registerFile().status();
}
// ---------------------------------------------------------------------------
// Emits the code that runs once per scanline, before the fragment loop.
//
// It loads the iterators (xl, xr, y) from the context and fills in |parts|:
//   - parts.count  : pixel counter (count-1 in the upper 16 bits; the dither
//                    offset in the low bits when dithering is enabled)
//   - parts.cbPtr  : pointer to the first color-buffer pixel (unless every
//                    component is masked)
//   - parts.z      : initial Z value, when the depth test or Z mask needs it
//   - parts.covPtr : pointer into the coverage buffer, when anti-aliasing
// It also stores the initial fog coordinate and the Z-buffer base address
// back into the context's generated_vars, and initializes the texture
// coordinates and the iterated color.
void GGLAssembler::build_scanline_prolog(
fragment_parts_t& parts, const needs_t& needs)
{
Scratch scratches(registerFile());
// NOTE(review): Rctx is not referenced by name below; the CONTEXT_* macros
// presumably reach the context register on their own -- confirm.
int Rctx = mBuilderContext.Rctx;
// compute count
comment("compute ct (# of pixels to process)");
parts.count.setTo(obtainReg());
int Rx = scratches.obtain();
int Ry = scratches.obtain();
CONTEXT_LOAD(Rx, iterators.xl);
CONTEXT_LOAD(parts.count.reg, iterators.xr);
CONTEXT_LOAD(Ry, iterators.y);
// parts.count = iterators.xr - Rx
SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
// ... minus one, so the loop can branch on the sign of the counter
SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));
if (mDithering) {
// parts.count.reg = 0xNNNNXXDD
// NNNN = count-1
// DD = dither offset
// XX = 0xxxxxxx (x = garbage)
Scratch scratches(registerFile());
int tx = scratches.obtain();
int ty = scratches.obtain();
AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
} else {
// parts.count.reg = 0xNNNN0000
// NNNN = count-1
MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
}
if (!mAllMasked) {
// compute dst ptr
comment("compute color-buffer pointer");
const int cb_bits = mCbFormat.size*8;
int Rs = scratches.obtain();
parts.cbPtr.setTo(obtainReg(), cb_bits);
CONTEXT_LOAD(Rs, state.buffers.color.stride);
CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data);
SMLABB(AL, Rs, Ry, Rs, Rx); // Rs = Rx + Ry*Rs
// cbPtr = data + Rs pixels (base_offset scales by the pixel size)
base_offset(parts.cbPtr, parts.cbPtr, Rs);
scratches.recycle(Rs);
}
// init fog
const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
if (need_fog) {
comment("compute initial fog coordinate");
Scratch scratches(registerFile());
int dfdx = scratches.obtain();
int ydfdy = scratches.obtain();
// f shares its register with ydfdy
int f = ydfdy;
CONTEXT_LOAD(dfdx, generated_vars.dfdx);
CONTEXT_LOAD(ydfdy, iterators.ydfdy);
// f = Rx*dfdx + ydfdy
MLA(AL, 0, f, Rx, dfdx, ydfdy);
CONTEXT_STORE(f, generated_vars.f);
}
// init Z coordinate
if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
parts.z = reg_t(obtainReg());
comment("compute initial Z coordinate");
Scratch scratches(registerFile());
int dzdx = scratches.obtain();
// ydzdy shares its register with parts.z
int ydzdy = parts.z.reg;
CONTEXT_LOAD(dzdx, generated_vars.dzdx); // 1.31 fixed-point
CONTEXT_LOAD(ydzdy, iterators.ydzdy); // 1.31 fixed-point
// z = Rx*dzdx + ydzdy
MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);
// we're going to index zbase of parts.count
// zbase = base + (xl-count + stride*y)*2
// NOTE(review): the instructions below ADD (count >> 16) to xl + stride*y,
// whereas the formula above is written with "xl-count"; the matching
// subtraction of the running counter is emitted in build_depth_test().
// the dzdx register is reused for the stride from here on
int Rs = dzdx;
int zbase = scratches.obtain();
CONTEXT_LOAD(Rs, state.buffers.depth.stride);
CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data);
SMLABB(AL, Rs, Ry, Rs, Rx);
ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
CONTEXT_ADDR_STORE(zbase, generated_vars.zbase);
}
// init texture coordinates
init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
scratches.recycle(Ry);
// iterated color
init_iterated_color(parts, reg_t(Rx));
// init coverage factor application (anti-aliasing)
if (mAA) {
parts.covPtr.setTo(obtainReg(), 16);
CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage);
// covPtr = coverage + Rx*2
ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
}
}
// ---------------------------------------------------------------------------
// Emits the code producing one color component of the outgoing pixel.
//
// The incoming value for |component| is built first; if that component is
// written to the destination (mInfo[component].inDest), it is then blended
// against mDstPixel and downshifted into |pixel| using parts.dither.
// |regs| is the caller's scratch set; a local scratch set is used for
// temporaries that only live for the duration of this component.
void GGLAssembler::build_component( pixel_t& pixel,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& regs)
{
    static char const * const kChannelNames[] = {
        "alpha", "red", "green", "blue"
    };
    comment(kChannelNames[component]);

    // local register file
    Scratch localRegs(registerFile());
    const int dstBits = pixel.component_size(component);

    component_t incoming(-1);
    build_incoming_component(incoming, dstBits,
            parts, component, localRegs, regs);

    if (!mInfo[component].inDest)
        return;

    // blending...
    build_blending(incoming, mDstPixel, component, localRegs);

    // downshift component and rebuild pixel...
    downshift(pixel, component, incoming, parts.dither);
}
// Emits the code that produces the incoming (source) value of |component|
// and returns its description in |temp|.
//
//   temp        : receives the resulting component (left untouched when the
//                 component is neither extracted nor in the destination)
//   dst_size    : size, in bits, of the component in the destination pixel
//   parts       : per-fragment registers (iterated color, texels, ...)
//   component   : one of the GGLFormat component indices
//   scratches   : scratch registers local to this component
//   global_regs : the caller's scratch registers; used instead of
//                 |scratches| when the alpha value has to be kept as
//                 mAlphaSource for the blending stage
void GGLAssembler::build_incoming_component(
component_t& temp,
int dst_size,
const fragment_parts_t& parts,
int component,
Scratch& scratches,
Scratch& global_regs)
{
const uint32_t component_mask = 1<<component;
// Figure out what we need for the blending stage...
int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
fs = GGL_ONE;
}
// Figure out what we need to extract and for what reason
const int blending = blending_codes(fs, fd);
// Are we actually going to blend?
const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));
// expand the source if the destination has more bits
int need_expander = false;
// NOTE(review): this loop stops at GGL_TEXTURE_UNIT_COUNT-1 while the loop
// near the end of the function runs to GGL_TEXTURE_UNIT_COUNT -- confirm the
// last texture unit is skipped on purpose.
for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
texture_unit_t& tmu = mTextureMachine.tmu[i];
if ((tmu.format_idx) &&
(parts.texel[i].component_size(component) < dst_size)) {
need_expander = true;
}
}
// do we need to extract this component?
const bool multiTexture = mTextureMachine.activeUnits > 1;
const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
(isAlphaSourceNeeded());
int need_extract = mInfo[component].needed;
if (mInfo[component].inDest)
{
need_extract |= ((need_blending ?
(blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
need_extract |= mInfo[component].smooth;
need_extract |= mInfo[component].fog;
need_extract |= mDithering;
need_extract |= multiTexture;
}
if (need_extract) {
// the alpha source must outlive this component, so it is allocated from
// the caller's register set
Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
component_t fragment;
// iterated color
build_iterated_color(fragment, parts, component, regs);
// texture environment (decal, modulate, replace)
build_texture_environment(fragment, parts, component, regs);
// expand the source if the destination has more bits
if (need_expander && (fragment.size() < dst_size)) {
// we're here only if we fetched a texel
// (so we know for sure fragment is CORRUPTIBLE)
expand(fragment, fragment, dst_size);
}
// We have a few specific things to do for the alpha-channel
if ((component==GGLFormat::ALPHA) &&
(mInfo[component].needed || fragment.size()<dst_size))
{
// convert to integer_t first and make sure
// we don't corrupt a needed register
if (fragment.l) {
component_t incoming(fragment);
modify(fragment, regs);
MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
fragment.h -= fragment.l;
fragment.l = 0;
}
// coverage factor application
build_coverage_application(fragment, parts, regs);
// alpha-test
build_alpha_test(fragment, parts);
if (blend_needs_alpha_source) {
// We keep only 8 bits for the blending stage
const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
if (fragment.flags & CORRUPTIBLE) {
// take over the fragment's register as the alpha source
fragment.flags &= ~CORRUPTIBLE;
mAlphaSource.setTo(fragment.reg,
fragment.size(), fragment.flags);
if (shift) {
MOV(AL, 0, mAlphaSource.reg,
reg_imm(mAlphaSource.reg, LSR, shift));
}
} else {
// XXX: it would better to do this in build_blend_factor()
// so we can avoid the extra MOV below.
mAlphaSource.setTo(regs.obtain(),
fragment.size(), CORRUPTIBLE);
if (shift) {
MOV(AL, 0, mAlphaSource.reg,
reg_imm(fragment.reg, LSR, shift));
} else {
MOV(AL, 0, mAlphaSource.reg, fragment.reg);
}
}
mAlphaSource.s -= shift;
}
}
// fog...
build_fog( fragment, component, regs );
temp = fragment;
} else {
if (mInfo[component].inDest) {
// extraction not needed and replace
// we just select the right component
if ((mTextureMachine.replaced & component_mask) == 0) {
// component wasn't replaced, so use it!
temp = component_t(parts.iterated, component);
}
// a texture unit that provides this component without replacing it
// overrides the choice above (the last matching unit wins)
for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
const texture_unit_t& tmu = mTextureMachine.tmu[i];
if ((tmu.mask & component_mask) &&
((tmu.replaced & component_mask) == 0)) {
temp = component_t(parts.texel[i], component);
}
}
}
}
}
// Tells whether the color blend factors reference the source alpha, i.e.
// whether either mBlendSrc or mBlendDst is GGL_SRC_ALPHA or
// GGL_ONE_MINUS_SRC_ALPHA, or mBlendSrc is GGL_SRC_ALPHA_SATURATE.
bool GGLAssembler::isAlphaSourceNeeded() const
{
    // XXX: also needed for alpha-test
    const int srcFactor = mBlendSrc;
    const int dstFactor = mBlendDst;

    if (srcFactor == GGL_SRC_ALPHA_SATURATE)
        return true;
    if (srcFactor == GGL_SRC_ALPHA || srcFactor == GGL_ONE_MINUS_SRC_ALPHA)
        return true;
    return dstFactor == GGL_SRC_ALPHA || dstFactor == GGL_ONE_MINUS_SRC_ALPHA;
}
// ---------------------------------------------------------------------------
// Emits the per-pixel update of the iterated (smooth-shaded) color:
// c += dx for every component flagged as iterated.
//
// Nothing is emitted unless smooth shading is on and the iterated color is
// not packed. Bit 0 of parts.reload means the color value has to be loaded
// from (and stored back to) the context; bit 1 means the delta has to be
// loaded from the context.
void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
{
    if (!mSmooth || parts.iterated_packed)
        return;

    // update the iterated color in a pipelined way...
    comment("update iterated color");
    Scratch scratches(registerFile());

    const int reload = parts.reload;
    const bool reloadColor = (reload & 1) != 0;
    const bool reloadDelta = (reload & 2) != 0;

    for (int i = 0; i < 4; i++) {
        if (!mInfo[i].iterated)
            continue;

        int color = parts.argb[i].reg;
        int delta = parts.argb_dx[i].reg;

        if (reloadColor) {
            color = scratches.obtain();
            CONTEXT_LOAD(color, generated_vars.argb[i].c);
        }
        if (reloadDelta) {
            delta = scratches.obtain();
            CONTEXT_LOAD(delta, generated_vars.argb[i].dx);
        }

        // mSmooth is known to be set at this point
        ADD(AL, 0, color, color, delta);

        if (reloadColor) {
            CONTEXT_STORE(color, generated_vars.argb[i].c);
            scratches.recycle(color);
        }
        if (reloadDelta) {
            scratches.recycle(delta);
        }
    }
}
// ---------------------------------------------------------------------------
// Emits the multiplication of |fragment| by the anti-aliasing coverage
// factor. Nothing is emitted when anti-aliasing (mAA) is off.
//
// The coverage value is read as a halfword through parts.covPtr, which is
// post-incremented by 2. |regs| is used by modify() to provide a writable
// register for |fragment|.
void GGLAssembler::build_coverage_application(component_t& fragment,
        const fragment_parts_t& parts, Scratch& regs)
{
    // here fragment.l is guaranteed to be 0
    if (!mAA)
        return;

    // coverages are 1.15 fixed-point numbers
    comment("coverage application");

    component_t incoming(fragment);
    modify(fragment, regs);

    Scratch scratches(registerFile());
    const int coverage = scratches.obtain();
    LDRH(AL, coverage, parts.covPtr.reg, immed8_post(2));

    if (fragment.h <= 31) {
        // pre-shift left by one before the multiply
        MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
        SMULWB(AL, fragment.reg, fragment.reg, coverage);
    } else {
        // no room to shift left: drop one bit from the component instead
        fragment.h--;
        SMULWB(AL, fragment.reg, incoming.reg, coverage);
    }
}
// ---------------------------------------------------------------------------
// Emits the alpha test: compares |fragment| against the reference value
// loaded from the context (state.alpha_test.ref, shifted right to match the
// fragment's size) and branches to "discard_after_textures" when the test
// selected by mAlphaTest fails. Nothing is emitted for GGL_ALWAYS.
// |parts| is not used here.
void GGLAssembler::build_alpha_test(component_t& fragment,
        const fragment_parts_t& parts)
{
    if (mAlphaTest == GGL_ALWAYS)
        return;

    comment("Alpha Test");
    Scratch scratches(registerFile());
    const int ref = scratches.obtain();
    const int shift = GGL_COLOR_BITS - fragment.size();
    CONTEXT_LOAD(ref, state.alpha_test.ref);

    if (shift == 0) {
        CMP(AL, fragment.reg, ref);
    } else {
        CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
    }

    // condition under which the test PASSES
    int passCond = NV;
    switch (mAlphaTest) {
        case GGL_NEVER:
            passCond = NV;
            break;
        case GGL_LESS:
            passCond = LT;
            break;
        case GGL_EQUAL:
            passCond = EQ;
            break;
        case GGL_LEQUAL:
            passCond = LS;
            break;
        case GGL_GREATER:
            passCond = HI;
            break;
        case GGL_NOTEQUAL:
            passCond = NE;
            break;
        case GGL_GEQUAL:
            passCond = HS;
            break;
        default:
            break;
    }

    // flipping the low bit inverts the condition: branch when the test fails
    B(passCond ^ 1, "discard_after_textures");
}
// ---------------------------------------------------------------------------
// Emits the depth test and/or the depth write for the current fragment.
//
//   parts : per-fragment registers (parts.z holds the iterated Z,
//           parts.count the pixel counter)
//   mask  : any combination of Z_TEST and Z_WRITE; other bits are ignored
//
// Nothing is emitted when the depth function is GGL_ALWAYS and the Z mask
// need is not set. A failed test branches to "discard_before_textures".
void GGLAssembler::build_depth_test(
const fragment_parts_t& parts, uint32_t mask)
{
mask &= Z_TEST|Z_WRITE;
const needs_t& needs = mBuilderContext.needs;
const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
Scratch scratches(registerFile());
if (mDepthTest != GGL_ALWAYS || zmask) {
// ic is the condition under which the depth test passes (as seen by the
// CMP below, which compares the stored depth against the new z);
// cc is its inverse, used to branch away on failure.
int cc=AL, ic=AL;
switch (mDepthTest) {
case GGL_LESS: ic = HI; break;
case GGL_EQUAL: ic = EQ; break;
case GGL_LEQUAL: ic = HS; break;
case GGL_GREATER: ic = LT; break;
case GGL_NOTEQUAL: ic = NE; break;
case GGL_GEQUAL: ic = LS; break;
case GGL_NEVER:
// this never happens, because it's taken care of when
// computing the needs. but we keep it for completeness.
comment("Depth Test (NEVER)");
B(AL, "discard_before_textures");
return;
case GGL_ALWAYS:
// we're here because zmask is enabled
mask &= ~Z_TEST; // test always passes.
break;
}
// inverse the condition
cc = ic^1;
// the z-write is dropped when the Z mask need is not set
if ((mask & Z_WRITE) && !zmask) {
mask &= ~Z_WRITE;
}
if (!mask)
return;
comment("Depth Test");
int zbase = scratches.obtain();
int depth = scratches.obtain();
int z = parts.z.reg;
CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase); // stall
ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
// NOTE(review): the instruction above is a subtraction, i.e.
// zbase = zbase - (count >> 15), which is zbase - ((count >> 16) << 1)
// provided bit 15 of count is clear; the original comment described it as
// an addition ("zbase = zbase + ((count >> 16) << 1)").
if (mask & Z_TEST) {
LDRH(AL, depth, zbase); // stall
// only the upper 16 bits of z are compared against the stored depth
CMP(AL, depth, reg_imm(z, LSR, 16));
B(cc, "discard_before_textures");
}
if (mask & Z_WRITE) {
if (mask == Z_WRITE) {
// only z-write asked, cc is meaningless
ic = AL;
}
MOV(AL, 0, depth, reg_imm(z, LSR, 16));
STRH(ic, depth, zbase);
}
}
}
// Emits the per-pixel Z update (z += dzdx, with dzdx loaded from the
// context). Nothing is emitted unless the depth test is enabled or the Z
// mask need is set.
void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
{
    const needs_t& needs = mBuilderContext.needs;
    const bool usesZ =
            (mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p);
    if (!usesZ)
        return;

    Scratch scratches(registerFile());
    const int step = scratches.obtain();
    CONTEXT_LOAD(step, generated_vars.dzdx);    // stall
    ADD(AL, 0, parts.z.reg, parts.z.reg, step);
}
// Emits the per-pixel fog update: loads f and dfdx from the context, adds
// them, and stores f back. Nothing is emitted when fog is not needed.
// |parts| is not used here.
void GGLAssembler::build_iterate_f(const fragment_parts_t& parts)
{
    const needs_t& needs = mBuilderContext.needs;
    if (!GGL_READ_NEEDS(P_FOG, needs.p))
        return;

    Scratch scratches(registerFile());
    const int step = scratches.obtain();
    const int fog = scratches.obtain();
    CONTEXT_LOAD(fog, generated_vars.f);
    CONTEXT_LOAD(step, generated_vars.dfdx);    // stall
    ADD(AL, 0, fog, fog, step);
    CONTEXT_STORE(fog, generated_vars.f);
}
// ---------------------------------------------------------------------------
// Emits the logic operation combining the source |pixel| with the
// destination pixel (mDstPixel); the result ends up in pixel.reg.
//
// GGL_COPY emits nothing. If |pixel| is not CORRUPTIBLE, a fresh register
// is obtained from |regs| for the result and the pixel is marked
// CORRUPTIBLE.
void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
{
    const needs_t& needs = mBuilderContext.needs;
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    if (opcode == GGL_COPY)
        return;

    comment("logic operation");

    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }
    pixel_t d(mDstPixel);

    // Several operations are the bitwise complement of a simpler one: those
    // emit the simple operation first and request a trailing MVN.
    bool complement = false;
    switch (opcode) {
        case GGL_CLEAR:
            MOV(AL, 0, pixel.reg, imm(0));
            break;
        case GGL_AND:
            AND(AL, 0, pixel.reg, s.reg, d.reg);
            break;
        case GGL_AND_REVERSE:
            BIC(AL, 0, pixel.reg, s.reg, d.reg);
            break;
        case GGL_COPY:
            break;
        case GGL_AND_INVERTED:
            BIC(AL, 0, pixel.reg, d.reg, s.reg);
            break;
        case GGL_NOOP:
            MOV(AL, 0, pixel.reg, d.reg);
            break;
        case GGL_XOR:
            EOR(AL, 0, pixel.reg, s.reg, d.reg);
            break;
        case GGL_OR:
            ORR(AL, 0, pixel.reg, s.reg, d.reg);
            break;
        case GGL_NOR:
            ORR(AL, 0, pixel.reg, s.reg, d.reg);
            complement = true;
            break;
        case GGL_EQUIV:
            EOR(AL, 0, pixel.reg, s.reg, d.reg);
            complement = true;
            break;
        case GGL_INVERT:
            MVN(AL, 0, pixel.reg, d.reg);
            break;
        case GGL_OR_REVERSE:    // s | ~d == ~(~s & d)
            BIC(AL, 0, pixel.reg, d.reg, s.reg);
            complement = true;
            break;
        case GGL_COPY_INVERTED:
            MVN(AL, 0, pixel.reg, s.reg);
            break;
        case GGL_OR_INVERTED:   // ~s | d == ~(s & ~d)
            BIC(AL, 0, pixel.reg, s.reg, d.reg);
            complement = true;
            break;
        case GGL_NAND:
            AND(AL, 0, pixel.reg, s.reg, d.reg);
            complement = true;
            break;
        case GGL_SET:
            MVN(AL, 0, pixel.reg, imm(0));
            break;
    }

    if (complement) {
        MVN(AL, 0, pixel.reg, pixel.reg);
    }
}
// ---------------------------------------------------------------------------
// Returns the even bit position (0, 2, ..., 30) of the lowest 2-bit pair of
// |val| that contains a set bit.
//
// Returns 32 when |val| is zero. The scan is bounded so that a zero input
// terminates instead of looping forever and shifting by 32 or more, and the
// probe mask is unsigned so that testing the top pair does not overflow a
// signed int.
static uint32_t find_bottom(uint32_t val)
{
    uint32_t i = 0;
    while (i < 32 && !(val & (3u << i)))
        i += 2;
    return i;
}
// Rotates |val| right, two bits at a time, until its lowest bit pair is
// non-zero and its top six bits are clear. |rot| receives the number of bit
// positions rotated (always even).
//
// If no such rotation exists (for instance when val is 0), |val| ends up
// rotated by a full 32 bits, i.e. unchanged, and |rot| is set to 0.
static void normalize(uint32_t& val, uint32_t& rot)
{
    rot = 0;
    for (;;) {
        const bool lowPairSet = (val & 3) != 0;
        const bool topBitsClear = (val & 0xFC000000) == 0;
        if (lowPairSet && topBitsClear)
            return;

        // rotate right by two bits
        val = (val >> 2) | (val << 30);
        rot += 2;

        if (rot == 32) {
            rot = 0;
            return;
        }
    }
}
// Emits d = s & mask, where only the low |bits| bits of |mask| matter.
//
//   - If the mask keeps every bit, this is a plain MOV (or nothing when
//     d == s).
//   - On MIPS a single AND with the immediate is emitted.
//   - Otherwise the mask (or its complement, when the mask itself is not a
//     valid immediate) is split into chunks of up to 8 bits and one AND
//     (resp. BIC) is emitted per chunk.
void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
{
    uint32_t rot;
    uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
    mask &= size;

    if (mask == size) {
        if (d != s)
            MOV( AL, 0, d, s);
        return;
    }

    if (getCodegenArch() == CODEGEN_ARCH_MIPS) {
        // MIPS can do 16-bit imm in 1 instr, 32-bit in 3 instr
        // the below ' while (mask)' code is buggy on mips
        // since mips returns true on isValidImmediate()
        // then we get multiple AND instr (positive logic)
        AND( AL, 0, d, s, imm(mask) );
        return;
    }

    int negative_logic = !isValidImmediate(mask);
    if (negative_logic) {
        mask = ~mask & size;
    }
    normalize(mask, rot);

    if (!mask) {
        MOV( AL, 0, d, imm(0));
        return;
    }

    while (mask) {
        uint32_t bitpos = find_bottom(mask);
        // The chunk has to be rotated left by (rot + bitpos) to get back to
        // its position in the original mask. Reduce the amount modulo 32:
        // the sum can exceed 31, and shifting a 32-bit value by 32 or more
        // is undefined.
        const uint32_t shift = (rot + bitpos) & 31;
        uint32_t m = mask & (0xffu << bitpos);
        mask &= ~m;
        m >>= bitpos;
        // rotate left by |shift|; a rotation by 0 must not evaluate
        // m >> 32, which is undefined as well.
        const uint32_t newMask =
                shift ? ((m << shift) | (m >> (32 - shift))) : m;
        if (!negative_logic) {
            AND( AL, 0, d, s, imm(newMask) );
        } else {
            BIC( AL, 0, d, s, imm(newMask) );
        }
        // subsequent chunks operate on the partial result
        s = d;
    }
}
// Emits the color-mask merge: components that are masked keep the value
// they have in the destination pixel (mDstPixel), the others come from
// |pixel|. The merged result ends up in pixel.reg.
//
// Nothing is emitted when no component is masked or when all of them are.
// If |pixel| is not CORRUPTIBLE, a fresh register is obtained from |regs|.
void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
{
    const bool nothingToMerge = !mMasking || mAllMasked;
    if (nothingToMerge)
        return;

    comment("color mask");

    pixel_t fb(mDstPixel);
    pixel_t src(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    // bits of the components that are present in the format and NOT masked
    int writable = 0;
    for (int ch = 0; ch < 4; ch++) {
        const int hi = fb.format.c[ch].h;
        const int lo = fb.format.c[ch].l;
        const bool masked = (mMasking & (1 << ch)) != 0;
        if (hi && !masked) {
            writable |= ((1 << (hi - lo)) - 1) << lo;
        }
    }

    // There is no need to clear the masked components of the source
    // (unless we applied a logic op), because they're already zeroed
    // by construction (masked components are not computed)
    if (mLogicOp) {
        const needs_t& needs = mBuilderContext.needs;
        const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
        if (opcode != GGL_CLEAR) {
            // clear masked component of source
            build_and_immediate(pixel.reg, src.reg, writable, fb.size());
            src = pixel;
        }
    }

    // clear non masked components of destination
    build_and_immediate(fb.reg, fb.reg, ~writable, fb.size());

    // or back the channels that were masked
    if (src.reg != fb.reg) {
        ORR(AL, 0, pixel.reg, src.reg, fb.reg);
    } else if (src.reg != pixel.reg) {
        // source and destination share a register: this is in fact a MOV
        MOV(AL, 0, pixel.reg, fb.reg);
    }
    // otherwise all three registers are the same and nothing is needed
}
// ---------------------------------------------------------------------------
// Emits d = b + o * (b.size / 8): adds to pointer |b| the offset |o|, which
// is expressed in elements of b.size bits, and puts the result in |d|.
// Sizes of 32, 24, 16 and 8 bits are handled; any other size emits nothing.
void GGLAssembler::base_offset(
        const pointer_t& d, const pointer_t& b, const reg_t& o)
{
    const int dst = d.reg;
    const int base = b.reg;
    const int off = o.reg;

    switch (b.size) {
        case 8:
            ADDR_ADD(AL, 0, dst, base, off);
            break;

        case 16:
            ADDR_ADD(AL, 0, dst, base, reg_imm(off, LSL, 1));
            break;

        case 24:
            // o*3 is computed as o*2 + o; the order of the two additions
            // depends on whether d aliases b, so that b is not clobbered
            // before it has been read.
            if (dst == base) {
                ADDR_ADD(AL, 0, dst, base, reg_imm(off, LSL, 1));
                ADDR_ADD(AL, 0, dst, dst, off);
            } else {
                ADDR_ADD(AL, 0, dst, off, reg_imm(off, LSL, 1));
                ADDR_ADD(AL, 0, dst, dst, base);
            }
            break;

        case 32:
            ADDR_ADD(AL, 0, dst, base, reg_imm(off, LSL, 2));
            break;
    }
}
// ----------------------------------------------------------------------------
// cheezy register allocator...
// ----------------------------------------------------------------------------
// Modified to support MIPS processors, in a very simple way. We retain the
// (Arm) limit of 16 total registers, but shift the mapping of those registers
// from 0-15, to 2-17. Register 0 on Mips cannot be used as GP registers, and
// register 1 has a traditional use as a temp).
// Creates the allocator; |arch| is passed on to the underlying register file.
RegisterAllocator::RegisterAllocator(int arch)
    : mRegs(arch)
{
}
// Resets the underlying register file.
void RegisterAllocator::reset() {
    mRegs.reset();
}
// Reserves register |reg| in the register file and returns the value
// returned by RegisterFile::reserve().
int RegisterAllocator::reserveReg(int reg) {
    return mRegs.reserve(reg);
}
// Obtains a register from the register file (see RegisterFile::obtain()).
int RegisterAllocator::obtainReg() {
    return mRegs.obtain();
}
// Gives register |reg| back to the register file.
void RegisterAllocator::recycleReg(int reg) {
    mRegs.recycle(reg);
}
// Gives access to the allocator's register file.
RegisterAllocator::RegisterFile& RegisterAllocator::registerFile() {
    return mRegs;
}
// ----------------------------------------------------------------------------
// Creates an empty register file for |codegen_arch| with SP and PC already
// reserved. On MIPS, register numbers are shifted up by 2.
RegisterAllocator::RegisterFile::RegisterFile(int codegen_arch)
    : mRegs(0),
      mTouched(0),
      mStatus(0),
      mArch(codegen_arch),
      mRegisterOffset(0)
{
    const bool isMips = (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS);
    if (isMips) {
        // ARM has regs 0..15, MIPS offset to 2..17
        mRegisterOffset = 2;
    }

    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}
// Creates a register file that starts from the allocation state of |rhs|
// (used and touched register sets, and status) for |codegen_arch|.
// On MIPS, register numbers are shifted up by 2.
//
// mStatus is copied from |rhs|: it used to be left uninitialized here, so
// status() on the new object returned an indeterminate value.
RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs, int codegen_arch)
    : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mStatus(rhs.mStatus),
      mArch(codegen_arch), mRegisterOffset(0)
{
    if (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) {
        mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
    }
}
// Destructor: nothing to release.
RegisterAllocator::RegisterFile::~RegisterFile() {}
// Two register files compare equal when the same registers are in use;
// the touched set, status and architecture are not compared.
bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
{
    const bool sameRegsInUse = (mRegs == rhs.mRegs);
    return sameRegsInUse;
}
// Clears the used set, the touched set and the status, then reserves SP
// and PC again.
void RegisterAllocator::RegisterFile::reset()
{
    mStatus = 0;
    mTouched = 0;
    mRegs = 0;

    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}
// RegisterFile::reserve() takes a register parameter in the range 0-15
// (ARM compatible) and marks it as used and touched. The value returned is
// the register actually allocated: the same number on ARM, or a number in
// the range 2-17 on a MIPS processor.
// Reserving a register that is already in use is a fatal error.
int RegisterAllocator::RegisterFile::reserve(int reg)
{
    reg += mRegisterOffset;
    LOG_ALWAYS_FATAL_IF(isUsed(reg),
                        "reserving register %d, but already in use",
                        reg);

    const uint32_t bit = (1<<reg);
    mRegs |= bit;
    mTouched |= mRegs;
    return reg;
}
// Marks every register set in |regMask| as used and touched.
// This interface uses regMask in range 2-17 on MIPS, no translation.
void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
{
    mTouched |= regMask;
    mRegs |= regMask;
}
// Returns non-zero when register |reg| is currently in use. |reg| is an
// already-translated register number; values at or above
// 16 + mRegisterOffset are a fatal error.
int RegisterAllocator::RegisterFile::isUsed(int reg) const
{
    LOG_ALWAYS_FATAL_IF(reg>=16+(int)mRegisterOffset, "invalid register %d", reg);

    const uint32_t bit = (1<<reg);
    return mRegs & bit;
}
// Reserves and returns the first free register of a fixed priority list.
//
// When every register of the list is in use, OUT_OF_REGISTERS is recorded
// in the status and SP is returned as a placeholder.
int RegisterAllocator::RegisterFile::obtain()
{
    // allocation order, in ARM register numbers
    const char priorityList[14] = {
        0, 1, 2, 3, 12, 14, 4, 5, 6, 7, 8, 9, 10, 11
    };
    const int count = sizeof(priorityList);

    int chosen = -1;
    for (int i = 0; i < count; i++) {
        const int candidate = priorityList[i];
        if (!isUsed(candidate + mRegisterOffset)) {
            chosen = candidate;
            break;
        }
    }

    if (chosen < 0) {
        // this is not an error anymore because, we'll try again with
        // a lower optimization level.
        //ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
        mStatus |= OUT_OF_REGISTERS;
        // we return SP so we can more easily debug things
        // the code will never be run anyway.
        return ARMAssemblerInterface::SP;
    }

    // Param in Arm range 0-15, returns range 2-17 on Mips.
    return reserve(chosen);
}
// Returns true when at least one of the 16 registers is not in use.
bool RegisterAllocator::RegisterFile::hasFreeRegs() const
{
    // MIPS fix: shift the used set back down to bits 0..15
    const uint32_t inUse = (mRegs >> mRegisterOffset) & 0xFFFF;
    return inUse != 0xFFFF;
}
// Returns how many of the 16 registers are not in use.
int RegisterAllocator::RegisterFile::countFreeRegs() const
{
    // MIPS fix: shift the used set back down to bits 0..15
    const uint32_t inUse = mRegs >> mRegisterOffset;
    uint32_t freeMask = ~inUse & 0xFFFF;

    // count the set bits, clearing the lowest one at each step
    int freeCount = 0;
    while (freeMask) {
        freeMask &= freeMask - 1;
        freeCount++;
    }
    return freeCount;
}
// Marks register |reg| (an already-translated register number) as free.
// The touched set is left unchanged.
void RegisterAllocator::RegisterFile::recycle(int reg)
{
    // The check below is commented out, since the common failure of
    // running out of registers triggers this assertion. Since the code is
    // not executed in that case, it does not matter. No reason to FATAL err.
    // LOG_FATAL_IF(!isUsed(reg),
    //         "recycling unallocated register %d",
    //         reg);
    const uint32_t bit = (1<<reg);
    mRegs &= ~bit;
}
// Marks every register set in |regMask| as free.
// The touched set is left unchanged.
void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
{
    // The check below is commented out, since the common failure of
    // running out of registers triggers this assertion. Since the code is
    // not executed in that case, it does not matter. No reason to FATAL err.
    // LOG_FATAL_IF((mRegs & regMask)!=regMask,
    //         "recycling unallocated registers "
    //         "(recycle=%08x, allocated=%08x, unallocated=%08x)",
    //         regMask, mRegs, mRegs&regMask);
    const uint32_t keep = ~regMask;
    mRegs &= keep;
}
// Returns the set of registers that have been reserved at any point since
// construction or the last reset().
uint32_t RegisterAllocator::RegisterFile::touched() const {
    return mTouched;
}
// ----------------------------------------------------------------------------
}; // namespace android