From 96dbb4fc58fe2dcf4390e073dbb42cc77ef2f0b5 Mon Sep 17 00:00:00 2001 From: Martyn Capewell Date: Mon, 7 Dec 2009 13:59:59 +0000 Subject: [PATCH 1/5] Adds UXTB16 support to Pixelflinger * Add support for UXTB16 to the disassembler * Add encoding of the UXTB16 instruction to the Pixelflinger JIT. Introducing the UXTB16 instruction allows removal of some masking code, and is beneficial from a pipeline point of view - lots of UXTB16 followed by MUL sequences. Also, further rescheduling and use of SMULWB brings extra performance improvements. * Use UXTB16 in bilinear filtered texturing Uses UXTB16 to extract channels for SIMD operations, rather than creating and ANDing with masks. Saves a register and is faster on A8, as UXTB16 result can feed into first stage of multiply, unlike AND. Also, used SMULWB rather than SMULBB, which allows removal of MOVs used to rescale results. Code has been scheduled for A8 pipeline, specifically aiming to allow multiplies to issue in pipeline 0, for efficient dual issue operation. Testing on SpriteMethodTest (http://code.google.com/p/apps-for-android/) gives 8% improvement (12.7 vs. 13.7 fps.) SMULBB to SMULWB trick could be used in > 3) << 10) | Rm; +} + }; // namespace android diff --git a/libpixelflinger/codeflinger/ARMAssembler.h b/libpixelflinger/codeflinger/ARMAssembler.h index ef3b66af7..a667cb511 100644 --- a/libpixelflinger/codeflinger/ARMAssembler.h +++ b/libpixelflinger/codeflinger/ARMAssembler.h @@ -123,6 +123,7 @@ public: int RdHi, int RdLo, int Rs, int Rm); virtual void SMLAW(int cc, int y, int Rd, int Rm, int Rs, int Rn); + virtual void UXTB16(int cc, int Rd, int Rm, int rotate); private: ARMAssembler(const ARMAssembler& rhs); diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.h b/libpixelflinger/codeflinger/ARMAssemblerInterface.h index 465b3bd9d..ff6af2a22 100644 --- a/libpixelflinger/codeflinger/ARMAssemblerInterface.h +++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.h @@ -203,6 +203,9 @@ public: virtual void SMLAW(int cc, int y, int Rd, int Rm, int Rs, int Rn) = 0; + // byte/half word extract... + virtual void UXTB16(int cc, int Rd, int Rm, int rotate) = 0; + // ----------------------------------------------------------------------- // convenience... 
// ----------------------------------------------------------------------- diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp index 18c461864..7c422dbad 100644 --- a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp +++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp @@ -195,6 +195,9 @@ void ARMAssemblerProxy::SMLAW(int cc, int y, int Rd, int Rm, int Rs, int Rn) { mTarget->SMLAW(cc, y, Rd, Rm, Rs, Rn); } +void ARMAssemblerProxy::UXTB16(int cc, int Rd, int Rm, int rotate) { + mTarget->UXTB16(cc, Rd, Rm, rotate); +} }; // namespace android diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.h b/libpixelflinger/codeflinger/ARMAssemblerProxy.h index 4bdca9cf5..9134cce6f 100644 --- a/libpixelflinger/codeflinger/ARMAssemblerProxy.h +++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.h @@ -114,6 +114,8 @@ public: virtual void SMLAW(int cc, int y, int Rd, int Rm, int Rs, int Rn); + virtual void UXTB16(int cc, int Rd, int Rm, int rotate); + private: ARMAssemblerInterface* mTarget; }; diff --git a/libpixelflinger/codeflinger/disassem.c b/libpixelflinger/codeflinger/disassem.c index 4676da0d8..ee5e63a2b 100644 --- a/libpixelflinger/codeflinger/disassem.c +++ b/libpixelflinger/codeflinger/disassem.c @@ -80,6 +80,7 @@ * f - 1st fp operand (register) (bits 12-14) * g - 2nd fp operand (register) (bits 16-18) * h - 3rd fp operand (register/immediate) (bits 0-4) + * j - xtb rotate literal (bits 10-11) * b - branch address * t - thumb branch address (bits 24, 0-23) * k - breakpoint comment (bits 0-3, 8-19) @@ -122,6 +123,7 @@ static const struct arm32_insn arm32_i[] = { { 0x0fe000f0, 0x00c00090, "smull", "Sdnms" }, { 0x0fe000f0, 0x00a00090, "umlal", "Sdnms" }, { 0x0fe000f0, 0x00e00090, "smlal", "Sdnms" }, + { 0x0fff03f0, 0x06cf0070, "uxtb16", "dmj" }, { 0x0d700000, 0x04200000, "strt", "daW" }, { 0x0d700000, 0x04300000, "ldrt", "daW" }, { 0x0d700000, 0x04600000, "strbt", "daW" }, @@ -406,6 +408,10 @@ disasm(const disasm_interface_t *di, u_int loc, int altfmt) else di->di_printf("f%d", insn & 7); break; + /* j - xtb rotate literal (bits 10-11) */ + case 'j': + di->di_printf("ror #%d", ((insn >> 10) & 3) << 3); + break; /* b - branch address */ case 'b': branch = ((insn << 2) & 0x03ffffff); diff --git a/libpixelflinger/codeflinger/texturing.cpp b/libpixelflinger/codeflinger/texturing.cpp index 90e658407..ba13fb303 100644 --- a/libpixelflinger/codeflinger/texturing.cpp +++ b/libpixelflinger/codeflinger/texturing.cpp @@ -25,6 +25,7 @@ #include "codeflinger/GGLAssembler.h" +#include namespace android { @@ -567,7 +568,7 @@ void GGLAssembler::build_textures( fragment_parts_t& parts, RSB(GE, 0, height, height, imm(0)); MUL(AL, 0, height, stride, height); } else { - // u has not been CLAMPed yet + // v has not been CLAMPed yet CMP(AL, height, reg_imm(v, ASR, FRAC_BITS)); MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS)); MOV(LE, 0, height, imm(0)); @@ -868,6 +869,106 @@ void GGLAssembler::filter24( load(txPtr, texel, 0); } +#if __ARM_ARCH__ >= 6 +// ARMv6 version, using UXTB16, and scheduled for Cortex-A8 pipeline +void GGLAssembler::filter32( + const fragment_parts_t& parts, + pixel_t& texel, const texture_unit_t& tmu, + int U, int V, pointer_t& txPtr, + int FRAC_BITS) +{ + const int adjust = FRAC_BITS*2 - 8; + const int round = 0; + const int prescale = 16 - adjust; + + Scratch scratches(registerFile()); + + int pixel= scratches.obtain(); + int dh = scratches.obtain(); + int u = scratches.obtain(); + int k = scratches.obtain(); + + int temp = 
scratches.obtain(); + int dl = scratches.obtain(); + + int offsetrt = scratches.obtain(); + int offsetlb = scratches.obtain(); + + int pixellb = offsetlb; + + // RB -> U * V + CONTEXT_LOAD(offsetrt, generated_vars.rt); + CONTEXT_LOAD(offsetlb, generated_vars.lb); + if(!round) { + MOV(AL, 0, U, reg_imm(U, LSL, prescale)); + } + ADD(AL, 0, u, offsetrt, offsetlb); + + LDR(AL, pixel, txPtr.reg, reg_scale_pre(u)); + if (round) { + SMULBB(AL, u, U, V); + RSB(AL, 0, U, U, imm(1< (1-U) * V + if (round) { + SMULBB(AL, u, U, V); + } else { + SMULWB(AL, u, U, V); + } + UXTB16(AL, temp, pixellb, 0); + if (round) { + ADD(AL, 0, u, u, imm(1<<(adjust-1))); + MOV(AL, 0, u, reg_imm(u, LSR, adjust)); + } + MLA(AL, 0, dh, temp, u, dh); + UXTB16(AL, temp, pixellb, 8); + MLA(AL, 0, dl, temp, u, dl); + SUB(AL, 0, k, k, u); + + // LT -> (1-U)*(1-V) + RSB(AL, 0, V, V, imm(1< U*(1-V) + LDR(AL, pixel, txPtr.reg, reg_scale_pre(offsetrt)); + SUB(AL, 0, u, k, u); + UXTB16(AL, temp, pixel, 0); + MLA(AL, 0, dh, temp, u, dh); + UXTB16(AL, temp, pixel, 8); + MLA(AL, 0, dl, temp, u, dl); + + UXTB16(AL, dh, dh, 8); + UXTB16(AL, dl, dl, 8); + ORR(AL, 0, texel.reg, dh, reg_imm(dl, LSL, 8)); +} +#else void GGLAssembler::filter32( const fragment_parts_t& parts, pixel_t& texel, const texture_unit_t& tmu, @@ -955,6 +1056,7 @@ void GGLAssembler::filter32( AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8)); ORR(AL, 0, texel.reg, dh, dl); } +#endif void GGLAssembler::build_texture_environment( component_t& fragment, From f9e8ab03bd93d98567e96822535090a877594aba Mon Sep 17 00:00:00 2001 From: Martyn Capewell Date: Mon, 7 Dec 2009 15:00:19 +0000 Subject: [PATCH 2/5] NEON shortcut for flat colour blending into 16-bit This is a shortcut for the needs descriptor 00000077:03515104_00000000_00000000. It requires blending a single 32-bit colour value into a 16-bit framebuffer. It's used when fading out the screen, eg. when a modal requester pops-up. The PF JIT produces code for this using 24 instructions/pixel. The NEON implementation requires 2.1 instructions/pixel. Performance hasn't been benchmarked, but the improvement is quite visible. This code has only been tested by inspection of the fading effect described above, when press+holding a finger on the home screen to pop up the Shortcuts/Widgets/Folders/Wallpaper requester. Along with the NEON version, a fallback v5TE implementation is also provided. This ARM version of col32cb16blend is not fully optimised, but is a reasonable implementation, and better than the version produced by the JIT. It is here as a fallback, if NEON is not available. 
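For reference, the per-pixel operation that both the NEON and v5TE paths implement is the same one as the C fallback this patch adds to scanline.cpp. A minimal standalone C sketch follows (assuming a little-endian RGBA8888 source word and an RGB565 destination; the function name and parameters are illustrative, not part of the patch):

    #include <stdint.h>
    #include <stddef.h>

    /* Reference version of the blend, one pixel at a time.  The source
     * channels are pre-scaled to their 565 widths outside the loop, as in
     * the assembly. */
    static void col32cb16blend_ref(uint16_t *dst, uint32_t src, size_t count)
    {
        int a  = src >> 24;                /* source alpha                */
        int f  = 0x100 - (a + (a >> 7));   /* inverted alpha, 0..256      */
        int sR = (src >> 3)  & 0x1f;       /* top 5 bits of red           */
        int sG = (src >> 10) & 0x3f;       /* top 6 bits of green         */
        int sB = (src >> 19) & 0x1f;       /* top 5 bits of blue          */

        while (count--) {
            uint16_t d = *dst;
            int dR = (d >> 11) & 0x1f;
            int dG = (d >> 5)  & 0x3f;
            int dB =  d        & 0x1f;
            /* d = s + (((0x100 - (a + (a >> 7))) * d) >> 8), per channel */
            dR = sR + ((f * dR) >> 8);
            dG = sG + ((f * dG) >> 8);
            dB = sB + ((f * dB) >> 8);
            *dst++ = (uint16_t)((dR << 11) | (dG << 5) | dB);
        }
    }

The NEON version vectorises exactly this arithmetic, sixteen pixels per iteration, and the v5TE fallback does the same per-pixel computation using SMLABB to fold each multiply and add.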
--- libpixelflinger/Android.mk | 6 + libpixelflinger/col32cb16blend.S | 78 +++++++++++++ libpixelflinger/col32cb16blend_neon.S | 153 ++++++++++++++++++++++++++ libpixelflinger/scanline.cpp | 47 ++++++++ 4 files changed, 284 insertions(+) create mode 100644 libpixelflinger/col32cb16blend.S create mode 100644 libpixelflinger/col32cb16blend_neon.S diff --git a/libpixelflinger/Android.mk b/libpixelflinger/Android.mk index 0cc85d9dc..6491d243b 100644 --- a/libpixelflinger/Android.mk +++ b/libpixelflinger/Android.mk @@ -40,7 +40,13 @@ PIXELFLINGER_SRC_FILES:= \ buffer.cpp ifeq ($(TARGET_ARCH),arm) +ifeq ($(TARGET_ARCH_VERSION),armv7-a) +PIXELFLINGER_SRC_FILES += col32cb16blend_neon.S +PIXELFLINGER_SRC_FILES += col32cb16blend.S +else PIXELFLINGER_SRC_FILES += t32cb16blend.S +PIXELFLINGER_SRC_FILES += col32cb16blend.S +endif endif ifeq ($(TARGET_ARCH),arm) diff --git a/libpixelflinger/col32cb16blend.S b/libpixelflinger/col32cb16blend.S new file mode 100644 index 000000000..1450bde84 --- /dev/null +++ b/libpixelflinger/col32cb16blend.S @@ -0,0 +1,78 @@ +/* libs/pixelflinger/col32cb16blend.S +** +** (C) COPYRIGHT 2009 ARM Limited. +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +** +*/ + + .text + .align + + .global scanline_col32cb16blend_arm + +// +// This function alpha blends a fixed color into a destination scanline, using +// the formula: +// +// d = s + (((a + (a >> 7)) * d) >> 8) +// +// where d is the destination pixel, +// s is the source color, +// a is the alpha channel of the source color. 
+// + +// r0 = destination buffer pointer +// r1 = color value +// r2 = count + + +scanline_col32cb16blend_arm: + push {r4-r10, lr} // stack ARM regs + + mov r5, r1, lsr #24 // shift down alpha + mov r9, #0xff // create mask + add r5, r5, r5, lsr #7 // add in top bit + rsb r5, r5, #256 // invert alpha + and r10, r1, #0xff // extract red + and r12, r9, r1, lsr #8 // extract green + and r4, r9, r1, lsr #16 // extract blue + mov r10, r10, lsl #5 // prescale red + mov r12, r12, lsl #6 // prescale green + mov r4, r4, lsl #5 // prescale blue + mov r9, r9, lsr #2 // create dest green mask + +1: + ldrh r8, [r0] // load dest pixel + subs r2, r2, #1 // decrement loop counter + mov r6, r8, lsr #11 // extract dest red + and r7, r9, r8, lsr #5 // extract dest green + and r8, r8, #0x1f // extract dest blue + + smlabb r6, r6, r5, r10 // dest red * alpha + src red + smlabb r7, r7, r5, r12 // dest green * alpha + src green + smlabb r8, r8, r5, r4 // dest blue * alpha + src blue + + mov r6, r6, lsr #8 // shift down red + mov r7, r7, lsr #8 // shift down green + mov r6, r6, lsl #11 // shift red into 565 + orr r6, r7, lsl #5 // shift green into 565 + orr r6, r8, lsr #8 // shift blue into 565 + + strh r6, [r0], #2 // store pixel to dest, update ptr + bne 1b // if count != 0, loop + + pop {r4-r10, pc} // return + + + diff --git a/libpixelflinger/col32cb16blend_neon.S b/libpixelflinger/col32cb16blend_neon.S new file mode 100644 index 000000000..17b0d01a8 --- /dev/null +++ b/libpixelflinger/col32cb16blend_neon.S @@ -0,0 +1,153 @@ +/* libs/pixelflinger/col32cb16blend_neon.S +** +** (C) COPYRIGHT 2009 ARM Limited. +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +** +*/ + + .text + .align + + .global scanline_col32cb16blend_neon + +// +// This function alpha blends a fixed color into a destination scanline, using +// the formula: +// +// d = s + (((a + (a >> 7)) * d) >> 8) +// +// where d is the destination pixel, +// s is the source color, +// a is the alpha channel of the source color. +// +// The NEON implementation processes 16 pixels per iteration. The remaining 0 - 15 +// pixels are processed in ARM code. +// + +// r0 = destination buffer pointer +// r1 = color pointer +// r2 = count + + +scanline_col32cb16blend_neon: + push {r4-r11, lr} // stack ARM regs + + vmov.u16 q15, #256 // create alpha constant + movs r3, r2, lsr #4 // calc. 
sixteens iterations + vmov.u16 q14, #0x1f // create blue mask + + beq 2f // if r3 == 0, branch to singles + + vld4.8 {d0[], d2[], d4[], d6[]}, [r1] // load color into four registers + // split and duplicate them, such that + // d0 = 8 equal red values + // d2 = 8 equal green values + // d4 = 8 equal blue values + // d6 = 8 equal alpha values + vshll.u8 q0, d0, #5 // shift up red and widen + vshll.u8 q1, d2, #6 // shift up green and widen + vshll.u8 q2, d4, #5 // shift up blue and widen + + vshr.u8 d7, d6, #7 // extract top bit of alpha + vaddl.u8 q3, d6, d7 // add top bit into alpha + vsub.u16 q3, q15, q3 // invert alpha + +1: + // This loop processes 16 pixels per iteration. In the comments, references to + // the first eight pixels are suffixed with "0" (red0, green0, blue0), + // the second eight are suffixed "1". + // q8 = dst red0 + // q9 = dst green0 + // q10 = dst blue0 + // q13 = dst red1 + // q12 = dst green1 + // q11 = dst blue1 + + vld1.16 {d20, d21, d22, d23}, [r0] // load 16 dest pixels + vshr.u16 q8, q10, #11 // shift dst red0 to low 5 bits + pld [r0, #63] // preload next dest pixels + vshl.u16 q9, q10, #5 // shift dst green0 to top 6 bits + vand q10, q10, q14 // extract dst blue0 + vshr.u16 q9, q9, #10 // shift dst green0 to low 6 bits + vmul.u16 q8, q8, q3 // multiply dst red0 by src alpha + vshl.u16 q12, q11, #5 // shift dst green1 to top 6 bits + vmul.u16 q9, q9, q3 // multiply dst green0 by src alpha + vshr.u16 q13, q11, #11 // shift dst red1 to low 5 bits + vmul.u16 q10, q10, q3 // multiply dst blue0 by src alpha + vshr.u16 q12, q12, #10 // shift dst green1 to low 6 bits + vand q11, q11, q14 // extract dst blue1 + vadd.u16 q8, q8, q0 // add src red to dst red0 + vmul.u16 q13, q13, q3 // multiply dst red1 by src alpha + vadd.u16 q9, q9, q1 // add src green to dst green0 + vmul.u16 q12, q12, q3 // multiply dst green1 by src alpha + vadd.u16 q10, q10, q2 // add src blue to dst blue0 + vmul.u16 q11, q11, q3 // multiply dst blue1 by src alpha + vshr.u16 q8, q8, #8 // shift down red0 + vadd.u16 q13, q13, q0 // add src red to dst red1 + vshr.u16 q9, q9, #8 // shift down green0 + vadd.u16 q12, q12, q1 // add src green to dst green1 + vshr.u16 q10, q10, #8 // shift down blue0 + vadd.u16 q11, q11, q2 // add src blue to dst blue1 + vsli.u16 q10, q9, #5 // shift & insert green0 into blue0 + vshr.u16 q13, q13, #8 // shift down red1 + vsli.u16 q10, q8, #11 // shift & insert red0 into blue0 + vshr.u16 q12, q12, #8 // shift down green1 + vshr.u16 q11, q11, #8 // shift down blue1 + subs r3, r3, #1 // decrement loop counter + vsli.u16 q11, q12, #5 // shift & insert green1 into blue1 + vsli.u16 q11, q13, #11 // shift & insert red1 into blue1 + + vst1.16 {d20, d21, d22, d23}, [r0]! // write 16 pixels back to dst + bne 1b // if count != 0, loop + +2: + ands r3, r2, #15 // calc. 
single iterations + beq 4f // if r3 == 0, exit + + ldr r4, [r1] // load source color + mov r5, r4, lsr #24 // shift down alpha + add r5, r5, r5, lsr #7 // add in top bit + rsb r5, r5, #256 // invert alpha + and r11, r4, #0xff // extract red + ubfx r12, r4, #8, #8 // extract green + ubfx r4, r4, #16, #8 // extract blue + mov r11, r11, lsl #5 // prescale red + mov r12, r12, lsl #6 // prescale green + mov r4, r4, lsl #5 // prescale blue + +3: + ldrh r8, [r0] // load dest pixel + subs r3, r3, #1 // decrement loop counter + mov r6, r8, lsr #11 // extract dest red + ubfx r7, r8, #5, #6 // extract dest green + and r8, r8, #0x1f // extract dest blue + + smlabb r6, r6, r5, r11 // dest red * alpha + src red + smlabb r7, r7, r5, r12 // dest green * alpha + src green + smlabb r8, r8, r5, r4 // dest blue * alpha + src blue + + mov r6, r6, lsr #8 // shift down red + mov r7, r7, lsr #8 // shift down green + mov r6, r6, lsl #11 // shift red into 565 + orr r6, r7, lsl #5 // shift green into 565 + orr r6, r8, lsr #8 // shift blue into 565 + + strh r6, [r0], #2 // store pixel to dest, update ptr + bne 3b // if count != 0, loop +4: + + pop {r4-r11, pc} // return + + + diff --git a/libpixelflinger/scanline.cpp b/libpixelflinger/scanline.cpp index f70030680..a2f43eb08 100644 --- a/libpixelflinger/scanline.cpp +++ b/libpixelflinger/scanline.cpp @@ -80,6 +80,7 @@ static void scanline_perspective(context_t* c); static void scanline_perspective_single(context_t* c); static void scanline_t32cb16blend(context_t* c); static void scanline_t32cb16(context_t* c); +static void scanline_col32cb16blend(context_t* c); static void scanline_memcpy(context_t* c); static void scanline_memset8(context_t* c); static void scanline_memset16(context_t* c); @@ -93,6 +94,8 @@ static void rect_memcpy(context_t* c, size_t yc); extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t); extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct); +extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct); +extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct); // ---------------------------------------------------------------------------- @@ -111,6 +114,9 @@ static shortcut_t shortcuts[] = { { { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } }, { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, "565 fb, 8888 tx", scanline_t32cb16, init_y_noop }, + { { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } }, + { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } }, + "565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed }, { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, { 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } }, "(nop) alpha test", scanline_noop, init_y_noop }, @@ -943,6 +949,8 @@ void init_y_packed(context_t* c, int32_t y0) uint8_t f = c->state.buffers.color.format; c->packed = ggl_pack_color(c, f, c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0); + c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888, + c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0); c->iterators.y = y0; c->step_y = step_y__nop; // choose the rectangle blitter @@ -1253,6 +1261,45 @@ finish: // ---------------------------------------------------------------------------- +void scanline_col32cb16blend(context_t* c) +{ + int32_t x = c->iterators.xl; + size_t ct = c->iterators.xr - x; + int32_t y = c->iterators.y; + surface_t* cb = &(c->state.buffers.color); + union { + uint16_t* dst; + uint32_t* dst32; + }; + dst = 
reinterpret_cast(cb->data) + (x+(cb->stride*y)); + +#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__)) +#if defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN + scanline_col32cb16blend_neon(dst, &(c->packed8888), ct); +#else // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN + scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct); +#endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN +#else + uint32_t s = GGL_RGBA_TO_HOST(c->packed8888); + int sA = (s>>24); + int f = 0x100 - (sA + (sA>>7)); + while (ct--) { + uint16_t d = *dst; + int dR = (d>>11)&0x1f; + int dG = (d>>5)&0x3f; + int dB = (d)&0x1f; + int sR = (s >> ( 3))&0x1F; + int sG = (s >> ( 8+2))&0x3F; + int sB = (s >> (16+3))&0x1F; + sR += (f*dR)>>8; + sG += (f*dG)>>8; + sB += (f*dB)>>8; + *dst++ = uint16_t((sR<<11)|(sG<<5)|sB); + } +#endif + +} + void scanline_t32cb16(context_t* c) { int32_t x = c->iterators.xl; From cd64315f72537359537c66eebe482495ffefba57 Mon Sep 17 00:00:00 2001 From: Tim Date: Tue, 16 Feb 2010 20:18:29 +0000 Subject: [PATCH 3/5] Add documentation for some adb environmental variables. The ADB_TRACE one is particularly important. Change-Id: I125a5930c43065c8cf505eea40d20e3f209bc858 --- adb/commandline.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/adb/commandline.c b/adb/commandline.c index bb77c4fcd..8003a644e 100644 --- a/adb/commandline.c +++ b/adb/commandline.c @@ -170,6 +170,12 @@ void help() "\n" " - If it is \"system\" or \"data\", only the corresponding partition\n" " is updated.\n" + "\n" + "environmental variables:\n" + " ADB_TRACE - Print debug information. A comma separated list of the following values\n" + " 1 or all, adb, sockets, packets, rwx, usb, sync, sysdeps, transport, jdwp\n" + " ANDROID_SERIAL - The serial number to connect to. -s takes priority over this if given.\n" + " ANDROID_LOG_TAGS - When used with the logcat option, only these debug tags are printed.\n" ); } From f42d2fac2b09547295e353ddffb281aa7932403f Mon Sep 17 00:00:00 2001 From: Martyn Capewell Date: Mon, 7 Dec 2009 15:24:08 +0000 Subject: [PATCH 4/5] Fix LDM addressing mode disassembly The Pixelflinger disassembler does not handle LDM addressing modes correctly, assuming that the P and U bits in the instruction mean the same in both LDM and STM. This results in the disassembler producing sequences like: stmfd r13!, {r4-r11, r14} ... ... ... ldmea r13!, {r4-r11, r14} This small patch fixes it by EORing the P and U bits with the Load/Store bit. 
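The stack-form mnemonics are aliases whose P/U interpretation flips between loads and stores: STMDB and STMFD are the same instruction, but the matching pop is LDMIA, i.e. LDMFD, not LDMEA. The corrected index computation, as a small C sketch (the suffix table contents are shown for illustration; the real table lives in disassem.c):

    #include <stdint.h>

    /* Block-transfer suffixes indexed by the P:U bits: 0=da, 1=ia, 2=db, 3=ib.
     * Stack-form suffixes in the same order, as they apply to stores. */
    static const char *const stack_suffix[4] = { "ed", "ea", "fd", "fa" };

    /* For loads (L bit set) the mapping inverts (ldmia == ldmfd,
     * ldmdb == ldmea); EORing the P:U index with 3 achieves this, using the
     * same expression the patch puts into the insn_stkblktrans() macro. */
    static const char *stk_suffix(uint32_t insn)
    {
        unsigned pu = (insn >> 23) & 3;   /* U = bit 23, P = bit 24 */
        unsigned l  = (insn >> 20) & 1;   /* 1 for LDM, 0 for STM   */
        return stack_suffix[(3 * l) ^ pu];
    }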
Change-Id: Ic7a1556642c4e29415fc3697019f1239b6c26fc2 --- libpixelflinger/codeflinger/disassem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpixelflinger/codeflinger/disassem.c b/libpixelflinger/codeflinger/disassem.c index ee5e63a2b..c17f3ecc1 100644 --- a/libpixelflinger/codeflinger/disassem.c +++ b/libpixelflinger/codeflinger/disassem.c @@ -278,7 +278,7 @@ static char const insn_fpaconstants[][8] = { #define insn_condition(x) arm32_insn_conditions[(x >> 28) & 0x0f] #define insn_blktrans(x) insn_block_transfers[(x >> 23) & 3] -#define insn_stkblktrans(x) insn_stack_block_transfers[(x >> 23) & 3] +#define insn_stkblktrans(x) insn_stack_block_transfers[(3*((x >> 20)&1))^((x >> 23)&3)] #define op2_shift(x) op_shifts[(x >> 5) & 3] #define insn_fparnd(x) insn_fpa_rounding[(x >> 5) & 0x03] #define insn_fpaprec(x) insn_fpa_precision[(((x >> 18) & 2)|(x >> 7)) & 1] From 9b6c850d24df82451862b81f059361b586f5ef0b Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Queru Date: Mon, 3 May 2010 12:31:13 -0700 Subject: [PATCH 5/5] fix sim build Change-Id: Ide300eafbcbbc6dfae25fe86188302c6676c4a3b --- libpixelflinger/codeflinger/texturing.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libpixelflinger/codeflinger/texturing.cpp b/libpixelflinger/codeflinger/texturing.cpp index ba13fb303..6baa28ed9 100644 --- a/libpixelflinger/codeflinger/texturing.cpp +++ b/libpixelflinger/codeflinger/texturing.cpp @@ -25,7 +25,9 @@ #include "codeflinger/GGLAssembler.h" +#ifdef __arm__ #include +#endif namespace android {
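A note on the UXTB16 usage introduced by the first patch in this series: UXTB16 rotates its source register and zero-extends bytes 0 and 2 of the result into two 16-bit lanes, so one UXTB16 (plus one more with ROR #8 for the other byte pair) replaces loading a 0x00ff00ff mask and ANDing with it, and its result can feed the first stage of a multiply on Cortex-A8, unlike AND. A C model of the instruction, for illustration only:

    #include <stdint.h>

    /* Model of UXTB16 Rd, Rm, ROR #rot  (rot = 0, 8, 16 or 24):
     * rotate Rm right by rot, then zero-extend bytes 0 and 2 of the result
     * into the low and high 16-bit halves, i.e. ROR(Rm, rot) & 0x00ff00ff. */
    static inline uint32_t uxtb16(uint32_t rm, unsigned rot)
    {
        uint32_t r = rot ? ((rm >> rot) | (rm << (32u - rot))) : rm;
        return r & 0x00ff00ff;
    }

    /* For a little-endian 8888 texel this splits the four channels into two
     * registers of two 16-bit lanes each: uxtb16(t, 0) keeps bytes 0 and 2,
     * uxtb16(t, 8) keeps bytes 1 and 3, which is how the new filter32 path
     * feeds the channel pairs into MLA. */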