Quick Links
regarding NEON - RGB565 to 888 conversion
#1
Posted 03 July 2012 - 05:14 AM
#2
Posted 03 July 2012 - 08:52 AM
#include <arm_neon.h>
/*
 * Convert n RGB565 pixels to interleaved 8-bit R,G,B bytes (fast variant).
 *
 * n     number of pixels to convert; values <= 0 convert nothing.
 * p565  source pixels, red in bits 15..11, green in 10..5, blue in 4..0.
 * p888  destination; receives 3 bytes (R, G, B) per pixel, 3*n bytes total.
 *
 * Each channel is left-aligned in its output byte and the low bits are left
 * at zero (0xFFFF becomes F8 FC F8), i.e. no MSB replication is performed.
 *
 * Pixels are processed eight at a time with NEON; any remaining n % 8
 * pixels are converted by a scalar loop that yields the same byte values,
 * so the whole input is always converted.
 */
void rgb565to888(int n, const uint16_t *p565, uint8_t *p888)
{
    const uint8x8_t mask5 = vmov_n_u8(0xf8); /* keeps the 5 red bits   */
    const uint8x8_t mask6 = vmov_n_u8(0xfc); /* keeps the 6 green bits */
    int remaining = n;

    /* Vector body: 8 pixels in, 24 bytes out per iteration. */
    while (remaining >= 8) {
        const uint16x8_t pix = vld1q_u16(p565);
        uint8x8x3_t rgb;

        /* Narrowing shifts drop each channel into the top of a byte. */
        rgb.val[0] = vand_u8(vshrn_n_u16(pix, 8), mask5);
        rgb.val[1] = vand_u8(vshrn_n_u16(pix, 3), mask6);
        /* The left shift pushes the stray green bits out, so no mask. */
        rgb.val[2] = vshl_n_u8(vmovn_u16(pix), 3);

        vst3_u8(p888, rgb); /* interleaving store: R,G,B,R,G,B,... */

        p565 += 8;
        p888 += 8 * 3;
        remaining -= 8;
    }

    /* Scalar tail: the pixels that do not fill a whole vector. */
    while (remaining > 0) {
        const uint16_t pix = *p565++;

        p888[0] = (uint8_t)((pix >> 8) & 0xf8);
        p888[1] = (uint8_t)((pix >> 3) & 0xfc);
        p888[2] = (uint8_t)((pix << 3) & 0xf8);

        p888 += 3;
        remaining--;
    }
}
hth
s.
#3
Posted 03 July 2012 - 12:25 PM
#include <arm_neon.h>
/*
 * Convert n RGB565 pixels to interleaved 8-bit R,G,B bytes (accurate variant).
 *
 * n     number of pixels to convert; values <= 0 convert nothing.
 * p565  source pixels, red in bits 15..11, green in 10..5, blue in 4..0.
 * p888  destination; receives 3 bytes (R, G, B) per pixel, 3*n bytes total.
 *
 * Each channel is left-aligned in its output byte and its most significant
 * bits are copied into the vacated low bits, so full-scale input maps to
 * full-scale output (0xFFFF becomes FF FF FF, 0x0000 stays 00 00 00).
 *
 * Pixels are processed sixteen at a time with NEON; any remaining n % 16
 * pixels are converted by a scalar loop that yields the same byte values,
 * so the whole input is always converted.
 */
void rgb565to888(int n, const uint16_t *p565, uint8_t *p888)
{
    const uint8x16_t mask5 = vmovq_n_u8(0xf8); /* keeps the 5 red bits   */
    const uint8x16_t mask6 = vmovq_n_u8(0xfc); /* keeps the 6 green bits */
    int remaining = n;

    /* Vector body: 16 pixels in, 48 bytes out per iteration. */
    while (remaining >= 16) {
        const uint16x8_t lo = vld1q_u16(p565);     /* pixels 0..7  */
        const uint16x8_t hi = vld1q_u16(p565 + 8); /* pixels 8..15 */
        uint8x16_t red, grn, blu;
        uint8x16x3_t rgb;

        /* Narrow each half to bytes, then join into 16-lane vectors. */
        red = vcombine_u8(vshrn_n_u16(lo, 8), vshrn_n_u16(hi, 8));
        grn = vcombine_u8(vshrn_n_u16(lo, 3), vshrn_n_u16(hi, 3));
        blu = vcombine_u8(vmovn_u16(lo), vmovn_u16(hi));

        /* Blue is left-aligned first; its low-bit fill is derived from that. */
        blu = vshlq_n_u8(blu, 3);

        /* High part (masked channel) OR low part (its own top bits). */
        rgb.val[0] = vorrq_u8(vandq_u8(red, mask5), vshrq_n_u8(red, 5));
        rgb.val[1] = vorrq_u8(vandq_u8(grn, mask6), vshrq_n_u8(grn, 6));
        rgb.val[2] = vorrq_u8(blu, vshrq_n_u8(blu, 5));

        vst3q_u8(p888, rgb); /* interleaving store: R,G,B,R,G,B,... */

        p565 += 16;
        p888 += 16 * 3;
        remaining -= 16;
    }

    /* Scalar tail: the pixels that do not fill a whole vector. */
    while (remaining > 0) {
        const uint16_t pix = *p565++;
        const unsigned r5 = (pix >> 11) & 0x1fu;
        const unsigned g6 = (pix >> 5) & 0x3fu;
        const unsigned b5 = pix & 0x1fu;

        p888[0] = (uint8_t)((r5 << 3) | (r5 >> 2));
        p888[1] = (uint8_t)((g6 << 2) | (g6 >> 4));
        p888[2] = (uint8_t)((b5 << 3) | (b5 >> 2));

        p888 += 3;
        remaining--;
    }
}
hth
s.















