RGB24 To Yuv420 C语言 +汇编实现(windows平台)
生活随笔
收集整理的這篇文章主要介紹了
RGB24 To Yuv420 C语言 +汇编实现(windows平台)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
以下代碼來自libyuv
#include <stdint.h> #include <stdlib.h> #include <string.h>#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))#define align_buffer_64(var, size) \uint8_t* var##_mem = (uint8_t*)(malloc((size)+63)); /* NOLINT */ \uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */#define free_aligned_buffer_64(var) \free(var##_mem); \var = 0#define SIMD_ALIGNED(var) __declspec(align(16)) var#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))typedef __declspec(align(16)) uint8_t uvec8[16];static const uvec8 kShuffleMaskRGB24ToARGB = {0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u };typedef __declspec(align(16)) int8_t vec8[16];static const vec8 kARGBToY = { 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0 };static const uvec8 kAddY16 = { 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u };typedef __declspec(align(32)) uint8_t ulvec8[32];#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \uint8_t* dst_v, int width) { \SIMD_ALIGNED(uint8_t temp[128 * 4]); \memset(temp, 0, 128 * 2); /* for msan */ \int r = width & MASK; \int n = width & ~MASK; \ if (n > 0) { \ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n); \ } \memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \SS(r, UVSHIFT) * BPP); \ if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \BPP); \memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ } \ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \}#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \SIMD_ALIGNED(uint8_t temp[128 * 2]); \memset(temp, 0, 128); /* for YUY2 and msan */ \int r = width & MASK; \int n = width & ~MASK; \ if (n > 0) { \ANY_SIMD(src_ptr, dst_ptr, n); \ } \memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ANY_SIMD(temp, temp + 128, MASK + 1); \memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \}#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \SIMD_ALIGNED(uint8_t temp[128 * 2]); \memset(temp, 0, 128); /* for YUY2 and msan */ \int r = width & MASK; \int n = width & ~MASK; \ if (n > 0) { \ANY_SIMD(src_ptr, dst_ptr, n); \ } \memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ANY_SIMD(temp, temp + 128, MASK + 1); \memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \}static const ulvec8 kBiasUV128 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };__declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {__asm {mov eax, [esp + 4] // src_rgb24mov edx, [esp + 8] // dst_argbmov ecx, [esp + 12] // widthpcmpeqb xmm5, xmm5 // generate mask 0xff000000pslld xmm5, 24movdqa xmm4, xmmword ptr kShuffleMaskRGB24ToARGBconvertloop :movdqu xmm0, [eax]movdqu xmm1, [eax + 16]movdqu xmm3, [eax + 32]lea eax, [eax + 48]movdqa xmm2, xmm3palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}pshufb xmm2, xmm4por xmm2, xmm5palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}pshufb xmm0, xmm4movdqu[edx + 32], xmm2por xmm0, xmm5pshufb xmm1, xmm4movdqu[edx], xmm0por xmm1, xmm5palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]}pshufb xmm3, xmm4movdqu[edx + 16], xmm1por xmm3, xmm5movdqu[edx + 48], xmm3lea edx, [edx + 64]sub ecx, 16jg convertloopret} }static const vec8 kARGBToU = { 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0 };static const vec8 kARGBToV = {-18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, };__declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb,int src_stride_argb,uint8_t* dst_u,uint8_t* dst_v,int width) {__asm {push esipush edimov eax, [esp + 8 + 4] // src_argbmov esi, [esp + 8 + 8] // src_stride_argbmov edx, [esp + 8 + 12] // dst_umov edi, [esp + 8 + 16] // dst_vmov ecx, [esp + 8 + 20] // widthmovdqa xmm5, xmmword ptr kBiasUV128movdqa xmm6, xmmword ptr kARGBToVmovdqa xmm7, xmmword ptr kARGBToUsub edi, edx // stride from u to vconvertloop :/* step 1 - subsample 16x2 argb pixels to 8x1 */movdqu xmm0, [eax]movdqu xmm4, [eax + esi]pavgb xmm0, xmm4movdqu xmm1, [eax + 16]movdqu xmm4, [eax + esi + 16]pavgb xmm1, xmm4movdqu xmm2, [eax + 32]movdqu xmm4, [eax + esi + 32]pavgb xmm2, xmm4movdqu xmm3, [eax + 48]movdqu xmm4, [eax + esi + 48]pavgb xmm3, xmm4lea eax, [eax + 64]movdqa xmm4, xmm0shufps xmm0, xmm1, 0x88shufps xmm4, xmm1, 0xddpavgb xmm0, xmm4movdqa xmm4, xmm2shufps xmm2, xmm3, 0x88shufps xmm4, xmm3, 0xddpavgb xmm2, xmm4// step 2 - convert to U and V// from here down is very similar to Y code except// instead of 16 different pixels, its 8 pixels of U and 8 of Vmovdqa xmm1, xmm0movdqa xmm3, xmm2pmaddubsw xmm0, xmm7 // Upmaddubsw xmm2, xmm7pmaddubsw xmm1, xmm6 // Vpmaddubsw xmm3, xmm6phaddw xmm0, xmm2phaddw xmm1, xmm3psraw xmm0, 8psraw xmm1, 8packsswb xmm0, xmm1paddb xmm0, xmm5 // -> unsigned// step 3 - store 8 U and 8 V valuesmovlps qword ptr[edx], xmm0 // Umovhps qword ptr[edx + edi], xmm0 // Vlea edx, [edx + 8]sub ecx, 16jg convertlooppop edipop esiret} }__declspec(naked) void ARGBToYRow_SSSE3(const uint8_t* src_argb,uint8_t* dst_y,int width) {__asm {mov eax, [esp + 4] /* src_argb */mov edx, [esp + 8] /* dst_y */mov ecx, [esp + 12] /* width */movdqa xmm4, xmmword ptr kARGBToYmovdqa xmm5, xmmword ptr kAddY16convertloop :movdqu xmm0, [eax]movdqu xmm1, [eax + 16]movdqu xmm2, [eax + 32]movdqu xmm3, [eax + 48]pmaddubsw xmm0, xmm4pmaddubsw xmm1, xmm4pmaddubsw xmm2, xmm4pmaddubsw xmm3, xmm4lea eax, [eax + 64]phaddw xmm0, xmm1phaddw xmm2, xmm3psrlw xmm0, 7psrlw xmm2, 7packuswb xmm0, xmm2paddb xmm0, xmm5movdqu[edx], xmm0lea edx, [edx + 16]sub ecx, 16jg convertloopret} }ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15) ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15) ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)int RGB24ToI420(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) {int y;void(*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width);void(*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width);void(*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width);if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0){return -1;}if (height < 0){height = -height;src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;src_stride_rgb24 = -src_stride_rgb24;}RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;//寬度不是16字節的倍數if (IS_ALIGNED(width, 16)){RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;}ARGBToUVRow = ARGBToUVRow_Any_SSSE3;ARGBToYRow = ARGBToYRow_Any_SSSE3;if (IS_ALIGNED(width, 16)) {ARGBToUVRow = ARGBToUVRow_SSSE3;ARGBToYRow = ARGBToYRow_SSSE3;}const int kRowSize = (width * 4 + 31) & ~31;align_buffer_64(row, kRowSize * 2);for (y = 0; y < height - 1; y += 2) {RGB24ToARGBRow(src_rgb24, row, width);RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);ARGBToYRow(row, dst_y, width);ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);src_rgb24 += src_stride_rgb24 * 2;dst_y += dst_stride_y * 2;dst_u += dst_stride_u;dst_v += dst_stride_v;}if (height & 1){RGB24ToARGBRow(src_rgb24, row, width);ARGBToUVRow(row, 0, dst_u, dst_v, width);ARGBToYRow(row, dst_y, width);}free_aligned_buffer_64(row);return 0; }總結
以上是生活随笔為你收集整理的RGB24 To Yuv420 C语言 +汇编实现(windows平台)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 最新ThinkPHP微信独立精彩互换抢红
- 下一篇: 常用微分近似公式