diff --git a/libfreerdp/codec/CMakeLists.txt b/libfreerdp/codec/CMakeLists.txt index 39bcb033f..bd714b760 100644 --- a/libfreerdp/codec/CMakeLists.txt +++ b/libfreerdp/codec/CMakeLists.txt @@ -117,9 +117,12 @@ if(WITH_LIBAVCODEC OR WITH_OPENH264) set(SRC ${CMAKE_CURRENT_SOURCE_DIR}/h264_x64.asm) set(OBJ ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${H264_ASM}.dir/h264_x64.asm.o) add_custom_command(TARGET ${H264_ASM} - COMMAND nasm ARGS -f elf64 -o ${OBJ} ${SRC}) + COMMAND nasm ARGS -f elf64 -o ${OBJ} ${SRC}) else() - message(FATAL_ERROR "H264 YUV data converting is not implemented in 32 bit assembly yet.") + set(SRC ${CMAKE_CURRENT_SOURCE_DIR}/h264_x32.asm) + set(OBJ ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${H264_ASM}.dir/h264_x32.asm.o) + add_custom_command(TARGET ${H264_ASM} + COMMAND nasm ARGS -f elf32 -o ${OBJ} ${SRC}) endif() set(FREERDP_OPENH264_LIBS ${OPENH264_LIBRARIES} ${OBJ}) @@ -136,7 +139,10 @@ if(WITH_LIBAVCODEC OR WITH_OPENH264) add_custom_command(TARGET ${H264_ASM} COMMAND nasm ARGS -f elf64 -o ${OBJ} ${SRC}) else() - message(FATAL_ERROR "H264 YUV data converting with SSSE3 is not implemented in 32 bit assembly yet.") + set(SRC ${CMAKE_CURRENT_SOURCE_DIR}/h264_ssse3_x32.asm) + set(OBJ ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${H264_ASM}.dir/h264_ssse3_x32.asm.o) + add_custom_command(TARGET ${H264_ASM} + COMMAND nasm ARGS -f elf32 -o ${OBJ} ${SRC}) endif() set(FREERDP_OPENH264_LIBS ${OPENH264_LIBRARIES} ${OBJ}) diff --git a/libfreerdp/codec/h264.c b/libfreerdp/codec/h264.c index 5180ffa5b..ef66cf8bc 100644 --- a/libfreerdp/codec/h264.c +++ b/libfreerdp/codec/h264.c @@ -216,8 +216,7 @@ int h264_prepare_rgb_buffer(H264_CONTEXT* h264, int width, int height) if (size > h264->size) { h264->size = size; - h264->data = (BYTE*) realloc(h264->data, h264->size); - memset(h264->data, 0, h264->size); + h264->data = (BYTE*) _aligned_realloc(h264->data, h264->size,16); } if (!h264->data) @@ -747,9 +746,6 @@ H264_CONTEXT* h264_context_new(BOOL Compressor) { h264->Compressor 
= Compressor; - if (h264_prepare_rgb_buffer(h264, 256, 256) < 0) - return NULL; - #ifdef WITH_OPENH264 if (!openh264_init(h264)) { @@ -776,7 +772,7 @@ void h264_context_free(H264_CONTEXT* h264) { if (h264) { - free(h264->data); + _aligned_free(h264->data); #ifdef WITH_OPENH264 openh264_free(h264); diff --git a/libfreerdp/codec/h264_ssse3_x32.asm b/libfreerdp/codec/h264_ssse3_x32.asm index 66962b1ba..b1a57e545 100644 --- a/libfreerdp/codec/h264_ssse3_x32.asm +++ b/libfreerdp/codec/h264_ssse3_x32.asm @@ -1,3 +1,8 @@ +; an entire function for converting YUV420p data to the RGB format (without any special upconverting) +; It's completely written in nasm-x86-assembly for Intel processors supporting SSSE3 and higher. +; Restrictions are that output data has to be aligned to 16 bytes (a question of REAL performance!) +; and the width of resolution must be divisible by four. +; section .text global check_ssse3 @@ -372,7 +377,7 @@ valid_yuv_data: por xmm4,xmm5 por xmm4,xmm6 - movdqu [edi],xmm4 + movdqa [edi],xmm4 ;Y data processing in secound line @@ -414,7 +419,7 @@ valid_yuv_data: por xmm4,xmm6 mov edx,[ebp-318] - movdqu [edi+edx],xmm4 + movdqa [edi+edx],xmm4 skip_last_line1: add edi,16 diff --git a/libfreerdp/codec/h264_ssse3_x64.asm b/libfreerdp/codec/h264_ssse3_x64.asm index 8b1fda229..51428b46f 100644 --- a/libfreerdp/codec/h264_ssse3_x64.asm +++ b/libfreerdp/codec/h264_ssse3_x64.asm @@ -1,3 +1,8 @@ +; an entire function for converting YUV420p data to the RGB format (without any special upconverting) +; It's completely written in nasm-x86-assembly for Intel processors supporting SSSE3 and higher. +; Restrictions are that output data has to be aligned to 16 bytes (a question of REAL performance!) +; and the width of resolution must be divisible by four. 
+; section .text global check_ssse3 @@ -385,7 +390,7 @@ valid_yuv_data: por xmm4,xmm5 por xmm4,xmm6 - movdqu [rdi],xmm4 + movdqa [rdi],xmm4 ;Y data processing in secound line @@ -424,7 +429,7 @@ valid_yuv_data: por xmm4,xmm5 por xmm4,xmm6 - movdqu [rdi+r10],xmm4 + movdqa [rdi+r10],xmm4 skip_last_line1: add rdi,16 diff --git a/libfreerdp/codec/test/Makefile.TestOpenH264ASM b/libfreerdp/codec/test/Makefile.TestOpenH264ASM deleted file mode 100644 index 8e747a647..000000000 --- a/libfreerdp/codec/test/Makefile.TestOpenH264ASM +++ /dev/null @@ -1,20 +0,0 @@ -TestOpenH264ASM: h264_ssse3.asm.o TestOpenH264ASM.c.o h264.c.o - gcc -o TestOpenH264ASM h264_ssse3.asm.o TestOpenH264ASM.c.o h264.c.o - -h264_ssse3.asm.o: ../h264_ssse3_x64.asm - nasm -f elf64 -o h264_ssse3.asm.o ../h264_ssse3_x64.asm - -h264.asm.o: ../h264.asm - nasm -f elf64 -o h264.asm.o ../h264.asm - -TestOpenH264ASM.c.o: TestOpenH264ASM.c - gcc -c -o TestOpenH264ASM.c.o TestOpenH264ASM.c - -h264.c.o: ../h264.c - gcc -c -O3 -o h264.c.o ../h264.c - -clean: - rm -f TestOpenH264ASM TestOpenH264ASM.c.o h264_ssse3.asm.o h264.c.o h264.asm.o - -old: h264.asm.o TestOpenH264ASM.c.o h264.c.o - gcc -o TestOpenH264ASM h264.asm.o TestOpenH264ASM.c.o h264.c.o diff --git a/libfreerdp/codec/test/Makefile.TestOpenH264ASM32 b/libfreerdp/codec/test/Makefile.TestOpenH264ASM32 index ab52a3b7d..2a0308db4 100644 --- a/libfreerdp/codec/test/Makefile.TestOpenH264ASM32 +++ b/libfreerdp/codec/test/Makefile.TestOpenH264ASM32 @@ -1,11 +1,11 @@ -TestOpenH264ASM: TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o #h264.asm.o - gcc -o TestOpenH264ASM TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o #h264.asm.o +TestOpenH264ASM: TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o + gcc -o TestOpenH264ASM TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o -lwinpr h264_ssse3.asm.o: ../h264_ssse3_x32.asm - nasm -f elf64 -o h264_ssse3.asm.o ../h264_ssse3_x32.asm + nasm -f elf32 -o h264_ssse3.asm.o ../h264_ssse3_x32.asm h264.asm.o: 
../h264_x32.asm - nasm -f elf64 -o h264.asm.o ../h264_x32.asm + nasm -f elf32 -o h264.asm.o ../h264_x32.asm TestOpenH264ASM.c.o: TestOpenH264ASM.c gcc -c -o TestOpenH264ASM.c.o TestOpenH264ASM.c diff --git a/libfreerdp/codec/test/Makefile.TestOpenH264ASM64 b/libfreerdp/codec/test/Makefile.TestOpenH264ASM64 index ace4451ae..a060926b7 100644 --- a/libfreerdp/codec/test/Makefile.TestOpenH264ASM64 +++ b/libfreerdp/codec/test/Makefile.TestOpenH264ASM64 @@ -1,5 +1,5 @@ TestOpenH264ASM: TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o - gcc -o TestOpenH264ASM TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o + gcc -o TestOpenH264ASM TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o -lwinpr h264_ssse3.asm.o: ../h264_ssse3_x64.asm nasm -f elf64 -o h264_ssse3.asm.o ../h264_ssse3_x64.asm diff --git a/libfreerdp/codec/test/TestOpenH264ASM.c b/libfreerdp/codec/test/TestOpenH264ASM.c index dc0f2e6d5..d0c04787f 100644 --- a/libfreerdp/codec/test/TestOpenH264ASM.c +++ b/libfreerdp/codec/test/TestOpenH264ASM.c @@ -2,6 +2,8 @@ #include #include +#include + #include "TestOpenH264ASM.h" #define WIDTH 1920 @@ -28,7 +30,7 @@ int main(void){ pSrcData[0]=malloc(1984*HEIGHT*sizeof(char)); pSrcData[1]=malloc(1984*HEIGHT/4*sizeof(char)); pSrcData[2]=malloc(1984*HEIGHT/4*sizeof(char)); - pDstData_asm=malloc(WIDTH*HEIGHT*4*sizeof(char)); + pDstData_asm=_aligned_malloc(WIDTH*HEIGHT*4*sizeof(char),16); pDstData_c=malloc(WIDTH*HEIGHT*4*sizeof(char)); for(i=0;i