H264 data alignment and 32-bit compilation ...

This commit is contained in:
erbth
2014-08-21 00:58:08 +02:00
parent 9eec9cb18a
commit dee50a8ca2
8 changed files with 34 additions and 40 deletions

View File

@@ -117,9 +117,12 @@ if(WITH_LIBAVCODEC OR WITH_OPENH264)
set(SRC ${CMAKE_CURRENT_SOURCE_DIR}/h264_x64.asm)
set(OBJ ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${H264_ASM}.dir/h264_x64.asm.o)
add_custom_command(TARGET ${H264_ASM}
COMMAND nasm ARGS -f elf64 -o ${OBJ} ${SRC})
COMMAND nasm ARGS -f elf64 -o ${OBJ} ${SRC})
else()
message(FATAL_ERROR "H264 YUV data converting is not implemented in 32 bit assembly yet.")
set(SRC ${CMAKE_CURRENT_SOURCE_DIR}/h264_x32.asm)
set(OBJ ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${H264_ASM}.dir/h264_x32.asm.o)
add_custom_command(TARGET ${H264_ASM}
COMMAND nasm ARGS -f elf32 -o ${OBJ} ${SRC})
endif()
set(FREERDP_OPENH264_LIBS ${OPENH264_LIBRARIES} ${OBJ})
@@ -136,7 +139,10 @@ if(WITH_LIBAVCODEC OR WITH_OPENH264)
add_custom_command(TARGET ${H264_ASM}
COMMAND nasm ARGS -f elf64 -o ${OBJ} ${SRC})
else()
message(FATAL_ERROR "H264 YUV data converting with SSSE3 is not implemented in 32 bit assembly yet.")
set(SRC ${CMAKE_CURRENT_SOURCE_DIR}/h264_ssse3_x32.asm)
set(OBJ ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${H264_ASM}.dir/h264_ssse3_x32.asm.o)
add_custom_command(TARGET ${H264_ASM}
COMMAND nasm ARGS -f elf32 -o ${OBJ} ${SRC})
endif()
set(FREERDP_OPENH264_LIBS ${OPENH264_LIBRARIES} ${OBJ})

View File

@@ -216,8 +216,7 @@ int h264_prepare_rgb_buffer(H264_CONTEXT* h264, int width, int height)
if (size > h264->size)
{
h264->size = size;
h264->data = (BYTE*) realloc(h264->data, h264->size);
memset(h264->data, 0, h264->size);
h264->data = (BYTE*) _aligned_realloc(h264->data, h264->size,16);
}
if (!h264->data)
@@ -747,9 +746,6 @@ H264_CONTEXT* h264_context_new(BOOL Compressor)
{
h264->Compressor = Compressor;
if (h264_prepare_rgb_buffer(h264, 256, 256) < 0)
return NULL;
#ifdef WITH_OPENH264
if (!openh264_init(h264))
{
@@ -776,7 +772,7 @@ void h264_context_free(H264_CONTEXT* h264)
{
if (h264)
{
free(h264->data);
_aligne_free(h264->data);
#ifdef WITH_OPENH264
openh264_free(h264);

View File

@@ -1,3 +1,8 @@
; An entire function for converting YUV420p data to the RGB format (without any special upconverting).
; It is written entirely in NASM x86 assembly for Intel processors supporting SSSE3 and higher.
; Restrictions: the output data has to be aligned to 16 bytes (a question of REAL performance!)
; and the width of the resolution must be divisible by four.
;
section .text
global check_ssse3
@@ -372,7 +377,7 @@ valid_yuv_data:
por xmm4,xmm5
por xmm4,xmm6
movdqu [edi],xmm4
movdqa [edi],xmm4
;Y data processing in second line
@@ -414,7 +419,7 @@ valid_yuv_data:
por xmm4,xmm6
mov edx,[ebp-318]
movdqu [edi+edx],xmm4
movdqa [edi+edx],xmm4
skip_last_line1:
add edi,16

View File

@@ -1,3 +1,8 @@
; An entire function for converting YUV420p data to the RGB format (without any special upconverting).
; It is written entirely in NASM x86 assembly for Intel processors supporting SSSE3 and higher.
; Restrictions: the output data has to be aligned to 16 bytes (a question of REAL performance!)
; and the width of the resolution must be divisible by four.
;
section .text
global check_ssse3
@@ -385,7 +390,7 @@ valid_yuv_data:
por xmm4,xmm5
por xmm4,xmm6
movdqu [rdi],xmm4
movdqa [rdi],xmm4
;Y data processing in second line
@@ -424,7 +429,7 @@ valid_yuv_data:
por xmm4,xmm5
por xmm4,xmm6
movdqu [rdi+r10],xmm4
movdqa [rdi+r10],xmm4
skip_last_line1:
add rdi,16

View File

@@ -1,20 +0,0 @@
TestOpenH264ASM: h264_ssse3.asm.o TestOpenH264ASM.c.o h264.c.o
gcc -o TestOpenH264ASM h264_ssse3.asm.o TestOpenH264ASM.c.o h264.c.o
h264_ssse3.asm.o: ../h264_ssse3_x64.asm
nasm -f elf64 -o h264_ssse3.asm.o ../h264_ssse3_x64.asm
h264.asm.o: ../h264.asm
nasm -f elf64 -o h264.asm.o ../h264.asm
TestOpenH264ASM.c.o: TestOpenH264ASM.c
gcc -c -o TestOpenH264ASM.c.o TestOpenH264ASM.c
h264.c.o: ../h264.c
gcc -c -O3 -o h264.c.o ../h264.c
clean:
rm -f TestOpenH264ASM TestOpenH264ASM.c.o h264_ssse3.asm.o h264.c.o h264.asm.o
old: h264.asm.o TestOpenH264ASM.c.o h264.c.o
gcc -o TestOpenH264ASM h264.asm.o TestOpenH264ASM.c.o h264.c.o

View File

@@ -1,11 +1,11 @@
TestOpenH264ASM: TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o #h264.asm.o
gcc -o TestOpenH264ASM TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o #h264.asm.o
TestOpenH264ASM: TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o
gcc -o TestOpenH264ASM TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o -lwinpr
h264_ssse3.asm.o: ../h264_ssse3_x32.asm
nasm -f elf64 -o h264_ssse3.asm.o ../h264_ssse3_x32.asm
nasm -f elf32 -o h264_ssse3.asm.o ../h264_ssse3_x32.asm
h264.asm.o: ../h264_x32.asm
nasm -f elf64 -o h264.asm.o ../h264_x32.asm
nasm -f elf32 -o h264.asm.o ../h264_x32.asm
TestOpenH264ASM.c.o: TestOpenH264ASM.c
gcc -c -o TestOpenH264ASM.c.o TestOpenH264ASM.c

View File

@@ -1,5 +1,5 @@
TestOpenH264ASM: TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o
gcc -o TestOpenH264ASM TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o
gcc -o TestOpenH264ASM TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o -lwinpr
h264_ssse3.asm.o: ../h264_ssse3_x64.asm
nasm -f elf64 -o h264_ssse3.asm.o ../h264_ssse3_x64.asm

View File

@@ -2,6 +2,8 @@
#include <stdlib.h>
#include <sys/time.h>
#include <winpr/crt.h>
#include "TestOpenH264ASM.h"
#define WIDTH 1920
@@ -28,7 +30,7 @@ int main(void){
pSrcData[0]=malloc(1984*HEIGHT*sizeof(char));
pSrcData[1]=malloc(1984*HEIGHT/4*sizeof(char));
pSrcData[2]=malloc(1984*HEIGHT/4*sizeof(char));
pDstData_asm=malloc(WIDTH*HEIGHT*4*sizeof(char));
pDstData_asm=_aligned_malloc(WIDTH*HEIGHT*4*sizeof(char),16);
pDstData_c=malloc(WIDTH*HEIGHT*4*sizeof(char));
for(i=0;i<WIDTH*HEIGHT;i++){
@@ -81,7 +83,7 @@ int main(void){
free(pSrcData[1]);
free(pSrcData[2]);
free(pDstData_c);
free(pDstData_asm);
_aligned_free(pDstData_asm);
return 0;
}