Merge remote-tracking branch 'upstream/master'

This commit is contained in:
C-o-r-E
2013-01-23 16:10:57 -05:00
135 changed files with 8316 additions and 982 deletions

View File

@@ -231,6 +231,10 @@ set(GSTREAMER_FEATURE_TYPE "RECOMMENDED")
set(GSTREAMER_FEATURE_PURPOSE "multimedia")
set(GSTREAMER_FEATURE_DESCRIPTION "multimedia redirection, audio and video playback")
set(IPP_FEATURE_TYPE "OPTIONAL")
set(IPP_FEATURE_PURPOSE "performance")
set(IPP_FEATURE_DESCRIPTION "Intel Integrated Performance Primitives library")
if(WIN32)
set(X11_FEATURE_TYPE "DISABLED")
set(ZLIB_FEATURE_TYPE "DISABLED")
@@ -285,6 +289,9 @@ find_feature(PCSC ${PCSC_FEATURE_TYPE} ${PCSC_FEATURE_PURPOSE} ${PCSC_FEATURE_DE
find_feature(FFmpeg ${FFMPEG_FEATURE_TYPE} ${FFMPEG_FEATURE_PURPOSE} ${FFMPEG_FEATURE_DESCRIPTION})
find_feature(Gstreamer ${GSTREAMER_FEATURE_TYPE} ${GSTREAMER_FEATURE_PURPOSE} ${GSTREAMER_FEATURE_DESCRIPTION})
# Intel Performance Primitives
find_feature(IPP ${IPP_FEATURE_TYPE} ${IPP_FEATURE_PURPOSE} ${IPP_FEATURE_DESCRIPTION})
# Installation Paths
if(WIN32)
set(CMAKE_INSTALL_BINDIR ".")

View File

@@ -144,6 +144,7 @@ void cliprdr_process_short_format_names(cliprdrPlugin* cliprdr, STREAM* s, UINT3
}
else
{
format_name->name = NULL;
format_name->length = ConvertFromUnicode(CP_UTF8, 0, (WCHAR*) s->p, 32 / 2, &format_name->name, 0, NULL, NULL);
}

View File

@@ -430,7 +430,7 @@ BOOL drive_file_query_information(DRIVE_FILE* file, UINT32 FsInformationClass, S
BOOL drive_file_set_information(DRIVE_FILE* file, UINT32 FsInformationClass, UINT32 Length, STREAM* input)
{
char* s;
char* s = NULL;
mode_t m;
UINT64 size;
int status;

View File

@@ -471,7 +471,7 @@ static void drive_process_irp_query_volume_information(DRIVE_DEVICE* disk, IRP*
static void drive_process_irp_query_directory(DRIVE_DEVICE* disk, IRP* irp)
{
char* path;
char* path = NULL;
int status;
DRIVE_FILE* file;
BYTE InitialQuery;

View File

@@ -72,7 +72,7 @@ typedef struct _PARALLEL_DEVICE PARALLEL_DEVICE;
static void parallel_process_irp_create(PARALLEL_DEVICE* parallel, IRP* irp)
{
char* path;
char* path = NULL;
int status;
UINT32 PathLength;

View File

@@ -650,12 +650,15 @@ static void rdpsnd_process_terminate(rdpSvcPlugin* plugin)
if (rdpsnd->device)
IFCALL(rdpsnd->device->Free, rdpsnd->device);
while ((item = list_dequeue(rdpsnd->data_out_list)) != NULL)
if (rdpsnd->data_out_list)
{
stream_free(item->data_out);
free(item);
while ((item = list_dequeue(rdpsnd->data_out_list)) != NULL)
{
stream_free(item->data_out);
free(item);
}
list_free(rdpsnd->data_out_list);
}
list_free(rdpsnd->data_out_list);
if (rdpsnd->subsystem)
free(rdpsnd->subsystem);

View File

@@ -79,7 +79,7 @@ static BOOL serial_check_fds(SERIAL_DEVICE* serial);
static void serial_process_irp_create(SERIAL_DEVICE* serial, IRP* irp)
{
char* path;
char* path = NULL;
int status;
SERIAL_TTY* tty;
UINT32 PathLength;
@@ -570,6 +570,7 @@ static void __serial_check_fds(SERIAL_DEVICE* serial)
IRP* prev;
SERIAL_TTY* tty;
UINT32 result = 0;
BOOL irp_completed = FALSE;
memset(&serial->tv, 0, sizeof(struct timeval));
tty = serial->tty;
@@ -588,6 +589,7 @@ static void __serial_check_fds(SERIAL_DEVICE* serial)
{
irp->IoStatus = STATUS_SUCCESS;
serial_process_irp_read(serial, irp);
irp_completed = TRUE;
}
break;
@@ -596,6 +598,7 @@ static void __serial_check_fds(SERIAL_DEVICE* serial)
{
irp->IoStatus = STATUS_SUCCESS;
serial_process_irp_write(serial, irp);
irp_completed = TRUE;
}
break;
@@ -607,6 +610,7 @@ static void __serial_check_fds(SERIAL_DEVICE* serial)
irp->IoStatus = STATUS_SUCCESS;
stream_write_UINT32(irp->output, result);
irp->Complete(irp);
irp_completed = TRUE;
}
break;
@@ -618,7 +622,7 @@ static void __serial_check_fds(SERIAL_DEVICE* serial)
prev = irp;
irp = (IRP*) list_next(serial->pending_irps, irp);
if (prev->IoStatus == STATUS_SUCCESS)
if (irp_completed || prev->IoStatus == STATUS_SUCCESS)
{
list_remove(serial->pending_irps, prev);
SetEvent(serial->in_event);

View File

@@ -36,7 +36,7 @@ set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} freerdp-client)
set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS
MONOLITHIC ${MONOLITHIC_BUILD}
MODULE freerdp
MODULES freerdp-core freerdp-gdi freerdp-locale freerdp-codec freerdp-utils)
MODULES freerdp-core freerdp-gdi freerdp-locale freerdp-codec freerdp-primitives freerdp-utils)
target_link_libraries(${MODULE_NAME} ${${MODULE_PREFIX}_LIBS})
install(TARGETS ${MODULE_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})

View File

@@ -78,7 +78,7 @@ set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} freerdp-client)
set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD}
MODULE freerdp
MODULES freerdp-core freerdp-cache freerdp-gdi freerdp-codec freerdp-rail freerdp-utils)
MODULES freerdp-core freerdp-cache freerdp-gdi freerdp-codec freerdp-primitives freerdp-rail freerdp-utils)
set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD}
MODULE winpr

View File

@@ -46,7 +46,7 @@ set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS
set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS
MONOLITHIC ${MONOLITHIC_BUILD}
MODULE freerdp
MODULES freerdp-core freerdp-gdi freerdp-codec freerdp-utils)
MODULES freerdp-core freerdp-gdi freerdp-codec freerdp-primitives freerdp-utils)
target_link_libraries(${MODULE_NAME} ${${MODULE_PREFIX}_LIBS})

View File

@@ -544,6 +544,19 @@ void wf_gdi_surface_bits(rdpContext* context, SURFACE_BITS_COMMAND* surface_bits
free(tile_bitmap);
}
/*
 * Surface frame-marker callback for the Windows client.
 *
 * When the marker signals the end of a frame and the FrameAcknowledge
 * setting is greater than zero, the frame id is handed to the update's
 * SurfaceFrameAcknowledge callback.  Any other marker is ignored.
 */
void wf_gdi_surface_frame_marker(rdpContext* context, SURFACE_FRAME_MARKER* surface_frame_marker)
{
	wfInfo* wfi = ((wfContext*) context)->wfi;
	rdpSettings* settings = wfi->instance->settings;

	/* Only the end-of-frame action is of interest here. */
	if (surface_frame_marker->frameAction != SURFACECMD_FRAMEACTION_END)
		return;

	/* No acknowledgement unless the setting is positive. */
	if (!(settings->FrameAcknowledge > 0))
		return;

	/* IFCALL is a project macro -- presumably a no-op when the callback is unset; confirm. */
	IFCALL(wfi->instance->update->SurfaceFrameAcknowledge, context, surface_frame_marker->frameId);
}
void wf_gdi_register_update_callbacks(rdpUpdate* update)
{
rdpPrimaryUpdate* primary = update->primary;
@@ -575,4 +588,5 @@ void wf_gdi_register_update_callbacks(rdpUpdate* update)
primary->EllipseCB = NULL;
update->SurfaceBits = wf_gdi_surface_bits;
update->SurfaceFrameMarker = wf_gdi_surface_frame_marker;
}

View File

@@ -74,7 +74,9 @@ void wf_context_new(freerdp* instance, rdpContext* context)
/*
 * Context teardown hook: releases the cache attached to the context (when
 * one is present) and then frees the context's channels object.
 * `instance` is not used here.
 */
void wf_context_free(freerdp* instance, rdpContext* context)
{
	if (context->cache != NULL)
	{
		cache_free(context->cache);
	}

	/* Called unconditionally, exactly as before. */
	freerdp_channels_free(context->channels);
}
int wf_create_console(void)
@@ -227,6 +229,8 @@ BOOL wf_pre_connect(freerdp* instance)
freerdp_client_parse_rdp_file(file, settings->ConnectionFile);
freerdp_client_populate_settings_from_rdp_file(file, settings);
freerdp_client_rdp_file_free(file);
}
settings->OsMajorType = OSMAJORTYPE_WINDOWS;
@@ -259,7 +263,7 @@ BOOL wf_pre_connect(freerdp* instance)
wfi->cursor = g_default_cursor;
wfi->fullscreen = settings->Fullscreen;
wfi->fs_toggle = wfi->fullscreen;
wfi->fs_toggle = 1;
wfi->sw_gdi = settings->SoftwareGdi;
wfi->clrconv = (HCLRCONV) malloc(sizeof(CLRCONV));
@@ -279,7 +283,7 @@ BOOL wf_pre_connect(freerdp* instance)
settings->DesktopHeight = i1;
}
if (wfi->fs_toggle)
if (wfi->fullscreen)
{
settings->DesktopWidth = GetSystemMetrics(SM_CXSCREEN);
settings->DesktopHeight = GetSystemMetrics(SM_CYSCREEN);
@@ -673,7 +677,6 @@ int wfreerdp_run(freerdp* instance)
/* cleanup */
freerdp_channels_close(channels, instance);
freerdp_channels_free(channels);
freerdp_disconnect(instance);
return 0;

View File

@@ -120,10 +120,14 @@ set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} freerdp-client)
set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD}
MODULE freerdp
MODULES freerdp-core freerdp-gdi freerdp-locale freerdp-rail freerdp-utils)
MODULES freerdp-core freerdp-gdi freerdp-locale freerdp-primitives freerdp-rail freerdp-utils)
target_link_libraries(${MODULE_NAME} ${${MODULE_PREFIX}_LIBS})
if(WITH_IPP)
target_link_libraries(xfreerdp ${IPP_LIBRARY_LIST})
endif()
install(TARGETS ${MODULE_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
set_property(TARGET ${MODULE_NAME} PROPERTY FOLDER "Client/X11")

View File

@@ -880,8 +880,11 @@ void xf_gdi_ellipse_cb(rdpContext* context, ELLIPSE_CB_ORDER* ellipse_cb)
void xf_gdi_surface_frame_marker(rdpContext* context, SURFACE_FRAME_MARKER* surface_frame_marker)
{
xfInfo* xfi = ((xfContext*) context)->xfi;
xfInfo* xfi;
rdpSettings* settings;
xfi = ((xfContext*) context)->xfi;
settings = xfi->instance->settings;
switch (surface_frame_marker->frameAction)
{
case SURFACECMD_FRAMEACTION_BEGIN:
@@ -906,6 +909,10 @@ void xf_gdi_surface_frame_marker(rdpContext* context, SURFACE_FRAME_MARKER* surf
gdi_InvalidateRegion(xfi->hdc, xfi->frame_x1, xfi->frame_y1,
xfi->frame_x2 - xfi->frame_x1, xfi->frame_y2 - xfi->frame_y1);
}
if (settings->FrameAcknowledge > 0)
{
IFCALL(xfi->instance->update->SurfaceFrameAcknowledge, context, surface_frame_marker->frameId);
}
break;
}
}

View File

@@ -9,6 +9,7 @@ endif()
option(WITH_MANPAGES "Generate manpages." ON)
option(WITH_PROFILER "Compile profiler." OFF)
option(WITH_IPP "Use Intel Performance Primitives." OFF)
if((TARGET_ARCH MATCHES "x86|x64") AND (NOT DEFINED WITH_SSE2))
option(WITH_SSE2 "Enable SSE2 optimization." ON)

View File

@@ -46,7 +46,7 @@ set(IPPVM "vm") # vector math
set(IPP_X64 0)
if (CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
set(IPP_X64 1)
endif()
if (CMAKE_CL_64)
@@ -67,6 +67,11 @@ function(get_ipp_version _ROOT_DIR)
file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR1 REGEX "IPP_VERSION_MAJOR")
file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR2 REGEX "IPP_VERSION_MINOR")
file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR3 REGEX "IPP_VERSION_BUILD")
if(NOT STR3)
file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR3 REGEX "IPP_VERSION_UPDATE")
endif()
file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR4 REGEX "IPP_VERSION_STR")
# extract info and assign to variables
@@ -198,16 +203,20 @@ function(set_ipp_variables _LATEST_VERSION)
# set INCLUDE and LIB folders
set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include PARENT_SCOPE)
if (IPP_X64)
if(NOT EXISTS ${IPP_ROOT_DIR}/lib/intel64)
message(SEND_ERROR "IPP EM64T libraries not found")
endif()
set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib/intel64 PARENT_SCOPE)
if(APPLE)
set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib PARENT_SCOPE)
else()
if(NOT EXISTS ${IPP_ROOT_DIR}/lib/ia32)
message(SEND_ERROR "IPP IA32 libraries not found")
if(IPP_X64)
if(NOT EXISTS ${IPP_ROOT_DIR}/lib/intel64)
message(SEND_ERROR "IPP EM64T libraries not found")
endif()
set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib/intel64 PARENT_SCOPE)
else()
if(NOT EXISTS ${IPP_ROOT_DIR}/lib/ia32)
message(SEND_ERROR "IPP IA32 libraries not found")
endif()
set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib/ia32 PARENT_SCOPE)
endif()
set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib/ia32 PARENT_SCOPE)
endif()
# set IPP_LIBRARIES variable (7.x lib names)
@@ -265,7 +274,7 @@ if(NOT IPP_FOUND)
# Note, if several IPP installations found the newest version will be
# selected
# ------------------------------------------------------------------------
foreach(curdir ${CMAKE_SYSTEM_PREFIX_PATH})
foreach(curdir ${CMAKE_SYSTEM_PREFIX_PATH} /opt)
set(curdir ${curdir}/intel)
file(TO_CMAKE_PATH ${curdir} CURDIR)
@@ -336,3 +345,53 @@ if(WIN32 AND MINGW AND NOT IPP_LATEST_VERSION_MAJOR LESS 7)
set(MSV_NTDLL "ntdll")
set(IPP_LIBRARIES ${IPP_LIBRARIES} ${MSV_NTDLL}${IPP_LIB_SUFFIX})
endif()
# ------------------------------------------------------------------------
# Locate libiomp5, the IPP "compiler" dependent library.
#
# Every "<prefix>/intel" directory (each CMAKE_SYSTEM_PREFIX_PATH entry plus
# /opt) is searched recursively for files named libiomp5.*.  The directory
# holding a match is accepted when at least one of these holds:
#   - the detected IPP major version is older than 7,
#   - the platform is Apple,
#   - the build is not 64-bit (IPP_X64 is false),
#   - the (symlink-resolved) directory path contains "intel64".
# If several matches are accepted, the last one visited wins.
#
# Sets: IPP_COMPILER_LIBRARY_DIRS, IPP_COMPILER_LIBRARIES
# ------------------------------------------------------------------------
foreach(curdir ${CMAKE_SYSTEM_PREFIX_PATH} /opt)
  set(curdir ${curdir}/intel)
  if(EXISTS ${curdir})
    file(GLOB_RECURSE liblist FOLLOW_SYMLINKS ${curdir}/libiomp5.*)
    foreach(lib ${liblist})
      # Resolve symlinks first so the "intel64" test sees the real location,
      # then strip the file name to obtain the containing directory.
      get_filename_component(libdir ${lib} REALPATH)
      get_filename_component(libdir ${libdir} PATH)
      # IPP_VERSION_MAJOR is expanded inside quotes: when it is empty or
      # unset, an unquoted expansion would leave if() with a missing left
      # operand and abort configuration.
      if(("${IPP_VERSION_MAJOR}" VERSION_LESS "7")
         OR APPLE
         OR (NOT IPP_X64)
         OR ("${libdir}" MATCHES "intel64"))
        set(IPP_COMPILER_LIBRARY_DIRS ${libdir})
        set(IPP_COMPILER_LIBRARIES iomp5)
      endif()
    endforeach()
  endif()
endforeach()
# ------------------------------------------------------------------------
# Build fullpath library list.
#
# Each library is looked up individually (the IPP libraries under
# IPP_LIBRARY_DIRS, iomp5 under IPP_COMPILER_LIBRARY_DIRS) and the results
# are appended to IPP_LIBRARY_LIST in the order ippi, ipps, ippcore, iomp5.
# A lookup that fails contributes its <VAR>-NOTFOUND value to the list.
# ------------------------------------------------------------------------
find_library(LIB_IPPI ippi PATHS ${IPP_LIBRARY_DIRS})
find_library(LIB_IPPS ipps PATHS ${IPP_LIBRARY_DIRS})
find_library(LIB_IPPCORE ippcore PATHS ${IPP_LIBRARY_DIRS})
find_library(LIB_IOMP5 iomp5 PATHS ${IPP_COMPILER_LIBRARY_DIRS})

set(IPP_LIBRARY_LIST
  ${IPP_LIBRARY_LIST}
  ${LIB_IPPI}
  ${LIB_IPPS}
  ${LIB_IPPCORE}
  ${LIB_IOMP5})

View File

@@ -36,6 +36,7 @@
#cmakedefine WITH_PROFILER
#cmakedefine WITH_SSE2
#cmakedefine WITH_NEON
#cmakedefine WITH_IPP
#cmakedefine WITH_NATIVE_SSPI
#cmakedefine WITH_JPEG
#cmakedefine WITH_WIN8

View File

@@ -48,6 +48,10 @@ typedef struct _FREERDP_ADDIN FREERDP_ADDIN;
typedef void* (*FREERDP_LOAD_CHANNEL_ADDIN_ENTRY_FN)(LPCSTR pszName, LPSTR pszSubsystem, LPSTR pszType, DWORD dwFlags);
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API LPSTR freerdp_get_library_install_path();
FREERDP_API LPSTR freerdp_get_dynamic_addin_install_path();
@@ -57,5 +61,9 @@ FREERDP_API void* freerdp_load_dynamic_addin(LPCSTR pszFileName, LPCSTR pszPath,
FREERDP_API void* freerdp_load_dynamic_channel_addin_entry(LPCSTR pszName, LPSTR pszSubsystem, LPSTR pszType, DWORD dwFlags);
FREERDP_API void* freerdp_load_channel_addin_entry(LPCSTR pszName, LPSTR pszSubsystem, LPSTR pszType, DWORD dwFlags);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_COMMON_ADDIN_H */

View File

@@ -32,6 +32,10 @@
#define INLINE inline
#endif
#ifdef _WIN32
#define __func__ __FUNCTION__
#endif
#if defined _WIN32 || defined __CYGWIN__
#ifdef FREERDP_EXPORTS
#ifdef __GNUC__

View File

@@ -59,6 +59,10 @@ struct rdp_bitmap_cache
rdpSettings* settings;
};
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API rdpBitmap* bitmap_cache_get(rdpBitmapCache* bitmap_cache, UINT32 id, UINT32 index);
FREERDP_API void bitmap_cache_put(rdpBitmapCache* bitmap_cache, UINT32 id, UINT32 index, rdpBitmap* bitmap);
@@ -67,4 +71,8 @@ FREERDP_API void bitmap_cache_register_callbacks(rdpUpdate* update);
FREERDP_API rdpBitmapCache* bitmap_cache_new(rdpSettings* settings);
FREERDP_API void bitmap_cache_free(rdpBitmapCache* bitmap_cache);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_BITMAP_V2_CACHE_H */

View File

@@ -56,6 +56,10 @@ struct rdp_brush_cache
rdpSettings* settings;
};
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API void* brush_cache_get(rdpBrushCache* brush, UINT32 index, UINT32* bpp);
FREERDP_API void brush_cache_put(rdpBrushCache* brush, UINT32 index, void* entry, UINT32 bpp);
@@ -64,4 +68,8 @@ FREERDP_API void brush_cache_register_callbacks(rdpUpdate* update);
FREERDP_API rdpBrushCache* brush_cache_new(rdpSettings* settings);
FREERDP_API void brush_cache_free(rdpBrushCache* brush);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_BRUSH_CACHE_H */

View File

@@ -48,7 +48,15 @@ struct rdp_cache
rdpSettings* settings;
};
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API rdpCache* cache_new(rdpSettings* settings);
FREERDP_API void cache_free(rdpCache* cache);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_CACHE_H */

View File

@@ -59,6 +59,10 @@ struct rdp_glyph_cache
rdpSettings* settings;
};
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API rdpGlyph* glyph_cache_get(rdpGlyphCache* glyph_cache, UINT32 id, UINT32 index);
FREERDP_API void glyph_cache_put(rdpGlyphCache* glyph_cache, UINT32 id, UINT32 index, rdpGlyph* entry);
@@ -70,4 +74,8 @@ FREERDP_API void glyph_cache_register_callbacks(rdpUpdate* update);
FREERDP_API rdpGlyphCache* glyph_cache_new(rdpSettings* settings);
FREERDP_API void glyph_cache_free(rdpGlyphCache* glyph);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_GLYPH_CACHE_H */

View File

@@ -52,6 +52,10 @@ struct rdp_nine_grid_cache
rdpSettings* settings;
};
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API void* nine_grid_cache_get(rdpNineGridCache* nine_grid, UINT32 index);
FREERDP_API void nine_grid_cache_put(rdpNineGridCache* nine_grid, UINT32 index, void* entry);
@@ -60,4 +64,8 @@ FREERDP_API void nine_grid_cache_register_callbacks(rdpUpdate* update);
FREERDP_API rdpNineGridCache* nine_grid_cache_new(rdpSettings* settings);
FREERDP_API void nine_grid_cache_free(rdpNineGridCache* nine_grid);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_NINE_GRID_CACHE_H */

View File

@@ -43,6 +43,10 @@ struct rdp_offscreen_cache
rdpSettings* settings;
};
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API rdpBitmap* offscreen_cache_get(rdpOffscreenCache* offscreen_cache, UINT32 index);
FREERDP_API void offscreen_cache_put(rdpOffscreenCache* offscreen_cache, UINT32 index, rdpBitmap* bitmap);
FREERDP_API void offscreen_cache_delete(rdpOffscreenCache* offscreen, UINT32 index);
@@ -52,4 +56,8 @@ FREERDP_API void offscreen_cache_register_callbacks(rdpUpdate* update);
FREERDP_API rdpOffscreenCache* offscreen_cache_new(rdpSettings* settings);
FREERDP_API void offscreen_cache_free(rdpOffscreenCache* offscreen);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_OFFSCREEN_CACHE_H */

View File

@@ -46,6 +46,10 @@ struct rdp_palette_cache
rdpSettings* settings;
};
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API void* palette_cache_get(rdpPaletteCache* palette, UINT32 index);
FREERDP_API void palette_cache_put(rdpPaletteCache* palette, UINT32 index, void* entry);
@@ -54,4 +58,8 @@ FREERDP_API void palette_cache_register_callbacks(rdpUpdate* update);
FREERDP_API rdpPaletteCache* palette_cache_new(rdpSettings* settings);
FREERDP_API void palette_cache_free(rdpPaletteCache* palette_cache);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_PALETTE_CACHE_H */

View File

@@ -42,6 +42,10 @@ struct rdp_pointer_cache
rdpSettings* settings;
};
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API rdpPointer* pointer_cache_get(rdpPointerCache* pointer_cache, UINT32 index);
FREERDP_API void pointer_cache_put(rdpPointerCache* pointer_cache, UINT32 index, rdpPointer* pointer);
@@ -50,4 +54,8 @@ FREERDP_API void pointer_cache_register_callbacks(rdpUpdate* update);
FREERDP_API rdpPointerCache* pointer_cache_new(rdpSettings* settings);
FREERDP_API void pointer_cache_free(rdpPointerCache* pointer_cache);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_POINTER_CACHE_H */

View File

@@ -24,11 +24,19 @@
#include <freerdp/addin.h>
#include <freerdp/channels/channels.h>
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API void* freerdp_channels_client_find_static_entry(const char* name, const char* identifier);
FREERDP_API void* freerdp_channels_load_static_addin_entry(LPCSTR pszName, LPSTR pszSubsystem, LPSTR pszType, DWORD dwFlags);
FREERDP_API FREERDP_ADDIN** freerdp_channels_list_addins(LPSTR lpName, LPSTR lpSubsystem, LPSTR lpType, DWORD dwFlags);
FREERDP_API void freerdp_channels_addin_list_free(FREERDP_ADDIN** ppAddins);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_CHANNELS_CLIENT */

View File

@@ -23,6 +23,10 @@
#include <freerdp/api.h>
#include <freerdp/freerdp.h>
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API int freerdp_client_parse_command_line_arguments(int argc, char** argv, rdpSettings* settings);
FREERDP_API int freerdp_client_load_addins(rdpChannels* channels, rdpSettings* settings);
@@ -36,4 +40,8 @@ FREERDP_API int freerdp_client_add_device_channel(rdpSettings* settings, int cou
FREERDP_API int freerdp_client_add_static_channel(rdpSettings* settings, int count, char** params);
FREERDP_API int freerdp_client_add_dynamic_channel(rdpSettings* settings, int count, char** params);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_CLIENT_CMDLINE_H */

View File

@@ -133,6 +133,10 @@ struct rdp_file
typedef struct rdp_file rdpFile;
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API BOOL freerdp_client_parse_rdp_file(rdpFile* file, char* name);
FREERDP_API BOOL freerdp_client_parse_rdp_file_buffer(rdpFile* file, BYTE* buffer, size_t size);
FREERDP_API BOOL freerdp_client_populate_settings_from_rdp_file(rdpFile* file, rdpSettings* settings);
@@ -140,4 +144,8 @@ FREERDP_API BOOL freerdp_client_populate_settings_from_rdp_file(rdpFile* file, r
FREERDP_API rdpFile* freerdp_client_rdp_file_new();
FREERDP_API void freerdp_client_rdp_file_free(rdpFile* file);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_CLIENT_RDP_FILE_H */

View File

@@ -101,8 +101,6 @@ struct _RFX_CONTEXT
BYTE quant_idx_cr;
/* routines */
void (*decode_ycbcr_to_rgb)(INT16* y_r_buf, INT16* cb_g_buf, INT16* cr_b_buf);
void (*encode_rgb_to_ycbcr)(INT16* y_r_buf, INT16* cb_g_buf, INT16* cr_b_buf);
void (*quantization_decode)(INT16* buffer, const UINT32* quantization_values);
void (*quantization_encode)(INT16* buffer, const UINT32* quantization_values);
void (*dwt_2d_decode)(INT16* buffer, INT16* dwt_buffer);

View File

@@ -29,6 +29,10 @@ typedef struct rdp_glyph rdpGlyph;
#include <freerdp/types.h>
#include <freerdp/freerdp.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Bitmap Class */
typedef void (*pBitmap_New)(rdpContext* context, rdpBitmap* bitmap);
@@ -166,4 +170,8 @@ FREERDP_API void graphics_register_glyph(rdpGraphics* graphics, rdpGlyph* glyph)
FREERDP_API rdpGraphics* graphics_new(rdpContext* context);
FREERDP_API void graphics_free(rdpGraphics* graphics);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_GRAPHICS_H */

View File

@@ -74,6 +74,10 @@ struct rdp_input
UINT32 paddingB[32 - 21]; /* 21 */
};
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API void freerdp_input_send_synchronize_event(rdpInput* input, UINT32 flags);
FREERDP_API void freerdp_input_send_keyboard_event(rdpInput* input, UINT16 flags, UINT16 code);
FREERDP_API void freerdp_input_send_keyboard_event_ex(rdpInput* input, BOOL down, UINT32 rdp_scancode);
@@ -81,4 +85,8 @@ FREERDP_API void freerdp_input_send_unicode_keyboard_event(rdpInput* input, UINT
FREERDP_API void freerdp_input_send_mouse_event(rdpInput* input, UINT16 flags, UINT16 x, UINT16 y);
FREERDP_API void freerdp_input_send_extended_mouse_event(rdpInput* input, UINT16 flags, UINT16 x, UINT16 y);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_INPUT_H */

View File

@@ -81,10 +81,18 @@ struct rdp_freerdp_peer
SEC_WINNT_AUTH_IDENTITY identity;
};
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API void freerdp_peer_context_new(freerdp_peer* client);
FREERDP_API void freerdp_peer_context_free(freerdp_peer* client);
FREERDP_API freerdp_peer* freerdp_peer_new(int sockfd);
FREERDP_API void freerdp_peer_free(freerdp_peer* client);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_PEER_H */

View File

@@ -0,0 +1,215 @@
/* primitives.h
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License. Algorithms used by
* this code may be covered by patents by HP, Microsoft, or other parties.
*/
#ifdef __GNUC__
# pragma once
#endif
#ifndef __PRIMITIVES_H_INCLUDED__
#define __PRIMITIVES_H_INCLUDED__
#include <freerdp/api.h>
#include <freerdp/types.h>
/* Status code type returned by every primitive function-pointer type
 * declared in this header. */
typedef INT32 pstatus_t; /* match IppStatus. */
#define PRIMITIVES_SUCCESS (0) /* match ippStsNoErr */
/* Simple macro for address of an x,y location in 2d 4-byte memory block */
/* The (x + y*span) sum is shifted left by 2 before being added to the
 * base pointer, so _span_ is counted in 4-byte elements, not in bytes.
 * The result is a void pointer. */
#define PIXMAP4_ADDR(_dst_, _x_, _y_, _span_) \
((void *) (((BYTE *) (_dst_)) + (((_x_) + (_y_)*(_span_)) << 2)))
/* CPU feature bits, x86 family.
 * NOTE(review): presumably the value space of primitives_get_flags() --
 * confirm against the implementation. */
#define PRIM_X86_MMX_AVAILABLE (1U<<0)
#define PRIM_X86_3DNOW_AVAILABLE (1U<<1)
#define PRIM_X86_3DNOW_PREFETCH_AVAILABLE (1U<<2)
#define PRIM_X86_SSE_AVAILABLE (1U<<3)
#define PRIM_X86_SSE2_AVAILABLE (1U<<4)
#define PRIM_X86_SSE3_AVAILABLE (1U<<5)
#define PRIM_X86_SSSE3_AVAILABLE (1U<<6)
#define PRIM_X86_SSE41_AVAILABLE (1U<<7)
#define PRIM_X86_SSE42_AVAILABLE (1U<<8)
#define PRIM_X86_AVX_AVAILABLE (1U<<9)
#define PRIM_X86_FMA_AVAILABLE (1U<<10)
#define PRIM_X86_AVX_AES_AVAILABLE (1U<<11)
#define PRIM_X86_AVX2_AVAILABLE (1U<<12)
/* CPU feature bits, ARM family.  These reuse bit positions 0-7 of the x86
 * set above, so a flags word can only be decoded for one architecture
 * family at a time. */
#define PRIM_ARM_VFP1_AVAILABLE (1U<<0)
#define PRIM_ARM_VFP2_AVAILABLE (1U<<1)
#define PRIM_ARM_VFP3_AVAILABLE (1U<<2)
#define PRIM_ARM_VFP4_AVAILABLE (1U<<3)
#define PRIM_ARM_FPA_AVAILABLE (1U<<4)
#define PRIM_ARM_FPE_AVAILABLE (1U<<5)
#define PRIM_ARM_IWMMXT_AVAILABLE (1U<<6)
#define PRIM_ARM_NEON_AVAILABLE (1U<<7)
/* Structures compatible with IPP */
/* Width/height pair.  In this header it is passed by const pointer as the
 * `roi` argument of the color-conversion function-pointer types.
 * NOTE(review): units are not stated here; presumably pixels -- confirm. */
typedef struct
{
INT32 width;
INT32 height;
} prim_size_t; /* like IppiSize */
/* Function prototypes for all of the supported primitives. */
/* Every type below is a pointer to a function returning pstatus_t.  The
 * suffixes in the names (8u, 16s, 16u, 32s, 32u) line up with the element
 * types used in the signatures (BYTE, INT16, UINT16, INT32, UINT32).
 * NOTE(review): exact semantics (overlap rules, saturation, error codes)
 * are defined by the implementations, not by this header. */

/* Untyped copy of `bytes` bytes from pSrc to pDst. */
typedef pstatus_t (*__copy_t)(
const void *pSrc,
void *pDst,
INT32 bytes);
/* Byte-typed copy of `len` elements from pSrc to pDst. */
typedef pstatus_t (*__copy_8u_t)(
const BYTE *pSrc,
BYTE *pDst,
INT32 len);
/* 2D copy: source and destination each carry their own row step (bytes),
 * while the copied region is given in pixels. */
typedef pstatus_t (*__copy_8u_AC4r_t)(
const BYTE *pSrc,
INT32 srcStep, /* bytes */
BYTE *pDst,
INT32 dstStep, /* bytes */
INT32 width, INT32 height); /* pixels */
/* Fill `len` elements at pDst with `val` (BYTE elements). */
typedef pstatus_t (*__set_8u_t)(
BYTE val,
BYTE *pDst,
INT32 len);
/* Fill `len` elements at pDst with `val` (INT32 elements). */
typedef pstatus_t (*__set_32s_t)(
INT32 val,
INT32 *pDst,
INT32 len);
/* Fill `len` elements at pDst with `val` (UINT32 elements). */
typedef pstatus_t (*__set_32u_t)(
UINT32 val,
UINT32 *pDst,
INT32 len);
/* Zero-fill.  Note the length here is size_t, unlike the INT32 lengths
 * used by the other types in this header. */
typedef pstatus_t (*__zero_t)(
void *pDst,
size_t bytes);
/* Alpha composition of two source images into a destination; each buffer
 * has its own step.
 * NOTE(review): step and size units are not stated here; presumably bytes
 * and pixels as in __copy_8u_AC4r_t -- confirm. */
typedef pstatus_t (*__alphaComp_argb_t)(
const BYTE *pSrc1, INT32 src1Step,
const BYTE *pSrc2, INT32 src2Step,
BYTE *pDst, INT32 dstStep,
INT32 width, INT32 height);
/* Element-wise operation on two INT16 source arrays into pDst, `len`
 * elements (an addition, going by the name). */
typedef pstatus_t (*__add_16s_t)(
const INT16 *pSrc1,
const INT16 *pSrc2,
INT16 *pDst,
INT32 len);
/* Shift-by-constant family (16-bit signed/unsigned; left, right, and a
 * generic form).  All six share one shape: source array, shift amount
 * `val`, destination array, element count.
 * NOTE(review): the destination parameter is named pSrcDst although a
 * separate pSrc is passed; presumably the operation is out-of-place and
 * the name is a leftover -- confirm.  How the generic shiftC forms
 * interpret the sign of `val` is not visible here. */
typedef pstatus_t (*__lShiftC_16s_t)(
const INT16 *pSrc,
INT32 val,
INT16 *pSrcDst,
INT32 len);
typedef pstatus_t (*__lShiftC_16u_t)(
const UINT16 *pSrc,
INT32 val,
UINT16 *pSrcDst,
INT32 len);
typedef pstatus_t (*__rShiftC_16s_t)(
const INT16 *pSrc,
INT32 val,
INT16 *pSrcDst,
INT32 len);
typedef pstatus_t (*__rShiftC_16u_t)(
const UINT16 *pSrc,
INT32 val,
UINT16 *pSrcDst,
INT32 len);
typedef pstatus_t (*__shiftC_16s_t)(
const INT16 *pSrc,
INT32 val,
INT16 *pSrcDst,
INT32 len);
typedef pstatus_t (*__shiftC_16u_t)(
const UINT16 *pSrc,
INT32 val,
UINT16 *pSrcDst,
INT32 len);
/* Per-element operation from pSrc to pDst over `len` INT16 values (a sign
 * function, going by the name; the output encoding is not visible here). */
typedef pstatus_t (*__sign_16s_t)(
const INT16 *pSrc,
INT16 *pDst,
INT32 len);
/* Color conversions.  The two P3P3 forms take three INT16 source planes
 * and three INT16 destination planes; the region is given by `roi`.
 * NOTE(review): step units and plane order are not stated here --
 * confirm against the implementations. */
typedef pstatus_t (*__yCbCrToRGB_16s16s_P3P3_t)(
const INT16 *pSrc[3], INT32 srcStep,
INT16 *pDst[3], INT32 dstStep,
const prim_size_t *roi);
typedef pstatus_t (*__RGBToYCbCr_16s16s_P3P3_t)(
const INT16 *pSrc[3], INT32 srcStep,
INT16 *pDst[3], INT32 dstStep,
const prim_size_t *roi);
/* Three INT16 source planes into a single BYTE destination buffer. */
typedef pstatus_t (*__RGBToRGB_16s8u_P3AC4R_t)(
const INT16 *pSrc[3], INT32 srcStep,
BYTE *pDst, INT32 dstStep,
const prim_size_t *roi);
/* Combine each of `len` UINT32 elements of pSrc with the constant `val`
 * into pDst (bitwise AND / OR, going by the names). */
typedef pstatus_t (*__andC_32u_t)(
const UINT32 *pSrc,
UINT32 val,
UINT32 *pDst,
INT32 len);
typedef pstatus_t (*__orC_32u_t)(
const UINT32 *pSrc,
UINT32 val,
UINT32 *pDst,
INT32 len);
/* Dispatch table holding one function pointer per primitive, grouped by
 * category, plus an opaque `hints` pointer.
 * NOTE(review): presumably populated by primitives_init() and handed out
 * by primitives_get() -- confirm against the implementation. */
typedef struct
{
/* Memory-to-memory copy routines */
__copy_t copy; /* memcpy/memmove, basically */
__copy_8u_t copy_8u; /* more strongly typed */
__copy_8u_AC4r_t copy_8u_AC4r; /* pixel copy function */
/* Memory setting routines */
__set_8u_t set_8u; /* memset, basically */
__set_32s_t set_32s;
__set_32u_t set_32u;
__zero_t zero; /* bzero or faster */
/* Arithmetic functions */
__add_16s_t add_16s;
/* And/or */
__andC_32u_t andC_32u;
__orC_32u_t orC_32u;
/* Shifts */
__lShiftC_16s_t lShiftC_16s;
__lShiftC_16u_t lShiftC_16u;
__rShiftC_16s_t rShiftC_16s;
__rShiftC_16u_t rShiftC_16u;
__shiftC_16s_t shiftC_16s;
__shiftC_16u_t shiftC_16u;
/* Alpha Composition */
__alphaComp_argb_t alphaComp_argb;
/* Sign */
__sign_16s_t sign_16s;
/* Color conversions */
__yCbCrToRGB_16s16s_P3P3_t yCbCrToRGB_16s16s_P3P3;
__RGBToYCbCr_16s16s_P3P3_t RGBToYCbCr_16s16s_P3P3;
__RGBToRGB_16s8u_P3AC4R_t RGBToRGB_16s8u_P3AC4R;
/* internal use for CPU flags and such. */
void *hints;
} primitives_t;
#ifdef __cplusplus
extern "C" {
#endif
/* Prototypes for the externally-visible entrypoints. */
/* NOTE(review): presumably must run before primitives_get() is used and
 * be paired with primitives_deinit() -- confirm against the implementation. */
FREERDP_API void primitives_init(void);
/* Returns a pointer to a primitives_t dispatch table. */
FREERDP_API primitives_t *primitives_get(void);
/* Returns a UINT32 derived from the given table.
 * NOTE(review): presumably a mask of the PRIM_*_AVAILABLE bits -- confirm. */
FREERDP_API UINT32 primitives_get_flags(
const primitives_t *prims);
/* Writes into the caller-supplied buffer `str`, whose size is `len`.
 * NOTE(review): presumably a readable rendering of the flags; truncation
 * and termination behaviour are not visible here -- confirm. */
FREERDP_API void primitives_flags_str(
const primitives_t *prims,
char *str,
size_t len);
FREERDP_API void primitives_deinit(void);
#ifdef __cplusplus
}
#endif
#endif /* !__PRIMITIVES_H_INCLUDED__ */

View File

@@ -970,6 +970,10 @@ struct rdp_settings
};
typedef struct rdp_settings rdpSettings;
#ifdef __cplusplus
extern "C" {
#endif
FREERDP_API rdpSettings* freerdp_settings_new(void* instance);
FREERDP_API void freerdp_settings_free(rdpSettings* settings);
@@ -989,4 +993,8 @@ FREERDP_API void freerdp_dynamic_channel_collection_add(rdpSettings* settings, A
FREERDP_API ADDIN_ARGV* freerdp_dynamic_channel_collection_find(rdpSettings* settings, const char* name);
FREERDP_API void freerdp_dynamic_channel_collection_free(rdpSettings* settings);
#ifdef __cplusplus
}
#endif
#endif /* FREERDP_SETTINGS_H */

View File

@@ -152,6 +152,7 @@ typedef void (*pSuppressOutput)(rdpContext* context, BYTE allow, RECTANGLE_16* a
typedef void (*pSurfaceCommand)(rdpContext* context, STREAM* s);
typedef void (*pSurfaceBits)(rdpContext* context, SURFACE_BITS_COMMAND* surface_bits_command);
typedef void (*pSurfaceFrameMarker)(rdpContext* context, SURFACE_FRAME_MARKER* surface_frame_marker);
typedef void (*pSurfaceFrameAcknowledge)(rdpContext* context, UINT32 frameId);
struct rdp_update
{
@@ -182,7 +183,8 @@ struct rdp_update
pSurfaceCommand SurfaceCommand; /* 64 */
pSurfaceBits SurfaceBits; /* 65 */
pSurfaceFrameMarker SurfaceFrameMarker; /* 66 */
UINT32 paddingE[80 - 67]; /* 67 */
pSurfaceFrameAcknowledge SurfaceFrameAcknowledge; /* 67 */
UINT32 paddingE[80 - 68]; /* 68 */
/* internal */

View File

@@ -31,6 +31,7 @@ set(${MODULE_PREFIX}_SUBMODULES
codec
crypto
locale
primitives
core)
foreach(${MODULE_PREFIX}_SUBMODULE ${${MODULE_PREFIX}_SUBMODULES})

View File

@@ -31,8 +31,6 @@ set(${MODULE_PREFIX}_SRCS
rfx_dwt.h
rfx_encode.c
rfx_encode.h
rfx_pool.c
rfx_pool.h
rfx_quantization.c
rfx_quantization.h
rfx_rlgr.c
@@ -97,9 +95,12 @@ set(${MODULE_PREFIX}_LIBS
set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS
MONOLITHIC ${MONOLITHIC_BUILD} INTERNAL
MODULE freerdp
MODULES freerdp-utils)
message(STATUS "libfreerdp-codec libs: ${${MODULE_PREFIX}_LIBS}")
MODULES freerdp-primitives freerdp-utils)
set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS
MONOLITHIC ${MONOLITHIC_BUILD}
MODULE winpr
MODULES winpr-crt winpr-pool winpr-registry winpr-utils)
if(MONOLITHIC_BUILD)
set(FREERDP_LIBS ${FREERDP_LIBS} ${${MODULE_PREFIX}_LIBS} PARENT_SCOPE)

View File

@@ -247,7 +247,10 @@ void nsc_context_free(NSC_CONTEXT* context)
for (i = 0; i < 4; i++)
{
if (context->priv->plane_buf[i])
{
free(context->priv->plane_buf[i]);
context->priv->plane_buf[i] = NULL;
}
}
if (context->bmpdata)
free(context->bmpdata);
@@ -260,14 +263,21 @@ void nsc_context_free(NSC_CONTEXT* context)
free(context->priv);
free(context);
context = NULL;
}
NSC_CONTEXT* nsc_context_new(void)
{
NSC_CONTEXT* nsc_context;
UINT8 i;
nsc_context = (NSC_CONTEXT*) malloc(sizeof(NSC_CONTEXT));
nsc_context->priv = (NSC_CONTEXT_PRIV*) malloc(sizeof(NSC_CONTEXT_PRIV));
for (i=0; i < 5; ++i)
{
nsc_context->priv->plane_buf[i] = NULL;
}
nsc_context->bmpdata = NULL;
nsc_context->decode = nsc_decode;
nsc_context->encode = nsc_encode;

View File

@@ -30,13 +30,13 @@
#endif
#include <winpr/crt.h>
#include <winpr/registry.h>
#include <freerdp/codec/rfx.h>
#include <freerdp/constants.h>
#include "rfx_constants.h"
#include "rfx_types.h"
#include "rfx_pool.h"
#include "rfx_decode.h"
#include "rfx_encode.h"
#include "rfx_quantization.h"
@@ -79,7 +79,7 @@ static void rfx_profiler_create(RFX_CONTEXT* context)
PROFILER_CREATE(context->priv->prof_rfx_differential_decode, "rfx_differential_decode");
PROFILER_CREATE(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode");
PROFILER_CREATE(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode");
PROFILER_CREATE(context->priv->prof_rfx_decode_ycbcr_to_rgb, "rfx_decode_ycbcr_to_rgb");
PROFILER_CREATE(context->priv->prof_rfx_ycbcr_to_rgb, "prims->yCbCrToRGB");
PROFILER_CREATE(context->priv->prof_rfx_decode_format_rgb, "rfx_decode_format_rgb");
PROFILER_CREATE(context->priv->prof_rfx_encode_rgb, "rfx_encode_rgb");
@@ -88,7 +88,7 @@ static void rfx_profiler_create(RFX_CONTEXT* context)
PROFILER_CREATE(context->priv->prof_rfx_differential_encode, "rfx_differential_encode");
PROFILER_CREATE(context->priv->prof_rfx_quantization_encode, "rfx_quantization_encode");
PROFILER_CREATE(context->priv->prof_rfx_dwt_2d_encode, "rfx_dwt_2d_encode");
PROFILER_CREATE(context->priv->prof_rfx_encode_rgb_to_ycbcr, "rfx_encode_rgb_to_ycbcr");
PROFILER_CREATE(context->priv->prof_rfx_rgb_to_ycbcr, "prims->RGBToYCbCr");
PROFILER_CREATE(context->priv->prof_rfx_encode_format_rgb, "rfx_encode_format_rgb");
}
@@ -100,7 +100,7 @@ static void rfx_profiler_free(RFX_CONTEXT* context)
PROFILER_FREE(context->priv->prof_rfx_differential_decode);
PROFILER_FREE(context->priv->prof_rfx_quantization_decode);
PROFILER_FREE(context->priv->prof_rfx_dwt_2d_decode);
PROFILER_FREE(context->priv->prof_rfx_decode_ycbcr_to_rgb);
PROFILER_FREE(context->priv->prof_rfx_ycbcr_to_rgb);
PROFILER_FREE(context->priv->prof_rfx_decode_format_rgb);
PROFILER_FREE(context->priv->prof_rfx_encode_rgb);
@@ -109,7 +109,7 @@ static void rfx_profiler_free(RFX_CONTEXT* context)
PROFILER_FREE(context->priv->prof_rfx_differential_encode);
PROFILER_FREE(context->priv->prof_rfx_quantization_encode);
PROFILER_FREE(context->priv->prof_rfx_dwt_2d_encode);
PROFILER_FREE(context->priv->prof_rfx_encode_rgb_to_ycbcr);
PROFILER_FREE(context->priv->prof_rfx_rgb_to_ycbcr);
PROFILER_FREE(context->priv->prof_rfx_encode_format_rgb);
}
@@ -123,7 +123,7 @@ static void rfx_profiler_print(RFX_CONTEXT* context)
PROFILER_PRINT(context->priv->prof_rfx_differential_decode);
PROFILER_PRINT(context->priv->prof_rfx_quantization_decode);
PROFILER_PRINT(context->priv->prof_rfx_dwt_2d_decode);
PROFILER_PRINT(context->priv->prof_rfx_decode_ycbcr_to_rgb);
PROFILER_PRINT(context->priv->prof_rfx_ycbcr_to_rgb);
PROFILER_PRINT(context->priv->prof_rfx_decode_format_rgb);
PROFILER_PRINT(context->priv->prof_rfx_encode_rgb);
@@ -132,7 +132,7 @@ static void rfx_profiler_print(RFX_CONTEXT* context)
PROFILER_PRINT(context->priv->prof_rfx_differential_encode);
PROFILER_PRINT(context->priv->prof_rfx_quantization_encode);
PROFILER_PRINT(context->priv->prof_rfx_dwt_2d_encode);
PROFILER_PRINT(context->priv->prof_rfx_encode_rgb_to_ycbcr);
PROFILER_PRINT(context->priv->prof_rfx_rgb_to_ycbcr);
PROFILER_PRINT(context->priv->prof_rfx_encode_format_rgb);
PROFILER_PRINT_FOOTER;
@@ -140,6 +140,11 @@ static void rfx_profiler_print(RFX_CONTEXT* context)
RFX_CONTEXT* rfx_context_new(void)
{
HKEY hKey;
LONG status;
DWORD dwType;
DWORD dwSize;
DWORD dwValue;
RFX_CONTEXT* context;
context = (RFX_CONTEXT*) malloc(sizeof(RFX_CONTEXT));
@@ -148,24 +153,58 @@ RFX_CONTEXT* rfx_context_new(void)
context->priv = (RFX_CONTEXT_PRIV*) malloc(sizeof(RFX_CONTEXT_PRIV));
ZeroMemory(context->priv, sizeof(RFX_CONTEXT_PRIV));
context->priv->pool = rfx_pool_new();
context->priv->TilePool = Queue_New(TRUE, -1, -1);
context->priv->TileQueue = Queue_New(TRUE, -1, -1);
/*
* align buffers to 16 byte boundary (needed for SSE/NEON instructions)
*
* y_r_buffer, cb_g_buffer, cr_b_buffer: 64 * 64 * 4 = 16384 (0x4000)
* dwt_buffer: 32 * 32 * 2 * 2 * 4 = 16384, maximum sub-band width is 32
*/
context->priv->BufferPool = BufferPool_New(TRUE, 16384, 16);
context->priv->UseThreads = FALSE;
context->priv->MinThreadCount = 4;
context->priv->MaxThreadCount = 0;
status = RegOpenKeyEx(HKEY_LOCAL_MACHINE, _T("Software\\FreeRDP\\RemoteFX"), 0, KEY_READ | KEY_WOW64_64KEY, &hKey);
if (status == ERROR_SUCCESS)
{
if (RegQueryValueEx(hKey, _T("UseThreads"), NULL, &dwType, (BYTE*) &dwValue, &dwSize) == ERROR_SUCCESS)
context->priv->UseThreads = dwValue ? 1 : 0;
if (RegQueryValueEx(hKey, _T("MinThreadCount"), NULL, &dwType, (BYTE*) &dwValue, &dwSize) == ERROR_SUCCESS)
context->priv->MinThreadCount = dwValue;
if (RegQueryValueEx(hKey, _T("MaxThreadCount"), NULL, &dwType, (BYTE*) &dwValue, &dwSize) == ERROR_SUCCESS)
context->priv->MaxThreadCount = dwValue;
RegCloseKey(hKey);
}
if (context->priv->UseThreads)
{
context->priv->ThreadPool = CreateThreadpool(NULL);
InitializeThreadpoolEnvironment(&context->priv->ThreadPoolEnv);
SetThreadpoolCallbackPool(&context->priv->ThreadPoolEnv, context->priv->ThreadPool);
if (context->priv->MinThreadCount)
SetThreadpoolThreadMinimum(context->priv->ThreadPool, context->priv->MinThreadCount);
if (context->priv->MaxThreadCount)
SetThreadpoolThreadMaximum(context->priv->ThreadPool, context->priv->MaxThreadCount);
}
/* initialize the default pixel format */
rfx_context_set_pixel_format(context, RDP_PIXEL_FORMAT_B8G8R8A8);
/* align buffers to 16 byte boundary (needed for SSE/SSE2 instructions) */
context->priv->y_r_buffer = (INT16*)(((uintptr_t)context->priv->y_r_mem + 16) & ~ 0x0F);
context->priv->cb_g_buffer = (INT16*)(((uintptr_t)context->priv->cb_g_mem + 16) & ~ 0x0F);
context->priv->cr_b_buffer = (INT16*)(((uintptr_t)context->priv->cr_b_mem + 16) & ~ 0x0F);
context->priv->dwt_buffer = (INT16*)(((uintptr_t)context->priv->dwt_mem + 16) & ~ 0x0F);
/* create profilers for default decoding routines */
rfx_profiler_create(context);
/* set up default routines */
context->decode_ycbcr_to_rgb = rfx_decode_ycbcr_to_rgb;
context->encode_rgb_to_ycbcr = rfx_encode_rgb_to_ycbcr;
context->quantization_decode = rfx_quantization_decode;
context->quantization_encode = rfx_quantization_encode;
context->dwt_2d_decode = rfx_dwt_2d_decode;
@@ -185,11 +224,20 @@ void rfx_context_free(RFX_CONTEXT* context)
{
free(context->quants);
rfx_pool_free(context->priv->pool);
Queue_Free(context->priv->TilePool);
Queue_Free(context->priv->TileQueue);
rfx_profiler_print(context);
rfx_profiler_free(context);
if (context->priv->UseThreads)
{
CloseThreadpool(context->priv->ThreadPool);
DestroyThreadpoolEnvironment(&context->priv->ThreadPoolEnv);
}
BufferPool_Free(context->priv->BufferPool);
free(context->priv);
free(context);
}
@@ -197,6 +245,7 @@ void rfx_context_free(RFX_CONTEXT* context)
void rfx_context_set_pixel_format(RFX_CONTEXT* context, RDP_PIXEL_FORMAT pixel_format)
{
context->pixel_format = pixel_format;
switch (pixel_format)
{
case RDP_PIXEL_FORMAT_B8G8R8A8:
@@ -229,6 +278,30 @@ void rfx_context_reset(RFX_CONTEXT* context)
context->frame_idx = 0;
}
/**
 * Obtain a tile for decoding, reusing one from the context's tile pool when
 * one can be dequeued, otherwise allocating a fresh one.
 *
 * A freshly allocated tile is zero-initialised and owns a 4096 * 4 byte
 * data buffer (64x64 pixels, 4 bytes each).
 *
 * @param context codec context whose priv->TilePool queue is consulted
 * @return a tile ready for use, or NULL if a new tile could not be allocated
 */
RFX_TILE* rfx_tile_pool_take(RFX_CONTEXT* context)
{
	RFX_TILE* tile = NULL;

	/* Zero timeout: poll the queue's event without ever blocking. */
	if (WaitForSingleObject(Queue_Event(context->priv->TilePool), 0) == WAIT_OBJECT_0)
		tile = Queue_Dequeue(context->priv->TilePool);

	if (tile)
		return tile;

	/* Nothing pooled: build a new tile, zeroing every field (x, y included). */
	tile = (RFX_TILE*) calloc(1, sizeof(RFX_TILE));

	if (!tile)
		return NULL;

	tile->data = (BYTE*) malloc(4096 * 4); /* 64x64 * 4 */

	if (!tile->data)
	{
		/* Do not hand out a tile without a pixel buffer. */
		free(tile);
		return NULL;
	}

	return tile;
}
/**
 * Hand a tile back to the context's tile pool so a later
 * rfx_tile_pool_take() can reuse it.
 *
 * A NULL tile is ignored: message tile arrays are zero-initialised and may
 * contain unfilled slots, and queueing those would only fill the pool with
 * empty entries.
 *
 * @param context codec context owning priv->TilePool
 * @param tile    tile to recycle (may be NULL)
 * @return always 0
 */
int rfx_tile_pool_return(RFX_CONTEXT* context, RFX_TILE* tile)
{
	if (tile)
		Queue_Enqueue(context->priv->TilePool, tile);

	return 0;
}
static void rfx_process_message_sync(RFX_CONTEXT* context, STREAM* s)
{
UINT32 magic;
@@ -414,19 +487,35 @@ static void rfx_process_message_tile(RFX_CONTEXT* context, RFX_TILE* tile, STREA
YLen, context->quants + (quantIdxY * 10),
CbLen, context->quants + (quantIdxCb * 10),
CrLen, context->quants + (quantIdxCr * 10),
tile->data);
tile->data, 64 * 4);
}
/**
 * Argument bundle passed (as the opaque context pointer) to
 * rfx_process_message_tile_work_callback for one tile.
 */
struct _RFX_TILE_WORK_PARAM
{
/* by-value STREAM, so each work item carries its own stream state */
STREAM s;
/* destination tile handed to rfx_process_message_tile */
RFX_TILE* tile;
/* codec context handed to rfx_process_message_tile */
RFX_CONTEXT* context;
};
typedef struct _RFX_TILE_WORK_PARAM RFX_TILE_WORK_PARAM;
/**
 * Thread-pool work entry point: unpacks the RFX_TILE_WORK_PARAM passed as the
 * opaque context pointer and decodes the tile it describes.
 *
 * @param instance callback instance (unused)
 * @param context  pointer to an RFX_TILE_WORK_PARAM
 * @param work     work object that triggered the callback (unused)
 */
void CALLBACK rfx_process_message_tile_work_callback(PTP_CALLBACK_INSTANCE instance, void* context, PTP_WORK work)
{
	RFX_TILE_WORK_PARAM* job = (RFX_TILE_WORK_PARAM*) context;
	STREAM* tile_stream = &job->s;

	rfx_process_message_tile(job->context, job->tile, tile_stream);
}
static void rfx_process_message_tileset(RFX_CONTEXT* context, RFX_MESSAGE* message, STREAM* s)
{
int i;
int pos;
BYTE quant;
UINT32* quants;
UINT16 subtype;
UINT32 blockLen;
UINT32 blockType;
UINT32 tilesDataSize;
UINT32* quants;
BYTE quant;
int pos;
PTP_WORK* work_objects = NULL;
RFX_TILE_WORK_PARAM* params = NULL;
stream_read_UINT16(s, subtype); /* subtype (2 bytes) must be set to CBT_TILESET (0xCAC2) */
@@ -492,7 +581,14 @@ static void rfx_process_message_tileset(RFX_CONTEXT* context, RFX_MESSAGE* messa
context->quants[i * 10 + 8], context->quants[i * 10 + 9]);
}
message->tiles = rfx_pool_get_tiles(context->priv->pool, message->num_tiles);
message->tiles = (RFX_TILE**) malloc(sizeof(RFX_TILE*) * message->num_tiles);
ZeroMemory(message->tiles, sizeof(RFX_TILE*) * message->num_tiles);
if (context->priv->UseThreads)
{
work_objects = (PTP_WORK*) malloc(sizeof(PTP_WORK) * message->num_tiles);
params = (RFX_TILE_WORK_PARAM*) malloc(sizeof(RFX_TILE_WORK_PARAM) * message->num_tiles);
}
/* tiles */
for (i = 0; i < message->num_tiles; i++)
@@ -509,10 +605,35 @@ static void rfx_process_message_tileset(RFX_CONTEXT* context, RFX_MESSAGE* messa
break;
}
rfx_process_message_tile(context, message->tiles[i], s);
message->tiles[i] = rfx_tile_pool_take(context);
if (context->priv->UseThreads)
{
params[i].context = context;
params[i].tile = message->tiles[i];
CopyMemory(&(params[i].s), s, sizeof(STREAM));
work_objects[i] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_process_message_tile_work_callback,
(void*) &params[i], &context->priv->ThreadPoolEnv);
SubmitThreadpoolWork(work_objects[i]);
}
else
{
rfx_process_message_tile(context, message->tiles[i], s);
}
stream_set_pos(s, pos);
}
if (context->priv->UseThreads)
{
for (i = 0; i < message->num_tiles; i++)
WaitForThreadpoolWorkCallbacks(work_objects[i], FALSE);
free(work_objects);
free(params);
}
}
RFX_MESSAGE* rfx_process_message(RFX_CONTEXT* context, BYTE* data, UINT32 length)
@@ -623,13 +744,17 @@ RFX_RECT* rfx_message_get_rect(RFX_MESSAGE* message, int index)
void rfx_message_free(RFX_CONTEXT* context, RFX_MESSAGE* message)
{
int i;
if (message != NULL)
{
free(message->rects);
if (message->tiles != NULL)
if (message->tiles)
{
rfx_pool_put_tiles(context->priv->pool, message->tiles, message->num_tiles);
for (i = 0; i < message->num_tiles; i++)
rfx_tile_pool_return(context, message->tiles[i]);
free(message->tiles);
}
@@ -792,9 +917,9 @@ static void rfx_compose_message_tile(RFX_CONTEXT* context, STREAM* s,
static void rfx_compose_message_tileset(RFX_CONTEXT* context, STREAM* s,
BYTE* image_data, int width, int height, int rowstride)
{
int i;
int size;
int start_pos, end_pos;
int i;
int numQuants;
const UINT32* quantVals;
const UINT32* quantValsPtr;

View File

@@ -27,6 +27,7 @@
#include <string.h>
#include <freerdp/utils/stream.h>
#include <freerdp/primitives.h>
#include "rfx_types.h"
#include "rfx_rlgr.h"
@@ -36,49 +37,55 @@
#include "rfx_decode.h"
/* stride is bytes between rows in the output buffer. */
static void rfx_decode_format_rgb(INT16* r_buf, INT16* g_buf, INT16* b_buf,
RDP_PIXEL_FORMAT pixel_format, BYTE* dst_buf)
RDP_PIXEL_FORMAT pixel_format, BYTE* dst_buf, int stride)
{
primitives_t *prims = primitives_get();
INT16* r = r_buf;
INT16* g = g_buf;
INT16* b = b_buf;
INT16* pSrc[3];
static const prim_size_t roi_64x64 = { 64, 64 };
BYTE* dst = dst_buf;
int i;
int x, y;
switch (pixel_format)
{
case RDP_PIXEL_FORMAT_B8G8R8A8:
for (i = 0; i < 4096; i++)
{
*dst++ = (BYTE) (*b++);
*dst++ = (BYTE) (*g++);
*dst++ = (BYTE) (*r++);
*dst++ = 0xFF;
}
pSrc[0] = r; pSrc[1] = g; pSrc[2] = b;
prims->RGBToRGB_16s8u_P3AC4R(
(const INT16 **) pSrc, 64*sizeof(INT16),
dst, stride, &roi_64x64);
break;
case RDP_PIXEL_FORMAT_R8G8B8A8:
for (i = 0; i < 4096; i++)
{
*dst++ = (BYTE) (*r++);
*dst++ = (BYTE) (*g++);
*dst++ = (BYTE) (*b++);
*dst++ = 0xFF;
}
pSrc[0] = b; pSrc[1] = g; pSrc[2] = r;
prims->RGBToRGB_16s8u_P3AC4R(
(const INT16 **) pSrc, 64*sizeof(INT16),
dst, stride, &roi_64x64);
break;
case RDP_PIXEL_FORMAT_B8G8R8:
for (i = 0; i < 4096; i++)
for (y=0; y<64; y++)
{
*dst++ = (BYTE) (*b++);
*dst++ = (BYTE) (*g++);
*dst++ = (BYTE) (*r++);
for (x=0; x<64; x++)
{
*dst++ = (BYTE) (*b++);
*dst++ = (BYTE) (*g++);
*dst++ = (BYTE) (*r++);
}
dst += stride - (64*3);
}
break;
case RDP_PIXEL_FORMAT_R8G8B8:
for (i = 0; i < 4096; i++)
for (y=0; y<64; y++)
{
*dst++ = (BYTE) (*r++);
*dst++ = (BYTE) (*g++);
*dst++ = (BYTE) (*b++);
for (x=0; x<64; x++)
{
*dst++ = (BYTE) (*r++);
*dst++ = (BYTE) (*g++);
*dst++ = (BYTE) (*b++);
}
dst += stride - (64*3);
}
break;
default:
@@ -86,72 +93,13 @@ static void rfx_decode_format_rgb(INT16* r_buf, INT16* g_buf, INT16* b_buf,
}
}
#define MINMAX(_v,_l,_h) ((_v) < (_l) ? (_l) : ((_v) > (_h) ? (_h) : (_v)))
/**
 * Convert 4096 YCbCr samples to RGB in place.
 *
 * On entry the three buffers hold Y, Cb and Cr; on return they hold R, G and B
 * respectively, each clamped to [0, 255].
 */
void rfx_decode_ycbcr_to_rgb(INT16* y_r_buf, INT16* cb_g_buf, INT16* cr_b_buf)
{
/* INT32 is used intentionally because we calculate with shifted factors! */
INT32 y, cb, cr;
INT32 r, g, b;
int i;
/**
* The decoded YCbCr coefficients are represented as 11.5 fixed-point numbers:
*
* 1 sign bit + 10 integer bits + 5 fractional bits
*
* However only 7 integer bits will be actually used since the value range is [-128.0, 127.0].
* In other words, the decoded coefficients are scaled by << 5 when interpreted as INT16.
* They were scaled in the quantization phase, so we must scale them back here.
*/
for (i = 0; i < 4096; i++)
{
y = y_r_buf[i];
cb = cb_g_buf[i];
cr = cr_b_buf[i];
#if 0
/**
* This is the slow floating point version kept here for reference
*/
y = y + 4096; /* 128<<5=4096 so that we can scale the sum by >> 5 */
r = y + cr*1.403f;
g = y - cb*0.344f - cr*0.714f;
b = y + cb*1.770f;
y_r_buf[i] = MINMAX(r>>5, 0, 255);
cb_g_buf[i] = MINMAX(g>>5, 0, 255);
cr_b_buf[i] = MINMAX(b>>5, 0, 255);
#else
/**
* We scale the factors by << 16 into 32-bit integers in order to avoid slower
* floating point multiplications. Since the final result needs to be scaled
* by >> 5 we will extract only the upper 11 bits (>> 21) from the final sum.
* Hence we also have to scale the other terms of the sum by << 16.
*
* R: 1.403 << 16 = 91947
* G: 0.344 << 16 = 22544, 0.714 << 16 = 46792
* B: 1.770 << 16 = 115998
*/
/* add the 128 offset (4096 in 11.5 fixed point) before widening to the << 16 scale */
y = (y+4096)<<16;
r = y + cr*91947;
g = y - cb*22544 - cr*46792;
b = y + cb*115998;
/* >> 21 drops the 16-bit factor scale and the 5 fractional bits in one step */
y_r_buf[i] = MINMAX(r>>21, 0, 255);
cb_g_buf[i] = MINMAX(g>>21, 0, 255);
cr_b_buf[i] = MINMAX(b>>21, 0, 255);
#endif
}
}
static void rfx_decode_component(RFX_CONTEXT* context, const UINT32* quantization_values,
const BYTE* data, int size, INT16* buffer)
{
INT16* dwt_buffer;
dwt_buffer = BufferPool_Take(context->priv->BufferPool, -1); /* dwt_buffer */
PROFILER_ENTER(context->priv->prof_rfx_decode_component);
PROFILER_ENTER(context->priv->prof_rfx_rlgr_decode);
@@ -167,34 +115,50 @@ static void rfx_decode_component(RFX_CONTEXT* context, const UINT32* quantizatio
PROFILER_EXIT(context->priv->prof_rfx_quantization_decode);
PROFILER_ENTER(context->priv->prof_rfx_dwt_2d_decode);
context->dwt_2d_decode(buffer, context->priv->dwt_buffer);
context->dwt_2d_decode(buffer, dwt_buffer);
PROFILER_EXIT(context->priv->prof_rfx_dwt_2d_decode);
PROFILER_EXIT(context->priv->prof_rfx_decode_component);
BufferPool_Return(context->priv->BufferPool, dwt_buffer);
}
/* rfx_decode_ycbcr_to_rgb code now resides in the primitives library. */
/* stride is bytes between rows in the output buffer. */
void rfx_decode_rgb(RFX_CONTEXT* context, STREAM* data_in,
int y_size, const UINT32 * y_quants,
int cb_size, const UINT32 * cb_quants,
int cr_size, const UINT32 * cr_quants, BYTE* rgb_buffer)
int y_size, const UINT32* y_quants,
int cb_size, const UINT32* cb_quants,
int cr_size, const UINT32* cr_quants, BYTE* rgb_buffer, int stride)
{
INT16* pSrcDst[3];
static const prim_size_t roi_64x64 = { 64, 64 };
const primitives_t *prims = primitives_get();
PROFILER_ENTER(context->priv->prof_rfx_decode_rgb);
rfx_decode_component(context, y_quants, stream_get_tail(data_in), y_size, context->priv->y_r_buffer); /* YData */
pSrcDst[0] = BufferPool_Take(context->priv->BufferPool, -1); /* y_r_buffer */
pSrcDst[1] = BufferPool_Take(context->priv->BufferPool, -1); /* cb_g_buffer */
pSrcDst[2] = BufferPool_Take(context->priv->BufferPool, -1); /* cr_b_buffer */
rfx_decode_component(context, y_quants, stream_get_tail(data_in), y_size, pSrcDst[0]); /* YData */
stream_seek(data_in, y_size);
rfx_decode_component(context, cb_quants, stream_get_tail(data_in), cb_size, context->priv->cb_g_buffer); /* CbData */
rfx_decode_component(context, cb_quants, stream_get_tail(data_in), cb_size, pSrcDst[1]); /* CbData */
stream_seek(data_in, cb_size);
rfx_decode_component(context, cr_quants, stream_get_tail(data_in), cr_size, context->priv->cr_b_buffer); /* CrData */
rfx_decode_component(context, cr_quants, stream_get_tail(data_in), cr_size, pSrcDst[2]); /* CrData */
stream_seek(data_in, cr_size);
PROFILER_ENTER(context->priv->prof_rfx_decode_ycbcr_to_rgb);
context->decode_ycbcr_to_rgb(context->priv->y_r_buffer, context->priv->cb_g_buffer, context->priv->cr_b_buffer);
PROFILER_EXIT(context->priv->prof_rfx_decode_ycbcr_to_rgb);
prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16),
pSrcDst, 64 * sizeof(INT16), &roi_64x64);
PROFILER_ENTER(context->priv->prof_rfx_decode_format_rgb);
rfx_decode_format_rgb(context->priv->y_r_buffer, context->priv->cb_g_buffer, context->priv->cr_b_buffer,
context->pixel_format, rgb_buffer);
rfx_decode_format_rgb(pSrcDst[0], pSrcDst[1], pSrcDst[2],
context->pixel_format, rgb_buffer, stride);
PROFILER_EXIT(context->priv->prof_rfx_decode_format_rgb);
PROFILER_EXIT(context->priv->prof_rfx_decode_rgb);
BufferPool_Return(context->priv->BufferPool, pSrcDst[0]);
BufferPool_Return(context->priv->BufferPool, pSrcDst[1]);
BufferPool_Return(context->priv->BufferPool, pSrcDst[2]);
}

View File

@@ -22,12 +22,12 @@
#include <freerdp/codec/rfx.h>
void rfx_decode_ycbcr_to_rgb(INT16* y_r_buf, INT16* cb_g_buf, INT16* cr_b_buf);
/* stride is bytes between rows in the output buffer. */
void rfx_decode_rgb(RFX_CONTEXT* context, STREAM* data_in,
int y_size, const UINT32 * y_quants,
int cb_size, const UINT32 * cb_quants,
int cr_size, const UINT32 * cr_quants, BYTE* rgb_buffer);
int cr_size, const UINT32 * cr_quants, BYTE* rgb_buffer,
int stride);
#endif /* __RFX_DECODE_H */

View File

@@ -26,6 +26,11 @@
#include <stdlib.h>
#include <string.h>
#include <winpr/crt.h>
#include <winpr/collections.h>
#include <freerdp/primitives.h>
#include "rfx_types.h"
#include "rfx_rlgr.h"
#include "rfx_differential.h"
@@ -180,55 +185,19 @@ static void rfx_encode_format_rgb(const BYTE* rgb_data, int width, int height, i
}
}
/**
 * Convert 4096 RGB samples to YCbCr in place.
 *
 * On entry the three buffers hold R, G and B; on return they hold Y, Cb and Cr.
 * The outputs are 11.5 fixed-point values (1 sign bit, 10 integer bits,
 * 5 fractional bits), i.e. scaled by << 5 when read as INT16, and clamped to
 * [-4096, 4095]. Y is additionally shifted down by 4096 (128 << 5).
 */
void rfx_encode_rgb_to_ycbcr(INT16* y_r_buf, INT16* cb_g_buf, INT16* cr_b_buf)
{
	INT16* pYR = y_r_buf;
	INT16* pCbG = cb_g_buf;
	INT16* pCrB = cr_b_buf;
	int remaining;

	for (remaining = 4096; remaining > 0; remaining--, pYR++, pCbG++, pCrB++)
	{
		/* Widen to 32 bits: the products below do not fit in 16 bits. */
		const INT32 red = *pYR;
		const INT32 green = *pCbG;
		const INT32 blue = *pCrB;

		/*
		 * Integer-only conversion: every coefficient is pre-multiplied by
		 * 2^15, and the wanted result carries a 2^5 scale, so the weighted
		 * sum is shifted right by 15 - 5 = 10.
		 *
		 * Y:  0.299000 -> 9798,  0.587000 -> 19235, 0.114000 -> 3735
		 * Cb: 0.168935 -> 5535,  0.331665 -> 10868, 0.500590 -> 16403
		 * Cr: 0.499813 -> 16377, 0.418531 -> 13714, 0.081282 -> 2663
		 */
		const INT32 luma = (red * 9798 + green * 19235 + blue * 3735) >> 10;
		const INT32 chromaB = (red * -5535 + green * -10868 + blue * 16403) >> 10;
		const INT32 chromaR = (red * 16377 + green * -13714 + blue * -2663) >> 10;

		*pYR = MINMAX(luma - 4096, -4096, 4095);
		*pCbG = MINMAX(chromaB, -4096, 4095);
		*pCrB = MINMAX(chromaR, -4096, 4095);
	}
}
/* rfx_encode_rgb_to_ycbcr code now resides in the primitives library. */
static void rfx_encode_component(RFX_CONTEXT* context, const UINT32* quantization_values,
INT16* data, BYTE* buffer, int buffer_size, int* size)
{
INT16* dwt_buffer;
dwt_buffer = BufferPool_Take(context->priv->BufferPool, -1); /* dwt_buffer */
PROFILER_ENTER(context->priv->prof_rfx_encode_component);
PROFILER_ENTER(context->priv->prof_rfx_dwt_2d_encode);
context->dwt_2d_encode(data, context->priv->dwt_buffer);
context->dwt_2d_encode(data, dwt_buffer);
PROFILER_EXIT(context->priv->prof_rfx_dwt_2d_encode);
PROFILER_ENTER(context->priv->prof_rfx_quantization_encode);
@@ -244,42 +213,54 @@ static void rfx_encode_component(RFX_CONTEXT* context, const UINT32* quantizatio
PROFILER_EXIT(context->priv->prof_rfx_rlgr_encode);
PROFILER_EXIT(context->priv->prof_rfx_encode_component);
BufferPool_Return(context->priv->BufferPool, dwt_buffer);
}
void rfx_encode_rgb(RFX_CONTEXT* context, const BYTE* rgb_data, int width, int height, int rowstride,
const UINT32* y_quants, const UINT32* cb_quants, const UINT32* cr_quants,
STREAM* data_out, int* y_size, int* cb_size, int* cr_size)
{
INT16* y_r_buffer = context->priv->y_r_buffer;
INT16* cb_g_buffer = context->priv->cb_g_buffer;
INT16* cr_b_buffer = context->priv->cr_b_buffer;
INT16* pSrcDst[3];
primitives_t* prims = primitives_get();
static const prim_size_t roi_64x64 = { 64, 64 };
pSrcDst[0] = BufferPool_Take(context->priv->BufferPool, -1); /* y_r_buffer */
pSrcDst[1] = BufferPool_Take(context->priv->BufferPool, -1); /* cb_g_buffer */
pSrcDst[2] = BufferPool_Take(context->priv->BufferPool, -1); /* cr_b_buffer */
PROFILER_ENTER(context->priv->prof_rfx_encode_rgb);
PROFILER_ENTER(context->priv->prof_rfx_encode_format_rgb);
rfx_encode_format_rgb(rgb_data, width, height, rowstride,
context->pixel_format, context->palette, y_r_buffer, cb_g_buffer, cr_b_buffer);
context->pixel_format, context->palette, pSrcDst[0], pSrcDst[1], pSrcDst[2]);
PROFILER_EXIT(context->priv->prof_rfx_encode_format_rgb);
PROFILER_ENTER(context->priv->prof_rfx_encode_rgb_to_ycbcr);
context->encode_rgb_to_ycbcr(context->priv->y_r_buffer, context->priv->cb_g_buffer, context->priv->cr_b_buffer);
PROFILER_EXIT(context->priv->prof_rfx_encode_rgb_to_ycbcr);
PROFILER_ENTER(context->priv->prof_rfx_rgb_to_ycbcr);
prims->RGBToYCbCr_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16),
pSrcDst, 64 * sizeof(INT16), &roi_64x64);
PROFILER_EXIT(context->priv->prof_rfx_rgb_to_ycbcr);
/* Ensure the buffer is reasonably large enough */
stream_check_size(data_out, 4096);
rfx_encode_component(context, y_quants, context->priv->y_r_buffer,
rfx_encode_component(context, y_quants, pSrcDst[0],
stream_get_tail(data_out), stream_get_left(data_out), y_size);
stream_seek(data_out, *y_size);
stream_check_size(data_out, 4096);
rfx_encode_component(context, cb_quants, context->priv->cb_g_buffer,
rfx_encode_component(context, cb_quants, pSrcDst[1],
stream_get_tail(data_out), stream_get_left(data_out), cb_size);
stream_seek(data_out, *cb_size);
stream_check_size(data_out, 4096);
rfx_encode_component(context, cr_quants, context->priv->cr_b_buffer,
rfx_encode_component(context, cr_quants, pSrcDst[2],
stream_get_tail(data_out), stream_get_left(data_out), cr_size);
stream_seek(data_out, *cr_size);
PROFILER_EXIT(context->priv->prof_rfx_encode_rgb);
BufferPool_Return(context->priv->BufferPool, pSrcDst[0]);
BufferPool_Return(context->priv->BufferPool, pSrcDst[1]);
BufferPool_Return(context->priv->BufferPool, pSrcDst[2]);
}

View File

@@ -22,8 +22,6 @@
#include <freerdp/codec/rfx.h>
void rfx_encode_rgb_to_ycbcr(INT16* y_r_buf, INT16* cb_g_buf, INT16* cr_b_buf);
void rfx_encode_rgb(RFX_CONTEXT* context, const BYTE* rgb_data, int width, int height, int rowstride,
const UINT32* y_quants, const UINT32* cb_quants, const UINT32* cr_quants,
STREAM* data_out, int* y_size, int* cb_size, int* cr_size);

View File

@@ -35,56 +35,7 @@
#include "cpu-features.h"
#endif
/**
 * NEON variant of the YCbCr -> RGB conversion, operating in place on
 * 4096 INT16 samples per buffer, eight samples per loop iteration.
 *
 * On entry the buffers hold Y, Cb and Cr; on return they hold R, G and B,
 * each clamped to [0, 255]. The colour coefficients are approximated with
 * sums of right shifts instead of multiplications (see the per-channel
 * formulas below).
 */
void rfx_decode_YCbCr_to_RGB_NEON(INT16 * y_r_buffer, INT16 * cb_g_buffer, INT16 * cr_b_buffer)
{
/* clamp bounds and the +128 luma offset, broadcast to all eight lanes */
int16x8_t zero = vdupq_n_s16(0);
int16x8_t max = vdupq_n_s16(255);
int16x8_t y_add = vdupq_n_s16(128);
int16x8_t* y_r_buf = (int16x8_t*) y_r_buffer;
int16x8_t* cb_g_buf = (int16x8_t*) cb_g_buffer;
int16x8_t* cr_b_buf = (int16x8_t*) cr_b_buffer;
int i;
for (i = 0; i < 4096 / 8; i++)
{
int16x8_t y = vld1q_s16((INT16*) &y_r_buf[i]);
y = vaddq_s16(y, y_add);
int16x8_t cr = vld1q_s16((INT16*) &cr_b_buf[i]);
// r = between((y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5)), 0, 255);
int16x8_t r = vaddq_s16(y, cr);
r = vaddq_s16(r, vshrq_n_s16(cr, 2));
r = vaddq_s16(r, vshrq_n_s16(cr, 3));
r = vaddq_s16(r, vshrq_n_s16(cr, 5));
r = vminq_s16(vmaxq_s16(r, zero), max);
/* R overwrites the Y samples; y itself stays live in a register for G and B */
vst1q_s16((INT16*)&y_r_buf[i], r);
// cb = cb_g_buf[i];
int16x8_t cb = vld1q_s16((INT16*)&cb_g_buf[i]);
// g = between(y - (cb >> 2) - (cb >> 4) - (cb >> 5) - (cr >> 1) - (cr >> 3) - (cr >> 4) - (cr >> 5), 0, 255);
int16x8_t g = vsubq_s16(y, vshrq_n_s16(cb, 2));
g = vsubq_s16(g, vshrq_n_s16(cb, 4));
g = vsubq_s16(g, vshrq_n_s16(cb, 5));
g = vsubq_s16(g, vshrq_n_s16(cr, 1));
g = vsubq_s16(g, vshrq_n_s16(cr, 3));
g = vsubq_s16(g, vshrq_n_s16(cr, 4));
g = vsubq_s16(g, vshrq_n_s16(cr, 5));
g = vminq_s16(vmaxq_s16(g, zero), max);
/* G overwrites the Cb samples, which were already loaded into cb above */
vst1q_s16((INT16*)&cb_g_buf[i], g);
// b = between((y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6)), 0, 255);
int16x8_t b = vaddq_s16(y, cb);
b = vaddq_s16(b, vshrq_n_s16(cb, 1));
b = vaddq_s16(b, vshrq_n_s16(cb, 2));
b = vaddq_s16(b, vshrq_n_s16(cb, 6));
b = vminq_s16(vmaxq_s16(b, zero), max);
/* B overwrites the Cr samples, which were already loaded into cr above */
vst1q_s16((INT16*)&cr_b_buf[i], b);
}
}
/* rfx_decode_YCbCr_to_RGB_NEON code now resides in the primitives library. */
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
rfx_quantization_decode_block_NEON(INT16 * buffer, const int buffer_size, const UINT32 factor)
@@ -338,11 +289,10 @@ void rfx_init_neon(RFX_CONTEXT * context)
{
DEBUG_RFX("Using NEON optimizations");
IF_PROFILER(context->priv->prof_rfx_decode_ycbcr_to_rgb->name = "rfx_decode_YCbCr_to_RGB_NEON");
IF_PROFILER(context->priv->prof_rfx_ycbcr_to_rgb->name = "rfx_decode_YCbCr_to_RGB_NEON");
IF_PROFILER(context->priv->prof_rfx_quantization_decode->name = "rfx_quantization_decode_NEON");
IF_PROFILER(context->priv->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_NEON");
context->decode_ycbcr_to_rgb = rfx_decode_YCbCr_to_RGB_NEON;
context->quantization_decode = rfx_quantization_decode_NEON;
context->dwt_2d_decode = rfx_dwt_2d_decode_NEON;
}

View File

@@ -1,121 +0,0 @@
/**
* FreeRDP: A Remote Desktop Protocol Implementation
* RemoteFX Codec Library - Memory Pool
*
* Copyright 2011 Marc-Andre Moreau <marcandre.moreau@gmail.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <winpr/crt.h>
#include "rfx_pool.h"
/**
 * Create an empty tile pool with room for 64 tile pointers.
 *
 * @return the new pool (count 0, all slots NULL), or NULL if memory could
 *         not be allocated
 */
RFX_POOL* rfx_pool_new()
{
	RFX_POOL* pool;

	/* calloc yields the same all-zero state the pool starts from */
	pool = (RFX_POOL*) calloc(1, sizeof(RFX_POOL));

	if (!pool)
		return NULL;

	pool->size = 64;
	pool->tiles = (RFX_TILE**) calloc(pool->size, sizeof(RFX_TILE*));

	if (!pool->tiles)
	{
		free(pool);
		return NULL;
	}

	return pool;
}
/**
 * Destroy a tile pool together with every tile currently stored in it
 * (each tile's data buffer, then the tile itself).
 *
 * @param pool pool to destroy; NULL is accepted and ignored
 */
void rfx_pool_free(RFX_POOL* pool)
{
	int i;
	RFX_TILE* tile;

	if (!pool)
		return;

	/* Only the first `count` slots hold pooled tiles. */
	for (i = 0; i < pool->count; i++)
	{
		tile = pool->tiles[i];

		if (tile != NULL)
		{
			free(tile->data); /* free(NULL) is a no-op */
			free(tile);
		}
	}

	free(pool->tiles);
	free(pool);
}
/**
 * Store a tile in the pool for later reuse, doubling the slot array when it
 * is full.
 *
 * If the array cannot be grown, the pool is left unchanged and the tile is
 * released (data buffer and tile) instead of being leaked, since the pool can
 * no longer take ownership of it.
 *
 * @param pool pool receiving the tile
 * @param tile tile to store
 */
void rfx_pool_put_tile(RFX_POOL* pool, RFX_TILE* tile)
{
	if (pool->count >= pool->size)
	{
		int grown_size = pool->size * 2;
		RFX_TILE** grown;

		/* Keep the old pointer until realloc succeeds: on failure it is still valid. */
		grown = (RFX_TILE**) realloc((void*) pool->tiles, sizeof(RFX_TILE*) * grown_size);

		if (!grown)
		{
			if (tile != NULL)
			{
				free(tile->data);
				free(tile);
			}

			return;
		}

		pool->tiles = grown;
		pool->size = grown_size;
	}

	pool->tiles[(pool->count)++] = tile;
}
/**
 * Take a tile out of the pool, or allocate a new one when the pool is empty.
 *
 * A newly allocated tile is zero-initialised and owns a 4096 * 4 byte data
 * buffer (64x64 pixels, 4 bytes each).
 *
 * @param pool pool to take from
 * @return a tile, or NULL if the pool was empty and allocation failed
 */
RFX_TILE* rfx_pool_get_tile(RFX_POOL* pool)
{
	RFX_TILE* tile;

	/* Reuse the most recently stored tile when there is one. */
	if (pool->count >= 1)
		return pool->tiles[--(pool->count)];

	tile = (RFX_TILE*) calloc(1, sizeof(RFX_TILE));

	if (!tile)
		return NULL;

	tile->data = (BYTE*) malloc(4096 * 4); /* 64x64 * 4 */

	if (!tile->data)
	{
		/* Never return a tile that has no pixel buffer. */
		free(tile);
		return NULL;
	}

	return tile;
}
/**
 * Store the first `count` tiles of an array in the pool, in array order.
 *
 * @param pool  pool receiving the tiles
 * @param tiles array of tile pointers
 * @param count number of entries of `tiles` to store
 */
void rfx_pool_put_tiles(RFX_POOL* pool, RFX_TILE** tiles, int count)
{
	int index = 0;

	while (index < count)
	{
		rfx_pool_put_tile(pool, tiles[index]);
		index++;
	}
}
/**
 * Allocate an array of `count` tile pointers and fill each slot with a tile
 * obtained from rfx_pool_get_tile().
 *
 * @param pool  pool supplying the tiles
 * @param count number of tiles requested
 * @return the newly allocated array, or NULL if the array itself could not
 *         be allocated
 */
RFX_TILE** rfx_pool_get_tiles(RFX_POOL* pool, int count)
{
	int i;
	RFX_TILE** tiles;

	tiles = (RFX_TILE**) malloc(sizeof(RFX_TILE*) * count);

	/* Without the array there is nowhere to put tiles; leave the pool untouched. */
	if (!tiles)
		return NULL;

	for (i = 0; i < count; i++)
	{
		tiles[i] = rfx_pool_get_tile(pool);
	}

	return tiles;
}

View File

@@ -1,40 +0,0 @@
/**
* FreeRDP: A Remote Desktop Protocol Implementation
* RemoteFX Codec Library - Memory Pool
*
* Copyright 2011 Marc-Andre Moreau <marcandre.moreau@gmail.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __RFX_POOL_H
#define __RFX_POOL_H
#include <freerdp/codec/rfx.h>
/**
 * Growable stack of reusable tiles: tiles are appended at index `count` and
 * taken back from the end.
 */
struct _RFX_POOL
{
/* number of slots allocated in `tiles` (starts at 64, doubled when full) */
int size;
/* number of tiles currently stored in `tiles` */
int count;
/* array of `size` tile pointers; the first `count` are in use */
RFX_TILE** tiles;
};
typedef struct _RFX_POOL RFX_POOL;
RFX_POOL* rfx_pool_new();
void rfx_pool_free(RFX_POOL* pool);
void rfx_pool_put_tile(RFX_POOL* pool, RFX_TILE* tile);
RFX_TILE* rfx_pool_get_tile(RFX_POOL* pool);
void rfx_pool_put_tiles(RFX_POOL* pool, RFX_TILE** tiles, int count);
RFX_TILE** rfx_pool_get_tiles(RFX_POOL* pool, int count);
#endif /* __RFX_POOL_H */

View File

@@ -21,36 +21,34 @@
#include "config.h"
#endif
#include <freerdp/primitives.h>
#include "rfx_quantization.h"
static void rfx_quantization_decode_block(INT16* buffer, int buffer_size, UINT32 factor)
static void rfx_quantization_decode_block(const primitives_t *prims, INT16* buffer, int buffer_size, UINT32 factor)
{
INT16* dst;
if (factor == 0)
return;
for (dst = buffer; buffer_size > 0; dst++, buffer_size--)
{
*dst <<= factor;
}
prims->lShiftC_16s(buffer, factor, buffer, buffer_size);
}
void rfx_quantization_decode(INT16* buffer, const UINT32* quantization_values)
{
/* Scale the values so that they are represented as 11.5 fixed-point number */
rfx_quantization_decode_block(buffer, 4096, 5);
const primitives_t *prims = primitives_get();
rfx_quantization_decode_block(buffer, 1024, quantization_values[8] - 6); /* HL1 */
rfx_quantization_decode_block(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
rfx_quantization_decode_block(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
rfx_quantization_decode_block(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
rfx_quantization_decode_block(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
rfx_quantization_decode_block(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
rfx_quantization_decode_block(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
rfx_quantization_decode_block(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
rfx_quantization_decode_block(buffer + 3968, 64, quantization_values[3] - 6); /* HH3 */
rfx_quantization_decode_block(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
/* Scale the values so that they are represented as 11.5 fixed-point number */
rfx_quantization_decode_block(prims, buffer, 4096, 5);
rfx_quantization_decode_block(prims, buffer, 1024, quantization_values[8] - 6); /* HL1 */
rfx_quantization_decode_block(prims, buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
rfx_quantization_decode_block(prims, buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
rfx_quantization_decode_block(prims, buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
rfx_quantization_decode_block(prims, buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
rfx_quantization_decode_block(prims, buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
rfx_quantization_decode_block(prims, buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
rfx_quantization_decode_block(prims, buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
rfx_quantization_decode_block(prims, buffer + 3968, 64, quantization_values[3] - 6); /* HH3 */
rfx_quantization_decode_block(prims, buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
}
static void rfx_quantization_encode_block(INT16* buffer, int buffer_size, UINT32 factor)
@@ -62,6 +60,7 @@ static void rfx_quantization_encode_block(INT16* buffer, int buffer_size, UINT32
return;
half = (1 << (factor - 1));
/* Could probably use prims->rShiftC_16s(dst+half, factor, dst, buffer_size); */
for (dst = buffer; buffer_size > 0; dst++, buffer_size--)
{
*dst = (*dst + half) >> factor;

View File

@@ -52,177 +52,8 @@ _mm_prefetch_buffer(char * buffer, int num_bytes)
}
}
/**
 * SSE2 YCbCr -> RGB conversion, performed in place on three planes.
 *
 * Each buffer is processed as 4096 INT16 samples (512 __m128i vectors of
 * 8 lanes). On entry the buffers hold Y, Cb and Cr; on return the same
 * buffers hold R, G and B respectively.
 *
 * The buffers are accessed with _mm_load_si128/_mm_store_si128, which are
 * aligned accesses, so each pointer must be 16-byte aligned.
 *
 * @param y_r_buffer  in: Y samples,  out: R samples
 * @param cb_g_buffer in: Cb samples, out: G samples
 * @param cr_b_buffer in: Cr samples, out: B samples
 */
static void rfx_decode_ycbcr_to_rgb_sse2(INT16* y_r_buffer, INT16* cb_g_buffer, INT16* cr_b_buffer)
{
/* clamp bounds for the final 8-bit colour range */
__m128i zero = _mm_setzero_si128();
__m128i max = _mm_set1_epi16(255);
__m128i* y_r_buf = (__m128i*) y_r_buffer;
__m128i* cb_g_buf = (__m128i*) cb_g_buffer;
__m128i* cr_b_buf = (__m128i*) cr_b_buffer;
__m128i y;
__m128i cr;
__m128i cb;
__m128i r;
__m128i g;
__m128i b;
int i;
/* conversion factors pre-scaled by 2^14 so they fit a signed 16-bit lane */
__m128i r_cr = _mm_set1_epi16(22986); // 1.403 << 14
__m128i g_cb = _mm_set1_epi16(-5636); // -0.344 << 14
__m128i g_cr = _mm_set1_epi16(-11698); // -0.714 << 14
__m128i b_cb = _mm_set1_epi16(28999); // 1.770 << 14
/* 4096 == 128 << 5: the +128 level shift expressed at the input scale (see formula below) */
__m128i c4096 = _mm_set1_epi16(4096);
/* Prefetch pass: issue one non-temporal prefetch per CACHE_LINE_BYTES step for each plane. */
for (i = 0; i < (4096 * sizeof(INT16) / sizeof(__m128i)); i += (CACHE_LINE_BYTES / sizeof(__m128i)))
{
_mm_prefetch((char*)(&y_r_buf[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&cb_g_buf[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&cr_b_buf[i]), _MM_HINT_NTA);
}
/* Conversion pass: 8 samples of each plane per iteration. */
for (i = 0; i < (4096 * sizeof(INT16) / sizeof(__m128i)); i++)
{
/*
In order to use SSE2 signed 16-bit integer multiplication we need to convert
the floating point factors to signed int without losing information.
The result of this multiplication is 32 bit and we have two SSE instructions
that return either the hi or lo word.
Thus we will multiply the factors by the highest possible 2^n, take the
upper 16 bits of the signed 32-bit result (_mm_mulhi_epi16) and correct this
result by multiplying it by 2^(16-n).
For the given factors in the conversion matrix the best possible n is 14.
Example for calculating r:
r = (y>>5) + 128 + (cr*1.403)>>5 // our base formula
r = (y>>5) + 128 + (HIWORD(cr*(1.403<<14)<<2))>>5 // see above
r = (y+4096)>>5 + (HIWORD(cr*22986)<<2)>>5 // simplification
r = ((y+4096)>>2 + HIWORD(cr*22986)) >> 3
*/
/* y = (y_r_buf[i] + 4096) >> 2 */
y = _mm_load_si128(&y_r_buf[i]);
y = _mm_add_epi16(y, c4096);
y = _mm_srai_epi16(y, 2);
/* cb = cb_g_buf[i]; */
cb = _mm_load_si128(&cb_g_buf[i]);
/* cr = cr_b_buf[i]; */
cr = _mm_load_si128(&cr_b_buf[i]);
/* (y + HIWORD(cr*22986)) >> 3 */
r = _mm_add_epi16(y, _mm_mulhi_epi16(cr, r_cr));
r = _mm_srai_epi16(r, 3);
/* y_r_buf[i] = MINMAX(r, 0, 255); */
/* NOTE(review): _mm_between_epi16 is defined outside this view; its result is not
   assigned here, so it presumably is a macro that clamps its first argument in place - confirm. */
_mm_between_epi16(r, zero, max);
_mm_store_si128(&y_r_buf[i], r);
/* (y + HIWORD(cb*-5636) + HIWORD(cr*-11698)) >> 3 */
g = _mm_add_epi16(y, _mm_mulhi_epi16(cb, g_cb));
g = _mm_add_epi16(g, _mm_mulhi_epi16(cr, g_cr));
g = _mm_srai_epi16(g, 3);
/* cb_g_buf[i] = MINMAX(g, 0, 255); */
_mm_between_epi16(g, zero, max);
_mm_store_si128(&cb_g_buf[i], g);
/* (y + HIWORD(cb*28999)) >> 3 */
b = _mm_add_epi16(y, _mm_mulhi_epi16(cb, b_cb));
b = _mm_srai_epi16(b, 3);
/* cr_b_buf[i] = MINMAX(b, 0, 255); */
_mm_between_epi16(b, zero, max);
_mm_store_si128(&cr_b_buf[i], b);
}
}
/**
 * SSE2 RGB -> YCbCr conversion, performed in place on three planes.
 *
 * The encoded YCbCr coefficients are represented as 11.5 fixed-point numbers.
 * See rfx_encode.c
 *
 * Each buffer is processed as 4096 INT16 samples (512 __m128i vectors of
 * 8 lanes). On entry the buffers hold R, G and B; on return the same buffers
 * hold Y, Cb and Cr respectively, each clamped to [-128 << 5, 127 << 5].
 *
 * The buffers are accessed with _mm_load_si128/_mm_store_si128, which are
 * aligned accesses, so each pointer must be 16-byte aligned.
 *
 * @param y_r_buffer  in: R samples, out: Y samples
 * @param cb_g_buffer in: G samples, out: Cb samples
 * @param cr_b_buffer in: B samples, out: Cr samples
 */
static void rfx_encode_rgb_to_ycbcr_sse2(INT16* y_r_buffer, INT16* cb_g_buffer, INT16* cr_b_buffer)
{
	/*
	 * Written as -(128 << 5) rather than (-128 << 5): left-shifting a negative
	 * signed value is undefined behaviour in C. The resulting value (-4096) is
	 * unchanged.
	 */
	__m128i min = _mm_set1_epi16(-(128 << 5));
	__m128i max = _mm_set1_epi16(127 << 5);

	__m128i* y_r_buf = (__m128i*) y_r_buffer;
	__m128i* cb_g_buf = (__m128i*) cb_g_buffer;
	__m128i* cr_b_buf = (__m128i*) cr_b_buffer;

	__m128i y;
	__m128i cr;
	__m128i cb;
	__m128i r;
	__m128i g;
	__m128i b;

	/* conversion factors pre-scaled by 2^15 so they fit a signed 16-bit lane */
	__m128i y_r = _mm_set1_epi16(9798); // 0.299000 << 15
	__m128i y_g = _mm_set1_epi16(19235); // 0.587000 << 15
	__m128i y_b = _mm_set1_epi16(3735); // 0.114000 << 15
	__m128i cb_r = _mm_set1_epi16(-5535); // -0.168935 << 15
	__m128i cb_g = _mm_set1_epi16(-10868); // -0.331665 << 15
	__m128i cb_b = _mm_set1_epi16(16403); // 0.500590 << 15
	__m128i cr_r = _mm_set1_epi16(16377); // 0.499813 << 15
	__m128i cr_g = _mm_set1_epi16(-13714); // -0.418531 << 15
	__m128i cr_b = _mm_set1_epi16(-2663); // -0.081282 << 15

	int i;

	/* Prefetch pass: issue one non-temporal prefetch per CACHE_LINE_BYTES step for each plane. */
	for (i = 0; i < (4096 * sizeof(INT16) / sizeof(__m128i)); i += (CACHE_LINE_BYTES / sizeof(__m128i)))
	{
		_mm_prefetch((char*)(&y_r_buf[i]), _MM_HINT_NTA);
		_mm_prefetch((char*)(&cb_g_buf[i]), _MM_HINT_NTA);
		_mm_prefetch((char*)(&cr_b_buf[i]), _MM_HINT_NTA);
	}

	/* Conversion pass: 8 samples of each plane per iteration. */
	for (i = 0; i < (4096 * sizeof(INT16) / sizeof(__m128i)); i++)
	{
		/*
		In order to use SSE2 signed 16-bit integer multiplication we need to convert
		the floating point factors to signed int without losing information.
		The result of this multiplication is 32 bit and using SSE2 we get either the
		product's hi or lo word.
		Thus we will multiply the factors by the highest possible 2^n and take the
		upper 16 bits of the signed 32-bit result (_mm_mulhi_epi16).
		Since the final result needs to be scaled by << 5 and also in order to keep
		the precision within the upper 16 bits we will also have to scale the RGB
		values used in the multiplication by << 5+(16-n).
		*/

		/* r = y_r_buf[i]; */
		r = _mm_load_si128(&y_r_buf[i]);

		/* g = cb_g_buf[i]; */
		g = _mm_load_si128(&cb_g_buf[i]);

		/* b = cr_b_buf[i]; */
		b = _mm_load_si128(&cr_b_buf[i]);

		/* r<<6; g<<6; b<<6  (5 + (16 - 15) = 6; assumes inputs are 0..255 so the shift cannot overflow - TODO confirm) */
		r = _mm_slli_epi16(r, 6);
		g = _mm_slli_epi16(g, 6);
		b = _mm_slli_epi16(b, 6);

		/* y = HIWORD(r*y_r) + HIWORD(g*y_g) + HIWORD(b*y_b) + min */
		y = _mm_mulhi_epi16(r, y_r);
		y = _mm_add_epi16(y, _mm_mulhi_epi16(g, y_g));
		y = _mm_add_epi16(y, _mm_mulhi_epi16(b, y_b));
		y = _mm_add_epi16(y, min);

		/* y_r_buf[i] = MINMAX(y, 0, (255 << 5)) - (128 << 5); */
		/* NOTE(review): _mm_between_epi16 is defined outside this view; its result is not
		   assigned here, so it presumably is a macro that clamps its first argument in place - confirm. */
		_mm_between_epi16(y, min, max);
		_mm_store_si128(&y_r_buf[i], y);

		/* cb = HIWORD(r*cb_r) + HIWORD(g*cb_g) + HIWORD(b*cb_b) */
		cb = _mm_mulhi_epi16(r, cb_r);
		cb = _mm_add_epi16(cb, _mm_mulhi_epi16(g, cb_g));
		cb = _mm_add_epi16(cb, _mm_mulhi_epi16(b, cb_b));

		/* cb_g_buf[i] = MINMAX(cb, -(128 << 5), (127 << 5)); */
		_mm_between_epi16(cb, min, max);
		_mm_store_si128(&cb_g_buf[i], cb);

		/* cr = HIWORD(r*cr_r) + HIWORD(g*cr_g) + HIWORD(b*cr_b) */
		cr = _mm_mulhi_epi16(r, cr_r);
		cr = _mm_add_epi16(cr, _mm_mulhi_epi16(g, cr_g));
		cr = _mm_add_epi16(cr, _mm_mulhi_epi16(b, cr_b));

		/* cr_b_buf[i] = MINMAX(cr, -(128 << 5), (127 << 5)); */
		_mm_between_epi16(cr, min, max);
		_mm_store_si128(&cr_b_buf[i], cr);
	}
}
/* rfx_decode_ycbcr_to_rgb_sse2 code now resides in the primitives library. */
/* rfx_encode_rgb_to_ycbcr_sse2 code now resides in the primitives library. */
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
rfx_quantization_decode_block_sse2(INT16* buffer, const int buffer_size, const UINT32 factor)
@@ -658,15 +489,11 @@ void rfx_init_sse2(RFX_CONTEXT* context)
{
DEBUG_RFX("Using SSE2 optimizations");
IF_PROFILER(context->priv->prof_rfx_decode_ycbcr_to_rgb->name = "rfx_decode_ycbcr_to_rgb_sse2");
IF_PROFILER(context->priv->prof_rfx_encode_rgb_to_ycbcr->name = "rfx_encode_rgb_to_ycbcr_sse2");
IF_PROFILER(context->priv->prof_rfx_quantization_decode->name = "rfx_quantization_decode_sse2");
IF_PROFILER(context->priv->prof_rfx_quantization_encode->name = "rfx_quantization_encode_sse2");
IF_PROFILER(context->priv->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_sse2");
IF_PROFILER(context->priv->prof_rfx_dwt_2d_encode->name = "rfx_dwt_2d_encode_sse2");
context->decode_ycbcr_to_rgb = rfx_decode_ycbcr_to_rgb_sse2;
context->encode_rgb_to_ycbcr = rfx_encode_rgb_to_ycbcr_sse2;
context->quantization_decode = rfx_quantization_decode_sse2;
context->quantization_encode = rfx_quantization_encode_sse2;
context->dwt_2d_decode = rfx_dwt_2d_decode_sse2;

View File

@@ -24,6 +24,10 @@
#include "config.h"
#endif
#include <winpr/crt.h>
#include <winpr/pool.h>
#include <winpr/collections.h>
#include <freerdp/utils/debug.h>
#include <freerdp/utils/profiler.h>
@@ -33,25 +37,19 @@
#define DEBUG_RFX(fmt, ...) DEBUG_NULL(fmt, ## __VA_ARGS__)
#endif
#include "rfx_pool.h"
struct _RFX_CONTEXT_PRIV
{
/* pre-allocated buffers */
wQueue* TilePool;
wQueue* TileQueue;
RFX_POOL* pool; /* memory pool */
BOOL UseThreads;
DWORD MinThreadCount;
DWORD MaxThreadCount;
INT16 y_r_mem[4096 + 8]; /* 4096 = 64x64 (+ 8x2 = 16 for mem align) */
INT16 cb_g_mem[4096 + 8]; /* 4096 = 64x64 (+ 8x2 = 16 for mem align) */
INT16 cr_b_mem[4096 + 8]; /* 4096 = 64x64 (+ 8x2 = 16 for mem align) */
PTP_POOL ThreadPool;
TP_CALLBACK_ENVIRON ThreadPoolEnv;
INT16* y_r_buffer;
INT16* cb_g_buffer;
INT16* cr_b_buffer;
INT16 dwt_mem[32 * 32 * 2 * 2 + 8]; /* maximum sub-band width is 32 */
INT16* dwt_buffer;
wBufferPool* BufferPool;
/* profilers */
PROFILER_DEFINE(prof_rfx_decode_rgb);
@@ -60,7 +58,7 @@ struct _RFX_CONTEXT_PRIV
PROFILER_DEFINE(prof_rfx_differential_decode);
PROFILER_DEFINE(prof_rfx_quantization_decode);
PROFILER_DEFINE(prof_rfx_dwt_2d_decode);
PROFILER_DEFINE(prof_rfx_decode_ycbcr_to_rgb);
PROFILER_DEFINE(prof_rfx_ycbcr_to_rgb);
PROFILER_DEFINE(prof_rfx_decode_format_rgb);
PROFILER_DEFINE(prof_rfx_encode_rgb);
@@ -69,7 +67,7 @@ struct _RFX_CONTEXT_PRIV
PROFILER_DEFINE(prof_rfx_differential_encode);
PROFILER_DEFINE(prof_rfx_quantization_encode);
PROFILER_DEFINE(prof_rfx_dwt_2d_encode);
PROFILER_DEFINE(prof_rfx_encode_rgb_to_ycbcr);
PROFILER_DEFINE(prof_rfx_rgb_to_ycbcr);
PROFILER_DEFINE(prof_rfx_encode_format_rgb);
};

View File

@@ -145,9 +145,7 @@ BOOL rdp_send_server_control_cooperate_pdu(rdpRdp* rdp)
stream_write_UINT16(s, 0); /* grantId (2 bytes) */
stream_write_UINT32(s, 0); /* controlId (4 bytes) */
rdp_send_data_pdu(rdp, s, DATA_PDU_TYPE_CONTROL, rdp->mcs->user_id);
return TRUE;
return rdp_send_data_pdu(rdp, s, DATA_PDU_TYPE_CONTROL, rdp->mcs->user_id);
}
BOOL rdp_send_server_control_granted_pdu(rdpRdp* rdp)
@@ -160,9 +158,7 @@ BOOL rdp_send_server_control_granted_pdu(rdpRdp* rdp)
stream_write_UINT16(s, rdp->mcs->user_id); /* grantId (2 bytes) */
stream_write_UINT32(s, 0x03EA); /* controlId (4 bytes) */
rdp_send_data_pdu(rdp, s, DATA_PDU_TYPE_CONTROL, rdp->mcs->user_id);
return TRUE;
return rdp_send_data_pdu(rdp, s, DATA_PDU_TYPE_CONTROL, rdp->mcs->user_id);
}
BOOL rdp_send_client_control_pdu(rdpRdp* rdp, UINT16 action)
@@ -288,9 +284,17 @@ BOOL rdp_recv_deactivate_all(rdpRdp* rdp, STREAM* s)
*/
if (stream_get_left(s) > 0)
{
stream_read_UINT32(s, rdp->settings->ShareId); /* shareId (4 bytes) */
stream_read_UINT16(s, lengthSourceDescriptor); /* lengthSourceDescriptor (2 bytes) */
stream_seek(s, lengthSourceDescriptor); /* sourceDescriptor (should be 0x00) */
do {
if(stream_get_left(s) < 4)
break;
stream_read_UINT32(s, rdp->settings->ShareId); /* shareId (4 bytes) */
if(stream_get_left(s) < 2)
break;
stream_read_UINT16(s, lengthSourceDescriptor); /* lengthSourceDescriptor (2 bytes) */
if(stream_get_left(s) < lengthSourceDescriptor)
break;
stream_seek(s, lengthSourceDescriptor); /* sourceDescriptor (should be 0x00) */
} while(0);
}
rdp->state = CONNECTION_STATE_CAPABILITY;

View File

@@ -2147,9 +2147,7 @@ BOOL rdp_send_demand_active(rdpRdp* rdp)
rdp_write_demand_active(s, rdp->settings);
rdp_send_pdu(rdp, s, PDU_TYPE_DEMAND_ACTIVE, rdp->mcs->user_id);
return TRUE;
return rdp_send_pdu(rdp, s, PDU_TYPE_DEMAND_ACTIVE, rdp->mcs->user_id);
}
BOOL rdp_recv_confirm_active(rdpRdp* rdp, STREAM* s)
@@ -2305,6 +2303,10 @@ void rdp_write_confirm_active(STREAM* s, rdpSettings* settings)
rdp_write_frame_acknowledge_capability_set(s, settings);
}
}
else
{
settings->FrameAcknowledge = 0;
}
if (settings->ReceivedCapabilities[CAPSET_TYPE_BITMAP_CACHE_V3_CODEC_ID])
{

View File

@@ -123,6 +123,29 @@
*
*/
/*
 * Human-readable names of the parsing stages of
 * certificate_read_x509_certificate(). That function keeps an `error`
 * counter which it increments after each stage succeeds, and uses the
 * counter as an index into this table when it prints its failure message.
 * Entries must therefore stay in the same order as the parsing stages.
 *
 * NOTE(review): "modulusLength" appears at both index 13 and index 15, and
 * the labels from index 15 onward may not line up one-to-one with the checks
 * that can fail at those counter values - verify against the parser.
 */
static const char *certificate_read_errors[] = {
"Certificate tag", /* 0 */
"TBSCertificate", /* 1 */
"Explicit Contextual Tag [0]", /* 2 */
"version", /* 3 */
"CertificateSerialNumber", /* 4 */
"AlgorithmIdentifier", /* 5 */
"Issuer Name", /* 6 */
"Validity", /* 7 */
"Subject Name", /* 8 */
"SubjectPublicKeyInfo Tag", /* 9 */
"subjectPublicKeyInfo::AlgorithmIdentifier", /* 10 */
"subjectPublicKeyInfo::subjectPublicKey", /* 11 */
"RSAPublicKey Tag", /* 12 */
"modulusLength", /* 13 */
"zero padding", /* 14 */
"modulusLength", /* 15 */
"modulus", /* 16 */
"publicExponent length", /* 17 */
"publicExponent" /* 18 */
};
/**
* Read X.509 Certificate
* @param certificate certificate module
@@ -137,88 +160,105 @@ BOOL certificate_read_x509_certificate(rdpCertBlob* cert, rdpCertInfo* info)
UINT32 version;
int modulus_length;
int exponent_length;
int error = 0;
s = stream_new(0);
stream_attach(s, cert->data, cert->length);
info->Modulus = 0;
if(!ber_read_sequence_tag(s, &length)) /* Certificate (SEQUENCE) */
goto error1;
error++;
if(!ber_read_sequence_tag(s, &length)) /* TBSCertificate (SEQUENCE) */
goto error1;
error++;
/* Explicit Contextual Tag [0] */
if(!ber_read_contextual_tag(s, 0, &length, TRUE))
if(!ber_read_contextual_tag(s, 0, &length, TRUE)) /* Explicit Contextual Tag [0] */
goto error1;
error++;
if(!ber_read_integer(s, &version)) /* version (INTEGER) */
goto error1;
error++;
version++;
/* serialNumber */
if(!ber_read_integer(s, NULL)) /* CertificateSerialNumber (INTEGER) */
goto error1;
error++;
/* signature */
if(!ber_read_sequence_tag(s, &length) || !stream_skip(s, length)) /* AlgorithmIdentifier (SEQUENCE) */
goto error1;
error++;
/* issuer */
if(!ber_read_sequence_tag(s, &length) || !stream_skip(s, length)) /* Name (SEQUENCE) */
goto error1;
error++;
/* validity */
if(!ber_read_sequence_tag(s, &length) || !stream_skip(s, length)) /* Validity (SEQUENCE) */
goto error1;
error++;
/* subject */
if(!ber_read_sequence_tag(s, &length) || !stream_skip(s, length)) /* Name (SEQUENCE) */
goto error1;
error++;
/* subjectPublicKeyInfo */
if(!ber_read_sequence_tag(s, &length)) /* SubjectPublicKeyInfo (SEQUENCE) */
goto error1;
error++;
/* subjectPublicKeyInfo::AlgorithmIdentifier */
if(!ber_read_sequence_tag(s, &length) || !stream_skip(s, length)) /* AlgorithmIdentifier (SEQUENCE) */
goto error1;
error++;
/* subjectPublicKeyInfo::subjectPublicKey */
if(!ber_read_bit_string(s, &length, &padding)) /* BIT_STRING */
goto error1;
error++;
/* RSAPublicKey (SEQUENCE) */
if(!ber_read_sequence_tag(s, &length)) /* SEQUENCE */
goto error1;
error++;
if(!ber_read_integer_length(s, &modulus_length)) /* modulus (INTEGER) */
goto error1;
error++;
/* skip zero padding, if any */
do
{
if(stream_get_left(s) < padding)
if(stream_get_left(s) < 1)
goto error1;
stream_peek_BYTE(s, padding);
if (padding == 0)
{
if(stream_get_left(s) < 1)
if(!stream_skip(s, 1))
goto error1;
stream_seek(s, 1);
modulus_length--;
}
}
while (padding == 0);
error++;
if(stream_get_left(s) < modulus_length)
goto error1;
info->ModulusLength = modulus_length;
info->Modulus = (BYTE*) malloc(info->ModulusLength);
stream_read(s, info->Modulus, info->ModulusLength);
error++;
if(!ber_read_integer_length(s, &exponent_length)) /* publicExponent (INTEGER) */
goto error2;
if(stream_get_left(s) < exponent_length)
error++;
if(stream_get_left(s) < exponent_length || exponent_length > 4)
goto error2;
stream_read(s, &info->exponent[4 - exponent_length], exponent_length);
crypto_reverse(info->Modulus, info->ModulusLength);
@@ -232,6 +272,7 @@ error2:
free(info->Modulus);
info->Modulus = 0;
error1:
printf("error reading when reading certificate: part=%s error=%d\n", certificate_read_errors[error], error);
stream_detach(s);
stream_free(s);
return FALSE;
@@ -490,9 +531,13 @@ BOOL certificate_read_server_x509_certificate_chain(rdpCertificate* certificate,
DEBUG_CERTIFICATE("License Server Certificate");
ret = certificate_read_x509_certificate(&certificate->x509_cert_chain->array[i], &cert_info);
DEBUG_LICENSE("modulus length:%d", (int) cert_info.ModulusLength);
free(cert_info.Modulus);
if(!ret)
if (cert_info.Modulus)
free(cert_info.Modulus);
if (!ret) {
printf("failed to read License Server, content follows:\n");
winpr_HexDump(certificate->x509_cert_chain->array[i].data, certificate->x509_cert_chain->array[i].length);
return FALSE;
}
}
else if (numCertBlobs - i == 1)
{

View File

@@ -337,7 +337,8 @@ static BOOL rdp_server_establish_keys(rdpRdp* rdp, STREAM* s)
return FALSE;
}
rdp_read_security_header(s, &sec_flags);
if (!rdp_read_security_header(s, &sec_flags))
return FALSE;
if ((sec_flags & SEC_EXCHANGE_PKT) == 0)
{
@@ -345,7 +346,12 @@ static BOOL rdp_server_establish_keys(rdpRdp* rdp, STREAM* s)
return FALSE;
}
if(stream_get_left(s) < 4)
return FALSE;
stream_read_UINT32(s, rand_len);
if(stream_get_left(s) < rand_len + 8) /* include 8 bytes of padding */
return FALSE;
key_len = rdp->settings->RdpServerRsaKey->ModulusLength;
if (rand_len != key_len + 8)
@@ -547,9 +553,7 @@ BOOL rdp_client_connect_demand_active(rdpRdp* rdp, STREAM* s)
rdp->state = CONNECTION_STATE_FINALIZATION;
update_reset_state(rdp->update);
rdp_client_connect_finalize(rdp);
return TRUE;
return rdp_client_connect_finalize(rdp);
}
BOOL rdp_client_connect_finalize(rdpRdp* rdp)

View File

@@ -198,8 +198,9 @@ static BOOL fastpath_recv_update_synchronize(rdpFastPath* fastpath, STREAM* s)
return TRUE;
}
static BOOL fastpath_recv_update(rdpFastPath* fastpath, BYTE updateCode, UINT32 size, STREAM* s)
static int fastpath_recv_update(rdpFastPath* fastpath, BYTE updateCode, UINT32 size, STREAM* s)
{
int status = 0;
rdpUpdate* update = fastpath->rdp->update;
rdpContext* context = fastpath->rdp->update->context;
rdpPointerUpdate* pointer = update->pointer;
@@ -213,13 +214,13 @@ static BOOL fastpath_recv_update(rdpFastPath* fastpath, BYTE updateCode, UINT32
{
case FASTPATH_UPDATETYPE_ORDERS:
if (!fastpath_recv_orders(fastpath, s))
return FALSE;
return -1;
break;
case FASTPATH_UPDATETYPE_BITMAP:
case FASTPATH_UPDATETYPE_PALETTE:
if(!fastpath_recv_update_common(fastpath, s))
return FALSE;
if (!fastpath_recv_update_common(fastpath, s))
return -1;
break;
case FASTPATH_UPDATETYPE_SYNCHRONIZE:
@@ -230,8 +231,7 @@ static BOOL fastpath_recv_update(rdpFastPath* fastpath, BYTE updateCode, UINT32
break;
case FASTPATH_UPDATETYPE_SURFCMDS:
if (update_recv_surfcmds(update, size, s) < 0)
return FALSE;
status = update_recv_surfcmds(update, size, s);
break;
case FASTPATH_UPDATETYPE_PTR_NULL:
@@ -246,25 +246,25 @@ static BOOL fastpath_recv_update(rdpFastPath* fastpath, BYTE updateCode, UINT32
case FASTPATH_UPDATETYPE_PTR_POSITION:
if (!update_read_pointer_position(s, &pointer->pointer_position))
return FALSE;
return -1;
IFCALL(pointer->PointerPosition, context, &pointer->pointer_position);
break;
case FASTPATH_UPDATETYPE_COLOR:
if (!update_read_pointer_color(s, &pointer->pointer_color))
return FALSE;
return -1;
IFCALL(pointer->PointerColor, context, &pointer->pointer_color);
break;
case FASTPATH_UPDATETYPE_CACHED:
if (!update_read_pointer_cached(s, &pointer->pointer_cached))
return FALSE;
return -1;
IFCALL(pointer->PointerCached, context, &pointer->pointer_cached);
break;
case FASTPATH_UPDATETYPE_POINTER:
if (!update_read_pointer_new(s, &pointer->pointer_new))
return FALSE;
return -1;
IFCALL(pointer->PointerNew, context, &pointer->pointer_new);
break;
@@ -273,11 +273,12 @@ static BOOL fastpath_recv_update(rdpFastPath* fastpath, BYTE updateCode, UINT32
break;
}
return TRUE;
return status;
}
static BOOL fastpath_recv_update_data(rdpFastPath* fastpath, STREAM* s)
{
int status;
UINT16 size;
int next_pos;
UINT32 totalSize;
@@ -287,10 +288,11 @@ static BOOL fastpath_recv_update_data(rdpFastPath* fastpath, STREAM* s)
BYTE compressionFlags;
STREAM* update_stream;
STREAM* comp_stream;
rdpRdp *rdp;
rdpRdp* rdp;
UINT32 roff;
UINT32 rlen;
status = 0;
rdp = fastpath->rdp;
fastpath_read_update_header(s, &updateCode, &fragmentation, &compression);
@@ -301,8 +303,10 @@ static BOOL fastpath_recv_update_data(rdpFastPath* fastpath, STREAM* s)
compressionFlags = 0;
stream_read_UINT16(s, size);
if(stream_get_left(s) < size)
return FALSE;
if (stream_get_left(s) < size)
return -1;
next_pos = stream_get_pos(s) + size;
comp_stream = s;
@@ -348,8 +352,10 @@ static BOOL fastpath_recv_update_data(rdpFastPath* fastpath, STREAM* s)
if (update_stream)
{
if (!fastpath_recv_update(fastpath, updateCode, totalSize, update_stream))
return FALSE;
status = fastpath_recv_update(fastpath, updateCode, totalSize, update_stream);
if (status < 0)
return -1;
}
stream_set_pos(s, next_pos);
@@ -357,24 +363,25 @@ static BOOL fastpath_recv_update_data(rdpFastPath* fastpath, STREAM* s)
if (comp_stream != s)
free(comp_stream);
return TRUE;
return status;
}
int fastpath_recv_updates(rdpFastPath* fastpath, STREAM* s)
{
int status = 0;
rdpUpdate* update = fastpath->rdp->update;
IFCALL(update->BeginPaint, update->context);
while (stream_get_left(s) >= 3)
{
if (!fastpath_recv_update_data(fastpath, s))
if (fastpath_recv_update_data(fastpath, s) < 0)
return -1;
}
IFCALL(update->EndPaint, update->context);
return 0;
return status;
}
static BOOL fastpath_read_input_event_header(STREAM* s, BYTE* eventFlags, BYTE* eventCode)

View File

@@ -488,7 +488,7 @@ void gcc_write_user_data_header(STREAM* s, UINT16 type, UINT16 length)
BOOL gcc_read_client_core_data(STREAM* s, rdpSettings* settings, UINT16 blockLength)
{
char* str;
char* str = NULL;
UINT32 version;
UINT32 color_depth;
UINT16 colorDepth = 0;
@@ -518,6 +518,7 @@ BOOL gcc_read_client_core_data(STREAM* s, rdpSettings* settings, UINT16 blockLen
sprintf_s(settings->ClientHostname, 31, "%s", str);
settings->ClientHostname[31] = 0;
free(str);
str = NULL;
stream_read_UINT32(s, settings->KeyboardType); /* KeyboardType */
stream_read_UINT32(s, settings->KeyboardSubType); /* KeyboardSubType */

View File

@@ -439,7 +439,9 @@ BOOL rdp_recv_client_info(rdpRdp* rdp, STREAM* s)
if (!rdp_read_header(rdp, s, &length, &channelId))
return FALSE;
rdp_read_security_header(s, &securityFlags);
if (!rdp_read_security_header(s, &securityFlags))
return FALSE;
if ((securityFlags & SEC_INFO_PKT) == 0)
return FALSE;

View File

@@ -932,9 +932,7 @@ BOOL license_send_valid_client_error_packet(rdpLicense* license)
license_write_binary_blob(s, license->error_info);
license_send(license, s, ERROR_ALERT);
return TRUE;
return license_send(license, s, ERROR_ALERT);
}
/**

View File

@@ -532,6 +532,7 @@ BOOL mcs_send_connect_response(rdpMcs* mcs)
{
STREAM* s;
int length;
int ret;
BYTE *bm, *em;
STREAM* gcc_CCrsp;
STREAM* server_data;
@@ -556,12 +557,12 @@ BOOL mcs_send_connect_response(rdpMcs* mcs)
tpdu_write_data(s);
stream_set_mark(s, em);
transport_write(mcs->transport, s);
ret = transport_write(mcs->transport, s);
stream_free(gcc_CCrsp);
stream_free(server_data);
return TRUE;
return (ret < 0) ? FALSE : TRUE;
}
/**

View File

@@ -152,14 +152,14 @@ static INLINE BOOL update_read_coord(STREAM* s, INT32* coord, BOOL delta)
if (delta)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, lsi8);
*coord += lsi8;
}
else
{
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_UINT16(s, lsi16);
*coord = lsi16;
@@ -171,7 +171,7 @@ static INLINE BOOL update_read_color(STREAM* s, UINT32* color)
{
BYTE byte;
if(stream_get_left(s) < 3)
if (stream_get_left(s) < 3)
return FALSE;
stream_read_BYTE(s, byte);
*color = byte;
@@ -212,13 +212,13 @@ static INLINE BOOL update_read_2byte_unsigned(STREAM* s, UINT32* value)
{
BYTE byte;
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, byte);
if (byte & 0x80)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
*value = (byte & 0x7F) << 8;
@@ -237,7 +237,7 @@ static INLINE BOOL update_read_2byte_signed(STREAM* s, INT32* value)
BYTE byte;
BOOL negative;
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, byte);
@@ -248,7 +248,7 @@ static INLINE BOOL update_read_2byte_signed(STREAM* s, INT32* value)
if (byte & 0x80)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, byte);
*value = (*value << 8) | byte;
@@ -264,12 +264,12 @@ static INLINE BOOL update_read_4byte_unsigned(STREAM* s, UINT32* value)
BYTE byte;
BYTE count;
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, byte);
count = (byte & 0xC0) >> 6;
if(stream_get_left(s) < count)
if (stream_get_left(s) < count)
return FALSE;
switch (count)
@@ -312,7 +312,7 @@ static INLINE BOOL update_read_delta(STREAM* s, INT32* value)
{
BYTE byte;
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, byte);
@@ -323,7 +323,7 @@ static INLINE BOOL update_read_delta(STREAM* s, INT32* value)
if (byte & 0x80)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, byte);
*value = (*value << 8) | byte;
@@ -357,28 +357,28 @@ static INLINE BOOL update_read_brush(STREAM* s, rdpBrush* brush, BYTE fieldFlags
{
if (fieldFlags & ORDER_FIELD_01)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, brush->x);
}
if (fieldFlags & ORDER_FIELD_02)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, brush->y);
}
if (fieldFlags & ORDER_FIELD_03)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, brush->style);
}
if (fieldFlags & ORDER_FIELD_04)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, brush->hatch);
}
@@ -395,7 +395,7 @@ static INLINE BOOL update_read_brush(STREAM* s, rdpBrush* brush, BYTE fieldFlags
if (fieldFlags & ORDER_FIELD_05)
{
if(stream_get_left(s) < 7)
if (stream_get_left(s) < 7)
return FALSE;
brush->data = (BYTE*) brush->p8x8;
stream_read_BYTE(s, brush->data[7]);
@@ -422,7 +422,7 @@ static INLINE BOOL update_read_delta_rects(STREAM* s, DELTA_RECT* rectangles, in
zeroBitsSize = ((number + 1) / 2);
if(stream_get_left(s) < zeroBitsSize)
if (stream_get_left(s) < zeroBitsSize)
return FALSE;
stream_get_mark(s, zeroBits);
stream_seek(s, zeroBitsSize);
@@ -442,7 +442,7 @@ static INLINE BOOL update_read_delta_rects(STREAM* s, DELTA_RECT* rectangles, in
if (~flags & 0x20)
{
if(!update_read_delta(s, &rectangles[i].width))
if (!update_read_delta(s, &rectangles[i].width))
return FALSE;
}
else
@@ -450,7 +450,7 @@ static INLINE BOOL update_read_delta_rects(STREAM* s, DELTA_RECT* rectangles, in
if (~flags & 0x10)
{
if(!update_read_delta(s, &rectangles[i].height))
if (!update_read_delta(s, &rectangles[i].height))
return FALSE;
}
else
@@ -473,7 +473,7 @@ static INLINE BOOL update_read_delta_points(STREAM* s, DELTA_POINT* points, int
zeroBitsSize = ((number + 3) / 4);
if(stream_get_left(s) < zeroBitsSize)
if (stream_get_left(s) < zeroBitsSize)
return FALSE;
stream_get_mark(s, zeroBits);
stream_seek(s, zeroBitsSize);
@@ -501,8 +501,10 @@ static INLINE BOOL update_read_delta_points(STREAM* s, DELTA_POINT* points, int
do {\
if (orderInfo->fieldFlags & (1 << (NO-1))) \
{ \
if(stream_get_left(s) < 1) \
if (stream_get_left(s) < 1) {\
printf("%s: error reading %s\n", __FUNCTION__, #TARGET); \
return FALSE; \
} \
stream_read_BYTE(s, TARGET); \
} \
} while(0)
@@ -511,8 +513,10 @@ static INLINE BOOL update_read_delta_points(STREAM* s, DELTA_POINT* points, int
do {\
if (orderInfo->fieldFlags & (1 << (NO-1))) \
{ \
if(stream_get_left(s) < 2) \
if (stream_get_left(s) < 2) { \
printf("%s: error reading %s or %s\n", __FUNCTION__, #TARGET1, #TARGET2); \
return FALSE; \
} \
stream_read_BYTE(s, TARGET1); \
stream_read_BYTE(s, TARGET2); \
} \
@@ -522,8 +526,10 @@ static INLINE BOOL update_read_delta_points(STREAM* s, DELTA_POINT* points, int
do {\
if (orderInfo->fieldFlags & (1 << (NO-1))) \
{ \
if(stream_get_left(s) < 2) \
if (stream_get_left(s) < 2) { \
printf("%s: error reading %s\n", __FUNCTION__, #TARGET); \
return FALSE; \
} \
stream_read_UINT16(s, TARGET); \
} \
} while(0)
@@ -531,26 +537,42 @@ static INLINE BOOL update_read_delta_points(STREAM* s, DELTA_POINT* points, int
do {\
if (orderInfo->fieldFlags & (1 << (NO-1))) \
{ \
if(stream_get_left(s) < 4) \
if (stream_get_left(s) < 4) { \
printf("%s: error reading %s\n", __FUNCTION__, #TARGET); \
return FALSE; \
} \
stream_read_UINT32(s, TARGET); \
} \
} while(0)
#define ORDER_FIELD_COORD(NO, TARGET) \
if ((orderInfo->fieldFlags & (1 << (NO-1))) && !update_read_coord(s, &TARGET, orderInfo->deltaCoordinates)) \
return FALSE
do { \
if ((orderInfo->fieldFlags & (1 << (NO-1))) && !update_read_coord(s, &TARGET, orderInfo->deltaCoordinates)) { \
printf("%s: error reading %s\n", __FUNCTION__, #TARGET); \
return FALSE; \
} \
} while(0)
#define ORDER_FIELD_COLOR(NO, TARGET) \
if ((orderInfo->fieldFlags & (1 << (NO-1))) && !update_read_color(s, &TARGET)) \
return FALSE
do { \
if ((orderInfo->fieldFlags & (1 << (NO-1))) && !update_read_color(s, &TARGET)) { \
printf("%s: error reading %s\n", __FUNCTION__, #TARGET); \
return FALSE; \
} \
} while(0)
#define FIELD_SKIP_BUFFER16(s, TARGET_LEN) \
if(stream_get_left(s) < 2) \
do { \
if (stream_get_left(s) < 2) {\
printf("%s: error reading length %s\n", __FUNCTION__, #TARGET_LEN); \
return FALSE; \
}\
stream_read_UINT16(s, TARGET_LEN); \
if(!stream_skip(s, TARGET_LEN)) \
return FALSE
if (!stream_skip(s, TARGET_LEN)) { \
printf("%s: error skipping %d bytes\n", __FUNCTION__, TARGET_LEN); \
return FALSE; \
} \
} while(0)
/* Primary Drawing Orders */
@@ -599,21 +621,21 @@ BOOL update_read_opaque_rect_order(STREAM* s, ORDER_INFO* orderInfo, OPAQUE_RECT
if (orderInfo->fieldFlags & ORDER_FIELD_05)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, byte);
opaque_rect->color = (opaque_rect->color & 0xFFFFFF00) | byte;
}
if (orderInfo->fieldFlags & ORDER_FIELD_06) {
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, byte);
opaque_rect->color = (opaque_rect->color & 0xFFFF00FF) | (byte << 8);
}
if (orderInfo->fieldFlags & ORDER_FIELD_07) {
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, byte);
opaque_rect->color = (opaque_rect->color & 0xFF00FFFF) | (byte << 16);
@@ -642,7 +664,7 @@ BOOL update_read_multi_dstblt_order(STREAM* s, ORDER_INFO* orderInfo, MULTI_DSTB
if (orderInfo->fieldFlags & ORDER_FIELD_07)
{
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_UINT16(s, multi_dstblt->cbData);
return update_read_delta_rects(s, multi_dstblt->rectangles, multi_dstblt->numRectangles);
@@ -660,17 +682,17 @@ BOOL update_read_multi_patblt_order(STREAM* s, ORDER_INFO* orderInfo, MULTI_PATB
ORDER_FIELD_COLOR(6, multi_patblt->backColor);
ORDER_FIELD_COLOR(7, multi_patblt->foreColor);
if(!update_read_brush(s, &multi_patblt->brush, orderInfo->fieldFlags >> 7))
if (!update_read_brush(s, &multi_patblt->brush, orderInfo->fieldFlags >> 7))
return FALSE;
ORDER_FIELD_BYTE(13, multi_patblt->numRectangles);
if (orderInfo->fieldFlags & ORDER_FIELD_14)
{
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_UINT16(s, multi_patblt->cbData);
if(!update_read_delta_rects(s, multi_patblt->rectangles, multi_patblt->numRectangles))
if (!update_read_delta_rects(s, multi_patblt->rectangles, multi_patblt->numRectangles))
return FALSE;
}
return TRUE;
@@ -689,7 +711,7 @@ BOOL update_read_multi_scrblt_order(STREAM* s, ORDER_INFO* orderInfo, MULTI_SCRB
if (orderInfo->fieldFlags & ORDER_FIELD_09)
{
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_UINT16(s, multi_scrblt->cbData);
return update_read_delta_rects(s, multi_scrblt->rectangles, multi_scrblt->numRectangles);
@@ -707,7 +729,7 @@ BOOL update_read_multi_opaque_rect_order(STREAM* s, ORDER_INFO* orderInfo, MULTI
if (orderInfo->fieldFlags & ORDER_FIELD_05)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, byte);
multi_opaque_rect->color = (multi_opaque_rect->color & 0xFFFFFF00) | byte;
@@ -715,7 +737,7 @@ BOOL update_read_multi_opaque_rect_order(STREAM* s, ORDER_INFO* orderInfo, MULTI
if (orderInfo->fieldFlags & ORDER_FIELD_06)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, byte);
multi_opaque_rect->color = (multi_opaque_rect->color & 0xFFFF00FF) | (byte << 8);
@@ -723,7 +745,7 @@ BOOL update_read_multi_opaque_rect_order(STREAM* s, ORDER_INFO* orderInfo, MULTI
if (orderInfo->fieldFlags & ORDER_FIELD_07)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, byte);
multi_opaque_rect->color = (multi_opaque_rect->color & 0xFF00FFFF) | (byte << 16);
@@ -733,7 +755,7 @@ BOOL update_read_multi_opaque_rect_order(STREAM* s, ORDER_INFO* orderInfo, MULTI
if (orderInfo->fieldFlags & ORDER_FIELD_09)
{
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_UINT16(s, multi_opaque_rect->cbData);
return update_read_delta_rects(s, multi_opaque_rect->rectangles, multi_opaque_rect->numRectangles);
@@ -786,7 +808,7 @@ BOOL update_read_polyline_order(STREAM* s, ORDER_INFO* orderInfo, POLYLINE_ORDER
if (orderInfo->fieldFlags & ORDER_FIELD_07)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, polyline->cbData);
@@ -830,7 +852,7 @@ BOOL update_read_mem3blt_order(STREAM* s, ORDER_INFO* orderInfo, MEM3BLT_ORDER*
ORDER_FIELD_COLOR(9, mem3blt->backColor);
ORDER_FIELD_COLOR(10, mem3blt->foreColor);
if(!update_read_brush(s, &mem3blt->brush, orderInfo->fieldFlags >> 10))
if (!update_read_brush(s, &mem3blt->brush, orderInfo->fieldFlags >> 10))
return FALSE;
ORDER_FIELD_UINT16(16, mem3blt->cacheIndex);
@@ -867,7 +889,7 @@ BOOL update_read_glyph_index_order(STREAM* s, ORDER_INFO* orderInfo, GLYPH_INDEX
ORDER_FIELD_UINT16(13, glyph_index->opRight);
ORDER_FIELD_UINT16(14, glyph_index->opBottom);
if(!update_read_brush(s, &glyph_index->brush, orderInfo->fieldFlags >> 14))
if (!update_read_brush(s, &glyph_index->brush, orderInfo->fieldFlags >> 14))
return FALSE;
ORDER_FIELD_UINT16(20, glyph_index->x);
@@ -875,11 +897,11 @@ BOOL update_read_glyph_index_order(STREAM* s, ORDER_INFO* orderInfo, GLYPH_INDEX
if (orderInfo->fieldFlags & ORDER_FIELD_22)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, glyph_index->cbData);
if(stream_get_left(s) < glyph_index->cbData)
if (stream_get_left(s) < glyph_index->cbData)
return FALSE;
memcpy(glyph_index->data, s->p, glyph_index->cbData);
stream_seek(s, glyph_index->cbData);
@@ -908,11 +930,11 @@ BOOL update_read_fast_index_order(STREAM* s, ORDER_INFO* orderInfo, FAST_INDEX_O
if (orderInfo->fieldFlags & ORDER_FIELD_15)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, fast_index->cbData);
if(stream_get_left(s) < fast_index->cbData)
if (stream_get_left(s) < fast_index->cbData)
return FALSE;
memcpy(fast_index->data, s->p, fast_index->cbData);
stream_seek(s, fast_index->cbData);
@@ -944,10 +966,10 @@ BOOL update_read_fast_glyph_order(STREAM* s, ORDER_INFO* orderInfo, FAST_GLYPH_O
if (orderInfo->fieldFlags & ORDER_FIELD_15)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, fast_glyph->cbData);
if(stream_get_left(s) < fast_glyph->cbData)
if (stream_get_left(s) < fast_glyph->cbData)
return FALSE;
memcpy(fast_glyph->data, s->p, fast_glyph->cbData);
phold = s->p;
@@ -959,14 +981,14 @@ BOOL update_read_fast_glyph_order(STREAM* s, ORDER_INFO* orderInfo, FAST_GLYPH_O
/* parse optional glyph data */
glyph = (GLYPH_DATA_V2*) malloc(sizeof(GLYPH_DATA_V2));
glyph->cacheIndex = fast_glyph->data[0];
if(!update_read_2byte_signed(s, &glyph->x) ||
if (!update_read_2byte_signed(s, &glyph->x) ||
!update_read_2byte_signed(s, &glyph->y) ||
!update_read_2byte_unsigned(s, &glyph->cx) ||
!update_read_2byte_unsigned(s, &glyph->cy))
return FALSE;
glyph->cb = ((glyph->cx + 7) / 8) * glyph->cy;
glyph->cb += ((glyph->cb % 4) > 0) ? 4 - (glyph->cb % 4) : 0;
if(stream_get_left(s) < glyph->cb)
if (stream_get_left(s) < glyph->cb)
return FALSE;
glyph->aj = (BYTE*) malloc(glyph->cb);
stream_read(s, glyph->aj, glyph->cb);
@@ -988,7 +1010,7 @@ BOOL update_read_polygon_sc_order(STREAM* s, ORDER_INFO* orderInfo, POLYGON_SC_O
if (orderInfo->fieldFlags & ORDER_FIELD_07)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, polygon_sc->cbData);
@@ -1011,14 +1033,14 @@ BOOL update_read_polygon_cb_order(STREAM* s, ORDER_INFO* orderInfo, POLYGON_CB_O
ORDER_FIELD_COLOR(5, polygon_cb->backColor);
ORDER_FIELD_COLOR(6, polygon_cb->foreColor);
if(!update_read_brush(s, &polygon_cb->brush, orderInfo->fieldFlags >> 6))
if (!update_read_brush(s, &polygon_cb->brush, orderInfo->fieldFlags >> 6))
return FALSE;
ORDER_FIELD_BYTE(12, polygon_cb->numPoints);
if (orderInfo->fieldFlags & ORDER_FIELD_13)
{
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, polygon_cb->cbData);
@@ -1027,7 +1049,7 @@ BOOL update_read_polygon_cb_order(STREAM* s, ORDER_INFO* orderInfo, POLYGON_CB_O
else
polygon_cb->points = (DELTA_POINT*) realloc(polygon_cb->points, sizeof(DELTA_POINT) * polygon_cb->numPoints);
if(!update_read_delta_points(s, polygon_cb->points, polygon_cb->numPoints, polygon_cb->xStart, polygon_cb->yStart))
if (!update_read_delta_points(s, polygon_cb->points, polygon_cb->numPoints, polygon_cb->xStart, polygon_cb->yStart))
return FALSE;
}
@@ -1065,7 +1087,7 @@ BOOL update_read_ellipse_cb_order(STREAM* s, ORDER_INFO* orderInfo, ELLIPSE_CB_O
BOOL update_read_cache_bitmap_order(STREAM* s, CACHE_BITMAP_ORDER* cache_bitmap_order, BOOL compressed, UINT16 flags)
{
if(stream_get_left(s) < 9)
if (stream_get_left(s) < 9)
return FALSE;
stream_read_BYTE(s, cache_bitmap_order->cacheId); /* cacheId (1 byte) */
stream_seek_BYTE(s); /* pad1Octet (1 byte) */
@@ -1080,13 +1102,13 @@ BOOL update_read_cache_bitmap_order(STREAM* s, CACHE_BITMAP_ORDER* cache_bitmap_
if ((flags & NO_BITMAP_COMPRESSION_HDR) == 0)
{
BYTE* bitmapComprHdr = (BYTE*) &(cache_bitmap_order->bitmapComprHdr);
if(stream_get_left(s) < 8)
if (stream_get_left(s) < 8)
return FALSE;
stream_read(s, bitmapComprHdr, 8); /* bitmapComprHdr (8 bytes) */
cache_bitmap_order->bitmapLength -= 8;
}
if(stream_get_left(s) < cache_bitmap_order->bitmapLength)
if (stream_get_left(s) < cache_bitmap_order->bitmapLength)
return FALSE;
stream_get_mark(s, cache_bitmap_order->bitmapDataStream);
@@ -1094,7 +1116,7 @@ BOOL update_read_cache_bitmap_order(STREAM* s, CACHE_BITMAP_ORDER* cache_bitmap_
}
else
{
if(stream_get_left(s) < cache_bitmap_order->bitmapLength)
if (stream_get_left(s) < cache_bitmap_order->bitmapLength)
return FALSE;
stream_get_mark(s, cache_bitmap_order->bitmapDataStream);
@@ -1116,7 +1138,7 @@ BOOL update_read_cache_bitmap_v2_order(STREAM* s, CACHE_BITMAP_V2_ORDER* cache_b
if (cache_bitmap_v2_order->flags & CBR2_PERSISTENT_KEY_PRESENT)
{
if(stream_get_left(s) < 8)
if (stream_get_left(s) < 8)
return FALSE;
stream_read_UINT32(s, cache_bitmap_v2_order->key1); /* key1 (4 bytes) */
stream_read_UINT32(s, cache_bitmap_v2_order->key2); /* key2 (4 bytes) */
@@ -1124,18 +1146,18 @@ BOOL update_read_cache_bitmap_v2_order(STREAM* s, CACHE_BITMAP_V2_ORDER* cache_b
if (cache_bitmap_v2_order->flags & CBR2_HEIGHT_SAME_AS_WIDTH)
{
if(!update_read_2byte_unsigned(s, &cache_bitmap_v2_order->bitmapWidth)) /* bitmapWidth */
if (!update_read_2byte_unsigned(s, &cache_bitmap_v2_order->bitmapWidth)) /* bitmapWidth */
return FALSE;
cache_bitmap_v2_order->bitmapHeight = cache_bitmap_v2_order->bitmapWidth;
}
else
{
if(!update_read_2byte_unsigned(s, &cache_bitmap_v2_order->bitmapWidth) || /* bitmapWidth */
if (!update_read_2byte_unsigned(s, &cache_bitmap_v2_order->bitmapWidth) || /* bitmapWidth */
!update_read_2byte_unsigned(s, &cache_bitmap_v2_order->bitmapHeight)) /* bitmapHeight */
return FALSE;
}
if(!update_read_4byte_unsigned(s, &cache_bitmap_v2_order->bitmapLength) || /* bitmapLength */
if (!update_read_4byte_unsigned(s, &cache_bitmap_v2_order->bitmapLength) || /* bitmapLength */
!update_read_2byte_unsigned(s, &cache_bitmap_v2_order->cacheIndex)) /* cacheIndex */
return FALSE;
@@ -1146,7 +1168,7 @@ BOOL update_read_cache_bitmap_v2_order(STREAM* s, CACHE_BITMAP_V2_ORDER* cache_b
{
if (!(cache_bitmap_v2_order->flags & CBR2_NO_BITMAP_COMPRESSION_HDR))
{
if(stream_get_left(s) < 8)
if (stream_get_left(s) < 8)
return FALSE;
stream_read_UINT16(s, cache_bitmap_v2_order->cbCompFirstRowSize); /* cbCompFirstRowSize (2 bytes) */
@@ -1156,14 +1178,14 @@ BOOL update_read_cache_bitmap_v2_order(STREAM* s, CACHE_BITMAP_V2_ORDER* cache_b
cache_bitmap_v2_order->bitmapLength = cache_bitmap_v2_order->cbCompMainBodySize;
}
if(stream_get_left(s) < cache_bitmap_v2_order->bitmapLength)
if (stream_get_left(s) < cache_bitmap_v2_order->bitmapLength)
return FALSE;
stream_get_mark(s, cache_bitmap_v2_order->bitmapDataStream);
stream_seek(s, cache_bitmap_v2_order->bitmapLength);
}
else
{
if(stream_get_left(s) < cache_bitmap_v2_order->bitmapLength)
if (stream_get_left(s) < cache_bitmap_v2_order->bitmapLength)
return FALSE;
stream_get_mark(s, cache_bitmap_v2_order->bitmapDataStream);
stream_seek(s, cache_bitmap_v2_order->bitmapLength);
@@ -1183,7 +1205,7 @@ BOOL update_read_cache_bitmap_v3_order(STREAM* s, CACHE_BITMAP_V3_ORDER* cache_b
bitsPerPixelId = (flags & 0x00000078) >> 3;
cache_bitmap_v3_order->bpp = CBR23_BPP[bitsPerPixelId];
if(stream_get_left(s) < 21)
if (stream_get_left(s) < 21)
return FALSE;
stream_read_UINT16(s, cache_bitmap_v3_order->cacheIndex); /* cacheIndex (2 bytes) */
stream_read_UINT32(s, cache_bitmap_v3_order->key1); /* key1 (4 bytes) */
@@ -1199,7 +1221,7 @@ BOOL update_read_cache_bitmap_v3_order(STREAM* s, CACHE_BITMAP_V3_ORDER* cache_b
stream_read_UINT16(s, bitmapData->height); /* height (2 bytes) */
stream_read_UINT32(s, bitmapData->length); /* length (4 bytes) */
if(stream_get_left(s) < bitmapData->length)
if (stream_get_left(s) < bitmapData->length)
return FALSE;
if (bitmapData->data == NULL)
bitmapData->data = (BYTE*) malloc(bitmapData->length);
@@ -1215,12 +1237,12 @@ BOOL update_read_cache_color_table_order(STREAM* s, CACHE_COLOR_TABLE_ORDER* cac
int i;
UINT32* colorTable;
if(stream_get_left(s) < 3)
if (stream_get_left(s) < 3)
return FALSE;
stream_read_BYTE(s, cache_color_table_order->cacheIndex); /* cacheIndex (1 byte) */
stream_read_UINT16(s, cache_color_table_order->numberColors); /* numberColors (2 bytes) */
if(stream_get_left(s) < cache_color_table_order->numberColors * 4)
if (stream_get_left(s) < cache_color_table_order->numberColors * 4)
return FALSE;
colorTable = cache_color_table_order->colorTable;
@@ -1245,7 +1267,7 @@ BOOL update_read_cache_glyph_order(STREAM* s, CACHE_GLYPH_ORDER* cache_glyph_ord
INT16 lsi16;
GLYPH_DATA* glyph;
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_BYTE(s, cache_glyph_order->cacheId); /* cacheId (1 byte) */
stream_read_BYTE(s, cache_glyph_order->cGlyphs); /* cGlyphs (1 byte) */
@@ -1258,7 +1280,7 @@ BOOL update_read_cache_glyph_order(STREAM* s, CACHE_GLYPH_ORDER* cache_glyph_ord
}
glyph = cache_glyph_order->glyphData[i];
if(stream_get_left(s) < 10)
if (stream_get_left(s) < 10)
return FALSE;
stream_read_UINT16(s, glyph->cacheIndex);
stream_read_UINT16(s, lsi16);
@@ -1271,7 +1293,7 @@ BOOL update_read_cache_glyph_order(STREAM* s, CACHE_GLYPH_ORDER* cache_glyph_ord
glyph->cb = ((glyph->cx + 7) / 8) * glyph->cy;
glyph->cb += ((glyph->cb % 4) > 0) ? 4 - (glyph->cb % 4) : 0;
if(stream_get_left(s) < glyph->cb)
if (stream_get_left(s) < glyph->cb)
return FALSE;
glyph->aj = (BYTE*) malloc(glyph->cb);
stream_read(s, glyph->aj, glyph->cb);
@@ -1300,11 +1322,11 @@ BOOL update_read_cache_glyph_v2_order(STREAM* s, CACHE_GLYPH_V2_ORDER* cache_gly
}
glyph = cache_glyph_v2_order->glyphData[i];
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, glyph->cacheIndex);
if(!update_read_2byte_signed(s, &glyph->x) ||
if (!update_read_2byte_signed(s, &glyph->x) ||
!update_read_2byte_signed(s, &glyph->y) ||
!update_read_2byte_unsigned(s, &glyph->cx) ||
!update_read_2byte_unsigned(s, &glyph->cy))
@@ -1315,7 +1337,7 @@ BOOL update_read_cache_glyph_v2_order(STREAM* s, CACHE_GLYPH_V2_ORDER* cache_gly
glyph->cb = ((glyph->cx + 7) / 8) * glyph->cy;
glyph->cb += ((glyph->cb % 4) > 0) ? 4 - (glyph->cb % 4) : 0;
if(stream_get_left(s) < glyph->cb)
if (stream_get_left(s) < glyph->cb)
return FALSE;
glyph->aj = (BYTE*) malloc(glyph->cb);
stream_read(s, glyph->aj, glyph->cb);
@@ -1338,7 +1360,7 @@ BOOL update_decompress_brush(STREAM* s, BYTE* output, BYTE bpp)
palette = s->p + 16;
bytesPerPixel = ((bpp + 1) / 8);
if(stream_get_left(s) < 16) // 64 / 4
if (stream_get_left(s) < 16) // 64 / 4
return FALSE;
for (y = 7; y >= 0; y--)
@@ -1366,7 +1388,7 @@ BOOL update_read_cache_brush_order(STREAM* s, CACHE_BRUSH_ORDER* cache_brush_ord
BYTE iBitmapFormat;
BOOL compressed = FALSE;
if(stream_get_left(s) < 6)
if (stream_get_left(s) < 6)
return FALSE;
stream_read_BYTE(s, cache_brush_order->index); /* cacheEntry (1 byte) */
@@ -1393,7 +1415,7 @@ BOOL update_read_cache_brush_order(STREAM* s, CACHE_BRUSH_ORDER* cache_brush_ord
}
/* rows are encoded in reverse order */
if(stream_get_left(s) < 8)
if (stream_get_left(s) < 8)
return FALSE;
for (i = 7; i >= 0; i--)
@@ -1413,14 +1435,14 @@ BOOL update_read_cache_brush_order(STREAM* s, CACHE_BRUSH_ORDER* cache_brush_ord
if (compressed != FALSE)
{
/* compressed brush */
if(!update_decompress_brush(s, cache_brush_order->data, cache_brush_order->bpp))
if (!update_decompress_brush(s, cache_brush_order->data, cache_brush_order->bpp))
return FALSE;
}
else
{
/* uncompressed brush */
int scanline = (cache_brush_order->bpp / 8) * 8;
if(stream_get_left(s) < scanline * 8)
if (stream_get_left(s) < scanline * 8)
return FALSE;
for (i = 7; i >= 0; i--)
@@ -1441,7 +1463,7 @@ BOOL update_read_create_offscreen_bitmap_order(STREAM* s, CREATE_OFFSCREEN_BITMA
BOOL deleteListPresent;
OFFSCREEN_DELETE_LIST* deleteList;
if(stream_get_left(s) < 6)
if (stream_get_left(s) < 6)
return FALSE;
stream_read_UINT16(s, flags); /* flags (2 bytes) */
create_offscreen_bitmap->id = flags & 0x7FFF;
@@ -1454,7 +1476,7 @@ BOOL update_read_create_offscreen_bitmap_order(STREAM* s, CREATE_OFFSCREEN_BITMA
if (deleteListPresent)
{
int i;
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_UINT16(s, deleteList->cIndices);
@@ -1464,7 +1486,7 @@ BOOL update_read_create_offscreen_bitmap_order(STREAM* s, CREATE_OFFSCREEN_BITMA
deleteList->indices = realloc(deleteList->indices, deleteList->sIndices * 2);
}
if(stream_get_left(s) < 2 * deleteList->cIndices)
if (stream_get_left(s) < 2 * deleteList->cIndices)
return FALSE;
for (i = 0; i < (int) deleteList->cIndices; i++)
@@ -1481,7 +1503,7 @@ BOOL update_read_create_offscreen_bitmap_order(STREAM* s, CREATE_OFFSCREEN_BITMA
BOOL update_read_switch_surface_order(STREAM* s, SWITCH_SURFACE_ORDER* switch_surface)
{
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_UINT16(s, switch_surface->bitmapId); /* bitmapId (2 bytes) */
return TRUE;
@@ -1491,7 +1513,7 @@ BOOL update_read_create_nine_grid_bitmap_order(STREAM* s, CREATE_NINE_GRID_BITMA
{
NINE_GRID_BITMAP_INFO* nineGridInfo;
if(stream_get_left(s) < 19)
if (stream_get_left(s) < 19)
return FALSE;
stream_read_BYTE(s, create_nine_grid_bitmap->bitmapBpp); /* bitmapBpp (1 byte) */
stream_read_UINT16(s, create_nine_grid_bitmap->bitmapId); /* bitmapId (2 bytes) */
@@ -1508,7 +1530,7 @@ BOOL update_read_create_nine_grid_bitmap_order(STREAM* s, CREATE_NINE_GRID_BITMA
BOOL update_read_frame_marker_order(STREAM* s, FRAME_MARKER_ORDER* frame_marker)
{
if(stream_get_left(s) < 4)
if (stream_get_left(s) < 4)
return FALSE;
stream_read_UINT32(s, frame_marker->action); /* action (4 bytes) */
return TRUE;
@@ -1516,7 +1538,7 @@ BOOL update_read_frame_marker_order(STREAM* s, FRAME_MARKER_ORDER* frame_marker)
BOOL update_read_stream_bitmap_first_order(STREAM* s, STREAM_BITMAP_FIRST_ORDER* stream_bitmap_first)
{
if(stream_get_left(s) < 10) // 8 + 2 at least
if (stream_get_left(s) < 10) // 8 + 2 at least
return FALSE;
stream_read_BYTE(s, stream_bitmap_first->bitmapFlags); /* bitmapFlags (1 byte) */
stream_read_BYTE(s, stream_bitmap_first->bitmapBpp); /* bitmapBpp (1 byte) */
@@ -1525,11 +1547,11 @@ BOOL update_read_stream_bitmap_first_order(STREAM* s, STREAM_BITMAP_FIRST_ORDER*
stream_read_UINT16(s, stream_bitmap_first->bitmapHeight); /* bitmapHeight (2 bytes) */
if (stream_bitmap_first->bitmapFlags & STREAM_BITMAP_V2) {
if(stream_get_left(s) < 4)
if (stream_get_left(s) < 4)
return FALSE;
stream_read_UINT32(s, stream_bitmap_first->bitmapSize); /* bitmapSize (4 bytes) */
} else {
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_UINT16(s, stream_bitmap_first->bitmapSize); /* bitmapSize (2 bytes) */
}
@@ -1540,7 +1562,7 @@ BOOL update_read_stream_bitmap_first_order(STREAM* s, STREAM_BITMAP_FIRST_ORDER*
BOOL update_read_stream_bitmap_next_order(STREAM* s, STREAM_BITMAP_FIRST_ORDER* stream_bitmap_next)
{
if(stream_get_left(s) < 5)
if (stream_get_left(s) < 5)
return FALSE;
stream_read_BYTE(s, stream_bitmap_next->bitmapFlags); /* bitmapFlags (1 byte) */
stream_read_UINT16(s, stream_bitmap_next->bitmapType); /* bitmapType (2 bytes) */
@@ -1550,7 +1572,7 @@ BOOL update_read_stream_bitmap_next_order(STREAM* s, STREAM_BITMAP_FIRST_ORDER*
BOOL update_read_draw_gdiplus_first_order(STREAM* s, DRAW_GDIPLUS_FIRST_ORDER* draw_gdiplus_first)
{
if(stream_get_left(s) < 11)
if (stream_get_left(s) < 11)
return FALSE;
stream_seek_BYTE(s); /* pad1Octet (1 byte) */
stream_read_UINT16(s, draw_gdiplus_first->cbSize); /* cbSize (2 bytes) */
@@ -1562,7 +1584,7 @@ BOOL update_read_draw_gdiplus_first_order(STREAM* s, DRAW_GDIPLUS_FIRST_ORDER* d
BOOL update_read_draw_gdiplus_next_order(STREAM* s, DRAW_GDIPLUS_NEXT_ORDER* draw_gdiplus_next)
{
if(stream_get_left(s) < 3)
if (stream_get_left(s) < 3)
return FALSE;
stream_seek_BYTE(s); /* pad1Octet (1 byte) */
FIELD_SKIP_BUFFER16(s, draw_gdiplus_next->cbSize); /* cbSize(2 bytes) + emfRecords */
@@ -1571,7 +1593,7 @@ BOOL update_read_draw_gdiplus_next_order(STREAM* s, DRAW_GDIPLUS_NEXT_ORDER* dra
BOOL update_read_draw_gdiplus_end_order(STREAM* s, DRAW_GDIPLUS_END_ORDER* draw_gdiplus_end)
{
if(stream_get_left(s) < 11)
if (stream_get_left(s) < 11)
return FALSE;
stream_seek_BYTE(s); /* pad1Octet (1 byte) */
stream_read_UINT16(s, draw_gdiplus_end->cbSize); /* cbSize (2 bytes) */
@@ -1583,7 +1605,7 @@ BOOL update_read_draw_gdiplus_end_order(STREAM* s, DRAW_GDIPLUS_END_ORDER* draw_
BOOL update_read_draw_gdiplus_cache_first_order(STREAM* s, DRAW_GDIPLUS_CACHE_FIRST_ORDER* draw_gdiplus_cache_first)
{
if(stream_get_left(s) < 11)
if (stream_get_left(s) < 11)
return FALSE;
stream_read_BYTE(s, draw_gdiplus_cache_first->flags); /* flags (1 byte) */
stream_read_UINT16(s, draw_gdiplus_cache_first->cacheType); /* cacheType (2 bytes) */
@@ -1596,7 +1618,7 @@ BOOL update_read_draw_gdiplus_cache_first_order(STREAM* s, DRAW_GDIPLUS_CACHE_FI
BOOL update_read_draw_gdiplus_cache_next_order(STREAM* s, DRAW_GDIPLUS_CACHE_NEXT_ORDER* draw_gdiplus_cache_next)
{
if(stream_get_left(s) < 7)
if (stream_get_left(s) < 7)
return FALSE;
stream_read_BYTE(s, draw_gdiplus_cache_next->flags); /* flags (1 byte) */
stream_read_UINT16(s, draw_gdiplus_cache_next->cacheType); /* cacheType (2 bytes) */
@@ -1608,7 +1630,7 @@ BOOL update_read_draw_gdiplus_cache_next_order(STREAM* s, DRAW_GDIPLUS_CACHE_NEX
BOOL update_read_draw_gdiplus_cache_end_order(STREAM* s, DRAW_GDIPLUS_CACHE_END_ORDER* draw_gdiplus_cache_end)
{
if(stream_get_left(s) < 11)
if (stream_get_left(s) < 11)
return FALSE;
stream_read_BYTE(s, draw_gdiplus_cache_end->flags); /* flags (1 byte) */
stream_read_UINT16(s, draw_gdiplus_cache_end->cacheType); /* cacheType (2 bytes) */
@@ -1635,7 +1657,7 @@ BOOL update_read_field_flags(STREAM* s, UINT32* fieldFlags, BYTE flags, BYTE fie
fieldBytes = 0;
}
if(stream_get_left(s) < fieldBytes)
if (stream_get_left(s) < fieldBytes)
return FALSE;
*fieldFlags = 0;
@@ -1651,7 +1673,7 @@ BOOL update_read_bounds(STREAM* s, rdpBounds* bounds)
{
BYTE flags;
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, flags); /* field flags */
@@ -1673,29 +1695,29 @@ BOOL update_read_bounds(STREAM* s, rdpBounds* bounds)
}
else if (flags & BOUND_DELTA_TOP)
{
if(!update_read_coord(s, &bounds->top, TRUE))
if (!update_read_coord(s, &bounds->top, TRUE))
return FALSE;
}
if (flags & BOUND_RIGHT)
{
if(!update_read_coord(s, &bounds->right, FALSE))
if (!update_read_coord(s, &bounds->right, FALSE))
return FALSE;
}
else if (flags & BOUND_DELTA_RIGHT)
{
if(!update_read_coord(s, &bounds->right, TRUE))
if (!update_read_coord(s, &bounds->right, TRUE))
return FALSE;
}
if (flags & BOUND_BOTTOM)
{
if(!update_read_coord(s, &bounds->bottom, FALSE))
if (!update_read_coord(s, &bounds->bottom, FALSE))
return FALSE;
}
else if (flags & BOUND_DELTA_BOTTOM)
{
if(!update_read_coord(s, &bounds->bottom, TRUE))
if (!update_read_coord(s, &bounds->bottom, TRUE))
return FALSE;
}
return TRUE;
@@ -1894,7 +1916,7 @@ BOOL update_recv_secondary_order(rdpUpdate* update, STREAM* s, BYTE flags)
rdpContext* context = update->context;
rdpSecondaryUpdate* secondary = update->secondary;
if(stream_get_left(s) < 5)
if (stream_get_left(s) < 5)
return FALSE;
stream_read_UINT16(s, orderLength); /* orderLength (2 bytes) */
stream_read_UINT16(s, extraFlags); /* extraFlags (2 bytes) */
@@ -1950,20 +1972,20 @@ BOOL update_recv_secondary_order(rdpUpdate* update, STREAM* s, BYTE flags)
case ORDER_TYPE_CACHE_GLYPH:
if (secondary->glyph_v2)
{
if(!update_read_cache_glyph_v2_order(s, &(secondary->cache_glyph_v2_order), extraFlags))
if (!update_read_cache_glyph_v2_order(s, &(secondary->cache_glyph_v2_order), extraFlags))
return FALSE;
IFCALL(secondary->CacheGlyphV2, context, &(secondary->cache_glyph_v2_order));
}
else
{
if(!update_read_cache_glyph_order(s, &(secondary->cache_glyph_order), extraFlags))
if (!update_read_cache_glyph_order(s, &(secondary->cache_glyph_order), extraFlags))
return FALSE;
IFCALL(secondary->CacheGlyph, context, &(secondary->cache_glyph_order));
}
break;
case ORDER_TYPE_CACHE_BRUSH:
if(!update_read_cache_brush_order(s, &(secondary->cache_brush_order), extraFlags))
if (!update_read_cache_brush_order(s, &(secondary->cache_brush_order), extraFlags))
return FALSE;
IFCALL(secondary->CacheBrush, context, &(secondary->cache_brush_order));
break;
@@ -1994,73 +2016,73 @@ BOOL update_recv_altsec_order(rdpUpdate* update, STREAM* s, BYTE flags)
switch (orderType)
{
case ORDER_TYPE_CREATE_OFFSCREEN_BITMAP:
if(!update_read_create_offscreen_bitmap_order(s, &(altsec->create_offscreen_bitmap)))
if (!update_read_create_offscreen_bitmap_order(s, &(altsec->create_offscreen_bitmap)))
return FALSE;
IFCALL(altsec->CreateOffscreenBitmap, context, &(altsec->create_offscreen_bitmap));
break;
case ORDER_TYPE_SWITCH_SURFACE:
if(!update_read_switch_surface_order(s, &(altsec->switch_surface)))
if (!update_read_switch_surface_order(s, &(altsec->switch_surface)))
return FALSE;
IFCALL(altsec->SwitchSurface, context, &(altsec->switch_surface));
break;
case ORDER_TYPE_CREATE_NINE_GRID_BITMAP:
if(!update_read_create_nine_grid_bitmap_order(s, &(altsec->create_nine_grid_bitmap)))
if (!update_read_create_nine_grid_bitmap_order(s, &(altsec->create_nine_grid_bitmap)))
return FALSE;
IFCALL(altsec->CreateNineGridBitmap, context, &(altsec->create_nine_grid_bitmap));
break;
case ORDER_TYPE_FRAME_MARKER:
if(!update_read_frame_marker_order(s, &(altsec->frame_marker)))
if (!update_read_frame_marker_order(s, &(altsec->frame_marker)))
return FALSE;
IFCALL(altsec->FrameMarker, context, &(altsec->frame_marker));
break;
case ORDER_TYPE_STREAM_BITMAP_FIRST:
if(!update_read_stream_bitmap_first_order(s, &(altsec->stream_bitmap_first)))
if (!update_read_stream_bitmap_first_order(s, &(altsec->stream_bitmap_first)))
return FALSE;
IFCALL(altsec->StreamBitmapFirst, context, &(altsec->stream_bitmap_first));
break;
case ORDER_TYPE_STREAM_BITMAP_NEXT:
if(!update_read_stream_bitmap_next_order(s, &(altsec->stream_bitmap_next)))
if (!update_read_stream_bitmap_next_order(s, &(altsec->stream_bitmap_next)))
return FALSE;
IFCALL(altsec->StreamBitmapNext, context, &(altsec->stream_bitmap_next));
break;
case ORDER_TYPE_GDIPLUS_FIRST:
if(!update_read_draw_gdiplus_first_order(s, &(altsec->draw_gdiplus_first)))
if (!update_read_draw_gdiplus_first_order(s, &(altsec->draw_gdiplus_first)))
return FALSE;
IFCALL(altsec->DrawGdiPlusFirst, context, &(altsec->draw_gdiplus_first));
break;
case ORDER_TYPE_GDIPLUS_NEXT:
if(!update_read_draw_gdiplus_next_order(s, &(altsec->draw_gdiplus_next)))
if (!update_read_draw_gdiplus_next_order(s, &(altsec->draw_gdiplus_next)))
return FALSE;
IFCALL(altsec->DrawGdiPlusNext, context, &(altsec->draw_gdiplus_next));
break;
case ORDER_TYPE_GDIPLUS_END:
/* The reader returns FALSE on a short stream, so the result must be negated (as in every
 * sibling case): bail out on parse failure and only invoke the callback on success. */
if(!update_read_draw_gdiplus_end_order(s, &(altsec->draw_gdiplus_end)))
if (!update_read_draw_gdiplus_end_order(s, &(altsec->draw_gdiplus_end)))
return FALSE;
IFCALL(altsec->DrawGdiPlusEnd, context, &(altsec->draw_gdiplus_end));
break;
case ORDER_TYPE_GDIPLUS_CACHE_FIRST:
if(!update_read_draw_gdiplus_cache_first_order(s, &(altsec->draw_gdiplus_cache_first)))
if (!update_read_draw_gdiplus_cache_first_order(s, &(altsec->draw_gdiplus_cache_first)))
return FALSE;
IFCALL(altsec->DrawGdiPlusCacheFirst, context, &(altsec->draw_gdiplus_cache_first));
break;
case ORDER_TYPE_GDIPLUS_CACHE_NEXT:
if(!update_read_draw_gdiplus_cache_next_order(s, &(altsec->draw_gdiplus_cache_next)))
if (!update_read_draw_gdiplus_cache_next_order(s, &(altsec->draw_gdiplus_cache_next)))
return FALSE;
IFCALL(altsec->DrawGdiPlusCacheNext, context, &(altsec->draw_gdiplus_cache_next));
break;
case ORDER_TYPE_GDIPLUS_CACHE_END:
if(!update_read_draw_gdiplus_cache_end_order(s, &(altsec->draw_gdiplus_cache_end)))
if (!update_read_draw_gdiplus_cache_end_order(s, &(altsec->draw_gdiplus_cache_end)))
return FALSE;
IFCALL(altsec->DrawGdiPlusCacheEnd, context, &(altsec->draw_gdiplus_cache_end));
break;
@@ -2082,8 +2104,9 @@ BOOL update_recv_order(rdpUpdate* update, STREAM* s)
{
BYTE controlFlags;
if(stream_get_left(s) < 1)
if (stream_get_left(s) < 1)
return FALSE;
stream_read_BYTE(s, controlFlags); /* controlFlags (1 byte) */
if (!(controlFlags & ORDER_STANDARD))

View File

@@ -135,6 +135,8 @@ static BOOL peer_recv_data_pdu(freerdp_peer* client, STREAM* s)
return FALSE;
case DATA_PDU_TYPE_FRAME_ACKNOWLEDGE:
/* the acknowledged frame id is a 4-byte field; reject the PDU if it is truncated */
if (stream_get_left(s) < 4)
return FALSE;
stream_read_UINT32(s, client->ack_frame_id);
break;
@@ -176,7 +178,8 @@ static int peer_recv_tpkt_pdu(freerdp_peer* client, STREAM* s)
if (rdp->settings->DisableEncryption)
{
rdp_read_security_header(s, &securityFlags);
if (!rdp_read_security_header(s, &securityFlags))
return -1;
if (securityFlags & SEC_ENCRYPT)
{
@@ -237,7 +240,7 @@ static int peer_recv_fastpath_pdu(freerdp_peer* client, STREAM* s)
if (fastpath->encryptionFlags & FASTPATH_OUTPUT_ENCRYPTED)
{
if(!rdp_decrypt(rdp, s, length, (fastpath->encryptionFlags & FASTPATH_OUTPUT_SECURE_CHECKSUM) ? SEC_SECURE_CHECKSUM : 0))
if (!rdp_decrypt(rdp, s, length, (fastpath->encryptionFlags & FASTPATH_OUTPUT_SECURE_CHECKSUM) ? SEC_SECURE_CHECKSUM : 0))
return -1;
}

View File

@@ -503,7 +503,7 @@ int rdp_recv_data_pdu(rdpRdp* rdp, STREAM* s)
UINT32 rlen;
STREAM* comp_stream;
if(!rdp_read_share_data_header(s, &length, &type, &share_id, &compressed_type, &compressed_len))
if (!rdp_read_share_data_header(s, &length, &type, &share_id, &compressed_type, &compressed_len))
return -1;
comp_stream = s;
@@ -552,7 +552,7 @@ int rdp_recv_data_pdu(rdpRdp* rdp, STREAM* s)
break;
case DATA_PDU_TYPE_SYNCHRONIZE:
if(!rdp_recv_synchronize_pdu(rdp, comp_stream))
if (!rdp_recv_synchronize_pdu(rdp, comp_stream))
return -1;
break;
@@ -677,6 +677,7 @@ BOOL rdp_decrypt(rdpRdp* rdp, STREAM* s, int length, UINT16 securityFlags)
if (stream_get_left(s) < 12)
return FALSE;
stream_read_UINT16(s, len); /* 0x10 */
stream_read_BYTE(s, version); /* 0x1 */
stream_read_BYTE(s, pad);
@@ -781,7 +782,7 @@ static int rdp_recv_tpkt_pdu(rdpRdp* rdp, STREAM* s)
if (channelId != MCS_GLOBAL_CHANNEL_ID)
{
if(!freerdp_channel_process(rdp->instance, s, channelId))
if (!freerdp_channel_process(rdp->instance, s, channelId))
return -1;
}
else
@@ -789,8 +790,10 @@ static int rdp_recv_tpkt_pdu(rdpRdp* rdp, STREAM* s)
while (stream_get_left(s) > 3)
{
stream_get_mark(s, nextp);
if (!rdp_read_share_control_header(s, &pduLength, &pduType, &pduSource))
return -1;
nextp += pduLength;
rdp->settings->PduSource = pduSource;
@@ -832,6 +835,7 @@ static int rdp_recv_fastpath_pdu(rdpRdp* rdp, STREAM* s)
rdpFastPath* fastpath;
fastpath = rdp->fastpath;
if (!fastpath_read_header_rdp(fastpath, s, &length))
return -1;
@@ -844,6 +848,7 @@ static int rdp_recv_fastpath_pdu(rdpRdp* rdp, STREAM* s)
if (fastpath->encryptionFlags & FASTPATH_OUTPUT_ENCRYPTED)
{
UINT16 flags = (fastpath->encryptionFlags & FASTPATH_OUTPUT_SECURE_CHECKSUM) ? SEC_SECURE_CHECKSUM : 0;
if (!rdp_decrypt(rdp, s, length, flags))
return -1;
}

View File

@@ -25,13 +25,14 @@
#include "surface.h"
static BOOL update_recv_surfcmd_surface_bits(rdpUpdate* update, STREAM* s, UINT32 *length)
static int update_recv_surfcmd_surface_bits(rdpUpdate* update, STREAM* s, UINT32 *length)
{
int pos;
SURFACE_BITS_COMMAND* cmd = &update->surface_bits_command;
if(stream_get_left(s) < 20)
return FALSE;
if (stream_get_left(s) < 20)
return -1;
stream_read_UINT16(s, cmd->destLeft);
stream_read_UINT16(s, cmd->destTop);
stream_read_UINT16(s, cmd->destRight);
@@ -42,17 +43,19 @@ static BOOL update_recv_surfcmd_surface_bits(rdpUpdate* update, STREAM* s, UINT3
stream_read_UINT16(s, cmd->width);
stream_read_UINT16(s, cmd->height);
stream_read_UINT32(s, cmd->bitmapDataLength);
if(stream_get_left(s) < cmd->bitmapDataLength)
return FALSE;
if (stream_get_left(s) < cmd->bitmapDataLength)
return -1;
pos = stream_get_pos(s) + cmd->bitmapDataLength;
cmd->bitmapData = stream_get_tail(s);
IFCALL(update->SurfaceBits, update->context, cmd);
stream_set_pos(s, pos);
*length = 20 + cmd->bitmapDataLength;
return TRUE;
return 0;
}
static void update_send_frame_acknowledge(rdpRdp* rdp, UINT32 frameId)
@@ -64,24 +67,28 @@ static void update_send_frame_acknowledge(rdpRdp* rdp, UINT32 frameId)
rdp_send_data_pdu(rdp, s, DATA_PDU_TYPE_FRAME_ACKNOWLEDGE, rdp->mcs->user_id);
}
/*
 * Handles a frame-marker surface command: reads frameAction (2 bytes) and
 * frameId (4 bytes) from the stream, reports them through the
 * SurfaceFrameMarker callback, optionally sends a frame acknowledge, and
 * stores the number of bytes consumed (6) in *length.
 *
 * NOTE: this listing interleaves the pre-change lines (BOOL signature,
 * FALSE/TRUE returns) with their post-change replacements (int signature,
 * -1 on a short stream, 0 on success).
 */
static BOOL update_recv_surfcmd_frame_marker(rdpUpdate* update, STREAM* s, UINT32 *length)
static int update_recv_surfcmd_frame_marker(rdpUpdate* update, STREAM* s, UINT32 *length)
{
SURFACE_FRAME_MARKER* marker = &update->surface_frame_marker;
/* both fields (2 + 4 bytes) must be present before anything is read */
if(stream_get_left(s) < 6)
return FALSE;
if (stream_get_left(s) < 6)
return -1;
stream_read_UINT16(s, marker->frameAction);
stream_read_UINT32(s, marker->frameId);
IFCALL(update->SurfaceFrameMarker, update->context, marker);
/* acknowledge only when the frame-acknowledge capability set was received, the
 * FrameAcknowledge setting is positive, and this marker ends a frame */
if (update->context->rdp->settings->ReceivedCapabilities[CAPSET_TYPE_FRAME_ACKNOWLEDGE] && update->context->rdp->settings->FrameAcknowledge > 0 && marker->frameAction == SURFACECMD_FRAMEACTION_END)
if (update->context->rdp->settings->ReceivedCapabilities[CAPSET_TYPE_FRAME_ACKNOWLEDGE] &&
(update->context->rdp->settings->FrameAcknowledge > 0) &&
(marker->frameAction == SURFACECMD_FRAMEACTION_END))
{
update_send_frame_acknowledge(update->context->rdp, marker->frameId);
}
/* size of the command body consumed above */
*length = 6;
return TRUE;
return 0;
}
int update_recv_surfcmds(rdpUpdate* update, UINT32 size, STREAM* s)
@@ -101,12 +108,12 @@ int update_recv_surfcmds(rdpUpdate* update, UINT32 size, STREAM* s)
{
case CMDTYPE_SET_SURFACE_BITS:
case CMDTYPE_STREAM_SURFACE_BITS:
if (!update_recv_surfcmd_surface_bits(update, s, &cmdLength))
if (update_recv_surfcmd_surface_bits(update, s, &cmdLength) < 0)
return -1;
break;
case CMDTYPE_FRAME_MARKER:
if (!update_recv_surfcmd_frame_marker(update, s, &cmdLength))
if (update_recv_surfcmd_frame_marker(update, s, &cmdLength) < 0)
return -1;
break;
@@ -154,4 +161,3 @@ void update_write_surfcmd_frame_marker(STREAM* s, UINT16 frameAction, UINT32 fra
stream_write_UINT16(s, frameAction);
stream_write_UINT32(s, frameId);
}

View File

@@ -76,7 +76,7 @@ void rdp_write_system_time(STREAM* s, SYSTEM_TIME* system_time)
BOOL rdp_read_client_time_zone(STREAM* s, rdpSettings* settings)
{
char* str;
char* str = NULL;
TIME_ZONE_INFO* clientTimeZone;
if (stream_get_left(s) < 172)
@@ -91,6 +91,7 @@ BOOL rdp_read_client_time_zone(STREAM* s, rdpSettings* settings)
stream_seek(s, 64);
strncpy(clientTimeZone->standardName, str, sizeof(clientTimeZone->standardName));
free(str);
str = NULL;
rdp_read_system_time(s, &clientTimeZone->standardDate); /* StandardDate */
stream_read_UINT32(s, clientTimeZone->standardBias); /* StandardBias */

View File

@@ -259,6 +259,9 @@ BOOL transport_accept_nla(rdpTransport* transport)
if (transport->TlsIn == NULL)
transport->TlsIn = tls_new(transport->settings);
if (transport->TlsOut == NULL)
transport->TlsOut = transport->TlsIn;
transport->layer = TRANSPORT_LAYER_TLS;
transport->TlsIn->sockfd = transport->TcpIn->sockfd;
@@ -394,6 +397,7 @@ int transport_read(rdpTransport* transport, STREAM* s)
int stream_bytes;
int transport_status;
pdu_bytes = 0;
transport_status = 0;
/* first check if we have header */
@@ -693,14 +697,15 @@ int transport_check_fds(rdpTransport** ptransport)
* 1: asynchronous return
*/
ReferenceTable_Add(transport->ReceiveReferences, received);
recv_status = transport->ReceiveCallback(transport, received, transport->ReceiveExtra);
ReferenceTable_Release(transport->ReceiveReferences, received);
if (recv_status < 0)
status = -1;
if (recv_status == 0)
transport_receive_pool_return(transport, received);
if (status < 0)
return status;
@@ -789,6 +794,9 @@ rdpTransport* transport_new(rdpSettings* settings)
transport->ReceiveQueue = Queue_New(TRUE, -1, -1);
Queue_Object(transport->ReceivePool)->fnObjectFree = (OBJECT_FREE_FN) stream_free;
Queue_Object(transport->ReceiveQueue)->fnObjectFree = (OBJECT_FREE_FN) stream_free;
transport->ReceiveReferences = ReferenceTable_New(TRUE,
(void*) transport, (REFERENCE_FREE) transport_receive_pool_return);
}
return transport;
@@ -819,6 +827,8 @@ void transport_free(rdpTransport* transport)
Queue_Free(transport->ReceivePool);
Queue_Free(transport->ReceiveQueue);
ReferenceTable_Free(transport->ReceiveReferences);
free(transport);
}
}

View File

@@ -69,6 +69,8 @@ struct rdp_transport
wQueue* ReceivePool;
wQueue* ReceiveQueue;
wReferenceTable* ReceiveReferences;
};
STREAM* transport_recv_stream_init(rdpTransport* transport, int size);

View File

@@ -44,8 +44,9 @@ BOOL update_recv_orders(rdpUpdate* update, STREAM* s)
{
UINT16 numberOrders;
if(stream_get_left(s) < 6)
if (stream_get_left(s) < 6)
return FALSE;
stream_seek_UINT16(s); /* pad2OctetsA (2 bytes) */
stream_read_UINT16(s, numberOrders); /* numberOrders (2 bytes) */
stream_seek_UINT16(s); /* pad2OctetsB (2 bytes) */
@@ -62,8 +63,9 @@ BOOL update_recv_orders(rdpUpdate* update, STREAM* s)
BOOL update_read_bitmap_data(STREAM* s, BITMAP_DATA* bitmap_data)
{
if(stream_get_left(s) < 18)
if (stream_get_left(s) < 18)
return FALSE;
stream_read_UINT16(s, bitmap_data->destLeft);
stream_read_UINT16(s, bitmap_data->destTop);
stream_read_UINT16(s, bitmap_data->destRight);
@@ -91,7 +93,7 @@ BOOL update_read_bitmap_data(STREAM* s, BITMAP_DATA* bitmap_data)
}
else
{
if(stream_get_left(s) < bitmap_data->bitmapLength)
if (stream_get_left(s) < bitmap_data->bitmapLength)
return FALSE;
bitmap_data->compressed = FALSE;
stream_get_mark(s, bitmap_data->bitmapDataStream);
@@ -103,8 +105,10 @@ BOOL update_read_bitmap_data(STREAM* s, BITMAP_DATA* bitmap_data)
BOOL update_read_bitmap(rdpUpdate* update, STREAM* s, BITMAP_UPDATE* bitmap_update)
{
int i;
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_UINT16(s, bitmap_update->number); /* numberRectangles (2 bytes) */
if (bitmap_update->number > bitmap_update->count)
@@ -136,15 +140,16 @@ BOOL update_read_palette(rdpUpdate* update, STREAM* s, PALETTE_UPDATE* palette_u
int i;
PALETTE_ENTRY* entry;
if(stream_get_left(s) < 6)
if (stream_get_left(s) < 6)
return FALSE;
stream_seek_UINT16(s); /* pad2Octets (2 bytes) */
stream_read_UINT32(s, palette_update->number); /* numberColors (4 bytes), must be set to 256 */
if (palette_update->number > 256)
palette_update->number = 256;
if(stream_get_left(s) < palette_update->number * 3)
if (stream_get_left(s) < palette_update->number * 3)
return FALSE;
/* paletteEntries */
@@ -171,25 +176,29 @@ void update_read_synchronize(rdpUpdate* update, STREAM* s)
BOOL update_read_play_sound(STREAM* s, PLAY_SOUND_UPDATE* play_sound)
{
if(stream_get_left(s) < 8)
if (stream_get_left(s) < 8)
return FALSE;
stream_read_UINT32(s, play_sound->duration); /* duration (4 bytes) */
stream_read_UINT32(s, play_sound->frequency); /* frequency (4 bytes) */
return TRUE;
}
BOOL update_recv_play_sound(rdpUpdate* update, STREAM* s)
{
if(!update_read_play_sound(s, &update->play_sound))
if (!update_read_play_sound(s, &update->play_sound))
return FALSE;
IFCALL(update->PlaySound, update->context, &update->play_sound);
return TRUE;
}
BOOL update_read_pointer_position(STREAM* s, POINTER_POSITION_UPDATE* pointer_position)
{
if(stream_get_left(s) < 4)
if (stream_get_left(s) < 4)
return FALSE;
stream_read_UINT16(s, pointer_position->xPos); /* xPos (2 bytes) */
stream_read_UINT16(s, pointer_position->yPos); /* yPos (2 bytes) */
return TRUE;
@@ -197,15 +206,16 @@ BOOL update_read_pointer_position(STREAM* s, POINTER_POSITION_UPDATE* pointer_po
BOOL update_read_pointer_system(STREAM* s, POINTER_SYSTEM_UPDATE* pointer_system)
{
if(stream_get_left(s) < 4)
if (stream_get_left(s) < 4)
return FALSE;
stream_read_UINT32(s, pointer_system->type); /* systemPointerType (4 bytes) */
return TRUE;
}
BOOL update_read_pointer_color(STREAM* s, POINTER_COLOR_UPDATE* pointer_color)
{
if(stream_get_left(s) < 14)
if (stream_get_left(s) < 14)
return FALSE;
stream_read_UINT16(s, pointer_color->cacheIndex); /* cacheIndex (2 bytes) */
@@ -229,7 +239,7 @@ BOOL update_read_pointer_color(STREAM* s, POINTER_COLOR_UPDATE* pointer_color)
if (pointer_color->lengthXorMask > 0)
{
if(stream_get_left(s) < pointer_color->lengthXorMask)
if (stream_get_left(s) < pointer_color->lengthXorMask)
return FALSE;
pointer_color->xorMaskData = (BYTE*) malloc(pointer_color->lengthXorMask);
stream_read(s, pointer_color->xorMaskData, pointer_color->lengthXorMask);
@@ -237,7 +247,7 @@ BOOL update_read_pointer_color(STREAM* s, POINTER_COLOR_UPDATE* pointer_color)
if (pointer_color->lengthAndMask > 0)
{
if(stream_get_left(s) < pointer_color->lengthAndMask)
if (stream_get_left(s) < pointer_color->lengthAndMask)
return FALSE;
pointer_color->andMaskData = (BYTE*) malloc(pointer_color->lengthAndMask);
stream_read(s, pointer_color->andMaskData, pointer_color->lengthAndMask);
@@ -250,16 +260,18 @@ BOOL update_read_pointer_color(STREAM* s, POINTER_COLOR_UPDATE* pointer_color)
BOOL update_read_pointer_new(STREAM* s, POINTER_NEW_UPDATE* pointer_new)
{
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_UINT16(s, pointer_new->xorBpp); /* xorBpp (2 bytes) */
return update_read_pointer_color(s, &pointer_new->colorPtrAttr); /* colorPtrAttr */
}
BOOL update_read_pointer_cached(STREAM* s, POINTER_CACHED_UPDATE* pointer_cached)
{
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_UINT16(s, pointer_cached->cacheIndex); /* cacheIndex (2 bytes) */
return TRUE;
}
@@ -270,8 +282,9 @@ BOOL update_recv_pointer(rdpUpdate* update, STREAM* s)
rdpContext* context = update->context;
rdpPointerUpdate* pointer = update->pointer;
if(stream_get_left(s) < 2+2)
if (stream_get_left(s) < 2 + 2)
return FALSE;
stream_read_UINT16(s, messageType); /* messageType (2 bytes) */
stream_seek_UINT16(s); /* pad2Octets (2 bytes) */
@@ -318,8 +331,9 @@ BOOL update_recv(rdpUpdate* update, STREAM* s)
UINT16 updateType;
rdpContext* context = update->context;
if(stream_get_left(s) < 2)
if (stream_get_left(s) < 2)
return FALSE;
stream_read_UINT16(s, updateType); /* updateType (2 bytes) */
//printf("%s Update Data PDU\n", UPDATE_TYPE_STRINGS[updateType]);
@@ -494,6 +508,19 @@ static void update_send_surface_frame_marker(rdpContext* context, SURFACE_FRAME_
fastpath_send_update_pdu(rdp->fastpath, FASTPATH_UPDATETYPE_SURFCMDS, s);
}
/**
 * Send a frame-acknowledge data PDU carrying frameId.
 * Nothing is sent unless the ReceivedCapabilities entry for
 * CAPSET_TYPE_FRAME_ACKNOWLEDGE is set in the session settings.
 */
static void update_send_frame_acknowledge(rdpContext* context, UINT32 frameId)
{
STREAM* s;
rdpRdp* rdp = context->rdp;
if (rdp->settings->ReceivedCapabilities[CAPSET_TYPE_FRAME_ACKNOWLEDGE])
{
s = rdp_data_pdu_init(rdp);
/* The only payload written here is the 32-bit frame id. */
stream_write_UINT32(s, frameId);
rdp_send_data_pdu(rdp, s, DATA_PDU_TYPE_FRAME_ACKNOWLEDGE, rdp->mcs->user_id);
}
}
static void update_send_synchronize(rdpContext* context)
{
STREAM* s;
@@ -615,7 +642,7 @@ BOOL update_read_refresh_rect(rdpUpdate* update, STREAM* s)
stream_read_BYTE(s, numberOfAreas);
stream_seek(s, 3); /* pad3Octects */
if(stream_get_left(s) < numberOfAreas * 4 * 2)
if (stream_get_left(s) < numberOfAreas * 4 * 2)
return FALSE;
areas = (RECTANGLE_16*) malloc(sizeof(RECTANGLE_16) * numberOfAreas);
@@ -673,6 +700,7 @@ void update_register_client_callbacks(rdpUpdate* update)
{
update->RefreshRect = update_send_refresh_rect;
update->SuppressOutput = update_send_suppress_output;
update->SurfaceFrameAcknowledge = update_send_frame_acknowledge;
}
static void* update_thread(void* arg)
@@ -755,6 +783,8 @@ void update_free(rdpUpdate* update)
CloseHandle(update->thread);
Queue_Free(update->queue);
free(update);
}
}

View File

@@ -21,6 +21,7 @@
#include "config.h"
#endif
#include <stdio.h>
#include <freerdp/crypto/ber.h>
BOOL ber_read_length(STREAM* s, int* length)
@@ -364,7 +365,7 @@ BOOL ber_read_integer(STREAM* s, UINT32* value)
{
int length;
if(!ber_read_universal_tag(s, BER_TAG_INTEGER, FALSE) ||
if (!ber_read_universal_tag(s, BER_TAG_INTEGER, FALSE) ||
!ber_read_length(s, &length) ||
stream_get_left(s) < length)
return FALSE;
@@ -372,16 +373,17 @@ BOOL ber_read_integer(STREAM* s, UINT32* value)
if (value == NULL)
{
// even if we don't care the integer value, check the announced size
if(length < 1 || length > 4)
return FALSE;
stream_seek(s, length);
return TRUE;
return stream_skip(s, length);
}
if (length == 1)
{
stream_read_BYTE(s, *value);
}
else if (length == 2)
{
stream_read_UINT16_be(s, *value);
}
else if (length == 3)
{
BYTE byte;
@@ -390,9 +392,19 @@ BOOL ber_read_integer(STREAM* s, UINT32* value)
*value += (byte << 16);
}
else if (length == 4)
{
stream_read_UINT32_be(s, *value);
else
}
else if (length == 8)
{
printf("%s: should implement reading an 8 bytes integer\n", __FUNCTION__);
return FALSE;
}
else
{
printf("%s: should implement reading an integer with length=%d\n", __FUNCTION__, length);
return FALSE;
}
return TRUE;
}

View File

@@ -232,14 +232,17 @@ UINT32 freerdp_keyboard_init_x11(UINT32 keyboardLayoutId, RDP_SCANCODE x11_keyco
#else
{
char* keymap;
char* xkb_layout;
char* xkb_variant;
char* xkb_layout = 0;
char* xkb_variant = 0;
if (keyboardLayoutId == 0)
{
keyboardLayoutId = freerdp_detect_keyboard_layout_from_xkb(&xkb_layout, &xkb_variant);
free(xkb_layout);
free(xkb_variant);
if (xkb_layout)
free(xkb_layout);
if (xkb_variant)
free(xkb_variant);
}
keymap = freerdp_detect_keymap_from_xkb();

View File

@@ -0,0 +1,86 @@
# FreeRDP: A Remote Desktop Protocol Client
# libfreerdp-primitives cmake build script
# vi:ts=4 sw=4:
#
# (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing permissions
# and limitations under the License.
#
# Module identity. MODULE_PREFIX namespaces the per-module variables
# (<prefix>_SRCS, <prefix>_LIBS) used throughout this script.
set(MODULE_NAME "freerdp-primitives")
set(MODULE_PREFIX "FREERDP_PRIMITIVES")
# Sources of the primitives module: one file per operation family, the
# dispatcher (primitives.c) and the shared internal header.
set(${MODULE_PREFIX}_SRCS
prim_add.c
prim_andor.c
prim_alphaComp.c
prim_colors.c
prim_copy.c
prim_set.c
prim_shift.c
prim_sign.c
primitives.c
prim_internal.h)
# Expose the configured build type to the C sources as a preprocessor macro.
# NOTE(review): CMAKE_BUILD_TYPE is empty under multi-config generators, in
# which case the macro is defined with an empty value - confirm that the
# consumers of this macro tolerate that.
add_definitions(-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})
# Per-source compile flags for the primitives module are accumulated in
# OPTIMIZATION as one space-separated string and applied at the end.

# IPP on GCC: add every IPP include directory as an explicit -I flag.
if(WITH_IPP AND CMAKE_COMPILER_IS_GNUCC)
	foreach(ipp_include_dir ${IPP_INCLUDE_DIRS})
		set(OPTIMIZATION "${OPTIMIZATION} -I${ipp_include_dir}")
	endforeach()
endif()

# SIMD instruction-set flags. SSE2 takes precedence over NEON.
if(WITH_SSE2)
	if(CMAKE_COMPILER_IS_GNUCC)
		set(OPTIMIZATION "${OPTIMIZATION} -msse2 -mssse3 -Wdeclaration-after-statement")
	endif()
	if(MSVC)
		set(OPTIMIZATION "${OPTIMIZATION} /arch:SSE2")
	endif()
elseif(WITH_NEON)
	if(CMAKE_COMPILER_IS_GNUCC)
		set(OPTIMIZATION "${OPTIMIZATION} -mfpu=neon -mfloat-abi=softfp")
	endif()
	# TODO: Add MSVC equivalent
endif()

# Apply the accumulated flags to every source file of this module.
set_property(SOURCE ${${MODULE_PREFIX}_SRCS} PROPERTY COMPILE_FLAGS ${OPTIMIZATION})
# Create the module target through the project's add_complex_library()
# helper (defined elsewhere in the project), requesting an OBJECT library.
add_complex_library(MODULE ${MODULE_NAME} TYPE "OBJECT"
MONOLITHIC ${MONOLITHIC_BUILD}
SOURCES ${${MODULE_PREFIX}_SRCS})
set_target_properties(${MODULE_NAME} PROPERTIES VERSION ${FREERDP_VERSION} SOVERSION ${FREERDP_API_VERSION} PREFIX "lib")
# When IPP was found, add its headers to the include path and wrap each IPP
# library in an imported static target that is appended to this module's
# link list.
# NOTE(review): IMPORTED_LOCATION is built as "${IPP_LIBRARY_DIRS}/<lib>",
# which only works if IPP_LIBRARY_DIRS holds a single directory - confirm.
if(IPP_FOUND)
include_directories(${IPP_INCLUDE_DIRS})
foreach(ipp_lib ${IPP_LIBRARIES})
add_library("${ipp_lib}_imported" STATIC IMPORTED)
set_property(TARGET "${ipp_lib}_imported" PROPERTY IMPORTED_LOCATION "${IPP_LIBRARY_DIRS}/${ipp_lib}")
set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} "${ipp_lib}_imported")
endforeach()
endif()
# Monolithic builds pass this module's link dependencies up to the parent
# scope; otherwise the module is linked and installed on its own.
if(MONOLITHIC_BUILD)
set(FREERDP_LIBS ${FREERDP_LIBS} ${${MODULE_PREFIX}_LIBS} PARENT_SCOPE)
else()
target_link_libraries(${MODULE_NAME} ${${MODULE_PREFIX}_LIBS})
install(TARGETS ${MODULE_NAME} DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
# Group the target under FreeRDP/libfreerdp in IDE project trees.
set_property(TARGET ${MODULE_NAME} PROPERTY FOLDER "FreeRDP/libfreerdp")
# The test subdirectory is only added when testing is enabled, and never on
# Windows or Apple platforms.
if(BUILD_TESTING AND ((NOT WIN32) AND (NOT APPLE)))
add_subdirectory(test)
endif()

View File

@@ -0,0 +1,113 @@
The Primitives Library
Introduction
------------
The purpose of the primitives library is to give the freerdp code easy
access to *run-time* optimization via SIMD operations. When the library
is initialized, dynamic checks of processor features are run (such as
support for SSE3 or NEON), and each entrypoint is bound, through a
function pointer, to the fastest implementation available. All
routines offer generic C alternatives as fallbacks.
Run-time optimization has the advantage of allowing a single executable
to run fast on multiple platforms with different SIMD capabilities.
Use In Code
-----------
A singleton pointing to a structure containing the function pointers
is accessed through primitives_get(). The function pointers can then
be used from that structure, e.g.
primitives_t *prims = primitives_get();
prims->shiftC_16s(buffer, shifts, buffer, 256);
Of course, there is some overhead in calling through the function pointer
and setting up the SIMD operations, so it would be counterproductive to
call the primitives library for very small operations, e.g. initializing an
array of eight values to a constant. The primitives library is intended
for larger-scale operations, e.g. arrays of size 64 and larger.
Initialization and Cleanup
--------------------------
Library initialization is done the first time primitives_init() is called
or the first time primitives_get() is used. Cleanup (if any) is done by
primitives_deinit().
Intel Integrated Performance Primitives (IPP)
---------------------------------------------
If freerdp is compiled with IPP support (-DWITH_IPP=ON), the IPP function
calls will be used (where available) to fill the function pointers.
Where possible, function names and parameter lists match IPP format so
that the IPP functions can be plugged into the function pointers without
a wrapper layer. Use of IPP is completely optional, and in many cases
the SSE operations in the primitives library itself are faster or similar
in performance.
Coverage
--------
The primitives library is not meant to be comprehensive, offering
entrypoints for every operation and operand type. Instead, the coverage
is focused on operations known to be performance bottlenecks in the code.
For instance, 16-bit signed operations are used widely in the RemoteFX
software, so you'll find 16s versions of several operations, but there
is no attempt to provide (unused) copies of the same code for 8u, 16u,
32s, etc.
New Optimizations
-----------------
As the need arises, new optimizations can be added to the library,
including NEON, AVX, and perhaps OpenCL or other SIMD implementations.
The initialization routine is free to do any quick run-time test to
determine which features are available before hooking the operation's
function pointer, or it can simply look at the processor features list
from the hints passed to the initialization routine.
Adding Entrypoints
------------------
As the need for new operations or operands arises, new entrypoints can
be added.
1) Function prototypes and pointers are added to
include/freerdp/primitives.h
2) New module initialization and cleanup function prototypes are added
to prim_internal.h and called in primitives.c (primitives_init()
and primitives_deinit()).
3) Operation names and parameter lists should be compatible with the IPP.
IPP manuals are available online at software.intel.com.
4) A generic C entrypoint must be available as a fallback.
5) prim_templates.h contains macro-based templates for simple operations,
such as applying a single SSE operation to arrays of data.
The template functions can frequently be used to extend the
operations without writing a lot of new code.
Flags
-----
The entrypoint primitives_get_flags() returns a bitfield of processor flags
(as defined in primitives.h) and primitives_flag_str() returns a string
related to those processor flags, for debugging and information. The
bitfield can be used elsewhere in the code as needed.
Cache Management
----------------
I haven't found a lot of speed improvement by attempting prefetch, and
in fact it seems to have a negative impact in some cases. Done correctly,
the routines could perhaps be further accelerated by proper use of
prefetch, fences, etc.
Testing
-------
In the test subdirectory is an executable (prim_test) that tests both
functionality and speed of primitives library operations. Any new
modules should be added to that test, following the conventions already
established in that directory. The program can be executed on various
target hardware to compare generic C, optimized, and IPP performance
with various array sizes.

View File

@@ -0,0 +1,88 @@
/* FreeRDP: A Remote Desktop Protocol Client
* Add operations.
* vi:ts=4 sw=4:
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
#include <pmmintrin.h>
#endif /* WITH_SSE2 */
#ifdef WITH_IPP
#include <ipps.h>
#endif /* WITH_IPP */
#include "prim_internal.h"
#include "prim_templates.h"
/* ----------------------------------------------------------------------------
* 16-bit signed add with saturation (under and over).
*/
PRIM_STATIC pstatus_t general_add_16s(
	const INT16 *pSrc1,
	const INT16 *pSrc2,
	INT16 *pDst,
	INT32 len)
{
	/* Generic C fallback: pDst[i] = pSrc1[i] + pSrc2[i], clamped to the
	 * INT16 range.  The sum is formed in 32 bits so it cannot wrap before
	 * the clamp is applied.
	 */
	for (; len != 0; --len)
	{
		INT32 sum = (INT32) (*pSrc1) + (INT32) (*pSrc2);
		INT16 clamped;

		if (sum > 32767)
			clamped = ((INT16) 32767);
		else if (sum < -32768)
			clamped = ((INT16) -32768);
		else
			clamped = (INT16) sum;

		*pDst = clamped;
		++pSrc1;
		++pSrc2;
		++pDst;
	}

	return PRIMITIVES_SUCCESS;
}
#ifdef WITH_SSE2
# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS)
/* ------------------------------------------------------------------------- */
/* Instantiate sse3_add_16s from the SSE3_SSD_ROUTINE template (presumably
 * provided by prim_templates.h - confirm).  The template is handed the
 * routine name, the element type, the generic fallback routine, the SSE
 * intrinsic to apply (_mm_adds_epi16, a saturating 16-bit add) and an
 * expression that processes a single element through the generic routine.
 * Only compiled for SSE2 builds that either do not use IPP or explicitly
 * request all primitive versions.
 */
SSE3_SSD_ROUTINE(sse3_add_16s, INT16, general_add_16s,
_mm_adds_epi16, general_add_16s(sptr1++, sptr2++, dptr++, 1))
# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_add(
	const primitives_hints_t *hints,
	primitives_t *prims)
{
	/* The generic C routine is always installed first, so the entrypoint
	 * is valid even when no accelerated version is selected below.
	 */
	prims->add_16s = general_add_16s;

#if defined(WITH_IPP)
	/* IPP builds route the entrypoint straight to the IPP function. */
	prims->add_16s = (__add_16s_t) ippsAdd_16s;
#elif defined(WITH_SSE2)
	/* The SSE routine needs SSE2 and also SSE3 (for LDDQU). */
	if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
	{
		if (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)
		{
			prims->add_16s = sse3_add_16s;
		}
	}
#endif
}
/* ------------------------------------------------------------------------- */
void primitives_deinit_add(
	primitives_t *prims)
{
	/* Initialization only stored function pointers, so there is
	 * nothing to undo here.
	 */
	(void) prims;
}

View File

@@ -0,0 +1,305 @@
/* FreeRDP: A Remote Desktop Protocol Client
* Alpha blending routines.
* vi:ts=4 sw=4:
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*
* Note: this code assumes the second operand is fully opaque,
* e.g.
* newval = alpha1*val1 + (1-alpha1)*val2
* rather than
* newval = alpha1*val1 + (1-alpha1)*alpha2*val2
* The IPP gives other options.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include "prim_internal.h"
#ifdef WITH_SSE2
#include <emmintrin.h>
#include <pmmintrin.h>
#endif /* WITH_SSE2 */
#ifdef WITH_IPP
#include <ippi.h>
#endif /* WITH_IPP */
/* Extract one 8-bit channel from a packed 32-bit pixel value:
 * alpha from bits 31..24, red from 23..16, green from 15..8 and
 * blue from 7..0.
 */
#define ALPHA(_k_) (((_k_) & 0xFF000000U) >> 24)
#define RED(_k_) (((_k_) & 0x00FF0000U) >> 16)
#define GRN(_k_) (((_k_) & 0x0000FF00U) >> 8)
#define BLU(_k_) (((_k_) & 0x000000FFU))
/* ------------------------------------------------------------------------- */
/* Generic C alpha blend of two 32-bit-per-pixel images.
 *
 * For every pixel the alpha byte of the first source selects the mix:
 * alpha 255 copies src1, alpha 0 copies src2, anything else blends
 * src1 over src2 (src2 is treated as fully opaque).
 *
 * pSrc1/pSrc2/pDst  - first pixel of each image
 * src1Step/src2Step/dstStep - distance in bytes between row starts
 * width/height      - region size in pixels; nothing is written when
 *                     either is <= 0
 *
 * Always returns PRIMITIVES_SUCCESS.
 */
PRIM_STATIC pstatus_t general_alphaComp_argb(
	const BYTE *pSrc1,  INT32 src1Step,
	const BYTE *pSrc2,  INT32 src2Step,
	BYTE *pDst,  INT32 dstStep,
	INT32 width,  INT32 height)
{
	const UINT32 *sptr1 = (const UINT32 *) pSrc1;
	const UINT32 *sptr2 = (const UINT32 *) pSrc2;
	UINT32 *dptr = (UINT32 *) pDst;
	/* Keep the row arithmetic signed: dividing an int by sizeof() would
	 * convert a negative padding (step smaller than the row, or a
	 * negative stride) to a huge unsigned value before the division.
	 */
	const int pixelBytes = (int) sizeof(UINT32);
	int linebytes = width * pixelBytes;
	int src1Jump = (src1Step - linebytes) / pixelBytes;
	int src2Jump = (src2Step - linebytes) / pixelBytes;
	int dstJump  = (dstStep - linebytes) / pixelBytes;

	int y;
	for (y=0; y<height; y++)
	{
		int x;
		for (x=0; x<width; x++)
		{
			const UINT32 src1 = *sptr1++;
			const UINT32 src2 = *sptr2++;
			UINT32 alpha = ALPHA(src1) + 1;
			if (alpha == 256)
			{
				/* If alpha is 255+1, just copy src1. */
				*dptr++ = src1;
			}
			else if (alpha <= 1)
			{
				/* If alpha is 0+1, just copy src2. */
				*dptr++ = src2;
			}
			else
			{
				/* A perfectly accurate blend would do (a*src + (255-a)*dst)/255
				 * rather than adding one to alpha and dividing by 256, but this
				 * is much faster and only differs by one 16% of the time.
				 * I'm not sure who first designed the double-ops trick
				 * (Red Blue and Alpha Green).
				 */
				UINT32 rb, ag;
				/* Two channels are processed per 32-bit word: red/blue in
				 * one word, alpha/green (shifted down 8) in the other.
				 */
				UINT32 s2rb = src2 & 0x00FF00FFU;
				UINT32 s2ag = (src2 >> 8) & 0x00FF00FFU;
				UINT32 s1rb = src1 & 0x00FF00FFU;
				UINT32 s1ag = (src1 >> 8) & 0x00FF00FFU;
				UINT32 drb = s1rb - s2rb;
				UINT32 dag = s1ag - s2ag;
				drb *= alpha;
				dag *= alpha;
				rb =  ((drb >> 8) + s2rb)       & 0x00FF00FFU;
				ag = (((dag >> 8) + s2ag) << 8) & 0xFF00FF00U;
				*dptr++ = rb | ag;
			}
		}
		/* Skip any row padding. */
		sptr1 += src1Jump;
		sptr2 += src2Jump;
		dptr  += dstJump;
	}

	return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
#ifdef WITH_SSE2
# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS)
/* SSE version of the alpha blend, same contract as
 * general_alphaComp_argb().  Each row is split into three parts:
 * a lead-in handled by the generic routine until the destination pointer
 * is 16-byte aligned, a main loop blending four pixels per iteration,
 * and a remainder again handled by the generic routine.
 * Rows narrower than four pixels go entirely through the generic routine.
 *
 * NOTE(review): for a source alpha of 0 the vector path appears to produce
 * src2-1 on channels where src1 < src2, while the generic routine copies
 * src2 unchanged - confirm whether that one-off difference matters.
 */
PRIM_STATIC pstatus_t sse2_alphaComp_argb(
	const BYTE *pSrc1,  INT32 src1Step,
	const BYTE *pSrc2,  INT32 src2Step,
	BYTE *pDst,  INT32 dstStep,
	INT32 width,  INT32 height)
{
	const UINT32 *sptr1 = (const UINT32 *) pSrc1;
	const UINT32 *sptr2 = (const UINT32 *) pSrc2;
	UINT32 *dptr;
	int linebytes, src1Jump, src2Jump, dstJump, y;
	int pixelBytes;
	__m128i xmm0, xmm1;

	if ((width <= 0) || (height <= 0)) return PRIMITIVES_SUCCESS;

	if (width < 4)     /* pointless if too small */
	{
		return general_alphaComp_argb(pSrc1, src1Step, pSrc2, src2Step,
			pDst, dstStep, width, height);
	}

	dptr = (UINT32 *) pDst;
	/* Keep the row arithmetic signed: dividing an int by sizeof() would
	 * convert a negative padding (step smaller than the row, or a
	 * negative stride) to a huge unsigned value before the division.
	 */
	pixelBytes = (int) sizeof(UINT32);
	linebytes = width * pixelBytes;
	src1Jump = (src1Step - linebytes) / pixelBytes;
	src2Jump = (src2Step - linebytes) / pixelBytes;
	dstJump  = (dstStep - linebytes) / pixelBytes;

	xmm0 = _mm_set1_epi32(0);
	xmm1 = _mm_set1_epi16(1);

	for (y=0; y<height; ++y)
	{
		int pixels = width;
		int count;

		/* Get to the 16-byte boundary now. */
		int leadIn = 0;
		switch ((ULONG_PTR) dptr & 0x0f)
		{
			case 0:
				leadIn = 0;
				break;
			case 4:
				leadIn = 3;
				break;
			case 8:
				leadIn = 2;
				break;
			case 12:
				leadIn = 1;
				break;
			default:
				/* We'll never hit a 16-byte boundary, so do the whole
				 * thing the slow way.
				 */
				leadIn = width;
				break;
		}
		if (leadIn)
		{
			general_alphaComp_argb((const BYTE *) sptr1,
				src1Step, (const BYTE *) sptr2, src2Step,
				(BYTE *) dptr, dstStep, leadIn, 1);
			sptr1 += leadIn;
			sptr2 += leadIn;
			dptr  += leadIn;
			pixels -= leadIn;
		}

		/* Use SSE registers to do 4 pixels at a time. */
		count = pixels >> 2;
		pixels -= count << 2;
		while (count--)
		{
			__m128i xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
			/* BdGdRdAdBcGcRcAcBbGbRbAbBaGaRaAa */
			xmm2 = LOAD_SI128(sptr1);  sptr1 += 4;
			/* BhGhRhAhBgGgRgAgBfGfRfAfBeGeReAe */
			xmm3 = LOAD_SI128(sptr2);  sptr2 += 4;
			/* 00Bb00Gb00Rb00Ab00Ba00Ga00Ra00Aa */
			xmm4 = _mm_unpackhi_epi8(xmm2, xmm0);
			/* 00Bf00Gf00Rf00Af00Be00Ge00Re00Ae */
			xmm5 = _mm_unpackhi_epi8(xmm3, xmm0);
			/* subtract */
			xmm6 = _mm_subs_epi16(xmm4, xmm5);
			/* 00Bb00Gb00Rb00Ab00Aa00Aa00Aa00Aa */
			xmm4 = _mm_shufflelo_epi16(xmm4, 0xff);
			/* 00Ab00Ab00Ab00Ab00Aa00Aa00Aa00Aa */
			xmm4 = _mm_shufflehi_epi16(xmm4, 0xff);
			/* Add one to alphas */
			xmm4 = _mm_adds_epi16(xmm4, xmm1);
			/* Multiply and take low word */
			xmm4 = _mm_mullo_epi16(xmm4, xmm6);
			/* Shift 8 right */
			xmm4 = _mm_srai_epi16(xmm4, 8);
			/* Add xmm5 */
			xmm4 = _mm_adds_epi16(xmm4, xmm5);
			/* 00Bj00Gj00Rj00Aj00Bi00Gi00Ri00Ai */

			/* 00Bd00Gd00Rd00Ad00Bc00Gc00Rc00Ac */
			xmm5 = _mm_unpacklo_epi8(xmm2, xmm0);
			/* 00Bh00Gh00Rh00Ah00Bg00Gg00Rg00Ag */
			xmm6 = _mm_unpacklo_epi8(xmm3, xmm0);
			/* subtract */
			xmm7 = _mm_subs_epi16(xmm5, xmm6);
			/* 00Bd00Gd00Rd00Ad00Ac00Ac00Ac00Ac */
			xmm5 = _mm_shufflelo_epi16(xmm5, 0xff);
			/* 00Ad00Ad00Ad00Ad00Ac00Ac00Ac00Ac */
			xmm5 = _mm_shufflehi_epi16(xmm5, 0xff);
			/* Add one to alphas */
			xmm5 = _mm_adds_epi16(xmm5, xmm1);
			/* Multiply and take low word */
			xmm5 = _mm_mullo_epi16(xmm5, xmm7);
			/* Shift 8 right */
			xmm5 = _mm_srai_epi16(xmm5, 8);
			/* Add xmm6 */
			xmm5 = _mm_adds_epi16(xmm5, xmm6);
			/* 00Bl00Gl00Rl00Al00Bk00Gk00Rk00Ak */

			/* Must mask off remainders or pack gets confused */
			xmm3 = _mm_set1_epi16(0x00ffU);
			xmm4 = _mm_and_si128(xmm4, xmm3);
			xmm5 = _mm_and_si128(xmm5, xmm3);
			/* BlGlRlAlBkGkRkAkBjGjRjAjBiGiRiAi */
			xmm5 = _mm_packus_epi16(xmm5, xmm4);
			/* Aligned store: dptr was brought to a 16-byte boundary above. */
			_mm_store_si128((__m128i *) dptr, xmm5);  dptr += 4;
		}

		/* Finish off the remainder. */
		if (pixels)
		{
			general_alphaComp_argb((const BYTE *) sptr1, src1Step,
				(const BYTE *) sptr2, src2Step,
				(BYTE *) dptr, dstStep, pixels, 1);
			sptr1 += pixels;
			sptr2 += pixels;
			dptr  += pixels;
		}

		/* Jump to next row. */
		sptr1 += src1Jump;
		sptr2 += src2Jump;
		dptr  += dstJump;
	}

	return PRIMITIVES_SUCCESS;
}
# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */
#endif
#ifdef WITH_IPP
/* ------------------------------------------------------------------------- */
/* Adapter that gives ippiAlphaComp_8u_AC4R the same parameter list as the
 * other alphaComp routines: width and height are packed into an IppiSize
 * and the blend mode is fixed to ippAlphaOver.
 */
PRIM_STATIC pstatus_t ipp_alphaComp_argb(
	const BYTE *pSrc1,  INT32 src1Step,
	const BYTE *pSrc2,  INT32 src2Step,
	BYTE *pDst,  INT32 dstStep,
	INT32 width,  INT32 height)
{
	IppiSize roi;
	pstatus_t status;

	roi.height = height;
	roi.width  = width;

	status = ippiAlphaComp_8u_AC4R(pSrc1, src1Step, pSrc2, src2Step,
		pDst, dstStep, roi, ippAlphaOver);
	return status;
}
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_alphaComp(
	const primitives_hints_t *hints,
	primitives_t *prims)
{
	/* The generic C routine is always installed first, so the entrypoint
	 * is valid even when no accelerated version is selected below.
	 */
	prims->alphaComp_argb = general_alphaComp_argb;

#if defined(WITH_IPP)
	/* IPP builds use the IPP adapter unconditionally. */
	prims->alphaComp_argb = ipp_alphaComp_argb;
#elif defined(WITH_SSE2)
	/* The SSE routine needs SSE2 and also SSE3 (for LDDQU). */
	if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
	{
		if (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)
		{
			prims->alphaComp_argb = sse2_alphaComp_argb;
		}
	}
#endif
}
/* ------------------------------------------------------------------------- */
void primitives_deinit_alphaComp(
	primitives_t *prims)
{
	/* Initialization only stored a function pointer, so there is
	 * nothing to undo here.
	 */
	(void) prims;
}

View File

@@ -0,0 +1,110 @@
/* FreeRDP: A Remote Desktop Protocol Client
* Logical operations.
* vi:ts=4 sw=4:
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
#include <pmmintrin.h>
#endif /* WITH_SSE2 */
#ifdef WITH_IPP
#include <ipps.h>
#endif /* WITH_IPP */
#include "prim_internal.h"
#include "prim_templates.h"
/* ----------------------------------------------------------------------------
* 32-bit AND with a constant.
*/
/* Generic C fallback: pDst[i] = pSrc[i] & val for len elements.
 *
 * pSrc - source array
 * val  - constant ANDed with every element
 * pDst - destination array (may be the same as pSrc)
 * len  - number of 32-bit elements
 *
 * Always returns PRIMITIVES_SUCCESS.
 */
PRIM_STATIC pstatus_t general_andC_32u(
	const UINT32 *pSrc,
	UINT32 val,
	UINT32 *pDst,
	INT32 len)
{
	/* No shortcut for val == 0: ANDing with zero must still clear the
	 * destination, so every element is always written.
	 */
	while (len--)
		*pDst++ = *pSrc++ & val;

	return PRIMITIVES_SUCCESS;
}
/* ----------------------------------------------------------------------------
* 32-bit OR with a constant.
*/
/* Generic C fallback: pDst[i] = pSrc[i] | val for len elements.
 *
 * pSrc - source array
 * val  - constant ORed with every element
 * pDst - destination array (may be the same as pSrc)
 * len  - number of 32-bit elements
 *
 * Always returns PRIMITIVES_SUCCESS.
 */
PRIM_STATIC pstatus_t general_orC_32u(
	const UINT32 *pSrc,
	UINT32 val,
	UINT32 *pDst,
	INT32 len)
{
	/* ORing with zero changes nothing, but that only makes the work
	 * skippable for an in-place call; with a separate destination the
	 * source values still have to be copied across.
	 */
	if ((val == 0) && (pSrc == pDst))
		return PRIMITIVES_SUCCESS;

	while (len--)
		*pDst++ = *pSrc++ | val;

	return PRIMITIVES_SUCCESS;
}
#ifdef WITH_SSE2
# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS)
/* ------------------------------------------------------------------------- */
/* Instantiate sse3_andC_32u and sse3_orC_32u from the SSE3_SCD_PRE_ROUTINE
 * template (presumably provided by prim_templates.h - confirm).  Each
 * instantiation is handed the routine name, the element type, the generic
 * fallback routine, the SSE intrinsic to apply and a statement that
 * processes a single element.  Only compiled for SSE2 builds that either
 * do not use IPP or explicitly request all primitive versions.
 */
SSE3_SCD_PRE_ROUTINE(sse3_andC_32u, UINT32, general_andC_32u,
_mm_and_si128, *dptr++ = *sptr++ & val)
SSE3_SCD_PRE_ROUTINE(sse3_orC_32u, UINT32, general_orC_32u,
_mm_or_si128, *dptr++ = *sptr++ | val)
# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_andor(
	const primitives_hints_t *hints,
	primitives_t *prims)
{
	/* Generic C routines first, so both entrypoints are always valid. */
	prims->orC_32u  = general_orC_32u;
	prims->andC_32u = general_andC_32u;

#ifdef WITH_IPP
	/* IPP builds route both entrypoints straight to the IPP functions. */
	prims->orC_32u  = (__orC_32u_t) ippsOrC_32u;
	prims->andC_32u = (__andC_32u_t) ippsAndC_32u;
#elif defined(WITH_SSE2)
	/* The SSE routines are only installed when the hints report both
	 * SSE2 and SSE3.
	 */
	if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
	{
		if (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)
		{
			prims->orC_32u  = sse3_orC_32u;
			prims->andC_32u = sse3_andC_32u;
		}
	}
#endif
}
/* ------------------------------------------------------------------------- */
void primitives_deinit_andor(
	primitives_t *prims)
{
	/* Initialization only stored function pointers, so there is
	 * nothing to undo here.
	 */
	(void) prims;
}

View File

@@ -0,0 +1,743 @@
/* FreeRDP: A Remote Desktop Protocol Client
* Color conversion operations.
* vi:ts=4 sw=4:
*
* Copyright 2011 Stephen Erisman
* Copyright 2011 Norbert Federa <nfedera@thinstuff.com>
* Copyright 2011 Martin Fleisz <mfleisz@thinstuff.com>
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
/* Pull in the intrinsics header for the SIMD flavour selected at build
 * time.  Both options are tested for being defined (not for their value),
 * so a macro that is defined without a value still selects its branch
 * instead of breaking the #elif expression.
 */
#ifdef WITH_SSE2
#include <emmintrin.h>
#elif defined(WITH_NEON)
#include <arm_neon.h>
#endif /* WITH_SSE2 else WITH_NEON */
#include "prim_internal.h"
#include "prim_templates.h"
#ifndef MINMAX
#define MINMAX(_v_, _l_, _h_) \
((_v_) < (_l_) ? (_l_) : ((_v_) > (_h_) ? (_h_) : (_v_)))
#endif /* !MINMAX */
/* ------------------------------------------------------------------------- */
/* Portable C conversion of three 16-bit YCbCr planes into three 16-bit
 * RGB planes.
 *
 * pSrc[0..2] - Y, Cb and Cr planes
 * srcStep    - bytes between rows of the source planes
 * pDst[0..2] - R, G and B planes
 * dstStep    - bytes between rows of the destination planes
 * roi        - width and height of the region to convert
 *
 * Always returns PRIMITIVES_SUCCESS.  Each output sample is clamped
 * to [0, 255].
 *
 * The decoded YCbCr coefficients are 11.5 fixed-point numbers
 * (1 sign bit + 10 integer bits + 5 fractional bits).  Only 7 integer bits
 * are actually used because the value range is [-128.0, 127.0]; in other
 * words the coefficients were scaled by << 5 in the quantization phase
 * and are scaled back here.
 */
PRIM_STATIC pstatus_t general_yCbCrToRGB_16s16s_P3P3(
	const INT16 *pSrc[3], INT32 srcStep,
	INT16 *pDst[3], INT32 dstStep,
	const prim_size_t *roi)	/* region of interest */
{
	const INT16 *lumaIn = pSrc[0];
	const INT16 *cbIn = pSrc[1];
	const INT16 *crIn = pSrc[2];
	INT16 *redOut = pDst[0];
	INT16 *greenOut = pDst[1];
	INT16 *blueOut = pDst[2];
	/* Elements to skip after each row to reach the start of the next. */
	int srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
	int dstbump = (dstStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
	int row;

	for (row=0; row<roi->height; row++)
	{
		int col;
		for (col=0; col<roi->width; ++col)
		{
			/* Floating point reference:
			 *   y = y + 4096;   // 128<<5, so the sum can be scaled by >>5
			 *   r = y + cr*1.403f;
			 *   g = y - cb*0.344f - cr*0.714f;
			 *   b = y + cb*1.770f;
			 *   out = MINMAX(value>>5, 0, 255);
			 *
			 * To avoid floating point the factors are scaled by << 16
			 * and held in 32-bit integers, so the luma term is scaled
			 * by << 16 as well and the final >> 5 becomes >> 21:
			 *   1.403 << 16 = 91947
			 *   0.344 << 16 = 22544,  0.714 << 16 = 46792
			 *   1.770 << 16 = 115998
			 *
			 * All three inputs are read before any output is written.
			 */
			INT32 luma = (INT32) (*lumaIn++);
			INT32 cb = (INT32) (*cbIn++);
			INT32 cr = (INT32) (*crIn++);
			INT32 red, green, blue;

			luma = (luma+4096)<<16;
			red = luma + cr*91947;
			green = luma - cb*22544 - cr*46792;
			blue = luma + cb*115998;

			*redOut++ = MINMAX(red>>21, 0, 255);
			*greenOut++ = MINMAX(green>>21, 0, 255);
			*blueOut++ = MINMAX(blue>>21, 0, 255);
		}
		lumaIn += srcbump;
		cbIn += srcbump;
		crIn += srcbump;
		redOut += dstbump;
		greenOut += dstbump;
		blueOut += dstbump;
	}
	return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Portable C conversion of three 16-bit RGB planes into three 16-bit
 * YCbCr planes.
 *
 * pSrc[0..2] - R, G and B planes
 * srcStep    - bytes between rows of the source planes
 * pDst[0..2] - Y, Cb and Cr planes
 * dstStep    - bytes between rows of the destination planes
 * roi        - width and height of the region to convert
 *
 * Always returns PRIMITIVES_SUCCESS.
 *
 * The encoded YCbCr coefficients are 11.5 fixed-point numbers
 * (1 sign bit + 10 integer bits + 5 fractional bits).  Only 7 integer bits
 * are actually used because the value range is [-128.0, 127.0]; in other
 * words the coefficients are scaled by << 5 when interpreted as INT16.
 * They are scaled back down during the quantization phase.
 */
PRIM_STATIC pstatus_t general_RGBToYCbCr_16s16s_P3P3(
	const INT16 *pSrc[3], INT32 srcStep,
	INT16 *pDst[3], INT32 dstStep,
	const prim_size_t *roi)	/* region of interest */
{
	const INT16 *rptr = pSrc[0];
	const INT16 *gptr = pSrc[1];
	const INT16 *bptr = pSrc[2];
	INT16 *yptr = pDst[0];
	INT16 *cbptr = pDst[1];
	INT16 *crptr = pDst[2];
	/* Elements to skip after each row to reach the start of the next. */
	int srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
	int dstbump = (dstStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
	int row;

	for (row=0; row<roi->height; row++)
	{
		int x;
		for (x=0; x<roi->width; ++x)
		{
			/* INT32 is used intentionally because we calculate with
			 * shifted factors!
			 */
			INT32 r = (INT32) (*rptr++);
			INT32 g = (INT32) (*gptr++);
			INT32 b = (INT32) (*bptr++);

			/* The factors are scaled by << 15 into 32-bit integers to
			 * avoid slower floating point multiplications.  Since the
			 * terms need to be scaled by << 5, the final sum is simply
			 * scaled by >> 10.
			 *
			 * Y:  0.299000 << 15 = 9798,  0.587000 << 15 = 19235,
			 *     0.114000 << 15 = 3735
			 * Cb: 0.168935 << 15 = 5535,  0.331665 << 15 = 10868,
			 *     0.500590 << 15 = 16403
			 * Cr: 0.499813 << 15 = 16377, 0.418531 << 15 = 13714,
			 *     0.081282 << 15 = 2663
			 */
			INT32 y  = (r *  9798 + g *  19235 + b *  3735) >> 10;
			INT32 cb = (r * -5535 + g * -10868 + b * 16403) >> 10;
			INT32 cr = (r * 16377 + g * -13714 + b * -2663) >> 10;

			/* Y is re-centred around zero (128 << 5 == 4096). */
			*yptr++  = (INT16) MINMAX(y - 4096, -4096, 4095);
			*cbptr++ = (INT16) MINMAX(cb, -4096, 4095);
			*crptr++ = (INT16) MINMAX(cr, -4096, 4095);
		}
		/* The R/G/B pointers walk the source planes and the Y/Cb/Cr
		 * pointers walk the destination planes, so each set must be
		 * advanced by its own row padding.
		 */
		rptr += srcbump;
		gptr += srcbump;
		bptr += srcbump;
		yptr += dstbump;
		cbptr += dstbump;
		crptr += dstbump;
	}
	return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Portable C packing of three 16-bit colour planes into interleaved
 * 4-byte pixels.
 *
 * For every pixel the low byte of the blue, green and red samples is
 * stored, in that order, followed by a constant 0xFF byte.
 * Always returns PRIMITIVES_SUCCESS.
 */
PRIM_STATIC pstatus_t general_RGBToRGB_16s8u_P3AC4R(
	const INT16 *pSrc[3],	/* 16-bit R,G, and B arrays */
	int srcStep,			/* bytes between rows in source data */
	BYTE *pDst,				/* 32-bit interleaved ARGB (ABGR?) data */
	int dstStep,			/* bytes between rows in dest data */
	const prim_size_t *roi)	/* region of interest */
{
	const INT16 *redIn = pSrc[0];
	const INT16 *greenIn = pSrc[1];
	const INT16 *blueIn = pSrc[2];
	BYTE *out = pDst;
	/* Source padding is counted in 16-bit elements, destination padding
	 * in bytes.
	 */
	int srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
	int dstbump = (dstStep - (roi->width * sizeof(UINT32)));
	int row, col;

	for (row=0; row<roi->height; ++row)
	{
		for (col=0; col<roi->width; ++col)
		{
			out[0] = (BYTE) (*blueIn++);
			out[1] = (BYTE) (*greenIn++);
			out[2] = (BYTE) (*redIn++);
			out[3] = ((BYTE) (0xFFU));
			out += 4;
		}
		/* Skip the row padding on both sides. */
		out += dstbump;
		redIn += srcbump;
		greenIn += srcbump;
		blueIn += srcbump;
	}
	return PRIMITIVES_SUCCESS;
}
#ifdef WITH_SSE2
#ifdef __GNUC__
# define GNU_INLINE \
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
#else
# define GNU_INLINE
#endif
#define CACHE_LINE_BYTES 64
#define _mm_between_epi16(_val, _min, _max) \
do { _val = _mm_min_epi16(_max, _mm_max_epi16(_val, _min)); } while (0)
#ifdef DO_PREFETCH
/*---------------------------------------------------------------------------*/
/* Issue _MM_HINT_NTA prefetches across a buffer of num_bytes bytes,
 * one hint for every CACHE_LINE_BYTES of data.
 */
static inline void GNU_INLINE _mm_prefetch_buffer(
	char * buffer,
	int num_bytes)
{
	__m128i * vec = (__m128i*) buffer;
	unsigned int idx = 0;

	/* idx counts 16-byte vectors, so it advances by the number of
	 * vectors per cache line.
	 */
	while (idx < (num_bytes / sizeof(__m128i)))
	{
		_mm_prefetch((char*)(&vec[idx]), _MM_HINT_NTA);
		idx += (CACHE_LINE_BYTES / sizeof(__m128i));
	}
}
#endif /* DO_PREFETCH */
/*---------------------------------------------------------------------------*/
/* SSE2 version of the planar 16-bit YCbCr to planar 16-bit RGB conversion.
 *
 * pSrc[0..2] - Y, Cb and Cr planes
 * srcStep    - bytes between rows of the source planes
 * pDst[0..2] - R, G and B planes
 * dstStep    - bytes between rows of the destination planes
 * roi        - width and height of the region to convert
 *
 * Eight pixels are processed per iteration.  When the pointers, width or
 * steps do not satisfy the checks below, the work is handed to
 * general_yCbCrToRGB_16s16s_P3P3() and its result is returned; otherwise
 * PRIMITIVES_SUCCESS is returned.
 */
PRIM_STATIC pstatus_t sse2_yCbCrToRGB_16s16s_P3P3(
const INT16 *pSrc[3],
int srcStep,
INT16 *pDst[3],
int dstStep,
const prim_size_t *roi) /* region of interest */
{
__m128i zero, max, r_cr, g_cb, g_cr, b_cb, c4096;
__m128i *y_buf, *cb_buf, *cr_buf, *r_buf, *g_buf, *b_buf;
int srcbump, dstbump, yp, imax;
/* Fall back unless all six plane pointers are 16-byte aligned, the width
 * is a multiple of 8 pixels, and both steps are multiples of 128 bytes
 * (the step test uses a mask of 127).
 */
if (((ULONG_PTR) (pSrc[0]) & 0x0f)
|| ((ULONG_PTR) (pSrc[1]) & 0x0f)
|| ((ULONG_PTR) (pSrc[2]) & 0x0f)
|| ((ULONG_PTR) (pDst[0]) & 0x0f)
|| ((ULONG_PTR) (pDst[1]) & 0x0f)
|| ((ULONG_PTR) (pDst[2]) & 0x0f)
|| (roi->width & 0x07)
|| (srcStep & 127)
|| (dstStep & 127))
{
/* We can't maintain 16-byte alignment. */
return general_yCbCrToRGB_16s16s_P3P3(pSrc, srcStep,
pDst, dstStep, roi);
}
/* Clamp limits for the 8-bit output range. */
zero = _mm_setzero_si128();
max = _mm_set1_epi16(255);
y_buf = (__m128i*) (pSrc[0]);
cb_buf = (__m128i*) (pSrc[1]);
cr_buf = (__m128i*) (pSrc[2]);
r_buf = (__m128i*) (pDst[0]);
g_buf = (__m128i*) (pDst[1]);
b_buf = (__m128i*) (pDst[2]);
/* Conversion factors, scaled by << 14 so they fit in a signed 16-bit lane. */
r_cr = _mm_set1_epi16(22986); /* 1.403 << 14 */
g_cb = _mm_set1_epi16(-5636); /* -0.344 << 14 */
g_cr = _mm_set1_epi16(-11698); /* -0.714 << 14 */
b_cb = _mm_set1_epi16(28999); /* 1.770 << 14 */
c4096 = _mm_set1_epi16(4096);
/* Row strides expressed in 16-byte vectors rather than bytes. */
srcbump = srcStep / sizeof(__m128i);
dstbump = dstStep / sizeof(__m128i);
#ifdef DO_PREFETCH
/* Prefetch Y's, Cb's, and Cr's. */
for (yp=0; yp<roi->height; yp++)
{
int i;
for (i=0; i<roi->width * sizeof(INT16) / sizeof(__m128i);
i += (CACHE_LINE_BYTES / sizeof(__m128i)))
{
_mm_prefetch((char*)(&y_buf[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&cb_buf[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&cr_buf[i]), _MM_HINT_NTA);
}
y_buf += srcbump;
cb_buf += srcbump;
cr_buf += srcbump;
}
/* Rewind the source pointers that the prefetch pass advanced. */
y_buf = (__m128i*) (pSrc[0]);
cb_buf = (__m128i*) (pSrc[1]);
cr_buf = (__m128i*) (pSrc[2]);
#endif /* DO_PREFETCH */
/* Number of 16-byte vectors (8 pixels each) per row. */
imax = roi->width * sizeof(INT16) / sizeof(__m128i);
for (yp=0; yp<roi->height; ++yp)
{
int i;
for (i=0; i<imax; i++)
{
/* In order to use SSE2 signed 16-bit integer multiplication
* we need to convert the floating point factors to signed int
* without losing information.
* The result of this multiplication is 32 bit and we have two
* SSE instructions that return either the hi or lo word.
* Thus we will multiply the factors by the highest possible 2^n,
* take the upper 16 bits of the signed 32-bit result
* (_mm_mulhi_epi16) and correct this result by multiplying
* it by 2^(16-n).
*
* For the given factors in the conversion matrix the best
* possible n is 14.
*
* Example for calculating r:
* r = (y>>5) + 128 + (cr*1.403)>>5 // our base formula
* r = (y>>5) + 128 + (HIWORD(cr*(1.403<<14)<<2))>>5 // see above
* r = (y+4096)>>5 + (HIWORD(cr*22986)<<2)>>5 // simplification
* r = ((y+4096)>>2 + HIWORD(cr*22986)) >> 3
*/
/* y = (y_r_buf[i] + 4096) >> 2 */
__m128i y, cb, cr, r, g, b;
y = _mm_load_si128(y_buf + i);
y = _mm_add_epi16(y, c4096);
y = _mm_srai_epi16(y, 2);
/* cb = cb_g_buf[i]; */
cb = _mm_load_si128(cb_buf + i);
/* cr = cr_b_buf[i]; */
cr = _mm_load_si128(cr_buf + i);
/* All three inputs are loaded before the first store below. */
/* (y + HIWORD(cr*22986)) >> 3 */
r = _mm_add_epi16(y, _mm_mulhi_epi16(cr, r_cr));
r = _mm_srai_epi16(r, 3);
/* r_buf[i] = MINMAX(r, 0, 255); */
_mm_between_epi16(r, zero, max);
_mm_store_si128(r_buf + i, r);
/* (y + HIWORD(cb*-5636) + HIWORD(cr*-11698)) >> 3 */
g = _mm_add_epi16(y, _mm_mulhi_epi16(cb, g_cb));
g = _mm_add_epi16(g, _mm_mulhi_epi16(cr, g_cr));
g = _mm_srai_epi16(g, 3);
/* g_buf[i] = MINMAX(g, 0, 255); */
_mm_between_epi16(g, zero, max);
_mm_store_si128(g_buf + i, g);
/* (y + HIWORD(cb*28999)) >> 3 */
b = _mm_add_epi16(y, _mm_mulhi_epi16(cb, b_cb));
b = _mm_srai_epi16(b, 3);
/* b_buf[i] = MINMAX(b, 0, 255); */
_mm_between_epi16(b, zero, max);
_mm_store_si128(b_buf + i, b);
}
/* Advance every plane by one full row. */
y_buf += srcbump;
cb_buf += srcbump;
cr_buf += srcbump;
r_buf += dstbump;
g_buf += dstbump;
b_buf += dstbump;
}
return PRIMITIVES_SUCCESS;
}
/*---------------------------------------------------------------------------*/
/* The encoded YCbCr coefficients are represented as 11.5 fixed-point
* numbers. See the general code above.
*/
/* SSE2 version of the planar 16-bit RGB to planar 16-bit YCbCr conversion.
 *
 * pSrc[0..2] - R, G and B planes
 * srcStep    - bytes between rows of the source planes
 * pDst[0..2] - Y, Cb and Cr planes
 * dstStep    - bytes between rows of the destination planes
 * roi        - width and height of the region to convert
 *
 * Eight pixels are processed per iteration.  When the pointers, width or
 * steps do not satisfy the checks below, the work is handed to
 * general_RGBToYCbCr_16s16s_P3P3() and its result is returned; otherwise
 * PRIMITIVES_SUCCESS is returned.
 */
PRIM_STATIC pstatus_t sse2_RGBToYCbCr_16s16s_P3P3(
	const INT16 *pSrc[3],
	int srcStep,
	INT16 *pDst[3],
	int dstStep,
	const prim_size_t *roi)	/* region of interest */
{
	__m128i min, max, y_r, y_g, y_b, cb_r, cb_g, cb_b, cr_r, cr_g, cr_b;
	__m128i *r_buf, *g_buf, *b_buf, *y_buf, *cb_buf, *cr_buf;
	int srcbump, dstbump, yp, imax;

	/* Fall back unless all six plane pointers are 16-byte aligned, the
	 * width is a multiple of 8 pixels, and both steps are multiples of
	 * 128 bytes.
	 */
	if (((ULONG_PTR) (pSrc[0]) & 0x0f)
			|| ((ULONG_PTR) (pSrc[1]) & 0x0f)
			|| ((ULONG_PTR) (pSrc[2]) & 0x0f)
			|| ((ULONG_PTR) (pDst[0]) & 0x0f)
			|| ((ULONG_PTR) (pDst[1]) & 0x0f)
			|| ((ULONG_PTR) (pDst[2]) & 0x0f)
			|| (roi->width & 0x07)
			|| (srcStep & 127)
			|| (dstStep & 127))
	{
		/* We can't maintain 16-byte alignment. */
		return general_RGBToYCbCr_16s16s_P3P3(pSrc, srcStep,
			pDst, dstStep, roi);
	}

	/* Output range limits in 11.5 fixed point. */
	min = _mm_set1_epi16(-128 << 5);
	max = _mm_set1_epi16(127 << 5);

	r_buf  = (__m128i*) (pSrc[0]);
	g_buf  = (__m128i*) (pSrc[1]);
	b_buf  = (__m128i*) (pSrc[2]);
	y_buf  = (__m128i*) (pDst[0]);
	cb_buf = (__m128i*) (pDst[1]);
	cr_buf = (__m128i*) (pDst[2]);

	y_r  = _mm_set1_epi16(9798);	/*  0.299000 << 15 */
	y_g  = _mm_set1_epi16(19235);	/*  0.587000 << 15 */
	y_b  = _mm_set1_epi16(3735);	/*  0.114000 << 15 */
	cb_r = _mm_set1_epi16(-5535);	/* -0.168935 << 15 */
	cb_g = _mm_set1_epi16(-10868);	/* -0.331665 << 15 */
	cb_b = _mm_set1_epi16(16403);	/*  0.500590 << 15 */
	cr_r = _mm_set1_epi16(16377);	/*  0.499813 << 15 */
	cr_g = _mm_set1_epi16(-13714);	/* -0.418531 << 15 */
	cr_b = _mm_set1_epi16(-2663);	/* -0.081282 << 15 */

	/* Row strides expressed in 16-byte vectors rather than bytes. */
	srcbump = srcStep / sizeof(__m128i);
	dstbump = dstStep / sizeof(__m128i);

#ifdef DO_PREFETCH
	/* Prefetch RGB's. */
	for (yp=0; yp<roi->height; yp++)
	{
		int i;
		for (i=0; i<roi->width * sizeof(INT16) / sizeof(__m128i);
			i += (CACHE_LINE_BYTES / sizeof(__m128i)))
		{
			_mm_prefetch((char*)(&r_buf[i]), _MM_HINT_NTA);
			_mm_prefetch((char*)(&g_buf[i]), _MM_HINT_NTA);
			_mm_prefetch((char*)(&b_buf[i]), _MM_HINT_NTA);
		}
		r_buf += srcbump;
		g_buf += srcbump;
		b_buf += srcbump;
	}
	/* Rewind the source pointers that the prefetch pass advanced. */
	r_buf = (__m128i*) (pSrc[0]);
	g_buf = (__m128i*) (pSrc[1]);
	b_buf = (__m128i*) (pSrc[2]);
#endif /* DO_PREFETCH */

	/* Number of 16-byte vectors (8 pixels each) per row. */
	imax = roi->width * sizeof(INT16) / sizeof(__m128i);
	for (yp=0; yp<roi->height; ++yp)
	{
		int i;
		for (i=0; i<imax; i++)
		{
			/* In order to use SSE2 signed 16-bit integer multiplication
			 * we need to convert the floating point factors to signed
			 * int without losing information.  The result of this
			 * multiplication is 32 bit and using SSE2 we get either the
			 * product's hi or lo word.  Thus we multiply the factors by
			 * the highest possible 2^n and take the upper 16 bits of the
			 * signed 32-bit result (_mm_mulhi_epi16).  Since the final
			 * result needs to be scaled by << 5, and in order to keep
			 * the precision within the upper 16 bits, the RGB values
			 * used in the multiplication are scaled by << 5+(16-n).
			 */
			__m128i r, g, b, y, cb, cr;

			/* All three inputs come from the SOURCE planes and are
			 * loaded before anything is stored.
			 */
			r = _mm_load_si128(r_buf+i);
			g = _mm_load_si128(g_buf+i);
			b = _mm_load_si128(b_buf+i);

			/* r<<6; g<<6; b<<6 */
			r = _mm_slli_epi16(r, 6);
			g = _mm_slli_epi16(g, 6);
			b = _mm_slli_epi16(b, 6);

			/* y = HIWORD(r*y_r) + HIWORD(g*y_g) + HIWORD(b*y_b) + min */
			y = _mm_mulhi_epi16(r, y_r);
			y = _mm_add_epi16(y, _mm_mulhi_epi16(g, y_g));
			y = _mm_add_epi16(y, _mm_mulhi_epi16(b, y_b));
			y = _mm_add_epi16(y, min);
			/* y_buf[i] = MINMAX(y, (-128 << 5), (127 << 5)); */
			_mm_between_epi16(y, min, max);
			_mm_store_si128(y_buf+i, y);

			/* cb = HIWORD(r*cb_r) + HIWORD(g*cb_g) + HIWORD(b*cb_b) */
			cb = _mm_mulhi_epi16(r, cb_r);
			cb = _mm_add_epi16(cb, _mm_mulhi_epi16(g, cb_g));
			cb = _mm_add_epi16(cb, _mm_mulhi_epi16(b, cb_b));
			/* cb_buf[i] = MINMAX(cb, (-128 << 5), (127 << 5)); */
			_mm_between_epi16(cb, min, max);
			_mm_store_si128(cb_buf+i, cb);

			/* cr = HIWORD(r*cr_r) + HIWORD(g*cr_g) + HIWORD(b*cr_b) */
			cr = _mm_mulhi_epi16(r, cr_r);
			cr = _mm_add_epi16(cr, _mm_mulhi_epi16(g, cr_g));
			cr = _mm_add_epi16(cr, _mm_mulhi_epi16(b, cr_b));
			/* cr_buf[i] = MINMAX(cr, (-128 << 5), (127 << 5)); */
			_mm_between_epi16(cr, min, max);
			_mm_store_si128(cr_buf+i, cr);
		}
		/* R/G/B are the source planes and Y/Cb/Cr the destination
		 * planes; each set advances by its own row stride.
		 */
		r_buf  += srcbump;
		g_buf  += srcbump;
		b_buf  += srcbump;
		y_buf  += dstbump;
		cb_buf += dstbump;
		cr_buf += dstbump;
	}
	return PRIMITIVES_SUCCESS;
}
/*---------------------------------------------------------------------------*/
/* Helper macros used by sse2_RGBToRGB_16s8u_P3AC4R() below. */
/* Aligned 128-bit load from / store to an arbitrary pointer. */
#define LOAD128(_src_) \
_mm_load_si128((__m128i *) _src_)
#define STORE128(_dst_, _src_) \
_mm_store_si128((__m128i *) _dst_, _src_)
/* The following wrappers assign their result back to _dst_.
 * Note that the unpack wrappers hand (_src_, _dst_) to the intrinsic in
 * that order, while PACKUSWB hands (_dst_, _src_).
 */
#define PUNPCKLBW(_dst_, _src_) \
_dst_ = _mm_unpacklo_epi8(_src_, _dst_)
#define PUNPCKHBW(_dst_, _src_) \
_dst_ = _mm_unpackhi_epi8(_src_, _dst_)
#define PUNPCKLWD(_dst_, _src_) \
_dst_ = _mm_unpacklo_epi16(_src_, _dst_)
#define PUNPCKHWD(_dst_, _src_) \
_dst_ = _mm_unpackhi_epi16(_src_, _dst_)
#define PACKUSWB(_dst_, _src_) \
_dst_ = _mm_packus_epi16(_dst_, _src_)
/* Prefetch with the _MM_HINT_T0 hint. */
#define PREFETCH(_ptr_) \
_mm_prefetch((const void *) _ptr_, _MM_HINT_T0)
/* A vector with every bit set. */
#define XMM_ALL_ONES \
_mm_set1_epi32(0xFFFFFFFFU)
/* SSE2 packing of three 16-bit colour planes into interleaved 4-byte
 * pixels (blue, green, red, 0xFF in increasing address order).
 *
 * Sixteen pixels are produced per iteration.  When the pointers, width or
 * steps do not satisfy the checks below, the work is handed to
 * general_RGBToRGB_16s8u_P3AC4R() and its result is returned; otherwise
 * PRIMITIVES_SUCCESS is returned.  A region with a non-positive width or
 * height writes nothing.
 */
PRIM_STATIC pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
	const INT16 *pSrc[3],	/* 16-bit R,G, and B arrays */
	INT32 srcStep,			/* bytes between rows in source data */
	BYTE *pDst,				/* 32-bit interleaved ARGB (ABGR?) data */
	INT32 dstStep,			/* bytes between rows in dest data */
	const prim_size_t *roi)	/* region of interest */
{
	const UINT16 *r = (const UINT16 *) (pSrc[0]);
	const UINT16 *g = (const UINT16 *) (pSrc[1]);
	const UINT16 *b = (const UINT16 *) (pSrc[2]);
	BYTE *out;
	int srcbump, dstbump, y;

	/* Ensure 16-byte alignment on all pointers,
	 * that width is a multiple of 16,
	 * and that the next row will also remain aligned.
	 * Since this is usually used for 64x64 aligned arrays,
	 * these checks should presumably pass.
	 */
	if ((((ULONG_PTR) (pSrc[0]) & 0x0f) != 0)
			|| (((ULONG_PTR) (pSrc[1]) & 0x0f) != 0)
			|| (((ULONG_PTR) (pSrc[2]) & 0x0f) != 0)
			|| (((ULONG_PTR) pDst & 0x0f) != 0)
			|| (roi->width & 0x0f)
			|| (srcStep & 0x0f)
			|| (dstStep & 0x0f))
	{
		return general_RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, roi);
	}

	out = (BYTE *) pDst;
	/* Source padding is counted in 16-bit elements, destination padding
	 * in bytes.
	 */
	srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
	dstbump = (dstStep - (roi->width * sizeof(UINT32)));

	for (y=0; y<roi->height; ++y)
	{
		int width;

		/* A pre-tested loop is required here: a width of zero passes
		 * the multiple-of-16 check above, and a post-tested
		 * "while (width -= 16)" would then never reach zero.
		 */
		for (width = roi->width; width > 0; width -= 16)
		{
			__m128i R0, R1, R2, R3, R4;
			/* The comments below pretend these are 8-byte registers
			 * rather than 16-byte, for readability.
			 */
			R0 = LOAD128(b);  b += 8;	/* R0 = 00B300B200B100B0 */
			R1 = LOAD128(b);  b += 8;	/* R1 = 00B700B600B500B4 */
			PACKUSWB(R0,R1);			/* R0 = B7B6B5B4B3B2B1B0 */
			R1 = LOAD128(g);  g += 8;	/* R1 = 00G300G200G100G0 */
			R2 = LOAD128(g);  g += 8;	/* R2 = 00G700G600G500G4 */
			PACKUSWB(R1,R2);			/* R1 = G7G6G5G4G3G2G1G0 */
			R2 = R1;					/* R2 = G7G6G5G4G3G2G1G0 */
			PUNPCKLBW(R2,R0);			/* R2 = G3B3G2B2G1B1G0B0 */
			PUNPCKHBW(R1,R0);			/* R1 = G7B7G6B6G5B5G4B4 */
			R0 = LOAD128(r);  r += 8;	/* R0 = 00R300R200R100R0 */
			R3 = LOAD128(r);  r += 8;	/* R3 = 00R700R600R500R4 */
			PACKUSWB(R0,R3);			/* R0 = R7R6R5R4R3R2R1R0 */
			R3 = XMM_ALL_ONES;			/* R3 = FFFFFFFFFFFFFFFF */
			R4 = R3;					/* R4 = FFFFFFFFFFFFFFFF */
			PUNPCKLBW(R4,R0);			/* R4 = FFR3FFR2FFR1FFR0 */
			PUNPCKHBW(R3,R0);			/* R3 = FFR7FFR6FFR5FFR4 */
			R0 = R4;					/* R0 = R4               */
			PUNPCKLWD(R0,R2);			/* R0 = FFR1G1B1FFR0G0B0 */
			PUNPCKHWD(R4,R2);			/* R4 = FFR3G3B3FFR2G2B2 */
			R2 = R3;					/* R2 = R3               */
			PUNPCKLWD(R2,R1);			/* R2 = FFR5G5B5FFR4G4B4 */
			PUNPCKHWD(R3,R1);			/* R3 = FFR7G7B7FFR6G6B6 */
			STORE128(out, R0);  out += 16;	/* FFR1G1B1FFR0G0B0 */
			STORE128(out, R4);  out += 16;	/* FFR3G3B3FFR2G2B2 */
			STORE128(out, R2);  out += 16;	/* FFR5G5B5FFR4G4B4 */
			STORE128(out, R3);  out += 16;	/* FFR7G7B7FFR6G6B6 */
		}
		/* Jump to next row. */
		r += srcbump;
		g += srcbump;
		b += srcbump;
		out += dstbump;
	}
	return PRIMITIVES_SUCCESS;
}
#endif /* WITH_SSE2 */
/*---------------------------------------------------------------------------*/
#ifdef WITH_NEON
/* NEON version of the planar 16-bit YCbCr to planar 16-bit RGB conversion.
 *
 * pSrc[0..2] - Y, Cb and Cr planes
 * srcStep    - bytes between rows of the source planes
 * pDst[0..2] - R, G and B planes
 * dstStep    - bytes between rows of the destination planes
 * roi        - width and height of the region to convert
 *
 * Eight pixels are processed per iteration and the conversion factors are
 * approximated with sums of right shifts.  Returns PRIMITIVES_SUCCESS.
 *
 * NOTE(review): unlike general_yCbCrToRGB_16s16s_P3P3(), this routine adds
 * 128 (not 4096) to Y and applies no final >> 5, so it appears to expect
 * input that is not scaled by << 5 -- confirm against the callers.
 */
PRIM_STATIC pstatus_t neon_yCbCrToRGB_16s16s_P3P3(
	const INT16 *pSrc[3],
	int srcStep,
	INT16 *pDst[3],
	int dstStep,
	const prim_size_t *roi)	/* region of interest */
{
	/* TODO: If necessary, check alignments and call the general version. */
	int16x8_t zero = vdupq_n_s16(0);
	int16x8_t max = vdupq_n_s16(255);
	int16x8_t y_add = vdupq_n_s16(128);

	int16x8_t* y_buf  = (int16x8_t*) pSrc[0];
	int16x8_t* cb_buf = (int16x8_t*) pSrc[1];
	int16x8_t* cr_buf = (int16x8_t*) pSrc[2];
	int16x8_t* r_buf  = (int16x8_t*) pDst[0];
	int16x8_t* g_buf  = (int16x8_t*) pDst[1];
	int16x8_t* b_buf  = (int16x8_t*) pDst[2];

	/* Row strides expressed in 16-byte vectors rather than bytes. */
	int srcbump = srcStep / sizeof(int16x8_t);
	int dstbump = dstStep / sizeof(int16x8_t);
	int yp;

	/* Number of 16-byte vectors (8 pixels each) per row. */
	int imax = roi->width * sizeof(INT16) / sizeof(int16x8_t);

	for (yp=0; yp<roi->height; ++yp)
	{
		int i;
		for (i=0; i<imax; i++)
		{
			int16x8_t y = vld1q_s16((INT16*) (y_buf+i));
			y = vaddq_s16(y, y_add);

			int16x8_t cr = vld1q_s16((INT16*) (cr_buf+i));

			/* r = between((y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5)),
			 *     0, 255);
			 */
			int16x8_t r = vaddq_s16(y, cr);
			r = vaddq_s16(r, vshrq_n_s16(cr, 2));
			r = vaddq_s16(r, vshrq_n_s16(cr, 3));
			r = vaddq_s16(r, vshrq_n_s16(cr, 5));
			r = vminq_s16(vmaxq_s16(r, zero), max);
			vst1q_s16((INT16*) (r_buf+i), r);

			/* cb = cb_g_buf[i]; */
			int16x8_t cb = vld1q_s16((INT16*) (cb_buf+i));

			/* g = between(y - (cb >> 2) - (cb >> 4) - (cb >> 5) - (cr >> 1)
			 *     - (cr >> 3) - (cr >> 4) - (cr >> 5), 0, 255);
			 */
			int16x8_t g = vsubq_s16(y, vshrq_n_s16(cb, 2));
			g = vsubq_s16(g, vshrq_n_s16(cb, 4));
			g = vsubq_s16(g, vshrq_n_s16(cb, 5));
			g = vsubq_s16(g, vshrq_n_s16(cr, 1));
			g = vsubq_s16(g, vshrq_n_s16(cr, 3));
			g = vsubq_s16(g, vshrq_n_s16(cr, 4));
			g = vsubq_s16(g, vshrq_n_s16(cr, 5));
			g = vminq_s16(vmaxq_s16(g, zero), max);
			vst1q_s16((INT16*) (g_buf+i), g);

			/* b = between((y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6)),
			 *     0, 255);
			 */
			int16x8_t b = vaddq_s16(y, cb);
			b = vaddq_s16(b, vshrq_n_s16(cb, 1));
			b = vaddq_s16(b, vshrq_n_s16(cb, 2));
			b = vaddq_s16(b, vshrq_n_s16(cb, 6));
			b = vminq_s16(vmaxq_s16(b, zero), max);
			vst1q_s16((INT16*) (b_buf+i), b);
		}
		/* Advance every plane by one full row. */
		y_buf  += srcbump;
		cb_buf += srcbump;
		cr_buf += srcbump;
		r_buf  += dstbump;
		g_buf  += dstbump;
		b_buf  += dstbump;
	}

	/* The function is declared to return a status, so it must return one. */
	return PRIMITIVES_SUCCESS;
}
#endif /* WITH_NEON */
/* I don't see a direct IPP version of this, since the input is INT16
* YCbCr. It may be possible via Deinterleave and then YCbCrToRGB_<mod>.
* But that would likely be slower.
*/
/* ------------------------------------------------------------------------- */
/* Fill in the colour-conversion entries of the primitives dispatch table.
 *
 * hints - CPU capability flags consulted when choosing an optimized version
 * prims - dispatch table whose colour-conversion slots are written
 */
void primitives_init_colors(
	const primitives_hints_t *hints,
	primitives_t *prims)
{
	/* The portable C routines are always installed first. */
	prims->yCbCrToRGB_16s16s_P3P3 = general_yCbCrToRGB_16s16s_P3P3;
	prims->RGBToYCbCr_16s16s_P3P3 = general_RGBToYCbCr_16s16s_P3P3;
	prims->RGBToRGB_16s8u_P3AC4R  = general_RGBToRGB_16s8u_P3AC4R;

#if defined(WITH_SSE2)
	/* All three entries have an SSE2 version. */
	if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
	{
		prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3;
		prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3;
		prims->RGBToRGB_16s8u_P3AC4R  = sse2_RGBToRGB_16s8u_P3AC4R;
	}
#elif defined(WITH_NEON)
	/* Only the YCbCr -> RGB entry has a NEON version. */
	if (hints->arm_flags & PRIM_ARM_NEON_AVAILABLE)
	{
		prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3;
	}
#endif /* WITH_SSE2 */
}
/* ------------------------------------------------------------------------- */
/* Counterpart of primitives_init_colors(); currently a no-op. */
void primitives_deinit_colors(
	primitives_t *prims)
{
	/* Intentionally empty. */
	(void) prims;
}

View File

@@ -0,0 +1,180 @@
/* FreeRDP: A Remote Desktop Protocol Client
* Copy operations.
* vi:ts=4 sw=4:
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#ifdef WITH_IPP
# include <ipps.h>
# include <ippi.h>
#endif /* WITH_IPP */
#include "prim_internal.h"
/* ------------------------------------------------------------------------- */
/*static inline BOOL memory_regions_overlap_1d(*/
/* Report whether two byte ranges of equal length share any address.
 *
 * p1, p2 - start of each range
 * bytes  - length of both ranges
 *
 * Returns TRUE when the range starting at the lower address extends past
 * the start of the other one, FALSE otherwise.
 */
static BOOL memory_regions_overlap_1d(
	const BYTE *p1,
	const BYTE *p2,
	size_t bytes)
{
	const ULONG_PTR first = (const ULONG_PTR) p1;
	const ULONG_PTR second = (const ULONG_PTR) p2;
	/* Order the two start addresses so a single comparison suffices. */
	const ULONG_PTR lower = (first <= second) ? first : second;
	const ULONG_PTR upper = (first <= second) ? second : first;

	return (lower + bytes > upper) ? TRUE : FALSE;
}
/* ------------------------------------------------------------------------- */
/*static inline BOOL memory_regions_overlap_2d( */
static BOOL memory_regions_overlap_2d(
const BYTE *p1, int p1Step, int p1Size,
const BYTE *p2, int p2Step, int p2Size,
int width, int height)
{
ULONG_PTR p1m = (ULONG_PTR) p1;
ULONG_PTR p2m = (ULONG_PTR) p2;
if (p1m <= p2m)
{
ULONG_PTR p1mEnd = p1m + (height-1)*p1Step + width*p1Size;
if (p1mEnd > p2m) return TRUE;
}
else
{
ULONG_PTR p2mEnd = p2m + (height-1)*p2Step + width*p2Size;
if (p2mEnd > p1m) return TRUE;
}
/* else */
return FALSE;
}
/* ------------------------------------------------------------------------- */
/* Copy len bytes from pSrc to pDst.
 *
 * memmove() is used when the two ranges overlap and memcpy() otherwise.
 * Always returns PRIMITIVES_SUCCESS.
 */
PRIM_STATIC pstatus_t general_copy_8u(
	const BYTE *pSrc,
	BYTE *pDst,
	INT32 len)
{
	const size_t nbytes = (size_t) len;

	if (!memory_regions_overlap_1d(pSrc, pDst, nbytes))
	{
		/* Disjoint ranges: the plain copy is safe. */
		memcpy((void *) pDst, (const void *) pSrc, nbytes);
	}
	else
	{
		memmove((void *) pDst, (const void *) pSrc, nbytes);
	}
	return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Copy a block of pixels from one buffer to another.
* The addresses are assumed to have been already offset to the upper-left
* corners of the source and destination region of interest.
*/
/* Copy a width x height block of 4-byte pixels.
 *
 * pSrc, srcStep - upper-left pixel of the source block and its row pitch
 *                 in bytes
 * pDst, dstStep - upper-left pixel of the destination block and its row
 *                 pitch in bytes
 * width, height - block dimensions in pixels
 *
 * Always returns PRIMITIVES_SUCCESS.  Nothing is copied when width or
 * height is zero or negative.  Overlapping blocks are copied row by row
 * through prims->copy; non-overlapping blocks use memcpy().
 */
PRIM_STATIC pstatus_t general_copy_8u_AC4r(
	const BYTE *pSrc, INT32 srcStep,
	BYTE *pDst, INT32 dstStep,
	INT32 width, INT32 height)
{
	primitives_t *prims = primitives_get();
	const BYTE *src = (const BYTE *) pSrc;
	BYTE *dst = (BYTE *) pDst;
	int rowbytes = width * sizeof(UINT32);

	/* A negative count would otherwise keep the do/while loops below
	 * running far beyond the buffers.
	 */
	if ((width <= 0) || (height <= 0)) return PRIMITIVES_SUCCESS;

	if (memory_regions_overlap_2d(pSrc, srcStep, sizeof(UINT32),
			pDst, dstStep, sizeof(UINT32), width, height))
	{
		if ((srcStep == dstStep) && (srcStep > 0)
				&& ((ULONG_PTR) pDst > (ULONG_PTR) pSrc))
		{
			/* Destination lies above the source in a surface with a
			 * common positive pitch: a destination row can only cover
			 * source rows at the same or a higher index, so the rows
			 * must be moved last-to-first or source rows would be
			 * overwritten before they are read.
			 */
			INT32 row;
			for (row = height - 1; row >= 0; --row)
			{
				prims->copy(src + row * srcStep, dst + row * dstStep,
					rowbytes);
			}
		}
		else
		{
			do {
				prims->copy(src, dst, rowbytes);
				src += srcStep;
				dst += dstStep;
			} while (--height);
		}
	}
	else
	{
		/* TODO: do it in one operation when the rowdata is adjacent. */
		do {
			/* If we find a replacement for memcpy that is consistently
			 * faster, this could be replaced with that.
			 */
			memcpy(dst, src, rowbytes);
			src += srcStep;
			dst += dstStep;
		} while (--height);
	}
	return PRIMITIVES_SUCCESS;
}
#ifdef WITH_IPP
/* ------------------------------------------------------------------------- */
/* This is just ippiCopy_8u_AC4R without the IppiSize structure parameter. */
/* Adapter with the copy_8u_AC4r calling convention: packs width and
 * height into an IppiSize and forwards everything to ippiCopy_8u_AC4R(),
 * returning its status cast to pstatus_t.
 */
static pstatus_t ippiCopy_8u_AC4r(
	const BYTE *pSrc, INT32 srcStep,
	BYTE *pDst, INT32 dstStep,
	INT32 width, INT32 height)
{
	IppiSize size;

	size.height = height;
	size.width = width;
	return (pstatus_t) ippiCopy_8u_AC4R(pSrc, srcStep, pDst, dstStep, size);
}
#endif /* WITH_IPP */
/* ------------------------------------------------------------------------- */
/* Fill in the copy entries of the primitives dispatch table.
 *
 * hints - CPU capability flags (not consulted by this routine)
 * prims - dispatch table whose copy_8u, copy_8u_AC4r and copy slots are
 *         written
 */
void primitives_init_copy(
	const primitives_hints_t *hints,
	primitives_t *prims)
{
	/* The portable C routines are always installed first. */
	prims->copy_8u_AC4r = general_copy_8u_AC4r;
	prims->copy_8u = general_copy_8u;

	/* An IPP build overrides both entries. */
#ifdef WITH_IPP
	prims->copy_8u_AC4r = (__copy_8u_AC4r_t) ippiCopy_8u_AC4r;
	prims->copy_8u = (__copy_8u_t) ippsCopy_8u;
#endif

	/* An SSE2 version without prefetch performed erratically against
	 * memcpy: sometimes significantly faster, sometimes dreadfully
	 * slower, varying a lot with block size and processor.  Hence no SSE
	 * version is installed here unless one can be written that is
	 * consistently faster than memcpy.
	 */

	/* copy is the same routine as copy_8u, exposed with void* parameters. */
	prims->copy = (__copy_t) (prims->copy_8u);
}
/* ------------------------------------------------------------------------- */
/* Counterpart of primitives_init_copy(); currently a no-op. */
void primitives_deinit_copy(
	primitives_t *prims)
{
	/* Intentionally empty. */
	(void) prims;
}

View File

@@ -0,0 +1,105 @@
/* prim_internal.h
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License. Algorithms used by
* this code may be covered by patents by HP, Microsoft, or other parties.
*
*/
#ifdef __GNUC__
# pragma once
#endif
#ifndef __PRIM_INTERNAL_H_INCLUDED__
#define __PRIM_INTERNAL_H_INCLUDED__
#ifndef CMAKE_BUILD_TYPE
#define CMAKE_BUILD_TYPE Release
#endif
#include <freerdp/primitives.h>
/* Normally the internal entrypoints should be static, but a benchmark
* program may want to access them directly and turn this off.
*/
#ifndef PRIM_STATIC
# define PRIM_STATIC static
#else
# undef PRIM_STATIC
# define PRIM_STATIC
#endif /* !PRIM_STATIC */
/* Use lddqu for unaligned; load for 16-byte aligned. */
#define LOAD_SI128(_ptr_) \
(((ULONG_PTR) (_ptr_) & 0x0f) \
? _mm_lddqu_si128((__m128i *) (_ptr_)) \
: _mm_load_si128((__m128i *) (_ptr_)))
/* This structure can (eventually) be used to provide hints to the
* initialization routines, e.g. whether SSE2 or NEON or IPP instructions
* or calls are available.
*/
/* Capability flags handed to every primitives_init_*() routine. */
typedef struct
{
UINT32 x86_flags; /* tested against the PRIM_X86_*_AVAILABLE bits */
UINT32 arm_flags; /* tested against the PRIM_ARM_*_AVAILABLE bits */
} primitives_hints_t;
/* Function prototypes for all the init/deinit routines. */
extern void primitives_init_copy(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_copy(
primitives_t *prims);
extern void primitives_init_set(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_set(
primitives_t *prims);
extern void primitives_init_add(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_add(
primitives_t *prims);
extern void primitives_init_andor(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_andor(
primitives_t *prims);
extern void primitives_init_shift(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_shift(
primitives_t *prims);
extern void primitives_init_sign(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_sign(
primitives_t *prims);
extern void primitives_init_alphaComp(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_alphaComp(
primitives_t *prims);
extern void primitives_init_colors(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_colors(
primitives_t *prims);
#endif /* !__PRIM_INTERNAL_H_INCLUDED__ */

View File

@@ -0,0 +1,312 @@
/* FreeRDP: A Remote Desktop Protocol Client
* Routines to set a chunk of memory to a constant.
* vi:ts=4 sw=4:
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#ifdef WITH_SSE2
# include <emmintrin.h>
#endif /* WITH_SSE2 */
#ifdef WITH_IPP
# include <ipps.h>
#endif /* WITH_IPP */
#include "prim_internal.h"
/* ========================================================================= */
/* Fill len bytes starting at pDst with the value val, using memset().
 * Always returns PRIMITIVES_SUCCESS.
 */
PRIM_STATIC pstatus_t general_set_8u(
	BYTE val,
	BYTE *pDst,
	INT32 len)
{
	const size_t nbytes = (size_t) len;
	const int fill = (int) val;

	memset((void *) pDst, fill, nbytes);
	return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Clear len bytes starting at pDst, using memset().
 * Always returns PRIMITIVES_SUCCESS.
 */
PRIM_STATIC pstatus_t general_zero(
	void *pDst,
	size_t len)
{
	(void) memset(pDst, 0, len);
	return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
#ifdef WITH_SSE2
# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS)
/* SSE2 version of the byte fill: store val into len bytes at pDst.
 *
 * Lengths below 16 are passed to general_set_8u().  Otherwise single
 * bytes are written until the pointer is 16-byte aligned, the bulk is
 * written with aligned 128-bit stores (in 256-byte and then 16-byte
 * chunks), and any leftover bytes are written one at a time.
 * Returns PRIMITIVES_SUCCESS on the SSE2 path.
 */
PRIM_STATIC pstatus_t sse2_set_8u(
	BYTE val,
	BYTE *pDst,
	INT32 len)
{
	BYTE fill, *out;
	__m128i pattern;
	size_t chunks;

	if (len < 16) return general_set_8u(val, pDst, len);

	fill = val;
	out = (BYTE *) pDst;

	/* Byte stores until the destination is 16-byte aligned. */
	while ((ULONG_PTR) out & 0x0f)
	{
		*out++ = fill;
		if (--len == 0) return PRIMITIVES_SUCCESS;
	}

	pattern = _mm_set1_epi8(fill);

	/* 256-byte chunks: sixteen aligned stores per iteration. */
	chunks = len >> 8;
	len -= chunks << 8;
	while (chunks--)
	{
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
	}

	/* Remaining 16-byte chunks: one aligned store each. */
	chunks = len >> 4;
	len -= chunks << 4;
	while (chunks--)
	{
		_mm_store_si128((__m128i *) out, pattern);  out += 16;
	}

	/* Fewer than 16 bytes are left; finish with byte stores. */
	while (len--) *out++ = fill;

	return PRIMITIVES_SUCCESS;
}
# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */
#endif /* WITH_SSE2 */
/* ========================================================================= */
PRIM_STATIC pstatus_t general_set_32s(
	INT32 val,
	INT32 *pDst,
	INT32 len)
{
	INT32 *out = (INT32 *) pDst;
	size_t filled, todo;
	primitives_t *prims;

	/* Short runs are simply stored one element at a time. */
	if (len < 256)
	{
		while (len--) *out++ = val;
		return PRIMITIVES_SUCCESS;
	}

	/* Longer runs: seed the first element, then repeatedly copy the
	 * already-filled prefix onto the region directly behind it, so the
	 * filled span doubles on every pass.
	 */
	filled = 1;
	out[0] = val;
	todo = len - 1;
	prims = primitives_get();

	while (todo != 0)
	{
		/* Never copy more elements than are still missing. */
		size_t chunk = (filled < todo) ? filled : todo;

		/* copy_8u is called as (source, destination, byte count). */
		prims->copy_8u((BYTE *) out, (BYTE *) (out + filled), chunk << 2);
		todo -= chunk;
		filled <<= 1;
	}

	return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
PRIM_STATIC pstatus_t general_set_32u(
	UINT32 val,
	UINT32 *pDst,
	INT32 len)
{
	UINT32 *out = (UINT32 *) pDst;
	size_t filled, todo;
	primitives_t *prims;

	/* Short runs are simply stored one element at a time. */
	if (len < 256)
	{
		while (len--) *out++ = val;
		return PRIMITIVES_SUCCESS;
	}

	/* Longer runs: seed the first element, then repeatedly copy the
	 * already-filled prefix onto the region directly behind it, so the
	 * filled span doubles on every pass.
	 */
	filled = 1;
	out[0] = val;
	todo = len - 1;
	prims = primitives_get();

	while (todo != 0)
	{
		/* Never copy more elements than are still missing. */
		size_t chunk = (filled < todo) ? filled : todo;

		/* copy_8u is called as (source, destination, byte count). */
		prims->copy_8u((BYTE *) out, (BYTE *) (out + filled), chunk << 2);
		todo -= chunk;
		filled <<= 1;
	}

	return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
#ifdef WITH_SSE2
# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS)
PRIM_STATIC pstatus_t sse2_set_32u(
	UINT32 val,
	UINT32 *pDst,
	INT32 len)
{
	UINT32 *out = (UINT32 *) pDst;
	__m128i vfill;
	size_t blocks;

	/* Very short runs are handled right here, element by element. */
	if (len < 32)
	{
		while (len--) *out++ = val;
		return PRIMITIVES_SUCCESS;
	}

	/* A destination that is not 4-byte aligned can never be brought to a
	 * 16-byte boundary by stepping whole elements; use the C routine.
	 */
	if (((ULONG_PTR) out & 0x03) != 0)
	{
		return general_set_32u(val, pDst, len);
	}

	/* Store single elements until the destination is 16-byte aligned. */
	while (((ULONG_PTR) out & 0x0f) != 0)
	{
		*out++ = val;
		if (--len == 0) return PRIMITIVES_SUCCESS;
	}

	/* Every 32-bit lane of the vector holds the fill value. */
	vfill = _mm_set1_epi32(val);

	/* 64-element (256-byte) blocks: sixteen aligned stores per pass. */
	blocks = len >> 6;
	len -= blocks << 6;
	while (blocks--)
	{
		__m128i *v = (__m128i *) out;
		_mm_store_si128(v +  0, vfill);
		_mm_store_si128(v +  1, vfill);
		_mm_store_si128(v +  2, vfill);
		_mm_store_si128(v +  3, vfill);
		_mm_store_si128(v +  4, vfill);
		_mm_store_si128(v +  5, vfill);
		_mm_store_si128(v +  6, vfill);
		_mm_store_si128(v +  7, vfill);
		_mm_store_si128(v +  8, vfill);
		_mm_store_si128(v +  9, vfill);
		_mm_store_si128(v + 10, vfill);
		_mm_store_si128(v + 11, vfill);
		_mm_store_si128(v + 12, vfill);
		_mm_store_si128(v + 13, vfill);
		_mm_store_si128(v + 14, vfill);
		_mm_store_si128(v + 15, vfill);
		out += 64;
	}

	/* Whatever whole 4-element (16-byte) vectors are left. */
	blocks = len >> 2;
	len -= blocks << 2;
	while (blocks--)
	{
		_mm_store_si128((__m128i *) out, vfill);
		out += 4;
	}

	/* Trailing elements (fewer than 4). */
	while (len--) *out++ = val;

	return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
PRIM_STATIC pstatus_t sse2_set_32s(
	INT32 val,
	INT32 *pDst,
	INT32 len)
{
	/* Filling is independent of signedness: reread the value's bits as
	 * unsigned and let the unsigned routine do the work.
	 */
	const UINT32 *bits = (const UINT32 *) &val;

	return sse2_set_32u(*bits, (UINT32 *) pDst, len);
}
# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */
#endif /* WITH_SSE2 */
#ifdef WITH_IPP
/* ------------------------------------------------------------------------- */
PRIM_STATIC pstatus_t ipp_wrapper_set_32u(
	UINT32 val,
	UINT32 *pDst,
	INT32 len)
{
	/* Only the signed IPP fill is used here, so reinterpret the unsigned
	 * value's bit pattern as signed and forward the call.
	 */
	const INT32 *bits = (const INT32 *) &val;

	return ippsSet_32s(*bits, (INT32 *) pDst, len);
}
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_set(
	const primitives_hints_t *hints,
	primitives_t *prims)
{
	/* The portable C routines are installed first so that every entry is
	 * valid regardless of what the build or the CPU offers.
	 */
	prims->zero = general_zero;
	prims->set_32u = general_set_32u;
	prims->set_32s = general_set_32s;
	prims->set_8u = general_set_8u;

#if defined(WITH_IPP)
	/* IPP build: all four entries are replaced unconditionally. */
	prims->zero = (__zero_t) ippsZero_8u;
	prims->set_32u = (__set_32u_t) ipp_wrapper_set_32u;
	prims->set_32s = (__set_32s_t) ippsSet_32s;
	prims->set_8u = (__set_8u_t) ippsSet_8u;
#elif defined(WITH_SSE2)
	/* SSE2 build: the set routines are replaced only when the hints say
	 * SSE2 is available; "zero" keeps its portable implementation.
	 */
	if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
	{
		prims->set_32u = sse2_set_32u;
		prims->set_32s = sse2_set_32s;
		prims->set_8u = sse2_set_8u;
	}
#endif
}
/* ------------------------------------------------------------------------- */
void primitives_deinit_set(
	primitives_t *prims)
{
	/* The set routines hold no resources, so there is nothing to release. */
	(void) prims;
}

View File

@@ -0,0 +1,172 @@
/* FreeRDP: A Remote Desktop Protocol Client
* Shift operations.
* vi:ts=4 sw=4:
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
#include <pmmintrin.h>
#endif /* WITH_SSE2 */
#ifdef WITH_IPP
#include <ipps.h>
#endif /* WITH_IPP */
#include "prim_internal.h"
#include "prim_templates.h"
/* ------------------------------------------------------------------------- */
/* Shift each of the len signed 16-bit values in pSrc left by val bits and
 * store the results in pDst.  Always returns PRIMITIVES_SUCCESS.
 */
PRIM_STATIC pstatus_t general_lShiftC_16s(
	const INT16 *pSrc,
	INT32 val,
	INT16 *pDst,
	INT32 len)
{
	if (val == 0)
	{
		/* A zero shift is a plain copy.  The SSE template in
		 * prim_templates.h stores to pDst in this case, so an
		 * out-of-place destination must not be left unwritten here.
		 * In-place calls still do no work.
		 */
		if ((pDst != pSrc) && (len > 0))
			memmove(pDst, pSrc, (size_t) len * sizeof(INT16));
		return PRIMITIVES_SUCCESS;
	}

	/* Shift in the unsigned domain: left-shifting a negative signed value
	 * is undefined behaviour in C.  The low 16 bits of the result are the
	 * same as for the signed shift.
	 */
	while (len--) *pDst++ = (INT16) (((UINT32) (UINT16) *pSrc++) << val);

	return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Shift each of the len signed 16-bit values in pSrc right by val bits and
 * store the results in pDst.  Always returns PRIMITIVES_SUCCESS.
 */
PRIM_STATIC pstatus_t general_rShiftC_16s(
	const INT16 *pSrc,
	INT32 val,
	INT16 *pDst,
	INT32 len)
{
	if (val == 0)
	{
		/* A zero shift is a plain copy.  The SSE template in
		 * prim_templates.h stores to pDst in this case, so an
		 * out-of-place destination must not be left unwritten here.
		 * In-place calls still do no work.
		 */
		if ((pDst != pSrc) && (len > 0))
			memmove(pDst, pSrc, (size_t) len * sizeof(INT16));
		return PRIMITIVES_SUCCESS;
	}

	/* The shift is applied to the signed value as-is. */
	while (len--) *pDst++ = *pSrc++ >> val;

	return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Shift each of the len unsigned 16-bit values in pSrc left by val bits and
 * store the results in pDst.  Always returns PRIMITIVES_SUCCESS.
 */
PRIM_STATIC pstatus_t general_lShiftC_16u(
	const UINT16 *pSrc,
	INT32 val,
	UINT16 *pDst,
	INT32 len)
{
	if (val == 0)
	{
		/* A zero shift is a plain copy.  The SSE template in
		 * prim_templates.h stores to pDst in this case, so an
		 * out-of-place destination must not be left unwritten here.
		 * In-place calls still do no work.
		 */
		if ((pDst != pSrc) && (len > 0))
			memmove(pDst, pSrc, (size_t) len * sizeof(UINT16));
		return PRIMITIVES_SUCCESS;
	}

	/* Shift as UINT32 so the intermediate cannot overflow the signed int
	 * that a UINT16 operand would otherwise be promoted to.
	 */
	while (len--) *pDst++ = (UINT16) (((UINT32) *pSrc++) << val);

	return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Shift each of the len unsigned 16-bit values in pSrc right by val bits and
 * store the results in pDst.  Always returns PRIMITIVES_SUCCESS.
 */
PRIM_STATIC pstatus_t general_rShiftC_16u(
	const UINT16 *pSrc,
	INT32 val,
	UINT16 *pDst,
	INT32 len)
{
	if (val == 0)
	{
		/* A zero shift is a plain copy.  The SSE template in
		 * prim_templates.h stores to pDst in this case, so an
		 * out-of-place destination must not be left unwritten here.
		 * In-place calls still do no work.
		 */
		if ((pDst != pSrc) && (len > 0))
			memmove(pDst, pSrc, (size_t) len * sizeof(UINT16));
		return PRIMITIVES_SUCCESS;
	}

	while (len--) *pDst++ = *pSrc++ >> val;

	return PRIMITIVES_SUCCESS;
}
#ifdef WITH_SSE2
# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS)
/* ------------------------------------------------------------------------- */
/* The four routines below are generated from the SSE3_SCD_ROUTINE template
 * in prim_templates.h.  Arguments: generated function name, element type,
 * C routine to fall back on, the intrinsic applied to each 128-bit vector,
 * and the one-element statement used at the unaligned head and the tail.
 */
/* Signed left shift. */
SSE3_SCD_ROUTINE(sse2_lShiftC_16s, INT16, general_lShiftC_16s,
_mm_slli_epi16, *dptr++ = *sptr++ << val)
/* ------------------------------------------------------------------------- */
/* Signed right shift; _mm_srai_epi16 is the arithmetic (sign-filling) shift. */
SSE3_SCD_ROUTINE(sse2_rShiftC_16s, INT16, general_rShiftC_16s,
_mm_srai_epi16, *dptr++ = *sptr++ >> val)
/* ------------------------------------------------------------------------- */
/* Unsigned left shift. */
SSE3_SCD_ROUTINE(sse2_lShiftC_16u, UINT16, general_lShiftC_16u,
_mm_slli_epi16, *dptr++ = *sptr++ << val)
/* ------------------------------------------------------------------------- */
/* Unsigned right shift; _mm_srli_epi16 is the logical (zero-filling) shift. */
SSE3_SCD_ROUTINE(sse2_rShiftC_16u, UINT16, general_rShiftC_16u,
_mm_srli_epi16, *dptr++ = *sptr++ >> val)
# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */
#endif
/* ------------------------------------------------------------------------- */
/* Signed 16-bit shift whose direction follows the sign of val: a positive
 * val shifts left, a negative val shifts right by -val, using whichever
 * lShiftC_16s / rShiftC_16s routines are installed in the primitives table.
 */
PRIM_STATIC pstatus_t general_shiftC_16s(
	const INT16 *pSrc,
	INT32 val,
	INT16 *pDst,
	INT32 len)
{
	primitives_t *prims;

	if (val == 0)
	{
		/* A zero shift is a plain copy; an out-of-place destination
		 * must still receive the source values.
		 */
		if ((pDst != pSrc) && (len > 0))
			memmove(pDst, pSrc, (size_t) len * sizeof(INT16));
		return PRIMITIVES_SUCCESS;
	}

	prims = primitives_get();

	if (val < 0) return prims->rShiftC_16s(pSrc, -val, pDst, len);

	return prims->lShiftC_16s(pSrc, val, pDst, len);
}
/* ------------------------------------------------------------------------- */
/* Unsigned 16-bit shift whose direction follows the sign of val: a positive
 * val shifts left, a negative val shifts right by -val, using whichever
 * lShiftC_16u / rShiftC_16u routines are installed in the primitives table.
 */
PRIM_STATIC pstatus_t general_shiftC_16u(
	const UINT16 *pSrc,
	INT32 val,
	UINT16 *pDst,
	INT32 len)
{
	primitives_t *prims;

	if (val == 0)
	{
		/* A zero shift is a plain copy; an out-of-place destination
		 * must still receive the source values.
		 */
		if ((pDst != pSrc) && (len > 0))
			memmove(pDst, pSrc, (size_t) len * sizeof(UINT16));
		return PRIMITIVES_SUCCESS;
	}

	prims = primitives_get();

	if (val < 0) return prims->rShiftC_16u(pSrc, -val, pDst, len);

	return prims->lShiftC_16u(pSrc, val, pDst, len);
}
/* Note: an IPP version of shiftC would have to call ippsLShiftC_16s or
 * ippsRShiftC_16s depending on the sign of val.  To avoid using the
 * deprecated in-place routines, a wrapper can pass the source buffer as
 * the destination.
 */
/* ------------------------------------------------------------------------- */
void primitives_init_shift(
	const primitives_hints_t *hints,
	primitives_t *prims)
{
	/* The direction-selecting wrappers have a single implementation; they
	 * look up the directional routines through the table at call time.
	 */
	prims->shiftC_16u = general_shiftC_16u;
	prims->shiftC_16s = general_shiftC_16s;

	/* Portable C routines are the baseline for the directional shifts. */
	prims->rShiftC_16u = general_rShiftC_16u;
	prims->lShiftC_16u = general_lShiftC_16u;
	prims->rShiftC_16s = general_rShiftC_16s;
	prims->lShiftC_16s = general_lShiftC_16s;

#if defined(WITH_IPP)
	/* IPP build: replace all four unconditionally. */
	prims->rShiftC_16u = (__rShiftC_16u_t) ippsRShiftC_16u;
	prims->lShiftC_16u = (__lShiftC_16u_t) ippsLShiftC_16u;
	prims->rShiftC_16s = (__rShiftC_16s_t) ippsRShiftC_16s;
	prims->lShiftC_16s = (__lShiftC_16s_t) ippsLShiftC_16s;
#elif defined(WITH_SSE2)
	/* The generated routines use SSE2 stores and the SSE3 LDDQU load, so
	 * both feature flags must be set before they are installed.
	 */
	if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
	{
		if (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)
		{
			prims->rShiftC_16u = sse2_rShiftC_16u;
			prims->lShiftC_16u = sse2_lShiftC_16u;
			prims->rShiftC_16s = sse2_rShiftC_16s;
			prims->lShiftC_16s = sse2_lShiftC_16s;
		}
	}
#endif
}
/* ------------------------------------------------------------------------- */
void primitives_deinit_shift(
	primitives_t *prims)
{
	/* The shift routines hold no resources, so there is nothing to release. */
	(void) prims;
}

View File

@@ -0,0 +1,176 @@
/* FreeRDP: A Remote Desktop Protocol Client
* Sign operations.
* vi:ts=4 sw=4:
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
#include <tmmintrin.h>
#endif /* WITH_SSE2 */
#include "prim_internal.h"
/* ----------------------------------------------------------------------------
* Set pDst to the sign-value of the 16-bit values in pSrc (-1, 0, or 1).
*/
PRIM_STATIC pstatus_t general_sign_16s(
	const INT16 *pSrc,
	INT16 *pDst,
	INT32 len)
{
	while (len--)
	{
		const INT16 v = *pSrc++;
		INT16 s = 0;		/* result for v == 0 */

		if (v > 0) s = 1;
		else if (v < 0) s = -1;

		*pDst++ = s;
	}

	return PRIMITIVES_SUCCESS;
}
#ifdef WITH_SSE2
/* ------------------------------------------------------------------------- */
PRIM_STATIC pstatus_t ssse3_sign_16s(
	const INT16 *pSrc,
	INT16 *pDst,
	INT32 len)
{
	const INT16 *in = (const INT16 *) pSrc;
	INT16 *out = (INT16 *) pDst;
	size_t blocks;
	__m128i ones;

	/* Use the C routine for short runs, and for a destination at an odd
	 * address, which cannot reach a 16-byte boundary in 2-byte steps.
	 */
	if ((len < 16) || ((ULONG_PTR) pDst & 0x01))
	{
		return general_sign_16s(pSrc, pDst, len);
	}

	/* Process single values until the destination is 16-byte aligned. */
	while (((ULONG_PTR) out & 0x0f) != 0)
	{
		const INT16 v = *in++;

		if (v < 0) *out++ = -1;
		else if (v > 0) *out++ = 1;
		else *out++ = 0;

		if (--len == 0) return PRIMITIVES_SUCCESS;
	}

	/* _mm_sign_epi16(ones, x) yields -1, 0 or +1 per lane according to
	 * the sign of x, so a vector of ones is the only constant needed.
	 */
	ones = _mm_set1_epi16(0x0001U);

	/* 32-value blocks: four vectors are loaded, converted and stored. */
	blocks = len >> 5;
	len -= blocks << 5;

	if (((ULONG_PTR) in & 0x0f) != 0)
	{
		/* Source is not 16-byte aligned: unaligned loads. */
		while (blocks--)
		{
			__m128i s0, s1, s2, s3;
			__m128i r0, r1, r2, r3;

			s0 = _mm_lddqu_si128((__m128i *) in); in += 8;
			s1 = _mm_lddqu_si128((__m128i *) in); in += 8;
			s2 = _mm_lddqu_si128((__m128i *) in); in += 8;
			s3 = _mm_lddqu_si128((__m128i *) in); in += 8;

			r0 = _mm_sign_epi16(ones, s0);
			r1 = _mm_sign_epi16(ones, s1);
			r2 = _mm_sign_epi16(ones, s2);
			r3 = _mm_sign_epi16(ones, s3);

			_mm_store_si128((__m128i *) out, r0); out += 8;
			_mm_store_si128((__m128i *) out, r1); out += 8;
			_mm_store_si128((__m128i *) out, r2); out += 8;
			_mm_store_si128((__m128i *) out, r3); out += 8;
		}
	}
	else
	{
		/* Source is 16-byte aligned as well: aligned loads. */
		while (blocks--)
		{
			__m128i s0, s1, s2, s3;
			__m128i r0, r1, r2, r3;

			s0 = _mm_load_si128((__m128i *) in); in += 8;
			s1 = _mm_load_si128((__m128i *) in); in += 8;
			s2 = _mm_load_si128((__m128i *) in); in += 8;
			s3 = _mm_load_si128((__m128i *) in); in += 8;

			r0 = _mm_sign_epi16(ones, s0);
			r1 = _mm_sign_epi16(ones, s1);
			r2 = _mm_sign_epi16(ones, s2);
			r3 = _mm_sign_epi16(ones, s3);

			_mm_store_si128((__m128i *) out, r0); out += 8;
			_mm_store_si128((__m128i *) out, r1); out += 8;
			_mm_store_si128((__m128i *) out, r2); out += 8;
			_mm_store_si128((__m128i *) out, r3); out += 8;
		}
	}

	/* Remaining whole vectors of 8 values, one at a time. */
	blocks = len >> 3;
	len -= blocks << 3;
	while (blocks--)
	{
		__m128i src = LOAD_SI128(in);

		in += 8;
		_mm_store_si128((__m128i *) out, _mm_sign_epi16(ones, src));
		out += 8;
	}

	/* Trailing values (fewer than 8). */
	while (len--)
	{
		const INT16 v = *in++;

		if (v < 0) *out++ = -1;
		else if (v > 0) *out++ = 1;
		else *out++ = 0;
	}

	return PRIMITIVES_SUCCESS;
}
#endif /* WITH_SSE2 */
/* ------------------------------------------------------------------------- */
void primitives_init_sign(
	const primitives_hints_t *hints,
	primitives_t *prims)
{
	/* The portable C routine is always installed first. */
	prims->sign_16s = general_sign_16s;

	/* No IPP replacement is wired up for this primitive. */
#if defined(WITH_SSE2)
	/* The tuned routine uses the SSSE3 sign instruction and the SSE3
	 * LDDQU load, so both feature flags must be set.
	 */
	if (hints->x86_flags & PRIM_X86_SSSE3_AVAILABLE)
	{
		if (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)
		{
			prims->sign_16s = ssse3_sign_16s;
		}
	}
#endif
}
/* ------------------------------------------------------------------------- */
void primitives_deinit_sign(
	primitives_t *prims)
{
	/* The sign routines hold no resources, so there is nothing to release. */
	(void) prims;
}

View File

@@ -0,0 +1,416 @@
/* prim_templates.h
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License. Algorithms used by
* this code may be covered by patents by HP, Microsoft, or other parties.
*/
#ifdef __GNUC__
# pragma once
#endif
#ifndef __PRIM_TEMPLATES_H_INCLUDED__
#define __PRIM_TEMPLATES_H_INCLUDED__
/* These are prototypes for SSE (potentially NEON) routines that do a
* simple SSE operation over an array of data. Since so much of this
* code is shared except for the operation itself, these prototypes are
* used rather than duplicating code. The naming convention depends on
* the parameters: S=Source param; C=Constant; D=Destination.
* All the macros have parameters for a fallback procedure if the data
* is too small and an operation "the slow way" for use at 16-byte edges.
*/
/* SSE3 note: If someone needs to support an SSE2 version of these without
* SSE3 support, an alternative version could be added that merely checks
* that 16-byte alignment on both destination and source(s) can be
* achieved, rather than use LDDQU for unaligned reads.
*/
/* Note: the compiler is good at turning (16/sizeof(_type_)) into a constant.
 * It cannot easily do that if the value is first stored in a variable,
 * so the expression is repeated rather than saved as an intermediate value.
 */
/* ----------------------------------------------------------------------------
* SCD = Source, Constant, Destination
*/
#define SSE3_SCD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
PRIM_STATIC pstatus_t _name_(const _type_ *pSrc, INT32 val, _type_ *pDst, INT32 len) \
{ \
int shifts; \
UINT32 offBeatMask; \
const _type_ *sptr = pSrc; \
_type_ *dptr = pDst; \
size_t count; \
if (len < 16) /* pointless if too small */ \
{ \
return _fallback_(pSrc, val, pDst, len); \
} \
if (sizeof(_type_) == 1) shifts = 1; \
else if (sizeof(_type_) == 2) shifts = 2; \
else if (sizeof(_type_) == 4) shifts = 3; \
else if (sizeof(_type_) == 8) shifts = 4; \
offBeatMask = (1 << (shifts - 1)) - 1; \
if ((ULONG_PTR) pDst & offBeatMask) \
{ \
/* Incrementing the pointer skips over 16-byte boundary. */ \
return _fallback_(pSrc, val, pDst, len); \
} \
/* Get to the 16-byte boundary now. */ \
while ((ULONG_PTR) dptr & 0x0f) \
{ \
_slowWay_; \
if (--len == 0) return PRIMITIVES_SUCCESS; \
} \
/* Use 8 128-bit SSE registers. */ \
count = len >> (8-shifts); \
len -= count << (8-shifts); \
if ((ULONG_PTR) sptr & 0x0f) \
{ \
while (count--) \
{ \
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
xmm0 = _mm_lddqu_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm1 = _mm_lddqu_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm2 = _mm_lddqu_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm3 = _mm_lddqu_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm4 = _mm_lddqu_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm5 = _mm_lddqu_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm6 = _mm_lddqu_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm7 = _mm_lddqu_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm0 = _op_(xmm0, val); \
xmm1 = _op_(xmm1, val); \
xmm2 = _op_(xmm2, val); \
xmm3 = _op_(xmm3, val); \
xmm4 = _op_(xmm4, val); \
xmm5 = _op_(xmm5, val); \
xmm6 = _op_(xmm6, val); \
xmm7 = _op_(xmm7, val); \
_mm_store_si128((__m128i *) dptr, xmm0); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm1); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm2); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm3); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm4); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm5); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm6); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm7); \
dptr += (16/sizeof(_type_)); \
} \
} \
else \
{ \
while (count--) \
{ \
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
xmm0 = _mm_load_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm1 = _mm_load_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm2 = _mm_load_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm3 = _mm_load_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm4 = _mm_load_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm5 = _mm_load_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm6 = _mm_load_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm7 = _mm_load_si128((__m128i *) sptr); \
sptr += (16/sizeof(_type_)); \
xmm0 = _op_(xmm0, val); \
xmm1 = _op_(xmm1, val); \
xmm2 = _op_(xmm2, val); \
xmm3 = _op_(xmm3, val); \
xmm4 = _op_(xmm4, val); \
xmm5 = _op_(xmm5, val); \
xmm6 = _op_(xmm6, val); \
xmm7 = _op_(xmm7, val); \
_mm_store_si128((__m128i *) dptr, xmm0); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm1); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm2); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm3); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm4); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm5); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm6); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm7); \
dptr += (16/sizeof(_type_)); \
} \
} \
/* Use a single 128-bit SSE register. */ \
count = len >> (5-shifts); \
len -= count << (5-shifts); \
while (count--) \
{ \
__m128i xmm0 = LOAD_SI128(sptr); sptr += (16/sizeof(_type_)); \
xmm0 = _op_(xmm0, val); \
_mm_store_si128((__m128i *) dptr, xmm0); \
dptr += (16/sizeof(_type_)); \
} \
/* Finish off the remainder. */ \
while (len--) { _slowWay_; } \
return PRIMITIVES_SUCCESS; \
}
/* ----------------------------------------------------------------------------
 * SCD = Source, Constant, Destination
 * PRE = preload xmm0 with the constant.
 *
 * Expands to a function
 *   pstatus_t _name_(const _type_ *pSrc, _type_ val, _type_ *pDst, INT32 len)
 * that applies _op_(vector, constant-vector) to len elements of pSrc,
 * writing pDst.
 *
 *   _name_     name of the generated function
 *   _type_     integer element type of 1, 2, 4 or 8 bytes
 *   _fallback_ routine with the same signature, used when len < 16 or when
 *              pDst is not aligned to the element size
 *   _op_       intrinsic called as _op_(__m128i, __m128i)
 *   _slowWay_  statement that processes exactly one element and advances
 *              the locals sptr and dptr; used before the destination is
 *              16-byte aligned and for the tail
 *
 * The constant is broadcast with the lane width that matches _type_, so
 * the template is correct for every supported element size and not only
 * for 32-bit elements.
 */
#define SSE3_SCD_PRE_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
PRIM_STATIC pstatus_t _name_(const _type_ *pSrc, _type_ val, _type_ *pDst, INT32 len) \
{ \
	int shifts; \
	UINT32 offBeatMask; \
	const _type_ *sptr = pSrc; \
	_type_ *dptr = pDst; \
	size_t count; \
	__m128i xmm0; \
	if (len < 16) /* pointless if too small */ \
	{ \
		return _fallback_(pSrc, val, pDst, len); \
	} \
	/* shifts = log2(sizeof(_type_)) + 1 */ \
	if (sizeof(_type_) == 1) shifts = 1; \
	else if (sizeof(_type_) == 2) shifts = 2; \
	else if (sizeof(_type_) == 4) shifts = 3; \
	else shifts = 4; \
	/* offBeatMask = sizeof(_type_) - 1 */ \
	offBeatMask = (1 << (shifts - 1)) - 1; \
	if ((ULONG_PTR) pDst & offBeatMask) \
	{ \
		/* Incrementing the pointer skips over 16-byte boundary. */ \
		return _fallback_(pSrc, val, pDst, len); \
	} \
	/* Get to the 16-byte boundary now. */ \
	while ((ULONG_PTR) dptr & 0x0f) \
	{ \
		_slowWay_; \
		if (--len == 0) return PRIMITIVES_SUCCESS; \
	} \
	/* Use 4 128-bit SSE registers: 64 bytes, 2^(7-shifts) elements. */ \
	count = len >> (7-shifts); \
	len -= count << (7-shifts); \
	/* Broadcast the constant using the lane width of _type_.  sizeof is */ \
	/* a compile-time constant, so only one branch survives. */ \
	if (sizeof(_type_) == 1) \
		xmm0 = _mm_set1_epi8((char) val); \
	else if (sizeof(_type_) == 2) \
		xmm0 = _mm_set1_epi16((short) val); \
	else if (sizeof(_type_) == 8) \
		xmm0 = _mm_set_epi32((int) ((UINT64) val >> 32), (int) ((UINT64) val), \
			(int) ((UINT64) val >> 32), (int) ((UINT64) val)); \
	else \
		xmm0 = _mm_set1_epi32((int) val); \
	if ((ULONG_PTR) sptr & 0x0f) \
	{ \
		/* Source not 16-byte aligned: unaligned (LDDQU) loads. */ \
		while (count--) \
		{ \
			__m128i xmm1, xmm2, xmm3, xmm4; \
			xmm1 = _mm_lddqu_si128((__m128i *) sptr); \
			sptr += (16/sizeof(_type_)); \
			xmm2 = _mm_lddqu_si128((__m128i *) sptr); \
			sptr += (16/sizeof(_type_)); \
			xmm3 = _mm_lddqu_si128((__m128i *) sptr); \
			sptr += (16/sizeof(_type_)); \
			xmm4 = _mm_lddqu_si128((__m128i *) sptr); \
			sptr += (16/sizeof(_type_)); \
			xmm1 = _op_(xmm1, xmm0); \
			xmm2 = _op_(xmm2, xmm0); \
			xmm3 = _op_(xmm3, xmm0); \
			xmm4 = _op_(xmm4, xmm0); \
			_mm_store_si128((__m128i *) dptr, xmm1); \
			dptr += (16/sizeof(_type_)); \
			_mm_store_si128((__m128i *) dptr, xmm2); \
			dptr += (16/sizeof(_type_)); \
			_mm_store_si128((__m128i *) dptr, xmm3); \
			dptr += (16/sizeof(_type_)); \
			_mm_store_si128((__m128i *) dptr, xmm4); \
			dptr += (16/sizeof(_type_)); \
		} \
	} \
	else \
	{ \
		/* Source is 16-byte aligned too: aligned loads. */ \
		while (count--) \
		{ \
			__m128i xmm1, xmm2, xmm3, xmm4; \
			xmm1 = _mm_load_si128((__m128i *) sptr); \
			sptr += (16/sizeof(_type_)); \
			xmm2 = _mm_load_si128((__m128i *) sptr); \
			sptr += (16/sizeof(_type_)); \
			xmm3 = _mm_load_si128((__m128i *) sptr); \
			sptr += (16/sizeof(_type_)); \
			xmm4 = _mm_load_si128((__m128i *) sptr); \
			sptr += (16/sizeof(_type_)); \
			xmm1 = _op_(xmm1, xmm0); \
			xmm2 = _op_(xmm2, xmm0); \
			xmm3 = _op_(xmm3, xmm0); \
			xmm4 = _op_(xmm4, xmm0); \
			_mm_store_si128((__m128i *) dptr, xmm1); \
			dptr += (16/sizeof(_type_)); \
			_mm_store_si128((__m128i *) dptr, xmm2); \
			dptr += (16/sizeof(_type_)); \
			_mm_store_si128((__m128i *) dptr, xmm3); \
			dptr += (16/sizeof(_type_)); \
			_mm_store_si128((__m128i *) dptr, xmm4); \
			dptr += (16/sizeof(_type_)); \
		} \
	} \
	/* Use a single 128-bit SSE register: 16 bytes, 2^(5-shifts) elements. */ \
	count = len >> (5-shifts); \
	len -= count << (5-shifts); \
	while (count--) \
	{ \
		__m128i xmm1 = LOAD_SI128(sptr); sptr += (16/sizeof(_type_)); \
		xmm1 = _op_(xmm1, xmm0); \
		_mm_store_si128((__m128i *) dptr, xmm1); \
		dptr += (16/sizeof(_type_)); \
	} \
	/* Finish off the remainder. */ \
	while (len--) { _slowWay_; } \
	return PRIMITIVES_SUCCESS; \
}
/* ----------------------------------------------------------------------------
* SSD = Source1, Source2, Destination
*/
#define SSE3_SSD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
PRIM_STATIC pstatus_t _name_(const _type_ *pSrc1, const _type_ *pSrc2, _type_ *pDst, INT32 len) \
{ \
int shifts; \
UINT32 offBeatMask; \
const _type_ *sptr1 = pSrc1; \
const _type_ *sptr2 = pSrc2; \
_type_ *dptr = pDst; \
size_t count; \
if (len < 16) /* pointless if too small */ \
{ \
return _fallback_(pSrc1, pSrc2, pDst, len); \
} \
if (sizeof(_type_) == 1) shifts = 1; \
else if (sizeof(_type_) == 2) shifts = 2; \
else if (sizeof(_type_) == 4) shifts = 3; \
else if (sizeof(_type_) == 8) shifts = 4; \
offBeatMask = (1 << (shifts - 1)) - 1; \
if ((ULONG_PTR) pDst & offBeatMask) \
{ \
/* Incrementing the pointer skips over 16-byte boundary. */ \
return _fallback_(pSrc1, pSrc2, pDst, len); \
} \
/* Get to the 16-byte boundary now. */ \
while ((ULONG_PTR) dptr & 0x0f) \
{ \
_slowWay_; \
if (--len == 0) return PRIMITIVES_SUCCESS; \
} \
/* Use 4 128-bit SSE registers. */ \
count = len >> (7-shifts); \
len -= count << (7-shifts); \
if (((ULONG_PTR) sptr1 & 0x0f) || ((ULONG_PTR) sptr2 & 0x0f)) \
{ \
/* Unaligned loads */ \
while (count--) \
{ \
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
xmm0 = _mm_lddqu_si128((__m128i *) sptr1); \
sptr1 += (16/sizeof(_type_)); \
xmm1 = _mm_lddqu_si128((__m128i *) sptr1); \
sptr1 += (16/sizeof(_type_)); \
xmm2 = _mm_lddqu_si128((__m128i *) sptr1); \
sptr1 += (16/sizeof(_type_)); \
xmm3 = _mm_lddqu_si128((__m128i *) sptr1); \
sptr1 += (16/sizeof(_type_)); \
xmm4 = _mm_lddqu_si128((__m128i *) sptr2); \
sptr2 += (16/sizeof(_type_)); \
xmm5 = _mm_lddqu_si128((__m128i *) sptr2); \
sptr2 += (16/sizeof(_type_)); \
xmm6 = _mm_lddqu_si128((__m128i *) sptr2); \
sptr2 += (16/sizeof(_type_)); \
xmm7 = _mm_lddqu_si128((__m128i *) sptr2); \
sptr2 += (16/sizeof(_type_)); \
xmm0 = _op_(xmm0, xmm4); \
xmm1 = _op_(xmm1, xmm5); \
xmm2 = _op_(xmm2, xmm6); \
xmm3 = _op_(xmm3, xmm7); \
_mm_store_si128((__m128i *) dptr, xmm0); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm1); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm2); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm3); \
dptr += (16/sizeof(_type_)); \
} \
} \
else \
{ \
/* Aligned loads */ \
while (count--) \
{ \
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
xmm0 = _mm_load_si128((__m128i *) sptr1); \
sptr1 += (16/sizeof(_type_)); \
xmm1 = _mm_load_si128((__m128i *) sptr1); \
sptr1 += (16/sizeof(_type_)); \
xmm2 = _mm_load_si128((__m128i *) sptr1); \
sptr1 += (16/sizeof(_type_)); \
xmm3 = _mm_load_si128((__m128i *) sptr1); \
sptr1 += (16/sizeof(_type_)); \
xmm4 = _mm_load_si128((__m128i *) sptr2); \
sptr2 += (16/sizeof(_type_)); \
xmm5 = _mm_load_si128((__m128i *) sptr2); \
sptr2 += (16/sizeof(_type_)); \
xmm6 = _mm_load_si128((__m128i *) sptr2); \
sptr2 += (16/sizeof(_type_)); \
xmm7 = _mm_load_si128((__m128i *) sptr2); \
sptr2 += (16/sizeof(_type_)); \
xmm0 = _op_(xmm0, xmm4); \
xmm1 = _op_(xmm1, xmm5); \
xmm2 = _op_(xmm2, xmm6); \
xmm3 = _op_(xmm3, xmm7); \
_mm_store_si128((__m128i *) dptr, xmm0); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm1); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm2); \
dptr += (16/sizeof(_type_)); \
_mm_store_si128((__m128i *) dptr, xmm3); \
dptr += (16/sizeof(_type_)); \
} \
} \
/* Use a single 128-bit SSE register. */ \
count = len >> (5-shifts); \
len -= count << (5-shifts); \
while (count--) \
{ \
__m128i xmm0, xmm1; \
xmm0 = LOAD_SI128(sptr1); sptr1 += (16/sizeof(_type_)); \
xmm1 = LOAD_SI128(sptr2); sptr2 += (16/sizeof(_type_)); \
xmm0 = _op_(xmm0, xmm1); \
_mm_store_si128((__m128i *) dptr, xmm0); \
dptr += (16/sizeof(_type_)); \
} \
/* Finish off the remainder. */ \
while (len--) { _slowWay_; } \
return PRIMITIVES_SUCCESS; \
}
#endif /* !__PRIM_TEMPLATES_H_INCLUDED__ */

View File

@@ -0,0 +1,347 @@
/* primitives.c
* This code queries processor features and calls the init/deinit routines.
* vi:ts=4 sw=4
*
* Copyright 2011 Martin Fleisz <mfleisz@thinstuff.com>
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#include <stdlib.h>
#include <freerdp/primitives.h>
#include "prim_internal.h"
#ifdef ANDROID
#include "cpu-features.h"
#endif
/* Singleton pointer used throughout the program when requested. */
static primitives_t* pPrimitives = NULL;
/* CPUID feature masks.  D_* apply to EDX and C_* to ECX of CPUID leaf 1
 * unless noted otherwise; E_* apply to the low word returned by XGETBV(0).
 */
#define D_BIT_MMX (1<<23)
#define D_BIT_SSE (1<<25)
#define D_BIT_SSE2 (1<<26)
/* NOTE(review): 3DNow! is not reported by leaf 1.  In extended leaf
 * 0x80000001, EDX bit 31 is 3DNow! and bit 30 is the 3DNow! extensions
 * flag, and ECX bit 8 is 3DNowPrefetch.  Confirm which leaf the users of
 * the two 3DNow masks below query.
 */
#define D_BIT_3DN (1<<30)
#define C_BIT_SSE3 (1<<0)
#define C_BIT_3DNP (1<<8)
#define C_BIT_SSSE3 (1<<9)
#define C_BIT_SSE41 (1<<19)
#define C_BIT_SSE42 (1<<20)
/* ECX bit 27 is OSXSAVE: the OS has enabled XSAVE, so XGETBV may be used. */
#define C_BIT_XGETBV (1<<27)
#define C_BIT_AVX (1<<28)
#define C_BITS_AVX (C_BIT_XGETBV|C_BIT_AVX)
/* XCR0 bits: XMM (SSE) state and YMM (AVX) state enabled by the OS. */
#define E_BIT_XMM (1<<1)
#define E_BIT_YMM (1<<2)
#define E_BITS_AVX (E_BIT_XMM|E_BIT_YMM)
/* FMA is ECX bit 12 (bit 11 is SDBG). */
#define C_BIT_FMA (1<<12)
/* AES-NI is ECX bit 25 (bit 24 is the TSC-deadline timer). */
#define C_BIT_AVX_AES (1<<25)
/* If x86 */
#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64) \
|| defined(__amd64__) || defined(_M_AMD64) || defined(_M_X64) \
|| defined(i386) || defined(__i386) || defined(__i386__) \
|| defined(_M_IX86) || defined(_X86_)
#ifndef i386
#define i386
#endif
/* If GCC */
#ifdef __GNUC__
#ifdef __AVX__
/* Execute XGETBV with ECX = _func_; EAX is returned in _lo_ and EDX in
 * _hi_.  Only defined when the compiler targets AVX (__AVX__).
 */
#define xgetbv(_func_, _lo_, _hi_) \
__asm__ __volatile__ ("xgetbv" : "=a" (_lo_), "=d" (_hi_) : "c" (_func_))
#endif
/* Execute the CPUID instruction for leaf `info` and return the resulting
 * EAX, EBX, ECX and EDX values through the four output pointers.
 * Only EAX is loaded before the instruction; ECX is not set, so leaves
 * that take a sub-leaf index in ECX are not selected reliably.
 */
static void cpuid(
unsigned info,
unsigned *eax,
unsigned *ebx,
unsigned *ecx,
unsigned *edx)
{
/* Clear all outputs before the instruction runs. */
*eax = *ebx = *ecx = *edx = 0;
__asm volatile
(
/* The EBX (or RBX register on x86_64) is used for the PIC base address
* and must not be corrupted by our inline assembly.
*/
/* EBX/RBX is saved in ESI/RSI before CPUID; the XCHG afterwards restores
* it and leaves CPUID's EBX result in ESI, which the "=S" output reads.
*/
# if defined(__i386__)
"mov %%ebx, %%esi;"
"cpuid;"
"xchg %%ebx, %%esi;"
#else
"mov %%rbx, %%rsi;"
"cpuid;"
"xchg %%rbx, %%rsi;"
#endif
: "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx)
: "0" (info)
);
}
/* Fill hints->x86_flags from CPUID (x86, GCC-compatible compilers).
 * Flags are only ever OR-ed in, so the caller is expected to pass a
 * zero-initialised structure.
 */
static void set_hints(primitives_hints_t* hints)
{
	unsigned a, b, c, d;
	unsigned ea, eb, ec, ed;

	/* Standard feature leaf: feature bits are in EDX and ECX. */
	cpuid(1, &a, &b, &c, &d);

	if (d & D_BIT_MMX)
		hints->x86_flags |= PRIM_X86_MMX_AVAILABLE;
	if (d & D_BIT_SSE)
		hints->x86_flags |= PRIM_X86_SSE_AVAILABLE;
	if (d & D_BIT_SSE2)
		hints->x86_flags |= PRIM_X86_SSE2_AVAILABLE;
	if (c & C_BIT_SSE3)
		hints->x86_flags |= PRIM_X86_SSE3_AVAILABLE;
	if (c & C_BIT_SSSE3)
		hints->x86_flags |= PRIM_X86_SSSE3_AVAILABLE;
	if (c & C_BIT_SSE41)
		hints->x86_flags |= PRIM_X86_SSE41_AVAILABLE;
	if (c & C_BIT_SSE42)
		hints->x86_flags |= PRIM_X86_SSE42_AVAILABLE;

	/* 3DNow! is reported by extended leaf 0x80000001, not by leaf 1,
	 * where the same bit positions mean IA64 (EDX bit 30) and TM2
	 * (ECX bit 8).  Leaf 0x80000000 returns the highest extended leaf
	 * in EAX, so check it before querying.
	 */
	cpuid(0x80000000U, &ea, &eb, &ec, &ed);
	if (ea >= 0x80000001U)
	{
		cpuid(0x80000001U, &ea, &eb, &ec, &ed);
		if (ed & (1U << 31))		/* EDX bit 31: 3DNow! */
			hints->x86_flags |= PRIM_X86_3DNOW_AVAILABLE;
		if (ec & C_BIT_3DNP)		/* ECX bit 8: 3DNowPrefetch */
			hints->x86_flags |= PRIM_X86_3DNOW_PREFETCH_AVAILABLE;
	}

#ifdef __AVX__
	/* AVX needs the CPU feature bit and OS support: OSXSAVE must be set
	 * and XCR0 must show both XMM and YMM state enabled.
	 */
	if ((c & C_BITS_AVX) == C_BITS_AVX)
	{
		int e, f;
		xgetbv(0, e, f);
		if ((e & E_BITS_AVX) == E_BITS_AVX)
		{
			hints->x86_flags |= PRIM_X86_AVX_AVAILABLE;
			if (c & C_BIT_FMA)
				hints->x86_flags |= PRIM_X86_FMA_AVAILABLE;
			if (c & C_BIT_AVX_AES)
				hints->x86_flags |= PRIM_X86_AVX_AES_AVAILABLE;
		}
	}
	/* TODO: AVX2: set eax=7, ecx=0, cpuid, check ebx-bit5 */
#endif
}
#else
static void set_hints(primitives_hints_t* hints)
{
	/* x86 non-GCC: TODO */
	/* No feature detection on this compiler yet; hints stay untouched. */
	(void) hints;
}
#endif /* __GNUC__ */
/* ------------------------------------------------------------------------- */
#elif defined(__arm__) || defined(__ARM_ARCH_7A__) \
|| defined(__ARM_EABI__) || defined(__ARMEL__) || defined(ANDROID)
#ifndef __arm__
#define __arm__
#endif
/* Returns PRIM_ARM_NEON_AVAILABLE when built for Android and the runtime
 * CPU-features API reports an ARM family CPU with both the ARMv7 and the
 * NEON feature bits set; returns 0 in every other case (including all
 * non-Android builds).
 */
static UINT32 androidNeon(void)
{
#if ANDROID
	/* Declared before the first statement so the block stays valid C89
	 * (and quiet under -Wdeclaration-after-statement).
	 */
	UINT64 features;

	if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM)
		return 0;

	features = android_getCpuFeatures();
	if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7)
			&& (features & ANDROID_CPU_ARM_FEATURE_NEON))
	{
		return PRIM_ARM_NEON_AVAILABLE;
	}
#endif
	return 0;
}
/* ARM build: ORs the value returned by androidNeon() into
 * hints->arm_flags.  No other ARM feature is probed here.
 */
static void set_hints(
primitives_hints_t *hints)
{
/* ARM: TODO */
hints->arm_flags |= androidNeon();
}
#else
/* Neither x86 nor ARM: no hints are detected; `hints` is left untouched. */
static void set_hints(
primitives_hints_t *hints)
{
}
#endif /* x86 else ARM else */
/* ------------------------------------------------------------------------- */
/* Allocates (on first use) the global primitives table, detects the CPU
 * hints and lets every primitives section install its entry points.
 *
 * On allocation failure the function returns without modifying the global
 * state, so primitives_get() keeps returning whatever it returned before
 * (NULL if nothing was ever initialised).
 *
 * Calling the function again re-runs every section initialiser with a
 * freshly detected hints block; the previous hints block is released.
 */
void primitives_init(void)
{
	primitives_hints_t* hints;
	void* oldHints;

	/* Allocate the hints first: if this fails nothing has been touched. */
	hints = calloc(1, sizeof(primitives_hints_t));
	if (hints == NULL)
		return;

	if (pPrimitives == NULL)
	{
		pPrimitives = calloc(1, sizeof(primitives_t));
		if (pPrimitives == NULL)
		{
			free(hints);
			return;
		}
	}

	/* Remember a hints block left over from an earlier initialisation so
	 * it can be freed once nothing refers to it any more.
	 */
	oldHints = (void*) (pPrimitives->hints);

	set_hints(hints);
	pPrimitives->hints = (void *) hints;

	/* Now call each section's initialization routine. */
	primitives_init_add(hints, pPrimitives);
	primitives_init_andor(hints, pPrimitives);
	primitives_init_alphaComp(hints, pPrimitives);
	primitives_init_copy(hints, pPrimitives);
	primitives_init_set(hints, pPrimitives);
	primitives_init_shift(hints, pPrimitives);
	primitives_init_sign(hints, pPrimitives);
	primitives_init_colors(hints, pPrimitives);

	if (oldHints != NULL)
		free(oldHints);
}
/* ------------------------------------------------------------------------- */
/* Returns the global primitives table, running primitives_init() first
 * when the table has not been created yet.  The result is whatever
 * pPrimitives holds after that attempt.
 */
primitives_t* primitives_get(void)
{
	if (pPrimitives != NULL)
		return pPrimitives;

	primitives_init();
	return pPrimitives;
}
/* ------------------------------------------------------------------------- */
/* Returns the CPU capability flags recorded in prims->hints: the x86 flags
 * when `i386` is defined, the ARM flags when `__arm__` is defined, and 0
 * otherwise.
 *
 * A NULL `prims` (primitives_get() returns NULL when allocation failed) or
 * a table without hints yields 0 instead of dereferencing NULL.
 */
UINT32 primitives_get_flags(const primitives_t* prims)
{
	const primitives_hints_t* hints;

	if ((prims == NULL) || (prims->hints == NULL))
		return 0;

	hints = (const primitives_hints_t*) (prims->hints);
#ifdef i386
	return hints->x86_flags;
#elif defined(__arm__)
	return hints->arm_flags;
#else
	(void) hints;
	return 0;
#endif
}
/* ------------------------------------------------------------------------- */
/* Associates one capability flag bit with the name that
 * primitives_flags_str() prints for it.
 */
typedef struct
{
UINT32 flag;
const char *str;
} flagpair_t;
/* Printable names for the bits of primitives_hints_t.x86_flags, in the
 * order in which primitives_flags_str() emits them.
 */
static const flagpair_t x86_flags[] =
{
{ PRIM_X86_MMX_AVAILABLE, "MMX" },
{ PRIM_X86_3DNOW_AVAILABLE, "3DNow" },
{ PRIM_X86_3DNOW_PREFETCH_AVAILABLE, "3DNow-PF" },
{ PRIM_X86_SSE_AVAILABLE, "SSE" },
{ PRIM_X86_SSE2_AVAILABLE, "SSE2" },
{ PRIM_X86_SSE3_AVAILABLE, "SSE3" },
{ PRIM_X86_SSSE3_AVAILABLE, "SSSE3" },
{ PRIM_X86_SSE41_AVAILABLE, "SSE4.1" },
{ PRIM_X86_SSE42_AVAILABLE, "SSE4.2" },
{ PRIM_X86_AVX_AVAILABLE, "AVX" },
{ PRIM_X86_FMA_AVAILABLE, "FMA" },
{ PRIM_X86_AVX_AES_AVAILABLE, "AVX-AES" },
{ PRIM_X86_AVX2_AVAILABLE, "AVX2" },
};
/* Printable names for the bits of primitives_hints_t.arm_flags, in the
 * order in which primitives_flags_str() emits them.
 */
static const flagpair_t arm_flags[] =
{
{ PRIM_ARM_VFP1_AVAILABLE, "VFP1" },
{ PRIM_ARM_VFP2_AVAILABLE, "VFP2" },
{ PRIM_ARM_VFP3_AVAILABLE, "VFP3" },
{ PRIM_ARM_VFP4_AVAILABLE, "VFP4" },
{ PRIM_ARM_FPA_AVAILABLE, "FPA" },
{ PRIM_ARM_FPE_AVAILABLE, "FPE" },
{ PRIM_ARM_IWMMXT_AVAILABLE, "IWMMXT" },
{ PRIM_ARM_NEON_AVAILABLE, "NEON" },
};
void primitives_flags_str(const primitives_t* prims, char* str, size_t len)
{
int i;
primitives_hints_t* hints;
*str = '\0';
--len; /* for the '/0' */
hints = (primitives_hints_t*) (prims->hints);
for (i = 0; i < sizeof(x86_flags) / sizeof(flagpair_t); ++i)
{
if (hints->x86_flags & x86_flags[i].flag)
{
int slen = strlen(x86_flags[i].str) + 1;
if (len < slen)
break;
if (*str != '\0')
strcat(str, " ");
strcat(str, x86_flags[i].str);
len -= slen;
}
}
for (i = 0; i < sizeof(arm_flags) / sizeof(flagpair_t); ++i)
{
if (hints->arm_flags & arm_flags[i].flag)
{
int slen = strlen(arm_flags[i].str) + 1;
if (len < slen)
break;
if (*str != '\0')
strcat(str, " ");
strcat(str, arm_flags[i].str);
len -= slen;
}
}
}
/* ------------------------------------------------------------------------- */
/* Tears down the global primitives table: every section's
 * de-initialisation routine is run, then the hints block and the table
 * itself are freed and the global pointer is reset to NULL.
 * Does nothing when the table was never created.
 */
void primitives_deinit(void)
{
	if (pPrimitives != NULL)
	{
		/* Call each section's de-initialization routine. */
		primitives_deinit_add(pPrimitives);
		primitives_deinit_andor(pPrimitives);
		primitives_deinit_alphaComp(pPrimitives);
		primitives_deinit_copy(pPrimitives);
		primitives_deinit_set(pPrimitives);
		primitives_deinit_shift(pPrimitives);
		primitives_deinit_sign(pPrimitives);
		primitives_deinit_colors(pPrimitives);

		/* free() accepts NULL, so a missing hints block needs no test. */
		free((void*) (pPrimitives->hints));
		free((void*) pPrimitives);
		pPrimitives = NULL;
	}
}

2
libfreerdp/primitives/test/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
prim_test

View File

@@ -0,0 +1,140 @@
# FreeRDP: A Remote Desktop Protocol Client
# primitives test makefile builder
# vi:ts=4 sw=4:
#
# (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing permissions
# and limitations under the License.
#
# TODO: Integrate this into the testing framework, in some form.
# Right now this produces a standalone test that covers both functionality
# and performance of the primitives library entrypoints.
# This file can be configured on its own, so it states its own minimum.
cmake_minimum_required(VERSION 2.8)
set(MODULE_NAME "prim_test")
set(MODULE_PREFIX "PRIMITIVES_LIBRARY_TEST")
# C sources of the test program: the test driver, one test file per
# primitive group, and the primitives implementation files from the parent
# directory, which are compiled directly into the executable.
set(PRIMITIVE_TEST_CFILES
prim_test.c
test_add.c
test_alphaComp.c
test_andor.c
test_colors.c
test_copy.c
test_set.c
test_shift.c
test_sign.c
../prim_add.c
../prim_andor.c
../prim_alphaComp.c
../prim_colors.c
../prim_copy.c
../prim_set.c
../prim_shift.c
../prim_sign.c
../primitives.c
)
# Headers are listed so that they are part of the executable's source list.
set(PRIMITIVE_TEST_HEADERS
measure.h
prim_test.h
../prim_internal.h
)
set(PRIMITIVE_TEST_SRCS
${PRIMITIVE_TEST_CFILES}
${PRIMITIVE_TEST_HEADERS}
)
# Directory-scoped include paths and definitions: they apply to every
# source compiled from this directory.
include_directories(. ../../.. ../../../include ../../../winpr/include)
add_definitions(-DPRIM_STATIC=auto -DALL_PRIMITIVES_VERSIONS -DHAVE_CONFIG_H)
# If these haven't been set by the caller, set them now to defaults.
if(NOT DEFINED WITH_IPP)
  set(WITH_IPP FALSE)
endif()

# Decide once whether the target is an ARM processor.  The pattern is
# anchored: the previous unanchored "arm*" meant "ar" followed by any number
# of "m" and therefore also matched names such as "sparc".  "aarch64" is
# listed explicitly because the old pattern happened to match it as well.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)")
  set(PRIM_TEST_TARGET_IS_ARM TRUE)
else()
  set(PRIM_TEST_TARGET_IS_ARM FALSE)
endif()

# SSE2 defaults to on everywhere except ARM ...
if(NOT DEFINED WITH_SSE2)
  if(PRIM_TEST_TARGET_IS_ARM)
    set(WITH_SSE2 FALSE)
  else()
    set(WITH_SSE2 TRUE)
  endif()
endif()

# ... and NEON defaults to on only for ARM.
if(NOT DEFINED WITH_NEON)
  if(PRIM_TEST_TARGET_IS_ARM)
    set(WITH_NEON TRUE)
  else()
    set(WITH_NEON FALSE)
  endif()
endif()
# Collect the SIMD/optimisation switches in OPTFLAGS; they are attached to
# the test sources further down.
if(WITH_SSE2)
  if(CMAKE_COMPILER_IS_GNUCC)
    set(OPTFLAGS "${OPTFLAGS} -msse2 -mssse3 -O2 -Wdeclaration-after-statement")
  endif()
  if(MSVC)
    set(OPTFLAGS "${OPTFLAGS} /arch:SSE2")
  endif()
elseif(WITH_NEON)
  # The NEON switches used to be stored in a misspelled variable
  # ("OPTIMZATION") and were therefore never applied.  -mfpu/-mfloat-abi are
  # 32-bit ARM GCC options, so they are only added for "arm..." processors;
  # other targets keep building without them, exactly as before.
  if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
    set(OPTFLAGS "${OPTFLAGS} -mfpu=neon -mfloat-abi=softfp -O2")
  endif()
  # TODO: Add MSVC equivalent
endif()
# The standalone test program.
add_executable(prim_test ${PRIMITIVE_TEST_SRCS})

# Optional Intel IPP support: locate the library (unless the caller already
# did), dump what was found, and wire the include paths and libraries in.
if(WITH_IPP)
  if(NOT DEFINED IPP_FOUND)
    include(../../../cmake/FindIPP.cmake)
  endif()

  # IPP PATH debugging messages
  foreach(IPP_DEBUG_VAR
      IPP_FOUND
      IPP_VERSION_STR
      IPP_VERSION_MAJOR
      IPP_VERSION_MINOR
      IPP_VERSION_BUILD
      IPP_ROOT_DIR
      IPP_INCLUDE_DIRS
      IPP_LIBRARY_DIRS
      IPP_LIBRARIES
      IPP_COMPILER_LIBRARY_DIRS
      IPP_COMPILER_LIBRARIES
      IPP_LIBRARY_LIST
      IPP_LIB_PREFIX
      IPP_LIB_SUFFIX
      IPP_PREFIX
      IPP_SUFFIX
      IPPCORE
      IPPS
      IPPI
      IPPCC
      IPPCV
      IPPVM)
    # Deliberately unquoted so the output is the same as the original
    # one-message-per-variable form.
    message(${IPP_DEBUG_VAR}=${${IPP_DEBUG_VAR}})
  endforeach()

  # With GCC the IPP include directories are passed as -I switches through
  # the per-source compile flags.
  if(CMAKE_COMPILER_IS_GNUCC)
    foreach(INCLDIR ${IPP_INCLUDE_DIRS})
      set(OPTFLAGS "${OPTFLAGS} -I${INCLDIR}")
    endforeach()
  endif()

  target_link_libraries(prim_test ${IPP_LIBRARY_LIST})
endif()
# Apply the collected optimisation/SIMD switches to the C sources only.
set_source_files_properties(${PRIMITIVE_TEST_CFILES}
  PROPERTIES COMPILE_FLAGS "${OPTFLAGS}")

# librt is linked unconditionally (measure.h uses clock_gettime()).
target_link_libraries(prim_test rt)

# Default location of the test binary when the caller did not provide one.
if(NOT TESTING_OUTPUT_DIRECTORY)
  set(TESTING_OUTPUT_DIRECTORY .)
endif()

# Register the functionality run of the test program with CTest.
add_test(prim_test ${TESTING_OUTPUT_DIRECTORY}/prim_test functionality)
set_target_properties(${MODULE_NAME} PROPERTIES FOLDER "FreeRDP/Test")

View File

@@ -0,0 +1,125 @@
/* measure.h
* Macros to help with performance measurement.
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License. Algorithms used by
* this code may be covered by patents by HP, Microsoft, or other parties.
*
* MEASURE_LOOP_START("measurement", 2000)
* code to be measured
* MEASURE_LOOP_STOP
* buffer flush and such
* MEASURE_SHOW_RESULTS
*
* Define GOOGLE_PROFILER if you want gperftools included.
*/
/* GCC predefines __GNUC__ (two leading and two trailing underscores); the
 * previous spelling "_GNUC_" is never defined, so the pragma was dead code.
 */
#ifdef __GNUC__
# pragma once
#endif
#ifndef __MEASURE_H_INCLUDED__
#define __MEASURE_H_INCLUDED__
#include <time.h>
#ifndef _WIN32
#include <sys/param.h>
#endif
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef GOOGLE_PROFILER
#include <gperftools/profiler.h>
/* Starts the gperftools CPU profiler, writing to "./<_prefix_>.prof".
 * The path is built with snprintf() so that an over-long prefix is
 * truncated instead of overflowing the buffer.  The macro expands to a
 * single statement WITHOUT a trailing semicolon; the call site supplies it.
 */
#define PROFILER_START(_prefix_) \
	do { \
		char _path[PATH_MAX]; \
		snprintf(_path, sizeof(_path), "./%s.prof", (_prefix_)); \
		ProfilerStart(_path); \
	} while (0)
/* Stops the gperftools CPU profiler (single statement, no semicolon). */
# define PROFILER_STOP \
	do { \
		ProfilerStop(); \
	} while (0)
#else
/* Profiling disabled: both macros expand to nothing. */
#define PROFILER_START(_prefix_)
#define PROFILER_STOP
#endif // GOOGLE_PROFILER
extern float _delta_time(const struct timespec *t0, const struct timespec *t1);
extern void _floatprint(float t, char *output);
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif // !CLOCK_MONOTONIC_RAW
/* Opens a measurement scope: declares the bookkeeping variables, duplicates
 * the label, records the start time, starts the profiler and begins a
 * do/while loop that executes the measured code _count_ times.
 *
 * The scope and the loop are closed by MEASURE_LOOP_STOP followed by one of
 * the MEASURE_*_RESULTS macros.
 *
 * A count below 1 is raised to 1: the loop is "do { } while (--_loop)", so
 * starting from 0 (or a negative value) would decrement past zero and keep
 * running until the counter wraps.
 */
#define MEASURE_LOOP_START(_prefix_, _count_) \
{	struct timespec _start, _stop; \
	char *_prefix; \
	int _count = (_count_); \
	int _loop; \
	float _delta; \
	char _str1[32], _str2[32]; \
	if (_count < 1) _count = 1; \
	_prefix = strdup(_prefix_); \
	_str1[0] = '\0'; _str2[0] = '\0'; \
	clock_gettime(CLOCK_MONOTONIC_RAW, &_start); \
	PROFILER_START(_prefix); \
	_loop = (_count); \
	do {
/* Closes the do/while loop opened by MEASURE_LOOP_START. */
#define MEASURE_LOOP_STOP \
} while (--_loop);
/* Stops the profiler and the clock, stores the measured rate
 * (iterations per second) in _result_, frees the label and closes the
 * scope opened by MEASURE_LOOP_START.  Prints nothing.
 */
#define MEASURE_GET_RESULTS(_result_) \
PROFILER_STOP; \
clock_gettime(CLOCK_MONOTONIC_RAW, &_stop); \
_delta = _delta_time(&_start, &_stop); \
(_result_) = (float) _count / _delta; \
free(_prefix); \
}
/* Same as MEASURE_GET_RESULTS, and additionally prints the label, the
 * iteration count, the elapsed seconds and the formatted rate.
 */
#define MEASURE_SHOW_RESULTS(_result_) \
PROFILER_STOP; \
clock_gettime(CLOCK_MONOTONIC_RAW, &_stop); \
_delta = _delta_time(&_start, &_stop); \
(_result_) = (float) _count / _delta; \
_floatprint((float) _count / _delta, _str1); \
printf("%s: %9d iterations in %5.1f seconds = %s/s \n", \
_prefix, _count, _delta, _str1); \
free(_prefix); \
}
/* Prints the rate twice: once as iterations per second and once multiplied
 * by _scale_ and followed by _label_.  Stores no result; frees the label
 * and closes the scope opened by MEASURE_LOOP_START.
 */
#define MEASURE_SHOW_RESULTS_SCALED(_scale_, _label_) \
PROFILER_STOP; \
clock_gettime(CLOCK_MONOTONIC_RAW, &_stop); \
_delta = _delta_time(&_start, &_stop); \
_floatprint((float) _count / _delta, _str1); \
_floatprint((float) _count / _delta * (_scale_), _str2); \
printf("%s: %9d iterations in %5.1f seconds = %s/s = %s%s \n", \
_prefix, _count, _delta, _str1, _str2, _label_); \
free(_prefix); \
}
/* Two-pass timed measurement of _call_:
 *   1. run it _init_iter_ times to estimate the rate _r (iterations/s);
 *   2. run it _r * _test_time_ times, so that the second pass lasts about
 *      _test_time_ seconds, then print the outcome and store the rate in
 *      _result_.
 * _test_time_ is parenthesised so that an expression argument (for example
 * "a + b") is multiplied as a whole.
 */
#define MEASURE_TIMED(_label_, _init_iter_, _test_time_, _result_, _call_) \
{	float _r; \
	MEASURE_LOOP_START(_label_, _init_iter_); \
	_call_; \
	MEASURE_LOOP_STOP; \
	MEASURE_GET_RESULTS(_r); \
	MEASURE_LOOP_START(_label_, _r * (_test_time_)); \
	_call_; \
	MEASURE_LOOP_STOP; \
	MEASURE_SHOW_RESULTS(_result_); \
}
#endif // __MEASURE_H_INCLUDED__

View File

@@ -0,0 +1,432 @@
/* prim_test.c
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "prim_test.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stdlib.h>
#include <time.h>
int test_sizes[] = { 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 };
int Quiet = 0;
/* ------------------------------------------------------------------------- */
/* Fills `size` bytes at `buffer` with pseudo-random data from the drand48
 * generator family, seeding it from the wall clock on first use.
 *
 * The buffer is written one byte at a time from the 32 random bits that
 * mrand48() returns.  The previous version stored lrand48() - which yields
 * only 31 bits - into whole `long` words, leaving the upper half of every
 * word zero on platforms with a 64-bit long, and wrote through a
 * `long *` that was not guaranteed to be suitably aligned.
 */
static void get_random_data_lrand(
	void *buffer,
	size_t size)
{
	static int seeded = 0;
	unsigned char *out = (unsigned char *) buffer;

	if (!seeded)
	{
		seeded = 1;
		srand48(time(NULL));
	}
	/* This isn't the perfect random number generator, but that's okay. */
	while (size > 0)
	{
		/* Only the low 32 bits are consumed below. */
		unsigned long bits = (unsigned long) mrand48();
		int k;

		for (k = 0; (k < 4) && (size > 0); ++k)
		{
			*out++ = (unsigned char) (bits & 0xff);
			bits >>= 8;
			--size;
		}
	}
}
/* ------------------------------------------------------------------------- */
/* Fills `size` bytes at `buffer` with random data.
 *
 * When the `linux` macro is defined the bytes are read from /dev/urandom;
 * if the device cannot be opened, or a read fails or hits end-of-file
 * before the buffer is full, the whole buffer is filled by
 * get_random_data_lrand() instead.  On other platforms the pseudo-random
 * generator is always used.
 */
void get_random_data(
	void *buffer,
	size_t size)
{
#ifdef linux
	unsigned char *out = (unsigned char *) buffer;
	size_t remaining = size;
	int fd = open("/dev/urandom", O_RDONLY);

	if (fd < 0)
	{
		get_random_data_lrand(buffer, size);
		return;
	}
	while (remaining > 0)
	{
		ssize_t count = read(fd, out, remaining);

		if (count <= 0)
		{
			/* Error or EOF: subtracting a non-positive count would
			 * corrupt the size or loop forever, so fall back to the
			 * generator for the complete buffer.
			 */
			get_random_data_lrand(buffer, size);
			break;
		}
		out += count;
		remaining -= (size_t) count;
	}
	close(fd);
#else
	get_random_data_lrand(buffer, size);
#endif
}
/* ------------------------------------------------------------------------- */
/* Returns the time elapsed from *t0 to *t1 in seconds.
 * A negative difference (t1 earlier than t0) is reported as 0.
 */
float _delta_time(
	const struct timespec *t0,
	const struct timespec *t1)
{
	INT64 wholeSecs = (INT64) (t1->tv_sec) - (INT64) (t0->tv_sec);
	long fracNanos = t1->tv_nsec - t0->tv_nsec;
	double elapsed;

	/* Borrow one second when the nanosecond part went negative. */
	if (fracNanos < 0)
	{
		wholeSecs -= 1;
		fracNanos += 1000000000;
	}

	elapsed = (double) wholeSecs + (double) fracNanos / (double) 1000000000.0;
	if (elapsed < 0.0)
		return 0.0;
	return (float) elapsed;
}
/* ------------------------------------------------------------------------- */
/* Formats `t` into `output` for the measurement reports.
 *
 *   - negative values, values of 1e12 and above, and NaN are printed
 *     with "%f";
 *   - values that round to nothing at the chosen scale are printed as a
 *     plain rounded integer;
 *   - everything else is rounded to three significant digits and printed
 *     with thousands separators, e.g. 12345 -> " 12,300".
 *
 * The rounded value is held in a `long long`: with the previous `int` it
 * overflowed for t >= 2^31, which garbled the whole 1e9..1e12 range.
 *
 * NOTE(review): `output` has no length parameter.  The callers visible in
 * this file pass 32-byte buffers, which "%f" can exceed for extremely large
 * values - confirm that such values cannot occur.
 */
void _floatprint(
	float t,
	char *output)
{
	/* I don't want to link against -lm, so avoid log,exp,... */
	float f = 10.0;
	long long i;

	/* Handle everything that is printed verbatim first; this also keeps
	 * NaN and huge values away from the integer conversions below.
	 */
	if ((t < 0.0) || !(t < 1e+12))
	{
		sprintf(output, "%f", t);
		return;
	}

	/* f becomes the smallest power of ten (at least 10) that is not below
	 * t; divided by 1000 it is the weight of the third significant digit.
	 */
	while (t > f) f *= 10.0;
	f /= 1000.0;
	i = ((long long) (t/f+0.5)) * (long long) f;

	if (i == 0) sprintf(output, "%d", (int) (t+0.5));
	else if (t < 1e+3) sprintf(output, "%3d", (int) i);
	else if (t < 1e+6) sprintf(output, "%3d,%03d",
		(int) (i/1000), (int) (i % 1000));
	else if (t < 1e+9) sprintf(output, "%3d,%03d,000",
		(int) (i/1000000), (int) ((i % 1000000) / 1000));
	else sprintf(output, "%3d,%03d,000,000",
		(int) (i/1000000000), (int) ((i % 1000000000) / 1000000));
}
/* ------------------------------------------------------------------------- */
/* Specific areas to test: */
#define TEST_COPY8 (1<<0)
#define TEST_SET8 (1<<1)
#define TEST_SET32S (1<<2)
#define TEST_SET32U (1<<3)
#define TEST_SIGN16S (1<<4)
#define TEST_ADD16S (1<<5)
#define TEST_LSHIFT16S (1<<6)
#define TEST_LSHIFT16U (1<<7)
#define TEST_RSHIFT16S (1<<8)
#define TEST_RSHIFT16U (1<<9)
#define TEST_RGB (1<<10)
#define TEST_ALPHA (1<<11)
#define TEST_AND (1<<12)
#define TEST_OR (1<<13)
/* Specific types of testing: */
#define TEST_FUNCTIONALITY (1<<0)
#define TEST_PERFORMANCE (1<<1)
/* ------------------------------------------------------------------------- */
/* Maps a command-line keyword to the bit mask it selects. */
typedef struct
{
const char *testStr;
UINT32 bits;
} test_t;
/* Keywords that select which primitive groups are tested; each maps to one
 * or more TEST_* area bits.  Matching in main() is case-insensitive.
 */
static const test_t testList[] =
{
{ "all", 0xFFFFFFFFU },
{ "copy", TEST_COPY8 },
{ "copy8", TEST_COPY8 },
{ "set", TEST_SET8|TEST_SET32S|TEST_SET32U },
{ "set8", TEST_SET8 },
{ "set32", TEST_SET32S|TEST_SET32U },
{ "set32s", TEST_SET32S },
{ "set32u", TEST_SET32U },
{ "sign", TEST_SIGN16S },
{ "sign16s", TEST_SIGN16S },
{ "add", TEST_ADD16S },
{ "add16s", TEST_ADD16S },
{ "lshift", TEST_LSHIFT16S|TEST_LSHIFT16U },
{ "rshift", TEST_RSHIFT16S|TEST_RSHIFT16U },
{ "shift", TEST_LSHIFT16S|TEST_LSHIFT16U|TEST_RSHIFT16S|TEST_RSHIFT16U },
{ "lshift16s", TEST_LSHIFT16S },
{ "lshift16u", TEST_LSHIFT16U },
{ "rshift16s", TEST_RSHIFT16S },
{ "rshift16u", TEST_RSHIFT16U },
{ "rgb", TEST_RGB },
{ "color", TEST_RGB },
{ "colors", TEST_RGB },
{ "alpha", TEST_ALPHA },
{ "and", TEST_AND },
{ "or", TEST_OR }
};
#define NUMTESTS (sizeof(testList)/sizeof(test_t))
/* Keywords that select the kind of testing (functionality / performance). */
static const test_t testTypeList[] =
{
{ "functionality", TEST_FUNCTIONALITY },
{ "performance", TEST_PERFORMANCE },
};
#define NUMTESTTYPES (sizeof(testTypeList)/sizeof(test_t))
int main(int argc, char** argv)
{
int i;
char hints[256];
UINT32 testSet = 0;
UINT32 testTypes = 0;
int results = SUCCESS;
/* Parse command line for the test set. */
for (i = 1; i < argc; ++i)
{
int j;
BOOL found = 0;
for (j=0; j<NUMTESTS; ++j)
{
if (strcasecmp(argv[i], testList[j].testStr) == 0)
{
testSet |= testList[j].bits;
found = 1;
break;
}
}
for (j=0; j<NUMTESTTYPES; ++j)
{
if (strcasecmp(argv[i], testTypeList[j].testStr) == 0)
{
testTypes |= testTypeList[j].bits;
found = 1;
break;
}
}
if (!found)
{
if (strstr(argv[i], "help") != NULL)
{
printf("Available tests:\n");
for (j=0; j<NUMTESTS; ++j)
{
printf(" %s\n", testList[j].testStr);
}
for (j=0; j<NUMTESTTYPES; ++j)
{
printf(" %s\n", testTypeList[j].testStr);
}
}
else fprintf(stderr, "Unknown parameter '%s'!\n", argv[i]);
}
}
if (testSet == 0)
testSet = 0xffffffff;
if (testTypes == 0)
testTypes = 0xffffffff;
primitives_init();
primitives_flags_str(primitives_get(), hints, sizeof(hints));
printf("Hints: %s\n", hints);
/* COPY */
if (testSet & TEST_COPY8)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_copy8u_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_copy8u_speed();
}
}
/* SET */
if (testSet & TEST_SET8)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_set8u_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_set8u_speed();
}
}
if (testSet & TEST_SET32S)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_set32s_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_set32s_speed();
}
}
if (testSet & TEST_SET32U)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_set32u_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_set32u_speed();
}
}
/* SIGN */
if (testSet & TEST_SIGN16S)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_sign16s_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_sign16s_speed();
}
}
/* ADD */
if (testSet & TEST_ADD16S)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_add16s_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_add16s_speed();
}
}
/* SHIFTS */
if (testSet & TEST_LSHIFT16S)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_lShift_16s_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_lShift_16s_speed();
}
}
if (testSet & TEST_LSHIFT16U)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_lShift_16u_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_lShift_16u_speed();
}
}
if (testSet & TEST_RSHIFT16S)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_rShift_16s_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_rShift_16s_speed();
}
}
if (testSet & TEST_RSHIFT16U)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_rShift_16u_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_rShift_16u_speed();
}
}
/* COLORS */
if (testSet & TEST_RGB)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_RGBToRGB_16s8u_P3AC4R_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_RGBToRGB_16s8u_P3AC4R_speed();
}
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_yCbCrToRGB_16s16s_P3P3_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_yCbCrToRGB_16s16s_P3P3_speed();
}
}
/* ALPHA COMPOSITION */
if (testSet & TEST_ALPHA)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_alphaComp_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_alphaComp_speed();
}
}
/* AND & OR */
if (testSet & TEST_AND)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_and_32u_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_and_32u_speed();
}
}
if (testSet & TEST_OR)
{
if (testTypes & TEST_FUNCTIONALITY)
{
results |= test_or_32u_func();
}
if (testTypes & TEST_PERFORMANCE)
{
results |= test_or_32u_speed();
}
}
primitives_deinit();
return results;
}

View File

@@ -0,0 +1,250 @@
/* prim_test.h
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License. Algorithms used by
* this code may be covered by patents by HP, Microsoft, or other parties.
*/
#ifdef __GNUC__
# pragma once
#endif
#ifndef __PRIMTEST_H_INCLUDED__
#define __PRIMTEST_H_INCLUDED__
#include <config.h>
#include <stdint.h>
#include <winpr/wtypes.h>
#include <measure.h>
#include <string.h>
#include <stdio.h>
#include <freerdp/primitives.h>
#ifdef WITH_IPP
#include <ipps.h>
#include <ippi.h>
#endif
/* Alignment, in bytes, requested by ALIGN(). */
#define BLOCK_ALIGNMENT 16
#ifdef __GNUC__
/* ALIGN(decl) declares `decl` aligned to BLOCK_ALIGNMENT bytes;
 * POSSIBLY_UNUSED(decl) marks it as intentionally unused.
 */
#define ALIGN(x) x __attribute((aligned(BLOCK_ALIGNMENT)))
#define POSSIBLY_UNUSED(x) x __attribute((unused))
#else
/* TODO: Someone needs to finish this for non-GNU C */
/* Without GNU C both macros expand to the bare declaration, i.e. no
 * alignment is requested.
 */
#define ALIGN(x) x
#define POSSIBLY_UNUSED(x) x
#endif
/* Absolute value.  The argument is evaluated more than once.  With an
 * unsigned argument the `< 0` test is never true, so the value is returned
 * unchanged - convert to a signed type before use.
 */
#define ABS(_x_) ((_x_) < 0 ? (-(_x_)) : (_x_))
#define MAX_TEST_SIZE 4096
extern int test_sizes[];
#define NUM_TEST_SIZES 10
extern void get_random_data(void *buffer, size_t size);
#ifndef SUCCESS
#define SUCCESS 0
#endif
#ifndef FAILURE
#define FAILURE 1
#endif
extern int test_copy8u_func(void);
extern int test_copy8u_speed(void);
extern int test_set8u_func(void);
extern int test_set32s_func(void);
extern int test_set32u_func(void);
extern int test_set8u_speed(void);
extern int test_set32s_speed(void);
extern int test_set32u_speed(void);
extern int test_sign16s_func(void);
extern int test_sign16s_speed(void);
extern int test_add16s_func(void);
extern int test_add16s_speed(void);
extern int test_lShift_16s_func(void);
extern int test_lShift_16u_func(void);
extern int test_rShift_16s_func(void);
extern int test_rShift_16u_func(void);
extern int test_lShift_16s_speed(void);
extern int test_lShift_16u_speed(void);
extern int test_rShift_16s_speed(void);
extern int test_rShift_16u_speed(void);
extern int test_RGBToRGB_16s8u_P3AC4R_func(void);
extern int test_RGBToRGB_16s8u_P3AC4R_speed(void);
extern int test_yCbCrToRGB_16s16s_P3P3_func(void);
extern int test_yCbCrToRGB_16s16s_P3P3_speed(void);
extern int test_alphaComp_func(void);
extern int test_alphaComp_speed(void);
extern int test_and_32u_func(void);
extern int test_and_32u_speed(void);
extern int test_or_32u_func(void);
extern int test_or_32u_speed(void);
/* Since so much of this code is repeated, define a macro to build
* functions to do speed tests.
*/
/* The DO_*_MEASUREMENTS macros below are meant to be expanded inside the
 * function generated by STD_SPEED_TEST: they use its locals and parameters
 * (s, num_sizes, size_array, iterations, oplabel, test_time and the
 * result* arrays).
 *
 * NOTE(review): the `armel` and `i386` macros tested in this header are not
 * defined in this file; confirm that the build defines them on the
 * intended targets.
 */
/* Column label / label prefix used for the SIMD measurements. */
#ifdef armel
#define SIMD_TYPE "Neon"
#else
#define SIMD_TYPE "SSE"
#endif
/* Times _funcNormal_ for every entry of size_array (after running
 * _prework_) and stores each rate in resultNormal[s].
 */
#define DO_NORMAL_MEASUREMENTS(_funcNormal_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
{ \
int iter; \
char label[256]; \
int size = size_array[s]; \
_prework_; \
iter = iterations/size; \
sprintf(label, "%s-%-4d", oplabel, size); \
MEASURE_TIMED(label, iter, test_time, resultNormal[s], \
_funcNormal_); \
} \
} while (0)
/* Same for the SSE variant, storing into resultSSENeon[s]; expands to
 * nothing unless both i386 and WITH_SSE2 are defined.
 */
#if defined(i386) && defined(WITH_SSE2)
#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
{ \
int iter; \
char label[256]; \
int size = size_array[s]; \
_prework_; \
iter = iterations/size; \
sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
MEASURE_TIMED(label, iter, test_time, resultSSENeon[s], \
_funcSSE_); \
} \
} while (0)
#else
#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_)
#endif
/* Same for the NEON variant, storing into resultSSENeon[s]; expands to
 * nothing unless both armel and INCLUDE_NEON_MEASUREMENTS are defined.
 */
#if defined(armel) && defined(INCLUDE_NEON_MEASUREMENTS)
#define DO_NEON_MEASUREMENTS(_funcNeon_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
{ \
int iter; \
char label[256]; \
int size = size_array[s]; \
_prework_; \
iter = iterations/size; \
sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
MEASURE_TIMED(label, iter, test_time, resultSSENeon[s], \
_funcNeon_); \
} \
} while (0)
#else
#define DO_NEON_MEASUREMENTS(_funcNeon_, _prework_)
#endif
/* Same for the IPP variant, storing into resultIPP[s]; expands to nothing
 * unless both i386 and WITH_IPP are defined.
 */
#if defined(i386) && defined(WITH_IPP)
#define DO_IPP_MEASUREMENTS(_funcIPP_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
{ \
int iter; \
char label[256]; \
int size = size_array[s]; \
_prework_; \
iter = iterations/size; \
sprintf(label, "IPP-%s-%-4d", oplabel, size); \
MEASURE_TIMED(label, iter, test_time, resultIPP[s], \
_funcIPP_); \
} \
} while (0)
#else
#define DO_IPP_MEASUREMENTS(_funcIPP_, _prework_)
#endif
/* ------------------------------------------------------------------------- */
/* Generates a static speed-test function called _name_.
 *
 * Macro parameters:
 *   _srctype_ / _dsttype_   element types of the source and destination
 *   _prework_               statement executed before each measurement
 *   _doNormal_, _funcNormal_              enable flag and call for the
 *                                         general implementation
 *   _doSSE_, _funcSSE_, _flagsSSE_        the same for SSE; only measured
 *                                         when all bits of _flagsSSE_ are
 *                                         present in primitives_get_flags()
 *   _doNeon_, _funcNeon_, _flagsNeon_     the same for NEON
 *   _doIPP_, _funcIPP_                    enable flag and call for IPP
 *
 * The generated function measures every enabled variant for each size in
 * size_array and prints a summary table that shows the SIMD and IPP rates
 * next to the general rate, with their ratio in percent.
 *
 * NOTE(review): the three calloc() results are used without a NULL check,
 * and the percentage strings are written with sprintf() into 8-byte
 * buffers, which hold at most six digits plus '%'.
 */
#define STD_SPEED_TEST( \
_name_, _srctype_, _dsttype_, _prework_, \
_doNormal_, _funcNormal_, \
_doSSE_, _funcSSE_, _flagsSSE_, \
_doNeon_, _funcNeon_, _flagsNeon_, \
_doIPP_, _funcIPP_) \
static void _name_( \
const char *oplabel, const char *type, \
const _srctype_ *src1, const _srctype_ *src2, _srctype_ constant, \
_dsttype_ *dst, \
const int *size_array, int num_sizes, \
int iterations, float test_time) \
{ \
int s; \
float *resultNormal, *resultSSENeon, *resultIPP; \
UINT32 pflags = primitives_get_flags(primitives_get()); \
resultNormal = (float *) calloc(num_sizes, sizeof(float)); \
resultSSENeon = (float *) calloc(num_sizes, sizeof(float)); \
resultIPP = (float *) calloc(num_sizes, sizeof(float)); \
printf("******************** %s %s ******************\n", \
oplabel, type); \
if (_doNormal_) { DO_NORMAL_MEASUREMENTS(_funcNormal_, _prework_); } \
if (_doSSE_) { \
if ((pflags & (_flagsSSE_)) == (_flagsSSE_)) \
{ \
DO_SSE_MEASUREMENTS(_funcSSE_, _prework_); \
} \
} \
if (_doNeon_) { \
if ((pflags & (_flagsNeon_)) == (_flagsNeon_)) \
{ \
DO_NEON_MEASUREMENTS(_funcNeon_, _prework_); \
} \
} \
if (_doIPP_) { DO_IPP_MEASUREMENTS(_funcIPP_, _prework_); } \
printf("----------------------- SUMMARY ----------------------------\n"); \
printf("%8s: %15s %15s %5s %15s %5s\n", \
"size", "general", SIMD_TYPE, "%", "IPP", "%"); \
for (s=0; s<num_sizes; ++s) \
{ \
char sN[32], sSN[32], sSNp[8], sIPP[32], sIPPp[8]; \
strcpy(sN, "N/A"); strcpy(sSN, "N/A"); strcpy(sSNp, "N/A"); \
strcpy(sIPP, "N/A"); strcpy(sIPPp, "N/A"); \
if (resultNormal[s] > 0.0) _floatprint(resultNormal[s], sN); \
if (resultSSENeon[s] > 0.0) \
{ \
_floatprint(resultSSENeon[s], sSN); \
if (resultNormal[s] > 0.0) \
{ \
sprintf(sSNp, "%d%%", \
(int) (resultSSENeon[s] / resultNormal[s] * 100.0 + 0.5)); \
} \
} \
if (resultIPP[s] > 0.0) \
{ \
_floatprint(resultIPP[s], sIPP); \
if (resultNormal[s] > 0.0) \
{ \
sprintf(sIPPp, "%d%%", \
(int) (resultIPP[s] / resultNormal[s] * 100.0 + 0.5)); \
} \
} \
printf("%8d: %15s %15s %5s %15s %5s\n", \
size_array[s], sN, sSN, sSNp, sIPP, sIPPp); \
} \
free(resultNormal); free(resultSSENeon); free(resultIPP); \
}
#endif // !__PRIMTEST_H_INCLUDED__

View File

@@ -0,0 +1,109 @@
/* test_add.c
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "prim_test.h"
#define FUNC_TEST_SIZE 65536
static const int ADD16S_PRETEST_ITERATIONS = 300000*64;
static const int TEST_TIME = 2.0; // seconds
extern pstatus_t general_add_16s(
const INT16 *pSrc1, const INT16 *pSrc2, INT16 *pDst, int len);
extern pstatus_t sse3_add_16s(
const INT16 *pSrc1, const INT16 *pSrc2, INT16 *pDst, int len);
/* ========================================================================= */
/* Functional test for the 16-bit signed add primitive.
 *
 * The general implementation produces the reference result in
 * d1[1..FUNC_TEST_SIZE]; every optimised variant that is available is then
 * run into d2 and compared element by element.
 *
 * Changes from the previous version: the comparison loops now include the
 * last computed element (index FUNC_TEST_SIZE, which "i < FUNC_TEST_SIZE"
 * skipped), and d2 is cleared before each variant so that a stale result
 * from an earlier variant cannot hide a failure.
 *
 * Returns SUCCESS when every comparison matched, FAILURE otherwise.
 */
int test_add16s_func(void)
{
	INT16 ALIGN(src1[FUNC_TEST_SIZE+3]), ALIGN(src2[FUNC_TEST_SIZE+3]),
		ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]);
	int failed = 0;
	int i;
	char testStr[256];
	UINT32 pflags = primitives_get_flags(primitives_get());

	testStr[0] = '\0';
	get_random_data(src1, sizeof(src1));
	get_random_data(src2, sizeof(src2));
	memset(d1, 0, sizeof(d1));
	memset(d2, 0, sizeof(d2));

	/* Reference result: d1[i] = src1[i] + src2[i] for i in 1..SIZE. */
	general_add_16s(src1+1, src2+1, d1+1, FUNC_TEST_SIZE);
#ifdef i386
	if (pflags & PRIM_X86_SSE3_AVAILABLE)
	{
		strcat(testStr, " SSE3");
		/* Aligned */
		memset(d2, 0, sizeof(d2));
		sse3_add_16s(src1+1, src2+1, d2+1, FUNC_TEST_SIZE);
		for (i=1; i<=FUNC_TEST_SIZE; ++i)
		{
			if (d1[i] != d2[i])
			{
				printf("ADD16S-SSE-aligned FAIL[%d] %d+%d=%d, got %d\n",
					i, src1[i], src2[i], d1[i], d2[i]);
				++failed;
			}
		}
		/* Unaligned */
		/* The destination is shifted by one more element, so result i
		 * lands in d2[i+1].
		 */
		memset(d2, 0, sizeof(d2));
		sse3_add_16s(src1+1, src2+1, d2+2, FUNC_TEST_SIZE);
		for (i=1; i<=FUNC_TEST_SIZE; ++i)
		{
			if (d1[i] != d2[i+1])
			{
				printf("ADD16S-SSE-unaligned FAIL[%d] %d+%d=%d, got %d\n",
					i, src1[i], src2[i], d1[i], d2[i+1]);
				++failed;
			}
		}
	}
#endif /* i386 */
#ifdef WITH_IPP
	strcat(testStr, " IPP");
	memset(d2, 0, sizeof(d2));
	ippsAdd_16s(src1+1, src2+1, d2+1, FUNC_TEST_SIZE);
	for (i=1; i<=FUNC_TEST_SIZE; ++i)
	{
		if (d1[i] != d2[i])
		{
			printf("ADD16S-IPP FAIL[%d] %d+%d=%d, got %d\n",
				i, src1[i], src2[i], d1[i], d2[i]);
			++failed;
		}
	}
#endif /* WITH_IPP */
	if (!failed) printf("All add16s tests passed (%s).\n", testStr);
	return (failed > 0) ? FAILURE : SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Instantiates add16s_speed_test() through STD_SPEED_TEST: the general and
 * SSE3 variants are enabled (SSE3 only when PRIM_X86_SSE3_AVAILABLE is
 * reported), NEON is disabled, and IPP is enabled.  "dst=dst" is a
 * do-nothing placeholder for the pre-work and the unused NEON call.
 */
STD_SPEED_TEST(add16s_speed_test, INT16, INT16, dst=dst,
TRUE, general_add_16s(src1, src2, dst, size),
TRUE, sse3_add_16s(src1, src2, dst, size), PRIM_X86_SSE3_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, ippsAdd_16s(src1, src2, dst, size));
int test_add16s_speed(void)
{
INT16 ALIGN(src1[MAX_TEST_SIZE+3]), ALIGN(src2[MAX_TEST_SIZE+3]),
ALIGN(dst[MAX_TEST_SIZE+3]);
get_random_data(src1, sizeof(src1));
get_random_data(src2, sizeof(src2));
add16s_speed_test("add16s", "aligned", src1, src2, 0, dst,
test_sizes, NUM_TEST_SIZES, ADD16S_PRETEST_ITERATIONS, TEST_TIME);
add16s_speed_test("add16s", "unaligned", src1+1, src2+2, 0, dst,
test_sizes, NUM_TEST_SIZES, ADD16S_PRETEST_ITERATIONS, TEST_TIME);
return SUCCESS;
}

View File

@@ -0,0 +1,230 @@
/* test_alphaComp.c
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "prim_test.h"
/* Iteration count passed to alphaComp_speed() as its pretest argument. */
static const int ALPHA_PRETEST_ITERATIONS = 5000000;
/* Time value passed to alphaComp_speed(); presumably seconds -- confirm
 * against the harness in prim_test.h. */
static const float TEST_TIME = 5.0;
/* Edge lengths of the square blocks benchmarked by test_alphaComp_speed(). */
static const int block_size[] = { 4, 64, 256 };
#define NUM_BLOCK_SIZES (sizeof(block_size)/sizeof(int))
/* Largest entry of block_size[]; used to size the benchmark buffers. */
#define MAX_BLOCK_SIZE 256
#define SIZE_SQUARED (MAX_BLOCK_SIZE*MAX_BLOCK_SIZE)
/* Implementations under test, defined outside this file.  All three share
 * one signature: two source buffers and one destination buffer, each with its
 * own row step (the callers below pass it in bytes), plus the region's width
 * and height. */
extern pstatus_t general_alphaComp_argb(
const BYTE *pSrc1, int src1Step,
const BYTE *pSrc2, int src2Step,
BYTE *pDst, int dstStep,
int width, int height);
extern pstatus_t sse2_alphaComp_argb(
const BYTE *pSrc1, int src1Step,
const BYTE *pSrc2, int src2Step,
BYTE *pDst, int dstStep,
int width, int height);
extern pstatus_t ipp_alphaComp_argb(
const BYTE *pSrc1, int src1Step,
const BYTE *pSrc2, int src2Step,
BYTE *pDst, int dstStep,
int width, int height);
/* ========================================================================= */
/* Channel extractors for a packed 32-bit pixel: alpha is bits 31..24, red
 * 23..16, green 15..8 and blue 7..0.  The masks are unsigned, so each macro
 * yields an unsigned value in 0..255. */
#define ALF(_c_) (((_c_) & 0xFF000000U) >> 24)
#define RED(_c_) (((_c_) & 0x00FF0000U) >> 16)
#define GRN(_c_) (((_c_) & 0x0000FF00U) >> 8)
#define BLU(_c_) ((_c_) & 0x000000FFU)
/* Largest per-channel difference, as measured by colordist(), that is still
 * accepted as a match. */
#define TOLERANCE 1
/* Pointer to the 32-bit pixel at column _x_, row _y_ of the buffer starting
 * at _addr_ whose rows are _bytes_ bytes apart (4 bytes per pixel). */
#define PIXEL(_addr_, _bytes_, _x_, _y_) \
((UINT32 *) (((BYTE *) (_addr_)) + (_x_)*4 + (_y_)*(_bytes_)))
/* Buffer dimensions in pixels.  The three buffers have different widths, so
 * each one is addressed with its own row step. */
#define SRC1_WIDTH 6
#define SRC1_HEIGHT 6
#define SRC2_WIDTH 7
#define SRC2_HEIGHT 7
#define DST_WIDTH 9
#define DST_HEIGHT 9
/* Size in pixels of the region that is composited and then verified. */
#define TEST_WIDTH 4
#define TEST_HEIGHT 5
/* ------------------------------------------------------------------------- */
/* Reference blend used to validate the implementations under test.
 *
 * Every channel of the result -- alpha included -- is computed as
 *     (a1 * ch1 + (255 - a1) * ch2) / 255
 * truncated to 8 bits, where a1 is the alpha of c1 and ch1/ch2 are the
 * matching channels of c1 and c2.  For the alpha channel this means a1 is
 * weighted by itself.
 *
 * Returns the four blended channels packed as alpha/red/green/blue from the
 * most significant byte down.
 */
static UINT32 alpha_add(
    UINT32 c1,
    UINT32 c2)
{
    const UINT32 weight1 = ALF(c1);
    const UINT32 weight2 = 255 - weight1;
    UINT32 packed = 0;

    packed |= (((weight1 * ALF(c1) + weight2 * ALF(c2)) / 255) & 0xff) << 24;
    packed |= (((weight1 * RED(c1) + weight2 * RED(c2)) / 255) & 0xff) << 16;
    packed |= (((weight1 * GRN(c1) + weight2 * GRN(c2)) / 255) & 0xff) << 8;
    packed |= (((weight1 * BLU(c1) + weight2 * BLU(c2)) / 255) & 0xff);

    return packed;
}
/* ------------------------------------------------------------------------- */
/* Returns the largest absolute per-channel difference (alpha, red, green,
 * blue) between the packed pixels c1 and c2; the result is in 0..255.
 *
 * The channel extractors yield unsigned values, so subtracting them directly
 * wraps around whenever the first operand is the smaller one, and the wrapped
 * result lands in an int as a negative number that can never exceed the
 * running maximum.  Each channel is therefore converted to int before the
 * subtraction so that differences in both directions are measured.
 */
static UINT32 colordist(
    UINT32 c1,
    UINT32 c2)
{
    int first[4];
    int second[4];
    int maxd = 0;
    int ch;

    first[0] = (int) ALF(c1);
    first[1] = (int) RED(c1);
    first[2] = (int) GRN(c1);
    first[3] = (int) BLU(c1);

    second[0] = (int) ALF(c2);
    second[1] = (int) RED(c2);
    second[2] = (int) GRN(c2);
    second[3] = (int) BLU(c2);

    for (ch = 0; ch < 4; ++ch)
    {
        int d = first[ch] - second[ch];
        if (d < 0) d = -d;      /* signed arithmetic: safe to negate */
        if (d > maxd) maxd = d;
    }

    return (UINT32) maxd;
}
/* ------------------------------------------------------------------------- */
/* Functional test for ARGB alpha compositing.
 *
 * Fills two source images with random pixels (forcing the first pixel of
 * src1 fully transparent, the second fully opaque, and every pixel of src2
 * fully opaque), composites a TEST_WIDTH x TEST_HEIGHT region with each
 * available implementation, and compares every output pixel against
 * alpha_add() with a per-channel tolerance of TOLERANCE.
 *
 * Returns SUCCESS when every pixel matches, FAILURE otherwise.
 */
int test_alphaComp_func(void)
{
    UINT32 ALIGN(src1[SRC1_WIDTH*SRC1_HEIGHT]);
    UINT32 ALIGN(src2[SRC2_WIDTH*SRC2_HEIGHT]);
    UINT32 ALIGN(dst1[DST_WIDTH*DST_HEIGHT]);
    UINT32 ALIGN(dst2a[DST_WIDTH*DST_HEIGHT]);
    UINT32 ALIGN(dst2u[DST_WIDTH*DST_HEIGHT+1]);
    UINT32 ALIGN(dst3[DST_WIDTH*DST_HEIGHT]);
    /* Row steps in bytes; each buffer has its own width. */
    const int src1Step = 4*SRC1_WIDTH;
    const int src2Step = 4*SRC2_WIDTH;
    const int dstStep = 4*DST_WIDTH;
    int mismatch = 0;
    UINT32 pflags = primitives_get_flags(primitives_get());
    char testStr[256];
    int k, col, row;

    testStr[0] = '\0';

    get_random_data(src1, sizeof(src1));
    /* Special-case the first two values */
    src1[0] &= 0x00FFFFFFU;
    src1[1] |= 0xFF000000U;

    get_random_data(src2, sizeof(src2));
    /* Set the second operand to fully-opaque. */
    for (k = 0; k < sizeof(src2)/4; ++k)
    {
        src2[k] |= 0xFF000000U;
    }

    memset(dst1, 0, sizeof(dst1));
    memset(dst2a, 0, sizeof(dst2a));
    memset(dst2u, 0, sizeof(dst2u));
    memset(dst3, 0, sizeof(dst3));

    general_alphaComp_argb((const BYTE *) src1, src1Step,
        (const BYTE *) src2, src2Step,
        (BYTE *) dst1, dstStep, TEST_WIDTH, TEST_HEIGHT);
#ifdef i386
    if (pflags & PRIM_X86_SSE2_AVAILABLE)
    {
        strcat(testStr, " SSE2");
        /* Once into the aligned buffer ... */
        sse2_alphaComp_argb((const BYTE *) src1, src1Step,
            (const BYTE *) src2, src2Step,
            (BYTE *) dst2a, dstStep, TEST_WIDTH, TEST_HEIGHT);
        /* ... and once into a destination offset by one pixel. */
        sse2_alphaComp_argb((const BYTE *) src1, src1Step,
            (const BYTE *) src2, src2Step,
            (BYTE *) (dst2u+1), dstStep, TEST_WIDTH, TEST_HEIGHT);
    }
#endif /* i386 */
#ifdef WITH_IPP
    strcat(testStr, " IPP");
    ipp_alphaComp_argb((const BYTE *) src1, src1Step,
        (const BYTE *) src2, src2Step,
        (BYTE *) dst3, dstStep, TEST_WIDTH, TEST_HEIGHT);
#endif

    for (row = 0; row < TEST_HEIGHT; ++row)
    {
        for (col = 0; col < TEST_WIDTH; ++col)
        {
            const UINT32 s1 = *PIXEL(src1, src1Step, col, row);
            const UINT32 s2 = *PIXEL(src2, src2Step, col, row);
            const UINT32 want = alpha_add(s1, s2);
            const UINT32 gotGeneral = *PIXEL(dst1, dstStep, col, row);

            if (colordist(want, gotGeneral) > TOLERANCE)
            {
                printf("alphaComp-general: [%d,%d] 0x%08x+0x%08x=0x%08x, got 0x%08x\n",
                    col, row, s1, s2, want, gotGeneral);
                mismatch = 1;
            }
#ifdef i386
            if (pflags & PRIM_X86_SSE2_AVAILABLE)
            {
                UINT32 gotSse = *PIXEL(dst2a, dstStep, col, row);
                if (colordist(want, gotSse) > TOLERANCE)
                {
                    printf("alphaComp-SSE-aligned: [%d,%d] 0x%08x+0x%08x=0x%08x, got 0x%08x\n",
                        col, row, s1, s2, want, gotSse);
                    mismatch = 1;
                }
                gotSse = *PIXEL(dst2u+1, dstStep, col, row);
                if (colordist(want, gotSse) > TOLERANCE)
                {
                    printf("alphaComp-SSE-unaligned: [%d,%d] 0x%08x+0x%08x=0x%08x, got 0x%08x\n",
                        col, row, s1, s2, want, gotSse);
                    mismatch = 1;
                }
            }
#endif /* i386 */
#ifdef WITH_IPP
            {
                const UINT32 gotIpp = *PIXEL(dst3, dstStep, col, row);
                if (colordist(want, gotIpp) > TOLERANCE)
                {
                    printf("alphaComp-IPP: [%d,%d] 0x%08x+0x%08x=0x%08x, got 0x%08x\n",
                        col, row, s1, s2, want, gotIpp);
                    mismatch = 1;
                }
            }
#endif
        }
    }

    if (mismatch > 0)
    {
        return FAILURE;
    }
    printf("All alphaComp tests passed (%s).\n", testStr);
    return SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Generates the benchmark function alphaComp_speed() through the
 * STD_SPEED_TEST macro, which is defined outside this file.  As used here the
 * arguments are: the generated function's name, the source and destination
 * element types, a preamble statement (the row step in bytes for a
 * size-by-size block of 4-byte pixels), and then an enable flag plus call
 * expression per implementation: general, SSE2 (with the CPU flag it needs),
 * a disabled slot, and IPP.
 * NOTE(review): the role of each slot is read off this call site -- confirm
 * against the macro definition in prim_test.h. */
STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes = size*4,
TRUE, general_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
size, size),
TRUE, sse2_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
size, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, ipp_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
size, size));
int test_alphaComp_speed(void)
{
INT32 ALIGN(src1[MAX_BLOCK_SIZE*(MAX_BLOCK_SIZE+1)]),
ALIGN(src2[SIZE_SQUARED]),
ALIGN(dst[SIZE_SQUARED]);
get_random_data(src1, sizeof(src1));
get_random_data(src2, sizeof(src2));
alphaComp_speed("alphaComp", "aligned",
(BYTE *) src1, (BYTE *) src2, 0, (BYTE *) dst,
block_size, NUM_BLOCK_SIZES, ALPHA_PRETEST_ITERATIONS, TEST_TIME);
alphaComp_speed("alphaComp", "unaligned",
(BYTE *) src1+1, (BYTE *) src2, 0, (BYTE *) dst,
block_size, NUM_BLOCK_SIZES, ALPHA_PRETEST_ITERATIONS, TEST_TIME);
return SUCCESS;
}

View File

@@ -0,0 +1,182 @@
/* test_andor.c
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "prim_test.h"
/* Number of 32-bit elements processed by each functional test below. */
#define FUNC_TEST_SIZE 65536
/* Iteration count passed to the speed tests as their pretest argument. */
static const int ANDOR_PRETEST_ITERATIONS = 100000;
static const int TEST_TIME = 2.0; // seconds
/* Implementations under test, defined outside this file.  The functional
 * tests below expect pDst[i] to equal pSrc[i] & val (andC) or pSrc[i] | val
 * (orC) for each of the len elements. */
extern pstatus_t general_andC_32u(const UINT32 *pSrc, UINT32 val,
UINT32 *pDst, int len);
extern pstatus_t sse3_andC_32u(const UINT32 *pSrc, UINT32 val,
UINT32 *pDst, int len);
extern pstatus_t general_orC_32u(const UINT32 *pSrc, UINT32 val,
UINT32 *pDst, int len);
extern pstatus_t sse3_orC_32u(const UINT32 *pSrc, UINT32 val,
UINT32 *pDst, int len);
/* Constant operand used by every test in this file. */
#define VALUE (0xA5A5A5A5U)
/* ========================================================================= */
int test_and_32u_func(void)
{
UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]);
int failed = 0;
int i;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
testStr[0] = '\0';
get_random_data(src, sizeof(src));
general_andC_32u(src+1, VALUE, dst+1, FUNC_TEST_SIZE);
strcat(testStr, " general");
for (i=1; i<=FUNC_TEST_SIZE; ++i)
{
if (dst[i] != (src[i] & VALUE))
{
printf("AND-general FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
i, src[i], VALUE, src[i] & VALUE, dst[i]);
++failed;
}
}
#ifdef i386
if (pflags & PRIM_X86_SSE3_AVAILABLE)
{
strcat(testStr, " SSE3");
/* Aligned */
memset(dst, 0, sizeof(dst));
sse3_andC_32u(src+1, VALUE, dst+1, FUNC_TEST_SIZE);
for (i=1; i<=FUNC_TEST_SIZE; ++i)
{
if (dst[i] != (src[i] & VALUE))
{
printf("AND-SSE-aligned FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
i, src[i], VALUE, src[i] & VALUE, dst[i]);
++failed;
}
}
/* Unaligned */
memset(dst, 0, sizeof(dst));
sse3_andC_32u(src+1, VALUE, dst+2, FUNC_TEST_SIZE);
for (i=1; i<=FUNC_TEST_SIZE; ++i)
{
if (dst[i+1] != (src[i] & VALUE))
{
printf("AND-SSE-unaligned FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
i, src[i], VALUE, src[i] & VALUE, dst[i+1]);
++failed;
}
}
}
#endif /* i386 */
if (!failed) printf("All and_32u tests passed (%s).\n", testStr);
return (failed > 0) ? FAILURE : SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Generates the benchmark function andC_32u_speed_test() through the
 * STD_SPEED_TEST macro (defined outside this file).  The call supplies the
 * general implementation, the SSE3 implementation together with the CPU flag
 * it needs, a disabled slot, and the IPP routine ippsAndC_32u.
 * NOTE(review): slot meanings are read off this call site -- confirm against
 * the macro definition in prim_test.h. */
STD_SPEED_TEST(andC_32u_speed_test, UINT32, UINT32, dst=dst,
TRUE, general_andC_32u(src1, constant, dst, size),
TRUE, sse3_andC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, ippsAndC_32u(src1, constant, dst, size))
int test_and_32u_speed(void)
{
UINT32 ALIGN(src[MAX_TEST_SIZE+3]), ALIGN(dst[MAX_TEST_SIZE+3]);
get_random_data(src, sizeof(src));
andC_32u_speed_test("and32u", "aligned", src, NULL, VALUE, dst,
test_sizes, NUM_TEST_SIZES, ANDOR_PRETEST_ITERATIONS, TEST_TIME);
andC_32u_speed_test("and32u", "unaligned", src+1, NULL, VALUE, dst,
test_sizes, NUM_TEST_SIZES, ANDOR_PRETEST_ITERATIONS, TEST_TIME);
return SUCCESS;
}
/* ========================================================================= */
/* Functional test for OR-with-constant on 32-bit unsigned data.
 *
 * Runs the general implementation over FUNC_TEST_SIZE random words and, on
 * i386 builds when the SSE3 flag is set, the SSE3 implementation twice (the
 * second time with the destination shifted by one extra element).  Every
 * output word, including the last one, is compared with src | VALUE.
 *
 * Returns SUCCESS when nothing mismatched, FAILURE otherwise.
 */
int test_or_32u_func(void)
{
    UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]);
    int failed = 0;
    int i;
    UINT32 pflags = primitives_get_flags(primitives_get());
    char testStr[256];

    testStr[0] = '\0';
    get_random_data(src, sizeof(src));

    general_orC_32u(src+1, VALUE, dst+1, FUNC_TEST_SIZE);
    strcat(testStr, " general");
    for (i=1; i<=FUNC_TEST_SIZE; ++i)
    {
        if (dst[i] != (src[i] | VALUE))
        {
            printf("OR-general FAIL[%d] 0x%08x|0x%08x=0x%08x, got 0x%08x\n",
                i, src[i], VALUE, src[i] | VALUE, dst[i]);
            ++failed;
        }
    }
#ifdef i386
    if (pflags & PRIM_X86_SSE3_AVAILABLE)
    {
        strcat(testStr, " SSE3");

        /* Aligned */
        memset(dst, 0, sizeof(dst));
        sse3_orC_32u(src+1, VALUE, dst+1, FUNC_TEST_SIZE);
        /* Elements 1..FUNC_TEST_SIZE inclusive were written, so the bound
         * must be <= or the final element goes unchecked. */
        for (i=1; i<=FUNC_TEST_SIZE; ++i)
        {
            if (dst[i] != (src[i] | VALUE))
            {
                printf("OR-SSE-aligned FAIL[%d] 0x%08x|0x%08x=0x%08x, got 0x%08x\n",
                    i, src[i], VALUE, src[i] | VALUE, dst[i]);
                ++failed;
            }
        }

        /* Unaligned */
        memset(dst, 0, sizeof(dst));
        sse3_orC_32u(src+1, VALUE, dst+2, FUNC_TEST_SIZE);
        for (i=1; i<=FUNC_TEST_SIZE; ++i)
        {
            if (dst[i+1] != (src[i] | VALUE))
            {
                printf("OR-SSE-unaligned FAIL[%d] 0x%08x|0x%08x=0x%08x, got 0x%08x\n",
                    i, src[i], VALUE, src[i] | VALUE, dst[i+1]);
                ++failed;
            }
        }
    }
#endif /* i386 */

    if (!failed) printf("All or_32u tests passed (%s).\n", testStr);
    return (failed > 0) ? FAILURE : SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Generates the benchmark function orC_32u_speed_test() through the
 * STD_SPEED_TEST macro (defined outside this file).  The call supplies the
 * general implementation, the SSE3 implementation together with the CPU flag
 * it needs, a disabled slot, and the IPP routine ippsOrC_32u.
 * NOTE(review): slot meanings are read off this call site -- confirm against
 * the macro definition in prim_test.h. */
STD_SPEED_TEST(orC_32u_speed_test, UINT32, UINT32, dst=dst,
TRUE, general_orC_32u(src1, constant, dst, size),
TRUE, sse3_orC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, ippsOrC_32u(src1, constant, dst, size))
int test_or_32u_speed(void)
{
UINT32 ALIGN(src[MAX_TEST_SIZE+3]), ALIGN(dst[MAX_TEST_SIZE+3]);
get_random_data(src, sizeof(src));
orC_32u_speed_test("or32u", "aligned", src, NULL, VALUE, dst,
test_sizes, NUM_TEST_SIZES, ANDOR_PRETEST_ITERATIONS, TEST_TIME);
orC_32u_speed_test("or32u", "unaligned", src+1, NULL, VALUE, dst,
test_sizes, NUM_TEST_SIZES, ANDOR_PRETEST_ITERATIONS, TEST_TIME);
return SUCCESS;
}

View File

@@ -0,0 +1,230 @@
/* test_colors.c
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "prim_test.h"
/* Iteration counts passed to the two speed tests as their pretest argument. */
static const int RGB_TRIAL_ITERATIONS = 1000;
static const int YCBCR_TRIAL_ITERATIONS = 1000;
/* Time value passed to the speed tests; presumably seconds -- confirm
 * against the harness in prim_test.h. */
static const float TEST_TIME = 4.0;
/* Implementations under test, defined outside this file.  RGBToRGB takes
 * three 16-bit input planes and writes one byte buffer; yCbCrToRGB takes
 * three 16-bit input planes and writes three 16-bit output planes.  Both take
 * a source step, a destination step and a region of interest. */
extern pstatus_t general_RGBToRGB_16s8u_P3AC4R(const INT16 *pSrc[3],
int srcStep, BYTE *pDst, int dstStep, const prim_size_t *roi);
extern pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(const INT16 *pSrc[3],
int srcStep, BYTE *pDst, int dstStep, const prim_size_t *roi);
extern pstatus_t general_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3],
int srcStep, INT16 *pDst[3], int dstStep, const prim_size_t *roi);
extern pstatus_t sse2_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3],
int srcStep, INT16 *pDst[3], int dstStep, const prim_size_t *roi);
/* ------------------------------------------------------------------------- */
int test_RGBToRGB_16s8u_P3AC4R_func(void)
{
INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
UINT32 ALIGN(out1[4096]), ALIGN(out2[4096]);
int i;
int failed = 0;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
INT16 *ptrs[3];
prim_size_t roi = { 64, 64 };
testStr[0] = '\0';
get_random_data(r, sizeof(r));
get_random_data(g, sizeof(g));
get_random_data(b, sizeof(b));
/* clear upper bytes */
for (i=0; i<4096; ++i)
{
r[i] &= 0x00FFU;
g[i] &= 0x00FFU;
b[i] &= 0x00FFU;
}
ptrs[0] = r;
ptrs[1] = g;
ptrs[2] = b;
general_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
(BYTE *) out1, 64*4, &roi);
#ifdef i386
if (pflags & PRIM_X86_SSE2_AVAILABLE)
{
strcat(testStr, " SSE2");
sse2_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
(BYTE *) out2, 64*4, &roi);
for (i=0; i<4096; ++i)
{
if (out1[i] != out2[i])
{
printf("RGBToRGB-SSE FAIL: out1[%d]=0x%08x out2[%d]=0x%08x\n",
i, out1[i], i, out2[i]);
failed = 1;
}
}
}
#endif /* i386 */
if (!failed) printf("All RGBToRGB_16s8u_P3AC4R tests passed (%s).\n", testStr);
return (failed > 0) ? FAILURE : SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Region of interest shared by both colour-conversion benchmarks. */
static const prim_size_t roi64x64 = { 64, 64 };
/* Generates the benchmark function rgb_to_argb_speed() through the
 * STD_SPEED_TEST macro (defined outside this file).  Only the general and
 * SSE2 implementations are enabled; the remaining two slots are switched off
 * with FALSE.
 * NOTE(review): slot meanings are read off this call site -- confirm against
 * the macro definition in prim_test.h. */
STD_SPEED_TEST(
rgb_to_argb_speed, INT16*, UINT32, dst=dst,
TRUE, general_RGBToRGB_16s8u_P3AC4R(
(const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
TRUE, sse2_RGBToRGB_16s8u_P3AC4R(
(const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
FALSE, dst=dst);
int test_RGBToRGB_16s8u_P3AC4R_speed(void)
{
INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
UINT32 ALIGN(dst[4096]);
int i;
INT16 *ptrs[3];
int size_array[] = { 64 };
get_random_data(r, sizeof(r));
get_random_data(g, sizeof(g));
get_random_data(b, sizeof(b));
/* clear upper bytes */
for (i=0; i<4096; ++i)
{
r[i] &= 0x00FFU;
g[i] &= 0x00FFU;
b[i] &= 0x00FFU;
}
ptrs[0] = r;
ptrs[1] = g;
ptrs[2] = b;
rgb_to_argb_speed("RGBToARGB", "aligned",
(const INT16 **) ptrs, NULL, 0, dst,
size_array, 1, RGB_TRIAL_ITERATIONS, TEST_TIME);
return SUCCESS;
}
/* ========================================================================= */
int test_yCbCrToRGB_16s16s_P3P3_func(void)
{
INT16 ALIGN(y[4096]), ALIGN(cb[4096]), ALIGN(cr[4096]);
INT16 ALIGN(r1[4096]), ALIGN(g1[4096]), ALIGN(b1[4096]);
INT16 ALIGN(r2[4096]), ALIGN(g2[4096]), ALIGN(b2[4096]);
int i;
int failed = 0;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
const INT16 *in[3];
INT16 *out1[3];
INT16 *out2[3];
prim_size_t roi = { 64, 64 };
testStr[0] = '\0';
get_random_data(y, sizeof(y));
get_random_data(cb, sizeof(cb));
get_random_data(cr, sizeof(cr));
/* Normalize to 11.5 fixed radix */
for (i=0; i<4096; ++i)
{
y[i] &= 0x1FE0U;
cb[i] &= 0x1FE0U;
cr[i] &= 0x1FE0U;
}
memset(r1, 0, sizeof(r1));
memset(g1, 0, sizeof(g1));
memset(b1, 0, sizeof(b1));
memset(r2, 0, sizeof(r2));
memset(g2, 0, sizeof(g2));
memset(b2, 0, sizeof(b2));
in[0] = y;
in[1] = cb;
in[2] = cr;
out1[0] = r1;
out1[1] = g1;
out1[2] = b1;
out2[0] = r2;
out2[1] = g2;
out2[2] = b2;
general_yCbCrToRGB_16s16s_P3P3(in, 64*2, out1, 64*2, &roi);
#ifdef i386
if (pflags & PRIM_X86_SSE2_AVAILABLE)
{
strcat(testStr, " SSE2");
sse2_yCbCrToRGB_16s16s_P3P3(in, 64*2, out2, 64*2, &roi);
for (i=0; i<4096; ++i)
{
if ((ABS(r1[i]-r2[i]) > 1)
|| (ABS(g1[i]-g2[i]) > 1)
|| (ABS(b1[i]-b2[i]) > 1)) {
printf("YCbCrToRGB-SSE FAIL[%d]: %d,%d,%d vs %d,%d,%d\n", i,
r1[i],g1[i],b1[i], r2[i],g2[i],b2[i]);
failed = 1;
}
}
}
#endif /* i386 */
if (!failed) printf("All yCbCrToRGB_16s16s_P3P3 tests passed (%s).\n", testStr);
return (failed > 0) ? FAILURE : SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Generates the benchmark function ycbcr_to_rgb_speed() through the
 * STD_SPEED_TEST macro (defined outside this file).  Only the general and
 * SSE2 implementations are enabled; the remaining two slots are switched off
 * with FALSE.  Both calls work on the shared 64x64 region roi64x64.
 * NOTE(review): slot meanings are read off this call site -- confirm against
 * the macro definition in prim_test.h. */
STD_SPEED_TEST(
ycbcr_to_rgb_speed, INT16*, INT16*, dst=dst,
TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
FALSE, dst=dst);
int test_yCbCrToRGB_16s16s_P3P3_speed(void)
{
INT16 ALIGN(y[4096]), ALIGN(cb[4096]), ALIGN(cr[4096]);
INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
int i;
const INT16 *input[3];
INT16 *output[3];
int size_array[] = { 64 };
get_random_data(y, sizeof(y));
get_random_data(cb, sizeof(cb));
get_random_data(cr, sizeof(cr));
/* Normalize to 11.5 fixed radix */
for (i=0; i<4096; ++i)
{
y[i] &= 0x1FE0U;
cb[i] &= 0x1FE0U;
cr[i] &= 0x1FE0U;
}
input[0] = y;
input[1] = cb;
input[2] = cr;
output[0] = r;
output[1] = g;
output[2] = b;
ycbcr_to_rgb_speed("yCbCrToRGB", "aligned", input, NULL, NULL, output,
size_array, 1, YCBCR_TRIAL_ITERATIONS, TEST_TIME);
return SUCCESS;
}

View File

@@ -0,0 +1,87 @@
/* test_copy.c
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "prim_test.h"
/* Iteration count passed to copy8u_speed_test() as its pretest argument. */
static const int MEMCPY_PRETEST_ITERATIONS = 1000000;
static const int TEST_TIME = 1.0; // seconds
/* Longest copy exercised by test_copy8u_func(); the test buffers are 15
 * bytes larger so that every source/destination offset 0..15 fits. */
#define COPY_TESTSIZE (256*2+16*2+15+15)
/* Disabled declaration: no test in this file calls sse3_copy_8u directly. */
#if 0
extern pstatus_t sse3_copy_8u(const BYTE *pSrc, BYTE *pDst, int len);
#endif
/* ------------------------------------------------------------------------- */
/* Functional test for the copy_8u primitive reached through the primitives
 * table.
 *
 * For every source offset 0..15, destination offset 0..15 and every length
 * from 1 up to COPY_TESTSIZE minus the destination offset, the destination
 * is cleared, the copy is performed, and each copied byte is compared with
 * the source.
 *
 * Returns SUCCESS when every byte matched, FAILURE otherwise.
 */
int test_copy8u_func(void)
{
    primitives_t *prims = primitives_get();
    BYTE ALIGN(data[COPY_TESTSIZE+15]);
    int i, soff;
    int failed = 0;
    char testStr[256];
    BYTE ALIGN(dest[COPY_TESTSIZE+15]);

    testStr[0] = '\0';
    get_random_data(data, sizeof(data));
    strcat(testStr, " ptr");

    for (soff=0; soff<16; ++soff)
    {
        int doff;
        for (doff=0; doff<16; ++doff)
        {
            int length;
            for (length=1; length<=COPY_TESTSIZE-doff; ++length)
            {
                memset(dest, 0, sizeof(dest));
                prims->copy_8u(data+soff, dest+doff, length);
                for (i=0; i<length; ++i)
                {
                    if (dest[i+doff] != data[i+soff])
                    {
                        /* The two literals are concatenated by the compiler;
                         * the second starts with a space so the two values
                         * stay separated in the output. */
                        printf("COPY8U FAIL: off=%d len=%d, dest[%d]=0x%02x"
                            " data[%d]=0x%02x\n",
                            doff, length, i+doff, dest[i+doff],
                            i+soff, data[i+soff]);
                        failed = 1;
                    }
                }
            }
        }
    }

    if (!failed) printf("All copy8 tests passed (%s).\n", testStr);
    return (failed > 0) ? FAILURE : SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Generates the benchmark function copy8u_speed_test() through the
 * STD_SPEED_TEST macro (defined outside this file).  The baseline slot times
 * plain memcpy, the two middle slots are switched off, and the last slot
 * times the IPP routine ippsCopy_8u.
 * NOTE(review): slot meanings are read off this call site -- confirm against
 * the macro definition in prim_test.h. */
STD_SPEED_TEST(copy8u_speed_test, BYTE, BYTE, dst=dst,
TRUE, memcpy(dst, src1, size),
FALSE, NULL, 0,
FALSE, NULL, 0,
TRUE, ippsCopy_8u(src1, dst, size));
/* Benchmarks byte copies over the standard test sizes, once from the start
 * of the source buffer and once from one byte further in.
 *
 * The source is filled with random data first, as the other speed tests do,
 * so the benchmark does not copy indeterminate stack contents.
 * Always returns SUCCESS; timings are reported by the harness.
 */
int test_copy8u_speed(void)
{
    BYTE ALIGN(src[MAX_TEST_SIZE+4]);
    /* NOTE(review): never referenced; presumably present to keep src and dst
     * apart on the stack -- confirm the intent before removing. */
    BYTE ALIGN(intervening[MAX_TEST_SIZE*7]);
    BYTE ALIGN(dst[MAX_TEST_SIZE+4]);

    get_random_data(src, sizeof(src));

    copy8u_speed_test("copy8u", "aligned", src, NULL, 0, dst,
        test_sizes, NUM_TEST_SIZES, MEMCPY_PRETEST_ITERATIONS, TEST_TIME);
    copy8u_speed_test("copy8u", "unaligned", src+1, NULL, 0, dst,
        test_sizes, NUM_TEST_SIZES, MEMCPY_PRETEST_ITERATIONS, TEST_TIME);

    return SUCCESS;
}

View File

@@ -0,0 +1,298 @@
/* test_set.c
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "prim_test.h"
/* Iteration counts passed to the 8-bit and 32-bit speed tests as their
 * pretest argument. */
static const int MEMSET8_PRETEST_ITERATIONS = 100000000;
static const int MEMSET32_PRETEST_ITERATIONS = 40000000;
/* Time value passed to the speed tests; presumably seconds -- confirm
 * against the harness in prim_test.h. */
static const float TEST_TIME = 1.0;
/* Implementations under test, defined outside this file.  The functional
 * tests below expect each call to store val into len consecutive elements
 * starting at pDst. */
extern pstatus_t general_set_8u(BYTE val, BYTE *pDst, int len);
extern pstatus_t sse2_set_8u(BYTE val, BYTE *pDst, int len);
extern pstatus_t general_set_32s(INT32 val, INT32 *pDst, int len);
extern pstatus_t sse2_set_32s(INT32 val, INT32 *pDst, int len);
extern pstatus_t general_set_32u(UINT32 val, UINT32 *pDst, int len);
extern pstatus_t sse2_set_32u(UINT32 val, UINT32 *pDst, int len);
extern pstatus_t ipp_wrapper_set_32u(UINT32 val, UINT32 *pDst, int len);
/* Element counts benchmarked by the speed tests in this file. */
static const int set_sizes[] = { 1, 4, 16, 32, 64, 256, 1024, 4096 };
#define NUM_SET_SIZES (sizeof(set_sizes)/sizeof(int))
/* ------------------------------------------------------------------------- */
/* Functional test for the 8-bit fill.
 *
 * For every start offset 0..15 and every length that keeps the fill inside
 * the 48-byte buffer (off + len < 48), the buffer is cleared, filled with
 * 0xa5 and each filled byte is checked.  The SSE2 variant runs on i386 builds
 * when the SSE2 flag is set; the IPP variant runs when built WITH_IPP.
 *
 * Returns SUCCESS when every byte matched, FAILURE otherwise.
 */
int test_set8u_func(void)
{
    BYTE ALIGN(dest[48]);
    int failed = 0;
    int off;
    char testStr[256];
    UINT32 pflags = primitives_get_flags(primitives_get());

    testStr[0] = '\0';
#ifdef i386
    /* Test SSE under various alignments */
    if (pflags & PRIM_X86_SSE2_AVAILABLE)
    {
        strcat(testStr, " SSE2");
        for (off = 0; off < 16; ++off)
        {
            int len;
            for (len = 1; off + len < 48; ++len)
            {
                int pos;
                memset(dest, 0, sizeof(dest));
                sse2_set_8u(0xa5, dest+off, len);
                for (pos = off; pos < off + len; ++pos)
                {
                    if (dest[pos] == 0xa5) continue;
                    printf("SET8U-SSE FAILED: off=%d len=%d dest[%d]=0x%02x\n",
                        off, len, pos, dest[pos]);
                    failed=1;
                }
            }
        }
    }
#endif /* i386 */
#ifdef WITH_IPP
    /* Test IPP under various alignments */
    strcat(testStr, " IPP");
    for (off = 0; off < 16; ++off)
    {
        int len;
        for (len = 1; off + len < 48; ++len)
        {
            int pos;
            memset(dest, 0, sizeof(dest));
            ippsSet_8u(0xa5, dest+off, len);
            for (pos = off; pos < off + len; ++pos)
            {
                if (dest[pos] == 0xa5) continue;
                printf("SET8U-IPP FAILED: off=%d len=%d dest[%d]=0x%02x\n",
                    off, len, pos, dest[pos]);
                failed=1;
            }
        }
    }
#endif /* WITH_IPP */

    if (failed > 0)
    {
        return FAILURE;
    }
    printf("All set8u tests passed (%s).\n", testStr);
    return SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Generates the benchmark function set8u_speed_test() through the
 * STD_SPEED_TEST macro (defined outside this file).  The baseline slot times
 * plain memset, the two middle slots are switched off, and the last slot
 * times the IPP routine ippsSet_8u.
 * NOTE(review): slot meanings are read off this call site -- confirm against
 * the macro definition in prim_test.h. */
STD_SPEED_TEST(set8u_speed_test, BYTE, BYTE, dst=dst,
TRUE, memset(dst, constant, size),
FALSE, NULL, 0,
FALSE, NULL, 0,
TRUE, ippsSet_8u(constant, dst, size));
/* Benchmarks the 8-bit fill with the value 0xA5 over the sizes in
 * set_sizes[], using an aligned destination and no source buffers.
 * Always returns SUCCESS; timings are reported by the harness.
 */
int test_set8u_speed(void)
{
    BYTE ALIGN(dst[MAX_TEST_SIZE]);

    set8u_speed_test("set8u", "aligned",
        NULL, NULL, 0xA5, dst,
        set_sizes, NUM_SET_SIZES,
        MEMSET8_PRETEST_ITERATIONS, TEST_TIME);

    return SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Functional test for the signed 32-bit fill.
 *
 * For every start offset 0..15 (in elements) and every length with
 * off + len < 512, the buffer is cleared, filled with 0xdeadbeef and each
 * filled element is checked.  The SSE2 variant runs on i386 builds when the
 * SSE2 flag is set; the IPP variant runs when built WITH_IPP.
 *
 * The primitives table is fetched once and reused for the flag query instead
 * of calling primitives_get() a second time.
 *
 * Returns SUCCESS when every element matched, FAILURE otherwise.
 */
int test_set32s_func(void)
{
    primitives_t *prims = primitives_get();
    INT32 ALIGN(dest[512]);
    int failed = 0;
    int off;
    char testStr[256];
    UINT32 pflags = primitives_get_flags(prims);

    testStr[0] = '\0';
#ifdef i386
    /* Test SSE under various alignments */
    if (pflags & PRIM_X86_SSE2_AVAILABLE)
    {
        strcat(testStr, " SSE2");
        for (off=0; off<16; ++off)
        {
            int len;
            for (len=1; len<512-off; ++len)
            {
                int i;
                memset(dest, 0, sizeof(dest));
                sse2_set_32s(0xdeadbeef, dest+off, len);
                for (i=0; i<len; ++i)
                {
                    if (dest[off+i] != 0xdeadbeef)
                    {
                        printf("set32s-SSE FAIL: off=%d len=%d dest[%d]=0x%08x\n",
                            off, len, i+off, dest[i+off]);
                        failed=1;
                    }
                }
            }
        }
    }
#endif /* i386 */
#ifdef WITH_IPP
    strcat(testStr, " IPP");
    for (off=0; off<16; ++off)
    {
        int len;
        for (len=1; len<512-off; ++len)
        {
            int i;
            memset(dest, 0, sizeof(dest));
            ippsSet_32s(0xdeadbeef, dest+off, len);
            for (i=0; i<len; ++i)
            {
                if (dest[off+i] != 0xdeadbeef)
                {
                    printf("set32s-IPP FAIL: off=%d len=%d dest[%d]=0x%08x\n",
                        off, len, i+off, dest[i+off]);
                    failed=1;
                }
            }
        }
    }
#endif /* WITH_IPP */

    if (!failed) printf("All set32s tests passed (%s).\n", testStr);
    return (failed > 0) ? FAILURE : SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Functional test for the unsigned 32-bit fill.
 *
 * For every start offset 0..15 (in elements) and every length with
 * off + len < 512, the buffer is cleared, filled with 0xdeadbeef and each
 * filled element is checked.  The SSE2 variant runs on i386 builds when the
 * SSE2 flag is set; the IPP wrapper runs when built WITH_IPP.
 *
 * The primitives table is fetched once and reused for the flag query instead
 * of calling primitives_get() a second time.
 *
 * Returns SUCCESS when every element matched, FAILURE otherwise.
 */
int test_set32u_func(void)
{
    primitives_t *prims = primitives_get();
    UINT32 ALIGN(dest[512]);
    int failed = 0;
    int off;
    char testStr[256];
    UINT32 pflags = primitives_get_flags(prims);

    testStr[0] = '\0';
#ifdef i386
    /* Test SSE under various alignments */
    if (pflags & PRIM_X86_SSE2_AVAILABLE)
    {
        strcat(testStr, " SSE2");
        for (off=0; off<16; ++off)
        {
            int len;
            for (len=1; len<512-off; ++len)
            {
                int i;
                memset(dest, 0, sizeof(dest));
                sse2_set_32u(0xdeadbeefU, dest+off, len);
                for (i=0; i<len; ++i)
                {
                    if (dest[off+i] != 0xdeadbeefU)
                    {
                        printf("set32u-SSE FAIL: off=%d len=%d dest[%d]=0x%08x\n",
                            off, len, i+off, dest[i+off]);
                        failed=1;
                    }
                }
            }
        }
    }
#endif /* i386 */
#ifdef WITH_IPP
    strcat(testStr, " IPP");
    for (off=0; off<16; ++off)
    {
        int len;
        for (len=1; len<512-off; ++len)
        {
            int i;
            memset(dest, 0, sizeof(dest));
            ipp_wrapper_set_32u(0xdeadbeefU, dest+off, len);
            for (i=0; i<len; ++i)
            {
                if (dest[off+i] != 0xdeadbeefU)
                {
                    printf("set32u-IPP FAIL: off=%d len=%d dest[%d]=0x%08x\n",
                        off, len, i+off, dest[i+off]);
                    failed=1;
                }
            }
        }
    }
#endif /* WITH_IPP */

    if (!failed) printf("All set32u tests passed (%s).\n", testStr);
    return (failed > 0) ? FAILURE : SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Baseline fill used by the set32u benchmark: stores val into the first
 * count elements of dst, one element at a time. */
static inline void memset32u_naive(
    UINT32 val,
    UINT32 *dst,
    size_t count)
{
    size_t k;

    for (k = 0; k < count; ++k)
    {
        dst[k] = val;
    }
}
/* ------------------------------------------------------------------------- */
/* Generates the benchmark function set32u_speed_test() through the
 * STD_SPEED_TEST macro (defined outside this file).  The baseline slot times
 * memset32u_naive, followed by the SSE2 implementation with the CPU flag it
 * needs, a disabled slot, and the IPP wrapper.
 * NOTE(review): slot meanings are read off this call site -- confirm against
 * the macro definition in prim_test.h. */
STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst,
TRUE, memset32u_naive(constant, dst, size),
TRUE, sse2_set_32u(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, ipp_wrapper_set_32u(constant, dst, size));
/* Benchmarks the unsigned 32-bit fill with 0xdeadbeef over the sizes in
 * set_sizes[], using an aligned destination and no source buffers.  The
 * unaligned variant is compiled out.
 * Always returns SUCCESS; timings are reported by the harness.
 */
int test_set32u_speed(void)
{
    UINT32 ALIGN(dst[MAX_TEST_SIZE+1]);

    set32u_speed_test("set32u", "aligned",
        NULL, NULL, 0xdeadbeef, dst,
        set_sizes, NUM_SET_SIZES,
        MEMSET32_PRETEST_ITERATIONS, TEST_TIME);
#if 0
    /* Not really necessary; should be almost as fast. */
    set32u_speed_test("set32u", "unaligned", NULL, NULL, dst+1,
        set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME);
#endif

    return SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Baseline fill used by the set32s benchmark: stores val into the first
 * count elements of dst, one element at a time. */
static inline void memset32s_naive(
    INT32 val,
    INT32 *dst,
    size_t count)
{
    size_t k;

    for (k = 0; k < count; ++k)
    {
        dst[k] = val;
    }
}
/* ------------------------------------------------------------------------- */
/* Generates the benchmark function set32s_speed_test() through the
 * STD_SPEED_TEST macro (defined outside this file).  The baseline slot times
 * memset32s_naive, followed by the SSE2 implementation with the CPU flag it
 * needs, a disabled slot, and the IPP routine ippsSet_32s.
 * NOTE(review): slot meanings are read off this call site -- confirm against
 * the macro definition in prim_test.h. */
STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst,
TRUE, memset32s_naive(constant, dst, size),
TRUE, sse2_set_32s(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, ippsSet_32s(constant, dst, size));
/* Benchmarks the signed 32-bit fill with 0xdeadbeef over the sizes in
 * set_sizes[], using an aligned destination and no source buffers.  The
 * unaligned variant is compiled out.
 * Always returns SUCCESS; timings are reported by the harness.
 */
int test_set32s_speed(void)
{
    INT32 ALIGN(dst[MAX_TEST_SIZE+1]);

    set32s_speed_test("set32s", "aligned",
        NULL, NULL, 0xdeadbeef, dst,
        set_sizes, NUM_SET_SIZES,
        MEMSET32_PRETEST_ITERATIONS, TEST_TIME);
#if 0
    /* Not really necessary; should be almost as fast. */
    set32s_speed_test("set32s", "unaligned", NULL, NULL, dst+1,
        set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME);
#endif

    return SUCCESS;
}

View File

@@ -0,0 +1,177 @@
/* test_shift.c
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "prim_test.h"
/* Number of 16-bit elements processed by each functional test below. */
#define FUNC_TEST_SIZE 65536
/* Iteration count passed to the speed tests as their pretest argument. */
static const int SHIFT_PRETEST_ITERATIONS = 50000;
/* Time value passed to the speed tests; presumably seconds -- confirm
 * against the harness in prim_test.h. */
static const float TEST_TIME = 1.0;
/* Implementations under test, defined outside this file.  Every variant
 * shares one shape: a source buffer, the shift amount, a destination buffer
 * and the element count.  The general_* functions serve as the reference the
 * sse2_* functions are compared against. */
extern pstatus_t general_lShiftC_16s(
const INT16 *pSrc, int val, INT16 *pDst, int len);
extern pstatus_t general_rShiftC_16s(
const INT16 *pSrc, int val, INT16 *pDst, int len);
extern pstatus_t general_shiftC_16s(
const INT16 *pSrc, int val, INT16 *pDst, int len);
extern pstatus_t general_lShiftC_16u(
const UINT16 *pSrc, int val, UINT16 *pDst, int len);
extern pstatus_t general_rShiftC_16u(
const UINT16 *pSrc, int val, UINT16 *pDst, int len);
extern pstatus_t general_shiftC_16u(
const UINT16 *pSrc, int val, UINT16 *pDst, int len);
extern pstatus_t sse2_lShiftC_16s(
const INT16 *pSrc, int val, INT16 *pDst, int len);
extern pstatus_t sse2_rShiftC_16s(
const INT16 *pSrc, int val, INT16 *pDst, int len);
extern pstatus_t sse2_shiftC_16s(
const INT16 *pSrc, int val, INT16 *pDst, int len);
extern pstatus_t sse2_lShiftC_16u(
const UINT16 *pSrc, int val, UINT16 *pDst, int len);
extern pstatus_t sse2_rShiftC_16u(
const UINT16 *pSrc, int val, UINT16 *pDst, int len);
extern pstatus_t sse2_shiftC_16u(
const UINT16 *pSrc, int val, UINT16 *pDst, int len);
#ifdef i386
/* SHIFT_TEST_FUNC(_name_, _type_, _str_, _f1_, _f2_) defines a functional
 * test `int _name_(void)`.
 *
 *   _type_  element type of the buffers
 *   _str_   label used in the printed messages
 *   _f1_    reference implementation
 *   _f2_    optimized implementation compared against the reference
 *
 * The generated function shifts FUNC_TEST_SIZE random elements by 3 with
 * _f1_ and, when the PRIM_X86_SSE3_AVAILABLE flag is set, runs _f2_ twice:
 * into d2+1 and then into d2+2, comparing each result element by element
 * with the reference.  d2 is not cleared between the two runs.  The failure
 * messages always print ">>3", whichever direction the instantiated
 * functions shift.  Returns FAILURE if any element differed, else SUCCESS.
 */
#define SHIFT_TEST_FUNC(_name_, _type_, _str_, _f1_, _f2_) \
int _name_(void) \
{ \
_type_ ALIGN(src[FUNC_TEST_SIZE+3]), \
ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]); \
int failed = 0; \
int i; \
UINT32 pflags = primitives_get_flags(primitives_get()); \
char testStr[256]; \
testStr[0] = '\0'; \
get_random_data(src, sizeof(src)); \
_f1_(src+1, 3, d1+1, FUNC_TEST_SIZE); \
if (pflags & PRIM_X86_SSE3_AVAILABLE) \
{ \
strcat(testStr, " SSE3"); \
/* Aligned */ \
_f2_(src+1, 3, d2+1, FUNC_TEST_SIZE); \
for (i=1; i<=FUNC_TEST_SIZE; ++i) \
{ \
if (d1[i] != d2[i]) \
{ \
printf("%s-SSE-aligned FAIL[%d]: 0x%x>>3=0x%x, got 0x%x\n", \
_str_, i, src[i], d1[i], d2[i]); \
++failed; \
} \
} \
/* Unaligned */ \
_f2_(src+1, 3, d2+2, FUNC_TEST_SIZE); \
for (i=1; i<=FUNC_TEST_SIZE; ++i) \
{ \
if (d1[i] != d2[i+1]) \
{ \
printf("%s-SSE-unaligned FAIL[%d]: 0x%x>>3=0x%x, got 0x%x\n", \
_str_, i, src[i], d1[i], d2[i+1]); \
++failed; \
} \
} \
} \
if (!failed) printf("All %s tests passed (%s).\n", _str_, testStr); \
return (failed > 0) ? FAILURE : SUCCESS; \
}
#else
/* When i386 is not defined the generated test does nothing and reports
 * SUCCESS. */
#define SHIFT_TEST_FUNC(_name_, _type_, _str_, _f1_, _f2_) \
int _name_(void) \
{ \
return SUCCESS; \
}
#endif /* i386 */
/* Functional tests generated by SHIFT_TEST_FUNC: one per combination of
 * shift direction (left/right) and element signedness (16s/16u), each
 * comparing the sse2_* implementation against its general_* counterpart. */
SHIFT_TEST_FUNC(test_lShift_16s_func, INT16, "lshift_16s", general_lShiftC_16s,
sse2_lShiftC_16s)
SHIFT_TEST_FUNC(test_lShift_16u_func, UINT16, "lshift_16u", general_lShiftC_16u,
sse2_lShiftC_16u)
SHIFT_TEST_FUNC(test_rShift_16s_func, INT16, "rshift_16s", general_rShiftC_16s,
sse2_rShiftC_16s)
SHIFT_TEST_FUNC(test_rShift_16u_func, UINT16, "rshift_16u", general_rShiftC_16u,
sse2_rShiftC_16u)
/* ========================================================================= */
/* Benchmark functions generated through the STD_SPEED_TEST macro (defined
 * outside this file), one per shift direction and element signedness.  Each
 * instantiation supplies the general implementation, the SSE2 implementation
 * with the PRIM_X86_SSE2_AVAILABLE flag, a disabled slot, and the matching
 * IPP routine.
 * NOTE(review): slot meanings are read off these call sites -- confirm
 * against the macro definition in prim_test.h. */
STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst,
TRUE, general_lShiftC_16s(src1, constant, dst, size),
TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, ippsLShiftC_16s(src1, constant, dst, size));
STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst,
TRUE, general_lShiftC_16u(src1, constant, dst, size),
TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, ippsLShiftC_16u(src1, constant, dst, size));
STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst,
TRUE, general_rShiftC_16s(src1, constant, dst, size),
TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, ippsRShiftC_16s(src1, constant, dst, size));
STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst=dst,
TRUE, general_rShiftC_16u(src1, constant, dst, size),
TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, ippsRShiftC_16u(src1, constant, dst, size));
/* ------------------------------------------------------------------------- */
int test_lShift_16s_speed(void)
{
INT16 ALIGN(src[MAX_TEST_SIZE+1]), ALIGN(dst[MAX_TEST_SIZE+1]);
get_random_data(src, sizeof(src));
speed_lShift_16s("lShift_16s", "aligned", src, NULL, 3, dst,
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
speed_lShift_16s("lShift_16s", "unaligned", src+1, NULL, 3, dst,
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
return SUCCESS;
}
/* ------------------------------------------------------------------------- */
int test_lShift_16u_speed(void)
{
UINT16 ALIGN(src[MAX_TEST_SIZE+1]), ALIGN(dst[MAX_TEST_SIZE+1]);
get_random_data(src, sizeof(src));
speed_lShift_16u("lShift_16u", "aligned", src, NULL, 3, dst,
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
speed_lShift_16u("lShift_16u", "unaligned", src+1, NULL, 3, dst,
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
return SUCCESS;
}
/* ------------------------------------------------------------------------- */
int test_rShift_16s_speed(void)
{
INT16 ALIGN(src[MAX_TEST_SIZE+1]), ALIGN(dst[MAX_TEST_SIZE+1]);
get_random_data(src, sizeof(src));
speed_rShift_16s("rShift_16s", "aligned", src, NULL, 3, dst,
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
speed_rShift_16s("rShift_16s", "unaligned", src+1, NULL, 3, dst,
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
return SUCCESS;
}
/* ------------------------------------------------------------------------- */
int test_rShift_16u_speed(void)
{
UINT16 ALIGN(src[MAX_TEST_SIZE+1]), ALIGN(dst[MAX_TEST_SIZE+1]);
get_random_data(src, sizeof(src));
speed_rShift_16u("rShift_16u", "aligned", src, NULL, 3, dst,
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
speed_rShift_16u("rShift_16u", "unaligned", src+1, NULL, 3, dst,
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
return SUCCESS;
}

View File

@@ -0,0 +1,95 @@
/* test_sign.c
* vi:ts=4 sw=4
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "prim_test.h"
/* Passed as the second-to-last argument of sign16s_speed_test() in
 * test_sign16s_speed().
 * NOTE(review): presumably a warm-up/calibration iteration count -- confirm
 * against the STD_SPEED_TEST definition. */
static const int SIGN_PRETEST_ITERATIONS = 100000;
/* Passed as the last argument of sign16s_speed_test().
 * NOTE(review): presumably a duration in seconds -- confirm. */
static const float TEST_TIME = 1.0;
/* Implementations under test, defined in other translation units. Both take
 * a source pointer, a destination pointer and an element count. */
extern pstatus_t general_sign_16s(const INT16 *pSrc, INT16 *pDst, int len);
extern pstatus_t ssse3_sign_16s(const INT16 *pSrc, INT16 *pDst, int len);
/* ------------------------------------------------------------------------- */
int test_sign16s_func(void)
{
INT16 ALIGN(src[65535]), ALIGN(d1[65535]), ALIGN(d2[65535]);
int failed = 0;
int i;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
/* Test when we can reach 16-byte alignment */
testStr[0] = '\0';
get_random_data(src, sizeof(src));
general_sign_16s(src+1, d1+1, 65535);
#ifdef i386
if (pflags & PRIM_X86_SSSE3_AVAILABLE)
{
strcat(testStr, " SSSE3");
ssse3_sign_16s(src+1, d2+1, 65535);
for (i=1; i<65535; ++i)
{
if (d1[i] != d2[i])
{
printf("SIGN16s-SSE-aligned FAIL[%d] of %d: want %d, got %d\n",
i, src[i], d1[i], d2[i]);
++failed;
}
}
}
#endif /* i386 */
/* Test when we cannot reach 16-byte alignment */
get_random_data(src, sizeof(src));
general_sign_16s(src+1, d1+2, 65535);
#ifdef i386
if (pflags & PRIM_X86_SSSE3_AVAILABLE)
{
ssse3_sign_16s(src+1, d2+2, 65535);
for (i=2; i<65535; ++i)
{
if (d1[i] != d2[i])
{
printf("SIGN16s-SSE-unaligned FAIL[%d] of %d: want %d, got %d\n",
i, src[i-1], d1[i], d2[i]);
++failed;
}
}
}
#endif /* i386 */
if (!failed) printf("All sign16s tests passed (%s).\n", testStr);
return (failed > 0) ? FAILURE : SUCCESS;
}
/* ------------------------------------------------------------------------- */
/* Speed-test driver for sign_16s, instantiated through STD_SPEED_TEST
 * (defined outside this excerpt) under the name sign16s_speed_test, with
 * INT16 as both source and destination element type.
 * It supplies the general_sign_16s call and the ssse3_sign_16s call (with
 * the PRIM_X86_SSSE3_AVAILABLE flag); the two remaining slots are marked
 * FALSE and filled with the placeholder "dst=dst".
 * NOTE(review): presumably FALSE disables a slot, so no IPP variant is
 * timed for this primitive -- confirm against the macro definition. */
STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst=dst,
TRUE, general_sign_16s(src1, dst, size),
TRUE, ssse3_sign_16s(src1, dst, size), PRIM_X86_SSSE3_AVAILABLE,
FALSE, dst=dst, 0,
FALSE, dst=dst);
int test_sign16s_speed(void)
{
INT16 ALIGN(src[MAX_TEST_SIZE+3]), ALIGN(dst[MAX_TEST_SIZE+3]);
get_random_data(src, sizeof(src));
sign16s_speed_test("sign16s", "aligned", src, NULL, 0, dst,
test_sizes, NUM_TEST_SIZES, SIGN_PRETEST_ITERATIONS, TEST_TIME);
sign16s_speed_test("sign16s", "unaligned", src+1, NULL, 0, dst,
test_sizes, NUM_TEST_SIZES, SIGN_PRETEST_ITERATIONS, TEST_TIME);
return SUCCESS;
}

View File

@@ -325,7 +325,10 @@ void rail_UpdateWindow(rdpRail* rail, rdpWindow* window)
if (window->fieldFlags & WINDOW_ORDER_FIELD_TITLE)
{
if (window->title != NULL)
{
free(window->title);
window->title = NULL;
}
ConvertFromUnicode(CP_UTF8, 0, (WCHAR*) window->titleInfo.string, window->titleInfo.length / 2,
&window->title, 0, NULL, NULL);

View File

@@ -338,16 +338,22 @@ static void svc_plugin_process_terminated(rdpSvcPlugin* plugin)
{
svc_data_in_item* item;
freerdp_thread_stop(plugin->priv->thread);
freerdp_thread_free(plugin->priv->thread);
if (plugin->priv->thread)
{
freerdp_thread_stop(plugin->priv->thread);
freerdp_thread_free(plugin->priv->thread);
}
plugin->channel_entry_points.pVirtualChannelClose(plugin->priv->open_handle);
svc_plugin_remove(plugin);
while ((item = list_dequeue(plugin->priv->data_in_list)) != NULL)
svc_data_in_item_free(item);
list_free(plugin->priv->data_in_list);
if (plugin->priv->data_in_list)
{
while ((item = list_dequeue(plugin->priv->data_in_list)) != NULL)
svc_data_in_item_free(item);
list_free(plugin->priv->data_in_list);
}
if (plugin->priv->data_in != NULL)
{

View File

@@ -34,7 +34,7 @@ UINT64 freerdp_windows_gmtime()
time_t unix_time;
UINT64 windows_time;
gmtime(&unix_time);
time(&unix_time);
windows_time = freerdp_get_windows_time_from_unix_time(unix_time);
return windows_time;

View File

@@ -64,7 +64,7 @@ set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS}
set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS
MONOLITHIC ${MONOLITHIC_BUILD}
MODULE freerdp
MODULES freerdp-core freerdp-utils freerdp-codec)
MODULES freerdp-core freerdp-utils freerdp-codec freerdp-primitives)
set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS
MONOLITHIC ${MONOLITHIC_BUILD}

Some files were not shown because too many files have changed in this diff Show More