diff --git a/CMakeLists.txt b/CMakeLists.txt index aa541170c..3e6098432 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -231,6 +231,10 @@ set(GSTREAMER_FEATURE_TYPE "RECOMMENDED") set(GSTREAMER_FEATURE_PURPOSE "multimedia") set(GSTREAMER_FEATURE_DESCRIPTION "multimedia redirection, audio and video playback") +set(IPP_FEATURE_TYPE "OPTIONAL") +set(IPP_FEATURE_PURPOSE "performance") +set(IPP_FEATURE_DESCRIPTION "Intel Integrated Performance Primitives library") + if(WIN32) set(X11_FEATURE_TYPE "DISABLED") set(ZLIB_FEATURE_TYPE "DISABLED") @@ -285,6 +289,9 @@ find_feature(PCSC ${PCSC_FEATURE_TYPE} ${PCSC_FEATURE_PURPOSE} ${PCSC_FEATURE_DE find_feature(FFmpeg ${FFMPEG_FEATURE_TYPE} ${FFMPEG_FEATURE_PURPOSE} ${FFMPEG_FEATURE_DESCRIPTION}) find_feature(Gstreamer ${GSTREAMER_FEATURE_TYPE} ${GSTREAMER_FEATURE_PURPOSE} ${GSTREAMER_FEATURE_DESCRIPTION}) +# Intel Performance Primitives +find_feature(IPP ${IPP_FEATURE_TYPE} ${IPP_FEATURE_PURPOSE} ${IPP_FEATURE_DESCRIPTION}) + # Installation Paths if(WIN32) set(CMAKE_INSTALL_BINDIR ".") diff --git a/channels/cliprdr/client/cliprdr_format.c b/channels/cliprdr/client/cliprdr_format.c index 7f368537e..2c6db2308 100644 --- a/channels/cliprdr/client/cliprdr_format.c +++ b/channels/cliprdr/client/cliprdr_format.c @@ -144,6 +144,7 @@ void cliprdr_process_short_format_names(cliprdrPlugin* cliprdr, STREAM* s, UINT3 } else { + format_name->name = NULL; format_name->length = ConvertFromUnicode(CP_UTF8, 0, (WCHAR*) s->p, 32 / 2, &format_name->name, 0, NULL, NULL); } diff --git a/channels/drive/client/drive_file.c b/channels/drive/client/drive_file.c index 78701f8fd..4d4e6e8dd 100644 --- a/channels/drive/client/drive_file.c +++ b/channels/drive/client/drive_file.c @@ -430,7 +430,7 @@ BOOL drive_file_query_information(DRIVE_FILE* file, UINT32 FsInformationClass, S BOOL drive_file_set_information(DRIVE_FILE* file, UINT32 FsInformationClass, UINT32 Length, STREAM* input) { - char* s; + char* s = NULL; mode_t m; UINT64 size; int status; 
diff --git a/channels/drive/client/drive_main.c b/channels/drive/client/drive_main.c index 7571297e8..f6f48554c 100644 --- a/channels/drive/client/drive_main.c +++ b/channels/drive/client/drive_main.c @@ -471,7 +471,7 @@ static void drive_process_irp_query_volume_information(DRIVE_DEVICE* disk, IRP* static void drive_process_irp_query_directory(DRIVE_DEVICE* disk, IRP* irp) { - char* path; + char* path = NULL; int status; DRIVE_FILE* file; BYTE InitialQuery; diff --git a/channels/parallel/client/parallel_main.c b/channels/parallel/client/parallel_main.c index 3f1e06ccc..9ac56a8ca 100644 --- a/channels/parallel/client/parallel_main.c +++ b/channels/parallel/client/parallel_main.c @@ -72,7 +72,7 @@ typedef struct _PARALLEL_DEVICE PARALLEL_DEVICE; static void parallel_process_irp_create(PARALLEL_DEVICE* parallel, IRP* irp) { - char* path; + char* path = NULL; int status; UINT32 PathLength; diff --git a/channels/rdpsnd/client/rdpsnd_main.c b/channels/rdpsnd/client/rdpsnd_main.c index 431cb1b35..1005303f3 100644 --- a/channels/rdpsnd/client/rdpsnd_main.c +++ b/channels/rdpsnd/client/rdpsnd_main.c @@ -650,12 +650,15 @@ static void rdpsnd_process_terminate(rdpSvcPlugin* plugin) if (rdpsnd->device) IFCALL(rdpsnd->device->Free, rdpsnd->device); - while ((item = list_dequeue(rdpsnd->data_out_list)) != NULL) + if (rdpsnd->data_out_list) { - stream_free(item->data_out); - free(item); + while ((item = list_dequeue(rdpsnd->data_out_list)) != NULL) + { + stream_free(item->data_out); + free(item); + } + list_free(rdpsnd->data_out_list); } - list_free(rdpsnd->data_out_list); if (rdpsnd->subsystem) free(rdpsnd->subsystem); diff --git a/channels/serial/client/serial_main.c b/channels/serial/client/serial_main.c index c882cb7a1..71eefe61d 100644 --- a/channels/serial/client/serial_main.c +++ b/channels/serial/client/serial_main.c @@ -79,7 +79,7 @@ static BOOL serial_check_fds(SERIAL_DEVICE* serial); static void serial_process_irp_create(SERIAL_DEVICE* serial, IRP* irp) { - char* path; 
+ char* path = NULL; int status; SERIAL_TTY* tty; UINT32 PathLength; @@ -570,6 +570,7 @@ static void __serial_check_fds(SERIAL_DEVICE* serial) IRP* prev; SERIAL_TTY* tty; UINT32 result = 0; + BOOL irp_completed = FALSE; memset(&serial->tv, 0, sizeof(struct timeval)); tty = serial->tty; @@ -588,6 +589,7 @@ static void __serial_check_fds(SERIAL_DEVICE* serial) { irp->IoStatus = STATUS_SUCCESS; serial_process_irp_read(serial, irp); + irp_completed = TRUE; } break; @@ -596,6 +598,7 @@ static void __serial_check_fds(SERIAL_DEVICE* serial) { irp->IoStatus = STATUS_SUCCESS; serial_process_irp_write(serial, irp); + irp_completed = TRUE; } break; @@ -607,6 +610,7 @@ static void __serial_check_fds(SERIAL_DEVICE* serial) irp->IoStatus = STATUS_SUCCESS; stream_write_UINT32(irp->output, result); irp->Complete(irp); + irp_completed = TRUE; } break; @@ -618,7 +622,7 @@ static void __serial_check_fds(SERIAL_DEVICE* serial) prev = irp; irp = (IRP*) list_next(serial->pending_irps, irp); - if (prev->IoStatus == STATUS_SUCCESS) + if (irp_completed || prev->IoStatus == STATUS_SUCCESS) { list_remove(serial->pending_irps, prev); SetEvent(serial->in_event); diff --git a/client/DirectFB/CMakeLists.txt b/client/DirectFB/CMakeLists.txt index bd3637de4..52c07ef69 100644 --- a/client/DirectFB/CMakeLists.txt +++ b/client/DirectFB/CMakeLists.txt @@ -36,7 +36,7 @@ set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} freerdp-client) set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} MODULE freerdp - MODULES freerdp-core freerdp-gdi freerdp-locale freerdp-codec freerdp-utils) + MODULES freerdp-core freerdp-gdi freerdp-locale freerdp-codec freerdp-primitives freerdp-utils) target_link_libraries(${MODULE_NAME} ${${MODULE_PREFIX}_LIBS}) install(TARGETS ${MODULE_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/client/Mac/CMakeLists.txt b/client/Mac/CMakeLists.txt index 8e7a1a0ec..734b66933 100644 --- a/client/Mac/CMakeLists.txt +++ b/client/Mac/CMakeLists.txt 
@@ -78,7 +78,7 @@ set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} freerdp-client) set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} MODULE freerdp - MODULES freerdp-core freerdp-cache freerdp-gdi freerdp-codec freerdp-rail freerdp-utils) + MODULES freerdp-core freerdp-cache freerdp-gdi freerdp-codec freerdp-primitives freerdp-rail freerdp-utils) set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} MODULE winpr diff --git a/client/Windows/CMakeLists.txt b/client/Windows/CMakeLists.txt index 312f75db4..92e75a532 100644 --- a/client/Windows/CMakeLists.txt +++ b/client/Windows/CMakeLists.txt @@ -46,7 +46,7 @@ set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} MODULE freerdp - MODULES freerdp-core freerdp-gdi freerdp-codec freerdp-utils) + MODULES freerdp-core freerdp-gdi freerdp-codec freerdp-primitives freerdp-utils) target_link_libraries(${MODULE_NAME} ${${MODULE_PREFIX}_LIBS}) diff --git a/client/Windows/wf_gdi.c b/client/Windows/wf_gdi.c index fcb681a97..41e268e90 100644 --- a/client/Windows/wf_gdi.c +++ b/client/Windows/wf_gdi.c @@ -544,6 +544,19 @@ void wf_gdi_surface_bits(rdpContext* context, SURFACE_BITS_COMMAND* surface_bits free(tile_bitmap); } +void wf_gdi_surface_frame_marker(rdpContext* context, SURFACE_FRAME_MARKER* surface_frame_marker) +{ + wfInfo* wfi; + rdpSettings* settings; + + wfi = ((wfContext*) context)->wfi; + settings = wfi->instance->settings; + if (surface_frame_marker->frameAction == SURFACECMD_FRAMEACTION_END && settings->FrameAcknowledge > 0) + { + IFCALL(wfi->instance->update->SurfaceFrameAcknowledge, context, surface_frame_marker->frameId); + } +} + void wf_gdi_register_update_callbacks(rdpUpdate* update) { rdpPrimaryUpdate* primary = update->primary; @@ -575,4 +588,5 @@ void wf_gdi_register_update_callbacks(rdpUpdate* update) primary->EllipseCB = NULL; 
update->SurfaceBits = wf_gdi_surface_bits; + update->SurfaceFrameMarker = wf_gdi_surface_frame_marker; } diff --git a/client/Windows/wfreerdp.c b/client/Windows/wfreerdp.c index 0c4c839af..1b0233b58 100644 --- a/client/Windows/wfreerdp.c +++ b/client/Windows/wfreerdp.c @@ -74,7 +74,9 @@ void wf_context_new(freerdp* instance, rdpContext* context) void wf_context_free(freerdp* instance, rdpContext* context) { - + if (context->cache) + cache_free(context->cache); + freerdp_channels_free(context->channels); } int wf_create_console(void) @@ -227,6 +229,8 @@ BOOL wf_pre_connect(freerdp* instance) freerdp_client_parse_rdp_file(file, settings->ConnectionFile); freerdp_client_populate_settings_from_rdp_file(file, settings); + + freerdp_client_rdp_file_free(file); } settings->OsMajorType = OSMAJORTYPE_WINDOWS; @@ -259,7 +263,7 @@ BOOL wf_pre_connect(freerdp* instance) wfi->cursor = g_default_cursor; wfi->fullscreen = settings->Fullscreen; - wfi->fs_toggle = wfi->fullscreen; + wfi->fs_toggle = 1; wfi->sw_gdi = settings->SoftwareGdi; wfi->clrconv = (HCLRCONV) malloc(sizeof(CLRCONV)); @@ -279,7 +283,7 @@ BOOL wf_pre_connect(freerdp* instance) settings->DesktopHeight = i1; } - if (wfi->fs_toggle) + if (wfi->fullscreen) { settings->DesktopWidth = GetSystemMetrics(SM_CXSCREEN); settings->DesktopHeight = GetSystemMetrics(SM_CYSCREEN); @@ -673,7 +677,6 @@ int wfreerdp_run(freerdp* instance) /* cleanup */ freerdp_channels_close(channels, instance); - freerdp_channels_free(channels); freerdp_disconnect(instance); return 0; diff --git a/client/X11/CMakeLists.txt b/client/X11/CMakeLists.txt index ad93f63cd..735ebeb00 100644 --- a/client/X11/CMakeLists.txt +++ b/client/X11/CMakeLists.txt @@ -120,10 +120,14 @@ set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} freerdp-client) set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} MODULE freerdp - MODULES freerdp-core freerdp-gdi freerdp-locale freerdp-rail freerdp-utils) + MODULES freerdp-core freerdp-gdi 
freerdp-locale freerdp-primitives freerdp-rail freerdp-utils) target_link_libraries(${MODULE_NAME} ${${MODULE_PREFIX}_LIBS}) +if(WITH_IPP) + target_link_libraries(xfreerdp ${IPP_LIBRARY_LIST}) +endif() + install(TARGETS ${MODULE_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR}) set_property(TARGET ${MODULE_NAME} PROPERTY FOLDER "Client/X11") diff --git a/client/X11/xf_gdi.c b/client/X11/xf_gdi.c index 7448577af..6f2683a2a 100644 --- a/client/X11/xf_gdi.c +++ b/client/X11/xf_gdi.c @@ -880,8 +880,11 @@ void xf_gdi_ellipse_cb(rdpContext* context, ELLIPSE_CB_ORDER* ellipse_cb) void xf_gdi_surface_frame_marker(rdpContext* context, SURFACE_FRAME_MARKER* surface_frame_marker) { - xfInfo* xfi = ((xfContext*) context)->xfi; + xfInfo* xfi; + rdpSettings* settings; + xfi = ((xfContext*) context)->xfi; + settings = xfi->instance->settings; switch (surface_frame_marker->frameAction) { case SURFACECMD_FRAMEACTION_BEGIN: @@ -906,6 +909,10 @@ void xf_gdi_surface_frame_marker(rdpContext* context, SURFACE_FRAME_MARKER* surf gdi_InvalidateRegion(xfi->hdc, xfi->frame_x1, xfi->frame_y1, xfi->frame_x2 - xfi->frame_x1, xfi->frame_y2 - xfi->frame_y1); } + if (settings->FrameAcknowledge > 0) + { + IFCALL(xfi->instance->update->SurfaceFrameAcknowledge, context, surface_frame_marker->frameId); + } break; } } diff --git a/cmake/ConfigOptions.cmake b/cmake/ConfigOptions.cmake index 7f7c09be9..c207611a5 100644 --- a/cmake/ConfigOptions.cmake +++ b/cmake/ConfigOptions.cmake @@ -9,6 +9,7 @@ endif() option(WITH_MANPAGES "Generate manpages." ON) option(WITH_PROFILER "Compile profiler." OFF) +option(WITH_IPP "Use Intel Performance Primitives." OFF) if((TARGET_ARCH MATCHES "x86|x64") AND (NOT DEFINED WITH_SSE2)) option(WITH_SSE2 "Enable SSE2 optimization." 
ON) diff --git a/cmake/FindIPP.cmake b/cmake/FindIPP.cmake index 22e01b979..1e06762e5 100644 --- a/cmake/FindIPP.cmake +++ b/cmake/FindIPP.cmake @@ -46,7 +46,7 @@ set(IPPVM "vm") # vector math set(IPP_X64 0) -if (CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8) +if (CMAKE_SIZEOF_VOID_P EQUAL 8) set(IPP_X64 1) endif() if (CMAKE_CL_64) @@ -67,6 +67,11 @@ function(get_ipp_version _ROOT_DIR) file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR1 REGEX "IPP_VERSION_MAJOR") file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR2 REGEX "IPP_VERSION_MINOR") file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR3 REGEX "IPP_VERSION_BUILD") + + if(NOT STR3) + file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR3 REGEX "IPP_VERSION_UPDATE") + endif() + file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR4 REGEX "IPP_VERSION_STR") # extract info and assign to variables @@ -198,16 +203,20 @@ function(set_ipp_variables _LATEST_VERSION) # set INCLUDE and LIB folders set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include PARENT_SCOPE) - if (IPP_X64) - if(NOT EXISTS ${IPP_ROOT_DIR}/lib/intel64) - message(SEND_ERROR "IPP EM64T libraries not found") - endif() - set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib/intel64 PARENT_SCOPE) + if(APPLE) + set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib PARENT_SCOPE) else() - if(NOT EXISTS ${IPP_ROOT_DIR}/lib/ia32) - message(SEND_ERROR "IPP IA32 libraries not found") + if(IPP_X64) + if(NOT EXISTS ${IPP_ROOT_DIR}/lib/intel64) + message(SEND_ERROR "IPP EM64T libraries not found") + endif() + set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib/intel64 PARENT_SCOPE) + else() + if(NOT EXISTS ${IPP_ROOT_DIR}/lib/ia32) + message(SEND_ERROR "IPP IA32 libraries not found") + endif() + set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib/ia32 PARENT_SCOPE) endif() - set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib/ia32 PARENT_SCOPE) endif() # set IPP_LIBRARIES variable (7.x lib names) @@ -265,7 +274,7 @@ if(NOT IPP_FOUND) # Note, if several IPP installations found the newest version will be # selected # 
------------------------------------------------------------------------ - foreach(curdir ${CMAKE_SYSTEM_PREFIX_PATH}) + foreach(curdir ${CMAKE_SYSTEM_PREFIX_PATH} /opt) set(curdir ${curdir}/intel) file(TO_CMAKE_PATH ${curdir} CURDIR) @@ -336,3 +345,53 @@ if(WIN32 AND MINGW AND NOT IPP_LATEST_VERSION_MAJOR LESS 7) set(MSV_NTDLL "ntdll") set(IPP_LIBRARIES ${IPP_LIBRARIES} ${MSV_NTDLL}${IPP_LIB_SUFFIX}) endif() + +# ------------------------------------------------------------------------ +# This section will look for the IPP "compiler" dependent library +# libiomp5. +# ------------------------------------------------------------------------ +foreach(curdir ${CMAKE_SYSTEM_PREFIX_PATH} /opt) + set(curdir ${curdir}/intel) + + if(EXISTS ${curdir}) + file(GLOB_RECURSE liblist FOLLOW_SYMLINKS ${curdir}/libiomp5.*) + foreach(lib ${liblist}) + get_filename_component(libdir ${lib} REALPATH) + get_filename_component(libdir ${libdir} PATH) + + if(${IPP_VERSION_MAJOR} VERSION_LESS "7") + set(IPP_COMPILER_LIBRARY_DIRS ${libdir}) + set(IPP_COMPILER_LIBRARIES iomp5) + else() + if(APPLE) + set(IPP_COMPILER_LIBRARY_DIRS ${libdir}) + set(IPP_COMPILER_LIBRARIES iomp5) + else() + if(IPP_X64) + if(("${libdir}" MATCHES "intel64")) + set(IPP_COMPILER_LIBRARY_DIRS ${libdir}) + set(IPP_COMPILER_LIBRARIES iomp5) + endif() + else() + set(IPP_COMPILER_LIBRARY_DIRS ${libdir}) + set(IPP_COMPILER_LIBRARIES iomp5) + endif() + endif() + endif() + endforeach(lib) + endif() +endforeach(curdir) + +# ------------------------------------------------------------------------ +# Build fullpath library list. 
+# ------------------------------------------------------------------------ +find_library(LIB_IPPI ippi PATHS ${IPP_LIBRARY_DIRS}) +set(IPP_LIBRARY_LIST ${IPP_LIBRARY_LIST} ${LIB_IPPI}) +find_library(LIB_IPPS ipps PATHS ${IPP_LIBRARY_DIRS}) +set(IPP_LIBRARY_LIST ${IPP_LIBRARY_LIST} ${LIB_IPPS}) +find_library(LIB_IPPCORE ippcore PATHS ${IPP_LIBRARY_DIRS}) +set(IPP_LIBRARY_LIST ${IPP_LIBRARY_LIST} ${LIB_IPPCORE}) +find_library(LIB_IOMP5 iomp5 PATHS ${IPP_COMPILER_LIBRARY_DIRS}) +set(IPP_LIBRARY_LIST ${IPP_LIBRARY_LIST} ${LIB_IOMP5}) + + diff --git a/config.h.in b/config.h.in index 71b69fba2..1bb2b647f 100644 --- a/config.h.in +++ b/config.h.in @@ -36,6 +36,7 @@ #cmakedefine WITH_PROFILER #cmakedefine WITH_SSE2 #cmakedefine WITH_NEON +#cmakedefine WITH_IPP #cmakedefine WITH_NATIVE_SSPI #cmakedefine WITH_JPEG #cmakedefine WITH_WIN8 diff --git a/include/freerdp/addin.h b/include/freerdp/addin.h index 8a3dcdc63..dd0758c8e 100644 --- a/include/freerdp/addin.h +++ b/include/freerdp/addin.h @@ -48,6 +48,10 @@ typedef struct _FREERDP_ADDIN FREERDP_ADDIN; typedef void* (*FREERDP_LOAD_CHANNEL_ADDIN_ENTRY_FN)(LPCSTR pszName, LPSTR pszSubsystem, LPSTR pszType, DWORD dwFlags); +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API LPSTR freerdp_get_library_install_path(); FREERDP_API LPSTR freerdp_get_dynamic_addin_install_path(); @@ -57,5 +61,9 @@ FREERDP_API void* freerdp_load_dynamic_addin(LPCSTR pszFileName, LPCSTR pszPath, FREERDP_API void* freerdp_load_dynamic_channel_addin_entry(LPCSTR pszName, LPSTR pszSubsystem, LPSTR pszType, DWORD dwFlags); FREERDP_API void* freerdp_load_channel_addin_entry(LPCSTR pszName, LPSTR pszSubsystem, LPSTR pszType, DWORD dwFlags); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_COMMON_ADDIN_H */ diff --git a/include/freerdp/api.h b/include/freerdp/api.h index b10538a9f..de25d8d39 100644 --- a/include/freerdp/api.h +++ b/include/freerdp/api.h @@ -32,6 +32,10 @@ #define INLINE inline #endif +#ifdef _WIN32 +#define __func__ __FUNCTION__ 
+#endif + #if defined _WIN32 || defined __CYGWIN__ #ifdef FREERDP_EXPORTS #ifdef __GNUC__ diff --git a/include/freerdp/cache/bitmap.h b/include/freerdp/cache/bitmap.h index 4cdbc3319..6a68b8068 100644 --- a/include/freerdp/cache/bitmap.h +++ b/include/freerdp/cache/bitmap.h @@ -59,6 +59,10 @@ struct rdp_bitmap_cache rdpSettings* settings; }; +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API rdpBitmap* bitmap_cache_get(rdpBitmapCache* bitmap_cache, UINT32 id, UINT32 index); FREERDP_API void bitmap_cache_put(rdpBitmapCache* bitmap_cache, UINT32 id, UINT32 index, rdpBitmap* bitmap); @@ -67,4 +71,8 @@ FREERDP_API void bitmap_cache_register_callbacks(rdpUpdate* update); FREERDP_API rdpBitmapCache* bitmap_cache_new(rdpSettings* settings); FREERDP_API void bitmap_cache_free(rdpBitmapCache* bitmap_cache); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_BITMAP_V2_CACHE_H */ diff --git a/include/freerdp/cache/brush.h b/include/freerdp/cache/brush.h index 68ab5b888..93d1fd679 100644 --- a/include/freerdp/cache/brush.h +++ b/include/freerdp/cache/brush.h @@ -56,6 +56,10 @@ struct rdp_brush_cache rdpSettings* settings; }; +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API void* brush_cache_get(rdpBrushCache* brush, UINT32 index, UINT32* bpp); FREERDP_API void brush_cache_put(rdpBrushCache* brush, UINT32 index, void* entry, UINT32 bpp); @@ -64,4 +68,8 @@ FREERDP_API void brush_cache_register_callbacks(rdpUpdate* update); FREERDP_API rdpBrushCache* brush_cache_new(rdpSettings* settings); FREERDP_API void brush_cache_free(rdpBrushCache* brush); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_BRUSH_CACHE_H */ diff --git a/include/freerdp/cache/cache.h b/include/freerdp/cache/cache.h index ead8cd7a9..7b0dbe4a8 100644 --- a/include/freerdp/cache/cache.h +++ b/include/freerdp/cache/cache.h @@ -48,7 +48,15 @@ struct rdp_cache rdpSettings* settings; }; +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API rdpCache* cache_new(rdpSettings* settings); FREERDP_API void 
cache_free(rdpCache* cache); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_CACHE_H */ diff --git a/include/freerdp/cache/glyph.h b/include/freerdp/cache/glyph.h index a295695e2..4e193987a 100644 --- a/include/freerdp/cache/glyph.h +++ b/include/freerdp/cache/glyph.h @@ -59,6 +59,10 @@ struct rdp_glyph_cache rdpSettings* settings; }; +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API rdpGlyph* glyph_cache_get(rdpGlyphCache* glyph_cache, UINT32 id, UINT32 index); FREERDP_API void glyph_cache_put(rdpGlyphCache* glyph_cache, UINT32 id, UINT32 index, rdpGlyph* entry); @@ -70,4 +74,8 @@ FREERDP_API void glyph_cache_register_callbacks(rdpUpdate* update); FREERDP_API rdpGlyphCache* glyph_cache_new(rdpSettings* settings); FREERDP_API void glyph_cache_free(rdpGlyphCache* glyph); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_GLYPH_CACHE_H */ diff --git a/include/freerdp/cache/nine_grid.h b/include/freerdp/cache/nine_grid.h index 0b0c18d01..77e0f4636 100644 --- a/include/freerdp/cache/nine_grid.h +++ b/include/freerdp/cache/nine_grid.h @@ -52,6 +52,10 @@ struct rdp_nine_grid_cache rdpSettings* settings; }; +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API void* nine_grid_cache_get(rdpNineGridCache* nine_grid, UINT32 index); FREERDP_API void nine_grid_cache_put(rdpNineGridCache* nine_grid, UINT32 index, void* entry); @@ -60,4 +64,8 @@ FREERDP_API void nine_grid_cache_register_callbacks(rdpUpdate* update); FREERDP_API rdpNineGridCache* nine_grid_cache_new(rdpSettings* settings); FREERDP_API void nine_grid_cache_free(rdpNineGridCache* nine_grid); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_NINE_GRID_CACHE_H */ diff --git a/include/freerdp/cache/offscreen.h b/include/freerdp/cache/offscreen.h index c69042646..4da367b75 100644 --- a/include/freerdp/cache/offscreen.h +++ b/include/freerdp/cache/offscreen.h @@ -43,6 +43,10 @@ struct rdp_offscreen_cache rdpSettings* settings; }; +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API rdpBitmap* 
offscreen_cache_get(rdpOffscreenCache* offscreen_cache, UINT32 index); FREERDP_API void offscreen_cache_put(rdpOffscreenCache* offscreen_cache, UINT32 index, rdpBitmap* bitmap); FREERDP_API void offscreen_cache_delete(rdpOffscreenCache* offscreen, UINT32 index); @@ -52,4 +56,8 @@ FREERDP_API void offscreen_cache_register_callbacks(rdpUpdate* update); FREERDP_API rdpOffscreenCache* offscreen_cache_new(rdpSettings* settings); FREERDP_API void offscreen_cache_free(rdpOffscreenCache* offscreen); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_OFFSCREEN_CACHE_H */ diff --git a/include/freerdp/cache/palette.h b/include/freerdp/cache/palette.h index 88f10eaf8..6e7d890ec 100644 --- a/include/freerdp/cache/palette.h +++ b/include/freerdp/cache/palette.h @@ -46,6 +46,10 @@ struct rdp_palette_cache rdpSettings* settings; }; +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API void* palette_cache_get(rdpPaletteCache* palette, UINT32 index); FREERDP_API void palette_cache_put(rdpPaletteCache* palette, UINT32 index, void* entry); @@ -54,4 +58,8 @@ FREERDP_API void palette_cache_register_callbacks(rdpUpdate* update); FREERDP_API rdpPaletteCache* palette_cache_new(rdpSettings* settings); FREERDP_API void palette_cache_free(rdpPaletteCache* palette_cache); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_PALETTE_CACHE_H */ diff --git a/include/freerdp/cache/pointer.h b/include/freerdp/cache/pointer.h index f3a09fcb7..ccfeae03b 100644 --- a/include/freerdp/cache/pointer.h +++ b/include/freerdp/cache/pointer.h @@ -42,6 +42,10 @@ struct rdp_pointer_cache rdpSettings* settings; }; +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API rdpPointer* pointer_cache_get(rdpPointerCache* pointer_cache, UINT32 index); FREERDP_API void pointer_cache_put(rdpPointerCache* pointer_cache, UINT32 index, rdpPointer* pointer); @@ -50,4 +54,8 @@ FREERDP_API void pointer_cache_register_callbacks(rdpUpdate* update); FREERDP_API rdpPointerCache* pointer_cache_new(rdpSettings* settings); 
FREERDP_API void pointer_cache_free(rdpPointerCache* pointer_cache); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_POINTER_CACHE_H */ diff --git a/include/freerdp/client/channels.h b/include/freerdp/client/channels.h index 27a051308..4f58b1893 100644 --- a/include/freerdp/client/channels.h +++ b/include/freerdp/client/channels.h @@ -24,11 +24,19 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API void* freerdp_channels_client_find_static_entry(const char* name, const char* identifier); FREERDP_API void* freerdp_channels_load_static_addin_entry(LPCSTR pszName, LPSTR pszSubsystem, LPSTR pszType, DWORD dwFlags); FREERDP_API FREERDP_ADDIN** freerdp_channels_list_addins(LPSTR lpName, LPSTR lpSubsystem, LPSTR lpType, DWORD dwFlags); FREERDP_API void freerdp_channels_addin_list_free(FREERDP_ADDIN** ppAddins); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_CHANNELS_CLIENT */ diff --git a/include/freerdp/client/cmdline.h b/include/freerdp/client/cmdline.h index 0bb70b839..212266a2f 100644 --- a/include/freerdp/client/cmdline.h +++ b/include/freerdp/client/cmdline.h @@ -23,6 +23,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API int freerdp_client_parse_command_line_arguments(int argc, char** argv, rdpSettings* settings); FREERDP_API int freerdp_client_load_addins(rdpChannels* channels, rdpSettings* settings); @@ -36,4 +40,8 @@ FREERDP_API int freerdp_client_add_device_channel(rdpSettings* settings, int cou FREERDP_API int freerdp_client_add_static_channel(rdpSettings* settings, int count, char** params); FREERDP_API int freerdp_client_add_dynamic_channel(rdpSettings* settings, int count, char** params); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_CLIENT_CMDLINE_H */ diff --git a/include/freerdp/client/file.h b/include/freerdp/client/file.h index 23788d969..9ed4959e7 100644 --- a/include/freerdp/client/file.h +++ b/include/freerdp/client/file.h @@ -133,6 +133,10 @@ struct rdp_file typedef struct rdp_file 
rdpFile; +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API BOOL freerdp_client_parse_rdp_file(rdpFile* file, char* name); FREERDP_API BOOL freerdp_client_parse_rdp_file_buffer(rdpFile* file, BYTE* buffer, size_t size); FREERDP_API BOOL freerdp_client_populate_settings_from_rdp_file(rdpFile* file, rdpSettings* settings); @@ -140,4 +144,8 @@ FREERDP_API BOOL freerdp_client_populate_settings_from_rdp_file(rdpFile* file, r FREERDP_API rdpFile* freerdp_client_rdp_file_new(); FREERDP_API void freerdp_client_rdp_file_free(rdpFile* file); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_CLIENT_RDP_FILE_H */ diff --git a/include/freerdp/codec/rfx.h b/include/freerdp/codec/rfx.h index 266cb065c..e2e9b6c85 100644 --- a/include/freerdp/codec/rfx.h +++ b/include/freerdp/codec/rfx.h @@ -101,8 +101,6 @@ struct _RFX_CONTEXT BYTE quant_idx_cr; /* routines */ - void (*decode_ycbcr_to_rgb)(INT16* y_r_buf, INT16* cb_g_buf, INT16* cr_b_buf); - void (*encode_rgb_to_ycbcr)(INT16* y_r_buf, INT16* cb_g_buf, INT16* cr_b_buf); void (*quantization_decode)(INT16* buffer, const UINT32* quantization_values); void (*quantization_encode)(INT16* buffer, const UINT32* quantization_values); void (*dwt_2d_decode)(INT16* buffer, INT16* dwt_buffer); diff --git a/include/freerdp/graphics.h b/include/freerdp/graphics.h index 747702e17..3fd9b2c0f 100644 --- a/include/freerdp/graphics.h +++ b/include/freerdp/graphics.h @@ -29,6 +29,10 @@ typedef struct rdp_glyph rdpGlyph; #include #include +#ifdef __cplusplus +extern "C" { +#endif + /* Bitmap Class */ typedef void (*pBitmap_New)(rdpContext* context, rdpBitmap* bitmap); @@ -166,4 +170,8 @@ FREERDP_API void graphics_register_glyph(rdpGraphics* graphics, rdpGlyph* glyph) FREERDP_API rdpGraphics* graphics_new(rdpContext* context); FREERDP_API void graphics_free(rdpGraphics* graphics); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_GRAPHICS_H */ diff --git a/include/freerdp/input.h b/include/freerdp/input.h index 24d0f90b7..9ea47caab 100644 --- 
a/include/freerdp/input.h +++ b/include/freerdp/input.h @@ -74,6 +74,10 @@ struct rdp_input UINT32 paddingB[32 - 21]; /* 21 */ }; +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API void freerdp_input_send_synchronize_event(rdpInput* input, UINT32 flags); FREERDP_API void freerdp_input_send_keyboard_event(rdpInput* input, UINT16 flags, UINT16 code); FREERDP_API void freerdp_input_send_keyboard_event_ex(rdpInput* input, BOOL down, UINT32 rdp_scancode); @@ -81,4 +85,8 @@ FREERDP_API void freerdp_input_send_unicode_keyboard_event(rdpInput* input, UINT FREERDP_API void freerdp_input_send_mouse_event(rdpInput* input, UINT16 flags, UINT16 x, UINT16 y); FREERDP_API void freerdp_input_send_extended_mouse_event(rdpInput* input, UINT16 flags, UINT16 x, UINT16 y); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_INPUT_H */ diff --git a/include/freerdp/peer.h b/include/freerdp/peer.h index e68708c3e..7c9058b33 100644 --- a/include/freerdp/peer.h +++ b/include/freerdp/peer.h @@ -81,10 +81,18 @@ struct rdp_freerdp_peer SEC_WINNT_AUTH_IDENTITY identity; }; +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API void freerdp_peer_context_new(freerdp_peer* client); FREERDP_API void freerdp_peer_context_free(freerdp_peer* client); FREERDP_API freerdp_peer* freerdp_peer_new(int sockfd); FREERDP_API void freerdp_peer_free(freerdp_peer* client); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_PEER_H */ diff --git a/include/freerdp/primitives.h b/include/freerdp/primitives.h new file mode 100644 index 000000000..917a0159b --- /dev/null +++ b/include/freerdp/primitives.h @@ -0,0 +1,215 @@ +/* primitives.h + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. 
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. Algorithms used by + * this code may be covered by patents by HP, Microsoft, or other parties. + */ + +#ifdef __GNUC__ +# pragma once +#endif + +#ifndef __PRIMITIVES_H_INCLUDED__ +#define __PRIMITIVES_H_INCLUDED__ + +#include +#include + +typedef INT32 pstatus_t; /* match IppStatus. */ +#define PRIMITIVES_SUCCESS (0) /* match ippStsNoErr */ + +/* Simple macro for address of an x,y location in 2d 4-byte memory block */ +#define PIXMAP4_ADDR(_dst_, _x_, _y_, _span_) \ + ((void *) (((BYTE *) (_dst_)) + (((_x_) + (_y_)*(_span_)) << 2))) + +#define PRIM_X86_MMX_AVAILABLE (1U<<0) +#define PRIM_X86_3DNOW_AVAILABLE (1U<<1) +#define PRIM_X86_3DNOW_PREFETCH_AVAILABLE (1U<<2) +#define PRIM_X86_SSE_AVAILABLE (1U<<3) +#define PRIM_X86_SSE2_AVAILABLE (1U<<4) +#define PRIM_X86_SSE3_AVAILABLE (1U<<5) +#define PRIM_X86_SSSE3_AVAILABLE (1U<<6) +#define PRIM_X86_SSE41_AVAILABLE (1U<<7) +#define PRIM_X86_SSE42_AVAILABLE (1U<<8) +#define PRIM_X86_AVX_AVAILABLE (1U<<9) +#define PRIM_X86_FMA_AVAILABLE (1U<<10) +#define PRIM_X86_AVX_AES_AVAILABLE (1U<<11) +#define PRIM_X86_AVX2_AVAILABLE (1U<<12) + +#define PRIM_ARM_VFP1_AVAILABLE (1U<<0) +#define PRIM_ARM_VFP2_AVAILABLE (1U<<1) +#define PRIM_ARM_VFP3_AVAILABLE (1U<<2) +#define PRIM_ARM_VFP4_AVAILABLE (1U<<3) +#define PRIM_ARM_FPA_AVAILABLE (1U<<4) +#define PRIM_ARM_FPE_AVAILABLE (1U<<5) +#define PRIM_ARM_IWMMXT_AVAILABLE (1U<<6) +#define PRIM_ARM_NEON_AVAILABLE (1U<<7) + +/* Structures compatible with IPP */ +typedef struct +{ + INT32 width; + INT32 height; +} prim_size_t; /* like IppiSize */ + +/* Function prototypes for all of the supported primitives. 
*/ +typedef pstatus_t (*__copy_t)( + const void *pSrc, + void *pDst, + INT32 bytes); +typedef pstatus_t (*__copy_8u_t)( + const BYTE *pSrc, + BYTE *pDst, + INT32 len); +typedef pstatus_t (*__copy_8u_AC4r_t)( + const BYTE *pSrc, + INT32 srcStep, /* bytes */ + BYTE *pDst, + INT32 dstStep, /* bytes */ + INT32 width, INT32 height); /* pixels */ +typedef pstatus_t (*__set_8u_t)( + BYTE val, + BYTE *pDst, + INT32 len); +typedef pstatus_t (*__set_32s_t)( + INT32 val, + INT32 *pDst, + INT32 len); +typedef pstatus_t (*__set_32u_t)( + UINT32 val, + UINT32 *pDst, + INT32 len); +typedef pstatus_t (*__zero_t)( + void *pDst, + size_t bytes); +typedef pstatus_t (*__alphaComp_argb_t)( + const BYTE *pSrc1, INT32 src1Step, + const BYTE *pSrc2, INT32 src2Step, + BYTE *pDst, INT32 dstStep, + INT32 width, INT32 height); +typedef pstatus_t (*__add_16s_t)( + const INT16 *pSrc1, + const INT16 *pSrc2, + INT16 *pDst, + INT32 len); +typedef pstatus_t (*__lShiftC_16s_t)( + const INT16 *pSrc, + INT32 val, + INT16 *pSrcDst, + INT32 len); +typedef pstatus_t (*__lShiftC_16u_t)( + const UINT16 *pSrc, + INT32 val, + UINT16 *pSrcDst, + INT32 len); +typedef pstatus_t (*__rShiftC_16s_t)( + const INT16 *pSrc, + INT32 val, + INT16 *pSrcDst, + INT32 len); +typedef pstatus_t (*__rShiftC_16u_t)( + const UINT16 *pSrc, + INT32 val, + UINT16 *pSrcDst, + INT32 len); +typedef pstatus_t (*__shiftC_16s_t)( + const INT16 *pSrc, + INT32 val, + INT16 *pSrcDst, + INT32 len); +typedef pstatus_t (*__shiftC_16u_t)( + const UINT16 *pSrc, + INT32 val, + UINT16 *pSrcDst, + INT32 len); +typedef pstatus_t (*__sign_16s_t)( + const INT16 *pSrc, + INT16 *pDst, + INT32 len); +typedef pstatus_t (*__yCbCrToRGB_16s16s_P3P3_t)( + const INT16 *pSrc[3], INT32 srcStep, + INT16 *pDst[3], INT32 dstStep, + const prim_size_t *roi); +typedef pstatus_t (*__RGBToYCbCr_16s16s_P3P3_t)( + const INT16 *pSrc[3], INT32 srcStep, + INT16 *pDst[3], INT32 dstStep, + const prim_size_t *roi); +typedef pstatus_t (*__RGBToRGB_16s8u_P3AC4R_t)( + const INT16 
*pSrc[3], INT32 srcStep, + BYTE *pDst, INT32 dstStep, + const prim_size_t *roi); +typedef pstatus_t (*__andC_32u_t)( + const UINT32 *pSrc, + UINT32 val, + UINT32 *pDst, + INT32 len); +typedef pstatus_t (*__orC_32u_t)( + const UINT32 *pSrc, + UINT32 val, + UINT32 *pDst, + INT32 len); + +typedef struct +{ + /* Memory-to-memory copy routines */ + __copy_t copy; /* memcpy/memmove, basically */ + __copy_8u_t copy_8u; /* more strongly typed */ + __copy_8u_AC4r_t copy_8u_AC4r; /* pixel copy function */ + /* Memory setting routines */ + __set_8u_t set_8u; /* memset, basically */ + __set_32s_t set_32s; + __set_32u_t set_32u; + __zero_t zero; /* bzero or faster */ + /* Arithmetic functions */ + __add_16s_t add_16s; + /* And/or */ + __andC_32u_t andC_32u; + __orC_32u_t orC_32u; + /* Shifts */ + __lShiftC_16s_t lShiftC_16s; + __lShiftC_16u_t lShiftC_16u; + __rShiftC_16s_t rShiftC_16s; + __rShiftC_16u_t rShiftC_16u; + __shiftC_16s_t shiftC_16s; + __shiftC_16u_t shiftC_16u; + /* Alpha Composition */ + __alphaComp_argb_t alphaComp_argb; + /* Sign */ + __sign_16s_t sign_16s; + /* Color conversions */ + __yCbCrToRGB_16s16s_P3P3_t yCbCrToRGB_16s16s_P3P3; + __RGBToYCbCr_16s16s_P3P3_t RGBToYCbCr_16s16s_P3P3; + __RGBToRGB_16s8u_P3AC4R_t RGBToRGB_16s8u_P3AC4R; + + /* internal use for CPU flags and such. */ + void *hints; +} primitives_t; + +#ifdef __cplusplus +extern "C" { +#endif + +/* Prototypes for the externally-visible entrypoints. 
*/ +FREERDP_API void primitives_init(void); +FREERDP_API primitives_t *primitives_get(void); +FREERDP_API UINT32 primitives_get_flags( + const primitives_t *prims); +FREERDP_API void primitives_flags_str( + const primitives_t *prims, + char *str, + size_t len); +FREERDP_API void primitives_deinit(void); + +#ifdef __cplusplus +} +#endif + +#endif /* !__PRIMITIVES_H_INCLUDED__ */ diff --git a/include/freerdp/settings.h b/include/freerdp/settings.h index 87606bd56..d6ae91c07 100644 --- a/include/freerdp/settings.h +++ b/include/freerdp/settings.h @@ -970,6 +970,10 @@ struct rdp_settings }; typedef struct rdp_settings rdpSettings; +#ifdef __cplusplus +extern "C" { +#endif + FREERDP_API rdpSettings* freerdp_settings_new(void* instance); FREERDP_API void freerdp_settings_free(rdpSettings* settings); @@ -989,4 +993,8 @@ FREERDP_API void freerdp_dynamic_channel_collection_add(rdpSettings* settings, A FREERDP_API ADDIN_ARGV* freerdp_dynamic_channel_collection_find(rdpSettings* settings, const char* name); FREERDP_API void freerdp_dynamic_channel_collection_free(rdpSettings* settings); +#ifdef __cplusplus +} +#endif + #endif /* FREERDP_SETTINGS_H */ diff --git a/include/freerdp/update.h b/include/freerdp/update.h index 526f22f0d..6aa15ea30 100644 --- a/include/freerdp/update.h +++ b/include/freerdp/update.h @@ -152,6 +152,7 @@ typedef void (*pSuppressOutput)(rdpContext* context, BYTE allow, RECTANGLE_16* a typedef void (*pSurfaceCommand)(rdpContext* context, STREAM* s); typedef void (*pSurfaceBits)(rdpContext* context, SURFACE_BITS_COMMAND* surface_bits_command); typedef void (*pSurfaceFrameMarker)(rdpContext* context, SURFACE_FRAME_MARKER* surface_frame_marker); +typedef void (*pSurfaceFrameAcknowledge)(rdpContext* context, UINT32 frameId); struct rdp_update { @@ -182,7 +183,8 @@ struct rdp_update pSurfaceCommand SurfaceCommand; /* 64 */ pSurfaceBits SurfaceBits; /* 65 */ pSurfaceFrameMarker SurfaceFrameMarker; /* 66 */ - UINT32 paddingE[80 - 67]; /* 67 */ + 
pSurfaceFrameAcknowledge SurfaceFrameAcknowledge; /* 67 */ + UINT32 paddingE[80 - 68]; /* 68 */ /* internal */ diff --git a/libfreerdp/CMakeLists.txt b/libfreerdp/CMakeLists.txt index ed6592a27..b8b6adb28 100644 --- a/libfreerdp/CMakeLists.txt +++ b/libfreerdp/CMakeLists.txt @@ -31,6 +31,7 @@ set(${MODULE_PREFIX}_SUBMODULES codec crypto locale + primitives core) foreach(${MODULE_PREFIX}_SUBMODULE ${${MODULE_PREFIX}_SUBMODULES}) diff --git a/libfreerdp/codec/CMakeLists.txt b/libfreerdp/codec/CMakeLists.txt index 5c6a7fe81..0276ac1aa 100644 --- a/libfreerdp/codec/CMakeLists.txt +++ b/libfreerdp/codec/CMakeLists.txt @@ -31,8 +31,6 @@ set(${MODULE_PREFIX}_SRCS rfx_dwt.h rfx_encode.c rfx_encode.h - rfx_pool.c - rfx_pool.h rfx_quantization.c rfx_quantization.h rfx_rlgr.c @@ -97,9 +95,12 @@ set(${MODULE_PREFIX}_LIBS set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} INTERNAL MODULE freerdp - MODULES freerdp-utils) - -message(STATUS "libfreerdp-codec libs: ${${MODULE_PREFIX}_LIBS}") + MODULES freerdp-primitives freerdp-utils) + +set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS + MONOLITHIC ${MONOLITHIC_BUILD} + MODULE winpr + MODULES winpr-crt winpr-pool winpr-registry winpr-utils) if(MONOLITHIC_BUILD) set(FREERDP_LIBS ${FREERDP_LIBS} ${${MODULE_PREFIX}_LIBS} PARENT_SCOPE) diff --git a/libfreerdp/codec/nsc.c b/libfreerdp/codec/nsc.c index 171995361..d20e015c3 100644 --- a/libfreerdp/codec/nsc.c +++ b/libfreerdp/codec/nsc.c @@ -247,7 +247,10 @@ void nsc_context_free(NSC_CONTEXT* context) for (i = 0; i < 4; i++) { if (context->priv->plane_buf[i]) + { free(context->priv->plane_buf[i]); + context->priv->plane_buf[i] = NULL; + } } if (context->bmpdata) free(context->bmpdata); @@ -260,14 +263,21 @@ void nsc_context_free(NSC_CONTEXT* context) free(context->priv); free(context); + context = NULL; } NSC_CONTEXT* nsc_context_new(void) { NSC_CONTEXT* nsc_context; + UINT8 i; nsc_context = (NSC_CONTEXT*) malloc(sizeof(NSC_CONTEXT)); 
nsc_context->priv = (NSC_CONTEXT_PRIV*) malloc(sizeof(NSC_CONTEXT_PRIV)); + for (i=0; i < 5; ++i) + { + nsc_context->priv->plane_buf[i] = NULL; + } + nsc_context->bmpdata = NULL; nsc_context->decode = nsc_decode; nsc_context->encode = nsc_encode; diff --git a/libfreerdp/codec/rfx.c b/libfreerdp/codec/rfx.c index 6acf280f8..5f0c38432 100644 --- a/libfreerdp/codec/rfx.c +++ b/libfreerdp/codec/rfx.c @@ -30,13 +30,13 @@ #endif #include +#include #include #include #include "rfx_constants.h" #include "rfx_types.h" -#include "rfx_pool.h" #include "rfx_decode.h" #include "rfx_encode.h" #include "rfx_quantization.h" @@ -79,7 +79,7 @@ static void rfx_profiler_create(RFX_CONTEXT* context) PROFILER_CREATE(context->priv->prof_rfx_differential_decode, "rfx_differential_decode"); PROFILER_CREATE(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode"); PROFILER_CREATE(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode"); - PROFILER_CREATE(context->priv->prof_rfx_decode_ycbcr_to_rgb, "rfx_decode_ycbcr_to_rgb"); + PROFILER_CREATE(context->priv->prof_rfx_ycbcr_to_rgb, "prims->yCbCrToRGB"); PROFILER_CREATE(context->priv->prof_rfx_decode_format_rgb, "rfx_decode_format_rgb"); PROFILER_CREATE(context->priv->prof_rfx_encode_rgb, "rfx_encode_rgb"); @@ -88,7 +88,7 @@ static void rfx_profiler_create(RFX_CONTEXT* context) PROFILER_CREATE(context->priv->prof_rfx_differential_encode, "rfx_differential_encode"); PROFILER_CREATE(context->priv->prof_rfx_quantization_encode, "rfx_quantization_encode"); PROFILER_CREATE(context->priv->prof_rfx_dwt_2d_encode, "rfx_dwt_2d_encode"); - PROFILER_CREATE(context->priv->prof_rfx_encode_rgb_to_ycbcr, "rfx_encode_rgb_to_ycbcr"); + PROFILER_CREATE(context->priv->prof_rfx_rgb_to_ycbcr, "prims->RGBToYCbCr"); PROFILER_CREATE(context->priv->prof_rfx_encode_format_rgb, "rfx_encode_format_rgb"); } @@ -100,7 +100,7 @@ static void rfx_profiler_free(RFX_CONTEXT* context) PROFILER_FREE(context->priv->prof_rfx_differential_decode); 
PROFILER_FREE(context->priv->prof_rfx_quantization_decode); PROFILER_FREE(context->priv->prof_rfx_dwt_2d_decode); - PROFILER_FREE(context->priv->prof_rfx_decode_ycbcr_to_rgb); + PROFILER_FREE(context->priv->prof_rfx_ycbcr_to_rgb); PROFILER_FREE(context->priv->prof_rfx_decode_format_rgb); PROFILER_FREE(context->priv->prof_rfx_encode_rgb); @@ -109,7 +109,7 @@ static void rfx_profiler_free(RFX_CONTEXT* context) PROFILER_FREE(context->priv->prof_rfx_differential_encode); PROFILER_FREE(context->priv->prof_rfx_quantization_encode); PROFILER_FREE(context->priv->prof_rfx_dwt_2d_encode); - PROFILER_FREE(context->priv->prof_rfx_encode_rgb_to_ycbcr); + PROFILER_FREE(context->priv->prof_rfx_rgb_to_ycbcr); PROFILER_FREE(context->priv->prof_rfx_encode_format_rgb); } @@ -123,7 +123,7 @@ static void rfx_profiler_print(RFX_CONTEXT* context) PROFILER_PRINT(context->priv->prof_rfx_differential_decode); PROFILER_PRINT(context->priv->prof_rfx_quantization_decode); PROFILER_PRINT(context->priv->prof_rfx_dwt_2d_decode); - PROFILER_PRINT(context->priv->prof_rfx_decode_ycbcr_to_rgb); + PROFILER_PRINT(context->priv->prof_rfx_ycbcr_to_rgb); PROFILER_PRINT(context->priv->prof_rfx_decode_format_rgb); PROFILER_PRINT(context->priv->prof_rfx_encode_rgb); @@ -132,7 +132,7 @@ static void rfx_profiler_print(RFX_CONTEXT* context) PROFILER_PRINT(context->priv->prof_rfx_differential_encode); PROFILER_PRINT(context->priv->prof_rfx_quantization_encode); PROFILER_PRINT(context->priv->prof_rfx_dwt_2d_encode); - PROFILER_PRINT(context->priv->prof_rfx_encode_rgb_to_ycbcr); + PROFILER_PRINT(context->priv->prof_rfx_rgb_to_ycbcr); PROFILER_PRINT(context->priv->prof_rfx_encode_format_rgb); PROFILER_PRINT_FOOTER; @@ -140,6 +140,11 @@ static void rfx_profiler_print(RFX_CONTEXT* context) RFX_CONTEXT* rfx_context_new(void) { + HKEY hKey; + LONG status; + DWORD dwType; + DWORD dwSize; + DWORD dwValue; RFX_CONTEXT* context; context = (RFX_CONTEXT*) malloc(sizeof(RFX_CONTEXT)); @@ -148,24 +153,58 @@ RFX_CONTEXT* 
rfx_context_new(void) context->priv = (RFX_CONTEXT_PRIV*) malloc(sizeof(RFX_CONTEXT_PRIV)); ZeroMemory(context->priv, sizeof(RFX_CONTEXT_PRIV)); - context->priv->pool = rfx_pool_new(); + context->priv->TilePool = Queue_New(TRUE, -1, -1); + context->priv->TileQueue = Queue_New(TRUE, -1, -1); + + /* + * align buffers to 16 byte boundary (needed for SSE/NEON instructions) + * + * y_r_buffer, cb_g_buffer, cr_b_buffer: 64 * 64 * 4 = 16384 (0x4000) + * dwt_buffer: 32 * 32 * 2 * 2 * 4 = 16384, maximum sub-band width is 32 + */ + + context->priv->BufferPool = BufferPool_New(TRUE, 16384, 16); + + context->priv->UseThreads = FALSE; + context->priv->MinThreadCount = 4; + context->priv->MaxThreadCount = 0; + + status = RegOpenKeyEx(HKEY_LOCAL_MACHINE, _T("Software\\FreeRDP\\RemoteFX"), 0, KEY_READ | KEY_WOW64_64KEY, &hKey); + + if (status == ERROR_SUCCESS) + { + if (RegQueryValueEx(hKey, _T("UseThreads"), NULL, &dwType, (BYTE*) &dwValue, &dwSize) == ERROR_SUCCESS) + context->priv->UseThreads = dwValue ? 
1 : 0; + + if (RegQueryValueEx(hKey, _T("MinThreadCount"), NULL, &dwType, (BYTE*) &dwValue, &dwSize) == ERROR_SUCCESS) + context->priv->MinThreadCount = dwValue; + + if (RegQueryValueEx(hKey, _T("MaxThreadCount"), NULL, &dwType, (BYTE*) &dwValue, &dwSize) == ERROR_SUCCESS) + context->priv->MaxThreadCount = dwValue; + + RegCloseKey(hKey); + } + + if (context->priv->UseThreads) + { + context->priv->ThreadPool = CreateThreadpool(NULL); + InitializeThreadpoolEnvironment(&context->priv->ThreadPoolEnv); + SetThreadpoolCallbackPool(&context->priv->ThreadPoolEnv, context->priv->ThreadPool); + + if (context->priv->MinThreadCount) + SetThreadpoolThreadMinimum(context->priv->ThreadPool, context->priv->MinThreadCount); + + if (context->priv->MaxThreadCount) + SetThreadpoolThreadMaximum(context->priv->ThreadPool, context->priv->MaxThreadCount); + } /* initialize the default pixel format */ rfx_context_set_pixel_format(context, RDP_PIXEL_FORMAT_B8G8R8A8); - /* align buffers to 16 byte boundary (needed for SSE/SSE2 instructions) */ - context->priv->y_r_buffer = (INT16*)(((uintptr_t)context->priv->y_r_mem + 16) & ~ 0x0F); - context->priv->cb_g_buffer = (INT16*)(((uintptr_t)context->priv->cb_g_mem + 16) & ~ 0x0F); - context->priv->cr_b_buffer = (INT16*)(((uintptr_t)context->priv->cr_b_mem + 16) & ~ 0x0F); - - context->priv->dwt_buffer = (INT16*)(((uintptr_t)context->priv->dwt_mem + 16) & ~ 0x0F); - /* create profilers for default decoding routines */ rfx_profiler_create(context); /* set up default routines */ - context->decode_ycbcr_to_rgb = rfx_decode_ycbcr_to_rgb; - context->encode_rgb_to_ycbcr = rfx_encode_rgb_to_ycbcr; context->quantization_decode = rfx_quantization_decode; context->quantization_encode = rfx_quantization_encode; context->dwt_2d_decode = rfx_dwt_2d_decode; @@ -185,11 +224,20 @@ void rfx_context_free(RFX_CONTEXT* context) { free(context->quants); - rfx_pool_free(context->priv->pool); + Queue_Free(context->priv->TilePool); + Queue_Free(context->priv->TileQueue); 
rfx_profiler_print(context); rfx_profiler_free(context); + if (context->priv->UseThreads) + { + CloseThreadpool(context->priv->ThreadPool); + DestroyThreadpoolEnvironment(&context->priv->ThreadPoolEnv); + } + + BufferPool_Free(context->priv->BufferPool); + free(context->priv); free(context); } @@ -197,6 +245,7 @@ void rfx_context_free(RFX_CONTEXT* context) void rfx_context_set_pixel_format(RFX_CONTEXT* context, RDP_PIXEL_FORMAT pixel_format) { context->pixel_format = pixel_format; + switch (pixel_format) { case RDP_PIXEL_FORMAT_B8G8R8A8: @@ -229,6 +278,30 @@ void rfx_context_reset(RFX_CONTEXT* context) context->frame_idx = 0; } +RFX_TILE* rfx_tile_pool_take(RFX_CONTEXT* context) +{ + RFX_TILE* tile = NULL; + + if (WaitForSingleObject(Queue_Event(context->priv->TilePool), 0) == WAIT_OBJECT_0) + tile = Queue_Dequeue(context->priv->TilePool); + + if (!tile) + { + tile = (RFX_TILE*) malloc(sizeof(RFX_TILE)); + + tile->x = tile->y = 0; + tile->data = (BYTE*) malloc(4096 * 4); /* 64x64 * 4 */ + } + + return tile; +} + +int rfx_tile_pool_return(RFX_CONTEXT* context, RFX_TILE* tile) +{ + Queue_Enqueue(context->priv->TilePool, tile); + return 0; +} + static void rfx_process_message_sync(RFX_CONTEXT* context, STREAM* s) { UINT32 magic; @@ -414,19 +487,35 @@ static void rfx_process_message_tile(RFX_CONTEXT* context, RFX_TILE* tile, STREA YLen, context->quants + (quantIdxY * 10), CbLen, context->quants + (quantIdxCb * 10), CrLen, context->quants + (quantIdxCr * 10), - tile->data); + tile->data, 64 * 4); +} + +struct _RFX_TILE_WORK_PARAM +{ + STREAM s; + RFX_TILE* tile; + RFX_CONTEXT* context; +}; +typedef struct _RFX_TILE_WORK_PARAM RFX_TILE_WORK_PARAM; + +void CALLBACK rfx_process_message_tile_work_callback(PTP_CALLBACK_INSTANCE instance, void* context, PTP_WORK work) +{ + RFX_TILE_WORK_PARAM* param = (RFX_TILE_WORK_PARAM*) context; + rfx_process_message_tile(param->context, param->tile, &(param->s)); } static void rfx_process_message_tileset(RFX_CONTEXT* context, 
RFX_MESSAGE* message, STREAM* s) { int i; + int pos; + BYTE quant; + UINT32* quants; UINT16 subtype; UINT32 blockLen; UINT32 blockType; UINT32 tilesDataSize; - UINT32* quants; - BYTE quant; - int pos; + PTP_WORK* work_objects = NULL; + RFX_TILE_WORK_PARAM* params = NULL; stream_read_UINT16(s, subtype); /* subtype (2 bytes) must be set to CBT_TILESET (0xCAC2) */ @@ -492,7 +581,14 @@ static void rfx_process_message_tileset(RFX_CONTEXT* context, RFX_MESSAGE* messa context->quants[i * 10 + 8], context->quants[i * 10 + 9]); } - message->tiles = rfx_pool_get_tiles(context->priv->pool, message->num_tiles); + message->tiles = (RFX_TILE**) malloc(sizeof(RFX_TILE*) * message->num_tiles); + ZeroMemory(message->tiles, sizeof(RFX_TILE*) * message->num_tiles); + + if (context->priv->UseThreads) + { + work_objects = (PTP_WORK*) malloc(sizeof(PTP_WORK) * message->num_tiles); + params = (RFX_TILE_WORK_PARAM*) malloc(sizeof(RFX_TILE_WORK_PARAM) * message->num_tiles); + } /* tiles */ for (i = 0; i < message->num_tiles; i++) @@ -509,10 +605,35 @@ static void rfx_process_message_tileset(RFX_CONTEXT* context, RFX_MESSAGE* messa break; } - rfx_process_message_tile(context, message->tiles[i], s); + message->tiles[i] = rfx_tile_pool_take(context); + + if (context->priv->UseThreads) + { + params[i].context = context; + params[i].tile = message->tiles[i]; + CopyMemory(&(params[i].s), s, sizeof(STREAM)); + + work_objects[i] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_process_message_tile_work_callback, + (void*) ¶ms[i], &context->priv->ThreadPoolEnv); + + SubmitThreadpoolWork(work_objects[i]); + } + else + { + rfx_process_message_tile(context, message->tiles[i], s); + } stream_set_pos(s, pos); } + + if (context->priv->UseThreads) + { + for (i = 0; i < message->num_tiles; i++) + WaitForThreadpoolWorkCallbacks(work_objects[i], FALSE); + + free(work_objects); + free(params); + } } RFX_MESSAGE* rfx_process_message(RFX_CONTEXT* context, BYTE* data, UINT32 length) @@ -623,13 +744,17 @@ RFX_RECT* 
rfx_message_get_rect(RFX_MESSAGE* message, int index) void rfx_message_free(RFX_CONTEXT* context, RFX_MESSAGE* message) { + int i; + if (message != NULL) { free(message->rects); - if (message->tiles != NULL) + if (message->tiles) { - rfx_pool_put_tiles(context->priv->pool, message->tiles, message->num_tiles); + for (i = 0; i < message->num_tiles; i++) + rfx_tile_pool_return(context, message->tiles[i]); + free(message->tiles); } @@ -792,9 +917,9 @@ static void rfx_compose_message_tile(RFX_CONTEXT* context, STREAM* s, static void rfx_compose_message_tileset(RFX_CONTEXT* context, STREAM* s, BYTE* image_data, int width, int height, int rowstride) { + int i; int size; int start_pos, end_pos; - int i; int numQuants; const UINT32* quantVals; const UINT32* quantValsPtr; diff --git a/libfreerdp/codec/rfx_decode.c b/libfreerdp/codec/rfx_decode.c index 2d7774590..fd8eca2ea 100644 --- a/libfreerdp/codec/rfx_decode.c +++ b/libfreerdp/codec/rfx_decode.c @@ -27,6 +27,7 @@ #include #include +#include #include "rfx_types.h" #include "rfx_rlgr.h" @@ -36,49 +37,55 @@ #include "rfx_decode.h" +/* stride is bytes between rows in the output buffer. 
*/ static void rfx_decode_format_rgb(INT16* r_buf, INT16* g_buf, INT16* b_buf, - RDP_PIXEL_FORMAT pixel_format, BYTE* dst_buf) + RDP_PIXEL_FORMAT pixel_format, BYTE* dst_buf, int stride) { + primitives_t *prims = primitives_get(); INT16* r = r_buf; INT16* g = g_buf; INT16* b = b_buf; + INT16* pSrc[3]; + static const prim_size_t roi_64x64 = { 64, 64 }; BYTE* dst = dst_buf; - int i; + int x, y; switch (pixel_format) { case RDP_PIXEL_FORMAT_B8G8R8A8: - for (i = 0; i < 4096; i++) - { - *dst++ = (BYTE) (*b++); - *dst++ = (BYTE) (*g++); - *dst++ = (BYTE) (*r++); - *dst++ = 0xFF; - } + pSrc[0] = r; pSrc[1] = g; pSrc[2] = b; + prims->RGBToRGB_16s8u_P3AC4R( + (const INT16 **) pSrc, 64*sizeof(INT16), + dst, stride, &roi_64x64); break; case RDP_PIXEL_FORMAT_R8G8B8A8: - for (i = 0; i < 4096; i++) - { - *dst++ = (BYTE) (*r++); - *dst++ = (BYTE) (*g++); - *dst++ = (BYTE) (*b++); - *dst++ = 0xFF; - } + pSrc[0] = b; pSrc[1] = g; pSrc[2] = r; + prims->RGBToRGB_16s8u_P3AC4R( + (const INT16 **) pSrc, 64*sizeof(INT16), + dst, stride, &roi_64x64); break; case RDP_PIXEL_FORMAT_B8G8R8: - for (i = 0; i < 4096; i++) + for (y=0; y<64; y++) { - *dst++ = (BYTE) (*b++); - *dst++ = (BYTE) (*g++); - *dst++ = (BYTE) (*r++); + for (x=0; x<64; x++) + { + *dst++ = (BYTE) (*b++); + *dst++ = (BYTE) (*g++); + *dst++ = (BYTE) (*r++); + } + dst += stride - (64*3); } break; case RDP_PIXEL_FORMAT_R8G8B8: - for (i = 0; i < 4096; i++) + for (y=0; y<64; y++) { - *dst++ = (BYTE) (*r++); - *dst++ = (BYTE) (*g++); - *dst++ = (BYTE) (*b++); + for (x=0; x<64; x++) + { + *dst++ = (BYTE) (*r++); + *dst++ = (BYTE) (*g++); + *dst++ = (BYTE) (*b++); + } + dst += stride - (64*3); } break; default: @@ -86,72 +93,13 @@ static void rfx_decode_format_rgb(INT16* r_buf, INT16* g_buf, INT16* b_buf, } } -#define MINMAX(_v,_l,_h) ((_v) < (_l) ? (_l) : ((_v) > (_h) ? 
(_h) : (_v))) - -void rfx_decode_ycbcr_to_rgb(INT16* y_r_buf, INT16* cb_g_buf, INT16* cr_b_buf) -{ - /* INT32 is used intentionally because we calculate with shifted factors! */ - INT32 y, cb, cr; - INT32 r, g, b; - int i; - - /** - * The decoded YCbCr coeffectients are represented as 11.5 fixed-point numbers: - * - * 1 sign bit + 10 integer bits + 5 fractional bits - * - * However only 7 integer bits will be actually used since the value range is [-128.0, 127.0]. - * In other words, the decoded coeffectients is scaled by << 5 when intepreted as INT16. - * It was scaled in the quantization phase, so we must scale it back here. - */ - for (i = 0; i < 4096; i++) - { - y = y_r_buf[i]; - cb = cb_g_buf[i]; - cr = cr_b_buf[i]; - -#if 0 - /** - * This is the slow floating point version kept here for reference - */ - - y = y + 4096; /* 128<<5=4096 so that we can scale the sum by >> 5 */ - - r = y + cr*1.403f; - g = y - cb*0.344f - cr*0.714f; - b = y + cb*1.770f; - - y_r_buf[i] = MINMAX(r>>5, 0, 255); - cb_g_buf[i] = MINMAX(g>>5, 0, 255); - cr_b_buf[i] = MINMAX(b>>5, 0, 255); -#else - /** - * We scale the factors by << 16 into 32-bit integers in order to avoid slower - * floating point multiplications. Since the final result needs to be scaled - * by >> 5 we will extract only the upper 11 bits (>> 21) from the final sum. - * Hence we also have to scale the other terms of the sum by << 16. 
- * - * R: 1.403 << 16 = 91947 - * G: 0.344 << 16 = 22544, 0.714 << 16 = 46792 - * B: 1.770 << 16 = 115998 - */ - - y = (y+4096)<<16; - - r = y + cr*91947; - g = y - cb*22544 - cr*46792; - b = y + cb*115998; - - y_r_buf[i] = MINMAX(r>>21, 0, 255); - cb_g_buf[i] = MINMAX(g>>21, 0, 255); - cr_b_buf[i] = MINMAX(b>>21, 0, 255); -#endif - } -} - static void rfx_decode_component(RFX_CONTEXT* context, const UINT32* quantization_values, const BYTE* data, int size, INT16* buffer) { + INT16* dwt_buffer; + + dwt_buffer = BufferPool_Take(context->priv->BufferPool, -1); /* dwt_buffer */ + PROFILER_ENTER(context->priv->prof_rfx_decode_component); PROFILER_ENTER(context->priv->prof_rfx_rlgr_decode); @@ -167,34 +115,50 @@ static void rfx_decode_component(RFX_CONTEXT* context, const UINT32* quantizatio PROFILER_EXIT(context->priv->prof_rfx_quantization_decode); PROFILER_ENTER(context->priv->prof_rfx_dwt_2d_decode); - context->dwt_2d_decode(buffer, context->priv->dwt_buffer); + context->dwt_2d_decode(buffer, dwt_buffer); PROFILER_EXIT(context->priv->prof_rfx_dwt_2d_decode); PROFILER_EXIT(context->priv->prof_rfx_decode_component); + + BufferPool_Return(context->priv->BufferPool, dwt_buffer); } +/* rfx_decode_ycbcr_to_rgb code now resides in the primitives library. */ + +/* stride is bytes between rows in the output buffer. 
*/ void rfx_decode_rgb(RFX_CONTEXT* context, STREAM* data_in, - int y_size, const UINT32 * y_quants, - int cb_size, const UINT32 * cb_quants, - int cr_size, const UINT32 * cr_quants, BYTE* rgb_buffer) + int y_size, const UINT32* y_quants, + int cb_size, const UINT32* cb_quants, + int cr_size, const UINT32* cr_quants, BYTE* rgb_buffer, int stride) { + INT16* pSrcDst[3]; + static const prim_size_t roi_64x64 = { 64, 64 }; + const primitives_t *prims = primitives_get(); + PROFILER_ENTER(context->priv->prof_rfx_decode_rgb); - rfx_decode_component(context, y_quants, stream_get_tail(data_in), y_size, context->priv->y_r_buffer); /* YData */ + pSrcDst[0] = BufferPool_Take(context->priv->BufferPool, -1); /* y_r_buffer */ + pSrcDst[1] = BufferPool_Take(context->priv->BufferPool, -1); /* cb_g_buffer */ + pSrcDst[2] = BufferPool_Take(context->priv->BufferPool, -1); /* cr_b_buffer */ + + rfx_decode_component(context, y_quants, stream_get_tail(data_in), y_size, pSrcDst[0]); /* YData */ stream_seek(data_in, y_size); - rfx_decode_component(context, cb_quants, stream_get_tail(data_in), cb_size, context->priv->cb_g_buffer); /* CbData */ + rfx_decode_component(context, cb_quants, stream_get_tail(data_in), cb_size, pSrcDst[1]); /* CbData */ stream_seek(data_in, cb_size); - rfx_decode_component(context, cr_quants, stream_get_tail(data_in), cr_size, context->priv->cr_b_buffer); /* CrData */ + rfx_decode_component(context, cr_quants, stream_get_tail(data_in), cr_size, pSrcDst[2]); /* CrData */ stream_seek(data_in, cr_size); - PROFILER_ENTER(context->priv->prof_rfx_decode_ycbcr_to_rgb); - context->decode_ycbcr_to_rgb(context->priv->y_r_buffer, context->priv->cb_g_buffer, context->priv->cr_b_buffer); - PROFILER_EXIT(context->priv->prof_rfx_decode_ycbcr_to_rgb); + prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16), + pSrcDst, 64 * sizeof(INT16), &roi_64x64); PROFILER_ENTER(context->priv->prof_rfx_decode_format_rgb); - rfx_decode_format_rgb(context->priv->y_r_buffer, 
context->priv->cb_g_buffer, context->priv->cr_b_buffer, - context->pixel_format, rgb_buffer); + rfx_decode_format_rgb(pSrcDst[0], pSrcDst[1], pSrcDst[2], + context->pixel_format, rgb_buffer, stride); PROFILER_EXIT(context->priv->prof_rfx_decode_format_rgb); PROFILER_EXIT(context->priv->prof_rfx_decode_rgb); + + BufferPool_Return(context->priv->BufferPool, pSrcDst[0]); + BufferPool_Return(context->priv->BufferPool, pSrcDst[1]); + BufferPool_Return(context->priv->BufferPool, pSrcDst[2]); } diff --git a/libfreerdp/codec/rfx_decode.h b/libfreerdp/codec/rfx_decode.h index 7a19b7bb2..e96eb66e2 100644 --- a/libfreerdp/codec/rfx_decode.h +++ b/libfreerdp/codec/rfx_decode.h @@ -22,12 +22,12 @@ #include -void rfx_decode_ycbcr_to_rgb(INT16* y_r_buf, INT16* cb_g_buf, INT16* cr_b_buf); - +/* stride is bytes between rows in the output buffer. */ void rfx_decode_rgb(RFX_CONTEXT* context, STREAM* data_in, int y_size, const UINT32 * y_quants, int cb_size, const UINT32 * cb_quants, - int cr_size, const UINT32 * cr_quants, BYTE* rgb_buffer); + int cr_size, const UINT32 * cr_quants, BYTE* rgb_buffer, + int stride); #endif /* __RFX_DECODE_H */ diff --git a/libfreerdp/codec/rfx_encode.c b/libfreerdp/codec/rfx_encode.c index 8e511eac0..b7ebe9d9d 100644 --- a/libfreerdp/codec/rfx_encode.c +++ b/libfreerdp/codec/rfx_encode.c @@ -26,6 +26,11 @@ #include #include +#include +#include + +#include + #include "rfx_types.h" #include "rfx_rlgr.h" #include "rfx_differential.h" @@ -180,55 +185,19 @@ static void rfx_encode_format_rgb(const BYTE* rgb_data, int width, int height, i } } -void rfx_encode_rgb_to_ycbcr(INT16* y_r_buf, INT16* cb_g_buf, INT16* cr_b_buf) -{ - /* INT32 is used intentionally because we calculate with shifted factors! 
*/ - int i; - INT32 r, g, b; - INT32 y, cb, cr; - - /** - * The encoded YCbCr coefficients are represented as 11.5 fixed-point numbers: - * - * 1 sign bit + 10 integer bits + 5 fractional bits - * - * However only 7 integer bits will be actually used since the value range is [-128.0, 127.0]. - * In other words, the encoded coefficients is scaled by << 5 when interpreted as INT16. - * It will be scaled down to original during the quantization phase. - */ - for (i = 0; i < 4096; i++) - { - r = y_r_buf[i]; - g = cb_g_buf[i]; - b = cr_b_buf[i]; - - /* - * We scale the factors by << 15 into 32-bit integers in order to avoid slower - * floating point multiplications. Since the terms need to be scaled by << 5 we - * simply scale the final sum by >> 10 - * - * Y: 0.299000 << 15 = 9798, 0.587000 << 15 = 19235, 0.114000 << 15 = 3735 - * Cb: 0.168935 << 15 = 5535, 0.331665 << 15 = 10868, 0.500590 << 15 = 16403 - * Cr: 0.499813 << 15 = 16377, 0.418531 << 15 = 13714, 0.081282 << 15 = 2663 - */ - - y = (r * 9798 + g * 19235 + b * 3735) >> 10; - cb = (r * -5535 + g * -10868 + b * 16403) >> 10; - cr = (r * 16377 + g * -13714 + b * -2663) >> 10; - - y_r_buf[i] = MINMAX(y - 4096, -4096, 4095); - cb_g_buf[i] = MINMAX(cb, -4096, 4095); - cr_b_buf[i] = MINMAX(cr, -4096, 4095); - } -} +/* rfx_encode_rgb_to_ycbcr code now resides in the primitives library. 
*/ static void rfx_encode_component(RFX_CONTEXT* context, const UINT32* quantization_values, INT16* data, BYTE* buffer, int buffer_size, int* size) { + INT16* dwt_buffer; + + dwt_buffer = BufferPool_Take(context->priv->BufferPool, -1); /* dwt_buffer */ + PROFILER_ENTER(context->priv->prof_rfx_encode_component); PROFILER_ENTER(context->priv->prof_rfx_dwt_2d_encode); - context->dwt_2d_encode(data, context->priv->dwt_buffer); + context->dwt_2d_encode(data, dwt_buffer); PROFILER_EXIT(context->priv->prof_rfx_dwt_2d_encode); PROFILER_ENTER(context->priv->prof_rfx_quantization_encode); @@ -244,42 +213,54 @@ static void rfx_encode_component(RFX_CONTEXT* context, const UINT32* quantizatio PROFILER_EXIT(context->priv->prof_rfx_rlgr_encode); PROFILER_EXIT(context->priv->prof_rfx_encode_component); + + BufferPool_Return(context->priv->BufferPool, dwt_buffer); } void rfx_encode_rgb(RFX_CONTEXT* context, const BYTE* rgb_data, int width, int height, int rowstride, const UINT32* y_quants, const UINT32* cb_quants, const UINT32* cr_quants, STREAM* data_out, int* y_size, int* cb_size, int* cr_size) { - INT16* y_r_buffer = context->priv->y_r_buffer; - INT16* cb_g_buffer = context->priv->cb_g_buffer; - INT16* cr_b_buffer = context->priv->cr_b_buffer; + INT16* pSrcDst[3]; + primitives_t* prims = primitives_get(); + static const prim_size_t roi_64x64 = { 64, 64 }; + + pSrcDst[0] = BufferPool_Take(context->priv->BufferPool, -1); /* y_r_buffer */ + pSrcDst[1] = BufferPool_Take(context->priv->BufferPool, -1); /* cb_g_buffer */ + pSrcDst[2] = BufferPool_Take(context->priv->BufferPool, -1); /* cr_b_buffer */ PROFILER_ENTER(context->priv->prof_rfx_encode_rgb); PROFILER_ENTER(context->priv->prof_rfx_encode_format_rgb); rfx_encode_format_rgb(rgb_data, width, height, rowstride, - context->pixel_format, context->palette, y_r_buffer, cb_g_buffer, cr_b_buffer); + context->pixel_format, context->palette, pSrcDst[0], pSrcDst[1], pSrcDst[2]); PROFILER_EXIT(context->priv->prof_rfx_encode_format_rgb); - 
PROFILER_ENTER(context->priv->prof_rfx_encode_rgb_to_ycbcr); - context->encode_rgb_to_ycbcr(context->priv->y_r_buffer, context->priv->cb_g_buffer, context->priv->cr_b_buffer); - PROFILER_EXIT(context->priv->prof_rfx_encode_rgb_to_ycbcr); + PROFILER_ENTER(context->priv->prof_rfx_rgb_to_ycbcr); + prims->RGBToYCbCr_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16), + pSrcDst, 64 * sizeof(INT16), &roi_64x64); + PROFILER_EXIT(context->priv->prof_rfx_rgb_to_ycbcr); /* Ensure the buffer is reasonably large enough */ stream_check_size(data_out, 4096); - rfx_encode_component(context, y_quants, context->priv->y_r_buffer, + + rfx_encode_component(context, y_quants, pSrcDst[0], stream_get_tail(data_out), stream_get_left(data_out), y_size); stream_seek(data_out, *y_size); stream_check_size(data_out, 4096); - rfx_encode_component(context, cb_quants, context->priv->cb_g_buffer, + rfx_encode_component(context, cb_quants, pSrcDst[1], stream_get_tail(data_out), stream_get_left(data_out), cb_size); stream_seek(data_out, *cb_size); stream_check_size(data_out, 4096); - rfx_encode_component(context, cr_quants, context->priv->cr_b_buffer, + rfx_encode_component(context, cr_quants, pSrcDst[2], stream_get_tail(data_out), stream_get_left(data_out), cr_size); stream_seek(data_out, *cr_size); PROFILER_EXIT(context->priv->prof_rfx_encode_rgb); + + BufferPool_Return(context->priv->BufferPool, pSrcDst[0]); + BufferPool_Return(context->priv->BufferPool, pSrcDst[1]); + BufferPool_Return(context->priv->BufferPool, pSrcDst[2]); } diff --git a/libfreerdp/codec/rfx_encode.h b/libfreerdp/codec/rfx_encode.h index 56220e838..94dfc706d 100644 --- a/libfreerdp/codec/rfx_encode.h +++ b/libfreerdp/codec/rfx_encode.h @@ -22,8 +22,6 @@ #include -void rfx_encode_rgb_to_ycbcr(INT16* y_r_buf, INT16* cb_g_buf, INT16* cr_b_buf); - void rfx_encode_rgb(RFX_CONTEXT* context, const BYTE* rgb_data, int width, int height, int rowstride, const UINT32* y_quants, const UINT32* cb_quants, const UINT32* cr_quants, 
STREAM* data_out, int* y_size, int* cb_size, int* cr_size); diff --git a/libfreerdp/codec/rfx_neon.c b/libfreerdp/codec/rfx_neon.c index 11965e4b1..7331ea18b 100644 --- a/libfreerdp/codec/rfx_neon.c +++ b/libfreerdp/codec/rfx_neon.c @@ -35,56 +35,7 @@ #include "cpu-features.h" #endif -void rfx_decode_YCbCr_to_RGB_NEON(INT16 * y_r_buffer, INT16 * cb_g_buffer, INT16 * cr_b_buffer) -{ - int16x8_t zero = vdupq_n_s16(0); - int16x8_t max = vdupq_n_s16(255); - int16x8_t y_add = vdupq_n_s16(128); - - int16x8_t* y_r_buf = (int16x8_t*) y_r_buffer; - int16x8_t* cb_g_buf = (int16x8_t*) cb_g_buffer; - int16x8_t* cr_b_buf = (int16x8_t*) cr_b_buffer; - - int i; - for (i = 0; i < 4096 / 8; i++) - { - int16x8_t y = vld1q_s16((INT16*) &y_r_buf[i]); - y = vaddq_s16(y, y_add); - - int16x8_t cr = vld1q_s16((INT16*) &cr_b_buf[i]); - - // r = between((y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5)), 0, 255); - int16x8_t r = vaddq_s16(y, cr); - r = vaddq_s16(r, vshrq_n_s16(cr, 2)); - r = vaddq_s16(r, vshrq_n_s16(cr, 3)); - r = vaddq_s16(r, vshrq_n_s16(cr, 5)); - r = vminq_s16(vmaxq_s16(r, zero), max); - vst1q_s16((INT16*)&y_r_buf[i], r); - - // cb = cb_g_buf[i]; - int16x8_t cb = vld1q_s16((INT16*)&cb_g_buf[i]); - - // g = between(y - (cb >> 2) - (cb >> 4) - (cb >> 5) - (cr >> 1) - (cr >> 3) - (cr >> 4) - (cr >> 5), 0, 255); - int16x8_t g = vsubq_s16(y, vshrq_n_s16(cb, 2)); - g = vsubq_s16(g, vshrq_n_s16(cb, 4)); - g = vsubq_s16(g, vshrq_n_s16(cb, 5)); - g = vsubq_s16(g, vshrq_n_s16(cr, 1)); - g = vsubq_s16(g, vshrq_n_s16(cr, 3)); - g = vsubq_s16(g, vshrq_n_s16(cr, 4)); - g = vsubq_s16(g, vshrq_n_s16(cr, 5)); - g = vminq_s16(vmaxq_s16(g, zero), max); - vst1q_s16((INT16*)&cb_g_buf[i], g); - - // b = between((y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6)), 0, 255); - int16x8_t b = vaddq_s16(y, cb); - b = vaddq_s16(b, vshrq_n_s16(cb, 1)); - b = vaddq_s16(b, vshrq_n_s16(cb, 2)); - b = vaddq_s16(b, vshrq_n_s16(cb, 6)); - b = vminq_s16(vmaxq_s16(b, zero), max); - vst1q_s16((INT16*)&cr_b_buf[i], b); - } 
- -} +/* rfx_decode_YCbCr_to_RGB_NEON code now resides in the primitives library. */ static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) rfx_quantization_decode_block_NEON(INT16 * buffer, const int buffer_size, const UINT32 factor) @@ -338,11 +289,10 @@ void rfx_init_neon(RFX_CONTEXT * context) { DEBUG_RFX("Using NEON optimizations"); - IF_PROFILER(context->priv->prof_rfx_decode_ycbcr_to_rgb->name = "rfx_decode_YCbCr_to_RGB_NEON"); + IF_PROFILER(context->priv->prof_rfx_ycbcr_to_rgb->name = "rfx_decode_YCbCr_to_RGB_NEON"); IF_PROFILER(context->priv->prof_rfx_quantization_decode->name = "rfx_quantization_decode_NEON"); IF_PROFILER(context->priv->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_NEON"); - context->decode_ycbcr_to_rgb = rfx_decode_YCbCr_to_RGB_NEON; context->quantization_decode = rfx_quantization_decode_NEON; context->dwt_2d_decode = rfx_dwt_2d_decode_NEON; } diff --git a/libfreerdp/codec/rfx_pool.c b/libfreerdp/codec/rfx_pool.c deleted file mode 100644 index b7d630ccf..000000000 --- a/libfreerdp/codec/rfx_pool.c +++ /dev/null @@ -1,121 +0,0 @@ -/** - * FreeRDP: A Remote Desktop Protocol Implementation - * RemoteFX Codec Library - Memory Pool - * - * Copyright 2011 Marc-Andre Moreau - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include - -#include - -#include "rfx_pool.h" - -RFX_POOL* rfx_pool_new() -{ - RFX_POOL* pool; - - pool = (RFX_POOL*) malloc(sizeof(RFX_POOL)); - ZeroMemory(pool, sizeof(RFX_POOL)); - - pool->size = 64; - pool->tiles = (RFX_TILE**) malloc(sizeof(RFX_TILE*) * pool->size); - ZeroMemory(pool->tiles, sizeof(RFX_TILE*) * pool->size); - - return pool; -} - -void rfx_pool_free(RFX_POOL* pool) -{ - int i; - RFX_TILE* tile; - - for (i = 0; i < pool->count; i++) - { - tile = pool->tiles[i]; - - if (tile != NULL) - { - if (tile->data != NULL) - free(tile->data); - - free(tile); - } - } - - free(pool->tiles); - free(pool); -} - -void rfx_pool_put_tile(RFX_POOL* pool, RFX_TILE* tile) -{ - if (pool->count >= pool->size) - { - pool->size *= 2; - pool->tiles = (RFX_TILE**) realloc((void*) pool->tiles, sizeof(RFX_TILE*) * pool->size); - } - - pool->tiles[(pool->count)++] = tile; -} - -RFX_TILE* rfx_pool_get_tile(RFX_POOL* pool) -{ - RFX_TILE* tile; - - if (pool->count < 1) - { - tile = (RFX_TILE*) malloc(sizeof(RFX_TILE)); - ZeroMemory(tile, sizeof(RFX_TILE)); - - tile->data = (BYTE*) malloc(4096 * 4); /* 64x64 * 4 */ - } - else - { - tile = pool->tiles[--(pool->count)]; - } - - return tile; -} - -void rfx_pool_put_tiles(RFX_POOL* pool, RFX_TILE** tiles, int count) -{ - int i; - - for (i = 0; i < count; i++) - { - rfx_pool_put_tile(pool, tiles[i]); - } -} - -RFX_TILE** rfx_pool_get_tiles(RFX_POOL* pool, int count) -{ - int i; - RFX_TILE** tiles; - - tiles = (RFX_TILE**) malloc(sizeof(RFX_TILE*) * count); - - for (i = 0; i < count; i++) - { - tiles[i] = rfx_pool_get_tile(pool); - } - - return tiles; -} diff --git a/libfreerdp/codec/rfx_pool.h b/libfreerdp/codec/rfx_pool.h deleted file mode 100644 index 787a32627..000000000 --- a/libfreerdp/codec/rfx_pool.h +++ /dev/null @@ -1,40 +0,0 @@ -/** - * FreeRDP: A Remote Desktop Protocol Implementation - * RemoteFX Codec Library - Memory Pool - * - * Copyright 
2011 Marc-Andre Moreau - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __RFX_POOL_H -#define __RFX_POOL_H - -#include - -struct _RFX_POOL -{ - int size; - int count; - RFX_TILE** tiles; -}; -typedef struct _RFX_POOL RFX_POOL; - -RFX_POOL* rfx_pool_new(); -void rfx_pool_free(RFX_POOL* pool); -void rfx_pool_put_tile(RFX_POOL* pool, RFX_TILE* tile); -RFX_TILE* rfx_pool_get_tile(RFX_POOL* pool); -void rfx_pool_put_tiles(RFX_POOL* pool, RFX_TILE** tiles, int count); -RFX_TILE** rfx_pool_get_tiles(RFX_POOL* pool, int count); - -#endif /* __RFX_POOL_H */ diff --git a/libfreerdp/codec/rfx_quantization.c b/libfreerdp/codec/rfx_quantization.c index a25a8dabe..2cedfd91d 100644 --- a/libfreerdp/codec/rfx_quantization.c +++ b/libfreerdp/codec/rfx_quantization.c @@ -21,36 +21,34 @@ #include "config.h" #endif +#include #include "rfx_quantization.h" -static void rfx_quantization_decode_block(INT16* buffer, int buffer_size, UINT32 factor) +static void rfx_quantization_decode_block(const primitives_t *prims, INT16* buffer, int buffer_size, UINT32 factor) { - INT16* dst; - if (factor == 0) return; - for (dst = buffer; buffer_size > 0; dst++, buffer_size--) - { - *dst <<= factor; - } + prims->lShiftC_16s(buffer, factor, buffer, buffer_size); } void rfx_quantization_decode(INT16* buffer, const UINT32* quantization_values) { - /* Scale the values so that they are represented as 11.5 fixed-point number */ - rfx_quantization_decode_block(buffer, 4096, 5); + const 
primitives_t *prims = primitives_get(); - rfx_quantization_decode_block(buffer, 1024, quantization_values[8] - 6); /* HL1 */ - rfx_quantization_decode_block(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */ - rfx_quantization_decode_block(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */ - rfx_quantization_decode_block(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */ - rfx_quantization_decode_block(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */ - rfx_quantization_decode_block(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */ - rfx_quantization_decode_block(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */ - rfx_quantization_decode_block(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */ - rfx_quantization_decode_block(buffer + 3968, 64, quantization_values[3] - 6); /* HH3 */ - rfx_quantization_decode_block(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */ + /* Scale the values so that they are represented as 11.5 fixed-point number */ + rfx_quantization_decode_block(prims, buffer, 4096, 5); + + rfx_quantization_decode_block(prims, buffer, 1024, quantization_values[8] - 6); /* HL1 */ + rfx_quantization_decode_block(prims, buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */ + rfx_quantization_decode_block(prims, buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */ + rfx_quantization_decode_block(prims, buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */ + rfx_quantization_decode_block(prims, buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */ + rfx_quantization_decode_block(prims, buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */ + rfx_quantization_decode_block(prims, buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */ + rfx_quantization_decode_block(prims, buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */ + rfx_quantization_decode_block(prims, buffer + 3968, 64, quantization_values[3] - 6); /* HH3 */ + rfx_quantization_decode_block(prims, 
buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */ } static void rfx_quantization_encode_block(INT16* buffer, int buffer_size, UINT32 factor) @@ -62,6 +60,7 @@ static void rfx_quantization_encode_block(INT16* buffer, int buffer_size, UINT32 return; half = (1 << (factor - 1)); + /* Could probably use prims->rShiftC_16s(dst+half, factor, dst, buffer_size); */ for (dst = buffer; buffer_size > 0; dst++, buffer_size--) { *dst = (*dst + half) >> factor; diff --git a/libfreerdp/codec/rfx_sse2.c b/libfreerdp/codec/rfx_sse2.c index ac2376d28..72b0570a1 100644 --- a/libfreerdp/codec/rfx_sse2.c +++ b/libfreerdp/codec/rfx_sse2.c @@ -52,177 +52,8 @@ _mm_prefetch_buffer(char * buffer, int num_bytes) } } -static void rfx_decode_ycbcr_to_rgb_sse2(INT16* y_r_buffer, INT16* cb_g_buffer, INT16* cr_b_buffer) -{ - __m128i zero = _mm_setzero_si128(); - __m128i max = _mm_set1_epi16(255); - - __m128i* y_r_buf = (__m128i*) y_r_buffer; - __m128i* cb_g_buf = (__m128i*) cb_g_buffer; - __m128i* cr_b_buf = (__m128i*) cr_b_buffer; - - __m128i y; - __m128i cr; - __m128i cb; - __m128i r; - __m128i g; - __m128i b; - - int i; - - __m128i r_cr = _mm_set1_epi16(22986); // 1.403 << 14 - __m128i g_cb = _mm_set1_epi16(-5636); // -0.344 << 14 - __m128i g_cr = _mm_set1_epi16(-11698); // -0.714 << 14 - __m128i b_cb = _mm_set1_epi16(28999); // 1.770 << 14 - __m128i c4096 = _mm_set1_epi16(4096); - - for (i = 0; i < (4096 * sizeof(INT16) / sizeof(__m128i)); i += (CACHE_LINE_BYTES / sizeof(__m128i))) - { - _mm_prefetch((char*)(&y_r_buf[i]), _MM_HINT_NTA); - _mm_prefetch((char*)(&cb_g_buf[i]), _MM_HINT_NTA); - _mm_prefetch((char*)(&cr_b_buf[i]), _MM_HINT_NTA); - } - for (i = 0; i < (4096 * sizeof(INT16) / sizeof(__m128i)); i++) - { - /* - In order to use SSE2 signed 16-bit integer multiplication we need to convert - the floating point factors to signed int without loosing information. - The result of this multiplication is 32 bit and we have two SSE instructions - that return either the hi or lo word. 
- Thus we will multiply the factors by the highest possible 2^n, take the - upper 16 bits of the signed 32-bit result (_mm_mulhi_epi16) and correct this - result by multiplying it by 2^(16-n). - For the given factors in the conversion matrix the best possible n is 14. - - Example for calculating r: - r = (y>>5) + 128 + (cr*1.403)>>5 // our base formula - r = (y>>5) + 128 + (HIWORD(cr*(1.403<<14)<<2))>>5 // see above - r = (y+4096)>>5 + (HIWORD(cr*22986)<<2)>>5 // simplification - r = ((y+4096)>>2 + HIWORD(cr*22986)) >> 3 - */ - - /* y = (y_r_buf[i] + 4096) >> 2 */ - y = _mm_load_si128(&y_r_buf[i]); - y = _mm_add_epi16(y, c4096); - y = _mm_srai_epi16(y, 2); - /* cb = cb_g_buf[i]; */ - cb = _mm_load_si128(&cb_g_buf[i]); - /* cr = cr_b_buf[i]; */ - cr = _mm_load_si128(&cr_b_buf[i]); - - /* (y + HIWORD(cr*22986)) >> 3 */ - r = _mm_add_epi16(y, _mm_mulhi_epi16(cr, r_cr)); - r = _mm_srai_epi16(r, 3); - /* y_r_buf[i] = MINMAX(r, 0, 255); */ - _mm_between_epi16(r, zero, max); - _mm_store_si128(&y_r_buf[i], r); - - /* (y + HIWORD(cb*-5636) + HIWORD(cr*-11698)) >> 3 */ - g = _mm_add_epi16(y, _mm_mulhi_epi16(cb, g_cb)); - g = _mm_add_epi16(g, _mm_mulhi_epi16(cr, g_cr)); - g = _mm_srai_epi16(g, 3); - /* cb_g_buf[i] = MINMAX(g, 0, 255); */ - _mm_between_epi16(g, zero, max); - _mm_store_si128(&cb_g_buf[i], g); - - /* (y + HIWORD(cb*28999)) >> 3 */ - b = _mm_add_epi16(y, _mm_mulhi_epi16(cb, b_cb)); - b = _mm_srai_epi16(b, 3); - /* cr_b_buf[i] = MINMAX(b, 0, 255); */ - _mm_between_epi16(b, zero, max); - _mm_store_si128(&cr_b_buf[i], b); - } -} - -/* The encodec YCbCr coeffectients are represented as 11.5 fixed-point numbers. 
See rfx_encode.c */ -static void rfx_encode_rgb_to_ycbcr_sse2(INT16* y_r_buffer, INT16* cb_g_buffer, INT16* cr_b_buffer) -{ - __m128i min = _mm_set1_epi16(-128 << 5); - __m128i max = _mm_set1_epi16(127 << 5); - - __m128i* y_r_buf = (__m128i*) y_r_buffer; - __m128i* cb_g_buf = (__m128i*) cb_g_buffer; - __m128i* cr_b_buf = (__m128i*) cr_b_buffer; - - __m128i y; - __m128i cr; - __m128i cb; - __m128i r; - __m128i g; - __m128i b; - - __m128i y_r = _mm_set1_epi16(9798); // 0.299000 << 15 - __m128i y_g = _mm_set1_epi16(19235); // 0.587000 << 15 - __m128i y_b = _mm_set1_epi16(3735); // 0.114000 << 15 - __m128i cb_r = _mm_set1_epi16(-5535); // -0.168935 << 15 - __m128i cb_g = _mm_set1_epi16(-10868); // -0.331665 << 15 - __m128i cb_b = _mm_set1_epi16(16403); // 0.500590 << 15 - __m128i cr_r = _mm_set1_epi16(16377); // 0.499813 << 15 - __m128i cr_g = _mm_set1_epi16(-13714); // -0.418531 << 15 - __m128i cr_b = _mm_set1_epi16(-2663); // -0.081282 << 15 - - int i; - - for (i = 0; i < (4096 * sizeof(INT16) / sizeof(__m128i)); i += (CACHE_LINE_BYTES / sizeof(__m128i))) - { - _mm_prefetch((char*)(&y_r_buf[i]), _MM_HINT_NTA); - _mm_prefetch((char*)(&cb_g_buf[i]), _MM_HINT_NTA); - _mm_prefetch((char*)(&cr_b_buf[i]), _MM_HINT_NTA); - } - for (i = 0; i < (4096 * sizeof(INT16) / sizeof(__m128i)); i++) - { - /* - In order to use SSE2 signed 16-bit integer multiplication we need to convert - the floating point factors to signed int without loosing information. - The result of this multiplication is 32 bit and using SSE2 we get either the - product's hi or lo word. - Thus we will multiply the factors by the highest possible 2^n and take the - upper 16 bits of the signed 32-bit result (_mm_mulhi_epi16). - Since the final result needs to be scaled by << 5 and also in in order to keep - the precision within the upper 16 bits we will also have to scale the RGB - values used in the multiplication by << 5+(16-n). 
- */ - - /* r = y_r_buf[i]; */ - r = _mm_load_si128(&y_r_buf[i]); - - /* g = cb_g_buf[i]; */ - g = _mm_load_si128(&cb_g_buf[i]); - - /* b = cr_b_buf[i]; */ - b = _mm_load_si128(&cr_b_buf[i]); - - /* r<<6; g<<6; b<<6 */ - r = _mm_slli_epi16(r, 6); - g = _mm_slli_epi16(g, 6); - b = _mm_slli_epi16(b, 6); - - /* y = HIWORD(r*y_r) + HIWORD(g*y_g) + HIWORD(b*y_b) + min */ - y = _mm_mulhi_epi16(r, y_r); - y = _mm_add_epi16(y, _mm_mulhi_epi16(g, y_g)); - y = _mm_add_epi16(y, _mm_mulhi_epi16(b, y_b)); - y = _mm_add_epi16(y, min); - /* y_r_buf[i] = MINMAX(y, 0, (255 << 5)) - (128 << 5); */ - _mm_between_epi16(y, min, max); - _mm_store_si128(&y_r_buf[i], y); - - /* cb = HIWORD(r*cb_r) + HIWORD(g*cb_g) + HIWORD(b*cb_b) */ - cb = _mm_mulhi_epi16(r, cb_r); - cb = _mm_add_epi16(cb, _mm_mulhi_epi16(g, cb_g)); - cb = _mm_add_epi16(cb, _mm_mulhi_epi16(b, cb_b)); - /* cb_g_buf[i] = MINMAX(cb, (-128 << 5), (127 << 5)); */ - _mm_between_epi16(cb, min, max); - _mm_store_si128(&cb_g_buf[i], cb); - - /* cr = HIWORD(r*cr_r) + HIWORD(g*cr_g) + HIWORD(b*cr_b) */ - cr = _mm_mulhi_epi16(r, cr_r); - cr = _mm_add_epi16(cr, _mm_mulhi_epi16(g, cr_g)); - cr = _mm_add_epi16(cr, _mm_mulhi_epi16(b, cr_b)); - /* cr_b_buf[i] = MINMAX(cr, (-128 << 5), (127 << 5)); */ - _mm_between_epi16(cr, min, max); - _mm_store_si128(&cr_b_buf[i], cr); - } -} +/* rfx_decode_ycbcr_to_rgb_sse2 code now resides in the primitives library. */ +/* rfx_encode_rgb_to_ycbcr_sse2 code now resides in the primitives library. 
*/ static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) rfx_quantization_decode_block_sse2(INT16* buffer, const int buffer_size, const UINT32 factor) @@ -658,15 +489,11 @@ void rfx_init_sse2(RFX_CONTEXT* context) { DEBUG_RFX("Using SSE2 optimizations"); - IF_PROFILER(context->priv->prof_rfx_decode_ycbcr_to_rgb->name = "rfx_decode_ycbcr_to_rgb_sse2"); - IF_PROFILER(context->priv->prof_rfx_encode_rgb_to_ycbcr->name = "rfx_encode_rgb_to_ycbcr_sse2"); IF_PROFILER(context->priv->prof_rfx_quantization_decode->name = "rfx_quantization_decode_sse2"); IF_PROFILER(context->priv->prof_rfx_quantization_encode->name = "rfx_quantization_encode_sse2"); IF_PROFILER(context->priv->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_sse2"); IF_PROFILER(context->priv->prof_rfx_dwt_2d_encode->name = "rfx_dwt_2d_encode_sse2"); - context->decode_ycbcr_to_rgb = rfx_decode_ycbcr_to_rgb_sse2; - context->encode_rgb_to_ycbcr = rfx_encode_rgb_to_ycbcr_sse2; context->quantization_decode = rfx_quantization_decode_sse2; context->quantization_encode = rfx_quantization_encode_sse2; context->dwt_2d_decode = rfx_dwt_2d_decode_sse2; diff --git a/libfreerdp/codec/rfx_types.h b/libfreerdp/codec/rfx_types.h index 223cbf9cd..80ae12511 100644 --- a/libfreerdp/codec/rfx_types.h +++ b/libfreerdp/codec/rfx_types.h @@ -24,6 +24,10 @@ #include "config.h" #endif +#include +#include +#include + #include #include @@ -33,25 +37,19 @@ #define DEBUG_RFX(fmt, ...) 
DEBUG_NULL(fmt, ## __VA_ARGS__) #endif -#include "rfx_pool.h" - struct _RFX_CONTEXT_PRIV { - /* pre-allocated buffers */ + wQueue* TilePool; + wQueue* TileQueue; - RFX_POOL* pool; /* memory pool */ + BOOL UseThreads; + DWORD MinThreadCount; + DWORD MaxThreadCount; - INT16 y_r_mem[4096 + 8]; /* 4096 = 64x64 (+ 8x2 = 16 for mem align) */ - INT16 cb_g_mem[4096 + 8]; /* 4096 = 64x64 (+ 8x2 = 16 for mem align) */ - INT16 cr_b_mem[4096 + 8]; /* 4096 = 64x64 (+ 8x2 = 16 for mem align) */ + PTP_POOL ThreadPool; + TP_CALLBACK_ENVIRON ThreadPoolEnv; - INT16* y_r_buffer; - INT16* cb_g_buffer; - INT16* cr_b_buffer; - - INT16 dwt_mem[32 * 32 * 2 * 2 + 8]; /* maximum sub-band width is 32 */ - - INT16* dwt_buffer; + wBufferPool* BufferPool; /* profilers */ PROFILER_DEFINE(prof_rfx_decode_rgb); @@ -60,7 +58,7 @@ struct _RFX_CONTEXT_PRIV PROFILER_DEFINE(prof_rfx_differential_decode); PROFILER_DEFINE(prof_rfx_quantization_decode); PROFILER_DEFINE(prof_rfx_dwt_2d_decode); - PROFILER_DEFINE(prof_rfx_decode_ycbcr_to_rgb); + PROFILER_DEFINE(prof_rfx_ycbcr_to_rgb); PROFILER_DEFINE(prof_rfx_decode_format_rgb); PROFILER_DEFINE(prof_rfx_encode_rgb); @@ -69,7 +67,7 @@ struct _RFX_CONTEXT_PRIV PROFILER_DEFINE(prof_rfx_differential_encode); PROFILER_DEFINE(prof_rfx_quantization_encode); PROFILER_DEFINE(prof_rfx_dwt_2d_encode); - PROFILER_DEFINE(prof_rfx_encode_rgb_to_ycbcr); + PROFILER_DEFINE(prof_rfx_rgb_to_ycbcr); PROFILER_DEFINE(prof_rfx_encode_format_rgb); }; diff --git a/libfreerdp/core/activation.c b/libfreerdp/core/activation.c index 0554c5711..115cd1218 100644 --- a/libfreerdp/core/activation.c +++ b/libfreerdp/core/activation.c @@ -145,9 +145,7 @@ BOOL rdp_send_server_control_cooperate_pdu(rdpRdp* rdp) stream_write_UINT16(s, 0); /* grantId (2 bytes) */ stream_write_UINT32(s, 0); /* controlId (4 bytes) */ - rdp_send_data_pdu(rdp, s, DATA_PDU_TYPE_CONTROL, rdp->mcs->user_id); - - return TRUE; + return rdp_send_data_pdu(rdp, s, DATA_PDU_TYPE_CONTROL, rdp->mcs->user_id); } BOOL 
rdp_send_server_control_granted_pdu(rdpRdp* rdp) @@ -160,9 +158,7 @@ BOOL rdp_send_server_control_granted_pdu(rdpRdp* rdp) stream_write_UINT16(s, rdp->mcs->user_id); /* grantId (2 bytes) */ stream_write_UINT32(s, 0x03EA); /* controlId (4 bytes) */ - rdp_send_data_pdu(rdp, s, DATA_PDU_TYPE_CONTROL, rdp->mcs->user_id); - - return TRUE; + return rdp_send_data_pdu(rdp, s, DATA_PDU_TYPE_CONTROL, rdp->mcs->user_id); } BOOL rdp_send_client_control_pdu(rdpRdp* rdp, UINT16 action) @@ -288,9 +284,17 @@ BOOL rdp_recv_deactivate_all(rdpRdp* rdp, STREAM* s) */ if (stream_get_left(s) > 0) { - stream_read_UINT32(s, rdp->settings->ShareId); /* shareId (4 bytes) */ - stream_read_UINT16(s, lengthSourceDescriptor); /* lengthSourceDescriptor (2 bytes) */ - stream_seek(s, lengthSourceDescriptor); /* sourceDescriptor (should be 0x00) */ + do { + if(stream_get_left(s) < 4) + break; + stream_read_UINT32(s, rdp->settings->ShareId); /* shareId (4 bytes) */ + if(stream_get_left(s) < 2) + break; + stream_read_UINT16(s, lengthSourceDescriptor); /* lengthSourceDescriptor (2 bytes) */ + if(stream_get_left(s) < lengthSourceDescriptor) + break; + stream_seek(s, lengthSourceDescriptor); /* sourceDescriptor (should be 0x00) */ + } while(0); } rdp->state = CONNECTION_STATE_CAPABILITY; diff --git a/libfreerdp/core/capabilities.c b/libfreerdp/core/capabilities.c index 5bfe0867e..4656357d3 100644 --- a/libfreerdp/core/capabilities.c +++ b/libfreerdp/core/capabilities.c @@ -2147,9 +2147,7 @@ BOOL rdp_send_demand_active(rdpRdp* rdp) rdp_write_demand_active(s, rdp->settings); - rdp_send_pdu(rdp, s, PDU_TYPE_DEMAND_ACTIVE, rdp->mcs->user_id); - - return TRUE; + return rdp_send_pdu(rdp, s, PDU_TYPE_DEMAND_ACTIVE, rdp->mcs->user_id); } BOOL rdp_recv_confirm_active(rdpRdp* rdp, STREAM* s) @@ -2305,6 +2303,10 @@ void rdp_write_confirm_active(STREAM* s, rdpSettings* settings) rdp_write_frame_acknowledge_capability_set(s, settings); } } + else + { + settings->FrameAcknowledge = 0; + } if 
(settings->ReceivedCapabilities[CAPSET_TYPE_BITMAP_CACHE_V3_CODEC_ID]) { diff --git a/libfreerdp/core/certificate.c b/libfreerdp/core/certificate.c index 150892de7..7a61c8000 100644 --- a/libfreerdp/core/certificate.c +++ b/libfreerdp/core/certificate.c @@ -123,6 +123,29 @@ * */ +static const char *certificate_read_errors[] = { + "Certificate tag", + "TBSCertificate", + "Explicit Contextual Tag [0]", + "version", + "CertificateSerialNumber", + "AlgorithmIdentifier", + "Issuer Name", + "Validity", + "Subject Name", + "SubjectPublicKeyInfo Tag", + "subjectPublicKeyInfo::AlgorithmIdentifier", + "subjectPublicKeyInfo::subjectPublicKey", + "RSAPublicKey Tag", + "modulusLength", + "zero padding", + "modulusLength", + "modulus", + "publicExponent length", + "publicExponent" +}; + + /** * Read X.509 Certificate * @param certificate certificate module @@ -137,88 +160,105 @@ BOOL certificate_read_x509_certificate(rdpCertBlob* cert, rdpCertInfo* info) UINT32 version; int modulus_length; int exponent_length; + int error = 0; s = stream_new(0); stream_attach(s, cert->data, cert->length); + info->Modulus = 0; if(!ber_read_sequence_tag(s, &length)) /* Certificate (SEQUENCE) */ goto error1; + error++; if(!ber_read_sequence_tag(s, &length)) /* TBSCertificate (SEQUENCE) */ goto error1; + error++; - /* Explicit Contextual Tag [0] */ - if(!ber_read_contextual_tag(s, 0, &length, TRUE)) + if(!ber_read_contextual_tag(s, 0, &length, TRUE)) /* Explicit Contextual Tag [0] */ goto error1; + error++; if(!ber_read_integer(s, &version)) /* version (INTEGER) */ goto error1; + error++; version++; /* serialNumber */ if(!ber_read_integer(s, NULL)) /* CertificateSerialNumber (INTEGER) */ goto error1; + error++; /* signature */ if(!ber_read_sequence_tag(s, &length) || !stream_skip(s, length)) /* AlgorithmIdentifier (SEQUENCE) */ goto error1; + error++; /* issuer */ if(!ber_read_sequence_tag(s, &length) || !stream_skip(s, length)) /* Name (SEQUENCE) */ goto error1; + error++; /* validity */ 
if(!ber_read_sequence_tag(s, &length) || !stream_skip(s, length)) /* Validity (SEQUENCE) */ goto error1; + error++; /* subject */ if(!ber_read_sequence_tag(s, &length) || !stream_skip(s, length)) /* Name (SEQUENCE) */ goto error1; + error++; /* subjectPublicKeyInfo */ if(!ber_read_sequence_tag(s, &length)) /* SubjectPublicKeyInfo (SEQUENCE) */ goto error1; + error++; /* subjectPublicKeyInfo::AlgorithmIdentifier */ if(!ber_read_sequence_tag(s, &length) || !stream_skip(s, length)) /* AlgorithmIdentifier (SEQUENCE) */ goto error1; + error++; /* subjectPublicKeyInfo::subjectPublicKey */ if(!ber_read_bit_string(s, &length, &padding)) /* BIT_STRING */ goto error1; + error++; /* RSAPublicKey (SEQUENCE) */ if(!ber_read_sequence_tag(s, &length)) /* SEQUENCE */ goto error1; + error++; if(!ber_read_integer_length(s, &modulus_length)) /* modulus (INTEGER) */ goto error1; + error++; /* skip zero padding, if any */ do { - if(stream_get_left(s) < padding) + if(stream_get_left(s) < 1) goto error1; stream_peek_BYTE(s, padding); if (padding == 0) { - if(stream_get_left(s) < 1) + if(!stream_skip(s, 1)) goto error1; - stream_seek(s, 1); modulus_length--; } } while (padding == 0); + error++; if(stream_get_left(s) < modulus_length) goto error1; info->ModulusLength = modulus_length; info->Modulus = (BYTE*) malloc(info->ModulusLength); stream_read(s, info->Modulus, info->ModulusLength); + error++; if(!ber_read_integer_length(s, &exponent_length)) /* publicExponent (INTEGER) */ goto error2; - if(stream_get_left(s) < exponent_length) + error++; + if(stream_get_left(s) < exponent_length || exponent_length > 4) goto error2; stream_read(s, &info->exponent[4 - exponent_length], exponent_length); crypto_reverse(info->Modulus, info->ModulusLength); @@ -232,6 +272,7 @@ error2: free(info->Modulus); info->Modulus = 0; error1: + printf("error reading when reading certificate: part=%s error=%d\n", certificate_read_errors[error], error); stream_detach(s); stream_free(s); return FALSE; @@ -490,9 +531,13 
@@ BOOL certificate_read_server_x509_certificate_chain(rdpCertificate* certificate, DEBUG_CERTIFICATE("License Server Certificate"); ret = certificate_read_x509_certificate(&certificate->x509_cert_chain->array[i], &cert_info); DEBUG_LICENSE("modulus length:%d", (int) cert_info.ModulusLength); - free(cert_info.Modulus); - if(!ret) + if (cert_info.Modulus) + free(cert_info.Modulus); + if (!ret) { + printf("failed to read License Server, content follows:\n"); + winpr_HexDump(certificate->x509_cert_chain->array[i].data, certificate->x509_cert_chain->array[i].length); return FALSE; + } } else if (numCertBlobs - i == 1) { diff --git a/libfreerdp/core/connection.c b/libfreerdp/core/connection.c index 5eb43b1ee..8f65d3dc0 100644 --- a/libfreerdp/core/connection.c +++ b/libfreerdp/core/connection.c @@ -337,7 +337,8 @@ static BOOL rdp_server_establish_keys(rdpRdp* rdp, STREAM* s) return FALSE; } - rdp_read_security_header(s, &sec_flags); + if (!rdp_read_security_header(s, &sec_flags)) + return FALSE; if ((sec_flags & SEC_EXCHANGE_PKT) == 0) { @@ -345,7 +346,12 @@ static BOOL rdp_server_establish_keys(rdpRdp* rdp, STREAM* s) return FALSE; } + if(stream_get_left(s) < 4) + return FALSE; stream_read_UINT32(s, rand_len); + if(stream_get_left(s) < rand_len + 8) /* include 8 bytes of padding */ + return FALSE; + key_len = rdp->settings->RdpServerRsaKey->ModulusLength; if (rand_len != key_len + 8) @@ -547,9 +553,7 @@ BOOL rdp_client_connect_demand_active(rdpRdp* rdp, STREAM* s) rdp->state = CONNECTION_STATE_FINALIZATION; update_reset_state(rdp->update); - rdp_client_connect_finalize(rdp); - - return TRUE; + return rdp_client_connect_finalize(rdp); } BOOL rdp_client_connect_finalize(rdpRdp* rdp) diff --git a/libfreerdp/core/fastpath.c b/libfreerdp/core/fastpath.c index b497f7eec..4ab569623 100644 --- a/libfreerdp/core/fastpath.c +++ b/libfreerdp/core/fastpath.c @@ -198,8 +198,9 @@ static BOOL fastpath_recv_update_synchronize(rdpFastPath* fastpath, STREAM* s) return TRUE; } -static 
BOOL fastpath_recv_update(rdpFastPath* fastpath, BYTE updateCode, UINT32 size, STREAM* s) +static int fastpath_recv_update(rdpFastPath* fastpath, BYTE updateCode, UINT32 size, STREAM* s) { + int status = 0; rdpUpdate* update = fastpath->rdp->update; rdpContext* context = fastpath->rdp->update->context; rdpPointerUpdate* pointer = update->pointer; @@ -213,13 +214,13 @@ static BOOL fastpath_recv_update(rdpFastPath* fastpath, BYTE updateCode, UINT32 { case FASTPATH_UPDATETYPE_ORDERS: if (!fastpath_recv_orders(fastpath, s)) - return FALSE; + return -1; break; case FASTPATH_UPDATETYPE_BITMAP: case FASTPATH_UPDATETYPE_PALETTE: - if(!fastpath_recv_update_common(fastpath, s)) - return FALSE; + if (!fastpath_recv_update_common(fastpath, s)) + return -1; break; case FASTPATH_UPDATETYPE_SYNCHRONIZE: @@ -230,8 +231,7 @@ static BOOL fastpath_recv_update(rdpFastPath* fastpath, BYTE updateCode, UINT32 break; case FASTPATH_UPDATETYPE_SURFCMDS: - if (update_recv_surfcmds(update, size, s) < 0) - return FALSE; + status = update_recv_surfcmds(update, size, s); break; case FASTPATH_UPDATETYPE_PTR_NULL: @@ -246,25 +246,25 @@ static BOOL fastpath_recv_update(rdpFastPath* fastpath, BYTE updateCode, UINT32 case FASTPATH_UPDATETYPE_PTR_POSITION: if (!update_read_pointer_position(s, &pointer->pointer_position)) - return FALSE; + return -1; IFCALL(pointer->PointerPosition, context, &pointer->pointer_position); break; case FASTPATH_UPDATETYPE_COLOR: if (!update_read_pointer_color(s, &pointer->pointer_color)) - return FALSE; + return -1; IFCALL(pointer->PointerColor, context, &pointer->pointer_color); break; case FASTPATH_UPDATETYPE_CACHED: if (!update_read_pointer_cached(s, &pointer->pointer_cached)) - return FALSE; + return -1; IFCALL(pointer->PointerCached, context, &pointer->pointer_cached); break; case FASTPATH_UPDATETYPE_POINTER: if (!update_read_pointer_new(s, &pointer->pointer_new)) - return FALSE; + return -1; IFCALL(pointer->PointerNew, context, &pointer->pointer_new); break; @@ 
-273,11 +273,12 @@ static BOOL fastpath_recv_update(rdpFastPath* fastpath, BYTE updateCode, UINT32 break; } - return TRUE; + return status; } static BOOL fastpath_recv_update_data(rdpFastPath* fastpath, STREAM* s) { + int status; UINT16 size; int next_pos; UINT32 totalSize; @@ -287,10 +288,11 @@ static BOOL fastpath_recv_update_data(rdpFastPath* fastpath, STREAM* s) BYTE compressionFlags; STREAM* update_stream; STREAM* comp_stream; - rdpRdp *rdp; + rdpRdp* rdp; UINT32 roff; UINT32 rlen; + status = 0; rdp = fastpath->rdp; fastpath_read_update_header(s, &updateCode, &fragmentation, &compression); @@ -301,8 +303,10 @@ static BOOL fastpath_recv_update_data(rdpFastPath* fastpath, STREAM* s) compressionFlags = 0; stream_read_UINT16(s, size); - if(stream_get_left(s) < size) - return FALSE; + + if (stream_get_left(s) < size) + return -1; + next_pos = stream_get_pos(s) + size; comp_stream = s; @@ -348,8 +352,10 @@ static BOOL fastpath_recv_update_data(rdpFastPath* fastpath, STREAM* s) if (update_stream) { - if (!fastpath_recv_update(fastpath, updateCode, totalSize, update_stream)) - return FALSE; + status = fastpath_recv_update(fastpath, updateCode, totalSize, update_stream); + + if (status < 0) + return -1; } stream_set_pos(s, next_pos); @@ -357,24 +363,25 @@ static BOOL fastpath_recv_update_data(rdpFastPath* fastpath, STREAM* s) if (comp_stream != s) free(comp_stream); - return TRUE; + return status; } int fastpath_recv_updates(rdpFastPath* fastpath, STREAM* s) { + int status = 0; rdpUpdate* update = fastpath->rdp->update; IFCALL(update->BeginPaint, update->context); while (stream_get_left(s) >= 3) { - if (!fastpath_recv_update_data(fastpath, s)) + if (fastpath_recv_update_data(fastpath, s) < 0) return -1; } IFCALL(update->EndPaint, update->context); - return 0; + return status; } static BOOL fastpath_read_input_event_header(STREAM* s, BYTE* eventFlags, BYTE* eventCode) diff --git a/libfreerdp/core/gcc.c b/libfreerdp/core/gcc.c index 4d9131af4..1a761e4be 100644 --- 
a/libfreerdp/core/gcc.c +++ b/libfreerdp/core/gcc.c @@ -488,7 +488,7 @@ void gcc_write_user_data_header(STREAM* s, UINT16 type, UINT16 length) BOOL gcc_read_client_core_data(STREAM* s, rdpSettings* settings, UINT16 blockLength) { - char* str; + char* str = NULL; UINT32 version; UINT32 color_depth; UINT16 colorDepth = 0; @@ -518,6 +518,7 @@ BOOL gcc_read_client_core_data(STREAM* s, rdpSettings* settings, UINT16 blockLen sprintf_s(settings->ClientHostname, 31, "%s", str); settings->ClientHostname[31] = 0; free(str); + str = NULL; stream_read_UINT32(s, settings->KeyboardType); /* KeyboardType */ stream_read_UINT32(s, settings->KeyboardSubType); /* KeyboardSubType */ diff --git a/libfreerdp/core/info.c b/libfreerdp/core/info.c index 40e1aa48a..fbcbd1c12 100644 --- a/libfreerdp/core/info.c +++ b/libfreerdp/core/info.c @@ -439,7 +439,9 @@ BOOL rdp_recv_client_info(rdpRdp* rdp, STREAM* s) if (!rdp_read_header(rdp, s, &length, &channelId)) return FALSE; - rdp_read_security_header(s, &securityFlags); + if (!rdp_read_security_header(s, &securityFlags)) + return FALSE; + if ((securityFlags & SEC_INFO_PKT) == 0) return FALSE; diff --git a/libfreerdp/core/license.c b/libfreerdp/core/license.c index 3f3829d2d..c5a945493 100644 --- a/libfreerdp/core/license.c +++ b/libfreerdp/core/license.c @@ -932,9 +932,7 @@ BOOL license_send_valid_client_error_packet(rdpLicense* license) license_write_binary_blob(s, license->error_info); - license_send(license, s, ERROR_ALERT); - - return TRUE; + return license_send(license, s, ERROR_ALERT); } /** diff --git a/libfreerdp/core/mcs.c b/libfreerdp/core/mcs.c index 8ca2ccf4e..a436f3c5a 100644 --- a/libfreerdp/core/mcs.c +++ b/libfreerdp/core/mcs.c @@ -532,6 +532,7 @@ BOOL mcs_send_connect_response(rdpMcs* mcs) { STREAM* s; int length; + int ret; BYTE *bm, *em; STREAM* gcc_CCrsp; STREAM* server_data; @@ -556,12 +557,12 @@ BOOL mcs_send_connect_response(rdpMcs* mcs) tpdu_write_data(s); stream_set_mark(s, em); - transport_write(mcs->transport, s); + 
ret = transport_write(mcs->transport, s); stream_free(gcc_CCrsp); stream_free(server_data); - return TRUE; + return (ret < 0) ? FALSE : TRUE; } /** diff --git a/libfreerdp/core/orders.c b/libfreerdp/core/orders.c index 35e0f3cfd..4be63fdf4 100644 --- a/libfreerdp/core/orders.c +++ b/libfreerdp/core/orders.c @@ -152,14 +152,14 @@ static INLINE BOOL update_read_coord(STREAM* s, INT32* coord, BOOL delta) if (delta) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, lsi8); *coord += lsi8; } else { - if(stream_get_left(s) < 2) + if (stream_get_left(s) < 2) return FALSE; stream_read_UINT16(s, lsi16); *coord = lsi16; @@ -171,7 +171,7 @@ static INLINE BOOL update_read_color(STREAM* s, UINT32* color) { BYTE byte; - if(stream_get_left(s) < 3) + if (stream_get_left(s) < 3) return FALSE; stream_read_BYTE(s, byte); *color = byte; @@ -212,13 +212,13 @@ static INLINE BOOL update_read_2byte_unsigned(STREAM* s, UINT32* value) { BYTE byte; - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, byte); if (byte & 0x80) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; *value = (byte & 0x7F) << 8; @@ -237,7 +237,7 @@ static INLINE BOOL update_read_2byte_signed(STREAM* s, INT32* value) BYTE byte; BOOL negative; - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, byte); @@ -248,7 +248,7 @@ static INLINE BOOL update_read_2byte_signed(STREAM* s, INT32* value) if (byte & 0x80) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, byte); *value = (*value << 8) | byte; @@ -264,12 +264,12 @@ static INLINE BOOL update_read_4byte_unsigned(STREAM* s, UINT32* value) BYTE byte; BYTE count; - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, byte); count = (byte & 0xC0) >> 6; - if(stream_get_left(s) < count) + if (stream_get_left(s) < count) return FALSE; switch (count) @@ 
-312,7 +312,7 @@ static INLINE BOOL update_read_delta(STREAM* s, INT32* value) { BYTE byte; - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, byte); @@ -323,7 +323,7 @@ static INLINE BOOL update_read_delta(STREAM* s, INT32* value) if (byte & 0x80) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, byte); *value = (*value << 8) | byte; @@ -357,28 +357,28 @@ static INLINE BOOL update_read_brush(STREAM* s, rdpBrush* brush, BYTE fieldFlags { if (fieldFlags & ORDER_FIELD_01) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, brush->x); } if (fieldFlags & ORDER_FIELD_02) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, brush->y); } if (fieldFlags & ORDER_FIELD_03) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, brush->style); } if (fieldFlags & ORDER_FIELD_04) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, brush->hatch); } @@ -395,7 +395,7 @@ static INLINE BOOL update_read_brush(STREAM* s, rdpBrush* brush, BYTE fieldFlags if (fieldFlags & ORDER_FIELD_05) { - if(stream_get_left(s) < 7) + if (stream_get_left(s) < 7) return FALSE; brush->data = (BYTE*) brush->p8x8; stream_read_BYTE(s, brush->data[7]); @@ -422,7 +422,7 @@ static INLINE BOOL update_read_delta_rects(STREAM* s, DELTA_RECT* rectangles, in zeroBitsSize = ((number + 1) / 2); - if(stream_get_left(s) < zeroBitsSize) + if (stream_get_left(s) < zeroBitsSize) return FALSE; stream_get_mark(s, zeroBits); stream_seek(s, zeroBitsSize); @@ -442,7 +442,7 @@ static INLINE BOOL update_read_delta_rects(STREAM* s, DELTA_RECT* rectangles, in if (~flags & 0x20) { - if(!update_read_delta(s, &rectangles[i].width)) + if (!update_read_delta(s, &rectangles[i].width)) return FALSE; } else @@ -450,7 +450,7 @@ static INLINE BOOL update_read_delta_rects(STREAM* s, 
DELTA_RECT* rectangles, in if (~flags & 0x10) { - if(!update_read_delta(s, &rectangles[i].height)) + if (!update_read_delta(s, &rectangles[i].height)) return FALSE; } else @@ -473,7 +473,7 @@ static INLINE BOOL update_read_delta_points(STREAM* s, DELTA_POINT* points, int zeroBitsSize = ((number + 3) / 4); - if(stream_get_left(s) < zeroBitsSize) + if (stream_get_left(s) < zeroBitsSize) return FALSE; stream_get_mark(s, zeroBits); stream_seek(s, zeroBitsSize); @@ -501,8 +501,10 @@ static INLINE BOOL update_read_delta_points(STREAM* s, DELTA_POINT* points, int do {\ if (orderInfo->fieldFlags & (1 << (NO-1))) \ { \ - if(stream_get_left(s) < 1) \ + if (stream_get_left(s) < 1) {\ + printf("%s: error reading %s\n", __FUNCTION__, #TARGET); \ return FALSE; \ + } \ stream_read_BYTE(s, TARGET); \ } \ } while(0) @@ -511,8 +513,10 @@ static INLINE BOOL update_read_delta_points(STREAM* s, DELTA_POINT* points, int do {\ if (orderInfo->fieldFlags & (1 << (NO-1))) \ { \ - if(stream_get_left(s) < 2) \ + if (stream_get_left(s) < 2) { \ + printf("%s: error reading %s or %s\n", __FUNCTION__, #TARGET1, #TARGET2); \ return FALSE; \ + } \ stream_read_BYTE(s, TARGET1); \ stream_read_BYTE(s, TARGET2); \ } \ @@ -522,8 +526,10 @@ static INLINE BOOL update_read_delta_points(STREAM* s, DELTA_POINT* points, int do {\ if (orderInfo->fieldFlags & (1 << (NO-1))) \ { \ - if(stream_get_left(s) < 2) \ + if (stream_get_left(s) < 2) { \ + printf("%s: error reading %s\n", __FUNCTION__, #TARGET); \ return FALSE; \ + } \ stream_read_UINT16(s, TARGET); \ } \ } while(0) @@ -531,26 +537,42 @@ static INLINE BOOL update_read_delta_points(STREAM* s, DELTA_POINT* points, int do {\ if (orderInfo->fieldFlags & (1 << (NO-1))) \ { \ - if(stream_get_left(s) < 4) \ + if (stream_get_left(s) < 4) { \ + printf("%s: error reading %s\n", __FUNCTION__, #TARGET); \ return FALSE; \ + } \ stream_read_UINT32(s, TARGET); \ } \ } while(0) #define ORDER_FIELD_COORD(NO, TARGET) \ - if ((orderInfo->fieldFlags & (1 << (NO-1))) && 
!update_read_coord(s, &TARGET, orderInfo->deltaCoordinates)) \ - return FALSE + do { \ + if ((orderInfo->fieldFlags & (1 << (NO-1))) && !update_read_coord(s, &TARGET, orderInfo->deltaCoordinates)) { \ + printf("%s: error reading %s\n", __FUNCTION__, #TARGET); \ + return FALSE; \ + } \ + } while(0) #define ORDER_FIELD_COLOR(NO, TARGET) \ - if ((orderInfo->fieldFlags & (1 << (NO-1))) && !update_read_color(s, &TARGET)) \ - return FALSE + do { \ + if ((orderInfo->fieldFlags & (1 << (NO-1))) && !update_read_color(s, &TARGET)) { \ + printf("%s: error reading %s\n", __FUNCTION__, #TARGET); \ + return FALSE; \ + } \ + } while(0) + #define FIELD_SKIP_BUFFER16(s, TARGET_LEN) \ - if(stream_get_left(s) < 2) \ + do { \ + if (stream_get_left(s) < 2) {\ + printf("%s: error reading length %s\n", __FUNCTION__, #TARGET_LEN); \ return FALSE; \ + }\ stream_read_UINT16(s, TARGET_LEN); \ - if(!stream_skip(s, TARGET_LEN)) \ - return FALSE - + if (!stream_skip(s, TARGET_LEN)) { \ + printf("%s: error skipping %d bytes\n", __FUNCTION__, TARGET_LEN); \ + return FALSE; \ + } \ + } while(0) /* Primary Drawing Orders */ @@ -599,21 +621,21 @@ BOOL update_read_opaque_rect_order(STREAM* s, ORDER_INFO* orderInfo, OPAQUE_RECT if (orderInfo->fieldFlags & ORDER_FIELD_05) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, byte); opaque_rect->color = (opaque_rect->color & 0xFFFFFF00) | byte; } if (orderInfo->fieldFlags & ORDER_FIELD_06) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, byte); opaque_rect->color = (opaque_rect->color & 0xFFFF00FF) | (byte << 8); } if (orderInfo->fieldFlags & ORDER_FIELD_07) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, byte); opaque_rect->color = (opaque_rect->color & 0xFF00FFFF) | (byte << 16); @@ -642,7 +664,7 @@ BOOL update_read_multi_dstblt_order(STREAM* s, ORDER_INFO* orderInfo, MULTI_DSTB if (orderInfo->fieldFlags & 
ORDER_FIELD_07) { - if(stream_get_left(s) < 2) + if (stream_get_left(s) < 2) return FALSE; stream_read_UINT16(s, multi_dstblt->cbData); return update_read_delta_rects(s, multi_dstblt->rectangles, multi_dstblt->numRectangles); @@ -660,17 +682,17 @@ BOOL update_read_multi_patblt_order(STREAM* s, ORDER_INFO* orderInfo, MULTI_PATB ORDER_FIELD_COLOR(6, multi_patblt->backColor); ORDER_FIELD_COLOR(7, multi_patblt->foreColor); - if(!update_read_brush(s, &multi_patblt->brush, orderInfo->fieldFlags >> 7)) + if (!update_read_brush(s, &multi_patblt->brush, orderInfo->fieldFlags >> 7)) return FALSE; ORDER_FIELD_BYTE(13, multi_patblt->numRectangles); if (orderInfo->fieldFlags & ORDER_FIELD_14) { - if(stream_get_left(s) < 2) + if (stream_get_left(s) < 2) return FALSE; stream_read_UINT16(s, multi_patblt->cbData); - if(!update_read_delta_rects(s, multi_patblt->rectangles, multi_patblt->numRectangles)) + if (!update_read_delta_rects(s, multi_patblt->rectangles, multi_patblt->numRectangles)) return FALSE; } return TRUE; @@ -689,7 +711,7 @@ BOOL update_read_multi_scrblt_order(STREAM* s, ORDER_INFO* orderInfo, MULTI_SCRB if (orderInfo->fieldFlags & ORDER_FIELD_09) { - if(stream_get_left(s) < 2) + if (stream_get_left(s) < 2) return FALSE; stream_read_UINT16(s, multi_scrblt->cbData); return update_read_delta_rects(s, multi_scrblt->rectangles, multi_scrblt->numRectangles); @@ -707,7 +729,7 @@ BOOL update_read_multi_opaque_rect_order(STREAM* s, ORDER_INFO* orderInfo, MULTI if (orderInfo->fieldFlags & ORDER_FIELD_05) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, byte); multi_opaque_rect->color = (multi_opaque_rect->color & 0xFFFFFF00) | byte; @@ -715,7 +737,7 @@ BOOL update_read_multi_opaque_rect_order(STREAM* s, ORDER_INFO* orderInfo, MULTI if (orderInfo->fieldFlags & ORDER_FIELD_06) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, byte); multi_opaque_rect->color = (multi_opaque_rect->color & 
0xFFFF00FF) | (byte << 8); @@ -723,7 +745,7 @@ BOOL update_read_multi_opaque_rect_order(STREAM* s, ORDER_INFO* orderInfo, MULTI if (orderInfo->fieldFlags & ORDER_FIELD_07) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, byte); multi_opaque_rect->color = (multi_opaque_rect->color & 0xFF00FFFF) | (byte << 16); @@ -733,7 +755,7 @@ BOOL update_read_multi_opaque_rect_order(STREAM* s, ORDER_INFO* orderInfo, MULTI if (orderInfo->fieldFlags & ORDER_FIELD_09) { - if(stream_get_left(s) < 2) + if (stream_get_left(s) < 2) return FALSE; stream_read_UINT16(s, multi_opaque_rect->cbData); return update_read_delta_rects(s, multi_opaque_rect->rectangles, multi_opaque_rect->numRectangles); @@ -786,7 +808,7 @@ BOOL update_read_polyline_order(STREAM* s, ORDER_INFO* orderInfo, POLYLINE_ORDER if (orderInfo->fieldFlags & ORDER_FIELD_07) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, polyline->cbData); @@ -830,7 +852,7 @@ BOOL update_read_mem3blt_order(STREAM* s, ORDER_INFO* orderInfo, MEM3BLT_ORDER* ORDER_FIELD_COLOR(9, mem3blt->backColor); ORDER_FIELD_COLOR(10, mem3blt->foreColor); - if(!update_read_brush(s, &mem3blt->brush, orderInfo->fieldFlags >> 10)) + if (!update_read_brush(s, &mem3blt->brush, orderInfo->fieldFlags >> 10)) return FALSE; ORDER_FIELD_UINT16(16, mem3blt->cacheIndex); @@ -867,7 +889,7 @@ BOOL update_read_glyph_index_order(STREAM* s, ORDER_INFO* orderInfo, GLYPH_INDEX ORDER_FIELD_UINT16(13, glyph_index->opRight); ORDER_FIELD_UINT16(14, glyph_index->opBottom); - if(!update_read_brush(s, &glyph_index->brush, orderInfo->fieldFlags >> 14)) + if (!update_read_brush(s, &glyph_index->brush, orderInfo->fieldFlags >> 14)) return FALSE; ORDER_FIELD_UINT16(20, glyph_index->x); @@ -875,11 +897,11 @@ BOOL update_read_glyph_index_order(STREAM* s, ORDER_INFO* orderInfo, GLYPH_INDEX if (orderInfo->fieldFlags & ORDER_FIELD_22) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return 
FALSE; stream_read_BYTE(s, glyph_index->cbData); - if(stream_get_left(s) < glyph_index->cbData) + if (stream_get_left(s) < glyph_index->cbData) return FALSE; memcpy(glyph_index->data, s->p, glyph_index->cbData); stream_seek(s, glyph_index->cbData); @@ -908,11 +930,11 @@ BOOL update_read_fast_index_order(STREAM* s, ORDER_INFO* orderInfo, FAST_INDEX_O if (orderInfo->fieldFlags & ORDER_FIELD_15) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, fast_index->cbData); - if(stream_get_left(s) < fast_index->cbData) + if (stream_get_left(s) < fast_index->cbData) return FALSE; memcpy(fast_index->data, s->p, fast_index->cbData); stream_seek(s, fast_index->cbData); @@ -944,10 +966,10 @@ BOOL update_read_fast_glyph_order(STREAM* s, ORDER_INFO* orderInfo, FAST_GLYPH_O if (orderInfo->fieldFlags & ORDER_FIELD_15) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, fast_glyph->cbData); - if(stream_get_left(s) < fast_glyph->cbData) + if (stream_get_left(s) < fast_glyph->cbData) return FALSE; memcpy(fast_glyph->data, s->p, fast_glyph->cbData); phold = s->p; @@ -959,14 +981,14 @@ BOOL update_read_fast_glyph_order(STREAM* s, ORDER_INFO* orderInfo, FAST_GLYPH_O /* parse optional glyph data */ glyph = (GLYPH_DATA_V2*) malloc(sizeof(GLYPH_DATA_V2)); glyph->cacheIndex = fast_glyph->data[0]; - if(!update_read_2byte_signed(s, &glyph->x) || + if (!update_read_2byte_signed(s, &glyph->x) || !update_read_2byte_signed(s, &glyph->y) || !update_read_2byte_unsigned(s, &glyph->cx) || !update_read_2byte_unsigned(s, &glyph->cy)) return FALSE; glyph->cb = ((glyph->cx + 7) / 8) * glyph->cy; glyph->cb += ((glyph->cb % 4) > 0) ? 
4 - (glyph->cb % 4) : 0; - if(stream_get_left(s) < glyph->cb) + if (stream_get_left(s) < glyph->cb) return FALSE; glyph->aj = (BYTE*) malloc(glyph->cb); stream_read(s, glyph->aj, glyph->cb); @@ -988,7 +1010,7 @@ BOOL update_read_polygon_sc_order(STREAM* s, ORDER_INFO* orderInfo, POLYGON_SC_O if (orderInfo->fieldFlags & ORDER_FIELD_07) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, polygon_sc->cbData); @@ -1011,14 +1033,14 @@ BOOL update_read_polygon_cb_order(STREAM* s, ORDER_INFO* orderInfo, POLYGON_CB_O ORDER_FIELD_COLOR(5, polygon_cb->backColor); ORDER_FIELD_COLOR(6, polygon_cb->foreColor); - if(!update_read_brush(s, &polygon_cb->brush, orderInfo->fieldFlags >> 6)) + if (!update_read_brush(s, &polygon_cb->brush, orderInfo->fieldFlags >> 6)) return FALSE; ORDER_FIELD_BYTE(12, polygon_cb->numPoints); if (orderInfo->fieldFlags & ORDER_FIELD_13) { - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, polygon_cb->cbData); @@ -1027,7 +1049,7 @@ BOOL update_read_polygon_cb_order(STREAM* s, ORDER_INFO* orderInfo, POLYGON_CB_O else polygon_cb->points = (DELTA_POINT*) realloc(polygon_cb->points, sizeof(DELTA_POINT) * polygon_cb->numPoints); - if(!update_read_delta_points(s, polygon_cb->points, polygon_cb->numPoints, polygon_cb->xStart, polygon_cb->yStart)) + if (!update_read_delta_points(s, polygon_cb->points, polygon_cb->numPoints, polygon_cb->xStart, polygon_cb->yStart)) return FALSE; } @@ -1065,7 +1087,7 @@ BOOL update_read_ellipse_cb_order(STREAM* s, ORDER_INFO* orderInfo, ELLIPSE_CB_O BOOL update_read_cache_bitmap_order(STREAM* s, CACHE_BITMAP_ORDER* cache_bitmap_order, BOOL compressed, UINT16 flags) { - if(stream_get_left(s) < 9) + if (stream_get_left(s) < 9) return FALSE; stream_read_BYTE(s, cache_bitmap_order->cacheId); /* cacheId (1 byte) */ stream_seek_BYTE(s); /* pad1Octet (1 byte) */ @@ -1080,13 +1102,13 @@ BOOL update_read_cache_bitmap_order(STREAM* s, CACHE_BITMAP_ORDER* 
cache_bitmap_ if ((flags & NO_BITMAP_COMPRESSION_HDR) == 0) { BYTE* bitmapComprHdr = (BYTE*) &(cache_bitmap_order->bitmapComprHdr); - if(stream_get_left(s) < 8) + if (stream_get_left(s) < 8) return FALSE; stream_read(s, bitmapComprHdr, 8); /* bitmapComprHdr (8 bytes) */ cache_bitmap_order->bitmapLength -= 8; } - if(stream_get_left(s) < cache_bitmap_order->bitmapLength) + if (stream_get_left(s) < cache_bitmap_order->bitmapLength) return FALSE; stream_get_mark(s, cache_bitmap_order->bitmapDataStream); @@ -1094,7 +1116,7 @@ BOOL update_read_cache_bitmap_order(STREAM* s, CACHE_BITMAP_ORDER* cache_bitmap_ } else { - if(stream_get_left(s) < cache_bitmap_order->bitmapLength) + if (stream_get_left(s) < cache_bitmap_order->bitmapLength) return FALSE; stream_get_mark(s, cache_bitmap_order->bitmapDataStream); @@ -1116,7 +1138,7 @@ BOOL update_read_cache_bitmap_v2_order(STREAM* s, CACHE_BITMAP_V2_ORDER* cache_b if (cache_bitmap_v2_order->flags & CBR2_PERSISTENT_KEY_PRESENT) { - if(stream_get_left(s) < 8) + if (stream_get_left(s) < 8) return FALSE; stream_read_UINT32(s, cache_bitmap_v2_order->key1); /* key1 (4 bytes) */ stream_read_UINT32(s, cache_bitmap_v2_order->key2); /* key2 (4 bytes) */ @@ -1124,18 +1146,18 @@ BOOL update_read_cache_bitmap_v2_order(STREAM* s, CACHE_BITMAP_V2_ORDER* cache_b if (cache_bitmap_v2_order->flags & CBR2_HEIGHT_SAME_AS_WIDTH) { - if(!update_read_2byte_unsigned(s, &cache_bitmap_v2_order->bitmapWidth)) /* bitmapWidth */ + if (!update_read_2byte_unsigned(s, &cache_bitmap_v2_order->bitmapWidth)) /* bitmapWidth */ return FALSE; cache_bitmap_v2_order->bitmapHeight = cache_bitmap_v2_order->bitmapWidth; } else { - if(!update_read_2byte_unsigned(s, &cache_bitmap_v2_order->bitmapWidth) || /* bitmapWidth */ + if (!update_read_2byte_unsigned(s, &cache_bitmap_v2_order->bitmapWidth) || /* bitmapWidth */ !update_read_2byte_unsigned(s, &cache_bitmap_v2_order->bitmapHeight)) /* bitmapHeight */ return FALSE; } - if(!update_read_4byte_unsigned(s, 
&cache_bitmap_v2_order->bitmapLength) || /* bitmapLength */ + if (!update_read_4byte_unsigned(s, &cache_bitmap_v2_order->bitmapLength) || /* bitmapLength */ !update_read_2byte_unsigned(s, &cache_bitmap_v2_order->cacheIndex)) /* cacheIndex */ return FALSE; @@ -1146,7 +1168,7 @@ BOOL update_read_cache_bitmap_v2_order(STREAM* s, CACHE_BITMAP_V2_ORDER* cache_b { if (!(cache_bitmap_v2_order->flags & CBR2_NO_BITMAP_COMPRESSION_HDR)) { - if(stream_get_left(s) < 8) + if (stream_get_left(s) < 8) return FALSE; stream_read_UINT16(s, cache_bitmap_v2_order->cbCompFirstRowSize); /* cbCompFirstRowSize (2 bytes) */ @@ -1156,14 +1178,14 @@ BOOL update_read_cache_bitmap_v2_order(STREAM* s, CACHE_BITMAP_V2_ORDER* cache_b cache_bitmap_v2_order->bitmapLength = cache_bitmap_v2_order->cbCompMainBodySize; } - if(stream_get_left(s) < cache_bitmap_v2_order->bitmapLength) + if (stream_get_left(s) < cache_bitmap_v2_order->bitmapLength) return FALSE; stream_get_mark(s, cache_bitmap_v2_order->bitmapDataStream); stream_seek(s, cache_bitmap_v2_order->bitmapLength); } else { - if(stream_get_left(s) < cache_bitmap_v2_order->bitmapLength) + if (stream_get_left(s) < cache_bitmap_v2_order->bitmapLength) return FALSE; stream_get_mark(s, cache_bitmap_v2_order->bitmapDataStream); stream_seek(s, cache_bitmap_v2_order->bitmapLength); @@ -1183,7 +1205,7 @@ BOOL update_read_cache_bitmap_v3_order(STREAM* s, CACHE_BITMAP_V3_ORDER* cache_b bitsPerPixelId = (flags & 0x00000078) >> 3; cache_bitmap_v3_order->bpp = CBR23_BPP[bitsPerPixelId]; - if(stream_get_left(s) < 21) + if (stream_get_left(s) < 21) return FALSE; stream_read_UINT16(s, cache_bitmap_v3_order->cacheIndex); /* cacheIndex (2 bytes) */ stream_read_UINT32(s, cache_bitmap_v3_order->key1); /* key1 (4 bytes) */ @@ -1199,7 +1221,7 @@ BOOL update_read_cache_bitmap_v3_order(STREAM* s, CACHE_BITMAP_V3_ORDER* cache_b stream_read_UINT16(s, bitmapData->height); /* height (2 bytes) */ stream_read_UINT32(s, bitmapData->length); /* length (4 bytes) */ - 
if(stream_get_left(s) < bitmapData->length) + if (stream_get_left(s) < bitmapData->length) return FALSE; if (bitmapData->data == NULL) bitmapData->data = (BYTE*) malloc(bitmapData->length); @@ -1215,12 +1237,12 @@ BOOL update_read_cache_color_table_order(STREAM* s, CACHE_COLOR_TABLE_ORDER* cac int i; UINT32* colorTable; - if(stream_get_left(s) < 3) + if (stream_get_left(s) < 3) return FALSE; stream_read_BYTE(s, cache_color_table_order->cacheIndex); /* cacheIndex (1 byte) */ stream_read_UINT16(s, cache_color_table_order->numberColors); /* numberColors (2 bytes) */ - if(stream_get_left(s) < cache_color_table_order->numberColors * 4) + if (stream_get_left(s) < cache_color_table_order->numberColors * 4) return FALSE; colorTable = cache_color_table_order->colorTable; @@ -1245,7 +1267,7 @@ BOOL update_read_cache_glyph_order(STREAM* s, CACHE_GLYPH_ORDER* cache_glyph_ord INT16 lsi16; GLYPH_DATA* glyph; - if(stream_get_left(s) < 2) + if (stream_get_left(s) < 2) return FALSE; stream_read_BYTE(s, cache_glyph_order->cacheId); /* cacheId (1 byte) */ stream_read_BYTE(s, cache_glyph_order->cGlyphs); /* cGlyphs (1 byte) */ @@ -1258,7 +1280,7 @@ BOOL update_read_cache_glyph_order(STREAM* s, CACHE_GLYPH_ORDER* cache_glyph_ord } glyph = cache_glyph_order->glyphData[i]; - if(stream_get_left(s) < 10) + if (stream_get_left(s) < 10) return FALSE; stream_read_UINT16(s, glyph->cacheIndex); stream_read_UINT16(s, lsi16); @@ -1271,7 +1293,7 @@ BOOL update_read_cache_glyph_order(STREAM* s, CACHE_GLYPH_ORDER* cache_glyph_ord glyph->cb = ((glyph->cx + 7) / 8) * glyph->cy; glyph->cb += ((glyph->cb % 4) > 0) ? 
4 - (glyph->cb % 4) : 0; - if(stream_get_left(s) < glyph->cb) + if (stream_get_left(s) < glyph->cb) return FALSE; glyph->aj = (BYTE*) malloc(glyph->cb); stream_read(s, glyph->aj, glyph->cb); @@ -1300,11 +1322,11 @@ BOOL update_read_cache_glyph_v2_order(STREAM* s, CACHE_GLYPH_V2_ORDER* cache_gly } glyph = cache_glyph_v2_order->glyphData[i]; - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, glyph->cacheIndex); - if(!update_read_2byte_signed(s, &glyph->x) || + if (!update_read_2byte_signed(s, &glyph->x) || !update_read_2byte_signed(s, &glyph->y) || !update_read_2byte_unsigned(s, &glyph->cx) || !update_read_2byte_unsigned(s, &glyph->cy)) @@ -1315,7 +1337,7 @@ BOOL update_read_cache_glyph_v2_order(STREAM* s, CACHE_GLYPH_V2_ORDER* cache_gly glyph->cb = ((glyph->cx + 7) / 8) * glyph->cy; glyph->cb += ((glyph->cb % 4) > 0) ? 4 - (glyph->cb % 4) : 0; - if(stream_get_left(s) < glyph->cb) + if (stream_get_left(s) < glyph->cb) return FALSE; glyph->aj = (BYTE*) malloc(glyph->cb); stream_read(s, glyph->aj, glyph->cb); @@ -1338,7 +1360,7 @@ BOOL update_decompress_brush(STREAM* s, BYTE* output, BYTE bpp) palette = s->p + 16; bytesPerPixel = ((bpp + 1) / 8); - if(stream_get_left(s) < 16) // 64 / 4 + if (stream_get_left(s) < 16) // 64 / 4 return FALSE; for (y = 7; y >= 0; y--) @@ -1366,7 +1388,7 @@ BOOL update_read_cache_brush_order(STREAM* s, CACHE_BRUSH_ORDER* cache_brush_ord BYTE iBitmapFormat; BOOL compressed = FALSE; - if(stream_get_left(s) < 6) + if (stream_get_left(s) < 6) return FALSE; stream_read_BYTE(s, cache_brush_order->index); /* cacheEntry (1 byte) */ @@ -1393,7 +1415,7 @@ BOOL update_read_cache_brush_order(STREAM* s, CACHE_BRUSH_ORDER* cache_brush_ord } /* rows are encoded in reverse order */ - if(stream_get_left(s) < 8) + if (stream_get_left(s) < 8) return FALSE; for (i = 7; i >= 0; i--) @@ -1413,14 +1435,14 @@ BOOL update_read_cache_brush_order(STREAM* s, CACHE_BRUSH_ORDER* cache_brush_ord if (compressed != FALSE) { /* 
compressed brush */ - if(!update_decompress_brush(s, cache_brush_order->data, cache_brush_order->bpp)) + if (!update_decompress_brush(s, cache_brush_order->data, cache_brush_order->bpp)) return FALSE; } else { /* uncompressed brush */ int scanline = (cache_brush_order->bpp / 8) * 8; - if(stream_get_left(s) < scanline * 8) + if (stream_get_left(s) < scanline * 8) return FALSE; for (i = 7; i >= 0; i--) @@ -1441,7 +1463,7 @@ BOOL update_read_create_offscreen_bitmap_order(STREAM* s, CREATE_OFFSCREEN_BITMA BOOL deleteListPresent; OFFSCREEN_DELETE_LIST* deleteList; - if(stream_get_left(s) < 6) + if (stream_get_left(s) < 6) return FALSE; stream_read_UINT16(s, flags); /* flags (2 bytes) */ create_offscreen_bitmap->id = flags & 0x7FFF; @@ -1454,7 +1476,7 @@ BOOL update_read_create_offscreen_bitmap_order(STREAM* s, CREATE_OFFSCREEN_BITMA if (deleteListPresent) { int i; - if(stream_get_left(s) < 2) + if (stream_get_left(s) < 2) return FALSE; stream_read_UINT16(s, deleteList->cIndices); @@ -1464,7 +1486,7 @@ BOOL update_read_create_offscreen_bitmap_order(STREAM* s, CREATE_OFFSCREEN_BITMA deleteList->indices = realloc(deleteList->indices, deleteList->sIndices * 2); } - if(stream_get_left(s) < 2 * deleteList->cIndices) + if (stream_get_left(s) < 2 * deleteList->cIndices) return FALSE; for (i = 0; i < (int) deleteList->cIndices; i++) @@ -1481,7 +1503,7 @@ BOOL update_read_create_offscreen_bitmap_order(STREAM* s, CREATE_OFFSCREEN_BITMA BOOL update_read_switch_surface_order(STREAM* s, SWITCH_SURFACE_ORDER* switch_surface) { - if(stream_get_left(s) < 2) + if (stream_get_left(s) < 2) return FALSE; stream_read_UINT16(s, switch_surface->bitmapId); /* bitmapId (2 bytes) */ return TRUE; @@ -1491,7 +1513,7 @@ BOOL update_read_create_nine_grid_bitmap_order(STREAM* s, CREATE_NINE_GRID_BITMA { NINE_GRID_BITMAP_INFO* nineGridInfo; - if(stream_get_left(s) < 19) + if (stream_get_left(s) < 19) return FALSE; stream_read_BYTE(s, create_nine_grid_bitmap->bitmapBpp); /* bitmapBpp (1 byte) */ 
stream_read_UINT16(s, create_nine_grid_bitmap->bitmapId); /* bitmapId (2 bytes) */ @@ -1508,7 +1530,7 @@ BOOL update_read_create_nine_grid_bitmap_order(STREAM* s, CREATE_NINE_GRID_BITMA BOOL update_read_frame_marker_order(STREAM* s, FRAME_MARKER_ORDER* frame_marker) { - if(stream_get_left(s) < 4) + if (stream_get_left(s) < 4) return FALSE; stream_read_UINT32(s, frame_marker->action); /* action (4 bytes) */ return TRUE; @@ -1516,7 +1538,7 @@ BOOL update_read_frame_marker_order(STREAM* s, FRAME_MARKER_ORDER* frame_marker) BOOL update_read_stream_bitmap_first_order(STREAM* s, STREAM_BITMAP_FIRST_ORDER* stream_bitmap_first) { - if(stream_get_left(s) < 10) // 8 + 2 at least + if (stream_get_left(s) < 10) // 8 + 2 at least return FALSE; stream_read_BYTE(s, stream_bitmap_first->bitmapFlags); /* bitmapFlags (1 byte) */ stream_read_BYTE(s, stream_bitmap_first->bitmapBpp); /* bitmapBpp (1 byte) */ @@ -1525,11 +1547,11 @@ BOOL update_read_stream_bitmap_first_order(STREAM* s, STREAM_BITMAP_FIRST_ORDER* stream_read_UINT16(s, stream_bitmap_first->bitmapHeight); /* bitmapHeigth (2 bytes) */ if (stream_bitmap_first->bitmapFlags & STREAM_BITMAP_V2) { - if(stream_get_left(s) < 4) + if (stream_get_left(s) < 4) return FALSE; stream_read_UINT32(s, stream_bitmap_first->bitmapSize); /* bitmapSize (4 bytes) */ } else { - if(stream_get_left(s) < 2) + if (stream_get_left(s) < 2) return FALSE; stream_read_UINT16(s, stream_bitmap_first->bitmapSize); /* bitmapSize (2 bytes) */ } @@ -1540,7 +1562,7 @@ BOOL update_read_stream_bitmap_first_order(STREAM* s, STREAM_BITMAP_FIRST_ORDER* BOOL update_read_stream_bitmap_next_order(STREAM* s, STREAM_BITMAP_FIRST_ORDER* stream_bitmap_next) { - if(stream_get_left(s) < 5) + if (stream_get_left(s) < 5) return FALSE; stream_read_BYTE(s, stream_bitmap_next->bitmapFlags); /* bitmapFlags (1 byte) */ stream_read_UINT16(s, stream_bitmap_next->bitmapType); /* bitmapType (2 bytes) */ @@ -1550,7 +1572,7 @@ BOOL update_read_stream_bitmap_next_order(STREAM* s, 
STREAM_BITMAP_FIRST_ORDER* BOOL update_read_draw_gdiplus_first_order(STREAM* s, DRAW_GDIPLUS_FIRST_ORDER* draw_gdiplus_first) { - if(stream_get_left(s) < 11) + if (stream_get_left(s) < 11) return FALSE; stream_seek_BYTE(s); /* pad1Octet (1 byte) */ stream_read_UINT16(s, draw_gdiplus_first->cbSize); /* cbSize (2 bytes) */ @@ -1562,7 +1584,7 @@ BOOL update_read_draw_gdiplus_first_order(STREAM* s, DRAW_GDIPLUS_FIRST_ORDER* d BOOL update_read_draw_gdiplus_next_order(STREAM* s, DRAW_GDIPLUS_NEXT_ORDER* draw_gdiplus_next) { - if(stream_get_left(s) < 3) + if (stream_get_left(s) < 3) return FALSE; stream_seek_BYTE(s); /* pad1Octet (1 byte) */ FIELD_SKIP_BUFFER16(s, draw_gdiplus_next->cbSize); /* cbSize(2 bytes) + emfRecords */ @@ -1571,7 +1593,7 @@ BOOL update_read_draw_gdiplus_next_order(STREAM* s, DRAW_GDIPLUS_NEXT_ORDER* dra BOOL update_read_draw_gdiplus_end_order(STREAM* s, DRAW_GDIPLUS_END_ORDER* draw_gdiplus_end) { - if(stream_get_left(s) < 11) + if (stream_get_left(s) < 11) return FALSE; stream_seek_BYTE(s); /* pad1Octet (1 byte) */ stream_read_UINT16(s, draw_gdiplus_end->cbSize); /* cbSize (2 bytes) */ @@ -1583,7 +1605,7 @@ BOOL update_read_draw_gdiplus_end_order(STREAM* s, DRAW_GDIPLUS_END_ORDER* draw_ BOOL update_read_draw_gdiplus_cache_first_order(STREAM* s, DRAW_GDIPLUS_CACHE_FIRST_ORDER* draw_gdiplus_cache_first) { - if(stream_get_left(s) < 11) + if (stream_get_left(s) < 11) return FALSE; stream_read_BYTE(s, draw_gdiplus_cache_first->flags); /* flags (1 byte) */ stream_read_UINT16(s, draw_gdiplus_cache_first->cacheType); /* cacheType (2 bytes) */ @@ -1596,7 +1618,7 @@ BOOL update_read_draw_gdiplus_cache_first_order(STREAM* s, DRAW_GDIPLUS_CACHE_FI BOOL update_read_draw_gdiplus_cache_next_order(STREAM* s, DRAW_GDIPLUS_CACHE_NEXT_ORDER* draw_gdiplus_cache_next) { - if(stream_get_left(s) < 7) + if (stream_get_left(s) < 7) return FALSE; stream_read_BYTE(s, draw_gdiplus_cache_next->flags); /* flags (1 byte) */ stream_read_UINT16(s, 
draw_gdiplus_cache_next->cacheType); /* cacheType (2 bytes) */ @@ -1608,7 +1630,7 @@ BOOL update_read_draw_gdiplus_cache_next_order(STREAM* s, DRAW_GDIPLUS_CACHE_NEX BOOL update_read_draw_gdiplus_cache_end_order(STREAM* s, DRAW_GDIPLUS_CACHE_END_ORDER* draw_gdiplus_cache_end) { - if(stream_get_left(s) < 11) + if (stream_get_left(s) < 11) return FALSE; stream_read_BYTE(s, draw_gdiplus_cache_end->flags); /* flags (1 byte) */ stream_read_UINT16(s, draw_gdiplus_cache_end->cacheType); /* cacheType (2 bytes) */ @@ -1635,7 +1657,7 @@ BOOL update_read_field_flags(STREAM* s, UINT32* fieldFlags, BYTE flags, BYTE fie fieldBytes = 0; } - if(stream_get_left(s) < fieldBytes) + if (stream_get_left(s) < fieldBytes) return FALSE; *fieldFlags = 0; @@ -1651,7 +1673,7 @@ BOOL update_read_bounds(STREAM* s, rdpBounds* bounds) { BYTE flags; - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; stream_read_BYTE(s, flags); /* field flags */ @@ -1673,29 +1695,29 @@ BOOL update_read_bounds(STREAM* s, rdpBounds* bounds) } else if (flags & BOUND_DELTA_TOP) { - if(!update_read_coord(s, &bounds->top, TRUE)) + if (!update_read_coord(s, &bounds->top, TRUE)) return FALSE; } if (flags & BOUND_RIGHT) { - if(!update_read_coord(s, &bounds->right, FALSE)) + if (!update_read_coord(s, &bounds->right, FALSE)) return FALSE; } else if (flags & BOUND_DELTA_RIGHT) { - if(!update_read_coord(s, &bounds->right, TRUE)) + if (!update_read_coord(s, &bounds->right, TRUE)) return FALSE; } if (flags & BOUND_BOTTOM) { - if(!update_read_coord(s, &bounds->bottom, FALSE)) + if (!update_read_coord(s, &bounds->bottom, FALSE)) return FALSE; } else if (flags & BOUND_DELTA_BOTTOM) { - if(!update_read_coord(s, &bounds->bottom, TRUE)) + if (!update_read_coord(s, &bounds->bottom, TRUE)) return FALSE; } return TRUE; @@ -1894,7 +1916,7 @@ BOOL update_recv_secondary_order(rdpUpdate* update, STREAM* s, BYTE flags) rdpContext* context = update->context; rdpSecondaryUpdate* secondary = update->secondary; - 
if(stream_get_left(s) < 5) + if (stream_get_left(s) < 5) return FALSE; stream_read_UINT16(s, orderLength); /* orderLength (2 bytes) */ stream_read_UINT16(s, extraFlags); /* extraFlags (2 bytes) */ @@ -1950,20 +1972,20 @@ BOOL update_recv_secondary_order(rdpUpdate* update, STREAM* s, BYTE flags) case ORDER_TYPE_CACHE_GLYPH: if (secondary->glyph_v2) { - if(!update_read_cache_glyph_v2_order(s, &(secondary->cache_glyph_v2_order), extraFlags)) + if (!update_read_cache_glyph_v2_order(s, &(secondary->cache_glyph_v2_order), extraFlags)) return FALSE; IFCALL(secondary->CacheGlyphV2, context, &(secondary->cache_glyph_v2_order)); } else { - if(!update_read_cache_glyph_order(s, &(secondary->cache_glyph_order), extraFlags)) + if (!update_read_cache_glyph_order(s, &(secondary->cache_glyph_order), extraFlags)) return FALSE; IFCALL(secondary->CacheGlyph, context, &(secondary->cache_glyph_order)); } break; case ORDER_TYPE_CACHE_BRUSH: - if(!update_read_cache_brush_order(s, &(secondary->cache_brush_order), extraFlags)) + if (!update_read_cache_brush_order(s, &(secondary->cache_brush_order), extraFlags)) return FALSE; IFCALL(secondary->CacheBrush, context, &(secondary->cache_brush_order)); break; @@ -1994,73 +2016,73 @@ BOOL update_recv_altsec_order(rdpUpdate* update, STREAM* s, BYTE flags) switch (orderType) { case ORDER_TYPE_CREATE_OFFSCREEN_BITMAP: - if(!update_read_create_offscreen_bitmap_order(s, &(altsec->create_offscreen_bitmap))) + if (!update_read_create_offscreen_bitmap_order(s, &(altsec->create_offscreen_bitmap))) return FALSE; IFCALL(altsec->CreateOffscreenBitmap, context, &(altsec->create_offscreen_bitmap)); break; case ORDER_TYPE_SWITCH_SURFACE: - if(!update_read_switch_surface_order(s, &(altsec->switch_surface))) + if (!update_read_switch_surface_order(s, &(altsec->switch_surface))) return FALSE; IFCALL(altsec->SwitchSurface, context, &(altsec->switch_surface)); break; case ORDER_TYPE_CREATE_NINE_GRID_BITMAP: - if(!update_read_create_nine_grid_bitmap_order(s, 
&(altsec->create_nine_grid_bitmap))) + if (!update_read_create_nine_grid_bitmap_order(s, &(altsec->create_nine_grid_bitmap))) return FALSE; IFCALL(altsec->CreateNineGridBitmap, context, &(altsec->create_nine_grid_bitmap)); break; case ORDER_TYPE_FRAME_MARKER: - if(!update_read_frame_marker_order(s, &(altsec->frame_marker))) + if (!update_read_frame_marker_order(s, &(altsec->frame_marker))) return FALSE; IFCALL(altsec->FrameMarker, context, &(altsec->frame_marker)); break; case ORDER_TYPE_STREAM_BITMAP_FIRST: - if(!update_read_stream_bitmap_first_order(s, &(altsec->stream_bitmap_first))) + if (!update_read_stream_bitmap_first_order(s, &(altsec->stream_bitmap_first))) return FALSE; IFCALL(altsec->StreamBitmapFirst, context, &(altsec->stream_bitmap_first)); break; case ORDER_TYPE_STREAM_BITMAP_NEXT: - if(!update_read_stream_bitmap_next_order(s, &(altsec->stream_bitmap_next))) + if (!update_read_stream_bitmap_next_order(s, &(altsec->stream_bitmap_next))) return FALSE; IFCALL(altsec->StreamBitmapNext, context, &(altsec->stream_bitmap_next)); break; case ORDER_TYPE_GDIPLUS_FIRST: - if(!update_read_draw_gdiplus_first_order(s, &(altsec->draw_gdiplus_first))) + if (!update_read_draw_gdiplus_first_order(s, &(altsec->draw_gdiplus_first))) return FALSE; IFCALL(altsec->DrawGdiPlusFirst, context, &(altsec->draw_gdiplus_first)); break; case ORDER_TYPE_GDIPLUS_NEXT: - if(!update_read_draw_gdiplus_next_order(s, &(altsec->draw_gdiplus_next))) + if (!update_read_draw_gdiplus_next_order(s, &(altsec->draw_gdiplus_next))) return FALSE; IFCALL(altsec->DrawGdiPlusNext, context, &(altsec->draw_gdiplus_next)); break; case ORDER_TYPE_GDIPLUS_END: - if(update_read_draw_gdiplus_end_order(s, &(altsec->draw_gdiplus_end))) + if (!update_read_draw_gdiplus_end_order(s, &(altsec->draw_gdiplus_end))) return FALSE; IFCALL(altsec->DrawGdiPlusEnd, context, &(altsec->draw_gdiplus_end)); break; case ORDER_TYPE_GDIPLUS_CACHE_FIRST: - if(!update_read_draw_gdiplus_cache_first_order(s, 
&(altsec->draw_gdiplus_cache_first))) + if (!update_read_draw_gdiplus_cache_first_order(s, &(altsec->draw_gdiplus_cache_first))) return FALSE; IFCALL(altsec->DrawGdiPlusCacheFirst, context, &(altsec->draw_gdiplus_cache_first)); break; case ORDER_TYPE_GDIPLUS_CACHE_NEXT: - if(!update_read_draw_gdiplus_cache_next_order(s, &(altsec->draw_gdiplus_cache_next))) + if (!update_read_draw_gdiplus_cache_next_order(s, &(altsec->draw_gdiplus_cache_next))) return FALSE; IFCALL(altsec->DrawGdiPlusCacheNext, context, &(altsec->draw_gdiplus_cache_next)); break; case ORDER_TYPE_GDIPLUS_CACHE_END: - if(!update_read_draw_gdiplus_cache_end_order(s, &(altsec->draw_gdiplus_cache_end))) + if (!update_read_draw_gdiplus_cache_end_order(s, &(altsec->draw_gdiplus_cache_end))) return FALSE; IFCALL(altsec->DrawGdiPlusCacheEnd, context, &(altsec->draw_gdiplus_cache_end)); break; @@ -2082,8 +2104,9 @@ BOOL update_recv_order(rdpUpdate* update, STREAM* s) { BYTE controlFlags; - if(stream_get_left(s) < 1) + if (stream_get_left(s) < 1) return FALSE; + stream_read_BYTE(s, controlFlags); /* controlFlags (1 byte) */ if (!(controlFlags & ORDER_STANDARD)) diff --git a/libfreerdp/core/peer.c b/libfreerdp/core/peer.c index f3c4d80ac..8b5a651e5 100644 --- a/libfreerdp/core/peer.c +++ b/libfreerdp/core/peer.c @@ -135,6 +135,8 @@ static BOOL peer_recv_data_pdu(freerdp_peer* client, STREAM* s) return FALSE; case DATA_PDU_TYPE_FRAME_ACKNOWLEDGE: + if(stream_get_left(s) < 4) + return FALSE; stream_read_UINT32(s, client->ack_frame_id); break; @@ -176,7 +178,8 @@ static int peer_recv_tpkt_pdu(freerdp_peer* client, STREAM* s) if (rdp->settings->DisableEncryption) { - rdp_read_security_header(s, &securityFlags); + if (!rdp_read_security_header(s, &securityFlags)) + return -1; if (securityFlags & SEC_ENCRYPT) { @@ -237,7 +240,7 @@ static int peer_recv_fastpath_pdu(freerdp_peer* client, STREAM* s) if (fastpath->encryptionFlags & FASTPATH_OUTPUT_ENCRYPTED) { - if(!rdp_decrypt(rdp, s, length, (fastpath->encryptionFlags 
& FASTPATH_OUTPUT_SECURE_CHECKSUM) ? SEC_SECURE_CHECKSUM : 0)) + if (!rdp_decrypt(rdp, s, length, (fastpath->encryptionFlags & FASTPATH_OUTPUT_SECURE_CHECKSUM) ? SEC_SECURE_CHECKSUM : 0)) return -1; } diff --git a/libfreerdp/core/rdp.c b/libfreerdp/core/rdp.c index b308b80f2..edd2e8edf 100644 --- a/libfreerdp/core/rdp.c +++ b/libfreerdp/core/rdp.c @@ -503,7 +503,7 @@ int rdp_recv_data_pdu(rdpRdp* rdp, STREAM* s) UINT32 rlen; STREAM* comp_stream; - if(!rdp_read_share_data_header(s, &length, &type, &share_id, &compressed_type, &compressed_len)) + if (!rdp_read_share_data_header(s, &length, &type, &share_id, &compressed_type, &compressed_len)) return -1; comp_stream = s; @@ -552,7 +552,7 @@ int rdp_recv_data_pdu(rdpRdp* rdp, STREAM* s) break; case DATA_PDU_TYPE_SYNCHRONIZE: - if(!rdp_recv_synchronize_pdu(rdp, comp_stream)) + if (!rdp_recv_synchronize_pdu(rdp, comp_stream)) return -1; break; @@ -677,6 +677,7 @@ BOOL rdp_decrypt(rdpRdp* rdp, STREAM* s, int length, UINT16 securityFlags) if (stream_get_left(s) < 12) return FALSE; + stream_read_UINT16(s, len); /* 0x10 */ stream_read_BYTE(s, version); /* 0x1 */ stream_read_BYTE(s, pad); @@ -781,7 +782,7 @@ static int rdp_recv_tpkt_pdu(rdpRdp* rdp, STREAM* s) if (channelId != MCS_GLOBAL_CHANNEL_ID) { - if(!freerdp_channel_process(rdp->instance, s, channelId)) + if (!freerdp_channel_process(rdp->instance, s, channelId)) return -1; } else @@ -789,8 +790,10 @@ static int rdp_recv_tpkt_pdu(rdpRdp* rdp, STREAM* s) while (stream_get_left(s) > 3) { stream_get_mark(s, nextp); + if (!rdp_read_share_control_header(s, &pduLength, &pduType, &pduSource)) return -1; + nextp += pduLength; rdp->settings->PduSource = pduSource; @@ -832,6 +835,7 @@ static int rdp_recv_fastpath_pdu(rdpRdp* rdp, STREAM* s) rdpFastPath* fastpath; fastpath = rdp->fastpath; + if (!fastpath_read_header_rdp(fastpath, s, &length)) return -1; @@ -844,6 +848,7 @@ static int rdp_recv_fastpath_pdu(rdpRdp* rdp, STREAM* s) if (fastpath->encryptionFlags & 
FASTPATH_OUTPUT_ENCRYPTED) { UINT16 flags = (fastpath->encryptionFlags & FASTPATH_OUTPUT_SECURE_CHECKSUM) ? SEC_SECURE_CHECKSUM : 0; + if (!rdp_decrypt(rdp, s, length, flags)) return -1; } diff --git a/libfreerdp/core/surface.c b/libfreerdp/core/surface.c index c53a35159..3d43a4ba5 100644 --- a/libfreerdp/core/surface.c +++ b/libfreerdp/core/surface.c @@ -25,13 +25,14 @@ #include "surface.h" -static BOOL update_recv_surfcmd_surface_bits(rdpUpdate* update, STREAM* s, UINT32 *length) +static int update_recv_surfcmd_surface_bits(rdpUpdate* update, STREAM* s, UINT32 *length) { int pos; SURFACE_BITS_COMMAND* cmd = &update->surface_bits_command; - if(stream_get_left(s) < 20) - return FALSE; + if (stream_get_left(s) < 20) + return -1; + stream_read_UINT16(s, cmd->destLeft); stream_read_UINT16(s, cmd->destTop); stream_read_UINT16(s, cmd->destRight); @@ -42,17 +43,19 @@ static BOOL update_recv_surfcmd_surface_bits(rdpUpdate* update, STREAM* s, UINT3 stream_read_UINT16(s, cmd->width); stream_read_UINT16(s, cmd->height); stream_read_UINT32(s, cmd->bitmapDataLength); - if(stream_get_left(s) < cmd->bitmapDataLength) - return FALSE; + + if (stream_get_left(s) < cmd->bitmapDataLength) + return -1; + pos = stream_get_pos(s) + cmd->bitmapDataLength; cmd->bitmapData = stream_get_tail(s); IFCALL(update->SurfaceBits, update->context, cmd); stream_set_pos(s, pos); - *length = 20 + cmd->bitmapDataLength; - return TRUE; + + return 0; } static void update_send_frame_acknowledge(rdpRdp* rdp, UINT32 frameId) @@ -64,24 +67,28 @@ static void update_send_frame_acknowledge(rdpRdp* rdp, UINT32 frameId) rdp_send_data_pdu(rdp, s, DATA_PDU_TYPE_FRAME_ACKNOWLEDGE, rdp->mcs->user_id); } -static BOOL update_recv_surfcmd_frame_marker(rdpUpdate* update, STREAM* s, UINT32 *length) +static int update_recv_surfcmd_frame_marker(rdpUpdate* update, STREAM* s, UINT32 *length) { SURFACE_FRAME_MARKER* marker = &update->surface_frame_marker; - if(stream_get_left(s) < 6) - return FALSE; + if (stream_get_left(s) < 
6) + return -1; + stream_read_UINT16(s, marker->frameAction); stream_read_UINT32(s, marker->frameId); IFCALL(update->SurfaceFrameMarker, update->context, marker); - if (update->context->rdp->settings->ReceivedCapabilities[CAPSET_TYPE_FRAME_ACKNOWLEDGE] && update->context->rdp->settings->FrameAcknowledge > 0 && marker->frameAction == SURFACECMD_FRAMEACTION_END) + if (update->context->rdp->settings->ReceivedCapabilities[CAPSET_TYPE_FRAME_ACKNOWLEDGE] && + (update->context->rdp->settings->FrameAcknowledge > 0) && + (marker->frameAction == SURFACECMD_FRAMEACTION_END)) { update_send_frame_acknowledge(update->context->rdp, marker->frameId); } *length = 6; - return TRUE; + + return 0; } int update_recv_surfcmds(rdpUpdate* update, UINT32 size, STREAM* s) @@ -101,12 +108,12 @@ int update_recv_surfcmds(rdpUpdate* update, UINT32 size, STREAM* s) { case CMDTYPE_SET_SURFACE_BITS: case CMDTYPE_STREAM_SURFACE_BITS: - if (!update_recv_surfcmd_surface_bits(update, s, &cmdLength)) + if (update_recv_surfcmd_surface_bits(update, s, &cmdLength) < 0) return -1; break; case CMDTYPE_FRAME_MARKER: - if (!update_recv_surfcmd_frame_marker(update, s, &cmdLength)) + if (update_recv_surfcmd_frame_marker(update, s, &cmdLength) < 0) return -1; break; @@ -154,4 +161,3 @@ void update_write_surfcmd_frame_marker(STREAM* s, UINT16 frameAction, UINT32 fra stream_write_UINT16(s, frameAction); stream_write_UINT32(s, frameId); } - diff --git a/libfreerdp/core/timezone.c b/libfreerdp/core/timezone.c index 958b0c8ab..f5cb0b46c 100644 --- a/libfreerdp/core/timezone.c +++ b/libfreerdp/core/timezone.c @@ -76,7 +76,7 @@ void rdp_write_system_time(STREAM* s, SYSTEM_TIME* system_time) BOOL rdp_read_client_time_zone(STREAM* s, rdpSettings* settings) { - char* str; + char* str = NULL; TIME_ZONE_INFO* clientTimeZone; if (stream_get_left(s) < 172) @@ -91,6 +91,7 @@ BOOL rdp_read_client_time_zone(STREAM* s, rdpSettings* settings) stream_seek(s, 64); strncpy(clientTimeZone->standardName, str, 
sizeof(clientTimeZone->standardName)); free(str); + str = NULL; rdp_read_system_time(s, &clientTimeZone->standardDate); /* StandardDate */ stream_read_UINT32(s, clientTimeZone->standardBias); /* StandardBias */ diff --git a/libfreerdp/core/transport.c b/libfreerdp/core/transport.c index cbdbe89ef..a0ed90947 100644 --- a/libfreerdp/core/transport.c +++ b/libfreerdp/core/transport.c @@ -259,6 +259,9 @@ BOOL transport_accept_nla(rdpTransport* transport) if (transport->TlsIn == NULL) transport->TlsIn = tls_new(transport->settings); + if (transport->TlsOut == NULL) + transport->TlsOut = transport->TlsIn; + transport->layer = TRANSPORT_LAYER_TLS; transport->TlsIn->sockfd = transport->TcpIn->sockfd; @@ -394,6 +397,7 @@ int transport_read(rdpTransport* transport, STREAM* s) int stream_bytes; int transport_status; + pdu_bytes = 0; transport_status = 0; /* first check if we have header */ @@ -693,14 +697,15 @@ int transport_check_fds(rdpTransport** ptransport) * 1: asynchronous return */ + ReferenceTable_Add(transport->ReceiveReferences, received); + recv_status = transport->ReceiveCallback(transport, received, transport->ReceiveExtra); + ReferenceTable_Release(transport->ReceiveReferences, received); + if (recv_status < 0) status = -1; - if (recv_status == 0) - transport_receive_pool_return(transport, received); - if (status < 0) return status; @@ -789,6 +794,9 @@ rdpTransport* transport_new(rdpSettings* settings) transport->ReceiveQueue = Queue_New(TRUE, -1, -1); Queue_Object(transport->ReceivePool)->fnObjectFree = (OBJECT_FREE_FN) stream_free; Queue_Object(transport->ReceiveQueue)->fnObjectFree = (OBJECT_FREE_FN) stream_free; + + transport->ReceiveReferences = ReferenceTable_New(TRUE, + (void*) transport, (REFERENCE_FREE) transport_receive_pool_return); } return transport; @@ -819,6 +827,8 @@ void transport_free(rdpTransport* transport) Queue_Free(transport->ReceivePool); Queue_Free(transport->ReceiveQueue); + ReferenceTable_Free(transport->ReceiveReferences); + 
free(transport); } } diff --git a/libfreerdp/core/transport.h b/libfreerdp/core/transport.h index cca3d090b..92ec3b090 100644 --- a/libfreerdp/core/transport.h +++ b/libfreerdp/core/transport.h @@ -69,6 +69,8 @@ struct rdp_transport wQueue* ReceivePool; wQueue* ReceiveQueue; + + wReferenceTable* ReceiveReferences; }; STREAM* transport_recv_stream_init(rdpTransport* transport, int size); diff --git a/libfreerdp/core/update.c b/libfreerdp/core/update.c index 7e2735763..2ae14b5b3 100644 --- a/libfreerdp/core/update.c +++ b/libfreerdp/core/update.c @@ -44,8 +44,9 @@ BOOL update_recv_orders(rdpUpdate* update, STREAM* s) { UINT16 numberOrders; - if(stream_get_left(s) < 6) + if (stream_get_left(s) < 6) return FALSE; + stream_seek_UINT16(s); /* pad2OctetsA (2 bytes) */ stream_read_UINT16(s, numberOrders); /* numberOrders (2 bytes) */ stream_seek_UINT16(s); /* pad2OctetsB (2 bytes) */ @@ -62,8 +63,9 @@ BOOL update_recv_orders(rdpUpdate* update, STREAM* s) BOOL update_read_bitmap_data(STREAM* s, BITMAP_DATA* bitmap_data) { - if(stream_get_left(s) < 18) + if (stream_get_left(s) < 18) return FALSE; + stream_read_UINT16(s, bitmap_data->destLeft); stream_read_UINT16(s, bitmap_data->destTop); stream_read_UINT16(s, bitmap_data->destRight); @@ -91,7 +93,7 @@ BOOL update_read_bitmap_data(STREAM* s, BITMAP_DATA* bitmap_data) } else { - if(stream_get_left(s) < bitmap_data->bitmapLength) + if (stream_get_left(s) < bitmap_data->bitmapLength) return FALSE; bitmap_data->compressed = FALSE; stream_get_mark(s, bitmap_data->bitmapDataStream); @@ -103,8 +105,10 @@ BOOL update_read_bitmap_data(STREAM* s, BITMAP_DATA* bitmap_data) BOOL update_read_bitmap(rdpUpdate* update, STREAM* s, BITMAP_UPDATE* bitmap_update) { int i; - if(stream_get_left(s) < 2) + + if (stream_get_left(s) < 2) return FALSE; + stream_read_UINT16(s, bitmap_update->number); /* numberRectangles (2 bytes) */ if (bitmap_update->number > bitmap_update->count) @@ -136,15 +140,16 @@ BOOL update_read_palette(rdpUpdate* update, 
STREAM* s, PALETTE_UPDATE* palette_u int i; PALETTE_ENTRY* entry; - if(stream_get_left(s) < 6) + if (stream_get_left(s) < 6) return FALSE; + stream_seek_UINT16(s); /* pad2Octets (2 bytes) */ stream_read_UINT32(s, palette_update->number); /* numberColors (4 bytes), must be set to 256 */ if (palette_update->number > 256) palette_update->number = 256; - if(stream_get_left(s) < palette_update->number * 3) + if (stream_get_left(s) < palette_update->number * 3) return FALSE; /* paletteEntries */ @@ -171,25 +176,29 @@ void update_read_synchronize(rdpUpdate* update, STREAM* s) BOOL update_read_play_sound(STREAM* s, PLAY_SOUND_UPDATE* play_sound) { - if(stream_get_left(s) < 8) + if (stream_get_left(s) < 8) return FALSE; + stream_read_UINT32(s, play_sound->duration); /* duration (4 bytes) */ stream_read_UINT32(s, play_sound->frequency); /* frequency (4 bytes) */ + return TRUE; } BOOL update_recv_play_sound(rdpUpdate* update, STREAM* s) { - if(!update_read_play_sound(s, &update->play_sound)) + if (!update_read_play_sound(s, &update->play_sound)) return FALSE; + IFCALL(update->PlaySound, update->context, &update->play_sound); return TRUE; } BOOL update_read_pointer_position(STREAM* s, POINTER_POSITION_UPDATE* pointer_position) { - if(stream_get_left(s) < 4) + if (stream_get_left(s) < 4) return FALSE; + stream_read_UINT16(s, pointer_position->xPos); /* xPos (2 bytes) */ stream_read_UINT16(s, pointer_position->yPos); /* yPos (2 bytes) */ return TRUE; @@ -197,15 +206,16 @@ BOOL update_read_pointer_position(STREAM* s, POINTER_POSITION_UPDATE* pointer_po BOOL update_read_pointer_system(STREAM* s, POINTER_SYSTEM_UPDATE* pointer_system) { - if(stream_get_left(s) < 4) + if (stream_get_left(s) < 4) return FALSE; + stream_read_UINT32(s, pointer_system->type); /* systemPointerType (4 bytes) */ return TRUE; } BOOL update_read_pointer_color(STREAM* s, POINTER_COLOR_UPDATE* pointer_color) { - if(stream_get_left(s) < 14) + if (stream_get_left(s) < 14) return FALSE; stream_read_UINT16(s, 
pointer_color->cacheIndex); /* cacheIndex (2 bytes) */ @@ -229,7 +239,7 @@ BOOL update_read_pointer_color(STREAM* s, POINTER_COLOR_UPDATE* pointer_color) if (pointer_color->lengthXorMask > 0) { - if(stream_get_left(s) < pointer_color->lengthXorMask) + if (stream_get_left(s) < pointer_color->lengthXorMask) return FALSE; pointer_color->xorMaskData = (BYTE*) malloc(pointer_color->lengthXorMask); stream_read(s, pointer_color->xorMaskData, pointer_color->lengthXorMask); @@ -237,7 +247,7 @@ BOOL update_read_pointer_color(STREAM* s, POINTER_COLOR_UPDATE* pointer_color) if (pointer_color->lengthAndMask > 0) { - if(stream_get_left(s) < pointer_color->lengthAndMask) + if (stream_get_left(s) < pointer_color->lengthAndMask) return FALSE; pointer_color->andMaskData = (BYTE*) malloc(pointer_color->lengthAndMask); stream_read(s, pointer_color->andMaskData, pointer_color->lengthAndMask); @@ -250,16 +260,18 @@ BOOL update_read_pointer_color(STREAM* s, POINTER_COLOR_UPDATE* pointer_color) BOOL update_read_pointer_new(STREAM* s, POINTER_NEW_UPDATE* pointer_new) { - if(stream_get_left(s) < 2) + if (stream_get_left(s) < 2) return FALSE; + stream_read_UINT16(s, pointer_new->xorBpp); /* xorBpp (2 bytes) */ return update_read_pointer_color(s, &pointer_new->colorPtrAttr); /* colorPtrAttr */ } BOOL update_read_pointer_cached(STREAM* s, POINTER_CACHED_UPDATE* pointer_cached) { - if(stream_get_left(s) < 2) + if (stream_get_left(s) < 2) return FALSE; + stream_read_UINT16(s, pointer_cached->cacheIndex); /* cacheIndex (2 bytes) */ return TRUE; } @@ -270,8 +282,9 @@ BOOL update_recv_pointer(rdpUpdate* update, STREAM* s) rdpContext* context = update->context; rdpPointerUpdate* pointer = update->pointer; - if(stream_get_left(s) < 2+2) + if (stream_get_left(s) < 2 + 2) return FALSE; + stream_read_UINT16(s, messageType); /* messageType (2 bytes) */ stream_seek_UINT16(s); /* pad2Octets (2 bytes) */ @@ -318,8 +331,9 @@ BOOL update_recv(rdpUpdate* update, STREAM* s) UINT16 updateType; rdpContext* 
context = update->context; - if(stream_get_left(s) < 2) + if (stream_get_left(s) < 2) return FALSE; + stream_read_UINT16(s, updateType); /* updateType (2 bytes) */ //printf("%s Update Data PDU\n", UPDATE_TYPE_STRINGS[updateType]); @@ -494,6 +508,19 @@ static void update_send_surface_frame_marker(rdpContext* context, SURFACE_FRAME_ fastpath_send_update_pdu(rdp->fastpath, FASTPATH_UPDATETYPE_SURFCMDS, s); } +static void update_send_frame_acknowledge(rdpContext* context, UINT32 frameId) +{ + STREAM* s; + rdpRdp* rdp = context->rdp; + + if (rdp->settings->ReceivedCapabilities[CAPSET_TYPE_FRAME_ACKNOWLEDGE]) + { + s = rdp_data_pdu_init(rdp); + stream_write_UINT32(s, frameId); + rdp_send_data_pdu(rdp, s, DATA_PDU_TYPE_FRAME_ACKNOWLEDGE, rdp->mcs->user_id); + } +} + static void update_send_synchronize(rdpContext* context) { STREAM* s; @@ -615,7 +642,7 @@ BOOL update_read_refresh_rect(rdpUpdate* update, STREAM* s) stream_read_BYTE(s, numberOfAreas); stream_seek(s, 3); /* pad3Octects */ - if(stream_get_left(s) < numberOfAreas * 4 * 2) + if (stream_get_left(s) < numberOfAreas * 4 * 2) return FALSE; areas = (RECTANGLE_16*) malloc(sizeof(RECTANGLE_16) * numberOfAreas); @@ -673,6 +700,7 @@ void update_register_client_callbacks(rdpUpdate* update) { update->RefreshRect = update_send_refresh_rect; update->SuppressOutput = update_send_suppress_output; + update->SurfaceFrameAcknowledge = update_send_frame_acknowledge; } static void* update_thread(void* arg) @@ -755,6 +783,8 @@ void update_free(rdpUpdate* update) CloseHandle(update->thread); + Queue_Free(update->queue); + free(update); } } diff --git a/libfreerdp/crypto/ber.c b/libfreerdp/crypto/ber.c index 313b5244c..344d5570d 100644 --- a/libfreerdp/crypto/ber.c +++ b/libfreerdp/crypto/ber.c @@ -21,6 +21,7 @@ #include "config.h" #endif +#include #include BOOL ber_read_length(STREAM* s, int* length) @@ -364,7 +365,7 @@ BOOL ber_read_integer(STREAM* s, UINT32* value) { int length; - if(!ber_read_universal_tag(s, BER_TAG_INTEGER, 
FALSE) || + if (!ber_read_universal_tag(s, BER_TAG_INTEGER, FALSE) || !ber_read_length(s, &length) || stream_get_left(s) < length) return FALSE; @@ -372,16 +373,17 @@ BOOL ber_read_integer(STREAM* s, UINT32* value) if (value == NULL) { // even if we don't care the integer value, check the announced size - if(length < 1 || length > 4) - return FALSE; - stream_seek(s, length); - return TRUE; + return stream_skip(s, length); } if (length == 1) + { stream_read_BYTE(s, *value); + } else if (length == 2) + { stream_read_UINT16_be(s, *value); + } else if (length == 3) { BYTE byte; @@ -390,9 +392,19 @@ BOOL ber_read_integer(STREAM* s, UINT32* value) *value += (byte << 16); } else if (length == 4) + { stream_read_UINT32_be(s, *value); - else + } + else if (length == 8) + { + printf("%s: should implement reading an 8 bytes integer\n", __FUNCTION__); return FALSE; + } + else + { + printf("%s: should implement reading an integer with length=%d\n", __FUNCTION__, length); + return FALSE; + } return TRUE; } diff --git a/libfreerdp/locale/keyboard_x11.c b/libfreerdp/locale/keyboard_x11.c index a572c551d..99c1524f3 100644 --- a/libfreerdp/locale/keyboard_x11.c +++ b/libfreerdp/locale/keyboard_x11.c @@ -232,14 +232,17 @@ UINT32 freerdp_keyboard_init_x11(UINT32 keyboardLayoutId, RDP_SCANCODE x11_keyco #else { char* keymap; - char* xkb_layout; - char* xkb_variant; + char* xkb_layout = 0; + char* xkb_variant = 0; if (keyboardLayoutId == 0) { keyboardLayoutId = freerdp_detect_keyboard_layout_from_xkb(&xkb_layout, &xkb_variant); - free(xkb_layout); - free(xkb_variant); + if (xkb_layout) + free(xkb_layout); + if (xkb_variant) + free(xkb_variant); + } keymap = freerdp_detect_keymap_from_xkb(); diff --git a/libfreerdp/primitives/CMakeLists.txt b/libfreerdp/primitives/CMakeLists.txt new file mode 100644 index 000000000..ad3c16761 --- /dev/null +++ b/libfreerdp/primitives/CMakeLists.txt @@ -0,0 +1,86 @@ +# FreeRDP: A Remote Desktop Protocol Client +# libfreerdp-primitives cmake build script 
+# vi:ts=4 sw=4: +# +# (c) Copyright 2012 Hewlett-Packard Development Company, L.P. +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing permissions +# and limitations under the License. +# + +set(MODULE_NAME "freerdp-primitives") +set(MODULE_PREFIX "FREERDP_PRIMITIVES") + +set(${MODULE_PREFIX}_SRCS + prim_add.c + prim_andor.c + prim_alphaComp.c + prim_colors.c + prim_copy.c + prim_set.c + prim_shift.c + prim_sign.c + primitives.c + prim_internal.h) + +add_definitions(-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}) + +### IPP Variable debugging +if(WITH_IPP) + if(CMAKE_COMPILER_IS_GNUCC) + foreach(INCLDIR ${IPP_INCLUDE_DIRS}) + set(OPTIMIZATION "${OPTIMIZATION} -I${INCLDIR}") + endforeach(INCLDIR) + endif() +endif() + +if(WITH_SSE2) + if(CMAKE_COMPILER_IS_GNUCC) + set(OPTIMIZATION "${OPTIMIZATION} -msse2 -mssse3 -Wdeclaration-after-statement") + endif() + + if(MSVC) + set(OPTIMIZATION "${OPTIMIZATION} /arch:SSE2") + endif() +elseif(WITH_NEON) + if(CMAKE_COMPILER_IS_GNUCC) + set(OPTIMIZATION "${OPTIMIZATION} -mfpu=neon -mfloat-abi=softfp") + endif() + # TODO: Add MSVC equivalent +endif() + +set_property(SOURCE ${${MODULE_PREFIX}_SRCS} PROPERTY COMPILE_FLAGS ${OPTIMIZATION}) + +add_complex_library(MODULE ${MODULE_NAME} TYPE "OBJECT" + MONOLITHIC ${MONOLITHIC_BUILD} + SOURCES ${${MODULE_PREFIX}_SRCS}) + +set_target_properties(${MODULE_NAME} PROPERTIES VERSION ${FREERDP_VERSION} SOVERSION ${FREERDP_API_VERSION} PREFIX "lib") + +if(IPP_FOUND) + include_directories(${IPP_INCLUDE_DIRS}) + foreach(ipp_lib ${IPP_LIBRARIES}) + 
add_library("${ipp_lib}_imported" STATIC IMPORTED) + set_property(TARGET "${ipp_lib}_imported" PROPERTY IMPORTED_LOCATION "${IPP_LIBRARY_DIRS}/${ipp_lib}") + set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} "${ipp_lib}_imported") + endforeach() +endif() + +if(MONOLITHIC_BUILD) + set(FREERDP_LIBS ${FREERDP_LIBS} ${${MODULE_PREFIX}_LIBS} PARENT_SCOPE) +else() + target_link_libraries(${MODULE_NAME} ${${MODULE_PREFIX}_LIBS}) + install(TARGETS ${MODULE_NAME} DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + +set_property(TARGET ${MODULE_NAME} PROPERTY FOLDER "FreeRDP/libfreerdp") + +if(BUILD_TESTING AND ((NOT WIN32) AND (NOT APPLE))) + add_subdirectory(test) +endif() + diff --git a/libfreerdp/primitives/README.txt b/libfreerdp/primitives/README.txt new file mode 100644 index 000000000..369102c0d --- /dev/null +++ b/libfreerdp/primitives/README.txt @@ -0,0 +1,113 @@ +The Primitives Library + +Introduction +------------ +The purpose of the primitives library is to give the freerdp code easy +access to *run-time* optimization via SIMD operations. When the library +is initialized, dynamic checks of processor features are run (such as +the support of SSE3 or Neon), and entrypoints are linked to through +function pointers to provide the fastest possible operations. All +routines offer generic C alternatives as fallbacks. + +Run-time optimization has the advantage of allowing a single executable +to run fast on multiple platforms with different SIMD capabilities. + + +Use In Code +----------- +A singleton pointing to a structure containing the function pointers +is accessed through primitives_get(). The function pointers can then +be used from that structure, e.g. + + primitives_t *prims = primitives_get(); + prims->shiftC_16s(buffer, shifts, buffer, 256); + +Of course, there is some overhead in calling through the function pointer +and setting up the SIMD operations, so it would be counterproductive to +call the primitives library for very small operation, e.g. 
initializing an +array of eight values to a constant. The primitives library is intended +for larger-scale operations, e.g. arrays of size 64 and larger. + + +Initialization and Cleanup +-------------------------- +Library initialization is done the first time primitives_init() is called +or the first time primitives_get() is used. Cleanup (if any) is done by +primitives_deinit(). + + +Intel Integrated Performance Primitives (IPP) +--------------------------------------------- +If freerdp is compiled with IPP support (-DWITH_IPP=ON), the IPP function +calls will be used (where available) to fill the function pointers. +Where possible, function names and parameter lists match IPP format so +that the IPP functions can be plugged into the function pointers without +a wrapper layer. Use of IPP is completely optional, and in many cases +the SSE operations in the primitives library itself are faster or similar +in performance. + + +Coverage +-------- +The primitives library is not meant to be comprehensive, offering +entrypoints for every operation and operand type. Instead, the coverage +is focused on operations known to be performance bottlenecks in the code. +For instance, 16-bit signed operations are used widely in the RemoteFX +software, so you'll find 16s versions of several operations, but there +is no attempt to provide (unused) copies of the same code for 8u, 16u, +32s, etc. + + +New Optimizations +----------------- +As the need arises, new optimizations can be added to the library, +including NEON, AVX, and perhaps OpenCL or other SIMD implementations. +The initialization routine is free to do any quick run-time test to +determine which features are available before hooking the operation's +function pointer, or it can simply look at the processor features list +from the hints passed to the initialization routine. + + +Adding Entrypoints +------------------ +As the need for new operations or operands arises, new entrypoints can +be added. 
+ 1) Function prototypes and pointers are added to + include/freerdp/primitives.h + 2) New module initialization and cleanup function prototypes are added + to prim_internal.h and called in primitives.c (primitives_init() + and primitives_deinit()). + 3) Operation names and parameter lists should be compatible with the IPP. + IPP manuals are available online at software.intel.com. + 4) A generic C entrypoint must be available as a fallback. + 5) prim_templates.h contains macro-based templates for simple operations, + such as applying a single SSE operation to arrays of data. + The template functions can frequently be used to extend the + operations without writing a lot of new code. + + +Flags +----- +The entrypoint primitives_get_flags() returns a bitfield of processor flags +(as defined in primitives.h) and primitives_flag_str() returns a string +related to those processor flags, for debugging and information. The +bitfield can be used elsewhere in the code as needed. + + +Cache Management +---------------- +I haven't found a lot of speed improvement by attempting prefetch, and +in fact it seems to have a negative impact in some cases. Done correctly +perhaps the routines could be further accelerated by proper use of prefetch, +fences, etc. + + +Testing +------- +In the test subdirectory is an executable (prim_test) that tests both +functionality and speed of primitives library operations. Any new +modules should be added to that test, following the conventions already +established in that directory. The program can be executed on various +target hardware to compare generic C, optimized, and IPP performance +with various array sizes. + diff --git a/libfreerdp/primitives/prim_add.c b/libfreerdp/primitives/prim_add.c new file mode 100644 index 000000000..17e504201 --- /dev/null +++ b/libfreerdp/primitives/prim_add.c @@ -0,0 +1,88 @@ +/* FreeRDP: A Remote Desktop Protocol Client + * Add operations. 
+ * vi:ts=4 sw=4: + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include + +#ifdef WITH_SSE2 +#include +#include +#endif /* WITH_SSE2 */ + +#ifdef WITH_IPP +#include +#endif /* WITH_IPP */ + +#include "prim_internal.h" +#include "prim_templates.h" + +/* ---------------------------------------------------------------------------- + * 16-bit signed add with saturation (under and over). 
+ */ +PRIM_STATIC pstatus_t general_add_16s( + const INT16 *pSrc1, + const INT16 *pSrc2, + INT16 *pDst, + INT32 len) +{ + while (len--) + { + INT32 k = (INT32) (*pSrc1++) + (INT32) (*pSrc2++); + if (k > 32767) *pDst++ = ((INT16) 32767); + else if (k < -32768) *pDst++ = ((INT16) -32768); + else *pDst++ = (INT16) k; + } + + return PRIMITIVES_SUCCESS; +} + +#ifdef WITH_SSE2 +# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) +/* ------------------------------------------------------------------------- */ +SSE3_SSD_ROUTINE(sse3_add_16s, INT16, general_add_16s, + _mm_adds_epi16, general_add_16s(sptr1++, sptr2++, dptr++, 1)) +# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */ +#endif + +/* ------------------------------------------------------------------------- */ +void primitives_init_add( + const primitives_hints_t *hints, + primitives_t *prims) +{ + prims->add_16s = general_add_16s; +#ifdef WITH_IPP + prims->add_16s = (__add_16s_t) ippsAdd_16s; +#elif defined(WITH_SSE2) + if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) + && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */ + { + prims->add_16s = sse3_add_16s; + } +#endif +} + +/* ------------------------------------------------------------------------- */ +void primitives_deinit_add( + primitives_t *prims) +{ + /* Nothing to do. */ +} diff --git a/libfreerdp/primitives/prim_alphaComp.c b/libfreerdp/primitives/prim_alphaComp.c new file mode 100644 index 000000000..b6d4e2e80 --- /dev/null +++ b/libfreerdp/primitives/prim_alphaComp.c @@ -0,0 +1,305 @@ +/* FreeRDP: A Remote Desktop Protocol Client + * Alpha blending routines. + * vi:ts=4 sw=4: + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. 
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * Note: this code assumes the second operand is fully opaque, + * e.g. + * newval = alpha1*val1 + (1-alpha1)*val2 + * rather than + * newval = alpha1*val1 + (1-alpha1)*alpha2*val2 + * The IPP gives other options. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include + +#include "prim_internal.h" + +#ifdef WITH_SSE2 +#include +#include +#endif /* WITH_SSE2 */ + +#ifdef WITH_IPP +#include +#endif /* WITH_IPP */ + +#define ALPHA(_k_) (((_k_) & 0xFF000000U) >> 24) +#define RED(_k_) (((_k_) & 0x00FF0000U) >> 16) +#define GRN(_k_) (((_k_) & 0x0000FF00U) >> 8) +#define BLU(_k_) (((_k_) & 0x000000FFU)) + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_alphaComp_argb( + const BYTE *pSrc1, INT32 src1Step, + const BYTE *pSrc2, INT32 src2Step, + BYTE *pDst, INT32 dstStep, + INT32 width, INT32 height) +{ + const UINT32 *sptr1 = (const UINT32 *) pSrc1; + const UINT32 *sptr2 = (const UINT32 *) pSrc2; + UINT32 *dptr = (UINT32 *) pDst; + int linebytes = width * sizeof(UINT32); + int src1Jump = (src1Step - linebytes) / sizeof(UINT32); + int src2Jump = (src2Step - linebytes) / sizeof(UINT32); + int dstJump = (dstStep - linebytes) / sizeof(UINT32); + + int y; + for (y=0; y> 8) & 0x00FF00FFU; + UINT32 s1rb = src1 & 0x00FF00FFU; + UINT32 s1ag = (src1 >> 8) & 0x00FF00FFU; + + UINT32 drb = s1rb - s2rb; + UINT32 dag = s1ag - s2ag; + drb *= alpha; + dag *= alpha; + + rb = ((drb >> 8) + s2rb) & 0x00FF00FFU; + ag = (((dag >> 8) + s2ag) << 8) & 0xFF00FF00U; + *dptr++ = rb | ag; + } + } + sptr1 += src1Jump; + sptr2 += src2Jump; + dptr += dstJump; + } + + return 
PRIMITIVES_SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +#ifdef WITH_SSE2 +# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) + +PRIM_STATIC pstatus_t sse2_alphaComp_argb( + const BYTE *pSrc1, INT32 src1Step, + const BYTE *pSrc2, INT32 src2Step, + BYTE *pDst, INT32 dstStep, + INT32 width, INT32 height) +{ + const UINT32 *sptr1 = (const UINT32 *) pSrc1; + const UINT32 *sptr2 = (const UINT32 *) pSrc2; + UINT32 *dptr; + int linebytes, src1Jump, src2Jump, dstJump, y; + __m128i xmm0, xmm1; + + if ((width <= 0) || (height <= 0)) return PRIMITIVES_SUCCESS; + + if (width < 4) /* pointless if too small */ + { + return general_alphaComp_argb(pSrc1, src1Step, pSrc2, src2Step, + pDst, dstStep, width, height); + } + dptr = (UINT32 *) pDst; + linebytes = width * sizeof(UINT32); + src1Jump = (src1Step - linebytes) / sizeof(UINT32); + src2Jump = (src2Step - linebytes) / sizeof(UINT32); + dstJump = (dstStep - linebytes) / sizeof(UINT32); + + xmm0 = _mm_set1_epi32(0); + xmm1 = _mm_set1_epi16(1); + + for (y=0; y> 2; + pixels -= count << 2; + while (count--) + { + __m128i xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + /* BdGdRdAdBcGcRcAcBbGbRbAbBaGaRaAa */ + xmm2 = LOAD_SI128(sptr1); sptr1 += 4; + /* BhGhRhAhBgGgRgAgBfGfRfAfBeGeReAe */ + xmm3 = LOAD_SI128(sptr2); sptr2 += 4; + /* 00Bb00Gb00Rb00Ab00Ba00Ga00Ra00Aa */ + xmm4 = _mm_unpackhi_epi8(xmm2, xmm0); + /* 00Bf00Gf00Bf00Af00Be00Ge00Re00Ae */ + xmm5 = _mm_unpackhi_epi8(xmm3, xmm0); + /* subtract */ + xmm6 = _mm_subs_epi16(xmm4, xmm5); + /* 00Bb00Gb00Rb00Ab00Aa00Aa00Aa00Aa */ + xmm4 = _mm_shufflelo_epi16(xmm4, 0xff); + /* 00Ab00Ab00Ab00Ab00Aa00Aa00Aa00Aa */ + xmm4 = _mm_shufflehi_epi16(xmm4, 0xff); + /* Add one to alphas */ + xmm4 = _mm_adds_epi16(xmm4, xmm1); + /* Multiply and take low word */ + xmm4 = _mm_mullo_epi16(xmm4, xmm6); + /* Shift 8 right */ + xmm4 = _mm_srai_epi16(xmm4, 8); + /* Add xmm5 */ + xmm4 = _mm_adds_epi16(xmm4, xmm5); + /* 00Bj00Gj00Rj00Aj00Bi00Gi00Ri00Ai */ + + 
/* 00Bd00Gd00Rd00Ad00Bc00Gc00Rc00Ac */ + xmm5 = _mm_unpacklo_epi8(xmm2, xmm0); + /* 00Bh00Gh00Rh00Ah00Bg00Gg00Rg00Ag */ + xmm6 = _mm_unpacklo_epi8(xmm3, xmm0); + /* subtract */ + xmm7 = _mm_subs_epi16(xmm5, xmm6); + /* 00Bd00Gd00Rd00Ad00Ac00Ac00Ac00Ac */ + xmm5 = _mm_shufflelo_epi16(xmm5, 0xff); + /* 00Ad00Ad00Ad00Ad00Ac00Ac00Ac00Ac */ + xmm5 = _mm_shufflehi_epi16(xmm5, 0xff); + /* Add one to alphas */ + xmm5 = _mm_adds_epi16(xmm5, xmm1); + /* Multiply and take low word */ + xmm5 = _mm_mullo_epi16(xmm5, xmm7); + /* Shift 8 right */ + xmm5 = _mm_srai_epi16(xmm5, 8); + /* Add xmm6 */ + xmm5 = _mm_adds_epi16(xmm5, xmm6); + /* 00Bl00Gl00Rl00Al00Bk00Gk00Rk0ABk */ + + /* Must mask off remainders or pack gets confused */ + xmm3 = _mm_set1_epi16(0x00ffU); + xmm4 = _mm_and_si128(xmm4, xmm3); + xmm5 = _mm_and_si128(xmm5, xmm3); + + /* BlGlRlAlBkGkRkAkBjGjRjAjBiGiRiAi */ + xmm5 = _mm_packus_epi16(xmm5, xmm4); + _mm_store_si128((__m128i *) dptr, xmm5); dptr += 4; + } + + /* Finish off the remainder. */ + if (pixels) + { + general_alphaComp_argb((const BYTE *) sptr1, src1Step, + (const BYTE *) sptr2, src2Step, + (BYTE *) dptr, dstStep, pixels, 1); + sptr1 += pixels; + sptr2 += pixels; + dptr += pixels; + } + + /* Jump to next row. 
*/ + sptr1 += src1Jump; + sptr2 += src2Jump; + dptr += dstJump; + } + + return PRIMITIVES_SUCCESS; +} +# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */ +#endif + +#ifdef WITH_IPP +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t ipp_alphaComp_argb( + const BYTE *pSrc1, INT32 src1Step, + const BYTE *pSrc2, INT32 src2Step, + BYTE *pDst, INT32 dstStep, + INT32 width, INT32 height) +{ + IppiSize sz; + sz.width = width; + sz.height = height; + return ippiAlphaComp_8u_AC4R(pSrc1, src1Step, pSrc2, src2Step, + pDst, dstStep, sz, ippAlphaOver); +} +#endif + +/* ------------------------------------------------------------------------- */ +void primitives_init_alphaComp( + const primitives_hints_t *hints, + primitives_t *prims) +{ + prims->alphaComp_argb = general_alphaComp_argb; +#ifdef WITH_IPP + prims->alphaComp_argb = ipp_alphaComp_argb; +#elif defined(WITH_SSE2) + if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) + && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */ + { + prims->alphaComp_argb = sse2_alphaComp_argb; + } +#endif +} + +/* ------------------------------------------------------------------------- */ +void primitives_deinit_alphaComp( + primitives_t *prims) +{ + /* Nothing to do. */ +} diff --git a/libfreerdp/primitives/prim_andor.c b/libfreerdp/primitives/prim_andor.c new file mode 100644 index 000000000..358d6c823 --- /dev/null +++ b/libfreerdp/primitives/prim_andor.c @@ -0,0 +1,110 @@ +/* FreeRDP: A Remote Desktop Protocol Client + * Logical operations. + * vi:ts=4 sw=4: + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. 
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include + +#ifdef WITH_SSE2 +#include +#include +#endif /* WITH_SSE2 */ + +#ifdef WITH_IPP +#include +#endif /* WITH_IPP */ + +#include "prim_internal.h" +#include "prim_templates.h" + +/* ---------------------------------------------------------------------------- + * 32-bit AND with a constant. + */ +PRIM_STATIC pstatus_t general_andC_32u( + const UINT32 *pSrc, + UINT32 val, + UINT32 *pDst, + INT32 len) +{ + if (val == 0) + return PRIMITIVES_SUCCESS; + + while (len--) + *pDst++ = *pSrc++ & val; + + return PRIMITIVES_SUCCESS; +} + +/* ---------------------------------------------------------------------------- + * 32-bit OR with a constant. + */ +PRIM_STATIC pstatus_t general_orC_32u( + const UINT32 *pSrc, + UINT32 val, + UINT32 *pDst, + INT32 len) +{ + if (val == 0) + return PRIMITIVES_SUCCESS; + + while (len--) + *pDst++ = *pSrc++ | val; + + return PRIMITIVES_SUCCESS; +} + +#ifdef WITH_SSE2 +# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) +/* ------------------------------------------------------------------------- */ +SSE3_SCD_PRE_ROUTINE(sse3_andC_32u, UINT32, general_andC_32u, + _mm_and_si128, *dptr++ = *sptr++ & val) +SSE3_SCD_PRE_ROUTINE(sse3_orC_32u, UINT32, general_orC_32u, + _mm_or_si128, *dptr++ = *sptr++ | val) +# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */ +#endif + +/* ------------------------------------------------------------------------- */ +void primitives_init_andor( + const primitives_hints_t *hints, + primitives_t *prims) +{ + /* Start with the default. 
*/ + prims->andC_32u = general_andC_32u; + prims->orC_32u = general_orC_32u; + +#if defined(WITH_IPP) + prims->andC_32u = (__andC_32u_t) ippsAndC_32u; + prims->orC_32u = (__orC_32u_t) ippsOrC_32u; +#elif defined(WITH_SSE2) + if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) + && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) + { + prims->andC_32u = sse3_andC_32u; + prims->orC_32u = sse3_orC_32u; + } +#endif +} + +/* ------------------------------------------------------------------------- */ +void primitives_deinit_andor( + primitives_t *prims) +{ + /* Nothing to do. */ +} diff --git a/libfreerdp/primitives/prim_colors.c b/libfreerdp/primitives/prim_colors.c new file mode 100644 index 000000000..9829265ed --- /dev/null +++ b/libfreerdp/primitives/prim_colors.c @@ -0,0 +1,743 @@ +/* FreeRDP: A Remote Desktop Protocol Client + * Color conversion operations. + * vi:ts=4 sw=4: + * + * Copyright 2011 Stephen Erisman + * Copyright 2011 Norbert Federa + * Copyright 2011 Martin Fleisz + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#ifdef WITH_SSE2 +#include +#elif WITH_NEON +#include +#endif /* WITH_SSE2 else WITH_NEON */ +#include "prim_internal.h" +#include "prim_templates.h" + +#ifndef MINMAX +#define MINMAX(_v_, _l_, _h_) \ + ((_v_) < (_l_) ? (_l_) : ((_v_) > (_h_) ? 
(_h_) : (_v_))) +#endif /* !MINMAX */ + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_yCbCrToRGB_16s16s_P3P3( + const INT16 *pSrc[3], INT32 srcStep, + INT16 *pDst[3], INT32 dstStep, + const prim_size_t *roi) /* region of interest */ +{ + /** + * The decoded YCbCr coeffectients are represented as 11.5 fixed-point + * numbers: + * + * 1 sign bit + 10 integer bits + 5 fractional bits + * + * However only 7 integer bits will be actually used since the value range + * is [-128.0, 127.0]. In other words, the decoded coefficients are scaled + * by << 5 when interpreted as INT16. + * It was scaled in the quantization phase, so we must scale it back here. + */ + const INT16 *yptr = pSrc[0]; + const INT16 *cbptr = pSrc[1]; + const INT16 *crptr = pSrc[2]; + INT16 *rptr = pDst[0]; + INT16 *gptr = pDst[1]; + INT16 *bptr = pDst[2]; + int srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16); + int dstbump = (dstStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16); + int y; + + for (y=0; yheight; y++) + { + int x; + for (x=0; xwidth; ++x) + { + /* INT32 is used intentionally because we calculate + * with shifted factors! + */ + INT32 y = (INT32) (*yptr++); + INT32 cb = (INT32) (*cbptr++); + INT32 cr = (INT32) (*crptr++); + INT32 r,g,b; + + /* + * This is the slow floating point version kept here for reference. + * y = y + 4096; // 128<<5=4096 so that we can scale the sum by>>5 + * r = y + cr*1.403f; + * g = y - cb*0.344f - cr*0.714f; + * b = y + cb*1.770f; + * y_r_buf[i] = MINMAX(r>>5, 0, 255); + * cb_g_buf[i] = MINMAX(g>>5, 0, 255); + * cr_b_buf[i] = MINMAX(b>>5, 0, 255); + */ + + /* + * We scale the factors by << 16 into 32-bit integers in order to + * avoid slower floating point multiplications. Since the final + * result needs to be scaled by >> 5 we will extract only the + * upper 11 bits (>> 21) from the final sum. + * Hence we also have to scale the other terms of the sum by << 16. 
+ * R: 1.403 << 16 = 91947 + * G: 0.344 << 16 = 22544, 0.714 << 16 = 46792 + * B: 1.770 << 16 = 115998 + */ + y = (y+4096)<<16; + + r = y + cr*91947; + g = y - cb*22544 - cr*46792; + b = y + cb*115998; + + *rptr++ = MINMAX(r>>21, 0, 255); + *gptr++ = MINMAX(g>>21, 0, 255); + *bptr++ = MINMAX(b>>21, 0, 255); + } + yptr += srcbump; + cbptr += srcbump; + crptr += srcbump; + rptr += dstbump; + gptr += dstbump; + bptr += dstbump; + } + return PRIMITIVES_SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_RGBToYCbCr_16s16s_P3P3( + const INT16 *pSrc[3], INT32 srcStep, + INT16 *pDst[3], INT32 dstStep, + const prim_size_t *roi) /* region of interest */ +{ + /* The encoded YCbCr coefficients are represented as 11.5 fixed-point + * numbers: + * + * 1 sign bit + 10 integer bits + 5 fractional bits + * + * However only 7 integer bits will be actually used since the value + * range is [-128.0, 127.0]. In other words, the encoded coefficients + * is scaled by << 5 when interpreted as INT16. + * It will be scaled down to original during the quantization phase. + */ + const INT16 *rptr = pSrc[0]; + const INT16 *gptr = pSrc[1]; + const INT16 *bptr = pSrc[2]; + INT16 *yptr = pDst[0]; + INT16 *cbptr = pDst[1]; + INT16 *crptr = pDst[2]; + int srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16); + int dstbump = (dstStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16); + int y; + + for (y=0; yheight; y++) + { + int x; + for (x=0; xwidth; ++x) + { + /* INT32 is used intentionally because we calculate with + * shifted factors! + */ + INT32 r = (INT32) (*rptr++); + INT32 g = (INT32) (*gptr++); + INT32 b = (INT32) (*bptr++); + + /* We scale the factors by << 15 into 32-bit integers in order + * to avoid slower floating point multiplications. 
Since the + * terms need to be scaled by << 5 we simply scale the final + * sum by >> 10 + * + * Y: 0.299000 << 15 = 9798, 0.587000 << 15 = 19235, + * 0.114000 << 15 = 3735 + * Cb: 0.168935 << 15 = 5535, 0.331665 << 15 = 10868, + * 0.500590 << 15 = 16403 + * Cr: 0.499813 << 15 = 16377, 0.418531 << 15 = 13714, + * 0.081282 << 15 = 2663 + */ + INT32 y = (r * 9798 + g * 19235 + b * 3735) >> 10; + INT32 cb = (r * -5535 + g * -10868 + b * 16403) >> 10; + INT32 cr = (r * 16377 + g * -13714 + b * -2663) >> 10; + + *yptr++ = (INT16) MINMAX(y - 4096, -4096, 4095); + *cbptr++ = (INT16) MINMAX(cb, -4096, 4095); + *crptr++ = (INT16) MINMAX(cr, -4096, 4095); + } + yptr += srcbump; + cbptr += srcbump; + crptr += srcbump; + rptr += dstbump; + gptr += dstbump; + bptr += dstbump; + } + return PRIMITIVES_SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_RGBToRGB_16s8u_P3AC4R( + const INT16 *pSrc[3], /* 16-bit R,G, and B arrays */ + int srcStep, /* bytes between rows in source data */ + BYTE *pDst, /* 32-bit interleaved ARGB (ABGR?) 
data */ + int dstStep, /* bytes between rows in dest data */ + const prim_size_t *roi) /* region of interest */ +{ + const INT16 *r = pSrc[0]; + const INT16 *g = pSrc[1]; + const INT16 *b = pSrc[2]; + BYTE *dst = pDst; + int x,y; + int srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16); + int dstbump = (dstStep - (roi->width * sizeof(UINT32))); + + for (y=0; yheight; ++y) + { + for (x=0; xwidth; ++x) + { + *dst++ = (BYTE) (*b++); + *dst++ = (BYTE) (*g++); + *dst++ = (BYTE) (*r++); + *dst++ = ((BYTE) (0xFFU)); + } + dst += dstbump; + r += srcbump; + g += srcbump; + b += srcbump; + } + return PRIMITIVES_SUCCESS; +} + + +#ifdef WITH_SSE2 + +#ifdef __GNUC__ +# define GNU_INLINE \ + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +#else +# define GNU_INLINE +#endif + +#define CACHE_LINE_BYTES 64 + +#define _mm_between_epi16(_val, _min, _max) \ + do { _val = _mm_min_epi16(_max, _mm_max_epi16(_val, _min)); } while (0) + +#ifdef DO_PREFETCH +/*---------------------------------------------------------------------------*/ +static inline void GNU_INLINE _mm_prefetch_buffer( + char * buffer, + int num_bytes) +{ + __m128i * buf = (__m128i*) buffer; + unsigned int i; + for (i = 0; i < (num_bytes / sizeof(__m128i)); + i+=(CACHE_LINE_BYTES / sizeof(__m128i))) + { + _mm_prefetch((char*)(&buf[i]), _MM_HINT_NTA); + } +} +#endif /* DO_PREFETCH */ + +/*---------------------------------------------------------------------------*/ +PRIM_STATIC pstatus_t sse2_yCbCrToRGB_16s16s_P3P3( + const INT16 *pSrc[3], + int srcStep, + INT16 *pDst[3], + int dstStep, + const prim_size_t *roi) /* region of interest */ +{ + __m128i zero, max, r_cr, g_cb, g_cr, b_cb, c4096; + __m128i *y_buf, *cb_buf, *cr_buf, *r_buf, *g_buf, *b_buf; + int srcbump, dstbump, yp, imax; + + if (((ULONG_PTR) (pSrc[0]) & 0x0f) + || ((ULONG_PTR) (pSrc[1]) & 0x0f) + || ((ULONG_PTR) (pSrc[2]) & 0x0f) + || ((ULONG_PTR) (pDst[0]) & 0x0f) + || ((ULONG_PTR) (pDst[1]) & 0x0f) + || ((ULONG_PTR) 
(pDst[2]) & 0x0f) + || (roi->width & 0x07) + || (srcStep & 127) + || (dstStep & 127)) + { + /* We can't maintain 16-byte alignment. */ + return general_yCbCrToRGB_16s16s_P3P3(pSrc, srcStep, + pDst, dstStep, roi); + } + + zero = _mm_setzero_si128(); + max = _mm_set1_epi16(255); + + y_buf = (__m128i*) (pSrc[0]); + cb_buf = (__m128i*) (pSrc[1]); + cr_buf = (__m128i*) (pSrc[2]); + r_buf = (__m128i*) (pDst[0]); + g_buf = (__m128i*) (pDst[1]); + b_buf = (__m128i*) (pDst[2]); + + r_cr = _mm_set1_epi16(22986); /* 1.403 << 14 */ + g_cb = _mm_set1_epi16(-5636); /* -0.344 << 14 */ + g_cr = _mm_set1_epi16(-11698); /* -0.714 << 14 */ + b_cb = _mm_set1_epi16(28999); /* 1.770 << 14 */ + c4096 = _mm_set1_epi16(4096); + srcbump = srcStep / sizeof(__m128i); + dstbump = dstStep / sizeof(__m128i); + +#ifdef DO_PREFETCH + /* Prefetch Y's, Cb's, and Cr's. */ + for (yp=0; ypheight; yp++) + { + int i; + for (i=0; iwidth * sizeof(INT16) / sizeof(__m128i); + i += (CACHE_LINE_BYTES / sizeof(__m128i))) + { + _mm_prefetch((char*)(&y_buf[i]), _MM_HINT_NTA); + _mm_prefetch((char*)(&cb_buf[i]), _MM_HINT_NTA); + _mm_prefetch((char*)(&cr_buf[i]), _MM_HINT_NTA); + } + y_buf += srcbump; + cb_buf += srcbump; + cr_buf += srcbump; + } + y_buf = (__m128i*) (pSrc[0]); + cb_buf = (__m128i*) (pSrc[1]); + cr_buf = (__m128i*) (pSrc[2]); +#endif /* DO_PREFETCH */ + + imax = roi->width * sizeof(INT16) / sizeof(__m128i); + for (yp=0; ypheight; ++yp) + { + int i; + for (i=0; i>5) + 128 + (cr*1.403)>>5 // our base formula + * r = (y>>5) + 128 + (HIWORD(cr*(1.403<<14)<<2))>>5 // see above + * r = (y+4096)>>5 + (HIWORD(cr*22986)<<2)>>5 // simplification + * r = ((y+4096)>>2 + HIWORD(cr*22986)) >> 3 + */ + + /* y = (y_r_buf[i] + 4096) >> 2 */ + __m128i y, cb, cr, r, g, b; + y = _mm_load_si128(y_buf + i); + y = _mm_add_epi16(y, c4096); + y = _mm_srai_epi16(y, 2); + /* cb = cb_g_buf[i]; */ + cb = _mm_load_si128(cb_buf + i); + /* cr = cr_b_buf[i]; */ + cr = _mm_load_si128(cr_buf + i); + + /* (y + HIWORD(cr*22986)) >> 3 
*/ + r = _mm_add_epi16(y, _mm_mulhi_epi16(cr, r_cr)); + r = _mm_srai_epi16(r, 3); + + /* r_buf[i] = MINMAX(r, 0, 255); */ + _mm_between_epi16(r, zero, max); + _mm_store_si128(r_buf + i, r); + + /* (y + HIWORD(cb*-5636) + HIWORD(cr*-11698)) >> 3 */ + g = _mm_add_epi16(y, _mm_mulhi_epi16(cb, g_cb)); + g = _mm_add_epi16(g, _mm_mulhi_epi16(cr, g_cr)); + g = _mm_srai_epi16(g, 3); + + /* g_buf[i] = MINMAX(g, 0, 255); */ + _mm_between_epi16(g, zero, max); + _mm_store_si128(g_buf + i, g); + + /* (y + HIWORD(cb*28999)) >> 3 */ + b = _mm_add_epi16(y, _mm_mulhi_epi16(cb, b_cb)); + b = _mm_srai_epi16(b, 3); + /* b_buf[i] = MINMAX(b, 0, 255); */ + _mm_between_epi16(b, zero, max); + _mm_store_si128(b_buf + i, b); + } + y_buf += srcbump; + cb_buf += srcbump; + cr_buf += srcbump; + r_buf += dstbump; + g_buf += dstbump; + b_buf += dstbump; + } + + return PRIMITIVES_SUCCESS; +} + +/*---------------------------------------------------------------------------*/ +/* The encodec YCbCr coeffectients are represented as 11.5 fixed-point + * numbers. See the general code above. + */ +PRIM_STATIC pstatus_t sse2_RGBToYCbCr_16s16s_P3P3( + const INT16 *pSrc[3], + int srcStep, + INT16 *pDst[3], + int dstStep, + const prim_size_t *roi) /* region of interest */ +{ + __m128i min, max, y_r, y_g, y_b, cb_r, cb_g, cb_b, cr_r, cr_g, cr_b; + __m128i *r_buf, *g_buf, *b_buf, *y_buf, *cb_buf, *cr_buf; + int srcbump, dstbump, yp, imax; + + if (((ULONG_PTR) (pSrc[0]) & 0x0f) + || ((ULONG_PTR) (pSrc[1]) & 0x0f) + || ((ULONG_PTR) (pSrc[2]) & 0x0f) + || ((ULONG_PTR) (pDst[0]) & 0x0f) + || ((ULONG_PTR) (pDst[1]) & 0x0f) + || ((ULONG_PTR) (pDst[2]) & 0x0f) + || (roi->width & 0x07) + || (srcStep & 127) + || (dstStep & 127)) + { + /* We can't maintain 16-byte alignment. 
*/ + return general_RGBToYCbCr_16s16s_P3P3(pSrc, srcStep, + pDst, dstStep, roi); + } + + min = _mm_set1_epi16(-128 << 5); + max = _mm_set1_epi16(127 << 5); + + r_buf = (__m128i*) (pSrc[0]); + g_buf = (__m128i*) (pSrc[1]); + b_buf = (__m128i*) (pSrc[2]); + y_buf = (__m128i*) (pDst[0]); + cb_buf = (__m128i*) (pDst[1]); + cr_buf = (__m128i*) (pDst[2]); + + y_r = _mm_set1_epi16(9798); /* 0.299000 << 15 */ + y_g = _mm_set1_epi16(19235); /* 0.587000 << 15 */ + y_b = _mm_set1_epi16(3735); /* 0.114000 << 15 */ + cb_r = _mm_set1_epi16(-5535); /* -0.168935 << 15 */ + cb_g = _mm_set1_epi16(-10868); /* -0.331665 << 15 */ + cb_b = _mm_set1_epi16(16403); /* 0.500590 << 15 */ + cr_r = _mm_set1_epi16(16377); /* 0.499813 << 15 */ + cr_g = _mm_set1_epi16(-13714); /* -0.418531 << 15 */ + cr_b = _mm_set1_epi16(-2663); /* -0.081282 << 15 */ + + srcbump = srcStep / sizeof(__m128i); + dstbump = dstStep / sizeof(__m128i); + +#ifdef DO_PREFETCH + /* Prefetch RGB's. */ + for (yp=0; ypheight; yp++) + { + int i; + for (i=0; iwidth * sizeof(INT16) / sizeof(__m128i); + i += (CACHE_LINE_BYTES / sizeof(__m128i))) + { + _mm_prefetch((char*)(&r_buf[i]), _MM_HINT_NTA); + _mm_prefetch((char*)(&g_buf[i]), _MM_HINT_NTA); + _mm_prefetch((char*)(&b_buf[i]), _MM_HINT_NTA); + } + r_buf += srcbump; + g_buf += srcbump; + b_buf += srcbump; + } + r_buf = (__m128i*) (pSrc[0]); + g_buf = (__m128i*) (pSrc[1]); + b_buf = (__m128i*) (pSrc[2]); +#endif /* DO_PREFETCH */ + + imax = roi->width * sizeof(INT16) / sizeof(__m128i); + for (yp=0; ypheight; ++yp) + { + int i; + for (i=0; iwidth & 0x0f) + || (srcStep & 0x0f) + || (dstStep & 0x0f)) + { + return general_RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, roi); + } + + out = (BYTE *) pDst; + srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16); + dstbump = (dstStep - (roi->width * sizeof(UINT32))); + + for (y=0; yheight; ++y) + { + int width = roi->width; + do { + __m128i R0, R1, R2, R3, R4; + /* The comments below pretend these are 8-byte 
registers + * rather than 16-byte, for readability. + */ + R0 = LOAD128(b); b += 8; /* R0 = 00B300B200B100B0 */ + R1 = LOAD128(b); b += 8; /* R1 = 00B700B600B500B4 */ + PACKUSWB(R0,R1); /* R0 = B7B6B5B4B3B2B1B0 */ + R1 = LOAD128(g); g += 8; /* R1 = 00G300G200G100G0 */ + R2 = LOAD128(g); g += 8; /* R2 = 00G700G600G500G4 */ + PACKUSWB(R1,R2); /* R1 = G7G6G5G4G3G2G1G0 */ + R2 = R1; /* R2 = G7G6G5G4G3G2G1G0 */ + PUNPCKLBW(R2,R0); /* R2 = G3B3G2B2G1B1G0B0 */ + PUNPCKHBW(R1,R0); /* R1 = G7B7G6B7G5B5G4B4 */ + R0 = LOAD128(r); r += 8; /* R0 = 00R300R200R100R0 */ + R3 = LOAD128(r); r += 8; /* R3 = 00R700R600R500R4 */ + PACKUSWB(R0,R3); /* R0 = R7R6R5R4R3R2R1R0 */ + R3 = XMM_ALL_ONES; /* R3 = FFFFFFFFFFFFFFFF */ + R4 = R3; /* R4 = FFFFFFFFFFFFFFFF */ + PUNPCKLBW(R4,R0); /* R4 = FFR3FFR2FFR1FFR0 */ + PUNPCKHBW(R3,R0); /* R3 = FFR7FFR6FFR5FFR4 */ + R0 = R4; /* R0 = R4 */ + PUNPCKLWD(R0,R2); /* R0 = FFR1G1B1FFR0G0B0 */ + PUNPCKHWD(R4,R2); /* R4 = FFR3G3B3FFR2G2B2 */ + R2 = R3; /* R2 = R3 */ + PUNPCKLWD(R2,R1); /* R2 = FFR5G5B5FFR4G4B4 */ + PUNPCKHWD(R3,R1); /* R3 = FFR7G7B7FFR6G6B6 */ + STORE128(out, R0); out += 16; /* FFR1G1B1FFR0G0B0 */ + STORE128(out, R4); out += 16; /* FFR3G3B3FFR2G2B2 */ + STORE128(out, R2); out += 16; /* FFR5G5B5FFR4G4B4 */ + STORE128(out, R3); out += 16; /* FFR7G7B7FFR6G6B6 */ + } while (width -= 16); + /* Jump to next row. */ + r += srcbump; + g += srcbump; + b += srcbump; + out += dstbump; + } + return PRIMITIVES_SUCCESS; +} +#endif /* WITH_SSE2 */ + +/*---------------------------------------------------------------------------*/ +#ifdef WITH_NEON +PRIM_STATIC pstatus_t neon_yCbCrToRGB_16s16s_P3P3( + const INT16 *pSrc[3], + int srcStep, + INT16 *pDst[3], + int dstStep, + const prim_size_t *roi) /* region of interest */ +{ + /* TODO: If necessary, check alignments and call the general version. 
*/ + + int16x8_t zero = vdupq_n_s16(0); + int16x8_t max = vdupq_n_s16(255); + int16x8_t y_add = vdupq_n_s16(128); + + int16x8_t* y_buf = (int16x8_t*) pSrc[0]; + int16x8_t* cb_buf = (int16x8_t*) pSrc[1]; + int16x8_t* cr_buf = (int16x8_t*) pSrc[2]; + int16x8_t* r_buf = (int16x8_t*) pDst[0]; + int16x8_t* g_buf = (int16x8_t*) pDst[1]; + int16x8_t* b_buf = (int16x8_t*) pDst[2]; + + int srcbump = srcStep / sizeof(int16x8_t); + int dstbump = dstStep / sizeof(int16x8_t); + int yp; + + int imax = roi->width * sizeof(INT16) / sizeof(int16x8_t); + for (yp=0; ypheight; ++yp) + { + int i; + for (i=0; i> 2) + (cr >> 3) + (cr >> 5)), + * 0, 255); + */ + int16x8_t r = vaddq_s16(y, cr); + r = vaddq_s16(r, vshrq_n_s16(cr, 2)); + r = vaddq_s16(r, vshrq_n_s16(cr, 3)); + r = vaddq_s16(r, vshrq_n_s16(cr, 5)); + r = vminq_s16(vmaxq_s16(r, zero), max); + vst1q_s16((INT16*) (r_buf+i), r); + + /* cb = cb_g_buf[i]; */ + int16x8_t cb = vld1q_s16((INT16*) (cb_buf+i)); + + /* g = between(y - (cb >> 2) - (cb >> 4) - (cb >> 5) - (cr >> 1) + * - (cr >> 3) - (cr >> 4) - (cr >> 5), 0, 255); + */ + int16x8_t g = vsubq_s16(y, vshrq_n_s16(cb, 2)); + g = vsubq_s16(g, vshrq_n_s16(cb, 4)); + g = vsubq_s16(g, vshrq_n_s16(cb, 5)); + g = vsubq_s16(g, vshrq_n_s16(cr, 1)); + g = vsubq_s16(g, vshrq_n_s16(cr, 3)); + g = vsubq_s16(g, vshrq_n_s16(cr, 4)); + g = vsubq_s16(g, vshrq_n_s16(cr, 5)); + g = vminq_s16(vmaxq_s16(g, zero), max); + vst1q_s16((INT16*) (g_buf+i), g); + + /* b = between((y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6)), + * 0, 255); + */ + int16x8_t b = vaddq_s16(y, cb); + b = vaddq_s16(b, vshrq_n_s16(cb, 1)); + b = vaddq_s16(b, vshrq_n_s16(cb, 2)); + b = vaddq_s16(b, vshrq_n_s16(cb, 6)); + b = vminq_s16(vmaxq_s16(b, zero), max); + vst1q_s16((INT16*) (b_buf+i), b); + } + y_buf += srcbump; + cb_buf += srcbump; + cr_buf += srcbump; + r_buf += dstbump; + g_buf += dstbump; + b_buf += dstbump; + } +} +#endif /* WITH_NEON */ + + +/* I don't see a direct IPP version of this, since the input is INT16 + * 
YCbCr. It may be possible via Deinterleave and then YCbCrToRGB_. + * But that would likely be slower. + */ + +/* ------------------------------------------------------------------------- */ +void primitives_init_colors( + const primitives_hints_t *hints, + primitives_t *prims) +{ + prims->RGBToRGB_16s8u_P3AC4R = general_RGBToRGB_16s8u_P3AC4R; + prims->yCbCrToRGB_16s16s_P3P3 = general_yCbCrToRGB_16s16s_P3P3; + prims->RGBToYCbCr_16s16s_P3P3 = general_RGBToYCbCr_16s16s_P3P3; +#if defined(WITH_SSE2) + if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) + { + prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R; + prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3; + prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3; + } +#elif defined(WITH_NEON) + if (hints->arm_flags & PRIM_ARM_NEON_AVAILABLE) + { + prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3; + } +#endif /* WITH_SSE2 */ +} + +/* ------------------------------------------------------------------------- */ +void primitives_deinit_colors( + primitives_t *prims) +{ + /* Nothing to do. */ +} diff --git a/libfreerdp/primitives/prim_copy.c b/libfreerdp/primitives/prim_copy.c new file mode 100644 index 000000000..71303b1a6 --- /dev/null +++ b/libfreerdp/primitives/prim_copy.c @@ -0,0 +1,180 @@ +/* FreeRDP: A Remote Desktop Protocol Client + * Copy operations. + * vi:ts=4 sw=4: + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#ifdef WITH_IPP +# include +# include +#endif /* WITH_IPP */ +#include "prim_internal.h" + +/* ------------------------------------------------------------------------- */ +/*static inline BOOL memory_regions_overlap_1d(*/ +static BOOL memory_regions_overlap_1d( + const BYTE *p1, + const BYTE *p2, + size_t bytes) +{ + const ULONG_PTR p1m = (const ULONG_PTR) p1; + const ULONG_PTR p2m = (const ULONG_PTR) p2; + if (p1m <= p2m) + { + if (p1m + bytes > p2m) return TRUE; + } + else + { + if (p2m + bytes > p1m) return TRUE; + } + /* else */ + return FALSE; +} + +/* ------------------------------------------------------------------------- */ +/*static inline BOOL memory_regions_overlap_2d( */ +static BOOL memory_regions_overlap_2d( + const BYTE *p1, int p1Step, int p1Size, + const BYTE *p2, int p2Step, int p2Size, + int width, int height) +{ + ULONG_PTR p1m = (ULONG_PTR) p1; + ULONG_PTR p2m = (ULONG_PTR) p2; + + if (p1m <= p2m) + { + ULONG_PTR p1mEnd = p1m + (height-1)*p1Step + width*p1Size; + if (p1mEnd > p2m) return TRUE; + } + else + { + ULONG_PTR p2mEnd = p2m + (height-1)*p2Step + width*p2Size; + if (p2mEnd > p1m) return TRUE; + } + /* else */ + return FALSE; +} + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_copy_8u( + const BYTE *pSrc, + BYTE *pDst, + INT32 len) +{ + if (memory_regions_overlap_1d(pSrc, pDst, (size_t) len)) + { + memmove((void *) pDst, (const void *) pSrc, (size_t) len); + } + else + { + memcpy((void *) pDst, (const void *) pSrc, (size_t) len); + } + + return PRIMITIVES_SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +/* Copy a block of pixels from one buffer to another. + * The addresses are assumed to have been already offset to the upper-left + * corners of the source and destination region of interest. 
+ */ +PRIM_STATIC pstatus_t general_copy_8u_AC4r( + const BYTE *pSrc, INT32 srcStep, + BYTE *pDst, INT32 dstStep, + INT32 width, INT32 height) +{ + primitives_t *prims = primitives_get(); + const BYTE *src = (const BYTE *) pSrc; + BYTE *dst = (BYTE *) pDst; + int rowbytes = width * sizeof(UINT32); + + if ((width == 0) || (height == 0)) return PRIMITIVES_SUCCESS; + + if (memory_regions_overlap_2d(pSrc, srcStep, sizeof(UINT32), + pDst, dstStep, sizeof(UINT32), width, height)) + { + do { + prims->copy(src, dst, rowbytes); + src += srcStep; + dst += dstStep; + } while (--height); + } + else + { + /* TODO: do it in one operation when the row data is contiguous. */ + do { + /* If we find a replacement for memcpy that is consistently + * faster, this could be replaced with that. + */ + memcpy(dst, src, rowbytes); + src += srcStep; + dst += dstStep; + } while (--height); + } + + return PRIMITIVES_SUCCESS; +} + +#ifdef WITH_IPP +/* ------------------------------------------------------------------------- */ +/* This is just ippiCopy_8u_AC4R without the IppiSize structure parameter. */ +static pstatus_t ippiCopy_8u_AC4r( + const BYTE *pSrc, INT32 srcStep, + BYTE *pDst, INT32 dstStep, + INT32 width, INT32 height) +{ + IppiSize roi; + roi.width = width; + roi.height = height; + return (pstatus_t) ippiCopy_8u_AC4R(pSrc, srcStep, pDst, dstStep, roi); +} +#endif /* WITH_IPP */ + +/* ------------------------------------------------------------------------- */ +void primitives_init_copy( + const primitives_hints_t *hints, + primitives_t *prims) +{ + /* Start with the default. */ + prims->copy_8u = general_copy_8u; + prims->copy_8u_AC4r = general_copy_8u_AC4r; + + /* Pick tuned versions if possible. */ +#ifdef WITH_IPP + prims->copy_8u = (__copy_8u_t) ippsCopy_8u; + prims->copy_8u_AC4r = (__copy_8u_AC4r_t) ippiCopy_8u_AC4r; +#endif + /* Performance with an SSE2 version with no prefetch seemed to be + * all over the map vs. memcpy.
+ * Sometimes it was significantly faster, sometimes dreadfully slower, + * and it seemed to vary a lot depending on block size and processor. + * Hence, no SSE version is used here unless one can be written that + * is consistently faster than memcpy. + */ + + /* This is just an alias with void* parameters */ + prims->copy = (__copy_t) (prims->copy_8u); +} + +/* ------------------------------------------------------------------------- */ +void primitives_deinit_copy( + primitives_t *prims) +{ + /* Nothing to do. */ +} diff --git a/libfreerdp/primitives/prim_internal.h b/libfreerdp/primitives/prim_internal.h new file mode 100644 index 000000000..21df8cae0 --- /dev/null +++ b/libfreerdp/primitives/prim_internal.h @@ -0,0 +1,105 @@ +/* prim_internal.h + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. Algorithms used by + * this code may be covered by patents by HP, Microsoft, or other parties. + * + */ + +#ifdef __GNUC__ +# pragma once +#endif + +#ifndef __PRIM_INTERNAL_H_INCLUDED__ +#define __PRIM_INTERNAL_H_INCLUDED__ + +#ifndef CMAKE_BUILD_TYPE +#define CMAKE_BUILD_TYPE Release +#endif + +#include + +/* Normally the internal entrypoints should be static, but a benchmark + * program may want to access them directly and turn this off.
+ */ +#ifndef PRIM_STATIC +# define PRIM_STATIC static +#else +# undef PRIM_STATIC +# define PRIM_STATIC +#endif /* !PRIM_STATIC */ + +/* Use lddqu for unaligned; load for 16-byte aligned. */ +#define LOAD_SI128(_ptr_) \ + (((ULONG_PTR) (_ptr_) & 0x0f) \ + ? _mm_lddqu_si128((__m128i *) (_ptr_)) \ + : _mm_load_si128((__m128i *) (_ptr_))) + +/* This structure can (eventually) be used to provide hints to the + * initialization routines, e.g. whether SSE2 or NEON or IPP instructions + * or calls are available. + */ +typedef struct +{ + UINT32 x86_flags; + UINT32 arm_flags; +} primitives_hints_t; + +/* Function prototypes for all the init/deinit routines. */ +extern void primitives_init_copy( + const primitives_hints_t *hints, + primitives_t *prims); +extern void primitives_deinit_copy( + primitives_t *prims); + +extern void primitives_init_set( + const primitives_hints_t *hints, + primitives_t *prims); +extern void primitives_deinit_set( + primitives_t *prims); + +extern void primitives_init_add( + const primitives_hints_t *hints, + primitives_t *prims); +extern void primitives_deinit_add( + primitives_t *prims); + +extern void primitives_init_andor( + const primitives_hints_t *hints, + primitives_t *prims); +extern void primitives_deinit_andor( + primitives_t *prims); + +extern void primitives_init_shift( + const primitives_hints_t *hints, + primitives_t *prims); +extern void primitives_deinit_shift( + primitives_t *prims); + +extern void primitives_init_sign( + const primitives_hints_t *hints, + primitives_t *prims); +extern void primitives_deinit_sign( + primitives_t *prims); + +extern void primitives_init_alphaComp( + const primitives_hints_t *hints, + primitives_t *prims); +extern void primitives_deinit_alphaComp( + primitives_t *prims); + +extern void primitives_init_colors( + const primitives_hints_t *hints, + primitives_t *prims); +extern void primitives_deinit_colors( + primitives_t *prims); + +#endif /* !__PRIM_INTERNAL_H_INCLUDED__ */ diff --git 
a/libfreerdp/primitives/prim_set.c b/libfreerdp/primitives/prim_set.c new file mode 100644 index 000000000..5b40ce00c --- /dev/null +++ b/libfreerdp/primitives/prim_set.c @@ -0,0 +1,312 @@ +/* FreeRDP: A Remote Desktop Protocol Client + * Routines to set a chunk of memory to a constant. + * vi:ts=4 sw=4: + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#ifdef WITH_SSE2 +# include +#endif /* WITH_SSE2 */ +#ifdef WITH_IPP +# include +#endif /* WITH_IPP */ +#include "prim_internal.h" + +/* ========================================================================= */ +PRIM_STATIC pstatus_t general_set_8u( + BYTE val, + BYTE *pDst, + INT32 len) +{ + memset((void *) pDst, (int) val, (size_t) len); + return PRIMITIVES_SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_zero( + void *pDst, + size_t len) +{ + memset(pDst, 0, len); + return PRIMITIVES_SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +#ifdef WITH_SSE2 +# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) +PRIM_STATIC pstatus_t sse2_set_8u( + BYTE val, + BYTE *pDst, + INT32 len) +{ + BYTE byte, *dptr; + __m128i xmm0; + size_t count; + + if (len < 16) return general_set_8u(val, pDst, len); + + byte = val; + dptr = (BYTE *) pDst; + + 
/* Seek 16-byte alignment. */ + while ((ULONG_PTR) dptr & 0x0f) + { + *dptr++ = byte; + if (--len == 0) return PRIMITIVES_SUCCESS; + } + + xmm0 = _mm_set1_epi8(byte); + + /* Cover 256-byte chunks via SSE register stores. */ + count = len >> 8; + len -= count << 8; + /* Do 256-byte chunks using one XMM register. */ + while (count--) + { + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + } + + /* Cover 16-byte chunks via SSE register stores. */ + count = len >> 4; + len -= count << 4; + /* Do 16-byte chunks using one XMM register. */ + while (count--) + { + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 16; + } + + /* Do leftover bytes. 
+ */ + while (len--) *dptr++ = byte; + + return PRIMITIVES_SUCCESS; +} +# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */ +#endif /* WITH_SSE2 */ + +/* ========================================================================= */ +PRIM_STATIC pstatus_t general_set_32s( + INT32 val, + INT32 *pDst, + INT32 len) +{ + INT32 *dptr = (INT32 *) pDst; + size_t span, remaining; + primitives_t *prims; + + if (len < 256) + { + while (len--) *dptr++ = val; + return PRIMITIVES_SUCCESS; + } + + /* else doubling copy: seed one value, then copy the filled prefix onto the tail, doubling span each pass */ + span = 1; + *dptr = val; + remaining = len - 1; + prims = primitives_get(); + while (remaining) + { + size_t thiswidth = span; + if (thiswidth > remaining) thiswidth = remaining; + prims->copy_8u((BYTE *) dptr, (BYTE *) (dptr + span), thiswidth<<2); + remaining -= thiswidth; + span <<= 1; + } + return PRIMITIVES_SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_set_32u( + UINT32 val, + UINT32 *pDst, + INT32 len) +{ + UINT32 *dptr = (UINT32 *) pDst; + size_t span, remaining; + primitives_t *prims; + + if (len < 256) + { + while (len--) *dptr++ = val; + return PRIMITIVES_SUCCESS; + } + + /* else doubling copy: seed one value, then copy the filled prefix onto the tail, doubling span each pass */ + span = 1; + *dptr = val; + remaining = len - 1; + prims = primitives_get(); + while (remaining) + { + size_t thiswidth = span; + if (thiswidth > remaining) thiswidth = remaining; + prims->copy_8u((BYTE *) dptr, (BYTE *) (dptr + span), thiswidth<<2); + remaining -= thiswidth; + span <<= 1; + } + return PRIMITIVES_SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +#ifdef WITH_SSE2 +# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) +PRIM_STATIC pstatus_t sse2_set_32u( + UINT32 val, + UINT32 *pDst, + INT32 len) +{ + UINT32 *dptr = (UINT32 *) pDst; + __m128i xmm0; + size_t count; + + /* If really short, just do it here.
*/ + if (len < 32) + { + while (len--) *dptr++ = val; + return PRIMITIVES_SUCCESS; + } + + /* Assure we can reach 16-byte alignment. */ + if (((ULONG_PTR) dptr & 0x03) != 0) + { + return general_set_32u(val, pDst, len); + } + + /* Seek 16-byte alignment. */ + while ((ULONG_PTR) dptr & 0x0f) + { + *dptr++ = val; + if (--len == 0) return PRIMITIVES_SUCCESS; + } + + xmm0 = _mm_set1_epi32(val); + + /* Cover 256-byte chunks via SSE register stores. */ + count = len >> 6; + len -= count << 6; + /* Do 256-byte chunks using one XMM register. */ + while (count--) + { + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + } + + /* Cover 16-byte chunks via SSE register stores. */ + count = len >> 2; + len -= count << 2; + /* Do 16-byte chunks using one XMM register. */ + while (count--) + { + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 4; + } + + /* Do leftover bytes. 
*/ + while (len--) *dptr++ = val; + + return PRIMITIVES_SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t sse2_set_32s( + INT32 val, + INT32 *pDst, + INT32 len) +{ + UINT32 uval = *((UINT32 *) &val); + return sse2_set_32u(uval, (UINT32 *) pDst, len); +} +# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */ +#endif /* WITH_SSE2 */ + +#ifdef WITH_IPP +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t ipp_wrapper_set_32u( + UINT32 val, + UINT32 *pDst, + INT32 len) +{ + /* A little type conversion, then use the signed version. */ + INT32 sval = *((INT32 *) &val); + return ippsSet_32s(sval, (INT32 *) pDst, len); +} +#endif + +/* ------------------------------------------------------------------------- */ +void primitives_init_set( + const primitives_hints_t *hints, + primitives_t *prims) +{ + /* Start with the default. */ + prims->set_8u = general_set_8u; + prims->set_32s = general_set_32s; + prims->set_32u = general_set_32u; + prims->zero = general_zero; + + /* Pick tuned versions if possible. */ +#ifdef WITH_IPP + prims->set_8u = (__set_8u_t) ippsSet_8u; + prims->set_32s = (__set_32s_t) ippsSet_32s; + prims->set_32u = (__set_32u_t) ipp_wrapper_set_32u; + prims->zero = (__zero_t) ippsZero_8u; +#elif defined(WITH_SSE2) + if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) + { + prims->set_8u = sse2_set_8u; + prims->set_32s = sse2_set_32s; + prims->set_32u = sse2_set_32u; + } +#endif +} + +/* ------------------------------------------------------------------------- */ +void primitives_deinit_set( + primitives_t *prims) +{ + /* Nothing to do. */ +} diff --git a/libfreerdp/primitives/prim_shift.c b/libfreerdp/primitives/prim_shift.c new file mode 100644 index 000000000..331c7216e --- /dev/null +++ b/libfreerdp/primitives/prim_shift.c @@ -0,0 +1,172 @@ +/* FreeRDP: A Remote Desktop Protocol Client + * Shift operations. 
+ * vi:ts=4 sw=4: + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include + +#ifdef WITH_SSE2 +#include +#include +#endif /* WITH_SSE2 */ + +#ifdef WITH_IPP +#include +#endif /* WITH_IPP */ + +#include "prim_internal.h" +#include "prim_templates.h" + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_lShiftC_16s( + const INT16 *pSrc, + INT32 val, + INT16 *pDst, + INT32 len) +{ + /* val == 0 is not special-cased: the loop then copies pSrc to pDst. */ + while (len--) *pDst++ = *pSrc++ << val; + return PRIMITIVES_SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_rShiftC_16s( + const INT16 *pSrc, + INT32 val, + INT16 *pDst, + INT32 len) +{ + /* val == 0 is not special-cased: the loop then copies pSrc to pDst. */ + while (len--) *pDst++ = *pSrc++ >> val; + return PRIMITIVES_SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_lShiftC_16u( + const UINT16 *pSrc, + INT32 val, + UINT16 *pDst, + INT32 len) +{ + /* val == 0 is not special-cased: the loop then copies pSrc to pDst. */ + while (len--) *pDst++ = *pSrc++ << val; + return PRIMITIVES_SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_rShiftC_16u( + const UINT16 *pSrc, + INT32 val, +
UINT16 *pDst, + INT32 len) +{ + /* val == 0 is not special-cased: the loop then copies pSrc to pDst. */ + while (len--) *pDst++ = *pSrc++ >> val; + return PRIMITIVES_SUCCESS; +} + +#ifdef WITH_SSE2 +# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) +/* ------------------------------------------------------------------------- */ +SSE3_SCD_ROUTINE(sse2_lShiftC_16s, INT16, general_lShiftC_16s, + _mm_slli_epi16, *dptr++ = *sptr++ << val) +/* ------------------------------------------------------------------------- */ +SSE3_SCD_ROUTINE(sse2_rShiftC_16s, INT16, general_rShiftC_16s, + _mm_srai_epi16, *dptr++ = *sptr++ >> val) +/* ------------------------------------------------------------------------- */ +SSE3_SCD_ROUTINE(sse2_lShiftC_16u, UINT16, general_lShiftC_16u, + _mm_slli_epi16, *dptr++ = *sptr++ << val) +/* ------------------------------------------------------------------------- */ +SSE3_SCD_ROUTINE(sse2_rShiftC_16u, UINT16, general_rShiftC_16u, + _mm_srli_epi16, *dptr++ = *sptr++ >> val) +# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */ +#endif + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_shiftC_16s( + const INT16 *pSrc, + INT32 val, + INT16 *pDst, + INT32 len) +{ + primitives_t *prims; + + /* val == 0 is not special-cased: it is forwarded to lShiftC as a zero shift. */ + prims = primitives_get(); + if (val < 0) return prims->rShiftC_16s(pSrc, -val, pDst, len); + else return prims->lShiftC_16s(pSrc, val, pDst, len); +} + +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t general_shiftC_16u( + const UINT16 *pSrc, + INT32 val, + UINT16 *pDst, + INT32 len) +{ + primitives_t *prims; + + /* val == 0 is not special-cased: it is forwarded to lShiftC as a zero shift. */ + prims = primitives_get(); + if (val < 0) return prims->rShiftC_16u(pSrc, -val, pDst, len); + else return prims->lShiftC_16u(pSrc, val, pDst, len); +} + +/* Note: the IPP version will have to call ippsLShiftC_16s or ippsRShiftC_16s + * depending on the sign of
val. To avoid using the deprecated inplace + * routines, a wrapper can use the src for the dest. + */ + +/* ------------------------------------------------------------------------- */ +void primitives_init_shift( + const primitives_hints_t *hints, + primitives_t *prims) +{ + /* Start with the default. */ + prims->lShiftC_16s = general_lShiftC_16s; + prims->rShiftC_16s = general_rShiftC_16s; + prims->lShiftC_16u = general_lShiftC_16u; + prims->rShiftC_16u = general_rShiftC_16u; +#if defined(WITH_IPP) + prims->lShiftC_16s = (__lShiftC_16s_t) ippsLShiftC_16s; + prims->rShiftC_16s = (__rShiftC_16s_t) ippsRShiftC_16s; + prims->lShiftC_16u = (__lShiftC_16u_t) ippsLShiftC_16u; + prims->rShiftC_16u = (__rShiftC_16u_t) ippsRShiftC_16u; +#elif defined(WITH_SSE2) + if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) + && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) + { + prims->lShiftC_16s = sse2_lShiftC_16s; + prims->rShiftC_16s = sse2_rShiftC_16s; + prims->lShiftC_16u = sse2_lShiftC_16u; + prims->rShiftC_16u = sse2_rShiftC_16u; + } +#endif + /* Wrappers */ + prims->shiftC_16s = general_shiftC_16s; + prims->shiftC_16u = general_shiftC_16u; +} + +/* ------------------------------------------------------------------------- */ +void primitives_deinit_shift( + primitives_t *prims) +{ + /* Nothing to do. */ +} diff --git a/libfreerdp/primitives/prim_sign.c b/libfreerdp/primitives/prim_sign.c new file mode 100644 index 000000000..a3b11ee14 --- /dev/null +++ b/libfreerdp/primitives/prim_sign.c @@ -0,0 +1,176 @@ +/* FreeRDP: A Remote Desktop Protocol Client + * Sign operations. + * vi:ts=4 sw=4: + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. 
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include + +#ifdef WITH_SSE2 +#include +#include +#endif /* WITH_SSE2 */ + +#include "prim_internal.h" + +/* ---------------------------------------------------------------------------- + * Set pDst to the sign-value of the 16-bit values in pSrc (-1, 0, or 1). + */ +PRIM_STATIC pstatus_t general_sign_16s( + const INT16 *pSrc, + INT16 *pDst, + INT32 len) +{ + while (len--) + { + INT16 src = *pSrc++; + *pDst++ = (src < 0) ? (-1) : ((src > 0) ? 1 : 0); + } + + return PRIMITIVES_SUCCESS; +} + +#ifdef WITH_SSE2 +/* ------------------------------------------------------------------------- */ +PRIM_STATIC pstatus_t ssse3_sign_16s( + const INT16 *pSrc, + INT16 *pDst, + INT32 len) +{ + const INT16 *sptr = (const INT16 *) pSrc; + INT16 *dptr = (INT16 *) pDst; + size_t count; + + if (len < 16) + { + return general_sign_16s(pSrc, pDst, len); + } + + /* Check for 16-byte alignment (eventually). */ + if ((ULONG_PTR) pDst & 0x01) + { + return general_sign_16s(pSrc, pDst, len); + } + + /* Seek 16-byte alignment. */ + while ((ULONG_PTR) dptr & 0x0f) + { + INT16 src = *sptr++; + *dptr++ = (src < 0) ? (-1) : ((src > 0) ? 1 : 0); + if (--len == 0) return PRIMITIVES_SUCCESS; + } + + /* Do 32-short chunks using 8 XMM registers. 
*/ + count = len >> 5; /* / 32 */ + len -= count << 5; /* * 32 */ + if ((ULONG_PTR) sptr & 0x0f) + { + /* Unaligned */ + while (count--) + { + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + xmm0 = _mm_set1_epi16(0x0001U); + xmm1 = _mm_set1_epi16(0x0001U); + xmm2 = _mm_set1_epi16(0x0001U); + xmm3 = _mm_set1_epi16(0x0001U); + xmm4 = _mm_lddqu_si128((__m128i *) sptr); sptr += 8; + xmm5 = _mm_lddqu_si128((__m128i *) sptr); sptr += 8; + xmm6 = _mm_lddqu_si128((__m128i *) sptr); sptr += 8; + xmm7 = _mm_lddqu_si128((__m128i *) sptr); sptr += 8; + xmm0 = _mm_sign_epi16(xmm0, xmm4); + xmm1 = _mm_sign_epi16(xmm1, xmm5); + xmm2 = _mm_sign_epi16(xmm2, xmm6); + xmm3 = _mm_sign_epi16(xmm3, xmm7); + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 8; + _mm_store_si128((__m128i *) dptr, xmm1); dptr += 8; + _mm_store_si128((__m128i *) dptr, xmm2); dptr += 8; + _mm_store_si128((__m128i *) dptr, xmm3); dptr += 8; + } + } + else + { + /* Aligned */ + while (count--) + { + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + xmm0 = _mm_set1_epi16(0x0001U); + xmm1 = _mm_set1_epi16(0x0001U); + xmm2 = _mm_set1_epi16(0x0001U); + xmm3 = _mm_set1_epi16(0x0001U); + xmm4 = _mm_load_si128((__m128i *) sptr); sptr += 8; + xmm5 = _mm_load_si128((__m128i *) sptr); sptr += 8; + xmm6 = _mm_load_si128((__m128i *) sptr); sptr += 8; + xmm7 = _mm_load_si128((__m128i *) sptr); sptr += 8; + xmm0 = _mm_sign_epi16(xmm0, xmm4); + xmm1 = _mm_sign_epi16(xmm1, xmm5); + xmm2 = _mm_sign_epi16(xmm2, xmm6); + xmm3 = _mm_sign_epi16(xmm3, xmm7); + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 8; + _mm_store_si128((__m128i *) dptr, xmm1); dptr += 8; + _mm_store_si128((__m128i *) dptr, xmm2); dptr += 8; + _mm_store_si128((__m128i *) dptr, xmm3); dptr += 8; + } + } + + /* Do 8-short chunks using two XMM registers. 
*/ + count = len >> 3; + len -= count << 3; + while (count--) + { + __m128i xmm0 = _mm_set1_epi16(0x0001U); + __m128i xmm1 = LOAD_SI128(sptr); sptr += 8; + xmm0 = _mm_sign_epi16(xmm0, xmm1); + _mm_store_si128((__m128i *) dptr, xmm0); dptr += 8; + } + + /* Do leftovers. */ + while (len--) + { + INT16 src = *sptr++; + *dptr++ = (src < 0) ? -1 : ((src > 0) ? 1 : 0); + } + + return PRIMITIVES_SUCCESS; +} +#endif /* WITH_SSE2 */ + +/* ------------------------------------------------------------------------- */ +void primitives_init_sign( + const primitives_hints_t *hints, + primitives_t *prims) +{ + /* Start with the default. */ + prims->sign_16s = general_sign_16s; + /* Pick tuned versions if possible. */ + /* I didn't spot an IPP version of this. */ +#if defined(WITH_SSE2) + if ((hints->x86_flags & PRIM_X86_SSSE3_AVAILABLE) + && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) + { + prims->sign_16s = ssse3_sign_16s; + } +#endif +} + +/* ------------------------------------------------------------------------- */ +void primitives_deinit_sign( + primitives_t *prims) +{ + /* Nothing to do. */ +} diff --git a/libfreerdp/primitives/prim_templates.h b/libfreerdp/primitives/prim_templates.h new file mode 100644 index 000000000..c0b6ac10d --- /dev/null +++ b/libfreerdp/primitives/prim_templates.h @@ -0,0 +1,416 @@ +/* prim_templates.h + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
Algorithms used by + * this code may be covered by patents by HP, Microsoft, or other parties. + */ + +#ifdef __GNUC__ +# pragma once +#endif + +#ifndef __PRIM_TEMPLATES_H_INCLUDED__ +#define __PRIM_TEMPLATES_H_INCLUDED__ + +/* These are templates for SSE (potentially NEON) routines that do a + * simple SSE operation over an array of data. Since so much of this + * code is shared except for the operation itself, these templates are + * used rather than duplicating code. The naming convention depends on + * the parameters: S=Source param; C=Constant; D=Destination. + * All the macros have parameters for a fallback procedure if the data + * is too small and an operation "the slow way" for use at 16-byte edges. + */ + +/* SSE3 note: If someone needs to support an SSE2 version of these without + * SSE3 support, an alternative version could be added that merely checks + * that 16-byte alignment on both destination and source(s) can be + * achieved, rather than use LDDQU for unaligned reads. + */ + +/* Note: the compiler is good at turning (16/sizeof(_type_)) into a constant. + * It can't easily do that if the value is stored in a variable. + * So don't save it as an intermediate value.
+ */ + +/* ---------------------------------------------------------------------------- + * SCD = Source, Constant, Destination + */ +#define SSE3_SCD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \ +PRIM_STATIC pstatus_t _name_(const _type_ *pSrc, INT32 val, _type_ *pDst, INT32 len) \ +{ \ + int shifts; \ + UINT32 offBeatMask; \ + const _type_ *sptr = pSrc; \ + _type_ *dptr = pDst; \ + size_t count; \ + if (len < 16) /* pointless if too small */ \ + { \ + return _fallback_(pSrc, val, pDst, len); \ + } \ + if (sizeof(_type_) == 1) shifts = 1; \ + else if (sizeof(_type_) == 2) shifts = 2; \ + else if (sizeof(_type_) == 4) shifts = 3; \ + else if (sizeof(_type_) == 8) shifts = 4; \ + offBeatMask = (1 << (shifts - 1)) - 1; \ + if ((ULONG_PTR) pDst & offBeatMask) \ + { \ + /* Incrementing the pointer skips over 16-byte boundary. */ \ + return _fallback_(pSrc, val, pDst, len); \ + } \ + /* Get to the 16-byte boundary now. */ \ + while ((ULONG_PTR) dptr & 0x0f) \ + { \ + _slowWay_; \ + if (--len == 0) return PRIMITIVES_SUCCESS; \ + } \ + /* Use 8 128-bit SSE registers. 
*/ \ + count = len >> (8-shifts); \ + len -= count << (8-shifts); \ + if ((ULONG_PTR) sptr & 0x0f) \ + { \ + while (count--) \ + { \ + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \ + xmm0 = _mm_lddqu_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm1 = _mm_lddqu_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm2 = _mm_lddqu_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm3 = _mm_lddqu_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm4 = _mm_lddqu_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm5 = _mm_lddqu_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm6 = _mm_lddqu_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm7 = _mm_lddqu_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm0 = _op_(xmm0, val); \ + xmm1 = _op_(xmm1, val); \ + xmm2 = _op_(xmm2, val); \ + xmm3 = _op_(xmm3, val); \ + xmm4 = _op_(xmm4, val); \ + xmm5 = _op_(xmm5, val); \ + xmm6 = _op_(xmm6, val); \ + xmm7 = _op_(xmm7, val); \ + _mm_store_si128((__m128i *) dptr, xmm0); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm1); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm2); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm3); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm4); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm5); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm6); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm7); \ + dptr += (16/sizeof(_type_)); \ + } \ + } \ + else \ + { \ + while (count--) \ + { \ + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \ + xmm0 = _mm_load_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm1 = _mm_load_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm2 = _mm_load_si128((__m128i *) sptr); \ + sptr += 
(16/sizeof(_type_)); \ + xmm3 = _mm_load_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm4 = _mm_load_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm5 = _mm_load_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm6 = _mm_load_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm7 = _mm_load_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm0 = _op_(xmm0, val); \ + xmm1 = _op_(xmm1, val); \ + xmm2 = _op_(xmm2, val); \ + xmm3 = _op_(xmm3, val); \ + xmm4 = _op_(xmm4, val); \ + xmm5 = _op_(xmm5, val); \ + xmm6 = _op_(xmm6, val); \ + xmm7 = _op_(xmm7, val); \ + _mm_store_si128((__m128i *) dptr, xmm0); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm1); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm2); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm3); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm4); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm5); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm6); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm7); \ + dptr += (16/sizeof(_type_)); \ + } \ + } \ + /* Use a single 128-bit SSE register. */ \ + count = len >> (5-shifts); \ + len -= count << (5-shifts); \ + while (count--) \ + { \ + __m128i xmm0 = LOAD_SI128(sptr); sptr += (16/sizeof(_type_)); \ + xmm0 = _op_(xmm0, val); \ + _mm_store_si128((__m128i *) dptr, xmm0); \ + dptr += (16/sizeof(_type_)); \ + } \ + /* Finish off the remainder. */ \ + while (len--) { _slowWay_; } \ + return PRIMITIVES_SUCCESS; \ +} + +/* ---------------------------------------------------------------------------- + * SCD = Source, Constant, Destination + * PRE = preload xmm0 with the constant. 
+ */ +#define SSE3_SCD_PRE_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \ +PRIM_STATIC pstatus_t _name_(const _type_ *pSrc, _type_ val, _type_ *pDst, INT32 len) \ +{ \ + int shifts; \ + UINT32 offBeatMask; \ + const _type_ *sptr = pSrc; \ + _type_ *dptr = pDst; \ + size_t count; \ + __m128i xmm0; \ + if (len < 16) /* pointless if too small */ \ + { \ + return _fallback_(pSrc, val, pDst, len); \ + } \ + if (sizeof(_type_) == 1) shifts = 1; \ + else if (sizeof(_type_) == 2) shifts = 2; \ + else if (sizeof(_type_) == 4) shifts = 3; \ + else if (sizeof(_type_) == 8) shifts = 4; \ + offBeatMask = (1 << (shifts - 1)) - 1; \ + if ((ULONG_PTR) pDst & offBeatMask) \ + { \ + /* Incrementing the pointer skips over 16-byte boundary. */ \ + return _fallback_(pSrc, val, pDst, len); \ + } \ + /* Get to the 16-byte boundary now. */ \ + while ((ULONG_PTR) dptr & 0x0f) \ + { \ + _slowWay_; \ + if (--len == 0) return PRIMITIVES_SUCCESS; \ + } \ + /* Use 4 128-bit SSE registers. */ \ + count = len >> (7-shifts); \ + len -= count << (7-shifts); \ + xmm0 = _mm_set1_epi32(val); \ + if ((ULONG_PTR) sptr & 0x0f) \ + { \ + while (count--) \ + { \ + __m128i xmm1, xmm2, xmm3, xmm4; \ + xmm1 = _mm_lddqu_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm2 = _mm_lddqu_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm3 = _mm_lddqu_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm4 = _mm_lddqu_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm1 = _op_(xmm1, xmm0); \ + xmm2 = _op_(xmm2, xmm0); \ + xmm3 = _op_(xmm3, xmm0); \ + xmm4 = _op_(xmm4, xmm0); \ + _mm_store_si128((__m128i *) dptr, xmm1); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm2); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm3); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm4); \ + dptr += (16/sizeof(_type_)); \ + } \ + } \ + else \ + { \ + while (count--) \ + { \ + __m128i xmm1, 
xmm2, xmm3, xmm4; \ + xmm1 = _mm_load_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm2 = _mm_load_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm3 = _mm_load_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm4 = _mm_load_si128((__m128i *) sptr); \ + sptr += (16/sizeof(_type_)); \ + xmm1 = _op_(xmm1, xmm0); \ + xmm2 = _op_(xmm2, xmm0); \ + xmm3 = _op_(xmm3, xmm0); \ + xmm4 = _op_(xmm4, xmm0); \ + _mm_store_si128((__m128i *) dptr, xmm1); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm2); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm3); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm4); \ + dptr += (16/sizeof(_type_)); \ + } \ + } \ + /* Use a single 128-bit SSE register. */ \ + count = len >> (5-shifts); \ + len -= count << (5-shifts); \ + while (count--) \ + { \ + __m128i xmm1 = LOAD_SI128(sptr); sptr += (16/sizeof(_type_)); \ + xmm1 = _op_(xmm1, xmm0); \ + _mm_store_si128((__m128i *) dptr, xmm1); \ + dptr += (16/sizeof(_type_)); \ + } \ + /* Finish off the remainder. 
*/ \ + while (len--) { _slowWay_; } \ + return PRIMITIVES_SUCCESS; \ +} + +/* ---------------------------------------------------------------------------- + * SSD = Source1, Source2, Destination + */ +#define SSE3_SSD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \ +PRIM_STATIC pstatus_t _name_(const _type_ *pSrc1, const _type_ *pSrc2, _type_ *pDst, INT32 len) \ +{ \ + int shifts; \ + UINT32 offBeatMask; \ + const _type_ *sptr1 = pSrc1; \ + const _type_ *sptr2 = pSrc2; \ + _type_ *dptr = pDst; \ + size_t count; \ + if (len < 16) /* pointless if too small */ \ + { \ + return _fallback_(pSrc1, pSrc2, pDst, len); \ + } \ + if (sizeof(_type_) == 1) shifts = 1; \ + else if (sizeof(_type_) == 2) shifts = 2; \ + else if (sizeof(_type_) == 4) shifts = 3; \ + else if (sizeof(_type_) == 8) shifts = 4; \ + offBeatMask = (1 << (shifts - 1)) - 1; \ + if ((ULONG_PTR) pDst & offBeatMask) \ + { \ + /* Incrementing the pointer skips over 16-byte boundary. */ \ + return _fallback_(pSrc1, pSrc2, pDst, len); \ + } \ + /* Get to the 16-byte boundary now. */ \ + while ((ULONG_PTR) dptr & 0x0f) \ + { \ + _slowWay_; \ + if (--len == 0) return PRIMITIVES_SUCCESS; \ + } \ + /* Use 4 128-bit SSE registers. 
*/ \ + count = len >> (7-shifts); \ + len -= count << (7-shifts); \ + if (((ULONG_PTR) sptr1 & 0x0f) || ((ULONG_PTR) sptr2 & 0x0f)) \ + { \ + /* Unaligned loads */ \ + while (count--) \ + { \ + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \ + xmm0 = _mm_lddqu_si128((__m128i *) sptr1); \ + sptr1 += (16/sizeof(_type_)); \ + xmm1 = _mm_lddqu_si128((__m128i *) sptr1); \ + sptr1 += (16/sizeof(_type_)); \ + xmm2 = _mm_lddqu_si128((__m128i *) sptr1); \ + sptr1 += (16/sizeof(_type_)); \ + xmm3 = _mm_lddqu_si128((__m128i *) sptr1); \ + sptr1 += (16/sizeof(_type_)); \ + xmm4 = _mm_lddqu_si128((__m128i *) sptr2); \ + sptr2 += (16/sizeof(_type_)); \ + xmm5 = _mm_lddqu_si128((__m128i *) sptr2); \ + sptr2 += (16/sizeof(_type_)); \ + xmm6 = _mm_lddqu_si128((__m128i *) sptr2); \ + sptr2 += (16/sizeof(_type_)); \ + xmm7 = _mm_lddqu_si128((__m128i *) sptr2); \ + sptr2 += (16/sizeof(_type_)); \ + xmm0 = _op_(xmm0, xmm4); \ + xmm1 = _op_(xmm1, xmm5); \ + xmm2 = _op_(xmm2, xmm6); \ + xmm3 = _op_(xmm3, xmm7); \ + _mm_store_si128((__m128i *) dptr, xmm0); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm1); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm2); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm3); \ + dptr += (16/sizeof(_type_)); \ + } \ + } \ + else \ + { \ + /* Aligned loads */ \ + while (count--) \ + { \ + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \ + xmm0 = _mm_load_si128((__m128i *) sptr1); \ + sptr1 += (16/sizeof(_type_)); \ + xmm1 = _mm_load_si128((__m128i *) sptr1); \ + sptr1 += (16/sizeof(_type_)); \ + xmm2 = _mm_load_si128((__m128i *) sptr1); \ + sptr1 += (16/sizeof(_type_)); \ + xmm3 = _mm_load_si128((__m128i *) sptr1); \ + sptr1 += (16/sizeof(_type_)); \ + xmm4 = _mm_load_si128((__m128i *) sptr2); \ + sptr2 += (16/sizeof(_type_)); \ + xmm5 = _mm_load_si128((__m128i *) sptr2); \ + sptr2 += (16/sizeof(_type_)); \ + xmm6 = _mm_load_si128((__m128i *) sptr2); \ + sptr2 
+= (16/sizeof(_type_)); \ + xmm7 = _mm_load_si128((__m128i *) sptr2); \ + sptr2 += (16/sizeof(_type_)); \ + xmm0 = _op_(xmm0, xmm4); \ + xmm1 = _op_(xmm1, xmm5); \ + xmm2 = _op_(xmm2, xmm6); \ + xmm3 = _op_(xmm3, xmm7); \ + _mm_store_si128((__m128i *) dptr, xmm0); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm1); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm2); \ + dptr += (16/sizeof(_type_)); \ + _mm_store_si128((__m128i *) dptr, xmm3); \ + dptr += (16/sizeof(_type_)); \ + } \ + } \ + /* Use a single 128-bit SSE register. */ \ + count = len >> (5-shifts); \ + len -= count << (5-shifts); \ + while (count--) \ + { \ + __m128i xmm0, xmm1; \ + xmm0 = LOAD_SI128(sptr1); sptr1 += (16/sizeof(_type_)); \ + xmm1 = LOAD_SI128(sptr2); sptr2 += (16/sizeof(_type_)); \ + xmm0 = _op_(xmm0, xmm1); \ + _mm_store_si128((__m128i *) dptr, xmm0); \ + dptr += (16/sizeof(_type_)); \ + } \ + /* Finish off the remainder. */ \ + while (len--) { _slowWay_; } \ + return PRIMITIVES_SUCCESS; \ +} + +#endif /* !__PRIM_TEMPLATES_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/primitives.c b/libfreerdp/primitives/primitives.c new file mode 100644 index 000000000..29f6f7024 --- /dev/null +++ b/libfreerdp/primitives/primitives.c @@ -0,0 +1,347 @@ +/* primitives.c + * This code queries processor features and calls the init/deinit routines. + * vi:ts=4 sw=4 + * + * Copyright 2011 Martin Fleisz + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. 
See the License for the specific language governing + * permissions and limitations under the License. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include + +#include "prim_internal.h" + +#ifdef ANDROID +#include "cpu-features.h" +#endif + +/* Singleton pointer used throughout the program when requested. */ +static primitives_t* pPrimitives = NULL; + +#define D_BIT_MMX (1<<23) +#define D_BIT_SSE (1<<25) +#define D_BIT_SSE2 (1<<26) +#define D_BIT_3DN (1<<30) +#define C_BIT_SSE3 (1<<0) +#define C_BIT_3DNP (1<<8) +#define C_BIT_SSSE3 (1<<9) +#define C_BIT_SSE41 (1<<19) +#define C_BIT_SSE42 (1<<20) +#define C_BIT_XGETBV (1<<27) +#define C_BIT_AVX (1<<28) +#define C_BITS_AVX (C_BIT_XGETBV|C_BIT_AVX) +#define E_BIT_XMM (1<<1) +#define E_BIT_YMM (1<<2) +#define E_BITS_AVX (E_BIT_XMM|E_BIT_YMM) +#define C_BIT_FMA (1<<11) +#define C_BIT_AVX_AES (1<<24) + +/* If x86 */ +#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64) \ + || defined(__amd64__) || defined(_M_AMD64) || defined(_M_X64) \ + || defined(i386) || defined(__i386) || defined(__i386__) \ + || defined(_M_IX86) || defined(_X86_) +#ifndef i386 +#define i386 +#endif + +/* If GCC */ +#ifdef __GNUC__ + +#ifdef __AVX__ +#define xgetbv(_func_, _lo_, _hi_) \ + __asm__ __volatile__ ("xgetbv" : "=a" (_lo_), "=d" (_hi_) : "c" (_func_)) +#endif + +static void cpuid( + unsigned info, + unsigned *eax, + unsigned *ebx, + unsigned *ecx, + unsigned *edx) +{ + *eax = *ebx = *ecx = *edx = 0; + __asm volatile + ( + /* The EBX (or RBX register on x86_64) is used for the PIC base address + * and must not be corrupted by our inline assembly. 
+ */ +# if defined(__i386__) + "mov %%ebx, %%esi;" + "cpuid;" + "xchg %%ebx, %%esi;" +#else + "mov %%rbx, %%rsi;" + "cpuid;" + "xchg %%rbx, %%rsi;" +#endif + : "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx) + : "0" (info) + ); +} + +static void set_hints(primitives_hints_t* hints) +{ + unsigned a, b, c, d; + + cpuid(1, &a, &b, &c, &d); + + if (d & D_BIT_MMX) + hints->x86_flags |= PRIM_X86_MMX_AVAILABLE; + if (d & D_BIT_SSE) + hints->x86_flags |= PRIM_X86_SSE_AVAILABLE; + if (d & D_BIT_SSE2) + hints->x86_flags |= PRIM_X86_SSE2_AVAILABLE; + if (d & D_BIT_3DN) + hints->x86_flags |= PRIM_X86_3DNOW_AVAILABLE; + if (c & C_BIT_3DNP) + hints->x86_flags |= PRIM_X86_3DNOW_PREFETCH_AVAILABLE; + if (c & C_BIT_SSE3) + hints->x86_flags |= PRIM_X86_SSE3_AVAILABLE; + if (c & C_BIT_SSSE3) + hints->x86_flags |= PRIM_X86_SSSE3_AVAILABLE; + if (c & C_BIT_SSE41) + hints->x86_flags |= PRIM_X86_SSE41_AVAILABLE; + if (c & C_BIT_SSE42) + hints->x86_flags |= PRIM_X86_SSE42_AVAILABLE; + +#ifdef __AVX__ + if ((c & C_BITS_AVX) == C_BITS_AVX) + { + int e, f; + xgetbv(0, e, f); + + if ((e & E_BITS_AVX) == E_BITS_AVX) + { + hints->x86_flags |= PRIM_X86_AVX_AVAILABLE; + + if (c & C_BIT_FMA) + hints->x86_flags |= PRIM_X86_FMA_AVAILABLE; + if (c & C_BIT_AVX_AES) + hints->x86_flags |= PRIM_X86_AVX_AES_AVAILABLE; + } + } + /* TODO: AVX2: set eax=7, ecx=0, cpuid, check ebx-bit5 */ +#endif +} + +#else + +static void set_hints(primitives_hints_t* hints) +{ + /* x86 non-GCC: TODO */ +} + +#endif /* __GNUC__ */ + +/* ------------------------------------------------------------------------- */ + +#elif defined(__arm__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_EABI__) || defined(__ARMEL__) || defined(ANDROID) +#ifndef __arm__ +#define __arm__ +#endif + +static UINT32 androidNeon(void) +{ +#if ANDROID + if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) return 0; + + UINT64 features = android_getCpuFeatures(); + + if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7)) + { + if (features & 
ANDROID_CPU_ARM_FEATURE_NEON) + { + return PRIM_ARM_NEON_AVAILABLE; + } + } + /* else */ +#endif + return 0; +} + +static void set_hints( + primitives_hints_t *hints) +{ + /* ARM: TODO */ + hints->arm_flags |= androidNeon(); +} + +#else +static void set_hints( + primitives_hints_t *hints) +{ +} +#endif /* x86 else ARM else */ + +/* ------------------------------------------------------------------------- */ +void primitives_init(void) +{ + primitives_hints_t* hints; + + if (pPrimitives == NULL) + { + pPrimitives = calloc(1, sizeof(primitives_t)); + + if (pPrimitives == NULL) + return; + } + + hints = calloc(1, sizeof(primitives_hints_t)); + set_hints(hints); + pPrimitives->hints = (void *) hints; + + /* Now call each section's initialization routine. */ + primitives_init_add(hints, pPrimitives); + primitives_init_andor(hints, pPrimitives); + primitives_init_alphaComp(hints, pPrimitives); + primitives_init_copy(hints, pPrimitives); + primitives_init_set(hints, pPrimitives); + primitives_init_shift(hints, pPrimitives); + primitives_init_sign(hints, pPrimitives); + primitives_init_colors(hints, pPrimitives); +} + +/* ------------------------------------------------------------------------- */ +primitives_t* primitives_get(void) +{ + if (pPrimitives == NULL) + primitives_init(); + + return pPrimitives; +} + +/* ------------------------------------------------------------------------- */ +UINT32 primitives_get_flags(const primitives_t* prims) +{ + primitives_hints_t* hints = (primitives_hints_t*) (prims->hints); + +#ifdef i386 + return hints->x86_flags; +#elif defined(__arm__) + return hints->arm_flags; +#else + return 0; +#endif +} + +/* ------------------------------------------------------------------------- */ + +typedef struct +{ + UINT32 flag; + const char *str; +} flagpair_t; + +static const flagpair_t x86_flags[] = +{ + { PRIM_X86_MMX_AVAILABLE, "MMX" }, + { PRIM_X86_3DNOW_AVAILABLE, "3DNow" }, + { PRIM_X86_3DNOW_PREFETCH_AVAILABLE, "3DNow-PF" }, + { 
PRIM_X86_SSE_AVAILABLE, "SSE" }, + { PRIM_X86_SSE2_AVAILABLE, "SSE2" }, + { PRIM_X86_SSE3_AVAILABLE, "SSE3" }, + { PRIM_X86_SSSE3_AVAILABLE, "SSSE3" }, + { PRIM_X86_SSE41_AVAILABLE, "SSE4.1" }, + { PRIM_X86_SSE42_AVAILABLE, "SSE4.2" }, + { PRIM_X86_AVX_AVAILABLE, "AVX" }, + { PRIM_X86_FMA_AVAILABLE, "FMA" }, + { PRIM_X86_AVX_AES_AVAILABLE, "AVX-AES" }, + { PRIM_X86_AVX2_AVAILABLE, "AVX2" }, +}; + +static const flagpair_t arm_flags[] = +{ + { PRIM_ARM_VFP1_AVAILABLE, "VFP1" }, + { PRIM_ARM_VFP2_AVAILABLE, "VFP2" }, + { PRIM_ARM_VFP3_AVAILABLE, "VFP3" }, + { PRIM_ARM_VFP4_AVAILABLE, "VFP4" }, + { PRIM_ARM_FPA_AVAILABLE, "FPA" }, + { PRIM_ARM_FPE_AVAILABLE, "FPE" }, + { PRIM_ARM_IWMMXT_AVAILABLE, "IWMMXT" }, + { PRIM_ARM_NEON_AVAILABLE, "NEON" }, +}; + +void primitives_flags_str(const primitives_t* prims, char* str, size_t len) +{ + int i; + primitives_hints_t* hints; + + *str = '\0'; + --len; /* for the '/0' */ + + hints = (primitives_hints_t*) (prims->hints); + + for (i = 0; i < sizeof(x86_flags) / sizeof(flagpair_t); ++i) + { + if (hints->x86_flags & x86_flags[i].flag) + { + int slen = strlen(x86_flags[i].str) + 1; + + if (len < slen) + break; + + if (*str != '\0') + strcat(str, " "); + + strcat(str, x86_flags[i].str); + len -= slen; + } + } + + for (i = 0; i < sizeof(arm_flags) / sizeof(flagpair_t); ++i) + { + if (hints->arm_flags & arm_flags[i].flag) + { + int slen = strlen(arm_flags[i].str) + 1; + + if (len < slen) + break; + + if (*str != '\0') + strcat(str, " "); + + strcat(str, arm_flags[i].str); + len -= slen; + } + } +} + +/* ------------------------------------------------------------------------- */ +void primitives_deinit(void) +{ + if (pPrimitives == NULL) + return; + + /* Call each section's de-initialization routine. 
*/ + primitives_deinit_add(pPrimitives); + primitives_deinit_andor(pPrimitives); + primitives_deinit_alphaComp(pPrimitives); + primitives_deinit_copy(pPrimitives); + primitives_deinit_set(pPrimitives); + primitives_deinit_shift(pPrimitives); + primitives_deinit_sign(pPrimitives); + primitives_deinit_colors(pPrimitives); + + if (pPrimitives->hints != NULL) + free((void*) (pPrimitives->hints)); + + free((void*) pPrimitives); + pPrimitives = NULL; +} diff --git a/libfreerdp/primitives/test/.gitignore b/libfreerdp/primitives/test/.gitignore new file mode 100644 index 000000000..082fee1c1 --- /dev/null +++ b/libfreerdp/primitives/test/.gitignore @@ -0,0 +1,2 @@ +prim_test + diff --git a/libfreerdp/primitives/test/CMakeLists.txt b/libfreerdp/primitives/test/CMakeLists.txt new file mode 100644 index 000000000..49e1f2d3a --- /dev/null +++ b/libfreerdp/primitives/test/CMakeLists.txt @@ -0,0 +1,140 @@ +# FreeRDP: A Remote Desktop Protocol Client +# primitives test makefile builder +# vi:ts=4 sw=4: +# +# (c) Copyright 2012 Hewlett-Packard Development Company, L.P. +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing permissions +# and limitations under the License. +# + +# TODO: Integrate this into the testing framework, in some form. +# Right now this produces a standalone test that covers both functionality +# and performance of the primitives library entrypoints. 
+ +cmake_minimum_required(VERSION 2.8) +set(MODULE_NAME "prim_test") +set(MODULE_PREFIX "PRIMITIVES_LIBRARY_TEST") + +set(PRIMITIVE_TEST_CFILES + prim_test.c + test_add.c + test_alphaComp.c + test_andor.c + test_colors.c + test_copy.c + test_set.c + test_shift.c + test_sign.c + ../prim_add.c + ../prim_andor.c + ../prim_alphaComp.c + ../prim_colors.c + ../prim_copy.c + ../prim_set.c + ../prim_shift.c + ../prim_sign.c + ../primitives.c + ) + +set(PRIMITIVE_TEST_HEADERS + measure.h + prim_test.h + ../prim_internal.h + ) + +set(PRIMITIVE_TEST_SRCS + ${PRIMITIVE_TEST_CFILES} + ${PRIMITIVE_TEST_HEADERS} + ) + +include_directories(. ../../.. ../../../include ../../../winpr/include) +add_definitions(-DPRIM_STATIC=auto -DALL_PRIMITIVES_VERSIONS -DHAVE_CONFIG_H) + +# If these haven't been set by the caller, set them now to defaults. +if(NOT DEFINED WITH_IPP) + set(WITH_IPP FALSE) +endif() +if(NOT DEFINED WITH_SSE2) + if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm*") + set(WITH_SSE2 FALSE) + else() + set(WITH_SSE2 TRUE) + endif() +endif() +if(NOT DEFINED WITH_NEON) + if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm*") + set(WITH_NEON TRUE) + else() + set(WITH_NEON FALSE) + endif() +endif() + +if(WITH_SSE2) + if(CMAKE_COMPILER_IS_GNUCC) + set(OPTFLAGS "${OPTFLAGS} -msse2 -mssse3 -O2 -Wdeclaration-after-statement") + endif() + + if(MSVC) + set(OPTFLAGS "${OPTFLAGS} /arch:SSE2") + endif() +elseif(WITH_NEON) + if(CMAKE_COMPILER_IS_GNUCC) + set(OPTIMZATION "${OPTFLAGS} -mfpu=neon -mfloat-abi=softfp -O2") + endif() + # TODO: Add MSVC equivalent +endif() + +add_executable(prim_test ${PRIMITIVE_TEST_SRCS}) + +if(WITH_IPP) + if(NOT DEFINED IPP_FOUND) + include(../../../cmake/FindIPP.cmake) + endif() + + # IPP PATH debugging messages + message(IPP_FOUND=${IPP_FOUND}) + message(IPP_VERSION_STR=${IPP_VERSION_STR}) + message(IPP_VERSION_MAJOR=${IPP_VERSION_MAJOR}) + message(IPP_VERSION_MINOR=${IPP_VERSION_MINOR}) + message(IPP_VERSION_BUILD=${IPP_VERSION_BUILD}) + message(IPP_ROOT_DIR=${IPP_ROOT_DIR}) + 
message(IPP_INCLUDE_DIRS=${IPP_INCLUDE_DIRS}) + message(IPP_LIBRARY_DIRS=${IPP_LIBRARY_DIRS}) + message(IPP_LIBRARIES=${IPP_LIBRARIES}) + message(IPP_COMPILER_LIBRARY_DIRS=${IPP_COMPILER_LIBRARY_DIRS}) + message(IPP_COMPILER_LIBRARIES=${IPP_COMPILER_LIBRARIES}) + message(IPP_LIBRARY_LIST=${IPP_LIBRARY_LIST}) + message(IPP_LIB_PREFIX=${IPP_LIB_PREFIX}) + message(IPP_LIB_SUFFIX=${IPP_LIB_SUFFIX}) + message(IPP_PREFIX=${IPP_PREFIX}) + message(IPP_SUFFIX=${IPP_SUFFIX}) + message(IPPCORE=${IPPCORE}) + message(IPPS=${IPPS}) + message(IPPI=${IPPI}) + message(IPPCC=${IPPCC}) + message(IPPCV=${IPPCV}) + message(IPPVM=${IPPVM}) + + if(CMAKE_COMPILER_IS_GNUCC) + foreach(INCLDIR ${IPP_INCLUDE_DIRS}) + set(OPTFLAGS "${OPTFLAGS} -I${INCLDIR}") + endforeach(INCLDIR) + endif() + target_link_libraries(prim_test ${IPP_LIBRARY_LIST}) +endif() + +set_property(SOURCE ${PRIMITIVE_TEST_CFILES} PROPERTY COMPILE_FLAGS ${OPTFLAGS}) + +target_link_libraries(prim_test rt) +if(NOT TESTING_OUTPUT_DIRECTORY) + set(TESTING_OUTPUT_DIRECTORY .) +endif() +add_test(prim_test ${TESTING_OUTPUT_DIRECTORY}/prim_test functionality) + +set_property(TARGET ${MODULE_NAME} PROPERTY FOLDER "FreeRDP/Test") diff --git a/libfreerdp/primitives/test/measure.h b/libfreerdp/primitives/test/measure.h new file mode 100644 index 000000000..ba2909c00 --- /dev/null +++ b/libfreerdp/primitives/test/measure.h @@ -0,0 +1,125 @@ +/* measure.h + * Macros to help with performance measurement. + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. 
See the License for the specific language governing + * permissions and limitations under the License. Algorithms used by + * this code may be covered by patents by HP, Microsoft, or other parties. + * + * MEASURE_LOOP_START("measurement", 2000) + * code to be measured + * MEASURE_LOOP_STOP + * buffer flush and such + * MEASURE_SHOW_RESULTS + * + * Define GOOGLE_PROFILER if you want gperftools included. + */ + +#ifdef _GNUC_ +# pragma once +#endif + +#ifndef __MEASURE_H_INCLUDED__ +#define __MEASURE_H_INCLUDED__ + +#include + +#ifndef _WIN32 +#include +#endif + +#include +#include +#include + +#ifdef GOOGLE_PROFILER +#include +#define PROFILER_START(_prefix_) \ + do { \ + char _path[PATH_MAX]; \ + sprintf(_path, "./%s.prof", (_prefix_)); \ + ProfilerStart(_path); \ + } while (0); +# define PROFILER_STOP \ + do { \ + ProfilerStop(); \ + } while (0); +#else +#define PROFILER_START(_prefix_) +#define PROFILER_STOP +#endif // GOOGLE_PROFILER + +extern float _delta_time(const struct timespec *t0, const struct timespec *t1); +extern void _floatprint(float t, char *output); + +#ifndef CLOCK_MONOTONIC_RAW +#define CLOCK_MONOTONIC_RAW 4 +#endif // !CLOCK_MONOTONIC_RAW + +#define MEASURE_LOOP_START(_prefix_, _count_) \ +{ struct timespec _start, _stop; \ + char *_prefix; \ + int _count = (_count_); \ + int _loop; \ + float _delta; \ + char _str1[32], _str2[32]; \ + _prefix = strdup(_prefix_); \ + _str1[0] = '\0'; _str2[0] = '\0'; \ + clock_gettime(CLOCK_MONOTONIC_RAW, &_start); \ + PROFILER_START(_prefix); \ + _loop = (_count); \ + do { + +#define MEASURE_LOOP_STOP \ + } while (--_loop); + +#define MEASURE_GET_RESULTS(_result_) \ + PROFILER_STOP; \ + clock_gettime(CLOCK_MONOTONIC_RAW, &_stop); \ + _delta = _delta_time(&_start, &_stop); \ + (_result_) = (float) _count / _delta; \ + free(_prefix); \ +} + +#define MEASURE_SHOW_RESULTS(_result_) \ + PROFILER_STOP; \ + clock_gettime(CLOCK_MONOTONIC_RAW, &_stop); \ + _delta = _delta_time(&_start, &_stop); \ + (_result_) = (float) 
_count / _delta; \ + _floatprint((float) _count / _delta, _str1); \ + printf("%s: %9d iterations in %5.1f seconds = %s/s \n", \ + _prefix, _count, _delta, _str1); \ + free(_prefix); \ +} + +#define MEASURE_SHOW_RESULTS_SCALED(_scale_, _label_) \ + PROFILER_STOP; \ + clock_gettime(CLOCK_MONOTONIC_RAW, &_stop); \ + _delta = _delta_time(&_start, &_stop); \ + _floatprint((float) _count / _delta, _str1); \ + _floatprint((float) _count / _delta * (_scale_), _str2); \ + printf("%s: %9d iterations in %5.1f seconds = %s/s = %s%s \n", \ + _prefix, _count, _delta, _str1, _str2, _label_); \ + free(_prefix); \ +} + +#define MEASURE_TIMED(_label_, _init_iter_, _test_time_, _result_, _call_) \ +{ float _r; \ + MEASURE_LOOP_START(_label_, _init_iter_); \ + _call_; \ + MEASURE_LOOP_STOP; \ + MEASURE_GET_RESULTS(_r); \ + MEASURE_LOOP_START(_label_, _r * _test_time_); \ + _call_; \ + MEASURE_LOOP_STOP; \ + MEASURE_SHOW_RESULTS(_result_); \ +} + +#endif // __MEASURE_H_INCLUDED__ diff --git a/libfreerdp/primitives/test/prim_test.c b/libfreerdp/primitives/test/prim_test.c new file mode 100644 index 000000000..172d6ffd4 --- /dev/null +++ b/libfreerdp/primitives/test/prim_test.c @@ -0,0 +1,432 @@ +/* prim_test.c + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "prim_test.h" + +#include +#include +#include + +#ifdef HAVE_UNISTD_H +#include +#endif + +#include +#include + +int test_sizes[] = { 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 }; +int Quiet = 0; + +/* ------------------------------------------------------------------------- */ +static void get_random_data_lrand( + void *buffer, + size_t size) +{ + static int seeded = 0; + long int *ptr = (long int *) buffer; + unsigned char *cptr; + + if (!seeded) + { + seeded = 1; + srand48(time(NULL)); + } + /* This isn't the perfect random number generator, but that's okay. */ + while (size >= sizeof(long int)) + { + *ptr++ = lrand48(); + size -= sizeof(long int); + } + cptr = (unsigned char *) ptr; + while (size > 0) + { + *cptr++ = lrand48() & 0xff; + --size; + } +} + +/* ------------------------------------------------------------------------- */ +void get_random_data( + void *buffer, + size_t size) +{ +#ifdef linux + size_t offset = 0; + int fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + { + get_random_data_lrand(buffer, size); + return; + } + + while (size > 0) + { + ssize_t count = read(fd, buffer+offset, size); + size -= count; + offset += count; + } + close(fd); +#else + get_random_data_lrand(buffer, size); +#endif +} + +/* ------------------------------------------------------------------------- */ +float _delta_time( + const struct timespec *t0, + const struct timespec *t1) +{ + INT64 secs = (INT64) (t1->tv_sec) - (INT64) (t0->tv_sec); + long nsecs = t1->tv_nsec - t0->tv_nsec; + double retval; + + if (nsecs < 0) + { + --secs; + nsecs += 1000000000; + } + retval = (double) secs + (double) nsecs / (double) 1000000000.0; + return (retval < 0.0) ? 0.0 : (float) retval; +} + +/* ------------------------------------------------------------------------- */ +void _floatprint( + float t, + char *output) +{ + /* I don't want to link against -lm, so avoid log,exp,... 
*/ + float f = 10.0; + int i; + while (t > f) f *= 10.0; + f /= 1000.0; + i = ((int) (t/f+0.5)) * (int) f; + if (t < 0.0) sprintf(output, "%f", t); + else if (i == 0) sprintf(output, "%d", (int) (t+0.5)); + else if (t < 1e+3) sprintf(output, "%3d", i); + else if (t < 1e+6) sprintf(output, "%3d,%03d", + i/1000, i % 1000); + else if (t < 1e+9) sprintf(output, "%3d,%03d,000", + i/1000000, (i % 1000000) / 1000); + else if (t < 1e+12) sprintf(output, "%3d,%03d,000,000", + i/1000000000, (i % 1000000000) / 1000000); + else sprintf(output, "%f", t); +} + +/* ------------------------------------------------------------------------- */ +/* Specific areas to test: */ +#define TEST_COPY8 (1<<0) +#define TEST_SET8 (1<<1) +#define TEST_SET32S (1<<2) +#define TEST_SET32U (1<<3) +#define TEST_SIGN16S (1<<4) +#define TEST_ADD16S (1<<5) +#define TEST_LSHIFT16S (1<<6) +#define TEST_LSHIFT16U (1<<7) +#define TEST_RSHIFT16S (1<<8) +#define TEST_RSHIFT16U (1<<9) +#define TEST_RGB (1<<10) +#define TEST_ALPHA (1<<11) +#define TEST_AND (1<<12) +#define TEST_OR (1<<13) + +/* Specific types of testing: */ +#define TEST_FUNCTIONALITY (1<<0) +#define TEST_PERFORMANCE (1<<1) + +/* ------------------------------------------------------------------------- */ + +typedef struct +{ + const char *testStr; + UINT32 bits; +} test_t; + +static const test_t testList[] = +{ + { "all", 0xFFFFFFFFU }, + { "copy", TEST_COPY8 }, + { "copy8", TEST_COPY8 }, + { "set", TEST_SET8|TEST_SET32S|TEST_SET32U }, + { "set8", TEST_SET8 }, + { "set32", TEST_SET32S|TEST_SET32U }, + { "set32s", TEST_SET32S }, + { "set32u", TEST_SET32U }, + { "sign", TEST_SIGN16S }, + { "sign16s", TEST_SIGN16S }, + { "add", TEST_ADD16S }, + { "add16s", TEST_ADD16S }, + { "lshift", TEST_LSHIFT16S|TEST_LSHIFT16U }, + { "rshift", TEST_RSHIFT16S|TEST_RSHIFT16U }, + { "shift", TEST_LSHIFT16S|TEST_LSHIFT16U|TEST_RSHIFT16S|TEST_RSHIFT16U }, + { "lshift16s", TEST_LSHIFT16S }, + { "lshift16u", TEST_LSHIFT16U }, + { "rshift16s", TEST_RSHIFT16S }, + { 
"rshift16u", TEST_RSHIFT16U }, + { "rgb", TEST_RGB }, + { "color", TEST_RGB }, + { "colors", TEST_RGB }, + { "alpha", TEST_ALPHA }, + { "and", TEST_AND }, + { "or", TEST_OR } +}; + +#define NUMTESTS (sizeof(testList)/sizeof(test_t)) + +static const test_t testTypeList[] = +{ + { "functionality", TEST_FUNCTIONALITY }, + { "performance", TEST_PERFORMANCE }, +}; + +#define NUMTESTTYPES (sizeof(testTypeList)/sizeof(test_t)) + +int main(int argc, char** argv) +{ + int i; + char hints[256]; + UINT32 testSet = 0; + UINT32 testTypes = 0; + int results = SUCCESS; + + /* Parse command line for the test set. */ + + for (i = 1; i < argc; ++i) + { + int j; + BOOL found = 0; + + for (j=0; j +#include +#include + +#include +#include +#include + +#include + +#ifdef WITH_IPP +#include +#include +#endif + +#define BLOCK_ALIGNMENT 16 +#ifdef __GNUC__ +#define ALIGN(x) x __attribute((aligned(BLOCK_ALIGNMENT))) +#define POSSIBLY_UNUSED(x) x __attribute((unused)) +#else +/* TODO: Someone needs to finish this for non-GNU C */ +#define ALIGN(x) x +#define POSSIBLY_UNUSED(x) x +#endif +#define ABS(_x_) ((_x_) < 0 ? 
(-(_x_)) : (_x_)) +#define MAX_TEST_SIZE 4096 + +extern int test_sizes[]; +#define NUM_TEST_SIZES 10 + +extern void get_random_data(void *buffer, size_t size); + +#ifndef SUCCESS +#define SUCCESS 0 +#endif +#ifndef FAILURE +#define FAILURE 1 +#endif + +extern int test_copy8u_func(void); +extern int test_copy8u_speed(void); + +extern int test_set8u_func(void); +extern int test_set32s_func(void); +extern int test_set32u_func(void); +extern int test_set8u_speed(void); +extern int test_set32s_speed(void); +extern int test_set32u_speed(void); + +extern int test_sign16s_func(void); +extern int test_sign16s_speed(void); + +extern int test_add16s_func(void); +extern int test_add16s_speed(void); + +extern int test_lShift_16s_func(void); +extern int test_lShift_16u_func(void); +extern int test_rShift_16s_func(void); +extern int test_rShift_16u_func(void); +extern int test_lShift_16s_speed(void); +extern int test_lShift_16u_speed(void); +extern int test_rShift_16s_speed(void); +extern int test_rShift_16u_speed(void); + +extern int test_RGBToRGB_16s8u_P3AC4R_func(void); +extern int test_RGBToRGB_16s8u_P3AC4R_speed(void); +extern int test_yCbCrToRGB_16s16s_P3P3_func(void); +extern int test_yCbCrToRGB_16s16s_P3P3_speed(void); + +extern int test_alphaComp_func(void); +extern int test_alphaComp_speed(void); + +extern int test_and_32u_func(void); +extern int test_and_32u_speed(void); +extern int test_or_32u_func(void); +extern int test_or_32u_speed(void); + +/* Since so much of this code is repeated, define a macro to build + * functions to do speed tests. 
+ */ +#ifdef armel +#define SIMD_TYPE "Neon" +#else +#define SIMD_TYPE "SSE" +#endif + +#define DO_NORMAL_MEASUREMENTS(_funcNormal_, _prework_) \ + do { \ + for (s=0; s 0.0) _floatprint(resultNormal[s], sN); \ + if (resultSSENeon[s] > 0.0) \ + { \ + _floatprint(resultSSENeon[s], sSN); \ + if (resultNormal[s] > 0.0) \ + { \ + sprintf(sSNp, "%d%%", \ + (int) (resultSSENeon[s] / resultNormal[s] * 100.0 + 0.5)); \ + } \ + } \ + if (resultIPP[s] > 0.0) \ + { \ + _floatprint(resultIPP[s], sIPP); \ + if (resultNormal[s] > 0.0) \ + { \ + sprintf(sIPPp, "%d%%", \ + (int) (resultIPP[s] / resultNormal[s] * 100.0 + 0.5)); \ + } \ + } \ + printf("%8d: %15s %15s %5s %15s %5s\n", \ + size_array[s], sN, sSN, sSNp, sIPP, sIPPp); \ + } \ + free(resultNormal); free(resultSSENeon); free(resultIPP); \ +} + +#endif // !__PRIMTEST_H_INCLUDED__ diff --git a/libfreerdp/primitives/test/test_add.c b/libfreerdp/primitives/test/test_add.c new file mode 100644 index 000000000..d28707e0c --- /dev/null +++ b/libfreerdp/primitives/test/test_add.c @@ -0,0 +1,109 @@ +/* test_add.c + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "prim_test.h" + +#define FUNC_TEST_SIZE 65536 +static const int ADD16S_PRETEST_ITERATIONS = 300000*64; +static const int TEST_TIME = 2.0; // seconds + +extern pstatus_t general_add_16s( + const INT16 *pSrc1, const INT16 *pSrc2, INT16 *pDst, int len); +extern pstatus_t sse3_add_16s( + const INT16 *pSrc1, const INT16 *pSrc2, INT16 *pDst, int len); + +/* ========================================================================= */ +int test_add16s_func(void) +{ + INT16 ALIGN(src1[FUNC_TEST_SIZE+3]), ALIGN(src2[FUNC_TEST_SIZE+3]), + ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]); + int failed = 0; + int i; + char testStr[256]; + UINT32 pflags = primitives_get_flags(primitives_get()); + + testStr[0] = '\0'; + get_random_data(src1, sizeof(src1)); + get_random_data(src2, sizeof(src2)); + memset(d1, 0, sizeof(d1)); + memset(d2, 0, sizeof(d2)); + general_add_16s(src1+1, src2+1, d1+1, FUNC_TEST_SIZE); +#ifdef i386 + if (pflags & PRIM_X86_SSE3_AVAILABLE) + { + strcat(testStr, " SSE3"); + /* Aligned */ + sse3_add_16s(src1+1, src2+1, d2+1, FUNC_TEST_SIZE); + for (i=1; i 0) ? 
FAILURE : SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +STD_SPEED_TEST(add16s_speed_test, INT16, INT16, dst=dst, + TRUE, general_add_16s(src1, src2, dst, size), + TRUE, sse3_add_16s(src1, src2, dst, size), PRIM_X86_SSE3_AVAILABLE, + FALSE, dst=dst, 0, + TRUE, ippsAdd_16s(src1, src2, dst, size)); + +int test_add16s_speed(void) +{ + INT16 ALIGN(src1[MAX_TEST_SIZE+3]), ALIGN(src2[MAX_TEST_SIZE+3]), + ALIGN(dst[MAX_TEST_SIZE+3]); + get_random_data(src1, sizeof(src1)); + get_random_data(src2, sizeof(src2)); + add16s_speed_test("add16s", "aligned", src1, src2, 0, dst, + test_sizes, NUM_TEST_SIZES, ADD16S_PRETEST_ITERATIONS, TEST_TIME); + add16s_speed_test("add16s", "unaligned", src1+1, src2+2, 0, dst, + test_sizes, NUM_TEST_SIZES, ADD16S_PRETEST_ITERATIONS, TEST_TIME); + return SUCCESS; +} diff --git a/libfreerdp/primitives/test/test_alphaComp.c b/libfreerdp/primitives/test/test_alphaComp.c new file mode 100644 index 000000000..7a68f1783 --- /dev/null +++ b/libfreerdp/primitives/test/test_alphaComp.c @@ -0,0 +1,230 @@ +/* test_alphaComp.c + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "prim_test.h" + +static const int ALPHA_PRETEST_ITERATIONS = 5000000; +static const float TEST_TIME = 5.0; + +static const int block_size[] = { 4, 64, 256 }; +#define NUM_BLOCK_SIZES (sizeof(block_size)/sizeof(int)) +#define MAX_BLOCK_SIZE 256 +#define SIZE_SQUARED (MAX_BLOCK_SIZE*MAX_BLOCK_SIZE) + +extern pstatus_t general_alphaComp_argb( + const BYTE *pSrc1, int src1Step, + const BYTE *pSrc2, int src2Step, + BYTE *pDst, int dstStep, + int width, int height); +extern pstatus_t sse2_alphaComp_argb( + const BYTE *pSrc1, int src1Step, + const BYTE *pSrc2, int src2Step, + BYTE *pDst, int dstStep, + int width, int height); +extern pstatus_t ipp_alphaComp_argb( + const BYTE *pSrc1, int src1Step, + const BYTE *pSrc2, int src2Step, + BYTE *pDst, int dstStep, + int width, int height); + +/* ========================================================================= */ +#define ALF(_c_) (((_c_) & 0xFF000000U) >> 24) +#define RED(_c_) (((_c_) & 0x00FF0000U) >> 16) +#define GRN(_c_) (((_c_) & 0x0000FF00U) >> 8) +#define BLU(_c_) ((_c_) & 0x000000FFU) +#define TOLERANCE 1 +#define PIXEL(_addr_, _bytes_, _x_, _y_) \ + ((UINT32 *) (((BYTE *) (_addr_)) + (_x_)*4 + (_y_)*(_bytes_))) +#define SRC1_WIDTH 6 +#define SRC1_HEIGHT 6 +#define SRC2_WIDTH 7 +#define SRC2_HEIGHT 7 +#define DST_WIDTH 9 +#define DST_HEIGHT 9 +#define TEST_WIDTH 4 +#define TEST_HEIGHT 5 + +/* ------------------------------------------------------------------------- */ +static UINT32 alpha_add( + UINT32 c1, + UINT32 c2) +{ + UINT32 a1 = ALF(c1); + UINT32 r1 = RED(c1); + UINT32 g1 = GRN(c1); + UINT32 b1 = BLU(c1); + + UINT32 a2 = ALF(c2); + UINT32 r2 = RED(c2); + UINT32 g2 = GRN(c2); + UINT32 b2 = BLU(c2); + + UINT32 a3 = ((a1 * a1 + (255-a1) * a2) / 255) & 0xff; + UINT32 r3 = ((a1 * r1 + (255-a1) * r2) / 255) & 0xff; + UINT32 g3 = ((a1 * g1 + (255-a1) * g2) / 255) & 0xff; + UINT32 b3 = ((a1 * b1 + (255-a1) * b2) / 255) & 0xff; + + return (a3 << 
24) | (r3 << 16) | (g3 << 8) | b3; +} + +/* ------------------------------------------------------------------------- */ +static UINT32 colordist( + UINT32 c1, + UINT32 c2) +{ + int d, maxd = 0; + + d = ABS(ALF(c1) - ALF(c2)); + if (d > maxd) maxd = d; + d = ABS(RED(c1) - RED(c2)); + if (d > maxd) maxd = d; + d = ABS(GRN(c1) - GRN(c2)); + if (d > maxd) maxd = d; + d = ABS(BLU(c1) - BLU(c2)); + if (d > maxd) maxd = d; + return maxd; +} + +/* ------------------------------------------------------------------------- */ +int test_alphaComp_func(void) +{ + UINT32 ALIGN(src1[SRC1_WIDTH*SRC1_HEIGHT]); + UINT32 ALIGN(src2[SRC2_WIDTH*SRC2_HEIGHT]); + UINT32 ALIGN(dst1[DST_WIDTH*DST_HEIGHT]); + UINT32 ALIGN(dst2a[DST_WIDTH*DST_HEIGHT]); + UINT32 ALIGN(dst2u[DST_WIDTH*DST_HEIGHT+1]); + UINT32 ALIGN(dst3[DST_WIDTH*DST_HEIGHT]); + int error = 0; + UINT32 pflags = primitives_get_flags(primitives_get()); + char testStr[256]; + UINT32 *ptr; + int i, x, y; + + testStr[0] = '\0'; + get_random_data(src1, sizeof(src1)); + /* Special-case the first two values */ + src1[0] &= 0x00FFFFFFU; + src1[1] |= 0xFF000000U; + get_random_data(src2, sizeof(src2)); + /* Set the second operand to fully-opaque. 
*/ + ptr = src2; + for (i=0; i TOLERANCE) + { + printf("alphaComp-general: [%d,%d] 0x%08x+0x%08x=0x%08x, got 0x%08x\n", + x, y, s1, s2, c0, c1); + error = 1; + } +#ifdef i386 + if (pflags & PRIM_X86_SSE2_AVAILABLE) + { + UINT32 c2 = *PIXEL(dst2a, 4*DST_WIDTH, x, y); + if (colordist(c0, c2) > TOLERANCE) + { + printf("alphaComp-SSE-aligned: [%d,%d] 0x%08x+0x%08x=0x%08x, got 0x%08x\n", + x, y, s1, s2, c0, c2); + error = 1; + } + c2 = *PIXEL(dst2u+1, 4*DST_WIDTH, x, y); + if (colordist(c0, c2) > TOLERANCE) + { + printf("alphaComp-SSE-unaligned: [%d,%d] 0x%08x+0x%08x=0x%08x, got 0x%08x\n", + x, y, s1, s2, c0, c2); + error = 1; + } + } +#endif /* i386 */ +#ifdef WITH_IPP + UINT32 c3 = *PIXEL(dst3, 4*DST_WIDTH, x, y); + if (colordist(c0, c3) > TOLERANCE) + { + printf("alphaComp-IPP: [%d,%d] 0x%08x+0x%08x=0x%08x, got 0x%08x\n", + x, y, s1, s2, c0, c3); + error = 1; + } +#endif + } + } + if (!error) printf("All alphaComp tests passed (%s).\n", testStr); + return (error > 0) ? FAILURE : SUCCESS; +} + + +/* ------------------------------------------------------------------------- */ +STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes = size*4, + TRUE, general_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes, + size, size), + TRUE, sse2_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes, + size, size), PRIM_X86_SSE2_AVAILABLE, + FALSE, dst=dst, 0, + TRUE, ipp_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes, + size, size)); + +int test_alphaComp_speed(void) +{ + INT32 ALIGN(src1[MAX_BLOCK_SIZE*(MAX_BLOCK_SIZE+1)]), + ALIGN(src2[SIZE_SQUARED]), + ALIGN(dst[SIZE_SQUARED]); + + get_random_data(src1, sizeof(src1)); + get_random_data(src2, sizeof(src2)); + + alphaComp_speed("alphaComp", "aligned", + (BYTE *) src1, (BYTE *) src2, 0, (BYTE *) dst, + block_size, NUM_BLOCK_SIZES, ALPHA_PRETEST_ITERATIONS, TEST_TIME); + alphaComp_speed("alphaComp", "unaligned", + (BYTE *) src1+1, (BYTE *) src2, 0, (BYTE *) dst, + block_size, NUM_BLOCK_SIZES, ALPHA_PRETEST_ITERATIONS, 
TEST_TIME); + + return SUCCESS; +} diff --git a/libfreerdp/primitives/test/test_andor.c b/libfreerdp/primitives/test/test_andor.c new file mode 100644 index 000000000..e2c13ba35 --- /dev/null +++ b/libfreerdp/primitives/test/test_andor.c @@ -0,0 +1,182 @@ +/* test_andor.c + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "prim_test.h" + +#define FUNC_TEST_SIZE 65536 +static const int ANDOR_PRETEST_ITERATIONS = 100000; +static const int TEST_TIME = 2.0; // seconds + +extern pstatus_t general_andC_32u(const UINT32 *pSrc, UINT32 val, + UINT32 *pDst, int len); +extern pstatus_t sse3_andC_32u(const UINT32 *pSrc, UINT32 val, + UINT32 *pDst, int len); +extern pstatus_t general_orC_32u(const UINT32 *pSrc, UINT32 val, + UINT32 *pDst, int len); +extern pstatus_t sse3_orC_32u(const UINT32 *pSrc, UINT32 val, + UINT32 *pDst, int len); + +#define VALUE (0xA5A5A5A5U) + +/* ========================================================================= */ +int test_and_32u_func(void) +{ + UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]); + int failed = 0; + int i; + UINT32 pflags = primitives_get_flags(primitives_get()); + char testStr[256]; + + testStr[0] = '\0'; + get_random_data(src, sizeof(src)); + general_andC_32u(src+1, VALUE, dst+1, FUNC_TEST_SIZE); + strcat(testStr, " general"); + for (i=1; i<=FUNC_TEST_SIZE; ++i) + { 
+ if (dst[i] != (src[i] & VALUE)) + { + printf("AND-general FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n", + i, src[i], VALUE, src[i] & VALUE, dst[i]); + ++failed; + } + } +#ifdef i386 + if (pflags & PRIM_X86_SSE3_AVAILABLE) + { + strcat(testStr, " SSE3"); + /* Aligned */ + memset(dst, 0, sizeof(dst)); + sse3_andC_32u(src+1, VALUE, dst+1, FUNC_TEST_SIZE); + for (i=1; i<=FUNC_TEST_SIZE; ++i) + { + if (dst[i] != (src[i] & VALUE)) + { + printf("AND-SSE-aligned FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n", + i, src[i], VALUE, src[i] & VALUE, dst[i]); + ++failed; + } + } + /* Unaligned */ + memset(dst, 0, sizeof(dst)); + sse3_andC_32u(src+1, VALUE, dst+2, FUNC_TEST_SIZE); + for (i=1; i<=FUNC_TEST_SIZE; ++i) + { + if (dst[i+1] != (src[i] & VALUE)) + { + printf("AND-SSE-unaligned FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n", + i, src[i], VALUE, src[i] & VALUE, dst[i+1]); + ++failed; + } + } + } +#endif /* i386 */ + if (!failed) printf("All and_32u tests passed (%s).\n", testStr); + return (failed > 0) ? 
FAILURE : SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +STD_SPEED_TEST(andC_32u_speed_test, UINT32, UINT32, dst=dst, + TRUE, general_andC_32u(src1, constant, dst, size), + TRUE, sse3_andC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE, + FALSE, dst=dst, 0, + TRUE, ippsAndC_32u(src1, constant, dst, size)) + +int test_and_32u_speed(void) +{ + UINT32 ALIGN(src[MAX_TEST_SIZE+3]), ALIGN(dst[MAX_TEST_SIZE+3]); + get_random_data(src, sizeof(src)); + andC_32u_speed_test("and32u", "aligned", src, NULL, VALUE, dst, + test_sizes, NUM_TEST_SIZES, ANDOR_PRETEST_ITERATIONS, TEST_TIME); + andC_32u_speed_test("and32u", "unaligned", src+1, NULL, VALUE, dst, + test_sizes, NUM_TEST_SIZES, ANDOR_PRETEST_ITERATIONS, TEST_TIME); + return SUCCESS; +} + +/* ========================================================================= */ +int test_or_32u_func(void) +{ + UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]); + int failed = 0; + int i; + UINT32 pflags = primitives_get_flags(primitives_get()); + char testStr[256]; + + testStr[0] = '\0'; + get_random_data(src, sizeof(src)); + general_orC_32u(src+1, VALUE, dst+1, FUNC_TEST_SIZE); + strcat(testStr, " general"); + for (i=1; i<=FUNC_TEST_SIZE; ++i) + { + if (dst[i] != (src[i] | VALUE)) + { + printf("OR-general general FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n", + i, src[i], VALUE, src[i] | VALUE, dst[i]); + ++failed; + } + } +#ifdef i386 + if (pflags & PRIM_X86_SSE3_AVAILABLE) + { + strcat(testStr, " SSE3"); + /* Aligned */ + memset(dst, 0, sizeof(dst)); + sse3_orC_32u(src+1, VALUE, dst+1, FUNC_TEST_SIZE); + for (i=1; i 0) ? 
FAILURE : SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +STD_SPEED_TEST(orC_32u_speed_test, UINT32, UINT32, dst=dst, + TRUE, general_orC_32u(src1, constant, dst, size), + TRUE, sse3_orC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE, + FALSE, dst=dst, 0, + TRUE, ippsOrC_32u(src1, constant, dst, size)) + +int test_or_32u_speed(void) +{ + UINT32 ALIGN(src[MAX_TEST_SIZE+3]), ALIGN(dst[MAX_TEST_SIZE+3]); + get_random_data(src, sizeof(src)); + orC_32u_speed_test("or32u", "aligned", src, NULL, VALUE, dst, + test_sizes, NUM_TEST_SIZES, ANDOR_PRETEST_ITERATIONS, TEST_TIME); + orC_32u_speed_test("or32u", "unaligned", src+1, NULL, VALUE, dst, + test_sizes, NUM_TEST_SIZES, ANDOR_PRETEST_ITERATIONS, TEST_TIME); + return SUCCESS; +} diff --git a/libfreerdp/primitives/test/test_colors.c b/libfreerdp/primitives/test/test_colors.c new file mode 100644 index 000000000..b82c5ebe9 --- /dev/null +++ b/libfreerdp/primitives/test/test_colors.c @@ -0,0 +1,230 @@ +/* test_colors.c + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "prim_test.h" + +static const int RGB_TRIAL_ITERATIONS = 1000; +static const int YCBCR_TRIAL_ITERATIONS = 1000; +static const float TEST_TIME = 4.0; + +extern pstatus_t general_RGBToRGB_16s8u_P3AC4R(const INT16 *pSrc[3], + int srcStep, BYTE *pDst, int dstStep, const prim_size_t *roi); +extern pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(const INT16 *pSrc[3], + int srcStep, BYTE *pDst, int dstStep, const prim_size_t *roi); +extern pstatus_t general_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3], + int srcStep, INT16 *pDst[3], int dstStep, const prim_size_t *roi); +extern pstatus_t sse2_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3], + int srcStep, INT16 *pDst[3], int dstStep, const prim_size_t *roi); + +/* ------------------------------------------------------------------------- */ +int test_RGBToRGB_16s8u_P3AC4R_func(void) +{ + INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]); + UINT32 ALIGN(out1[4096]), ALIGN(out2[4096]); + int i; + int failed = 0; + UINT32 pflags = primitives_get_flags(primitives_get()); + char testStr[256]; + INT16 *ptrs[3]; + prim_size_t roi = { 64, 64 }; + + testStr[0] = '\0'; + get_random_data(r, sizeof(r)); + get_random_data(g, sizeof(g)); + get_random_data(b, sizeof(b)); + /* clear upper bytes */ + for (i=0; i<4096; ++i) + { + r[i] &= 0x00FFU; + g[i] &= 0x00FFU; + b[i] &= 0x00FFU; + } + + ptrs[0] = r; + ptrs[1] = g; + ptrs[2] = b; + + general_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2, + (BYTE *) out1, 64*4, &roi); +#ifdef i386 + if (pflags & PRIM_X86_SSE2_AVAILABLE) + { + strcat(testStr, " SSE2"); + sse2_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2, + (BYTE *) out2, 64*4, &roi); + for (i=0; i<4096; ++i) + { + if (out1[i] != out2[i]) + { + printf("RGBToRGB-SSE FAIL: out1[%d]=0x%08x out2[%d]=0x%08x\n", + i, out1[i], i, out2[i]); + failed = 1; + } + } + } +#endif /* i386 */ + if (!failed) printf("All RGBToRGB_16s8u_P3AC4R tests passed (%s).\n", testStr); + return (failed 
> 0) ? FAILURE : SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +static const prim_size_t roi64x64 = { 64, 64 }; +STD_SPEED_TEST( + rgb_to_argb_speed, INT16*, UINT32, dst=dst, + TRUE, general_RGBToRGB_16s8u_P3AC4R( + (const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64), + TRUE, sse2_RGBToRGB_16s8u_P3AC4R( + (const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64), + PRIM_X86_SSE2_AVAILABLE, + FALSE, dst=dst, 0, + FALSE, dst=dst); + +int test_RGBToRGB_16s8u_P3AC4R_speed(void) +{ + INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]); + UINT32 ALIGN(dst[4096]); + int i; + INT16 *ptrs[3]; + int size_array[] = { 64 }; + + get_random_data(r, sizeof(r)); + get_random_data(g, sizeof(g)); + get_random_data(b, sizeof(b)); + /* clear upper bytes */ + for (i=0; i<4096; ++i) + { + r[i] &= 0x00FFU; + g[i] &= 0x00FFU; + b[i] &= 0x00FFU; + } + + ptrs[0] = r; + ptrs[1] = g; + ptrs[2] = b; + + rgb_to_argb_speed("RGBToARGB", "aligned", + (const INT16 **) ptrs, NULL, 0, dst, + size_array, 1, RGB_TRIAL_ITERATIONS, TEST_TIME); + return SUCCESS; +} + +/* ========================================================================= */ +int test_yCbCrToRGB_16s16s_P3P3_func(void) +{ + INT16 ALIGN(y[4096]), ALIGN(cb[4096]), ALIGN(cr[4096]); + INT16 ALIGN(r1[4096]), ALIGN(g1[4096]), ALIGN(b1[4096]); + INT16 ALIGN(r2[4096]), ALIGN(g2[4096]), ALIGN(b2[4096]); + int i; + int failed = 0; + UINT32 pflags = primitives_get_flags(primitives_get()); + char testStr[256]; + const INT16 *in[3]; + INT16 *out1[3]; + INT16 *out2[3]; + prim_size_t roi = { 64, 64 }; + + testStr[0] = '\0'; + get_random_data(y, sizeof(y)); + get_random_data(cb, sizeof(cb)); + get_random_data(cr, sizeof(cr)); + /* Normalize to 11.5 fixed radix */ + for (i=0; i<4096; ++i) + { + y[i] &= 0x1FE0U; + cb[i] &= 0x1FE0U; + cr[i] &= 0x1FE0U; + } + memset(r1, 0, sizeof(r1)); + memset(g1, 0, sizeof(g1)); + memset(b1, 0, sizeof(b1)); + memset(r2, 0, sizeof(r2)); + memset(g2, 0, 
sizeof(g2)); + memset(b2, 0, sizeof(b2)); + + in[0] = y; + in[1] = cb; + in[2] = cr; + out1[0] = r1; + out1[1] = g1; + out1[2] = b1; + out2[0] = r2; + out2[1] = g2; + out2[2] = b2; + + general_yCbCrToRGB_16s16s_P3P3(in, 64*2, out1, 64*2, &roi); +#ifdef i386 + if (pflags & PRIM_X86_SSE2_AVAILABLE) + { + strcat(testStr, " SSE2"); + sse2_yCbCrToRGB_16s16s_P3P3(in, 64*2, out2, 64*2, &roi); + for (i=0; i<4096; ++i) + { + if ((ABS(r1[i]-r2[i]) > 1) + || (ABS(g1[i]-g2[i]) > 1) + || (ABS(b1[i]-b2[i]) > 1)) { + printf("YCbCrToRGB-SSE FAIL[%d]: %d,%d,%d vs %d,%d,%d\n", i, + r1[i],g1[i],b1[i], r2[i],g2[i],b2[i]); + failed = 1; + } + } + } +#endif /* i386 */ + if (!failed) printf("All yCbCrToRGB_16s16s_P3P3 tests passed (%s).\n", testStr); + return (failed > 0) ? FAILURE : SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +STD_SPEED_TEST( + ycbcr_to_rgb_speed, INT16*, INT16*, dst=dst, + TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64), + TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64), + PRIM_X86_SSE2_AVAILABLE, + FALSE, dst=dst, 0, + FALSE, dst=dst); + +int test_yCbCrToRGB_16s16s_P3P3_speed(void) +{ + INT16 ALIGN(y[4096]), ALIGN(cb[4096]), ALIGN(cr[4096]); + INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]); + int i; + const INT16 *input[3]; + INT16 *output[3]; + int size_array[] = { 64 }; + + get_random_data(y, sizeof(y)); + get_random_data(cb, sizeof(cb)); + get_random_data(cr, sizeof(cr)); + /* Normalize to 11.5 fixed radix */ + for (i=0; i<4096; ++i) + { + y[i] &= 0x1FE0U; + cb[i] &= 0x1FE0U; + cr[i] &= 0x1FE0U; + } + + input[0] = y; + input[1] = cb; + input[2] = cr; + output[0] = r; + output[1] = g; + output[2] = b; + + ycbcr_to_rgb_speed("yCbCrToRGB", "aligned", input, NULL, NULL, output, + size_array, 1, YCBCR_TRIAL_ITERATIONS, TEST_TIME); + return SUCCESS; +} diff --git a/libfreerdp/primitives/test/test_copy.c b/libfreerdp/primitives/test/test_copy.c new file mode 100644 index 
000000000..c92221e92 --- /dev/null +++ b/libfreerdp/primitives/test/test_copy.c @@ -0,0 +1,87 @@ +/* test_copy.c + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "prim_test.h" + +static const int MEMCPY_PRETEST_ITERATIONS = 1000000; +static const int TEST_TIME = 1.0; // seconds +#define COPY_TESTSIZE (256*2+16*2+15+15) +#if 0 +extern pstatus_t sse3_copy_8u(const BYTE *pSrc, BYTE *pDst, int len); +#endif + +/* ------------------------------------------------------------------------- */ +int test_copy8u_func(void) +{ + primitives_t *prims = primitives_get(); + BYTE ALIGN(data[COPY_TESTSIZE+15]); + int i, soff; + int failed = 0; + char testStr[256]; + BYTE ALIGN(dest[COPY_TESTSIZE+15]); + + testStr[0] = '\0'; + get_random_data(data, sizeof(data)); + + strcat(testStr, " ptr"); + for (soff=0; soff<16; ++soff) + { + int doff; + for (doff=0; doff<16; ++doff) + { + int length; + for (length=1; length<=COPY_TESTSIZE-doff; ++length) + { + memset(dest, 0, sizeof(dest)); + prims->copy_8u(data+soff, dest+doff, length); + for (i=0; i 0) ? 
FAILURE : SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +STD_SPEED_TEST(copy8u_speed_test, BYTE, BYTE, dst=dst, + TRUE, memcpy(dst, src1, size), + FALSE, NULL, 0, + FALSE, NULL, 0, + TRUE, ippsCopy_8u(src1, dst, size)); + +int test_copy8u_speed(void) +{ + BYTE ALIGN(src[MAX_TEST_SIZE+4]); + BYTE ALIGN(intervening[MAX_TEST_SIZE*7]); + BYTE ALIGN(dst[MAX_TEST_SIZE+4]); + copy8u_speed_test("copy8u", "aligned", src, NULL, 0, dst, + test_sizes, NUM_TEST_SIZES, MEMCPY_PRETEST_ITERATIONS, TEST_TIME); + copy8u_speed_test("copy8u", "unaligned", src+1, NULL, 0, dst, + test_sizes, NUM_TEST_SIZES, MEMCPY_PRETEST_ITERATIONS, TEST_TIME); + return SUCCESS; +} diff --git a/libfreerdp/primitives/test/test_set.c b/libfreerdp/primitives/test/test_set.c new file mode 100644 index 000000000..ee50e870a --- /dev/null +++ b/libfreerdp/primitives/test/test_set.c @@ -0,0 +1,298 @@ +/* test_set.c + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "prim_test.h" + +static const int MEMSET8_PRETEST_ITERATIONS = 100000000; +static const int MEMSET32_PRETEST_ITERATIONS = 40000000; +static const float TEST_TIME = 1.0; + +extern pstatus_t general_set_8u(BYTE val, BYTE *pDst, int len); +extern pstatus_t sse2_set_8u(BYTE val, BYTE *pDst, int len); +extern pstatus_t general_set_32s(INT32 val, INT32 *pDst, int len); +extern pstatus_t sse2_set_32s(INT32 val, INT32 *pDst, int len); +extern pstatus_t general_set_32u(UINT32 val, UINT32 *pDst, int len); +extern pstatus_t sse2_set_32u(UINT32 val, UINT32 *pDst, int len); +extern pstatus_t ipp_wrapper_set_32u(UINT32 val, UINT32 *pDst, int len); + +static const int set_sizes[] = { 1, 4, 16, 32, 64, 256, 1024, 4096 }; +#define NUM_SET_SIZES (sizeof(set_sizes)/sizeof(int)) + +/* ------------------------------------------------------------------------- */ +int test_set8u_func(void) +{ + BYTE ALIGN(dest[48]); + int failed = 0; + int off; + char testStr[256]; + UINT32 pflags = primitives_get_flags(primitives_get()); + testStr[0] = '\0'; + +#ifdef i386 + /* Test SSE under various alignments */ + if (pflags & PRIM_X86_SSE2_AVAILABLE) + { + strcat(testStr, " SSE2"); + for (off=0; off<16; ++off) + { + int len; + for (len=1; len<48-off; ++len) + { + int i; + memset(dest, 0, sizeof(dest)); + sse2_set_8u(0xa5, dest+off, len); + for (i=0; i 0) ? 
FAILURE : SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +STD_SPEED_TEST(set8u_speed_test, BYTE, BYTE, dst=dst, + TRUE, memset(dst, constant, size), + FALSE, NULL, 0, + FALSE, NULL, 0, + TRUE, ippsSet_8u(constant, dst, size)); + +int test_set8u_speed(void) +{ + BYTE ALIGN(dst[MAX_TEST_SIZE]); + set8u_speed_test("set8u", "aligned", NULL, NULL, 0xA5, dst, + set_sizes, NUM_SET_SIZES, MEMSET8_PRETEST_ITERATIONS, TEST_TIME); + return SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +int test_set32s_func(void) +{ + primitives_t *prims = primitives_get(); + INT32 ALIGN(dest[512]); + int failed = 0; + int off; + char testStr[256]; + UINT32 pflags = primitives_get_flags(primitives_get()); + testStr[0] = '\0'; + +#ifdef i386 + /* Test SSE under various alignments */ + if (pflags & PRIM_X86_SSE2_AVAILABLE) + { + strcat(testStr, " SSE2"); + for (off=0; off<16; ++off) { + int len; + for (len=1; len<512-off; ++len) + { + int i; + memset(dest, 0, sizeof(dest)); + sse2_set_32s(0xdeadbeef, dest+off, len); + for (i=0; i 0) ? FAILURE : SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +int test_set32u_func(void) +{ + primitives_t *prims = primitives_get(); + UINT32 ALIGN(dest[512]); + int failed = 0; + int off; + char testStr[256]; + UINT32 pflags = primitives_get_flags(primitives_get()); + testStr[0] = '\0'; + +#ifdef i386 + /* Test SSE under various alignments */ + if (pflags & PRIM_X86_SSE2_AVAILABLE) + { + strcat(testStr, " SSE2"); + for (off=0; off<16; ++off) { + int len; + for (len=1; len<512-off; ++len) + { + int i; + memset(dest, 0, sizeof(dest)); + sse2_set_32u(0xdeadbeefU, dest+off, len); + for (i=0; i 0) ? 
FAILURE : SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +static inline void memset32u_naive( + UINT32 val, + UINT32 *dst, + size_t count) +{ + while (count--) *dst++ = val; +} + +/* ------------------------------------------------------------------------- */ +STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst, + TRUE, memset32u_naive(constant, dst, size), + TRUE, sse2_set_32u(constant, dst, size), PRIM_X86_SSE2_AVAILABLE, + FALSE, dst=dst, 0, + TRUE, ipp_wrapper_set_32u(constant, dst, size)); + +int test_set32u_speed(void) +{ + UINT32 ALIGN(dst[MAX_TEST_SIZE+1]); + set32u_speed_test("set32u", "aligned", NULL, NULL, 0xdeadbeef, dst, + set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME); +#if 0 + /* Not really necessary; should be almost as fast. */ + set32u_speed_test("set32u", "unaligned", NULL, NULL, dst+1, + set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME); +#endif + return SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +static inline void memset32s_naive( + INT32 val, + INT32 *dst, + size_t count) +{ + while (count--) *dst++ = val; +} + +/* ------------------------------------------------------------------------- */ +STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst, + TRUE, memset32s_naive(constant, dst, size), + TRUE, sse2_set_32s(constant, dst, size), PRIM_X86_SSE2_AVAILABLE, + FALSE, dst=dst, 0, + TRUE, ippsSet_32s(constant, dst, size)); + +int test_set32s_speed(void) +{ + INT32 ALIGN(dst[MAX_TEST_SIZE+1]); + set32s_speed_test("set32s", "aligned", NULL, NULL, 0xdeadbeef, dst, + set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME); +#if 0 + /* Not really necessary; should be almost as fast. 
*/ + set32s_speed_test("set32s", "unaligned", NULL, NULL, dst+1, + set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME); +#endif + return SUCCESS; +} diff --git a/libfreerdp/primitives/test/test_shift.c b/libfreerdp/primitives/test/test_shift.c new file mode 100644 index 000000000..cf5c033f2 --- /dev/null +++ b/libfreerdp/primitives/test/test_shift.c @@ -0,0 +1,177 @@ +/* test_shift.c + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "prim_test.h" + +#define FUNC_TEST_SIZE 65536 +static const int SHIFT_PRETEST_ITERATIONS = 50000; +static const float TEST_TIME = 1.0; + +extern pstatus_t general_lShiftC_16s( + const INT16 *pSrc, int val, INT16 *pDst, int len); +extern pstatus_t general_rShiftC_16s( + const INT16 *pSrc, int val, INT16 *pDst, int len); +extern pstatus_t general_shiftC_16s( + const INT16 *pSrc, int val, INT16 *pDst, int len); +extern pstatus_t general_lShiftC_16u( + const UINT16 *pSrc, int val, UINT16 *pDst, int len); +extern pstatus_t general_rShiftC_16u( + const UINT16 *pSrc, int val, UINT16 *pDst, int len); +extern pstatus_t general_shiftC_16u( + const UINT16 *pSrc, int val, UINT16 *pDst, int len); +extern pstatus_t sse2_lShiftC_16s( + const INT16 *pSrc, int val, INT16 *pDst, int len); +extern pstatus_t sse2_rShiftC_16s( + const INT16 *pSrc, int val, INT16 *pDst, int len); +extern pstatus_t sse2_shiftC_16s( + const INT16 *pSrc, int val, INT16 *pDst, int len); +extern pstatus_t sse2_lShiftC_16u( + const UINT16 *pSrc, int val, UINT16 *pDst, int len); +extern pstatus_t sse2_rShiftC_16u( + const UINT16 *pSrc, int val, UINT16 *pDst, int len); +extern pstatus_t sse2_shiftC_16u( + const UINT16 *pSrc, int val, UINT16 *pDst, int len); + +#ifdef i386 +#define SHIFT_TEST_FUNC(_name_, _type_, _str_, _f1_, _f2_) \ +int _name_(void) \ +{ \ + _type_ ALIGN(src[FUNC_TEST_SIZE+3]), \ + ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]); \ + int failed = 0; \ + int i; \ + UINT32 pflags = primitives_get_flags(primitives_get()); \ + char testStr[256]; \ + testStr[0] = '\0'; \ + get_random_data(src, sizeof(src)); \ + _f1_(src+1, 3, d1+1, FUNC_TEST_SIZE); \ + if (pflags & PRIM_X86_SSE3_AVAILABLE) \ + { \ + strcat(testStr, " SSE3"); \ + /* Aligned */ \ + _f2_(src+1, 3, d2+1, FUNC_TEST_SIZE); \ + for (i=1; i<=FUNC_TEST_SIZE; ++i) \ + { \ + if (d1[i] != d2[i]) \ + { \ + printf("%s-SSE-aligned FAIL[%d]: 0x%x>>3=0x%x, got 0x%x\n", \ 
+ _str_, i, src[i], d1[i], d2[i]); \ + ++failed; \ + } \ + } \ + /* Unaligned */ \ + _f2_(src+1, 3, d2+2, FUNC_TEST_SIZE); \ + for (i=1; i<=FUNC_TEST_SIZE; ++i) \ + { \ + if (d1[i] != d2[i+1]) \ + { \ + printf("%s-SSE-unaligned FAIL[%d]: 0x%x>>3=0x%x, got 0x%x\n", \ + _str_, i, src[i], d1[i], d2[i+1]); \ + ++failed; \ + } \ + } \ + } \ + if (!failed) printf("All %s tests passed (%s).\n", _str_, testStr); \ + return (failed > 0) ? FAILURE : SUCCESS; \ +} +#else +#define SHIFT_TEST_FUNC(_name_, _type_, _str_, _f1_, _f2_) \ +int _name_(void) \ +{ \ + return SUCCESS; \ +} +#endif /* i386 */ + +SHIFT_TEST_FUNC(test_lShift_16s_func, INT16, "lshift_16s", general_lShiftC_16s, + sse2_lShiftC_16s) +SHIFT_TEST_FUNC(test_lShift_16u_func, UINT16, "lshift_16u", general_lShiftC_16u, + sse2_lShiftC_16u) +SHIFT_TEST_FUNC(test_rShift_16s_func, INT16, "rshift_16s", general_rShiftC_16s, + sse2_rShiftC_16s) +SHIFT_TEST_FUNC(test_rShift_16u_func, UINT16, "rshift_16u", general_rShiftC_16u, + sse2_rShiftC_16u) + +/* ========================================================================= */ +STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst, + TRUE, general_lShiftC_16s(src1, constant, dst, size), + TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE, + FALSE, dst=dst, 0, + TRUE, ippsLShiftC_16s(src1, constant, dst, size)); +STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst, + TRUE, general_lShiftC_16u(src1, constant, dst, size), + TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE, + FALSE, dst=dst, 0, + TRUE, ippsLShiftC_16u(src1, constant, dst, size)); +STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst, + TRUE, general_rShiftC_16s(src1, constant, dst, size), + TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE, + FALSE, dst=dst, 0, + TRUE, ippsRShiftC_16s(src1, constant, dst, size)); +STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst=dst, + TRUE, general_rShiftC_16u(src1, constant, dst, size), + 
TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE, + FALSE, dst=dst, 0, + TRUE, ippsRShiftC_16u(src1, constant, dst, size)); + +/* ------------------------------------------------------------------------- */ +int test_lShift_16s_speed(void) +{ + INT16 ALIGN(src[MAX_TEST_SIZE+1]), ALIGN(dst[MAX_TEST_SIZE+1]); + get_random_data(src, sizeof(src)); + speed_lShift_16s("lShift_16s", "aligned", src, NULL, 3, dst, + test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME); + speed_lShift_16s("lShift_16s", "unaligned", src+1, NULL, 3, dst, + test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME); + return SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +int test_lShift_16u_speed(void) +{ + UINT16 ALIGN(src[MAX_TEST_SIZE+1]), ALIGN(dst[MAX_TEST_SIZE+1]); + get_random_data(src, sizeof(src)); + speed_lShift_16u("lShift_16u", "aligned", src, NULL, 3, dst, + test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME); + speed_lShift_16u("lShift_16u", "unaligned", src+1, NULL, 3, dst, + test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME); + return SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +int test_rShift_16s_speed(void) +{ + INT16 ALIGN(src[MAX_TEST_SIZE+1]), ALIGN(dst[MAX_TEST_SIZE+1]); + get_random_data(src, sizeof(src)); + speed_rShift_16s("rShift_16s", "aligned", src, NULL, 3, dst, + test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME); + speed_rShift_16s("rShift_16s", "unaligned", src+1, NULL, 3, dst, + test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME); + return SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +int test_rShift_16u_speed(void) +{ + UINT16 ALIGN(src[MAX_TEST_SIZE+1]), ALIGN(dst[MAX_TEST_SIZE+1]); + get_random_data(src, sizeof(src)); + speed_rShift_16u("rShift_16u", "aligned", src, NULL, 3, dst, + test_sizes, NUM_TEST_SIZES, 
SHIFT_PRETEST_ITERATIONS, TEST_TIME); + speed_rShift_16u("rShift_16u", "unaligned", src+1, NULL, 3, dst, + test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME); + return SUCCESS; +} diff --git a/libfreerdp/primitives/test/test_sign.c b/libfreerdp/primitives/test/test_sign.c new file mode 100644 index 000000000..f18c736e6 --- /dev/null +++ b/libfreerdp/primitives/test/test_sign.c @@ -0,0 +1,95 @@ +/* test_sign.c + * vi:ts=4 sw=4 + * + * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "prim_test.h" + +static const int SIGN_PRETEST_ITERATIONS = 100000; +static const float TEST_TIME = 1.0; + +extern pstatus_t general_sign_16s(const INT16 *pSrc, INT16 *pDst, int len); +extern pstatus_t ssse3_sign_16s(const INT16 *pSrc, INT16 *pDst, int len); + +/* ------------------------------------------------------------------------- */ +int test_sign16s_func(void) +{ + INT16 ALIGN(src[65535]), ALIGN(d1[65535]), ALIGN(d2[65535]); + int failed = 0; + int i; + UINT32 pflags = primitives_get_flags(primitives_get()); + char testStr[256]; + + /* Test when we can reach 16-byte alignment */ + testStr[0] = '\0'; + get_random_data(src, sizeof(src)); + general_sign_16s(src+1, d1+1, 65535); +#ifdef i386 + if (pflags & PRIM_X86_SSSE3_AVAILABLE) + { + strcat(testStr, " SSSE3"); + ssse3_sign_16s(src+1, d2+1, 65535); + for (i=1; i<65535; ++i) + { + if (d1[i] != d2[i]) + { + printf("SIGN16s-SSE-aligned FAIL[%d] of %d: want %d, got %d\n", + i, src[i], d1[i], d2[i]); + ++failed; + } + } + } +#endif /* i386 */ + + /* Test when we cannot reach 16-byte alignment */ + get_random_data(src, sizeof(src)); + general_sign_16s(src+1, d1+2, 65535); +#ifdef i386 + if (pflags & PRIM_X86_SSSE3_AVAILABLE) + { + ssse3_sign_16s(src+1, d2+2, 65535); + for (i=2; i<65535; ++i) + { + if (d1[i] != d2[i]) + { + printf("SIGN16s-SSE-unaligned FAIL[%d] of %d: want %d, got %d\n", + i, src[i-1], d1[i], d2[i]); + ++failed; + } + } + } +#endif /* i386 */ + if (!failed) printf("All sign16s tests passed (%s).\n", testStr); + return (failed > 0) ? 
FAILURE : SUCCESS; +} + +/* ------------------------------------------------------------------------- */ +STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst=dst, + TRUE, general_sign_16s(src1, dst, size), + TRUE, ssse3_sign_16s(src1, dst, size), PRIM_X86_SSSE3_AVAILABLE, + FALSE, dst=dst, 0, + FALSE, dst=dst); + +int test_sign16s_speed(void) +{ + INT16 ALIGN(src[MAX_TEST_SIZE+3]), ALIGN(dst[MAX_TEST_SIZE+3]); + get_random_data(src, sizeof(src)); + sign16s_speed_test("sign16s", "aligned", src, NULL, 0, dst, + test_sizes, NUM_TEST_SIZES, SIGN_PRETEST_ITERATIONS, TEST_TIME); + sign16s_speed_test("sign16s", "unaligned", src+1, NULL, 0, dst, + test_sizes, NUM_TEST_SIZES, SIGN_PRETEST_ITERATIONS, TEST_TIME); + return SUCCESS; +} diff --git a/libfreerdp/rail/window.c b/libfreerdp/rail/window.c index 7819beafb..35a55e3b7 100644 --- a/libfreerdp/rail/window.c +++ b/libfreerdp/rail/window.c @@ -325,7 +325,10 @@ void rail_UpdateWindow(rdpRail* rail, rdpWindow* window) if (window->fieldFlags & WINDOW_ORDER_FIELD_TITLE) { if (window->title != NULL) + { free(window->title); + window->title = NULL; + } ConvertFromUnicode(CP_UTF8, 0, (WCHAR*) window->titleInfo.string, window->titleInfo.length / 2, &window->title, 0, NULL, NULL); diff --git a/libfreerdp/utils/svc_plugin.c b/libfreerdp/utils/svc_plugin.c index cf6e1d5eb..bfe33bdb6 100644 --- a/libfreerdp/utils/svc_plugin.c +++ b/libfreerdp/utils/svc_plugin.c @@ -338,16 +338,22 @@ static void svc_plugin_process_terminated(rdpSvcPlugin* plugin) { svc_data_in_item* item; - freerdp_thread_stop(plugin->priv->thread); - freerdp_thread_free(plugin->priv->thread); + if (plugin->priv->thread) + { + freerdp_thread_stop(plugin->priv->thread); + freerdp_thread_free(plugin->priv->thread); + } plugin->channel_entry_points.pVirtualChannelClose(plugin->priv->open_handle); svc_plugin_remove(plugin); - while ((item = list_dequeue(plugin->priv->data_in_list)) != NULL) - svc_data_in_item_free(item); - list_free(plugin->priv->data_in_list); + if 
(plugin->priv->data_in_list) + { + while ((item = list_dequeue(plugin->priv->data_in_list)) != NULL) + svc_data_in_item_free(item); + list_free(plugin->priv->data_in_list); + } if (plugin->priv->data_in != NULL) { diff --git a/libfreerdp/utils/time.c b/libfreerdp/utils/time.c index 27cbfebf3..62209ec9d 100644 --- a/libfreerdp/utils/time.c +++ b/libfreerdp/utils/time.c @@ -34,7 +34,7 @@ UINT64 freerdp_windows_gmtime() time_t unix_time; UINT64 windows_time; - gmtime(&unix_time); + time(&unix_time); windows_time = freerdp_get_windows_time_from_unix_time(unix_time); return windows_time; diff --git a/server/Mac/CMakeLists.txt b/server/Mac/CMakeLists.txt index 39584139a..dbc4c1df9 100644 --- a/server/Mac/CMakeLists.txt +++ b/server/Mac/CMakeLists.txt @@ -64,7 +64,7 @@ set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} MODULE freerdp - MODULES freerdp-core freerdp-utils freerdp-codec) + MODULES freerdp-core freerdp-utils freerdp-codec freerdp-primitives) set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} diff --git a/server/Sample/CMakeLists.txt b/server/Sample/CMakeLists.txt index 976893198..972cc29f6 100644 --- a/server/Sample/CMakeLists.txt +++ b/server/Sample/CMakeLists.txt @@ -33,7 +33,7 @@ set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} freerdp-server) set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} MODULE freerdp - MODULES freerdp-core freerdp-utils freerdp-codec) + MODULES freerdp-core freerdp-utils freerdp-codec freerdp-primitives) set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} diff --git a/server/Windows/CMakeLists.txt b/server/Windows/CMakeLists.txt index bdabe1329..08ebb6563 100644 --- a/server/Windows/CMakeLists.txt +++ b/server/Windows/CMakeLists.txt @@ -59,7 +59,7 @@ set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} freerdp-server) 
set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} MODULE freerdp - MODULES freerdp-core freerdp-utils freerdp-codec) + MODULES freerdp-core freerdp-utils freerdp-codec freerdp-primitives) target_link_libraries(${MODULE_NAME} ${${MODULE_PREFIX}_LIBS}) diff --git a/server/X11/CMakeLists.txt b/server/X11/CMakeLists.txt index dd24a2976..9e8709ddc 100644 --- a/server/X11/CMakeLists.txt +++ b/server/X11/CMakeLists.txt @@ -90,7 +90,7 @@ set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} ${X11_LIBRARIES}) set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} MODULE freerdp - MODULES freerdp-core freerdp-common freerdp-codec freerdp-utils freerdp-gdi freerdp-crypto freerdp-locale) + MODULES freerdp-core freerdp-common freerdp-codec freerdp-primitives freerdp-utils freerdp-gdi freerdp-crypto freerdp-locale) set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS MONOLITHIC ${MONOLITHIC_BUILD} diff --git a/server/X11/xf_peer.c b/server/X11/xf_peer.c index 9bc54d335..6d8690ca9 100644 --- a/server/X11/xf_peer.c +++ b/server/X11/xf_peer.c @@ -165,7 +165,7 @@ void xf_xshm_init(xfInfo* xfi) shmctl(xfi->fb_shm_info.shmid, IPC_RMID, 0); printf("display: %p root_window: %p width: %d height: %d depth: %d\n", - xfi->display, xfi->root_window, xfi->fb_image->width, xfi->fb_image->height, xfi->fb_image->depth); + xfi->display, (void*) xfi->root_window, xfi->fb_image->width, xfi->fb_image->height, xfi->fb_image->depth); xfi->fb_pixmap = XShmCreatePixmap(xfi->display, xfi->root_window, xfi->fb_image->data, &(xfi->fb_shm_info), diff --git a/winpr/include/winpr/collections.h b/winpr/include/winpr/collections.h index f092255d7..ac37f0544 100644 --- a/winpr/include/winpr/collections.h +++ b/winpr/include/winpr/collections.h @@ -174,4 +174,76 @@ struct _wKeyValuePair }; typedef struct _wKeyValuePair wKeyValuePair; +/* Reference Table */ + +struct _wReference +{ + UINT32 Count; + void* Pointer; +}; +typedef struct 
_wReference wReference; + +typedef int (*REFERENCE_FREE)(void* context, void* ptr); + +struct _wReferenceTable +{ + UINT32 size; + HANDLE mutex; + void* context; + BOOL synchronized; + wReference* array; + REFERENCE_FREE ReferenceFree; +}; +typedef struct _wReferenceTable wReferenceTable; + +WINPR_API UINT32 ReferenceTable_Add(wReferenceTable* referenceTable, void* ptr); +WINPR_API UINT32 ReferenceTable_Release(wReferenceTable* referenceTable, void* ptr); + +WINPR_API wReferenceTable* ReferenceTable_New(BOOL synchronized, void* context, REFERENCE_FREE ReferenceFree); +WINPR_API void ReferenceTable_Free(wReferenceTable* referenceTable); + +/* Countdown Event */ + +struct _wCountdownEvent +{ + DWORD count; + HANDLE mutex; + HANDLE event; + DWORD initialCount; +}; +typedef struct _wCountdownEvent wCountdownEvent; + +WINPR_API DWORD CountdownEvent_CurrentCount(wCountdownEvent* countdown); +WINPR_API DWORD CountdownEvent_InitialCount(wCountdownEvent* countdown); +WINPR_API BOOL CountdownEvent_IsSet(wCountdownEvent* countdown); +WINPR_API HANDLE CountdownEvent_WaitHandle(wCountdownEvent* countdown); + +WINPR_API void CountdownEvent_AddCount(wCountdownEvent* countdown, DWORD signalCount); +WINPR_API BOOL CountdownEvent_Signal(wCountdownEvent* countdown, DWORD signalCount); +WINPR_API void CountdownEvent_Reset(wCountdownEvent* countdown, DWORD count); + +WINPR_API wCountdownEvent* CountdownEvent_New(DWORD initialCount); +WINPR_API void CountdownEvent_Free(wCountdownEvent* countdown); + +/* BufferPool */ + +struct _wBufferPool +{ + int size; + int capacity; + void** array; + HANDLE mutex; + int fixedSize; + DWORD alignment; + BOOL synchronized; +}; +typedef struct _wBufferPool wBufferPool; + +WINPR_API void* BufferPool_Take(wBufferPool* pool, int bufferSize); +WINPR_API void BufferPool_Return(wBufferPool* pool, void* buffer); +WINPR_API void BufferPool_Clear(wBufferPool* pool); + +WINPR_API wBufferPool* BufferPool_New(BOOL synchronized, int fixedSize, DWORD alignment); 
+WINPR_API void BufferPool_Free(wBufferPool* pool); + #endif /* WINPR_COLLECTIONS_H */ diff --git a/winpr/include/winpr/pool.h b/winpr/include/winpr/pool.h index 4f2416aa0..c2d3aff6e 100644 --- a/winpr/include/winpr/pool.h +++ b/winpr/include/winpr/pool.h @@ -26,7 +26,7 @@ #include #include -#if (!(defined _WIN32 && (_WIN32_WINNT < 0x0600))) +#ifndef _WIN32 typedef DWORD TP_VERSION, *PTP_VERSION; @@ -55,6 +55,33 @@ typedef struct _TP_CLEANUP_GROUP TP_CLEANUP_GROUP, *PTP_CLEANUP_GROUP; typedef VOID (*PTP_CLEANUP_GROUP_CANCEL_CALLBACK)(PVOID ObjectContext, PVOID CleanupContext); +typedef struct _TP_CALLBACK_ENVIRON_V1 +{ + TP_VERSION Version; + PTP_POOL Pool; + PTP_CLEANUP_GROUP CleanupGroup; + PTP_CLEANUP_GROUP_CANCEL_CALLBACK CleanupGroupCancelCallback; + PVOID RaceDll; + struct _ACTIVATION_CONTEXT* ActivationContext; + PTP_SIMPLE_CALLBACK FinalizationCallback; + + union + { + DWORD Flags; + struct + { + DWORD LongFunction:1; + DWORD Persistent:1; + DWORD Private:30; + } s; + } u; +} TP_CALLBACK_ENVIRON_V1; + +#endif + +/* Non-Windows and pre Windows 7 */ +#if ((!defined(_WIN32)) || (defined(_WIN32) && (_WIN32_WINNT < 0x0601))) + typedef struct _TP_CALLBACK_ENVIRON_V3 { TP_VERSION Version; @@ -81,27 +108,30 @@ typedef struct _TP_CALLBACK_ENVIRON_V3 DWORD Size; } TP_CALLBACK_ENVIRON_V3; -typedef TP_CALLBACK_ENVIRON_V3 TP_CALLBACK_ENVIRON, *PTP_CALLBACK_ENVIRON; +//typedef TP_CALLBACK_ENVIRON_V3 TP_CALLBACK_ENVIRON, *PTP_CALLBACK_ENVIRON; + +#endif typedef struct _TP_WORK TP_WORK, *PTP_WORK; - -typedef VOID (*PTP_WORK_CALLBACK)(PTP_CALLBACK_INSTANCE Instance, PVOID Context, PTP_WORK Work); - typedef struct _TP_TIMER TP_TIMER, *PTP_TIMER; -typedef VOID (*PTP_TIMER_CALLBACK)(PTP_CALLBACK_INSTANCE Instance, PVOID Context, PTP_TIMER Timer); - typedef DWORD TP_WAIT_RESULT; - typedef struct _TP_WAIT TP_WAIT, *PTP_WAIT; -typedef VOID (*PTP_WAIT_CALLBACK)(PTP_CALLBACK_INSTANCE Instance, PVOID Context, PTP_WAIT Wait, TP_WAIT_RESULT WaitResult); - typedef struct _TP_IO TP_IO, 
*PTP_IO; +typedef TP_CALLBACK_ENVIRON_V1 TP_CALLBACK_ENVIRON, *PTP_CALLBACK_ENVIRON; + +#ifndef _WIN32 + +typedef VOID (*PTP_WORK_CALLBACK)(PTP_CALLBACK_INSTANCE Instance, PVOID Context, PTP_WORK Work); +typedef VOID (*PTP_TIMER_CALLBACK)(PTP_CALLBACK_INSTANCE Instance, PVOID Context, PTP_TIMER Timer); +typedef VOID (*PTP_WAIT_CALLBACK)(PTP_CALLBACK_INSTANCE Instance, PVOID Context, PTP_WAIT Wait, TP_WAIT_RESULT WaitResult); typedef VOID (*PTP_WIN32_IO_CALLBACK)(PTP_CALLBACK_INSTANCE Instance, PVOID Context, PVOID Overlapped, ULONG IoResult, ULONG_PTR NumberOfBytesTransferred, PTP_IO Io); +#endif + /* Synch */ WINPR_API PTP_WAIT CreateThreadpoolWait(PTP_WAIT_CALLBACK pfnwa, PVOID pv, PTP_CALLBACK_ENVIRON pcbe); @@ -136,15 +166,15 @@ WINPR_API VOID WaitForThreadpoolIoCallbacks(PTP_IO pio, BOOL fCancelPendingCallb /* Clean-up Group */ WINPR_API PTP_CLEANUP_GROUP CreateThreadpoolCleanupGroup(); -VOID CloseThreadpoolCleanupGroupMembers(PTP_CLEANUP_GROUP ptpcg, BOOL fCancelPendingCallbacks, PVOID pvCleanupContext); -VOID CloseThreadpoolCleanupGroup(PTP_CLEANUP_GROUP ptpcg); +WINPR_API VOID CloseThreadpoolCleanupGroupMembers(PTP_CLEANUP_GROUP ptpcg, BOOL fCancelPendingCallbacks, PVOID pvCleanupContext); +WINPR_API VOID CloseThreadpoolCleanupGroup(PTP_CLEANUP_GROUP ptpcg); /* Pool */ WINPR_API PTP_POOL CreateThreadpool(PVOID reserved); WINPR_API VOID CloseThreadpool(PTP_POOL ptpp); -WINPR_API VOID SetThreadpoolThreadMaximum(PTP_POOL ptpp, DWORD cthrdMost); WINPR_API BOOL SetThreadpoolThreadMinimum(PTP_POOL ptpp, DWORD cthrdMic); +WINPR_API VOID SetThreadpoolThreadMaximum(PTP_POOL ptpp, DWORD cthrdMost); /* Callback Environment */ @@ -170,7 +200,7 @@ WINPR_API VOID LeaveCriticalSectionWhenCallbackReturns(PTP_CALLBACK_INSTANCE pci WINPR_API VOID FreeLibraryWhenCallbackReturns(PTP_CALLBACK_INSTANCE pci, HMODULE mod); WINPR_API VOID DisassociateCurrentThreadFromCallback(PTP_CALLBACK_INSTANCE pci); -#endif +/* Dummy */ WINPR_API void winpr_pool_dummy(); diff --git 
a/winpr/include/winpr/thread.h b/winpr/include/winpr/thread.h index 7ecc6ec41..6f7610277 100644 --- a/winpr/include/winpr/thread.h +++ b/winpr/include/winpr/thread.h @@ -107,7 +107,7 @@ WINPR_API BOOL CreateProcessAsUserW(HANDLE hToken, LPCWSTR lpApplicationName, LP WINPR_API VOID ExitProcess(UINT uExitCode); -WINPR_API HANDLE GetCurrentProcess(VOID); +WINPR_API HANDLE _GetCurrentProcess(VOID); WINPR_API DWORD GetCurrentProcessId(VOID); WINPR_API BOOL TerminateProcess(HANDLE hProcess, UINT uExitCode); @@ -125,7 +125,7 @@ WINPR_API HANDLE CreateRemoteThread(HANDLE hProcess, LPSECURITY_ATTRIBUTES lpThr WINPR_API VOID ExitThread(DWORD dwExitCode); -WINPR_API HANDLE GetCurrentThread(VOID); +WINPR_API HANDLE _GetCurrentThread(VOID); WINPR_API DWORD GetCurrentThreadId(VOID); WINPR_API DWORD ResumeThread(HANDLE hThread); @@ -147,6 +147,14 @@ WINPR_API LPVOID TlsGetValue(DWORD dwTlsIndex); WINPR_API BOOL TlsSetValue(DWORD dwTlsIndex, LPVOID lpTlsValue); WINPR_API BOOL TlsFree(DWORD dwTlsIndex); +#else + +/* + * GetCurrentProcess / GetCurrentThread cause a conflict on Mac OS X + */ +#define _GetCurrentProcess GetCurrentProcess +#define _GetCurrentThread GetCurrentThread + #endif #endif /* WINPR_THREAD_H */ diff --git a/winpr/libwinpr/pool/CMakeLists.txt b/winpr/libwinpr/pool/CMakeLists.txt index 9841e4392..e14033681 100644 --- a/winpr/libwinpr/pool/CMakeLists.txt +++ b/winpr/libwinpr/pool/CMakeLists.txt @@ -25,6 +25,7 @@ set(${MODULE_PREFIX}_SRCS io.c cleanup_group.c pool.c + pool.h callback_environment.c callback.c callback_cleanup.c) @@ -47,14 +48,22 @@ if(${CMAKE_SYSTEM_NAME} MATCHES SunOS) set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} rt) endif() +set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS + MONOLITHIC ${MONOLITHIC_BUILD} INTERNAL + MODULE winpr + MODULES winpr-thread winpr-synch winpr-utils) + if(MONOLITHIC_BUILD) set(WINPR_LIBS ${WINPR_LIBS} ${${MODULE_PREFIX}_LIBS} PARENT_SCOPE) else() - set(${MODULE_PREFIX}_LIBS ${${MODULE_PREFIX}_LIBS} 
winpr-thread winpr-synch) - target_link_libraries(${MODULE_NAME} ${${MODULE_PREFIX}_LIBS}) install(TARGETS ${MODULE_NAME} DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() set_property(TARGET ${MODULE_NAME} PROPERTY FOLDER "WinPR") +if(BUILD_TESTING) + add_subdirectory(test) +endif() + + diff --git a/winpr/libwinpr/pool/callback.c b/winpr/libwinpr/pool/callback.c index 11e7b812c..f9a600fa8 100644 --- a/winpr/libwinpr/pool/callback.c +++ b/winpr/libwinpr/pool/callback.c @@ -24,11 +24,40 @@ #include #include -#if (!(defined _WIN32 && (_WIN32_WINNT < 0x0600))) +#ifdef _WIN32 -BOOL CallbackMayRunLong(PTP_CALLBACK_INSTANCE pci) +static BOOL module_initialized = FALSE; +static BOOL module_available = FALSE; +static HMODULE kernel32_module = NULL; + +static BOOL (WINAPI * pCallbackMayRunLong)(PTP_CALLBACK_INSTANCE pci); + +static void module_init() { - return FALSE; + if (module_initialized) + return; + + kernel32_module = LoadLibraryA("kernel32.dll"); + module_initialized = TRUE; + + if (!kernel32_module) + return; + + module_available = TRUE; + + pCallbackMayRunLong = (void*) GetProcAddress(kernel32_module, "CallbackMayRunLong"); } #endif + +BOOL CallbackMayRunLong(PTP_CALLBACK_INSTANCE pci) +{ +#ifdef _WIN32 + module_init(); + + if (pCallbackMayRunLong) + return pCallbackMayRunLong(pci); +#else +#endif + return FALSE; +} diff --git a/winpr/libwinpr/pool/callback_cleanup.c b/winpr/libwinpr/pool/callback_cleanup.c index 048ec56aa..f0653ae98 100644 --- a/winpr/libwinpr/pool/callback_cleanup.c +++ b/winpr/libwinpr/pool/callback_cleanup.c @@ -24,37 +24,109 @@ #include #include -#if (!(defined _WIN32 && (_WIN32_WINNT < 0x0600))) +#include "pool.h" -VOID SetEventWhenCallbackReturns(PTP_CALLBACK_INSTANCE pci, HANDLE evt) +#ifdef _WIN32 + +static BOOL module_initialized = FALSE; +static BOOL module_available = FALSE; +static HMODULE kernel32_module = NULL; + +static VOID (WINAPI * pSetEventWhenCallbackReturns)(PTP_CALLBACK_INSTANCE pci, HANDLE evt); +static VOID (WINAPI * 
pReleaseSemaphoreWhenCallbackReturns)(PTP_CALLBACK_INSTANCE pci, HANDLE sem, DWORD crel); +static VOID (WINAPI * pReleaseMutexWhenCallbackReturns)(PTP_CALLBACK_INSTANCE pci, HANDLE mut); +static VOID (WINAPI * pLeaveCriticalSectionWhenCallbackReturns)(PTP_CALLBACK_INSTANCE pci, PCRITICAL_SECTION pcs); +static VOID (WINAPI * pFreeLibraryWhenCallbackReturns)(PTP_CALLBACK_INSTANCE pci, HMODULE mod); +static VOID (WINAPI * pDisassociateCurrentThreadFromCallback)(PTP_CALLBACK_INSTANCE pci); + +static void module_init() { + if (module_initialized) + return; -} + kernel32_module = LoadLibraryA("kernel32.dll"); + module_initialized = TRUE; -VOID ReleaseSemaphoreWhenCallbackReturns(PTP_CALLBACK_INSTANCE pci, HANDLE sem, DWORD crel) -{ + if (!kernel32_module) + return; -} - -VOID ReleaseMutexWhenCallbackReturns(PTP_CALLBACK_INSTANCE pci, HANDLE mut) -{ - -} - -VOID LeaveCriticalSectionWhenCallbackReturns(PTP_CALLBACK_INSTANCE pci, PCRITICAL_SECTION pcs) -{ - -} - -VOID FreeLibraryWhenCallbackReturns(PTP_CALLBACK_INSTANCE pci, HMODULE mod) -{ - -} - -VOID DisassociateCurrentThreadFromCallback(PTP_CALLBACK_INSTANCE pci) -{ + module_available = TRUE; + pSetEventWhenCallbackReturns = (void*) GetProcAddress(kernel32_module, "SetEventWhenCallbackReturns"); + pReleaseSemaphoreWhenCallbackReturns = (void*) GetProcAddress(kernel32_module, "ReleaseSemaphoreWhenCallbackReturns"); + pReleaseMutexWhenCallbackReturns = (void*) GetProcAddress(kernel32_module, "ReleaseMutexWhenCallbackReturns"); + pLeaveCriticalSectionWhenCallbackReturns = (void*) GetProcAddress(kernel32_module, "LeaveCriticalSectionWhenCallbackReturns"); + pFreeLibraryWhenCallbackReturns = (void*) GetProcAddress(kernel32_module, "FreeLibraryWhenCallbackReturns"); + pDisassociateCurrentThreadFromCallback = (void*) GetProcAddress(kernel32_module, "DisassociateCurrentThreadFromCallback"); } #endif +VOID SetEventWhenCallbackReturns(PTP_CALLBACK_INSTANCE pci, HANDLE evt) +{ +#ifdef _WIN32 + module_init(); + + if 
(pSetEventWhenCallbackReturns) + pSetEventWhenCallbackReturns(pci, evt); +#else +#endif +} + +VOID ReleaseSemaphoreWhenCallbackReturns(PTP_CALLBACK_INSTANCE pci, HANDLE sem, DWORD crel) +{ +#ifdef _WIN32 + module_init(); + + if (pReleaseSemaphoreWhenCallbackReturns) + pReleaseSemaphoreWhenCallbackReturns(pci, sem, crel); +#else +#endif +} + +VOID ReleaseMutexWhenCallbackReturns(PTP_CALLBACK_INSTANCE pci, HANDLE mut) +{ +#ifdef _WIN32 + module_init(); + + if (pReleaseMutexWhenCallbackReturns) + pReleaseMutexWhenCallbackReturns(pci, mut); +#else +#endif +} + +VOID LeaveCriticalSectionWhenCallbackReturns(PTP_CALLBACK_INSTANCE pci, PCRITICAL_SECTION pcs) +{ +#ifdef _WIN32 + module_init(); + + if (pLeaveCriticalSectionWhenCallbackReturns) + pLeaveCriticalSectionWhenCallbackReturns(pci, pcs); +#else +#endif +} + +VOID FreeLibraryWhenCallbackReturns(PTP_CALLBACK_INSTANCE pci, HMODULE mod) +{ +#ifdef _WIN32 + module_init(); + + if (pFreeLibraryWhenCallbackReturns) + pFreeLibraryWhenCallbackReturns(pci, mod); +#else +#endif +} + +VOID DisassociateCurrentThreadFromCallback(PTP_CALLBACK_INSTANCE pci) +{ +#ifdef _WIN32 + module_init(); + + if (pDisassociateCurrentThreadFromCallback) + pDisassociateCurrentThreadFromCallback(pci); +#else +#endif +} + + + diff --git a/winpr/libwinpr/pool/callback_environment.c b/winpr/libwinpr/pool/callback_environment.c index d3f85a690..93d981777 100644 --- a/winpr/libwinpr/pool/callback_environment.c +++ b/winpr/libwinpr/pool/callback_environment.c @@ -24,42 +24,172 @@ #include #include -#if (!(defined _WIN32 && (_WIN32_WINNT < 0x0600))) +#include "pool.h" -VOID InitializeThreadpoolEnvironment(PTP_CALLBACK_ENVIRON pcbe) +VOID InitializeCallbackEnvironment_V1(TP_CALLBACK_ENVIRON_V1* pcbe) { + pcbe->Version = 1; + pcbe->Pool = NULL; + pcbe->CleanupGroup = NULL; + pcbe->CleanupGroupCancelCallback = NULL; + pcbe->RaceDll = NULL; + pcbe->ActivationContext = NULL; + pcbe->FinalizationCallback = NULL; + pcbe->u.Flags = 0; } -VOID 
DestroyThreadpoolEnvironment(PTP_CALLBACK_ENVIRON pcbe) +VOID InitializeCallbackEnvironment_V3(TP_CALLBACK_ENVIRON_V3* pcbe) { + pcbe->Version = 3; + pcbe->Pool = NULL; + pcbe->CleanupGroup = NULL; + pcbe->CleanupGroupCancelCallback = NULL; + pcbe->RaceDll = NULL; + pcbe->ActivationContext = NULL; + pcbe->FinalizationCallback = NULL; + pcbe->u.Flags = 0; + + pcbe->CallbackPriority = TP_CALLBACK_PRIORITY_NORMAL; + pcbe->Size = sizeof(TP_CALLBACK_ENVIRON); } -VOID SetThreadpoolCallbackPool(PTP_CALLBACK_ENVIRON pcbe, PTP_POOL ptpp) -{ +#ifdef _WIN32 +static BOOL module_initialized = FALSE; +static BOOL module_available = FALSE; +static HMODULE kernel32_module = NULL; + +static VOID (WINAPI * pDestroyThreadpoolEnvironment)(PTP_CALLBACK_ENVIRON pcbe); +static VOID (WINAPI * pSetThreadpoolCallbackPool)(PTP_CALLBACK_ENVIRON pcbe, PTP_POOL ptpp); +static VOID (WINAPI * pSetThreadpoolCallbackCleanupGroup)(PTP_CALLBACK_ENVIRON pcbe, PTP_CLEANUP_GROUP ptpcg, PTP_CLEANUP_GROUP_CANCEL_CALLBACK pfng); +static VOID (WINAPI * pSetThreadpoolCallbackRunsLong)(PTP_CALLBACK_ENVIRON pcbe); +static VOID (WINAPI * pSetThreadpoolCallbackLibrary)(PTP_CALLBACK_ENVIRON pcbe, PVOID mod); +static VOID (WINAPI * pSetThreadpoolCallbackPriority)(PTP_CALLBACK_ENVIRON pcbe, TP_CALLBACK_PRIORITY Priority); + +static void module_init() +{ + if (module_initialized) + return; + + kernel32_module = LoadLibraryA("kernel32.dll"); + module_initialized = TRUE; + + if (!kernel32_module) + return; + + module_available = TRUE; + + /* InitializeThreadpoolEnvironment is an inline function */ + pDestroyThreadpoolEnvironment = (void*) GetProcAddress(kernel32_module, "DestroyThreadpoolEnvironment"); + pSetThreadpoolCallbackPool = (void*) GetProcAddress(kernel32_module, "SetThreadpoolCallbackPool"); + pSetThreadpoolCallbackCleanupGroup = (void*) GetProcAddress(kernel32_module, "SetThreadpoolCallbackCleanupGroup"); + pSetThreadpoolCallbackRunsLong = (void*) GetProcAddress(kernel32_module, 
"SetThreadpoolCallbackRunsLong"); + pSetThreadpoolCallbackRunsLong = (void*) GetProcAddress(kernel32_module, "SetThreadpoolCallbackRunsLong"); + pSetThreadpoolCallbackLibrary = (void*) GetProcAddress(kernel32_module, "SetThreadpoolCallbackLibrary"); + pSetThreadpoolCallbackPriority = (void*) GetProcAddress(kernel32_module, "SetThreadpoolCallbackPriority"); } -VOID SetThreadpoolCallbackCleanupGroup(PTP_CALLBACK_ENVIRON pcbe, PTP_CLEANUP_GROUP ptpcg, PTP_CLEANUP_GROUP_CANCEL_CALLBACK pfng) +#else + +static TP_CALLBACK_ENVIRON DEFAULT_CALLBACK_ENVIRONMENT = { + 1, /* Version */ + NULL, /* Pool */ + NULL, /* CleanupGroup */ + NULL, /* CleanupGroupCancelCallback */ + NULL, /* RaceDll */ + NULL, /* ActivationContext */ + NULL, /* FinalizationCallback */ + { 0 } /* Flags */ +}; -} - -VOID SetThreadpoolCallbackRunsLong(PTP_CALLBACK_ENVIRON pcbe) +PTP_CALLBACK_ENVIRON GetDefaultThreadpoolEnvironment() { + PTP_CALLBACK_ENVIRON environment = &DEFAULT_CALLBACK_ENVIRONMENT; -} - -VOID SetThreadpoolCallbackLibrary(PTP_CALLBACK_ENVIRON pcbe, PVOID mod) -{ - -} - -VOID SetThreadpoolCallbackPriority(PTP_CALLBACK_ENVIRON pcbe, TP_CALLBACK_PRIORITY Priority) -{ + environment->Pool = GetDefaultThreadpool(); + return environment; } #endif +VOID InitializeThreadpoolEnvironment(PTP_CALLBACK_ENVIRON pcbe) +{ + if (pcbe->Version == 3) + InitializeCallbackEnvironment_V3((TP_CALLBACK_ENVIRON_V3*) pcbe); + else + InitializeCallbackEnvironment_V1(pcbe); +} + +VOID DestroyThreadpoolEnvironment(PTP_CALLBACK_ENVIRON pcbe) +{ +#ifdef _WIN32 + module_init(); + + if (pDestroyThreadpoolEnvironment) + pDestroyThreadpoolEnvironment(pcbe); +#else +#endif +} + +VOID SetThreadpoolCallbackPool(PTP_CALLBACK_ENVIRON pcbe, PTP_POOL ptpp) +{ +#ifdef _WIN32 + module_init(); + + if (pSetThreadpoolCallbackPool) + pSetThreadpoolCallbackPool(pcbe, ptpp); +#else + pcbe->Pool = ptpp; +#endif +} + +VOID SetThreadpoolCallbackCleanupGroup(PTP_CALLBACK_ENVIRON pcbe, PTP_CLEANUP_GROUP ptpcg, 
PTP_CLEANUP_GROUP_CANCEL_CALLBACK pfng) +{ +#ifdef _WIN32 + module_init(); + + if (pSetThreadpoolCallbackCleanupGroup) + pSetThreadpoolCallbackCleanupGroup(pcbe, ptpcg, pfng); +#else + pcbe->CleanupGroup = ptpcg; + pcbe->CleanupGroupCancelCallback = pfng; +#endif +} + +VOID SetThreadpoolCallbackRunsLong(PTP_CALLBACK_ENVIRON pcbe) +{ +#ifdef _WIN32 + module_init(); + + if (pSetThreadpoolCallbackRunsLong) + pSetThreadpoolCallbackRunsLong(pcbe); +#else + pcbe->u.s.LongFunction = TRUE; +#endif +} + +VOID SetThreadpoolCallbackLibrary(PTP_CALLBACK_ENVIRON pcbe, PVOID mod) +{ +#ifdef _WIN32 + module_init(); + + if (pSetThreadpoolCallbackLibrary) + pSetThreadpoolCallbackLibrary(pcbe, mod); +#else +#endif +} + +VOID SetThreadpoolCallbackPriority(PTP_CALLBACK_ENVIRON pcbe, TP_CALLBACK_PRIORITY Priority) +{ +#ifdef _WIN32 + module_init(); + + if (pSetThreadpoolCallbackPriority) + pSetThreadpoolCallbackPriority(pcbe, Priority); +#else +#endif +} diff --git a/winpr/libwinpr/pool/cleanup_group.c b/winpr/libwinpr/pool/cleanup_group.c index 2220757a9..bd8676df6 100644 --- a/winpr/libwinpr/pool/cleanup_group.c +++ b/winpr/libwinpr/pool/cleanup_group.c @@ -24,22 +24,74 @@ #include #include -#if (!(defined _WIN32 && (_WIN32_WINNT < 0x0600))) +#include "pool.h" -PTP_CLEANUP_GROUP CreateThreadpoolCleanupGroup() +#ifdef _WIN32 + +static BOOL module_initialized = FALSE; +static BOOL module_available = FALSE; +static HMODULE kernel32_module = NULL; + +static PTP_CLEANUP_GROUP (WINAPI * pCreateThreadpoolCleanupGroup)(); +static VOID (WINAPI * pCloseThreadpoolCleanupGroupMembers)(PTP_CLEANUP_GROUP ptpcg, BOOL fCancelPendingCallbacks, PVOID pvCleanupContext); +static VOID (WINAPI * pCloseThreadpoolCleanupGroup)(PTP_CLEANUP_GROUP ptpcg); + +static void module_init() { - return NULL; -} + if (module_initialized) + return; -VOID CloseThreadpoolCleanupGroupMembers(PTP_CLEANUP_GROUP ptpcg, BOOL fCancelPendingCallbacks, PVOID pvCleanupContext) -{ + kernel32_module = LoadLibraryA("kernel32.dll"); + 
module_initialized = TRUE; -} + if (!kernel32_module) + return; -VOID CloseThreadpoolCleanupGroup(PTP_CLEANUP_GROUP ptpcg) -{ + module_available = TRUE; + pCreateThreadpoolCleanupGroup = (void*) GetProcAddress(kernel32_module, "CreateThreadpoolCleanupGroup"); + pCloseThreadpoolCleanupGroupMembers = (void*) GetProcAddress(kernel32_module, "CloseThreadpoolCleanupGroupMembers"); + pCloseThreadpoolCleanupGroup = (void*) GetProcAddress(kernel32_module, "CloseThreadpoolCleanupGroup"); } #endif +PTP_CLEANUP_GROUP CreateThreadpoolCleanupGroup() +{ + PTP_CLEANUP_GROUP cleanupGroup = NULL; +#ifdef _WIN32 + module_init(); + + if (pCreateThreadpoolCleanupGroup) + return pCreateThreadpoolCleanupGroup(); +#else + cleanupGroup = (PTP_CLEANUP_GROUP) malloc(sizeof(TP_CLEANUP_GROUP)); +#endif + return cleanupGroup; +} + +VOID CloseThreadpoolCleanupGroupMembers(PTP_CLEANUP_GROUP ptpcg, BOOL fCancelPendingCallbacks, PVOID pvCleanupContext) +{ +#ifdef _WIN32 + module_init(); + + if (pCloseThreadpoolCleanupGroupMembers) + pCloseThreadpoolCleanupGroupMembers(ptpcg, fCancelPendingCallbacks, pvCleanupContext); +#else + +#endif +} + +VOID CloseThreadpoolCleanupGroup(PTP_CLEANUP_GROUP ptpcg) +{ +#ifdef _WIN32 + module_init(); + + if (pCloseThreadpoolCleanupGroup) + pCloseThreadpoolCleanupGroup(ptpcg); +#else + free(ptpcg); +#endif +} + + diff --git a/winpr/libwinpr/pool/io.c b/winpr/libwinpr/pool/io.c index 8e6ff1d17..5327148d9 100644 --- a/winpr/libwinpr/pool/io.c +++ b/winpr/libwinpr/pool/io.c @@ -24,8 +24,6 @@ #include #include -#if (!(defined _WIN32 && (_WIN32_WINNT < 0x0600))) - PTP_IO CreateThreadpoolIo(HANDLE fl, PTP_WIN32_IO_CALLBACK pfnio, PVOID pv, PTP_CALLBACK_ENVIRON pcbe) { return NULL; @@ -51,5 +49,3 @@ VOID WaitForThreadpoolIoCallbacks(PTP_IO pio, BOOL fCancelPendingCallbacks) } -#endif - diff --git a/winpr/libwinpr/pool/pool.c b/winpr/libwinpr/pool/pool.c index 941bafe2e..6a3f40c89 100644 --- a/winpr/libwinpr/pool/pool.c +++ b/winpr/libwinpr/pool/pool.c @@ -24,29 +24,213 @@ 
#include #include -#if (!(defined _WIN32 && (_WIN32_WINNT < 0x0600))) +#include "pool.h" + +#ifdef _WIN32 + +static BOOL module_initialized = FALSE; +static BOOL module_available = FALSE; +static HMODULE kernel32_module = NULL; + +static PTP_POOL (WINAPI * pCreateThreadpool)(PVOID reserved); +static VOID (WINAPI * pCloseThreadpool)(PTP_POOL ptpp); +static BOOL (WINAPI * pSetThreadpoolThreadMinimum)(PTP_POOL ptpp, DWORD cthrdMic); +static VOID (WINAPI * pSetThreadpoolThreadMaximum)(PTP_POOL ptpp, DWORD cthrdMost); + +static void module_init() +{ + if (module_initialized) + return; + + kernel32_module = LoadLibraryA("kernel32.dll"); + module_initialized = TRUE; + + if (!kernel32_module) + return; + + module_available = TRUE; + + pCreateThreadpool = (void*) GetProcAddress(kernel32_module, "CreateThreadpool"); + pCloseThreadpool = (void*) GetProcAddress(kernel32_module, "CloseThreadpool"); + pSetThreadpoolThreadMinimum = (void*) GetProcAddress(kernel32_module, "SetThreadpoolThreadMinimum"); + pSetThreadpoolThreadMaximum = (void*) GetProcAddress(kernel32_module, "SetThreadpoolThreadMaximum"); +} + +#else + +static TP_POOL DEFAULT_POOL = +{ + 0, /* Minimum */ + 500, /* Maximum */ + NULL, /* Threads */ + 0, /* ThreadCount */ +}; + +static void* thread_pool_work_func(void* arg) +{ + DWORD status; + PTP_POOL pool; + PTP_WORK work; + HANDLE events[2]; + PTP_CALLBACK_INSTANCE callbackInstance; + + pool = (PTP_POOL) arg; + + events[0] = pool->TerminateEvent; + events[1] = Queue_Event(pool->PendingQueue); + + while (1) + { + status = WaitForMultipleObjects(2, events, FALSE, INFINITE); + + if (status == WAIT_OBJECT_0) + break; + + if (status != (WAIT_OBJECT_0 + 1)) + break; + + callbackInstance = (PTP_CALLBACK_INSTANCE) Queue_Dequeue(pool->PendingQueue); + + if (callbackInstance) + { + work = callbackInstance->Work; + work->WorkCallback(callbackInstance, work->CallbackParameter, work); + CountdownEvent_Signal(pool->WorkComplete, 1); + free(callbackInstance); + } + } + + return 
NULL; +} + +void InitializeThreadpool(PTP_POOL pool) +{ + int index; + HANDLE thread; + + if (!pool->Threads) + { + pool->Minimum = 0; + pool->Maximum = 500; + + pool->Threads = ArrayList_New(TRUE); + + pool->PendingQueue = Queue_New(TRUE, -1, -1); + pool->WorkComplete = CountdownEvent_New(0); + + pool->TerminateEvent = CreateEvent(NULL, TRUE, FALSE, NULL); + + for (index = 0; index < 4; index++) + { + thread = CreateThread(NULL, 0, + (LPTHREAD_START_ROUTINE) thread_pool_work_func, + (void*) pool, 0, NULL); + + ArrayList_Add(pool->Threads, thread); + } + } +} + +PTP_POOL GetDefaultThreadpool() +{ + PTP_POOL pool = NULL; + + pool = &DEFAULT_POOL; + + InitializeThreadpool(pool); + + return pool; +} + +#endif PTP_POOL CreateThreadpool(PVOID reserved) { - return NULL; + PTP_POOL pool = NULL; + +#ifdef _WIN32 + module_init(); + + if (pCreateThreadpool) + return pCreateThreadpool(reserved); +#else + pool = (PTP_POOL) malloc(sizeof(TP_POOL)); + + if (pool) + InitializeThreadpool(pool); +#endif + + return pool; } VOID CloseThreadpool(PTP_POOL ptpp) { +#ifdef _WIN32 + module_init(); -} + if (pCloseThreadpool) + pCloseThreadpool(ptpp); +#else + int index; + HANDLE thread; -VOID SetThreadpoolThreadMaximum(PTP_POOL ptpp, DWORD cthrdMost) -{ + SetEvent(ptpp->TerminateEvent); + index = ArrayList_Count(ptpp->Threads) - 1; + + while (index >= 0) + { + thread = (HANDLE) ArrayList_GetItem(ptpp->Threads, index); + WaitForSingleObject(thread, INFINITE); + index--; + } + + ArrayList_Free(ptpp->Threads); + Queue_Free(ptpp->PendingQueue); + CountdownEvent_Free(ptpp->WorkComplete); + CloseHandle(ptpp->TerminateEvent); + + free(ptpp); +#endif } BOOL SetThreadpoolThreadMinimum(PTP_POOL ptpp, DWORD cthrdMic) { - return FALSE; +#ifdef _WIN32 + module_init(); + + if (pSetThreadpoolThreadMinimum) + return pSetThreadpoolThreadMinimum(ptpp, cthrdMic); +#else + HANDLE thread; + + ptpp->Minimum = cthrdMic; + + while (ArrayList_Count(ptpp->Threads) < ptpp->Minimum) + { + thread = CreateThread(NULL, 
0, + (LPTHREAD_START_ROUTINE) thread_pool_work_func, + (void*) ptpp, 0, NULL); + + ArrayList_Add(ptpp->Threads, thread); + } +#endif + return TRUE; } +VOID SetThreadpoolThreadMaximum(PTP_POOL ptpp, DWORD cthrdMost) +{ +#ifdef _WIN32 + module_init(); + + if (pSetThreadpoolThreadMaximum) + pSetThreadpoolThreadMaximum(ptpp, cthrdMost); +#else + ptpp->Maximum = cthrdMost; #endif +} + +/* dummy */ void winpr_pool_dummy() { diff --git a/winpr/libwinpr/pool/pool.h b/winpr/libwinpr/pool/pool.h new file mode 100644 index 000000000..81485f45e --- /dev/null +++ b/winpr/libwinpr/pool/pool.h @@ -0,0 +1,78 @@ +/** + * WinPR: Windows Portable Runtime + * Thread Pool API (Pool) + * + * Copyright 2012 Marc-Andre Moreau + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef WINPR_POOL_PRIVATE_H +#define WINPR_POOL_PRIVATE_H + +#include +#include +#include +#include + +struct _TP_CALLBACK_INSTANCE +{ + PTP_WORK Work; +}; + +struct _TP_POOL +{ + DWORD Minimum; + DWORD Maximum; + wArrayList* Threads; + wQueue* PendingQueue; + HANDLE TerminateEvent; + wCountdownEvent* WorkComplete; +}; + +struct _TP_WORK +{ + PVOID CallbackParameter; + PTP_WORK_CALLBACK WorkCallback; + PTP_CALLBACK_ENVIRON CallbackEnvironment; +}; + +struct _TP_TIMER +{ + void* dummy; +}; + +struct _TP_WAIT +{ + void* dummy; +}; + +struct _TP_IO +{ + void* dummy; +}; + +struct _TP_CLEANUP_GROUP +{ + void* dummy; +}; + +#ifndef _WIN32 + +PTP_POOL GetDefaultThreadpool(); +PTP_CALLBACK_ENVIRON GetDefaultThreadpoolEnvironment(); + +#endif + +#endif /* WINPR_POOL_PRIVATE_H */ + diff --git a/winpr/libwinpr/pool/synch.c b/winpr/libwinpr/pool/synch.c index aca798967..5f87c78ff 100644 --- a/winpr/libwinpr/pool/synch.c +++ b/winpr/libwinpr/pool/synch.c @@ -24,8 +24,6 @@ #include #include -#if (!(defined _WIN32 && (_WIN32_WINNT < 0x0600))) - PTP_WAIT CreateThreadpoolWait(PTP_WAIT_CALLBACK pfnwa, PVOID pv, PTP_CALLBACK_ENVIRON pcbe) { return NULL; @@ -46,5 +44,4 @@ VOID WaitForThreadpoolWaitCallbacks(PTP_WAIT pwa, BOOL fCancelPendingCallbacks) } -#endif diff --git a/winpr/libwinpr/pool/test/.gitignore b/winpr/libwinpr/pool/test/.gitignore new file mode 100644 index 000000000..0a87cfd44 --- /dev/null +++ b/winpr/libwinpr/pool/test/.gitignore @@ -0,0 +1,2 @@ +TestPool +TestPool.c diff --git a/winpr/libwinpr/pool/test/CMakeLists.txt b/winpr/libwinpr/pool/test/CMakeLists.txt new file mode 100644 index 000000000..eb03c0b6f --- /dev/null +++ b/winpr/libwinpr/pool/test/CMakeLists.txt @@ -0,0 +1,35 @@ + +set(MODULE_NAME "TestPool") +set(MODULE_PREFIX "TEST_POOL") + +set(${MODULE_PREFIX}_DRIVER ${MODULE_NAME}.c) + +set(${MODULE_PREFIX}_TESTS + TestPoolIO.c + TestPoolSynch.c + TestPoolThread.c + TestPoolTimer.c + TestPoolWork.c) + +create_test_sourcelist(${MODULE_PREFIX}_SRCS + 
${${MODULE_PREFIX}_DRIVER} + ${${MODULE_PREFIX}_TESTS}) + +add_executable(${MODULE_NAME} ${${MODULE_PREFIX}_SRCS}) + +set_complex_link_libraries(VARIABLE ${MODULE_PREFIX}_LIBS + MONOLITHIC ${MONOLITHIC_BUILD} + MODULE winpr + MODULES winpr-pool winpr-interlocked) + +target_link_libraries(${MODULE_NAME} ${${MODULE_PREFIX}_LIBS}) + +set_target_properties(${MODULE_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${TESTING_OUTPUT_DIRECTORY}") + +foreach(test ${${MODULE_PREFIX}_TESTS}) + get_filename_component(TestName ${test} NAME_WE) + add_test(${TestName} ${TESTING_OUTPUT_DIRECTORY}/${MODULE_NAME} ${TestName}) +endforeach() + +set_property(TARGET ${MODULE_NAME} PROPERTY FOLDER "WinPR/Test") + diff --git a/winpr/libwinpr/pool/test/TestPoolIO.c b/winpr/libwinpr/pool/test/TestPoolIO.c new file mode 100644 index 000000000..5e8d0b210 --- /dev/null +++ b/winpr/libwinpr/pool/test/TestPoolIO.c @@ -0,0 +1,9 @@ + +#include +#include + +int TestPoolIO(int argc, char* argv[]) +{ + return 0; +} + diff --git a/winpr/libwinpr/pool/test/TestPoolSynch.c b/winpr/libwinpr/pool/test/TestPoolSynch.c new file mode 100644 index 000000000..c0e9bc3e2 --- /dev/null +++ b/winpr/libwinpr/pool/test/TestPoolSynch.c @@ -0,0 +1,9 @@ + +#include +#include + +int TestPoolSynch(int argc, char* argv[]) +{ + return 0; +} + diff --git a/winpr/libwinpr/pool/test/TestPoolThread.c b/winpr/libwinpr/pool/test/TestPoolThread.c new file mode 100644 index 000000000..d842beaed --- /dev/null +++ b/winpr/libwinpr/pool/test/TestPoolThread.c @@ -0,0 +1,29 @@ + +#include +#include + +/** + * Improve Scalability With New Thread Pool APIs: + * http://msdn.microsoft.com/en-us/magazine/cc16332.aspx + * + * Developing with Thread Pool Enhancements: + * http://msdn.microsoft.com/en-us/library/cc308561.aspx + * + * Introduction to the Windows Threadpool: + * http://blogs.msdn.com/b/harip/archive/2010/10/11/introduction-to-the-windows-threadpool-part-1.aspx + * 
http://blogs.msdn.com/b/harip/archive/2010/10/12/introduction-to-the-windows-threadpool-part-2.aspx + */ + +int TestPoolThread(int argc, char* argv[]) +{ + TP_POOL* pool; + + pool = CreateThreadpool(NULL); + + SetThreadpoolThreadMinimum(pool, 8); /* default is 0 */ + SetThreadpoolThreadMaximum(pool, 64); /* default is 500 */ + + CloseThreadpool(pool); + + return 0; +} diff --git a/winpr/libwinpr/pool/test/TestPoolTimer.c b/winpr/libwinpr/pool/test/TestPoolTimer.c new file mode 100644 index 000000000..bfd5550d1 --- /dev/null +++ b/winpr/libwinpr/pool/test/TestPoolTimer.c @@ -0,0 +1,9 @@ + +#include +#include + +int TestPoolTimer(int argc, char* argv[]) +{ + return 0; +} + diff --git a/winpr/libwinpr/pool/test/TestPoolWork.c b/winpr/libwinpr/pool/test/TestPoolWork.c new file mode 100644 index 000000000..72bc228ff --- /dev/null +++ b/winpr/libwinpr/pool/test/TestPoolWork.c @@ -0,0 +1,102 @@ + +#include +#include +#include + +static LONG count = 0; + +void CALLBACK test_WorkCallback(PTP_CALLBACK_INSTANCE instance, void* context, PTP_WORK work) +{ + int index; + BYTE a[1024]; + BYTE b[1024]; + BYTE c[1024]; + + printf("Hello %s: %d (thread: %d)\n", context, + InterlockedIncrement(&count), GetCurrentThreadId()); + + for (index = 0; index < 100; index++) + { + ZeroMemory(a, 1024); + ZeroMemory(b, 1024); + ZeroMemory(c, 1024); + FillMemory(a, 1024, 0xAA); + FillMemory(b, 1024, 0xBB); + CopyMemory(c, a, 1024); + CopyMemory(c, b, 1024); + } +} + +int TestPoolWork(int argc, char* argv[]) +{ + int index; + PTP_POOL pool; + PTP_WORK work; + PTP_CLEANUP_GROUP cleanupGroup; + TP_CALLBACK_ENVIRON environment; + + printf("Global Thread Pool\n"); + + work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", NULL); + + if (!work) + { + printf("CreateThreadpoolWork failure\n"); + return -1; + } + + /** + * You can post a work object one or more times (up to MAXULONG) without waiting for prior callbacks to complete. + * The callbacks will execute in parallel. 
To improve efficiency, the thread pool may throttle the threads. + */ + + for (index = 0; index < 10; index++) + SubmitThreadpoolWork(work); + + WaitForThreadpoolWorkCallbacks(work, FALSE); + CloseThreadpoolWork(work); + + printf("Private Thread Pool\n"); + + pool = CreateThreadpool(NULL); + + SetThreadpoolThreadMinimum(pool, 4); + SetThreadpoolThreadMaximum(pool, 8); + + InitializeThreadpoolEnvironment(&environment); + SetThreadpoolCallbackPool(&environment, pool); + + cleanupGroup = CreateThreadpoolCleanupGroup(); + + if (!cleanupGroup) + { + printf("CreateThreadpoolCleanupGroup failure\n"); + return -1; + } + + SetThreadpoolCallbackCleanupGroup(&environment, cleanupGroup, NULL); + + work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", &environment); + + if (!work) + { + printf("CreateThreadpoolWork failure\n"); + return -1; + } + + for (index = 0; index < 10; index++) + SubmitThreadpoolWork(work); + + WaitForThreadpoolWorkCallbacks(work, FALSE); + + CloseThreadpoolCleanupGroupMembers(cleanupGroup, TRUE, NULL); + + CloseThreadpoolCleanupGroup(cleanupGroup); + + DestroyThreadpoolEnvironment(&environment); + + CloseThreadpoolWork(work); + CloseThreadpool(pool); + + return 0; +} diff --git a/winpr/libwinpr/pool/timer.c b/winpr/libwinpr/pool/timer.c index 93933435b..9867941a6 100644 --- a/winpr/libwinpr/pool/timer.c +++ b/winpr/libwinpr/pool/timer.c @@ -24,8 +24,6 @@ #include #include -#if (!(defined _WIN32 && (_WIN32_WINNT < 0x0600))) - PTP_TIMER CreateThreadpoolTimer(PTP_TIMER_CALLBACK pfnti, PVOID pv, PTP_CALLBACK_ENVIRON pcbe) { return NULL; @@ -51,5 +49,4 @@ VOID WaitForThreadpoolTimerCallbacks(PTP_TIMER pti, BOOL fCancelPendingCallbacks } -#endif diff --git a/winpr/libwinpr/pool/work.c b/winpr/libwinpr/pool/work.c index 62db5c7c8..62581650b 100644 --- a/winpr/libwinpr/pool/work.c +++ b/winpr/libwinpr/pool/work.c @@ -24,32 +24,132 @@ #include #include -#if (!(defined _WIN32 && (_WIN32_WINNT < 0x0600))) +#include "pool.h" + +#ifdef _WIN32 + 
+static BOOL module_initialized = FALSE; +static BOOL module_available = FALSE; +static HMODULE kernel32_module = NULL; + +static PTP_WORK (WINAPI * pCreateThreadpoolWork)(PTP_WORK_CALLBACK pfnwk, PVOID pv, PTP_CALLBACK_ENVIRON pcbe); +static VOID (WINAPI * pCloseThreadpoolWork)(PTP_WORK pwk); +static VOID (WINAPI * pSubmitThreadpoolWork)(PTP_WORK pwk); +static BOOL (WINAPI * pTrySubmitThreadpoolCallback)(PTP_SIMPLE_CALLBACK pfns, PVOID pv, PTP_CALLBACK_ENVIRON pcbe); +static VOID (WINAPI * pWaitForThreadpoolWorkCallbacks)(PTP_WORK pwk, BOOL fCancelPendingCallbacks); + +static void module_init() +{ + if (module_initialized) + return; + + kernel32_module = LoadLibraryA("kernel32.dll"); + module_initialized = TRUE; + + if (!kernel32_module) + return; + + module_available = TRUE; + + pCreateThreadpoolWork = (void*) GetProcAddress(kernel32_module, "CreateThreadpoolWork"); + pCloseThreadpoolWork = (void*) GetProcAddress(kernel32_module, "CloseThreadpoolWork"); + pSubmitThreadpoolWork = (void*) GetProcAddress(kernel32_module, "SubmitThreadpoolWork"); + pTrySubmitThreadpoolCallback = (void*) GetProcAddress(kernel32_module, "TrySubmitThreadpoolCallback"); + pWaitForThreadpoolWorkCallbacks = (void*) GetProcAddress(kernel32_module, "WaitForThreadpoolWorkCallbacks"); +} + +#endif PTP_WORK CreateThreadpoolWork(PTP_WORK_CALLBACK pfnwk, PVOID pv, PTP_CALLBACK_ENVIRON pcbe) { - return NULL; + PTP_WORK work = NULL; + +#ifdef _WIN32 + module_init(); + + if (pCreateThreadpoolWork) + return pCreateThreadpoolWork(pfnwk, pv, pcbe); +#else + work = (PTP_WORK) malloc(sizeof(TP_WORK)); + + if (work) + { + work->WorkCallback = pfnwk; + work->CallbackParameter = pv; + + if (!pcbe) + pcbe = GetDefaultThreadpoolEnvironment(); + + work->CallbackEnvironment = pcbe; + } +#endif + + return work; } VOID CloseThreadpoolWork(PTP_WORK pwk) { +#ifdef _WIN32 + module_init(); + if (pCloseThreadpoolWork) + pCloseThreadpoolWork(pwk); +#else + free(pwk); +#endif } VOID SubmitThreadpoolWork(PTP_WORK pwk) { 
+#ifdef _WIN32 + module_init(); + if (pSubmitThreadpoolWork) + pSubmitThreadpoolWork(pwk); +#else + PTP_POOL pool; + PTP_CALLBACK_INSTANCE callbackInstance; + + pool = pwk->CallbackEnvironment->Pool; + + callbackInstance = (PTP_CALLBACK_INSTANCE) malloc(sizeof(TP_CALLBACK_INSTANCE)); + + if (callbackInstance) + { + callbackInstance->Work = pwk; + CountdownEvent_AddCount(pool->WorkComplete, 1); + Queue_Enqueue(pool->PendingQueue, callbackInstance); + } +#endif } BOOL TrySubmitThreadpoolCallback(PTP_SIMPLE_CALLBACK pfns, PVOID pv, PTP_CALLBACK_ENVIRON pcbe) { +#ifdef _WIN32 + module_init(); + + if (pTrySubmitThreadpoolCallback) + return pTrySubmitThreadpoolCallback(pfns, pv, pcbe); +#else +#endif return FALSE; } VOID WaitForThreadpoolWorkCallbacks(PTP_WORK pwk, BOOL fCancelPendingCallbacks) { +#ifdef _WIN32 + module_init(); -} + if (pWaitForThreadpoolWorkCallbacks) + pWaitForThreadpoolWorkCallbacks(pwk, fCancelPendingCallbacks); +#else + HANDLE event; + PTP_POOL pool; + pool = pwk->CallbackEnvironment->Pool; + event = CountdownEvent_WaitHandle(pool->WorkComplete); + + if (WaitForSingleObject(event, INFINITE) != WAIT_OBJECT_0) + printf("WaitForThreadpoolWorkCallbacks: error waiting on work completion\n"); #endif - +} diff --git a/winpr/libwinpr/sspi/test/TestSchannel.c b/winpr/libwinpr/sspi/test/TestSchannel.c index e7ddae7c0..e0b69eda1 100644 --- a/winpr/libwinpr/sspi/test/TestSchannel.c +++ b/winpr/libwinpr/sspi/test/TestSchannel.c @@ -98,7 +98,7 @@ int schannel_recv(PSecurityFunctionTable table, HANDLE hPipe, PCtxtHandle phCont { BYTE* ioBuffer; UINT32 ioBufferLength; - BYTE* pMessageBuffer; + //BYTE* pMessageBuffer; SecBuffer Buffers[4]; SecBufferDesc Message; SECURITY_STATUS status; diff --git a/winpr/libwinpr/synch/wait.c b/winpr/libwinpr/synch/wait.c index 6c83897f1..e2b7ac35f 100644 --- a/winpr/libwinpr/synch/wait.c +++ b/winpr/libwinpr/synch/wait.c @@ -51,14 +51,19 @@ DWORD WaitForSingleObject(HANDLE hHandle, DWORD dwMilliseconds) if (Type == 
HANDLE_TYPE_THREAD) { + int status; WINPR_THREAD* thread; + void* thread_status = NULL; if (dwMilliseconds != INFINITE) printf("WaitForSingleObject: timeout not implemented for thread wait\n"); thread = (WINPR_THREAD*) Object; - pthread_join(thread->thread, NULL); + status = pthread_join(thread->thread, &thread_status); + + if (status != 0) + printf("WaitForSingleObject: pthread_join failure: %d\n", status); } if (Type == HANDLE_TYPE_MUTEX) { @@ -161,6 +166,8 @@ DWORD WaitForMultipleObjects(DWORD nCount, const HANDLE* lpHandles, BOOL bWaitAl PVOID Object; struct timeval timeout; + if (!nCount) + return WAIT_FAILED; maxfd = 0; FD_ZERO(&fds); ZeroMemory(&timeout, sizeof(timeout)); diff --git a/winpr/libwinpr/thread/process.c b/winpr/libwinpr/thread/process.c index c786178f2..b5cc87858 100644 --- a/winpr/libwinpr/thread/process.c +++ b/winpr/libwinpr/thread/process.c @@ -51,6 +51,10 @@ #ifndef _WIN32 +#ifdef HAVE_UNISTD_H +#include +#endif + #include typedef void *(*pthread_start_routine)(void*); @@ -88,14 +92,14 @@ VOID ExitProcess(UINT uExitCode) } -HANDLE GetCurrentProcess(VOID) +HANDLE _GetCurrentProcess(VOID) { return NULL; } DWORD GetCurrentProcessId(VOID) { - return 0; + return ((DWORD) getpid()); } DWORD GetProcessId(HANDLE Process) diff --git a/winpr/libwinpr/thread/thread.c b/winpr/libwinpr/thread/thread.c index 1ed610ab2..72290404a 100644 --- a/winpr/libwinpr/thread/thread.c +++ b/winpr/libwinpr/thread/thread.c @@ -81,7 +81,7 @@ void winpr_StartThread(WINPR_THREAD* thread) pthread_attr_t attr; pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); if (thread->dwStackSize > 0) pthread_attr_setstacksize(&attr, (size_t) thread->dwStackSize); @@ -128,14 +128,16 @@ VOID ExitThread(DWORD dwExitCode) pthread_exit((void*) dwExitCode); } -HANDLE GetCurrentThread(VOID) +HANDLE _GetCurrentThread(VOID) { return NULL; } DWORD GetCurrentThreadId(VOID) { - return 0; + 
pthread_t tid; + tid = pthread_self(); + return (DWORD) tid; } DWORD ResumeThread(HANDLE hThread) diff --git a/winpr/libwinpr/utils/CMakeLists.txt b/winpr/libwinpr/utils/CMakeLists.txt index 6b0e096f1..67735fa71 100644 --- a/winpr/libwinpr/utils/CMakeLists.txt +++ b/winpr/libwinpr/utils/CMakeLists.txt @@ -21,10 +21,13 @@ set(MODULE_PREFIX "WINPR_UTILS") set(${MODULE_PREFIX}_COLLECTIONS_SRCS collections/Queue.c collections/Stack.c + collections/Reference.c collections/ArrayList.c collections/Dictionary.c collections/ListDictionary.c - collections/KeyValuePair.c) + collections/KeyValuePair.c + collections/CountdownEvent.c + collections/BufferPool.c) set(${MODULE_PREFIX}_SRCS sam.c diff --git a/winpr/libwinpr/utils/collections/BufferPool.c b/winpr/libwinpr/utils/collections/BufferPool.c new file mode 100644 index 000000000..348e231b1 --- /dev/null +++ b/winpr/libwinpr/utils/collections/BufferPool.c @@ -0,0 +1,165 @@ +/** + * WinPR: Windows Portable Runtime + * Buffer Pool + * + * Copyright 2012 Marc-Andre Moreau + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include + +/** + * C equivalent of the C# BufferManager Class: + * http://msdn.microsoft.com/en-us/library/ms405814.aspx + */ + +/** + * Methods + */ + +/** + * Gets a buffer of at least the specified size from the pool. 
+ */ + +void* BufferPool_Take(wBufferPool* pool, int bufferSize) +{ + void* buffer = NULL; + + if (pool->synchronized) + WaitForSingleObject(pool->mutex, INFINITE); + + if (pool->fixedSize) + { + if (pool->size > 0) + buffer = pool->array[--(pool->size)]; + + if (!buffer) + { + if (pool->alignment) + buffer = _aligned_malloc(pool->fixedSize, pool->alignment); + else + buffer = malloc(pool->fixedSize); + } + } + else + { + printf("Variable-size BufferPool not yet implemented\n"); + } + + if (pool->synchronized) + ReleaseMutex(pool->mutex); + + return buffer; +} + +/** + * Returns a buffer to the pool. + */ + +void BufferPool_Return(wBufferPool* pool, void* buffer) +{ + if (pool->synchronized) + WaitForSingleObject(pool->mutex, INFINITE); + + if ((pool->size + 1) >= pool->capacity) + { + pool->capacity *= 2; + pool->array = (void**) realloc(pool->array, sizeof(void*) * pool->capacity); + } + + pool->array[(pool->size)++] = buffer; + + if (pool->synchronized) + ReleaseMutex(pool->mutex); +} + +/** + * Releases the buffers currently cached in the pool. 
+ */ + +void BufferPool_Clear(wBufferPool* pool) +{ + if (pool->synchronized) + WaitForSingleObject(pool->mutex, INFINITE); + + while (pool->size > 0) + { + (pool->size)--; + + if (pool->alignment) + _aligned_free(pool->array[pool->size]); + else + free(pool->array[pool->size]); + } + + if (pool->synchronized) + ReleaseMutex(pool->mutex); +} + +/** + * Construction, Destruction + */ + +wBufferPool* BufferPool_New(BOOL synchronized, int fixedSize, DWORD alignment) +{ + wBufferPool* pool = NULL; + + pool = (wBufferPool*) malloc(sizeof(wBufferPool)); + + if (pool) + { + pool->fixedSize = fixedSize; + + if (pool->fixedSize < 0) + pool->fixedSize = 0; + + pool->alignment = alignment; + pool->synchronized = synchronized; + + if (pool->synchronized) + pool->mutex = CreateMutex(NULL, FALSE, NULL); + + if (!pool->fixedSize) + { + printf("Variable-size BufferPool not yet implemented\n"); + } + + pool->size = 0; + pool->capacity = 32; + pool->array = (void**) malloc(sizeof(void*) * pool->capacity); + } + + return pool; +} + +void BufferPool_Free(wBufferPool* pool) +{ + if (pool) + { + BufferPool_Clear(pool); + + if (pool->synchronized) + CloseHandle(pool->mutex); + + free(pool->array); + + free(pool); + } +} diff --git a/winpr/libwinpr/utils/collections/CountdownEvent.c b/winpr/libwinpr/utils/collections/CountdownEvent.c new file mode 100644 index 000000000..0e9df2b35 --- /dev/null +++ b/winpr/libwinpr/utils/collections/CountdownEvent.c @@ -0,0 +1,180 @@ +/** + * WinPR: Windows Portable Runtime + * Countdown Event + * + * Copyright 2012 Marc-Andre Moreau + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif + +#include + +#include + +/** + * C equivalent of the C# CountdownEvent Class + * http://msdn.microsoft.com/en-us/library/dd235708/ + */ + +/** + * Properties + */ + +/** + * Gets the number of remaining signals required to set the event. + */ + +DWORD CountdownEvent_CurrentCount(wCountdownEvent* countdown) +{ + return countdown->count; +} + +/** + * Gets the number of signals initially required to set the event. + */ + +DWORD CountdownEvent_InitialCount(wCountdownEvent* countdown) +{ + return countdown->initialCount; +} + +/** + * Determines whether the event is set. + */ + +BOOL CountdownEvent_IsSet(wCountdownEvent* countdown) +{ + BOOL status = FALSE; + + if (WaitForSingleObject(countdown->event, 0) == WAIT_OBJECT_0) + status = TRUE; + + return status; +} + +/** + * Gets a WaitHandle that is used to wait for the event to be set. + */ + +HANDLE CountdownEvent_WaitHandle(wCountdownEvent* countdown) +{ + return countdown->event; +} + +/** + * Methods + */ + +/** + * Increments the CountdownEvent's current count by a specified value. + */ + +void CountdownEvent_AddCount(wCountdownEvent* countdown, DWORD signalCount) +{ + WaitForSingleObject(countdown->mutex, INFINITE); + + countdown->count += signalCount; + + if (countdown->count > 0) + ResetEvent(countdown->event); + + ReleaseMutex(countdown->mutex); +} + +/** + * Registers multiple signals with the CountdownEvent, decrementing the value of CurrentCount by the specified amount. 
+ */ + +BOOL CountdownEvent_Signal(wCountdownEvent* countdown, DWORD signalCount) +{ + BOOL status; + BOOL newStatus; + BOOL oldStatus; + + status = newStatus = oldStatus = FALSE; + + WaitForSingleObject(countdown->mutex, INFINITE); + + if (WaitForSingleObject(countdown->event, 0) == WAIT_OBJECT_0) + oldStatus = TRUE; + + countdown->count -= signalCount; + + if (countdown->count < 0) + { + printf("CountdownEvent_Signal warning: count is less than zero\n"); + countdown->count = 0; + } + + if (countdown->count == 0) + newStatus = TRUE; + + if (newStatus && (!oldStatus)) + { + SetEvent(countdown->event); + status = TRUE; + } + + ReleaseMutex(countdown->mutex); + + return status; +} + +/** + * Resets the InitialCount property to a specified value. + */ + +void CountdownEvent_Reset(wCountdownEvent* countdown, DWORD count) +{ + countdown->initialCount = count; +} + +/** + * Construction, Destruction + */ + +wCountdownEvent* CountdownEvent_New(DWORD initialCount) +{ + wCountdownEvent* countdown = NULL; + + countdown = (wCountdownEvent*) malloc(sizeof(wCountdownEvent)); + + if (countdown) + { + countdown->count = initialCount; + countdown->initialCount = initialCount; + countdown->mutex = CreateMutex(NULL, FALSE, NULL); + countdown->event = CreateEvent(NULL, TRUE, FALSE, NULL); + + if (countdown->count == 0) + SetEvent(countdown->event); + } + + return countdown; +} + +void CountdownEvent_Free(wCountdownEvent* countdown) +{ + CloseHandle(countdown->mutex); + CloseHandle(countdown->event); + + free(countdown); +} diff --git a/winpr/libwinpr/utils/collections/Queue.c b/winpr/libwinpr/utils/collections/Queue.c index 2135ae5a6..841bc49fb 100644 --- a/winpr/libwinpr/utils/collections/Queue.c +++ b/winpr/libwinpr/utils/collections/Queue.c @@ -138,8 +138,21 @@ void Queue_Enqueue(wQueue* queue, void* obj) if (queue->size == queue->capacity) { - queue->capacity *= queue->growthFactor; + int old_capacity; + int new_capacity; + + old_capacity = queue->capacity; + new_capacity = 
queue->capacity * queue->growthFactor; + + queue->capacity = new_capacity; queue->array = (void**) realloc(queue->array, sizeof(void*) * queue->capacity); + ZeroMemory(&(queue->array[old_capacity]), old_capacity * sizeof(void*)); + + if (queue->tail < (old_capacity - 1)) + { + CopyMemory(&(queue->array[old_capacity]), queue->array, queue->tail * sizeof(void*)); + queue->tail += old_capacity; + } } queue->array[queue->tail] = obj; diff --git a/winpr/libwinpr/utils/collections/Reference.c b/winpr/libwinpr/utils/collections/Reference.c new file mode 100644 index 000000000..7df1c84b2 --- /dev/null +++ b/winpr/libwinpr/utils/collections/Reference.c @@ -0,0 +1,171 @@ +/** + * WinPR: Windows Portable Runtime + * Reference Count Table + * + * Copyright 2012 Marc-Andre Moreau + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif + +#include + +#include + +/** + * C reference counting + * http://msdn.microsoft.com/en-us/library/windows/desktop/ms693431/ + */ + +wReference* ReferenceTable_FindEntry(wReferenceTable* referenceTable, void* ptr) +{ + int index = 0; + BOOL found = FALSE; + wReference* reference = NULL; + + for (index = 0; index < referenceTable->size; index++) + { + reference = &referenceTable->array[index]; + + if (reference->Pointer == ptr) + found = TRUE; + } + + return (found) ? 
reference : NULL; +} + +wReference* ReferenceTable_GetFreeEntry(wReferenceTable* referenceTable) +{ + int index = 0; + BOOL found = FALSE; + wReference* reference = NULL; + + for (index = 0; index < referenceTable->size; index++) + { + reference = &referenceTable->array[index]; + + if (reference->Pointer == NULL) + { + reference->Count = 0; + found = TRUE; + } + } + + if (!found) + { + referenceTable->size *= 2; + referenceTable->array = (wReference*) realloc(referenceTable->array, sizeof(wReference) * referenceTable->size); + + ZeroMemory(&referenceTable->array[(referenceTable->size / 2)], + sizeof(wReference) * (referenceTable->size / 2)); + + return ReferenceTable_GetFreeEntry(referenceTable); + } + + return reference; +} + +UINT32 ReferenceTable_Add(wReferenceTable* referenceTable, void* ptr) +{ + UINT32 count = 0; + wReference* reference = NULL; + + if (referenceTable->synchronized) + WaitForSingleObject(referenceTable->mutex, INFINITE); + + reference = ReferenceTable_FindEntry(referenceTable, ptr); + + if (!reference) + { + reference = ReferenceTable_GetFreeEntry(referenceTable); + reference->Pointer = ptr; + reference->Count = 0; + } + + count = ++(reference->Count); + + if (referenceTable->synchronized) + ReleaseMutex(referenceTable->mutex); + + return count; +} + +UINT32 ReferenceTable_Release(wReferenceTable* referenceTable, void* ptr) +{ + UINT32 count = 0; + wReference* reference = NULL; + + if (referenceTable->synchronized) + WaitForSingleObject(referenceTable->mutex, INFINITE); + + reference = ReferenceTable_FindEntry(referenceTable, ptr); + + if (reference) + { + count = --(reference->Count); + + if (count < 1) + { + if (referenceTable->ReferenceFree) + { + referenceTable->ReferenceFree(referenceTable->context, ptr); + reference->Pointer = NULL; + reference->Count = 0; + } + } + } + + if (referenceTable->synchronized) + ReleaseMutex(referenceTable->mutex); + + return count; +} + +wReferenceTable* ReferenceTable_New(BOOL synchronized, void* context, 
REFERENCE_FREE ReferenceFree) +{ + wReferenceTable* referenceTable; + + referenceTable = (wReferenceTable*) malloc(sizeof(wReferenceTable)); + + if (referenceTable) + { + referenceTable->context = context; + referenceTable->ReferenceFree = ReferenceFree; + + referenceTable->size = 32; + referenceTable->array = (wReference*) malloc(sizeof(wReference) * referenceTable->size); + ZeroMemory(referenceTable->array, sizeof(wReference) * referenceTable->size); + + referenceTable->synchronized = synchronized; + referenceTable->mutex = CreateMutex(NULL, FALSE, NULL); + } + + return referenceTable; +} + +void ReferenceTable_Free(wReferenceTable* referenceTable) +{ + if (referenceTable) + { + CloseHandle(referenceTable->mutex); + free(referenceTable->array); + free(referenceTable); + } +} diff --git a/winpr/libwinpr/utils/test/TestQueue.c b/winpr/libwinpr/utils/test/TestQueue.c index 8f9d6c014..2ef6120da 100644 --- a/winpr/libwinpr/utils/test/TestQueue.c +++ b/winpr/libwinpr/utils/test/TestQueue.c @@ -18,7 +18,6 @@ int TestQueue(int argc, char* argv[]) } count = Queue_Count(queue); - printf("queue count: %d\n", count); for (index = 1; index <= 10; index++) @@ -29,6 +28,25 @@ int TestQueue(int argc, char* argv[]) return -1; } + count = Queue_Count(queue); + printf("queue count: %d\n", count); + + Queue_Enqueue(queue, (void*) (size_t) 1); + Queue_Enqueue(queue, (void*) (size_t) 2); + Queue_Enqueue(queue, (void*) (size_t) 3); + + Queue_Dequeue(queue); + Queue_Dequeue(queue); + + Queue_Enqueue(queue, (void*) (size_t) 4); + Queue_Enqueue(queue, (void*) (size_t) 5); + Queue_Enqueue(queue, (void*) (size_t) 6); + + Queue_Dequeue(queue); + Queue_Dequeue(queue); + Queue_Dequeue(queue); + Queue_Dequeue(queue); + Queue_Clear(queue); Queue_Free(queue);