summary refs log tree commit diff stats
path: root/graphics
diff options
context:
space:
mode:
author Florian Franzmann 2019-10-25 20:16:17 +0200
committer Florian Franzmann 2019-10-25 20:31:55 +0200
commit 7e3d91a74ec02d2a7674dfde87ee05a2a08c2de4 (patch)
tree 3a40c248f1e4de6ae874ff15092ba52b02f51102 /graphics
parent b42bef6836aa40d5674cfd67ed3e80af6cacc96c (diff)
graphics/darktable: version 2.6.3
Diffstat (limited to 'graphics')
-rwxr-xr-x graphics/darktable/DETAILS 4
-rw-r--r-- graphics/darktable/HISTORY 4
-rwxr-xr-x graphics/darktable/PRE_BUILD 4
-rw-r--r-- graphics/darktable/gcc9.patch 5171
4 files changed, 6 insertions, 5177 deletions
diff --git a/graphics/darktable/DETAILS b/graphics/darktable/DETAILS
index bf7525e940..773f8b302d 100755
--- a/graphics/darktable/DETAILS
+++ b/graphics/darktable/DETAILS
@@ -2,10 +2,10 @@ source $GRIMOIRE/CMAKE_FUNCTIONS
. "$GRIMOIRE/FUNCTIONS"
SPELL=darktable
if [[ "$DARKTABLE_BRANCH" == "stable" ]]; then
- VERSION=2.6.2
+ VERSION=2.6.3
SOURCE="${SPELL}-${VERSION}.tar.xz"
SOURCE_URL[0]=https://github.com/darktable-org/darktable/releases/download/release-${VERSION}/${SOURCE}
- SOURCE_HASH=sha256:9cb9efbb09a40375ff05cef89343235a621c58339539e44985470a029a7ffb45:UPSTREAM_HASH
+ SOURCE_HASH=sha256:a518999c8458472edfc04577026ce5047d74553052af0f52d10ba8ce601b78f0:UPSTREAM_HASH
SOURCE_DIRECTORY="${BUILD_DIRECTORY}/${SPELL}-${VERSION}"
else
VERSION=$(get_scm_version)
diff --git a/graphics/darktable/HISTORY b/graphics/darktable/HISTORY
index a2e1c0088f..92351dd44f 100644
--- a/graphics/darktable/HISTORY
+++ b/graphics/darktable/HISTORY
@@ -1,3 +1,7 @@
+2019-10-25 Florian Franzmann <siflfran@hawo.stw.uni-erlangen.de>
+ * DETAILS: version 2.6.3
+ * PRE_BUILD, gcc9.patch: removed
+
2019-10-05 Florian Franzmann <siflfran@hawo.stw.uni-erlangen.de>
* DETAILS, DEPENDS, CONFIGURE: use CMAKE_*
diff --git a/graphics/darktable/PRE_BUILD b/graphics/darktable/PRE_BUILD
deleted file mode 100755
index d33652b6f3..0000000000
--- a/graphics/darktable/PRE_BUILD
+++ /dev/null
@@ -1,4 +0,0 @@
-default_pre_build &&
-cd "$SOURCE_DIRECTORY" &&
-
-patch -p1 < "$SPELL_DIRECTORY/gcc9.patch"
diff --git a/graphics/darktable/gcc9.patch b/graphics/darktable/gcc9.patch
deleted file mode 100644
index 0b9c840026..0000000000
--- a/graphics/darktable/gcc9.patch
+++ /dev/null
@@ -1,5171 +0,0 @@
-From b654564713dfdba38ccd82d72b129b58d06a85d0 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Stefan=20Br=C3=BCns?= <stefan.bruens@rwth-aachen.de>
-Date: Mon, 15 Jul 2019 20:04:01 +0200
-Subject: [PATCH] Fix OpenMP 4.0 issues with GCC9
-
-Backport of pull request #2550.
----
- CMakeLists.txt | 72 ++++++++----------------
- ConfigureChecks.cmake | 30 ++++++++++
- DefineOptions.cmake | 44 +++++++++++++++
- src/CMakeLists.txt | 2 -
- src/chart/cairo.c | 5 +-
- src/chart/main.c | 16 ++++--
- src/common/bilateral.c | 18 ++++--
- src/common/color_picker.c | 9 ++-
- src/common/darktable.h | 24 ++++++--
- src/common/dwt.c | 20 +++++--
- src/common/focus.h | 10 +++-
- src/common/gaussian.c | 22 ++++++--
- src/common/heal.c | 22 ++++++--
- src/common/histogram.c | 10 +++-
- src/common/imageio.c | 34 ++++++++---
- src/common/imageio_rawspeed.cc | 10 +++-
- src/common/interpolation.c | 16 ++++--
- src/common/locallaplacian.c | 99 ++++++++++++++++++++++++++-------
- src/common/opencl.c | 8 ++-
- src/config.cmake.h | 2 +
- src/control/jobs/control_jobs.c | 5 +-
- src/develop/blend.c | 36 ++++++++----
- src/develop/imageop_math.c | 55 ++++++++++++++----
- src/develop/imageop_math.h | 5 +-
- src/develop/masks/circle.c | 12 +++-
- src/develop/masks/ellipse.c | 12 +++-
- src/develop/masks/gradient.c | 24 ++++++--
- src/develop/masks/group.c | 24 ++++++--
- src/develop/pixelpipe_hb.c | 15 ++++-
- src/develop/tiling.c | 35 +++++++++---
- src/gui/draw.h | 11 +++-
- src/iop/ashift.c | 50 +++++++++++++----
- src/iop/atrous.c | 33 ++++++++---
- src/iop/basecurve.c | 48 ++++++++++++----
- src/iop/bilateral.cc | 6 +-
- src/iop/bloom.c | 20 +++++--
- src/iop/channelmixer.c | 5 +-
- src/iop/clahe.c | 11 +++-
- src/iop/clipping.c | 11 +++-
- src/iop/colisa.c | 15 ++++-
- src/iop/colorbalance.c | 39 +++++++++++--
- src/iop/colorchecker.c | 5 +-
- src/iop/colorcontrast.c | 14 ++++-
- src/iop/colorin.c | 50 +++++++++++++----
- src/iop/colorize.c | 5 +-
- src/iop/colormapping.c | 15 ++++-
- src/iop/colorout.c | 16 ++++--
- src/iop/colorreconstruction.c | 11 +++-
- src/iop/colortransfer.c | 15 ++++-
- src/iop/colorzones.c | 5 +-
- src/iop/defringe.c | 14 +++--
- src/iop/demosaic.c | 71 ++++++++++++++++++-----
- src/iop/denoiseprofile.c | 64 ++++++++++++++++-----
- src/iop/dither.c | 12 +++-
- src/iop/equalizer_eaw.h | 22 ++++++--
- src/iop/exposure.c | 8 ++-
- src/iop/filmic.c | 16 +++++-
- src/iop/gamma.c | 16 ++++--
- src/iop/globaltonemap.c | 15 ++++-
- src/iop/graduatednd.c | 20 +++++--
- src/iop/grain.c | 5 +-
- src/iop/hazeremoval.c | 41 ++++++++++----
- src/iop/highlights.c | 44 +++++++++++----
- src/iop/highpass.c | 10 +++-
- src/iop/hotpixels.c | 13 ++++-
- src/iop/invert.c | 27 +++++++--
- src/iop/lens.c | 46 ++++++++++++---
- src/iop/levels.c | 4 +-
- src/iop/liquify.c | 4 +-
- src/iop/lowlight.c | 5 +-
- src/iop/lowpass.c | 15 ++++-
- src/iop/monochrome.c | 10 +++-
- src/iop/nlmeans.c | 22 ++++++--
- src/iop/overexposed.c | 5 +-
- src/iop/profile_gamma.c | 20 +++++--
- src/iop/rawdenoise.c | 48 ++++++++++++----
- src/iop/rawoverexposed.c | 12 +++-
- src/iop/rawprepare.c | 26 +++++++--
- src/iop/relight.c | 5 +-
- src/iop/retouch.c | 65 +++++++++++++++++-----
- src/iop/rotatepixels.c | 5 +-
- src/iop/scalepixels.c | 5 +-
- src/iop/shadhi.c | 13 ++++-
- src/iop/sharpen.c | 41 ++++++++++----
- src/iop/soften.c | 38 ++++++++++---
- src/iop/splittoning.c | 6 +-
- src/iop/spots.c | 4 +-
- src/iop/temperature.c | 30 ++++++++--
- src/iop/tonecurve.c | 6 +-
- src/iop/velvia.c | 9 ++-
- src/iop/vibrance.c | 5 +-
- src/iop/vignette.c | 6 +-
- src/iop/zonesystem.c | 29 ++++++++--
- 93 files changed, 1519 insertions(+), 444 deletions(-)
- create mode 100644 ConfigureChecks.cmake
- create mode 100644 DefineOptions.cmake
-
-diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 5b783d1..ce4047b 100644
---- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -33,15 +33,7 @@ IF(DEFINED DT_FORCE_CXX_COMPILER)
- set(CMAKE_CXX_COMPILER ${DT_FORCE_CXX_COMPILER})
- endif()
-
--include(CheckCCompilerFlag)
--include(TestBigEndian)
--
--# Check if this is source package build
--if(NOT IS_DIRECTORY ${CMAKE_SOURCE_DIR}/.git)
-- set(SOURCE_PACKAGE 1)
--else()
-- set(SOURCE_PACKAGE 0)
--endif()
-+include(DefineOptions.cmake)
-
- # Include GNUInstallDirs, which sets sensible defaults for install directories.
- # See https://cmake.org/cmake/help/v3.0/module/GNUInstallDirs.html for further information.
-@@ -50,41 +42,31 @@ endif()
-
- include(GNUInstallDirs)
-
--option(USE_CAMERA_SUPPORT "Detect and use camera support if available." ON)
--option(USE_NLS "Build Native Language Support (using gettext)" ON)
--option(USE_COLORD "Enable colord support" ON)
--option(USE_MAP "Build Map View parts" ON)
--option(USE_LUA "Build lua scripting support" ON)
--option(DONT_USE_INTERNAL_LUA "Never fall back to the intree copy of lua" ON)
--option(USE_FLICKR "Enable Flickr support" ON)
--option(USE_KWALLET "Build kwallet password storage back-end" ON)
--option(USE_LIBSECRET "Build libsecret password storage back-end" ON)
--option(USE_UNITY "Use libunity to report progress in the launcher" OFF)
--option(USE_OPENMP "Use openmp threading support." ON)
--option(USE_OPENCL "Use OpenCL support." ON)
--option(USE_GRAPHICSMAGICK "Use GraphicsMagick library for image import." ON)
--option(USE_DARKTABLE_PROFILING OFF)
--option(CUSTOM_CFLAGS "Don't override compiler optimization flags." OFF)
--option(BUILD_USERMANUAL "Build all the versions of the usermanual." OFF)
--option(BINARY_PACKAGE_BUILD "Sets march optimization to generic" OFF)
--option(USE_XMLLINT "Run xmllint to test if darktableconfig.xml is valid" ON)
--option(USE_OPENJPEG "Enable JPEG 2000 support" ON)
--option(USE_WEBP "Enable WebP export support" ON)
--option(BUILD_CMSTEST "Build a test program to check your system's color management setup" ON)
--option(USE_OPENEXR "Enable OpenEXR support" ON)
--option(BUILD_PRINT "Build the print module" ON)
--option(BUILD_RS_IDENTIFY "Build the darktable-rs-identify debug aid" ON)
--option(BUILD_SSE2_CODEPATHS "(EXPERIMENTAL OPTION, DO NOT DISABLE) Building SSE2-optimized codepaths" ON)
--option(VALIDATE_APPDATA_FILE "Use appstream-util (if found) to validate the .appdata file" OFF)
--option(BUILD_TESTS "Build tests in src/tests/, runnable from the build/ directory" OFF)
--option(BUILD_BATTERY_INDICATOR "Add an icon to the top toolbar showing the state of a laptop battery" OFF)
--option(BUILD_MSYS2_INSTALL "Build an MSYS2 version of the install, aka for Windows platform, but without dependency installs" OFF)
--option(BUILD_NOISE_TOOLS "Build tools for generating noise proifles" OFF)
-+if (USE_OPENMP)
-+ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR
-+ CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
-
--if(USE_OPENCL)
-- option(TESTBUILD_OPENCL_PROGRAMS "Test-compile opencl programs (needs llvm and clang 3.9+)" ON)
-+ # Clang has an option to specify the OpenMP standard to use. Specify it.
-+ # FIXME: Implement this in FindOpenMP.cmake
-+ set(OPENMP_VERSION_SPECIFIER "-fopenmp-version=40")
-+
-+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_VERSION_SPECIFIER}")
-+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_VERSION_SPECIFIER}")
-+ endif()
-+
-+ find_package(OpenMP 4.0 REQUIRED)
-+endif()
-+
-+include(ConfigureChecks.cmake)
-+
-+include(CheckCCompilerFlag)
-+include(TestBigEndian)
-+
-+# Check if this is source package build
-+if(NOT IS_DIRECTORY ${CMAKE_SOURCE_DIR}/.git)
-+ set(SOURCE_PACKAGE 1)
- else()
-- set(TESTBUILD_OPENCL_PROGRAMS OFF)
-+ set(SOURCE_PACKAGE 0)
- endif()
-
- if(BUILD_SSE2_CODEPATHS)
-@@ -122,12 +104,6 @@ MESSAGE(STATUS "Is the target platform supported: ${IS_SUPPORTED_PLATFORM}")
- set(CMAKE_REQUIRED_INCLUDES "${CMAKE_REQUIRED_INCLUDES_OLD}")
- unset(CMAKE_REQUIRED_INCLUDES_OLD)
-
--if(APPLE)
-- option(USE_MAC_INTEGRATION "Enable OS X integration" ON)
--else(APPLE)
-- set(USE_MAC_INTEGRATION OFF)
--endif(APPLE)
--
- # When cross compiling, CMAKE_INSTALL_PREFIX will point to something like "/opt/darktable", but that's not useful when using the path to load
- # modules on runtime. Then we need something like "C:\Program Files\darktable". Doesn't need to be set when doing regular compiles.
- if(NOT DEFINED RUNTIME_INSTALL_PREFIX)
-diff --git a/DefineOptions.cmake b/DefineOptions.cmake
-new file mode 100644
-index 0000000..b49c21b
---- /dev/null
-+++ b/DefineOptions.cmake
-@@ -0,0 +1,44 @@
-+option(USE_CAMERA_SUPPORT "Detect and use camera support if available." ON)
-+option(USE_NLS "Build Native Language Support (using gettext)" ON)
-+option(USE_COLORD "Enable colord support" ON)
-+option(USE_MAP "Build Map View parts" ON)
-+option(USE_LUA "Build lua scripting support" ON)
-+option(DONT_USE_INTERNAL_LUA "Never fall back to the intree copy of lua" ON)
-+option(USE_FLICKR "Enable Flickr support" ON)
-+option(USE_KWALLET "Build kwallet password storage back-end" ON)
-+option(USE_LIBSECRET "Build libsecret password storage back-end" ON)
-+option(USE_UNITY "Use libunity to report progress in the launcher" OFF)
-+option(USE_OPENMP "Use openmp threading support." ON)
-+option(USE_OPENCL "Use OpenCL support." ON)
-+option(USE_GRAPHICSMAGICK "Use GraphicsMagick library for image import." ON)
-+option(USE_DARKTABLE_PROFILING OFF)
-+option(CUSTOM_CFLAGS "Don't override compiler optimization flags." OFF)
-+option(BUILD_USERMANUAL "Build all the versions of the usermanual." OFF)
-+option(BINARY_PACKAGE_BUILD "Sets march optimization to generic" OFF)
-+option(USE_XMLLINT "Run xmllint to test if darktableconfig.xml is valid" ON)
-+option(USE_OPENJPEG "Enable JPEG 2000 support" ON)
-+option(USE_WEBP "Enable WebP export support" ON)
-+option(BUILD_CMSTEST "Build a test program to check your system's color management setup" ON)
-+option(USE_OPENEXR "Enable OpenEXR support" ON)
-+option(BUILD_PRINT "Build the print module" ON)
-+option(BUILD_RS_IDENTIFY "Build the darktable-rs-identify debug aid" ON)
-+option(BUILD_SSE2_CODEPATHS "(EXPERIMENTAL OPTION, DO NOT DISABLE) Building SSE2-optimized codepaths" ON)
-+option(VALIDATE_APPDATA_FILE "Use appstream-util (if found) to validate the .appdata file" OFF)
-+option(BUILD_TESTS "Build tests in src/tests/, runnable from the build/ directory" OFF)
-+option(BUILD_BATTERY_INDICATOR "Add an icon to the top toolbar showing the state of a laptop battery" OFF)
-+option(BUILD_MSYS2_INSTALL "Build an MSYS2 version of the install, aka for Windows platform, but without dependency installs" OFF)
-+option(BUILD_NOISE_TOOLS "Build tools for generating noise proifles" OFF)
-+option(BUILD_CURVE_TOOLS "Build tools for generating base and tone curves" OFF)
-+option(USE_LENSFUN "Enable LensFun support" ON)
-+
-+if (USE_OPENCL)
-+ option(TESTBUILD_OPENCL_PROGRAMS "Test-compile opencl programs (needs llvm and clang 3.9+)" ON)
-+else ()
-+ set(TESTBUILD_OPENCL_PROGRAMS OFF)
-+endif ()
-+
-+if(APPLE)
-+ option(USE_MAC_INTEGRATION "Enable OS X integration" ON)
-+else(APPLE)
-+ set(USE_MAC_INTEGRATION OFF)
-+endif(APPLE)
-diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
-index 0ed087d..d01dc6b 100644
---- a/src/CMakeLists.txt
-+++ b/src/CMakeLists.txt
-@@ -225,8 +225,6 @@ list(APPEND LIBS "${CMAKE_THREAD_LIBS_INIT}")
- # Need to explicitly link against math library.
- list(APPEND LIBS "-lm")
-
--# Check for openmp
--find_package(OpenMP)
- if(USE_OPENMP)
- if(OpenMP_C_FLAGS)
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
-diff --git a/ConfigureChecks.cmake b/ConfigureChecks.cmake
-new file mode 100644
-index 0000000..58d229a
---- /dev/null
-+++ b/ConfigureChecks.cmake
-@@ -0,0 +1,30 @@
-+include(CheckCSourceCompiles)
-+
-+if (OpenMP_FOUND)
-+
-+set(CMAKE_REQUIRED_FLAGS ${OpenMP_C_FLAGS})
-+set(CMAKE_REQUIRED_LIBRARIES ${OpenMP_C_LIBRARIES})
-+check_c_source_compiles("
-+#include <omp.h>
-+
-+static void sink(const int x, int a[])
-+{
-+#pragma omp parallel for default(none) firstprivate(x) shared(a)
-+ for(int i = 0; i < 3; i++) {
-+ a[i] = x + i;
-+ }
-+}
-+
-+int main(void)
-+{
-+ int x = 42;
-+ int a[3] = {0};
-+
-+ sink(x, a);
-+
-+ return 0;
-+}" HAVE_OMP_FIRSTPRIVATE_WITH_CONST)
-+
-+set(CMAKE_REQUIRED_FLAGS)
-+set(CMAKE_REQUIRED_LIBRARIES)
-+endif()
-diff --git a/src/config.cmake.h b/src/config.cmake.h
-index 715ea48..bdd72c9 100644
---- a/src/config.cmake.h
-+++ b/src/config.cmake.h
-@@ -66,6 +66,8 @@ static const char *dt_supported_extensions[] __attribute__((unused)) = {"@DT_SUP
- #define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
- #endif
-
-+#cmakedefine HAVE_OMP_FIRSTPRIVATE_WITH_CONST 1
-+
- // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.sh
- // vim: shiftwidth=2 expandtab tabstop=2 cindent
- // kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
-diff --git a/src/common/darktable.h b/src/common/darktable.h
-index 25e8d46..5bdcbf8 100644
---- a/src/common/darktable.h
-+++ b/src/common/darktable.h
-@@ -75,11 +75,23 @@ typedef unsigned int u_int;
- #endif
-
- #ifdef _OPENMP
--#include <omp.h>
--#else
--#define omp_get_max_threads() 1
--#define omp_get_thread_num() 0
--#endif
-+# include <omp.h>
-+
-+/* See https://redmine.darktable.org/issues/12568#note-14 */
-+# ifdef HAVE_OMP_FIRSTPRIVATE_WITH_CONST
-+ /* If the compiler correctly supports firstprivate, use it. */
-+# define dt_omp_firstprivate(...) firstprivate(__VA_ARGS__)
-+# else /* HAVE_OMP_FIRSTPRIVATE_WITH_CONST */
-+ /* This is needed for clang < 7.0 */
-+# define dt_omp_firstprivate(...)
-+# endif/* HAVE_OMP_FIRSTPRIVATE_WITH_CONST */
-+
-+#else /* _OPENMP */
-+
-+# define omp_get_max_threads() 1
-+# define omp_get_thread_num() 0
-+
-+#endif /* _OPENMP */
-
- #ifndef _RELEASE
- #include "common/poison.h"
-@@ -542,7 +554,7 @@ int dt_load_from_string(const gchar *image_to_load, gboolean open_image_in_dr, g
-
- #define dt_unreachable_codepath_with_desc(D) \
- dt_unreachable_codepath_with_caller(D, __FILE__, __LINE__, __FUNCTION__)
--#define dt_unreachable_codepath() dt_unreachable_codepath_with_caller(NULL, __FILE__, __LINE__, __FUNCTION__)
-+#define dt_unreachable_codepath() dt_unreachable_codepath_with_caller("unreachable", __FILE__, __LINE__, __FUNCTION__)
- static inline void dt_unreachable_codepath_with_caller(const char *description, const char *file,
- const int line, const char *function)
- {
-diff --git a/src/common/bilateral.c b/src/common/bilateral.c
-index 2708eea..ac67210 100644
---- a/src/common/bilateral.c
-+++ b/src/common/bilateral.c
-@@ -127,7 +127,7 @@ void dt_bilateral_splat(dt_bilateral_t *b, const float *const in)
- const int oz = b->size_y * b->size_x;
- // splat into downsampled grid
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(b)
-+#pragma omp parallel for default(none) dt_omp_firstprivate(in, oy, oz) shared(b)
- #endif
- for(int j = 0; j < b->height; j++)
- {
-@@ -170,7 +170,9 @@ static void blur_line_z(float *buf, const int offset1, const int offset2, const
- const float w1 = 4.f / 16.f;
- const float w2 = 2.f / 16.f;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(buf)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(size1, size2, size3, offset1, offset2, offset3) \
-+ shared(buf)
- #endif
- for(int k = 0; k < size1; k++)
- {
-@@ -208,7 +210,9 @@ static void blur_line(float *buf, const int offset1, const int offset2, const in
- const float w1 = 4.f / 16.f;
- const float w2 = 1.f / 16.f;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(buf)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(size1, size2, size3, offset1, offset2, offset3) \
-+ shared(buf)
- #endif
- for(int k = 0; k < size1; k++)
- {
-@@ -260,7 +264,9 @@ void dt_bilateral_slice(const dt_bilateral_t *const b, const float *const in, fl
- const int oy = b->size_x;
- const int oz = b->size_y * b->size_x;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(b, in, norm, oy, oz) \
-+ shared(out)
- #endif
- for(int j = 0; j < b->height; j++)
- {
-@@ -306,7 +312,9 @@ void dt_bilateral_slice_to_output(const dt_bilateral_t *const b, const float *co
- const int oy = b->size_x;
- const int oz = b->size_y * b->size_x;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(b, in, norm, oy, oz) \
-+ shared(out)
- #endif
- for(int j = 0; j < b->height; j++)
- {
-diff --git a/src/common/color_picker.c b/src/common/color_picker.c
-index 76875d4..45c49a2 100644
---- a/src/common/color_picker.c
-+++ b/src/common/color_picker.c
-@@ -79,7 +79,8 @@ static void color_picker_helper_4ch_parallel(const dt_iop_buffer_dsc_t *dsc, con
- }
-
- #ifdef _OPENMP
--#pragma omp parallel default(none)
-+#pragma omp parallel default(none) \
-+ dt_omp_firstprivate(w, pixel, width, box, mean, mmin, mmax)
- #endif
- {
- const int tnum = dt_get_thread_num();
-@@ -199,7 +200,8 @@ static void color_picker_helper_bayer_parallel(const dt_iop_buffer_dsc_t *const
- }
-
- #ifdef _OPENMP
--#pragma omp parallel default(none)
-+#pragma omp parallel default(none) \
-+ dt_omp_firstprivate(pixel, width, roi, filters, box, msum, mmin, mmax, cnt)
- #endif
- {
- const int tnum = dt_get_thread_num();
-@@ -326,7 +328,8 @@ static void color_picker_helper_xtrans_parallel(const dt_iop_buffer_dsc_t *const
- }
-
- #ifdef _OPENMP
--#pragma omp parallel default(none)
-+#pragma omp parallel default(none) \
-+ dt_omp_firstprivate(pixel, width, roi, xtrans, box, cnt, msum, mmin, mmax)
- #endif
- {
- const int tnum = dt_get_thread_num();
-diff --git a/src/common/dwt.c b/src/common/dwt.c
-index eb37224..0fedbe8 100644
---- a/src/common/dwt.c
-+++ b/src/common/dwt.c
-@@ -194,7 +194,10 @@ static void dwt_add_layer_sse(float *const img, float *layers, dwt_params_t *con
- const int i_size = p->width * p->height * 4;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(layers) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(img, i_size) \
-+ shared(layers) \
-+ schedule(static)
- #endif
- for(int i = 0; i < i_size; i += 4)
- {
-@@ -216,7 +219,10 @@ static void dwt_add_layer(float *const img, float *layers, dwt_params_t *const p
- const int i_size = p->width * p->height * p->ch;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(layers) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(img, i_size) \
-+ shared(layers) \
-+ schedule(static)
- #endif
- for(int i = 0; i < i_size; i++) layers[i] += img[i];
- }
-@@ -233,7 +239,10 @@ static void dwt_subtract_layer_sse(float *bl, float *bh, dwt_params_t *const p)
- const int size = p->width * p->height * 4;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(bl, bh) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(v4_lpass_mult, size) \
-+ shared(bl, bh) \
-+ schedule(static)
- #endif
- for(int i = 0; i < size; i += 4)
- {
-@@ -258,7 +267,10 @@ static void dwt_subtract_layer(float *bl, float *bh, dwt_params_t *const p)
- const int size = p->width * p->height * p->ch;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(bl, bh) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(size) \
-+ shared(bl, bh) \
-+ schedule(static)
- #endif
- for(int i = 0; i < size; i++)
- {
-diff --git a/src/common/gaussian.c b/src/common/gaussian.c
-index 545377e..0798060 100644
---- a/src/common/gaussian.c
-+++ b/src/common/gaussian.c
-@@ -173,8 +173,10 @@ void dt_gaussian_blur(dt_gaussian_t *g, const float *const in, float *const out)
-
- // vertical blur column by column
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(temp, Labmin, Labmax, a0, a1, a2, a3, b1, b2, coefp, \
-- coefn) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, width, height, ch) \
-+ shared(temp, Labmin, Labmax, a0, a1, a2, a3, b1, b2, coefp, coefn) \
-+ schedule(static)
- #endif
- for(int i = 0; i < width; i++)
- {
-@@ -245,8 +247,10 @@ void dt_gaussian_blur(dt_gaussian_t *g, const float *const in, float *const out)
-
- // horizontal blur line by line
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(temp, Labmin, Labmax, a0, a1, a2, a3, b1, b2, coefp, \
-- coefn) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(out, ch, width, height) \
-+ shared(temp, Labmin, Labmax, a0, a1, a2, a3, b1, b2, coefp, coefn) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-@@ -340,7 +344,10 @@ static void dt_gaussian_blur_4c_sse(dt_gaussian_t *g, const float *const in, flo
-
- // vertical blur column by column
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(temp, a0, a1, a2, a3, b1, b2, coefp, coefn) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, Labmin, Labmax, width, height) \
-+ shared(temp, a0, a1, a2, a3, b1, b2, coefp, coefn) \
-+ schedule(static)
- #endif
- for(int i = 0; i < width; i++)
- {
-@@ -408,7 +415,10 @@ static void dt_gaussian_blur_4c_sse(dt_gaussian_t *g, const float *const in, flo
-
- // horizontal blur line by line
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(temp, a0, a1, a2, a3, b1, b2, coefp, coefn) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(out, Labmin, Labmax, width, height) \
-+ shared(temp, a0, a1, a2, a3, b1, b2, coefp, coefn) \
-+ schedule(static)
- #endif
- for(size_t j = 0; j < height; j++)
- {
-diff --git a/src/common/heal.c b/src/common/heal.c
-index 37e53eb..56ae231 100644
---- a/src/common/heal.c
-+++ b/src/common/heal.c
-@@ -54,7 +54,10 @@ static void dt_heal_sub(const float *const top_buffer, const float *const bottom
- const int i_size = width * height * ch;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(result_buffer) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(top_buffer, bottom_buffer, i_size) \
-+ shared(result_buffer) \
-+ schedule(static)
- #endif
- for(int i = 0; i < i_size; i++) result_buffer[i] = top_buffer[i] - bottom_buffer[i];
- }
-@@ -66,7 +69,10 @@ static void dt_heal_add(const float *const first_buffer, const float *const seco
- const int i_size = width * height * ch;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(result_buffer) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(first_buffer, second_buffer, i_size) \
-+ shared(result_buffer) \
-+ schedule(static)
- #endif
- for(int i = 0; i < i_size; i++) result_buffer[i] = first_buffer[i] + second_buffer[i];
- }
-@@ -78,7 +84,11 @@ static float dt_heal_laplace_iteration_sse(float *pixels, const float *const Adi
- float err = 0.f;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(pixels) schedule(static) reduction(+ : err)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(Adiag, Aidx, w, nmask_from, nmask_to) \
-+ shared(pixels) \
-+ schedule(static) \
-+ reduction(+ : err)
- #endif
- for(int i = nmask_from; i < nmask_to; i++)
- {
-@@ -135,7 +145,11 @@ static float dt_heal_laplace_iteration(float *pixels, const float *const Adiag,
- const int ch1 = (ch == 4) ? ch - 1 : ch;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(pixels) schedule(static) reduction(+ : err)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(Adiag, Aidx, w, nmask_from, nmask_to, ch1) \
-+ shared(pixels) \
-+ schedule(static) \
-+ reduction(+ : err)
- #endif
- for(int i = nmask_from; i < nmask_to; i++)
- {
-diff --git a/src/common/histogram.c b/src/common/histogram.c
-index 2fccdd3..b7bbfd7 100644
---- a/src/common/histogram.c
-+++ b/src/common/histogram.c
-@@ -217,7 +217,10 @@ void dt_histogram_worker(dt_dev_histogram_collection_params_t *const histogram_p
- const dt_histogram_roi_t *const roi = histogram_params->roi;
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(partial_hists)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(histogram_params, pixel, Worker, bins_total, roi) \
-+ shared(partial_hists) \
-+ schedule(static)
- #endif
- for(int j = roi->crop_y; j < roi->height - roi->crop_height; j++)
- {
-@@ -230,7 +233,10 @@ void dt_histogram_worker(dt_dev_histogram_collection_params_t *const histogram_p
- memset(*histogram, 0, buf_size);
- uint32_t *hist = *histogram;
-
--#pragma omp parallel for schedule(static) default(none) shared(hist, partial_hists)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(nthreads, bins_total) \
-+ shared(hist, partial_hists) \
-+ schedule(static)
- for(size_t k = 0; k < bins_total; k++)
- {
- for(size_t n = 0; n < nthreads; n++)
-diff --git a/src/common/imageio.c b/src/common/imageio.c
-index c82cdb5..f1ac149 100644
---- a/src/common/imageio.c
-+++ b/src/common/imageio.c
-@@ -182,7 +182,10 @@ void dt_imageio_flip_buffers(char *out, const char *in, const size_t bpp, const
- if(!orientation)
- {
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(in, out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ht, wd, bpp, stride) \
-+ shared(in, out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < ht; j++) memcpy(out + (size_t)j * bpp * wd, in + (size_t)j * stride, bpp * wd);
- return;
-@@ -205,7 +208,10 @@ void dt_imageio_flip_buffers(char *out, const char *in, const size_t bpp, const
- si = -si;
- }
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(in, out, jj, ii, sj, si)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(wd, bpp, ht, stride) \
-+ shared(in, out, jj, ii, sj, si) \
-+ schedule(static)
- #endif
- for(int j = 0; j < ht; j++)
- {
-@@ -229,7 +235,10 @@ void dt_imageio_flip_buffers_ui16_to_float(float *out, const uint16_t *in, const
- if(!orientation)
- {
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(in, out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(wd, ch, scale, stride, black, ht) \
-+ shared(in, out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < ht; j++)
- for(int i = 0; i < wd; i++)
-@@ -255,7 +264,10 @@ void dt_imageio_flip_buffers_ui16_to_float(float *out, const uint16_t *in, const
- si = -si;
- }
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(in, out, jj, ii, sj, si)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(wd, ch, black, scale, stride, ht) \
-+ shared(in, out, jj, ii, sj, si) \
-+ schedule(static)
- #endif
- for(int j = 0; j < ht; j++)
- {
-@@ -279,7 +291,10 @@ void dt_imageio_flip_buffers_ui8_to_float(float *out, const uint8_t *in, const f
- if(!orientation)
- {
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(in, out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(wd, scale, black, ht, ch, stride) \
-+ shared(in, out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < ht; j++)
- for(int i = 0; i < wd; i++)
-@@ -305,7 +320,10 @@ void dt_imageio_flip_buffers_ui8_to_float(float *out, const uint8_t *in, const f
- si = -si;
- }
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(in, out, jj, ii, sj, si)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(wd, ch, scale, black, stride, ht) \
-+ shared(in, out, jj, ii, sj, si) \
-+ schedule(static)
- #endif
- for(int j = 0; j < ht; j++)
- {
-@@ -865,7 +883,9 @@ int dt_imageio_export_with_flags(const uint32_t imgid, const char *filename,
- { // !display_byteorder, need to swap:
- uint8_t *const buf8 = pipe.backbuf;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(processed_width, processed_height, buf8) \
-+ schedule(static)
- #endif
- // just flip byte order
- for(size_t k = 0; k < (size_t)processed_width * processed_height; k++)
-diff --git a/src/common/imageio_rawspeed.cc b/src/common/imageio_rawspeed.cc
-index 0dcdc81..a6fc74d 100644
---- a/src/common/imageio_rawspeed.cc
-+++ b/src/common/imageio_rawspeed.cc
-@@ -383,7 +383,10 @@ dt_imageio_retval_t dt_imageio_open_rawspeed_sraw(dt_image_t *img, RawImage r, d
- */
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(r, img, buf)
-+#pragma omp parallel for default(none) \
-+ schedule(static) \
-+ dt_omp_firstprivate(cpp) \
-+ shared(r, img, buf)
- #endif
- for(int j = 0; j < img->height; j++)
- {
-@@ -407,7 +410,10 @@ dt_imageio_retval_t dt_imageio_open_rawspeed_sraw(dt_image_t *img, RawImage r, d
- */
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(r, img, buf)
-+#pragma omp parallel for default(none) \
-+ schedule(static) \
-+ dt_omp_firstprivate(cpp) \
-+ shared(r, img, buf)
- #endif
- for(int j = 0; j < img->height; j++)
- {
-diff --git a/src/common/interpolation.c b/src/common/interpolation.c
-index f627d0d..154715a 100644
---- a/src/common/interpolation.c
-+++ b/src/common/interpolation.c
-@@ -1378,7 +1378,9 @@ static void dt_interpolation_resample_plain(const struct dt_interpolation *itor,
- int64_t ts_resampling = getts();
- #endif
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out_stride, roi_out, x0, l) \
-+ shared(out)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -1424,7 +1426,9 @@ static void dt_interpolation_resample_plain(const struct dt_interpolation *itor,
-
- // Process each output line
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out, hindex, hlength, hkernel, vindex, vlength, vkernel, vmeta)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out_stride, roi_out) \
-+ shared(out, hindex, hlength, hkernel, vindex, vlength, vkernel, vmeta)
- #endif
- for(int oy = 0; oy < roi_out->height; oy++)
- {
-@@ -1536,7 +1540,9 @@ static void dt_interpolation_resample_sse(const struct dt_interpolation *itor, f
- int64_t ts_resampling = getts();
- #endif
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out_stride, roi_out, x0, l) \
-+ shared(out)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -1582,7 +1588,9 @@ static void dt_interpolation_resample_sse(const struct dt_interpolation *itor, f
-
- // Process each output line
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out, hindex, hlength, hkernel, vindex, vlength, vkernel, vmeta)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out_stride, roi_out) \
-+ shared(out, hindex, hlength, hkernel, vindex, vlength, vkernel, vmeta)
- #endif
- for(int oy = 0; oy < roi_out->height; oy++)
- {
-diff --git a/src/common/locallaplacian.c b/src/common/locallaplacian.c
-index 5deb3c1..bfe7f4c 100644
---- a/src/common/locallaplacian.c
-+++ b/src/common/locallaplacian.c
-@@ -114,7 +114,10 @@ static inline void gauss_expand(
- const int ht)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) collapse(2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(fine, input, wd, ht) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(int j=1;j<((ht-1)&~1);j++) // even ht: two px boundary. odd ht: one px.
- for(int i=1;i<((wd-1)&~1);i++)
-@@ -151,7 +154,9 @@ static inline void gauss_reduce_sse2(
- float *const row = ringbuf + (rowj % 5)*stride;
- const float *const in = input + rowj*wd;
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(cw, in, row) \
-+ schedule(static)
- #endif
- for(int i=1;i<cw-1;i++)
- row[i] = 6*in[2*i] + 4*(in[2*i-1]+in[2*i+1]) + in[2*i-2] + in[2*i+2];
-@@ -169,7 +174,9 @@ static inline void gauss_reduce_sse2(
- *const row2 = rows[2], *const row3 = rows[3], *const row4 = rows[4];
- const __m128 four = _mm_set1_ps(4.f), scale = _mm_set1_ps(1.f/256.f);
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(cw, out, scale, four, row0, row1, row2, row3, row4) \
-+ schedule(static)
- #endif
- for(int i=0;i<=cw-8;i+=8)
- {
-@@ -224,7 +231,10 @@ static inline void gauss_reduce(
- memset(coarse, 0, sizeof(float)*cw*ch);
- // direct 5x5 stencil only on required pixels:
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) collapse(2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(coarse, cw, ch, input, w, wd) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(int j=1;j<ch-1;j++) for(int i=1;i<cw-1;i++)
- for(int jj=-2;jj<=2;jj++) for(int ii=-2;ii<=2;ii++)
-@@ -251,7 +261,11 @@ static inline float *ll_pad_input(
- if(b && b->mode == 2)
- { // pad by preview buffer
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none) shared(wd2, ht2) collapse(2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ht, input, max_supp, out, wd) \
-+ shared(wd2, ht2) \
-+ schedule(dynamic) \
-+ collapse(2)
- #endif // fill regular pixels:
- for(int j=0;j<ht;j++) for(int i=0;i<wd;i++)
- out[(j+max_supp)**wd2+i+max_supp] = input[stride*(wd*j+i)] * 0.01f; // L -> [0,1]
-@@ -276,22 +290,38 @@ static inline float *ll_pad_input(
- out[*wd2*j+i] = b->pad0[b->pwd*py+px];\
- } } while(0)
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none) shared(wd2, ht2, b) collapse(2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(input, max_supp, out, wd) \
-+ shared(wd2, ht2, b) \
-+ schedule(dynamic) \
-+ collapse(2)
- #endif // left border
- for(int j=max_supp;j<*ht2-max_supp;j++) for(int i=0;i<max_supp;i++)
- LL_FILL(input[stride*wd*(j-max_supp)]* 0.01f);
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none) shared(wd2, ht2, b) collapse(2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(input, max_supp, out, stride, wd) \
-+ shared(wd2, ht2, b) \
-+ schedule(dynamic) \
-+ collapse(2)
- #endif // right border
- for(int j=max_supp;j<*ht2-max_supp;j++) for(int i=wd+max_supp;i<*wd2;i++)
- LL_FILL(input[stride*((j-max_supp)*wd+wd-1)] * 0.01f);
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none) shared(wd2, ht2, b) collapse(2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(max_supp, out) \
-+ shared(wd2, ht2, b) \
-+ schedule(dynamic) \
-+ collapse(2)
- #endif // top border
- for(int j=0;j<max_supp;j++) for(int i=0;i<*wd2;i++)
- LL_FILL(out[*wd2*max_supp+i]);
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none) shared(wd2, ht2, b) collapse(2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ht, max_supp, out) \
-+ shared(wd2, ht2, b) \
-+ schedule(dynamic) \
-+ collapse(2)
- #endif // bottom border
- for(int j=max_supp+ht;j<*ht2;j++) for(int i=0;i<*wd2;i++)
- LL_FILL(out[*wd2*(max_supp+ht-1)+i]);
-@@ -300,7 +330,10 @@ static inline float *ll_pad_input(
- else
- { // pad by replication:
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none) shared(wd2, ht2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(input, ht, max_supp, out, wd) \
-+ shared(wd2, ht2) \
-+ schedule(dynamic)
- #endif
- for(int j=0;j<ht;j++)
- {
-@@ -312,12 +345,18 @@ static inline float *ll_pad_input(
- out[(j+max_supp)**wd2+i] = input[stride*(j*wd+wd-1)] * 0.01f; // L -> [0,1]
- }
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none) shared(wd2, ht2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(max_supp, out) \
-+ shared(wd2, ht2) \
-+ schedule(dynamic)
- #endif
- for(int j=0;j<max_supp;j++)
- memcpy(out + *wd2*j, out+max_supp**wd2, sizeof(float)**wd2);
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none) shared(wd2, ht2)
-+#pragma omp parallel for default(none) \
-+ schedule(dynamic) \
-+ dt_omp_firstprivate(ht, max_supp, out) \
-+ shared(wd2, ht2)
- #endif
- for(int j=max_supp+ht;j<*ht2;j++)
- memcpy(out + *wd2*j, out + *wd2*(max_supp+ht-1), sizeof(float)**wd2);
-@@ -452,7 +491,9 @@ void apply_curve_sse2(
- {
- // TODO: do all this in avx2 8-wide (should be straight forward):
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(dynamic)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(clarity, g, h, highlights, in, out, padding, shadows, sigma, w) \
-+ schedule(dynamic)
- #endif
- for(uint32_t j=padding;j<h-padding;j++)
- {
-@@ -478,11 +519,15 @@ void apply_curve_sse2(
- for(int i=w-padding;i<w;i++) out2[i] = out2[w-padding-1];
- }
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(dynamic)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(out, padding, w) \
-+ schedule(dynamic)
- #endif
- for(int j=0;j<padding;j++) memcpy(out + w*j, out+padding*w, sizeof(float)*w);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(dynamic)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(h, out, padding, w) \
-+ schedule(dynamic)
- #endif
- for(int j=h-padding;j<h;j++) memcpy(out + w*j, out+w*(h-padding-1), sizeof(float)*w);
- }
-@@ -502,7 +547,9 @@ void apply_curve(
- const float clarity)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(dynamic)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(clarity, g, h, highlights, in, out, padding, sigma, shadows, w) \
-+ schedule(dynamic)
- #endif
- for(uint32_t j=padding;j<h-padding;j++)
- {
-@@ -515,11 +562,15 @@ void apply_curve(
- for(int i=w-padding;i<w;i++) out2[i] = out2[w-padding-1];
- }
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(dynamic)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(out, padding, w) \
-+ schedule(dynamic)
- #endif
- for(int j=0;j<padding;j++) memcpy(out + w*j, out+padding*w, sizeof(float)*w);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(dynamic)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(h, out, padding, w) \
-+ schedule(dynamic)
- #endif
- for(int j=h-padding;j<h;j++) memcpy(out + w*j, out+w*(h-padding-1), sizeof(float)*w);
- }
-@@ -704,7 +755,11 @@ void local_laplacian_internal(
- gauss_expand(output[l+1], output[l], pw, ph);
- // go through all coefficients in the upsampled gauss buffer:
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) collapse(2) shared(w,h,buf,output,l,gamma,padded)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ph, pw) \
-+ shared(w,h,buf,output,l,gamma,padded) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(int j=0;j<ph;j++) for(int i=0;i<pw;i++)
- {
-@@ -724,7 +779,11 @@ void local_laplacian_internal(
- }
- }
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(dynamic) collapse(2) shared(w,output,buf)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ht, input, max_supp, out, wd) \
-+ shared(w,output,buf) \
-+ schedule(dynamic) \
-+ collapse(2)
- #endif
- for(int j=0;j<ht;j++) for(int i=0;i<wd;i++)
- {
-diff --git a/src/common/opencl.c b/src/common/opencl.c
-index e426f39..5160e13 100644
---- a/src/common/opencl.c
-+++ b/src/common/opencl.c
-@@ -950,7 +950,9 @@ static float dt_opencl_benchmark_gpu(const int devid, const size_t width, const
- if(buf == NULL) goto error;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(buf)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, tea_states, width) \
-+ shared(buf)
- #endif
- for(size_t j = 0; j < height; j++)
- {
-@@ -1023,7 +1025,9 @@ static float dt_opencl_benchmark_cpu(const size_t width, const size_t height, co
- if(buf == NULL) goto error;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(buf)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, width, tea_states) \
-+ shared(buf)
- #endif
- for(size_t j = 0; j < height; j++)
- {
-diff --git a/src/common/focus.h b/src/common/focus.h
-index efbfca5..a887686 100644
---- a/src/common/focus.h
-+++ b/src/common/focus.h
-@@ -46,7 +46,10 @@ static inline void _dt_focus_cdf22_wtf(uint8_t *buf, const int l, const int widt
- const int st = step / 2;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(buf) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, st, step, width) \
-+ shared(buf) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-@@ -66,7 +69,10 @@ static inline void _dt_focus_cdf22_wtf(uint8_t *buf, const int l, const int widt
- gbuf(buf, i, j) += _from_uint8(gbuf(buf, i - st, j)) / 2;
- }
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(buf) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, st, step, width) \
-+ shared(buf) \
-+ schedule(static)
- #endif
- for(int i = 0; i < width; i++)
- {
-diff --git a/src/control/jobs/control_jobs.c b/src/control/jobs/control_jobs.c
-index 2fc03e9..df88fb9 100644
---- a/src/control/jobs/control_jobs.c
-+++ b/src/control/jobs/control_jobs.c
-@@ -356,7 +356,10 @@ static int dt_control_merge_hdr_process(dt_imageio_module_data_t *datai, const c
- float saturation = 1.0f;
- d->whitelevel = fmaxf(d->whitelevel, saturation * cal);
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(d, saturation)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ivoid, cal, photoncnt) \
-+ shared(d, saturation) \
-+ schedule(static)
- #endif
- for(int y = 0; y < d->ht; y++)
- for(int x = 0; x < d->wd; x++)
-diff --git a/src/develop/blend.c b/src/develop/blend.c
-index 044880b..ad24cb5 100644
---- a/src/develop/blend.c
-+++ b/src/develop/blend.c
-@@ -2901,7 +2901,8 @@ void dt_develop_blend_process(struct dt_iop_module_t *self, struct dt_dev_pixelp
- // blend uniformly (no drawn or parametric mask)
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(buffsize, mask, opacity)
- #endif
- for(size_t i = 0; i < buffsize; i++) mask[i] = opacity;
- }
-@@ -2920,7 +2921,8 @@ void dt_develop_blend_process(struct dt_iop_module_t *self, struct dt_dev_pixelp
- {
- // if we have a mask and this flag is set -> invert the mask
- #ifdef _OPENMP
--#pragma omp parallel for default(none)
-+ #pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(buffsize, mask)
- #endif
- for(size_t i = 0; i < buffsize; i++) mask[i] = 1.0f - mask[i];
- }
-@@ -2931,7 +2933,8 @@ void dt_develop_blend_process(struct dt_iop_module_t *self, struct dt_dev_pixelp
- // we fill the buffer with 1.0f or 0.0f depending on mask_combine
- const float fill = (d->mask_combine & DEVELOP_COMBINE_MASKS_POS) ? 0.0f : 1.0f;
- #ifdef _OPENMP
--#pragma omp parallel for default(none)
-+ #pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(buffsize, mask, fill)
- #endif
- for(size_t i = 0; i < buffsize; i++) mask[i] = fill;
- }
-@@ -2940,14 +2943,17 @@ void dt_develop_blend_process(struct dt_iop_module_t *self, struct dt_dev_pixelp
- // we fill the buffer with 1.0f or 0.0f depending on mask_combine
- const float fill = (d->mask_combine & DEVELOP_COMBINE_INCL) ? 0.0f : 1.0f;
- #ifdef _OPENMP
--#pragma omp parallel for default(none)
-+ #pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(buffsize, mask, fill)
- #endif
- for(size_t i = 0; i < buffsize; i++) mask[i] = fill;
- }
-
- // get parametric mask (if any) and apply global opacity
- #ifdef _OPENMP
--#pragma omp parallel for default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(bch, ch, cst, d, oheight, opacity, ivoid, iwidth, \
-+ mask, owidth, ovoid, xoffs, yoffs)
- #endif
- for(size_t y = 0; y < oheight; y++)
- {
-@@ -2985,7 +2991,8 @@ void dt_develop_blend_process(struct dt_iop_module_t *self, struct dt_dev_pixelp
- {
- float *const guide_tmp = dt_alloc_align(64, sizeof(*guide_tmp) * buffsize * ch);
- #ifdef _OPENMP
--#pragma omp parallel for default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, guide_tmp, ivoid, iwidth, oheight, owidth, xoffs, yoffs)
- #endif
- for(size_t y = 0; y < oheight; y++)
- {
-@@ -3018,7 +3025,8 @@ void dt_develop_blend_process(struct dt_iop_module_t *self, struct dt_dev_pixelp
- const float e = expf(3.f * d->contrast);
- const float brightness = d->brightness;
- #ifdef _OPENMP
--#pragma omp parallel for default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(brightness, buffsize, e, mask, opacity)
- #endif
- for(size_t k = 0; k < buffsize; k++)
- {
-@@ -3047,7 +3055,10 @@ void dt_develop_blend_process(struct dt_iop_module_t *self, struct dt_dev_pixelp
- // select the blend operator
- _blend_row_func *const blend = dt_develop_choose_blend_func(d->blend_mode);
- #ifdef _OPENMP
--#pragma omp parallel for default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(bch, blend, blendflag, ch, cst, ivoid, iwidth, mask, \
-+ mask_display, oheight, ovoid, owidth, \
-+ request_mask_display, xoffs, yoffs)
- #endif
- for(size_t y = 0; y < oheight; y++)
- {
-@@ -3224,7 +3235,8 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
- {
- // if we have a mask and this flag is set -> invert the mask
- #ifdef _OPENMP
--#pragma omp parallel for default(none)
-+ #pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(buffsize, mask)
- #endif
- for(size_t i = 0; i < buffsize; i++) mask[i] = 1.0f - mask[i];
- }
-@@ -3235,7 +3247,8 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
- // we fill the buffer with 1.0f or 0.0f depending on mask_combine
- const float fill = (d->mask_combine & DEVELOP_COMBINE_MASKS_POS) ? 0.0f : 1.0f;
- #ifdef _OPENMP
--#pragma omp parallel for default(none)
-+ #pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(buffsize, mask, fill)
- #endif
- for(size_t i = 0; i < buffsize; i++) mask[i] = fill;
- }
-@@ -3244,7 +3257,8 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
- // we fill the buffer with 1.0f or 0.0f depending on mask_combine
- const float fill = (d->mask_combine & DEVELOP_COMBINE_INCL) ? 0.0f : 1.0f;
- #ifdef _OPENMP
--#pragma omp parallel for default(none)
-+ #pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(buffsize, mask, fill)
- #endif
- for(size_t i = 0; i < buffsize; i++) mask[i] = fill;
- }
-diff --git a/src/develop/imageop_math.c b/src/develop/imageop_math.c
-index 3159e14..a10492c 100644
---- a/src/develop/imageop_math.c
-+++ b/src/develop/imageop_math.c
-@@ -67,7 +67,10 @@ void dt_iop_flip_and_zoom_8(const uint8_t *in, int32_t iw, int32_t ih, uint8_t *
- const int32_t offm = half_pixel * bpp * MIN(MIN(0, si), MIN(sj, si + sj));
- const int32_t offM = half_pixel * bpp * MAX(MAX(0, si), MAX(sj, si + sj));
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(in, out, jj, ii, sj, si, iw, ih)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(bpp, half_pixel, ht, offM, offm, scale, wd) \
-+ shared(in, out, jj, ii, sj, si, iw, ih) \
-+ schedule(static)
- #endif
- for(uint32_t j = 0; j < ht; j++)
- {
-@@ -211,7 +214,9 @@ void dt_iop_clip_and_zoom_mosaic_half_size_plain(uint16_t *const out, const uint
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(clut) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(filters, in, in_stride, out, out_stride, px_footprint, rggbx, rggby, roi_in, roi_out) \
-+ shared(clut) schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -270,7 +275,9 @@ void dt_iop_clip_and_zoom_mosaic_half_size_sse2(uint16_t *const out, const uint1
- const int rggbx = trggbx, rggby = trggby;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out, out_stride, px_footprint, rggbx, rggby, roi_in, roi_out, samples) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -479,7 +486,9 @@ void dt_iop_clip_and_zoom_mosaic_half_size_f_plain(float *const out, const float
- const int rggbx = trggbx, rggby = trggby;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out, out_stride, px_footprint, rggbx, rggby, roi_in, roi_out, samples) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -666,7 +675,10 @@ void dt_iop_clip_and_zoom_mosaic_half_size_f_sse2(float *const out, const float
- const int rggbx = trggbx, rggby = trggby;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out, out_stride, px_footprint, rggbx, \
-+ rggby, roi_in, roi_out, samples) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -867,7 +879,9 @@ void dt_iop_clip_and_zoom_mosaic_third_size_xtrans(uint16_t *const out, const ui
- // sample (rounded to nearest input pixel) to anti-alias. Higher MP
- // images need larger filters to avoid artifacts.
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out, out_stride, px_footprint, roi_in, roi_out, xtrans) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -906,7 +920,9 @@ void dt_iop_clip_and_zoom_mosaic_third_size_xtrans_f(float *const out, const flo
- {
- const float px_footprint = 1.f / roi_out->scale;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out, out_stride, px_footprint, roi_in, roi_out, xtrans) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -951,7 +967,10 @@ void dt_iop_clip_and_zoom_demosaic_passthrough_monochrome_f_plain(float *out, co
- const int samples = round(px_footprint);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out_stride, px_footprint, roi_in, roi_out, samples) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -1093,7 +1112,10 @@ void dt_iop_clip_and_zoom_demosaic_passthrough_monochrome_f_sse2(float *out, con
- const int samples = round(px_footprint);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out_stride, px_footprint, roi_in, roi_out, samples) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -1347,7 +1369,10 @@ void dt_iop_clip_and_zoom_demosaic_half_size_f_plain(float *out, const float *co
- const int rggbx = trggbx, rggby = trggby;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out_stride, px_footprint, rggbx, rggby, roi_in, roi_out, samples) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -1523,7 +1548,10 @@ void dt_iop_clip_and_zoom_demosaic_half_size_f_sse2(float *out, const float *con
- const int rggbx = trggbx, rggby = trggby;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, px_footprint, rggbx, rggby, out_stride, roi_in, roi_out, samples) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -1712,7 +1740,10 @@ void dt_iop_clip_and_zoom_demosaic_third_size_xtrans_f(float *out, const float *
- // by non-integer number of samples.
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, in_stride, out_stride, px_footprint, roi_in, roi_out, samples, xtrans) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-diff --git a/src/develop/masks/ellipse.c b/src/develop/masks/ellipse.c
-index f498c84..f422b9c 100644
---- a/src/develop/masks/ellipse.c
-+++ b/src/develop/masks/ellipse.c
-@@ -1631,7 +1631,9 @@ static int dt_ellipse_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop
-
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(points)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(iscale, mh, mw, py, px) \
-+ shared(points)
- #else
- #pragma omp parallel for shared(points)
- #endif
-@@ -1689,7 +1691,9 @@ static int dt_ellipse_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop
-
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(points, a, b, ta, tb, alpha)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(center, mh, mw) \
-+ shared(points, a, b, ta, tb, alpha)
- #else
- #pragma omp parallel for shared(points, a, b, ta, tb, alpha)
- #endif
-@@ -1721,7 +1725,9 @@ static int dt_ellipse_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop
- // we fill the output buffer by interpolation
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(points, buffer)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(h, mw, w) \
-+ shared(points, buffer)
- #else
- #pragma omp parallel for shared(points, buffer)
- #endif
-diff --git a/src/develop/masks/gradient.c b/src/develop/masks/gradient.c
-index 5a22673..f3e5f56 100644
---- a/src/develop/masks/gradient.c
-+++ b/src/develop/masks/gradient.c
-@@ -805,7 +805,9 @@ static int dt_gradient_get_mask(dt_iop_module_t *module, dt_dev_pixelpipe_iop_t
-
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(points)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(mh, mw, px, py) \
-+ shared(points)
- #else
- #pragma omp parallel for shared(points)
- #endif
-@@ -849,7 +851,9 @@ static int dt_gradient_get_mask(dt_iop_module_t *module, dt_dev_pixelpipe_iop_t
-
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(points)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(cosv, hwscale, normf, offset, mh, mw, steepness, sinv) \
-+ shared(points)
- #else
- #pragma omp parallel for shared(points)
- #endif
-@@ -879,7 +883,9 @@ static int dt_gradient_get_mask(dt_iop_module_t *module, dt_dev_pixelpipe_iop_t
- // we fill the mask buffer by interpolation
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(points, buffer)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(h, mw, w) \
-+ shared(points, buffer)
- #else
- #pragma omp parallel for shared(points, buffer)
- #endif
-@@ -933,7 +939,9 @@ static int dt_gradient_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_io
-
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(points)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(iscale, mh, mw, py, px) \
-+ shared(points)
- #else
- #pragma omp parallel for shared(points)
- #endif
-@@ -979,7 +987,9 @@ static int dt_gradient_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_io
-
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(points)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(cosv, hwscale, mh, mw, normf, offset, sinv, steepness) \
-+ shared(points)
- #else
- #pragma omp parallel for shared(points)
- #endif
-@@ -1002,7 +1012,9 @@ static int dt_gradient_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_io
- // we fill the mask buffer by interpolation
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(points, buffer)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(h, mw, w) \
-+ shared(points, buffer)
- #else
- #pragma omp parallel for shared(points, buffer)
- #endif
-diff --git a/src/develop/masks/group.c b/src/develop/masks/group.c
-index f5b1ec8..df9c434 100644
---- a/src/develop/masks/group.c
-+++ b/src/develop/masks/group.c
-@@ -549,7 +549,9 @@ static int dt_group_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop_t
- {
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(bufs)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, width) \
-+ shared(bufs)
- #else
- #pragma omp parallel for shared(bufs)
- #endif
-@@ -566,7 +568,9 @@ static int dt_group_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop_t
- {
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(bufs, buffer)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, op, width) \
-+ shared(bufs, buffer)
- #else
- #pragma omp parallel for shared(bufs, buffer)
- #endif
-@@ -582,7 +586,9 @@ static int dt_group_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop_t
- {
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(bufs, buffer)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, op, width) \
-+ shared(bufs, buffer)
- #else
- #pragma omp parallel for shared(bufs, buffer)
- #endif
-@@ -603,7 +609,9 @@ static int dt_group_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop_t
- {
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(bufs, buffer)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, op, width) \
-+ shared(bufs, buffer)
- #else
- #pragma omp parallel for shared(bufs, buffer)
- #endif
-@@ -621,7 +629,9 @@ static int dt_group_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop_t
- {
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(bufs, buffer)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, op, width) \
-+ shared(bufs, buffer)
- #else
- #pragma omp parallel for shared(bufs, buffer)
- #endif
-@@ -642,7 +652,9 @@ static int dt_group_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop_t
- {
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(bufs, buffer)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, op, width) \
-+ shared(bufs, buffer)
- #else
- #pragma omp parallel for shared(bufs, buffer)
- #endif
-diff --git a/src/develop/masks/circle.c b/src/develop/masks/circle.c
-index 7ba7edb..424d890 100644
---- a/src/develop/masks/circle.c
-+++ b/src/develop/masks/circle.c
-@@ -937,7 +937,9 @@ static int dt_circle_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop_
-
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(points)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(iscale, mh, mw, px, py) \
-+ shared(points)
- #else
- #pragma omp parallel for shared(points)
- #endif
-@@ -976,7 +978,9 @@ static int dt_circle_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop_
-
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(points)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(center, mh, mw, radius2, total2) \
-+ shared(points)
- #else
- #pragma omp parallel for shared(points)
- #endif
-@@ -1003,7 +1007,9 @@ static int dt_circle_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop_
- // we fill the output buffer by interpolation
- #ifdef _OPENMP
- #if !defined(__SUNOS__) && !defined(__NetBSD__)
--#pragma omp parallel for default(none) shared(points, buffer)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(h, mw, w) \
-+ shared(points, buffer)
- #else
- #pragma omp parallel for shared(points, buffer)
- #endif
-diff --git a/src/develop/pixelpipe_hb.c b/src/develop/pixelpipe_hb.c
-index 290c4cd..97057b3 100644
---- a/src/develop/pixelpipe_hb.c
-+++ b/src/develop/pixelpipe_hb.c
-@@ -689,7 +689,10 @@ static int dt_dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, dt_develop_t *
- const int cp_height = MIN(roi_out->height, pipe->iheight - in_y);
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(pipe, roi_out, roi_in, output)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(bpp, cp_height, cp_width, in_x, in_y) \
-+ shared(pipe, roi_out, roi_in, output) \
-+ schedule(static)
- #endif
- for(int j = 0; j < cp_height; j++)
- memcpy(((char *)*output) + (size_t)bpp * j * roi_out->width,
-@@ -791,7 +794,10 @@ static int dt_dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, dt_develop_t *
- else
- {
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(roi_out, roi_in, output, input)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in_bpp, out_bpp) \
-+ shared(roi_out, roi_in, output, input) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- memcpy(((char *)*output) + (size_t)out_bpp * j * roi_out->width,
-@@ -800,7 +806,10 @@ static int dt_dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, dt_develop_t *
- }
- #else // don't HAVE_OPENCL
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(roi_out, roi_in, output, input)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in_bpp, out_bpp) \
-+ shared(roi_out, roi_in, output, input) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- memcpy(((char *)*output) + (size_t)out_bpp * j * roi_out->width,
-diff --git a/src/develop/tiling.c b/src/develop/tiling.c
-index fdb5261..c8b363a 100644
---- a/src/develop/tiling.c
-+++ b/src/develop/tiling.c
-@@ -753,7 +753,10 @@ static void _default_process_tiling_ptp(struct dt_iop_module_t *self, struct dt_
-
- /* prepare input tile buffer */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(input, width, ioffs) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ht, in_bpp, ipitch, ivoid, wd) \
-+ shared(input, width, ioffs) \
-+ schedule(static)
- #endif
- for(size_t j = 0; j < ht; j++)
- memcpy((char *)input + j * wd * in_bpp, (char *)ivoid + ioffs + j * ipitch, (size_t)wd * in_bpp);
-@@ -794,7 +797,10 @@ static void _default_process_tiling_ptp(struct dt_iop_module_t *self, struct dt_
-
- /* copy "good" part of tile to output buffer */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(ooffs, output, width, origin, region) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(opitch, out_bpp, ovoid, wd) \
-+ shared(ooffs, output, width, origin, region) \
-+ schedule(static)
- #endif
- for(size_t j = 0; j < region[1]; j++)
- memcpy((char *)ovoid + ooffs + j * opitch,
-@@ -1096,7 +1102,10 @@ static void _default_process_tiling_roi(struct dt_iop_module_t *self, struct dt_
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(input, ioffs, iroi_full) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in_bpp, ipitch, ivoid) \
-+ shared(input, ioffs, iroi_full) \
-+ schedule(static)
- #endif
- for(size_t j = 0; j < iroi_full.height; j++)
- memcpy((char *)input + j * iroi_full.width * in_bpp, (char *)ivoid + ioffs + j * ipitch,
-@@ -1125,7 +1134,10 @@ static void _default_process_tiling_roi(struct dt_iop_module_t *self, struct dt_
- const int origin_x = oroi_good.x - oroi_full.x;
- const int origin_y = oroi_good.y - oroi_full.y;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(ooffs, output, oroi_good, oroi_full) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(opitch, origin_x, origin_y, out_bpp, ovoid) \
-+ shared(ooffs, output, oroi_good, oroi_full) \
-+ schedule(static)
- #endif
- for(size_t j = 0; j < oroi_good.height; j++)
- memcpy((char *)ovoid + ooffs + j * opitch,
-@@ -1408,7 +1420,10 @@ static int _default_process_tiling_cl_ptp(struct dt_iop_module_t *self, struct d
- {
- /* prepare pinned input tile buffer: copy part of input image */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(input_buffer, width, ioffs, wd, ht) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in_bpp, ipitch, ivoid) \
-+ shared(input_buffer, width, ioffs, wd, ht) \
-+ schedule(static)
- #endif
- for(size_t j = 0; j < ht; j++)
- memcpy((char *)input_buffer + j * wd * in_bpp, (char *)ivoid + ioffs + j * ipitch,
-@@ -1863,7 +1878,9 @@ static int _default_process_tiling_cl_roi(struct dt_iop_module_t *self, struct d
- {
- /* prepare pinned input tile buffer: copy part of input image */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(input_buffer, width, ioffs, iroi_full) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in_bpp, ipitch, ivoid) \
-+ shared(input_buffer, width, ioffs, iroi_full) schedule(static)
- #endif
- for(size_t j = 0; j < iroi_full.height; j++)
- memcpy((char *)input_buffer + j * iroi_full.width * in_bpp, (char *)ivoid + ioffs + j * ipitch,
-@@ -1910,8 +1927,10 @@ static int _default_process_tiling_cl_roi(struct dt_iop_module_t *self, struct d
-
- /* copy "good" part of tile from pinned output buffer to output image */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(ooffs, output_buffer, oroi_full, oorigin, \
-- oregion) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ipitch, opitch, ovoid, out_bpp) \
-+ shared(ooffs, output_buffer, oroi_full, oorigin, oregion) \
-+ schedule(static)
- #endif
- for(size_t j = 0; j < oregion[1]; j++)
- memcpy((char *)ovoid + ooffs + j * opitch,
-diff --git a/src/gui/draw.h b/src/gui/draw.h
-index 50d4550..dcaaf21 100644
---- a/src/gui/draw.h
-+++ b/src/gui/draw.h
-@@ -24,6 +24,7 @@
- #include "config.h"
- #endif
-
-+#include "common/darktable.h"
- #include "common/curve_tools.h"
- #include <cairo.h>
- #include <glib.h>
-@@ -229,14 +230,20 @@ static inline void dt_draw_curve_calc_values(dt_draw_curve_t *c, const float min
- if(x)
- {
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) shared(x) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(res) \
-+ shared(x) \
-+ schedule(static)
- #endif
- for(int k = 0; k < res; k++) x[k] = k * (1.0f / res);
- }
- if(y)
- {
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) shared(y, c) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(min, max, res) \
-+ shared(y, c) \
-+ schedule(static)
- #endif
- for(int k = 0; k < res; k++) y[k] = min + (max - min) * c->csample.m_Samples[k] * (1.0f / 0x10000);
- }
-diff --git a/src/iop/hotpixels.c b/src/iop/hotpixels.c
-index 0c07f7d..74bc0db 100644
---- a/src/iop/hotpixels.c
-+++ b/src/iop/hotpixels.c
-@@ -117,7 +117,11 @@ static int process_bayer(const dt_iop_hotpixels_data_t *data,
- int fixed = 0;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) reduction(+ : fixed) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ivoid, markfixed, min_neighbours, multiplier, ovoid, \
-+ roi_out, threshold, width, widthx2) \
-+ reduction(+ : fixed) \
-+ schedule(static)
- #endif
- for(int row = 2; row < roi_out->height - 2; row++)
- {
-@@ -214,7 +218,12 @@ static int process_xtrans(const dt_iop_hotpixels_data_t *data,
- int fixed = 0;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(offsets) reduction(+ : fixed) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ivoid, markfixed, min_neighbours, multiplier, ovoid, \
-+ roi_out, threshold, xtrans, width) \
-+ shared(offsets) \
-+ reduction(+ : fixed) \
-+ schedule(static)
- #endif
- for(int row = 2; row < roi_out->height - 2; row++)
- {
-diff --git a/src/iop/lens.c b/src/iop/lens.c
-index ff0a72a..d9fea09 100644
---- a/src/iop/lens.c
-+++ b/src/iop/lens.c
-@@ -370,7 +370,11 @@ void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *c
- void *buf = dt_alloc_align(16, bufsize * dt_get_num_threads() * sizeof(float));
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(buf, modifier) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(bufsize, ch, ch_width, d, interpolation, ivoid, \
-+ mask_display, ovoid, roi_in, roi_out) \
-+ shared(buf, modifier) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -424,7 +428,10 @@ void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *c
- if(modflags & LF_MODIFY_VIGNETTING)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(modifier) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, pixelformat, roi_out, ovoid) \
-+ shared(modifier) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -446,7 +453,10 @@ void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *c
- if(modflags & LF_MODIFY_VIGNETTING)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(buf, modifier) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, pixelformat, roi_in) \
-+ shared(buf, modifier) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_in->height; y++)
- {
-@@ -465,7 +475,10 @@ void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *c
- void *buf2 = dt_alloc_align(16, buf2size * sizeof(float) * dt_get_num_threads());
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(buf2, buf, modifier) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(buf2size, ch, ch_width, d, interpolation, mask_display, ovoid, roi_in, roi_out) \
-+ shared(buf2, buf, modifier) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -616,7 +629,10 @@ int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_m
- if(modflags & (LF_MODIFY_TCA | LF_MODIFY_DISTORTION | LF_MODIFY_GEOMETRY | LF_MODIFY_SCALE))
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(tmpbuf, d, modifier) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(tmpbufwidth, roi_out) \
-+ shared(tmpbuf, d, modifier) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -652,7 +668,10 @@ int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_m
- if(modflags & LF_MODIFY_VIGNETTING)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(tmpbuf, modifier, d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, pixelformat, roi_out) \
-+ shared(tmpbuf, modifier, d) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -691,7 +710,10 @@ int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_m
- if(modflags & LF_MODIFY_VIGNETTING)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(tmpbuf, modifier, d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, pixelformat, roi_in) \
-+ shared(tmpbuf, modifier, d) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_in->height; y++)
- {
-@@ -725,7 +747,10 @@ int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_m
- if(modflags & (LF_MODIFY_TCA | LF_MODIFY_DISTORTION | LF_MODIFY_GEOMETRY | LF_MODIFY_SCALE))
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(tmpbuf, d, modifier) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(tmpbufwidth, roi_out) \
-+ shared(tmpbuf, d, modifier) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -888,7 +913,10 @@ void modify_roi_in(struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *
- float *const buf = dt_alloc_align(16, nbpoints * 2 * 3 * sizeof(float));
-
- #ifdef _OPENMP
--#pragma omp parallel default(none) shared(modifier) reduction(min : xm, ym) reduction(max : xM, yM)
-+#pragma omp parallel default(none) \
-+ dt_omp_firstprivate(aheight, awidth, buf, height, nbpoints, width, xoff, \
-+ xstep, yoff, ystep) \
-+ shared(modifier) reduction(min : xm, ym) reduction(max : xM, yM)
- #endif
- {
- #ifdef _OPENMP
-diff --git a/src/develop/imageop_math.h b/src/develop/imageop_math.h
-index 7203fa7..0b4acf8 100644
---- a/src/develop/imageop_math.h
-+++ b/src/develop/imageop_math.h
-@@ -158,7 +158,10 @@ static inline float dt_iop_eval_exp(const float *const coeff, const float x)
- static inline void dt_iop_alpha_copy(const void *ivoid, void *ovoid, const int width, const int height)
- {
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(ovoid, ivoid)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, width) \
-+ shared(ovoid, ivoid) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-diff --git a/src/iop/hazeremoval.c b/src/iop/hazeremoval.c
-index 409e8d2..772a9ed 100644
---- a/src/iop/hazeremoval.c
-+++ b/src/iop/hazeremoval.c
-@@ -369,7 +369,9 @@ static void box_max(const gray_image img1, const gray_image img2, const int w)
- if(img1.data == img2.data)
- {
- #ifdef _OPENMP
--#pragma omp parallel default(none) private(img2_bak)
-+#pragma omp parallel default(none) \
-+ dt_omp_firstprivate(img1, img2, w) \
-+ private(img2_bak)
- #endif
- {
- img2_bak = new_gray_image(img2.width, 1);
-@@ -387,7 +389,9 @@ static void box_max(const gray_image img1, const gray_image img2, const int w)
- else
- {
- #ifdef _OPENMP
--#pragma omp parallel default(none) private(img2_bak)
-+#pragma omp parallel default(none) \
-+ dt_omp_firstprivate(img1, img2, w) \
-+ private(img2_bak)
- #endif
- {
- #ifdef _OPENMP
-@@ -398,7 +402,9 @@ static void box_max(const gray_image img1, const gray_image img2, const int w)
- }
- }
- #ifdef _OPENMP
--#pragma omp parallel default(none) private(img2_bak)
-+#pragma omp parallel default(none) \
-+ dt_omp_firstprivate(img1, img2, w) \
-+ private(img2_bak)
- #endif
- {
- img2_bak = new_gray_image(1, img2.height);
-@@ -440,7 +446,9 @@ static void box_min(const gray_image img1, const gray_image img2, const int w)
- if(img1.data == img2.data)
- {
- #ifdef _OPENMP
--#pragma omp parallel default(none) private(img2_bak)
-+#pragma omp parallel default(none) \
-+ dt_omp_firstprivate(img1, img2, w) \
-+ private(img2_bak)
- #endif
- {
- img2_bak = new_gray_image(img2.width, 1);
-@@ -458,7 +466,9 @@ static void box_min(const gray_image img1, const gray_image img2, const int w)
- else
- {
- #ifdef _OPENMP
--#pragma omp parallel default(none) private(img2_bak)
-+#pragma omp parallel default(none) \
-+ dt_omp_firstprivate(img1, img2, w) \
-+ private(img2_bak)
- #endif
- {
- #ifdef _OPENMP
-@@ -469,7 +479,9 @@ static void box_min(const gray_image img1, const gray_image img2, const int w)
- }
- }
- #ifdef _OPENMP
--#pragma omp parallel default(none) private(img2_bak)
-+#pragma omp parallel default(none) \
-+ dt_omp_firstprivate(img1, img2, w) \
-+ private(img2_bak)
- #endif
- {
- img2_bak = new_gray_image(1, img2.height);
-@@ -490,7 +502,9 @@ static void dark_channel(const const_rgb_image img1, const gray_image img2, cons
- {
- const size_t size = (size_t)img1.height * img1.width;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(img1, img2, size) \
-+ schedule(static)
- #endif
- for(size_t i = 0; i < size; i++)
- {
-@@ -509,7 +523,9 @@ static void transition_map(const const_rgb_image img1, const gray_image img2, co
- {
- const size_t size = (size_t)img1.height * img1.width;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(A0, img1, img2, size, strength) \
-+ schedule(static)
- #endif
- for(size_t i = 0; i < size; i++)
- {
-@@ -782,7 +798,10 @@ static float ambient_light(const const_rgb_image img, int w1, rgb_pixel *pA0)
- size_t N_bright_hazy = 0;
- const float *const data = dark_ch.data;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) reduction(+ : N_bright_hazy, A0_r, A0_g, A0_b)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(crit_brightness, crit_haze_level, data, img, size) \
-+ schedule(static) \
-+ reduction(+ : N_bright_hazy, A0_r, A0_g, A0_b)
- #endif
- for(size_t i = 0; i < size; i++)
- {
-@@ -898,7 +917,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const gray_image c_trans_map_filtered = trans_map_filtered;
- #ifdef _OPENMP
- // use dynamic load ballancing as tiles may have varying size
--#pragma omp parallel for default(none) schedule(dynamic) collapse(2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(c_trans_map, c_trans_map_filtered, height, img_in, w2, eps, tile_width, width) \
-+ schedule(dynamic) \
-+ collapse(2)
- #endif
- for(int j = 0; j < height; j += tile_width)
- {
-@@ -914,7 +936,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- = fmaxf(expf(-distance * distance_max), 1.f / 1024); // minimum allowed value for transition map
- const float *const c_A0 = A0;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(c_A0, c_trans_map_filtered, img_in, img_out, size, t_min) \
-+ schedule(static)
- #endif
- for(size_t i = 0; i < size; i++)
- {
-diff --git a/src/iop/ashift.c b/src/iop/ashift.c
-index 1245178..8650cf9 100644
---- a/src/iop/ashift.c
-+++ b/src/iop/ashift.c
-@@ -878,7 +878,10 @@ int distort_transform(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, floa
- const float cy = fullheight * data->ct;
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(points, points_count, homograph)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(cx, cy) \
-+ shared(points, points_count, homograph) \
-+ schedule(static)
- #endif
- for(size_t i = 0; i < points_count * 2; i += 2)
- {
-@@ -912,7 +915,10 @@ int distort_backtransform(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
- const float cy = fullheight * data->ct;
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(points, points_count, ihomograph)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(cx, cy) \
-+ shared(points, points_count, ihomograph) \
-+ schedule(static)
- #endif
- for(size_t i = 0; i < points_count * 2; i += 2)
- {
-@@ -1062,7 +1068,10 @@ static void rgb2grey256(const float *in, double *out, const int width, const int
- const int ch = 4;
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(in, out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, width) \
-+ shared(in, out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-@@ -1089,7 +1098,10 @@ static void edge_enhance_1d(const double *in, double *out, const int width, cons
- const double *kernel = (dir == ASHIFT_ENHANCE_HORIZONTAL) ? (const double *)hkernel : (const double *)vkernel;
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(in, out, kernel)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, width) \
-+ shared(in, out, kernel) \
-+ schedule(static)
- #endif
- // loop over image pixels and perform sobel convolution
- for(int j = khwidth; j < height - khwidth; j++)
-@@ -1113,7 +1125,10 @@ static void edge_enhance_1d(const double *in, double *out, const int width, cons
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, width) \
-+ shared(out) \
-+ schedule(static)
- #endif
- // border fill in output buffer, so we don't get pseudo lines at image frame
- for(int j = 0; j < height; j++)
-@@ -1155,7 +1170,10 @@ static int edge_enhance(const double *in, double *out, const int width, const in
-
- // calculate absolute values
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(Gx, Gy, out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, width) \
-+ shared(Gx, Gy, out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)width * height; k++)
- {
-@@ -1202,7 +1220,10 @@ static int detail_enhance(const float *in, float *out, const int width, const in
-
- // convert RGB input to Lab, use output buffer for intermediate storage
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(in, out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, width) \
-+ shared(in, out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-@@ -1231,7 +1252,10 @@ static int detail_enhance(const float *in, float *out, const int width, const in
-
- // convert resulting Lab to RGB output
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, width) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-@@ -1251,7 +1275,10 @@ static int detail_enhance(const float *in, float *out, const int width, const in
- static void gamma_correct(const float *in, float *out, const int width, const int height)
- {
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(in, out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, width) \
-+ shared(in, out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-@@ -2835,7 +2862,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(ihomograph, interpolation)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ch_width, cx, cy, ivoid, ovoid, roi_in, roi_out) \
-+ shared(ihomograph, interpolation) \
-+ schedule(static)
- #endif
- // go over all pixels of output image
- for(int j = 0; j < roi_out->height; j++)
-diff --git a/src/iop/globaltonemap.c b/src/iop/globaltonemap.c
-index 45493e3..e71e744 100644
---- a/src/iop/globaltonemap.c
-+++ b/src/iop/globaltonemap.c
-@@ -139,7 +139,10 @@ static inline void process_reinhard(struct dt_iop_module_t *self, dt_dev_pixelpi
- const int ch = piece->colors;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out, data) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, roi_out) \
-+ shared(in, out, data) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-@@ -216,7 +219,10 @@ static inline void process_drago(struct dt_iop_module_t *self, dt_dev_pixelpipe_
- const float bl = logf(fmaxf(eps, data->drago.bias)) / logf(0.5);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out, lwmax) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, bl, ldc, roi_out) \
-+ shared(in, out, lwmax) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-@@ -240,7 +246,10 @@ static inline void process_filmic(struct dt_iop_module_t *self, dt_dev_pixelpipe
- const int ch = piece->colors;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out, data) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, roi_out) \
-+ shared(in, out, data) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-diff --git a/src/iop/colorcontrast.c b/src/iop/colorcontrast.c
-index f88dd15..b7de39d 100644
---- a/src/iop/colorcontrast.c
-+++ b/src/iop/colorcontrast.c
-@@ -153,7 +153,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- if(d->unbound)
- {
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, d, in, out, roi_out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -166,7 +168,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- else
- {
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, d, in, out, roi_out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -200,7 +204,11 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
-
- // iterate over all output pixels (same coordinates as input)
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(d)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, max, min, offset, ovoid, roi_in, roi_out, \
-+ scale, unbound) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-diff --git a/src/iop/colorize.c b/src/iop/colorize.c
-index 23f0340..5fb0db7 100644
---- a/src/iop/colorize.c
-+++ b/src/iop/colorize.c
-@@ -144,7 +144,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const float Lmlmix = L - (mix * 100.0f) / 2.0f;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) private(in, out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(a, b, ch, ivoid, Lmlmix, mix, ovoid, roi_out) \
-+ private(in, out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/nlmeans.c b/src/iop/nlmeans.c
-index a9c1e5d..d3e6cd7 100644
---- a/src/iop/nlmeans.c
-+++ b/src/iop/nlmeans.c
-@@ -410,7 +410,11 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- // do this in parallel with a little threading overhead. could parallelize the outer loops with a bit more
- // memory
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) firstprivate(inited_slide) shared(kj, ki, Sa)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ivoid, norm2, ovoid, P, roi_in, roi_out, sharpness) \
-+ firstprivate(inited_slide) \
-+ shared(kj, ki, Sa) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -496,7 +500,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- float *const out = ((float *const)ovoid);
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, in, invert, out, roi_out, weight) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -554,7 +561,11 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- // do this in parallel with a little threading overhead. could parallelize the outer loops with a bit more
- // memory
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) firstprivate(inited_slide) shared(kj, ki, Sa)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ivoid, norm2, ovoid, P, roi_in, roi_out, sharpness) \
-+ firstprivate(inited_slide) \
-+ shared(kj, ki, Sa) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -686,7 +697,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const __m128 weight = _mm_set_ps(1.0f, d->chroma, d->chroma, d->luma);
- const __m128 invert = _mm_sub_ps(_mm_set1_ps(1.0f), weight);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(d)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(invert, ivoid, ovoid, roi_out, weight) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-diff --git a/src/iop/lowlight.c b/src/iop/lowlight.c
-index 59b56e9..24cf8a6 100644
---- a/src/iop/lowlight.c
-+++ b/src/iop/lowlight.c
-@@ -132,7 +132,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- dt_Lab_to_XYZ(Lab_sw, XYZ_sw);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(d, XYZ_sw)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, i, o, roi_out) \
-+ shared(d, XYZ_sw) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-diff --git a/src/iop/overexposed.c b/src/iop/overexposed.c
-index 2969f2b..a417ec7 100644
---- a/src/iop/overexposed.c
-+++ b/src/iop/overexposed.c
-@@ -125,7 +125,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- float *const out = (float *const)ovoid;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, in, lower, lower_color, out, roi_out, \
-+ upper, upper_color) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -174,7 +177,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const __m128 lower_color = _mm_load_ps(dt_iop_overexposed_colors[colorscheme][1]);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, lower, lower_color, ovoid, roi_out, \
-+ mupper, mlower, upper, upper_color) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/clipping.c b/src/iop/clipping.c
-index 3e72040..42bb0ca 100644
---- a/src/iop/clipping.c
-+++ b/src/iop/clipping.c
-@@ -844,7 +844,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- && roi_in->height == roi_out->height)
- {
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(d)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, ovoid, roi_out) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -870,8 +873,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- keystone_get_matrix(k_space, kxa, kxb, kxc, kxd, kya, kyb, kyc, kyd, &ma, &mb, &md, &me, &mg, &mh);
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(d, interpolation, k_space, ma, mb, md, me, \
-- mg, mh)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ch_width, ivoid, kxa, kya, ovoid, roi_in, roi_out) \
-+ shared(d, interpolation, k_space, ma, mb, md, me, mg, mh) \
-+ schedule(static)
- #endif
- // (slow) point-by-point transformation.
- // TODO: optimize with scanlines and linear steps between?
-diff --git a/src/iop/rawdenoise.c b/src/iop/rawdenoise.c
-index 0a4773f..86173b0 100644
---- a/src/iop/rawdenoise.c
-+++ b/src/iop/rawdenoise.c
-@@ -235,7 +235,10 @@ static void wavelet_denoise(const float *const in, float *const out, const dt_io
- const int halfheight = roi->height / 2 + (roi->height & (~c) & 1);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(c) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, fimg, roi, size, halfwidth) \
-+ shared(c) \
-+ schedule(static)
- #endif
- for(int row = c & 1; row < roi->height; row += 2)
- {
-@@ -255,7 +258,10 @@ static void wavelet_denoise(const float *const in, float *const out, const dt_io
-
- // filter horizontally and transpose
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(lev) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(fimg, halfheight, halfwidth, pass1, pass2) \
-+ shared(lev) \
-+ schedule(static)
- #endif
- for(int col = 0; col < halfwidth; col++)
- {
-@@ -264,7 +270,10 @@ static void wavelet_denoise(const float *const in, float *const out, const dt_io
- }
- // filter vertically and transpose back
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(lev) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(fimg, halfheight, halfwidth, pass2, pass3) \
-+ shared(lev) \
-+ schedule(static)
- #endif
- for(int row = 0; row < halfheight; row++)
- {
-@@ -274,7 +283,9 @@ static void wavelet_denoise(const float *const in, float *const out, const dt_io
-
- const float thold = threshold * noise[lev];
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(lev)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(fimg, halfheight, halfwidth, pass1, pass3, thold) \
-+ shared(lev)
- #endif
- for(size_t i = 0; i < (size_t)halfwidth * halfheight; i++)
- {
-@@ -286,7 +297,10 @@ static void wavelet_denoise(const float *const in, float *const out, const dt_io
- lastpass = pass3;
- }
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(c, lastpass) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(fimg, halfwidth, out, roi) \
-+ shared(c, lastpass) \
-+ schedule(static)
- #endif
- for(int row = c & 1; row < roi->height; row += 2)
- {
-@@ -390,7 +404,10 @@ static void wavelet_denoise_xtrans(const float *const in, float *out, const dt_i
- memset(fimg, 0, size * sizeof(float));
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(c) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(fimg, height, in, roi, size, width, xtrans) \
-+ shared(c) \
-+ schedule(static)
- #endif
- for(int row = (c != 1); row < height - 1; row++)
- {
-@@ -423,20 +440,28 @@ static void wavelet_denoise_xtrans(const float *const in, float *out, const dt_i
-
- // filter horizontally and transpose
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(lev) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(fimg, height, pass1, pass2, width) \
-+ shared(lev) \
-+ schedule(static)
- #endif
- for(int col = 0; col < width; col++)
- hat_transform(fimg + pass2 + (size_t)col * height, fimg + pass1 + col, width, height, 1 << lev);
- // filter vertically and transpose back
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(lev) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(fimg, height, pass2, pass3, width) \
-+ shared(lev) \
-+ schedule(static)
- #endif
- for(int row = 0; row < height; row++)
- hat_transform(fimg + pass3 + (size_t)row * width, fimg + pass2 + row, height, width, 1 << lev);
-
- const float thold = threshold * noise[lev];
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(lev)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(fimg, pass1, pass3, size, thold) \
-+ shared(lev)
- #endif
- for(size_t i = 0; i < size; i++)
- {
-@@ -449,7 +474,10 @@ static void wavelet_denoise_xtrans(const float *const in, float *out, const dt_i
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(c, lastpass, out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, fimg, roi, width, xtrans) \
-+ shared(c, lastpass, out) \
-+ schedule(static)
- #endif
- for(int row = 0; row < height; row++)
- {
-diff --git a/src/iop/colorchecker.c b/src/iop/colorchecker.c
-index 1218a59..a32557d 100644
---- a/src/iop/colorchecker.c
-+++ b/src/iop/colorchecker.c
-@@ -380,7 +380,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const dt_iop_colorchecker_data_t *const data = (dt_iop_colorchecker_data_t *)piece->data;
- const int ch = piece->colors;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) collapse(2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, data, ivoid, ovoid, roi_in, roi_out) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(int j=0;j<roi_out->height;j++)
- {
-diff --git a/src/iop/colorzones.c b/src/iop/colorzones.c
-index d798cf4..604b583 100644
---- a/src/iop/colorzones.c
-+++ b/src/iop/colorzones.c
-@@ -203,7 +203,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- dt_iop_colorzones_data_t *d = (dt_iop_colorzones_data_t *)(piece->data);
- const int ch = piece->colors;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(d)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, i, o, roi_out) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-diff --git a/src/iop/colorout.c b/src/iop/colorout.c
-index e9108a0..be9995b 100644
---- a/src/iop/colorout.c
-+++ b/src/iop/colorout.c
-@@ -292,7 +292,9 @@ static void process_fastpath_apply_tonecurves(struct dt_iop_module_t *self, dt_d
- { // apply profile
- float *const out = (float *const)ovoid;
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, out, roi_out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -307,7 +309,9 @@ static void process_fastpath_apply_tonecurves(struct dt_iop_module_t *self, dt_d
- { // apply profile
- float *const out = (float *const)ovoid;
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, out, roi_out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -340,7 +344,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- // fprintf(stderr,"Using cmatrix codepath\n");
- // convert to rgb using matrix
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -366,7 +372,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- {
- // fprintf(stderr,"Using xform codepath\n");
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, gamutcheck, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-@@ -410,7 +418,9 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- // fprintf(stderr,"Using cmatrix codepath\n");
- // convert to rgb using matrix
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(d, ch, ivoid, ovoid, roi_in, roi_out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -441,7 +451,9 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- // fprintf(stderr,"Using xform codepath\n");
- const __m128 outofgamutpixel = _mm_set_ps(0.0f, 1.0f, 1.0f, 0.0f);
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, ivoid, gamutcheck, outofgamutpixel, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/bilateral.cc b/src/iop/bilateral.cc
-index 79cf150..2a0cac7 100644
---- a/src/iop/bilateral.cc
-+++ b/src/iop/bilateral.cc
-@@ -144,7 +144,11 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- float *const weights_buf = (float *)malloc(weights_size * dt_get_num_threads() * sizeof(float));
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(m, mat, isig2col) private(in, out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, ovoid, rad, roi_in, roi_out, wd, weights_buf) \
-+ shared(m, mat, isig2col) \
-+ private(in, out) \
-+ schedule(static)
- #endif
- for(int j = rad; j < roi_out->height - rad; j++)
- {
-diff --git a/src/iop/colorbalance.c b/src/iop/colorbalance.c
-index 19270e4..a966442 100644
---- a/src/iop/colorbalance.c
-+++ b/src/iop/colorbalance.c
-@@ -334,7 +334,11 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- (gamma[2] != 0.0) ? 1.0 / gamma[2] : 1000000.0 };
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) shared(d) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, gain, gamma_inv, lift, ivoid, ovoid, roi_in, \
-+ roi_out) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_in->width * roi_out->height; k += ch)
- {
-@@ -381,7 +385,12 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- (gamma[2] != 0.0) ? 1.0 / gamma[2] : 1000000.0 };
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) shared(d) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, contrast, gain, gamma_inv, grey, ivoid, lift, \
-+ ovoid, roi_in, roi_out, run_contrast, \
-+ run_saturation, run_saturation_out) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_in->width * roi_out->height; k += ch)
- {
-@@ -445,7 +454,12 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- (2.0f - d->gamma[CHANNEL_BLUE]) * (2.0f - d->gamma[CHANNEL_FACTOR])};
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) shared(d) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, contrast, gain, gamma, grey, ivoid, lift, ovoid, \
-+ roi_in, roi_out, run_contrast, run_saturation, \
-+ run_saturation_out) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_in->width * roi_out->height; k += ch)
- {
-@@ -543,7 +557,9 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- 0.0f);
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, gain, gamma_inv, ivoid, lift, one, ovoid, roi_in, roi_out, zero) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_in->width * roi_out->height; k += ch)
- {
-@@ -591,7 +607,13 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const __m128 gamma_inv_RGB = _mm_set1_ps(1.0f/2.2f);
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD()default(none) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, contrast, gain, gamma_inv, gamma_inv_RGB, \
-+ gamma_RGB, grey, ivoid, lift, one, ovoid, roi_in, \
-+ roi_out, run_contrast, run_saturation, \
-+ run_saturation_out, saturation, saturation_out, \
-+ zero) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_in->width * roi_out->height; k += ch)
- {
-@@ -659,7 +681,12 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- 0.0f);
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, contrast, gain, gamma, grey, ivoid, lift, ovoid, \
-+ roi_in, roi_out, run_contrast, run_saturation, \
-+ run_saturation_out, saturation, saturation_out, \
-+ zero) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_in->width * roi_out->height; k += ch)
- {
-diff --git a/src/iop/equalizer_eaw.h b/src/iop/equalizer_eaw.h
-index 1d58e24..e2afc51 100644
---- a/src/iop/equalizer_eaw.h
-+++ b/src/iop/equalizer_eaw.h
-@@ -43,7 +43,11 @@ static void dt_iop_equalizer_wtf(float *buf, float **weight_a, const int l, cons
-
- float *const tmp_width_buf = (float *)malloc(width * dt_get_num_threads() * sizeof(float));
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(weight_a, buf) private(ch) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, l, st, step, tmp_width_buf, wd, width) \
-+ shared(weight_a, buf) \
-+ private(ch) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-@@ -73,7 +77,11 @@ static void dt_iop_equalizer_wtf(float *buf, float **weight_a, const int l, cons
-
- float *const tmp_height_buf = (float *)malloc(height * dt_get_num_threads() * sizeof(float));
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(weight_a, buf) private(ch) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, l, st, step, tmp_height_buf, wd, width) \
-+ shared(weight_a, buf) \
-+ private(ch) \
-+ schedule(static)
- #endif
- for(int i = 0; i < width; i++)
- {
-@@ -110,7 +118,10 @@ static void dt_iop_equalizer_iwtf(float *buf, float **weight_a, const int l, con
-
- float *const tmp_height_buf = (float *)malloc(height * dt_get_num_threads() * sizeof(float));
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(weight_a, buf) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, l, st, step, tmp_height_buf, wd, width) \
-+ shared(weight_a, buf) \
-+ schedule(static)
- #endif
- for(int i = 0; i < width; i++)
- {
-@@ -139,7 +150,10 @@ static void dt_iop_equalizer_iwtf(float *buf, float **weight_a, const int l, con
-
- float *const tmp_width_buf = (float *)malloc(width * dt_get_num_threads() * sizeof(float));
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(weight_a, buf) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, l, st, step, tmp_width_buf, wd, width) \
-+ shared(weight_a, buf) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-diff --git a/src/iop/rotatepixels.c b/src/iop/rotatepixels.c
-index 519180a..eacc681 100644
---- a/src/iop/rotatepixels.c
-+++ b/src/iop/rotatepixels.c
-@@ -257,7 +257,10 @@ void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *c
- const struct dt_interpolation *interpolation = dt_interpolation_new(DT_INTERPOLATION_USERPREF);
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(piece, interpolation)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ch_width, ivoid, ovoid, roi_in, roi_out, scale) \
-+ shared(piece, interpolation) \
-+ schedule(static)
- #endif
- // (slow) point-by-point transformation.
- // TODO: optimize with scanlines and linear steps between?
-diff --git a/src/iop/vibrance.c b/src/iop/vibrance.c
-index c9eaafd..f3fe554 100644
---- a/src/iop/vibrance.c
-+++ b/src/iop/vibrance.c
-@@ -101,7 +101,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const float amount = (d->amount * 0.01);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(amount, ch, roi_out) \
-+ shared(in, out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/colorreconstruction.c b/src/iop/colorreconstruction.c
-index a5e3ffe..ac7e98a 100644
---- a/src/iop/colorreconstruction.c
-+++ b/src/iop/colorreconstruction.c
-@@ -365,7 +365,9 @@ static void dt_iop_colorreconstruct_bilateral_splat(dt_iop_colorreconstruct_bila
-
- // splat into downsampled grid
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(b, precedence, params)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, threshold) \
-+ shared(b, precedence, params)
- #endif
- for(int j = 0; j < b->height; j++)
- {
-@@ -439,7 +441,9 @@ static void blur_line(dt_iop_colorreconstruct_Lab_t *buf, const int offset1, con
- const float w1 = 4.f / 16.f;
- const float w2 = 1.f / 16.f;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(buf)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(offset1, offset2, offset3, size1, size2, size3) \
-+ shared(buf)
- #endif
- for(int k = 0; k < size1; k++)
- {
-@@ -515,7 +519,8 @@ static void dt_iop_colorreconstruct_bilateral_slice(const dt_iop_colorreconstruc
- const int oy = b->size_x;
- const int oz = b->size_y * b->size_x;
- #ifdef _OPENMP
--#pragma omp parallel for default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(b, in, out, oy, oz, rescale, roi, threshold)
- #endif
- for(int j = 0; j < roi->height; j++)
- {
-diff --git a/src/iop/bloom.c b/src/iop/bloom.c
-index d76ce73..2e03ce0 100644
---- a/src/iop/bloom.c
-+++ b/src/iop/bloom.c
-@@ -126,7 +126,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- /* get the thresholded lights into buffer */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(data, blurlightness) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, roi_out, scale) \
-+ shared(data, blurlightness) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-@@ -146,7 +149,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- for(int iteration = 0; iteration < BOX_ITERATIONS; iteration++)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(blurlightness) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(hr, roi_out, scanline_buf, size) \
-+ shared(blurlightness) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -180,7 +186,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(blurlightness) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(hr, npoffs, opoffs, roi_out, size, scanline_buf) \
-+ shared(blurlightness) \
-+ schedule(static)
- #endif
- for(int x = 0; x < roi_out->width; x++)
- {
-@@ -214,7 +223,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- /* screen blend lightness with original */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out, data, blurlightness) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, roi_out) \
-+ shared(in, out, data, blurlightness) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-diff --git a/src/iop/rawprepare.c b/src/iop/rawprepare.c
-index 030fe5b..d985637 100644
---- a/src/iop/rawprepare.c
-+++ b/src/iop/rawprepare.c
-@@ -301,7 +301,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- float *const out = (float *const)ovoid;
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(csx, csy, d, in, out, roi_in, roi_out) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -325,7 +328,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- float *const out = (float *const)ovoid;
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(csx, csy, d, in, out, roi_in, roi_out) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -353,7 +359,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const int ch = piece->colors;
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(3)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, csx, csy, div, in, out, roi_in, roi_out, sub) \
-+ schedule(static) collapse(3)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -387,7 +395,9 @@ void process_sse2(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const vo
- if(piece->pipe->dsc.filters && piece->dsc_in.channels == 1 && piece->dsc_in.datatype == TYPE_UINT16)
- { // raw mosaic
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(csx, csy, d, ivoid, ovoid, roi_in, roi_out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -446,7 +456,9 @@ void process_sse2(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const vo
- else if(piece->pipe->dsc.filters && piece->dsc_in.channels == 1 && piece->dsc_in.datatype == TYPE_FLOAT)
- { // raw mosaic, fp, unnormalized
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(csx, csy, d, ivoid, ovoid, roi_in, roi_out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -498,7 +510,9 @@ void process_sse2(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const vo
- const __m128 sub = _mm_load_ps(d->sub), div = _mm_load_ps(d->div);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(csx, csy, div, ivoid, ovoid, roi_in, roi_out, sub) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-diff --git a/src/iop/filmic.c b/src/iop/filmic.c
-index 91afe02..9739a62 100644
---- a/src/iop/filmic.c
-+++ b/src/iop/filmic.c
-@@ -431,7 +431,9 @@ void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *c
- const float saturation = data->global_saturation / 100.0f;
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, data, desaturate, ivoid, ovoid, preserve_color, roi_out, saturation) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < roi_out->height * roi_out->width * ch; k += ch)
- {
-@@ -556,7 +558,12 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const __m128 one = _mm_set1_ps(1.0f);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(black, black_sse, ch, data, desaturate, dynamic_range, \
-+ dynamic_range_sse, EPS, grey, grey_sse, ivoid, one, \
-+ ovoid, power, preserve_color, roi_out, saturation_sse, \
-+ zero) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < roi_out->height * roi_out->width * ch; k += ch)
- {
-@@ -1421,7 +1428,10 @@ void commit_params(dt_iop_module_t *self, dt_iop_params_t *p1, dt_dev_pixelpipe_
- const float sigma = saturation * saturation * latitude * latitude;
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) shared(d) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(center, sigma) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int k = 0; k < 65536; k++)
- {
-diff --git a/src/iop/monochrome.c b/src/iop/monochrome.c
-index ba98c59..e8ca0c5 100644
---- a/src/iop/monochrome.c
-+++ b/src/iop/monochrome.c
-@@ -153,7 +153,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const float sigma2 = (d->size * 128.0) * (d->size * 128.0f);
- // first pass: evaluate color filter:
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(i, o, roi_out, sigma2) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-@@ -180,7 +183,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- dt_bilateral_free(b);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(i, o, roi_out) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/colorin.c b/src/iop/colorin.c
-index 107b4e3..59914ab 100644
---- a/src/iop/colorin.c
-+++ b/src/iop/colorin.c
-@@ -483,7 +483,9 @@ static void process_cmatrix_bm(struct dt_iop_module_t *self, dt_dev_pixelpipe_io
- // fprintf(stderr, "Using cmatrix codepath\n");
- // only color matrix. use our optimized fast path!
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, clipping, d, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -562,7 +564,9 @@ static void process_cmatrix_fastpath_simple(struct dt_iop_module_t *self, dt_dev
- // fprintf(stderr, "Using cmatrix codepath\n");
- // only color matrix. use our optimized fast path!
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-@@ -594,7 +598,9 @@ static void process_cmatrix_fastpath_clipping(struct dt_iop_module_t *self, dt_d
- // fprintf(stderr, "Using cmatrix codepath\n");
- // only color matrix. use our optimized fast path!
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-@@ -659,7 +665,9 @@ static void process_cmatrix_proper(struct dt_iop_module_t *self, dt_dev_pixelpip
- // fprintf(stderr, "Using cmatrix codepath\n");
- // only color matrix. use our optimized fast path!
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, clipping, d, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -755,7 +763,9 @@ static void process_lcms2_bm(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_
-
- // use general lcms2 fallback
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-@@ -800,7 +810,9 @@ static void process_lcms2_proper(struct dt_iop_module_t *self, dt_dev_pixelpipe_
-
- // use general lcms2 fallback
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-@@ -884,7 +896,10 @@ static void process_sse2_cmatrix_bm(struct dt_iop_module_t *self, dt_dev_pixelpi
- float *in = (float *)ivoid;
- float *out = (float *)ovoid;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out, in) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, clipping, cmat, d, lmat, nmat, roi_in, roi_out) \
-+ shared(out, in) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -956,7 +971,9 @@ static void process_sse2_cmatrix_fastpath_simple(struct dt_iop_module_t *self, d
- const __m128 cm2 = _mm_set_ps(0.0f, cmat[8], cmat[5], cmat[2]);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, cm0, cm1, cm2, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-@@ -994,7 +1011,9 @@ static void process_sse2_cmatrix_fastpath_clipping(struct dt_iop_module_t *self,
- const __m128 lm2 = _mm_set_ps(0.0f, lmat[8], lmat[5], lmat[2]);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, lm0, lm1, lm2, nm0, nm1, nm2, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-@@ -1047,7 +1066,10 @@ static void process_sse2_cmatrix_proper(struct dt_iop_module_t *self, dt_dev_pix
- float *in = (float *)ivoid;
- float *out = (float *)ovoid;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out, in) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, clipping, cmat, d, lmat, nmat, roi_in, roi_out) \
-+ shared(out, in) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -1131,7 +1153,9 @@ static void process_sse2_lcms2_bm(struct dt_iop_module_t *self, dt_dev_pixelpipe
-
- // use general lcms2 fallback
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-@@ -1179,7 +1203,9 @@ static void process_sse2_lcms2_proper(struct dt_iop_module_t *self, dt_dev_pixel
-
- // use general lcms2 fallback
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/basecurve.c b/src/iop/basecurve.c
-index 72dc2a8..44a3b05 100644
---- a/src/iop/basecurve.c
-+++ b/src/iop/basecurve.c
-@@ -823,7 +823,9 @@ static inline void apply_ev_and_curve(
- const float *const unbounded_coeffs)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, in, mul, out, table, unbounded_coeffs, width) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)width * height; k++)
- {
-@@ -852,7 +854,10 @@ static inline void compute_features(
- // 2) saturation
- // 3) local contrast (handled in laplacian form later)
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) collapse(2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(col, ht, wd) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(int j=0;j<ht;j++) for(int i=0;i<wd;i++)
- {
-@@ -882,7 +887,10 @@ static inline void gauss_blur(
- float *tmp = dt_alloc_align(64, (size_t)wd*ht*4*sizeof(float));
- memset(tmp, 0, 4*wd*ht*sizeof(float));
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(tmp)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ht, input, w, wd) \
-+ shared(tmp) \
-+ schedule(static)
- #endif
- for(int j=0;j<ht;j++)
- { // horizontal pass
-@@ -901,7 +909,10 @@ static inline void gauss_blur(
- }
- memset(output, 0, 4*wd*ht*sizeof(float));
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(tmp)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ht, output, w, wd) \
-+ shared(tmp) \
-+ schedule(static)
- #endif
- for(int i=0;i<wd;i++)
- { // vertical pass
-@@ -928,7 +939,10 @@ static inline void gauss_expand(
- // fill numbers in even pixels, zero odd ones
- memset(fine, 0, 4*wd*ht*sizeof(float));
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) collapse(2)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(cw, fine, ht, input, wd) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(int j=0;j<ht;j+=2)
- for(int i=0;i<wd;i+=2)
-@@ -1015,7 +1029,10 @@ void process_fusion(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
- h = ht;
- gauss_reduce(col[0], col[1], out, w, h);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(col) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ht, out, wd) \
-+ shared(col) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < 4ul * wd * ht; k += 4)
- col[0][k + 3] *= .1f + sqrtf(out[k] * out[k] + out[k + 1] * out[k + 1] + out[k + 2] * out[k + 2]);
-@@ -1051,7 +1068,10 @@ void process_fusion(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
- // abuse output buffer as temporary memory:
- if(k != num_levels - 1) gauss_expand(col[k + 1], out, w, h);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(col, comb, w, h, num_levels, k) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(out) \
-+ shared(col, comb, w, h, num_levels, k) \
-+ schedule(static)
- #endif
- for(int j = 0; j < h; j++)
- for(int i = 0; i < w; i++)
-@@ -1095,7 +1115,10 @@ void process_fusion(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
- { // reconstruct output image
- gauss_expand(comb[k + 1], out, w, h);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(comb, w, h, k) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(out) \
-+ shared(comb, w, h, k) \
-+ schedule(static)
- #endif
- for(int j = 0; j < h; j++)
- for(int i = 0; i < w; i++)
-@@ -1108,7 +1131,10 @@ void process_fusion(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
- #endif
- // copy output buffer
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(comb) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(col, in, ht, out, wd) \
-+ shared(comb) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)4 * wd * ht; k += 4)
- {
-@@ -1137,7 +1163,9 @@ void process_lut(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, co
- dt_iop_basecurve_data_t *const d = (dt_iop_basecurve_data_t *)(piece->data);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, in, out, roi_out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-diff --git a/src/iop/shadhi.c b/src/iop/shadhi.c
-index b1ed33b..930b066 100644
---- a/src/iop/shadhi.c
-+++ b/src/iop/shadhi.c
-@@ -373,7 +373,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- // invert and desaturate
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(roi_out) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(size_t j = 0; j < (size_t)roi_out->width * roi_out->height * 4; j += 4)
- {
-@@ -391,7 +394,13 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, compress, doublemax, flags, halfmax, height, \
-+ highlights, highlights_ccorrect, lmax, lmin, \
-+ low_approximation, max, min, shadows, \
-+ shadows_ccorrect, unbound_mask, whitepoint, width) \
-+ shared(in, out) \
-+ schedule(static)
- #endif
- for(size_t j = 0; j < (size_t)width * height * ch; j += ch)
- {
-diff --git a/src/iop/sharpen.c b/src/iop/sharpen.c
-index 06f7e71..ffd00fd 100644
---- a/src/iop/sharpen.c
-+++ b/src/iop/sharpen.c
-@@ -318,7 +318,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- // gauss blur the image horizontally
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, mat, rad, roi_in, roi_out, tmp, wd4) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -358,7 +360,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- // gauss blur the image vertically
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, mat, ovoid, rad, roi_in, roi_out, tmp, wd4) \
-+ schedule(static)
- #endif
- for(int j = rad; j < roi_out->height - wd4 * 4 + rad; j++)
- {
-@@ -385,7 +389,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- }
- }
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, mat, ovoid, rad, roi_in, roi_out, tmp, wd4) \
-+ schedule(static)
- #endif
- for(int j = roi_out->height - wd4 * 4 + rad; j < roi_out->height - rad; j++)
- {
-@@ -418,7 +424,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- dt_free_align(tmp);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, ovoid, rad, roi_out) \
-+ schedule(static)
- #endif
- for(int j = rad; j < roi_out->height - rad; j++)
- {
-@@ -429,7 +437,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, data, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- // subtract blurred image, if diff > thrs, add *amount to original image
- for(int j = 0; j < roi_out->height; j++)
-@@ -502,7 +512,9 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
-
- // gauss blur the image horizontally
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, mat, rad, roi_in, roi_out, tmp, wd4) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -543,7 +555,9 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
-
- // gauss blur the image vertically
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, mat, ovoid, rad, roi_in, roi_out, tmp, wd4) \
-+ schedule(static)
- #endif
- for(int j = rad; j < roi_out->height - wd4 * 4 + rad; j++)
- {
-@@ -571,7 +585,9 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- }
- }
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, mat, ovoid, rad, roi_in, roi_out, tmp, wd4) \
-+ schedule(static)
- #endif
- for(int j = roi_out->height - wd4 * 4 + rad; j < roi_out->height - rad; j++)
- {
-@@ -606,7 +622,9 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- dt_free_align(tmp);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, ovoid, rad, roi_out) \
-+ schedule(static)
- #endif
- for(int j = rad; j < roi_out->height - rad; j++)
- {
-@@ -617,7 +635,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(data) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, ovoid, roi_out) \
-+ shared(data) \
-+ schedule(static)
- #endif
- // subtract blurred image, if diff > thrs, add *amount to original image
- for(int j = 0; j < roi_out->height; j++)
-diff --git a/src/iop/highpass.c b/src/iop/highpass.c
-index 7920c7e..e765b59 100644
---- a/src/iop/highpass.c
-+++ b/src/iop/highpass.c
-@@ -288,7 +288,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- /* create inverted image and then blur */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, roi_out) \
-+ shared(in, out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- out[ch * k] = 100.0f - LCLIP(in[ch * k]); // only L in Lab space
-@@ -365,7 +368,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- const float contrast_scale = ((data->contrast / 100.0) * 7.5);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out, data) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, contrast_scale, roi_out) \
-+ shared(in, out, data) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-diff --git a/src/iop/invert.c b/src/iop/invert.c
-index f5ceae4..6106906 100644
---- a/src/iop/invert.c
-+++ b/src/iop/invert.c
-@@ -250,7 +250,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- if(filters == 9u)
- { // xtrans float mosaiced
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(film_rgb_f, in, out, roi_out, xtrans) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -267,7 +270,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- { // bayer float mosaiced
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(film_rgb_f, filters, in, out, roi_out) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -285,7 +291,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const int ch = piece->colors;
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, d, in, out, roi_out) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -326,7 +335,9 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const __m128 val_max = _mm_set1_ps(1.0f);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(film_rgb_f, ivoid, ovoid, roi_out, val_max, val_min, xtrans) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -381,7 +392,9 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const __m128 val_max = _mm_set1_ps(1.0f);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(film_rgb_f, filters, ivoid, ovoid, roi_out, val_max, val_min) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -423,7 +436,9 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const __m128 film = _mm_set_ps(1.0f, d->color[2], d->color[1], d->color[0]);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, film, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/gamma.c b/src/iop/gamma.c
-index 7cbd8a7..a9fbaad 100644
---- a/src/iop/gamma.c
-+++ b/src/iop/gamma.c
-@@ -199,7 +199,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- {
- const float yellow[3] = { 1.0f, 1.0f, 0.0f };
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, i, mask_display, o, roi_out, yellow) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-@@ -222,7 +224,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- {
- const float yellow[3] = { 1.0f, 1.0f, 0.0f };
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, i, mask_display, o, roi_out, yellow) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-@@ -243,7 +247,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- {
- const float yellow[3] = { 1.0f, 1.0f, 0.0f };
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, i, o, roi_out, yellow) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-@@ -264,7 +270,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- else
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, i, o, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/defringe.c b/src/iop/defringe.c
-index fdab4dc..950c101 100644
---- a/src/iop/defringe.c
-+++ b/src/iop/defringe.c
-@@ -251,8 +251,11 @@ void process(struct dt_iop_module_t *module, dt_dev_pixelpipe_iop_t *piece, cons
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(width, height, \
-- d) reduction(+ : avg_edge_chroma) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, in, out) \
-+ shared(width, height, d) \
-+ reduction(+ : avg_edge_chroma) \
-+ schedule(static)
- #endif
- for(int v = 0; v < height; v++)
- {
-@@ -288,8 +291,11 @@ void process(struct dt_iop_module_t *module, dt_dev_pixelpipe_iop_t *piece, cons
- #ifdef _OPENMP
- // dynamically/guided scheduled due to possible uneven edge-chroma distribution (thanks to rawtherapee code
- // for this hint!)
--#pragma omp parallel for default(none) shared(width, height, d, xy_small, xy_avg, xy_artifact) \
-- firstprivate(thresh, avg_edge_chroma) schedule(guided, 32)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, in, out, samples_avg, samples_small) \
-+ shared(width, height, d, xy_small, xy_avg, xy_artifact) \
-+ firstprivate(thresh, avg_edge_chroma) \
-+ schedule(guided, 32)
- #endif
- for(int v = 0; v < height; v++)
- {
-diff --git a/src/iop/atrous.c b/src/iop/atrous.c
-index d915429..16a4395 100644
---- a/src/iop/atrous.c
-+++ b/src/iop/atrous.c
-@@ -324,7 +324,9 @@ static void eaw_decompose(float *const out, const float *const in, float *const
- /* The first "2*mult" lines use the macro with tests because the 5x5 kernel
- * requires nearest pixel interpolation for at least a pixel in the sum */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(detail, filter, height, in, mult, out, sharpen, width) \
-+ schedule(static)
- #endif
- for(int j = 0; j < 2 * mult; j++)
- {
-@@ -345,7 +347,9 @@ static void eaw_decompose(float *const out, const float *const in, float *const
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(detail, filter, height, in, mult, out, sharpen, width) \
-+ schedule(static)
- #endif
- for(int j = 2 * mult; j < height - 2 * mult; j++)
- {
-@@ -402,7 +406,9 @@ static void eaw_decompose(float *const out, const float *const in, float *const
- /* The last "2*mult" lines use the macro with tests because the 5x5 kernel
- * requires nearest pixel interpolation for at least a pixel in the sum */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(detail, filter, height, in, mult, out, sharpen, width) \
-+ schedule(static)
- #endif
- for(int j = height - 2 * mult; j < height; j++)
- {
-@@ -439,7 +445,9 @@ static void eaw_decompose_sse2(float *const out, const float *const in, float *c
- /* The first "2*mult" lines use the macro with tests because the 5x5 kernel
- * requires nearest pixel interpolation for at least a pixel in the sum */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(detail, filter, height, in, mult, out, sharpen, width) \
-+ schedule(static)
- #endif
- for(int j = 0; j < 2 * mult; j++)
- {
-@@ -460,7 +468,9 @@ static void eaw_decompose_sse2(float *const out, const float *const in, float *c
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(detail, filter, height, in, mult, out, sharpen, width) \
-+ schedule(static)
- #endif
- for(int j = 2 * mult; j < height - 2 * mult; j++)
- {
-@@ -517,7 +527,9 @@ static void eaw_decompose_sse2(float *const out, const float *const in, float *c
- /* The last "2*mult" lines use the macro with tests because the 5x5 kernel
- * requires nearest pixel interpolation for at least a pixel in the sum */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(detail, filter, height, in, mult, out, sharpen, width) \
-+ schedule(static)
- #endif
- for(int j = height - 2 * mult; j < height; j++)
- {
-@@ -558,7 +570,10 @@ static void eaw_synthesize(float *const out, const float *const in, const float
- const float boost[4] = { boostf[0], boostf[1], boostf[2], boostf[3] };
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(boost, detail, height, in, out, width, threshold) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(size_t k = 0; k < (size_t)4 * width * height; k += 4)
- {
-@@ -580,7 +595,9 @@ static void eaw_synthesize_sse2(float *const out, const float *const in, const f
- const __m128 boost = _mm_set_ps(boostf[3], boostf[2], boostf[1], boostf[0]);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(boost, detail, height, in, out, threshold, width) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-diff --git a/src/iop/colisa.c b/src/iop/colisa.c
-index 99acf04..777306b 100644
---- a/src/iop/colisa.c
-+++ b/src/iop/colisa.c
-@@ -180,7 +180,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const int ch = piece->colors;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out, data) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, height, width) \
-+ shared(in, out, data) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)width * height; k++)
- {
-@@ -250,7 +253,10 @@ void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *p1, dt_dev_pix
- const float contrastm1sq = boost * (d->contrast - 1.0f) * (d->contrast - 1.0f);
- const float contrastscale = sqrt(1.0f + contrastm1sq);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(contrastm1sq, contrastscale) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int k = 0; k < 0x10000; k++)
- {
-@@ -272,7 +278,10 @@ void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *p1, dt_dev_pix
- const float gamma = (d->brightness >= 0.0f) ? 1.0f / (1.0f + d->brightness) : (1.0f - d->brightness);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(gamma) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int k = 0; k < 0x10000; k++)
- {
-diff --git a/src/iop/profile_gamma.c b/src/iop/profile_gamma.c
-index 33975bf..c7e9049 100644
---- a/src/iop/profile_gamma.c
-+++ b/src/iop/profile_gamma.c
-@@ -333,7 +333,10 @@ void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *c
- const float noise = powf(2.0f, -16.0f);
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) shared(data) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, grey, ivoid, ovoid, roi_out) \
-+ shared(data) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k++)
- {
-@@ -356,7 +359,10 @@ void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *c
- case PROFILEGAMMA_GAMMA:
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(data) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, ovoid, roi_out) \
-+ shared(data) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-@@ -688,7 +694,10 @@ void commit_params(dt_iop_module_t *self, dt_iop_params_t *p1, dt_dev_pixelpipe_
- if(linear == 0.0)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(gamma) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int k = 0; k < 0x10000; k++) d->table[k] = powf(1.00 * k / 0x10000, gamma);
- }
-@@ -707,7 +716,10 @@ void commit_params(dt_iop_module_t *self, dt_iop_params_t *p1, dt_dev_pixelpipe_
- c = 1.0;
- }
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(d, a, b, c, g) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(linear) \
-+ shared(d, a, b, c, g) \
-+ schedule(static)
- #endif
- for(int k = 0; k < 0x10000; k++)
- {
-diff --git a/src/iop/rawoverexposed.c b/src/iop/rawoverexposed.c
-index 0016762..aafd4de 100644
---- a/src/iop/rawoverexposed.c
-+++ b/src/iop/rawoverexposed.c
-@@ -188,7 +188,12 @@ void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *c
- void *coordbuf = dt_alloc_align(16, coordbufsize * sizeof(float) * dt_get_num_threads());
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) shared(self, coordbuf, buf) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, color, coordbufsize, d, \
-+ dt_iop_rawoverexposed_colors, filters, priority, mode, \
-+ out, raw, roi_in, roi_out, xtrans) \
-+ shared(self, coordbuf, buf) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -311,7 +316,10 @@ int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_m
- if(coordbuf == NULL) goto error;
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) shared(self, coordbuf, buf) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(height, roi_in, roi_out, width) \
-+ shared(self, coordbuf, buf) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-diff --git a/src/iop/soften.c b/src/iop/soften.c
-index cee9f87..b20752d 100644
---- a/src/iop/soften.c
-+++ b/src/iop/soften.c
-@@ -129,7 +129,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- /* create overexpose image and then blur */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(brightness, ch, in, out, roi_out, saturation) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -154,7 +156,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- for(int iteration = 0; iteration < BOX_ITERATIONS; iteration++)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, radius, out, roi_out, scanline_buf, scanline_size) \
-+ schedule(static)
- #endif
- /* horizontal blur out into out */
- for(int y = 0; y < roi_out->height; y++)
-@@ -206,7 +210,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const int opoffs = -(radius + 1) * roi_out->width;
- const int npoffs = (radius)*roi_out->width;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, npoffs, opoffs, radius, out, roi_out, \
-+ scanline_buf, scanline_size) \
-+ schedule(static)
- #endif
- for(int x = 0; x < roi_out->width; x++)
- {
-@@ -262,7 +269,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const float amount_1 = (1 - (d->amount) / 100.0);
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(amount, amount_1, ch, in, out, roi_out) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -286,7 +296,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const float saturation = data->saturation / 100.0;
- /* create overexpose image and then blur */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, brightness, roi_out, saturation) \
-+ shared(in, out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-@@ -311,7 +324,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- for(int iteration = 0; iteration < BOX_ITERATIONS; iteration++)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, radius, roi_out, scanline_buf, size) \
-+ shared(out) \
-+ schedule(static)
- #endif
- /* horizontal blur out into out */
- for(int y = 0; y < roi_out->height; y++)
-@@ -344,7 +360,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const int opoffs = -(radius + 1) * roi_out->width;
- const int npoffs = (radius)*roi_out->width;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, npoffs, opoffs, radius, roi_out, scanline_buf, size) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(int x = 0; x < roi_out->width; x++)
- {
-@@ -381,7 +400,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const __m128 amount = _mm_set1_ps(data->amount / 100.0);
- const __m128 amount_1 = _mm_set1_ps(1 - (data->amount) / 100.0);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out, data) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, amount, amount_1, roi_out) \
-+ shared(in, out, data) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-diff --git a/src/iop/denoiseprofile.c b/src/iop/denoiseprofile.c
-index 8eaa05e..5901eee 100644
---- a/src/iop/denoiseprofile.c
-+++ b/src/iop/denoiseprofile.c
-@@ -377,7 +377,10 @@ static inline void precondition(const float *const in, float *const buf, const i
- (b[2] / a[2]) * (b[2] / a[2]) + 3.f / 8.f };
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(a)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(buf, ht, in, sigma2_plus_3_8, wd) \
-+ shared(a) \
-+ schedule(static)
- #endif
- for(int j = 0; j < ht; j++)
- {
-@@ -405,7 +408,10 @@ static inline void backtransform(float *const buf, const int wd, const int ht, c
- (b[2] / a[2]) * (b[2] / a[2]) + 1.f / 8.f };
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(a)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(buf, ht, sigma2_plus_1_8, wd) \
-+ shared(a) \
-+ schedule(static)
- #endif
- for(int j = 0; j < ht; j++)
- {
-@@ -590,7 +596,9 @@ static void eaw_decompose(float *const out, const float *const in, float *const
- /* The first "2*mult" lines use the macro with tests because the 5x5 kernel
- * requires nearest pixel interpolation for at least a pixel in the sum */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(detail, filter, height, in, inv_sigma2, mult, out, width) \
-+ schedule(static)
- #endif
- for(int j = 0; j < 2 * mult; j++)
- {
-@@ -611,7 +619,9 @@ static void eaw_decompose(float *const out, const float *const in, float *const
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(detail, filter, height, in, inv_sigma2, mult, out, width) \
-+ schedule(static)
- #endif
- for(int j = 2 * mult; j < height - 2 * mult; j++)
- {
-@@ -668,7 +678,9 @@ static void eaw_decompose(float *const out, const float *const in, float *const
- /* The last "2*mult" lines use the macro with tests because the 5x5 kernel
- * requires nearest pixel interpolation for at least a pixel in the sum */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(detail, filter, height, in, inv_sigma2, mult, out, width) \
-+ schedule(static)
- #endif
- for(int j = height - 2 * mult; j < height; j++)
- {
-@@ -705,7 +717,9 @@ static void eaw_decompose_sse(float *const out, const float *const in, float *co
- /* The first "2*mult" lines use the macro with tests because the 5x5 kernel
- * requires nearest pixel interpolation for at least a pixel in the sum */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(detail, filter, height, in, inv_sigma2, mult, out, width) \
-+ schedule(static)
- #endif
- for(int j = 0; j < 2 * mult; j++)
- {
-@@ -726,7 +740,9 @@ static void eaw_decompose_sse(float *const out, const float *const in, float *co
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(detail, filter, height, in, inv_sigma2, mult, out, width) \
-+ schedule(static)
- #endif
- for(int j = 2 * mult; j < height - 2 * mult; j++)
- {
-@@ -783,7 +799,9 @@ static void eaw_decompose_sse(float *const out, const float *const in, float *co
- /* The last "2*mult" lines use the macro with tests because the 5x5 kernel
- * requires nearest pixel interpolation for at least a pixel in the sum */
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(detail, filter, height, in, inv_sigma2, mult, out, width) \
-+ schedule(static)
- #endif
- for(int j = height - 2 * mult; j < height; j++)
- {
-@@ -824,7 +842,10 @@ static void eaw_synthesize(float *const out, const float *const in, const float
- const float boost[4] = { boostf[0], boostf[1], boostf[2], boostf[3] };
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(boost, detail, height, in, out, threshold, width) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(size_t k = 0; k < (size_t)4 * width * height; k += 4)
- {
-@@ -846,7 +867,9 @@ static void eaw_synthesize_sse2(float *const out, const float *const in, const f
- const __m128 boost = _mm_set_ps(boostf[3], boostf[2], boostf[1], boostf[0]);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(boost, detail, height, in, out, threshold, width) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-@@ -1102,7 +1125,11 @@ static void process_nlmeans(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t
- // do this in parallel with a little threading overhead. could parallelize the outer loops with a bit more
- // memory
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) firstprivate(inited_slide) shared(kj, ki, in, Sa)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(d, ovoid, P, roi_in, roi_out) \
-+ firstprivate(inited_slide) \
-+ shared(kj, ki, in, Sa) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -1188,7 +1215,9 @@ static void process_nlmeans(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t
-
- // normalize
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, out, roi_out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -1246,7 +1275,11 @@ static void process_nlmeans_sse(struct dt_iop_module_t *self, dt_dev_pixelpipe_i
- // do this in parallel with a little threading overhead. could parallelize the outer loops with a bit more
- // memory
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) firstprivate(inited_slide) shared(kj, ki, in, Sa)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ovoid, P, roi_in, roi_out) \
-+ firstprivate(inited_slide) \
-+ shared(kj, ki, in, Sa) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -1379,7 +1412,10 @@ static void process_nlmeans_sse(struct dt_iop_module_t *self, dt_dev_pixelpipe_i
- }
- // normalize
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(d)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ovoid, roi_out) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-diff --git a/src/iop/spots.c b/src/iop/spots.c
-index 80601de..f4fa791 100644
---- a/src/iop/spots.c
-+++ b/src/iop/spots.c
-@@ -372,7 +372,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- // we don't modify most of the image:
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(out, in)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, in, out, roi_in, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/lowpass.c b/src/iop/lowpass.c
-index 6076de7..c97f126 100644
---- a/src/iop/lowpass.c
-+++ b/src/iop/lowpass.c
-@@ -420,7 +420,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const float *const Labminf = (float *)&Labmin;
- const float *const Labmaxf = (float *)&Labmax;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out, data) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, Labmaxf, Labminf, roi_out) \
-+ shared(in, out, data) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-@@ -529,7 +532,10 @@ void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *p1, dt_dev_pix
- const float contrastm1sq = boost * (fabs(d->contrast) - 1.0f) * (fabs(d->contrast) - 1.0f);
- const float contrastscale = copysign(sqrt(1.0f + contrastm1sq), d->contrast);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(contrastm1sq, contrastscale) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int k = 0; k < 0x10000; k++)
- {
-@@ -551,7 +557,10 @@ void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *p1, dt_dev_pix
- const float gamma = (d->brightness >= 0.0f) ? 1.0f / (1.0f + d->brightness) : (1.0f - d->brightness);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(gamma) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int k = 0; k < 0x10000; k++)
- {
-diff --git a/src/iop/temperature.c b/src/iop/temperature.c
-index 2bdf8d8..927980e 100644
---- a/src/iop/temperature.c
-+++ b/src/iop/temperature.c
-@@ -454,7 +454,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- if(filters == 9u)
- { // xtrans float mosaiced
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(d, in, out, roi_out, xtrans) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -468,7 +471,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- else if(filters)
- { // bayer float mosaiced
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(d, filters, in, out, roi_out) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -484,7 +490,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const int ch = piece->colors;
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, d, in, out, roi_out) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -516,7 +525,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- if(filters == 9u)
- { // xtrans float mosaiced
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ivoid, ovoid, roi_out, xtrans) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -557,7 +569,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- else if(filters)
- { // bayer float mosaiced
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(filters, ivoid, ovoid, roi_out) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -599,7 +614,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const __m128 coeffs = _mm_set_ps(1.0f, d->coeffs[2], d->coeffs[1], d->coeffs[0]);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, coeffs, ivoid, ovoid, roi_out) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/scalepixels.c b/src/iop/scalepixels.c
-index 9db887c..037fb21 100644
---- a/src/iop/scalepixels.c
-+++ b/src/iop/scalepixels.c
-@@ -185,7 +185,10 @@ void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *c
- const dt_iop_scalepixels_data_t * const d = piece->data;
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(interpolation)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch_width, d, ivoid, ovoid, roi_in, roi_out) \
-+ shared(interpolation) \
-+ schedule(static)
- #endif
- // (slow) point-by-point transformation.
- // TODO: optimize with scanlines and linear steps between?
-diff --git a/src/iop/velvia.c b/src/iop/velvia.c
-index 99c97e9..1701d8a 100644
---- a/src/iop/velvia.c
-+++ b/src/iop/velvia.c
-@@ -140,7 +140,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- else
- {
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, data, ivoid, ovoid, roi_out, strength) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-@@ -186,7 +188,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- else
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(in, out, data) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, roi_out, strength) \
-+ shared(in, out, data) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-diff --git a/src/iop/vignette.c b/src/iop/vignette.c
-index cdf529f..4df43e5 100644
---- a/src/iop/vignette.c
-+++ b/src/iop/vignette.c
-@@ -740,7 +740,11 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- unsigned int *const tea_states = calloc(2 * dt_get_num_threads(), sizeof(unsigned int));
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(data, yscale, xscale, dither) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, dscale, exp1, exp2, fscale, ivoid, ovoid, \
-+ roi_center_scaled, roi_out, tea_states, unbound) \
-+ shared(data, yscale, xscale, dither) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-diff --git a/src/iop/levels.c b/src/iop/levels.c
-index 8bef474..f183d15 100644
---- a/src/iop/levels.c
-+++ b/src/iop/levels.c
-@@ -301,7 +301,9 @@ void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *c
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, ivoid, ovoid, roi_out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/liquify.c b/src/iop/liquify.c
-index 70193c5..d91248b 100644
---- a/src/iop/liquify.c
-+++ b/src/iop/liquify.c
-@@ -1337,7 +1337,9 @@ void process(struct dt_iop_module_t *module, dt_dev_pixelpipe_iop_t *piece, cons
- assert (ch == 4);
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, in, out, roi_in, roi_out) \
-+ schedule(static)
- #endif
- for (int i = 0; i < roi_out->height; i++)
- {
-diff --git a/src/iop/tonecurve.c b/src/iop/tonecurve.c
-index b8a6e93..ba051a3 100644
---- a/src/iop/tonecurve.c
-+++ b/src/iop/tonecurve.c
-@@ -307,7 +307,11 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const int unbound_ab = d->unbound_ab;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(d) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(autoscale_ab, ch, height, i, low_approximation, o, \
-+ xm_al, xm_ar, xm_bl, xm_br, xm_L, unbound_ab, width) \
-+ shared(d) \
-+ schedule(static)
- #endif
- for(int k = 0; k < height; k++)
- {
-diff --git a/src/iop/splittoning.c b/src/iop/splittoning.c
-index 7259a30..9cb8375 100644
---- a/src/iop/splittoning.c
-+++ b/src/iop/splittoning.c
-@@ -165,7 +165,11 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- const float compress = (data->compress / 110.0) / 2.0; // Don't allow 100% compression..
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(data) private(in, out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, compress, ivoid, ovoid, roi_out) \
-+ shared(data) \
-+ private(in, out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/relight.c b/src/iop/relight.c
-index ed20436..6269386 100644
---- a/src/iop/relight.c
-+++ b/src/iop/relight.c
-@@ -128,7 +128,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const float c = (data->width / 10.0) / 2.0; // Width
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(data) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(b, c, ch, ivoid, ovoid, roi_out) \
-+ shared(data) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/grain.c b/src/iop/grain.c
-index 0cabbad..d610348 100644
---- a/src/iop/grain.c
-+++ b/src/iop/grain.c
-@@ -477,7 +477,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const float fib1div2 = fib1 / fib2;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(data, hash)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, filter, filtermul, ivoid, ovoid, roi_out, strength, \
-+ wd, zoom) \
-+ shared(data, hash)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-diff --git a/src/iop/dither.c b/src/iop/dither.c
-index 00fecb3..ebb5b0e 100644
---- a/src/iop/dither.c
-+++ b/src/iop/dither.c
-@@ -308,7 +308,9 @@ static void process_floyd_steinberg(struct dt_iop_module_t *self, dt_dev_pixelpi
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, height, ivoid, ovoid, width) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-@@ -474,7 +476,9 @@ static void process_floyd_steinberg_sse2(struct dt_iop_module_t *self, dt_dev_pi
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, height, ivoid, ovoid, width) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-@@ -604,7 +608,9 @@ static void process_random(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t
- unsigned int *const tea_states = calloc(2 * dt_get_num_threads(), sizeof(unsigned int));
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, dither, height, ivoid, ovoid, tea_states, width) \
-+ schedule(static)
- #endif
- for(int j = 0; j < height; j++)
- {
-diff --git a/src/iop/clahe.c b/src/iop/clahe.c
-index 1978862..72b0a9f 100644
---- a/src/iop/clahe.c
-+++ b/src/iop/clahe.c
-@@ -86,7 +86,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- float *luminance = (float *)malloc(((size_t)roi_out->width * roi_out->height) * sizeof(float));
- // double lsmax=0.0,lsmin=1.0;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(luminance)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ivoid, roi_out) \
-+ shared(luminance) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -115,7 +118,11 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- // CLAHE
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(luminance)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, dest_buf, destbuf_size, ivoid, ovoid, rad, roi_in, \
-+ roi_out, slope) \
-+ shared(luminance) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-diff --git a/src/iop/exposure.c b/src/iop/exposure.c
-index 79a9131..5b24b21 100644
---- a/src/iop/exposure.c
-+++ b/src/iop/exposure.c
-@@ -400,7 +400,9 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const int ch = piece->colors;
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, d, i, o, roi_out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k++)
- {
-@@ -425,7 +427,9 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const __m128 scalev = _mm_set1_ps(d->scale);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(blackv, ch, i, o, roi_out, scalev) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/colortransfer.c b/src/iop/colortransfer.c
-index a3ed68c..8c41b42 100644
---- a/src/iop/colortransfer.c
-+++ b/src/iop/colortransfer.c
-@@ -260,7 +260,10 @@ static void kmeans(const float *col, const dt_iop_roi_t *const roi, const int n,
- for(int k = 0; k < n; k++) cnt[k] = 0;
- // randomly sample col positions inside roi
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(col, mean_out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(cnt, mean, n, roi, samples, var) \
-+ shared(col, mean_out) \
-+ schedule(static)
- #endif
- for(int s = 0; s < samples; s++)
- {
-@@ -358,7 +361,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- int hist[HISTN];
- capture_histogram(in, roi_in, hist);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(data, in, out, hist)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, roi_out) \
-+ shared(data, in, out, hist) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-@@ -385,7 +391,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- // for all pixels: find input cluster, transfer to mapped target cluster
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(data, in, out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, mapio, mean, roi_out, var) \
-+ shared(data, in, out) \
-+ schedule(static)
- #endif
- for(int k = 0; k < roi_out->height; k++)
- {
-diff --git a/src/iop/colormapping.c b/src/iop/colormapping.c
-index 7de02c9..1495087 100644
---- a/src/iop/colormapping.c
-+++ b/src/iop/colormapping.c
-@@ -333,7 +333,10 @@ static void kmeans(const float *col, const int width, const int height, const in
- for(int k = 0; k < n; k++) cnt[k] = 0;
- // randomly sample col positions inside roi
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(col, mean_out)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(cnt, height, mean, n, samples, var, width) \
-+ shared(col, mean_out) \
-+ schedule(static)
- #endif
- for(int s = 0; s < samples; s++)
- {
-@@ -493,7 +496,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- // first get delta L of equalized L minus original image L, scaled to fit into [0 .. 100]
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(data, in, out, equalization)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, height, width) \
-+ shared(data, in, out, equalization) \
-+ schedule(static)
- #endif
- for(int k = 0; k < height; k++)
- {
-@@ -528,7 +534,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- float *const weight_buf = malloc(data->n * dt_get_num_threads() * sizeof(float));
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) shared(data, in, out, equalization)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, height, mapio, var_ratio, weight_buf, width) \
-+ shared(data, in, out, equalization) \
-+ schedule(static)
- #endif
- for(int k = 0; k < height; k++)
- {
-diff --git a/src/iop/highlights.c b/src/iop/highlights.c
-index 1a5e192..bced8ba 100644
---- a/src/iop/highlights.c
-+++ b/src/iop/highlights.c
-@@ -526,7 +526,9 @@ static void process_lch_bayer(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *pie
- const uint32_t filters = piece->pipe->dsc.filters;
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(clip, filters, ivoid, ovoid, roi_out) \
-+ schedule(dynamic)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -625,7 +627,9 @@ static void process_lch_xtrans(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *pi
- const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])piece->pipe->dsc.xtrans;
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(clip, ivoid, ovoid, roi_in, roi_out, xtrans) \
-+ schedule(dynamic)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -760,7 +764,9 @@ static void process_clip_plain(dt_dev_pixelpipe_iop_t *piece, const void *const
- if(piece->pipe->dsc.filters)
- { // raw mosaic
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(clip, in, out, roi_out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
- {
-@@ -772,7 +778,9 @@ static void process_clip_plain(dt_dev_pixelpipe_iop_t *piece, const void *const
- const int ch = piece->colors;
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, clip, in, out, roi_out) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k++)
- {
-@@ -793,7 +801,9 @@ static void process_clip_sse2(dt_dev_pixelpipe_iop_t *piece, const void *const i
- float *const out = (float *)ovoid;
- float *const in = (float *)ivoid;
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(clipm, in, n, out) \
-+ schedule(static)
- #endif
- for(size_t j = 0; j < (n & ~3u); j += 4) _mm_stream_ps(out + j, _mm_min_ps(clipm, _mm_load_ps(in + j)));
- _mm_sfence();
-@@ -807,7 +817,9 @@ static void process_clip_sse2(dt_dev_pixelpipe_iop_t *piece, const void *const i
- const int ch = piece->colors;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, clipm, ivoid, ovoid, roi_in, roi_out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -868,7 +880,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- {
- const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])piece->pipe->dsc.xtrans;
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(clips, filters, ivoid, ovoid, roi_in, roi_out, \
-+ xtrans) \
-+ schedule(dynamic)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -876,7 +891,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, -1, j, clips, xtrans, 1);
- }
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(clips, filters, ivoid, ovoid, roi_in, roi_out, \
-+ xtrans) \
-+ schedule(dynamic)
- #endif
- for(int i = 0; i < roi_out->width; i++)
- {
-@@ -887,7 +905,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- else
- {
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none) shared(data, piece)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(clips, filters, ivoid, ovoid, roi_out) \
-+ shared(data, piece) \
-+ schedule(dynamic)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -897,7 +918,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
-
- // up/down directions
- #ifdef _OPENMP
--#pragma omp parallel for schedule(dynamic) default(none) shared(data, piece)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(clips, filters, ivoid, ovoid, roi_out) \
-+ shared(data, piece) \
-+ schedule(dynamic)
- #endif
- for(int i = 0; i < roi_out->width; i++)
- {
-diff --git a/src/iop/channelmixer.c b/src/iop/channelmixer.c
-index 5f3a03f..1bb3f19 100644
---- a/src/iop/channelmixer.c
-+++ b/src/iop/channelmixer.c
-@@ -155,7 +155,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- const int ch = piece->colors;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(data) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, gray_mix_mode, ivoid, ovoid, roi_out) \
-+ shared(data) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-diff --git a/src/iop/retouch.c b/src/iop/retouch.c
-index 68a36c7..7c966b1 100644
---- a/src/iop/retouch.c
-+++ b/src/iop/retouch.c
-@@ -3525,7 +3525,10 @@ static void image_rgb2lab(float *img_src, const int width, const int height, con
- if(ch == 4 && use_sse)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(img_src) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, stride) \
-+ shared(img_src) \
-+ schedule(static)
- #endif
- for(int i = 0; i < stride; i += ch)
- {
-@@ -3541,7 +3544,10 @@ static void image_rgb2lab(float *img_src, const int width, const int height, con
- #endif
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(img_src) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, stride) \
-+ shared(img_src) \
-+ schedule(static)
- #endif
- for(int i = 0; i < stride; i += ch)
- {
-@@ -3560,7 +3566,10 @@ static void image_lab2rgb(float *img_src, const int width, const int height, con
- if(ch == 4 && use_sse)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(img_src) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, stride) \
-+ shared(img_src) \
-+ schedule(static)
- #endif
- for(int i = 0; i < stride; i += ch)
- {
-@@ -3576,7 +3585,10 @@ static void image_lab2rgb(float *img_src, const int width, const int height, con
- #endif
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(img_src) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, stride) \
-+ shared(img_src) \
-+ schedule(static)
- #endif
- for(int i = 0; i < stride; i += ch)
- {
-@@ -3597,8 +3609,12 @@ static void rt_process_stats(const float *const img_src, const int width, const
- int count = 0;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static) reduction(+ : count, l_sum) reduction(max : l_max) \
-- reduction(min : l_min)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, img_src, size) \
-+ schedule(static) \
-+ reduction(+ : count, l_sum) \
-+ reduction(max : l_max) \
-+ reduction(min : l_min)
- #endif
- for(int i = 0; i < size; i += ch)
- {
-@@ -3636,7 +3652,10 @@ static void rt_adjust_levels(float *img_src, const int width, const int height,
- const float in_inv_gamma = pow(10, tmp);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(img_src) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, in_inv_gamma, left, right, size) \
-+ shared(img_src) \
-+ schedule(static)
- #endif
- for(int i = 0; i < size; i += ch)
- {
-@@ -3701,7 +3720,10 @@ static void rt_copy_in_to_out(const float *const in, const struct dt_iop_roi_t *
- const int y_to = MIN(roi_out->height, roi_in->height);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, in, out, roi_in, roi_out, rowsize, xoffs, yoffs, \
-+ y_to) \
-+ schedule(static)
- #endif
- for(int y = 0; y < y_to; y++)
- {
-@@ -3744,7 +3766,10 @@ static void rt_build_scaled_mask(float *const mask, dt_iop_roi_t *const roi_mask
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(mask_tmp, roi_mask_scaled) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(mask, roi_in, roi_mask, x_to, y_to) \
-+ shared(mask_tmp, roi_mask_scaled) \
-+ schedule(static)
- #endif
- for(int yy = roi_mask_scaled->y; yy < y_to; yy++)
- {
-@@ -3778,7 +3803,10 @@ static void rt_copy_image_masked(float *const img_src, float *img_dest, dt_iop_r
- if(ch == 4 && use_sse)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(img_dest) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, img_src, mask_scaled, opacity, roi_dest, roi_mask_scaled) \
-+ shared(img_dest) \
-+ schedule(static)
- #endif
- for(int yy = 0; yy < roi_mask_scaled->height; yy++)
- {
-@@ -3809,7 +3837,10 @@ static void rt_copy_image_masked(float *const img_src, float *img_dest, dt_iop_r
- const int ch1 = (ch == 4) ? ch - 1 : ch;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(img_dest) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ch1, img_src, mask_scaled, opacity, roi_dest, roi_mask_scaled) \
-+ shared(img_dest) \
-+ schedule(static)
- #endif
- for(int yy = 0; yy < roi_mask_scaled->height; yy++)
- {
-@@ -3841,7 +3872,9 @@ static void rt_copy_mask_to_alpha(float *const img, dt_iop_roi_t *const roi_img,
- const float opacity)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, img, mask_scaled, opacity, roi_img, roi_mask_scaled) \
-+ schedule(static)
- #endif
- for(int yy = 0; yy < roi_mask_scaled->height; yy++)
- {
-@@ -3871,7 +3904,9 @@ static void retouch_fill_sse(float *const in, dt_iop_roi_t *const roi_in, float
- const __m128 val_fill = _mm_load_ps(valf4_fill);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, in, mask_scaled, opacity, roi_in, roi_mask_scaled, val_fill) \
-+ schedule(static)
- #endif
- for(int yy = 0; yy < roi_mask_scaled->height; yy++)
- {
-@@ -3909,7 +3944,9 @@ static void retouch_fill(float *const in, dt_iop_roi_t *const roi_in, const int
- const int ch1 = (ch == 4) ? ch - 1 : ch;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, ch1, fill_color, in, mask_scaled, opacity, roi_in, roi_mask_scaled) \
-+ schedule(static)
- #endif
- for(int yy = 0; yy < roi_mask_scaled->height; yy++)
- {
-diff --git a/src/iop/graduatednd.c b/src/iop/graduatednd.c
-index e48b4ab..6473b4a 100644
---- a/src/iop/graduatednd.c
-+++ b/src/iop/graduatednd.c
-@@ -705,7 +705,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- if(data->density > 0)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, cosv, data, filter_compression, hh_inv, hw_inv, \
-+ ivoid, ix, iy, offset, ovoid, roi_out, sinv) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -751,7 +754,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- else
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, cosv, data, filter_compression, hh_inv, hw_inv, \
-+ ivoid, ix, iy, offset, ovoid, roi_out, sinv) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -830,7 +836,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- if(data->density > 0)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, cosv, data, filter_compression, hh_inv, hw_inv, \
-+ ivoid, ix, iy, offset, ovoid, roi_out, sinv) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-@@ -878,7 +887,10 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- else
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, cosv, data, filter_compression, hh_inv, hw_inv, \
-+ ivoid, ix, iy, offset, ovoid, roi_out, sinv) \
-+ schedule(static)
- #endif
- for(int y = 0; y < roi_out->height; y++)
- {
-diff --git a/src/iop/zonesystem.c b/src/iop/zonesystem.c
-index b8a4437..a4be9ed 100644
---- a/src/iop/zonesystem.c
-+++ b/src/iop/zonesystem.c
-@@ -224,7 +224,10 @@ static void process_common_cleanup(struct dt_iop_module_t *self, dt_dev_pixelpip
- if(gauss && tmp)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(tmp) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, height, width, ivoid) \
-+ shared(tmp) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)width * height; k++) tmp[k] = ((float *)ivoid)[ch * k];
-
-@@ -233,7 +236,10 @@ static void process_common_cleanup(struct dt_iop_module_t *self, dt_dev_pixelpip
- /* create zonemap preview for input */
- dt_pthread_mutex_lock(&g->lock);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(tmp, g) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, size, width) \
-+ shared(tmp, g) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)width * height; k++)
- {
-@@ -243,7 +249,10 @@ static void process_common_cleanup(struct dt_iop_module_t *self, dt_dev_pixelpip
-
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(tmp) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, height, ovoid, width) \
-+ shared(tmp) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)width * height; k++) tmp[k] = ((float *)ovoid)[ch * k];
-
-@@ -253,7 +262,10 @@ static void process_common_cleanup(struct dt_iop_module_t *self, dt_dev_pixelpip
- /* create zonemap preview for output */
- dt_pthread_mutex_lock(&g->lock);
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(tmp, g) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, size, width) \
-+ shared(tmp, g) \
-+ schedule(static)
- #endif
- for(size_t k = 0; k < (size_t)width * height; k++)
- {
-@@ -281,7 +293,10 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const
- float *const out = (float *const)ovoid;
-
- #ifdef _OPENMP
--#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
-+#pragma omp parallel for SIMD() default(none) \
-+ dt_omp_firstprivate(ch, d, in, out, roi_out, size) \
-+ schedule(static) \
-+ collapse(2)
- #endif
- for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch)
- {
-@@ -311,7 +326,9 @@ void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, c
- const int size = d->params.size;
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(ch, d, ivoid, ovoid, roi_out, size) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-diff --git a/src/iop/demosaic.c b/src/iop/demosaic.c
-index f4a943d..57f0141 100644
---- a/src/iop/demosaic.c
-+++ b/src/iop/demosaic.c
-@@ -310,7 +310,10 @@ static void pre_median_b(float *out, const float *const in, const dt_iop_roi_t *
- for(int pass = 0; pass < num_passes; pass++)
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(filters, in, lim, roi, threshold) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(int row = 3; row < roi->height - 3; row++)
- {
-@@ -370,7 +373,10 @@ static void color_smoothing(float *out, const dt_iop_roi_t *const roi_out, const
- for(int i = 0; i < roi_out->width; i++, outp += 4) outp[3] = outp[c];
- }
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(out, c)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(roi_out, width4) \
-+ shared(out, c) \
-+ schedule(static)
- #endif
- for(int j = 1; j < roi_out->height - 1; j++)
- {
-@@ -425,7 +431,10 @@ static void green_equilibration_lavg(float *out, const float *const in, const in
- memcpy(out, in, height * width * sizeof(float));
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(out, oi, oj)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, in, thr, width) \
-+ shared(out, oi, oj) \
-+ schedule(static)
- #endif
- for(size_t j = oj; j < height - 2; j += 2)
- {
-@@ -471,7 +480,11 @@ static void green_equilibration_favg(float *out, const float *const in, const in
- const int g2_offset = oi ? -1 : 1;
- memcpy(out, in, (size_t)height * width * sizeof(float));
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) reduction(+ : sum1, sum2) shared(oi, oj)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(g2_offset, height, in, width) \
-+ reduction(+ : sum1, sum2) \
-+ shared(oi, oj) \
-+ schedule(static)
- #endif
- for(size_t j = oj; j < (height - 1); j += 2)
- {
-@@ -488,7 +501,10 @@ static void green_equilibration_favg(float *out, const float *const in, const in
- return;
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(out, oi, oj, gr_ratio)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(g2_offset, height, in, width) \
-+ shared(out, oi, oj, gr_ratio) \
-+ schedule(static)
- #endif
- for(int j = oj; j < (height - 1); j += 2)
- {
-@@ -584,7 +600,10 @@ static void xtrans_markesteijn_interpolate(float *out, const float *const in,
- // extra passes propagates out errors at edges, hence need more padding
- const int pad_tile = (passes == 1) ? 12 : 17;
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(sgrow, sgcol, allhex, out) schedule(dynamic)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(all_buffers, buffer_size, dir, height, in, ndir, pad_tile, passes, roi_in, width, xtrans) \
-+ shared(sgrow, sgcol, allhex, out) \
-+ schedule(dynamic)
- #endif
- // step through TSxTS cells of image, each tile overlapping the
- // prior as interpolation needs a substantial border
-@@ -1597,7 +1616,10 @@ static void xtrans_fdc_interpolate(float *out, const float *const in, const dt_i
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(sgrow, sgcol, allhex, out, rowoffset, coloffset) schedule(dynamic)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(all_buffers, buffer_size, dir, height, in, ndir, pad_tile, roi_in, width, xtrans, directionality, harr, Minv, modarr) \
-+ shared(sgrow, sgcol, allhex, out, rowoffset, coloffset) \
-+ schedule(dynamic)
- #endif
- // step through TSxTS cells of image, each tile overlapping the
- // prior as interpolation needs a substantial border
-@@ -2089,7 +2111,10 @@ static void lin_interpolate(float *out, const float *const in, const dt_iop_roi_
-
- // border interpolate
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(colors, filters, in, roi_in, roi_out, xtrans) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(int row = 0; row < roi_out->height; row++)
- for(int col = 0; col < roi_out->width; col++)
-@@ -2163,7 +2188,10 @@ static void lin_interpolate(float *out, const float *const in, const dt_iop_roi_
- }
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(colors, in, lookup, roi_in, roi_out, size) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(int row = 1; row < roi_out->height - 1; row++)
- {
-@@ -2297,7 +2325,11 @@ static void vng_interpolate(float *out, const float *const in,
- for(int row = 2; row < height - 2; row++) /* Do VNG interpolation */
- {
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(row, code, brow, out, filters4) private(ip) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(colors, pcol, prow, roi_in, width, xtrans) \
-+ shared(row, code, brow, out, filters4) \
-+ private(ip) \
-+ schedule(static)
- #endif
- for(int col = 2; col < width - 2; col++)
- {
-@@ -2362,7 +2394,10 @@ static void vng_interpolate(float *out, const float *const in,
- if(filters != 9 && !FILTERS_ARE_4BAYER(filters)) // x-trans or CYGM/RGBE
- // for Bayer mix the two greens to make VNG4
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, width) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(int i = 0; i < height * width; i++) out[i * 4 + 1] = (out[i * 4 + 1] + out[i * 4 + 3]) / 2.0f;
- }
-@@ -2376,7 +2411,10 @@ static void passthrough_monochrome(float *out, const float *const in, dt_iop_roi
- assert(roi_in->height >= roi_out->height);
-
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(out) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(in, roi_out, roi_in) \
-+ shared(out) \
-+ schedule(static)
- #endif
- for(int j = 0; j < roi_out->height; j++)
- {
-@@ -2445,7 +2483,10 @@ static void demosaic_ppg(float *const out, const float *const in, const dt_iop_r
- }
- // for all pixels: interpolate green into float array, or copy color.
- #ifdef _OPENMP
--#pragma omp parallel for default(none) shared(input) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(filters, out, roi_in, roi_out) \
-+ shared(input) \
-+ schedule(static)
- #endif
- for(int j = offy; j < roi_out->height - offY; j++)
- {
-@@ -2525,7 +2566,9 @@ static void demosaic_ppg(float *const out, const float *const in, const dt_iop_r
-
- // for all pixels: interpolate colors into float array
- #ifdef _OPENMP
--#pragma omp parallel for default(none) schedule(static)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(filters, out, roi_out) \
-+ schedule(static)
- #endif
- for(int j = 1; j < roi_out->height - 1; j++)
- {
-diff --git a/src/chart/main.c b/src/chart/main.c
-index 2df0c83..450758c 100644
---- a/src/chart/main.c
-+++ b/src/chart/main.c
-@@ -1424,10 +1424,13 @@ static void get_xyz_sample_from_image(const image_t *const image, float shrink,
- double sample_x = 0.0, sample_y = 0.0, sample_z = 0.0;
- size_t n_samples = 0;
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) \
-- shared(corners, x_start, y_start, x_end, y_end, delta_x_top, delta_y_top, delta_x_bottom, delta_y_bottom, \
-- delta_x_left, delta_y_left, delta_x_right, \
-- delta_y_right) reduction(+ : n_samples, sample_x, sample_y, sample_z)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(image) \
-+ shared(corners, x_start, y_start, x_end, y_end, delta_x_top, delta_y_top, \
-+ delta_x_bottom, delta_y_bottom, delta_x_left, delta_y_left, \
-+ delta_x_right, delta_y_right) \
-+ reduction(+ : n_samples, sample_x, sample_y, sample_z) \
-+ schedule(static)
- #endif
- for(int y = y_start; y < y_end; y++)
- for(int x = x_start; x < x_end; x++)
-@@ -1542,7 +1545,10 @@ static void free_image(image_t *image)
- static void image_lab_to_xyz(float *image, const int width, const int height)
- {
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(image)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, width) \
-+ shared(image) \
-+ schedule(static)
- #endif
- for(int y = 0; y < height; y++)
- for(int x = 0; x < width; x++)
-diff --git a/src/chart/cairo.c b/src/chart/cairo.c
-index acc2717..e5b4f12 100644
---- a/src/chart/cairo.c
-+++ b/src/chart/cairo.c
-@@ -187,7 +187,10 @@ cairo_surface_t *cairo_surface_create_from_xyz_data(const float *const image, co
- unsigned char *rgbbuf = (unsigned char *)malloc(sizeof(unsigned char) * height * width * 4);
-
- #ifdef _OPENMP
--#pragma omp parallel for schedule(static) default(none) shared(rgbbuf)
-+#pragma omp parallel for default(none) \
-+ dt_omp_firstprivate(height, image, width) \
-+ shared(rgbbuf) \
-+ schedule(static)
- #endif
- for(int y = 0; y < height; y++)
- {
---
-2.22.0
-