OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_arch.h
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_arch.h
34// Author: Aous Naman
35// Date: 28 August 2019
36//***************************************************************************/
37
38
39#ifndef OJPH_ARCH_H
40#define OJPH_ARCH_H
41
42#include <cstdio>
43#include <cstdint>
44#include <cmath>
45
46#include "ojph_defs.h"
47
48
50// preprocessor directives for compiler
// Defines OJPH_COMPILER_MSVC when _MSC_VER is present, otherwise
// OJPH_COMPILER_GNUC when __GNUC__ is present. _MSC_VER is tested first, so
// a compiler that defines both macros is classified as MSVC. Neither macro
// is defined for any other compiler; the helpers later in this header then
// take their portable fallback paths.
52#ifdef _MSC_VER
53#define OJPH_COMPILER_MSVC
54#elif (defined __GNUC__)
55#define OJPH_COMPILER_GNUC
56#endif
57
// OJPH_EMSCRIPTEN mirrors __EMSCRIPTEN__; it is tested independently of the
// compiler macros above, so it can be defined together with one of them.
58#ifdef __EMSCRIPTEN__
59#define OJPH_EMSCRIPTEN
60#endif
61
// MSVC only: pulls in the header for the intrinsics used later in this file
// (__popcnt, _BitScanReverse, _BitScanReverse64, _BitScanForward).
62#ifdef OJPH_COMPILER_MSVC
63#include <intrin.h>
64#endif
65
67// preprocessor directives for architecture
// Defines exactly one OJPH_ARCH_* macro. The chain is evaluated top to
// bottom and the first matching group wins:
//   OJPH_ARCH_ARM     - 32-bit and 64-bit ARM share this single macro
//   OJPH_ARCH_I386    - 32-bit x86
//   OJPH_ARCH_X86_64  - 64-bit x86
//   OJPH_ARCH_IA64    - Itanium
//   OJPH_ARCH_PPC64 / OJPH_ARCH_PPC - PowerPC, split by the nested 64-bit test
//   OJPH_ARCH_UNKNOWN - none of the above matched
69#if defined(__arm__) || defined(__TARGET_ARCH_ARM) \
70 || defined(__aarch64__) || defined(_M_ARM64)
71 #define OJPH_ARCH_ARM
72#elif defined(__i386) || defined(__i386__) || defined(_M_IX86)
73 #define OJPH_ARCH_I386
74#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) \
75 || defined(_M_X64)
76 #define OJPH_ARCH_X86_64
77#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
78 #define OJPH_ARCH_IA64
79#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \
80 || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC) \
81 || defined(_M_MPPC) || defined(_M_PPC)
82 #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__)
83 #define OJPH_ARCH_PPC64
84 #else
85 #define OJPH_ARCH_PPC
86 #endif
87#else
88 #define OJPH_ARCH_UNKNOWN
89#endif
90
91namespace ojph {
93 // disable SIMD on every architecture other than x86-64, i386 and ARM
// This covers OJPH_ARCH_UNKNOWN as well as the IA64 and PowerPC targets
// detected above. The last term of the condition keeps the macro from being
// redefined when the build has already set OJPH_DISABLE_SIMD.
95#if !defined(OJPH_ARCH_X86_64) && !defined(OJPH_ARCH_I386) && \
96 !defined(OJPH_ARCH_ARM) && !defined(OJPH_DISABLE_SIMD)
97#define OJPH_DISABLE_SIMD
98#endif // not x86-64 / i386 / ARM, and OJPH_DISABLE_SIMD not already set
99
101 // OS detection definitions
// Defines at most one OJPH_OS_* macro; the first matching test wins and no
// macro is defined when none of the tests match. __ANDROID__ is tested
// before __linux, so a target that defines both is classified as Android.
103#if (defined WIN32) || (defined _WIN32) || (defined _WIN64)
104#define OJPH_OS_WINDOWS
105#elif (defined __APPLE__)
106#define OJPH_OS_APPLE
107#elif (defined __ANDROID__)
108#define OJPH_OS_ANDROID
109#elif (defined __linux)
110#define OJPH_OS_LINUX
111#elif (defined __FreeBSD__)
112#define OJPH_OS_FREEBSD
113#elif (defined __OpenBSD__)
114#define OJPH_OS_OPENBSD
115#endif
116
118 // defines for dll
// OJPH_EXPORT expands to __declspec(dllexport) only on Windows when
// OJPH_BUILD_SHARED_LIBRARY is defined; in every other configuration it
// expands to nothing.
// NOTE(review): there is no __declspec(dllimport) branch here, so code that
// includes this header without OJPH_BUILD_SHARED_LIBRARY sees plain
// declarations - confirm this is intended.
120#if defined(OJPH_OS_WINDOWS) && defined(OJPH_BUILD_SHARED_LIBRARY)
121#define OJPH_EXPORT __declspec(dllexport)
122#else
123#define OJPH_EXPORT
124#endif
125
127 // cpu features
// Declaration only; the definition is not part of this header.
// NOTE(review): this listing skips source lines 128-129, and the
// cross-reference index at the end of the listing shows the declaration as
// `OJPH_EXPORT int get_cpu_ext_level()` - confirm against the real header.
130 int get_cpu_ext_level();
131
// NOTE(review): the enumerators (source lines 133-144) are missing from this
// listing. The index names them, in line order, X86_CPU_EXT_LEVEL_GENERIC,
// _MMX, _SSE, _SSE2, _SSE3, _SSSE3, _SSE41, _SSE42, _AVX, _AVX2, _AVX2FMA,
// _AVX512. Their numeric values are not visible here.
132 enum : int {
145 };
146
// NOTE(review): the enumerators (source lines 148-152) are missing from this
// listing. The index names them, in line order, ARM_CPU_EXT_LEVEL_GENERIC,
// _NEON, _ASIMD, _SVE, _SVE2. Their numeric values are not visible here.
147 enum : int {
153 };
154
156 static inline ui32 population_count(ui32 val)
157 {
158 #if defined(OJPH_COMPILER_MSVC) \
159 && (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
160 return (ui32)__popcnt(val);
161 #elif (defined OJPH_COMPILER_GNUC)
162 return (ui32)__builtin_popcount(val);
163 #else
164 val -= ((val >> 1) & 0x55555555);
165 val = (((val >> 2) & 0x33333333) + (val & 0x33333333));
166 val = (((val >> 4) + val) & 0x0f0f0f0f);
167 val += (val >> 8);
168 val += (val >> 16);
169 return (int)(val & 0x0000003f);
170 #endif
171 }
172
174#ifdef OJPH_COMPILER_MSVC
175 #pragma intrinsic(_BitScanReverse)
176#endif
177 static inline ui32 count_leading_zeros(ui32 val)
178 {
179 #ifdef OJPH_COMPILER_MSVC
180 unsigned long result = 0;
181 _BitScanReverse(&result, val);
182 return 31 ^ (ui32)result;
183 #elif (defined OJPH_COMPILER_GNUC)
184 return (ui32)__builtin_clz(val);
185 #else
186 val |= (val >> 1);
187 val |= (val >> 2);
188 val |= (val >> 4);
189 val |= (val >> 8);
190 val |= (val >> 16);
191 return 32 - population_count(val);
192 #endif
193 }
194
196#ifdef OJPH_COMPILER_MSVC
197 #if (defined OJPH_ARCH_X86_64 || defined OJPH_ARCH_ARM)
198 #pragma intrinsic(_BitScanReverse64)
199 #elif (defined OJPH_ARCH_I386)
200 #pragma intrinsic(_BitScanReverse)
201 #else
202 #error Error unsupport MSVC version
203 #endif
204#endif
205 static inline ui32 count_leading_zeros(ui64 val)
206 {
207 #ifdef OJPH_COMPILER_MSVC
208 unsigned long result = 0;
209 #if (defined OJPH_ARCH_X86_64) || (defined OJPH_ARCH_ARM)
210 _BitScanReverse64(&result, val);
211 #elif (defined OJPH_ARCH_I386)
212 ui32 msb = (ui32)(val >> 32), lsb = (ui32)val;
213 if (msb == 0)
214 _BitScanReverse(&result, lsb);
215 else {
216 _BitScanReverse(&result, msb);
217 result += 32;
218 }
219 #else
220 #error Error unsupport MSVC version
221 #endif
222 return 63 ^ (ui32)result;
223 #elif (defined OJPH_COMPILER_GNUC)
224 return (ui32)__builtin_clzll(val);
225 #else
226 val |= (val >> 1);
227 val |= (val >> 2);
228 val |= (val >> 4);
229 val |= (val >> 8);
230 val |= (val >> 16);
231 val |= (val >> 32);
232 return 64 - population_count64(val);
233 #endif
234 }
235
237#ifdef OJPH_COMPILER_MSVC
238 #pragma intrinsic(_BitScanForward)
239#endif
240 static inline ui32 count_trailing_zeros(ui32 val)
241 {
242 #ifdef OJPH_COMPILER_MSVC
243 unsigned long result = 0;
244 _BitScanForward(&result, val);
245 return (ui32)result;
246 #elif (defined OJPH_COMPILER_GNUC)
247 return (ui32)__builtin_ctz(val);
248 #else
249 val |= (val << 1);
250 val |= (val << 2);
251 val |= (val << 4);
252 val |= (val << 8);
253 val |= (val << 16);
254 return 32 - population_count(val);
255 #endif
256 }
257
259 static inline si32 ojph_round(float val)
260 {
261 #ifdef OJPH_COMPILER_MSVC
262 return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
263 #elif (defined OJPH_COMPILER_GNUC)
264 return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
265 #else
266 return (si32)round(val);
267 #endif
268 }
269
271 static inline si32 ojph_trunc(float val)
272 {
273 #ifdef OJPH_COMPILER_MSVC
274 return (si32)(val);
275 #elif (defined OJPH_COMPILER_GNUC)
276 return (si32)(val);
277 #else
278 return (si32)trunc(val);
279 #endif
280 }
281
283 // constants
285 #ifndef OJPH_EMSCRIPTEN
286 const ui32 byte_alignment = 64; // 64 bytes == 512 bits
289 #else
290 const ui32 byte_alignment = 16; // 16 bytes == 128 bits
292 const ui32 object_alignment = 8;
293 #endif
294
296 // templates for alignment
298
300 // finds the size such that it is a multiple of byte_alignment
301 template <typename T, ui32 N>
302 size_t calc_aligned_size(size_t size) {
303 size = size * sizeof(T) + N - 1;
304 size &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
305 size >>= (63 - count_leading_zeros((ui64)sizeof(T)));
306 return size;
307 }
308
310 // moves the pointer to first address that is a multiple of byte_alignment
311 template <typename T, ui32 N>
312 inline T *align_ptr(T *ptr) {
313 intptr_t p = reinterpret_cast<intptr_t>(ptr);
314 p += N - 1;
315 p &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
316 return reinterpret_cast<T *>(p);
317 }
318
319}
320
321#endif // !OJPH_ARCH_H
const ui32 object_alignment
Definition ojph_arch.h:288
@ ARM_CPU_EXT_LEVEL_SVE
Definition ojph_arch.h:151
@ ARM_CPU_EXT_LEVEL_SVE2
Definition ojph_arch.h:152
@ ARM_CPU_EXT_LEVEL_NEON
Definition ojph_arch.h:149
@ ARM_CPU_EXT_LEVEL_GENERIC
Definition ojph_arch.h:148
@ ARM_CPU_EXT_LEVEL_ASIMD
Definition ojph_arch.h:150
const ui32 byte_alignment
Definition ojph_arch.h:286
uint64_t ui64
Definition ojph_defs.h:56
static si32 ojph_round(float val)
Definition ojph_arch.h:259
size_t calc_aligned_size(size_t size)
Definition ojph_arch.h:302
T * align_ptr(T *ptr)
Definition ojph_arch.h:312
static ui32 population_count(ui32 val)
Definition ojph_arch.h:156
OJPH_EXPORT int get_cpu_ext_level()
static si32 ojph_trunc(float val)
Definition ojph_arch.h:271
static ui32 count_trailing_zeros(ui32 val)
Definition ojph_arch.h:240
static ui32 count_leading_zeros(ui32 val)
Definition ojph_arch.h:177
int32_t si32
Definition ojph_defs.h:55
const ui32 log_byte_alignment
Definition ojph_arch.h:287
uint32_t ui32
Definition ojph_defs.h:54
@ X86_CPU_EXT_LEVEL_AVX2
Definition ojph_arch.h:142
@ X86_CPU_EXT_LEVEL_AVX
Definition ojph_arch.h:141
@ X86_CPU_EXT_LEVEL_AVX512
Definition ojph_arch.h:144
@ X86_CPU_EXT_LEVEL_GENERIC
Definition ojph_arch.h:133
@ X86_CPU_EXT_LEVEL_SSE2
Definition ojph_arch.h:136
@ X86_CPU_EXT_LEVEL_SSE41
Definition ojph_arch.h:139
@ X86_CPU_EXT_LEVEL_SSE
Definition ojph_arch.h:135
@ X86_CPU_EXT_LEVEL_MMX
Definition ojph_arch.h:134
@ X86_CPU_EXT_LEVEL_SSE42
Definition ojph_arch.h:140
@ X86_CPU_EXT_LEVEL_SSSE3
Definition ojph_arch.h:138
@ X86_CPU_EXT_LEVEL_SSE3
Definition ojph_arch.h:137
@ X86_CPU_EXT_LEVEL_AVX2FMA
Definition ojph_arch.h:143
#define OJPH_EXPORT
Definition ojph_arch.h:123