aboutsummaryrefslogtreecommitdiff
path: root/ext/ipp/sources/ippcp/cpinit.c
blob: 267a1dbda7c5f60a60ae2d868a20d0551ff25062 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
/*******************************************************************************
* Copyright 2001-2018 Intel Corporation
* All Rights Reserved.
*
* If this  software was obtained  under the  Intel Simplified  Software License,
* the following terms apply:
*
* The source code,  information  and material  ("Material") contained  herein is
* owned by Intel Corporation or its  suppliers or licensors,  and  title to such
* Material remains with Intel  Corporation or its  suppliers or  licensors.  The
* Material  contains  proprietary  information  of  Intel or  its suppliers  and
* licensors.  The Material is protected by  worldwide copyright  laws and treaty
* provisions.  No part  of  the  Material   may  be  used,  copied,  reproduced,
* modified, published,  uploaded, posted, transmitted,  distributed or disclosed
* in any way without Intel's prior express written permission.  No license under
* any patent,  copyright or other  intellectual property rights  in the Material
* is granted to  or  conferred  upon  you,  either   expressly,  by implication,
* inducement,  estoppel  or  otherwise.  Any  license   under such  intellectual
* property rights must be express and approved by Intel in writing.
*
* Unless otherwise agreed by Intel in writing,  you may not remove or alter this
* notice or  any  other  notice   embedded  in  Materials  by  Intel  or Intel's
* suppliers or licensors in any way.
*
*
* If this  software  was obtained  under the  Apache License,  Version  2.0 (the
* "License"), the following terms apply:
*
* You may  not use this  file except  in compliance  with  the License.  You may
* obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless  required  by   applicable  law  or  agreed  to  in  writing,  software
* distributed under the License  is distributed  on an  "AS IS"  BASIS,  WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* See the   License  for the   specific  language   governing   permissions  and
* limitations under the License.
*******************************************************************************/

#if defined( _OPENMP )
  #include <omp.h>
#endif

#include "owndefs.h"
#include "ippcpdefs.h"
#include "ippcp.h"
#ifdef _PCS
#undef _PCS
#define _MY_PCS_DISABLED
#endif
#include "dispatcher.h"
#ifdef _MY_PCS_DISABLED
#define _PCS
#endif
#if defined( _IPP_DATA )

/* Feature bits stored by cpGetFeatures() (or by a NOCHECK override in
 * owncpSetCpuFeaturesAndIdx()); 0 until one of them has run. */
static Ipp64u cpFeatures = 0;
/* Feature bits currently enabled; this is the mask the cpGetFeature() family tests. */
static Ipp64u cpFeaturesMask = 0;

/* Fills *pFeaturesMask from CPUID and refreshes both statics above. */
static int cpGetFeatures( Ipp64u* pFeaturesMask );
/* Externally implemented helpers; they are declared with full prototypes
 * ("( void )" instead of an empty parameter list) so that calls are type-checked. */
extern void IPP_CDECL cpGetReg( int* buf, int valEAX, int valECX );
extern int IPP_CDECL cp_is_avx_extension( void );
extern int IPP_CDECL cp_is_avx512_extension( void );
/* Defined near the end of this file; declared here because ippcpInit/ippcpSetCpuFeatures use it earlier. */
IppStatus owncpSetCpuFeaturesAndIdx( Ipp64u cpuFeatures, int* index );

/* Returns the mask of CPU features that are currently enabled (cpFeaturesMask). */
IPPFUN( Ipp64u, ippcpGetEnabledCpuFeatures, ( void ))
{
    const Ipp64u enabledMask = cpFeaturesMask;
    return enabledMask;
}

/*===================================================================*/
/*
 * Reports the CPU feature mask through *pFeaturesMask.
 *
 * A non-zero cached cpFeatures value is returned as is; otherwise
 * cpGetFeatures() is run to detect (and cache) the features.
 *
 * Returns ippStsNoErr on success, ippStsNotSupportedCpu when
 * cpGetFeatures() reports failure (returns 0). The pointer argument is
 * validated by IPP_BAD_PTR1_RET.
 */
IPPFUN( IppStatus, ippcpGetCpuFeatures, ( Ipp64u* pFeaturesMask ))
{
  IPP_BAD_PTR1_RET( pFeaturesMask )

  if( 0 == cpFeatures ){
      /* nothing cached yet - detect now */
      if( 0 == cpGetFeatures( pFeaturesMask )) return ippStsNotSupportedCpu;
  } else {
      *pFeaturesMask = cpFeatures;
  }
  return ippStsNoErr;
}

/*===================================================================*/

/*
 * Returns 1 when every bit of Feature is set in the enabled-features mask
 * (cpFeaturesMask), 0 otherwise.
 */
int cpGetFeature( Ipp64u Feature )
{
  const Ipp64u enabledBits = cpFeaturesMask & Feature;
  return ( enabledBits == Feature ) ? 1 : 0;
}

/*
 * Prefixed clones of cpGetFeature(): each returns 1 when all bits of Feature
 * are set in cpFeaturesMask and 0 otherwise. The prefixes (k0, n0, l9, e9, y8,
 * h9, g9, p8) correspond to the library letter codes mentioned in the comments
 * of owncpFeaturesToIdx().
 */
int k0_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
int n0_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
int l9_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
int e9_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
int y8_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}

int h9_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
int g9_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
int p8_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}

/*===================================================================*/
/* Single-bit masks BITnn == (1 << nn), used by cpGetFeatures() to test
 * individual bits of the 32-bit register values returned by cpGetReg(). */
#define BIT00 0x00000001
#define BIT01 0x00000002
#define BIT02 0x00000004
#define BIT03 0x00000008
#define BIT04 0x00000010
#define BIT05 0x00000020
#define BIT06 0x00000040
#define BIT07 0x00000080
#define BIT08 0x00000100
#define BIT09 0x00000200
#define BIT10 0x00000400
#define BIT11 0x00000800
#define BIT12 0x00001000
#define BIT13 0x00002000
#define BIT14 0x00004000
#define BIT15 0x00008000
#define BIT16 0x00010000
#define BIT17 0x00020000
#define BIT18 0x00040000
#define BIT19 0x00080000
#define BIT20 0x00100000
#define BIT21 0x00200000
#define BIT22 0x00400000
#define BIT23 0x00800000
#define BIT24 0x01000000
#define BIT25 0x02000000
#define BIT26 0x04000000
#define BIT27 0x08000000
#define BIT28 0x10000000
#define BIT29 0x20000000
#define BIT30 0x40000000
#define BIT31 0x80000000


/*
 * Builds the CPU feature mask from CPUID data obtained through cpGetReg().
 *
 * The function reads leaf 0 and leaf 0x80000000 (maximum basic / extended
 * leaf numbers), leaf 1, leaf 7 (only when the maximum basic leaf is >= 7)
 * and leaf 0x80000001 (only when the maximum extended leaf allows it), and
 * translates individual register bits into ippCPUID_xxx / ippAVXxxx flags.
 *
 * Side effects: the result is stored in both cpFeatures and cpFeaturesMask
 * (i.e. all detected features become enabled) and in *pFeaturesMask.
 *
 * Returns: always 1 in the current implementation.
 */
static int cpGetFeatures( Ipp64u* pFeaturesMask )
{
    Ipp32u  buf[4];                     // cpGetReg() output; read below as [0]=eax, [1]=ebx, [2]=ecx, [3]=edx
    Ipp32u  eax_, ebx_, ecx_, edx_, tmp;
    Ipp64u  mask;
    int flgFMA=0, flgINT=0, flgGPR=0;   // for avx2
    Ipp32u idBaseMax, idExtdMax;

    cpGetReg((int*)buf, 0, 0);          //get max value for basic info.
    idBaseMax = buf[0];
    cpGetReg((int*)buf, 0x80000000, 0); //get max value for extended info.
    idExtdMax = buf[0];

    cpGetReg( (int*)buf, 1, 0 );
    eax_ = (Ipp32u)buf[0];
    ecx_ = (Ipp32u)buf[2];
    edx_ = (Ipp32u)buf[3];
    mask = 0;
    if( edx_ & BIT23 ) mask |= ippCPUID_MMX;          // edx[23] - MMX(TM) Technology
    if( edx_ & BIT25 ) mask |= ippCPUID_SSE;          // edx[25] - Intel(R) Streaming SIMD Extensions (Intel(R) SSE)
    if( edx_ & BIT26 ) mask |= ippCPUID_SSE2;         // edx[26] - Intel(R) Streaming SIMD Extensions 2 (Intel(R) SSE2)
    if( ecx_ & BIT00 ) mask |= ippCPUID_SSE3;         // ecx[0]  - Intel(R) Streaming SIMD Extensions 3 (Intel(R) SSE3) (formerly codenamed Prescott)
    if( ecx_ & BIT09 ) mask |= ippCPUID_SSSE3;        // ecx[9]  - Supplemental Streaming SIMD Extensions 3 (SSSE3) (formerly codenamed Merom)
    if( ecx_ & BIT22 ) mask |= ippCPUID_MOVBE;        // ecx[22] - Intel(R) instruction MOVBE (Intel Atom(R) processor)
    if( ecx_ & BIT19 ) mask |= ippCPUID_SSE41;        // ecx[19] - Intel(R) Streaming SIMD Extensions 4.1 (Intel(R) SSE4.1) (formerly codenamed Penryn)
    if( ecx_ & BIT20 ) mask |= ippCPUID_SSE42;        // ecx[20] - Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) (formerly codenamed Nehalem)
    if( ecx_ & BIT28 ) mask |= ippCPUID_AVX;          // ecx[28] - Intel(R) Advanced Vector Extensions (Intel(R) AVX) (formerly codenamed Sandy Bridge)
    // 0x18000000 == BIT27 | BIT28: the OS-support probe runs only when both bits are set
    // (bit 28 is AVX, see above; bit 27 is presumably OSXSAVE - confirm against the CPUID documentation)
    if(( ecx_ & 0x18000000 ) == 0x18000000 ){
        tmp = (Ipp32u)cp_is_avx_extension();
        if( tmp & BIT00 ) mask |= ippAVX_ENABLEDBYOS; // Intel(R) AVX is supported by OS
    }
    if( ecx_ & BIT25 ) mask |= ippCPUID_AES;          // ecx[25] - Intel(R) AES New Instructions
    if( ecx_ & BIT01 ) mask |= ippCPUID_CLMUL;        // ecx[1]  - Intel(R) instruction PCLMULQDQ
    if( ecx_ & BIT30 ) mask |= ippCPUID_RDRAND;       // ecx[30] - Intel(R) instruction RDRAND
    if( ecx_ & BIT29 ) mask |= ippCPUID_F16C;         // ecx[29] - Intel(R) instruction F16C
         // Intel(R) AVX2 instructions extension: only if 3 features are enabled at once:
         // FMA, Intel(R) AVX 256 int & GPR BMI (bit-manipulation);
    if( ecx_ & BIT12 ) flgFMA = 1; else flgFMA = 0;   // ecx[12] - FMA 128 & 256 bit
    if( idBaseMax >= 7 ){                             // get CPUID.eax = 7
       cpGetReg( (int*)buf, 0x7, 0 );
       ebx_ = (Ipp32u)buf[1];
       ecx_ = (Ipp32u)buf[2];
       edx_ = (Ipp32u)buf[3];
       if( ebx_ & BIT05 ) flgINT = 1;
       else flgINT = 0;                               //ebx[5], Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2) (int 256bits)
           // ebx[3] - enabled ANDN, BEXTR, BLSI, BLSMK, BLSR, TZCNT
           // ebx[8] - enabled BZHI, MULX, PDEP, PEXT, RORX, SARX, SHLX, SHRX
       if(( ebx_ & BIT03 )&&( ebx_ & BIT08 )) flgGPR = 1;
       else flgGPR = 0;                               // VEX-encoded GPR instructions (GPR BMI)
           // Intel(R) architecture formerly codenamed Broadwell instructions extension
       if( ebx_ & BIT19 ) mask |= ippCPUID_ADCOX;     // eax[0x7] -->> ebx:: Bit 19: Intel(R) instructions ADOX/ADCX
       if( ebx_ & BIT18 ) mask |= ippCPUID_RDSEED;    // eax[0x7] -->> ebx:: Bit 18: Intel(R) instruction RDSEED
       if( ebx_ & BIT29 ) mask |= ippCPUID_SHA;       // eax[0x7] -->> ebx:: Bit 29: Intel(R) Secure Hash Algorithm Extensions
           // Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512) extension
       if(cp_is_avx512_extension()){
           mask |= ippAVX512_ENABLEDBYOS;             // Intel(R) AVX-512 is supported by OS
       }
       if( ebx_ & BIT16 ) mask |= ippCPUID_AVX512F;   // ebx[16] - Intel(R) AVX-512 Foundation
       if( ebx_ & BIT26 ) mask |= ippCPUID_AVX512PF;  // ebx[26] - Intel(R) AVX-512 Prefetch instructions
       if( ebx_ & BIT27 ) mask |= ippCPUID_AVX512ER;  // ebx[27] - Intel(R) AVX-512 Exponential and Reciprocal instructions
       if( ebx_ & BIT28 ) mask |= ippCPUID_AVX512CD;  // ebx[28] - Intel(R) AVX-512 Conflict Detection
       if( ebx_ & BIT17 ) mask |= ippCPUID_AVX512DQ;  // ebx[17] - Intel(R) AVX-512 Dword & Quadword
       if( ebx_ & BIT30 ) mask |= ippCPUID_AVX512BW;  // ebx[30] - Intel(R) AVX-512 Byte & Word
       if( ebx_ & BIT31 ) mask |= ippCPUID_AVX512VL;  // ebx[31] - Intel(R) AVX-512 Vector Length extensions
       if( ecx_ & BIT01 ) mask |= ippCPUID_AVX512VBMI; // ecx[01] - Intel(R) AVX-512 Vector Byte Manipulation Instructions
       if( edx_ & BIT02 ) mask |= ippCPUID_AVX512_4VNNIW; // edx[02] - Intel(R) AVX-512 Vector instructions for deep learning enhanced word variable precision
       if( edx_ & BIT03 ) mask |= ippCPUID_AVX512_4FMADDPS; // edx[03] - Intel(R) AVX-512 Vector instructions for deep learning floating-point single precision
       // bitwise OR between ippCPUID_MPX & ippCPUID_AVX flags can be used to define that arch is GE than formerly codenamed Skylake
       if( ebx_ & BIT14 ) mask |= ippCPUID_MPX;       // ebx[14] - Intel(R) Memory Protection Extensions (Intel(R) MPX)
       if( ebx_ & BIT21 ) mask |= ippCPUID_AVX512IFMA;  // ebx[21] - Intel(R) AVX-512 IFMA PMADD52
    }
    mask = ( flgFMA && flgINT && flgGPR ) ? (mask | ippCPUID_AVX2) : mask; // to separate Intel(R) AVX2 flags here

    if( idExtdMax >= 0x80000001 ){ // get CPUID.eax=0x80000001
       cpGetReg( (int*)buf, 0x80000001, 0 );
       ecx_ = (Ipp32u)buf[2];
           // Intel(R) architecture formerly codenamed Broadwell instructions extension
       if( ecx_ & BIT08 ) mask |= ippCPUID_PREFETCHW; // eax[0x80000001] -->> ecx:: Bit 8: Intel(R) instruction PREFETCHW
    }
       // Intel(R) architecture formerly codenamed Knights Corner
       // (the shifts isolate bits 4..11 of the leaf-1 eax value, assuming Ipp32u is 32 bits wide, and compare them with 0xb1)
    if(((( eax_ << 20 ) >> 24 ) ^ 0xb1 ) == 0 ){
        mask = mask | ippCPUID_KNC;
    }
    cpFeatures = mask;
    cpFeaturesMask = mask; /* all CPU features are enabled by default */
    *pFeaturesMask = cpFeatures;
    return 1; /* if somebody need to check for cpuid support - do it at the top of function and return 0 if it's not supported */
}

/* Library index selected for the merged (static) build; -1 until the static
 * ippcpSetCpuFeatures() stores the index computed by owncpSetCpuFeaturesAndIdx(). */
int ippcpJumpIndexForMergedLibs = -1;
/* Thread count reported by ippcpGetEnabledNumThreads(); updated by ippcpSetNumThreads(). */
static int cpthreads_omp_of_n_ipp = 1;

/* Returns the thread count currently stored in cpthreads_omp_of_n_ipp. */
IPPFUN( int, ippcpGetEnabledNumThreads,( void ))
{
    const int enabledThreads = cpthreads_omp_of_n_ipp;
    return enabledThreads;
}


/* Feature sets that owncpFeaturesToIdx() requires in full before selecting
 * the corresponding AVX-512 library:
 *   AVX3X_FEATURES - Intel(R) Xeon(R) processor flavour (F, CD, VL, BW, DQ);
 *   AVX3M_FEATURES - Intel(R) Many Integrated Core Architecture flavour (F, CD, PF, ER). */
#define AVX3X_FEATURES ( ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512VL|ippCPUID_AVX512BW|ippCPUID_AVX512DQ )
#define AVX3M_FEATURES ( ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512PF|ippCPUID_AVX512ER )
// AVX3X_FEATURES means Intel(R) Xeon(R) processor
// AVX3M_FEATURES means Intel(R) Many Integrated Core Architecture


/*
 * Maps a requested feature mask to a library index.
 *
 * The branches are tried from the most capable library to the least capable
 * one; the first branch whose key feature(s) are present in *cpuFeatures wins.
 * The AVX and AVX-512 branches additionally require the matching
 * "enabled by OS" flag in the detected features (the file-scope cpFeatures,
 * not the requested mask).
 *
 * Parameters:
 *   cpuFeatures  in/out: requested features; on return the full feature mask
 *                of the selected library (xxx_MSK) is OR-ed into it.
 *   index        out: LIB_xxx index of the selected library (reset to 0 first).
 *
 * Returns:
 *   ippStsNoErr                on success;
 *   ippStsNotSupportedCpu      for the MMX branch, and for the SSE branch on
 *                              the x64 targets listed in the #if below;
 *   ippStsFeaturesCombination  when the requested mask lacks some bits of the
 *                              selected library mask and no other status was set.
 */
IppStatus owncpFeaturesToIdx(  Ipp64u* cpuFeatures, int* index )
{
   IppStatus ownStatus = ippStsNoErr;
   Ipp64u    mask = 0;

   *index = 0;

   if(( AVX3X_FEATURES  == ( *cpuFeatures & AVX3X_FEATURES  ))&&
      ( ippAVX512_ENABLEDBYOS & cpFeatures )){                         /* Intel(R) architecture formerly codenamed Skylake ia32=S0, x64=K0 */
         mask = AVX3X_MSK;
         *index = LIB_AVX3X;
   } else
   if(( AVX3M_FEATURES  == ( *cpuFeatures & AVX3M_FEATURES  ))&&
      ( ippAVX512_ENABLEDBYOS & cpFeatures )){                         /* Intel(R) architecture formerly codenamed Knights Landing ia32=i0, x64=N0 */
       mask = AVX3M_MSK;
       *index = LIB_AVX3M;
   } else
   if(( ippCPUID_AVX2  == ( *cpuFeatures & ippCPUID_AVX2  ))&&
      ( ippAVX_ENABLEDBYOS & cpFeatures )){                            /* Intel(R) architecture formerly codenamed Haswell ia32=H9, x64=L9 */
       mask = AVX2_MSK;
       *index = LIB_AVX2;
   } else
   if(( ippCPUID_AVX   == ( *cpuFeatures & ippCPUID_AVX   ))&&
      ( ippAVX_ENABLEDBYOS & cpFeatures )){                            /* Intel(R) architecture formerly codenamed Sandy Bridge ia32=G9, x64=E9 */
       mask = AVX_MSK;
       *index = LIB_AVX;
   } else
   if( ippCPUID_SSE42 == ( *cpuFeatures & ippCPUID_SSE42 )){           /* Intel(R) architecture formerly codenamed Nehalem or Intel(R) architecture formerly codenamed Westmere = Intel(R) architecture formerly codenamed Penryn + Intel(R) SSE4.2 + ?Intel(R) instruction PCLMULQDQ + ?(Intel(R) AES New Instructions) + ?(Intel(R) Secure Hash Algorithm Extensions) */
       mask = SSE42_MSK;                                               /* or new Intel Atom(R) processor formerly codenamed Silvermont */
       *index = LIB_SSE42;
   } else
   if( ippCPUID_SSE41 == ( *cpuFeatures & ippCPUID_SSE41 )){           /* Intel(R) architecture formerly codenamed Penryn ia32=P8, x64=Y8 */
       mask = SSE41_MSK;
       *index = LIB_SSE41;
   } else
   if( ippCPUID_MOVBE == ( *cpuFeatures & ippCPUID_MOVBE )) {          /* Intel Atom(R) processor formerly codenamed Silverthorne ia32=S8, x64=N8 */
       mask = ATOM_MSK;
       *index = LIB_ATOM;
   } else
   if( ippCPUID_SSSE3 == ( *cpuFeatures & ippCPUID_SSSE3 )) {          /* Intel(R) architecture formerly codenamed Merom ia32=V8, x64=U8 (letters etymology is unknown) */
       mask = SSSE3_MSK;
       *index = LIB_SSSE3;
   } else
   if( ippCPUID_SSE3  == ( *cpuFeatures & ippCPUID_SSE3  )) {          /* Intel(R) architecture formerly codenamed Prescott ia32=W7, x64=M7 */
       mask = SSE3_MSK;
       *index = LIB_SSE3;
   } else
   if( ippCPUID_SSE2  == ( *cpuFeatures & ippCPUID_SSE2  )) {          /* Intel(R) architecture formerly codenamed Willamette ia32=W7, x64=PX */
       mask = SSE2_MSK;
       *index = LIB_SSE2;
   } else
   if( ippCPUID_SSE   == ( *cpuFeatures & ippCPUID_SSE   )) {          /* Intel(R) Pentium(R) processor III ia32=PX only */
       mask = SSE_MSK;
       *index = LIB_SSE;
#if (defined( _WIN32E ) || defined( linux32e ) || defined( OSXEM64T )) && !(defined( _ARCH_LRB2 ))
       ownStatus = ippStsNotSupportedCpu;                              /* the lowest CPU supported by Intel(R) Integrated Performance Primitives (Intel(R) IPP) must at least support Intel(R) SSE2 for x64 */
#endif
   } else
   /* NOTE(review): for an unsigned mask ( x & ippCPUID_MMX ) can never exceed ippCPUID_MMX, so this
      test looks always true and acts as the catch-all branch; the _IPP_QUARK else-branch below
      would then be unreachable - confirm the intent. */
   if( ippCPUID_MMX   >= ( *cpuFeatures & ippCPUID_MMX   )) {          /* not supported, PX dispatched */
       mask = MMX_MSK;
       *index = LIB_MMX;
       ownStatus = ippStsNotSupportedCpu; /* the lowest CPU supported by Intel(R) IPP must at least support Intel(R) SSE for ia32 or Intel(R) SSE2 for x64 */
   }
#if defined ( _IPP_QUARK)
     else {
       mask = PX_MSK;
       *index = LIB_PX;
       ownStatus = ippStsNoErr; /* the lowest CPU supported by Intel(R) IPP must at least support Intel(R) SSE for ia32 or Intel(R) SSE2 for x64 */
   }
#endif

    if(( mask != ( *cpuFeatures & mask ))&&( ownStatus == ippStsNoErr ))
        ownStatus = ippStsFeaturesCombination; /* warning if combination of features is incomplete */
   *cpuFeatures |= mask;
   return ownStatus;
}

#ifdef _PCS

/* Function pointers defined elsewhere. Callers in this file test them against 0
 * before calling; owncpRegisterLib()/owncpUnregisterLib() reset them to 0. */
extern IppStatus (IPP_STDCALL *pcpSetCpuFeatures)( Ipp64u cpuFeatures );
extern IppStatus (IPP_STDCALL *pcpSetNumThreads)( int numThr );
extern IppStatus (IPP_STDCALL *pcpGetNumThreads)( int* pNumThr );

/*
 * _PCS build: forwards the requested thread count to pcpSetNumThreads when
 * that pointer is set. The local counter cpthreads_omp_of_n_ipp is updated
 * only if the forwarded call returns ippStsNoErr.
 *
 * Returns the status of the forwarded call, or ippStsNoErr when no handler
 * is installed.
 */
IPPFUN( IppStatus, ippcpSetNumThreads, ( int numThr ))
{
   IppStatus fwdStatus;

   /* no handler installed - nothing to forward */
   if (0 == pcpSetNumThreads)
      return ippStsNoErr;

   fwdStatus = pcpSetNumThreads(numThr);
   if (ippStsNoErr == fwdStatus)
      cpthreads_omp_of_n_ipp = numThr;

   return fwdStatus;
}

/*
 * _PCS build: queries the thread count through pcpGetNumThreads when that
 * pointer is set; otherwise *pNumThr is left untouched.
 *
 * Returns the status of the forwarded call, or ippStsNoErr when no handler
 * is installed. The pointer argument is validated by IPP_BAD_PTR1_RET.
 */
IPPFUN( IppStatus, ippcpGetNumThreads, (int* pNumThr) )
{
   IPP_BAD_PTR1_RET( pNumThr )

   if (0 == pcpGetNumThreads)
      return ippStsNoErr;

   return pcpGetNumThreads(pNumThr);
}
#else


/*
 * Non-_PCS build.
 * With OpenMP: validates numThr through IPP_BAD_SIZE_RET, stores it in
 * cpthreads_omp_of_n_ipp and returns ippStsNoErr.
 * Without OpenMP: ignores the argument and returns ippStsNoOperation.
 */
IPPFUN( IppStatus, ippcpSetNumThreads, ( int numThr ))
{
#if defined( _OPENMP )
   IPP_BAD_SIZE_RET( numThr )
   cpthreads_omp_of_n_ipp = numThr;
   return ippStsNoErr;
#else
   UNREFERENCED_PARAMETER(numThr);
   return ippStsNoOperation;
#endif
}

/*
 * Non-_PCS build.
 * With OpenMP: stores cpthreads_omp_of_n_ipp in *pNumThr and returns ippStsNoErr.
 * Without OpenMP: stores 1 in *pNumThr and returns ippStsNoOperation.
 * The pointer argument is validated by IPP_BAD_PTR1_RET.
 */
IPPFUN( IppStatus, ippcpGetNumThreads, (int* pNumThr) )
{
   IPP_BAD_PTR1_RET( pNumThr )

#if defined( _OPENMP )
   *pNumThr = cpthreads_omp_of_n_ipp;
   return ippStsNoErr;
#else
   *pNumThr = 1;
   return ippStsNoOperation;
#endif
}

#endif /* #ifdef _PCS */

#ifdef _IPP_DYNAMIC

/* Signature of the library re-load callback: it receives the library index
 * and returns an IppStatus. */
typedef IppStatus (IPP_STDCALL *DYN_RELOAD)( int );
/* Callback installed by owncpRegisterLib(); 0 when nothing is registered. */
static DYN_RELOAD IppDispatcher; /* ippCP only */
/* Index most recently passed to the dispatcher (-1 = none yet). */
static int currentCpu = -1;      /* control for disabling the same DLL re-loading */

/*
 * Installs the re-load callback and clears the three pcpXxx function pointers.
 * Per the original note, the callback is the DynReload() function defined in
 * ippmain.gen, so every domain has its own DynReload().
 */
void owncpRegisterLib( DYN_RELOAD reload )
{
    IppDispatcher = reload;

    pcpGetNumThreads  = 0;
    pcpSetNumThreads  = 0;
    pcpSetCpuFeatures = 0;
}

/*
 * Undoes owncpRegisterLib(): drops the re-load callback, forgets the last
 * dispatched index and clears the three pcpXxx function pointers.
 */
void owncpUnregisterLib( void )
{
   pcpGetNumThreads  = 0;
   pcpSetNumThreads  = 0;
   pcpSetCpuFeatures = 0;

   currentCpu = -1;
   IppDispatcher = 0;
}

/*
 * Dynamic build: applies the requested feature mask, and asks the dispatcher
 * to re-load the library when the resulting index differs from the one
 * dispatched last. Under _PCS the features and the current thread count are
 * additionally forwarded through pcpSetCpuFeatures / pcpSetNumThreads while
 * the running status is still >= ippStsNoErr.
 *
 * Returns the dispatch/forwarding status when it is neither ippStsNoErr nor
 * ippStsNoOperation; otherwise the status of owncpSetCpuFeaturesAndIdx().
 */
IPPFUN( IppStatus, ippcpSetCpuFeatures,( Ipp64u cpuFeatures ))
{
   int       libIdx = 0;
   IppStatus dispStatus = ippStsNoErr;
   IppStatus ownStatus  = owncpSetCpuFeaturesAndIdx( cpuFeatures, &libIdx );

   /* re-load only when a dispatcher is registered and the index has changed */
   if(( IppDispatcher )&&( libIdx != currentCpu )) {
       dispStatus = IppDispatcher( libIdx );
       currentCpu = libIdx;
   }

#ifdef _PCS
   if (pcpSetCpuFeatures != 0 && dispStatus >= ippStsNoErr)
   {
       /* Pass down features to Waterfall dll */
       dispStatus = pcpSetCpuFeatures(cpuFeatures);
   }
   if (pcpSetNumThreads != 0 && dispStatus >= ippStsNoErr)
   {
       /* Pass down the thread count to Waterfall dll */
       dispStatus = pcpSetNumThreads(cpthreads_omp_of_n_ipp);
   }
#endif

   if(( ippStsNoErr == dispStatus )||( ippStsNoOperation == dispStatus ))
       return ownStatus;
   return dispStatus;
}

/*
 * Dynamic build: obtains the CPU features, selects the library index for
 * them and unconditionally calls the registered dispatcher (if any) with
 * that index, which is then remembered in currentCpu.
 *
 * Returns the first status that differs from ippStsNoErr, checked in the
 * order: feature query, index selection, dispatcher; ippStsNoErr otherwise.
 */
IPPFUN( IppStatus, ippcpInit,( void ))
{
    Ipp64u    detected;
    int       libIdx = 0;
    IppStatus stsDetect, stsSelect;
    IppStatus stsLoad = ippStsNoErr;

    stsDetect = ippcpGetCpuFeatures( &detected );
    /* owncpSetCpuFeaturesAndIdx instead of ippcpSetCpuFeatures: initialization must be unconditional */
    stsSelect = owncpSetCpuFeaturesAndIdx( detected, &libIdx );
    if( IppDispatcher ) stsLoad = IppDispatcher( libIdx ); /* DynReload() of the domain */
    currentCpu = libIdx;

    if( ippStsNoErr != stsDetect ) return stsDetect;
    if( ippStsNoErr != stsSelect ) return stsSelect;
    return stsLoad;
}


#else /* _IPP_DYNAMIC */

/*
 * Static build: (with OpenMP) sets the thread count to the smaller of
 * omp_get_num_procs() and omp_get_max_threads(), then queries the CPU
 * features and applies them through ippcpSetCpuFeatures().
 *
 * Returns the status of ippcpSetCpuFeatures(); the status of the feature
 * query itself is not examined.
 */
IPPFUN( IppStatus, ippcpInit,( void ))
{
    Ipp64u detected;

#if defined( _OPENMP )
    ippcpSetNumThreads( IPP_MIN( omp_get_num_procs(), omp_get_max_threads()));
#endif

    ippcpGetCpuFeatures( &detected );

    return ippcpSetCpuFeatures( detected );
}


/*
 * Static build: (with OpenMP) refreshes the thread count, then computes the
 * library index for the requested features and publishes it in
 * ippcpJumpIndexForMergedLibs.
 *
 * Note that cpFeaturesMask is finally overwritten with the caller's mask
 * exactly as passed in, replacing the value that
 * owncpSetCpuFeaturesAndIdx() has just stored there.
 *
 * Returns the status of owncpSetCpuFeaturesAndIdx().
 */
IPPFUN( IppStatus, ippcpSetCpuFeatures,( Ipp64u cpuFeatures ))
{
   IppStatus selStatus;
   int       libIdx = 0;

#if defined( _OPENMP )
    ippcpSetNumThreads( IPP_MIN( omp_get_num_procs(), omp_get_max_threads()));
#endif

    selStatus = owncpSetCpuFeaturesAndIdx( cpuFeatures, &libIdx );

    ippcpJumpIndexForMergedLibs = libIdx;
    cpFeaturesMask = cpuFeatures;   /* must stay after the call above, which also writes cpFeaturesMask */

    return selStatus;
}

#endif

/*
 * Applies a feature mask and computes the matching library index.
 *
 * Without ippCPUID_NOCHECK in cpuFeatures the real CPU features are always
 * re-detected through cpGetFeatures() (this also undoes a possible earlier
 * NOCHECK override of cpFeatures).
 *
 * With ippCPUID_NOCHECK the flag is stripped and cpFeatures is set to the
 * caller's mask unconditionally, ignoring CPUID. Whoever uses this mode must
 * understand what it does and accept possibly unpredictable consequences;
 * the only known purpose is an environment where the CPUID instruction is
 * disabled (for example Intel(R) Software Guard Extensions).
 *
 * In both cases owncpFeaturesToIdx() then selects *index and completes the
 * mask, which becomes the new cpFeaturesMask.
 *
 * Returns the status of owncpFeaturesToIdx().
 */
IppStatus owncpSetCpuFeaturesAndIdx( Ipp64u cpuFeatures, int* index )
{
    IppStatus selStatus;

    *index = 0;

    if( 0 == ( ippCPUID_NOCHECK & cpuFeatures )){
        /* regular path: refresh cpFeatures from the hardware; the value written
           to the local is not needed here */
        Ipp64u detected;
        cpGetFeatures( &detected );
    } else {
        /* NOCHECK path: trust the caller's mask */
        cpuFeatures &= ( IPP_MAX_64U ^ ippCPUID_NOCHECK );
        cpFeatures = cpuFeatures;
    }

    selStatus = owncpFeaturesToIdx( &cpuFeatures, index );
    cpFeaturesMask = cpuFeatures;

    return selStatus;
}

/*
 * Status code -> message table searched linearly by ippcpGetStatusString().
 * Each entry is a (sts, msg) pair written without inner braces.
 * The numbers in the leading comments are informational only.
 * NOTE(review): the -216/-221 annotations are out of order and the last two
 * entries are both annotated with 36 - verify against the status definitions.
 */
static struct {
   int sts;
   const char *msg;
} ippcpMsg[] = {
/* ippStatus */
/* -9999 */ ippStsCpuNotSupportedErr, "ippStsCpuNotSupportedErr: The target CPU is not supported",
/* -9702 */ MSG_NO_SHARED, "No shared libraries were found in the Waterfall procedure",
/* -9701 */ MSG_NO_DLL, "No DLLs were found in the Waterfall procedure",
/* -9700 */ MSG_LOAD_DLL_ERR, "Error at loading of %s library",
/* -1016 */ ippStsQuadraticNonResidueErr, "ippStsQuadraticNonResidueErr: SQRT operation on quadratic non-residue value",
/* -1015 */ ippStsPointAtInfinity, "ippStsPointAtInfinity: Point at infinity is detected",
/* -1014 */ ippStsOFBSizeErr, "ippStsOFBSizeErr: Incorrect value for crypto OFB block size",
/* -1013 */ ippStsIncompleteContextErr, "ippStsIncompleteContextErr: Crypto: set up of context is not complete",
/* -1012 */ ippStsCTRSizeErr, "ippStsCTRSizeErr: Incorrect value for crypto CTR block size",
/* -1011 */ ippStsEphemeralKeyErr, "ippStsEphemeralKeyErr: ECC: Invalid ephemeral key",
/* -1010 */ ippStsMessageErr, "ippStsMessageErr: ECC: Invalid message digest",
/* -1009 */ ippStsShareKeyErr, "ippStsShareKeyErr: ECC: Invalid share key",
/* -1008 */ ippStsIvalidPrivateKey, "ippStsIvalidPrivateKey ECC: Invalid private key",
/* -1007 */ ippStsOutOfECErr, "ippStsOutOfECErr: ECC: Point out of EC",
/* -1006 */ ippStsECCInvalidFlagErr, "ippStsECCInvalidFlagErr: ECC: Invalid Flag",
/* -1005 */ ippStsUnderRunErr, "ippStsUnderRunErr: Error in data under run",
/* -1004 */ ippStsPaddingErr, "ippStsPaddingErr: Detected padding error indicates the possible data corruption",
/* -1003 */ ippStsCFBSizeErr, "ippStsCFBSizeErr: Incorrect value for crypto CFB block size",
/* -1002 */ ippStsPaddingSchemeErr, "ippStsPaddingSchemeErr: Invalid padding scheme",
/* -1001 */ ippStsBadModulusErr, "ippStsBadModulusErr: Bad modulus caused a failure in module inversion",
/*  -216 */ ippStsUnknownStatusCodeErr, "ippStsUnknownStatusCodeErr: Unknown status code",
/*  -221 */ ippStsLoadDynErr, "ippStsLoadDynErr: Error when loading the dynamic library",
/*   -15 */ ippStsLengthErr, "ippStsLengthErr: Incorrect value for string length",
/*   -14 */ ippStsNotSupportedModeErr, "ippStsNotSupportedModeErr: The requested mode is currently not supported",
/*   -13 */ ippStsContextMatchErr, "ippStsContextMatchErr: Context parameter does not match the operation",
/*   -12 */ ippStsScaleRangeErr, "ippStsScaleRangeErr: Scale bounds are out of range",
/*   -11 */ ippStsOutOfRangeErr, "ippStsOutOfRangeErr: Argument is out of range, or point is outside the image",
/*   -10 */ ippStsDivByZeroErr, "ippStsDivByZeroErr: An attempt to divide by zero",
/*    -9 */ ippStsMemAllocErr, "ippStsMemAllocErr: Memory allocated for the operation is not enough",
/*    -8 */ ippStsNullPtrErr, "ippStsNullPtrErr: Null pointer error",
/*    -7 */ ippStsRangeErr, "ippStsRangeErr: Incorrect values for bounds: the lower bound is greater than the upper bound",
/*    -6 */ ippStsSizeErr, "ippStsSizeErr: Incorrect value for data size",
/*    -5 */ ippStsBadArgErr, "ippStsBadArgErr: Incorrect arg/param of the function",
/*    -4 */ ippStsNoMemErr, "ippStsNoMemErr: Not enough memory for the operation",
/*    -2 */ ippStsErr, "ippStsErr: Unknown/unspecified error, -2",
/*     0 */ ippStsNoErr, "ippStsNoErr: No errors",
/*     1 */ ippStsNoOperation, "ippStsNoOperation: No operation has been executed",
/*     2 */ ippStsDivByZero, "ippStsDivByZero: Zero value(s) for the divisor in the Div function",
/*    25 */ ippStsInsufficientEntropy, "ippStsInsufficientEntropy: Generation of the prime/key failed due to insufficient entropy in the random seed and stimulus bit string",
/*    36 */ ippStsNotSupportedCpu, "The CPU is not supported",
/*    36 */ ippStsFeaturesCombination, "Wrong combination of features",
};

/* /////////////////////////////////////////////////////////////////////////////
//  Name:       ippcpGetStatusString
//  Purpose:    converts an Intel(R) IPP status code to its message string
//  Returns:    the message of the first ippcpMsg entry whose code equals
//              StsCode; for a code that is not in the table, the message
//              registered for ippStsUnknownStatusCodeErr
//  Parameters:
//    StsCode   Intel(R) IPP status code
//
//  Notes:      the returned string points into the static table and must
//              not be released by the caller
*/
IPPFUN( const char*, ippcpGetStatusString, ( IppStatus StsCode ) )
{
   unsigned int pos = 0;

   while( pos < IPP_COUNT_OF( ippcpMsg )) {
      if( ippcpMsg[pos].sts == StsCode )
         return ippcpMsg[pos].msg;
      ++pos;
   }

   /* unknown code: look up the dedicated "unknown status" entry instead */
   return ippcpGetStatusString( ippStsUnknownStatusCodeErr );
}

/* Externally implemented counter read; wrapped by ippcpGetCpuClocks() below. */
extern Ipp64u IPP_CDECL cp_get_pentium_counter (void);

/* /////////////////////////////////////////////////////////////////////////////
//  Name:       ippcpGetCpuClocks
//  Purpose:    reads the time stamp counter (TSC) register
//  Returns:    the value delivered by cp_get_pentium_counter()
//
//  Note:      A hardware exception is possible if TSC reading is not supported
//             by the current chipset
*/
IPPFUN( Ipp64u, ippcpGetCpuClocks, (void) )
{
   const Ipp64u clocks = (Ipp64u)cp_get_pentium_counter();
   return clocks;
}

#endif /* _IPP_DATA */