mirror of
https://github.com/nzbget/nzbget.git
synced 2026-05-19 02:24:28 -04:00
#454: integrated node-yencode library by Anime Tosho
1) integrated the library; 2) split units by CPU architecture; 3) extended the makefile and configure script to detect the CPU architecture and use appropriate compiler flags; 4) runtime CPU feature detection for x86 and ARM with dynamic code dispatching; 5) temporary (for test purposes) printing of info about SIMD support to stdout on program startup; 6) the new SIMD routines are not yet used in the program
This commit is contained in:
141
lib/yencode/SimdInit.cpp
Normal file
141
lib/yencode/SimdInit.cpp
Normal file
@@ -0,0 +1,141 @@
|
||||
/*
|
||||
* Based on node-yencode library by Anime Tosho:
|
||||
* https://github.com/animetosho/node-yencode
|
||||
*
|
||||
* Copyright (C) 2017 Anime Tosho (animetosho)
|
||||
* Copyright (C) 2017 Andrey Prygunkov <hugbug@users.sourceforge.net>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "nzbget.h"
|
||||
|
||||
#if (defined(__i686__) || defined(__amd64__)) && !defined(WIN32)
|
||||
#include <cpuid.h>
|
||||
#endif
|
||||
|
||||
#include "YEncode.h"
|
||||
|
||||
namespace YEncode
|
||||
{
|
||||
|
||||
// Dispatch pointers filled in by init(); all of them start out unset.

// Decoder in use: init() first points it at decode_scalar and then replaces
// it with decode_simd when a SIMD decoder has been selected.
size_t (*decode)(const unsigned char* src, unsigned char* dest, size_t len, char* state) = nullptr;

// SIMD decoder chosen by init() for the running CPU; stays null when none applies.
size_t (*decode_simd)(const unsigned char* src, unsigned char* dest, size_t len, char* state) = nullptr;

// Hardware-assisted CRC32 chosen by init(); stays null when none applies.
uint32_t (*crc32_simd)(const unsigned char* src, long len) = nullptr;

// Incremental CRC32 variant taking a starting crc value.
// NOTE(review): nothing in the visible part of this file assigns it.
uint32_t (*inc_crc32_simd)(uint32_t crc, const unsigned char* src, long len) = nullptr;
|
||||
|
||||
#if defined(__i686__) || defined(__amd64__)
|
||||
// x86 kernels. Each init_*() routine lives in another translation unit;
// init() calls it and then reads the matching pointer below
// (presumably the routine is what assigns that pointer - confirm in its unit).
void init_decode_sse2();
void init_decode_ssse3();
void init_crc32_pclmul();

// SSE2 decoder entry point.
size_t (*decode_sse2)(const unsigned char* src, unsigned char* dest, size_t len, char* state) = nullptr;

// SSSE3 decoder entry point; init() only uses it when it is non-null.
size_t (*decode_ssse3)(const unsigned char* src, unsigned char* dest, size_t len, char* state) = nullptr;

// PCLMUL-based CRC32 entry point.
uint32_t (*crc32_pclmul)(const unsigned char* src, long len) = nullptr;
|
||||
|
||||
class CpuId
|
||||
{
|
||||
uint32_t regs[4];
|
||||
public:
|
||||
CpuId(unsigned level)
|
||||
{
|
||||
#ifdef WIN32
|
||||
__cpuid((int *)regs, (int)level);
|
||||
#else
|
||||
__cpuid(level, regs[0], regs[1], regs[2], regs[3]);
|
||||
#endif
|
||||
}
|
||||
const uint32_t &EAX() const {return regs[0];}
|
||||
const uint32_t &EBX() const {return regs[1];}
|
||||
const uint32_t &ECX() const {return regs[2];}
|
||||
const uint32_t &EDX() const {return regs[3];}
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
// ARM kernels. Each init_*() routine lives in another translation unit;
// init() calls it and then reads the matching pointer below
// (presumably the routine is what assigns that pointer - confirm in its unit).
void init_decode_neon();
void init_crc32_arm();

// NEON decoder entry point.
size_t (*decode_neon)(const unsigned char* src, unsigned char* dest, size_t len, char* state) = nullptr;

// CRC32 entry point for CPUs reporting the "crc32" feature.
uint32_t (*crc32_arm)(const unsigned char* src, long len) = nullptr;
|
||||
#endif
|
||||
|
||||
void init()
|
||||
{
|
||||
decode = &decode_scalar;
|
||||
|
||||
#if defined(__i686__) || defined(__amd64__)
|
||||
CpuId cpuid(1);
|
||||
|
||||
bool cpu_supports_sse2 = cpuid.EDX() & 0x04000000;
|
||||
bool cpu_supports_ssse3 = cpuid.ECX() & 0x00000200;
|
||||
bool cpu_supports_sse41 = cpuid.ECX() & 0x00080000;
|
||||
bool cpu_supports_pclmul = cpuid.ECX() & 0x00000002;
|
||||
|
||||
if (cpu_supports_sse2)
|
||||
{
|
||||
init_decode_sse2();
|
||||
decode_simd = decode_sse2;
|
||||
}
|
||||
if (cpu_supports_ssse3)
|
||||
{
|
||||
init_decode_ssse3();
|
||||
if (decode_ssse3)
|
||||
{
|
||||
decode_simd = decode_ssse3;
|
||||
}
|
||||
}
|
||||
if (cpu_supports_sse41 && cpu_supports_pclmul)
|
||||
{
|
||||
init_crc32_pclmul();
|
||||
crc32_simd = crc32_pclmul;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
bool cpu_supports_neon = false;
|
||||
bool cpu_supports_crc = false;
|
||||
|
||||
#ifdef __linux__
|
||||
if (FILE* file = fopen("/proc/cpuinfo", "r"))
|
||||
{
|
||||
char buf[200];
|
||||
while (fgets(buf, sizeof(buf), file))
|
||||
{
|
||||
cpu_supports_neon |= !strncasecmp(buf, "Features", 8) &&
|
||||
(strstr(buf, " neon ") || strstr(buf, " asimd "));
|
||||
cpu_supports_crc |= !strncasecmp(buf, "Features", 8) && strstr(buf, " crc32 ");
|
||||
}
|
||||
fclose(file);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (cpu_supports_neon)
|
||||
{
|
||||
init_decode_neon();
|
||||
decode_simd = decode_neon;
|
||||
}
|
||||
if (cpu_supports_crc)
|
||||
{
|
||||
init_crc32_arm();
|
||||
crc32_simd = crc32_arm;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (decode_simd)
|
||||
{
|
||||
decode = decode_simd;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user