Jef*_*eff 4 simd c99 variable-length-array restrict-qualifier auto-vectorization
我正在探索如何基于函数签名在C99中简单循环的不同实现自动矢量化。
这是我的代码:
/* #define PRAGMA_SIMD _Pragma("simd") */
#define PRAGMA_SIMD
#ifdef __INTEL_COMPILER
#define ASSUME_ALIGNED(a) __assume_aligned(a,64)
#else
#define ASSUME_ALIGNED(a)
#endif
#ifndef ARRAY_RESTRICT
#define ARRAY_RESTRICT
#endif
void foo1(double * restrict a, const double * restrict b, const double * restrict c)
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < 2048; ++i) {
if (c[i] > 0) {
a[i] = b[i];
} else {
a[i] = 0.0;
}
}
}
void foo2(double * restrict a, const double * restrict b, const double * restrict c)
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < 2048; ++i) {
a[i] = ((c[i] > 0) ? b[i] : 0.0);
}
}
/* Undetermined size version */
void foo3(int n, double * restrict a, const double * restrict b, const double * restrict c)
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < n; ++i) {
if (c[i] > 0) {
a[i] = b[i];
} else {
a[i] = 0.0;
}
}
}
void foo4(int n, double * restrict a, const double * restrict b, const double * restrict c)
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < n; ++i) {
a[i] = ((c[i] > 0) ? b[i] : 0.0);
}
}
/* Static array versions */
void foo5(double ARRAY_RESTRICT a[2048], const double ARRAY_RESTRICT b[2048], const double ARRAY_RESTRICT c[2048])
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < 2048; ++i) {
if (c[i] > 0) {
a[i] = b[i];
} else {
a[i] = 0.0;
}
}
}
void foo6(double ARRAY_RESTRICT a[2048], const double ARRAY_RESTRICT b[2048], const double ARRAY_RESTRICT c[2048])
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < 2048; ++i) {
a[i] = ((c[i] > 0) ? b[i] : 0.0);
}
}
/* VLA versions */
void foo7(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n], const double ARRAY_RESTRICT c[n])
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < n; ++i) {
if (c[i] > 0) {
a[i] = b[i];
} else {
a[i] = 0.0;
}
}
}
void foo8(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n], const double ARRAY_RESTRICT c[n])
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < n; ++i) {
a[i] = ((c[i] > 0) ? b[i] : 0.0);
}
}
Run Code Online (Sandbox Code Playgroud)
当我编译时
$ icc -O3 -std=c99 -opt-report5 -mavx -S foo.c
icc: remark #10397: optimization reports are generated in *.optrpt files in the output location
Run Code Online (Sandbox Code Playgroud)
我看到VLA情况不是自动矢量化的,但是当我添加标志以断言没有别名时-fno-alias,它们是自动的。因此,我得出结论,我应该在源代码中对此进行规定,因此我尝试通过使用
$ icc -O3 -std=c99 -opt-report5 -mavx -DARRAY_RESTRICT=restrict -S foo.c
icc: remark #10397: optimization reports are generated in *.optrpt files in the output location
Run Code Online (Sandbox Code Playgroud)
编译器错误输出包括
foo.c(98): error: "restrict" is not allowed
void foo7(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n],
const double ARRAY_RESTRICT c[n])
^
Run Code Online (Sandbox Code Playgroud)
但是如您所见,我的VLA参数上不允许使用限制。
所以我的问题是:没有办法在ISO C中断言VLA没有别名吗?
请注意,我可以使用杂注断言在源代码中不走样-例如simd,omp simd,ivdep等等-并获得自动向量化,我想但这些都不是ISO C.
在这种情况下,ISO C表示C的最新版本,在撰写本文时,当然是C11。
您的原始代码对我来说很失败,并显示以下消息:
void foo7(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n], const double ARRAY_RESTRICT c[n])
^
restrict.c:126:1: error: invalid use of ‘restrict’
restrict.c:126:1: error: invalid use of ‘restrict’
restrict.c:145:1: error: invalid use of ‘restrict’
Run Code Online (Sandbox Code Playgroud)
转移评论的选定部分
§6.7.6.3 函数声明符(包括原型)具有示例5,该示例表示以下函数原型声明符是等效的:
void f(double (* restrict a)[5]);
void f(double a[restrict][5]);
void f(double a[restrict 3][5]);
void f(double a[restrict static 3][5]);
Run Code Online (Sandbox Code Playgroud)
这是标准中唯一出现与数组类型直接相关的限制的地方。§6.7.6通常用于声明符,而§6.7.6.2通常用于数组声明符,在我看来,限制似乎必须出现在数组维的第一个组件内。在您的上下文中,应为:
void foo7(int n, double a[ARRAY_RESTRICT n],
const double b[ARRAY_RESTRICT n],
const double c[ARRAY_RESTRICT n])
Run Code Online (Sandbox Code Playgroud)
如果不看标准中的示例,而您问这个问题,我将不会相信这种表示法!请注意,这适用于阵列以及VLA。
经过修改的代码基于注释,可以在相同的编译选项下清晰地编译:
gcc -g -O3 -std=c11 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes \
-Wold-style-definition -Wold-style-declaration -Werror -c new.restrict.c
Run Code Online (Sandbox Code Playgroud)
编译选项要求事先声明非静态函数,因此声明位于文件顶部。我也强加#define ARRAY_RESTRICT restrict了源代码,而不是将其保留为编译选项。
编译器是在Ubuntu 14.04衍生产品上运行的GCC 4.9.2。
档案
new.restrict.c:
/* #define PRAGMA_SIMD _Pragma("simd") */
#define PRAGMA_SIMD
#ifdef __INTEL_COMPILER
#define ASSUME_ALIGNED(a) __assume_aligned(a, 64)
#else
#define ASSUME_ALIGNED(a)
#endif
#define ARRAY_RESTRICT restrict
#ifndef ARRAY_RESTRICT
#define ARRAY_RESTRICT
#endif
void foo1(double *restrict a, const double *restrict b, const double *restrict c);
void foo2(double *restrict a, const double *restrict b, const double *restrict c);
void foo3(int n, double *restrict a, const double *restrict b, const double *restrict c);
void foo4(int n, double *restrict a, const double *restrict b, const double *restrict c);
void foo5(double a[ARRAY_RESTRICT 2048], const double b[ARRAY_RESTRICT 2048], const double c[ARRAY_RESTRICT 2048]);
void foo6(double a[ARRAY_RESTRICT 2048], const double b[ARRAY_RESTRICT 2048], const double c[ARRAY_RESTRICT 2048]);
void foo7(int n, double a[ARRAY_RESTRICT n], const double b[ARRAY_RESTRICT n], const double c[ARRAY_RESTRICT n]);
void foo8(int n, double a[ARRAY_RESTRICT n], const double b[ARRAY_RESTRICT n], const double c[ARRAY_RESTRICT n]);
void foo1(double *restrict a, const double *restrict b, const double *restrict c)
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < 2048; ++i)
{
if (c[i] > 0)
{
a[i] = b[i];
}
else
{
a[i] = 0.0;
}
}
}
void foo2(double *restrict a, const double *restrict b, const double *restrict c)
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < 2048; ++i)
{
a[i] = ((c[i] > 0) ? b[i] : 0.0);
}
}
/* Undetermined size version */
void foo3(int n, double *restrict a, const double *restrict b, const double *restrict c)
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < n; ++i)
{
if (c[i] > 0)
{
a[i] = b[i];
}
else
{
a[i] = 0.0;
}
}
}
void foo4(int n, double *restrict a, const double *restrict b, const double *restrict c)
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < n; ++i)
{
a[i] = ((c[i] > 0) ? b[i] : 0.0);
}
}
/* Static array versions */
void foo5(double a[ARRAY_RESTRICT 2048], const double b[ARRAY_RESTRICT 2048], const double c[ARRAY_RESTRICT 2048])
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < 2048; ++i)
{
if (c[i] > 0)
{
a[i] = b[i];
}
else
{
a[i] = 0.0;
}
}
}
void foo6(double a[ARRAY_RESTRICT 2048], const double b[ARRAY_RESTRICT 2048], const double c[ARRAY_RESTRICT 2048])
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < 2048; ++i)
{
a[i] = ((c[i] > 0) ? b[i] : 0.0);
}
}
/* VLA versions */
void foo7(int n, double a[ARRAY_RESTRICT n], const double b[ARRAY_RESTRICT n], const double c[ARRAY_RESTRICT n])
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < n; ++i)
{
if (c[i] > 0)
{
a[i] = b[i];
}
else
{
a[i] = 0.0;
}
}
}
void foo8(int n, double a[ARRAY_RESTRICT n], const double b[ARRAY_RESTRICT n], const double c[ARRAY_RESTRICT n])
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < n; ++i)
{
a[i] = ((c[i] > 0) ? b[i] : 0.0);
}
}
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
747 次 |
| 最近记录: |