我有一系列复杂的函数,它们执行的任务非常相似,唯一的区别是函数中间的某个运算符.我的代码的简化版本大致如下:
#include <assert.h>
// Writes buffer1[i] ^ buffer2[i] into res[i] for every i in [0, n),
// walking the three buffers front to back.
static void memopXor(char * buffer1, char * buffer2, char * res, unsigned n){
    const char * lhs = buffer1;
    const char * rhs = buffer2;
    char * out = res;
    for (unsigned remaining = n; remaining != 0; --remaining){
        *out++ = *lhs++ ^ *rhs++;
    }
}
// Writes buffer1[i] + buffer2[i] (converted back to char) into res[i]
// for every i in [0, n), walking the three buffers front to back.
static void memopPlus(char * buffer1, char * buffer2, char * res, unsigned n){
    const char * lhs = buffer1;
    const char * rhs = buffer2;
    char * out = res;
    for (unsigned remaining = n; remaining != 0; --remaining){
        *out++ = *lhs++ + *rhs++;
    }
}
// Writes buffer1[i] * buffer2[i] (converted back to char) into res[i]
// for every i in [0, n), walking the three buffers front to back.
static void memopMul(char * buffer1, char * buffer2, char * res, unsigned n){
    const char * lhs = buffer1;
    const char * rhs = buffer2;
    char * out = res;
    for (unsigned remaining = n; remaining != 0; --remaining){
        *out++ = *lhs++ * *rhs++;
    }
}
// Runs memopXor, memopPlus and memopMul on two identical 5-byte inputs
// {0, 1, 2, 3, 4} and asserts every element of each result.
// argc/argv are accepted but not used.
int main(int argc, char ** argv){
    char b1[5] = {0, 1, 2, 3, 4};
    char b2[5] = {0, 1, 2, 3, 4};

    // XOR of a value with itself is always zero, so every element must be 0.
    char res1[5] = {};
    memopXor(b1, b2, res1, 5);
    assert(res1[0] == 0);
    assert(res1[1] == 0);
    assert(res1[2] == 0);
    assert(res1[3] == 0);
    assert(res1[4] == 0); // 4 ^ 4 == 0 (the previous expectation of 1 could never hold)

    // Element-wise sum: each value doubled.
    char res2[5] = {};
    memopPlus(b1, b2, res2, 5);
    assert(res2[0] == 0);
    assert(res2[1] == 2);
    assert(res2[2] == 4);
    assert(res2[3] == 6);
    assert(res2[4] == 8);

    // Element-wise product: each value squared.
    char res3[5] = {};
    memopMul(b1, b2, res3, 5);
    assert(res3[0] == 0);
    assert(res3[1] == 1);
    assert(res3[2] == 4);
    assert(res3[3] == 9);
    assert(res3[4] == 16);
}
Run Code Online (Sandbox Code Playgroud)
这看起来正是使用C++模板来避免重复代码的典型场景,因此我一直在寻找一种方法,把我的代码改成类似下面的形式(伪代码):
#include <assert.h>
// Pseudo-code, not valid C++: sketch of the desired single template.
// FUNCTION stands for the binary operation applied to each pair
// (buffer1[x], buffer2[x]); the result is stored in res[x] for x in [0, n).
template <FUNCTION>
void memop<FUNCTION>(char * buffer1, char * buffer2, char * res, size_t n){
for (size_t x = 0 ; x < n ; x++){
res[x] = FUNCTION(buffer1[x], buffer2[x]);
}
}
// Pseudo-code driver: shows the wished-for call syntax, with the operator
// itself passed as the template argument (memop<operator^> and so on).
// Each call runs on two identical inputs {0, 1, 2, 3, 4} and asserts
// every element of the result.
int main(int argc, char ** argv){
char b1[5] = {0, 1, 2, 3, 4};
char b2[5] = {0, 1, 2, 3, 4};
// XOR of identical inputs: all zeros expected.
char res1[5] = {};
memop<operator^>(b1, b2, res1, 5);
assert(res1[0] == 0);
assert(res1[1] == 0);
assert(res1[2] == 0);
assert(res1[3] == 0);
assert(res1[4] == 0);
// Element-wise sum: each value doubled.
char res2[5] = {};
memop<operator+>(b1, b2, res2, 5);
assert(res2[0] == 0);
assert(res2[1] == 2);
assert(res2[2] == 4);
assert(res2[3] == 6);
assert(res2[4] == 8);
// Element-wise product: each value squared.
char res3[5] = {};
memop<operator*>(b1, b2, res3, 5);
assert(res3[0] == 0);
assert(res3[1] == 1);
assert(res3[2] == 4);
assert(res3[3] == 9);
assert(res3[4] == 16);
}
Run Code Online (Sandbox Code Playgroud)
难点在于我不能接受生成的代码有任何性能下降.这意味着任何涉及间接调用(通过vtable或函数指针)的解决方案都不可行.
这个问题的常见C++解决方案似乎是将操作符包装在functor类的operator()方法中.通常得到类似下面的代码:
#include <assert.h>
// Applies a default-constructed Op to each pair (buffer1[i], buffer2[i])
// for i in [0, n), in ascending order, and stores the result in res[i].
// Op must be default-constructible and callable with two chars.
template <typename Op>
void memop(char * buffer1, char * buffer2, char * res, unsigned n){
    Op apply;
    unsigned idx = 0;
    while (idx < n){
        res[idx] = apply(buffer1[idx], buffer2[idx]);
        ++idx;
    }
}
// Stateless function object returning the bitwise XOR of two chars.
struct Xor
{
    // const-qualified so the functor can also be invoked through const
    // objects and references; the call does not modify any state.
    char operator()(char a, char b) const {
        // a ^ b is computed as int after promotion; convert back to char explicitly.
        return static_cast<char>(a ^ b);
    }
};
// Stateless function object returning the sum of two chars, converted back to char.
struct Plus
{
    // const-qualified so the functor can also be invoked through const
    // objects and references; the call does not modify any state.
    char operator()(char a, char b) const {
        // a + b is computed as int after promotion; convert back to char explicitly.
        return static_cast<char>(a + b);
    }
};
// Stateless function object returning the product of two chars, converted back to char.
struct Mul
{
    // const-qualified so the functor can also be invoked through const
    // objects and references; the call does not modify any state.
    char operator()(char a, char b) const {
        // a * b is computed as int after promotion; convert back to char explicitly.
        return static_cast<char>(a * b);
    }
};
// Runs memop with the Xor, Plus and Mul functors on two identical 5-byte
// inputs and compares each output with a table of expected values.
// argc/argv are accepted but not used.
int main(int argc, char ** argv){
    char lhs[5] = {0, 1, 2, 3, 4};
    char rhs[5] = {0, 1, 2, 3, 4};

    // XOR of identical inputs: all zeros.
    const char expectedXor[5] = {0, 0, 0, 0, 0};
    char xorOut[5] = {};
    memop<Xor>(lhs, rhs, xorOut, 5);
    for (int i = 0; i < 5; ++i){
        assert(xorOut[i] == expectedXor[i]);
    }

    // Element-wise sum: each value doubled.
    const char expectedSum[5] = {0, 2, 4, 6, 8};
    char sumOut[5] = {};
    memop<Plus>(lhs, rhs, sumOut, 5);
    for (int i = 0; i < 5; ++i){
        assert(sumOut[i] == expectedSum[i]);
    }

    // Element-wise product: each value squared.
    const char expectedProduct[5] = {0, 1, 4, 9, 16};
    char productOut[5] = {};
    memop<Mul>(lhs, rhs, productOut, 5);
    for (int i = 0; i < 5; ++i){
        assert(productOut[i] == expectedProduct[i]);
    }
}
Run Code Online (Sandbox Code Playgroud)
这样做是否有性能损失?
就benchmark(基准测试)而言,你贴出的代码是没有用的.
// Passes two constant 5-byte arrays through the hand-written memopXor
// and returns the last byte of the result.
char cversion() {
    constexpr unsigned kLen = 5;
    char lhs[kLen] = {0, 1, 2, 3, 4};
    char rhs[kLen] = {0, 1, 2, 3, 4};
    char out[kLen] = {};
    memopXor(lhs, rhs, out, kLen);
    return out[kLen - 1];
}
// Passes two constant 5-byte arrays through the templated memop<Xor>
// and returns the last byte of the result.
char cppversion() {
    constexpr unsigned kLen = 5;
    char lhs[kLen] = {0, 1, 2, 3, 4};
    char rhs[kLen] = {0, 1, 2, 3, 4};
    char out[kLen] = {};
    memop<Xor>(lhs, rhs, out, kLen);
    return out[kLen - 1];
}
Run Code Online (Sandbox Code Playgroud)
被编译为这样的LLVM IR:
define signext i8 @cversion()() nounwind uwtable readnone {
ret i8 0
}
define signext i8 @cppversion()() nounwind uwtable readnone {
ret i8 0
}
Run Code Online (Sandbox Code Playgroud)
也就是说,编译器在编译期间进行整个计算.
所以我冒昧地定义了一个新函数:
// Non-template wrapper with external linkage that forwards all of its
// arguments unchanged to memop<Xor>.
void cppmemopXor(char * buffer1,
char * buffer2,
char * res,
unsigned n)
{
memop<Xor>(buffer1, buffer2, res, n);
}
Run Code Online (Sandbox Code Playgroud)
并删除了memopXor的static限定符,然后重复了该实验:
define void @memopXor(char*, char*, char*, unsigned int)(i8* nocapture %buffer1, i8* nocapture %buffer2, i8* nocapture %res, i32 %n) nounwind uwtable {
%1 = icmp eq i32 %n, 0
br i1 %1, label %._crit_edge, label %.lr.ph
.lr.ph: ; preds = %.lr.ph, %0
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
%2 = getelementptr inbounds i8* %buffer1, i64 %indvars.iv
%3 = load i8* %2, align 1, !tbaa !0
%4 = getelementptr inbounds i8* %buffer2, i64 %indvars.iv
%5 = load i8* %4, align 1, !tbaa !0
%6 = xor i8 %5, %3
%7 = getelementptr inbounds i8* %res, i64 %indvars.iv
store i8 %6, i8* %7, align 1, !tbaa !0
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph, %0
ret void
}
Run Code Online (Sandbox Code Playgroud)
带有模板的C++版本:
define void @cppmemopXor(char*, char*, char*, unsigned int)(i8* nocapture %buffer1, i8* nocapture %buffer2, i8* nocapture %res, i32 %n) nounwind uwtable {
%1 = icmp eq i32 %n, 0
br i1 %1, label %_ZL5memopI3XorEvPcS1_S1_j.exit, label %.lr.ph.i
.lr.ph.i: ; preds = %.lr.ph.i, %0
%indvars.iv.i = phi i64 [ %indvars.iv.next.i, %.lr.ph.i ], [ 0, %0 ]
%2 = getelementptr inbounds i8* %buffer1, i64 %indvars.iv.i
%3 = load i8* %2, align 1, !tbaa !0
%4 = getelementptr inbounds i8* %buffer2, i64 %indvars.iv.i
%5 = load i8* %4, align 1, !tbaa !0
%6 = xor i8 %5, %3
%7 = getelementptr inbounds i8* %res, i64 %indvars.iv.i
store i8 %6, i8* %7, align 1, !tbaa !0
%indvars.iv.next.i = add i64 %indvars.iv.i, 1
%lftr.wideiv = trunc i64 %indvars.iv.next.i to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %_ZL5memopI3XorEvPcS1_S1_j.exit, label %.lr.ph.i
_ZL5memopI3XorEvPcS1_S1_j.exit: ; preds = %.lr.ph.i, %0
ret void
}
Run Code Online (Sandbox Code Playgroud)
正如预期的那样,它们在结构上完全相同,因为仿函数代码已完全内联(即使不了解IR也可见).
请注意,这不是孤立的结果.例如,std::sort的执行速度比qsort快两到三倍,因为它使用仿函数而不是间接函数调用.当然,使用模板化函数和仿函数意味着每个不同的实例化都会生成新代码,就像您手动编写函数一样,但这正是您原本手动所做的事情.
| 归档时间: |
|
| 查看次数: |
243 次 |
| 最近记录: |