Ser*_*tch 4 c++ memory winapi stack setrlimit
是否有可能在WinAPI中为运行时的当前线程设置堆栈大小,就像setrlimit在Linux上一样?我的意思是增加当前线程的保留堆栈大小,如果它对于当前要求来说太小了.这是一个可以被其他编程语言的线程调用的库,因此在编译时不能选择设置堆栈大小.
如果不行,有没有类似汇编蹦床(trampoline)这样的方案,可以把堆栈指针切换到一块动态分配的内存上?
常见问题:代理线程是一个万无一失的解决方案(除非调用者线程的堆栈非常小).但是,线程切换似乎是性能杀手.我需要大量的堆栈用于递归或_alloca.这也是为了提高性能,因为堆分配很慢,特别是如果多个线程并行地从堆中分配(它们被相同的libc/ CRT互斥锁阻塞,因此代码变为串行).
你不能在库代码中为当前线程彻底更换堆栈(分配新堆栈、删除旧堆栈),因为旧堆栈中保存着返回地址,还可能存在指向栈上变量的指针等.
你也不能扩展现有的堆栈:它的虚拟内存已经分配(保留/提交)完毕,无法再扩大.
但是可以在调用期间分配一个临时堆栈并切换到它.这种情况下,你必须先保存NT_TIB中原来的StackBase和StackLimit(该结构见winnt.h),设置新值(需要为新堆栈分配内存),执行调用(切换堆栈需要一些汇编代码 - 仅用c/c++无法做到),然后恢复原来的StackBase和StackLimit.内核模式下对此有现成的支持 - KeExpandKernelStackAndCallout
而在用户模式下有纤程(Fiber) - 它很少被用到,但看起来正好适合这个任务.使用纤程,我们可以在当前线程中创建额外的堆栈/执行上下文.
所以(对于库而言)通常的解决方案如下:
在DLL_THREAD_ATTACH时:
- 调用ConvertThreadToFiber(如果它返回false,还要检查GetLastError是否为ERROR_ALREADY_FIBER - 这种情况也算成功)
- 调用CreateFiberEx
以上步骤只做一次.之后每次调用需要大量堆栈空间的过程时:
- 用GetCurrentFiber记住当前纤程
- 用SwitchToFiber切换到你的纤程,并在其中执行该过程
- 再次通过SwitchToFiber返回原来的纤程(即调用前由GetCurrentFiber保存的那个)
最后在DLL_THREAD_DETACH时你需要:
- DeleteFiber
- ConvertFiberToThread,但仅在最初的ConvertThreadToFiber返回true的情况下(如果是ERROR_ALREADY_FIBER - 让最先把线程转换为纤程的一方负责把它转换回来 - 这种情况下这不是你的任务)
你还需要一些与纤程/线程相关联的(通常很小的)数据.它当然必须是每线程变量,所以需要用__declspec(thread)来声明这些数据,或者直接使用TLS(或现代c++中相应的功能).
演示实现如下:
typedef ULONG (WINAPI * MY_EXPAND_STACK_CALLOUT) (PVOID Parameter);
class FIBER_DATA
{
public:
PVOID _PrevFiber, _MyFiber;
MY_EXPAND_STACK_CALLOUT _pfn;
PVOID _Parameter;
ULONG _dwError;
BOOL _bConvertToThread;
static VOID CALLBACK _FiberProc( PVOID lpParameter)
{
reinterpret_cast<FIBER_DATA*>(lpParameter)->FiberProc();
}
VOID FiberProc()
{
for (;;)
{
_dwError = _pfn(_Parameter);
SwitchToFiber(_PrevFiber);
}
}
public:
~FIBER_DATA()
{
if (_MyFiber)
{
DeleteFiber(_MyFiber);
}
if (_bConvertToThread)
{
ConvertFiberToThread();
}
}
FIBER_DATA()
{
_bConvertToThread = FALSE, _MyFiber = 0;
}
ULONG Create(SIZE_T dwStackCommitSize, SIZE_T dwStackReserveSize);
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
_PrevFiber = GetCurrentFiber();
_pfn = pfn;
_Parameter = Parameter;
SwitchToFiber(_MyFiber);
return _dwError;
}
};
// Per-thread pointer to the thread's FIBER_DATA; assigned by OnAttach and read by OnDetach and DoCallout.
__declspec(thread) FIBER_DATA* g_pData;
// Converts the calling thread to a fiber (unless it already is one) and
// creates the secondary fiber with the requested stack sizes.
// Returns NOERROR on success, otherwise the GetLastError() value.
ULONG FIBER_DATA::Create(SIZE_T dwStackCommitSize, SIZE_T dwStackReserveSize)
{
    if (!ConvertThreadToFiber(this))
    {
        const ULONG convertError = GetLastError();

        // A thread that is already a fiber is acceptable; whoever converted
        // it stays responsible for converting it back.
        if (convertError != ERROR_ALREADY_FIBER)
        {
            return convertError;
        }
    }
    else
    {
        // We did the conversion, so the destructor must undo it.
        _bConvertToThread = TRUE;
    }

    _MyFiber = CreateFiberEx(dwStackCommitSize, dwStackReserveSize, 0, _FiberProc, this);

    if (_MyFiber)
    {
        return NOERROR;
    }

    return GetLastError();
}
// Releases the calling thread's FIBER_DATA, if any (intended for DLL_THREAD_DETACH).
void OnDetach()
{
    if (FIBER_DATA* pData = g_pData)
    {
        // Clear the thread-local pointer before freeing the object so that a
        // later DoCallout on this thread sees "no data" rather than a
        // dangling pointer.
        g_pData = 0;
        delete pData;
    }
}
// Allocates and initialises the calling thread's FIBER_DATA (intended for
// DLL_THREAD_ATTACH). Returns NOERROR on success or an error code.
ULONG OnAttach()
{
    FIBER_DATA* const pFiberData = new FIBER_DATA;

    if (!pFiberData)
    {
        return ERROR_NO_SYSTEM_RESOURCES;
    }

    // Commit 2 pages up front, reserve 512 pages for the fiber stack.
    const ULONG status = pFiberData->Create(2*PAGE_SIZE, 512 * PAGE_SIZE);

    if (status)
    {
        delete pFiberData;
        return status;
    }

    g_pData = pFiberData;
    return NOERROR;
}
// Sample callout: prints the string passed as its argument.
ULONG WINAPI TestCallout(PVOID param)
{
    const char* const text = static_cast<const char*>(param);
    DbgPrint("TestCallout(%s)\n", text);
    return NOERROR;
}
// Runs pfn(Parameter) through the calling thread's FIBER_DATA.
// Returns ERROR_GEN_FAILURE when the thread has no FIBER_DATA.
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
    FIBER_DATA* const pThreadData = g_pData;

    if (!pThreadData)
    {
        return ERROR_GEN_FAILURE;
    }

    return pThreadData->DoCallout(pfn, Parameter);
}
// Demo driver: OnAttach/OnDetach stand in for the DLL_THREAD_ATTACH and
// DLL_THREAD_DETACH notifications.
if (OnAttach() == NOERROR)//DLL_THREAD_ATTACH
{
    // String literals are const; the callout parameter is a plain PVOID, so
    // the cast is spelled out (the implicit conversion is rejected when the
    // compiler enforces const-correct string literals).
    DoCallout(TestCallout, const_cast<char*>("Demo Task #1"));
    DoCallout(TestCallout, const_cast<char*>("Demo Task #2"));
    OnDetach();//DLL_THREAD_DETACH
}
Run Code Online (Sandbox Code Playgroud)
另请注意,所有纤程都在同一个线程的上下文中执行 - 与同一线程关联的多个纤程不能并发执行,只能顺序执行,切换时机由你自己控制.所以不需要任何额外的同步.另外SwitchToFiber是完全在用户模式下执行的过程,速度非常快,并且从不失败(因为它不分配任何资源)
更新
尽管使用__declspec(thread) FIBER_DATA* g_pData;更简单(代码更少),但更好的实现是直接使用TlsGetValue/TlsSetValue,并且只在线程内第一次调用时才分配FIBER_DATA,而不是为所有线程都分配.另外__declspec(thread)在XP上的dll中不能正确工作(根本不工作).所以可以做如下修改
在DLL_PROCESS_ATTACH时分配你的TLS槽: gTlsIndex = TlsAlloc();
并在DLL_PROCESS_DETACH时释放它
// Release the TLS slot only if one was actually allocated.
if (TLS_OUT_OF_INDEXES != gTlsIndex)
{
    TlsFree(gTlsIndex);
}
Run Code Online (Sandbox Code Playgroud)
在每次收到DLL_THREAD_DETACH通知时调用
void OnThreadDetach()
{
if (FIBER_DATA* pData = (FIBER_DATA*)TlsGetValue(gTlsIndex))
{
delete pData;
}
}
Run Code Online (Sandbox Code Playgroud)
并且DoCallout需要按如下方式修改
// Runs pfn(Parameter) on the calling thread's secondary fiber, creating the
// per-thread FIBER_DATA on first use.
// Returns the callout's result, or an error code if the per-thread data
// could not be set up.
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
    FIBER_DATA* pData = (FIBER_DATA*)TlsGetValue(gTlsIndex);

    if (!pData)
    {
        // this code executed only once on first call
        if (!(pData = new FIBER_DATA))
        {
            return ERROR_NO_SYSTEM_RESOURCES;
        }

        // Create takes (commit, reserve): commit a few pages up front and
        // reserve the large region - or what stack size you need.
        if (ULONG dwError = pData->Create(4*PAGE_SIZE, 512*PAGE_SIZE))
        {
            delete pData;
            return dwError;
        }

        if (!TlsSetValue(gTlsIndex, pData))
        {
            // Without the slot the object would leak and be re-created on
            // every call. Read the error before cleanup can overwrite it.
            ULONG dwError = GetLastError();
            delete pData;
            return dwError;
        }
    }

    return pData->DoCallout(pfn, Parameter);
}
Run Code Online (Sandbox Code Playgroud)
所以,与其在DLL_THREAD_ATTACH时通过OnAttach()为每个新线程都分配堆栈,不如只为真正需要的线程(在其第一次调用时)分配
如果其他代码也尝试使用纤程,这段代码可能会出现问题.例如msdn的示例代码在ConvertThreadToFiber返回0时并不检查ERROR_ALREADY_FIBER.因此,如果我们先于主应用程序把线程转换成了纤程,而主应用程序之后也尝试使用纤程,它就可能无法正确处理这种情况.另外ERROR_ALREADY_FIBER在xp上不起作用(从vista开始才支持).
所以还可以采用另一种解决方案 - 自己创建线程堆栈,并在需要大量堆栈空间的调用期间临时切换到它.要点是:不仅要为堆栈分配空间并切换esp(或rsp),还不要忘记在NT_TIB中正确设置StackBase和StackLimit - 这是必要且充分的条件(否则异常处理和保护页扩展将无法工作).
尽管这个替代方案需要更多代码(手动创建线程堆栈并切换堆栈),但它在xp上也能工作,并且当其他代码也在该线程中使用纤程时不会受到任何影响
typedef ULONG (WINAPI * MY_EXPAND_STACK_CALLOUT) (PVOID Parameter);
extern "C" PVOID __fastcall SwitchToStack(PVOID param, PVOID stack);
struct FIBER_DATA
{
PVOID _Stack, _StackLimit, _StackPtr, _StackBase;
MY_EXPAND_STACK_CALLOUT _pfn;
PVOID _Parameter;
ULONG _dwError;
static void __fastcall FiberProc(FIBER_DATA* pData, PVOID stack)
{
for (;;)
{
pData->_dwError = pData->_pfn(pData->_Parameter);
// StackLimit can changed during _pfn call
pData->_StackLimit = ((PNT_TIB)NtCurrentTeb())->StackLimit;
stack = SwitchToStack(0, stack);
}
}
ULONG Create(SIZE_T Reserve, SIZE_T Commit);
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
_pfn = pfn;
_Parameter = Parameter;
PNT_TIB tib = (PNT_TIB)NtCurrentTeb();
PVOID StackBase = tib->StackBase, StackLimit = tib->StackLimit;
tib->StackBase = _StackBase, tib->StackLimit = _StackLimit;
_StackPtr = SwitchToStack(this, _StackPtr);
tib->StackBase = StackBase, tib->StackLimit = StackLimit;
return _dwError;
}
~FIBER_DATA()
{
if (_Stack)
{
VirtualFree(_Stack, 0, MEM_RELEASE);
}
}
FIBER_DATA()
{
_Stack = 0;
}
};
// Allocates the private stack and prepares its initial frame.
//
// Reserve - bytes of address space to reserve (rounded up to a page).
// Commit  - bytes to commit at the top of the reservation (rounded up to a
//           page); must not exceed Reserve.
//
// Returns NOERROR on success, ERROR_INVALID_PARAMETER for bad sizes, or the
// GetLastError() value of the allocation that failed.
ULONG FIBER_DATA::Create(SIZE_T Reserve, SIZE_T Commit)
{
    Reserve = (Reserve + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
    Commit = (Commit + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);

    if (Reserve < Commit || !Reserve)
    {
        return ERROR_INVALID_PARAMETER;
    }

    PBYTE newStack = (PBYTE)VirtualAlloc(0, Reserve, MEM_RESERVE, PAGE_NOACCESS);

    if (!newStack)
    {
        return GetLastError();
    }

    // Same address viewed as a byte pointer (for size arithmetic) and as a
    // pointer-sized slot cursor (for building the initial frame).
    union {
        PBYTE newStackBase;
        void** ppvStack;
    };

    newStackBase = newStack + Reserve;

    // The stack grows downwards, so the committed part sits at the top.
    PBYTE newStackLimit = (PBYTE)VirtualAlloc(newStackBase - Commit, Commit, MEM_COMMIT, PAGE_READWRITE);

    if (newStackLimit)
    {
        // Put a guard page just below the committed part, unless the whole
        // reservation is already committed and there is no room for one.
        if (Reserve == Commit || VirtualAlloc(newStackLimit - PAGE_SIZE, PAGE_SIZE, MEM_COMMIT, PAGE_READWRITE|PAGE_GUARD))
        {
            _StackBase = newStackBase, _StackLimit = newStackLimit, _Stack = newStack;

            // Build the frame SwitchToStack expects to find on first entry:
            // slots for the registers it pops, then FiberProc as the address
            // its 'ret' jumps to.
#if defined(_M_IX86)
            *--ppvStack = FiberProc;
            ppvStack -= 4;// ebp,esi,edi,ebx
#elif defined(_M_AMD64)
            ppvStack -= 5;// x64 space
            *--ppvStack = FiberProc;
            ppvStack -= 8;// r15,r14,r13,r12,rbp,rsi,rdi,rbx
#else
#error "not supported"
#endif

            _StackPtr = ppvStack;
            return NOERROR;
        }
    }

    // Read the failure code before VirtualFree gets a chance to overwrite it.
    ULONG dwError = GetLastError();
    VirtualFree(newStack, 0, MEM_RELEASE);
    return dwError;
}
// TLS slot index under which each thread's FIBER_DATA* is stored (see the TlsGetValue/TlsSetValue calls).
ULONG gTlsIndex;
// Runs pfn(Parameter) on the calling thread's private stack, creating the
// per-thread FIBER_DATA on first use.
// Returns the callout's result, or an error code if the per-thread data
// could not be set up.
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
    FIBER_DATA* pData = (FIBER_DATA*)TlsGetValue(gTlsIndex);

    if (!pData)
    {
        // this code executed only once on first call
        if (!(pData = new FIBER_DATA))
        {
            return ERROR_NO_SYSTEM_RESOURCES;
        }

        // Create takes (Reserve, Commit): reserve 512 pages, commit 4.
        if (ULONG dwError = pData->Create(512*PAGE_SIZE, 4*PAGE_SIZE))
        {
            delete pData;
            return dwError;
        }

        if (!TlsSetValue(gTlsIndex, pData))
        {
            // Without the slot the object (and its reserved stack) would
            // leak and be re-created on every call. Read the error before
            // cleanup can overwrite it.
            ULONG dwError = GetLastError();
            delete pData;
            return dwError;
        }
    }

    return pData->DoCallout(pfn, Parameter);
}
void OnThreadDetach()
{
if (FIBER_DATA* pData = (FIBER_DATA*)TlsGetValue(gTlsIndex))
{
delete pData;
}
}
Run Code Online (Sandbox Code Playgroud)
SwitchToStack的汇编代码如下.在x86上:
; SwitchToStack, x86 version.
; Pushes ebx/edi/esi/ebp on the current stack, exchanges esp with edx,
; pops the same four registers from the new stack and returns through the
; address stored there. eax receives the stack pointer that was active
; before the exchange. ecx is not modified by this routine.
; Presumably edx carries the 'stack' argument and ecx the 'param' argument of
; the __fastcall prototype - confirm against the C++ declaration.
@SwitchToStack@8 proc
push ebx ; save registers on the outgoing stack
push edi
push esi
push ebp
xchg esp,edx ; esp = target stack pointer, edx = outgoing stack pointer
mov eax,edx ; return value = outgoing stack pointer
pop ebp ; restore registers from the target stack (reverse push order)
pop esi
pop edi
pop ebx
ret ; continue at the return address found on the target stack
@SwitchToStack@8 endp
Run Code Online (Sandbox Code Playgroud)
对于x64:
; SwitchToStack, x64 version.
; Pushes rbx/rdi/rsi/rbp/r12-r15 on the current stack, exchanges rsp with rdx,
; pops the same eight registers from the new stack and returns through the
; address stored there. rax receives the stack pointer that was active
; before the exchange. rcx is not modified by this routine.
; Presumably rdx carries the 'stack' argument and rcx the 'param' argument of
; the C++ prototype - confirm against the declaration.
; NOTE(review): no FRAME/unwind directives are declared on this proc.
SwitchToStack proc
push rbx ; save registers on the outgoing stack
push rdi
push rsi
push rbp
push r12
push r13
push r14
push r15
xchg rsp,rdx ; rsp = target stack pointer, rdx = outgoing stack pointer
mov rax,rdx ; return value = outgoing stack pointer
pop r15 ; restore registers from the target stack (reverse push order)
pop r14
pop r13
pop r12
pop rbp
pop rsi
pop rdi
pop rbx
ret ; continue at the return address found on the target stack
SwitchToStack endp
Run Code Online (Sandbox Code Playgroud)
用法/测试可以如下:
// Test driver: the calls below stand in for the DLL process/thread
// attach and detach notifications named in the trailing comments.
gTlsIndex = TlsAlloc();//DLL_PROCESS_ATTACH
if (gTlsIndex != TLS_OUT_OF_INDEXES)
{
    TestStackMemory();

    // String literals are const; the callout parameter is a plain PVOID, so
    // the cast is spelled out.
    DoCallout(TestCallout, const_cast<PSTR>("test #1"));

    //play with stack, exceptions, guard pages
    PSTR str = (PSTR)alloca(256);

    // str is filled in by zTestCallout; only print it when the callout
    // actually ran, otherwise the buffer is uninitialised.
    if (DoCallout(zTestCallout, str) == NOERROR)
    {
        DbgPrint("str=%s\n", str);
    }

    DoCallout(TestCallout, const_cast<PSTR>("test #2"));
    OnThreadDetach();//DLL_THREAD_DETACH
    TlsFree(gTlsIndex);//DLL_PROCESS_DETACH
}
// Prints every region that belongs to the allocation starting at
// AllocationBase: address range, size in pages, state and protection.
void TestMemory(PVOID AllocationBase)
{
    MEMORY_BASIC_INFORMATION info;
    PVOID cursor = AllocationBase;

    for (;;)
    {
        if (VirtualQuery(cursor, &info, sizeof(info)) < sizeof(info))
        {
            break;
        }

        // Stop at the first region that is part of a different allocation.
        if (info.AllocationBase != AllocationBase)
        {
            break;
        }

        cursor = (PBYTE)info.BaseAddress + info.RegionSize;

        DbgPrint("[%p, %p) %p %08x %08x\n", info.BaseAddress, cursor, (PVOID)(info.RegionSize >> PAGE_SHIFT), info.State, info.Protect);
    }
}
void TestStackMemory()
{
MEMORY_BASIC_INFORMATION mbi;
if (VirtualQuery(_AddressOfReturnAddress(), &mbi, sizeof(mbi)) >= sizeof(mbi))
{
TestMemory(mbi.AllocationBase);
}
}
// Sample callout that exercises the stack it runs on: prints the stack
// layout before and after a 5-page alloca, triggers and handles an access
// violation, then writes a string into the caller-supplied buffer.
ULONG WINAPI zTestCallout(PVOID Parameter)
{
    PSTR const output = (PSTR)Parameter;

    TestStackMemory();
    alloca(5*PAGE_SIZE);
    TestStackMemory();

    __try
    {
        // Deliberate write through a null pointer.
        *(int*)0=0;
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
        DbgPrint("exception %x handled\n", GetExceptionCode());
    }

    strcpy(output, "zTestCallout demo");
    return NOERROR;
}
// Sample callout: prints the layout of the stack it runs on, then the
// string passed as its argument.
ULONG WINAPI TestCallout(PVOID param)
{
    const char* const text = static_cast<const char*>(param);

    TestStackMemory();
    DbgPrint("TestCallout(%s)\n", text);

    return NOERROR;
}
Run Code Online (Sandbox Code Playgroud)