Ben*_*ing 24 javascript linux strace node.js
我有一个长期运行的 node.js 进程,它有时会突然占满 100% 的 CPU 并停止响应请求.
最近一次出现这种情况时,我用 strace 附加到了该进程,下面是我看到的输出:
Process 19523 attached - interrupt to quit
gettimeofday({1394558574, 234192}, NULL) = 0
gettimeofday({1394558574, 235381}, NULL) = 0
gettimeofday({1394558574, 306460}, NULL) = 0
mmap(0x3edab823a000, 2097152, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x3edab823a000
munmap(0x3edab823a000, 811008) = 0
munmap(0x3edab8400000, 237568) = 0
mmap(0x3edab8300000, 1048576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3edab8300000
gettimeofday({1394558574, 316435}, NULL) = 0
gettimeofday({1394558574, 316576}, NULL) = 0
gettimeofday({1394558574, 316677}, NULL) = 0
gettimeofday({1394558574, 316775}, NULL) = 0
gettimeofday({1394558574, 316879}, NULL) = 0
gettimeofday({1394558574, 316978}, NULL) = 0
gettimeofday({1394558574, 317439}, NULL) = 0
gettimeofday({1394558574, 317537}, NULL) = 0
gettimeofday({1394558574, 318139}, NULL) = 0
gettimeofday({1394558574, 318234}, NULL) = 0
gettimeofday({1394558574, 318343}, NULL) = 0
gettimeofday({1394558574, 318437}, NULL) = 0
gettimeofday({1394558574, 318530}, NULL) = 0
gettimeofday({1394558574, 318624}, NULL) = 0
gettimeofday({1394558574, 319135}, NULL) = 0
gettimeofday({1394558574, 319648}, NULL) = 0
gettimeofday({1394558574, 319769}, NULL) = 0
gettimeofday({1394558574, 319975}, NULL) = 0
futex(0x7f5b380008c8, FUTEX_WAKE_PRIVATE, 1) = 1
gettimeofday({1394558574, 322266}, NULL) = 0
gettimeofday({1394558574, 322426}, NULL) = 0
gettimeofday({1394558574, 322520}, NULL) = 0
gettimeofday({1394558574, 322759}, NULL) = 0
gettimeofday({1394558574, 322853}, NULL) = 0
gettimeofday({1394558574, 322995}, NULL) = 0
futex(0x7f5b380008c8, FUTEX_WAKE_PRIVATE, 1) = 1
gettimeofday({1394558574, 417614}, NULL) = 0
gettimeofday({1394558575, 386566}, NULL) = 0
gettimeofday({1394558575, 387704}, NULL) = 0
gettimeofday({1394558575, 463410}, NULL) = 0
mmap(0x24cfd260f000, 2097152, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x24cfd260f000
munmap(0x24cfd260f000, 987136) = 0
munmap(0x24cfd2800000, 61440) = 0
mmap(0x24cfd2700000, 1048576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x24cfd2700000
Run Code Online (Sandbox Code Playgroud)
这里有大量的 gettimeofday 调用,除此之外几乎没有别的!
是什么原因可能导致 node.js 像这样卡住?
更新:我从较旧版本的 node.js 升级到 10.29(大概是这个版本)之后,这个问题消失了.但我刚刚升级到 10.33,问题又出现了.这次我在调试方面取得了一些进展.首先:
$ sudo strace -p 11812 -c
Process 11812 attached - interrupt to quit
Process 11812 detached
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
100.00 0.000192 0 2780 gettimeofday
0.00 0.000000 0 1390 getrusage
0.00 0.000000 0 31 futex
0.00 0.000000 0 1390 clock_gettime
------ ----------- ----------- --------- --------- ----------------
100.00 0.000192 5591 total
Run Code Online (Sandbox Code Playgroud)
根据"Node.js:如何附加到正在运行的进程并使用控制台调试服务器?"这个问题,我了解到可以将 node 调试器附加到正在运行的进程,下面是我得到的结果:
$ sudo kill -s SIGUSR1 11812
$ sudo node debug -p 11812
connecting... ok
break in timers.js:79
77 }
78
79 function listOnTimeout() {
80 var msecs = this.msecs;
81 var list = this;
debug> bt
#0 timers.js:79:23
Run Code Online (Sandbox Code Playgroud)
所以看起来这肯定与定时器有关,但我不知道如何沿着调用栈继续向上回溯,以找出是我代码中的哪个位置触发了这个问题.
我猜是有人手动实现了一个阻塞式的"setTimeout".如果有人为了避免潜在的竞态条件而不想释放对 JS 主线程的控制权,就可能会这样做.
出于显而易见的原因,这在生产代码中是一种糟糕的做法,但我有时会在调试时使用它,以强制异步过程按特定顺序执行.
你可能会在代码中发现类似下面这样的蠢事:
// Deliberately bad example: a hand-rolled "blocking setTimeout" that spins the
// main JS thread. Shown only to illustrate the anti-pattern; do not use it in
// production code.

// Simulated async operation: calls cb(null, 'dummy_result') after 5 seconds.
var mockAsyncFunction = function (cb) {
  setTimeout(function () {
    cb(null, 'dummy_result');
  }, 5000);
};

var myResult = null;
mockAsyncFunction(function (err, result) {
  myResult = result;
});

var timeOut = 10000; // timeout in 10 sec.
var timeStart = new Date().getTime();

// Busy-wait until `timeOut` milliseconds have elapsed. The loop never returns
// control to the event loop, so the 5-second timer above cannot fire while it
// spins and myResult is still null when 'DONE' is printed. The clock is read
// on every iteration (presumably the source of a flood of gettimeofday calls
// like the one in the strace output).
while (new Date().getTime() - timeStart <= timeOut) {
  // spin
}
console.log('DONE');
Run Code Online (Sandbox Code Playgroud)
或者是更糟糕的写法,例如 nextTick 递归:
// Deliberately bad example: polls the clock by re-queuing itself with
// process.nextTick until 10 seconds have passed since timeStart.
var timeStart = Date.now();

function recurseUntilDone() {
  var elapsedMs = Date.now() - timeStart;
  if (elapsedMs >= 10000) {
    console.log('Done recursing');
    return;
  }
  // Not finished yet: schedule another check of the clock.
  process.nextTick(recurseUntilDone);
}

recurseUntilDone();
Run Code Online (Sandbox Code Playgroud)
归档时间: |
|
查看次数: |
5098 次 |
最近记录: |