use*_*549 7 javascript json node.js
我有以下JSON文件:
sensorlogs.json
{"arr":[{"UTCTime":10000001,"s1":22,"s2":32,"s3":42,"s4":12},
{"UTCTime":10000002,"s1":23,"s2":33,"s4":13},
{"UTCTime":10000003,"s1":24,"s2":34,"s3":43,"s4":14},
{"UTCTime":10000005,"s1":26,"s2":36,"s3":44,"s4":16},
{"UTCTime":10000006,"s1":27,"s2":37,"s4":17},
{"UTCTime":10000004,"s1":25,"s2":35,"s4":15},
...
{"UTCTime":12345678,"s1":57,"s2":35,"s3":77,"s4":99}
]}
Run Code Online (Sandbox Code Playgroud)
传感器s1,s2,s3等以各自不同的频率发送数据(注意s3每2秒才发送一次,并且日志中的时间戳可能是乱序的).
我怎样才能实现像 -
Analyzing s1:
s = [[10000001, 22], [10000002, 23],.. [12345678,57]]
s1 had 2 missing entries
Analyzing s2:
s = [[10000001, 32], [10000002, 33],.. [12345678,35]]
s2 had 0 missing entries
Analyzing s3:
s = [[10000001, 42], [10000003, 43],.. [12345678,77]]
s3 had 0 missing entries
Analyzing s4:
s = [[10000001, 12], [10000002, 13],.. [12345678,99]]
s4 had 1 missing entries
Run Code Online (Sandbox Code Playgroud)
sensorlogs.json是16 GB.
可以根据连续UTC时间戳的差异找到缺少的条目.每个传感器以已知频率传输.
由于内存限制,我无法使用多个大型数组进行分析,因此我必须在同一个JSON日志文件上进行多次传递,并且只使用单个大型数组进行分析.
到目前为止我所拥有的是 -
var result = [];

//1. Extract all the keys from the log file
console.log("Extracting keys... \n");
var stream = fs.createReadStream(filePath);
var lineReader = lr.createInterface(
{
    input: stream
});

lineReader.on('line', function (line)
{
    getKeys(line);//extract all the keys from the JSON
});

// Continue from readline's 'close' rather than the stream's 'end': 'close' is
// emitted once the interface is done with its input, so no 'line' callback
// for this pass can still be pending when the second phase starts.
lineReader.on('close', function()
{
    //obj -> arr
    for(var key in tmpObj)
        arrStrm.push(key);

    //2. Validate individual sensors
    console.log("Validating the sensor data ...\n");

    // async.each would start a pass for every sensor at the same time, all of
    // them sharing `currSensor`. eachSeries starts the next pass only after the
    // current one has called `done`, so a single sensor is in flight at a time.
    async.eachSeries(arrStrm, function(key, done)
    {
        var finished = false;

        // `done` must be called exactly once per sensor, whichever of
        // 'error' / 'close' happens first.
        function finish(err)
        {
            if(finished)
                return;
            finished = true;
            done(err);
        }

        currSensor = key;
        console.log("validating " + currSensor + "...\n");

        // Per-pass locals: the reader of one pass is never overwritten by another.
        var sensorStream = fs.createReadStream(filePath);
        var sensorReader = lr.createInterface(
        {
            input: sensorStream
        });

        sensorStream.on('error', finish);

        sensorReader.on('line', function (line)
        {
            processLine(line);//Create the arrays for the sensors
        });

        sensorReader.on('close', function()
        {
            if(finished)
                return; // the pass already failed; do not analyse partial data
            processSensor(key);//Process the data for the current sensor
            finish();
        });
    }, function(err)
    {
        if(err)
            console.error("Sensor validation failed: " + err.message);
    });
});
/**
 * Registers every key of one log line in the shared `tmpObj` map.
 *
 * Each key that is not yet known (and is not "debug") gets an empty array as
 * its entry in `tmpObj`; keys that are already present are left untouched.
 *
 * Lines containing '[' or ']' are skipped entirely, as are lines that are
 * empty after the trailing CR and comma have been removed.
 * NOTE(review): a record that shares its line with the opening `[` of the
 * array is therefore skipped as well - confirm this is intended.
 *
 * @param {string} line - One line of the log file, without the line feed.
 * @throws {SyntaxError} If the remaining text is not valid JSON.
 */
function getKeys(line)
{
    // No assignment to an undeclared `pos` here: the index itself was never
    // used, and the assignment created an implicit global (a ReferenceError
    // in strict mode).
    if(line.indexOf('[') >= 0 || line.indexOf(']') >= 0)
        return;

    if(line[line.length - 1] === '\r') line = line.slice(0, -1); // discard CR (0x0D)
    if(line[line.length - 1] === ',') line = line.slice(0, -1); // discard ,

    if(line.length <= 1) // ignore empty lines
        return;

    var obj = JSON.parse(line); // parse the JSON
    for(var key in obj)
    {
        // `== null` matches both undefined and null, like the former `== undefined`.
        if(key !== "debug" && tmpObj[key] == null)
            tmpObj[key] = [];
    }
}
Run Code Online (Sandbox Code Playgroud)
当然这不起作用,我无法在网上找到任何解释如何实现的内容.
注意:我可以选用任何语言来开发这个工具(C/C++,C#,Java,Python),但我倾向于使用JavaScript,因为它可以轻松地解析JSON数组(而且我也想借此提高自己的JS水平).如果JavaScript不是开发这类工具的最佳语言,有人能推荐一种替代语言吗?
编辑:一些重要的信息要么不是很清楚,要么我之前没有包含,但看起来重要的是要包含在问题中 -
我修改了我的JSON和预期输出
一种解决方案可能是对JSON文件进行多次传递,一次将一个传感器数据与时间戳存储在一个数组中,然后对数组进行排序,最后分析数据是否存在损坏和间隙.这就是我在上面的代码中尝试做的事情
再次编辑以考虑您的编辑:
var fs = require('fs');
// Name of the sensor to analyse, taken from the first command-line argument.
var sensor = process.argv[2];
if (!sensor) {
    console.error('Usage: node processJson.js <sensor>');
    process.exit(1);
}

var stream = fs.createReadStream('sensorlogs.json', {flags: 'r', encoding: 'utf-8'});
var buffer = '';      // text read so far that has not been consumed by processBuffer()
var readings = [];    // [UTCTime, value] pairs of `sensor`, filled by processLine()
var missingCont = 0;  // number of records without a value for `sensor`

console.log('Analizying ' + sensor + ':');

// Each chunk is appended to the buffer and the records it completes are consumed.
stream.on('data', function(d) {
    buffer += d.toString();
    processBuffer();
});

// Report once, after the whole file has been read. Printing from the 'data'
// handler would dump the partial result for every chunk of the file.
stream.on('end', function() {
    console.log(readings);
    console.log(sensor + ' had ' + missingCont + ' missing entries');
});

stream.on('error', function(err) {
    console.error('Could not read sensorlogs.json: ' + err.message);
    process.exitCode = 1;
});
/**
 * Consumes every complete record currently held in the shared `buffer`.
 *
 * Each `{"...}` span is handed to processLine(); whatever follows the last
 * complete record (separators, or the beginning of a record whose closing
 * brace has not arrived yet) stays in `buffer` for the next call.
 *
 * A record is taken to end at the first '}' after its opening `{"`.
 * NOTE(review): this assumes flat records - no nested objects and no '}'
 * inside string values - and that the first record directly follows the '['
 * of the enclosing array; confirm against the real log format.
 */
function processBuffer() {
    // Skip the wrapper in front of the array, but only when it is actually
    // present: indexOf() returns -1 for every later chunk, and slicing from
    // -1 would throw away everything except the last character.
    var arrayStart = buffer.indexOf('[{');
    var cursor = arrayStart === -1 ? 0 : arrayStart + 1;

    while (true) {
        var open = buffer.indexOf('{"', cursor);
        if (open === -1) {
            break;
        }
        var close = buffer.indexOf('}', open);
        if (close === -1) {
            // The record is split across chunks: keep it from its opening
            // brace so the next chunk can complete it.
            cursor = open;
            break;
        }
        processLine(buffer.slice(open, close + 1));
        cursor = close + 1;
    }

    buffer = buffer.slice(cursor);
};
/**
 * Parses one record and files its reading for the selected `sensor`.
 *
 * A record without a value for `sensor` (absent or null) increments
 * `missingCont`. Otherwise `[UTCTime, value]` is inserted into `readings`
 * so that the array stays ordered by ascending UTCTime; a record whose
 * UTCTime equals existing ones is placed after them.
 *
 * @param {string} line - JSON text of a single record; "" is ignored.
 * @throws {SyntaxError} If `line` is not valid JSON.
 */
function processLine(line) {
    if (line === "") {
        return;
    }

    var obj = JSON.parse(line);
    var value = obj[sensor];

    // Only an absent/null value counts as missing; a plain truthiness test
    // would also discard legitimate readings such as 0.
    if (value == null) {
        missingCont++;
        return;
    }

    var time = obj.UTCTime;
    var low = 0;
    var high = readings.length;

    // Records usually arrive in order, so try a plain append first.
    if (high > 0 && !(time < readings[high - 1][0])) {
        low = high;
    }

    // Otherwise binary-search the first entry with a larger timestamp.
    while (low < high) {
        var mid = (low + high) >>> 1;
        if (time < readings[mid][0]) {
            high = mid;
        } else {
            low = mid + 1;
        }
    }

    readings.splice(low, 0, [time, value]);
};
Run Code Online (Sandbox Code Playgroud)
调用脚本时,必须把要分析的传感器名称作为参数传入:
node.exe scripts\processJson.js <param>
Run Code Online (Sandbox Code Playgroud)
为了测试它,我拿了这个样本:
{"arr":[{"UTCTime":10000001,"s1":22,"s2":32,"s3":42,"s4":12},
{"UTCTime":10000005,"s1":20,"s2":30,"s3":40,"s4":10},
{"UTCTime":10000002,"s1":23,"s2":33,"s4":13},
{"UTCTime":10000003,"s1":24,"s2":34,"s3":43,"s4":14},
{"UTCTime":12345678,"s1":57,"s2":35,"s3":77,"s4":99}
]}
Run Code Online (Sandbox Code Playgroud)
输出是:
> node.exe scripts\processJson.js s1
Analizying s1:
[[10000001, 22], [10000002, 23], [10000003, 24], [10000005, 20], [12345678, 57]]
s1 had 0 missing entries
> node.exe scripts\processJson.js s2
Analizying s2:
[[10000001, 32], [10000002, 33], [10000003, 34], [10000005, 30], [12345678, 35]]
s2 had 0 missing entries
> node.exe scripts\processJson.js s3
Analizying s3:
[[10000001, 42], [10000003, 43], [10000005, 40], [12345678, 77]]
s3 had 1 missing entries
> node.exe scripts\processJson.js s4
Analizying s4:
[[10000001, 12], [10000002, 13], [10000003, 14], [10000005, 10], [12345678, 99]]
s4 had 0 missing entries
Run Code Online (Sandbox Code Playgroud)