数据损坏C++和Python之间的管道

Sem*_*ape 9 c++ python stdin stdout pipe

我正在编写一些代码,它从Python获取二进制数据,将其管理到C++,对数据进行一些处理(在这种情况下计算互信息度量),然后将结果传递回python.测试时我发现一切正常,如果我发送的数据是一组2个阵列,尺寸小于1500 X 1500,但如果我发送2个2K X 2K阵列,我会收到很多损坏的废话.

我目前认为代码的算法部分很好,因为它在测试期间使用小(<= 1500 X1500)数组提供了预期的答案.这让我相信这是stdin或stdout管道的问题.也许我在某处传递了一些内在限制.

Python代码和C++代码如下.

Python代码:

import subprocess
import struct
import sys
import numpy as np

#set up the variables needed 
bytesPerDouble = 8
sizeX = 2000
sizeY = 2000
offset = sizeX*sizeY
totalBytesPerArray = sizeX*sizeY*bytesPerDouble
totalBytes = totalBytesPerArray*2                   #the 2 is because we pass 2 different versions of the 2D array

#setup the testing data array 
a = np.zeros(sizeX*sizeY*2, dtype='d')
for i in range(sizeX):
    for j in range(sizeY):
        a[j+i*sizeY] = i
        a[j+i*sizeY+offset] = i
        if i % 10 == 0:
            a[j+i*sizeY+offset] = j

data = a.tobytes('C')      

strTotalBytes = str(totalBytes)
strLineBytes  = str(sizeY*bytesPerDouble)

#communicate with c++ code
print("starting C++ code")     
command =   "C:\Python27\PythonPipes.exe"
proc = subprocess.Popen([command, strTotalBytes, strLineBytes, str(sizeY), str(sizeX)], stdin=subprocess.PIPE,stderr=subprocess.PIPE,stdout=subprocess.PIPE)

ByteBuffer = (data)
proc.stdin.write(ByteBuffer)

print("Reading results back from C++")
for i in range(sizeX):
    returnvalues = proc.stdout.read(sizeY*bytesPerDouble)
    a = buffer(returnvalues)
    b = struct.unpack_from(str(sizeY)+'d', a)
    print str(b) + " " + str(i)

print('done')
Run Code Online (Sandbox Code Playgroud)

C++代码:主要功能:

int main(int argc, char **argv) {
    int count = 0;
    long totalbytes = stoi(argv[argc-4], nullptr,10);       //bytes being transfered
    long bytechunk = stoi(argv[argc - 3], nullptr, 10); //bytes being transfered at a time
    long height = stoi(argv[argc-2], nullptr, 10);  //bytes being transfered at a time
    long width  = stoi(argv[argc-1], nullptr, 10);  //bytes being transfered at a time
    long offset = totalbytes / sizeof(double) / 2;


    data = new double[totalbytes/sizeof(double)];
    int columnindex = 0;
    //read in data from pipe
    while (count<totalbytes) {

        fread(&(data[columnindex]), 1, bytechunk, stdin);
        columnindex += bytechunk / sizeof(double);
        count += bytechunk;

    }


    //calculate the data transform
    MutualInformation MI = MutualInformation();
    MI.Initialize(data, height, width, offset);
    MI.calcMI();
    count = 0;
    //*
    //write out data to pipe
    columnindex = 0;
    while (count<totalbytes/2) {

        fwrite(&(MI.getOutput()[columnindex]), 1, bytechunk, stdout);
        fflush(stdout);
        count += bytechunk;
        columnindex += bytechunk/sizeof(double);
    }
    //*/
    delete [] data;

    return 0;
}
Run Code Online (Sandbox Code Playgroud)

如果您需要它的实际处理代码:

double MutualInformation::calcMI(){
    double rvalue = 0.0;
    std::map<int, map<int, double>> lHistXY = map<int, map<int, double>>();
    std::map<int, double> lHistX = map<int, double>();
    std::map<int, double> lHistY = map<int, double>();
    typedef std::map<int, std::map<int, double>>::iterator HistXY_iter;
    typedef std::map<int, double>::iterator HistY_iter;

    //calculate Entropys and MI
    double MI = 0.0;
    double Hx = 0.0;
    double Hy = 0.0;
    double Px = 0.0;
    double Py = 0.0;
    double Pxy = 0.0;

    //scan through the image
    int ip = 0;
    int jp = 0;
    int chipsize = 3;

    //setup zero array
    double * zeros = new double[this->mHeight];
    for (int j = 0; j < this->mHeight; j++){
        zeros[j] = 0.0;
    }

    //zero out Output array
    for (int i = 0; i < this->mWidth; i++){
        memcpy(&(this->mOutput[i*this->mHeight]), zeros, this->mHeight*8);
    }


    double index = 0.0;
    for (int ioutter = chipsize; ioutter < (this->mWidth - chipsize); ioutter++){
        //write out processing status
        //index = (double)ioutter;
        //fwrite(&index, 8, 1, stdout);
        //fflush(stdout);
        //*
        for (int j = chipsize; j < (this->mHeight - chipsize); j++){

            //clear the histograms
            lHistX.clear();
            lHistY.clear();
            lHistXY.clear();
            //chip out a section of the image
            for (int k = -chipsize; k <= chipsize; k++){
                for (int l = -chipsize; l <= chipsize; l++){
                    ip = ioutter + k;
                    jp = j + l;
                    //update X histogram
                    if (lHistX.count(int(this->mData[ip*this->mHeight + jp]))){
                        lHistX[int(this->mData[ip*this->mHeight + jp])] += 1.0;
                    }else{
                        lHistX[int(this->mData[ip*this->mHeight + jp])] = 1.0;

                    }
                    //update Y histogram
                    if (lHistY.count(int(this->mData[ip*this->mHeight + jp+this->mOffset]))){
                        lHistY[int(this->mData[ip*this->mHeight + jp+this->mOffset])] += 1.0;
                    }
                    else{
                        lHistY[int(this->mData[ip*this->mHeight + jp+this->mOffset])] = 1.0;

                    }

                    //update X and Y Histogram
                    if (lHistXY.count(int(this->mData[ip*this->mHeight + jp]))){ 
                        //X Key exists check if Y key exists
                        if (lHistXY[int(this->mData[ip*this->mHeight + jp])].count(int(this->mData[ip*this->mHeight + jp + this->mOffset]))){
                            //X & Y keys exist
                            lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] += 1;
                        }else{
                            //X exist but Y doesn't
                            lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] = 1;
                        }
                    }else{
                        //X Key Didn't exist
                        lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] = 1;
                    };
                }
            }

            //calculate PMI, Hx, Hy
            // iterator->first = key
            // iterator->second = value

             MI = 0.0;
             Hx = 0.0;
             Hy = 0.0;

            for (HistXY_iter Hist2D_iter = lHistXY.begin(); Hist2D_iter != lHistXY.end(); Hist2D_iter++) {

                Px = lHistX[Hist2D_iter->first] / ((double) this->mOffset);
                Hx -= Px*log(Px);

                for (HistY_iter HistY_iter = Hist2D_iter->second.begin(); HistY_iter != Hist2D_iter->second.end(); HistY_iter++) {
                    Py = lHistY[HistY_iter->first] / ((double) this->mOffset);
                    Hy -= Py*log(Py);
                    Pxy = HistY_iter->second / ((double) this->mOffset);
                    MI += Pxy*log(Pxy / Py / Px);
                }
            }

            //normalize PMI to max(Hx,Hy) so that the PMI value runs from 0 to 1
            if (Hx >= Hy && Hx > 0.0){
                MI /= Hx;
            }else if(Hy > Hx && Hy > 0.0){
                MI /= Hy;
            }
            else{
                MI = 0.0;
            }

            //write PMI to data output array
            if (MI < 1.1){
                this->mOutput[ioutter*this->mHeight + j] = MI;
            }
            else{
                this->mOutput[ioutter*this->mHeight + j] = 0.0;

            }

        }



    }

    return rvalue;
}
Run Code Online (Sandbox Code Playgroud)

使用返回有意义的数组的数组我得到的输出限制在0和1之间,如下所示:

(0.0,0.0,0.0,0.77160627908692593,0.6376472316395495,0.5728801401524277,...

对于2Kx2K或更高的数组,我得到这样的无意义(即使代码将值钳位在0和1之间):

(-2.2491400820412374e + 228,-2.2491400820412374e + 228,-2.2491400820412374e + 228,-2.2491400820412374e + 228,-2.2491400820412374e + 228,...

我想知道为什么这个代码在0.0和1之间分配后会破坏数据集,以及它是否是管道问题,stdin/stdout问题,某种缓冲区问题或编码问题我我根本没有看到.

更新我尝试使用Chris建议的代码以较小的块传递数据,但没有运气.另外值得注意的是,我在stdout上添加了一个关于ferror的捕获并且它永远不会被绊倒所以我很确定这些字节至少使它成为stdout.是否有可能某种东西以某种方式写入stdout?也许在我的程序运行时,一个额外的字节进入stdout?我发现这是值得怀疑的,因为错误在第10个条目中的第4个fwrite读取时始终出现.

Per Craig的请求是完整的C++代码(完整的Python代码已经发布):它位于3个文件中:

main.cpp中

#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <iostream>
#include "./MutualInformation.h"

double * data;
using namespace std;

void
xxwrite(unsigned char *buf, size_t wlen, FILE *fo)
{
    size_t xlen;

    for (; wlen > 0; wlen -= xlen, buf += xlen) {
        xlen = wlen;
        if (xlen > 1024)
            xlen = 1024;
        xlen = fwrite(buf, 1, xlen, fo);
        fflush(fo);
    }
}

int main(int argc, char **argv) {
    int count = 0;
    long totalbytes = stoi(argv[argc-4], nullptr,10);       //bytes being transfered
    long bytechunk = stoi(argv[argc - 3], nullptr, 10); //bytes being transfered at a time
    long height = stoi(argv[argc-2], nullptr, 10);  //bytes being transfered at a time
    long width  = stoi(argv[argc-1], nullptr, 10);  //bytes being transfered at a time
    long offset = totalbytes / sizeof(double) / 2;


    data = new double[totalbytes/sizeof(double)];
    int columnindex = 0;
    //read in data from pipe
    while (count<totalbytes) {

        fread(&(data[columnindex]), 1, bytechunk, stdin);
        columnindex += bytechunk / sizeof(double);
        count += bytechunk;

    }


    //calculate the data transform
    MutualInformation MI = MutualInformation();
    MI.Initialize(data, height, width, offset);
    MI.calcMI();
    count = 0;

    columnindex = 0;
    while (count<totalbytes/2) {

        xxwrite((unsigned char*)&(MI.getOutput()[columnindex]),  bytechunk, stdout);
        count += bytechunk;
        columnindex += bytechunk/sizeof(double);
    }
    delete [] data;

    return 0;
}
Run Code Online (Sandbox Code Playgroud)

MutualInformation.h

#include <map>

using namespace std;

class MutualInformation
{
private:
    double * mData;
    double * mOutput;
    long mHeight;
    long mWidth;
    long mOffset;

public:
    MutualInformation();
    ~MutualInformation();
    bool Initialize(double * data, long Height, long Width, long Offset);
    const double * getOutput();

    double calcMI();

};
Run Code Online (Sandbox Code Playgroud)

MutualInformation.cpp

#include "MutualInformation.h"


MutualInformation::MutualInformation()
{
    this->mData = nullptr;
    this->mOutput = nullptr;
    this->mHeight = 0;
    this->mWidth = 0;

}


MutualInformation::~MutualInformation()
{
    delete[] this->mOutput;
}

bool MutualInformation::Initialize(double * data, long Height, long Width, long Offset){
    bool rvalue = false;
    this->mData = data;
    this->mHeight = Height;
    this->mWidth = Width;
    this->mOffset = Offset;


    //allocate output data
    this->mOutput = new double[this->mHeight*this->mWidth];

    return rvalue;
}

const double * MutualInformation::getOutput(){
    return this->mOutput;
}


double MutualInformation::calcMI(){
    double rvalue = 0.0;
    std::map<int, map<int, double>> lHistXY = map<int, map<int, double>>();
    std::map<int, double> lHistX = map<int, double>();
    std::map<int, double> lHistY = map<int, double>();
    typedef std::map<int, std::map<int, double>>::iterator HistXY_iter;
    typedef std::map<int, double>::iterator HistY_iter;

    //calculate Entropys and MI
    double MI = 0.0;
    double Hx = 0.0;
    double Hy = 0.0;
    double Px = 0.0;
    double Py = 0.0;
    double Pxy = 0.0;

    //scan through the image
    int ip = 0;
    int jp = 0;
    int chipsize = 3;

    //setup zero array
    double * zeros = new double[this->mHeight];
    for (int j = 0; j < this->mHeight; j++){
        zeros[j] = 0.0;
    }

    //zero out Output array
    for (int i = 0; i < this->mWidth; i++){
        memcpy(&(this->mOutput[i*this->mHeight]), zeros, this->mHeight*8);
    }


    double index = 0.0;
    for (int ioutter = chipsize; ioutter < (this->mWidth - chipsize); ioutter++){

        for (int j = chipsize; j < (this->mHeight - chipsize); j++){

            //clear the histograms
            lHistX.clear();
            lHistY.clear();
            lHistXY.clear();
            //chip out a section of the image
            for (int k = -chipsize; k <= chipsize; k++){
                for (int l = -chipsize; l <= chipsize; l++){
                    ip = ioutter + k;
                    jp = j + l;
                    //update X histogram
                    if (lHistX.count(int(this->mData[ip*this->mHeight + jp]))){
                        lHistX[int(this->mData[ip*this->mHeight + jp])] += 1.0;
                    }else{
                        lHistX[int(this->mData[ip*this->mHeight + jp])] = 1.0;

                    }
                    //update Y histogram
                    if (lHistY.count(int(this->mData[ip*this->mHeight + jp+this->mOffset]))){
                        lHistY[int(this->mData[ip*this->mHeight + jp+this->mOffset])] += 1.0;
                    }
                    else{
                        lHistY[int(this->mData[ip*this->mHeight + jp+this->mOffset])] = 1.0;

                    }

                    //update X and Y Histogram
                    if (lHistXY.count(int(this->mData[ip*this->mHeight + jp]))){ 
                        //X Key exists check if Y key exists
                        if (lHistXY[int(this->mData[ip*this->mHeight + jp])].count(int(this->mData[ip*this->mHeight + jp + this->mOffset]))){
                            //X & Y keys exist
                            lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] += 1;
                        }else{
                            //X exist but Y doesn't
                            lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] = 1;
                        }
                    }else{
                        //X Key Didn't exist
                        lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] = 1;
                    };
                }
            }

            //calculate PMI, Hx, Hy
            // iterator->first = key
            // iterator->second = value

             MI = 0.0;
             Hx = 0.0;
             Hy = 0.0;

            for (HistXY_iter Hist2D_iter = lHistXY.begin(); Hist2D_iter != lHistXY.end(); Hist2D_iter++) {

                Px = lHistX[Hist2D_iter->first] / ((double) this->mOffset);
                Hx -= Px*log(Px);

                for (HistY_iter HistY_iter = Hist2D_iter->second.begin(); HistY_iter != Hist2D_iter->second.end(); HistY_iter++) {
                    Py = lHistY[HistY_iter->first] / ((double) this->mOffset);
                    Hy -= Py*log(Py);
                    Pxy = HistY_iter->second / ((double) this->mOffset);
                    MI += Pxy*log(Pxy / Py / Px);
                }
            }

            //normalize PMI to max(Hx,Hy) so that the PMI value runs from 0 to 1
            if (Hx >= Hy && Hx > 0.0){
                MI /= Hx;
            }else if(Hy > Hx && Hy > 0.0){
                MI /= Hy;
            }
            else{
                MI = 0.0;
            }

            //write PMI to data output array
            if (MI < 1.1){
                this->mOutput[ioutter*this->mHeight + j] = MI;
            }
            else{
                this->mOutput[ioutter*this->mHeight + j] = 0.0;
                //cout << "problem with output";
            }

        }



    }



    //*/
    return rvalue;
}
Run Code Online (Sandbox Code Playgroud)

由6502解决

6502的答案解决了我的问题.我需要明确告诉Windows使用二进制模式stdin/stdout.要做到这一点,我必须在我的主cpp文件中包含2个新的头文件.

#include <fcntl.h>
#include <io.h>
Run Code Online (Sandbox Code Playgroud)

添加以下代码行(从6502的POSIX版本修改,因为Visual Studio抱怨)到我的主函数的开头

_setmode(_fileno(stdout), O_BINARY);
_setmode(_fileno(stdin), O_BINARY);
Run Code Online (Sandbox Code Playgroud)

然后将这些行添加到我的Python代码中:

import os, msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
Run Code Online (Sandbox Code Playgroud)

650*_*502 6

问题是窗口中的stdin/ stdout在文本模式下打开,而不是在二进制模式下打开,因此在\r发送字符13()时会出现问题.

您可以在Python中设置示例二进制模式

import os, msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
Run Code Online (Sandbox Code Playgroud)

并在C++中

_setmode(fileno(stdout), O_BINARY);
_setmode(fileno(stdin), O_BINARY);
Run Code Online (Sandbox Code Playgroud)

请参阅https://msdn.microsoft.com/en-us/library/tw4k6df8.aspx