将 csv 输出写入 StringIO 对象

Clo*_*oud 1 python-3.x

以下代码无法创建 StringIO 对象类型的临时 csv 文件。代码中某处有错误吗?“data_temp”变量不断产生一个空对象。

我使用 StringIO 对象是为了避免在磁盘上创建另一个文件。

from bs4 import BeautifulSoup
from io import StringIO

import csv
import re


# Creates a new csv file to import data to MySQL
def create_csv_file():
    source_html = open(r'C:\\Users\\Admin\\OneDrive\\eCommerce\\Servi-fied\\Raw Data\\EMA - Electricians (Raw).txt', 'r')
    bs_object = BeautifulSoup(source_html, "html.parser")

    data_temp = StringIO()
    csv_file1 = open(r'C:\\Users\\Admin\\OneDrive\\eCommerce\\Servi-fied\\Raw Data\\EMA - Electricians (Processed).csv', 'w+')

    writer1 = csv.writer(data_temp, delimiter='<', skipinitialspace=True)

    table = bs_object.find("table", {"id":"gasOfferSearch"})
    rows = table.findAll("tr")
    # Debugging statement
    print("There are " + (len(rows) - 1).__str__() + " rows.")

    try:
        # Iterates through t   he list, but skips the first record (i.e. the table header)
        counter = 0
        for row in rows[1:]:
            csvRow = []
            for cell in row.findAll(['td','th']):
                # Replace "\n" with a whitespace; replace <br> tags with 5 whitespaces
                line = str(cell).replace('\n', ' ').replace('<br>', '     ')
                # Replace 2 or more spaces with "\n"
                line = re.sub('\s{2,}', '*', line)
                # Converts results to a BeautifulSoup object
                line_bsObj = BeautifulSoup(line, "html.parser")
                # Strips: Removes all tags and trailing and leading whitespaces
                # Replace: Removes all quotation marks
                csvRow.append(line_bsObj.get_text().strip().replace('"',''))

            # Converts the string into a csv file
            writer1.writerow(csvRow)
            print(data_temp.readlines())
            counter += 1

        # Debugging statement
        print("There are " + counter.__str__() + " rows.")
        print(data_temp.readlines())

        # Reads from the temp file and replaces all "<*" with "<"
        csv_file1.write(
            data_temp.read().replace("<*", "<").replace("*\n", "").replace("*", "<", 1)
        )

    finally:
        source_html.close()
        csv_file1.close()

    return None

# Execute the following functions
create_csv_file()
Run Code Online (Sandbox Code Playgroud)

小智 6

您正在写入 StringIO 对象,data_temp然后立即尝试从中读取:

data_temp = StringIO()
writer1 = csv.writer(data_temp, delimiter='<', skipinitialspace=True)
...
writer1.writerow(csvRow)
print(data_temp.readlines())
Run Code Online (Sandbox Code Playgroud)

在那一刻(以及稍后同上),data_temp的“文件”指针位于流的末尾。所以你试图读取当前文件的末尾,导致没有数据。

如果你想以这种方式做事seekdate_temp首先要开始,在阅读之前:

data_temp.seek(0)
result = data_temp.read()
Run Code Online (Sandbox Code Playgroud)

(但是,如果没有彻底深入研究您的代码,我会猜测有另一种方法可以完成您正在做的事情,而无需写入和读取临时对象。)