使用 Python 将邮箱转为 csv

Apr*_*cot 5 csv gmail parsing python-2.7

我已从我的 Gmail 帐户下载了邮件存档。我正在使用以下来自博客的 python(2.7) 代码将存档的内容转换为 csv。

import mailbox
import csv
# Write the subject, sender and date of every message in the mbox archive
# to clean_mail.csv, one row per message.
# NOTE: "wb" is the mode the csv module expects on Python 2.7; on Python 3
# open the file with mode "w" and newline="" instead.
with open("clean_mail.csv", "wb") as csv_file:
    writer = csv.writer(csv_file)
    for message in mailbox.mbox('archive.mbox'):
        writer.writerow([message['subject'], message['from'], message['date']])
Run Code Online (Sandbox Code Playgroud)

我还想把邮件正文(实际的邮件内容)也包含进去……但不知道该怎么做。我以前没有用过 Python,有人可以帮忙吗?我也试过其他已有的方案,但都没有成功。

为了完成同样的任务,我还尝试了下面的代码,但第 60 行(return json_msg)报缩进错误。我试过多种缩进方式,问题都没有解决。

import sys
import mailbox
import email
import quopri
import json
import time
from BeautifulSoup import BeautifulSoup
from dateutil.parser import parse

# Path of the mbox archive that is read as input.
# NOTE(review): neither path starts with a slash, so both are resolved
# relative to the current working directory -- confirm this is intended.
MBOX = 'Users/mymachine/client1/Takeout/Mail/archive.mbox'
# Path of the output file that receives the JSON-encoded messages.
OUT_FILE = 'Users/mymachine/client1/Takeout/Mail/archive.mbox.json'

def cleanContent(msg):
    """Return the text content of a message body with markup removed.

    The body is decoded from quoted-printable and parsed with BeautifulSoup;
    all text nodes found are concatenated into one string.

    Returns an empty string when the body cannot be parsed.
    """
    msg = quopri.decodestring(msg)
    try:
        soup = BeautifulSoup(msg)
    except Exception:
        # Best effort: an unparseable body contributes no text. Catching
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so a long run can still be aborted.
        return ''
    return ''.join(soup.findAll(text=True))
# There is a lot of data to process, so messages are produced lazily with a
# generator (see http://wiki.python.org/moin/Generators). json cannot
# serialize a generator on its own, hence this trivial encoder.
class Encoder(json.JSONEncoder):
    """JSON encoder that serializes otherwise unsupported objects as lists."""

    def default(self, o):
        # Invoked by json for objects it cannot serialize natively:
        # materialize the object (e.g. a generator) into a list.
        items = list(o)
        return items

def gen_json_msgs(mb):
    """Lazily yield the converted form of every message in mailbox *mb*.

    *mb* must provide a ``next()`` method that returns the next message, or
    ``None`` once the mailbox is exhausted. Each message is passed through
    ``jsonifyMessage`` before being yielded.
    """
    while True:
        msg = mb.next()
        if msg is None:
            break
        # Must sit at loop level: nested under the ``if`` it would follow
        # ``break`` and be unreachable, so nothing would ever be yielded.
        yield jsonifyMessage(msg)

def jsonifyMessage(msg):
    """Convert an email message object into a JSON-serializable dict.

    The result contains:
      * one key per message header, holding the decoded header value;
      * 'To', 'Cc' and 'Bcc' (when present and non-empty) as lists of
        addresses, split on commas after all whitespace is removed;
      * 'parts': a list with one ``{'contentType', 'content'}`` dict for
        every non-multipart part of the message;
      * 'Date' replaced by ``{'$date': <milliseconds since the epoch>}``.

    Raises KeyError if the message has no 'Date' header.

    NOTE: written for Python 2, where header values and payloads are byte
    strings that provide ``.decode``.
    """
    json_msg = {'parts': []}
    for (k, v) in msg.items():
        json_msg[k] = v.decode('utf-8', 'ignore')

    # The recipient fields may hold several addresses and are not always
    # defined; the split must happen inside the loop, once per present field.
    for k in ['To', 'Cc', 'Bcc']:
        if not json_msg.get(k):
            continue
        json_msg[k] = json_msg[k].replace('\n', '').replace('\t', '').replace('\r', '')\
            .replace(' ', '').decode('utf-8', 'ignore').split(',')

    for part in msg.walk():
        json_part = {}
        # Multipart containers carry no content of their own; their
        # sub-parts are visited separately by walk().
        if part.get_content_maintype() == 'multipart':
            continue

        json_part['contentType'] = part.get_content_type()
        content = part.get_payload(decode=False).decode('utf-8', 'ignore')
        json_part['content'] = cleanContent(content)

        json_msg['parts'].append(json_part)

    # Convert the date once per message (not once per part) to milliseconds
    # since the epoch.
    then = parse(json_msg['Date'])
    millis = int(time.mktime(then.timetuple())*1000 + then.microsecond/1000)
    json_msg['Date'] = {'$date' : millis}

    return json_msg

# Stream every message of the mbox file through the JSON conversion and
# write one JSON document per line to OUT_FILE. The ``with`` blocks close
# both files even if a message fails to convert.
with open(MBOX, 'rb') as mbox_file:
    mbox = mailbox.UnixMailbox(mbox_file, email.message_from_file)

    with open(OUT_FILE, 'w') as f:
        for msg in gen_json_msgs(mbox):
            if msg is not None:
                f.write(json.dumps(msg, cls=Encoder) + '\n')
Run Code Online (Sandbox Code Playgroud)

Rah*_*hul 4

尝试这个。

import mailbox
import csv
# Write one CSV row per message: subject, sender, date and body text.
# NOTE: "wb" is the mode the csv module expects on Python 2.7; on Python 3
# open the file with mode "w" and newline="" instead.
with open("clean_mail.csv", "wb") as csv_file:
    writer = csv.writer(csv_file)
    for message in mailbox.mbox('archive.mbox'):
        if message.is_multipart():
            # The payload of a part is itself a list when multiparts are
            # nested; str() keeps join() from raising TypeError then.
            content = ''.join(str(part.get_payload()) for part in message.get_payload())
        else:
            content = message.get_payload()
        writer.writerow([message['subject'], message['from'], message['date'], content])
Run Code Online (Sandbox Code Playgroud)

或这个:

import mailbox
import csv

def get_message(message):
    """Return the body of *message*.

    A non-multipart message returns its payload unchanged. For a multipart
    message the payload of each sub-part is converted with ``str`` and the
    results are concatenated, each one followed by a newline.
    """
    if message.is_multipart():
        pieces = [str(part.get_payload()) + '\n' for part in message.get_payload()]
        return "".join(pieces)
    return message.get_payload()

if __name__ == "__main__":
    # Write one CSV row per message: subject, sender, date and body text.
    # The ``with`` block flushes and closes the file even if a message
    # fails to convert. "wb" is the mode csv expects on Python 2.7.
    with open("clean_mail.csv", "wb") as csv_file:
        writer = csv.writer(csv_file)
        for message in mailbox.mbox("archive.mbox"):
            contents = get_message(message)
            writer.writerow([message["subject"], message["from"], message["date"], contents])
Run Code Online (Sandbox Code Playgroud)

在此处查找文档。