使用 imaplib 在 python 3.7 中读取电子邮件以及电子邮件中的 HTML 正文和附件

1 python email imaplib

如果有人可以帮助我解决这个问题,我将非常感激。

我已经实现了以下代码来读取 Gmail 收件箱中的未读电子邮件。我需要打印“收件人”、“发件人”、“主题”和“正文”,并将附件保存到指定位置。

我这里有两个问题。

  1. 如果电子邮件带有附件,正文会被打印成 Body: [<email.message.Message object at 0x026D1050>, <email.message.Message object at 0x02776B70>]。其他所需内容都能正常打印,附件也能保存,但正文文本没有打印出来。

如果不包含附件,则效果很好。

  2. 如果电子邮件正文中包含任何样式(例如粗体、斜体、下划线、颜色等),则正文不会按原样打印。

示例:“Python”会被打印成“Python=C2=A0i=”,有时不同样式的文本之间还会用“*”分隔。

def get_body(email_message):
    """Return the raw payload of the first sub-part of a message.

    Args:
        email_message: An ``email.message.Message``.

    Returns:
        For a multipart message, whatever ``get_payload()`` yields for its
        first sub-part: a string when that part is a leaf (for example
        ``text/plain``), or a list of ``Message`` objects when that part is
        itself a multipart container.  For a non-multipart message the
        message's own payload is returned.  ``None`` is returned when a
        multipart message has no sub-parts.

    Note:
        Only the first sub-part is inspected and nested containers are not
        walked, so a mail whose first part is a multipart container yields a
        list rather than text.  The payload is not transfer-decoded either,
        so quoted-printable sequences such as ``=C2=A0`` are returned
        verbatim.
    """
    if not email_message.is_multipart():
        # A single-part payload is a string; iterating it would walk characters.
        return email_message.get_payload()
    sub_parts = email_message.get_payload()
    if not sub_parts:
        return None
    return sub_parts[0].get_payload()
def read_email(server, uname, pwd):
    """Print the newest unread inbox message and save its attachments.

    Connects to ``server`` over IMAP/SSL, searches the inbox for UNSEEN
    messages and fetches the most recent one.  Every named attachment is
    written into the ``location`` directory (existing files are left
    untouched), then the To/From/Subject/Date headers and the value returned
    by ``get_body`` are printed.

    Args:
        server: Host name of the IMAP server.
        uname: Account user name.
        pwd: Account password.

    Returns:
        None.  ``No new email`` is printed when no UNSEEN message is found.
    """
    # NOTE(review): ``location`` (the attachment directory) is not defined in
    # this snippet; it has to exist at module level before this is called.

    # The ``with`` block logs out on every exit path, so repeated polling does
    # not leave one open connection behind per call.
    with imaplib.IMAP4_SSL(server) as mail:
        mail.login(uname, pwd)
        mail.select("inbox")
        _, data = mail.uid('search', None, '(UNSEEN)')
        inbox_item_list = (data[0] or b'').split()
        if not inbox_item_list:
            print("No new email")
            return
        most_recent = inbox_item_list[-1]
        _, email_data = mail.uid('fetch', most_recent, '(RFC822)')

    # Parse the raw bytes directly instead of assuming the mail is UTF-8.
    email_message = email.message_from_bytes(email_data[0][1])

    for part in email_message.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        if not filename:
            # Inline parts can carry a disposition without any file name.
            continue
        # The name is chosen by the sender: keep only its last component so
        # the file cannot be written outside ``location``.
        safe_name = os.path.basename(filename)
        if not safe_name:
            continue
        att_path = os.path.join(location, safe_name)
        if not os.path.isfile(att_path):
            with open(att_path, 'wb') as fp:
                fp.write(part.get_payload(decode=True))
            print('Downloaded file:', filename)

    if email_message.is_multipart():
        print('To:\t\t', email_message['To'])
        print('From:\t', email_message['From'])
        print('Subject:', email_message['Subject'])
        print('Date:\t', email_message['Date'])
        print('Body:\t', get_body(email_message))
    else:
        print('Nothing')
# Poll the mailbox forever, pausing 10 seconds between checks.
while True:
    read_email("imap.gmail.com", "s@gmail.com", "spassword")
    # The pause has to sit inside the loop body; placed after the loop it is
    # never reached and the server is polled without any delay.
    time.sleep(10)
Run Code Online (Sandbox Code Playgroud)

非常感谢

小智 6

我是 Python 新手,下面是我写的用于读取未读电子邮件的完整可运行代码。您可以根据自己的需求打印所需的字段。它适用于 Gmail 和 Office 365,脚本每 10 秒运行一次。只要传入相应的凭据,它应该也适用于其他电子邮件提供商。希望对您有所帮助。

import email
import imaplib
import os
import html2text
import time
# Directory into which two_way_email() writes downloaded attachments.
# Placeholder value: replace it with the path of a real directory.
detach_dir = 'locationWhereYouWantToSaveYourAttachments'


def _decode_text(part):
    """Return the transfer-decoded payload of a leaf part as ``str``."""
    raw = part.get_payload(decode=True)
    if raw is None:
        # ``get_payload(decode=True)`` yields None for multipart containers.
        return ''
    charset = part.get_content_charset() or 'utf-8'
    try:
        return raw.decode(charset, errors='replace')
    except LookupError:
        # The mail declares a charset label Python does not know.
        return raw.decode('utf-8', errors='replace')


def _first_text_part(message, subtype):
    """Return the first non-attachment ``text/<subtype>`` part, decoded, or ''."""
    wanted = 'text/' + subtype
    for part in message.walk():
        if part.get_content_type() != wanted:
            continue
        if part.get_content_disposition() == 'attachment':
            continue
        return _decode_text(part)
    return ''


def _html_to_text(markup):
    """Render HTML markup as plain text with html2text, dropping link targets."""
    converter = html2text.HTML2Text()
    converter.ignore_links = True
    return converter.handle(markup)


def _save_attachments(message, directory):
    """Write every named attachment of ``message`` into ``directory``."""
    for part in message.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        if not filename:
            # Inline parts can carry a disposition without any file name.
            continue
        # The name is chosen by the sender: keep only its last component so
        # the file cannot be written outside ``directory``.
        safe_name = os.path.basename(filename)
        if not safe_name:
            continue
        att_path = os.path.join(directory, safe_name)
        if not os.path.isfile(att_path):
            with open(att_path, 'wb') as fp:
                fp.write(part.get_payload(decode=True))
            print('Downloaded file:', filename)


def get_body(email_message):
    """Return the decoded plain-text body of an e-mail message.

    The whole message tree is walked, so the body is found even when it is
    nested inside a multipart container next to attachments.  The
    Content-Transfer-Encoding (quoted-printable, base64, ...) is undone and
    the bytes are decoded with the charset declared by the part.

    Args:
        email_message: An ``email.message.Message``, single-part or multipart.

    Returns:
        The text of the first ``text/plain`` part that is not an attachment,
        or an empty string when the message has no such part.
    """
    return _first_text_part(email_message, 'plain')


def two_way_email(server, uname, pwd):
    """Print the newest unread inbox message and save its attachments.

    Connects to ``server`` over IMAP/SSL, searches the inbox for UNSEEN
    messages and fetches the most recent one.  Named attachments are written
    into ``detach_dir`` (existing files are left untouched) and the headers
    and body are printed.  For multipart mail the plain-text body is
    preferred, falling back to the HTML body rendered as text.  For
    single-part mail the payload is printed, rendered through html2text when
    it is HTML.

    Args:
        server: Host name of the IMAP server.
        uname: Account user name.
        pwd: Account password.

    Returns:
        None.  ``No new email`` is printed when no UNSEEN message is found.
    """
    # The ``with`` block logs out on every exit path, so repeated polling does
    # not leave one open connection behind per call.
    with imaplib.IMAP4_SSL(server) as mail:
        mail.login(uname, pwd)
        mail.select("inbox")
        _, data = mail.uid('search', None, '(UNSEEN)')
        unseen_uids = (data[0] or b'').split()
        if not unseen_uids:
            print("No new email")
            return
        _, email_data = mail.uid('fetch', unseen_uids[-1], '(RFC822)')

    # Parse the raw bytes directly instead of assuming the mail is UTF-8.
    email_message = email.message_from_bytes(email_data[0][1])
    _save_attachments(email_message, detach_dir)

    print('To:\t\t', email_message['To'])
    print('From:\t', email_message['From'])
    print('Subject:', email_message['Subject'])
    print('Date:\t', email_message['Date'])
    if email_message.is_multipart():
        body = get_body(email_message)
        if not body:
            # HTML-only mail: render the markup instead of printing nothing.
            html_body = _first_text_part(email_message, 'html')
            body = _html_to_text(html_body) if html_body else ''
        print('Body:\t', body)
    else:
        print('Thread-Index:\t', email_message['Thread-Index'])
        text = _decode_text(email_message)
        if email_message.get_content_type() == 'text/html':
            text = _html_to_text(text)
        print(text)
# Poll both mailboxes forever, pausing 10 seconds between rounds.
# The account names and passwords below are placeholders to be replaced.
while True:
    two_way_email("outlook.office365.com", "yourOffice365EmailAddressHere", "yourpassword")
    two_way_email("imap.gmail.com", "yourGmailAddressHere", "yourPassword")
    time.sleep(10)
Run Code Online (Sandbox Code Playgroud)