【发布时间】:2021-01-04 07:40:52
【问题描述】:
我想将 mbox 文件转换为 msg 格式。我已经尝试过,但得到的文件格式不正确。我能够读取 mbox 文件,也已经成功把它转换成了 eml 文件,但不知道如何用同样的方式生成 msg 文件。
下面是mbox转eml的代码
import os
import mailbox
from email import generator
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
# Number of .eml files written so far; also used to name the next file.
count = 0


def emlGenerator(body, thisemail, out_dir='xxxxx/test2'):
    """Write one message to ``<count>.eml`` inside *out_dir*.

    A new ``multipart/alternative`` message is built from the Subject, From,
    To, Cc, Bcc and Date headers of *thisemail*, with *body* attached as a
    single ``text/plain`` part.

    Args:
        body: Message text. ``None`` is written as an empty body and bytes
            are decoded as UTF-8 (undecodable bytes are replaced), because
            ``MIMEText`` only accepts text.
        thisemail: Source message; indexed by header name and expected to
            yield ``None`` for absent headers (as ``email.message.Message``
            does).
        out_dir: Directory that receives the file. Created if missing.

    Side effects:
        Increments the module-level ``count`` after the file is written.
    """
    global count

    msg = MIMEMultipart('alternative')
    # Copy only the headers that are present: assigning None would emit an
    # empty "Cc:" / "Bcc:" line in the generated file.
    for header_name in ('Subject', 'From', 'To', 'Cc', 'Bcc', 'Date'):
        value = thisemail[header_name]
        if value is not None:
            msg[header_name] = value

    # MIMEText calls .encode() on its argument, so normalise to text first.
    if body is None:
        body = ''
    elif isinstance(body, bytes):
        body = body.decode('utf-8', 'replace')
    msg.attach(MIMEText(body))

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    outfile_name = os.path.join(out_dir, str(count) + '.eml')
    with open(outfile_name, 'w') as outfile:
        generator.Generator(outfile).flatten(msg)

    # Count only files that were actually written.
    count += 1
def getcharsets(msg):
    """Return the set of charsets declared anywhere in *msg*.

    ``msg.get_charsets()`` yields one entry per part, with ``None`` for
    parts that declare no charset; those entries are dropped.
    """
    return {declared for declared in msg.get_charsets() if declared is not None}
def handleerror(errmsg, emailmsg, cs):
    """Print a report about a decoding failure for one message.

    Args:
        errmsg: The error (or error text) to print.
        emailmsg: The message being decoded; its charsets, subject and
            sender are included in the report.
        cs: The charset that was in use when the error occurred.
    """
    # A blank line separates this report from earlier output.
    print()
    print(errmsg)
    print("This error occurred while decoding with ", cs, " charset.")
    print(
        "These charsets were found in the one email.",
        getcharsets(emailmsg),
    )
    print("This is the subject:", emailmsg['subject'])
    print("This is the sender:", emailmsg['From'])
def _decode_text(raw, charset):
    """Decode *raw* bytes with *charset*, then UTF-8, then Latin-1."""
    if not isinstance(raw, bytes):
        return raw
    for candidate in (charset, 'utf-8'):
        if not candidate:
            continue
        try:
            return raw.decode(candidate)
        except (LookupError, UnicodeDecodeError):
            # Unknown or wrong charset label: try the next candidate.
            continue
    # Latin-1 maps every byte, so this last resort cannot fail.
    return raw.decode('latin-1')


def getbodyfromemail(msg):
    """Return the plain-text body of *msg* as text, or ``None``.

    The first ``text/plain`` part that is not marked as an attachment is
    used. Its payload is transfer-decoded and then decoded with the charset
    declared on that same part, falling back to UTF-8 and finally Latin-1.

    Args:
        msg: An ``email.message.Message`` (single-part or multipart).

    Returns:
        The body text, or ``None`` when the message has no usable
        ``text/plain`` part.
    """
    # walk() already visits every nested part (and the message itself when
    # it is not multipart), so a single pass is enough.
    for part in msg.walk():
        if part.is_multipart() or part.get_content_type() != 'text/plain':
            continue
        disposition = str(part.get('Content-Disposition', '')).lower()
        if disposition.startswith('attachment'):
            # A text file attached to the mail is not the body.
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        return _decode_text(payload, part.get_content_charset())
    return None
if __name__ == "__main__":
    # Convert every message of the mbox into its own numbered .eml file.
    source_box = mailbox.mbox('xxxxxx/topics.mbox')
    for message in source_box:
        print (message['Message-id'])
        emlGenerator(getbodyfromemail(message), message)
    # Summary: `count` is maintained by emlGenerator.
    print("=========== DONE ============")
    print("Total ", count, " File")
【问题讨论】:
-
@SimonLi 这个方法没有用,你能告诉我们其他可行的方案吗?
标签: python python-2.7 outlook win32com