# Copyright (C) 2002-2005 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""MIME-stripping filter for Mailman.
This module scans a message for MIME content, removing those sections whose
MIME types match an entry in a list of filtered types. multipart/alternative
sections are replaced by the first non-empty component, and multipart/mixed
sections wrapping only single sections after other processing are replaced by
their contents.
"""
import os
import errno
import tempfile
from os.path import splitext
from email.Iterators import typed_subpart_iterator
from Mailman import mm_cfg
from Mailman import Errors
from Mailman.Message import UserNotification
from Mailman.Queue.sbcache import get_switchboard
from Mailman.Logging.Syslog import syslog
from Mailman.Version import VERSION
from Mailman.i18n import _
from Mailman.Utils import oneline
def process(mlist, msg, msgdata):
    """Apply the list's content-filtering rules to msg, modifying it in place.

    mlist supplies the configuration read here: filter_content,
    filter_mime_types, pass_mime_types, filter_filename_extensions,
    pass_filename_extensions, collapse_alternatives and
    convert_html_to_plaintext.  msg is the message to filter and msgdata is
    its metadata mapping.

    Returns None.  When the message as a whole is rejected by the rules,
    dispose() is called; it never returns normally but raises
    Errors.RejectMessage or Errors.DiscardMessage.  When the message survives
    but was altered, an X-Content-Filtered-By: header is added to it.
    """
    # Short-circuits: filtering is switched off for this list, or the message
    # is one of our own digests, which we don't care about.
    if not mlist.filter_content:
        return
    if msgdata.get('isdigest'):
        return
    ctype = msg.get_content_type()
    mtype = msg.get_content_maintype()
    # Check to see if the outer type matches one of the filter types
    filtertypes = mlist.filter_mime_types
    passtypes = mlist.pass_mime_types
    if ctype in filtertypes or mtype in filtertypes:
        dispose(mlist, msg, msgdata,
                _("The message's content type was explicitly disallowed"))
    # Check to see if there is a pass types and the outer type doesn't match
    # one of these types
    if passtypes and not (ctype in passtypes or mtype in passtypes):
        dispose(mlist, msg, msgdata,
                _("The message's content type was not explicitly allowed"))
    # Filter by file extensions
    filterexts = mlist.filter_filename_extensions
    passexts = mlist.pass_filename_extensions
    fext = get_file_ext(msg)
    if fext:
        if fext in filterexts:
            dispose(mlist, msg, msgdata,
                    _("The message's file extension was explicitly disallowed"))
        if passexts and not (fext in passexts):
            dispose(mlist, msg, msgdata,
                    _("The message's file extension was not explicitly allowed"))
    # Remember how many parts the message started with so that we can tell
    # later on whether filtering removed anything.
    numparts = len(list(msg.walk()))
    # If the message is a multipart, filter out matching subparts
    if msg.is_multipart():
        # Recursively filter out any subparts that match the filter list
        prelen = len(msg.get_payload())
        filter_parts(msg, filtertypes, passtypes, filterexts, passexts)
        # If the outer message is now an empty multipart (and it wasn't
        # before!) then, again it gets discarded.
        postlen = len(msg.get_payload())
        if postlen == 0 and prelen > 0:
            dispose(mlist, msg, msgdata,
                    _("After content filtering, the message was empty"))
    # Now replace all multipart/alternatives with just the first non-empty
    # alternative.  BAW: We have to special case when the outer part is a
    # multipart/alternative because we need to retain most of the outer part's
    # headers.  For now we'll move the subpart's payload into the outer part,
    # and then copy over its Content-Type: and Content-Transfer-Encoding:
    # headers (any others?).
    # TK: Make this configurable from Gui/ContentFilter.py.
    if mlist.collapse_alternatives:
        collapse_multipart_alternatives(msg)
        if ctype == 'multipart/alternative':
            # A message may claim to be multipart/alternative while carrying
            # no sub-parts at all (or a plain string body).  Asking for part
            # 0 would then raise, so only hoist when there is a part to hoist.
            if msg.is_multipart() and msg.get_payload():
                firstalt = msg.get_payload(0)
                reset_payload(msg, firstalt)
    # If we removed some parts, make note of this
    changedp = 0
    if numparts != len(list(msg.walk())):
        changedp = 1
    # Now perhaps convert all text/html to text/plain
    if mlist.convert_html_to_plaintext and mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND:
        changedp += to_plaintext(msg)
    # If we're left with only two parts, an empty body and one attachment,
    # recast the message to one of just that part
    if msg.is_multipart() and len(msg.get_payload()) == 2:
        if msg.get_payload(0).get_payload() == '':
            useful = msg.get_payload(1)
            reset_payload(msg, useful)
            changedp = 1
    if changedp:
        msg['X-Content-Filtered-By'] = 'Mailman/MimeDel %s' % VERSION
def reset_payload(msg, subpart):
    """Make msg carry subpart's body and content headers.

    msg's payload is replaced by subpart's payload.  The Content-Type,
    Content-Transfer-Encoding, Content-Disposition and Content-Description
    headers of msg are dropped and then re-created from subpart; Content-Type
    falls back to text/plain when subpart has none, while the other three are
    only copied when subpart has a non-empty value for them.  All other
    headers of msg are left alone.
    """
    msg.set_payload(subpart.get_payload())
    # Get rid of every content header on the outer message first ...
    for stale in ('content-type',
                  'content-transfer-encoding',
                  'content-disposition',
                  'content-description'):
        del msg[stale]
    # ... then rebuild them from the subpart.  Content-Type is always set.
    msg['Content-Type'] = subpart.get('content-type', 'text/plain')
    optional_headers = (
        ('content-transfer-encoding', 'Content-Transfer-Encoding'),
        ('content-disposition', 'Content-Disposition'),
        ('content-description', 'Content-Description'),
        )
    for lookup, header in optional_headers:
        value = subpart.get(lookup)
        if value:
            msg[header] = value
def filter_parts(msg, filtertypes, passtypes, filterexts, passexts):
    """Recursively strip unwanted subparts from msg, in place.

    A subpart is thrown away when any of the following holds: filtering its
    own children left it empty; its content type or main type is listed in
    filtertypes; passtypes is non-empty and lists neither its content type
    nor its main type; it has a filename extension that is in filterexts, or
    that is missing from a non-empty passexts.

    Returns 1 when msg should be kept and 0 when msg had subparts and all of
    them were removed.  A message that is not a multipart is left untouched
    and always kept.
    """
    if not msg.is_multipart():
        return 1
    children = msg.get_payload()
    survivors = []
    for child in children:
        # Always descend first, so nested containers get filtered even if
        # the child itself ends up being dropped below.
        if not filter_parts(child, filtertypes, passtypes,
                            filterexts, passexts):
            continue
        full_type = child.get_content_type()
        main_type = child.get_content_maintype()
        # Explicitly forbidden type
        if full_type in filtertypes or main_type in filtertypes:
            continue
        # A pass list exists and this type is not on it
        if passtypes and full_type not in passtypes \
               and main_type not in passtypes:
            continue
        # Same two rules again, applied to the filename extension (if any)
        extension = get_file_ext(child)
        if extension and (extension in filterexts or
                          (passexts and extension not in passexts)):
            continue
        survivors.append(child)
    msg.set_payload(survivors)
    # Report "discard me" only if we had parts and threw every one away
    if children and not survivors:
        return 0
    return 1
def collapse_multipart_alternatives(msg):
    """Replace multipart/alternative subparts of msg by their first part.

    Works in place and returns None.  Every direct subpart of msg whose
    content type is multipart/alternative is replaced by its first
    alternative; an alternative container with nothing usable inside is
    dropped.  Other multipart containers are descended into, so that
    alternatives nested deeper in the message are collapsed as well.

    msg itself is never replaced, even if it is a multipart/alternative; the
    caller has to deal with the outermost part.  A message that is not a
    multipart is left untouched.
    """
    if not msg.is_multipart():
        return
    newpayload = []
    for subpart in msg.get_payload():
        if subpart.get_content_type() == 'multipart/alternative':
            try:
                newpayload.append(subpart.get_payload(0))
            except (IndexError, TypeError):
                # IndexError: the container holds no alternatives at all.
                # TypeError: the part claims to be multipart/alternative but
                # its payload is not a list of parts.  Either way there is
                # nothing to keep.
                pass
        else:
            # Returns immediately for leaf parts, so this is safe to call
            # on every non-alternative subpart.
            collapse_multipart_alternatives(subpart)
            newpayload.append(subpart)
    msg.set_payload(newpayload)
def to_plaintext(msg):
    """Convert every text/html part of msg to text/plain, in place.

    Each text/html part is decoded according to its
    Content-Transfer-Encoding, written to a temporary file and run through
    mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND (a shell command template with a
    %(filename)s placeholder).  The command's output becomes the part's new
    payload and the part is retyped as text/plain.  A non-zero exit status
    of the command is logged to the 'error' log; the output is used
    regardless.

    Returns 1 if at least one part was converted, otherwise 0.
    """
    # Local import: used below to get at the active exception in a way that
    # works on every Python version this module runs on.
    import sys
    changedp = 0
    for subpart in typed_subpart_iterator(msg, 'text', 'html'):
        # Decode base64/quoted-printable bodies first, so the converter sees
        # real HTML rather than the encoded form.
        html = subpart.get_payload(decode=1)
        if html is None:
            # No decodable body (the part holds sub-parts); nothing to
            # convert here.
            continue
        # mkstemp() creates the file atomically, unlike mktemp(), which only
        # returns a name that somebody else could grab before we open it.
        fd, filename = tempfile.mkstemp('.html')
        try:
            fp = os.fdopen(fd, 'wb')
            try:
                fp.write(html)
            finally:
                fp.close()
            cmd = os.popen(mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND %
                           {'filename': filename})
            plaintext = cmd.read()
            rtn = cmd.close()
            if rtn:
                syslog('error', 'HTML->text/plain error: %s', rtn)
        finally:
            try:
                os.unlink(filename)
            except OSError:
                # The file being gone already is fine; anything else is a
                # real error.
                if sys.exc_info()[1].errno != errno.ENOENT:
                    raise
        # Now replace the payload of the subpart and twiddle the
        # Content-Type:.  The new payload is the converter's raw output, so
        # the old transfer encoding no longer describes it.
        del subpart['content-transfer-encoding']
        subpart.set_payload(plaintext)
        subpart.set_type('text/plain')
        changedp = 1
    return changedp
def dispose(mlist, msg, msgdata, why):
    """Get rid of a message that failed content filtering.

    What happens depends on mlist.filter_action:

      0 - the message is simply discarded
      1 - the message is rejected, with why as the reason
      2 - the message is forwarded to the list owner, then discarded
      3 - the message is put in the queue at mm_cfg.BADQUEUE_DIR (only if
          mm_cfg.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES is set), then
          discarded

    This function never returns normally: it raises Errors.RejectMessage for
    action 1 and Errors.DiscardMessage in every other case.
    """
    action = mlist.filter_action
    if action == 1:
        # Bounce the message to the original author
        raise Errors.RejectMessage(why)
    elif action == 2:
        # Forward it on to the list owner.  NOTE(review): the text below has
        # a %(listname)s placeholder; presumably _() fills it in from this
        # local variable, so keep the name as it is -- confirm against
        # Mailman.i18n.
        listname = mlist.internal_name()
        notice = _("""\
The attached message matched the %(listname)s mailing list's content filtering
rules and was prevented from being forwarded on to the list membership. You
are receiving the only remaining copy of the discarded message.
""")
        mlist.ForwardMessage(
            msg,
            text=notice,
            subject=_('Content filtered message notification'))
    elif action == 3 and mm_cfg.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES:
        # Keep a copy of the message around for the site administrator
        get_switchboard(mm_cfg.BADQUEUE_DIR).enqueue(msg, msgdata)
    # Every case that gets this far ends with the message being discarded
    raise Errors.DiscardMessage
def get_file_ext(m):
    """Return the lower-cased filename extension of message part m.

    The filename is taken from the Content-Disposition: header or, failing
    that, from the name parameter of the Content-Type: header.  Caution:
    some viruses don't put a filename in the Content-Disposition: header.

    The extension is returned without its leading dot.  An empty string is
    returned when the part has no filename or the filename has no extension.
    """
    filename = m.get_filename('') or m.get_param('name', '')
    if isinstance(filename, tuple):
        # An RFC 2231 encoded parameter comes back as a
        # (charset, language, value) tuple; only the value is of interest.
        filename = filename[2] or ''
    if not filename:
        return ''
    fext = splitext(oneline(filename, 'utf-8'))[1]
    # splitext() keeps the leading dot, so dropping the first character also
    # turns a lone '.' into the empty string.  Lower-casing keeps FOO.EXE
    # from slipping past a filter on 'exe'.
    # NOTE(review): this assumes the configured extension lists are stored
    # in lower case -- confirm against the code that saves them.
    return fext[1:].lower()