From fa36dc94bbd0bc86bf63458d8f00da04785ac669 Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Thu, 3 Jan 2019 11:14:30 +0100 Subject: [PATCH] Trap internal AssertionError from python libraries For some really broken messages, we end up in a cannot-happen codepath. Trap this one and just consider that MIME part empty, and try again later. In passing, also change it so we continue loading after failures of parsing. We continued in the mode where we just generated diffs, but not when making updates. Now continue in both cases, but of course don't do the actual update if the parsing failed. --- loader/lib/parser.py | 47 ++++++++++++++++++++++++++++++++++----- loader/reparse_message.py | 5 +++-- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/loader/lib/parser.py b/loader/lib/parser.py index 8ee25c5..b97c8b3 100644 --- a/loader/lib/parser.py +++ b/loader/lib/parser.py @@ -124,7 +124,14 @@ class ArchivesParser(object): return charset def get_payload_as_unicode(self, msg): - b = msg.get_payload(decode=True) + try: + b = msg.get_payload(decode=True) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enter a cannot-happen path. + # In which case we just ignore it and hope for a better MIME part later. + b = None + if b: # Find out if there is a charset charset = None @@ -303,8 +310,15 @@ class ArchivesParser(object): return # For now, accept anything not text/plain if container.get_content_type() != 'text/plain': - self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + try: + self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enter a cannot-happen path. 
+ # In which case we just ignore this attachment. + return return + # It's a text/plain, it might be worthwhile. # If it has a name, we consider it an attachments if not container.get_params(): @@ -312,19 +326,42 @@ class ArchivesParser(object): for k,v in container.get_params(): if k=='name' and v != '': # Yes, it has a name - self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + try: + self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enter a cannot-happen path. + # In which case we just ignore this attachment. + return + return + # If it's content-disposition=attachment, we also want to save it if 'Content-Disposition' in container and container['Content-Disposition'].startswith('attachment'): - self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + try: + self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enter a cannot-happen path. + # In which case we just ignore this attachment. + return + return + # If we have already found one text/plain part, make all # further text/plain parts attachments if self.attachments_found_first_plaintext: # However, this will also *always* catch the MIME part added # by majordomo with the footer. So if that one is present, # we need to explicitly exclude it again. 
- b = container.get_payload(decode=True) + try: + b = container.get_payload(decode=True) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enter a cannot-happen path. + # In which case we just ignore this attachment. + return + if isinstance(b, str) and not self._re_footer.match(b): # We know there is no name for this one self.attachments.append((None, container.get_content_type(), b)) diff --git a/loader/reparse_message.py b/loader/reparse_message.py index df4501a..ed4def2 100755 --- a/loader/reparse_message.py +++ b/loader/reparse_message.py @@ -102,8 +102,9 @@ if __name__ == "__main__": ap.analyze(date_override=opt.force_date) except IgnorableException as e: if opt.update: - raise e - f.write("Exception loading %s: %s" % (id, e)) + print("Exception loading {0}: {1}".format(id, e)) + else: + f.write("Exception loading %s: %s" % (id, e)) continue if opt.update: -- 2.39.5