From: Magnus Hagander
Date: Fri, 23 Aug 2019 07:51:16 +0000 (+0200)
Subject: Parse new HTML which has inside on cm
X-Git-Url: http://git.postgresql.org/gitweb/static/session/%7B%7Bsession.id%7D%7D-%7B%7Bsession.title%7Cslugify%7D%7D?a=commitdiff_plain;h=bbf59e1dd58b6df780aa5659b0806500c807f8e1;p=pgeu-web.git

Parse new HTML which has inside on cm
---

diff --git a/code/pgeusite/cmutuel/management/commands/cmscrape.py b/code/pgeusite/cmutuel/management/commands/cmscrape.py
index f5be767..609abf8 100755
--- a/code/pgeusite/cmutuel/management/commands/cmscrape.py
+++ b/code/pgeusite/cmutuel/management/commands/cmscrape.py
@@ -31,10 +31,12 @@ class FormHtmlParser(HTMLParser):
         self.target_url = None
 
     def handle_starttag(self, tag, attrs):
+        if self.target_url:
+            return
         if tag == 'form':
             for k, v in attrs:
                 if k == 'action':
-                    if v.find('telechargement.cgi?'):
+                    if v.find('telechargement.cgi?') >= 0:
                         self.in_form = True
                         self.target_url = v
                         return