From: Magnus Hagander Date: Thu, 24 Oct 2019 13:58:49 +0000 (+0200) Subject: Remove one level of redirects in the CM scraper X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=a68221ea3a0483a96c0ed852a91f5c737c3d3a03;p=pgeu-web.git Remove one level of redirects in the CM scraper Page seems to be removed. This means we can't catch the "please enter more personal data" prompt at this point, but we'll cross that bridge when we get there. --- diff --git a/code/pgeusite/cmutuel/management/commands/cmscrape.py b/code/pgeusite/cmutuel/management/commands/cmscrape.py index 89b235d..54ac1b6 100755 --- a/code/pgeusite/cmutuel/management/commands/cmscrape.py +++ b/code/pgeusite/cmutuel/management/commands/cmscrape.py @@ -102,13 +102,6 @@ class Command(BaseCommand): # Follow a redirect chain to collect more cookies sess.expect_redirect('https://www.creditmutuel.fr/en/banque/pageaccueil.html', 'https://www.creditmutuel.fr/en/banque/paci_engine/engine.aspx') - got_redir = sess.expect_redirect('https://www.creditmutuel.fr/en/banque/paci_engine/engine.aspx', - ['https://www.creditmutuel.fr/en/banque/homepage_dispatcher.cgi', - 'https://www.creditmutuel.fr/en/banque/paci_engine/static_content_manager.aspx']) - if got_redir == 'https://www.creditmutuel.fr/en/banque/paci_engine/static_content_manager.aspx': - # Got the "please fill out your personal data" form. So let's bypass it - sess.expect_redirect('https://www.creditmutuel.fr/en/banque/paci_engine/static_content_manager.aspx?_productfilter=PACI&_pid=ContentManager&_fid=DoStopPaciAndRemind', - 'https://www.creditmutuel.fr/en/banque/homepage_dispatcher.cgi') # Download the form if verbose: