| author | stephanchrst <stephanchrst@gmail.com> | 2022-05-10 21:51:50 +0700 |
|---|---|---|
| committer | stephanchrst <stephanchrst@gmail.com> | 2022-05-10 21:51:50 +0700 |
| commit | 3751379f1e9a4c215fb6eb898b4ccc67659b9ace (patch) | |
| tree | a44932296ef4a9b71d5f010906253d8c53727726 /addons/website/tests/test_crawl.py | |
| parent | 0a15094050bfde69a06d6eff798e9a8ddf2b8c21 (diff) | |
initial commit 2
Diffstat (limited to 'addons/website/tests/test_crawl.py')
| -rw-r--r-- | addons/website/tests/test_crawl.py | 116 |
1 file changed, 116 insertions, 0 deletions
```diff
diff --git a/addons/website/tests/test_crawl.py b/addons/website/tests/test_crawl.py
new file mode 100644
index 00000000..d210fd9b
--- /dev/null
+++ b/addons/website/tests/test_crawl.py
@@ -0,0 +1,116 @@
+# -*- coding: utf-8 -*-
+# Part of Odoo. See LICENSE file for full copyright and licensing details.
+
+import logging
+import time
+
+import lxml.html
+from werkzeug import urls
+
+import odoo
+import re
+
+from odoo.addons.base.tests.common import HttpCaseWithUserDemo
+
+_logger = logging.getLogger(__name__)
+
+
+@odoo.tests.common.tagged('post_install', '-at_install', 'crawl')
+class Crawler(HttpCaseWithUserDemo):
+    """ Test suite crawling an Odoo CMS instance and checking that all
+    internal links lead to a 200 response.
+
+    If a username and a password are provided, authenticates the user before
+    starting the crawl.
+    """
+
+    def setUp(self):
+        super(Crawler, self).setUp()
+
+        if hasattr(self.env['res.partner'], 'grade_id'):
+            # Create at least one published partner, so that /partners doesn't
+            # return a 404
+            grade = self.env['res.partner.grade'].create({
+                'name': 'A test grade',
+                'website_published': True,
+            })
+            self.env['res.partner'].create({
+                'name': 'A Company for /partners',
+                'is_company': True,
+                'grade_id': grade.id,
+                'website_published': True,
+            })
+
+    def crawl(self, url, seen=None, msg=''):
+        if seen is None:
+            seen = set()
+
+        url_slug = re.sub(r"[/](([^/=?&]+-)?[0-9]+)([/]|$)", '/<slug>/', url)
+        url_slug = re.sub(r"([^/=?&]+)=[^/=?&]+", r'\g<1>=param', url_slug)
+        if url_slug in seen:
+            return seen
+        else:
+            seen.add(url_slug)
+
+        _logger.info("%s %s", msg, url)
+        r = self.url_open(url, allow_redirects=False)
+        if r.status_code in (301, 302):
+            # check local redirect to avoid fetching external pages
+            new_url = r.headers.get('Location')
+            current_url = r.url
+            if urls.url_parse(new_url).netloc != urls.url_parse(current_url).netloc:
+                return seen
+            r = self.url_open(new_url)
+
+        code = r.status_code
+        self.assertIn(code, range(200, 300), "%s Fetching %s returned error response (%d)" % (msg, url, code))
+
+        if r.headers['Content-Type'].startswith('text/html'):
+            doc = lxml.html.fromstring(r.content)
+            for link in doc.xpath('//a[@href]'):
+                href = link.get('href')
+
+                parts = urls.url_parse(href)
+                # href with any fragment removed
+                href = parts.replace(fragment='').to_url()
+
+                # FIXME: handle relative link (not parts.path.startswith /)
+                if parts.netloc or \
+                        not parts.path.startswith('/') or \
+                        parts.path == '/web' or \
+                        parts.path.startswith('/web/') or \
+                        parts.path.startswith('/en_US/') or \
+                        (parts.scheme and parts.scheme not in ('http', 'https')):
+                    continue
+
+                self.crawl(href, seen, msg)
+        return seen
+
+    def test_10_crawl_public(self):
+        t0 = time.time()
+        t0_sql = self.registry.test_cr.sql_log_count
+        seen = self.crawl('/', msg='Anonymous Coward')
+        count = len(seen)
+        duration = time.time() - t0
+        sql = self.registry.test_cr.sql_log_count - t0_sql
+        _logger.runbot("public crawled %s urls in %.2fs %s queries, %.3fs %.2fq per request", count, duration, sql, duration / count, float(sql) / count)
+
+    def test_20_crawl_demo(self):
+        t0 = time.time()
+        t0_sql = self.registry.test_cr.sql_log_count
+        self.authenticate('demo', 'demo')
+        seen = self.crawl('/', msg='demo')
+        count = len(seen)
+        duration = time.time() - t0
+        sql = self.registry.test_cr.sql_log_count - t0_sql
+        _logger.runbot("demo crawled %s urls in %.2fs %s queries, %.3fs %.2fq per request", count, duration, sql, duration / count, float(sql) / count)
+
+    def test_30_crawl_admin(self):
+        t0 = time.time()
+        t0_sql = self.registry.test_cr.sql_log_count
+        self.authenticate('admin', 'admin')
+        seen = self.crawl('/', msg='admin')
+        count = len(seen)
+        duration = time.time() - t0
+        sql = self.registry.test_cr.sql_log_count - t0_sql
+        _logger.runbot("admin crawled %s urls in %.2fs %s queries, %.3fs %.2fq per request", count, duration, sql, duration / count, float(sql) / count)
```
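
For readers skimming the diff, here is a minimal, standalone sketch (not part of the committed file) of what the two `re.sub` calls in `crawl()` do: they collapse record IDs in path segments and values in query strings so that structurally identical pages are crawled only once. The sample URLs below are hypothetical.

```python
import re

def normalize(url):
    # Collapse "/<name>-<id>" or "/<id>" path segments into a slug placeholder.
    url_slug = re.sub(r"[/](([^/=?&]+-)?[0-9]+)([/]|$)", '/<slug>/', url)
    # Collapse every "key=value" pair in the query string to "key=param".
    return re.sub(r"([^/=?&]+)=[^/=?&]+", r'\g<1>=param', url_slug)

# Hypothetical URLs: both product pages map to the same key, so the
# crawler fetches only the first one it encounters.
print(normalize('/shop/product/office-chair-42'))  # /shop/product/<slug>/
print(normalize('/shop/product/office-desk-43'))   # /shop/product/<slug>/
print(normalize('/shop?search=chair&order=name'))  # /shop?search=param&order=param
```

Similarly, a small sketch of the redirect check performed before following a 301/302: the crawler mirrors the `netloc` comparison in `crawl()` and only follows a `Location` header that points at the same host, so external redirects end that branch of the crawl. The URLs are again hypothetical, and `werkzeug.urls.url_parse` is the same helper the test imports (available in werkzeug releases prior to 3.0).

```python
from werkzeug import urls  # same import the test uses

def is_local_redirect(current_url, location):
    # Follow the redirect only when it stays on the same host, so the
    # crawl never leaves the instance under test.
    return urls.url_parse(location).netloc == urls.url_parse(current_url).netloc

print(is_local_redirect('http://localhost:8069/page', 'http://localhost:8069/new-page'))  # True
print(is_local_redirect('http://localhost:8069/page', 'https://www.example.com/'))        # False
```

Since the class is tagged `post_install`, `-at_install` and `crawl`, the suite presumably runs after module installation and can be selected on its own through the `crawl` tag (for example via odoo-bin's `--test-tags` option), though the exact invocation depends on the deployment.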
