Coverage for gpaw/utilities/urlcheck.py: 27%

41 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-07-09 00:21 +0000

1"""Check URL's in Python files.""" 

2import re 

3import sys 

4from pathlib import Path 

5from urllib.error import HTTPError, URLError 

6from urllib.request import urlopen, Request 

7 

# Links that check() accepts without opening them: URL templates with
# placeholders (%s, {name}, *) and a few fixed addresses.
OK = {
    'https://doi.org/%s',
    'https://arxiv.org/abs/%s',
    'https://gitlab.com/gpaw/gpaw/-/merge_requests/%s',
    'https://gitlab.com/gpaw/gpaw/-/issues/%s',
    'https://xkcd.com/%s',
    'https://gitlab.com/ase/ase.git@master',
    'https://gitlab.com/{name}/{name}.git',
    'https://cmrdb.fysik.dtu.dk/c2db',
    'https://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-*.tar.gz',
    'https://wiki.fysik.dtu.dk/gpaw-files',
    'https://wiki.fysik.dtu.dk/gpaw-files/',
    'https://wiki.fysik.dtu.dk/gpaw-files/things/',
    'https://gpaw.readthedocs.io/devel',
}

# Browser-like User-Agent header value sent with every request in check1().
USERAGENT = (
    'Mozilla/5.0 (X11; U; Linux i686) '
    'Gecko/20071127 Firefox/2.0.0.11'
)

23 

24 

def check(root: Path) -> int:
    """Check URLs in Python files inside root.

    Every ``*.py`` file below *root* is scanned line by line for
    ``http://`` and ``https://`` links.  Links found in ``OK`` are
    skipped; every other link is handed to ``check1`` together with the
    file and the zero-based line index where it was found.  Files whose
    path contains ``html/_downloads`` are not checked at all.

    Returns the number of link occurrences for which ``check1``
    returned False.
    """
    errors = 0
    for path in root.glob('**/*.py'):
        # The exclusion depends only on the path, so decide once per file
        # and avoid reading files that would never be checked anyway.
        if 'html/_downloads' in str(path):
            continue
        # Python source is UTF-8 by default; do not let the locale's
        # preferred encoding decide how the file is decoded.
        text = path.read_text(encoding='utf-8')
        for n, line in enumerate(text.splitlines()):
            for url in re.findall(r'https?://\S+', line):
                # Drop punctuation that trails the link on the line.
                url = url.rstrip(""",.'"}):""")
                if url in OK:
                    continue
                # The stripping above can remove a closing parenthesis
                # that belongs to the URL itself; put it back.
                if '(' in url and ')' not in url:
                    url += ')'
                if not check1(path, n, url):
                    errors += 1
    return errors

38 

39 

def check1(path: Path, n: int, url: str) -> bool:
    """Try to open url and report whether that worked.

    *path* and *n* (zero-based line index) are only used to tell the
    user where the link was found; the printed line number is ``n + 1``.

    Returns True if the URL could be opened.  Returns False, after
    printing the location, the URL and the error, if opening it raised
    HTTPError, URLError or ConnectionResetError.  Any other exception
    is re-raised after the location and the URL have been printed.
    """
    try:
        req = Request(url, headers={'User-Agent': USERAGENT})
        # Only reachability matters; the with-block closes the response
        # (and its socket) right away instead of leaking it.
        with urlopen(req):
            pass
    except (HTTPError, URLError, ConnectionResetError) as e:
        print(f'{path}:{n + 1}')
        print(url)
        print(e)
        print()
        return False
    except Exception:
        # Unexpected failure: show which link triggered it, then let
        # the exception propagate.
        print(f'{path}:{n + 1}')
        print(url)
        raise
    return True

55 

56 

def test():
    """Scan the 'gpaw' and 'doc' trees and tolerate fewer than 10 bad links."""
    total = 0
    for folder in ['gpaw', 'doc']:
        total += check(Path(folder))
    assert total < 10

60 

61 

if __name__ == '__main__':
    # Command-line use: every argument is a root directory to scan.
    for root in map(Path, sys.argv[1:]):
        check(root)