Coverage for src / anpr2mqtt / normalizers.py: 100%

49 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-30 16:07 +0000

1import re 

2from collections.abc import Collection 

3from dataclasses import dataclass 

4 

5from rapidfuzz.distance import Levenshtein 

6 

# Look-alike character pairs used for position-aware repair: each digit maps to
# the letter it is swapped for when found at a letter-only position.
DIGIT_TO_ALPHA: dict[str, str] = {
    "0": "O",
    "1": "I",
    "8": "B",
}
# Inverse table (letter -> digit), applied at digit-only positions.
ALPHA_TO_DIGIT: dict[str, str] = dict(zip(DIGIT_TO_ALPHA.values(), DIGIT_TO_ALPHA.keys()))

9 

10 

@dataclass
class RegionRules:
    """Positional format description for one style of identifier in one region.

    The position collections hold zero-based character indexes into the
    identifier string.
    """

    # Kind of target the rule applies to (the built-in table uses "plate").
    target_type: str
    # Region code the rule belongs to (the built-in table uses e.g. "UK").
    region: str
    # Indexes at which a digit is expected.
    digit_pos: Collection[int]
    # Indexes at which a letter is expected.
    alpha_pos: Collection[int]
    # Compiled pattern that an already well-formed identifier matches.
    valid_re: re.Pattern[str]

    @property
    def length(self) -> int:
        """Return the number of constrained positions (digit plus letter positions)."""
        return sum(len(positions) for positions in (self.digit_pos, self.alpha_pos))

22 

23 

# Built-in format rules, keyed by a descriptive name.
# Each entry's regex must describe the same layout as its digit/letter position
# sets; otherwise well-formed plates are treated as invalid and misreads that
# happen to fit the wrong layout are never repaired.
RULES: dict[str, RegionRules] = {
    # UK current-format plate: AA99AAA (issued since Sept 2001, ~95% of plates in use)
    # https://assets.publishing.service.gov.uk/media/6694e379fc8e12ac3edafc60/inf104-vehicle-registration-numbers-and-number-plates.pdf
    "UK_2001": RegionRules("plate", "UK", {2, 3}, {0, 1, 4, 5, 6}, re.compile(r"^[A-Z]{2}[0-9]{2}[A-Z]{3}$")),
    # Italian and French plates use the AA999AA layout: digits at positions 2-4,
    # letters at positions 0-1 and 5-6 (separators are not part of the string).
    "ITALY_1999": RegionRules("plate", "IT", {2, 3, 4}, {0, 1, 5, 6}, re.compile(r"^[A-Z]{2}[0-9]{3}[A-Z]{2}$")),
    "FRANCE_2001": RegionRules("plate", "FR", {2, 3, 4}, {0, 1, 5, 6}, re.compile(r"^[A-Z]{2}[0-9]{3}[A-Z]{2}$")),
}

31 

32 

33class Normalizer: 

34 def __init__(self, target_type: str | None = None, region: str | None = None) -> None: 

35 self.target_type: str | None = target_type 

36 self.region: str | None = region 

37 self.rules: dict[str, RegionRules] = { 

38 name: rule for name, rule in RULES.items() if (rule.target_type == self.target_type and rule.region == self.region) 

39 } 

40 

41 def _digit_swapped(self, plate: str, digit_pos: Collection[int], alpha_pos: Collection[int]) -> str | None: 

42 """Return OCR-confusable single-swap variants of a string. 

43 

44 Reverses the I/1 and O/0 substitution rules so that a misread which the 

45 normalizer cannot fully repair (e.g. '9' seen as 'S') can still be caught 

46 by fuzzy matching against a variant that differs by only one edit. 

47 """ 

48 chars = list(plate.upper()) 

49 swaps: int = 0 

50 for i, ch in enumerate(chars): 

51 swap: str | None = None 

52 if i in digit_pos and ch in ALPHA_TO_DIGIT: 

53 swap = ALPHA_TO_DIGIT[ch] 

54 elif i in alpha_pos and ch in DIGIT_TO_ALPHA: 

55 swap = DIGIT_TO_ALPHA[ch] 

56 if swap is not None: 

57 chars[i] = swap 

58 swaps += 1 

59 if swaps > 0: 

60 return "".join(chars) 

61 return None 

62 

63 def normalize(self, target: str) -> str | None: 

64 """Return a corrected plate if I/1 or O/0 substitutions (position-aware) yield a valid plate.""" 

65 plate = target.upper() 

66 for rule in self.rules.values(): 

67 if len(plate) == rule.length and not rule.valid_re.match(plate): 

68 # only 1 alternative so far 

69 alt = self._digit_swapped(plate, digit_pos=rule.digit_pos, alpha_pos=rule.alpha_pos) 

70 if alt: 

71 # may not be a valid plate, but partial correction may support a subsequent different style of correction 

72 return alt 

73 return None 

74 

75 

76def fuzzy_match(target_id: str, max_dist: int, candidates: list[str]) -> str | None: 

77 """Return the closest key in candidates within max_dist edits, or None.""" 

78 best: str | None = None 

79 best_dist = max_dist + 1 

80 for candidate in candidates: 

81 d = Levenshtein.distance(target_id, candidate) 

82 if d < best_dist: 

83 best_dist = d 

84 best = candidate 

85 return best if best_dist <= max_dist else None