Coverage for src / anpr2mqtt / normalizers.py: 100%
49 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-30 16:07 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-30 16:07 +0000
1import re
2from collections.abc import Collection
3from dataclasses import dataclass
5from rapidfuzz.distance import Levenshtein
# Character pairs treated as interchangeable when correcting a misread:
# each digit maps to the letter it is swapped with in letter-only positions.
DIGIT_TO_ALPHA: dict[str, str] = {
    "0": "O",
    "1": "I",
    "8": "B",
}
# Reverse lookup (letter -> digit), derived so the two tables cannot drift apart.
ALPHA_TO_DIGIT: dict[str, str] = {letter: digit for digit, letter in DIGIT_TO_ALPHA.items()}
@dataclass
class RegionRules:
    """Describe the character layout of one identifier format for one region."""

    # Kind of identifier the rule applies to (the rules in this file use "plate").
    target_type: str
    # Region code the rule belongs to (e.g. "UK").
    region: str
    # Zero-based indices that must hold a digit.
    digit_pos: Collection[int]
    # Zero-based indices that must hold a letter.
    alpha_pos: Collection[int]
    # Compiled pattern an already-valid identifier matches.
    valid_re: re.Pattern[str]

    @property
    def length(self) -> int:
        """Return the total number of character positions the rule declares."""
        return sum(len(positions) for positions in (self.digit_pos, self.alpha_pos))
# Correction rules keyed by format identifier. For every entry the digit and
# letter positions (zero-based) and valid_re must describe the same layout;
# otherwise a misread is either accepted as valid or "corrected" into a string
# the pattern can never match.
RULES: dict[str, RegionRules] = {
    # UK current-format plate: AA99AAA (issued since Sept 2001, ~95% of plates in use)
    # https://assets.publishing.service.gov.uk/media/6694e379fc8e12ac3edafc60/inf104-vehicle-registration-numbers-and-number-plates.pdf
    "UK_2001": RegionRules("plate", "UK", {2, 3}, {0, 1, 4, 5, 6}, re.compile(r"^[A-Z]{2}[0-9]{2}[A-Z]{3}$")),
    # Italian plate: AA999AA (digits at indices 2-4), so the pattern needs three digits then two letters.
    "ITALY_1999": RegionRules("plate", "IT", {2, 3, 4}, {0, 1, 5, 6}, re.compile(r"^[A-Z]{2}[0-9]{3}[A-Z]{2}$")),
    # French plate: AA999AA (same layout as the Italian rule above).
    "FRANCE_2001": RegionRules("plate", "FR", {2, 3, 4}, {0, 1, 5, 6}, re.compile(r"^[A-Z]{2}[0-9]{3}[A-Z]{2}$")),
}
33class Normalizer:
34 def __init__(self, target_type: str | None = None, region: str | None = None) -> None:
35 self.target_type: str | None = target_type
36 self.region: str | None = region
37 self.rules: dict[str, RegionRules] = {
38 name: rule for name, rule in RULES.items() if (rule.target_type == self.target_type and rule.region == self.region)
39 }
41 def _digit_swapped(self, plate: str, digit_pos: Collection[int], alpha_pos: Collection[int]) -> str | None:
42 """Return OCR-confusable single-swap variants of a string.
44 Reverses the I/1 and O/0 substitution rules so that a misread which the
45 normalizer cannot fully repair (e.g. '9' seen as 'S') can still be caught
46 by fuzzy matching against a variant that differs by only one edit.
47 """
48 chars = list(plate.upper())
49 swaps: int = 0
50 for i, ch in enumerate(chars):
51 swap: str | None = None
52 if i in digit_pos and ch in ALPHA_TO_DIGIT:
53 swap = ALPHA_TO_DIGIT[ch]
54 elif i in alpha_pos and ch in DIGIT_TO_ALPHA:
55 swap = DIGIT_TO_ALPHA[ch]
56 if swap is not None:
57 chars[i] = swap
58 swaps += 1
59 if swaps > 0:
60 return "".join(chars)
61 return None
63 def normalize(self, target: str) -> str | None:
64 """Return a corrected plate if I/1 or O/0 substitutions (position-aware) yield a valid plate."""
65 plate = target.upper()
66 for rule in self.rules.values():
67 if len(plate) == rule.length and not rule.valid_re.match(plate):
68 # only 1 alternative so far
69 alt = self._digit_swapped(plate, digit_pos=rule.digit_pos, alpha_pos=rule.alpha_pos)
70 if alt:
71 # may not be a valid plate, but partial correction may support a subsequent different style of correction
72 return alt
73 return None
76def fuzzy_match(target_id: str, max_dist: int, candidates: list[str]) -> str | None:
77 """Return the closest key in candidates within max_dist edits, or None."""
78 best: str | None = None
79 best_dist = max_dist + 1
80 for candidate in candidates:
81 d = Levenshtein.distance(target_id, candidate)
82 if d < best_dist:
83 best_dist = d
84 best = candidate
85 return best if best_dist <= max_dist else None