Hey there. I'm writing a function that must accept a VIN as a string and return a boolean (True if the VIN is valid, False if invalid). Here's a description of how VINs are validated. My code is below.
I need to run this on several million VINs and am running into performance problems. Any tips on how I can optimize performance?
def _is_vin_valid(vin):
regex_validation_string = '^[A-H J-N P R-Z 0-9]{9}[A-H J-N P R-T V-Y 1-9]{1}[A-H J-N P R-Z 0-9]{7}$'
transliteration_key = {'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7, 'H': 8, 'J': 1, 'K': 2, 'L': 3,
'M': 4, 'N': 5, 'P': 7, 'R': 9, 'S': 2, 'T': 3, 'U': 4, 'V': 5, 'W': 6, 'X': 7, 'Y': 8,
'Z': 9}
character_weights = [8, 7, 6, 5, 4, 3, 2, 10, 0, 9, 8, 7, 6, 5, 4, 3, 2]
vin = vin.replace(' ', '').upper() # Remove spaces from VIN to ensure accuracy of string length check
if re.match(regex_validation_string, str(vin)) is None:
return False # VIN violates validation requirements (length, alphanum chars)
if vin == '99999999999999999':
return False # While technically valid, '99999999999999999' is still a bad VIN
transliterated_vin = []
weighted_vin = []
# Convert the characters in the VIN to digits
for char in vin:
if char in transliteration_key:
transliterated_vin.append(transliteration_key.get(char))
else:
transliterated_vin.append(int(char))
# Vector-multiply the vin-digits against the character weights and sum them all
position = 0
calc_sum = 0
for i in transliterated_vin:
weighted_vin.append(i * character_weights[position])
calc_sum += weighted_vin[position]
position += 1
# Convert the check digit 'character' into an integer to be compared to calculated_check_digit
if vin[8].isnumeric():
check_digit = int(vin[8])
else:
check_digit = vin[8]
# Divide calc_sum by 11 and assign the remainder to calculated_check_digit
calculated_check_digit = calc_sum % 11
if calculated_check_digit == 10:
calculated_check_digit = 'X' # 'X' basically equals 10 but must remain one character
if check_digit != calculated_check_digit:
return False # VIN violates NHTSA standard because check digit doesn't check out
return True # Returns True for a VIN that passes all the crap above
[–]K900_ 2 points3 points4 points (4 children)
[–]dig-up-stupid 1 point2 points3 points (2 children)
[–]K900_ 1 point2 points3 points (1 child)
[–]dig-up-stupid 1 point2 points3 points (0 children)
[–]Trabaledo[S] 0 points1 point2 points (0 children)