chore: add validation flagging for strings with incorrect tag formatting

pull/3207/head
Ryan Miller 7 months ago
parent 2e5c354a4f
commit 4775559cf3
No known key found for this signature in database
GPG Key ID: 128C6E0A1246C6B3

@ -60,6 +60,100 @@ def extractVariablesFromDict(input_dict):
return output_dict_new, output_dict_old
def extractDisallowedTags(input_dict, allowed_tags):
"""
Reads through a dictionary of key-value pairs and creates a new dictionary
where the value is just a list of tags that are not allowed as per the allowed_tags.
Args:
input_dict (dict): The dictionary to extract tags from.
allowed_tags (list): A list of allowed tag names (e.g., ['b', 'br', 'span']).
Returns:
dict: A dictionary with the same keys as input_dict, but the values are lists of disallowed tags.
"""
# Compile a regex to match any HTML-like tags
tag_pattern = re.compile(r'<(/?)(\w+)[^>]*>')
# Create a set of allowed tags for quick lookup
allowed_tag_set = set(allowed_tags)
output_dict = {}
for key, value in input_dict.items():
disallowed_tags = []
for match in tag_pattern.finditer(value):
tag_name = match.group(2)
if tag_name not in allowed_tag_set:
disallowed_tags.append(match.group(0))
output_dict[key] = disallowed_tags
return output_dict
def findImproperTags(input_dict):
"""
Reads through a dictionary of key-value pairs and identifies any uses of angled brackets
that do not form a proper HTML tag.
Args:
input_dict (dict): The dictionary to search for improper tags.
Returns:
dict: A dictionary with the same keys as input_dict, but the values are lists of improper tags.
"""
# Regular expression to find improper use of angled brackets:
# 1. Matches a standalone '<' or '>' not forming a valid tag.
# 2. Matches text enclosed in angled brackets that do not form a valid HTML tag.
improper_tag_pattern = re.compile(r'<[^>]*>|>')
output_dict = {}
for key, value in input_dict.items():
# Find all improper tag matches
improper_tags = [match for match in improper_tag_pattern.findall(value)
if not re.match(r'<\s*/?\s*\w+.*?>', match)]
print(improper_tags)
# Store the results in the output dictionary
output_dict[key] = improper_tags
return output_dict
def flagInvalidAngleBrackets(input_dict, allowed_tag_starts):
"""
Flags an issue if a string contains an angled bracket '<'
but that angle bracket is not followed by a 'b' or an 's' (case-insensitive).
Args:
input_dict (dict): A dictionary where the values are strings to check.
Returns:
dict: A dictionary where keys are the same as input_dict,
and values are lists of issues found in the corresponding string.
"""
output_dict = {}
for key, value in input_dict.items():
issues = []
# Find all occurrences of '<'
indices = [m.start() for m in re.finditer('<', value)]
for idx in indices:
# Look ahead to find the next non-space character after '<'
match = re.match(r'\s*([^\s>])', value[idx + 1:])
if match:
next_char = match.group(1)
if next_char.lower() not in allowed_tag_starts:
# Flag an issue
snippet = value[idx:idx + 10] # Extract a snippet for context
issues.append(f"Invalid tag starting with '<{next_char}' at position {idx}: '{snippet}'")
else:
# No non-space character after '<', flag an issue
issues.append(f"Invalid angle bracket '<' at position {idx}")
if issues:
output_dict[key] = issues
return output_dict
def extractFormattingTags(input_dict):
"""
Reads through a dictionary of key-value pairs and creates a new dictionary
@ -74,17 +168,20 @@ def extractFormattingTags(input_dict):
output_dict_b_tags = {}
output_dict_br_tags = {}
output_dict_span_tags = {}
disallowed_tags = extractDisallowedTags(input_dict, ["b", "br", "span"])
improper_tags = findImproperTags(input_dict)
for key, value in input_dict.items():
console.debug(f"key: {key}, value: {value}")
output_dict_b_tags[key] = extractAllMatches(value, r"<b>(.*?)</b>")
output_dict_br_tags[key] = extractAllMatches(value, r"<br/>")
output_dict_span_tags[key] = extractAllMatches(value, r"<span>(.*?)</span>")
return output_dict_b_tags, output_dict_br_tags, output_dict_span_tags
return output_dict_b_tags, output_dict_br_tags, output_dict_span_tags, disallowed_tags, improper_tags
def identifyLocaleDyanmicVariableDifferences(locales, locale_b_tags,
locale_br_tags,
locale_span_tags):
locale_span_tags, locale_disallowed_tags, locale_improper_tags):
"""
Identifies the differences between each locale's dynamic variables.
@ -104,6 +201,8 @@ def identifyLocaleDyanmicVariableDifferences(locales, locale_b_tags,
current_locale_b_tags = locale_b_tags[locale_name]
current_locale_br_tags = locale_br_tags[locale_name]
current_locale_span_tags = locale_span_tags[locale_name]
current_locale_disallowed_tags = locale_disallowed_tags[locale_name]
current_locale_improper_tags = locale_improper_tags[locale_name]
if locale_name == "en":
continue
@ -115,6 +214,8 @@ def identifyLocaleDyanmicVariableDifferences(locales, locale_b_tags,
"missing_b_tags": {},
"missing_br_tags": {},
"missing_span_tags": {},
"disallowed_tags": {},
"improper_tags": {},
}
for key, value in master_locale.items():
@ -138,6 +239,8 @@ def identifyLocaleDyanmicVariableDifferences(locales, locale_b_tags,
locale_issues["missing_b_tags"][key] = len(master_locale_b_tags[key]) - len(current_locale_b_tags[key])
locale_issues["missing_br_tags"][key] = len(master_locale_br_tags[key]) - len(current_locale_br_tags[key])
locale_issues["missing_span_tags"][key] = len(master_locale_span_tags[key]) - len(current_locale_span_tags[key])
locale_issues["disallowed_tags"][key] = len(current_locale_disallowed_tags[key])
locale_issues["improper_tags"][key] = len(current_locale_improper_tags[key])
for key in locale:
if key not in master_locale:

@ -118,6 +118,8 @@ localeVariablesOld = dict()
locale_b_tags = dict()
locale_br_tags = dict()
locale_span_tags = dict()
locale_disallowed_tags = dict()
locale_improper_tags = dict()
# Extract the dynamic variables from each locale and store them in a dictionary
for locale, data in locales.items():
console.debug(f"Extracting dynamic variables for {locale}")
@ -129,11 +131,13 @@ for locale, data in locales.items():
locale_b_tags[locale],
locale_br_tags[locale],
locale_span_tags[locale],
locale_disallowed_tags[locale],
locale_improper_tags[locale],
) = extractFormattingTags(data)
problems = identifyLocaleDyanmicVariableDifferences(localeVariables, locale_b_tags,
locale_br_tags,
locale_span_tags, )
locale_span_tags, locale_disallowed_tags, locale_improper_tags)
found_old_dynamic_variables = identifyAndPrintOldDynamicVariables(
localeVariablesOld, args.print_old_dynamic_variables
@ -182,8 +186,23 @@ if problems:
string_to_locales[problem_string] = [locale]
else:
string_to_locales[problem_string].append(locale)
if "disallowed_tags" in locale_problems:
for problem_string, tag_issues in locale_problems["disallowed_tags"].items():
print(f"problem string: {problem_string}, tag_issues: {tag_issues}")
if tag_issues > 0:
if problem_string not in string_to_locales:
string_to_locales[problem_string] = [locale]
else:
string_to_locales[problem_string].append(locale)
if "improper_tags" in locale_problems:
for problem_string, tag_issues in locale_problems["improper_tags"].items():
if tag_issues > 0:
if problem_string not in string_to_locales:
string_to_locales[problem_string] = [locale]
else:
string_to_locales[problem_string].append(locale)
console.info(f"Problem strings: {json.dumps(string_to_locales, indent=2)}")
console.debug(f"Problem strings: {json.dumps(string_to_locales, indent=2)}")
message += " See above for problem strings and which locales they are in."
if args.print_problem_formatting_tag_strings:
@ -192,6 +211,8 @@ if problems:
locale_missing_br_tags = set()
locale_missing_b_tags = set()
locale_missing_span_tags = set()
locale_disallowed_tags = set()
locale_improper_tags = set()
if "missing_br_tags" in locale_problems:
for problem_string, tag_issues in locale_problems["missing_br_tags"].items():
if tag_issues > 0:
@ -204,11 +225,21 @@ if problems:
for problem_string, tag_issues in locale_problems["missing_span_tags"].items():
if tag_issues > 0:
locale_missing_span_tags.add(problem_string)
if "disallowed_tags" in locale_problems:
for problem_string, tag_issues in locale_problems["disallowed_tags"].items():
if tag_issues > 0:
locale_disallowed_tags.add(problem_string)
if "improper_tags" in locale_problems:
for problem_string, tag_issues in locale_problems["improper_tags"].items():
if tag_issues > 0:
locale_improper_tags.add(problem_string)
locales_to_strings[locale] = {
"br": list(locale_missing_br_tags),
"b": list(locale_missing_b_tags),
"span": list(locale_missing_span_tags),
"disallowed_tags": list(locale_disallowed_tags),
"improper_tags": list(locale_improper_tags),
}
if locales_to_strings[locale]["br"] == []:
@ -217,11 +248,16 @@ if problems:
del locales_to_strings[locale]["b"]
if locales_to_strings[locale]["span"] == []:
del locales_to_strings[locale]["span"]
if locales_to_strings[locale]["disallowed_tags"] == []:
del locales_to_strings[locale]["disallowed_tags"]
if locales_to_strings[locale]["improper_tags"] == []:
del locales_to_strings[locale]["improper_tags"]
console.info(f"Problem strings: {json.dumps(locales_to_strings, indent=2)}")
message += " See above for problem strings and which locales they are in."
for locale, locale_strings in locales_to_strings.items():
printed_locale = False
printed_problem_strings = set()
for tag_type, tag_strings in locale_strings.items():
if tag_strings:
if locale in ignored_strings_formatting and tag_strings == ignored_strings_formatting[locale]:
@ -230,9 +266,11 @@ if problems:
print(f"{locale} - [Link Here](https://crowdin.com/editor/session-crossplatform-strings/300/en-{locale})")
printed_locale = True
for tag_string in tag_strings:
number_of_tag_problems += 1
print(
f"- [{tag_string}](https://crowdin.com/editor/session-crossplatform-strings/300/en-{locale}?view=comfortable&filter=basic&value=3#q={tag_string})")
if tag_string not in printed_problem_strings:
printed_problem_strings.add(tag_string)
number_of_tag_problems += 1
print(
f"- [{tag_string}](https://crowdin.com/editor/session-crossplatform-strings/300/en-{locale}?view=comfortable&filter=basic&value=3#q={tag_string})")
print(f"Total Problems: {number_of_tag_problems}")
if args.print_problems:

Loading…
Cancel
Save