chore: add formatting tag validation to generateLocales script

pull/3207/head
Ryan Miller 7 months ago
parent f196513c01
commit 7f7bb81dc1
No known key found for this signature in database
GPG Key ID: 128C6E0A1246C6B3

@ -9,208 +9,245 @@ from util.listUtils import missingFromList
from util.logger import console from util.logger import console
def extractAllMatches(input_string, pattern):
    """
    Extracts every match of a regular expression from the input string.

    Args:
      input_string (str): The string to search.
      pattern (str): The regular expression handed to re.findall.

    Returns:
      list: The matches found in the input string, exactly as re.findall
        returns them. Empty when nothing matches.
    """
    matches = re.findall(pattern, input_string)
    # Only log when something was found so the debug output stays readable.
    if matches:
        console.debug(f"matches: {matches}")
    return matches
def extractOldDynamicVariables(input_string):
    """
    Extracts old-style dynamic variables (written as $name$) from the input string.

    Args:
      input_string (str): The string to extract old-style dynamic variables from.

    Returns:
      list: The variable names found between `$` delimiters, in order of
        appearance. Empty when the string contains none.
    """
    return re.findall(r"\$(\w+)\$", input_string)
def extractVariablesFromDict(input_dict):
    """
    Reads through a dictionary of key-value pairs and builds two dictionaries
    whose values are the lists of dynamic variables found in each original value.

    Args:
      input_dict (dict): The dictionary to extract dynamic variables from.

    Returns:
      tuple: Two dictionaries with the same keys as input_dict. The first maps
        each key to its `{name}` style variables, the second to its old
        `$name$` style variables.
    """
    output_dict_new = {}
    output_dict_old = {}
    for key, value in input_dict.items():
        console.debug(f"key: {key}, value: {value}")
        output_dict_new[key] = extractAllMatches(value, r"\{(\w+)\}")
        output_dict_old[key] = extractAllMatches(value, r"\$(\w+)\$")
    return output_dict_new, output_dict_old
def extractFormattingTags(input_dict):
    """
    Reads through a dictionary of key-value pairs and builds three dictionaries
    whose values are the lists of formatting tags found in each original value.

    Only the exact forms `<b>...</b>`, `<br/>` and `<span>...</span>` are
    matched; tags written differently (attributes, `<br>`, extra whitespace)
    are not counted.

    Args:
      input_dict (dict): The dictionary to extract formatting tags from.

    Returns:
      tuple: Three dictionaries with the same keys as input_dict, in the order
        (b tags, br tags, span tags). Each value is the list of matches for
        that tag in the original string.
    """
    output_dict_b_tags = {}
    output_dict_br_tags = {}
    output_dict_span_tags = {}
    for key, value in input_dict.items():
        console.debug(f"key: {key}, value: {value}")
        output_dict_b_tags[key] = extractAllMatches(value, r"<b>(.*?)</b>")
        output_dict_br_tags[key] = extractAllMatches(value, r"<br/>")
        output_dict_span_tags[key] = extractAllMatches(value, r"<span>(.*?)</span>")
    return output_dict_b_tags, output_dict_br_tags, output_dict_span_tags
def identifyLocaleDyanmicVariableDifferences(locales, locale_b_tags,
                                             locale_br_tags,
                                             locale_span_tags):
    """
    Identifies the differences between each locale and the master ("en") locale:
    missing/additional keys, missing/additional dynamic variables and
    formatting tag count mismatches.

    Args:
      locales (dict): A dictionary with keys being a locale name and values being a dictionary of string key -> list of dynamic variables.
      locale_b_tags (dict): A dictionary with keys being a locale name and values being a dictionary of string key -> list of <b> tag matches.
      locale_br_tags (dict): Same shape as locale_b_tags, for <br/> tags.
      locale_span_tags (dict): Same shape as locale_b_tags, for <span> tags.

    Returns:
      dict: Maps locale name -> dictionary of issues. Only locales with at
        least one issue are present, and each issues dictionary only holds
        its non-empty categories:
          - "missing_keys" / "additional_keys": lists of string keys.
          - "missing_variables" / "additional_variables": string key -> non-empty list.
          - "missing_b_tags" / "missing_br_tags" / "missing_span_tags":
            string key -> non-zero count difference (master minus locale;
            a negative value means the locale has more tags than master).
    """
    master_locale = locales["en"]
    # Issue category -> per-locale tag lookup, so the three tag kinds share one code path.
    tags_by_issue = {
        "missing_b_tags": locale_b_tags,
        "missing_br_tags": locale_br_tags,
        "missing_span_tags": locale_span_tags,
    }
    issues = {}

    for locale_name, locale in locales.items():
        if locale_name == "en":
            continue

        locale_issues = {
            "missing_keys": [],
            "additional_keys": [],
            "missing_variables": {},
            "additional_variables": {},
            "missing_b_tags": {},
            "missing_br_tags": {},
            "missing_span_tags": {},
        }

        for key, value in master_locale.items():
            # If a key is missing from the locale, add it to the missing_keys list
            if key not in locale:
                locale_issues["missing_keys"].append(key)
                continue

            locale_value = locale[key]

            # Find the dynamic variables that are missing from the locale. If there are none this will set the value to an empty list.
            locale_issues["missing_variables"][key] = missingFromList(
                value, locale_value
            )

            # Find the dynamic variables that are additional to the locale. If there are none this will set the value to an empty list.
            locale_issues["additional_variables"][key] = missingFromList(
                locale_value, value
            )

            # Tag mismatches are tracked as a count difference (master minus locale).
            for issue_name, tags in tags_by_issue.items():
                locale_issues[issue_name][key] = len(tags["en"][key]) - len(
                    tags[locale_name][key]
                )

        locale_issues["additional_keys"] = [
            key for key in locale if key not in master_locale
        ]

        # Prune before deciding whether the locale has issues: the per-key
        # dictionaries get an entry for every shared key (empty list / zero
        # count when fine), so testing them un-pruned is always truthy.
        pruned_issues = {}
        for issue_name, found in locale_issues.items():
            if isinstance(found, dict):
                found = {k: v for k, v in found.items() if v}
            if found:
                pruned_issues[issue_name] = found

        # Only add the locale to the issues if there are any issues
        if pruned_issues:
            console.debug_json(f"locale_issues:", pruned_issues)
            issues[locale_name] = pruned_issues

    return issues
def prettyPrintIssuesTable(issues):
    """
    Pretty prints a table from the return of identifyLocaleDyanmicVariableDifferences
    where the rows are locale name and the columns are the issue types.
    Values will be number of occurrences of each issues.

    Args:
      issues (dict): The issues dictionary returned from identifyLocaleDyanmicVariableDifferences.
        Categories absent from a locale's issues are counted as 0.
    """

    PADDING = 10
    # NOTE(review): the sign convention differs between keys and variables
    # ("+ Keys" counts missing keys, "- Vars" counts missing variables). The
    # labels are kept as-is so the printed output does not change.
    headers = ("Locale", "+ Keys", "- Keys", "- Vars", "+ Vars")
    # One separator definition, sized from the header so it tracks the column count.
    separator = "-" * len(headers) * PADDING

    def formatRow(cells):
        # Left-align every cell in a fixed-width column.
        return "".join(f"{cell:<{PADDING}}" for cell in cells)

    # Print the header key
    print(
        f"\n{separator}\n\n"
        "+ Keys: Keys present in the master locale but missing in the locale\n"
        "- Keys: Keys present in the locale but missing in the master locale\n"
        "- Vars: Dynamic variables present in the master locale but missing in the locale\n"
        "+ Vars: Dynamic variables present in the locale but missing in the master locale\n"
    )

    # Print the header
    print(f"{formatRow(headers)}\n{separator}")

    for locale_name, locale_issues in issues.items():
        if locale_name == "en":
            continue

        missing_keys = len(locale_issues.get("missing_keys", []))
        additional_keys = len(locale_issues.get("additional_keys", []))
        missing_variables = sum(
            len(v) for v in locale_issues.get("missing_variables", {}).values()
        )
        additional_variables = sum(
            len(v) for v in locale_issues.get("additional_variables", {}).values()
        )

        print(
            formatRow(
                (
                    locale_name,
                    missing_keys,
                    additional_keys,
                    missing_variables,
                    additional_variables,
                )
            )
        )
def identifyAndPrintOldDynamicVariables(
    localeWithOldVariables, printOldVariables=False
):
    """
    Warns about every locale that still contains strings with old dynamic variables.

    Args:
      localeWithOldVariables (dict): A dictionary with keys being a locale name and values being a dictionary of string key -> list of old dynamic variables.
      printOldVariables (bool): When True, the offending keys and their old
        variables are included in the warning as JSON.

    Returns:
      bool: True if at least one locale contains a string with old dynamic
        variables, False otherwise.
    """
    found_problems = False
    for locale_name, locale in localeWithOldVariables.items():
        # Keep only the strings that actually contain old variables.
        invalid_strings = {key: value for key, value in locale.items() if value}
        if not invalid_strings:
            continue

        found_problems = True
        if printOldVariables:
            details = json.dumps(invalid_strings, indent=2, sort_keys=True)
            pointer = " (see above)"
        else:
            # Nothing is printed above the summary, so do not point at it.
            details = ""
            pointer = ""
        console.warn(
            f"{details}"
            f"\nLocale {locale_name} contains {len(invalid_strings)} strings with old dynamic variables.{pointer}"
        )
    return found_problems

@ -15,7 +15,7 @@ from dynamicVariables import (
extractVariablesFromDict, extractVariablesFromDict,
identifyLocaleDyanmicVariableDifferences, identifyLocaleDyanmicVariableDifferences,
prettyPrintIssuesTable, prettyPrintIssuesTable,
identifyAndPrintOldDynamicVariables, identifyAndPrintOldDynamicVariables, extractFormattingTags,
) )
from localization.localeTypes import generateLocalesType from localization.localeTypes import generateLocalesType
from util.logger import console from util.logger import console
@ -44,6 +44,11 @@ parser.add_argument(
action="store_true", action="store_true",
help="Print the problem strings and which locales they are in", help="Print the problem strings and which locales they are in",
) )
parser.add_argument(
    "--print-problem-formatting-tag-strings",
    action="store_true",
    help="Print the strings with missing formatting tags (<b>, <br/>, <span>), grouped by locale",
)
parser.add_argument( parser.add_argument(
"--write-problems", action="store_true", help="Write the problems to a file" "--write-problems", action="store_true", help="Write the problems to a file"
) )
@ -103,7 +108,9 @@ if GENERATE_TYPES:
localeVariables = dict() localeVariables = dict()
localeVariablesOld = dict() localeVariablesOld = dict()
locale_b_tags = dict()
locale_br_tags = dict()
locale_span_tags = dict()
# Extract the dynamic variables from each locale and store them in a dictionary # Extract the dynamic variables from each locale and store them in a dictionary
for locale, data in locales.items(): for locale, data in locales.items():
console.debug(f"Extracting dynamic variables for {locale}") console.debug(f"Extracting dynamic variables for {locale}")
@ -111,8 +118,15 @@ for locale, data in locales.items():
localeVariables[locale], localeVariables[locale],
localeVariablesOld[locale], localeVariablesOld[locale],
) = extractVariablesFromDict(data) ) = extractVariablesFromDict(data)
(
locale_b_tags[locale],
locale_br_tags[locale],
locale_span_tags[locale],
) = extractFormattingTags(data)
problems = identifyLocaleDyanmicVariableDifferences(localeVariables) problems = identifyLocaleDyanmicVariableDifferences(localeVariables, locale_b_tags,
locale_br_tags,
locale_span_tags, )
found_old_dynamic_variables = identifyAndPrintOldDynamicVariables( found_old_dynamic_variables = identifyAndPrintOldDynamicVariables(
localeVariablesOld, args.print_old_dynamic_variables localeVariablesOld, args.print_old_dynamic_variables
@ -138,9 +152,80 @@ if problems:
string_to_locales[problem_string] = [locale] string_to_locales[problem_string] = [locale]
else: else:
string_to_locales[problem_string].append(locale) string_to_locales[problem_string].append(locale)
# Also report strings whose formatting tags are missing in this locale.
# A positive count means the master locale has more of that tag than the locale.
for tag_issue_type in ("missing_br_tags", "missing_b_tags", "missing_span_tags"):
    for problem_string, tag_issues in locale_problems.get(tag_issue_type, {}).items():
        if tag_issues > 0:
            string_to_locales.setdefault(problem_string, []).append(locale)
console.info(f"Problem strings: {json.dumps(string_to_locales, indent=2)}") console.info(f"Problem strings: {json.dumps(string_to_locales, indent=2)}")
message += " See above for problem strings and which locales they are in." message += " See above for problem strings and which locales they are in."
if args.print_problem_formatting_tag_strings:
    # Tag name used in the report -> issue category holding its per-string counts.
    tag_issue_types = {
        "br": "missing_br_tags",
        "b": "missing_b_tags",
        "span": "missing_span_tags",
    }

    locales_to_strings = {}
    for locale, locale_problems in problems.items():
        locale_strings = {}
        for tag_type, issue_type in tag_issue_types.items():
            # Keys are already unique per category, so a list keeps the
            # report order stable between runs (a set would not).
            tag_strings = [
                problem_string
                for problem_string, tag_issues in locale_problems.get(issue_type, {}).items()
                if tag_issues > 0
            ]
            # Leave out tag types without problems to keep the JSON compact.
            if tag_strings:
                locale_strings[tag_type] = tag_strings
        locales_to_strings[locale] = locale_strings

    console.info(f"Problem strings: {json.dumps(locales_to_strings, indent=2)}")
    message += " See above for problem strings and which locales they are in."

    # Print one list per locale, with a link for the locale and for each string.
    number_of_problems = 0
    for locale, locale_strings in locales_to_strings.items():
        if not locale_strings:
            continue
        print(f"{locale} - [Link Here](https://crowdin.com/editor/session-crossplatform-strings/300/en-{locale})")
        for tag_strings in locale_strings.values():
            for tag_string in tag_strings:
                number_of_problems += 1
                print(
                    f"- [{tag_string}](https://crowdin.com/editor/session-crossplatform-strings/300/en-{locale}?view=comfortable&filter=basic&value=3#q={tag_string})")
    print(f"Total Problems: {number_of_problems}")
if args.print_problems: if args.print_problems:
prettyPrintIssuesTable(problems) prettyPrintIssuesTable(problems)
message += " See above for details." message += " See above for details."

Loading…
Cancel
Save