From 7f7bb81dc1c4fd18685dcc29405e42bc0c02894f Mon Sep 17 00:00:00 2001 From: Ryan Miller Date: Tue, 17 Sep 2024 10:15:58 +1000 Subject: [PATCH] chore: add formatting tag validation to generateLocales script --- tools/localization/dynamicVariables.py | 401 ++++++++++++++----------- tools/localization/generateLocales.py | 91 +++++- 2 files changed, 307 insertions(+), 185 deletions(-) diff --git a/tools/localization/dynamicVariables.py b/tools/localization/dynamicVariables.py index 1441588ad..bb6e7dec0 100644 --- a/tools/localization/dynamicVariables.py +++ b/tools/localization/dynamicVariables.py @@ -9,208 +9,245 @@ from util.listUtils import missingFromList from util.logger import console -def extractDynamicVariables(input_string, pattern): - """ - Extracts dynamic variables from the input string. - - Args: - input_string (str): The string to extract dynamic variables from. - - Returns: - list: A list of dynamic variables found in the input string. - """ - matches = re.findall(pattern, input_string) +def extractAllMatches(input_string, pattern): + """ + Extracts regex matches from the input string. + + Args: + input_string (str): The string to extract regex matches from. + + Returns: + list: A list of regex matches found in the input string. + """ + matches = re.findall(pattern, input_string) + if len(matches) > 0: console.debug(f"matches: {matches}") - return matches + return matches def extractOldDynamicVariables(input_string): - """ - Extracts dynamic variables from the input string. + """ + Extracts dynamic variables from the input string. - Args: - input_string (str): The string to extract dynamic variables from. + Args: + input_string (str): The string to extract dynamic variables from. - Returns: - list: A list of dynamic variables found in the input string. - """ - pattern = r"\$(\w+)\$" - matches = re.findall(pattern, input_string) - return matches + Returns: + list: A list of dynamic variables found in the input string. + """ + pattern = r"\$(\w+)\$" + matches = re.findall(pattern, input_string) + return matches def extractVariablesFromDict(input_dict): - """ - Reads through a dictionary of key-value pairs and creates a new dictionary - where the value is just a list of dynamic variables found in the original value. - - Args: - input_dict (dict): The dictionary to extract dynamic variables from. - - Returns: - dict: A dictionary with the same keys as input_dict, but the values are lists of dynamic variables. - """ - output_dict_new = {} - output_dict_old = {} - for key, value in input_dict.items(): - console.debug(f"key: {key}, value: {value}") - output_dict_new[key] = extractDynamicVariables(value, r"\{(\w+)\}") - output_dict_old[key] = extractDynamicVariables(value, r"\$(\w+)\$") - return output_dict_new, output_dict_old - - -def identifyLocaleDyanmicVariableDifferences(locales): - """ - Identifies the differences between each locale's dynamic variables. - - Args: - locales (dict): A dictionary with keys being a locale name and values being a dictionary of locales. - - Returns: - dict: A dictionary with the same keys as locales, but the values are dictionaries of issues. - """ - master_locale = locales["en"] - issues = {} - - for locale_name, locale in locales.items(): - if locale_name == "en": - continue - - locale_issues = { - "missing_keys": [], - "additional_keys": [], - "missing_variables": {}, - "additional_variables": {}, - } - - for key, value in master_locale.items(): - # If a key is missing from the locale, add it to the missing_keys list - if key not in locale: - locale_issues["missing_keys"].append(key) - else: - - locale_value = locale[key] - - # Find the dynamic variables that are missing from the locale. If there are none this will set the value to an empty list. - locale_issues["missing_variables"][key] = missingFromList( - value, locale_value - ) - - # Find the dynamic variables that are additional to the locale. If there are none this will set the value to an empty list. - locale_issues["additional_variables"][key] = missingFromList( - locale_value, value - ) - - for key in locale: - if key not in master_locale: - locale_issues["additional_keys"].append(key) - - # Only add the locale to the issues if there are any issues - if ( - locale_issues["missing_keys"] - or locale_issues["additional_keys"] - or locale_issues["missing_variables"] - or locale_issues["additional_variables"] - ): - - # Remove empty lists from missing_variables - locale_issues["missing_variables"] = { - k: v for k, v in locale_issues["missing_variables"].items() if v - } - - # Remove empty lists from additional_variables - locale_issues["additional_variables"] = { - k: v for k, v in locale_issues["additional_variables"].items() if v - } - - # remove missing_keys if it's empty - if not locale_issues["missing_keys"]: - del locale_issues["missing_keys"] - - # remove additional_keys if it's empty - if not locale_issues["additional_keys"]: - del locale_issues["additional_keys"] - - # Remove missing_variables if it's empty - if not locale_issues["missing_variables"]: - del locale_issues["missing_variables"] - - # Remove additional_variables if it's empty - if not locale_issues["additional_variables"]: - del locale_issues["additional_variables"] - - console.debug_json(f"locale_issues:", locale_issues) - issues[locale_name] = locale_issues - - return issues + """ + Reads through a dictionary of key-value pairs and creates a new dictionary + where the value is just a list of dynamic variables found in the original value. + + Args: + input_dict (dict): The dictionary to extract dynamic variables from. + + Returns: + dict: A dictionary with the same keys as input_dict, but the values are lists of dynamic variables. + """ + output_dict_new = {} + output_dict_old = {} + for key, value in input_dict.items(): + console.debug(f"key: {key}, value: {value}") + output_dict_new[key] = extractAllMatches(value, r"\{(\w+)\}") + output_dict_old[key] = extractAllMatches(value, r"\$(\w+)\$") + return output_dict_new, output_dict_old + + +def extractFormattingTags(input_dict): + """ + Reads through a dictionary of key-value pairs and creates a new dictionary + where the value is just a list of formatting tags found in the original value. + + Args: + input_dict (dict): The dictionary to extract formatting tags from. + + Returns: + dict: A dictionary with the same keys as input_dict, but the values are lists of formatting tags. + """ + output_dict_b_tags = {} + output_dict_br_tags = {} + output_dict_span_tags = {} + for key, value in input_dict.items(): + console.debug(f"key: {key}, value: {value}") + output_dict_b_tags[key] = extractAllMatches(value, r"(.*?)") + output_dict_br_tags[key] = extractAllMatches(value, r"
") + output_dict_span_tags[key] = extractAllMatches(value, r"(.*?)") + return output_dict_b_tags, output_dict_br_tags, output_dict_span_tags + + +def identifyLocaleDyanmicVariableDifferences(locales, locale_b_tags, + locale_br_tags, + locale_span_tags): + """ + Identifies the differences between each locale's dynamic variables. + + Args: + locales (dict): A dictionary with keys being a locale name and values being a dictionary of locales. + + Returns: + dict: A dictionary with the same keys as locales, but the values are dictionaries of issues. + """ + master_locale = locales["en"] + master_locale_b_tags = locale_b_tags["en"] + master_locale_br_tags = locale_br_tags["en"] + master_locale_span_tags = locale_span_tags["en"] + issues = {} + + for locale_name, locale in locales.items(): + current_locale_b_tags = locale_b_tags[locale_name] + current_locale_br_tags = locale_br_tags[locale_name] + current_locale_span_tags = locale_span_tags[locale_name] + if locale_name == "en": + continue + + locale_issues = { + "missing_keys": [], + "additional_keys": [], + "missing_variables": {}, + "additional_variables": {}, + "missing_b_tags": {}, + "missing_br_tags": {}, + "missing_span_tags": {}, + } + + for key, value in master_locale.items(): + # If a key is missing from the locale, add it to the missing_keys list + if key not in locale: + locale_issues["missing_keys"].append(key) + else: + + locale_value = locale[key] + + # Find the dynamic variables that are missing from the locale. If there are none this will set the value to an empty list. + locale_issues["missing_variables"][key] = missingFromList( + value, locale_value + ) + # Find the dynamic variables that are additional to the locale. If there are none this will set the value to an empty list. + locale_issues["additional_variables"][key] = missingFromList( + locale_value, value + ) -def prettyPrintIssuesTable(issues): - """ - Pretty prints a table from the return of identifyLocaleDyanmicVariableDifferences - where the rows are locale name and the columns are the issue types. - Values will be number of occurrences of each issues. + locale_issues["missing_b_tags"][key] = len(master_locale_b_tags[key]) - len(current_locale_b_tags[key]) + locale_issues["missing_br_tags"][key] = len(master_locale_br_tags[key]) - len(current_locale_br_tags[key]) + locale_issues["missing_span_tags"][key] = len(master_locale_span_tags[key]) - len(current_locale_span_tags[key]) - Args: - issues (dict): The issues dictionary returned from identifyLocaleDyanmicVariableDifferences. + for key in locale: + if key not in master_locale: + locale_issues["additional_keys"].append(key) - """ + # Only add the locale to the issues if there are any issues + if ( + locale_issues["missing_keys"] + or locale_issues["additional_keys"] + or locale_issues["missing_variables"] + or locale_issues["additional_variables"] + ): - PADDING = 10 + # Remove empty lists from missing_variables + locale_issues["missing_variables"] = { + k: v for k, v in locale_issues["missing_variables"].items() if v + } - # Print the header key - print( - f"\n{'-'*5*PADDING:<{PADDING}}\n\n" - f"+ Keys: Keys present in the master locale but missing in the locale\n" - f"- Keys: Keys present in the locale but missing in the master locale\n" - f"- Vars: Dynamic variables present in the master locale but missing in the locale\n" - f"+ Vars: Dynamic variables present in the locale but missing in the master locale\n" - ) + # Remove empty lists from additional_variables + locale_issues["additional_variables"] = { + k: v for k, v in locale_issues["additional_variables"].items() if v + } - # Print the header - print( - f"{'Locale':<{PADDING}}{'+ Keys':<{PADDING}}{'- Keys':<{PADDING}}{'- Vars':<{PADDING}}{'+ Vars':<{PADDING}}\n" - f"{'-'*5*PADDING:<{PADDING}}" - ) + # remove missing_keys if it's empty + if not locale_issues["missing_keys"]: + del locale_issues["missing_keys"] - for locale_name, locale_issues in issues.items(): - if locale_name == "en": - continue + # remove additional_keys if it's empty + if not locale_issues["additional_keys"]: + del locale_issues["additional_keys"] - missing_keys = len(locale_issues.get("missing_keys", [])) - additional_keys = len(locale_issues.get("additional_keys", [])) - missing_variables = sum( - len(v) for v in locale_issues.get("missing_variables", {}).values() - ) - additional_variables = sum( - len(v) for v in locale_issues.get("additional_variables", {}).values() - ) + # Remove missing_variables if it's empty + if not locale_issues["missing_variables"]: + del locale_issues["missing_variables"] - print( - f"{locale_name:<{PADDING}}{missing_keys:<{PADDING}}{additional_keys:<{PADDING}}{missing_variables:<{PADDING}}{additional_variables:<{PADDING}}" - ) + # Remove additional_variables if it's empty + if not locale_issues["additional_variables"]: + del locale_issues["additional_variables"] + + console.debug_json(f"locale_issues:", locale_issues) + issues[locale_name] = locale_issues + + return issues + + +def prettyPrintIssuesTable(issues): + """ + Pretty prints a table from the return of identifyLocaleDyanmicVariableDifferences + where the rows are locale name and the columns are the issue types. + Values will be number of occurrences of each issues. + + Args: + issues (dict): The issues dictionary returned from identifyLocaleDyanmicVariableDifferences. + + """ + + PADDING = 10 + + # Print the header key + print( + f"\n{'-' * 5 * PADDING:<{PADDING}}\n\n" + f"+ Keys: Keys present in the master locale but missing in the locale\n" + f"- Keys: Keys present in the locale but missing in the master locale\n" + f"- Vars: Dynamic variables present in the master locale but missing in the locale\n" + f"+ Vars: Dynamic variables present in the locale but missing in the master locale\n" + ) + + # Print the header + print( + f"{'Locale':<{PADDING}}{'+ Keys':<{PADDING}}{'- Keys':<{PADDING}}{'- Vars':<{PADDING}}{'+ Vars':<{PADDING}}\n" + f"{'-' * 5 * PADDING:<{PADDING}}" + ) + + for locale_name, locale_issues in issues.items(): + if locale_name == "en": + continue + + missing_keys = len(locale_issues.get("missing_keys", [])) + additional_keys = len(locale_issues.get("additional_keys", [])) + missing_variables = sum( + len(v) for v in locale_issues.get("missing_variables", {}).values() + ) + additional_variables = sum( + len(v) for v in locale_issues.get("additional_variables", {}).values() + ) + + print( + f"{locale_name:<{PADDING}}{missing_keys:<{PADDING}}{additional_keys:<{PADDING}}{missing_variables:<{PADDING}}{additional_variables:<{PADDING}}" + ) def identifyAndPrintOldDynamicVariables( - localeWithOldVariables, printOldVariables=False + localeWithOldVariables, printOldVariables=False ): - """ - Prints the keys that contain dynamic variables for each locale. - - Args: - localeWithOldVariables (dict): A dictionary with keys being a locale name and values being a dictionary of locales. - """ - found_problems = False - for locale_name, locale in localeWithOldVariables.items(): - invalid_strings = dict() - for key, value in locale.items(): - if value: - invalid_strings[key] = value - found_problems = True - if invalid_strings: - - console.warn( - f"{json.dumps(invalid_strings, indent=2, sort_keys=True) if printOldVariables else ''}" - f"\nLocale {locale_name} contains {len(invalid_strings)} strings with old dynamic variables. (see above)" - ) - return found_problems + """ + Prints the keys that contain dynamic variables for each locale. + + Args: + localeWithOldVariables (dict): A dictionary with keys being a locale name and values being a dictionary of locales. + """ + found_problems = False + for locale_name, locale in localeWithOldVariables.items(): + invalid_strings = dict() + for key, value in locale.items(): + if value: + invalid_strings[key] = value + found_problems = True + if invalid_strings: + console.warn( + f"{json.dumps(invalid_strings, indent=2, sort_keys=True) if printOldVariables else ''}" + f"\nLocale {locale_name} contains {len(invalid_strings)} strings with old dynamic variables. (see above)" + ) + return found_problems diff --git a/tools/localization/generateLocales.py b/tools/localization/generateLocales.py index b7eb52205..c8f4b1628 100755 --- a/tools/localization/generateLocales.py +++ b/tools/localization/generateLocales.py @@ -15,7 +15,7 @@ from dynamicVariables import ( extractVariablesFromDict, identifyLocaleDyanmicVariableDifferences, prettyPrintIssuesTable, - identifyAndPrintOldDynamicVariables, + identifyAndPrintOldDynamicVariables, extractFormattingTags, ) from localization.localeTypes import generateLocalesType from util.logger import console @@ -44,6 +44,11 @@ parser.add_argument( action="store_true", help="Print the problem strings and which locales they are in", ) +parser.add_argument( + "--print-problem-formatting-tag-strings", + action="store_true", + help="Print the problem strings and which locales they are in", +) parser.add_argument( "--write-problems", action="store_true", help="Write the problems to a file" ) @@ -103,7 +108,9 @@ if GENERATE_TYPES: localeVariables = dict() localeVariablesOld = dict() - +locale_b_tags = dict() +locale_br_tags = dict() +locale_span_tags = dict() # Extract the dynamic variables from each locale and store them in a dictionary for locale, data in locales.items(): console.debug(f"Extracting dynamic variables for {locale}") @@ -111,8 +118,15 @@ for locale, data in locales.items(): localeVariables[locale], localeVariablesOld[locale], ) = extractVariablesFromDict(data) + ( + locale_b_tags[locale], + locale_br_tags[locale], + locale_span_tags[locale], + ) = extractFormattingTags(data) -problems = identifyLocaleDyanmicVariableDifferences(localeVariables) +problems = identifyLocaleDyanmicVariableDifferences(localeVariables, locale_b_tags, + locale_br_tags, + locale_span_tags, ) found_old_dynamic_variables = identifyAndPrintOldDynamicVariables( localeVariablesOld, args.print_old_dynamic_variables @@ -138,9 +152,80 @@ if problems: string_to_locales[problem_string] = [locale] else: string_to_locales[problem_string].append(locale) + if "missing_br_tags" in locale_problems: + for problem_string, tag_issues in locale_problems["missing_br_tags"].items(): + if tag_issues > 0: + if problem_string not in string_to_locales: + string_to_locales[problem_string] = [locale] + else: + string_to_locales[problem_string].append(locale) + if "missing_b_tags" in locale_problems: + for problem_string, tag_issues in locale_problems["missing_b_tags"].items(): + if tag_issues > 0: + print("ME", problem_string, tag_issues) + if problem_string not in string_to_locales: + string_to_locales[problem_string] = [locale] + else: + string_to_locales[problem_string].append(locale) + if "missing_span_tags" in locale_problems: + for problem_string, tag_issues in locale_problems["missing_span_tags"].items(): + if tag_issues > 0: + if problem_string not in string_to_locales: + string_to_locales[problem_string] = [locale] + else: + string_to_locales[problem_string].append(locale) + console.info(f"Problem strings: {json.dumps(string_to_locales, indent=2)}") message += " See above for problem strings and which locales they are in." + if args.print_problem_formatting_tag_strings: + locales_to_strings = {} + for locale, locale_problems in problems.items(): + locale_missing_br_tags = set() + locale_missing_b_tags = set() + locale_missing_span_tags = set() + if "missing_br_tags" in locale_problems: + for problem_string, tag_issues in locale_problems["missing_br_tags"].items(): + if tag_issues > 0: + locale_missing_br_tags.add(problem_string) + if "missing_b_tags" in locale_problems: + for problem_string, tag_issues in locale_problems["missing_b_tags"].items(): + if tag_issues > 0: + locale_missing_b_tags.add(problem_string) + if "missing_span_tags" in locale_problems: + for problem_string, tag_issues in locale_problems["missing_span_tags"].items(): + if tag_issues > 0: + locale_missing_span_tags.add(problem_string) + + locales_to_strings[locale] = { + "br": list(locale_missing_br_tags), + "b": list(locale_missing_b_tags), + "span": list(locale_missing_span_tags), + } + + if locales_to_strings[locale]["br"] == []: + del locales_to_strings[locale]["br"] + if locales_to_strings[locale]["b"] == []: + del locales_to_strings[locale]["b"] + if locales_to_strings[locale]["span"] == []: + del locales_to_strings[locale]["span"] + + console.info(f"Problem strings: {json.dumps(locales_to_strings, indent=2)}") + message += " See above for problem strings and which locales they are in." + number_of_problems = 0 + for locale, locale_strings in locales_to_strings.items(): + printed_locale = False + for tag_type, tag_strings in locale_strings.items(): + if tag_strings: + if not printed_locale: + print(f"{locale} - [Link Here](https://crowdin.com/editor/session-crossplatform-strings/300/en-{locale})") + printed_locale = True + for tag_string in tag_strings: + number_of_problems += 1 + print( + f"- [{tag_string}](https://crowdin.com/editor/session-crossplatform-strings/300/en-{locale}?view=comfortable&filter=basic&value=3#q={tag_string})") + print(f"Total Problems: {number_of_problems}") + if args.print_problems: prettyPrintIssuesTable(problems) message += " See above for details."