import json import os import re import sys # This allows for importing from the localization and util directories NOTE: Auto importing tools will also prepend the import paths with "tools." this will not work and needs to be removed from import paths sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) from util.listUtils import missingFromList from util.logger import console def extractAllMatches(input_string, pattern): """ Extracts regex matches from the input string. Args: input_string (str): The string to extract regex matches from. Returns: list: A list of regex matches found in the input string. """ matches = re.findall(pattern, input_string) if len(matches) > 0: console.debug(f"matches: {matches}") return matches def extractOldDynamicVariables(input_string): """ Extracts dynamic variables from the input string. Args: input_string (str): The string to extract dynamic variables from. Returns: list: A list of dynamic variables found in the input string. """ pattern = r"\$(\w+)\$" matches = re.findall(pattern, input_string) return matches def extractVariablesFromDict(input_dict): """ Reads through a dictionary of key-value pairs and creates a new dictionary where the value is just a list of dynamic variables found in the original value. Args: input_dict (dict): The dictionary to extract dynamic variables from. Returns: dict: A dictionary with the same keys as input_dict, but the values are lists of dynamic variables. """ output_dict_new = {} output_dict_old = {} for key, value in input_dict.items(): console.debug(f"key: {key}, value: {value}") output_dict_new[key] = extractAllMatches(value, r"\{(\w+)\}") output_dict_old[key] = extractAllMatches(value, r"\$(\w+)\$") return output_dict_new, output_dict_old def extractDisallowedTags(input_dict, allowed_tags): """ Reads through a dictionary of key-value pairs and creates a new dictionary where the value is just a list of tags that are not allowed as per the allowed_tags. Args: input_dict (dict): The dictionary to extract tags from. allowed_tags (list): A list of allowed tag names (e.g., ['b', 'br', 'span']). Returns: dict: A dictionary with the same keys as input_dict, but the values are lists of disallowed tags. """ # Compile a regex to match any HTML-like tags tag_pattern = re.compile(r'<(/?)(\w+)[^>]*>') # Create a set of allowed tags for quick lookup allowed_tag_set = set(allowed_tags) output_dict = {} for key, value in input_dict.items(): disallowed_tags = [] for match in tag_pattern.finditer(value): tag_name = match.group(2) if tag_name not in allowed_tag_set: disallowed_tags.append(match.group(0)) output_dict[key] = disallowed_tags return output_dict def findImproperTags(input_dict): """ Reads through a dictionary of key-value pairs and identifies any uses of angled brackets that do not form a proper HTML tag. Args: input_dict (dict): The dictionary to search for improper tags. Returns: dict: A dictionary with the same keys as input_dict, but the values are lists of improper tags. """ # Regular expression to find improper use of angled brackets: # 1. Matches a standalone '<' or '>' not forming a valid tag. # 2. Matches text enclosed in angled brackets that do not form a valid HTML tag. improper_tag_pattern = re.compile(r'<[^>]*>|>') output_dict = {} for key, value in input_dict.items(): # Find all improper tag matches improper_tags = [match for match in improper_tag_pattern.findall(value) if not re.match(r'<\s*/?\s*\w+.*?>', match)] # Store the results in the output dictionary output_dict[key] = improper_tags return output_dict def flagInvalidAngleBrackets(input_dict, allowed_tag_starts): """ Flags an issue if a string contains an angled bracket '<' but that angle bracket is not followed by a 'b' or an 's' (case-insensitive). Args: input_dict (dict): A dictionary where the values are strings to check. Returns: dict: A dictionary where keys are the same as input_dict, and values are lists of issues found in the corresponding string. """ output_dict = {} for key, value in input_dict.items(): issues = [] # Find all occurrences of '<' indices = [m.start() for m in re.finditer('<', value)] for idx in indices: # Look ahead to find the next non-space character after '<' match = re.match(r'\s*([^\s>])', value[idx + 1:]) if match: next_char = match.group(1) if next_char.lower() not in allowed_tag_starts: # Flag an issue snippet = value[idx:idx + 10] # Extract a snippet for context issues.append(f"Invalid tag starting with '<{next_char}' at position {idx}: '{snippet}'") else: # No non-space character after '<', flag an issue issues.append(f"Invalid angle bracket '<' at position {idx}") if issues: output_dict[key] = issues return output_dict def extractFormattingTags(input_dict): """ Reads through a dictionary of key-value pairs and creates a new dictionary where the value is just a list of formatting tags found in the original value. Args: input_dict (dict): The dictionary to extract formatting tags from. Returns: dict: A dictionary with the same keys as input_dict, but the values are lists of formatting tags. """ output_dict_b_tags = {} output_dict_br_tags = {} output_dict_span_tags = {} disallowed_tags = extractDisallowedTags(input_dict, ["b", "br", "span"]) improper_tags = findImproperTags(input_dict) for key, value in input_dict.items(): console.debug(f"key: {key}, value: {value}") output_dict_b_tags[key] = extractAllMatches(value, r"(.*?)") output_dict_br_tags[key] = extractAllMatches(value, r"
") output_dict_span_tags[key] = extractAllMatches(value, r"(.*?)") return output_dict_b_tags, output_dict_br_tags, output_dict_span_tags, disallowed_tags, improper_tags def identifyLocaleDynamicVariableDifferences(locales, locale_b_tags, locale_br_tags, locale_span_tags, locale_disallowed_tags, locale_improper_tags): """ Identifies the differences between each locale's dynamic variables. Args: locales (dict): A dictionary with keys being a locale name and values being a dictionary of locales. Returns: dict: A dictionary with the same keys as locales, but the values are dictionaries of issues. """ master_locale = locales["en"] master_locale_b_tags = locale_b_tags["en"] master_locale_br_tags = locale_br_tags["en"] master_locale_span_tags = locale_span_tags["en"] issues = {} for locale_name, locale in locales.items(): current_locale_b_tags = locale_b_tags[locale_name] current_locale_br_tags = locale_br_tags[locale_name] current_locale_span_tags = locale_span_tags[locale_name] current_locale_disallowed_tags = locale_disallowed_tags[locale_name] current_locale_improper_tags = locale_improper_tags[locale_name] if locale_name == "en": continue locale_issues = { "missing_keys": [], "additional_keys": [], "missing_variables": {}, "additional_variables": {}, "missing_b_tags": {}, "missing_br_tags": {}, "missing_span_tags": {}, "disallowed_tags": {}, "improper_tags": {}, } for key, value in master_locale.items(): # If a key is missing from the locale, add it to the missing_keys list if key not in locale: locale_issues["missing_keys"].append(key) else: locale_value = locale[key] # Find the dynamic variables that are missing from the locale. If there are none this will set the value to an empty list. locale_issues["missing_variables"][key] = missingFromList( value, locale_value ) # Find the dynamic variables that are additional to the locale. If there are none this will set the value to an empty list. locale_issues["additional_variables"][key] = missingFromList( locale_value, value ) locale_issues["missing_b_tags"][key] = len(master_locale_b_tags[key]) - len(current_locale_b_tags[key]) locale_issues["missing_br_tags"][key] = len(master_locale_br_tags[key]) - len(current_locale_br_tags[key]) locale_issues["missing_span_tags"][key] = len(master_locale_span_tags[key]) - len(current_locale_span_tags[key]) locale_issues["disallowed_tags"][key] = len(current_locale_disallowed_tags[key]) locale_issues["improper_tags"][key] = len(current_locale_improper_tags[key]) for key in locale: if key not in master_locale: locale_issues["additional_keys"].append(key) # Only add the locale to the issues if there are any issues if ( locale_issues["missing_keys"] or locale_issues["additional_keys"] or locale_issues["missing_variables"] or locale_issues["additional_variables"] ): # Remove empty lists from missing_variables locale_issues["missing_variables"] = { k: v for k, v in locale_issues["missing_variables"].items() if v } # Remove empty lists from additional_variables locale_issues["additional_variables"] = { k: v for k, v in locale_issues["additional_variables"].items() if v } # remove missing_keys if it's empty if not locale_issues["missing_keys"]: del locale_issues["missing_keys"] # remove additional_keys if it's empty if not locale_issues["additional_keys"]: del locale_issues["additional_keys"] # Remove missing_variables if it's empty if not locale_issues["missing_variables"]: del locale_issues["missing_variables"] # Remove additional_variables if it's empty if not locale_issues["additional_variables"]: del locale_issues["additional_variables"] console.debug_json(f"locale_issues:", locale_issues) issues[locale_name] = locale_issues return issues def prettyPrintIssuesTable(issues): """ Pretty prints a table from the return of identifyLocaleDynamicVariableDifferences where the rows are locale name and the columns are the issue types. Values will be number of occurrences of each issues. Args: issues (dict): The issues dictionary returned from identifyLocaleDynamicVariableDifferences. """ PADDING = 10 # Print the header key print( f"\n{'-' * 5 * PADDING:<{PADDING}}\n\n" f"+ Keys: Keys present in the master locale but missing in the locale\n" f"- Keys: Keys present in the locale but missing in the master locale\n" f"- Vars: Dynamic variables present in the master locale but missing in the locale\n" f"+ Vars: Dynamic variables present in the locale but missing in the master locale\n" ) # Print the header print( f"{'Locale':<{PADDING}}{'+ Keys':<{PADDING}}{'- Keys':<{PADDING}}{'- Vars':<{PADDING}}{'+ Vars':<{PADDING}}\n" f"{'-' * 5 * PADDING:<{PADDING}}" ) for locale_name, locale_issues in issues.items(): if locale_name == "en": continue missing_keys = len(locale_issues.get("missing_keys", [])) additional_keys = len(locale_issues.get("additional_keys", [])) missing_variables = sum( len(v) for v in locale_issues.get("missing_variables", {}).values() ) additional_variables = sum( len(v) for v in locale_issues.get("additional_variables", {}).values() ) print( f"{locale_name:<{PADDING}}{missing_keys:<{PADDING}}{additional_keys:<{PADDING}}{missing_variables:<{PADDING}}{additional_variables:<{PADDING}}" ) def identifyAndPrintOldDynamicVariables( localeWithOldVariables, printOldVariables=False ): """ Prints the keys that contain dynamic variables for each locale. Args: localeWithOldVariables (dict): A dictionary with keys being a locale name and values being a dictionary of locales. """ found_problems = False for locale_name, locale in localeWithOldVariables.items(): invalid_strings = dict() for key, value in locale.items(): if value: invalid_strings[key] = value found_problems = True if invalid_strings: console.warn( f"{json.dumps(invalid_strings, indent=2, sort_keys=True) if printOldVariables else ''}" f"\nLocale {locale_name} contains {len(invalid_strings)} strings with old dynamic variables. (see above)" ) return found_problems