@@ -5,22 +5,22 @@ import csv
import re
import glob
import argparse
import multiprocessing
import json
from functools import partial
# Allow imports from the localization and util directories.
# NOTE: auto-import tools may prepend import paths with "tools."; that does not work and must be removed from the import paths.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from util.time import ExecutionTimer
import time

timer = ExecutionTimer()

from localization.parseDictionary import parse_dictionary
from localization.regex import localization_regex, localization_regex_as_list
from util.listUtils import missingFromSet, removeFromSet
from util.fileUtils import makeDirIfNotExists, removeFileIfExists
from util.logger import console
parser = argparse.ArgumentParser()
parser.add_argument(
    "--debug", action="store_true", help="Enable debug mode, print debug messages"
@@ -32,23 +32,58 @@ parser.add_argument(
help = " Output directory for the results " ,
)
parser.add_argument(
    "--write-found-to-file",
    action="store_true",
    help="Write the found strings to a file",
)
parser.add_argument(
    "--write-not-found-to-file",
    action="store_true",
    help="Write the not found strings to a file",
)
parser.add_argument(
    "--print-not-found",
    action="store_true",
    help="Print the not found strings",
)
parser.add_argument(
    "--identify-found-in-files",
    action="store_true",
    help="Identify the files each string was found in.",
)
parser.add_argument(
    "--identify-line-numbers",
    action="store_true",
    help="Identify line numbers of matches using regex.",
)
parser.add_argument(
    "--disable-concurrency",
    action="store_true",
    help="Disable multiprocessing concurrency.",
)
parser.add_argument(
    "--find-potential-matches",
    action="store_true",
    help="Find potential string matches using a very lazy regex.",
)
parser.add_argument(
    "--delete-unused-keys",
    action="store_true",
    help="Delete unused keys.",
)
args = parser.parse_args()
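# Example invocations (script name illustrative):
#   python <this-script>.py --write-found-to-file --identify-line-numbers
#   python <this-script>.py --find-potential-matches --print-not-found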
# Configuration
intentionallyUnusedStrings = []
DEBUG = args.debug
CONCURRENCY_ENABLED = not args.disable_concurrency
if CONCURRENCY_ENABLED and (args.identify_found_in_files or args.identify_line_numbers):
    CONCURRENCY_ENABLED = False
    console.info("Concurrency is disabled when --identify-found-in-files or --identify-line-numbers is used")
if CONCURRENCY_ENABLED:
    console.info("Concurrency enabled. Use --disable-concurrency to disable it.")
if DEBUG:
    console.enableDebug()
@@ -60,9 +95,6 @@ NOT_IN_MASTER_LIST_PATH = os.path.join(OUTPUT_DIR, "not_in_master_list.csv")
EN_PATH = "_locales/en/messages.json"

# Remove files that are to be generated if they exist
removeFileIfExists(FOUND_STRINGS_PATH)
removeFileIfExists(NOT_FOUND_STRINGS_PATH)
@@ -76,247 +108,282 @@ def flush():
# File search setup
console.info("Scanning for localized strings...")
files_to_ignore = ["./ts/localization/locales.ts"]
ignore_patterns = [re.compile(re.escape(pattern)) for pattern in files_to_ignore]
console.debug(f"Ignoring files: {', '.join(files_to_ignore)}")


def should_ignore_file(path):
    return any(pattern.search(path) for pattern in ignore_patterns)
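# Recursively gather candidate source files. os.scandir yields DirEntry
# objects with cached type information, avoiding the per-path stat overhead
# of an os.walk + glob pass over the same tree.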
def find_files_with_extension(root_dir, extensions):
    for entry in os.scandir(root_dir):
        if entry.is_dir():
            yield from find_files_with_extension(entry.path, extensions)
        elif any(entry.name.endswith(ext) for ext in extensions) and not should_ignore_file(entry.path):
            yield entry.path


os_walk_time_start = time.perf_counter()
files = set(find_files_with_extension("./ts/", (".ts", ".tsx")))
files.update(
    [
        y
        for x in os.walk("./")
        for y in glob.glob(os.path.join(x[0], "*preload.js"))
        if not should_ignore_file(y)
    ]
)
os_walk_time_end = time.perf_counter()
bar_length = 25
PROGRESS_BAR_CURRENT_PERCENTAGE = 0
def progress_bar(current, total):
    global PROGRESS_BAR_CURRENT_PERCENTAGE
    if DEBUG:
        return
    percent_overall = round(100 * current / total)
    if percent_overall <= PROGRESS_BAR_CURRENT_PERCENTAGE:
        return
    PROGRESS_BAR_CURRENT_PERCENTAGE = percent_overall
    sys.stdout.write("\r")
    sys.stdout.write(
        "Progress: [{:{}}] {:>3}%".format(
            "=" * int(percent_overall / (100 / bar_length)),
            bar_length,
            int(percent_overall),
        )
    )
    sys.stdout.flush()
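# The bar only repaints when the integer percentage advances, so stdout is
# written at most ~100 times however many keys are processed.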
# Read the locale file and collect all message keys
parse_locale_file_time_start = time.perf_counter()
with open(EN_PATH, "r", encoding="utf-8") as messages_file:
    key_list = list(json.load(messages_file).keys())
number_of_keys = len(key_list)
console.info(f"Loaded {number_of_keys} keys to search for")
parse_locale_file_time_end = time.perf_counter()


def search_string_in_regex_list(regex_list, file_content):
    return any(matcher.search(file_content) for matcher in regex_list)
def load_file(file_path):
    console.debug(f"Loading {file_path} into memory")
    with open(file_path, "r", encoding="utf-8") as file_content:
        return file_content.read()


def format_vscode_path(file_path):
    return file_path.replace("./", "")


read_files_time_start = time.perf_counter()
loaded_files = [load_file(file_path) for file_path in files]
read_files_time_end = time.perf_counter()
# Search
def find_key(key):
    regex_list = localization_regex_as_list(key)
    return key if any(
        search_string_in_regex_list(regex_list, file_content) for file_content in loaded_files
    ) else None
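# Concurrent path: fan the keys out across a worker pool. Each worker returns
# the key if any file matched, or None; the None sentinel is dropped later by
# intersecting the results with key_list.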
def process_keys_concurrently():
    with multiprocessing.Pool() as pool:
        result = pool.map(find_key, key_list)
    return set(result)
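# NOTE (assumption): the script has no `if __name__ == "__main__":` guard, so
# the pool relies on fork-style process creation inheriting `files` and
# `loaded_files`; on spawn-based platforms (e.g. Windows) each worker would
# re-import and re-run the whole module.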
REGEX_TIME_TRACKER = 0.0


def regex_find(regex_list, file_content):
    global REGEX_TIME_TRACKER  # Accumulated time spent in regex searches
    regex_start = time.perf_counter()
    found = search_string_in_regex_list(regex_list, file_content)
    regex_end = time.perf_counter()
    REGEX_TIME_TRACKER += regex_end - regex_start
    return found
def print_search(search_key, search_info=""):
    console.debug(f"{search_key:<42} | {search_info}")
def process_keys():
    found_strings_and_locations = {}  # Found strings and their locations
    found_strings_set = set()  # Found strings
    not_found_strings_set = set()  # Not found strings
    for_loop_iterations = {}
    if DEBUG:
        for_loop_iterations["keys"] = 0
        for_loop_iterations["files"] = 0
        for_loop_iterations["lines"] = 0
    for i in range(number_of_keys):
        key = key_list[i]
        regex_list = localization_regex_as_list(key)
        progress_bar(i, number_of_keys)
        if DEBUG:
            for_loop_iterations["keys"] += 1
        print_search(key, "Searching")
        locations = []
        for j, file_path in enumerate(files):
            if DEBUG:
                for_loop_iterations["files"] += 1
            if not regex_find(regex_list, loaded_files[j]):
                continue
            found_strings_set.add(key)
            print_search(key, f"Found string in {file_path}")
            if args.identify_line_numbers:
                for line_number, line in enumerate(loaded_files[j].split("\n"), start=1):
                    if DEBUG:
                        for_loop_iterations["lines"] += 1
                    if regex_find(regex_list, line):
                        locations.append(f"./{file_path}:{line_number}")
        if key not in found_strings_set:
            not_found_strings_set.add(key)
            print_search(key, "Not Found")
        if locations:
            print_search(key, f"Found in {len(locations)} locations")
            found_strings_and_locations[key] = locations
    if DEBUG:
        console.debug(for_loop_iterations)
    return found_strings_set, not_found_strings_set, found_strings_and_locations
found_strings_and_locations = None
processing_time_start = time.perf_counter()
if CONCURRENCY_ENABLED:
    results_set = process_keys_concurrently()
    found_keys = set(key_list).intersection(results_set)
    not_found_keys = set(key_list).difference(results_set)
else:
    found_keys, not_found_keys, found_strings_and_locations = process_keys()
processing_time_end = time.perf_counter()
progress_bar(1, 1)
flush()
# Write found strings and their locations to a CSV file
if args.write_found_to_file and found_strings_and_locations is not None:
    makeDirIfNotExists(FOUND_STRINGS_PATH)
    with open(FOUND_STRINGS_PATH, "w", encoding="utf-8", newline="") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(["String", "Locations"])  # Header row
        for foundString, locations in found_strings_and_locations.items():
            # Locations are joined into a single string for CSV simplicity
            csvwriter.writerow([foundString, "; ".join(locations)])
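# Illustrative row (hypothetical paths): "someKey","./ts/components/Foo.tsx:42; ./ts/util/bar.ts:7"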
# Write not found strings to a text file
if args.write_not_found_to_file:
    makeDirIfNotExists(NOT_FOUND_STRINGS_PATH)
    with open(NOT_FOUND_STRINGS_PATH, "w", encoding="utf-8") as not_found_file:
        for notFound in not_found_keys:
            not_found_file.write(f"{notFound}\n")
num_found = len(found_keys)
num_not_found = len(not_found_keys)
sys.stdout.write("\n")

if args.print_not_found:
    for key in not_found_keys:
        print(key)
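# Lazy fallback: a key the strict localization regex missed may still appear
# as a bare quoted string ('someKey' or "someKey"), so scan for that pattern
# to surface potential matches.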
def find_key_lazy(key):
    regex = re.compile(fr"['\"]{re.escape(key)}['\"]")
    for i, file_path in enumerate(files):
        if regex.search(loaded_files[i]):
            return key, file_path
    return None, None
def find_lazy_matches_for_not_found():
    with multiprocessing.Pool() as pool:
        result = pool.map(find_key_lazy, not_found_keys)
    return set(result)
potential_matches = set()
if args.find_potential_matches:
    potential_matches = find_lazy_matches_for_not_found()
    potential_matches.discard((None, None))
    for key, file_name in potential_matches:
        console.info(f"{key:<42} | Potential Match: {file_name}")
    console.info(f"Found {len(potential_matches)} potential matches")
sys.stdout.write("\n")

# Print the result statistics and file paths (linkable)
console.info(
    f"Found {num_found}/{number_of_keys} ({num_found / number_of_keys:.0%}) strings in {len(files)} files"
)
if args.find_potential_matches and len(potential_matches) > 0:
    console.info(
        f"(Including all potential matches) Found {num_found + len(potential_matches)}/{number_of_keys} "
        f"({(num_found + len(potential_matches)) / number_of_keys:.0%}) strings in {len(files)} files"
    )

if args.write_found_to_file:
    console.info(f"Found strings and their locations written to: {FOUND_STRINGS_PATH}")

if args.write_not_found_to_file:
    console.info(
        f"Identified {num_not_found} not found strings, written to: {NOT_FOUND_STRINGS_PATH}"
    )
else:
    console.info(f"Identified {num_not_found} not found strings")
if DEBUG and REGEX_TIME_TRACKER > 0:
    console.debug(f"Time spent in regex land: {REGEX_TIME_TRACKER:0.4f} seconds")
if DEBUG:
    console.warn(
        "This script ran with debug enabled. Disable debug mode for cleaner output and faster execution."
    )
os_walk_time = os_walk_time_end - os_walk_time_start
parse_locale_time = parse_locale_file_time_end - parse_locale_file_time_start
read_files_time = read_files_time_end - read_files_time_start
processing_time = processing_time_end - processing_time_start
console.debug(f"OS walk time: {os_walk_time:0.4f} seconds")
console.debug(f"Locale file parse time: {parse_locale_time:0.4f} seconds")
console.debug(f"File reading time: {read_files_time:0.4f} seconds")
console.debug(f"Processing time: {processing_time:0.4f} seconds")
console.debug(
    f"Total elapsed tracked time: {os_walk_time + parse_locale_time + read_files_time + processing_time:0.4f} seconds"
)
timer.stop()
def remove_keys_from_json(json_file_path, keys_to_remove):
    # Load the JSON data from the file
    with open(json_file_path, "r", encoding="utf-8") as json_file:
        data = json.load(json_file)
    # Remove the specified keys from the JSON data
    data = {key: value for key, value in data.items() if key not in keys_to_remove}
    # Write the updated data back to the original JSON file
    with open(json_file_path, "w", encoding="utf-8") as json_file:
        json.dump(data, json_file, ensure_ascii=False, indent=4)
    print(f"Keys removed and JSON file updated: {json_file_path}")
if args.delete_unused_keys:
    # The extensions argument must be an iterable of suffixes; a bare string
    # would be iterated character by character. The locale files are named
    # messages.json (see EN_PATH).
    locale_files = find_files_with_extension("./_locales", ("messages.json",))
    for locale_file in locale_files:
        remove_keys_from_json(locale_file, not_found_keys)
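# Caution: not_found_keys reflects only the files scanned above (./ts plus
# *preload.js), so keys referenced anywhere else would be deleted as unused.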