diff --git a/Scripts/ProcessIP2CountryData.swift b/Scripts/ProcessIP2CountryData.swift index 224d5b27d..accfbaaca 100644 --- a/Scripts/ProcessIP2CountryData.swift +++ b/Scripts/ProcessIP2CountryData.swift @@ -26,7 +26,7 @@ let destinationFileName: String = "GeoLite2-Country-Blocks-IPv4" // Types struct IP2CountryCache { - var countryBlocksIPInt: [Int] = [] + var countryBlocksIPInt: [Int64] = [] var countryBlocksGeonameId: [String] = [] var countryLocationsLocaleCode: [String] = [] @@ -35,11 +35,11 @@ struct IP2CountryCache { } public enum IPv4 { - public static func toInt(_ ip: String) -> Int? { - let octets: [Int] = ip.split(separator: ".").compactMap { Int($0) } + public static func toInt(_ ip: String) -> Int64? { + let octets: [Int64] = ip.split(separator: ".").compactMap { Int64($0) } guard octets.count > 1 else { return nil } - var result: Int = 0 + var result: Int64 = 0 for i in stride(from: 3, through: 0, by: -1) { result += octets[ 3 - i ] << (i * 8) } @@ -67,6 +67,26 @@ class Processor { print("\r\(prefix)[\(bar)] \(Int(progress * 100))%", terminator: "") fflush(stdout) } + + static func parseCsvLine(_ line: String) -> [String] { + var result: [String] = [] + var currentField: String = "" + var inQuotedField: Bool = false + + for char in line { + if char == "," && !inQuotedField { + result.append(currentField) + currentField = "" + } else if char == "\"" { + inQuotedField.toggle() + } else { + currentField.append(char) + } + } + + result.append(currentField) + return result + } static func processFiles() { print("Searching For files") @@ -142,33 +162,22 @@ class Processor { /// Structure of the data should be `network,registered_country_geoname_id` let countryBlockPrefix: String = "Processing country blocks: " - var prevId: String? = nil lines[1...].enumerated().forEach { index, line in guard keepRunning else { return } - let values: [String] = line - .trimmingCharacters(in: .whitespacesAndNewlines) - .components(separatedBy: ",") - - let currId = values[1] + let values: [String] = parseCsvLine(line.trimmingCharacters(in: .whitespacesAndNewlines)) + let progress = (Double(index) / Double(lines.count)) + printProgressBar(prefix: countryBlockPrefix, progress: progress, total: (terminalWidth - 10)) guard values.count == 2, let ipNoSubnetMask: String = values[0].components(separatedBy: "/").first, - let ipAsInt: Int = IPv4.toInt(ipNoSubnetMask) + let ipAsInt: Int64 = IPv4.toInt(ipNoSubnetMask), + cache.countryBlocksGeonameId.last != values[1] else { return } - if prevId == currId { - cache.countryBlocksIPInt[cache.countryBlocksIPInt.count - 1] = ipAsInt - } else { - cache.countryBlocksIPInt.append(ipAsInt) - cache.countryBlocksGeonameId.append(currId) - } - - prevId = currId - - let progress = (Double(index) / Double(lines.count)) - printProgressBar(prefix: countryBlockPrefix, progress: progress, total: (terminalWidth - 10)) + cache.countryBlocksIPInt.append(ipAsInt) + cache.countryBlocksGeonameId.append(values[1]) } guard keepRunning else { return } print("\r\u{1B}[2KProcessing country blocks completed ✅") @@ -187,9 +196,7 @@ class Processor { guard lines.count > 1 else { fatalError("Localised country file had no content") } lines[1...].enumerated().forEach { index, line in - let values: [String] = line - .trimmingCharacters(in: .whitespacesAndNewlines) - .components(separatedBy: ",") + let values: [String] = parseCsvLine(line.trimmingCharacters(in: .whitespacesAndNewlines)) guard values.count == 7 else { return } cache.countryLocationsLocaleCode.append(values[1]) @@ -207,7 +214,7 @@ class Processor { var outputData: Data = Data() var ipCount = Int32(cache.countryBlocksIPInt.count) outputData.append(Data(bytes: &ipCount, count: MemoryLayout.size)) - outputData.append(Data(bytes: cache.countryBlocksIPInt, count: cache.countryBlocksIPInt.count * MemoryLayout.size)) + outputData.append(Data(bytes: cache.countryBlocksIPInt, count: cache.countryBlocksIPInt.count * MemoryLayout.size)) let geonameIdData: Data = cache.countryBlocksGeonameId.joined(separator: "\0\0").data(using: .utf8)! var geonameIdCount = Int32(geonameIdData.count) diff --git a/Session/Meta/Countries/GeoLite2-Country-Blocks-IPv4 b/Session/Meta/Countries/GeoLite2-Country-Blocks-IPv4 index ed55fb131..a6ad8d122 100644 Binary files a/Session/Meta/Countries/GeoLite2-Country-Blocks-IPv4 and b/Session/Meta/Countries/GeoLite2-Country-Blocks-IPv4 differ diff --git a/Session/Utilities/IP2Country.swift b/Session/Utilities/IP2Country.swift index 1630b54fa..59fdf72d5 100644 --- a/Session/Utilities/IP2Country.swift +++ b/Session/Utilities/IP2Country.swift @@ -8,6 +8,10 @@ import GRDB import SessionSnodeKit import SessionUtilitiesKit +private extension Log.Category { + static var ip2Country: Log.Category = "IP2Country" +} + public enum IP2Country { public static var isInitialized: Atomic = Atomic(false) private static var countryNamesCache: Atomic<[String: String]> = Atomic([:]) @@ -46,7 +50,7 @@ public enum IP2Country { /// (or `en` as default), then find the `geonameId` index from `countryLocationsGeonameId` using the same range, and that index /// should be retrieved from `countryLocationsCountryName` in order to get the country name struct IP2CountryCache { - var countryBlocksIPInt: [Int] = [] + var countryBlocksIPInt: [Int64] = [] var countryBlocksGeonameId: [String] = [] var countryLocationsLocaleCode: [String] = [] @@ -70,7 +74,14 @@ public enum IP2Country { var remainingData: Data = data.advanced(by: MemoryLayout.size) /// Extract the IPs - var countryBlockIpInts: [Int] = [Int](repeating: 0, count: Int(countryBlockIPCount)) + var countryBlockIpInts: [Int64] = [Int64](repeating: 0, count: Int(countryBlockIPCount)) + remainingData.withUnsafeBytes { buffer in + _ = countryBlockIpInts.withUnsafeMutableBytes { ipBuffer in + memcpy(ipBuffer.baseAddress, buffer.baseAddress, Int(countryBlockIPCount) * MemoryLayout.size) + } + } + + var countryBlockIpInts2: [Int] = [Int](repeating: 0, count: Int(countryBlockIPCount)) remainingData.withUnsafeBytes { buffer in _ = countryBlockIpInts.withUnsafeMutableBytes { ipBuffer in memcpy(ipBuffer.baseAddress, buffer.baseAddress, Int(countryBlockIPCount) * MemoryLayout.size) @@ -78,31 +89,55 @@ public enum IP2Country { } /// Extract arrays from the parts - func consumeStringArray(from targetData: inout Data) -> [String] { - var targetCount: Int32 = 0 - _ = withUnsafeMutableBytes(of: &targetCount) { countBuffer in - targetData.copyBytes(to: countBuffer, from: ...size) + func consumeStringArray(_ name: String, from targetData: inout Data) -> [String] { + /// The data should have a count, followed by actual data (so should have more data than an Int32 would take + guard targetData.count > MemoryLayout.size else { + Log.error(.ip2Country, "\(name) doesn't have enough data after the count.") + return [] } - /// Move past the count - targetData = targetData.advanced(by: MemoryLayout.size) + var targetCount: Int32 = targetData + .prefix(MemoryLayout.size) + .withUnsafeBytes { bytes -> Int32 in + guard + bytes.count >= MemoryLayout.size, + let baseAddress: UnsafePointer = bytes + .bindMemory(to: Int32.self) + .baseAddress + else { return 0 } + + return baseAddress.pointee + } + + /// Move past the count and extract the content data + targetData = targetData.dropFirst(MemoryLayout.size) + let contentData: Data = targetData.prefix(Int(targetCount)) guard - targetData.count >= targetCount, - let contentString: String = String(data: Data(targetData[.. targetData.count { + Log.error(.ip2Country, "\(name) suggested it had mare data then was actually available (\(targetCount) vs. \(targetData.count)).") + } /// Move past the data and return the result - targetData = targetData.advanced(by: Int(targetCount)) + targetData = targetData.dropFirst(Int(targetCount)) return contentString.components(separatedBy: "\0\0") } /// Move past the IP data - remainingData = remainingData.advanced(by: (Int(countryBlockIPCount) * MemoryLayout.size)) - let countryBlocksGeonameIds: [String] = consumeStringArray(from: &remainingData) - let countryLocaleCodes: [String] = consumeStringArray(from: &remainingData) - let countryGeonameIds: [String] = consumeStringArray(from: &remainingData) - let countryNames: [String] = consumeStringArray(from: &remainingData) + remainingData = remainingData.advanced(by: (Int(countryBlockIPCount) * MemoryLayout.size)) + let countryBlocksGeonameIds: [String] = consumeStringArray("CountryBlocks", from: &remainingData) + let countryLocaleCodes: [String] = consumeStringArray("LocaleCodes", from: &remainingData) + let countryGeonameIds: [String] = consumeStringArray("Geonames", from: &remainingData) + let countryNames: [String] = consumeStringArray("CountryNames", from: &remainingData) return IP2CountryCache( countryBlocksIPInt: countryBlockIpInts, @@ -151,7 +186,7 @@ public enum IP2Country { guard nameCache["\(ip)-\(currentLocale)"] == nil else { return } guard - let ipAsInt: Int = IPv4.toInt(ip), + let ipAsInt: Int64 = IPv4.toInt(ip), let countryBlockGeonameIdIndex: Int = cache.countryBlocksIPInt.firstIndex(where: { $0 > ipAsInt }).map({ $0 - 1 }), let localeStartIndex: Int = cache.countryLocationsLocaleCode.firstIndex(where: { $0 == currentLocale }), let countryNameIndex: Int = Array(cache.countryLocationsGeonameId[localeStartIndex...]).firstIndex(where: { geonameId in diff --git a/SessionUtilitiesKit/Networking/IPv4.swift b/SessionUtilitiesKit/Networking/IPv4.swift index b4c490ec6..90a79c073 100644 --- a/SessionUtilitiesKit/Networking/IPv4.swift +++ b/SessionUtilitiesKit/Networking/IPv4.swift @@ -5,11 +5,11 @@ import Foundation public enum IPv4 { - public static func toInt(_ ip: String) -> Int? { - let octets: [Int] = ip.split(separator: ".").compactMap { Int($0) } + public static func toInt(_ ip: String) -> Int64? { + let octets: [Int64] = ip.split(separator: ".").compactMap { Int64($0) } guard octets.count > 1 else { return nil } - var result: Int = 0 + var result: Int64 = 0 for i in stride(from: 3, through: 0, by: -1) { result += octets[ 3 - i ] << (i * 8) }