// Copyright © 2022 Rangeproof Pty Ltd. All rights reserved.

import Foundation
import GRDB
import PromiseKit
import SignalCoreKit
import SessionUtilitiesKit
import SessionSnodeKit

/// This job deletes unused and orphaned data from the database as well as orphaned files from device storage
///
/// **Note:** When scheduling this job if no `Details` are provided (with a list of `typesToCollect`) then this job will
/// assume that it should be collecting all `Types`
public enum GarbageCollectionJob: JobExecutor {
    // NOTE(review): `-1` presumably means "no failure limit" - confirm against the `JobExecutor` contract
    public static var maxFailureCount: Int = -1
    public static var requiresThreadId: Bool = false
    public static let requiresInteractionId: Bool = false

    /// Approximate duration of six months (6 * 30 days) expressed in **seconds**
    public static let approxSixMonthsInSeconds: TimeInterval = (6 * 30 * 24 * 60 * 60)

    /// Runs the garbage collection in two phases: first the database records for the requested `Types` are
    /// removed within a single `writeAsync` call, then (on `queue`) any attachment/avatar files on disk which
    /// are not referenced by the database are deleted
    ///
    /// - Parameters:
    ///   - job: The job being executed, its `details` are decoded as `Details` to determine the types to collect
    ///   - queue: The queue the file clean-up phase is dispatched onto once the database write has completed
    ///   - success: Called once all requested clean-up completed without any file deletion errors
    ///   - failure: Called if the valid file lists couldn't be read from the database or if any file deletion failed
    ///   - deferred: Not called by this job
    public static func run(
        _ job: Job,
        queue: DispatchQueue,
        success: @escaping (Job, Bool) -> (),
        failure: @escaping (Job, Error?, Bool) -> (),
        deferred: @escaping (Job) -> ()
    ) {
        /// Determine what types of data we want to collect (if we didn't provide any then assume we want to collect everything)
        ///
        /// **Note:** The reason we default to handle all cases (instead of just doing nothing in that case) is so the initial registration
        /// of the garbageCollection job never needs to be updated as we continue to add more types going forward
        let typesToCollect: [Types] = (job.details
            .map { try? JSONDecoder().decode(Details.self, from: $0) }?
            .typesToCollect)
            .defaulting(to: Types.allCases)
        let timestampNow: TimeInterval = Date().timeIntervalSince1970

        GRDBStorage.shared.writeAsync(
            updates: { db in
                /// Remove any expired controlMessageProcessRecords
                if typesToCollect.contains(.expiredControlMessageProcessRecords) {
                    _ = try ControlMessageProcessRecord
                        .filter(ControlMessageProcessRecord.Columns.serverExpirationTimestamp <= timestampNow)
                        .deleteAll(db)
                }

                /// Remove any typing indicators
                if typesToCollect.contains(.threadTypingIndicators) {
                    _ = try ThreadTypingIndicator
                        .deleteAll(db)
                }

                /// Remove any old open group messages - open group messages which are older than six months
                if typesToCollect.contains(.oldOpenGroupMessages) && db[.trimOpenGroupMessagesOlderThanSixMonths] {
                    let interaction: TypedTableAlias<Interaction> = TypedTableAlias()
                    let thread: TypedTableAlias<SessionThread> = TypedTableAlias()

                    /// `timestampMs` is stored in milliseconds whereas `timestampNow` and `approxSixMonthsInSeconds`
                    /// are in seconds, so the cutoff must be converted to milliseconds before comparing (comparing
                    /// against a value in seconds would never match any message)
                    let oldestTimestampMsToKeep: Double = ((timestampNow - approxSixMonthsInSeconds) * 1000)

                    try db.execute(literal: """
                        DELETE FROM \(Interaction.self)
                        WHERE \(Column.rowID) IN (
                            SELECT \(interaction.alias[Column.rowID])
                            FROM \(Interaction.self)
                            JOIN \(SessionThread.self) ON (
                                \(SQL("\(thread[.variant]) = \(SessionThread.Variant.openGroup)")) AND
                                \(thread[.id]) = \(interaction[.threadId])
                            )
                            WHERE \(interaction[.timestampMs]) < \(oldestTimestampMsToKeep)
                        )
                    """)
                }

                /// Orphaned jobs - jobs which have had their threads or interactions removed
                if typesToCollect.contains(.orphanedJobs) {
                    let job: TypedTableAlias<Job> = TypedTableAlias()
                    let thread: TypedTableAlias<SessionThread> = TypedTableAlias()
                    let interaction: TypedTableAlias<Interaction> = TypedTableAlias()

                    try db.execute(literal: """
                        DELETE FROM \(Job.self)
                        WHERE \(Column.rowID) IN (
                            SELECT \(job.alias[Column.rowID])
                            FROM \(Job.self)
                            LEFT JOIN \(SessionThread.self) ON \(thread[.id]) = \(job[.threadId])
                            LEFT JOIN \(Interaction.self) ON \(interaction[.id]) = \(job[.interactionId])
                            WHERE (
                                (
                                    \(job[.threadId]) IS NOT NULL AND
                                    \(thread[.id]) IS NULL
                                ) OR (
                                    \(job[.interactionId]) IS NOT NULL AND
                                    \(interaction[.id]) IS NULL
                                )
                            )
                        )
                    """)
                }

                /// Orphaned link previews - link previews which have no interactions with matching url & rounded timestamps
                if typesToCollect.contains(.orphanedLinkPreviews) {
                    let linkPreview: TypedTableAlias<LinkPreview> = TypedTableAlias()
                    let interaction: TypedTableAlias<Interaction> = TypedTableAlias()

                    try db.execute(literal: """
                        DELETE FROM \(LinkPreview.self)
                        WHERE \(Column.rowID) IN (
                            SELECT \(linkPreview.alias[Column.rowID])
                            FROM \(LinkPreview.self)
                            LEFT JOIN \(Interaction.self) ON (
                                \(interaction[.linkPreviewUrl]) = \(linkPreview[.url]) AND
                                \(Interaction.linkPreviewFilterLiteral())
                            )
                            WHERE \(interaction[.id]) IS NULL
                        )
                    """)
                }

                /// Orphaned open groups - open groups which are no longer associated to a thread (except for the session-run ones for which
                /// we want cached image data even if the user isn't in the group)
                if typesToCollect.contains(.orphanedOpenGroups) {
                    let openGroup: TypedTableAlias<OpenGroup> = TypedTableAlias()
                    let thread: TypedTableAlias<SessionThread> = TypedTableAlias()

                    try db.execute(literal: """
                        DELETE FROM \(OpenGroup.self)
                        WHERE \(Column.rowID) IN (
                            SELECT \(openGroup.alias[Column.rowID])
                            FROM \(OpenGroup.self)
                            LEFT JOIN \(SessionThread.self) ON \(thread[.id]) = \(openGroup[.threadId])
                            WHERE (
                                \(thread[.id]) IS NULL AND
                                \(SQL("\(openGroup[.server]) != \(OpenGroupAPI.defaultServer.lowercased())"))
                            )
                        )
                    """)
                }

                /// Orphaned open group capabilities - capabilities which have no existing open groups with the same server
                if typesToCollect.contains(.orphanedOpenGroupCapabilities) {
                    let capability: TypedTableAlias<Capability> = TypedTableAlias()
                    let openGroup: TypedTableAlias<OpenGroup> = TypedTableAlias()

                    try db.execute(literal: """
                        DELETE FROM \(Capability.self)
                        WHERE \(Column.rowID) IN (
                            SELECT \(capability.alias[Column.rowID])
                            FROM \(Capability.self)
                            LEFT JOIN \(OpenGroup.self) ON \(openGroup[.server]) = \(capability[.openGroupServer])
                            WHERE \(openGroup[.threadId]) IS NULL
                        )
                    """)
                }

                /// Orphaned blinded id lookups - lookups which have no existing threads or approval/block settings for either blinded/un-blinded id
                if typesToCollect.contains(.orphanedBlindedIdLookups) {
                    let blindedIdLookup: TypedTableAlias<BlindedIdLookup> = TypedTableAlias()
                    let thread: TypedTableAlias<SessionThread> = TypedTableAlias()
                    let contact: TypedTableAlias<Contact> = TypedTableAlias()

                    try db.execute(literal: """
                        DELETE FROM \(BlindedIdLookup.self)
                        WHERE \(Column.rowID) IN (
                            SELECT \(blindedIdLookup.alias[Column.rowID])
                            FROM \(BlindedIdLookup.self)
                            LEFT JOIN \(SessionThread.self) ON (
                                \(thread[.id]) = \(blindedIdLookup[.blindedId]) OR
                                \(thread[.id]) = \(blindedIdLookup[.sessionId])
                            )
                            LEFT JOIN \(Contact.self) ON (
                                \(contact[.id]) = \(blindedIdLookup[.blindedId]) OR
                                \(contact[.id]) = \(blindedIdLookup[.sessionId])
                            )
                            WHERE (
                                \(thread[.id]) IS NULL AND
                                \(contact[.id]) IS NULL
                            )
                        )
                    """)
                }

                /// Approved blinded contact records - once a blinded contact has been approved there is no need to keep the blinded
                /// contact record around anymore
                if typesToCollect.contains(.approvedBlindedContactRecords) {
                    let contact: TypedTableAlias<Contact> = TypedTableAlias()
                    let blindedIdLookup: TypedTableAlias<BlindedIdLookup> = TypedTableAlias()

                    try db.execute(literal: """
                        DELETE FROM \(Contact.self)
                        WHERE \(Column.rowID) IN (
                            SELECT \(contact.alias[Column.rowID])
                            FROM \(Contact.self)
                            LEFT JOIN \(BlindedIdLookup.self) ON (
                                \(blindedIdLookup[.blindedId]) = \(contact[.id]) AND
                                \(blindedIdLookup[.sessionId]) IS NOT NULL
                            )
                            WHERE \(blindedIdLookup[.sessionId]) IS NOT NULL
                        )
                    """)
                }

                /// Orphaned attachments - attachments which have no related interactions, quotes or link previews
                if typesToCollect.contains(.orphanedAttachments) {
                    let attachment: TypedTableAlias<Attachment> = TypedTableAlias()
                    let quote: TypedTableAlias<Quote> = TypedTableAlias()
                    let linkPreview: TypedTableAlias<LinkPreview> = TypedTableAlias()
                    let interactionAttachment: TypedTableAlias<InteractionAttachment> = TypedTableAlias()

                    try db.execute(literal: """
                        DELETE FROM \(Attachment.self)
                        WHERE \(Column.rowID) IN (
                            SELECT \(attachment.alias[Column.rowID])
                            FROM \(Attachment.self)
                            LEFT JOIN \(Quote.self) ON \(quote[.attachmentId]) = \(attachment[.id])
                            LEFT JOIN \(LinkPreview.self) ON \(linkPreview[.attachmentId]) = \(attachment[.id])
                            LEFT JOIN \(InteractionAttachment.self) ON \(interactionAttachment[.attachmentId]) = \(attachment[.id])
                            WHERE (
                                \(quote[.attachmentId]) IS NULL AND
                                \(linkPreview[.url]) IS NULL AND
                                \(interactionAttachment[.attachmentId]) IS NULL
                            )
                        )
                    """)
                }
            },
            // NOTE(review): Both completion arguments are ignored so the file clean-up below runs regardless of
            // what the write reported - confirm whether a failed write should fail the job instead
            completion: { _, _ in
                // Dispatch async so we can swap from the write queue to a read one (we are done writing)
                queue.async {
                    // Retrieve a list of all valid attachment and avatar file paths
                    struct FileInfo {
                        let attachmentLocalRelativePaths: Set<String>
                        let profileAvatarFilenames: Set<String>
                    }

                    let maybeFileInfo: FileInfo? = GRDBStorage.shared.read { db -> FileInfo in
                        var attachmentLocalRelativePaths: Set<String> = []
                        var profileAvatarFilenames: Set<String> = []

                        /// Orphaned attachment files - attachment files which don't have an associated record in the database
                        if typesToCollect.contains(.orphanedAttachmentFiles) {
                            /// **Note:** Thumbnails are stored in the `NSCachesDirectory` directory which should automatically manage
                            /// its own garbage collection so we can just ignore it according to the various comments in the following stack overflow
                            /// post, the directory will be cleared during app updates as well as if the system is running low on memory (if the app isn't running)
                            /// https://stackoverflow.com/questions/6879860/when-are-files-from-nscachesdirectory-removed
                            attachmentLocalRelativePaths = try Attachment
                                .select(.localRelativeFilePath)
                                .filter(Attachment.Columns.localRelativeFilePath != nil)
                                .asRequest(of: String.self)
                                .fetchSet(db)
                        }

                        /// Orphaned profile avatar files - profile avatar files which don't have an associated record in the database
                        if typesToCollect.contains(.orphanedProfileAvatars) {
                            profileAvatarFilenames = try Profile
                                .select(.profilePictureFileName)
                                .filter(Profile.Columns.profilePictureFileName != nil)
                                .asRequest(of: String.self)
                                .fetchSet(db)
                        }

                        return FileInfo(
                            attachmentLocalRelativePaths: attachmentLocalRelativePaths,
                            profileAvatarFilenames: profileAvatarFilenames
                        )
                    }

                    // If we couldn't get the file lists then fail (invalid state and don't want to delete all attachment/profile files)
                    guard let fileInfo: FileInfo = maybeFileInfo else {
                        failure(job, StorageError.generic, false)
                        return
                    }

                    var deletionErrors: [Error] = []

                    // Orphaned attachment files (actual deletion)
                    if typesToCollect.contains(.orphanedAttachmentFiles) {
                        // Note: Looks like in order to recursively look through files we need to use the
                        // enumerator method
                        let fileEnumerator = FileManager.default.enumerator(
                            at: URL(fileURLWithPath: Attachment.attachmentsFolder),
                            includingPropertiesForKeys: nil,
                            options: .skipsHiddenFiles  // Ignore the `.DS_Store` for the simulator
                        )

                        let allAttachmentFilePaths: Set<String> = (fileEnumerator?
                            .allObjects
                            .compactMap { Attachment.localRelativeFilePath(from: ($0 as? URL)?.path) })
                            .defaulting(to: [])
                            .asSet()

                        // Note: Directories will have their own entries in the list, if there is a folder with content
                        // the file will include the directory in its path with a forward slash so we can use this to
                        // distinguish empty directories from ones with content so we don't unintentionally delete a
                        // directory which contains content to keep as well as delete (directories which end up empty after
                        // this clean up will be removed during the next run)
                        //
                        // NOTE(review): Only the first path component is protected here, this assumes content is
                        // nested at most one directory deep within the attachments folder - confirm
                        let directoryNamesContainingContent: [String] = allAttachmentFilePaths
                            .filter { path -> Bool in path.contains("/") }
                            .compactMap { path -> String? in path.components(separatedBy: "/").first }
                        let orphanedAttachmentFiles: Set<String> = allAttachmentFilePaths
                            .subtracting(fileInfo.attachmentLocalRelativePaths)
                            .subtracting(directoryNamesContainingContent)

                        orphanedAttachmentFiles.forEach { filepath in
                            // We don't want a single deletion failure to block deletion of the other files so try
                            // each one and store the error to be used to determine success/failure of the job
                            do {
                                try FileManager.default.removeItem(
                                    atPath: URL(fileURLWithPath: Attachment.attachmentsFolder)
                                        .appendingPathComponent(filepath)
                                        .path
                                )
                            }
                            catch { deletionErrors.append(error) }
                        }
                    }

                    // Orphaned profile avatar files (actual deletion)
                    if typesToCollect.contains(.orphanedProfileAvatars) {
                        let allAvatarProfileFilenames: Set<String> = (try? FileManager.default
                            .contentsOfDirectory(atPath: ProfileManager.sharedDataProfileAvatarsDirPath))
                            .defaulting(to: [])
                            .asSet()
                        let orphanedAvatarFiles: Set<String> = allAvatarProfileFilenames
                            .subtracting(fileInfo.profileAvatarFilenames)

                        orphanedAvatarFiles.forEach { filename in
                            // We don't want a single deletion failure to block deletion of the other files so try
                            // each one and store the error to be used to determine success/failure of the job
                            do {
                                try FileManager.default.removeItem(
                                    atPath: ProfileManager.profileAvatarFilepath(filename: filename)
                                )
                            }
                            catch { deletionErrors.append(error) }
                        }
                    }

                    // Report a single file deletion as a job failure (even if other content was successfully removed)
                    guard deletionErrors.isEmpty else {
                        failure(job, (deletionErrors.first ?? StorageError.generic), false)
                        return
                    }

                    success(job, false)
                }
            }
        )
    }
}

// MARK: - GarbageCollectionJob.Details

extension GarbageCollectionJob {
    /// The individual categories of data which this job is able to clean up
    public enum Types: Codable, CaseIterable {
        case expiredControlMessageProcessRecords
        case threadTypingIndicators
        case oldOpenGroupMessages
        case orphanedJobs
        case orphanedLinkPreviews
        case orphanedOpenGroups
        case orphanedOpenGroupCapabilities
        case orphanedBlindedIdLookups
        case approvedBlindedContactRecords
        case orphanedAttachments
        case orphanedAttachmentFiles
        case orphanedProfileAvatars
    }

    /// The payload encoded into the job's `details`, when absent (or undecodable) the job collects all `Types`
    public struct Details: Codable {
        public let typesToCollect: [Types]

        public init(typesToCollect: [Types] = Types.allCases) {
            self.typesToCollect = typesToCollect
        }
    }
}