mirror of https://github.com/oxen-io/session-ios
				
				
				
			
			You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			122 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			Swift
		
	
			
		
		
	
	
			122 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			Swift
		
	
// stringlint:disable
 | 
						|
 | 
						|
import Foundation
 | 
						|
 | 
						|
public struct HTMLMetadata: Equatable {
 | 
						|
    /// Parsed from <title>
 | 
						|
    var titleTag: String?
 | 
						|
    /// Parsed from <link rel="icon"...>
 | 
						|
    var faviconUrlString: String?
 | 
						|
    /// Parsed from <meta name="description"...>
 | 
						|
    var description: String?
 | 
						|
    /// Parsed from the og:title meta property
 | 
						|
    var ogTitle: String?
 | 
						|
    /// Parsed from the og:description meta property
 | 
						|
    var ogDescription: String?
 | 
						|
    /// Parsed from the og:image or og:image:url meta property
 | 
						|
    var ogImageUrlString: String?
 | 
						|
    /// Parsed from the og:published_time meta property
 | 
						|
    var ogPublishDateString: String?
 | 
						|
    /// Parsed from article:published_time meta property
 | 
						|
    var articlePublishDateString: String?
 | 
						|
    /// Parsed from the og:modified_time meta property
 | 
						|
    var ogModifiedDateString: String?
 | 
						|
    /// Parsed from the article:modified_time meta property
 | 
						|
    var articleModifiedDateString: String?
 | 
						|
 | 
						|
    static func construct(parsing rawHTML: String) -> HTMLMetadata {
 | 
						|
        let metaPropertyTags = Self.parseMetaProperties(in: rawHTML)
 | 
						|
        return HTMLMetadata(
 | 
						|
            titleTag: Self.parseTitleTag(in: rawHTML),
 | 
						|
            faviconUrlString: Self.parseFaviconUrlString(in: rawHTML),
 | 
						|
            description: Self.parseDescriptionTag(in: rawHTML),
 | 
						|
            ogTitle: metaPropertyTags["og:title"],
 | 
						|
            ogDescription: metaPropertyTags["og:description"],
 | 
						|
            ogImageUrlString: (metaPropertyTags["og:image"] ?? metaPropertyTags["og:image:url"]),
 | 
						|
            ogPublishDateString: metaPropertyTags["og:published_time"],
 | 
						|
            articlePublishDateString: metaPropertyTags["article:published_time"],
 | 
						|
            ogModifiedDateString: metaPropertyTags["og:modified_time"],
 | 
						|
            articleModifiedDateString: metaPropertyTags["article:modified_time"]
 | 
						|
        )
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
// MARK: - Parsing
 | 
						|
extension HTMLMetadata {
 | 
						|
 | 
						|
    private static func parseTitleTag(in rawHTML: String) -> String? {
 | 
						|
        titleRegex
 | 
						|
            .firstMatchSet(in: rawHTML)?
 | 
						|
            .group(idx: 0)
 | 
						|
            .flatMap { decodeHTMLEntities(in: String($0)) }
 | 
						|
    }
 | 
						|
 | 
						|
    private static func parseFaviconUrlString(in rawHTML: String) -> String? {
 | 
						|
        guard let matchedTag = faviconRegex
 | 
						|
                .firstMatchSet(in: rawHTML)
 | 
						|
                .map({ String($0.fullString) }) else { return nil }
 | 
						|
 | 
						|
        return faviconUrlRegex
 | 
						|
            .parseFirstMatch(inText: matchedTag)
 | 
						|
            .flatMap { decodeHTMLEntities(in: String($0)) }
 | 
						|
    }
 | 
						|
 | 
						|
    private static func parseDescriptionTag(in rawHTML: String) -> String? {
 | 
						|
        guard let matchedTag = metaDescriptionRegex
 | 
						|
                .firstMatchSet(in: rawHTML)
 | 
						|
                .map({ String($0.fullString) }) else { return nil }
 | 
						|
 | 
						|
        return metaContentRegex
 | 
						|
            .parseFirstMatch(inText: matchedTag)
 | 
						|
            .flatMap { decodeHTMLEntities(in: String($0)) }
 | 
						|
    }
 | 
						|
 | 
						|
    private static func parseMetaProperties(in rawHTML: String) -> [String: String] {
 | 
						|
        metaPropertyRegex
 | 
						|
            .allMatchSets(in: rawHTML)
 | 
						|
            .reduce(into: [:]) { (builder, matchSet) in
 | 
						|
                guard let ogTypeSubstring = matchSet.group(idx: 0) else { return }
 | 
						|
                let ogType = String(ogTypeSubstring)
 | 
						|
                let fullTag = String(matchSet.fullString)
 | 
						|
 | 
						|
                // Exit early if we've already found a tag of this type
 | 
						|
                guard builder[ogType] == nil else { return }
 | 
						|
                guard let content = metaContentRegex.parseFirstMatch(inText: fullTag) else { return }
 | 
						|
 | 
						|
                builder[ogType] = decodeHTMLEntities(in: content)
 | 
						|
            }
 | 
						|
    }
 | 
						|
 | 
						|
    private static func decodeHTMLEntities(in string: String) -> String? {
 | 
						|
        guard let data = string.data(using: .utf8) else {
 | 
						|
            return nil
 | 
						|
        }
 | 
						|
 | 
						|
        let options: [NSAttributedString.DocumentReadingOptionKey: Any] = [
 | 
						|
            .documentType: NSAttributedString.DocumentType.html,
 | 
						|
            .characterEncoding: String.Encoding.utf8.rawValue
 | 
						|
        ]
 | 
						|
 | 
						|
        guard let attributedString = try? NSAttributedString(data: data, options: options, documentAttributes: nil) else {
 | 
						|
            return nil
 | 
						|
        }
 | 
						|
        return attributedString.string
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
 // MARK: - Regular Expressions
 | 
						|
extension HTMLMetadata {
 | 
						|
    static let titleRegex = regex(pattern: "<\\s*title[^>]*>(.*?)<\\s*/title[^>]*>")
 | 
						|
    static let faviconRegex = regex(pattern: "<\\s*link[^>]*rel\\s*=\\s*\"\\s*(shortcut\\s+)?icon\\s*\"[^>]*>")
 | 
						|
    static let faviconUrlRegex = regex(pattern: "href\\s*=\\s*\"([^\"]*)\"")
 | 
						|
    static let metaDescriptionRegex = regex(pattern: "<\\s*meta[^>]*name\\s*=\\s*\"\\s*description[^\"]*\"[^>]*>")
 | 
						|
    static let metaPropertyRegex = regex(pattern: "<\\s*meta[^>]*property\\s*=\\s*\"\\s*([^\"]+?)\"[^>]*>")
 | 
						|
    static let metaContentRegex = regex(pattern: "content\\s*=\\s*\"([^\"]*?)\"")
 | 
						|
 | 
						|
    static private func regex(pattern: String) -> NSRegularExpression {
 | 
						|
        try! NSRegularExpression(
 | 
						|
            pattern: pattern,
 | 
						|
            options: [.dotMatchesLineSeparators, .caseInsensitive])
 | 
						|
    }
 | 
						|
}
 |