You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			252 lines
		
	
	
		
			8.8 KiB
		
	
	
	
		
			PHP
		
	
			
		
		
	
	
			252 lines
		
	
	
		
			8.8 KiB
		
	
	
	
		
			PHP
		
	
| <?php
 | |
// PCRE pattern recognising Session Community join links of the form
// `<http or https>://<authority>/<room token>?public_key=<64 hex digits>`,
// matched case-insensitively.
// See https://github.com/oxen-io/session-pysogs/blob/dev/administration.md
$REGEX_JOIN_LINK = (function(): string {
	$fragments = [
		'https?:',           // scheme
		'[^\/]+',            // authority: everything up to the next slash
		'[0-9A-Za-z-_]+',    // room token
		'[[:xdigit:]]{64}',  // server public key
	];

	// Pipes delimit the pattern so the slashes inside need no escaping.
	return vsprintf('|%s//%s/%s\?public_key=%s|i', $fragments);
})();
 | |
| 
 | |
/**
 * Adds up the number of rooms hosted by the given Community servers.
 * @param \CommunityServer[] $servers Community servers whose `rooms` are tallied.
 * @return int Combined room count; 0 when no servers are given.
 */
function count_rooms(array $servers): int {
	return array_sum(array_map(
		fn($server) => count($server->rooms),
		$servers
	));
}
 | |
| 
 | |
/**
 * Truncates a string to the given length and appends an ellipsis.
 *
 * Lengths are counted in UTF-8 characters rather than bytes, so a multibyte
 * character is never cut in half. When the mbstring extension is not
 * available, the function falls back to byte-wise counting.
 *
 * @param string $str String to truncate.
 * @param int $len Target ellipsised length, excluding ellipsis.
 * @return string The first $len characters plus '...', or the original
 * string if it is at most $len + 3 characters long (truncating such a string
 * would not make it any shorter).
 */
function truncate(string $str, int $len): string {
	$multibyte = function_exists('mb_strlen') && function_exists('mb_substr');

	$length = $multibyte ? mb_strlen($str, 'UTF-8') : strlen($str);
	if ($length <= $len + 3) {
		return $str;
	}

	$head = $multibyte
		? mb_substr($str, 0, $len, 'UTF-8')
		: substr($str, 0, $len);
	return $head . '...';
}
 | |
| 
 | |
/**
 * Check whether URL is reachable, downgrading to HTTP if needed.
 *
 * Sends body-less requests (CURLOPT_NOBODY) through
 * curl_get_response_downgrade() and stops early on a 404.
 *
 * @param string $url URL to check.
 * @param int $retries [optional] Number of connection attempts.
 * @return bool Whether or not the server responded with a non-5XX HTTP code.
 * `false` if no HTTP response was obtained at all.
 */
function url_is_reachable(string $url, int $retries = 4): bool {
	[$retcode] = curl_get_response_downgrade(
		$url, retries: $retries,
		curlopts: [CURLOPT_NOBODY => true], stop_on_codes: [404]
	);

	// A code of 0 means the host was unreachable, and a negative code means
	// no attempt was made (e.g. zero retries); neither counts as reachable.
	if ($retcode <= 0) {
		return false;
	}
	return floor($retcode / 100) != 5;
}
 | |
| 
 | |
| 
 | |
/**
 * Fetch URL repeatedly to obtain contents, downgrading to HTTP if needed.
 *
 * Thin wrapper around curl_get_response_downgrade() that drops the HTTP
 * code and keeps the body only for a 200 response.
 *
 * @param string $url URL to fetch.
 * @param int $retries [optional] Number of connection attempts.
 * @param int[] $stop_on_codes [optional]
 * If one of these HTTP codes is encountered, fetching stops early.
 * @return array Two elements: the fetched contents (`null` unless the
 * response code was 200), and whether a downgrade to HTTP took place.
 */
function curl_get_contents_downgrade(string $url, int $retries = 4, $stop_on_codes = [404]) {
	$response = curl_get_response_downgrade($url, $retries, $stop_on_codes);
	$retcode = $response[0];
	$downgraded = $response[2];

	if ($retcode == 200) {
		return [$response[1], $downgraded];
	}
	return [null, $downgraded];
}
 | |
| 
 | |
/**
 * Fetch URL repeatedly to obtain URL contents.
 * @param string $url URL to fetch.
 * @param int $retries [optional] Number of connection attempts.
 * @param int[] $stop_on_codes [optional]
 * If one of these HTTP codes is encountered, fetching stops early.
 * @return ?string Fetched contents, or `null` if no 200 response was obtained.
 */
function curl_get_contents(string $url, int $retries = 4, $stop_on_codes = [404]): ?string {
	$contents = curl_get_response($url, retries: $retries, stop_on_codes: $stop_on_codes)[1];

	// curl_get_response() reports failure as `false`. Returning that directly
	// would be coerced to "" by the ?string return type (or throw under
	// strict types), so map every non-string result to null explicitly.
	return is_string($contents) ? $contents : null;
}
 | |
| 
 | |
/**
 * Fetch URL repeatedly, downgrading to HTTP if needed.
 *
 * For HTTPS URLs the attempts are split between HTTPS (first, rounded-down
 * half, but at least one attempt) and plain HTTP (the remainder). The HTTP
 * fallback runs whenever the HTTPS response code is not 200 and attempts
 * are left. URLs with any other scheme are fetched as they are.
 *
 * @param string $url URL to fetch.
 * @param int $retries [optional] Number of connection attempts.
 * @param int[] $stop_on_codes [optional]
 * If one of these HTTP codes is encountered, fetching stops early.
 * @param array $curlopts [optional]
 * Associative array of options for `curl_setopt`.
 * @return array Return code, fetched contents (if applicable),
 * and whether a downgrade to HTTP took place.
 * A code of 0 corresponds to an unreachable host.
 */
function curl_get_response_downgrade(
	string $url, $retries = 4, $stop_on_codes = [404], $curlopts = []
) {
	$retries = (int) $retries;
	$scheme = parse_url($url, PHP_URL_SCHEME);

	// URL schemes are case-insensitive, so compare in lower case.
	if (!is_string($scheme) || strtolower($scheme) !== "https") {
		[$retcode, $content] = curl_get_response($url, $retries, $stop_on_codes, $curlopts);
		return [$retcode, $content, false];
	}

	// Always try HTTPS at least once when any attempt is allowed; a plain
	// floor($retries / 2) would skip HTTPS entirely for a single retry.
	$https_tries = min($retries, max(1, (int) floor($retries / 2)));
	$http_tries = $retries - $https_tries;

	[$retcode, $content] = curl_get_response($url, $https_tries, $stop_on_codes, $curlopts);
	if ($retcode == 200 || $http_tries <= 0) {
		return [$retcode, $content, false];
	}

	log_debug("Downgrading to HTTP");
	[$retcode, $content] = curl_get_response(
		// Swap the leading "https:" for "http:", keeping the rest of the URL.
		substr_replace($url, "http:", 0, strlen("https:")),
		$http_tries,
		$stop_on_codes, $curlopts
	);
	return [$retcode, $content, true];
}
 | |
| 
 | |
// Low default retries value so this doesn't run for 30 minutes
// FIXME: Does not seem to handle 308's, behaviour not transparent.
// TODO: Parallelize & use in CommunityServer::poll_reachable()
/**
 * Fetch URL repeatedly to obtain a valid response.
 *
 * Retrying stops as soon as a non-empty body is received, the response code
 * is 200, or the code is listed in $stop_on_codes. Redirects are followed.
 * Reads the global $FAST_FETCH_MODE to pick a shorter overall timeout.
 *
 * @param string $url URL to fetch.
 * @param int $retries Number of connection attempts.
 * @param int[] $stop_on_codes [optional]
 * If one of these HTTP codes is encountered, fetching stops early.
 * @param array $curlopts [optional]
 * Associative array of options for `curl_setopt`, applied after the
 * defaults so that they can override them.
 * @return array Return code and fetched contents (`false` unless the code
 * is 200). A code of 0 corresponds to an unreachable host; a code of -1
 * means no attempt was made because $retries was not positive.
 */
function curl_get_response(string $url, int $retries, $stop_on_codes = [404], $curlopts = []) {
	global $FAST_FETCH_MODE;
	// use separate timeouts to reliably get data from Chinese server with repeated tries
	$connecttimeout = 2; // wait at most X seconds to connect
	$timeout = $FAST_FETCH_MODE ? 1.5 : 3; // can't take longer than X seconds for the whole curl process
	$sleep = 2; // sleep between tries in seconds
	// takes at most $timeout * $retries + $sleep * ($retries - 1) seconds

	$contents = false;
	$retcode = -1;

	for ($counter = 1; $counter <= $retries; $counter++) {
		$curl = curl_init($url);

		curl_setopt($curl, CURLOPT_AUTOREFERER, true);
		curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
		curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);

		curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, $connecttimeout);
		// CURLOPT_TIMEOUT only takes whole seconds and would cut 1.5 down
		// to 1, so the overall timeout is given in milliseconds instead.
		curl_setopt($curl, CURLOPT_TIMEOUT_MS, (int) round($timeout * 1000));

		foreach ($curlopts as $opt => $val) curl_setopt($curl, $opt, $val);

		$contents = curl_exec($curl);
		$retcode = curl_getinfo($curl, CURLINFO_HTTP_CODE);

		curl_close($curl);

		log_debug("Attempt #" . $counter . " for " . $url . " returned code " . $retcode . ".");
		// Loose in_array() on purpose: callers may pass codes as ints or strings.
		if ($contents != null || $retcode == 200 || in_array($retcode, $stop_on_codes)) break;

		// Pause only between attempts, not after the last one.
		if ($counter < $retries) sleep($sleep);
	}

	return [$retcode, $retcode == 200 ? $contents : false];
}
 | |
| 
 | |
/**
 * Strips path, query and fragment from a URL, leaving its base.
 * @param string $url The URL to reduce to its base.
 * @param bool $include_scheme [optional]
 * Whether to keep the scheme in the result. `true` by default.
 * @return string The hostname, followed by `:port` if the URL names a port,
 * and preceded by `scheme://` unless $include_scheme is `false`.
 */
function url_get_base(string $url, bool $include_scheme = true) {
	$parts = parse_url($url);
	$scheme = $parts['scheme'];
	$authority = $parts['host'];

	if (isset($parts['port'])) {
		$authority .= ':' . $parts['port'];
	}

	if (!$include_scheme) {
		return $authority;
	}
	return $scheme . '://' . $authority;
}
 | |
| 
 | |
/**
 * Reads the room token out of a join URL.
 *
 * The token is the URL's path with every slash removed.
 *
 * @param string $join_url Join URL for Session Community.
 * @return string Name of Community room.
 */
function url_get_token(string $join_url) {
	$path = parse_url($join_url)['path'];
	return strtr($path, ['/' => '']);
}
 | |
| 
 | |
/**
 * Reads the server public key out of a join URL.
 *
 * The key is the value of the `public_key` query parameter.
 *
 * @param string $join_url Join URL for Session Community.
 * @return string SOGS public key
 */
function url_get_pubkey(string $join_url) {
	$query_string = parse_url($join_url)['query'];

	$params = [];
	parse_str($query_string, $params);

	return $params['public_key'];
}
 | |
| 
 | |
/**
 * Derives a room's ID from its join URL.
 *
 * The ID is the room token, a plus sign, and the first four characters
 * of the server public key.
 *
 * @param string $join_url Join URL for Session Community.
 * @return string Room identifier per our format.
 */
function url_get_room_id(string $join_url) {
	$token = url_get_token($join_url);
	$pubkey_prefix = substr(url_get_pubkey($join_url), 0, 4);
	return $token . '+' . $pubkey_prefix;
}
 | |
| 
 | |
/**
 * Extracts join links that match $REGEX_JOIN_LINK.
 * @param ?string $html Text to find join URLs in. `null` and the empty
 * string yield an empty result.
 * @return string[] Sorted list of unique server join links,
 * indexed consecutively from 0.
 */
function parse_join_links(?string $html): array {
	global $REGEX_JOIN_LINK;

	// Passing null as the subject of preg_match_all() is deprecated,
	// and there is nothing to find in an empty text anyway.
	if ($html === null || $html === "") {
		return [];
	}

	preg_match_all($REGEX_JOIN_LINK, $html, $match_result);

	// Deduplicate first, then sort: sort() reindexes the array, so the
	// result has no key gaps left behind by array_unique().
	$links = array_unique($match_result[0] ?? []);
	sort($links);
	return $links;
}
 | |
| 
 | |
/**
 * Escapes special characters as HTML entities via `htmlspecialchars`.
 * @param ?string $str String to sanitize. `null` is treated like
 * the empty string.
 * @param int $flags [optional]
 * Bitmask of `htmlspecialchars` flags controlling how quotes and invalid
 * code unit sequences are handled and which document type is used.
 * The default is ENT_QUOTES | ENT_SUBSTITUTE.
 * @param ?string $encoding [optional]
 * Character encoding handed to `htmlspecialchars`; `null` by default.
 * @param bool $double_encode [optional]
 * When double_encode is turned off, existing html entities are not
 * encoded again; the default is to convert everything.
 * @return ?string The converted string, possibly empty.
 */
function html_sanitize(
	?string $str, int $flags = ENT_QUOTES|ENT_SUBSTITUTE,
	?string $encoding = null, bool $double_encode = true
): ?string {
	$is_blank = $str === null || $str === "";
	return $is_blank
		? ""
		: htmlspecialchars($str, $flags, $encoding, $double_encode);
}
 | |
| ?>
 |