Refactor querying & improve logging

gravel 1 year ago
parent 291025bfab
commit 03f88e6c0a
Signed by: gravel
GPG Key ID: C0538F3C906B308F

@ -1,731 +1,92 @@
// requires php-curl
// require other php files
require_once "getenv.php";
require_once "utils/server-utils.php";
require_once 'getenv.php';
require_once 'utils/utils.php';
require_once 'servers/known-servers.php';
include_once "$LANGUAGES_ROOT/language_flags.php"; // actually runs fine without it
// room token regex part, must consist of letters, numbers, underscores, or dashes:
$room_token_regex_part = "[0-9A-Za-z-_]+";
* This regex uses the following components:
* - https?:\/\/ - This matches "http" or "https" followed by "://"
* - [^\/]+\/ - This matches one or more characters that are not a forward slash, followed by a forward slash
* - [0-9A-Za-z-_]+ - This matches one or more letters, digits, underscores, or dashes (room token)
* - \?public_key= - This matches a question mark followed by the text "public_key="
* - [0-9A-Fa-f]{64} - This matches 64 hexadecimal digits (0-9, A-F and a-f)
* This regex should match strings in the following format:
* http(s)://[server]/[room_token]?public_key=[64_hexadecimal_digits]
$room_join_regex = "/https?:\/\/[^\/]+\/" . $room_token_regex_part . "\?public_key=[0-9A-Fa-f]{64}/";
* Some servers don't appear in the wild yet, but can be queried
* Ideally this shouldn't be necessary, but it is for now
$known_servers = array(
"", // (?) KeeJef
// "", // found via, but now offline
"", // found via
"", // found via
"", // found via
"", // found via
"", // found via
"", // found via
"", // found via
"", // found via
"", // found via
"", // found via
"", // found via
$known_pubkeys = array(
// "server_without_proto" => "64 char hex public key"
"" => "39016f991400c35a46e11e06cb2a64d6d8ab6652e484a556b14f7cf57ed7e73a",
"" => "efcaecf00aebf5b75e62cf1fd550c6052842e1415a9339406e256c8b27cd2039",
"" => "e529311ec8fb6fdb950aaa4fb71fc4da3ea59c6c9ba2886708b9538eea6aa213",
"" => "e093994156ec92e4c13d0387208bfa48ae56dd88b8f60a03980d9ef048af1e3f",
"" => "426453e0e991235b62bc5f35f36d5a204e64b2d8b44e971609add6a10aac6674",
"" => "7908bcd748313355f99e62f9c1f11c395d04019410edb7ee1618dbe26a423c4f",
"" => "b501f2dc7dc912aa0981b0ba10f2ba739d2f729a7d9b37022aee505aaf72807c",
"" => "2cbde327e9da216af9a69876bc57e16cc0c540b0aa2dfecdd1c115e67993b040",
"" => "c9a30da579d8fdaeded009d1afa3de573dd783e3081b7504c4cbfa470e5db378",
"" => "7242ad657dc2dd20e902a6fa82c34465907b67e80daf50173f38d5745abbaa24",
"" => "a03c383cf63c3c4efe67acc52112a6dd734b3a946b9545f488aaa93da7991238",
"" => "4b3e75eedd2116b4dab0bcb6443b0e9fbfce7bcf1d35970bdad8a57a0113fb20",
"" => "fdcb047eb78520e925fda512a45ae74c6e2de9e0df206b3c0471bf1509919559"
// path for HTML output
// $output = "output/index.html";
// path for room data output
$output = "$ROOMS_FILE";
file_exists($CACHE_ROOT) or mkdir($CACHE_ROOT, 0700);
// run main function
require_once 'utils/servers-rooms.php';
function main() {
$timestamp = time(); // unix timestamp in seconds
$html = get_html_from_known_sources();
$wild_join_links = extract_join_links_from_html($html);
$servers = get_servers_from_join_links($wild_join_links);
$servers = reduce_servers($servers);
$servers = merge_servers_with_known_good_servers($servers); //TODO: Switch merge and reduce?
$rooms = query_servers_for_rooms($servers);
$pubkeys = acquire_pubkeys_from_join_links($wild_join_links);
$pubkeys = merge_pubkeys_with_known_pubkeys($pubkeys);
$addr_assignments = get_pubkeys_of_servers($servers, $pubkeys);
$addr_assignments = reduce_addresses_of_pubkeys($addr_assignments);
$room_assignments = assign_rooms_to_address_assignments($addr_assignments, $rooms);
$info_arrays = generate_info_arrays($room_assignments);
// Get join links -> Add known servers ->
// De-dupe based on base URL ->
// Test domains -> De-dupe based on pubkey
// $final_join_links = generate_join_links($room_assignments);
// $final_html = generateHTML($timestamp, $info_arrays);
// print_r($wild_join_links);
// print_r($servers);
// print_r($rooms);
// print_r($pubkeys);
// print_r($addr_assignments);
// print_r($room_assignments);
// print_r($final_join_links);
// print_pinned_messages($room_assignments);
// write output to disk
global $output;
file_put_contents($output, json_encode($info_arrays)); // overwrites existing file
log_info("Done. ");
log_info("Found " . count($info_arrays) . " unique Session Communities on " . count_servers($info_arrays) . " servers." . PHP_EOL);
* Queries following known sources of join links for Session Communities:
* - Awesome Session Open Group List repository on GitHub
* - Open Groups
* - open groups
function get_html_from_known_sources() {
// known open group / community lists
$asgl = "";
$ll = "";
$sd_pre = "" ; // this one has to be expanded first
// get awesome session group list html
log_info("Requesting Awesome Session Group list.");
$asgl_html = file_get_contents($asgl);
// log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
// get html
log_info("Requesting Lokilocker Mods Open Group list.");
$ll_html = file_get_contents($ll);
// log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
// get html
$sd_html = "";
log_info("Requesting list.");
$sd_pre_html = file_get_contents($sd_pre);
// log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
$sd_pattern = "/view_session_group_user_lokinet\.php\?id=\d+/";
preg_match_all($sd_pattern, $sd_pre_html, $sd_links);
$sd_links = $sd_links[0];
foreach ($sd_links as &$link) {
// add prefix " to the sd_links
$link = str_replace('view_session_group_user_lokinet.php?id=', '', $link);
// add html to sd_html
// log_debug("Requesting " . $link);
$sd_html = $sd_html . file_get_contents($link) . PHP_EOL;
// log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
log_info("Done fetching sources.");
// merge all html into a single string
return (
$asgl_html . PHP_EOL .
$ll_html . PHP_EOL .
$sd_html . PHP_EOL
file_exists($CACHE_ROOT) or mkdir($CACHE_ROOT, 0700);
$html_pages = query_known_sources();
// Find join links in each HTML document and concatenate the results.
$join_links = array_merge([], ...array_map('parse_join_links', $html_pages));
* @var CommunityServer[] $servers
$servers = CommunityServer::from_join_urls($join_links);
// Add known hosts.
$servers = [...CommunityServer::from_known_hosts($KNOWN_SERVERS, $KNOWN_PUBKEYS), ...$servers];
$servers = CommunityServer::dedupe_by_url($servers);
$servers = CommunityServer::poll_reachable($servers);
$servers = CommunityServer::dedupe_by_pubkey($servers);
$servers_total = count($servers);
$rooms_total = count_rooms($servers);
// Output query results to file.
log_info("Done fetching communities.");
"Found $rooms_total unique Session Communities " .
"on $servers_total servers." . PHP_EOL
file_put_contents($ROOMS_FILE, json_encode($servers));
* Extracts all links that match the $room_join_regex
* Example: http(s)://whatever:port/?public_key=0123456789abcef
* Result is sorted and unique
* There's no check for reachability or additional https availability
* Iteratively crawls an index for individual Session Community details.
* @return string[]
function extract_join_links_from_html($html){
global $room_join_regex;
$result = array();
preg_match_all($room_join_regex, $html, $result);
// print_r($result);
$result = $result[0]; // there's only $result[0], no $result[1] or others
$result = array_unique($result);
function crawl_source_index($html, $url_base, $item_url_pattern) {
preg_match_all($item_url_pattern, $html, $match_result);
$matched_links = $match_result[0];
return $result;
* Gets all servers from an array of join links
* Returns an array that looks like this:
* [0] =>
* [1] =>
* [2] =>
* [3] => dev.test:23456
* Result is sorted and unique
function get_servers_from_join_links($join_links_arr) {
$result = array();
foreach($join_links_arr as $join_link){
$split = array();
$split = explode("/", $join_link); // http(s): + "" + + "name?public_key=0123456789abcdef"
$result[] = $split[2]; //
foreach ($matched_links as $link) {
$link = $url_base . $link;
log_debug("Requesting $link");
$pages[] = file_get_contents($link);
// Supposed to be "HTTP/1.1 200 OK"
$result = array_unique($result);
return $result;
return $pages;
* Checks whether servers are reachable and whether they support https
* and makes sure that there are no http/https duplicates
* Input is an array of servers without protocol (no http:(s)// in front)
* Result is unique and sorted
* Fetches known sources of Session Community join links.
function reduce_servers($servers_arr) {
log_info("Checking found servers for availability.");
$reduced_servers = array();
$offline_servers = array(); // debug
foreach($servers_arr as $server) {
// try https
$url = "https://" . $server;
$reduced_servers[] = $url;
// try http
$url = "http://" . $server;
$reduced_servers[] = $url;
else {
$offline_servers[] = $url;
// echo("Server " . $server . " is not reachable" . PHP_EOL);
$reduced_servers = array_unique($reduced_servers);
// print_r($offline_servers);
return $reduced_servers;
* Some servers don't appear in the wild yet, but can be queried
* Ideally this shouldn't be necessary, but it is for now
* Should be called after reduce_servers()
function merge_servers_with_known_good_servers($url_arr){
$result = array();
global $known_servers;
$result = array_merge($url_arr, $known_servers);
$result = array_unique($result); // just in case we accidentally add a duplicate
function query_known_sources() {
global $SOURCES;
return $result;
* Takes an input like this:
* [0] =>
* [1] =>
* [2] =>
* [3] => http://dev.test:23456
* and queries the /rooms JSON API endpoint
* Returns a multidimensional array
* The first dimension uses the server URL as key
* The second dimension is an array that contains $room_array array
* $room_array arrays contain token, name, users and description
function query_servers_for_rooms($url_arr) {
log_info("Querying available servers for rooms.");
$rooms = array();
$failed_arr = array(); // debug
// we can't use array_unique later so we make sure the input is unique
$url_arr = array_unique($url_arr); // not really necessary though
// we can't use sort or asort later so we do it now
sort($url_arr); // not really necessary though
// we could probably use ksort or something else that persists the keys
foreach($url_arr as $url) {
$query_result = query_single_servers_for_rooms($url, $failed_arr);
if($query_result) {
$rooms[$url] = $query_result;
// print_r($failed_arr);
return $rooms;
* TODO: Description
function query_single_servers_for_rooms($server_url, &$failed_arr = null) {
$result = array();
$endpoint = "/rooms?all=1";
$json_url = $server_url . $endpoint;
log_info("Polling $server_url for rooms.");
$json = curl_get_contents($json_url); // circumvents flaky routing, don't use file_get_contents
// echo("URL: " . $server_url . " - JSON URL: " . $json_url . PHP_EOL);
// echo("JSON: " . $json . PHP_EOL);
$failed = false;
if($json) {
$json_obj = json_decode($json);
$json_rooms = array();
// if response was not empty
if($json_obj) {
log_info("Received response from $server_url.");
foreach($json_obj as $json_room) {
$token = $json_room->token; // room "name"
$users_per_second = $json_room->active_users / $json_room->active_users_cutoff;
$seconds_in_a_week = 604800;
$weekly_active_users = floor($users_per_second * $seconds_in_a_week);
// echo($token . " has " . $users_per_second . " UPS." . PHP_EOL);
// echo($token . " has " . $weekly_active_users . " WAU. (" . $json_room->active_users . ")" . PHP_EOL);
$room_array = array(
"token" => $token,
"name" => $json_room->name,
"active_users" => $weekly_active_users,
"description" => $json_room->description
$json_rooms[$token] = $room_array;
// print_r($json_rooms);
$result = $json_rooms;
else {
$failed = true;
// echo($json_url . " failed to decode" . PHP_EOL);
else {
$failed = true;
if($failed) {
// 404 - could mean it's a legacy server that doesn't provide the /rooms endpoint
// echo("Failed json_url: " . $json_url . PHP_EOL);
if(!is_null($failed_arr)) {
// if $failed_arr has been used as parameter, add failed URL to it
$failed_arr[] = $server_url;
// echo("Failed: " . $server_url . PHP_EOL);
$legacy_rooms = query_homepage_for_rooms($server_url);
if($legacy_rooms) {
$result = $legacy_rooms;
} else {
log_info("Failed to receive response from $server_url.");
$result = null;
// print_r($failed_arr);
return $result;
* For servers that do not provide the /rooms endpoint
* Takes the same kind of input as query_servers_for_rooms(), but only a single URL
* Returns array of all available rooms (each its own array with token, name, users and description)
* Result is false if no rooms were found
function query_homepage_for_rooms($url) {
$result = array();
global $room_token_regex_part;
$contents = file_get_contents($url);
if($contents) {
$regex_new = "/\/r\/" . $room_token_regex_part . "/";
$regex_old = "/\/view\/room\/" . $room_token_regex_part . "/"; // @legacy
preg_match_all($regex_new, $contents, $rooms);
$rooms = $rooms[0];
// if the new regex doesn't match, use the old one // @legacy
if(empty($rooms)) {
preg_match_all($regex_old, $contents, $rooms);
$rooms = $rooms[0];
// if one of the two regex has found anything
if(!empty($rooms)) {
// we also want the room names (not tokens)
preg_match_all('/<li.*?><a.*?>(.*?)<\/a><\/li>/', $contents, $names);
$names = $names[1]; // [1] contains only the contents of the a tags, not the li or a tags themselves
// at this point the array contents look either like this:
// /r/token
// or like this
// /view/room/token
// so split by / and use last element
foreach($rooms as $i => $room) {
$exploded = explode("/", $room);
$token = $exploded[count($exploded) - 1]; // take last element
$room_array = array(
"token" => $token,
"name" => $names[$i], // take same index in $names array
"active_users" => -1, // without API we can't query the actual number
"description" => null // same goes for the description
$result[$token] = $room_array;
else {
return false;
else {
return false;
$result = array_unique($result);
return $result;
* Returns an array with the server hosts (without protocol) as keys and the public keys as values
* "server:port" => "somehexstring"
function acquire_pubkeys_from_join_links($join_links_arr) {
$result = array(); // will hold the final $server => $pubkey data
$temp = array(); // will hold temporary $server => array("pubkey1", "pubkey2", ...) data
$server_to_server_url = array(); // "" => "" - ugly! TODO: find more elegant solution
// first pass (will collect multiple pubkeys for each server if multiple are found)
foreach($join_links_arr as $join_link) {
// example:
$exploded = explode("/", $join_link);
// we split by / and take the index [2] as the server
$server = $exploded[2];
// we split by / and take the index [0] and $server as the server url
$server_url = $exploded[0] . "//" . $server; // required for visit_first_room_of_server_to_acquire_public_key
// we assume everything behind the "=" is the public key
$pubkey = explode("=", $join_link)[1];
$temp[$server][] = $pubkey;
$server_to_server_url[$server] = $server_url;
// second pass
// will filter the pubkeys
// and, if different pubkeys were found for the same server, will query the server
foreach($temp as $server => $pubkey_arr) {
$uniq_arr = array_unique($pubkey_arr);
if(count($uniq_arr) >= 1) {
if(count($uniq_arr) == 1) {
$result[$server] = $uniq_arr[0]; // if only one unique pubkey was found use that
else { // multiple unique pubkeys were found
echo("Multiple public keys found for server " . $server . "." . PHP_EOL);
log_info("Requesting Awesome Session Group list...");
$pages_asgl[] = file_get_contents($SOURCES['ASGL']);
log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
//$result[$server] = $uniq_arr[0]; // placeholder
log_info("Requesting Lokilocker Mods Open Group list...");
$pages_loki[] = file_get_contents($SOURCES['LOKI']);
log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
$actual_pubkey = visit_first_room_of_server_to_acquire_public_key($server_to_server_url[$server]);
log_info("Requesting list...");
$index_sdir = file_get_contents($SOURCES['SDIR']);
log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
echo("Server responded with " . $actual_pubkey . PHP_EOL);
$result[$server] = $actual_pubkey;
} // else (<= 1) do nothing
return $result;
* Merge pubkeys
function merge_pubkeys_with_known_pubkeys($pubkeys_arr) {
$result = array();
global $known_pubkeys;
$result = array_merge($pubkeys_arr, $known_pubkeys);
return $result;
* Returns an array with the pubkey as index and an array of the server(s) as value
* Expects the $servers_arr to be with protocol
function get_pubkeys_of_servers($servers_arr, $pubkeys_arr) {
$result = array();
$server_to_pubkey = array();
// print_r($servers_arr);
// print_r($pubkeys_arr);
// first create an array of all public keys
// we do this because it is possible that the same server has been added with multiple URLs (but the same public key)
$unique_pubkeys = array();
foreach($pubkeys_arr as $pk_server => $pk_value) {
$unique_pubkeys[] = $pk_value;
$unique_pubkeys = array_unique($unique_pubkeys);
// then assign $server_url => $pubkey
foreach($servers_arr as $server_url) {
// split protocol
$server = explode("//", $server_url)[1];
$server_to_pubkey[$server_url] = $pubkeys_arr[$server];
// print_r($server_to_pubkey);
// but this still has duplicates in it
// so we get every address for a known pubkey so the result is an array:
// result[$pubkey] = array("address1", "address2", ...);
foreach($unique_pubkeys as $pubkey) {
$addresses = array();
foreach($server_to_pubkey as $s_address => $s_pubkey) {
if($pubkey == $s_pubkey) {
$addresses[] = $s_address;
$result[$pubkey] = $addresses;
return $result;
* Input array of type array[pubkey] = array("address1", "address2", ...)
* Output array of type array[pubkey] = "address"
* Removes pubkeys that do not have an active address
* For those with multiple addresses it checks if one of them is not an IP address and then assumes it to be the (primary) domain of the server //TODO: not ideal, but works for now
function reduce_addresses_of_pubkeys($pubkey_to_addresses_arr) {
$result = array();
foreach($pubkey_to_addresses_arr as $pubkey => $addresses_arr) {
// print_r($addresses_arr);
// echo($pubkey . " has count " . count($addresses_arr) . "." . PHP_EOL);
// has active addresses?
if(count($addresses_arr) != 0) {
// has only one active address?
if(count($addresses_arr) == 1) {
// add only entry to result
$result[$pubkey] = $addresses_arr[0];
// has more than one active address
$found_domain = false;
foreach($addresses_arr as $address) {
$without_proto = explode("//", $address)[1];
// has no domain been found yet?
if(!$found_domain) {
// (bool)ip2long returns 1 (true) if valid IP //TODO: Does this handle IPv6?
if(!(bool)ip2long($without_proto)) {
$found_domain = true;
// add first found domain to result
$result[$pubkey] = $address;
// has no domain been found?
if(!$found_domain) {
// then simply add first entry
$result[$pubkey] = $addresses_arr[0];
return $result;
* Returns an array that uses the public key as the index
* and assigns an array that has the server URL as index [0],
* and an array with all the room arrays as index[1]
* Example:
* [49ac5595058829c961eea6f60c44914cd08ea9b4c463d657fc82904eb2a89623] => Array (
* [0] =>
* [1] => Array (
* [animu] => Array (
* [token] => animu
* [name] => animu
* [active_users] => 34
* [description] =>
* )
* [cryptography] => Array (
* [token] => cryptography
* [name] => cryptography
* [active_users] => 14
* [description] =>
* )
function assign_rooms_to_address_assignments($addr_assignments_arr, $rooms_arr) {
$result = array();
foreach($addr_assignments_arr as $pubkey => $address) {
// only assign room array when one can be found in $rooms_arr
if($rooms_arr[$address]) {
$result[$pubkey] = array($address, $rooms_arr[$address]);
return $result;
* TODO: Description
* This function is only used for debugging
function generate_join_links($room_assignments_arr) {
$result = array();
// for each server a.k.a. public key do
foreach($room_assignments_arr as $pubkey => $room_assignment) {
// for every room do
foreach($room_assignment[1] as $room_array) {
// info:
// $room_array = array(
// "token" => bla,
// "name" => Blabla,
// "active_users" => -1,
// "description" => Blabla bla bla
$server = $room_assignment[0];
$join_link = $server . "/" . $room_array["token"] . "?public_key=" . $pubkey;
$result[] = $join_link;
$result = array_unique($result); // shouldn't be necessary
return $result;
* Test if preview_links are 404 and return the right one (or null) // @legacy
function get_preview_link($server_url, $token) {
$preview_link = $server_url . "/r/" . $token . "/";
$preview_link_alt = $server_url . "/view/room/" . $token;
$result = $preview_link;
if(!url_is_200($preview_link)) {
if(!url_is_200($preview_link_alt)) {
// $preview_link and $preview_link_alt not reachable
//$result = null;
$result = $preview_link; // assume preview_link to be the valid one TODO: Why is it empty sometimes?
else {
$result = $preview_link_alt; // $preview_link_alt reachable
return $result;
* Queries the first found room for a server for its actual public key
function visit_first_room_of_server_to_acquire_public_key($server_url) {
global $room_join_regex;
$result = null;
$rooms = query_single_servers_for_rooms($server_url);
// print_r($rooms);
if($rooms) {
$room_to_visit = $rooms[array_key_first($rooms)]; // use first room e.g. $rooms["offtopic"]
// print_r($room_to_visit);
$token = $room_to_visit["token"];
$preview_link = get_preview_link($server_url, $token); // @legacy
// var_dump($preview_link);
$preview_contents = file_get_contents($preview_link);
// print_r($preview_contents);
$join_links = array();
preg_match_all($room_join_regex, $preview_contents, $join_links);
// print_r($join_links);
$first_join_link = $join_links[0][0]; // first found join link
$result = explode("=", $first_join_link)[1]; // assume right of "=" is public key
// var_dump($result);
return $result;
* TODO: Description
function generate_info_arrays($room_assignments_arr) {
global $languages; // language_flags.php
$shortened_pubkey_length = 4; // shorten pubkey to this length to make room token unique
$info_arrays = array(); // contains the info for each community, will be returned as the result
// for each server a.k.a. public key do
foreach($room_assignments_arr as $pubkey => $room_assignment) {
$server_url = $room_assignment[0];
$shortened_pubkey = substr($pubkey, 0, $shortened_pubkey_length); // first X chars of pubkey
// for every room do
foreach($room_assignment[1] as $room_array) {
// info:
// $room_array = array(
// "token" => bla,
// "name" => Blabla,
// "active_users" => -1,
// "description" => Blabla bla bla
$join_link = $server_url . "/" . $room_array["token"] . "?public_key=" . $pubkey;
$identifier = $room_array["token"] . "+" . $shortened_pubkey;
$preview_link = get_preview_link($server_url, $room_array["token"]); // @legacy
// debug logging - does not work anymore, since $preview_link will not be empty when failed
if(!$preview_link || $preview_link == "") {
echo("Preview link is empty. Dumping variables." . PHP_EOL);
echo("Join link: " . $join_link . PHP_EOL);
echo("Server: " . $server_url. PHP_EOL);
echo("Token: " . $room_array["token"] . PHP_EOL);
$info_array = array(
"name" => $room_array["name"],
"language" => $languages[$identifier], // example: $languages["deutsch+118d"] = "🇩🇪"
"description" => $room_array["description"],
"active_users" => $room_array["active_users"],
"preview_link" => $preview_link,
"join_link" => $join_link
$info_arrays[$identifier] = $info_array;
log_info("Crawling list...");
$pages_sdir = crawl_source_index(
// sorting that keeps index association, sort by index
ksort($info_arrays, SORT_STRING | SORT_FLAG_CASE);
log_info('Done fetching sources.');
return $info_arrays;
return [...$pages_asgl, ...$pages_loki, ...$pages_sdir];
@ -758,4 +119,7 @@
// run main function

@ -0,0 +1,51 @@
* Community lists crawled for community links.
$SOURCES = array(
'ASGL' => '',
'LOKI' => '',
'SDIR' => '',
'SDIR-BASE' => '',
'SDIR-PATTERN' => '/view_session_group_user_lokinet\.php\?id=\d+/'
* List of server hosts queried individually.
// Official server
// Kee's server
// Serse with
// ""
"" => "39016f991400c35a46e11e06cb2a64d6d8ab6652e484a556b14f7cf57ed7e73a",
"" => "efcaecf00aebf5b75e62cf1fd550c6052842e1415a9339406e256c8b27cd2039",
"" => "e529311ec8fb6fdb950aaa4fb71fc4da3ea59c6c9ba2886708b9538eea6aa213",
"" => "e093994156ec92e4c13d0387208bfa48ae56dd88b8f60a03980d9ef048af1e3f",
"" => "426453e0e991235b62bc5f35f36d5a204e64b2d8b44e971609add6a10aac6674",
"" => "7908bcd748313355f99e62f9c1f11c395d04019410edb7ee1618dbe26a423c4f",
"" => "b501f2dc7dc912aa0981b0ba10f2ba739d2f729a7d9b37022aee505aaf72807c",
"" => "2cbde327e9da216af9a69876bc57e16cc0c540b0aa2dfecdd1c115e67993b040",
"" => "7242ad657dc2dd20e902a6fa82c34465907b67e80daf50173f38d5745abbaa24",
"" => "a03c383cf63c3c4efe67acc52112a6dd734b3a946b9545f488aaa93da7991238",
"" => "4b3e75eedd2116b4dab0bcb6443b0e9fbfce7bcf1d35970bdad8a57a0113fb20",
"" => "fdcb047eb78520e925fda512a45ae74c6e2de9e0df206b3c0471bf1509919559"

@ -1,9 +1,56 @@
* @var int[] Seconds and nanoseconds at start of logging period.
$hrtime_start = hrtime();
/** @var int Constant giving number of nanoseconds in a second. */
* Comparable enum describing the verbosity of logged messages.
final class LoggingVerbosity {
// Make class functionally static.
private function __construct() {}
const Error = 10;
const Warning = 20;
const Info = 30;
const Debug = 40;
* Returns the proper letter to mark the given message verbosity.
* @param int $verbosity Numeric LoggingVerbosity value.
* @return string
static function getVerbosityMarker(int $verbosity) {
return match($verbosity) {
LoggingVerbosity::Error => 'e',
LoggingVerbosity::Warning => 'w',
LoggingVerbosity::Info => 'i',
LoggingVerbosity::Debug => 'd'
const COLOR_RESET = "\033[0m";
static function getVerbosityColorMarker(int $verbosity) {
// See for reference.
return match($verbosity) {
LoggingVerbosity::Error => "\033[31m",
LoggingVerbosity::Warning => "\033[93m",
// LoggingVerbosity::Debug => "\033[90m",
default => ''
$VERBOSITY = LoggingVerbosity::Info;
* Calculate process runtime as [s, ns].
* @return int[] Seconds and nanoseconds.
function hrtime_interval() {
global $hrtime_start, $NANOSEC;
@ -14,18 +61,45 @@
return [$s - $s0, $ns - $ns0];
* Format process runtime to millisecond precision.
function runtime_str() {
list($s, $ns) = hrtime_interval();
return (
date('i:s.', $s) .
str_pad(intdiv($ns, 1E6), 3, "0", STR_PAD_LEFT);
str_pad(intdiv($ns, 1E6), 3, "0", STR_PAD_LEFT)
function _log_message(string $msg, int $message_verbosity) {
global $VERBOSITY;
if ($message_verbosity > $VERBOSITY) return;
$runtime = runtime_str();
$marker = LoggingVerbosity::getVerbosityMarker($message_verbosity);
$color_marker = LoggingVerbosity::getVerbosityColorMarker($message_verbosity);
$color_reset = LoggingVerbosity::COLOR_RESET;
// Need to concatenate marker to avoid interpolated array member syntax.
fwrite(STDERR, $color_marker . "[$runtime] [$marker] $msg$color_reset" . PHP_EOL);
function log_error(string $msg) {
_log_message($msg, LoggingVerbosity::Error);
function log_warning(string $msg) {
_log_message($msg, LoggingVerbosity::Warning);
function log_info(string $msg) {
_log_message($msg, LoggingVerbosity::Info);
function log_info($msg) {
fwrite(STDERR, "[" . runtime_str() . "] [i] $msg" . PHP_EOL);
function log_debug(string $msg) {
_log_message($msg, LoggingVerbosity::Debug);
function log_debug($msg) {
fwrite(STDERR, "[" . runtime_str() . "] [d] $msg" . PHP_EOL);
function log_value(mixed $value) {
log_debug(var_export($value, true));

@ -0,0 +1,424 @@
include_once "$PROJECT_ROOT/languages/language_flags.php";
class CommunityRoom implements JsonSerializable {
public readonly object $server;
public readonly int $active_users;
public readonly int $active_users_cutoff;
public readonly string $token;
public readonly string $name;
public readonly array $admins;
public readonly array $moderators;
public readonly float $created;
public readonly string $description;
public readonly ?int $image_id;
public readonly int $info_updates;
public readonly int $message_sequence;
public readonly bool $read;
public readonly bool $upload;
public readonly bool $write;
// Custom properties
public readonly string $language_flag;
private function __construct($server, array $data) {
global $languages;
$this->server = $server;
$this->active_users = $data['active_users'];
$this->active_users_cutoff = $data['active_users_cutoff'];
$this->name = $data['name'];
$this->token = $data['token'];
$this->admins = $data['admins'];
$this->moderators = $data['moderators'];
$this->created = $data['created'];
$this->description = $data['description'] ?? "";
$this->image_id = $data['image_id'];
$this->info_updates = $data['info_updates'];
$this->message_sequence = $data['message_sequence'];
$this->read = $data['read'];
$this->write = $data['write'];
$this->upload = $data['upload'];
$room_identifier = $this->get_room_identifier();
$this->language_flag =
? $languages[$room_identifier]
: "";
* Create a CommunityRoom instance from data.
* @param CommunityServer $server
public static function from_data($server, array $data) {
return new CommunityRoom($server, $data);
* Create an array of CommunityRoom instances from data.
* @param array[] $data
* @return CommunityRoom[]
public static function from_data_array($server, array $data) {
return array_map(function($room_data) use ($server) {
return CommunityRoom::from_data($server, $room_data);
}, $data);
function jsonSerialize(): array {
$data = get_object_vars($this);
return $data;
* Returns array of staff Session IDs.
* @return string[]
function get_staff() {
return array_unique(
[...$this->admins, ...$this->moderators]
* Returns seconds elapsed since room was created.
function get_age(): float {
return time() - $this->created;
function get_preview_url(): string {
$base_url = $this->server->base_url;
$token = $this->token;
return "$base_url/r/$token";
function get_invite_url(): string {
$base_url = $this->server->base_url;
$token = $this->token;
return "$base_url/r/$token/invite.png";
function get_join_url(): string {
$base_url = $this->server->base_url;
$pubkey = $this->server->pubkey;
$token = $this->token;
return "$base_url/$token?public_key=$pubkey";
function get_icon_url(): string | bool {
$image_id = $this->image_id;
if ($image_id == null)
return false;
$base_url = $this->server->base_url;
$token = $this->token;
return "$base_url/room/$token/file/$image_id";
* Returns our format of room identifier,
* i.e. token+pubkey[:4]
function get_room_identifier(): string {
$token = $this->token;
$pubkey_4 = substr($this->server->pubkey, 0, 4);
return "$token+$pubkey_4";
class CommunityServer implements JsonSerializable {
// public static int $STRINGIFY_MODE = 0;
public string $base_url = "";
public string $pubkey = "";
public ?array $rooms = null;
* Instructs CommunityServer instances
* to stringify to their base URL.
private static function next_dedupe_by_url() {
* Instructs CommunityServer instances
* to stringify to their public key.
private static function next_dedupe_by_pubkey() {
* Filters the given servers to remove URL duplicates.
* @param CommunityServer[] $servers
* @return CommunityServer[]
public static function dedupe_by_url($servers) {
return array_unique($servers);
* Filters the given servers to remove pubkey duplicates.
* @param CommunityServer[] $servers
* @return CommunityServer[]
public static function dedupe_by_pubkey($servers) {
return array_unique($servers);
private function __construct() { }
* Compare two CommunityServer instances.
* @param CommunityServer $a
* @param CommunityServer $b
static function compare($a, $b): int {
return strcmp($a->base_url, $b->base_url);
/**
 * Sort an array of servers in-place based on URL.
 * @param CommunityServer[] &$servers
 */
static function sort(array &$servers) {
    $by_url = [CommunityServer::class, 'compare'];
    usort($servers, $by_url);
}
/**
 * Stringify CommunityServer by custom property
 * to allow de-duping using array_unique().
 *
 * Mode 1 yields the public key; anything else (including an unset mode)
 * yields the base URL.
 * NOTE(review): the mode is read from the global $SERVER_STRINGIFY_MODE;
 * nothing visible here assigns it — confirm where it is meant to be set.
 */
function __toString(): string {
    // Without this declaration the variable is an undefined local and
    // the match below can never find a matching arm.
    global $SERVER_STRINGIFY_MODE;
    return match ($SERVER_STRINGIFY_MODE ?? 0) {
        1 => $this->pubkey,
        default => $this->base_url,
    };
}
// JsonSerializable implementation: the server is serialized as the
// array of its properties, as returned by get_object_vars().
function jsonSerialize(): array {
return get_object_vars($this);
/**
 * Create a server from its base URL only; the public key stays empty.
 * @param string $host Value stored as the server's base URL.
 * @return CommunityServer
 */
static function from_host($host) {
    $created = new CommunityServer();
    $created->base_url = $host;
    return $created;
}
/**
 * Create a server from its base URL and an already known public key.
 * @param string $host Value stored as the server's base URL.
 * @param string $pubkey Public key of the server.
 * @return CommunityServer
 */
static function from_known_host($host, $pubkey) {
    $created = new CommunityServer();
    $created->pubkey = $pubkey;
    $created->base_url = $host;
    return $created;
}
/**
 * Create servers from a list of base URLs and a map of known public keys.
 * @param string[] $hosts Base URLs of the servers.
 * @param string[] $pubkeys Public keys indexed by host name without scheme
 * (the value url_get_base() returns with $include_scheme = false).
 * @return CommunityServer[]
 */
static function from_known_hosts($hosts, $pubkeys) {
    $servers = [];
    foreach ($hosts as $base_url) {
        $server = new CommunityServer();
        $server->base_url = $base_url;
        $hostname = url_get_base($base_url, false);
        // Not every host has an entry in $pubkeys. Assigning the missing
        // value (null) to the string property would throw, so fall back
        // to the empty string, which has_pubkey() treats as "unknown".
        $server->pubkey = $pubkeys[$hostname] ?? "";
        $servers[] = $server;
    }
    return $servers;
}
/**
 * Create a server from a room join URL.
 * Only the base URL is derived from the link; the public key is left empty.
 * @param string $join_url
 * @return CommunityServer
 */
static function from_join_url(string $join_url) {
    $created = new CommunityServer();
    $base_url = url_get_base($join_url);
    $created->base_url = $base_url;
    return $created;
}
/**
 * Create one server per join URL using from_join_url().
 * Array keys of the input are preserved by array_map().
 * @param string[] $join_urls
 * @return CommunityServer[]
 */
static function from_join_urls(array $join_urls) {
    return array_map(
        'CommunityServer::from_join_url',
        $join_urls
    );
}
/**
 * Re-create a server from its associative-array form.
 * @param array $data Array with 'base_url', 'pubkey' and 'rooms' entries.
 * @return CommunityServer
 */
static function from_data(array $data) {
    $restored = new CommunityServer();
    $restored->base_url = $data['base_url'];
    $restored->pubkey = $data['pubkey'];
    // Rooms are built last, as they receive the server they belong to.
    $room_data = $data['rooms'];
    $restored->rooms = CommunityRoom::from_data_array($restored, $room_data);
    return $restored;
}
/**
 * Re-create servers from a list of associative arrays using from_data().
 * Array keys of the input are preserved by array_map().
 * @param array[] $data
 * @return CommunityServer[]
 */
static function from_data_array(array $data) {
    return array_map(
        'CommunityServer::from_data',
        $data
    );
}
/**
 * Collect the rooms of all given servers into one flat array.
 * @param CommunityServer[] $servers
 * @return CommunityRoom[]
 */
static function enumerate_rooms($servers) {
    $rooms = [];
    foreach ($servers as $server) {
        // $rooms is nullable; array_merge() rejects null arguments,
        // so a server without fetched rooms contributes nothing.
        $rooms[] = $server->rooms ?? [];
    }
    // The leading [] keeps the call valid when there are no servers.
    return array_merge([], ...$rooms);
}
/**
 * Polls all servers for rooms.
 * @param CommunityServer[] $servers Servers to poll.
 * @return CommunityServer[] Reachable servers.
 */
public static function poll_reachable(array $servers): array {
    $reachable_servers = [];
    // Synchronous for-loop for now.
    foreach ($servers as $server) {
        if (!($server->fetch_rooms())) continue;
        // Accept failures to fetch pubkey if already known.
        // (Has happened.)
        if (!(
            $server->fetch_pubkey()
            || $server->has_pubkey()
        )) continue;
        $reachable_servers[] = $server;
    }
    return $reachable_servers;
}
/**
 * Store the server's public key.
 * @param string $pubkey
 * @throws ValueError If a different public key is already set.
 */
function set_pubkey($pubkey) {
    $conflicts = $this->has_pubkey() && $this->pubkey != $pubkey;
    if ($conflicts) {
        throw new ValueError("Pubkey mismatch for {$this->base_url}");
    }
    $this->pubkey = $pubkey;
}
/**
 * Store the public key found in the 'public_key' query parameter
 * of the given join URL. Any previously stored key is overwritten.
 * @param string $join_url
 */
function set_pubkey_from_url($join_url) {
    $query_string = parse_url($join_url)['query'];
    parse_str($query_string, $query_params);
    $this->pubkey = $query_params['public_key'];
}
/**
 * Tell whether a public key is stored for this server.
 * @return bool False while the public key is the empty string.
 */
function has_pubkey() {
    $is_missing = ($this->pubkey == "");
    return !$is_missing;
}
// Returns the result of url_is_reachable() for this server's base URL.
function is_reachable() {
return url_is_reachable($this->base_url);
/**
 * Attempt to fetch rooms for self using SOGS API.
 * On success the decoded rooms replace $this->rooms.
 * @return bool True if successful, false otherwise.
 */
function fetch_rooms() {
    $base_url = $this->base_url;
    log_info("Fetching rooms for $base_url.");

    $response = curl_get_contents("$base_url/rooms?all=1");
    if (!$response) {
        log_warning("Could not fetch rooms for $base_url.");
        return false;
    }

    $decoded = json_decode($response, true);
    // Loose comparison: an empty room list counts as a failure as well.
    if ($decoded == null) {
        log_warning("Could not parse rooms for $base_url.");
        return false;
    }

    $this->rooms = CommunityRoom::from_data_array($this, $decoded);
    return true;
}
/**
 * Attempt to fetch own public key by parsing SOGS HTML preview.
 *
 * The preview page of the first room is downloaded and the public key
 * is taken from the first join link found in it.
 * @return bool True if successful, false otherwise.
 * @throws Error If the server has no rooms to take a preview from.
 */
function fetch_pubkey() {
    if (empty($this->rooms)) {
        log_error("Server has no rooms to poll for public key");
        throw new Error("Server has no rooms to poll for public key");
    }

    $preview_url = $this->rooms[0]->get_preview_url();
    log_info("Fetching pubkey from $preview_url");
    $room_view = curl_get_contents($preview_url);
    if (!$room_view) {
        log_warning("Failed to fetch room preview from $preview_url.");
        return false;
    }

    $links = parse_join_links($room_view);
    if (!isset($links[0])) {
        log_warning("Could not locate join link in preview at $preview_url.");
        return false;
    }

    // Store the key carried by the join link; without this step the
    // method would report success without having learned anything.
    $this->set_pubkey_from_url($links[0]);
    return true;
}

@ -1,27 +1,29 @@
* Counts every unique server from given $info_arrays and returns the count
// Pattern for join links of the form
// http(s)://<host>/<room token>?public_key=<64 hex digits>,
// matched case-insensitively.
$REGEX_JOIN_LINK = (function () {
    $scheme = 'https?:';
    $host = '[^\/]+';
    $token = '[0-9A-Za-z-_]+';
    $key = '[[:xdigit:]]{64}';
    // Pipe delimiters are used so the slashes of the URL need no escaping.
    return '|' . $scheme . '//' . $host . '/' . $token . '\?public_key=' . $key . '|i';
})();
/**
 * Counts every unique server from given $info_arrays and returns the count.
 * @param array[] $info_arrays Arrays that each hold a "join_link" entry.
 * @return int Number of distinct "<scheme>//<host>" prefixes.
 */
function count_servers($info_arrays) {
    $servers = array();
    foreach ($info_arrays as $i_arr) {
        $join_link = $i_arr["join_link"];
        // "https://host/token?..." splits into "https:", "", "host", ...
        $exploded = explode("/", $join_link);
        $servers[] = $exploded[0] . "//" . $exploded[2];
    }
    $servers = array_unique($servers);
    return count($servers);
}

/**
 * Counts the rooms of all given servers.
 * @param CommunityServer[] $servers
 * @return int Total number of rooms.
 */
function count_rooms($servers) {
    $rooms_total = 0;
    foreach ($servers as $server) {
        // A server whose rooms were never fetched holds null,
        // which count() does not accept.
        $rooms_total += count($server->rooms ?? []);
    }
    return $rooms_total;
}
/**
 * Shorten a URL for display.
 *
 * Strings longer than $len + 3 bytes are cut to $len bytes and get
 * "..." appended; shorter ones are returned unchanged, since cutting
 * them would not save any space.
 * @param string $url Text to shorten.
 * @param int $len Number of bytes to keep.
 * @return string
 */
function truncate($url, $len) {
    return (strlen($url) > $len + 3)
        ? substr($url, 0, $len) . '...'
        : $url;
}
@ -39,14 +41,8 @@
$retcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($retcode != 0) {
// log_debug($url . " is " . $retcode . ".");
return true;
else {
// log_debug($url . " is " . $retcode . ".");
return false;
log_debug($url . " is " . $retcode . ".");
return $retcode != 0;
@ -65,27 +61,27 @@
if ($retcode == 200) {
// log_debug($url . " is " . $retcode . ".");
log_debug($url . " is " . $retcode . ".");
return true;
else {
// log_debug($url . " is " . $retcode . ".");
log_debug($url . " is " . $retcode . ".");
return false;
* file_get_contents alternative that circumvents flaky routing to Chinese servers
function curl_get_contents($url) {
// Low default retries value so this doesn't run for 30 minutes
// FIXME: Does not seem to handle 308's, behaviour not transparent.
// TODO: Parallelize & use in CommunityServer::poll_reachable()
function curl_get_contents(string $url, $retries = 5) {
// use separate timeouts to reliably get data from Chinese server with repeated tries
$connecttimeout = 2; // wait at most X seconds to connect
$timeout = 3; // can't take longer than X seconds for the whole curl process
$sleep = 2; // sleep between tries in seconds
$retries = 120;
// $retries = 10; // debug
// takes at most ($timeout + $sleep) * retries seceonds
// 3 + 2 * 150 = 5 * 120 = 600s = 10m
// takes at most ($timeout + $sleep) * $retries seconds
$contents = false;
$retcode = -1;
@ -93,7 +89,8 @@
while(!$contents && $counter <= $retries && $retcode != 404) {
$curl = curl_init($url);
// curl_setopt($curl, CURLOPT_VERBOSE, true);
// curl_setopt($curl, CURLOPT_VERBOSE, true);
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
@ -107,7 +104,7 @@
// log_debug("Trial #" . $counter . " for " . $url . " returned code " . $retcode . ".");
log_debug("Attempt #" . $counter . " for " . $url . " returned code " . $retcode . ".");
@ -118,4 +115,35 @@
return $contents;
/**
 * Returns the scheme, hostname and optional port of a URL.
 * @param string $url URL to take the base from.
 * @param bool $include_scheme Whether to prefix the result with "<scheme>://".
 * @return string E.g. "https://example.org:8080", or "example.org:8080"
 * without the scheme.
 * @throws ValueError If the URL has no host, or no scheme although one is requested.
 */
function url_get_base(string $url, bool $include_scheme = true): string {
    $url_components = parse_url($url);
    // parse_url() returns false for seriously malformed URLs and omits
    // components it cannot find; fail clearly rather than build "://".
    if (!is_array($url_components) || !isset($url_components['host'])) {
        throw new ValueError("Could not determine host of URL '$url'");
    }

    $host = $url_components['host'];
    if (isset($url_components['port'])) {
        $port = $url_components['port'];
        $host .= ":$port";
    }

    if (!$include_scheme) {
        return $host;
    }
    if (!isset($url_components['scheme'])) {
        throw new ValueError("Could not determine scheme of URL '$url'");
    }
    $scheme = $url_components['scheme'];
    return "$scheme://$host";
}
/**
 * Extracts join links that match $REGEX_JOIN_LINK.
 * @param string $html Text to search for join links.
 * @return string[] Unique join links in order of first appearance.
 * Keys are the match positions left over by array_unique(), so they
 * may have gaps; the first link is always at index 0.
 */
function parse_join_links($html){
    // The pattern lives in the global scope and is not visible in here
    // unless it is imported explicitly.
    global $REGEX_JOIN_LINK;
    preg_match_all($REGEX_JOIN_LINK, $html, $match_result);
    $links = $match_result[0];
    $links = array_unique($links);
    return $links;
}

@ -1,20 +1,19 @@
* @var CommunityRoom[] $rooms
/**
 * Path of the cached QR code image for a room.
 * @param string $room_id Room identifier used as the file name.
 * @return string "<$QR_CODES>/<room_id>.png"
 */
function room_qr_code_cached($room_id) {
    global $QR_CODES;
    return $QR_CODES . "/" . $room_id . ".png";
}
/**
 * Derive URL of the invite code for a given room.
 * @param string $room_id Unused.
 * @param object $room Room whose preview_link is extended with "invite.png".
 * @return string
 */
function room_invite_png($room_id, $room) {
    $preview_link = $room->preview_link;
    return "{$preview_link}invite.png";
}
* Fetch QR codes from SOGS server and encode them as base64
* @param CommunityRoom $room
function base64_qr_code($room_id, $room, $size = "512x512") {
function base64_qr_code($room, $size = "512x512") {
$room_id = $room->get_room_identifier();
$png_cached = room_qr_code_cached($room_id);
if (file_exists($png_cached)) {
// fwrite(STDERR, "QR code found for " . $room_id . PHP_EOL);
@ -22,7 +21,7 @@
// fwrite(STDERR, "QR code NOT found for " . $room_id . PHP_EOL);
log_info("Fetching QR code for $room_id.");
$png = file_get_contents(room_invite_png($room_id, $room));
$png = file_get_contents($room->get_invite_url());
file_put_contents($png_cached, $png);
return base64_encode($png);
@ -31,14 +30,14 @@
<div id="modal-container">
<?php foreach ($rooms as $id => $room): ?>
<div id="modal_<?=$id?>" class="qr-code-modal">
<?php foreach ($rooms as $room): ?>
<div id="modal_<?=$room->get_room_identifier()?>" class="qr-code-modal">
<div class="qr-code-modal-content">
<span class="qr-code-modal-close" onclick='hideQRModal("<?=$id?>")'>
<span class="qr-code-modal-close" onclick='hideQRModal("<?=$room->get_room_identifier()?>")'>
src="data:image/png;base64,<?=base64_qr_code($id, $room)?>"
alt="Community join link encoded as QR code"

@ -1,5 +1,10 @@
require_once "$PROJECT_ROOT/php/utils/server-utils.php";
require_once "$PROJECT_ROOT/php/utils/utils.php";
require_once "$PROJECT_ROOT/php/utils/servers-rooms.php";
* @var CommunityRoom[] $rooms
// Once handlers are attached in JS, this check ceases to be useful.
function column_sortable($id) {
@ -34,37 +39,37 @@
<?php endforeach; ?>
<?php foreach ($rooms as $id => $room): ?>
<tr id="<?=$id?>">
<td class="td_identifier"><?=$id?></td>
<td class="td_language"><?=$room->language?></td>
<?php foreach ($rooms as $room): ?>
<tr id="<?=$room->get_room_identifier()?>">
<td class="td_identifier"><?=$room->get_room_identifier()?></td>
<td class="td_language"><?=$room->language_flag?></td>
<td class="td_name"><?=$room->name?></td>
<td class="td_description"
<td class="td_users"><?=$room->active_users?></td>
<td class="td_preview">
<a href="<?=$room->preview_link?>" target="_blank" rel="noopener noreferrer">
<?php if (str_starts_with($room->preview_link, 'http://')): ?>
<span class="protocol-indicator protocol-http">HTTP</span>
<?php endif; ?>
<?php if (str_starts_with($room->preview_link, 'https://')): ?>
<span class="protocol-indicator protocol-https">HTTPS</span>
<?php endif; ?>
<a href="<?=$room->get_preview_url()?>" target="_blank" rel="noopener noreferrer">
<?php if (str_starts_with($room->get_preview_url(), 'http://')): ?>
<span class="protocol-indicator protocol-http">HTTP</span>
<?php endif; ?>
<?php if (str_starts_with($room->get_preview_url(), 'https://')): ?>
<span class="protocol-indicator protocol-https">HTTPS</span>
<?php endif; ?>
<td class="td_qr_code">
alt="Pictogram of a QR code"
<td class="td_join_url">
<div class="join_url_container" data-url="<?=$room->join_link?>">
<a class="join_url show-from-w5" title="<?=$room->join_link?>"
><?=truncate($room->join_link, 32)?></a>
<a class="noscript" href="<?=$room->join_link?>"
<div class="join_url_container" data-url="<?=$room->get_join_url()?>">
<a class="join_url show-from-w5" title="<?=$room->get_join_url()?>"
><?=truncate($room->get_join_url(), 32)?></a>
<a class="noscript" href="<?=$room->get_join_url()?>"
>Copy link</a>

@ -1,12 +1,15 @@
// prerequisite include for sites and components
require_once "+getenv.php";
require_once "$PROJECT_ROOT/php/utils/server-utils.php";
require_once "$PROJECT_ROOT/php/utils/utils.php";
require_once "$PROJECT_ROOT/php/utils/servers-rooms.php";
$rooms_raw = file_get_contents($ROOMS_FILE);
$rooms = json_decode($rooms_raw);
$rooms_assoc = json_decode($rooms_raw, true);
// Read the contents of $ROOMS_FILE and decode them as JSON into
// associative arrays, the form CommunityServer::from_data_array() takes.
$servers_raw = file_get_contents($ROOMS_FILE);
$server_data = json_decode($servers_raw, true);
$servers = CommunityServer::from_data_array($server_data);
// Flat list of the rooms of all servers.
$rooms = CommunityServer::enumerate_rooms($servers);
// Last modification time of $ROOMS_FILE.
$timestamp = filemtime($ROOMS_FILE);
<!DOCTYPE html>
<html lang="en">
@ -37,8 +40,8 @@
<p id="server_summary">
<?=count($rooms_assoc)?> unique Session Communities
on <?=count_servers($rooms_assoc)?> servers have been found.
<?=count_rooms($servers)?> unique Session Communities
on <?=count($servers)?> servers have been found.
<span id="servers_hidden">(None hidden as JS is off)</span>
<p id="last_checked">
