"64 char hex public key"
"13.233.251.36:8081" => "efcaecf00aebf5b75e62cf1fd550c6052842e1415a9339406e256c8b27cd2039",
"open.getsession.org" => "a03c383cf63c3c4efe67acc52112a6dd734b3a946b9545f488aaa93da7991238",
// "sog.zcyph.cc" => "e56fa54f9da6df91928f97023e8651e2df10fb6cf743a1ec96d0543acb8f2e7a"
);
// Path of the generated HTML page. main() reads this global and overwrites
// the file at this location on every run.
$output = "output/index.php";
// Entry point: run the whole pipeline. Calling main() ahead of its definition
// works because PHP hoists unconditional top-level function declarations.
main();
/*
 * Runs the complete pipeline: collects join links, determines reachable
 * servers, queries their rooms, matches public keys to servers and finally
 * writes the generated HTML page to the path stored in the global $output.
 * Prints progress to stdout. If the page cannot be written, an error is
 * printed instead of the success message.
 */
function main() {
    global $output; // destination of the generated page
    $timestamp = time(); // unix timestamp in seconds
    echo("Running, please wait..." . PHP_EOL);
    echo("This script will usually take approximately 4 minutes to run." . PHP_EOL);
    echo("It will take longer if the Chinese servers are spasming out." . PHP_EOL);

    // join links found in the wild and the servers they point to
    $html = get_html_from_known_sources();
    $wild_join_links = extract_join_links_from_html($html);
    $servers = get_servers_from_join_links($wild_join_links);
    $servers = reduce_servers($servers);
    $servers = merge_servers_with_known_good_servers($servers); //TODO: Switch merge and reduce?
    $rooms = query_servers_for_rooms($servers);

    // public keys and their assignment to servers and rooms
    $pubkeys = acquire_pubkeys_from_join_links($wild_join_links);
    $pubkeys = merge_pubkeys_with_known_pubkeys($pubkeys);
    $addr_assignments = get_pubkeys_of_servers($servers, $pubkeys);
    $addr_assignments = reduce_addresses_of_pubkeys($addr_assignments);
    //TODO: We also assigned empty room arrays. Should probably be fixed
    $room_assignments = assign_rooms_to_address_assignments($addr_assignments, $rooms);
    $final_join_links = generate_join_links($room_assignments);

    // build the page
    $table_html = get_table_html($room_assignments);
    $title = "Self-updating list of active Session Communities";
    $final_html = create_html_page_from_table($table_html, $title, $timestamp);

    // write output to disk (overwrites existing file); file_put_contents()
    // returns false on failure, in which case we must not claim success
    $bytes_written = file_put_contents($output, $final_html);
    if ($bytes_written === false) {
        echo("Error: could not write output to " . $output . PHP_EOL);
        return;
    }
    echo("Done. " . count($final_join_links) . " unique Session Communities have been found." . PHP_EOL);
}
/*
 * Downloads the pages of all known lists of Session Community join links
 * and returns them concatenated into one string:
 * - Awesome Session Open Group List repository on GitHub
 * - LokiLocker.com Open Groups
 * - https://session.directory open groups (every group has its own detail
 *   page, so the overview is fetched first and each linked page afterwards)
 */
function get_html_from_known_sources() {
    // the two lists that can be fetched directly
    $asgl_html = file_get_contents("https://raw.githubusercontent.com/GNU-Linux-libre/Awesome-Session-Group-List/main/README.md");
    $ll_html = file_get_contents("https://lokilocker.com/Mods/Session-Groups/wiki/Session-Open-Groups");

    // session.directory: the overview only contains relative links to the detail pages
    $overview_html = file_get_contents("https://session.directory/?all=groups");
    $matches = array();
    preg_match_all("/view_session_group_user_lokinet\.php\?id=\d+/", $overview_html, $matches);

    $sd_html = "";
    foreach ($matches[0] as $relative_link) {
        // turn the relative link into an absolute URL
        $detail_url = str_replace('view_session_group_user_lokinet.php?id=', 'https://session.directory/view_session_group_user_lokinet.php?id=', $relative_link);
        $sd_html .= file_get_contents($detail_url) . PHP_EOL;
    }

    // one source per line block, same order as fetched
    return implode(PHP_EOL, array($asgl_html, $ll_html, $sd_html)) . PHP_EOL;
}
/*
 * Collects every substring of $html that matches the global $room_join_regex
 * Example: http(s)://whatever:port/?public_key=0123456789abcef
 * The returned list is free of duplicates and sorted
 * Links are neither checked for reachability nor for https availability
 */
function extract_join_links_from_html($html) {
    global $room_join_regex;
    $matches = array();
    preg_match_all($room_join_regex, $html, $matches);
    // index 0 holds the full matches, which is all we are interested in
    $join_links = array_unique($matches[0]);
    sort($join_links);
    return $join_links;
}
/*
 * Reduces an array of join links to the servers (host plus optional port)
 * they point to, for example:
 * [0] => 1.2.3.4
 * [1] => 2.3.4.5:12345
 * [2] => example.com
 * [3] => dev.test:23456
 * The returned list is free of duplicates and sorted
 */
function get_servers_from_join_links($join_links_arr) {
    $servers = array_map(
        static function ($join_link) {
            // "http(s):" / "" / "1.2.3.4:56789" / "name?public_key=0123456789abcdef"
            return explode("/", $join_link)[2];
        },
        $join_links_arr
    );
    $servers = array_unique($servers);
    sort($servers);
    return $servers;
}
/*
 * Turns servers into reachable URLs
 * For every server https is tried first and http only if https is not
 * reachable, so there are never http/https duplicates of the same server
 * Servers that are reachable via neither protocol are dropped
 * Input is an array of servers without protocol (no http(s):// in front)
 * The returned list is free of duplicates and sorted
 */
function reduce_servers($servers_arr) {
    $reachable_urls = array();
    $unreachable_urls = array(); // only collected for debugging
    foreach ($servers_arr as $host) {
        $https_url = "https://" . $host;
        if (url_is_reachable($https_url)) {
            $reachable_urls[] = $https_url;
            continue;
        }
        $http_url = "http://" . $host;
        if (url_is_reachable($http_url)) {
            $reachable_urls[] = $http_url;
            continue;
        }
        $unreachable_urls[] = $http_url;
    }
    $reachable_urls = array_unique($reachable_urls);
    sort($reachable_urls);
    return $reachable_urls;
}
/*
 * Adds the servers from the global $known_servers to the given URLs
 * Some servers don't appear in the wild yet, but can be queried
 * Ideally this shouldn't be necessary, but it is for now
 * Should be called after reduce_servers()
 * The returned list is free of duplicates and sorted
 */
function merge_servers_with_known_good_servers($url_arr) {
    global $known_servers;
    // array_unique just in case a known server was also found in the wild
    $all_servers = array_unique(array_merge($url_arr, $known_servers));
    sort($all_servers);
    return $all_servers;
}
/*
 * Takes an array of server URLs like this:
 * [0] => http://1.2.3.4
 * [1] => https://2.3.4.5:12345
 * [2] => https://example.com
 * [3] => http://dev.test:23456
 * and asks query_single_servers_for_rooms() for the rooms of each of them
 * Returns a multidimensional array
 * The first dimension uses the server URL as key
 * The second dimension is whatever query_single_servers_for_rooms() returned
 * for that server
 * Servers without any result do not appear in the returned array
 */
function query_servers_for_rooms($url_arr) {
    $failed_arr = array(); // debug
    // the result is keyed by URL and can't be deduplicated or sorted
    // afterwards, so the input is normalised instead (not strictly necessary)
    $unique_urls = array_unique($url_arr);
    sort($unique_urls);

    $rooms_by_server = array();
    foreach ($unique_urls as $server_url) {
        $server_rooms = query_single_servers_for_rooms($server_url, $failed_arr);
        if (!$server_rooms) {
            continue; // nothing found on this server
        }
        $rooms_by_server[$server_url] = $server_rooms;
    }
    return $rooms_by_server;
}
/*
 * Queries the /rooms JSON endpoint of a single server
 * If the endpoint gives no usable answer (e.g. 404 on a legacy server),
 * the server's homepage is searched for rooms instead
 *
 * $server_url: URL including protocol, e.g. https://example.com:1234
 * $failed_arr: optional array, taken by reference; the server URL is
 *              appended to it whenever the /rooms endpoint failed
 *
 * Returns an array of rooms that uses the room token as key
 * Each room is an array with token, name, active_users and description
 * (fields missing in the response are null)
 * Returns null if neither the endpoint nor the homepage yielded rooms
 */
function query_single_servers_for_rooms($server_url, &$failed_arr = null) {
    $json_url = $server_url . "/rooms";
    $json = curl_get_contents($json_url); // circumvents flaky routing
    $decoded = $json ? json_decode($json) : null;

    // the endpoint answered with something that could be decoded
    if ($decoded) {
        $rooms = array();
        if (is_array($decoded) || is_object($decoded)) {
            foreach ($decoded as $json_room) {
                // skip anything that is not a room object with a token,
                // e.g. the string values of a JSON error object
                if (!is_object($json_room) || !isset($json_room->token)) {
                    continue;
                }
                $token = $json_room->token; // room "name"
                $room = array("token" => $token);
                foreach (array("name", "active_users", "description") as $field) {
                    $room[$field] = isset($json_room->$field) ? $json_room->$field : null;
                }
                $rooms[$token] = $room;
            }
        }
        return $rooms;
    }

    // no or undecodable response - could mean it's a legacy server that
    // doesn't provide the /rooms endpoint
    if (is_array($failed_arr)) {
        // an (even empty) array was handed in, so record the failed URL
        $failed_arr[] = $server_url;
    }
    $legacy_rooms = query_homepage_for_rooms($server_url);
    return $legacy_rooms ? $legacy_rooms : null;
}
/*
* For servers that do not provide the /rooms endpoint
* Takes same input as query_api_for_rooms(), but only singular URL
* Returns array of all available rooms (each its own array with token, name, users and description)
* Result is false if no rooms were found
*/
function query_homepage_for_rooms($url) {
$result = array();
global $room_token_regex_part;
$contents = file_get_contents($url);
if($contents) {
$regex_new = "/\/r\/" . $room_token_regex_part . "/";
$regex_old = "/\/view\/room\/" . $room_token_regex_part . "/";
preg_match_all($regex_new, $contents, $rooms);
$rooms = $rooms[0];
// if the new regex doesn't match, use the old one
if(empty($rooms)) {
preg_match_all($regex_old, $contents, $rooms);
$rooms = $rooms[0];
}
// if one of the two regex has found anything
if(!empty($rooms)) {
// we also want the room names (not tokens)
preg_match_all('/