2019-10-18 00:54:11 +02:00
<?php
2020-09-02 13:53:43 +02:00
/**
 * Send an HTTP status code plus an optional body, then terminate the request.
 *
 * @param int    $code    HTTP status code handed to http_response_code().
 * @param string $message Response body; defaults to the empty string so that
 *                        body-less replies (e.g. 304) emit nothing.
 */
function send_response($code, $message = "") {
    http_response_code($code);
    echo $message;
    // Stop here: callers rely on this function never returning.
    exit();
}
2020-10-07 15:48:57 +02:00
/**
 * Split a Host header value into the pieces used for routing.
 *
 * Host names are case-insensitive and the header may carry a ":port" suffix,
 * so the value is lower-cased and the port removed before it is split on ".".
 *
 * @param string $host Raw Host header value, e.g. "Docs.Example.org:8080".
 * @return array array(labels, subdomain, tld): the list of labels, everything left
 *               of the last two labels joined with ".", and the last label.
 */
function split_request_host($host) {
    $host = strtolower(preg_replace('/:[0-9]+$/', '', (string) $host));
    $labels = explode('.', $host);
    return array($labels, implode(".", array_slice($labels, 0, -2)), end($labels));
}

list($domain_parts, $subdomain, $tld) = split_request_host(isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : "");

// Only the path is used for routing; the query string is cut off here.
$raw_request_uri = isset($_SERVER["REQUEST_URI"]) ? $_SERVER["REQUEST_URI"] : "";
$request_uri = explode("?", $raw_request_uri)[0];
$request_url = filter_var($request_uri, FILTER_SANITIZE_URL);
// Encoded blanks are turned back into real spaces (the URL whitelist applied later permits them).
$request_url = str_replace("%20", " ", $request_url);

$request_url_parts = explode("/", $request_url);
$request_url_parts = array_diff($request_url_parts, array("")); # Remove empty parts in URL
2021-02-20 20:38:08 +01:00
// Repository that holds a site unless one of the dedicated subdomains below overrides it.
$repo = "pages";

if ($tld === "org") {
    $subdomain_repo = array(
        // subdomain => array(owner, repo, allowCORS),
        "docs" => array("docs", "pages", false),
        "fonts" => array("codeberg-fonts", "pages", true),
        "get-it-on" => array("get-it-on", "pages", false),
        "design" => array("Codeberg", "Design", true)
    );
    if (array_key_exists($subdomain, $subdomain_repo)) {
        $owner = $subdomain_repo[$subdomain][0];
        $repo = $subdomain_repo[$subdomain][1];
        if ($subdomain_repo[$subdomain][2]) {
            // Allow CORS requests to specified pages, for web fonts etc.
            header("Access-Control-Allow-Origin: *");
        }
    } else {
        // The first path segment names the owner; it is consumed from the parts list.
        $owner = strtolower((string) array_shift($request_url_parts));
        // Compare against "" explicitly: a truthiness test would also reject the user name "0".
        if ($owner === "") {
            header("Location: https://codeberg.eu");
            exit;
        }
        if (strpos($owner, ".") === false) {
            // Owners without dots are redirected to their own subdomain, keeping path and query.
            $h = "Location: https://" . $owner . ".codeberg.eu/" . implode("/", $request_url_parts);
            if (isset($_SERVER['QUERY_STRING']) && $_SERVER['QUERY_STRING'] !== "")
                $h .= "?" . $_SERVER['QUERY_STRING'];
            header($h);
            exit;
        }
        // Owners containing a dot fall through and are served from this host.
    }
} else {
    $owner = strtolower($subdomain);
    if (strpos($owner, ".") !== false)
        send_response(200, "Pages not supported for user names with dots. Please rename your username to use Codeberg pages.");

    if ($owner === "raw") {
        // Proxy the request to the local backend and relay its answer with hardened headers.
        $ch = curl_init("http://localhost:3000" . $_SERVER["REQUEST_URI"]);
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_HEADER, true);
        $response = curl_exec($ch);
        if ($response === false) {
            // Without a response there is neither a status nor headers to relay.
            curl_close($ch);
            send_response(502, "upstream request failed");
        }
        $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        curl_close($ch);

        $header = explode("\r\n", substr($response, 0, $header_size));
        $body = substr($response, $header_size);
        foreach ($header as $h) {
            // Header names are case-insensitive. Upstream cookies are never forwarded.
            if ($h === "" || strncasecmp($h, "Set-Cookie:", 11) === 0)
                continue;
            if (strncasecmp($h, "Content-Type:", 13) === 0 && stripos($h, "text/html") !== false) {
                // text/html shouldn't be rendered on raw.codeberg.org, as it might confuse both users (with it being a legit codeberg.org subdomain) and developers (with it having a really strict CSP)
                header(str_ireplace("text/html", "text/plain", $h));
            } else {
                header($h);
            }
        }
        // Allow CORS
        header("Access-Control-Allow-Origin: *");
        // Even though text/html isn't allowed, SVG files might still invoke JavaScript, which is blocked here
        header("Content-Security-Policy: default-src 'none'; style-src 'unsafe-inline'; sandbox");
        // send_response() terminates the request, so nothing after it would run.
        send_response($status, $body);
    }
}
2020-10-14 12:12:12 +02:00
// Owner names that may never serve pages; each one is answered with a 404 below.
$reservedUsernames = array(
    "abuse", "admin", "api", "app", "apt", "apps", "appserver", "archive", "archives", "assets", "attachments", "auth", "avatar", "avatars",
    "bbs", "blog",
    "cache", "cd", "cdn", "ci", "cloud", "cluster", "commits", "connect", "contact",
    "dashboard", "debug", "deploy", "deployment", "dev", "dns", "dns0", "dns1", "dns2", "dns3", "dns4", "doc", "download", "downloads",
    "email", "error", "explore",
    "fonts", "forum", "ftp",
    "ghost",
    "hello", "help", "helpdesk", "host",
    "i", "imap", "info", "install", "internal", "issues",
    "less", "login",
    "m", "me", "mail", "mailserver", "manifest", "merch", "merchandise", "metrics", "milestones", "mx",
    "new", "news", "notifications",
    "official", "org", "ota", "owa",
    "page", "pages", "packages", "plugins", "poll", "polls", "pop", "pop3", "portal", "postmaster", "project", "projects", "pulls",
    "raw", "remote", "repo", "robot", "robots",
    "search", "secure", "server", "shop", "shopping", "signin", "signon", "smtp", "ssl", "sso", "stars", "store", "support", "swag", "swagshop",
    "takeout", "template", "test", "testing",
    "user",
    "vote", "voting",
    "web", "webmail", "webmaster", "webshop", "webstore", "welcome", "www", "www0", "www1", "www2", "www3", "www4", "www5", "www6", "www7", "www8", "www9",
    "ns", "ns0", "ns1", "ns2", "ns3", "ns4",
    "vpn",
);
// Strict comparison: names are matched as exact strings, never via numeric juggling.
if (in_array($owner, $reservedUsernames, true))
    send_response(404, "Reserved user name '" . $owner . "' cannot have pages");

// No owner at all: show the landing page. Compared against "" so that the
// user name "0" is not mistaken for "no owner".
if ($owner === "") {
    send_response(200, file_get_contents("./default-page.html"));
}
2020-05-04 22:24:56 +02:00
# Restrict allowed characters in request URI:
# (letters, digits, "_", space, "+", "-", "/" and "."; the path must start with "/")
if (preg_match("/^\/[a-zA-Z0-9_ +\-\/\.]*\$/", $request_url) != 1)
    send_response(404, "invalid request URL");

$git_prefix = "/data/git/gitea-repositories";
// realpath() resolves "..", "." and symlinks and yields false when the path does not exist.
$git_root = realpath("$git_prefix/$owner/$repo.git");
$file_url = implode("/", $request_url_parts);

# Ensure that the resolved repository lives inside $git_prefix.
# The trailing "/" keeps sibling directories that merely share the prefix from passing.
if ($git_root === false || strncmp($git_root, $git_prefix . "/", strlen($git_prefix) + 1) !== 0)
    send_response(404, "this user/organization does not have codeberg pages");
2020-05-04 23:43:52 +02:00
# If this is a folder, we explicitly redirect to folder URL, otherwise browsers will construct invalid relative links:
// exec() appends to an existing array, so start from a clean one.
$output = array();
// "git ls-tree HEAD:<path>" only succeeds when <path> is a tree (directory) in HEAD.
// Each value is passed through escapeshellarg() instead of being pasted between quotes.
$command = "cd " . escapeshellarg($git_root)
    . " && /usr/bin/git ls-tree " . escapeshellarg("HEAD:" . $file_url)
    . " > /dev/null";
exec($command, $output, $retval);
if ($retval === 0) {
    if (substr($request_url, -1) !== "/") {
        // Collapse leading slashes: a Location starting with "//" is protocol-relative
        // and would send the browser to another host.
        $h = "Location: /" . ltrim($request_url, "/") . "/";
        if (isset($_SERVER['QUERY_STRING']) && $_SERVER['QUERY_STRING'] !== "")
            $h .= "?" . $_SERVER['QUERY_STRING'];
        header($h);
        exit();
    }
    // Directory requested with trailing slash: serve its index document.
    if ($file_url !== "")
        $file_url .= "/";
    $file_url .= "index.html";
}
2020-05-02 17:56:52 +02:00
// Pick the Content-Type from the (case-insensitive) file extension of the requested path.
$ext = strtolower(pathinfo($file_url, PATHINFO_EXTENSION));

$mime_types = array(
    "css" => "text/css",
    "csv" => "text/csv",
    "gif" => "image/gif",
    "html" => "text/html",
    "ico" => "image/x-icon",
    "ics" => "text/calendar",
    "jpg" => "image/jpeg",
    "jpeg" => "image/jpeg",
    "js" => "application/javascript",
    "json" => "application/json",
    "pdf" => "application/pdf",
    "png" => "image/png",
    "svg" => "image/svg+xml",
    "ttf" => "font/ttf",
    "txt" => "text/plain",
    "woff" => "font/woff",
    "woff2" => "font/woff2",
    "xml" => "text/xml"
);
// Extensions missing from the table are delivered as an opaque byte stream.
$mime_type = isset($mime_types[$ext]) ? $mime_types[$ext] : "application/octet-stream";
header("Content-Type: " . $mime_type);
2020-09-02 13:53:43 +02:00
# Disabled for now: header("Cache-Control: public, max-age=10, immutable");
// The hash of the newest commit serves as ETag for every file of the site.
// exec() appends to an existing array, so reset it to make $output[0] this command's first line.
$output = array();
$command = "cd " . escapeshellarg($git_root) . " && /usr/bin/git log --format='%H' -1";
exec($command, $output, $retval);
if ($retval === 0 && count($output) > 0) {
    $revision = $output[0];
    header('ETag: "' . $revision . '"');
    if (isset($_SERVER["HTTP_IF_NONE_MATCH"])) {
        // Strip the weak-validator marker and the quotes before comparing.
        $req_revision = str_replace('"', '', str_replace('W/"', '', $_SERVER["HTTP_IF_NONE_MATCH"]));
        if ($req_revision === $revision) {
            // Client copy is current: answer without a body.
            send_response(304);
        }
    }
}
2020-09-02 11:55:36 +02:00
2020-05-02 17:56:52 +02:00
## Each candidate is run twice: first via exec() with the output discarded, only to learn
## the exit status, then via passthru() to stream the content. exec() returns the output
## as an array of lines with trailing whitespace removed, so it cannot relay binary data
## unchanged; passthru() writes the raw bytes straight to the client.
// Every value is passed through escapeshellarg() instead of being pasted between quotes.
$git_show = "cd " . escapeshellarg($git_root) . " && /usr/bin/git show ";
$output = array();

$command = $git_show . escapeshellarg("HEAD:" . $file_url);
exec($command . " > /dev/null", $output, $retval);
if ($retval !== 0) {
    # Try adding '.html' suffix, if this does not work either, report error
    $command = $git_show . escapeshellarg("HEAD:" . $file_url . ".html");
    exec($command . " > /dev/null", $output, $retval);
    // Both the ".html" fallback and the 404 page below are HTML documents.
    header("Content-Type: text/html");
    if ($retval !== 0) {
        # Render user-provided 404.html if exists, generic 404 message if not:
        http_response_code(404);
        $command = $git_show . escapeshellarg("HEAD:404.html");
        exec($command . " > /dev/null", $output, $retval);
        if ($retval !== 0)
            send_response(404, "no such file in repo: '" . htmlspecialchars($file_url) . "'");
    }
}
// $command now names whichever candidate exists; stream it unmodified.
passthru($command);
2020-10-07 15:48:57 +02:00