2021-12-05 13:45:17 +00:00
package server
import (
2022-11-12 19:37:20 +00:00
"fmt"
"net/http"
"path"
2021-12-05 13:45:17 +00:00
"strings"
2022-06-11 21:02:06 +00:00
"github.com/rs/zerolog"
2021-12-05 14:02:44 +00:00
"github.com/rs/zerolog/log"
2021-12-05 13:45:17 +00:00
"codeberg.org/codeberg/pages/html"
2021-12-05 14:02:44 +00:00
"codeberg.org/codeberg/pages/server/cache"
2022-11-12 19:37:20 +00:00
"codeberg.org/codeberg/pages/server/context"
2021-12-05 14:21:05 +00:00
"codeberg.org/codeberg/pages/server/dns"
2022-06-11 21:02:06 +00:00
"codeberg.org/codeberg/pages/server/gitea"
2021-12-05 13:47:33 +00:00
"codeberg.org/codeberg/pages/server/upstream"
2021-12-03 02:44:21 +00:00
"codeberg.org/codeberg/pages/server/utils"
2022-06-14 18:35:11 +00:00
"codeberg.org/codeberg/pages/server/version"
2021-12-05 13:45:17 +00:00
)
2022-11-12 19:37:20 +00:00
// Names of the CORS response headers that Handler sets for hosts listed in
// its allowedCorsDomains argument.
const (
	// headerAccessControlAllowOrigin is set to "*" for CORS-enabled hosts.
	headerAccessControlAllowOrigin = "Access-Control-Allow-Origin"
	// headerAccessControlAllowMethods is set to the GET/HEAD method list for CORS-enabled hosts.
	headerAccessControlAllowMethods = "Access-Control-Allow-Methods"
)
2021-12-05 13:45:17 +00:00
// Handler handles a single HTTP request to the web server.
2022-11-12 19:37:20 +00:00
func Handler ( mainDomainSuffix , rawDomain string ,
2022-06-11 21:02:06 +00:00
giteaClient * gitea . Client ,
giteaRoot , rawInfoPage string ,
2022-11-12 19:37:20 +00:00
blacklistedPaths , allowedCorsDomains [ ] string ,
dnsLookupCache , canonicalDomainCache cache . SetGetKey ,
) http . HandlerFunc {
return func ( w http . ResponseWriter , req * http . Request ) {
log := log . With ( ) . Strs ( "Handler" , [ ] string { string ( req . Host ) , req . RequestURI } ) . Logger ( )
ctx := context . New ( w , req )
2021-12-05 13:45:17 +00:00
2022-11-12 19:37:20 +00:00
ctx . RespWriter . Header ( ) . Set ( "Server" , "CodebergPages/" + version . Version )
2021-12-05 13:45:17 +00:00
// Force new default from specification (since November 2020) - see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy#strict-origin-when-cross-origin
2022-11-12 19:37:20 +00:00
ctx . RespWriter . Header ( ) . Set ( "Referrer-Policy" , "strict-origin-when-cross-origin" )
2021-12-05 13:45:17 +00:00
// Enable browser caching for up to 10 minutes
2022-11-12 19:37:20 +00:00
ctx . RespWriter . Header ( ) . Set ( "Cache-Control" , "public, max-age=600" )
2021-12-05 13:45:17 +00:00
2022-11-12 19:37:20 +00:00
trimmedHost := utils . TrimHostPort ( req . Host )
2021-12-05 13:45:17 +00:00
// Add HSTS for RawDomain and MainDomainSuffix
2022-11-12 19:37:20 +00:00
if hsts := getHSTSHeader ( trimmedHost , mainDomainSuffix , rawDomain ) ; hsts != "" {
ctx . RespWriter . Header ( ) . Set ( "Strict-Transport-Security" , hsts )
2021-12-05 13:45:17 +00:00
}
// Block all methods not required for static pages
2022-11-12 19:37:20 +00:00
if ! ctx . IsMethod ( http . MethodGet ) && ! ctx . IsMethod ( http . MethodHead ) && ! ctx . IsMethod ( http . MethodOptions ) {
ctx . RespWriter . Header ( ) . Set ( "Allow" , http . MethodGet + ", " + http . MethodHead + ", " + http . MethodOptions ) // duplic 1
ctx . String ( "Method not allowed" , http . StatusMethodNotAllowed )
2021-12-05 13:45:17 +00:00
return
}
// Block blacklisted paths (like ACME challenges)
for _ , blacklistedPath := range blacklistedPaths {
2022-11-12 19:37:20 +00:00
if strings . HasPrefix ( ctx . Path ( ) , blacklistedPath ) {
html . ReturnErrorPage ( ctx , "requested blacklisted path" , http . StatusForbidden )
2021-12-05 13:45:17 +00:00
return
}
}
// Allow CORS for specified domains
2022-04-10 16:11:00 +00:00
allowCors := false
for _ , allowedCorsDomain := range allowedCorsDomains {
2022-11-12 19:37:20 +00:00
if strings . EqualFold ( trimmedHost , allowedCorsDomain ) {
2022-04-10 16:11:00 +00:00
allowCors = true
break
2021-12-05 13:45:17 +00:00
}
2022-04-10 16:11:00 +00:00
}
if allowCors {
2022-11-12 19:37:20 +00:00
ctx . RespWriter . Header ( ) . Set ( headerAccessControlAllowOrigin , "*" )
ctx . RespWriter . Header ( ) . Set ( headerAccessControlAllowMethods , http . MethodGet + ", " + http . MethodHead )
2022-04-10 16:11:00 +00:00
}
2022-11-12 19:37:20 +00:00
ctx . RespWriter . Header ( ) . Set ( "Allow" , http . MethodGet + ", " + http . MethodHead + ", " + http . MethodOptions ) // duplic 1
if ctx . IsMethod ( http . MethodOptions ) {
ctx . RespWriter . WriteHeader ( http . StatusNoContent )
2021-12-05 13:45:17 +00:00
return
}
// Prepare request information to Gitea
var targetOwner , targetRepo , targetBranch , targetPath string
2022-03-27 19:54:06 +00:00
targetOptions := & upstream . Options {
2022-06-11 21:02:06 +00:00
TryIndexPages : true ,
2021-12-05 13:45:17 +00:00
}
// tryBranch checks if a branch exists and populates the target variables. If canonicalLink is non-empty, it will
// also disallow search indexing and add a Link header to the canonical URL.
2022-11-12 19:37:20 +00:00
// TODO: move into external func to not alert vars indirectly
tryBranch := func ( log zerolog . Logger , repo , branch string , _path [ ] string , canonicalLink string ) bool {
2021-12-05 13:45:17 +00:00
if repo == "" {
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "tryBranch: repo is empty" )
2021-12-05 13:45:17 +00:00
return false
}
2022-07-08 11:39:24 +00:00
// Replace "~" to "/" so we can access branch that contains slash character
// Branch name cannot contain "~" so doing this is okay
branch = strings . ReplaceAll ( branch , "~" , "/" )
2021-12-05 13:45:17 +00:00
// Check if the branch exists, otherwise treat it as a file path
2022-11-12 19:37:20 +00:00
branchTimestampResult := upstream . GetBranchTimestamp ( giteaClient , targetOwner , repo , branch )
2021-12-05 13:45:17 +00:00
if branchTimestampResult == nil {
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "tryBranch: branch doesn't exist" )
2021-12-05 13:45:17 +00:00
return false
}
// Branch exists, use it
targetRepo = repo
2022-11-12 19:37:20 +00:00
targetPath = path . Join ( _path ... )
2021-12-05 13:47:33 +00:00
targetBranch = branchTimestampResult . Branch
2021-12-05 13:45:17 +00:00
2021-12-05 13:47:33 +00:00
targetOptions . BranchTimestamp = branchTimestampResult . Timestamp
2021-12-05 13:45:17 +00:00
if canonicalLink != "" {
// Hide from search machines & add canonical link
2022-11-12 19:37:20 +00:00
ctx . RespWriter . Header ( ) . Set ( "X-Robots-Tag" , "noarchive, noindex" )
ctx . RespWriter . Header ( ) . Set ( "Link" ,
2021-12-05 13:45:17 +00:00
strings . NewReplacer ( "%b" , targetBranch , "%p" , targetPath ) . Replace ( canonicalLink ) +
"; rel=\"canonical\"" ,
)
}
2022-06-11 21:02:06 +00:00
log . Debug ( ) . Msg ( "tryBranch: true" )
2021-12-05 13:45:17 +00:00
return true
}
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "preparations" )
if rawDomain != "" && strings . EqualFold ( trimmedHost , rawDomain ) {
2021-12-05 13:45:17 +00:00
// Serve raw content from RawDomain
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "raw domain" )
2021-12-05 13:45:17 +00:00
targetOptions . TryIndexPages = false
2022-11-12 19:37:20 +00:00
targetOptions . ServeRaw = true
2021-12-05 13:45:17 +00:00
2022-11-12 19:37:20 +00:00
pathElements := strings . Split ( strings . Trim ( ctx . Path ( ) , "/" ) , "/" )
2021-12-05 13:45:17 +00:00
if len ( pathElements ) < 2 {
// https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required
2022-11-12 19:37:20 +00:00
ctx . Redirect ( rawInfoPage , http . StatusTemporaryRedirect )
2021-12-05 13:45:17 +00:00
return
}
targetOwner = pathElements [ 0 ]
targetRepo = pathElements [ 1 ]
// raw.codeberg.org/example/myrepo/@main/index.html
if len ( pathElements ) > 2 && strings . HasPrefix ( pathElements [ 2 ] , "@" ) {
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "raw domain preparations, now trying with specified branch" )
2022-06-11 21:02:06 +00:00
if tryBranch ( log ,
targetRepo , pathElements [ 2 ] [ 1 : ] , pathElements [ 3 : ] ,
2021-12-03 02:05:38 +00:00
giteaRoot + "/" + targetOwner + "/" + targetRepo + "/src/branch/%b/%p" ,
2021-12-05 13:45:17 +00:00
) {
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "tryBranch, now trying upstream 1" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 17:17:28 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
2022-11-12 19:37:20 +00:00
canonicalDomainCache )
2021-12-05 13:45:17 +00:00
return
}
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "missing branch info" )
html . ReturnErrorPage ( ctx , "missing branch info" , http . StatusFailedDependency )
2021-12-05 13:45:17 +00:00
return
}
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "raw domain preparations, now trying with default branch" )
2022-06-11 21:02:06 +00:00
tryBranch ( log ,
targetRepo , "" , pathElements [ 2 : ] ,
2021-12-05 18:53:23 +00:00
giteaRoot + "/" + targetOwner + "/" + targetRepo + "/src/branch/%b/%p" ,
)
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "tryBranch, now trying upstream 2" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 18:53:23 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
2022-11-12 19:37:20 +00:00
canonicalDomainCache )
2021-12-05 18:53:23 +00:00
return
2022-11-12 19:37:20 +00:00
} else if strings . HasSuffix ( trimmedHost , mainDomainSuffix ) {
2021-12-05 13:45:17 +00:00
// Serve pages from subdomains of MainDomainSuffix
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "main domain suffix" )
2021-12-05 13:45:17 +00:00
2022-11-12 19:37:20 +00:00
pathElements := strings . Split ( strings . Trim ( ctx . Path ( ) , "/" ) , "/" )
targetOwner = strings . TrimSuffix ( trimmedHost , mainDomainSuffix )
2021-12-05 13:45:17 +00:00
targetRepo = pathElements [ 0 ]
targetPath = strings . Trim ( strings . Join ( pathElements [ 1 : ] , "/" ) , "/" )
if targetOwner == "www" {
2021-12-05 18:53:23 +00:00
// www.codeberg.page redirects to codeberg.page // TODO: rm hardcoded - use cname?
2022-11-12 19:37:20 +00:00
ctx . Redirect ( "https://" + string ( mainDomainSuffix [ 1 : ] ) + string ( ctx . Path ( ) ) , http . StatusPermanentRedirect )
2021-12-05 13:45:17 +00:00
return
}
// Check if the first directory is a repo with the second directory as a branch
// example.codeberg.page/myrepo/@main/index.html
if len ( pathElements ) > 1 && strings . HasPrefix ( pathElements [ 1 ] , "@" ) {
if targetRepo == "pages" {
// example.codeberg.org/pages/@... redirects to example.codeberg.org/@...
2022-11-12 19:37:20 +00:00
ctx . Redirect ( "/" + strings . Join ( pathElements [ 1 : ] , "/" ) , http . StatusTemporaryRedirect )
2021-12-05 13:45:17 +00:00
return
}
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "main domain preparations, now trying with specified repo & branch" )
branch := pathElements [ 1 ] [ 1 : ]
2022-06-11 21:02:06 +00:00
if tryBranch ( log ,
2022-11-12 19:37:20 +00:00
pathElements [ 0 ] , branch , pathElements [ 2 : ] ,
2021-12-05 13:45:17 +00:00
"/" + pathElements [ 0 ] + "/%p" ,
) {
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "tryBranch, now trying upstream 3" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 17:17:28 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
2022-11-12 19:37:20 +00:00
canonicalDomainCache )
2021-12-05 13:45:17 +00:00
} else {
2022-11-12 19:37:20 +00:00
html . ReturnErrorPage ( ctx ,
fmt . Sprintf ( "explizite set branch %q do not exist at '%s/%s'" , branch , targetOwner , targetRepo ) ,
http . StatusFailedDependency )
2021-12-05 13:45:17 +00:00
}
return
}
// Check if the first directory is a branch for the "pages" repo
// example.codeberg.page/@main/index.html
if strings . HasPrefix ( pathElements [ 0 ] , "@" ) {
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "main domain preparations, now trying with specified branch" )
branch := pathElements [ 0 ] [ 1 : ]
2022-06-11 21:02:06 +00:00
if tryBranch ( log ,
2022-11-12 19:37:20 +00:00
"pages" , branch , pathElements [ 1 : ] , "/%p" ) {
log . Debug ( ) . Msg ( "tryBranch, now trying upstream 4" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2022-11-12 19:37:20 +00:00
targetOptions , targetOwner , "pages" , targetBranch , targetPath ,
canonicalDomainCache )
2021-12-05 13:45:17 +00:00
} else {
2022-11-12 19:37:20 +00:00
html . ReturnErrorPage ( ctx ,
fmt . Sprintf ( "explizite set branch %q do not exist at '%s/%s'" , branch , targetOwner , "pages" ) ,
http . StatusFailedDependency )
2021-12-05 13:45:17 +00:00
}
return
}
// Check if the first directory is a repo with a "pages" branch
// example.codeberg.page/myrepo/index.html
// example.codeberg.page/pages/... is not allowed here.
log . Debug ( ) . Msg ( "main domain preparations, now trying with specified repo" )
2022-06-11 21:02:06 +00:00
if pathElements [ 0 ] != "pages" && tryBranch ( log ,
pathElements [ 0 ] , "pages" , pathElements [ 1 : ] , "" ) {
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "tryBranch, now trying upstream 5" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 17:17:28 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
2022-11-12 19:37:20 +00:00
canonicalDomainCache )
2021-12-05 13:45:17 +00:00
return
}
// Try to use the "pages" repo on its default branch
// example.codeberg.page/index.html
log . Debug ( ) . Msg ( "main domain preparations, now trying with default repo/branch" )
2022-06-11 21:02:06 +00:00
if tryBranch ( log ,
"pages" , "" , pathElements , "" ) {
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "tryBranch, now trying upstream 6" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 17:17:28 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
2022-11-12 19:37:20 +00:00
canonicalDomainCache )
2021-12-05 13:45:17 +00:00
return
}
// Couldn't find a valid repo/branch
2022-11-12 19:37:20 +00:00
html . ReturnErrorPage ( ctx ,
fmt . Sprintf ( "couldn't find a valid repo[%s]/branch[%s]" , targetRepo , targetBranch ) ,
http . StatusFailedDependency )
2021-12-05 13:45:17 +00:00
return
} else {
trimmedHostStr := string ( trimmedHost )
2022-11-12 19:37:20 +00:00
// Serve pages from custom domains
2021-12-05 14:21:05 +00:00
targetOwner , targetRepo , targetBranch = dns . GetTargetFromDNS ( trimmedHostStr , string ( mainDomainSuffix ) , dnsLookupCache )
2021-12-05 13:45:17 +00:00
if targetOwner == "" {
2022-11-12 19:37:20 +00:00
html . ReturnErrorPage ( ctx ,
"could not obtain repo owner from custom domain" ,
http . StatusFailedDependency )
2021-12-05 13:45:17 +00:00
return
}
2022-11-12 19:37:20 +00:00
pathElements := strings . Split ( strings . Trim ( ctx . Path ( ) , "/" ) , "/" )
2021-12-05 13:45:17 +00:00
canonicalLink := ""
if strings . HasPrefix ( pathElements [ 0 ] , "@" ) {
targetBranch = pathElements [ 0 ] [ 1 : ]
pathElements = pathElements [ 1 : ]
canonicalLink = "/%p"
}
// Try to use the given repo on the given branch or the default branch
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "custom domain preparations, now trying with details from DNS" )
2022-06-11 21:02:06 +00:00
if tryBranch ( log ,
targetRepo , targetBranch , pathElements , canonicalLink ) {
canonicalDomain , valid := upstream . CheckCanonicalDomain ( giteaClient , targetOwner , targetRepo , targetBranch , trimmedHostStr , string ( mainDomainSuffix ) , canonicalDomainCache )
2021-12-05 13:45:17 +00:00
if ! valid {
2022-11-12 19:37:20 +00:00
html . ReturnErrorPage ( ctx , "domain not specified in <code>.domains</code> file" , http . StatusMisdirectedRequest )
2021-12-05 13:45:17 +00:00
return
} else if canonicalDomain != trimmedHostStr {
// only redirect if the target is also a codeberg page!
2021-12-05 14:21:05 +00:00
targetOwner , _ , _ = dns . GetTargetFromDNS ( strings . SplitN ( canonicalDomain , "/" , 2 ) [ 0 ] , string ( mainDomainSuffix ) , dnsLookupCache )
2021-12-05 13:45:17 +00:00
if targetOwner != "" {
2022-11-12 19:37:20 +00:00
ctx . Redirect ( "https://" + canonicalDomain + string ( ctx . Path ( ) ) , http . StatusTemporaryRedirect )
2021-12-05 13:45:17 +00:00
return
}
2021-12-05 18:53:23 +00:00
2022-11-12 19:37:20 +00:00
html . ReturnErrorPage ( ctx , "target is no codeberg page" , http . StatusFailedDependency )
2021-12-05 18:53:23 +00:00
return
2021-12-05 13:45:17 +00:00
}
2022-11-12 19:37:20 +00:00
log . Debug ( ) . Msg ( "tryBranch, now trying upstream 7" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 17:17:28 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
2022-11-12 19:37:20 +00:00
canonicalDomainCache )
2021-12-05 13:45:17 +00:00
return
}
2021-12-05 18:53:23 +00:00
2022-11-12 19:37:20 +00:00
html . ReturnErrorPage ( ctx , "could not find target for custom domain" , http . StatusFailedDependency )
2021-12-05 18:53:23 +00:00
return
2021-12-05 13:45:17 +00:00
}
}
}