From 974229681f4cc7f1ed31df9b05eabef2df013809 Mon Sep 17 00:00:00 2001 From: video-prize-ranch Date: Thu, 30 Mar 2023 21:36:31 +0000 Subject: [PATCH] Initial redirects implementation (#148) Adds basic support for `_redirects` files. It supports a subset of what IPFS supports: https://docs.ipfs.tech/how-to/websites-on-ipfs/redirects-and-custom-404s/ Example: ``` /redirect https://example.com/ 301 /another-redirect /page 301 /302 https://example.com/ 302 /app/* /index.html 200 /articles/* /posts/:splat 301 ``` 301 redirect: https://video-prize-ranch.localhost.mock.directory:4430/redirect SPA rewrite: https://video-prize-ranch.localhost.mock.directory:4430/app/path/path Catch-all with splat: https://video-prize-ranch.localhost.mock.directory:4430/articles/path/path Closes #46 Co-authored-by: video-prize-ranch Co-authored-by: 6543 <6543@obermui.de> Reviewed-on: https://codeberg.org/Codeberg/pages-server/pulls/148 Reviewed-by: 6543 <6543@obermui.de> Co-authored-by: video-prize-ranch Co-committed-by: video-prize-ranch --- FEATURES.md | 45 +++++++++ cmd/main.go | 4 +- integration/get_test.go | 40 ++++++++ server/handler/handler.go | 8 +- server/handler/handler_custom_domain.go | 4 +- server/handler/handler_raw_domain.go | 6 +- server/handler/handler_sub_domain.go | 12 +-- server/handler/handler_test.go | 1 + server/handler/try.go | 3 +- server/upstream/redirects.go | 117 ++++++++++++++++++++++++ server/upstream/upstream.go | 16 +++- 11 files changed, 235 insertions(+), 21 deletions(-) create mode 100644 FEATURES.md create mode 100644 server/upstream/redirects.go diff --git a/FEATURES.md b/FEATURES.md new file mode 100644 index 0000000..7560a1d --- /dev/null +++ b/FEATURES.md @@ -0,0 +1,45 @@ +# Features + +## Custom domains + +... + +## Redirects + +Redirects can be created with a `_redirects` file with the following format: + +``` +# Comment +from to [status] +``` + +* Lines starting with `#` are ignored +* `from` - the path to redirect from (Note: repository and branch names are removed from request URLs) +* `to` - the path or URL to redirect to +* `status` - status code to use when redirecting (default 301) + +### Status codes + +* `200` - returns content from specified path (no external URLs) without changing the URL (rewrite) +* `301` - Moved Permanently (Permanent redirect) +* `302` - Found (Temporary redirect) + +### Examples + +#### SPA (single-page application) rewrite + +Redirects all paths to `/index.html` for single-page apps. + +``` +/* /index.html 200 +``` + +#### Splats + +Redirects every path under `/articles` to `/posts` while keeping the path. + +``` +/articles/* /posts/:splat 302 +``` + +Example: `/articles/2022/10/12/post-1/` -> `/posts/2022/10/12/post-1/` diff --git a/cmd/main.go b/cmd/main.go index 45e151d..84915c9 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -81,6 +81,8 @@ func Serve(ctx *cli.Context) error { canonicalDomainCache := cache.NewKeyValueCache() // dnsLookupCache stores DNS lookups for custom domains dnsLookupCache := cache.NewKeyValueCache() + // redirectsCache stores redirects in _redirects files + redirectsCache := cache.NewKeyValueCache() // clientResponseCache stores responses from the Gitea server clientResponseCache := cache.NewKeyValueCache() @@ -138,7 +140,7 @@ func Serve(ctx *cli.Context) error { rawInfoPage, BlacklistedPaths, allowedCorsDomains, defaultBranches, - dnsLookupCache, canonicalDomainCache) + dnsLookupCache, canonicalDomainCache, redirectsCache) // Start the ssl listener log.Info().Msgf("Start SSL server using TCP listener on %s", listener.Addr()) diff --git a/integration/get_test.go b/integration/get_test.go index 9d97390..cfb7188 100644 --- a/integration/get_test.go +++ b/integration/get_test.go @@ -151,6 +151,46 @@ func TestGetNotFound(t *testing.T) { assert.EqualValues(t, 37, getSize(resp.Body)) } +func TestRedirect(t *testing.T) { + log.Println("=== TestRedirect ===") + // test redirects + resp, err := getTestHTTPSClient().Get("https://cb_pages_tests.localhost.mock.directory:4430/some_redirects/redirect") + assert.NoError(t, err) + if !assert.NotNil(t, resp) { + t.FailNow() + } + assert.EqualValues(t, http.StatusMovedPermanently, resp.StatusCode) + assert.EqualValues(t, "https://example.com/", resp.Header.Get("Location")) +} + +func TestSPARedirect(t *testing.T) { + log.Println("=== TestSPARedirect ===") + // test SPA redirects + url := "https://cb_pages_tests.localhost.mock.directory:4430/some_redirects/app/aqdjw" + resp, err := getTestHTTPSClient().Get(url) + assert.NoError(t, err) + if !assert.NotNil(t, resp) { + t.FailNow() + } + assert.EqualValues(t, http.StatusOK, resp.StatusCode) + assert.EqualValues(t, url, resp.Request.URL.String()) + assert.EqualValues(t, "text/html; charset=utf-8", resp.Header.Get("Content-Type")) + assert.EqualValues(t, "258", resp.Header.Get("Content-Length")) + assert.EqualValues(t, 258, getSize(resp.Body)) +} + +func TestSplatRedirect(t *testing.T) { + log.Println("=== TestSplatRedirect ===") + // test splat redirects + resp, err := getTestHTTPSClient().Get("https://cb_pages_tests.localhost.mock.directory:4430/some_redirects/articles/qfopefe") + assert.NoError(t, err) + if !assert.NotNil(t, resp) { + t.FailNow() + } + assert.EqualValues(t, http.StatusMovedPermanently, resp.StatusCode) + assert.EqualValues(t, "/posts/qfopefe", resp.Header.Get("Location")) +} + func TestFollowSymlink(t *testing.T) { log.Printf("=== TestFollowSymlink ===\n") diff --git a/server/handler/handler.go b/server/handler/handler.go index a944c7e..7edcf95 100644 --- a/server/handler/handler.go +++ b/server/handler/handler.go @@ -25,7 +25,7 @@ func Handler(mainDomainSuffix, rawDomain string, rawInfoPage string, blacklistedPaths, allowedCorsDomains []string, defaultPagesBranches []string, - dnsLookupCache, canonicalDomainCache cache.SetGetKey, + dnsLookupCache, canonicalDomainCache, redirectsCache cache.SetGetKey, ) http.HandlerFunc { return func(w http.ResponseWriter, req *http.Request) { log := log.With().Strs("Handler", []string{req.Host, req.RequestURI}).Logger() @@ -93,7 +93,7 @@ func Handler(mainDomainSuffix, rawDomain string, mainDomainSuffix, rawInfoPage, trimmedHost, pathElements, - canonicalDomainCache) + canonicalDomainCache, redirectsCache) } else if strings.HasSuffix(trimmedHost, mainDomainSuffix) { log.Debug().Msg("subdomain request detecded") handleSubDomain(log, ctx, giteaClient, @@ -101,7 +101,7 @@ func Handler(mainDomainSuffix, rawDomain string, defaultPagesBranches, trimmedHost, pathElements, - canonicalDomainCache) + canonicalDomainCache, redirectsCache) } else { log.Debug().Msg("custom domain request detecded") handleCustomDomain(log, ctx, giteaClient, @@ -109,7 +109,7 @@ func Handler(mainDomainSuffix, rawDomain string, trimmedHost, pathElements, defaultPagesBranches[0], - dnsLookupCache, canonicalDomainCache) + dnsLookupCache, canonicalDomainCache, redirectsCache) } } } diff --git a/server/handler/handler_custom_domain.go b/server/handler/handler_custom_domain.go index 1b85f62..8742be4 100644 --- a/server/handler/handler_custom_domain.go +++ b/server/handler/handler_custom_domain.go @@ -19,7 +19,7 @@ func handleCustomDomain(log zerolog.Logger, ctx *context.Context, giteaClient *g trimmedHost string, pathElements []string, firstDefaultBranch string, - dnsLookupCache, canonicalDomainCache cache.SetGetKey, + dnsLookupCache, canonicalDomainCache, redirectsCache cache.SetGetKey, ) { // Serve pages from custom domains targetOwner, targetRepo, targetBranch := dns.GetTargetFromDNS(trimmedHost, mainDomainSuffix, firstDefaultBranch, dnsLookupCache) @@ -64,7 +64,7 @@ func handleCustomDomain(log zerolog.Logger, ctx *context.Context, giteaClient *g } log.Debug().Msg("tryBranch, now trying upstream 7") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache) return } diff --git a/server/handler/handler_raw_domain.go b/server/handler/handler_raw_domain.go index 5e974da..aa41c52 100644 --- a/server/handler/handler_raw_domain.go +++ b/server/handler/handler_raw_domain.go @@ -19,7 +19,7 @@ func handleRaw(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Clie mainDomainSuffix, rawInfoPage string, trimmedHost string, pathElements []string, - canonicalDomainCache cache.SetGetKey, + canonicalDomainCache, redirectsCache cache.SetGetKey, ) { // Serve raw content from RawDomain log.Debug().Msg("raw domain") @@ -41,7 +41,7 @@ func handleRaw(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Clie TargetPath: path.Join(pathElements[3:]...), }, true); works { log.Trace().Msg("tryUpstream: serve raw domain with specified branch") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache) return } log.Debug().Msg("missing branch info") @@ -58,7 +58,7 @@ func handleRaw(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Clie TargetPath: path.Join(pathElements[2:]...), }, true); works { log.Trace().Msg("tryUpstream: serve raw domain with default branch") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache) } else { html.ReturnErrorPage(ctx, fmt.Sprintf("raw domain could not find repo '%s/%s' or repo is empty", targetOpt.TargetOwner, targetOpt.TargetRepo), diff --git a/server/handler/handler_sub_domain.go b/server/handler/handler_sub_domain.go index 68f4822..8731bec 100644 --- a/server/handler/handler_sub_domain.go +++ b/server/handler/handler_sub_domain.go @@ -21,7 +21,7 @@ func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gite defaultPagesBranches []string, trimmedHost string, pathElements []string, - canonicalDomainCache cache.SetGetKey, + canonicalDomainCache, redirectsCache cache.SetGetKey, ) { // Serve pages from subdomains of MainDomainSuffix log.Debug().Msg("main domain suffix") @@ -53,7 +53,7 @@ func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gite TargetPath: path.Join(pathElements[2:]...), }, true); works { log.Trace().Msg("tryUpstream: serve with specified repo and branch") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache) } else { html.ReturnErrorPage(ctx, fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", targetOpt.TargetBranch, targetOpt.TargetOwner, targetOpt.TargetRepo), @@ -83,7 +83,7 @@ func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gite TargetPath: path.Join(pathElements[1:]...), }, true); works { log.Trace().Msg("tryUpstream: serve default pages repo with specified branch") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache) } else { html.ReturnErrorPage(ctx, fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", targetOpt.TargetBranch, targetOpt.TargetOwner, targetOpt.TargetRepo), @@ -106,7 +106,7 @@ func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gite TargetPath: path.Join(pathElements[1:]...), }, false); works { log.Debug().Msg("tryBranch, now trying upstream 5") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache) return } } @@ -122,7 +122,7 @@ func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gite TargetPath: path.Join(pathElements...), }, false); works { log.Debug().Msg("tryBranch, now trying upstream 6") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache) return } } @@ -137,7 +137,7 @@ func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gite TargetPath: path.Join(pathElements...), }, false); works { log.Debug().Msg("tryBranch, now trying upstream 6") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache) return } diff --git a/server/handler/handler_test.go b/server/handler/handler_test.go index ed063b2..de705ec 100644 --- a/server/handler/handler_test.go +++ b/server/handler/handler_test.go @@ -21,6 +21,7 @@ func TestHandlerPerformance(t *testing.T) { []string{"pages"}, cache.NewKeyValueCache(), cache.NewKeyValueCache(), + cache.NewKeyValueCache(), ) testCase := func(uri string, status int) { diff --git a/server/handler/try.go b/server/handler/try.go index 5c65138..6cfe08e 100644 --- a/server/handler/try.go +++ b/server/handler/try.go @@ -18,6 +18,7 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client, mainDomainSuffix, trimmedHost string, options *upstream.Options, canonicalDomainCache cache.SetGetKey, + redirectsCache cache.SetGetKey, ) { // check if a canonical domain exists on a request on MainDomain if strings.HasSuffix(trimmedHost, mainDomainSuffix) && !options.ServeRaw { @@ -39,7 +40,7 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client, options.Host = trimmedHost // Try to request the file from the Gitea API - if !options.Upstream(ctx, giteaClient) { + if !options.Upstream(ctx, giteaClient, redirectsCache) { html.ReturnErrorPage(ctx, "", ctx.StatusCode) } } diff --git a/server/upstream/redirects.go b/server/upstream/redirects.go new file mode 100644 index 0000000..ab6c971 --- /dev/null +++ b/server/upstream/redirects.go @@ -0,0 +1,117 @@ +package upstream + +import ( + "strconv" + "strings" + "time" + + "codeberg.org/codeberg/pages/server/cache" + "codeberg.org/codeberg/pages/server/context" + "codeberg.org/codeberg/pages/server/gitea" + "github.com/rs/zerolog/log" +) + +type Redirect struct { + From string + To string + StatusCode int +} + +// redirectsCacheTimeout specifies the timeout for the redirects cache. +var redirectsCacheTimeout = 10 * time.Minute + +const redirectsConfig = "_redirects" + +// getRedirects returns redirects specified in the _redirects file. +func (o *Options) getRedirects(giteaClient *gitea.Client, redirectsCache cache.SetGetKey) []Redirect { + var redirects []Redirect + cacheKey := o.TargetOwner + "/" + o.TargetRepo + "/" + o.TargetBranch + + // Check for cached redirects + if cachedValue, ok := redirectsCache.Get(cacheKey); ok { + redirects = cachedValue.([]Redirect) + } else { + // Get _redirects file and parse + body, err := giteaClient.GiteaRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, redirectsConfig) + if err == nil { + for _, line := range strings.Split(string(body), "\n") { + redirectArr := strings.Fields(line) + + // Ignore comments and invalid lines + if strings.HasPrefix(line, "#") || len(redirectArr) < 2 { + continue + } + + // Get redirect status code + statusCode := 301 + if len(redirectArr) == 3 { + statusCode, err = strconv.Atoi(redirectArr[2]) + if err != nil { + log.Info().Err(err).Msgf("could not read %s of %s/%s", redirectsConfig, o.TargetOwner, o.TargetRepo) + } + } + + redirects = append(redirects, Redirect{ + From: redirectArr[0], + To: redirectArr[1], + StatusCode: statusCode, + }) + } + } + _ = redirectsCache.Set(cacheKey, redirects, redirectsCacheTimeout) + } + return redirects +} + +func (o *Options) matchRedirects(ctx *context.Context, giteaClient *gitea.Client, redirects []Redirect, redirectsCache cache.SetGetKey) (final bool) { + if len(redirects) > 0 { + for _, redirect := range redirects { + reqUrl := ctx.Req.RequestURI + // remove repo and branch from request url + reqUrl = strings.TrimPrefix(reqUrl, "/"+o.TargetRepo) + reqUrl = strings.TrimPrefix(reqUrl, "/@"+o.TargetBranch) + + // check if from url matches request url + if strings.TrimSuffix(redirect.From, "/") == strings.TrimSuffix(reqUrl, "/") { + // do rewrite if status code is 200 + if redirect.StatusCode == 200 { + o.TargetPath = redirect.To + o.Upstream(ctx, giteaClient, redirectsCache) + return true + } else { + ctx.Redirect(redirect.To, redirect.StatusCode) + return true + } + } + + // handle wildcard redirects + trimmedFromUrl := strings.TrimSuffix(redirect.From, "/*") + if strings.HasSuffix(redirect.From, "/*") && strings.HasPrefix(reqUrl, trimmedFromUrl) { + if strings.Contains(redirect.To, ":splat") { + splatUrl := strings.ReplaceAll(redirect.To, ":splat", strings.TrimPrefix(reqUrl, trimmedFromUrl)) + // do rewrite if status code is 200 + if redirect.StatusCode == 200 { + o.TargetPath = splatUrl + o.Upstream(ctx, giteaClient, redirectsCache) + return true + } else { + ctx.Redirect(splatUrl, redirect.StatusCode) + return true + } + } else { + // do rewrite if status code is 200 + if redirect.StatusCode == 200 { + o.TargetPath = redirect.To + o.Upstream(ctx, giteaClient, redirectsCache) + return true + } else { + ctx.Redirect(redirect.To, redirect.StatusCode) + return true + } + } + } + } + } + + return false +} diff --git a/server/upstream/upstream.go b/server/upstream/upstream.go index 3845969..f97c6ae 100644 --- a/server/upstream/upstream.go +++ b/server/upstream/upstream.go @@ -11,6 +11,7 @@ import ( "github.com/rs/zerolog/log" "codeberg.org/codeberg/pages/html" + "codeberg.org/codeberg/pages/server/cache" "codeberg.org/codeberg/pages/server/context" "codeberg.org/codeberg/pages/server/gitea" ) @@ -52,7 +53,7 @@ type Options struct { } // Upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context. -func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (final bool) { +func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redirectsCache cache.SetGetKey) (final bool) { log := log.With().Strs("upstream", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath}).Logger() if o.TargetOwner == "" || o.TargetRepo == "" { @@ -103,6 +104,12 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin // Handle not found error if err != nil && errors.Is(err, gitea.ErrorNotFound) { + // Get and match redirects + redirects := o.getRedirects(giteaClient, redirectsCache) + if o.matchRedirects(ctx, giteaClient, redirects, redirectsCache) { + return true + } + if o.TryIndexPages { // copy the o struct & try if an index page exists optionsForIndexPages := *o @@ -110,7 +117,7 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin optionsForIndexPages.appendTrailingSlash = true for _, indexPage := range upstreamIndexPages { optionsForIndexPages.TargetPath = strings.TrimSuffix(o.TargetPath, "/") + "/" + indexPage - if optionsForIndexPages.Upstream(ctx, giteaClient) { + if optionsForIndexPages.Upstream(ctx, giteaClient, redirectsCache) { return true } } @@ -118,7 +125,7 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin optionsForIndexPages.appendTrailingSlash = false optionsForIndexPages.redirectIfExists = strings.TrimSuffix(ctx.Path(), "/") + ".html" optionsForIndexPages.TargetPath = o.TargetPath + ".html" - if optionsForIndexPages.Upstream(ctx, giteaClient) { + if optionsForIndexPages.Upstream(ctx, giteaClient, redirectsCache) { return true } } @@ -131,11 +138,12 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin optionsForNotFoundPages.appendTrailingSlash = false for _, notFoundPage := range upstreamNotFoundPages { optionsForNotFoundPages.TargetPath = "/" + notFoundPage - if optionsForNotFoundPages.Upstream(ctx, giteaClient) { + if optionsForNotFoundPages.Upstream(ctx, giteaClient, redirectsCache) { return true } } } + return false }