From 044c684a47853af53c660e454328348a49277c9c Mon Sep 17 00:00:00 2001 From: crapStone Date: Mon, 25 Nov 2024 12:21:55 +0000 Subject: [PATCH] Fix compression (#405) closes #404 Reviewed-on: https://codeberg.org/Codeberg/pages-server/pulls/405 Co-authored-by: crapStone Co-committed-by: crapStone --- flake.lock | 4 ++-- flake.nix | 2 +- server/gitea/cache.go | 15 ++++++++---- server/gitea/client.go | 47 +++++++++++++++++++++++++++---------- server/upstream/header.go | 3 +++ server/upstream/upstream.go | 2 +- 6 files changed, 53 insertions(+), 20 deletions(-) diff --git a/flake.lock b/flake.lock index 861cc42..5eb7802 100644 --- a/flake.lock +++ b/flake.lock @@ -20,8 +20,8 @@ "nixpkgs": { "locked": { "lastModified": 0, - "narHash": "sha256-x07g4NcqGP6mQn6AISXJaks9sQYDjZmTMBlKIvajvyc=", - "path": "/nix/store/2w8kz6zh3aq80f1dypiin222fry1rv51-source", + "narHash": "sha256-29QfSvJwpjNwppFnU33nnyedAWpaaDBSlDJZzJhg97s=", + "path": "/nix/store/1703v0vkmk136sca5rf1861jrn2ndajr-source", "type": "path" }, "original": { diff --git a/flake.nix b/flake.nix index 61f3b55..36a545e 100644 --- a/flake.nix +++ b/flake.nix @@ -13,7 +13,7 @@ in { devShells.default = pkgs.mkShell { buildInputs = with pkgs; [ - gcc + glibc.static go gofumpt golangci-lint diff --git a/server/gitea/cache.go b/server/gitea/cache.go index c560a19..276358c 100644 --- a/server/gitea/cache.go +++ b/server/gitea/cache.go @@ -38,15 +38,16 @@ type FileResponse struct { Exists bool `json:"exists"` IsSymlink bool `json:"isSymlink"` ETag string `json:"eTag"` - MimeType string `json:"mimeType"` - Body []byte `json:"-"` // saved separately + MimeType string `json:"mimeType"` // uncompressed MIME type + RawMime string `json:"rawMime"` // raw MIME type (if compressed, type of compression) + Body []byte `json:"-"` // saved separately } func (f FileResponse) IsEmpty() bool { return len(f.Body) == 0 } -func (f FileResponse) createHttpResponse(cacheKey string) (header http.Header, statusCode int) { +func (f FileResponse) createHttpResponse(cacheKey string, decompress bool) (header http.Header, statusCode int) { header = make(http.Header) if f.Exists { @@ -59,7 +60,13 @@ func (f FileResponse) createHttpResponse(cacheKey string) (header http.Header, s header.Set(giteaObjectTypeHeader, objTypeSymlink) } header.Set(ETagHeader, f.ETag) - header.Set(ContentTypeHeader, f.MimeType) + + if decompress { + header.Set(ContentTypeHeader, f.MimeType) + } else { + header.Set(ContentTypeHeader, f.RawMime) + } + header.Set(ContentLengthHeader, fmt.Sprintf("%d", len(f.Body))) header.Set(PagesCacheIndicatorHeader, "true") diff --git a/server/gitea/client.go b/server/gitea/client.go index ea57c14..5a12111 100644 --- a/server/gitea/client.go +++ b/server/gitea/client.go @@ -40,9 +40,10 @@ const ( objTypeSymlink = "symlink" // std - ETagHeader = "ETag" - ContentTypeHeader = "Content-Type" - ContentLengthHeader = "Content-Length" + ETagHeader = "ETag" + ContentTypeHeader = "Content-Type" + ContentLengthHeader = "Content-Length" + ContentEncodingHeader = "Content-Encoding" ) type Client struct { @@ -104,7 +105,7 @@ func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource s } func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) { - reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource) + reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource, false) if err != nil { return nil, err } @@ -112,7 +113,7 @@ func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource str return io.ReadAll(reader) } -func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, http.Header, int, error) { +func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string, decompress bool) (io.ReadCloser, http.Header, int, error) { cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource) log := log.With().Str("cache_key", cacheKey).Logger() log.Trace().Msg("try file in cache") @@ -136,12 +137,12 @@ func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource str } cache.Body = body.([]byte) - cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey) + cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey, decompress) if cache.Exists { if cache.IsSymlink { linkDest := string(cache.Body) log.Debug().Msgf("[cache] follow symlink from %q to %q", resource, linkDest) - return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest) + return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest, decompress) } else { log.Debug().Msgf("[cache] return %d bytes", len(cache.Body)) return io.NopCloser(bytes.NewReader(cache.Body)), cachedHeader, cachedStatusCode, nil @@ -193,19 +194,25 @@ func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource str } log.Debug().Msgf("follow symlink from %q to %q", resource, linkDest) - return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest) + return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest, decompress) } } // now we are sure it's content so set the MIME type - mimeType := client.getMimeTypeByExtension(resource) + mimeType, rawType := client.getMimeTypeByExtension(resource) resp.Response.Header.Set(ContentTypeHeader, mimeType) + if decompress { + resp.Response.Header.Set(ContentTypeHeader, mimeType) + } else { + resp.Response.Header.Set(ContentTypeHeader, rawType) + } // now we write to cache and respond at the same time fileResp := FileResponse{ Exists: true, ETag: resp.Header.Get(ETagHeader), MimeType: mimeType, + RawMime: rawType, } return fileResp.CreateCacheReader(reader, client.responseCache, cacheKey), resp.Response.Header, resp.StatusCode, nil @@ -340,13 +347,29 @@ func (client *Client) GiteaCheckIfOwnerExists(owner string) (bool, error) { return false, nil } -func (client *Client) getMimeTypeByExtension(resource string) string { - mimeType := mime.TypeByExtension(path.Ext(resource)) +func (client *Client) extToMime(ext string) string { + mimeType := mime.TypeByExtension(path.Ext(ext)) mimeTypeSplit := strings.SplitN(mimeType, ";", 2) if client.forbiddenMimeTypes[mimeTypeSplit[0]] || mimeType == "" { mimeType = client.defaultMimeType } - log.Trace().Msgf("probe mime of %q is %q", resource, mimeType) + log.Trace().Msgf("probe mime of extension '%q' is '%q'", ext, mimeType) return mimeType } + +func (client *Client) getMimeTypeByExtension(resource string) (mimeType, rawType string) { + rawExt := path.Ext(resource) + innerExt := rawExt + switch rawExt { + case ".gz", ".br", ".zst": + innerExt = path.Ext(resource[:len(resource)-len(rawExt)]) + } + rawType = client.extToMime(rawExt) + mimeType = rawType + if innerExt != rawExt { + mimeType = client.extToMime(innerExt) + } + log.Trace().Msgf("probe mime of %q is (%q / raw %q)", resource, mimeType, rawType) + return mimeType, rawType +} diff --git a/server/upstream/header.go b/server/upstream/header.go index 7b85df1..3a218a1 100644 --- a/server/upstream/header.go +++ b/server/upstream/header.go @@ -24,5 +24,8 @@ func (o *Options) setHeader(ctx *context.Context, header http.Header) { } else { ctx.RespWriter.Header().Set(gitea.ContentTypeHeader, mime) } + if encoding := header.Get(gitea.ContentEncodingHeader); encoding != "" && encoding != "identity" { + ctx.RespWriter.Header().Set(gitea.ContentEncodingHeader, encoding) + } ctx.RespWriter.Header().Set(headerLastModified, o.BranchTimestamp.In(time.UTC).Format(http.TimeFormat)) } diff --git a/server/upstream/upstream.go b/server/upstream/upstream.go index 2f1751b..98137ba 100644 --- a/server/upstream/upstream.go +++ b/server/upstream/upstream.go @@ -182,7 +182,7 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redi // add extension for encoding path := o.TargetPath + allowedEncodings[encoding] - reader, header, statusCode, err = giteaClient.ServeRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, path) + reader, header, statusCode, err = giteaClient.ServeRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, path, true) if statusCode == 404 { continue }