From f74f71cac1b682579fe254ab331dacf480e32580 Mon Sep 17 00:00:00 2001
From: Hoernschen <julian.hoernschemeyer@mailbox.org>
Date: Sun, 25 Feb 2024 19:47:12 +0100
Subject: [PATCH] FIX blank internal pages (#164)

---
 server/gitea/cache.go       | 26 +++++++++++++++--------
 server/gitea/client.go      | 26 +++++++++++++----------
 server/handler/handler.go   |  1 +
 server/upstream/upstream.go | 41 ++++++++++++++++++++++++-------------
 4 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/server/gitea/cache.go b/server/gitea/cache.go
index 267c3d8..362bf52 100644
--- a/server/gitea/cache.go
+++ b/server/gitea/cache.go
@@ -2,6 +2,7 @@ package gitea
 
 import (
 	"bytes"
+	//"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -39,7 +40,7 @@ type FileResponse struct {
 }
 
 func (f FileResponse) IsEmpty() bool {
-	return len(f.Body) != 0
+	return len(f.Body) == 0
 }
 
 func (f FileResponse) createHttpResponse(cacheKey string) (header http.Header, statusCode int) {
@@ -72,13 +73,14 @@ type BranchTimestamp struct {
 type writeCacheReader struct {
 	originalReader io.ReadCloser
 	buffer         *bytes.Buffer
-	rileResponse   *FileResponse
+	fileResponse   *FileResponse
 	cacheKey       string
 	cache          cache.ICache
 	hasError       bool
 }
 
 func (t *writeCacheReader) Read(p []byte) (n int, err error) {
+	log.Trace().Msgf("[cache] read %q", t.cacheKey)
 	n, err = t.originalReader.Read(p)
 	if err != nil && err != io.EOF {
 		log.Trace().Err(err).Msgf("[cache] original reader for %q has returned an error", t.cacheKey)
@@ -90,12 +92,20 @@ func (t *writeCacheReader) Read(p []byte) (n int, err error) {
 }
 
 func (t *writeCacheReader) Close() error {
-	if !t.hasError {
-		fc := *t.rileResponse
-		fc.Body = t.buffer.Bytes()
-		_ = t.cache.Set(t.cacheKey, fc, fileCacheTimeout)
+	doWrite := !t.hasError
+	fc := *t.fileResponse
+	fc.Body = t.buffer.Bytes()
+	if fc.IsEmpty() {
+		log.Trace().Msg("[cache] file response is empty")
+		doWrite = false
 	}
-	log.Trace().Msgf("cacheReader for %q saved=%t closed", t.cacheKey, !t.hasError)
+	if doWrite {
+		err := t.cache.Set(t.cacheKey, fc, fileCacheTimeout)
+		if err != nil {
+			log.Trace().Err(err).Msgf("[cache] writer for %q has returned an error", t.cacheKey)
+		}
+	}
+	log.Trace().Msgf("cacheReader for %q saved=%t closed", t.cacheKey, doWrite)
 	return t.originalReader.Close()
 }
 
@@ -108,7 +118,7 @@ func (f FileResponse) CreateCacheReader(r io.ReadCloser, cache cache.ICache, cac
 	return &writeCacheReader{
 		originalReader: r,
 		buffer:         bytes.NewBuffer(make([]byte, 0)),
-		rileResponse:   &f,
+		fileResponse:   &f,
 		cache:          cache,
 		cacheKey:       cacheKey,
 	}
diff --git a/server/gitea/client.go b/server/gitea/client.go
index 42cf065..dc24792 100644
--- a/server/gitea/client.go
+++ b/server/gitea/client.go
@@ -112,26 +112,28 @@ func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource str
 func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, http.Header, int, error) {
 	cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource)
 	log := log.With().Str("cache_key", cacheKey).Logger()
-
+	log.Trace().Msg("try file in cache")
 	// handle if cache entry exist
 	if cache, ok := client.responseCache.Get(cacheKey); ok {
 		cache := cache.(FileResponse)
 		cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey)
 		// TODO: check against some timestamp mismatch?!?
 		if cache.Exists {
+			log.Debug().Msg("[cache] exists")
 			if cache.IsSymlink {
 				linkDest := string(cache.Body)
 				log.Debug().Msgf("[cache] follow symlink from %q to %q", resource, linkDest)
 				return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
-			} else {
-				log.Debug().Msg("[cache] return bytes")
+			} else if !cache.IsEmpty() {
+				log.Debug().Msgf("[cache] return %d bytes", len(cache.Body))
 				return io.NopCloser(bytes.NewReader(cache.Body)), cachedHeader, cachedStatusCode, nil
+			} else if cache.IsEmpty() {
+				log.Debug().Msg("[cache] is empty")
 			}
-		} else {
-			return nil, cachedHeader, cachedStatusCode, ErrorNotFound
+			//return nil, cachedHeader, cachedStatusCode, ErrorNotFound
 		}
 	}
-
+	log.Trace().Msg("file not in cache")
 	// not in cache, open reader via gitea api
 	reader, resp, err := client.sdkClient.GetFileReader(targetOwner, targetRepo, ref, resource, client.supportLFS)
 	if resp != nil {
@@ -155,12 +157,14 @@ func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource str
 					linkDest = path.Join(path.Dir(resource), linkDest)
 
 					// we store symlink not content to reduce duplicates in cache
-					if err := client.responseCache.Set(cacheKey, FileResponse{
-						Exists:    true,
+					fileResponse := FileResponse {
+						Exists: true,
 						IsSymlink: true,
-						Body:      []byte(linkDest),
-						ETag:      resp.Header.Get(ETagHeader),
-					}, fileCacheTimeout); err != nil {
+						Body: []byte(linkDest),
+						ETag: resp.Header.Get(ETagHeader),
+					}
+					log.Trace().Msgf("file response has %d bytes", len(fileResponse.Body))
+					if err := client.responseCache.Set(cacheKey, fileResponse, fileCacheTimeout); err != nil {
 						log.Error().Err(err).Msg("[cache] error on cache write")
 					}
 
diff --git a/server/handler/handler.go b/server/handler/handler.go
index 96788e4..a612639 100644
--- a/server/handler/handler.go
+++ b/server/handler/handler.go
@@ -26,6 +26,7 @@ func Handler(
 	dnsLookupCache, canonicalDomainCache, redirectsCache cache.ICache,
 ) http.HandlerFunc {
 	return func(w http.ResponseWriter, req *http.Request) {
+		log.Info().Msg("\n----------------------------------------------------------")
 		log := log.With().Strs("Handler", []string{req.Host, req.RequestURI}).Logger()
 		ctx := context.New(w, req)
 
diff --git a/server/upstream/upstream.go b/server/upstream/upstream.go
index 07b6ad2..c2f68dc 100644
--- a/server/upstream/upstream.go
+++ b/server/upstream/upstream.go
@@ -46,7 +46,7 @@ type Options struct {
 	TryIndexPages   bool
 	BranchTimestamp time.Time
 	// internal
-	appendTrailingSlash bool
+	dontAppendTrailingSlash bool
 	redirectIfExists    string
 
 	ServeRaw bool
@@ -56,6 +56,8 @@ type Options struct {
 func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redirectsCache cache.ICache) bool {
 	log := log.With().Strs("upstream", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath}).Logger()
 
+	log.Debug().Msg("Start")
+
 	if o.TargetOwner == "" || o.TargetRepo == "" {
 		html.ReturnErrorPage(ctx, "forge client: either repo owner or name info is missing", http.StatusBadRequest)
 		return true
@@ -93,6 +95,19 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redi
 		log.Trace().Msg("check response against last modified: outdated")
 	}
 
+	if strings.HasSuffix(ctx.Path(), "/index.html") && !o.ServeRaw {
+		log.Trace().Msg("remove index.html from path and redirect")
+		ctx.Redirect(strings.TrimSuffix(ctx.Path(), "index.html"), http.StatusTemporaryRedirect)
+		return true
+	}
+	// Append trailing slash if missing (for index files), and redirect to fix filenames in general
+	// o.dontAppendTrailingSlash is only true when looking for index pages
+	if !o.dontAppendTrailingSlash && !strings.HasSuffix(ctx.Path(), "/") {
+		log.Trace().Msg("append trailing slash and redirect")
+		ctx.Redirect(ctx.Path()+"/", http.StatusTemporaryRedirect)
+		return true
+	}
+
 	log.Debug().Msg("Preparing")
 
 	reader, header, statusCode, err := giteaClient.ServeRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath)
@@ -104,25 +119,29 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redi
 
 	// Handle not found error
 	if err != nil && errors.Is(err, gitea.ErrorNotFound) {
+		log.Debug().Msg("Handling not found error")
 		// Get and match redirects
 		redirects := o.getRedirects(giteaClient, redirectsCache)
 		if o.matchRedirects(ctx, giteaClient, redirects, redirectsCache) {
+			log.Trace().Msg("redirect")
 			return true
 		}
 
 		if o.TryIndexPages {
+			log.Trace().Msg("try index page")
 			// copy the o struct & try if an index page exists
 			optionsForIndexPages := *o
 			optionsForIndexPages.TryIndexPages = false
-			optionsForIndexPages.appendTrailingSlash = true
+			optionsForIndexPages.dontAppendTrailingSlash = false
 			for _, indexPage := range upstreamIndexPages {
 				optionsForIndexPages.TargetPath = strings.TrimSuffix(o.TargetPath, "/") + "/" + indexPage
 				if optionsForIndexPages.Upstream(ctx, giteaClient, redirectsCache) {
 					return true
 				}
 			}
+			log.Trace().Msg("try html file with path name")
 			// compatibility fix for GitHub Pages (/example → /example.html)
-			optionsForIndexPages.appendTrailingSlash = false
+			optionsForIndexPages.dontAppendTrailingSlash = true
 			optionsForIndexPages.redirectIfExists = strings.TrimSuffix(ctx.Path(), "/") + ".html"
 			optionsForIndexPages.TargetPath = o.TargetPath + ".html"
 			if optionsForIndexPages.Upstream(ctx, giteaClient, redirectsCache) {
@@ -130,18 +149,22 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redi
 			}
 		}
 
+		log.Trace().Msg("not found")
+
 		ctx.StatusCode = http.StatusNotFound
 		if o.TryIndexPages {
+			log.Trace().Msg("try not found page")
 			// copy the o struct & try if a not found page exists
 			optionsForNotFoundPages := *o
 			optionsForNotFoundPages.TryIndexPages = false
-			optionsForNotFoundPages.appendTrailingSlash = false
+			optionsForNotFoundPages.dontAppendTrailingSlash = true
 			for _, notFoundPage := range upstreamNotFoundPages {
 				optionsForNotFoundPages.TargetPath = "/" + notFoundPage
 				if optionsForNotFoundPages.Upstream(ctx, giteaClient, redirectsCache) {
 					return true
 				}
 			}
+			log.Trace().Msg("not found page missing")
 		}
 
 		return false
@@ -170,16 +193,6 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redi
 		return true
 	}
 
-	// Append trailing slash if missing (for index files), and redirect to fix filenames in general
-	// o.appendTrailingSlash is only true when looking for index pages
-	if o.appendTrailingSlash && !strings.HasSuffix(ctx.Path(), "/") {
-		ctx.Redirect(ctx.Path()+"/", http.StatusTemporaryRedirect)
-		return true
-	}
-	if strings.HasSuffix(ctx.Path(), "/index.html") && !o.ServeRaw {
-		ctx.Redirect(strings.TrimSuffix(ctx.Path(), "index.html"), http.StatusTemporaryRedirect)
-		return true
-	}
 	if o.redirectIfExists != "" {
 		ctx.Redirect(o.redirectIfExists, http.StatusTemporaryRedirect)
 		return true