package gitea import ( "bytes" "encoding/json" "errors" "fmt" "io" "mime" "net/http" "net/url" "path" "strconv" "strings" "time" "code.gitea.io/sdk/gitea" "github.com/rs/zerolog/log" "codeberg.org/codeberg/pages/config" "codeberg.org/codeberg/pages/server/cache" "codeberg.org/codeberg/pages/server/version" ) var ErrorNotFound = errors.New("not found") const ( // cache key prefixes branchTimestampCacheKeyPrefix = "branchTime" defaultBranchCacheKeyPrefix = "defaultBranch" rawContentCacheKeyPrefix = "rawContent" ownerExistenceKeyPrefix = "ownerExist" // pages server PagesCacheIndicatorHeader = "X-Pages-Cache" symlinkReadLimit = 10000 // gitea giteaObjectTypeHeader = "X-Gitea-Object-Type" objTypeSymlink = "symlink" // std ETagHeader = "ETag" ContentTypeHeader = "Content-Type" ContentLengthHeader = "Content-Length" ContentEncodingHeader = "Content-Encoding" ) type Client struct { sdkClient *gitea.Client responseCache cache.ICache giteaRoot string followSymlinks bool supportLFS bool forbiddenMimeTypes map[string]bool defaultMimeType string } func NewClient(cfg config.ForgeConfig, respCache cache.ICache) (*Client, error) { // url.Parse returns valid on almost anything... rootURL, err := url.ParseRequestURI(cfg.Root) if err != nil { return nil, fmt.Errorf("invalid forgejo/gitea root url: %w", err) } giteaRoot := strings.TrimSuffix(rootURL.String(), "/") stdClient := http.Client{Timeout: 10 * time.Second} forbiddenMimeTypes := make(map[string]bool, len(cfg.ForbiddenMimeTypes)) for _, mimeType := range cfg.ForbiddenMimeTypes { forbiddenMimeTypes[mimeType] = true } defaultMimeType := cfg.DefaultMimeType if defaultMimeType == "" { defaultMimeType = "application/octet-stream" } sdk, err := gitea.NewClient( giteaRoot, gitea.SetHTTPClient(&stdClient), gitea.SetToken(cfg.Token), gitea.SetUserAgent("pages-server/"+version.Version), ) return &Client{ sdkClient: sdk, responseCache: respCache, giteaRoot: giteaRoot, followSymlinks: cfg.FollowSymlinks, supportLFS: cfg.LFSEnabled, forbiddenMimeTypes: forbiddenMimeTypes, defaultMimeType: defaultMimeType, }, err } func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource string) string { return path.Join(client.giteaRoot, targetOwner, targetRepo, "src/branch", branch, resource) } func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) { reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource, false) if err != nil { return nil, err } defer reader.Close() return io.ReadAll(reader) } func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string, decompress bool) (io.ReadCloser, http.Header, int, error) { cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource) log := log.With().Str("cache_key", cacheKey).Logger() log.Trace().Msg("try file in cache") // handle if cache entry exist if cacheMetadata, ok := client.responseCache.Get(cacheKey + "|Metadata"); ok { var cache FileResponse err := json.Unmarshal(cacheMetadata.([]byte), &cache) if err != nil { log.Error().Err(err).Msgf("[cache] failed to unmarshal metadata for: %s", cacheKey) return nil, nil, http.StatusNotFound, err } if !cache.Exists { return nil, nil, http.StatusNotFound, ErrorNotFound } body, ok := client.responseCache.Get(cacheKey + "|Body") if !ok { log.Error().Msgf("[cache] failed to get body for: %s", cacheKey) return nil, nil, http.StatusNotFound, ErrorNotFound } cache.Body = body.([]byte) cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey, decompress) if cache.Exists { if cache.IsSymlink { linkDest := string(cache.Body) log.Debug().Msgf("[cache] follow symlink from %q to %q", resource, linkDest) return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest, decompress) } else { log.Debug().Msgf("[cache] return %d bytes", len(cache.Body)) return io.NopCloser(bytes.NewReader(cache.Body)), cachedHeader, cachedStatusCode, nil } } else { return nil, nil, http.StatusNotFound, ErrorNotFound } } log.Trace().Msg("file not in cache") // not in cache, open reader via gitea api reader, resp, err := client.sdkClient.GetFileReader(targetOwner, targetRepo, ref, resource, client.supportLFS) if resp != nil { switch resp.StatusCode { case http.StatusOK: // first handle symlinks { objType := resp.Header.Get(giteaObjectTypeHeader) log.Trace().Msgf("server raw content object %q", objType) if client.followSymlinks && objType == objTypeSymlink { defer reader.Close() // read limited chars for symlink linkDestBytes, err := io.ReadAll(io.LimitReader(reader, symlinkReadLimit)) if err != nil { return nil, nil, http.StatusInternalServerError, err } linkDest := strings.TrimSpace(string(linkDestBytes)) // handle relative links // we first remove the link from the path, and make a relative join (resolve parent paths like "/../" too) linkDest = path.Join(path.Dir(resource), linkDest) // we store symlink not content to reduce duplicates in cache fileResponse := FileResponse{ Exists: true, IsSymlink: true, Body: []byte(linkDest), ETag: resp.Header.Get(ETagHeader), } log.Trace().Msgf("file response has %d bytes", len(fileResponse.Body)) jsonToCache, err := json.Marshal(fileResponse) if err != nil { log.Error().Err(err).Msgf("[cache] marshaling json metadata for %q has returned an error", cacheKey) } if err := client.responseCache.Set(cacheKey+"|Metadata", jsonToCache, fileCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } if err := client.responseCache.Set(cacheKey+"|Body", fileResponse.Body, fileCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } log.Debug().Msgf("follow symlink from %q to %q", resource, linkDest) return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest, decompress) } } // now we are sure it's content so set the MIME type mimeType, rawType := client.getMimeTypeByExtension(resource) resp.Response.Header.Set(ContentTypeHeader, mimeType) if decompress { resp.Response.Header.Set(ContentTypeHeader, mimeType) } else { resp.Response.Header.Set(ContentTypeHeader, rawType) } // now we write to cache and respond at the same time fileResp := FileResponse{ Exists: true, ETag: resp.Header.Get(ETagHeader), MimeType: mimeType, RawMime: rawType, } return fileResp.CreateCacheReader(reader, client.responseCache, cacheKey), resp.Response.Header, resp.StatusCode, nil case http.StatusNotFound: jsonToCache, err := json.Marshal(FileResponse{ETag: resp.Header.Get(ETagHeader)}) if err != nil { log.Error().Err(err).Msgf("[cache] marshaling json metadata for %q has returned an error", cacheKey) } if err := client.responseCache.Set(cacheKey+"|Metadata", jsonToCache, fileCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return nil, resp.Response.Header, http.StatusNotFound, ErrorNotFound default: return nil, resp.Response.Header, resp.StatusCode, fmt.Errorf("unexpected status code '%d'", resp.StatusCode) } } return nil, nil, http.StatusInternalServerError, err } func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (*BranchTimestamp, error) { cacheKey := fmt.Sprintf("%s/%s/%s/%s", branchTimestampCacheKeyPrefix, repoOwner, repoName, branchName) if stampRaw, ok := client.responseCache.Get(cacheKey); ok { var stamp BranchTimestamp err := json.Unmarshal(stampRaw.([]byte), &stamp) if err != nil { log.Error().Err(err).Bytes("stamp", stampRaw.([]byte)).Msgf("[cache] failed to unmarshal timestamp for: %s", cacheKey) return &BranchTimestamp{}, ErrorNotFound } if stamp.NotFound { log.Trace().Msgf("[cache] branch %q does not exist", branchName) return &BranchTimestamp{}, ErrorNotFound } else { log.Trace().Msgf("[cache] use branch %q exist", branchName) // This comes from the refactoring of the caching library. // The branch as reported by the API was stored in the cache, and I'm not sure if there are // situations where it differs from the name in the request, hence this is left here. return &stamp, nil } } branch, resp, err := client.sdkClient.GetRepoBranch(repoOwner, repoName, branchName) if err != nil { if resp != nil && resp.StatusCode == http.StatusNotFound { log.Trace().Msgf("[cache] set cache branch %q not found", branchName) jsonToCache, err := json.Marshal(BranchTimestamp{NotFound: true}) if err != nil { log.Error().Err(err).Msgf("[cache] marshaling empty timestamp for '%s' has returned an error", cacheKey) } if err := client.responseCache.Set(cacheKey, jsonToCache, branchExistenceCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return &BranchTimestamp{}, ErrorNotFound } return &BranchTimestamp{}, err } if resp.StatusCode != http.StatusOK { return &BranchTimestamp{}, fmt.Errorf("unexpected status code '%d'", resp.StatusCode) } stamp := &BranchTimestamp{ Branch: branch.Name, Timestamp: branch.Commit.Timestamp, } log.Trace().Msgf("set cache branch [%s] exist", branchName) jsonToCache, err := json.Marshal(stamp) if err != nil { log.Error().Err(err).Msgf("[cache] marshaling timestamp for %q has returned an error", cacheKey) } if err := client.responseCache.Set(cacheKey, jsonToCache, branchExistenceCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return stamp, nil } func (client *Client) GiteaGetRepoDefaultBranch(repoOwner, repoName string) (string, error) { cacheKey := fmt.Sprintf("%s/%s/%s", defaultBranchCacheKeyPrefix, repoOwner, repoName) if branch, ok := client.responseCache.Get(cacheKey); ok { return string(branch.([]byte)), nil } repo, resp, err := client.sdkClient.GetRepo(repoOwner, repoName) if err != nil { return "", err } if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("unexpected status code '%d'", resp.StatusCode) } branch := repo.DefaultBranch if err := client.responseCache.Set(cacheKey, []byte(branch), defaultBranchCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return branch, nil } func (client *Client) GiteaCheckIfOwnerExists(owner string) (bool, error) { cacheKey := fmt.Sprintf("%s/%s", ownerExistenceKeyPrefix, owner) if existRaw, ok := client.responseCache.Get(cacheKey); ok && existRaw != nil { exist, err := strconv.ParseBool(existRaw.(string)) return exist, err } _, resp, err := client.sdkClient.GetUserInfo(owner) if resp.StatusCode == http.StatusOK && err == nil { if err := client.responseCache.Set(cacheKey, []byte("true"), ownerExistenceCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return true, nil } else if resp.StatusCode != http.StatusNotFound { return false, err } _, resp, err = client.sdkClient.GetOrg(owner) if resp.StatusCode == http.StatusOK && err == nil { if err := client.responseCache.Set(cacheKey, []byte("true"), ownerExistenceCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return true, nil } else if resp.StatusCode != http.StatusNotFound { return false, err } if err := client.responseCache.Set(cacheKey, []byte("false"), ownerExistenceCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return false, nil } func (client *Client) extToMime(ext string) string { mimeType := mime.TypeByExtension(path.Ext(ext)) mimeTypeSplit := strings.SplitN(mimeType, ";", 2) if client.forbiddenMimeTypes[mimeTypeSplit[0]] || mimeType == "" { mimeType = client.defaultMimeType } log.Trace().Msgf("probe mime of extension '%q' is '%q'", ext, mimeType) return mimeType } func (client *Client) getMimeTypeByExtension(resource string) (mimeType, rawType string) { rawExt := path.Ext(resource) innerExt := rawExt switch rawExt { case ".gz", ".br", ".zst": innerExt = path.Ext(resource[:len(resource)-len(rawExt)]) } rawType = client.extToMime(rawExt) mimeType = rawType if innerExt != rawExt { mimeType = client.extToMime(innerExt) } log.Trace().Msgf("probe mime of %q is (%q / raw %q)", resource, mimeType, rawType) return mimeType, rawType }