pages-server/server/gitea/client.go
Jean-Marie 'Histausse' Mineau 03881382a4 Add option to disable DNS ACME provider (#290)
This PR add the `$NO_DNS_01` option (disabled by default) that removes the DNS ACME provider, and replaces the wildcard certificate by individual certificates obtained using the TLS ACME provider.

This option allows an instance to work without having to manage access tokens for the DNS provider. On the flip side, this means that a certificate can be requested for each subdomains. To limit the risk of DOS, the existence of the user/org corresponding to a subdomain is checked before requesting a cert, however, this limitation is not enough for an forge with a high number of users/orgs.

Co-authored-by: 6543 <6543@obermui.de>
Reviewed-on: https://codeberg.org/Codeberg/pages-server/pulls/290
Reviewed-by: Moritz Marquardt <momar@noreply.codeberg.org>
Co-authored-by: Jean-Marie 'Histausse' Mineau <histausse@protonmail.com>
Co-committed-by: Jean-Marie 'Histausse' Mineau <histausse@protonmail.com>
2024-04-18 17:05:20 +00:00

330 lines
10 KiB
Go

package gitea
import (
"bytes"
"errors"
"fmt"
"io"
"mime"
"net/http"
"net/url"
"path"
"strconv"
"strings"
"time"
"code.gitea.io/sdk/gitea"
"github.com/rs/zerolog/log"
"codeberg.org/codeberg/pages/config"
"codeberg.org/codeberg/pages/server/cache"
"codeberg.org/codeberg/pages/server/version"
)
var ErrorNotFound = errors.New("not found")
const (
// cache key prefixes
branchTimestampCacheKeyPrefix = "branchTime"
defaultBranchCacheKeyPrefix = "defaultBranch"
rawContentCacheKeyPrefix = "rawContent"
ownerExistenceKeyPrefix = "ownerExist"
// pages server
PagesCacheIndicatorHeader = "X-Pages-Cache"
symlinkReadLimit = 10000
// gitea
giteaObjectTypeHeader = "X-Gitea-Object-Type"
objTypeSymlink = "symlink"
// std
ETagHeader = "ETag"
ContentTypeHeader = "Content-Type"
ContentLengthHeader = "Content-Length"
)
type Client struct {
sdkClient *gitea.Client
responseCache cache.ICache
giteaRoot string
followSymlinks bool
supportLFS bool
forbiddenMimeTypes map[string]bool
defaultMimeType string
}
func NewClient(cfg config.GiteaConfig, respCache cache.ICache) (*Client, error) {
rootURL, err := url.Parse(cfg.Root)
if err != nil {
return nil, err
}
giteaRoot := strings.Trim(rootURL.String(), "/")
stdClient := http.Client{Timeout: 10 * time.Second}
forbiddenMimeTypes := make(map[string]bool, len(cfg.ForbiddenMimeTypes))
for _, mimeType := range cfg.ForbiddenMimeTypes {
forbiddenMimeTypes[mimeType] = true
}
defaultMimeType := cfg.DefaultMimeType
if defaultMimeType == "" {
defaultMimeType = "application/octet-stream"
}
sdk, err := gitea.NewClient(
giteaRoot,
gitea.SetHTTPClient(&stdClient),
gitea.SetToken(cfg.Token),
gitea.SetUserAgent("pages-server/"+version.Version),
)
return &Client{
sdkClient: sdk,
responseCache: respCache,
giteaRoot: giteaRoot,
followSymlinks: cfg.FollowSymlinks,
supportLFS: cfg.LFSEnabled,
forbiddenMimeTypes: forbiddenMimeTypes,
defaultMimeType: defaultMimeType,
}, err
}
func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource string) string {
return path.Join(client.giteaRoot, targetOwner, targetRepo, "src/branch", branch, resource)
}
func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) {
reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource)
if err != nil {
return nil, err
}
defer reader.Close()
return io.ReadAll(reader)
}
func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, http.Header, int, error) {
cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource)
log := log.With().Str("cache_key", cacheKey).Logger()
log.Trace().Msg("try file in cache")
// handle if cache entry exist
if cache, ok := client.responseCache.Get(cacheKey); ok {
cache := cache.(FileResponse)
cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey)
// TODO: check against some timestamp mismatch?!?
if cache.Exists {
log.Debug().Msg("[cache] exists")
if cache.IsSymlink {
linkDest := string(cache.Body)
log.Debug().Msgf("[cache] follow symlink from %q to %q", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
} else if !cache.IsEmpty() {
log.Debug().Msgf("[cache] return %d bytes", len(cache.Body))
return io.NopCloser(bytes.NewReader(cache.Body)), cachedHeader, cachedStatusCode, nil
} else if cache.IsEmpty() {
log.Debug().Msg("[cache] is empty")
}
}
}
log.Trace().Msg("file not in cache")
// not in cache, open reader via gitea api
reader, resp, err := client.sdkClient.GetFileReader(targetOwner, targetRepo, ref, resource, client.supportLFS)
if resp != nil {
switch resp.StatusCode {
case http.StatusOK:
// first handle symlinks
{
objType := resp.Header.Get(giteaObjectTypeHeader)
log.Trace().Msgf("server raw content object %q", objType)
if client.followSymlinks && objType == objTypeSymlink {
defer reader.Close()
// read limited chars for symlink
linkDestBytes, err := io.ReadAll(io.LimitReader(reader, symlinkReadLimit))
if err != nil {
return nil, nil, http.StatusInternalServerError, err
}
linkDest := strings.TrimSpace(string(linkDestBytes))
// handle relative links
// we first remove the link from the path, and make a relative join (resolve parent paths like "/../" too)
linkDest = path.Join(path.Dir(resource), linkDest)
// we store symlink not content to reduce duplicates in cache
fileResponse := FileResponse{
Exists: true,
IsSymlink: true,
Body: []byte(linkDest),
ETag: resp.Header.Get(ETagHeader),
}
log.Trace().Msgf("file response has %d bytes", len(fileResponse.Body))
if err := client.responseCache.Set(cacheKey, fileResponse, fileCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
log.Debug().Msgf("follow symlink from %q to %q", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
}
}
// now we are sure it's content so set the MIME type
mimeType := client.getMimeTypeByExtension(resource)
resp.Response.Header.Set(ContentTypeHeader, mimeType)
if !shouldRespBeSavedToCache(resp.Response) {
return reader, resp.Response.Header, resp.StatusCode, err
}
// now we write to cache and respond at the same time
fileResp := FileResponse{
Exists: true,
ETag: resp.Header.Get(ETagHeader),
MimeType: mimeType,
}
return fileResp.CreateCacheReader(reader, client.responseCache, cacheKey), resp.Response.Header, resp.StatusCode, nil
case http.StatusNotFound:
if err := client.responseCache.Set(cacheKey, FileResponse{
Exists: false,
ETag: resp.Header.Get(ETagHeader),
}, fileCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return nil, resp.Response.Header, http.StatusNotFound, ErrorNotFound
default:
return nil, resp.Response.Header, resp.StatusCode, fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
}
return nil, nil, http.StatusInternalServerError, err
}
func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (*BranchTimestamp, error) {
cacheKey := fmt.Sprintf("%s/%s/%s/%s", branchTimestampCacheKeyPrefix, repoOwner, repoName, branchName)
if stamp, ok := client.responseCache.Get(cacheKey); ok && stamp != nil {
branchTimeStamp := stamp.(*BranchTimestamp)
if branchTimeStamp.notFound {
log.Trace().Msgf("[cache] use branch %q not found", branchName)
return &BranchTimestamp{}, ErrorNotFound
}
log.Trace().Msgf("[cache] use branch %q exist", branchName)
return branchTimeStamp, nil
}
branch, resp, err := client.sdkClient.GetRepoBranch(repoOwner, repoName, branchName)
if err != nil {
if resp != nil && resp.StatusCode == http.StatusNotFound {
log.Trace().Msgf("[cache] set cache branch %q not found", branchName)
if err := client.responseCache.Set(cacheKey, &BranchTimestamp{Branch: branchName, notFound: true}, branchExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return &BranchTimestamp{}, ErrorNotFound
}
return &BranchTimestamp{}, err
}
if resp.StatusCode != http.StatusOK {
return &BranchTimestamp{}, fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
stamp := &BranchTimestamp{
Branch: branch.Name,
Timestamp: branch.Commit.Timestamp,
}
log.Trace().Msgf("set cache branch [%s] exist", branchName)
if err := client.responseCache.Set(cacheKey, stamp, branchExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return stamp, nil
}
func (client *Client) GiteaGetRepoDefaultBranch(repoOwner, repoName string) (string, error) {
cacheKey := fmt.Sprintf("%s/%s/%s", defaultBranchCacheKeyPrefix, repoOwner, repoName)
if branch, ok := client.responseCache.Get(cacheKey); ok && branch != nil {
return branch.(string), nil
}
repo, resp, err := client.sdkClient.GetRepo(repoOwner, repoName)
if err != nil {
return "", err
}
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
branch := repo.DefaultBranch
if err := client.responseCache.Set(cacheKey, branch, defaultBranchCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return branch, nil
}
func (client *Client) GiteaCheckIfOwnerExists(owner string) (bool, error) {
cacheKey := fmt.Sprintf("%s/%s", ownerExistenceKeyPrefix, owner)
if exist, ok := client.responseCache.Get(cacheKey); ok && exist != nil {
return exist.(bool), nil
}
_, resp, err := client.sdkClient.GetUserInfo(owner)
if resp.StatusCode == http.StatusOK && err == nil {
if err := client.responseCache.Set(cacheKey, true, ownerExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return true, nil
} else if resp.StatusCode != http.StatusNotFound {
return false, err
}
_, resp, err = client.sdkClient.GetOrg(owner)
if resp.StatusCode == http.StatusOK && err == nil {
if err := client.responseCache.Set(cacheKey, true, ownerExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return true, nil
} else if resp.StatusCode != http.StatusNotFound {
return false, err
}
if err := client.responseCache.Set(cacheKey, false, ownerExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return false, nil
}
func (client *Client) getMimeTypeByExtension(resource string) string {
mimeType := mime.TypeByExtension(path.Ext(resource))
mimeTypeSplit := strings.SplitN(mimeType, ";", 2)
if client.forbiddenMimeTypes[mimeTypeSplit[0]] || mimeType == "" {
mimeType = client.defaultMimeType
}
log.Trace().Msgf("probe mime of %q is %q", resource, mimeType)
return mimeType
}
func shouldRespBeSavedToCache(resp *http.Response) bool {
if resp == nil {
return false
}
contentLengthRaw := resp.Header.Get(ContentLengthHeader)
if contentLengthRaw == "" {
return false
}
contentLength, err := strconv.ParseInt(contentLengthRaw, 10, 64)
if err != nil {
log.Error().Err(err).Msg("could not parse content length")
}
// if content to big or could not be determined we not cache it
return contentLength > 0 && contentLength < fileCacheSizeLimit
}