Refactor route path normalization (#31381)

Refactor route path normalization and decouple it from the chi router.
Fix the TODO, fix the legacy strange path behavior.
This commit is contained in:
wxiaoguang 2024-06-18 07:28:47 +08:00 committed by GitHub
parent 5a7376c060
commit d32648b204
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 153 additions and 157 deletions

View File

@ -5,8 +5,10 @@ package web
import ( import (
"net/http" "net/http"
"net/url"
"strings" "strings"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/web/middleware" "code.gitea.io/gitea/modules/web/middleware"
"gitea.com/go-chi/binding" "gitea.com/go-chi/binding"
@ -160,7 +162,7 @@ func (r *Route) Patch(pattern string, h ...any) {
// ServeHTTP implements http.Handler // ServeHTTP implements http.Handler
func (r *Route) ServeHTTP(w http.ResponseWriter, req *http.Request) { func (r *Route) ServeHTTP(w http.ResponseWriter, req *http.Request) {
r.R.ServeHTTP(w, req) r.normalizeRequestPath(w, req, r.R)
} }
// NotFound defines a handler to respond whenever a route could not be found. // NotFound defines a handler to respond whenever a route could not be found.
@ -168,6 +170,61 @@ func (r *Route) NotFound(h http.HandlerFunc) {
r.R.NotFound(h) r.R.NotFound(h)
} }
func (r *Route) normalizeRequestPath(resp http.ResponseWriter, req *http.Request, next http.Handler) {
normalized := false
normalizedPath := req.URL.EscapedPath()
if normalizedPath == "" {
normalizedPath, normalized = "/", true
} else if normalizedPath != "/" {
normalized = strings.HasSuffix(normalizedPath, "/")
normalizedPath = strings.TrimRight(normalizedPath, "/")
}
removeRepeatedSlashes := strings.Contains(normalizedPath, "//")
normalized = normalized || removeRepeatedSlashes
// the following code block is a slow-path for replacing all repeated slashes "//" to one single "/"
// if the path doesn't have repeated slashes, then no need to execute it
if removeRepeatedSlashes {
buf := &strings.Builder{}
for i := 0; i < len(normalizedPath); i++ {
if i == 0 || normalizedPath[i-1] != '/' || normalizedPath[i] != '/' {
buf.WriteByte(normalizedPath[i])
}
}
normalizedPath = buf.String()
}
// If the config tells Gitea to use a sub-url path directly without reverse proxy,
// then we need to remove the sub-url path from the request URL path.
// But "/v2" is special for OCI container registry, it should always be in the root of the site.
if setting.UseSubURLPath {
remainingPath, ok := strings.CutPrefix(normalizedPath, setting.AppSubURL+"/")
if ok {
normalizedPath = "/" + remainingPath
} else if normalizedPath == setting.AppSubURL {
normalizedPath = "/"
} else if !strings.HasPrefix(normalizedPath+"/", "/v2/") {
// do not respond to other requests, to simulate a real sub-path environment
http.Error(resp, "404 page not found, sub-path is: "+setting.AppSubURL, http.StatusNotFound)
return
}
normalized = true
}
// if the path is normalized, then fill it back to the request
if normalized {
decodedPath, err := url.PathUnescape(normalizedPath)
if err != nil {
http.Error(resp, "400 Bad Request: unable to unescape path "+normalizedPath, http.StatusBadRequest)
return
}
req.URL.RawPath = normalizedPath
req.URL.Path = decodedPath
}
next.ServeHTTP(resp, req)
}
// Combo delegates requests to Combo // Combo delegates requests to Combo
func (r *Route) Combo(pattern string, h ...any) *Combo { func (r *Route) Combo(pattern string, h ...any) *Combo {
return &Combo{r, pattern, h} return &Combo{r, pattern, h}

View File

@ -10,6 +10,9 @@ import (
"strconv" "strconv"
"testing" "testing"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
chi "github.com/go-chi/chi/v5" chi "github.com/go-chi/chi/v5"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
@ -176,3 +179,44 @@ func TestRoute3(t *testing.T) {
assert.EqualValues(t, http.StatusOK, recorder.Code) assert.EqualValues(t, http.StatusOK, recorder.Code)
assert.EqualValues(t, 4, hit) assert.EqualValues(t, 4, hit)
} }
func TestRouteNormalizePath(t *testing.T) {
type paths struct {
EscapedPath, RawPath, Path string
}
testPath := func(reqPath string, expectedPaths paths) {
recorder := httptest.NewRecorder()
recorder.Body = bytes.NewBuffer(nil)
actualPaths := paths{EscapedPath: "(none)", RawPath: "(none)", Path: "(none)"}
r := NewRoute()
r.Get("/*", func(resp http.ResponseWriter, req *http.Request) {
actualPaths.EscapedPath = req.URL.EscapedPath()
actualPaths.RawPath = req.URL.RawPath
actualPaths.Path = req.URL.Path
})
req, err := http.NewRequest("GET", reqPath, nil)
assert.NoError(t, err)
r.ServeHTTP(recorder, req)
assert.Equal(t, expectedPaths, actualPaths, "req path = %q", reqPath)
}
// RawPath could be empty if the EscapedPath is the same as escape(Path) and it is already normalized
testPath("/", paths{EscapedPath: "/", RawPath: "", Path: "/"})
testPath("//", paths{EscapedPath: "/", RawPath: "/", Path: "/"})
testPath("/%2f", paths{EscapedPath: "/%2f", RawPath: "/%2f", Path: "//"})
testPath("///a//b/", paths{EscapedPath: "/a/b", RawPath: "/a/b", Path: "/a/b"})
defer test.MockVariableValue(&setting.UseSubURLPath, true)()
defer test.MockVariableValue(&setting.AppSubURL, "/sub-path")()
testPath("/", paths{EscapedPath: "(none)", RawPath: "(none)", Path: "(none)"}) // 404
testPath("/sub-path", paths{EscapedPath: "/", RawPath: "/", Path: "/"})
testPath("/sub-path/", paths{EscapedPath: "/", RawPath: "/", Path: "/"})
testPath("/sub-path//a/b///", paths{EscapedPath: "/a/b", RawPath: "/a/b", Path: "/a/b"})
testPath("/sub-path/%2f/", paths{EscapedPath: "/%2f", RawPath: "/%2f", Path: "//"})
// "/v2" is special for OCI container registry, it should always be in the root of the site
testPath("/v2", paths{EscapedPath: "/v2", RawPath: "/v2", Path: "/v2"})
testPath("/v2/", paths{EscapedPath: "/v2", RawPath: "/v2", Path: "/v2"})
testPath("/v2/%2f", paths{EscapedPath: "/v2/%2f", RawPath: "/v2/%2f", Path: "/v2//"})
}

View File

@ -19,13 +19,23 @@ import (
"gitea.com/go-chi/session" "gitea.com/go-chi/session"
"github.com/chi-middleware/proxy" "github.com/chi-middleware/proxy"
chi "github.com/go-chi/chi/v5" "github.com/go-chi/chi/v5"
) )
// ProtocolMiddlewares returns HTTP protocol related middlewares, and it provides a global panic recovery // ProtocolMiddlewares returns HTTP protocol related middlewares, and it provides a global panic recovery
func ProtocolMiddlewares() (handlers []any) { func ProtocolMiddlewares() (handlers []any) {
// first, normalize the URL path // make sure chi uses EscapedPath(RawPath) as RoutePath, then "%2f" could be handled correctly
handlers = append(handlers, normalizeRequestPathMiddleware) handlers = append(handlers, func(next http.Handler) http.Handler {
return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) {
ctx := chi.RouteContext(req.Context())
if req.URL.RawPath == "" {
ctx.RoutePath = req.URL.EscapedPath()
} else {
ctx.RoutePath = req.URL.RawPath
}
next.ServeHTTP(resp, req)
})
})
// prepare the ContextData and panic recovery // prepare the ContextData and panic recovery
handlers = append(handlers, func(next http.Handler) http.Handler { handlers = append(handlers, func(next http.Handler) http.Handler {
@ -75,58 +85,6 @@ func ProtocolMiddlewares() (handlers []any) {
return handlers return handlers
} }
func normalizeRequestPathMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) {
// escape the URL RawPath to ensure that all routing is done using a correctly escaped URL
req.URL.RawPath = req.URL.EscapedPath()
urlPath := req.URL.RawPath
rctx := chi.RouteContext(req.Context())
if rctx != nil && rctx.RoutePath != "" {
urlPath = rctx.RoutePath
}
normalizedPath := strings.TrimRight(urlPath, "/")
// the following code block is a slow-path for replacing all repeated slashes "//" to one single "/"
// if the path doesn't have repeated slashes, then no need to execute it
if strings.Contains(normalizedPath, "//") {
buf := &strings.Builder{}
prevWasSlash := false
for _, chr := range normalizedPath {
if chr != '/' || !prevWasSlash {
buf.WriteRune(chr)
}
prevWasSlash = chr == '/'
}
normalizedPath = buf.String()
}
if setting.UseSubURLPath {
remainingPath, ok := strings.CutPrefix(normalizedPath, setting.AppSubURL+"/")
if ok {
normalizedPath = "/" + remainingPath
} else if normalizedPath == setting.AppSubURL {
normalizedPath = "/"
} else if !strings.HasPrefix(normalizedPath+"/", "/v2/") {
// do not respond to other requests, to simulate a real sub-path environment
http.Error(resp, "404 page not found, sub-path is: "+setting.AppSubURL, http.StatusNotFound)
return
}
// TODO: it's not quite clear about how req.URL and rctx.RoutePath work together.
// Fortunately, it is only used for debug purpose, we have enough time to figure it out in the future.
req.URL.RawPath = normalizedPath
req.URL.Path = normalizedPath
}
if rctx == nil {
req.URL.Path = normalizedPath
} else {
rctx.RoutePath = normalizedPath
}
next.ServeHTTP(resp, req)
})
}
func Sessioner() func(next http.Handler) http.Handler { func Sessioner() func(next http.Handler) http.Handler {
return session.Sessioner(session.Options{ return session.Sessioner(session.Options{
Provider: setting.SessionConfig.Provider, Provider: setting.SessionConfig.Provider,

View File

@ -1,70 +0,0 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package common
import (
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
)
func TestStripSlashesMiddleware(t *testing.T) {
type test struct {
name string
expectedPath string
inputPath string
}
tests := []test{
{
name: "path with multiple slashes",
inputPath: "https://github.com///go-gitea//gitea.git",
expectedPath: "/go-gitea/gitea.git",
},
{
name: "path with no slashes",
inputPath: "https://github.com/go-gitea/gitea.git",
expectedPath: "/go-gitea/gitea.git",
},
{
name: "path with slashes in the middle",
inputPath: "https://git.data.coop//halfd/new-website.git",
expectedPath: "/halfd/new-website.git",
},
{
name: "path with slashes in the middle",
inputPath: "https://git.data.coop//halfd/new-website.git",
expectedPath: "/halfd/new-website.git",
},
{
name: "path with slashes in the end",
inputPath: "/user2//repo1/",
expectedPath: "/user2/repo1",
},
{
name: "path with slashes and query params",
inputPath: "/repo//migrate?service_type=3",
expectedPath: "/repo/migrate",
},
{
name: "path with encoded slash",
inputPath: "/user2/%2F%2Frepo1",
expectedPath: "/user2/%2F%2Frepo1",
},
}
for _, tt := range tests {
testMiddleware := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
assert.Equal(t, tt.expectedPath, r.URL.Path)
})
// pass the test middleware to validate the changes
handlerToTest := normalizeRequestPathMiddleware(testMiddleware)
// create a mock request to use
req := httptest.NewRequest("GET", tt.inputPath, nil)
// call the handler using a mock response recorder
handlerToTest.ServeHTTP(httptest.NewRecorder(), req)
}
}

View File

@ -191,7 +191,8 @@ func NormalRoutes() *web.Route {
if setting.Packages.Enabled { if setting.Packages.Enabled {
// This implements package support for most package managers // This implements package support for most package managers
r.Mount("/api/packages", packages_router.CommonRoutes()) r.Mount("/api/packages", packages_router.CommonRoutes())
// This implements the OCI API (Note this is not preceded by /api but is instead /v2) // This implements the OCI API, this container registry "/v2" endpoint must be in the root of the site.
// If site admin deploys Gitea in a sub-path, they must configure their reverse proxy to map the "https://host/v2" endpoint to Gitea.
r.Mount("/v2", packages_router.ContainerRoutes()) r.Mount("/v2", packages_router.ContainerRoutes())
} }

View File

@ -18,6 +18,7 @@ import (
"code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/optional"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/translation" "code.gitea.io/gitea/modules/translation"
"code.gitea.io/gitea/modules/web/middleware" "code.gitea.io/gitea/modules/web/middleware"
@ -142,23 +143,27 @@ func (b *Base) RemoteAddr() string {
return b.Req.RemoteAddr return b.Req.RemoteAddr
} }
// Params returns the param on route // Params returns the param in request path, eg: "/{var}" => "/a%2fb", then `var == "a/b"`
func (b *Base) Params(p string) string { func (b *Base) Params(name string) string {
s, _ := url.PathUnescape(chi.URLParam(b.Req, strings.TrimPrefix(p, ":"))) s, err := url.PathUnescape(b.PathParamRaw(name))
if err != nil && !setting.IsProd {
panic("Failed to unescape path param: " + err.Error() + ", there seems to be a double-unescaping bug")
}
return s return s
} }
func (b *Base) PathParamRaw(p string) string { // PathParamRaw returns the raw param in request path, eg: "/{var}" => "/a%2fb", then `var == "a%2fb"`
return chi.URLParam(b.Req, strings.TrimPrefix(p, ":")) func (b *Base) PathParamRaw(name string) string {
return chi.URLParam(b.Req, strings.TrimPrefix(name, ":"))
} }
// ParamsInt64 returns the param on route as int64 // ParamsInt64 returns the param in request path as int64
func (b *Base) ParamsInt64(p string) int64 { func (b *Base) ParamsInt64(p string) int64 {
v, _ := strconv.ParseInt(b.Params(p), 10, 64) v, _ := strconv.ParseInt(b.Params(p), 10, 64)
return v return v
} }
// SetParams set params into routes // SetParams set request path params into routes
func (b *Base) SetParams(k, v string) { func (b *Base) SetParams(k, v string) {
chiCtx := chi.RouteContext(b) chiCtx := chi.RouteContext(b)
chiCtx.URLParams.Add(strings.TrimPrefix(k, ":"), url.PathEscape(v)) chiCtx.URLParams.Add(strings.TrimPrefix(k, ":"), url.PathEscape(v))

View File

@ -1006,12 +1006,12 @@ func RepoRefByType(refType RepoRefType, ignoreNotExistErr ...bool) func(*Context
if refType == RepoRefLegacy { if refType == RepoRefLegacy {
// redirect from old URL scheme to new URL scheme // redirect from old URL scheme to new URL scheme
prefix := strings.TrimPrefix(setting.AppSubURL+strings.ToLower(strings.TrimSuffix(ctx.Req.URL.Path, ctx.Params("*"))), strings.ToLower(ctx.Repo.RepoLink)) prefix := strings.TrimPrefix(setting.AppSubURL+strings.ToLower(strings.TrimSuffix(ctx.Req.URL.Path, ctx.Params("*"))), strings.ToLower(ctx.Repo.RepoLink))
redirect := path.Join(
ctx.Redirect(path.Join(
ctx.Repo.RepoLink, ctx.Repo.RepoLink,
util.PathEscapeSegments(prefix), util.PathEscapeSegments(prefix),
ctx.Repo.BranchNameSubURL(), ctx.Repo.BranchNameSubURL(),
util.PathEscapeSegments(ctx.Repo.TreePath))) util.PathEscapeSegments(ctx.Repo.TreePath))
ctx.Redirect(redirect)
return cancel return cancel
} }
} }

View File

@ -4,6 +4,7 @@
package integration package integration
import ( import (
"fmt"
"net/http" "net/http"
"net/url" "net/url"
"path" "path"
@ -14,22 +15,6 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func testSrcRouteRedirect(t *testing.T, session *TestSession, user, repo, route, expectedLocation string, expectedStatus int) {
prefix := path.Join("/", user, repo, "src")
// Make request
req := NewRequest(t, "GET", path.Join(prefix, route))
resp := session.MakeRequest(t, req, http.StatusSeeOther)
// Check Location header
location := resp.Header().Get("Location")
assert.Equal(t, path.Join(prefix, expectedLocation), location)
// Perform redirect
req = NewRequest(t, "GET", location)
session.MakeRequest(t, req, expectedStatus)
}
func setDefaultBranch(t *testing.T, session *TestSession, user, repo, branch string) { func setDefaultBranch(t *testing.T, session *TestSession, user, repo, branch string) {
location := path.Join("/", user, repo, "settings/branches") location := path.Join("/", user, repo, "settings/branches")
csrf := GetCSRF(t, session, location) csrf := GetCSRF(t, session, location)
@ -41,7 +26,7 @@ func setDefaultBranch(t *testing.T, session *TestSession, user, repo, branch str
session.MakeRequest(t, req, http.StatusSeeOther) session.MakeRequest(t, req, http.StatusSeeOther)
} }
func TestNonasciiBranches(t *testing.T) { func TestNonAsciiBranches(t *testing.T) {
testRedirects := []struct { testRedirects := []struct {
from string from string
to string to string
@ -98,6 +83,7 @@ func TestNonasciiBranches(t *testing.T) {
to: "branch/%E3%83%96%E3%83%A9%E3%83%B3%E3%83%81", to: "branch/%E3%83%96%E3%83%A9%E3%83%B3%E3%83%81",
status: http.StatusOK, status: http.StatusOK,
}, },
// Tags // Tags
{ {
from: "Тэг", from: "Тэг",
@ -119,6 +105,7 @@ func TestNonasciiBranches(t *testing.T) {
to: "tag/%E3%82%BF%E3%82%B0/%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB.md", to: "tag/%E3%82%BF%E3%82%B0/%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB.md",
status: http.StatusOK, status: http.StatusOK,
}, },
// Files // Files
{ {
from: "README.md", from: "README.md",
@ -135,6 +122,7 @@ func TestNonasciiBranches(t *testing.T) {
to: "branch/Plus+Is+Not+Space/%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB.md", to: "branch/Plus+Is+Not+Space/%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB.md",
status: http.StatusNotFound, // it's not on default branch status: http.StatusNotFound, // it's not on default branch
}, },
// Same but url-encoded (few tests) // Same but url-encoded (few tests)
{ {
from: "%E3%83%96%E3%83%A9%E3%83%B3%E3%83%81", from: "%E3%83%96%E3%83%A9%E3%83%B3%E3%83%81",
@ -205,10 +193,23 @@ func TestNonasciiBranches(t *testing.T) {
session := loginUser(t, user) session := loginUser(t, user)
setDefaultBranch(t, session, user, repo, "Plus+Is+Not+Space") setDefaultBranch(t, session, user, repo, "Plus+Is+Not+Space")
defer setDefaultBranch(t, session, user, repo, "master")
for _, test := range testRedirects { for _, test := range testRedirects {
testSrcRouteRedirect(t, session, user, repo, test.from, test.to, test.status) t.Run(test.from, func(t *testing.T) {
req := NewRequest(t, "GET", fmt.Sprintf("/%s/%s/src/%s", user, repo, test.from))
resp := session.MakeRequest(t, req, http.StatusSeeOther)
if resp.Code != http.StatusSeeOther {
return
} }
setDefaultBranch(t, session, user, repo, "master") redirectLocation := resp.Header().Get("Location")
if !assert.Equal(t, fmt.Sprintf("/%s/%s/src/%s", user, repo, test.to), redirectLocation) {
return
}
req = NewRequest(t, "GET", redirectLocation)
session.MakeRequest(t, req, test.status)
})
}
} }