* Pooled and buffered gzip implementation * Add test for gzip * Add integration test * Ensure LFS check within transaction. The previous code made it possible for a race condition to occur whereby an LFSMetaObject could be checked into the database twice. We should check whether the LFSMetaObject is in the database and insert it if not, all in one transaction. * Try to avoid the primary-key problem in PostgreSQL. The integration tests are affected by https://github.com/go-testfixtures/testfixtures/issues/39; if we set the primary key high enough, keep a count of it, and remove the objects at the end of each test, we shouldn't be affected by this. (tags/v1.9.0-dev)
@@ -0,0 +1,129 @@ | |||
// Copyright 2019 The Gitea Authors. All rights reserved. | |||
// Use of this source code is governed by a MIT-style | |||
// license that can be found in the LICENSE file. | |||
package integrations | |||
import ( | |||
"archive/zip" | |||
"bytes" | |||
"crypto/sha256" | |||
"encoding/hex" | |||
"io" | |||
"io/ioutil" | |||
"net/http" | |||
"testing" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/gzip" | |||
"code.gitea.io/gitea/modules/lfs" | |||
"code.gitea.io/gitea/modules/setting" | |||
"github.com/stretchr/testify/assert" | |||
gzipp "github.com/klauspost/compress/gzip" | |||
) | |||
func GenerateLFSOid(content io.Reader) (string, error) { | |||
h := sha256.New() | |||
if _, err := io.Copy(h, content); err != nil { | |||
return "", err | |||
} | |||
sum := h.Sum(nil) | |||
return hex.EncodeToString(sum), nil | |||
} | |||
var lfsID = int64(20000) | |||
func storeObjectInRepo(t *testing.T, repositoryID int64, content *[]byte) string { | |||
oid, err := GenerateLFSOid(bytes.NewReader(*content)) | |||
assert.NoError(t, err) | |||
var lfsMetaObject *models.LFSMetaObject | |||
if setting.UsePostgreSQL { | |||
lfsMetaObject = &models.LFSMetaObject{ID: lfsID, Oid: oid, Size: int64(len(*content)), RepositoryID: repositoryID} | |||
} else { | |||
lfsMetaObject = &models.LFSMetaObject{Oid: oid, Size: int64(len(*content)), RepositoryID: repositoryID} | |||
} | |||
lfsID = lfsID + 1 | |||
lfsMetaObject, err = models.NewLFSMetaObject(lfsMetaObject) | |||
assert.NoError(t, err) | |||
contentStore := &lfs.ContentStore{BasePath: setting.LFS.ContentPath} | |||
if !contentStore.Exists(lfsMetaObject) { | |||
err := contentStore.Put(lfsMetaObject, bytes.NewReader(*content)) | |||
assert.NoError(t, err) | |||
} | |||
return oid | |||
} | |||
func doLfs(t *testing.T, content *[]byte, expectGzip bool) { | |||
prepareTestEnv(t) | |||
repo, err := models.GetRepositoryByOwnerAndName("user2", "repo1") | |||
assert.NoError(t, err) | |||
oid := storeObjectInRepo(t, repo.ID, content) | |||
defer repo.RemoveLFSMetaObjectByOid(oid) | |||
session := loginUser(t, "user2") | |||
// Request OID | |||
req := NewRequest(t, "GET", "/user2/repo1.git/info/lfs/objects/"+oid+"/test") | |||
req.Header.Set("Accept-Encoding", "gzip") | |||
resp := session.MakeRequest(t, req, http.StatusOK) | |||
contentEncoding := resp.Header().Get("Content-Encoding") | |||
if !expectGzip || !setting.EnableGzip { | |||
assert.NotContains(t, contentEncoding, "gzip") | |||
result := resp.Body.Bytes() | |||
assert.Equal(t, *content, result) | |||
} else { | |||
assert.Contains(t, contentEncoding, "gzip") | |||
gzippReader, err := gzipp.NewReader(resp.Body) | |||
assert.NoError(t, err) | |||
result, err := ioutil.ReadAll(gzippReader) | |||
assert.NoError(t, err) | |||
assert.Equal(t, *content, result) | |||
} | |||
} | |||
func TestGetLFSSmall(t *testing.T) { | |||
content := []byte("A very small file\n") | |||
doLfs(t, &content, false) | |||
} | |||
func TestGetLFSLarge(t *testing.T) { | |||
content := make([]byte, gzip.MinSize*10) | |||
for i := range content { | |||
content[i] = byte(i % 256) | |||
} | |||
doLfs(t, &content, true) | |||
} | |||
func TestGetLFSGzip(t *testing.T) { | |||
b := make([]byte, gzip.MinSize*10) | |||
for i := range b { | |||
b[i] = byte(i % 256) | |||
} | |||
outputBuffer := bytes.NewBuffer([]byte{}) | |||
gzippWriter := gzipp.NewWriter(outputBuffer) | |||
gzippWriter.Write(b) | |||
gzippWriter.Close() | |||
content := outputBuffer.Bytes() | |||
doLfs(t, &content, false) | |||
} | |||
func TestGetLFSZip(t *testing.T) { | |||
b := make([]byte, gzip.MinSize*10) | |||
for i := range b { | |||
b[i] = byte(i % 256) | |||
} | |||
outputBuffer := bytes.NewBuffer([]byte{}) | |||
zipWriter := zip.NewWriter(outputBuffer) | |||
fileWriter, err := zipWriter.Create("default") | |||
assert.NoError(t, err) | |||
fileWriter.Write(b) | |||
zipWriter.Close() | |||
content := outputBuffer.Bytes() | |||
doLfs(t, &content, false) | |||
} |
@@ -30,6 +30,7 @@ LFS_CONTENT_PATH = data/lfs-sqlite | |||
OFFLINE_MODE = false | |||
LFS_JWT_SECRET = Tv_MjmZuHqpIY6GFl12ebgkRAMt4RlWt0v4EHKSXO0w | |||
APP_DATA_PATH = integrations/gitea-integration-sqlite/data | |||
ENABLE_GZIP = true | |||
[mailer] | |||
ENABLED = false | |||
@@ -44,20 +44,20 @@ const ( | |||
func NewLFSMetaObject(m *LFSMetaObject) (*LFSMetaObject, error) { | |||
var err error | |||
has, err := x.Get(m) | |||
sess := x.NewSession() | |||
defer sess.Close() | |||
if err = sess.Begin(); err != nil { | |||
return nil, err | |||
} | |||
has, err := sess.Get(m) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if has { | |||
m.Existing = true | |||
return m, nil | |||
} | |||
sess := x.NewSession() | |||
defer sess.Close() | |||
if err = sess.Begin(); err != nil { | |||
return nil, err | |||
return m, sess.Commit() | |||
} | |||
if _, err = sess.Insert(m); err != nil { | |||
@@ -0,0 +1,327 @@ | |||
// Copyright 2019 The Gitea Authors. All rights reserved. | |||
// Use of this source code is governed by a MIT-style | |||
// license that can be found in the LICENSE file. | |||
package gzip | |||
import ( | |||
"bufio" | |||
"fmt" | |||
"io" | |||
"net" | |||
"net/http" | |||
"regexp" | |||
"strconv" | |||
"strings" | |||
"sync" | |||
"github.com/klauspost/compress/gzip" | |||
"gopkg.in/macaron.v1" | |||
) | |||
const (
	// HTTP header names inspected or set by the middleware.
	acceptEncodingHeader  = "Accept-Encoding"
	contentEncodingHeader = "Content-Encoding"
	contentLengthHeader   = "Content-Length"
	contentTypeHeader     = "Content-Type"
	rangeHeader           = "Range"
	varyHeader            = "Vary"
)

const (
	// MinSize is the minimum size of content we will compress
	MinSize = 1400
)
// noopCloser wraps an io.Writer with a do-nothing Close so it can be used
// where an io.WriteCloser is required without closing the underlying
// writer early.
type noopCloser struct {
	io.Writer
}

// Close implements io.Closer and is intentionally a no-op.
func (noopCloser) Close() error { return nil }
// WriterPool is a gzip writer pool to reduce workload on creation of | |||
// gzip writers | |||
type WriterPool struct { | |||
pool sync.Pool | |||
compressionLevel int | |||
} | |||
// NewWriterPool creates a new pool | |||
func NewWriterPool(compressionLevel int) *WriterPool { | |||
return &WriterPool{pool: sync.Pool{ | |||
// New will return nil, we'll manage the creation of new | |||
// writers in the middleware | |||
New: func() interface{} { return nil }, | |||
}, | |||
compressionLevel: compressionLevel} | |||
} | |||
// Get a writer from the pool - or create one if not available | |||
func (wp *WriterPool) Get(rw macaron.ResponseWriter) *gzip.Writer { | |||
ret := wp.pool.Get() | |||
if ret == nil { | |||
ret, _ = gzip.NewWriterLevel(rw, wp.compressionLevel) | |||
} else { | |||
ret.(*gzip.Writer).Reset(rw) | |||
} | |||
return ret.(*gzip.Writer) | |||
} | |||
// Put returns a writer to the pool | |||
func (wp *WriterPool) Put(w *gzip.Writer) { | |||
wp.pool.Put(w) | |||
} | |||
var writerPool WriterPool | |||
var regex regexp.Regexp | |||
// Options represents the configuration for the gzip middleware | |||
type Options struct { | |||
CompressionLevel int | |||
} | |||
func validateCompressionLevel(level int) bool { | |||
return level == gzip.DefaultCompression || | |||
level == gzip.ConstantCompression || | |||
(level >= gzip.BestSpeed && level <= gzip.BestCompression) | |||
} | |||
func validate(options []Options) Options { | |||
// Default to level 4 compression (Best results seem to be between 4 and 6) | |||
opt := Options{CompressionLevel: 4} | |||
if len(options) > 0 { | |||
opt = options[0] | |||
} | |||
if !validateCompressionLevel(opt.CompressionLevel) { | |||
opt.CompressionLevel = 4 | |||
} | |||
return opt | |||
} | |||
// Middleware creates a macaron.Handler to proxy the response | |||
func Middleware(options ...Options) macaron.Handler { | |||
opt := validate(options) | |||
writerPool = *NewWriterPool(opt.CompressionLevel) | |||
regex := regexp.MustCompile(`bytes=(\d+)\-.*`) | |||
return func(ctx *macaron.Context) { | |||
// If the client won't accept gzip or x-gzip don't compress | |||
if !strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "gzip") && | |||
!strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "x-gzip") { | |||
return | |||
} | |||
// If the client is asking for a specific range of bytes - don't compress | |||
if rangeHdr := ctx.Req.Header.Get(rangeHeader); rangeHdr != "" { | |||
match := regex.FindStringSubmatch(rangeHdr) | |||
if match != nil && len(match) > 1 { | |||
return | |||
} | |||
} | |||
// OK we should proxy the response writer | |||
// We are still not necessarily going to compress... | |||
proxyWriter := &ProxyResponseWriter{ | |||
ResponseWriter: ctx.Resp, | |||
} | |||
defer proxyWriter.Close() | |||
ctx.Resp = proxyWriter | |||
ctx.MapTo(proxyWriter, (*http.ResponseWriter)(nil)) | |||
// Check if render middleware has been registered, | |||
// if yes, we need to modify ResponseWriter for it as well. | |||
if _, ok := ctx.Render.(*macaron.DummyRender); !ok { | |||
ctx.Render.SetResponseWriter(proxyWriter) | |||
} | |||
ctx.Next() | |||
} | |||
} | |||
// ProxyResponseWriter is a wrapped macaron ResponseWriter that may compress its contents
type ProxyResponseWriter struct {
	// writer is nil until the first Write decides between gzip and plain
	// output; afterwards it is a *gzip.Writer or a noopCloser.
	writer io.WriteCloser
	macaron.ResponseWriter
	// stopped is set once the response is finalized as plain output or Close has run.
	stopped bool
	// code holds a status recorded by WriteHeader until it is emitted (0 = none pending).
	code int
	// buf accumulates written bytes until enough are seen to pick compression.
	buf []byte
}
// Write appends data to the proxied gzip writer.
//
// Until a compression decision is made, bytes are buffered in proxy.buf.
// Once the buffer (or a declared Content-Length) reaches MinSize and the
// Content-Type is not an already-compressed format, output switches to
// gzip via startGzip; otherwise it falls back to plain output via
// startPlain. On success it always reports len(b) written, since buffered
// bytes are accounted for immediately.
func (proxy *ProxyResponseWriter) Write(b []byte) (int, error) {
	// if writer is initialized, use the writer
	if proxy.writer != nil {
		return proxy.writer.Write(b)
	}

	proxy.buf = append(proxy.buf, b...)

	var (
		contentLength, _ = strconv.Atoi(proxy.Header().Get(contentLengthHeader))
		contentType      = proxy.Header().Get(contentTypeHeader)
		contentEncoding  = proxy.Header().Get(contentEncodingHeader)
	)

	// OK if an encoding hasn't been chosen, and content length > 1400
	// and content type isn't a compressed type
	if contentEncoding == "" &&
		(contentLength == 0 || contentLength >= MinSize) &&
		(contentType == "" || !compressedContentType(contentType)) {
		// If current buffer is less than the min size and a Content-Length isn't set, then wait
		if len(proxy.buf) < MinSize && contentLength == 0 {
			return len(b), nil
		}
		// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
		if contentLength >= MinSize || len(proxy.buf) >= MinSize {
			// if we don't know the content type, infer it
			if contentType == "" {
				contentType = http.DetectContentType(proxy.buf)
				proxy.Header().Set(contentTypeHeader, contentType)
			}
			// If the Content-Type is not compressed - Compress!
			if !compressedContentType(contentType) {
				if err := proxy.startGzip(); err != nil {
					return 0, err
				}
				return len(b), nil
			}
		}
	}
	// If we got here, we should not GZIP this response.
	if err := proxy.startPlain(); err != nil {
		return 0, err
	}
	return len(b), nil
}
// startGzip commits the response to gzip output: it sets the encoding and
// Vary headers, removes any pre-set Content-Length, emits the pending
// status code, and flushes buffered bytes through a pooled gzip writer.
func (proxy *ProxyResponseWriter) startGzip() error {
	// Set the content-encoding and vary headers.
	proxy.Header().Set(contentEncodingHeader, "gzip")
	proxy.Header().Set(varyHeader, acceptEncodingHeader)

	// if the Content-Length is already set, then calls to Write on gzip
	// will fail to set the Content-Length header since its already set
	// See: https://github.com/golang/go/issues/14975.
	proxy.Header().Del(contentLengthHeader)

	// Write the header to gzip response.
	if proxy.code != 0 {
		proxy.ResponseWriter.WriteHeader(proxy.code)
		// Ensure that no other WriteHeader's happen
		proxy.code = 0
	}

	// Initialize and flush the buffer into the gzip response if there are any bytes.
	// If there aren't any, we shouldn't initialize it yet because on Close it will
	// write the gzip header even if nothing was ever written.
	if len(proxy.buf) > 0 {
		// Initialize the GZIP response.
		proxy.writer = writerPool.Get(proxy.ResponseWriter)

		return proxy.writeBuf()
	}
	return nil
}
func (proxy *ProxyResponseWriter) startPlain() error { | |||
if proxy.code != 0 { | |||
proxy.ResponseWriter.WriteHeader(proxy.code) | |||
proxy.code = 0 | |||
} | |||
proxy.stopped = true | |||
proxy.writer = noopCloser{proxy.ResponseWriter} | |||
return proxy.writeBuf() | |||
} | |||
func (proxy *ProxyResponseWriter) writeBuf() error { | |||
if proxy.buf == nil { | |||
return nil | |||
} | |||
n, err := proxy.writer.Write(proxy.buf) | |||
// This should never happen (per io.Writer docs), but if the write didn't | |||
// accept the entire buffer but returned no specific error, we have no clue | |||
// what's going on, so abort just to be safe. | |||
if err == nil && n < len(proxy.buf) { | |||
err = io.ErrShortWrite | |||
} | |||
proxy.buf = nil | |||
return err | |||
} | |||
// WriteHeader will ensure that we have setup the writer before we write the header | |||
func (proxy *ProxyResponseWriter) WriteHeader(code int) { | |||
if proxy.code == 0 { | |||
proxy.code = code | |||
} | |||
} | |||
// Close the writer | |||
func (proxy *ProxyResponseWriter) Close() error { | |||
if proxy.stopped { | |||
return nil | |||
} | |||
if proxy.writer == nil { | |||
err := proxy.startPlain() | |||
if err != nil { | |||
err = fmt.Errorf("GzipMiddleware: write to regular responseWriter at close gets error: %q", err.Error()) | |||
} | |||
} | |||
err := proxy.writer.Close() | |||
if poolWriter, ok := proxy.writer.(*gzip.Writer); ok { | |||
writerPool.Put(poolWriter) | |||
} | |||
proxy.writer = nil | |||
proxy.stopped = true | |||
return err | |||
} | |||
// Flush the writer | |||
func (proxy *ProxyResponseWriter) Flush() { | |||
if proxy.writer == nil { | |||
return | |||
} | |||
if gw, ok := proxy.writer.(*gzip.Writer); ok { | |||
gw.Flush() | |||
} | |||
proxy.ResponseWriter.Flush() | |||
} | |||
// Hijack implements http.Hijacker. If the underlying ResponseWriter is a | |||
// Hijacker, its Hijack method is returned. Otherwise an error is returned. | |||
func (proxy *ProxyResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) { | |||
hijacker, ok := proxy.ResponseWriter.(http.Hijacker) | |||
if !ok { | |||
return nil, nil, fmt.Errorf("the ResponseWriter doesn't support the Hijacker interface") | |||
} | |||
return hijacker.Hijack() | |||
} | |||
// verify Hijacker interface implementation | |||
var _ http.Hijacker = &ProxyResponseWriter{} | |||
// compressedContentType reports whether contentType denotes data that is
// already compressed and therefore not worth gzipping again.
func compressedContentType(contentType string) bool {
	return contentType == "application/zip" ||
		contentType == "application/x-gzip" ||
		contentType == "application/gzip"
}
@@ -0,0 +1,131 @@ | |||
// Copyright 2019 The Gitea Authors. All rights reserved. | |||
// Use of this source code is governed by a MIT-style | |||
// license that can be found in the LICENSE file. | |||
package gzip | |||
import ( | |||
"archive/zip" | |||
"bytes" | |||
"io/ioutil" | |||
"net/http" | |||
"net/http/httptest" | |||
"testing" | |||
gzipp "github.com/klauspost/compress/gzip" | |||
"github.com/stretchr/testify/assert" | |||
macaron "gopkg.in/macaron.v1" | |||
) | |||
func setup(sampleResponse []byte) (*macaron.Macaron, *[]byte) { | |||
m := macaron.New() | |||
m.Use(Middleware()) | |||
m.Get("/", func() *[]byte { return &sampleResponse }) | |||
return m, &sampleResponse | |||
} | |||
func reqNoAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte) { | |||
// Request without accept gzip: Should not gzip | |||
resp := httptest.NewRecorder() | |||
req, err := http.NewRequest("GET", "/", nil) | |||
assert.NoError(t, err) | |||
m.ServeHTTP(resp, req) | |||
_, ok := resp.HeaderMap[contentEncodingHeader] | |||
assert.False(t, ok) | |||
contentEncoding := resp.Header().Get(contentEncodingHeader) | |||
assert.NotContains(t, contentEncoding, "gzip") | |||
result := resp.Body.Bytes() | |||
assert.Equal(t, *sampleResponse, result) | |||
} | |||
func reqAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte, expectGzip bool) { | |||
// Request without accept gzip: Should not gzip | |||
resp := httptest.NewRecorder() | |||
req, err := http.NewRequest("GET", "/", nil) | |||
assert.NoError(t, err) | |||
req.Header.Set(acceptEncodingHeader, "gzip") | |||
m.ServeHTTP(resp, req) | |||
_, ok := resp.HeaderMap[contentEncodingHeader] | |||
assert.Equal(t, ok, expectGzip) | |||
contentEncoding := resp.Header().Get(contentEncodingHeader) | |||
if expectGzip { | |||
assert.Contains(t, contentEncoding, "gzip") | |||
gzippReader, err := gzipp.NewReader(resp.Body) | |||
assert.NoError(t, err) | |||
result, err := ioutil.ReadAll(gzippReader) | |||
assert.NoError(t, err) | |||
assert.Equal(t, *sampleResponse, result) | |||
} else { | |||
assert.NotContains(t, contentEncoding, "gzip") | |||
result := resp.Body.Bytes() | |||
assert.Equal(t, *sampleResponse, result) | |||
} | |||
} | |||
func TestMiddlewareSmall(t *testing.T) { | |||
m, sampleResponse := setup([]byte("Small response")) | |||
reqNoAcceptGzip(t, m, sampleResponse) | |||
reqAcceptGzip(t, m, sampleResponse, false) | |||
} | |||
func TestMiddlewareLarge(t *testing.T) { | |||
b := make([]byte, MinSize+1) | |||
for i := range b { | |||
b[i] = byte(i % 256) | |||
} | |||
m, sampleResponse := setup(b) | |||
reqNoAcceptGzip(t, m, sampleResponse) | |||
// This should be gzipped as we accept gzip | |||
reqAcceptGzip(t, m, sampleResponse, true) | |||
} | |||
func TestMiddlewareGzip(t *testing.T) { | |||
b := make([]byte, MinSize*10) | |||
for i := range b { | |||
b[i] = byte(i % 256) | |||
} | |||
outputBuffer := bytes.NewBuffer([]byte{}) | |||
gzippWriter := gzipp.NewWriter(outputBuffer) | |||
gzippWriter.Write(b) | |||
gzippWriter.Flush() | |||
gzippWriter.Close() | |||
output := outputBuffer.Bytes() | |||
m, sampleResponse := setup(output) | |||
reqNoAcceptGzip(t, m, sampleResponse) | |||
// This should not be gzipped even though we accept gzip | |||
reqAcceptGzip(t, m, sampleResponse, false) | |||
} | |||
func TestMiddlewareZip(t *testing.T) { | |||
b := make([]byte, MinSize*10) | |||
for i := range b { | |||
b[i] = byte(i % 256) | |||
} | |||
outputBuffer := bytes.NewBuffer([]byte{}) | |||
zipWriter := zip.NewWriter(outputBuffer) | |||
fileWriter, err := zipWriter.Create("default") | |||
assert.NoError(t, err) | |||
fileWriter.Write(b) | |||
//fileWriter.Close() | |||
zipWriter.Close() | |||
output := outputBuffer.Bytes() | |||
m, sampleResponse := setup(output) | |||
reqNoAcceptGzip(t, m, sampleResponse) | |||
// This should not be gzipped even though we accept gzip | |||
reqAcceptGzip(t, m, sampleResponse, false) | |||
} |
@@ -14,6 +14,7 @@ import ( | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/auth" | |||
"code.gitea.io/gitea/modules/context" | |||
"code.gitea.io/gitea/modules/gzip" | |||
"code.gitea.io/gitea/modules/lfs" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/metrics" | |||
@@ -36,7 +37,6 @@ import ( | |||
"github.com/go-macaron/cache" | |||
"github.com/go-macaron/captcha" | |||
"github.com/go-macaron/csrf" | |||
"github.com/go-macaron/gzip" | |||
"github.com/go-macaron/i18n" | |||
"github.com/go-macaron/session" | |||
"github.com/go-macaron/toolbox" | |||
@@ -54,7 +54,7 @@ func NewMacaron() *macaron.Macaron { | |||
} | |||
m.Use(macaron.Recovery()) | |||
if setting.EnableGzip { | |||
m.Use(gzip.Gziper()) | |||
m.Use(gzip.Middleware()) | |||
} | |||
if setting.Protocol == setting.FCGI { | |||
m.SetURLPrefix(setting.AppSubURL) | |||