@@ -5,6 +5,7 @@
package charset
import (
"strings"
"testing"
"code.gitea.io/gitea/modules/setting"
@@ -12,6 +13,22 @@ import (
"github.com/stretchr/testify/assert"
)
// resetDefaultCharsetsOrder rebuilds setting.Repository.DetectedCharsetScore
// from setting.Repository.DetectedCharsetsOrder. The tests in this file call
// it first so that detection scoring starts from a freshly computed ranking.
//
// Each configured name is canonicalized (surrounding whitespace trimmed, then
// lowercased) and assigned a score equal to its rank among the distinct
// canonical names, in order of first appearance. Later duplicates of a name
// keep the score of the first occurrence.
func resetDefaultCharsetsOrder() {
	order := setting.Repository.DetectedCharsetsOrder
	scores := make(map[string]int, len(order))
	// Install the fresh map first; it is populated through the local alias.
	setting.Repository.DetectedCharsetScore = scores

	for _, charset := range order {
		canonical := strings.ToLower(strings.TrimSpace(charset))
		if _, seen := scores[canonical]; seen {
			continue
		}
		// The count of names recorded so far is the next free rank; the
		// right-hand side is evaluated before the new key is inserted.
		scores[canonical] = len(scores)
	}
}
func TestRemoveBOMIfPresent(t *testing.T) {
res := RemoveBOMIfPresent([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
@@ -21,6 +38,7 @@ func TestRemoveBOMIfPresent(t *testing.T) {
}
func TestToUTF8WithErr(t *testing.T) {
resetDefaultCharsetsOrder()
var res string
var err error
@@ -76,6 +94,7 @@ func TestToUTF8WithErr(t *testing.T) {
}
func TestToUTF8WithFallback(t *testing.T) {
resetDefaultCharsetsOrder()
// "ABC"
res := ToUTF8WithFallback([]byte{0x41, 0x42, 0x43})
assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
@@ -116,7 +135,7 @@ func TestToUTF8WithFallback(t *testing.T) {
}
func TestToUTF8(t *testing.T) {
resetDefaultCharsetsOrder()
// Note: the Go compiler seems to behave differently depending on the current
// locale, so some conversions might behave differently. For that reason, we don't
// depend on particular conversions but on expected behaviors.
@@ -165,6 +184,7 @@ func TestToUTF8(t *testing.T) {
}
func TestToUTF8DropErrors(t *testing.T) {
resetDefaultCharsetsOrder()
// "ABC"
res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43})
assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
@@ -204,6 +224,7 @@ func TestToUTF8DropErrors(t *testing.T) {
}
func TestDetectEncoding(t *testing.T) {
resetDefaultCharsetsOrder()
testSuccess := func(b []byte, expected string) {
encoding, err := DetectEncoding(b)
assert.NoError(t, err)
@@ -225,10 +246,7 @@ func TestDetectEncoding(t *testing.T) {
b = []byte{0x44, 0xe9, 0x63, 0x6f, 0x72, 0x0a}
encoding, err := DetectEncoding(b)
assert.NoError(t, err)
// due to a race condition in the `chardet` library, it could either detect
// "ISO-8859-1" or "ISO-8859-2" here. Technically either is correct, so
// we accept either.
assert.Contains(t, encoding, "ISO-8859")
assert.Contains(t, encoding, "ISO-8859-1")
old := setting.Repository.AnsiCharset
setting.Repository.AnsiCharset = "placeholder"