* Support custom sanitization policy Allowing the gitea administrator to configure sanitization policy allows them to couple external renders and custom templates to support more markup. In particular, the `pandoc` renderer allows generating KaTeX annotations, wrapping them in `<span>` elements with class `math` and either `inline` or `display` (depending on whether or not inline or block mode was requested). This iteration gives the administrator whitelisting powers; carefully crafted regexes will thus let through only the desired attributes necessary to support their custom markup. Resolves: #9054 Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Document new sanitization configuration - Adds basic documentation to app.ini.sample, - Adds an example to the Configuration Cheat Sheet, and - Adds extended information to External Renderers section. Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Drop extraneous length check in newMarkupSanitizer(...) Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Fix plural ELEMENT and ALLOW_ATTR in docs These were left over from their initial names. Make them singular to conform with the current expectations. Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com>tags/v1.21.12.1
| @@ -877,6 +877,12 @@ SHOW_FOOTER_VERSION = true | |||||
| ; Show template execution time in the footer | ; Show template execution time in the footer | ||||
| SHOW_FOOTER_TEMPLATE_LOAD_TIME = true | SHOW_FOOTER_TEMPLATE_LOAD_TIME = true | ||||
| [markup.sanitizer] | |||||
| ; The following keys can be used multiple times to define sanitization policy rules. | |||||
| ;ELEMENT = span | |||||
| ;ALLOW_ATTR = class | |||||
| ;REGEXP = ^(info|warning|error)$ | |||||
| [markup.asciidoc] | [markup.asciidoc] | ||||
| ENABLED = false | ENABLED = false | ||||
| ; List of file extensions that should be rendered by an external command | ; List of file extensions that should be rendered by an external command | ||||
| @@ -578,6 +578,24 @@ Two special environment variables are passed to the render command: | |||||
| - `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links. | - `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links. | ||||
| - `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths. | - `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths. | ||||
| Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc. | |||||
| ```ini | |||||
| [markup.sanitizer] | |||||
| ; Pandoc renders TeX segments as <span>s with the "math" class, optionally | |||||
| ; with "inline" or "display" classes depending on context. | |||||
| ELEMENT = span | |||||
| ALLOW_ATTR = class | |||||
| REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+ | |||||
| ``` | |||||
| - `ELEMENT`: The element this policy applies to. Must be non-empty. | |||||
| - `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty. | |||||
| - `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute. | |||||
| You may redefine `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each time all three are defined is a single policy entry. | |||||
| ## Time (`time`) | ## Time (`time`) | ||||
| - `FORMAT`: Time format to display on the UI, e.g. RFC1123 or 2006-01-02 15:04:05 | - `FORMAT`: Time format to display on the UI, e.g. RFC1123 or 2006-01-02 15:04:05 | ||||
| @@ -68,4 +68,22 @@ RENDER_COMMAND = rst2html.py | |||||
| IS_INPUT_FILE = false | IS_INPUT_FILE = false | ||||
| ``` | ``` | ||||
| If your external markup relies on additional classes and attributes on the generated HTML elements, you might need to enable custom sanitizer policies. Gitea uses the [`bluemonday`](https://godoc.org/github.com/microcosm-cc/bluemonday) package as our HTML sanitizer. The example below will support [KaTeX](https://katex.org/) output from [`pandoc`](https://pandoc.org/). | |||||
| ```ini | |||||
| [markup.sanitizer] | |||||
| ; Pandoc renders TeX segments as <span>s with the "math" class, optionally | |||||
| ; with "inline" or "display" classes depending on context. | |||||
| ELEMENT = span | |||||
| ALLOW_ATTR = class | |||||
| REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+ | |||||
| [markup.markdown] | |||||
| ENABLED = true | |||||
| FILE_EXTENSIONS = .md,.markdown | |||||
| RENDER_COMMAND = pandoc -f markdown -t html --katex | |||||
| ``` | |||||
| You may redefine `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each time all three are defined is a single policy entry. All three must be defined, but `REGEXP` may be blank to allow unconditional whitelisting of that attribute. | |||||
| Once your configuration changes have been made, restart Gitea to have changes take effect. | Once your configuration changes have been made, restart Gitea to have changes take effect. | ||||
| @@ -50,6 +50,15 @@ func ReplaceSanitizer() { | |||||
| // Allow <kbd> tags for keyboard shortcut styling | // Allow <kbd> tags for keyboard shortcut styling | ||||
| sanitizer.policy.AllowElements("kbd") | sanitizer.policy.AllowElements("kbd") | ||||
| // Custom keyword markup | |||||
| for _, rule := range setting.ExternalSanitizerRules { | |||||
| if rule.Regexp != nil { | |||||
| sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) | |||||
| } else { | |||||
| sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) | |||||
| } | |||||
| } | |||||
| } | } | ||||
| // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. | // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. | ||||
| @@ -9,11 +9,14 @@ import ( | |||||
| "strings" | "strings" | ||||
| "code.gitea.io/gitea/modules/log" | "code.gitea.io/gitea/modules/log" | ||||
| "gopkg.in/ini.v1" | |||||
| ) | ) | ||||
| // ExternalMarkupParsers represents the external markup parsers | // ExternalMarkupParsers represents the external markup parsers | ||||
| var ( | var ( | ||||
| ExternalMarkupParsers []MarkupParser | |||||
| ExternalMarkupParsers []MarkupParser | |||||
| ExternalSanitizerRules []MarkupSanitizerRule | |||||
| ) | ) | ||||
| // MarkupParser defines the external parser configured in ini | // MarkupParser defines the external parser configured in ini | ||||
| @@ -25,8 +28,15 @@ type MarkupParser struct { | |||||
| IsInputFile bool | IsInputFile bool | ||||
| } | } | ||||
// MarkupSanitizerRule is a single whitelist entry for the HTML sanitizer:
// it permits one attribute on one element, optionally restricted to
// attribute values that match a regular expression.
type MarkupSanitizerRule struct {
	// Element is the name of the HTML element the rule applies to.
	Element string
	// AllowAttr is the name of the attribute allowed on Element.
	AllowAttr string
	// Regexp constrains the accepted attribute values. A nil Regexp
	// allows the attribute unconditionally.
	Regexp *regexp.Regexp
}
| func newMarkup() { | func newMarkup() { | ||||
| extensionReg := regexp.MustCompile(`\.\w`) | |||||
| for _, sec := range Cfg.Section("markup").ChildSections() { | for _, sec := range Cfg.Section("markup").ChildSections() { | ||||
| name := strings.TrimPrefix(sec.Name(), "markup.") | name := strings.TrimPrefix(sec.Name(), "markup.") | ||||
| if name == "" { | if name == "" { | ||||
| @@ -34,33 +44,98 @@ func newMarkup() { | |||||
| continue | continue | ||||
| } | } | ||||
| extensions := sec.Key("FILE_EXTENSIONS").Strings(",") | |||||
| var exts = make([]string, 0, len(extensions)) | |||||
| for _, extension := range extensions { | |||||
| if !extensionReg.MatchString(extension) { | |||||
| log.Warn(sec.Name() + " file extension " + extension + " is invalid. Extension ignored") | |||||
| } else { | |||||
| exts = append(exts, extension) | |||||
| } | |||||
| if name == "sanitizer" { | |||||
| newMarkupSanitizer(name, sec) | |||||
| } else { | |||||
| newMarkupRenderer(name, sec) | |||||
| } | } | ||||
| } | |||||
| } | |||||
| func newMarkupSanitizer(name string, sec *ini.Section) { | |||||
| haveElement := sec.HasKey("ELEMENT") | |||||
| haveAttr := sec.HasKey("ALLOW_ATTR") | |||||
| haveRegexp := sec.HasKey("REGEXP") | |||||
| if !haveElement && !haveAttr && !haveRegexp { | |||||
| log.Warn("Skipping empty section: markup.%s.", name) | |||||
| return | |||||
| } | |||||
| if !haveElement || !haveAttr || !haveRegexp { | |||||
| log.Error("Missing required keys from markup.%s. Must have all three of ELEMENT, ALLOW_ATTR, and REGEXP defined!", name) | |||||
| return | |||||
| } | |||||
| elements := sec.Key("ELEMENT").ValueWithShadows() | |||||
| allowAttrs := sec.Key("ALLOW_ATTR").ValueWithShadows() | |||||
| regexps := sec.Key("REGEXP").ValueWithShadows() | |||||
| if len(elements) != len(allowAttrs) || | |||||
| len(elements) != len(regexps) { | |||||
| log.Error("All three keys in markup.%s (ELEMENT, ALLOW_ATTR, REGEXP) must be defined the same number of times! Got %d, %d, and %d respectively.", name, len(elements), len(allowAttrs), len(regexps)) | |||||
| return | |||||
| } | |||||
| if len(exts) == 0 { | |||||
| log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored") | |||||
| ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, len(elements)) | |||||
| for index, pattern := range regexps { | |||||
| if pattern == "" { | |||||
| rule := MarkupSanitizerRule{ | |||||
| Element: elements[index], | |||||
| AllowAttr: allowAttrs[index], | |||||
| Regexp: nil, | |||||
| } | |||||
| ExternalSanitizerRules = append(ExternalSanitizerRules, rule) | |||||
| continue | continue | ||||
| } | } | ||||
| command := sec.Key("RENDER_COMMAND").MustString("") | |||||
| if command == "" { | |||||
| log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored") | |||||
| // Validate when parsing the config that this is a valid regular | |||||
| // expression. Then we can use regexp.MustCompile(...) later. | |||||
| compiled, err := regexp.Compile(pattern) | |||||
| if err != nil { | |||||
| log.Error("In module.%s: REGEXP at definition %d failed to compile: %v", name, index+1, err) | |||||
| continue | continue | ||||
| } | } | ||||
| ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{ | |||||
| Enabled: sec.Key("ENABLED").MustBool(false), | |||||
| MarkupName: name, | |||||
| FileExtensions: exts, | |||||
| Command: command, | |||||
| IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false), | |||||
| }) | |||||
| rule := MarkupSanitizerRule{ | |||||
| Element: elements[index], | |||||
| AllowAttr: allowAttrs[index], | |||||
| Regexp: compiled, | |||||
| } | |||||
| ExternalSanitizerRules = append(ExternalSanitizerRules, rule) | |||||
| } | |||||
| } | |||||
| func newMarkupRenderer(name string, sec *ini.Section) { | |||||
| extensionReg := regexp.MustCompile(`\.\w`) | |||||
| extensions := sec.Key("FILE_EXTENSIONS").Strings(",") | |||||
| var exts = make([]string, 0, len(extensions)) | |||||
| for _, extension := range extensions { | |||||
| if !extensionReg.MatchString(extension) { | |||||
| log.Warn(sec.Name() + " file extension " + extension + " is invalid. Extension ignored") | |||||
| } else { | |||||
| exts = append(exts, extension) | |||||
| } | |||||
| } | |||||
| if len(exts) == 0 { | |||||
| log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored") | |||||
| return | |||||
| } | } | ||||
| command := sec.Key("RENDER_COMMAND").MustString("") | |||||
| if command == "" { | |||||
| log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored") | |||||
| return | |||||
| } | |||||
| ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{ | |||||
| Enabled: sec.Key("ENABLED").MustBool(false), | |||||
| MarkupName: name, | |||||
| FileExtensions: exts, | |||||
| Command: command, | |||||
| IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false), | |||||
| }) | |||||
| } | } | ||||