| @@ -96,7 +96,7 @@ require ( | |||
| github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141 | |||
| github.com/urfave/cli v1.20.0 | |||
| github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53 | |||
| github.com/yuin/goldmark v1.1.19 | |||
| github.com/yuin/goldmark v1.1.23 | |||
| go.etcd.io/bbolt v1.3.3 // indirect | |||
| golang.org/x/crypto v0.0.0-20200221231518-2aa609cf4a9d | |||
| golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa | |||
| @@ -574,8 +574,8 @@ github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q | |||
| github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= | |||
| github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53 h1:HsIQ6yAjfjQ3IxPGrTusxp6Qxn92gNVq2x5CbvQvx3w= | |||
| github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53/go.mod h1:f6elajwZV+xceiaqgRL090YzLEDGSbqr3poGL3ZgXYo= | |||
| github.com/yuin/goldmark v1.1.19 h1:0s2/60x0XsFCXHeFut+F3azDVAAyIMyUfJRbRexiTYs= | |||
| github.com/yuin/goldmark v1.1.19/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= | |||
| github.com/yuin/goldmark v1.1.23 h1:eTodJ8hwEUvwXhb9qxQNuL/q1d+xMQClrXR4mdvV7gs= | |||
| github.com/yuin/goldmark v1.1.23/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= | |||
| github.com/ziutek/mymysql v1.5.4 h1:GB0qdRGsTwQSBVYuVShFBKaXSnSnYYC2d9knnE1LHFs= | |||
| github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0= | |||
| go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= | |||
| @@ -6,48 +6,48 @@ goldmark | |||
| [](https://coveralls.io/github/yuin/goldmark) | |||
| [](https://goreportcard.com/report/github.com/yuin/goldmark) | |||
| > A Markdown parser written in Go. Easy to extend, standard compliant, well structured. | |||
| > A Markdown parser written in Go. Easy to extend, standards-compliant, well-structured. | |||
| goldmark is compliant with CommonMark 0.29. | |||
| Motivation | |||
| ---------------------- | |||
| I need a Markdown parser for Go that meets following conditions: | |||
| I needed a Markdown parser for Go that satisfies the following requirements: | |||
| - Easy to extend. | |||
| - Markdown is poor in document expressions compared with other light markup languages like reStructuredText. | |||
| - Markdown is poor in document expressions compared to other light markup languages such as reStructuredText. | |||
| - We have extensions to the Markdown syntax, e.g. PHP Markdown Extra, GitHub Flavored Markdown. | |||
| - Standard compliant. | |||
| - Standards-compliant. | |||
| - Markdown has many dialects. | |||
| - GitHub Flavored Markdown is widely used and it is based on CommonMark aside from whether CommonMark is good specification or not. | |||
| - CommonMark is too complicated and hard to implement. | |||
| - Well structured. | |||
| - AST based, and preserves source position of nodes. | |||
| - GitHub-Flavored Markdown is widely used and is based upon CommonMark, effectively mooting the question of whether or not CommonMark is an ideal specification. | |||
| - CommonMark is complicated and hard to implement. | |||
| - Well-structured. | |||
| - AST-based; preserves source position of nodes. | |||
| - Written in pure Go. | |||
| [golang-commonmark](https://gitlab.com/golang-commonmark/markdown) may be a good choice, but it seems to be a copy of [markdown-it](https://github.com/markdown-it). | |||
| [blackfriday.v2](https://github.com/russross/blackfriday/tree/v2) is a fast and widely used implementation, but it is not CommonMark compliant and cannot be extended from outside of the package since its AST uses structs instead of interfaces. | |||
| [blackfriday.v2](https://github.com/russross/blackfriday/tree/v2) is a fast and widely-used implementation, but is not CommonMark-compliant and cannot be extended from outside of the package, since its AST uses structs instead of interfaces. | |||
| Furthermore, its behavior differs from other implementations in some cases, especially regarding lists: ([Deep nested lists don't output correctly #329](https://github.com/russross/blackfriday/issues/329), [List block cannot have a second line #244](https://github.com/russross/blackfriday/issues/244), etc). | |||
| Furthermore, its behavior differs from other implementations in some cases, especially regarding lists: [Deep nested lists don't output correctly #329](https://github.com/russross/blackfriday/issues/329), [List block cannot have a second line #244](https://github.com/russross/blackfriday/issues/244), etc. | |||
| This behavior sometimes causes problems. If you migrate your Markdown text to blackfriday-based wikis from GitHub, many lists will immediately be broken. | |||
| This behavior sometimes causes problems. If you migrate your Markdown text from GitHub to blackfriday-based wikis, many lists will immediately be broken. | |||
| As mentioned above, CommonMark is too complicated and hard to implement, so Markdown parsers based on CommonMark barely exist. | |||
| As mentioned above, CommonMark is complicated and hard to implement, so Markdown parsers based on CommonMark are few and far between. | |||
| Features | |||
| ---------------------- | |||
| - **Standard compliant.** goldmark gets full compliance with the latest CommonMark spec. | |||
| - **Standards-compliant.** goldmark is fully compliant with the latest [CommonMark](https://commonmark.org/) specification. | |||
| - **Extensible.** Do you want to add a `@username` mention syntax to Markdown? | |||
| You can easily do it in goldmark. You can add your AST nodes, | |||
| parsers for block level elements, parsers for inline level elements, | |||
| transformers for paragraphs, transformers for whole AST structure, and | |||
| You can easily do so in goldmark. You can add your AST nodes, | |||
| parsers for block-level elements, parsers for inline-level elements, | |||
| transformers for paragraphs, transformers for the whole AST structure, and | |||
| renderers. | |||
| - **Performance.** goldmark performs pretty much equally to cmark, | |||
| - **Performance.** goldmark's performance is on par with that of cmark, | |||
| the CommonMark reference implementation written in C. | |||
| - **Robust.** goldmark is tested with [go-fuzz](https://github.com/dvyukov/go-fuzz), a fuzz testing tool. | |||
| - **Builtin extensions.** goldmark ships with common extensions like tables, strikethrough, | |||
| - **Built-in extensions.** goldmark ships with common extensions like tables, strikethrough, | |||
| task lists, and definition lists. | |||
| - **Depends only on standard libraries.** | |||
| @@ -62,15 +62,15 @@ Usage | |||
| ---------------------- | |||
| Import packages: | |||
| ``` | |||
| ```go | |||
| import ( | |||
| "bytes" | |||
| "github.com/yuin/goldmark" | |||
| "bytes" | |||
| "github.com/yuin/goldmark" | |||
| ) | |||
| ``` | |||
| Convert Markdown documents with the CommonMark compliant mode: | |||
| Convert Markdown documents with the CommonMark-compliant mode: | |||
| ```go | |||
| var buf bytes.Buffer | |||
| @@ -105,11 +105,11 @@ Custom parser and renderer | |||
| -------------------------- | |||
| ```go | |||
| import ( | |||
| "bytes" | |||
| "github.com/yuin/goldmark" | |||
| "github.com/yuin/goldmark/extension" | |||
| "github.com/yuin/goldmark/parser" | |||
| "github.com/yuin/goldmark/renderer/html" | |||
| "bytes" | |||
| "github.com/yuin/goldmark" | |||
| "github.com/yuin/goldmark/extension" | |||
| "github.com/yuin/goldmark/parser" | |||
| "github.com/yuin/goldmark/renderer/html" | |||
| ) | |||
| md := goldmark.New( | |||
| @@ -128,6 +128,14 @@ if err := md.Convert(source, &buf); err != nil { | |||
| } | |||
| ``` | |||
| | Functional option | Type | Description | | |||
| | ----------------- | ---- | ----------- | | |||
| | `goldmark.WithParser` | `parser.Parser` | This option must be passed before `goldmark.WithParserOptions` and `goldmark.WithExtensions` | | |||
| | `goldmark.WithRenderer` | `renderer.Renderer` | This option must be passed before `goldmark.WithRendererOptions` and `goldmark.WithExtensions` | | |||
| | `goldmark.WithParserOptions` | `...parser.Option` | | | |||
| | `goldmark.WithRendererOptions` | `...renderer.Option` | | | |||
| | `goldmark.WithExtensions` | `...goldmark.Extender` | | | |||
| Parser and Renderer options | |||
| ------------------------------ | |||
| @@ -147,33 +155,33 @@ Parser and Renderer options | |||
| | Functional option | Type | Description | | |||
| | ----------------- | ---- | ----------- | | |||
| | `html.WithWriter` | `html.Writer` | `html.Writer` for writing contents to an `io.Writer`. | | |||
| | `html.WithHardWraps` | `-` | Render new lines as `<br>`.| | |||
| | `html.WithHardWraps` | `-` | Render newlines as `<br>`.| | |||
| | `html.WithXHTML` | `-` | Render as XHTML. | | |||
| | `html.WithUnsafe` | `-` | By default, goldmark does not render raw HTML and potentially dangerous links. With this option, goldmark renders these contents as written. | | |||
| | `html.WithUnsafe` | `-` | By default, goldmark does not render raw HTML or potentially dangerous links. With this option, goldmark renders such content as written. | | |||
| ### Built-in extensions | |||
| - `extension.Table` | |||
| - [GitHub Flavored Markdown: Tables](https://github.github.com/gfm/#tables-extension-) | |||
| - [GitHub Flavored Markdown: Tables](https://github.github.com/gfm/#tables-extension-) | |||
| - `extension.Strikethrough` | |||
| - [GitHub Flavored Markdown: Strikethrough](https://github.github.com/gfm/#strikethrough-extension-) | |||
| - [GitHub Flavored Markdown: Strikethrough](https://github.github.com/gfm/#strikethrough-extension-) | |||
| - `extension.Linkify` | |||
| - [GitHub Flavored Markdown: Autolinks](https://github.github.com/gfm/#autolinks-extension-) | |||
| - [GitHub Flavored Markdown: Autolinks](https://github.github.com/gfm/#autolinks-extension-) | |||
| - `extension.TaskList` | |||
| - [GitHub Flavored Markdown: Task list items](https://github.github.com/gfm/#task-list-items-extension-) | |||
| - [GitHub Flavored Markdown: Task list items](https://github.github.com/gfm/#task-list-items-extension-) | |||
| - `extension.GFM` | |||
| - This extension enables Table, Strikethrough, Linkify and TaskList. | |||
| - This extension does not filter tags defined in [6.11: Disallowed Raw HTML (extension)](https://github.github.com/gfm/#disallowed-raw-html-extension-). | |||
| If you need to filter HTML tags, see [Security](#security) | |||
| - This extension enables Table, Strikethrough, Linkify and TaskList. | |||
| - This extension does not filter tags defined in [6.11: Disallowed Raw HTML (extension)](https://github.github.com/gfm/#disallowed-raw-html-extension-). | |||
| If you need to filter HTML tags, see [Security](#security). | |||
| - `extension.DefinitionList` | |||
| - [PHP Markdown Extra: Definition lists](https://michelf.ca/projects/php-markdown/extra/#def-list) | |||
| - [PHP Markdown Extra: Definition lists](https://michelf.ca/projects/php-markdown/extra/#def-list) | |||
| - `extension.Footnote` | |||
| - [PHP Markdown Extra: Footnotes](https://michelf.ca/projects/php-markdown/extra/#footnotes) | |||
| - [PHP Markdown Extra: Footnotes](https://michelf.ca/projects/php-markdown/extra/#footnotes) | |||
| - `extension.Typographer` | |||
| - This extension substitutes punctuations with typographic entities like [smartypants](https://daringfireball.net/projects/smartypants/). | |||
| - This extension substitutes punctuations with typographic entities like [smartypants](https://daringfireball.net/projects/smartypants/). | |||
| ### Attributes | |||
| `parser.WithAttribute` option allows you to define attributes on some elements. | |||
| The `parser.WithAttribute` option allows you to define attributes on some elements. | |||
| Currently only headings support attributes. | |||
| @@ -197,7 +205,7 @@ heading {#id .className attrName=attrValue} | |||
| ### Typographer extension | |||
| Typographer extension translates plain ASCII punctuation characters into typographic punctuation HTML entities. | |||
| The Typographer extension translates plain ASCII punctuation characters into typographic-punctuation HTML entities. | |||
| Default substitutions are: | |||
| @@ -211,25 +219,65 @@ Default substitutions are: | |||
| | `<<` | `«` | | |||
| | `>>` | `»` | | |||
| You can overwrite the substitutions by `extensions.WithTypographicSubstitutions`. | |||
| You can override the default substitutions via `extensions.WithTypographicSubstitutions`: | |||
| ```go | |||
| markdown := goldmark.New( | |||
| goldmark.WithExtensions( | |||
| extension.NewTypographer( | |||
| extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{ | |||
| extension.LeftSingleQuote: []byte("‚"), | |||
| extension.RightSingleQuote: nil, // nil disables a substitution | |||
| }), | |||
| ), | |||
| ), | |||
| goldmark.WithExtensions( | |||
| extension.NewTypographer( | |||
| extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{ | |||
| extension.LeftSingleQuote: []byte("‚"), | |||
| extension.RightSingleQuote: nil, // nil disables a substitution | |||
| }), | |||
| ), | |||
| ), | |||
| ) | |||
| ``` | |||
| ### Linkify extension | |||
| The Linkify extension implements [Autolinks(extension)](https://github.github.com/gfm/#autolinks-extension-), as | |||
| defined in [GitHub Flavored Markdown Spec](https://github.github.com/gfm/). | |||
| Since the spec does not define details about URLs, there are numerous ambiguous cases. | |||
| You can override autolinking patterns via options. | |||
| | Functional option | Type | Description | | |||
| | ----------------- | ---- | ----------- | | |||
| | `extension.WithLinkifyAllowedProtocols` | `[][]byte` | List of allowed protocols such as `[][]byte{ []byte("http:") }` | | |||
| | `extension.WithLinkifyURLRegexp` | `*regexp.Regexp` | Regexp that defines URLs, including protocols | | |||
| | `extension.WithLinkifyWWWRegexp` | `*regexp.Regexp` | Regexp that defines URL starting with `www.`. This pattern corresponds to [the extended www autolink](https://github.github.com/gfm/#extended-www-autolink) | | |||
| | `extension.WithLinkifyEmailRegexp` | `*regexp.Regexp` | Regexp that defines email addresses | | |||
| Example, using [xurls](https://github.com/mvdan/xurls): | |||
| ```go | |||
| import "mvdan.cc/xurls/v2" | |||
| markdown := goldmark.New( | |||
| goldmark.WithRendererOptions( | |||
| html.WithXHTML(), | |||
| html.WithUnsafe(), | |||
| ), | |||
| goldmark.WithExtensions( | |||
| extension.NewLinkify( | |||
| extension.WithLinkifyAllowedProtocols([][]byte{ | |||
| []byte("http:"), | |||
| []byte("https:"), | |||
| }), | |||
| extension.WithLinkifyURLRegexp( | |||
| xurls.Strict(), | |||
| ), | |||
| ), | |||
| ), | |||
| ) | |||
| ``` | |||
| Security | |||
| -------------------- | |||
| By default, goldmark does not render raw HTML and potentially dangerous URLs. | |||
| If you need to gain more control over untrusted contents, it is recommended to | |||
| By default, goldmark does not render raw HTML or potentially dangerous URLs. | |||
| If you need to gain more control over untrusted contents, it is recommended that you | |||
| use an HTML sanitizer such as [bluemonday](https://github.com/microcosm-cc/bluemonday). | |||
| Benchmark | |||
| @@ -238,11 +286,10 @@ You can run this benchmark in the `_benchmark` directory. | |||
| ### against other golang libraries | |||
| blackfriday v2 seems to be fastest, but it is not CommonMark compliant, so the performance of | |||
| blackfriday v2 cannot simply be compared with that of the other CommonMark compliant libraries. | |||
| blackfriday v2 seems to be the fastest, but as it is not CommonMark compliant, its performance cannot be directly compared to that of the CommonMark-compliant libraries. | |||
| Though goldmark builds clean extensible AST structure and get full compliance with | |||
| CommonMark, it is reasonably fast and has lower memory consumption. | |||
| goldmark, meanwhile, builds a clean, extensible AST structure, achieves full compliance with | |||
| CommonMark, and consumes less memory, all while being reasonably fast. | |||
| ``` | |||
| goos: darwin | |||
| @@ -268,21 +315,21 @@ iteration: 50 | |||
| average: 0.0040964230 sec | |||
| ``` | |||
| As you can see, goldmark performs pretty much equally to cmark. | |||
| As you can see, goldmark's performance is on par with cmark's. | |||
| Extensions | |||
| -------------------- | |||
| - [goldmark-meta](https://github.com/yuin/goldmark-meta): A YAML metadata | |||
| extension for the goldmark Markdown parser. | |||
| - [goldmark-highlighting](https://github.com/yuin/goldmark-highlighting): A Syntax highlighting extension | |||
| - [goldmark-highlighting](https://github.com/yuin/goldmark-highlighting): A syntax-highlighting extension | |||
| for the goldmark markdown parser. | |||
| - [goldmark-mathjax](https://github.com/litao91/goldmark-mathjax): Mathjax support for goldmark markdown parser | |||
| - [goldmark-mathjax](https://github.com/litao91/goldmark-mathjax): Mathjax support for the goldmark markdown parser | |||
| goldmark internal(for extension developers) | |||
| ---------------------------------------------- | |||
| ### Overview | |||
| goldmark's Markdown processing is outlined as a bellow diagram. | |||
| goldmark's Markdown processing is outlined in the diagram below. | |||
| ``` | |||
| <Markdown in []byte, parser.Context> | |||
| @@ -313,10 +360,11 @@ goldmark's Markdown processing is outlined as a bellow diagram. | |||
| ### Parsing | |||
| Markdown documents are read through `text.Reader` interface. | |||
| AST nodes do not have concrete text. AST nodes have segment information of the documents. It is represented by `text.Segment` . | |||
| AST nodes do not have concrete text. AST nodes have segment information of the documents, represented by `text.Segment` . | |||
| `text.Segment` has 3 attributes: `Start`, `End`, `Padding` . | |||
| (TBC) | |||
| **TODO** | |||
| @@ -236,10 +236,12 @@ func (n *BaseNode) RemoveChild(self, v Node) { | |||
| // RemoveChildren implements Node.RemoveChildren . | |||
| func (n *BaseNode) RemoveChildren(self Node) { | |||
| for c := n.firstChild; c != nil; c = c.NextSibling() { | |||
| for c := n.firstChild; c != nil; { | |||
| c.SetParent(nil) | |||
| c.SetPreviousSibling(nil) | |||
| next := c.NextSibling() | |||
| c.SetNextSibling(nil) | |||
| c = next | |||
| } | |||
| n.firstChild = nil | |||
| n.lastChild = nil | |||
| @@ -466,20 +468,25 @@ type Walker func(n Node, entering bool) (WalkStatus, error) | |||
| // Walk walks a AST tree by the depth first search algorithm. | |||
| func Walk(n Node, walker Walker) error { | |||
| _, err := walkHelper(n, walker) | |||
| return err | |||
| } | |||
| func walkHelper(n Node, walker Walker) (WalkStatus, error) { | |||
| status, err := walker(n, true) | |||
| if err != nil || status == WalkStop { | |||
| return err | |||
| return status, err | |||
| } | |||
| if status != WalkSkipChildren { | |||
| for c := n.FirstChild(); c != nil; c = c.NextSibling() { | |||
| if err = Walk(c, walker); err != nil { | |||
| return err | |||
| if st, err := walkHelper(c, walker); err != nil || st == WalkStop { | |||
| return WalkStop, err | |||
| } | |||
| } | |||
| } | |||
| status, err = walker(n, false) | |||
| if err != nil || status == WalkStop { | |||
| return err | |||
| return WalkStop, err | |||
| } | |||
| return nil | |||
| return WalkContinue, nil | |||
| } | |||
| @@ -303,11 +303,11 @@ func NewBlockquote() *Blockquote { | |||
| } | |||
| } | |||
| // A List structr represents a list of Markdown text. | |||
| // A List struct represents a list of Markdown text. | |||
| type List struct { | |||
| BaseBlock | |||
| // Marker is a markar character like '-', '+', ')' and '.'. | |||
| // Marker is a marker character like '-', '+', ')' and '.'. | |||
| Marker byte | |||
| // IsTight is a true if this list is a 'tight' list. | |||
| @@ -364,7 +364,7 @@ func NewList(marker byte) *List { | |||
| type ListItem struct { | |||
| BaseBlock | |||
| // Offset is an offset potision of this item. | |||
| // Offset is an offset position of this item. | |||
| Offset int | |||
| } | |||
| @@ -170,7 +170,7 @@ func NewText() *Text { | |||
| } | |||
| } | |||
| // NewTextSegment returns a new Text node with the given source potision. | |||
| // NewTextSegment returns a new Text node with the given source position. | |||
| func NewTextSegment(v textm.Segment) *Text { | |||
| return &Text{ | |||
| BaseInline: BaseInline{}, | |||
| @@ -467,7 +467,7 @@ type AutoLink struct { | |||
| // Inline implements Inline.Inline. | |||
| func (n *AutoLink) Inline() {} | |||
| // Dump implenets Node.Dump | |||
| // Dump implements Node.Dump | |||
| func (n *AutoLink) Dump(source []byte, level int) { | |||
| segment := n.value.Segment | |||
| m := map[string]string{ | |||
| @@ -11,7 +11,7 @@ type TaskCheckBox struct { | |||
| IsChecked bool | |||
| } | |||
| // Dump impelemtns Node.Dump. | |||
| // Dump implements Node.Dump. | |||
| func (n *TaskCheckBox) Dump(source []byte, level int) { | |||
| m := map[string]string{ | |||
| "Checked": fmt.Sprintf("%v", n.IsChecked), | |||
| @@ -2,27 +2,153 @@ package extension | |||
| import ( | |||
| "bytes" | |||
| "regexp" | |||
| "github.com/yuin/goldmark" | |||
| "github.com/yuin/goldmark/ast" | |||
| "github.com/yuin/goldmark/parser" | |||
| "github.com/yuin/goldmark/text" | |||
| "github.com/yuin/goldmark/util" | |||
| "regexp" | |||
| ) | |||
| var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) | |||
| var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]+(?:(?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) | |||
| var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_+.~#$!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) | |||
| var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]+(?:(?:/|[#?])[-a-zA-Z0-9@:%_+.~#$!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) | |||
| type linkifyParser struct { | |||
| // A LinkifyConfig struct is a data structure that holds configuration of the | |||
| // Linkify extension. | |||
| type LinkifyConfig struct { | |||
| AllowedProtocols [][]byte | |||
| URLRegexp *regexp.Regexp | |||
| WWWRegexp *regexp.Regexp | |||
| EmailRegexp *regexp.Regexp | |||
| } | |||
| const optLinkifyAllowedProtocols parser.OptionName = "LinkifyAllowedProtocols" | |||
| const optLinkifyURLRegexp parser.OptionName = "LinkifyURLRegexp" | |||
| const optLinkifyWWWRegexp parser.OptionName = "LinkifyWWWRegexp" | |||
| const optLinkifyEmailRegexp parser.OptionName = "LinkifyEmailRegexp" | |||
| // SetOption implements SetOptioner. | |||
| func (c *LinkifyConfig) SetOption(name parser.OptionName, value interface{}) { | |||
| switch name { | |||
| case optLinkifyAllowedProtocols: | |||
| c.AllowedProtocols = value.([][]byte) | |||
| case optLinkifyURLRegexp: | |||
| c.URLRegexp = value.(*regexp.Regexp) | |||
| case optLinkifyWWWRegexp: | |||
| c.WWWRegexp = value.(*regexp.Regexp) | |||
| case optLinkifyEmailRegexp: | |||
| c.EmailRegexp = value.(*regexp.Regexp) | |||
| } | |||
| } | |||
| // A LinkifyOption interface sets options for the LinkifyOption. | |||
| type LinkifyOption interface { | |||
| parser.Option | |||
| SetLinkifyOption(*LinkifyConfig) | |||
| } | |||
| type withLinkifyAllowedProtocols struct { | |||
| value [][]byte | |||
| } | |||
| func (o *withLinkifyAllowedProtocols) SetParserOption(c *parser.Config) { | |||
| c.Options[optLinkifyAllowedProtocols] = o.value | |||
| } | |||
| func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) { | |||
| p.AllowedProtocols = o.value | |||
| } | |||
| // WithLinkifyAllowedProtocols is a functional option that specifies the allowed | |||
| // protocols in autolinks. Each protocol must end with ':' like | |||
| // 'http:' . | |||
| func WithLinkifyAllowedProtocols(value [][]byte) LinkifyOption { | |||
| return &withLinkifyAllowedProtocols{ | |||
| value: value, | |||
| } | |||
| } | |||
| type withLinkifyURLRegexp struct { | |||
| value *regexp.Regexp | |||
| } | |||
| func (o *withLinkifyURLRegexp) SetParserOption(c *parser.Config) { | |||
| c.Options[optLinkifyURLRegexp] = o.value | |||
| } | |||
| func (o *withLinkifyURLRegexp) SetLinkifyOption(p *LinkifyConfig) { | |||
| p.URLRegexp = o.value | |||
| } | |||
| // WithLinkifyURLRegexp is a functional option that specifies | |||
| // a pattern of the URL including a protocol. | |||
| func WithLinkifyURLRegexp(value *regexp.Regexp) LinkifyOption { | |||
| return &withLinkifyURLRegexp{ | |||
| value: value, | |||
| } | |||
| } | |||
| // WithLinkifyWWWRegexp is a functional option that specifies | |||
| // a pattern of the URL without a protocol. | |||
| // This pattern must start with 'www.' . | |||
| type withLinkifyWWWRegexp struct { | |||
| value *regexp.Regexp | |||
| } | |||
| var defaultLinkifyParser = &linkifyParser{} | |||
| func (o *withLinkifyWWWRegexp) SetParserOption(c *parser.Config) { | |||
| c.Options[optLinkifyWWWRegexp] = o.value | |||
| } | |||
| func (o *withLinkifyWWWRegexp) SetLinkifyOption(p *LinkifyConfig) { | |||
| p.WWWRegexp = o.value | |||
| } | |||
| func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption { | |||
| return &withLinkifyWWWRegexp{ | |||
| value: value, | |||
| } | |||
| } | |||
| // WithLinkifyEmailRegexp is a functional option that specifies | |||
| // a pattern of the email address. | |||
| type withLinkifyEmailRegexp struct { | |||
| value *regexp.Regexp | |||
| } | |||
| func (o *withLinkifyEmailRegexp) SetParserOption(c *parser.Config) { | |||
| c.Options[optLinkifyEmailRegexp] = o.value | |||
| } | |||
| func (o *withLinkifyEmailRegexp) SetLinkifyOption(p *LinkifyConfig) { | |||
| p.EmailRegexp = o.value | |||
| } | |||
| func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption { | |||
| return &withLinkifyEmailRegexp{ | |||
| value: value, | |||
| } | |||
| } | |||
| type linkifyParser struct { | |||
| LinkifyConfig | |||
| } | |||
| // NewLinkifyParser returns a new InlineParser that can parse | |||
| // text that seems like a URL. | |||
| func NewLinkifyParser() parser.InlineParser { | |||
| return defaultLinkifyParser | |||
| func NewLinkifyParser(opts ...LinkifyOption) parser.InlineParser { | |||
| p := &linkifyParser{ | |||
| LinkifyConfig: LinkifyConfig{ | |||
| AllowedProtocols: nil, | |||
| URLRegexp: urlRegexp, | |||
| WWWRegexp: wwwURLRegxp, | |||
| }, | |||
| } | |||
| for _, o := range opts { | |||
| o.SetLinkifyOption(&p.LinkifyConfig) | |||
| } | |||
| return p | |||
| } | |||
| func (s *linkifyParser) Trigger() []byte { | |||
| @@ -53,14 +179,26 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont | |||
| var m []int | |||
| var protocol []byte | |||
| var typ ast.AutoLinkType = ast.AutoLinkURL | |||
| if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) { | |||
| m = urlRegexp.FindSubmatchIndex(line) | |||
| if s.LinkifyConfig.AllowedProtocols == nil { | |||
| if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) { | |||
| m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line) | |||
| } | |||
| } else { | |||
| for _, prefix := range s.LinkifyConfig.AllowedProtocols { | |||
| if bytes.HasPrefix(line, prefix) { | |||
| m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line) | |||
| break | |||
| } | |||
| } | |||
| } | |||
| if m == nil && bytes.HasPrefix(line, domainWWW) { | |||
| m = wwwURLRegxp.FindSubmatchIndex(line) | |||
| m = s.LinkifyConfig.WWWRegexp.FindSubmatchIndex(line) | |||
| protocol = []byte("http") | |||
| } | |||
| if m != nil { | |||
| if m != nil && m[0] != 0 { | |||
| m = nil | |||
| } | |||
| if m != nil && m[0] == 0 { | |||
| lastChar := line[m[1]-1] | |||
| if lastChar == '.' { | |||
| m[1]-- | |||
| @@ -96,7 +234,15 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont | |||
| return nil | |||
| } | |||
| typ = ast.AutoLinkEmail | |||
| stop := util.FindEmailIndex(line) | |||
| stop := -1 | |||
| if s.LinkifyConfig.EmailRegexp == nil { | |||
| stop = util.FindEmailIndex(line) | |||
| } else { | |||
| m := s.LinkifyConfig.EmailRegexp.FindSubmatchIndex(line) | |||
| if m != nil && m[0] == 0 { | |||
| stop = m[1] | |||
| } | |||
| } | |||
| if stop < 0 { | |||
| return nil | |||
| } | |||
| @@ -136,15 +282,22 @@ func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) { | |||
| } | |||
| type linkify struct { | |||
| options []LinkifyOption | |||
| } | |||
| // Linkify is an extension that allows you to parse text that seems like a URL. | |||
| var Linkify = &linkify{} | |||
| func NewLinkify(opts ...LinkifyOption) goldmark.Extender { | |||
| return &linkify{ | |||
| options: opts, | |||
| } | |||
| } | |||
| func (e *linkify) Extend(m goldmark.Markdown) { | |||
| m.Parser().AddOptions( | |||
| parser.WithInlineParsers( | |||
| util.Prioritized(NewLinkifyParser(), 999), | |||
| util.Prioritized(NewLinkifyParser(e.options...), 999), | |||
| ), | |||
| ) | |||
| } | |||
| @@ -27,7 +27,7 @@ type tableParagraphTransformer struct { | |||
| var defaultTableParagraphTransformer = &tableParagraphTransformer{} | |||
| // NewTableParagraphTransformer returns a new ParagraphTransformer | |||
| // that can transform pargraphs into tables. | |||
| // that can transform paragraphs into tables. | |||
| func NewTableParagraphTransformer() parser.ParagraphTransformer { | |||
| return defaultTableParagraphTransformer | |||
| } | |||
| @@ -1,6 +1,8 @@ | |||
| package extension | |||
| import ( | |||
| "unicode" | |||
| "github.com/yuin/goldmark" | |||
| gast "github.com/yuin/goldmark/ast" | |||
| "github.com/yuin/goldmark/parser" | |||
| @@ -31,6 +33,8 @@ const ( | |||
| LeftAngleQuote | |||
| // RightAngleQuote is >> | |||
| RightAngleQuote | |||
| // Apostrophe is ' | |||
| Apostrophe | |||
| typographicPunctuationMax | |||
| ) | |||
| @@ -52,6 +56,7 @@ func newDefaultSubstitutions() [][]byte { | |||
| replacements[Ellipsis] = []byte("…") | |||
| replacements[LeftAngleQuote] = []byte("«") | |||
| replacements[RightAngleQuote] = []byte("»") | |||
| replacements[Apostrophe] = []byte("’") | |||
| return replacements | |||
| } | |||
| @@ -189,6 +194,26 @@ func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser | |||
| return nil | |||
| } | |||
| if c == '\'' { | |||
| if s.Substitutions[Apostrophe] != nil { | |||
| // Handle decade abbreviations such as '90s | |||
| if d.CanOpen && !d.CanClose && len(line) > 3 && util.IsNumeric(line[1]) && util.IsNumeric(line[2]) && line[3] == 's' { | |||
| after := util.ToRune(line, 4) | |||
| if len(line) == 3 || unicode.IsSpace(after) || unicode.IsPunct(after) { | |||
| node := gast.NewString(s.Substitutions[Apostrophe]) | |||
| node.SetCode(true) | |||
| block.Advance(1) | |||
| return node | |||
| } | |||
| } | |||
| // Convert normal apostrophes. This is probably more flexible than necessary but | |||
| // converts any apostrophe in between two alphanumerics. | |||
| if len(line) > 1 && (unicode.IsDigit(before) || unicode.IsLetter(before)) && (util.IsAlphaNumeric(line[1])) { | |||
| node := gast.NewString(s.Substitutions[Apostrophe]) | |||
| node.SetCode(true) | |||
| block.Advance(1) | |||
| return node | |||
| } | |||
| } | |||
| if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose { | |||
| node := gast.NewString(s.Substitutions[LeftSingleQuote]) | |||
| node.SetCode(true) | |||
| @@ -228,10 +253,10 @@ type typographer struct { | |||
| options []TypographerOption | |||
| } | |||
| // Typographer is an extension that repalace punctuations with typographic entities. | |||
| // Typographer is an extension that replaces punctuations with typographic entities. | |||
| var Typographer = &typographer{} | |||
| // NewTypographer returns a new Entender that repalace punctuations with typographic entities. | |||
| // NewTypographer returns a new Extender that replaces punctuations with typographic entities. | |||
| func NewTypographer(opts ...TypographerOption) goldmark.Extender { | |||
| return &typographer{ | |||
| options: opts, | |||
| @@ -11,7 +11,7 @@ import ( | |||
| ) | |||
| // A DelimiterProcessor interface provides a set of functions about | |||
| // Deliiter nodes. | |||
| // Delimiter nodes. | |||
| type DelimiterProcessor interface { | |||
| // IsDelimiter returns true if given character is a delimiter, otherwise false. | |||
| IsDelimiter(byte) bool | |||
| @@ -38,7 +38,7 @@ type Delimiter struct { | |||
| // See https://spec.commonmark.org/0.29/#can-open-emphasis for details. | |||
| CanClose bool | |||
| // Length is a remaining length of this delmiter. | |||
| // Length is a remaining length of this delimiter. | |||
| Length int | |||
| // OriginalLength is a original length of this delimiter. | |||
| @@ -147,11 +147,6 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N | |||
| ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) | |||
| return nil | |||
| } | |||
| labelValue := block.Value(text.NewSegment(last.Segment.Start+1, segment.Start)) | |||
| if util.IsBlank(labelValue) && !last.IsImage { | |||
| ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) | |||
| return nil | |||
| } | |||
| c := block.Peek() | |||
| l, pos := block.Position() | |||
| @@ -351,14 +346,31 @@ func parseLinkTitle(block text.Reader) ([]byte, bool) { | |||
| if opener == '(' { | |||
| closer = ')' | |||
| } | |||
| line, _ := block.PeekLine() | |||
| pos := util.FindClosure(line[1:], opener, closer, false, true) | |||
| if pos < 0 { | |||
| return nil, false | |||
| savedLine, savedPosition := block.Position() | |||
| var title []byte | |||
| for i := 0; ; i++ { | |||
| line, _ := block.PeekLine() | |||
| if line == nil { | |||
| block.SetPosition(savedLine, savedPosition) | |||
| return nil, false | |||
| } | |||
| offset := 0 | |||
| if i == 0 { | |||
| offset = 1 | |||
| } | |||
| pos := util.FindClosure(line[offset:], opener, closer, false, true) | |||
| if pos < 0 { | |||
| title = append(title, line[offset:]...) | |||
| block.AdvanceLine() | |||
| continue | |||
| } | |||
| pos += offset + 1 // 1: closer | |||
| block.Advance(pos) | |||
| if i == 0 { // avoid allocating new slice | |||
| return line[offset : pos-1], true | |||
| } | |||
| return append(title, line[offset:pos-1]...), true | |||
| } | |||
| pos += 2 // opener + closer | |||
| block.Advance(pos) | |||
| return line[1 : pos-1], true | |||
| } | |||
| func (s *linkParser) CloseBlock(parent ast.Node, block text.Reader, pc Context) { | |||
| @@ -459,7 +459,7 @@ type Parser interface { | |||
| // Parse parses the given Markdown text into AST nodes. | |||
| Parse(reader text.Reader, opts ...ParseOption) ast.Node | |||
| // AddOption adds the given option to thie parser. | |||
| // AddOption adds the given option to this parser. | |||
| AddOptions(...Option) | |||
| } | |||
| @@ -505,7 +505,7 @@ type BlockParser interface { | |||
| // Close will be called when the parser returns Close. | |||
| Close(node ast.Node, reader text.Reader, pc Context) | |||
| // CanInterruptParagraph returns true if the parser can interrupt pargraphs, | |||
| // CanInterruptParagraph returns true if the parser can interrupt paragraphs, | |||
| // otherwise false. | |||
| CanInterruptParagraph() bool | |||
| @@ -660,13 +660,13 @@ func RenderAttributes(w util.BufWriter, node ast.Node, filter util.BytesFilter) | |||
| } | |||
| } | |||
| // A Writer interface wirtes textual contents to a writer. | |||
| // A Writer interface writes textual contents to a writer. | |||
| type Writer interface { | |||
| // Write writes the given source to writer with resolving references and unescaping | |||
| // backslash escaped characters. | |||
| Write(writer util.BufWriter, source []byte) | |||
| // RawWrite wirtes the given source to writer without resolving references and | |||
| // RawWrite writes the given source to writer without resolving references and | |||
| // unescaping backslash escaped characters. | |||
| RawWrite(writer util.BufWriter, source []byte) | |||
| } | |||
| @@ -7,7 +7,7 @@ import ( | |||
| var space = []byte(" ") | |||
| // A Segment struct holds information about source potisions. | |||
| // A Segment struct holds information about source positions. | |||
| type Segment struct { | |||
| // Start is a start position of the segment. | |||
| Start int | |||
| @@ -197,7 +197,7 @@ func (s *Segments) Sliced(lo, hi int) []Segment { | |||
| return s.values[lo:hi] | |||
| } | |||
| // Clear delete all element of the collction. | |||
| // Clear deletes all elements of the collection. | |||
| func (s *Segments) Clear() { | |||
| s.values = nil | |||
| } | |||
| @@ -8,7 +8,6 @@ import ( | |||
| "regexp" | |||
| "sort" | |||
| "strconv" | |||
| "strings" | |||
| "unicode/utf8" | |||
| ) | |||
| @@ -55,7 +54,7 @@ func (b *CopyOnWriteBuffer) IsCopied() bool { | |||
| return b.copied | |||
| } | |||
| // IsEscapedPunctuation returns true if caracter at a given index i | |||
| // IsEscapedPunctuation returns true if character at a given index i | |||
| // is an escaped punctuation, otherwise false. | |||
| func IsEscapedPunctuation(source []byte, i int) bool { | |||
| return source[i] == '\\' && i < len(source)-1 && IsPunct(source[i+1]) | |||
| @@ -229,7 +228,7 @@ func IndentWidth(bs []byte, currentPos int) (width, pos int) { | |||
| return | |||
| } | |||
| // FirstNonSpacePosition returns a potisoin line that is a first nonspace | |||
| // FirstNonSpacePosition returns a position line that is a first nonspace | |||
| // character. | |||
| func FirstNonSpacePosition(bs []byte) int { | |||
| i := 0 | |||
| @@ -387,6 +386,52 @@ func TrimRightSpace(source []byte) []byte { | |||
| return TrimRight(source, spaces) | |||
| } | |||
| // DoFullUnicodeCaseFolding performs full unicode case folding to given bytes. | |||
| func DoFullUnicodeCaseFolding(v []byte) []byte { | |||
| var rbuf []byte | |||
| cob := NewCopyOnWriteBuffer(v) | |||
| n := 0 | |||
| for i := 0; i < len(v); i++ { | |||
| c := v[i] | |||
| if c < 0xb5 { | |||
| if c >= 0x41 && c <= 0x5a { | |||
| // A-Z to a-z | |||
| cob.Write(v[n:i]) | |||
| cob.WriteByte(c + 32) | |||
| n = i + 1 | |||
| } | |||
| continue | |||
| } | |||
| if !utf8.RuneStart(c) { | |||
| continue | |||
| } | |||
| r, length := utf8.DecodeRune(v[i:]) | |||
| if r == utf8.RuneError { | |||
| continue | |||
| } | |||
| folded, ok := unicodeCaseFoldings[r] | |||
| if !ok { | |||
| continue | |||
| } | |||
| cob.Write(v[n:i]) | |||
| if rbuf == nil { | |||
| rbuf = make([]byte, 4) | |||
| } | |||
| for _, f := range folded { | |||
| l := utf8.EncodeRune(rbuf, f) | |||
| cob.Write(rbuf[:l]) | |||
| } | |||
| i += length - 1 | |||
| n = i + 1 | |||
| } | |||
| if cob.IsCopied() { | |||
| cob.Write(v[n:]) | |||
| } | |||
| return cob.Bytes() | |||
| } | |||
| // ReplaceSpaces replaces sequence of spaces with the given repl. | |||
| func ReplaceSpaces(source []byte, repl byte) []byte { | |||
| var ret []byte | |||
| @@ -439,13 +484,14 @@ func ToValidRune(v rune) rune { | |||
| return v | |||
| } | |||
| // ToLinkReference convert given bytes into a valid link reference string. | |||
| // ToLinkReference trims leading and trailing spaces and convert into lower | |||
| // ToLinkReference converts given bytes into a valid link reference string. | |||
| // ToLinkReference performs unicode case folding, trims leading and trailing spaces, converts into lower | |||
| // case and replace spaces with a single space character. | |||
| func ToLinkReference(v []byte) string { | |||
| v = TrimLeftSpace(v) | |||
| v = TrimRightSpace(v) | |||
| return strings.ToLower(string(ReplaceSpaces(v, ' '))) | |||
| v = DoFullUnicodeCaseFolding(v) | |||
| return string(ReplaceSpaces(v, ' ')) | |||
| } | |||
| var htmlEscapeTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("""), nil, nil, nil, []byte("&"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("<"), nil, []byte(">"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil} | |||
| @@ -589,7 +635,7 @@ var htmlSpace = []byte("%20") | |||
| // 2. resolve numeric references | |||
| // 3. resolve entity references | |||
| // | |||
| // URL encoded values (%xx) are keeped as is. | |||
| // URL encoded values (%xx) are kept as is. | |||
| func URLEscape(v []byte, resolveReference bool) []byte { | |||
| if resolveReference { | |||
| v = UnescapePunctuations(v) | |||
| @@ -450,7 +450,7 @@ github.com/willf/bitset | |||
| github.com/xanzy/ssh-agent | |||
| # github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53 | |||
| github.com/yohcop/openid-go | |||
| # github.com/yuin/goldmark v1.1.19 | |||
| # github.com/yuin/goldmark v1.1.23 | |||
| github.com/yuin/goldmark | |||
| github.com/yuin/goldmark/ast | |||
| github.com/yuin/goldmark/extension | |||