You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

markdown.go 9.3 kB

11 years ago
11 years ago
10 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
10 years ago
10 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. // Copyright 2014 The Gogs Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package base
  5. import (
  6. "bytes"
  7. "fmt"
  8. "io"
  9. "net/http"
  10. "path"
  11. "path/filepath"
  12. "regexp"
  13. "strings"
  14. "github.com/Unknwon/com"
  15. "github.com/russross/blackfriday"
  16. "golang.org/x/net/html"
  17. "github.com/gogits/gogs/modules/setting"
  18. )
  19. func isletter(c byte) bool {
  20. return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
  21. }
  22. func isalnum(c byte) bool {
  23. return (c >= '0' && c <= '9') || isletter(c)
  24. }
  25. var validLinks = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
  26. func isLink(link []byte) bool {
  27. for _, prefix := range validLinks {
  28. if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
  29. return true
  30. }
  31. }
  32. return false
  33. }
  34. func IsMarkdownFile(name string) bool {
  35. name = strings.ToLower(name)
  36. switch filepath.Ext(name) {
  37. case ".md", ".markdown", ".mdown", ".mkd":
  38. return true
  39. }
  40. return false
  41. }
  42. func IsTextFile(data []byte) (string, bool) {
  43. contentType := http.DetectContentType(data)
  44. if strings.Index(contentType, "text/") != -1 {
  45. return contentType, true
  46. }
  47. return contentType, false
  48. }
  49. func IsImageFile(data []byte) (string, bool) {
  50. contentType := http.DetectContentType(data)
  51. if strings.Index(contentType, "image/") != -1 {
  52. return contentType, true
  53. }
  54. return contentType, false
  55. }
  56. // IsReadmeFile returns true if given file name suppose to be a README file.
  57. func IsReadmeFile(name string) bool {
  58. name = strings.ToLower(name)
  59. if len(name) < 6 {
  60. return false
  61. } else if len(name) == 6 {
  62. if name == "readme" {
  63. return true
  64. }
  65. return false
  66. }
  67. if name[:7] == "readme." {
  68. return true
  69. }
  70. return false
  71. }
// CustomRender embeds a blackfriday.Renderer and overrides its Link and Image
// methods so that targets rejected by isLink are resolved against urlPrefix.
type CustomRender struct {
	blackfriday.Renderer
	// urlPrefix is prepended to link and image targets that are not
	// recognized as absolute links.
	urlPrefix string
}
  76. func (options *CustomRender) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
  77. if len(link) > 0 && !isLink(link) {
  78. if link[0] == '#' {
  79. // link = append([]byte(options.urlPrefix), link...)
  80. } else {
  81. link = []byte(path.Join(options.urlPrefix, string(link)))
  82. }
  83. }
  84. options.Renderer.Link(out, link, title, content)
  85. }
  86. var (
  87. svgSuffix = []byte(".svg")
  88. svgSuffixWithMark = []byte(".svg?")
  89. )
  90. func (options *CustomRender) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
  91. prefix := strings.Replace(options.urlPrefix, "/src/", "/raw/", 1)
  92. if len(link) > 0 {
  93. if isLink(link) {
  94. // External link with .svg suffix usually means CI status.
  95. if bytes.HasSuffix(link, svgSuffix) || bytes.Contains(link, svgSuffixWithMark) {
  96. options.Renderer.Image(out, link, title, alt)
  97. return
  98. }
  99. } else {
  100. if link[0] != '/' {
  101. prefix += "/"
  102. }
  103. link = []byte(prefix + string(link))
  104. }
  105. }
  106. out.WriteString(`<a href="`)
  107. out.Write(link)
  108. out.WriteString(`">`)
  109. options.Renderer.Image(out, link, title, alt)
  110. out.WriteString("</a>")
  111. }
var (
	// MentionPattern matches an @-mention: '@' followed by one or more
	// letters, digits, underscores or dots, located at the start of the
	// input or right after one whitespace character (which is part of the
	// match).
	MentionPattern = regexp.MustCompile(`(\s|^)@[0-9a-zA-Z_\.]+`)
	// commitPattern matches text starting with "http" or "https" (at the
	// start of the input or after one whitespace character) that contains
	// "commit/" followed by alphanumerics and an optional "#fragment".
	// NOTE(review): ".*" is greedy and also matches spaces, so a single
	// match can span several URLs on the same line.
	commitPattern = regexp.MustCompile(`(\s|^)https?.*commit/[0-9a-zA-Z]+(#+[0-9a-zA-Z-]*)?`)
	// issueFullPattern is the counterpart of commitPattern for "issues/"
	// followed by a decimal number and an optional "#fragment".
	issueFullPattern = regexp.MustCompile(`(\s|^)https?.*issues/[0-9]+(#+[0-9a-zA-Z-]*)?`)
	// issueIndexPattern matches a short issue reference such as "#123" at
	// the start of the input or after a single space (the space is part of
	// the match).
	issueIndexPattern = regexp.MustCompile(`( |^)#[0-9]+\b`)
	// sha1CurrentPattern matches a run of exactly 40 lower-case hexadecimal
	// characters delimited by word boundaries.
	sha1CurrentPattern = regexp.MustCompile(`\b[0-9a-f]{40}\b`)
)
  119. func cutoutVerbosePrefix(prefix string) string {
  120. count := 0
  121. for i := 0; i < len(prefix); i++ {
  122. if prefix[i] == '/' {
  123. count++
  124. }
  125. if count >= 3 {
  126. return prefix[:i]
  127. }
  128. }
  129. return prefix
  130. }
  131. func RenderIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string]string) []byte {
  132. urlPrefix = cutoutVerbosePrefix(urlPrefix)
  133. ms := issueIndexPattern.FindAll(rawBytes, -1)
  134. for _, m := range ms {
  135. var space string
  136. m2 := m
  137. if m2[0] == ' ' {
  138. space = " "
  139. m2 = m2[1:]
  140. }
  141. if metas == nil {
  142. rawBytes = bytes.Replace(rawBytes, m, []byte(fmt.Sprintf(`%s<a href="%s/issues/%s">%s</a>`,
  143. space, urlPrefix, m2[1:], m2)), 1)
  144. } else {
  145. // Support for external issue tracker
  146. metas["index"] = string(m2[1:])
  147. rawBytes = bytes.Replace(rawBytes, m, []byte(fmt.Sprintf(`%s<a href="%s">%s</a>`,
  148. space, com.Expand(metas["format"], metas), m2)), 1)
  149. }
  150. }
  151. return rawBytes
  152. }
  153. func RenderSpecialLink(rawBytes []byte, urlPrefix string, metas map[string]string) []byte {
  154. ms := MentionPattern.FindAll(rawBytes, -1)
  155. for _, m := range ms {
  156. m = bytes.TrimSpace(m)
  157. rawBytes = bytes.Replace(rawBytes, m,
  158. []byte(fmt.Sprintf(`<a href="%s/%s">%s</a>`, setting.AppSubUrl, m[1:], m)), -1)
  159. }
  160. rawBytes = RenderIssueIndexPattern(rawBytes, urlPrefix, metas)
  161. rawBytes = RenderSha1CurrentPattern(rawBytes, urlPrefix)
  162. return rawBytes
  163. }
  164. func RenderSha1CurrentPattern(rawBytes []byte, urlPrefix string) []byte {
  165. ms := sha1CurrentPattern.FindAll(rawBytes, -1)
  166. for _, m := range ms {
  167. rawBytes = bytes.Replace(rawBytes, m, []byte(fmt.Sprintf(
  168. `<a href="%s/commit/%s"><code>%s</code></a>`, urlPrefix, m, ShortSha(string(m)))), -1)
  169. }
  170. return rawBytes
  171. }
  172. func RenderRawMarkdown(body []byte, urlPrefix string) []byte {
  173. htmlFlags := 0
  174. htmlFlags |= blackfriday.HTML_SKIP_STYLE
  175. htmlFlags |= blackfriday.HTML_OMIT_CONTENTS
  176. renderer := &CustomRender{
  177. Renderer: blackfriday.HtmlRenderer(htmlFlags, "", ""),
  178. urlPrefix: urlPrefix,
  179. }
  180. // set up the parser
  181. extensions := 0
  182. extensions |= blackfriday.EXTENSION_NO_INTRA_EMPHASIS
  183. extensions |= blackfriday.EXTENSION_TABLES
  184. extensions |= blackfriday.EXTENSION_FENCED_CODE
  185. extensions |= blackfriday.EXTENSION_AUTOLINK
  186. extensions |= blackfriday.EXTENSION_STRIKETHROUGH
  187. extensions |= blackfriday.EXTENSION_SPACE_HEADERS
  188. extensions |= blackfriday.EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK
  189. if setting.Markdown.EnableHardLineBreak {
  190. extensions |= blackfriday.EXTENSION_HARD_LINE_BREAK
  191. }
  192. body = blackfriday.Markdown(body, renderer, extensions)
  193. return body
  194. }
  195. var (
  196. leftAngleBracket = []byte("</")
  197. rightAngleBracket = []byte(">")
  198. )
  199. var noEndTags = []string{"img", "input", "br", "hr"}
  200. // PreProcessMarkdown renders full links of commits, issues and pulls to shorter version.
  201. func PreProcessMarkdown(rawHTML []byte, urlPrefix string) []byte {
  202. ms := commitPattern.FindAll(rawHTML, -1)
  203. for _, m := range ms {
  204. m = bytes.TrimSpace(m)
  205. i := strings.Index(string(m), "commit/")
  206. j := strings.Index(string(m), "#")
  207. if j == -1 {
  208. j = len(m)
  209. }
  210. rawHTML = bytes.Replace(rawHTML, m, []byte(fmt.Sprintf(
  211. ` <code><a href="%s">%s</a></code>`, m, ShortSha(string(m[i+7:j])))), -1)
  212. }
  213. ms = issueFullPattern.FindAll(rawHTML, -1)
  214. for _, m := range ms {
  215. m = bytes.TrimSpace(m)
  216. i := strings.Index(string(m), "issues/")
  217. j := strings.Index(string(m), "#")
  218. if j == -1 {
  219. j = len(m)
  220. }
  221. rawHTML = bytes.Replace(rawHTML, m, []byte(fmt.Sprintf(
  222. ` <a href="%s">#%s</a>`, m, ShortSha(string(m[i+7:j])))), -1)
  223. }
  224. return rawHTML
  225. }
  226. // PostProcessMarkdown treats different types of HTML differently,
  227. // and only renders special links for plain text blocks.
  228. func PostProcessMarkdown(rawHtml []byte, urlPrefix string, metas map[string]string) []byte {
  229. startTags := make([]string, 0, 5)
  230. var buf bytes.Buffer
  231. tokenizer := html.NewTokenizer(bytes.NewReader(rawHtml))
  232. OUTER_LOOP:
  233. for html.ErrorToken != tokenizer.Next() {
  234. token := tokenizer.Token()
  235. switch token.Type {
  236. case html.TextToken:
  237. buf.Write(RenderSpecialLink([]byte(token.String()), urlPrefix, metas))
  238. case html.StartTagToken:
  239. buf.WriteString(token.String())
  240. tagName := token.Data
  241. // If this is an excluded tag, we skip processing all output until a close tag is encountered.
  242. if strings.EqualFold("a", tagName) || strings.EqualFold("code", tagName) || strings.EqualFold("pre", tagName) {
  243. stackNum := 1
  244. for html.ErrorToken != tokenizer.Next() {
  245. token = tokenizer.Token()
  246. // Copy the token to the output verbatim
  247. buf.WriteString(token.String())
  248. if token.Type == html.StartTagToken {
  249. stackNum++
  250. }
  251. // If this is the close tag to the outer-most, we are done
  252. if token.Type == html.EndTagToken && strings.EqualFold(tagName, token.Data) {
  253. stackNum--
  254. if stackNum == 0 {
  255. break
  256. }
  257. }
  258. }
  259. continue OUTER_LOOP
  260. }
  261. if !com.IsSliceContainsStr(noEndTags, token.Data) {
  262. startTags = append(startTags, token.Data)
  263. }
  264. case html.EndTagToken:
  265. if len(startTags) == 0 {
  266. buf.WriteString(token.String())
  267. break
  268. }
  269. buf.Write(leftAngleBracket)
  270. buf.WriteString(startTags[len(startTags)-1])
  271. buf.Write(rightAngleBracket)
  272. startTags = startTags[:len(startTags)-1]
  273. default:
  274. buf.WriteString(token.String())
  275. }
  276. }
  277. if io.EOF == tokenizer.Err() {
  278. return buf.Bytes()
  279. }
  280. // If we are not at the end of the input, then some other parsing error has occurred,
  281. // so return the input verbatim.
  282. return rawHtml
  283. }
  284. func RenderMarkdown(rawBytes []byte, urlPrefix string, metas map[string]string) []byte {
  285. result := PreProcessMarkdown(rawBytes, urlPrefix)
  286. result = RenderRawMarkdown(result, urlPrefix)
  287. result = PostProcessMarkdown(result, urlPrefix, metas)
  288. result = Sanitizer.SanitizeBytes(result)
  289. return result
  290. }
  291. func RenderMarkdownString(raw, urlPrefix string, metas map[string]string) string {
  292. return string(RenderMarkdown([]byte(raw), urlPrefix, metas))
  293. }