You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

html.go 20 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667
  1. // Copyright 2017 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package markup
  5. import (
  6. "bytes"
  7. "net/url"
  8. "path"
  9. "path/filepath"
  10. "regexp"
  11. "strings"
  12. "code.gitea.io/gitea/modules/base"
  13. "code.gitea.io/gitea/modules/setting"
  14. "code.gitea.io/gitea/modules/util"
  15. "github.com/Unknwon/com"
  16. "golang.org/x/net/html"
  17. "golang.org/x/net/html/atom"
  18. )
// Issue name styles. The style selects which reference pattern is used when
// linking issues in text: numeric references look like "#1287", alphanumeric
// references look like "ABC-1234".
const (
	IssueNameStyleNumeric      = "numeric"
	IssueNameStyleAlphanumeric = "alphanumeric"
)
var (
	// NOTE: None of the regular expressions below perform any extra validation.
	// Thus a link is produced even if the linked entity does not exist.
	// While fast, this is also incorrect and leads to false positives.
	// TODO: fix invalid linking issue

	// mentionPattern matches all mentions in the form of "@user"
	mentionPattern = regexp.MustCompile(`(?:\s|^|\W)(@[0-9a-zA-Z-_\.]+)`)

	// issueNumericPattern matches a string that references a numeric issue, e.g. #1287
	issueNumericPattern = regexp.MustCompile(`(?:\s|^|\W)(#[0-9]+)\b`)
	// issueAlphanumericPattern matches a string that references an alphanumeric issue, e.g. ABC-1234
	issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\W)([A-Z]{1,10}-[1-9][0-9]*)\b`)
	// crossReferenceIssueNumericPattern matches a string that references a numeric issue in a different repository
	// e.g. gogits/gogs#12345
	crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\W)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)\b`)

	// sha1CurrentPattern matches a string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
	// Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length
	// so that abbreviated hash links can be used as well. This matches git and github usability.
	sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\W)([0-9a-f]{7,40})\b`)

	// shortLinkPattern matches the short but difficult to parse [[name|link|arg=test]] syntax.
	// Submatch 1 is the text between the brackets, submatch 2 is any run of
	// word characters directly following the closing brackets.
	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)

	// anySHA1Pattern allows splitting a URL containing a SHA into parts:
	// submatch 1 is the 40 character hash, submatch 2 the optional path after
	// it and submatch 3 the optional fragment after "#".
	anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})/?([^#\s]+)?(?:#(\S+))?`)

	// validLinksPattern matches strings that start with a lowercase letter,
	// followed by word characters or dashes, followed by "://".
	validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)

	// While this email regex is definitely not perfect and I'm sure you can come up
	// with edge cases, it is still accepted by the CommonMark specification, as
	// well as the HTML5 spec:
	// http://spec.commonmark.org/0.28/#email-address
	// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
	emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*")

	// linkRegex matches http/https links and is used for autolinking those.
	// Partly modified from the original present in autolink.js
	linkRegex = regexp.MustCompile(`(?:(?:http|https):\/\/(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-]*)?\??(?:[\-\+:=&;%@\.\w]*)#?(?:[\.\!\/\\\w]*))?`)
)
// issueFullPattern is the regexp for full links to issues/pulls. It starts
// out nil and is compiled by getIssueFullPattern on first use.
var issueFullPattern *regexp.Regexp
  59. // IsLink reports whether link fits valid format.
  60. func IsLink(link []byte) bool {
  61. return isLink(link)
  62. }
  63. // isLink reports whether link fits valid format.
  64. func isLink(link []byte) bool {
  65. return validLinksPattern.Match(link)
  66. }
  67. func isLinkStr(link string) bool {
  68. return validLinksPattern.MatchString(link)
  69. }
  70. func getIssueFullPattern() *regexp.Regexp {
  71. if issueFullPattern == nil {
  72. appURL := setting.AppURL
  73. if len(appURL) > 0 && appURL[len(appURL)-1] != '/' {
  74. appURL += "/"
  75. }
  76. issueFullPattern = regexp.MustCompile(appURL +
  77. `\w+/\w+/(?:issues|pulls)/((?:\w{1,10}-)?[1-9][0-9]*)([\?|#]\S+.(\S+)?)?\b`)
  78. }
  79. return issueFullPattern
  80. }
  81. // FindAllMentions matches mention patterns in given content
  82. // and returns a list of found user names without @ prefix.
  83. func FindAllMentions(content string) []string {
  84. mentions := mentionPattern.FindAllStringSubmatch(content, -1)
  85. ret := make([]string, len(mentions))
  86. for i, val := range mentions {
  87. ret[i] = val[1][1:]
  88. }
  89. return ret
  90. }
  91. // cutoutVerbosePrefix cutouts URL prefix including sub-path to
  92. // return a clean unified string of request URL path.
  93. func cutoutVerbosePrefix(prefix string) string {
  94. if len(prefix) == 0 || prefix[0] != '/' {
  95. return prefix
  96. }
  97. count := 0
  98. for i := 0; i < len(prefix); i++ {
  99. if prefix[i] == '/' {
  100. count++
  101. }
  102. if count >= 3+setting.AppSubURLDepth {
  103. return prefix[:i]
  104. }
  105. }
  106. return prefix
  107. }
  108. // IsSameDomain checks if given url string has the same hostname as current Gitea instance
  109. func IsSameDomain(s string) bool {
  110. if strings.HasPrefix(s, "/") {
  111. return true
  112. }
  113. if uapp, err := url.Parse(setting.AppURL); err == nil {
  114. if u, err := url.Parse(s); err == nil {
  115. return u.Host == uapp.Host
  116. }
  117. return false
  118. }
  119. return false
  120. }
  121. type postProcessError struct {
  122. context string
  123. err error
  124. }
  125. func (p *postProcessError) Error() string {
  126. return "PostProcess: " + p.context + ", " + p.Error()
  127. }
// processor is a function applied to a text node during post-processing. It
// receives the current context and the node, and may rewrite the node or
// insert new siblings after it.
type processor func(ctx *postProcessCtx, node *html.Node)
// defaultProcessors is the list of processors used by PostProcess, and by
// postProcess when the context has no processors set. They are run in this
// order on every visited text node.
var defaultProcessors = []processor{
	mentionProcessor,
	shortLinkProcessor,
	fullIssuePatternProcessor,
	issueIndexPatternProcessor,
	crossReferenceIssueIndexPatternProcessor,
	fullSha1PatternProcessor,
	sha1CurrentPatternProcessor,
	emailAddressProcessor,
	linkProcessor,
}
// postProcessCtx carries the settings shared by all processors during one
// post-processing run.
type postProcessCtx struct {
	// metas holds optional key/value data. issueIndexPatternProcessor reads
	// the "style" and "format" keys and writes the "index" key.
	metas map[string]string
	// urlPrefix is the base that relative links and commit links are joined to.
	urlPrefix string
	// isWikiMarkdown makes relative short links resolve under "wiki"
	// ("wiki/raw" for images).
	isWikiMarkdown bool

	// processors used by this context.
	procs []processor

	// if set to true, when an <a> is found, instead of just returning during
	// visitNode, it will recursively visit the node exclusively running
	// shortLinkProcessorFull with true.
	visitLinksForShortLinks bool
}
  151. // PostProcess does the final required transformations to the passed raw HTML
  152. // data, and ensures its validity. Transformations include: replacing links and
  153. // emails with HTML links, parsing shortlinks in the format of [[Link]], like
  154. // MediaWiki, linking issues in the format #ID, and mentions in the format
  155. // @user, and others.
  156. func PostProcess(
  157. rawHTML []byte,
  158. urlPrefix string,
  159. metas map[string]string,
  160. isWikiMarkdown bool,
  161. ) ([]byte, error) {
  162. // create the context from the parameters
  163. ctx := &postProcessCtx{
  164. metas: metas,
  165. urlPrefix: urlPrefix,
  166. isWikiMarkdown: isWikiMarkdown,
  167. procs: defaultProcessors,
  168. visitLinksForShortLinks: true,
  169. }
  170. return ctx.postProcess(rawHTML)
  171. }
// commitMessageProcessors is the processor list used by RenderCommitMessage.
// It is the same as defaultProcessors without shortLinkProcessor.
var commitMessageProcessors = []processor{
	mentionProcessor,
	fullIssuePatternProcessor,
	issueIndexPatternProcessor,
	crossReferenceIssueIndexPatternProcessor,
	fullSha1PatternProcessor,
	sha1CurrentPatternProcessor,
	emailAddressProcessor,
	linkProcessor,
}
  182. // RenderCommitMessage will use the same logic as PostProcess, but will disable
  183. // the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is
  184. // set, which changes every text node into a link to the passed default link.
  185. func RenderCommitMessage(
  186. rawHTML []byte,
  187. urlPrefix, defaultLink string,
  188. metas map[string]string,
  189. ) ([]byte, error) {
  190. ctx := &postProcessCtx{
  191. metas: metas,
  192. urlPrefix: urlPrefix,
  193. procs: commitMessageProcessors,
  194. }
  195. if defaultLink != "" {
  196. // we don't have to fear data races, because being
  197. // commitMessageProcessors of fixed len and cap, every time we append
  198. // something to it the slice is realloc+copied, so append always
  199. // generates the slice ex-novo.
  200. ctx.procs = append(ctx.procs, genDefaultLinkProcessor(defaultLink))
  201. }
  202. return ctx.postProcess(rawHTML)
  203. }
// byteBodyTag and byteBodyTagClosing are the tags postProcess wraps around
// the raw HTML before parsing, and later searches for to strip the wrapper
// from the rendered output.
var byteBodyTag = []byte("<body>")
var byteBodyTagClosing = []byte("</body>")
  206. func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
  207. if ctx.procs == nil {
  208. ctx.procs = defaultProcessors
  209. }
  210. // give a generous extra 50 bytes
  211. res := make([]byte, 0, len(rawHTML)+50)
  212. res = append(res, byteBodyTag...)
  213. res = append(res, rawHTML...)
  214. res = append(res, byteBodyTagClosing...)
  215. // parse the HTML
  216. nodes, err := html.ParseFragment(bytes.NewReader(res), nil)
  217. if err != nil {
  218. return nil, &postProcessError{"invalid HTML", err}
  219. }
  220. for _, node := range nodes {
  221. ctx.visitNode(node)
  222. }
  223. // Create buffer in which the data will be placed again. We know that the
  224. // length will be at least that of res; to spare a few alloc+copy, we
  225. // reuse res, resetting its length to 0.
  226. buf := bytes.NewBuffer(res[:0])
  227. // Render everything to buf.
  228. for _, node := range nodes {
  229. err = html.Render(buf, node)
  230. if err != nil {
  231. return nil, &postProcessError{"error rendering processed HTML", err}
  232. }
  233. }
  234. // remove initial parts - because Render creates a whole HTML page.
  235. res = buf.Bytes()
  236. res = res[bytes.Index(res, byteBodyTag)+len(byteBodyTag) : bytes.LastIndex(res, byteBodyTagClosing)]
  237. // Everything done successfully, return parsed data.
  238. return res, nil
  239. }
  240. func (ctx *postProcessCtx) visitNode(node *html.Node) {
  241. // We ignore code, pre and already generated links.
  242. switch node.Type {
  243. case html.TextNode:
  244. ctx.textNode(node)
  245. case html.ElementNode:
  246. if node.Data == "a" || node.Data == "code" || node.Data == "pre" {
  247. if node.Data == "a" && ctx.visitLinksForShortLinks {
  248. ctx.visitNodeForShortLinks(node)
  249. }
  250. return
  251. }
  252. for n := node.FirstChild; n != nil; n = n.NextSibling {
  253. ctx.visitNode(n)
  254. }
  255. }
  256. // ignore everything else
  257. }
  258. func (ctx *postProcessCtx) visitNodeForShortLinks(node *html.Node) {
  259. switch node.Type {
  260. case html.TextNode:
  261. shortLinkProcessorFull(ctx, node, true)
  262. case html.ElementNode:
  263. if node.Data == "code" || node.Data == "pre" {
  264. return
  265. }
  266. for n := node.FirstChild; n != nil; n = n.NextSibling {
  267. ctx.visitNodeForShortLinks(n)
  268. }
  269. }
  270. }
  271. // textNode runs the passed node through various processors, in order to handle
  272. // all kinds of special links handled by the post-processing.
  273. func (ctx *postProcessCtx) textNode(node *html.Node) {
  274. for _, processor := range ctx.procs {
  275. processor(ctx, node)
  276. }
  277. }
  278. func createLink(href, content string) *html.Node {
  279. textNode := &html.Node{
  280. Type: html.TextNode,
  281. Data: content,
  282. }
  283. linkNode := &html.Node{
  284. FirstChild: textNode,
  285. LastChild: textNode,
  286. Type: html.ElementNode,
  287. Data: "a",
  288. DataAtom: atom.A,
  289. Attr: []html.Attribute{
  290. {Key: "href", Val: href},
  291. },
  292. }
  293. textNode.Parent = linkNode
  294. return linkNode
  295. }
  296. // replaceContent takes a text node, and in its content it replaces a section of
  297. // it with the specified newNode. An example to visualize how this can work can
  298. // be found here: https://play.golang.org/p/5zP8NnHZ03s
  299. func replaceContent(node *html.Node, i, j int, newNode *html.Node) {
  300. // get the data before and after the match
  301. before := node.Data[:i]
  302. after := node.Data[j:]
  303. // Replace in the current node the text, so that it is only what it is
  304. // supposed to have.
  305. node.Data = before
  306. // Get the current next sibling, before which we place the replaced data,
  307. // and after that we place the new text node.
  308. nextSibling := node.NextSibling
  309. node.Parent.InsertBefore(newNode, nextSibling)
  310. if after != "" {
  311. node.Parent.InsertBefore(&html.Node{
  312. Type: html.TextNode,
  313. Data: after,
  314. }, nextSibling)
  315. }
  316. }
  317. func mentionProcessor(_ *postProcessCtx, node *html.Node) {
  318. m := mentionPattern.FindStringSubmatchIndex(node.Data)
  319. if m == nil {
  320. return
  321. }
  322. // Replace the mention with a link to the specified user.
  323. mention := node.Data[m[2]:m[3]]
  324. replaceContent(node, m[2], m[3], createLink(util.URLJoin(setting.AppURL, mention[1:]), mention))
  325. }
  326. func shortLinkProcessor(ctx *postProcessCtx, node *html.Node) {
  327. shortLinkProcessorFull(ctx, node, false)
  328. }
// shortLinkProcessorFull replaces the first [[...]] short link found in the
// text node with a generated node.
//
// The text between the brackets is split on "|". Arguments without "=" are
// the mandatory name and link; arguments with "=" are optional properties
// (the code reads "title" and "alt"). Word characters directly following "]]"
// are appended to the displayed name. Links whose extension is one of the
// listed image types are rendered as an <img> element, everything else as
// plain text.
//
// When noLink is true the generated text or image node is inserted on its
// own; otherwise it is wrapped in an <a> element pointing at the resolved
// link.
func shortLinkProcessorFull(ctx *postProcessCtx, node *html.Node, noLink bool) {
	m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
	if m == nil {
		return
	}

	// content is the text inside the brackets, tail the word characters
	// immediately after the closing brackets.
	content := node.Data[m[2]:m[3]]
	tail := node.Data[m[4]:m[5]]
	props := make(map[string]string)

	// MediaWiki uses [[link|text]], while GitHub uses [[text|link]]
	// It makes page handling terrible, but we prefer GitHub syntax
	// And fall back to MediaWiki only when it is obvious from the look
	// Of text and link contents
	sl := strings.Split(content, "|")
	for _, v := range sl {
		if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
			// There is no equal in this argument; this is a mandatory arg
			if props["name"] == "" {
				if isLinkStr(v) {
					// If we clearly see it is a link, we save it so

					// But first we need to ensure, that if both mandatory args provided
					// look like links, we stick to GitHub syntax
					if props["link"] != "" {
						props["name"] = props["link"]
					}

					props["link"] = strings.TrimSpace(v)
				} else {
					props["name"] = v
				}
			} else {
				// A name is already known, so this argument is the link.
				props["link"] = strings.TrimSpace(v)
			}
		} else {
			// There is an equal; optional argument.

			sep := strings.IndexByte(v, '=')
			key, val := v[:sep], html.UnescapeString(v[sep+1:])

			// When parsing HTML, x/net/html will change all quotes which are
			// not used for syntax into UTF-8 quotes. So checking val[0] won't
			// be enough, since that only checks a single byte.
			if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) ||
				(strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) {
				// Strip one quote (by its byte length) from each end.
				const lenQuote = len("‘")
				val = val[lenQuote : len(val)-lenQuote]
			}
			props[key] = val
		}
	}

	// The link falls back to the name; the displayed name prefers the title,
	// then the name, then the link itself.
	var name, link string
	if props["link"] != "" {
		link = props["link"]
	} else if props["name"] != "" {
		link = props["name"]
	}
	if props["title"] != "" {
		name = props["title"]
	} else if props["name"] != "" {
		name = props["name"]
	} else {
		name = link
	}

	name += tail
	// Decide from the link's file extension whether this is an image.
	image := false
	switch ext := filepath.Ext(string(link)); ext {
	// fast path: empty string, ignore
	case "":
		break
	case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
		image = true
	}

	// Prepare the <a> element and its single child; the child's type and
	// content are filled in below.
	childNode := &html.Node{}
	linkNode := &html.Node{
		FirstChild: childNode,
		LastChild:  childNode,
		Type:       html.ElementNode,
		Data:       "a",
		DataAtom:   atom.A,
	}
	childNode.Parent = linkNode
	absoluteLink := isLinkStr(link)
	if !absoluteLink {
		// Relative links get their spaces replaced: "+" for images, "-"
		// for everything else.
		if image {
			link = strings.Replace(link, " ", "+", -1)
		} else {
			link = strings.Replace(link, " ", "-", -1)
		}
	}
	urlPrefix := ctx.urlPrefix
	if image {
		if !absoluteLink {
			// Same-domain prefixes are switched from "/src/" to "/raw/"
			// for image sources.
			if IsSameDomain(urlPrefix) {
				urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1)
			}
			if ctx.isWikiMarkdown {
				link = util.URLJoin("wiki", "raw", link)
			}
			link = util.URLJoin(urlPrefix, link)
		}
		// The title falls back to alt, then to the base of the name.
		title := props["title"]
		if title == "" {
			title = props["alt"]
		}
		if title == "" {
			title = path.Base(string(name))
		}
		// The alt text falls back to the name.
		alt := props["alt"]
		if alt == "" {
			alt = name
		}

		// make the childNode an image - if we can, we also place the alt
		childNode.Type = html.ElementNode
		childNode.Data = "img"
		childNode.DataAtom = atom.Img
		childNode.Attr = []html.Attribute{
			{Key: "src", Val: link},
			{Key: "title", Val: title},
			{Key: "alt", Val: alt},
		}
		if alt == "" {
			// No alt text available: drop the alt attribute.
			childNode.Attr = childNode.Attr[:2]
		}
	} else {
		if !absoluteLink {
			if ctx.isWikiMarkdown {
				link = util.URLJoin("wiki", link)
			}
			link = util.URLJoin(urlPrefix, link)
		}
		childNode.Type = html.TextNode
		childNode.Data = name
	}
	if noLink {
		// Insert the bare child instead of the <a> wrapper.
		linkNode = childNode
	} else {
		linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
	}
	// Replace the whole match, including the tail, with the generated node.
	replaceContent(node, m[0], m[1], linkNode)
}
  465. func fullIssuePatternProcessor(ctx *postProcessCtx, node *html.Node) {
  466. m := getIssueFullPattern().FindStringSubmatchIndex(node.Data)
  467. if m == nil {
  468. return
  469. }
  470. link := node.Data[m[0]:m[1]]
  471. id := "#" + node.Data[m[2]:m[3]]
  472. // TODO if m[4]:m[5] is not nil, then link is to a comment,
  473. // and we should indicate that in the text somehow
  474. replaceContent(node, m[0], m[1], createLink(link, id))
  475. }
  476. func issueIndexPatternProcessor(ctx *postProcessCtx, node *html.Node) {
  477. prefix := cutoutVerbosePrefix(ctx.urlPrefix)
  478. // default to numeric pattern, unless alphanumeric is requested.
  479. pattern := issueNumericPattern
  480. if ctx.metas["style"] == IssueNameStyleAlphanumeric {
  481. pattern = issueAlphanumericPattern
  482. }
  483. match := pattern.FindStringSubmatchIndex(node.Data)
  484. if match == nil {
  485. return
  486. }
  487. id := node.Data[match[2]:match[3]]
  488. var link *html.Node
  489. if ctx.metas == nil {
  490. link = createLink(util.URLJoin(prefix, "issues", id[1:]), id)
  491. } else {
  492. // Support for external issue tracker
  493. if ctx.metas["style"] == IssueNameStyleAlphanumeric {
  494. ctx.metas["index"] = id
  495. } else {
  496. ctx.metas["index"] = id[1:]
  497. }
  498. link = createLink(com.Expand(ctx.metas["format"], ctx.metas), id)
  499. }
  500. replaceContent(node, match[2], match[3], link)
  501. }
  502. func crossReferenceIssueIndexPatternProcessor(ctx *postProcessCtx, node *html.Node) {
  503. m := crossReferenceIssueNumericPattern.FindStringSubmatchIndex(node.Data)
  504. if m == nil {
  505. return
  506. }
  507. ref := node.Data[m[2]:m[3]]
  508. parts := strings.SplitN(ref, "#", 2)
  509. repo, issue := parts[0], parts[1]
  510. replaceContent(node, m[2], m[3],
  511. createLink(util.URLJoin(setting.AppURL, repo, "issues", issue), ref))
  512. }
  513. // fullSha1PatternProcessor renders SHA containing URLs
  514. func fullSha1PatternProcessor(ctx *postProcessCtx, node *html.Node) {
  515. m := anySHA1Pattern.FindStringSubmatchIndex(node.Data)
  516. if m == nil {
  517. return
  518. }
  519. // take out what's relevant
  520. urlFull := node.Data[m[0]:m[1]]
  521. hash := node.Data[m[2]:m[3]]
  522. var subtree, line string
  523. // optional, we do them depending on the length.
  524. if m[7] > 0 {
  525. line = node.Data[m[6]:m[7]]
  526. }
  527. if m[5] > 0 {
  528. subtree = node.Data[m[4]:m[5]]
  529. }
  530. text := base.ShortSha(hash)
  531. if subtree != "" {
  532. text += "/" + subtree
  533. }
  534. if line != "" {
  535. text += " ("
  536. text += line
  537. text += ")"
  538. }
  539. replaceContent(node, m[0], m[1], createLink(urlFull, text))
  540. }
  541. // sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that
  542. // are assumed to be in the same repository.
  543. func sha1CurrentPatternProcessor(ctx *postProcessCtx, node *html.Node) {
  544. m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data)
  545. if m == nil {
  546. return
  547. }
  548. hash := node.Data[m[2]:m[3]]
  549. // The regex does not lie, it matches the hash pattern.
  550. // However, a regex cannot know if a hash actually exists or not.
  551. // We could assume that a SHA1 hash should probably contain alphas AND numerics
  552. // but that is not always the case.
  553. // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash
  554. // as used by git and github for linking and thus we have to do similar.
  555. replaceContent(node, m[2], m[3],
  556. createLink(util.URLJoin(ctx.urlPrefix, "commit", hash), base.ShortSha(hash)))
  557. }
  558. // emailAddressProcessor replaces raw email addresses with a mailto: link.
  559. func emailAddressProcessor(ctx *postProcessCtx, node *html.Node) {
  560. m := emailRegex.FindStringIndex(node.Data)
  561. if m == nil {
  562. return
  563. }
  564. mail := node.Data[m[0]:m[1]]
  565. replaceContent(node, m[0], m[1], createLink("mailto:"+mail, mail))
  566. }
  567. // linkProcessor creates links for any HTTP or HTTPS URL not captured by
  568. // markdown.
  569. func linkProcessor(ctx *postProcessCtx, node *html.Node) {
  570. m := linkRegex.FindStringIndex(node.Data)
  571. if m == nil {
  572. return
  573. }
  574. uri := node.Data[m[0]:m[1]]
  575. replaceContent(node, m[0], m[1], createLink(uri, uri))
  576. }
  577. func genDefaultLinkProcessor(defaultLink string) processor {
  578. return func(ctx *postProcessCtx, node *html.Node) {
  579. ch := &html.Node{
  580. Parent: node,
  581. Type: html.TextNode,
  582. Data: node.Data,
  583. }
  584. node.Type = html.ElementNode
  585. node.Data = "a"
  586. node.DataAtom = atom.A
  587. node.Attr = []html.Attribute{{Key: "href", Val: defaultLink}}
  588. node.FirstChild, node.LastChild = ch, ch
  589. }
  590. }