hypertext.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. package hypertext
  2. import (
  3. "golang.org/x/net/html"
  4. "golang.org/x/net/html/atom"
  5. "servitor/ansi"
  6. "servitor/style"
  7. "regexp"
  8. "strings"
  9. )
  10. type Markup struct {
  11. tree []*html.Node
  12. cached string
  13. cachedWidth int
  14. }
  // context is the rendering state handed from a node to its children. It is
  // passed by value, so a change a node makes to preserveWhitespace or width
  // is seen only by that node's subtree; links is a pointer, so every level
  // appends to the same slice.
  type context struct {
  	// preserveWhitespace, when true, leaves text-node whitespace untouched.
  	preserveWhitespace bool

  	// width is the number of columns available to the current node.
  	width int

  	// links collects link targets in the order they are encountered.
  	links *[]string
  }
  20. func NewMarkup(text string) (*Markup, []string, error) {
  21. nodes, err := html.ParseFragment(strings.NewReader(text), &html.Node{
  22. Type: html.ElementNode,
  23. Data: "body",
  24. DataAtom: atom.Body,
  25. })
  26. if err != nil {
  27. return nil, []string{}, err
  28. }
  29. rendered, links := renderWithLinks(nodes, 80)
  30. return &Markup{
  31. tree: nodes,
  32. cached: rendered,
  33. cachedWidth: 80,
  34. }, links, nil
  35. }
  36. func (m *Markup) Render(width int) string {
  37. if m.cachedWidth == width {
  38. return m.cached
  39. }
  40. rendered, _ := renderWithLinks(m.tree, width)
  41. m.cachedWidth = width
  42. m.cached = rendered
  43. return rendered
  44. }
  45. func renderWithLinks(nodes []*html.Node, width int) (string, []string) {
  46. ctx := context{
  47. preserveWhitespace: false,
  48. width: width,
  49. links: &[]string{},
  50. }
  51. output := ""
  52. for _, current := range nodes {
  53. result := renderNode(current, ctx)
  54. output = mergeText(output, result)
  55. }
  56. output = ansi.Wrap(output, width)
  57. return strings.Trim(output, " \n"), *ctx.links
  58. }
  /*
  mergeText joins lhs and rhs, normalising the whitespace where they meet.

  Merges text according to the following rules:
   1. Extract trailing whitespace from lhs and
      leading whitespace from rhs and concat them.
   2. Append the two sides in the following way,
      depending on the extracted whitespace:
      - If it is empty, append the sides
      - Else, if it contains 0 newlines, append
        the sides with a single space between.
      - Else, if it contains 1 newline, append
        the sides with a single newline between.
      - Else, append the sides with 2 newlines
        between.

  Only spaces and newlines count as whitespace here; tabs and other
  characters are left where they are.
  */
  func mergeText(lhs string, rhs string) string {
  	// This runs once per rendered node, so the seam is found with plain
  	// string trimming rather than by compiling regular expressions per call.
  	const seamCutset = " \n"

  	left := strings.TrimRight(lhs, seamCutset)
  	right := strings.TrimLeft(rhs, seamCutset)

  	// Whatever the trims removed is the whitespace at the seam.
  	trailing := lhs[len(left):]
  	leading := rhs[:len(rhs)-len(right)]
  	if trailing == "" && leading == "" {
  		return left + right
  	}

  	switch strings.Count(trailing, "\n") + strings.Count(leading, "\n") {
  	case 0:
  		return left + " " + right
  	case 1:
  		return left + "\n" + right
  	default:
  		return left + "\n\n" + right
  	}
  }
  93. func renderNode(node *html.Node, ctx context) string {
  94. if node.Type == html.TextNode {
  95. if !ctx.preserveWhitespace {
  96. whitespace := regexp.MustCompile(`[ \t\n\r]+`)
  97. return whitespace.ReplaceAllString(node.Data, " ")
  98. }
  99. return node.Data
  100. }
  101. if node.Type != html.ElementNode {
  102. return ""
  103. }
  104. switch node.Data {
  105. case "a":
  106. link := getAttribute("href", node.Attr)
  107. if link == "" {
  108. return renderChildren(node, ctx)
  109. }
  110. *ctx.links = append(*ctx.links, link)
  111. /* This must occur before the styling because it mutates ctx.links */
  112. rendered := renderChildren(node, ctx)
  113. return style.Link(rendered, len(*ctx.links))
  114. case "s", "del":
  115. return style.Strikethrough(renderChildren(node, ctx))
  116. case "code":
  117. ctx.preserveWhitespace = true
  118. return style.Code(renderChildren(node, ctx))
  119. case "i", "em":
  120. return style.Italic(renderChildren(node, ctx))
  121. case "b", "strong":
  122. return style.Bold(renderChildren(node, ctx))
  123. case "u", "ins":
  124. return style.Underline(renderChildren(node, ctx))
  125. case "mark":
  126. return style.Highlight(renderChildren(node, ctx))
  127. case "span":
  128. return renderChildren(node, ctx)
  129. case "li":
  130. return strings.Trim(renderChildren(node, ctx), " \n")
  131. case "br":
  132. return "\n"
  133. case "p", "div":
  134. return block(renderChildren(node, ctx))
  135. case "pre":
  136. ctx.preserveWhitespace = true
  137. wrapped := ansi.Pad(situationalWrap(renderChildren(node, ctx), ctx), ctx.width)
  138. return block(style.CodeBlock(wrapped))
  139. case "blockquote":
  140. ctx.width -= 1
  141. wrapped := situationalWrap(renderChildren(node, ctx), ctx)
  142. return block(style.QuoteBlock(strings.Trim(wrapped, " \n")))
  143. case "ul":
  144. return bulletedList(node, ctx)
  145. // case "ul":
  146. // return numberedList(node)
  147. case "h1":
  148. ctx.width -= 2
  149. wrapped := situationalWrap(renderChildren(node, ctx), ctx)
  150. return block(style.Header(wrapped, 1))
  151. case "h2":
  152. ctx.width -= 3
  153. wrapped := situationalWrap(renderChildren(node, ctx), ctx)
  154. return block(style.Header(wrapped, 2))
  155. case "h3":
  156. ctx.width -= 4
  157. wrapped := situationalWrap(renderChildren(node, ctx), ctx)
  158. return block(style.Header(wrapped, 3))
  159. case "h4":
  160. ctx.width -= 5
  161. wrapped := situationalWrap(renderChildren(node, ctx), ctx)
  162. return block(style.Header(wrapped, 4))
  163. case "h5":
  164. ctx.width -= 6
  165. wrapped := situationalWrap(renderChildren(node, ctx), ctx)
  166. return block(style.Header(wrapped, 5))
  167. case "h6":
  168. ctx.width -= 7
  169. wrapped := situationalWrap(renderChildren(node, ctx), ctx)
  170. return block(style.Header(wrapped, 6))
  171. case "hr":
  172. return block(strings.Repeat("\u23AF", ctx.width))
  173. /*
  174. The spec does not define the alt attribute for videos nor audio.
  175. I think it should, so if present I display it. It is
  176. tempting to use the children of the video and audio tags for
  177. this purpose, but it looks like they exist more so for backwards
  178. compatibility, so should contain something like "your browser does
  179. not support inline video; click here" as opposed to actual alt
  180. text.
  181. */
  182. case "img", "video", "audio":
  183. alt := getAttribute("alt", node.Attr)
  184. link := getAttribute("src", node.Attr)
  185. if alt == "" {
  186. alt = link
  187. }
  188. if link == "" {
  189. return block(alt)
  190. }
  191. *ctx.links = append(*ctx.links, link)
  192. ctx.width -= 2
  193. wrapped := situationalWrap(alt, ctx)
  194. return block(style.LinkBlock(wrapped, len(*ctx.links)))
  195. case "iframe":
  196. alt := getAttribute("title", node.Attr)
  197. link := getAttribute("src", node.Attr)
  198. if alt == "" {
  199. alt = link
  200. }
  201. if link == "" {
  202. return block(alt)
  203. }
  204. *ctx.links = append(*ctx.links, link)
  205. ctx.width -= 2
  206. wrapped := situationalWrap(alt, ctx)
  207. return block(style.LinkBlock(wrapped, len(*ctx.links)))
  208. default:
  209. return bad(node, ctx)
  210. }
  211. }
  212. func renderChildren(node *html.Node, ctx context) string {
  213. output := ""
  214. for current := node.FirstChild; current != nil; current = current.NextSibling {
  215. result := renderNode(current, ctx)
  216. output = mergeText(output, result)
  217. }
  218. return output
  219. }
  // block strips surrounding spaces and newlines from text and frames what is
  // left with two newlines on each side.
  func block(text string) string {
  	const margin = "\n\n"
  	body := strings.Trim(text, " \n")
  	return margin + body + margin
  }
  223. func bulletedList(node *html.Node, ctx context) string {
  224. output := ""
  225. ctx.width -= 2
  226. for current := node.FirstChild; current != nil; current = current.NextSibling {
  227. if current.Type != html.ElementNode {
  228. continue
  229. }
  230. result := ""
  231. if current.Data != "li" {
  232. result = bad(current, ctx)
  233. } else {
  234. result = renderNode(current, ctx)
  235. }
  236. wrapped := situationalWrap(result, ctx)
  237. output += "\n" + style.Bullet(wrapped)
  238. }
  239. if node.Parent != nil && node.Parent.Data == "li" {
  240. return output
  241. }
  242. return block(output)
  243. }
  244. func bad(node *html.Node, ctx context) string {
  245. return style.Red("<"+node.Data+">") + renderChildren(node, ctx) + style.Red("</"+node.Data+">")
  246. }
  247. func getAttribute(name string, attributes []html.Attribute) string {
  248. for _, attribute := range attributes {
  249. if attribute.Key == name {
  250. return attribute.Val
  251. }
  252. }
  253. return ""
  254. }
  255. func situationalWrap(text string, ctx context) string {
  256. if ctx.preserveWhitespace {
  257. return ansi.DumbWrap(text, ctx.width)
  258. }
  259. return ansi.Wrap(text, ctx.width)
  260. }