Browse Source

Cleaned up and tested html renderer

Benton Edmondson 2 years ago
parent
commit
14b5eceef5
8 changed files with 372 additions and 245 deletions
  1. 205 0
      hypertext/hypertext.go
  2. 124 0
      hypertext/hypertext_test.go
  3. 7 2
      kinds/actor.go
  4. 3 0
      kinds/link.go
  5. 1 1
      kinds/post.go
  6. 0 208
      render/html.go
  7. 3 4
      render/render.go
  8. 29 30
      style/style.go

+ 205 - 0
hypertext/hypertext.go

@@ -0,0 +1,205 @@
+package hypertext
+
+import (
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
+	"strings"
+	"regexp"
+	"mimicry/style"
+	"errors"
+)
+
+// func Render(text string) string {
+// 	return "nothing"
+// }
+
+// Render parses text as an HTML fragment in the context of a <body>
+// element and converts it into styled terminal text, with spaces and
+// newlines trimmed from both ends. A parse error, or an error from
+// rendering any node, is returned together with an empty string.
+//
+// Terminal codes and control characters should already be escaped by
+// the time text reaches this function.
+func Render(text string) (string, error) {
+	bodyContext := &html.Node{
+		Type:     html.ElementNode,
+		Data:     "body",
+		DataAtom: atom.Body,
+	}
+
+	fragment, parseErr := html.ParseFragment(strings.NewReader(text), bodyContext)
+	if parseErr != nil {
+		return "", parseErr
+	}
+
+	rendered, renderErr := serializeList(fragment)
+	if renderErr != nil {
+		return "", renderErr
+	}
+
+	return strings.Trim(rendered, " \n"), nil
+}
+
+func serializeList(nodes []*html.Node) (string, error) {
+	output := ""
+	for _, current := range nodes {
+		result, err := renderNode(current, false)
+		if err != nil {
+			return "", err
+		}
+		output = mergeText(output, result)
+	}
+	return output, nil
+}
+
+/* 	Merges text according to the following rules:
+	1. Extract trailing whitespace from lhs and
+	   leading whitespace from rhs and concat them.
+	2. Append the two sides in the following way,
+	   depending on the extracted whitespace:
+	   	- If it is empty, append the sides
+		- Else, if it contains 0 newlines, append
+		  the sides with a single space between.
+		- Else, if it contains 1 newline, append
+		  the sides with a single newline between.
+		- Else, append the sides with 2 newlines
+		  between.
+*/
+func mergeText(lhs string, rhs string) string {
+	lhsTrimmed := strings.TrimRight(lhs, " \n")
+	rhsTrimmed := strings.TrimLeft(rhs, " \n")
+	lhsWhitespace := lhs[len(lhsTrimmed):]
+	rhsWhitespace := rhs[:len(rhs)-len(rhsTrimmed)]
+	whitespace := lhsWhitespace + rhsWhitespace
+
+	if whitespace == "" {
+		return lhsTrimmed + rhsTrimmed
+	}
+
+	switch strings.Count(whitespace, "\n") {
+	case 0: return lhsTrimmed + " " + rhsTrimmed
+	case 1: return lhsTrimmed + "\n" + rhsTrimmed
+	}
+
+	return lhsTrimmed + "\n\n" + rhsTrimmed
+}
+
+func renderNode(node *html.Node, preserveWhitespace bool) (string, error) {
+	if node.Type == html.TextNode {
+		if !preserveWhitespace {
+			whitespace := regexp.MustCompile(`[ \t\n\r]+`)
+			return whitespace.ReplaceAllString(node.Data, " "), nil
+		}
+		return node.Data, nil
+	}
+
+	if node.Type != html.ElementNode {
+		return "", nil
+	}
+
+	content, err := serializeChildren(node, preserveWhitespace)
+	if err != nil {
+		return "", err
+	}
+
+	switch node.Data {
+	case "a":
+		return style.Link(content), nil
+	case "s", "del":
+		return style.Strikethrough(content), nil
+	case "code":
+		return style.Code(content), nil
+	case "i", "em":
+		return style.Italic(content), nil
+	case "b", "strong":
+		return style.Bold(content), nil
+	case "u":
+		return style.Underline(content), nil
+	case "mark":
+		return style.Highlight(content), nil
+	case "span", "li":
+		return content, nil
+	case "br":
+		return "\n", nil
+
+	case "p", "div":
+		return block(content), nil
+	case "pre":
+		content, err := serializeChildren(node, true)
+		return block(style.CodeBlock(content)), err
+	case "blockquote":
+		return block(style.QuoteBlock(content)), nil
+	case "ul":
+		list, err := bulletedList(node, preserveWhitespace)
+		return block(list), err
+	// case "ul":
+	// 	return numberedList(node), nil
+
+	case "h1":
+		return block(style.Header(content, 1)), nil
+	case "h2":
+		return block(style.Header(content, 2)), nil
+	case "h3":
+		return block(style.Header(content, 3)), nil
+	case "h4":
+		return block(style.Header(content, 4)), nil
+	case "h5":
+		return block(style.Header(content, 5)), nil
+	case "h6":
+		return block(style.Header(content, 6)), nil
+
+	case "hr":
+		return block("―――"), nil
+	case "img", "video", "audio", "iframe":
+		text := getAttribute("alt", node.Attr)
+		if text == "" {
+			text = getAttribute("title", node.Attr)
+		}
+		if text == "" {
+			text = getAttribute("src", node.Attr)
+		}
+		if text == "" {
+			return "", errors.New(node.Data + " tag is missing both `alt` and `src` attributes")
+		}
+		return block(style.LinkBlock(text)), nil
+	}
+
+	return "", errors.New("Encountered unrecognized element " + node.Data)
+}
+
+// serializeChildren renders every direct child of node in document
+// order and combines the results into one string. It returns an empty
+// string and the error as soon as any child fails to render.
+//
+// When preserveWhitespace is false the pieces are joined with
+// mergeText, which normalizes the whitespace where two pieces meet.
+// When it is true (inside <pre>) the pieces are concatenated verbatim,
+// because mergeText would otherwise collapse spaces and newlines at
+// every boundary between sibling nodes.
+func serializeChildren(node *html.Node, preserveWhitespace bool) (string, error) {
+	output := ""
+	for current := node.FirstChild; current != nil; current = current.NextSibling {
+		result, err := renderNode(current, preserveWhitespace)
+		if err != nil {
+			return "", err
+		}
+		if preserveWhitespace {
+			output += result
+		} else {
+			output = mergeText(output, result)
+		}
+	}
+	return output, nil
+}
+
+func block(text string) string {
+	return "\n\n" + strings.Trim(text, " \n") + "\n\n"
+}
+
+func bulletedList(node *html.Node, preserveWhitespace bool) (string, error) {
+	output := ""
+	for current := node.FirstChild; current != nil; current = current.NextSibling {
+		if current.Type != html.ElementNode {
+			continue
+		}
+
+		if current.Data != "li" {
+			continue
+		}
+
+		result, err := renderNode(current, preserveWhitespace)
+		if err != nil {
+			return "", err
+		}
+		output += "\n" + style.Bullet(result)
+	}
+	return block(output), nil
+}
+
+// getAttribute returns the value of the first attribute in attributes
+// whose key equals name, or the empty string when there is none.
+func getAttribute(name string, attributes []html.Attribute) string {
+	for i := range attributes {
+		if candidate := attributes[i]; candidate.Key == name {
+			return candidate.Val
+		}
+	}
+	return ""
+}

+ 124 - 0
hypertext/hypertext_test.go

@@ -0,0 +1,124 @@
+package hypertext
+
+import (
+	"testing"
+	"mimicry/style"
+)
+
+// assertEqual fails the test immediately, printing both values, when
+// output differs from expected.
+func assertEqual(expected string, output string, t *testing.T) {
+	// Attribute the failure to the calling test's line, not this helper.
+	t.Helper()
+	if expected != output {
+		t.Fatalf("Expected `%s` not `%s`\n", expected, output)
+	}
+}
+
+// TestMergeText covers the four seam cases of mergeText: no whitespace,
+// spaces only, a single newline, and several newlines.
+func TestMergeText(t *testing.T) {
+	cases := []struct {
+		lhs, rhs, expected string
+	}{
+		{"front", "back", "frontback"},
+		{"front     ", "   back", "front back"},
+		{"front     ", " \n  back", "front\nback"},
+		{"front    \n\n\n ", " \n  back", "front\n\nback"},
+	}
+
+	for _, c := range cases {
+		assertEqual(c.expected, mergeText(c.lhs, c.rhs), t)
+	}
+}
+
+// TestStyles checks that adjacent inline elements are each wrapped in
+// their matching style with nothing inserted between them.
+func TestStyles(t *testing.T) {
+	input := "<s>s</s><code>code</code><i>i</i><u>u</u><mark>mark</mark>"
+	output, err := Render(input)
+	if err != nil {
+		// Fail this test only; a panic would abort the whole test binary.
+		t.Fatalf("Render returned an unexpected error: %v", err)
+	}
+	expected := style.Strikethrough("s") +
+		style.Code("code") +
+		style.Italic("i") +
+		style.Underline("u") +
+		style.Highlight("mark")
+
+	assertEqual(expected, output, t)
+}
+
+// TestSurroundingBlocks checks that inline content sitting between two
+// paragraphs is separated from each by a blank line, and that tabs and
+// carriage returns inside it collapse to single spaces.
+func TestSurroundingBlocks(t *testing.T) {
+	input := "<p>first</p>in \t<mark>the</mark> \rmiddle<p>last</p>"
+	output, err := Render(input)
+	if err != nil {
+		// Fail this test only; a panic would abort the whole test binary.
+		t.Fatalf("Render returned an unexpected error: %v", err)
+	}
+	expected := "first\n\nin " + style.Highlight("the") + " middle\n\nlast"
+	assertEqual(expected, output, t)
+}
+
+// TestAdjacentBlocks checks that formatting whitespace around and
+// between two paragraphs reduces to a single blank line between them.
+func TestAdjacentBlocks(t *testing.T) {
+	input := "\t<p>first</p>\n\t<p>second</p>"
+	output, err := Render(input)
+	if err != nil {
+		// Fail this test only; a panic would abort the whole test binary.
+		t.Fatalf("Render returned an unexpected error: %v", err)
+	}
+	expected := "first\n\nsecond"
+	assertEqual(expected, output, t)
+}
+
+// TestPoetry checks that <br> produces line breaks (two in a row giving
+// a blank line) and that a run of tabs collapses to one space.
+func TestPoetry(t *testing.T) {
+	input := "he shouted\t\ta few words<br>at those annoying birds<br><br>and that they heard"
+	output, err := Render(input)
+	if err != nil {
+		// Fail this test only; a panic would abort the whole test binary.
+		t.Fatalf("Render returned an unexpected error: %v", err)
+	}
+	expected := "he shouted a few words\nat those annoying birds\n\nand that they heard"
+
+	assertEqual(expected, output, t)
+}
+
+// TestPreservation checks that tabs, repeated spaces, and repeated
+// newlines inside <pre> come through unchanged.
+func TestPreservation(t *testing.T) {
+	input := "<pre>tab\tand multi-space   \n\n\n\n\n far down</pre>"
+	output, err := Render(input)
+	if err != nil {
+		// Fail this test only; a panic would abort the whole test binary.
+		t.Fatalf("Render returned an unexpected error: %v", err)
+	}
+	// Written with escapes so the tab and trailing spaces stay visible.
+	expected := style.CodeBlock("tab\tand multi-space   \n\n\n\n\n far down")
+	assertEqual(expected, output, t)
+}
+
+// TestNestedBlocks checks that a whitespace-only paragraph adds no
+// extra blank lines and that an image with an empty `alt` inside a
+// paragraph is rendered as a link block showing its `src`.
+func TestNestedBlocks(t *testing.T) {
+	input := "<p>Once a timid child</p>\n\n" +
+		"<p> </p>\n\n" +
+		`<p><img src="https://i.snap.as/P8qpdMbM.jpg" alt=""/></p>`
+	output, err := Render(input)
+	if err != nil {
+		// Fail this test only; a panic would abort the whole test binary.
+		t.Fatalf("Render returned an unexpected error: %v", err)
+	}
+	expected := "Once a timid child\n\n" + style.LinkBlock("https://i.snap.as/P8qpdMbM.jpg")
+	assertEqual(expected, output, t)
+}

+ 7 - 2
kinds/actor.go

@@ -5,6 +5,7 @@ import (
 	"net/url"
 	"mimicry/style"
 	"fmt"
+	"mimicry/render"
 )
 
 type Actor Dict
@@ -48,8 +49,12 @@ func (a Actor) Identifier() (*url.URL, error) {
 }
 
 func (a Actor) Bio() (string, error) {
-	bio, err := GetNatural(a, "summary", "en")
-	return strings.TrimSpace(bio), err
+	body, err := GetNatural(a, "summary", "en")
+	mediaType, err := Get[string](a, "mediaType")
+	if err != nil {
+		mediaType = "text/html"
+	}
+	return render.Render(body, mediaType)
 }
 
 func (a Actor) String() (string, error) {

+ 3 - 0
kinds/link.go

@@ -38,6 +38,9 @@ func (l Link) URL() (*url.URL, error) {
 
+// Alt returns the link's `name` with surrounding whitespace trimmed.
+// When `name` is empty or cannot be read, the link's `href` is used
+// instead, and the error (if any) is the one from that second lookup.
 func (l Link) Alt() (string, error) {
 	alt, err := Get[string](l, "name")
+	if alt == "" || err != nil {
+		alt, err = Get[string](l, "href")
+	}
 	return strings.TrimSpace(alt), err
 }
 

+ 1 - 1
kinds/post.go

@@ -94,6 +94,7 @@ func (p Post) Link() (Link, error) {
 	}
 }
 
+// TODO: errors in here should potentially trigger errors!
 func (p Post) String() (string, error) {
 	output := ""
 
@@ -102,7 +103,6 @@ func (p Post) String() (string, error) {
 		output += "\n"
 	}
 
-
 	if body, err := p.Body(); err == nil {
 		output += body
 		output += "\n"

+ 0 - 208
render/html.go

@@ -1,208 +0,0 @@
-package render
-
-import (
-	"golang.org/x/net/html"
-	"fmt"
-	"mimicry/style"
-	"errors"
-	"strings"
-	"regexp"
-	"golang.org/x/net/html/atom"
-)
-
-// preprocessing:
-// substitute escape key for escape visualizer
-
-// newline strategy:
-// blocks have 2 newlines before and two after,
-// at the end collapse 4 newlines into 2
-// maybe instead collapse any amount greater
-// (regex: \n{2,}) than 2 down to 2
-// also at the end trim all newlines from
-// very beginning and very end
-
-// for block links probably use ‣
-
-// I think it may work to collapse all
-// text node whitespace down to space,
-// and then trim the contents of blocks
-// (including the implicit body element)
-
-// FIXME: instead, you should collapse all whitespace into
-// space (including newline, so format newlines don't appear), then do newline insertion for blocks
-// then collapse all single-newline-containing whitespace into
-// one newline and multi-newline-containing whitespace into two
-// newlines
-
-// I will have this issue: https://unix.stackexchange.com/questions/170551/force-lynx-or-elinks-to-interpret-spaces-and-line-breaks
-// i.e. 3 or more br's in a row become idempotent, but I don't care
-
-func renderHTML(markup string) (string, error) {
-	/* 	Preprocessing
-		To prevent input text from changing its color, style, etc
-		via terminal escape codes, swap out U+001B (ESCAPE) for
-		U+241B (SYMBOL FOR ESCAPE)
-
-		TODO: move this to the complete beginning of render, not
-		just the HTML section
-	*/
-	markup = strings.ReplaceAll(markup, "\u001b", "␛")
-
-
-	nodes, err := html.ParseFragment(strings.NewReader(markup), &html.Node{
-		Type: html.ElementNode,
-		Data: "body",
-		DataAtom: atom.Body,
-	})
-	if err != nil {
-		return "", err
-	}
-	serialized, err := SerializeList(nodes)
-	if err != nil {
-		return "", err
-	}
-
-	/*
-		Postprocessing
-		Block elements are separated from siblings by prepending
-		and appending two newline characters. If two blocks are
-		adjacent, this will result in too many newline characters.
-		Furthermore, in text nodes, newline-containing whitespace
-		is collapsed into a single newline, potentially resulting
-		in even more newlines. So collapse sequences of over two
-		newlines into two newlines. Also trim all newlines from
-		the beginning and end of the output.
-	*/
-	manyNewlines := regexp.MustCompile(`\n{2,}`)
-	serialized = manyNewlines.ReplaceAllString(serialized, "\n\n")
-	serialized = strings.Trim(serialized, "\n")
-	return serialized, nil
-}
-
-func renderNode(node *html.Node, preserveWhitespace bool) (string, error) {
-	if node.Type == html.TextNode {
-		if !preserveWhitespace {
-			whitespace := regexp.MustCompile(`[\t ]+`)
-			newline := regexp.MustCompile(`[\n\t ]*\n[n\t ]*`)
-			processed := newline.ReplaceAllString(node.Data, "\n")
-			processed = whitespace.ReplaceAllString(processed, " ")
-			return processed, nil
-		}
-		return node.Data, nil
-	}
-
-	if node.Type != html.ElementNode {
-		return "", nil
-	}
-
-	// this may need to be moved down into the switch
-	// so that pre and code can override the last parameter
-	content := serializeChildren(node, preserveWhitespace)
-
-	switch node.Data {
-	case "a":
-		return style.Linkify(content), nil
-	case "s", "del":
-		return style.Strikethrough(content), nil
-	case "code":
-		return style.Code(content), nil
-	case "i", "em":
-		return style.Italic(content), nil
-	case "b", "strong":
-		return style.Bold(content), nil
-	case "u":
-		return style.Underline(content), nil
-	case "mark":
-		return style.Highlight(content), nil
-	case "span", "li":
-		return content, nil
-	case "br":
-		return "\n", nil
-
-	case "p", "div":
-		return block(content), nil
-	case "pre":
-		return block(style.CodeBlock(content)), nil
-	case "blockquote":
-		// FIXME: change blockquote to style.QuoteBlock
-		return block(blockquote(content)), nil
-	case "ul":
-		return block(bulletedList(node, preserveWhitespace)), nil
-	// case "ul":
-	// 	return numberedList(node), nil
-	}
-
-	return "", errors.New("Encountered unrecognized element " + node.Data)
-}
-
-func serializeChildren(node *html.Node, preserveWhitespace bool) (string) {
-	output := ""
-	for current := node.FirstChild; current != nil; current = current.NextSibling {
-		result, _ := renderNode(current, preserveWhitespace)
-		// if err != nil {
-		// 	return "", err
-		// }
-		output += result
-	}
-	return output
-}
-
-func SerializeList(nodes []*html.Node) (string, error) {
-	output := ""
-	for _, current := range nodes {
-		result, err := renderNode(current, false)
-		if err != nil {
-			return "", err
-		}
-		output += result
-	}
-	return output, nil
-}
-
-func block(text string) string {
-	return fmt.Sprintf("\n\n%s\n\n", text)
-}
-
-func blockquote(text string) string {
-	withBar := fmt.Sprintf("▌%s", strings.ReplaceAll(text, "\n", "\n▌"))
-	withColor := style.Color(withBar)
-	return withColor
-}
-
-func bulletedList(node *html.Node, preserveWhitespace bool) string {
-	output := ""
-	for current := node.FirstChild; current != nil; current = current.NextSibling {
-		if current.Type != html.ElementNode {
-			continue
-		}
-
-		if current.Data != "li" {
-			continue
-		}
-
-		result, _ := renderNode(current, preserveWhitespace)
-		output += fmt.Sprintf("• %s", strings.ReplaceAll(result, "\n", "\n  "))
-	}
-	return output
-}
-
-// could count them and use that to determine
-// indentation, but that is way premature
-// func numberedList(node *html.Node) string {
-// 	output += ""
-// 	i uint := 1
-// 	for current := node.FirstChild; current != nil; current = current.NextSibling {
-// 		if node.Type != html.ElementNode {
-// 			continue
-// 		}
-
-// 		if node.Data != "li" {
-// 			continue
-// 		}
-
-// 		fmt.Sprintf("%d. ")
-// 		output += strings.ReplaceAll(renderNode(node), "\n", "\n  ")
-// 		i += 1
-// 	}
-// 	return output
-// }

+ 3 - 4
render/render.go

@@ -4,18 +4,17 @@ import (
 	"strings"
 	"errors"
 	"fmt"
+	"mimicry/hypertext"
 )
 
-// Just use body as content because that only permits flow content
-// https://stackoverflow.com/questions/15081119/any-way-to-use-html-parse-without-it-adding-nodes-to-make-a-well-formed-tree
-
+// TODO: need to actually parse mediaType, not use `Contains`
 func Render(text string, mediaType string) (string, error) {
 	fmt.Println("started render")
 	switch {
 	case strings.Contains(mediaType, "text/plain"): 
 		return text, nil
 	case strings.Contains(mediaType, "text/html"):
-		return renderHTML(text)
+		return hypertext.Render(text)
 	default:
 		return "", errors.New("Cannot render text of mime type " + mediaType)
 	}

+ 29 - 30
style/style.go

@@ -9,36 +9,25 @@ import (
 // in input, to do so replace the escape character with visual
 // escape character
 
-/*
-	To, e.g., bold, prepend the bold character,
-	then substitute all resets with `${reset}${bold}`
-	to force rebold after all resets, to make sure. Might
-	be complex with layering
-*/
-
-// const (
-// 	Bold = 
-// )
-
+// background surrounds text with the `\x1b[48;2;r;g;bm` sequence for
+// the given r, g, b values and the `\x1b[49m` sequence. Any `\x1b[49m`
+// already inside text is replaced by the opening sequence, so nested
+// styled text switches back to this color instead of clearing it.
-func Background(text string, r uint8, g uint8, b uint8) string {
+func background(text string, r uint8, g uint8, b uint8) string {
 	setter := fmt.Sprintf("\x1b[48;2;%d;%d;%dm", r, g, b)
 	resetter := "\x1b[49m"
 	text = strings.ReplaceAll(text, resetter, setter)
 	return fmt.Sprintf("%s%s%s", setter, text, resetter)
 }
 
+// extendBackground inserts the `\x1b[K` sequence before every newline
+// in text.
+// NOTE(review): presumably this makes the terminal paint the active
+// background color to the end of each line; confirm on target terminals.
-func ExtendBackground(text string) string {
+func extendBackground(text string) string {
 	return strings.ReplaceAll(text, "\n", "\x1b[K\n")
 }
 
+// foreground surrounds text with the `\x1b[38;2;r;g;bm` sequence for
+// the given r, g, b values and the `\x1b[39m` sequence. Any `\x1b[39m`
+// already inside text is replaced by the opening sequence, so nested
+// styled text switches back to this color instead of clearing it.
-func Foreground(text string, r uint8, g uint8, b uint8) string {
+func foreground(text string, r uint8, g uint8, b uint8) string {
 	setter := fmt.Sprintf("\x1b[38;2;%d;%d;%dm", r, g, b)
 	resetter := "\x1b[39m"
 	newText := strings.ReplaceAll(text, resetter, setter)
 	return fmt.Sprintf("%s%s%s", setter, newText, resetter)
 }
 
+// display surrounds text with `\x1b[<prependCode>m` before it and
+// `\x1b[<appendCode>m` after it.
-func Display(text string, prependCode int, appendCode int) string {
+func display(text string, prependCode int, appendCode int) string {
 	return fmt.Sprintf("\x1b[%dm%s\x1b[%dm", prependCode, text, appendCode)
 }
 
@@ -46,45 +35,55 @@ func Display(text string, prependCode int, appendCode int) string {
+// Bold surrounds text with display codes 1 (before) and 22 (after).
 // 22 removes bold and faint, faint is never used
 // so it does the job
 func Bold(text string) string {
-	return Display(text, 1, 22)
+	return display(text, 1, 22)
 }
 
+// Strikethrough surrounds text with display codes 9 (before) and 29
+// (after).
 func Strikethrough(text string) string {
-	return Display(text, 9, 29)
+	return display(text, 9, 29)
 }
 
+// Underline surrounds text with display codes 4 (before) and 24
+// (after).
 func Underline(text string) string {
-	return Display(text, 4, 24)
+	return display(text, 4, 24)
 }
 
+// Italic surrounds text with display codes 3 (before) and 23 (after).
 func Italic(text string) string {
-	return Display(text, 3, 23)
+	return display(text, 3, 23)
 }
 
+// Code gives text the background color (75, 75, 75).
 func Code(text string) string {
-	return Background(text, 75, 75, 75)
+	return background(text, 75, 75, 75)
 }
 
+// CodeBlock applies Code to text and passes the result through
+// extendBackground.
 func CodeBlock(text string) string {
-	return ExtendBackground(Code(text))
+	return extendBackground(Code(text))
+}
+
+// QuoteBlock places a "▌" bar at the start of every line of text and
+// applies Color to the result.
+func QuoteBlock(text string) string {
+	lines := strings.Split(text, "\n")
+	return Color("▌" + strings.Join(lines, "\n▌"))
+}
+
+func LinkBlock(text string) string {
+	return "‣ " + Link(text)
 }
 
+// Highlight gives text the background color (13, 125, 0).
 func Highlight(text string) string {
-	return Background(text, 13, 125, 0)
+	return background(text, 13, 125, 0)
 }
 
+// Color gives text the foreground color (164, 245, 155).
 func Color(text string) string {
-	return Foreground(text, 164, 245, 155)
+	return foreground(text, 164, 245, 155)
 }
 
+// Link applies Color to text and then Underline to the result.
-func Linkify(text string) string {
+func Link(text string) string {
 	return Underline(Color(text))
 }
 
-// func Underline(text string) string {
-// 	return Display(text, )
-// }
-
-// func Anchor(text string) string {
+// Header prefixes text with level copies of "⯁" followed by a space,
+// then applies Bold and Color to the whole line.
+func Header(text string, level uint) string {
+	marker := strings.Repeat("⯁", int(level))
+	return Color(Bold(marker + " " + text))
+}
 
-// }
+// Bullet prefixes text with "• " and indents every subsequent line by
+// two spaces so that it lines up under the first.
+func Bullet(text string) string {
+	lines := strings.Split(text, "\n")
+	return "• " + strings.Join(lines, "\n  ")
+}