Browse Source

Backing up everything out of paranoia

Benton Edmondson 2 years ago
parent
commit
2e11dde7d8
11 changed files with 457 additions and 17 deletions
  1. 2 1
      .gitignore
  2. 2 0
      go.mod
  3. 2 0
      go.sum
  4. 68 0
      html.md
  5. 17 0
      implementation.md
  6. 9 2
      kinds/post.go
  7. 24 1
      main.go
  8. 61 0
      notes.md
  9. 203 4
      render/html.go
  10. 8 6
      render/render.go
  11. 61 3
      style/style.go

+ 2 - 1
.gitignore

@@ -1 +1,2 @@
-mi
+mi
+mimicry

+ 2 - 0
go.mod

@@ -1,3 +1,5 @@
 module mimicry
 
 go 1.19
+
+require golang.org/x/net v0.5.0 // indirect

+ 2 - 0
go.sum

@@ -0,0 +1,2 @@
+golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw=
+golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws=

+ 68 - 0
html.md

@@ -0,0 +1,68 @@
+# Presentational Subset of HTML
+
+It is popular to suggest that HTML markup should be semantic and presentational styling should be applied near-exclusively by stylesheets. In practice, this is not how HTML is used. For instance, people nearly never refer to `*` or `_` as "strong importance, seriousness, or urgency" or "stress emphasis" markers. They refer to them as bold and italics and use them that way. When people write an article on Medium or Google Docs and click the `bold` button, they don't think "oh this text is of strong seriousness and should be marked as such"; they want it to be a thicker, perhaps darker, font, i.e. bold.
+
+## Non-Textual Elements
+
+* `img` for images.
+* `video` for videos (content between the tags is ignored).
+* `audio` for audio (content between the tags is ignored).
+* `iframe` for including other content like YouTube videos, Tweets, etc.
+* `hr` for a [section break](https://en.wikipedia.org/wiki/Section_(typography)#Flourished_section_breaks).
+
+## Inline Elements
+
+* `a` for links.
+* `s` for strikethrough. `del` as an alias for compatibility.
+* `code` for monospace font. Like backticks in Markdown.
+* `i` for italics. `em` as an alias for compatibility.
+* `b` for bold. `strong` as an alias for compatibility.
+* `u` for underline.
+* `mark` for highlight. It should be rendered with a changed background color, but the color itself is not specified.
+* `span` is ignored for compatibility.
+* `br` for a single line break.
+
+## Block Elements
+
+All block elements are separated from adjacent content with a blank line.
+
+* `p` for a paragraph. Visually it does nothing besides make the contents a block (and thus separated from adjacent content). `div` as an alias for compatibility.
+* `pre` for text rendered in a monospace font. Like a code block (three backticks) in Markdown. Placing the `code` element within `pre` does nothing as `pre` already monospaces the content.
+* `blockquote` for an indented and recolored quote.
+* `ol` for a numbered list.
+* `ul` for a bullet list.
+    * `li` for the elements enumerated within `ol` or `ul`. If not the immediate child of `ol` or `ul`, then it is treated like `span` and ignored.
+* `h1`, `h2`, `h3`, `h4`, `h5`, `h6` for headers as used in Markdown.
+
+## Styles
+
+This involves the following styles:
+* Code - monospace with grey background
+* Highlight - brighter background
+* Strikethrough
+* Underline
+* Bold
+* Italics
+
+## Formatters
+
+* Block
+* Header
+* Bullet
+* Number
+
+## Whitespace
+
+Currently the plan with whitespace is to
+* Leave code and pre content alone
+* Collapse all other newline-containing whitespace into a single newline
+* Collapse all non-newline-containing whitespace into a single space
+* Prepend and append two newlines to each block
+* Collapse all sequences of over two newlines into two newlines
+* Trim all newlines from the beginning and end of the document.
+
+---
+
+Reasons semantic markup is dead:
+* What are we going to do, mark sentences as sentences, questions as questions, etc.?
+* People clearly use markup for visuals, not semantics.

+ 17 - 0
implementation.md

@@ -0,0 +1,17 @@
+# Implementation Guide
+
+ActivityPub's spec isn't specific in certain areas, so this document describes common usage.
+
+## Rendering Markup
+
+`text/plain` is easy to render. Potentially you can go through and try to identify links to make them easily selectable.
+
+`text/gemini` is not used by anyone but easy to render and nice for Twitter-style content.
+
+`text/html`: I will support:
+
+From section 4.8: img, audio, video, iframe for image/\*, audio/\*, video/\*, and text/html.
+From section 4.6: a
+From section 4.5: most of the elements
+From section 4.4: most of the elements
+From section 4.3: h1, h2, etc.

+ 9 - 2
kinds/post.go

@@ -7,10 +7,13 @@ import (
 	"mimicry/style"
 	"fmt"
 	"errors"
+	"mimicry/render"
 )
 
 type Post Dict
 
+// TODO: go through and remove all the trims, they
+// make things less predictable
 // TODO: make the Post references *Post because why not
 
 func (p Post) Kind() (string, error) {
@@ -25,7 +28,11 @@ func (p Post) Title() (string, error) {
 
 func (p Post) Body() (string, error) {
 	body, err := GetNatural(p, "content", "en")
-	return strings.TrimSpace(body), err
+	mediaType, err := Get[string](p, "mediaType")
+	if err != nil {
+		mediaType = "text/html"
+	}
+	return render.Render(body, mediaType)
 }
 
 func (p Post) BodyPreview() (string, error) {
@@ -96,7 +103,7 @@ func (p Post) String() (string, error) {
 	}
 
 
-	if body, err := p.BodyPreview(); err == nil {
+	if body, err := p.Body(); err == nil {
 		output += body
 		output += "\n"
 	}

+ 24 - 1
main.go

@@ -2,16 +2,39 @@ package main
 
 import (
 	"encoding/json"
-	"fmt"
 	"mimicry/kinds"
 	"os"
+	"fmt"
+	// "mimicry/style"
+	// "mimicry/render"
 )
 
+// TODO: even if only supported in few terminals,
+// consider using the proportional spacing codes when possible
+
 // TODO: when returning errors, use zero value for return
 // also change all error messages to using sprintf-style
 // formatting, all lowercase, and no punctuation
 
 func main() {
+	// fmt.Println(style.Bold("Bold") + "\tNot Bold")
+	// fmt.Println(style.Strikethrough("Strikethrough") + "\tNot Strikethrough")
+	// fmt.Println(style.Underline("Underline") + "\tNot Underline")
+	// fmt.Println(style.Italic("Italic") + "\tNot Italic")
+	// fmt.Println(style.Code("Code") + "\tNot Code")
+	// fmt.Println(style.Highlight("Highlight") + "\tNot Highlight")
+
+	// fmt.Println(style.Highlight("Stuff here " + style.Code("CODE") + " more here"))
+	// fmt.Println(style.Bold("struff " + style.Strikethrough("bad") + " more stuff"))
+
+	// fmt.Println(style.Linkify("Hello!"))
+
+	// output, err := render.Render("<p>Hello<code>hi</code> Everyone</p><i>@everyone</i> <blockquote>please<br>don't!</blockquote>", "text/html")
+	// if err != nil {
+	// 	panic(err)
+	// }
+	// fmt.Println(output)
+
 	link := os.Args[len(os.Args)-1]
 	command := os.Args[1]
 

+ 61 - 0
notes.md

@@ -0,0 +1,61 @@
+### `Object | Link`
+
+For now I will not handle `Object | Link` types and will instead just filter down to `Post`/`Actor` and give warnings for `Link`s. I don't know how `Link`s would be displayed visually as, e.g., `inReplyTo` (although I see the utility). More importantly though, I don't know how to represent it (e.g. a `Link | Post` slice) in Go.
+
+For the two `Image | Link` cases (`image` and `icon`), I will just convert `Images` to `Links` (using some algorithm to get the best URL, looking at mime types and resolutions). It maps over nicely. I'll do this in the `image`, `icon`, `attachment` scenarios, in addition to potentially others. Furthermore this logic can then be reused when I am pulling the URL from a `Video`, `Image`, etc.
+
+Thinking now, all media types (`Document`, `Audio`, etc), will have a `Link()` function that returns the link using an algorithm for finding the best mime types and resolutions, based on whether it's a `Document`, `Audio`, etc. Then when I am, e.g., looking in `image`, `icon`, or `attachment`s, I will just loop through the list, keeping `Link`s and converting posts to `Link`s via `post.Link()` to keep the slice homogeneous.
+
+For the conversion it will be pulled from `name` of the parent document. So
+
+`Document.Link()`, if the `Document.url` is a `Link` use it, if it's a string, then:
+`Document.name` -> `Link.name`
+`Document.url` -> `Link.href`
+iff `content` and `summary` are absent: `Document.mediaType` -> `Link.mediaType` (Mastodon and PeerTube misuse it in this way, they use `mediaType` to refer to the `url`, not `content`)
+
+## Bugs
+
+By far the biggest flaw right now is that, if fulfilling an `id` results in an object with a re-retrieve condition (e.g. only having an `id` and `type`) the object will be re-retrieved infinitely. I need to add a flag "from source" (probably source as `nil`) to say to not re-retrieve.
+
+## Improvements
+
+Create a struct called `Fragment` that has `text string` for rendered text, `warnings []Warning` (or similar) for problems found during rendering, and `links map[rune]*url.URL` for hyperlinks that can be found within the text.
+
+I need to redesign everything with warnings. And ban `nil` from, e.g. `string` types (if possible). Thus every return type is sane (e.g. empty string, empty struct). Only thing is time and URL must be pointers so they can be nilled. The only human rule is to never return `nil` in place of an interface.
+
+## Future Plans
+
+If the CLI client works out, look into making a really nice-looking plain-HTML/CSS front-end that it can serve.
+
+## Ideas for related projects
+
+* Static ActivityPub site generator based on Dhall
+* A patch to Searx (or a better version) that serves results over ActivityPub to be read by an ActivityPub client
+* A dedicated ActivityPub search engine to solve the supposed discoverability problem inherent to decentralized systems (yet doesn't exist with the web)
+* Read-only RSS/Atom/JSON feed to ActivityPub hoster
+
+## Misuse
+
+Because ActivityPub supports `Article`s, `Note`s, `Image`s, having an account on PeerTube and PixelFed and Mastodon is pointless. I think people do this because of a mindset ingrained over the past few years of wanting a big list of platforms that they are on. Another reason may be wanting to categorize your things, but clients should just be capable of filtering on `Note`s vs `Image`s to solve that problem.
+
+Another misuse is organizations or people with websites using `mastodon.social`. The entire point of federation is to not use the centralized platform, but rather to use a host that makes sense for you.
+
+## Quirks
+
+For WebFinger, accept `application/json` in addition to `application/jrd+json`. Don't specify an `Accept` header because you don't need to. Problem found on PeerTube.
+
+The thing where, if `content` is absent, `mediaType`/`name` apply to the `url`. Problem found on Mastodon, PeerTube, and PixelFed.
+
+Future recommendation: add a `nameMediaType` field that applies to `name` so it can have markup. Default is `text/plain`. To add markup, recommendation is to use `text/markdown` so that it works fine on prior clients that treat it like `text/plain`.
+
+Use both of the `Accept` headers, some sites only respond to `application/activity+json` (PixelFed).
+
+## TODO
+
+Document the reasoning for treating everything as JSON instead of JSON-LD.
+
+## Minimal HTTPS
+
+After learning HTTP it feels like HTTP1.0 and HTTP3 have good niches. HTTP1.0 is super simple, `Connection: close` by default, so it is delimited by TCP close (which is fine for JSON). It has no chunking, so that isn't a problem. One TCP connection per request. On the other hand, HTTP3 is a binary protocol. (Amazing to think that the entire Web has been run off of a text-based protocol. No wonder everything breaks all the time.) Also, HTTP3 itself seems relatively lightweight because it looks like lower-level stuff is in QUIC instead of jammed in the HTTP headers.
+
+So I think ActivityPub clients should support HTTP1.0 for simple Gemini-style use-cases and hacking, and HTTP3 for more professional use-cases.

+ 203 - 4
render/html.go

@@ -1,9 +1,208 @@
 package render
 
 import (
-	"net/html"
+	"golang.org/x/net/html"
+	"fmt"
+	"mimicry/style"
+	"errors"
+	"strings"
+	"regexp"
+	"golang.org/x/net/html/atom"
 )
 
-func renderHTML(node *html.Node) (string, error) {
-	
-}
+// preprocessing:
+// substitute escape key for escape visualizer
+
+// newline strategy:
+// blocks have 2 newlines before and two after,
+// at the end collapse 4 newlines into 2
+// maybe instead collapse any run longer than 2
+// (regex: \n{3,}) down to 2
+// also at the end trim all newlines from
+// very beginning and very end
+
+// for block links probably use ‣
+
+// I think it may work to collapse all
+// text node whitespace down to space,
+// and then trim the contents of blocks
+// (including the implicit body element)
+
+// FIXME: instead, you should collapse all whitespace into
+// space (including newline, so format newlines don't appear), then do newline insertion for blocks
+// then collapse all single-newline-containing whitespace into
+// one newline and multi-newline-containing whitespace into two
+// newlines
+
+// I will have this issue: https://unix.stackexchange.com/questions/170551/force-lynx-or-elinks-to-interpret-spaces-and-line-breaks
+// i.e. 3 or more br's in a row become idempotent, but I don't care
+
+// manyNewlines matches a run of three or more consecutive newlines. It is
+// compiled once at package scope instead of on every renderHTML call.
+var manyNewlines = regexp.MustCompile(`\n{3,}`)
+
+// renderHTML parses markup as an HTML fragment, serializes the resulting
+// nodes with SerializeList, and normalizes newlines in the result.
+//
+// It returns the rendered text, or an empty string and a non-nil error if
+// parsing or serialization fails.
+func renderHTML(markup string) (string, error) {
+	/* 	Preprocessing
+		To prevent input text from changing its color, style, etc
+		via terminal escape codes, swap out U+001B (ESCAPE) for
+		U+241B (SYMBOL FOR ESCAPE)
+
+		TODO: move this to the complete beginning of render, not
+		just the HTML section
+	*/
+	markup = strings.ReplaceAll(markup, "\u001b", "␛")
+
+	// Parse in the context of a <body> element so the input is treated as
+	// a fragment of body content rather than as a whole document.
+	nodes, err := html.ParseFragment(strings.NewReader(markup), &html.Node{
+		Type: html.ElementNode,
+		Data: "body",
+		DataAtom: atom.Body,
+	})
+	if err != nil {
+		return "", fmt.Errorf("parsing html fragment: %w", err)
+	}
+	serialized, err := SerializeList(nodes)
+	if err != nil {
+		return "", err
+	}
+
+	/*
+		Postprocessing
+		Block elements are separated from siblings by prepending
+		and appending two newline characters. If two blocks are
+		adjacent, this will result in too many newline characters.
+		Furthermore, in text nodes, newline-containing whitespace
+		is collapsed into a single newline, potentially resulting
+		in even more newlines. So collapse sequences of over two
+		newlines into two newlines. Also trim all newlines from
+		the beginning and end of the output.
+	*/
+	serialized = manyNewlines.ReplaceAllString(serialized, "\n\n")
+	return strings.Trim(serialized, "\n"), nil
+}
+
+// inlineWhitespace matches a run of spaces and/or tabs.
+var inlineWhitespace = regexp.MustCompile(`[\t ]+`)
+
+// newlineWhitespace matches a whitespace run that contains at least one
+// newline, including any spaces, tabs, or newlines on either side of it.
+var newlineWhitespace = regexp.MustCompile(`[\n\t ]*\n[\n\t ]*`)
+
+// renderNode renders a single parsed HTML node to styled text.
+//
+// Text nodes are returned verbatim when preserveWhitespace is true;
+// otherwise every newline-containing whitespace run becomes one newline and
+// every remaining run of spaces/tabs becomes one space. Nodes that are
+// neither text nor elements render as the empty string. Element nodes are
+// rendered by serializing their children and applying the style or block
+// formatting selected by the tag name.
+//
+// It returns an empty string and an error for any element whose tag is not
+// handled by the switch below.
+func renderNode(node *html.Node, preserveWhitespace bool) (string, error) {
+	if node.Type == html.TextNode {
+		if preserveWhitespace {
+			return node.Data, nil
+		}
+		// Collapse newline-containing runs first so the spaces and tabs
+		// surrounding a line break are absorbed into that single newline
+		// rather than being turned into separate spaces.
+		processed := newlineWhitespace.ReplaceAllString(node.Data, "\n")
+		return inlineWhitespace.ReplaceAllString(processed, " "), nil
+	}
+
+	if node.Type != html.ElementNode {
+		return "", nil
+	}
+
+	// this may need to be moved down into the switch
+	// so that pre and code can override the last parameter
+	content := serializeChildren(node, preserveWhitespace)
+
+	switch node.Data {
+	case "a":
+		return style.Linkify(content), nil
+	case "s", "del":
+		return style.Strikethrough(content), nil
+	case "code":
+		return style.Code(content), nil
+	case "i", "em":
+		return style.Italic(content), nil
+	case "b", "strong":
+		return style.Bold(content), nil
+	case "u":
+		return style.Underline(content), nil
+	case "mark":
+		return style.Highlight(content), nil
+	case "span", "li":
+		return content, nil
+	case "br":
+		return "\n", nil
+
+	case "p", "div":
+		return block(content), nil
+	case "pre":
+		return block(style.CodeBlock(content)), nil
+	case "blockquote":
+		// FIXME: change blockquote to style.QuoteBlock
+		return block(blockquote(content)), nil
+	case "ul":
+		return block(bulletedList(node, preserveWhitespace)), nil
+	// case "ol":
+	// 	return numberedList(node), nil
+	}
+
+	return "", errors.New("Encountered unrecognized element " + node.Data)
+}
+
+// serializeChildren renders each direct child of node in document order
+// with renderNode and returns the concatenation of the results.
+//
+// Errors reported by renderNode are discarded because this function has no
+// error return; a child that fails contributes whatever string renderNode
+// returned alongside the error.
+func serializeChildren(node *html.Node, preserveWhitespace bool) string {
+	var rendered strings.Builder
+	child := node.FirstChild
+	for child != nil {
+		text, _ := renderNode(child, preserveWhitespace)
+		rendered.WriteString(text)
+		child = child.NextSibling
+	}
+	return rendered.String()
+}
+
+// SerializeList renders every node in nodes, in slice order, with
+// whitespace collapsing enabled, and returns the concatenated text.
+// Rendering stops at the first node that fails; in that case the result is
+// an empty string and that node's error.
+func SerializeList(nodes []*html.Node) (string, error) {
+	var joined strings.Builder
+	for i := range nodes {
+		piece, err := renderNode(nodes[i], false)
+		if err != nil {
+			return "", err
+		}
+		joined.WriteString(piece)
+	}
+	return joined.String(), nil
+}
+
+// block surrounds text with two newlines on each side so that it is set
+// apart from adjacent content.
+func block(text string) string {
+	const gap = "\n\n"
+	return gap + text + gap
+}
+
+// blockquote prefixes every line of text with a "▌" bar and then passes
+// the result through style.Color.
+func blockquote(text string) string {
+	// Inserting the bar after each newline marks every line but the first;
+	// the Sprintf below supplies the bar for the first line.
+	barred := strings.ReplaceAll(text, "\n", "\n▌")
+	return style.Color(fmt.Sprintf("▌%s", barred))
+}
+
+// bulletedList renders the direct <li> element children of node as a
+// bulleted list, one item per line. Children that are not elements, or are
+// elements other than "li", are skipped.
+//
+// Each item starts with "• "; newlines inside an item are followed by two
+// spaces so continuation lines align under the item text. Items are joined
+// with a single newline so consecutive items do not run together on one
+// line.
+//
+// Errors from renderNode are discarded because this function has no error
+// return; a failing item contributes whatever string renderNode returned.
+func bulletedList(node *html.Node, preserveWhitespace bool) string {
+	var items []string
+	for current := node.FirstChild; current != nil; current = current.NextSibling {
+		if current.Type != html.ElementNode || current.Data != "li" {
+			continue
+		}
+
+		result, _ := renderNode(current, preserveWhitespace)
+		items = append(items, fmt.Sprintf("• %s", strings.ReplaceAll(result, "\n", "\n  ")))
+	}
+	return strings.Join(items, "\n")
+}
+
+// could count them and use that to determine
+// indentation, but that is way premature
+// func numberedList(node *html.Node) string {
+// 	output += ""
+// 	i uint := 1
+// 	for current := node.FirstChild; current != nil; current = current.NextSibling {
+// 		if node.Type != html.ElementNode {
+// 			continue
+// 		}
+
+// 		if node.Data != "li" {
+// 			continue
+// 		}
+
+// 		fmt.Sprintf("%d. ")
+// 		output += strings.ReplaceAll(renderNode(node), "\n", "\n  ")
+// 		i += 1
+// 	}
+// 	return output
+// }

+ 8 - 6
render/render.go

@@ -3,18 +3,20 @@ package render
 import (
 	"strings"
 	"errors"
+	"fmt"
 )
 
+// Just use body as content because that only permits flow content
+// https://stackoverflow.com/questions/15081119/any-way-to-use-html-parse-without-it-adding-nodes-to-make-a-well-formed-tree
+
 func Render(text string, mediaType string) (string, error) {
+	fmt.Println("started render")
 	switch {
 	case strings.Contains(mediaType, "text/plain"): 
 		return text, nil
 	case strings.Contains(mediaType, "text/html"):
-		node, err := html.Parse(text)
-		if err == nil {
-			return "", err
-		}
-		return renderHTML(node), nil
+		return renderHTML(text)
 	default:
-		return "", errors.New("Cannot render text of mime type %s", mediaType)
+		return "", errors.New("Cannot render text of mime type " + mediaType)
+	}
 }

+ 61 - 3
style/style.go

@@ -2,8 +2,13 @@ package style
 
 import (
 	"fmt"
+	"strings"
 )
 
+// TODO: at some point I need to sanitize preexisting escape codes
+// in input, to do so replace the escape character with visual
+// escape character
+
 /*
 	To, e.g., bold, prepend the bold character,
 	then substitute all resets with `${reset}${bold}`
@@ -15,12 +20,65 @@ import (
 // 	Bold = 
 // )
 
-func Display(text string, code int) string {
-	return fmt.Sprintf("\x1b[%dm%s\x1b[0m", code, text)
+// Background wraps text in the escape sequence "\x1b[48;2;r;g;bm" and
+// terminates it with "\x1b[49m".
+//
+// Any "\x1b[49m" already inside text is replaced by the opening sequence,
+// so a nested Background call that ends inside text hands control back to
+// this color instead of clearing it.
+func Background(text string, r uint8, g uint8, b uint8) string {
+	const reset = "\x1b[49m"
+	set := fmt.Sprintf("\x1b[48;2;%d;%d;%dm", r, g, b)
+	inner := strings.ReplaceAll(text, reset, set)
+	return set + inner + reset
+}
+
+// ExtendBackground inserts the escape sequence "\x1b[K" immediately before
+// every newline in text.
+//
+// NOTE(review): "\x1b[K" is the standard "erase in line" control sequence;
+// presumably it is used here so the active background color is painted to
+// the end of each line — confirm on the target terminals.
+func ExtendBackground(text string) string {
+	const eraseLine = "\x1b[K"
+	return strings.ReplaceAll(text, "\n", eraseLine+"\n")
+}
+
+// Foreground wraps text in the escape sequence "\x1b[38;2;r;g;bm" and
+// terminates it with "\x1b[39m".
+//
+// Any "\x1b[39m" already inside text is replaced by the opening sequence,
+// so a nested Foreground call that ends inside text hands control back to
+// this color instead of clearing it.
+func Foreground(text string, r uint8, g uint8, b uint8) string {
+	const reset = "\x1b[39m"
+	set := fmt.Sprintf("\x1b[38;2;%d;%d;%dm", r, g, b)
+	inner := strings.ReplaceAll(text, reset, set)
+	return set + inner + reset
+}
+
+// Display emits "\x1b[<prependCode>m" before text and "\x1b[<appendCode>m"
+// after it, so a style can be switched on with one code and switched off
+// with a different one.
+func Display(text string, prependCode int, appendCode int) string {
+	const sgr = "\x1b[%dm"
+	return fmt.Sprintf(sgr+"%s"+sgr, prependCode, text, appendCode)
 }
 
+// 21 doesn't work (does double underline)
+// 22 removes bold and faint, faint is never used
+// so it does the job
 func Bold(text string) string {
-	return Display(text, 1)
+	return Display(text, 1, 22)
+}
+
+// Strikethrough wraps text via Display, opening with code 9 and closing
+// with code 29.
+func Strikethrough(text string) string {
+	const on, off = 9, 29
+	return Display(text, on, off)
+}
+
+// Underline wraps text via Display, opening with code 4 and closing with
+// code 24.
+func Underline(text string) string {
+	const on, off = 4, 24
+	return Display(text, on, off)
+}
+
+// Italic wraps text via Display, opening with code 3 and closing with
+// code 23.
+func Italic(text string) string {
+	const on, off = 3, 23
+	return Display(text, on, off)
+}
+
+// Code gives text a uniform grey background (75 on each of the red, green
+// and blue channels) via Background.
+func Code(text string) string {
+	const grey uint8 = 75
+	return Background(text, grey, grey, grey)
+}
+
+// CodeBlock applies Code to text and then passes the result through
+// ExtendBackground.
+func CodeBlock(text string) string {
+	shaded := Code(text)
+	return ExtendBackground(shaded)
+}
+
+// Highlight gives text the background color RGB(13, 125, 0) via
+// Background.
+func Highlight(text string) string {
+	const (
+		red   uint8 = 13
+		green uint8 = 125
+		blue  uint8 = 0
+	)
+	return Background(text, red, green, blue)
+}
+
+// Color gives text the foreground color RGB(164, 245, 155) via
+// Foreground.
+func Color(text string) string {
+	const (
+		red   uint8 = 164
+		green uint8 = 245
+		blue  uint8 = 155
+	)
+	return Foreground(text, red, green, blue)
+}
+
+// Linkify styles text as a link: it applies Color first and then
+// Underline to the colored result.
+func Linkify(text string) string {
+	colored := Color(text)
+	return Underline(colored)
 }
 
 // func Underline(text string) string {