فهرست منبع

Implement optimizations including parallelism, caching, duplicate request suppression, and prefetching

Benton Edmondson 1 سال پیش
والد
کامیت
248541d7a6
13 فایلهای تغییر یافته به همراه 162 افزوده شده و 108 حذف شده
  1. 41 13
      client/client.go
  2. 4 4
      go.mod
  3. 4 0
      go.sum
  4. 1 1
      hypertext/hypertext.go
  5. 21 1
      jtp/jtp.go
  6. 1 3
      main.go
  7. 6 3
      pub/activity.go
  8. 1 1
      pub/actor.go
  9. 22 19
      pub/collection.go
  10. 16 39
      pub/common.go
  11. 14 14
      pub/post.go
  12. 1 2
      style/style.go
  13. 30 8
      ui/ui.go

+ 41 - 13
client/client.go

@@ -9,6 +9,7 @@ import (
 	"encoding/json"
 	"mimicry/object"
 	"fmt"
+	"golang.org/x/sync/singleflight"
 )
 
 const MAX_REDIRECTS = 20
@@ -53,18 +54,42 @@ func FetchUnknown(input any, source *url.URL) (object.Object, *url.URL, error) {
 	return obj, id, err
 }
 
-func FetchURL(link *url.URL) (object.Object, *url.URL, error) {
-	return jtp.Get(
-			link,
-			`application/activity+json,` +
-			`application/ld+json; profile="https://www.w3.org/ns/activitystreams"`,
-			[]string{
-				"application/activity+json",
-				"application/ld+json",
-				"application/json",
-			},
-			MAX_REDIRECTS,
-		)
+var group singleflight.Group
+type bundle struct {
+	item map[string]any
+	source *url.URL
+	err error
+}
+
+/* A singleflight group is used to ensure no two requests for the same URL are
+   made simultaneously. Instead, the subsequent callers wait for the first one
+   to finish and share its result. */
+
+func FetchURL(uri *url.URL) (object.Object, *url.URL, error) {
+	uriString := uri.String()
+	b, _, _ := group.Do(uriString, func() (any, error) {
+		json, source, err := 
+			jtp.Get(
+				uri,
+				`application/activity+json,` +
+				`application/ld+json; profile="https://www.w3.org/ns/activitystreams"`,
+				[]string{
+					"application/activity+json",
+					"application/ld+json",
+					"application/json",
+				},
+				MAX_REDIRECTS,
+			)
+		return bundle {
+			item: json,
+			source: source,
+			err: err,
+		}, nil
+	})
+	/* By this point any successful result has been stored in jtp's LRU cache,
+	   so the key can be dropped from the singleflight group */
+	group.Forget(uriString)
+	return b.(bundle).item, b.(bundle).source, b.(bundle).err
 }
 
 /*
@@ -128,7 +153,10 @@ func ResolveWebfinger(username string) (string, error) {
 			} else if err != nil {
 				return "", err
 			}
-			if !mediaType.Matches([]string{"application/activity+json"}) {
+			if !mediaType.Matches([]string{
+				"application/activity+json",
+				"application/ld+json",
+			}) {
 				continue
 			}
 			href, err := o.GetString("href")

+ 4 - 4
go.mod

@@ -3,12 +3,12 @@ module mimicry
 go 1.20
 
 require (
+	github.com/hashicorp/golang-lru/v2 v2.0.2
 	github.com/yuin/goldmark v1.5.4
+	golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea
 	golang.org/x/net v0.8.0
+	golang.org/x/sync v0.2.0
 	golang.org/x/term v0.6.0
 )
 
-require (
-	golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea // indirect
-	golang.org/x/sys v0.6.0 // indirect
-)
+require golang.org/x/sys v0.6.0 // indirect

+ 4 - 0
go.sum

@@ -1,9 +1,13 @@
+github.com/hashicorp/golang-lru/v2 v2.0.2 h1:Dwmkdr5Nc/oBiXgJS3CDHNhJtIHkuZ3DZF5twqnfBdU=
+github.com/hashicorp/golang-lru/v2 v2.0.2/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/yuin/goldmark v1.5.4 h1:2uY/xC0roWy8IBEGLgB1ywIoEJFGmRrX21YQcvGZzjU=
 github.com/yuin/goldmark v1.5.4/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea h1:vLCWI/yYrdEHyN2JzIzPO3aaQJHQdp89IZBA/+azVC4=
 golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea/go.mod h1:V1LtkGg67GoY2N1AnLN78QLrzxkLyJw7RJb1gzOOz9w=
 golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
 golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
+golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI=
+golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.6.0 h1:clScbb1cHjoCkyRbWwBEUZ5H/tIFu5TAXIqaZD0Gcjw=

+ 1 - 1
hypertext/hypertext.go

@@ -119,7 +119,7 @@ func renderNode(node *html.Node, width int, preserveWhitespace bool) (string, er
 		return style.Underline(content), nil
 	case "mark":
 		return style.Highlight(content), nil
-	case "span", "li":
+	case "span", "li", "small":
 		return content, nil
 	case "br":
 		return "\n", nil

+ 21 - 1
jtp/jtp.go

@@ -10,12 +10,20 @@ import (
 	"fmt"
 	"strings"
 	"encoding/json"
+	lru "github.com/hashicorp/golang-lru/v2"
 )
 
 var dialer = &tls.Dialer{
 	NetDialer: &net.Dialer{},
 }
 
+type bundle struct {
+	item map[string]any
+	source *url.URL
+	err error
+}
+var cache, _ = lru.New[string, bundle](128)
+
 var mediaTypeRegexp = regexp.MustCompile(`(?s)^(([!#$%&'*+\-.^_\x60|~a-zA-Z0-9]+)/([!#$%&'*+\-.^_\x60|~a-zA-Z0-9]+)).*$`)
 var statusLineRegexp = regexp.MustCompile(`^HTTP/1\.[0-9] ([0-9]{3}).*\n$`)
 var contentTypeRegexp = regexp.MustCompile(`^(?i:content-type):[ \t\r]*(.*?)[ \t\r]*\n$`)
@@ -34,6 +42,10 @@ var locationRegexp = regexp.MustCompile(`^(?i:location):[ \t\r]*(.*?)[ \t\r]*\n$
 		the maximum number of redirects to take
 */
 func Get(link *url.URL, accept string, tolerated []string, maxRedirects uint) (map[string]any, *url.URL, error) {
+	if cached, ok := cache.Get(link.String()); ok {
+		return cached.item, cached.source, cached.err
+	}
+
 	if link.Scheme != "https" {
 		return nil, nil, errors.New(link.Scheme + " is not supported in requests, only https")
 	}
@@ -91,7 +103,10 @@ func Get(link *url.URL, accept string, tolerated []string, maxRedirects uint) (m
 		if err := connection.Close(); err != nil {
 			return nil, nil, err
 		}
-		return Get(location, accept, tolerated, maxRedirects - 1)
+		var b bundle
+		b.item, b.source, b.err = Get(location, accept, tolerated, maxRedirects - 1)
+		cache.Add(link.String(), b)
+		return b.item, b.source, b.err
 	}
 
 	if status != "200" && status != "201" && status != "202" && status != "203" {
@@ -119,6 +134,11 @@ func Get(link *url.URL, accept string, tolerated []string, maxRedirects uint) (m
 		return nil, nil, err
 	}
 
+	cache.Add(link.String(), bundle {
+		item: dictionary,
+		source: link,
+		err: nil,
+	})
 	return dictionary, link, nil
 }
 

+ 1 - 3
main.go

@@ -6,7 +6,6 @@ import (
 	"golang.org/x/term"
 	"strings"
 	"mimicry/ui"
-	"log"
 )
 
 // TODO: clean up most panics
@@ -21,7 +20,6 @@ func main() {
 	width, heightInt, err := term.GetSize(int(os.Stdin.Fd()))
 	if err != nil { panic(err) }
 	height := uint(heightInt)
-	log.Printf("h, w: %v, %v", height, width)
 	printRaw("")
 
 	state := ui.Start(os.Args[1])
@@ -44,6 +42,6 @@ func main() {
 
 func printRaw(output string) {
 	output = strings.ReplaceAll(output, "\n", "\r\n")
-	_, err := fmt.Print("\x1b[2J\x1b[0;0H" + output)
+	_, err := fmt.Print("\x1b[0;0H\x1b[2J" + output)
 	if err != nil { panic(err) }
 }

+ 6 - 3
pub/activity.go

@@ -8,6 +8,7 @@ import (
 	"golang.org/x/exp/slices"
 	"mimicry/ansi"
 	"mimicry/style"
+	"sync"
 )
 
 type Activity struct {
@@ -38,9 +39,11 @@ func NewActivityFromObject(o object.Object, id *url.URL) (*Activity, error) {
 		return nil, fmt.Errorf("%w: %s is not an Activity", ErrWrongType, a.kind)
 	}
 
-	// TODO: parallelize
-	a.actor, a.actorErr = getActor(o, "actor", a.id)
-	a.target = getPostOrActor(o, "object", a.id)
+	var wg sync.WaitGroup
+	wg.Add(2)
+	go func () {a.actor, a.actorErr = getActor(o, "actor", a.id); wg.Done()}()
+	go func() {a.target = getPostOrActor(o, "object", a.id); wg.Done()}()
+	wg.Wait()
 
 	return a, nil
 }

+ 1 - 1
pub/actor.go

@@ -62,9 +62,9 @@ func NewActorFromObject(o object.Object, id *url.URL) (*Actor, error) {
 	a.mediaType, a.mediaTypeErr = o.GetMediaType("mediaType")
 	a.joined, a.joinedErr = o.GetTime("published")
 
-	// TODO: parallelize
 	a.pfp, a.pfpErr = getBestLink(o, "icon", "image")
 	a.banner, a.bannerErr = getBestLink(o, "image", "image")
+	
 	a.posts, a.postsErr = getCollection(o, "outbox", a.id)
 	return a, nil
 }

+ 22 - 19
pub/collection.go

@@ -7,7 +7,7 @@ import (
 	"mimicry/client"
 	"fmt"
 	"golang.org/x/exp/slices"
-	"log"
+	"sync"
 )
 
 /*
@@ -78,10 +78,6 @@ func (c *Collection) Size() (uint64, error) {
 }
 
 func (c *Collection) Harvest(amount uint, startingPoint uint) ([]Tangible, Container, uint) {
-	// To work through this problem you need to go through this step by step and
-	// make sure the logic is good. Then you should probably start writing some tests
-	
-	log.Printf("amount: %d starting: %d", amount, startingPoint)
 	if c.elementsErr != nil && !errors.Is(c.elementsErr, object.ErrKeyNotPresent) {
 		return []Tangible{NewFailure(c.elementsErr)}, nil, 0
 	}
@@ -92,7 +88,6 @@ func (c *Collection) Harvest(amount uint, startingPoint uint) ([]Tangible, Conta
 	} else {
 		length = uint(len(c.elements))
 	}
-	log.Printf("length: %d", length)
 
 	// TODO: change to bool nextWillBeFetched in which case amount from this page is all
 	// and later on the variable is clear
@@ -106,29 +101,37 @@ func (c *Collection) Harvest(amount uint, startingPoint uint) ([]Tangible, Conta
 		amountFromThisPage = length - startingPoint
 	}
 
-	log.Printf("amount from this page: %d", amountFromThisPage)
 	fromThisPage := make([]Tangible, amountFromThisPage)
 	var fromLaterPages []Tangible
 	var nextCollection Container
 	var nextStartingPoint uint
 
-	// TODO: parallelize this
-
+	var wg sync.WaitGroup
 	for i := uint(0); i < amountFromThisPage; i++ {
-		fromThisPage[i] = NewTangible(c.elements[i+startingPoint], c.id)
+		i := i
+		wg.Add(1)
+		go func() {
+			fromThisPage[i] = NewTangible(c.elements[i+startingPoint], c.id)
+			wg.Done()
+		}()
 	}
 
-	if errors.Is(c.nextErr, object.ErrKeyNotPresent) || length > amount + startingPoint {
-		fromLaterPages, nextCollection, nextStartingPoint = []Tangible{}, c, amount + startingPoint
-	} else {
-		if c.nextErr != nil {
-			fromLaterPages, nextCollection, nextStartingPoint = []Tangible{NewFailure(c.nextErr)}, c, amount + startingPoint
-		} else if next, err := NewCollection(c.next, c.id); err != nil {
-			fromLaterPages, nextCollection, nextStartingPoint = []Tangible{NewFailure(err)}, c, amount + startingPoint
+	wg.Add(1)
+	go func() {
+		if errors.Is(c.nextErr, object.ErrKeyNotPresent) || length > amount + startingPoint {
+			fromLaterPages, nextCollection, nextStartingPoint = []Tangible{}, c, amount + startingPoint
 		} else {
-			fromLaterPages, nextCollection, nextStartingPoint = next.Harvest(amount - amountFromThisPage, 0)
+			if c.nextErr != nil {
+				fromLaterPages, nextCollection, nextStartingPoint = []Tangible{NewFailure(c.nextErr)}, c, amount + startingPoint
+			} else if next, err := NewCollection(c.next, c.id); err != nil {
+				fromLaterPages, nextCollection, nextStartingPoint = []Tangible{NewFailure(err)}, c, amount + startingPoint
+			} else {
+				fromLaterPages, nextCollection, nextStartingPoint = next.Harvest(amount - amountFromThisPage, 0)
+			}
 		}
-	}
+		wg.Done()
+	}()
+	wg.Wait()
 
 	return append(fromThisPage, fromLaterPages...), nextCollection, nextStartingPoint
 }

+ 16 - 39
pub/common.go

@@ -6,6 +6,7 @@ import (
 	"errors"
 	"net/url"
 	"mimicry/client"
+	"sync"
 )
 
 var (
@@ -16,26 +17,6 @@ const (
 	timeFormat = "3:04 pm on 2 Jan 2006"
 )
 
-/*
-	This implements functions common to the different types.
-	- getActors
-	- getCollection
-	- getActor
-	- getPostOrActor
-	- NewTangible
-
-	// these will return an error on any problem
-	- getBestLink, link impl will need the link, Rating(), mediatype, and be willing to take in Posts or Links
-	- getFirstLinkShorthand
-	- getBestLinkShorthand
-
-	// used exclusively for attachments, honestly I
-	// think it should probably return markup.
-	// probably should actually be a function within 
-	// Post
-	- getLinks
-*/
-
 type TangibleWithName interface {
 	Tangible
 	Name() string
@@ -48,26 +29,22 @@ func getActors(o object.Object, key string, source *url.URL) []TangibleWithName
 		return []TangibleWithName{NewFailure(err)}
 	}
 
-	// TODO: parallelize will probably require making fixed size
-	// full width, swapping publics for nils, then later filtering
-	// out the nils to reach a dynamic width
-	output := []TangibleWithName{}
-	for _, element := range list {
-		if narrowed, ok := element.(string); ok {
-			if narrowed == "https://www.w3.org/ns/activitystreams#Public" ||
-			narrowed == "as:Public" ||
-			narrowed == "Public" {
-			continue
-		}
-		}
-
-		fetched, err := NewActor(element, source)
-		if err != nil {
-			output = append(output, NewFailure(err))
-		} else {
-			output = append(output, fetched)
-		}
+	output := make([]TangibleWithName, len(list))
+	var wg sync.WaitGroup
+	for i := range list {
+		wg.Add(1)
+		i := i
+		go func() {
+			fetched, err := NewActor(list[i], source)
+			if err != nil {
+				output[i] = NewFailure(err)
+			} else {
+				output[i] = fetched
+			}
+			wg.Done()
+		}()
 	}
+	wg.Wait()
 	return output
 }
 

+ 14 - 14
pub/post.go

@@ -13,6 +13,7 @@ import (
 	"golang.org/x/exp/slices"
 	"mimicry/mime"
 	"mimicry/render"
+	"sync"
 )
 
 type Post struct {
@@ -79,20 +80,19 @@ func NewPostFromObject(o object.Object, id *url.URL) (*Post, error) {
 		p.link, p.linkErr = getFirstLinkShorthand(o, "url")
 	}
 
-	// TODO: perhaps the actor fraud check should occur right here--if
-	// all fail, the entire constructor fails? Probably not, what if
-	// one fails because of the protocol, another fails because of fraud
-	// check, I probably want to show the whole thing
-	p.creators = getActors(o, "attributedTo", p.id)
-	p.recipients = getActors(o, "audience", p.id)
-	p.attachments, p.attachmentsErr = getLinks(o, "attachment")
-
-	// TODO: in the future, I may want to pass an assertion to the collection
-	// asserting that the posts therein do reply to this post
-	p.comments, p.commentsErr = getCollection(o, "replies", p.id)
-	if errors.Is(p.commentsErr, object.ErrKeyNotPresent) {
-		p.comments, p.commentsErr = getCollection(o, "comments", p.id)
-	}
+	var wg sync.WaitGroup
+	wg.Add(4)
+	go func() {p.creators = getActors(o, "attributedTo", p.id); wg.Done()}()
+	go func() {p.recipients = getActors(o, "audience", p.id); wg.Done()}()
+	go func() {p.attachments, p.attachmentsErr = getLinks(o, "attachment"); wg.Done()}()
+	go func() {
+		p.comments, p.commentsErr = getCollection(o, "replies", p.id)
+		if errors.Is(p.commentsErr, object.ErrKeyNotPresent) {
+			p.comments, p.commentsErr = getCollection(o, "comments", p.id)
+		}
+		wg.Done()
+	}()
+	wg.Wait()
 	return p, nil
 }
 

+ 1 - 2
style/style.go

@@ -62,8 +62,7 @@ func QuoteBlock(text string) string {
 }
 
 func LinkBlock(text string) string {
-	indented := ansi.Indent(text, "  ", false)
-	return "‣ " + Link(indented)
+	return "‣ " + ansi.Indent(Link(text), "  ", false)
 }
 
 func Header(text string, level uint) string {

+ 30 - 8
ui/ui.go

@@ -5,7 +5,7 @@ import (
 	"mimicry/ansi"
 	"mimicry/feed"
 	"fmt"
-	"log"
+	"sync"
 )
 
 type State struct {
@@ -33,7 +33,6 @@ func (s *State) View(width int, height uint) string {
 		var serialized string
 		if i == 0 {
 			serialized = s.feed.Get(i).String(width - 4)
-			log.Printf("%d\n", len(serialized))
 		} else if i > 0 {
 			serialized = "╰ " + ansi.Indent(s.feed.Get(i).Preview(width - 4), "  ", false)
 		} else {
@@ -49,7 +48,6 @@ func (s *State) View(width int, height uint) string {
 			bottom += ansi.Indent("│\n" + serialized, "  ", true)
 		}
 	}
-	log.Printf("%s\n", center)
 	return ansi.CenterVertically(top, center, bottom, height)
 }
 
@@ -66,6 +64,9 @@ func (s *State) Update(input byte) {
 
 		if s.feed.Contains(s.index - 1) {
 			s.index -= 1
+
+			/* Preload more into the HTTP cache */
+			s.PreloadUp(s.context)
 		}
 	case 'j': // down
 		mayNeedLoading := s.index + 1 + s.context
@@ -79,6 +80,9 @@ func (s *State) Update(input byte) {
 
 		if s.feed.Contains(s.index + 1) {
 			s.index += 1
+
+			/* Preload more into the HTTP cache */
+			s.PreloadDown(s.context)
 		}
 	}
 	// TODO: the catchall down here will be to look at s.feed.Get(s.index).References()
@@ -89,26 +93,44 @@ func (s *State) SwitchTo(item pub.Any)  {
 	switch narrowed := item.(type) {
 	case pub.Tangible:
 		s.feed = feed.Create(narrowed)
-		s.feed.Prepend(narrowed.Parents(uint(s.context)))
-		var children []pub.Tangible
-		children, s.page, s.basepoint = narrowed.Children(uint(s.context))
+		var parents, children []pub.Tangible
+		var wg sync.WaitGroup
+		wg.Add(2)
+		go func() {parents = narrowed.Parents(uint(s.context)); wg.Done()}()
+		go func() {children, s.page, s.basepoint = narrowed.Children(uint(s.context)); wg.Done()}()
+		wg.Wait()
+		s.feed.Prepend(parents)
 		s.feed.Append(children)
+		s.PreloadUp(s.context)
+		s.PreloadDown(s.context)
 	case pub.Container:
 		var children []pub.Tangible
 		children, s.page, s.basepoint = narrowed.Harvest(uint(s.context), 0)
 		s.feed = feed.CreateAndAppend(children)
+		s.PreloadDown(s.context)
 	default:
 		panic(fmt.Sprintf("unrecognized non-Tangible non-Container: %T", item))
 	}
 }
 
+func (s *State) PreloadDown(amount int) {
+	if s.page != nil {
+		go s.page.Harvest(uint(amount), s.basepoint)
+	}
+} 
+
+func (s *State) PreloadUp(amount int) {
+	if s.feed.Contains(s.index - s.context) {
+		go s.feed.Get(s.index - s.context).Parents(uint(amount))
+	}
+}
+
 func Start(input string) *State {
 	item := pub.FetchUserInput(input)
-	log.Printf("%v\n", item)
 	s := &State{
 		feed: &feed.Feed{},
 		index: 0,
-		context: 3,
+		context: 5,
 	}
 	s.SwitchTo(item)
 	return s