Browse Source

link extraction seems to be working

Benton Edmondson 2 years ago
parent
commit
2911ff45a8
5 changed files with 45 additions and 107 deletions
  1. 4 2
      kinds/construct.go
  2. 6 3
      kinds/extractor.go
  3. 14 97
      kinds/link.go
  4. 17 1
      kinds/post.go
  5. 4 4
      render/render.go

+ 4 - 2
kinds/construct.go

@@ -30,8 +30,10 @@ func Construct(unstructured Dict, source *url.URL) (Content, error) {
 	// if the JSON came from a source (e.g. inline in another collection), with a
 	// different hostname than its ID, refetch
 	// if the JSON only has two keys (type and id), refetch
-	if (source != nil && source.Hostname() != id.Hostname()) || (len(unstructured) <= 2 && hasIdentifier) {
-		return Fetch(id)
+	if (source != nil && id != nil) {
+		if (source.Hostname() != id.Hostname()) || (len(unstructured) <= 2 && hasIdentifier) {
+			return Fetch(id)
+		}
 	}
 
 	switch kind {

+ 6 - 3
kinds/extractor.go

@@ -136,6 +136,9 @@ func GetList(d Dict, key string) ([]any, error) {
 
 	Used exclusively for `Post.url`.
 */
+// TODO: make this a method of Post. It only ever operates on
+// Posts, so that is easier to reason about, and it would let
+// this code reuse the other Post methods.
 func GetLinks(d Dict, key string) ([]Link, error) {
 	values, err := GetList(d, "url")
 	if err != nil {
@@ -155,9 +158,9 @@ func GetLinks(d Dict, key string) ([]Link, error) {
 		defaultMediaType = nil
 	} else { defaultMediaType = mediaType }
 	var defaultName any // (string | nil)
-	if name, nameErr := Get[string](d, "name"); nameErr != nil {
-		defaultName = name
-	} else { defaultName = nil }
+	if name, nameErr := GetNatural(d, "name", "en"); nameErr != nil {
+		defaultName = nil
+	} else { defaultName = name }
 
 	for _, el := range values {
 		switch narrowed := el.(type) {

+ 14 - 97
kinds/link.go

@@ -2,6 +2,7 @@ package kinds
 
 import (
 	"net/url"
+	"strings"
 )
 
 type Link Dict
@@ -24,7 +25,8 @@ func (l Link) URL() (*url.URL, error) {
 }
 
 func (l Link) Alt() (string, error) {
-	return Get[string](l, "name")
+	alt, err := Get[string](l, "name")
+	return strings.TrimSpace(alt), err
 }
 
 func (l Link) Identifier() (*url.URL, error) {
@@ -33,102 +35,17 @@ func (l Link) Identifier() (*url.URL, error) {
 
 	// TODO: render this as proper markup rather than plain text
 func (l Link) String() (string, error) {
-	if url, err := l.URL(); err == nil {
-		return url.String(), nil
-	} else {
-		return "", err
-	}
-}
-
-// guide:
-// Audio, Image, Video
-// filter for ones with audio/, image/, video/
-// as mime type, tiebreaker is resolution
-// otherwise just take what you can get
-// Article, Note, Page, Document
-// probably honestly just take the first one
-
-// probably provide the priorities as lists
-// then write a function that looks up the list
-
-// var priorities = map[string][]string{
-// 	"image": []string{""}
-// }
-
-// given a Post, find the best link
-// func GetLink(p Post) (Link, error) {
-// 	kind, err := p.Kind()
-// 	if err != nil {
-// 		return nil, err
-// 	}
-// 	switch kind {
-// 	// case "audio":
-// 	// 	fallthrough
-// 	// case "image":
-// 	// 	fallthrough
-// 	// case "video":
-// 	// 	return GetBestLink(p)
-// 	case "article":
-// 		fallthrough
-// 	case "document":
-// 		fallthrough
-// 	case "note":
-// 		fallthrough
-// 	case "page":
-// 		return GetFirstLink(p)
-// 	default:
-// 		return nil, errors.New("Link extraction is not supported for type " + kind)
-// 	}
-// }
-
-// pulls the link with the mime type that
-// matches the Kind of the post, used for
-// image, audio, video
-
-// the reason this can't use GetContent is because GetContent
-// treats strings as URLs used to find the end object,
-// whereas in this context strings are URLs that are the href
-// being the endpoint the Link represents
-// func GetBestLink(p Post) (Link, error) {
-
-// }
-
-// pulls the first link
-// func GetFirstLink(p Post) (Link, error) {
-// 	values, err := GetList(p, "url")
-// 	if err != nil {
-// 		return nil, err
-// 	}
-	
-// 	var individual any
-
-// 	if len(values) == 0 {
-// 		return nil, errors.New("Link is an empty list on the post")
-// 	} else {
-// 		individual = values[0]
-// 	}
-
-// 	switch narrowed := individual.(type) {
-// 	case string:
-// 		// here I should build the link out of the outer object
-// 		return Link{"type": "Link", "href": narrowed}, nil
-// 	case Dict:
-// 		return Construct(narrowed)
-// 	default:
-// 		return nil, errors.New("The first URL entry on the post is a non-string, non-object. What?")
-// 	}
-
-// }
-
-//
-// GetLinks(p Post)
-// similar to GetContent, but treats strings
-// as Link.href, not as a reference to an object
-// that should be fulfilled
-// so whereas GetContent uses networking, GetLink
-// does not
+	output := ""
 
+	if alt, err := l.Alt(); err == nil {
+		output += alt
+	} else if url, err := l.URL(); err == nil {
+		output += url.String()
+	}
 
-// GetBestLink - uses mime types/resolutions to determine best link
-// of a list of Links
+	if mediaType, err := l.MediaType(); err == nil {
+		output += " (" + mediaType + ")"
+	}
 
+	return output, nil
+}

+ 17 - 1
kinds/post.go

@@ -30,7 +30,8 @@ func (p Post) Body() (string, error) {
 
 func (p Post) BodyPreview() (string, error) {
 	body, err := p.Body()
-	if len(body) > 280*2 { // pretty much arbitrary length >280
+	// TODO: convert body to []rune once and do both the length check and the truncation on runes
+	if len(body) > 280*2 { // this is a bug because len counts bytes whereas later I work based on runes
 		return fmt.Sprintf("%s…", string([]rune(body)[:280])), err
 	} else {
 		return body, err
@@ -57,6 +58,10 @@ func (p Post) Creators() ([]Actor, error) {
 	return GetContent[Actor](p, "attributedTo")
 }
 
+func (p Post) Attachments() ([]Link, error) {
+	return GetAsLinks(p, "attachment")
+}
+
 // func (p Post) bestLink() (Link, error) {
 
 // }
@@ -116,5 +121,16 @@ func (p Post) String() (string, error) {
 		}
 	}
 
+	if attachments, err := p.Attachments(); err == nil {
+		output += "\nAttachments:\n"
+		for _, attachment := range attachments {
+			if attachmentStr, err := attachment.String(); err == nil {
+				output += attachmentStr + "\n"
+			} else {
+				continue
+			}
+		}
+	}
+
 	return strings.TrimSpace(output), nil
 }

+ 4 - 4
render/render.go

@@ -5,16 +5,16 @@ import (
 	"errors"
 )
 
-func Render(text string, kind string) (string, error) {
+func Render(text string, mediaType string) (string, error) {
 	switch {
-	case strings.Contains(kind, "text/plain"): 
+	case strings.Contains(mediaType, "text/plain"): 
 		return text, nil
-	case strings.Contains(kind, "text/html"):
+	case strings.Contains(mediaType, "text/html"):
 		node, err := html.Parse(text)
 		if err == nil {
 			return "", err
 		}
 		return renderHTML(node), nil
 	default:
-		return "", errors.New("Cannot render text of mime type %s", kind)
+		return "", errors.New("Cannot render text of mime type %s", mediaType)
 }