jtp.go 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. package jtp
  2. import (
  3. "regexp"
  4. "golang.org/x/exp/slices"
  5. "errors"
  6. "crypto/tls"
  7. "net"
  8. "net/url"
  9. "bufio"
  10. "fmt"
  11. "strings"
  12. "encoding/json"
  13. . "mimicry/preamble"
  14. )
// TODO: ParseMediaType already returns an error when the media type is invalid;
// consider making that error easier to debug.
  17. type MediaType struct {
  18. Supertype string
  19. Subtype string
  20. /* Full omits the parameters */
  21. Full string
  22. }
  23. var dialer = &tls.Dialer{
  24. NetDialer: &net.Dialer{},
  25. }
  26. var mediaTypeRegexp = regexp.MustCompile(`(?s)^(([!#$%&'*+\-.^_\x60|~a-zA-Z0-9]+)/([!#$%&'*+\-.^_\x60|~a-zA-Z0-9]+)).*$`)
  27. var statusLineRegexp = regexp.MustCompile(`^HTTP/1\.[0-9] ([0-9]{3}).*\n$`)
  28. var contentTypeRegexp = regexp.MustCompile(`^(?i:content-type):[ \t\r]*(.*?)[ \t\r]*\n$`)
  29. var locationRegexp = regexp.MustCompile(`^(?i:location):[ \t\r]*(.*?)[ \t\r]*\n$`)
  30. var acceptHeader = `application/activity+json,` +
  31. `application/ld+json; profile="https://www.w3.org/ns/activitystreams"`
  32. var toleratedTypes = []string{
  33. "application/activity+json",
  34. "application/ld+json",
  35. "application/json",
  36. }
  37. /*
  38. I send an HTTP/1.0 request to ensure the server doesn't respond
  39. with chunked transfer encoding.
  40. See: https://httpwg.org/specs/rfc9110.html
  41. */
  42. /*
  43. link
  44. the url being requested
  45. maxRedirects
  46. the maximum number of redirects to take
  47. */
  48. func Get(link *url.URL, maxRedirects uint) <-chan *Result[map[string]any] {
  49. channel := make(chan *Result[map[string]any], 1)
  50. go func() {
  51. if link.Scheme != "https" {
  52. channel <- Err[map[string]any](errors.New(link.Scheme + " is not supported in requests, only https"))
  53. return
  54. }
  55. port := link.Port()
  56. if port == "" {
  57. port = "443"
  58. }
  59. hostport := net.JoinHostPort(link.Hostname(), port)
  60. connection, err := dialer.Dial("tcp", hostport)
  61. if err != nil {
  62. channel <- Err[map[string]any](err)
  63. return
  64. }
  65. _, err = connection.Write([]byte(
  66. "GET " + link.RequestURI() + " HTTP/1.0\r\n" +
  67. "Host: " + link.Host + "\r\n" +
  68. "Accept: " + acceptHeader + "\r\n" +
  69. "Accept-Encoding: identity\r\n" +
  70. "\r\n",
  71. ))
  72. if err != nil {
  73. channel <- Err[map[string]any](err, connection.Close())
  74. return
  75. }
  76. buf := bufio.NewReader(connection)
  77. statusLine, err := buf.ReadString('\n')
  78. if err != nil {
  79. channel <- Err[map[string]any](
  80. fmt.Errorf("failed to parse HTTP status line: %w", err),
  81. connection.Close(),
  82. )
  83. return
  84. }
  85. status, err := parseStatusLine(statusLine)
  86. if err != nil {
  87. channel <- Err[map[string]any](err, connection.Close())
  88. return
  89. }
  90. if strings.HasPrefix(status, "3") {
  91. location, err := findLocation(buf, link)
  92. if err != nil {
  93. channel <- Err[map[string]any](err, connection.Close())
  94. return
  95. }
  96. if maxRedirects == 0 {
  97. channel <- Err[map[string]any](
  98. errors.New("Received " + status + " but max redirects has already been reached"),
  99. connection.Close(),
  100. )
  101. return
  102. }
  103. channel <- <-Get(location, maxRedirects - 1)
  104. return
  105. }
  106. if status != "200" && status != "201" && status != "202" && status != "203" {
  107. channel <- Err[map[string]any](errors.New("Received invalid status " + status))
  108. return
  109. }
  110. err = validateHeaders(buf)
  111. if err != nil {
  112. channel <- Err[map[string]any](err)
  113. return
  114. }
  115. var dictionary map[string]any
  116. err = json.NewDecoder(buf).Decode(&dictionary)
  117. if err != nil {
  118. channel <- Err[map[string]any](fmt.Errorf("failed to parse JSON: %w", err))
  119. return
  120. }
  121. err = connection.Close()
  122. if err != nil {
  123. channel <- Err[map[string]any](err)
  124. return
  125. }
  126. channel <- Ok(dictionary)
  127. }()
  128. return channel
  129. }
  130. func ParseMediaType(text string) (MediaType, error) {
  131. matches := mediaTypeRegexp.FindStringSubmatch(text)
  132. if len(matches) != 4 {
  133. return MediaType{}, errors.New(text + " is not a valid media type")
  134. }
  135. return MediaType{
  136. Supertype: matches[2],
  137. Subtype: matches[3],
  138. Full: matches[1],
  139. }, nil
  140. }
  141. func parseStatusLine(text string) (string, error) {
  142. matches := statusLineRegexp.FindStringSubmatch(text)
  143. if len(matches) != 2 {
  144. return "", errors.New("Received invalid status line: " + text)
  145. }
  146. return matches[1], nil
  147. }
  148. func parseContentType(text string) (MediaType, bool, error) {
  149. matches := contentTypeRegexp.FindStringSubmatch(text)
  150. if len(matches) != 2 {
  151. return MediaType{}, false, nil
  152. }
  153. mediaType, err := ParseMediaType(matches[1])
  154. if err != nil {
  155. return MediaType{}, true, err
  156. }
  157. return mediaType, true, nil
  158. }
  159. func parseLocation(text string, baseLink *url.URL) (link *url.URL, isLocationLine bool, err error) {
  160. matches := locationRegexp.FindStringSubmatch(text)
  161. if len(matches) != 2 {
  162. return nil, false, nil
  163. }
  164. reference, err := url.Parse(matches[1])
  165. if err != nil {
  166. return nil, true, err
  167. }
  168. return baseLink.ResolveReference(reference), true, nil
  169. }
  170. func validateHeaders(buf *bufio.Reader) error {
  171. contentTypeValidated := false
  172. for {
  173. line, err := buf.ReadString('\n')
  174. if err != nil {
  175. return err
  176. }
  177. if line == "\r\n" || line == "\n" {
  178. break
  179. }
  180. mediaType, isContentTypeLine, err := parseContentType(line)
  181. if err != nil {
  182. return err
  183. }
  184. if !isContentTypeLine {
  185. continue
  186. }
  187. if slices.Contains(toleratedTypes, mediaType.Full) {
  188. contentTypeValidated = true
  189. } else {
  190. return errors.New("Response contains invalid content type " + mediaType.Full)
  191. }
  192. }
  193. if !contentTypeValidated {
  194. return errors.New("Response did not contain a content type")
  195. }
  196. return nil
  197. }
  198. func findLocation(buf *bufio.Reader, baseLink *url.URL) (*url.URL, error) {
  199. for {
  200. line, err := buf.ReadString('\n')
  201. if err != nil {
  202. return nil, err
  203. }
  204. if line == "\r\n" || line == "\n" {
  205. break
  206. }
  207. location, isLocationLine, err := parseLocation(line, baseLink)
  208. if err != nil {
  209. return nil, err
  210. }
  211. if !isLocationLine {
  212. continue
  213. }
  214. return location, nil
  215. }
  216. return nil, errors.New("Location is not present in headers")
  217. }