jtp.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. package jtp
  2. import (
  3. "regexp"
  4. "golang.org/x/exp/slices"
  5. "errors"
  6. "crypto/tls"
  7. "net"
  8. "net/url"
  9. "bufio"
  10. "fmt"
  11. "strings"
  12. "encoding/json"
  13. )
  14. // TODO: parseMediaType should probably return an error if the mediaType is invalid
  15. // or at least do something that will be easier to debug
  16. type MediaType struct {
  17. Supertype string
  18. Subtype string
  19. /* Full omits the parameters */
  20. Full string
  21. }
  22. var dialer = &tls.Dialer{
  23. NetDialer: &net.Dialer{},
  24. }
  25. var mediaTypeRegexp = regexp.MustCompile(`(?s)^(([!#$%&'*+\-.^_\x60|~a-zA-Z0-9]+)/([!#$%&'*+\-.^_\x60|~a-zA-Z0-9]+)).*$`)
  26. var statusLineRegexp = regexp.MustCompile(`^HTTP/1\.[0-9] ([0-9]{3}).*\n$`)
  27. var contentTypeRegexp = regexp.MustCompile(`^(?i:content-type):[ \t\r]*(.*?)[ \t\r]*\n$`)
  28. var locationRegexp = regexp.MustCompile(`^(?i:location):[ \t\r]*(.*?)[ \t\r]*\n$`)
  29. var acceptHeader = `application/activity+json,` +
  30. `application/ld+json; profile="https://www.w3.org/ns/activitystreams"`
  31. var toleratedTypes = []string{
  32. "application/activity+json",
  33. "application/ld+json",
  34. "application/json",
  35. }
  36. /*
  37. I send an HTTP/1.0 request to ensure the server doesn't respond
  38. with chunked transfer encoding.
  39. See: https://httpwg.org/specs/rfc9110.html
  40. */
  41. /*
  42. link
  43. the url being requested
  44. maxRedirects
  45. the maximum number of redirects to take
  46. */
  47. // TODO: the number of redirects must be limited
  48. func Get(link *url.URL, maxRedirects uint) (map[string]any, error) {
  49. if link.Scheme != "https" {
  50. return nil, errors.New(link.Scheme + " is not supported in requests, only https")
  51. }
  52. port := link.Port()
  53. if port == "" {
  54. port = "443"
  55. }
  56. hostport := net.JoinHostPort(link.Hostname(), port)
  57. connection, err := dialer.Dial("tcp", hostport)
  58. if err != nil {
  59. return nil, err
  60. }
  61. defer connection.Close()
  62. _, err = connection.Write([]byte(
  63. "GET " + link.RequestURI() + " HTTP/1.0\r\n" +
  64. "Host: " + link.Host + "\r\n" +
  65. "Accept: " + acceptHeader + "\r\n" +
  66. "Accept-Encoding: identity\r\n" +
  67. "\r\n",
  68. ))
  69. if err != nil {
  70. return nil, err
  71. }
  72. buf := bufio.NewReader(connection)
  73. statusLine, err := buf.ReadString('\n')
  74. if err != nil {
  75. return nil, fmt.Errorf("Encountered error while reading status line of HTTP response: %w", err)
  76. }
  77. status, err := parseStatusLine(statusLine)
  78. if err != nil {
  79. return nil, err
  80. }
  81. if strings.HasPrefix(status, "3") {
  82. location, err := findLocation(buf, link)
  83. if err != nil {
  84. return nil, err
  85. }
  86. if maxRedirects == 0 {
  87. return nil, errors.New("Received " + status + " but max redirects has already been reached")
  88. }
  89. return Get(location, maxRedirects - 1)
  90. }
  91. if status != "200" && status != "201" && status != "202" && status != "203" {
  92. return nil, errors.New("Received invalid status " + status)
  93. }
  94. err = validateHeaders(buf)
  95. if err != nil {
  96. return nil, err
  97. }
  98. var dictionary map[string]any
  99. err = json.NewDecoder(buf).Decode(&dictionary)
  100. if err != nil {
  101. return nil, err
  102. }
  103. return dictionary, nil
  104. }
  105. func ParseMediaType(text string) (MediaType, error) {
  106. matches := mediaTypeRegexp.FindStringSubmatch(text)
  107. if len(matches) != 4 {
  108. return MediaType{}, errors.New(text + " is not a valid media type")
  109. }
  110. return MediaType{
  111. Supertype: matches[2],
  112. Subtype: matches[3],
  113. Full: matches[1],
  114. }, nil
  115. }
  116. func parseStatusLine(text string) (string, error) {
  117. matches := statusLineRegexp.FindStringSubmatch(text)
  118. if len(matches) != 2 {
  119. return "", errors.New("Received invalid status line: " + text)
  120. }
  121. return matches[1], nil
  122. }
  123. func parseContentType(text string) (MediaType, bool, error) {
  124. matches := contentTypeRegexp.FindStringSubmatch(text)
  125. if len(matches) != 2 {
  126. return MediaType{}, false, nil
  127. }
  128. mediaType, err := ParseMediaType(matches[1])
  129. if err != nil {
  130. return MediaType{}, true, err
  131. }
  132. return mediaType, true, nil
  133. }
  134. func parseLocation(text string, baseLink *url.URL) (link *url.URL, isLocationLine bool, err error) {
  135. matches := locationRegexp.FindStringSubmatch(text)
  136. if len(matches) != 2 {
  137. return nil, false, nil
  138. }
  139. reference, err := url.Parse(matches[1])
  140. if err != nil {
  141. return nil, true, err
  142. }
  143. return baseLink.ResolveReference(reference), true, nil
  144. }
  145. func validateHeaders(buf *bufio.Reader) error {
  146. contentTypeValidated := false
  147. for {
  148. line, err := buf.ReadString('\n')
  149. if err != nil {
  150. return err
  151. }
  152. if line == "\r\n" {
  153. break
  154. }
  155. mediaType, isContentTypeLine, err := parseContentType(line)
  156. if err != nil {
  157. return err
  158. }
  159. if !isContentTypeLine {
  160. continue
  161. }
  162. if slices.Contains(toleratedTypes, mediaType.Full) {
  163. contentTypeValidated = true
  164. } else {
  165. return errors.New("Response contains invalid content type " + mediaType.Full)
  166. }
  167. }
  168. if !contentTypeValidated {
  169. return errors.New("Response did not contain a content type")
  170. }
  171. return nil
  172. }
  173. func findLocation(buf *bufio.Reader, baseLink *url.URL) (*url.URL, error) {
  174. for {
  175. line, err := buf.ReadString('\n')
  176. if err != nil {
  177. return nil, err
  178. }
  179. if line == "\r\n" {
  180. break
  181. }
  182. location, isLocationLine, err := parseLocation(line, baseLink)
  183. if err != nil {
  184. return nil, err
  185. }
  186. if !isLocationLine {
  187. continue
  188. }
  189. return location, nil
  190. }
  191. return nil, errors.New("Location is not present in headers")
  192. }