jtp.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. package jtp
  2. import (
  3. "bufio"
  4. "crypto/tls"
  5. "encoding/json"
  6. "errors"
  7. "fmt"
  8. lru "github.com/hashicorp/golang-lru/v2"
  9. "servitor/mime"
  10. "net"
  11. "net/url"
  12. "regexp"
  13. "strings"
  14. "servitor/config"
  15. )
  16. var dialer = &net.Dialer{
  17. Timeout: config.Parsed.Network.Timeout,
  18. }
  19. type bundle struct {
  20. item map[string]any
  21. source *url.URL
  22. err error
  23. }
  24. var cache, _ = lru.New[string, bundle](config.Parsed.Network.CacheSize)
  25. var mediaTypeRegexp = regexp.MustCompile(`(?s)^(([!#$%&'*+\-.^_\x60|~a-zA-Z0-9]+)/([!#$%&'*+\-.^_\x60|~a-zA-Z0-9]+)).*$`)
  26. var statusLineRegexp = regexp.MustCompile(`^HTTP/1\.[0-9] ([0-9]{3}).*\n$`)
  27. var contentTypeRegexp = regexp.MustCompile(`^(?i:content-type):[ \t\r]*(.*?)[ \t\r]*\n$`)
  28. var locationRegexp = regexp.MustCompile(`^(?i:location):[ \t\r]*(.*?)[ \t\r]*\n$`)
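/*
	I send an HTTP/1.0 request to ensure the server doesn't respond
	with chunked transfer encoding: a server must not send
	Transfer-Encoding unless the request indicates HTTP/1.1 or later.
	See: https://httpwg.org/specs/rfc9112.html (section 6.1, Transfer-Encoding)
*/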
  34. /*
  35. link
  36. the url being requested
  37. maxRedirects
  38. the maximum number of redirects to take
  39. */
  40. func Get(link *url.URL, accept string, tolerated []string, maxRedirects uint) (map[string]any, *url.URL, error) {
  41. if cached, ok := cache.Get(link.String()); ok {
  42. return cached.item, cached.source, cached.err
  43. }
  44. if link.Scheme != "https" {
  45. return nil, nil, errors.New(link.Scheme + " is not supported in requests, only https")
  46. }
  47. port := link.Port()
  48. if port == "" {
  49. port = "443"
  50. }
  51. hostport := net.JoinHostPort(link.Hostname(), port)
  52. connection, err := tls.DialWithDialer(dialer, "tcp", hostport, nil)
  53. if err != nil {
  54. return nil, nil, err
  55. }
  56. _, err = connection.Write([]byte(
  57. "GET " + link.RequestURI() + " HTTP/1.0\r\n" +
  58. "Host: " + link.Host + "\r\n" +
  59. "Accept: " + accept + "\r\n" +
  60. "\r\n",
  61. ))
  62. if err != nil {
  63. return nil, nil, errors.Join(err, connection.Close())
  64. }
  65. buf := bufio.NewReader(connection)
  66. statusLine, err := buf.ReadString('\n')
  67. if err != nil {
  68. return nil, nil, errors.Join(
  69. fmt.Errorf("failed to parse HTTP status line: %w", err),
  70. connection.Close(),
  71. )
  72. }
  73. status, err := parseStatusLine(statusLine)
  74. if err != nil {
  75. return nil, nil, errors.Join(err, connection.Close())
  76. }
  77. if strings.HasPrefix(status, "3") {
  78. location, err := findLocation(buf, link)
  79. if err != nil {
  80. return nil, nil, errors.Join(err, connection.Close())
  81. }
  82. if maxRedirects == 0 {
  83. return nil, nil, errors.Join(
  84. errors.New("received "+status+" after redirecting too many times"),
  85. connection.Close(),
  86. )
  87. }
  88. if err := connection.Close(); err != nil {
  89. return nil, nil, err
  90. }
  91. var b bundle
  92. b.item, b.source, b.err = Get(location, accept, tolerated, maxRedirects-1)
  93. cache.Add(link.String(), b)
  94. return b.item, b.source, b.err
  95. }
  96. if status != "200" && status != "201" && status != "202" && status != "203" {
  97. return nil, nil, errors.Join(
  98. errors.New("received invalid status "+status),
  99. connection.Close(),
  100. )
  101. }
  102. err = validateHeaders(buf, tolerated)
  103. if err != nil {
  104. return nil, nil, errors.Join(err, connection.Close())
  105. }
  106. var dictionary map[string]any
  107. err = json.NewDecoder(buf).Decode(&dictionary)
  108. if err != nil {
  109. return nil, nil, errors.Join(
  110. fmt.Errorf("failed to parse JSON: %w", err),
  111. connection.Close(),
  112. )
  113. }
  114. if err := connection.Close(); err != nil {
  115. return nil, nil, err
  116. }
  117. cache.Add(link.String(), bundle{
  118. item: dictionary,
  119. source: link,
  120. err: nil,
  121. })
  122. return dictionary, link, nil
  123. }
  124. func parseStatusLine(text string) (string, error) {
  125. matches := statusLineRegexp.FindStringSubmatch(text)
  126. if len(matches) != 2 {
  127. return "", errors.New("received invalid status line: " + text)
  128. }
  129. return matches[1], nil
  130. }
  131. func parseContentType(text string) (*mime.MediaType, bool, error) {
  132. matches := contentTypeRegexp.FindStringSubmatch(text)
  133. if len(matches) != 2 {
  134. return nil, false, nil
  135. }
  136. mediaType, err := mime.Parse(matches[1])
  137. if err != nil {
  138. return nil, true, err
  139. }
  140. return mediaType, true, nil
  141. }
  142. func parseLocation(text string, baseLink *url.URL) (link *url.URL, isLocationLine bool, err error) {
  143. matches := locationRegexp.FindStringSubmatch(text)
  144. if len(matches) != 2 {
  145. return nil, false, nil
  146. }
  147. reference, err := url.Parse(matches[1])
  148. if err != nil {
  149. return nil, true, err
  150. }
  151. return baseLink.ResolveReference(reference), true, nil
  152. }
  153. func validateHeaders(buf *bufio.Reader, tolerated []string) error {
  154. contentTypeValidated := false
  155. for {
  156. line, err := buf.ReadString('\n')
  157. if err != nil {
  158. return err
  159. }
  160. if line == "\r\n" || line == "\n" {
  161. break
  162. }
  163. mediaType, isContentTypeLine, err := parseContentType(line)
  164. if err != nil {
  165. return err
  166. }
  167. if !isContentTypeLine {
  168. continue
  169. }
  170. if mediaType.Matches(tolerated) {
  171. contentTypeValidated = true
  172. } else {
  173. return errors.New("response is of invalid type " + mediaType.Essence)
  174. }
  175. }
  176. if !contentTypeValidated {
  177. return errors.New("response is missing a content type")
  178. }
  179. return nil
  180. }
  181. func findLocation(buf *bufio.Reader, baseLink *url.URL) (*url.URL, error) {
  182. for {
  183. line, err := buf.ReadString('\n')
  184. if err != nil {
  185. return nil, err
  186. }
  187. if line == "\r\n" || line == "\n" {
  188. break
  189. }
  190. location, isLocationLine, err := parseLocation(line, baseLink)
  191. if err != nil {
  192. return nil, err
  193. }
  194. if !isLocationLine {
  195. continue
  196. }
  197. return location, nil
  198. }
  199. return nil, errors.New("response is missing Location header")
  200. }