regexp.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. // Copyright 2012 The Gorilla Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package mux
  5. import (
  6. "bytes"
  7. "fmt"
  8. "net/http"
  9. "net/url"
  10. "regexp"
  11. "strconv"
  12. "strings"
  13. )
  14. type routeRegexpOptions struct {
  15. strictSlash bool
  16. useEncodedPath bool
  17. }
  18. type regexpType int
  19. const (
  20. regexpTypePath regexpType = 0
  21. regexpTypeHost regexpType = 1
  22. regexpTypePrefix regexpType = 2
  23. regexpTypeQuery regexpType = 3
  24. )
  25. // newRouteRegexp parses a route template and returns a routeRegexp,
  26. // used to match a host, a path or a query string.
  27. //
  28. // It will extract named variables, assemble a regexp to be matched, create
  29. // a "reverse" template to build URLs and compile regexps to validate variable
  30. // values used in URL building.
  31. //
  32. // Previously we accepted only Python-like identifiers for variable
  33. // names ([a-zA-Z_][a-zA-Z0-9_]*), but currently the only restriction is that
  34. // name and pattern can't be empty, and names can't contain a colon.
  35. func newRouteRegexp(tpl string, typ regexpType, options routeRegexpOptions) (*routeRegexp, error) {
  36. // Check if it is well-formed.
  37. idxs, errBraces := braceIndices(tpl)
  38. if errBraces != nil {
  39. return nil, errBraces
  40. }
  41. // Backup the original.
  42. template := tpl
  43. // Now let's parse it.
  44. defaultPattern := "[^/]+"
  45. if typ == regexpTypeQuery {
  46. defaultPattern = ".*"
  47. } else if typ == regexpTypeHost {
  48. defaultPattern = "[^.]+"
  49. }
  50. // Only match strict slash if not matching
  51. if typ != regexpTypePath {
  52. options.strictSlash = false
  53. }
  54. // Set a flag for strictSlash.
  55. endSlash := false
  56. if options.strictSlash && strings.HasSuffix(tpl, "/") {
  57. tpl = tpl[:len(tpl)-1]
  58. endSlash = true
  59. }
  60. varsN := make([]string, len(idxs)/2)
  61. varsR := make([]*regexp.Regexp, len(idxs)/2)
  62. pattern := bytes.NewBufferString("")
  63. pattern.WriteByte('^')
  64. reverse := bytes.NewBufferString("")
  65. var end int
  66. var err error
  67. for i := 0; i < len(idxs); i += 2 {
  68. // Set all values we are interested in.
  69. raw := tpl[end:idxs[i]]
  70. end = idxs[i+1]
  71. parts := strings.SplitN(tpl[idxs[i]+1:end-1], ":", 2)
  72. name := parts[0]
  73. patt := defaultPattern
  74. if len(parts) == 2 {
  75. patt = parts[1]
  76. }
  77. // Name or pattern can't be empty.
  78. if name == "" || patt == "" {
  79. return nil, fmt.Errorf("mux: missing name or pattern in %q",
  80. tpl[idxs[i]:end])
  81. }
  82. // Build the regexp pattern.
  83. fmt.Fprintf(pattern, "%s(?P<%s>%s)", regexp.QuoteMeta(raw), varGroupName(i/2), patt)
  84. // Build the reverse template.
  85. fmt.Fprintf(reverse, "%s%%s", raw)
  86. // Append variable name and compiled pattern.
  87. varsN[i/2] = name
  88. varsR[i/2], err = regexp.Compile(fmt.Sprintf("^%s$", patt))
  89. if err != nil {
  90. return nil, err
  91. }
  92. }
  93. // Add the remaining.
  94. raw := tpl[end:]
  95. pattern.WriteString(regexp.QuoteMeta(raw))
  96. if options.strictSlash {
  97. pattern.WriteString("[/]?")
  98. }
  99. if typ == regexpTypeQuery {
  100. // Add the default pattern if the query value is empty
  101. if queryVal := strings.SplitN(template, "=", 2)[1]; queryVal == "" {
  102. pattern.WriteString(defaultPattern)
  103. }
  104. }
  105. if typ != regexpTypePrefix {
  106. pattern.WriteByte('$')
  107. }
  108. var wildcardHostPort bool
  109. if typ == regexpTypeHost {
  110. if !strings.Contains(pattern.String(), ":") {
  111. wildcardHostPort = true
  112. }
  113. }
  114. reverse.WriteString(raw)
  115. if endSlash {
  116. reverse.WriteByte('/')
  117. }
  118. // Compile full regexp.
  119. reg, errCompile := regexp.Compile(pattern.String())
  120. if errCompile != nil {
  121. return nil, errCompile
  122. }
  123. // Check for capturing groups which used to work in older versions
  124. if reg.NumSubexp() != len(idxs)/2 {
  125. panic(fmt.Sprintf("route %s contains capture groups in its regexp. ", template) +
  126. "Only non-capturing groups are accepted: e.g. (?:pattern) instead of (pattern)")
  127. }
  128. // Done!
  129. return &routeRegexp{
  130. template: template,
  131. regexpType: typ,
  132. options: options,
  133. regexp: reg,
  134. reverse: reverse.String(),
  135. varsN: varsN,
  136. varsR: varsR,
  137. wildcardHostPort: wildcardHostPort,
  138. }, nil
  139. }
  140. // routeRegexp stores a regexp to match a host or path and information to
  141. // collect and validate route variables.
  142. type routeRegexp struct {
  143. // The unmodified template.
  144. template string
  145. // The type of match
  146. regexpType regexpType
  147. // Options for matching
  148. options routeRegexpOptions
  149. // Expanded regexp.
  150. regexp *regexp.Regexp
  151. // Reverse template.
  152. reverse string
  153. // Variable names.
  154. varsN []string
  155. // Variable regexps (validators).
  156. varsR []*regexp.Regexp
  157. // Wildcard host-port (no strict port match in hostname)
  158. wildcardHostPort bool
  159. }
  160. // Match matches the regexp against the URL host or path.
  161. func (r *routeRegexp) Match(req *http.Request, match *RouteMatch) bool {
  162. if r.regexpType == regexpTypeHost {
  163. host := getHost(req)
  164. if r.wildcardHostPort {
  165. // Don't be strict on the port match
  166. if i := strings.Index(host, ":"); i != -1 {
  167. host = host[:i]
  168. }
  169. }
  170. return r.regexp.MatchString(host)
  171. } else {
  172. if r.regexpType == regexpTypeQuery {
  173. return r.matchQueryString(req)
  174. }
  175. path := req.URL.Path
  176. if r.options.useEncodedPath {
  177. path = req.URL.EscapedPath()
  178. }
  179. return r.regexp.MatchString(path)
  180. }
  181. }
  182. // url builds a URL part using the given values.
  183. func (r *routeRegexp) url(values map[string]string) (string, error) {
  184. urlValues := make([]interface{}, len(r.varsN))
  185. for k, v := range r.varsN {
  186. value, ok := values[v]
  187. if !ok {
  188. return "", fmt.Errorf("mux: missing route variable %q", v)
  189. }
  190. if r.regexpType == regexpTypeQuery {
  191. value = url.QueryEscape(value)
  192. }
  193. urlValues[k] = value
  194. }
  195. rv := fmt.Sprintf(r.reverse, urlValues...)
  196. if !r.regexp.MatchString(rv) {
  197. // The URL is checked against the full regexp, instead of checking
  198. // individual variables. This is faster but to provide a good error
  199. // message, we check individual regexps if the URL doesn't match.
  200. for k, v := range r.varsN {
  201. if !r.varsR[k].MatchString(values[v]) {
  202. return "", fmt.Errorf(
  203. "mux: variable %q doesn't match, expected %q", values[v],
  204. r.varsR[k].String())
  205. }
  206. }
  207. }
  208. return rv, nil
  209. }
  210. // getURLQuery returns a single query parameter from a request URL.
  211. // For a URL with foo=bar&baz=ding, we return only the relevant key
  212. // value pair for the routeRegexp.
  213. func (r *routeRegexp) getURLQuery(req *http.Request) string {
  214. if r.regexpType != regexpTypeQuery {
  215. return ""
  216. }
  217. templateKey := strings.SplitN(r.template, "=", 2)[0]
  218. for key, vals := range req.URL.Query() {
  219. if key == templateKey && len(vals) > 0 {
  220. return key + "=" + vals[0]
  221. }
  222. }
  223. return ""
  224. }
  225. func (r *routeRegexp) matchQueryString(req *http.Request) bool {
  226. return r.regexp.MatchString(r.getURLQuery(req))
  227. }
  228. // braceIndices returns the first level curly brace indices from a string.
  229. // It returns an error in case of unbalanced braces.
  230. func braceIndices(s string) ([]int, error) {
  231. var level, idx int
  232. var idxs []int
  233. for i := 0; i < len(s); i++ {
  234. switch s[i] {
  235. case '{':
  236. if level++; level == 1 {
  237. idx = i
  238. }
  239. case '}':
  240. if level--; level == 0 {
  241. idxs = append(idxs, idx, i+1)
  242. } else if level < 0 {
  243. return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
  244. }
  245. }
  246. }
  247. if level != 0 {
  248. return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
  249. }
  250. return idxs, nil
  251. }
  252. // varGroupName builds a capturing group name for the indexed variable.
  253. func varGroupName(idx int) string {
  254. return "v" + strconv.Itoa(idx)
  255. }
  256. // ----------------------------------------------------------------------------
  257. // routeRegexpGroup
  258. // ----------------------------------------------------------------------------
  259. // routeRegexpGroup groups the route matchers that carry variables.
  260. type routeRegexpGroup struct {
  261. host *routeRegexp
  262. path *routeRegexp
  263. queries []*routeRegexp
  264. }
  265. // setMatch extracts the variables from the URL once a route matches.
  266. func (v routeRegexpGroup) setMatch(req *http.Request, m *RouteMatch, r *Route) {
  267. // Store host variables.
  268. if v.host != nil {
  269. host := getHost(req)
  270. matches := v.host.regexp.FindStringSubmatchIndex(host)
  271. if len(matches) > 0 {
  272. extractVars(host, matches, v.host.varsN, m.Vars)
  273. }
  274. }
  275. path := req.URL.Path
  276. if r.useEncodedPath {
  277. path = req.URL.EscapedPath()
  278. }
  279. // Store path variables.
  280. if v.path != nil {
  281. matches := v.path.regexp.FindStringSubmatchIndex(path)
  282. if len(matches) > 0 {
  283. extractVars(path, matches, v.path.varsN, m.Vars)
  284. // Check if we should redirect.
  285. if v.path.options.strictSlash {
  286. p1 := strings.HasSuffix(path, "/")
  287. p2 := strings.HasSuffix(v.path.template, "/")
  288. if p1 != p2 {
  289. u, _ := url.Parse(req.URL.String())
  290. if p1 {
  291. u.Path = u.Path[:len(u.Path)-1]
  292. } else {
  293. u.Path += "/"
  294. }
  295. m.Handler = http.RedirectHandler(u.String(), http.StatusMovedPermanently)
  296. }
  297. }
  298. }
  299. }
  300. // Store query string variables.
  301. for _, q := range v.queries {
  302. queryURL := q.getURLQuery(req)
  303. matches := q.regexp.FindStringSubmatchIndex(queryURL)
  304. if len(matches) > 0 {
  305. extractVars(queryURL, matches, q.varsN, m.Vars)
  306. }
  307. }
  308. }
  309. // getHost tries its best to return the request host.
  310. // According to section 14.23 of RFC 2616 the Host header
  311. // can include the port number if the default value of 80 is not used.
  312. func getHost(r *http.Request) string {
  313. if r.URL.IsAbs() {
  314. return r.URL.Host
  315. }
  316. return r.Host
  317. }
  318. func extractVars(input string, matches []int, names []string, output map[string]string) {
  319. for i, name := range names {
  320. output[name] = input[matches[2*i+2]:matches[2*i+3]]
  321. }
  322. }