1
0

rss2hook.go 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. // rss2hook is a simple utility which will make HTTP POST
  2. // requests to remote web-hooks when new items appear in an RSS feed.
  3. //
  4. // Steve
  5. //
  6. package main
  7. import (
  8. "bufio"
  9. "bytes"
  10. "crypto/sha1"
  11. "encoding/hex"
  12. "encoding/json"
  13. "flag"
  14. "fmt"
  15. "io/ioutil"
  16. "net/http"
  17. "os"
  18. "os/signal"
  19. "regexp"
  20. "strings"
  21. "syscall"
  22. "time"
  23. "github.com/mmcdole/gofeed"
  24. "github.com/robfig/cron"
  25. )
  26. // RSSEntry describes a single RSS feed and the corresponding hook
  27. // to POST to.
  28. type RSSEntry struct {
  29. // The URL of the RSS/Atom feed
  30. feed string
  31. // The end-point to make the webhook request to.
  32. hook string
  33. }
  34. // Loaded contains the loaded feeds + hooks, as read from the specified
  35. // configuration file
  36. var Loaded []RSSEntry
  37. // Timeout is the (global) timeout we use when loading remote RSS
  38. // feeds.
  39. var Timeout time.Duration
  40. // loadConfig loads the named configuration file and populates our
  41. // `Loaded` list of RSS-feeds & Webhook addresses
  42. func loadConfig(filename string) {
  43. file, err := os.Open(filename)
  44. if err != nil {
  45. fmt.Printf("Error opening %s - %s\n", filename, err.Error())
  46. return
  47. }
  48. defer file.Close()
  49. //
  50. // Process it line by line.
  51. //
  52. scanner := bufio.NewScanner(file)
  53. for scanner.Scan() {
  54. // Get the next line, and strip leading/trailing space
  55. tmp := scanner.Text()
  56. tmp = strings.TrimSpace(tmp)
  57. //
  58. // Skip lines that begin with a comment.
  59. //
  60. if (tmp != "") && (!strings.HasPrefix(tmp, "#")) {
  61. //
  62. // Otherwise find the feed + post-point
  63. //
  64. parser := regexp.MustCompile("^(.*)=([^=]+)")
  65. match := parser.FindStringSubmatch(tmp)
  66. //
  67. // OK we found a suitable entry.
  68. //
  69. if len(match) == 3 {
  70. feed := strings.TrimSpace(match[1])
  71. hook := strings.TrimSpace(match[2])
  72. // Append the new entry to our list
  73. entry := RSSEntry{feed: feed, hook: hook}
  74. Loaded = append(Loaded, entry)
  75. }
  76. }
  77. }
  78. }
  79. // fetchFeed fetches the contents of the specified URL.
  80. func fetchFeed(url string) (string, error) {
  81. // Ensure we setup a timeout for our fetch
  82. client := &http.Client{Timeout: Timeout}
  83. // We'll only make a GET request
  84. req, err := http.NewRequest("GET", url, nil)
  85. if err != nil {
  86. return "", err
  87. }
  88. // We ensure we identify ourself.
  89. req.Header.Set("User-Agent", "rss2email (https://github.com/skx/rss2email)")
  90. // Make the request
  91. resp, err := client.Do(req)
  92. if err != nil {
  93. return "", err
  94. }
  95. defer resp.Body.Close()
  96. // Read the body returned
  97. output, err := ioutil.ReadAll(resp.Body)
  98. if err != nil {
  99. return "", err
  100. }
  101. return string(output), nil
  102. }
  103. // isNew returns TRUE if this feed-item hasn't been notified about
  104. // previously.
  105. func isNew(parent string, item *gofeed.Item) bool {
  106. hasher := sha1.New()
  107. hasher.Write([]byte(parent))
  108. hasher.Write([]byte(item.GUID))
  109. hashBytes := hasher.Sum(nil)
  110. // Hexadecimal conversion
  111. hexSha1 := hex.EncodeToString(hashBytes)
  112. if _, err := os.Stat(os.Getenv("HOME") + "/.rss2hook/seen/" + hexSha1); os.IsNotExist(err) {
  113. return true
  114. }
  115. return false
  116. }
  117. // recordSeen ensures that we won't re-announce a given feed-item.
  118. func recordSeen(parent string, item *gofeed.Item) {
  119. hasher := sha1.New()
  120. hasher.Write([]byte(parent))
  121. hasher.Write([]byte(item.GUID))
  122. hashBytes := hasher.Sum(nil)
  123. // Hexadecimal conversion
  124. hexSha1 := hex.EncodeToString(hashBytes)
  125. dir := os.Getenv("HOME") + "/.rss2hook/seen"
  126. os.MkdirAll(dir, os.ModePerm)
  127. _ = ioutil.WriteFile(dir+"/"+hexSha1, []byte(item.Link), 0644)
  128. }
  129. // checkFeeds is our work-horse.
  130. //
  131. // For each available feed it looks for new entries, and when founds
  132. // triggers `notify` upon the resulting entry
  133. func checkFeeds() {
  134. //
  135. // For each thing we're monitoring
  136. //
  137. for _, monitor := range Loaded {
  138. // Fetch the feed-contents
  139. content, err := fetchFeed(monitor.feed)
  140. if err != nil {
  141. fmt.Printf("Error fetching %s - %s\n",
  142. monitor.feed, err.Error())
  143. continue
  144. }
  145. // Now parse the feed contents into a set of items
  146. fp := gofeed.NewParser()
  147. feed, err := fp.ParseString(content)
  148. if err != nil {
  149. fmt.Printf("Error parsing %s contents: %s\n", monitor.feed, err.Error())
  150. continue
  151. }
  152. // For each entry in the feed
  153. for _, i := range feed.Items {
  154. // If we've not already notified about this one.
  155. if isNew(monitor.feed, i) {
  156. // Trigger the notification
  157. err := notify(monitor.hook, i)
  158. // and if that notification succeeded
  159. // then record this item as having been
  160. // processed successfully.
  161. if err == nil {
  162. recordSeen(monitor.feed, i)
  163. }
  164. }
  165. }
  166. }
  167. }
  168. // notify actually submits the specified item to the remote webhook.
  169. //
  170. // The RSS-item is submitted as a JSON-object.
  171. func notify(hook string, item *gofeed.Item) error {
  172. // We'll post the item as a JSON object.
  173. // So first of all encode it.
  174. jsonValue, err := json.Marshal(item)
  175. if err != nil {
  176. fmt.Printf("notify: Failed to encode JSON:%s\n", err.Error())
  177. return err
  178. }
  179. //
  180. // Post to the specified hook URL.
  181. //
  182. res, err := http.Post(hook,
  183. "application/json",
  184. bytes.NewBuffer(jsonValue))
  185. if err != nil {
  186. fmt.Printf("notify: Failed to POST to %s - %s\n",
  187. hook, err.Error())
  188. return err
  189. }
  190. //
  191. // OK now we've submitted the post.
  192. //
  193. // We should retrieve the status-code + body, if the status-code
  194. // is "odd" then we'll show them.
  195. //
  196. defer res.Body.Close()
  197. _, err = ioutil.ReadAll(res.Body)
  198. if err != nil {
  199. return err
  200. }
  201. status := res.StatusCode
  202. if status != 200 {
  203. fmt.Printf("notify: Warning - Status code was not 200: %d\n", status)
  204. }
  205. return nil
  206. }
  207. // main is our entry-point
  208. func main() {
  209. // Parse the command-line flags
  210. config := flag.String("config", "", "The path to the configuration-file to read")
  211. timeout := flag.Duration("timeout", 5*time.Second, "The timeout used for fetching the remote feeds")
  212. flag.Parse()
  213. // Setup the default timeout.
  214. Timeout = *timeout
  215. if *config == "" {
  216. fmt.Printf("Please specify a configuration-file to read\n")
  217. return
  218. }
  219. //
  220. // Load the configuration file
  221. //
  222. loadConfig(*config)
  223. //
  224. // Show the things we're monitoring
  225. //
  226. for _, ent := range Loaded {
  227. fmt.Printf("Monitoring feed %s\nPosting to %s\n\n",
  228. ent.feed, ent.hook)
  229. }
  230. //
  231. // Make the initial scan of feeds immediately to avoid waiting too
  232. // long for the first time.
  233. //
  234. checkFeeds()
  235. //
  236. // Now repeat that every five minutes.
  237. //
  238. c := cron.New()
  239. c.AddFunc("@every 5m", func() { checkFeeds() })
  240. c.Start()
  241. //
  242. // Now we can loop waiting to be terminated via ctrl-c, etc.
  243. //
  244. sigs := make(chan os.Signal, 1)
  245. done := make(chan bool, 1)
  246. signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
  247. go func() {
  248. _ = <-sigs
  249. done <- true
  250. }()
  251. <-done
  252. }