xs_regex.h 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. /* copyright (c) 2022 - 2024 grunfink et al. / MIT license */
  2. #ifndef _XS_REGEX_H
  3. #define _XS_REGEX_H
  /* returns non-zero if str matches the regex rx at least once */
  4. int xs_regex_match(const char *str, const char *rx);
  /* splits str using the regex rx as a separator, at most count times;
     the resulting list alternates text parts and matched separators,
     and is empty if rx cannot be compiled */
  5. xs_list *xs_regex_split_n(const char *str, const char *rx, int count);
  /* same as xs_regex_split_n() with XS_ALL as count (XS_ALL is defined
     elsewhere; presumably it means 'no limit') */
  6. #define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL)
  /* returns a list with only the matched texts, at most count of them */
  7. xs_list *xs_regex_select_n(const char *str, const char *rx, int count);
  /* same as xs_regex_select_n() with XS_ALL as count */
  8. #define xs_regex_select(str, rx) xs_regex_select_n(str, rx, XS_ALL)
  /* replaces at most count matches of rx in str with rep and returns the
     result as a new string; str is released with xs_free(), so the caller
     must not use it afterwards */
  9. xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count);
  /* replaces with XS_ALL as count; str is passed as is and thus released */
  10. #define xs_regex_replace_i(str, rx, rep) xs_regex_replace_in(str, rx, rep, XS_ALL)
  /* replaces at most count matches, operating on xs_dup(str) instead of
     str (presumably a copy, so that the caller keeps str -- see xs_dup) */
  11. #define xs_regex_replace_n(str, rx, rep, count) xs_regex_replace_in(xs_dup(str), rx, rep, count)
  /* replaces with XS_ALL as count, operating on xs_dup(str) instead of str */
  12. #define xs_regex_replace(str, rx, rep) xs_regex_replace_in(xs_dup(str), rx, rep, XS_ALL)
  13. #ifdef XS_IMPLEMENTATION
  14. #include <regex.h>
  15. xs_list *xs_regex_split_n(const char *str, const char *rx, int count)
  16. /* splits str using regex as a separator, at most count times.
  17. Always returns a list:
  18. len == 0: regcomp error
  19. len == 1: full string (no matches)
  20. len == odd: first part [ separator / next part ]...
  21. */
  22. {
  23. regex_t re;
  24. regmatch_t rm;
  25. int offset = 0;
  26. xs_list *list = xs_list_new();
  27. const char *p;
  28. if (regcomp(&re, rx, REG_EXTENDED))
  29. return list;
  30. while (count > 0 && !regexec(&re, (p = str + offset), 1, &rm, offset > 0 ? REG_NOTBOL : 0)) {
  31. /* add first the leading part of the string */
  32. xs *s1 = xs_str_new_sz(p, rm.rm_so);
  33. list = xs_list_append(list, s1);
  34. /* add now the matched text as the separator */
  35. xs *s2 = xs_str_new_sz(p + rm.rm_so, rm.rm_eo - rm.rm_so);
  36. list = xs_list_append(list, s2);
  37. /* move forward */
  38. offset += rm.rm_eo;
  39. count--;
  40. }
  41. /* add the rest of the string */
  42. list = xs_list_append(list, p);
  43. regfree(&re);
  44. return list;
  45. }
  46. xs_list *xs_regex_select_n(const char *str, const char *rx, int count)
  47. /* selects all matches and return them as a list */
  48. {
  49. xs_list *list = xs_list_new();
  50. xs *split = NULL;
  51. xs_val *v;
  52. int n = 0;
  53. int c = 0;
  54. /* split */
  55. split = xs_regex_split_n(str, rx, count);
  56. /* now iterate to get only the 'separators' (odd ones) */
  57. while (xs_list_next(split, &v, &c)) {
  58. if (n & 0x1)
  59. list = xs_list_append(list, v);
  60. n++;
  61. }
  62. return list;
  63. }
  64. xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count)
  65. /* replaces all matches with the rep string. If it contains unescaped &,
  66. they are replaced with the match */
  67. {
  68. xs_str *s = xs_str_new(NULL);
  69. xs *split = xs_regex_split_n(str, rx, count);
  70. xs_val *v;
  71. int n = 0;
  72. int c = 0;
  73. int pholder = !!strchr(rep, '&');
  74. while (xs_list_next(split, &v, &c)) {
  75. if (n & 0x1) {
  76. if (pholder) {
  77. /* rep has a placeholder; process char by char */
  78. const char *p = rep;
  79. while (*p) {
  80. if (*p == '&')
  81. s = xs_str_cat(s, v);
  82. else {
  83. if (*p == '\\')
  84. p++;
  85. if (!*p)
  86. break;
  87. s = xs_append_m(s, p, 1);
  88. }
  89. p++;
  90. }
  91. }
  92. else
  93. s = xs_str_cat(s, rep);
  94. }
  95. else
  96. s = xs_str_cat(s, v);
  97. n++;
  98. }
  99. xs_free(str);
  100. return s;
  101. }
  102. int xs_regex_match(const char *str, const char *rx)
  103. /* returns if str matches the regex at least once */
  104. {
  105. xs *l = xs_regex_select_n(str, rx, 1);
  106. return xs_list_len(l) == 1;
  107. }
  108. #endif /* XS_IMPLEMENTATION */
  109. #endif /* XS_REGEX_H */