xs_regex.h 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. /* copyright (c) 2022 - 2024 grunfink et al. / MIT license */
  2. #ifndef _XS_REGEX_H
  3. #define _XS_REGEX_H
  4. int xs_regex_match(const char *str, const char *rx);
  5. xs_list *xs_regex_split_n(const char *str, const char *rx, int count);
  6. #define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL)
  7. xs_list *xs_regex_select_n(const char *str, const char *rx, int count);
  8. #define xs_regex_select(str, rx) xs_regex_select_n(str, rx, XS_ALL)
  9. xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count);
  10. #define xs_regex_replace_i(str, rx, rep) xs_regex_replace_in(str, rx, rep, XS_ALL)
  11. #define xs_regex_replace_n(str, rx, rep, count) xs_regex_replace_in(xs_dup(str), rx, rep, count)
  12. #define xs_regex_replace(str, rx, rep) xs_regex_replace_in(xs_dup(str), rx, rep, XS_ALL)
  13. #ifdef XS_IMPLEMENTATION
  14. #ifdef __TINYC__
  15. /* fix a compilation error in tcc */
  16. #define _REGEX_NELTS(n)
  17. #endif
  18. #include <regex.h>
  19. xs_list *xs_regex_split_n(const char *str, const char *rx, int count)
  20. /* splits str using regex as a separator, at most count times.
  21. Always returns a list:
  22. len == 0: regcomp error
  23. len == 1: full string (no matches)
  24. len == odd: first part [ separator / next part ]...
  25. */
  26. {
  27. regex_t re;
  28. regmatch_t rm;
  29. int offset = 0;
  30. xs_list *list = xs_list_new();
  31. const char *p;
  32. if (regcomp(&re, rx, REG_EXTENDED))
  33. return list;
  34. while (count > 0 && !regexec(&re, (p = str + offset), 1, &rm, offset > 0 ? REG_NOTBOL : 0)) {
  35. /* add first the leading part of the string */
  36. xs *s1 = xs_str_new_sz(p, rm.rm_so);
  37. list = xs_list_append(list, s1);
  38. /* add now the matched text as the separator */
  39. xs *s2 = xs_str_new_sz(p + rm.rm_so, rm.rm_eo - rm.rm_so);
  40. list = xs_list_append(list, s2);
  41. /* move forward */
  42. offset += rm.rm_eo;
  43. count--;
  44. }
  45. /* add the rest of the string */
  46. list = xs_list_append(list, p);
  47. regfree(&re);
  48. return list;
  49. }
  50. xs_list *xs_regex_select_n(const char *str, const char *rx, int count)
  51. /* selects all matches and return them as a list */
  52. {
  53. xs_list *list = xs_list_new();
  54. xs *split = NULL;
  55. const xs_val *v;
  56. int n = 0;
  57. /* split */
  58. split = xs_regex_split_n(str, rx, count);
  59. /* now iterate to get only the 'separators' (odd ones) */
  60. xs_list_foreach(split, v) {
  61. if (n & 0x1)
  62. list = xs_list_append(list, v);
  63. n++;
  64. }
  65. return list;
  66. }
  67. xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count)
  68. /* replaces all matches with the rep string. If it contains unescaped &,
  69. they are replaced with the match */
  70. {
  71. xs_str *s = xs_str_new(NULL);
  72. xs *split = xs_regex_split_n(str, rx, count);
  73. const xs_val *v;
  74. int n = 0;
  75. int pholder = !!strchr(rep, '&');
  76. xs_list_foreach(split, v) {
  77. if (n & 0x1) {
  78. if (pholder) {
  79. /* rep has a placeholder; process char by char */
  80. const char *p = rep;
  81. while (*p) {
  82. if (*p == '&')
  83. s = xs_str_cat(s, v);
  84. else {
  85. if (*p == '\\')
  86. p++;
  87. if (!*p)
  88. break;
  89. s = xs_append_m(s, p, 1);
  90. }
  91. p++;
  92. }
  93. }
  94. else
  95. s = xs_str_cat(s, rep);
  96. }
  97. else
  98. s = xs_str_cat(s, v);
  99. n++;
  100. }
  101. xs_free(str);
  102. return s;
  103. }
  104. int xs_regex_match(const char *str, const char *rx)
  105. /* returns if str matches the regex at least once */
  106. {
  107. xs *l = xs_regex_select_n(str, rx, 1);
  108. return xs_list_len(l) == 1;
  109. }
  110. #endif /* XS_IMPLEMENTATION */
  111. #endif /* XS_REGEX_H */