xs_regex.h 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. /* copyright (c) 2022 - 2024 grunfink et al. / MIT license */
  2. #ifndef _XS_REGEX_H
  3. #define _XS_REGEX_H
  4. xs_list *xs_regex_split_n(const char *str, const char *rx, int count); /* splits str by the regex rx, at most count times; NULL if rx does not compile */
  5. #define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL) /* same, with XS_ALL as the count */
  6. xs_list *xs_regex_select_n(const char *str, const char *rx, int count); /* returns the texts matched by rx in str (at most count) as a list */
  7. #define xs_regex_select(str, rx) xs_regex_select_n(str, rx, XS_ALL) /* same, with XS_ALL as the count */
  8. xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count); /* replaces at most count matches of rx with rep (an unescaped & in rep stands for the match); frees str and returns the new string */
  9. #define xs_regex_replace_i(str, rx, rep) xs_regex_replace_in(str, rx, rep, XS_ALL) /* same, with XS_ALL as the count: str is freed */
  10. #define xs_regex_replace_n(str, rx, rep, count) xs_regex_replace_in(xs_dup(str), rx, rep, count) /* hands xs_dup(str) over instead of str, so it is the xs_dup() result that gets freed */
  11. #define xs_regex_replace(str, rx, rep) xs_regex_replace_in(xs_dup(str), rx, rep, XS_ALL) /* like xs_regex_replace_n, with XS_ALL as the count */
  12. #ifdef XS_IMPLEMENTATION
  13. #include <regex.h>
  14. xs_list *xs_regex_split_n(const char *str, const char *rx, int count)
  15. /* splits str by regex */
  16. {
  17. regex_t re;
  18. regmatch_t rm;
  19. int offset = 0;
  20. xs_list *list = NULL;
  21. const char *p;
  22. if (regcomp(&re, rx, REG_EXTENDED))
  23. return NULL;
  24. list = xs_list_new();
  25. while (count > 0 && !regexec(&re, (p = str + offset), 1, &rm, offset > 0 ? REG_NOTBOL : 0)) {
  26. /* add first the leading part of the string */
  27. xs *s1 = xs_str_new_sz(p, rm.rm_so);
  28. list = xs_list_append(list, s1);
  29. /* add now the matched text as the separator */
  30. xs *s2 = xs_str_new_sz(p + rm.rm_so, rm.rm_eo - rm.rm_so);
  31. list = xs_list_append(list, s2);
  32. /* move forward */
  33. offset += rm.rm_eo;
  34. count--;
  35. }
  36. /* add the rest of the string */
  37. list = xs_list_append(list, p);
  38. regfree(&re);
  39. return list;
  40. }
  41. xs_list *xs_regex_select_n(const char *str, const char *rx, int count)
  42. /* selects all matches and return them as a list */
  43. {
  44. xs_list *list = xs_list_new();
  45. xs *split = NULL;
  46. xs_list *p;
  47. xs_val *v;
  48. int n = 0;
  49. /* split */
  50. split = xs_regex_split_n(str, rx, count);
  51. /* now iterate to get only the 'separators' (odd ones) */
  52. p = split;
  53. while (xs_list_iter(&p, &v)) {
  54. if (n & 0x1)
  55. list = xs_list_append(list, v);
  56. n++;
  57. }
  58. return list;
  59. }
  60. xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count)
  61. /* replaces all matches with the rep string. If it contains unescaped &,
  62. they are replaced with the match */
  63. {
  64. xs_str *s = xs_str_new(NULL);
  65. xs *split = xs_regex_split_n(str, rx, count);
  66. xs_list *p;
  67. xs_val *v;
  68. int n = 0;
  69. int pholder = !!strchr(rep, '&');
  70. p = split;
  71. while (xs_list_iter(&p, &v)) {
  72. if (n & 0x1) {
  73. if (pholder) {
  74. /* rep has a placeholder; process char by char */
  75. const char *p = rep;
  76. while (*p) {
  77. if (*p == '&')
  78. s = xs_str_cat(s, v);
  79. else {
  80. if (*p == '\\')
  81. p++;
  82. if (!*p)
  83. break;
  84. s = xs_append_m(s, p, 1);
  85. }
  86. p++;
  87. }
  88. }
  89. else
  90. s = xs_str_cat(s, rep);
  91. }
  92. else
  93. s = xs_str_cat(s, v);
  94. n++;
  95. }
  96. xs_free(str);
  97. return s;
  98. }
  99. #endif /* XS_IMPLEMENTATION */
  100. #endif /* _XS_REGEX_H */