micro-utils/src/grep.c
2024-11-14 12:33:38 +03:00

316 lines
5.7 KiB
C

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <regex.h>
#include <sys/types.h>
/*
 * One search pattern as supplied through -e or -f.
 */
struct pattern {
	char *str;    /* private copy of the pattern text (made with strdup) */
	regex_t reg;  /* compiled expression; only meaningful when reg_set != 0 */
	char reg_set; /* set to 1 once regcomp() has filled in reg */
};

/* Growable array of pattern pointers and the number of entries in it. */
static struct pattern **ptrns;
static size_t ptrns_size = 0;

/* Command-line switches; each is non-zero once the option was seen. */
static char i_flag; /* -i: ignore case */
static char F_flag; /* -F: patterns are literal strings */
static char H_flag; /* -H: prefix output lines with the file name */
static char E_flag; /* -E: patterns are extended regular expressions */
static char v_flag; /* -v: select lines that do NOT match */
static char x_flag; /* -x: the whole line must match */
static char w_flag; /* -w: the match must be a whole word */

/* Output mode: 'q' (quiet), 'o' (matching part only), 'c' (count) or 0. */
static char mode_flag;

/* cflags handed to regcomp(): REG_ICASE and/or REG_EXTENDED. */
static int reg_flag;
static void free_patterns(void) {
if (ptrns == NULL)
return;
for (size_t i = 0; i < ptrns_size; i++) {
if (ptrns[i] == NULL)
continue;
if (ptrns[i]->str != NULL)
free(ptrns[i]->str);
if (ptrns[i]->reg_set)
regfree(&ptrns[i]->reg);
free(ptrns[i]);
}
free(ptrns);
ptrns = NULL;
}
static int addpattern(const char *str, const size_t size) {
if (ptrns == NULL) {
ptrns = malloc(sizeof(struct pattern *));
if (ptrns == NULL) {
fprintf(stderr, "grep: malloc: %s\n", strerror(errno));
return 1;
}
}
struct pattern **bckp = realloc(ptrns, sizeof(struct pattern *) * (ptrns_size + 1));
if (bckp == NULL)
goto ADDP_ERROR;
ptrns = bckp;
ptrns[ptrns_size] = malloc(sizeof(struct pattern));
if (ptrns[ptrns_size] == NULL)
goto ADDP_ERROR;
ptrns[ptrns_size]->str = strdup(str);
if (ptrns[ptrns_size]->str == NULL)
goto ADDP_ERROR;
if (!F_flag) {
char *reg_str = ptrns[ptrns_size]->str;
size_t rs_size = size;
char bol = (ptrns[ptrns_size]->str[0] == '^');
char eol = (ptrns[ptrns_size]->str[size - 1] == '^');
if (x_flag || w_flag) {
if (w_flag)
rs_size += 5 + ((E_flag) ? 2 : 4);
reg_str = malloc(rs_size + 4);
if (reg_str == NULL)
goto ADDP_ERROR;
}
if (x_flag)
snprintf(reg_str, rs_size + 4, "%s%s%s", (bol) ? "" : "^", ptrns[ptrns_size]->str, (eol) ? "" : "$");
else if (w_flag)
snprintf(reg_str, rs_size + 4, "%s\\<%s%.*s%s\\>%s", (bol) ? "^" : "", (E_flag) ? "(" : "\\(", (int)size - bol - eol, ptrns[ptrns_size]->str + bol, (E_flag) ? ")" : "\\)", (eol) ? "$" : "");
if (regcomp(&ptrns[ptrns_size]->reg, reg_str, reg_flag) < 0)
goto ADDP_ERROR;
ptrns[ptrns_size]->reg_set = 1;
if (x_flag || w_flag)
free(reg_str);
}
ptrns_size++;
return 0;
ADDP_ERROR:
ptrns_size++;
free_patterns();
fprintf(stderr, "grep: %s\n", strerror(errno));
return 1;
}
/*
 * Read patterns from a file, one per line, and add each with addpattern().
 *
 * file  Path of the pattern file.
 *
 * The trailing newline of each line is removed and the length passed on is
 * the length of the text without it. Reading stops at the first pattern that
 * addpattern() rejects.
 *
 * Returns 0 on success, 1 if the file cannot be opened, a read error occurs
 * or a pattern is rejected.
 */
static int addpattern_file(const char *file) {
	FILE *fp = fopen(file, "r");
	if (fp == NULL) {
		fprintf(stderr, "grep: %s: %s\n", file, strerror(errno));
		return 1;
	}

	int ret = 0;
	char *line = NULL;
	size_t cap = 0;
	ssize_t got;

	while ((got = getline(&line, &cap, fp)) > 0) {
		size_t len = (size_t)got;

		/* getline() keeps the delimiter; drop it from text and length alike. */
		if (line[len - 1] == '\n')
			line[--len] = '\0';

		if (addpattern(line, len)) {
			ret = 1;
			break;
		}
	}

	/* getline() returns -1 for both EOF and failure; tell them apart. */
	if (ret == 0 && ferror(fp)) {
		fprintf(stderr, "grep: %s: %s\n", file, strerror(errno));
		ret = 1;
	}

	free(line);
	fclose(fp);
	return ret;
}
/*
 * Literal (non-regex) comparison of a line against a pattern.
 *
 * str1  The text to search in.
 * str2  The literal pattern.
 *
 * With -x the two strings must be equal; otherwise str2 only has to occur
 * somewhere inside str1. -i makes either test case-insensitive.
 *
 * Returns non-zero on a match, 0 otherwise.
 */
static int cmp(const char *str1, const char *str2) {
	if (!x_flag) {
		const char *hit = i_flag ? strcasestr(str1, str2) : strstr(str1, str2);

		return hit != NULL;
	}

	const int diff = i_flag ? strcasecmp(str1, str2) : strcmp(str1, str2);

	return diff == 0;
}
/*
 * Scan one stream line by line and report the selected lines.
 *
 * fp    Stream to read from.
 * file  Name printed as the "file: " prefix when -H is active.
 *
 * A line matches when any stored pattern matches it: literal comparison via
 * cmp() when -F is active, regexec() otherwise. A line is selected when its
 * match state differs from -v. What is printed depends on mode_flag:
 *   'q'  nothing
 *   'o'  only the matched text (first match of the line)
 *   'c'  nothing per line; the number of selected lines at the end
 *   else the whole line, optionally prefixed with the file name
 *
 * Returns 0 if at least one line was selected, 1 otherwise.
 */
static int grep(FILE *fp, const char *file) {
	int ret = 1;
	size_t matched_lines = 0;
	regmatch_t m = {0};

	char *buf = NULL;
	size_t n = 0;
	ssize_t size;

	while ((size = getline(&buf, &n, fp)) > 0) {
		if (buf[size - 1] == '\n')
			buf[size - 1] = '\0';

		char match = 0;
		size_t i = 0;

		for (; i < ptrns_size; i++) {
			/*
			 * Pick exactly one matcher. In -F mode no regex was compiled,
			 * so regexec() must never be reached, even when cmp() fails.
			 */
			const int hit = F_flag
				? cmp(buf, ptrns[i]->str)
				: (regexec(&ptrns[i]->reg, buf, 1, &m, 0) == 0);

			if (hit) {
				match = 1;
				break;
			}
		}

		if (match == v_flag)
			continue;

		ret = 0;
		switch (mode_flag) {
		case 'q':
			break;

		case 'o':
			/*
			 * With -v the selected line did not match: there is no matched
			 * text to show, i equals ptrns_size and m holds no valid offsets.
			 */
			if (!match)
				break;

			if (F_flag)
				puts(ptrns[i]->str);
			else
				/* %.*s takes an int precision. */
				printf("%.*s\n", (int)(m.rm_eo - m.rm_so), buf + m.rm_so);
			break;

		case 'c':
			matched_lines++;
			break;

		default:
			if (H_flag)
				printf("%s: ", file);
			puts(buf);
		}
	}

	if (mode_flag == 'c')
		printf("%zu\n", matched_lines);

	free(buf);
	return ret;
}
/*
 * Entry point: parse options, build the pattern list, then search every
 * named file (or standard input when none is given; "-" also means stdin).
 *
 * -e/-f arguments are only recorded while options are parsed and are turned
 * into patterns afterwards. Patterns are compiled according to the option
 * flags, so they must not be built before every flag is known; otherwise
 * "-e foo -i" would behave differently from "-i -e foo".
 *
 * Exit status: 0 if at least one line was selected in any input and no error
 * occurred, 1 otherwise (no line selected, bad usage, or an error).
 */
int main(int argc, char **argv) {
	struct source {
		const char *arg; /* option argument, points into argv */
		char from_file;  /* non-zero for -f, zero for -e */
	};

	/* There can never be more -e/-f options than command-line words. */
	struct source *pending = calloc((size_t)argc + 1, sizeof(*pending));
	if (pending == NULL) {
		fprintf(stderr, "grep: calloc: %s\n", strerror(errno));
		return 1;
	}
	size_t npending = 0;

	int opt;
	while ((opt = getopt(argc, argv, "e:f:iFHEvxwqoc")) != -1) {
		switch (opt) {
		case 'e':
		case 'f':
			pending[npending].arg = optarg;
			pending[npending].from_file = (opt == 'f');
			npending++;
			break;

		case 'i':
			i_flag = 1;
			reg_flag |= REG_ICASE;
			break;

		case 'F':
			F_flag = 1;
			E_flag = 0;
			reg_flag &= ~REG_EXTENDED;
			break;

		case 'H':
			H_flag = 1;
			break;

		case 'E':
			E_flag = 1;
			F_flag = 0;
			reg_flag |= REG_EXTENDED;
			break;

		case 'v':
			v_flag = 1;
			break;

		case 'x':
			x_flag = 1;
			break;

		case 'w':
			w_flag = 1;
			break;

		case 'q':
			mode_flag = 'q';
			break;

		case 'o':
			mode_flag = 'o';
			break;

		case 'c':
			mode_flag = 'c';
			break;

		default:
			puts("grep [efiFHvxwqoc] [FILE]\n\t-e PTRN Pattern to match\n\t-f FILE Read pattern from file\n\t-i Ignore case\n\t-H Add 'filename:' prefix\n\t-F PATTERN is a literal (not regexp)\n\t-E PATTERN is an extended regexp\n\t-v Select non-matching lines\n\t-x Match whole lines only\n\t-w Match whole words only\n\t-q Quiet\n\t-o Show only the matching part of line\n\t-c Show only count of matching lines");
			free(pending);
			/* Exit status 0 would tell the caller that a match was found. */
			return 1;
		}
	}

	/* All flags are final now: build the patterns in command-line order. */
	for (size_t k = 0; k < npending; k++) {
		const int failed = pending[k].from_file
			? addpattern_file(pending[k].arg)
			: addpattern(pending[k].arg, strlen(pending[k].arg));

		if (failed) {
			free(pending);
			return 1;
		}
	}
	free(pending);

	argv += optind;
	argc -= optind;

	if (ptrns_size == 0) {
		fprintf(stderr, "grep: no patterns specified\n");
		return 1;
	}

	int selected = 0; /* some input had at least one selected line */
	int failed = 0;   /* some input could not be opened */

	if (argc == 0) {
		selected = (grep(stdin, "-") == 0);
	} else {
		for (int i = 0; i < argc; i++) {
			FILE *fp = stdin;

			if (strcmp(argv[i], "-") != 0) {
				fp = fopen(argv[i], "r");
				if (fp == NULL) {
					failed = 1;
					fprintf(stderr, "grep: %s: %s\n", argv[i], strerror(errno));
					continue;
				}
			}

			/* One matching file is enough for overall success. */
			if (grep(fp, argv[i]) == 0)
				selected = 1;

			if (fp != stdin)
				fclose(fp);
		}
	}

	free_patterns();
	return (failed || !selected) ? 1 : 0;
}