Leetcode Regular Expression Matching problem solution in C programming

In the Leetcode Regular Expression Matching problem solution in C programming Given an input string s and a pattern p, implement regular expression matching with support for ‘.’ and ‘*’ where:

‘.’ Matches any single character.
‘*’ Matches zero or more of the preceding element.
The matching should cover the entire input string (not partial).

Leetcode Regular Expression Matching problem solution in C programming

typedef enum {
    OpType_Match,
    OpType_MatchChar,
    OpType_Jump,
    OpType_Split,
} OpType;

typedef struct Op {
    uint16_t x;
    uint16_t y;
    uint8_t type;
    char c;
} Op;

typedef struct ThreadSetEntry {
    uint16_t gen;
} ThreadSetEntry;

typedef struct Thread {
    int op;
} Thread;

typedef struct ThreadList {
    Thread* threads;
    ThreadSetEntry* alreadyAdded;
    int count;
    uint16_t clearCount;
} ThreadList;

void createThreadList(ThreadList* list, int maxThreadCount) {
    list->threads = malloc(maxThreadCount * sizeof(Thread));
    list->alreadyAdded = calloc(maxThreadCount, sizeof(ThreadSetEntry));
    list->count = 0;
    list->clearCount = 1;
}

void addThread(ThreadList* list, int i) {
    ThreadSetEntry* entry = &list->alreadyAdded[i];
    if (entry->gen == list->clearCount)
        return;

    entry->gen = list->clearCount;
    list->threads[list->count] = (Thread){ i };
    list->count++;
}

void clearThreads(ThreadList* list) {
    list->count = 0;
    list->clearCount++;
}

bool thompsonMatch(Op* ops, int opCount, const char* s) {
    ThreadList threadList1;
    ThreadList threadList2;
    createThreadList(&threadList1, opCount);
    createThreadList(&threadList2, opCount);

    ThreadList* curCharThreads = &threadList1;
    ThreadList* nextCharThreads = &threadList2;
    addThread(curCharThreads, 0);

    for (const char* curChar = s; curCharThreads->count; curChar++) {
        char c = *curChar;
        for (int j = 0; j < curCharThreads->count; j++) {
            Thread* thread = &curCharThreads->threads[j];
            Op* op = &ops[thread->op];
            
            switch (op->type) {
            case OpType_Match:
                if (c == '\0')
                    return true;
                break;
            case OpType_MatchChar:
                if (c == op->c || (op->c == '.' && c != '\0'))
                    addThread(nextCharThreads, thread->op+1);
                break;
            case OpType_Jump:
                addThread(curCharThreads, op->x);
                break;
            case OpType_Split:
                addThread(curCharThreads, op->x);
                addThread(curCharThreads, op->y);
                break;
            default:
                assert(!"invalid code path");
                return false;
            }
        }

        ThreadList* temp = curCharThreads;
        curCharThreads = nextCharThreads;
        nextCharThreads = temp;
        clearThreads(nextCharThreads);
    }

    // Leak thread lists b/c we don't care.
    return false;
}

bool isMatch(const char* s, const char* p) {
    int exprLen = strlen(p);

    // 3x the size of the expression is overkill but ensures we always have enough
    // space for three ops per 'a*' plus one more for the match op. I didn't want to realloc().
    Op* ops = malloc((exprLen*3) * sizeof(Op));
    int opCount = 0;
    for (int i = 0; i < exprLen; i++) {
        if (p[i+1] == '*') {
            // L1: split L1, L3
            // L2: char a
            //     jump L1
            // L3:
            Op* split = &ops[opCount++];
            Op* match = &ops[opCount++];
            Op* jump = &ops[opCount++];

            split->type = OpType_Split;
            split->x = match - ops;
            split->y = jump - ops + 1;

            match->type = OpType_MatchChar;
            match->x = split - ops;
            match->c = p[i];

            jump->type = OpType_Jump;
            jump->x = split - ops;

            ++i;
        }
        else {
            // match a
            Op* op = &ops[opCount++];
            op->type = OpType_MatchChar;
            op->x = opCount;
            op->c = p[i];
        }
    }
    ops[opCount++] = (Op){ 0, 0, OpType_Match };

    // Leak ops b/c we don't care.
    return thompsonMatch(ops, opCount, s);
}

Also read,

By Neha Singhal

Hi, my name is Neha singhal a software engineer and coder by profession. I like to solve coding problems that give me the power to write posts for this site.

Leave a Reply

Your email address will not be published. Required fields are marked *