看毛片(kmp)算法实现

大耗子 2020年05月17日 152次浏览

文章链接:https://codemouse.online/archives/2020-05-17215750

kmp算法简单理解

next的获取

双指针,当两个指针的字符一致的时候,一起向前走,如果不一致,指向前面的k回退到next[k-1]的位置,直到遇到一致的字符或者到0的位置,然后放置当前位置的k值.

kmp用法

双指针,一个指向带匹配字符和 匹配字符.
但两个指针指向的字符一致的时候,指针同时往前走,但不一致的时候,匹配字符的指针回退到next[q-1]的位置,直到遇到一致的字符或者到0的位置.
到0的位置后待匹配的指针就会向前移动,因为该字符无匹配,只能放弃字符.

kmp算法实现



#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// 匹配出next数组
void make_next(const char *pattern, int *next) {

	int q, k;
	int m = strlen(pattern);

	next[0] = 0;
	for (q = 1,k = 0;q < m; q ++) {

		while (k > 0 && pattern[q] != pattern[k])
			k = next[k-1];

		if (pattern[q] == pattern[k]) {
			k ++;
		}

		next[q] = k;

	}
	// 假设为abcabcd
	// next[0] = 0;
	// q=1, k=0, pattern[q]:pattern[k] = b:a, next[1] = 0;
	// q=2, k=0, pattern[q]:pattern[k] = c:a, next[2] = 0;
	// q=3, k=0, pattern[q]:pattern[k] = a:a, k++, next[3] = 1;
	// q=4, k=1, pattern[q]:pattern[k] = b:b, k++, next[4] = 2;
	// q=5, k=2, pattern[q]:pattern[k] = c:c, k++, next[5] = 3;
	// q=6, k=3, pattern[q]:pattern[k] = d:a, k=next[k-1] -> k=0; next[6] = 0;

}

// 匹配位置
int kmp(const char *text, const char *pattern, int *next) {

	int n = strlen(text);
	int m = strlen(pattern);

	int i, q;
	for (i = 0, q = 0;i < n;i ++) {

		while (q > 0 && pattern[q] != text[i]) {
			q = next[q-1];
		}

		if (pattern[q] == text[i]) {
			q ++;
		}

		if (q == m) {
			break;
		}
	}
	if (q == m)
		return i-q+1;
	else 
		return -1;
}

应用实例


// 获取第一个匹配的位置
int oneKmp(const char *text, const char *pattern)
{
	int len = strlen(pattern);
	if(len==0) return -2;
	
	int *next = (int *)malloc(sizeof(int)*len);
	make_next(pattern, next);
	int index = kmp(text, pattern, next);
	
	free(next);
	return index;
}

// 返回匹配到的个数,位置放在result数组里面
int allKmp(int *result, const char *text, const char *pattern)
{
	int len = strlen(pattern);
	if(len==0) return -2;
	if(result==NULL) return -3;
	
	int *next = (int *)malloc(sizeof(int)*len);
	make_next(pattern, next);
	
	int i = 0,index = 0;
	int len = strlen(text);
	int ret = 0;
	for(i = 0; 1 ; i++){
		if(index >= len) break;
		if((ret= kmp(text+index,pattern,next))<0)
			break;
		index += ret;
		result[i] = index++;
	}
	free(next);
	return i;
}
int main() {

	int i;
	
	char *text = "ababxbababababcdababcabddcadfdsab";
	char *pattern = "b";
	int *next = (int *)malloc(sizeof(int)*strlen(pattern);
	int *result = (int *)malloc(sizeof(int)*strlen(text));
	
	
	int idx = kmp(text, pattern, next);
	printf("match pattern : %d\n", idx);
	
	int num = allKmp(result,text, pattern);
	printf("num is :%d \n",num);
	for(int i = 0; i < num; i++)
		printf("%4d  ", result[i]);
	printf("\n");
	
	for (i = 0;i < strlen(pattern);i ++) {
		printf("%4d", next[i]);
	}
	printf("\n");

	return 0;

}