字符串匹配sunday算法封装

大耗子 2020年11月04日 42次浏览

字符串匹配

在工作中,需要用字符串匹配,为了可以快速切换不同的库,编写了这个匹配接口,方便实现不同字符串匹配库的切换.

接口封装

  • sunday.h
#include<stdio.h>
#include<string.h>
#include <stdlib.h>

#define KEYWORD_SHIFT_LEN   256 
#define KEYWORD_MAXLEN      32  // 最大长度
#define KEYWORD_DEFAULT     32   // 默认数量

struct keyword_shifts{
	int **sun_shifts;
    char **keywords;
	int num;
    int max;
};

extern int      keyword_init(struct keyword_shifts* shifts, int maxnum);
extern int 		keyword_add(struct keyword_shifts* shifts, char *keyword, int len);
extern int 		keyword_compare(struct keyword_shifts* shifts, char* file_content,int file_content_len);
extern void 	keyword_free(struct keyword_shifts* shifts);


char* keyword_search(char* T,int lenT,char* P,int lenP,int *shift);
void SundayPre(int *sun_shift,char* P,int lenP);
  • sunday.c
#include "sunday.h"



// 返回-1是错误,返回0是命中,返回1为不命中
int keyword_compare(struct keyword_shifts* shifts, char* file_content,int file_content_len)
{
    int i = 0;
    char *ret = NULL;
	char keyword[20] = {0};
    if(NULL == shifts || NULL == file_content)
        return -1;
    for(i = 0; i < shifts->num; i++){
        ret = keyword_search(file_content, file_content_len,shifts->keywords[i], strlen(shifts->keywords[i]),shifts->sun_shifts[i]);
        if(ret){
            // 如果非空,说明命中,需要过滤
			memcpy(keyword, ret, 5);
			printf("hit keyword:%s\n",keyword);
            return 0;
        }
    }
    return 1;
}

int keyword_init(struct keyword_shifts* shifts, int maxnum)
{
	if(!shifts)
		return -1;
	shifts->sun_shifts 	= (int **)malloc(sizeof(int *) * maxnum);
	if(!shifts->sun_shifts)
		return -1;
	memset(shifts->sun_shifts, 0, sizeof(int *) * maxnum);
	
	shifts->keywords 	= (char **)malloc(sizeof(char *) * maxnum);
	if(!shifts->keywords)
		return -1;
	memset(shifts->keywords, 0, sizeof(char *) * maxnum);
	
    shifts->max = maxnum;
	shifts->num = 0;
	return 0;
}

int keyword_add(struct keyword_shifts* shifts, char *keyword, int len)
{
	if(NULL == shifts || NULL == keyword || shifts->num == shifts->max)
		return -1;
    
    shifts->sun_shifts[shifts->num] = (int *)malloc(KEYWORD_SHIFT_LEN*sizeof(int));
    if(!shifts->sun_shifts[shifts->num])
        return -1;
    memset(shifts->sun_shifts[shifts->num], 0, KEYWORD_SHIFT_LEN*sizeof(int));
	
    shifts->keywords[shifts->num] = (char *)malloc(len+1);
    if(!shifts->keywords[shifts->num])
        return -1;
    memset(shifts->keywords[shifts->num], 0, len+1);
	
	SundayPre(shifts->sun_shifts[shifts->num], keyword, len);
    memcpy(shifts->keywords[shifts->num], keyword, len);
	
	shifts->num++;
	return 0;
}

void keyword_free(struct keyword_shifts* shifts)
{
    int i = 0;
    if(NULL == shifts)
        return ;
    for(i = 0; i < shifts->num; i++){
        if(shifts->sun_shifts[i])
            free(shifts->sun_shifts[i]);
        if(shifts->keywords[i])
            free(shifts->keywords[i]);
    }
	if(shifts->sun_shifts)
		free(shifts->sun_shifts);
	if(shifts->keywords)
		free(shifts->keywords);
}


void SundayPre(int *sun_shift,char* P,int lenP)
{
    int i;
    for(i=0;i<KEYWORD_SHIFT_LEN;i++){
       sun_shift[i]=lenP+1;
    }
    // 模式串P中每个字母出现的最后的下标
    // 所对应的主串参与匹配的最末位字符的下一位字符移动到该位,所需要的移动位数
    for(i=0;i<lenP;i++){
        sun_shift[P[i]]=lenP-i;
    }
}
/*
function:keyword_seach
		Sunday字符匹配算法
Param:
@T 文本内容
@TLen 文本内容长度
@p 需要匹配的字符串
@lenP 需要匹配的字符串长度
*/
char* keyword_search(char* T,int lenT,char* P,int lenP,int *shift)
{
    // 默认值,移动m+1位
	int i;
    // 模式串开始位置在主串的哪里
    int pos=0;
	//printf("Sunday pos %d\n",pos);
    //模式串已经匹配到的位置
    int j,count=0;
    while(pos<=lenT-lenP) {
        j=0;
        while(T[pos+j]==P[j]&&j<lenP) j++;
            // 匹配成功
        if(j>=lenP){
            return &T[pos];
        }
        else{
			// 找到主串中当前跟模式串匹配的最末字符的下一个字符
			// 在模式串中出现最后的位置
			// 所需要从(模式串末尾+1)移动到该位置的步数
			pos+=shift[T[pos+lenP]];
		}
    }
    return NULL;
}

调用案例

  • test.c

#include "sunday.h"
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>
#define My_TXTMAX 1000
#define times 1  //运行次数
#define MAXCH 256
char* My_TXT="LOCALGX=%u6210%u90FD%7C%36%36%39%33%7C%u6210%u90FD%7C%36%36%39%33; Hm_lvt_e9e114d958ea263de46e080563e254c4=1531471439,1531472197,"
			"1531537654,1531618652; BAIDUID=3ACF228CE89D0C2D28B1B3DA5C56C138:FG=1; Hm_lpvt_e9e114d958ea263de46e080563e254c4=1531629911;"
			 "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860; H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/"
			 "BIDUPSID=C18478BCF075BCE0高D2D0149D77980B8B; PSTM=1531620860; H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/"
			 "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860; H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/"
			 "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860;MY_TEST_string H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/"
			 "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860; H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/"
			 "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860; H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/"
			 "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860; MY_TEST_MYringH_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/"
			 "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860; H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/"
			  "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860; H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/"
			  "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860; H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/"
			  "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860; H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/" "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860; H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/" "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860; H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/" "BIDUPSID=C18478BCF075BCE0D2D0149D77980B8B; PSTM=1531620860; H_PS_PSSID=26523_1451_25810_21106_22159 Mozilla/5.0 (X11; Linux x86_64; rv:10.0.12)"
			 " Gecko/20130109 Firefox/10.0.12 http://news.baidu.com/mil AndroidDownloadManager/7.1.2 (Linux; U; Android 7.1.2; MI 5X Build/N2G47H)  http://switch.pcfg.cache."
			 "wpscdn.cn/wps_assets/cfg/ad/switch/load_bubblehttp://notify.wps.cn/notifyserver/notify?v=qidVrBsEjwqbMd%2F4ydmZ5CRKfbUlVq11EGkClTHeWH%2B5msXpw%2BQJC0oEFK%2FB8vu2bNj"
			 "aDZpcjl1wviBMWPHuYA==  http://tile-service.weather.microsoft.com/zh-CN/livetile/preinstall?region=CN&appid=C98EA5B0842DBB9405BBF071E1DA76512D21FE36&FORM=Threshold"
			 "http://res.res.res.res:80/";
int main(int argc, char*argv[])
{	//初始化一些变量
	int lenp,len_txt;
	len_txt=strlen(My_TXT);//My_TXT为文本串,具体内容在"search_character.h"中
	struct timeval start,end;//用来记录时间
	long dif_sec, dif_usec;//存储时间差
	int i;
	char *temp[5],test[5][256];

    struct keyword_shifts shifts;
    char keyword[100] = {0};
    memset(&shifts, 0, sizeof(struct keyword_shifts));
	
    keyword_init(&shifts, KEYWORD_DEFAULT);
	if(argc>=2){
		memcpy(keyword, argv[1], strlen(argv[1]));
		
	}else
		scanf("%s",keyword);
    keyword_add(&shifts, keyword,strlen(keyword));
	
    
    //测试开始
	
	////////////Sunday//////////////
	gettimeofday(&start,NULL);
	if(!keyword_compare(&shifts, My_TXT, strlen(My_TXT))){
		printf("txt 命中\n");
	}else{
		printf("txt 未命中\n");
	}
	gettimeofday(&end,NULL);
	dif_sec=end.tv_sec-start.tv_sec;
	dif_usec=end.tv_usec-start.tv_usec;
	printf("Sunday running time  is %ld 秒 (%ld 微秒)\n",dif_sec, dif_sec*1000000+dif_usec);



    return 0;
}


编译

代码下载链接
gcc -g test.c sunday.c -o test