hyperscan字符串匹配库接口封装

大耗子 2020年11月04日 35次浏览

字符串匹配

在工作中需要用到字符串匹配。为了能够在不同的字符串匹配库之间快速切换,编写了这套统一的匹配接口:各个匹配库只需实现同一组操作函数,调用方无需修改即可更换底层实现。

接口封装

  • 头文件 kwmatch.h

#ifndef __KWMATCH_H__

#define __KWMATCH_H__

/*
 * Debug logging switch: set KW_DEBUG to 0 to compile the log statements out.
 * When enabled, __DEBUG(fmt, ...) prints the message prefixed with the source
 * file, function name and line number, followed by a newline. fmt must be a
 * string literal because it is pasted between two other literals. The
 * expansion calls printf, so the including file needs <stdio.h>.
 */
#define KW_DEBUG 1
#if KW_DEBUG
#define __DEBUG(p,...)   printf("[FILE:%s,FUNC:%s,Line:%d]"p"\n",__FILE__,__FUNCTION__,__LINE__,##__VA_ARGS__)
#else
/* No trailing semicolon here: the caller's own ';' ends the statement, so
 * `if (x) __DEBUG("..."); else ...` still parses when logging is disabled. */
#define __DEBUG(p,...)   do{}while(0)
#endif

struct keywords_ctx;

/*
 * Table of callbacks that a string-matching backend provides.
 * Every callback receives the owning keywords_ctx; keyword_data is an
 * opaque pointer whose layout is defined by the backend.
 */
struct keyword_operations {
	/* initialise the backend */
	int  (*keyword_init)(struct keywords_ctx *data, int max);
	/* add keywords: `keyword` is an array holding `num` strings */
	int  (*keyword_add)(struct keywords_ctx *data, char **keyword, int num);
	/* compile */
	int  (*keyword_compile)(struct keywords_ctx *data, void *keyword_data);
	/* match file_content (file_content_len bytes) against the keywords */
	int  (*keyword_compare)(struct keywords_ctx *data, void *keyword_data,
	                        char *file_content, int file_content_len);
	/* clean up the state kept for a single file */
	void (*keyword_clean)(struct keywords_ctx *data, void *keyword_data);
	/* release the keyword configuration */
	void (*keyword_free)(struct keywords_ctx *data);
};


/* Matching context: pairs a backend's configuration with its callback table. */
struct keywords_ctx {
	/* backend-specific configuration data */
	void *kw_cfg;
	/* callbacks of the backend in use */
	struct keyword_operations *kw_ops;
};


#endif

hyperscan关键字匹配库封装

  • 头文件 hyperscan.h

#ifndef __HYPERSCAN_H__

#define __HYPERSCAN_H__

#include "hs.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "kwmatch.h"

/* Keyword configuration used by the Hyperscan backend. */
struct keyword_hy_config {
	/* array of keyword strings */
	char **keywords;
	/* id assigned to each keyword */
	int *ids;
	/* matching flags (see the Hyperscan documentation) */
	unsigned int flag;
	/* number of keywords currently stored */
	int elements;
	/* maximum number of keywords */
	int max_elements;
	/* compiled keyword database */
	hs_database_t *database;
};

/* Per-scan state used by the Hyperscan backend. */
struct keyword_hy_data {
	/* match result: -1 means no hit, >= 0 means hit (value is the keyword index) */
	int result;
	/* holds the error reason reported by Hyperscan */
	hs_compile_error_t *compile_err;
	/* stream handle used for scanning */
	hs_stream_t *stream;
	/* scratch space used for scanning */
	hs_scratch_t *scratch;
};

/* Hyperscan configuration object and callback table; both are defined in hyperscan.c. */
extern struct keyword_hy_config hy_kw_cfg;
extern struct keyword_operations hy_kw_ops;


#endif

  • 封装接口的实现 hyperscan.c(基于 hyperscan 流模式的封装)
#include "hyperscan.h"

/* Forward declarations of the Hyperscan backend callbacks defined further down. */
static int  keyword_hy_init(struct keywords_ctx *data, int max);
static int  keyword_hy_add(struct keywords_ctx *data, char **keywords, int num);
static int  keyword_hy_compile(struct keywords_ctx *data, void *keyword_data);
static int  keyword_hy_compare(struct keywords_ctx *data, void *keyword_data,
                               char *file_content, int file_content_len);
static void keyword_hy_clean(struct keywords_ctx *data, void *keyword_data);
static void keyword_hy_free(struct keywords_ctx *data);

/* Callback table that exposes the Hyperscan backend through keyword_operations. */
struct keyword_operations hy_kw_ops = {
	.keyword_init    = keyword_hy_init,
	.keyword_add     = keyword_hy_add,
	.keyword_compile = keyword_hy_compile,
	.keyword_compare = keyword_hy_compare,
	.keyword_clean   = keyword_hy_clean,
	.keyword_free    = keyword_hy_free,
};

/* Zero-initialised Hyperscan configuration object. */
struct keyword_hy_config hy_kw_cfg = { 0 };

/*
 * Match callback handed to hs_open_stream().
 *
 * id       id of the pattern that matched; stored through _ctx
 * from/to  match offsets reported by the library; only printed
 * flags    unused
 * _ctx     must point to the int that receives the matched id
 *
 * Always returns 0. The original fell off the end of a non-void function,
 * which is undefined behaviour because the library reads the return value.
 * NOTE(review): in the public Hyperscan API a zero return means "continue
 * matching" - confirm against the hs.h version in use.
 */
static int keyword_hy_hit_handler(unsigned int id, unsigned long long from,
				unsigned long long to, unsigned int flags, void *_ctx)
{
	int *result = _ctx;

	(void)flags;
	*result = (int)id;
	/* from/to are unsigned long long, so the matching conversion is %llu */
	printf("keyword hit!from:%llu, to:%llu\n", from, to);
	return 0;
}

/*
 * Prepare the Hyperscan keyword configuration stored in data->kw_cfg.
 *
 * data  context whose kw_cfg points to a struct keyword_hy_config
 * max   capacity of the keyword and id tables; must be positive
 *
 * Returns 0 on success and -1 on error. Whenever a configuration object is
 * available it is first put into an empty state (NULL tables, zero counts),
 * so a failed init never leaves half-initialised fields behind. Any tables
 * the object held before the call are not released here.
 */
static int keyword_hy_init(struct keywords_ctx* data, int max)
{
	if (!data)
		return -1;

	struct keyword_hy_config *key_config = (struct keyword_hy_config *)data->kw_cfg;
	if (!key_config)
		return -1;

	key_config->keywords = NULL;
	key_config->ids = NULL;
	key_config->elements = 0;
	key_config->max_elements = 0;
	key_config->flag = HS_FLAG_SINGLEMATCH;
	key_config->database = NULL;

	if (max <= 0)
		return -1;

	/* calloc zero-fills both tables and checks the size multiplication */
	char **keywords = calloc((size_t)max, sizeof *keywords);
	int *ids = calloc((size_t)max, sizeof *ids);
	if (!keywords || !ids) {
		/* release whichever allocation succeeded */
		free(keywords);
		free(ids);
		return -1;
	}

	key_config->keywords = keywords;
	key_config->ids = ids;
	key_config->max_elements = max;

	return 0;
}

// 返回-1是错误,返回0是成功
static int keyword_hy_add(struct keywords_ctx* data, char **keywords, int num)
{
	if(!data || !keywords)
        return -1;
	struct keyword_hy_config* key_config = (struct keyword_hy_config*)data->kw_cfg;
	
	if(!key_config || key_config->max_elements <= num)
		return -1;
	int i = 0,len = 0;
	for(i = 0; i < num; i++){
		if(!keywords[i]){
			goto add_error;
		}
		len = strlen(keywords[i]);
		key_config->ids[i] = i;
		key_config->keywords[i] = (char *)malloc(len+1);
		if(!key_config->keywords[i])
			goto add_error;
		memset(key_config->keywords[i], 0, len+1);
		memcpy(key_config->keywords[i], keywords[i], len);
	}
	
	key_config->elements = num;
	
	hs_database_t *database = key_config->database;
	hs_compile_error_t *compile_err = NULL;;
	if(!database){
		if (hs_compile_multi(key_config->keywords, &key_config->flag, key_config->ids, key_config->elements, 
				HS_MODE_STREAM|HS_MODE_SOM_HORIZON_SMALL, &database,&compile_err) != HS_SUCCESS) {
			hs_free_compile_error(compile_err);
			fprintf(stderr, "ERROR: Unable to compile error\n");
		}
		key_config->database = database;
	}
	return 0;
add_error:
	for(i = 0; i < num; i++){
		if(key_config->keywords[i])
			free(key_config->keywords[i]);
	}
	return -1;
}

/*
 * Create the per-scan state (scratch space and stream) for the compiled
 * database and store it in keyword_data.
 *
 * data          context whose kw_cfg points to a keyword_hy_config
 * keyword_data  struct keyword_hy_data to fill in; its `result` member is
 *               registered as the match-callback context, so the object
 *               must stay alive for as long as the stream is used
 *
 * Returns 0 on success and -1 on error. On error keyword_hy_clean() is run
 * on keyword_data, as before.
 */
static int keyword_hy_compile(struct keywords_ctx* data, void *keyword_data)
{
	if (!data)
		return -1;
	struct keyword_hy_config *key_config = (struct keyword_hy_config *)data->kw_cfg;
	struct keyword_hy_data *key_data = (struct keyword_hy_data *)keyword_data;
	if (!key_config || !key_data)
		return -1;

	hs_database_t *database = key_config->database;
	hs_scratch_t *scratch = NULL;
	hs_stream_t *stream = NULL;

	if (!database) {
		printf("ERROR: No compiled database. Exiting.\n");
		goto error;
	}

	/*
	 * Allocate the scratch space before opening the stream: if the second
	 * step fails, the first can be undone with hs_free_scratch(). In the
	 * original order an opened stream was leaked when scratch allocation
	 * failed.
	 */
	if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) {
		printf("ERROR: Unable to allocate scratch space. Exiting.\n");
		goto error;
	}

	if (hs_open_stream(database, keyword_hy_hit_handler, 0, &key_data->result, &stream) != HS_SUCCESS) {
		printf("ERROR: Unable to open stream. Exiting.\n");
		hs_free_scratch(scratch);
		goto error;
	}

	key_data->stream  = stream;
	key_data->scratch = scratch;
	return 0;
error:
	keyword_hy_clean(data,keyword_data);
	return -1;
}


// 返回-1是错误,返回0是命中,返回1为不命中
static int keyword_hy_compare(struct keywords_ctx* data, void *keyword_data, char* file_content,int file_content_len)
{
	struct keyword_hy_data * key_data = (struct keyword_hy_data*)keyword_data;
	if(!key_data)
		return -1;
	hs_scan_stream(key_data->stream, (const char *)file_content, file_content_len, 0, key_data->scratch);
	if(0 <= key_data->result)
		return 0;
    return 1;
}

/*
 * Reset the per-scan state in keyword_data: frees the scratch space if one
 * is set, clears the stream and scratch pointers and sets result to -1.
 * `data` is not used. A NULL keyword_data is ignored.
 *
 * NOTE(review): the stream handle is only set to NULL, it is never closed
 * through the library - looks like a leak; confirm the stream-close call of
 * the Hyperscan version in use and add it here.
 */
static void keyword_hy_clean(struct keywords_ctx* data, void *keyword_data)
{
	struct keyword_hy_data *scan_state = keyword_data;

	if (scan_state == NULL)
		return;

	if (scan_state->scratch != NULL)
		hs_free_scratch(scan_state->scratch);

	scan_state->scratch = NULL;
	scan_state->stream  = NULL;
	scan_state->result  = -1;
}

/*
 * Release everything owned by the keyword configuration in data->kw_cfg:
 * the keyword copies, the keyword and id tables and the compiled database.
 * Afterwards the configuration is empty (NULL pointers, zero counts), so
 * calling this function a second time is harmless.
 */
static void keyword_hy_free(struct keywords_ctx* data)
{
	if (!data)
		return;
	struct keyword_hy_config *key_config = (struct keyword_hy_config *)data->kw_cfg;
	if (!key_config)
		return;

	/* only walk the table when it exists; the original dereferenced it first */
	if (key_config->keywords) {
		int i = 0;
		for (i = 0; i < key_config->elements; i++) {
			free(key_config->keywords[i]);
			key_config->keywords[i] = NULL;
		}
		free(key_config->keywords);
	}
	free(key_config->ids);
	if (key_config->database)
		hs_free_database(key_config->database);

	key_config->ids = NULL;
	key_config->keywords = NULL;
	key_config->database = NULL;
	/* reset the counts so a repeated call does not index a NULL table */
	key_config->elements = 0;
	key_config->max_elements = 0;
}

调用案例

  • test.c
#include "hs.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

#include "hyperscan.h"

#define READ_SIZE 10

/*
 * Debug logging switch: set HY_DEBUG to 0 to compile the log statements out.
 * When enabled, __DEBUG(fmt, ...) prints the message prefixed with the source
 * file, function name and line number, followed by a newline. fmt must be a
 * string literal because it is pasted between two other literals.
 */
#define HY_DEBUG 1
#if HY_DEBUG
#define __DEBUG(p,...)   printf("[FILE:%s,FUNC:%s,Line:%d]"p"\n",__FILE__,__FUNCTION__,__LINE__,##__VA_ARGS__)
#else
/* No trailing semicolon here: the caller's own ';' ends the statement, so
 * `if (x) __DEBUG("..."); else ...` still parses when logging is disabled. */
#define __DEBUG(p,...)   do{}while(0)
#endif


/*
 * Match callback used by the direct (unwrapped) Hyperscan example.
 *
 * id       id of the pattern that matched; stored through _ctx
 * from/to  match offsets reported by the library; only printed
 * flags    unused
 * _ctx     must point to the int that receives the matched id
 *
 * Always returns 0. The original fell off the end of a non-void function,
 * which is undefined behaviour because the library reads the return value.
 * NOTE(review): in the public Hyperscan API a zero return means "continue
 * matching" - confirm against the hs.h version in use.
 */
static int keyword_hy_hit_handler(unsigned int id, unsigned long long from,
				unsigned long long to, unsigned int flags, void *_ctx)
{
	int *result = _ctx;

	(void)flags;
	*result = (int)id;
	/* from/to are unsigned long long, so the matching conversion is %llu */
	printf("keyword hit!from:%llu, to:%llu\n", from, to);
	return 0;
}




void main(int argc, char **argv)
{
	// 未封装调用例子
#if 0
	if(argc < 3){
		__DEBUG("please input %s [filepath] [pattern] \n",argv[0]);
		return ;
	}
	FILE* fp = NULL;;
	if((fp = fopen(argv[1],"r")) == NULL){
		__DEBUG("file open fail\n");
		return ;
	}else{
		__DEBUG("file open success\n");	
	}
	
	char *keyword = argv[2];
	int id = 0;
	int elements = 1;
	int result = -1;
	
	char* one_line = NULL;
	one_line = (char*)malloc(READ_SIZE * sizeof(char) + 1);
	if(!one_line){
		__DEBUG("one_line malloc fail\n");
		goto error;
	}
	
	int flag = HS_FLAG_SINGLEMATCH;
	hs_database_t *database = NULL;
    hs_compile_error_t *compile_err;
	if (hs_compile_multi(&keyword, &flag, &id, elements, 
			HS_MODE_STREAM|HS_MODE_SOM_HORIZON_SMALL, &database,&compile_err) != HS_SUCCESS) {
		fprintf(stderr, "ERROR: Unable to compile error: %s\n",compile_err->message);
        hs_free_compile_error(compile_err);
        goto error;
	}
	
	hs_stream_t *stream = NULL;
	if(hs_open_stream(database, keyword_hy_hit_handler, 0, &result, &stream) != HS_SUCCESS){
			__DEBUG("ERROR: Unable to open stream. Exiting.\n");
			goto error;
	}

	hs_scratch_t *scratch = NULL;
	if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) {
			__DEBUG("ERROR: Unable to allocate scratch space. Exiting.\n");
			goto error;
	}

	while(fgets(one_line, READ_SIZE, fp) != NULL){
		
		hs_scan_stream(stream, (const char *)one_line, strlen(one_line), 0, scratch);
		if(result >= 0){
			__DEBUG("new line data:%s\n",one_line);
			__DEBUG("keyword result is hit!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
			break;
		}
	}
	

	
	
	
error:
	if(fp) fclose(fp);
	if(one_line) free(one_line);
	if(scratch) hs_free_scratch(scratch);
    if(database) hs_free_database(database);
	return ;
	
	
// 封装后的demo	
#else

	if(argc < 3){
		__DEBUG("please input %s [filepath] [pattern1] [pattern2]\n",argv[0]);
		return ;
	}
	FILE* fp = NULL;;
	if((fp = fopen(argv[1],"r")) == NULL){
		__DEBUG("file open fail\n");
		return ;
	}else{
		__DEBUG("file open success\n");	
	}
	
	
	struct keywords_ctx content;
	struct keyword_hy_data keyword_data;
	struct keyword_hy_config keyword_config;
	
	memset(&keyword_data,0,sizeof(struct keyword_hy_data));
	keyword_data.result = -1;
	content.kw_ops = &hy_kw_ops;
	content.kw_cfg = &keyword_config;
	if(content.kw_ops->keyword_init(&content,32) < 0)
	{
		printf("init fail\n");
		goto error2;
	}
	int i = 0;
	char *key[10];
	int num = atoi(argv[2]);
	for(i = 0;i < num; i++){
		key[i] = argv[i + 3];
	}
	
	
	if(content.kw_ops->keyword_add(&content,key,num)< 0)
	{
		printf("add fail\n");
		goto error2;
	}
	if(content.kw_ops->keyword_compile(&content,&keyword_data)< 0)
	{
		printf("compile fail\n");
		goto error2;
	}
	char* one_line = NULL;
	one_line = (char*)malloc(READ_SIZE * sizeof(char) + 1);
	if(!one_line){
		__DEBUG("one_line malloc fail\n");
		goto error2;
	}
	while(fgets(one_line, READ_SIZE, fp) != NULL){
		
		if(content.kw_ops->keyword_compare(&content,&keyword_data, one_line,strlen(one_line)) < 0)
		{
			printf("keyword_compare fail\n");
			goto error2;
		}
		if(keyword_data.result >= 0){
			__DEBUG("new line data:%s\n",one_line);
			__DEBUG("keyword result is hit!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
			break;
		}
	}
error2:
	content.kw_ops->keyword_clean(&content,&keyword_data);
	content.kw_ops->keyword_free(&content);
	if(fp) fclose(fp);
	if(one_line) free(one_line);
#endif
	
}

编译运行

文件下载链接
把lib文件放在/usr/lib/ 目录下
/usr/lib/libhs_runtime-2.1.so
/usr/lib/libhs-2.1.so

gcc -g test.c hyperscan.c -lhs -o test