Boyer-Moore-Horsepool snort 源码实现 针对小模式串
作者:互联网
该算法适合特别短的串, 就没有必要构造bm算法的好后缀表,节约空间
源码文件src/dynamic-plugins/sf_engine/bmh.c|h
typedef struct {
/* 模式串*/
unsigned char *P;
/* 模式串 忽略大小写*/
unsigned char *Pnc;
/* 模式串长度*/
int M;
/* 坏字表*/
int bcShift[256];
/* 忽略大小写标志*/
int nocase;
}HBM_STRUCT;
/* 实际预处理函数*/
HBM_STATIC HBM_STRUCT * hbm_prep( unsigned char * pat, int m, int nocase );
/* 预处理函数接口*/
HBM_STATIC int hbm_prepx( HBM_STRUCT *p, unsigned char * pat, int m, int nocase );
/* 匹配函数*/
HBM_STATIC const unsigned char * hbm_match( HBM_STRUCT *p, const unsigned char * text, int n );
/* 释放函数*/
HBM_STATIC void hbm_free( HBM_STRUCT *p );
/* 负责分配内存和调用实际的预处理函数*/
HBM_STATIC
HBM_STRUCT * hbm_prep(unsigned char * pat, int m, int nocase)
{
HBM_STRUCT *p;
p = (HBM_STRUCT*)malloc(sizeof(HBM_STRUCT));
if (!p)
{
DynamicEngineFatalMessage("Failed to allocate memory for pattern matching.");
}
if( !hbm_prepx( p, pat, m, nocase) )
{
DynamicEngineFatalMessage("Error initializing pattern matching. Check arguments.");
}
return p;
}
HBM_STATIC
int hbm_prepx (HBM_STRUCT *p, unsigned char * pat, int m, int nocase )
{
int i,k;
unsigned char *t;
if( !m ) return 0;
if( !p ) return 0;
p->P = pat;
p->M = m;
p->nocase = nocase;
if( nocase ) /* convert to uppercase */
{
t = (unsigned char*)malloc(m);
if ( !t ) return 0;
memcpy(t,pat,m);
for(i=0;i<m;i++)
{
/* 大小写不敏感 所有字符转成 大写*/
t[i] = (unsigned char)toupper(t[i]);
}
p->Pnc = t;
}
else
{
p->Pnc = 0;
}
/* Compute normal Boyer-Moore Bad Character Shift */
/* 默认配置, 所有字符的坏字表长度为字符串长度*/
for(k = 0; k < 256; k++) p->bcShift[k] = m;
if( nocase )
{
/* 循环设置在字符串中出现过的坏字符表的跳转长度*/
for(k = 0; k < m; k++)
p->bcShift[ p->Pnc[k] ] = m - k - 1;
}
else
{
for(k = 0; k < m; k++)
p->bcShift[ p->P[k] ] = m - k - 1;
}
return 1;
}
/*
* px : 模式串对象
* text : 目标串
* n : 目标串长度
*/
HBM_STATIC
const unsigned char * hbm_match(HBM_STRUCT * px, const unsigned char * text, int n)
{
const unsigned char *pat, *t, *et, *q;
int m1, k;
int *bcShift;
/* 是否大小写敏感 */
if( px->nocase )
{
pat = px->Pnc;
}
else
{
pat = px->P;
}
m1 = px->M-1;
bcShift= px->bcShift;
//printf("bmh_match: pattern=%.*s, %d bytes \n",px->M,pat,px->M);
t = text + m1;
et = text + n;
/* Handle 1 Byte patterns - it's a faster loop */
/* 模式串长度为1 暴力搜索匹配*/
if( !m1 )
{
if( !px->nocase )
{
for( ;t<et; t++ )
if( *t == *pat ) return t;
}
else
{
for( ;t<et; t++ )
if( toupper(*t) == *pat ) return t;
}
return 0;
}
/* 大小写敏感 */
if( !px->nocase )
{
/* Handle MultiByte Patterns */
while( t < et )
{
/* Scan Loop - Bad Character Shift */
do
{
/* 使用坏字表*/
t += bcShift[*t];
if( t >= et )return 0;;
t += (k=bcShift[*t]);
if( t >= et )return 0;
} while( k );
/* Unrolled Match Loop */
k = m1;
q = t - m1;
/* 从后往前匹配*/
while( k >= 4 )
{
if( pat[k] != q[k] )goto NoMatch; k--;
if( pat[k] != q[k] )goto NoMatch; k--;
if( pat[k] != q[k] )goto NoMatch; k--;
if( pat[k] != q[k] )goto NoMatch; k--;
}
/* Finish Match Loop */
while( k >= 0 )
{
if( pat[k] != q[k] )goto NoMatch; k--;
}
/* If matched - return 1st char of pattern in text */
return q;
NoMatch:
t++;
}
}
else /* NoCase - convert input string to upper case as we process it */
{
/* Handle MultiByte Patterns */
while( t < et )
{
/* Scan Loop - Bad Character Shift */
do
{
t += bcShift[toupper(*t)];
if( t >= et )return 0;;
t += (k=bcShift[toupper(*t)]);
if( t >= et )return 0;
} while( k );
/* Unrolled Match Loop */
k = m1;
q = t - m1;
while( k >= 4 )
{
if( pat[k] != toupper(q[k]) )goto NoMatchNC; k--;
if( pat[k] != toupper(q[k]) )goto NoMatchNC; k--;
if( pat[k] != toupper(q[k]) )goto NoMatchNC; k--;
if( pat[k] != toupper(q[k]) )goto NoMatchNC; k--;
}
/* Finish Match Loop */
while( k >= 0 )
{
if( pat[k] != toupper(q[k]) )goto NoMatchNC; k--;
}
/* If matched - return 1st char of pattern in text */
return q;
NoMatchNC:
t++;
}
}
return 0;
}
释放函数比较简单就贴出来了
标签:HBM,pat,Moore,Horsepool,unsigned,char,int,源码,return 来源: https://blog.csdn.net/guoguangwu/article/details/88378228