编程语言
首页 > 编程语言 > Boyer-Moore-Horspool snort 源码实现(针对小模式串)

Boyer-Moore-Horspool snort 源码实现(针对小模式串)

作者:互联网

该算法适合特别短的模式串:此时没有必要构造 BM 算法的好后缀表,只使用坏字符表即可,从而节约空间。

源码文件src/dynamic-plugins/sf_engine/bmh.c|h  


/* Prepared state for one Boyer-Moore-Horspool pattern search. */
typedef struct {
    unsigned char *P;            /* pattern bytes */
    unsigned char *Pnc;          /* pattern prepared for case-insensitive matching */
    int            M;            /* pattern length */
    int            bcShift[256]; /* bad-character shift table, one entry per byte value */
    int            nocase;       /* ignore-case flag */
} HBM_STRUCT;

/* Allocates an HBM_STRUCT and prepares it for the pattern by calling hbm_prepx() */
HBM_STATIC HBM_STRUCT    * hbm_prep( unsigned char * pat, int m, int nocase );
/* Does the actual preprocessing on a caller-supplied HBM_STRUCT: records the pattern and builds the bad-character shift table */
HBM_STATIC int hbm_prepx( HBM_STRUCT *p, unsigned char * pat, int m, int nocase );
/* Match function: searches the n bytes at text for the prepared pattern; returns a pointer to the match, or 0 if none */
HBM_STATIC const unsigned char * hbm_match( HBM_STRUCT *p, const unsigned char * text, int n );
/* Release function (its definition is not shown in this excerpt) */
HBM_STATIC void            hbm_free( HBM_STRUCT *p );
/*
 * Allocate an HBM_STRUCT and prepare it for the given pattern by delegating
 * to hbm_prepx().
 *
 * pat    : pattern bytes
 * m      : pattern length
 * nocase : non-zero for case-insensitive matching
 *
 * A failed allocation or a failed hbm_prepx() call is reported through
 * DynamicEngineFatalMessage(). Returns the allocated struct.
 */
HBM_STATIC
HBM_STRUCT * hbm_prep(unsigned char * pat, int m, int nocase)
{
     HBM_STRUCT *ctx = (HBM_STRUCT *)malloc(sizeof *ctx);

     if (ctx == NULL)
         DynamicEngineFatalMessage("Failed to allocate memory for pattern matching.");

     if (hbm_prepx(ctx, pat, m, nocase) == 0)
         DynamicEngineFatalMessage("Error initializing pattern matching. Check arguments.");

     return ctx;
}

 

/*
 * Prepare a caller-supplied HBM_STRUCT for searching for pat[0..m-1].
 *
 * p      : struct to fill in
 * pat    : pattern bytes; NOT copied - p->P points at this buffer, so it must
 *          stay valid for as long as p is used
 * m      : pattern length in bytes, must be > 0
 * nocase : non-zero to prepare for case-insensitive matching
 *
 * When nocase is set, an upper-cased copy of the pattern is malloc'ed and
 * stored in p->Pnc (presumably released by hbm_free(), which is not shown
 * here); otherwise p->Pnc is 0.
 *
 * Returns 1 on success, 0 on invalid arguments or allocation failure.
 */
HBM_STATIC
int hbm_prepx (HBM_STRUCT *p, unsigned char * pat, int m, int nocase )
{
     const unsigned char *src;
     int                  k;

     /* Reject a missing struct or pattern and empty or negative lengths.
      * A negative m would otherwise be converted to a huge size for malloc()
      * and fill the shift table with negative shifts. */
     if( !p || !pat || m <= 0 ) return 0;

     p->P      = pat;
     p->M      = m;
     p->nocase = nocase;
     p->Pnc    = 0;   /* defined value even if the allocation below fails */

     if( nocase )
     {
         /* Case-insensitive: keep an upper-cased copy of the pattern */
         unsigned char *t = (unsigned char*)malloc((size_t)m);
         if( !t ) return 0;

         for(k = 0; k < m; k++)
         {
             t[k] = (unsigned char)toupper(pat[k]);
         }
         p->Pnc = t;
         src    = t;
     }
     else
     {
         src = pat;
     }

     /* Bad-character shift table: bytes that do not occur in the pattern
      * shift by the whole pattern length. */
     for(k = 0; k < 256; k++) p->bcShift[k] = m;

     /* Bytes that do occur shift by their distance from the pattern end
      * (rightmost occurrence wins). The last pattern byte therefore gets
      * shift 0, which the scan loop in hbm_match() uses as its
      * "candidate found" signal. */
     for(k = 0; k < m; k++) p->bcShift[ src[k] ] = m - k - 1;

     return 1;
}

 

/*
 * Search text[0..n-1] for the pattern prepared in px, using the
 * Boyer-Moore-Horspool bad-character shift.
 *
 * px   : prepared pattern object (see hbm_prepx)
 * text : buffer to search
 * n    : number of bytes in text
 *
 * Returns a pointer to the first byte of the match inside text, or 0 when
 * there is no match (including when text is shorter than the pattern or an
 * argument is missing).
 */
HBM_STATIC
const unsigned char * hbm_match(HBM_STRUCT * px, const unsigned char * text, int n)
{
   const unsigned char *pat, *t, *et, *q;
   const int           *bcShift;
   int                  m1, k;

   if( !px || !text ) return 0;

   /* A text shorter than the pattern cannot match. Checking this first also
    * guarantees that text + (M-1) below stays inside the buffer. */
   if( px->M <= 0 || n < px->M ) return 0;

   /* Case-insensitive search compares against the upper-cased pattern copy */
   pat     = px->nocase ? px->Pnc : px->P;
   m1      = px->M - 1;
   bcShift = px->bcShift;

   t  = text + m1;   /* last byte of the current window */
   et = text + n;    /* one past the end of the text */

   /* 1-byte patterns: a plain linear scan is faster */
   if( !m1 )
   {
      if( !px->nocase )
      {
        for( ; t < et; t++ )
          if( *t == *pat ) return t;
      }
      else
      {
        for( ; t < et; t++ )
          if( toupper(*t) == *pat ) return t;
      }
      return 0;
   }

   if( !px->nocase )   /* case-sensitive */
   {
     while( t < et )
     {
       /* Scan loop: skip ahead by the bad-character shift until the byte
        * under the window end has shift 0, i.e. equals the last pattern
        * byte. Each shift is compared with the remaining distance BEFORE
        * it is applied, so t never moves past et. */
       do
       {
         k = bcShift[*t];
         if( k >= et - t ) return 0;
         t += k;

         k = bcShift[*t];
         if( k >= et - t ) return 0;
         t += k;
       } while( k );

       /* Candidate window: verify it from the last byte back to the first */
       q = t - m1;
       for( k = m1; k >= 0 && pat[k] == q[k]; k-- )
         ;
       if( k < 0 ) return q;   /* whole pattern matched */

       t++;                    /* mismatch: slide the window by one */
     }
   }
   else   /* case-insensitive: upper-case the text bytes as they are read */
   {
     while( t < et )
     {
       do
       {
         k = bcShift[toupper(*t)];
         if( k >= et - t ) return 0;
         t += k;

         k = bcShift[toupper(*t)];
         if( k >= et - t ) return 0;
         t += k;
       } while( k );

       q = t - m1;
       for( k = m1; k >= 0 && pat[k] == toupper(q[k]); k-- )
         ;
       if( k < 0 ) return q;

       t++;
     }
   }

   return 0;
}

释放函数比较简单,就不贴出来了。

标签:HBM,pat,Moore,Horsepool,unsigned,char,int,源码,return
来源: https://blog.csdn.net/guoguangwu/article/details/88378228