55
66/* fast search/count implementation, based on a mix between boyer-
77 moore and horspool, with a few more bells and whistles on the top.
8- for some more background, see: http://effbot.org/stringlib.htm */
8+ for some more background, see: http://effbot.org/zone/ stringlib.htm */
99
1010/* note: fastsearch may access s[n], which isn't a problem when using
1111 Python's ordinary string types, but may cause problems if you're
1616
1717#define FAST_COUNT 0
1818#define FAST_SEARCH 1
19+ #define FAST_RSEARCH 2
1920
2021Py_LOCAL_INLINE (Py_ssize_t )
2122fastsearch (const STRINGLIB_CHAR * s , Py_ssize_t n ,
@@ -41,51 +42,92 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
4142 if (s [i ] == p [0 ])
4243 count ++ ;
4344 return count ;
44- } else {
45+ } else if ( mode == FAST_SEARCH ) {
4546 for (i = 0 ; i < n ; i ++ )
4647 if (s [i ] == p [0 ])
4748 return i ;
49+ } else { /* FAST_RSEARCH */
50+ for (i = n - 1 ; i > -1 ; i -- )
51+ if (s [i ] == p [0 ])
52+ return i ;
4853 }
4954 return -1 ;
5055 }
5156
5257 mlast = m - 1 ;
53-
54- /* create compressed boyer-moore delta 1 table */
5558 skip = mlast - 1 ;
56- /* process pattern[:-1] */
57- for (mask = i = 0 ; i < mlast ; i ++ ) {
58- mask |= (1 << (p [i ] & 0x1F ));
59- if (p [i ] == p [mlast ])
60- skip = mlast - i - 1 ;
61- }
62- /* process pattern[-1] outside the loop */
63- mask |= (1 << (p [mlast ] & 0x1F ));
64-
65- for (i = 0 ; i <= w ; i ++ ) {
66- /* note: using mlast in the skip path slows things down on x86 */
67- if (s [i + m - 1 ] == p [m - 1 ]) {
68- /* candidate match */
69- for (j = 0 ; j < mlast ; j ++ )
70- if (s [i + j ] != p [j ])
71- break ;
72- if (j == mlast ) {
73- /* got a match! */
74- if (mode != FAST_COUNT )
59+
60+ if (mode != FAST_RSEARCH ) {
61+
62+ /* create compressed boyer-moore delta 1 table */
63+
64+ /* process pattern[:-1] */
65+ for (mask = i = 0 ; i < mlast ; i ++ ) {
66+ mask |= (1 << (p [i ] & 0x1F ));
67+ if (p [i ] == p [mlast ])
68+ skip = mlast - i - 1 ;
69+ }
70+ /* process pattern[-1] outside the loop */
71+ mask |= (1 << (p [mlast ] & 0x1F ));
72+
73+ for (i = 0 ; i <= w ; i ++ ) {
74+ /* note: using mlast in the skip path slows things down on x86 */
75+ if (s [i + m - 1 ] == p [m - 1 ]) {
76+ /* candidate match */
77+ for (j = 0 ; j < mlast ; j ++ )
78+ if (s [i + j ] != p [j ])
79+ break ;
80+ if (j == mlast ) {
81+ /* got a match! */
82+ if (mode != FAST_COUNT )
83+ return i ;
84+ count ++ ;
85+ i = i + mlast ;
86+ continue ;
87+ }
88+ /* miss: check if next character is part of pattern */
89+ if (!(mask & (1 << (s [i + m ] & 0x1F ))))
90+ i = i + m ;
91+ else
92+ i = i + skip ;
93+ } else {
94+ /* skip: check if next character is part of pattern */
95+ if (!(mask & (1 << (s [i + m ] & 0x1F ))))
96+ i = i + m ;
97+ }
98+ }
99+ } else { /* FAST_RSEARCH */
100+
101+ /* create compressed boyer-moore delta 1 table */
102+
103+ /* process pattern[0] outside the loop */
104+ mask = (1 << (p [0 ] & 0x1F ));
105+ /* process pattern[:0:-1] */
106+ for (i = mlast ; i > 0 ; i -- ) {
107+ mask |= (1 << (p [i ] & 0x1F ));
108+ if (p [i ] == p [0 ])
109+ skip = i - 1 ;
110+ }
111+
112+ for (i = w ; i >= 0 ; i -- ) {
113+ if (s [i ] == p [0 ]) {
114+ /* candidate match */
115+ for (j = mlast ; j > 0 ; j -- )
116+ if (s [i + j ] != p [j ])
117+ break ;
118+ if (j == 0 )
119+ /* got a match! */
75120 return i ;
76- count ++ ;
77- i = i + mlast ;
78- continue ;
121+ /* miss: check if previous character is part of pattern */
122+ if (!(mask & (1 << (s [i - 1 ] & 0x1F ))))
123+ i = i - m ;
124+ else
125+ i = i - skip ;
126+ } else {
127+ /* skip: check if previous character is part of pattern */
128+ if (!(mask & (1 << (s [i - 1 ] & 0x1F ))))
129+ i = i - m ;
79130 }
80- /* miss: check if next character is part of pattern */
81- if (!(mask & (1 << (s [i + m ] & 0x1F ))))
82- i = i + m ;
83- else
84- i = i + skip ;
85- } else {
86- /* skip: check if next character is part of pattern */
87- if (!(mask & (1 << (s [i + m ] & 0x1F ))))
88- i = i + m ;
89131 }
90132 }
91133
0 commit comments