6.28 $\text{String Notes II}$
$\large\text{6.28 Notes} $
$\text{String Notes II} $
\(\text{Content: Suffix Array (Ex. Base Sort), Mismatch Tree}\)
\(\large\to\text{A Suffix Array Blog}\leftarrow\)
// Joke "monkey sort" (bogosort) over the 1-indexed range a[1..n].
// Keeps shuffling the range at random until one pass finds no adjacent
// inversion, i.e. until a[1..n] is non-decreasing.
// `n` is not a parameter: it is read from a variable declared outside this
// snippet. `a` must have valid elements at indices 1..n.
inline void Monkey_Sort(int *a) {
    bool flag = true; // true while the last pass found an inversion
    while(flag) {
        flag = false;
        for(int i = 1; i < n; i++) {
            // One out-of-order neighbour pair is enough to reject this order.
            if(a[i] > a[i + 1]) { flag = true; break; }
        }
        if(flag) random_shuffle(a + 1, a + n + 1);
    }
}
#include <windows.h>
// Joke variant of the monkey sort above it: same shuffle-until-sorted loop
// over a[1..n], but every adjacent inversion found during a pass also calls
// Sleep(a[i] * 10 + 10) (Sleep comes from <windows.h>).
// `n` is read from a variable declared outside this snippet.
inline void Sleepy_Monkey_Sort(int *a) {
    bool flag = true; // true while the last pass found an inversion
    while(flag) {
        flag = false;
        for(int i = 1; i < n; i++) {
            if(a[i] > a[i + 1]) {
                flag = true;
                Sleep(a[i] * 10 + 10);
            }
        }
        if(flag) random_shuffle(a + 1, a + n + 1);
    }
}
\(\text{I - SA: Suffix Array (So Damn Difficult)}\)
\(\text{Q: What about another solution using Hash & Binary Search? (Get to know it!)}\)
#include <bits/stdc++.h>
#define _ read()
#define LL long long
#define ui unsigned int
#define pii pair <int, int>
#define pll pair <LL, LL>
#define Mp make_pair
#define db double
#define eps 1e-7
#define Pi acos(-1)
#define min(x, y) (x < y ? x : y)
#define max(x, y) (x > y ? x : y)
#define lsp p << 1
#define rsp p << 1 | 1
#define lowbit(x) (x & -x)
#define ms(a, x) memset(a, x, sizeof(a))
using namespace std;
const int N = 1e6 + 5; // Check: The value of N

// Redirects stdin to "in.txt" and stdout to "Ans.txt" (local debugging helper).
inline void File() {
    freopen("in.txt", "r", stdin);
    freopen("Ans.txt", "w", stdout);
}

// All arrays are 1-indexed over positions / ranks 1..n.
// sa[i]:  starting position of the suffix whose rank is i
// rak[i]: rank of the suffix that starts at position i (first sort key)
// tp[i]:  suffix start positions listed in increasing order of the second
//         sort key; after the swap inside SA_build it temporarily holds the
//         previous round's ranks instead
// tax[]:  counting-sort buckets; m: largest key value currently in rak[]
int n, sa[N], rak[N], tp[N], tax[N], m;
char s[N];

// Counting sort of the suffixes into sa[]: primary key rak[], ties broken by
// the order in which the suffixes appear in tp[].
inline void Bsort() {
    for(int i = 0; i <= m; i++) tax[i] = 0;
    for(int i = 1; i <= n; i++) tax[rak[i]]++;
    for(int i = 1; i <= m; i++) tax[i] += tax[i - 1];
    // Walk tp[] backwards so that, inside one bucket, a suffix listed later
    // in tp[] receives the larger slot: this keeps the tp[] order for ties.
    for(int i = n; i >= 1; i--) sa[tax[rak[tp[i]]]--] = tp[i]; // store the result
}

// Builds the suffix array of s[1..n] by prefix doubling and prints
// sa[1..n] separated by spaces.
// Expects the text in s[1..n] and its length in n (n < N).
// On return sa[] is the suffix array and rak[] its inverse.
inline void SA_build() {
    // Keys are byte value + 1, so every character maps into 1..256.
    m = 256;
    for(int i = 1; i <= n; i++) rak[i] = static_cast<unsigned char>(s[i]) + 1, tp[i] = i;
    // Initial order by first character. The doubling loop reads sa[] before
    // it sorts, so sa[] has to be valid before the first round.
    Bsort();
    for(int w = 1, p = 0; p < n; m = p, w <<= 1) {
        // Second key = rank of the suffix starting w positions later.
        // The last w suffixes have no such suffix, so they come first.
        p = 0;
        for(int i = 1; i <= w; i++) tp[++p] = n - w + i;
        for(int i = 1; i <= n; i++) if(sa[i] > w) tp[++p] = sa[i] - w;
        Bsort(); // To base-sort rak(key I) and tp (key II) -> to sa[]
        // Keep the old ranks in tp[] while the new ones are written to rak[].
        // Only indices 1..n are ever read, so only that range is exchanged.
        std::swap_ranges(rak + 1, rak + n + 1, tp + 1);
        rak[sa[1]] = p = 1;
        for(int i = 2; i <= n; i++) { // tp[] currently holds the previous ranks
            const int prev = sa[i - 1];
            const int cur = sa[i];
            // A second half that would start past n is the empty string: key 0.
            const int prevKey2 = (prev + w <= n) ? tp[prev + w] : 0;
            const int curKey2 = (cur + w <= n) ? tp[cur + w] : 0;
            // Rank-adjacent suffixes share a rank only if both keys are equal.
            rak[cur] = (tp[prev] == tp[cur] && prevKey2 == curKey2) ? p : ++p;
        }
    }
    for(int i = 1; i <= n; i++) printf("%d ", sa[i]);
}
signed main() {
scanf("%s", s + 1);
n = strlen(s + 1);
需处理的是,当前已经排好序的(长度为\(w/2\)的)若干后缀。然后对这些长度为\(w/2\)的后缀进行拼接,拼接成长度为\(w\)的新后缀。设新串\(AB=A+B\),则在该过程中,以\(A\)的上一轮排名为第一关键字\((\rm keyI:rak[i])\),以\(B\)的上一轮排名为第二关键字\((\rm keyII:tp[i])\)(因为\(A\)在\(B\)的前面),套用\(\rm Bsort\)的模板进行排序,同时更新\(sa[i]\)。接着用更新后的\(sa[i]\)来更新\(rak[i]\)的值(同时判重),直到互不相同的排名数量达到\(n\)。
$\text{Ex. Base-Sort} $
该算法可以用于求解\(\rm SA\)。
然后\(\rm SA\)还有一个东西叫\(\rm height[i]\),这东西看着就困难。毕竟前面就学自闭了\(\text{qwq wtcl awawa}\)。。。
然后\(\text{Suffix Array}\)还有一大堆用法,以后再看......
\(\text{II - Mismatch Tree}\)
// Keep following the kmp[] link from j until reaching a j whose kmp value is 0.
while(kmp[j] != 0) {
    j = kmp[j];
}
可以由长度从大到小遍历一个字符串的所有 border,那么这启示我们可以连一条从 \(\large j\) 指向 \(\large kmp[j]\) 的有向边。
例如 \(\large j\to kmp[j]\to kmp[kmp[j]]\to...\to kmp^x[j]\)
就是调\(LCA\)的时候极其地下饭...... 哎,不说了
