最小表示法

很简单的一个东西

应用

bzoj1398

裸题

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#include<bits/stdC++.h>
using namespace std;
const int N = 1e6 + 9;
char A[N], B[N];
int n, sA, sB;
int get (char *s) {
int i = 0, j = 1, k = 0, mi;
for (; i < n && j < n && k < n; ) {
mi = s[i + k >= n ? i + k - n : i + k] - s[j + k >= n ? j + k - n : j + k];
if (mi == 0) ++k;
else if (mi > 0) i = i + k + 1, k = 0;
else j = j + k + 1, k = 0;
if (i == j) ++j;
}
return i < j ? i : j;
}
int main () {
scanf("%s%s", A, B); n = strlen(A);
sA = get(A); sB = get(B);
for (int i = 0; i < n; ++i) if (A[(sA + i) % n] != B[(sB + i) % n]) return puts("No"), 0;
puts("Yes");
for (int i = 0; i < n; ++i) putchar(A[(sA + i) % n]);
return 0;
}


后缀自动机


right集合

首先,定义很容易理解.

后缀自动机是个DAG

一个串$S$的$right$集合:设S在母串中的所有位置为$\{[l_1,r_1),[l_2,r_2),[l_3,r_3)\}$,那么他的right集合便为$\{r_1,r_2,r_3\}$,也有的人称$right$集合为$end-pos$集合

性质:任意两个状态的$right$集合不同

根据$right$集合,所有的非空子串可以分为多个等价类。

引理1:两个非空子串u和w$(length(u)<=length(v)) right$集合等价当且仅当u只作为w的后缀在字符串中出现.

引理2: 两个非空子串u和w$(length(u)<=length(v)) $他们的$right$集合不相交或者包含

引理3:对于一个$right$等价类,将其中的子串按照长度非递增排序。那么每个子串的长度比前一个长度小一,并且是前一个子串的后缀。


后缀链

对于一个状态$v≠t_0$,一个状态对应着一个等价类,另w作为其中最长的子串,其余的子串都是w的后缀。

w的所有后缀中,一部分在w所在的等价类中,另外一部分位于其他的等价类中。于是定义后缀链$link(v)$连接w所有后缀中位于其他等价类并且长度最长的那个后缀所在的等价类。

引理4:所有后缀链组成一个以$t_0$为根的树。

对于状态$v,link(v)$指向的状态中的子串长度严格小于状态v中的子串,所以跟随后缀链最终可以到达包含空串的$t_0$。

引理5:如果按照$right$包含关系创建一棵树,那么这棵树的结构是和后缀链树的结构相似的。

按照引理2,两个状态的$right$要么包含要么不相交,所以按照包含关系可以创建一棵树。

对于状态$v≠t_0$以及$link(v)$,根据后缀链的定义以及引理2:$right(v)⊂right(link(v))$
得证。


知道了这些,我们就可以yy后缀自动机的构造了
自己画的丑陋的图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
struct State {
int len;
State *par, *go[26];
State (int len = 0) : par(0), len(len) { memset(go, 0, sizeof go); }
}*last, *root;
void extend (int w) {
State *p = last, *np = new State (last -> len + 1);
while (p && !p -> go[w]) p -> go[w] = np, p = p -> par;
if (p == 0) {
np -> par = root;
} else {
State *q = p -> go[w];
if (p -> len + 1 == q -> len) {
np -> par = q;
} else {
State *nq = new State (p -> len + 1);
memcpy(nq -> go, q -> go, sizeof q -> go);
nq -> par = q -> par;
q -> par = nq;
np -> par = nq;
while (p && p -> go[w] == q) p -> go[w] = nq, p = p -> par;
}
}
last = np;
}

应用

poj2774

给你两串字符,要你找出在这两串字符中都出现过的最长子串
对于这种双串问题,一个简单且经典的套路是对一个串建SAM,另一个串在SAM上走.然后parent指针就像AC自动机的失配指针一样,不断地跳。

SA的话直接找相邻的height就完了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#include<cstdio>
#include<cstring>
#include<algorithm>
using namespace std;
const int N = 2e5 + 9;
char A[N], B[N];
int n;
struct State {
int len;
State *go[26], *par;
State (int len = 0) : len(len), par(0) { memset(go, 0, sizeof go); }
}*last, *root, pool[N], *pis = pool;
void Extend (int w) {
State *p = last, *np = new (pis++) State(p -> len + 1);
while (p && !p -> go[w]) p -> go[w] = np, p = p -> par;
if (p == 0) {
np -> par = root;
} else {
State *q = p -> go[w];
if (p -> len + 1 == q -> len) {
np -> par = q;
} else {
State *nq = new (pis++) State(p -> len + 1);
memcpy(nq -> go, q -> go, sizeof q -> go);
nq -> par = q -> par;
q -> par = nq;
np -> par = nq;
while (p && p -> go[w] == q) p -> go[w] = nq, p = p -> par;
}
}
last = np;
}
int ans;
int main () {
scanf("%s%s", A, B);
n = strlen(A);
root = last = new (pis++) State();
for (int i = 0; i < n; ++i) Extend(A[i] - 'a');
n = strlen(B);
State *p = root;
for (int i = 0, w, j = 0; i < n; ++i) {
w = B[i] - 'a';
while (p != root && !p -> go[w]) p = p -> par, j = p -> len;
if (p -> go[w]) p = p -> go[w], ++j;
if (j > ans) ans = j;
}
printf("%d\n", ans);
return 0;
}


poj1743

求最长不重叠重复子串
很显然重复子串对应的right集合相同,所以求每个节点right集合中的最小与最大值,与他的len去min,这个topsort就可以
一个小技巧,topsort可以改成直接对len基数排序!

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include<algorithm>
#include<cstring>
#include<cstdio>
using namespace std;
const int N = 4e4 + 9, INF = 0x3f3f3f3f;
int n, c[N], m, ans;
void Max (int &x, int y) { if (x < y) x = y; }
struct State {
int len, mx, mi;
State *go[180], *par;
State (int len = 0) : len(len), mx(0), mi(INF), par(0) { memset(go, 0, sizeof go); }
void Gx (State *rhs) { if (rhs -> mx > mx) mx = rhs -> mx; if (rhs -> mi < mi) mi = rhs -> mi; }
void Get_Ans () { Max(ans, min(mx - mi, len)); }
}*last, *root, pool[N], *pis = pool, *jh[N];
void Extend (int w) {
State *p = last, *np = new (pis++) State(p -> len + 1);
np -> mi = np -> mx = np -> len + 1;
while (p && !p -> go[w]) p -> go[w] = np, p = p -> par;
if (p == 0) {
np -> par = root;
} else {
State *q = p -> go[w];
if (p -> len + 1 == q -> len) {
np -> par = q;
} else {
State *nq = new (pis++) State(p -> len + 1);
memcpy(nq -> go, q -> go, sizeof q -> go);
nq -> par = q -> par;
q -> par = nq;
np -> par = nq;
while (p && p -> go[w] == q) p -> go[w] = nq, p = p -> par;
}
}
last = np;
}
int main () {
while (scanf("%d", &n) == 1 && n) {
pis = pool;
last = root = new (pis++) State();
for (int i = 1, j, la; i <= n; ++i) {
scanf("%d", &j);
if (i > 1) Extend(j - la + 88);
la = j;
}
--n;
for (int i = 0; i <= n; ++i) c[i] = 0;
for (State *now = pool; now != pis; ++now) ++c[now -> len];
for (int i = 1; i <= n; ++i) c[i] += c[i - 1];
for (State *now = pool; now != pis; ++now) jh[c[now -> len]--] = now;
ans = 0;
for (int i = pis - pool; i > 1; --i) {
jh[i] -> par -> Gx(jh[i]);
jh[i] -> Get_Ans();
}
printf("%d\n", ans < 4 ? 0 : ans + 1);
}
return 0;
}


poj1226

一个简单的处理技巧就是把每个串和他的反串中间加一个符号。
然后就是正常的多串匹配了。
先让一个串建一个SAM,然后其他所有串在SAM上走一次即可。对于同一个串,取最大值,不同串,取最小值。

由于字符集大小问题,我用的SA.


poj3415

SA的话,很显然建好height数组后,我们考虑每个$height[i]$的贡献,那就要找到在他前面比他大的最后一个height,和在他后面比他小的第一个$height$,这个单调栈就行了。
SAM的话,先对A建SAM,求出每个节点$right$集合的大小,然后$B$在$A$上面走,边走边统计,走到$i$这个节点的贡献就为$max(len-k,0)*|right_{i}|$,(len表示当前匹配的长度

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#include<cstdio>
#include<cctype>
#include<algorithm>
#include<cstring>
using namespace std;
const int N = 2e5 + 9;
int K, c[N], n, m;
char A[N], B[N];
long long ans;
struct State {
int len, sum;
long long qd;
State *go[52], *par;
State (int len = 0) : len(len), sum(0), qd(0), par(0) { memset(go, 0, sizeof go); }
}*last, *root, pool[N], *pis = pool, *xl[N];
void Extend (int w) {
State *p = last, *np = new (pis++) State(p -> len + 1);
np -> sum = 1;
while (p && !p -> go[w]) p -> go[w] = np, p = p -> par;
if (p == 0) {
np -> par = root;
} else {
State *q = p -> go[w];
if (p -> len + 1 == q -> len) {
np -> par = q;
} else {
State *nq = new (pis++) State(p -> len + 1);
memcpy(nq -> go, q -> go, sizeof q -> go);
nq -> par = q -> par;
q -> par = nq;
np -> par = nq;
while (p && p -> go[w] == q) p -> go[w] = nq, p = p -> par;
}
}
last = np;
}
int f (char o) { return (o<='z'&&o>='a')?(o-'a'):(26+o-'A'); }
int main () {
// freopen("3415.in", "r", stdin);
// freopen("3415.out", "w", stdout);
while (scanf("%d", &K), K) {
scanf("%s%s", A, B); n = strlen(A); m = strlen(B);
pis = pool; last = root = new (pis++) State();
for (int i = 0; i < n; ++i) Extend(f(A[i]));
root -> par = root;
for (int i = 0; i <= n; ++i) c[i] = 0;
for (State *p = pis - 1; p != pool; --p) ++c[p -> len];
for (int i = 1; i <= n; ++i) c[i] += c[i - 1];
for (State *p = pis - 1; p != pool; --p) xl[c[p -> len]--] = p;
for (int i = pis - pool - 1; i; --i) xl[i] -> par -> sum += xl[i] -> sum;
for (int i = 1; i < pis - pool; ++i)
xl[i] -> qd = xl[i] -> par -> qd + 1ll * xl[i] -> par -> sum * max(xl[i] -> par -> len - max(xl[i] -> par -> par -> len, K - 1), 0);
State *p = root; ans = 0;
for (int i = 0, j, k = 0; i < m; ++i) {
j = f(B[i]);
while (p != root && !p -> go[j]) p = p -> par, k = p -> len;
if (p -> go[j]) {
p = p -> go[j];
++k;
ans += 1ll * p -> sum * max(k - max(p -> par -> len, K - 1), 0) + p -> qd;
}
// p -> sum * max(p -> len - :: k + 1, 0);
}
printf("%lld\n", ans);
}
return 0;
}

spoj8222 Substrings

给一个字符串S,令$F(x)$表示$S$的所有长度为$x$的子串中,出现次数的最大值。求$F(1)..F(Length(S))$
这道题用SA的话:先求出height数组,然后把sa数组看成一个序列,然后考虑按照height值从小到大将height两边的两段合并在一起就好了。
这道题用SAM的话:直接考虑求出每个节点的right集合大小,然后更新$[min(x),max(x)]$这一段就完了.(然后我们可以发现实际上答案是递减的,所以我们更新$max(x)$就行了.


bzoj2882

最小表示法裸题,我们考虑一下怎么用SAM做呢?
直接用map,然后每次按最小的字典序走就完了….


SPOJ7258

题意:给一个长度不超过90000的串S,每次询问它的所有不同子串中,字典序第K小的,询问不超过500个
建SAM后,我们能够快速求出每个节点能够向后走出多少个不同子串,直接字典序贪心就完了。


bzoj3998

对于一个给定长度为N的字符串,求它的第K小子串是什么。
SA: 对于第一问,用SA就非常的自然,直接扫一扫就完了;
对于第二问,逐位确定,每次对当前新加入的字符判断即可。
第一问和第二处理的复杂度都是线性的.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#include<cstdio>
#include<algorithm>
#include<cstring>
using namespace std;
const int N = 5e5 + 9;
char s[N];
int n, t, k, c[N], sa[N], t1[N], t2[N], rank[N], height[N], xl[N], p;
typedef long long ll;
ll sum[N];
void build_sa (int m) {
int *x = t1, *y = t2, p = 0, i;
for (i = 0; i < m; ++i) c[i] = 0;
for (i = 0; i < n; ++i) ++c[x[i] = s[i] - 'a'];
for (i = 1; i < m; ++i) c[i] += c[i - 1];
for (i = 0; i < n; ++i) sa[--c[x[i]]] = i;
for (int k = 1; k < n; k <<= 1, p = 0) {
for (i = 0; i < m; ++i) c[i] = 0;
for (i = n - k; i < n; ++i) y[p++] = i;
for (i = 0; i < n; ++i) if (sa[i] >= k) y[p++] = sa[i] - k;
for (i = 0; i < n; ++i) ++c[x[i]];
for (i = 1; i < m; ++i) c[i] += c[i - 1];
for (i = n - 1; ~i; --i) sa[--c[x[y[i]]]] = y[i];
swap(x, y); x[sa[0]] = p = 0;
for (i = 1; i < n; ++i) x[sa[i]] = y[sa[i]] == y[sa[i - 1]] && sa[i] + k < n && sa[i - 1] + k < n && y[sa[i] + k] == y[sa[i - 1] + k] ? p : ++p;
if ((m = p + 1) == n) break;
}
}
void get_height () {
int i, j, h = 0;
for (i = 0; i < n; ++i) rank[sa[i]] = i;
for (i = 0; i < n; ++i) {
if (h) --h;
if (!rank[i]) continue ;
j = sa[rank[i] - 1];
while (s[h + j] == s[h + i]) ++h;
height[rank[i]] = h;
}
}
int main () {
scanf("%s", s);
n = strlen(s);
build_sa(26);
get_height();
scanf("%d%d", &t, &k);
if (t == 0) {
for (int i = 0; i < n; ++i) {
k -= n - sa[i] - height[i];
if (k <= 0) {
for (int j = sa[i]; j < n + k; ++j) putchar(s[j]);
return 0;
}
}
return puts("-1"), 0;
}
for (int i = 0; i < n; ++i) sum[i + 1] = sum[i] + n - sa[i];
if (sum[n] < k) return puts("-1"), 0;
for (int i = 0; i <= n; ++i) c[i] = 0;
for (int i = 1; i <= n; ++i) ++c[height[i]];
for (int i = 1; i <= n; ++i) c[i] += c[i - 1];
for (int i = n; i; --i) xl[c[height[i]]--] = i;
int l = 0, r = n - 1, pl = 1, pr = 1;
for (int i = 0; k > 0; ++i) {
while (pl <= n && height[xl[pl]] < i) ++pl;
while (pr <= n && height[xl[pr]] <= i) ++pr;
for (int j = pl, x; j < pr; ++j) if (xl[j] > l && xl[j] <= r + 1) {
x = xl[j]; p = sum[x] - sum[l] - 1ll * i * (x - l);
if (p >= k) { r = x - 1; break; }
k -= p;
l = x;
}
putchar(s[sa[l] + i]); k -= r - l + 1;
}
return 0;
}

SAM: 裸题,按字典序走就完了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#include<cstdio>
#include<algorithm>
#include<cstring>
using namespace std;
const int N = 1e6 + 9;
struct State {
int len, s1, s2, v2;
State *go[26], *par;
State (int len = 0) : len(len), s1(1), s2(0), v2(0), par(0) { memset(go, 0, sizeof go); }
void gx () { for (int i = 0; i < 26; ++i) if (go[i]) s1 += go[i] -> s1, s2 += go[i] -> s2; }
}*last, *root, pool[N], *pis = pool, *xl[N];
void Extend (int w) {
State *p = last, *np = new (pis++) State(p -> len + 1);
np -> v2 = 1;
while (p && !p -> go[w]) p -> go[w] = np, p = p -> par;
if (p == 0) {
np -> par = root;
} else {
State *q = p -> go[w];
if (p -> len + 1 == q -> len) {
np -> par = q;
} else {
State *nq = new (pis++) State(p -> len + 1);
memcpy(nq -> go, q -> go, sizeof q -> go);
nq -> par = q -> par;
q -> par = nq;
np -> par = nq;
while (p && p -> go[w] == q) p -> go[w] = nq, p = p -> par;
}
}
last = np;
}
char s[N];
int t, k, n, c[N], m;
int main () {
scanf("%s%d%d", s, &t, &k);
last = root = new (pis++) State(); root -> s1 = 0;
n = strlen(s);
for (int i = 0; i < n; ++i) Extend(s[i] - 'a');
m = pis - pool;
for (State *i = pis - 1; i != pool; --i) ++c[i -> len];
for (int i = 1; i <= n; ++i) c[i] += c[i - 1];
for (State *i = pis - 1; i != pool; --i) xl[c[i -> len]--] = i;
for (int i = m - 1; i; --i) {
xl[i] -> gx();
xl[i] -> par -> v2 += xl[i] -> v2;
xl[i] -> s2 += xl[i] -> v2;
}
root -> gx();
root -> v2 = 0;
if (t == 0) {
if (k > root -> s1) return puts("-1"), 0;
State *p = root; ++k;
while (k > 1) {
--k;
for (int i = 0; i < 26; ++i) if (p -> go[i]) {
if (p -> go[i] -> s1 < k) k -= p -> go[i] -> s1;
else { putchar('a' + i); p = p -> go[i]; break; }
}
}
} else {
if (k > root -> s2) return puts("-1"), 0;
State *p = root;
while (k > p -> v2) {
k -= p -> v2;
for (int i = 0; i < 26; ++i) if (p -> go[i]) {
if (p -> go[i] -> s2 < k) k -= p -> go[i] -> s2;
else { putchar('a' + i); p = p -> go[i]; break; }
}
}
}
return 0;
}


bzoj4566

给定两个字符串,求出在两个字符串中各取出一个子串使得这两个子串相同的方案数。两个方案不同当且仅当这两个子串中有一个位置不同。
SA: 考虑对每个height统计他作为两个串之间最小的height时的贡献就完了,单调栈!

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include<cstdio>
#include<algorithm>
#include<cstring>
using namespace std;
const int N = 4e5 + 9;
int c[N], n, t1[N], t2[N], sa[N], rank[N], height[N], n1, n2, l[N], r[N], stack[N], t, s1[N], s2[N];
char s[N];
long long ans;
void build_sa (int m) {
int i, p = 0, *x = t1, *y = t2;
for (i = 0; i < m; ++i) c[i] = 0;
for (i = 0; i < n; ++i) ++c[x[i] = s[i]];
for (i = 1; i < m; ++i) c[i] += c[i - 1];
for (i = 0; i < n; ++i) sa[--c[x[i]]] = i;
for (int k = 1; k < n; k <<= 1, p = 0) {
for (i = 0; i < m; ++i) c[i] = 0;
for (i = n - k; i < n; ++i) y[p++] = i;
for (i = 0; i < n; ++i) if (sa[i] >= k) y[p++] = sa[i] - k;
for (i = 0; i < n; ++i) ++c[x[i]];
for (i = 1; i < m; ++i) c[i] += c[i - 1];
for (i = n - 1; ~i; --i) sa[--c[x[y[i]]]] = y[i];
swap(x, y); x[sa[0]] = p = 0;
for (i = 1; i < n; ++i) x[sa[i]] = y[sa[i]] == y[sa[i - 1]] && sa[i] + k < n && sa[i - 1] + k < n && y[sa[i - 1] + k] == y[sa[i] + k] ? p : ++p;
if ((m = p + 1) == n) break;
}
}
void get_height () {
int i, j, h = 0;
for (i = 0; i < n; ++i) rank[sa[i]] = i;
for (i = 0; i < n; ++i) {
if (h) --h;
if (!rank[i]) continue ;
j = sa[rank[i] - 1];
while (s[i + h] == s[j + h]) ++h;
height[rank[i]] = h;
}
}
void count () {
int i;
s1[0] = sa[0] < n1;
s2[0] = sa[0] > n1;
stack[t = 0] = 1;
for (i = 1; i < n; ++i) {
s1[i] = s1[i - 1] + (sa[i] < n1);
s2[i] = s2[i - 1] + (sa[i] > n1);
while (t && height[i] <= height[stack[t]]) --t;
l[i] = stack[t];
stack[++t] = i;
}
stack[t = 0] = n;
for (i = n - 1; i; --i) {
while (t && height[i] < height[stack[t]]) --t;
r[i] = stack[t];
stack[++t] = i;
}
for (i = 1; i < n; ++i) if (height[i]) {
ans += (1ll * (s1[i - 1] - s1[l[i] - 1]) * (s2[r[i] - 1] - s2[i - 1]) + 1ll * (s2[i - 1] - s2[l[i] - 1]) * (s1[r[i] - 1] - s1[i - 1])) * height[i];
}
}
int main () {
scanf("%s", s); n1 = strlen(s); s[n1] = '$';
scanf("%s", s + n1 + 1); n2 = strlen(s + n1 + 1);
n = n1 + 1 + n2;
build_sa(200);
get_height();
count();
printf("%lld\n", ans);
return 0;
}

SAM:
对一个串建SAM,另一个串走一走就完了…

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#include<cstdio>
#include<algorithm>
#include<cstring>
using namespace std;
const int N = 4e5 + 9;
int n, c[N], m;
typedef long long ll;
struct State {
int len, num;
ll v;
State *go[26], *par;
State (int len = 0) : len(len), num(0), v(0), par(0) { memset(go, 0, sizeof go); }
}*last, *root, pool[N], *pis = pool, *xl[N];
ll ans;
void Extend (int w) {
State *p = last, *np = new (pis++) State(p -> len + 1);
np -> num = 1;
while (p && !p -> go[w]) p -> go[w] = np, p = p -> par;
if (p == 0) {
np -> par = root;
} else {
State *q = p -> go[w];
if (p -> len + 1 == q -> len) {
np -> par = q;
} else {
State *nq = new (pis++) State(p -> len + 1);
memcpy(nq -> go, q -> go, sizeof q -> go);
nq -> par = q -> par;
q -> par = nq;
np -> par = nq;
while (p && p -> go[w] == q) p -> go[w] = nq, p = p -> par;
}
}
last = np;
}
char A[N], B[N];
int main () {
scanf("%s%s", A, B);
n = strlen(A);
last = root = new (pis++) State();
for (int i = 0; i < n; ++i) Extend(A[i] - 'a');
for (State *s = pis - 1; s != pool; --s) ++c[s -> len];
for (int i = 1; i <= n; ++i) c[i] += c[i - 1];
for (State *s = pis - 1; s != pool; --s) xl[c[s -> len]--] = s;
m = pis - pool - 1; root -> par = root;
for (int i = m; i; --i) xl[i] -> par -> num += xl[i] -> num;
for (int i = 1; i <= m; ++i) xl[i] -> v += xl[i] -> par -> v + 1ll * xl[i] -> par -> num * (xl[i] -> par -> len - xl[i] -> par -> par -> len);
n = strlen(B); State *p = root;
for (int i = 0, w, len = 0; i < n; ++i) {
w = B[i] - 'a';
while (p != root && !p -> go[w]) p = p -> par, len = p -> len; //到root
if (p -> go[w]) {
p = p -> go[w]; ++len;
ans += p -> v + 1ll * (len - p -> par -> len) * p -> num;
}
}
printf("%lld\n", ans);
return 0;
}


bzoj3238: [Ahoi2013]差异

SA:还是单调栈
SAM:在parent树上算就行了


bzoj3473: 字符串

SA: 简单题
SAM:
我们可以由这道题引入“广义SAM”,即多串SAM,也可以说是Trie上SAM(日妈用了一晚上yy,果然是脑子不清晰

  1. 广义SAM的Right集合表示的是Trie上的节点,当然,$len$还是连续的,
  2. 每个串在广义SAM上走都不会失配。也就是说在任何状态$p$下$len=p->len$

首先建立广义后缀树.
然后利用离线+树状数组搞出每一个节点在多少个串中.
然后如果这个节点在不少于k个串中,我们令这个结点的权值为这个节点父亲边的字符个数,否则为0.
随后我们预处理一下从根到每个节点路径上的权值和.
于是每个字符串的答案等于所有这个字符串的后缀节点的从根到该节点的权值和.
时间复杂度O(nlogn).(转自wyfcyx)


bzoj2780: [Spoj]8093 Sevenk Love Oimaster

给出多个字符串,然后对于每一次询问某一个字符串在多少个原串中出现过。
SA: 随便做,直接二分一下这个字符串所在区间,然后就相当于查询区间不同颜色的个数了
SAM: 每次记录下走到的节点,我们显然求出这个节点被多少个原字符串覆盖就行了,就可3473是一个问题了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#include <iostream>
#include <cstdio>
#include <algorithm>
#include <cstring>
#include <cmath>
#include <string>
#include <map>
using namespace std;
const int N=4e5+5;
typedef long long ll;
int n,q;
string s[N>>1];
char ss[N>>1];
struct node{
map<int, int>ch;
int par,val;
int cou,cur;
}t[N];
int sz=1,root=1,last=1;
void extend(int c){
int p=last,np=++sz;
t[np].val=t[p].val+1;
for(;p&&!t[p].ch[c];p=t[p].par) t[p].ch[c]=np;
if(!p) t[np].par=root;
else{
int q=t[p].ch[c];
if(t[q].val==t[p].val+1) t[np].par=q;
else{
int nq=++sz;
t[nq]=t[q];t[nq].val=t[p].val+1;
t[q].par=t[np].par=nq;
for(;p&&t[p].ch[c]==q;p=t[p].par) t[p].ch[c]=nq;
}
}
last=np;
}
int c[N],a[N];
ll f[N];
void RadixSort(){
for(int i=1;i<=sz;i++) c[t[i].val]++;
for(int i=1;i<=sz;i++) c[i]+=c[i-1];
for(int i=sz;i>=1;i--) a[c[t[i].val]--]=i;
}
int count () {
scanf("%s", ss);
int len = strlen(ss), u = root;
for (int j = 0; j < len; ++j) {
u = t[u].ch[ss[j]-'a'];
if (u == 0) return 0;
}
return t[u].cou;
}
void solve(){
int u;ll ans;
for(int i=1;i<=n;i++){//printf("i %d\n",i);
u=root;
for(int j=0;j<s[i].size();j++){
u=t[u].ch[s[i][j]-'a'];//printf("u %d %d %d\n",u,t[u].cou,j);
int p=u;
for(;p&&t[p].cur!=i;p=t[p].par) t[p].cou++,t[p].cur=i;
}
}
for(int i=1;i<=q;++i) {
printf("%d\n", count());
}
}
int main(){
scanf("%d%d",&n,&q);
for(int i=1;i<=n;i++){
scanf("%s",ss),s[i]=string(ss);
last=root;
for(int j=0;j<s[i].size();j++) extend(s[i][j]-'a');
}
solve();
}

代码改编自Candy


bzoj2806

题意:
多个主串和多个询问串,每次询问将询问串分成多个连续子串,如果一个子串长度>=L且在主串中出现过就是熟悉的
如果熟悉的字符串长度>=询问串长的90%就是熟悉的文章;求成为熟悉的文章的最大的L

这道题很显然先二分,然后二分后考虑用dp判定
f[i]表示前i个的最大熟悉值
$f[i]=max(f[i-1], f[j] + i - j, l<=i-j<=len,len$表示以i结尾的字符串最大能向左延伸出多长$)$
这个SAM和SA都行,不过用SAM比较简单.这个dp显然单调队列即可


bzoj1396

SAM: 裸题,|right|=1就可以作为识别子串了,然后分两种情况讨论一下就好。
不想写直接copy Candy的代码
SA: 考虑一下如何不重复地统计只出现一次的串,很显然对于sa[i]就$是[sa[i]+max(height[i],height[i-1]), n)$这段区间,还是线段树更新区间就行了。


CF 235C. Cyclical Quest

题意:给一个主串和多个询问串,求询问串的所有循环同构串出现次数和
SA: 枚举一下每个询问串的转折点就行了。
SAM: 每个询问串后面再接一个他的复制,就完了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#include<cstdio>
#include<algorithm>
#include<cstring>
using namespace std;
const int N = 2e6 + 9;
struct State {
int len, las, val;
State *go[26], *par;
State (int len = 0) : len(len), las(0), val(0), par(0) { memset(go, 0, sizeof go); }
}*last, *root, pool[N], *pis = pool, *xl[N];
void Extend (int w) {
State *p = last, *np = new (pis++) State(p->len + 1); np->val = 1;
while (p && !p->go[w]) p->go[w] = np, p = p->par;
if (p == 0) {
np->par = root;
} else {
State *q = p->go[w];
if (p->len + 1 == q->len) {
np->par = q;
} else {
State *nq = new (pis++) State(*q);
nq->len = p->len + 1; nq->val = 0;
q->par = np->par = nq;
while (p && p->go[w] == q) p->go[w] = nq, p = p->par;
}
}
last = np;
}
int n, m, c[N];
char s[N];
int main () {
scanf("%s", s);
n = strlen(s);
last = root = new (pis++) State();
for (int i = 0; i < n; ++i) Extend(s[i] - 'a');
m = pis - pool - 1;
for (State *p = pis - 1; p != root; --p) ++c[p->len];
for (int i = 1; i <= n; ++i) c[i] += c[i - 1];
for (State *p = pis - 1; p != root; --p) xl[c[p->len]--] = p;
for (int i = m; i; --i) {
if (!xl[i]->val) ++xl[i]->val;
xl[i]->par->val += xl[i]->val;
}
scanf("%d", &n);
for (int i = 1; i <= n; ++i) {
scanf("%s", s);
State *p = root;
m = strlen(s);
int ans = 0;
for (int j = 0; j < m; ++j) s[j + m] = s[j];
for (int j = 0, w, len = 0; j < (m << 1); ++j) {
w = s[j] - 'a';
while (p != root && !p->go[w]) p = p->par, len = p->len;
if (p->go[w]) {
p = p->go[w]; ++len;
while (p->par->len >= m) p = p->par, len = p->len;
if (len >= m && p->las != i) p->las = i, ans += p->val;
}
}
printf("%d\n", ans);
}
return 0;
}


bzoj3926: [Zjoi2015]诸神眷顾的幻想乡

我们可以发现叶子节点不超过20个,可以考虑把每个叶子作为根,然后这棵树看成Trie树,然后加入SAM,直接边DFS边建就行了.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#include<stdio.h>
#include<cstring>
#include<algorithm>
using namespace std;
const int N = 2e5 + 9;
int n, c, col[N], Gr[N];
struct Edge {
int to;
Edge *next;
Edge () {}
Edge (int to, Edge *next) : to(to), next(next) {}
}*head[N], pol[N], *ps = pol;
struct State {
int len;
State *go[10], *par;
State (int len = 0) : len(len), par(0) { memset(go, 0, sizeof go); }
}*root, pool[N * 20], *pis = pool;
void Extend (State *&last, int w) {
if (last->go[w] && last->go[w]->len == last->len + 1) return void(last = last -> go[w]);
State *p = last, *np = new (pis++) State(p->len + 1);
while (p && !p->go[w]) p->go[w] = np, p = p->par;
if (p == 0) {
np->par = root;
} else {
State *q = p->go[w];
if (p->len + 1 == q->len) {
np->par = q;
} else {
State *nq = new (pis++) State(*q);
nq->len = p->len + 1;
q->par = np->par = nq;
while (p && p->go[w] == q) p->go[w] = nq, p = p->par;
}
}
last = np;
}
void Dfs (int u, int fa, State *last) {
Extend(last, col[u]);
for (Edge *now = head[u]; now; now = now -> next) if (now -> to != fa) Dfs(now -> to, u, last);
}
int main () {
scanf("%d%d", &n, &c);
for (int i = 1; i <= n; ++i) scanf("%d", &col[i]);
for (int i = 1, u, v; i < n; ++i) {
scanf("%d%d", &u, &v);
head[u] = new (ps++) Edge(v, head[u]);
head[v] = new (ps++) Edge(u, head[v]);
++Gr[u]; ++Gr[v];
}
root = new (pis++) State();
for (int i = 1; i <= n; ++i) if (Gr[i] == 1) Dfs(i, 0, root);
long long ans = 0;
for (State *p = pool + 1; p != pis; ++p) ans += p->len - p->par->len;
printf("%lld\n", ans);
return 0;
}

POJ#3294
无论是SA还是SAM都是裸题,不过要输出方案,我SAM被卡字符集了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#include<cstdio>
#include<algorithm>
#include<cstring>
using namespace std;
const int N = 2e5 + 9;
struct State {
int len, las, cou;
State *go[26], *par;
State (int len = 0) : len(len), las(-1), cou(0), par(0) { memset(go, 0, sizeof go); }
}*last, *root, pool[N], *pis = pool;
void Extend (int w) {
State *p = last, *np = new (pis++) State(p->len + 1);
while (p && !p->go[w]) p->go[w] = np, p = p->par;
if (p == 0) {
np->par = root;
} else {
State *q = p->go[w];
if (p->len + 1 == q->len) {
np->par = q;
} else {
State *nq = new (pis++) State(*q);
nq->len = p->len + 1;
np->par = q->par = nq;
while (p && p->go[w] == q) p->go[w] = nq, p = p->par;
}
}
last = np;
}
int n, len[101], ans, stk[N];
char s[101][1001];
void Dfs (State *p, int d) {
if (p->cou > n / 2 && d == ans) {
for (int i = 1; i <= d; ++i) putchar('a' + stk[i]);
putchar('\n');
}
for (int i = 0; i < 26; ++i) if (p->go[i]) stk[d + 1] = i, Dfs(p->go[i], d + 1);
}
int main () {
while (scanf("%d", &n), n) {
pis = pool;
root = new (pis++) State();
for (int i = 0; i < n; ++i) {
scanf("%s", s[i]);
len[i] = strlen(s[i]);
last = root;
for (int j = 0; j < len[i]; ++j) Extend(s[i][j] - 'a');
}
for (int i = 0; i < n; ++i) {
State *p = root;
for (int j = 0; j < len[i]; ++j) {
p = p->go[s[i][j] - 'a'];
for (State *q = p; q && q->las != i; q = q->par) q->las = i, ++q->cou;
}
}
ans = 0;
for (State *p = pool; p != pis; ++p) if (p->cou > n / 2 && p->len > ans) ans = p->len;
if (ans == 0) puts("?");
else Dfs(root, 0);
putchar('\n');
}
return 0;
}


bzoj3756

还是一样直接用trie建SAM就完了.
据dwj和czl提醒,DFS的复杂度是$\sum L_{i}$的,所以还是老老实实BFS吧。因为存在版权问题,不能提交,GG.


bzoj1194

这道题题面真的很辣鸡,根本就不想看…
然后这道题和字符串关系不是很大,主要部分就是求一个自动机是否被另一个自动机包含,我只想到随机化,看题解后恍然大悟:f[i][j]表示i这个状态是否被j包含。就行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include<cstdio>
#include<queue>
#include<algorithm>
#include<cstring>
#include<vector>
using namespace std;
const int N = 55;
int n, f[N][N], g[N][N], dfn[N], low[N], stk[N], top, cnt, tot, bl[N], gr[N], dp[N], si[N];
bool vis[N];
vector<int>V[N], P[N];
typedef vector<int>::iterator it;
queue<int>Q;
struct ac {
int ch[N][2], n, m;
bool is[N];
void init () {
scanf("%d%d", &n, &m);
for (int i = 1, j; i <= m; ++i) scanf("%d", &j), is[j] = 1;
for (int i = 0; i < n; ++i) scanf("%d%d", &ch[i][0], &ch[i][1]);
}
ac () { memset(is, 0, sizeof is); }
}A[N];
bool Dfs (const ac &A, const ac &B, int pa, int pb) {
if (f[pa][pb] != -1) return f[pa][pb];
if (A.is[pa] && !B.is[pb]) return 1;
f[pa][pb] = 0;
return f[pa][pb] = Dfs(A, B, A.ch[pa][0], B.ch[pb][0]) | Dfs(A, B, A.ch[pa][1], B.ch[pb][1]);
}
void Tarjan (int x) {
low[x] = dfn[x] = ++cnt;
stk[++top] = x; vis[x] = 1;
for (it now = V[x].begin(), en = V[x].end(); now != en; ++now) if (!dfn[*now]) {
Tarjan(*now);
low[x] = min(low[x], low[*now]);
} else if (vis[*now]) low[x] = min(low[x], dfn[*now]);
if (low[x] == dfn[x]) {
int v = -1; ++tot;
while (v != x) {
v = stk[top--];
vis[v] = 0;
bl[v] = tot;
++si[tot];
}
}
}
int main () {
scanf("%d", &n);
for (int i = 1; i <= n; ++i) A[i].init();
for (int i = 1; i <= n; ++i) {
for (int j = 1; j <= n; ++j) if (i != j) {
memset(f, -1, sizeof f);
if (!Dfs(A[j], A[i], 0, 0)) V[i].push_back(j);
}
}
for (int i = 1; i <= n; ++i) if (!dfn[i]) Tarjan(i);
for (int i = 1; i <= n; ++i) for (it now = V[i].begin(), en = V[i].end(); now != en; ++now) if (bl[i] != bl[*now]) P[bl[i]].push_back(bl[*now]), ++gr[bl[*now]];
for (int i = 1; i <= tot; ++i) if (!gr[i]) Q.push(i);
int u, ans = 0;
while (!Q.empty()) {
u = Q.front(); Q.pop(); dp[u] += si[u];
if (dp[u] > ans) ans = dp[u];
for (it now = P[u].begin(), en = P[u].end(); now != en; ++now) {
dp[*now] = max(dp[*now], dp[u]);
if (--gr[*now] == 0) Q.push(*now);
}
}
printf("%d\n", ans);
return 0;
}

bzoj4032

子串部分用后缀自动机很好做,子序列部分当然就用子序列自动机辣!这个东西其实还挺好yy的。每个节点的parent是这个字母上一次出现的位置,每更新一个x节点时所有没有x的后继的节点都连向这个节点,然后每次沿着所有字母的last向上爬就好了。
剩余的部分,就和上题(bzoj1194)差不多了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include<cstdio>
#include<algorithm>
#include<cstring>
using namespace std;
const int N = 2e4 + 9;
struct State {
int len;
State *go[26], *par;
State (int len = 0) : len(len), par(0) { memset(go, 0, sizeof go); }
}*last[26], *ra1, *ra2, *rb1, *rb2, pool[N], *pis = pool;
char s[N];
int n, x, y, a, b, c;
int f[6005][6005];
void build_sam (State *&root) {
*last = root = new (pis++) State();
for (int i = 0, w; i < n; ++i) {
w = s[i] - 'a';
State *p = *last, *np = new (pis++) State(p->len + 1);
while (p && !p->go[w]) p->go[w] = np, p=p->par;
if (p == 0) {
np->par = root;
} else {
State *q = p->go[w];
if (p->len + 1 == q->len) {
np->par = q;
} else {
State *nq = new (pis++) State(*q);
nq->len = p->len + 1;
q->par = np->par = nq;
while (p && p->go[w] == q) p->go[w] = nq, p = p->par;
}
}
*last = np;
}
}
void build_sqm (State *&root) {
root = new (pis++) State();
for (int i = 0; i < 26; ++i) last[i] = root;
for (int i = 0, w; i < n; ++i) {
w = s[i] - 'a'; State *q = new (pis++) State(last[w]->len + 1); q->par = last[w];
for (int j = 0; j < 26; ++j) for (State *p = last[j]; p && !p->go[w]; p = p->par) p->go[w] = q;
last[w] = q;
}
}
int Dfs (State *pa, State *pb) {
if (pb == NULL) return 0;
int _a = pa - pool - x, _b = pb - pool - y;
if (f[_a][_b] != -1) return f[_a][_b];
int &ret = f[_a][_b]; ret = 0x3f3f3f3f;
for (int i = 0; i < 26; ++i) if (pa->go[i]) ret = min(ret, Dfs(pa->go[i], pb->go[i]));
return ++ret;
}
void print (int x) {
printf("%d\n", x >= 0x3f3f3f3f ? -1 : x);
}
int main () {
scanf("%s", s); n = strlen(s);
build_sam(ra1);
a = pis - pool; build_sqm(ra2);
scanf("%s", s); n = strlen(s);
b = pis - pool; build_sam(rb1);
c = pis - pool; build_sqm(rb2);
x = 0; y = b; memset(f, -1, sizeof f); print(Dfs(ra1, rb1));
x = 0; y = c; memset(f, -1, sizeof f); print(Dfs(ra1, rb2));
x = a; y = b; memset(f, -1, sizeof f); print(Dfs(ra2, rb1));
x = a; y = c; memset(f, -1, sizeof f); print(Dfs(ra2, rb2));
return 0;
}

为期4天的字符串学习正式收尾了:)