# ICPC2021 Shenyang station M String Problem

Keywords: KMP SAM

Niuke portal

I do not understand the KMP solution yet, so here are two SAM (suffix automaton) solutions.
Thanks to the two teams whose code provided these ideas: "cherry pig driving a meow car to create a big white bear" and "novice on the road".

The first approach is a little more troublesome:

For each prefix, the lexicographically largest substring must be a suffix of that prefix. Two such suffixes are compared by the first character at which they differ, scanning forward from their starting positions. If the original string is reversed, this order can be maintained with a SAM.

Build a SAM on the reversed string, then sort the child edges \(v_i\) of each node \(u\) of the suffix-link tree by the character at position \(endpos[v] - len[u]\) of the original string, so that a DFS over the tree visits the substrings in lexicographic order. (In the code below `endp` stores a start index in the original string, so the character actually read is `s[endp[v] + len[u]]`.)

Now we need the maximum suffix for each prefix. Do it in reverse, from the longest prefix to the shortest: first put the nodes into a max-heap keyed by DFS order, because a node with a larger DFS order represents a larger substring. Then, if the substring represented by the heap top lies within the prefix currently being enumerated, that substring is the answer; otherwise pop the heap top and look at the next largest element in the heap.

This gives a time complexity of \(O(n \log n)\), which needs a little constant-factor optimization to pass.

```cpp
#include<bits/stdc++.h>
using namespace std;

// Output helpers: `enter` prints a newline, `space` prints one blank.
#define enter puts("")
#define space putchar(' ')
// Fills the whole array `a` with the byte value `x`.
#define Mem(a, x) memset((a), (x), sizeof(a))
#define In inline
// Edge iteration over `head`/`e`; neither is defined in this listing, so the macro is unused here.
#define forE(i, x, y) for(int i = head[x], y; ~i && (y = e[i].to); i = e[i].nxt)

using ll = long long;
using db = double;

constexpr int INF = 0x3f3f3f3f;
constexpr db eps = 1e-8;
constexpr int maxn = 1000005;   // capacity of the string / answer arrays
constexpr int maxs = 27;        // width of one SAM transition row
// Reads the next decimal integer from stdin and returns it.
// Leading non-digit characters are skipped; if the character immediately
// before the first digit is '-', the value is negated. The first character
// after the digits is consumed as well.
// Returns 0 when end of input is reached before any digit is seen.
inline long long read()
{
    long long ans = 0;
    // Keep getchar()'s int result so that EOF stays distinguishable from data
    // and isdigit() never receives a negative char.
    int ch = getchar(), las = ' ';
    while(ch != EOF && !isdigit(ch)) las = ch, ch = getchar();
    while(isdigit(ch)) ans = ans * 10 + (ch - '0'), ch = getchar();
    if(las == '-') ans = -ans;
    return ans;
}
// Prints x to stdout in decimal, with a leading '-' for negative values.
In void write(ll x)
{
    if(x < 0)
    {
        putchar('-');
        x = -x;
    }
    // Collect the digits least-significant first, then emit them in reverse.
    char digits[20];
    int top = 0;
    do
    {
        digits[top++] = char(x % 10 + '0');
        x /= 10;
    } while(x);
    while(top) putchar(digits[--top]);
}

int n;              // length of the input string
int ans[maxn];      // ans[i]: start index printed for prefix i; 0 means "not decided yet"
char s[maxn];       // input string, stored from index 1
struct Sam
{
int tra[maxn << 1][maxs], link[maxn << 1], len[maxn << 1], endp[maxn << 1], cnt, las;
In void init() {link[cnt = las = 0] = -1; Mem(tra[0], 0);}
In void insert(int c, int id)
{
int now = ++cnt, p = las; Mem(tra[now], 0);
len[now] = len[p] + 1, endp[now] = id;
while(~p && !tra[p][c]) tra[p][c] = now, p = link[p];
if(p == -1) link[now] = 0;
else
{
int q = tra[p][c];
if(len[q] == len[p] + 1) link[now] = q;
else
{
int clo = ++cnt;
memcpy(tra[clo], tra[q], sizeof(tra[q]));
len[clo] = len[p] + 1, endp[clo] = endp[q];
while(~p && tra[p][c] == q) tra[p][c] = clo, p = link[p];
}
}
las = now;
}
#define pr pair<int, int>
#define mp make_pair
#define F first
#define S second
int buc[maxn << 1], pos[maxn << 1];
vector<pr> V[maxn << 1];
int du[maxn << 1], dfn[maxn << 1], dcnt;
In void dfs(int now)
{
dfn[now] = ++dcnt;
for(auto x : V[now]) dfs(x.S);
}
In void buildGraph()
{
for(int i = 1; i <= cnt; ++i) buc[len[i]]++;
for(int i = 1; i <= cnt; ++i) buc[i] += buc[i - 1];
for(int i = 1; i <= cnt; ++i) pos[buc[len[i]]--] = i;
endp[0] = INF;
for(int i = cnt; i; --i)
{
int now = pos[i], fa = link[now];
du[fa]++;
endp[fa] = min(endp[fa], endp[now]);
V[fa].push_back(mp(s[endp[now] + len[fa]], now));
}
for(int i = 0; i <= cnt; ++i) sort(V[i].begin(), V[i].end());
dcnt = 0, dfs(0);
}
In void solve()
{
priority_queue<pr> q;
for(int i = 1; i <= cnt; ++i) if(!du[i]) q.push(mp(dfn[i], i));
for(int i = n, now = 0; i; --i)
{
while(!ans[i])
{
if(!now) now = q.top().S;		//Reduce heap operations to optimize constants
{
q.pop();
now = 0;
}
else ans[i] = endp[now];
}
}
}
}S;

// Reads the string, builds the automaton over its reversal, and prints one
// line "<ans[i]> <i>" for every prefix length i.
int main()
{
    scanf("%s", s + 1);
    n = strlen(s + 1);
    S.init();
    // Insert from the last character to the first, tagging each with its index.
    for(int i = n; i >= 1; --i) S.insert(s[i] - 'a', i);
    S.buildGraph();
    S.solve();
    for(int i = 1; i <= n; ++i)
    {
        write(ans[i]);
        space;
        write(i);
        enter;
    }
    return 0;
}
```

The second method needs much less code. I think of it as an optimized brute force.

First, a brute-force \(O(n^2)\) method: take all substrings and sort them in lexicographic order (from largest to smallest). Write a substring as \(s_{l \sim r}\); then the answer \(ans[r]\) is the first \(s_{l \sim r}\) that appears in this order.

Optimize this method with a SAM: after building the SAM on the original (forward) string, greedily walk the SAM following the lexicographically largest transitions first; the substring that reaches a node first must then be the largest one for that node. In addition, because all substrings in the same node share the same end positions, and the continuations from this node to other nodes are the same no matter how the node was reached, any substring formed later through this node must be smaller than the one formed on the first visit, so a node that has already been visited never needs to be visited again.

The time complexity is \(O(27n)\).

```cpp
#include<bits/stdc++.h>
using namespace std;

// Output helpers: `enter` prints a newline, `space` prints one blank.
#define enter puts("")
#define space putchar(' ')
// Fills the whole array `a` with the byte value `x`.
#define Mem(a, x) memset((a), (x), sizeof(a))
#define In inline
// Edge iteration over `head`/`e`; neither is defined in this listing, so the macro is unused here.
#define forE(i, x, y) for(int i = head[x], y; ~i && (y = e[i].to); i = e[i].nxt)

using ll = long long;
using db = double;

constexpr int INF = 0x3f3f3f3f;
constexpr db eps = 1e-8;
constexpr int maxn = 1000005;   // capacity of the string / answer arrays
constexpr int maxs = 27;        // width of one SAM transition row
// Reads the next decimal integer from stdin and returns it.
// Leading non-digit characters are skipped; if the character immediately
// before the first digit is '-', the value is negated. The first character
// after the digits is consumed as well.
// Returns 0 when end of input is reached before any digit is seen.
inline long long read()
{
    long long ans = 0;
    // Keep getchar()'s int result so that EOF stays distinguishable from data
    // and isdigit() never receives a negative char.
    int ch = getchar(), las = ' ';
    while(ch != EOF && !isdigit(ch)) las = ch, ch = getchar();
    while(isdigit(ch)) ans = ans * 10 + (ch - '0'), ch = getchar();
    if(las == '-') ans = -ans;
    return ans;
}
// Prints x to stdout in decimal, with a leading '-' for negative values.
In void write(ll x)
{
    if(x < 0)
    {
        putchar('-');
        x = -x;
    }
    // Collect the digits least-significant first, then emit them in reverse.
    char digits[20];
    int top = 0;
    do
    {
        digits[top++] = char(x % 10 + '0');
        x /= 10;
    } while(x);
    while(top) putchar(digits[--top]);
}

int n;              // length of the input string
int ans[maxn];      // ans[r]: start index printed for prefix r; 0 means "not decided yet"
char s[maxn];       // input string, stored from index 1
// Suffix automaton over the input string (characters inserted first to last),
// used by the second approach.
//
//   tra[u][c] : transition of state u on letter c (0 = none; state 0 is the root)
//   link[u]   : suffix link of u (-1 for the root)
//   len[u]    : length of the longest string of u
//   endp[u]   : the index passed to insert() when u was created; a clone
//               copies the value of the state it was cloned from
struct Sam
{
    int tra[maxn << 1][maxs], link[maxn << 1], len[maxn << 1], endp[maxn << 1], cnt, las;

    // Resets the automaton to the single root state.
    In void init() {link[cnt = las = 0] = -1; Mem(tra[0], 0);}

    // Appends letter c (0-based) to the automaton; id is recorded in endp of the new state.
    In void insert(int c, int id)
    {
        int now = ++cnt, p = las; Mem(tra[now], 0);
        len[now] = len[p] + 1, endp[now] = id;
        while(~p && !tra[p][c]) tra[p][c] = now, p = link[p];
        if(p == -1) link[now] = 0;
        else
        {
            int q = tra[p][c];
            if(len[q] == len[p] + 1) link[now] = q;
            else
            {
                int clo = ++cnt;
                memcpy(tra[clo], tra[q], sizeof(tra[q]));
                len[clo] = len[p] + 1, endp[clo] = endp[q];
                // The clone takes over q's old suffix link and becomes the
                // suffix link of both q and the new state.
                link[clo] = link[q], link[q] = link[now] = clo;
                while(~p && tra[p][c] == q) tra[p][c] = clo, p = link[p];
            }
        }
        las = now;
    }

    bool vis[maxn << 1];    // vis[u]: state u has already been entered by dfs()

    // Walks the automaton from `now`, trying letters from 'z' down to 'a' and
    // never re-entering a visited state. l is the length of the path taken
    // from the root. After all outgoing walks, if no answer is stored yet for
    // index endp[now], stores the start index endp[now] - l + 1.
    // NOTE: recursion depth can reach the length of the longest path followed.
    In void dfs(int now, int l)             //l: Maximum substring start position
    {
        vis[now] = 1;
        for(int i = 25; i >= 0; --i)        //In SAM, greedily go the biggest
            if(tra[now][i] && !vis[tra[now][i]]) dfs(tra[now][i], l + 1);
        if(!ans[endp[now]]) ans[endp[now]] = endp[now] - l + 1;
    }
}S;

// Reads the string, builds the automaton over it, runs the greedy walk, and
// prints one line "<ans[i]> <i>" for every prefix length i.
int main()
{
    scanf("%s", s + 1);
    n = strlen(s + 1);
    S.init();
    for(int i = 1; i <= n; ++i)
    {
        const int letter = s[i] - 'a';
        S.insert(letter, i);
    }
    S.dfs(0, 0);
    for(int i = 1; i <= n; ++i)
    {
        write(ans[i]);
        space;
        write(i);
        enter;
    }
    return 0;
}
```

The third is the KMP approach. I don't understand it, but here is the code.

```cpp
#include<bits/stdc++.h>
using namespace std;

// Output helpers: `enter` prints a newline, `space` prints one blank.
#define enter puts("")
#define space putchar(' ')
// Fills the whole array `a` with the byte value `x`.
#define Mem(a, x) memset((a), (x), sizeof(a))
#define In inline
// Edge iteration over `head`/`e`; neither is defined in this listing, so the macro is unused here.
#define forE(i, x, y) for(int i = head[x], y; ~i && (y = e[i].to); i = e[i].nxt)

using ll = long long;
using db = double;

constexpr int INF = 0x3f3f3f3f;
constexpr db eps = 1e-8;
constexpr int maxn = 1000005;   // capacity of the string buffer
constexpr int maxs = 27;        // not referenced by this listing
// Reads the next decimal integer from stdin and returns it.
// Leading non-digit characters are skipped; if the character immediately
// before the first digit is '-', the value is negated. The first character
// after the digits is consumed as well.
// Returns 0 when end of input is reached before any digit is seen.
inline long long read()
{
    long long ans = 0;
    // Keep getchar()'s int result so that EOF stays distinguishable from data
    // and isdigit() never receives a negative char.
    int ch = getchar(), las = ' ';
    while(ch != EOF && !isdigit(ch)) las = ch, ch = getchar();
    while(isdigit(ch)) ans = ans * 10 + (ch - '0'), ch = getchar();
    if(las == '-') ans = -ans;
    return ans;
}
// Prints x to stdout in decimal, with a leading '-' for negative values.
In void write(ll x)
{
    if(x < 0)
    {
        putchar('-');
        x = -x;
    }
    // Collect the digits least-significant first, then emit them in reverse.
    char digits[20];
    int top = 0;
    do
    {
        digits[top++] = char(x % 10 + '0');
        x /= 10;
    } while(x);
    while(top) putchar(digits[--top]);
}

int n;              // length of the input string
char s[maxn];       // input string, stored from index 1

// Candidate start positions: f is the list carried from one prefix to the
// next, g is the scratch list rebuilt for the current prefix.
vector<int> f;
vector<int> g;

// For every prefix length i, maintains a short list of candidate start
// positions and prints "<f.back()> <i>", where f.back() is the last candidate
// kept for that prefix.
int main()              //It's so short
{
    scanf("%s", s + 1);
    n = strlen(s + 1);
    for(int i = 1; i <= n; ++i)
    {
        // Step 1: rebuild g from the previous candidates, starting with i itself.
        g.clear();
        g.push_back(i);
        for(size_t k = 0; k < f.size(); ++k)
        {
            const int x = f[k];
            // s[x + i - g.back()] is the character of the candidate starting at x
            // that lines up with s[i] in the candidate starting at g.back().
            while(!g.empty() && s[x + i - g.back()] > s[i]) g.pop_back();
            if(g.empty() || s[x + i - g.back()] == s[i]) g.push_back(x);
        }

        // Step 2: copy g back into f, dropping a kept candidate whenever twice
        // its length exceeds the length of the next candidate.
        f.clear();
        for(size_t k = 0; k < g.size(); ++k)
        {
            const int x = g[k];
            const int lenX = i - x + 1;
            while(!f.empty() && (i - f.back() + 1) * 2 > lenX) f.pop_back();
            f.push_back(x);
        }

        write(f.back());
        space;
        write(i);
        enter;
    }
    return 0;
}
```

Posted by Magicman0022 on Tue, 23 Nov 2021 05:01:50 -0800