Distinct Substrings (application of the suffix array and the height array)

Title Link

SPOJ-DISUBSTR

Problem statement

Given a string s, find the number of distinct (non-repeated) substrings of s.

Analysis

Observe the suffix array and notice that every substring is a prefix of some suffix. That is, each suffix contributes len substrings (len is the length of the suffix), one for each of its prefixes, so counting with repetition there are n(n+1)/2 substrings in total. Now we need to remove the duplicates. In sorted order, the substrings a suffix shares with the suffixes before it are exactly the prefixes it has in common with its immediate predecessor in the suffix array, i.e. the entries of the height (LCP) array. So you just need to subtract the sum of the longest-common-prefix lengths of adjacent suffixes from the total number of substrings.

Code

#include <bits/stdc++.h>
// NOTE(review): presumably this rename keeps the global array below from
// clashing with std::rank, which <bits/stdc++.h> plus `using namespace std`
// bring into scope -- confirm before removing.
#define rank ranka
using namespace std;
// Capacity of the work arrays; indices 0..n are used, so the input length
// must stay below this bound.
const int maxn=2e4+100;
// n    : length of s (set by get_sa)
// k    : current offset of the second sort key, read by cmp
// rank : rank of each suffix start position (rewritten by get_sa and get_lcp)
// tmp  : scratch buffer for the next round of ranks in get_sa
// sa   : suffix start positions in sorted order, produced by get_sa
// lcp  : common-prefix lengths of suffixes adjacent in sa, produced by get_lcp
int n,k,rank[maxn],tmp[maxn],sa[maxn],lcp[maxn];
// The string currently being processed (read in main).
string s;

// Strict ordering of two suffix start positions for the current doubling
// round: the primary key is rank[pos], the secondary key is rank[pos+k].
// A position whose pos+k runs past n has no second half, which is encoded
// as -1 so that it sorts before any real rank.
bool cmp(int i,int j)
{
    // Primary keys differ: they decide the order on their own.
    if(rank[i]!=rank[j])
        return rank[i]<rank[j];

    // Primary keys tie: fall back to the rank of the part k positions later.
    const int second_i=(i+k<=n)?rank[i+k]:-1;
    const int second_j=(j+k<=n)?rank[j+k]:-1;
    return second_i<second_j;
}
// Builds the suffix array of the global string s by prefix doubling.
//
// On return n == s.size() and sa[0..n] holds the start positions of all
// n+1 suffixes (the empty suffix at position n included) in ascending
// lexicographic order. rank[] is left holding the ranks of the last round
// and tmp[] is scratch. The arrays have maxn entries and indices 0..n are
// touched, so s.size() must be smaller than maxn.
void get_sa()
{
    n=s.size();
    for(int i=0; i<=n; i++)
    {
        sa[i]=i;
        // Widen through unsigned char so every real character maps to a
        // value >= 0. With a signed char, byte 0xFF would become -1 and tie
        // with the sentinel of the empty suffix, which could leave sa[0]
        // pointing at a non-empty suffix.
        rank[i]=i<n?static_cast<unsigned char>(s[i]):-1;
    }
    for(k=1; k<=n; k*=2)
    {
        // Order suffixes by their first 2k characters using the ranks of
        // their first k characters (see cmp).
        sort(sa,sa+n+1,cmp);

        // Re-rank: a suffix gets a new rank only if it is strictly greater
        // than its predecessor under the current ordering.
        tmp[sa[0]]=0;
        for(int i=1; i<=n; i++)
            tmp[sa[i]]=tmp[sa[i-1]]+(cmp(sa[i-1],sa[i])?1:0);
        for(int i=0; i<=n; i++)
            rank[i]=tmp[i];

        // The largest rank reaching n means all n+1 ranks are distinct, so
        // further rounds would reproduce the same order.
        if(rank[sa[n]]==n)
            break;
    }

}
// Fills lcp[] from s and sa[]: lcp[p] is the length of the common prefix of
// the suffixes stored at sa[p] and sa[p+1]. rank[] is overwritten with the
// inverse of sa. Suffixes are visited in text order so the match length of
// one suffix can be reused (minus one) for the next.
void get_lcp()
{
    // rank becomes the inverse permutation of sa.
    for(int pos=0; pos<=n; ++pos)
        rank[sa[pos]]=pos;

    lcp[0]=0;
    int matched=0;
    for(int start=0; start<n; ++start)
    {
        // Suffix placed immediately before this one in sorted order.
        const int prev=sa[rank[start]-1];

        // Dropping the first character loses at most one matching position.
        if(matched>0)
            --matched;

        // Extend the match while both suffixes still have characters that agree.
        while(start+matched<n && prev+matched<n
                && s[start+matched]==s[prev+matched])
            ++matched;

        lcp[rank[start]-1]=matched;
    }
}
int main()
{
    int N;
    cin>>N;
    while(N--)
    {
        cin>>s;
        get_sa();
        get_lcp();
        long long ans=(1+n)*n/2;
        for(int i=1;i<n;i++)
            ans-=lcp[i];
        cout<<ans<<endl;
    }
}

Reference template

Height array template
Suffix array template

Posted by MastahUK on Fri, 03 Apr 2020 03:22:17 -0700