Maximum repetition substring POJ-3693 (suffix array)

Title:

Given a string, find the substring with the largest number of repetitions, i.e. the substring that can be written as some block (the repeating unit) concatenated with itself the greatest number of times. If several substrings have the same number of repetitions, output the lexicographically smallest one.

Train of thought:

Reference: the Chinese National Training Team paper "Suffix Array: A Powerful Tool for String Processing".
First, enumerate the length L of the repeating unit. We only consider substrings in which the unit repeats at least twice, so such a substring has length at least 2L. If we mark the positions 0, L, 2L, 3L, ..., iL, (i+1)L, ... of the original string, any such substring must cover two adjacent marks iL and (i+1)L. Taking i to be the smallest index for which this holds, the starting point of the substring must lie in the range [max(0, (i-1)L+1), iL]. We first compute K = LCP(suffix[iL], suffix[(i+1)L]), and then extend the match to the left by comparing str[iL-k] with str[(i+1)L-k] for k = 1, 2, .... Every k for which the match still holds gives a possible starting point iL-k. Because we need the lexicographically smallest substring among those with the same number of repetitions, every one of these starting points has to be examined, not only the one that maximizes the count. From the starting point iL-k, the suffixes at iL-k and (i+1)L-k share K+k characters, so the number of repetitions there is (K+k)/L + 1 (integer division), and we update the answer accordingly.

Code:

#define debug printf
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<iostream>
#include<queue>
#include<vector>
#include<cmath>
using namespace std;
// Capacity of every fixed-size buffer in this file (200000 plus 100 spare entries).
const int maxn=200000+100;
// Test-case counter; solve() prints it in the "Case %d: " header.
int kase=0;
// Forward declaration: solve() is defined after main().
void solve();
// Input buffer: main() reads one token into it, solve() processes it.
char str[maxn];
// Suffix array over an integer sequence, built by prefix doubling with a
// counting sort per round, plus the height array (LCP of rank-adjacent
// suffixes).
//
// Usage as seen in this file: clear(), append symbols to s[] while bumping n,
// finish with a 0 sentinel that is smaller than every other symbol, then call
// build_sa(m) followed by build_height().
struct SuffixArray{
    int s[maxn];        // input symbols s[0..n-1]
    int sa[maxn];       // sa[r] = start index of the suffix with rank r
    int ran[maxn];      // ran[i] = rank of the suffix starting at i (inverse of sa)
    int height[maxn];   // height[r] = LCP of suffixes sa[r-1] and sa[r]; height[0] = 0
    int t[maxn],t2[maxn],c[maxn];   // scratch: two key arrays and the counting-sort buckets
    int n;              // number of symbols currently stored in s

    // Forget the stored sequence and zero the suffix array.
    void clear()
    {
        n=0;
        memset(sa,0,sizeof(sa));
    }

    // Build sa[] for s[0..n-1]. Every symbol must lie in [0, m).
    // The comparison y[sa[i]+k] below is only evaluated when the first halves
    // are equal; with a unique smallest sentinel at s[n-1] that keeps the index
    // inside [0, n), so callers are expected to append such a sentinel.
    void build_sa(int m)
    {
        int i,*x=t,*y=t2;
        // Initial counting sort by single symbol.
        for(i=0;i<m;i++)    c[i]=0;
        for(i=0;i<n;i++)    c[x[i]=s[i]]++;
        for(i=1;i<m;i++)    c[i]+=c[i-1];
        for(i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
        for(int k=1;k<=n;k<<=1){
            // y = suffixes ordered by their second key (the rank of suffix i+k).
            // Suffixes with no second half come first.
            int p=0;
            for(i=n-k;i<n;i++)  y[p++]=i;
            for(i=0;i<n;i++)    if(sa[i]>=k)    y[p++]=sa[i]-k;
            // Stable counting sort of y by first key x.
            for(i=0;i<m;i++)    c[i]=0;
            for(i=0;i<n;i++)    c[x[y[i]]]++;
            for(i=1;i<m;i++)    c[i]+=c[i-1];
            for(i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
            // Re-rank: y now holds the old ranks, x receives the new ones.
            swap(x,y);
            p=1;x[sa[0]]=0;
            for(i=1;i<n;i++){
                x[sa[i]]=(y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+k]==y[sa[i]+k])?p-1:p++;
            }
            if(p>=n)    break;  // all ranks distinct: sorting is complete
            m=p;                // next round sorts keys in [0, p)
        }
    }

    // Build ran[] and height[] from sa[] (must be called after build_sa).
    // k carries over between consecutive i because the LCP for suffix i+1 is
    // at least the LCP for suffix i minus one.
    void build_height()
    {
        int i,j,k=0;
        for(i=0;i<n;i++){
            ran[sa[i]]=i;
        }
        for(i=0;i<n;i++){
            if(ran[i]==0){
                // The smallest suffix has no predecessor; the original code
                // read sa[-1] here, which is out of bounds.
                height[0]=0;
                k=0;
                continue;
            }
            if(k)   k--;
            j=sa[ran[i]-1];
            // Bounded by n so the scan cannot run past the stored sequence.
            while(i+k<n&&j+k<n&&s[i+k]==s[j+k]) k++;
            height[ran[i]]=k;
        }
    }
};
// Single global suffix-array instance, filled through add() and used by solve().
SuffixArray sa;
// Append one symbol to the input sequence of the global suffix array.
void add(int ch)
{
    const int slot=sa.n;
    sa.s[slot]=ch;
    sa.n=slot+1;
}
// Forward declarations; both functions are defined after solve().
// RMQ_init builds the range-minimum table over A[0..n-1].
void RMQ_init(int * A,int n);
// RMQ returns the minimum of the table's source values on the index range [L, R].
int RMQ(int L,int R);
// Reads whitespace-separated tokens from stdin and solves one test case per
// token, stopping at a lone "#" or at end of input.
int main()
{
    kase=0;
    // The field width bounds the read so the token plus its terminating NUL
    // always fits in str. It must stay <= maxn-1 (maxn is 200000+100).
    while(scanf("%200099s",str)==1){
        if(strcmp(str,"#")==0)  break;
        kase++;
        solve();
    }

    return 0;
}
void solve()
{
    sa.clear();
    int len=strlen(str);
    for(int j=0;j<len;j++){
        add(str[j]-'a'+1);
    }
    add(0);
    sa.build_sa(27);
    sa.build_height();
    RMQ_init(sa.height,sa.n);
//  for(int i=0;i<sa.n;i++){
//        debug("hh%d %d\n",i,sa.sa[i]);
//  }
    int ans,lef,righ;
    ans=1;
    lef=righ=0;
    for(int L=1;L<=len;L++){
//        debug("%d\n",L);
        int K;
        int p,q;
        for(int i=0;((i+1)*L)<len;i++){
            char x=str[i*L],y=str[(i+1)*L];
            if(x!=y)    continue;
            int ql,qr;
            ql=sa.ran[i*L],qr=sa.ran[(i+1)*L];
            if(ql>qr)   swap(ql,qr);
            ql++;
            K=RMQ(ql,qr);
//          debug("%d %d %d\n",L,i,K);
            p=(i*L),q=((i+1)*L);
            int bound=max(0,(i-1)*L+1);
                int res=(K/L)+1;
//                p++;
                if(res>ans||(res==ans&&sa.ran[p]<sa.ran[lef])){
                    ans=res;
                    lef=p;
                    righ=p+res*L;
                }
            p--;q--;
            while(p>=bound&&str[p]==str[q]){
                K++;
//              debug("%d\n",K);
                int res=(K/L)+1;
//                p++;
                if(res>ans||(res==ans&&sa.ran[p]<sa.ran[lef])){
                    ans=res;
                    lef=p;
                    righ=p+res*L;
                }
                p--;q--;
            }

        }

    }
    printf("Case %d: ",kase);
//  righ=ans+lef;
    if(ans==1){
        char maxv='z';
        for(int i=0;i<len;i++){
            if(str[i]<maxv){
                maxv=str[i];
            }
        }
        printf("%c\n",str[sa.sa[1]]);
        return;
    }
    for(int i=lef;i<righ;i++){
        printf("%c",str[i]);
    }
    printf("\n");
}
// Sparse table filled by RMQ_init: d[i][j] is the minimum of A[i .. i+2^j-1].
int d[maxn][20];
// Fill the sparse table d from A[0..n-1]: level 0 copies A, and each higher
// level stores the minimum of two adjacent windows of the level below.
void RMQ_init(int * A,int n)
{
    for(int pos=0;pos<n;++pos)
        d[pos][0]=A[pos];

    for(int lvl=1;(1<<lvl)<=n;++lvl){
        const int half=1<<(lvl-1);
        const int last=n-(1<<lvl);  // largest start whose 2^lvl window still fits
        for(int pos=0;pos<=last;++pos){
            const int lo=d[pos][lvl-1];
            const int hi=d[pos+half][lvl-1];
            d[pos][lvl]=lo<hi?lo:hi;
        }
    }
}
// Minimum over the index range [L, R] of the array passed to RMQ_init,
// combining the two (possibly overlapping) power-of-two windows that cover it.
int RMQ(int L,int R)
{
    const int span=R-L+1;
    int lg=0;
    // Largest lg with 2^lg <= span (stays 0 when span < 2).
    for(;(2<<lg)<=span;++lg){}
    const int fromLeft=d[L][lg];
    const int fromRight=d[R-(1<<lg)+1][lg];
    return min(fromLeft,fromRight);
}
/*
zzbzbz

baccdbaccdbacbdbacbd
xbcabcab
edbea
abcabcabcab
accdaccaccddcacacaacad
babbabaabaabaabab
abaabaabaaba
ccabababc
daabbccaa
bcbcbaba
pedabacewqpz
ababaaa
#
*/

Posted by iwarlord on Sun, 06 Jan 2019 03:00:09 -0800