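Let $B=\sqrt{nm}$. First discretize the weights and sort them, then put every $B$ consecutive weights into one block and maintain a two-dimensional prefix sum for each block. When answering a query, each block that is covered as a whole costs $O(1)$, each partially covered block is handled by brute force in $O(B)$, and the total complexity is $O(qB)$.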
Of course, there are also various data-structure approaches, such as a persistent segment tree nested inside a segment tree. The outer segment tree maintains the X-axis and the inner segment tree maintains the Y-axis; the weights are discretized and sorted, and then added one at a time. Each time a weight is added, only one position changes. The persistence chain has to extend from the outer segment tree into the inner segment tree, so each insertion creates only $O(\log^2 n)$ new nodes, giving time complexity $O(nm\log^2 n)$ and space complexity $O(nm\log^2 n)$. Because both segment trees have a size of only 300, it is smaller than other data structures.
Code (block):
#include<iostream>
#include<cstdio>
#include<cmath>
#include<cstring>
#include<algorithm>
// Cell record type: (value, (row, column)). main() later overwrites the first
// component with the discretized rank of the value.
#define piii pair<int,pair<int,int> >
// Shorthands for the two members of a std::pair.
#define fs first
#define sc second
using namespace std;
// Number of value-sorted cells stored in each block.
const int B=300;
// n, m  : grid dimensions, read first by main().
// q     : number of queries still to process.
// type  : when non-zero, every query parameter is XORed with the previous answer.
// lans  : answer to the most recent query (0 before the first one).
// z     : 1-based sorted copy of all cell values, used to map values to ranks.
// num   : not referenced at file scope in the visible code (struct block has
//         its own member with the same name).
int n,m,num,q,type,lans,z[90010];
// a[1..n*m]: every cell as (rank, (row, column)); sorted by main() before
// being cut into blocks.
piii a[90010];
/**
 * Reads the next unsigned decimal integer from standard input.
 *
 * Every character that is not an ASCII digit is skipped (a '-' is skipped
 * like any other separator, so signs are ignored), then the following run of
 * digits is accumulated.
 *
 * @return the parsed value, or 0 when the input ends before any digit is seen.
 */
int read()
{
    // getchar() returns an int so that EOF is distinguishable from every real
    // character; storing it in a char made the skip loop spin forever at EOF.
    int ch=std::getchar();
    while(ch!=EOF&&(ch<'0'||ch>'9')) ch=std::getchar();

    int x=0;
    // EOF is outside '0'..'9', so this loop also stops cleanly at end of input.
    for(;ch>='0'&&ch<='9';ch=std::getchar()) x=x*10+(ch-'0');
    return x;
}
struct block
{
int s[310][310],cl,cr,num;
piii c[310];
block(){cl=cr=num=0;memset(s,0,sizeof(s));}
void ins(piii x)
{
c[++num]=x;
s[x.sc.fs][x.sc.sc]++;
}
void gets()
{
for(int i=1;i<=n;i++)
for(int j=1;j<=m;j++)
s[i][j]+=s[i-1][j]+s[i][j-1]-s[i-1][j-1];
}
int qry1(int x1,int y1,int x2,int y2)
{
x1--;y1--;
return s[x2][y2]-s[x1][y2]-s[x2][y1]+s[x1][y1];
}
int qry2(int x1,int y1,int x2,int y2,int s,int t)
{
int re=0;
for(int i=1;i<=num;i++)
re+=(c[i].fs>=s&&c[i].fs<=t&&c[i].sc.fs>=x1&&c[i].sc.fs<=x2&&c[i].sc.sc>=y1&&c[i].sc.sc<=y2);
return re;
}
}blk[310];
int main()
{
n=read();m=read();q=read();type=read();
for(int i=1;i<=n;i++)
for(int j=1;j<=m;j++)
a[(i-1)*m+j]=make_pair(read(),make_pair(i,j)),z[(i-1)*m+j]=a[(i-1)*m+j].fs;
sort(z+1,z+n*m+1);
for(int i=1;i<=n*m;i++)
a[i].fs=lower_bound(z+1,z+n*m+1,a[i].fs)-z;
sort(a+1,a+n*m+1);
for(int i=1,k=1;i<=n*m;i+=B,k++)
{
blk[k].cl=a[i].fs;blk[k].cr=a[min(i+B-1,n*m)].fs;
for(int j=i;j<=min(i+B-1,n*m);j++)
blk[k].ins(a[j]);
blk[k].gets();
}
while(q--)
{
int x1=read(),y1=read(),x2=read(),y2=read(),s=read(),t=read();
if(type) x1^=lans,x2^=lans,y1^=lans,y2^=lans,s^=lans,t^=lans;
x1=(x1-1+n)%n+1;x2=(x2-1+n)%n+1;y1=(y1-1+m)%m+1;y2=(y2-1+m)%m+1;
if(x1>x2) swap(x1,x2);
if(y1>y2) swap(y1,y2);
if(s>t) swap(s,t);
s=lower_bound(z+1,z+n*m+1,s)-z;
t=upper_bound(z+1,z+n*m+1,t)-z-1;
lans=0;
for(int i=1,k=1;i<=n*m;i+=B,k++)
if(blk[k].cl>=s&&blk[k].cr<=t) lans+=blk[k].qry1(x1,y1,x2,y2);
else if(blk[k].cl<=t&&blk[k].cr>=s) lans+=blk[k].qry2(x1,y1,x2,y2,s,t);
printf("%d\n",lans);
}
return 0;
}