Redis Source Code Notes: intset and My Understanding of It

Keywords: encoding less Redis

intset structure

/* Integer set: a small header followed by the elements in the flexible array `contents`. */
typedef struct intset {
    uint32_t encoding;  // Encoding (element type code) shared by every element currently in the set
    uint32_t length;    // Number of elements in contents, i.e. the number of integers in the set
    int8_t contents[];  // Element storage; declared as int8_t but accessed through int16_t/int32_t/int64_t casts chosen by encoding
} intset;

An intset is suitable when a set contains only integers and has relatively few elements. In either of the following cases, the underlying structure of a Redis set is converted from an intset to a hashtable:

(1) When the number of elements exceeds a threshold, which can be set with the configuration item set-max-intset-entries
(2) When the added element is not an integer
intset lookup
/*
 * Membership test: returns 1 when `value` is stored in the integer set,
 * 0 otherwise.
 */
uint8_t intsetFind(intset *is, int64_t value) {
    uint8_t needed = _intsetValueEncoding(value);   // Encoding required to hold `value`

    // The search is only attempted when the value's encoding does not
    // exceed the encoding currently used by the set.
    if (needed > intrev32ifbe(is->encoding)) {
        return 0;
    }

    // No position is requested, only the found / not-found answer.
    return intsetSearch(is, value, NULL) ? 1 : 0;
}
/*
 * Locate `value` in the integer set, whose elements are kept in ascending order.
 *
 * Returns 1 when the value is present and stores its index in *pos.
 * Returns 0 when the value is absent and stores in *pos the index at which
 * `value` could be inserted. `pos` may be a null pointer when the caller
 * does not need the index.
 */
static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
    uint32_t count = intrev32ifbe(is->length);
    int lo = 0;
    int hi = count - 1;
    int probe = -1;
    int64_t seen = -1;

    /* Empty set: nothing can match, and the only insertion point is index 0. */
    if (count == 0) {
        if (pos) *pos = 0;
        return 0;
    }

    /* Shortcuts for values outside the stored range: the insertion point is
     * already known without searching. Larger than the last element means
     * append at the end; smaller than the first means insert at the front. */
    if (value > _intsetGet(is, hi)) {
        if (pos) *pos = count;
        return 0;
    }
    if (value < _intsetGet(is, 0)) {
        if (pos) *pos = 0;
        return 0;
    }

    /* Binary search over the sorted elements. */
    while (lo <= hi) {
        /* Midpoint computed on unsigned operands, then halved by a right shift. */
        probe = ((unsigned int)lo + (unsigned int)hi) >> 1;
        seen = _intsetGet(is, probe);
        if (value == seen) {
            break;
        }
        if (value > seen) {
            lo = probe + 1;
        } else {
            hi = probe - 1;
        }
    }

    /* Miss: `lo` has converged on the slot where the value would be inserted. */
    if (value != seen) {
        if (pos) *pos = lo;
        return 0;
    }

    /* Hit: report the index of the matching element. */
    if (pos) *pos = probe;
    return 1;
}

The search is divided into the following steps:
(1) First check whether the encoding of the value exceeds the current encoding of the intset; if it does, the value cannot be in the set, otherwise call intsetSearch to look it up;
(2) In intsetSearch, first check whether the value lies between the minimum and maximum elements; if it does, perform a binary search, which works because the intset keeps its elements sorted in ascending order without duplicates;
(3) If the element is found, intsetSearch returns 1 and sets pos to the position of the element; if it is not found, it returns 0 and sets pos to the position where the value could be inserted. In particular, when the value is greater than the maximum it belongs at the end of the set (pos = is->length), and when it is less than the minimum it belongs at the beginning (pos = 0).

intset add
/*
 * Add `value` to the integer set and return the (possibly reallocated) set.
 *
 * When `success` is non-null it is set to 1 if the value was inserted and
 * to 0 if the value was already present.
 *
 * Outline:
 *  (1) If the value needs a larger encoding than the set uses, delegate to
 *      intsetUpgradeAndAdd.
 *  (2) Otherwise search for the value; stop if it already exists.
 *  (3) Grow the set by one element and shift the tail back when the
 *      insertion point is not the end.
 *  (4) Store the value and bump the element count.
 */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    uint8_t needed = _intsetValueEncoding(value);   // Encoding required to hold `value`
    uint32_t slot;

    /* Optimistically report success; cleared below on a duplicate. */
    if (success) *success = 1;

    /* The value does not fit the current encoding: upgrade and insert there. */
    if (needed > intrev32ifbe(is->encoding)) {
        return intsetUpgradeAndAdd(is, value);
    }

    /* Duplicate: leave the set untouched. On a miss, `slot` holds the
     * index where the new value has to go. */
    if (intsetSearch(is, value, &slot)) {
        if (success) *success = 0;
        return is;
    }

    /* Make room for one more element. */
    is = intsetResize(is, intrev32ifbe(is->length) + 1);

    /* Inserting before the end: move everything from `slot` onward back by one. */
    if (slot < intrev32ifbe(is->length)) {
        intsetMoveTail(is, slot, slot + 1);
    }

    /* Write the value and record the new element count. */
    _intsetSet(is, slot, value);
    is->length = intrev32ifbe(intrev32ifbe(is->length) + 1);
    return is;
}
/*
 * Switch the set to the encoding required by `value`, re-encode the existing
 * elements, and insert `value`. Returns the (possibly reallocated) set.
 *
 * Called when the encoding of `value` exceeds the set's current encoding.
 */
static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
    uint8_t oldenc = intrev32ifbe(is->encoding);        // Encoding the elements are stored in now
    uint8_t wideenc = _intsetValueEncoding(value);      // Encoding needed for the new value
    int remaining = intrev32ifbe(is->length);           // Elements still to be re-encoded
    /* A value outside the old encoding's range goes at one of the two ends:
     * a negative value is placed at index 0, a non-negative one at the end.
     * `shift` is 1 when a free slot must be left at the front. */
    int shift = (value < 0);

    /* Record the new encoding first, then grow the set by one element. */
    is->encoding = intrev32ifbe(wideenc);
    is = intsetResize(is, intrev32ifbe(is->length) + 1);

    /* Re-encode from the last element to the first so that values that have
     * not been converted yet are never overwritten. Each element is read
     * using the old encoding and written at its index plus `shift`. */
    while (remaining != 0) {
        remaining--;
        _intsetSet(is, remaining + shift, _intsetGetEncoded(is, remaining, oldenc));
    }

    /* Drop the new value into the slot left free at the front or the end. */
    uint32_t target = shift ? 0 : intrev32ifbe(is->length);
    _intsetSet(is, target, value);

    /* Account for the inserted element. */
    is->length = intrev32ifbe(intrev32ifbe(is->length) + 1);
    return is;
}
// Move the tail of the set: every element from index `from` to the end is
// copied so that it starts at index `to`. The copy is done with memmove.
static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
    // Starts as the number of elements to move; converted to bytes below
    uint32_t nbytes = intrev32ifbe(is->length) - from;
    // Encoding of the set, which determines the size of one element
    uint32_t enc = intrev32ifbe(is->encoding);
    size_t width;

    // Pick the element width that matches the set's encoding
    if (enc == INTSET_ENC_INT64) {
        width = sizeof(int64_t);
    } else if (enc == INTSET_ENC_INT32) {
        width = sizeof(int32_t);
    } else {
        width = sizeof(int16_t);
    }

    // contents is a byte array, so element indexes are scaled by the width
    // to obtain the byte offsets of the source and destination
    void *source = is->contents + from * width;
    void *target = is->contents + to * width;
    nbytes *= width;

    // The two regions may overlap, which memmove handles correctly
    memmove(target, source, nbytes);
}

The main steps to add an element to an intset are as follows:
(1) Determine if the encoding of the element to be added exceeds that of the current intset; if it exceeds, it needs to be upgraded;
(2) If not exceeded, call intsetSearch to query if the element already exists and determine where it can be placed;
(3) If you can insert and place it in the middle of a collection element (not at the end), you need to move all subsequent elements back, which is achieved by calling intsetMoveTail, using memmove at the bottom
(4) If an upgrade is required, call intsetUpgradeAndAdd: it sets the encoding of the intset to that of the new integer and expands the set, then re-encodes all existing elements from back to front so that no value is overwritten. Because the new integer is outside the range of the old encoding, it is either larger than every element and is placed at the end, or smaller than every element and is placed at the beginning. Finally the element is inserted and the length of the intset is updated.

intset deletion
/*
 * Remove `value` from the integer set and return the (possibly reallocated) set.
 * The set's encoding is never changed by a removal.
 *
 * When `success` is non-null it is set to 1 if the value was removed and
 * to 0 if the value was not in the set.
 */
intset *intsetRemove(intset *is, int64_t value, int *success) {
    uint8_t needed = _intsetValueEncoding(value);   // Encoding required to hold `value`
    uint32_t slot;

    /* Assume failure until the value is actually located. */
    if (success) *success = 0;

    /* Removal only proceeds when the value's encoding does not exceed the
     * set's encoding and the search finds it. */
    if (needed > intrev32ifbe(is->encoding)) {
        return is;
    }
    if (!intsetSearch(is, value, &slot)) {
        return is;
    }

    uint32_t count = intrev32ifbe(is->length);

    /* The value exists, so the removal will go through. */
    if (success) *success = 1;

    /* Unless it is the last element, close the gap by moving every element
     * after `slot` forward by one position. */
    if (slot < (count - 1)) {
        intsetMoveTail(is, slot + 1, slot);
    }

    /* Shrink the storage and record the reduced element count. */
    is = intsetResize(is, count - 1);
    is->length = intrev32ifbe(count - 1);
    return is;
}

When an intset deletes an integer, it first checks that the encoding of the element to be deleted does not exceed the encoding of the set and that the element can be found in the set. If it is found and is not the last element, all elements after it are moved forward by one element width; the set is then resized and its length is decremented.

Fifteen original articles were published, won 1, visited 346
Private letter follow

Posted by plezops on Sat, 29 Feb 2020 19:33:40 -0800