Data structure STL -- implementation of comparator by golang

Keywords: Go data structure STL

github address: GitHub - hlccd/goSTL

 Comparator

summary

For data structures that need to compare the size of their elements, implementing that comparison from scratch every time is very cumbersome. This is especially true for the built-in types: with a ready-made comparator, comparing elements of a basic type stored in a data structure becomes simple.

At the same time, some common functions, such as sorting, searching, finding the nth element and finding the upper and lower bounds, need to be implemented through the comparator. In order to further simplify their use, they can be implemented alongside the comparator.

definition

For a comparator, a comparison function must be passed in for every type other than the basic types. Of course, the default comparison of a basic data type can also be overridden by a self-defined comparison function. A comparison function takes two elements a and b (in that order) and returns an int, where 0 represents a == b, a positive number represents a > b, and a negative number represents a < b.

// Comparator reports the relative order of a and b: it returns 0 when they
// are equal, a positive number when a > b, and a negative number when a < b.
type Comparator func(a, b interface{}) int

GetCmp

For some basic data types, the comparison function can be preset to save the process of rewriting the comparison function of basic data types.

Of course, for the basic type to be compared, you need to pass in an object to obtain its data type, so as to return the corresponding default comparator.

The following parts can be copied directly. The implementation only judges the type and returns the default comparator. There is no part to understand.

// GetCmp returns the built-in Comparator that matches the dynamic type of e.
//
// Covered types are bool, int, int8/16/32/64, uint8/16/32/64, float32,
// float64, complex64, complex128 and string. For a nil e, or any other type
// (including uint and uintptr), GetCmp returns nil.
func GetCmp(e interface{}) (cmp Comparator) {
	// cmp starts out nil; a nil e matches none of the cases below and so
	// falls through to the final return untouched.
	switch e.(type) {
	case bool:
		cmp = boolCmp
	case int:
		cmp = intCmp
	case int8:
		cmp = int8Cmp
	case uint8:
		cmp = uint8Cmp
	case int16:
		cmp = int16Cmp
	case uint16:
		cmp = uint16Cmp
	case int32:
		cmp = int32Cmp
	case uint32:
		cmp = uint32Cmp
	case int64:
		cmp = int64Cmp
	case uint64:
		cmp = uint64Cmp
	case float32:
		cmp = float32Cmp
	case float64:
		cmp = float64Cmp
	case complex64:
		cmp = complex64Cmp
	case complex128:
		cmp = complex128Cmp
	case string:
		cmp = stringCmp
	}
	return cmp
}
​

basicCmp

//The following are the default comparators of the system's own types
​
// boolCmp is the default comparator for bool values, ordering false before
// true.
//
// It returns 0 when a == b, 1 when a is true and -1 when only b is true.
// An operand that is not a bool makes the type assertion panic once it is
// inspected.
func boolCmp(a, b interface{}) int {
	// Cases are evaluated top to bottom, so b is only inspected when a is
	// false.
	switch {
	case a == b:
		return 0
	case a.(bool):
		return 1
	case b.(bool):
		return -1
	}
	return 0
}
// intCmp is the default comparator for int values.
//
// It returns 0 when a and b are equal, 1 when a > b and -1 when a < b.
// Unless a == b, both operands must hold an int or the type assertion
// panics.
func intCmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(int), b.(int)
	switch {
	case x > y:
		return 1
	case x < y:
		return -1
	}
	return 0
}
// int8Cmp is the default comparator for int8 values.
//
// It returns 0 when a and b are equal, 1 when a > b and -1 when a < b.
// Unless a == b, both operands must hold an int8 or the type assertion
// panics.
func int8Cmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(int8), b.(int8)
	switch {
	case x > y:
		return 1
	case x < y:
		return -1
	}
	return 0
}
// uint8Cmp is the default comparator for uint8 values.
//
// It returns 0 when a and b are equal, 1 when a > b and -1 when a < b.
// Unless a == b, both operands must hold a uint8 or the type assertion
// panics.
func uint8Cmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(uint8), b.(uint8)
	switch {
	case x > y:
		return 1
	case x < y:
		return -1
	}
	return 0
}
// int16Cmp is the default comparator for int16 values.
//
// It returns 0 when a and b are equal, 1 when a > b and -1 when a < b.
// Unless a == b, both operands must hold an int16 or the type assertion
// panics.
func int16Cmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(int16), b.(int16)
	switch {
	case x > y:
		return 1
	case x < y:
		return -1
	}
	return 0
}
// uint16Cmp is the default comparator for uint16 values.
//
// It returns 0 when a and b are equal, 1 when a > b and -1 when a < b.
// Unless a == b, both operands must hold a uint16 or the type assertion
// panics.
func uint16Cmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(uint16), b.(uint16)
	switch {
	case x > y:
		return 1
	case x < y:
		return -1
	}
	return 0
}
// int32Cmp is the default comparator for int32 values.
//
// It returns 0 when a and b are equal, 1 when a > b and -1 when a < b.
// Unless a == b, both operands must hold an int32 or the type assertion
// panics.
func int32Cmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(int32), b.(int32)
	switch {
	case x > y:
		return 1
	case x < y:
		return -1
	}
	return 0
}
// uint32Cmp is the default comparator for uint32 values.
//
// It returns 0 when a and b are equal, 1 when a > b and -1 when a < b.
// Unless a == b, both operands must hold a uint32 or the type assertion
// panics.
func uint32Cmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(uint32), b.(uint32)
	switch {
	case x > y:
		return 1
	case x < y:
		return -1
	}
	return 0
}
// int64Cmp is the default comparator for int64 values.
//
// It returns 0 when a and b are equal, 1 when a > b and -1 when a < b.
// Unless a == b, both operands must hold an int64 or the type assertion
// panics.
func int64Cmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(int64), b.(int64)
	switch {
	case x > y:
		return 1
	case x < y:
		return -1
	}
	return 0
}
// uint64Cmp is the default comparator for uint64 values.
//
// It returns 0 when a and b are equal, 1 when a > b and -1 when a < b.
// Unless a == b, both operands must hold a uint64 or the type assertion
// panics.
func uint64Cmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(uint64), b.(uint64)
	switch {
	case x > y:
		return 1
	case x < y:
		return -1
	}
	return 0
}
// float32Cmp is the default comparator for float32 values.
//
// It returns 1 when a > b, -1 when a < b and 0 otherwise (which includes
// the case where neither ordering holds). Unless a == b, both operands must
// hold a float32 or the type assertion panics.
func float32Cmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(float32), b.(float32)
	switch {
	case x > y:
		return 1
	case x < y:
		return -1
	}
	return 0
}
// float64Cmp is the default comparator for float64 values.
//
// It returns 1 when a > b, -1 when a < b and 0 otherwise (which includes
// the case where neither ordering holds). Unless a == b, both operands must
// hold a float64 or the type assertion panics.
func float64Cmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(float64), b.(float64)
	switch {
	case x > y:
		return 1
	case x < y:
		return -1
	}
	return 0
}
// complex64Cmp is the default comparator for complex64 values.
//
// Values are ordered by their real part first; the imaginary part is only
// consulted when neither real part is greater than the other. It returns 1,
// -1 or 0 accordingly. Unless a == b, both operands must hold a complex64
// or the type assertion panics.
func complex64Cmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(complex64), b.(complex64)
	switch {
	case real(x) > real(y):
		return 1
	case real(x) < real(y):
		return -1
	case imag(x) > imag(y):
		return 1
	case imag(x) < imag(y):
		return -1
	}
	return 0
}
// complex128Cmp is the default comparator for complex128 values.
//
// Values are ordered by their real part first; the imaginary part is only
// consulted when neither real part is greater than the other. It returns 1,
// -1 or 0 accordingly. Unless a == b, both operands must hold a complex128
// or the type assertion panics.
func complex128Cmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(complex128), b.(complex128)
	switch {
	case real(x) > real(y):
		return 1
	case real(x) < real(y):
		return -1
	case imag(x) > imag(y):
		return 1
	case imag(x) < imag(y):
		return -1
	}
	return 0
}
// stringCmp is the default comparator for string values.
//
// Strings are ordered by byte length first, so a shorter string always
// sorts before a longer one; strings of equal length are then ordered with
// the built-in string comparison. It returns 1, -1 or 0 accordingly. Unless
// a == b, both operands must hold a string or the type assertion panics.
func stringCmp(a, b interface{}) int {
	if a == b {
		return 0
	}
	x, y := a.(string), b.(string)
	switch {
	case len(x) > len(y):
		return 1
	case len(x) < len(y):
		return -1
	case x > y:
		return 1
	case x < y:
		return -1
	}
	return 0
}

Sort

Sorting: for the passed in array, sort through the passed in comparison function (the basic type can be obtained directly through GetCmp without passing in).

In order to simplify the implementation, the element group to be compared can be limited to a linear array. For some nonlinear structures, the pointer can be listed as a linear table, and then compared through the incoming comparison function. However, it is generally not recommended to use the comparator for nonlinear structures. Its structure is best to maintain the size distribution, which can better improve the efficiency.

Two sorts are implemented: binary sorting (a partition-based sort in the style of quicksort) and merge sorting. Because binary sorting is itself unstable, it is more suitable for arrays with a small amount of data. Merge sorting performs very consistently and is more suitable for arrays with a large amount of data. Therefore, the array to be sorted is dispatched to the appropriate sorting method according to its length.

The passed in array is its pointer. Passing in the pointer can reduce the duplication and save time.

// Sort sorts the slice pointed to by arr in place, in ascending order.
//
// The ordering comes from the first comparator in Cmp when one is supplied;
// otherwise the default comparator for the type of the first element is
// looked up with GetCmp. Sort does nothing when arr is nil, when the slice
// is nil or empty, or when no comparator is available.
func Sort(arr *[]interface{}, Cmp ...Comparator) {
	// Slices shorter than this are sorted by binary; longer ones go through
	// merge. Written as a shift because ^ is bitwise XOR in Go, not
	// exponentiation.
	const binaryLimit = 1 << 16

	// len of a nil slice is 0, so this also covers *arr == nil.
	if arr == nil || len(*arr) == 0 {
		return
	}
	var cmp Comparator
	if len(Cmp) > 0 {
		cmp = Cmp[0]
	} else {
		cmp = GetCmp((*arr)[0])
	}
	if cmp == nil {
		// No comparator was passed in and the element type has no default
		// one, so there is no ordering to sort by.
		return
	}
	last := len(*arr) - 1
	if len(*arr) < binaryLimit {
		binary(arr, 0, last, cmp)
	} else {
		merge(arr, 0, last, cmp)
	}
}

Binary

For binary sorting, the principle is to pick an intermediate value from the array (generally the value at the middle point of the array) as a reference. Through comparison and exchange, everything to the left of the dividing point ends up not greater than the intermediate value and everything to the right not less than it, that is, the array reaches a state of relative order. The two sides are then sorted recursively in the same way, so that each part becomes relatively ordered and the whole becomes ordered in turn.

When the intermediate value is chosen poorly, or is even an extreme value, binary sorting degenerates to quadratic running time, like bubble sorting. The sorting scheme is not stable, but because it needs no additional space for storage it can be used on small arrays.

// binary sorts the elements (*arr)[l..r] (both bounds inclusive) in place
// according to cmp.
//
// It takes the middle element of the range as the reference value, swaps
// elements until everything in [l, hi] compares <= the reference and
// everything in [hi+1, r] compares >= it, then recurses into both halves.
func binary(arr *[]interface{}, l, r int, cmp Comparator) {
	if l >= r {
		return
	}
	s := *arr
	pivot := s[(l+r)/2]
	lo, hi := l-1, r+1
	for lo < hi {
		// Step lo right to the next element that is not below the pivot.
		for lo++; cmp(s[lo], pivot) < 0; lo++ {
		}
		// Step hi left to the next element that is not above the pivot.
		for hi--; cmp(s[hi], pivot) > 0; hi-- {
		}
		if lo < hi {
			s[lo], s[hi] = s[hi], s[lo]
		}
	}
	// hi is the last index of the lower part once the cursors have met.
	binary(arr, l, hi, cmp)
	binary(arr, hi+1, r, cmp)
}

Merge

For merge sort, the principle is to divide an array into left and right, then sort from the smallest part (generally there are only two or one element), and then merge up in turn. Because the interior of the merged two small arrays is orderly, you only need to traverse and compare their sizes in turn. At the same time, You need a temporary array to store the comparison results, and then put the values in the temporary array into the array to be sorted, and merge them into the whole array in turn to ensure its order.

Because the scheme gradually splits the array into the smallest cells for merging, the situation must be relatively stable. Although it needs some additional space for storage, it is also worth the space cost compared with its stability.

// merge sorts the elements (*arr)[l..r] (both bounds inclusive) according
// to cmp using a top-down merge sort.
//
// Each half is sorted recursively, the two sorted halves are merged into a
// temporary buffer of r-l+1 elements, and the buffer is copied back over
// the range. On ties the element from the left half is taken first.
func merge(arr *[]interface{}, l, r int, cmp Comparator) {
	if l >= r {
		return
	}
	mid := (l + r) / 2
	merge(arr, l, mid, cmp)
	merge(arr, mid+1, r, cmp)

	s := *arr
	buf := make([]interface{}, 0, r-l+1)
	left, right := l, mid+1
	for left <= mid && right <= r {
		if cmp(s[left], s[right]) > 0 {
			buf = append(buf, s[right])
			right++
			continue
		}
		buf = append(buf, s[left])
		left++
	}
	// At most one of the two halves still has elements; append the rest.
	buf = append(buf, s[left:mid+1]...)
	buf = append(buf, s[right:r+1]...)
	// Write the merged run back over the range it came from.
	copy(s[l:r+1], buf)
}

Example

package main

import (
	"fmt"

	"github.com/hlccd/goSTL/utils/comparator"
)

// main sorts a small slice of ints with the default comparator and prints
// the elements, one per line, in their sorted order.
func main() {
	arr := []interface{}{5, 3, 2, 4, 1, 4, 3, 1, 5, 2}
	comparator.Sort(&arr)
	for _, v := range arr {
		// fmt.Println is used instead of the builtin println so that the
		// imported fmt package is actually referenced.
		fmt.Println(v.(int))
	}
}

Search

For an ordered linear table, if you want to find an element from it, you can find it by dichotomy, that is, by comparing the size of the element and the median of the current interval, you can judge to remove the left or right interval, and then continue to compare until there is only one element left. At this time, you only need to compare whether the element and the element to be found are equal, If equal, the subscript of the element is returned. If unequal, the return of - 1 indicates that the element is not found.

First validate the passed-in parameters to ensure that no error occurs during the search. Once validation is finished, call the search function to perform the search.

// Search looks for e in the slice pointed to by arr using binary search and
// returns the index reported by search, or -1 when e is not found.
//
// The slice is expected to already be sorted by the comparator in use. The
// comparator is the first entry of Cmp when one is supplied; otherwise the
// default comparator for the type of e is looked up with GetCmp. Search
// returns -1 when arr is nil, when the slice is nil or empty, or when no
// comparator is available.
func Search(arr *[]interface{}, e interface{}, Cmp ...Comparator) (idx int) {
	// Return -1 explicitly: a bare return here would report index 0, which
	// is indistinguishable from a hit on the first element.
	if arr == nil || len(*arr) == 0 {
		return -1
	}
	var cmp Comparator
	if len(Cmp) > 0 {
		cmp = Cmp[0]
	} else {
		cmp = GetCmp(e)
	}
	if cmp == nil {
		// Not a default type and no comparison function was passed in.
		return -1
	}
	return search(arr, e, cmp)
}

search

Binary lookup function:

// search performs a binary search for e over the slice pointed to by arr,
// which is expected to be sorted ascending according to cmp.
//
// It narrows [l, r) down to the first index whose element does not compare
// below e and returns that index when the element there compares equal to
// e (cmp returns 0); otherwise it returns -1.
func search(arr *[]interface{}, e interface{}, cmp Comparator) (idx int) {
	s := *arr
	l, r := 0, len(s)
	for l < r {
		m := l + (r-l)/2
		if cmp(s[m], e) < 0 {
			l = m + 1
		} else {
			r = m
		}
	}
	// l == len(s) when every element compares below e, so guard the index
	// before reading it. Equality is decided by cmp rather than ==, which
	// would panic on uncomparable element types.
	if l < len(s) && cmp(s[l], e) == 0 {
		return l
	}
	return -1
}

Example

package main

import (
	"fmt"

	"github.com/hlccd/goSTL/utils/comparator"
)

// main sorts a small slice of ints, prints the sorted elements one per
// line, and then prints the index Search reports for the value 3.
func main() {
	arr := []interface{}{5, 3, 2, 4, 1, 4, 3, 1, 5, 2}
	comparator.Sort(&arr)
	for _, v := range arr {
		// Print through fmt so the elements and the search result go to the
		// same stream; the builtin println writes to standard error.
		fmt.Println(v.(int))
	}
	fmt.Println("search:", comparator.Search(&arr, 3))
}

NthElement

For the nth element to be found (the subscript starts from 0), you can use a similar method to bisection sorting. However, since you only need to put the nth element in the nth bit, you only need to sort the interval where the nth bit is located, that is, for the two intervals obtained after bisection using the intermediate value, you only need to bisect the interval containing n, The other interval can be ignored directly.

The function returns the element located at position n. The process is divided into two parts. The first part verifies that the call can be executed, that is, whether the pointer is nil, whether the array is nil or empty, and whether n is outside the range of the array. If any of these situations occurs, it directly returns nil. Otherwise, the array is partially sorted.

// NthElement partially sorts the slice pointed to by arr and returns the
// element that ends up at index n (counted from 0).
//
// The comparator is the first entry of Cmp when one is supplied; otherwise
// the default comparator for the type of the first element is looked up
// with GetCmp. NthElement returns nil when arr is nil, when the slice is
// nil or empty, when n is outside [0, len), or when no comparator is
// available. The slice is reordered in place by nthElement.
func NthElement(arr *[]interface{}, n int, Cmp ...Comparator) (value interface{}) {
	if arr == nil || len(*arr) == 0 {
		return nil
	}
	// Valid positions are 0..len-1; n == len would index past the end.
	if n < 0 || n >= len(*arr) {
		return nil
	}
	var cmp Comparator
	if len(Cmp) > 0 {
		cmp = Cmp[0]
	} else {
		cmp = GetCmp((*arr)[0])
	}
	if cmp == nil {
		return nil
	}
	nthElement(arr, 0, len(*arr)-1, n, cmp)
	return (*arr)[n]
}

nthElement

The implementation part does not return any value, but only performs limited sorting, that is, only sorts the interval containing n.

// nthElement reorders (*arr)[l..r] (both bounds inclusive) so that the
// element at index n is the one that would be there if the range were
// fully sorted by cmp. It returns nothing; the caller reads (*arr)[n].
//
// Each step partitions the range around its middle element into [l, j]
// (elements comparing <= the reference) and [j+1, r] (elements comparing
// >= it), then continues only in the part that contains n.
func nthElement(arr *[]interface{}, l, r int, n int, cmp Comparator) {
	if l >= r {
		return
	}
	s := *arr
	m := s[(l+r)/2]
	i, j := l-1, r+1
	for i < j {
		i++
		for cmp(s[i], m) < 0 {
			i++
		}
		j--
		for cmp(s[j], m) > 0 {
			j--
		}
		if i < j {
			s[i], s[j] = s[j], s[i]
		}
	}
	// The split point is j, not i: the cursors can finish crossed with
	// i == j+1, and testing n against i would then send n == j+1 into the
	// left part, which does not contain it.
	if n > j {
		nthElement(arr, j+1, r, n, cmp)
	} else {
		nthElement(arr, l, j, n, cmp)
	}
}

Example

package main

import (
	"fmt"

	"github.com/hlccd/goSTL/utils/comparator"
)

// main prints, for every index i of a small unsorted slice of ints, the
// element NthElement reports for position i.
func main() {
	arr := []interface{}{5, 3, 2, 4, 1, 4, 3, 1, 5, 2}
	for i := range arr {
		fmt.Println("n:", comparator.NthElement(&arr, i))
	}
}

Bound

For an ordered linear table, the upper and lower bounds of a value can be obtained by binary search. When the element being searched for does not exist in the linear table, the upper bound returned is the subscript of the largest element less than it, and the lower bound returned is the subscript of the smallest element greater than it. When the element exists in the linear table, the upper bound returned is the rightmost subscript of the element, and the lower bound returned is the leftmost subscript of the element.

The search method is the deformation of binary search and returns the boundary of the search value.

UpperBound

// UpperBound returns the index computed by upperBound for e in the slice
// pointed to by arr, which is expected to be sorted by the comparator in
// use.
//
// The comparator is the first entry of Cmp when one is supplied; otherwise
// the default comparator for the type of e is looked up with GetCmp.
// UpperBound returns -1 when arr is nil, when the slice is nil or empty, or
// when no comparator is available.
func UpperBound(arr *[]interface{}, e interface{}, Cmp ...Comparator) (idx int) {
	// len of a nil slice is 0, so this also covers *arr == nil.
	if arr == nil || len(*arr) == 0 {
		return -1
	}
	var cmp Comparator
	if len(Cmp) > 0 {
		cmp = Cmp[0]
	} else {
		cmp = GetCmp(e)
	}
	if cmp != nil {
		return upperBound(arr, e, cmp)
	}
	return -1
}

upperBound

// upperBound binary-searches the slice pointed to by arr, expected to be
// sorted ascending according to cmp, for the largest index whose element
// compares <= e.
//
// It returns 0 when no element compares <= e, and also when the slice has
// fewer than two elements.
func upperBound(arr *[]interface{}, e interface{}, cmp Comparator) (idx int) {
	s := *arr
	lo, hi := 0, len(s)-1
	for lo < hi {
		// Round the midpoint up so that lo = mid always makes progress.
		mid := (lo + hi + 1) / 2
		if cmp(s[mid], e) > 0 {
			hi = mid - 1
			continue
		}
		lo = mid
	}
	return lo
}

LowerBound

// LowerBound returns the index computed by lowerBound for e in the slice
// pointed to by arr, which is expected to be sorted by the comparator in
// use.
//
// The comparator is the first entry of Cmp when one is supplied; otherwise
// the default comparator for the type of e is looked up with GetCmp.
// LowerBound returns -1 when arr is nil, when the slice is nil or empty, or
// when no comparator is available.
func LowerBound(arr *[]interface{}, e interface{}, Cmp ...Comparator) (idx int) {
	// len of a nil slice is 0, so this also covers *arr == nil.
	if arr == nil || len(*arr) == 0 {
		return -1
	}
	var cmp Comparator
	if len(Cmp) > 0 {
		cmp = Cmp[0]
	} else {
		cmp = GetCmp(e)
	}
	if cmp != nil {
		return lowerBound(arr, e, cmp)
	}
	return -1
}

lowerBound

// lowerBound binary-searches the slice pointed to by arr, expected to be
// sorted ascending according to cmp, for the smallest index whose element
// compares >= e.
//
// It returns len(*arr) when every element compares below e, so the result
// is not always a valid index into the slice.
func lowerBound(arr *[]interface{}, e interface{}, cmp Comparator) (idx int) {
	s := *arr
	lo, hi := 0, len(s)
	for lo < hi {
		mid := (lo + hi) / 2
		if cmp(s[mid], e) < 0 {
			lo = mid + 1
			continue
		}
		hi = mid
	}
	return lo
}

Example

package main

import (
	"fmt"

	"github.com/hlccd/goSTL/utils/comparator"
)

// main sorts a small slice of ints, prints every index with its element,
// and then prints the upper and lower bound reported for each value
// 0..len(arr)-1.
func main() {
	arr := []interface{}{5, 3, 2, 4, 1, 4, 3, 1, 5, 2}
	comparator.Sort(&arr)
	for i, v := range arr {
		fmt.Println(i, "=", v)
	}
	// Four newlines, the same bytes Println("\n\n\n") would emit, written
	// with Print so the argument does not end in a redundant newline.
	fmt.Print("\n\n\n\n")
	for i := range arr {
		fmt.Println(i)
		fmt.Println("upper:", comparator.UpperBound(&arr, i))
		fmt.Println("lower:", comparator.LowerBound(&arr, i))
		fmt.Println()
	}
}

Posted by Richter on Mon, 18 Oct 2021 15:48:26 -0700