TiDB 基本数据类型
Datum
// Datum is a data box holds different kind of data.
// It has better performance and is easier to use than `interface{}`.
type Datum struct {
k byte // datum kind.
decimal uint16 // decimal can hold uint16 values.
length uint32 // length can hold uint32 values.
i int64 // i can hold int64 uint64 float64 values.
collation string // collation hold the collation information for string value.
b []byte // b can hold string or []byte values.
x interface{} // x hold all other types.
}
const (
KindNull byte = 0
KindInt64 byte = 1
KindUint64 byte = 2
KindFloat32 byte = 3
KindFloat64 byte = 4
KindString byte = 5
KindBytes byte = 6
KindBinaryLiteral byte = 7 // Used for BIT / HEX literals.
KindMysqlDecimal byte = 8
KindMysqlDuration byte = 9
KindMysqlEnum byte = 10
KindMysqlBit byte = 11 // Used for BIT table column values.
KindMysqlSet byte = 12
KindMysqlTime byte = 13
KindInterface byte = 14
KindMinNotNull byte = 15
KindMaxValue byte = 16
KindRaw byte = 17
KindMysqlJSON byte = 18
)
Chunk
// Chunk stores multiple rows of data in Apache Arrow format.
// See https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout
// Values are appended in compact format and can be directly accessed without decoding.
// When the chunk is done processing, we can reuse the allocated memory by resetting it.
type Chunk struct {
// sel indicates which rows are selected.
// If it is nil, all rows are selected.
sel []int
columns []*Column
// numVirtualRows indicates the number of virtual rows, which have zero Column.
// It is used only when this Chunk doesn't hold any data, i.e. "len(columns)==0".
numVirtualRows int
// capacity indicates the max number of rows this chunk can hold.
// TODO: replace all usages of capacity to requiredRows and remove this field
capacity int
// requiredRows indicates how many rows the parent executor want.
requiredRows int
}
Column
Column offsets[i]表示第i个元素在data中的偏移, data和elmBuf分别作用是啥? elmBuf用来临时append value,将value转为[]byte,然后再append到data上 fixed sized的offsfet数组就不用了,可以直接算出来,这样就省下了offset这个数组.
// Column stores one column of data in Apache Arrow format.
// See https://arrow.apache.org/docs/memory_layout.html
type Column struct {
length int
nullBitmap []byte // bit 0 is null, 1 is not null
offsets []int64
data []byte
elemBuf []byte
}
Row
// Row represents a row of data, can be used to access values.
type Row struct {
c *Chunk
idx int
}
Range
// Range represents a range generated in physical plan building phase.
type Range struct {
LowVal []types.Datum
HighVal []types.Datum
LowExclude bool // Low value is exclusive.
HighExclude bool // High value is exclusive.
}
RowContainer
// RowContainer provides a place for many rows, so many that we might want to spill them into disk.
type RowContainer struct {
m struct {
// RWMutex guarantees spill and get operator for rowContainer is mutually exclusive.
sync.RWMutex
// records stores the chunks in memory.
records *List
// recordsInDisk stores the chunks in disk.
recordsInDisk *ListInDisk
// spillError stores the error when spilling.
spillError error
}
fieldType []*types.FieldType
chunkSize int
numRow int
memTracker *memory.Tracker
diskTracker *disk.Tracker
actionSpill *SpillDiskAction
}