import { LogicalLevelTechnique } from "../metadata/tile/logicalLevelTechnique";
import { PhysicalLevelTechnique } from "../metadata/tile/physicalLevelTechnique";
import { PhysicalStreamType } from "../metadata/tile/physicalStreamType";
import { DictionaryType } from "../metadata/tile/dictionaryType";
import IntWrapper from "../decoding/intWrapper";
import { encodeBooleanRle, encodeFloatsLE, encodeDoubleLE } from "./encodingUtils";
import {
    encodeVarintInt32Value,
    encodeVarintInt32,
    encodeVarintInt64,
    encodeZigZagInt32Value,
    encodeZigZagInt64Value,
    encodeZigZagInt32,
} from "./integerEncodingUtils";

/**
 * Encodes INT_32 values with NONE encoding (no delta, no RLE).
 *
 * The values are zigzag-encoded, then varint-encoded, and prefixed with a
 * stream-metadata header.
 *
 * @param values - Signed 32-bit integers (presumably an Int32Array; confirm against callers).
 * @returns Uint8Array holding the metadata header followed by the encoded data.
 */
export function encodeInt32NoneColumn(values) {
    const zigzagEncoded = encodeZigZagInt32(values);
    const encodedData = encodeVarintInt32(zigzagEncoded);
    const streamMetadata = createStreamMetadata(LogicalLevelTechnique.NONE, LogicalLevelTechnique.NONE, values.length);
    return buildEncodedStream(streamMetadata, encodedData);
}

/**
 * Encodes INT_32 values with DELTA encoding.
 *
 * The first value is stored as-is; every following slot stores the difference
 * to its predecessor. Deltas are held in an Int32Array, so they wrap to 32 bits.
 *
 * @param values - Signed 32-bit integers; may be empty.
 * @returns Uint8Array holding the metadata header followed by the encoded deltas.
 */
export function encodeInt32DeltaColumn(values) {
    const deltaEncoded = new Int32Array(values.length);
    // Only write the base value when there is one, mirroring encodeInt64DeltaColumn.
    if (values.length > 0) {
        deltaEncoded[0] = values[0];
    }
    for (let i = 1; i < values.length; i++) {
        deltaEncoded[i] = values[i] - values[i - 1];
    }
    const zigzagEncoded = encodeZigZagInt32(deltaEncoded);
    const encodedData = encodeVarintInt32(zigzagEncoded);
    const streamMetadata = createStreamMetadata(LogicalLevelTechnique.DELTA, LogicalLevelTechnique.NONE, values.length);
    return buildEncodedStream(streamMetadata, encodedData);
}

/**
 * Encodes INT_32 values with RLE encoding.
 *
 * @param runs - Array of [runLength, value] pairs.
 * @returns Uint8Array holding the RLE metadata header followed by the encoded runs.
 */
export function encodeInt32RleColumn(runs) {
    return encodeInt32Runs(runs, LogicalLevelTechnique.RLE, LogicalLevelTechnique.NONE);
}

/**
 * Encodes INT_32 values with DELTA+RLE encoding.
 *
 * @param runs - Array of [runLength, deltaValue] pairs, where first value is the base.
 * @returns Uint8Array holding the RLE metadata header followed by the encoded runs.
 */
export function encodeInt32DeltaRleColumn(runs) {
    return encodeInt32Runs(runs, LogicalLevelTechnique.DELTA, LogicalLevelTechnique.RLE);
}

/**
 * Encodes nullable INT_32 values.
 *
 * @param values - Array whose entries are 32-bit integers or `null`.
 * @returns Uint8Array containing the nullability stream followed by the data stream.
 */
export function encodeInt32NullableColumn(values) {
    return encodeNullableColumn(values, (nonNullValues) => {
        const zigzagEncoded = new Uint32Array(nonNullValues.map((v) => encodeZigZagInt32Value(v)));
        return encodeVarintInt32(zigzagEncoded);
    });
}

/**
 * Encodes UINT_32 values (no zigzag encoding).
 *
 * @param values - Unsigned 32-bit integers, passed straight to the varint encoder.
 * @returns Uint8Array holding the metadata header followed by the encoded data.
 */
export function encodeUint32Column(values) {
    const encodedData = encodeVarintInt32(values);
    const streamMetadata = createStreamMetadata(LogicalLevelTechnique.NONE, LogicalLevelTechnique.NONE, values.length);
    return buildEncodedStream(streamMetadata, encodedData);
}

/**
 * Encodes INT_64 values with NONE encoding.
 *
 * @param values - Iterable of BigInt values (presumably a BigInt64Array; confirm against callers).
 * @returns Uint8Array holding the metadata header followed by the encoded data.
 */
export function encodeInt64NoneColumn(values) {
    const zigzagEncoded = new BigUint64Array(Array.from(values, (val) => encodeZigZagInt64Value(val)));
    const encodedData = encodeVarintInt64(zigzagEncoded);
    const streamMetadata = createStreamMetadata(LogicalLevelTechnique.NONE, LogicalLevelTechnique.NONE, values.length);
    return buildEncodedStream(streamMetadata, encodedData);
}

/**
 * Encodes INT_64 values with DELTA encoding.
 *
 * The first value is stored as-is; every following slot stores the difference
 * to its predecessor. Deltas are held in a BigInt64Array, so they wrap to 64 bits.
 *
 * @param values - BigInt values; may be empty.
 * @returns Uint8Array holding the metadata header followed by the encoded deltas.
 */
export function encodeInt64DeltaColumn(values) {
    const deltaEncoded = new BigInt64Array(values.length);
    // Writing `undefined` into a BigInt typed array throws a TypeError even for
    // an out-of-range index, so the base value is only stored when it exists.
    if (values.length > 0) {
        deltaEncoded[0] = values[0];
    }
    for (let i = 1; i < values.length; i++) {
        deltaEncoded[i] = values[i] - values[i - 1];
    }
    const zigzagEncoded = new BigUint64Array(deltaEncoded.length);
    for (let i = 0; i < deltaEncoded.length; i++) {
        zigzagEncoded[i] = encodeZigZagInt64Value(deltaEncoded[i]);
    }
    const encodedData = encodeVarintInt64(zigzagEncoded);
    const streamMetadata = createStreamMetadata(LogicalLevelTechnique.DELTA, LogicalLevelTechnique.NONE, values.length);
    return buildEncodedStream(streamMetadata, encodedData);
}

/**
 * Encodes INT_64 values with RLE encoding.
 *
 * @param runs - Array of [runLength, value] pairs; runLength is a number, value a BigInt.
 * @returns Uint8Array holding the RLE metadata header followed by the encoded runs.
 */
export function encodeInt64RleColumn(runs) {
    return encodeInt64Runs(runs, LogicalLevelTechnique.RLE, LogicalLevelTechnique.NONE);
}

/**
 * Encodes INT_64 values with DELTA+RLE encoding.
 *
 * @param runs - Array of [runLength, deltaValue] pairs; runLength is a number, deltaValue a BigInt.
 * @returns Uint8Array holding the RLE metadata header followed by the encoded runs.
 */
export function encodeInt64DeltaRleColumn(runs) {
    return encodeInt64Runs(runs, LogicalLevelTechnique.DELTA, LogicalLevelTechnique.RLE);
}

/**
 * Encodes nullable INT_64 values.
 *
 * @param values - Array whose entries are BigInt values or `null`.
 * @returns Uint8Array containing the nullability stream followed by the data stream.
 */
export function encodeInt64NullableColumn(values) {
    return encodeNullableColumn(values, (nonNullValues) => {
        const zigzagEncoded = new BigUint64Array(Array.from(nonNullValues, (val) => encodeZigZagInt64Value(val)));
        return encodeVarintInt64(zigzagEncoded);
    });
}

/**
 * Encodes UINT_64 values (no zigzag encoding).
 *
 * @param values - Unsigned 64-bit values, passed straight to the varint encoder.
 * @returns Uint8Array holding the metadata header followed by the encoded data.
 */
export function encodeUint64Column(values) {
    const encodedData = encodeVarintInt64(values);
    const streamMetadata = createStreamMetadata(LogicalLevelTechnique.NONE, LogicalLevelTechnique.NONE, values.length);
    return buildEncodedStream(streamMetadata, encodedData);
}

/**
 * Encodes nullable UINT_64 values.
 *
 * @param values - Array whose entries are BigInt values or `null`.
 * @returns Uint8Array containing the nullability stream followed by the data stream.
 */
export function encodeUint64NullableColumn(values) {
    return encodeNullableColumn(values, (nonNullValues) => encodeVarintInt64(new BigUint64Array(nonNullValues)));
}

/**
 * Encodes FLOAT values.
 *
 * @param values - Values handed to `encodeFloatsLE` (presumably a Float32Array).
 * @returns Uint8Array holding the metadata header followed by the encoded data.
 */
export function encodeFloatColumn(values) {
    const encodedData = encodeFloatsLE(values);
    const streamMetadata = createStreamMetadata(LogicalLevelTechnique.NONE, LogicalLevelTechnique.NONE, values.length);
    return buildEncodedStream(streamMetadata, encodedData);
}

/**
 * Encodes nullable FLOAT values.
 *
 * @param values - Array whose entries are numbers or `null`.
 * @returns Uint8Array containing the nullability stream followed by the data stream.
 */
export function encodeFloatNullableColumn(values) {
    return encodeNullableColumn(values, (nonNullValues) => encodeFloatsLE(new Float32Array(nonNullValues)));
}

/**
 * Encodes DOUBLE values.
 *
 * @param values - Values handed to `encodeDoubleLE` (presumably a Float64Array).
 * @returns Uint8Array holding the metadata header followed by the encoded data.
 */
export function encodeDoubleColumn(values) {
    const encodedData = encodeDoubleLE(values);
    const streamMetadata = createStreamMetadata(LogicalLevelTechnique.NONE, LogicalLevelTechnique.NONE, values.length);
    return buildEncodedStream(streamMetadata, encodedData);
}

/**
 * Encodes nullable DOUBLE values.
 *
 * @param values - Array whose entries are numbers or `null`.
 * @returns Uint8Array containing the nullability stream followed by the data stream.
 */
export function encodeDoubleNullableColumn(values) {
    return encodeNullableColumn(values, (nonNullValues) => encodeDoubleLE(new Float64Array(nonNullValues)));
}

/**
 * Encodes BOOLEAN values.
 *
 * @param values - Booleans handed to `encodeBooleanRle`.
 * @returns Uint8Array holding the metadata header followed by the encoded data.
 */
export function encodeBooleanColumn(values) {
    const encodedData = encodeBooleanRle(values);
    const streamMetadata = createStreamMetadata(LogicalLevelTechnique.NONE, LogicalLevelTechnique.NONE, values.length);
    return buildEncodedStream(streamMetadata, encodedData);
}

/**
 * Encodes nullable BOOLEAN values.
 *
 * @param values - Array whose entries are booleans or `null`.
 * @returns Uint8Array containing the nullability stream followed by the data stream.
 */
export function encodeBooleanNullableColumn(values) {
    return encodeNullableColumn(values, (nonNullValues) => encodeBooleanRle(nonNullValues));
}

/**
 * Shared implementation of the INT_32 RLE encoders.
 *
 * The payload is all run lengths followed by all zigzag-encoded values,
 * varint-encoded as one Uint32Array.
 *
 * @param runs - Array of [runLength, value] pairs.
 * @param logicalTechnique1 - First logical-level technique written to the header.
 * @param logicalTechnique2 - Second logical-level technique written to the header.
 * @returns Uint8Array holding the RLE metadata header followed by the encoded runs.
 */
function encodeInt32Runs(runs, logicalTechnique1, logicalTechnique2) {
    const runLengths = [];
    const values = [];
    let totalValues = 0;
    for (const [runLength, value] of runs) {
        runLengths.push(runLength);
        values.push(encodeZigZagInt32Value(value));
        totalValues += runLength;
    }
    const rleValues = [...runLengths, ...values];
    const encodedData = encodeVarintInt32(new Uint32Array(rleValues));
    const streamMetadata = createRleMetadata(logicalTechnique1, logicalTechnique2, runs.length, totalValues);
    return buildEncodedStream(streamMetadata, encodedData);
}

/**
 * Shared implementation of the INT_64 RLE encoders.
 *
 * The payload is all run lengths (converted to BigInt) followed by all
 * zigzag-encoded values, varint-encoded as one BigUint64Array.
 *
 * @param runs - Array of [runLength, value] pairs; runLength is a number, value a BigInt.
 * @param logicalTechnique1 - First logical-level technique written to the header.
 * @param logicalTechnique2 - Second logical-level technique written to the header.
 * @returns Uint8Array holding the RLE metadata header followed by the encoded runs.
 */
function encodeInt64Runs(runs, logicalTechnique1, logicalTechnique2) {
    const runLengths = [];
    const values = [];
    let totalValues = 0;
    for (const [runLength, value] of runs) {
        runLengths.push(BigInt(runLength));
        values.push(encodeZigZagInt64Value(value));
        totalValues += runLength;
    }
    const rleValues = [...runLengths, ...values];
    const encodedData = encodeVarintInt64(new BigUint64Array(rleValues));
    const streamMetadata = createRleMetadata(logicalTechnique1, logicalTechnique2, runs.length, totalValues);
    return buildEncodedStream(streamMetadata, encodedData);
}

/**
 * Shared implementation of the nullable column encoders.
 *
 * Produces a data stream from the non-null entries and a boolean-RLE
 * nullability stream with one flag per entry (`true` when the entry is not
 * `null`). The nullability stream is emitted first.
 *
 * @param values - Array whose entries are values or `null` (`undefined` is NOT treated as null).
 * @param encodePresentValues - Callback turning the non-null entries into the encoded data bytes.
 * @returns Uint8Array containing the nullability stream followed by the data stream.
 */
function encodeNullableColumn(values, encodePresentValues) {
    const nonNullValues = values.filter((v) => v !== null);
    const encodedData = encodePresentValues(nonNullValues);
    const dataStreamMetadata = createStreamMetadata(LogicalLevelTechnique.NONE, LogicalLevelTechnique.NONE, nonNullValues.length);
    const dataStream = buildEncodedStream(dataStreamMetadata, encodedData);
    // Nullability stream
    const nullabilityValues = values.map((v) => v !== null);
    const nullabilityEncoded = encodeBooleanRle(nullabilityValues);
    const nullabilityMetadata = createStreamMetadata(LogicalLevelTechnique.NONE, LogicalLevelTechnique.NONE, nullabilityValues.length);
    const nullabilityStream = buildEncodedStream(nullabilityMetadata, nullabilityEncoded);
    return concatenateBuffers(nullabilityStream, dataStream);
}

/**
 * Builds the metadata object for a plain (non-RLE) DATA stream.
 *
 * `byteLength` is only a placeholder here; `buildEncodedStream` overwrites it
 * with the real payload length before the header is serialized.
 *
 * @param logicalTechnique1 - First logical-level technique.
 * @param logicalTechnique2 - Second logical-level technique.
 * @param numValues - Number of values in the stream; every caller in this file passes it explicitly.
 * @returns Stream metadata object.
 */
function createStreamMetadata(logicalTechnique1, logicalTechnique2 = LogicalLevelTechnique.NONE, numValues = 3) {
    return {
        physicalStreamType: PhysicalStreamType.DATA,
        logicalStreamType: { dictionaryType: DictionaryType.NONE },
        logicalLevelTechnique1: logicalTechnique1,
        logicalLevelTechnique2: logicalTechnique2,
        physicalLevelTechnique: PhysicalLevelTechnique.VARINT,
        numValues,
        byteLength: 10,
        decompressedCount: numValues,
    };
}

/**
 * Builds the metadata object for an RLE-encoded DATA stream.
 *
 * `numValues` counts the physical entries (one run length plus one value per
 * run); `numRleValues` is the number of values after run expansion. As in
 * `createStreamMetadata`, `byteLength` is a placeholder replaced later.
 *
 * @param logicalTechnique1 - First logical-level technique.
 * @param logicalTechnique2 - Second logical-level technique.
 * @param runs - Number of runs.
 * @param numRleValues - Total number of values the runs expand to.
 * @returns RLE stream metadata object.
 */
function createRleMetadata(logicalTechnique1, logicalTechnique2, runs, numRleValues) {
    return {
        physicalStreamType: PhysicalStreamType.DATA,
        logicalStreamType: { dictionaryType: DictionaryType.NONE },
        logicalLevelTechnique1: logicalTechnique1,
        logicalLevelTechnique2: logicalTechnique2,
        physicalLevelTechnique: PhysicalLevelTechnique.VARINT,
        numValues: runs * 2,
        byteLength: 10,
        decompressedCount: numRleValues,
        runs,
        numRleValues,
    };
}

/**
 * Prepends the serialized metadata header to the encoded payload.
 *
 * @param streamMetadata - Metadata from `createStreamMetadata` or `createRleMetadata`.
 * @param encodedData - Encoded payload bytes.
 * @returns Uint8Array of header bytes followed by payload bytes.
 */
function buildEncodedStream(streamMetadata, encodedData) {
    // Replace the placeholder byteLength with the actual payload size.
    const updatedMetadata = {
        ...streamMetadata,
        byteLength: encodedData.length,
    };
    const metadataBuffer = encodeStreamMetadata(updatedMetadata);
    const result = new Uint8Array(metadataBuffer.length + encodedData.length);
    result.set(metadataBuffer, 0);
    result.set(encodedData, metadataBuffer.length);
    return result;
}

/**
 * Serializes stream metadata into its binary header.
 *
 * Layout: one stream-type byte, one encoding-techniques byte, then varints
 * for `numValues` and `byteLength`, plus `runs` and `numRleValues` when the
 * metadata is RLE metadata.
 *
 * @param metadata - Stream metadata object.
 * @returns Uint8Array trimmed to the bytes actually written.
 */
function encodeStreamMetadata(metadata) {
    // Scratch buffer; the header is 2 bytes plus at most four varints
    // (assumed to need no more than 5 bytes each, well under 100).
    const buffer = new Uint8Array(100);
    let writeOffset = 0;
    // Byte 1: Stream type
    const physicalTypeIndex = Object.values(PhysicalStreamType).indexOf(metadata.physicalStreamType);
    const lowerNibble = 0; // For DATA stream with NONE dictionary type
    buffer[writeOffset++] = (physicalTypeIndex << 4) | lowerNibble;
    // Byte 2: Encoding techniques
    const llt1Index = Object.values(LogicalLevelTechnique).indexOf(metadata.logicalLevelTechnique1);
    const llt2Index = Object.values(LogicalLevelTechnique).indexOf(metadata.logicalLevelTechnique2);
    const pltIndex = Object.values(PhysicalLevelTechnique).indexOf(metadata.physicalLevelTechnique);
    buffer[writeOffset++] = (llt1Index << 5) | (llt2Index << 2) | pltIndex;
    // Variable-length fields
    const offset = new IntWrapper(writeOffset);
    encodeVarintInt32Value(metadata.numValues, buffer, offset);
    encodeVarintInt32Value(metadata.byteLength, buffer, offset);
    // RLE-specific fields
    if (isRleMetadata(metadata)) {
        encodeVarintInt32Value(metadata.runs, buffer, offset);
        encodeVarintInt32Value(metadata.numRleValues, buffer, offset);
    }
    return buffer.slice(0, offset.get());
}

/**
 * Tells whether a metadata object carries the RLE-specific fields.
 *
 * @param metadata - Stream metadata object.
 * @returns `true` when both `runs` and `numRleValues` are present.
 */
function isRleMetadata(metadata) {
    return "runs" in metadata && "numRleValues" in metadata;
}

/**
 * Concatenates byte buffers in argument order.
 *
 * @param buffers - Buffers to join.
 * @returns A new Uint8Array holding all input bytes back to back.
 */
function concatenateBuffers(...buffers) {
    const totalLength = buffers.reduce((sum, buf) => sum + buf.length, 0);
    const result = new Uint8Array(totalLength);
    let offset = 0;
    for (const buffer of buffers) {
        result.set(buffer, offset);
        offset += buffer.length;
    }
    return result;
}
//# sourceMappingURL=propertyEncoder.js.map