/** * Create symbol table from string array * * @param symbolStrings Array of symbol strings * @returns Symbol table buffer and lengths */ export function createSymbolTable(symbolStrings) { const textEncoder = new TextEncoder(); const symbolBuffers = symbolStrings.map((s) => textEncoder.encode(s)); const symbolLengths = new Uint32Array(symbolBuffers.map((b) => b.length)); const totalLength = symbolBuffers.reduce((sum, b) => sum + b.length, 0); const symbols = new Uint8Array(totalLength); let offset = 0; for (const buffer of symbolBuffers) { symbols.set(buffer, offset); offset += buffer.length; } return { symbols, symbolLengths }; } /** * Encode data using FSST compression with pre-defined symbol table * Encoder requires pre-defined symbol table. Real FSST learns optimal symbols from data. This * implementation is for testing decoder only. * * @param symbols Array of symbols, where each symbol can be between 1 and 8 bytes * @param symbolLengths Array of symbol lengths, length of each symbol in symbols array * @param uncompressedData Data to compress * @returns FSST compressed data, where each entry is an index to the symbols array */ export function encodeFsst(symbols, symbolLengths, uncompressedData) { if (uncompressedData.length === 0) { return new Uint8Array(0); } // Calculate symbol offsets (cumulative sum of lengths) const symbolOffsets = new Array(symbolLengths.length).fill(0); for (let i = 1; i < symbolLengths.length; i++) { symbolOffsets[i] = symbolOffsets[i - 1] + symbolLengths[i - 1]; } const result = []; let pos = 0; while (pos < uncompressedData.length) { let bestSymbolIndex = -1; let bestSymbolLength = 0; // Try to find longest matching symbol at current position for (let symbolIndex = 0; symbolIndex < symbolLengths.length; symbolIndex++) { const symbolLength = symbolLengths[symbolIndex]; const symbolOffset = symbolOffsets[symbolIndex]; // Check if symbol could fit and is longer than current best if (pos + symbolLength <= uncompressedData.length && symbolLength > bestSymbolLength) { // Check if bytes match let matches = true; for (let i = 0; i < symbolLength; i++) { if (symbols[symbolOffset + i] !== uncompressedData[pos + i]) { matches = false; break; } } if (matches) { bestSymbolIndex = symbolIndex; bestSymbolLength = symbolLength; } } } if (bestSymbolIndex !== -1) { // Found a matching symbol result.push(bestSymbolIndex); pos += bestSymbolLength; } else { // No match - emit escape sequence (255 followed by literal byte) result.push(255); result.push(uncompressedData[pos]); pos++; } } return new Uint8Array(result); } //# sourceMappingURL=fsstEncoder.js.map