// Source: deno.land / std@0.224.0 / csv / parse.ts
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.
import { convertRowToObject, ERR_BARE_QUOTE, ERR_FIELD_COUNT, ERR_INVALID_DELIM, ERR_QUOTE, ParseError, type ParseResult, type ReadOptions, type RecordWithColumn,} from "./_io.ts";import { assert } from "../assert/assert.ts";
export { ParseError, type ParseResult, type ReadOptions, type RecordWithColumn,};
/**
 * Unicode byte order mark (U+FEFF). `Parser.parse` drops it when it is the
 * very first character of the input.
 */
const BYTE_ORDER_MARK = "\ufeff";
/**
 * Synchronous CSV parser working on a complete in-memory string.
 *
 * The field-splitting logic follows Go's `encoding/csv` reader (see the
 * workaround comments below); quoting uses `"` and `""` as the escaped quote.
 */
class Parser {
  /** Text being parsed (byte order mark already removed). */
  #input = "";
  /** Index into `#input` of the first character not yet consumed. */
  #cursor = 0;
  /** Effective options, with defaults applied by the constructor. */
  #options: {
    separator: string;
    trimLeadingSpace: boolean;
    comment?: string;
    lazyQuotes?: boolean;
    fieldsPerRecord?: number;
  };

  /**
   * @param options Reader options; `separator` defaults to `","` and
   * `trimLeadingSpace` to `false`, the others are passed through as given.
   */
  constructor({
    separator = ",",
    trimLeadingSpace = false,
    comment,
    lazyQuotes,
    fieldsPerRecord,
  }: ReadOptions = {}) {
    this.#options = {
      separator,
      trimLeadingSpace,
      comment,
      lazyQuotes,
      fieldsPerRecord,
    };
  }

  /**
   * Consumes and returns the next physical line, without its terminator.
   *
   * A line ends at the first `"\n"`; a `"\r"` directly before that `"\n"`
   * (i.e. a `"\r\n"` terminator) or at the very end of the input is removed.
   *
   * @returns The line content, or `null` once the input is exhausted.
   */
  #readLine(): string | null {
    if (this.#isEOF()) return null;

    // Locate the terminator with a single search instead of appending one
    // character at a time.
    const newlineIndex = this.#input.indexOf("\n", this.#cursor);
    const hasNewline = newlineIndex !== -1;
    const lineEnd = hasNewline ? newlineIndex : this.#input.length;

    let line = this.#input.slice(this.#cursor, lineEnd);
    this.#cursor = hasNewline ? newlineIndex + 1 : this.#input.length;

    // Drop the "\r" of a "\r\n" terminator, or a dangling "\r" at end of input.
    if (line.endsWith("\r")) {
      line = line.slice(0, -1);
    }
    return line;
  }

  /** Returns `true` when every character of the input has been consumed. */
  #isEOF(): boolean {
    return this.#cursor >= this.#input.length;
  }

  /**
   * Parses one record, which may span several physical lines when a quoted
   * field contains line breaks.
   *
   * @param startLine Zero-based index of the record; `startLine + 1` is passed
   * to `ParseError` together with the current line number and column.
   * @returns The fields of the record, `[]` for an empty line or a comment
   * line, or `null` when the input is exhausted.
   * @throws {ParseError} On a bare quote in an unquoted field, on an
   * unescaped quote inside a quoted field, or on an unterminated quoted field
   * (all only when `lazyQuotes` is not set).
   */
  #parseRecord(startLine: number): string[] | null {
    let line = this.#readLine();
    if (line === null) return null;
    if (line.length === 0) {
      return [];
    }

    function runeCount(s: string): number {
      // Array.from iterates by code point, so a surrogate pair counts once.
      return Array.from(s).length;
    }

    let lineIndex = startLine + 1;

    // A line starting with the comment character is ignored.
    if (this.#options.comment && line[0] === this.#options.comment) {
      return [];
    }

    // `fullLine` keeps the unconsumed physical line for column computation;
    // `line` is progressively shortened as fields are consumed.
    let fullLine = line;
    let quoteError: ParseError | null = null;
    const quote = '"';
    const quoteLen = quote.length;
    const separatorLen = this.#options.separator.length;
    // All field contents are appended to `recordBuffer`; `fieldIndexes`
    // records the end offset of each field so they can be sliced out later.
    let recordBuffer = "";
    const fieldIndexes = [] as number[];
    parseField:
    for (;;) {
      if (this.#options.trimLeadingSpace) {
        line = line.trimStart();
      }

      if (line.length === 0 || !line.startsWith(quote)) {
        // Non-quoted string field
        const i = line.indexOf(this.#options.separator);
        let field = line;
        if (i >= 0) {
          field = field.substring(0, i);
        }
        // Check to make sure a quote does not appear in field.
        if (!this.#options.lazyQuotes) {
          const j = field.indexOf(quote);
          if (j >= 0) {
            const col = runeCount(
              fullLine.slice(0, fullLine.length - line.slice(j).length),
            );
            quoteError = new ParseError(
              startLine + 1,
              lineIndex,
              col,
              ERR_BARE_QUOTE,
            );
            break parseField;
          }
        }
        recordBuffer += field;
        fieldIndexes.push(recordBuffer.length);
        if (i >= 0) {
          line = line.substring(i + separatorLen);
          continue parseField;
        }
        break parseField;
      } else {
        // Quoted string field
        line = line.substring(quoteLen);
        for (;;) {
          const i = line.indexOf(quote);
          if (i >= 0) {
            // Hit next quote.
            recordBuffer += line.substring(0, i);
            line = line.substring(i + quoteLen);
            if (line.startsWith(quote)) {
              // `""` sequence (append quote).
              recordBuffer += quote;
              line = line.substring(quoteLen);
            } else if (line.startsWith(this.#options.separator)) {
              // `","` sequence (end of field).
              line = line.substring(separatorLen);
              fieldIndexes.push(recordBuffer.length);
              continue parseField;
            } else if (0 === line.length) {
              // `"\n` sequence (end of line).
              fieldIndexes.push(recordBuffer.length);
              break parseField;
            } else if (this.#options.lazyQuotes) {
              // `"` sequence (bare quote).
              recordBuffer += quote;
            } else {
              // `"*` sequence (invalid non-escaped quote).
              const col = runeCount(
                fullLine.slice(0, fullLine.length - line.length - quoteLen),
              );
              quoteError = new ParseError(
                startLine + 1,
                lineIndex,
                col,
                ERR_QUOTE,
              );
              break parseField;
            }
          } else if (line.length > 0 || !(this.#isEOF())) {
            // Hit end of line (copy all data so far).
            recordBuffer += line;
            const r = this.#readLine();
            lineIndex++;
            // This is a workaround for making this module behave similarly to
            // the encoding/csv/reader.go.
            line = r ?? "";
            fullLine = line;
            if (r === null) {
              // Abrupt end of file (EOF or error).
              if (!this.#options.lazyQuotes) {
                const col = runeCount(fullLine);
                quoteError = new ParseError(
                  startLine + 1,
                  lineIndex,
                  col,
                  ERR_QUOTE,
                );
                break parseField;
              }
              fieldIndexes.push(recordBuffer.length);
              break parseField;
            }
            // Preserve the line feed that `#readLine` stripped.
            recordBuffer += "\n";
          } else {
            // Abrupt end of file (EOF on error).
            if (!this.#options.lazyQuotes) {
              const col = runeCount(fullLine);
              quoteError = new ParseError(
                startLine + 1,
                lineIndex,
                col,
                ERR_QUOTE,
              );
              break parseField;
            }
            fieldIndexes.push(recordBuffer.length);
            break parseField;
          }
        }
      }
    }
    if (quoteError) {
      throw quoteError;
    }

    // Cut the accumulated buffer back into individual fields.
    const result = [] as string[];
    let preIdx = 0;
    for (const i of fieldIndexes) {
      result.push(recordBuffer.slice(preIdx, i));
      preIdx = i;
    }
    return result;
  }

  /**
   * Parses a whole CSV document.
   *
   * @param input CSV text; a leading byte order mark is ignored.
   * @returns One `string[]` per non-empty, non-comment record.
   * @throws {Error} With `ERR_INVALID_DELIM` when the separator is empty, when
   * the separator or comment is `"\r"`, `"\n"` or `'"'`, or when they are equal.
   * @throws {ParseError} On malformed quoting, or with `ERR_FIELD_COUNT` when
   * a record does not have the expected number of fields.
   */
  parse(input: string): string[][] {
    this.#input = input.startsWith(BYTE_ORDER_MARK) ? input.slice(1) : input;
    this.#cursor = 0;
    const result: string[][] = [];
    let expectedFieldCount: number | undefined;
    let first = true;
    let lineIndex = 0;

    const INVALID_RUNE = ["\r", "\n", '"'];

    const options = this.#options;
    if (
      // An empty separator matches at every position, so field splitting
      // would never advance and the parser would loop forever.
      options.separator.length === 0 ||
      INVALID_RUNE.includes(options.separator) ||
      (typeof options.comment === "string" &&
        INVALID_RUNE.includes(options.comment)) ||
      options.separator === options.comment
    ) {
      throw new Error(ERR_INVALID_DELIM);
    }

    for (;;) {
      const record = this.#parseRecord(lineIndex);
      if (record === null) break;
      lineIndex++;

      // If fieldsPerRecord is 0, the expected count is the number of fields in
      // the first record. A negative value disables the check entirely.
      // NOTE(review): "negative means no check" follows Go's encoding/csv,
      // which this parser is modelled on; confirm against ReadOptions docs.
      if (first) {
        first = false;
        const { fieldsPerRecord } = options;
        if (fieldsPerRecord !== undefined && fieldsPerRecord >= 0) {
          expectedFieldCount = fieldsPerRecord === 0
            ? record.length
            : fieldsPerRecord;
        }
      }

      // Empty records (blank and comment lines) are skipped silently.
      if (record.length > 0) {
        if (expectedFieldCount && expectedFieldCount !== record.length) {
          throw new ParseError(lineIndex, lineIndex, null, ERR_FIELD_COUNT);
        }
        result.push(record);
      }
    }
    return result;
  }
}
/**
 * Options for {@linkcode parse}: every reader option from `ReadOptions`, plus
 * the header-handling options below.
 */
export interface ParseOptions extends ReadOptions {
  /**
   * When `true`, the first parsed row is removed from the result.
   *
   * If `columns` is also provided, the removed row is simply discarded and
   * `columns` supplies the header names. If `columns` is not provided, the
   * removed row itself is used as the header names.
   *
   * `parse` uses `{ skipFirstRow: false }` when no options object is passed.
   */
  skipFirstRow?: boolean;

  /**
   * List of names used for header definition. When provided, it takes
   * precedence over header names taken from the first row.
   */
  columns?: readonly string[];
}
/**
 * Parses CSV text synchronously into an array of rows.
 *
 * @example
 * ```ts
 * import { parse } from "https://deno.land/std@$STD_VERSION/csv/parse.ts";
 * const string = "a,b,c\nd,e,f";
 *
 * console.log(parse(string));
 * // output:
 * // [["a", "b", "c"], ["d", "e", "f"]]
 * ```
 *
 * @param input Input to parse.
 * @returns Each record as a `string[]`.
 */
export function parse(input: string): string[][];
/**
 * Parses CSV text synchronously, optionally mapping each row onto header
 * names.
 *
 * @example
 * ```ts
 * import { parse } from "https://deno.land/std@$STD_VERSION/csv/parse.ts";
 * const string = "a,b,c\nd,e,f";
 *
 * console.log(
 *   parse(string, {
 *     skipFirstRow: false,
 *   }),
 * );
 * // output:
 * // [["a", "b", "c"], ["d", "e", "f"]]
 * ```
 *
 * @param input Input to parse.
 * @param opt Options of the parser.
 * @returns If neither `opt.skipFirstRow` nor `opt.columns` is provided, the
 * rows as `string[][]`. Otherwise every remaining row is passed through
 * `convertRowToObject` with the header names (taken from `opt.columns` when
 * given, else from the skipped first row).
 */
export function parse<const T extends ParseOptions>(
  input: string,
  opt: T,
): ParseResult<ParseOptions, T>;
export function parse<const T extends ParseOptions>(
  input: string,
  opt: T = { skipFirstRow: false } as T,
): ParseResult<ParseOptions, T> {
  const rows = new Parser(opt).parse(input);
  const { skipFirstRow, columns } = opt;

  // No header handling requested: hand back the raw rows.
  if (!skipFirstRow && !columns) {
    return rows as ParseResult<ParseOptions, T>;
  }

  let headerNames: readonly string[] = [];
  let lineOffset = 0;

  if (skipFirstRow) {
    // The first row is always consumed here, even if `columns` overrides it.
    const headerRow = rows.shift();
    assert(headerRow !== undefined);
    headerNames = headerRow;
    lineOffset = 1;
  }

  if (columns) {
    headerNames = columns;
  }

  return rows.map((row, index) =>
    convertRowToObject(row, headerNames, lineOffset + index)
  ) as ParseResult<ParseOptions, T>;
}
// (web page footer: "Version Info")