import { fuzzySelect } from "./fuzzySearch";

/**
 * Options accepted by `parseCSV`. Every field is optional.
 */
interface CSVParseOptions {
    /**
     * Canonical header names. When provided, each header found in the CSV is
     * fuzzy-matched against this list and replaced by the best match.
     */
    knownHeaders?: string[],
    /**
     * Per-column converters, keyed by (matched) header name. Each receives the
     * raw cell text and returns the value to store for that column.
     */
    rules?: Record<string, (value: string) => any>,
    /**
     * Header names that must be present (after matching) or parsing fails.
     */
    requiredHeaders?: string[],
}

/**
 * Parses CSV text into an array of plain objects, one per data row, keyed by
 * header name.
 *
 * The first row is treated as the header row. If `options.knownHeaders` is
 * given, every header is mapped to its best fuzzy match from that list;
 * otherwise the headers are used exactly as they appear in the file.
 *
 * @param csvString The raw CSV text.
 * @param options Optional header matching, validation and conversion rules.
 * @returns One object per data row, or `null` when the input has fewer than
 *          two rows (i.e. no header row plus at least one data row).
 * @throws Error if a header has no match in `knownHeaders`, if a required
 *         header is missing, or if a row's column count differs from the
 *         header count.
 */
export function parseCSV(csvString: string, options: CSVParseOptions = {}): any[] | null {
    const rows = getRowsFromCSVData(csvString);

    // Need a header row and at least one data row.
    if (rows.length < 2) {
        return null;
    }

    const { knownHeaders, requiredHeaders, rules } = options;
    const rawHeaders = rows[0];

    let headers: string[];
    if (knownHeaders) {
        // Fuzzy match from known headers
        headers = [];
        for (const header of rawHeaders) {
            const bestFit = fuzzySelect(header, knownHeaders);
            if (!bestFit) {
                throw new Error(`Unknown header: '${header}'. Valid headers should resemble the following:\n${knownHeaders.join("\n")}`);
            }

            headers.push(bestFit);
        }
    } else {
        // Use whatever's provided
        headers = rawHeaders;
    }

    if (requiredHeaders) {
        for (const header of requiredHeaders) {
            if (!headers.includes(header)) {
                throw new Error(`Missing required header: '${header}'`);
            }
        }
    }

    const items: Record<string, any>[] = [];
    for (let i = 1; i < rows.length; i++) {
        const parts = rows[i];
        if (parts.length !== headers.length) {
            throw new Error(`Invalid CSV file; line ${i + 1} does not match number of headers (${parts.length} vs ${headers.length})`);
        }

        const item: Record<string, any> = {};
        for (let j = 0; j < headers.length; j++) {
            const key = headers[j];
            let value: any = parts[j];
            // Apply the column's converter, if one is registered for this header.
            if (rules && rules[key]) {
                value = rules[key](value);
            }

            item[key] = value;
        }

        items.push(item);
    }

    return items;
}

/**
 * Based on https://stackoverflow.com/a/14991797, this parses CSV text
 * character by character, honouring quoted fields, doubled-quote escapes
 * (`""` inside a quoted field) and all newline styles (LF, CR and CRLF).
 *
 * A newline at the very end of the input does not produce an extra row, but a
 * blank line in the middle or at the end (two consecutive newlines) produces a
 * row holding a single empty field. A trailing comma always yields a final
 * empty field, whether or not the input ends with a newline.
 *
 * @param str The CSV string
 * @returns the CSV data as a 2D array, indexed by row and then column
 */
function getRowsFromCSVData(str: string): string[][] {
    const rows: string[][] = [];

    // Fields collected so far for the row being read, and the field in progress.
    let fields: string[] = [];
    let field = '';

    // 'true' means we're inside a quoted field
    let inQuotes = false;

    // 'true' once at least one character of the current row has been seen;
    // used to decide whether an unterminated last row must be flushed.
    let rowStarted = false;

    for (let i = 0; i < str.length; i++) {
        const current = str.charAt(i);
        const next = str.charAt(i + 1);
        rowStarted = true;

        if (current === '"') {
            if (inQuotes && next === '"') {
                // Doubled quote inside a quoted field: emit one literal quote
                // and skip the second character of the pair.
                field += '"';
                i++;
            } else {
                // A lone quotation mark begins/ends a quoted field.
                inQuotes = !inQuotes;
            }
            continue;
        }

        if (!inQuotes) {
            if (current === ',') {
                // Field separator: finish this field and start the next one.
                fields.push(field);
                field = '';
                continue;
            }

            if (current === '\r' || current === '\n') {
                // Treat CRLF as a single line break.
                if (current === '\r' && next === '\n') {
                    i++;
                }
                fields.push(field);
                rows.push(fields);
                fields = [];
                field = '';
                rowStarted = false;
                continue;
            }
        }

        // Otherwise, append the current character to the current field
        field += current;
    }

    // Flush the last row when the input does not end with a newline. This also
    // keeps a trailing empty field after a final comma (e.g. "1,").
    if (rowStarted) {
        fields.push(field);
        rows.push(fields);
    }

    return rows;
}
