/**
## /utils/csvUpload
This module exports the default csvUpload method for mapp utils module.
This utility supplies a way to insert data into a table from a csv file.
The function takes a File object and reads the contents.
The data is sanatised (see `splitRowIntoFields`) and then passed to the supplied `query` param.
```json
{
query: "sql_table_insert"
queryparams: {
table: "scratch"
}
layer: {}
fields: {
"numeric_field": "numeric",
"char_field": "text"
}
}
- layer: the layer on which the csv upload is being done.
- queryparams.table: the table being inserted on.
- fields: this describes the fields being inserted, the name of the field is used as the key and the type is the value of the key
The function transforms the read data into an object as below:
```json
{
"numeric_field::numeric":[
1,2,3
],
"char_field::text":[
'a','b','c'
]
}
```
This data is passed to the relevant insert query (defaults to `sql_table_insert`).
Additionally files exceeding the lambda upload limit (6mb) will be chunked into parts and inserted separately.
@module /utils/csvUpload
*/
/**
@typedef {Object} schemaMethods
schemaMethods are used to convert the field values to the correct type for the database.
The methods are used in the chunkRows method to convert the field values to the correct type for the database.
@property {function} text Replace quotes in string values.
@property {function} int Parse value as integer.
@property {function} numeric Parse value as float.
*/
const schemaMethods = {
text: (val) => `${val.replace(/'/g, "''")}`,
int: (val) => parseInt(val.replace(/[^\d.-]/g, '')) || 'NULL',
numeric: (val) => parseFloat(val.replace(/[^\d.-]/g, '')) || 'NULL',
};
/**
@function csvUpload
@async
@description
This function uploads a CSV file to a database store.
@param {File} file The CSV file to upload.
@param {Object} params The parameters object.
@property {String} params.query The query to upload the data.
@property {Object} params.fields The definition of the columns being inserted e.g. {field: field_type}.
@property {layer} params.layer The layer being inserted on.
@property {layer} params.queryparams.table The table being inserted into.
@property {Boolean} [params.async] If set to true, the data is uploaded asynchronously. Default is false.
@property {Boolean} [params.header] If set to true, the first row is treated as a header row and not uploaded.
@returns {Promise<Object|Error>} The outcome object and any messages returned by the database.
*/
export default async function csvUpload(file, params = {}) {
if (!params.query) {
console.warn(`Query required`);
return;
}
if (!params.fields) {
console.warn(`Field defintions are required for insert`);
return;
}
if (!params.layer) {
console.warn(`A layer must be supplied for insert`);
return;
}
if (!params.queryparams.table) {
console.warn(`A table must be supplied for insert`);
return;
}
return new Promise((resolve) => {
const reader = new FileReader();
reader.readAsText(file);
reader.onload = async (e) => {
// Split text file into rows on carriage return / new line.
const rows = e.target.result.trim().split(/\r?\n/);
// The file has a header row.
if (params.header) rows.shift();
const chunks = chunkRows(rows, params);
let responses = [];
// If async = true, then we need to wait for all promises to resolve before returning the outcome object.
if (params.async) {
responses = await Promise.all(
chunks.map((chunk) => postRequestData(chunk, params)),
);
}
// If async = false, run them synchhronously.
else {
for (const chunk of chunks) {
const response = await postRequestData(chunk, params);
responses.push(response);
}
}
resolve(responses);
};
});
}
/**
@function chunkRows
@description
The method iterates over each row in the rows param. And returns an array of chunks according to max payload size for the POST request.
@param {array} rows An array of rows extracted from the CSV file.
@param {object} params Parameter for chunking the rows array.
@property {Object} params.fields The definition of the columns being inserted e.g. {field: field_type}.
@property {Number} [params.chunkSize] The chunk size in bytes to upload the data in. Defaults to 4MB.
@property {Array} [params.schema] The schema array of column types.
@returns {Array} An array of field arrays chunked to size.
*/
function chunkRows(rows, params) {
const chunks = [];
// Set default chunksize in bytes.
params.chunkSize ??= 4000000;
let chunkSize = 0;
let chunk = {};
rows.forEach((row) => {
// Split row into fields array.
const fields = splitRowIntoFields(row);
for (const indx in fields) {
const field = Object.keys(params.fields)[indx];
const type = params.fields[field];
//assign field definition to its type with a `::` separator e.g. field::field_type
chunk[`${field}::${type}`] ??= [];
chunk[`${field}::${type}`].push(schemaMethods[type](fields[indx]));
// Determine blob size in bytes for the chunk.
const rowSize = new Blob([chunk]).size;
// Check whether the chunk would exceed lambda payload limit.
if (chunkSize + rowSize >= params.chunkSize) {
chunks.push(structuredClone(chunk));
// Create a new current chunk.
chunk = {};
chunkSize = 0;
}
// Add row to current chunk and sum size.
chunkSize += rowSize;
}
});
// Push the last chunk if it exists.
if (Object.keys(chunk).length) chunks.push(chunk);
return chunks;
}
/**
@function splitRowIntoFields
@description
The method splits a CSV row string into fields.
@param {String} row The row to split.
@returns {Array} The fields array.
*/
function splitRowIntoFields(row) {
// Create an array to store the fields.
const fields = [];
// Create a variable to store the current field.
let currentField = '';
let insideQuotes = false;
// Loop through each character in the row.
for (const character of row) {
// This if statement checks if the character is a double quote.
if (character === '"') {
// If the character is a double quote, we toggle the insideQuotes variable, which will be used to determine if we are inside a quote.
insideQuotes = !insideQuotes;
}
// If the character is a comma and we are not inside quotes, we push the current field to the fields array.
else if (character === ',' && !insideQuotes) {
// Push current field to fields array
fields.push(currentField.trim());
// Reset for the next field
currentField = '';
} else {
// Add character to current field
currentField += character;
}
}
// Push the last field (if any) after the loop to the fields array.
if (currentField) {
fields.push(currentField.trim());
}
return fields;
}
/**
@function postRequestData
@description
The method returns a parameterised query object from the XHR utility method.
@param {Array} data Data for the post request body.
@param {Object} params Parameters for the post query.
@returns {Promise<Object|Error>} Returns promise from XHR query utility.
*/
function postRequestData(data, params) {
params.queryparams ??= {};
const queryParams = mapp.utils.queryParams(params);
const paramString = mapp.utils.paramString(queryParams);
const url = `${mapp.host}/api/query?${paramString}`;
return mapp.utils.xhr({
method: 'POST',
url,
body: JSON.stringify(data),
});
}