Wrangling Javascript DataFrames using Data-Forge
With tax season nearby, this script can help you itemize your expenses. First, download your bank transactions for the year and inspect the column names. For this example, we will only need three columns, "date", "amount", "category".
Do not convert your file to Excel. Keep it CSV.
'use strict;'
const fs = require('fs');
const util = require('util');
const path = require('path');
// Convert CSV into DataFrames
const dataForge = require('data-forge');
require('data-forge-fs'); // For readFile/writeFile.
// https://www.npmjs.com/package/moment-timezone
const moment = require('moment-timezone');
// Convert string numbers into accounting format
const accounting = require('accounting');
const DATA = {
file: {
input: './data/Transactions.csv',
output: './data/new.csv'
},
columns: {
required: {
date: "date",
amount: "amount",
category: "category"
}
}
}
//A. Synchronously import CSV file
dataForge.readFile(DATA.file.input)
//B. Parse CSV data
.parseCSV()
//C. Modify DataFrame more
.then(df => {
// Identify which colums are not important and drop them
let columnsToDrop = (columns) => {
//i. Identify which columns are necessary for this project
let requiredColumns = Object.values(columns);
//ii. Get the available columns within the dataFrame
let columnsArr = df.getColumnNames()
//iii. Delete any column that is not Required
.filter(column => !requiredColumns.includes(column))
return columnsArr;
}
let dfModified = df
//a. Drop non-required columns
.dropSeries(columnsToDrop(DATA.columns.required))
//b. Change the Date formatting. This is more reliable than .parseDate("Date", "MM-YYYY")
.transformSeries({ date: value => DateUtils.parseDate(value) })
//c. Create new columns with modified values
.generateSeries({
//i. Strip out the string text formatting and create a floating number
amount_fpn: row => new AccountingUtils()
//ii. Strip out the $()
.stripFormatting(row["amount"])
//iii. Only surface negative numbers. I'm not tracking income
.filterAmount()
})
//d. Organize by Category then Month. Then Sum up the expenses
.pivot(["category", "date"], "amount_fpn", series => series.sum())
return dfModified;
})
.then(async dfEnriched => {
//a. Write to Disk
try {
FileUtils.writeToCSV(dfEnriched)
} catch(WriteError){
console.log("WriteError")
}
})
.catch(err => {
console.error("err", err && err.stack || err);
});
class FileUtils {
static writeToCSV = async (df) => {
await df
.asCSV()
.writeFile(DATA.file.output)
.then( () => {
console.log("CSV Created.")
})
.catch(IPError => {
console.error("IPError:", IPError)
})
}
}
class AccountingUtils {
constructor(){
this.value = 0
}
stripFormatting = (value) => {
this.value = accounting.unformat(value)
return this
}
filterAmount = (value) => {
//i. Identify if transaction was a Debit (-) or a Credit (+)
const isPositive = Math.sign(this.value);
//ii. We only want Debits (-) not the Credits (+)
// Just convert all positive numbers to 0
if( isPositive == 1 ) return 0.00;
else return this.value;
}
}
class DateUtils {
static toUTC = (momentObj) => {
return momentObj.utc().format();
}
static toLocal = (momentObj) => {
return momentObj.tz('America/Los_Angeles').format('MMMM Do YYYY, h:mm:ss a')
}
static parseDate(date) {
return moment(new Date(date)).format("MM-YYYY");
}
}
Appendix
- The Bible on Data-Forge
- Javascript for Data Science isn't exclusive to Data-Forge but the examples on how to work with DataFrames is excellent.
- The Data Wrangler has a lot of interesting examples.
- Pivot Examples are buried deep within Github.