Difference between revisions of "GncArchive.pl"
From GnuCash
(Created on behalf of User:Mhasling.) |
(updates for v5.24+ of perl, and don't crash on voided transactions) |
||
(3 intermediate revisions by 2 users not shown) | |||
Line 1: | Line 1: | ||
− | + | <SyntaxHighlight lang="perl"> | |
+ | #!/usr/bin/perl -w | ||
+ | # Remove older transactions from the GnuCash XML to create a more | ||
+ | # manageable dataset. Creates new opening balance transactions for | ||
+ | # asset/liability accounts so that the latest and reconciled balances | ||
+ | # are maintained. | ||
+ | # Arguments are file name and date (remove through this date) | ||
+ | # After running, account balances will need to be adjusted. | ||
+ | # Two passes needed, first to figure out what to remove. The second | ||
+ | # to filter out the old transactions. | ||
+ | # Large chunks of this script were copied from gccull.pl from the | ||
+ | # GnuCash wiki, posted by Doug Pastorello. | ||
+ | |||
+ | use POSIX; | ||
+ | |||
+ | # Some syntax flags | ||
+ | my ($inAccount, $inTransaction, $curAccount, $curTransaction); | ||
+ | my ($inSplit, $curSplit); | ||
+ | # Hash of accounts. Key is act:id, value is account object (ref) | ||
+ | my (%accounts); | ||
+ | # Hash of transactions. Key is trn:id, value is transaction object (ref) | ||
+ | my (%transactions); | ||
+ | # Hash of splits. Key is split:id, value is split object (ref) | ||
+ | my (%splits); | ||
+ | # Hash of ISO4217 commodities. Key is ID, value is number of uses. | ||
+ | my (%commodities); | ||
+ | # Reference to root account object | ||
+ | my ($rootAccount); | ||
+ | # Equity account id used for generating balancing transactions | ||
+ | my ($equityAccountId, $equityActCommodity); | ||
+ | # List of deleted transaction GUIDs, available for reuse in balancing | ||
+ | # transactions | ||
+ | my (@guids); | ||
+ | # Misc global vars | ||
+ | my ($i, $buffer, $skip); | ||
+ | my (%actypes); | ||
+ | |||
+ | # Command line arguments | ||
+ | my ($gcFileName) = $ARGV[0]; # Name of the GnuCash XML file to process | ||
+ | my ($delDate) = $ARGV[1]; # Date to delete through | ||
+ | my ($archiveFileName, $postedDate, $enteredDate); | ||
+ | if ($delDate =~ m/^(\d\d\d\d)(\d\d)(\d\d)$/) { | ||
+ | $postedDate = "$1-$2-$3 00:00:00 +0000"; | ||
+ | } | ||
+ | else {die "Bad date format, required as YYYYMMDD\n "} | ||
+ | ($archiveFileName = $gcFileName) =~ s/^(.+)(\.\w+)$/$1-archive-$delDate$2/; | ||
+ | die "Strange filename, does not appear to have an extension\n " if $archiveFileName eq $gcFileName; | ||
+ | $enteredDate = strftime("%Y-%m-%d %H:%M:%S +0000", gmtime($^T)); | ||
+ | |||
+ | # Initial values | ||
+ | $inAccount = 0; | ||
+ | $inTransaction = 0; | ||
+ | $inSplit = 0; | ||
+ | |||
+ | # First pass read the file | ||
+ | open(IF, $gcFileName) || die "Can't open $gcFileName\n "; | ||
+ | while (<IF>) { | ||
+ | chomp; | ||
+ | # -- Process the account lines -- | ||
+ | if (m/^\s*<gnc:account/) { | ||
+ | $inAccount = 1; | ||
+ | $curAccount = {}; # New hash for new account | ||
+ | $curAccount->{id} = "-NONE-"; # GC account ID code | ||
+ | $curAccount->{origXactCnt} = 0; # Number of transactions before cull | ||
+ | $curAccount->{newXactCnt} = 0; # Number of transactions after cull | ||
+ | $curAccount->{type} = "-NONE-"; | ||
+ | $curAccount->{name} = "-NONE-"; | ||
+ | $curAccount->{description} = "-NONE-"; | ||
+ | $curAccount->{parent} = "-NONE-"; | ||
+ | $curAccount->{children} = 0; | ||
+ | $curAccount->{delBalances} = { "n" => 0, "c" => 0, "y" => 0 }; | ||
+ | $curAccount->{commodityID} = "-NONE-"; | ||
+ | $curAccount->{commoditySCU} = 1; | ||
+ | next; | ||
+ | } | ||
+ | if (m/^\s*<\/gnc:account>/) {$inAccount = 0; next} | ||
+ | if ($inAccount && m/<act:id type="guid">/) { | ||
+ | if (m/<act:id type="guid">([0-9a-zA-z]+)<\/act:id>/) { | ||
+ | $accounts{$1} = $curAccount; | ||
+ | $curAccount->{id} = $1; | ||
+ | } | ||
+ | else {die "Unexpected act:id format\n "} | ||
+ | } | ||
+ | if ($inAccount && m/<act:type>/) { | ||
+ | if (m/<act:type>(.+)<\/act:type>/) { | ||
+ | $curAccount->{type} = $1; | ||
+ | $actypes{$1} = 1; | ||
+ | } | ||
+ | else {die "Unexpected act:type format\n "} | ||
+ | } | ||
+ | if ($inAccount && m/<act:name>/) { | ||
+ | if (m/<act:name>(.+)<\/act:name>/) { | ||
+ | $curAccount->{name} = $1; | ||
+ | } | ||
+ | else {die "Unexpected act:name format\n "} | ||
+ | } | ||
+ | if ($inAccount && m/<act:description>/) { | ||
+ | if (m/<act:description>(.+)<\/act:description>/) { | ||
+ | $curAccount->{description} = $1; | ||
+ | } | ||
+ | else {die "Unexpected act:description format\n "} | ||
+ | } | ||
+ | if ($inAccount && m/<act:parent type=/) { | ||
+ | if (m/<act:parent type="guid">(.+)<\/act:parent>/) { | ||
+ | $curAccount->{parent} = $1; | ||
+ | } | ||
+ | else {die "Unexpected act:parent format\n "} | ||
+ | } | ||
+ | if ($inAccount && m/<act:commodity-scu>/) { | ||
+ | if (m/<act:commodity-scu>(.+)<\/act:commodity-scu>/) { | ||
+ | $curAccount->{commoditySCU} = $1; | ||
+ | } | ||
+ | else {die "Unexpected act:commodity-scu format\n "} | ||
+ | } | ||
+ | if ($inAccount && m/<cmdty:id>/) { | ||
+ | if (m/<cmdty:id>(.+)<\/cmdty:id>/) { | ||
+ | $curAccount->{commodityID} = $1; | ||
+ | $commodities{$1} = 0 unless exists($commodities{$1}); | ||
+ | $commodities{$1} += 1; | ||
+ | } | ||
+ | else {die "Unexpected cmdty:id format\n "} | ||
+ | } | ||
+ | # -- Process the transaction lines -- | ||
+ | if (m/^\s*<gnc:transaction/) { | ||
+ | $inTransaction = 1; | ||
+ | $curTransaction = {}; # New hash for new transaction | ||
+ | $curTransaction->{id} = "-NONE-"; | ||
+ | $curTransaction->{splits} = {}; # Hash of split entries | ||
+ | $curTransaction->{toDelete} = 0; # Flag for deletion | ||
+ | $curTransaction->{date} = 0; # Date transaction is posted | ||
+ | next; | ||
+ | } | ||
+ | if (m/^\s*<\/gnc:transaction>/) {$inTransaction = 0; next} | ||
+ | if ($inTransaction && m/<trn:id type="guid">/) { | ||
+ | if (m/<trn:id type="guid">([0-9a-zA-z]+)<\/trn:id>/) { | ||
+ | $transactions{$1} = $curTransaction; | ||
+ | $curTransaction->{id} = $1; | ||
+ | } | ||
+ | else {die "Unexpected trn:id format\n "} | ||
+ | } | ||
+ | if ($inTransaction && m/<trn:date-posted>/) { | ||
+ | # If the date is not on this line, read the next | ||
+ | if (! m/<\/ts:date>/) { $_ = <IF>; chomp} | ||
+ | if (m/<ts:date>([0-9][0-9][0-9][0-9])\-([0-9][0-9])\-([0-9][0-9]).+<\/ts:date>/) { | ||
+ | $curTransaction->{date} = $1 . $2 . $3; | ||
+ | } | ||
+ | else {die "Unexpected ts:date format\n "} | ||
+ | } | ||
+ | |||
+ | # -- Process the split lines -- | ||
+ | if (m/^\s*<trn:split>/) { | ||
+ | $inSplit = 1; | ||
+ | $curSplit = {}; # New hash for new split | ||
+ | $curSplit->{id} = "-NONE-"; | ||
+ | $curSplit->{transaction} = $curTransaction; | ||
+ | $curSplit->{template} = 0; | ||
+ | $curSplit->{toDelete} = 0; # Flag for deletion | ||
+ | $curSplit->{accountId} = "-NONE-"; | ||
+ | $curSplit->{amountNum} = 0; | ||
+ | $curSplit->{amountDen} = 1; | ||
+ | $curSplit->{reconState} = "n"; | ||
+ | next; | ||
+ | } | ||
+ | if (m/^\s*<\/trn:split>/) {$inSplit = 0; next} | ||
+ | if ($inSplit && m/<split:id type="guid">/) { | ||
+ | if (m/<split:id type="guid">([0-9a-zA-z]+)<\/split:id>/) { | ||
+ | # Add splits to the current transaction split hash | ||
+ | $curTransaction->{splits}->{$1} = $curSplit; | ||
+ | $splits{$1} = $curSplit; # Add to global split list | ||
+ | $curSplit->{id} = $1; | ||
+ | } | ||
+ | else {die "Unexpected split:id format\n "} | ||
+ | } | ||
+ | if ($inSplit && m/<split:account type="guid">/) { | ||
+ | if (m/<split:account type="guid">([0-9a-zA-z]+)<\/split:account>/) { | ||
+ | $curSplit->{accountId} = $1; | ||
+ | $accounts{$1}->{origXactCnt}++; # Increment the account transactions | ||
+ | } | ||
+ | else {die "Unexpected split:account format\n "} | ||
+ | } | ||
+ | if ($inSplit && m/<slot:key>sched-xaction<\/slot:key>/) { | ||
+ | $curSplit->{template} = 1; | ||
+ | } | ||
+ | if ($inSplit && m/<split:reconciled-state>/) { | ||
+ | if (m/<split:reconciled-state>([ncyv])<\/split:reconciled-state>/) { | ||
+ | $curSplit->{reconState} = $1; | ||
+ | } | ||
+ | else {die "Unexpected split:reconciled-state format\n "} | ||
+ | } | ||
+ | if ($inSplit && m/<split:quantity>/) { | ||
+ | if (m/<split:quantity>([-0-9]+)\/([0-9]+)<\/split:quantity>/) { | ||
+ | $curSplit->{amountNum} = $1; | ||
+ | $curSplit->{amountDen} = $2; | ||
+ | } | ||
+ | else {die "Unexpected split:quantity format\n "} | ||
+ | } | ||
+ | |||
+ | } # End of first read through the GnuCash file | ||
+ | close(IF); | ||
+ | # | ||
+ | # ---------------------------------------------------------------------- | ||
+ | # | ||
+ | |||
+ | # Copy the account transaction counts from old to new, count the | ||
+ | # number of child accounts | ||
+ | foreach $curAccount (values(%accounts)) { | ||
+ | $curAccount->{newXactCnt} = $curAccount->{origXactCnt}; | ||
+ | my $parent = $curAccount->{parent}; | ||
+ | $accounts{$parent}->{children}++ if $parent ne "-NONE-"; | ||
+ | } | ||
+ | # Find the root account to use as parent for the generated account for the | ||
+ | # balancing transactions | ||
+ | foreach $curAccount (values(%accounts)) { | ||
+ | if (($curAccount->{type} eq "ROOT") && ($curAccount->{name} ne "-NONE-")) { | ||
+ | $rootAccount = $curAccount; | ||
+ | print "Creating equity account for balancing transactions under $curAccount->{name}\n"; | ||
+ | last; | ||
+ | } | ||
+ | } | ||
+ | |||
+ | # Mark deletable transactions | ||
+ | $i = 0; # count deleted transactions | ||
+ | foreach $curTransaction (values(%transactions)) { | ||
+ | if (&xactIsDeletable($curTransaction)) { | ||
+ | push @guids, $curTransaction->{id}; | ||
+ | foreach $split (values(%{$curTransaction->{splits}})) { | ||
+ | push @guids, $split->{id}; | ||
+ | } | ||
+ | $i++; | ||
+ | } | ||
+ | } | ||
+ | print "Marked $i (full) transactions for delete\n"; | ||
+ | print "Recovered " . scalar(@guids) . " GUIDs for reuse\n"; | ||
+ | |||
+ | # Calculate new opening balances | ||
+ | foreach $curTransaction (values(%transactions)) { | ||
+ | if ($curTransaction->{toDelete}) { | ||
+ | foreach $split (values(%{$curTransaction->{splits}})) { | ||
+ | my ($aid, $rs, $n, $d) = ($split->{accountId}, $split->{reconState}, $split->{amountNum}, $split->{amountDen}); | ||
+ | if (($n != 0) && ($aid ne "-NONE-")) { | ||
+ | my $a = $accounts{$aid}; | ||
+ | die "Deletable transaction has different fractional denominator\n " if $d != $a->{commoditySCU}; | ||
+ | $a->{delBalances}->{$rs} += $n; | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | |||
+ | # Dump out some account data | ||
+ | open(OF, ">${gcFileName}_accounts.csv") || die "Output error"; | ||
+ | foreach $i (keys(%accounts)) { | ||
+ | $curAccount = $accounts{$i}; | ||
+ | print OF "$curAccount->{id}\t$curAccount->{type}\t$curAccount->{name}"; | ||
+ | print OF "\t$curAccount->{delBalances}->{n}\t$curAccount->{delBalances}->{c}\t$curAccount->{delBalances}->{y}"; | ||
+ | print OF "\t$curAccount->{children}"; | ||
+ | print OF "\t$curAccount->{origXactCnt}\t$curAccount->{newXactCnt}"; | ||
+ | print OF "\t$curAccount->{description}\n"; | ||
+ | } | ||
+ | close(OF); | ||
+ | |||
+ | # Dump out some split information | ||
+ | open(OF, ">${gcFileName}_splits.csv") || die "Output error"; | ||
+ | foreach $curSplit (values(%splits)) { | ||
+ | print OF "$curSplit->{id}\t$curSplit->{accountId}\t$curSplit->{transaction}->{date}\n"; | ||
+ | } | ||
+ | close(OF); | ||
+ | # | ||
+ | # ---------------------------------------------------------------------- | ||
+ | # Rename the data file to create a backup, and then open and read it, | ||
+ | # this time skipping transactions that are marked for deletion, and | ||
+ | # adding the replacement balancing account and transactions. | ||
+ | # | ||
+ | rename($gcFileName,"${gcFileName}.original") || die "Can't rename $gcFileName to create backup\n "; | ||
+ | open(IF, "${gcFileName}.original") || die "Can't open ${gcFileName}.original for input\n "; | ||
+ | open(OF, "> ${gcFileName}") || die "Can't open $gcFileName for output"; | ||
+ | open(AF, "> ${archiveFileName}") || die "Can't open $archiveFileName for output"; | ||
+ | $addedBalancing = 0; | ||
+ | $buffering = 0; | ||
+ | $inTransaction = 0; | ||
+ | $inScheduled = 0; | ||
+ | $splitCount = 0; | ||
+ | $toDelete = 0; | ||
+ | $buffer = ""; | ||
+ | while (<IF>) { | ||
+ | chomp; | ||
+ | |||
+ | # Copy everything to the archive file except the scheduled transactions | ||
+ | $inScheduled = 1 if m/^\s*<gnc:schedxaction/; | ||
+ | print AF "$_\n" unless $inScheduled; | ||
+ | $inScheduled = 0 if m/^\s*<\/gnc:schedxaction/; | ||
+ | |||
+ | # Switch to buffering mode at start of each transaction ... | ||
+ | if (m/^\s*<gnc:transaction/) { | ||
+ | $buffering = 1; | ||
+ | $inTransaction = 1; | ||
+ | $splitCount = 0; | ||
+ | $inSplit = 0; | ||
+ | $toDelete = 0; | ||
+ | # Insert all the balancing entries before the first transaction | ||
+ | unless ($addedBalancing) { | ||
+ | print OF genBalanceAccount($rootAccount); | ||
+ | foreach $account (values(%accounts)) { | ||
+ | next if $account->{type} =~ m/^(INCOME|EXPENSE)$/; | ||
+ | print OF genBalancingXact($account, "n") if $account->{delBalances}->{n}; | ||
+ | print OF genBalancingXact($account, "c") if $account->{delBalances}->{c}; | ||
+ | print OF genBalancingXact($account, "y") if $account->{delBalances}->{y}; | ||
+ | } | ||
+ | $addedBalancing = 1; | ||
+ | } | ||
+ | } | ||
+ | # ... otherwise just copy from input to output | ||
+ | unless ($buffering) { | ||
+ | print OF "$_\n"; | ||
+ | next; | ||
+ | } | ||
+ | # Buffering mode, append current line to buffer | ||
+ | $buffer = $buffer."$_\n"; | ||
+ | |||
+ | if ($inTransaction && m/<trn:id type="guid">/) { | ||
+ | if (m/<trn:id type="guid">([0-9a-zA-z]+)<\/trn:id>/) { | ||
+ | $toDelete = $transactions{$1}->{toDelete}; | ||
+ | } | ||
+ | else {die "Unexpected trn:id format\n "} | ||
+ | } | ||
+ | # Process the split lines, if not skipping transaction -- | ||
+ | if (m/^\s*<trn:split>/) { | ||
+ | $inSplit = 1; | ||
+ | next; | ||
+ | } | ||
+ | if ($inSplit && m/^\s*<\/trn:split>/) { | ||
+ | $inSplit = 0; | ||
+ | $splitCount++; | ||
+ | next; | ||
+ | } | ||
+ | if ($inTransaction && m/^\s*<\/gnc:transaction/) { | ||
+ | $buffering = 0; | ||
+ | $inTransaction = 0; | ||
+ | print OF $buffer unless $toDelete || ($splitCount < 2); | ||
+ | $buffer = ""; | ||
+ | next; | ||
+ | } | ||
+ | } # end of filtering pass through input file | ||
+ | close(IF); | ||
+ | close(OF); | ||
+ | exit; | ||
+ | # | ||
+ | # | ||
+ | # ---------------------------------------------------------------------- | ||
+ | # ---------------------------------------------------------------------- | ||
+ | # SUBROUTINES | ||
+ | |||
+ | # -- Mark deletable splits -- | ||
+ | # Returns non-zero if the split gets marked for delete | ||
+ | # Arguments: split pointer, transaction date | ||
+ | sub splitIsDeletable { | ||
+ | my ($inSplit, $xctDate) = @_; | ||
+ | my ($splAcnt) = $accounts{$inSplit->{accountId}}; | ||
+ | my ($splAcntType) = $splAcnt->{type}; | ||
+ | my ($rslt) = 0; | ||
+ | # Work only on splits not marked for delete | ||
+ | unless ($inSplit->{toDelete} || $inSplit->{template}) { | ||
+ | if ($xctDate <= $delDate) { | ||
+ | if ($splAcntType =~ /^(EQUITY|ASSET|LIABILITY|BANK|CREDIT|CASH|STOCK|MUTUAL|INCOME|EXPENSE)$/) { | ||
+ | $splAcnt->{newXactCnt}--; | ||
+ | $rslt = 1; | ||
+ | $inSplit->{toDelete} = 1; | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | #print "DEBUG: $inSplit->{id}, $splAcntType, $xctDate, $rslt\n"; | ||
+ | return($rslt); | ||
+ | } | ||
+ | |||
+ | # -- Mark deletable transactions -- | ||
+ | sub xactIsDeletable { | ||
+ | my ($inXact) = @_; | ||
+ | my ($rslt) = 1; # Start assuming all splits get deleted | ||
+ | my ($xactDate) = $inXact->{date}; | ||
+ | my ($xSplit, $xSplits, $i); | ||
+ | $xSplits = $inXact->{splits}; | ||
+ | foreach $xSplit (values(%$xSplits)) { | ||
+ | $i = &splitIsDeletable($xSplit, $xactDate); | ||
+ | $rslt = $rslt && $i; | ||
+ | } | ||
+ | if ($rslt) {$inXact->{toDelete} = 1} | ||
+ | return($rslt); | ||
+ | } | ||
+ | |||
+ | # -- Create equity account for generated opening balances | ||
+ | sub genBalanceAccount { | ||
+ | my ($ra) = @_; | ||
+ | $equityAccountId = pop(@guids); | ||
+ | $equityActCommodity = "-NONE-"; | ||
+ | my $nn = 0; | ||
+ | while (($c,$n) = each(%commodities)) { | ||
+ | next if $c eq "template"; | ||
+ | if ($n > $nn) { $equityActCommodity = $c; $nn = $n; } | ||
+ | } | ||
+ | my $xml = ""; | ||
+ | $xml .= "<gnc:account version=\"2.0.0\">\n"; | ||
+ | $xml .= " <act:name>Generated Balancing Transactions</act:name>\n"; | ||
+ | $xml .= " <act:id type=\"guid\">$equityAccountId</act:id>\n"; | ||
+ | $xml .= " <act:type>EQUITY</act:type>\n"; | ||
+ | $xml .= " <act:commodity>\n"; | ||
+ | $xml .= " <cmdty:space>ISO4217</cmdty:space>\n"; | ||
+ | $xml .= " <cmdty:id>$equityActCommodity</cmdty:id>\n"; | ||
+ | $xml .= " </act:commodity>\n"; | ||
+ | $xml .= " <act:code></act:code>\n"; | ||
+ | $xml .= " <act:description>Generated Balancing Transactions</act:description>\n"; | ||
+ | $xml .= " <act:parent type=\"guid\">$rootAccount->{id}</act:parent>\n"; | ||
+ | $xml .= "</gnc:account>\n"; | ||
+ | return ($xml); | ||
+ | } | ||
+ | |||
+ | # -- Create balancing transaction XML -- | ||
+ | sub genBalancingXact { | ||
+ | my ($account,$rs) = @_; | ||
+ | return ("") if $account->{type} eq "EQUITY"; | ||
+ | my ($n,$c,$d) = ($account->{delBalances}->{$rs}, $account->{commodityID}, $account->{commoditySCU}); | ||
+ | my $e = 0 - $n; | ||
+ | my $desc = "Opening balance"; | ||
+ | $desc .= " (needs valuation)" if $c ne $equityActCommodity; | ||
+ | my $xml = ""; | ||
+ | $xml .= "<gnc:transaction version=\"2.0.0\">\n"; | ||
+ | $xml .= " <trn:id type=\"guid\">" . pop(@guids) . "</trn:id>\n"; | ||
+ | $xml .= " <trn:date-posted>\n"; | ||
+ | $xml .= " <ts:date>$postedDate</ts:date>\n"; | ||
+ | $xml .= " </trn:date-posted>\n"; | ||
+ | $xml .= " <trn:date-entered>\n"; | ||
+ | $xml .= " <ts:date>$enteredDate</ts:date>\n"; | ||
+ | $xml .= " </trn:date-entered>\n"; | ||
+ | $xml .= " <trn:description>$desc</trn:description>\n"; | ||
+ | $xml .= " <trn:splits>\n"; | ||
+ | $xml .= " <trn:split>\n"; | ||
+ | $xml .= " <split:id type=\"guid\">" . pop(@guids) . "</split:id>\n"; | ||
+ | $xml .= " <split:reconciled-state>$rs</split:reconciled-state>\n"; | ||
+ | $xml .= " <split:value>$n/$d</split:value>\n"; | ||
+ | $xml .= " <split:quantity>$n/$d</split:quantity>\n"; | ||
+ | $xml .= " <split:account type=\"guid\">$account->{id}</split:account>\n"; | ||
+ | $xml .= " </trn:split>\n"; | ||
+ | $xml .= " <trn:split>\n"; | ||
+ | $xml .= " <split:id type=\"guid\">" . pop(@guids) . "</split:id>\n"; | ||
+ | $xml .= " <split:reconciled-state>n</split:reconciled-state>\n"; | ||
+ | $xml .= " <split:value>$e/$d</split:value>\n"; | ||
+ | $xml .= " <split:quantity>$e/$d</split:quantity>\n"; | ||
+ | $xml .= " <split:account type=\"guid\">$equityAccountId</split:account>\n"; | ||
+ | $xml .= " </trn:split>\n"; | ||
+ | $xml .= " </trn:splits>\n"; | ||
+ | $xml .= "</gnc:transaction>\n"; | ||
+ | return ($xml); | ||
+ | } | ||
+ | </SyntaxHighlight> |
Latest revision as of 22:32, 19 August 2021
#!/usr/bin/perl -w
# Remove older transactions from the GnuCash XML to create a more
# manageable dataset. Creates new opening balance transactions for
# asset/liability accounts so that the latest and reconciled balances
# are maintained.
# Arguments are file name and date (remove through this date)
# After running, account balances will need to be adjusted.
# Two passes needed, first to figure out what to remove. The second
# to filter out the old transactions.
# Large chunks of this script were copied from gccull.pl from the
# GnuCash wiki, posted by Doug Pastorello.
use POSIX;
# Some syntax flags
my ($inAccount, $inTransaction, $curAccount, $curTransaction);
my ($inSplit, $curSplit);
# Hash of accounts. Key is act:id, value is account object (ref)
my (%accounts);
# Hash of transactions. Key is trn:id, value is transaction object (ref)
my (%transactions);
# Hash of splits. Key is split:id, value is split object (ref)
my (%splits);
# Hash of ISO4217 commodities. Key is ID, value is number of uses.
my (%commodities);
# Reference to root account object
my ($rootAccount);
# Equity account id used for generating balancing transactions
my ($equityAccountId, $equityActCommodity);
# List of deleted transaction GUIDs, available for reuse in balancing
# transactions
my (@guids);
# Misc global vars
my ($i, $buffer, $skip);
my (%actypes);
# Command line arguments
my ($gcFileName) = $ARGV[0]; # Name of the GnuCash XML file to process
my ($delDate) = $ARGV[1]; # Date to delete through
my ($archiveFileName, $postedDate, $enteredDate);
if ($delDate =~ m/^(\d\d\d\d)(\d\d)(\d\d)$/) {
$postedDate = "$1-$2-$3 00:00:00 +0000";
}
else {die "Bad date format, required as YYYYMMDD\n "}
($archiveFileName = $gcFileName) =~ s/^(.+)(\.\w+)$/$1-archive-$delDate$2/;
die "Strange filename, does not appear to have an extension\n " if $archiveFileName eq $gcFileName;
$enteredDate = strftime("%Y-%m-%d %H:%M:%S +0000", gmtime($^T));
# Initial values
$inAccount = 0;
$inTransaction = 0;
$inSplit = 0;
# First pass read the file
open(IF, $gcFileName) || die "Can't open $gcFileName\n ";
while (<IF>) {
chomp;
# -- Process the account lines --
if (m/^\s*<gnc:account/) {
$inAccount = 1;
$curAccount = {}; # New hash for new account
$curAccount->{id} = "-NONE-"; # GC account ID code
$curAccount->{origXactCnt} = 0; # Number of transactions before cull
$curAccount->{newXactCnt} = 0; # Number of transactions after cull
$curAccount->{type} = "-NONE-";
$curAccount->{name} = "-NONE-";
$curAccount->{description} = "-NONE-";
$curAccount->{parent} = "-NONE-";
$curAccount->{children} = 0;
$curAccount->{delBalances} = { "n" => 0, "c" => 0, "y" => 0 };
$curAccount->{commodityID} = "-NONE-";
$curAccount->{commoditySCU} = 1;
next;
}
if (m/^\s*<\/gnc:account>/) {$inAccount = 0; next}
if ($inAccount && m/<act:id type="guid">/) {
if (m/<act:id type="guid">([0-9a-zA-z]+)<\/act:id>/) {
$accounts{$1} = $curAccount;
$curAccount->{id} = $1;
}
else {die "Unexpected act:id format\n "}
}
if ($inAccount && m/<act:type>/) {
if (m/<act:type>(.+)<\/act:type>/) {
$curAccount->{type} = $1;
$actypes{$1} = 1;
}
else {die "Unexpected act:type format\n "}
}
if ($inAccount && m/<act:name>/) {
if (m/<act:name>(.+)<\/act:name>/) {
$curAccount->{name} = $1;
}
else {die "Unexpected act:name format\n "}
}
if ($inAccount && m/<act:description>/) {
if (m/<act:description>(.+)<\/act:description>/) {
$curAccount->{description} = $1;
}
else {die "Unexpected act:description format\n "}
}
if ($inAccount && m/<act:parent type=/) {
if (m/<act:parent type="guid">(.+)<\/act:parent>/) {
$curAccount->{parent} = $1;
}
else {die "Unexpected act:parent format\n "}
}
if ($inAccount && m/<act:commodity-scu>/) {
if (m/<act:commodity-scu>(.+)<\/act:commodity-scu>/) {
$curAccount->{commoditySCU} = $1;
}
else {die "Unexpected act:commodity-scu format\n "}
}
if ($inAccount && m/<cmdty:id>/) {
if (m/<cmdty:id>(.+)<\/cmdty:id>/) {
$curAccount->{commodityID} = $1;
$commodities{$1} = 0 unless exists($commodities{$1});
$commodities{$1} += 1;
}
else {die "Unexpected cmdty:id format\n "}
}
# -- Process the transaction lines --
if (m/^\s*<gnc:transaction/) {
$inTransaction = 1;
$curTransaction = {}; # New hash for new transaction
$curTransaction->{id} = "-NONE-";
$curTransaction->{splits} = {}; # Hash of split entries
$curTransaction->{toDelete} = 0; # Flag for deletion
$curTransaction->{date} = 0; # Date transaction is posted
next;
}
if (m/^\s*<\/gnc:transaction>/) {$inTransaction = 0; next}
if ($inTransaction && m/<trn:id type="guid">/) {
if (m/<trn:id type="guid">([0-9a-zA-z]+)<\/trn:id>/) {
$transactions{$1} = $curTransaction;
$curTransaction->{id} = $1;
}
else {die "Unexpected trn:id format\n "}
}
if ($inTransaction && m/<trn:date-posted>/) {
# If the date is not on this line, read the next
if (! m/<\/ts:date>/) { $_ = <IF>; chomp}
if (m/<ts:date>([0-9][0-9][0-9][0-9])\-([0-9][0-9])\-([0-9][0-9]).+<\/ts:date>/) {
$curTransaction->{date} = $1 . $2 . $3;
}
else {die "Unexpected ts:date format\n "}
}
# -- Process the split lines --
if (m/^\s*<trn:split>/) {
$inSplit = 1;
$curSplit = {}; # New hash for new split
$curSplit->{id} = "-NONE-";
$curSplit->{transaction} = $curTransaction;
$curSplit->{template} = 0;
$curSplit->{toDelete} = 0; # Flag for deletion
$curSplit->{accountId} = "-NONE-";
$curSplit->{amountNum} = 0;
$curSplit->{amountDen} = 1;
$curSplit->{reconState} = "n";
next;
}
if (m/^\s*<\/trn:split>/) {$inSplit = 0; next}
if ($inSplit && m/<split:id type="guid">/) {
if (m/<split:id type="guid">([0-9a-zA-z]+)<\/split:id>/) {
# Add splits to the current transaction split hash
$curTransaction->{splits}->{$1} = $curSplit;
$splits{$1} = $curSplit; # Add to global split list
$curSplit->{id} = $1;
}
else {die "Unexpected split:id format\n "}
}
if ($inSplit && m/<split:account type="guid">/) {
if (m/<split:account type="guid">([0-9a-zA-z]+)<\/split:account>/) {
$curSplit->{accountId} = $1;
$accounts{$1}->{origXactCnt}++; # Increment the account transactions
}
else {die "Unexpected split:account format\n "}
}
if ($inSplit && m/<slot:key>sched-xaction<\/slot:key>/) {
$curSplit->{template} = 1;
}
if ($inSplit && m/<split:reconciled-state>/) {
if (m/<split:reconciled-state>([ncyv])<\/split:reconciled-state>/) {
$curSplit->{reconState} = $1;
}
else {die "Unexpected split:reconciled-state format\n "}
}
if ($inSplit && m/<split:quantity>/) {
if (m/<split:quantity>([-0-9]+)\/([0-9]+)<\/split:quantity>/) {
$curSplit->{amountNum} = $1;
$curSplit->{amountDen} = $2;
}
else {die "Unexpected split:quantity format\n "}
}
} # End of first read through the GnuCash file
close(IF);
#
# ----------------------------------------------------------------------
#
# Copy the account transaction counts from old to new, count the
# number of child accounts
foreach $curAccount (values(%accounts)) {
$curAccount->{newXactCnt} = $curAccount->{origXactCnt};
my $parent = $curAccount->{parent};
$accounts{$parent}->{children}++ if $parent ne "-NONE-";
}
# Find the root account to use as parent for the generated account for the
# balancing transactions
foreach $curAccount (values(%accounts)) {
if (($curAccount->{type} eq "ROOT") && ($curAccount->{name} ne "-NONE-")) {
$rootAccount = $curAccount;
print "Creating equity account for balancing transactions under $curAccount->{name}\n";
last;
}
}
# Mark deletable transactions
$i = 0; # count deleted transactions
foreach $curTransaction (values(%transactions)) {
if (&xactIsDeletable($curTransaction)) {
push @guids, $curTransaction->{id};
foreach $split (values(%{$curTransaction->{splits}})) {
push @guids, $split->{id};
}
$i++;
}
}
print "Marked $i (full) transactions for delete\n";
print "Recovered " . scalar(@guids) . " GUIDs for reuse\n";
# Calculate new opening balances
foreach $curTransaction (values(%transactions)) {
if ($curTransaction->{toDelete}) {
foreach $split (values(%{$curTransaction->{splits}})) {
my ($aid, $rs, $n, $d) = ($split->{accountId}, $split->{reconState}, $split->{amountNum}, $split->{amountDen});
if (($n != 0) && ($aid ne "-NONE-")) {
my $a = $accounts{$aid};
die "Deletable transaction has different fractional denominator\n " if $d != $a->{commoditySCU};
$a->{delBalances}->{$rs} += $n;
}
}
}
}
# Dump out some account data
open(OF, ">${gcFileName}_accounts.csv") || die "Output error";
foreach $i (keys(%accounts)) {
$curAccount = $accounts{$i};
print OF "$curAccount->{id}\t$curAccount->{type}\t$curAccount->{name}";
print OF "\t$curAccount->{delBalances}->{n}\t$curAccount->{delBalances}->{c}\t$curAccount->{delBalances}->{y}";
print OF "\t$curAccount->{children}";
print OF "\t$curAccount->{origXactCnt}\t$curAccount->{newXactCnt}";
print OF "\t$curAccount->{description}\n";
}
close(OF);
# Dump out some split information
open(OF, ">${gcFileName}_splits.csv") || die "Output error";
foreach $curSplit (values(%splits)) {
print OF "$curSplit->{id}\t$curSplit->{accountId}\t$curSplit->{transaction}->{date}\n";
}
close(OF);
#
# ----------------------------------------------------------------------
# Rename the data file to create a backup, and then open and read it,
# this time skipping transactions that are marked for deletion, and
# adding the replacement balancing account and transactions.
#
rename($gcFileName,"${gcFileName}.original") || die "Can't rename $gcFileName to create backup\n ";
open(IF, "${gcFileName}.original") || die "Can't open ${gcFileName}.original for input\n ";
open(OF, "> ${gcFileName}") || die "Can't open $gcFileName for output";
open(AF, "> ${archiveFileName}") || die "Can't open $archiveFileName for output";
$addedBalancing = 0;
$buffering = 0;
$inTransaction = 0;
$inScheduled = 0;
$splitCount = 0;
$toDelete = 0;
$buffer = "";
while (<IF>) {
chomp;
# Copy everything to the archive file except the scheduled transactions
$inScheduled = 1 if m/^\s*<gnc:schedxaction/;
print AF "$_\n" unless $inScheduled;
$inScheduled = 0 if m/^\s*<\/gnc:schedxaction/;
# Switch to buffering mode at start of each transaction ...
if (m/^\s*<gnc:transaction/) {
$buffering = 1;
$inTransaction = 1;
$splitCount = 0;
$inSplit = 0;
$toDelete = 0;
# Insert all the balancing entries before the first transaction
unless ($addedBalancing) {
print OF genBalanceAccount($rootAccount);
foreach $account (values(%accounts)) {
next if $account->{type} =~ m/^(INCOME|EXPENSE)$/;
print OF genBalancingXact($account, "n") if $account->{delBalances}->{n};
print OF genBalancingXact($account, "c") if $account->{delBalances}->{c};
print OF genBalancingXact($account, "y") if $account->{delBalances}->{y};
}
$addedBalancing = 1;
}
}
# ... otherwise just copy from input to output
unless ($buffering) {
print OF "$_\n";
next;
}
# Buffering mode, append current line to buffer
$buffer = $buffer."$_\n";
if ($inTransaction && m/<trn:id type="guid">/) {
if (m/<trn:id type="guid">([0-9a-zA-z]+)<\/trn:id>/) {
$toDelete = $transactions{$1}->{toDelete};
}
else {die "Unexpected trn:id format\n "}
}
# Process the split lines, if not skipping transaction --
if (m/^\s*<trn:split>/) {
$inSplit = 1;
next;
}
if ($inSplit && m/^\s*<\/trn:split>/) {
$inSplit = 0;
$splitCount++;
next;
}
if ($inTransaction && m/^\s*<\/gnc:transaction/) {
$buffering = 0;
$inTransaction = 0;
print OF $buffer unless $toDelete || ($splitCount < 2);
$buffer = "";
next;
}
} # end of filtering pass through input file
close(IF);
close(OF);
exit;
#
#
# ----------------------------------------------------------------------
# ----------------------------------------------------------------------
# SUBROUTINES
# -- Mark deletable splits --
# Returns non-zero if the split gets marked for delete
# Arguments: split pointer, transaction date
sub splitIsDeletable {
my ($inSplit, $xctDate) = @_;
my ($splAcnt) = $accounts{$inSplit->{accountId}};
my ($splAcntType) = $splAcnt->{type};
my ($rslt) = 0;
# Work only on splits not marked for delete
unless ($inSplit->{toDelete} || $inSplit->{template}) {
if ($xctDate <= $delDate) {
if ($splAcntType =~ /^(EQUITY|ASSET|LIABILITY|BANK|CREDIT|CASH|STOCK|MUTUAL|INCOME|EXPENSE)$/) {
$splAcnt->{newXactCnt}--;
$rslt = 1;
$inSplit->{toDelete} = 1;
}
}
}
#print "DEBUG: $inSplit->{id}, $splAcntType, $xctDate, $rslt\n";
return($rslt);
}
# -- Mark deletable transactions --
sub xactIsDeletable {
my ($inXact) = @_;
my ($rslt) = 1; # Start assuming all splits get deleted
my ($xactDate) = $inXact->{date};
my ($xSplit, $xSplits, $i);
$xSplits = $inXact->{splits};
foreach $xSplit (values(%$xSplits)) {
$i = &splitIsDeletable($xSplit, $xactDate);
$rslt = $rslt && $i;
}
if ($rslt) {$inXact->{toDelete} = 1}
return($rslt);
}
# -- Create equity account for generated opening balances
sub genBalanceAccount {
my ($ra) = @_;
$equityAccountId = pop(@guids);
$equityActCommodity = "-NONE-";
my $nn = 0;
while (($c,$n) = each(%commodities)) {
next if $c eq "template";
if ($n > $nn) { $equityActCommodity = $c; $nn = $n; }
}
my $xml = "";
$xml .= "<gnc:account version=\"2.0.0\">\n";
$xml .= " <act:name>Generated Balancing Transactions</act:name>\n";
$xml .= " <act:id type=\"guid\">$equityAccountId</act:id>\n";
$xml .= " <act:type>EQUITY</act:type>\n";
$xml .= " <act:commodity>\n";
$xml .= " <cmdty:space>ISO4217</cmdty:space>\n";
$xml .= " <cmdty:id>$equityActCommodity</cmdty:id>\n";
$xml .= " </act:commodity>\n";
$xml .= " <act:code></act:code>\n";
$xml .= " <act:description>Generated Balancing Transactions</act:description>\n";
$xml .= " <act:parent type=\"guid\">$rootAccount->{id}</act:parent>\n";
$xml .= "</gnc:account>\n";
return ($xml);
}
# -- Create balancing transaction XML --
sub genBalancingXact {
my ($account,$rs) = @_;
return ("") if $account->{type} eq "EQUITY";
my ($n,$c,$d) = ($account->{delBalances}->{$rs}, $account->{commodityID}, $account->{commoditySCU});
my $e = 0 - $n;
my $desc = "Opening balance";
$desc .= " (needs valuation)" if $c ne $equityActCommodity;
my $xml = "";
$xml .= "<gnc:transaction version=\"2.0.0\">\n";
$xml .= " <trn:id type=\"guid\">" . pop(@guids) . "</trn:id>\n";
$xml .= " <trn:date-posted>\n";
$xml .= " <ts:date>$postedDate</ts:date>\n";
$xml .= " </trn:date-posted>\n";
$xml .= " <trn:date-entered>\n";
$xml .= " <ts:date>$enteredDate</ts:date>\n";
$xml .= " </trn:date-entered>\n";
$xml .= " <trn:description>$desc</trn:description>\n";
$xml .= " <trn:splits>\n";
$xml .= " <trn:split>\n";
$xml .= " <split:id type=\"guid\">" . pop(@guids) . "</split:id>\n";
$xml .= " <split:reconciled-state>$rs</split:reconciled-state>\n";
$xml .= " <split:value>$n/$d</split:value>\n";
$xml .= " <split:quantity>$n/$d</split:quantity>\n";
$xml .= " <split:account type=\"guid\">$account->{id}</split:account>\n";
$xml .= " </trn:split>\n";
$xml .= " <trn:split>\n";
$xml .= " <split:id type=\"guid\">" . pop(@guids) . "</split:id>\n";
$xml .= " <split:reconciled-state>n</split:reconciled-state>\n";
$xml .= " <split:value>$e/$d</split:value>\n";
$xml .= " <split:quantity>$e/$d</split:quantity>\n";
$xml .= " <split:account type=\"guid\">$equityAccountId</split:account>\n";
$xml .= " </trn:split>\n";
$xml .= " </trn:splits>\n";
$xml .= "</gnc:transaction>\n";
return ($xml);
}