Difference between revisions of "GncArchive.pl"

From GnuCash
Jump to: navigation, search
(Created on behalf of User:Mhasling.)
 
(Added script contents)
Line 1: Line 1:
Created on behalf of [[User:Mhasling]].
+
<nowiki>
 +
#!/usr/bin/perl -w
 +
# Remove older transactions from the GnuCash XML to create a more
 +
# manageable dataset. Creates new opening balance transactions for
 +
# asset/liability accounts so that the latest and reconciled balances
 +
# are maintained.
 +
# Arguments are file name and date (remove through this date)
 +
# After running, account balances will need to be adjusted.
 +
# Two passes needed, first to figure out what to remove. The second
 +
# to filter out the old transactions.
 +
# Large chunks of this script were copied from gccull.pl from the
 +
# GnuCash wiki, posted by Doug Pastorello.
 +
 
 +
use POSIX;
 +
 
 +
# Some syntax flags
 +
my ($inAccount, $inTransaction, $curAccount, $curTransaction);
 +
my ($inSplit, $curSplit);
 +
# Hash of accounts. Key is act:id, value is account object (ref)
 +
my (%accounts);
 +
# Hash of transactions. Key is trn:id, value is transaction object (ref)
 +
my (%transactions);
 +
# Hash of splits. Key is split:id, value is split object (ref)
 +
my (%splits);
 +
# Hash of ISO4217 commodities. Key is ID, value is number of uses.
 +
my (%commodities);
 +
# Reference to root account object
 +
my ($rootAccount);
 +
# Equity account id used for generating balancing transactions
 +
my ($equityAccountId, $equityActCommodity);
 +
# List of deleted transaction GUIDs, available for reuse in balancing
 +
# transactions
 +
my (@guids);
 +
# Misc global vars
 +
my ($i, $buffer, $skip);
 +
my (%actypes);
 +
 
 +
# Command line arguments
 +
my ($gcFileName) = $ARGV[0]; # Name of the GnuCash XML file to process
 +
my ($delDate) = $ARGV[1]; # Date to delete through
 +
my ($archiveFileName, $postedDate, $enteredDate);
 +
if ($delDate =~ m/^(\d\d\d\d)(\d\d)(\d\d)$/) {
 +
    $postedDate = "$1-$2-$3 00:00:00 +0000";
 +
}
 +
else {die "Bad date format, required as YYYYMMDD\n  "}
 +
($archiveFileName = $gcFileName) =~ s/^(.+)(\.\w+)$/$1-archive-$delDate$2/;
 +
die "Strange filename, does not appear to have an extension\n  " if $archiveFileName eq $gcFileName;
 +
$enteredDate = strftime("%Y-%m-%d %H:%M:%S +0000", gmtime($^T));
 +
 
 +
# Initial values
 +
$inAccount = 0;
 +
$inTransaction = 0;
 +
$inSplit = 0;
 +
 
 +
# First pass read the file
 +
open(IF, $gcFileName) || die "Can't open $gcFileName\n  ";
 +
while (<IF>) {
 +
    chomp;
 +
    # -- Process the account lines --
 +
    if (m/^\s*<gnc:account/) {
 +
        $inAccount                = 1;
 +
        $curAccount                = {}; # New hash for new account
 +
        $curAccount->{id}          = "-NONE-"; # GC account ID code
 +
        $curAccount->{origXactCnt} = 0; # Number of transactions before cull
 +
        $curAccount->{newXactCnt}  = 0; # Number of transactions after cull
 +
        $curAccount->{type}        = "-NONE-";
 +
        $curAccount->{name}        = "-NONE-";
 +
        $curAccount->{description} = "-NONE-";
 +
        $curAccount->{parent}      = "-NONE-";
 +
        $curAccount->{children}    = 0;
 +
        $curAccount->{delBalances} = { "n" => 0, "c" => 0, "y" => 0 };
 +
        $curAccount->{commodityID} = "-NONE-";
 +
        $curAccount->{commoditySCU} = 1;
 +
        next;
 +
    }
 +
    if (m/^\s*<\/gnc:account>/) {$inAccount = 0; next}
 +
    if ($inAccount && m/<act:id type="guid">/) {
 +
        if (m/<act:id type="guid">([0-9a-zA-z]+)<\/act:id>/) {
 +
            $accounts{$1} = $curAccount;
 +
            $curAccount->{id} = $1;
 +
        }
 +
        else {die "Unexpected act:id format\n  "}
 +
    }
 +
    if ($inAccount && m/<act:type>/) {
 +
        if (m/<act:type>(.+)<\/act:type>/) {
 +
            $curAccount->{type} = $1;
 +
            $actypes{$1} = 1;
 +
        }
 +
        else {die "Unexpected act:type format\n  "}
 +
    }
 +
    if ($inAccount && m/<act:name>/) {
 +
        if (m/<act:name>(.+)<\/act:name>/) {
 +
            $curAccount->{name} = $1;
 +
        }
 +
        else {die "Unexpected act:name format\n  "}
 +
    }
 +
    if ($inAccount && m/<act:description>/) {
 +
        if (m/<act:description>(.+)<\/act:description>/) {
 +
            $curAccount->{description} = $1;
 +
        }
 +
        else {die "Unexpected act:description format\n  "}
 +
    }
 +
    if ($inAccount && m/<act:parent type=/) {
 +
        if (m/<act:parent type="guid">(.+)<\/act:parent>/) {
 +
            $curAccount->{parent} = $1;
 +
        }
 +
        else {die "Unexpected act:parent format\n  "}
 +
    }
 +
    if ($inAccount && m/<act:commodity-scu>/) {
 +
        if (m/<act:commodity-scu>(.+)<\/act:commodity-scu>/) {
 +
            $curAccount->{commoditySCU} = $1;
 +
        }
 +
        else {die "Unexpected act:commodity-scu format\n  "}
 +
    }
 +
    if ($inAccount && m/<cmdty:id>/) {
 +
        if (m/<cmdty:id>(.+)<\/cmdty:id>/) {
 +
            $curAccount->{commodityID} = $1;
 +
            $commodities{$1} = 0 unless exists($commodities{$1});
 +
            $commodities{$1} += 1;
 +
        }
 +
        else {die "Unexpected cmdty:id format\n  "}
 +
    }
 +
    # -- Process the transaction lines --
 +
    if (m/^\s*<gnc:transaction/) {
 +
        $inTransaction = 1;
 +
        $curTransaction = {}; # New hash for new transaction
 +
        $curTransaction->{id} = "-NONE-";
 +
        $curTransaction->{splits} = {}; # Hash of split entries
 +
        $curTransaction->{toDelete} = 0; # Flag for deletion
 +
        $curTransaction->{date} = 0; # Date transaction is posted
 +
        next;
 +
    }
 +
    if (m/^\s*<\/gnc:transaction>/) {$inTransaction = 0; next}
 +
    if ($inTransaction && m/<trn:id type="guid">/) {
 +
        if (m/<trn:id type="guid">([0-9a-zA-z]+)<\/trn:id>/) {
 +
            $transactions{$1} = $curTransaction;
 +
            $curTransaction->{id} = $1;
 +
        }
 +
        else {die "Unexpected trn:id format\n  "}
 +
    }
 +
    if ($inTransaction && m/<trn:date-posted>/) {
 +
        # If the date is not on this line, read the next
 +
        if (! m/<\/ts:date>/) { $_ = <IF>; chomp}
 +
        if (m/<ts:date>([0-9][0-9][0-9][0-9])\-([0-9][0-9])\-([0-9][0-9]).+<\/ts:date>/) {
 +
            $curTransaction->{date} = $1 . $2 . $3;
 +
        }
 +
        else {die "Unexpected ts:date format\n  "}
 +
    }
 +
 
 +
    # -- Process the split lines --
 +
    if (m/^\s*<trn:split>/) {
 +
        $inSplit = 1;
 +
        $curSplit = {}; # New hash for new split
 +
        $curSplit->{id} = "-NONE-";
 +
        $curSplit->{transaction} = $curTransaction;
 +
        $curSplit->{template} = 0;
 +
        $curSplit->{toDelete} = 0; # Flag for deletion
 +
        $curSplit->{accountId} = "-NONE-";
 +
        $curSplit->{amountNum} = 0;
 +
        $curSplit->{amountDen} = 1;
 +
        $curSplit->{reconState} = "n";
 +
        next;
 +
    }
 +
    if (m/^\s*<\/trn:split>/) {$inSplit = 0; next}
 +
    if ($inSplit && m/<split:id type="guid">/) {
 +
        if (m/<split:id type="guid">([0-9a-zA-z]+)<\/split:id>/) {
 +
            # Add splits to the current transaction split hash
 +
            $curTransaction->{splits}->{$1} = $curSplit;
 +
            $splits{$1} = $curSplit; # Add to global split list
 +
            $curSplit->{id} = $1;
 +
        }
 +
        else {die "Unexpected split:id format\n  "}
 +
    }
 +
    if ($inSplit && m/<split:account type="guid">/) {
 +
        if (m/<split:account type="guid">([0-9a-zA-z]+)<\/split:account>/) {
 +
            $curSplit->{accountId} = $1;
 +
            $accounts{$1}->{origXactCnt}++; # Increment the account transactions
 +
        }
 +
        else {die "Unexpected split:account format\n  "}
 +
    }
 +
    if ($inSplit && m/<slot:key>sched-xaction<\/slot:key>/) {
 +
        $curSplit->{template} = 1;
 +
    }
 +
    if ($inSplit && m/<split:reconciled-state>/) {
 +
        if (m/<split:reconciled-state>([ncy])<\/split:reconciled-state>/) {
 +
            $curSplit->{reconState} = $1;
 +
        }
 +
        else {die "Unexpected split:reconciled-state format\n  "}
 +
    }
 +
    if ($inSplit && m/<split:quantity>/) {
 +
        if (m/<split:quantity>([-0-9]+)\/([0-9]+)<\/split:quantity>/) {
 +
            $curSplit->{amountNum} = $1;
 +
            $curSplit->{amountDen} = $2;
 +
        }
 +
        else {die "Unexpected split:quantity format\n  "}
 +
    }
 +
 
 +
} # End of first read through the GnuCash file
 +
close(IF);
 +
#
 +
# ----------------------------------------------------------------------
 +
#
 +
 
 +
# Copy the account transaction counts from old to new, count the
 +
# number of child accounts
 +
foreach $curAccount (values(%accounts)) {
 +
    $curAccount->{newXactCnt} = $curAccount->{origXactCnt};
 +
    my $parent = $curAccount->{parent};
 +
    $accounts{$parent}->{children}++ if $parent ne "-NONE-";
 +
}
 +
# Find the root account to use as parent for the generated account for the
 +
# balancing transactions
 +
foreach $curAccount (values(%accounts)) {
 +
    if (($curAccount->{type} eq "ROOT") && ($curAccount->{name} ne "-NONE-")) {
 +
        $rootAccount = $curAccount;
 +
        print "Creating equity account for balancing transactions under $curAccount->{name}\n";
 +
        last;
 +
    }
 +
}
 +
 
 +
# Mark deletable transactions
 +
$i = 0; # count deleted transactions
 +
foreach $curTransaction (values(%transactions)) {
 +
    if (&xactIsDeletable($curTransaction)) {
 +
        push @guids, $curTransaction->{id};
 +
        foreach $split (values($curTransaction->{splits})) {
 +
            push @guids, $split->{id};
 +
        }
 +
        $i++;
 +
    }
 +
}
 +
print "Marked $i (full) transactions for delete\n";
 +
print "Recovered " . scalar(@guids) . " GUIDs for reuse\n";
 +
 
 +
# Calculate new opening balances
 +
foreach $curTransaction (values(%transactions)) {
 +
    if ($curTransaction->{toDelete}) {
 +
        foreach $split (values($curTransaction->{splits})) {
 +
            my ($aid, $rs, $n, $d) = ($split->{accountId}, $split->{reconState}, $split->{amountNum}, $split->{amountDen});
 +
            if (($n != 0) && ($aid ne "-NONE-")) {
 +
                my $a = $accounts{$aid};
 +
                die "Deletable transaction has different fractional denominator\n  " if $d != $a->{commoditySCU};
 +
                $a->{delBalances}->{$rs} += $n;
 +
            }
 +
        }
 +
    }
 +
}
 +
 
 +
# Dump out some account data
 +
open(OF, ">${gcFileName}_accounts.csv") || die "Output error";
 +
foreach $i (keys(%accounts)) {
 +
    $curAccount = $accounts{$i};
 +
    print OF "$curAccount->{id}\t$curAccount->{type}\t$curAccount->{name}";
 +
    print OF "\t$curAccount->{delBalances}->{n}\t$curAccount->{delBalances}->{c}\t$curAccount->{delBalances}->{y}";
 +
    print OF "\t$curAccount->{children}";
 +
    print OF "\t$curAccount->{origXactCnt}\t$curAccount->{newXactCnt}";
 +
    print OF "\t$curAccount->{description}\n";
 +
}
 +
close(OF);
 +
 
 +
# Dump out some split information
 +
open(OF, ">${gcFileName}_splits.csv") || die "Output error";
 +
foreach $curSplit (values(%splits)) {
 +
    print OF "$curSplit->{id}\t$curSplit->{accountId}\t$curSplit->{transaction}->{date}\n";
 +
}
 +
close(OF);
 +
#
 +
# ----------------------------------------------------------------------
 +
# Rename the data file to create a backup, and then open and read it,
 +
# this time skipping transactions that are marked for deletion, and
 +
# adding the replacement balancing account and transactions.
 +
#
 +
rename($gcFileName,"${gcFileName}.original") || die "Can't rename $gcFileName to create backup\n  ";
 +
open(IF, "${gcFileName}.original") || die "Can't open ${gcFileName}.original for input\n  ";
 +
open(OF, "> ${gcFileName}") || die "Can't open $gcFileName for output";
 +
open(AF, "> ${archiveFileName}") || die "Can't open $archiveFileName for output";
 +
$addedBalancing = 0;
 +
$buffering = 0;
 +
$inTransaction = 0;
 +
$inScheduled = 0;
 +
$splitCount = 0;
 +
$toDelete = 0;
 +
$buffer = "";
 +
while (<IF>) {
 +
    chomp;
 +
 
 +
    # Copy everything to the archive file except the scheduled transactions
 +
    $inScheduled = 1 if m/^\s*<gnc:schedxaction/;
 +
    print AF "$_\n" unless $inScheduled;
 +
    $inScheduled = 0 if m/^\s*<\/gnc:schedxaction/;
 +
 
 +
    # Switch to buffering mode at start of each transaction ...
 +
    if (m/^\s*<gnc:transaction/) {
 +
        $buffering = 1;
 +
        $inTransaction = 1;
 +
        $splitCount = 0;
 +
        $inSplit = 0;
 +
        $toDelete = 0;
 +
        # Insert all the balancing entries before the first transaction
 +
        unless ($addedBalancing) {
 +
            print OF genBalanceAccount($rootAccount);
 +
            foreach $account (values(%accounts)) {
 +
                next if $account->{type} =~ m/^(INCOME|EXPENSE)$/;
 +
                print OF genBalancingXact($account, "n") if $account->{delBalances}->{n};
 +
                print OF genBalancingXact($account, "c") if $account->{delBalances}->{c};
 +
                print OF genBalancingXact($account, "y") if $account->{delBalances}->{y};
 +
            }
 +
            $addedBalancing = 1;
 +
        }
 +
    }
 +
    # ... otherwise just copy from input to output
 +
    unless ($buffering) {
 +
        print OF "$_\n";
 +
        next;
 +
    }
 +
    # Buffering mode, append current line to buffer
 +
    $buffer = $buffer."$_\n";
 +
 
 +
    if ($inTransaction && m/<trn:id type="guid">/) {
 +
        if (m/<trn:id type="guid">([0-9a-zA-z]+)<\/trn:id>/) {
 +
            $toDelete = $transactions{$1}->{toDelete};
 +
        }
 +
        else {die "Unexpected trn:id format\n  "}
 +
    }
 +
    # Process the split lines, if not skipping transaction --
 +
    if (m/^\s*<trn:split>/) {
 +
        $inSplit = 1;
 +
        next;
 +
    }
 +
    if ($inSplit && m/^\s*<\/trn:split>/) {
 +
        $inSplit = 0;
 +
        $splitCount++;
 +
        next;
 +
    }
 +
    if ($inTransaction && m/^\s*<\/gnc:transaction/) {
 +
        $buffering = 0;
 +
        $inTransaction = 0;
 +
        print OF $buffer unless $toDelete || ($splitCount < 2);
 +
        $buffer = "";
 +
        next;
 +
    }
 +
} # end of filtering pass through input file
 +
close(IF);
 +
close(OF);
 +
exit;
 +
#
 +
#
 +
# ----------------------------------------------------------------------
 +
# ----------------------------------------------------------------------
 +
# SUBROUTINES
 +
 
 +
# -- Mark deletable splits --
 +
# Returns non-zero if the split gets marked for delete
 +
# Arguments: split pointer, transaction date
 +
sub splitIsDeletable {
 +
    my ($inSplit, $xctDate) = @_;
 +
    my ($splAcnt) = $accounts{$inSplit->{accountId}};
 +
    my ($splAcntType) = $splAcnt->{type};
 +
    my ($rslt) = 0;
 +
    # Work only on splits not marked for delete
 +
    unless ($inSplit->{toDelete} || $inSplit->{template}) {
 +
        if ($xctDate <= $delDate) {
 +
            if ($splAcntType =~ /^(EQUITY|ASSET|LIABILITY|BANK|CREDIT|CASH|STOCK|MUTUAL|INCOME|EXPENSE)$/) {
 +
                $splAcnt->{newXactCnt}--;
 +
                $rslt = 1;
 +
                $inSplit->{toDelete} = 1;
 +
            }
 +
        }
 +
    }
 +
    #print "DEBUG: $inSplit->{id}, $splAcntType, $xctDate, $rslt\n";
 +
    return($rslt);
 +
}
 +
 
 +
# -- Mark deletable transactions --
 +
sub xactIsDeletable {
 +
    my ($inXact) = @_;
 +
    my ($rslt) = 1; # Start assuming all splits get deleted
 +
    my ($xactDate) = $inXact->{date};
 +
    my ($xSplit, $xSplits, $i);
 +
    $xSplits = $inXact->{splits};
 +
    foreach $xSplit (values(%$xSplits)) {
 +
        $i = &splitIsDeletable($xSplit, $xactDate);
 +
        $rslt = $rslt && $i;
 +
    }
 +
    if ($rslt) {$inXact->{toDelete} = 1}
 +
    return($rslt);
 +
}
 +
 
 +
# -- Create equity account for generated opening balances
 +
sub genBalanceAccount {
 +
    my ($ra) = @_;
 +
    $equityAccountId = pop(@guids);
 +
    $equityActCommodity = "-NONE-";
 +
    my $nn = 0;
 +
    while (($c,$n) = each(%commodities)) {
 +
        next if $c eq "template";
 +
        if ($n > $nn) { $equityActCommodity = $c; $nn = $n; }
 +
    }
 +
    my $xml = "";
 +
    $xml .= "<gnc:account version=\"2.0.0\">\n";
 +
    $xml .= "  <act:name>Generated Balancing Transactions</act:name>\n";
 +
    $xml .= "  <act:id type=\"guid\">$equityAccountId</act:id>\n";
 +
    $xml .= "  <act:type>EQUITY</act:type>\n";
 +
    $xml .= "  <act:commodity>\n";
 +
    $xml .= "    <cmdty:space>ISO4217</cmdty:space>\n";
 +
    $xml .= "    <cmdty:id>$equityActCommodity</cmdty:id>\n";
 +
    $xml .= "  </act:commodity>\n";
 +
    $xml .= "  <act:code></act:code>\n";
 +
    $xml .= "  <act:description>Generated Balancing Transactions</act:description>\n";
 +
    $xml .= "  <act:parent type=\"guid\">$rootAccount->{id}</act:parent>\n";
 +
    $xml .= "</gnc:account>\n";
 +
    return ($xml);
 +
}
 +
 
 +
# -- Create balancing transaction XML --
 +
sub genBalancingXact {
 +
    my ($account,$rs) = @_;
 +
    return ("") if $account->{type} eq "EQUITY";
 +
    my ($n,$c,$d) = ($account->{delBalances}->{$rs}, $account->{commodityID}, $account->{commoditySCU});
 +
    my $e = 0 - $n;
 +
    my $desc = "Opening balance";
 +
    $desc .= " (needs valuation)" if $c ne $equityActCommodity;
 +
    my $xml = "";
 +
    $xml .= "<gnc:transaction version=\"2.0.0\">\n";
 +
    $xml .= "  <trn:id type=\"guid\">" . pop(@guids) . "</trn:id>\n";
 +
    $xml .= "  <trn:date-posted>\n";
 +
    $xml .= "    <ts:date>$postedDate</ts:date>\n";
 +
    $xml .= "  </trn:date-posted>\n";
 +
    $xml .= "  <trn:date-entered>\n";
 +
    $xml .= "    <ts:date>$enteredDate</ts:date>\n";
 +
    $xml .= "  </trn:date-entered>\n";
 +
    $xml .= "  <trn:description>$desc</trn:description>\n";
 +
    $xml .= "  <trn:splits>\n";
 +
    $xml .= "    <trn:split>\n";
 +
    $xml .= "      <split:id type=\"guid\">" . pop(@guids) . "</split:id>\n";
 +
    $xml .= "      <split:reconciled-state>$rs</split:reconciled-state>\n";
 +
    $xml .= "      <split:value>$n/$d</split:value>\n";
 +
    $xml .= "      <split:quantity>$n/$d</split:quantity>\n";
 +
    $xml .= "      <split:account type=\"guid\">$account->{id}</split:account>\n";
 +
    $xml .= "    </trn:split>\n";
 +
    $xml .= "    <trn:split>\n";
 +
    $xml .= "      <split:id type=\"guid\">" . pop(@guids) . "</split:id>\n";
 +
    $xml .= "      <split:reconciled-state>n</split:reconciled-state>\n";
 +
    $xml .= "      <split:value>$e/$d</split:value>\n";
 +
    $xml .= "      <split:quantity>$e/$d</split:quantity>\n";
 +
    $xml .= "      <split:account type=\"guid\">$equityAccountId</split:account>\n";
 +
    $xml .= "    </trn:split>\n";
 +
    $xml .= "  </trn:splits>\n";
 +
    $xml .= "</gnc:transaction>\n";
 +
    return ($xml);
 +
}
 +
</nowiki>

Revision as of 20:00, 29 June 2019

#!/usr/bin/perl -w # Remove older transactions from the GnuCash XML to create a more # manageable dataset. Creates new opening balance transactions for # asset/liability accounts so that the latest and reconciled balances # are maintained. # Arguments are file name and date (remove through this date) # After running, account balances will need to be adjusted. # Two passes needed, first to figure out what to remove. The second # to filter out the old transactions. # Large chunks of this script were copied from gccull.pl from the # GnuCash wiki, posted by Doug Pastorello. use POSIX; # Some syntax flags my ($inAccount, $inTransaction, $curAccount, $curTransaction); my ($inSplit, $curSplit); # Hash of accounts. Key is act:id, value is account object (ref) my (%accounts); # Hash of transactions. Key is trn:id, value is transaction object (ref) my (%transactions); # Hash of splits. Key is split:id, value is split object (ref) my (%splits); # Hash of ISO4217 commodities. Key is ID, value is number of uses. my (%commodities); # Reference to root account object my ($rootAccount); # Equity account id used for generating balancing transactions my ($equityAccountId, $equityActCommodity); # List of deleted transaction GUIDs, available for reuse in balancing # transactions my (@guids); # Misc global vars my ($i, $buffer, $skip); my (%actypes); # Command line arguments my ($gcFileName) = $ARGV[0]; # Name of the GnuCash XML file to process my ($delDate) = $ARGV[1]; # Date to delete through my ($archiveFileName, $postedDate, $enteredDate); if ($delDate =~ m/^(\d\d\d\d)(\d\d)(\d\d)$/) { $postedDate = "$1-$2-$3 00:00:00 +0000"; } else {die "Bad date format, required as YYYYMMDD\n "} ($archiveFileName = $gcFileName) =~ s/^(.+)(\.\w+)$/$1-archive-$delDate$2/; die "Strange filename, does not appear to have an extension\n " if $archiveFileName eq $gcFileName; $enteredDate = strftime("%Y-%m-%d %H:%M:%S +0000", gmtime($^T)); # Initial values $inAccount = 0; $inTransaction = 0; $inSplit = 0; # First pass read the file open(IF, $gcFileName) || die "Can't open $gcFileName\n "; while (<IF>) { chomp; # -- Process the account lines -- if (m/^\s*<gnc:account/) { $inAccount = 1; $curAccount = {}; # New hash for new account $curAccount->{id} = "-NONE-"; # GC account ID code $curAccount->{origXactCnt} = 0; # Number of transactions before cull $curAccount->{newXactCnt} = 0; # Number of transactions after cull $curAccount->{type} = "-NONE-"; $curAccount->{name} = "-NONE-"; $curAccount->{description} = "-NONE-"; $curAccount->{parent} = "-NONE-"; $curAccount->{children} = 0; $curAccount->{delBalances} = { "n" => 0, "c" => 0, "y" => 0 }; $curAccount->{commodityID} = "-NONE-"; $curAccount->{commoditySCU} = 1; next; } if (m/^\s*<\/gnc:account>/) {$inAccount = 0; next} if ($inAccount && m/<act:id type="guid">/) { if (m/<act:id type="guid">([0-9a-zA-z]+)<\/act:id>/) { $accounts{$1} = $curAccount; $curAccount->{id} = $1; } else {die "Unexpected act:id format\n "} } if ($inAccount && m/<act:type>/) { if (m/<act:type>(.+)<\/act:type>/) { $curAccount->{type} = $1; $actypes{$1} = 1; } else {die "Unexpected act:type format\n "} } if ($inAccount && m/<act:name>/) { if (m/<act:name>(.+)<\/act:name>/) { $curAccount->{name} = $1; } else {die "Unexpected act:name format\n "} } if ($inAccount && m/<act:description>/) { if (m/<act:description>(.+)<\/act:description>/) { $curAccount->{description} = $1; } else {die "Unexpected act:description format\n "} } if ($inAccount && m/<act:parent type=/) { if (m/<act:parent type="guid">(.+)<\/act:parent>/) { $curAccount->{parent} = $1; } else {die "Unexpected act:parent format\n "} } if ($inAccount && m/<act:commodity-scu>/) { if (m/<act:commodity-scu>(.+)<\/act:commodity-scu>/) { $curAccount->{commoditySCU} = $1; } else {die "Unexpected act:commodity-scu format\n "} } if ($inAccount && m/<cmdty:id>/) { if (m/<cmdty:id>(.+)<\/cmdty:id>/) { $curAccount->{commodityID} = $1; $commodities{$1} = 0 unless exists($commodities{$1}); $commodities{$1} += 1; } else {die "Unexpected cmdty:id format\n "} } # -- Process the transaction lines -- if (m/^\s*<gnc:transaction/) { $inTransaction = 1; $curTransaction = {}; # New hash for new transaction $curTransaction->{id} = "-NONE-"; $curTransaction->{splits} = {}; # Hash of split entries $curTransaction->{toDelete} = 0; # Flag for deletion $curTransaction->{date} = 0; # Date transaction is posted next; } if (m/^\s*<\/gnc:transaction>/) {$inTransaction = 0; next} if ($inTransaction && m/<trn:id type="guid">/) { if (m/<trn:id type="guid">([0-9a-zA-z]+)<\/trn:id>/) { $transactions{$1} = $curTransaction; $curTransaction->{id} = $1; } else {die "Unexpected trn:id format\n "} } if ($inTransaction && m/<trn:date-posted>/) { # If the date is not on this line, read the next if (! m/<\/ts:date>/) { $_ = <IF>; chomp} if (m/<ts:date>([0-9][0-9][0-9][0-9])\-([0-9][0-9])\-([0-9][0-9]).+<\/ts:date>/) { $curTransaction->{date} = $1 . $2 . $3; } else {die "Unexpected ts:date format\n "} } # -- Process the split lines -- if (m/^\s*<trn:split>/) { $inSplit = 1; $curSplit = {}; # New hash for new split $curSplit->{id} = "-NONE-"; $curSplit->{transaction} = $curTransaction; $curSplit->{template} = 0; $curSplit->{toDelete} = 0; # Flag for deletion $curSplit->{accountId} = "-NONE-"; $curSplit->{amountNum} = 0; $curSplit->{amountDen} = 1; $curSplit->{reconState} = "n"; next; } if (m/^\s*<\/trn:split>/) {$inSplit = 0; next} if ($inSplit && m/<split:id type="guid">/) { if (m/<split:id type="guid">([0-9a-zA-z]+)<\/split:id>/) { # Add splits to the current transaction split hash $curTransaction->{splits}->{$1} = $curSplit; $splits{$1} = $curSplit; # Add to global split list $curSplit->{id} = $1; } else {die "Unexpected split:id format\n "} } if ($inSplit && m/<split:account type="guid">/) { if (m/<split:account type="guid">([0-9a-zA-z]+)<\/split:account>/) { $curSplit->{accountId} = $1; $accounts{$1}->{origXactCnt}++; # Increment the account transactions } else {die "Unexpected split:account format\n "} } if ($inSplit && m/<slot:key>sched-xaction<\/slot:key>/) { $curSplit->{template} = 1; } if ($inSplit && m/<split:reconciled-state>/) { if (m/<split:reconciled-state>([ncy])<\/split:reconciled-state>/) { $curSplit->{reconState} = $1; } else {die "Unexpected split:reconciled-state format\n "} } if ($inSplit && m/<split:quantity>/) { if (m/<split:quantity>([-0-9]+)\/([0-9]+)<\/split:quantity>/) { $curSplit->{amountNum} = $1; $curSplit->{amountDen} = $2; } else {die "Unexpected split:quantity format\n "} } } # End of first read through the GnuCash file close(IF); # # ---------------------------------------------------------------------- # # Copy the account transaction counts from old to new, count the # number of child accounts foreach $curAccount (values(%accounts)) { $curAccount->{newXactCnt} = $curAccount->{origXactCnt}; my $parent = $curAccount->{parent}; $accounts{$parent}->{children}++ if $parent ne "-NONE-"; } # Find the root account to use as parent for the generated account for the # balancing transactions foreach $curAccount (values(%accounts)) { if (($curAccount->{type} eq "ROOT") && ($curAccount->{name} ne "-NONE-")) { $rootAccount = $curAccount; print "Creating equity account for balancing transactions under $curAccount->{name}\n"; last; } } # Mark deletable transactions $i = 0; # count deleted transactions foreach $curTransaction (values(%transactions)) { if (&xactIsDeletable($curTransaction)) { push @guids, $curTransaction->{id}; foreach $split (values($curTransaction->{splits})) { push @guids, $split->{id}; } $i++; } } print "Marked $i (full) transactions for delete\n"; print "Recovered " . scalar(@guids) . " GUIDs for reuse\n"; # Calculate new opening balances foreach $curTransaction (values(%transactions)) { if ($curTransaction->{toDelete}) { foreach $split (values($curTransaction->{splits})) { my ($aid, $rs, $n, $d) = ($split->{accountId}, $split->{reconState}, $split->{amountNum}, $split->{amountDen}); if (($n != 0) && ($aid ne "-NONE-")) { my $a = $accounts{$aid}; die "Deletable transaction has different fractional denominator\n " if $d != $a->{commoditySCU}; $a->{delBalances}->{$rs} += $n; } } } } # Dump out some account data open(OF, ">${gcFileName}_accounts.csv") || die "Output error"; foreach $i (keys(%accounts)) { $curAccount = $accounts{$i}; print OF "$curAccount->{id}\t$curAccount->{type}\t$curAccount->{name}"; print OF "\t$curAccount->{delBalances}->{n}\t$curAccount->{delBalances}->{c}\t$curAccount->{delBalances}->{y}"; print OF "\t$curAccount->{children}"; print OF "\t$curAccount->{origXactCnt}\t$curAccount->{newXactCnt}"; print OF "\t$curAccount->{description}\n"; } close(OF); # Dump out some split information open(OF, ">${gcFileName}_splits.csv") || die "Output error"; foreach $curSplit (values(%splits)) { print OF "$curSplit->{id}\t$curSplit->{accountId}\t$curSplit->{transaction}->{date}\n"; } close(OF); # # ---------------------------------------------------------------------- # Rename the data file to create a backup, and then open and read it, # this time skipping transactions that are marked for deletion, and # adding the replacement balancing account and transactions. # rename($gcFileName,"${gcFileName}.original") || die "Can't rename $gcFileName to create backup\n "; open(IF, "${gcFileName}.original") || die "Can't open ${gcFileName}.original for input\n "; open(OF, "> ${gcFileName}") || die "Can't open $gcFileName for output"; open(AF, "> ${archiveFileName}") || die "Can't open $archiveFileName for output"; $addedBalancing = 0; $buffering = 0; $inTransaction = 0; $inScheduled = 0; $splitCount = 0; $toDelete = 0; $buffer = ""; while (<IF>) { chomp; # Copy everything to the archive file except the scheduled transactions $inScheduled = 1 if m/^\s*<gnc:schedxaction/; print AF "$_\n" unless $inScheduled; $inScheduled = 0 if m/^\s*<\/gnc:schedxaction/; # Switch to buffering mode at start of each transaction ... if (m/^\s*<gnc:transaction/) { $buffering = 1; $inTransaction = 1; $splitCount = 0; $inSplit = 0; $toDelete = 0; # Insert all the balancing entries before the first transaction unless ($addedBalancing) { print OF genBalanceAccount($rootAccount); foreach $account (values(%accounts)) { next if $account->{type} =~ m/^(INCOME|EXPENSE)$/; print OF genBalancingXact($account, "n") if $account->{delBalances}->{n}; print OF genBalancingXact($account, "c") if $account->{delBalances}->{c}; print OF genBalancingXact($account, "y") if $account->{delBalances}->{y}; } $addedBalancing = 1; } } # ... otherwise just copy from input to output unless ($buffering) { print OF "$_\n"; next; } # Buffering mode, append current line to buffer $buffer = $buffer."$_\n"; if ($inTransaction && m/<trn:id type="guid">/) { if (m/<trn:id type="guid">([0-9a-zA-z]+)<\/trn:id>/) { $toDelete = $transactions{$1}->{toDelete}; } else {die "Unexpected trn:id format\n "} } # Process the split lines, if not skipping transaction -- if (m/^\s*<trn:split>/) { $inSplit = 1; next; } if ($inSplit && m/^\s*<\/trn:split>/) { $inSplit = 0; $splitCount++; next; } if ($inTransaction && m/^\s*<\/gnc:transaction/) { $buffering = 0; $inTransaction = 0; print OF $buffer unless $toDelete || ($splitCount < 2); $buffer = ""; next; } } # end of filtering pass through input file close(IF); close(OF); exit; # # # ---------------------------------------------------------------------- # ---------------------------------------------------------------------- # SUBROUTINES # -- Mark deletable splits -- # Returns non-zero if the split gets marked for delete # Arguments: split pointer, transaction date sub splitIsDeletable { my ($inSplit, $xctDate) = @_; my ($splAcnt) = $accounts{$inSplit->{accountId}}; my ($splAcntType) = $splAcnt->{type}; my ($rslt) = 0; # Work only on splits not marked for delete unless ($inSplit->{toDelete} || $inSplit->{template}) { if ($xctDate <= $delDate) { if ($splAcntType =~ /^(EQUITY|ASSET|LIABILITY|BANK|CREDIT|CASH|STOCK|MUTUAL|INCOME|EXPENSE)$/) { $splAcnt->{newXactCnt}--; $rslt = 1; $inSplit->{toDelete} = 1; } } } #print "DEBUG: $inSplit->{id}, $splAcntType, $xctDate, $rslt\n"; return($rslt); } # -- Mark deletable transactions -- sub xactIsDeletable { my ($inXact) = @_; my ($rslt) = 1; # Start assuming all splits get deleted my ($xactDate) = $inXact->{date}; my ($xSplit, $xSplits, $i); $xSplits = $inXact->{splits}; foreach $xSplit (values(%$xSplits)) { $i = &splitIsDeletable($xSplit, $xactDate); $rslt = $rslt && $i; } if ($rslt) {$inXact->{toDelete} = 1} return($rslt); } # -- Create equity account for generated opening balances sub genBalanceAccount { my ($ra) = @_; $equityAccountId = pop(@guids); $equityActCommodity = "-NONE-"; my $nn = 0; while (($c,$n) = each(%commodities)) { next if $c eq "template"; if ($n > $nn) { $equityActCommodity = $c; $nn = $n; } } my $xml = ""; $xml .= "<gnc:account version=\"2.0.0\">\n"; $xml .= " <act:name>Generated Balancing Transactions</act:name>\n"; $xml .= " <act:id type=\"guid\">$equityAccountId</act:id>\n"; $xml .= " <act:type>EQUITY</act:type>\n"; $xml .= " <act:commodity>\n"; $xml .= " <cmdty:space>ISO4217</cmdty:space>\n"; $xml .= " <cmdty:id>$equityActCommodity</cmdty:id>\n"; $xml .= " </act:commodity>\n"; $xml .= " <act:code></act:code>\n"; $xml .= " <act:description>Generated Balancing Transactions</act:description>\n"; $xml .= " <act:parent type=\"guid\">$rootAccount->{id}</act:parent>\n"; $xml .= "</gnc:account>\n"; return ($xml); } # -- Create balancing transaction XML -- sub genBalancingXact { my ($account,$rs) = @_; return ("") if $account->{type} eq "EQUITY"; my ($n,$c,$d) = ($account->{delBalances}->{$rs}, $account->{commodityID}, $account->{commoditySCU}); my $e = 0 - $n; my $desc = "Opening balance"; $desc .= " (needs valuation)" if $c ne $equityActCommodity; my $xml = ""; $xml .= "<gnc:transaction version=\"2.0.0\">\n"; $xml .= " <trn:id type=\"guid\">" . pop(@guids) . "</trn:id>\n"; $xml .= " <trn:date-posted>\n"; $xml .= " <ts:date>$postedDate</ts:date>\n"; $xml .= " </trn:date-posted>\n"; $xml .= " <trn:date-entered>\n"; $xml .= " <ts:date>$enteredDate</ts:date>\n"; $xml .= " </trn:date-entered>\n"; $xml .= " <trn:description>$desc</trn:description>\n"; $xml .= " <trn:splits>\n"; $xml .= " <trn:split>\n"; $xml .= " <split:id type=\"guid\">" . pop(@guids) . "</split:id>\n"; $xml .= " <split:reconciled-state>$rs</split:reconciled-state>\n"; $xml .= " <split:value>$n/$d</split:value>\n"; $xml .= " <split:quantity>$n/$d</split:quantity>\n"; $xml .= " <split:account type=\"guid\">$account->{id}</split:account>\n"; $xml .= " </trn:split>\n"; $xml .= " <trn:split>\n"; $xml .= " <split:id type=\"guid\">" . pop(@guids) . "</split:id>\n"; $xml .= " <split:reconciled-state>n</split:reconciled-state>\n"; $xml .= " <split:value>$e/$d</split:value>\n"; $xml .= " <split:quantity>$e/$d</split:quantity>\n"; $xml .= " <split:account type=\"guid\">$equityAccountId</split:account>\n"; $xml .= " </trn:split>\n"; $xml .= " </trn:splits>\n"; $xml .= "</gnc:transaction>\n"; return ($xml); }