GcCull.pl
From GnuCash
#!/usr/bin/perl -w # Remove old transavtions from the GnuCash XML. # Arguments are file name and year (remove through this year) # After running, account balances will need to be adjusted. # Two passes needed, first to figure out what to remove. The second # to filter out the old transactions. # # Some syntax flags my ($inAccount, $inTransaction, $curAccount, $curTransaction); my ($inSplit, $curSplit); # Hash of accounts. Key is act:id, value is account object (hash pointer) my(%accounts); # Hash of transactions. Key is trn:id, value is transaction object (pointer) my(%transactions); my(%splits); # Misc global vars my ($i, $buffer, $skip); # Command line arguments my ($gcFileName) = $ARGV[0]; # Name of the GnuCash XML file to process my ($delYear) = $ARGV[1]; # Year to delete through # Initial values $inAccount = 0; $inTransaction = 0; $inSplit = 0; # First pass read the file open(IF, $gcFileName) || die "Can't open $gcFileName\n "; while (<IF>) { chomp; # -- Process the account lines -- if (m/^\s*<gnc:account/) { # Start of a new account $inAccount = 1; $curAccount = {}; # New hash for new account $curAccount->{id} = "-NONE-"; # GC account ID code $curAccount->{origXactCnt} = 0; # Number of transactions before cull $curAccount->{newXactCnt} = 0; # Number of transactions after cull $curAccount->{type} = "-NONE-"; $curAccount->{name} = "-NONE-"; $curAccount->{description} = "-NONE-"; next; } if (m/^\s*<\/gnc:account>/) {$inAccount = 0; next} if ($inAccount && m/<act:id type="guid">/) { if (m/<act:id type="guid">([0-9a-zA-z]+)<\/act:id>/) { $accounts{$1} = $curAccount; $curAccount->{id} = $1; } else {die "Unexpected act:id format\n "} } if ($inAccount && m/<act:type>/) { if (m/<act:type>(.+)<\/act:type>/) {$curAccount->{type} = $1} else {die "Unexpected act:type format\n "} } if ($inAccount && m/<act:name>/) { if (m/<act:name>(.+)<\/act:name>/) { $curAccount->{name} = $1} else {die "Unexpected act:name format\n "} } if ($inAccount && m/<act:description>/) { if (m/<act:description>(.+)<\/act:description>/) { $curAccount->{description} = $1} else {die "Unexpected act:description format\n "} } # -- Process the transaction lines -- if (m/^\s*<gnc:transaction/) { $inTransaction = 1; $curTransaction = {}; # New hash for new transaction $curTransaction->{id} = "-NONE-"; $curTransaction->{splits} = {}; # Hash of split entries $curTransaction->{toDelete} = 0; # Flag for deletion $curTransaction->{year} = 0; # Year transaction is posted next; } if (m/^\s*<\/gnc:transaction>/) {$inTransaction = 0; next} if ($inTransaction && m/<trn:id type="guid">/) { if (m/<trn:id type="guid">([0-9a-zA-z]+)<\/trn:id>/) { $transactions{$1} = $curTransaction; $curTransaction->{id} = $1; } else {die "Unexpected trn:id format\n "} } if ($inTransaction && m/<trn:date-posted>/) { # If the date is not on this line, read the next if (! m/<\/ts:date>/) { $_ = <IF>; chomp} if (m/<ts:date>([0-9][0-9][0-9][0-9])\-.+<\/ts:date>/) { $curTransaction->{year} = $1;} else {die "Unexpected ts:date format\n "} } # -- Process the split lines -- if (m/^\s*<trn:split>/) { $inSplit = 1; $curSplit = {}; # New hash for new split $curSplit->{id} = "-NONE-"; $curSplit->{transaction} = $curTransaction; $curSplit->{toDelete} = 0; # Flag for deletion $curSplit->{accountId} = "-NONE-"; next; } if (m/^\s*<\/trn:split>/) {$inSplit = 0; next} if ($inSplit && m/<split:id type="guid">/) { if (m/<split:id type="guid">([0-9a-zA-z]+)<\/split:id>/) { # Add splits to the current transaction split hash $curTransaction->{splits}->{$1} = $curSplit; $splits{$1} = $curSplit; # Add to global split list $curSplit->{id} = $1; } else {die "Unexpected split:id format\n "} } if ($inSplit && m/<split:account type="guid">/) { if (m/<split:account type="guid">([0-9a-zA-z]+)<\/split:account>/) { $curSplit->{accountId} = $1; $accounts{$1}->{origXactCnt}++; # Increment the account transactions } else {die "Unexpected split:account format\n "} } } # End of first read through the GnuCash file close(IF); # # ---------------------------------------------------------------------- # # Copy the account transaction counts from old to new foreach $curAccount (values(%accounts)) { $curAccount->{newXactCnt} = $curAccount->{origXactCnt}; } # Mark deletable transactions $i = 0; # count deleted transactions foreach $curTransaction (values(%transactions)) { if (&xactIsDeletable($curTransaction)) {$i++} } print "Marked $i (full) transactions for delete\n"; # Dump out some account data open(OF, ">${gcFileName}_accounts.csv") || die "Output error"; foreach $i (keys(%accounts)) { $curAccount = $accounts{$i}; print OF "$curAccount->{id}\t$curAccount->{type}\t$curAccount->{name}"; print OF "\t$curAccount->{origXactCnt}\t$curAccount->{newXactCnt}"; print OF "\t$curAccount->{description}\n"; } close(OF); # Dump out some split information open(OF, ">${gcFileName}_splits.csv") || die "Output error"; foreach $curSplit (values(%splits)) { print OF "$curSplit->{id}\t$curSplit->{accountId}\t$curSplit->{transaction}->{year}\n"; } close(OF); # # ---------------------------------------------------------------------- # Re-read the data file, this time skipping splits and transactions # that are marked for deletion. open(IF, $gcFileName) || die "Can't open $gcFileName\n "; open(OF, ">new_${gcFileName}") || die "Output error"; $skip = 0; $buffer = ""; while (<IF>) { chomp; # Append current line to buffer $buffer = $buffer."$_\n"; # -- Process the transaction lines -- if (m/^\s*<gnc:transaction/) { $inTransaction = 1; $skip = 1; $buffer = "$_\n"; next; } if (m/^\s*<\/gnc:transaction>/) { if ($skip == 0) {print OF "$_\n"} $inTransaction = 0; $skip = 0; next; } if ($inTransaction && m/<trn:id type="guid">/) { if (m/<trn:id type="guid">([0-9a-zA-z]+)<\/trn:id>/) { # If this transaction is not deleted, print the buffer. if ($transactions{$1}->{toDelete} == 0) { print OF "$buffer"; $skip = 0; } else {$buffer=""} # empty the buffer if not needed } else {die "Unexpected trn:id format\n "} next; } # -- Process the split lines, if not skiping transaction -- if (($skip == 0) && (m/^\s*<trn:split>/)) { $inSplit = 1; $skip = 2; $buffer = "$_\n"; next; } if ($inSplit && m/^\s*<\/trn:split>/) { if ($skip == 0) {print OF "$_\n"} $inSplit = 0; $skip = 0; next; } if ($inSplit && m/<split:id type="guid">/) { if (m/<split:id type="guid">([0-9a-zA-z]+)<\/split:id>/) { # If this split is not deleted, print the buffer if ($splits{$1}->{toDelete} == 0) { print OF "$buffer"; $skip = 0; } else {$buffer=""} # empty the buffer if not needed } else {die "Unexpected split:id format\n "} next; } # The default action is to copy the input to the output when not skipping if ($skip == 0) {print OF "$_\n"} } # end of filtering pass through input file close(IF); close(OF); exit; # # # ---------------------------------------------------------------------- # ---------------------------------------------------------------------- # SUBROUTINES # -- Mark deletable splits -- # Returns non-zero id the split gets marked for delete # Arguments: split pointer, transaction year sub splitIsDeletable { my ($inSplit, $xctYear) = @_; my ($splAcnt) = $accounts{$inSplit->{accountId}}; my ($splAcntType) = $splAcnt->{type}; my ($rslt) = 0; # Work only on splits not marked for delete if ($inSplit->{toDelete} == 0) { if ($xctYear <= $delYear) { if (($splAcntType eq "BANK") || ($splAcntType eq "CASH") || ($splAcntType eq "CREDIT") || ($splAcntType eq "EXPENSE") || ($splAcntType eq "EQUITY") || ($splAcntType eq "LIABILITY") || ($splAcntType eq "INCOME")) { $splAcnt->{newXactCnt}--; $rslt = 1; $inSplit->{toDelete} = 1; } } } #print "DEBUG: $inSplit->{id}, $splAcntType, $xctYear, $rslt\n"; return($rslt); } # -- Mark deletable transactions -- sub xactIsDeletable { my ($inXact) = @_; my ($rslt) = 1; # Start assuming all splits get deleted my ($xactYear) = $inXact->{year}; my ($xSplit, $xSplits, $i); $xSplits = $inXact->{splits}; foreach $xSplit (values(%$xSplits)) { $i = &splitIsDeletable($xSplit, $xactYear); $rslt = $rslt && $i; } if ($rslt) {$inXact->{toDelete} = 1} return($rslt); }