diff options
Diffstat (limited to 'tools/csvm2csv')
-rwxr-xr-x | tools/csvm2csv | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/tools/csvm2csv b/tools/csvm2csv new file mode 100755 index 0000000..3bcbc7b --- /dev/null +++ b/tools/csvm2csv @@ -0,0 +1,128 @@ +#!/usr/bin/awk -f +# +# Compiles a "magic" CSV file into a normal CSV +# +# Copyright (C) 2016 LoVullo Associates, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# "Magic" CSVs simply exist to make life easier: they permit comments, blank +# lines, variables, sub-delimiter expansion, and any number of ranges per line. +# Ranges will be expanded in every combination, making rate tables highly +# maintainable. +# +# Variables are also supported when defined using :var=val. Variables may +# expand into ranges, 'cause they're awesome. Multiple variables may be +# delimited by semi-colons, as may multiple values. +# +# For example: +# :foo=1--3 +# $foo;7;9--10:$foo, 5--10 +# +# Would generate: +# 1, 5 +# 1, 6 +# ... +# 5, 10 +# 2, 5 +# ... +# 9, 5 +# ... +# 1, 5 +# 1, 6 +# ... 
##


# Report a fatal error on stderr, tagged with the current input record
# number, and terminate with a non-zero exit status.
#
# Parameters:
#   msg - human-readable description of the problem
function fatal( msg )
{
  printf "error: line %d: %s\n", NR, msg > "/dev/stderr"
  exit 1
}


# Recursively expand field `i' and every field after it, printing one output
# record for each combination of expanded values.
#
# A field is expanded as follows:
#   - `a;b;...'  is split on the first `;' and each part is expanded in turn;
#   - `$name'    is replaced by the value stored in vars[name];
#   - `N--M'     produces every integer from N through M inclusive;
#   - anything else is passed through untouched.
#
# The current record ($0 and its fields) is used as scratch space; each field
# is restored to its original value before the function returns, so that
# sibling expansions start from the same record.
#
# Parameters:
#   i - index of the field to expand; top-level callers pass 1
#
# The remaining parameters (m, lo, hi, j, name, orig) are awk-style local
# variables and must not be supplied by callers.
function rangeout( i,    m, lo, hi, j, name, orig )
{
  # every field has been expanded: emit the record as it now stands
  if ( i > NF )
  {
    print
    return
  }

  orig = $i

  # sub-delimiters come first so that each delimited value can itself be a
  # variable reference or a range
  if ( match( $i, /^([^;]+);(.*)$/, m ) )
  {
    # expand using only the first value...
    $i = m[1]
    rangeout( i )

    # ...then again with whatever follows the first delimiter
    $i = m[2]
    rangeout( i )

    # both halves have been fully handled by the recursive calls
    $i = orig
    return
  }

  # variable reference (the value may itself be a range)
  if ( match( $i, /^\$([a-zA-Z_-]+)$/, m ) )
  {
    name = m[1]

    # test with `in' so that the lookup does not create an empty entry, and
    # so that a typo is reported rather than expanded to an empty field
    if ( !( name in vars ) )
    {
      fatal( sprintf( "unknown variable reference: `$%s'", name ) )
    }

    $i = vars[ name ]
  }

  # numeric range
  if ( match( $i, /^([0-9]+)--([0-9]+)$/, m ) )
  {
    # force numeric values so that the comparison below is never a string
    # comparison and every emitted value is formatted the same way
    lo = m[1] + 0
    hi = m[2] + 0

    if ( lo > hi )
    {
      fatal( sprintf( "invalid range (start exceeds end): %s", $i ) )
    }

    for ( j = lo; j <= hi; j++ )
    {
      $i = j
      rangeout( i + 1 )
    }
  }
  else
  {
    # nothing to expand in this field; move on to the next one
    rangeout( i + 1 )
  }

  # restore to original value
  $i = orig
}


BEGIN {
  # input fields are comma-separated with optional surrounding spaces;
  # output uses a bare comma
  FS  = " *, *"
  OFS = ","
}


# skip all lines that begin with `#', which denotes a comment, or are empty
/^#|^$/ { next }

# lines that begin with a colon are variable definitions of the form
# `:name=value'
/^:/ {
  if ( !match( $0, /^:([a-zA-Z_-]+)=(.*)$/, def ) )
  {
    fatal( sprintf( "malformed variable definition: %s", $0 ) )
  }

  vars[ def[1] ] = def[2]
  next
}

# lines containing ranges (denoted by `--', the en dash, which is a typesetting
# convention for ranges), sub-delimiters, or variables must be expanded
/--|;|\$[a-zA-Z_-]/ { rangeout( 1 ); next }

# all other lines are normal; output them after normalizing the separators
{
  # assigning to a field makes awk rebuild $0 using OFS, which strips the
  # extra spaces around commas
  $1 = $1
  print
}