Mike Gerwitz

Activist for User Freedom

aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Gerwitz <gerwitm@lovullo.com>2016-08-24 09:43:05 -0400
committerMike Gerwitz <gerwitm@lovullo.com>2016-08-24 12:38:00 -0400
commitff01f39c1e8c9b9549d884a0db1f9a74799cf37e (patch)
tree35978db88a8d385250b1b47ad05966e19516373d /src/current/tools/csvm2csv
parent6c0aa54bd1b7b49d736f0db3a8f48b7aa90b3b65 (diff)
downloadtame-ff01f39c1e8c9b9549d884a0db1f9a74799cf37e.tar.gz
tame-ff01f39c1e8c9b9549d884a0db1f9a74799cf37e.tar.bz2
tame-ff01f39c1e8c9b9549d884a0db1f9a74799cf37e.zip
Liberate current implementation of "Calc DSL"
(Copyright headers will be added in the next commit; these are the original files, unaltered in any way.) The internal project name at LoVullo is simply "Calc DSL". This liberates the entire thing. If anything was missed, it'll be added later. To continue building at LoVullo with this move, symlinks are used for the transition; this is the exact code that is used in production. There is a lot here---over 25,000 lines. Much of it is in disarray from the environment surrounding its development, but it does work well for what it was intended to do. (LoVullo folks: fork point is 65723a0 in calcdsl.git.)
Diffstat (limited to 'src/current/tools/csvm2csv')
-rwxr-xr-xsrc/current/tools/csvm2csv112
1 files changed, 112 insertions, 0 deletions
diff --git a/src/current/tools/csvm2csv b/src/current/tools/csvm2csv
new file mode 100755
index 0000000..410d9fa
--- /dev/null
+++ b/src/current/tools/csvm2csv
@@ -0,0 +1,112 @@
+#!/usr/bin/awk -f
+#
+# Compiles a "magic" CSV file into a normal CSV
+#
+# "Magic" CSVs simply exist to make life easier: they permit comments, blank
+# lines, variables, sub-delimiter expansion, and any number of ranges per line.
+# Ranges will be expanded in every combination, making rate tables highly
+# maintainable.
+#
+# Variables are also supported when defined using :var=val. Variables may expand
+# into ranges, 'cause they're awesome. Multiple variables may be delimited by
+# semi-colons, as may multiple values.
+#
+# For example:
+# :foo=1--3
+# $foo;7;9--10;$foo, 5--10
+#
+# Would generate:
+# 1,5
+# 1,6
+# ...
+# 1,10
+# 2,5
+# ...
+# 9,5
+# ...
+# 1,5
+# 1,6
+# ...
+
+
+# Expand field `i' of the current record, and every field after it, printing
+# one record for each combination of expanded values.
+#
+# A field is processed in this order:
+#   1. sub-delimiters: `a;b' is expanded once with `a' and once with `b';
+#   2. variables: a value that is exactly `$name' is replaced by vars[name];
+#   3. ranges: `N--M' produces every integer from N through M, inclusive.
+#
+# Delimiters are split before variables are substituted, so a `;' that appears
+# inside a variable's value is not split.
+#
+# The field is overwritten while recursing and is set back to its original
+# value before returning.
+#
+# Problems are reported on stderr without aborting: an undefined variable
+# expands to the empty string, and a range whose start exceeds its end yields
+# only its start value.
+#
+# Note that the three-argument form of match() is a gawk extension.
+#
+# Parameters:
+#   i - index of the field to expand; pass 1 to expand a whole record
+#
+# m, j, me, and orig are local variables and must not be passed by callers.
+function rangeout( i, m, j, me, orig )
+{
+  # every field has been expanded; emit the finished record
+  if ( i > NF )
+  {
+    print
+    return
+  }
+
+  orig = $i
+
+  # check first for delimiters
+  if ( match( $i, /^([^;]+);(.*)$/, m ) )
+  {
+    # give it a shot with the first value
+    $i = m[1]
+    rangeout( i )
+
+    # strip off the first value and process with following value(s)
+    $i = m[2]
+    rangeout( i )
+
+    # we've delegated; we're done
+    $i = orig
+    return
+  }
+
+  # attempt to parse variable (may expand into a range)
+  if ( match( $i, /^\$([a-zA-Z_-]+)$/, m ) )
+  {
+    if ( m[1] in vars )
+    {
+      $i = vars[ m[1] ]
+    }
+    else
+    {
+      # an unknown name still expands to the empty string, but no longer
+      # does so silently; testing with `in' avoids creating the element
+      printf( "csvm2csv: warning: line %d: undefined variable `$%s'\n", \
+              FNR, m[1] ) > "/dev/stderr"
+      $i = ""
+    }
+  }
+
+  # parse range
+  if ( match( $i, /^([0-9]+)--([0-9]+)$/, m ) )
+  {
+    # coerce to numbers so that the comparison below is always numeric and
+    # every generated value is formatted alike (`05--07' gives 5, 6, 7)
+    j  = m[1] + 0
+    me = m[2] + 0
+
+    if ( j > me )
+    {
+      printf( "csvm2csv: warning: line %d: range `%s' starts after it ends\n", \
+              FNR, m[0] ) > "/dev/stderr"
+    }
+
+    # the body runs at least once, so a reversed range still emits its start
+    do
+    {
+      $i = j
+      rangeout( i + 1 )
+    } while ( j++ < me )
+  }
+  else
+  {
+    # plain value; nothing to expand in this field
+    rangeout( i + 1 )
+  }
+
+  # restore to original value
+  $i = orig
+}
+
+
+BEGIN {
+  # records are written back out as plain comma-separated values
+  OFS = ","
+
+  # input fields are separated by a comma; any spaces directly around the
+  # comma are part of the separator and are therefore discarded
+  FS = " *, *"
+}
+
+
+# comments (lines whose first character is `#') and empty lines produce no
+# output at all
+/^#|^$/ {
+  next
+}
+
+# lines that begin with a colon are variable definitions of the form
+# `:name=value'; the value is stored verbatim in vars[name], where rangeout()
+# looks it up when it encounters `$name'
+/^:/ {
+  if ( match( $0, /^:([a-zA-Z_-]+)=(.*)$/, m ) )
+  {
+    vars[ m[1] ] = m[2]
+  }
+  else
+  {
+    # the line is dropped either way, but say so rather than silently
+    # defining a variable with an empty name
+    printf( "csvm2csv: warning: line %d: malformed variable definition: %s\n", \
+            FNR, $0 ) > "/dev/stderr"
+  }
+
+  # definitions never appear in the output
+  next
+}
+
+# lines containing ranges (denoted by `--', the en dash, which is a typesetting
+# convention for ranges), sub-delimiters (`;'), or variable references must be
+# expanded, beginning with the first field
+/--|;|\$[a-zA-Z_-]/ {
+  rangeout( 1 )
+  next
+}
+
+# every remaining line is an ordinary CSV row
+{
+  # assigning a field, even to itself, makes awk rebuild the record using
+  # OFS, which strips the extra spaces between commas
+  $1 = $1
+  print $0
+}