Mike Gerwitz

Activist for User Freedom

about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Mike Gerwitz <mike.gerwitz@rtspecialty.com> 2018-10-03 14:44:55 -0400
committer Mike Gerwitz <mike.gerwitz@rtspecialty.com> 2018-10-03 14:44:55 -0400
commit b716e8c2cdb6d6d2a8bf922056e46627f8c951f0 (patch)
tree 4df9e36a6f42c0c0ba1da7269fb3100882f7929e
parent d251f7a79ba854a73a15d344bdad1627d21153a4 (diff)
parent 397710c055d26cc7d6a14b2cc804f427a8cf9c57 (diff)
download tame-b716e8c2cdb6d6d2a8bf922056e46627f8c951f0.tar.gz
tame-b716e8c2cdb6d6d2a8bf922056e46627f8c951f0.tar.bz2
tame-b716e8c2cdb6d6d2a8bf922056e46627f8c951f0.zip
csvm: Auto-sort expanded output
-rwxr-xr-x build-aux/csvm-expand 195
-rwxr-xr-x build-aux/csvm2csv 196
-rwxr-xr-x build-aux/test/test-csvm2csv 66
3 files changed, 324 insertions, 133 deletions
diff --git a/build-aux/csvm-expand b/build-aux/csvm-expand
new file mode 100755
index 0000000..a231163
--- /dev/null
+++ b/build-aux/csvm-expand
@@ -0,0 +1,195 @@
+#!/usr/bin/awk -f
+#
+# Expands a "magic" CSV file into a normal CSV
+#
+# Copyright (C) 2016, 2018 R-T Specialty, LLC.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# "Magic" CSVs simply exist to make life easier: they permit comments, blank
+# lines, variables, sub-delimiter expansion, and any number of ranges per line.
+# Ranges will be expanded in every combination, making rate tables highly
+# maintainable.
+#
+# Variables are also supported when defined using :var=val. Variables may
+# expand into ranges, 'cause they're awesome. Multiple variables may be
+# delimited by semi-colons, as may multiple values.
+#
+# For example:
+# :foo=1--3
+# $foo;7;9--10:$foo, 5--10,1/1/2017
+#
+# Would generate:
+# 1, 5, 1483246800
+# 1, 6, 1483246800
+# ...
+# 5, 10, 1483246800
+# 2, 5, 1483246800
+# ...
+# 9, 5, 1483246800
+# ...
+# 1, 5, 1483246800
+# 1, 6, 1483246800
+# ...
+##
+
+
+# Return the value of `vars[name]' if S is exactly `$name', otherwise S itself
+function expand_vars( s,    value, m )
+{
+    # only a lone `$name' is a reference; the value may itself be a range
+    if ( match( s, /^\$([a-zA-Z_-]+)$/, m ) )
+    {
+        value = vars[ m[1] ];
+        # an unset (or empty) variable is reported as unknown and is fatal
+        if ( value == "" )
+        {
+            print "error: unknown variable reference: `$" m[1] "'" > "/dev/stderr"
+            exit 1
+        }
+
+        return value
+    }
+    # `m' is listed as a parameter above only to keep it local to this call
+    return s
+}
+
+
+# Expand field I and all following fields of the current record, printing one row per combination
+function parseline( i,    m, j, me, orig, cmd )
+{
+    if ( i > NF )  # no fields remain, so the record is fully expanded
+    {
+        print
+        return
+    }
+
+    orig = $i  # restored before returning so the caller's next iteration sees the unexpanded field
+
+    # expand variables before any processing so that expansions
+    # can include any type of formatting
+    $i = expand_vars( $i )
+
+    if ( match( $i, /^([0-9]+\/){2}[0-9]+$/, m ) )  # N/N/N date => Unix time
+    {
+        cmd = "date --date=" $i " +%s"
+        cmd | getline $i  # output is only read, so a one-way pipe suffices
+        close( cmd )
+    }
+
+    # check first for delimiters
+    if ( match( $i, /^([^;]+);(.*)$/, m ) )
+    {
+        # give it a shot with the first value
+        $i = m[1]
+        parseline( i )
+
+        # strip off the first value and process with following value(s)
+        $i = m[2]
+        parseline( i )
+
+        # we've delegated; we're done
+        $i = orig
+        return
+    }
+
+    # parse range
+    if ( match( $i, /^([^-]+)--([^-]+)$/, m ) )
+    {
+        j  = expand_vars( m[1] )
+        me = expand_vars( m[2] )
+
+        if ( !match( j, /^[0-9]+$/ ) || !match( me, /^[0-9]+$/ ) )
+        {
+            print "error: invalid range: `" $i "'" > "/dev/stderr"
+            exit 1
+        }
+
+        do  # inclusive of both bounds; always runs at least once
+        {
+            $i = j
+            parseline( i + 1 )
+        } while ( j++ < me )
+    }
+    else
+    {
+        parseline( i + 1 );
+    }
+
+    # restore to original value
+    $i = orig
+}
+
+
+BEGIN {
+ # we're parsing CSVs
+ FS = " *, *"
+ OFS = ","
+
+ has_directives = 0
+ directives = "!(NODIRECTIVES)"
+}
+
+
+# skip all lines that begin with `#', which denotes a comment, or are empty
+/^#|^$/ { next; }
+
+# directives are echoed back and are intended for processing by
+# the parent csvm2csv script
+/^!/ && output_started {
+ print "error: directive must appear before header: `" $0 "'" > "/dev/stderr"
+ exit 1
+}
+/^!/ && has_directives {
+ print "error: all directives must be on one line: `" $0 "'" > "/dev/stderr"
+ exit 1
+}
+/^!/ {
+ has_directives = 1
+ directives = $0
+
+ next
+}
+
+# lines that begin with a colon are variable definitions of the form `:name=value'
+/^:/ {
+    if ( !match( $0, /^:([a-zA-Z_-]+)=(.*)$/, m ) )  # EREs have no lazy `*?'
+    {
+        print "error: invalid variable definition: `" $0 "'" > "/dev/stderr"
+        exit 1
+    }
+
+    vars[ m[1] ] = m[2]  # stored verbatim; looked up later by expand_vars
+    next
+}
+
+# Always begin output with a line for directives, even if there are
+# none. This makes subsequent processing much easier, since we won't have
+# to conditionally ignore the top line.
+!output_started {
+ print directives
+
+ output_started = 1
+}
+
+# lines that need any sort of processing (ranges, dates, etc)
+/--|;|\$[a-zA-Z_-]|\// { parseline( 1 ); next; }
+
+# all other lines are normal; simply output them verbatim
+{
+ # this assignment will ensure that awk processes the output, ensuring that
+ # extra spaces between commas are stripped
+ $1=$1
+ print
+}
diff --git a/build-aux/csvm2csv b/build-aux/csvm2csv
index addbe26..76b7c46 100755
--- a/build-aux/csvm2csv
+++ b/build-aux/csvm2csv
@@ -1,8 +1,7 @@
-#!/usr/bin/awk -f
-#
+#!/bin/bash
# Compiles a "magic" CSV file into a normal CSV
#
-# Copyright (C) 2016, 2018 R-T Specialty, LLC.
+# Copyright (C) 2018 R-T Specialty, LLC.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -17,150 +16,95 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
-# "Magic" CSVs simply exist to make life easier: they permit comments, blank
-# lines, variables, sub-delimiter expansion, and any number of ranges per line.
-# Ranges will be expanded in every combination, making rate tables highly
-# maintainable.
-#
-# Variables are also supported when defined using :var=val. Variables may
-# expand into ranges, 'cause they're awesome. Multiple variables may be
-# delimited by semi-colons, as may multiple values.
+# For format of CSVMs, see `csvm-expand'.
#
-# For example:
-# :foo=1--3
-# $foo;7;9--10:$foo, 5--10,1/1/2017
-#
-# Would generate:
-# 1, 5, 1483246800
-# 1, 6, 1483246800
-# ...
-# 5, 10, 1483246800
-# 2, 5, 1483246800
-# ...
-# 9, 5, 14832468005
-# ...
-# 1, 5, 1483246800
-# 1, 6, 1483246800
-# ...
+# To disable sorting of CSVM output, use the `!NOSORT' directive before the
+# header line.
##
+# account for symlinks, since historically this script lives in a different
+# directory and has been symlinked for compatibility
+declare -r mypath=$( dirname "$( readlink -f "$0" )" )
+
-# Expand variable with its value, if any
-function expand_vars( s, value )
+# Generate -k arguments for GNU sort given a CSV header
+#
+# The generated arguments will be of the form -k1,1n ... -kl,ln, where `l'
+# is the total number of header entries.
+#
+# For example, given this header:
+# foo, bar, baz
+# the output would be:
+# -k1,1n -k2,2n -k3,3n
+sort-key-args()
{
- # attempt to parse variable (may expand into a range)
- if ( match( s, /^\$([a-zA-Z_-]+)$/, m ) )
- {
- value = vars[ m[1] ];
-
- if ( value == "" )
- {
- print "error: unknown variable reference: `$" m[1] "'" > "/dev/stderr"
- exit 1
- }
+ local -r header="${1?Missing CSV header}"
- return value
- }
+ local -i i=0
- return s
+ # generate -ki,in for each column (notice that a trailing
+ # comma is added to the header because of the read delimiter)
+ while read -d,; do
+ echo -n "-k$((++i)),${i}n "
+ done <<< "$header,"
}
-# Expand line
-function parseline( i, m, j, me, orig )
+# Sort every column of CSV
+#
+# The columns will all be sorted left-to-right. The header is left in place
+# as the first row.
+csv-sort()
{
- if ( i > NF )
- {
- print
- return
- }
-
- orig = $i
-
- # expand variables before any processing so that expansions
- # can include any type of formatting
- $i = expand_vars( $i )
-
- if ( match( $i, /^([0-9]+\/){2}[0-9]+$/, m ) )
- {
- cmd = "date --date=" $i " +%s"
- cmd |& getline $i
- close(cmd)
- }
-
- # check first for delimiters
- if ( match( $i, /^([^;]+);(.*)$/, m ) )
- {
- # give it a shot with the first value
- $i = m[1]
- parseline( i )
-
- # strip off the first value and process with following value(s)
- $i = m[2]
- parseline( i )
-
- # we've delegated; we're done
- $i = orig
- return
- }
-
- # parse range
- if ( match( $i, /^([^-]+)--([^-]+)$/, m ) )
- {
- j = expand_vars( m[1] )
- me = expand_vars( m[2] )
-
- if ( !match( j, /^[0-9]+$/ ) || !match( me, /^[0-9]+$/ ) )
- {
- print "error: invalid range: `" $i "'" > "/dev/stderr"
- exit 1
- }
+ # the first line of the expanded CSVM is the CSV header
+ local header; read -r header
+ local -r keys=$( sort-key-args "$header" )
- do
- {
- $i = j
- parseline( i + 1 )
- } while ( j++ < me )
- }
- else
- {
- parseline( i + 1 );
- }
-
- # restore to original value
- $i = orig
+ # all remaining input (which is now sans header) is sorted
+ echo "$header"
+ sort -t, $keys -
}
-BEGIN {
- # we're parsing CSVs
- FS = " *, *"
- OFS = ","
-}
+# Output usage information
+#
+# Kudos to you if you understand the little Easter egg.
+usage()
+{
+ cat <<EOU
+Usage: $0 [FILE]
+Expand CSVM represented by FILE or stdin into a CSV
+The columns of the expanded CSV will be automatically sorted
+left-to-right. To inhibit this behavior, use the \`!NOSORT'
+directive anywhere before the header line in the source CSVM.
-# skip all lines that begin with `#', which denotes a comment, or are empty
-/^#|^$/ { next; }
+Options:
+ --help Output usage information.
-# lines that begin with a colon are variable definitions
-/^:/ {
- if ( !match( $0, /^:([a-zA-Z_-]+)=(.*?)$/, m ) )
- {
- print "error: invalid variable definition: `" $0 "'" > "/dev/stderr"
- exit 1
- }
+This program has magic CSV powers.
+EOU
- vars[ m[1] ] = m[2]
- next
+ exit 64 # EX_USAGE
}
-# lines that need any sort of processing (ranges, dates, etc)
-/--|;|\$[a-zA-Z_-]|\// { parseline( 1 ); next; }
-# all other lines are normal; simply output them verbatim
+# Expand the CSVM and sort its rows left-to-right unless `!NOSORT' is given
+main()
{
- # this assignment will ensure that awk processes the output, ensuring that
- # extra spaces between commas are stripped
- $1=$1
- print
+  test ! "$1" == --help || usage
+  set -o pipefail  # otherwise a csvm-expand failure is masked by cat/sort
+  "$mypath/csvm-expand" "$@" \
+    | {
+        local directives; read -r directives
+
+        # ignore sorting if given NOSORT directive
+        if [[ "$directives" =~ NOSORT ]]; then
+          cat
+        else
+          csv-sort  # reads the header and rows from stdin; takes no arguments
+        fi
+      }
}
+
+main "$@"
diff --git a/build-aux/test/test-csvm2csv b/build-aux/test/test-csvm2csv
index 14ef407..ea6b6db 100755
--- a/build-aux/test/test-csvm2csv
+++ b/build-aux/test/test-csvm2csv
@@ -38,7 +38,10 @@ run-test()
test $? -eq 0 || return 1
# expected output
- diff <( cat <<< "$expected" ) <( cat <<< "$given" )
+ diff <( cat <<< "$expected" ) <( cat <<< "$given" ) || {
+ echo "test $testsum failure" >&2
+ return 1
+ }
}
@@ -92,11 +95,11 @@ test-delim()
declare -r expected='header,line
1,2
+3,6
+3,9
4,2
4,6
-4,9
-3,6
-3,9'
+4,9'
run-test "$input" "$expected"
}
@@ -179,11 +182,12 @@ test-var-with-var()
:baz=$range;$foo
$baz, 5'
+ # note that the output is sorted
declare -r expected='header,line
2,5
+2,5
3,5
-4,5
-2,5'
+4,5'
run-test "$input" "$expected"
}
@@ -203,6 +207,51 @@ $foo'
}
+test-directive-stripped()
+{
+ declare -r input='!DIRECTIVE
+header, line'
+
+ declare -r expected='header,line'
+
+ run-test "$input" "$expected"
+}
+
+
+test-no-sort()
+{
+ declare -r input='!NOSORT
+header, line
+1,1
+0,0'
+
+ declare -r expected='header,line
+1,1
+0,0'
+
+ run-test "$input" "$expected"
+}
+
+
+# all directives should be put on a single line
+test-fail-multi-directive()
+{
+ declare -r input='!DIRECTIVE1
+!DIRECTIVE2
+header, line'
+
+ ((testsum++))
+
+ local -r result=$(
+ ../csvm2csv 2>&1 <<< "$input" \
+ && echo '(test failure: expected failure)'
+ )
+
+ grep -q '!DIRECTIVE2' <<< "$result" \
+ || return 1
+}
+
+
test-fail-unknown-var-ref()
{
((testsum++))
@@ -254,6 +303,9 @@ test-comment \
&& test-var-with-range-delim \
&& test-var-with-var \
&& test-var-zero-ref \
+ && test-directive-stripped \
+ && test-no-sort \
+ && test-fail-multi-directive \
&& test-fail-unknown-var-ref \
&& test-fail-non-numeric-range \
&& test-fail-invalid-var-dfn \
@@ -263,7 +315,7 @@ test-comment \
}
# safety check
-test "$testsum" -eq 12 || {
+test "$testsum" -eq 15 || {
echo 'error: did not run all csvm2csv tests!' >&2
exit 1
}