Mike Gerwitz

Activist for User Freedom

aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Gerwitz <mike.gerwitz@rtspecialty.com>2018-01-26 11:50:05 -0500
committerMike Gerwitz <mike.gerwitz@rtspecialty.com>2018-09-11 09:30:52 -0400
commit98f9b6fadb5c86d4b058dc557dc1e4a8ad20b84f (patch)
tree3833932653b7a0d639d50ab52ae42bf6e470a7a9
parentcac9d22cb2673f0d86f05dd1f585a0299fbc3c65 (diff)
downloadtame-98f9b6fadb5c86d4b058dc557dc1e4a8ad20b84f.tar.gz
tame-98f9b6fadb5c86d4b058dc557dc1e4a8ad20b84f.tar.bz2
tame-98f9b6fadb5c86d4b058dc557dc1e4a8ad20b84f.zip
vector/table: Extract bisect functions into vector/filter
* vector/filter.xml (bisect, foremost, _mask-unless_): Add to package. * vector/table.xml (bisect, foremost, _mask-unless_): Remove from package.
-rw-r--r--core/vector/filter.xml278
-rw-r--r--core/vector/table.xml263
2 files changed, 278 insertions, 263 deletions
diff --git a/core/vector/filter.xml b/core/vector/filter.xml
index daa7a67..f347960 100644
--- a/core/vector/filter.xml
+++ b/core/vector/filter.xml
@@ -23,6 +23,9 @@
core="true"
desc="Filtering Vectors and Matrices">
+ <import package="../base" />
+ <import package="list" />
+
<section title="Matrix Filtering">
\ref{mfilter} handles complex filtering of matrices.
@@ -242,5 +245,280 @@
</c:otherwise>
</c:cases>
</function>
+
+
+ <section title="Bisecting">
+ Perform an~$O(lg n)$ bisect on a data set.
+
+ This is intended to limit recursion on very large data sets (and
+ consequently will increase performance).
+ This will bisect up until a certain point (the gap),
+ unless it finds the value in question.
+ After finding the value,
+ it will perform an~$O(n)$ linear backward search to find the first
+ occurrence of the value.
+ If the value is not found,
+ it will halt at the gap and return the first index of the gap,
+ which we will consider its "best guess",
+ at which point a linear search can be performed by the caller to
+ determine if the value does in fact exist at all.
+
+ (The reason this operates so oddly is because of its caller;
+ we could rid the gap and make this a general-purpose function if need be.
+ Technically,
+ the gap is useless and saves $lg g$ steps,
+ which may be very small.)
+
+
+ <const name="TABLE_WHEN_MASK_VALUE" type="float" value="-0.0001"
+ desc="Used to mask when conditions" />
+ <const name="MFILTER_BISECT_GAP_MAX" type="integer" value="10"
+ desc="Quit bisect if size is less than or equal to this value" />
+
+
+ <function name="bisect"
+ desc="Bisect a matrix toward the requested column value">
+ <param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
+ <param name="col" type="integer" desc="Column index to filter on" />
+ <param name="val" type="float" desc="Column value to filter on" />
+ <param name="start" type="integer" desc="Start index" />
+ <param name="end" type="integer" desc="Start end" />
+
+ <c:let>
+ <c:values>
+ <!-- the gap represents the number of indexes between the current start
+ and end indexes -->
+ <c:value name="gap" type="integer" desc="Gap between current start and end">
+ <c:sum>
+ <c:value-of name="end" />
+
+ <t:negate>
+ <c:value-of name="start" />
+ </t:negate>
+ </c:sum>
+ </c:value>
+ </c:values>
+
+ <!--
+ At this point, we need to determine if we should continue the bisect or
+ halt. The purpose of the gap is based on the understanding that (with
+ our use cases) we will arrive, almost always, at one of two scenarios:
+ we found the value, but it's part of a larger set of the same values, or
+ the value we are looking for may not even exist at all.
+
+ The gap just limits recursion (but just barely) at smaller levels, since
+ bisect will take lg(n) steps). Increase the gap limit to decrease the
+ number of steps, or decrease it to 1 if you want a complete bisection.
+ -->
+ <c:cases>
+ <!-- give up if we've reached our gap limit -->
+ <c:case>
+ <c:when name="gap">
+ <c:lte>
+ <c:value-of name="MFILTER_BISECT_GAP_MAX" />
+ </c:lte>
+ </c:when>
+
+ <!-- we tried our best; return our current position -->
+ <c:value-of name="start" />
+ </c:case>
+
+ <!-- we have not yet reached our gap limit; keep going -->
+ <c:otherwise>
+ <c:let>
+ <c:values>
+ <c:value name="mid_index" type="integer" desc="Middle index">
+ <!-- to determine the new mid index, add half of the gap to the
+ current index -->
+ <c:sum>
+ <c:value-of name="start" />
+ <c:ceil>
+ <c:quotient>
+ <c:value-of name="gap" />
+ <c:const value="2" desc="Bisect" />
+ </c:quotient>
+ </c:ceil>
+ </c:sum>
+ </c:value>
+ </c:values>
+
+ <c:let>
+ <c:values>
+ <c:value name="mid" type="float" desc="Middle value">
+ <c:value-of name="matrix">
+ <!-- row -->
+ <c:index>
+ <c:value-of name="mid_index" />
+ </c:index>
+
+ <!-- column -->
+ <c:index>
+ <c:value-of name="col" />
+ </c:index>
+ </c:value-of>
+ </c:value>
+ </c:values>
+
+ <c:cases>
+ <!-- if the middle value is lower than our value, then take the upper half -->
+ <c:case>
+ <c:when name="mid">
+ <c:lt>
+ <c:value-of name="val" />
+ </c:lt>
+ </c:when>
+
+ <c:recurse start="mid_index" />
+ </c:case>
+
+ <!-- similarily, take the lower half if we over-shot -->
+ <c:case>
+ <c:when name="mid">
+ <c:gt>
+ <c:value-of name="val" />
+ </c:gt>
+ </c:when>
+
+ <c:recurse end="mid_index" />
+ </c:case>
+
+ <!-- if we have an exact match, that doesn't necessarily mean that we
+ have every value; we may have intersected a set of them -->
+ <c:otherwise>
+ <!-- this will return an exact index: the first index
+ containing the element we've been looking for (it is a
+ linear backwards search) -->
+ <c:apply name="foremost" matrix="matrix" col="col" i="mid_index" />
+ </c:otherwise>
+ </c:cases>
+ </c:let>
+ </c:let>
+ </c:otherwise>
+ </c:cases>
+ </c:let>
+ </function>
+
+
+ <function name="foremost"
+ desc="Search backwards for the first occurrance in a sorted list">
+ <param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
+ <param name="col" type="integer" desc="Column index to search on" />
+ <param name="i" type="integer" desc="Current index" />
+
+ <c:let>
+ <c:values>
+ <!-- we calculate this rather than accept it via an argument so that
+ this function may be called directly in a more convenient manner
+ -->
+ <c:value name="val" type="float" desc="Current value">
+ <c:value-of name="matrix">
+ <!-- row -->
+ <c:index>
+ <c:value-of name="i" />
+ </c:index>
+
+ <!-- column -->
+ <c:index>
+ <c:value-of name="col" />
+ </c:index>
+ </c:value-of>
+ </c:value>
+
+ <c:value name="prev" type="float" desc="Previous value">
+ <c:value-of name="matrix">
+ <!-- row -->
+ <c:index>
+ <t:dec>
+ <c:value-of name="i" />
+ </t:dec>
+ </c:index>
+
+ <!-- column -->
+ <c:index>
+ <c:value-of name="col" />
+ </c:index>
+ </c:value-of>
+ </c:value>
+ </c:values>
+
+ <c:cases>
+ <!-- if we have no more indexes to check, then we're done -->
+ <c:case>
+ <c:when name="i">
+ <c:eq>
+ <c:const value="0"
+ desc="Did we check the final (first) index?" />
+ </c:eq>
+ </c:when>
+
+ <!-- well, then, we're done -->
+ <c:value-of name="i" />
+ </c:case>
+
+ <!-- if the previous column value is the same value, then continue checking -->
+ <c:case>
+ <c:when name="prev">
+ <c:eq>
+ <c:value-of name="val" />
+ </c:eq>
+ </c:when>
+
+ <c:recurse>
+ <c:arg name="i">
+ <t:dec>
+ <c:value-of name="i" />
+ </t:dec>
+ </c:arg>
+ </c:recurse>
+ </c:case>
+
+ <!-- otherwise, we've found the foremost index -->
+ <c:otherwise>
+ <c:value-of name="i" />
+ </c:otherwise>
+ </c:cases>
+ </c:let>
+ </function>
+
+
+ <template name="_mask-unless_"
+ desc="Mask a value unless the condition is truthful">
+ <param name="@values@" desc="Body" />
+ <param name="@name@" desc="Scalar to check" />
+ <param name="@index@" desc="Optional index" />
+ <param name="@desc@" desc="Optional description" />
+
+ <c:cases>
+ <!-- if masked -->
+ <c:case>
+ <!-- no index provided -->
+ <unless name="@index@">
+ <c:when name="@name@">
+ <c:eq>
+ <c:value-of name="FALSE" />
+ </c:eq>
+ </c:when>
+ </unless>
+
+ <!-- index provided -->
+ <if name="@index@">
+ <c:when name="@name@" index="@index@">
+ <c:eq>
+ <c:value-of name="FALSE" />
+ </c:eq>
+ </c:when>
+ </if>
+
+ <!-- TODO: configurable mask via meta and/or param -->
+ <c:value-of name="TABLE_WHEN_MASK_VALUE" />
+ </c:case>
+
+ <!-- if not masked -->
+ <c:otherwise>
+ <param-copy name="@values@" />
+ </c:otherwise>
+ </c:cases>
+ </template>
+ </section>
</section>
</package>
diff --git a/core/vector/table.xml b/core/vector/table.xml
index c74ac9a..38dc0f7 100644
--- a/core/vector/table.xml
+++ b/core/vector/table.xml
@@ -24,7 +24,6 @@
desc="Functions for performing table lookups">
<import package="../base" />
- <import package="list" />
<!-- since templates are inlined, we need to make these symbols available to
avoid terrible confusion -->
@@ -33,11 +32,6 @@
<import package="filter" export="true" />
<import package="matrix" export="true" />
-
- <const name="TABLE_WHEN_MASK_VALUE" type="float" value="-0.0001" desc="Used to mask when conditions" />
- <const name="MFILTER_BISECT_GAP_MAX" type="integer" value="10" desc="Quit bisect if size is less than or equal to this value" />
-
-
<!--
Create a constant table
@@ -496,262 +490,5 @@
</c:otherwise>
</c:cases>
</function>
-
-
- <!--
- Perform a lg(n) bisect on a data set.
-
- This is intended to limit recursion on very large data sets (and
- consequently will increase performance). This will bisect up until a certain
- point (the gap), unless it finds the value in question. After finding the
- value, it will perform a linear backward search to find the first occurrence
- of the value. If the value is not found, it will halt at the gap and return
- the first index of the gap, which we will consider its "best guess", at
- which point a linear search can be performed by the caller to determine if
- the value does in fact exist at all.
-
- (The reason this operates so oddly is because of its caller. We could rid
- the gap and make this a general-purpose function if need be. Technically,
- the gap is useless and saves lg(g) steps, which may be very small.)
- -->
- <function name="bisect" desc="Bisect a matrix toward the requested column value">
- <param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
- <param name="col" type="integer" desc="Column index to filter on" />
- <param name="val" type="float" desc="Column value to filter on" />
- <param name="start" type="integer" desc="Start index" />
- <param name="end" type="integer" desc="Start end" />
-
- <c:let>
- <c:values>
- <!-- the gap represents the number of indexes between the current start
- and end indexes -->
- <c:value name="gap" type="integer" desc="Gap between current start and end">
- <c:sum>
- <c:value-of name="end" />
-
- <t:negate>
- <c:value-of name="start" />
- </t:negate>
- </c:sum>
- </c:value>
- </c:values>
-
- <!--
- At this point, we need to determine if we should continue the bisect or
- halt. The purpose of the gap is based on the understanding that (with
- our use cases) we will arrive, almost always, at one of two scenarios:
- we found the value, but it's part of a larger set of the same values, or
- the value we are looking for may not even exist at all.
-
- The gap just limits recursion (but just barely) at smaller levels, since
- bisect will take lg(n) steps). Increase the gap limit to decrease the
- number of steps, or decrease it to 1 if you want a complete bisection.
- -->
- <c:cases>
- <!-- give up if we've reached our gap limit -->
- <c:case>
- <c:when name="gap">
- <c:lte>
- <c:value-of name="MFILTER_BISECT_GAP_MAX" />
- </c:lte>
- </c:when>
-
- <!-- we tried our best; return our current position -->
- <c:value-of name="start" />
- </c:case>
-
- <!-- we have not yet reached our gap limit; keep going -->
- <c:otherwise>
- <c:let>
- <c:values>
- <c:value name="mid_index" type="integer" desc="Middle index">
- <!-- to determine the new mid index, add half of the gap to the
- current index -->
- <c:sum>
- <c:value-of name="start" />
- <c:ceil>
- <c:quotient>
- <c:value-of name="gap" />
- <c:const value="2" type="integer" desc="Bisect" />
- </c:quotient>
- </c:ceil>
- </c:sum>
- </c:value>
- </c:values>
-
- <c:let>
- <c:values>
- <c:value name="mid" type="float" desc="Middle value">
- <c:value-of name="matrix">
- <!-- row -->
- <c:index>
- <c:value-of name="mid_index" />
- </c:index>
-
- <!-- column -->
- <c:index>
- <c:value-of name="col" />
- </c:index>
- </c:value-of>
- </c:value>
- </c:values>
-
- <c:cases>
- <!-- if the middle value is lower than our value, then take the upper half -->
- <c:case>
- <c:when name="mid">
- <c:lt>
- <c:value-of name="val" />
- </c:lt>
- </c:when>
-
- <c:recurse start="mid_index" />
- </c:case>
-
- <!-- similarily, take the lower half if we over-shot -->
- <c:case>
- <c:when name="mid">
- <c:gt>
- <c:value-of name="val" />
- </c:gt>
- </c:when>
-
- <c:recurse end="mid_index" />
- </c:case>
-
- <!-- if we have an exact match, that doesn't necessarily mean that we
- have every value; we may have intersected a set of them -->
- <c:otherwise>
- <!-- this will return an exact index: the first index
- containing the element we've been looking for (it is a
- linear backwards search) -->
- <c:apply name="foremost" matrix="matrix" col="col" i="mid_index" />
- </c:otherwise>
- </c:cases>
- </c:let>
- </c:let>
- </c:otherwise>
- </c:cases>
- </c:let>
- </function>
-
-
- <function name="foremost" desc="Search backwards for the first occurrance in a sorted list">
- <param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
- <param name="col" type="integer" desc="Column index to search on" />
- <param name="i" type="integer" desc="Current index" />
-
- <c:let>
- <c:values>
- <!-- we calculate this rather than accept it via an argument so that
- this function may be called directly in a more convenient manner
- -->
- <c:value name="val" type="float" desc="Current value">
- <c:value-of name="matrix">
- <!-- row -->
- <c:index>
- <c:value-of name="i" />
- </c:index>
-
- <!-- column -->
- <c:index>
- <c:value-of name="col" />
- </c:index>
- </c:value-of>
- </c:value>
-
- <c:value name="prev" type="float" desc="Previous value">
- <c:value-of name="matrix">
- <!-- row -->
- <c:index>
- <t:dec>
- <c:value-of name="i" />
- </t:dec>
- </c:index>
-
- <!-- column -->
- <c:index>
- <c:value-of name="col" />
- </c:index>
- </c:value-of>
- </c:value>
- </c:values>
-
- <c:cases>
- <!-- if we have no more indexes to check, then we're done -->
- <c:case>
- <c:when name="i">
- <c:eq>
- <c:const value="0" type="integer" desc="Did we check the final (first) index?" />
- </c:eq>
- </c:when>
-
- <!-- well, then, we're done -->
- <c:value-of name="i" />
- </c:case>
-
- <!-- if the previous column value is the same value, then continue checking -->
- <c:case>
- <c:when name="prev">
- <c:eq>
- <c:value-of name="val" />
- </c:eq>
- </c:when>
-
- <c:recurse>
- <c:arg name="i">
- <t:dec>
- <c:value-of name="i" />
- </t:dec>
- </c:arg>
- </c:recurse>
- </c:case>
-
- <!-- otherwise, we've found the foremost index -->
- <c:otherwise>
- <c:value-of name="i" />
- </c:otherwise>
- </c:cases>
- </c:let>
- </function>
-
-
- <template name="_mask-unless_" desc="Mask a value unless the condition is truthful">
- <param name="@values@" desc="Body" />
- <param name="@name@" desc="Scalar to check" />
- <param name="@index@" desc="Optional index" />
- <param name="@desc@" desc="Optional description" />
-
- <c:cases>
- <!-- if masked -->
- <c:case>
- <!-- no index provided -->
- <unless name="@index@">
- <c:when name="@name@">
- <c:eq>
- <c:value-of name="FALSE" />
- </c:eq>
- </c:when>
- </unless>
-
- <!-- index provided -->
- <if name="@index@">
- <c:when name="@name@" index="@index@">
- <c:eq>
- <c:value-of name="FALSE" />
- </c:eq>
- </c:when>
- </if>
-
- <!-- TODO: configurable mask via meta and/or param -->
- <c:value-of name="TABLE_WHEN_MASK_VALUE" />
- </c:case>
-
- <!-- if not masked -->
- <c:otherwise>
- <param-copy name="@values@" />
- </c:otherwise>
- </c:cases>
- </template>
</package>