Mike Gerwitz

Free Software Hacker+Activist

aboutsummaryrefslogtreecommitdiffstats
blob: f00a48331bd4c87e6be215f429d7130c9280610a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/gawk -f
# Cache post data in metadata recutils file
#
#  Copyright (C) 2019 Mike Gerwitz
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Generates database of metadata for a given post in recutils format for use
# by other scripts.  The post must have already been converted to HTML using
# `post2html' or some equivalent means.
#
# This script is also responsible for determining what constitutes the
# abstract, which we consider to be everything after the subject line but
# before the end-of-abstract marker "<!-- more -->".  If no such marker
# exists then the script exits in error.
##

# Output the author and post date at the start of each input file.
#
# The date is simply the first ten characters of the file's basename, so
# this presumably expects posts to be named `YYYY-MM-DD-...' (TODO confirm
# against the naming scheme used by the rest of the build).
BEGINFILE {
    # Drop any leading directories.  name[0] receives the basename and is
    # read again further down when the slug is produced.
    match( FILENAME, /[^/]+$/, name )

    # TODO: configurable
    print "author: Mike Gerwitz <mtg@gnu.org>"

    # A plain substr is used rather than gensub with an empty `how'
    # argument, which gawk warns about ("third argument treated as 1").
    # Names shorter than ten characters are output unchanged, as before.
    printf "date: %s\n", substr( name[0], 1, 10 )
}

# Discard the HTML header: every line is skipped until the one that opens
# <main> has been seen.  That line itself (and everything after it) falls
# through to the rules below; this rule never clears `main' again.
!main {
    if ( $0 !~ /^ *<main>/ ) next
    main = 1
}


# A line beginning with "<h1 " inside <main> is the subject/title of the
# post.  This rule emits the `subject', `slug' and the first line of the
# `abstract' fields, then sets `a' so that later lines are treated as
# abstract continuation lines.
main && /^<h1 / {
    # Strip header tags from subject.
    print "subject: " gensub( /<\/?h[^>]+>/, "", "g" )

    # Build the slug from the file's basename (name[0], populated by the
    # BEGINFILE rule): `N1-N2-N3-rest.ext' becomes `N1/N2/rest'.  The `how'
    # argument is the number 1 (replace the first match); an empty string
    # there makes gawk emit a "third argument treated as 1" warning.
    printf "slug: %s\n", \
        gensub( /^([0-9]+)-([0-9]+)-[0-9]+-(.*)\.[a-z]+$/,
                "\\1/\\2/\\3",
                1,
                name[0] )

    # Skip the date line immediately following the header and grab the first
    # line of the abstract.
    getline
    getline

    printf "abstract: %s\n", $0

    # From here on, lines belong to the abstract.  `next' keeps the line
    # just output from also being emitted as a continuation line.
    a = 1
    next
}

# Remaining lines of the abstract.  A line starting with the
# end-of-abstract marker "<!-- more -->" stops processing successfully.
# Any other line seen once the abstract has begun (`a' is set) is output
# with a leading `+', the recutils line continuation marker.
{
    if ( /^<!-- more -->/ ) exit
    if ( a ) printf "+ %s\n", $0
}

# Reaching the end of the file means no end-of-abstract marker was found,
# since finding one exits before this point.  That is treated as an error
# so that a forgotten marker does not go unnoticed.  To make an entire post
# its own abstract, place the marker at the very end of the post.
ENDFILE {
    stderr = "/dev/stderr"

    print "error: missing '<!-- more -->'" > stderr
    exit 1
}