Mike Gerwitz

Activist for User Freedom

aboutsummaryrefslogtreecommitdiffstats
path: root/msgfmt
blob: be734f0cf5bcab70753c830d210f61db949f9837 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/bin/bash
#
# Formats a Git commit message
#
#  Copyright (C) 2012  Mike Gerwitz
#
#  This file is part of repo2html.
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
# #

# commit identifier supplied by the caller, if any (only used when reporting
# crefs that could not be resolved)
id=$1

# default replacement text: HTML entities and paragraph tags (the entity
# ampersands carry a backslash because these values are spliced into sed
# replacement strings, where a bare & stands for the matched text)
lquo="\\&ldquo;"
rquo="\\&rdquo;"
mdash="\\&mdash;"
opar="<p>"
epar="</p>"

# switches the typographic replacements over to plain text, for output that
# is not HTML: straight double quotes and a triple-dash
nohtml()
{
  mdash='---'
  lquo='"'
  rquo=$lquo
}

# suppresses paragraph tags by blanking both the opening and closing tag
nopar()
{
  opar='' epar=''
}


# Parse the command-line flags and the optional positional id.
#
#   -n  emit plain text instead of HTML entities (see nohtml)
#   -P  do not emit paragraph tags (see nopar)
#
# The id is taken from the first operand left over once the flags have been
# consumed.  Reading it from the raw $1 yields the flag itself (e.g. "-n")
# whenever a flag is given, and that bogus value could then end up in the
# cref error log.
#
# Globals:   id (written); whatever nohtml and nopar modify
# Arguments: the script arguments ("$@")
parse_options()
{
  local opt OPTIND=1

  while getopts nP opt; do
    case "$opt" in
      n) nohtml;;
      P) nopar;;
      # unknown flag: getopts has already complained on stderr; carry on
      *) ;;
    esac
  done

  # drop the flags so that $1 is the first operand (if any)
  shift $((OPTIND - 1))
  id="${1-}"
}

parse_options "$@"

# calculate this after options have been parsed
#
# refopar is the opening tag used for reference (footnote) definitions: it is
# derived from opar by dropping its trailing ">" and appending id and class
# attributes.  The backslash-1 it carries is a sed backreference, filled in
# with the reference number once the value is spliced into the sed
# replacement for reference definitions.  When opar is empty (paragraph tags
# disabled), refopar is empty as well.
#
# NOTE(review): the attribute quotes are nested inside an already
# double-quoted expansion; confirm that the shell keeps them in the resulting
# value as intended.
refopar="${opar:+${opar%>} id="ref-\\1" class="ref">}"

# format the commit message, stopping at the diff (if any)
#
# Reads the raw commit message on stdin and writes the formatted result to
# stdout.  Two stages:
#
#   1. awk resolves [cref:...] commit references to URLs through ./hashcache
#      and cuts the input off at the first "diff --git" line;
#   2. sed escapes HTML and applies the lightweight markup rules (links,
#      blockquotes, footnote references, paragraphs, typographic
#      replacements).
#
# Uses the shell variables id, opar, epar, refopar, mdash, lquo and rquo set
# earlier in this script, plus url_root and cref_errlog, which this script
# does not set itself.  The id is appended to the file named by cref_errlog
# for every line containing a cref that could not be resolved.
awk -v id="$id" -v url_root="${url_root%/}" -v cref_errlog="$cref_errlog" '
    # wrap a string in single quotes so that the shell treats it as exactly
    # one literal argument (\047 is the single-quote character, which cannot
    # be written literally inside this program)
    function shquote(s,    quoted, pos) {
      quoted = ""
      while ((pos = index(s, "\047")) > 0) {
        quoted = quoted substr(s, 1, pos - 1) "\047\\\047\047"
        s = substr(s, pos + 1)
      }
      return "\047" quoted s "\047"
    }

    # replace commit refs with generated URL (allows linking to prior commits
    # without hard-coding the configurable links that could change or be
    # relative to where the content is hosted); this will then be processed as a
    # normal URL by the remainder of the script
    index($0, "[cref:") {
      rest = $0
      head = ""
      unresolved = 0

      # every cref on the line is looked up on its own; the bracket
      # expression stops at the first closing bracket so that a cref followed
      # by other bracketed text is not swallowed whole
      while (match(rest, /\[cref:[^]]*\]/)) {
        start = RSTART
        len = RLENGTH

        # strip the leading "[cref:" (6 chars) and the trailing "]"
        ref = substr(rest, start + 6, len - 7)

        # retrieve the URL from the hashcache; the result is cleared first so
        # that a failed lookup cannot reuse the URL of an earlier cref, and
        # the pipe is closed so that the same ref can be looked up again
        cmd = "./hashcache " shquote(ref)
        result = ""
        cmd | getline result
        close(cmd)

        if ( result == "" ) {
          unresolved = 1
        }

        head = head substr(rest, 1, start - 1) url_root "/" result
        rest = substr(rest, start + len)
      }

      # if a cref error logfile path was provided, log unknown refs so that they
      # can be re-processed (if commits are processed in reverse order and the
      # hashcache is cleared before the run, then this is likely to occur for
      # every cref)
      if ( unresolved && cref_errlog && id ) {
        print id >> cref_errlog
      }

      # the replacement is reflected once we print the line
      $0 = head rest
    }

    # stop printing at diff
    /^diff --git/ { exit }

    # otherwise, print everything
    { print }
  ' \
  | sed ':a
    # gather the whole message into the pattern space.  N is only issued while
    # more input remains: GNU sed prints the pattern space and quits when N
    # runs on the last line, which let single-line messages through without
    # any of the processing below (HTML escaping included)
    $!{
      N
      ba
    }

    # handle <>-delimited links (strip delimiters)
    s#<\([fh]ttps\?://[^ ]\+\)>#\1#g;

    # escaping
    s/\&/\&amp;/g;
    s/</\&lt;/g;
    s/>/\&gt;/g;

    # quoting (initiated by an indented paragraph and terminated by a new
    # paragraph, unless that paragraph is also indented)
    s#\n\n  \+\(\([^\n]\+\n\(\n  \+\)\?\)\+\)#<blockquote>\1</blockquote>#g

    # NOTE: a markdown-style pre-formatted block rule used to follow here, but
    # it was a verbatim copy of the quoting rule above; its second pass
    # re-matched inside multi-paragraph quotes and produced mis-nested
    # blockquote tags.  Pre-formatted blocks are not implemented.

    # unfortunately, non-greedy matches make it difficult to exclude punctuation
    # at the end of a link, so we will handle it in a separate expression
    s#[fh]ttps\?://[^]\n )]\+#<a href="&">&</a>#g;
    s#<a href="\([^"]\+\)\([.;,!]\)">\([^<]\+\).</a>#<a href="\1">\3</a>\2#g;

    # reference definitions (footnotes)
    s#\n\[\([0-9]\+\)\]#'"$epar$refopar"'&#g;

    # references in text (note that references that enclose text as a hyperlink
    # must not start with a number, otherwise they will be considered to be a
    # reference number)
    s|\[\([^0-9][^]]\+\)\]\[\([0-9]\+\)\]|<a href="#ref-\2">\1</a>\[\2\]|g
    s|\[\([0-9]\+\)\]|<sup><a href="#ref-\1">&</a></sup>|g

    # paragraphs
    s#\n\n#'"$epar"'&'"$opar"'#g;
    /^/i'"$opar"'
    /$/a'"$epar"'

    # basic formatting
    s/---/'"$mdash"'/g;
    s#``#'"$lquo"'#g;
    s#'\'\''#'"$rquo"'#g;
    s#\(\W\)\*\*\([^\*]\+\)\*\*\(\W\)#\1<strong>\2</strong>\3#g;
    s#\(\W\)\*\([^\*]\+\)\*\(\W\)#\1<em>\2</em>\3#g;
  '