-
Notifications
You must be signed in to change notification settings - Fork 229
/
parse.py
executable file
·314 lines (263 loc) · 8.51 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
#!/usr/bin/env python3
#
# Usage: parse.py [filename]
# Converts text to html file.
from collections import namedtuple
import sys
import re
# When True, parse_batch() renders each Cmd with repr() instead of str().
DEBUG = False
# Input file read by main() when no filename is given on the command line.
FILENAME = 'linux-cheatsheet.txt'
# HTML template read by insert_in_template().
TEMP = 'web/template.html'
# Template lines containing this marker are replaced by the generated HTML.
TEMP_ANCHOR = '<!-- INSERT_HERE -->'
# Row that get_table() joins between consecutive batches.
BATCH_SEPARATOR = "<tr><td width=155px></td><td></td></tr>"
# Number of leading input lines main() drops before parsing.
SKIP_LINES = 14
# Separator between a command/option name and its description.
SEP = ' — '
# Values (in px) used for the `padding-top` style emitted by Cmd.__str__.
TOP_PADDING = 7
SMALL_TOP_PADDING = 1
###
## MAIN
#
def main():
    '''Convert the text file to HTML and print the result to stdout.

    The input is FILENAME, or the first command-line argument when given.
    The first SKIP_LINES lines are dropped. The remaining lines are sorted
    into four kinds of content, each buffered until it is complete and then
    appended to the output by check_par_and_table():

    * code      - lines between two lines starting with '```',
    * titles    - whatever get_title() recognizes,
    * paragraph - lines starting with a non-blank character, without SEP,
    * table     - any other line that is not blank.

    A blank line flushes all pending buffers.
    '''
    # Paragraph and code block do not contain ' — ' and begin at the start of
    # line. Code block is bounded by '```'
    filename = FILENAME if len(sys.argv) < 2 else sys.argv[1]
    lines = [convert_specials(a) for a in read_file(filename)]
    lines = lines[SKIP_LINES:]
    out = []
    table = []
    code = []
    in_code = False
    paragraph = []
    # An explicit iterator is needed because get_title() consumes extra lines
    # from it when it recognizes a multi-line heading.
    lines = iter(lines)
    for line in lines:
        if line.startswith('```'):
            # A fence toggles code mode; flush whatever was pending before it.
            check_par_and_table(out, paragraph, table, code)
            in_code = not in_code
            continue
        if in_code:
            code.append(line)
            continue
        title = get_title(line, lines)
        if title:
            check_par_and_table(out, paragraph, table, code)
            out.append(title)
            continue
        in_paragraph = re.match(r'\S', line) and SEP not in line
        if in_paragraph:
            paragraph.append(line)
            continue
        if re.search(r'\S+', line):
            # A table line ends a paragraph in progress.
            if paragraph:
                out.append(get_paragraph(paragraph))
                paragraph = []
            table.append(line)
            continue
        # Blank line: everything collected so far is complete.
        check_par_and_table(out, paragraph, table, code)
    # Flush whatever is still pending at the end of the input.
    check_par_and_table(out, paragraph, table, code)
    out = parse_inline_code(out)
    print(insert_in_template(out))
def check_par_and_table(out, paragraph, table, code):
    '''Flush the pending paragraph, table and code buffers into `out`.

    Each non-empty buffer is rendered with its matching get_*() function,
    the result is appended to `out`, and the buffer is emptied in place.
    The order is always paragraph, table, code.
    '''
    pending = (
        (paragraph, get_paragraph),
        (table, get_table),
        (code, get_code),
    )
    for buffer, render in pending:
        if not buffer:
            continue
        out.append(render(buffer))
        buffer.clear()
def parse_inline_code(lines):
    '''Return `lines` with every `backticked` span wrapped in <code> tags.

    The backticks are replaced by single quotes placed around the
    <code> element.
    '''
    backticked = re.compile('`(.*?)`')
    replacement = "'<code>\\1</code>'"
    return [backticked.sub(replacement, text) for text in lines]
def get_code(lines):
    '''Return the lines as one newline-joined <pre><code> block.'''
    body = '\n'.join(lines)
    return '<pre><code>' + body + '</code></pre>'
def get_paragraph(lines):
    '''Return the lines joined by single spaces inside a <p> element.'''
    return '<p>{}</p>'.format(' '.join(lines))
# True until the first '####' heading has been emitted; that first heading
# is rendered without the leading spacer markup.
first_h1 = True


def get_title(line, lines):
    '''Return the HTML heading that starts at `line`, or None.

    Three heading forms are recognized:

    * A line starting with '####': the next line holds the title (with '#'
      and spaces stripped) and the line after it is skipped. Rendered as
      <h5>, preceded by spacer markup except for the first such heading.
      The title 'GNOME' is additionally followed by a <br>.
    * A line starting with '====': the next line holds the title (with ':'
      and spaces stripped) and the line after it is skipped. Rendered as
      <h2> with a self-referencing '#' anchor.
    * A line made only of capital letters, parentheses, spaces and slashes
      followed by ':'. Rendered as <h3>.

    `lines` is the iterator `line` came from; the extra lines of a
    multi-line heading are consumed from it. If the input ends inside a
    heading, the missing lines are treated as empty instead of letting
    StopIteration escape into the caller's loop.
    '''
    global first_h1
    if line.startswith('####'):
        title = next(lines, '').strip('# ')
        next(lines, None)  # skip the closing '####' line
        pre = '\n<p><br><p>\n'
        post = '\n<br>\n' if title == 'GNOME' else ''
        if first_h1:
            pre = ''
            first_h1 = False
        return f'{pre}{format_title(title, 5)}{post}'
    if line.startswith('===='):
        title = next(lines, '').strip(': ')
        next(lines, None)  # skip the closing '====' line
        link_id = title.replace(' ', '_').lower()
        link = f'<a href="#{link_id}" name="{link_id}">#</a>'
        return format_title(title, 2, a=link)
    if re.match(r'^[A-Z\(\) /]+:\s*$', line):
        title = line.strip(': ')
        return format_title(title, 3)
    return None
def format_title(text, a_size, a=''):
    '''Return `text` in title case inside an <h{a_size}> element.

    `a` is extra markup placed right after the text, inside the heading.
    '''
    tag = f'h{a_size}'
    return '<{0}>{1}{2}</{0}>'.format(tag, text.title(), a)
###
## TABLE
#
def get_table(lines):
    '''Return the table lines rendered as one HTML <table>.

    Blank lines are dropped, the rest is grouped into batches by
    get_line_batches(), each batch is rendered by parse_batch(), and the
    rendered batches are joined with BATCH_SEPARATOR.
    '''
    rows = [a for a in lines if re.search(r'\S', a)]
    batches = [parse_batch(a) for a in get_line_batches(rows)]
    body = BATCH_SEPARATOR.join(batches)
    return f'<table width=780 style="border-spacing: 0px"><tbody>\n{body}\n' \
           f'</tbody></table><br>'
def get_line_batches(lines):
    '''Split `lines` into batches and return them as a list of lists.

    A new batch starts at every line whose first character is not
    whitespace; lines that start with whitespace (or are empty) stay in
    the current batch. Leading lines that start with whitespace form a
    batch of their own.
    '''
    out, batch = [], []
    for line in lines:
        starts_batch = re.match(r'\S', line)
        if starts_batch and batch:
            out.append(batch)
            batch = []
        batch.append(line)
    if batch:
        out.append(batch)
    return out
# An option of a command: its name and the list of its description lines.
Option = namedtuple('Option', ['name', 'desc'])


class Cmd:
    '''A command with its description lines and options, renderable as HTML.

    Text is added with append(); it goes to the command's own description
    until add_option() is called, and to the most recently added option's
    description afterwards.
    '''

    def __init__(self, name):
        self.name = name.strip()
        self.desc = []
        self.options = []
        # The list that append() currently writes to.
        self.last = self.desc

    def append(self, text):
        '''Add a line of text to the description currently being built.'''
        self.last.append(text)

    def add_option(self, name):
        '''Start a new option; following append() calls describe it.'''
        self.options.append(Option(name.strip(), []))
        self.last = self.options[-1].desc

    def __repr__(self):
        return str({'name': self.name, 'desc': format_desc(self.desc),
                    'options': self.options})

    def __str__(self):
        '''Return the command as HTML table row markup.

        The name cell comes first, then the description cell (if any),
        then the options as a nested table: in a row of their own when
        there is a description, otherwise in the name's row.
        '''
        big_pad = self.options and (len(self.options) > 1 or self.desc)
        top_padding_this = TOP_PADDING if big_pad else SMALL_TOP_PADDING
        out = [
            f'<tr><td style="padding-right: 10px;padding-top: '
            f'{top_padding_this}px;width: 155px" valign="top"><strong>'
            f'<code>{self.name}</code></strong></td>'
        ]
        if self.desc:
            out.append(
                f'<td style="padding-top: {top_padding_this}px" '
                f'valign="top">{format_desc(self.desc)}</td></tr>\n'
            )
        options_str = []
        for i, opt in enumerate(self.options):
            # Only the first option of a description-less command shares the
            # name's row, so only it gets the row's top padding.
            top_padding = top_padding_this if i == 0 and not self.desc else 0
            options_str.append(
                f'<tr> <td style="width:1px;white-space:nowrap'
                f';padding-right:10px;padding-top:{top_padding}px" '
                f'valign="top"><strong><code>{opt.name}</code></strong>'
                f'</td><td style="padding-top:{top_padding}px" '
                f'valign="top">{format_desc(opt.desc)}</td></tr>\n'
            )
        if options_str:
            options = ''.join(options_str)
            options_table = '<table style="border-spacing: 0px">\n' \
                            f'{options}\n</table>'
            if self.desc:
                out.append(f'<tr> <td></td> <td> {options_table} </td> </tr>\n')
            else:
                out.append(f'<td valign="top"> {options_table} </td> </tr>\n')
        return ''.join(out)
def parse_batch(lines):
    '''Render one batch of table lines as HTML and return it as a string.

    A line starting with a non-whitespace character begins a new command;
    every other line is handed to process_desc_or_opt() as a continuation
    of the current command (more description text or an option).

    Input example (indentation is illustrative; only "starts with
    whitespace or not" matters)
    -------------
    apt-get — Advanced Package Tool built on top of dpkg. New command called
        simply `apt` is also available. It merges the functionalities of
        `apt-get` and `apt-cache`.
      update — Updates local list of existing packages.
      -u dist-upgrade — Upgrades by intelligently handling changing
        dependencies with new versions of packages. To regularly update
        put this line: `apt-get update && apt-get -u dist-upgrade` in
        `crontab`.

    Elements
    --------
    ### Name and description
    abc — abc abd ..
        abc abc

    ### Name and option
    abc abc — abd ..
        abc abc

    ### Option
      abc — abc abc ...
        abc abc
    '''
    out = []
    cmd = None
    for line in lines:
        new_command = re.match(r'\S', line)
        if not new_command:
            process_desc_or_opt(line, cmd)
            continue
        if cmd is not None:
            out.append(cmd)
        tokens = line.split(SEP, 1)
        name = tokens[0]
        if ' ' in name:
            # "name option — desc": the first word is the command name and
            # the rest is re-attached to the text so that it is parsed as
            # the command's first option.
            name, option = name.split(' ', 1)
            if len(tokens) < 2:
                tokens.append('')
            tokens[1] = f'{option}{SEP}{tokens[1]}'
        cmd = Cmd(name)
        if len(tokens) > 1:
            process_desc_or_opt(tokens[1], cmd)
    if cmd is not None:
        out.append(cmd)
    out_f = repr if DEBUG else str
    return ''.join(map(out_f, out))
def process_desc_or_opt(line, cmd):
    '''Add `line` to `cmd` as description text or as a new option.

    * A line ending in ' —' starts an option whose name is the line
      without its last two characters.
    * A line without SEP is appended to the description currently being
      built (the command's own or its latest option's).
    * Otherwise the line is split at the first SEP into an option name and
      the first line of that option's description.

    If `cmd` is None (the line does not belong to any command), a message
    is printed to stderr and the program exits with status 1.
    '''
    if cmd is None:
        print('cmd is none', line, file=sys.stderr)
        sys.exit(1)
    if line.endswith(' —'):
        cmd.add_option(line[:-2])
        return
    if SEP not in line:
        cmd.append(line)
        return
    name, desc = line.split(SEP, 1)
    cmd.add_option(name)
    cmd.append(desc)
def insert_in_template(text):
    '''Return the TEMP template with the generated HTML inserted.

    `text` is a list of HTML fragments. Every template line that contains
    TEMP_ANCHOR is replaced as a whole by the newline-joined fragments;
    all other lines are kept. The result is one newline-joined string.
    '''
    out = []
    for line in read_file(TEMP):
        # The anchor is a literal marker, so test plain containment rather
        # than interpreting it as a regular expression.
        if TEMP_ANCHOR in line:
            out.append('\n'.join(text))
            continue
        out.append(line)
    return '\n'.join(out)
###
## UTIL
#
def convert_specials(line):
    '''Return `line` with '<' and '>' replaced by their HTML entities.

    Only the two angle brackets are converted; '&' and quotes are left
    untouched.
    '''
    line = line.replace('<', '&lt;')
    line = line.replace('>', '&gt;')
    return line
def format_desc(lines):
    '''Return the description lines joined into one space-separated string.

    A space at the very end of a line is turned into '<br>'; every line
    is then stripped of surrounding whitespace before joining.
    '''
    pieces = (re.sub(' $', '<br>', text).strip() for text in lines)
    return ' '.join(pieces)
def read_file(filename):
    '''Return the lines of the UTF-8 text file `filename` as a list.

    Newline characters are stripped from both ends of every line.
    '''
    with open(filename, encoding='utf-8') as handle:
        return [row.strip('\n') for row in handle.readlines()]
# Run the conversion only when the file is executed as a script.
if __name__ == '__main__':
    main()