Harmonise manual versions and add auto-formatting tool for Zint.org.uk website

This commit is contained in:
Robin Stuart 2022-07-19 12:33:51 +01:00
parent 78bda3b359
commit bc0c745a93
4 changed files with 412 additions and 1 deletions

View file

@ -7,7 +7,7 @@
SOURCE = manual.pmd
OUT_PDF = manual.pdf
OUT_TXT = manual.txt
HIGHLIGHT_THEME = pygments.theme
HIGHLIGHT_THEME = vs.theme
INC_HEADER_PDF = inc_header_pdf.tex
INC_BEFORE_BODY_PDF = inc_before_body_pdf.tex
INCLUDES_PDF = $(INC_HEADER_PDF) $(INC_BEFORE_BODY_PDF)

View file

@ -4008,6 +4008,8 @@ countries.
Mac and macOS are trademarks of Apple Inc., registered in the U.S. and other
countries.
The Zint logo derived from "SF Planetary Orbiter" font by ShyFoundary
Zint.org.uk website design and hosting provided by Robert Elliott.
## 7.2 Patent Issues

399
docs/zint_org_uk.py Normal file
View file

@ -0,0 +1,399 @@
# Works out which tags should influence indentation and puts them on their own line
def isolate_tag(tag):
global stage
indentable_tag = True
for keyword in indent_skip:
if keyword in tag:
indentable_tag = False
if '</' in tag:
# Close tag
if (indentable_tag):
stage += "\n"
stage += tag
stage += "\n"
else:
stage += tag
else:
# Open tag
if (indentable_tag):
stage += "\n"
stage += tag
stage += "\n"
else:
stage += tag
# Add the right amount of indendation (indentation X 4 spaces)
def add_indent():
global indentation
retval = ""
for i in range(0,indentation):
retval += " "
return retval
# Apply indentation to text
def with_indent(text):
global indentation
retval = ""
d = ''
for c in text:
if d == '\n':
retval += d
retval += add_indent()
else:
retval += d
d = c
retval += d
return retval
# Read file and pull some tags onto their own lines for later processing
manual = ""
tag = False
tag_buffer = ""
text_buffer = ""
stage = ""
indent_skip = ['img', 'code', 'pre', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'span', '<a', '</a', 'sup', '<col', '</col', '<hr', 'div']
print("Reading... manual.html")
with open('manual.html') as f:
manual = f.read()
for c in manual:
if c == '<':
stage += text_buffer
tag = True
tag_buffer = ""
if (tag):
tag_buffer += c
else:
text_buffer += c
if c == '>':
tag_buffer = tag_buffer.replace("\n", " ")
isolate_tag(tag_buffer)
tag = False
text_buffer = ""
f.close()
manual = stage
stage = ""
print("Adjusting HTML")
# Change the guts of the HTML tags
in_dd = False
to_remove = False
remove_next = False
span_literal = False
for c in manual:
if c == '<':
# Remove "{#tbl:" table identifiers
if '{#tbl:' in text_buffer:
text_buffer = text_buffer[text_buffer.index('tag=') + 7:-3]
text_buffer = text_buffer.replace('\n', ' ')
text_buffer = '\n' + text_buffer + '\n'
# Remove "{@tabl:" table references
if 'tbl:' in text_buffer:
text_buffer = ''
stage += text_buffer
tag = True
tag_buffer = ""
to_remove = False
if (tag):
tag_buffer += c
else:
text_buffer += c
if c == '>':
# Remove some tags which aren't needed on website
if 'span' in tag_buffer:
to_remove = True
if 'div' in tag_buffer:
to_remove = True
if '<col' in tag_buffer:
to_remove = True
if '</col' in tag_buffer:
to_remove = True
if (remove_next):
to_remove = True
remove_next = False
if ('a href' in tag_buffer) and ('aria-hidden="true"' in tag_buffer):
to_remove = True
remove_next = True
if '<a href="#' in tag_buffer:
to_remove = True
remove_next = True
# Don't allow <p> and </p> between <dd> and </dd>
if (tag_buffer == "<dd>"):
in_dd = True
if (tag_buffer == "</dd>"):
in_dd = False
if (in_dd and tag_buffer == '<p>'):
to_remove = True
if (in_dd and tag_buffer == '</p>'):
to_remove = True
# Remove attributes for some tags
if '<pre' in tag_buffer:
tag_buffer = '<pre>'
if '<table' in tag_buffer:
tag_buffer = '<table>'
if '<tr' in tag_buffer:
tag_buffer = '<tr>'
if '<td' in tag_buffer:
tag_buffer = '<td>'
if '<th ' in tag_buffer:
tag_buffer = '<th>'
# Bump all headers up one level
tag_buffer = tag_buffer.replace('<h6', '<h7')
tag_buffer = tag_buffer.replace('</h6', '</h7')
tag_buffer = tag_buffer.replace('<h5', '<h6')
tag_buffer = tag_buffer.replace('</h5', '</h6')
tag_buffer = tag_buffer.replace('<h4', '<h5')
tag_buffer = tag_buffer.replace('</h4', '</h5')
tag_buffer = tag_buffer.replace('<h3', '<h4')
tag_buffer = tag_buffer.replace('</h3', '</h4')
tag_buffer = tag_buffer.replace('<h2', '<h3')
tag_buffer = tag_buffer.replace('</h2', '</h3')
tag_buffer = tag_buffer.replace('<h1', '<h2')
tag_buffer = tag_buffer.replace('</h1', '</h2')
# Change class names for code snippets
tag_buffer = tag_buffer.replace('class="sourceCode bash"', 'class="language-bash"')
tag_buffer = tag_buffer.replace('class="sourceCode c"', 'class="language-cpp"')
# Change location of images
tag_buffer = tag_buffer.replace('src="images/', 'src="/images/manual/')
# Change <code> without language to <span>
if tag_buffer == '<code>':
tag_buffer = '<span class="literal">'
span_literal = True
if tag_buffer == '</code>' and span_literal:
tag_buffer = '</span>'
span_literal = False
if not to_remove:
stage += tag_buffer
tag = False
text_buffer = ""
manual = stage
stage = ""
print("Removing empty lines")
# Remove blank lines unless in between <pre> and </pre>
last_char = ''
in_pre = False
for c in manual:
if c == '<':
tag = True
tag_buffer = ""
if (tag):
tag_buffer += c
else:
text_buffer += c
if c == '>':
if ("<pre" in tag_buffer):
in_pre = True
if ("</pre" in tag_buffer):
in_pre = False
tag = False
text_buffer = ""
if c == '\n':
if (last_char != '\n') or (in_pre == True):
stage += c
else:
stage += c
last_char = c
manual = stage
stage = ""
print("Applying indentation")
# Indent the code to make it easier to read
indentation = 1
in_pre = False
paragraph_block = False
document_start = True
chapter_six = False
last_char = ''
for c in manual:
if c == '<':
#Fix 'floating' full stops
text_buffer = text_buffer.replace(' . ', '. ')
# Apply indentation to text
if in_pre:
stage += text_buffer
else:
stage += with_indent(text_buffer)
tag = True
tag_buffer = ""
if (tag):
tag_buffer += c
else:
# Strip '{}' from already removed table references
if c == '}' and last_char == '{':
text_buffer = text_buffer[:-1]
else:
text_buffer += c
last_char = c
if c == '>':
indentable_tag = True
for keyword in indent_skip:
if keyword in tag_buffer:
indentable_tag = False
# Protect the indentation in <pre> segments
if ('<pre' in tag_buffer):
in_pre = True
if ('</pre' in tag_buffer):
in_pre = False
# Chapter 6 requires special treatment - detect beginning and end
if ('id="types-of-symbology"' in tag_buffer):
chapter_six = True
if ('id="legal-and-version-information"' in tag_buffer):
chapter_six = False
if '</' in tag_buffer:
# Close tag
if (indentable_tag):
indentation -= 1
stage += add_indent()
stage += tag_buffer
else:
if text_buffer.endswith('\n'):
stage += add_indent()
stage += tag_buffer
else:
# Split into sections
if (indentation == 1) and ('<p' in tag_buffer):
if not paragraph_block:
if document_start:
document_start = False
else:
stage += '</section>\n'
stage += '<section class="container">\n'
paragraph_block = True
# Handle headers but also decide where to split into multiple HTML files and mark with <page>
if (indentation == 1):
if ('<h2' in tag_buffer):
if document_start:
document_start = False
stage += '<section class="container">\n'
paragraph_block = True
else:
stage += '</section>\n'
stage += '<page>\n'
stage += '<section class="container">\n'
paragraph_block = True
elif ('<h3' in tag_buffer) and chapter_six:
stage += '</section>\n'
stage += '<page>\n'
stage += '<section class="container">\n'
paragraph_block = True
elif ('<h' in tag_buffer):
if not paragraph_block:
stage += '</section>\n'
stage += '<section class="container">\n'
paragraph_block = True
# <dl> section has it's own class
if (indentation == 1) and ('<dl' in tag_buffer):
stage += '</section>\n'
stage += '<section class="definition-list container">\n'
paragraph_block = False
# <table> section has it's own class
if (indentation == 1) and ('<table' in tag_buffer):
stage += '</section>\n'
stage += '<section class="table">\n'
paragraph_block = False
# Open tag
if (indentable_tag):
stage += add_indent()
stage += tag_buffer
indentation += 1
else:
if text_buffer.endswith('\n'):
stage += add_indent()
stage += tag_buffer
tag = False
text_buffer = ""
stage += '\n</section>\n'
manual = stage
stage = ""
# Remove <h2> data and split into output files
out_filenames = ['chapter1.html', 'chapter2.html', 'chapter3.html', 'chapter4.html', 'chapter5.html',
'chapter6.0.html', 'chapter6.1.html', 'chapter6.2.html', 'chapter6.3.html', 'chapter6.4.html',
'chapter6.5.html', 'chapter6.6.html', 'chapter6.7.html', 'chapter7.html', 'appendixa.html', 'appendixb.html']
page = 0
print("Writing... ", out_filenames[page])
f = open(out_filenames[page], "w")
h2_tag = False
for c in manual:
if c == '<':
if h2_tag == False:
stage += text_buffer
tag = True
tag_buffer = ""
if (tag):
tag_buffer += c
else:
text_buffer += c
if c == '>':
if '<h2' in tag_buffer:
h2_tag = True
elif '</h2' in tag_buffer:
h2_tag = False
elif tag_buffer == '<page>':
f.write(stage)
f.close()
stage = ""
page += 1
print("Writing... ", out_filenames[page])
f = open(out_filenames[page], "w")
else:
stage += tag_buffer
tag = False
text_buffer = ""
f.write(stage)
f.close()

10
docs/zint_org_uk.sh Executable file
View file

@ -0,0 +1,10 @@
#/bin/sh
rm -r ./HTML
pandoc -o manual.html manual.pmd
python3 zint_org_uk.py
rm ./chapter6.0.html
mkdir HTML
mv chapter*.html ./HTML
mv appendix*.html ./HTML
cd ./HTML