# This script takes the output from pandoc and converts it into the format needed by # the website at Zint.org.uk # # Warning: This code is ugly... but it saves days of manual effort updating the website. # # Copyright (C) 2022 # Works out which tags should influence indentation and puts them on their own line def isolate_tag(tag): global stage indentable_tag = True for keyword in indent_skip: if keyword in tag: indentable_tag = False if '': tag_buffer = tag_buffer.replace("\n", " ") isolate_tag(tag_buffer) tag = False text_buffer = "" f.close() manual = stage stage = "" print("Adjusting HTML") # Change the guts of the HTML tags in_dd = False to_remove = False remove_next = False span_literal = False for c in manual: if c == '<': # Remove "{#tbl:" table identifiers if '{#tbl:' in text_buffer: text_buffer = text_buffer[text_buffer.index('tag=') + 7:-3] text_buffer = text_buffer.replace('\n', ' ') text_buffer = '\n' + text_buffer + '\n' # Remove "{@tabl:" table references if 'tbl:' in text_buffer: text_buffer = '' stage += text_buffer tag = True tag_buffer = "" to_remove = False if (tag): tag_buffer += c else: text_buffer += c if c == '>': # Remove some tags which aren't needed on website if 'span' in tag_buffer: to_remove = True if 'div' in tag_buffer: to_remove = True if '"): in_dd = True if (tag_buffer == ""): in_dd = False if (in_dd and tag_buffer == '

'): to_remove = True if (in_dd and tag_buffer == '

'): to_remove = True # Remove attributes for some tags if '' span_literal = True if tag_buffer == '' and span_literal: tag_buffer = '' span_literal = False if not to_remove: stage += tag_buffer tag = False text_buffer = "" manual = stage stage = "" print("Removing empty lines") # Remove blank lines unless in between
 and 
last_char = '' in_pre = False for c in manual: if c == '<': tag = True tag_buffer = "" if (tag): tag_buffer += c else: text_buffer += c if c == '>': if ("': indentable_tag = True for keyword in indent_skip: if keyword in tag_buffer: indentable_tag = False # Protect the indentation in
 segments
        if ('
            if (indentation == 1):
                if (' section has it's own class
            if (indentation == 1) and (' section has it's own class
            if (indentation == 1) and (' data and split into output files
# Final stage: stream the HTML held in `manual` into the per-chapter output
# files listed in `out_filenames`, using the same character-by-character
# tag/text scanner as the earlier stages.
#
# This stage does not initialise `manual`, `stage`, `text_buffer` or `tag`;
# it relies on whatever values they hold when execution reaches this point.
# NOTE(review): presumably `stage` is "" and `tag` is False here - confirm
# against the preceding stage.
#
# One output file per page, in the order the pages are emitted.
out_filenames = ['chapter1.html', 'chapter2.html', 'chapter3.html', 'chapter4.html', 'chapter5.html',
                 'chapter6.0.html', 'chapter6.1.html', 'chapter6.2.html', 'chapter6.3.html', 'chapter6.4.html',
                 'chapter6.5.html', 'chapter6.6.html', 'chapter6.7.html', 'chapter7.html', 'appendixa.html', 'appendixb.html']
# Index into out_filenames of the file currently being written.
page = 0
print("Writing... ", out_filenames[page])
# NOTE(review): the file is opened without an explicit encoding (platform
# default applies) and without a context manager, so an exception inside the
# loop leaves the handle open.
f = open(out_filenames[page], "w")
# While True, text collected between tags is dropped instead of being appended
# to `stage`.
# NOTE(review): nothing in the visible code ever sets this to True, so as
# written text is always kept - the assignments appear to have been lost
# together with the condition flagged below.
h2_tag = False
for c in manual:
    # '<' starts a new tag: flush the text gathered since the previous tag
    # (unless suppressed) and start collecting the tag itself.
    if c == '<':
        if h2_tag == False:
            stage += text_buffer
        tag = True
        tag_buffer = ""
    
    # Route the character to the tag buffer or the text buffer depending on
    # whether we are currently inside a tag.
    if (tag):
        tag_buffer += c
    else:
        text_buffer += c
        
    # '>' ends the current tag: either split to the next output file or keep
    # the tag in the current page.
    if c == '>':
        # NOTE(review): the condition is the empty string literal, which is
        # always falsy, so the page-split branch below can never run and every
        # tag is appended to `stage`. The original test (which tag triggers a
        # new page) is missing from this copy of the file and must be
        # restored; do not guess it.
        if '':
            # Page split: write out the finished page and open the next file.
            # The tag that triggered the split is not appended to `stage`.
            f.write(stage)
            f.close()
            stage = ""
            page += 1
            # Raises IndexError if there are more splits than the 16 names in
            # out_filenames.
            print("Writing... ", out_filenames[page])
            f = open(out_filenames[page], "w")
        else:
            stage += tag_buffer
        tag = False
        text_buffer = ""

# Write whatever remains in `stage` to the last opened file. Text following
# the final '>' in `manual` is still in text_buffer and is not written.
f.write(stage)
f.close()