www-builder/build.py

#!/usr/bin/env python3

# Usage:
#
# cd ~/src/www-builder
# python3 build.py
# cd output
# cp -r * ~/src/www-home
# cd ~/src/www-home
# git status
# git add [some stuff]
# git commit
# git push

# TODO: replace gallery.tinyletterapp.com images with locally hosted content.
# TODO: in template.html, add apple touch icon, maybe other favicon sizes.
# TODO: local mirrors of all papers in publications.html

import glob
import markdown
import os
import re
import shutil


# Directories used by the build, relative to the current working directory.
input_directory = 'content'    # walked for *.md pages
static_directory = 'static'    # files copied into the output tree unchanged
output_directory = 'output'    # destination of the generated site

# Extension names handed to markdown.markdown() for every page.
md_extensions = [
  'fenced_code',
  'codehilite',
  'nl2br',
  'toc',
  'smarty',
  'tables',
  'linkify',
]


def print_file(in_file, out_file):
  """Print a single "source -> destination" progress line.

  The source is left-justified and padded with spaces to 62 columns so the
  arrows of consecutive lines line up; longer sources are printed in full.
  """
  padded_source = str(in_file).ljust(62)
  print(padded_source + ' -> ' + str(out_file))


def copy_static_files():
  """Mirror every file under static_directory into output_directory.

  The sub-directory layout below static_directory is reproduced below
  output_directory. Destination directories are created on demand, each copy
  is reported through print_file(), and shutil.copy2 is used so file metadata
  is carried over along with the contents.
  """
  for (dirpath, _, filenames) in os.walk(static_directory):
    if not filenames:
      continue  # Only directories that actually contain files are mirrored.

    # relpath gives the location below static_directory using the platform's
    # own separator; normpath collapses the '.' produced for the top level.
    rel_dir = os.path.relpath(dirpath, static_directory)
    dest_dir = os.path.normpath(os.path.join(output_directory, rel_dir))
    os.makedirs(dest_dir, exist_ok=True)

    for filename in filenames:
      source = os.path.join(dirpath, filename)
      dest = os.path.join(dest_dir, filename)
      print_file(source, dest)
      shutil.copy2(source, dest)


def _extract_title(text):
  """Return the first line of text with leading '#' and space characters removed."""
  first_line = text.partition('\n')[0]
  return first_line.lstrip('# ')


def process_markdown_files():
  """Render every *.md file under input_directory to HTML in output_directory.

  For each Markdown file:
    * a '# ' heading marker is prepended if the text does not start with one;
    * the page title is the first line (without its heading marker) followed
      by ' | Colin McMillen', except for the top-level index.md, whose title
      is just 'Colin McMillen';
    * the text is converted with markdown.markdown() using md_extensions;
    * the __TITLE_GOES_HERE__, __CONTENT_GOES_HERE__ and
      __PAGE_URL_GOES_HERE__ placeholders of template.html are filled in;
    * the result is written to the matching path below output_directory with
      the '.md' suffix replaced by '.html'.

  The page URL is the output path relative to output_directory, using '/'
  separators, with a trailing 'index.html' file name removed.

  template.html is read from the current working directory. All files are
  read and written as UTF-8 so the result does not depend on the locale.
  """
  with open('template.html', encoding='utf-8') as template_file:
    template = template_file.read()
  index_filename = os.path.join(input_directory, 'index.md')

  for (dirpath, _, filenames) in os.walk(input_directory):
    # Location of this directory below input_directory, mirrored under
    # output_directory; normpath collapses the '.' of the top level.
    rel_dir = os.path.relpath(dirpath, input_directory)
    out_dirpath = os.path.normpath(os.path.join(output_directory, rel_dir))

    for filename in filenames:
      if not filename.endswith('.md'):
        continue
      markdown_filename = os.path.join(dirpath, filename)

      with open(markdown_filename, encoding='utf-8') as markdown_file:
        text = markdown_file.read()

      if not text.startswith('# '):
        text = '# ' + text

      if markdown_filename == index_filename:
        title = 'Colin McMillen'
      else:
        title = _extract_title(text) + ' | Colin McMillen'

      # Swap only the trailing extension; '.md' elsewhere in the name stays.
      out_filename = filename[:-len('.md')] + '.html'
      out_fullpath = os.path.join(out_dirpath, out_filename)

      page_url = os.path.relpath(out_fullpath, output_directory)
      page_url = page_url.replace(os.sep, '/')
      # Strip index.html, but only when it is the whole file name (so a page
      # such as 'myindex.html' keeps its full URL).
      if out_filename == 'index.html':
        page_url = page_url[:-len('index.html')]

      html = markdown.markdown(text, extensions=md_extensions, output_format='html5')
      output = template.replace('__TITLE_GOES_HERE__', title)
      output = output.replace('__CONTENT_GOES_HERE__', html)
      output = output.replace('__PAGE_URL_GOES_HERE__', page_url)

      os.makedirs(out_dirpath, exist_ok=True)
      print_file(markdown_filename, out_fullpath)
      with open(out_fullpath, 'w', encoding='utf-8') as out_file:
        out_file.write(output)


def make_sitemap():
  """Write sitemap.txt in output_directory, listing one page URL per line.

  Every file below output_directory whose name ends in '.html' or '.pdf' is
  listed as 'https://www.mcmillen.dev/<path relative to output_directory>'.
  Paths that start with 'google' or 'drafts' (relative to output_directory)
  are left out. URLs are written in sorted order so repeated builds of the
  same content produce an identical file.

  The listing is built in Python rather than with a find/grep/perl shell
  pipeline, so it does not depend on those tools being installed and a
  failure raises an exception instead of being silently ignored.
  """
  base_url = 'https://www.mcmillen.dev'
  excluded_prefixes = ('google', 'drafts')

  urls = []
  for (dirpath, _, filenames) in os.walk(output_directory):
    for filename in filenames:
      if not filename.endswith(('.html', '.pdf')):
        continue
      rel_path = os.path.relpath(os.path.join(dirpath, filename), output_directory)
      rel_path = rel_path.replace(os.sep, '/')  # URLs always use '/'.
      if rel_path.startswith(excluded_prefixes):
        continue
      urls.append('%s/%s' % (base_url, rel_path))
  urls.sort()

  sitemap_filename = os.path.join(output_directory, 'sitemap.txt')
  with open(sitemap_filename, 'w', encoding='utf-8') as sitemap_file:
    sitemap_file.writelines(url + '\n' for url in urls)


def make_rss():
  """Placeholder for RSS feed generation; currently does nothing."""
  # TODO: implement.
  return None


def main():
  """Build the whole site into output_directory.

  Makes sure the output directory exists, then runs the build steps in order:
  static file copy, Markdown rendering, sitemap generation, RSS generation.
  """
  os.makedirs(output_directory, exist_ok=True)
  build_steps = (
    copy_static_files,
    process_markdown_files,
    make_sitemap,
    make_rss,
  )
  for build_step in build_steps:
    build_step()


# Run the build only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()