Browse Source

better atom summaries / links; used "posted" instead of "published"

main
Colin McMillen 1 year ago
parent
commit
13cfc84d8b
  1. 38
      build.py
  2. 2
      content/blog/20190403-update.md
  3. 2
      content/blog/20200209-sneak.md
  4. 2
      content/sigbovik/index.md

38
build.py

@ -8,7 +8,14 @@
# TODO: in template.html, add apple touch icon, maybe other favicon sizes.
# TODO: local mirrors of all papers in publications.html
# Requirements:
# sudo apt install python3-markdown
# sudo apt install python3-smartypants
# sudo apt install python3-bs4
import argparse
from bs4 import BeautifulSoup
import glob
import html
from io import StringIO
@ -48,12 +55,19 @@ def copy_static_files():
def find_update_date(text):
    """Return the post date declared in a page's Markdown source.

    Searches `text` for a line that starts with ``Posted YYYY-MM-DD``,
    optionally preceded by a single ``*`` (Markdown emphasis), e.g.
    ``*Posted 2020-02-09.*``.

    Args:
        text: The raw Markdown source of a page.

    Returns:
        The date as a ``YYYY-MM-DD`` string, or None when no such line
        exists.
    """
    # Only the "Posted" form is recognized. The older "Published" lookup was
    # a dead store: its result was overwritten before ever being read.
    match = re.search(r'^\*?Posted (\d{4}-\d{2}-\d{2})', text, re.MULTILINE)
    if not match:
        return None
    return match.group(1)
def find_summary(html_content):
    """Build a short plain-text summary from a page's rendered HTML.

    The HTML is reduced to its text content, split on newlines, and the
    lines at index 2 and 3 are joined with a single space.

    Args:
        html_content: Rendered HTML for the page body.

    Returns:
        The summary text, HTML-escaped with quote characters left as-is.
    """
    soup = BeautifulSoup(html_content, features='lxml')
    plain_lines = soup.get_text().split('\n')
    # NOTE(review): presumably indices 0-1 hold the title and the "Posted"
    # date line, so 2-3 are the start of the body; confirm against real pages.
    summary = ' '.join(plain_lines[2:4])
    return html.escape(summary, quote=False)
def process_markdown_files():
template = open('template.html').read()
for (dirpath, _, filenames) in os.walk(input_directory):
@ -77,7 +91,7 @@ def process_markdown_files():
else:
title = text
blog_entry['title'] = html.escape(title)
blog_entry['title'] = html.escape(title, quote=False)
title += ' | Colin McMillen'
if markdown_filename == os.path.join(input_directory, 'index.md'):
@ -92,17 +106,18 @@ def process_markdown_files():
if page_url.endswith('index.html'): # strip off index.html
page_url = page_url[:-len('index.html')]
html_content = markdown.markdown(
text, extensions=md_extensions, output_format='html5')
output = template.format(
title=title, content=html_content, page_url=page_url)
update_date = find_update_date(text)
if update_date:
blog_entry['url'] = 'https://www.mcmillen.dev/' + page_url
blog_entry['date'] = update_date
blog_entry['summary'] = find_summary(html_content)
blog_entries.append(blog_entry)
html_content = markdown.markdown(
text, extensions=md_extensions, output_format='html5')
output = template.format(
title=title, content=html_content, page_url=page_url)
os.makedirs(out_dirpath, exist_ok=True)
print_file(markdown_filename, out_fullpath)
out_file = open(out_fullpath, 'w')
@ -143,8 +158,9 @@ def make_atom_feed():
<title>{title}</title>
<id>{url}</id>
<link rel="alternate" href="{url}"/>
<content type="text/html" src="{url}"/>
<updated>{updated}</updated>
<summary>{summary}</summary>
<summary>{summary} (...)</summary>
</entry>
'''
@ -153,14 +169,14 @@ def make_atom_feed():
entries_io = StringIO()
last_update = None
for entry in blog_entries:
# We lie and pretend that all entries were written at noon UTC.
update_date = entry['date'] + 'T12:00:00+00:00'
# We lie and pretend that all entries were written at noon EDT (UTC-4).
update_date = entry['date'] + 'T12:00:00-04:00'
last_update = update_date
entries_io.write(entry_template.format(
url=entry['url'],
title=entry['title'],
updated=update_date,
summary='TODO: fill this out.'))
summary=entry['summary']))
entries_text = entries_io.getvalue()

2
content/blog/20190403-update.md

@ -1,6 +1,6 @@
# My first paper in 10 years?!
Published: 2019-04-03.
*Posted 2019-04-03.*
It's been nearly two months since my last day at Google, so I guess I should finally make use of this newsletter :)

2
content/blog/20200209-sneak.md

@ -1,6 +1,6 @@
# A new year & a sneaky new project
Published: 2020-02-09
*Posted 2020-02-09.*
I can't believe it's here so quickly, but: today marks a year since my last day at Google. That seemed like a good occasion to dust off this newsletter & let you know what I've been up to: making a videogame!

2
content/sigbovik/index.md

@ -1,6 +1,6 @@
# 93% of Paint Splatters are Valid Perl Programs
Published 2019-04-01.
*Posted 2019-04-01.*
TLDR: [read the paper](2019.pdf) and [view the gallery of pretty Perl programs](splatters.html).

Loading…
Cancel
Save