better atom summaries / links; used "posted" instead of "published"

2021-07-01 01:00:33 -04:00 · 2021-07-01 01:00:33 -04:00 · 13cfc84d8b
commit 13cfc84d8b
parent bfba1c36d0
4 changed files with 31 additions and 15 deletions
--- a/build.py
+++ b/build.py
@@ -8,7 +8,14 @@
 # TODO: in template.html, add apple touch icon, maybe other favicon sizes.
 # TODO: local mirrors of all papers in publications.html

+# Requirements:
+# sudo apt install python3-markdown
+# sudo apt install python3-smartypants
+# sudo apt install python3-bs4
+
+
 import argparse
+from bs4 import BeautifulSoup
 import glob
 import html
 from io import StringIO
@@ -48,12 +55,19 @@ def copy_static_files():


 def find_update_date(text):
-  match = re.search(r'^Published:? (\d{4}-\d{2}-\d{2})', text, re.MULTILINE)
+  match = re.search(r'^\*?Posted (\d{4}-\d{2}-\d{2})', text, re.MULTILINE)
  if not match:
    return None
  return match.group(1)


+def find_summary(html_content):
+  text = BeautifulSoup(html_content, features='lxml').get_text()
+  lines = text.split('\n')
+  result = ' '.join(lines[2:4])
+  return html.escape(result, quote=False)
+
+
 def process_markdown_files():
  template = open('template.html').read()
  for (dirpath, _, filenames) in os.walk(input_directory):
@@ -77,7 +91,7 @@ def process_markdown_files():
      else:
        title = text

-      blog_entry['title'] = html.escape(title)
+      blog_entry['title'] = html.escape(title, quote=False)

      title += ' | Colin McMillen'
      if markdown_filename == os.path.join(input_directory, 'index.md'):
@@ -92,17 +106,18 @@ def process_markdown_files():
      if page_url.endswith('index.html'):  # strip off index.html
        page_url = page_url[:-len('index.html')]

-      update_date = find_update_date(text)
-      if update_date:
-        blog_entry['url'] = 'https://www.mcmillen.dev/' + page_url
-        blog_entry['date'] = update_date
-        blog_entries.append(blog_entry)
-
      html_content = markdown.markdown(
          text, extensions=md_extensions, output_format='html5')
      output = template.format(
          title=title, content=html_content, page_url=page_url)

+      update_date = find_update_date(text)
+      if update_date:
+        blog_entry['url'] = 'https://www.mcmillen.dev/' + page_url
+        blog_entry['date'] = update_date
+        blog_entry['summary'] = find_summary(html_content)
+        blog_entries.append(blog_entry)
+
      os.makedirs(out_dirpath, exist_ok=True)
      print_file(markdown_filename, out_fullpath)
      out_file = open(out_fullpath, 'w')
@@ -143,8 +158,9 @@ def make_atom_feed():
    <title>{title}</title>
    <id>{url}</id>
    <link rel="alternate" href="{url}"/>
+    <content type="text/html" src="{url}"/>
    <updated>{updated}</updated>
-    <summary>{summary}</summary>
+    <summary>{summary} (...)</summary>
  </entry>
 '''

@@ -153,14 +169,14 @@ def make_atom_feed():
  entries_io = StringIO()
  last_update = None
  for entry in blog_entries:
-    # We lie and pretend that all entries were written at noon UTC.
-    update_date = entry['date'] + 'T12:00:00+00:00'
+    # We lie and pretend that all entries were written at noon EDT.
+    update_date = entry['date'] + 'T12:00:00-04:00'
    last_update = update_date
    entries_io.write(entry_template.format(
        url=entry['url'],
        title=entry['title'],
        updated=update_date,
-        summary='TODO: fill this out.'))
+        summary=entry['summary']))

  entries_text = entries_io.getvalue()

--- a/content/blog/20190403-update.md
+++ b/content/blog/20190403-update.md
@@ -1,6 +1,6 @@
 # My first paper in 10 years?!

-Published: 2019-04-03.
+*Posted 2019-04-03.*

 It's been nearly two months since my last day at Google, so I guess I should finally make use of this newsletter :)

--- a/content/blog/20200209-sneak.md
+++ b/content/blog/20200209-sneak.md
@@ -1,6 +1,6 @@
 # A new year & a sneaky new project

-Published: 2020-02-09
+*Posted 2020-02-09.*

 I can't believe it's here so quickly, but: today marks a year since my last day at Google. That seemed like a good occasion to dust off this newsletter & let you know what I've been up to: making a videogame!

--- a/content/sigbovik/index.md
+++ b/content/sigbovik/index.md
@@ -1,6 +1,6 @@
 # 93% of Paint Splatters are Valid Perl Programs

-Published 2019-04-01.
+*Posted 2019-04-01.*

 TLDR: [read the paper](2019.pdf) and [view the gallery of pretty Perl programs](splatters.html).