# Python source file: 343 lines, 11 KiB.
import os
|
|
import re
|
|
import glob
|
|
import shutil
|
|
import subprocess
|
|
import markdown
|
|
import yaml
|
|
import pydantic
|
|
from typing import Optional
|
|
from datetime import datetime, date
|
|
from dotmap import DotMap
|
|
|
|
class GlobalVars(pydantic.BaseModel):
    '''Global variables to be interpolated into any HTML templates.'''

    # Timestamp exposed to templates. A default factory is used so the value is
    # computed when each instance is created, instead of being frozen once at
    # class-definition (import) time. `datetime.today` is kept rather than
    # `date.today` so the rendered value has the same type as before.
    today: date = pydantic.Field(default_factory=datetime.today)
|
|
|
|
|
|
def filepath_or_string(s: str) -> str:
    '''Resolves `s` to text: the contents of the file it names when it is a path
    to an existing file, otherwise `s` itself, unchanged.'''

    # Anything that does not point at a regular file is treated as literal text.
    if not os.path.isfile(s):
        return s

    with open(s, 'r') as source_file:
        contents = source_file.read()
    return contents
|
|
|
|
|
|
def extract_placeholders(s: str) -> set:
    '''Collects the distinct `{variable}` placeholder names found in an
    unformatted template string.'''

    # A placeholder is a brace-delimited run of word characters
    # (alphanumerics and underscores) and dots; only the name is captured.
    return {found.group(1) for found in re.finditer(r'\{([\w\.]+)\}', s)}
|
|
|
|
|
|
def find_cyclical_placeholders(s: str, _parents: tuple = None, _cycles: set = None, **kwargs) -> set[tuple]:
    '''Walks the placeholders of a template string, expanding each one with the
    supplied kwargs, to detect cyclical references that would otherwise cause
    infinite recursion during interpolation.

    `_parents` and `_cycles` carry state between recursive calls and are not
    meant to be supplied by callers.

    Returns a set of paths (each a tuple of placeholder names, ending with the
    name that repeats) describing every cycle found.
    '''

    # Ancestry of the string being inspected, and the shared accumulator of cycles.
    lineage = () if _parents is None else _parents
    found = set() if _cycles is None else _cycles

    # The recursion bottoms out on strings that contain no placeholders.
    for name in extract_placeholders(s):
        path = lineage + (name,)

        # A placeholder that appears in its own ancestry closes a cycle.
        if name in lineage:
            found.add(path)
            continue

        # Otherwise expand just this placeholder and inspect what it expands to.
        expanded = ('{' + name + '}').format(**kwargs)
        find_cyclical_placeholders(expanded, _parents = path, _cycles = found, **kwargs)

    return found
|
|
|
|
|
|
# Parse the site configuration once at import time; `config` is read by the
# model definitions below.
with open('config.yaml', 'r') as config_stream:
    config = yaml.safe_load(config_stream)
|
|
|
|
# The `site_defaults` section of config.yaml, looked up once for the defaults below.
_site_defaults = config['site_defaults']


class SiteConfig(pydantic.BaseModel):
    '''Settings for one site. Every field is optional and falls back to the
    value of the same key in the `site_defaults` section of config.yaml
    (or None when that key is absent).'''

    base_url: Optional[str] = _site_defaults.get('base_url')
    git_repo: Optional[str] = _site_defaults.get('git_repo')
    build_cache: Optional[str] = _site_defaults.get('build_cache')
    assets: Optional[list] = _site_defaults.get('assets')
    web_root: Optional[str] = _site_defaults.get('web_root')
    articles: Optional[list] = _site_defaults.get('articles')
|
|
|
|
class ArticleMetadata(pydantic.BaseModel):
    '''Metadata attached to an article.

    `title`, `date`, `published` and `tags` are required. `author` falls back
    to the top-level `author` key of config.yaml; `lastmod` and `thumbnail`
    default to None.
    '''

    # Required fields.
    title: str

    # Optional; defaults to the configured author (None when not configured).
    author: Optional[str] = config.get('author')

    date: date
    lastmod: Optional[date] = None
    published: bool
    tags: list
    thumbnail: Optional[str] = None
|
|
|
|
|
|
|
|
def load_markdown(md: str) -> tuple[ArticleMetadata|None, str]:
    '''Loads a Markdown document into a (metadata, content) pair.

    `md` may be a filepath or the Markdown text itself. The document may begin
    with a YAML metadata section delimited by `---`.

    Returns `(ArticleMetadata, html)` when a metadata section is present.
    When there is none, returns `(None, text)` where `text` is the stripped
    input, NOT converted to HTML.
    '''

    # Load the file contents if a filepath is specified, and strip the leading/
    # trailing document delimiters ('---') together with surrounding whitespace.
    md = filepath_or_string(md).strip().strip('---').strip()

    # If there is no `---` delimiter left, then the article has no metadata.
    if '---' not in md.strip('---'):
        return None, md

    # Split the metadata from the contents at the FIRST delimiter only, so that
    # later `---` sequences in the article body (e.g. Markdown horizontal rules)
    # stay part of the article instead of breaking the two-way split.
    raw_metadata, _, raw_article = md.partition('---')

    # Use YAML to parse the metadata.
    metadata = yaml.safe_load(raw_metadata)

    # Convert the contents to an HTML string.
    content = markdown.markdown(raw_article)

    return ArticleMetadata(**metadata), content
|
|
|
|
|
|
def format_html_template(template: str, **kwargs) -> str:
    '''Interpolates variables specified as keyword arguments
    into the given HTML template.

    `template` may be a filepath or the template text itself. In addition to
    the kwargs, a `GlobalVars` instance is available to templates under the
    name `globalvars`. Values may themselves contain placeholders; formatting
    is repeated until none remain.

    Raises ValueError if the placeholders reference each other cyclically.
    Errors from `str.format` (e.g. KeyError for a placeholder that has no
    matching kwarg) propagate unchanged.

    # Example

    ```python
    format_html_template('<h1>{title}</h1>', title = '{site} blog', site = 'My')

    >>> '<h1>My blog</h1>'
    ```
    '''

    # Load the template if a filepath is given.
    template = filepath_or_string(template)

    # One set of global variables is shared by the validation and every
    # formatting pass.
    globalvars = GlobalVars()

    # Ensure the template does not have cyclical placeholder references.
    cycles = find_cyclical_placeholders(template, globalvars = globalvars, **kwargs)

    if cycles:
        # f-string so the offending cycles actually appear in the message.
        raise ValueError(f'Template has cyclical dependencies: {cycles}')

    # Iteratively interpolate global variables and the kwargs into the template until
    # there are no more placeholders. The loop is used to account for nested template references.
    formatted_html = template
    while extract_placeholders(formatted_html):
        formatted_html = formatted_html.format(globalvars = globalvars, **kwargs)

    # Return the formatted HTML.
    return formatted_html
|
|
|
|
|
|
def run(cmd: str) -> subprocess.CompletedProcess:
    '''Runs a command without a shell, capturing its stdout and stderr.

    The command string is split on single spaces to form the argument list,
    so individual arguments cannot contain spaces. The exit status is not
    checked; callers can inspect `returncode` on the returned object.
    '''
    return subprocess.run(cmd.split(' '), stdout = subprocess.PIPE, stderr = subprocess.PIPE)
|
|
def pull_git_repo(repo: str, build_cache: str) -> None:
    '''Brings the build cache directory up to date with a repo: pulls when the
    directory already contains a `.git` entry, clones into it otherwise.'''

    already_cloned = os.path.exists(f'{build_cache}/.git')

    git_command = (
        f'git -C {build_cache} pull origin'
        if already_cloned
        else f'git clone {repo} {build_cache}'
    )
    run(git_command)
|
|
|
|
|
|
def load_partials() -> dict:
    """Reads every entry of the templates/partials directory and returns a dict
    mapping `partials.<filestem>` to the formatted partial template."""

    loaded = {}

    for entry in os.listdir('templates/partials'):
        with open(f'templates/partials/{entry}') as handle:
            raw_partial = handle.read()

        # Key each partial by its filename without the extension.
        stem, _extension = os.path.splitext(entry)
        loaded[f'partials.{stem}'] = format_html_template(
            raw_partial,
            current_year = datetime.now().year,
        )

    return loaded
|
|
|
|
|
|
def import_resume():
    '''Copies the most recent resume PDF from the resume repo into /dist.

    Looks in `build/resume` for folders whose names parse as `%Y-%m-%d` dates
    and copies `shepich_resume.pdf` from the newest one to
    `dist/shepich_resume.pdf`. Does nothing when no dated folder exists.
    '''

    latest_date = None
    latest_folder = None

    # Loop through the folders in the resume repo to find the most recent one.
    for resume_folder in os.listdir('build/resume'):

        # Skip folders that are not in YYYY-MM-DD format.
        try:
            folder_date = datetime.strptime(resume_folder, '%Y-%m-%d')
        except ValueError:
            continue

        # Compare parsed dates rather than raw names: strptime also accepts
        # non-zero-padded names (e.g. '2024-1-5'), which sort incorrectly as strings.
        if latest_date is None or folder_date > latest_date:
            latest_date, latest_folder = folder_date, resume_folder

    # Nothing to copy if the repo contains no dated folders.
    if latest_folder is None:
        return

    # Copy the resume into the /dist directory.
    run(f'cp build/resume/{latest_folder}/shepich_resume.pdf dist/shepich_resume.pdf')
|
|
|
|
|
|
def format_blog_tags(tags: list[str], template = 'templates/components/blog_tag.html') -> list[str]:
    '''Renders one HTML blog tag component per tag name, in the order given.'''

    rendered_tags = []
    for tag in tags:
        # Each tag name is exposed to the component template as `tag_name`.
        rendered_tags.append(format_html_template(template, tag_name = tag))
    return rendered_tags
|
|
|
|
|
|
def build_blog_archive(
    index: dict[str, tuple[str, str]],
    page_template = 'templates/pages/default.html',
    li_template = 'templates/components/blog_archive_li.html',
    **kwargs
) -> str:
    '''Renders an index (a dict of filestem: (metadata, contents)) as an HTML
    page listing the articles from newest to oldest.

    Note: partials must be expanded into the kwargs, as they are needed to generate
    the overall page.
    '''

    # Order the entries by article date, oldest first; they are walked in reverse below.
    oldest_first = sorted(index.items(), key = lambda item: item[1][0].date)

    # Render one list item per article (including its metadata tags).
    list_items = []
    for filestem, (metadata, _contents) in reversed(oldest_first):
        list_items.append(
            format_html_template(
                li_template,
                article_filestem = filestem,
                blog_tags = ' '.join(format_blog_tags(metadata.tags)),
                metadata = metadata
            )
        )

    # Wrap the items in an unordered list.
    archive_html_content = '<ul>' + ''.join(list_items) + '</ul>'

    # Interpolate the list into the overall page template.
    return format_html_template(
        page_template,
        content = archive_html_content,
        **kwargs
    )
|
|
|
|
def copy_assets(site: 'SiteConfig'):
    '''Copies the list of site assets from the build cache to the web root.

    Each entry of `site.assets` is a path or glob pattern relative to
    `site.build_cache`. Every match is copied to the same relative location
    under `site.web_root`, replacing an existing directory there. A site with
    no assets configured is a no-op.
    '''

    # Expand any globbed expressions.
    expanded_asset_list = []
    for pattern in site.assets or []:
        expanded_asset_list.extend(
            # Assets are defined relative to the build cache; construct the full path.
            glob.glob(f'{site.build_cache}/{pattern.lstrip("/")}')
        )

    for asset in expanded_asset_list:

        # Construct the destination path analogous to the source path but in
        # the web root instead of the build cache. It is derived from the
        # matched asset itself, not from the glob pattern, so every match of a
        # pattern gets its own destination.
        relative_path = os.path.relpath(asset, site.build_cache)
        destination = f'{site.web_root}/{relative_path}'

        # Delete an existing directory tree (existing files are overwritten by copyfile).
        shutil.rmtree(destination, ignore_errors=True)

        # Skip anything that is neither a directory nor a regular file.
        if not (os.path.isdir(asset) or os.path.isfile(asset)):
            continue

        # Make sure the directory that will hold the copy exists.
        os.makedirs(os.path.dirname(destination) or '.', exist_ok=True)

        # Copy the asset.
        if os.path.isdir(asset):
            shutil.copytree(asset, destination)
        else:
            shutil.copyfile(asset, destination)

    return None
|
|
|
|
|
|
def build_index(site: SiteConfig) -> dict:
    '''Loads the site's published articles into an index mapping the filename
    stem to a (metadata: ArticleMetadata, content: str) tuple.

    Each entry of `site.articles` is a path or glob pattern relative to
    `site.build_cache`. Articles without a metadata section, or whose metadata
    has a falsy `published`, are left out.
    '''

    index = {}

    # Expand any globbed expressions.
    expanded_article_list = []
    for pattern in site.articles or []:
        expanded_article_list.extend(
            # Article paths are defined relative to the build cache; construct the full path.
            glob.glob(f'{site.build_cache}/{pattern.lstrip("/")}')
        )

    for article in expanded_article_list:
        metadata, content = load_markdown(article)

        # Skip articles with no metadata (load_markdown returns None for them,
        # so they cannot be marked as published) and unpublished articles.
        if metadata is None or not metadata.published:
            continue

        article_filestem = os.path.splitext(os.path.basename(article))[0]
        index[article_filestem] = (metadata, content)

    return index
|
|
|
|
|
|
def map_templates(dir: str, parent = '') -> DotMap:
    '''Recursively maps the templates directory into a nested dict structure.

    `dir` is the directory to map and `parent` is the path that contains it.
    Subdirectories become nested maps keyed by directory name; leaves map the
    filestems of .html template files to their contents. Files with any other
    extension are ignored.
    '''

    output = {}

    # Full path of the directory being mapped, from the root of the tree.
    root = os.path.join(parent, dir)

    # List the files and subdirectories at the top level.
    for sub in os.listdir(root):

        # Construct the full path to the file or subdir from the root of the tree.
        full_path = os.path.join(root, sub)

        # Recursively map subdirectories. The whole path so far is passed as the
        # parent so that directories nested more than one level deep resolve
        # correctly.
        if os.path.isdir(full_path):
            output[sub] = map_templates(sub, parent = root)
            continue

        # Templates must be .html files.
        filestem, ext = os.path.splitext(sub)
        if ext != '.html':
            continue

        # Load template file.
        with open(full_path, 'r') as file:
            output[filestem] = file.read()

    return DotMap(output)
|
|
|
|
|
|
|
|
|
|
# Script entry point: currently does nothing when the module is run directly.
if __name__ == '__main__':
    pass
|
|
|
|
|