jimsite/jimsite.py
2026-01-31 04:11:23 -05:00

277 lines
8.6 KiB
Python

import os
import glob
import shutil
import subprocess
import markdown
import yaml
import pydantic
from typing import Optional
from datetime import datetime, date
from dotmap import DotMap
class GlobalVars(pydantic.BaseModel):
    '''Static-valued global variables to be interpolated into any HTML templates.'''
    # default_factory re-evaluates on every instantiation — a plain class-level
    # default of datetime.today() is frozen once at import time, so a
    # long-running process would serve a stale date. date.today() also yields
    # a true `date`, matching the annotation, where datetime.today() yields a
    # `datetime`.
    today: date = pydantic.Field(default_factory=date.today)
def filepath_or_string(s: str) -> str:
    '''Loads the contents of a string if it is a filepath, otherwise returns the string.'''
    # Not a file on disk: the string itself is the content.
    if not os.path.isfile(s):
        return s
    with open(s, 'r') as handle:
        return handle.read()
# Site configuration is read once at import time from the working directory.
with open('config.yaml', 'r') as config_file:
    # yaml.safe_load accepts a file object directly; no need to read() first.
    config = yaml.safe_load(config_file)
# Site-wide default values, looked up once. Using .get tolerates a config.yaml
# with no 'site_defaults' section — the original subscript raised KeyError at
# class-definition (import) time.
_site_defaults = config.get('site_defaults') or {}

class SiteConfig(pydantic.BaseModel):
    '''Site-level configuration; every field defaults to the value from
    config.yaml's 'site_defaults' section (None when absent).'''
    base_url: Optional[str] = _site_defaults.get('base_url')
    git_repo: Optional[str] = _site_defaults.get('git_repo')
    build_cache: Optional[str] = _site_defaults.get('build_cache')
    assets: Optional[list] = _site_defaults.get('assets')
    web_root: Optional[str] = _site_defaults.get('web_root')
    articles: Optional[list] = _site_defaults.get('articles')
class ArticleMetadata(pydantic.BaseModel):
    '''Front-matter metadata parsed from the YAML header of a Markdown article.'''
    title: str
    # Falls back to the site-wide author from config.yaml when not set per-article.
    author: Optional[str] = config.get('author')
    # Publication date; `lastmod` is the optional last-modified date.
    date: date
    lastmod: Optional[date] = None
    # Unpublished articles are skipped by build_index.
    published: bool
    tags: list
    # Optional path/URL to a thumbnail image — TODO confirm which against templates.
    thumbnail: Optional[str] = None
def load_markdown(md: str) -> tuple[ArticleMetadata|None, str]:
    '''Loads a Markdown file into a (metadata, content) pair.

    Articles may begin with a YAML front-matter section delimited by '---';
    when no front matter is present, metadata is None and the raw Markdown
    text is returned as content.
    '''
    # Load the file contents if a filepath is specified.
    text = filepath_or_string(md).strip()
    # Drop a single leading '---' delimiter if present. (The previous
    # str.strip('---') treated '---' as a CHARACTER SET and stripped any run
    # of '-' from either end, corrupting articles that legitimately start or
    # end with hyphens.)
    if text.startswith('---'):
        text = text[3:]
    # Split the metadata from the contents on the FIRST closing delimiter
    # only, so a '---' horizontal rule inside the article body no longer
    # breaks the two-way unpack with a ValueError.
    raw_metadata, delimiter, raw_article = text.partition('---')
    # If there is no closing delimiter, the article has no metadata.
    if not delimiter:
        return None, text.strip()
    # Use YAML to parse the metadata.
    metadata = yaml.safe_load(raw_metadata)
    # Convert the contents to an HTML string.
    content = markdown.markdown(raw_article.strip())
    return ArticleMetadata(**metadata), content
def format_html_template(template: str, **kwargs) -> str:
    '''Interpolates variables specified as keyword arguments
    into the given HTML template.'''
    # Load the template if a filepath is given.
    source = filepath_or_string(template)
    globalvars = GlobalVars()
    # Interpolate the kwargs, then apply global variables a second time in
    # case a partial expanded by the first .format() call uses a variable.
    first_pass = source.format(globalvars=globalvars, **kwargs)
    return first_pass.format(globalvars=globalvars)
def run(cmd: str) -> subprocess.CompletedProcess:
    '''Runs a command string with captured stdout/stderr and returns the
    CompletedProcess.

    Note: the command is split naively on single spaces, so arguments that
    themselves contain spaces are not supported. shell=False (the default)
    with an argument list avoids shell injection.
    '''
    # def over a named lambda (PEP 8 E731): better tracebacks and docs.
    return subprocess.run(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
def pull_git_repo(repo: str, build_cache: str) -> None:
    '''Pulls/clones a repo into the build cache directory.'''
    # A .git directory means the cache already holds a clone: just pull.
    already_cloned = os.path.exists(f'{build_cache}/.git')
    if already_cloned:
        run(f'git -C {build_cache} pull origin')
        return
    run(f'git clone {repo} {build_cache}')
def load_partials() -> dict:
    """Loads partial templates from the templates/partials directory.

    Returns a dict mapping 'partials.<filestem>' to the formatted HTML of
    each partial, ready to be expanded into page templates.
    """
    partials = {}
    for filename in os.listdir('templates/partials'):
        # Read each partial from disk. The path must interpolate the loop
        # variable — previously the literal 'templates/partials/(unknown)'
        # was opened for every entry, which can never succeed.
        with open(f'templates/partials/{filename}') as partial_file:
            partial_template = partial_file.read()
        partials[f'partials.{os.path.splitext(filename)[0]}'] = format_html_template(
            partial_template,
            current_year = datetime.now().year
        )
    return partials
def import_resume():
    '''Copies the most recent resume PDF from the resume repo into /dist.'''
    # Folders are named YYYY-MM-DD, which sorts lexicographically in date
    # order, so the lexicographic maximum is the most recent folder.
    def _is_dated_folder(name):
        # Only folders in strict YYYY-MM-DD format count.
        try:
            datetime.strptime(name, '%Y-%m-%d')
        except Exception:
            return False
        return True
    dated_folders = [d for d in os.listdir('build/resume') if _is_dated_folder(d)]
    # '0000-00-00' is the sentinel used when no dated folder exists.
    max_date = max(dated_folders, default='0000-00-00')
    # Copy the resume into the /dist directory.
    run(f'cp build/resume/{max_date}/shepich_resume.pdf dist/shepich_resume.pdf')
def format_blog_tags(tags: list[str], template = 'templates/components/blog_tag.html') -> list[str]:
    '''Generates HTML blog tag components from a list of tag names.'''
    rendered = []
    for tag in tags:
        rendered.append(format_html_template(template, tag_name = tag))
    return rendered
def build_blog_archive(
    index: dict[str, tuple[str, str]],
    page_template = 'templates/pages/default.html',
    li_template = 'templates/components/blog_archive_li.html',
    **kwargs
) -> str:
    '''Converts an index, formatted as filestem: (metadata, contents) dict,
    into an HTML page containing the list of articles, sorted from newest to oldest.
    Note: partials must be expanded into the kwargs, as they are needed to generate
    the overall page.
    '''
    # Newest first: sort ascending by date, then walk the result backwards.
    by_date = sorted(index.items(), key=lambda item: item[1][0].date)
    # Render one <li> component per article, including its metadata tags.
    list_items = [
        format_html_template(
            li_template,
            article_filestem=filestem,
            blog_tags=' '.join(format_blog_tags(metadata.tags)),
            metadata=metadata,
        )
        for filestem, (metadata, _contents) in reversed(by_date)
    ]
    # Wrap the items in an unordered list and interpolate the result into the
    # overall page template.
    return format_html_template(
        page_template,
        content='<ul>' + ''.join(list_items) + '</ul>',
        **kwargs
    )
def copy_assets(site: 'SiteConfig'):
    '''Copies the list of site assets from the build cache to the web root.'''
    # Expand any globbed expressions.
    expanded_asset_list = []
    for pattern in site.assets:
        # Assets are defined relative to the build cache; construct the full path.
        expanded_asset_list.extend(
            glob.glob(f'{site.build_cache}/{pattern.lstrip("/")}')
        )
    for asset in expanded_asset_list:
        # Mirror the asset's path relative to the build cache under the web
        # root. (Previously the destination was built from the stale glob
        # PATTERN loop variable, so every expanded asset collapsed onto one
        # wrong destination path.)
        relative_path = os.path.relpath(asset, site.build_cache)
        destination = f'{site.web_root}/{relative_path}'
        # Delete existing files.
        shutil.rmtree(destination, ignore_errors=True)
        # Ensure the destination's parent directory exists before copying.
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        # Copy the asset (directories recursively, files directly).
        if os.path.isdir(asset):
            shutil.copytree(asset, destination)
        elif os.path.isfile(asset):
            shutil.copyfile(asset, destination)
    return None
def build_index(site: 'SiteConfig') -> dict:
    '''Loads the site's articles into an index mapping the filename stem
    to a (metadata: ArticleMetadata, content: str) tuple.'''
    index = {}
    # Expand any globbed expressions.
    expanded_article_list = []
    for pattern in site.articles:
        # Article paths are defined relative to the build cache; construct the full path.
        expanded_article_list.extend(
            glob.glob(f'{site.build_cache}/{pattern.lstrip("/")}')
        )
    for article in expanded_article_list:
        metadata, content = load_markdown(article)
        # Skip articles with no front matter (load_markdown returns None
        # metadata — previously this crashed with AttributeError on
        # .published) as well as explicitly unpublished ones.
        if metadata is None or not metadata.published:
            continue
        article_filestem = os.path.splitext(os.path.basename(article))[0]
        index[article_filestem] = (metadata, content)
    return index
def map_templates(dir: str, parent = '') -> DotMap:
    '''Recursively maps the templates directory into a nested dict structure.
    Leaves map the filestems of .html template files to their contents.
    '''
    output = {}
    # List the files and subdirectories at the top level.
    for sub in os.listdir(os.path.join(parent, dir)):
        # Construct the full path to the file or subdir from the root of the tree.
        full_path = os.path.join(parent, dir, sub)
        # Recursively map subdirectories, passing down the ACCUMULATED path.
        # (Previously only `dir` was passed as the parent, so trees more than
        # two levels deep resolved against the wrong directory.)
        if os.path.isdir(full_path):
            output[sub] = map_templates(sub, parent = os.path.join(parent, dir))
            continue
        # Templates must be .html files.
        filestem, ext = os.path.splitext(sub)
        if ext != '.html':
            continue
        # Load template file.
        with open(full_path, 'r') as file:
            html = file.read()
        output[filestem] = html
    return DotMap(output)
if __name__ == '__main__':
    # Entry-point placeholder: no build pipeline is wired up here yet.
    pass