'''Static site generator: pulls content from git, loads Markdown articles and
HTML templates, and renders them into a deployable site tree.'''

import glob
import os
import re
import shutil
import subprocess
from datetime import date, datetime
from typing import Optional

import markdown
import pydantic
import yaml
from dotmap import DotMap


class GlobalVars(pydantic.BaseModel):
    '''Static-valued global variables to be interpolated into any HTML templates.'''

    # default_factory ensures `today` is evaluated per instance rather than once
    # at import time, and yields a `date` (datetime.today() returned a datetime,
    # which did not match the annotation).
    today: date = pydantic.Field(default_factory=date.today)


def filepath_or_string(s: str) -> str:
    '''Loads the contents of a string if it is a filepath, otherwise returns the string.'''
    if os.path.isfile(s):
        with open(s, 'r') as f:
            return f.read()
    return s


def extract_placeholders(s: str) -> set:
    '''Extracts placeholder variables in the format `{variable}` from an
    unformatted template string.'''
    # Placeholders may contain alphanumerics, dots, and underscores
    # (dots allow attribute access, e.g. `{globalvars.today}`).
    placeholder_pattern = r'\{([\w\.]+)\}'
    # Return the set of distinct placeholders.
    return set(re.findall(placeholder_pattern, s))


def find_cyclical_placeholders(s: str, _parents: tuple = None, _cycles: set = None, **kwargs) -> set[tuple]:
    '''Recursively interpolates supplied kwargs into a template string to
    validate that there are no cyclical dependencies that would cause infinite
    recursion.

    Returns a set of paths (expressed as tuples of nodes) of cyclical
    placeholders. `_parents` and `_cycles` are internal recursion state and
    should not be supplied by callers.
    '''
    # Track the lineage of each placeholder so we can see if it is its own ancestor.
    if _parents is None:
        _parents = tuple()
    # Keep track of any cycles encountered (shared across the recursion).
    if _cycles is None:
        _cycles = set()
    # Recursion naturally ends once there are no more nested placeholders.
    for p in extract_placeholders(s):
        if p in _parents:
            # Any placeholder that has itself in its ancestry forms a cycle.
            _cycles.add(_parents + (p,))
        else:
            # For placeholders that are not their own ancestor, recursively
            # interpolate the kwargs into the nested placeholders until we
            # reach strings without placeholders.
            find_cyclical_placeholders(
                ('{' + p + '}').format(**kwargs),
                _parents=_parents + (p,),
                _cycles=_cycles,
                **kwargs
            )
    return _cycles


# Site-wide configuration, loaded once at import time. The model classes below
# read their defaults from it, so it must exist before they are defined.
with open('config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file.read())


class SiteConfig(pydantic.BaseModel):
    '''Per-site configuration; every field defaults to the corresponding
    `site_defaults` entry from config.yaml.'''
    base_url: Optional[str] = config['site_defaults'].get('base_url')
    git_repo: Optional[str] = config['site_defaults'].get('git_repo')
    build_cache: Optional[str] = config['site_defaults'].get('build_cache')
    assets: Optional[list] = config['site_defaults'].get('assets')
    web_root: Optional[str] = config['site_defaults'].get('web_root')
    articles: Optional[list] = config['site_defaults'].get('articles')


class ArticleMetadata(pydantic.BaseModel):
    '''Front-matter metadata parsed from the YAML header of a Markdown article.'''
    title: str
    author: Optional[str] = config.get('author')
    date: date
    lastmod: Optional[date] = None
    published: bool
    tags: list
    thumbnail: Optional[str] = None


def load_markdown(md: str) -> tuple[ArticleMetadata | None, str]:
    '''Loads a Markdown file into a (metadata: ArticleMetadata, content: str) pair.

    Returns (None, content) when the article has no front-matter block.
    '''
    # Load the file contents if a filepath is specified, and strip document delimiters ('---').
    md = filepath_or_string(md).strip().strip('---').strip()
    # If there is no `---` delimiter, then the article has no metadata.
    if '---' not in md.strip('---'):
        return None, md
    # Split the metadata from the contents on the FIRST delimiter only, so
    # article bodies containing '---' (e.g. horizontal rules) are not mangled.
    # (The previous unbounded split raised ValueError in that case.)
    raw_metadata, raw_article = md.split('---', 1)
    # Use YAML to parse the metadata.
    metadata = yaml.safe_load(raw_metadata)
    # Convert the contents to an HTML string.
    content = markdown.markdown(raw_article)
    return ArticleMetadata(**metadata), content


def format_html_template(template: str, **kwargs) -> str:
    '''Interpolates variables specified as keyword arguments into the given
    HTML template (a string or a filepath). Nested template references are
    expanded repeatedly until no placeholders remain.

    Raises:
        ValueError: if the placeholders contain a cyclical dependency that
            could never be fully resolved.
    '''
    # Load the template if a filepath is given.
    template = filepath_or_string(template)
    # Ensure the template does not have cyclical placeholder references.
    cycles = find_cyclical_placeholders(template, globalvars=GlobalVars(), **kwargs)
    if len(cycles) > 0:
        # f-string fix: the previous plain string never reported the cycles.
        raise ValueError(f'Template has cyclical dependencies: {cycles}')
    # Iteratively interpolate global variables and the kwargs into the template
    # until there are no more placeholders. The loop accounts for nested
    # template references (a kwarg value may itself contain placeholders).
    formatted_html = template
    while len(extract_placeholders(formatted_html)) > 0:
        formatted_html = formatted_html.format(globalvars=GlobalVars(), **kwargs)
    return formatted_html


def run(cmd: str) -> subprocess.CompletedProcess:
    '''Runs a command, capturing stdout and stderr.

    NOTE(review): the naive split(' ') means arguments containing spaces are
    not supported — acceptable for the fixed commands used in this module.
    '''
    return subprocess.run(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE)


def pull_git_repo(repo: str, build_cache: str) -> None:
    '''Pulls/clones a repo into the build cache directory.'''
    if os.path.exists(f'{build_cache}/.git'):
        run(f'git -C {build_cache} pull origin')
    else:
        run(f'git clone {repo} {build_cache}')


def load_partials() -> dict:
    '''Loads partial templates from the templates/partials directory.

    Returns a dict mapping 'partials.<filestem>' to the formatted HTML.
    '''
    partials = {}
    for filename in os.listdir('templates/partials'):
        # Interpolate the actual filename (previously a hard-coded broken path).
        with open(f'templates/partials/{filename}') as partial_file:
            partial_template = partial_file.read()
        partials[f'partials.{os.path.splitext(filename)[0]}'] = format_html_template(
            partial_template,
            current_year=datetime.now().year
        )
    return partials


def import_resume() -> None:
    '''Copies the most recently dated resume from the resume repo into /dist.'''
    # Sentinel value: sorts lexicographically before any real YYYY-MM-DD date.
    sentinel = '0000-00-00'
    max_date = sentinel
    # Loop through the folders in the resume repo to find the most recent one.
    for resume_folder in os.listdir('build/resume'):
        # Skip folders that are not in YYYY-MM-DD format.
        try:
            datetime.strptime(resume_folder, '%Y-%m-%d')
        except ValueError:
            continue
        # Lexicographic comparison is chronological for zero-padded ISO dates.
        if resume_folder > max_date:
            max_date = resume_folder
    # Nothing to copy if no dated folders were found (previously this copied
    # from the nonexistent sentinel path).
    if max_date == sentinel:
        return
    # Copy the resume into the /dist directory.
    run(f'cp build/resume/{max_date}/shepich_resume.pdf dist/shepich_resume.pdf')


def format_blog_tags(tags: list[str], template='templates/components/blog_tag.html') -> list[str]:
    '''Generates HTML blog tag components from a list of tag names.'''
    return [format_html_template(template, tag_name=t) for t in tags]


def build_blog_archive(
    index: dict[str, tuple[ArticleMetadata, str]],
    page_template='templates/pages/default.html',
    li_template='templates/components/blog_archive_li.html',
    **kwargs
) -> str:
    '''Converts an index, formatted as a filestem: (metadata, contents) dict,
    into an HTML page containing the list of articles, sorted from newest to
    oldest.

    Note: partials must be expanded into the kwargs, as they are needed to
    generate the overall page.
    '''
    # Sort the articles from newest to oldest by their front-matter date.
    sorted_articles = sorted(index.items(), key=lambda item: item[1][0].date, reverse=True)
    # Add each article as a list item to an unordered list.
    # NOTE(review): all metadata fields plus the filestem are passed through so
    # the list-item template can reference e.g. {title}, {date}, or {filestem}
    # — confirm against the placeholders actually used in blog_archive_li.html.
    list_items = [
        format_html_template(li_template, filestem=filestem, **dict(metadata))
        for filestem, (metadata, _content) in sorted_articles
    ]
    archive_html_content = '<ul>\n' + '\n'.join(list_items) + '\n</ul>'
    # Interpolate the article list into the overall page template.
    archive_html_page = format_html_template(
        page_template,
        content=archive_html_content,
        **kwargs
    )
    return archive_html_page


def copy_assets(site: SiteConfig):
    '''Copies the list of site assets from the build cache to the web root.'''
    # Expand any globbed expressions.
    expanded_asset_list = []
    for pattern in site.assets or []:
        # Assets are defined relative to the build cache; construct the full path.
        expanded_asset_list.extend(glob.glob(f'{site.build_cache}/{pattern.lstrip("/")}'))
    for asset in expanded_asset_list:
        # Construct the destination path analogous to the source path but in
        # the web root instead of the build cache. (Bug fix: this previously
        # reused the raw glob pattern from the earlier loop, producing the
        # wrong destination for every expanded asset.)
        relative_path = os.path.relpath(asset, site.build_cache)
        destination = os.path.join(site.web_root, relative_path)
        # Delete anything already at the destination.
        shutil.rmtree(destination, ignore_errors=True)
        # Copy the asset.
        if os.path.isdir(asset):
            shutil.copytree(asset, destination)
        elif os.path.isfile(asset):
            # Ensure the parent directory exists before copying a single file.
            os.makedirs(os.path.dirname(destination), exist_ok=True)
            shutil.copyfile(asset, destination)
    return None


def build_index(site: SiteConfig) -> dict:
    '''Loads the site's articles into an index mapping the filename stem to a
    (metadata: ArticleMetadata, content: str) tuple.'''
    index = {}
    # Expand any globbed expressions.
    expanded_article_list = []
    for pattern in site.articles or []:
        # Article paths are defined relative to the build cache; construct the full path.
        expanded_article_list.extend(glob.glob(f'{site.build_cache}/{pattern.lstrip("/")}'))
    for article in expanded_article_list:
        metadata, content = load_markdown(article)
        # Skip articles without front matter (metadata is None) as well as
        # explicitly unpublished articles. (Previously a metadata-less article
        # raised AttributeError.)
        if metadata is None or not metadata.published:
            continue
        article_filestem = os.path.splitext(os.path.basename(article))[0]
        index[article_filestem] = (metadata, content)
    return index


def map_templates(dir: str, parent: str = '') -> DotMap:
    '''Recursively maps the templates directory into a nested dict structure.
    Leaves map the filestems of .html template files to their contents.
    '''
    output = {}
    base = os.path.join(parent, dir)
    # List the files and subdirectories at the top level.
    for sub in os.listdir(base):
        # Construct the full path to the file or subdir from the root of the tree.
        full_path = os.path.join(base, sub)
        # Recursively map subdirectories. (Bug fix: pass the accumulated base
        # path as the parent — passing only `dir` broke nesting deeper than
        # two levels.)
        if os.path.isdir(full_path):
            output[sub] = map_templates(sub, parent=base)
            continue
        # Templates must be .html files.
        filestem, ext = os.path.splitext(sub)
        if ext != '.html':
            continue
        # Load template file.
        with open(full_path, 'r') as file:
            output[filestem] = file.read()
    return DotMap(output)


if __name__ == '__main__':
    pass