jimsite/jimsite.py
2026-01-31 04:11:23 -05:00

277 lines
8.6 KiB
Python

import os
import glob
import shutil
import subprocess
import markdown
import yaml
import pydantic
from typing import Optional
from datetime import datetime, date
from dotmap import DotMap
class GlobalVars(pydantic.BaseModel):
    '''Static-valued global variables to be interpolated into any HTML templates.'''
    # default_factory re-evaluates on every instantiation — a plain class-level
    # default of datetime.today() is frozen once at import time, so a
    # long-running process would serve a stale date. date.today() also yields
    # a true `date`, matching the annotation, where datetime.today() yields a
    # `datetime`.
    today: date = pydantic.Field(default_factory=date.today)
def filepath_or_string(s: str) -> str:
    '''Loads the contents of a string if it is a filepath, otherwise returns the string.'''
    # Not a file on disk: the string itself is the content.
    if not os.path.isfile(s):
        return s
    with open(s, 'r') as handle:
        return handle.read()
# Site configuration is read once at import time from the working directory.
with open('config.yaml', 'r') as config_file:
    # yaml.safe_load accepts a file object directly; no need to read() first.
    config = yaml.safe_load(config_file)
# Site-wide default values, looked up once. Using .get tolerates a config.yaml
# with no 'site_defaults' section — the original subscript raised KeyError at
# class-definition (import) time.
_site_defaults = config.get('site_defaults') or {}

class SiteConfig(pydantic.BaseModel):
    '''Site-level configuration; every field defaults to the value from
    config.yaml's 'site_defaults' section (None when absent).'''
    base_url: Optional[str] = _site_defaults.get('base_url')
    git_repo: Optional[str] = _site_defaults.get('git_repo')
    build_cache: Optional[str] = _site_defaults.get('build_cache')
    assets: Optional[list] = _site_defaults.get('assets')
    web_root: Optional[str] = _site_defaults.get('web_root')
    articles: Optional[list] = _site_defaults.get('articles')
class ArticleMetadata(pydantic.BaseModel):
    '''Front-matter metadata parsed from the YAML header of a Markdown article.'''
    title: str
    # Falls back to the site-wide author from config.yaml when not set per-article.
    author: Optional[str] = config.get('author')
    # Publication date; `lastmod` is the optional last-modified date.
    date: date
    lastmod: Optional[date] = None
    # Unpublished articles are skipped by build_index.
    published: bool
    tags: list
    # Optional path/URL to a thumbnail image — TODO confirm which against templates.
    thumbnail: Optional[str] = None
def load_markdown(md: str) -> tuple[ArticleMetadata|None, str]:
    '''Loads a Markdown file into a (metadata, content) pair.

    Articles may begin with a YAML front-matter section delimited by '---';
    when no front matter is present, metadata is None and the raw Markdown
    text is returned as content.
    '''
    # Load the file contents if a filepath is specified.
    text = filepath_or_string(md).strip()
    # Drop a single leading '---' delimiter if present. (The previous
    # str.strip('---') treated '---' as a CHARACTER SET and stripped any run
    # of '-' from either end, corrupting articles that legitimately start or
    # end with hyphens.)
    if text.startswith('---'):
        text = text[3:]
    # Split the metadata from the contents on the FIRST closing delimiter
    # only, so a '---' horizontal rule inside the article body no longer
    # breaks the two-way unpack with a ValueError.
    raw_metadata, delimiter, raw_article = text.partition('---')
    # If there is no closing delimiter, the article has no metadata.
    if not delimiter:
        return None, text.strip()
    # Use YAML to parse the metadata.
    metadata = yaml.safe_load(raw_metadata)
    # Convert the contents to an HTML string.
    content = markdown.markdown(raw_article.strip())
    return ArticleMetadata(**metadata), content
def format_html_template(template: str, **kwargs) -> str:
    '''Interpolates variables specified as keyword arguments
    into the given HTML template.'''
    # Load the template if a filepath is given.
    source = filepath_or_string(template)
    globalvars = GlobalVars()
    # Interpolate the kwargs, then apply global variables a second time in
    # case a partial expanded by the first .format() call uses a variable.
    first_pass = source.format(globalvars=globalvars, **kwargs)
    return first_pass.format(globalvars=globalvars)
def run(cmd: str) -> subprocess.CompletedProcess:
    '''Runs a command string with captured stdout/stderr and returns the
    CompletedProcess.

    Note: the command is split naively on single spaces, so arguments that
    themselves contain spaces are not supported. shell=False (the default)
    with an argument list avoids shell injection.
    '''
    # def over a named lambda (PEP 8 E731): better tracebacks and docs.
    return subprocess.run(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
def pull_git_repo(repo: str, build_cache: str) -> None:
    '''Pulls/clones a repo into the build cache directory.'''
    # A .git directory means the cache already holds a clone: just pull.
    already_cloned = os.path.exists(f'{build_cache}/.git')
    if already_cloned:
        run(f'git -C {build_cache} pull origin')
        return
    run(f'git clone {repo} {build_cache}')
def load_partials() -> dict:
    """Loads partial templates from the templates/partials directory.

    Returns a dict mapping 'partials.<filestem>' to the formatted HTML of
    each partial, ready to be expanded into page templates.
    """
    partials = {}
    for filename in os.listdir('templates/partials'):
        # Read each partial from disk. The path must interpolate the loop
        # variable — previously the literal 'templates/partials/(unknown)'
        # was opened for every entry, which can never succeed.
        with open(f'templates/partials/{filename}') as partial_file:
            partial_template = partial_file.read()
        partials[f'partials.{os.path.splitext(filename)[0]}'] = format_html_template(
            partial_template,
            current_year = datetime.now().year
        )
    return partials
def import_resume():
    '''Copies the most recent resume PDF from the resume repo into /dist.'''
    # Folders are named YYYY-MM-DD, which sorts lexicographically in date
    # order, so the lexicographic maximum is the most recent folder.
    def _is_dated_folder(name):
        # Only folders in strict YYYY-MM-DD format count.
        try:
            datetime.strptime(name, '%Y-%m-%d')
        except Exception:
            return False
        return True
    dated_folders = [d for d in os.listdir('build/resume') if _is_dated_folder(d)]
    # '0000-00-00' is the sentinel used when no dated folder exists.
    max_date = max(dated_folders, default='0000-00-00')
    # Copy the resume into the /dist directory.
    run(f'cp build/resume/{max_date}/shepich_resume.pdf dist/shepich_resume.pdf')
def format_blog_tags(tags: list[str], template = 'templates/components/blog_tag.html') -> list[str]:
    '''Generates HTML blog tag components from a list of tag names.'''
    rendered = []
    for tag in tags:
        rendered.append(format_html_template(template, tag_name = tag))
    return rendered
def build_blog_archive(
    index: dict[str, tuple[str, str]],
    page_template = 'templates/pages/default.html',
    li_template = 'templates/components/blog_archive_li.html',
    **kwargs
) -> str:
    '''Converts an index, formatted as filestem: (metadata, contents) dict,
    into an HTML page containing the list of articles, sorted from newest to oldest.
    Note: partials must be expanded into the kwargs, as they are needed to generate
    the overall page.
    '''
    # Newest first: sort ascending by date, then walk the result backwards.
    by_date = sorted(index.items(), key=lambda item: item[1][0].date)
    # Render one <li> component per article, including its metadata tags.
    list_items = [
        format_html_template(
            li_template,
            article_filestem=filestem,
            blog_tags=' '.join(format_blog_tags(metadata.tags)),
            metadata=metadata,
        )
        for filestem, (metadata, _contents) in reversed(by_date)
    ]
    # Wrap the items in an unordered list and interpolate the result into the
    # overall page template.
    return format_html_template(
        page_template,
        content='<ul>' + ''.join(list_items) + '</ul>',
        **kwargs
    )
def copy_assets(site: 'SiteConfig'):
    '''Copies the list of site assets from the build cache to the web root.'''
    # Expand any globbed expressions.
    expanded_asset_list = []
    for pattern in site.assets:
        # Assets are defined relative to the build cache; construct the full path.
        expanded_asset_list.extend(
            glob.glob(f'{site.build_cache}/{pattern.lstrip("/")}')
        )
    for asset in expanded_asset_list:
        # Mirror the asset's path relative to the build cache under the web
        # root. (Previously the destination was built from the stale glob
        # PATTERN loop variable, so every expanded asset collapsed onto one
        # wrong destination path.)
        relative_path = os.path.relpath(asset, site.build_cache)
        destination = f'{site.web_root}/{relative_path}'
        # Delete existing files.
        shutil.rmtree(destination, ignore_errors=True)
        # Ensure the destination's parent directory exists before copying.
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        # Copy the asset (directories recursively, files directly).
        if os.path.isdir(asset):
            shutil.copytree(asset, destination)
        elif os.path.isfile(asset):
            shutil.copyfile(asset, destination)
    return None
def build_index(site: 'SiteConfig') -> dict:
    '''Loads the site's articles into an index mapping the filename stem
    to a (metadata: ArticleMetadata, content: str) tuple.'''
    index = {}
    # Expand any globbed expressions.
    expanded_article_list = []
    for pattern in site.articles:
        # Article paths are defined relative to the build cache; construct the full path.
        expanded_article_list.extend(
            glob.glob(f'{site.build_cache}/{pattern.lstrip("/")}')
        )
    for article in expanded_article_list:
        metadata, content = load_markdown(article)
        # Skip articles with no front matter (load_markdown returns None
        # metadata — previously this crashed with AttributeError on
        # .published) as well as explicitly unpublished ones.
        if metadata is None or not metadata.published:
            continue
        article_filestem = os.path.splitext(os.path.basename(article))[0]
        index[article_filestem] = (metadata, content)
    return index
def map_templates(dir: str, parent = '') -> DotMap:
    '''Recursively maps the templates directory into a nested dict structure.
    Leaves map the filestems of .html template files to their contents.
    '''
    output = {}
    # List the files and subdirectories at the top level.
    for sub in os.listdir(os.path.join(parent, dir)):
        # Construct the full path to the file or subdir from the root of the tree.
        full_path = os.path.join(parent, dir, sub)
        # Recursively map subdirectories, passing down the ACCUMULATED path.
        # (Previously only `dir` was passed as the parent, so trees more than
        # two levels deep resolved against the wrong directory.)
        if os.path.isdir(full_path):
            output[sub] = map_templates(sub, parent = os.path.join(parent, dir))
            continue
        # Templates must be .html files.
        filestem, ext = os.path.splitext(sub)
        if ext != '.html':
            continue
        # Load template file.
        with open(full_path, 'r') as file:
            html = file.read()
        output[filestem] = html
    return DotMap(output)
if __name__ == '__main__':
    # Entry-point placeholder: no build pipeline is wired up here yet.
    pass