Hugo博客一键自动部署以及本地备份

August 30, 2024 · 442 words · One minute

# Clear the previous build output but keep the repository metadata (.git and
# everything inside it) and the CNAME file.
find ./bilibytes.github.io -mindepth 1 ! -name ".git" ! -name "CNAME" ! -path "./bilibytes.github.io/.git/*" -exec rm -rf {} +

# Write the local HTML backup of the Markdown content.
python3 backup_website_in_html.py

# Build the site (presumably into ./bilibytes.github.io -- confirm publishDir).
hugo

cd bilibytes.github.io

# Safe to repeat: re-initialises an existing repository without losing history.
git init

# Prints an error but changes nothing when the remote is already configured.
git remote add origin https://github.com/bilibytes/bilibytes.github.io.git

git add .

# Use the current date and time as the commit message.
git commit -m "$(date)"

# Replay the new commit on top of the remote branch before pushing.
git pull --rebase origin main

git push --set-upstream origin main
import os
import re
import markdown
import requests
from pathlib import Path
import shutil
import yaml
from urllib.parse import urlparse

def extract_metadata_and_content(markdown_content):
    """Split a Markdown document into its YAML front matter and its body.

    The front matter is the block enclosed by ``---`` lines at the very start
    of the document. LF and CRLF line endings are both accepted, and the
    closing ``---`` may be the last line of the document.

    Args:
        markdown_content: Full text of a Markdown file.

    Returns:
        A ``(metadata, content)`` tuple. ``metadata`` is the parsed front
        matter as a dict, or ``{}`` when there is no front matter or when it
        does not parse to a mapping. ``content`` is the text after the closing
        delimiter, or the whole input when no front matter is found.

    Raises:
        yaml.YAMLError: If front matter is present but is not valid YAML.
    """
    match = re.match(
        r'^---\r?\n([\s\S]+?)\r?\n---(?:\r?\n|\Z)([\s\S]*)',
        markdown_content,
    )
    if not match:
        return {}, markdown_content

    metadata = yaml.safe_load(match.group(1))
    # safe_load yields None for a comment-only block and a scalar/list for
    # other non-mapping YAML; callers test membership with ``in``, so always
    # hand them a dict.
    if not isinstance(metadata, dict):
        metadata = {}
    return metadata, match.group(2)

def extract_content_after_title(markdown_content):
    """Return the text that follows the first Markdown heading.

    A heading is one or more ``#`` characters at the start of the text or
    right after a newline, followed by whitespace and some text. Everything
    after the end of that heading is returned; when no heading is found the
    input is returned unchanged.
    """
    heading = re.search(r'(?:^|\n)#+\s+(.+?)(?:\n|$)([\s\S]*)', markdown_content)
    return markdown_content if heading is None else heading.group(2)

def download_image(image_url, images_folder, retries=3):
    """Copy or download one image into ``images_folder``.

    ``http``/``https`` URLs are downloaded with up to ``retries`` attempts;
    anything else is treated as a path on the local filesystem and copied.

    Args:
        image_url: Remote URL or local path of the image.
        images_folder: Existing directory that receives the image file.
        retries: Maximum number of download attempts for remote images.

    Returns:
        The path ``html_images_local/<file name>`` to use as the new ``src``
        value, or ``None`` when the image could not be obtained.
    """
    parsed_url = urlparse(image_url)
    image_name = os.path.basename(parsed_url.path)
    if not image_name:
        # e.g. "https://host/": there is no file name to save under, and
        # opening the folder itself for writing would raise.
        print(f"Cannot derive a file name from image location: {image_url}")
        return None
    local_image_path = os.path.join(images_folder, image_name)

    if parsed_url.scheme in ('http', 'https'):
        downloaded = False
        for attempt in range(retries):
            try:
                # The context manager releases the streamed connection even
                # when the status is not 200 or the transfer fails midway.
                with requests.get(image_url, stream=True, timeout=5) as response:
                    if response.status_code == 200:
                        with open(local_image_path, 'wb') as out_file:
                            # iter_content decodes gzip/deflate bodies and
                            # reports read errors as requests exceptions.
                            for chunk in response.iter_content(chunk_size=8192):
                                out_file.write(chunk)
                        downloaded = True
                        break
                    print(f"Attempt {attempt + 1} got HTTP {response.status_code} for {image_url}")
            except requests.exceptions.RequestException as e:
                print(f"Attempt {attempt + 1} failed to download {image_url}: {e}")
        if not downloaded:
            # Also reached when retries <= 0, so a path is never returned for
            # a file that was not written.
            print(f"Failed to download image after {retries} attempts: {image_url}")
            return None
    else:
        if os.path.exists(image_url):
            shutil.copy(image_url, local_image_path)
        else:
            print(f"Local image not found: {image_url}")
            return None

    return os.path.join('html_images_local', image_name)

def update_image_paths(html_content, input_folder, images_folder):
    """Localise every ``<img>`` in ``html_content`` and rewrite its ``src``.

    Each distinct ``src`` is fetched once through ``download_image``. Values
    without a URL scheme are resolved relative to ``input_folder``. Images
    that cannot be obtained keep their original ``src``.

    Args:
        html_content: HTML fragment to process.
        input_folder: Base folder for relative image paths.
        images_folder: Directory that receives the image copies.

    Returns:
        The HTML with the ``src`` of every successfully localised image
        replaced by the path ``download_image`` returned.
    """
    import html  # stdlib; local import so no file-level import change is needed

    img_pattern = r'(<img[^>]+src=")([^">]+)"'

    # Map each src exactly as written in the HTML to its local replacement.
    # The key stays untouched so the rewrite below can find it again; only
    # the value handed to the downloader is unescaped and joined.
    replacements = {}
    for _, src in re.findall(img_pattern, html_content):
        if src in replacements:
            continue
        location = html.unescape(src)  # "&amp;" in the attribute means "&"
        if not urlparse(location).scheme:
            location = os.path.join(input_folder, location)
        replacements[src] = download_image(location, images_folder)

    def _swap_src(match):
        # Rewrite only inside the matched src attribute, never elsewhere.
        new_src = replacements.get(match.group(2))
        if not new_src:
            return match.group(0)
        return f'{match.group(1)}{html.escape(new_src, quote=True)}"'

    return re.sub(img_pattern, _swap_src, html_content)

def markdown_to_html(markdown_content, input_folder, output_folder):
    """Convert one Markdown document into a standalone, styled HTML page.

    Front matter is split off, the text after the first heading is rendered
    with the ``fenced_code`` and ``tables`` extensions, the front-matter
    ``title`` (if any) is prepended as an ``<h1>``, images are copied into
    ``<output_folder>/html_images_local`` and the result is wrapped by
    ``add_css_styles``.

    Args:
        markdown_content: Full text of the Markdown file.
        input_folder: Base folder used to resolve relative image paths.
        output_folder: Folder that will contain the generated HTML.

    Returns:
        The complete HTML document as a string.
    """
    import html  # stdlib; local import so no file-level import change is needed

    metadata, content = extract_metadata_and_content(markdown_content)
    if not isinstance(metadata, dict):
        # Front matter that is not a YAML mapping carries no title.
        metadata = {}
    content_to_convert = extract_content_after_title(content)
    html_content = markdown.markdown(content_to_convert, extensions=['fenced_code', 'tables'])

    title = metadata.get('title')
    if title is not None:
        # The title is plain text from YAML: escape it so "<" or "&" cannot
        # break the page, and str() it because YAML may parse it as a number.
        title_html = f"<h1>{html.escape(str(title), quote=False)}</h1>\n"
        html_content = title_html + html_content

    images_folder = os.path.join(output_folder, 'html_images_local')
    os.makedirs(images_folder, exist_ok=True)

    html_content = update_image_paths(html_content, input_folder, images_folder)
    html_with_style = add_css_styles(html_content)

    return html_with_style

def add_css_styles(html_content):
    """Wrap an HTML fragment in a complete page with the backup stylesheet.

    The page declares an HTML5 doctype and UTF-8 as its character set, so a
    file saved as UTF-8 shows non-ASCII text correctly when opened straight
    from disk.

    Args:
        html_content: HTML fragment to place inside ``<body>``.

    Returns:
        The full HTML document as a string.
    """
    style = '''
    <style>
        body {
            font-family: Arial, sans-serif;
            line-height: 1.6;
            margin: 20px;
            max-width: 800px;
            margin-left: auto;
            margin-right: auto;
        }
        img {
            max-width: 100%;
            height: auto;
            display: block;
            margin: 10px 0;
        }
        pre {
            background-color: #f4f4f4;
            padding: 10px;
            border-radius: 5px;
            overflow-x: auto;
        }
        code {
            font-family: monospace;
            background-color: #f4f4f4;
            padding: 2px 4px;
            border-radius: 3px;
        }
        h1, h2, h3, h4, h5, h6 {
            color: #333;
        }
        p {
            margin: 10px 0;
        }
    </style>
    '''
    # The charset declaration comes first in <head> so it is read before any
    # other content.
    head = f'<meta charset="utf-8">{style}'
    return f"<!DOCTYPE html>\n<html><head>{head}</head><body>{html_content}</body></html>"

def process_markdown_files(input_folder, output_folder):
    """Convert every ``*.md`` file under ``input_folder`` to HTML.

    The folder is searched recursively. Files directly inside
    ``input_folder`` are written as ``<stem>.html``. Files in sub-folders get
    the sub-folder names prefixed and joined with ``_`` (``a/index.md``
    becomes ``a_index.html``), so same-named files in different sub-folders
    no longer overwrite each other. Output stays flat because the generated
    pages reference images relative to ``output_folder``.

    Args:
        input_folder: Folder containing the Markdown sources.
        output_folder: Folder that receives the HTML files; created if missing.
    """
    os.makedirs(output_folder, exist_ok=True)
    input_root = Path(input_folder)

    # Sorted so files are processed in the same order on every run.
    for md_file in sorted(input_root.rglob('*.md')):
        with open(md_file, 'r', encoding='utf-8') as file:
            markdown_content = file.read()

        html_content = markdown_to_html(markdown_content, input_folder, output_folder)

        # Flatten the path relative to the input root into a single file name.
        parent_parts = md_file.relative_to(input_root).parent.parts
        output_name = '_'.join((*parent_parts, md_file.stem)) + '.html'
        output_file = Path(output_folder) / output_name

        with open(output_file, 'w', encoding='utf-8') as file:
            file.write(html_content)

        print(f"Generated HTML: {output_file}")

# Markdown source folders, one per site section
input_folder_blog = './content/blog'
input_folder_tech = './content/tech'
input_folder_writings = './content/writings'
# Destination for the generated HTML files and their images
output_folder = './Backup_in_HTML'

# Convert each section in turn; all sections share one output folder
for section_folder in (input_folder_blog, input_folder_tech, input_folder_writings):
    process_markdown_files(section_folder, output_folder)

(2024.08.30 苏黎世)

Hugo博客一键自动部署以及本地备份