Hugo博客一键自动部署以及本地备份
August 30, 2024 · 442 words · One minute
# Clear the previously generated site in ./bilibytes.github.io while keeping
# the Git metadata (.git and everything below it) and the CNAME file.
# The -path exclusion must use a slash before ".git"; with any other
# separator it never matches and files inside .git get deleted as well.
find ./bilibytes.github.io -mindepth 1 ! -name ".git" ! -name "CNAME" ! -path "./bilibytes.github.io/.git/*" -exec rm -rf {} +
# Export the Markdown posts to standalone HTML files (local backup).
python3 backup_website_in_html.py
# Rebuild the site (presumably configured to publish into
# ./bilibytes.github.io -- confirm against the Hugo config).
hugo
# Commit the generated site and push it to the GitHub Pages repository.
cd bilibytes.github.io
git init
git remote add origin https://github.com/bilibytes/bilibytes.github.io.git
git add .
# The current date/time is used as the commit message.
git commit -m "$(date)"
git pull --rebase origin main
git push --set-upstream origin main
import os
import re
import markdown
import requests
from pathlib import Path
import shutil
import yaml
from urllib.parse import urlparse
def extract_metadata_and_content(markdown_content):
    """Split a Markdown document into its YAML front matter and its body.

    The front matter is recognised only when the text starts with a ``---``
    line and is closed by another ``---`` line.

    Args:
        markdown_content: Full text of the Markdown file.

    Returns:
        A ``(metadata, content)`` tuple. ``metadata`` is always a dict: it is
        empty when there is no front matter or when the front matter does not
        parse to a mapping. ``content`` is the text after the closing ``---``
        line, or the whole input when no front matter was found.
    """
    match = re.match(r'^---\n([\s\S]+?)\n---\n([\s\S]*)', markdown_content)
    if not match:
        return {}, markdown_content

    metadata = yaml.safe_load(match.group(1))
    # safe_load yields None for comment-only front matter and may yield a
    # scalar or list; callers do key lookups, so normalise those to a dict.
    if not isinstance(metadata, dict):
        metadata = {}
    return metadata, match.group(2)
def extract_content_after_title(markdown_content):
    """Return the text that follows the first Markdown heading line.

    A heading is one or more ``#`` characters followed by whitespace and
    text, at the start of the input or right after a newline. When no such
    line exists the input is returned unchanged.
    """
    heading = re.search(r'(?:^|\n)#+\s+(.+?)(?:\n|$)([\s\S]*)', markdown_content)
    # Group 2 is everything after the heading line itself.
    return heading.group(2) if heading else markdown_content
def download_image(image_url, images_folder, retries=3):
    """Copy or download one image into ``images_folder``.

    Args:
        image_url: An ``http``/``https`` URL, or a path to a local file.
        images_folder: Existing directory that receives the image.
        retries: Maximum number of download attempts for remote images.

    Returns:
        The path ``html_images_local/<file name>`` to use in the generated
        HTML, or ``None`` when the image could not be obtained.
    """
    parsed_url = urlparse(image_url)
    image_name = os.path.basename(parsed_url.path)
    if not image_name:
        # e.g. a URL or path ending in "/": there is no file name to save as.
        print(f"Cannot derive a file name from image path: {image_url}")
        return None
    local_image_path = os.path.join(images_folder, image_name)

    if parsed_url.scheme in ('http', 'https'):
        for attempt in range(retries):
            try:
                # The context manager releases the streamed connection.
                with requests.get(image_url, stream=True, timeout=5) as response:
                    if response.status_code != 200:
                        # A non-200 answer is a failed attempt, not a success.
                        print(f"Attempt {attempt + 1} failed to download {image_url}: HTTP {response.status_code}")
                        continue
                    with open(local_image_path, 'wb') as out_file:
                        # iter_content applies content decoding, unlike .raw.
                        for chunk in response.iter_content(chunk_size=8192):
                            out_file.write(chunk)
                break
            except requests.exceptions.RequestException as e:
                print(f"Attempt {attempt + 1} failed to download {image_url}: {e}")
        else:
            # The loop finished without a successful download.
            print(f"Failed to download image after {retries} attempts: {image_url}")
            return None
    elif os.path.isfile(image_url):
        shutil.copy(image_url, local_image_path)
    else:
        print(f"Local image not found: {image_url}")
        return None

    return os.path.join('html_images_local', image_name)
def update_image_paths(html_content, input_folder, images_folder):
    """Localise every ``<img src="...">`` in ``html_content``.

    Each distinct image source is passed to ``download_image``; sources
    without a URL scheme are resolved against ``input_folder`` first. When
    that succeeds, the ``src`` attribute is rewritten to the returned path.

    Args:
        html_content: HTML text to scan and rewrite.
        input_folder: Base directory for scheme-less image paths.
        images_folder: Directory the images are saved into.

    Returns:
        The HTML with the ``src`` of every obtained image replaced.
    """
    sources = re.findall(r'<img[^>]+src="([^">]+)"', html_content)
    # Process each source once; a repeat would prefix an already rewritten path.
    for original_src in dict.fromkeys(sources):
        if urlparse(original_src).scheme:
            fetch_path = original_src
        else:
            fetch_path = os.path.join(input_folder, original_src)
        local_img_path = download_image(fetch_path, images_folder)
        if local_img_path:
            # Replace the text that is actually in the HTML (the original
            # src), not the joined filesystem path, and anchor on the whole
            # attribute so a path that is a substring of another is untouched.
            html_content = html_content.replace(
                f'src="{original_src}"', f'src="{local_img_path}"'
            )
    return html_content
def markdown_to_html(markdown_content, input_folder, output_folder):
    """Convert one Markdown document into a styled, standalone HTML page.

    The front matter is removed, everything up to and including the first
    heading line is dropped, and the rest is rendered with the
    ``fenced_code`` and ``tables`` extensions. A front-matter ``title``, if
    present, becomes the page's ``<h1>``. Images are saved under
    ``<output_folder>/html_images_local`` and their paths rewritten.

    Args:
        markdown_content: Full text of the Markdown file.
        input_folder: Base directory for scheme-less image paths.
        output_folder: Directory that receives the image sub-folder.

    Returns:
        The complete HTML page as a string.
    """
    from html import escape  # local import: only needed for the title

    metadata, content = extract_metadata_and_content(markdown_content)
    content_to_convert = extract_content_after_title(content)
    html_content = markdown.markdown(content_to_convert, extensions=['fenced_code', 'tables'])

    if isinstance(metadata, dict) and 'title' in metadata:
        # Escape the title so characters such as "&" or "<" stay valid HTML;
        # str() covers titles that YAML parsed as numbers or dates.
        title_text = escape(str(metadata['title']), quote=False)
        html_content = f"<h1>{title_text}</h1>\n" + html_content

    images_folder = os.path.join(output_folder, 'html_images_local')
    os.makedirs(images_folder, exist_ok=True)
    html_content = update_image_paths(html_content, input_folder, images_folder)

    return add_css_styles(html_content)
def add_css_styles(html_content):
    """Wrap an HTML fragment in a complete page with an embedded stylesheet.

    Args:
        html_content: HTML fragment to place inside ``<body>``.

    Returns:
        A full HTML document. It declares UTF-8 so that non-ASCII text is
        decoded correctly when the file is opened straight from disk.
    """
    style = '''
<style>
body {
font-family: Arial, sans-serif;
line-height: 1.6;
margin: 20px;
max-width: 800px;
margin-left: auto;
margin-right: auto;
}
img {
max-width: 100%;
height: auto;
display: block;
margin: 10px 0;
}
pre {
background-color: #f4f4f4;
padding: 10px;
border-radius: 5px;
overflow-x: auto;
}
code {
font-family: monospace;
background-color: #f4f4f4;
padding: 2px 4px;
border-radius: 3px;
}
h1, h2, h3, h4, h5, h6 {
color: #333;
}
p {
margin: 10px 0;
}
</style>
'''
    # The charset must come first in <head>: a local file has no HTTP header
    # to announce its encoding.
    return (
        '<!DOCTYPE html>\n'
        f'<html><head><meta charset="utf-8">{style}</head>'
        f'<body>{html_content}</body></html>'
    )
def process_markdown_files(input_folder, output_folder):
    """Render every ``*.md`` file under ``input_folder`` to HTML.

    The search is recursive. Each page is written directly into
    ``output_folder`` as ``<source stem>.html``, so two sources with the
    same stem write to the same output file. The output folder is created
    if it does not exist.
    """
    os.makedirs(output_folder, exist_ok=True)
    destination = Path(output_folder)
    for source_path in Path(input_folder).rglob('*.md'):
        markdown_text = source_path.read_text(encoding='utf-8')
        page = markdown_to_html(markdown_text, input_folder, output_folder)
        output_file = destination / f"{source_path.stem}.html"
        output_file.write_text(page, encoding='utf-8')
        print(f"Generated HTML: {output_file}")
# Content folders whose Markdown files are backed up
input_folder_blog = './content/blog'
input_folder_tech = './content/tech'
input_folder_writings = './content/writings'
# One destination folder, shared by all sections, for HTML files and images
output_folder = './Backup_in_HTML'
# Convert each section's Markdown files to HTML, in the order listed
for section_folder in (input_folder_blog, input_folder_tech, input_folder_writings):
    process_markdown_files(section_folder, output_folder)
(2024.08.30 苏黎世)