diff --git a/.github/workflows/test-archive-scripts.yml b/.github/workflows/test-archive-scripts.yml new file mode 100644 index 00000000000..e74eb5b4ed4 --- /dev/null +++ b/.github/workflows/test-archive-scripts.yml @@ -0,0 +1,43 @@ +name: Smoke test archive scripts + +on: + push: + branches: + - archive-creation-docs-v2 + paths: + - "src/current/create_all_archives_fixed.py" + - "src/current/create_single_archive.py" + - "src/current/snapshot_relative.py" + - "src/current/test_archive_smoke.py" + pull_request: + paths: + - "src/current/create_all_archives_fixed.py" + - "src/current/create_single_archive.py" + - "src/current/snapshot_relative.py" + - "src/current/test_archive_smoke.py" + +jobs: + smoke-test: + runs-on: ubuntu-latest + defaults: + run: + working-directory: src/current + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: pip install beautifulsoup4 requests + + - name: Run smoke tests + run: python3 test_archive_smoke.py + + - name: Verify snapshot_relative.py is unchanged + # Confirms the scripts never modify the checked-in source file. + # Use -- to disambiguate path from a revision; path is relative to + # working-directory (src/current). + run: git diff --exit-code -- snapshot_relative.py diff --git a/src/current/CREATE_PORTABLE_ARCHIVE.md b/src/current/CREATE_PORTABLE_ARCHIVE.md new file mode 100644 index 00000000000..08c8b52f11d --- /dev/null +++ b/src/current/CREATE_PORTABLE_ARCHIVE.md @@ -0,0 +1,190 @@ +# Creating a Portable CockroachDB Documentation Archive + +This guide shows how to create a fully portable, offline documentation archive for any CockroachDB version that works with **any folder name** and has **working navigation**. 
+ +## What You'll Get + +- **Portable Archive**: Works when renamed to any folder name +- **Dynamic Navigation**: Automatically detects archive location +- **Comprehensive Sidebars**: Full navigation on every page +- **Fully Offline**: No internet connection required +- **Version-Specific**: Contains only docs relevant to the target version + +## Prerequisites + +- Jekyll site built and ready: `bundle exec jekyll build` +- Python 3.x installed +- BeautifulSoup4: `pip install beautifulsoup4` +- Requests: `pip install requests` + +## Quick Start + +### Create Archive for a Single Version + +```bash +# Navigate to the docs source directory +cd src/current + +# Create archive for any version (e.g., v23.1, v24.2, v25.4) +python3 create_single_archive.py v23.1 +``` + +This will create `cockroachdb-docs-v23.1-offline.zip` containing the complete offline documentation. + +### Create Archives for Multiple Versions + +```bash +# Create archives for multiple versions (default: v20.2, v21.1, v21.2, v22.1, v22.2) +python3 create_all_archives_fixed.py +``` + +## The 14-Step Archive Creation Process + +The `create_single_archive.py` script automates the following steps: + +1. **Create base archive** - `snapshot_relative.py` +2. **Apply navigation fixes** - `fix_navigation_quick.py` +3. **Fix version placeholders** - Dynamic script for target version +4. **Remove non-target sidebars** - Keep only target version sidebar +5. **Clean target sidebar** - Remove references to newer versions +6. **Fix JavaScript sidebar** - `fix_js_sidebar_final.py` +7. **Fix remaining references** - `fix_remaining_v25_refs.py` +8. **Create advisories directory** - For security advisories JSON +9. **Copy advisories JSON** - From `_site/docs/advisories/internal/` +10. **Fix incomplete sidebars** - `fix_incomplete_sidebars.py` +11. **Make navigation dynamic** - `make_navigation_dynamic_v2.py` +12. **Fix root navigation** - `fix_root_navigation.py` +13. 
**Fix broken sidebar links** - `fix_broken_sidebar_links.py` +14. **Fix final broken links** - `fix_final_broken_links.py` + +## Output Structure + +``` +offline_snap/ (or any name you choose) +├── index.html # Root landing page +├── {version}/ # Version-specific documentation +│ ├── index.html +│ └── [documentation pages] +├── cockroachcloud/ # CockroachCloud docs +├── advisories/ # Security advisories +├── releases/ # Release notes +├── molt/ # MOLT migration tool docs +├── css/ # Stylesheets +├── js/ # JavaScript +├── images/ # Images +│ └── {version}/ # Version-specific images +├── fonts/ # Localized Google Fonts +└── _internal/ # Internal assets + └── sidebar-{version}.html # Navigation sidebar +``` + +## Required Scripts + +### Main Scripts + +| Script | Purpose | +|--------|---------| +| `create_single_archive.py` | Creates a single version archive (recommended) | +| `create_all_archives_fixed.py` | Creates archives for multiple versions | +| `snapshot_relative.py` | Core archiver that creates the base structure | + +### Supporting Scripts (14-step process) + +| Script | Purpose | +|--------|---------| +| `fix_navigation_quick.py` | Basic navigation fixes | +| `fix_js_sidebar_final.py` | Remove newer version references from JavaScript | +| `fix_remaining_v25_refs.py` | Final URL cleanup | +| `fix_incomplete_sidebars.py` | Ensures all pages have comprehensive sidebar | +| `make_navigation_dynamic_v2.py` | Makes navigation work with any folder name | +| `fix_root_navigation.py` | Fixes navigation from root index.html | +| `fix_broken_sidebar_links.py` | Removes broken links from sidebars | +| `fix_final_broken_links.py` | Final pass for remaining broken links | + +## Features + +### Dynamic Folder Detection + +The archive can be renamed to any folder name and navigation will continue to work: + +```javascript +// The JavaScript automatically detects the archive folder: +// Works with: my-docs/, cockroachdb-archive/, custom_name/, etc. 
+ +// Method 1: Look for _internal folder pattern +var internalMatch = currentPath.match(/\/([^\/]+)\/_internal\//); + +// Method 2: Look for known directory structure +var archiveMatch = currentPath.match(/\/([^\/]+)\/(v\d+\.\d+|cockroachcloud|releases)/); +``` + +### Cross-Directory Navigation + +- Navigate between version docs, cockroachcloud, advisories, and releases +- Proper relative path calculation from any page +- Sidebar works identically on all pages + +## Usage Instructions + +### Opening the Archive + +```bash +# Extract the archive +unzip cockroachdb-docs-v23.1-offline.zip + +# Open in browser (from within archive directory) +cd offline_snap +open index.html + +# Or use full path +open /path/to/offline_snap/index.html +``` + +### Sharing the Archive + +1. Share the zip file +2. User can extract and rename to anything: `my-docs/`, `cockroach-archive/`, etc. +3. Navigation will work automatically with the new name + +## Troubleshooting + +### Jekyll Build Missing + +```bash +# Ensure _site directory exists +bundle exec jekyll build +``` + +### Navigation Issues + +- **Problem**: Links go to wrong location +- **Solution**: Open `index.html` from within the archive directory + +### Folder Renaming Issues + +- **Problem**: Navigation breaks after renaming +- **Solution**: Re-apply the fix to the extracted archive with `python3 make_navigation_dynamic_v2.py <archive_dir> <version>` (this normally runs as step 11 during creation) + +### Missing Sidebars + +- **Problem**: Some pages have minimal sidebars +- **Solution**: Run `fix_incomplete_sidebars.py` on the archive + +## Version Support + +The scripts support creating archives for any CockroachDB version: + +- v2.1, v19.1, v19.2, v20.1, v20.2 +- v21.1, v21.2 +- v22.1, v22.2 +- v23.1, v23.2 +- v24.1, v24.2, v24.3 +- v25.1, v25.2, v25.3, v25.4 +- v26.1 + +## Notes + +- Archives grow in size with newer versions due to more documentation +- Each archive is typically 100-200MB +- Archives are self-contained and work completely offline +- Navigation auto-detects the archive folder name for 
portability diff --git a/src/current/README_ARCHIVE_CREATION.md b/src/current/README_ARCHIVE_CREATION.md new file mode 100644 index 00000000000..72e00ba2507 --- /dev/null +++ b/src/current/README_ARCHIVE_CREATION.md @@ -0,0 +1,188 @@ +# CockroachDB Offline Documentation Archive Creation Guide + +This guide explains how to create offline documentation archives for specific CockroachDB versions. + +## Prerequisites + +Before creating archives, ensure you have: + +1. **Jekyll Build Directory**: `_site/` directory with built documentation + - Run Jekyll build if not present: `bundle exec jekyll build` + +2. **Python 3**: Required for all scripts + +3. **Supporting Scripts**: All scripts listed in the "Required Scripts" section below + +4. **Disk Space**: Each archive is 100-200MB, ensure adequate space + +## Quick Start + +### Create Archives for Multiple Versions + +```bash +# Create archives for versions v20.2 through v22.2 +python3 create_all_archives_fixed.py +``` + +This will create archives for the default versions: v20.2, v21.1, v21.2, v22.1, v22.2 + +### Create Archive for a Single Version + +```bash +# Create archive for a specific version +python3 create_single_archive.py v23.1 +``` + +## The 14-Step Archive Creation Process + +Each archive goes through the following steps: + +1. **Create base archive** - `snapshot_relative.py` +2. **Apply navigation fixes** - `fix_navigation_quick.py` +3. **Fix version placeholders** - Dynamic script +4. **Remove non-target sidebars** - Shell command +5. **Clean target sidebar** - Python logic +6. **Fix JavaScript sidebar** - `fix_js_sidebar_final.py` +7. **Fix remaining references** - `fix_remaining_v25_refs.py` +8. **Create advisories directory** - Shell command +9. **Copy advisories JSON** - Shell command +10. **Fix incomplete sidebars** - `fix_incomplete_sidebars.py` +11. **Make navigation dynamic** - `make_navigation_dynamic_v2.py` +12. **Fix root navigation** - `fix_root_navigation.py` +13. 
**Fix broken sidebar links** - `fix_broken_sidebar_links.py` +14. **Fix final broken links** - `fix_final_broken_links.py` + +## Required Scripts + +### Main Scripts +- **`create_all_archives_fixed.py`** - Creates multiple version archives with all fixes +- **`create_single_archive.py`** - Creates a single version archive +- **`make_navigation_dynamic_v2.py`** - Makes navigation work with any folder name (version-aware) + +### Supporting Scripts (14-step process) +- **`snapshot_relative.py`** - Creates the initial archive structure +- **`fix_navigation_quick.py`** - Applies initial navigation fixes +- **`fix_js_sidebar_final.py`** - Fixes JavaScript sidebar functionality +- **`fix_remaining_v25_refs.py`** - Removes references to newer versions +- **`fix_incomplete_sidebars.py`** - Completes sidebar HTML structure +- **`fix_root_navigation.py`** - Fixes navigation for root-level files +- **`fix_broken_sidebar_links.py`** - Repairs broken links in sidebars +- **`fix_final_broken_links.py`** - Final pass to fix any remaining broken links + +## Common Issues and Solutions + +### Issue 1: Navigation Links Go to System Paths +**Symptom**: Links resolve to `file:///Users/username/Documents/index.html` instead of staying in archive + +**Cause**: The `make_navigation_dynamic.py` script has a hardcoded version (v19.2) in the pattern + +**Solution**: Use `make_navigation_dynamic_v2.py` which accepts the target version as a parameter + +### Issue 2: Sidebar Shows Newer Versions +**Symptom**: Archive for v20.1 shows links to v25.x in sidebar + +**Cause**: Sidebar cleaning step didn't remove all newer version references + +**Solution**: The script automatically removes references to versions newer than the target + +### Issue 3: Archive Doesn't Work When Renamed +**Symptom**: Navigation breaks when archive folder is renamed from `offline_snap` + +**Cause**: Hardcoded folder name in navigation JavaScript + +**Solution**: `make_navigation_dynamic_v2.py` makes the navigation detect 
any folder name + +### Issue 4: JavaScript Syntax Errors +**Symptom**: Browser console shows syntax errors, navigation completely broken + +**Cause**: Missing arguments in JavaScript `replace()` calls + +**Solution**: The scripts automatically fix these syntax errors during creation + +## Archive Structure + +Each archive contains: +``` +offline_snap/ +├── _internal/ +│ └── sidebar-vX.Y.html # Version-specific sidebar +├── v[version]/ # Version-specific documentation +├── releases/ # Release notes +├── advisories/ # Security advisories +├── cockroachcloud/ # Cloud documentation +├── molt/ # MOLT documentation +└── index.html # Main entry point +``` + +## Testing Archives + +1. **Extract the archive**: + ```bash + unzip cockroachdb-docs-v20.1-offline.zip + ``` + +2. **Open in browser**: + ```bash + open offline_snap/index.html + ``` + +3. **Test navigation**: + - Click "Docs Home" - should stay within archive + - Click version-specific links - should navigate correctly + - Check that sidebar shows only appropriate versions + +## Version Support + +The scripts support creating archives for any CockroachDB version. Common versions: +- v2.1, v19.2, v20.1, v20.2 +- v21.1, v21.2 +- v22.1, v22.2 +- v23.1, v23.2 +- v24.1, v24.2, v24.3 +- v25.1, v25.2, v25.3, v25.4 +- v26.1 + +## Advanced Usage + +### Customizing the Archive Creation + +Edit `create_all_archives_fixed.py` to: +- Change which versions are created (modify the `versions` list) +- Adjust the cleaning logic for sidebars +- Add additional fix steps + +### Manual Navigation Fix + +If you need to fix navigation in an existing archive: + +```bash +# Extract archive +unzip cockroachdb-docs-vX.Y-offline.zip + +# Apply navigation fix with correct version +python3 make_navigation_dynamic_v2.py offline_snap vX.Y + +# Re-create archive +zip -r cockroachdb-docs-vX.Y-offline.zip offline_snap/ +``` + +## Troubleshooting + +### Scripts Not Found +Ensure all supporting scripts are in the same directory as the main scripts. 
+ +### Jekyll Build Missing +Run `bundle exec jekyll build` to create the `_site` directory. + +### Out of Disk Space +Each archive is 100-200MB. The creation process also needs temporary space. + +### Navigation Still Broken After Fixes +Check browser console for JavaScript errors. The issue is likely a syntax error that needs fixing. + +## Notes + +- Archives grow in size with newer versions due to more documentation +- The creation process takes ~2-3 minutes per version +- Archives are self-contained and work offline +- Navigation auto-detects the archive folder name for portability \ No newline at end of file diff --git a/src/current/create_all_archives_fixed.py b/src/current/create_all_archives_fixed.py new file mode 100644 index 00000000000..1163e3bfe8f --- /dev/null +++ b/src/current/create_all_archives_fixed.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python3 +""" +Create all documentation archives (v20.2, v21.1, v21.2, v22.1, v22.2) +with FIXED navigation that properly detects the version +""" +import subprocess +import shutil +import re +import tempfile +from pathlib import Path +import time +from bs4 import BeautifulSoup + + +def run_cmd(cmd_list, description): + """Run a command using subprocess list form (no shell=True).""" + print(f" {description}...") + result = subprocess.run(cmd_list, capture_output=True, text=True) + if result.returncode != 0: + print(f" Warning: {result.stderr[:200] if result.stderr else 'Command had issues but continuing'}") + return result.returncode == 0 + + +def fix_navigation_in_archive(version): + """Apply navigation fixes to the archive using BeautifulSoup for DOM safety. 
+ + Regex is applied only to the text content of + +''' + + html = re.sub(r"", nav_deps + "\n", html, flags=re.IGNORECASE) + + # Add vibrant sidebar styles (FROM SCRIPT 1) + offline_styles = self.get_vibrant_sidebar_styles(prefix) + html = re.sub(r"", offline_styles + "\n", html, flags=re.IGNORECASE) + + # Simple navgoco initialization (FROM SCRIPT 1) + nav_init = """""" + + html = re.sub(r"
+ html_content = html_content.replace('', '\n' + banner_html) + + # Write back the modified content + index_path.write_text(html_content, encoding="utf-8") + self.log("Added archived banner to existing index.html", "SUCCESS") + else: + self.log("No existing index.html found to modify", "WARNING") + + def build(self): + """Main build process with hybrid optimizations""" + print("\n" + "="*60) + print("🚀 COCKROACHDB OFFLINE DOCUMENTATION ARCHIVER (HYBRID+)") + print("="*60) + + # Verify paths + self.log(f"Jekyll Root: {JEKYLL_ROOT}") + self.log(f"Site Root: {SITE_ROOT}") + self.log(f"Docs Root: {DOCS_ROOT}") + self.log(f"Output: {OUTPUT_ROOT}") + self.log(f"Target Version: {TARGET_VERSION}") + + if not SITE_ROOT.exists(): + self.log("Site root not found! Run 'jekyll build' first.", "ERROR") + return False + + # Clean output directory + if OUTPUT_ROOT.exists(): + self.log("Cleaning existing output directory...") + shutil.rmtree(OUTPUT_ROOT) + OUTPUT_ROOT.mkdir(parents=True) + + # Use selective asset copying (FROM SCRIPT 2) + self.copy_selective_assets() + + # Ensure critical navigation assets + self.log("\n--- Ensuring Navigation Assets ---") + self.ensure_asset( + "jquery.min.js", + [DOCS_ROOT / "js" / "jquery.min.js", SITE_ROOT / "js" / "jquery.min.js"], + "https://code.jquery.com/jquery-3.6.3.min.js", + OUTPUT_ROOT / "js" + ) + self.ensure_asset( + "jquery.cookie.min.js", + [DOCS_ROOT / "js" / "jquery.cookie.min.js", SITE_ROOT / "js" / "jquery.cookie.min.js"], + "https://cdnjs.cloudflare.com/ajax/libs/jquery-cookie/1.4.1/jquery.cookie.min.js", + OUTPUT_ROOT / "js" + ) + self.ensure_asset( + "jquery.navgoco.min.js", + [DOCS_ROOT / "js" / "jquery.navgoco.min.js", SITE_ROOT / "js" / "jquery.navgoco.min.js"], + "https://raw.githubusercontent.com/tefra/navgoco/master/src/jquery.navgoco.js", + OUTPUT_ROOT / "js" + ) + self.ensure_asset( + "jquery.navgoco.css", + [DOCS_ROOT / "css" / "jquery.navgoco.css", SITE_ROOT / "css" / "jquery.navgoco.css"], + 
"https://raw.githubusercontent.com/tefra/navgoco/master/src/jquery.navgoco.css", + OUTPUT_ROOT / "css" + ) + + # Load sidebar + self.log("\n--- Loading Sidebar ---") + self.load_sidebar() + + # Process HTML files with stricter version filtering (FROM SCRIPT 2) + self.log("\n--- Processing HTML Files ---") + + files_to_process = [] + + # Only target version files + version_dir = DOCS_ROOT / TARGET_VERSION + if version_dir.exists(): + files_to_process.extend(list(version_dir.rglob("*.html"))) + self.log(f"Found {len(files_to_process)} files in {TARGET_VERSION}/", "SUCCESS") + + # Common pages (but exclude other version directories) + for pattern in COMMON_PAGES: + if '*' in pattern: + for file_path in DOCS_ROOT.glob(pattern): + # Skip other version directories + rel_path = file_path.relative_to(DOCS_ROOT) + if (rel_path.parts and + rel_path.parts[0].startswith('v') and + rel_path.parts[0] != TARGET_VERSION): + continue + files_to_process.append(file_path) + else: + file_path = DOCS_ROOT / pattern + if file_path.exists(): + files_to_process.append(file_path) + + # Remove duplicates and filter out unwanted versions + filtered_files = [] + for file_path in set(files_to_process): + rel_path = file_path.relative_to(DOCS_ROOT) + # Skip files from other version directories + if (rel_path.parts and + rel_path.parts[0].startswith('v') and + rel_path.parts[0] != TARGET_VERSION): + continue + filtered_files.append(file_path) + + files_to_process = filtered_files + self.log(f"Total files to process (after version filtering): {len(files_to_process)}") + + # Process each file with better error handling (FROM SCRIPT 2) + processed_count = 0 + error_count = 0 + + for i, file_path in enumerate(files_to_process, 1): + try: + if i % 25 == 0: + self.log(f"Progress: {i}/{len(files_to_process)} ({i*100//len(files_to_process)}%)") + + self.process_html_file(file_path) + processed_count += 1 + + except Exception as e: + error_count += 1 + self.log(f"Failed to process {file_path}: {e}", 
"ERROR") + # Continue with next file instead of crashing + continue + + self.log(f"Successfully processed {processed_count} files, {error_count} errors", "SUCCESS") + + # Final cleanup steps + self.log("\n--- Final Steps ---") + self.fix_css_images() + self.download_google_fonts() + self.create_professional_index_page() # FROM SCRIPT 2 + + # Enhanced summary + print("\n" + "="*60) + self.log("HYBRID ARCHIVE COMPLETE!", "SUCCESS") + self.log(f"Output directory: {OUTPUT_ROOT.resolve()}") + self.log(f"Total files: {len(self.processed_files)}") + self.log(f"Total broken URLs removed: {self.total_broken_urls}", "SUCCESS") + + # Navigation summary + if self.comprehensive_sidebar_html: + self.log("✅ Comprehensive sidebar extracted and applied to all pages", "SUCCESS") + else: + self.log("⚠️ No comprehensive sidebar found - using original individual processing", "WARNING") + + self.log("🟣 Vibrant #6933FF sidebar styling", "SUCCESS") + self.log("🏠 Professional homepage with archived banner", "SUCCESS") + self.log("🔗 ORIGINAL working navigation logic restored", "SUCCESS") + self.log("⚡ Selective asset copying for reduced size", "SUCCESS") + self.log("🔧 Robust error handling and progress reporting", "SUCCESS") + self.log("✅ JavaScript URL processing: ORIGINAL working version", "SUCCESS") + self.log("✅ Filtered out non-v2.1 version links (v25.1, v24.x, etc.)", "SUCCESS") + self.log("✅ Broken sidebar links removed from comprehensive sidebar", "SUCCESS") + + print(f"\n🎉 Hybrid offline site built in {OUTPUT_ROOT}") + print(f"\n📦 To test: open {OUTPUT_ROOT}/index.html in your browser") + print(f"\n🟣 Vibrant purple sidebar + professional homepage + improved navigation logic") + print(f"\n⚡ Optimized assets - excluded non-{TARGET_VERSION} files") + print(f"\n🔧 {self.total_broken_urls} broken sidebar URLs cleaned up") + print(f"\n✨ Best features from all scripts combined!") + + return True + + +def main(): + """Main entry point""" + try: + archiver = OfflineArchiver() + success = 
archiver.build() + sys.exit(0 if success else 1) + except KeyboardInterrupt: + print("\n\nArchiving cancelled by user.") + sys.exit(1) + except Exception as e: + print(f"\n❌ Fatal error: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/current/test_archive_smoke.py b/src/current/test_archive_smoke.py new file mode 100644 index 00000000000..bf2fb7b4441 --- /dev/null +++ b/src/current/test_archive_smoke.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +""" +Smoke test for archive creation scripts. + +Creates a minimal offline_snap fixture, runs fix_navigation_in_archive(), +and verifies invariants without requiring a full Jekyll build. + +Run from src/current/: + python3 test_archive_smoke.py +""" +import hashlib +import shutil +import sys +import tempfile +from pathlib import Path + +SCRIPT_DIR = Path(__file__).parent +VERSION = "v22.2" + +# Broken JS that the fix should correct (matches what make_navigation_dynamic.py writes) +BROKEN_JS = ( + f"url = url.replace(/^stable\\//, ).replace(/\\/stable\\//, '/{VERSION}/');" +) +# Expected output after fix (no space after comma — matches replacement string in script) +FIXED_JS = ( + f"url = url.replace(/^stable\\//, '{VERSION}/')" + f".replace(/\\/stable\\//,'/{VERSION}/');" +) + +BROKEN_HTML = f""" + +No scripts here.
""" + + +def _sha256(path): + return hashlib.sha256(path.read_bytes()).hexdigest() + + +def test_fix_navigation(work_dir): + """fix_navigation_in_archive() patches broken JS only inside