Skip to content

Conversation

@zeroSteiner
Copy link
Contributor

@zeroSteiner zeroSteiner commented Dec 17, 2025

Fixes #20603 by updating the module code to merge the target Arch and Platform entries into the module's top-level data. An AI-generated script was then used to update all exploit modules to remove redundant entries that are no longer required. As a bonus, this also enables Author and References to be included in the target hash, and they are merged up as well. I then went through and moved a bunch of authors into their targets when there was a comment that made it clear they were the author of one or more specific targets. There should be no noticeable changes for end users, but module authors no longer have to worry about setting the merged module metadata when they have already specified it in the target.

The diff was relatively clear before robocop got involved. I recommend reviewing the changes one commit at a time.

Verification

  • See platform and arch is set using exploit/multi/http/atlassian_confluence_rce_cve_2023_22527 as an example
  • Use the module, then run info and see the Platform and Arch data in the module info at the top
  • Review the source code and see that the Platform and Arch are now only specified in the targets
  • See authors are merged up using exploit/multi/script/web_delivery as an example
  • Use the module, then run info and see that bcoles in particular shows up exactly once as a module author
  • Review the source code and see his name is not listed at the module level but is included in multiple targets, this also demonstrates that the list is uniqued
  • To review the changes at scale, run TZ=UTC tools/automation/cache/build_new_cache.sh from the framework directory
  • Review the new db/modules_metadata_base.json, see that there are changes but the changes are due to inaccuracies that were previously in the modules
AI refactor code

Including this for future reference but it has served its purpose.

#!/usr/bin/env python3
"""
Refactor Metasploit modules to remove redundant Platform/Arch definitions
at the module level when they're already defined in Targets.
"""

import re
import argparse
from pathlib import Path
from typing import List, Tuple, Optional


def extract_targets_info(content: str) -> Tuple[bool, bool]:
    """
    Report whether the module's Targets array mentions Platform and/or Arch.

    The first ``'Targets' => [`` in *content* is located and its matching
    closing bracket is found by counting ``[`` / ``]`` characters. The text
    between the two brackets is then scanned for ``'Platform' =>`` and
    ``'Arch' =>`` keys.

    Returns: (has_platform_in_targets, has_arch_in_targets). Both values are
    False when there is no Targets array or its brackets never balance.
    """
    opener = re.search(r"'Targets'\s*=>\s*\[", content, re.DOTALL)
    if opener is None:
        return False, False

    body_start = opener.end()
    body_end = None
    depth = 1  # the opening bracket consumed by the regex above

    # Walk forward until the bracket that closes the Targets array.
    # NOTE: brackets are counted textually; ones inside strings or comments
    # are counted as well.
    for idx in range(body_start, len(content)):
        char = content[idx]
        if char == '[':
            depth += 1
        elif char == ']':
            depth -= 1
            if depth == 0:
                body_end = idx
                break

    if body_end is None:
        # Ran off the end of the file without balancing the brackets.
        return False, False

    targets_body = content[body_start:body_end]

    return (
        re.search(r"'Platform'\s*=>", targets_body) is not None,
        re.search(r"'Arch'\s*=>", targets_body) is not None,
    )


def find_module_level_definitions(content: str) -> Tuple[Optional[re.Match], Optional[re.Match]]:
    """
    Find module-level Platform and Arch definitions (commented or uncommented).

    Only the text *before* the first ``'Targets' => [`` is searched, so keys
    inside the Targets array are never matched. Definitions that appear after
    the Targets array are not found either. When there is no Targets array
    the whole content is searched.

    Each pattern must match one complete line. In a match, group 1 is the
    leading indentation, group 2 is the optional ``#`` comment marker and
    group 3 is the ``'Key' => value`` text without the trailing comma.

    Returns: (platform_match, arch_match). Each is an ``re.Match`` whose
    positions are valid indices into *content*, or None if nothing matched.
    """
    targets_start_match = re.search(r"'Targets'\s*=>\s*\[", content)

    # The searched text is always a prefix of `content`, so match positions
    # can be used against `content` directly without any offset adjustment.
    if targets_start_match:
        search_content = content[:targets_start_match.start()]
    else:
        search_content = content

    # Platform at module level (may be commented).
    # Handles ['x', 'y'], %w[x y], and %w{x y} notation.
    # Uses [ \t]* instead of \s* to avoid matching across newlines.
    platform_pattern = r"^([ \t]*)(#[ \t]*)?(\'Platform\'[ \t]*=>[ \t]*(?:\[.*?\]|%w[\[\{].*?[\]\}]))[ \t]*,?[ \t]*$"

    # Arch at module level (may be commented).
    # Handles both a single constant and a bracketed array.
    # Digits are allowed in constant names: ARCH_X86, ARCH_X64, etc.
    arch_pattern = r"^([ \t]*)(#[ \t]*)?(\'Arch\'[ \t]*=>[ \t]*(?:\[.*?\]|[A-Z0-9_]+))[ \t]*,?[ \t]*$"

    platform_match = re.search(platform_pattern, search_content, re.MULTILINE)
    arch_match = re.search(arch_pattern, search_content, re.MULTILINE)

    return platform_match, arch_match


def remove_line_with_surrounding_whitespace(content: str, match: re.Match) -> str:
    """
    Return *content* with the whole line containing *match* cut out.

    The removed span runs from just after the newline preceding the match
    (or the start of the text) through the first newline at or after the end
    of the match (or the end of the text when there is none).
    """
    # rfind() yields -1 when there is no earlier newline, so adding one gives
    # index 0 in that case and "just past the newline" otherwise.
    cut_from = content.rfind('\n', 0, match.start()) + 1

    newline_after = content.find('\n', match.end())
    cut_to = len(content) if newline_after == -1 else newline_after + 1

    return content[:cut_from] + content[cut_to:]


def refactor_module(file_path: Path, dry_run: bool = True) -> Optional[str]:
    """
    Refactor a single module file.

    A module-level Platform (or Arch) line is removed only when the Targets
    array of the same file also defines Platform (or Arch).

    Args:
        file_path: The module file to inspect.
        dry_run: When True, nothing is written and the message only
            describes what would be removed.

    Returns:
        None if no changes are needed, otherwise a status message starting
        with "MODIFIED", "WOULD MODIFY" or "ERROR". Read and write failures
        are both reported as "ERROR ..." messages rather than raised, so a
        single bad file does not abort a batch run.
    """
    try:
        content = file_path.read_text()
    except (OSError, UnicodeError) as e:
        return f"ERROR reading {file_path}: {e}"

    # Check if this file has Targets with Platform/Arch
    has_platform_in_targets, has_arch_in_targets = extract_targets_info(content)
    if not (has_platform_in_targets or has_arch_in_targets):
        return None  # Nothing to refactor

    # Find module-level definitions
    platform_match, arch_match = find_module_level_definitions(content)

    # (label, match) pairs for every line that has to go
    removals = []
    if has_platform_in_targets and platform_match is not None:
        removals.append(('Platform', platform_match))
    if has_arch_in_targets and arch_match is not None:
        removals.append(('Arch', arch_match))

    if not removals:
        return None

    removed_labels = ', '.join(label for label, _ in removals)

    if dry_run:
        return f"WOULD MODIFY {file_path}: Would remove {removed_labels}"

    # Remove from the bottom of the file upwards so the positions of the
    # matches that are still pending stay valid.
    modified_content = content
    for _, match in sorted(removals, key=lambda item: item[1].start(), reverse=True):
        modified_content = remove_line_with_surrounding_whitespace(modified_content, match)

    try:
        file_path.write_text(modified_content)
    except (OSError, UnicodeError) as e:
        return f"ERROR writing {file_path}: {e}"

    return f"MODIFIED {file_path}: Removed {removed_labels}"


def find_module_files(base_path: Path, pattern: str = "**/*.rb") -> List[Path]:
    """
    Find all Ruby module files below *base_path*.

    Args:
        base_path: Directory to search.
        pattern: Glob pattern, relative to *base_path*.

    Returns:
        The matching regular files in sorted order. Directories whose names
        happen to match the pattern are skipped because they cannot be read
        as modules, and sorting makes repeated runs list files in the same
        order regardless of the filesystem.
    """
    return sorted(path for path in base_path.glob(pattern) if path.is_file())


def main():
    """
    Command-line entry point.

    Parses the arguments, collects the files to process (the given file
    itself, or every file under the given directory that matches
    ``--pattern``), runs ``refactor_module`` on each one and prints a
    per-file status line followed by a summary.

    Files are only modified when ``--apply`` is given; ``--dry-run`` is the
    default and is accepted purely for explicitness. A path that does not
    exist is rejected with a usage error instead of silently processing
    zero files.
    """
    parser = argparse.ArgumentParser(
        description="Refactor Metasploit modules to remove redundant Platform/Arch definitions"
    )
    parser.add_argument(
        "path",
        type=Path,
        help="Path to modules directory or single module file"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        default=True,
        help="Show what would be changed without modifying files (default)"
    )
    parser.add_argument(
        "--apply",
        action="store_true",
        help="Actually modify the files (overrides --dry-run)"
    )
    parser.add_argument(
        "--pattern",
        default="**/*.rb",
        help="Glob pattern for finding module files (default: **/*.rb)"
    )

    args = parser.parse_args()

    # Fail loudly on a mistyped path; parser.error() prints the usage text
    # and exits with status 2.
    if not args.path.exists():
        parser.error(f"path does not exist: {args.path}")

    # --apply is the only switch that matters: without it this is a dry run.
    dry_run = not args.apply

    if args.path.is_file():
        files = [args.path]
    else:
        files = find_module_files(args.path, args.pattern)

    print(f"{'DRY RUN - ' if dry_run else ''}Processing {len(files)} files...")
    print()

    modified_count = 0
    error_count = 0

    for file_path in files:
        result = refactor_module(file_path, dry_run)
        if not result:
            continue  # nothing to change in this file
        print(result)
        if result.startswith("ERROR"):
            error_count += 1
        else:
            modified_count += 1

    print()
    print(f"Summary: {modified_count} files {'would be ' if dry_run else ''}modified, {error_count} errors")

    if dry_run and modified_count > 0:
        print("\nRun with --apply to actually modify the files")

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

@zeroSteiner zeroSteiner force-pushed the feat/lib/mod-merge-target-info branch from 9e5eb58 to 8f415a4 Compare December 17, 2025 22:23
Target authors were selected based on comments that indicated that the
author was only responsible for a set of discrete targets. Authors that
were noted as assisting with target testing, check module development,
etc. were left at the module level.
@zeroSteiner zeroSteiner force-pushed the feat/lib/mod-merge-target-info branch 3 times, most recently from 84868ba to 2695a94 Compare December 18, 2025 14:50
@zeroSteiner zeroSteiner force-pushed the feat/lib/mod-merge-target-info branch from 2695a94 to 602adeb Compare December 18, 2025 15:23
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

Module Metadata Fields Arch and Platform Should be Automatic

1 participant