[Fortify] Unreal Engine Project(.uproject) 분석

1_make_compile_commands_json.bat

@REM Generate compile_commands.json for a UE project via UnrealBuildTool.
@SET UEBT_DIR="C:\UnrealEngine-5.3.2-release\Engine\Binaries\DotNET\UnrealBuildTool"
@SET UE_PROJECT="C:\UnrealEngine-5.3.2-release\Samples\Games\Lyra\Lyra.uproject"
@SET UE_TARGET="LyraGame Win64 Shipping"

@PUSHD %CD%
CD /D %UEBT_DIR%
REM If needed, build first so the UHT generated headers exist
REM UnrealBuildTool.exe -Mode=Build -Project=%UE_PROJECT% -Target=%UE_TARGET%
UnrealBuildTool.exe -Mode=GenerateClangDatabase -Project=%UE_PROJECT% -Target=%UE_TARGET%
@POPD
@PAUSE


2_ue5_fortify_translate.bat

@REM Translate all units into the Fortify build model, then scan to an FPR.
@CHCP 65001
@CLS
REM Drop any previous translation state and outputs for this build id.
sourceanalyzer -b UE5_Test -clean
DEL UE5_Test.fpr
RMDIR /S /Q C:\ue_fortify.tmp
python ue5_fortify_translate.py ^
--compile-db C:\\UnrealEngine-5.3.2-release\\compile_commands.json ^
--staging C:\\ue_fortify.tmp\\staging ^
--build-id UE5_Test ^
--msvc "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Tools\\MSVC\\14.36.32532\\bin\\Hostx64\\x64\\cl.exe" ^
--sourceanalyzer C:\\opentext\\sast-25.2.0\\bin\\sourceanalyzer.exe ^
--max-workers 4
REM Scan the translated build model and write the results to UE5_Test.fpr.
sourceanalyzer -b UE5_Test -Xms2g -Xmx8g -Xss1m -scan -logfile scan.log -f UE5_Test.fpr
@PAUSE

ue5_fortify_translate.py

# UE5 → Fortify SCA Translation Runner (Windows / Python 3)
# compile_commands.json → normalize RSP/GCD → Fortify SCA (sourceanalyzer) translation
# - Recursive @include normalization, absolute paths, split/combined option fixups
# - UE module Public/Private/Classes + UHT Inc injection (Engine & Plugins, with or without x64)
# - Safe console output, per-unit full log files (tail-focused console summary)
# - Preflight: if UHT-generated headers are missing, explain and exit with code 3

from __future__ import annotations
import argparse
import hashlib
import json
import os
import re
import subprocess
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Dict, List, Tuple

# ===== Defaults (use forward slashes in literals to keep this file canvas-safe) =====
# Fallback cl.exe used when a compile_commands entry does not name its compiler.
DEFAULT_MSVC = "C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.36.32532/bin/Hostx64/x64/cl.exe"
# Fortify SCA translator/scanner binary.
DEFAULT_SOURCEANALYZER = "C:/opentext/sast-25.2.0/bin/sourceanalyzer.exe"
# Default Fortify build id (-b), shared by the translate and scan phases.
DEFAULT_BUILD_ID = "UE5_Test"

# ===== Regex / constants =====
# Absolute Windows path starting with a drive letter, e.g. "C:/...".
WIN_DRIVE_RE = re.compile(r"^[A-Za-z]:/")
# A string fully wrapped in single or double quotes; group(1) is the inner text.
QUOTE_RE = re.compile(r'^[\"\'](.*)[\"\']$')
RSP_INCLUDE_LINE = re.compile(r'^@(?:"(.*?)"|\'(.*?)\'|(\S+))\s*$')  # @"…" | @'…' | @path
# Any quoted span inside a token (used to absolutize embedded paths in place).
ANY_QUOTED = re.compile(r'"(.*?)"|\'(.*?)\'')
# A line that consists solely of one quoted token (typically the source-file line of an RSP).
FIRST_TOKEN_IS_PATH_RE = re.compile(r'^[\"\'].*[\"\']\s*$')
# Extracts the @response-file (.rsp/.gcd) argument from a full compiler command line.
COMMAND_EXTRACT_RSP = re.compile(r'@(?:"(.*?\.(?:rsp|gcd))"|\'(.*?\.(?:rsp|gcd))\'|(\S+\.(?:rsp|gcd)))')

# cl options whose path argument may appear as the NEXT token (split form)...
OPTION_PREFIXES_SPLIT = {'/I','-I','/FI','/Fo','/Fp','/Fe','/Fd','/Fa','/FU','/external:I'}
# ...and the same options when the path is glued on, e.g. "/IC:/inc" (list preserves match order).
OPTION_PREFIXES_COMBINED = ['/I','-I','/FI','/Fo','/Fp','/Fe','/Fd','/Fa','/FU','/external:I']

# Memoization of already-normalized response files: original path -> staged copy.
STAGED_CACHE: Dict[Path, Path] = {}

# ===== Utils =====

def dequote(s: str) -> str:
    """Strip one pair of surrounding quote characters (either kind) from *s*,
    returning *s* unchanged when it is not fully quoted."""
    match = QUOTE_RE.match(s)
    if match is None:
        return s
    return match.group(1)


def as_abs(path: str, base: Path) -> str:
    """Return *path* as a normalized absolute path string.

    Backslashes are flattened to forward slashes, environment variables are
    expanded and surrounding quotes removed; relative paths are resolved
    against *base*."""
    candidate = dequote(os.path.expandvars(path.replace('\\', '/')))
    if WIN_DRIVE_RE.match(candidate):
        return str(Path(candidate).resolve())
    return str((base / candidate).resolve())


def split_preserving_quotes(s: str) -> List[str]:
    """Tokenize *s* on whitespace while keeping each quoted span — quote
    characters included — as a single token.

    An option glued directly to a quote (e.g. ``/I"C:/inc"``) is split into
    two tokens (``/I`` and ``"C:/inc"``); the split pair is re-joined later by
    the normalization pass."""
    tokens: List[str] = []
    buf: List[str] = []
    active_quote = ''

    def flush() -> None:
        # Emit the buffered characters (stripped) as one token, if any.
        if buf:
            tokens.append(''.join(buf).strip())
            buf.clear()

    for ch in s:
        if active_quote:
            buf.append(ch)
            if ch == active_quote:
                flush()
                active_quote = ''
        elif ch in '"\'':
            flush()
            buf.append(ch)
            active_quote = ch
        elif ch.isspace():
            flush()
        else:
            buf.append(ch)
    flush()  # trailing token (including an unterminated quoted span)
    return [t for t in tokens if t]


def normalize_token_path(token: str, base: Path) -> str:
    """Normalize a single command-line token so every path in it is absolute.

    Handled forms, in priority order:
      * ``@response-file`` references        -> ``@<abs path>``
      * combined option+path (``/IC:/inc``)  -> ``/I"<abs path>"``
      * tokens containing quoted spans       -> each quoted path absolutized
      * bare source/header file names        -> quoted absolute path
    Anything else is returned unchanged.
    """
    m = RSP_INCLUDE_LINE.match(token)
    if m:
        inner = next((g for g in m.groups() if g), None)
        if inner:
            return '@' + as_abs(inner, base)

    for pre in OPTION_PREFIXES_COMBINED:
        # Combined form only: longer than the prefix itself and not already quoted.
        if token.startswith(pre) and len(token) > len(pre) and '"' not in token and "'" not in token:
            return f'{pre}"{as_abs(token[len(pre):], base)}"'

    if '"' in token or "'" in token:
        def repl(mo):
            inner = mo.group(1) or mo.group(2) or ''
            return '"' + as_abs(inner, base) + '"'
        return ANY_QUOTED.sub(repl, token)

    # Idiom fix: one tuple-argument endswith() instead of the original chain of
    # nine or'ed endswith() calls — same suffix set, same behavior.
    if token.lower().endswith(('.c', '.cpp', '.cxx', '.cc', '.c++', '.rc', '.hpp', '.h', '.inl')):
        return '"' + as_abs(token, base) + '"'
    return token


def normalize_tokens_with_context(tokens: List[str], base: Path) -> List[str]:
    """Normalize a token list: join split option/path pairs (``/I C:/inc``)
    into single quoted tokens, and absolutize paths in all other tokens."""
    result: List[str] = []
    idx = 0
    count = len(tokens)
    while idx < count:
        tok = tokens[idx]
        if tok in OPTION_PREFIXES_SPLIT and idx + 1 < count:
            # Split form: consume the option and its path argument together.
            merged = as_abs(dequote(tokens[idx + 1]), base)
            result.append(f'{tok}"{merged}"')
            idx += 2
        else:
            result.append(normalize_token_path(tok, base))
            idx += 1
    return result

# ===== UE roots / module detection / UHT presence check =====

def find_ue_root_from(path_hint: Path) -> Path | None:
    """Walk *path_hint* and its ancestors for a directory named ``Engine``
    (case-insensitive) whose parent exists; return that parent — the UE root —
    or ``None`` when no such ancestor is found."""
    resolved = path_hint.resolve()
    for candidate in (resolved, *resolved.parents):
        if candidate.name.lower() == 'engine' and candidate.parent.exists():
            return candidate.parent
    return None


def uht_inc_roots(ue_root: Path) -> List[Path]:
    """Candidate locations under *ue_root* where UnrealHeaderTool drops its
    generated ``Inc/<Module>`` header trees (with and without the x64 and
    Win64 path segments)."""
    relative_roots = (
        'Engine/Intermediate/Build/Win64/UnrealEditor/Inc',
        'Engine/Intermediate/Build/Win64/x64/UnrealEditor/Inc',
        'Engine/Intermediate/Build/UnrealEditor/Inc',
    )
    return [ue_root / rel for rel in relative_roots]


def derive_module_roots_from_source(src_file: Path) -> List[Path]:
    """For a file under ``.../Source/<Module>/...`` return the module root
    ``.../Source/<Module>`` as a one-element list; otherwise an empty list."""
    parts = src_file.resolve().parts
    try:
        source_idx = parts.index('Source')
    except ValueError:
        return []
    if source_idx + 1 >= len(parts):
        # 'Source' is the last component: no module directory follows.
        return []
    return [Path(*parts[:source_idx + 2])]


def append_module_and_uht_includes(lines_out: List[str], ue_root: Path, src_file: Path) -> None:
    """Append ``/I"<dir>"`` lines to *lines_out* for every include directory
    relevant to *src_file*: its own directory, its module's
    Public/Private/Classes dirs, and every UHT-generated ``Inc/<Module>`` dir
    under the engine and its plugins.  Only existing directories are added,
    each at most once, preserving the order source -> module -> engine UHT ->
    plugin UHT."""
    seen: set[str] = set()

    def emit(directory: Path) -> None:
        directory = directory.resolve()
        if not directory.is_dir():
            return
        key = str(directory)
        if key in seen:
            return
        lines_out.append(f'/I"{key}"')
        seen.add(key)

    # 1) the directory holding the source file itself
    emit(src_file.parent)

    # 2) the owning module's conventional include directories
    for module_root in derive_module_roots_from_source(src_file):
        for subdir in ('Public', 'Private', 'Classes'):
            emit(module_root / subdir)

    # 3) engine-level UHT output: Inc/<Module> for every module present
    for inc_root in uht_inc_roots(ue_root):
        inc_root = inc_root.resolve()
        if inc_root.is_dir():
            for module_dir in inc_root.iterdir():
                if module_dir.is_dir():
                    emit(module_dir)

    # 4) plugin-level UHT output (both with and without the x64 segment)
    plugins_root = (ue_root / 'Engine/Plugins').resolve()
    if not plugins_root.is_dir():
        return
    for category in plugins_root.iterdir():
        if not category.is_dir():
            continue
        for plugin in category.iterdir():
            if not plugin.is_dir():
                continue
            for arch in ('Win64/UnrealEditor/Inc', 'Win64/x64/UnrealEditor/Inc'):
                arch_root = (plugin / f'Intermediate/Build/{arch}').resolve()
                if not arch_root.is_dir():
                    continue
                for module_dir in arch_root.iterdir():
                    if module_dir.is_dir():
                        emit(module_dir)

# ===== RSP/GCD normalization =====

def _staged_name_for(src: Path, staging_dir: Path) -> Path:
    h = hashlib.md5(str(src.resolve()).encode('utf-8')).hexdigest()[:8]
    return staging_dir / f"{src.stem}.{h}{src.suffix}"


def process_rsp_like_file(src: Path, staging_dir: Path, base_cwd: Path, src_file_for_extra_inc: Path | None) -> Path:
    """Normalize an MSVC response (.rsp) / .gcd file into *staging_dir*.

    Every path inside becomes absolute, nested ``@include`` references are
    staged recursively, a ``/Fo`` token immediately followed by another
    ``/Fo…`` token is dropped, and — when *src_file_for_extra_inc* is given —
    UE module + UHT include directories are appended at the end.  Results are
    memoized in ``STAGED_CACHE`` so a response file shared by many units is
    processed only once.  Returns the staged file's path.
    NOTE(review): the cache is read/written from worker threads without a
    lock — appears benign (idempotent writes) but worth confirming.
    """
    src = src.resolve()
    if src in STAGED_CACHE:
        return STAGED_CACHE[src]

    staging_dir.mkdir(parents=True, exist_ok=True)

    try:
        text = src.read_text(encoding='utf-8', errors='ignore')
    except FileNotFoundError:
        # Missing file: stage a stub that still references the original path,
        # so sourceanalyzer reports a useful error instead of us failing here.
        dst = _staged_name_for(src, staging_dir)
        with dst.open('w', encoding='utf-8', newline='') as f:
            f.write('@' + str(src) + '\n')
        STAGED_CACHE[src] = dst
        return dst

    lines_out: List[str] = []
    for line in text.splitlines():
        raw = line.strip()
        if not raw:
            # Preserve blank lines verbatim.
            lines_out.append(line)
            continue

        # Case 1: an @include line -> stage the referenced file recursively
        # and point at the staged copy.
        m = RSP_INCLUDE_LINE.match(raw)
        if m:
            inc = next((g for g in m.groups() if g), None)
            if inc:
                abs_inc = Path(as_abs(inc, base_cwd))
                staged_inc = process_rsp_like_file(abs_inc, staging_dir, base_cwd, src_file_for_extra_inc)
                lines_out.append('@' + str(staged_inc))
                continue

        # Case 2: the line is a single quoted path (typically the source file).
        if FIRST_TOKEN_IS_PATH_RE.match(raw) and (raw.startswith('"') or raw.startswith("'")):
            abs_p = as_abs(raw, base_cwd)
            lines_out.append('"' + abs_p + '"')
            continue

        # Case 3: a general option line -> tokenize, absolutize, then drop a
        # bare '/Fo' that is immediately followed by another '/Fo…' token.
        tokens = split_preserving_quotes(line)
        norm_tokens = normalize_tokens_with_context(tokens, base_cwd)

        cleaned: List[str] = []
        i = 0
        while i < len(norm_tokens):
            t = norm_tokens[i]
            if t == '/Fo' and i + 1 < len(norm_tokens) and norm_tokens[i+1].startswith('/Fo'):
                i += 1; continue
            cleaned.append(t); i += 1
        lines_out.append(' '.join(cleaned))

    # Inject UE include directories once per staged file (translation-unit
    # specific dirs come from src_file_for_extra_inc).
    ue_root = find_ue_root_from(base_cwd) or find_ue_root_from(src) or base_cwd
    if src_file_for_extra_inc is not None:
        append_module_and_uht_includes(lines_out, ue_root, src_file_for_extra_inc)

    dst = _staged_name_for(src, staging_dir)
    with dst.open('w', encoding='utf-8', newline='') as f:
        f.write('\n'.join(lines_out)); f.write('\n')

    STAGED_CACHE[src] = dst
    return dst

# ===== Fortify =====

def build_sa_command(sourceanalyzer: Path, build_id: str, compiler_exe: Path, rsp_like: Path) -> List[str]:
    """Assemble the translation invocation:
    ``sourceanalyzer -b <id> -debug -verbose <compiler> @<rsp>``."""
    argv = [str(sourceanalyzer), '-b', build_id, '-debug', '-verbose']
    argv.append(str(compiler_exe))
    argv.append('@' + str(rsp_like))
    return argv


def parse_compile_command(cmd: str) -> Tuple[Path | None, Path | None]:
    """Extract ``(compiler exe, response file)`` from a compile_commands
    ``command`` string; either element is ``None`` when not identifiable."""
    compiler: Path | None = None
    tokens = split_preserving_quotes(cmd)
    if tokens:
        head = dequote(tokens[0])
        # The first token is the compiler only when it names an .exe.
        if head.lower().endswith('.exe'):
            compiler = Path(head)

    rsp_like: Path | None = None
    match = COMMAND_EXTRACT_RSP.search(cmd)
    if match:
        candidate = next((g for g in match.groups() if g), None)
        if candidate:
            rsp_like = Path(dequote(candidate))
    return compiler, rsp_like


def save_full_log(staging: Path, unit_name: str, content: str) -> Path:
    """Persist *content* as ``<staging>/<sanitized unit_name>.fortify.log``
    and return that path.  Runs of unsafe filename characters collapse to a
    single ``_`` and the sanitized name is capped at 120 characters."""
    sanitized = re.sub(r'[^A-Za-z0-9_.-]+', '_', unit_name)[:120]
    log_path = staging / f'{sanitized}.fortify.log'
    log_path.write_text(content, encoding='utf-8', errors='ignore')
    return log_path


def summarize_output(out: str, max_len: int = 1000) -> str:
    """Compact *out* for console display: the first 5 non-blank lines, an
    ellipsis marker when lines were dropped, and the last 40 non-blank lines,
    all clipped to the trailing *max_len* characters."""
    meaningful = [line for line in out.splitlines() if line.strip()]
    head = '\n'.join(meaningful[:5])
    tail = '\n'.join(meaningful[-40:])
    separator = '\n...\n' if len(meaningful) > 45 else '\n'
    summary = head + separator + tail
    return summary[-max_len:] if len(summary) > max_len else summary


def run_translation_for_entry(entry: dict, args) -> Tuple[str, int, str]:
    """Translate one compile_commands entry with Fortify sourceanalyzer.

    Stages (or synthesizes) the unit's response file, runs
    ``sourceanalyzer -b <build-id> -debug -verbose <compiler> @<rsp>``,
    stores the full output to a per-unit log file, and returns
    ``(unit description, return code, short log summary)``.  Process-launch
    failures become a ``-1`` return code instead of propagating, so one bad
    unit cannot abort the batch.
    """
    raw_file = entry.get('file', '')
    file_path = Path(raw_file)
    cmd_str = entry.get('command', '')
    base_dir = Path(entry.get('directory', '.'))

    compiler, rsp_like = parse_compile_command(cmd_str)
    if compiler is None:
        compiler = Path(args.msvc)  # fall back to the configured cl.exe

    if rsp_like is not None:
        # Normal UE path: normalize the referenced .rsp/.gcd recursively.
        abs_rsp = (base_dir / rsp_like).resolve()
        staged_rsp = process_rsp_like_file(abs_rsp, args.staging, base_dir, file_path)
    else:
        # No response file in the command: synthesize one from the inline args.
        temp_name = (file_path.name or 'unit').replace('.', '_') + '.autogen.rsp'
        temp_path = args.staging / temp_name
        tokens = split_preserving_quotes(cmd_str)
        if tokens and tokens[0].lower().endswith('.exe'):
            tokens = tokens[1:]  # drop the compiler itself
        norm_tokens = normalize_tokens_with_context(tokens, base_dir)
        lines_out = [' '.join(norm_tokens)]
        ue_root = find_ue_root_from(base_dir) or base_dir
        append_module_and_uht_includes(lines_out, ue_root, file_path)
        with temp_path.open('w', encoding='utf-8', newline='') as f:
            f.write('\n'.join(lines_out))
            f.write('\n')
        staged_rsp = temp_path

    sa_cmd = build_sa_command(args.sourceanalyzer, args.build_id, compiler, staged_rsp)

    print('[SA-CMD] ' + ' '.join(sa_cmd))
    try:
        # Echo the first lines of the staged RSP for easier debugging.
        head = Path(staged_rsp).read_text(encoding='utf-8', errors='ignore').splitlines()[:20]
        print('[RSP-HEAD]')
        sys.stdout.write('\n'.join(head) + '\n')
    except Exception:
        pass  # best-effort diagnostics only

    try:
        # 'mbcs' matches cl.exe/sourceanalyzer console output on Windows;
        # this runner is Windows-only by design (see module header).
        proc = subprocess.run(
            sa_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            encoding='mbcs',
            errors='replace'
        )
        rc = proc.returncode
        out = proc.stdout
    except Exception as e:
        rc = -1
        out = f"EXEC ERROR: {e}"

    # BUGFIX: Path objects are always truthy (Path('') == Path('.')), so the
    # original `if file_path` could never select the staged-RSP fallback for
    # entries with an empty 'file' field.  Test the raw entry string instead.
    unit_desc = str(file_path) if raw_file else str(staged_rsp)

    log_path = save_full_log(args.staging, Path(unit_desc).name, out)
    short_log = summarize_output(out)
    short_log += f"\n[full log] {log_path}"

    return unit_desc, rc, short_log

# ===== Main / preflight =====

def main(argv: List[str] | None = None) -> int:
    """CLI entry point: parse args, preflight-check UHT generated headers,
    then translate every matching compile_commands entry in parallel.

    Exit codes: 0 all units translated OK, 1 at least one unit failed,
    2 bad input (compile DB missing or malformed), 3 UHT generated headers
    absent (translation would be pointless without them).
    """
    p = argparse.ArgumentParser(description='UE5 → Fortify SCA Translation Runner (recursive @include + module incs)')
    p.add_argument('--compile-db', required=True, type=Path, help='compile_commands.json path')
    p.add_argument('--staging', required=True, type=Path, help='dir to store normalized rsp/gcd')
    p.add_argument('--build-id', default=DEFAULT_BUILD_ID)
    p.add_argument('--msvc', default=DEFAULT_MSVC, type=Path, help='fallback compiler path')
    p.add_argument('--sourceanalyzer', default=DEFAULT_SOURCEANALYZER, type=Path)
    p.add_argument('--max-workers', type=int, default=os.cpu_count() or 4)
    p.add_argument('--filter-ext', nargs='*', default=['.c', '.cpp', '.cxx', '.cc', '.c++', '.cs'])
    p.add_argument('--dry-run', action='store_true')

    args = p.parse_args(argv)
    # Staged file names are embedded into rewritten RSPs, so resolve once here.
    args.staging = args.staging.resolve()

    if not args.compile_db.is_file():
        print(f"compile_commands.json not found: {args.compile_db}", file=sys.stderr)
        return 2

    # Preflight: UHT generated headers check — without *.generated.h every
    # translation unit would fail to compile.
    ue_root_hint = find_ue_root_from(args.compile_db.parent) or Path('C:/UnrealEngine-5.3.2-release')
    inc_roots = uht_inc_roots(ue_root_hint)
    has_generated = False
    for r in inc_roots:
        if r.is_dir():
            try:
                # Finding any single generated header proves UHT has run.
                next(r.rglob('*.generated.h'))
                has_generated = True
                break
            except StopIteration:
                pass
    if not has_generated:
        print('[PRECHECK] UHT generated headers not found under any of:')
        for r in inc_roots:
            print('  -', r)
        print('\n[HINT] Run UHT first to generate headers. Example:')
        print('  Engine/Binaries/DotNET/UnrealBuildTool/UnrealBuildTool.exe -Mode=Build '
              '-Project="C:/UnrealEngine-5.3.2-release/Samples/Games/Lyra/Lyra.uproject" '
              '-Target="UnrealEditor Win64 Development"')
        return 3

    data = json.loads(args.compile_db.read_text(encoding='utf-8'))
    if not isinstance(data, list):
        print('compile_commands.json must be a list', file=sys.stderr)
        return 2

    # Keep only entries whose source file matches the requested extensions.
    entries: List[dict] = []
    for e in data:
        f = e.get('file', '')
        if not f:
            continue
        low = f.lower()
        if any(low.endswith(ext) for ext in args.filter_ext):
            entries.append(e)

    print(f"targets {len(entries)} (staging={args.staging})")
    args.staging.mkdir(parents=True, exist_ok=True)

    if args.dry_run:
        # Show what would be translated (first 50 entries) without running.
        for e in entries[:50]:
            file_path = Path(e.get('file', ''))
            cmd_str = e.get('command', '')
            base_dir = Path(e.get('directory', '.'))
            compiler, rsp_like = parse_compile_command(cmd_str)
            if compiler is None:
                compiler = args.msvc
            rsp_abs = (base_dir / rsp_like).resolve() if rsp_like else Path('<inline>')
            print(f"[DRY] {file_path}\n  compiler: {compiler}\n  rsp_like: {rsp_abs}")
        return 0

    # Fan the translation units out over a thread pool; sourceanalyzer does
    # the heavy lifting in subprocesses, so threads (not processes) suffice.
    results = []
    with ThreadPoolExecutor(max_workers=args.max_workers) as ex:
        futures = [ex.submit(run_translation_for_entry, e, args) for e in entries]
        for fut in as_completed(futures):
            results.append(fut.result())

    ok = sum(1 for _, rc, _ in results if rc == 0)
    fail = len(results) - ok
    print("\n===== SUMMARY =====")
    print(f"OK: {ok}, FAIL: {fail}")
    # Per-unit status for the first 50 results (full logs are on disk).
    for unit, rc, log in results[:50]:
        status = 'OK' if rc == 0 else f'FAIL({rc})'
        print(f"- {status}: {unit}\n  {log}\n")

    return 0 if fail == 0 else 1


if __name__ == '__main__':
    # Propagate main()'s integer status as the process exit code.
    sys.exit(main())