[Fortify] UnrealBuildTool(UnrealEngine) Wrapper (.uproject 분석)

1_make_compile_commands_json.bat

@REM Runs UnrealBuildTool in GenerateClangDatabase mode for the project and target configured below.
@SET UEBT_DIR="C:\UnrealEngine-5.3.2-release\Engine\Binaries\DotNET\UnrealBuildTool"
@SET UE_PROJECT="C:\UnrealEngine-5.3.2-release\Samples\Games\Lyra\Lyra.uproject"
@SET UE_TARGET="LyraGame Win64 Shipping"

@PUSHD %CD%
CD /D %UEBT_DIR%
REM If needed, run a build first so that the generated headers exist
REM UnrealBuildTool.exe -Mode=Build -Project=%UE_PROJECT% -Target=%UE_TARGET%
UnrealBuildTool.exe -Mode=GenerateClangDatabase -Project=%UE_PROJECT% -Target=%UE_TARGET%
@POPD
@PAUSE

2_ue5_fortify_translate.bat

@REM Switch the console code page to 65001 (UTF-8) and clear the screen.
@CHCP 65001
@CLS
@REM Reset previous state: clean build ID UE5_Test, delete the old FPR and the staging directory.
sourceanalyzer -b UE5_Test -clean
DEL UE5_Test.fpr
RMDIR /S /Q C:\ue_fortify.tmp
@REM Translation step: ue5_fortify_translate.py runs sourceanalyzer per compile_commands.json entry.
python ue5_fortify_translate.py ^
--compile-db C:\\UnrealEngine-5.3.2-release\\compile_commands.json ^
--staging C:\\ue_fortify.tmp\\staging ^
--build-id UE5_Test ^
--msvc "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Tools\\MSVC\\14.36.32532\\bin\\Hostx64\\x64\\cl.exe" ^
--sourceanalyzer C:\\opentext\\sast-25.2.0\\bin\\sourceanalyzer.exe ^
--max-workers 4
@REM Scan step: analyze build ID UE5_Test, log to scan.log, write results to UE5_Test.fpr.
sourceanalyzer -b UE5_Test -Xms2g -Xmx8g -Xss1m -scan -logfile scan.log -f UE5_Test.fpr
@PAUSE

ue5_fortify_translate.py

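# UE5 → Fortify SCA Translation Runner (Windows / Python 3)
# compile_commands.json → normalize RSP/GCD files → Fortify SCA (sourceanalyzer) translation
# - Recursively expands @include references, makes paths absolute, normalizes options (split and combined forms)
# - Automatically injects UE module Public/Private/Classes and UHT Inc paths (Engine & Plugins, with and without the x64 directory)
# - Prints only a safe summary log to the console; the full log of each unit is saved to a file
# - Preflight: if no UHT-generated *.generated.h is found, prints guidance and returns exit code 3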

from __future__ import annotations
import argparse
import hashlib
import json
import os
import re
import subprocess
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Dict, List, Tuple

# ===== Defaults (paths inside literals keep forward slashes) =====
# Used as the argparse defaults for --msvc, --sourceanalyzer and --build-id in main().
DEFAULT_MSVC = "C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.36.32532/bin/Hostx64/x64/cl.exe"
DEFAULT_SOURCEANALYZER = "C:/opentext/sast-25.2.0/bin/sourceanalyzer.exe"
DEFAULT_BUILD_ID = "UE5_Test"

# ===== Regular expressions / constants =====
# Drive-letter prefix written with a forward slash, e.g. "C:/".
WIN_DRIVE_RE = re.compile(r"^[A-Za-z]:/")
# A string that starts and ends with a quote character (single or double); group 1 is the inside.
QUOTE_RE = re.compile(r'^[\"\'](.*)[\"\']$')
RSP_INCLUDE_LINE = re.compile(r'^@(?:"(.*?)"|\'(.*?)\'|(\S+))\s*$')  # @"…" | @'…' | @path
# Any quoted span inside a token (group 1: double-quoted, group 2: single-quoted).
ANY_QUOTED = re.compile(r'"(.*?)"|\'(.*?)\'')
# A line that begins with a quote and ends with a quote (optionally followed by whitespace).
FIRST_TOKEN_IS_PATH_RE = re.compile(r'^[\"\'].*[\"\']\s*$')
# An @-reference to a .rsp/.gcd file anywhere in a command string (quoted or bare).
COMMAND_EXTRACT_RSP = re.compile(r'@(?:"(.*?\.(?:rsp|gcd))"|\'(.*?\.(?:rsp|gcd))\'|(\S+\.(?:rsp|gcd)))')

# Both split-form options (/I <dir>, ...) and combined-form options (/IC:\\dir, ...) are supported.
# The set is used for exact-token membership tests; the list is iterated for prefix matching.
OPTION_PREFIXES_SPLIT = {'/I','-I','/FI','/Fo','/Fp','/Fe','/Fd','/Fa','/FU','/external:I'}
OPTION_PREFIXES_COMBINED = ['/I','-I','/FI','/Fo','/Fp','/Fe','/Fd','/Fa','/FU','/external:I']

# Already-staged RSP/GCD files are reused (cache for the recursive processing):
# maps a resolved source response file to its staged copy.
STAGED_CACHE: Dict[Path, Path] = {}

# ===== Utilities =====

def dequote(s: str) -> str:
    """Strip one pair of matching surrounding quotes from *s*.

    Only a matching pair is removed: the first and last characters must be
    the same quote character (``"`` or ``'``). Strings that are unquoted,
    consist of a lone quote, or have mismatched delimiters (``"abc'``) are
    returned unchanged.
    """
    if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s


def as_abs(path: str, base: Path) -> str:
    """Return *path* as a resolved absolute path string.

    The input is prepared in this order: backslashes become forward slashes,
    environment variables are expanded, and surrounding quotes are removed
    via ``dequote``. A value that does not start with a drive prefix
    (``X:/``) is interpreted relative to *base*. The result is whatever
    ``Path.resolve()`` yields on the running platform.
    """
    cleaned = dequote(os.path.expandvars(path.replace('\\', '/')))
    if WIN_DRIVE_RE.match(cleaned):
        anchored = cleaned
    else:
        # No drive prefix: anchor the value at the supplied base directory.
        anchored = str((base / cleaned).resolve())
    return str(Path(anchored).resolve())


def split_preserving_quotes(s: str) -> List[str]:
    """Split *s* on whitespace while keeping quoted spans intact.

    A quoted span (single or double quotes) is returned as its own token with
    the quote characters retained. Text directly in front of an opening quote
    is emitted as a separate token, so ``/I"a b"`` becomes ``/I`` and
    ``"a b"``. An unterminated quote runs to the end of the input.
    """
    tokens: List[str] = []
    buf: List[str] = []
    open_quote: str | None = None

    def flush() -> None:
        # Emit the pending characters (if any) as one token.
        if buf:
            tokens.append(''.join(buf).strip())
            buf.clear()

    for ch in s:
        if open_quote is not None:
            buf.append(ch)
            if ch == open_quote:
                # The closing quote ends the quoted token.
                flush()
                open_quote = None
        elif ch in ('"', "'"):
            # Outside quotes the buffer never ends in whitespace, so pending
            # text always becomes its own token before the quoted span starts.
            flush()
            buf.append(ch)
            open_quote = ch
        elif ch.isspace():
            flush()
        else:
            buf.append(ch)

    flush()
    return [tok for tok in tokens if tok]


def normalize_token_path(token: str, base: Path) -> str:
    """Normalize the path carried by a single command-line token.

    Handled forms, checked in this order:

    1. ``@file`` response-file references: the target is made absolute. The
       result is quoted only when the absolute path contains whitespace,
       because an unquoted reference would otherwise be split in two.
    2. Combined-form options without quotes (``/Ipath``, ``/Fopath``, ...):
       rewritten as ``<prefix>"<absolute path>"``.
    3. Tokens containing quotes: every quoted span is replaced by the quoted
       absolute path of its content.
    4. Bare tokens ending in a source/header-like suffix: returned as a
       quoted absolute path.

    Any other token is returned unchanged. Relative paths are resolved
    against *base* (see ``as_abs``).
    """
    # Response-file reference such as @file.rsp
    m = RSP_INCLUDE_LINE.match(token)
    if m:
        inner = next((g for g in m.groups() if g), None)
        if inner:
            abs_inner = as_abs(inner, base)
            if any(ch.isspace() for ch in abs_inner):
                return f'@"{abs_inner}"'
            return '@' + abs_inner

    has_quote = '"' in token or "'" in token

    # Combined-form option (/IC:\\inc etc.): normalize the embedded path
    if not has_quote:
        for pre in OPTION_PREFIXES_COMBINED:
            if token.startswith(pre) and len(token) > len(pre):
                abs_p = as_abs(token[len(pre):], base)
                return f'{pre}"{abs_p}"'

    # Quoted content: normalize each quoted span as a path
    if has_quote:
        def repl(mo):
            inner = mo.group(1) or mo.group(2) or ''
            return '"' + as_abs(inner, base) + '"'
        return ANY_QUOTED.sub(repl, token)

    # Source/header-like files: absolute path wrapped in quotes
    source_like = ('.c', '.cpp', '.cxx', '.cc', '.c++', '.rc', '.hpp', '.h', '.inl')
    if token.lower().endswith(source_like):
        return '"' + as_abs(token, base) + '"'
    return token


def normalize_tokens_with_context(tokens: List[str], base: Path) -> List[str]:
    """Normalize a token list, folding split-form options into combined form.

    When a token is exactly one of ``OPTION_PREFIXES_SPLIT`` and another
    token follows it, the pair is merged into ``<option>"<absolute path>"``.
    Every other token goes through ``normalize_token_path``.
    """
    result: List[str] = []
    consume_next = False
    last_index = len(tokens) - 1
    for idx, tok in enumerate(tokens):
        if consume_next:
            # This token was already merged into the preceding option.
            consume_next = False
            continue
        if tok in OPTION_PREFIXES_SPLIT and idx < last_index:
            value = as_abs(dequote(tokens[idx + 1]), base)
            result.append(f'{tok}"{value}"')
            consume_next = True
        else:
            result.append(normalize_token_path(tok, base))
    return result

# ===== UE root / module detection and UHT output checks =====

def find_ue_root_from(path_hint: Path) -> Path | None:
    """Guess the UE root (the parent of the ``Engine`` directory) from any path.

    The resolved *path_hint* and its ancestors are scanned from the innermost
    outwards for a directory named ``engine`` (case-insensitive). Because UE
    also has a *module* called ``Engine`` (``Engine/Source/Runtime/Engine``),
    a candidate that contains a ``Source``, ``Build`` or ``Binaries``
    sub-directory is preferred. If no candidate has such a marker, the
    innermost name match is returned.

    Returns:
        The parent directory of the selected ``Engine`` directory, or
        ``None`` when no ancestor is named ``engine``.
    """
    resolved = path_hint.resolve()
    fallback: Path | None = None
    for anc in (resolved, *resolved.parents):
        if anc.name.lower() != 'engine' or not anc.parent.exists():
            continue
        if any((anc / marker).is_dir() for marker in ('Source', 'Build', 'Binaries')):
            return anc.parent
        if fallback is None:
            # Remember the innermost plain name match in case nothing validates.
            fallback = anc.parent
    return fallback


def uht_inc_roots(ue_root: Path) -> List[Path]:
    """Return the candidate ``Inc`` root directories below *ue_root*.

    Three layouts under ``Engine/Intermediate/Build`` are listed: with the
    ``Win64`` platform directory, with ``Win64/x64``, and without a platform
    directory. The paths are built only; nothing is checked for existence.
    """
    build_dir = ue_root / 'Engine/Intermediate/Build'
    layouts = (
        'Win64/UnrealEditor/Inc',
        'Win64/x64/UnrealEditor/Inc',
        'UnrealEditor/Inc',
    )
    return [build_dir / layout for layout in layouts]


def derive_module_roots_from_source(src_file: Path) -> List[Path]:
    """Return candidate module root directories for *src_file*.

    Two strategies are combined:

    1. The nearest ancestor directory that contains a ``*.Build.cs`` file.
       The upward search stops after checking a directory named ``Source``.
       This covers modules nested deeper than one level below ``Source``
       (for example ``Engine/Source/Runtime/<Module>``).
    2. The original heuristic: the path up to and including the component
       right after the first ``Source`` component.

    The ``*.Build.cs`` match, when found, comes first; the heuristic
    candidate is appended if it differs. An empty list is returned when
    neither strategy yields anything.
    """
    resolved = src_file.resolve()
    roots: List[Path] = []

    for anc in resolved.parents:
        try:
            has_rules = next(anc.glob('*.Build.cs'), None) is not None
        except OSError:
            # Unreadable directory: treat it as having no rules file.
            has_rules = False
        if has_rules:
            roots.append(anc)
            break
        if anc.name == 'Source':
            break

    parts = resolved.parts
    if 'Source' in parts:
        idx = parts.index('Source')
        if idx + 1 < len(parts):
            legacy = Path(*parts[:idx + 2])
            if legacy not in roots:
                roots.append(legacy)
    return roots


# Per-UE-root cache of the resolved UHT Inc/<Module> directories (as strings).
# The directory scan depends only on the UE root, so it is done once per root
# instead of once per translation unit.
_UHT_INC_DIR_CACHE: Dict[Path, List[str]] = {}


def _collect_uht_inc_dirs(ue_root: Path) -> List[str]:
    """Return the existing UHT ``Inc/<Module>`` directories for *ue_root* (cached)."""
    key = ue_root.resolve()
    cached = _UHT_INC_DIR_CACHE.get(key)
    if cached is not None:
        return cached

    found: List[str] = []

    def add_children(inc_root: Path) -> None:
        # Record every sub-directory of an existing Inc root.
        inc_root = inc_root.resolve()
        if not inc_root.is_dir():
            return
        for child in inc_root.iterdir():
            if child.is_dir():
                found.append(str(child.resolve()))

    # Engine-side UHT Inc/<Module>
    for root in uht_inc_roots(ue_root):
        add_children(root)

    # Plugin-side UHT Inc/<Module> (Engine/Plugins/<Category>/<Plugin>)
    plugins_root = (ue_root / 'Engine/Plugins').resolve()
    if plugins_root.is_dir():
        for cat in plugins_root.iterdir():
            if not cat.is_dir():
                continue
            for plug in cat.iterdir():
                if not plug.is_dir():
                    continue
                for arch in ('Win64/UnrealEditor/Inc', 'Win64/x64/UnrealEditor/Inc'):
                    add_children(plug / f'Intermediate/Build/{arch}')

    _UHT_INC_DIR_CACHE[key] = found
    return found


def append_module_and_uht_includes(lines_out: List[str], ue_root: Path, src_file: Path) -> None:
    """Append ``/I"<dir>"`` lines for module and UHT include directories.

    Appended to *lines_out* in place, in this order and without duplicates
    within one call:

    * the directory containing *src_file*;
    * ``Public``, ``Private`` and ``Classes`` under each module root returned
      by ``derive_module_roots_from_source``;
    * every sub-directory of the Engine UHT ``Inc`` roots and of each plugin's
      ``Intermediate/Build/.../UnrealEditor/Inc`` directory.

    Only existing directories are emitted. The UHT directory list is computed
    once per UE root and reused for later calls.
    """
    added: set[str] = set()

    def emit(sp: str) -> None:
        if sp not in added:
            lines_out.append(f'/I"{sp}"')
            added.add(sp)

    def add_inc(p: Path) -> None:
        p = p.resolve()
        if p.is_dir():
            emit(str(p))

    # Directory of the source file itself
    add_inc(src_file.parent)

    # Module directories
    for mod_root in derive_module_roots_from_source(src_file):
        for sub in ('Public', 'Private', 'Classes'):
            add_inc(mod_root / sub)

    # Engine and plugin UHT Inc/<Module> directories (already resolved and verified)
    for inc_dir in _collect_uht_inc_dirs(ue_root):
        emit(inc_dir)

# ===== RSP/GCD normalization =====

def _staged_name_for(src: Path, staging_dir: Path) -> Path:
    """원본 경로 해시로 스테이징 파일명 생성(충돌 방지)."""
    h = hashlib.md5(str(src.resolve()).encode('utf-8')).hexdigest()[:8]
    return staging_dir / f"{src.stem}.{h}{src.suffix}"

# ----- Remove options that trigger preprocessed output (/P, /E, /EP, /Fi*) -----

def _drop_preprocess_token(tok: str, next_tok: str | None = None) -> int:
    """
    반환값: 건너뛸 토큰 개수 (0=유지, 1=현재 토큰만 제거, 2=/Fi 분리형처럼 값 동반 제거)
    - /P, /E, /EP: MSVC 전처리 산출물/표준출력 전처리 → *.i 쓰레기 파일 생성 유발
    - /Fi*, '/Fi' <file>: 전처리 산출물 파일명 지정(혼선 유발)
    - (-E, -P): Clang 계열 방어
    """
    u = tok.upper()
    if u in ('/P', '/E', '/EP'):
        return 1
    if tok.startswith('/Fi'):
        return 1
    if tok == '/Fi' and next_tok is not None:
        return 2
    if tok in ('-E','-P'):
        return 1
    return 0


def process_rsp_like_file(src: Path, staging_dir: Path, base_cwd: Path, src_file_for_extra_inc: Path | None) -> Path:
    """Normalize an RSP/GCD response file, recursively, into *staging_dir*.

    Each line of *src* is handled as follows:

    * blank lines are copied as-is;
    * ``@file`` include lines are processed recursively and rewritten to
      point at the staged copy;
    * a line holding exactly one quoted path is rewritten as the quoted
      absolute path;
    * every other line is tokenized, its paths are made absolute, duplicated
      ``/Fo`` markers are collapsed and preprocessing-output options are
      removed.

    When *src_file_for_extra_inc* is given, module and UHT include
    directories are appended at the end. Results are memoized in
    ``STAGED_CACHE`` by resolved source path. If *src* does not exist, a
    stub containing only an ``@`` reference to it is staged instead.

    Args:
        src: Response file to normalize.
        staging_dir: Directory receiving the staged file (created if missing).
        base_cwd: Directory against which relative paths are resolved.
        src_file_for_extra_inc: Source file whose module context drives the
            extra ``/I`` lines, or ``None`` to skip that step.

    Returns:
        Path of the staged file.
    """
    src = src.resolve()
    if src in STAGED_CACHE:
        return STAGED_CACHE[src]

    staging_dir.mkdir(parents=True, exist_ok=True)

    def at_reference(target: Path) -> str:
        # Quote only when needed: an unquoted reference containing whitespace
        # would be split into several arguments by the consumer.
        target_text = str(target)
        if any(ch.isspace() for ch in target_text):
            return f'@"{target_text}"'
        return '@' + target_text

    try:
        text = src.read_text(encoding='utf-8', errors='ignore')
    except FileNotFoundError:
        # Original is missing: write a stub that keeps the reference
        dst = _staged_name_for(src, staging_dir)
        with dst.open('w', encoding='utf-8', newline='') as f:
            f.write(at_reference(src) + '\n')
        STAGED_CACHE[src] = dst
        return dst

    lines_out: List[str] = []
    for line in text.splitlines():
        raw = line.strip()
        if not raw:
            lines_out.append(line)
            continue

        # Recursive @include handling
        m = RSP_INCLUDE_LINE.match(raw)
        if m:
            inc = next((g for g in m.groups() if g), None)
            if inc:
                abs_inc = Path(as_abs(inc, base_cwd))
                staged_inc = process_rsp_like_file(abs_inc, staging_dir, base_cwd, src_file_for_extra_inc)
                lines_out.append(at_reference(staged_inc))
                continue

        # A line that is exactly one quoted path ("C:/.../foo.cpp"). The quote
        # count check keeps lines such as "a.cpp" "b.cpp" out of this branch so
        # that they are tokenized normally below.
        is_single_quoted_path = (
            FIRST_TOKEN_IS_PATH_RE.match(raw) is not None
            and raw[-1] == raw[0]
            and raw.count(raw[0]) == 2
        )
        if is_single_quoted_path:
            abs_p = as_abs(raw, base_cwd)
            lines_out.append('"' + abs_p + '"')
            continue

        # General line: tokenize -> absolutize/normalize paths -> drop preprocessing options
        tokens = split_preserving_quotes(line)
        norm_tokens = normalize_tokens_with_context(tokens, base_cwd)

        cleaned: List[str] = []
        i = 0
        while i < len(norm_tokens):
            t = norm_tokens[i]
            # Collapse a bare /Fo that is immediately followed by another /Fo...
            if t == '/Fo' and i + 1 < len(norm_tokens) and norm_tokens[i + 1].startswith('/Fo'):
                i += 1
                continue
            # Drop flags that produce preprocessed output
            skip = _drop_preprocess_token(t, norm_tokens[i + 1] if i + 1 < len(norm_tokens) else None)
            if skip:
                i += skip
                continue
            cleaned.append(t)
            i += 1
        lines_out.append(' '.join(cleaned))

    # Inject extra /I paths derived from the source file's context
    ue_root = find_ue_root_from(base_cwd) or find_ue_root_from(src) or base_cwd
    if src_file_for_extra_inc is not None:
        append_module_and_uht_includes(lines_out, ue_root, src_file_for_extra_inc)

    # Write the staged file
    dst = _staged_name_for(src, staging_dir)
    with dst.open('w', encoding='utf-8', newline='') as f:
        f.write('\n'.join(lines_out))
        f.write('\n')

    STAGED_CACHE[src] = dst
    return dst

# ===== Fortify invocation =====

def build_sa_command(sourceanalyzer: Path, build_id: str, compiler_exe: Path, rsp_like: Path) -> List[str]:
    """Assemble the sourceanalyzer argument list for one translation unit.

    The list is: the analyzer executable, ``-b <build_id>``, ``-debug``,
    ``-verbose``, the compiler executable, and an ``@`` reference to the
    response file.
    """
    command: List[str] = [str(sourceanalyzer), '-b', build_id]
    command += ['-debug', '-verbose']
    command.append(str(compiler_exe))
    command.append(f'@{rsp_like}')
    return command


def parse_compile_command(cmd: str) -> Tuple[Path | None, Path | None]:
    """Extract the compiler path and the RSP/GCD reference from a command string.

    The compiler is the first token (quotes removed) when it ends in ``.exe``
    (case-insensitive). The response file is the first ``@...rsp`` or
    ``@...gcd`` reference found anywhere in *cmd*. Either element of the
    returned pair is ``None`` when it cannot be found.
    """
    compiler: Path | None = None
    pieces = split_preserving_quotes(cmd)
    if pieces:
        head = dequote(pieces[0])
        if head.lower().endswith('.exe'):
            compiler = Path(head)

    rsp_like: Path | None = None
    found = COMMAND_EXTRACT_RSP.search(cmd)
    if found is not None:
        # Exactly one alternative of the pattern captures; take the first non-empty group.
        for grp in found.groups():
            if grp:
                rsp_like = Path(dequote(grp))
                break
    return compiler, rsp_like


def save_full_log(staging: Path, unit_name: str, content: str) -> Path:
    """Write *content* to ``<staging>/<sanitized unit name>.fortify.log``.

    Every run of characters outside ``A-Z a-z 0-9 _ . -`` in *unit_name* is
    replaced by one underscore, and the result is cut to 120 characters.
    Returns the path of the written log file.
    """
    sanitized = re.sub(r'[^A-Za-z0-9_.-]+', '_', unit_name)
    log_file = staging / (sanitized[:120] + '.fortify.log')
    log_file.write_text(content, encoding='utf-8', errors='ignore')
    return log_file


def summarize_output(out: str, max_len: int = 1000) -> str:
    """Build a short console summary from a full log.

    Blank lines are dropped. Logs with more than 45 non-blank lines are
    reduced to the first 5 and the last 40 lines joined by a ``...`` marker;
    shorter logs are returned whole, so no line appears twice. If the
    summary is longer than *max_len* characters, only its last *max_len*
    characters are kept.
    """
    head_count, tail_count = 5, 40
    lines = [ln for ln in (out or '').splitlines() if ln.strip()]
    if len(lines) > head_count + tail_count:
        msg = '\n'.join(lines[:head_count]) + '\n...\n' + '\n'.join(lines[-tail_count:])
    else:
        # Head and tail would overlap here; emit every line exactly once.
        msg = '\n'.join(lines)
    if len(msg) > max_len:
        # msg[-0:] would return the whole string, so handle max_len <= 0 explicitly.
        msg = msg[-max_len:] if max_len > 0 else ''
    return msg


def run_translation_for_entry(entry: dict, args) -> Tuple[str, int, str]:
    """Run the Fortify translation for one compile_commands.json entry.

    If the entry's command references an RSP/GCD file, that file is
    normalized recursively into the staging directory. Otherwise the command
    tokens (minus the compiler executable) are normalized and written to a
    generated ``*.autogen.rsp``. sourceanalyzer is then run on the staged
    response file and its combined stdout/stderr is saved as a log in the
    staging directory.

    Generated file names carry a short hash of the entry's directory and
    file, so same-named sources in different directories do not overwrite
    each other's response files or logs.

    Args:
        entry: One compile database entry (``file``, ``command``,
            ``directory`` keys are read).
        args: Parsed CLI arguments; ``msvc``, ``staging``, ``sourceanalyzer``
            and ``build_id`` are used.

    Returns:
        ``(unit description, return code, short log)``. The return code is
        ``-1`` when sourceanalyzer could not be executed.
    """
    raw_file = entry.get('file', '')
    file_path = Path(raw_file)
    cmd_str = entry.get('command', '')
    base_dir = Path(entry.get('directory', '.'))

    # Short, stable key that distinguishes units sharing the same file name.
    unit_key = hashlib.md5(f'{base_dir}|{raw_file}'.encode('utf-8')).hexdigest()[:8]

    compiler, rsp_like = parse_compile_command(cmd_str)
    if compiler is None:
        compiler = Path(args.msvc)

    # RSP/GCD present: normalize recursively and use the staged file
    if rsp_like is not None:
        abs_rsp = (base_dir / rsp_like).resolve()
        staged_rsp = process_rsp_like_file(abs_rsp, args.staging, base_dir, file_path)
    else:
        # No RSP: normalize the command tokens directly into a generated RSP
        stem = (file_path.name or 'unit').replace('.', '_')
        temp_path = args.staging / f'{stem}.{unit_key}.autogen.rsp'
        tokens = split_preserving_quotes(cmd_str)
        # The compiler token may be quoted, so strip quotes before testing for .exe
        if tokens and dequote(tokens[0]).lower().endswith('.exe'):
            tokens = tokens[1:]
        norm_tokens = normalize_tokens_with_context(tokens, base_dir)
        lines_out = [' '.join(norm_tokens)]
        ue_root = find_ue_root_from(base_dir) or base_dir
        append_module_and_uht_includes(lines_out, ue_root, file_path)
        with temp_path.open('w', encoding='utf-8', newline='') as f:
            f.write('\n'.join(lines_out))
            f.write('\n')
        staged_rsp = temp_path

    sa_cmd = build_sa_command(args.sourceanalyzer, args.build_id, compiler, staged_rsp)

    # Print the command and the first lines of the RSP (debugging aid)
    print('[SA-CMD] ' + ' '.join(sa_cmd))
    try:
        head = Path(staged_rsp).read_text(encoding='utf-8', errors='ignore').splitlines()[:20]
    except OSError:
        # Best effort only: an unreadable staged file must not stop the run.
        head = None
    if head is not None:
        print('[RSP-HEAD]')
        sys.stdout.write('\n'.join(head) + '\n')

    # Run Fortify
    try:
        proc = subprocess.run(
            sa_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            encoding='mbcs',  # Windows-only codec; elsewhere the failure is reported as EXEC ERROR below
            errors='replace'
        )
        rc = proc.returncode
        out = proc.stdout or ''
    except Exception as e:
        rc = -1
        out = f"EXEC ERROR: {e}"

    # A Path object is always truthy, so test the raw 'file' value instead.
    unit_desc = str(file_path) if raw_file else str(staged_rsp)

    # Save the full log and return a summary. The base name is shortened here
    # so that the hash suffix survives the length cap applied by save_full_log.
    log_name = f'{Path(unit_desc).name[:100]}.{unit_key}'
    log_path = save_full_log(args.staging, log_name, out)
    short_log = summarize_output(out)
    short_log += f"\n[full log] {log_path}"

    return unit_desc, rc, short_log

# ===== Main / preflight =====

def _has_generated_headers(inc_roots: List[Path]) -> bool:
    """Return True when any existing root contains at least one ``*.generated.h`` file."""
    return any(
        root.is_dir() and next(root.rglob('*.generated.h'), None) is not None
        for root in inc_roots
    )


def main(argv: List[str] | None = None) -> int:
    """Command-line entry point.

    Steps: parse arguments, check that the compile database exists, run the
    UHT preflight, load and filter the entries by extension, then either
    print a dry-run listing (first 50 entries) or run the translations in a
    thread pool and print a summary.

    Returns:
        ``0`` on success (or dry run), ``1`` when at least one unit failed,
        ``2`` when the compile database is missing or invalid, ``3`` when no
        UHT-generated header was found.
    """
    p = argparse.ArgumentParser(description='UE5 → Fortify SCA Translation Runner (recursive @include + module incs)')
    p.add_argument('--compile-db', required=True, type=Path, help='compile_commands.json 경로')
    p.add_argument('--staging', required=True, type=Path, help='정규화된 rsp/gcd를 저장할 디렉터리')
    p.add_argument('--build-id', default=DEFAULT_BUILD_ID)
    p.add_argument('--msvc', default=DEFAULT_MSVC, type=Path, help='컴파일러 경로(기본값)')
    p.add_argument('--sourceanalyzer', default=DEFAULT_SOURCEANALYZER, type=Path)
    p.add_argument('--max-workers', type=int, default=os.cpu_count() or 4)
    p.add_argument('--filter-ext', nargs='*', default=['.c', '.cpp', '.cxx', '.cc', '.c++', '.cs'])
    p.add_argument('--dry-run', action='store_true')

    args = p.parse_args(argv)
    args.staging = args.staging.resolve()

    if not args.compile_db.is_file():
        print(f"compile_commands.json not found: {args.compile_db}", file=sys.stderr)
        return 2

    # Preflight: UHT-generated headers must exist
    ue_root_hint = find_ue_root_from(args.compile_db.parent) or Path('C:/UnrealEngine-5.3.2-release')
    inc_roots = uht_inc_roots(ue_root_hint)
    if not _has_generated_headers(inc_roots):
        print('[PRECHECK] UHT generated headers not found under any of:')
        for r in inc_roots:
            print('  -', r)
        print('\n[HINT] Run UHT first to generate headers. Example:')
        print('  Engine/Binaries/DotNET/UnrealBuildTool/UnrealBuildTool.exe -Mode=Build '
              '-Project="C:/UnrealEngine-5.3.2-release/Samples/Games/Lyra/Lyra.uproject" '
              '-Target="UnrealEditor Win64 Development"')
        return 3

    # Load the compile database (utf-8-sig also accepts a leading BOM)
    try:
        data = json.loads(args.compile_db.read_text(encoding='utf-8-sig'))
    except (OSError, ValueError) as err:
        print(f"failed to read compile_commands.json: {err}", file=sys.stderr)
        return 2
    if not isinstance(data, list):
        print('compile_commands.json must be a list', file=sys.stderr)
        return 2

    # Filter target entries by extension. The file name is lower-cased for the
    # comparison, so the extensions must be lower-cased as well.
    wanted_exts = tuple(ext.lower() for ext in args.filter_ext)
    entries: List[dict] = []
    for e in data:
        if not isinstance(e, dict):
            continue
        f = e.get('file', '')
        if not f:
            continue
        if f.lower().endswith(wanted_exts):
            entries.append(e)

    print(f"targets {len(entries)} (staging={args.staging})")
    args.staging.mkdir(parents=True, exist_ok=True)

    # Dry run: print a summary of at most 50 entries
    if args.dry_run:
        for e in entries[:50]:
            file_path = Path(e.get('file', ''))
            cmd_str = e.get('command', '')
            base_dir = Path(e.get('directory', '.'))
            compiler, rsp_like = parse_compile_command(cmd_str)
            if compiler is None:
                compiler = args.msvc
            rsp_abs = (base_dir / rsp_like).resolve() if rsp_like else Path('<inline>')
            print(f"[DRY] {file_path}\n  compiler: {compiler}\n  rsp_like: {rsp_abs}")
        return 0

    # Parallel execution. ThreadPoolExecutor rejects max_workers <= 0, so clamp to 1.
    results: List[Tuple[str, int, str]] = []
    with ThreadPoolExecutor(max_workers=max(1, args.max_workers)) as ex:
        futures = {ex.submit(run_translation_for_entry, e, args): e for e in entries}
        for fut in as_completed(futures):
            try:
                results.append(fut.result())
            except Exception as exc:
                # Top-level boundary: record the unit as failed instead of
                # aborting every remaining translation.
                unit = str(futures[fut].get('file', '<unknown>'))
                results.append((unit, -1, f'WORKER ERROR: {exc!r}'))

    # Summary: failures are listed first so the 50-row cap cannot hide them
    ok = sum(1 for _, rc, _ in results if rc == 0)
    fail = len(results) - ok
    print("\n===== SUMMARY =====")
    print(f"OK: {ok}, FAIL: {fail}")
    ordered = sorted(results, key=lambda item: item[1] == 0)
    for unit, rc, log in ordered[:50]:
        status = 'OK' if rc == 0 else f'FAIL({rc})'
        print(f"- {status}: {unit}\n  {log}\n")

    return 0 if fail == 0 else 1


if __name__ == '__main__':
    raise SystemExit(main())
위로 스크롤