Zum Inhalt

validation

academic_doc_generator.core.validation

Validation logic for configuration and environment.

validate_api_keys()

Check which LLM APIs are configured.

Returns:

Type Description
list[str]

List of available API names.

Raises:

Type Description
RuntimeError

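Documented for the case where no APIs are configured; the implementation shown below always includes "ollama", so this error is never actually raised.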

Source code in src/academic_doc_generator/core/validation.py
def validate_api_keys() -> list[str]:
    """Check which LLM APIs are configured.

    A hosted provider counts as configured when its API-key environment
    variable is set to a non-empty value. Ollama needs no key, so it is
    always reported as available.

    Returns:
        Available API names in the fixed order "openai", "groq", "gemini",
        followed by "ollama". The list is never empty, and this function
        does not raise when no keys are set.
    """
    # Environment variable that enables each hosted provider, in report order.
    key_vars = (
        ("OPENAI_API_KEY", "openai"),
        ("GROQ_API_KEY", "groq"),
        ("GEMINI_API_KEY", "gemini"),
    )
    # os.getenv returns None (unset) or "" (set but empty); both are falsy.
    available = [name for env_var, name in key_vars if os.getenv(env_var)]

    # Ollama doesn't need a key
    available.append("ollama")

    return available

validate_pdf_path(folder_path, filename)

Get validated PDF file path.

Parameters:

Name Type Description Default
folder_path str

Base folder path.

required
filename str

Name of the PDF file.

required

Returns:

Type Description
Path

Absolute path to PDF file.

Raises:

Type Description
ValueError

If the filename is an absolute path, attempts directory traversal, or does not end in ".pdf".

FileNotFoundError

If PDF does not exist.

Security
  • Prevents directory traversal (../)
  • Prevents absolute paths
  • Validates file extension
Source code in src/academic_doc_generator/core/validation.py
def validate_pdf_path(folder_path: str, filename: str) -> Path:
    """Get validated PDF file path.

    Args:
        folder_path: Base folder path.
        filename: Name of the PDF file, relative to ``folder_path``.

    Returns:
        Absolute, resolved path to the PDF file inside ``folder_path``.

    Raises:
        ValueError: If ``filename`` is an absolute path, contains a ``..``
            component, does not end in ``.pdf`` (case-insensitive), or
            resolves to a location outside ``folder_path``.
        FileNotFoundError: If the resolved path is not an existing regular
            file (a missing path or a directory).

    Security:
        - Prevents directory traversal (../)
        - Prevents absolute paths
        - Validates file extension
    """
    base_path = Path(folder_path).resolve()
    candidate = Path(filename)

    # Cheap lexical checks first, so obviously bad input is rejected with a
    # specific message before touching the filesystem.
    if candidate.is_absolute():
        raise ValueError(f"Absolute paths not allowed: {filename}")

    if ".." in candidate.parts:
        raise ValueError(f"Path traversal detected: {filename}")

    # Enforce file extension
    if not filename.lower().endswith(".pdf"):
        raise ValueError(f"Only PDF files allowed: {filename}")

    pdf_path = (base_path / candidate).resolve()

    # Verify the resolved path is still within the base folder; this catches
    # escapes the lexical checks above cannot see (resolve() follows symlinks).
    try:
        pdf_path.relative_to(base_path)
    except ValueError as e:
        raise ValueError(f"Invalid PDF path (directory traversal): {filename}") from e

    # is_file() rather than exists(): a directory named "x.pdf" is not a PDF.
    if not pdf_path.is_file():
        raise FileNotFoundError(f"PDF not found: {pdf_path}")

    return pdf_path