Skip to content

File Utilities

llm_client.utils.file_utils

File utilities for handling uploads to LLM providers.

Functions

detect_file_type(file_path)

Detect the type of a file based on its extension.

Parameters:

Name Type Description Default
file_path str | Path

Path to the file.

required

Returns:

Type Description
FileType

File type category.

Raises:

Type Description
ValueError

If file type cannot be determined or is unsupported.

Examples:

>>> detect_file_type("image.jpg")
'image'
>>> detect_file_type("document.pdf")
'pdf'
Source code in llm_client/utils/file_utils.py
def detect_file_type(file_path: str | Path) -> FileType:
    """Detect the type of a file based on its extension.

    Args:
        file_path: Path to the file.

    Returns:
        File type category.

    Raises:
        ValueError: If file type cannot be determined or is unsupported.

    Examples:
        >>> detect_file_type("image.jpg")
        'image'
        >>> detect_file_type("document.pdf")
        'pdf'
    """
    path = Path(file_path)
    mime_type, _ = mimetypes.guess_type(str(path))

    if mime_type is None:
        raise ValueError(f"Could not determine file type for {file_path}")

    if mime_type.startswith("image/"):
        return "image"
    elif mime_type == "application/pdf":
        return "pdf"
    elif mime_type.startswith("video/"):
        return "video"
    elif mime_type.startswith("audio/"):
        return "audio"
    elif mime_type in [
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "text/plain",
    ]:
        return "document"
    else:
        raise ValueError(f"Unsupported file type: {mime_type}")

encode_file_base64(file_path)

Encode a file to base64 string.

Parameters:

Name Type Description Default
file_path str | Path

Path to the file.

required

Returns:

Type Description
str

Base64 encoded string.

Raises:

Type Description
FileNotFoundError

If file doesn't exist.

Examples:

>>> encoded = encode_file_base64("image.jpg")
>>> len(encoded) > 0
True
Source code in llm_client/utils/file_utils.py
def encode_file_base64(file_path: str | Path) -> str:
    """Encode a file to base64 string.

    Args:
        file_path: Path to the file.

    Returns:
        Base64 encoded string.

    Raises:
        FileNotFoundError: If file doesn't exist.

    Examples:
        >>> encoded = encode_file_base64("image.jpg")
        >>> len(encoded) > 0
        True
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

get_mime_type(file_path)

Get the MIME type of a file.

Parameters:

Name Type Description Default
file_path str | Path

Path to the file.

required

Returns:

Type Description
str

MIME type string.

Raises:

Type Description
ValueError

If MIME type cannot be determined.

Examples:

>>> get_mime_type("image.jpg")
'image/jpeg'
>>> get_mime_type("document.pdf")
'application/pdf'
Source code in llm_client/utils/file_utils.py
def get_mime_type(file_path: str | Path) -> str:
    """Get the MIME type of a file.

    Args:
        file_path: Path to the file.

    Returns:
        MIME type string.

    Raises:
        ValueError: If MIME type cannot be determined.

    Examples:
        >>> get_mime_type("image.jpg")
        'image/jpeg'
        >>> get_mime_type("document.pdf")
        'application/pdf'
    """
    mime_type, _ = mimetypes.guess_type(str(file_path))
    if mime_type is None:
        raise ValueError(f"Could not determine MIME type for {file_path}")
    return mime_type

prepare_file_for_gemini(file_path)

Prepare a file for Gemini API format (via OpenAI compatibility).

Gemini uses the OpenAI compatibility layer, but supports PDFs only in the image_url format, not in the file format.

Parameters:

Name Type Description Default
file_path str | Path

Path to the file.

required

Returns:

Type Description
dict

Dictionary with file data in Gemini format.

Examples:

>>> file_data = prepare_file_for_gemini("image.jpg")
>>> "type" in file_data
True
Source code in llm_client/utils/file_utils.py
def prepare_file_for_gemini(file_path: str | Path) -> dict:
    """Build the Gemini message part for a file (via OpenAI compatibility).

    Gemini uses the OpenAI compatibility layer, but supports PDFs only in
    the ``image_url`` format, not in the ``file`` format. Every file is
    therefore emitted as an ``image_url`` part carrying a base64 data URL.

    Args:
        file_path: Path to the file.

    Returns:
        Dictionary with file data in Gemini format.

    Examples:
        >>> file_data = prepare_file_for_gemini("image.jpg")
        >>> "type" in file_data
        True
    """
    content_type = get_mime_type(file_path)
    payload = encode_file_base64(file_path)
    data_url = f"data:{content_type};base64,{payload}"

    # Gemini takes the image_url format for all supported file types.
    return {"type": "image_url", "image_url": {"url": data_url}}

prepare_file_for_openai(file_path)

Prepare a file for OpenAI API format.

Parameters:

Name Type Description Default
file_path str | Path

Path to the file.

required

Returns:

Type Description
dict

Dictionary with file data in OpenAI format.

Examples:

>>> file_data = prepare_file_for_openai("image.jpg")
>>> "type" in file_data and "image_url" in file_data
True
Source code in llm_client/utils/file_utils.py
def prepare_file_for_openai(file_path: str | Path) -> dict:
    """Build the OpenAI message part for a file.

    Images become an ``image_url`` part holding a base64 data URL. Every
    other detected file type becomes a ``file`` part holding the raw
    base64 data together with its MIME type.

    Args:
        file_path: Path to the file.

    Returns:
        Dictionary with file data in OpenAI format.

    Examples:
        >>> file_data = prepare_file_for_openai("image.jpg")
        >>> "type" in file_data and "image_url" in file_data
        True
    """
    category = detect_file_type(file_path)
    content_type = get_mime_type(file_path)
    payload = encode_file_base64(file_path)

    if category != "image":
        # PDFs and other documents travel as a plain file part.
        return {"type": "file", "file": {"data": payload, "mime_type": content_type}}

    data_url = f"data:{content_type};base64,{payload}"
    return {"type": "image_url", "image_url": {"url": data_url}}

prepare_files_for_provider(file_paths, provider)

Prepare multiple files for a specific provider.

Parameters:

Name Type Description Default
file_paths list[str | Path]

List of file paths.

required
provider str

Name of the provider.

required

Returns:

Type Description
list[dict]

List of file data dictionaries.

Raises:

Type Description
ValueError

If any file is not supported by the provider.

FileNotFoundError

If any file doesn't exist.

Examples:

>>> files = prepare_files_for_provider(["img1.jpg", "img2.png"], "openai")
>>> len(files) == 2
True
Source code in llm_client/utils/file_utils.py
def prepare_files_for_provider(
    file_paths: list[str | Path],
    provider: str,
) -> list[dict]:
    """Validate and convert several files for one provider.

    Each path is first checked with ``validate_file_for_provider``; the
    first failing file aborts the whole call. Valid files are converted
    with the preparer registered for the (case-insensitive) provider name.

    Args:
        file_paths: List of file paths.
        provider: Name of the provider.

    Returns:
        List of file data dictionaries, in the order of ``file_paths``.

    Raises:
        ValueError: If any file is not supported by the provider.
        FileNotFoundError: If any file doesn't exist.

    Examples:
        >>> files = prepare_files_for_provider(["img1.jpg", "img2.png"], "openai")
        >>> len(files) == 2
        True
    """
    # groq and ollama take the same payload format as OpenAI.
    preparers = {
        "openai": prepare_file_for_openai,
        "gemini": prepare_file_for_gemini,
        "groq": prepare_file_for_openai,
        "ollama": prepare_file_for_openai,
    }

    results: list[dict] = []
    for candidate in file_paths:
        accepted, problem = validate_file_for_provider(candidate, provider)
        if not accepted:
            raise ValueError(problem)

        prepare = preparers.get(provider.lower())
        # A provider without a registered preparer contributes nothing.
        if prepare is not None:
            results.append(prepare(candidate))

    return results

validate_file_for_provider(file_path, provider)

Validate if a file is supported by a provider.

Parameters:

Name Type Description Default
file_path str | Path

Path to the file.

required
provider str

Name of the provider (openai, gemini, groq, ollama).

required

Returns:

Type Description
tuple[bool, str | None]

Tuple of (is_valid, error_message).

Examples:

>>> is_valid, error = validate_file_for_provider("image.jpg", "openai")
>>> is_valid
True
>>> is_valid, error = validate_file_for_provider("video.mp4", "groq")
>>> is_valid
False
Source code in llm_client/utils/file_utils.py
def validate_file_for_provider(
    file_path: str | Path,
    provider: str,
) -> tuple[bool, str | None]:
    """Validate if a file is supported by a provider.

    The file type is detected first, so an undetectable or unsupported
    file is reported before the provider name is looked at. The provider
    name is matched case-insensitively.

    Args:
        file_path: Path to the file.
        provider: Name of the provider (openai, gemini, groq, ollama).

    Returns:
        Tuple of (is_valid, error_message). ``error_message`` is ``None``
        when the file is valid, otherwise a description of why the file
        type could not be detected, why the provider is unknown, or which
        file types the provider does support.

    Examples:
        >>> is_valid, error = validate_file_for_provider("image.jpg", "openai")
        >>> is_valid
        True
        >>> is_valid, error = validate_file_for_provider("video.mp4", "groq")
        >>> is_valid
        False
        >>> validate_file_for_provider("image.jpg", "unknown")
        (False, 'Unknown provider: unknown. Supported providers: openai, gemini, groq, ollama')
    """
    try:
        file_type = detect_file_type(file_path)
    except ValueError as e:
        return False, str(e)

    # Provider-specific file type support
    provider_support = {
        "openai": ["image", "pdf"],
        "gemini": ["image", "pdf", "video", "audio"],
        "groq": ["image"],  # Limited vision support
        "ollama": ["image"],  # Vision models only
    }

    supported_types = provider_support.get(provider.lower())

    # An unknown provider gets its own message instead of the misleading
    # "does not support ... Supported types: " with an empty list.
    if supported_types is None:
        return False, (
            f"Unknown provider: {provider}. "
            f"Supported providers: {', '.join(provider_support)}"
        )

    if file_type not in supported_types:
        return False, (
            f"{provider} does not support {file_type} files. "
            f"Supported types: {', '.join(supported_types)}"
        )

    return True, None