File Utilities¶

`llm_client.utils.file_utils` ¶

File utilities for handling uploads to LLM providers.

Functions¶

`detect_file_type(file_path)` ¶

Detect the type of a file based on its extension.

Parameters:

Name	Type	Description	Default
`file_path`	`str \| Path`	Path to the file.	required

Returns:

Type	Description
`FileType`	File type category.

Raises:

Type	Description
`ValueError`	If file type cannot be determined or is unsupported.

Examples:

>>> detect_file_type("image.jpg")
'image'
>>> detect_file_type("document.pdf")
'pdf'

Source code in llm_client/utils/file_utils.py

def detect_file_type(file_path: str | Path) -> FileType:
    """Classify a file into a coarse category based on its extension.

    The path is mapped to a MIME type with ``mimetypes.guess_type`` and the
    MIME type is then bucketed into a category. The file itself is never
    opened, so it does not have to exist.

    Args:
        file_path: Path to the file.

    Returns:
        One of ``"image"``, ``"pdf"``, ``"video"``, ``"audio"`` or
        ``"document"``.

    Raises:
        ValueError: If no MIME type can be guessed from the path, or the
            guessed MIME type does not fall into a supported category.

    Examples:
        >>> detect_file_type("image.jpg")
        'image'
        >>> detect_file_type("document.pdf")
        'pdf'
    """
    guessed, _encoding = mimetypes.guess_type(str(Path(file_path)))
    if guessed is None:
        raise ValueError(f"Could not determine file type for {file_path}")

    # Whole MIME families are matched by prefix.
    if guessed.startswith("image/"):
        return "image"
    if guessed == "application/pdf":
        return "pdf"
    if guessed.startswith("video/"):
        return "video"
    if guessed.startswith("audio/"):
        return "audio"

    # Only these exact MIME types count as generic documents.
    document_mime_types = (
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "text/plain",
    )
    if guessed in document_mime_types:
        return "document"

    raise ValueError(f"Unsupported file type: {guessed}")

`encode_file_base64(file_path)` ¶

Encode a file to base64 string.

Parameters:

Name	Type	Description	Default
`file_path`	`str \| Path`	Path to the file.	required

Returns:

Type	Description
`str`	Base64 encoded string.

Raises:

Type	Description
`FileNotFoundError`	If file doesn't exist.

Examples:

>>> encoded = encode_file_base64("image.jpg")
>>> len(encoded) > 0
True

Source code in llm_client/utils/file_utils.py

def encode_file_base64(file_path: str | Path) -> str:
    """Encode a file to base64 string.

    Args:
        file_path: Path to the file.

    Returns:
        Base64 encoded string.

    Raises:
        FileNotFoundError: If file doesn't exist.

    Examples:
        >>> encoded = encode_file_base64("image.jpg")
        >>> len(encoded) > 0
        True
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

`get_mime_type(file_path)` ¶

Get the MIME type of a file.

Parameters:

Name	Type	Description	Default
`file_path`	`str \| Path`	Path to the file.	required

Returns:

Type	Description
`str`	MIME type string.

Raises:

Type	Description
`ValueError`	If MIME type cannot be determined.

Examples:

>>> get_mime_type("image.jpg")
'image/jpeg'
>>> get_mime_type("document.pdf")
'application/pdf'

Source code in llm_client/utils/file_utils.py

def get_mime_type(file_path: str | Path) -> str:
    """Get the MIME type of a file.

    Args:
        file_path: Path to the file.

    Returns:
        MIME type string.

    Raises:
        ValueError: If MIME type cannot be determined.

    Examples:
        >>> get_mime_type("image.jpg")
        'image/jpeg'
        >>> get_mime_type("document.pdf")
        'application/pdf'
    """
    mime_type, _ = mimetypes.guess_type(str(file_path))
    if mime_type is None:
        raise ValueError(f"Could not determine MIME type for {file_path}")
    return mime_type

`prepare_file_for_gemini(file_path)` ¶

Prepare a file for Gemini API format (via OpenAI compatibility).

Gemini uses the OpenAI compatibility layer, but it only supports PDFs in the image_url format, not in the file format.

Parameters:

Name	Type	Description	Default
`file_path`	`str \| Path`	Path to the file.	required

Returns:

Type	Description
`dict`	Dictionary with file data in Gemini format.

Examples:

>>> file_data = prepare_file_for_gemini("image.jpg")
>>> "type" in file_data
True

Source code in llm_client/utils/file_utils.py

def prepare_file_for_gemini(file_path: str | Path) -> dict:
    """Build the Gemini payload entry for a file (via OpenAI compatibility).

    Gemini uses the OpenAI compatibility layer, but it only supports PDFs in
    the ``image_url`` format, not in the ``file`` format. Every file is
    therefore embedded as a base64 ``data:`` URL inside an ``image_url``
    entry, whatever its MIME type.

    Args:
        file_path: Path to the file.

    Returns:
        Dictionary with file data in Gemini format.

    Raises:
        ValueError: If the MIME type cannot be determined.
        FileNotFoundError: If the file doesn't exist.

    Examples:
        >>> file_data = prepare_file_for_gemini("image.jpg")
        >>> "type" in file_data
        True
    """
    mime = get_mime_type(file_path)
    payload = encode_file_base64(file_path)

    # Gemini uses the image_url format for all supported file types.
    data_url = f"data:{mime};base64,{payload}"
    return {"type": "image_url", "image_url": {"url": data_url}}

`prepare_file_for_openai(file_path)` ¶

Prepare a file for OpenAI API format.

Parameters:

Name	Type	Description	Default
`file_path`	`str \| Path`	Path to the file.	required

Returns:

Type	Description
`dict`	Dictionary with file data in OpenAI format.

Examples:

>>> file_data = prepare_file_for_openai("image.jpg")
>>> "type" in file_data and "image_url" in file_data
True

Source code in llm_client/utils/file_utils.py

def prepare_file_for_openai(file_path: str | Path) -> dict:
    """Build the OpenAI payload entry for a file.

    Images are embedded as a base64 ``data:`` URL inside an ``image_url``
    entry. Every other supported file type (PDFs and other documents) is
    returned as a ``file`` entry carrying the base64 data and its MIME type.

    Args:
        file_path: Path to the file.

    Returns:
        Dictionary with file data in OpenAI format.

    Raises:
        ValueError: If the file type or MIME type cannot be determined, or
            the file type is unsupported.
        FileNotFoundError: If the file doesn't exist.

    Examples:
        >>> file_data = prepare_file_for_openai("image.jpg")
        >>> "type" in file_data and "image_url" in file_data
        True
    """
    category = detect_file_type(file_path)
    mime = get_mime_type(file_path)
    payload = encode_file_base64(file_path)

    if category != "image":
        # PDFs and other documents travel as a raw base64 file entry.
        return {"type": "file", "file": {"data": payload, "mime_type": mime}}

    return {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{payload}"}}

`prepare_files_for_provider(file_paths, provider)` ¶

Prepare multiple files for a specific provider.

Parameters:

Name	Type	Description	Default
`file_paths`	`list[str \| Path]`	List of file paths.	required
`provider`	`str`	Name of the provider.	required

Returns:

Type	Description
`list[dict]`	List of file data dictionaries.

Raises:

Type	Description
`ValueError`	If any file is not supported by the provider.
`FileNotFoundError`	If any file doesn't exist.

Examples:

>>> files = prepare_files_for_provider(["img1.jpg", "img2.png"], "openai")
>>> len(files) == 2
True

Source code in llm_client/utils/file_utils.py

def prepare_files_for_provider(
    file_paths: list[str | Path],
    provider: str,
) -> list[dict]:
    """Validate and convert several files into a provider's payload format.

    Each file is first checked with ``validate_file_for_provider`` and then
    converted. Gemini has its own format; OpenAI, Groq and Ollama all share
    the OpenAI format. Processing stops at the first file that fails.

    Args:
        file_paths: List of file paths.
        provider: Name of the provider (matched case-insensitively).

    Returns:
        List of file data dictionaries, in the same order as ``file_paths``.

    Raises:
        ValueError: If any file is not supported by the provider.
        FileNotFoundError: If any file doesn't exist.

    Examples:
        >>> files = prepare_files_for_provider(["img1.jpg", "img2.png"], "openai")
        >>> len(files) == 2
        True
    """
    converted = []

    for candidate in file_paths:
        # Reject unsupported files before reading anything from disk.
        ok, reason = validate_file_for_provider(candidate, provider)
        if not ok:
            raise ValueError(reason)

        provider_key = provider.lower()
        if provider_key == "gemini":
            converted.append(prepare_file_for_gemini(candidate))
        elif provider_key in ("openai", "groq", "ollama"):
            # Groq and Ollama use the same format as OpenAI.
            converted.append(prepare_file_for_openai(candidate))

    return converted

`validate_file_for_provider(file_path, provider)` ¶

Validate if a file is supported by a provider.

Parameters:

Name	Type	Description	Default
`file_path`	`str \| Path`	Path to the file.	required
`provider`	`str`	Name of the provider (openai, gemini, groq, ollama).	required

Returns:

Type	Description
`tuple[bool, str \| None]`	Tuple of (is_valid, error_message).

Examples:

>>> is_valid, error = validate_file_for_provider("image.jpg", "openai")
>>> is_valid
True
>>> is_valid, error = validate_file_for_provider("video.mp4", "groq")
>>> is_valid
False

Source code in llm_client/utils/file_utils.py

def validate_file_for_provider(
    file_path: str | Path,
    provider: str,
) -> tuple[bool, str | None]:
    """Validate if a file is supported by a provider.

    The file type is derived from the path via ``detect_file_type`` and then
    checked against the set of types the provider accepts. This function
    never raises for an unsupported file or provider; it reports the problem
    through the returned tuple instead.

    Args:
        file_path: Path to the file.
        provider: Name of the provider (openai, gemini, groq, ollama),
            matched case-insensitively.

    Returns:
        Tuple of (is_valid, error_message). ``error_message`` is ``None``
        when the file is valid, otherwise it explains why it was rejected
        (undetectable/unsupported file type, unknown provider, or a file
        type the provider does not accept).

    Examples:
        >>> is_valid, error = validate_file_for_provider("image.jpg", "openai")
        >>> is_valid
        True
        >>> is_valid, error = validate_file_for_provider("video.mp4", "groq")
        >>> is_valid
        False
    """
    try:
        file_type = detect_file_type(file_path)
    except ValueError as e:
        return False, str(e)

    # Provider-specific file type support
    provider_support = {
        "openai": ["image", "pdf"],
        "gemini": ["image", "pdf", "video", "audio"],
        "groq": ["image"],  # Limited vision support
        "ollama": ["image"],  # Vision models only
    }

    supported_types = provider_support.get(provider.lower())

    # An unknown provider gets its own message; otherwise the caller would
    # see a "does not support" error with an empty list of supported types.
    if supported_types is None:
        return False, (
            f"Unknown provider: {provider}. "
            f"Known providers: {', '.join(provider_support)}"
        )

    if file_type not in supported_types:
        return False, (
            f"{provider} does not support {file_type} files. "
            f"Supported types: {', '.join(supported_types)}"
        )

    return True, None

File Utilities¶

llm_client.utils.file_utils ¶

Functions¶

detect_file_type(file_path) ¶

encode_file_base64(file_path) ¶

get_mime_type(file_path) ¶

prepare_file_for_gemini(file_path) ¶

prepare_file_for_openai(file_path) ¶

prepare_files_for_provider(file_paths, provider) ¶

validate_file_for_provider(file_path, provider) ¶

`llm_client.utils.file_utils` ¶

`detect_file_type(file_path)` ¶

`encode_file_base64(file_path)` ¶

`get_mime_type(file_path)` ¶

`prepare_file_for_gemini(file_path)` ¶

`prepare_file_for_openai(file_path)` ¶

`prepare_files_for_provider(file_paths, provider)` ¶

`validate_file_for_provider(file_path, provider)` ¶