Zum Inhalt

orchestrator

academic_doc_generator.review.orchestrator

run_review_pipeline(pdf_path, llm_client=None, groq_free=False, output_folder=None)

Run the peer review pipeline and produce a Markdown review.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `pdf_path` | `str \| Path` | Path to the paper PDF. | required |
| `llm_client` | `Optional[LLMClientProtocol]` | LLMClient instance for API access. If None, creates a new one. | `None` |
| `groq_free` | `bool` | Whether to apply throttling for free-tier. | `False` |
| `output_folder` | `Optional[str \| Path]` | Folder to save the markdown. Defaults to PDF folder. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ReviewResult` | Path to the generated markdown file. |

Source code in src/academic_doc_generator/review/orchestrator.py
def run_review_pipeline(
    pdf_path: str | Path,
    llm_client: Optional[LLMClientProtocol] = None,
    groq_free: bool = False,
    output_folder: Optional[str | Path] = None,
) -> ReviewResult:
    """Run the peer review pipeline and produce a Markdown review.

    The pipeline reads the PDF's text and annotations, resolves the context of
    each annotation, passes that context to the LLM for rewriting, and writes
    the result to ``review_comments_<pdf file name without extension>.md``.

    Args:
        pdf_path: Path to the paper PDF.
        llm_client: LLMClient instance for API access. If None, creates a new one.
        groq_free: Whether to apply throttling for free-tier.
        output_folder: Folder to save the markdown. Defaults to PDF folder.
            The folder is created if it does not exist yet.

    Returns:
        Path to the generated markdown file.
    """
    pdf_path_str = str(pdf_path)
    if output_folder is None:
        output_folder_str = os.path.dirname(pdf_path_str)
    else:
        output_folder_str = str(output_folder)

    # Create LLMClient if not provided
    if llm_client is None:
        llm_client = LLMClient()
        print(f"Using LLM API: {llm_client.api_choice} with model: {llm_client.llm}")

    pages_words = extract_text_with_positions(pdf_path_str)
    # The second return value is not used by this pipeline.
    annotations, _ = extract_annotations_with_positions(pdf_path_str, False)

    # Per-page heights, keyed by zero-based page index, taken from each
    # page's media box.
    reader = PdfReader(pdf_path_str)
    page_heights = {i: float(p.mediabox.top) for i, p in enumerate(reader.pages)}

    context = find_annotation_context_with_lines(pages_words, annotations, page_heights)
    rewritten = rewrite_comments_markdown(context, llm_client, groq_free=groq_free)

    fileid = os.path.splitext(os.path.basename(pdf_path_str))[0]
    print(fileid)

    # Make sure the destination exists before writing. An empty string means
    # "current directory" (bare PDF file name) and must not be passed to
    # os.makedirs, which rejects it.
    if output_folder_str:
        os.makedirs(output_folder_str, exist_ok=True)

    md_path = os.path.join(output_folder_str, f"review_comments_{fileid}.md")
    create_review_markdown(rewritten, md_path)

    # NOTE(review): the signature is annotated ``-> ReviewResult`` but a plain
    # path string is returned here — confirm ReviewResult is a str alias.
    return md_path