Extract annotations from a PDF, rewrite them into constructive feedback, and summarize the result.
Parameters:
| Name | Type | Description | Default |
|------|------|-------------|---------|
| `pdf_path` | `str` | Path to the PDF file. | required |
| `llm_client` | `LLMClient` | LLMClient instance for API access. | required |
Returns:
| Type | Description |
|------|-------------|
| `str` | A string containing 3-4 bullet points of summarized feedback in German. |
Source code in src/academic_doc_generator/project/feedback_generator.py
def generate_feedback_summary(pdf_path: str, llm_client: LLMClient) -> str:
    """Extract annotations from a PDF, rewrite them as feedback, and summarize.

    Args:
        pdf_path: Path to the PDF file.
        llm_client: LLMClient instance for API access.

    Returns:
        A string containing 3-4 bullet points of summarized feedback in German.
    """
    # Step 1: pull the page text and the annotations out of the PDF.
    pages_words = extract_text_with_positions(pdf_path)
    annotations, _stats = extract_annotations_with_positions(pdf_path, ignore_source=True)

    # Step 2: locate the surrounding text context for every annotation.
    context_dict = find_annotation_context(pages_words, annotations)

    # Step 3: rewrite each comment into constructive feedback (German).
    # Every category except "ignore" is kept (e.g. "llm", "quelle", "language").
    rewritten_feedbacks = []
    for contexts in context_dict.values():
        for ctx in contexts:
            if ctx["category"] == "ignore":
                continue
            prompt = build_prompt(
                PromptTemplate.REWRITE_TO_CONSTRUCTIVE_FEEDBACK,
                highlighted=ctx["highlighted"],
                comment=ctx["comment"],
            )
            response = llm_client.chat_completion([{"role": "user", "content": prompt}])
            rewritten_feedbacks.append(response)

    # Nothing usable was annotated: return a fixed German placeholder bullet.
    if not rewritten_feedbacks:
        return "- Keine spezifischen Anmerkungen im Dokument gefunden."

    # Step 4: condense every feedback item into 3-4 bullet points (German).
    all_feedbacks_text = "\n".join(f"- {fb}" for fb in rewritten_feedbacks)
    summary_prompt = build_prompt(PromptTemplate.SUMMARIZE_FEEDBACKS, text=all_feedbacks_text)
    return llm_client.chat_completion([{"role": "user", "content": summary_prompt}])