metadata¶

`academic_doc_generator.core.metadata` ¶

Generation of web-compatible metadata files for student projects.

`generate_metadata_file(output_folder, title, author, pages_text, llm_client, work_type, semester, date_str=None, copy_to_web_folder=True, students=None)` ¶

Create a Jekyll-style Markdown file with frontmatter for a student project.

Parameters:

Name	Type	Description	Default
`output_folder`	`str`	Folder where the .md file will be saved.	required
`title`	`str`	Title of the work.	required
`author`	`str`	Full name of the student (or first student in group).	required
`pages_text`	`dict[int, str]`	Extracted text from the PDF.	required
`llm_client`	`LLMClientProtocol`	LLM client for summary generation.	required
`work_type`	`str`	String indicating the type of work.	required
`semester`	`str`	Semester name (e.g., "Wintersemester 24/25").	required
`date_str`	`Optional[str]`	Date of the work (YYYY-MM-DD). Defaults to today.	`None`
`copy_to_web_folder`	`bool`	Whether to copy the file to the global web metadata folder.	`True`
`students`	`Optional[list[StudentInfo]]`	Optional list of student info for group projects.	`None`

Returns:

Type	Description
`str`	Path to the generated .md file.

Source code in src/academic_doc_generator/core/metadata.py

def generate_metadata_file(
    output_folder: str,
    title: str,
    author: str,
    pages_text: dict[int, str],
    llm_client: LLMClientProtocol,
    work_type: str,  # e.g., "Bachelorthesis", "Masterthesis", "Praxisprojekt"
    semester: str,
    date_str: Optional[str] = None,
    copy_to_web_folder: bool = True,
    students: Optional[list[StudentInfo]] = None,
) -> str:
    """Create a Jekyll-style Markdown file with frontmatter for a student project.

    The summary is produced by ``summarize_for_web`` and full author names
    found in it are replaced by their initials. The file is written to
    ``output_folder`` as ``{year}_{semester_slug}_{type_slug}_{author_slug}.md``.

    Values placed in double-quoted frontmatter fields are escaped, and every
    line of the summary is indented inside the ``excerpt`` block scalar, so
    titles containing quotes and multi-line summaries still yield valid YAML.

    Args:
        output_folder: Folder where the .md file will be saved.
        title: Title of the work.
        author: Full name of the student (or first student in group).
        pages_text: Extracted text from the PDF.
        llm_client: LLM client for summary generation.
        work_type: String indicating the type of work.
        semester: Semester name (e.g., "Wintersemester 24/25").
        date_str: Date of the work (YYYY-MM-DD). Defaults to today.
        copy_to_web_folder: Whether to copy the file to the global web metadata folder.
        students: Optional list of student info for group projects. Only used
            when it contains more than one entry; otherwise ``author`` is used.

    Returns:
        Path to the generated .md file.
    """
    placeholder_names = ("Unknown Author", "Unknown", "Unbekannt")

    def _yaml_escape(value: str) -> str:
        # Backslash and double quote are the characters that would terminate
        # or corrupt a double-quoted YAML scalar; backslash must go first.
        return value.replace("\\", "\\\\").replace('"', '\\"')

    if not date_str:
        date_str = datetime.now().strftime("%Y-%m-%d")

    summary = summarize_for_web(pages_text, llm_client)

    if students and len(students) > 1:
        initials = " & ".join(get_initials(s.get("name", "U. A.")) for s in students)
        author_slug = "_".join(get_author_slug(s.get("name", "unkn")) for s in students)

        # Replace all author names with their respective initials in the summary
        for s in students:
            s_name = s.get("name")
            if s_name and s_name not in placeholder_names:
                summary = summary.replace(s_name, get_initials(s_name))
    else:
        initials = get_initials(author)
        author_slug = get_author_slug(author)
        # Clean up summary: replace full author name with initials if it appears
        if author and author not in placeholder_names:
            summary = summary.replace(author, initials)

    # Create .md filename
    # format: {year}_{semester_slug}_{type_slug}_{author_slug}.md
    year = date_str[:4]

    # Try to make a slug from semester
    sem_slug = (
        semester.lower()
        .replace(" ", "")
        .replace("/", "")
        .replace("wintersemester", "ws")
        .replace("sommersemester", "ss")
    )

    # The type slug is simply the first two letters of the work type.
    type_slug = work_type[:2].lower()

    md_filename = f"{year}_{sem_slug}_{type_slug}_{author_slug}.md"
    md_path = os.path.join(output_folder, md_filename)

    # A YAML block scalar ("|") ends at the first line that is not indented,
    # so every line of the summary must carry the two-space indent, not just
    # the first one.
    excerpt_block = "  " + summary.replace("\n", "\n  ")

    content = (
        "---\n"
        f'title: "{_yaml_escape(title)}"\n'
        f'author: "{_yaml_escape(initials)}"\n'
        f'date: "{_yaml_escape(date_str)}"\n'
        "excerpt: |\n"
        f"{excerpt_block}\n"
        "collection: student_projects\n"
        f'type: "{_yaml_escape(work_type)}"\n'
        f'semester: "{_yaml_escape(semester)}"\n'
        "---\n"
        "\n"
        f"{summary}\n"
    )
    with open(md_path, "w", encoding="utf-8") as f:
        f.write(content)

    # Copy to global web metadata folder if configured and requested
    if copy_to_web_folder:
        global_config = load_global_config()
        web_metadata_folder = global_config.get("web_metadata_folder")
        if web_metadata_folder:
            # Best effort: a failed copy is reported but must not invalidate
            # the file that was already written to output_folder.
            try:
                os.makedirs(web_metadata_folder, exist_ok=True)
                shutil.copy2(md_path, os.path.join(web_metadata_folder, md_filename))
                print(f"✅ Web metadata also copied to: {web_metadata_folder}")
            except Exception as e:
                print(f"⚠️  Error copying web metadata: {e}")

    return md_path

`get_author_slug(name)` ¶

Generate a short slug from the author's name.

Parameters:

Name	Type	Description	Default
`name`	`str`	Full name of the person.	required

Returns:

Type	Description
`str`	A lowercase slug of at most 4 characters ("unkn" when the name is missing or unknown).

Source code in src/academic_doc_generator/core/metadata.py

def get_author_slug(name: str) -> str:
    """Build a compact lowercase slug identifying an author.

    Hyphens are treated as word separators. With two or more name parts the
    slug is the first two letters of the first part followed by the first two
    letters of the last part; with a single part it is that part's first four
    letters.

    Args:
        name: Full name of the person.

    Returns:
        A lowercase slug of at most four characters, or "unkn" when the name
        is empty, a known placeholder, or contains no name parts.
    """
    fallback = "unkn"
    if not name or name in ["Unknown Author", "Unknown", "Unbekannt"]:
        return fallback

    # Double-barrelled names count as separate parts.
    tokens = name.replace("-", " ").split()
    if not tokens:
        return fallback
    if len(tokens) == 1:
        return tokens[0][:4].lower()

    head = tokens[0][:2].lower()
    tail = tokens[-1][:2].lower()
    return head + tail

`get_initials(name)` ¶

Generate initials for a name (e.g., 'Max Mustermann' -> 'M. M.').

Parameters:

Name	Type	Description	Default
`name`	`str`	Full name of the person.	required

Returns:

Type	Description
`str`	String containing uppercase initials followed by dots.

Source code in src/academic_doc_generator/core/metadata.py

def get_initials(name: str) -> str:
    """Generate initials for a name (e.g., 'Max Mustermann' -> 'M. M.').

    Hyphens are treated as word separators, so 'Anna-Lena Meier' becomes
    'A. L. M.'.

    Args:
        name: Full name of the person.

    Returns:
        String containing uppercase initials followed by dots, or the
        placeholder 'U. A.' when the name is empty, a known placeholder, or
        contains no name parts (e.g. only whitespace or hyphens).
    """
    placeholder = "U. A."
    if not name or name in ["Unknown Author", "Unknown", "Unbekannt"]:
        return placeholder
    # Replace hyphens with spaces to treat as separate parts
    parts = name.replace("-", " ").split()
    if not parts:
        # A name made only of separators would otherwise give an empty
        # string; fall back to the placeholder instead.
        return placeholder
    return " ".join(f"{part[0].upper()}." for part in parts)

`summarize_for_web(pages_text, llm_client)` ¶

Generate a concise, English summary suitable for publication on a website.

Parameters:

Name	Type	Description	Default
`pages_text`	`dict[int, str]`	Mapping of page indices to text content.	required
`llm_client`	`LLMClientProtocol`	LLM client for summary generation.	required

Returns:

Type	Description
`str`	A short English summary string.

Source code in src/academic_doc_generator/core/metadata.py

def summarize_for_web(pages_text: dict[int, str], llm_client: LLMClientProtocol) -> str:
    """Ask the LLM for a short web-ready summary of the document.

    Only the text of the ten lowest-keyed pages is sent, joined by blank
    lines, using the SUMMARIZE_FOR_WEB prompt template.

    Args:
        pages_text: Mapping of page indices to text content.
        llm_client: LLM client for summary generation.

    Returns:
        The LLM's reply with surrounding whitespace stripped.
    """
    # Use first 10 pages as in the original script
    leading_pages = sorted(pages_text.keys())[:10]
    page_chunks = []
    for page_index in leading_pages:
        page_chunks.append(pages_text.get(page_index, ""))
    excerpt = "\n\n".join(page_chunks)

    user_prompt = build_prompt(PromptTemplate.SUMMARIZE_FOR_WEB, text=excerpt)
    reply = llm_client.chat_completion([{"role": "user", "content": user_prompt}])
    return reply.strip()

metadata¶

academic_doc_generator.core.metadata ¶

generate_metadata_file(output_folder, title, author, pages_text, llm_client, work_type, semester, date_str=None, copy_to_web_folder=True, students=None) ¶

get_author_slug(name) ¶

get_initials(name) ¶

summarize_for_web(pages_text, llm_client) ¶

`academic_doc_generator.core.metadata` ¶

`generate_metadata_file(output_folder, title, author, pages_text, llm_client, work_type, semester, date_str=None, copy_to_web_folder=True, students=None)` ¶

`get_author_slug(name)` ¶

`get_initials(name)` ¶

`summarize_for_web(pages_text, llm_client)` ¶