import pandas as pd
from pathlib import Path
from IPython.display import Markdown, HTML

from tulip.plots import plot_bar, plot_lines
from tulip_mania.notebook_health import (
    analyze_notebooks,
    generate_health_report,
    get_failed_notebooks,
    get_notebooks_by_data_source,
    get_latest_build_log,
    merge_with_build_log,
    get_notebook_link,
    get_project_notebooks,
)
from tulip_mania.columns import columns

# Notebook Health Monitor

This notebook analyzes the health of all notebooks in the documentation, including:

- Execution status (success, error, partial, not run)
- Error analysis and diagnostics
- Notebook inventory and complexity metrics
- Data source usage (Bloomberg, Haver, FRED, GS)

# Configuration
# Resolve the repository root no matter where the notebook is launched from:
#   1. the working directory itself contains projects/  (e.g. run via main.py)
#   2. the working directory is projects/other/          (interactive session)
#   3. otherwise, the nearest ancestor holding myst.yml  (falls back to cwd)
cwd = Path.cwd()

if (cwd / "projects").is_dir():
    PROJECT_ROOT = cwd
elif cwd.name == "other" and (cwd.parent.parent / "projects").is_dir():
    PROJECT_ROOT = cwd.parent.parent
else:
    PROJECT_ROOT = next(
        (
            candidate
            for candidate in (cwd, *cwd.parents)
            if (candidate / "myst.yml").exists()
        ),
        cwd,
    )

# Locations derived from the root
PROJECTS_PATH = PROJECT_ROOT / "projects"
LOG_DIR = PROJECT_ROOT / "logs"

print(f"Project root: {PROJECT_ROOT.resolve()}")
print(f"Analyzing notebooks in: {PROJECTS_PATH.resolve()}")

# Collect the project notebooks, attach the build-log results, then summarise
df = merge_with_build_log(get_project_notebooks(PROJECT_ROOT), LOG_DIR)
report = generate_health_report(df)

# Report on the most recent build when a log is available
build_log = get_latest_build_log(LOG_DIR)
if build_log:
    built_at = build_log["log_date"].strftime("%Y-%m-%d %H:%M")
    print(f"Last build: {built_at}")
    build_totals = build_log["summary"]
    print(f"Build results: {build_totals['passed']}/{build_totals['total']} passed")

print(f"Analyzed {report['total_notebooks']} notebooks")

## Summary Dashboard

# Summary statistics
status_counts = report["status_counts"]
partial_count = status_counts.get("partial", 0)
read_error_count = status_counts.get("read_error", 0)

# Resolve every figure first so the template below only interpolates plain names
total_notebooks = report["total_notebooks"]
success_count = status_counts.get("success", 0)
success_rate = report["success_rate"]
error_count = status_counts.get("error", 0)
error_rate = report["error_rate"]
not_run_count = status_counts.get("not_run", 0)

# One coloured card per headline number, laid out in a wrapping flex row
summary_html = f"""
<div style="display: flex; gap: 20px; flex-wrap: wrap;">
    <div style="background: #e8f5e9; padding: 20px; border-radius: 8px; min-width: 150px;">
        <h3 style="margin: 0; color: #2e7d32;">Total</h3>
        <p style="font-size: 32px; margin: 10px 0; font-weight: bold;">{total_notebooks}</p>
        <p style="margin: 0; color: #666;">notebooks</p>
    </div>
    <div style="background: #e3f2fd; padding: 20px; border-radius: 8px; min-width: 150px;">
        <h3 style="margin: 0; color: #1565c0;">Success</h3>
        <p style="font-size: 32px; margin: 10px 0; font-weight: bold;">{success_count}</p>
        <p style="margin: 0; color: #666;">{success_rate:.1f}%</p>
    </div>
    <div style="background: #ffebee; padding: 20px; border-radius: 8px; min-width: 150px;">
        <h3 style="margin: 0; color: #c62828;">Errors</h3>
        <p style="font-size: 32px; margin: 10px 0; font-weight: bold;">{error_count}</p>
        <p style="margin: 0; color: #666;">{error_rate:.1f}%</p>
    </div>
    <div style="background: #ffe0b2; padding: 20px; border-radius: 8px; min-width: 150px;">
        <h3 style="margin: 0; color: #e65100;">Partial</h3>
        <p style="font-size: 32px; margin: 10px 0; font-weight: bold;">{partial_count}</p>
        <p style="margin: 0; color: #666;">some cells ran</p>
    </div>
    <div style="background: #fff3e0; padding: 20px; border-radius: 8px; min-width: 150px;">
        <h3 style="margin: 0; color: #ef6c00;">Not Run</h3>
        <p style="font-size: 32px; margin: 10px 0; font-weight: bold;">{not_run_count}</p>
        <p style="margin: 0; color: #666;">notebooks</p>
    </div>
</div>
"""

# Append a grey banner only when some notebooks could not be parsed at all
if read_error_count > 0:
    read_error_banner = f"""
<div style="margin-top: 10px; background: #f5f5f5; padding: 10px; border-radius: 8px;">
    <span style="color: #666;">Read errors: {read_error_count} notebooks could not be parsed</span>
</div>
"""
    summary_html = summary_html + read_error_banner

# Last expression of the cell: rendered as rich HTML output
HTML(summary_html)

# Execution status breakdown: one row per status, indexed by the status name
status_rows = [
    {"Status": status_name, "Count": notebook_count}
    for status_name, notebook_count in report["status_counts"].items()
]
status_df = pd.DataFrame(status_rows).set_index("Status")

# Last expression of the cell, so the returned figure is what gets rendered
status_chart_options = {
    "title": "<b>Notebook Execution Status</b>",
    "figsize": (600, 400),
}
plot_bar(status_df["Count"], **status_chart_options)

## Error Analysis

# Failed notebooks with clickable links
from html import escape as html_escape  # stdlib; imported here to keep the cell self-contained

failed = get_failed_notebooks(df)

location_col = "project" if "project" in df.columns else "folder"


def make_link(row):
    """Return an HTML anchor pointing at the rendered page of a notebook.

    ``row["path"]`` is the notebook path. When it lies under ``PROJECTS_PATH``
    the result is ``<a href="../<relative>.html">name</a>``; otherwise only the
    (escaped) file name is returned because no relative URL can be derived.

    NOTE(review): ``get_notebook_link`` is imported at the top of the file but
    unused -- it may already cover this; confirm before consolidating.
    """
    path = Path(row["path"])
    label = html_escape(path.name)
    try:
        rel_path = path.relative_to(PROJECTS_PATH)
    except ValueError:
        # Not under the projects folder
        return label
    # Swap only the final extension; a plain substring replace would also
    # rewrite ".ipynb" inside directory names such as ".ipynb_checkpoints".
    if rel_path.suffix == ".ipynb":
        rel_path = rel_path.with_suffix(".html")
    href = html_escape(rel_path.as_posix())
    return f'<a href="../{href}">{label}</a>'


def _escape_text(value):
    """HTML-escape string cells; leave numbers and missing values untouched."""
    return html_escape(value) if isinstance(value, str) else value


if failed.empty:
    display(Markdown("No failed notebooks found."))
else:
    display(Markdown(f"### Failed Notebooks ({len(failed)})"))

    failed_display = failed.copy()
    failed_display["notebook"] = failed_display.apply(make_link, axis=1)

    # Prefer the project column, fall back to "folder", and keep only the
    # columns that are actually present in the failure table.
    where_col = location_col if location_col in failed_display.columns else "folder"
    wanted_cols = [
        "notebook",
        where_col,
        "error_cell_index",
        "error_type",
        "error_message",
    ]
    display_cols = [c for c in wanted_cols if c in failed_display.columns]

    # The table is rendered with escape=False so the anchors survive; every
    # other text column must therefore be escaped by hand, otherwise error
    # messages containing "<" or "&" would be interpreted as markup.
    for col in display_cols:
        if col != "notebook":
            failed_display[col] = failed_display[col].map(_escape_text)

    # to_html honours display.max_colwidth and would cut long anchors (and
    # escaped entities) in the middle, so lift the limit for this render only.
    with pd.option_context("display.max_colwidth", None):
        failed_table_html = failed_display[display_cols].to_html(
            escape=False, index=False
        )

    display(HTML(failed_table_html))

# Error types breakdown (skipped entirely when no error types were recorded)
error_type_counts = report["error_types"]
if error_type_counts:
    display(Markdown("### Error Types"))
    error_df = pd.DataFrame(
        list(error_type_counts.items()),
        columns=["Error Type", "Count"],
    )
    display(error_df.style.hide(axis="index"))

# Partial execution notebooks
location_col = "project" if "project" in df.columns else "folder"

# Keep only the columns of interest that the inventory actually provides
display_cols = [
    col
    for col in ("name", location_col, "cells_executed", "code_cells")
    if col in df.columns
]

is_partial = df["execution_status"] == "partial"
partial_nbs = df.loc[is_partial, display_cols]

if not partial_nbs.empty:
    display(Markdown(f"### Partially Executed Notebooks ({len(partial_nbs)})"))
    partial_note = "_These notebooks started execution but didn't complete all code cells._"
    display(Markdown(partial_note))
    left_aligned = partial_nbs.style.set_properties(**{"text-align": "left"})
    display(left_aligned.hide(axis="index"))
else:
    display(Markdown("No partially executed notebooks found."))

## Notebook Inventory

# Notebooks by project folder
group_col = "project" if "project" in df.columns else "folder"


def _count_successes(statuses):
    """Count the entries of a status Series that equal "success"."""
    return (statuses == "success").sum()


# Per-group statistics: notebook count, successful runs, and average size
aggregations = {
    "name": "count",
    "execution_status": _count_successes,
    "code_cells": "mean",
    "lines_of_code": "mean",
}
friendly_names = {
    "name": "Notebooks",
    "execution_status": "Successful",
    "code_cells": "Avg Code Cells",
    "lines_of_code": "Avg Lines",
}
folder_df = df.groupby(group_col).agg(aggregations).rename(columns=friendly_names)

display(Markdown(f"### Notebooks by {group_col.title()}"))

# Last expression of the cell: the styled table is the cell output
average_formats = {"Avg Code Cells": "{:.1f}", "Avg Lines": "{:.0f}"}
folder_df.style.format(average_formats)

# Full inventory table with build times
display(Markdown("### Full Inventory"))

# Notebooks are located by project when that column exists, else by folder
location_col = "project" if "project" in df.columns else "folder"

# Columns we would like to show, in display order
candidate_cols = (
    "name",
    location_col,
    "execution_status",
    "build_time",
    "code_cells",
    "lines_of_code",
    "uses_bloomberg",
    "uses_haver",
    "uses_iris",
)

# Drop whichever of them the inventory does not provide
inventory_cols = [col for col in candidate_cols if col in df.columns]


def status_color(val):
    """Return the CSS background rule for an execution status.

    Recognised statuses are "success", "error", "not_run" and "partial";
    any other value yields an empty string (no styling).
    """
    palette = {
        status: f"background-color: {shade}"
        for status, shade in (
            ("success", "#c8e6c9"),
            ("error", "#ffcdd2"),
            ("not_run", "#fff9c4"),
            ("partial", "#ffe0b2"),
        )
    }
    try:
        return palette[val]
    except KeyError:
        return ""


# Format build time as seconds
def _format_build_time(seconds):
    """Render a numeric build time as e.g. "12.3s"; anything else becomes "-"."""
    if pd.notna(seconds) and isinstance(seconds, (int, float)):
        return f"{seconds:.1f}s"
    return "-"


# Work on a copy so the formatted strings never leak back into df
inv_df = df[inventory_cols].copy()
if "build_time" in inv_df.columns:
    inv_df["build_time"] = inv_df["build_time"].apply(_format_build_time)

# Last expression of the cell: colour the status column and drop the index
inv_df.style.map(status_color, subset=["execution_status"]).hide(axis="index")

## Complexity Metrics

# Complexity summary
# Pull the four headline figures out of the report before filling the template
avg_cells = report["avg_cells"]
avg_code_cells = report["avg_code_cells"]
avg_lines_of_code = report["avg_lines_of_code"]
notebooks_with_timestamp = report["notebooks_with_timestamp"]

complexity_html = f"""
<div style="display: flex; gap: 20px; flex-wrap: wrap;">
    <div style="background: #f3e5f5; padding: 20px; border-radius: 8px; min-width: 150px;">
        <h3 style="margin: 0; color: #7b1fa2;">Avg Cells</h3>
        <p style="font-size: 32px; margin: 10px 0; font-weight: bold;">{avg_cells:.1f}</p>
        <p style="margin: 0; color: #666;">per notebook</p>
    </div>
    <div style="background: #e1f5fe; padding: 20px; border-radius: 8px; min-width: 150px;">
        <h3 style="margin: 0; color: #0277bd;">Avg Code Cells</h3>
        <p style="font-size: 32px; margin: 10px 0; font-weight: bold;">{avg_code_cells:.1f}</p>
        <p style="margin: 0; color: #666;">per notebook</p>
    </div>
    <div style="background: #fce4ec; padding: 20px; border-radius: 8px; min-width: 150px;">
        <h3 style="margin: 0; color: #c2185b;">Avg Lines</h3>
        <p style="font-size: 32px; margin: 10px 0; font-weight: bold;">{avg_lines_of_code:.0f}</p>
        <p style="margin: 0; color: #666;">of code</p>
    </div>
    <div style="background: #e8eaf6; padding: 20px; border-radius: 8px; min-width: 150px;">
        <h3 style="margin: 0; color: #3f51b5;">With Timestamp</h3>
        <p style="font-size: 32px; margin: 10px 0; font-weight: bold;">{notebooks_with_timestamp}</p>
        <p style="margin: 0; color: #666;">notebooks</p>
    </div>
</div>
"""

# Last expression of the cell: rendered as rich HTML output
HTML(complexity_html)

# Largest notebooks by lines of code
display(Markdown("### Largest Notebooks (by lines of code)"))

location_col = "project" if "project" in df.columns else "folder"
largest_cols = ["name", location_col, "lines_of_code", "code_cells"]

# Top ten by size; last expression of the cell, so the styled table is shown
top_by_lines = df.nlargest(10, "lines_of_code")
top_by_lines[largest_cols].style.hide(axis="index")

# Slowest notebooks by build time
display(Markdown("### Slowest Notebooks (by build time)"))

# Same location column as every other table in this notebook; a hard-coded
# "folder" raises KeyError when the inventory only carries "project".
location_col = "project" if "project" in df.columns else "folder"

# build_time is treated as optional by the inventory table above, so guard the
# lookup here as well. When present, coerce it to numbers so it can be ranked
# (unparseable entries become NaN and are dropped).
if "build_time" in df.columns:
    df["build_time"] = pd.to_numeric(df["build_time"], errors="coerce")
    timed = df.dropna(subset=["build_time"])
else:
    timed = df.iloc[0:0]

if timed.empty:
    display(Markdown("_No build time data available. Run a full build to populate._"))
else:
    slowest_cols = [
        col
        for col in ("name", location_col, "build_time", "code_cells")
        if col in timed.columns
    ]
    slowest = timed.nlargest(10, "build_time")[slowest_cols].copy()
    # Format only the copy shown to the reader; df keeps numeric build times
    slowest["build_time"] = slowest["build_time"].apply(lambda x: f"{x:.1f}s")
    display(slowest.style.hide(axis="index"))

## Data Source Usage

# Data source usage chart: one row per source, labelled in title case
usage_rows = [
    {"Source": source_name.title(), "Notebooks": notebook_count}
    for source_name, notebook_count in report["data_source_usage"].items()
]
ds_df = pd.DataFrame(usage_rows).set_index("Source")

# Last expression of the cell, so the returned figure is what gets rendered
usage_chart_options = {
    "title": "<b>Data Source Usage Across Notebooks</b>",
    "figsize": (600, 400),
}
plot_bar(ds_df["Notebooks"], **usage_chart_options)

# Notebooks by data source
# A bare Markdown(...) expression is only rendered when it is the last
# expression of its cell, so display the heading explicitly like the others.
display(Markdown("### Notebooks by Data Source"))

bloomberg_nbs = get_notebooks_by_data_source(df, "bloomberg")
haver_nbs = get_notebooks_by_data_source(df, "haver")

# Same presentation for every source: a bold count line, then (if any) the
# first ten matching notebooks without the index column.
for source_label, source_nbs in (("Bloomberg", bloomberg_nbs), ("Haver", haver_nbs)):
    display(Markdown(f"**{source_label}** ({len(source_nbs)} notebooks)"))
    if not source_nbs.empty:
        display(source_nbs.head(10).style.hide(axis="index"))

# Closing cell: invoke the project's notebook_updated() helper.
# NOTE(review): presumably records/prints when this notebook was last updated
# (the health report counts "notebooks_with_timestamp") -- confirm against
# tulip_mania.notebook_related before relying on that.
from tulip_mania.notebook_related import notebook_updated

notebook_updated()