import gradio as gr
import pandas as pd
import numpy as np
import os
import re
from typing import Dict, Tuple, List, Optional, Callable
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

# ======================================
# Settings (default path of the bundled CSV; change as needed)
# ======================================
# Read at startup by initialize_default_csv() when a file exists at this path.
DEFAULT_CSV_PATH = "/mnt/data/mock_data_id_9999.csv"

# ======================================
# ユーティリティ
# ======================================
def normalize(s: str) -> str:
    """Return ``str(s)`` cleaned up for use as a lookup key.

    Full-width (ideographic) spaces become ASCII spaces, line-feed and
    carriage-return characters are removed, and leading/trailing whitespace
    is stripped.
    """
    # One translation pass: U+3000 -> " ", LF and CR -> deleted.
    cleanup_table = {0x3000: " ", 0x0A: None, 0x0D: None}
    return str(s).translate(cleanup_table).strip()

def try_read_csv_3header(path_or_file) -> pd.DataFrame:
    """
    3行ヘッダーCSVを読み込む（cp932/utf-8-sig フォールバック）。
    1列目は timestamp として datetime 変換。
    2列目以降は (ID, ItemName, ProcessName) の3段。
    """
    last_err = None
    for enc in ["cp932", "utf-8-sig", "utf-8"]:
        try:
            df = pd.read_csv(path_or_file, header=[0, 1, 2], encoding=enc)
            break
        except Exception as e:
            last_err = e
            df = None
    if df is None:
        raise last_err

    # 先頭列を timestamp に
    ts = pd.to_datetime(df.iloc[:, 0], errors="coerce")
    df = df.drop(df.columns[0], axis=1)
    df.insert(0, "timestamp", ts)

    return df

def col_tuple_to_str(col) -> str:
    """Flatten a column label into a single string.

    Tuples are joined with "_" after dropping falsy parts (e.g. empty
    strings); any other label is returned as ``str(col)``.
    """
    if not isinstance(col, tuple):
        return str(col)
    kept_parts = (str(part) for part in col if part)
    return "_".join(kept_parts)

def build_index_maps(df: pd.DataFrame):
    """
    Build a mapping from process name to the columns that belong to it.

    For tuple column labels with at least three levels, the levels are read
    as (id, item, process). Other labels are converted to a string and split
    on "_": first part = id, last part = process, everything between = item;
    labels with fewer than three parts are ignored.

    Each record is a dict with the keys "col_tuple" (original label), "id",
    "item", "process" and "col_str" (flattened label).

    Args:
        df: DataFrame whose columns are scanned.

    Returns:
        ``(process_map, processes)`` where ``process_map`` maps each process
        name to its list of records and ``processes`` is the list of process
        names sorted by their normalized form.
    """
    process_map = {}
    for col in df.columns:
        # Inserting "timestamp" into MultiIndex columns pads the label to
        # ("timestamp", "", ""), which never equals the plain string. Skip
        # that form too, otherwise the timestamp column is registered under
        # an empty process name.
        if isinstance(col, tuple):
            if len(col) > 0 and col[0] == "timestamp" and all(str(level) == "" for level in col[1:]):
                continue
        elif col == "timestamp":
            continue

        if isinstance(col, tuple) and len(col) >= 3:
            col_id, item_name, process_name = str(col[0]), str(col[1]), str(col[2])
        else:
            parts = str(col).split("_")
            if len(parts) < 3:
                continue
            col_id, item_name, process_name = parts[0], "_".join(parts[1:-1]), parts[-1]

        rec = {
            "col_tuple": col,
            "id": col_id,
            "item": item_name,
            "process": process_name,
            "col_str": col_tuple_to_str(col),
        }
        process_map.setdefault(process_name, []).append(rec)

    processes = sorted(process_map, key=lambda name: normalize(name))
    return process_map, processes

def extract_measure_tag(item_name: str) -> str:
    """
    Extract the measurement tag found at the end of an item name.

    A trailing ``[...]`` group takes priority and its stripped content is
    returned; otherwise the last whitespace-separated word of the normalized
    name is returned.
    """
    text = normalize(item_name)
    bracketed = re.search(r"\[([^\[\]]+)\]\s*$", text)
    if bracketed is not None:
        return bracketed.group(1).strip()
    words = re.split(r"\s+", text)
    if not words:
        return text
    return words[-1]

def extract_category(item_name: str) -> str:
    """
    Derive a category name from an item name.

    Resolution order, applied to the normalized name:
      1. If it contains "_", the stripped text after the last "_".
      2. Otherwise, the stripped text following "処理水" (and any "_" or
         whitespace right after it), if that pattern matches.
      3. Otherwise, the last whitespace-separated word.
    """
    text = normalize(item_name)

    _, underscore, tail = text.rpartition("_")
    if underscore:
        return tail.strip()

    after_marker = re.search(r"処理水[_\s]*(.+)$", text)
    if after_marker is not None:
        return after_marker.group(1).strip()

    words = re.split(r"\s+", text)
    if not words:
        return text
    return words[-1]

# ======================================
# しきい値ハンドリング
# ======================================
def try_read_thresholds_excel(file) -> Optional[pd.DataFrame]:
    """
    Load the optional threshold workbook.

    Expected columns: ColumnID, ItemName, ProcessNo_ProcessName, LL, L, H, HH
    and optionally Important. Header names are normalized first; when any of
    the three key columns is missing, known aliases (compared
    case-insensitively) are renamed to the canonical names. LL/L/H/HH are
    coerced to numbers (invalid values become NaN) and Important is mapped
    from the text TRUE/FALSE to booleans (anything else becomes NaN).

    Returns:
        The cleaned DataFrame, or None when ``file`` is None.
    """
    if file is None:
        return None

    table = pd.read_excel(file)
    table.columns = [normalize(header) for header in table.columns]

    key_columns = ("ColumnID", "ItemName", "ProcessNo_ProcessName")
    if any(name not in table.columns for name in key_columns):
        # Lower-cased alias -> canonical column name.
        aliases = {
            "columnid": "ColumnID",
            "colid": "ColumnID",
            "id": "ColumnID",
            "itemname": "ItemName",
            "item": "ItemName",
            "name": "ItemName",
            "processno_processname": "ProcessNo_ProcessName",
            "process": "ProcessNo_ProcessName",
            "processname": "ProcessNo_ProcessName",
        }
        renames = {}
        for header in list(table.columns):
            canonical = aliases.get(normalize(str(header)).lower())
            if canonical is not None:
                renames[header] = canonical
        if renames:
            table = table.rename(columns=renames)

    for bound in ("LL", "L", "H", "HH"):
        if bound in table.columns:
            table[bound] = pd.to_numeric(table[bound], errors="coerce")

    if "Important" in table.columns:
        as_text = table["Important"].astype(str).str.upper()
        table["Important"] = as_text.map({"TRUE": True, "FALSE": False})

    return table

def build_threshold_lookup(thr_df: Optional[pd.DataFrame]) -> Dict[Tuple[str, str, str], Tuple[float, float, float, float]]:
    """
    Turn the threshold table into a dictionary.

    Key: normalized (ColumnID, ItemName, ProcessNo_ProcessName).
    Value: (LL, L, H, HH), with NaN for any threshold column that is absent.
    A None or empty table yields an empty dictionary; later rows overwrite
    earlier rows that share a key.
    """
    if thr_df is None or thr_df.empty:
        return {}

    key_fields = ("ColumnID", "ItemName", "ProcessNo_ProcessName")
    bound_fields = ("LL", "L", "H", "HH")

    table = {}
    for _, row in thr_df.iterrows():
        key = tuple(normalize(str(row.get(field, ""))) for field in key_fields)
        table[key] = tuple(row.get(field, np.nan) for field in bound_fields)
    return table

def auto_threshold(series: pd.Series) -> Tuple[float, float, float, float]:
    """
    Derive thresholds from the data itself.

    Returns (LL, L, H, HH) = (mean - 2*sd, mean - sd, mean + sd, mean + 2*sd),
    where sd is the sample standard deviation (ddof=1) of the non-missing
    values. With fewer than 5 non-missing values all four thresholds are NaN.
    """
    valid = series.dropna()
    if len(valid) < 5:
        return (np.nan, np.nan, np.nan, np.nan)
    mean = float(valid.mean())
    # At least 5 samples are guaranteed here, so the sample standard
    # deviation is always defined and needs no small-sample fallback.
    sd = float(valid.std(ddof=1))
    return (mean - 2 * sd, mean - sd, mean + sd, mean + 2 * sd)

def judge_status(value, LL, L, H, HH) -> str:
    """Classify ``value`` against the four thresholds.

    Checks run in the order LL, L, HH, H; the first satisfied one decides the
    label. Low limits match when ``value <= limit``, high limits when
    ``value >= limit``. Missing (NaN/None) limits are skipped, and "OK" is
    returned when nothing matches.
    """
    for label, limit in (("LL", LL), ("L", L)):
        if pd.notna(limit) and value <= limit:
            return label
    for label, limit in (("HH", HH), ("H", H)):
        if pd.notna(limit) and value >= limit:
            return label
    return "OK"

# Color settings
# Marker color for each status label produced by judge_status().
STATUS_COLOR = {
    "LL": "#2b6cb0",
    "L":  "#63b3ed",
    "OK": "#a0aec0",
    "H":  "#f6ad55",
    "HH": "#e53e3e",
}
# Single color used for the line traces in make_grouped_figure().
LINE_COLOR = "#4a5568"

# ======================================
# 汎用：グループキーに応じて図を作る（サブプロット）
# group_by: "all" / "category" / "item"
# ======================================
def _group_key_func(group_by: str) -> Callable[[dict], str]:
    if group_by == "item":
        return lambda rr: normalize(rr["item"])
    if group_by == "category":
        return lambda rr: extract_category(rr["item"])
    # "all"
    return lambda rr: "ALL"

def make_grouped_figure(
    df: pd.DataFrame,
    process_map: Dict[str, List[dict]],
    process_name: str,
    selected_items: List[str],
    thr_df: Optional[pd.DataFrame],
    thr_mode: str,
    date_min: Optional[str],
    date_max: Optional[str],
    group_by: str,                       # "all" / "category" / "item"
    _force_groups: Optional[List[str]] = None,  # used for pagination
) -> Optional[go.Figure]:
    """
    Build a trend figure for one process with one subplot row per group.

    Args:
        df: Source data; must contain a "timestamp" column.
        process_map: Process name -> list of column records (dicts with the
            keys "col_tuple", "col_str", "id", "item", "process").
        process_name: Process whose columns are plotted.
        selected_items: Item names to include (compared after normalize()).
        thr_df: Threshold table, only consulted when ``thr_mode == "excel"``.
        thr_mode: "excel" looks thresholds up by (id, item, process) and
            falls back to auto_threshold() when all four are missing; any
            other value always uses auto_threshold().
        date_min: Inclusive lower timestamp bound; falsy means no bound.
        date_max: Inclusive upper timestamp bound; falsy means no bound.
        group_by: "all", "category" or "item"; selects the grouping key.
        _force_groups: When given, the group names (and order) to render
            instead of every group found.

    Returns:
        The figure, or None when ``df`` is None, ``process_name`` is empty,
        no record matches the process/selection, the date filter leaves no
        rows, or there are no groups to draw.
    """
    if df is None or not process_name:
        return None
    recs = process_map.get(process_name, [])
    if not recs:
        return None
    selected = set([normalize(x) for x in (selected_items or [])])
    recs = [r for r in recs if normalize(r["item"]) in selected]
    if not recs:
        return None

    dfw = df.copy()
    if date_min:
        dfw = dfw[dfw["timestamp"] >= pd.to_datetime(date_min)]
    if date_max:
        dfw = dfw[dfw["timestamp"] <= pd.to_datetime(date_max)]
    if dfw.empty:
        return None

    thr_lookup = build_threshold_lookup(thr_df) if thr_mode == "excel" else {}
    keyfunc = _group_key_func(group_by)

    # Group the records (by category / by item / single ALL group)
    groups: Dict[str, List[dict]] = {}
    for r in recs:
        groups.setdefault(keyfunc(r), []).append(r)

    group_names = list(groups.keys()) if _force_groups is None else _force_groups
    if not group_names:
        return None

    rows = len(group_names)
    if rows <= 1:
        vspace = 0.03
    else:
        # Keep the spacing just under 1/(rows-1) — presumably the limit
        # make_subplots accepts for vertical_spacing; TODO confirm.
        max_vs = (1.0 / (rows - 1)) - 1e-4
        vspace = max(0.0, min(0.03, max_vs))

    # Subplot titles
    if group_by == "all":
        subtitles = [f"{process_name} | すべての項目"]  # single row
    elif group_by == "category":
        subtitles = [f"{process_name} | 分類: {g}" for g in group_names]
    else:  # item
        subtitles = [f"{process_name} | 項目: {g}" for g in group_names]

    fig = make_subplots(
        rows=rows, cols=1, shared_xaxes=True,
        vertical_spacing=vspace,
        subplot_titles=subtitles
    )

    # Draw each group in its own row, one or more series per row
    row_idx = 1
    for gname in group_names:
        cols = groups.get(gname, [])
        for r in cols:
            col = r["col_tuple"]
            col_str = r["col_str"]
            # Accept either the original label or its flattened string form.
            if col in dfw.columns:
                series = dfw[col]
            elif col_str in dfw.columns:
                series = dfw[col_str]
            else:
                continue

            x = dfw["timestamp"]
            y = pd.to_numeric(series, errors="coerce")

            if thr_mode == "excel":
                key = (normalize(r["id"]), normalize(r["item"]), normalize(r["process"]))
                LL, L, H, HH = thr_lookup.get(key, (np.nan, np.nan, np.nan, np.nan))
                if all(pd.isna(v) for v in [LL, L, H, HH]):
                    LL, L, H, HH = auto_threshold(y)
            else:
                LL, L, H, HH = auto_threshold(y)

            # Line trace
            fig.add_trace(
                go.Scatter(
                    x=x, y=y, mode="lines",
                    name=f"{r['item']} ({r['id']})",
                    line=dict(color=LINE_COLOR, width=1.5),
                    hovertemplate="%{x}<br>%{y}<extra>"+f"{r['item']} ({r['id']})"+"</extra>"
                ),
                row=row_idx, col=1
            )
            # Marker trace (colored by status); missing values get a
            # fully transparent marker.
            colors = []
            for v in y:
                if pd.isna(v):
                    colors.append("rgba(0,0,0,0)")
                else:
                    st = judge_status(v, LL, L, H, HH)
                    colors.append(STATUS_COLOR.get(st, STATUS_COLOR["OK"]))
            fig.add_trace(
                go.Scatter(
                    x=x, y=y, mode="markers",
                    name=f"{r['item']} markers",
                    marker=dict(size=6, color=colors),
                    showlegend=False,
                    hovertemplate="%{x}<br>%{y}<extra></extra>"
                ),
                row=row_idx, col=1
            )
        # Threshold guide lines are omitted: the values differ per series,
        # so there is no single value to draw for the whole row.
        row_idx += 1

    fig.update_layout(
        title=(
            f"{process_name} ｜ "
            + ("一括表示" if group_by == "all"
               else "分類別表示（カテゴリ）" if group_by == "category"
               else "個別表示（項目）")
        ),
        xaxis_title="timestamp",
        showlegend=True,
        margin=dict(l=10, r=10, t=40, b=10),
        hovermode="x unified",
        height=max(420, 260 * rows),
    )
    return fig

# Pagination (pages are counted in groups of the chosen group_by)
def make_grouped_figure_paged(
    df: pd.DataFrame,
    process_map: Dict[str, List[dict]],
    process_name: str,
    selected_items: List[str],
    thr_df: Optional[pd.DataFrame],
    thr_mode: str,
    date_min: Optional[str],
    date_max: Optional[str],
    page: int,
    per_page: int,
    group_by: str,  # "category" or "item"
) -> Tuple[Optional[go.Figure], int, List[str]]:
    """
    Render one page of groups via make_grouped_figure().

    Args:
        df, process_map, process_name, selected_items, thr_df, thr_mode,
        date_min, date_max, group_by: Passed through to
            make_grouped_figure().
        page: 1-based page number; clamped into ``1..total_pages``.
        per_page: Groups per page; values below 1 are treated as 1.

    Returns:
        ``(figure, total_pages, all_group_names)``. When no record matches
        the process/selection the result is ``(None, 0, [])``. ``figure`` may
        also be None when make_grouped_figure() cannot produce one.
    """
    recs = process_map.get(process_name, [])
    if not recs:
        return None, 0, []
    selected = {normalize(x) for x in (selected_items or [])}
    recs = [r for r in recs if normalize(r["item"]) in selected]
    if not recs:
        return None, 0, []

    keyfunc = _group_key_func(group_by)
    groups: Dict[str, List[dict]] = {}
    for r in recs:
        groups.setdefault(keyfunc(r), []).append(r)
    all_names = list(groups.keys())

    # Normalise the page size once so the page count and the slice bounds
    # always agree (and the slice indices are integers).
    per_page = max(1, int(per_page))
    total_pages = max(1, int(np.ceil(len(all_names) / per_page)))
    page = int(max(1, min(page, total_pages)))
    start = (page - 1) * per_page
    names_slice = all_names[start:start + per_page]

    fig = make_grouped_figure(
        df, process_map, process_name, selected_items, thr_df, thr_mode,
        date_min, date_max, group_by=group_by, _force_groups=names_slice
    )
    return fig, total_pages, all_names

# ======================================
# Global state (shared between UI callbacks)
# ======================================
# DataFrame parsed from the most recently loaded CSV; None until one is loaded.
G_DF: Optional[pd.DataFrame] = None
# Process name -> list of column records, as returned by build_index_maps().
G_PROCESS_MAP = {}
# Sorted process names, as returned by build_index_maps().
G_PROCESSES = []
# Threshold table from the uploaded Excel file; None when none is loaded.
G_THRESHOLDS_DF: Optional[pd.DataFrame] = None

# ======================================
# コールバック
# ======================================
def initialize_default_csv():
    """
    Load the default CSV at startup when it exists.

    On success the module globals G_DF, G_PROCESS_MAP and G_PROCESSES are
    replaced together; on failure none of them is touched.

    Returns:
        ``(status_message, dropdown_update, processes)``. ``processes`` is an
        empty list when nothing was loaded.
    """
    global G_DF, G_PROCESS_MAP, G_PROCESSES
    if not os.path.exists(DEFAULT_CSV_PATH):
        return "ℹ CSVをアップロードしてください。", gr.update(), []

    try:
        df = try_read_csv_3header(DEFAULT_CSV_PATH)
        process_map, processes = build_index_maps(df)
    except Exception as e:
        return f"⚠ 既定CSV読み込み失敗: {e}", gr.update(), []

    # Commit only after both steps succeeded so the globals never describe
    # two different files.
    G_DF, G_PROCESS_MAP, G_PROCESSES = df, process_map, processes
    return (
        f"✅ 既定CSVを読み込みました: {DEFAULT_CSV_PATH}",
        gr.update(choices=G_PROCESSES, value=(G_PROCESSES[0] if G_PROCESSES else None)),
        G_PROCESSES
    )

def on_csv_upload(file):
    """
    Handle a CSV upload: parse it and refresh the process choices.

    On success the module globals G_DF, G_PROCESS_MAP and G_PROCESSES are
    replaced together; on failure the previously loaded data is left intact.

    Args:
        file: The uploaded file — an object with a ``name`` attribute, or a
            value passed straight to try_read_csv_3header(); None when
            nothing is selected.

    Returns:
        ``(status_message, dropdown_update, processes)``.
    """
    global G_DF, G_PROCESS_MAP, G_PROCESSES
    if file is None:
        return "⚠ ファイルが選択されていません。", gr.update(choices=[]), []

    try:
        df = try_read_csv_3header(file.name if hasattr(file, "name") else file)
        process_map, processes = build_index_maps(df)
    except Exception as e:
        return f"❌ 読み込みエラー: {e}", gr.update(choices=[]), []

    # Commit only after both steps succeeded so a failing upload cannot
    # leave the new DataFrame paired with the old process map.
    G_DF, G_PROCESS_MAP, G_PROCESSES = df, process_map, processes
    return (
        f"✅ CSV読み込み: {df.shape[0]}行 × {df.shape[1]}列",
        gr.update(choices=G_PROCESSES, value=(G_PROCESSES[0] if G_PROCESSES else None)),
        G_PROCESSES
    )

def on_thr_upload(file):
    """
    Handle a threshold Excel upload and update G_THRESHOLDS_DF.

    ``None`` clears the stored thresholds. A read failure also clears them
    and reports the error in the returned status text.

    Returns:
        A status message string.
    """
    global G_THRESHOLDS_DF

    if file is None:
        G_THRESHOLDS_DF = None
        return "ℹ しきい値ファイルなし（自動しきい値が使われます）"

    try:
        # Upload objects expose the path as ``.name``; anything else is
        # passed through unchanged.
        source = getattr(file, "name", file)
        loaded = try_read_thresholds_excel(source)
        G_THRESHOLDS_DF = loaded
        row_count = loaded.shape[0]
        return f"✅ しきい値を読み込みました（{row_count}件）"
    except Exception as e:
        G_THRESHOLDS_DF = None
        return f"❌ しきい値読み込みエラー: {e}"

def update_items(process_name: str):
    """
    Return the checkbox update listing the items of the chosen process.

    The choices are the distinct item names recorded for ``process_name`` in
    G_PROCESS_MAP, sorted by their normalized form, and all of them are
    pre-selected. An empty or unknown process yields empty choices and value.
    """
    is_known = bool(process_name) and process_name in G_PROCESS_MAP
    if not is_known:
        return gr.update(choices=[], value=[])

    distinct_items = {rec["item"] for rec in G_PROCESS_MAP[process_name]}
    ordered = sorted(distinct_items, key=normalize)
    # Everything is selected by default.
    return gr.update(choices=ordered, value=ordered)

def render_any(process_name: str, items: List[str], display_mode: str, thr_mode_label: str,
               date_min, date_max, page: int, per_page: int):
    """
    Build the plot for the selected display mode.

    - Bulk mode (label starting with "一括"): every selected item in a single
      row ("ALL").
    - Category mode (label starting with "分類"): one subplot per category,
      paginated.
    - Anything else: one subplot per item, paginated.

    Args:
        process_name: Selected process.
        items: Selected item names.
        display_mode: Label of the display-mode radio button.
        thr_mode_label: Label of the threshold-mode radio button; labels
            starting with "excel" select the Excel lookup, others automatic
            thresholds.
        date_min, date_max: Optional date bounds passed to the figure builder.
        page: Requested 1-based page; invalid or missing values fall back to 1.
        per_page: Groups per page; invalid or missing values fall back to 8
            (the slider's initial value) and values below 1 become 1.

    Returns:
        A 4-tuple ``(message, plot_update, page_info_update,
        page_number_update)``. On any failure the three updates hide their
        components.
    """
    def _hidden():
        return gr.update(visible=False)

    def _failure(message):
        return message, _hidden(), _hidden(), _hidden()

    def _positive_int(raw, fallback):
        # A cleared number field arrives as None; never let that raise.
        try:
            number = int(raw)
        except (TypeError, ValueError, OverflowError):
            return fallback
        return max(1, number)

    if G_DF is None:
        return _failure("⚠ データ未読み込み")
    if not process_name:
        return _failure("⚠ プロセスを選択してください")
    if not items:
        return _failure("⚠ 項目を選択してください")

    mode = "excel" if str(thr_mode_label).startswith("excel") else "auto"
    display = str(display_mode)
    no_figure = "⚠ 図を生成できませんでした（データ無し or 条件不一致）"

    # Bulk display
    if display.startswith("一括"):
        fig = make_grouped_figure(
            G_DF, G_PROCESS_MAP, process_name, items, G_THRESHOLDS_DF, mode, date_min, date_max, group_by="all"
        )
        if fig is None:
            return _failure(no_figure)
        return "✅ 一括表示を描画しました", gr.update(value=fig, visible=True), _hidden(), _hidden()

    # Paged displays: by category, otherwise by item
    if display.startswith("分類"):
        group_by = "category"
        count_label = "分類（カテゴリ）数"
        done_message = "✅ 分類別表示（末尾語カテゴリ）を描画しました"
    else:
        group_by = "item"
        count_label = "項目数"
        done_message = "✅ 個別表示（項目）を描画しました"

    page = _positive_int(page, 1)
    per_page = _positive_int(per_page, 8)

    fig, total_pages, all_names = make_grouped_figure_paged(
        G_DF, G_PROCESS_MAP, process_name, items, G_THRESHOLDS_DF, mode,
        date_min, date_max, page=page, per_page=per_page, group_by=group_by
    )
    if fig is None:
        return _failure(no_figure)

    shown_page = int(max(1, min(page, total_pages)))
    info = f"{count_label}: {len(all_names)} ｜ ページ {shown_page} / {total_pages} ｜ 件/ページ={per_page}"
    return done_message, gr.update(value=fig, visible=True), gr.update(value=info, visible=True), gr.update(visible=True)

# ======================================
# UI
# ======================================
# Try the default CSV once at import time, then pull the dropdown's initial
# value and choices out of the returned update (only when it is a dict).
init_msg, init_proc_update, _ = initialize_default_csv()
init_value = init_proc_update.get("value") if isinstance(init_proc_update, dict) else None
init_choices = init_proc_update.get("choices") if isinstance(init_proc_update, dict) else []

with gr.Blocks(css="""
.gradio-container {overflow: auto !important;}
""") as demo:
    gr.Markdown("## トレンドグラフ専用アプリ（3行ヘッダー対応・プロセス別・分類/個別・閾値色分け）")

    with gr.Row():
        csv_uploader = gr.File(label="① 時系列CSV（3行ヘッダー）", file_count="single", file_types=[".csv"])
        thr_uploader = gr.File(label="② 閾値Excel（任意: LL/L/H/HH）", file_count="single", file_types=[".xlsx", ".xls"])

    with gr.Row():
        thr_mode = gr.Radio(
            ["excel（アップロード優先・無ければ自動）", "自動（平均±標準偏差）"],
            value="excel（アップロード優先・無ければ自動）",
            label="しきい値モード"
        )
        date_min = gr.Textbox(label="抽出開始日時（任意）例: 2024-07-01 00:00")
        date_max = gr.Textbox(label="抽出終了日時（任意）例: 2024-07-31 23:59")

    # Display-mode switch
    display_mode = gr.Radio(
        ["一括表示", "分類別表示（カテゴリ）", "個別表示（項目）"],
        value="一括表示",
        label="表示形式"
    )

    status_csv = gr.Markdown(init_msg)
    status_thr = gr.Markdown()

    process_dd = gr.Dropdown(label="対象プロセス（3行ヘッダーの3行目）",
                             choices=init_choices, value=init_value)
    items_cb = gr.CheckboxGroup(label="表示する項目（3行ヘッダーの2行目）", choices=[], value=[])

    with gr.Row():
        btn_render = gr.Button("トレンド図を生成", variant="primary")

    msg = gr.Markdown()
    plot = gr.Plot(label="トレンド図", visible=True)

    # Pagination controls (shown only for the category / item modes)
    with gr.Row():
        per_page = gr.Slider(1, 12, value=8, step=1, label="件/ページ（分類別・個別）", visible=False)
        page_no = gr.Number(value=1, label="ページ（1〜）", precision=0, visible=False)
    page_info = gr.Markdown(visible=False)

    # 1) On page load, fill the item list for the process preselected from
    #    the default CSV. The dropdown's change event does not fire for its
    #    initial value, so without this the item list would stay empty.
    demo.load(
        update_items,
        inputs=[process_dd],
        outputs=[items_cb],
    )

    # 2) Refresh on CSV upload
    csv_uploader.change(
        on_csv_upload,
        inputs=[csv_uploader],
        outputs=[status_csv, process_dd, gr.State()],
    )

    # 3) Refresh on threshold upload
    thr_uploader.change(
        on_thr_upload,
        inputs=[thr_uploader],
        outputs=[status_thr],
    )

    # 4) Refresh the item choices when the process changes
    process_dd.change(
        update_items,
        inputs=[process_dd],
        outputs=[items_cb],
    )

    # 5) Render the figure
    btn_render.click(
        fn=render_any,
        inputs=[process_dd, items_cb, display_mode, thr_mode, date_min, date_max, page_no, per_page],
        outputs=[msg, plot, page_info, page_no],
    )

    # 6) Show or hide the pagination controls depending on the display mode
    def _toggle_page_controls(mode):
        """Return visibility updates: hidden in bulk mode, shown otherwise."""
        show = not str(mode).startswith("一括")
        return gr.update(visible=show), gr.update(visible=show), gr.update(visible=show)
    display_mode.change(
        _toggle_page_controls,
        inputs=[display_mode],
        outputs=[per_page, page_no, page_info],
    )

if __name__ == "__main__":
    # SSR disabled (to keep Plotly embedding and re-rendering stable)
    demo.launch(ssr_mode=False)