Compare commits

...

4 Commits

Author SHA1 Message Date
0a9484257a feat(cli): add --format flag for pretty, llm, and jsonl output 2026-04-16 17:21:48 -04:00
7cc6b74048 fix: remove 500-char limit on message body wrapping 2026-04-16 17:06:29 -04:00
b06604788e feat(messages): resolve @mentions and redesign thread display
Replace <@USERID> tokens with @handles using Slack handle (name) as
preferred identifier, falling back to display_name then real_name.

Redesign thread rendering with bullet glyphs (● top-level, ◆ thread
parent), ├/└ reply branches, horizontal rules between roots, and an
ellipsis row for skipped replies. Resolve orphan thread parents from
the message cache when possible; otherwise render an explicit
"orphaned thread — parent unknown" box.
2026-04-16 16:55:52 -04:00
b9afd0d8da chore: ignore __pycache__ directories 2026-04-16 16:55:52 -04:00
3 changed files with 343 additions and 52 deletions

1
.gitignore vendored
View File

@@ -1 +1,2 @@
result result
__pycache__

View File

@@ -24,7 +24,11 @@ channels, members, etc.
## Key Files ## Key Files
- **`slack_cli/__main__.py`** — Canonical CLI source. Entry point is `main()`, exposed as the `slack-cli` console script. - **`slack_cli/__main__.py`** — Canonical CLI source. Entry point is `main()`, exposed as the `slack-cli` console script. Supports three output formats via `-f/--format`:
- `pretty` (default) — ANSI/tree layout with URLs, for humans.
- `llm` — token-efficient text (no ANSI/URLs, grouped by channel+date, threads indented).
- `jsonl` — one JSON object per message; includes raw `ts`, `channel_id`, `user_id`.
Renderers are `render_pretty` / `render_llm` / `render_jsonl`; `cmd_messages` builds the display list once and dispatches.
- **`pyproject.toml`** — Python packaging metadata. Declares `slack-cli` console script entry point and `dfindexeddb` dependency. - **`pyproject.toml`** — Python packaging metadata. Declares `slack-cli` console script entry point and `dfindexeddb` dependency.
- **`docs/indexeddb-format.md`** — Documents the on-disk format: LevelDB layer, IndexedDB databases, Blink value encoding, and the full Redux state schema with field-level detail. - **`docs/indexeddb-format.md`** — Documents the on-disk format: LevelDB layer, IndexedDB databases, Blink value encoding, and the full Redux state schema with field-level detail.
- **`scripts/analyze_structure.py`** — Introspects the live IndexedDB and dumps database/object-store/record-type info plus Redux state key schemas. Re-run this when the data format changes and update the docs accordingly. - **`scripts/analyze_structure.py`** — Introspects the live IndexedDB and dumps database/object-store/record-type info plus Redux state key schemas. Re-run this when the data format changes and update the docs accordingly.

View File

@@ -81,8 +81,12 @@ def resolve_user(state: dict, user_id: str) -> str:
if not isinstance(member, dict): if not isinstance(member, dict):
return user_id return user_id
# Slack Redux State Stores Name Fields at Top Level # Slack Handle (e.g. "evan") > Display Name > Real Name > fallback to ID
name = member.get("display_name") or member.get("real_name") or member.get("name") name = member.get("name") # Slack handle / username
if name and name != user_id:
return name
name = member.get("display_name") or member.get("real_name")
if name: if name:
return name return name
@@ -96,6 +100,23 @@ def resolve_user(state: dict, user_id: str) -> str:
return user_id return user_id
def resolve_mentions(state: dict, text: str) -> str:
    """Replace Slack user-mention tokens in *text* with ``@name``.

    Both the bare form ``<@USERID>`` and the labelled form
    ``<@USERID|label>`` are recognised. Each ID is passed to
    ``resolve_user``; if that hands the ID back unchanged (no usable
    member record) and the token carried a label, the label is used
    instead so the reader still sees a name rather than a raw ID.

    Args:
        state: Decoded Slack state; its ``"members"`` entry must be a dict
            for any substitution to happen.
        text: Raw message text. Non-string values are returned untouched.

    Returns:
        The text with every mention token rewritten, or the input unchanged
        when it is not a string or the member store is not a dict.
    """
    if not isinstance(text, str):
        return text

    if not isinstance(state.get("members", {}), dict):
        return text

    def _replace(match):
        user_id = match.group(1)
        label = match.group(2)
        resolved = resolve_user(state, user_id)
        # Fall back to the label embedded in the token only when the
        # lookup could not improve on the raw ID.
        if resolved == user_id and label:
            return f"@{label}"
        return f"@{resolved}"

    # Optional "|label" suffix covers Slack's labelled mention syntax.
    return re.sub(r"<@([A-Z0-9]+)(?:\|([^>]*))?>", _replace, text)
def parse_since(value: str) -> datetime: def parse_since(value: str) -> datetime:
"""Parse a --since Value Into a Datetime """Parse a --since Value Into a Datetime
@@ -197,6 +218,7 @@ def cmd_messages(
count: int, count: int,
unread_only: bool = False, unread_only: bool = False,
since: datetime | None = None, since: datetime | None = None,
output_format: str = "pretty",
): ):
"""Print Messages From Slack State""" """Print Messages From Slack State"""
messages_store = state.get("messages", {}) messages_store = state.get("messages", {})
@@ -243,6 +265,7 @@ def cmd_messages(
user_id = msg.get("user", msg.get("bot_id", "?")) user_id = msg.get("user", msg.get("bot_id", "?"))
user_name = resolve_user(state, user_id) user_name = resolve_user(state, user_id)
text = resolve_mentions(state, text)
subtype = msg.get("subtype", "") subtype = msg.get("subtype", "")
# dfindexeddb Represents JS undefined as an Undefined object # dfindexeddb Represents JS undefined as an Undefined object
@@ -280,39 +303,100 @@ def cmd_messages(
top_level.append(msg) top_level.append(msg)
# Build Display List — Each Top-Level Entry Followed by Its Replies # Build Display List — Each Top-Level Entry Followed by Its Replies
# item = (msg | None, depth) — None msg means orphan thread header # item = (msg, depth)
# depth 0 = root (top-level message or orphan thread header) # depth 0 = root (top-level message OR thread parent)
# depth 1 = thread reply # depth 1 = thread reply (or ellipsis row)
display: list[tuple[dict | None, int]] = [] # Every root carries `has_replies` so the renderer can pick glyphs.
display: list[tuple[dict, int]] = []
seen_parents: set[str] = {m["ts"] for m in top_level} seen_parents: set[str] = {m["ts"] for m in top_level}
for msg in top_level: for msg in top_level:
replies = threads.get(msg["ts"], [])
msg["has_replies"] = bool(replies)
if replies:
msg["is_thread_parent"] = True
display.append((msg, 0)) display.append((msg, 0))
for reply in threads.get(msg["ts"], []): for reply in replies:
display.append((reply, 1)) display.append((reply, 1))
# Collect Orphan Thread Groups — Replies Whose Parent Isn't Shown # Collect Orphan Thread Groups — Replies Whose Parent Isn't a Top-Level Match
orphan_groups: list[tuple[str, list[dict]]] = [] orphan_groups: list[tuple[str, list[dict]]] = []
for thread_ts, replies in threads.items(): for thread_ts, replies in threads.items():
if thread_ts not in seen_parents: if thread_ts not in seen_parents:
orphan_groups.append((thread_ts, replies)) orphan_groups.append((thread_ts, replies))
# Sort Orphan Groups by Earliest Reply Timestamp
orphan_groups.sort(key=lambda g: g[1][0]["dt"]) orphan_groups.sort(key=lambda g: g[1][0]["dt"])
# Append Each Orphan Group With a Header Placeholder # Append Each Orphan Group — Resolve Parent From Cache When Possible.
# If parent isn't cached, emit a synthetic header with is_orphan=True so
# the renderer shows a consistent row + "parent not cached" body.
for thread_ts, replies in orphan_groups: for thread_ts, replies in orphan_groups:
# Use First Reply's Channel and ID for the Header cid = replies[0]["channel_id"]
header = { ch_name = replies[0]["channel"]
"channel": replies[0]["channel"], channel_msgs_raw = messages_store.get(cid)
"channel_id": replies[0]["channel_id"], parent_raw = (
channel_msgs_raw.get(thread_ts)
if isinstance(channel_msgs_raw, dict) else None
)
if isinstance(parent_raw, dict) and isinstance(parent_raw.get("text"), str):
user_id = parent_raw.get("user", parent_raw.get("bot_id", "?"))
subtype = parent_raw.get("subtype", "")
if not isinstance(subtype, str):
subtype = ""
parent_msg = {
"channel": ch_name,
"channel_id": cid,
"ts": thread_ts,
"thread_ts": thread_ts, "thread_ts": thread_ts,
"dt": ts_to_datetime(thread_ts),
"user": resolve_user(state, user_id),
"user_id": user_id,
"text": resolve_mentions(state, parent_raw.get("text", "")),
"subtype": subtype,
"is_thread_parent": True,
"has_replies": True,
} }
display.append((None, 0)) # Placeholder for header reply_count = parent_raw.get("reply_count")
shown = len(replies)
if isinstance(reply_count, int):
# Known: show ellipsis only if replies were actually skipped
skipped = reply_count - shown if reply_count > shown else 0
else:
# Unknown reply_count — signal ambiguous gap
skipped = None
else:
# Orphan — Parent Not in Local Cache
parent_msg = {
"channel": ch_name,
"channel_id": cid,
"ts": thread_ts,
"thread_ts": thread_ts,
"dt": ts_to_datetime(thread_ts),
"user": "???",
"user_id": "",
"text": "",
"subtype": "",
"is_thread_parent": True,
"is_orphan": True,
"has_replies": True,
}
# No reply_count available for orphans → generic ellipsis
skipped = None
display.append((parent_msg, 0))
# Suppress Ellipsis When We Know No Replies Were Skipped
if skipped != 0:
display.append(({"ellipsis": True, "skipped": skipped}, 1))
for reply in replies: for reply in replies:
display.append((reply, 1)) display.append((reply, 1))
# Patch the Placeholder With Header Info
display[-len(replies) - 1] = (header, 0) # Select Format-Specific Renderer
renderers = {
"pretty": render_pretty,
"llm": render_llm,
"jsonl": render_jsonl,
}
renderer = renderers[output_format]
# Print Last N Messages (Count depth=0 Entries Only) # Print Last N Messages (Count depth=0 Entries Only)
if len(display) > 0: if len(display) > 0:
@@ -330,52 +414,122 @@ def cmd_messages(
else: else:
visible = [] visible = []
renderer(visible, display, domain, unread_only, since)
def render_pretty(visible, display, domain, unread_only, since):
"""Human-Friendly Terminal Output — ANSI, Rules, Tree Glyphs"""
# ANSI Palette
dim = "\033[90m"
ts_color = "\033[90m" ts_color = "\033[90m"
ch_color = "\033[36m" ch_color = "\033[36m"
user_color = "\033[33m" user_color = "\033[33m"
bullet_top = "\033[36m●\033[0m" # Top-level message (cyan dot)
bullet_thread = "\033[33m◆\033[0m" # Thread parent (yellow diamond)
reset = "\033[0m" reset = "\033[0m"
bar_str = f"\033[90m│\033[0m" bar = f"{dim}{reset}" # Thread continuation gutter
branch_mid = f"{dim}{reset}" # Ellipsis branch
branch_end = f"{dim}{reset}" # Final reply branch
# Horizontal Rule Between Roots — Sized to Terminal Width (Clamped)
try:
term_width = os.get_terminal_size().columns
except OSError:
term_width = 78
rule_width = max(40, min(term_width, 100))
rule = f"{dim}{'' * rule_width}{reset}"
def wrap_text(text: str, prefix: str) -> None:
"""Print Message Body, Prefixing Every Line"""
for line in text.split("\n"):
print(f"{prefix}{line}")
prev_depth = None
for idx, (msg, depth) in enumerate(visible): for idx, (msg, depth) in enumerate(visible):
# Peek Ahead to See if Next Item Is Still a Thread Reply # Peek Ahead — Needed to Decide Between Mid vs End Reply Branch
next_is_reply = (idx + 1 < len(visible) and visible[idx + 1][1] > 0) next_is_reply = (
idx + 1 < len(visible) and visible[idx + 1][1] > 0
)
# Orphan Thread Header # Horizontal Rule Before Every New Root
if msg is not None and "dt" not in msg: if depth == 0:
header_line = f"\033[90m↳ thread in {ch_color}#{msg['channel']}{reset}" print(rule)
if domain and msg.get("channel_id") and msg.get("thread_ts"):
link = slack_url(domain, msg["channel_id"], msg["thread_ts"]) # Ellipsis Row — Between Thread Parent and Shown Replies
header_line += f" \033[90m{link}{reset}" if msg.get("ellipsis"):
print(header_line) skipped = msg.get("skipped")
if skipped:
inner = f"[… {skipped} earlier repl{'y' if skipped == 1 else 'ies'}]"
else:
inner = "[… older replies]"
print(f"{branch_mid} {dim}{inner}{reset}")
# Continuation Bar Below Ellipsis if More Replies Follow
if next_is_reply:
print(bar)
continue continue
dt_str = msg["dt"].strftime("%Y-%m-%d %H:%M:%S") dt_str = msg["dt"].strftime("%Y-%m-%d %H:%M:%S")
subtype_tag = f" [{msg['subtype']}]" if msg["subtype"] else "" subtype_tag = f" [{msg['subtype']}]" if msg.get("subtype") else ""
bar = f"{bar_str} " if depth > 0 else ""
# Build Slack Link for Top-Level Messages if depth == 0:
# Root Row — Choose Bullet Based on Thread State
is_thread = msg.get("is_thread_parent", False)
bullet = bullet_thread if is_thread else bullet_top
link_suffix = "" link_suffix = ""
if domain and depth == 0: if domain and msg.get("channel_id") and msg.get("ts"):
link = slack_url(domain, msg["channel_id"], msg["ts"]) link = slack_url(domain, msg["channel_id"], msg["ts"])
link_suffix = f" {ts_color}{link}{reset}" link_suffix = f" {ts_color}{link}{reset}"
print( print(
f"{bar}{ts_color}{dt_str}{reset} " f"{bullet} {ts_color}{dt_str}{reset} "
f"{ch_color}#{msg['channel']}{reset} " f"{ch_color}#{msg['channel']}{reset} "
f"{user_color}{msg['user']}{reset}{subtype_tag}{link_suffix}" f"{user_color}{msg['user']}{reset}{subtype_tag}{link_suffix}"
) )
# Indent Message Text (Prefix Every Line for Multi-Line Messages) # Orphan Subtitle (Kept Below Header for Layout Consistency)
text_prefix = f"{bar_str} " if depth > 0 else " " if msg.get("is_orphan"):
for line in msg["text"][:500].split("\n"): print(f"{bar} {dim}(orphan — parent not cached){reset}")
print(f"{text_prefix}{line}")
# Connecting Bar Between Thread Messages # Body: Either the Orphan Placeholder Box or the Real Text
if msg.get("is_orphan"):
# ⚠ Renders as Double-Width in Most Terminals, So the
# top/bottom rules need to be 2 chars shorter than the
# printable width of the middle row.
box_top = f"{dim}{'' * 39}{reset}"
box_mid = f"{dim}{reset} \033[33m⚠ ORPHANED THREAD — PARENT UNKNOWN\033[0m {dim}{reset}"
box_bot = f"{dim}{'' * 39}{reset}"
print(f"{bar}")
print(f"{bar} {box_top}")
print(f"{bar} {box_mid}")
print(f"{bar} {box_bot}")
else:
# Top-Level and Resolved-Parent Bodies Both Indent 2 Spaces;
# thread parents use a gutter to signal replies follow.
body_prefix = f"{bar} " if msg.get("has_replies") else " "
wrap_text(msg["text"], body_prefix)
# Trailing Gutter Only When Replies Follow
if next_is_reply: if next_is_reply:
print(bar_str) print(bar)
else: else:
print() print()
else:
# Reply Row — Use └ (Final) or ├ (Mid) Depending on Followups
branch = branch_mid if next_is_reply else branch_end
print(
f"{branch} {ts_color}{dt_str}{reset} "
f"{user_color}{msg['user']}{reset}{subtype_tag}"
)
# Mid Replies Keep the │ Gutter; Final Reply Indents Flat
text_prefix = f"{bar} " if next_is_reply else " "
wrap_text(msg["text"], text_prefix)
if next_is_reply:
print(bar)
else:
print()
prev_depth = depth
root_count = sum(1 for _, d in visible if d == 0) root_count = sum(1 for _, d in visible if d == 0)
total_roots = sum(1 for _, d in display if d == 0) total_roots = sum(1 for _, d in display if d == 0)
@@ -385,6 +539,134 @@ def cmd_messages(
print(f"--- Showing {root_count} of {total_roots} {label} ---") print(f"--- Showing {root_count} of {total_roots} {label} ---")
def render_llm(visible, display, domain, unread_only, since):
    """Print messages as compact plain text intended for LLM consumption.

    Rows are grouped by channel and then by date: a ``== #channel ==``
    header is printed whenever the channel changes, and a ``YYYY-MM-DD``
    line whenever the date changes within a channel. No ANSI codes, URLs
    or tree glyphs are emitted. Root rows are indented two spaces and
    thread replies four, so thread membership stays visible.

    Args:
        visible: ``(msg, depth)`` rows to print. Depth 0 marks a root; any
            other depth is a reply or ellipsis row attached to the
            preceding root. Non-root rows that appear before the first
            root are dropped.
        display: The full row list; only used to count roots for the
            footer.
        domain: Unused here; accepted so every renderer can be called with
            the same arguments.
        unread_only: Chooses the footer label (``unread`` vs ``msgs``).
        since: Optional datetime echoed in the footer.
    """
    root_indent = "  "
    reply_indent = "    "

    # Pair every root with the non-root rows that follow it.
    blocks: list[tuple[str, dict, list[dict]]] = []
    for msg, depth in visible:
        if depth == 0:
            blocks.append((msg.get("channel", ""), msg, []))
        elif blocks:
            blocks[-1][2].append(msg)

    def fmt_row(msg: dict, indent: str = "", note: str = "") -> str:
        """Format one row; *note* is placed in the header before the colon."""
        if msg.get("ellipsis"):
            skipped = msg.get("skipped")
            if skipped:
                inner = f"[… {skipped} earlier repl{'y' if skipped == 1 else 'ies'}]"
            else:
                inner = "[… older replies]"
            return f"{indent}{inner}"

        hm = msg["dt"].strftime("%H:%M")
        subtype = f" [{msg['subtype']}]" if msg.get("subtype") else ""
        user = msg.get("user", "?")
        text = msg.get("text", "")

        # The note is built into the header, so message text that happens
        # to end in ":" or contain ": " can never be mistaken for it.
        head = f"{indent}{hm} {user}{subtype}{note}:"
        if not text:
            return head

        # First line goes inline; later lines are indented one step deeper.
        lines = text.split("\n")
        cont_indent = indent + " "
        tail = "".join(f"\n{cont_indent}{ln}" for ln in lines[1:])
        return f"{head} {lines[0]}{tail}"

    last_channel: str | None = None
    last_date: str | None = None

    for channel, root, replies in blocks:
        # Channel header on change; a blank line separates channels.
        if channel != last_channel:
            if last_channel is not None:
                print()
            print(f"== #{channel} ==")
            last_channel = channel
            last_date = None

        # Date header on change (tracked per channel).
        root_date = root["dt"].strftime("%Y-%m-%d")
        if root_date != last_date:
            print(root_date)
            last_date = root_date

        if root.get("is_orphan"):
            hm = root["dt"].strftime("%H:%M")
            print(f"{root_indent}{hm} (orphan — parent uncached):")
        else:
            note = ""
            if root.get("is_thread_parent"):
                # Count replies actually shown, not the ellipsis marker.
                shown = sum(1 for r in replies if not r.get("ellipsis"))
                note = f" thread ({shown})"
            print(fmt_row(root, indent=root_indent, note=note))

        for reply in replies:
            print(fmt_row(reply, indent=reply_indent))

    # Footer — compact summary of how many roots were shown.
    root_count = sum(1 for _, d in visible if d == 0)
    total_roots = sum(1 for _, d in display if d == 0)
    label = "unread" if unread_only else "msgs"
    since_note = f" since={since.strftime('%Y-%m-%d %H:%M')}" if since else ""
    print(f"-- {root_count}/{total_roots} {label}{since_note} --")
def render_jsonl(visible, display, domain, unread_only, since):
    """Emit each visible row as a single JSON line on stdout.

    Ellipsis rows become ``{"type": "ellipsis", ...}`` records; every
    other row becomes a ``{"type": "message", ...}`` record carrying the
    raw identifiers alongside the resolved names. Only ``visible`` is
    read; the remaining parameters are accepted but unused.
    """

    def to_record(row: dict, level: int) -> dict:
        # Ellipsis markers carry no message fields, only the skipped count.
        if row.get("ellipsis"):
            return {
                "type": "ellipsis",
                "depth": level,
                "skipped": row.get("skipped"),
            }

        stamp = row.get("dt")
        return {
            "type": "message",
            "depth": level,
            "channel": row.get("channel"),
            "channel_id": row.get("channel_id"),
            "ts": row.get("ts"),
            "thread_ts": row.get("thread_ts"),
            "datetime": stamp.isoformat() if stamp else None,
            "user": row.get("user"),
            "user_id": row.get("user_id"),
            # Empty subtype is normalised to null.
            "subtype": row.get("subtype") or None,
            "text": row.get("text"),
            "is_thread_parent": row.get("is_thread_parent", False),
            "is_orphan": row.get("is_orphan", False),
        }

    for row, level in visible:
        # default=str stringifies any value json cannot encode natively.
        print(json.dumps(to_record(row, level), ensure_ascii=False, default=str))
def cmd_channels(state: dict): def cmd_channels(state: dict):
"""List Channels With Message Counts""" """List Channels With Message Counts"""
messages_store = state.get("messages", {}) messages_store = state.get("messages", {})
@@ -451,6 +733,10 @@ def main():
"--dump", nargs="?", const="slack_state.json", "--dump", nargs="?", const="slack_state.json",
help="Dump full state to JSON file" help="Dump full state to JSON file"
) )
parser.add_argument(
"-f", "--format", choices=["pretty", "llm", "jsonl"], default="pretty",
help="Output format: pretty (default, ANSI/tree), llm (token-efficient text), jsonl (one JSON per message)"
)
args = parser.parse_args() args = parser.parse_args()
# Find and Decode the Blob # Find and Decode the Blob
@@ -473,7 +759,7 @@ def main():
cmd_channels(state) cmd_channels(state)
else: else:
cmd_messages(state, args.channels, args.exclude_channels, cmd_messages(state, args.channels, args.exclude_channels,
args.count, args.unread, args.since) args.count, args.unread, args.since, args.format)
if __name__ == "__main__": if __name__ == "__main__":