commit f49c944efe96ee9379f519e93100b4601b6b1afa Author: Evan Reichard Date: Thu Apr 16 12:08:04 2026 -0400 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b2be92b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +result diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..49c2028 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,56 @@ +# AI Agent Guidelines + +## Project Overview + +Reads Slack messages from the local Chromium IndexedDB cache on disk. Slack's +desktop app (Mac App Store) persists its entire Redux state as a single +Blink-serialized blob. We decode it with `dfindexeddb` and extract messages, +channels, members, etc. + +## Project Structure + +``` +. +├── slack_cli/ # Python package (canonical source) +│ ├── __init__.py +│ └── __main__.py # CLI entry point +├── pyproject.toml # Python packaging metadata +├── flake.nix # Nix dev shell + package build +├── docs/ +│ └── indexeddb-format.md # Full IndexedDB data format documentation +└── scripts/ + └── analyze_structure.py # Generates schema data for docs +``` + +## Key Files + +- **`slack_cli/__main__.py`** — Canonical CLI source. Entry point is `main()`, exposed as the `slack-cli` console script. +- **`pyproject.toml`** — Python packaging metadata. Declares `slack-cli` console script entry point and `dfindexeddb` dependency. +- **`docs/indexeddb-format.md`** — Documents the on-disk format: LevelDB layer, IndexedDB databases, Blink value encoding, and the full Redux state schema with field-level detail. +- **`scripts/analyze_structure.py`** — Introspects the live IndexedDB and dumps database/object-store/record-type info plus Redux state key schemas. Re-run this when the data format changes and update the docs accordingly. +- **`flake.nix`** — Nix dev shell (python3.12, uv, snappy) + standalone package build. Packages pinned PyPI deps (python-snappy==0.6.1, zstd==1.5.5.1, dfindexeddb) inline. 
+ +## Dev Environment + +```bash +nix develop # Enter shell with python3.12, uv, snappy +uv run slack-cli # uv resolves deps automatically from pyproject.toml +``` + +Or without nix, ensure `python3.12`, `uv`, and `libsnappy` are available. + +## Building + +```bash +nix build # Build standalone CLI to ./result/bin/slack-cli +nix run # Build and run directly +nix run . -- --help # Pass args +``` + +## Dependencies + +Python deps are declared in `pyproject.toml` for the `slack_cli` package and inline in the `# /// script` metadata block of standalone scripts (e.g., `scripts/analyze_structure.py`). `uv` resolves and caches them automatically. The only direct dependency is: + +- **`dfindexeddb`** — Forensic parser for Chromium IndexedDB/LevelDB and Blink V8 serialized values. + +The nix flake provides **`snappy`** (the C library) because `python-snappy` needs it to compile its native extension. diff --git a/docs/indexeddb-format.md b/docs/indexeddb-format.md new file mode 100644 index 0000000..858fb6e --- /dev/null +++ b/docs/indexeddb-format.md @@ -0,0 +1,601 @@ +# Slack Desktop IndexedDB Data Format + +This document describes the on-disk format and data structure of the Slack +desktop app's local IndexedDB cache. It covers the Mac App Store version +(`com.tinyspeck.slackmacgap`), though the Electron version uses the same +Chromium IndexedDB format at a different path. + +> **Tooling**: All structures were analyzed using +> [`dfindexeddb`](https://pypi.org/project/dfindexeddb/) — a forensic Python +> library that parses Chromium IndexedDB / LevelDB files and Blink-serialized +> V8 values directly from the raw files, without a native LevelDB binding. 
+ +## Table of Contents + +- [Filesystem Layout](#filesystem-layout) +- [LevelDB Layer](#leveldb-layer) + - [Files](#files) + - [Custom Comparator](#custom-comparator) + - [Record Types](#record-types) +- [IndexedDB Layer](#indexeddb-layer) + - [Databases](#databases) + - [Object Stores](#object-stores) + - [Blob Storage](#blob-storage) +- [Value Encoding](#value-encoding) + - [Blink IDB Value Wrapper](#blink-idb-value-wrapper) + - [V8 Serialization](#v8-serialization) + - [Sentinel Types](#sentinel-types) + - [JSArray Encoding](#jsarray-encoding) +- [Redux State Schema](#redux-state-schema) + - [Overview](#overview) + - [messages](#messages) + - [channels](#channels) + - [members](#members) + - [reactions](#reactions) + - [files](#files-1) + - [bots](#bots) + - [teams](#teams) + - [userGroups](#usergroups) + - [channelHistory](#channelhistory) + - [allThreads](#allthreads) + - [Other Stores](#other-stores) +- [Caveats & Limitations](#caveats--limitations) + +--- + +## Filesystem Layout + +### Mac App Store Version + +``` +~/Library/Containers/com.tinyspeck.slackmacgap/ + Data/Library/Application Support/Slack/IndexedDB/ + https_app.slack.com_0.indexeddb.leveldb/ # LevelDB database + 000042.log # Write-ahead log (active writes) + 000044.ldb # SSTable (compacted data) + CURRENT # Points to active MANIFEST + LOCK # Process lock file + LOG # LevelDB operational log + LOG.old # Previous operational log + MANIFEST-000001 # Database manifest (file versions, levels) + https_app.slack.com_0.indexeddb.blob/ # External blob storage + 2/ # database_id=2 + 1e/ # Sharded directory (blob_number >> 8) + 1e80 # Blob file (blob_number in hex) +``` + +### Electron Version (if installed) + +``` +~/Library/Application Support/Slack/IndexedDB/ + https_app.slack.com_0.indexeddb.leveldb/ + https_app.slack.com_0.indexeddb.blob/ +``` + +### Other Platforms + +| OS | Path | +| ------- | ------------------------------- | +| Linux | `~/.config/Slack/IndexedDB/...` | +| Windows | 
`%AppData%\Slack\IndexedDB\...` | + +--- + +## LevelDB Layer + +Chromium's IndexedDB is backed by LevelDB, a sorted key-value store. + +### Files + +| File Pattern | Purpose | +| ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | +| `*.log` | Write-ahead log. Contains recent, uncommitted writes as `WriteBatch` records. Each record has a 7-byte header: checksum (4), length (2), type (1). | +| `*.ldb` / `*.sst` | SSTables. Immutable, sorted, compressed (Snappy) data files produced by compaction. | +| `MANIFEST-*` | Tracks which files belong to which LSM-tree level, active file set. | +| `CURRENT` | Text file pointing to the active manifest (e.g., `MANIFEST-000001`). | +| `LOCK` | Advisory file lock. **Held by Slack while running.** | +| `LOG` | LevelDB's internal operational log (compaction events, etc.). | + +### Custom Comparator + +Chromium IndexedDB uses a custom LevelDB comparator called **`idb_cmp1`** +rather than the default `leveldb.BytewiseComparator`. This means: + +- Standard LevelDB libraries (`plyvel`, `leveldb`) **cannot open** these + databases — they will fail with a comparator mismatch error. +- You must either use `dfindexeddb` (which parses the raw files without + opening the DB) or copy the data and parse at the binary level. + +### Record Types + +Each LevelDB key in the IndexedDB encodes a typed key prefix. The key types +observed in Slack's database: + +| Record Type | Count | Description | +| ------------------------ | ------- | ----------------------------------------------- | +| `ScopesPrefixKey` | ~13,000 | Internal scope tracking records | +| `RecoveryBlobJournalKey` | ~2,300 | Blob lifecycle / garbage collection journal | +| `ObjectStoreDataKey` | ~1,600 | **Actual data records** (messages, state, etc.) 
| +| `ExistsEntryKey` | ~1,600 | Existence index (mirrors data keys) | +| `BlobEntryKey` | ~1,500 | Maps data keys to external blob references | +| `ObjectStoreMetaDataKey` | ~800 | Object store schema metadata | +| `DatabaseMetaDataKey` | ~770 | Database-level metadata (version, etc.) | +| `ActiveBlobJournalKey` | ~760 | Currently active blob journal | +| `DatabaseNameKey` | 3 | Maps database IDs to names | +| `ObjectStoreNamesKey` | 3 | Maps object store IDs to names | +| `SchemaVersionKey` | 1 | IndexedDB schema version | +| `MaxDatabaseIdKey` | 1 | Highest allocated database ID | +| `DataVersionKey` | 1 | Data format version | + +The high counts for `ScopesPrefixKey`, `RecoveryBlobJournalKey`, and +`ActiveBlobJournalKey` reflect Slack's frequent Redux state persistence — +each save cycle creates a new blob and journals the old one for garbage +collection. + +--- + +## IndexedDB Layer + +### Databases + +Three IndexedDB databases are present: + +| `database_id` | Name | Purpose | +| ------------- | ------------------ | ---------------------------------------------------------------------------- | +| 2 | `reduxPersistence` | Slack's full Redux application state | +| 3 | _(unnamed)_ | Encrypted syncer data (e.g., `syncer.User/{hash}`, `syncer.Document/{hash}`) | +| 10 | _(unnamed)_ | Sundry metadata (e.g., `minChannelUpdated` timestamp) | + +> **Note**: `database_id=0` is used for global IndexedDB metadata records +> (database names, schema version, etc.) and is not an application database. 
+ +### Object Stores + +| `database_id` | `object_store_id` | Store Name | Key Pattern | Storage | +| ------------- | ----------------- | ----------------------- | ------------------------------------------ | ------------------------------- | +| 2 | 1 | `reduxPersistenceStore` | `persist:slack-client-{TEAM_ID}-{USER_ID}` | External blob (~4-7 MB) | +| 3 | 1 | `{hex-hash}` | `syncer.{Type}/{ID}` | Inline, **encrypted** (AES-GCM) | +| 10 | 1 | `sundryStorage` | `0` | Inline | + +The Redux state store (db=2) contains a single key per team+user combination. +The entire application state is serialized into one large blob. + +### Blob Storage + +When an IndexedDB value exceeds the inline size threshold, Chromium stores it +as an external blob file. The blob path is derived from the blob number: + +``` +{blob_dir}/{database_id}/{blob_number >> 8 :02x}/{blob_number :04x} +``` + +For example, blob number `7808` (hex `0x1e80`) in database `2`: + +``` +https_app.slack.com_0.indexeddb.blob/2/1e/1e80 +``` + +Blobs are **versioned** — each Redux persist cycle allocates a new blob number +and the previous blob is journaled for deletion. Only the latest blob contains +the current state. + +--- + +## Value Encoding + +### Blink IDB Value Wrapper + +Chromium wraps IndexedDB values in a Blink-specific envelope with MIME type +`application/vnd.blink-idb-value-wrapper`. The blob file begins with a +3-byte header: + +| Offset | Value | Meaning | +| ------ | ------ | ------------------------------------- | +| 0 | `0xFF` | Blink serialization tag: VERSION | +| 1 | `0x11` | Pseudo-version: "requires processing" | +| 2 | `0x02` | Compression: Snappy | + +After the header, the remaining bytes are **Snappy-compressed** V8 serialized +data. + +### V8 Serialization + +The decompressed data uses Chrome's V8 serialization format — the same binary +format used by `structuredClone()` and `postMessage()`. 
It encodes JavaScript +values including: + +- Primitives: `string`, `number`, `boolean`, `null`, `undefined` +- Objects: `{}` → Python `dict` +- Arrays: `[]` → Python `JSArray` (see below) +- Typed arrays, `Date`, `RegExp`, `Map`, `Set`, `ArrayBuffer`, etc. + +`dfindexeddb` deserializes this into Python-native types with a few special +sentinel objects. + +### Sentinel Types + +`dfindexeddb` represents JavaScript values that have no Python equivalent +using sentinel objects: + +| JS Value | dfindexeddb Type | Python `repr` | Notes | +| ----------- | ---------------- | ------------- | ----------------------------------------------------------------------------------- | +| `undefined` | `Undefined` | `Undefined()` | Distinct from `null`. Common on optional message fields (e.g., `subtype`, `files`). | +| `null` | `Null` | `Null()` | Used where Slack explicitly sets `null`. | +| `NaN` | `NaN` | `NaN()` | Rare. | + +**Important**: When checking fields, always handle these types. A message's +`subtype` field is `Undefined()` (not `None`, not missing) when no subtype +applies: + +```python +subtype = msg.get("subtype", "") +if not isinstance(subtype, str): + subtype = "" # Was Undefined() or Null() +``` + +### JSArray Encoding + +JavaScript sparse arrays are encoded as `JSArray` objects with two attributes: + +- **`values`**: A Python list of positional values. Sparse positions are + `Undefined()`. +- **`properties`**: A Python dict mapping integer indices to the actual values. 
+ +```python +# JS: ["alice", "bob", "carol"] +# Python: +JSArray( + values=[Undefined(), Undefined(), Undefined()], + properties={0: "alice", 1: "bob", 2: "carol"} +) +``` + +To iterate a `JSArray` as a flat list: + +```python +def jsarray_to_list(arr): + if hasattr(arr, "properties"): + return [arr.properties.get(i) for i in range(len(arr.values))] + return arr # Already a plain list +``` + +--- + +## Redux State Schema + +### Overview + +The Redux state blob contains Slack's entire client-side application state. +It is a single large JavaScript object with ~140 top-level keys. The largest +stores (by serialized size): + +| Key | Size | Entries | Description | +| ---------------- | ------- | ------------- | ----------------------------------------- | +| `messages` | ~44 MB | ~295 channels | Cached message history | +| `channels` | ~1.3 MB | ~583 | Channel metadata | +| `files` | ~1.2 MB | ~267 | File/upload metadata | +| `channelHistory` | ~800 KB | ~1,200 | Pagination / scroll state per channel | +| `members` | ~730 KB | ~351 | User profiles | +| `experiments` | ~310 KB | ~1,527 | Feature flag experiments | +| `reactions` | ~290 KB | ~955 | Emoji reactions on messages | +| `apps` | ~280 KB | ~29 | Installed Slack app metadata | +| `prefs` | ~170 KB | 4 | User preferences (huge, hundreds of keys) | +| `userPrefs` | ~156 KB | ~667 | Additional user preference data | +| `threadSub` | ~135 KB | ~1,120 | Thread subscription state | + +--- + +### messages + +**Path**: `state.messages[channel_id][timestamp]` + +The primary message store. Keyed by channel ID, then by message timestamp. + +``` +messages: { + "C0XXXXXXXXX": { # Channel ID + "1776115292.356529": { ... }, # Message (ts is the key) + "1776117909.325989": { ... }, + ... + }, + ... 
+} +``` + +#### Message Fields + +| Field | Type | Description | +| ------------------- | ------------------------ | ------------------------------------------------------------------------------------------------------ | +| `ts` | `str` | Message timestamp (unique ID). Unix epoch with microseconds as decimal. | +| `type` | `str` | Always `"message"`. | +| `text` | `str` | Message text content. Contains Slack markup: `<@U123>` for mentions, `<https://example.com\|label>` for links. | +| `user` | `str` | User ID of sender (e.g., `"U0XXXXXXXXX"`). | +| `channel` | `str` | Channel ID (e.g., `"C0XXXXXXXXX"`). | +| `subtype` | `str` \| `Undefined` | Message subtype: `"channel_join"`, `"bot_message"`, etc. `Undefined()` for normal messages. | +| `thread_ts` | `str` \| `Undefined` | Parent thread timestamp. Same as `ts` for thread parent messages. `Undefined()` for non-threaded. | +| `reply_count` | `int` | Number of replies (0 for non-parent messages). | +| `reply_users` | `JSArray` \| `Undefined` | User IDs of thread participants. | +| `reply_users_count` | `int` \| `Undefined` | Count of unique repliers. | +| `latest_reply` | `str` \| `Undefined` | Timestamp of latest reply. | +| `_hidden_reply` | `bool` | `True` if this is a thread reply not shown in the channel. | +| `blocks` | `JSArray` \| `Undefined` | Slack Block Kit elements (rich text, sections, images, etc.). | +| `files` | `JSArray` \| `Undefined` | File IDs attached to this message. Values in `properties` are file ID strings (not full file objects). | +| `attachments` | `JSArray` \| `Undefined` | Legacy attachments (links, bot attachments). | +| `client_msg_id` | `str` | Client-generated UUID for the message. | +| `no_display` | `bool` | Whether to hide this message in UI. | +| `_rxn_key` | `str` | Key for looking up reactions: `"message-{ts}-{channel}"`. | +| `slackbot_feels` | `Null` | Slackbot sentiment (always `Null()` in practice). | +| `__meta__` | `dict` | Internal cache metadata: `{"lastUpdatedTs": "..."}`. 
| +| `parent_user_id` | `str` | User ID of the thread parent author (only on replies). | +| `upload` | `bool` | Present and `True` on file upload messages. | + +--- + +### channels + +**Path**: `state.channels[channel_id]` + +Channel metadata. Includes public channels, private channels, DMs, and MPDMs. + +#### Channel Fields + +| Field | Type | Description | +| ---------------------- | ------- | --------------------------------------------------- | +| `id` | `str` | Channel ID (e.g., `"C0XXXXXXXXX"`). | +| `name` | `str` | Channel display name. | +| `name_normalized` | `str` | Lowercase normalized name. | +| `is_channel` | `bool` | Public channel. | +| `is_group` | `bool` | Private channel (legacy term). | +| `is_im` | `bool` | Direct message. | +| `is_mpim` | `bool` | Multi-party direct message. | +| `is_private` | `bool` | Private (group or MPIM). | +| `is_archived` | `bool` | Channel is archived. | +| `is_general` | `bool` | The `#general` channel. | +| `is_member` | `bool` | Current user is a member. | +| `created` | `float` | Unix timestamp of channel creation. | +| `creator` | `str` | User ID of channel creator. | +| `context_team_id` | `str` | Team ID this channel belongs to. | +| `topic` | `dict` | `{"value": "...", "creator": "...", "last_set": 0}` | +| `purpose` | `dict` | `{"value": "...", "creator": "...", "last_set": 0}` | +| `unread_cnt` | `int` | Unread message count. | +| `unread_highlight_cnt` | `int` | Unread mentions/highlights count. | +| `is_ext_shared` | `bool` | Slack Connect shared channel. | +| `is_org_shared` | `bool` | Shared across org workspaces. | +| `is_frozen` | `bool` | Channel is frozen (read-only). | + +_Plus ~30 additional boolean flags and UI state fields._ + +--- + +### members + +**Path**: `state.members[user_id]` + +User profiles for all visible workspace members. 
+ +#### Member Fields + +| Field | Type | Description | +| --------------------- | ------- | -------------------------------------------------------------------------------------------------- | +| `id` | `str` | User ID (e.g., `"U0XXXXXXXXX"`). | +| `team_id` | `str` | Primary team ID. | +| `name` | `str` | Username (login name). | +| `real_name` | `str` | Full display name. | +| `deleted` | `bool` | Account deactivated. | +| `color` | `str` | Hex color assigned to user. | +| `tz` | `str` | Timezone identifier (e.g., `"America/New_York"`). | +| `tz_label` | `str` | Human-readable timezone name. | +| `tz_offset` | `int` | UTC offset in seconds. | +| `profile` | `dict` | Nested profile with `title`, `phone`, `email`, `image_*` URLs, `status_text`, `status_emoji`, etc. | +| `is_admin` | `bool` | Workspace admin. | +| `is_owner` | `bool` | Workspace owner. | +| `is_bot` | `bool` | Bot account. | +| `is_app_user` | `bool` | App-associated user. | +| `is_restricted` | `bool` | Guest (single-channel or multi-channel). | +| `is_ultra_restricted` | `bool` | Single-channel guest. | +| `updated` | `float` | Last profile update timestamp. | +| `is_self` | `bool` | `True` for the current logged-in user. | + +_Plus `_name_lc`, `_display_name_lc`, etc. for search/sorting._ + +--- + +### reactions + +**Path**: `state.reactions[reaction_key]` + +Keyed by `"message-{ts}-{channel_id}"` (matching the `_rxn_key` field on +messages). + +Each value is a `JSArray` of reaction objects: + +```python +{ + "name": "eyes", # Emoji name + "baseName": "eyes", # Base name (without skin tone) + "count": 2, # Total reaction count + "users": JSArray( # User IDs who reacted + values=[Undefined(), Undefined()], + properties={0: "U0XXXXXXXXX", 1: "U0YYYYYYYYY"} + ) +} +``` + +--- + +### files + +**Path**: `state.files[file_id]` + +File metadata for files visible in the current session. 
+ +#### File Fields + +| Field | Type | Description | +| ------------- | ------- | ----------------------------------------------------------------------------- | +| `id` | `str` | File ID (e.g., `"F0XXXXXXXXX"`). | +| `name` | `str` | Original filename. | +| `title` | `str` | Display title. | +| `mimetype` | `str` | MIME type (e.g., `"image/png"`). | +| `filetype` | `str` | Short type (e.g., `"png"`, `"pdf"`). | +| `size` | `int` | File size in bytes. | +| `user` | `str` | Uploader's user ID. | +| `created` | `float` | Upload timestamp. | +| `url_private` | `str` | Authenticated download URL. | +| `permalink` | `str` | Permanent link to file in Slack. | +| `thumb_*` | `str` | Thumbnail URLs at various sizes (64, 80, 160, 360, 480, 720, 800, 960, 1024). | +| `original_w` | `int` | Original image width. | +| `original_h` | `int` | Original image height. | +| `is_public` | `bool` | Shared to a public channel. | +| `is_external` | `bool` | External file (Google Drive, etc.). | + +> **Note**: File URLs require Slack authentication to access. The `files` +> store in messages contains only file IDs (strings), not full file objects. +> Cross-reference with `state.files[file_id]` for metadata. + +--- + +### bots + +**Path**: `state.bots[bot_id]` + +Bot user metadata. + +| Field | Type | Description | +| --------- | ------- | ---------------------------------------------- | +| `id` | `str` | Bot ID (e.g., `"B0XXXXXXXXX"`). | +| `name` | `str` | Bot display name (e.g., `"MyBot"`). | +| `app_id` | `str` | Associated Slack app ID. | +| `user_id` | `str` | User ID associated with this bot. | +| `icons` | `dict` | Icon URLs: `image_36`, `image_48`, `image_72`. | +| `deleted` | `bool` | Bot is deactivated. | +| `updated` | `float` | Last update timestamp. | +| `team_id` | `str` | Team ID. | + +--- + +### teams + +**Path**: `state.teams[team_id]` + +Workspace/org metadata. 
+ +| Field | Type | Description | +| -------------- | ------- | ---------------------------------------------------- | +| `id` | `str` | Team ID (e.g., `"T0XXXXXXXXX"`). | +| `name` | `str` | Workspace name. | +| `domain` | `str` | Slack subdomain. | +| `url` | `str` | Full workspace URL. | +| `email_domain` | `str` | Email domain for sign-up. | +| `plan` | `str` | Plan type (`"std"`, `"plus"`, `"enterprise"`, etc.). | +| `icon` | `dict` | Workspace icon URLs at various sizes. | +| `date_created` | `float` | Workspace creation timestamp. | +| `prefs` | `dict` | Workspace-level preferences (large, many keys). | + +--- + +### userGroups + +**Path**: `state.userGroups[group_id]` + +User groups (e.g., `@engineering`, `@design`). + +| Field | Type | Description | +| ------------- | --------- | ----------------------------------------------- | +| `id` | `str` | Group ID (e.g., `"S0XXXXXXXXX"`). | +| `name` | `str` | Display name. | +| `handle` | `str` | Mention handle (e.g., `"design"`). | +| `description` | `str` | Group description. | +| `user_count` | `int` | Number of members. | +| `users` | `JSArray` | Member user IDs. | +| `prefs` | `dict` | Contains `channels` JSArray (default channels). | + +--- + +### channelHistory + +**Path**: `state.channelHistory[channel_id]` + +Pagination and fetch state for channel message history. + +| Field | Type | Description | +| ---------------- | ---------------- | ----------------------------------------------------------------------- | +| `reachedStart` | `bool` | Scrolled to the very first message. | +| `reachedEnd` | `bool` \| `Null` | Scrolled to the latest message. | +| `prevReachedEnd` | `bool` | Previously reached end (before new messages arrived). | +| `slices` | `JSArray` | Loaded message timestamp ranges. Each slice has a `timestamps` JSArray. | + +--- + +### allThreads + +**Path**: `state.allThreads` + +Thread view state (the "Threads" sidebar panel). 
+ +| Field | Type | Description | +| ------------- | --------- | ----------------------------------------------------------------------------------------------------------- | +| `threads` | `JSArray` | Thread summaries. Each property has `threadKey` (`"{channel}-{ts}"`), `sortTs`, `hasUnreads`, `isPriority`. | +| `hasMore` | `bool` | More threads available to load. | +| `cursorTs` | `str` | Pagination cursor. | +| `maxTs` | `str` | Most recent thread timestamp. | +| `selectedTab` | `str` | Active tab: `"all"` or `"unreads"`. | + +--- + +### Other Stores + +Stores not detailed above but present in the state: + +| Key | Entries | Description | +| ------------------ | ------- | --------------------------------------------------------------------------------------------------------- | +| `experiments` | ~1,500 | Feature flags and A/B test assignments | +| `prefs` | 4 | User preferences — `user`, `team`, `client`, `features`. The `user` entry alone has 400+ preference keys. | +| `threadSub` | ~1,100 | Thread subscription state per channel+thread | +| `searchResults` | 1 | Last search query, results, and filters | +| `membership` | ~18 | Channel membership maps: `{user_id: {isKnown, isMember}}` | +| `membershipCounts` | ~97 | Channel member counts | +| `channelCursors` | ~256 | Read cursor positions per channel | +| `mutedChannels` | ~14 | Muted channel list | +| `unreadCounts` | ~18 | Unread count state per channel | +| `flannelEmoji` | ~548 | Custom workspace emoji definitions | +| `slashCommand` | 2 | Slash command definitions | +| `channelSections` | ~18 | Sidebar section organization | +| `bootData` | 67 | Initial boot data (team info, feature gates) | + +--- + +## Caveats & Limitations + +1. **Cache only** — Slack only caches **recently viewed** channels and + messages. The IndexedDB does not contain complete workspace history. + +2. **Single blob** — The entire Redux state is one monolithic blob (~4-7 MB + compressed, ~90 MB JSON). 
There is no way to read individual channels + without decoding the whole thing. + +3. **Lock file** — Slack holds the LevelDB `LOCK` file while running. To + read the data you must either: + - Copy the LevelDB + blob directories and remove the `LOCK` file from + the copy, or + - Parse the raw `.log` and `.ldb` files directly (which `dfindexeddb` + does). + +4. **Blob rotation** — Slack persists state frequently. The blob file changes + every few seconds. Only the **latest** blob (highest modification time) + contains current data. + +5. **Encrypted data** — Database 3 (object store name is a hex hash) + contains AES-GCM encrypted values (syncer data). The encryption key is + not stored in the IndexedDB and these records cannot be decrypted from + disk alone. + +6. **File references** — Messages reference files by ID only (e.g., + `"F0XXXXXXXXX"`), not by the full file object. Cross-reference with + `state.files[file_id]` for metadata and URLs. + +7. **Slack markup** — Message `text` fields contain Slack's markup format: + - User mentions: `<@U0XXXXXXXXX>` + - Channel links: `<#C0XXXXXXXXX|general>` + - URLs: `<https://example.com|label>` + - Emoji: `:thumbsup:` (not Unicode) diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..8da6f7b --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1776067740, + "narHash": "sha256-B35lpsqnSZwn1Lmz06BpwF7atPgFmUgw1l8KAV3zpVQ=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "7e495b747b51f95ae15e74377c5ce1fe69c1765f", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": 
"nixos-25.11", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..a33f146 --- /dev/null +++ b/flake.nix @@ -0,0 +1,129 @@ +# Usage: +# - Shell: `nix develop` +# - Direnv (https://direnv.net/): `.envrc` content of `use flake` +# - Build: `nix build` +# - Run: `nix run` + +{ + description = "Slack Local Reader"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = + { self + , nixpkgs + , flake-utils + , + }: + flake-utils.lib.eachDefaultSystem ( + system: + let + pkgs = nixpkgs.legacyPackages.${system}; + python = pkgs.python312; + + # ── Python Dependency Overrides ────────────────────────────── + # + # dfindexeddb pins python-snappy==0.6.1 and zstd==1.5.5.1. + # nixpkgs ships newer versions, so we build the exact pins + # from PyPI source tarballs. 
+ + pythonPackages = python.pkgs; + + python-snappy = pythonPackages.buildPythonPackage rec { + pname = "python-snappy"; + version = "0.6.1"; + format = "setuptools"; + + src = pkgs.fetchurl { + url = "https://files.pythonhosted.org/packages/98/7a/44a24bad98335b2c72e4cadcdecf79f50197d1bab9f22f863a274f104b96/python-snappy-0.6.1.tar.gz"; + hash = "sha256-tqEHqwYgasxTWdTFYyvZsi1EhwKnmzFpsMYuD7gIuyo="; + }; + + buildInputs = [ pkgs.snappy ]; + + # Tests require snappy test fixtures not present in sdist + doCheck = false; + }; + + zstd-python = pythonPackages.buildPythonPackage rec { + pname = "zstd"; + version = "1.5.5.1"; + format = "setuptools"; + + src = pkgs.fetchurl { + url = "https://files.pythonhosted.org/packages/source/z/zstd/zstd-1.5.5.1.tar.gz"; + hash = "sha256-HvmAq/Dh4HKwKNLXbvlbR2YyZRyWIlzzC2Gcbu9iVnI="; + }; + + # Bundled C sources — no external zstd library needed + doCheck = false; + }; + + dfindexeddb = pythonPackages.buildPythonPackage rec { + pname = "dfindexeddb"; + version = "20260210"; + format = "setuptools"; + + src = pkgs.fetchurl { + url = "https://files.pythonhosted.org/packages/source/d/dfindexeddb/dfindexeddb-20260210.tar.gz"; + hash = "sha256-4ahEe4Lpoh0oqGR6kI7J1HEGfvKVEzu3qQ+3ykgFd/Y="; + }; + + propagatedBuildInputs = [ + python-snappy + zstd-python + ]; + + doCheck = false; + }; + + # ── Slack CLI Package ──────────────────────────────────────── + + slack-cli = pythonPackages.buildPythonApplication { + pname = "slack-cli"; + version = "0.1.0"; + format = "pyproject"; + + src = let + fs = pkgs.lib.fileset; + in + fs.toSource { + root = ./.; + fileset = fs.unions [ + ./pyproject.toml + ./slack_cli + ]; + }; + + build-system = [ pythonPackages.setuptools ]; + + dependencies = [ dfindexeddb ]; + + doCheck = false; + + meta = { + description = "Read Slack messages from local Chromium IndexedDB cache"; + mainProgram = "slack-cli"; + }; + }; + in + { + packages.default = slack-cli; + + devShells.default = pkgs.mkShell { + packages = with 
pkgs; [ + python + uv + snappy + ]; + + shellHook = '' + export UV_PYTHON_PREFERENCE=only-system + ''; + }; + } + ); +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f0a6f5e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,16 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "slack-cli" +version = "0.1.0" +requires-python = ">=3.12" +description = "Read Slack messages from local Chromium IndexedDB cache" +dependencies = ["dfindexeddb"] + +[project.scripts] +slack-cli = "slack_cli.__main__:main" + +[tool.setuptools.packages.find] +include = ["slack_cli*"] diff --git a/scripts/analyze_structure.py b/scripts/analyze_structure.py new file mode 100755 index 0000000..1682fdc --- /dev/null +++ b/scripts/analyze_structure.py @@ -0,0 +1,196 @@ +#!/usr/bin/env -S uv run --python=python3.12 --script +# /// script +# requires-python = ">=3.12" +# dependencies = ["dfindexeddb"] +# /// +"""Analyze Slack IndexedDB Structure + +Dumps the full schema of the IndexedDB: databases, object stores, +record types, and the Redux state top-level keys with sizes/types. +Used for generating documentation. 
def analyze_leveldb():
    """Print the filesystem layout and record structure of the IndexedDB.

    Lists the files under LDB_DIR and BLOB_DIR, then parses a temporary
    copy of the LevelDB folder with dfindexeddb's FolderReader and prints
    the databases, object stores and per-key-type record counts found.

    The database is copied first (and the copied LOCK file removed) so the
    original folder is never opened directly. The copy lives in a
    TemporaryDirectory so it is removed even if copying or parsing raises.
    """
    print("=" * 70)
    print("LEVELDB / INDEXEDDB STRUCTURE")
    print("=" * 70)

    # Filesystem Layout
    print("\n## Filesystem Layout")
    print(f"\nLevelDB dir: {LDB_DIR}")
    for p in sorted(LDB_DIR.iterdir()):
        size = p.stat().st_size
        print(f" {p.name:30s} {size:>12,} bytes")

    print(f"\nBlob dir: {BLOB_DIR}")
    for p in sorted(BLOB_DIR.rglob("*")):
        if p.is_file():
            size = p.stat().st_size
            rel = p.relative_to(BLOB_DIR)
            print(f" {str(rel):30s} {size:>12,} bytes")

    key_types = Counter()
    db_meta = {}  # db_id -> {name, obj_stores}
    obj_store_names = {}  # (db_id, os_id) -> name

    # Copy DB to Avoid Lock (cleaned up automatically, even on error)
    with tempfile.TemporaryDirectory() as tmp_name:
        db_copy = pathlib.Path(tmp_name) / "db"
        shutil.copytree(str(LDB_DIR), str(db_copy))
        (db_copy / "LOCK").unlink(missing_ok=True)

        # Parse Records
        reader = FolderReader(db_copy)
        for rec in reader.GetRecords(load_blobs=False):
            kt = type(rec.key).__name__
            key_types[kt] += 1

            db_id = rec.database_id or 0

            if kt == "DatabaseNameKey":
                dn = getattr(rec.key, "database_name", None)
                if dn:
                    db_meta.setdefault(db_id, {"name": None})["name"] = str(dn)

            if kt == "ObjectStoreMetaDataKey":
                md_type = getattr(rec.key, "metadata_type", None)
                os_id = getattr(rec.key, "object_store_id", None)
                # NOTE(review): metadata_type 0 is treated as the store's
                # name record — confirm against dfindexeddb.
                if md_type == 0 and rec.value:
                    obj_store_names[(db_id, os_id)] = str(rec.value)

            if kt == "ObjectStoreDataKey":
                user_key = getattr(rec.key, "encoded_user_key", None)
                val = rec.value
                blob_size = getattr(val, "blob_size", None) if val else None
                version = getattr(val, "version", None) if val else None
                key_val = getattr(user_key, "value", None) if user_key else None
                os_id = rec.object_store_id

                info = db_meta.setdefault(db_id, {"name": None})
                stores = info.setdefault("obj_stores", {})
                store_info = stores.setdefault(os_id, {"sample_key": None})
                # Keep the first key seen as the sample; blob_size/version
                # reflect the last record seen for the store.
                if key_val and not store_info["sample_key"]:
                    store_info["sample_key"] = str(key_val)[:80]
                store_info["blob_size"] = blob_size
                store_info["version"] = version

    # Print Databases
    print("\n## Databases")
    for db_id in sorted(db_meta.keys()):
        info = db_meta[db_id]
        # "name" is always present (possibly None), so fall back explicitly.
        name = info.get("name") or "?"
        print(f"\n database_id={db_id}: \"{name}\"")
        for (did, osid), osname in sorted(obj_store_names.items()):
            if did == db_id:
                print(f" object_store_id={osid}: \"{osname}\"")
                store_info = info.get("obj_stores", {}).get(osid, {})
                if store_info.get("sample_key"):
                    print(f" sample_key: {store_info['sample_key']}")
                if store_info.get("blob_size"):
                    print(f" blob_size: {store_info['blob_size']:,}")

    # Print Record Types
    print("\n## Record Type Counts")
    for kt, count in key_types.most_common():
        print(f" {count:6d} {kt}")
def analyze_redux_state():
    """Print the structure of the most recently modified Redux state blob.

    Decodes the newest file under BLOB_DIR with V8ScriptValueDecoder, lists
    the top-level keys ordered by the length of their string form, then
    prints a sample entry's fields for a fixed list of stores.
    """
    print("\n")
    print("=" * 70)
    print("REDUX STATE BLOB STRUCTURE")
    print("=" * 70)

    # Find Blob (newest by modification time)
    blob_files = sorted(
        (p for p in BLOB_DIR.rglob("*") if p.is_file()),
        key=lambda p: p.stat().st_mtime,
    )
    if not blob_files:
        print("No blob files found!")
        return

    blob_path = blob_files[-1]
    size = blob_path.stat().st_size
    print(f"\nBlob: {blob_path.relative_to(IDB_BASE)} ({size:,} bytes)")

    state = V8ScriptValueDecoder.FromBytes(blob_path.read_bytes())

    # Top-Level Keys
    print("\n## Top-Level Keys (sorted by size)")
    entries = []
    for k in sorted(state.keys()):
        v = state[k]
        size = len(str(v))  # size proxy: length of the value's str() form
        t = type(v).__name__
        child_count = len(v) if isinstance(v, dict) else None
        entries.append((size, k, t, child_count))

    entries.sort(reverse=True)
    for size, k, t, child_count in entries:
        cc = f" ({child_count} entries)" if child_count is not None else ""
        print(f" {size:>12,} chars {k} ({t}){cc}")

    # Detailed Structure of Key Stores
    detail_keys = [
        "messages", "channels", "members", "reactions",
        "files", "bots", "teams", "userGroups",
        "channelHistory", "allThreads", "searchResults",
        "prefs", "userPrefs", "membership",
    ]

    print("\n## Key Store Schemas")
    for store_key in detail_keys:
        store = state.get(store_key)
        if store is None:
            continue

        print(f"\n### {store_key}")
        print(f" type: {type(store).__name__}")

        if isinstance(store, dict):
            print(f" entry_count: {len(store)}")

            # Find a representative entry
            for entry_key, entry_val in store.items():
                if not isinstance(entry_val, dict):
                    # Store of scalars: nothing to sample.
                    break
                if len(entry_val) > 3:
                    print(f" sample_key: \"{entry_key}\"")
                    print(" fields:")
                    for fk, fv in entry_val.items():
                        ft = type(fv).__name__
                        fval = repr(fv)[:80]
                        print(f" {fk}: {ft} = {fval}")
                    break
                # Nested dict of dicts (e.g., messages -> channel -> ts -> msg).
                # This check used to sit under a "not a dict" branch and could
                # never run.
                if any(isinstance(inner, dict) for inner in entry_val.values()):
                    print(f" structure: {store_key}[channel_id][timestamp] -> message")
                    break
        elif hasattr(store, "properties"):
            print(f" JSArray with {len(store.properties)} properties")
backing Slack's desktop app (Mac App Store +version). Finds the Redux state blob, decodes it with dfindexeddb's Blink +V8 deserializer, and prints cached messages. + +Usage: + ./slack-cli.py # Recent messages (all channels) + ./slack-cli.py -c general # Filter by channel name (glob pattern) + ./slack-cli.py -c 'team-*' -c general # Multiple channel filters + ./slack-cli.py -x 'alerts-*' -x 'bot-*' # Exclude channels + ./slack-cli.py -n 50 # Show last 50 messages + ./slack-cli.py -u # Show unread messages only + ./slack-cli.py -u -c general # Unread messages in a specific channel + ./slack-cli.py -s 2h # Messages from the last 2 hours + ./slack-cli.py -s 2026-04-15 # Messages since a specific date + ./slack-cli.py --channels # List channels with message counts + ./slack-cli.py --dump # Dump full Redux state to file +""" + +import argparse +import fnmatch +import json +import os +import re +import sys +from collections import defaultdict +from datetime import datetime, timedelta +from pathlib import Path + +from dfindexeddb.indexeddb.chromium.blink import V8ScriptValueDecoder + +# ─── Constants ─────────────────────────────────────────────────────────────── + +SLACK_IDB_BASE = Path.home() / ( + "Library/Containers/com.tinyspeck.slackmacgap" + "/Data/Library/Application Support/Slack/IndexedDB" +) +BLOB_DIR = SLACK_IDB_BASE / "https_app.slack.com_0.indexeddb.blob" + + +# ─── Helpers ───────────────────────────────────────────────────────────────── + + +def find_latest_blob() -> Path | None: + """Find the Latest Blob File in the IndexedDB Blob Directory + + Slack stores a single large blob containing the entire Redux state. + The blob number increments on every persist, so the latest file is + what we want. 
+ """ + blob_files = [b for b in BLOB_DIR.rglob("*") if b.is_file()] + if not blob_files: + return None + blob_files.sort(key=lambda p: p.stat().st_mtime, reverse=True) + return blob_files[0] + + +def decode_blob(blob_path: Path) -> dict: + """Decode a Blink IDB Value Wrapper Blob""" + raw = blob_path.read_bytes() + return V8ScriptValueDecoder.FromBytes(raw) + + +def ts_to_datetime(ts: str) -> datetime: + """Convert Slack Timestamp to Datetime""" + try: + return datetime.fromtimestamp(float(ts)) + except (ValueError, TypeError, OSError): + return datetime.min + + +def resolve_user(state: dict, user_id: str) -> str: + """Resolve a Slack User ID to Display Name""" + if not isinstance(user_id, str): + return str(user_id) + members = state.get("members", {}) + if not isinstance(members, dict): + return user_id + member = members.get(user_id) + if not isinstance(member, dict): + return user_id + + # Slack Redux State Stores Name Fields at Top Level + name = member.get("display_name") or member.get("real_name") or member.get("name") + if name: + return name + + # Also Check Nested Profile + profile = member.get("profile", {}) + if isinstance(profile, dict): + name = profile.get("display_name") or profile.get("real_name") + if name: + return name + + return user_id + + +def parse_since(value: str) -> datetime: + """Parse a --since Value Into a Datetime + + Supports relative durations (e.g. 30m, 2h, 3d) and absolute + dates/datetimes (e.g. 2026-04-15, '2026-04-15 10:00'). 
+ """ + # Relative Duration: + m = re.fullmatch(r"(\d+)([mhd])", value.strip()) + if m: + amount = int(m.group(1)) + unit = m.group(2) + delta = {"m": timedelta(minutes=amount), "h": timedelta( + hours=amount), "d": timedelta(days=amount)}[unit] + return datetime.now() - delta + + # Absolute Datetime + for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M", "%Y-%m-%d"): + try: + return datetime.strptime(value.strip(), fmt) + except ValueError: + continue + + raise argparse.ArgumentTypeError( + f"invalid --since value: {value!r} " + f"(expected e.g. 30m, 2h, 3d, 2026-04-15, '2026-04-15 10:00')" + ) + + +def build_channel_names(state: dict) -> dict[str, str]: + """Build Channel ID -> Name Lookup""" + channels_store = state.get("channels", {}) + names = {} + if isinstance(channels_store, dict): + for cid, cdata in channels_store.items(): + if isinstance(cdata, dict): + names[cid] = cdata.get("name", cdata.get("name_normalized", cid)) + return names + + +def get_workspace_domain(state: dict) -> str | None: + """Get the Primary Workspace Domain From Teams Store""" + teams = state.get("teams", {}) + if not isinstance(teams, dict): + return None + for _tid, team in teams.items(): + if isinstance(team, dict) and team.get("url"): + return team.get("domain") + # Fallback to First Team With a Domain + for _tid, team in teams.items(): + if isinstance(team, dict) and team.get("domain"): + return team.get("domain") + return None + + +def slack_url(domain: str, channel_id: str, ts: str, thread_ts: str | None = None) -> str: + """Build a Slack Deep Link URL + + Message timestamps become URL path segments by removing the dot: + 1776219948.820859 -> p1776219948820859 + Thread replies additionally include ?thread_ts=...&cid=... 
+ """ + ts_url = "p" + ts.replace(".", "") + url = f"https://{domain}.slack.com/archives/{channel_id}/{ts_url}" + if thread_ts and thread_ts != ts: + url += f"?thread_ts={thread_ts}&cid={channel_id}" + return url + + +# ─── Commands ──────────────────────────────────────────────────────────────── + + +def build_read_cursors(state: dict) -> dict[str, float]: + """Build Channel ID -> Read Cursor Timestamp Lookup + + The channelCursors store maps channel IDs to the timestamp of the + last-read message. Messages with ts > cursor are unread. + """ + cursors = state.get("channelCursors", {}) + result = {} + if isinstance(cursors, dict): + for cid, ts in cursors.items(): + try: + result[cid] = float(ts) + except (ValueError, TypeError): + continue + return result + + +def channel_matches(name: str, patterns: list[str]) -> bool: + """Check if a Channel Name Matches Any of the Glob Patterns""" + name_lower = name.lower() + return any(fnmatch.fnmatch(name_lower, p.lower()) for p in patterns) + + +def cmd_messages( + state: dict, + include_channels: list[str] | None, + exclude_channels: list[str] | None, + count: int, + unread_only: bool = False, + since: datetime | None = None, +): + """Print Messages From Slack State""" + messages_store = state.get("messages", {}) + channel_names = build_channel_names(state) + read_cursors = build_read_cursors(state) if unread_only else {} + domain = get_workspace_domain(state) + + # Collect All Messages + all_msgs = [] + for cid, channel_msgs in messages_store.items(): + if not isinstance(channel_msgs, dict): + continue + ch_name = channel_names.get(cid, cid) + + # Apply Channel Include / Exclude Filters + if include_channels and not channel_matches(ch_name, include_channels): + continue + if exclude_channels and channel_matches(ch_name, exclude_channels): + continue + + # Determine Read Cursor for Unread Filtering + cursor = read_cursors.get(cid, 0.0) if unread_only else 0.0 + + # Convert --since Datetime to Unix Timestamp for Comparison 
def _collect_messages(
    state: dict,
    include_channels: list[str] | None,
    exclude_channels: list[str] | None,
    unread_only: bool,
    since: datetime | None,
) -> list[dict]:
    """Gather displayable messages that pass the channel/unread/since filters."""
    messages_store = state.get("messages", {})
    if not isinstance(messages_store, dict):
        return []

    channel_names = build_channel_names(state)
    read_cursors = build_read_cursors(state) if unread_only else {}
    # Convert --since Datetime to Unix Timestamp Once, Not per Channel
    since_ts = since.timestamp() if since else None

    collected = []
    for cid, channel_msgs in messages_store.items():
        if not isinstance(channel_msgs, dict):
            continue
        ch_name = channel_names.get(cid, cid)

        # Apply Channel Include / Exclude Filters
        if include_channels and not channel_matches(ch_name, include_channels):
            continue
        if exclude_channels and channel_matches(ch_name, exclude_channels):
            continue

        # Channels without a cursor count as entirely unread
        cursor = read_cursors.get(cid, 0.0)

        for ts, msg in channel_msgs.items():
            if not isinstance(msg, dict):
                continue
            try:
                ts_f = float(ts)
            except (ValueError, TypeError):
                continue
            if unread_only and ts_f <= cursor:
                continue
            if since_ts is not None and ts_f < since_ts:
                continue

            text = msg.get("text", "")
            if not text or not isinstance(text, str):
                continue

            user_id = msg.get("user", msg.get("bot_id", "?"))

            # dfindexeddb Represents JS undefined as an Undefined object
            subtype = msg.get("subtype", "")
            if not isinstance(subtype, str):
                subtype = ""
            thread_ts = msg.get("thread_ts", "")
            if not isinstance(thread_ts, str):
                thread_ts = ""

            collected.append({
                "channel": ch_name,
                "channel_id": cid,
                "ts": ts,
                "ts_f": ts_f,
                "thread_ts": thread_ts or None,
                "dt": ts_to_datetime(ts),
                "user": resolve_user(state, user_id),
                "user_id": user_id,
                "text": text,
                "subtype": subtype,
            })
    return collected


def _build_display(all_msgs: list[dict]) -> list[tuple[dict, int]]:
    """Arrange messages as (item, depth) rows: roots at 0, replies at 1.

    Replies whose parent is not among the collected messages are grouped
    under a header dict (one without a "dt" key). All roots — real
    messages and orphan-thread headers alike — are ordered
    chronologically, orphan threads by their earliest collected reply.
    """
    ordered = sorted(all_msgs, key=lambda m: m["ts_f"])

    # Key threads by (channel, thread_ts): timestamps are only unique
    # within a channel.
    replies_by_thread: dict[tuple[str, str], list[dict]] = defaultdict(list)
    top_level: list[dict] = []
    for msg in ordered:
        thread_ts = msg["thread_ts"]
        if thread_ts and thread_ts != msg["ts"]:
            replies_by_thread[(msg["channel_id"], thread_ts)].append(msg)
        else:
            top_level.append(msg)

    groups: list[tuple[float, dict, list[dict]]] = []
    shown_parents: set[tuple[str, str]] = set()
    for msg in top_level:
        key = (msg["channel_id"], msg["ts"])
        shown_parents.add(key)
        groups.append((msg["ts_f"], msg, replies_by_thread.get(key, [])))

    # Orphan Thread Groups — Replies Whose Parent Isn't Shown
    for key, replies in replies_by_thread.items():
        if key in shown_parents:
            continue
        header = {
            "channel": replies[0]["channel"],
            "channel_id": key[0],
            "thread_ts": key[1],
        }
        groups.append((replies[0]["ts_f"], header, replies))

    # Most Recent Last, so "last N" really means the newest N roots
    groups.sort(key=lambda g: g[0])

    display: list[tuple[dict, int]] = []
    for _, root, replies in groups:
        display.append((root, 0))
        display.extend((reply, 1) for reply in replies)
    return display


def _last_roots(display: list[tuple[dict, int]], count: int) -> list[tuple[dict, int]]:
    """Return the tail of *display* holding the last *count* roots and their replies."""
    if count <= 0:
        return []
    root_positions = [i for i, (_, depth) in enumerate(display) if depth == 0]
    if len(root_positions) <= count:
        return list(display)
    return display[root_positions[-count]:]


def cmd_messages(
    state: dict,
    include_channels: list[str] | None,
    exclude_channels: list[str] | None,
    count: int,
    unread_only: bool = False,
    since: datetime | None = None,
):
    """Print cached messages from the Slack state, newest last.

    Args:
        state: Decoded Redux state.
        include_channels: Glob patterns; when given, only matching channels
            are shown.
        exclude_channels: Glob patterns for channels to hide.
        count: Number of roots (top-level messages or orphan-thread
            headers) to show; replies are printed under their root and do
            not count. Zero or a negative value shows nothing.
        unread_only: Only keep messages newer than the channel's read cursor.
        since: Only keep messages at or after this time.

    Each message prints as a coloured header line followed by its text
    (first 500 characters); a summary line closes the output.
    """
    domain = get_workspace_domain(state)
    display = _build_display(
        _collect_messages(state, include_channels, exclude_channels, unread_only, since)
    )
    visible = _last_roots(display, count)

    ts_color = "\033[90m"
    ch_color = "\033[36m"
    user_color = "\033[33m"
    reset = "\033[0m"

    bar_str = "\033[90m│\033[0m"

    for idx, (msg, depth) in enumerate(visible):
        # Peek Ahead to See if Next Item Is Still a Thread Reply
        next_is_reply = idx + 1 < len(visible) and visible[idx + 1][1] > 0

        # Orphan Thread Header (header dicts carry no "dt")
        if "dt" not in msg:
            header_line = f"\033[90m↳ thread in {ch_color}#{msg['channel']}{reset}"
            if domain and msg.get("channel_id") and msg.get("thread_ts"):
                link = slack_url(domain, msg["channel_id"], msg["thread_ts"])
                header_line += f" \033[90m{link}{reset}"
            print(header_line)
            continue

        dt_str = msg["dt"].strftime("%Y-%m-%d %H:%M:%S")
        subtype_tag = f" [{msg['subtype']}]" if msg["subtype"] else ""
        bar = f"{bar_str} " if depth > 0 else ""

        # Build Slack Link for Top-Level Messages
        link_suffix = ""
        if domain and depth == 0:
            link = slack_url(domain, msg["channel_id"], msg["ts"])
            link_suffix = f" {ts_color}{link}{reset}"

        print(
            f"{bar}{ts_color}{dt_str}{reset} "
            f"{ch_color}#{msg['channel']}{reset} "
            f"{user_color}{msg['user']}{reset}{subtype_tag}{link_suffix}"
        )

        # Indent Message Text (Prefix Every Line for Multi-Line Messages)
        text_prefix = f"{bar_str} " if depth > 0 else " "
        for line in msg["text"][:500].split("\n"):
            print(f"{text_prefix}{line}")

        # Connecting Bar Between Thread Messages
        if next_is_reply:
            print(bar_str)
        else:
            print()

    root_count = sum(1 for _, d in visible if d == 0)
    total_roots = sum(1 for _, d in display if d == 0)
    label = "unread messages" if unread_only else "messages"
    if since:
        label += f" since {since.strftime('%Y-%m-%d %H:%M')}"
    print(f"--- Showing {root_count} of {total_roots} {label} ---")
def cmd_channels(state: dict):
    """List channels with their cached message counts, largest first.

    A message is counted when it is a dict whose ``text`` is a non-empty
    string — the same rule cmd_messages uses to decide what to display.
    Rows are tracked per channel ID, so two channels that resolve to the
    same display name are both listed.
    """
    messages_store = state.get("messages", {})
    if not isinstance(messages_store, dict):
        messages_store = {}
    channel_names = build_channel_names(state)

    rows: list[tuple[str, int]] = []
    for cid, channel_msgs in messages_store.items():
        if not isinstance(channel_msgs, dict):
            continue
        msg_count = sum(
            1 for v in channel_msgs.values()
            if isinstance(v, dict)
            and isinstance(v.get("text"), str)
            and v["text"]
        )
        if msg_count > 0:
            rows.append((channel_names.get(cid, cid), msg_count))

    rows.sort(key=lambda row: -row[1])
    for name, c in rows:
        print(f" {c:5d} #{name}")

    print(f"\n--- {len(rows)} channels with cached messages ---")


def cmd_dump(state: dict, output: str):
    """Dump the full Redux state to *output* as indented JSON.

    Values json cannot serialise are written via ``str()``. The file is
    always written as UTF-8: non-ASCII text is emitted unescaped
    (``ensure_ascii=False``), which a non-UTF-8 locale default encoding
    may be unable to encode.
    """
    with open(output, "w", encoding="utf-8") as f:
        json.dump(state, f, indent=2, default=str, ensure_ascii=False)
    size_mb = os.path.getsize(output) / 1024 / 1024
    print(f"Dumped {size_mb:.1f}MB to {output}")


# ─── Main ────────────────────────────────────────────────────────────────────


def main():
    """CLI entry point: parse arguments, decode the latest blob, dispatch.

    Exits with status 1 when no blob file is found. ``--dump`` takes
    precedence over ``--channels``; with neither, messages are printed.
    Progress information goes to stderr so stdout carries only results.
    """
    parser = argparse.ArgumentParser(
        description="Read Slack messages from local IndexedDB"
    )
    parser.add_argument(
        "-c", "--channel", action="append", dest="channels",
        help="Include channels matching glob pattern (repeatable, e.g. -c 'team-*' -c general)"
    )
    parser.add_argument(
        "-x", "--exclude", action="append", dest="exclude_channels",
        help="Exclude channels matching glob pattern (repeatable, e.g. -x 'alerts-*' -x 'bot-*')"
    )
    parser.add_argument(
        "-n", "--count", type=int, default=30,
        help="Number of messages to show (default: 30)"
    )
    parser.add_argument(
        "-u", "--unread", action="store_true",
        help="Show only unread messages (based on read cursor position)"
    )
    parser.add_argument(
        "-s", "--since", type=parse_since,
        help="Show messages since time (e.g. 30m, 2h, 3d, 2026-04-15, '2026-04-15 10:00')"
    )
    parser.add_argument(
        "--channels", action="store_true", dest="list_channels",
        help="List channels with message counts"
    )
    parser.add_argument(
        "--dump", nargs="?", const="slack_state.json",
        help="Dump full state to JSON file"
    )
    args = parser.parse_args()

    # Find and Decode the Blob
    blob_path = find_latest_blob()
    if not blob_path:
        print(
            "No blob files found. Is Slack installed and has it been opened?",
            file=sys.stderr,
        )
        sys.exit(1)

    size_mb = blob_path.stat().st_size / 1024 / 1024
    print(f"Reading blob: {blob_path} ({size_mb:.1f}MB)", file=sys.stderr)
    state = decode_blob(blob_path)

    # Dispatch Command
    if args.dump:
        cmd_dump(state, args.dump)
    elif args.list_channels:
        cmd_channels(state)
    else:
        cmd_messages(state, args.channels, args.exclude_channels,
                     args.count, args.unread, args.since)