OpenMOSS
diff --git a/‎server/app.py‎
Lines changed: 13 additions & 1288 deletions b/‎server/app.py‎
Lines changed: 13 additions & 1288 deletions
diff --git a/‎server/config.py‎
Lines changed: 11 additions & 0 deletions b/‎server/config.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎server/logic/__init__.py‎ b/‎server/logic/__init__.py‎
diff --git a/‎server/logic/loaders.py‎
Lines changed: 44 additions & 0 deletions b/‎server/logic/loaders.py‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎server/logic/samples.py‎
Lines changed: 139 additions & 0 deletions b/‎server/logic/samples.py‎
Lines changed: 139 additions & 0 deletions
diff --git a/‎server/routers/__init__.py‎ b/‎server/routers/__init__.py‎
@@ -0,0 +1,11 @@
+import os
+
+import torch
+
+from lm_saes.config import MongoDBConfig
+from lm_saes.database import MongoClient
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+client = MongoClient(MongoDBConfig())
+sae_series = os.environ.get("SAE_SERIES", "default")
+tokenizer_only = os.environ.get("TOKENIZER_ONLY", "false").lower() == "true"
@@ -0,0 +1,44 @@
+from functools import lru_cache
+
+from datasets import Dataset
+
+from lm_saes.abstract_sae import AbstractSparseAutoEncoder
+from lm_saes.backend import LanguageModel
+from lm_saes.config import BaseSAEConfig
+from lm_saes.resource_loaders import load_dataset_shard, load_model
+from server.config import client, device, sae_series, tokenizer_only
+from server.utils.common import synchronized
+
+
+@synchronized
+@lru_cache(maxsize=8)
+def get_model(*, name: str) -> LanguageModel:
+    """Load and cache a language model."""
+    cfg = client.get_model_cfg(name)
+    if cfg is None:
+        raise ValueError(f"Model {name} not found")
+    cfg.tokenizer_only = tokenizer_only
+    cfg.device = device
+    return load_model(cfg)
+
+
+@synchronized
+@lru_cache(maxsize=16)
+def get_dataset(*, name: str, shard_idx: int = 0, n_shards: int = 1) -> Dataset:
+    """Load and cache a dataset shard."""
+    cfg = client.get_dataset_cfg(name)
+    assert cfg is not None, f"Dataset {name} not found"
+    return load_dataset_shard(cfg, shard_idx, n_shards)
+
+
+@synchronized
+@lru_cache(maxsize=8)
+def get_sae(*, name: str) -> AbstractSparseAutoEncoder:
+    """Load and cache a sparse autoencoder."""
+    path = client.get_sae_path(name, sae_series)
+    assert path is not None, f"SAE {name} not found"
+    cfg = BaseSAEConfig.from_pretrained(path)
+    cfg.device = device
+    sae = AbstractSparseAutoEncoder.from_config(cfg)
+    sae.eval()
+    return sae
@@ -0,0 +1,139 @@
+from typing import Any, Generator
+
+import numpy as np
+
+from lm_saes.database import FeatureAnalysisSampling
+from server.logic.loaders import get_dataset, get_model
+
+
+def extract_samples(
+    sampling: FeatureAnalysisSampling,
+    start: int | None = None,
+    end: int | None = None,
+    visible_range: int | None = None,
+) -> list[dict[str, Any]]:
+    def process_sample(
+        *,
+        sparse_feature_acts: tuple[np.ndarray, np.ndarray, np.ndarray | None, np.ndarray | None],
+        context_idx: int,
+        dataset_name: str,
+        model_name: str,
+        shard_idx: int | None = None,
+        n_shards: int | None = None,
+    ):
+        model = get_model(name=model_name)
+        data = get_dataset(name=dataset_name, shard_idx=shard_idx, n_shards=n_shards)[context_idx]
+
+        origins = model.trace({k: [v] for k, v in data.items()})[0]
+
+        (
+            feature_acts_indices,
+            feature_acts_values,
+            z_pattern_indices,
+            z_pattern_values,
+        ) = sparse_feature_acts
+
+        assert origins is not None and feature_acts_indices is not None and feature_acts_values is not None, (
+            "Origins and feature acts must not be None"
+        )
+
+        token_offset = 0
+        if visible_range is not None:  # Drop tokens before and after the highest activating token
+            if len(feature_acts_indices) == 0:
+                max_feature_act_index = 0
+            else:
+                max_feature_act_index = int(feature_acts_indices[np.argmax(feature_acts_values).item()].item())
+
+            feature_acts_mask = np.logical_and(
+                feature_acts_indices > max_feature_act_index - visible_range,
+                feature_acts_indices < max_feature_act_index + visible_range,
+            )
+            feature_acts_indices = feature_acts_indices[feature_acts_mask]
+            feature_acts_values = feature_acts_values[feature_acts_mask]
+
+            if z_pattern_indices is not None and z_pattern_values is not None:
+                z_pattern_mask = np.logical_and(
+                    z_pattern_indices > max_feature_act_index - visible_range,
+                    z_pattern_indices < max_feature_act_index + visible_range,
+                ).all(axis=0)
+                z_pattern_indices = z_pattern_indices[:, z_pattern_mask]
+                z_pattern_values = z_pattern_values[z_pattern_mask]
+
+            token_offset = max(0, max_feature_act_index - visible_range)
+
+            origins = origins[token_offset : max_feature_act_index + visible_range]
+
+        text_offset = None
+        if "text" in data:
+            text_ranges = [origin["range"] for origin in origins if origin is not None and origin["key"] == "text"]
+            if text_ranges:
+                max_text_origin = max(text_ranges, key=lambda x: x[1])
+                data["text"] = data["text"][: max_text_origin[1]]
+                if visible_range is not None:
+                    text_offset = min(text_ranges, key=lambda x: x[0])[0]
+                    data["text"] = data["text"][text_offset:]
+
+        return {
+            **data,
+            "token_offset": token_offset,
+            "text_offset": text_offset,
+            "origins": origins,
+            "feature_acts_indices": feature_acts_indices,
+            "feature_acts_values": feature_acts_values,
+            "z_pattern_indices": z_pattern_indices,
+            "z_pattern_values": z_pattern_values,
+        }
+
+    def index_select(
+        indices: np.ndarray,
+        values: np.ndarray,
+        i: int,
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """Select i-th sample from sparse tensor indices and values."""
+        mask = indices[0] == i
+        return indices[1:, mask], values[mask]
+
+    def process_sparse_feature_acts(
+        feature_acts_indices: np.ndarray,
+        feature_acts_values: np.ndarray,
+        z_pattern_indices: np.ndarray | None,
+        z_pattern_values: np.ndarray | None,
+        start: int,
+        end: int,
+    ) -> Generator[tuple[np.ndarray, np.ndarray, np.ndarray | None, np.ndarray | None], Any, None]:
+        for i in range(start, end):
+            feature_acts_indices_i, feature_acts_values_i = index_select(feature_acts_indices, feature_acts_values, i)
+            if z_pattern_indices is not None and z_pattern_values is not None:
+                z_pattern_indices_i, z_pattern_values_i = index_select(z_pattern_indices, z_pattern_values, i)
+            else:
+                z_pattern_indices_i, z_pattern_values_i = None, None
+            yield feature_acts_indices_i[0], feature_acts_values_i, z_pattern_indices_i, z_pattern_values_i
+
+    start = start if start is not None else 0
+    end = end if end is not None else len(sampling.context_idx)
+
+    return [
+        process_sample(
+            sparse_feature_acts=sparse_feature_acts,
+            context_idx=context_idx,
+            dataset_name=dataset_name,
+            model_name=model_name,
+            shard_idx=shard_idx,
+            n_shards=n_shards,
+        )
+        for sparse_feature_acts, context_idx, dataset_name, model_name, shard_idx, n_shards in zip(
+            process_sparse_feature_acts(
+                sampling.feature_acts_indices,
+                sampling.feature_acts_values,
+                sampling.z_pattern_indices,
+                sampling.z_pattern_values,
+                start,
+                end,
+            ),
+            sampling.context_idx[start:end],
+            sampling.dataset_name[start:end],
+            sampling.model_name[start:end],
+            sampling.shard_idx[start:end] if sampling.shard_idx is not None else [0] * (end - start),
+            sampling.n_shards[start:end] if sampling.n_shards is not None else [1] * (end - start),
+        )
+    ]