Module emblaze.neighbors
Defines model classes to compute and store nearest neighbor sets that can be
inherited across different Embedding
objects.
Classes
class NeighborSet (neighbor_objects)
-
Expand source code
class NeighborSet:
    """
    An object representing a serializable collection of Neighbors objects.

    The collection behaves like a thin wrapper around the sequence passed to
    the constructor: indexing, assignment, length and iteration are all
    forwarded to that sequence.
    """

    def __init__(self, neighbor_objects):
        """
        Args:
            neighbor_objects: A sequence of `Neighbors` objects, or another
                `NeighborSet` (whose underlying sequence is then reused so
                that wrapping twice does not nest the containers).
        """
        super().__init__()
        if isinstance(neighbor_objects, NeighborSet):
            neighbor_objects = neighbor_objects._neighbors
        self._neighbors = neighbor_objects

    def __getitem__(self, slice):
        return self._neighbors[slice]

    def __setitem__(self, slice, val):
        self._neighbors[slice] = val

    def __len__(self):
        return len(self._neighbors)

    def __iter__(self):
        return iter(self._neighbors)

    def __eq__(self, other):
        """
        Two NeighborSets are equal when they have the same length and their
        elements are pairwise equal. A NeighborSet equals a single
        `Neighbors` object when every element equals that object.
        """
        if isinstance(other, NeighborSet):
            return len(other) == len(self) and all(n1 == n2 for n1, n2 in zip(self, other))
        elif isinstance(other, Neighbors):
            return all(n1 == other for n1 in self)
        return False

    def __ne__(self, other):
        return not (self == other)

    def to_json(self, compressed=True, num_neighbors=None):
        """
        Serializes the list of Neighbors objects to JSON.

        Args:
            compressed: Forwarded to each element's `to_json`.
            num_neighbors: Forwarded to each element's `to_json`.

        Returns:
            A list with one serialized entry per element, in order.
        """
        return [n.to_json(compressed=compressed, num_neighbors=num_neighbors)
                for n in self]

    @classmethod
    def from_json(cls, data):
        """
        Rebuilds a NeighborSet from the list produced by `to_json`.

        Args:
            data: An iterable of serialized `Neighbors` objects.

        Returns:
            An instance of `cls` wrapping the deserialized `Neighbors`
            objects. (It supports the same indexing, length and iteration
            operations as the plain list that used to be returned.)
        """
        return cls([Neighbors.from_json(d) for d in data])

    def identical(self):
        """Returns True if all Neighbors objects within this NeighborSet are equal to each other."""
        if len(self) == 0:
            return True
        return all(n == self[0] for n in self)
An object representing a serializable collection of Neighbors objects.
Static methods
def from_json(data)
Methods
def identical(self)
-
Expand source code
def identical(self):
    """Returns True if all Neighbors objects within this NeighborSet are equal to each other.

    An empty set is considered identical. Otherwise every element is
    compared against the first one.
    """
    if not len(self):
        return True
    for candidate in self:
        if not (candidate == self[0]):
            return False
    return True
Returns True if all Neighbors objects within this NeighborSet are equal to each other.
def to_json(self, compressed=True, num_neighbors=None)
-
Expand source code
def to_json(self, compressed=True, num_neighbors=None):
    """
    Serializes the list of Neighbors objects to JSON.

    Each element's own `to_json` is called with the same `compressed` and
    `num_neighbors` arguments, and the results are collected in order.
    """
    serialized = []
    for neighbors in self:
        entry = neighbors.to_json(compressed=compressed,
                                  num_neighbors=num_neighbors)
        serialized.append(entry)
    return serialized
Serializes the list of Neighbors objects to JSON.
class Neighbors (values, ids=None, metric='euclidean', n_neighbors=100, clf=None)
-
Expand source code
class Neighbors: """ An object representing a serializable set of nearest neighbors within an embedding. The `Neighbors` object simply stores a matrix of integer IDs, where rows correspond to points in the embedding and columns are IDs of neighbors in order of proximity to each point. These neighbors can be accessed through the `values` property. """ def __init__(self, values, ids=None, metric='euclidean', n_neighbors=100, clf=None): """ This constructor should typically not be used - use [`Neighbors.compute`](#emblaze.neighbors.Neighbors.compute) instead. Args: values: Matrix of n x D high-dimensional positions ids: If supplied, a list of IDs for the points in the matrix metric: Distance metric to use to compute neighbors (can be any supported metric for `sklearn.neighbors.NearestNeighbors`) n_neighbors: Number of neighbors to compute and save clf: The `NearestNeighbors` object (only used when loading a `Neighbors` object from file) """ super().__init__() self.values = values self.ids = ids self._id_index = {id: i for i, id in enumerate(self.ids)} self.metric = metric self.n_neighbors = n_neighbors self.clf = clf @classmethod def compute(cls, pos, ids=None, metric='euclidean', n_neighbors=100): """ Compute a nearest-neighbor set using a given metric. Args: pos: Matrix of n x D high-dimensional positions ids: If supplied, a list of IDs for the points in the matrix metric: Distance metric to use to compute neighbors (can be any supported metric for `sklearn.neighbors.NearestNeighbors`) n_neighbors: Number of neighbors to compute and save Returns: An initialized `Neighbors` object containing computed neighbors. 
""" ids = ids if ids is not None else np.arange(len(pos)) neighbor_clf = NearestNeighbors(metric=metric, n_neighbors=n_neighbors + 1).fit(pos) _, neigh_indexes = neighbor_clf.kneighbors(pos) return cls(ids[neigh_indexes[:,1:]], ids=ids, metric=metric, n_neighbors=n_neighbors, clf=neighbor_clf) def index(self, id_vals): """ Returns the index(es) of the given IDs. """ if isinstance(id_vals, (list, np.ndarray, set)): return [self._id_index[int(id_val)] for id_val in id_vals] else: return self._id_index[int(id_vals)] def __getitem__(self, ids): """ids can be a single ID or a sequence of IDs""" if ids is None: return self.values return self.values[self.index(ids)] def __eq__(self, other): if isinstance(other, NeighborSet): return other == self if not isinstance(other, Neighbors): return False return np.allclose(self.ids, other.ids) and np.allclose(self.values, other.values) def __ne__(self, other): return not (self == other) def __len__(self): return len(self.values) def calculate_neighbors(self, pos, return_distance=True, n_neighbors=None): if self.clf is None: raise ValueError( ("Cannot compute neighbors because the Neighbors was not " "initialized with a neighbor classifier - was it deserialized " "from JSON without saving the original coordinates or " "concatenated to another Neighbors?")) neigh_dists, neigh_indexes = self.clf.kneighbors(pos, n_neighbors=n_neighbors or self.n_neighbors) if return_distance: return neigh_dists, neigh_indexes return neigh_indexes def concat(self, other): """Concatenates the two Neighbors together, discarding the original classifier.""" assert not (set(self.ids.tolist()) & set(other.ids.tolist())), "Cannot concatenate Neighbors objects with overlapping ID values" assert self.metric == other.metric, "Cannot concatenate Neighbors objects with different metrics" return Neighbors( np.concatenate(self.values, other.values), ids=np.concatenate(self.ids, other.ids), metric=self.metric, n_neighbors = max(self.n_neighbors, other.n_neighbors) ) 
def to_json(self, compressed=True, num_neighbors=None): """Serializes the neighbors to a JSON object.""" result = {} result["metric"] = self.metric result["n_neighbors"] = self.n_neighbors neighbors = self.values if num_neighbors is not None: neighbors = neighbors[:,:min(num_neighbors, neighbors.shape[1])] if compressed: result["_format"] = "compressed" # Specify the type name that will be used to encode the point IDs. # This is important because the highlight array takes up the bulk # of the space when transferring to file/widget. dtype, type_name = choose_integer_type(self.ids) result["_idtype"] = type_name result["_length"] = len(self) result["ids"] = encode_numerical_array(self.ids, dtype) result["neighbors"] = encode_numerical_array(neighbors.flatten(), astype=dtype, interval=neighbors.shape[1]) else: result["_format"] = "expanded" result["neighbors"] = {} indexes = self.index(self.ids) for id_val, index in zip(self.ids, indexes): result["neighbors"][id_val] = neighbors[index].tolist() return result @classmethod def from_json(cls, data): if data.get("_format", "expanded") == "compressed": dtype = np.dtype(data["_idtype"]) ids = decode_numerical_array(data["ids"], dtype) neighbors = decode_numerical_array(data["neighbors"], dtype) else: neighbor_dict = data["neighbors"] try: ids = [int(id_val) for id_val in list(neighbor_dict.keys())] neighbor_dict = {int(k): v for k, v in neighbor_dict.items()} except: ids = list(neighbor_dict.keys()) ids = sorted(ids) neighbors = np.array([neighbor_dict[id_val] for id_val in ids]) return cls(neighbors, ids=ids, metric=data["metric"], n_neighbors=data["n_neighbors"])
An object representing a serializable set of nearest neighbors within an embedding. The
Neighbors
object simply stores a matrix of integer IDs, where rows correspond to points in the embedding and columns are IDs of neighbors in order of proximity to each point. These neighbors can be accessed through the values
property. This constructor should typically not be used - use
Neighbors.compute
instead.
Args
values
- Matrix of neighbor IDs with one row per point and one column per neighbor (Neighbors.compute builds it from the n x D positions)
ids
- If supplied, a list of IDs for the points in the matrix
metric
- Distance metric to use to compute neighbors (can be any supported
metric for
sklearn.neighbors.NearestNeighbors
)
n_neighbors
- Number of neighbors to compute and save
clf
- The
NearestNeighbors
object (only used when loading a Neighbors
object from file)
Static methods
def compute(pos, ids=None, metric='euclidean', n_neighbors=100)
-
Compute a nearest-neighbor set using a given metric.
Args
pos
- Matrix of n x D high-dimensional positions
ids
- If supplied, a list of IDs for the points in the matrix
metric
- Distance metric to use to compute neighbors (can be any supported
metric for
sklearn.neighbors.NearestNeighbors
)
n_neighbors
- Number of neighbors to compute and save
Returns
An initialized
Neighbors
object containing computed neighbors.
def from_json(data)
Methods
def calculate_neighbors(self, pos, return_distance=True, n_neighbors=None)
-
Expand source code
def calculate_neighbors(self, pos, return_distance=True, n_neighbors=None):
    """
    Queries the stored classifier (`self.clf`) for the neighbors of `pos`.

    Args:
        pos: Query points, handed to the classifier's `kneighbors`
        return_distance: When True the result is `(distances, indexes)`;
            when False only the indexes are returned
        n_neighbors: How many neighbors to request; `self.n_neighbors` is
            used when this is None (or otherwise falsy)

    Raises:
        ValueError: If no classifier is attached to this object.
    """
    classifier = self.clf
    if classifier is None:
        message = (
            "Cannot compute neighbors because the Neighbors was not "
            "initialized with a neighbor classifier - was it deserialized "
            "from JSON without saving the original coordinates or "
            "concatenated to another Neighbors?"
        )
        raise ValueError(message)

    k = n_neighbors if n_neighbors else self.n_neighbors
    distances, indexes = classifier.kneighbors(pos, n_neighbors=k)
    return (distances, indexes) if return_distance else indexes
def concat(self, other)
-
Expand source code
def concat(self, other):
    """
    Concatenates the two Neighbors together, discarding the original
    classifier.

    Args:
        other: Another Neighbors object whose IDs do not overlap with this
            one's and which uses the same metric.

    Returns:
        A new Neighbors object whose rows and IDs are those of `self`
        followed by those of `other`. No classifier is passed on, and
        `n_neighbors` is the larger of the two inputs' values. The neighbor
        matrices are stacked row-wise, so they need the same number of
        columns.
    """
    # IDs may be stored as a plain list (e.g. after loading expanded JSON),
    # so normalize before using array methods.
    own_ids = np.asarray(self.ids)
    other_ids = np.asarray(other.ids)
    assert not (set(own_ids.tolist()) & set(other_ids.tolist())), "Cannot concatenate Neighbors objects with overlapping ID values"
    assert self.metric == other.metric, "Cannot concatenate Neighbors objects with different metrics"
    # np.concatenate takes the arrays as ONE sequence argument; its second
    # positional argument is the axis, so passing two arrays separately fails.
    return Neighbors(
        np.concatenate([self.values, other.values]),
        ids=np.concatenate([own_ids, other_ids]),
        metric=self.metric,
        n_neighbors=max(self.n_neighbors, other.n_neighbors),
    )
Concatenates the two Neighbors together, discarding the original classifier.
def index(self, id_vals)
-
Expand source code
def index(self, id_vals):
    """
    Returns the index(es) of the given IDs.

    A list, NumPy array or set is looked up element by element and produces
    a list of indexes; any other value is treated as one ID and produces a
    single index. Every ID is converted with `int()` before the lookup.
    """
    lookup = self._id_index
    if not isinstance(id_vals, (list, np.ndarray, set)):
        return lookup[int(id_vals)]

    positions = []
    for id_val in id_vals:
        positions.append(lookup[int(id_val)])
    return positions
Returns the index(es) of the given IDs.
def to_json(self, compressed=True, num_neighbors=None)
-
Expand source code
def to_json(self, compressed=True, num_neighbors=None):
    """
    Serializes the neighbors to a JSON object.

    Args:
        compressed: If True, the IDs and the flattened neighbor matrix are
            encoded with `encode_numerical_array`; otherwise a plain
            `{id: [neighbor ids]}` mapping is written.
        num_neighbors: If given, only the first `num_neighbors` columns of
            the neighbor matrix are written.

    Returns:
        A dictionary holding the metric, `n_neighbors`, a `_format` marker
        and the neighbor data.
    """
    result = {}
    result["metric"] = self.metric
    result["n_neighbors"] = self.n_neighbors

    neighbors = self.values
    if num_neighbors is not None:
        neighbors = neighbors[:, :min(num_neighbors, neighbors.shape[1])]

    if compressed:
        result["_format"] = "compressed"
        # Specify the type name that will be used to encode the point IDs.
        # This is important because the highlight array takes up the bulk
        # of the space when transferring to file/widget.
        dtype, type_name = choose_integer_type(self.ids)
        result["_idtype"] = type_name
        result["_length"] = len(self)
        result["ids"] = encode_numerical_array(self.ids, dtype)
        result["neighbors"] = encode_numerical_array(neighbors.flatten(),
                                                     astype=dtype,
                                                     interval=neighbors.shape[1])
    else:
        result["_format"] = "expanded"
        result["neighbors"] = {}
        indexes = self.index(self.ids)
        # tolist() turns NumPy scalars into native Python values; NumPy
        # integers are rejected by the json module as dictionary keys.
        for id_val, index in zip(np.asarray(self.ids).tolist(), indexes):
            result["neighbors"][id_val] = neighbors[index].tolist()
    return result
Serializes the neighbors to a JSON object.