Module emblaze.utils
Functions
def affine_to_matrix(t)
-
Expand source code
def affine_to_matrix(t): """ Returns a 3x3 matrix representing the transformation matrix. """ return np.array([ [t.a, t.b, t.c], [t.d, t.e, t.f], [t.g, t.h, t.i] ])
Returns a 3x3 matrix representing the transformation matrix.
def affine_transform(transform, points)
-
Expand source code
def affine_transform(transform, points): """ Transforms a set of N x 2 points using the given Affine object. """ reshaped_points = np.vstack([points.T, np.ones((1, points.shape[0]))]) transformed = np.dot(affine_to_matrix(transform), reshaped_points) return transformed.T[:,:2] # pylint: disable=unsubscriptable-object
Transforms a set of N x 2 points using the given Affine object.
def choose_integer_type(values)
-
Expand source code
def choose_integer_type(values): """ Chooses the best integer type (i.e. np.(u)int(8|16|32)) for the given set of values. Returns the dtype and its name. """ min_val = values.min() max_val = values.max() rng = max_val - min_val if min_val < 0: if rng > 2 ** 32 - 1: return np.int64, "i8" elif rng > 2 ** 16 - 1: return np.int32, "i4" elif rng > 2 ** 8 - 1: return np.int16, "i2" return np.int8, "i1" elif rng > 2 ** 32 - 1: return np.uint64, "u8" elif rng > 2 ** 16 - 1: return np.uint32, "u4" elif rng > 2 ** 8 - 1: return np.uint16, "u2" return np.uint8, "u1"
Chooses the best integer type (i.e. np.(u)int(8|16|32)) for the given set of values. Returns the dtype and its name.
def decode_numerical_array(obj, astype=numpy.float32)
-
Expand source code
def decode_numerical_array(obj, astype=np.float32): """ Decodes the given compressed dict into an array of the given dtype. The dict should contain a 'values' key (base64 string) and optionally a 'positions' key (base64 string to be turned into an int32 array, defining the shape of a 2d matrix) or an 'interval' key (integer defining the number of columns in the 2d matrix). """ values = np.frombuffer(base64.decodebytes(obj["values"].encode('ascii')), dtype=astype) if "positions" in obj: positions = np.frombuffer(base64.decodebytes(obj["positions"].encode('ascii')), dtype=np.int32) deltas = positions[1:] - positions[:-1] assert np.allclose(deltas, deltas[0]), "cannot currently decode numerical arrays with non-standard positions array" values = values.reshape(-1, deltas[0]) elif "interval" in obj: values = values.reshape(-1, obj["interval"]) return values
Decodes the given compressed dict into an array of the given dtype. The dict should contain a 'values' key (base64 string) and optionally a 'positions' key (base64 string to be turned into an int32 array, defining the shape of a 2d matrix) or an 'interval' key (integer defining the number of columns in the 2d matrix).
def decode_object_array(obj)
-
Expand source code
def decode_object_array(obj): """ Decodes the given object's 'values' key into a JSON object. """ return json.loads(base64.b64decode(obj["values"].encode('ascii')))
Decodes the given object's 'values' key into a JSON object.
def encode_numerical_array(arr, astype=numpy.float32, positions=None, interval=None)
-
Expand source code
def encode_numerical_array(arr, astype=np.float32, positions=None, interval=None): """ Encodes the given numpy array into a base64 representation for fast transfer to the widget frontend. The array will be encoded as a sequence of numbers with type 'astype'. If positions is not None, it should be a numpy array of positions at which the array for each ID *ends*. For example, if there are ten IDs and ten numbers in the array for each ID, the positions array would be [10, 20, ..., 90, 100]. If interval is not None, it is passed into the result object directly (and signifies the same as positions, but with a regularly spaced interval). """ # TODO support saving arrays as numerical sequence metadata # sequence_info = _detect_numerical_sequence(arr) # if sequence_info is not None: # result = { ""} if not arr.flags['C_CONTIGUOUS']: arr = arr.copy(order='C') result = { "values": base64.b64encode(arr.astype(astype)).decode('ascii') } if positions is not None: result["positions"] = base64.b64encode(positions.astype(np.int32)).decode('ascii') if interval is not None: result["interval"] = interval return result
Encodes the given numpy array into a base64 representation for fast transfer to the widget frontend. The array will be encoded as a sequence of numbers with type 'astype'.
If positions is not None, it should be a numpy array of positions at which the array for each ID ends. For example, if there are ten IDs and ten numbers in the array for each ID, the positions array would be [10, 20, …, 90, 100].
If interval is not None, it is passed into the result object directly (and signifies the same as positions, but with a regularly spaced interval).
def encode_object_array(arr)
-
Expand source code
def encode_object_array(arr): """ Encodes the given array as a base64 string of a JSON string. """ if isinstance(arr, np.ndarray): arr = arr.tolist() return { "values": base64.b64encode(json.dumps(standardize_json(arr)).encode("utf-8")).decode('ascii') }
Encodes the given array as a base64 string of a JSON string.
def inverse_intersection(seqs1, seqs2, mask_ids, outer)
-
Expand source code
@jit(nopython=True) def inverse_intersection(seqs1, seqs2, mask_ids, outer): """ Computes the inverse intersection size of the two lists of sets. Args: seqs1: A list of iterables seqs2: Another list of iterables - must be the same length as seqs1 mask_ids: Iterable containing objects that should be EXCLUDED if outer is True, and INCLUDED if outer is False outer: Determines the behavior of mask_ids Returns: A numpy array of inverse intersection sizes between each element in seqs1 and seqs2. """ distances = np.zeros(len(seqs1)) mask_ids = set(mask_ids) for i in range(len(seqs1)): set1 = set([n for n in seqs1[i] if (n in mask_ids) != outer]) set2 = set([n for n in seqs2[i] if (n in mask_ids) != outer]) num_intersection = len(set1 & set2) if len(set1) or len(set2): distances[i] = 1 / (1 + num_intersection) return distances
Computes the inverse intersection size of the two lists of sets.
Args
seqs1
- A list of iterables
seqs2
- Another list of iterables - must be the same length as seqs1
mask_ids
- Iterable containing objects that should be EXCLUDED if outer is True, and INCLUDED if outer is False
outer
- Determines the behavior of mask_ids
Returns
A numpy array of inverse intersection sizes between each element in seqs1 and seqs2.
def matrix_to_affine(mat)
-
Expand source code
def matrix_to_affine(mat): """ Returns an Affine transformation object from the given 3x3 matrix. """ return Affine(*(mat.flatten()[:6]))
Returns an Affine transformation object from the given 3x3 matrix.
def projection_standardizer(emb)
-
Expand source code
def projection_standardizer(emb): """Returns an affine transformation to translate an embedding to the centroid of the given set of points.""" return Affine.translation(*(-emb.mean(axis=0)[:2]))
Returns an affine transformation to translate an embedding to the centroid of the given set of points.
def standardize_json(o, round_digits=4)
-
Expand source code
def standardize_json(o, round_digits=4): """ Produces a JSON-compliant object by replacing numpy types with system types and rounding floats to save space. """ if isinstance(o, (float, np.float32, np.float64)): return round(float(o), round_digits) if isinstance(o, (np.int64, np.int32, np.uint8)): return int(o) if isinstance(o, dict): return {standardize_json(k, round_digits): standardize_json(v, round_digits) for k, v in o.items()} if isinstance(o, (list, tuple)): return [standardize_json(x, round_digits) for x in o] return o
Produces a JSON-compliant object by replacing numpy types with system types and rounding floats to save space.
Classes
class DataType
-
Expand source code
class DataType: """Types of data, e.g. categorical vs continuous.""" CATEGORICAL = "categorical" CONTINUOUS = "continuous"
Types of data, e.g. categorical vs continuous.
Class variables
var CATEGORICAL
var CONTINUOUS
class Field
-
Expand source code
class Field: """Standardized field names for embeddings and projections. These data can all be versioned within a ColumnarData object.""" POSITION = "position" COLOR = "color" RADIUS = "r" ALPHA = "alpha" # Thumbnail fields NAME = "name" DESCRIPTION = "description"
Standardized field names for embeddings and projections. These data can all be versioned within a ColumnarData object.
Class variables
var ALPHA
var COLOR
var DESCRIPTION
var NAME
var POSITION
var RADIUS
class PreviewMode
-
Expand source code
class PreviewMode: """Ways of calculating preview lines.""" PROJECTION_SIMILARITY = "projectionNeighborSimilarity" NEIGHBOR_SIMILARITY = "neighborSimilarity"
Ways of calculating preview lines.
Class variables
var NEIGHBOR_SIMILARITY
var PROJECTION_SIMILARITY
class ProjectionTechnique
-
Expand source code
class ProjectionTechnique: """Names of projection techniques.""" UMAP = "umap" TSNE = "tsne" ALIGNED_UMAP = "aligned-umap" PCA = "pca"
Names of projection techniques.
Class variables
var ALIGNED_UMAP
var PCA
var TSNE
var UMAP
class SidebarPane
-
Expand source code
class SidebarPane: """Indexes of sidebar panes in the widget.""" CURRENT = 1 SAVED = 2 RECENT = 3 SUGGESTED = 4
Indexes of sidebar panes in the widget.
Class variables
var CURRENT
var RECENT
var SAVED
var SUGGESTED