Module emblaze.utils
Functions
- def affine_to_matrix(t)
- 
def affine_to_matrix(t):
    """
    Builds the 3x3 numpy matrix described by a transformation object.

    Args:
        t: Object exposing the nine matrix coefficients as the attributes
            a, b, c, d, e, f, g, h and i (row-major order).

    Returns:
        A 3x3 numpy array laid out as [[a, b, c], [d, e, f], [g, h, i]].
    """
    # Each string names the attributes that make up one matrix row.
    coefficient_rows = ("abc", "def", "ghi")
    return np.array([
        [getattr(t, name) for name in row]
        for row in coefficient_rows
    ])
- def affine_transform(transform, points)
- 
def affine_transform(transform, points):
    """
    Applies the given Affine object to a set of N x 2 points.

    The points are lifted to homogeneous coordinates (a row of ones is
    appended below the transposed points), multiplied by the 3x3 matrix of
    the transform, and the first two coordinates of each result are returned.

    Args:
        transform: Affine object, converted with affine_to_matrix.
        points: N x 2 numpy array of points.

    Returns:
        An N x 2 numpy array of transformed points.
    """
    num_points = points.shape[0]
    ones_row = np.ones((1, num_points))
    homogeneous = np.vstack([points.T, ones_row])  # shape 3 x N
    matrix = affine_to_matrix(transform)
    result = np.dot(matrix, homogeneous)
    # Back to one point per row, dropping the homogeneous coordinate.
    return result.T[:, :2]  # pylint: disable=unsubscriptable-object
- def choose_integer_type(values)
- 
def choose_integer_type(values):
    """
    Chooses the narrowest integer type (i.e. np.(u)int(8|16|32|64)) that can
    represent every element of the given set of values. Returns the dtype and
    its name.

    A signed type is chosen when the minimum is negative, an unsigned type
    otherwise. The decision is based on the actual minimum and maximum, not
    on their difference, so values far from zero (e.g. [1000, 1010]) get a
    type wide enough to hold them.

    Args:
        values: Array-like of numbers.

    Returns:
        A (dtype, name) tuple such as (np.uint16, "u2"). Falls back to the
        64-bit type when no narrower type fits; an empty input yields
        (np.uint8, "u1").
    """
    values = np.asarray(values)
    if values.size == 0:
        # Nothing to represent, so the narrowest type is sufficient.
        return np.uint8, "u1"

    # .item() yields plain Python numbers, so the comparisons against the
    # dtype bounds below are exact and cannot overflow.
    min_val = values.min().item()
    max_val = values.max().item()

    if min_val < 0:
        candidates = ((np.int8, "i1"), (np.int16, "i2"), (np.int32, "i4"))
        fallback = (np.int64, "i8")
    else:
        candidates = ((np.uint8, "u1"), (np.uint16, "u2"), (np.uint32, "u4"))
        fallback = (np.uint64, "u8")

    for dtype, name in candidates:
        bounds = np.iinfo(dtype)
        if bounds.min <= min_val and max_val <= bounds.max:
            return dtype, name
    return fallback
- def decode_numerical_array(obj, astype=numpy.float32)
- 
def decode_numerical_array(obj, astype=np.float32):
    """
    Decodes the given compressed dict into an array of the given dtype. The
    dict should contain a 'values' key (base64 string) and optionally a
    'positions' key (base64 string to be turned into an int32 array, defining
    the shape of a 2d matrix) or an 'interval' key (integer defining the
    number of columns in the 2d matrix).

    Args:
        obj: Dict with the keys described above.
        astype: dtype used to interpret the decoded 'values' bytes.

    Returns:
        A 1d array when neither 'positions' nor 'interval' is present (or
        when 'positions' is empty), otherwise a 2d array whose number of
        columns is the spacing of 'positions' or the value of 'interval'.
        The array is backed by the decoded buffer and is read-only.

    Raises:
        ValueError: If 'positions' is not regularly spaced.
    """
    values = np.frombuffer(base64.decodebytes(obj["values"].encode('ascii')), dtype=astype)
    if "positions" in obj:
        positions = np.frombuffer(base64.decodebytes(obj["positions"].encode('ascii')), dtype=np.int32)
        if positions.size == 0:
            # No row boundaries were recorded, so there is no shape to apply.
            return values
        if positions.size == 1:
            # A single end position: the one row spans positions[0] values.
            row_length = positions[0]
        else:
            deltas = np.diff(positions)
            # Exact comparison: these are integer offsets, so a tolerance
            # would wrongly accept off-by-one spacing at large positions.
            if not np.all(deltas == deltas[0]):
                raise ValueError("cannot currently decode numerical arrays with non-standard positions array")
            row_length = deltas[0]
        values = values.reshape(-1, int(row_length))
    elif "interval" in obj:
        values = values.reshape(-1, obj["interval"])
    return values
- def decode_object_array(obj)
- 
def decode_object_array(obj):
    """
    Decodes the base64-encoded JSON stored under the object's 'values' key.

    Args:
        obj: Dict whose 'values' entry is a base64 string of a JSON document.

    Returns:
        The parsed JSON object.
    """
    encoded = obj["values"].encode('ascii')
    raw_json = base64.b64decode(encoded)
    return json.loads(raw_json)
- def encode_numerical_array(arr, astype=numpy.float32, positions=None, interval=None)
- 
def encode_numerical_array(arr, astype=np.float32, positions=None, interval=None):
    """
    Encodes the given numpy array into a base64 representation for fast
    transfer to the widget frontend. The array is serialized in C
    (row-major) order as a sequence of numbers with type 'astype'.

    Args:
        arr: Numpy array to encode.
        astype: dtype the values are converted to before encoding.
        positions: If not None, a numpy array of positions at which the
            array for each ID *ends*. For example, if there are ten IDs and
            ten numbers in the array for each ID, the positions array would
            be [10, 20, ..., 90, 100]. Encoded as int32.
        interval: If not None, passed into the result object directly (it
            signifies the same as positions, but with a regularly spaced
            interval).

    Returns:
        A dict with a 'values' key (base64 string) and, when supplied,
        'positions' (base64 string) and 'interval' keys.
    """
    # TODO support saving arrays as numerical sequence metadata

    # Convert and force row-major layout in a single step so the encoded
    # bytes are always in C order, whatever the layout of the input.
    packed = arr.astype(astype, order='C')
    payload = {"values": base64.b64encode(packed).decode('ascii')}

    if positions is not None:
        encoded_positions = base64.b64encode(positions.astype(np.int32))
        payload["positions"] = encoded_positions.decode('ascii')
    if interval is not None:
        payload["interval"] = interval
    return payload
- def encode_object_array(arr)
- 
def encode_object_array(arr):
    """
    Encodes the given array as a base64 string of a JSON string.

    Numpy arrays are first converted to nested lists; the data is then
    passed through standardize_json before being serialized.

    Args:
        arr: Numpy array or JSON-like object to encode.

    Returns:
        A dict with a single 'values' key holding the base64 string.
    """
    items = arr.tolist() if isinstance(arr, np.ndarray) else arr
    json_text = json.dumps(standardize_json(items))
    encoded = base64.b64encode(json_text.encode("utf-8"))
    return {"values": encoded.decode('ascii')}
- def inverse_intersection(seqs1, seqs2, mask_ids, outer)
- 
@jit(nopython=True)
def inverse_intersection(seqs1, seqs2, mask_ids, outer):
    """
    Computes the inverse intersection size of the two lists of sets.

    For each index i, the elements of seqs1[i] and seqs2[i] are first
    filtered using mask_ids, and the result is 1 / (1 + n), where n is the
    number of elements the two filtered sets have in common. If both
    filtered sets are empty, the result for that index stays 0.

    Args:
        seqs1: A list of iterables
        seqs2: Another list of iterables - must be the same length as seqs1
        mask_ids: Iterable containing objects that should be EXCLUDED if outer
            is True, and INCLUDED if outer is False
        outer: Determines the behavior of mask_ids

    Returns:
        A numpy array of inverse intersection sizes between each element in
        seqs1 and seqs2.
    """
    # NOTE(review): compiled with jit(nopython=True) (presumably numba's jit;
    # confirm against the file's imports), so the constructs below are kept
    # exactly as written.
    distances = np.zeros(len(seqs1))
    mask_ids = set(mask_ids)
    for i in range(len(seqs1)):
        # Keep n when its membership in mask_ids differs from `outer`:
        # outer=True keeps ids outside the mask, outer=False keeps ids in it.
        set1 = set([n for n in seqs1[i] if (n in mask_ids) != outer])
        set2 = set([n for n in seqs2[i] if (n in mask_ids) != outer])
        num_intersection = len(set1 & set2)
        # Only assign when at least one filtered set is non-empty; otherwise
        # the entry keeps its initial value of 0.
        if len(set1) or len(set2):
            distances[i] = 1 / (1 + num_intersection)
    return distances
- def matrix_to_affine(mat)
- 
def matrix_to_affine(mat):
    """
    Builds an Affine transformation object from the given 3x3 matrix.

    Only the first six entries of the flattened matrix (its first two rows
    for a 3x3 input) are passed to Affine; the remaining entries are ignored.

    Args:
        mat: 3x3 numpy array.

    Returns:
        An Affine object constructed from the six leading coefficients.
    """
    coefficients = mat.flatten()[:6]
    return Affine(*coefficients)
- def projection_standardizer(emb)
- 
def projection_standardizer(emb):
    """
    Returns an affine translation by the negated centroid of the given points.

    The centroid is the mean over axis 0 of emb; only its first two
    components are used for the translation.
    """
    centroid = emb.mean(axis=0)
    offset = -centroid[:2]
    return Affine.translation(*offset)
- def standardize_json(o, round_digits=4)
- 
def standardize_json(o, round_digits=4):
    """
    Produces a JSON-compliant object by replacing numpy types with system
    types and rounding floats to save space.

    Handles every numpy integer, floating-point and boolean scalar type (not
    just a fixed subset), converts numpy arrays to nested lists, and recurses
    through dicts, lists and tuples. Tuples become lists. Dict keys are
    standardized the same way as values. Anything else is returned unchanged.

    Args:
        o: The object to standardize.
        round_digits: Number of decimal places floats are rounded to.

    Returns:
        The standardized object.
    """
    if isinstance(o, np.ndarray):
        # tolist() yields nested lists of Python scalars; recurse so that
        # floats are rounded like everywhere else.
        return standardize_json(o.tolist(), round_digits)
    if isinstance(o, np.bool_):
        return bool(o)
    if isinstance(o, (float, np.floating)):
        return round(float(o), round_digits)
    if isinstance(o, np.integer):
        return int(o)
    if isinstance(o, dict):
        return {standardize_json(k, round_digits): standardize_json(v, round_digits) for k, v in o.items()}
    if isinstance(o, (list, tuple)):
        return [standardize_json(x, round_digits) for x in o]
    return o
Classes
- class DataType
- 
class DataType:
    """Kinds of data a field can hold, e.g. categorical vs continuous."""

    # String identifiers for each kind of data.
    CATEGORICAL = "categorical"
    CONTINUOUS = "continuous"
 
- class Field
- 
class Field:
    """
    Standardized field names for embeddings and projections.

    These data can all be versioned within a ColumnarData object.
    """

    # Names of the point-level fields.
    POSITION = "position"
    COLOR = "color"
    RADIUS = "r"
    ALPHA = "alpha"

    # Thumbnail fields
    NAME = "name"
    DESCRIPTION = "description"
 
- class PreviewMode
- 
class PreviewMode:
    """Available ways of calculating preview lines."""

    # String identifiers for each preview mode.
    PROJECTION_SIMILARITY = "projectionNeighborSimilarity"
    NEIGHBOR_SIMILARITY = "neighborSimilarity"
 
- class ProjectionTechnique
- 
class ProjectionTechnique:
    """String names of the supported projection techniques."""

    UMAP = "umap"
    TSNE = "tsne"
    ALIGNED_UMAP = "aligned-umap"
    PCA = "pca"
 
- class SidebarPane
- 
class SidebarPane:
    """Integer indexes identifying the sidebar panes in the widget."""

    CURRENT = 1
    SAVED = 2
    RECENT = 3
    SUGGESTED = 4