Simple workflow for identifying clones

import celltag_tools as ct
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')
import scipy
import pkg_resources

# Resolve the two example read tables that ship inside the installed
# celltag_tools package. Only data_path_2 is consumed by this cell.
# NOTE(review): setuptools documents pkg_resources as deprecated in favour of
# importlib.resources -- consider migrating once the minimum Python is known.
_PACKAGE = "celltag_tools"
data_path_1 = pkg_resources.resource_filename(_PACKAGE, "data/parse_1.txt.gz")
data_path_2 = pkg_resources.resource_filename(_PACKAGE, "data/parse_2.txt.gz")

# Parse the second read table into the object used by every later step.
# starcode_path / allowlist_path are relative to the current working
# directory, so this must be run from the directory the paths assume.
ct_obj = ct.tl.read_celltag(
    data_path_2,
    starcode_path="../../starcode/",
    allowlist_path="../../18N-multi-v1-allowlist.csv",
    assay='RNA',
    inplace=True,
)
processing: /Users/kjindal/Documents/single_cell/celltag_tools/celltag_tools/data/parse_2.txt.gz
Total filtered CellTag Reads: 4468269
CellTag Sequencing saturation:  91.92850743766769
running starcode-v1.4 (last revised 2021-09-22) with 4 threads
reading input files
raw format detected
sorting
progress: 100.00%
spheres clustering
Creating CellTagData object and setting ct_reads, thresholds, and seq_sat
# Build the allow matrix; the result is read back from ct_obj.allow_mtx below.
# presumably restricts CellTags to the allowlist given to read_celltag -- confirm.
ct.tl.create_allow_mtx(ct_obj)

# Show the sparse matrix together with its cell-barcode and CellTag labels.
allow_view = ct_obj.allow_mtx
allow_view['mtx'], allow_view['cells'], allow_view['celltags']
(<74575x20202 sparse matrix of type '<class 'numpy.int64'>'
 	with 149745 stored elements in Compressed Sparse Row format>,
 array(['AAACCCAAGAGCATCG-1', 'AAACCCAAGATCACCT-1', 'AAACCCAAGATCGCTT-1',
        ..., 'TTTGTTGGTTACCCAA-1', 'TTTGTTGGTTCATCTT-1',
        'TTTGTTGGTTTGCAGT-1'], dtype='<U18'),
 array(['AAAGTAAACTAACAGCTATGTGGCACGC', 'AAAGTAAACTATTAGGTTTGGTACACGG',
        'AAAGTAAACTGAAAGAGATGATTCATAT', ...,
        'TTTGTTTTCTTTAAGGACTGTTGCATTC', 'TTTGTTTTCTTTAAGGTATGGCACATTT',
        'TTTGTTTTCTTTAAGTCGTGCCTCACTG'], dtype='<U28'))
# Derive the binarized matrix from ct_obj using bin_th=1.
# presumably bin_th is the count threshold for calling a CellTag present -- confirm.
ct.tl.create_bin_mtx(ct_obj, bin_th=1)
# Build the metric-filtered matrix (read back from ct_obj.metric_mtx afterwards).
# met_lower=0 sets the lower threshold; the upper threshold is left at its
# default, which the call reports in its log output.
ct.tl.create_metric_mtx(ct_obj, met_lower=0)
Using 0 as lower and 25 as upper thresholds
# Show the metric-filtered sparse matrix with its cell-barcode and CellTag labels.
metric_view = ct_obj.metric_mtx
metric_view['mtx'], metric_view['cells'], metric_view['celltags']
(<9949x10246 sparse matrix of type '<class 'numpy.int64'>'
 	with 17223 stored elements in Compressed Sparse Row format>,
 array(['AAACCCAAGAGCATCG-1', 'AAACCCAAGATCGCTT-1', 'AAACCCACAAGATGTA-1',
        ..., 'TTTGTTGCAGCAGATG-1', 'TTTGTTGCAGTGTGGA-1',
        'TTTGTTGGTGCATTTG-1'], dtype='<U18'),
 array(['AAAGTAAACTAACAGCTATGTGGCACGC', 'AAAGTAAACTATTAGGTTTGGTACACGG',
        'AAAGTAAACTGAAAGAGATGATTCATAT', ...,
        'TTTGTTTTCTGTAAGTGTTGGGTCACGC', 'TTTGTTTTCTTGTAGGATTGCACCAGTT',
        'TTTGTTTTCTTTAAGGACTGTTGCATTC'], dtype='<U28'))
# Call clones on ct_obj; the call logs a Jaccard-similarity step followed by
# clone identification. Results are presumably stored on ct_obj -- confirm.
ct.tl.call_clones(ct_obj)
Calculating Jaccard similarity
Identifying clones
# Render diagnostic plots for ct_obj; 'metric' presumably selects the
# metric-filtered matrix as the data to plot -- confirm against the API docs.
ct.pl.diagnostic_plots(ct_obj, 'metric')
_images/e4f9164ca33cd3a852b2b293c27b2e6248a142e48367268b052492faeaae98e4.png
# Persist the processed object; the relative path resolves against the
# current working directory. The .pkl suffix suggests pickle format -- confirm.
ct_obj.save("celltag_obj_reprogramming.pkl")