From c2f146df414587325ed6dc6d0d11dca5ccad7e49 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Wed, 5 Jun 2024 09:11:12 -0600 Subject: [PATCH 1/2] calc_ogu_cell_counts_per_cm2_of_sample_for_qiita --- qp_woltka/__init__.py | 7 +++- qp_woltka/woltka.py | 85 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/qp_woltka/__init__.py b/qp_woltka/__init__.py index cc43f06..408e45b 100644 --- a/qp_woltka/__init__.py +++ b/qp_woltka/__init__.py @@ -9,7 +9,7 @@ from qiita_client import QiitaPlugin, QiitaCommand from .woltka import (woltka, woltka_syndna, calculate_cell_counts, - calculate_rna_copy_counts) + calculate_rna_copy_counts, calculate_cell_counts_skin) from qp_woltka.util import generate_woltka_dflt_params, get_dbs, plugin_details from os import environ @@ -87,6 +87,11 @@ calculate_cell_counts, req_params, opt_params, outputs, dflt_param_set) plugin.register_command(calculate_cell_counts_cmd) +calculate_cell_counts_skin_cmd = QiitaCommand( + 'Calculate Cell Counts Skin', "Calculate cell counts per-genome for skin", + calculate_cell_counts_skin, req_params, opt_params, outputs, + dflt_param_set) +plugin.register_command(calculate_cell_counts_skin_cmd) # MTX calculate RNA copy counts req_params = { diff --git a/qp_woltka/woltka.py b/qp_woltka/woltka.py index 2815b30..f57cecb 100644 --- a/qp_woltka/woltka.py +++ b/qp_woltka/woltka.py @@ -17,6 +17,7 @@ from pysyndna import fit_linear_regression_models_for_qiita from pysyndna import calc_ogu_cell_counts_per_g_of_sample_for_qiita from pysyndna import calc_copies_of_ogu_orf_ssrna_per_g_sample_for_qiita +from pysyndna import calc_ogu_cell_counts_per_cm2_of_sample_for_qiita from qp_woltka.util import search_by_filename @@ -634,6 +635,90 @@ def calculate_cell_counts(qclient, job_id, parameters, out_dir): return True, ainfo, "" +def calculate_cell_counts_skin(qclient, job_id, parameters, out_dir): + """Run calc_ogu_cell_counts_per_cm2_of_sample_for_qiita + + Parameters + ---------- + qclient : tgp.qiita_client.QiitaClient + The Qiita server client + job_id : str + The job id + parameters : dict + The parameter values to wolka syndna + out_dir : str + The path to the job's output directory + + Returns + ------- + bool, list, str + The results of the job + """ + error = '' + # let's get the syndna_id and prep in a single go + syndna_id = parameters['SynDNA hits'] + syndna_files, prep = qclient.artifact_and_preparation_files(syndna_id) + if 'log' not in syndna_files.keys(): + error = ("No logs found, are you sure you selected the correct " + "artifact for 'SynDNA hits'?") + else: + + lin_regress_by_sample_id_fp = [f for f in syndna_files['log'] + if 'lin_regress_by_sample_id' in f] + if not lin_regress_by_sample_id_fp: + error = ("No 'lin_regress_by_sample_id' log found, are you sure " + " you selected the correct artifact for 'SynDNA hits'?") + else: + lin_regress_by_sample_id_fp = lin_regress_by_sample_id_fp[0] + + # for per_genome_id let's do it separately so we can also ge the + # sample information + per_genome_id = parameters['Woltka per-genome'] + ainfo = qclient.get("/qiita_db/artifacts/%s/" % per_genome_id) + aparams = ainfo['processing_parameters'] + ogu_fp = ainfo['files']['biom'][0]['filepath'] + + if 'Database' not in aparams or not ogu_fp.endswith('none.biom'): + error = ("The selected 'Woltka per-genome' artifact doesn't " + "look like one, did you select the correct file?") + else: + ogu_counts_per_sample = load_table(ogu_fp) + + db_files = _process_database_files(aparams['Database']) + ogu_lengths_fp = db_files["length.map"] + + sample_info = qclient.get( + '/qiita_db/prep_template/%s/data/?sample_information=true' + % ainfo['prep_information'][0]) + sample_info = pd.DataFrame.from_dict( + sample_info['data'], orient='index') + sample_info.reset_index(names='sample_name', inplace=True) + + if error: + return False, None, error + + try: + output = calc_ogu_cell_counts_per_cm2_of_sample_for_qiita( + sample_info, prep, lin_regress_by_sample_id_fp, + ogu_counts_per_sample, ogu_lengths_fp, + int(parameters['read_length']), float(parameters['min_coverage']), + float(parameters['min_rsquared'])) + except Exception as e: + return False, None, str(e) + + log_fp = f'{out_dir}/cell_counts.log' + with open(log_fp, 'w') as f: + f.write(output['calc_cell_counts_log']) + biom_fp = f'{out_dir}/cell_counts.biom' + with biom_open(biom_fp, 'w') as f: + output['cell_count_biom'].to_hdf5(f, f"Cell Counts - {job_id}") + ainfo = [ + ArtifactInfo( + 'Cell counts', 'BIOM', [(biom_fp, 'biom'), (log_fp, 'log')])] + + return True, ainfo, "" + + def calculate_rna_copy_counts(qclient, job_id, parameters, out_dir): """Run calc_copies_of_ogu_orf_ssrna_per_g_sample_for_qiita From 34375bb4b65b42fc7a2e2addca35398b7d0c6528 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Wed, 5 Jun 2024 09:20:51 -0600 Subject: [PATCH 2/2] extent test_calculate_cell_counts --- qp_woltka/tests/test_woltka.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/qp_woltka/tests/test_woltka.py b/qp_woltka/tests/test_woltka.py index 6f5383b..8aa9559 100644 --- a/qp_woltka/tests/test_woltka.py +++ b/qp_woltka/tests/test_woltka.py @@ -18,7 +18,8 @@ from qp_woltka import plugin from qp_woltka.woltka import ( woltka_to_array, woltka, woltka_syndna_to_array, woltka_syndna, - calculate_cell_counts, calculate_rna_copy_counts) + calculate_cell_counts, calculate_rna_copy_counts, + calculate_cell_counts_skin) class WoltkaTests(PluginTestCase): @@ -557,6 +558,11 @@ def test_woltka_syndna_to_array(self): self.assertCountEqual(ainfo, exp) def test_calculate_cell_counts(self): + """ + Notes: + calculate_cell_counts and calculate_cell_counts_skin are almost + the same method so we are going to test both here + """ params = {'SynDNA hits': 5, 'Woltka per-genome': 6, 'min_coverage': 1, 'read_length': 150, 'min_rsquared': 0.8} @@ -568,6 +574,11 @@ def test_calculate_cell_counts(self): success, ainfo, msg = calculate_cell_counts( self.qclient, job_id, params, out_dir) self.assertFalse(success) + self.assertEqual(msg, "No logs found, are you sure you selected the " + "correct artifact for 'SynDNA hits'?") + success, ainfo, msg = calculate_cell_counts_skin( + self.qclient, job_id, params, out_dir) + self.assertFalse(success) self.assertEqual(msg, "No logs found, are you sure you selected the " "correct artifact for 'SynDNA hits'?") @@ -609,6 +620,12 @@ def test_calculate_cell_counts(self): success, ainfo, msg = calculate_cell_counts( self.qclient, job_id, params, out_dir) self.assertFalse(success) + self.assertEqual(msg, "The selected 'Woltka per-genome' artifact " + "doesn't look like one, did you select the correct " + "file?") + success, ainfo, msg = calculate_cell_counts_skin( + self.qclient, job_id, params, out_dir) + self.assertFalse(success) self.assertEqual(msg, "The selected 'Woltka per-genome' artifact " "doesn't look like one, did you select the correct " "file?")