add config to plot all features #1

epigen · Jun 22, 2024 · 5dade5d · 5dade5d
1 parent 7258418
commit 5dade5d
Show file tree

Hide file tree

Showing 3 changed files with 15 additions and 16 deletions.
diff --git a/config/config.yaml b/config/config.yaml
@@ -91,8 +91,9 @@ scatterplot2d:
     size: 1
     alpha: 1
 
-# specify features of interest, which values from the data, will be highlgihted in the 2D/3D plots
+# specify features of interest; the values of these features in the data will be highlighted in the 2D/3D plots
 # motivated by bioinformatics highlighting expression levels of marker genes (eg: ['PTPRC'])
+# use keyword ['ALL'] to plot all features. WARNING: only useful for relatively low-dimensional data, because a plot is generated for each feature and method.
 # if not used leave empty []
-features_to_plot: ['pixel_0_0','pixel_0_1','pixel_0_2','pixel_0_3']
+features_to_plot: ['ALL'] #['pixel_0_0','pixel_0_1','pixel_0_2','pixel_0_3']
 
diff --git a/workflow/rules/visualization.smk b/workflow/rules/visualization.smk
@@ -5,7 +5,7 @@ rule prep_feature_plot:
     input:
         unpack(get_sample_paths),
     output:
-        os.path.join(result_path,'{sample}','metadata_features.csv'),
+        metadata_features = os.path.join(result_path,'{sample}','metadata_features.csv'),
     resources:
         mem_mb=config.get("mem", "16000"),
     threads: config.get("threads", 1)

diff --git a/workflow/scripts/subset_data.py b/workflow/scripts/subset_data.py
@@ -8,19 +8,13 @@
 #### configurations
 
 # inputs
-data_path = snakemake.input["data"] #"/nobackup/lab_bock/projects/macroIC/results/AKsmall/condition_24h_cytokines/counts/CORRECTED_RNA.csv"
+data_path = snakemake.input["data"]
 # outputs
-result_data_path = snakemake.output[0] #"/nobackup/lab_bock/projects/macroIC/results/Lee2020NatGenet/unsupervised_analysis/merged_NORMALIZED/metadata_features.csv"
-
-result_dir = os.path.dirname(result_data_path)
+metadata_features_path = snakemake.output["metadata_features"]
 
 # parameters
-samples_by_features = int(snakemake.params['samples_by_features']) # 0
-features_to_plot = set(snakemake.params["features_to_plot"]) # set(['FCN1', 'TGFBR1', 'TGFBR2', 'TNFRSF1A','IL6R', 'IFNGR1', 'IFNGR2', 'IFNAR1', 'IFNG', 'IFNB1', 'IL6', 'TNF', 'TGFB1', 'TGFB2'])
-
-# make directory if not existing
-if not os.path.exists(result_dir):
-    os.makedirs(result_dir, exist_ok=True)
+samples_by_features = int(snakemake.params['samples_by_features'])
+features_to_plot = set(snakemake.params["features_to_plot"])
 
 ### load data
 
@@ -30,14 +24,18 @@
 else:
     data = pd.read_csv(data_path, index_col=0).T
 
-### check overlap with columns & subset data
-features_to_plot = list(features_to_plot.intersection(set(data.columns)))
+### if "ALL" is requested, select every feature; otherwise keep only the requested features that overlap with the data columns
+if features_to_plot == {"ALL"}:
+    features_to_plot = list(data.columns)
+else:
+    features_to_plot = list(features_to_plot.intersection(set(data.columns)))
 
+# subset data
 if len(features_to_plot)!=0:
     data = data.loc[:,features_to_plot]
 else:
     print("requested features to plot are not in the provided data, first 10 features will be plotted instead")
     data = data.iloc[:,:10]
 
 # save data
-data.to_csv(result_data_path)
+data.to_csv(metadata_features_path)