Skip to content

Commit

Permalink
Use bright colour for non-consentigs in cleaned graphs
Browse files Browse the repository at this point in the history
  • Loading branch information
rrwick committed Jan 20, 2025
1 parent 52d0987 commit dd8dcb3
Show file tree
Hide file tree
Showing 11 changed files with 66 additions and 54 deletions.
2 changes: 1 addition & 1 deletion src/clean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ pub fn clean(in_gfa: PathBuf, out_gfa: PathBuf, remove: Option<String>, duplicat
duplicate_tigs(&mut graph, &duplicate);
}
merge_graph(&mut graph);
graph.save_gfa(&out_gfa, &vec![]).unwrap();
graph.save_gfa(&out_gfa, &vec![], true).unwrap();
finished_message(&out_gfa);
}

Expand Down
2 changes: 1 addition & 1 deletion src/cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -794,7 +794,7 @@ fn save_cluster_gfa(sequences: &[Sequence], cluster_num: u16, gfa_lines: &Vec<St
}
cluster_graph.remove_zero_depth_unitigs();
merge_linear_paths(&mut cluster_graph, &cluster_seqs);
cluster_graph.save_gfa(&out_gfa, &cluster_seqs).unwrap();
cluster_graph.save_gfa(&out_gfa, &cluster_seqs, false).unwrap();
}


Expand Down
2 changes: 1 addition & 1 deletion src/combine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ fn combine_clusters(in_gfas: &Vec<PathBuf>, combined_gfa: &Path, combined_fasta:
let circ = if unitig.is_isolated_and_circular() { " circular=true".to_string() }
else { "".to_string() };
let depth_tag = format!("\tDP:f:{:.2}", unitig.depth);
let mut colour_tag = unitig.colour_tag();
let mut colour_tag = unitig.colour_tag(true);
if colour_tag.is_empty() {
colour_tag = "\tCL:z:orangered".to_string();
}
Expand Down
2 changes: 1 addition & 1 deletion src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ pub fn compress(assemblies_dir: PathBuf, autocycler_dir: PathBuf, k_size: u32, t
simplify_unitig_graph(&mut unitig_graph, &sequences);
let out_gfa = autocycler_dir.join("input_assemblies.gfa");
let out_yaml = autocycler_dir.join("input_assemblies.yaml");
unitig_graph.save_gfa(&out_gfa, &sequences).unwrap();
unitig_graph.save_gfa(&out_gfa, &sequences, false).unwrap();
save_metrics(&mut metrics, assembly_count, &sequences, &unitig_graph, &out_yaml);
finished_message(start_time, out_gfa, out_yaml);
}
Expand Down
8 changes: 4 additions & 4 deletions src/graph_simplification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::rc::Rc;
use crate::misc::{reverse_complement, strand};
use crate::position::Position;
use crate::sequence::Sequence;
use crate::unitig::{Unitig, UnitigStrand};
use crate::unitig::{Unitig, UnitigStrand, UnitigType};
use crate::unitig_graph::UnitigGraph;


Expand Down Expand Up @@ -438,8 +438,8 @@ fn merge_path(graph: &mut UnitigGraph, path: &Vec<UnitigStrand>, new_unitig_numb
..Default::default()
};

if path.iter().any(|p| p.anchor()) {
unitig.set_as_consentig();
if path.iter().any(|p| p.is_anchor() || p.is_consentig()) {
unitig.unitig_type = UnitigType::Consentig;
}

let unitig_rc = Rc::new(RefCell::new(unitig));
Expand Down Expand Up @@ -508,7 +508,7 @@ fn get_merge_path_depth(path: &Vec<UnitigStrand>, forward_positions: &[Position]

// If the path contains an anchor unitig, set the merged depth to the anchor's depth.
for u in path {
if u.anchor() {
if u.is_anchor() {
return u.depth();
}
}
Expand Down
16 changes: 9 additions & 7 deletions src/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::log::{section_header, explanation};
use crate::misc::{check_if_dir_exists, check_if_file_exists, reverse_path, load_file_lines,
sign_at_end, sign_at_end_vec};
use crate::sequence::Sequence;
use crate::unitig::Unitig;
use crate::unitig::{Unitig, UnitigType};
use crate::unitig_graph::UnitigGraph;


Expand All @@ -49,9 +49,9 @@ pub fn resolve(cluster_dir: PathBuf, verbose: bool) {

apply_unique_message();
apply_bridges(&mut unitig_graph, &bridges, bridge_depth);
unitig_graph.save_gfa(&bridged_gfa, &vec![]).unwrap();
unitig_graph.save_gfa(&bridged_gfa, &vec![], false).unwrap();
merge_after_bridging(&mut unitig_graph);
unitig_graph.save_gfa(&merged_gfa, &vec![]).unwrap();
unitig_graph.save_gfa(&merged_gfa, &vec![], false).unwrap();

let cull_count = cull_ambiguity(&mut bridges, verbose);
if cull_count > 0 {
Expand All @@ -62,7 +62,7 @@ pub fn resolve(cluster_dir: PathBuf, verbose: bool) {
} else {
eprintln!("All bridges were unique, no culling necessary.\n");
}
unitig_graph.save_gfa(&final_gfa, &vec![]).unwrap();
unitig_graph.save_gfa(&final_gfa, &vec![], true).unwrap();
finished_message(&final_gfa);
}

Expand Down Expand Up @@ -118,7 +118,8 @@ fn load_graph(gfa_lines: &Vec<String>, print_info: bool,
let (unitig_graph, sequences) = UnitigGraph::from_gfa_lines(gfa_lines);
if let Some(anchors) = anchors {
for num in anchors {
unitig_graph.unitig_index.get(num).unwrap().borrow_mut().anchor = true;
unitig_graph.unitig_index.get(num).unwrap()
.borrow_mut().unitig_type = UnitigType::Anchor;
}
}
if print_info {
Expand All @@ -141,7 +142,7 @@ fn find_anchor_unitigs(graph: &mut UnitigGraph, sequences: &[Sequence]) -> Vec<u
let mut forward_seq_ids: Vec<_> = unitig.forward_positions.iter().map(|p| p.seq_id()).collect();
forward_seq_ids.sort();
if forward_seq_ids == all_seq_ids {
unitig.anchor = true;
unitig.unitig_type = UnitigType::Anchor;
anchor_ids.push(unitig.number);
}
}
Expand Down Expand Up @@ -260,7 +261,8 @@ fn reduce_depths(graph: &mut UnitigGraph, bridge: &Bridge) {
fn delete_unitigs_not_connected_to_anchor(graph: &mut UnitigGraph) {
let to_delete: HashSet<u32> = graph.connected_components().into_iter()
.filter_map(|component| {
if component.iter().all(|&num| !graph.unitig_index.get(&num).unwrap().borrow().anchor) { Some(component) }
if component.iter().all(|&num| graph.unitig_index.get(&num).unwrap().borrow()
.unitig_type != UnitigType::Anchor) { Some(component) }
else { None } })
.flat_map(|component| component.into_iter())
.collect();
Expand Down
6 changes: 3 additions & 3 deletions src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ fn visit_dirs_for_yaml_files(dir: &Path, yaml_files: &mut Vec<PathBuf>) {
let path = entry.path();
if path.is_dir() {
visit_dirs_for_yaml_files(&path, yaml_files);
} else if path.extension().map_or(false, |ext| ext == "yaml") {
} else if path.extension().is_some_and(|ext| ext == "yaml") {
yaml_files.push(path);
}
}
Expand All @@ -144,7 +144,7 @@ fn get_one_copy_yaml(yaml_files: &[PathBuf], filename: &str) -> Option<PathBuf>
// Returns the YAML file in the given path with a matching filename. No match is okay and one
// match is okay, but multiple matches will result in an error.
let found_files = yaml_files.iter()
.filter(|path| path.file_name().map_or(false, |name| name == filename)).collect::<Vec<_>>();
.filter(|path| path.file_name().is_some_and(|name| name == filename)).collect::<Vec<_>>();
match found_files.len() {
0 => None,
1 => Some(found_files[0].clone()),
Expand All @@ -157,7 +157,7 @@ fn get_multi_copy_yaml(yaml_files: &[PathBuf], filename: &str) -> Vec<PathBuf> {
// Returns all YAML files in the given path with a matching filename, excluding those that are
// in a qc_fail directory.
yaml_files.iter().filter(|path| {
path.file_name().map_or(false, |name| name == filename) &&
path.file_name().is_some_and(|name| name == filename) &&
!path.to_string_lossy().contains("/qc_fail/")
}).cloned().collect()
}
Expand Down
4 changes: 2 additions & 2 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,12 @@ fn test_high_level(seq_a: &str, seq_b: &str, seq_c: &str, seq_d: &str, seq_e: &s
let mut unitig_graph = UnitigGraph::from_kmer_graph(&kmer_graph);
simplify_structure(&mut unitig_graph, &sequences);
let gfa_1 = graph_dir.path().join("graph_1.gfa");
unitig_graph.save_gfa(&gfa_1, &sequences).unwrap();
unitig_graph.save_gfa(&gfa_1, &sequences, false).unwrap();

// Load the unitig graph from file, save it back to file and ensure the files are the same.
let gfa_2 = graph_dir.path().join("graph_2.gfa");
let (unitig_graph, sequences) = UnitigGraph::from_gfa_file(&gfa_1);
unitig_graph.save_gfa(&gfa_2, &sequences).unwrap();
unitig_graph.save_gfa(&gfa_2, &sequences, false).unwrap();
assert_same_content(&gfa_1, &gfa_2);

// Reconstruct the sequences from the unitig graph.
Expand Down
2 changes: 1 addition & 1 deletion src/trim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub fn trim(cluster_dir: PathBuf, min_identity: f64, max_unitigs: usize, mad: f6
let sequences = choose_trim_type(start_end_results, hairpin_results, &mut graph, &sequences);
let sequences = exclude_outliers_in_length(&mut graph, &sequences, mad);
clean_up_graph(&mut graph, &sequences);
graph.save_gfa(&trimmed_gfa, &sequences).unwrap();
graph.save_gfa(&trimmed_gfa, &sequences, false).unwrap();
save_metrics(&trimmed_yaml, &sequences);
finished_message(&trimmed_gfa);
}
Expand Down
71 changes: 40 additions & 31 deletions src/unitig.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crate::position::Position;
static ANCHOR_COLOUR: &str = "forestgreen";
static BRIDGE_COLOUR: &str = "pink";
static CONSENTIG_COLOUR: &str = "steelblue";
static OTHER_COLOUR: &str = "orangered";


#[derive(Clone, Default)]
Expand All @@ -34,12 +35,7 @@ pub struct Unitig {
pub forward_seq: Vec<u8>,
pub reverse_seq: Vec<u8>,
pub depth: f64,

// TODO: I might want to drop anchor and bridge and instead create a unitig-type enum that
// can cover all options: anchor, bridge, consentig, etc.
pub anchor: bool,
pub bridge: bool,

pub unitig_type: UnitigType, // anchor, bridge, consentig or other
pub forward_positions: Vec<Position>,
pub reverse_positions: Vec<Position>,
pub forward_next: Vec<UnitigStrand>,
Expand Down Expand Up @@ -79,12 +75,17 @@ impl Unitig {
quit_with_error("Could not find a depth tag (e.g. DP:f:10.00) in the GFA segment \
line.\nAre you sure this is an Autocycler-generated GFA file?");
});
let anchor = parts.iter().any(|p| *p == format!("CL:z:{}", ANCHOR_COLOUR)) ||
parts.iter().any(|p| *p == format!("CL:z:{}", CONSENTIG_COLOUR));
let bridge = parts.iter().any(|p| *p == format!("CL:z:{}", BRIDGE_COLOUR)) ||
parts.iter().any(|p| *p == format!("CL:z:{}", CONSENTIG_COLOUR));
let unitig_type = if parts.iter().any(|p| *p == format!("CL:z:{}", CONSENTIG_COLOUR)) {
UnitigType::Consentig
} else if parts.iter().any(|p| *p == format!("CL:z:{}", ANCHOR_COLOUR)) {
UnitigType::Anchor
} else if parts.iter().any(|p| *p == format!("CL:z:{}", BRIDGE_COLOUR)) {
UnitigType::Bridge
} else {
UnitigType::Other
};
Unitig {
number, forward_seq, reverse_seq, depth, anchor, bridge,
number, forward_seq, reverse_seq, depth, unitig_type,
..Default::default()
}
}
Expand All @@ -93,7 +94,7 @@ impl Unitig {
// This constructor is for manually building a Unitig object when creating bridges.
let reverse_seq = reverse_complement(&forward_seq);
Unitig {
number, forward_seq, reverse_seq, depth, bridge: true,
number, forward_seq, reverse_seq, depth, unitig_type: UnitigType::Bridge,
..Default::default()
}
}
Expand Down Expand Up @@ -163,16 +164,20 @@ impl Unitig {
assert!(!self.forward_seq.is_empty());
}

pub fn gfa_segment_line(&self) -> String {
pub fn gfa_segment_line(&self, use_other_colour: bool) -> String {
let seq_str = String::from_utf8_lossy(&self.forward_seq);
format!("S\t{}\t{}\tDP:f:{:.2}{}", self.number, seq_str, self.depth, self.colour_tag())
format!("S\t{}\t{}\tDP:f:{:.2}{}", self.number, seq_str, self.depth,
self.colour_tag(use_other_colour))
}

pub fn colour_tag(&self) -> String {
if self.is_consentig() { format!("\tCL:z:{}", CONSENTIG_COLOUR) }
else if self.anchor { format!("\tCL:z:{}", ANCHOR_COLOUR) }
else if self.bridge { format!("\tCL:z:{}", BRIDGE_COLOUR) }
else { String::new() }
pub fn colour_tag(&self, use_other_colour: bool) -> String {
match self.unitig_type {
UnitigType::Consentig => format!("\tCL:z:{}", CONSENTIG_COLOUR),
UnitigType::Anchor => format!("\tCL:z:{}", ANCHOR_COLOUR),
UnitigType::Bridge => format!("\tCL:z:{}", BRIDGE_COLOUR),
UnitigType::Other => { if use_other_colour { format!("\tCL:z:{}", OTHER_COLOUR) }
else { String::new() } }
}
}

pub fn length(&self) -> u32 {
Expand Down Expand Up @@ -277,16 +282,6 @@ impl Unitig {
next.number() == self.number && next.strand && prev.number() == self.number && prev.strand
}

fn is_consentig(&self) -> bool {
// A unitig is labelled as consentig by having both the anchor and bridge flags set.
self.anchor && self.bridge
}

pub fn set_as_consentig(&mut self) {
self.anchor = true;
self.bridge = true;
}

pub fn clear_all_links(&mut self) {
self.forward_next.clear();
self.forward_prev.clear();
Expand Down Expand Up @@ -353,8 +348,12 @@ impl UnitigStrand {
self.unitig.borrow().get_seq(self.strand)
}

pub fn anchor(&self) -> bool {
self.unitig.borrow().anchor
pub fn is_anchor(&self) -> bool {
self.unitig.borrow().unitig_type == UnitigType::Anchor
}

pub fn is_consentig(&self) -> bool {
self.unitig.borrow().unitig_type == UnitigType::Consentig
}
}

Expand All @@ -369,6 +368,16 @@ impl fmt::Debug for UnitigStrand {
}


/// Classification of a unitig, stored in `Unitig::unitig_type`. `Unitig::colour_tag` matches on
/// this value to choose the `CL:z:` colour tag for the unitig's GFA segment line, and the GFA
/// segment-line parser maps a recognised `CL:z:` colour back to the corresponding variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum UnitigType {
/// Tagged with `ANCHOR_COLOUR`. `find_anchor_unitigs` in resolve.rs assigns this when a unitig's
/// sorted forward-position sequence IDs equal `all_seq_ids` (presumably the IDs of every input
/// sequence -- confirm against the full function).
Anchor,
/// Tagged with `BRIDGE_COLOUR`. Assigned by the constructor used for manually building a unitig
/// when creating bridges.
Bridge,
/// Tagged with `CONSENTIG_COLOUR`. `merge_path` in graph_simplification.rs assigns this to a
/// merged unitig when any unitig in the merged path is an anchor or a consentig.
Consentig,
/// The default type, also used when a parsed GFA segment line carries none of the recognised
/// colour tags. Tagged with `OTHER_COLOUR` only when `colour_tag` is called with
/// `use_other_colour` set to true; otherwise no colour tag is written.
#[default]
Other,
}


#[cfg(test)]
mod tests {
use super::*;
Expand Down
5 changes: 3 additions & 2 deletions src/unitig_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,11 +314,12 @@ impl UnitigGraph {
self.build_unitig_index();
}

pub fn save_gfa(&self, gfa_filename: &Path, sequences: &Vec<Sequence>) -> io::Result<()> {
pub fn save_gfa(&self, gfa_filename: &Path, sequences: &Vec<Sequence>,
use_other_colour: bool) -> io::Result<()> {
let mut file = File::create(gfa_filename)?;
writeln!(file, "H\tVN:Z:1.0\tKM:i:{}", self.k_size)?;
for unitig in &self.unitigs {
writeln!(file, "{}", unitig.borrow().gfa_segment_line())?;
writeln!(file, "{}", unitig.borrow().gfa_segment_line(use_other_colour))?;
}
for (a, a_strand, b, b_strand) in self.get_links_for_gfa(0) {
writeln!(file, "L\t{}\t{}\t{}\t{}\t0M", a, a_strand, b, b_strand)?;
Expand Down

0 comments on commit dd8dcb3

Please sign in to comment.