diff --git a/src/main.cpp b/src/main.cpp index e93f9d9..be80c4e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -127,7 +127,8 @@ int main(int argc, char* argv[]){ cmd.add("umi_loc", 0, "specify the location of UMI, can be (index1/index2/read1/read2/per_index/per_read, default is none", false, ""); cmd.add("umi_len", 0, "if the UMI is in read1/read2, its length should be provided", false, 0); cmd.add("umi_prefix", 0, "if specified, an underline will be used to connect prefix and UMI (i.e. prefix=UMI, UMI=AATTCG, final=UMI_AATTCG). No prefix by default", false, ""); - cmd.add("umi_skip", 0, "if the UMI is in read1/read2, fastp can skip several bases following UMI, default is 0", false, 0); + cmd.add("umi_skip", 0, "if the UMI is in read1/read2, fastp can skip several bases following UMI, default is 0", false, 0, cmdline::range(0, 100)); + cmd.add("umi_skipb", 0, "if the UMI is in read1/read2, fastp can skip several bases before trim UMI, default is 0", false, 0, cmdline::range(0, 100)); cmd.add("umi_delim", 0, "delimiter to use between the read name and the UMI, default is :", false, ":"); // overrepresented sequence analysis @@ -384,6 +385,7 @@ int main(int argc, char* argv[]){ opt.umi.length = cmd.get("umi_len"); opt.umi.prefix = cmd.get("umi_prefix"); opt.umi.skip = cmd.get("umi_skip"); + opt.umi.skipb = cmd.get("umi_skipb"); opt.umi.delimiter = cmd.get("umi_delim"); if(opt.umi.enabled) { string umiLoc = cmd.get("umi_loc"); diff --git a/src/options.h b/src/options.h index 57086b2..6ae2348 100644 --- a/src/options.h +++ b/src/options.h @@ -108,6 +108,7 @@ class UMIOptions { location = UMI_LOC_NONE; length = 0; skip = 0; + skipb = 0; delimiter= ":"; } public: @@ -115,6 +116,7 @@ class UMIOptions { int location; int length; int skip; + int skipb; string prefix; string separator; string delimiter; diff --git a/src/umiprocessor.cpp b/src/umiprocessor.cpp index 4c2c829..8b8b548 100644 --- a/src/umiprocessor.cpp +++ b/src/umiprocessor.cpp @@ -18,12 +18,12 @@ void 
UmiProcessor::process(Read* r1, Read* r2) { else if(mOptions->umi.location == UMI_LOC_INDEX2 && r2) umi = r2->lastIndex(); else if(mOptions->umi.location == UMI_LOC_READ1){ - umi = r1->mSeq->substr(0, min(r1->length(), mOptions->umi.length)); - r1->trimFront(umi.length() + mOptions->umi.skip); + /* clamp the start offset: substr() throws std::out_of_range when pos > size(), i.e. on reads shorter than umi.skipb */ umi = r1->mSeq->substr(min(r1->length(), mOptions->umi.skipb), min(r1->length(), mOptions->umi.length)); + r1->trimFront(mOptions->umi.skipb + umi.length() + mOptions->umi.skip); } else if(mOptions->umi.location == UMI_LOC_READ2 && r2){ - umi = r2->mSeq->substr(0, min(r2->length(), mOptions->umi.length)); - r2->trimFront(umi.length() + mOptions->umi.skip); + /* clamp the start offset so a read shorter than umi.skipb yields an empty UMI instead of throwing */ umi = r2->mSeq->substr(min(r2->length(), mOptions->umi.skipb), min(r2->length(), mOptions->umi.length)); + r2->trimFront(mOptions->umi.skipb + umi.length() + mOptions->umi.skip); } else if(mOptions->umi.location == UMI_LOC_PER_INDEX){ string umiMerged = r1->firstIndex(); @@ -37,13 +37,13 @@ void UmiProcessor::process(Read* r1, Read* r2) { } } else if(mOptions->umi.location == UMI_LOC_PER_READ){ - string umi1 = r1->mSeq->substr(0, min(r1->length(), mOptions->umi.length)); + /* clamp the start offset: substr() throws std::out_of_range when pos > size() */ string umi1 = r1->mSeq->substr(min(r1->length(), mOptions->umi.skipb), min(r1->length(), mOptions->umi.length)); string umiMerged = umi1; - r1->trimFront(umi1.length() + mOptions->umi.skip); + r1->trimFront(mOptions->umi.skipb + umi1.length() + mOptions->umi.skip); if(r2){ - string umi2 = r2->mSeq->substr(0, min(r2->length(), mOptions->umi.length)); + /* clamp the start offset: substr() throws std::out_of_range when pos > size() */ string umi2 = r2->mSeq->substr(min(r2->length(), mOptions->umi.skipb), min(r2->length(), mOptions->umi.length)); umiMerged = umiMerged + "_" + umi2; - r2->trimFront(umi2.length() + mOptions->umi.skip); + r2->trimFront(mOptions->umi.skipb + umi2.length() + mOptions->umi.skip); } addUmiToName(r1, umiMerged);