Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/mskcc/cmo
Browse files Browse the repository at this point in the history
  • Loading branch information
lordzappo committed Apr 4, 2017
2 parents d602556 + 61de7b1 commit 0f56eba
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 34 deletions.
30 changes: 20 additions & 10 deletions bin/cmo_maf2maf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/opt/common/CentOS_6-dev/python/python-2.7.10/bin/python

import argparse, os, sys, re, subprocess, itertools, glob, tempfile, shutil
import argparse, os, sys, re, subprocess, itertools, glob, getpass, tempfile, shutil
from operator import attrgetter
import textwrap as _textwrap
import cmo
Expand Down Expand Up @@ -40,8 +40,8 @@ if __name__ =='__main__':
defaults_dict['--custom-enst'] = cmo.util.programs['vcf2maf'][options.version] + "data/isoform_overrides_at_mskcc"
defaults_dict['--filter-vcf'] = cmo.util.genomes['GRCh37']['exac']
defaults_dict['--retain-cols'] = 'Center,Verification_Status,Validation_Status,Mutation_Status,Sequencing_Phase,Sequence_Source,Validation_Method,Score,BAM_file,Sequencer,Tumor_Sample_UUID,Matched_Norm_Sample_UUID,Caller'
tmp_dir = tempfile.mkdtemp(dir='/scratch') if os.path.exists('/scratch') else tempfile.mkdtemp(dir='/tmp');
defaults_dict['--tmp-dir'] = tmp_dir
tmp_root = "/scratch/<username>/..."
defaults_dict['--tmp-dir'] = tmp_root

# With arguments and defaults set, let's construct an argparse instance
parser = argparse.ArgumentParser(parents = [preparser], add_help=True, formatter_class=SortingHelpFormatter)
Expand All @@ -53,12 +53,25 @@ if __name__ =='__main__':
parser.add_argument(arg, action="store", metavar='', help=description, default=defaults_dict[arg])
else:
parser.add_argument(arg, action="store", metavar='', help=description)
cmo.util.add_logging_options(parser)

# Now run the argparse instance, which will parse and execute, or print help text if requested
args = parser.parse_args()
args_dict = vars(args)

# If user didn't define their own --tmp-dir, let's create one for them under /scratch/username
if args_dict['tmp_dir'] == tmp_root:
# Create a subdirectory under /scratch with the username, if it doesn't already exist
tmp_root = "/scratch/" + getpass.getuser()
if not os.path.exists(tmp_root):
os.makedirs(tmp_root)
# For machines without writable /scratch, default to creating a temp folder under /tmp
tmp_dir = tempfile.mkdtemp(dir=tmp_root) if os.path.exists(tmp_root) else tempfile.mkdtemp(dir='/tmp')
args_dict['tmp_dir'] = tmp_dir
# Show the user a warning about limited storage in temp directories
sys.stderr.write( "WARNING: Writing temporary files to " + tmp_dir + " which could fill " +
"up and interrupt your colleagues' work. If you're working with giant files, then " +
"please define your own --tmp-dir, or we're gonna get ya!\n" )

# Locate VEP and its cache, the reference FASTA, and the VCF used for filtering
vep_dir = cmo.util.programs['vep'][args.vep_release]
args_dict['vep_data'] = vep_dir
Expand All @@ -75,16 +88,13 @@ if __name__ =='__main__':

# Build the command we're going to run
cmd = [cmo.util.programs['perl']['default'], script_path]
stderr = args.stderr
stdout = args.stdout
# Trim out arguments without values, and also any args that might mess with our logging
# Trim out arguments without values
args_dict = dict((k, v) for k, v in args_dict.iteritems() if v)
cmo.util.remove_logging_options_from_dict(args_dict)

# Make sure the arguments are in a format that the script will accept, and kick it off
for arg, value in args_dict.items():
arg = arg.replace("_","-")
cmd = cmd + ["--"+arg, value]
sys.stderr.write( "Running: " + " ".join( cmd ) + "\n" )
cmo.util.call_cmd( " ".join( cmd ), stdout=stdout, stderr=stderr )
sys.stderr.write( "RUNNING: " + " ".join( cmd ) + "\n" )
cmo.util.call_cmd( " ".join( cmd ))
shutil.rmtree(tmp_dir)
60 changes: 46 additions & 14 deletions bin/cmo_maf2vcf
Original file line number Diff line number Diff line change
@@ -1,37 +1,69 @@
#!/opt/common/CentOS_6-dev/python/python-2.7.10/bin/python

import argparse, os, sys, re, subprocess, itertools, glob
from operator import attrgetter
import textwrap as _textwrap
import cmo

# Custom help formatter to display args in alphabetical order, and fitted line wrap for sphinx
class SortingHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
def add_arguments(self, actions):
actions = sorted(actions, key=attrgetter('option_strings'))
super(SortingHelpFormatter, self).add_arguments(actions)
def _split_lines(self, text, width):
text = self._whitespace_matcher.sub(' ', text).strip()
return _textwrap.wrap(text, 78)

# Function that runs --help on the tool we've wrapped, and extracts documentation
def parse_script_help(script_path):
perl = cmo.util.programs['perl']['default']
help_text = subprocess.Popen(" ".join([perl, script_path, "-h"]),stdout=subprocess.PIPE,shell=True).communicate()[0]
valid_args = re.findall(r"\s+(--[\S_]+)\s+([\S \t]+)\n?", help_text, re.M)
return dict(valid_args)
help_text = subprocess.Popen(" ".join([perl, script_path, "-h"]), stdout=subprocess.PIPE, shell=True).communicate()[0]
valid_args = re.findall(r"^\s*(--\S+)\s+([^\[\n]+)", help_text, re.M)
defaults = re.findall(r"^\s*(--\S+)\s+[\S ]+\[([\S ]+)\]$", help_text, re.M)
return dict(valid_args), dict(defaults)

if __name__ =='__main__':
# We'll first need to figure out which version to run with "-h" to parse the help text
preparser = argparse.ArgumentParser(description="run maf2vcf", add_help=False)
preparser.add_argument("--version", choices=cmo.util.programs['vcf2maf'].keys(), default="default")
preparser = argparse.ArgumentParser(description="Run maf2vcf", add_help=False, formatter_class=SortingHelpFormatter)
preparser.add_argument("--version", help="Version of tool to run", choices=cmo.util.programs['vcf2maf'].keys(), default="default")
preparser.add_argument("--ncbi-build", help="Genome build of variants in input", choices=["GRCh37","GRCh38","GRCm38"], default="GRCh37")
options, _ = preparser.parse_known_args()

# Figure out the path to the actual Perl script that this Python wrapper will run
script_path = cmo.util.programs['vcf2maf'][options.version] + "maf2vcf.pl"
args_dict = parse_script_help(script_path)
parser = argparse.ArgumentParser(parents = [preparser], add_help=True)
# Extract arguments and their defaults, by parsing the --help output
args_dict, defaults_dict = parse_script_help(script_path)

# With arguments and defaults set, let's construct an argparse instance
parser = argparse.ArgumentParser(parents = [preparser], add_help=True, formatter_class=SortingHelpFormatter)
for arg, description in args_dict.items():
if arg == "--help":
# Hide a few arguments from the user, because we'll determine them ourselves
if arg in ["--help","--man","--ref-fasta"]:
continue
parser.add_argument(arg,action="store", metavar='', help=description)
cmo.util.add_logging_options(parser)
if arg in defaults_dict and arg not in ["--output-maf"]:
parser.add_argument(arg, action="store", metavar='', help=description, default=defaults_dict[arg])
else:
parser.add_argument(arg, action="store", metavar='', help=description)

# Now run the argparse instance, which will parse and execute, or print help text if requested
args = parser.parse_args()
args_dict = vars(args)

# Locate the reference for this genome build
args_dict['ref_fasta'] = cmo.util.genomes[args.ncbi_build]['fasta']

# Remove arguments that the actual wrapped tool won't recognize
for key in ["version"]:
del args_dict[key]

# Build the command we're going to run
cmd = [cmo.util.programs['perl']['default'], script_path]
stderr = args.stderr
stdout = args.stdout
# Trim out arguments without values
args_dict = dict((k, v) for k, v in args_dict.iteritems() if v)
cmo.util.remove_logging_options_from_dict(args_dict)

# Make sure the arguments are in a format that the script will accept, and kick it off
for arg, value in args_dict.items():
arg = arg.replace("_","-")
cmd = cmd + ["--"+arg, value]
cmo.util.call_cmd(" ".join(cmd), stdout=stdout, stderr=stderr)
sys.stderr.write( "RUNNING: " + " ".join( cmd ) + "\n" )
cmo.util.call_cmd( " ".join( cmd ))
30 changes: 20 additions & 10 deletions bin/cmo_vcf2maf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/opt/common/CentOS_6-dev/python/python-2.7.10/bin/python

import argparse, os, sys, re, subprocess, itertools, glob, tempfile, shutil
import argparse, os, sys, re, subprocess, itertools, glob, getpass, tempfile, shutil
from operator import attrgetter
import textwrap as _textwrap
import cmo
Expand Down Expand Up @@ -42,8 +42,8 @@ if __name__ =='__main__':
defaults_dict['--maf-center'] = 'mskcc.org'
defaults_dict['--vcf-tumor-id'] = defaults_dict['--tumor-id']
defaults_dict['--vcf-normal-id'] = defaults_dict['--normal-id']
tmp_dir = tempfile.mkdtemp(dir='/scratch') if os.path.exists('/scratch') else tempfile.mkdtemp(dir='/tmp');
defaults_dict['--tmp-dir'] = tmp_dir
tmp_root = "/scratch/<username>/..."
defaults_dict['--tmp-dir'] = tmp_root

# With arguments and defaults set, let's construct an argparse instance
parser = argparse.ArgumentParser(parents = [preparser], add_help=True, formatter_class=SortingHelpFormatter)
Expand All @@ -55,12 +55,25 @@ if __name__ =='__main__':
parser.add_argument(arg, action="store", metavar='', help=description, default=defaults_dict[arg])
else:
parser.add_argument(arg, action="store", metavar='', help=description)
cmo.util.add_logging_options(parser)

# Now run the argparse instance, which will parse and execute, or print help text if requested
args = parser.parse_args()
args_dict = vars(args)

# If user didn't define their own --tmp-dir, let's create one for them under /scratch/username
if args_dict['tmp_dir'] == tmp_root:
# Create a subdirectory under /scratch with the username, if it doesn't already exist
tmp_root = "/scratch/" + getpass.getuser()
if not os.path.exists(tmp_root):
os.makedirs(tmp_root)
# For machines without writable /scratch, default to creating a temp folder under /tmp
tmp_dir = tempfile.mkdtemp(dir=tmp_root) if os.path.exists(tmp_root) else tempfile.mkdtemp(dir='/tmp')
args_dict['tmp_dir'] = tmp_dir
# Show the user a warning about limited storage in temp directories
sys.stderr.write( "WARNING: Writing temporary files to " + tmp_dir + " which could fill " +
"up and interrupt your colleagues' work. If you're working with giant files, then " +
"please define your own --tmp-dir, or we're gonna get ya!\n" )

# Locate VEP and its cache, the reference FASTA, and the VCF used for filtering
vep_dir = cmo.util.programs['vep'][args.vep_release]
args_dict['vep_data'] = vep_dir
Expand All @@ -77,16 +90,13 @@ if __name__ =='__main__':

# Build the command we're going to run
cmd = [cmo.util.programs['perl']['default'], script_path]
stderr = args.stderr
stdout = args.stdout
# Trim out arguments without values, and also any args that might mess with our logging
# Trim out arguments without values
args_dict = dict((k, v) for k, v in args_dict.iteritems() if v)
cmo.util.remove_logging_options_from_dict(args_dict)

# Make sure the arguments are in a format that the script will accept, and kick it off
for arg, value in args_dict.items():
arg = arg.replace("_","-")
cmd = cmd + ["--"+arg, value]
sys.stderr.write( "Running: " + " ".join( cmd ) + "\n" )
cmo.util.call_cmd( " ".join( cmd ), stdout=stdout, stderr=stderr )
sys.stderr.write( "RUNNING: " + " ".join( cmd ) + "\n" )
cmo.util.call_cmd( " ".join( cmd ))
shutil.rmtree(tmp_dir)
69 changes: 69 additions & 0 deletions bin/cmo_vcf2vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/opt/common/CentOS_6-dev/python/python-2.7.10/bin/python

import argparse, os, sys, re, subprocess, itertools, glob
from operator import attrgetter
import textwrap as _textwrap
import cmo

# Custom help formatter to display args in alphabetical order, and fitted line wrap for sphinx
class SortingHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that sorts options alphabetically and wraps at a fixed width.

    Defaults are still appended to each help string, as inherited from
    ``argparse.ArgumentDefaultsHelpFormatter``.
    """

    # Column at which help text is wrapped. It is fixed rather than taken from
    # the terminal so the rendered help is stable (the wrap was fitted for
    # sphinx). Subclasses may override it.
    wrap_width = 78

    def add_arguments(self, actions):
        """Add ``actions`` to the help output, ordered by their option strings."""
        ordered = sorted(actions, key=attrgetter('option_strings'))
        super(SortingHelpFormatter, self).add_arguments(ordered)

    def _split_lines(self, text, width):
        """Collapse runs of whitespace in ``text`` and wrap it at ``wrap_width``.

        The ``width`` passed in by argparse is intentionally ignored; the
        class-level ``wrap_width`` is used instead.
        """
        text = self._whitespace_matcher.sub(' ', text).strip()
        return _textwrap.wrap(text, self.wrap_width)

# Function that runs --help on the tool we've wrapped, and extracts documentation
def parse_script_help(script_path):
    """Run the wrapped Perl script with ``-h`` and scrape its options from the output.

    Args:
        script_path: Path to the Perl script whose help text should be parsed.

    Returns:
        A ``(descriptions, defaults)`` pair of dicts keyed by option name
        (including the leading ``--``). ``descriptions`` maps each option to
        the text that follows it on its help line, up to the first ``[``.
        ``defaults`` maps an option to the bracketed text that ends its help
        line, and only contains options whose line ends in ``[...]``.
    """
    perl = cmo.util.programs['perl']['default']
    # NOTE(review): this is run as a single shell string, so a script_path
    # containing spaces or shell metacharacters would be split or interpreted
    # by the shell. Left as-is because the configured perl entry may itself be
    # a multi-word command -- confirm before switching to an argument list.
    help_cmd = " ".join([perl, script_path, "-h"])
    # Text mode: communicate() returns text with line endings normalised to
    # "\n", which is what the str patterns and the "$" anchor below expect.
    proc = subprocess.Popen(help_cmd, stdout=subprocess.PIPE, shell=True, universal_newlines=True)
    help_text = proc.communicate()[0]
    # "--option   description", stopping at "[" so a trailing default is excluded.
    valid_args = re.findall(r"^\s*(--\S+)\s+([^\[\n]+)", help_text, re.M)
    # "--option   description [default]" where the bracket closes the line.
    defaults = re.findall(r"^\s*(--\S+)\s+[\S ]+\[([\S ]+)\]$", help_text, re.M)
    return dict(valid_args), dict(defaults)

if __name__ == '__main__':
    # shlex.quote is the Python 3 name; pipes.quote is the Python 2.7 equivalent.
    try:
        from shlex import quote as _shell_quote
    except ImportError:
        from pipes import quote as _shell_quote

    # We'll first need to figure out which version to run with "-h" to parse the help text
    preparser = argparse.ArgumentParser(description="Run vcf2vcf", add_help=False, formatter_class=SortingHelpFormatter)
    preparser.add_argument("--version", help="Version of tool to run", choices=cmo.util.programs['vcf2maf'].keys(), default="default")
    preparser.add_argument("--ncbi-build", help="Genome build of variants in input", choices=["GRCh37","GRCh38","GRCm38"], default="GRCh37")
    options, _ = preparser.parse_known_args()

    # Figure out the path to the actual Perl script that this Python wrapper will run
    script_path = cmo.util.programs['vcf2maf'][options.version] + "vcf2vcf.pl"
    # Extract arguments and their defaults, by parsing the --help output
    args_dict, defaults_dict = parse_script_help(script_path)

    # With arguments and defaults set, let's construct an argparse instance
    parser = argparse.ArgumentParser(parents=[preparser], add_help=True, formatter_class=SortingHelpFormatter)
    for arg, description in args_dict.items():
        # Hide a few arguments from the user, because we'll determine them ourselves
        if arg in ["--help", "--man", "--ref-fasta"]:
            continue
        if arg in defaults_dict and arg not in ["--output-maf"]:
            parser.add_argument(arg, action="store", metavar='', help=description, default=defaults_dict[arg])
        else:
            parser.add_argument(arg, action="store", metavar='', help=description)

    # Now run the argparse instance, which will parse and execute, or print help text if requested
    args = parser.parse_args()
    # vars() exposes the namespace's own dict; from here on it is used as the
    # set of options to forward to the Perl script.
    args_dict = vars(args)

    # Locate the reference for this genome build
    args_dict['ref_fasta'] = cmo.util.genomes[args.ncbi_build]['fasta']

    # Remove arguments that the actual wrapped tool won't recognize
    for key in ["version"]:
        del args_dict[key]

    # Build the command we're going to run
    cmd = [cmo.util.programs['perl']['default'], script_path]
    # Trim out arguments without values
    args_dict = dict((k, v) for k, v in args_dict.items() if v)

    # Make sure the arguments are in a format that the script will accept, and kick it off.
    # Values are shell-quoted because the command is handed over as one joined
    # string; values made only of shell-safe characters pass through unchanged.
    # NOTE(review): presumes cmo.util.call_cmd runs the string through a shell
    # (or a shlex-style splitter) -- confirm against its implementation.
    for arg, value in args_dict.items():
        arg = arg.replace("_", "-")
        cmd = cmd + ["--" + arg, _shell_quote(value)]
    sys.stderr.write( "RUNNING: " + " ".join( cmd ) + "\n" )
    cmo.util.call_cmd( " ".join( cmd ))

0 comments on commit 0f56eba

Please sign in to comment.