Source code for camparee.bowtie2

import os
import sys
import argparse
import subprocess
import json

from camparee.abstract_camparee_step import AbstractCampareeStep
from camparee.camparee_utils import CampareeException
from camparee.camparee_constants import CAMPAREE_CONSTANTS
from beers_utils.sample import Sample

# TODO: Add support for additional command line arguments to pass to Bowtie2 commands.

class Bowtie2IndexStep(AbstractCampareeStep):
    """Wrapper around generating a Bowtie2 index.

    Builds the bowtie2-build command line, runs it through the shell, and
    records the command, its output, and a final "ALL DONE!" marker in a
    per-sample log file. The marker is what is_output_valid() looks for.
    """

    BOWTIE2_INDEX_DIR_PATTERN = CAMPAREE_CONSTANTS.BOWTIE2_INDEX_DIR_PATTERN
    BOWTIE2_INDEX_PREFIX_PATTERN = CAMPAREE_CONSTANTS.BOWTIE2_INDEX_PREFIX_PATTERN
    BOWTIE2_INDEX_LOG_FILENAME_PATTERN = CAMPAREE_CONSTANTS.BOWTIE2_INDEX_LOG_FILENAME_PATTERN

    # The basic Bowtie2 command used to generate indexes from a given FASTA.
    BASE_BOWTIE2_INDEX_COMMAND = ('{bowtie2_bin_dir}/bowtie2-build'
                                  ' --threads {num_bowtie2_threads}'
                                  ' {bowtie2_cmd_options}'
                                  ' {reference_fasta}'
                                  ' {output_index_prefix}')

    # Suffixes of the six index files whose presence is_output_valid() checks.
    _INDEX_FILE_SUFFIXES = (".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
                            ".rev.1.bt2", ".rev.2.bt2")

    def __init__(self, log_directory_path, data_directory_path, parameters=None):
        """Constructor for Bowtie2IndexStep object.

        Parameters
        ----------
        log_directory_path : string
            Full path to log directory.
        data_directory_path : string
            Full path to data directory.
        parameters : dict
            [Optional] Dictionary of Bowtie2 parameters specified by the
            config file (Note, the "num_bowtie_threads" entry in the config
            file maps to the bowtie2 "--threads" command line parameter).
            The dictionary is copied; the caller's object is never modified.

        """
        self.data_directory_path = data_directory_path
        self.log_directory_path = log_directory_path
        # Copy before popping so that neither the caller's dictionary nor a
        # shared default object loses its "num_bowtie_threads" entry.
        parameters = dict(parameters) if parameters else {}
        self.num_bowtie2_threads = parameters.pop('num_bowtie_threads', 1)
        # Remaining parameters (if any) aside from "num_bowtie_threads"
        self.bowtie2_cmd_options = parameters

    def validate(self):
        """Check all given Bowtie2 parameters are correctly formed (i.e. start
        with single or double dash), and do not conflict with the one that is
        specified elsewhere in the config file (--threads).

        Returns
        -------
        boolean
            True if every parameter name is acceptable, False otherwise. A
            message describing the first offending parameter is printed to
            stderr.

        """
        # The value given to the "--threads" parameter is specified by the
        # num_bowtie_threads entry in the config file.
        invalid_bowtie2_parameters = ["--threads"]

        for key, value in self.bowtie2_cmd_options.items():
            if not key.startswith("-"):
                print(f"Bowtie2 index parameter {key} with value {value} needs"
                      f" to be a Bowtie2 option starting with single (-) or double"
                      f" dashes (--).", file=sys.stderr)
                return False
            if key in invalid_bowtie2_parameters:
                print(f"Bowtie2 index parameter {key} with value {value} cannot"
                      f" be used as a Bowtie2 option since the value is either"
                      f" hard-coded by this script, or explicitly specfied"
                      f" elsewhere in the config file.", file=sys.stderr)
                return False

        return True

    def execute(self, sample_id, genome_suffix, bowtie2_bin_dir, transcriptome_fasta_path):
        """Build Bowtie2 index from the given FASTA file of transcripts.

        Parameters
        ----------
        sample_id : string
            Identifier for sample corresponding to reference transcriptome.
            Used to construct index and log paths for this specific Bowtie2
            execution.
        genome_suffix : string
            Suffix to identify the parent/allele of the transcriptome. Should
            be 1 or 2. This same suffix is appended to all output
            files/directories.
        bowtie2_bin_dir : string
            Path to the directory containing the bowtie2-build executable.
        transcriptome_fasta_path : string
            Path to the FASTA file of transcripts, used as the basis for the
            Bowtie2 index. This is generally prepared by the
            TranscriptomeFastaPreparationStep.

        Raises
        ------
        CampareeException
            If the bowtie2-build command exits with a non-zero status.

        """
        bowtie2_index_dir_path = os.path.join(
            self.data_directory_path, f'sample{sample_id}',
            Bowtie2IndexStep.BOWTIE2_INDEX_DIR_PATTERN.format(genome_name=genome_suffix))
        bowtie2_index_file_prefix = os.path.join(
            bowtie2_index_dir_path,
            Bowtie2IndexStep.BOWTIE2_INDEX_PREFIX_PATTERN.format(genome_name=genome_suffix))
        log_file_path = os.path.join(
            self.log_directory_path, f'sample{sample_id}',
            Bowtie2IndexStep.BOWTIE2_INDEX_LOG_FILENAME_PATTERN.format(genome_name=genome_suffix))

        with open(log_file_path, 'w') as log_file:

            print(f"Building Bowtie2 indexes for transcriptome {genome_suffix} "
                  f"of sample{sample_id}.")
            log_file.write(f"Building Bowtie2 indexes for transcriptome {genome_suffix} "
                           f"of sample{sample_id}.\n")

            log_file.write(f"Parameters:\n"
                           f" Bowtie2 binary directory: {bowtie2_bin_dir}\n"
                           f" Bowtie2 index directory: {bowtie2_index_dir_path}\n"
                           f" Bowtie2 index file prefix: {bowtie2_index_file_prefix}\n"
                           f" Input transcriptome FASTA: {transcriptome_fasta_path}\n"
                           f" Number of Bowtie2 threads: {self.num_bowtie2_threads}\n")

            log_file.write("Create Bowtie2 index directory.\n")
            if os.path.isdir(bowtie2_index_dir_path):
                log_file.write("Bowtie2 index directory already exists.\n")
            else:
                os.mkdir(bowtie2_index_dir_path)

            # Flatten the extra options into "key value key value ..." form.
            bwt2_cmd_options = ' '.join(
                f"{key} {value}" for key, value in self.bowtie2_cmd_options.items()
            )
            bowtie2_command = Bowtie2IndexStep.BASE_BOWTIE2_INDEX_COMMAND.format(
                bowtie2_bin_dir=bowtie2_bin_dir,
                num_bowtie2_threads=self.num_bowtie2_threads,
                bowtie2_cmd_options=bwt2_cmd_options,
                output_index_prefix=bowtie2_index_file_prefix,
                reference_fasta=transcriptome_fasta_path)

            print(f"Running Bowtie2 with command: {bowtie2_command}")
            print(f"For full Bowtie2 index output see {log_file_path}")
            log_file.write(f"Running Bowtie2 with command: {bowtie2_command}.\n\n")
            log_file.write("Bowtie2 index output follows:\n")

            try:
                bowtie2_result = subprocess.run(bowtie2_command, shell=True, check=True,
                                                stdout=subprocess.PIPE,
                                                stderr=subprocess.STDOUT,  # Redirect stderr to stdout.
                                                encoding="ascii")
            except subprocess.CalledProcessError as bowtie2_index_exception:
                log_file.write("\n*****ERROR: Bowtie2 index command failed:\n")
                log_file.write(f"\tExit code: {bowtie2_index_exception.returncode}\n")
                log_file.write("\n*****STDOUT:\n")
                log_file.write(f"{bowtie2_index_exception.stdout}\n")
                # stderr was merged into stdout above, so this attribute is
                # not populated separately; the section is kept so the log
                # layout stays unchanged.
                log_file.write("\n*****STDERR:\n")
                log_file.write(f"{bowtie2_index_exception.stderr}\n")
                raise CampareeException(f"\nBowtie2 index process failed. "
                                        f"For full details see {log_file_path}\n") \
                    from bowtie2_index_exception

            print("Finished generating Bowtie2 index.\n")
            log_file.write(f"{bowtie2_result.stdout}\n")
            log_file.write("\nFinished generating Bowtie2 index.\n")
            # Completion marker checked by is_output_valid().
            log_file.write("ALL DONE!\n")

    def get_commandline_call(self, sample_id, genome_suffix, bowtie2_bin_dir, transcriptome_fasta_path):
        """Prepare command to execute the Bowtie2IndexStep from the command
        line, given all of the arguments used to run the execute() function.

        Parameters
        ----------
        sample_id : string
            Identifier for sample corresponding to reference transcriptome.
            Used to construct index and log paths for this specific Bowtie2
            execution.
        genome_suffix : string
            Suffix to identify the parent/allele of the transcriptome. Should
            be 1 or 2. This same suffix is appended to all output
            files/directories.
        bowtie2_bin_dir : string
            Path to the directory containing the bowtie2-build executable.
        transcriptome_fasta_path : string
            Path to the FASTA file of transcripts, used as the basis for the
            Bowtie2 index. This is generally prepared by the
            TranscriptomeFastaPreparationStep.

        Returns
        -------
        string
            Command to execute on the command line. It will perform the same
            operations as a call to execute() with the same parameters.

        """
        # Retrieve path to the bowtie2.py script.
        bowtie2_step_path = os.path.realpath(__file__)
        # If the above command returns a path with a "pyc" extension, drop
        # only that final "c" so the path points to this script.
        if bowtie2_step_path.endswith('.pyc'):
            bowtie2_step_path = bowtie2_step_path[:-1]

        # TODO: Explore alternative to json for passing dictionary via command line.
        #       Eval could be dangerous for this, since the user has complete control
        #       over what gets entered as a bowtie2 parameter through the config file.
        command = (f" python {bowtie2_step_path} index"
                   f" --log_directory_path {self.log_directory_path}"
                   f" --data_directory_path {self.data_directory_path}"
                   f" --sample_id {sample_id}"
                   f" --genome_suffix {genome_suffix}"
                   f" --bowtie2_bin_dir {bowtie2_bin_dir}"
                   f" --transcriptome_fasta_file_path {transcriptome_fasta_path}"
                   f" --num_bowtie2_threads {self.num_bowtie2_threads}"
                   f" --bowtie2_parameters '{json.dumps(self.bowtie2_cmd_options)}'")

        return command

    def get_validation_attributes(self, sample_id, genome_suffix, bowtie2_bin_dir, transcriptome_fasta_path):
        """Prepare attributes required by is_output_valid() function to
        validate output generated by the Bowtie2IndexStep job.

        Parameters
        ----------
        sample_id : string
            Identifier for sample corresponding to reference transcriptome.
            Used to construct index and log paths for this specific Bowtie2
            execution.
        genome_suffix : string
            Suffix to identify the parent/allele of the transcriptome. Should
            be 1 or 2. This same suffix is appended to all output
            files/directories.
        bowtie2_bin_dir : string
            Path to the directory containing the bowtie2-build executable.
            [Note: this parameter is captured just so
            get_validation_attributes() accepts the same arguments as
            get_commandline_call(). It is not used here.]
        transcriptome_fasta_path : string
            Path to the FASTA file of transcripts. [Note: this parameter is
            captured just so get_validation_attributes() accepts the same
            arguments as get_commandline_call(). It is not used here.]

        Returns
        -------
        dict
            A Bowtie2IndexStep job's data_directory, log_directory,
            corresponding sample ID, and genome_suffix.

        """
        return {
            'data_directory': self.data_directory_path,
            'log_directory': self.log_directory_path,
            'sample_id': sample_id,
            'genome_suffix': genome_suffix,
        }

    @staticmethod
    def is_output_valid(validation_attributes):
        """Check if output of Bowtie2IndexStep for a specific job/execution is
        correctly formed and valid, given a job's data directory, log
        directory, sample ID, and genome suffix. Prepare these attributes for
        a given job using the get_validation_attributes() method.

        Parameters
        ----------
        validation_attributes : dict
            A job's data_directory, log_directory, corresponding sample_id,
            and genome_suffix used when creating the Bowtie2 index.

        Returns
        -------
        boolean
            True  - All six index files and the log file exist, and the last
                    line of the log file is "ALL DONE!".
            False - Any of those files is missing, or the log file does not
                    end with the completion marker.

        """
        data_directory_path = validation_attributes['data_directory']
        log_directory_path = validation_attributes['log_directory']
        sample_id = validation_attributes['sample_id']
        genome_suffix = validation_attributes['genome_suffix']

        # Construct output filenames/paths
        log_file_path = os.path.join(
            log_directory_path, f'sample{sample_id}',
            Bowtie2IndexStep.BOWTIE2_INDEX_LOG_FILENAME_PATTERN.format(genome_name=genome_suffix))
        bowtie2_index_file_prefix = os.path.join(
            data_directory_path, f'sample{sample_id}',
            Bowtie2IndexStep.BOWTIE2_INDEX_DIR_PATTERN.format(genome_name=genome_suffix),
            Bowtie2IndexStep.BOWTIE2_INDEX_PREFIX_PATTERN.format(genome_name=genome_suffix))

        # TODO: Identify which index files are missing in the event of a
        #       failed validation.
        # Note, bowtie2-build should produce 6 different index files. They all
        # should exist.
        required_files = [bowtie2_index_file_prefix + suffix
                          for suffix in Bowtie2IndexStep._INDEX_FILE_SUFFIXES]
        required_files.append(log_file_path)
        if not all(os.path.isfile(path) for path in required_files):
            return False

        # Read last line in log file
        last_line = ""
        with open(log_file_path, "r") as log_file:
            for last_line in log_file:
                pass

        return last_line.rstrip() == "ALL DONE!"

    @staticmethod
    def main(cmd_args):
        """Entry point into class. Used when script is executed/submitted via
        the command line with the 'index' subcommand.

        Parameters
        ----------
        cmd_args : argparse.Namespace
            Parsed arguments of the 'index' subcommand.

        """
        # --bowtie2_parameters is optional, so it may be absent (None).
        raw_parameters = cmd_args.bowtie2_parameters
        parameters = json.loads(raw_parameters) if raw_parameters else {}
        # Forward the thread count given on the command line; without this
        # the step silently falls back to a single thread.
        parameters['num_bowtie_threads'] = getattr(cmd_args, 'num_bowtie2_threads', 1)
        bowtie2_index = Bowtie2IndexStep(log_directory_path=cmd_args.log_directory_path,
                                         data_directory_path=cmd_args.data_directory_path,
                                         parameters=parameters)
        bowtie2_index.execute(sample_id=cmd_args.sample_id,
                              genome_suffix=cmd_args.genome_suffix,
                              bowtie2_bin_dir=cmd_args.bowtie2_bin_dir,
                              transcriptome_fasta_path=cmd_args.transcriptome_fasta_file_path)
class Bowtie2AlignStep(AbstractCampareeStep):
    """Wrapper around aligning reads with Bowtie2.

    Builds the bowtie2 command line for a pair of FASTQ files, runs it
    through the shell, and records the command, its output, and a final
    "ALL DONE!" marker in a per-sample log file. The marker is what
    is_output_valid() looks for.
    """

    BOWTIE2_ALIGN_FILENAME_PATTERN = CAMPAREE_CONSTANTS.BOWTIE2_ALIGN_FILENAME_PATTERN
    BOWTIE2_ALIGN_LOG_FILENAME_PATTERN = CAMPAREE_CONSTANTS.BOWTIE2_ALIGN_LOG_FILENAME_PATTERN

    # TODO: Update this script to gracefully handle both one and two FASTQ files
    #       for input (currently only works with two FASTQ files).

    # The basic Bowtie2 command used to align a pair of FASTQ files against
    # an existing index and write the result to a SAM file.
    BASE_BOWTIE2_ALIGN_COMMAND = ('{bowtie2_bin_dir}/bowtie2'
                                  ' --very-sensitive'
                                  ' --threads {num_bowtie2_threads}'
                                  ' {bowtie2_cmd_options}'
                                  ' -x {bowtie2_index_prefix}'
                                  ' -1 {first_read_fastq}'
                                  ' -2 {second_read_fastq}'
                                  ' -S {output_sam_file}')

    def __init__(self, log_directory_path, data_directory_path, parameters=None):
        """Constructor for Bowtie2AlignStep object.

        Parameters
        ----------
        log_directory_path : string
            Full path to log directory.
        data_directory_path : string
            Full path to data directory.
        parameters : dict
            [Optional] Dictionary of Bowtie2 parameters specified by the
            config file (Note, the "num_bowtie_threads" entry in the config
            file maps to the bowtie2 "--threads" command line parameter).
            The dictionary is copied; the caller's object is never modified.

        """
        self.data_directory_path = data_directory_path
        self.log_directory_path = log_directory_path
        # Copy before popping so that neither the caller's dictionary nor a
        # shared default object loses its "num_bowtie_threads" entry.
        parameters = dict(parameters) if parameters else {}
        self.num_bowtie2_threads = parameters.pop('num_bowtie_threads', 1)
        # Remaining parameters (if any) aside from "num_bowtie_threads"
        self.bowtie2_cmd_options = parameters

    def validate(self):
        """Check all given Bowtie2 parameters are correctly formed (i.e. start
        with single or double dash), and do not conflict with any that are
        explicitly specified by this script (--very-sensitive, -x, -1, -2, -S),
        or elsewhere in the config file (--threads).

        Returns
        -------
        boolean
            True if every parameter name is acceptable, False otherwise. A
            message describing the first offending parameter is printed to
            stderr.

        """
        # These are parameters this script specifies directly. Most of these are
        # for specifying the index, input fastq(s), and output SAM filename.
        invalid_bowtie2_parameters = ["--very-sensitive", "-x", "-1", "-2", "-S", "--threads"]

        for key, value in self.bowtie2_cmd_options.items():
            if not key.startswith("-"):
                print(f"Bowtie2 align parameter {key} with value {value} needs"
                      f" to be a Bowtie2 option starting with single (-) or double"
                      f" dashes (--).", file=sys.stderr)
                return False
            if key in invalid_bowtie2_parameters:
                print(f"Bowtie2 align parameter {key} with value {value} cannot"
                      f" be used as a Bowtie2 option since the value is either"
                      f" hard-coded by this script, or explicitly specfied"
                      f" elsewhere in the config file.", file=sys.stderr)
                return False

        return True

    def execute(self, sample, genome_suffix, bowtie2_bin_dir):
        """Use Bowtie2 to align fastq files for a given sample to the
        reference transcriptome.

        Parameters
        ----------
        sample : Sample
            Sample containing paths for FASTQ files for alignment. Its
            fastq_file_paths attribute must unpack into exactly two paths.
        genome_suffix : string
            Suffix to identify the parent/allele of the transcriptome. Should
            be 1 or 2. This same suffix is appended to all output
            files/directories.
        bowtie2_bin_dir : string
            Path to the directory containing the bowtie2 executable.

        Raises
        ------
        CampareeException
            If the bowtie2 command exits with a non-zero status.

        """
        # The index location follows the naming scheme used by Bowtie2IndexStep.
        bowtie2_index_file_prefix = os.path.join(
            self.data_directory_path, f'sample{sample.sample_id}',
            Bowtie2IndexStep.BOWTIE2_INDEX_DIR_PATTERN.format(genome_name=genome_suffix),
            Bowtie2IndexStep.BOWTIE2_INDEX_PREFIX_PATTERN.format(genome_name=genome_suffix))
        bowtie2_output_file_path = os.path.join(
            self.data_directory_path, f'sample{sample.sample_id}',
            Bowtie2AlignStep.BOWTIE2_ALIGN_FILENAME_PATTERN.format(genome_name=genome_suffix))
        log_file_path = os.path.join(
            self.log_directory_path, f'sample{sample.sample_id}',
            Bowtie2AlignStep.BOWTIE2_ALIGN_LOG_FILENAME_PATTERN.format(genome_name=genome_suffix))

        fastq_file_1, fastq_file_2 = sample.fastq_file_paths

        with open(log_file_path, 'w') as log_file:

            print(f"Running Bowtie2 alignment to transcriptome {genome_suffix} "
                  f"of sample{sample.sample_id}")
            log_file.write(f"Running Bowtie2 alignment to transcriptome "
                           f"{genome_suffix} of sample{sample.sample_id}.\n")

            log_file.write(f"Parameters:\n"
                           f" Bowtie2 binary directory: {bowtie2_bin_dir}\n"
                           f" Bowtie2 index file prefix: {bowtie2_index_file_prefix}\n"
                           f" Bowtie2 output SAM file: {bowtie2_output_file_path}\n"
                           f" Read 1 FASTQ: {fastq_file_1}\n"
                           f" Read 2 FASTQ: {fastq_file_2}\n"
                           f" Number of Bowtie2 threads: {self.num_bowtie2_threads}\n")

            # Flatten the extra options into "key value key value ..." form.
            bwt2_cmd_options = ' '.join(
                f"{key} {value}" for key, value in self.bowtie2_cmd_options.items()
            )
            bowtie2_command = Bowtie2AlignStep.BASE_BOWTIE2_ALIGN_COMMAND.format(
                bowtie2_bin_dir=bowtie2_bin_dir,
                num_bowtie2_threads=self.num_bowtie2_threads,
                bowtie2_cmd_options=bwt2_cmd_options,
                bowtie2_index_prefix=bowtie2_index_file_prefix,
                first_read_fastq=fastq_file_1,
                second_read_fastq=fastq_file_2,
                output_sam_file=bowtie2_output_file_path)

            print(f"Running Bowtie2 with command: {bowtie2_command}")
            print(f"For full Bowtie2 alignment output see {log_file_path}")
            log_file.write(f"Running Bowtie2 with command: {bowtie2_command}.\n\n")
            log_file.write("Bowtie2 alignment output follows:\n")

            try:
                bowtie2_result = subprocess.run(bowtie2_command, shell=True, check=True,
                                                stdout=subprocess.PIPE,
                                                stderr=subprocess.STDOUT,  # Redirect stderr to stdout.
                                                encoding="ascii")
            except subprocess.CalledProcessError as bowtie2_align_exception:
                log_file.write("\n*****ERROR: Bowtie2 alignment command failed:\n")
                log_file.write(f"\tExit code: {bowtie2_align_exception.returncode}\n")
                log_file.write("\n*****STDOUT:\n")
                log_file.write(f"{bowtie2_align_exception.stdout}\n")
                # stderr was merged into stdout above, so this attribute is
                # not populated separately; the section is kept so the log
                # layout stays unchanged.
                log_file.write("\n*****STDERR:\n")
                log_file.write(f"{bowtie2_align_exception.stderr}\n")
                raise CampareeException(f"\nBowtie2 alignment process failed. "
                                        f"For full details see {log_file_path}\n") \
                    from bowtie2_align_exception

            print("Finished Bowtie2 alignment.\n")
            log_file.write(f"{bowtie2_result.stdout}\n")
            log_file.write("\nFinished Bowtie2 alignment.\n")
            # Completion marker checked by is_output_valid().
            log_file.write("ALL DONE!\n")

    def get_commandline_call(self, sample, genome_suffix, bowtie2_bin_dir):
        """Prepare command to execute the Bowtie2AlignStep from the command
        line, given all of the arguments used to run the execute() function.

        Parameters
        ----------
        sample : Sample
            Sample containing paths for FASTQ files for alignment. Its repr()
            is embedded in the command, wrapped in single quotes.
        genome_suffix : string
            Suffix to identify the parent/allele of the transcriptome. Should
            be 1 or 2. This same suffix is appended to all output
            files/directories.
        bowtie2_bin_dir : string
            Path to the directory containing the bowtie2 executable.

        Returns
        -------
        string
            Command to execute on the command line. It will perform the same
            operations as a call to execute() with the same parameters.

        """
        # Retrieve path to the bowtie2.py script.
        bowtie2_step_path = os.path.realpath(__file__)
        # If the above command returns a path with a "pyc" extension, drop
        # only that final "c" so the path points to this script.
        if bowtie2_step_path.endswith('.pyc'):
            bowtie2_step_path = bowtie2_step_path[:-1]

        # TODO: Explore alternative to json for passing dictionary via command line.
        #       Eval could be dangerous for this, since the user has complete control
        #       over what gets entered as a bowtie2 parameter through the config file.
        command = (f" python {bowtie2_step_path} align"
                   f" --log_directory_path {self.log_directory_path}"
                   f" --data_directory_path {self.data_directory_path}"
                   f" --sample '{repr(sample)}'"
                   f" --genome_suffix {genome_suffix}"
                   f" --bowtie2_bin_dir {bowtie2_bin_dir}"
                   f" --num_bowtie2_threads {self.num_bowtie2_threads}"
                   f" --bowtie2_parameters '{json.dumps(self.bowtie2_cmd_options)}'")

        return command

    def get_validation_attributes(self, sample, genome_suffix, bowtie2_bin_dir):
        """Prepare attributes required by is_output_valid() function to
        validate output generated by the Bowtie2AlignStep job.

        Parameters
        ----------
        sample : Sample
            Sample containing paths for FASTQ files for alignment. [Note:
            only the sample_id is used, but the full Sample object is required
            here so get_validation_attributes() accepts the same arguments as
            get_commandline_call().]
        genome_suffix : string
            Suffix to identify the parent/allele of the transcriptome. Should
            be 1 or 2. This same suffix is appended to all output
            files/directories.
        bowtie2_bin_dir : string
            Path to the directory containing the bowtie2 executable. [Note:
            this parameter is captured just so get_validation_attributes()
            accepts the same arguments as get_commandline_call(). It is not
            used here.]

        Returns
        -------
        dict
            A Bowtie2AlignStep job's data_directory, log_directory,
            corresponding sample ID, and genome_suffix.

        """
        return {
            'data_directory': self.data_directory_path,
            'log_directory': self.log_directory_path,
            'sample_id': sample.sample_id,
            'genome_suffix': genome_suffix,
        }

    @staticmethod
    def is_output_valid(validation_attributes):
        """Check if output of Bowtie2AlignStep for a specific job/execution is
        correctly formed and valid, given a job's data directory, log
        directory, sample ID, and genome suffix. Prepare these attributes for
        a given job using the get_validation_attributes() method.

        Parameters
        ----------
        validation_attributes : dict
            A job's data_directory, log_directory, corresponding sample_id,
            and genome_suffix used when aligning reads with Bowtie2.

        Returns
        -------
        boolean
            True  - The alignment output file and the log file exist, and the
                    last line of the log file is "ALL DONE!".
            False - Either file is missing, or the log file does not end with
                    the completion marker.

        """
        data_directory_path = validation_attributes['data_directory']
        log_directory_path = validation_attributes['log_directory']
        sample_id = validation_attributes['sample_id']
        genome_suffix = validation_attributes['genome_suffix']

        # Construct output filenames/paths
        bowtie2_output_file_path = os.path.join(
            data_directory_path, f'sample{sample_id}',
            Bowtie2AlignStep.BOWTIE2_ALIGN_FILENAME_PATTERN.format(genome_name=genome_suffix))
        log_file_path = os.path.join(
            log_directory_path, f'sample{sample_id}',
            Bowtie2AlignStep.BOWTIE2_ALIGN_LOG_FILENAME_PATTERN.format(genome_name=genome_suffix))

        if not (os.path.isfile(bowtie2_output_file_path)
                and os.path.isfile(log_file_path)):
            return False

        # Read last line in log file
        last_line = ""
        with open(log_file_path, "r") as log_file:
            for last_line in log_file:
                pass

        return last_line.rstrip() == "ALL DONE!"

    @staticmethod
    def main(cmd_args):
        """Entry point into class. Used when script is executed/submitted via
        the command line with the 'align' subcommand.

        Parameters
        ----------
        cmd_args : argparse.Namespace
            Parsed arguments of the 'align' subcommand.

        """
        # SECURITY NOTE: eval() executes whatever string is passed via
        # --sample. It is expected to be the repr() of a Sample produced by
        # get_commandline_call(); never feed it untrusted input.
        # Requires Sample function from BEERS_UTILS.sample
        sample = eval(cmd_args.sample)
        # --bowtie2_parameters is optional, so it may be absent (None).
        raw_parameters = cmd_args.bowtie2_parameters
        parameters = json.loads(raw_parameters) if raw_parameters else {}
        parameters['num_bowtie_threads'] = cmd_args.num_bowtie2_threads
        bowtie2_align = Bowtie2AlignStep(log_directory_path=cmd_args.log_directory_path,
                                         data_directory_path=cmd_args.data_directory_path,
                                         parameters=parameters)
        bowtie2_align.execute(sample=sample,
                              genome_suffix=cmd_args.genome_suffix,
                              bowtie2_bin_dir=cmd_args.bowtie2_bin_dir)
if __name__ == '__main__':
    # Prepare and process command line arguments. The setup below allows for
    # entry into either the Bowtie2IndexStep main() method or the
    # Bowtie2AlignStep main() method based on which subcommand is specified
    # at the command line.

    parser = argparse.ArgumentParser(description='Command line wrapper around'
                                                 ' Bowtie2 index creation and'
                                                 ' alignment.')
    subparsers = parser.add_subparsers(help="Choose one of the following:", dest="RUN_MODE",
                                       metavar="RUN_MODE")
    subparsers.required = True

    # Setup arguments for the index subcommand
    bowtie2_index_subparser = subparsers.add_parser('index', help="Create Bowtie2 index from transcriptome FASTA.",
                                                    description="Create Bowtie2 index from transcriptome FASTA.")
    # Send arguments for this subcommand to the Bowtie2IndexStep's main() method.
    bowtie2_index_subparser.set_defaults(func=Bowtie2IndexStep.main)
    required_named_bowtie2_index_subparser = bowtie2_index_subparser.add_argument_group('Required named arguments')
    required_named_bowtie2_index_subparser.add_argument('-l', '--log_directory_path', required=True,
                                                        help='Directory in which to save logging files.')
    required_named_bowtie2_index_subparser.add_argument('-d', '--data_directory_path', required=True,
                                                        help='Directory in which to save output files.')
    required_named_bowtie2_index_subparser.add_argument('--sample_id', required=True,
                                                        help='Sample ID associated with input genome.')
    required_named_bowtie2_index_subparser.add_argument('--genome_suffix', required=True,
                                                        help='Suffix identifying parent/allele of source genome.')
    required_named_bowtie2_index_subparser.add_argument('--bowtie2_bin_dir', required=True,
                                                        help='Full path to directory containing bowtie2-build '
                                                             'executable.')
    required_named_bowtie2_index_subparser.add_argument('--transcriptome_fasta_file_path', required=True,
                                                        help='Input transcriptome in FASTA format.')
    required_named_bowtie2_index_subparser.add_argument('--num_bowtie2_threads', type=int, default=1, required=False,
                                                        help='Number of threads to use when running Bowtie2,')
    # Default to an empty JSON object so main() can always json.loads() the
    # value, even when the option is omitted.
    required_named_bowtie2_index_subparser.add_argument('--bowtie2_parameters', required=False, default='{}',
                                                        help="Jsonified Bowtie2 index parameters (excluding "
                                                             "--threads).")

    # Setup arguments from the alignment subcommand
    bowtie2_align_subparser = subparsers.add_parser('align', help="Run Bowtie2 alignment to transcriptome.",
                                                    description="Run Bowtie2 alignment to transcriptome.")
    # Send arguments for this subcommand to the Bowtie2AlignStep's main() method.
    bowtie2_align_subparser.set_defaults(func=Bowtie2AlignStep.main)
    required_named_bowtie2_align_subparser = bowtie2_align_subparser.add_argument_group('Required named arguments')
    required_named_bowtie2_align_subparser.add_argument('-l', '--log_directory_path', required=True,
                                                        help='Directory in which to save logging files.')
    required_named_bowtie2_align_subparser.add_argument('-d', '--data_directory_path', required=True,
                                                        help='Directory in which to save output files.')
    required_named_bowtie2_align_subparser.add_argument('--sample', required=True,
                                                        help='String representation of a Sample object.')
    required_named_bowtie2_align_subparser.add_argument('--genome_suffix', required=True,
                                                        help='Suffix identifying parent/allele of source genome.')
    required_named_bowtie2_align_subparser.add_argument('--bowtie2_bin_dir', required=True,
                                                        help='Full path to directory containing bowtie2 '
                                                             'executable.')
    required_named_bowtie2_align_subparser.add_argument('--num_bowtie2_threads', type=int, default=1, required=False,
                                                        help='Number of threads to use when running Bowtie2,')
    # Default to an empty JSON object so main() can always json.loads() the
    # value, even when the option is omitted.
    required_named_bowtie2_align_subparser.add_argument('--bowtie2_parameters', required=False, default='{}',
                                                        help="Jsonified Bowtie2 align parameters (excluding "
                                                             "--threads).")

    args = parser.parse_args()
    # Dispatch to the main() method registered for the chosen subcommand.
    args.func(args)