# HG changeset patch
# User scisjnu123
# Date 1568358773 14400
# Node ID 019b09b37955e093b94851046aa1fdf873385ea2
# Parent ec10ff509d48333003942aa2a9b92cb7a29bd48d
Uploaded
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_AddCommentsToBam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_AddCommentsToBam.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,77 @@
+
+ add comments to BAM dataset
+
+ picard_macros.xml
+
+
+
+ @java_options@
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ AddCommentsToBam
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ #for $element in $comments:
+ COMMENT="${element.comment}"
+ #end for
+ QUIET=true
+ VERBOSITY=ERROR
+ VALIDATION_STRINGENCY=${validation_stringency}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Adds one or more comments (@CO) to the header of a specified BAM dataset.
+
+@dataset_collections@
+
+@description@
+
+ COMMENT=String
+ C=String Comments to add to the BAM file This option may be specified 0 or more times.
+
+@more_info@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_AddOrReplaceReadGroups.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_AddOrReplaceReadGroups.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,125 @@
+
+ add or replace read group information
+
+ picard_macros.xml
+ read_group_macros.xml
+
+
+
+ @define_read_group_helpers@
+ #set $rg_auto_name = $read_group_name_default($inputFile)
+ @set_read_group_vars@
+ @java_options@
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ AddOrReplaceReadGroups
+ INPUT="${inputFile}"
+ $format_read_group("RGLB=", $rg_lb, '"')
+ $format_read_group("RGPL=", $rg_pl, '"')
+ $format_read_group("RGPU=", $rg_pu, '"')
+ $format_read_group("RGSM=", $rg_sm, '"')
+ $format_read_group("RGID=", $rg_id, '"')
+ $format_read_group("RGDS=", $rg_ds, '"')
+ $format_read_group("RGPI=", $rg_pi, '"')
+ $format_read_group("RGDT=", $rg_dt, '"')
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+ OUTPUT="${outFile}"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Add or Replace Read Groups in an input BAM or SAM file.
+
+@dataset_collections@
+
+@RG@
+
+@description@
+
+ INPUT=File
+ I=File Input file (bam or sam). Required.
+
+ OUTPUT=File
+ O=File Output file (bam or sam). Required.
+
+ SORT_ORDER=SortOrder
+ SO=SortOrder Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.
+ Default value: null. Possible values: {unsorted, queryname, coordinate}
+
+ RGID=String
+ ID=String Read Group ID Default value: 1. This option can be set to 'null' to clear the default
+ value.
+
+ RGLB=String
+ LB=String Read Group Library Required.
+
+ RGPL=String
+ PL=String Read Group platform (e.g. illumina, solid) Required.
+
+ RGPU=String
+ PU=String Read Group platform unit (eg. run barcode) Required.
+
+ RGSM=String
+ SM=String Read Group sample name Required.
+
+ RGCN=String
+ CN=String Read Group sequencing center name Default value: null.
+
+ RGDS=String
+ DS=String Read Group description Default value: null.
+
+ RGDT=Iso8601Date
+ DT=Iso8601Date Read Group run date Default value: null.
+
+ RGPI=Integer
+ PI=Integer Read Group predicted insert size Default value: null.
+
+@more_info@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_BedToIntervalList.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_BedToIntervalList.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,115 @@
+
+ convert coordinate data into picard interval list format
+
+ picard_macros.xml
+
+
+
+ @java_options@
+
+ #set $picard_dict = "localref.dict"
+ #set $ref_fasta = "localref.fa" ## This is done because picards "likes" .fa extension
+
+ ln -s "${reference_source.ref_file}" "${ref_fasta}" &&
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+
+ java -jar \$JAVA_JAR_PATH/picard.jar CreateSequenceDictionary REFERENCE="${ref_fasta}" OUTPUT="${picard_dict}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+ &&
+
+ #else:
+
+ #set $ref_fasta = str( $reference_source.ref_file.fields.path ) ## getting path of reference fasta file (must end with .fa)
+ #set $picard_dict=$ref_fasta[:-2]+"dict" ## replacing .fa with .dict
+
+ #end if
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ BedToIntervalList
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+
+ SEQUENCE_DICTIONARY="${picard_dict}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Convert coordinate data (such as BED or Galaxy Interval) into Picard Interval Format.
+
+@dataset_collections@
+
+@description@
+
+ SEQUENCE_DICTIONARY=File
+ SD=File The sequence dictionary. You can either use dictionary pre-cached
+ on this instance of Galaxy, or create one on the fly from a FASTA
+ file uploaded to history (right pane of the interface).
+
+
+@more_info@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_CleanSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_CleanSam.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,57 @@
+
+ perform SAM/BAM grooming
+
+ picard_macros.xml
+
+
+
+ @java_options@
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ CleanSam
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ QUIET=true
+ VERBOSITY=ERROR
+ VALIDATION_STRINGENCY=${validation_stringency}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Read SAM/BAM and perform various fix-ups. Currently, the only fix-ups are:
+
+ 1. to soft-clip an alignment that hangs off the end of its reference sequence.
+ 2. to set MAPQ to 0 if a read is unmapped.
+
+@dataset_collections@
+
+@more_info@
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_CollectAlignmentSummaryMetrics.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_CollectAlignmentSummaryMetrics.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,133 @@
+
+ writes a file containing summary alignment metrics
+
+ picard_macros.xml
+
+
+
+ @java_options@
+ ##set up input files
+
+ #set $reference_fasta_filename = "localref.fa"
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+ ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
+ #else:
+ #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+ #end if
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ CollectAlignmentSummaryMetrics
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ MAX_INSERT_SIZE=${maxinsert}
+ #for $sequence in $adapters:
+ ADAPTER_SEQUENCE="${sequence.adapter}"
+ #end for
+ METRIC_ACCUMULATION_LEVEL="${metric_accumulation_level}"
+ IS_BISULFITE_SEQUENCED="${bisulphite}"
+
+ REFERENCE_SEQUENCE="${reference_fasta_filename}"
+
+ ASSUME_SORTED="${assume_sorted}"
+
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Reads a SAM or BAM file and writes a file containing summary alignment metrics.
+
+@dataset_collections@
+
+@description@
+
+ MAX_INSERT_SIZE=Integer Paired end reads above this insert size will be considered chimeric along with
+ inter-chromosomal pairs. Default value: 100000.
+
+ ADAPTER_SEQUENCE=String List of adapter sequences to use when processing the alignment metrics This option may
+ be specified 0 or more times.
+
+ METRIC_ACCUMULATION_LEVEL=MetricAccumulationLevel
+ LEVEL=MetricAccumulationLevel The level(s) at which to accumulate metrics. Possible values: {ALL_READS, SAMPLE,
+ LIBRARY, READ_GROUP} This option may be specified 0 or more times.
+
+ IS_BISULFITE_SEQUENCED=Boolean
+ BS=Boolean Whether the SAM or BAM file consists of bisulfite sequenced reads.
+
+
+ REFERENCE_SEQUENCE=File
+ R=File Reference sequence fasta Default value: null.
+
+ ASSUME_SORTED=Boolean
+ AS=Boolean If true (default), then the sort order in the header file will be ignored.
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_CollectBaseDistributionByCycle.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_CollectBaseDistributionByCycle.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,114 @@
+
+ charts the nucleotide distribution per cycle in a SAM or BAM dataset
+
+ picard_macros.xml
+
+
+ R
+
+
+ @java_options@
+ ##set up input files
+
+ #set $reference_fasta_filename = "localref.fa"
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+ ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
+ #else:
+ #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+ #end if
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ CollectBaseDistributionByCycle
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ CHART_OUTPUT="${pdfFile}"
+ ALIGNED_READS_ONLY="${aligned_reads_only}"
+ PF_READS_ONLY="${pf_reads_only}"
+ REFERENCE_SEQUENCE="${reference_fasta_filename}"
+ ASSUME_SORTED="${assume_sorted}"
+
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Program to chart the nucleotide distribution per cycle in a SAM or BAM file.
+
+@dataset_collections@
+
+@description@
+
+ ALIGNED_READS_ONLY=Boolean If set to true, calculate the base distribution over aligned reads only. Default value:
+ false. This option can be set to 'null' to clear the default value. Possible values:
+ {true, false}
+
+ PF_READS_ONLY=Boolean If set to true calculate the base distribution over PF reads only. Default value: false.
+ This option can be set to 'null' to clear the default value. Possible values: {true,
+ false}
+
+ REFERENCE_SEQUENCE=File
+ R=File Reference sequence fasta Default value: null.
+
+ ASSUME_SORTED=Boolean
+ AS=Boolean If true (default), then the sort order in the header file will be ignored. Default value: true.
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_CollectGcBiasMetrics.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_CollectGcBiasMetrics.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,115 @@
+
+ charts the GC bias metrics
+
+ picard_macros.xml
+
+
+ R
+
+
+ @java_options@
+ ##set up input files
+
+ #set $reference_fasta_filename = "localref.fa"
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+ ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
+ #else:
+ #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+ #end if
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ CollectGcBiasMetrics
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ CHART_OUTPUT="${pdfFile}"
+ SUMMARY_OUTPUT="${summaryFile}"
+ WINDOW_SIZE="${window_size}"
+ MINIMUM_GENOME_FRACTION="${minimum_genome_fraction}"
+ IS_BISULFITE_SEQUENCED="${is_bisulfite_sequenced}"
+ REFERENCE_SEQUENCE="${reference_fasta_filename}"
+ ASSUME_SORTED="${assume_sorted}"
+
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Program to collect and chart metrics about GC bias in the reads of a SAM or BAM file.
+
+@dataset_collections@
+
+@description@
+
+ ALIGNED_READS_ONLY=Boolean If set to true, calculate the base distribution over aligned reads only. Default value:
+ false. Possible values: {true, false}
+
+ PF_READS_ONLY=Boolean If set to true calculate the base distribution over PF reads only. Default value: false.
+ This option can be set to 'null' to clear the default value. Possible values: {true,
+ false}
+
+ ASSUME_SORTED=Boolean
+ AS=Boolean If true (default), then the sort order in the header file will be ignored. Default: True
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_CollectInsertSizeMetrics.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_CollectInsertSizeMetrics.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,139 @@
+
+ plots distribution of insert sizes
+
+ picard_macros.xml
+
+
+ R
+
+
+ @java_options@
+ ##set up input files
+
+ #set $reference_fasta_filename = "localref.fa"
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+ ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
+ #else:
+ #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+ #end if
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ CollectInsertSizeMetrics
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ HISTOGRAM_FILE="${histFile}"
+ DEVIATIONS="${deviations}"
+
+ #if str( $hist_width ):
+ HISTOGRAM_WIDTH="${hist_width}"
+ #end if
+
+ MINIMUM_PCT="${min_pct}"
+ REFERENCE_SEQUENCE="${reference_fasta_filename}"
+ ASSUME_SORTED="${assume_sorted}"
+ METRIC_ACCUMULATION_LEVEL="${metric_accumulation_level}"
+
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Reads a SAM or BAM dataset and writes a file containing metrics about the statistical distribution of insert size (excluding duplicates) and generates a Histogram plot.
+
+@dataset_collections@
+
+@description@
+
+
+ DEVIATIONS=Double Generate mean, sd and plots by trimming the data down to MEDIAN +
+ DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically
+ includes enough anomalous values from chimeras and other artifacts to make the mean and
+ sd grossly misleading regarding the real distribution. Default value: 10.0.
+
+ HISTOGRAM_WIDTH=Integer
+ W=Integer Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail.
+ Also, when calculating mean and standard deviation, only bins <= Histogram_WIDTH will be
+ included. Default value: not set.
+
+ MINIMUM_PCT=Float
+ M=Float When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that
+ have fewer than this percentage of overall reads. (Range: 0 to 1). Default value: 0.05.
+
+ METRIC_ACCUMULATION_LEVEL=MetricAccumulationLevel
+ LEVEL=MetricAccumulationLevel The level(s) at which to accumulate metrics. Possible values: {ALL_READS, SAMPLE,
+ LIBRARY, READ_GROUP} This option may be specified 0 or more times.
+
+ ASSUME_SORTED=Boolean
+ AS=Boolean If true (default), then the sort order in the header file will be ignored. Default
+ value: true. This option can be set to 'null' to clear the default value. Possible
+ values: {true, false}
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_CollectRnaSeqMetrics.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_CollectRnaSeqMetrics.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,201 @@
+
+ collect metrics about the alignment of RNA to various functional classes of loci in the genome
+
+ picard_macros.xml
+
+
+ R
+
+
+
+ ## Set up input files
+
+ ## Reference sequences
+
+ #set $reference_fasta_filename = "localref.fa"
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+ ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
+ #else:
+ #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+ #end if
+
+ ## refFlat data
+ ## The awk line below converts a file obtained from UCSC as specified in the tool help to refFlat format
+
+ grep -v '^#' ${refFlat} | awk '{print $11"\t"$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10}' > refFlat.tab &&
+
+ ## Start picard command
+
+ @java_options@
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ CollectRnaSeqMetrics
+ REF_FLAT=refFlat.tab
+
+ #if str( $ribosomal_intervals ) != "None":
+ RIBOSOMAL_INTERVALS="${ribosomal_intervals}"
+ #end if
+
+ STRAND_SPECIFICITY="${strand_specificity}"
+ MINIMUM_LENGTH="${minimum_length}"
+ CHART_OUTPUT="${pdfFile}"
+
+ #for $sequence_to_ignore in $ignore_list:
+ IGNORE_SEQUENCE="${sequence_to_ignore.sequence}"
+ #end for
+
+ RRNA_FRAGMENT_PERCENTAGE="${rrna_fragment_percentage}"
+ METRIC_ACCUMULATION_LEVEL="${metric_accumulation_level}"
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ REFERENCE_SEQUENCE="${reference_fasta_filename}"
+ ASSUME_SORTED="${assume_sorted}"
+
+ QUIET=true
+ VERBOSITY=ERROR
+ VALIDATION_STRINGENCY=${validation_stringency}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Collects metrics about the alignment of RNA to various functional classes of loci in the genome: coding, intronic, UTR, intergenic, ribosomal.
+
+@dataset_collections@
+
+-----
+
+.. class:: warningmark
+
+**Obtaining gene annotations in refFlat format**
+
+This tool requires gene annotations in refFlat_ format. These data can be obtained from UCSC table browser directly through Galaxy by following these steps:
+
+ 1. Click on **Get Data** in the upper part of left pane of Galaxy interface
+ 2. Click on **UCSC Main** link
+ 3. Set your genome and dataset of interest. It **must** be the same genome build against which you have mapped the reads contained in the BAM file you are analyzing
+ 4. In the **output format** field choose **selected fields from primary and related tables**
+ 5. Click **get output** button
+ 6. In the first table presented at the top of the page select (using checkboxes) first 11 fields:
+ name
+ chrom
+ strand
+ txStart
+ txEnd
+ cdsStart
+ cdsEnd
+ exonCount
+ exonStarts
+ exonEnds
+ proteinId
+ 7. Click **done with selection**
+ 8. Click **Send query to Galaxy**
+ 9. A new dataset will appear in the current Galaxy history
+ 10. Use this dataset as the input for **Gene annotations in refFlat form** dropdown of this tool
+
+.. _refFlat: http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat
+
+@description@
+
+ REF_FLAT=File Gene annotations in refFlat form. Format described here:
+ http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat Required.
+
+ RIBOSOMAL_INTERVALS=File Location of rRNA sequences in genome, in interval_list format. If not specified no bases
+ will be identified as being ribosomal. Format described here:
+ http://picard.sourceforge.net/javadoc/net/sf/picard/util/IntervalList.html and can be
+ generated from BED datasets using Galaxy's wrapper for picard_BedToIntervalList tool
+
+ STRAND_SPECIFICITY=StrandSpecificity
+ STRAND=StrandSpecificity For strand-specific library prep. For unpaired reads, use FIRST_READ_TRANSCRIPTION_STRAND
+ if the reads are expected to be on the transcription strand. Required. Possible values:
+ {NONE, FIRST_READ_TRANSCRIPTION_STRAND, SECOND_READ_TRANSCRIPTION_STRAND}
+
+ MINIMUM_LENGTH=Integer When calculating coverage based values (e.g. CV of coverage) only use transcripts of this
+ length or greater. Default value: 500.
+
+ IGNORE_SEQUENCE=String If a read maps to a sequence specified with this option, all the bases in the read are
+ counted as ignored bases.
+
+ RRNA_FRAGMENT_PERCENTAGE=Double
+ This percentage of the length of a fragment must overlap one of the ribosomal intervals
+ for a read or read pair in order to be considered rRNA. Default value: 0.8.
+
+ METRIC_ACCUMULATION_LEVEL=MetricAccumulationLevel
+ LEVEL=MetricAccumulationLevel The level(s) at which to accumulate metrics. Possible values: {ALL_READS, SAMPLE,
+ LIBRARY, READ_GROUP} This option may be specified 0 or more times.
+
+ ASSUME_SORTED=Boolean
+ AS=Boolean If true (default), then the sort order in the header file will be ignored. Default
+ value: true. Possible values: {true, false}
+
+@more_info@
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_CollectWgsMetrics.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_CollectWgsMetrics.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,107 @@
+
+ compute metrics for evaluating whole genome sequencing experiments
+
+ picard_macros.xml
+
+
+
+ @java_options@
+ ##set up input files
+
+ #set $reference_fasta_filename = "localref.fa"
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+ ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
+ #else:
+ #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+ #end if
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ CollectWgsMetrics
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ REFERENCE_SEQUENCE="${reference_fasta_filename}"
+ MINIMUM_MAPPING_QUALITY="${minimum_mapping_quality}"
+ MINIMUM_BASE_QUALITY="${minimum_base_quality}"
+ COVERAGE_CAP="${coverage_cap}"
+
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Computes a number of metrics that are useful for evaluating coverage and performance of whole genome sequencing experiments.
+
+@dataset_collections@
+
+@description@
+
+ MINIMUM_MAPPING_QUALITY=Integer
+ MQ=Integer Minimum mapping quality for a read to contribute coverage. Default value: 20.
+
+ MINIMUM_BASE_QUALITY=Integer
+ Q=Integer Minimum base quality for a base to contribute coverage. Default value: 20.
+
+ COVERAGE_CAP=Integer
+ CAP=Integer Treat bases with coverage exceeding this value as if they had coverage at this value.
+ Default value: 250.
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_DownsampleSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_DownsampleSam.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,75 @@
+
+ Downsample a file to retain a subset of the reads
+
+ picard_macros.xml
+
+
+
+ @java_options@
+ java -jar
+ \$JAVA_JAR_PATH/picard.jar
+ DownsampleSam
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ PROBABILITY=${probability}
+ RANDOM_SEED=${seed}
+ QUIET=true
+ VERBOSITY=ERROR
+ VALIDATION_STRINGENCY=${validation_stringency}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Randomly down-sample a SAM or BAM file to retain a random subset of the reads. Mate-pairs are either both kept or both discarded. Reads marked as not primary alignments are all discarded. Each read is given a probability P of being retained, so runs with the exact same input in the same order and with the same value for RANDOM_SEED will produce the same results.
+
+@dataset_collections@
+
+@description@
+
+ INPUT=File
+ I=File The input SAM or BAM file to downsample. Required.
+
+ OUTPUT=File
+ O=File The output, downsampled, SAM or BAM file to write. Required.
+
+ RANDOM_SEED=Long
+ R=Long Random seed to use if reproducibility is desired. Setting to null will cause multiple
+ invocations to produce different results.
+
+ PROBABILITY=Double
+ P=Double The probability of keeping any individual read, between 0 and 1.
+
+
+
+@more_info@
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_EstimateLibraryComplexity.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_EstimateLibraryComplexity.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,132 @@
+
+ assess sequence library complexity from read sequences
+
+ picard_macros.xml
+
+
+
+ @java_options@
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ EstimateLibraryComplexity
+
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+
+ MIN_IDENTICAL_BASES="${min_identical_bases}"
+ MAX_DIFF_RATE="${max_diff_rate}"
+ MIN_MEAN_QUALITY="${min_mean_quality}"
+ MAX_GROUP_RATIO="${max_group_ratio}"
+ #import pipes
+ READ_NAME_REGEX=${ pipes.quote( str( $read_name_regex ) ) or "''" }
+ OPTICAL_DUPLICATE_PIXEL_DISTANCE="${optical_duplicate_pixel_distance}"
+
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Purpose**
+
+Attempts to estimate library complexity from sequence of read pairs alone. Does so by sorting all reads by the first N bases (5 by default)
+of each read and then comparing reads with the first N bases identical to each other for duplicates. Reads are considered to be duplicates
+if they match each other with no gaps and an overall mismatch rate less than or equal to MAX_DIFF_RATE (0.03 by default).
+
+Reads of poor quality are filtered out so as to provide a more accurate estimate. The filtering removes reads with any no-calls in the first
+N bases or with a mean base quality lower than MIN_MEAN_QUALITY across either the first or second read.
+
+Unpaired reads are ignored in this computation.
+The algorithm attempts to detect optical duplicates separately from PCR duplicates and excludes these in the calculation of library size.
+
+Also, since there is no alignment to screen out technical reads one further filter is applied on the data. After examining all reads a Histogram
+is built of [#reads in duplicate set -> #of duplicate sets]; all bins that contain exactly one duplicate set are then removed from the Histogram
+as outliers before library size is estimated.
+
+@dataset_collections@
+
+@description@
+
+ MIN_IDENTICAL_BASES=Integer The minimum number of bases at the starts of reads that must be identical for reads to be
+ grouped together for duplicate detection. In effect total_reads / 4^max_id_bases reads
+ will be compared at a time, so lower numbers will produce more accurate results but
+ consume exponentially more memory and CPU. Default value: 5.
+
+ MAX_DIFF_RATE=Double The maximum rate of differences between two reads to call them identical. Default value:
+ 0.03.
+
+ MIN_MEAN_QUALITY=Integer The minimum mean quality of the bases in a read pair for the read to be analyzed. Reads
+ with lower average quality are filtered out and not considered in any calculations.
+ Default value: 20.
+
+ MAX_GROUP_RATIO=Integer Do not process self-similar groups that are this many times over the mean expected group
+ size. I.e. if the input contains 10m read pairs and MIN_IDENTICAL_BASES is set to 5, then
+ the mean expected group size would be approximately 10 reads. Default value: 500.
+
+ READ_NAME_REGEX=String Regular expression that can be used to parse read names in the incoming SAM file. Read
+ names are parsed to extract three variables: tile/region, x coordinate and y coordinate.
+ These values are used to estimate the rate of optical duplication in order to give a more
+ accurate estimated library size. Set this option to null to disable optical duplicate
+ detection. The regular expression should contain three capture groups for the three
+ variables, in order. It must match the entire read name. Note that if the default regex
+ is specified, a regex match is not actually done, but instead the read name is split on
+ colon character. For 5 element names, the 3rd, 4th and 5th elements are assumed to be
+ tile, x and y values. For 7 element names (CASAVA 1.8), the 5th, 6th, and 7th elements
+ are assumed to be tile, x and y values. Default value:
+ [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.
+
+ OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer
+ The maximum offset between two duplicate clusters in order to consider them optical
+ duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels)
+ unless using later versions of the Illumina pipeline that multiply pixel values by 10, in
+ which case 50-100 is more normal. Default value: 100.
+
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_FastqToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_FastqToSam.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,226 @@
+
+ convert Fastq data into unaligned BAM
+
+ picard_macros.xml
+
+
+
+ @java_options@
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ FastqToSam
+
+ #if str( $input_type.input_type_selector ) == "se":
+ FASTQ="${input_type.fastq}"
+ #elif str( $input_type.input_type_selector ) == "pe":
+ FASTQ="${input_type.fastq}"
+ FASTQ2="${input_type.fastq2}"
+ #else
+ FASTQ="${input_type.fastq.forward}"
+ FASTQ2="${input_type.fastq.reverse}"
+ #end if
+
+ QUALITY_FORMAT="${quality_format}"
+ OUTPUT="${outFile}"
+ READ_GROUP_NAME="${read_group_name}"
+ SAMPLE_NAME="${sample_name}"
+
+ #if str( $library_name ):
+ LIBRARY_NAME="${library_name}"
+ #end if
+
+ #if str( $platform_unit ):
+ PLATFORM_UNIT="${platform_unit}"
+ #end if
+
+ #if str( $platform ):
+ PLATFORM="${platform}"
+ #end if
+
+ #if str( $sequencing_center ):
+ SEQUENCING_CENTER="${sequencing_center}"
+ #end if
+
+ #if str( $predicted_insert_size ):
+ PREDICTED_INSERT_SIZE="${predicted_insert_size}"
+ #end if
+
+ #if str( $comment ):
+ COMMENT="${comment}"
+ #end if
+
+ #if str( $description ):
+ DESCRIPTION="${description}"
+ #end if
+
+ #if str( $run_date ):
+ RUN_DATE="${run_date}"
+ #end if
+
+ MIN_Q="${min_q}"
+ MAX_Q="${max_q}"
+ STRIP_UNPAIRED_MATE_NUMBER="${strip_unpairied_mate_number}"
+ ALLOW_AND_IGNORE_EMPTY_LINES="${allow_and_ignore_empty_lines}"
+
+ SORT_ORDER=coordinate
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Converts FASTQ data (single-end or paired-end) into an unaligned BAM dataset.
+
+@dataset_collections@
+
+@RG@
+
+@description@
+
+ FASTQ=File
+ F1=File Input fastq file for single end data, or first read in paired end
+ data. Required.
+
+ FASTQ2=File
+ F2=File Input fastq file for the second read of paired end data (if used).
+
+ QUALITY_FORMAT=FastqQualityFormat
+ V=FastqQualityFormat A value describing how the quality values are encoded in the fastq. Either Solexa for
+ pre-pipeline 1.3 style scores (solexa scaling + 66), Illumina for pipeline 1.3 and above
+ (phred scaling + 64) or Standard for phred scaled scores with a character shift of 33.
+ If this value is not specified, the quality format will be detected automatically.
+ Default value: null. Possible values: {Solexa, Illumina, Standard}
+
+ READ_GROUP_NAME=String
+ RG=String Read group name Default value: A.
+
+ SAMPLE_NAME=String
+ SM=String Sample name to insert into the read group header Required.
+
+ LIBRARY_NAME=String
+ LB=String The library name to place into the LB attribute in the read group header.
+
+ PLATFORM_UNIT=String
+ PU=String The platform unit (often run_barcode.lane) to insert into the read group header.
+
+ PLATFORM=String
+ PL=String The platform type (e.g. illumina, solid) to insert into the read group header.
+
+ SEQUENCING_CENTER=String
+ CN=String The sequencing center from which the data originated.
+
+ PREDICTED_INSERT_SIZE=Integer
+ PI=Integer Predicted median insert size, to insert into the read group header.
+
+ COMMENT=String
+ CO=String Comment to include in the merged output file's header.
+
+ DESCRIPTION=String
+ DS=String Inserted into the read group header.
+
+ RUN_DATE=Iso8601Date
+ DT=Iso8601Date Date the run was produced, to insert into the read group header.
+
+ MIN_Q=Integer Minimum quality allowed in the input fastq. An exception will be thrown if a quality is
+ less than this value. Default value: 0.
+
+ MAX_Q=Integer Maximum quality allowed in the input fastq. An exception will be thrown if a quality is
+ greater than this value. Default value: 93.
+
+ STRIP_UNPAIRED_MATE_NUMBER=Boolean
+ If true and this is an unpaired fastq any occurrence of '/1' will be removed from the end
+ of a read name. Default value: false. Possible values: {true, false}
+
+ ALLOW_AND_IGNORE_EMPTY_LINES=Boolean
+ Allow (and ignore) empty lines Default value: false. Possible values: {true, false}
+
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_FilterSamReads.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_FilterSamReads.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,130 @@
+
+ include or exclude aligned and unaligned reads and read lists
+
+ picard_macros.xml
+
+
+
+ @java_options@
+
+ ##Sam Sorting is performed here because FilterSamReads requires input to be in query-sorted order
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ SortSam
+ INPUT="${inputFile}"
+ OUTPUT=query_sorted_bam.bam
+ SORT_ORDER=queryname
+ VALIDATION_STRINGENCY=LENIENT
+ QUIET=true
+ VERBOSITY=ERROR
+
+ &&
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ FilterSamReads
+ INPUT=query_sorted_bam.bam
+ FILTER="${filter_type.filter}"
+
+ #if ( str( $filter_type.filter ) == "includeReadList" or str( $filter_type.filter ) == "excludeReadList" ):
+ READ_LIST_FILE="${filter_type.read_list_file}"
+ #end if
+
+ OUTPUT="${outFile}"
+ SORT_ORDER=coordinate
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Purpose**
+
+Filters a SAM or BAM dataset by including or excluding aligned reads, unaligned reads, or reads named in a supplied read list.
+
+------
+
+.. class:: warningmark
+
+**Warning on using this tool on BWA-MEM output**
+
+This tool will likely fail on BAM datasets generated by BWA MEM as it generates partial read alignments.
+
+@dataset_collections@
+
+@description@
+
+ FILTER=Filter Filter. Required. Possible values:
+ includeAligned [OUTPUT SAM/BAM will contain aligned
+ reads only. (Note that *both* first and
+ second of paired reads must be aligned to be included
+ in the OUTPUT SAM or BAM)],
+
+ excludeAligned [OUTPUT SAM/BAM will contain un-mapped reads only.
+ (Note that *both* first and second of pair must be aligned to be
+ excluded from the OUTPUT SAM or BAM)]
+
+ includeReadList [OUTPUT SAM/BAM will contain reads
+ that are supplied in the READ_LIST_FILE file]
+
+ excludeReadList [OUTPUT bam will contain
+ reads that are *not* supplied in the READ_LIST_FILE file]
+
+ READ_LIST_FILE=File
+ RLF=File Read List File containing reads that will be included or excluded from the OUTPUT SAM or
+ BAM file. Default value: null.
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_FixMateInformation.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_FixMateInformation.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,81 @@
+
+ ensure that all mate-pair information is in sync between each read and its mate pair
+
+ picard_macros.xml
+
+
+
+ @java_options@
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ FixMateInformation
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ ASSUME_SORTED=${assume_sorted}
+ ADD_MATE_CIGAR=${add_mate_cigar}
+
+ SORT_ORDER=coordinate
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Purpose**
+
+Ensure that all mate-pair information is in sync between each read and its mate pair. Reads marked with the secondary alignment flag are written to the output file unchanged.
+
+------
+
+.. class:: warningmark
+
+**Warning on using ASSUME_SORTED option**
+
+Datasets imported into Galaxy are automatically coordinate sorted. So use this option (set it to True) only if you are sure that this is necessary. If you are not sure - a good rule of thumb
+is to assume that the BAM you are working with is coordinate sorted.
+
+@dataset_collections@
+
+@description@
+
+ ASSUME_SORTED=Boolean
+ AS=Boolean If true, assume that the input file is queryname sorted, even if the header says
+ otherwise. Default value: false.
+
+ ADD_MATE_CIGAR=Boolean
+ MC=Boolean Adds the mate CIGAR tag (MC) if true, does not if false. Default value: true.
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_MarkDuplicates.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_MarkDuplicates.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,127 @@
+
+ examine aligned records in BAM datasets to locate duplicate molecules
+
+ picard_macros.xml
+
+
+
+ @java_options@
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ MarkDuplicates
+
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+
+ METRICS_FILE="${metrics_file}"
+ #for $element in $comments:
+ COMMENT="${element.comment}"
+ #end for
+ REMOVE_DUPLICATES="${remove_duplicates}"
+ ASSUME_SORTED="${assume_sorted}"
+
+ DUPLICATE_SCORING_STRATEGY="${duplicate_scoring_strategy}"
+
+ #import pipes
+ READ_NAME_REGEX=${ pipes.quote( str( $read_name_regex ) ) or "''" }
+ OPTICAL_DUPLICATE_PIXEL_DISTANCE="${optical_duplicate_pixel_distance}"
+
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Purpose**
+
+Examines aligned records in the supplied SAM or BAM dataset to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged.
+
+@dataset_collections@
+
+@description@
+
+ COMMENT=String
+ CO=String Comment(s) to include in the output file's header. This option may be specified 0 or
+ more times.
+
+ REMOVE_DUPLICATES=Boolean If true do not write duplicates to the output file instead of writing them with
+ appropriate flags set. Default value: false.
+
+ READ_NAME_REGEX=String Regular expression that can be used to parse read names in the incoming SAM file. Read
+ names are parsed to extract three variables: tile/region, x coordinate and y coordinate.
+ These values are used to estimate the rate of optical duplication in order to give a more
+ accurate estimated library size. Set this option to null to disable optical duplicate
+ detection. The regular expression should contain three capture groups for the three
+ variables, in order. It must match the entire read name. Note that if the default regex
+ is specified, a regex match is not actually done, but instead the read name is split on
+ colon character. For 5 element names, the 3rd, 4th and 5th elements are assumed to be
+ tile, x and y values. For 7 element names (CASAVA 1.8), the 5th, 6th, and 7th elements
+ are assumed to be tile, x and y values. Default value:
+ [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.
+
+ DUPLICATE_SCORING_STRATEGY=ScoringStrategy
+ DS=ScoringStrategy The scoring strategy for choosing the non-duplicate among candidates. Default value:
+ SUM_OF_BASE_QUALITIES. Possible values: {SUM_OF_BASE_QUALITIES, TOTAL_MAPPED_REFERENCE_LENGTH}
+
+ OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer
+ The maximum offset between two duplicate clusters in order to consider them optical
+ duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels)
+ unless using later versions of the Illumina pipeline that multiply pixel values by 10, in
+ which case 50-100 is more normal. Default value: 100.
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_MarkDuplicatesWithMateCigar.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_MarkDuplicatesWithMateCigar.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,160 @@
+
+ examine aligned records in BAM datasets to locate duplicate molecules
+
+ picard_macros.xml
+
+
+
+ @java_options@
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ MarkDuplicatesWithMateCigar
+
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+
+ METRICS_FILE="${metrics_file}"
+ COMMENT="${comment}"
+
+ MINIMUM_DISTANCE="${minimum_distance}"
+ SKIP_PAIRS_WITH_NO_MATE_CIGAR="${skip_pairs_with_no_mate_cigar}"
+
+
+ REMOVE_DUPLICATES="${remove_duplicates}"
+ ASSUME_SORTED="${assume_sorted}"
+
+ DUPLICATE_SCORING_STRATEGY="${duplicate_scoring_strategy}"
+
+ #import pipes
+ READ_NAME_REGEX=${ pipes.quote( str( $read_name_regex ) ) or "''" }
+ OPTICAL_DUPLICATE_PIXEL_DISTANCE="${optical_duplicate_pixel_distance}"
+
+
+ BLOCK_SIZE=100000
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Purpose**
+
+Examines aligned records in the supplied SAM or BAM dataset to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged.
+
+------
+
+.. class:: warningmark
+
+On the difference between **MarkDuplicates** and **picard_MarkDuplicatesWithMateCigar**
+
+From Samtools Announce MailingList_:
+
+This tool can replace MarkDuplicates if the input SAM/BAM has Mate CIGAR (MC) optional tags pre-computed
+(see the tools RevertOriginalBaseQualitiesAndAddMateCigar and FixMateInformation). This allows the new tool
+to perform a streaming duplicate marking routine (i.e. a single-pass). This tool cannot be used with
+alignments that have large gaps or reference skips, which happens frequently in RNA-seq data.
+
+.. _MailingList: http://sourceforge.net/p/samtools/mailman/message/32910359/
+
+@dataset_collections@
+
+@description@
+
+ MINIMUM_DISTANCE=Integer The minimum distance to buffer records to account for clipping on the 5' end of the
+ records. Set this number to -1 to use twice the first read's read length (or 100,
+ whichever is smaller). Default value: -1. This option can be set to 'null' to clear the
+ default value.
+
+ SKIP_PAIRS_WITH_NO_MATE_CIGAR=Boolean
+ Skip record pairs with no mate cigar and include them in the output. Default value:
+ true. This option can be set to 'null' to clear the default value. Possible values:
+ {true, false}
+
+ COMMENT=String
+ CO=String Comment(s) to include in the output file's header. This option may be specified 0 or
+ more times.
+
+ REMOVE_DUPLICATES=Boolean If true do not write duplicates to the output file instead of writing them with
+ appropriate flags set. Default value: false.
+
+ READ_NAME_REGEX=String Regular expression that can be used to parse read names in the incoming SAM file. Read
+ names are parsed to extract three variables: tile/region, x coordinate and y coordinate.
+ These values are used to estimate the rate of optical duplication in order to give a more
+ accurate estimated library size. Set this option to null to disable optical duplicate
+ detection. The regular expression should contain three capture groups for the three
+ variables, in order. It must match the entire read name. Note that if the default regex
+ is specified, a regex match is not actually done, but instead the read name is split on
+ colon character. For 5 element names, the 3rd, 4th and 5th elements are assumed to be
+ tile, x and y values. For 7 element names (CASAVA 1.8), the 5th, 6th, and 7th elements
+ are assumed to be tile, x and y values. Default value:
+ [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.
+
+ DUPLICATE_SCORING_STRATEGY=ScoringStrategy
+ DS=ScoringStrategy The scoring strategy for choosing the non-duplicate among candidates. Default value:
+ TOTAL_MAPPED_REFERENCE_LENGTH. Possible values: {SUM_OF_BASE_QUALITIES, TOTAL_MAPPED_REFERENCE_LENGTH}
+
+ OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer
+ The maximum offset between two duplicate clusters in order to consider them optical
+ duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels)
+ unless using later versions of the Illumina pipeline that multiply pixel values by 10, in
+ which case 50-100 is more normal. Default value: 100.
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_MeanQualityByCycle.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_MeanQualityByCycle.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,111 @@
+
+ chart distribution of base qualities
+
+ picard_macros.xml
+
+
+ R
+
+
+ @java_options@
+ ##set up input files
+
+ #set $reference_fasta_filename = "localref.fa"
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+ ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
+ #else:
+ #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+ #end if
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ MeanQualityByCycle
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ CHART_OUTPUT="${pdfFile}"
+ REFERENCE_SEQUENCE="${reference_fasta_filename}"
+ ALIGNED_READS_ONLY="${aligned_reads_only}"
+ PF_READS_ONLY="${pf_reads_only}"
+
+ ASSUME_SORTED="${assume_sorted}"
+
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Program to chart the distribution of base qualities by cycle within reads supplied in a SAM or BAM dataset.
+
+@dataset_collections@
+
+@description@
+
+ ALIGNED_READS_ONLY=Boolean If set to true, calculate the base distribution over aligned reads only. Default value:
+ false. Possible values: {true, false}
+
+ PF_READS_ONLY=Boolean If set to true calculate the base distribution over PF reads only. Default value: false.
+ This option can be set to 'null' to clear the default value. Possible values: {true,
+ false}
+
+ ASSUME_SORTED=Boolean
+ AS=Boolean If true (default), then the sort order in the header file will be ignored. Default: True
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_MergeBamAlignment.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_MergeBamAlignment.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,327 @@
+
+ merge alignment data with additional info stored in an unmapped BAM dataset
+
+ picard_macros.xml
+
+
+
+ @java_options@
+ #set $picard_dict = "localref.dict"
+ #set $ref_fasta = "localref.fa" ## This is done because picards "likes" .fa extension
+
+ ln -s "${reference_source.ref_file}" "${ref_fasta}" &&
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+
+ java -jar \$JAVA_JAR_PATH/picard.jar CreateSequenceDictionary REFERENCE="${ref_fasta}" OUTPUT="${picard_dict}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+ &&
+
+ #else:
+
+ #set $ref_fasta = str( $reference_source.ref_file.fields.path )
+
+ #end if
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ MergeBamAlignment
+ UNMAPPED_BAM="${unmapped_bam}"
+
+ PAIRED_RUN=true ##This argument is ignored and will be removed. Required. Possible values: {true, false}
+
+ #if str( $aligned_or_read1_and_read2.aligned_or_read1_and_read2_selector ) == "paired_one_file":
+ #for $dataset in $aligned_or_read1_and_read2.aligned_bams:
+ ALIGNED_BAM="${dataset.aligned_bam}"
+ #end for
+ #elif str( $aligned_or_read1_and_read2.aligned_or_read1_and_read2_selector ) == "paired_two_files":
+ #for $dataset in $aligned_or_read1_and_read2.read1_aligned_bams:
+ READ1_ALIGNED_BAM="${dataset.read1_aligned_bam}"
+ #end for
+ #for $dataset in $aligned_or_read1_and_read2.read2_aligned_bams:
+ READ2_ALIGNED_BAM="${dataset.read2_aligned_bam}"
+ #end for
+ #else
+ #for $dataset in $aligned_or_read1_and_read2.read1_aligned_bams:
+ READ1_ALIGNED_BAM="${dataset.read1_aligned_bam}"
+ #end for
+ #end if
+
+ OUTPUT="${outFile}"
+ REFERENCE_SEQUENCE="${ref_fasta}"
+
+ CLIP_ADAPTERS="${clip_adapters}"
+ IS_BISULFITE_SEQUENCE="${is_bisulfite_sequence}"
+ ALIGNED_READS_ONLY="${aligned_reads_only}"
+ MAX_INSERTIONS_OR_DELETIONS="${max_insertions_or_deletions}"
+
+ #for $attribute in $attributes_to_retain:
+ ATTRIBUTES_TO_RETAIN="${$attribute.attribute}"
+ #end for
+
+ #for $attribute in $attributes_to_remove:
+ ATTRIBUTES_TO_REMOVE="${$attribute.attribute}"
+ #end for
+
+ READ1_TRIM="${read1_trim}"
+ READ2_TRIM="${read2_trim}"
+
+ #if str( $orientations ) != "None":
+ #for $orientation in str( $orientations ).split(','): ## See trello card https://trello.com/c/9nW02Zhd
+ EXPECTED_ORIENTATIONS="${orientation}"
+ #end for
+ #end if
+
+ ALIGNER_PROPER_PAIR_FLAGS="${aligner_proper_pair_flags}"
+ PRIMARY_ALIGNMENT_STRATEGY="${primary_alignment_strategy}"
+ CLIP_OVERLAPPING_READS="${clip_overlapping_reads}"
+ INCLUDE_SECONDARY_ALIGNMENTS="${include_secondary_alignments}"
+ ADD_MATE_CIGAR="${add_mate_cigar}"
+
+ VALIDATION_STRINGENCY="${validation_stringency}"
+
+ SORT_ORDER=coordinate
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Merges alignment data from a SAM or BAM dataset with additional data stored in an unmapped BAM dataset and produces a third SAM or BAM dataset of aligned and unaligned reads.
+
+@dataset_collections@
+
+@description@
+
+ UNMAPPED_BAM=File
+ UNMAPPED=File Original SAM or BAM file of unmapped reads, which must be in queryname order. Required.
+
+ ALIGNED_BAM=File
+ ALIGNED=File SAM or BAM file(s) with alignment data. This option may be specified 0 or more times.
+ Cannot be used in conjunction with option(s) READ1_ALIGNED_BAM (R1_ALIGNED)
+ READ2_ALIGNED_BAM (R2_ALIGNED)
+
+ READ1_ALIGNED_BAM=File
+ R1_ALIGNED=File SAM or BAM file(s) with alignment data from the first read of a pair. This option may be
+ specified 0 or more times. Cannot be used in conjunction with option(s) ALIGNED_BAM
+ (ALIGNED)
+
+ READ2_ALIGNED_BAM=File
+ R2_ALIGNED=File SAM or BAM file(s) with alignment data from the second read of a pair. This option may
+ be specified 0 or more times. Cannot be used in conjunction with option(s) ALIGNED_BAM
+ (ALIGNED)
+
+ PAIRED_RUN=Boolean
+ PE=Boolean This argument is ignored and will be removed. Required. Possible values: {true, false}
+
+ JUMP_SIZE=Integer
+ JUMP=Integer The expected jump size (required if this is a jumping library). Deprecated. Use
+ EXPECTED_ORIENTATIONS instead Default value: null. Cannot be used in conjunction with
+ option(s) EXPECTED_ORIENTATIONS (ORIENTATIONS)
+
+ CLIP_ADAPTERS=Boolean Whether to clip adapters where identified. Default value: true. Possible values: {true, false}
+
+ IS_BISULFITE_SEQUENCE=Boolean Whether the lane is bisulfite sequence (used when calculating the NM tag). Default value:
+ false. Possible values: {true, false}
+
+ ALIGNED_READS_ONLY=Boolean Whether to output only aligned reads. Default value: false. Possible values: {true, false}
+
+ MAX_INSERTIONS_OR_DELETIONS=Integer
+ MAX_GAPS=Integer The maximum number of insertions or deletions permitted for an alignment to be included.
+ Alignments with more than this many insertions or deletions will be ignored. Set to -1 to
+ allow any number of insertions or deletions. Default value: 1.
+
+ ATTRIBUTES_TO_RETAIN=String Reserved alignment attributes (tags starting with X, Y, or Z) that should be brought over
+ from the alignment data when merging. This option may be specified 0 or more times.
+
+ ATTRIBUTES_TO_REMOVE=String Attributes from the alignment record that should be removed when merging. This overrides
+ ATTRIBUTES_TO_RETAIN if they share common tags. This option may be specified 0 or more
+ times.
+
+ READ1_TRIM=Integer
+ R1_TRIM=Integer The number of bases trimmed from the beginning of read 1 prior to alignment Default
+ value: 0.
+
+ READ2_TRIM=Integer
+ R2_TRIM=Integer The number of bases trimmed from the beginning of read 2 prior to alignment Default
+ value: 0.
+
+ EXPECTED_ORIENTATIONS=PairOrientation
+ ORIENTATIONS=PairOrientation The expected orientation of proper read pairs. Replaces JUMP_SIZE Possible values: {FR,
+ RF, TANDEM} This option may be specified 0 or more times. Cannot be used in conjunction
+ with option(s) JUMP_SIZE (JUMP)
+
+ ALIGNER_PROPER_PAIR_FLAGS=Boolean
+ Use the aligner's idea of what a proper pair is rather than computing in this program.
+ Default value: false. Possible values: {true, false}
+
+ SORT_ORDER=SortOrder
+ SO=SortOrder The order in which the merged reads should be output. Default value: coordinate.
+ Possible values: {unsorted, queryname, coordinate}
+
+ PRIMARY_ALIGNMENT_STRATEGY=PrimaryAlignmentStrategy
+ Strategy for selecting primary alignment when the aligner has provided more than one
+ alignment for a pair or fragment, and none are marked as primary, more than one is marked
+ as primary, or the primary alignment is filtered out for some reason. BestMapq expects
+ that multiple alignments will be correlated with HI tag, and prefers the pair of
+ alignments with the largest MAPQ, in the absence of a primary selected by the aligner.
+ EarliestFragment prefers the alignment which maps the earliest base in the read. Note
+ that EarliestFragment may not be used for paired reads. BestEndMapq is appropriate for
+ cases in which the aligner is not pair-aware, and does not output the HI tag. It simply
+ picks the alignment for each end with the highest MAPQ, and makes those alignments
+ primary, regardless of whether the two alignments make sense together. MostDistant is also
+ for a non-pair-aware aligner, and picks the alignment pair with the largest insert size.
+ If all alignments would be chimeric, it picks the alignments for each end with the best
+ MAPQ. For all algorithms, ties are resolved arbitrarily. Default value: BestMapq.
+ Possible values: {BestMapq, EarliestFragment, BestEndMapq, MostDistant}
+
+ CLIP_OVERLAPPING_READS=Boolean For paired reads, soft clip the 3' end of each read if necessary so that it does not
+ extend past the 5' end of its mate. Default value: true. Possible values: {true, false}
+
+ INCLUDE_SECONDARY_ALIGNMENTS=Boolean
+ If false, do not write secondary alignments to output. Default value: true.
+ Possible values: {true, false}
+
+ ADD_MATE_CIGAR=Boolean
+ MC=Boolean Adds the mate CIGAR tag (MC) if true, does not if false. Possible values: {true, false}
+
+
+
+
+@more_info@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_MergeSamFiles.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_MergeSamFiles.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,92 @@
+
+ merges multiple SAM/BAM datasets into one
+
+ picard_macros.xml
+
+
+
+ @java_options@
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ MergeSamFiles
+
+ #for $element in $inputFile:
+ INPUT="${element}"
+ #end for
+
+ OUTPUT="${outFile}"
+ MERGE_SEQUENCE_DICTIONARIES="${merge_sequence_dictionaries}"
+
+ ASSUME_SORTED="${assume_sorted}"
+ #for $element in $comments:
+ COMMENT="${element.comment}"
+ #end for
+
+ USE_THREADING=true
+ SORT_ORDER=coordinate
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Purpose**
+
+Merges multiple SAM/BAM datasets into one.
+
+@dataset_collections@
+
+@description@
+
+ ASSUME_SORTED=Boolean
+ AS=Boolean If true, assume that the input files are in the same sort order as the requested output
+ sort order, even if their headers say otherwise. Default value: false. This option can
+ be set to 'null' to clear the default value. Possible values: {true, false}
+
+ MERGE_SEQUENCE_DICTIONARIES=Boolean
+ MSD=Boolean Merge the sequence dictionaries Default value: false. This option can be set to 'null'
+ to clear the default value. Possible values: {true, false}
+
+ COMMENT=String
+ CO=String Comment(s) to include in the merged output file's header. This option may be specified 0
+ or more times.
+
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_NormalizeFasta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_NormalizeFasta.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,70 @@
+
+ normalize fasta datasets
+
+ picard_macros.xml
+
+
+
+ @java_options@
+
+
+ ## Two lines below are due to the fact that picard likes fasta files to have extension .fa
+ #set $fasta_file="local_fasta.fa"
+ ln -s "${inputFile}" "${fasta_file}" &&
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ NormalizeFasta
+
+ INPUT="${fasta_file}"
+ OUTPUT="${outFile}"
+ LINE_LENGTH="${line_length}"
+ TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE="${truncate_sequence_names_at_whitespaces}"
+
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Purpose**
+
+Takes any dataset that conforms to the fasta format and normalizes it so that all lines of sequence except the last line per named sequence are of the same length.
+
+@dataset_collections@
+
+@description@
+
+ LINE_LENGTH=Integer The line length to be used for the output fasta file. Default value: 100.
+
+ TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE=Boolean
+ Truncate sequence names at first whitespace. Default value: false. Possible values: {true, false}
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_QualityScoreDistribution.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_QualityScoreDistribution.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,116 @@
+
+ chart quality score distribution
+
+ picard_macros.xml
+
+
+ R
+
+
+ @java_options@
+ ##set up input files
+
+ #set $reference_fasta_filename = "localref.fa"
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+ ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
+ #else:
+ #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+ #end if
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ QualityScoreDistribution
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ CHART_OUTPUT="${pdfFile}"
+ REFERENCE_SEQUENCE="${reference_fasta_filename}"
+ ALIGNED_READS_ONLY="${aligned_reads_only}"
+ PF_READS_ONLY="${pf_reads_only}"
+ INCLUDE_NO_CALLS="${include_no_calls}"
+
+ ASSUME_SORTED="${assume_sorted}"
+
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+Program to chart quality score distributions in a SAM or BAM dataset.
+
+@dataset_collections@
+
+@description@
+
+ ALIGNED_READS_ONLY=Boolean If set to true, calculate the base distribution over aligned reads only. Default value:
+ false. Possible values: {true, false}
+
+ PF_READS_ONLY=Boolean If set to true calculate the base distribution over PF reads only. Default value: false.
+ Possible values: {true, false}
+
+ INCLUDE_NO_CALLS=Boolean If set to true, include quality for no-call bases in the distribution. Default value:
+ false. Possible values: {true, false}
+
+ ASSUME_SORTED=Boolean
+ AS=Boolean If true (default), then the sort order in the header file will be ignored. Default: True
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_ReorderSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_ReorderSam.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,130 @@
+
+ reorder reads to match ordering in reference sequences
+
+ picard_macros.xml
+
+
+
+ @java_options@
+ #set $picard_dict = "localref.dict"
+ #set $ref_fasta = "localref.fa" ## This is done because picards "likes" .fa extension
+
+ ln -s "${reference_source.ref_file}" "${ref_fasta}" &&
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+
+ java -jar \$JAVA_JAR_PATH/picard.jar CreateSequenceDictionary REFERENCE="${ref_fasta}" OUTPUT="${picard_dict}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+ &&
+
+ #else:
+
+ #set $ref_fasta = str( $reference_source.ref_file.fields.path )
+
+ #end if
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ ReorderSam
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ REFERENCE="${ref_fasta}"
+ ALLOW_INCOMPLETE_DICT_CONCORDANCE="${allow_incomplete_dict_concordance}"
+ ALLOW_CONTIG_LENGTH_DISCORDANCE="${allow_contig_length_discordance}"
+
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Purpose**
+
+ReorderSam reorders reads in a SAM/BAM file to match the contig ordering in a provided reference file, as determined by exact name matching of contigs. Reads mapped to contigs absent in the new reference are dropped.
+
+@dataset_collections@
+
+----
+
+.. class:: warningmark
+
+Not to be confused with **SortSam**.
+
+@description@
+
+ ALLOW_INCOMPLETE_DICT_CONCORDANCE=Boolean
+ S=Boolean If true, then allows only a partial overlap of the BAM contigs with the new reference
+ sequence contigs. By default, this tool requires a corresponding contig in the new
+ reference for each read contig Default value: false. Possible values: {true, false}
+
+ ALLOW_CONTIG_LENGTH_DISCORDANCE=Boolean
+ U=Boolean If true, then permits mapping from a read contig to a new reference contig with the same
+ name but a different length. Highly dangerous, only use if you know what you are doing.
+ Default value: false. Possible values: {true, false}
+
+@more_info@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_ReplaceSamHeader.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_ReplaceSamHeader.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,63 @@
+
+ replace header in a SAM/BAM dataset
+
+ picard_macros.xml
+
+
+
+ @java_options@
+
+
+ ## Two lines below are due to the fact that picard likes fasta files to have extension .fa
+ #set $fasta_file="local_fasta.fa"
+ ln -s "${inputFile}" "${fasta_file}" &&
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ ReplaceSamHeader
+
+ INPUT="${inputFile}"
+ HEADER="${header}"
+ OUTPUT="${outFile}"
+
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Purpose**
+
+Replace the SAMFileHeader in a SAM/BAM dataset with the given header. Validation is minimal. It is up to the user to ensure that all the elements referred to in the SAMRecords are present in the new header. Sort order of the two input datasets must be the same.
+@dataset_collections@
+
+@description@
+
+ HEADER=File SAM file from which SAMFileHeader will be read. Required.
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_RevertOriginalBaseQualitiesAndAddMateCigar.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_RevertOriginalBaseQualitiesAndAddMateCigar.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,75 @@
+
+ revert the original base qualities and add the mate cigar tag
+
+ picard_macros.xml
+
+
+
+ @java_options@
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ RevertOriginalBaseQualitiesAndAddMateCigar
+
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+
+ RESTORE_ORIGINAL_QUALITIES="${restore_original_qualities}"
+ MAX_RECORDS_TO_EXAMINE="${max_records_to_examine}"
+
+ SORT_ORDER=coordinate
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Purpose**
+
+Reverts the original base qualities and adds the mate cigar tag to SAM or BAMs.
+
+@dataset_collections@
+
+@description@
+
+ RESTORE_ORIGINAL_QUALITIES=Boolean
+ OQ=Boolean True to restore original qualities from the OQ field to the QUAL field if available.
+ Default value: true. Possible values: {true, false}
+
+ MAX_RECORDS_TO_EXAMINE=Integer The maximum number of records to examine to determine if we can exit early and not
+ output, given that there are no original base qualities (if we are to restore) and mate
+ cigars exist. Set to 0 to never skip the file. Default value: 10000.
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_RevertSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_RevertSam.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,136 @@
+
+ revert SAM/BAM datasets to a previous state
+
+ picard_macros.xml
+
+
+
+ @java_options@
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ RevertSam
+
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+
+ RESTORE_ORIGINAL_QUALITIES="${restore_original_qualities}"
+ REMOVE_DUPLICATE_INFORMATION="${remove_duplicate_information}"
+ REMOVE_ALIGNMENT_INFORMATION="${remove_alignment_information}"
+
+ #for $attribute_to_clear in $attributes_to_clear:
+ ATTRIBUTE_TO_CLEAR="${attribute_to_clear.attribute}"
+ #end for
+
+ SANITIZE="${sanitize}"
+ MAX_DISCARD_FRACTION="${max_discard_fraction}"
+ SAMPLE_ALIAS="${sample_alias}"
+ LIBRARY_NAME="${library_name}"
+
+ SORT_ORDER="${sort_order}"
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Purpose**
+
+Reverts SAM or BAM files to a previous state by removing certain types of information and/or substituting in the original quality scores when available.
+
+@dataset_collections@
+
+@description@
+
+ SORT_ORDER=SortOrder
+ SO=SortOrder The sort order to create the reverted output file with. Default value: queryname.
+ Possible values: {unsorted, queryname, coordinate}
+
+ RESTORE_ORIGINAL_QUALITIES=Boolean
+ OQ=Boolean True to restore original qualities from the OQ field to the QUAL field if available.
+ Default value: true. Possible values: {true, false}
+
+ REMOVE_DUPLICATE_INFORMATION=Boolean
+ Remove duplicate read flags from all reads. Note that if this is true and
+ REMOVE_ALIGNMENT_INFORMATION==false, the output may have the unusual but sometimes
+ desirable trait of having unmapped reads that are marked as duplicates. Default value:
+ true. Possible values: {true, false}
+
+ REMOVE_ALIGNMENT_INFORMATION=Boolean
+ Remove all alignment information from the file. Default value: true. Possible values: {true, false}
+
+ ATTRIBUTE_TO_CLEAR=String When removing alignment information, the set of optional tags to remove. This option may
+ be specified 0 or more times.
+
+ SANITIZE=Boolean WARNING: This option is potentially destructive. If enabled will discard reads in order
+ to produce a consistent output BAM. Reads discarded include (but are not limited to)
+ paired reads with missing mates, duplicated records, records with mismatches in length of
+ bases and qualities. This option can only be enabled if the output sort order is
+ queryname and will always cause sorting to occur. Possible values: {true, false}
+
+ MAX_DISCARD_FRACTION=Double If SANITIZE=true and higher than MAX_DISCARD_FRACTION reads are discarded due to
+ sanitization then the program will exit with an Exception instead of exiting cleanly.
+ Output BAM will still be valid. Default value: 0.01.
+
+ SAMPLE_ALIAS=String
+ ALIAS=String The sample alias to use in the reverted output file. This will override the existing
+ sample alias in the file and is used only if all the read groups in the input file have
+ the same sample alias. Default value: null.
+
+ LIBRARY_NAME=String
+ LIB=String The library name to use in the reverted output file. This will override the existing
+ library name in the file and is used only if all the read groups in the input file have
+ the same library name. Default value: null.
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_SamToFastq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_SamToFastq.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,196 @@
+
+ extract reads and qualities from SAM/BAM dataset and convert to fastq
+
+ picard_macros.xml
+
+
+
+
+ echo "BAM" > $report && ## This is necessary for output dataset detection (see output tags below)
+
+ @java_options@
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ SamToFastq
+
+ INPUT="${inputFile}"
+
+ #if str( $output_per_rg ) == "true":
+ OUTPUT_PER_RG=true
+ OUTPUT_DIR=.
+ #elif str( $output_per_rg ) == "false" and str( $interleave ) == "false":
+ FASTQ=READ1.fastq
+ SECOND_END_FASTQ=READ2.fastq
+ UNPAIRED_FASTQ=UNPAIRED_READS.fastq
+ #elif str( $output_per_rg ) == "false" and str( $interleave ) == "true":
+ FASTQ=INTERLEAVED.fastq
+ #end if
+
+ RE_REVERSE="${re_reverse}"
+ INTERLEAVE="${interleave}"
+ INCLUDE_NON_PF_READS="${include_non_pf_reads}"
+ CLIPPING_ATTRIBUTE="${clipping_attribute}"
+ CLIPPING_ACTION="${clipping_action}"
+ READ1_TRIM="${read1_trim}"
+
+ #if int($read1_max_bases_to_write) > -1:
+ READ1_MAX_BASES_TO_WRITE="${read1_max_bases_to_write}"
+ #end if
+
+ READ2_TRIM="${read2_trim}"
+
+ #if int($read2_max_bases_to_write) > -1:
+ READ2_MAX_BASES_TO_WRITE="${read2_max_bases_to_write}"
+ #end if
+
+ INCLUDE_NON_PRIMARY_ALIGNMENTS="${include_non_primary_alignments}"
+
+
+ VALIDATION_STRINGENCY="${validation_stringency}"
+ QUIET=true
+ VERBOSITY=ERROR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Purpose**
+
+Extracts read sequences and qualities from the input SAM/BAM dataset and outputs them in Sanger fastq format. In the RE_REVERSE=True mode (default behavior), if the read is aligned and the alignment is to the reverse strand on the genome, the read's sequence from the input SAM/BAM dataset will be reverse-complemented prior to writing it to fastq in order to correctly restore the original read sequence as it was generated by the sequencer.
+
+-----
+
+.. class:: warningmark
+
+**DANGER: Multiple Outputs**
+
+Generating per readgroup fastq (setting **OUTPUT_PER_RG** to True) may produce very large numbers of outputs. Know what you are doing!
+
+@dataset_collections@
+
+@description@
+
+ FASTQ=File
+ F=File Output fastq file (single-end fastq or, if paired, first end of the pair fastq).
+ Required. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
+
+ SECOND_END_FASTQ=File
+ F2=File Output fastq file (if paired, second end of the pair fastq). Default value: null.
+ Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
+
+ UNPAIRED_FASTQ=File
+ FU=File Output fastq file for unpaired reads; may only be provided in paired-fastq mode Default
+ value: null. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
+
+ OUTPUT_PER_RG=Boolean
+ OPRG=Boolean Output a fastq file per read group (two fastq files per read group if the group is
+ paired). Default value: false. Possible values: {true, false} Cannot be used in
+ conjunction with option(s) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) FASTQ (F)
+
+ OUTPUT_DIR=File
+ ODIR=File Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true.
+ Default value: null.
+
+ RE_REVERSE=Boolean
+ RC=Boolean Re-reverse bases and qualities of reads with negative strand flag set before writing them
+ to fastq Default value: true. Possible values: {true, false}
+
+ INTERLEAVE=Boolean
+ INTER=Boolean Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe
+ which end it came from Default value: false. Possible values: {true, false}
+
+ INCLUDE_NON_PF_READS=Boolean
+ NON_PF=Boolean Include non-PF reads from the SAM file into the output FASTQ files. PF means 'passes
+ filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads.
+ Default value: false. Possible values: {true, false}
+
+ CLIPPING_ATTRIBUTE=String
+ CLIP_ATTR=String The attribute that stores the position at which the SAM record should be clipped Default
+ value: null.
+
+ CLIPPING_ACTION=String
+ CLIP_ACT=String The action that should be taken with clipped reads: 'X' means the reads and qualities
+ should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in
+ the clipped region; and any integer means that the base qualities should be set to that
+ value in the clipped region. Default value: null.
+
+ READ1_TRIM=Integer
+ R1_TRIM=Integer The number of bases to trim from the beginning of read 1. Default value: 0.
+
+ READ1_MAX_BASES_TO_WRITE=Integer
+ R1_MAX_BASES=Integer The maximum number of bases to write from read 1 after trimming. If there are fewer than
+ this many bases left after trimming, all will be written. If this value is null then all
+ bases left after trimming will be written. Default value: null.
+
+ READ2_TRIM=Integer
+ R2_TRIM=Integer The number of bases to trim from the beginning of read 2. Default value: 0.
+
+ READ2_MAX_BASES_TO_WRITE=Integer
+ R2_MAX_BASES=Integer The maximum number of bases to write from read 2 after trimming. If there are fewer than
+ this many bases left after trimming, all will be written. If this value is null then all
+ bases left after trimming will be written. Default value: null.
+
+ INCLUDE_NON_PRIMARY_ALIGNMENTS=Boolean
+ If true, include non-primary alignments in the output. Support of non-primary alignments
+ in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and
+ there are paired reads with non-primary alignments. Default value: false.
+ Possible values: {true, false}
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/picard_ValidateSamFile.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/picard_ValidateSamFile.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,226 @@
+
+ assess validity of SAM/BAM dataset
+
+ picard_macros.xml
+
+
+
+
+ ##set the maximum number of open files to the hard maximum, or to 4096 if on a mac (mac gives 'unlimited' as output of the `ulimit -Hn` command)
+
+ [ `ulimit -Hn` = unlimited ] && ulimit -Sn 4096 || ulimit -Sn `ulimit -Hn`
+
+ &&
+
+ ##set up input files
+
+ #set $reference_fasta_filename = "localref.fa"
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+ ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
+ #else:
+ #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+ #end if
+
+ @java_options@
+
+ java -jar \$JAVA_JAR_PATH/picard.jar
+ ValidateSamFile
+
+ INPUT="${inputFile}"
+ OUTPUT="${outFile}"
+ MODE="${mode}"
+
+ #if str( $ignore ) != "None":
+ #for $element in str( $ignore ).split(','): ## See trello card https://trello.com/c/9nW02Zhd
+ IGNORE="${element}"
+ #end for
+ #end if
+
+ MAX_OUTPUT="${max_output}"
+ REFERENCE_SEQUENCE="${reference_fasta_filename}"
+ IGNORE_WARNINGS="${ignore_warnings}"
+ IS_BISULFITE_SEQUENCED="${is_bisulfite_sequenced}"
+ MAX_OPEN_TEMP_FILES=`ulimit -Sn`
+
+ VERBOSITY=ERROR
+ QUIET=true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Purpose**
+
+Reads a SAM/BAM dataset and reports on its validity.
+
+@dataset_collections@
+
+@description@
+
+ MODE=Mode
+ M=Mode Mode of output Default value: VERBOSE. This option can be set to 'null' to clear the
+ default value. Possible values: {VERBOSE, SUMMARY}
+
+ IGNORE=Type List of validation error types to ignore. Possible values: {INVALID_QUALITY_FORMAT,
+ INVALID_FLAG_PROPER_PAIR, INVALID_FLAG_MATE_UNMAPPED, MISMATCH_FLAG_MATE_UNMAPPED,
+ INVALID_FLAG_MATE_NEG_STRAND, MISMATCH_FLAG_MATE_NEG_STRAND, INVALID_FLAG_FIRST_OF_PAIR,
+ INVALID_FLAG_SECOND_OF_PAIR, PAIRED_READ_NOT_MARKED_AS_FIRST_OR_SECOND,
+ INVALID_FLAG_NOT_PRIM_ALIGNMENT, INVALID_FLAG_SUPPLEMENTARY_ALIGNMENT,
+ INVALID_FLAG_READ_UNMAPPED, INVALID_INSERT_SIZE, INVALID_MAPPING_QUALITY, INVALID_CIGAR,
+ ADJACENT_INDEL_IN_CIGAR, INVALID_MATE_REF_INDEX, MISMATCH_MATE_REF_INDEX,
+ INVALID_REFERENCE_INDEX, INVALID_ALIGNMENT_START, MISMATCH_MATE_ALIGNMENT_START,
+ MATE_FIELD_MISMATCH, INVALID_TAG_NM, MISSING_TAG_NM, MISSING_HEADER,
+ MISSING_SEQUENCE_DICTIONARY, MISSING_READ_GROUP, RECORD_OUT_OF_ORDER,
+ READ_GROUP_NOT_FOUND, RECORD_MISSING_READ_GROUP, INVALID_INDEXING_BIN,
+ MISSING_VERSION_NUMBER, INVALID_VERSION_NUMBER, TRUNCATED_FILE,
+ MISMATCH_READ_LENGTH_AND_QUALS_LENGTH, EMPTY_READ, CIGAR_MAPS_OFF_REFERENCE,
+ MISMATCH_READ_LENGTH_AND_E2_LENGTH, MISMATCH_READ_LENGTH_AND_U2_LENGTH,
+ E2_BASE_EQUALS_PRIMARY_BASE, BAM_FILE_MISSING_TERMINATOR_BLOCK, UNRECOGNIZED_HEADER_TYPE,
+ POORLY_FORMATTED_HEADER_TAG, HEADER_TAG_MULTIPLY_DEFINED,
+ HEADER_RECORD_MISSING_REQUIRED_TAG, INVALID_DATE_STRING, TAG_VALUE_TOO_LARGE,
+ INVALID_INDEX_FILE_POINTER, INVALID_PREDICTED_MEDIAN_INSERT_SIZE,
+ DUPLICATE_READ_GROUP_ID, MISSING_PLATFORM_VALUE, INVALID_PLATFORM_VALUE,
+ DUPLICATE_PROGRAM_GROUP_ID, MATE_NOT_FOUND, MATES_ARE_SAME_END,
+ MISMATCH_MATE_CIGAR_STRING, MATE_CIGAR_STRING_INVALID_PRESENCE} This option may be
+ specified 0 or more times.
+
+ MAX_OUTPUT=Integer
+ MO=Integer The maximum number of lines output in verbose mode Default value: 100. This option can
+ be set to 'null' to clear the default value.
+
+ REFERENCE_SEQUENCE=File
+ R=File Reference sequence file, the NM tag check will be skipped if this is missing Default
+ value: null.
+
+ IGNORE_WARNINGS=Boolean If true, only report errors and ignore warnings. Default value: false. This option can
+ be set to 'null' to clear the default value. Possible values: {true, false}
+
+ VALIDATE_INDEX=Boolean If true and input is a BAM file with an index file, also validates the index. Default
+ value: true. This option can be set to 'null' to clear the default value. Possible
+ values: {true, false}
+
+ IS_BISULFITE_SEQUENCED=Boolean
+ BISULFITE=Boolean Whether the SAM or BAM file consists of bisulfite sequenced reads. If so, C->T is not
+ counted as an error in computing the value of the NM tag. Default value: false. This
+ option can be set to 'null' to clear the default value. Possible values: {true, false}
+
+@more_info@
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/read_group_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/read_group_macros.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,294 @@
+
+
+
+#def identifier_or_name($input1)
+    ## Return the element_identifier attribute of the input when it has one;
+    ## otherwise return its name with a trailing '.gz', then '.fastq', then
+    ## '.fq' extension removed.
+    #if hasattr($input1, 'element_identifier')
+        #return $input1.element_identifier
+    #end if
+    #set $base_name = $input1.name
+    ## str.rstrip() takes a set of characters, not a suffix, so chaining it
+    ## also removed legitimate trailing letters (for example 'reads_q.fastq'
+    ## became 'reads_'). Remove whole extensions instead.
+    #for $extension in ('.gz', '.fastq', '.fq')
+        #if $base_name.endswith($extension)
+            #set $base_name = $base_name[:-len($extension)]
+        #end if
+    #end for
+    #return $base_name
+#end def
+
+#def clean(name)
+    ## Replace every character that is not a word character, '-', '_' or '.'
+    ## with an underscore and return the result.
+    #import re
+    #return re.sub('[^\w\-_\.]', '_', $name)
+#end def
+
+#def read_group_name_default($input1, $input2=None)
+    ## Default read group name. With one input this is its cleaned name. With
+    ## two inputs it is the common prefix of both cleaned names when that
+    ## prefix is longer than 3 characters, otherwise the cleaned name of the
+    ## first input.
+    #if $input2 is None
+        #return $clean($identifier_or_name($input1))
+    #else
+        #import itertools
+        #set $input_name1 = $clean($identifier_or_name($input1))
+        #set $input_name2 = $clean($identifier_or_name($input2))
+        ## The builtin zip() is used because itertools.izip exists only on
+        ## Python 2; zip() pairs the characters identically on both versions.
+        #set $common_prefix = ''.join([c[0] for c in itertools.takewhile(lambda x: all(x[0] == y for y in x), zip(*[$input_name1, $input_name2]))])
+        #if len($common_prefix) > 3
+            #return $common_prefix
+        #else
+            #return $input_name1
+        #end if
+    #end if
+#end def
+
+#def format_read_group(prefix, value, quote='', arg='')
+    ## Return arg + quote + prefix + value + quote, or an empty string when
+    ## value is empty or otherwise false.
+    #if not $value
+        #return ''
+    #end if
+    #return $arg + $quote + $prefix + $value + $quote
+#end def
+
+#def rg_param(name)
+    ## Look a parameter up by name: in the rg variable when one exists,
+    ## otherwise among the template variables. Returns None when not found.
+    #if not $varExists("rg")
+        #return $getVar($name, None)
+    #end if
+    #return $rg.get($name, None)
+#end def
+
+#set $use_rg = True
+
+
+
+#if $use_rg
+ #if $rg_param('read_group_id_conditional') is None
+ #set $rg_id = $rg_auto_name
+ #elif $rg_param('read_group_id_conditional').do_auto_name
+ #set $rg_id = $rg_auto_name
+ #else
+ #set $rg_id = str($rg_param('read_group_id_conditional').ID)
+ #end if
+
+ #if $rg_param('read_group_sm_conditional') is None
+ #set $rg_sm = ''
+ #elif $rg_param('read_group_sm_conditional').do_auto_name
+ #set $rg_sm = $rg_auto_name
+ #else
+ #set $rg_sm = str($rg_param('read_group_sm_conditional').SM)
+ #end if
+
+ #if $rg_param('PL')
+ #set $rg_pl = str($rg_param('PL'))
+ #else
+ #set $rg_pl = ''
+ #end if
+
+ #if $rg_param('read_group_lb_conditional') is None
+ #set $rg_lb = ''
+ #elif $rg_param('read_group_lb_conditional').do_auto_name
+ #set $rg_lb = $rg_auto_name
+ #else
+ #set $rg_lb = str($rg_param('read_group_lb_conditional').LB)
+ #end if
+
+ #if $rg_param('CN')
+ #set $rg_cn = str($rg_param('CN'))
+ #else
+ #set $rg_cn = ''
+ #end if
+
+ #if $rg_param("DS")
+ #set $rg_ds = str($rg_param("DS"))
+ #else
+ #set $rg_ds = ''
+ #end if
+
+ #if $rg_param("DT")
+ #set $rg_dt = str($rg_param("DT"))
+ #else
+ #set $rg_dt = ''
+ #end if
+
+ #if $rg_param("FO")
+ #set $rg_fo = str($rg_param("FO"))
+ #else
+ #set $rg_fo = ''
+ #end if
+
+ #if $rg_param("KS")
+ #set $rg_ks = str($rg_param("KS"))
+ #else
+ #set $rg_ks = ''
+ #end if
+
+ #if $rg_param("PG")
+ #set $rg_pg = str($rg_param("PG"))
+ #else
+ #set $rg_pg = ''
+ #end if
+
+ ## PI is tested through str() rather than by plain truthiness (presumably
+ ## so that a numeric 0 is kept; TODO confirm). rg_param returns None for a
+ ## missing parameter and str(None) is the non-empty string 'None', so None
+ ## is ruled out explicitly to avoid setting rg_pi to 'None'.
+ #if $rg_param("PI") is not None and str($rg_param("PI"))
+     #set $rg_pi = str($rg_param("PI"))
+ #else
+     #set $rg_pi = ''
+ #end if
+
+ #if $rg_param("PU")
+ #set $rg_pu = str($rg_param("PU"))
+ #else
+ #set $rg_pu = ''
+ #end if
+#end if
+
+
+#set $use_rg = str($rg.rg_selector) != "do_not_set"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ \*|[ACMGRSVTWYHKDBN]+$
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/picard/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/picard/tool_data_table_conf.xml.sample Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,8 @@
+
+
+
+
+ value, dbkey, name, path
+
+
+
\ No newline at end of file
diff -r ec10ff509d48 -r 019b09b37955 gv/samtools_mpileup/973fea5b4bdf/samtools_mpileup/samtools_mpileup.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/samtools_mpileup/973fea5b4bdf/samtools_mpileup/samtools_mpileup.xml Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,213 @@
+
+ SNP and indel caller
+
+ samtools
+
+ samtools_wrapper.py
+ -p 'samtools mpileup'
+ --stdout "${output_log}"
+ ## Pick the reference FASTA argument: a path from the data table entry, or
+ ## a history dataset handed to the wrapper with -d. The selector is compared
+ ## as a string, like the other selector tests in this command, so the result
+ ## does not depend on how the parameter object implements comparison.
+ #if str( $reference_source.reference_source_selector ) != "history":
+ -p '-f "${reference_source.ref_file.fields.path}"'
+ #else:
+ -d "-f" "${reference_source.ref_file}" "fa" "reference_input"
+ #end if
+ #for $i, $input_bam in enumerate( $reference_source.input_bams ):
+ -d " " "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "bam_input_${i}"
+ -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "bam_input_${i}" ##hardcode galaxy ext type as bam_index
+ #end for
+ -p '
+ #if str( $advanced_options.advanced_options_selector ) == "advanced":
+ ${advanced_options.skip_anomalous_read_pairs}
+ ${advanced_options.disable_probabilistic_realignment}
+ -C "${advanced_options.coefficient_for_downgrading}"
+ -d "${advanced_options.max_reads_per_bam}"
+ ${advanced_options.extended_BAQ_computation}
+ #if str( $advanced_options.position_list ) != 'None':
+ -l "${advanced_options.position_list}"
+ #end if
+ -q "${advanced_options.minimum_mapping_quality}"
+ -Q "${advanced_options.minimum_base_quality}"
+ #if str( $advanced_options.region_string ):
+ -r "${advanced_options.region_string}"
+ #end if
+ ${advanced_options.output_per_sample_read_depth}
+ ${advanced_options.output_per_sample_strand_bias_p_value}
+ #end if
+ #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation':
+ ##-g or -u
+ -g
+ -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}"
+ -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}"
+ #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling':
+ -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}"
+ #else:
+ -I
+ #end if
+ -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}"
+ #if len( $genotype_likelihood_computation_type.platform_list_repeat ):
+ -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }"
+ #end if
+ #end if
+ > "${output_mpileup}"
+ '
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+ Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample.
+
+------
+
+**Settings**::
+
+ Input Options:
+ -6 Assume the quality is in the Illumina 1.3+ encoding.
+ -A Do not skip anomalous read pairs in variant calling.
+ -B Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments.
+ -b FILE List of input BAM files, one file per line [null]
+ -C INT Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0]
+ -d INT At a position, read maximally INT reads per input BAM. [250]
+ -E Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit.
+ -f FILE The faidx-indexed reference file in the FASTA format. The file can be optionally compressed by razip. [null]
+ -l FILE BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null]
+ -q INT Minimum mapping quality for an alignment to be used [0]
+ -Q INT Minimum base quality for a base to be considered [13]
+ -r STR Only generate pileup in region STR [all sites]
+ Output Options:
+
+ -D Output per-sample read depth
+ -g Compute genotype likelihoods and output them in the binary call format (BCF).
+ -S Output per-sample Phred-scaled strand bias P-value
+ -u Similar to -g except that the output is uncompressed BCF, which is preferred for piping.
+
+ Options for Genotype Likelihood Computation (for -g or -u):
+
+ -e INT Phred-scaled gap extension sequencing error probability. Reducing INT leads to longer indels. [20]
+ -h INT Coefficient for modeling homopolymer errors. Given an l-long homopolymer run, the sequencing error of an indel of size s is modeled as INT*s/l. [100]
+ -I Do not perform INDEL calling
+ -L INT Skip INDEL calling if the average per-sample depth is above INT. [250]
+ -o INT Phred-scaled gap open sequencing error probability. Reducing INT leads to more indel calls. [40]
+ -P STR Comma-delimited list of platforms (determined by @RG-PL) from which indel candidates are obtained. It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA. [all]
+
+------
+
+**Citation**
+
+For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_
+
+If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
+
+
+
diff -r ec10ff509d48 -r 019b09b37955 gv/samtools_mpileup/973fea5b4bdf/samtools_mpileup/samtools_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gv/samtools_mpileup/973fea5b4bdf/samtools_mpileup/samtools_wrapper.py Fri Sep 13 03:12:53 2019 -0400
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+#Dan Blankenberg
+
+"""
+A wrapper script for running SAMTools commands.
+"""
+
+import sys, optparse, os, tempfile, subprocess, shutil
+from string import Template
+
+GALAXY_EXT_TO_SAMTOOLS_EXT = { 'bam_index':'bam.bai', } #items not listed here will use the galaxy extension as-is
+GALAXY_EXT_TO_SAMTOOLS_FILE_TYPE = GALAXY_EXT_TO_SAMTOOLS_EXT #for now, these are the same, but could be different if needed
+DEFAULT_SAMTOOLS_PREFIX = "SAMTools_file"
+CHUNK_SIZE = 2**20 #1mb
+
+
+def cleanup_before_exit( tmp_dir ):
+    """Recursively delete tmp_dir when it is set and exists on disk."""
+    if not tmp_dir or not os.path.exists( tmp_dir ):
+        return
+    shutil.rmtree( tmp_dir )
+
+def SAMTOOLS_filename_from_galaxy( galaxy_filename, galaxy_ext, target_dir = None, prefix = None ):
+    """Symlink a Galaxy dataset under a "<prefix>.<extension>" name and return the link path.
+
+    The extension is looked up in GALAXY_EXT_TO_SAMTOOLS_EXT, falling back to
+    galaxy_ext itself. target_dir defaults to the current working directory
+    and prefix to DEFAULT_SAMTOOLS_PREFIX.
+    """
+    extension = GALAXY_EXT_TO_SAMTOOLS_EXT.get( galaxy_ext, galaxy_ext )
+    link_prefix = DEFAULT_SAMTOOLS_PREFIX if prefix is None else prefix
+    link_dir = os.getcwd() if target_dir is None else target_dir
+    link_path = os.path.join( link_dir, "%s.%s" % ( link_prefix, extension ) )
+    os.symlink( galaxy_filename, link_path )
+    return link_path
+
+def SAMTOOLS_filetype_argument_substitution( argument, galaxy_ext ):
+    """Fill the %(file_type)s placeholder of argument.
+
+    The value is looked up in GALAXY_EXT_TO_SAMTOOLS_FILE_TYPE, falling back
+    to galaxy_ext itself.
+    """
+    file_type = GALAXY_EXT_TO_SAMTOOLS_FILE_TYPE.get( galaxy_ext, galaxy_ext )
+    return argument % { 'file_type': file_type }
+
+def open_file_from_option( filename, mode = 'rb' ):
+    """Open filename with the given mode, or return None when filename is empty or None."""
+    if not filename:
+        return None
+    return open( filename, mode = mode )
+
+def html_report_from_directory( html_out, dir ):
+ html_out.write( '\n
\nGalaxy - SAMTOOLS Output\n\n\n\n
\n' )
+ for fname in sorted( os.listdir( dir ) ):
+ html_out.write( '