changeset 3:0d10255b5434 draft default tip

Uploaded
author scisjnu123
date Thu, 03 Oct 2019 10:42:15 -0400
parents 2c7824a8d764
children
files GATK/gatk/analyze_covariates.xml GATK/gatk/base_recalibrator.xml GATK/gatk/combine_gvcfs.xml GATK/gatk/combine_variants.xml GATK/gatk/gatk.xml GATK/gatk/gatk_macros.xml GATK/gatk/generation/gatk.xsl GATK/gatk/generation/gatk.xsldb.xml GATK/gatk/genotype_gvcfs.xml GATK/gatk/haplotype_caller.xml GATK/gatk/indel_realigner.xml GATK/gatk/print_reads.xml GATK/gatk/realigner_target_creator.xml GATK/gatk/tool-data/destinations.py GATK/gatk/tool-data/picard_index.loc.sample GATK/gatk/tool_data_table_conf.xml.sample GATK/gatk/tool_dependencies.xml GATK/package_picard_1_135/tool_dependencies.xml GATK/package_r_for_gatk_3_4_0/tool_dependencies.xml ngsap-vc/Haplotypecaller.ga ngsap-vc/SAMTools.ga ngsap-vc/SVDetect.ga ngsap-vc/VARSCAN.ga ngsap-vc/gatk/analyze_covariates.xml ngsap-vc/gatk/base_recalibrator.xml ngsap-vc/gatk/combine_gvcfs.xml ngsap-vc/gatk/combine_variants.xml ngsap-vc/gatk/gatk.xml ngsap-vc/gatk/gatk_macros.xml ngsap-vc/gatk/generation/gatk.xsl ngsap-vc/gatk/generation/gatk.xsldb.xml ngsap-vc/gatk/genotype_gvcfs.xml ngsap-vc/gatk/haplotype_caller.xml ngsap-vc/gatk/indel_realigner.xml ngsap-vc/gatk/print_reads.xml ngsap-vc/gatk/realigner_target_creator.xml ngsap-vc/gatk/tool-data/destinations.py ngsap-vc/gatk/tool-data/picard_index.loc.sample ngsap-vc/gatk/tool_data_table_conf.xml.sample ngsap-vc/gatk/tool_dependencies.xml ngsap-vc/package_r_for_gatk_3_4_0/tool_dependencies.xml ngsap-vc/suite_samtools_1_2/repository_dependencies.xml ngsap-vc/svdetect/BAM_preprocessingPairs.pl ngsap-vc/svdetect/BAM_preprocessingPairs.xml ngsap-vc/svdetect/SVDetect_compare.pl ngsap-vc/svdetect/SVDetect_compare.xml ngsap-vc/svdetect/SVDetect_import.sh ngsap-vc/svdetect/SVDetect_import.xml ngsap-vc/svdetect/SVDetect_run_parallel.pl ngsap-vc/svdetect/SVDetect_run_parallel.xml ngsap-vc/svdetect/circos_graph.xml ngsap-vc/varscan/tool_dependencies.xml ngsap-vc/varscan/varscan_mpileup.pl ngsap-vc/varscan/varscan_mpileup.xml ngsap-vc/varscan/varscan_somatic.pl ngsap-vc/varscan/varscan_somatic.xml
diffstat 56 files changed, 7971 insertions(+), 1289 deletions(-) [+]
line wrap: on
line diff
--- a/GATK/gatk/analyze_covariates.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-<macros>
-    <xml name="AnalyzeCovariatesParameters" tokens="tag">
-
-        <!-- BQSR in main config -->
-
-        <param name="afterReportFile" type="data" format="tabular" optional="true" label="file containing the BQSR second-pass report file" help="-after,&#8209;&#8209;afterReportFile &amp;lt;afterReportFile&amp;gt;" />
-
-        <param name="beforeReportFile" type="data" format="tabular" optional="true" label="file containing the BQSR first-pass report file" help="-before,&#8209;&#8209;beforeReportFile &amp;lt;beforeReportFile&amp;gt;" />
-
-    </xml>
-
-    <xml name="AnalyzeCovariatesOutput">
-        <data format="pdf" name="ac_plotsReportFile" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (PDF Recalibration Report)">
-            <yield />
-        </data>
-    </xml>
-
-    <template name="AnalyzeCovariatesPreprocessing">
-<![CDATA[
-]]>
-    </template>
-
-    <template name="AnalyzeCovariatesOptions">
-<![CDATA[
-        --plotsReportFile                   ${ac_plotsReportFile}
-
-        #if str($analysis_type.afterReportFile)
-            --afterReportFile $analysis_type.afterReportFile
-        #end if
-        #if str($analysis_type.beforeReportFile)
-            --beforeReportFile $analysis_type.beforeReportFile
-        #end if
-]]>
-    </template>
-</macros>
-
-
--- a/GATK/gatk/base_recalibrator.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-<macros>
-    <xml name="BaseRecalibratorParameters" tokens="tag">
-        
-        <expand macro="macro_bam_input" tag="@TAG@" />
-
-        <param name="knownSites" type="data" format="vcf,bcf,bed,pileup,tabular,table" label="Database of known Sites (ROD files; e.g. VCF format)" multiple="true" title="A database of known polymorphic sites to skip over in the recalibration algorithm" help="-knownSites,&#8209;&#8209;knownSites &amp;lt;knownSites&amp;gt;" />
-
-    </xml>
-
-    <xml name="BaseRecalibratorOutput">
-        <data format="tabular" name="br_table" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (Table)">
-            <yield />
-        </data>
-    </xml>
-
-    <template name="BaseRecalibratorPreprocessing">
-<![CDATA[
-        @token_bam_input_pre@
-        #for $i, $variant in enumerate($analysis_type.knownSites):
-            ln -s -f ${variant} variant_${i}.vcf &&
-        #end for
-]]>
-    </template>
-
-    <template name="BaseRecalibratorOptions">
-<![CDATA[
-        --out                   ${br_table}
-
-        @token_bam_input@
-
-        #for $i, $variant in enumerate($analysis_type.knownSites):
-            --knownSites variant_${i}.vcf
-        #end for
-]]>
-    </template>
-</macros>
-
-
--- a/GATK/gatk/combine_gvcfs.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-<macros>
-    <xml name="CombineGVCFsParameters" tokens="tag">
-        
-        <expand macro="macro_gvcf_input" tag="@TAG@" />
-
-        <expand macro="macro_optional_parameters">
-
-            <param name="breakBandsAtMultiplesOf" type="integer" value="0" label="If > 0, reference bands will be broken up at genomic positions that are multiples of this number" help="-breakBandsAtMultiplesOf,&#8209;&#8209;breakBandsAtMultiplesOf &amp;lt;breakBandsAtMultiplesOf&amp;gt;" />
-
-        </expand>
-
-    </xml>
-
-    <xml name="CombineGVCFsOutput">
-        <data format="vcf" name="cg_output_vcf" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (VCF)">
-            <yield />
-        </data>
-    </xml>
-
-    <template name="CombineGVCFsPreprocessing">
-<![CDATA[
-        @token_gvcf_input_pre@
-]]>
-    </template>
-
-    <template name="CombineGVCFsOptions">
-<![CDATA[
-        --out ${cg_output_vcf}
-
-        @token_gvcf_input@
-
-        #set $optionals = $analysis_type.optional_parameters
-        #if $optionals.optional_parameters_enabled
-            #if $optionals.breakBandsAtMultiplesOf > 0
-                --breakBandsAtMultiplesOf $optionals.breakBandsAtMultiplesOf
-            #end if
-        #end if
-]]>
-    </template>
-
-
-</macros>
-
-
--- a/GATK/gatk/combine_variants.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-<macros>
-    <xml name="CombineVariantsParameters" tokens="tag">
-        
-        <expand macro="macro_vcf_input" tag="@TAG@" />
-
-        <expand macro="macro_optional_parameters">
-
-            <param name="filteredRecordsMergeType" type="select" label="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields" help="-filteredRecordsMergeType,&#8209;&#8209;filteredrecordsmergetype &amp;lt;filteredrecordsmergetype&amp;gt;">
-                <option value="">No Selection</option>
-                <option value="KEEP_IF_ANY_UNFILTERED">KEEP_IF_ANY_UNFILTERED</option>
-                <option value="KEEP_IF_ALL_UNFILTERED">KEEP_IF_ALL_UNFILTERED</option>
-                <option value="KEEP_UNCONDITIONAL">KEEP_UNCONDITIONAL</option>
-            </param>
-
-            <param name="genotypeMergeOptions" type="select" label="Determines how we should merge genotype records for samples shared across the ROD files" help="-genotypeMergeOptions,&#8209;&#8209;genotypemergeoption &amp;lt;genotypemergeoption&amp;gt;">
-                <option value="">No Selection</option>
-                <option value="UNIQUIFY">UNIQUIFY</option>
-                <option value="PRIORITIZE">PRIORITIZE</option>
-                <option value="UNSORTED">UNSORTED</option>
-                <option value="REQUIRE_UNIQUE">REQUIRE_UNIQUE</option>
-            </param>
-
-            <param name="minimumN" type="integer" value="1" optional="true" label="Combine variants and output site only if the variant is present in at least N input files" help="-minN,&#8209;&#8209;minimumN &amp;lt;minimumN&amp;gt;" />
-
-            <param name="rod_priority_list" type="text" value="" optional="true" label="A comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted" help="-priority,&#8209;&#8209;rod_priority_list &amp;lt;rod_priority_list&amp;gt;" />
-
-            <param name="setKey" type="text" value="" optional="true" label="Key used in the INFO key=value tag emitted describing which set the combined VCF record came from" help="-setKey,&#8209;&#8209;setKey &amp;lt;setKey&amp;gt;" />
-
-            <param name="assumeIdenticalSamples" type="boolean" truevalue="--assumeIdenticalSamples" falsevalue="" label="If true, assume input VCFs have identical sample sets and disjoint calls" help="-assumeIdenticalSamples,&#8209;&#8209;assumeIdenticalSamples" />
-
-            <param name="excludeNonVariants" type="boolean" truevalue="--excludeNonVariants" falsevalue="" label="Don't include loci found to be non-variant after the combining procedure" help="-env,&#8209;&#8209;excludeNonVariants" />
-
-            <param name="filteredAreUncalled" type="boolean" truevalue="--filteredAreUncalled" falsevalue="" label="If true, then filtered VCFs are treated as uncalled, so that filtered set annotations don't appear in the combined VCF" help="-filteredAreUncalled,&#8209;&#8209;filteredAreUncalled" />
-
-            <param name="mergeInfoWithMaxAC" type="boolean" truevalue="--mergeInfoWithMaxAC" falsevalue="" label="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records." help="-mergeInfoWithMaxAC,&#8209;&#8209;mergeInfoWithMaxAC" />
-
-            <param name="minimalVCF" type="boolean" truevalue="--minimalVCF" falsevalue="" label="If true, then the output VCF will contain no INFO or genotype FORMAT fields" help="-minimalVCF,&#8209;&#8209;minimalVCF" />
-
-            <param name="printComplexMerges" type="boolean" truevalue="--printComplexMerges" falsevalue="" label="Print out interesting sites requiring complex compatibility merging" help="-printComplexMerges,&#8209;&#8209;printComplexMerges" />
-
-            <param name="suppressCommandLineHeader" type="boolean" truevalue="--suppressCommandLineHeader" falsevalue="" label="If true, do not output the header containing the command line used" help="-suppressCommandLineHeader,&#8209;&#8209;suppressCommandLineHeader" />
-
-        </expand>
-
-    </xml>
-
-    <xml name="CombineVariantsOutput">
-        <data format="vcf" name="cv_output_vcf" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (VCF)">
-            <yield />
-        </data>
-    </xml>
-
-    <template name="CombineVariantsPreprocessing">
-<![CDATA[
-        @token_vcf_input_pre@
-]]>
-    </template>
-
-    <template name="CombineVariantsOptions">
-<![CDATA[
-        --out ${cv_output_vcf}
-
-        @token_vcf_input@
-
-        #set $optionals = $analysis_type.optional_parameters
-        #if $optionals.optional_parameters_enabled
-
-            #if $optionals.filteredRecordsMergeType
-                --filteredRecordsMergeType $optionals.filteredRecordsMergeType
-            #end if
-            #if $optionals.genotypeMergeOptions
-                --genotypeMergeOptions $optionals.genotypeMergeOptions
-            #end if
-            #if $optionals.minimumN != 1
-                --minimumN $optionals.minimumN
-            #end if
-            #if $optionals.rod_priority_list
-                --rod_priority_list $optionals.rod_priority_list
-            #end if
-
-            $optionals.assumeIdenticalSamples
-            $optionals.excludeNonVariants
-            $optionals.filteredAreUncalled
-            $optionals.mergeInfoWithMaxAC
-            $optionals.minimalVCF
-            $optionals.printComplexMerges
-            $optionals.suppressCommandLineHeader
-
-        #end if
-]]>
-    </template>
-
-
-</macros>
-
-
--- a/GATK/gatk/gatk.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,179 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<tool id="gatk" name="GATK" version="@VERSION@.d9">
-  <description>tool collection Version @VERSION@</description>
-  <macros>
-    <import>gatk_macros.xml</import>
-    <import>realigner_target_creator.xml</import>
-    <import>indel_realigner.xml</import>
-    <import>base_recalibrator.xml</import>
-    <import>analyze_covariates.xml</import>
-    <import>print_reads.xml</import>
-    <import>haplotype_caller.xml</import>
-    <import>genotype_gvcfs.xml</import>
-    <import>combine_gvcfs.xml</import>
-    <import>combine_variants.xml</import>
-  </macros>
-  <expand macro="requirements"/>
-  <stdio>
-    <regex match="^INFO" level="log"/>
-    <regex match="^WARN" level="warning"/>
-    <regex match="Using .* implementation of PairHMM" level="warning"/>
-    <regex match="There is insufficient memory for the Java Runtime Environment to continue" level="fatal"/>
-    <regex match="^##### ERROR" level="fatal"/>
-    <exit_code range="1:" level="fatal"/>
-  </stdio>
-  <command><![CDATA[
-        ############################
-        ## import analysis specific preprocessings by using cheetahs internal searchList
-        ## if not defined, ignore
-        ############################
-        #if $analysis_type.analysis_type_selector + "Preprocessing" in vars()['SL'][2]
-            #set $analysisPreprocessing = vars()['SL'][2][$analysis_type.analysis_type_selector + "Preprocessing"]
-            #include source=$analysisPreprocessing
-        #end if
-        
-        ############################
-        ## GATK tool unspecific options
-        ############################
-        @GATK_EXEC@
-        
-        --analysis_type ${analysis_type.analysis_type_selector}
-        --reference_sequence    ${ref_file.fields.path}
-
-        --log_to_file           ${output_log}
-
-        #if $cond_intervals.cond_intervals_enabled
-            #for $interval in $cond_intervals.intervals:
-                --intervals ${interval.L}
-            #end for
-        #end if
-
-        #if $cond_BQSR.cond_BQSR_enabled
-          --BQSR $cond_BQSR.BQSR
-        #end if
-
-        ############################
-        ## import analysis specific options by using cheetahs internal searchList
-        ## if not defined throw raw python error until better idea
-        ############################
-        #if $analysis_type.analysis_type_selector + "Options" in vars()['SL'][2]
-            #set $analysisOptions = vars()['SL'][2][$analysis_type.analysis_type_selector + "Options"]
-            #include source=$analysisOptions
-        #else
-            #set $analysisOptions = vars()['SL'][2][$analysis_type.analysis_type_selector + "Options"]
-        #end if
-        
-        ############################
-        ## only put ERROR or FATAL log messages into stderr
-        ## but keep full log for printing into log file
-        ############################
-        2>&1 | awk '\$1 != "INFO" && \$1 != "WARN"' >&2
-]]></command>
-  <inputs>
-    <param name="ref_file" type="select" label="Using reference genome" help="-R,‑‑reference_sequence &amp;lt;reference_sequence&amp;gt;">
-      <options from_data_table="picard_indexes"/>
-      <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
-    </param>
-    <conditional name="cond_intervals">
-      <param name="cond_intervals_enabled" type="boolean" label="Select interval subset to operate on?"/>
-      <when value="true">
-        <repeat name="intervals" title="genomic interval over which to operate" help="-L,‑‑intervals &amp;lt;intervals&amp;gt;">
-          <param name="L" type="text" value=""/>
-        </repeat>
-      </when>
-      <when value="false"/>
-    </conditional>
-    <conditional name="cond_BQSR">
-      <param name="cond_BQSR_enabled" type="boolean" label="Select covariates for on-the-fly recalibration?"/>
-      <when value="true">
-        <param name="BQSR" type="data" format="tabular" label="Input covariates table file for on-the-fly base quality score recalibration" help="-BQSR,‑‑BQSR &amp;lt;BQSR&amp;gt; intended primarily for use with BaseRecalibrator and PrintReads"/>
-      </when>
-      <when value="false"/>
-    </conditional>
-    <conditional name="cond_threads">
-      <param name="cond_threads_enabled" type="boolean" label="Set computational options (cpu, mem)?"/>
-      <when value="true">
-        <param name="nt" type="integer" value="1" label="Number of data threads to allocate to this analysis" help="make sure, the option is available for the chosen tool"/>
-        <param name="nct" type="integer" value="1" label="Number of CPU threads to allocate per data thread" help="make sure, the option is available for the chosen tool"/>
-        <param name="mem" type="integer" value="0" label="Overwrite Memory in MB (0 = don't overwrite)" help="Overwrites all other defaults and might lead to crash the run. States mem per data thread"/>
-      </when>
-      <when value="false"/>
-    </conditional>
-    <conditional name="analysis_type">
-      <param name="analysis_type_selector" type="select" label="Analysis Type">
-        <option value="RealignerTargetCreator">RealignerTargetCreator</option>
-        <option value="IndelRealigner">IndelRealigner</option>
-        <option value="BaseRecalibrator">BaseRecalibrator</option>
-        <option value="AnalyzeCovariates">AnalyzeCovariates</option>
-        <option value="PrintReads">PrintReads</option>
-        <option value="HaplotypeCaller">HaplotypeCaller</option>
-        <option value="GenotypeGVCFs">GenotypeGVCFs</option>
-        <option value="CombineGVCFs">CombineGVCFs</option>
-        <option value="CombineVariants">CombineVariants</option>
-      </param>
-      <when value="RealignerTargetCreator">
-        <expand macro="RealignerTargetCreatorParameters" tag="rtc"/>
-      </when>
-      <when value="IndelRealigner">
-        <expand macro="IndelRealignerParameters" tag="ir"/>
-      </when>
-      <when value="BaseRecalibrator">
-        <expand macro="BaseRecalibratorParameters" tag="br"/>
-      </when>
-      <when value="AnalyzeCovariates">
-        <expand macro="AnalyzeCovariatesParameters" tag="ac"/>
-      </when>
-      <when value="PrintReads">
-        <expand macro="PrintReadsParameters" tag="pr"/>
-      </when>
-      <when value="HaplotypeCaller">
-        <expand macro="HaplotypeCallerParameters" tag="hc"/>
-      </when>
-      <when value="GenotypeGVCFs">
-        <expand macro="GenotypeGVCFsParameters" tag="gg"/>
-      </when>
-      <when value="CombineGVCFs">
-        <expand macro="CombineGVCFsParameters" tag="cg"/>
-      </when>
-      <when value="CombineVariants">
-        <expand macro="CombineVariantsParameters" tag="cv"/>
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <expand macro="RealignerTargetCreatorOutput" tag="rtc">
-      <filter>analysis_type['analysis_type_selector'] == 'RealignerTargetCreator'</filter>
-    </expand>
-    <expand macro="IndelRealignerOutput" tag="ir">
-      <filter>analysis_type['analysis_type_selector'] == 'IndelRealigner'</filter>
-    </expand>
-    <expand macro="BaseRecalibratorOutput" tag="br">
-      <filter>analysis_type['analysis_type_selector'] == 'BaseRecalibrator'</filter>
-    </expand>
-    <expand macro="AnalyzeCovariatesOutput" tag="ac">
-      <filter>analysis_type['analysis_type_selector'] == 'AnalyzeCovariates'</filter>
-    </expand>
-    <expand macro="PrintReadsOutput" tag="pr">
-      <filter>analysis_type['analysis_type_selector'] == 'PrintReads'</filter>
-    </expand>
-    <expand macro="HaplotypeCallerOutput" tag="hc">
-      <filter>analysis_type['analysis_type_selector'] == 'HaplotypeCaller'</filter>
-    </expand>
-    <expand macro="GenotypeGVCFsOutput" tag="gg">
-      <filter>analysis_type['analysis_type_selector'] == 'GenotypeGVCFs'</filter>
-    </expand>
-    <expand macro="CombineGVCFsOutput" tag="cg">
-      <filter>analysis_type['analysis_type_selector'] == 'CombineGVCFs'</filter>
-    </expand>
-    <expand macro="CombineVariantsOutput" tag="cv">
-      <filter>analysis_type['analysis_type_selector'] == 'CombineVariants'</filter>
-    </expand>
-    <data format="txt" name="output_log" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (log)"/>
-  </outputs>
-  <expand macro="macro_tests"/>
-  <citations>
-    <citation type="doi">10.1101/gr.107524.110</citation>
-    <citation type="doi">10.1038/ng.806</citation>
-    <citation type="doi">10.1002/0471250953.bi1110s43</citation>
-  </citations>
-</tool>
--- a/GATK/gatk/gatk_macros.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,166 +0,0 @@
-<macros>
-
-    <xml name="requirements">
-        <requirements>
-            <requirement type="package">gatk</requirement>
-            <requirement type="set_environment">GATK_PATH</requirement>
-            <requirement type="set_environment">GATK_SITE_OPTIONS</requirement>
-            <requirement type="package" version="3.1.2.1">package_r_for_gatk_3_4_0</requirement>
-        </requirements>
-    </xml>
-
-    <xml name="version_command">
-        <version_command><![CDATA[ @GATK_EXEC@ --help|grep '^The Genome' ]]></version_command>
-    </xml>
-
-    <token name="@VERSION@">3.4-0</token>
-    <token name="@OUTPUT_NAME_PREFIX@">${tool.name} - ${analysis_type.analysis_type_selector}</token>
-    <token name="@GATK_EXEC@">
-<![CDATA[
-        #if $cond_threads.cond_threads_enabled:
-            #if int($cond_threads.nct) > 1:
-                THREAD_STRING="-nct $cond_threads.nct" &&
-            #end if
-            #if int($cond_threads.nt) > 1:
-                THREAD_STRING=$THREAD_STRING" -nt $cond_threads.nt" &&
-            #end if
-            #if int($cond_threads.mem) > 0:
-                GATK_MEM=$cond_threads.mem &&
-            #end if
-        #end if
-        java -Xmx\${GATK_MEM:-\${SLURM_MEM_PER_NODE:-4096}}M -jar "\$GATK_PATH/GenomeAnalysisTK.jar" \${THREAD_STRING:-}
-]]>
-    </token>
-
-    <xml name="macro_vcf_input" tokens="tag">
-        <param name="input" type="data" format="vcf" multiple="true" label="Variant files (VCF format)" help="-V, &#8209;&#8209;variant">
-            <validator type="unspecified_build" />
-            <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> 
-        </param>
-    </xml>
-    <token name="@token_vcf_input_pre@" tokens="tag">
-<![CDATA[
-        ############################
-        ## create links to gVCF input files with correct extensions
-        ############################
-        #for $i, $variant in enumerate($analysis_type.input):
-            ln -s -f ${variant} variant_${i}.vcf &&
-        #end for
-]]>
-    </token>
-    <token name="@token_vcf_input@">
-<![CDATA[
-        #for $i, $variant in enumerate($analysis_type.input):
-            --variant variant_${i}.vcf
-        #end for
-        @token_reference_input@
-]]>
-    </token>
-
-
-    <xml name="macro_gvcf_input" tokens="tag">
-        <param name="input" type="data" format="vcf" multiple="true" label="Variant files (gVCF format)" help="-V, &#8209;&#8209;variant">
-            <validator type="unspecified_build" />
-            <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> 
-        </param>
-    </xml>
-    <token name="@token_gvcf_input_pre@" tokens="tag">
-<![CDATA[
-        ############################
-        ## create links to gVCF input files with correct extensions
-        ############################
-        #for $i, $variant in enumerate($analysis_type.input):
-            ln -s -f ${variant} variant_${i}.g.vcf &&
-        #end for
-]]>
-    </token>
-    <token name="@token_gvcf_input@">
-<![CDATA[
-        #for $i, $variant in enumerate($analysis_type.input):
-            --variant variant_${i}.g.vcf
-        #end for
-        @token_reference_input@
-]]>
-    </token>
-
-    <xml name="macro_bam_input">
-        <conditional name="cond_bam_input">
-            <param name="all_in_one" type="boolean" value="false" label="Input all BAM files in a single command" />
-            <when value="true">
-                <param name="input" type="data" format="bam" multiple="true" label="Input file containing sequence data (BAM)" help="-I, ‑‑input_file">
-                    <validator type="unspecified_build"/>
-                    <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build."/>
-                </param>
-            </when>
-            <when value="false">
-                <param name="input" type="data" format="bam" label="Input file containing sequence data (BAM)" help="-I, ‑‑input_file">
-                    <validator type="unspecified_build"/>
-                    <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build."/>
-                </param>
-            </when>
-        </conditional>
-    </xml>
-    <token name="@token_bam_input_pre@">
-<![CDATA[
-        ############################
-        ## create links to bam input files with correct extensions
-        ############################
-        #if $analysis_type.cond_bam_input.all_in_one
-            #for $i, $bam in enumerate($analysis_type.cond_bam_input.input):
-                ln -s -f ${bam} input_${i}.bam &&
-                ln -s -f ${bam.metadata.bam_index} input_${i}.bam.bai &&
-            #end for
-        #else
-            ln -s -f ${analysis_type.cond_bam_input.input} input.bam &&
-            ln -s -f ${analysis_type.cond_bam_input.input.metadata.bam_index} input.bam.bai &&
-        #end if
-]]>
-    </token>
-    <token name="@token_bam_input@">
-<![CDATA[
-        #if $analysis_type.cond_bam_input.all_in_one
-            #for $i, $bam in enumerate($analysis_type.cond_bam_input.input):
-                --input_file input_${i}.bam
-            #end for
-        #else
-            --input_file            input.bam
-        #end if
-        @token_reference_input@
-]]>
-    </token>
-
-    <token name="@token_reference_input@">
-<![CDATA[
-]]>
-    </token>
-    <xml name="macro_input" tokens="tag">
-        <yield />
-    </xml>
-
-    <xml name="macro_optional_parameters">
-        <conditional name="optional_parameters">
-            <param name="optional_parameters_enabled" type="boolean" label="Configure Optional Parameters" />
-            <when value="true">
-                <yield />
-            </when>
-            <when value="false" />
-        </conditional>
-    </xml>
-
-    <xml name="macro_advanced_parameters">
-        <conditional name="advanced_parameters">
-            <param name="advanced_parameters_enabled" type="boolean" label="Configure Advanced Parameters" />
-            <when value="true">
-                <yield />
-            </when>
-            <when value="false" />
-        </conditional>
-    </xml>
-
-    <xml name="macro_tests">
-        <tests>
-            
-        </tests>
-    </xml>
-
-</macros>
--- a/GATK/gatk/generation/gatk.xsl	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
-<xsl:output
-    method="xml"
-    encoding="utf-8"
-    indent="yes"
-    cdata-section-elements="script style" />
-
-<xsl:template match="/">
-
-<tool id="gatk" name="GATK" version="@VERSION@.d9">
-    <description>tool collection Version @VERSION@</description>
-
-    <macros>
-        <import>gatk_macros.xml</import>
-        <xsl:for-each select="analyses/analysis">
-        <import><xsl:value-of select="macro_file" /></import>
-        </xsl:for-each>
-    </macros>
-
-    <expand macro="requirements" />
-
-    <stdio>
-        <regex match="^INFO" level="log" />
-        <regex match="^WARN" level="warning" />
-        <regex match="Using .* implementation of PairHMM" level="warning" />
-        <regex match="There is insufficient memory for the Java Runtime Environment to continue" level="fatal" />
-        <regex match="^##### ERROR" level="fatal" />
-        <exit_code range="1:" level="fatal"/>
-    </stdio>
-
-    <command>
-<xsl:text disable-output-escaping="yes">&lt;![CDATA[
-        ############################
-        ## import analysis specific preprocessings by using cheetahs internal searchList
-        ## if not defined, ignore
-        ############################
-        #if $analysis_type.analysis_type_selector + "Preprocessing" in vars()['SL'][2]
-            #set $analysisPreprocessing = vars()['SL'][2][$analysis_type.analysis_type_selector + "Preprocessing"]
-            #include source=$analysisPreprocessing
-        #end if
-        
-        ############################
-        ## GATK tool unspecific options
-        ############################
-        @GATK_EXEC@
-        
-        --analysis_type ${analysis_type.analysis_type_selector}
-        --reference_sequence    ${ref_file.fields.path}
-
-        --log_to_file           ${output_log}
-
-        #if $cond_intervals.cond_intervals_enabled
-            #for $interval in $cond_intervals.intervals:
-                --intervals ${interval.L}
-            #end for
-        #end if
-
-        #if $cond_BQSR.cond_BQSR_enabled
-          --BQSR $cond_BQSR.BQSR
-        #end if
-
-        ############################
-        ## import analysis specific options by using cheetahs internal searchList
-        ## if not defined throw raw python error until better idea
-        ############################
-        #if $analysis_type.analysis_type_selector + "Options" in vars()['SL'][2]
-            #set $analysisOptions = vars()['SL'][2][$analysis_type.analysis_type_selector + "Options"]
-            #include source=$analysisOptions
-        #else
-            #set $analysisOptions = vars()['SL'][2][$analysis_type.analysis_type_selector + "Options"]
-        #end if
-        
-        ############################
-        ## only put ERROR or FATAL log messages into stderr
-        ## but keep full log for printing into log file
-        ############################
-        2>&amp;1 | awk '\$1 != "INFO" &amp;&amp; \$1 != "WARN"' >&amp;2
-]]&gt;</xsl:text>
-    </command>
-
-    <inputs>
-
-        <param name="ref_file" type="select" label="Using reference genome" help="-R,&#8209;&#8209;reference_sequence &amp;lt;reference_sequence&amp;gt;" >
-            <options from_data_table="picard_indexes">
-                <!--filter type="data_meta" key="dbkey" ref="@TAG@_input" column="dbkey" /-->
-            </options>
-            <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
-        </param>
-
-        <conditional name="cond_intervals">
-            <param name="cond_intervals_enabled" type="boolean" label="Select interval subset to operate on?" />
-            <when value="true">
-                <repeat name="intervals" title="genomic interval over which to operate" help="-L,&#8209;&#8209;intervals &amp;lt;intervals&amp;gt;">
-                    <param name="L" type="text" value="" />
-                </repeat>
-            </when>
-            <when value="false" />
-        </conditional>
-
-        <conditional name="cond_BQSR">
-            <param name="cond_BQSR_enabled" type="boolean" label="Select covariates for on-the-fly recalibration?" />
-            <when value="true">
-                <param name="BQSR" type="data" format="tabular" label="Input covariates table file for on-the-fly base quality score recalibration" help="-BQSR,&#8209;&#8209;BQSR &amp;lt;BQSR&amp;gt; intended primarily for use with BaseRecalibrator and PrintReads" />
-            </when>
-            <when value="false" />
-        </conditional>
-
-        <conditional name="cond_threads">
-            <param name="cond_threads_enabled" type="boolean" label="Set computational options (cpu, mem)?" />
-            <when value="true">
-                <param name="nt" type="integer" value="1" label="Number of data threads to allocate to this analysis" help="make sure, the option is available for the chosen tool" />
-                <param name="nct" type="integer" value="1" label="Number of CPU threads to allocate per data thread" help="make sure, the option is available for the chosen tool" />
-                <param name="mem" type="integer" value="0" label="Overwrite Memory in MB (0 = don't overwrite)" help="Overwrites all other defaults and might lead to crash the run. States mem per data thread" />
-            </when>
-            <when value="false" />
-        </conditional>
-
-        <conditional name="analysis_type">
-            <param name="analysis_type_selector" type="select" label="Analysis Type">
-                <xsl:for-each select="analyses/analysis">
-                <option value="{name}"><xsl:value-of select="name" /></option>
-                </xsl:for-each>
-            </param>
-            <xsl:for-each select="analyses/analysis">
-            <when value="{name}">
-                <!--xsl:choose>
-                    <xsl:when test="input_type = 'bam'">
-                        <expand macro="macro_bam_input" tag="{tag}" />
-                    </xsl:when>
-                    <xsl:when test="input_type = 'gvcf'">
-                        <expand macro="macro_gvcf_input" tag="{tag}" />
-                    </xsl:when>
-                </xsl:choose-->
-                <expand macro="{name}Parameters" tag="{tag}" />
-            </when>
-            </xsl:for-each>
-        </conditional>
-    </inputs>
-
-    <outputs>
-        <xsl:for-each select="analyses/analysis">
-        <expand macro="{name}Output" tag="{tag}">
-            <filter>analysis_type['analysis_type_selector'] == '<xsl:value-of select="name" />'</filter>
-        </expand>
-        </xsl:for-each>
-        <data format="txt" name="output_log" label="${{tool.name}} - ${{analysis_type.analysis_type_selector}} on ${{on_string}} (log)" />
-    </outputs>
-
-    <expand macro="macro_tests" />
-
-    <citations>
-        <citation type="doi">10.1101/gr.107524.110</citation>
-        <citation type="doi">10.1038/ng.806</citation>
-        <citation type="doi">10.1002/0471250953.bi1110s43</citation>
-    </citations>
-</tool>
-
-</xsl:template>
-</xsl:stylesheet>
-
-
--- a/GATK/gatk/generation/gatk.xsldb.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<analyses>
-    <analysis>
-	    <name>RealignerTargetCreator</name>
-	    <input_type>bam</input_type>
-	    <tag>rtc</tag>
-	    <macro_file>realigner_target_creator.xml</macro_file>
-    </analysis>
-    <analysis>
-	    <name>IndelRealigner</name>
-	    <input_type>bam</input_type>
-	    <tag>ir</tag>
-	    <macro_file>indel_realigner.xml</macro_file>
-    </analysis>
-    <analysis>
-	    <name>BaseRecalibrator</name>
-	    <input_type>bam</input_type>
-	    <tag>br</tag>
-	    <macro_file>base_recalibrator.xml</macro_file>
-    </analysis>
-    <analysis>
-	    <name>AnalyzeCovariates</name>
-	    <input_type>bam</input_type>
-	    <tag>ac</tag>
-	    <macro_file>analyze_covariates.xml</macro_file>
-    </analysis>
-    <analysis>
-	    <name>PrintReads</name>
-	    <input_type>bam</input_type>
-	    <tag>pr</tag>
-	    <macro_file>print_reads.xml</macro_file>
-    </analysis>
-    <analysis>
-	    <name>HaplotypeCaller</name>
-	    <input_type>bam</input_type>
-	    <tag>hc</tag>
-	    <macro_file>haplotype_caller.xml</macro_file>
-    </analysis>
-    <analysis>
-	    <name>GenotypeGVCFs</name>
-	    <input_type>gvcf</input_type>
-	    <tag>gg</tag>
-	    <macro_file>genotype_gvcfs.xml</macro_file>
-    </analysis>
-    <analysis>
-	    <name>CombineGVCFs</name>
-	    <input_type>gvcf</input_type>
-	    <tag>cg</tag>
-	    <macro_file>combine_gvcfs.xml</macro_file>
-    </analysis>
-    <analysis>
-	    <name>CombineVariants</name>
-	    <input_type>vcf</input_type>
-	    <tag>cv</tag>
-	    <macro_file>combine_variants.xml</macro_file>
-    </analysis>
-</analyses>
--- a/GATK/gatk/genotype_gvcfs.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-<macros>
-    <xml name="GenotypeGVCFsParameters" tokens="tag">
-        
-        <expand macro="macro_gvcf_input" tag="@TAG@" />
-
-        <expand macro="macro_optional_parameters">
-
-
-            <param name="sample_ploidy" type="integer" value="2" label="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)" help="-ploidy,&#8209;&#8209;sample_ploidy &amp;lt;sample_ploidy&amp;gt;" />
-
-        </expand>
-
-    </xml>
-
-    <xml name="GenotypeGVCFsOutput">
-        <data format="vcf" name="gg_output_gvcf" from_work_dir="output.g.vcf" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (gVCF)">
-            <yield />
-        </data>
-    </xml>
-
-    <template name="GenotypeGVCFsPreprocessing">
-<![CDATA[
-        @token_gvcf_input_pre@
-]]>
-    </template>
-
-    <template name="GenotypeGVCFsOptions">
-<![CDATA[
-        --out output.g.vcf
-
-        @token_gvcf_input@
-
-        #set $optionals = $analysis_type.optional_parameters
-        #if $optionals.optional_parameters_enabled
-            --sample_ploidy $optionals.sample_ploidy
-        #end if
-]]>
-    </template>
-
-
-</macros>
-
-
--- a/GATK/gatk/haplotype_caller.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-<macros>
-    <xml name="HaplotypeCallerParameters" tokens="tag">
-        
-        <expand macro="macro_bam_input" tag="@TAG@" />
-
-        <conditional name="cond_usage">
-            <param name="cond_usage_selector" type="select" label="Select usage">
-                <option value="GVCF">Single-sample all-sites calling on DNAseq (GVCF mode)</option>
-            </param>
-            <when value="GVCF">
-                <expand macro="HaplotypeCallerGVCF" />
-            </when>
-        </conditional>
-
-        <expand macro="macro_optional_parameters">
-
-            <param name="sample_ploidy" type="integer" value="2" label="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)" help="-ploidy,&#8209;&#8209;sample_ploidy &amp;lt;sample_ploidy&amp;gt;" />
-
-        </expand>
-
-    </xml>
-
-    <xml name="HaplotypeCallerOutput">
-        <data format="vcf" name="hc_output_gvcf" from_work_dir="output.g.vcf" label="${tool.name} on ${on_string} (gVCF)">
-            <yield />
-        </data>
-    </xml>
-
-    <template name="HaplotypeCallerPreprocessing">
-<![CDATA[
-        @token_bam_input_pre@
-]]>
-    </template>
-
-    <template name="HaplotypeCallerOptions">
-<![CDATA[
-        --out output.g.vcf
-
-        @token_bam_input@
-
-        #set $optionals = $analysis_type.optional_parameters
-        #if $optionals.optional_parameters_enabled
-            --sample_ploidy $optionals.sample_ploidy
-        #end if
-
-        #set $usage_selector = $analysis_type.cond_usage.cond_usage_selector
-        #set $usage = $analysis_type.cond_usage
-
-        #if str($usage_selector) == 'GVCF'
-            --emitRefConfidence "GVCF"
-        #end if
-]]>
-    </template>
-
-
-
-    <xml name="HaplotypeCallerGVCF">
-        <param name="emitRefConfidence" type="select" optional="true" label="Mode for emitting reference confidence scores" help="-ERC,‑‑emitRefConfidence &amp;lt;emitRefConfidence&amp;gt;">
-              <option value="GVCF">GVCF (Reference model emitted with condensed non-variant blocks)</option>
-        </param>
-    </xml>
-
-</macros>
-
-
--- a/GATK/gatk/indel_realigner.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-<macros>
-    <xml name="IndelRealignerParameters" tokens="tag">
-        
-        <expand macro="macro_bam_input" tag="@TAG@" />
-
-        <param name="targetIntervals" type="data" format="gatk_interval" label="Intervals file output from RealignerTargetCreator" help="-targetIntervals,--targetIntervals &amp;lt;targetIntervals&amp;gt;" />
-
-        <expand macro="macro_optional_parameters">
-            <repeat name="knownAlleles" title="Input VCF file(s) with known indels" help="-known,&#8209;&#8209;knownAlleles &amp;lt;knownAlleles&amp;gt;">
-                <param name="knownAllele" type="data" format="vcf" label="Variant file (VCF format)" />
-            </repeat>
-            
-            <param name="consensusDeterminationModel" type="select" label="minimum reads at a locus to enable using the entropy calculation" help="-model,&#8209;&#8209;consensusDeterminationModel &amp;lt;consensusDeterminationModel&amp;gt;">
-                <option value="USE_READS">USE_READS - Additionally uses indels already present in the original alignments of the reads</option>
-                <option value="KNOWNS_ONLY">KNOWNS_ONLYS - Uses only indels from a provided ROD of known indels</option>
-                <option value="USE_SW">USE_SW - Additionally uses 'Smith-Waterman' to generate alternate consenses</option>
-            </param>
-            <param name="LODThresholdForCleaning" type="float" value="5.0" label="LOD threshold above which the cleaner will clean" help="-LOD,&#8209;&#8209;LODThresholdForCleaning &amp;lt;LODThresholdForCleaning&amp;gt;" />
-            <!--param name="nWayOut" type="float" value="5.0" label="Generate one output file for each input (-I) bam file (not compatible with -output)" help="-nWayOut,&#45;&#45;nWayOut &amp;lt;nWayOut&amp;gt;" /-->
-        </expand>
-
-
-        <expand macro="macro_advanced_parameters">
-            <param name="entropyThreshold" type="float" value="0.15" label="Percentage of mismatches at a locus to be considered having high entropy (0.0 &lt; entropy &lt;= 1.0)" help="-entropy,&#8209;&#8209;entropyThreshold &amp;lt;entropyThreshold&amp;gt;" />
-
-            <param name="maxConsensuses" type="integer" value="30" label="Max alternate consensuses to try (necessary to improve performance in deep coverage)" help="-maxConsensuses,&#8209;&#8209;maxConsensuses &amp;lt;maxConsensuses&amp;gt;" />
-
-            <param name="maxIsizeForMovement" type="integer" value="3000" label="maximum insert size of read pairs that we attempt to realign" help="-maxIsize,&#8209;&#8209;maxIsizeForMovement &amp;lt;maxIsizeForMovement&amp;gt;" />
-
-            <param name="maxPositionalMoveAllowed" type="integer" value="200" label="Maximum positional move in basepairs that a read can be adjusted during realignment" help="-maxPosMove,&#8209;&#8209;maxPositionalMoveAllowed &amp;lt;maxPositionalMoveAllowed&amp;gt;" />
-
-            <param name="maxReadsForConsensuses" type="integer" value="120" label="Max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)" help="-greedy,&#8209;&#8209;maxReadsForConsensuses &amp;lt;maxReadsForConsensuses&amp;gt;" />
-
-            <param name="maxReadsForRealignment" type="integer" value="20000" label="Max reads allowed at an interval for realignment" help="-maxReads,&#8209;&#8209;maxReadsForRealignment &amp;lt;maxReadsForRealignment&amp;gt;" />
-
-            <param name="maxReadsInMemory" type="integer" value="150000" label="max reads allowed to be kept in memory at a time by the SAMFileWriter" help="-maxInMemory,&#8209;&#8209;maxReadsInMemory &amp;lt;maxReadsInMemory&amp;gt;" />
-
-            <param name="noOriginalAlignmentTags" type="boolean" truevalue="--noOriginalAlignmentTags" falsevalue="" label="Don't output the original cigar or alignment start tags for each realigned read in the output bam" help="-noTags,&#8209;&#8209;noOriginalAlignmentTags" />
-        </expand>
-
-    </xml>
-
-    <xml name="IndelRealignerOutput">
-        <data format="bam" name="ir_output_bam" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (BAM)">
-            <yield />
-        </data>
-    </xml>
-
-    <template name="IndelRealignerPreprocessing">
-<![CDATA[
-        @token_bam_input_pre@
-
-        ln -s -f ${analysis_type.targetIntervals} target.intervals &&
-
-        #if $analysis_type.optional_parameters.optional_parameters_enabled
-            #for $i, $knownAllele in enumerate($analysis_type.optional_parameters.knownAlleles):
-                ln -s -f ${knownAllele.knownAllele} knownAllele_${i}.vcf &&
-            #end for
-        #end if
-]]>
-    </template>
-
-    <template name="IndelRealignerOptions">
-<![CDATA[
-        --out                   ${ir_output_bam}
-
-        @token_bam_input@
-        --targetIntervals       target.intervals
-
-        #if $analysis_type.optional_parameters.optional_parameters_enabled
-            #for $i, $knownAllele in enumerate($analysis_type.optional_parameters.knownAlleles):
-                --knownAlleles knownAllele_${i}.vcf
-            #end for
-            --consensusDeterminationModel       ${analysis_type.consensusDeterminationModel}
-            --LODThresholdForCleaning           ${analysis_type.LODThresholdForCleaning}
-        #end if
-
-        #if $analysis_type.advanced_parameters.advanced_parameters_enabled
-            --entropyThreshold          ${analysis_type.advanced_parameters.entropyThreshold}
-            --maxConsensuses            ${analysis_type.advanced_parameters.maxConsensuses}
-            --maxIsizeForMovement       ${analysis_type.advanced_parameters.maxIsizeForMovement}
-            --maxPositionalMoveAllowed  ${analysis_type.advanced_parameters.maxPositionalMoveAllowed}
-            --maxReadsForConsensuses    ${analysis_type.advanced_parameters.maxReadsForConsensuses}
-            --maxReadsForRealignment    ${analysis_type.advanced_parameters.maxReadsForRealignment}
-            --maxReadsInMemory          ${analysis_type.advanced_parameters.maxReadsInMemory}
-            ${analysis_type.advanced_parameters.noOriginalAlignmentTags}
-        #end if
-]]>
-    </template>
-</macros>
--- a/GATK/gatk/print_reads.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-<macros>
-    <xml name="PrintReadsParameters" tokens="tag">
-        
-        <expand macro="macro_bam_input" tag="@TAG@" />
-
-        <!-- BQSR in main config -->
-
-        <expand macro="macro_optional_parameters">
-
-            <param name="number" type="integer" value="" optional="true" label="Print the first n reads from the file, discarding the rest" help="-n,&#8209;&#8209;number &amp;lt;number&amp;gt;" />
-
-            <param name="platform" type="text" value="" optional="true" label="Exclude all reads with this platform from the output" help="-platform,&#8209;&#8209;platform &amp;lt;platform&amp;gt;" />
-
-            <param name="readGroup" type="text" value="" optional="true" label="Exclude all reads with this read group from the output" help="-readGroup,&#8209;&#8209;readGroup &amp;lt;readGroup&amp;gt;" />
-
-            <param name="sample_file" type="data" format="txt" optional="true" label="File containing a list of samples (one per line). Can be specified multiple times" help="-sf,&#8209;&#8209;sample_file &amp;lt;sample_file&amp;gt;" />
-
-            <repeat name="sample_names" title="Sample names to be included in the analysis" help="-sn,‑‑sample_name &amp;lt;sample_name&amp;gt;">
-                <param name="sample_name" type="text" value="" title="Sample name to be included in the analysis" />
-            </repeat>
-            
-            <param name="simplify" type="text" truevalue="-s" falsevalue="" label="Erase all extra attributes in the read but keep the read group information" help="-s,&#8209;&#8209;simplify" />
-
-        </expand>
-
-    </xml>
-
-    <xml name="PrintReadsOutput">
-        <data format="bam" name="pr_output_bam" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (BAM)">
-            <yield />
-        </data>
-    </xml>
-
-    <template name="PrintReadsPreprocessing">
-<![CDATA[
-        @token_bam_input_pre@
-]]>
-    </template>
-
-    <template name="PrintReadsOptions">
-<![CDATA[
-        --out                   ${pr_output_bam}
-
-        @token_bam_input@
-
-        #set $optionals = $analysis_type.optional_parameters
-        #if $optionals.optional_parameters_enabled
-            #if int($optionals.number) > 0
-                --number $optionals.number
-            #end if
-            #if str($optionals.platform)
-                --platform $optionals.platform
-            #end if
-            #if str($optionals.readGroup)
-                --readGroup $optionals.readGroup
-            #end if
-            #if $optionals.sample_file
-                --sample_file $optionals.sample_file
-            #end if
-            #if $optionals.sample_names
-                #for $sample in $optionals.sample_names:
-                    --intervals ${sample.sample_name}
-                #end for
-            #end if
-            $optionals.simplify
-        #end if
-]]>
-    </template>
-</macros>
-
-
--- a/GATK/gatk/realigner_target_creator.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-<macros>
-    <xml name="RealignerTargetCreatorParameters" tokens="tag">
-        
-        <expand macro="macro_bam_input" tag="@TAG@" />
-
-        <expand macro="macro_optional_parameters">
-            <param name="maxIntervalSize" type="integer" value="500" label="maximum interval size; any intervals larger than this value will be dropped" help="-maxInterval,&#8209;&#8209;maxIntervalSize &amp;lt;maxIntervalSize&amp;gt;" />
-            <param name="minReadsAtLocus" type="integer" value="4" label="minimum reads at a locus to enable using the entropy calculation" help="-minReads,&#8209;&#8209;minReadsAtLocus &amp;lt;minReadsAtLocus&amp;gt;" />
-            <param name="windowSize" type="integer" value="10" label="window size for calculating entropy or SNP clusters" help="-window,&#8209;&#8209;windowSize &amp;lt;windowSize&amp;gt;" />
-
-            <param name="mismatchFraction" type="float" value="0.0" label="fraction of base qualities needing to mismatch for a position to have high entropy" help="-mismatch,&#8209;&#8209;mismatchFraction &amp;lt;mismatchFraction&amp;gt;" />
-            <repeat name="rod_bindings" title="Input VCF files with known indels" help="-known,&#8209;&#8209;known &amp;lt;known&amp;gt;">
-                <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
-            </repeat>
-        </expand>
-        
-    </xml>
-
-    <xml name="RealignerTargetCreatorOutput">
-        <data format="gatk_interval" name="rtc_output_intervals" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (GATK intervals)">
-            <yield />
-        </data>
-    </xml>
-
-    <template name="RealignerTargetCreatorPreprocessing">
-<![CDATA[
-        @token_bam_input_pre@
-
-        #if $analysis_type.optional_parameters.optional_parameters_enabled
-            #for $i, $rod_binding in enumerate($analysis_type.optional_parameters.rod_bindings):
-                ln -s -f ${rod_binding.input_rod} rod_binding_${i}.vcf &&
-            #end for
-        #end if
-]]>
-    </template>
-
-    <template name="RealignerTargetCreatorOptions">
-<![CDATA[
-        --out                   ${rtc_output_intervals}
-
-        @token_bam_input@
-
-        #if $analysis_type.optional_parameters.optional_parameters_enabled
-            --maxIntervalSize       ${analysis_type.optional_parameters.maxIntervalSize}
-            --minReadsAtLocus       ${analysis_type.optional_parameters.minReadsAtLocus}
-            --windowSize            ${analysis_type.optional_parameters.windowSize}
-            --mismatchFraction      ${analysis_type.optional_parameters.mismatchFraction}
-
-            #for $i, $rod_binding in enumerate($analysis_type.optional_parameters.rod_bindings):
-                --known rod_binding_${i}.vcf
-            #end for
-        #end if
-]]>
-    </template>
-</macros>
-
-
--- a/GATK/gatk/tool-data/destinations.py	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-from galaxy.jobs import JobDestination
-import os
-import sys
-import json
-import cStringIO
-import logging
-
-log = logging.getLogger( __name__ )
-
-
-def dump(obj, nested_level=0, output=sys.stdout):
-    spacing = '   '
-    if type(obj) == dict:
-        print >> output, '%s{' % ((nested_level) * spacing)
-        for k, v in obj.items():
-            if hasattr(v, '__iter__'):
-                print >> output, '%s%s:' % ((nested_level + 1) * spacing, k)
-                dump(v, nested_level + 1, output)
-            else:
-                print >> output, '%s%s: %s' % ((nested_level + 1) * spacing, k, v)
-        print >> output, '%s}' % (nested_level * spacing)
-    elif type(obj) == list:
-        print >> output, '%s[' % ((nested_level) * spacing)
-        for v in obj:
-            if hasattr(v, '__iter__'):
-                dump(v, nested_level + 1, output)
-            else:
-                print >> output, '%s%s' % ((nested_level + 1) * spacing, v)
-        print >> output, '%s]' % ((nested_level) * spacing)
-    else:
-        print >> output, '%s%s' % (nested_level * spacing, obj)
-
-
-def dynamic_slurm_cluster_gatk(job, tool_id):
-    # Allocate extra time
-    inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] )
-    inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] )
-    inp_data.update( [ ( da.name, json.loads(da.value) ) for da in job.parameters ] )
-    out = cStringIO.StringIO()
-    dump(inp_data, 1, out)
-    log.debug(out.getvalue())
-    
-    nativeSpecs = '--nodes=1 --ntasks=1'
-    
-    # runner doesn't allow to specify --cpus-per-task
-    # thus the mem calculation gets messy with more than 1 node
-    # --> translate nt ==> nodes, nct ==> ntasks
-    
-    if 'cond_threads' not in inp_data:
-        return JobDestination(runner="slurm")
-    
-    if inp_data['cond_threads']['cond_threads_enabled'] == "True":
-        nNodes = int(inp_data['cond_threads']['nt'])
-        nCPU   = int(inp_data['cond_threads']['nct'])
-        nMEM   = int(inp_data['cond_threads']['mem'])
-        if nMEM > 0:
-            nativeSpecs = '--nodes=%d --ntasks=%d --mem=%d' % (nNodes, nCPU*nNodes, nMEM)
-        else:
-            nativeSpecs = '--nodes=%d --ntasks=%d' % (nNodes, nCPU*nNodes)
-        
-    return JobDestination(runner="slurm", params={"nativeSpecification": nativeSpecs})
-
--- a/GATK/gatk/tool-data/picard_index.loc.sample	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of Picard dict and associated files. You will need
-#to create these data files and then create a picard_index.loc file 
-#similar to this one (store it in this directory) that points to 
-#the directories in which those files are stored. The picard_index.loc 
-#file has this format (longer white space is the TAB character):
-#
-#<unique_build_id>	<dbkey>		<display_name>		<fasta_file_path>
-#
-#So, for example, if you had hg18 indexed and stored in 
-#/depot/data2/galaxy/srma/hg18/,
-#then the srma_index.loc entry would look like this:
-#
-#hg18	hg18	hg18 Pretty		/depot/data2/galaxy/picard/hg18/hg18.fa
-#
-#and your /depot/data2/galaxy/srma/hg18/ directory
-#would contain the following three files:
-#hg18.fa
-#hg18.dict
-#hg18.fa.fai
-#
-#The dictionary file for each reference (ex. hg18.dict) must be 
-#created via Picard (http://picard.sourceforge.net). Note that
-#the dict file does not have the .fa extension although the
-#path list in the loc file does include it.
-#
--- a/GATK/gatk/tool_data_table_conf.xml.sample	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-<tables>
-    <!-- Location of Picard dict files valid for GATK -->
-    <table name="picard_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/picard_index.loc" />
-    </table>
-</tables>
--- a/GATK/gatk/tool_dependencies.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <set_environment version="1.0">
-        <environment_variable action="set_to" name="GATK_PATH">/mnt/galaxy/tools/GATK/3.4-0</environment_variable>
-    </set_environment>
-    <!--
-    Use GATK_SITE_OPTIONS to set additional parameters that should be inserted in every GATK call.
-    The intended use case was to prohibit GATK to collect and send data.
-    For example:
-
-    -et "NO_ET" -K "/data/gatk_key_file" ##ET no phone home
-    -->
-    <set_environment version="1.0">
-        <environment_variable action="set_to" name="GATK_SITE_OPTIONS"> </environment_variable>
-    </set_environment>
-    <package name="package_r_for_gatk_3_4_0" version="3.1.2.1">
-        <repository changeset_revision="49c62e9b71ad" name="package_r_for_gatk_3_4_0" owner="avowinkel" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>
--- a/GATK/package_picard_1_135/tool_dependencies.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="picard" version="1.135">
-        <install version="1.0">
-            <actions_group>
-                <actions architecture="x86_64" os="linux">
-                    <action type="download_by_url">https://github.com/broadinstitute/picard/releases/download/1.135/picard-tools-1.135.zip</action>
-                    <action type="move_directory_files">
-                        <source_directory>.</source_directory>
-                        <destination_directory>$INSTALL_DIR</destination_directory>
-                    </action>       
-                </actions>                
-                <action type="set_environment">
-                    <environment_variable name="JAVA_JAR_PATH" action="set_to">$INSTALL_DIR</environment_variable>
-                </action>
-            </actions_group>
-        </install>
-        <readme>
-This picard package dependency is retrieved directly from https://github.com/broadinstitute/picard/releases
-        </readme>
-    </package>
-</tool_dependency>
--- a/GATK/package_r_for_gatk_3_4_0/tool_dependencies.xml	Thu Sep 12 06:50:21 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="R" version="3.1.2">
-        <repository changeset_revision="9f2fddb9d6e2" name="package_r_3_1_2" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="package_r_for_gatk_3_4_0" version="3.1.2.1">
-        <install version="1.0">
-            <actions>
-                <action type="setup_r_environment">
-
-                    <repository changeset_revision="9f2fddb9d6e2" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu">
-                        <package name="R" version="3.1.2" />
-                    </repository>
-                    <package>https://github.com/cran/stringi/archive/0.5-5.tar.gz</package>
-                    <package>https://github.com/cran/magrittr/archive/1.5.tar.gz</package>
-                    <package>https://github.com/cran/stringr/archive/1.0.0.tar.gz</package>
-                    <package>https://github.com/cran/RColorBrewer/archive/1.1-2.tar.gz</package>
-                    <package>https://github.com/cran/dichromat/archive/2.0-0.tar.gz</package>
-                    <package>https://github.com/cran/colorspace/archive/1.2-6.tar.gz</package>
-                    <package>https://github.com/cran/munsell/archive/0.4.2.tar.gz</package>
-                    <package>https://github.com/cran/labeling/archive/0.3.tar.gz</package>
-                    <package>https://github.com/cran/Rcpp/archive/0.11.6.tar.gz</package>
-                    <package>https://github.com/cran/digest/archive/0.6.8.tar.gz</package>
-                    <package>https://github.com/cran/gtable/archive/0.1.2.tar.gz</package>
-                    <package>https://github.com/cran/bitops/archive/1.0-6.tar.gz</package>
-                    <package>https://github.com/cran/caTools/archive/1.17.1.tar.gz</package>
-                    <package>https://github.com/cran/gtools/archive/3.5.0.tar.gz</package>
-                    <package>https://github.com/cran/gdata/archive/2.17.0.tar.gz</package>
-                    <package>https://github.com/cran/gsalib/archive/2.1.tar.gz</package>
-                    <package>https://github.com/cran/gplots/archive/2.17.0.tar.gz</package>
-                    <package>https://github.com/cran/plyr/archive/1.8.3.tar.gz</package>
-                    <package>https://github.com/cran/reshape/archive/0.8.5.tar.gz</package>
-                    <package>https://github.com/cran/reshape2/archive/1.4.1.tar.gz</package>
-                    <package>https://github.com/cran/scales/archive/0.2.5.tar.gz</package>
-                    <package>https://github.com/cran/proto/archive/0.3-10.tar.gz</package>
-                    <package>https://github.com/cran/MASS/archive/7.3-43.tar.gz</package>
-                    <package>https://github.com/cran/ggplot2/archive/1.0.1.tar.gz</package>
-                </action>
-            </actions>
-        </install>
-        <readme>
-            ggplot2 is a plotting system for R, based on the grammar of graphics, which tries to take the good parts of base and lattice graphics and none of the bad parts.
-            It takes care of many of the fiddly details that make plotting a hassle (like drawing legends) as well as providing a powerful model of graphics that makes it easy to produce complex multi-layered graphics.
-
-            http://ggplot2.org/
-        </readme>
-    </package>
-</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/Haplotypecaller.ga	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,101 @@
+{
+    "a_galaxy_workflow": "true", 
+    "annotation": "Haplotypecaller workflow.", 
+    "format-version": "0.1", 
+    "name": "Haplotypecaller", 
+    "steps": {
+        "0": {
+            "annotation": "", 
+            "id": 0, 
+            "input_connections": {}, 
+            "inputs": [
+                {
+                    "description": "", 
+                    "name": "Input Dataset"
+                }
+            ], 
+            "label": null, 
+            "name": "Input dataset", 
+            "outputs": [], 
+            "position": {
+                "left": 198, 
+                "top": 208.1666717529297
+            }, 
+            "tool_errors": null, 
+            "tool_id": null, 
+            "tool_state": "{\"name\": \"Input Dataset\"}", 
+            "tool_version": null, 
+            "type": "data_input", 
+            "user_outputs": [], 
+            "uuid": "b9fb671f-f07f-498c-8c85-619987501313"
+        }, 
+        "1": {
+            "annotation": "", 
+            "id": 1, 
+            "input_connections": {
+                "analysis_type|cond_bam_input|input": {
+                    "id": 0, 
+                    "output_name": "output"
+                }
+            }, 
+            "inputs": [], 
+            "label": null, 
+            "name": "GATK", 
+            "outputs": [
+                {
+                    "name": "rtc_output_intervals", 
+                    "type": "gatk_interval"
+                }, 
+                {
+                    "name": "ir_output_bam", 
+                    "type": "bam"
+                }, 
+                {
+                    "name": "br_table", 
+                    "type": "tabular"
+                }, 
+                {
+                    "name": "ac_plotsReportFile", 
+                    "type": "pdf"
+                }, 
+                {
+                    "name": "pr_output_bam", 
+                    "type": "bam"
+                }, 
+                {
+                    "name": "hc_output_gvcf", 
+                    "type": "vcf"
+                }, 
+                {
+                    "name": "gg_output_gvcf", 
+                    "type": "vcf"
+                }, 
+                {
+                    "name": "cg_output_vcf", 
+                    "type": "vcf"
+                }, 
+                {
+                    "name": "cv_output_vcf", 
+                    "type": "vcf"
+                }, 
+                {
+                    "name": "output_log", 
+                    "type": "txt"
+                }
+            ], 
+            "position": {
+                "left": 484.83331298828125, 
+                "top": 200
+            }, 
+            "post_job_actions": {}, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/avowinkel/gatk/gatk/3.4-0.d9", 
+            "tool_state": "{\"__page__\": 0, \"cond_threads\": \"{\\\"cond_threads_enabled\\\": \\\"False\\\", \\\"__current_case__\\\": 1}\", \"ref_file\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"\\\"}\", \"__rerun_remap_job_id__\": null, \"cond_BQSR\": \"{\\\"cond_BQSR_enabled\\\": \\\"False\\\", \\\"__current_case__\\\": 1}\", \"analysis_type\": \"{\\\"analysis_type_selector\\\": \\\"HaplotypeCaller\\\", \\\"cond_usage\\\": {\\\"emitRefConfidence\\\": \\\"GVCF\\\", \\\"__current_case__\\\": 0, \\\"cond_usage_selector\\\": \\\"GVCF\\\"}, \\\"optional_parameters\\\": {\\\"optional_parameters_enabled\\\": \\\"False\\\", \\\"__current_case__\\\": 1}, \\\"cond_bam_input\\\": {\\\"input\\\": null, \\\"all_in_one\\\": \\\"False\\\", \\\"__current_case__\\\": 1}, \\\"__current_case__\\\": 5}\", \"cond_intervals\": \"{\\\"cond_intervals_enabled\\\": \\\"False\\\", \\\"__current_case__\\\": 1}\"}", 
+            "tool_version": "3.4-0.d9", 
+            "type": "tool", 
+            "user_outputs": [], 
+            "uuid": "5988799c-5658-4475-89ee-86f267cfbee1"
+        }
+    }, 
+    "uuid": "e4f99de3-d6b0-4554-9562-60b2b0eed2d3"
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/SAMTools.ga	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,160 @@
+{
+    "a_galaxy_workflow": "true", 
+    "annotation": "SAMTools workflow", 
+    "format-version": "0.1", 
+    "name": "SAMTools", 
+    "steps": {
+        "0": {
+            "annotation": "", 
+            "id": 0, 
+            "input_connections": {}, 
+            "inputs": [
+                {
+                    "description": "", 
+                    "name": "Input Dataset"
+                }
+            ], 
+            "label": null, 
+            "name": "Input dataset", 
+            "outputs": [], 
+            "position": {
+                "left": 162, 
+                "top": 169
+            }, 
+            "tool_errors": null, 
+            "tool_id": null, 
+            "tool_state": "{\"name\": \"Input Dataset\"}", 
+            "tool_version": null, 
+            "type": "data_input", 
+            "user_outputs": [], 
+            "uuid": "1eec712c-2fe0-43a8-b033-f027a74c6a48"
+        }, 
+        "1": {
+            "annotation": "", 
+            "id": 1, 
+            "input_connections": {}, 
+            "inputs": [
+                {
+                    "description": "", 
+                    "name": "Input Dataset"
+                }
+            ], 
+            "label": null, 
+            "name": "Input dataset", 
+            "outputs": [], 
+            "position": {
+                "left": 273.83331298828125, 
+                "top": 341.83331298828125
+            }, 
+            "tool_errors": null, 
+            "tool_id": null, 
+            "tool_state": "{\"name\": \"Input Dataset\"}", 
+            "tool_version": null, 
+            "type": "data_input", 
+            "user_outputs": [], 
+            "uuid": "98d5e3bf-4715-48a2-9b2b-29b21179c425"
+        }, 
+        "2": {
+            "annotation": "", 
+            "id": 2, 
+            "input_connections": {
+                "input1": {
+                    "id": 0, 
+                    "output_name": "output"
+                }
+            }, 
+            "inputs": [], 
+            "label": null, 
+            "name": "Sort", 
+            "outputs": [
+                {
+                    "name": "output1", 
+                    "type": "bam"
+                }
+            ], 
+            "position": {
+                "left": 330, 
+                "top": 222
+            }, 
+            "post_job_actions": {}, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_sort/samtools_sort/2.0", 
+            "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input1\": \"null\", \"sort_mode\": \"\\\"\\\"\"}", 
+            "tool_version": "2.0", 
+            "type": "tool", 
+            "user_outputs": [], 
+            "uuid": "5e9aef3e-c2ad-43d1-b248-7d94d6a974bc"
+        }, 
+        "3": {
+            "annotation": "", 
+            "id": 3, 
+            "input_connections": {
+                "reference_source|input_bams_0|input_bam": {
+                    "id": 2, 
+                    "output_name": "output1"
+                }, 
+                "reference_source|ref_file": {
+                    "id": 1, 
+                    "output_name": "output"
+                }
+            }, 
+            "inputs": [], 
+            "label": null, 
+            "name": "MPileup", 
+            "outputs": [
+                {
+                    "name": "output_mpileup", 
+                    "type": "pileup"
+                }, 
+                {
+                    "name": "output_log", 
+                    "type": "txt"
+                }
+            ], 
+            "position": {
+                "left": 484.83331298828125, 
+                "top": 313.83331298828125
+            }, 
+            "post_job_actions": {}, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_mpileup/samtools_mpileup/2.0", 
+            "tool_state": "{\"__page__\": 0, \"advanced_options\": \"{\\\"advanced_options_selector\\\": \\\"basic\\\", \\\"__current_case__\\\": 1}\", \"__rerun_remap_job_id__\": null, \"genotype_likelihood_computation_type\": \"{\\\"output_mapping_quality\\\": \\\"False\\\", \\\"genotype_likelihood_computation_type_selector\\\": \\\"do_not_perform_genotype_likelihood_computation\\\", \\\"__current_case__\\\": 1, \\\"base_position_on_reads\\\": \\\"False\\\"}\", \"reference_source\": \"{\\\"ref_file\\\": null, \\\"reference_source_selector\\\": \\\"history\\\", \\\"input_bams\\\": [{\\\"__index__\\\": 0, \\\"input_bam\\\": null}], \\\"__current_case__\\\": 1}\"}", 
+            "tool_version": "2.0", 
+            "type": "tool", 
+            "user_outputs": [], 
+            "uuid": "84c6d18e-1930-4eef-b58a-46c88a35339e"
+        }, 
+        "4": {
+            "annotation": "", 
+            "id": 4, 
+            "input_connections": {
+                "input_file": {
+                    "id": 3, 
+                    "output_name": "output_mpileup"
+                }
+            }, 
+            "inputs": [], 
+            "label": null, 
+            "name": "Pileup to VCF", 
+            "outputs": [
+                {
+                    "name": "output_file", 
+                    "type": "vcf"
+                }
+            ], 
+            "position": {
+                "left": 562.8333129882812, 
+                "top": 469.83331298828125
+            }, 
+            "post_job_actions": {}, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/pileup_to_vcf/pileup_to_vcf/2.2", 
+            "tool_state": "{\"snps_only\": \"\\\"False\\\"\", \"min_cvrg\": \"\\\"5\\\"\", \"allow_multiples\": \"\\\"False\\\"\", \"input_file\": \"null\", \"__page__\": 0, \"vcf_id\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"cols\": \"{\\\"select_order\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\", \"depth_as\": \"\\\"ref\\\"\", \"min_base_qual\": \"\\\"20\\\"\", \"min_var_pct\": \"\\\"0.5\\\"\"}", 
+            "tool_version": "2.2", 
+            "type": "tool", 
+            "user_outputs": [], 
+            "uuid": "335581bc-838e-431d-8247-1e1580260275"
+        }
+    }, 
+    "uuid": "0c40a63d-b349-49b8-90b0-c927a05253ef"
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/SVDetect.ga	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,163 @@
+{
+    "a_galaxy_workflow": "true", 
+    "annotation": "SVDetect workflow.", 
+    "format-version": "0.1", 
+    "name": "SVDetect", 
+    "steps": {
+        "0": {
+            "annotation": "", 
+            "id": 0, 
+            "input_connections": {}, 
+            "inputs": [], 
+            "label": null, 
+            "name": "Import data", 
+            "outputs": [
+                {
+                    "name": "outbamfile", 
+                    "type": "bam"
+                }, 
+                {
+                    "name": "outlenfile", 
+                    "type": "len"
+                }, 
+                {
+                    "name": "outsvfile", 
+                    "type": "sv"
+                }
+            ], 
+            "position": {
+                "left": 171.99998474121094, 
+                "top": 168
+            }, 
+            "post_job_actions": {}, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/bzeitouni/svdetect/svdetect_import/1.0.0", 
+            "tool_state": "{\"file_name\": \"\\\"file1\\\"\", \"__rerun_remap_job_id__\": null, \"type\": \"{\\\"file_type\\\": \\\"bam\\\", \\\"__current_case__\\\": 0}\", \"file_path\": \"\\\"\\\"\", \"__page__\": 0}", 
+            "tool_version": "1.0.0", 
+            "type": "tool", 
+            "user_outputs": [], 
+            "uuid": "296478bf-9b67-47e8-9685-ced05bad2857"
+        }, 
+        "1": {
+            "annotation": "", 
+            "id": 1, 
+            "input_connections": {}, 
+            "inputs": [], 
+            "label": null, 
+            "name": "Import data", 
+            "outputs": [
+                {
+                    "name": "outbamfile", 
+                    "type": "bam"
+                }, 
+                {
+                    "name": "outlenfile", 
+                    "type": "len"
+                }, 
+                {
+                    "name": "outsvfile", 
+                    "type": "sv"
+                }
+            ], 
+            "position": {
+                "left": 266.99998474121094, 
+                "top": 430.99998474121094
+            }, 
+            "post_job_actions": {}, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/bzeitouni/svdetect/svdetect_import/1.0.0", 
+            "tool_state": "{\"file_name\": \"\\\"file1\\\"\", \"__rerun_remap_job_id__\": null, \"type\": \"{\\\"file_type\\\": \\\"len\\\", \\\"__current_case__\\\": 1}\", \"file_path\": \"\\\"\\\"\", \"__page__\": 0}", 
+            "tool_version": "1.0.0", 
+            "type": "tool", 
+            "user_outputs": [], 
+            "uuid": "481cf408-cef1-4ccd-bd08-9394bac04050"
+        }, 
+        "2": {
+            "annotation": "", 
+            "id": 2, 
+            "input_connections": {
+                "inputBam": {
+                    "id": 0, 
+                    "output_name": "outbamfile"
+                }
+            }, 
+            "inputs": [], 
+            "label": null, 
+            "name": "BAM preprocessing", 
+            "outputs": [
+                {
+                    "name": "abBAM", 
+                    "type": "bam"
+                }, 
+                {
+                    "name": "log", 
+                    "type": "txt"
+                }, 
+                {
+                    "name": "normBAM", 
+                    "type": "bam"
+                }
+            ], 
+            "position": {
+                "left": 344.49998474121094, 
+                "top": 224
+            }, 
+            "post_job_actions": {}, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/bzeitouni/svdetect/svdetect_preprocessing/1.0.0", 
+            "tool_state": "{\"__page__\": 0, \"isizeMin\": \"\\\"0\\\"\", \"newBam\": \"{\\\"__current_case__\\\": 1, \\\"pairNormal\\\": \\\"no\\\"}\", \"isizeMax\": \"\\\"10000\\\"\", \"inputBam\": \"null\", \"__rerun_remap_job_id__\": null, \"nbrePair\": \"\\\"1000000\\\"\", \"readType\": \"\\\"1\\\"\", \"sample_name\": \"\\\"sample\\\"\", \"pairType\": \"\\\"1\\\"\", \"foldPair\": \"\\\"3.0\\\"\"}", 
+            "tool_version": "1.0.0", 
+            "type": "tool", 
+            "user_outputs": [], 
+            "uuid": "01f2c8d8-123b-4eac-9c5e-6101e9264dbf"
+        }, 
+        "3": {
+            "annotation": "", 
+            "id": 3, 
+            "input_connections": {
+                "cmap_file": {
+                    "id": 1, 
+                    "output_name": "outlenfile"
+                }, 
+                "mates_file": {
+                    "id": 2, 
+                    "output_name": "abBAM"
+                }
+            }, 
+            "inputs": [], 
+            "label": null, 
+            "name": "Detect clusters of anomalously mapped pairs", 
+            "outputs": [
+                {
+                    "name": "sv_file", 
+                    "type": "sv"
+                }, 
+                {
+                    "name": "circos_file", 
+                    "type": "segdup"
+                }, 
+                {
+                    "name": "bed_file", 
+                    "type": "bed"
+                }, 
+                {
+                    "name": "log_file", 
+                    "type": "txt"
+                }
+            ], 
+            "position": {
+                "left": 472.49998474121094, 
+                "top": 375.99998474121094
+            }, 
+            "post_job_actions": {}, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/bzeitouni/svdetect/svdetect_run_parallel/1.0.0", 
+            "tool_state": "{\"__page__\": 0, \"sv_type\": \"\\\"all\\\"\", \"getLinks\": \"{\\\"window_size\\\": \\\"3000\\\", \\\"step_length\\\": \\\"250\\\", \\\"splitmate\\\": \\\"True\\\", \\\"__current_case__\\\": 1, \\\"linking\\\": \\\"linking\\\"}\", \"mates_file\": \"null\", \"__rerun_remap_job_id__\": null, \"getFilteredLinks\": \"{\\\"nb_pairs_threshold\\\": \\\"5\\\", \\\"filtering\\\": \\\"filtering\\\", \\\"chromosomes\\\": \\\"\\\", \\\"file_conversion\\\": {\\\"file_conversion_select\\\": \\\"do_not_convert\\\", \\\"__current_case__\\\": 0}, \\\"splitlink\\\": \\\"False\\\", \\\"filter1\\\": {\\\"strand_filtering\\\": \\\"strand\\\", \\\"final_score_threshold\\\": \\\"1.0\\\", \\\"__current_case__\\\": 1, \\\"filter2\\\": {\\\"nb_pairs_order_threshold\\\": \\\"2\\\", \\\"order_filtering\\\": \\\"order\\\", \\\"__current_case__\\\": 1, \\\"filter3\\\": {\\\"indel_sigma_threshold\\\": \\\"3.0\\\", \\\"dup_sigma_threshold\\\": \\\"3.0\\\", \\\"singleton_sigma_threshold\\\": \\\"4.0\\\", \\\"__current_case__\\\": 1, \\\"insert_size_filtering\\\": \\\"insert\\\"}, \\\"mu_length\\\": \\\"3000\\\", \\\"sigma_length\\\": \\\"250\\\"}}, \\\"__current_case__\\\": 1, \\\"links2SV\\\": \\\"True\\\"}\", \"read2_length\": \"\\\"50\\\"\", \"read1_length\": \"\\\"50\\\"\", \"sample_name\": \"\\\"sample\\\"\", \"mates_orientation\": \"\\\"FR\\\"\", \"cmap_file\": \"null\"}", 
+            "tool_version": "1.0.0", 
+            "type": "tool", 
+            "user_outputs": [], 
+            "uuid": "2edd1fca-fc2d-453e-aead-c95a2e250d0a"
+        }
+    }, 
+    "uuid": "bb58fd42-403a-48ad-80d2-59d81f0c8f0b"
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/VARSCAN.ga	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,133 @@
+{
+    "a_galaxy_workflow": "true", 
+    "annotation": "VARSCAN workflow.", 
+    "format-version": "0.1", 
+    "name": "VARSCAN", 
+    "steps": {
+        "0": {
+            "annotation": "", 
+            "id": 0, 
+            "input_connections": {}, 
+            "inputs": [
+                {
+                    "description": "", 
+                    "name": "Input Dataset"
+                }
+            ], 
+            "label": null, 
+            "name": "Input dataset", 
+            "outputs": [], 
+            "position": {
+                "left": 161, 
+                "top": 187
+            }, 
+            "tool_errors": null, 
+            "tool_id": null, 
+            "tool_state": "{\"name\": \"Input Dataset\"}", 
+            "tool_version": null, 
+            "type": "data_input", 
+            "user_outputs": [], 
+            "uuid": "72a1326a-cd57-4567-8c41-cc66411642be"
+        }, 
+        "1": {
+            "annotation": "", 
+            "id": 1, 
+            "input_connections": {}, 
+            "inputs": [
+                {
+                    "description": "", 
+                    "name": "Input Dataset"
+                }
+            ], 
+            "label": null, 
+            "name": "Input dataset", 
+            "outputs": [], 
+            "position": {
+                "left": 163.83331298828125, 
+                "top": 263.8333282470703
+            }, 
+            "tool_errors": null, 
+            "tool_id": null, 
+            "tool_state": "{\"name\": \"Input Dataset\"}", 
+            "tool_version": null, 
+            "type": "data_input", 
+            "user_outputs": [], 
+            "uuid": "d8444634-af6c-44c5-9b55-8c3284da2823"
+        }, 
+        "2": {
+            "annotation": "", 
+            "id": 2, 
+            "input_connections": {
+                "reference_source|input_bams_0|input_bam": {
+                    "id": 0, 
+                    "output_name": "output"
+                }, 
+                "reference_source|ref_file": {
+                    "id": 1, 
+                    "output_name": "output"
+                }
+            }, 
+            "inputs": [], 
+            "label": null, 
+            "name": "MPileup", 
+            "outputs": [
+                {
+                    "name": "output_mpileup", 
+                    "type": "pileup"
+                }, 
+                {
+                    "name": "output_log", 
+                    "type": "txt"
+                }
+            ], 
+            "position": {
+                "left": 370, 
+                "top": 187
+            }, 
+            "post_job_actions": {}, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_mpileup/samtools_mpileup/2.0", 
+            "tool_state": "{\"__page__\": 0, \"advanced_options\": \"{\\\"advanced_options_selector\\\": \\\"basic\\\", \\\"__current_case__\\\": 1}\", \"__rerun_remap_job_id__\": null, \"genotype_likelihood_computation_type\": \"{\\\"output_mapping_quality\\\": \\\"False\\\", \\\"genotype_likelihood_computation_type_selector\\\": \\\"do_not_perform_genotype_likelihood_computation\\\", \\\"__current_case__\\\": 1, \\\"base_position_on_reads\\\": \\\"False\\\"}\", \"reference_source\": \"{\\\"ref_file\\\": null, \\\"reference_source_selector\\\": \\\"history\\\", \\\"input_bams\\\": [{\\\"__index__\\\": 0, \\\"input_bam\\\": null}], \\\"__current_case__\\\": 1}\"}", 
+            "tool_version": "2.0", 
+            "type": "tool", 
+            "user_outputs": [], 
+            "uuid": "26d34295-7783-40e1-950f-a485ff569f5b"
+        }, 
+        "3": {
+            "annotation": "", 
+            "id": 3, 
+            "input_connections": {
+                "in_file": {
+                    "id": 2, 
+                    "output_name": "output_mpileup"
+                }
+            }, 
+            "inputs": [], 
+            "label": null, 
+            "name": "VarScan mpileup", 
+            "outputs": [
+                {
+                    "name": "output", 
+                    "type": "vcf"
+                }, 
+                {
+                    "name": "log", 
+                    "type": "txt"
+                }
+            ], 
+            "position": {
+                "left": 563.8333129882812, 
+                "top": 364.83331298828125
+            }, 
+            "post_job_actions": {}, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/fcaramia/varscan/varscan_mpileup/2.3.5", 
+            "tool_state": "{\"strand_filter\": \"\\\"1\\\"\", \"min_avg_qual\": \"\\\"15\\\"\", \"min_coverage\": \"\\\"8\\\"\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"min_freq_for_hom\": \"\\\"0.75\\\"\", \"min_var_freq\": \"\\\"0.01\\\"\", \"exe_command\": \"\\\"mpileup2snp\\\"\", \"min_reads2\": \"\\\"2\\\"\", \"p_value\": \"\\\"0.99\\\"\", \"variants\": \"\\\"1\\\"\", \"vcf_sample_list\": \"null\", \"in_file\": \"null\"}", 
+            "tool_version": "2.3.5", 
+            "type": "tool", 
+            "user_outputs": [], 
+            "uuid": "5ff6cf71-5d74-42ad-bcdc-7c75891dc10e"
+        }
+    }, 
+    "uuid": "5b7bf29e-1272-4211-b345-37695989c6fa"
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/analyze_covariates.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,37 @@
+<macros>
+    <xml name="AnalyzeCovariatesParameters" tokens="tag">
+
+        <!-- BQSR in main config -->
+
+        <param name="afterReportFile" type="data" format="tabular" optional="true" label="file containing the BQSR second-pass report file" help="-after,&#8209;&#8209;afterReportFile &amp;lt;afterReportFile&amp;gt;" />
+
+        <param name="beforeReportFile" type="data" format="tabular" optional="true" label="file containing the BQSR first-pass report file" help="-before,&#8209;&#8209;beforeReportFile &amp;lt;beforeReportFile&amp;gt;" />
+
+    </xml>
+
+    <xml name="AnalyzeCovariatesOutput">
+        <data format="pdf" name="ac_plotsReportFile" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (PDF Recalibration Report)">
+            <yield />
+        </data>
+    </xml>
+
+    <template name="AnalyzeCovariatesPreprocessing">
+<![CDATA[
+]]>
+    </template>
+
+    <template name="AnalyzeCovariatesOptions">
+<![CDATA[
+        --plotsReportFile                   ${ac_plotsReportFile}
+
+        #if str($analysis_type.afterReportFile)
+            --afterReportFile $analysis_type.afterReportFile
+        #end if
+        #if str($analysis_type.beforeReportFile)
+            --beforeReportFile $analysis_type.beforeReportFile
+        #end if
+]]>
+    </template>
+</macros>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/base_recalibrator.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,38 @@
+<macros>
+    <xml name="BaseRecalibratorParameters" tokens="tag">
+        
+        <expand macro="macro_bam_input" tag="@TAG@" />
+
+        <param name="knownSites" type="data" format="vcf,bcf,bed,pileup,tabular,table" label="Database of known Sites (ROD files; e.g. VCF format)" multiple="true" title="A database of known polymorphic sites to skip over in the recalibration algorithm" help="-knownSites,&#8209;&#8209;knownSites &amp;lt;knownSites&amp;gt;" />
+
+    </xml>
+
+    <xml name="BaseRecalibratorOutput">
+        <data format="tabular" name="br_table" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (Table)">
+            <yield />
+        </data>
+    </xml>
+
+    <template name="BaseRecalibratorPreprocessing">
+<![CDATA[
+        @token_bam_input_pre@
+        #for $i, $variant in enumerate($analysis_type.knownSites):
+            ln -s -f ${variant} variant_${i}.vcf &&
+        #end for
+]]>
+    </template>
+
+    <template name="BaseRecalibratorOptions">
+<![CDATA[
+        --out                   ${br_table}
+
+        @token_bam_input@
+
+        #for $i, $variant in enumerate($analysis_type.knownSites):
+            --knownSites variant_${i}.vcf
+        #end for
+]]>
+    </template>
+</macros>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/combine_gvcfs.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,44 @@
+<macros>
+    <xml name="CombineGVCFsParameters" tokens="tag">
+        
+        <expand macro="macro_gvcf_input" tag="@TAG@" />
+
+        <expand macro="macro_optional_parameters">
+
+            <param name="breakBandsAtMultiplesOf" type="integer" value="0" label="If > 0, reference bands will be broken up at genomic positions that are multiples of this number" help="-breakBandsAtMultiplesOf,&#8209;&#8209;breakBandsAtMultiplesOf &amp;lt;breakBandsAtMultiplesOf&amp;gt;" />
+
+        </expand>
+
+    </xml>
+
+    <xml name="CombineGVCFsOutput">
+        <data format="vcf" name="cg_output_vcf" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (VCF)">
+            <yield />
+        </data>
+    </xml>
+
+    <template name="CombineGVCFsPreprocessing">
+<![CDATA[
+        @token_gvcf_input_pre@
+]]>
+    </template>
+
+    <template name="CombineGVCFsOptions">
+<![CDATA[
+        --out ${cg_output_vcf}
+
+        @token_gvcf_input@
+
+        #set $optionals = $analysis_type.optional_parameters
+        #if $optionals.optional_parameters_enabled
+            #if $optionals.breakBandsAtMultiplesOf > 0
+                --breakBandsAtMultiplesOf $optionals.breakBandsAtMultiplesOf
+            #end if
+        #end if
+]]>
+    </template>
+
+
+</macros>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/combine_variants.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,96 @@
+<macros>
+    <xml name="CombineVariantsParameters" tokens="tag">
+        
+        <expand macro="macro_vcf_input" tag="@TAG@" />
+
+        <expand macro="macro_optional_parameters">
+
+            <param name="filteredRecordsMergeType" type="select" label="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields" help="-filteredRecordsMergeType,&#8209;&#8209;filteredrecordsmergetype &amp;lt;filteredrecordsmergetype&amp;gt;">
+                <option value="">No Selection</option>
+                <option value="KEEP_IF_ANY_UNFILTERED">KEEP_IF_ANY_UNFILTERED</option>
+                <option value="KEEP_IF_ALL_UNFILTERED">KEEP_IF_ALL_UNFILTERED</option>
+                <option value="KEEP_UNCONDITIONAL">KEEP_UNCONDITIONAL</option>
+            </param>
+
+            <param name="genotypeMergeOptions" type="select" label="Determines how we should merge genotype records for samples shared across the ROD files" help="-genotypeMergeOptions,&#8209;&#8209;genotypemergeoption &amp;lt;genotypemergeoption&amp;gt;">
+                <option value="">No Selection</option>
+                <option value="UNIQUIFY">UNIQUIFY</option>
+                <option value="PRIORITIZE">PRIORITIZE</option>
+                <option value="UNSORTED">UNSORTED</option>
+                <option value="REQUIRE_UNIQUE">REQUIRE_UNIQUE</option>
+            </param>
+
+            <param name="minimumN" type="integer" value="1" optional="true" label="Combine variants and output site only if the variant is present in at least N input files" help="-minN,&#8209;&#8209;minimumN &amp;lt;minimumN&amp;gt;" />
+
+            <param name="rod_priority_list" type="text" value="" optional="true" label="A comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted" help="-priority,&#8209;&#8209;rod_priority_list &amp;lt;rod_priority_list&amp;gt;" />
+
+            <param name="setKey" type="text" value="" optional="true" label="Key used in the INFO key=value tag emitted describing which set the combined VCF record came from" help="-setKey,&#8209;&#8209;setKey &amp;lt;setKey&amp;gt;" />
+
+            <param name="assumeIdenticalSamples" type="boolean" truevalue="--assumeIdenticalSamples" falsevalue="" label="If true, assume input VCFs have identical sample sets and disjoint calls" help="-assumeIdenticalSamples,&#8209;&#8209;assumeIdenticalSamples" />
+
+            <param name="excludeNonVariants" type="boolean" truevalue="--excludeNonVariants" falsevalue="" label="Don't include loci found to be non-variant after the combining procedure" help="-env,&#8209;&#8209;excludeNonVariants" />
+
+            <param name="filteredAreUncalled" type="boolean" truevalue="--filteredAreUncalled" falsevalue="" label="If true, then filtered VCFs are treated as uncalled, so that filtered set annotations don't appear in the combined VCF" help="-filteredAreUncalled,&#8209;&#8209;filteredAreUncalled" />
+
+            <param name="mergeInfoWithMaxAC" type="boolean" truevalue="--mergeInfoWithMaxAC" falsevalue="" label="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records." help="-mergeInfoWithMaxAC,&#8209;&#8209;mergeInfoWithMaxAC" />
+
+            <param name="minimalVCF" type="boolean" truevalue="--minimalVCF" falsevalue="" label="If true, then the output VCF will contain no INFO or genotype FORMAT fields" help="-minimalVCF,&#8209;&#8209;minimalVCF" />
+
+            <param name="printComplexMerges" type="boolean" truevalue="--printComplexMerges" falsevalue="" label="Print out interesting sites requiring complex compatibility merging" help="-printComplexMerges,&#8209;&#8209;printComplexMerges" />
+
+            <param name="suppressCommandLineHeader" type="boolean" truevalue="--suppressCommandLineHeader" falsevalue="" label="If true, do not output the header containing the command line used" help="-suppressCommandLineHeader,&#8209;&#8209;suppressCommandLineHeader" />
+
+        </expand>
+
+    </xml>
+
+    <xml name="CombineVariantsOutput">
+        <data format="vcf" name="cv_output_vcf" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (VCF)">
+            <yield />
+        </data>
+    </xml>
+
+    <template name="CombineVariantsPreprocessing">
+<![CDATA[
+        @token_vcf_input_pre@
+]]>
+    </template>
+
+    <template name="CombineVariantsOptions">
+<![CDATA[
+        --out ${cv_output_vcf}
+
+        @token_vcf_input@
+
+        #set $optionals = $analysis_type.optional_parameters
+        #if $optionals.optional_parameters_enabled
+
+            #if $optionals.filteredRecordsMergeType
+                --filteredRecordsMergeType $optionals.filteredRecordsMergeType
+            #end if
+            #if $optionals.genotypeMergeOptions
+                --genotypeMergeOptions $optionals.genotypeMergeOptions
+            #end if
+            #if $optionals.minimumN != 1
+                --minimumN $optionals.minimumN
+            #end if
+            #if $optionals.rod_priority_list
+                --rod_priority_list $optionals.rod_priority_list
+            #end if
+
+            $optionals.assumeIdenticalSamples
+            $optionals.excludeNonVariants
+            $optionals.filteredAreUncalled
+            $optionals.mergeInfoWithMaxAC
+            $optionals.minimalVCF
+            $optionals.printComplexMerges
+            $optionals.suppressCommandLineHeader
+
+        #end if
+]]>
+    </template>
+
+
+</macros>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/gatk.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="utf-8"?>
+<tool id="gatk" name="GATK" version="@VERSION@.d9">
+  <description>tool collection Version @VERSION@</description>
+  <macros>
+    <import>gatk_macros.xml</import>
+    <import>realigner_target_creator.xml</import>
+    <import>indel_realigner.xml</import>
+    <import>base_recalibrator.xml</import>
+    <import>analyze_covariates.xml</import>
+    <import>print_reads.xml</import>
+    <import>haplotype_caller.xml</import>
+    <import>genotype_gvcfs.xml</import>
+    <import>combine_gvcfs.xml</import>
+    <import>combine_variants.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <stdio>
+    <regex match="^INFO" level="log"/>
+    <regex match="^WARN" level="warning"/>
+    <regex match="Using .* implementation of PairHMM" level="warning"/>
+    <regex match="There is insufficient memory for the Java Runtime Environment to continue" level="fatal"/>
+    <regex match="^##### ERROR" level="fatal"/>
+    <exit_code range="1:" level="fatal"/>
+  </stdio>
+  <command><![CDATA[
+        ############################
+        ## import analysis specific preprocessings by using cheetahs internal searchList
+        ## if not defined, ignore
+        ############################
+        #if $analysis_type.analysis_type_selector + "Preprocessing" in vars()['SL'][2]
+            #set $analysisPreprocessing = vars()['SL'][2][$analysis_type.analysis_type_selector + "Preprocessing"]
+            #include source=$analysisPreprocessing
+        #end if
+        
+        ############################
+        ## GATK tool unspecific options
+        ############################
+        @GATK_EXEC@
+        
+        --analysis_type ${analysis_type.analysis_type_selector}
+        --reference_sequence    ${ref_file.fields.path}
+
+        --log_to_file           ${output_log}
+
+        #if $cond_intervals.cond_intervals_enabled
+            #for $interval in $cond_intervals.intervals:
+                --intervals ${interval.L}
+            #end for
+        #end if
+
+        #if $cond_BQSR.cond_BQSR_enabled
+          --BQSR $cond_BQSR.BQSR
+        #end if
+
+        ############################
+        ## import analysis specific options by using cheetahs internal searchList
+        ## if not defined throw raw python error until better idea
+        ############################
+        #if $analysis_type.analysis_type_selector + "Options" in vars()['SL'][2]
+            #set $analysisOptions = vars()['SL'][2][$analysis_type.analysis_type_selector + "Options"]
+            #include source=$analysisOptions
+        #else
+            #set $analysisOptions = vars()['SL'][2][$analysis_type.analysis_type_selector + "Options"]
+        #end if
+        
+        ############################
+        ## only put ERROR or FATAL log messages into stderr
+        ## but keep full log for printing into log file
+        ############################
+        2>&1 | awk '\$1 != "INFO" && \$1 != "WARN"' >&2
+]]></command>
+  <inputs>
+    <param name="ref_file" type="select" label="Using reference genome" help="-R,‑‑reference_sequence &amp;lt;reference_sequence&amp;gt;">
+      <options from_data_table="picard_indexes"/>
+      <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+    </param>
+    <conditional name="cond_intervals">
+      <param name="cond_intervals_enabled" type="boolean" label="Select interval subset to operate on?"/>
+      <when value="true">
+        <repeat name="intervals" title="genomic interval over which to operate" help="-L,‑‑intervals &amp;lt;intervals&amp;gt;">
+          <param name="L" type="text" value=""/>
+        </repeat>
+      </when>
+      <when value="false"/>
+    </conditional>
+    <conditional name="cond_BQSR">
+      <param name="cond_BQSR_enabled" type="boolean" label="Select covariates for on-the-fly recalibration?"/>
+      <when value="true">
+        <param name="BQSR" type="data" format="tabular" label="Input covariates table file for on-the-fly base quality score recalibration" help="-BQSR,‑‑BQSR &amp;lt;BQSR&amp;gt; intended primarily for use with BaseRecalibrator and PrintReads"/>
+      </when>
+      <when value="false"/>
+    </conditional>
+    <conditional name="cond_threads">
+      <param name="cond_threads_enabled" type="boolean" label="Set computational options (cpu, mem)?"/>
+      <when value="true">
+        <param name="nt" type="integer" value="1" label="Number of data threads to allocate to this analysis" help="make sure, the option is available for the chosen tool"/>
+        <param name="nct" type="integer" value="1" label="Number of CPU threads to allocate per data thread" help="make sure, the option is available for the chosen tool"/>
+        <param name="mem" type="integer" value="0" label="Overwrite Memory in MB (0 = don't overwrite)" help="Overwrites all other defaults and might lead to crash the run. States mem per data thread"/>
+      </when>
+      <when value="false"/>
+    </conditional>
+    <conditional name="analysis_type">
+      <param name="analysis_type_selector" type="select" label="Analysis Type">
+        <option value="RealignerTargetCreator">RealignerTargetCreator</option>
+        <option value="IndelRealigner">IndelRealigner</option>
+        <option value="BaseRecalibrator">BaseRecalibrator</option>
+        <option value="AnalyzeCovariates">AnalyzeCovariates</option>
+        <option value="PrintReads">PrintReads</option>
+        <option value="HaplotypeCaller">HaplotypeCaller</option>
+        <option value="GenotypeGVCFs">GenotypeGVCFs</option>
+        <option value="CombineGVCFs">CombineGVCFs</option>
+        <option value="CombineVariants">CombineVariants</option>
+      </param>
+      <when value="RealignerTargetCreator">
+        <expand macro="RealignerTargetCreatorParameters" tag="rtc"/>
+      </when>
+      <when value="IndelRealigner">
+        <expand macro="IndelRealignerParameters" tag="ir"/>
+      </when>
+      <when value="BaseRecalibrator">
+        <expand macro="BaseRecalibratorParameters" tag="br"/>
+      </when>
+      <when value="AnalyzeCovariates">
+        <expand macro="AnalyzeCovariatesParameters" tag="ac"/>
+      </when>
+      <when value="PrintReads">
+        <expand macro="PrintReadsParameters" tag="pr"/>
+      </when>
+      <when value="HaplotypeCaller">
+        <expand macro="HaplotypeCallerParameters" tag="hc"/>
+      </when>
+      <when value="GenotypeGVCFs">
+        <expand macro="GenotypeGVCFsParameters" tag="gg"/>
+      </when>
+      <when value="CombineGVCFs">
+        <expand macro="CombineGVCFsParameters" tag="cg"/>
+      </when>
+      <when value="CombineVariants">
+        <expand macro="CombineVariantsParameters" tag="cv"/>
+      </when>
+    </conditional>
+  </inputs>
+  <outputs>
+    <expand macro="RealignerTargetCreatorOutput" tag="rtc">
+      <filter>analysis_type['analysis_type_selector'] == 'RealignerTargetCreator'</filter>
+    </expand>
+    <expand macro="IndelRealignerOutput" tag="ir">
+      <filter>analysis_type['analysis_type_selector'] == 'IndelRealigner'</filter>
+    </expand>
+    <expand macro="BaseRecalibratorOutput" tag="br">
+      <filter>analysis_type['analysis_type_selector'] == 'BaseRecalibrator'</filter>
+    </expand>
+    <expand macro="AnalyzeCovariatesOutput" tag="ac">
+      <filter>analysis_type['analysis_type_selector'] == 'AnalyzeCovariates'</filter>
+    </expand>
+    <expand macro="PrintReadsOutput" tag="pr">
+      <filter>analysis_type['analysis_type_selector'] == 'PrintReads'</filter>
+    </expand>
+    <expand macro="HaplotypeCallerOutput" tag="hc">
+      <filter>analysis_type['analysis_type_selector'] == 'HaplotypeCaller'</filter>
+    </expand>
+    <expand macro="GenotypeGVCFsOutput" tag="gg">
+      <filter>analysis_type['analysis_type_selector'] == 'GenotypeGVCFs'</filter>
+    </expand>
+    <expand macro="CombineGVCFsOutput" tag="cg">
+      <filter>analysis_type['analysis_type_selector'] == 'CombineGVCFs'</filter>
+    </expand>
+    <expand macro="CombineVariantsOutput" tag="cv">
+      <filter>analysis_type['analysis_type_selector'] == 'CombineVariants'</filter>
+    </expand>
+    <data format="txt" name="output_log" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (log)"/>
+  </outputs>
+  <expand macro="macro_tests"/>
+  <citations>
+    <citation type="doi">10.1101/gr.107524.110</citation>
+    <citation type="doi">10.1038/ng.806</citation>
+    <citation type="doi">10.1002/0471250953.bi1110s43</citation>
+  </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/gatk_macros.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,166 @@
+<macros>
+
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package">gatk</requirement>
+            <requirement type="set_environment">GATK_PATH</requirement>
+            <requirement type="set_environment">GATK_SITE_OPTIONS</requirement>
+            <requirement type="package" version="3.1.2.1">package_r_for_gatk_3_4_0</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="version_command">
+        <version_command><![CDATA[ @GATK_EXEC@ --help|grep '^The Genome' ]]></version_command>
+    </xml>
+
+    <token name="@VERSION@">3.4-0</token>
+    <token name="@OUTPUT_NAME_PREFIX@">${tool.name} - ${analysis_type.analysis_type_selector}</token>
+    <token name="@GATK_EXEC@">
+<![CDATA[
+        #if $cond_threads.cond_threads_enabled:
+            #if int($cond_threads.nct) > 1:
+                THREAD_STRING="-nct $cond_threads.nct" &&
+            #end if
+            #if int($cond_threads.nt) > 1:
+                THREAD_STRING=$THREAD_STRING" -nt $cond_threads.nt" &&
+            #end if
+            #if int($cond_threads.mem) > 0:
+                GATK_MEM=$cond_threads.mem &&
+            #end if
+        #end if
+        java -Xmx\${GATK_MEM:-\${SLURM_MEM_PER_NODE:-4096}}M -jar "\$GATK_PATH/GenomeAnalysisTK.jar" \${THREAD_STRING:-}
+]]>
+    </token>
+
+    <xml name="macro_vcf_input" tokens="tag">
+        <param name="input" type="data" format="vcf" multiple="true" label="Variant files (VCF format)" help="-V, &#8209;&#8209;variant">
+            <validator type="unspecified_build" />
+            <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> 
+        </param>
+    </xml>
+    <token name="@token_vcf_input_pre@" tokens="tag">
+<![CDATA[
+        ############################
+        ## create links to gVCF input files with correct extensions
+        ############################
+        #for $i, $variant in enumerate($analysis_type.input):
+            ln -s -f ${variant} variant_${i}.vcf &&
+        #end for
+]]>
+    </token>
+    <token name="@token_vcf_input@">
+<![CDATA[
+        #for $i, $variant in enumerate($analysis_type.input):
+            --variant variant_${i}.vcf
+        #end for
+        @token_reference_input@
+]]>
+    </token>
+
+
+    <xml name="macro_gvcf_input" tokens="tag">
+        <param name="input" type="data" format="vcf" multiple="true" label="Variant files (gVCF format)" help="-V, &#8209;&#8209;variant">
+            <validator type="unspecified_build" />
+            <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> 
+        </param>
+    </xml>
+    <token name="@token_gvcf_input_pre@" tokens="tag">
+<![CDATA[
+        ############################
+        ## create links to gVCF input files with correct extensions
+        ############################
+        #for $i, $variant in enumerate($analysis_type.input):
+            ln -s -f ${variant} variant_${i}.g.vcf &&
+        #end for
+]]>
+    </token>
+    <token name="@token_gvcf_input@">
+<![CDATA[
+        #for $i, $variant in enumerate($analysis_type.input):
+            --variant variant_${i}.g.vcf
+        #end for
+        @token_reference_input@
+]]>
+    </token>
+
+    <xml name="macro_bam_input">
+        <conditional name="cond_bam_input">
+            <param name="all_in_one" type="boolean" value="false" label="Input all BAM files in a single command" />
+            <when value="true">
+                <param name="input" type="data" format="bam" multiple="true" label="Input file containing sequence data (BAM)" help="-I, ‑‑input_file">
+                    <validator type="unspecified_build"/>
+                    <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build."/>
+                </param>
+            </when>
+            <when value="false">
+                <param name="input" type="data" format="bam" label="Input file containing sequence data (BAM)" help="-I, ‑‑input_file">
+                    <validator type="unspecified_build"/>
+                    <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build."/>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <token name="@token_bam_input_pre@">
+<![CDATA[
+        ############################
+        ## create links to bam input files with correct extensions
+        ############################
+        #if $analysis_type.cond_bam_input.all_in_one
+            #for $i, $bam in enumerate($analysis_type.cond_bam_input.input):
+                ln -s -f ${bam} input_${i}.bam &&
+                ln -s -f ${bam.metadata.bam_index} input_${i}.bam.bai &&
+            #end for
+        #else
+            ln -s -f ${analysis_type.cond_bam_input.input} input.bam &&
+            ln -s -f ${analysis_type.cond_bam_input.input.metadata.bam_index} input.bam.bai &&
+        #end if
+]]>
+    </token>
+    <token name="@token_bam_input@">
+<![CDATA[
+        #if $analysis_type.cond_bam_input.all_in_one
+            #for $i, $bam in enumerate($analysis_type.cond_bam_input.input):
+                --input_file input_${i}.bam
+            #end for
+        #else
+            --input_file            input.bam
+        #end if
+        @token_reference_input@
+]]>
+    </token>
+
+    <token name="@token_reference_input@">
+<![CDATA[
+]]>
+    </token>
+    <xml name="macro_input" tokens="tag">
+        <yield />
+    </xml>
+
+    <xml name="macro_optional_parameters">
+        <conditional name="optional_parameters">
+            <param name="optional_parameters_enabled" type="boolean" label="Configure Optional Parameters" />
+            <when value="true">
+                <yield />
+            </when>
+            <when value="false" />
+        </conditional>
+    </xml>
+
+    <xml name="macro_advanced_parameters">
+        <conditional name="advanced_parameters">
+            <param name="advanced_parameters_enabled" type="boolean" label="Configure Advanced Parameters" />
+            <when value="true">
+                <yield />
+            </when>
+            <when value="false" />
+        </conditional>
+    </xml>
+
+    <xml name="macro_tests">
+        <tests>
+            
+        </tests>
+    </xml>
+
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/generation/gatk.xsl	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,162 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:output
+    method="xml"
+    encoding="utf-8"
+    indent="yes"
+    cdata-section-elements="script style" />
+
+<xsl:template match="/">
+
+<tool id="gatk" name="GATK" version="@VERSION@.d9">
+    <description>tool collection Version @VERSION@</description>
+
+    <macros>
+        <import>gatk_macros.xml</import>
+        <xsl:for-each select="analyses/analysis">
+        <import><xsl:value-of select="macro_file" /></import>
+        </xsl:for-each>
+    </macros>
+
+    <expand macro="requirements" />
+
+    <stdio>
+        <regex match="^INFO" level="log" />
+        <regex match="^WARN" level="warning" />
+        <regex match="Using .* implementation of PairHMM" level="warning" />
+        <regex match="There is insufficient memory for the Java Runtime Environment to continue" level="fatal" />
+        <regex match="^##### ERROR" level="fatal" />
+        <exit_code range="1:" level="fatal"/>
+    </stdio>
+
+    <command>
+<xsl:text disable-output-escaping="yes">&lt;![CDATA[
+        ############################
+        ## import analysis specific preprocessings by using cheetahs internal searchList
+        ## if not defined, ignore
+        ############################
+        #if $analysis_type.analysis_type_selector + "Preprocessing" in vars()['SL'][2]
+            #set $analysisPreprocessing = vars()['SL'][2][$analysis_type.analysis_type_selector + "Preprocessing"]
+            #include source=$analysisPreprocessing
+        #end if
+        
+        ############################
+        ## GATK tool unspecific options
+        ############################
+        @GATK_EXEC@
+        
+        --analysis_type ${analysis_type.analysis_type_selector}
+        --reference_sequence    ${ref_file.fields.path}
+
+        --log_to_file           ${output_log}
+
+        #if $cond_intervals.cond_intervals_enabled
+            #for $interval in $cond_intervals.intervals:
+                --intervals ${interval.L}
+            #end for
+        #end if
+
+        #if $cond_BQSR.cond_BQSR_enabled
+          --BQSR $cond_BQSR.BQSR
+        #end if
+
+        ############################
+        ## import analysis specific options by using cheetahs internal searchList
+        ## if not defined throw raw python error until better idea
+        ############################
+        #if $analysis_type.analysis_type_selector + "Options" in vars()['SL'][2]
+            #set $analysisOptions = vars()['SL'][2][$analysis_type.analysis_type_selector + "Options"]
+            #include source=$analysisOptions
+        #else
+            #set $analysisOptions = vars()['SL'][2][$analysis_type.analysis_type_selector + "Options"]
+        #end if
+        
+        ############################
+        ## only put ERROR or FATAL log messages into stderr
+        ## but keep full log for printing into log file
+        ############################
+        2>&amp;1 | awk '\$1 != "INFO" &amp;&amp; \$1 != "WARN"' >&amp;2
+]]&gt;</xsl:text>
+    </command>
+
+    <inputs>
+
+        <param name="ref_file" type="select" label="Using reference genome" help="-R,&#8209;&#8209;reference_sequence &amp;lt;reference_sequence&amp;gt;" >
+            <options from_data_table="picard_indexes">
+                <!--filter type="data_meta" key="dbkey" ref="@TAG@_input" column="dbkey" /-->
+            </options>
+            <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+        </param>
+
+        <conditional name="cond_intervals">
+            <param name="cond_intervals_enabled" type="boolean" label="Select interval subset to operate on?" />
+            <when value="true">
+                <repeat name="intervals" title="genomic interval over which to operate" help="-L,&#8209;&#8209;intervals &amp;lt;intervals&amp;gt;">
+                    <param name="L" type="text" value="" />
+                </repeat>
+            </when>
+            <when value="false" />
+        </conditional>
+
+        <conditional name="cond_BQSR">
+            <param name="cond_BQSR_enabled" type="boolean" label="Select covariates for on-the-fly recalibration?" />
+            <when value="true">
+                <param name="BQSR" type="data" format="tabular" label="Input covariates table file for on-the-fly base quality score recalibration" help="-BQSR,&#8209;&#8209;BQSR &amp;lt;BQSR&amp;gt; intended primarily for use with BaseRecalibrator and PrintReads" />
+            </when>
+            <when value="false" />
+        </conditional>
+
+        <conditional name="cond_threads">
+            <param name="cond_threads_enabled" type="boolean" label="Set computational options (cpu, mem)?" />
+            <when value="true">
+                <param name="nt" type="integer" value="1" label="Number of data threads to allocate to this analysis" help="make sure, the option is available for the chosen tool" />
+                <param name="nct" type="integer" value="1" label="Number of CPU threads to allocate per data thread" help="make sure, the option is available for the chosen tool" />
+                <param name="mem" type="integer" value="0" label="Overwrite Memory in MB (0 = don't overwrite)" help="Overwrites all other defaults and might lead to crash the run. States mem per data thread" />
+            </when>
+            <when value="false" />
+        </conditional>
+
+        <conditional name="analysis_type">
+            <param name="analysis_type_selector" type="select" label="Analysis Type">
+                <xsl:for-each select="analyses/analysis">
+                <option value="{name}"><xsl:value-of select="name" /></option>
+                </xsl:for-each>
+            </param>
+            <xsl:for-each select="analyses/analysis">
+            <when value="{name}">
+                <!--xsl:choose>
+                    <xsl:when test="input_type = 'bam'">
+                        <expand macro="macro_bam_input" tag="{tag}" />
+                    </xsl:when>
+                    <xsl:when test="input_type = 'gvcf'">
+                        <expand macro="macro_gvcf_input" tag="{tag}" />
+                    </xsl:when>
+                </xsl:choose-->
+                <expand macro="{name}Parameters" tag="{tag}" />
+            </when>
+            </xsl:for-each>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <xsl:for-each select="analyses/analysis">
+        <expand macro="{name}Output" tag="{tag}">
+            <filter>analysis_type['analysis_type_selector'] == '<xsl:value-of select="name" />'</filter>
+        </expand>
+        </xsl:for-each>
+        <data format="txt" name="output_log" label="${{tool.name}} - ${{analysis_type.analysis_type_selector}} on ${{on_string}} (log)" />
+    </outputs>
+
+    <expand macro="macro_tests" />
+
+    <citations>
+        <citation type="doi">10.1101/gr.107524.110</citation>
+        <citation type="doi">10.1038/ng.806</citation>
+        <citation type="doi">10.1002/0471250953.bi1110s43</citation>
+    </citations>
+</tool>
+
+</xsl:template>
+</xsl:stylesheet>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/generation/gatk.xsldb.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<analyses>
+    <analysis>
+	    <name>RealignerTargetCreator</name>
+	    <input_type>bam</input_type>
+	    <tag>rtc</tag>
+	    <macro_file>realigner_target_creator.xml</macro_file>
+    </analysis>
+    <analysis>
+	    <name>IndelRealigner</name>
+	    <input_type>bam</input_type>
+	    <tag>ir</tag>
+	    <macro_file>indel_realigner.xml</macro_file>
+    </analysis>
+    <analysis>
+	    <name>BaseRecalibrator</name>
+	    <input_type>bam</input_type>
+	    <tag>br</tag>
+	    <macro_file>base_recalibrator.xml</macro_file>
+    </analysis>
+    <analysis>
+	    <name>AnalyzeCovariates</name>
+	    <input_type>bam</input_type>
+	    <tag>ac</tag>
+	    <macro_file>analyze_covariates.xml</macro_file>
+    </analysis>
+    <analysis>
+	    <name>PrintReads</name>
+	    <input_type>bam</input_type>
+	    <tag>pr</tag>
+	    <macro_file>print_reads.xml</macro_file>
+    </analysis>
+    <analysis>
+	    <name>HaplotypeCaller</name>
+	    <input_type>bam</input_type>
+	    <tag>hc</tag>
+	    <macro_file>haplotype_caller.xml</macro_file>
+    </analysis>
+    <analysis>
+	    <name>GenotypeGVCFs</name>
+	    <input_type>gvcf</input_type>
+	    <tag>gg</tag>
+	    <macro_file>genotype_gvcfs.xml</macro_file>
+    </analysis>
+    <analysis>
+	    <name>CombineGVCFs</name>
+	    <input_type>gvcf</input_type>
+	    <tag>cg</tag>
+	    <macro_file>combine_gvcfs.xml</macro_file>
+    </analysis>
+    <analysis>
+	    <name>CombineVariants</name>
+	    <input_type>vcf</input_type>
+	    <tag>cv</tag>
+	    <macro_file>combine_variants.xml</macro_file>
+    </analysis>
+</analyses>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/genotype_gvcfs.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,43 @@
+<macros>
+    <xml name="GenotypeGVCFsParameters" tokens="tag">
+        
+        <expand macro="macro_gvcf_input" tag="@TAG@" />
+
+        <expand macro="macro_optional_parameters">
+
+
+            <param name="sample_ploidy" type="integer" value="2" label="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)" help="-ploidy,&#8209;&#8209;sample_ploidy &amp;lt;sample_ploidy&amp;gt;" />
+
+        </expand>
+
+    </xml>
+
+    <xml name="GenotypeGVCFsOutput">
+        <data format="vcf" name="gg_output_gvcf" from_work_dir="output.g.vcf" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (gVCF)">
+            <yield />
+        </data>
+    </xml>
+
+    <template name="GenotypeGVCFsPreprocessing">
+<![CDATA[
+        @token_gvcf_input_pre@
+]]>
+    </template>
+
+    <template name="GenotypeGVCFsOptions">
+<![CDATA[
+        --out output.g.vcf
+
+        @token_gvcf_input@
+
+        #set $optionals = $analysis_type.optional_parameters
+        #if $optionals.optional_parameters_enabled
+            --sample_ploidy $optionals.sample_ploidy
+        #end if
+]]>
+    </template>
+
+
+</macros>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/haplotype_caller.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,65 @@
+<macros>
+    <xml name="HaplotypeCallerParameters" tokens="tag">
+        
+        <expand macro="macro_bam_input" tag="@TAG@" />
+
+        <conditional name="cond_usage">
+            <param name="cond_usage_selector" type="select" label="Select usage">
+                <option value="GVCF">Single-sample all-sites calling on DNAseq (GVCF mode)</option>
+            </param>
+            <when value="GVCF">
+                <expand macro="HaplotypeCallerGVCF" />
+            </when>
+        </conditional>
+
+        <expand macro="macro_optional_parameters">
+
+            <param name="sample_ploidy" type="integer" value="2" label="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)" help="-ploidy,&#8209;&#8209;sample_ploidy &amp;lt;sample_ploidy&amp;gt;" />
+
+        </expand>
+
+    </xml>
+
+    <xml name="HaplotypeCallerOutput">
+        <data format="vcf" name="hc_output_gvcf" from_work_dir="output.g.vcf" label="${tool.name} on ${on_string} (gVCF)">
+            <yield />
+        </data>
+    </xml>
+
+    <template name="HaplotypeCallerPreprocessing">
+<![CDATA[
+        @token_bam_input_pre@
+]]>
+    </template>
+
+    <template name="HaplotypeCallerOptions">
+<![CDATA[
+        --out output.g.vcf
+
+        @token_bam_input@
+
+        #set $optionals = $analysis_type.optional_parameters
+        #if $optionals.optional_parameters_enabled
+            --sample_ploidy $optionals.sample_ploidy
+        #end if
+
+        #set $usage_selector = $analysis_type.cond_usage.cond_usage_selector
+        #set $usage = $analysis_type.cond_usage
+
+        #if str($usage_selector) == 'GVCF'
+            --emitRefConfidence "GVCF"
+        #end if
+]]>
+    </template>
+
+
+
+    <xml name="HaplotypeCallerGVCF">
+        <param name="emitRefConfidence" type="select" optional="true" label="Mode for emitting reference confidence scores" help="-ERC,‑‑emitRefConfidence &amp;lt;emitRefConfidence&amp;gt;">
+              <option value="GVCF">GVCF (Reference model emitted with condensed non-variant blocks)</option>
+        </param>
+    </xml>
+
+</macros>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/indel_realigner.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,90 @@
+<macros>
+    <xml name="IndelRealignerParameters" tokens="tag">
+        
+        <expand macro="macro_bam_input" tag="@TAG@" />
+
+        <param name="targetIntervals" type="data" format="gatk_interval" label="Intervals file output from RealignerTargetCreator" help="-targetIntervals,--targetIntervals &amp;lt;targetIntervals&amp;gt;" />
+
+        <expand macro="macro_optional_parameters">
+            <repeat name="knownAlleles" title="Input VCF file(s) with known indels" help="-known,&#8209;&#8209;knownAlleles &amp;lt;knownAlleles&amp;gt;">
+                <param name="knownAllele" type="data" format="vcf" label="Variant file (VCF format)" />
+            </repeat>
+            
+            <param name="consensusDeterminationModel" type="select" label="minimum reads at a locus to enable using the entropy calculation" help="-model,&#8209;&#8209;consensusDeterminationModel &amp;lt;consensusDeterminationModel&amp;gt;">
+                <option value="USE_READS">USE_READS - Additionally uses indels already present in the original alignments of the reads</option>
+                <option value="KNOWNS_ONLY">KNOWNS_ONLYS - Uses only indels from a provided ROD of known indels</option>
+                <option value="USE_SW">USE_SW - Additionally uses 'Smith-Waterman' to generate alternate consenses</option>
+            </param>
+            <param name="LODThresholdForCleaning" type="float" value="5.0" label="LOD threshold above which the cleaner will clean" help="-LOD,&#8209;&#8209;LODThresholdForCleaning &amp;lt;LODThresholdForCleaning&amp;gt;" />
+            <!--param name="nWayOut" type="float" value="5.0" label="Generate one output file for each input (-I) bam file (not compatible with -output)" help="-nWayOut,&#45;&#45;nWayOut &amp;lt;nWayOut&amp;gt;" /-->
+        </expand>
+
+
+        <expand macro="macro_advanced_parameters">
+            <param name="entropyThreshold" type="float" value="0.15" label="Percentage of mismatches at a locus to be considered having high entropy (0.0 &lt; entropy &lt;= 1.0)" help="-entropy,&#8209;&#8209;entropyThreshold &amp;lt;entropyThreshold&amp;gt;" />
+
+            <param name="maxConsensuses" type="integer" value="30" label="Max alternate consensuses to try (necessary to improve performance in deep coverage)" help="-maxConsensuses,&#8209;&#8209;maxConsensuses &amp;lt;maxConsensuses&amp;gt;" />
+
+            <param name="maxIsizeForMovement" type="integer" value="3000" label="maximum insert size of read pairs that we attempt to realign" help="-maxIsize,&#8209;&#8209;maxIsizeForMovement &amp;lt;maxIsizeForMovement&amp;gt;" />
+
+            <param name="maxPositionalMoveAllowed" type="integer" value="200" label="Maximum positional move in basepairs that a read can be adjusted during realignment" help="-maxPosMove,&#8209;&#8209;maxPositionalMoveAllowed &amp;lt;maxPositionalMoveAllowed&amp;gt;" />
+
+            <param name="maxReadsForConsensuses" type="integer" value="120" label="Max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)" help="-greedy,&#8209;&#8209;maxReadsForConsensuses &amp;lt;maxReadsForConsensuses&amp;gt;" />
+
+            <param name="maxReadsForRealignment" type="integer" value="20000" label="Max reads allowed at an interval for realignment" help="-maxReads,&#8209;&#8209;maxReadsForRealignment &amp;lt;maxReadsForRealignment&amp;gt;" />
+
+            <param name="maxReadsInMemory" type="integer" value="150000" label="max reads allowed to be kept in memory at a time by the SAMFileWriter" help="-maxInMemory,&#8209;&#8209;maxReadsInMemory &amp;lt;maxReadsInMemory&amp;gt;" />
+
+            <param name="noOriginalAlignmentTags" type="boolean" truevalue="--noOriginalAlignmentTags" falsevalue="" label="Don't output the original cigar or alignment start tags for each realigned read in the output bam" help="-noTags,&#8209;&#8209;noOriginalAlignmentTags" />
+        </expand>
+
+    </xml>
+
+    <xml name="IndelRealignerOutput">
+        <data format="bam" name="ir_output_bam" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (BAM)">
+            <yield />
+        </data>
+    </xml>
+
+    <template name="IndelRealignerPreprocessing">
+<![CDATA[
+        @token_bam_input_pre@
+
+        ln -s -f ${analysis_type.targetIntervals} target.intervals &&
+
+        #if $analysis_type.optional_parameters.optional_parameters_enabled
+            #for $i, $knownAllele in enumerate($analysis_type.optional_parameters.knownAlleles):
+                ln -s -f ${knownAllele.knownAllele} knownAllele_${i}.vcf &&
+            #end for
+        #end if
+]]>
+    </template>
+
+    <template name="IndelRealignerOptions">
+<![CDATA[
+        --out                   ${ir_output_bam}
+
+        @token_bam_input@
+        --targetIntervals       target.intervals
+
+        #if $analysis_type.optional_parameters.optional_parameters_enabled
+            #for $i, $knownAllele in enumerate($analysis_type.optional_parameters.knownAlleles):
+                --knownAlleles knownAllele_${i}.vcf
+            #end for
+            --consensusDeterminationModel       ${analysis_type.consensusDeterminationModel}
+            --LODThresholdForCleaning           ${analysis_type.LODThresholdForCleaning}
+        #end if
+
+        #if $analysis_type.advanced_parameters.advanced_parameters_enabled
+            --entropyThreshold          ${analysis_type.advanced_parameters.entropyThreshold}
+            --maxConsensuses            ${analysis_type.advanced_parameters.maxConsensuses}
+            --maxIsizeForMovement       ${analysis_type.advanced_parameters.maxIsizeForMovement}
+            --maxPositionalMoveAllowed  ${analysis_type.advanced_parameters.maxPositionalMoveAllowed}
+            --maxReadsForConsensuses    ${analysis_type.advanced_parameters.maxReadsForConsensuses}
+            --maxReadsForRealignment    ${analysis_type.advanced_parameters.maxReadsForRealignment}
+            --maxReadsInMemory          ${analysis_type.advanced_parameters.maxReadsInMemory}
+            ${analysis_type.advanced_parameters.noOriginalAlignmentTags}
+        #end if
+]]>
+    </template>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/print_reads.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,71 @@
+<macros>
+    <xml name="PrintReadsParameters" tokens="tag">
+        
+        <expand macro="macro_bam_input" tag="@TAG@" />
+
+        <!-- BQSR in main config -->
+
+        <expand macro="macro_optional_parameters">
+
+            <param name="number" type="integer" value="" optional="true" label="Print the first n reads from the file, discarding the rest" help="-n,&#8209;&#8209;number &amp;lt;number&amp;gt;" />
+
+            <param name="platform" type="text" value="" optional="true" label="Exclude all reads with this platform from the output" help="-platform,&#8209;&#8209;platform &amp;lt;platform&amp;gt;" />
+
+            <param name="readGroup" type="text" value="" optional="true" label="Exclude all reads with this read group from the output" help="-readGroup,&#8209;&#8209;readGroup &amp;lt;readGroup&amp;gt;" />
+
+            <param name="sample_file" type="data" format="txt" optional="true" label="File containing a list of samples (one per line). Can be specified multiple times" help="-sf,&#8209;&#8209;sample_file &amp;lt;sample_file&amp;gt;" />
+
+            <repeat name="sample_names" title="Sample names to be included in the analysis" help="-sn,‑‑sample_name &amp;lt;sample_name&amp;gt;">
+                <param name="sample_name" type="text" value="" title="Sample name to be included in the analysis" />
+            </repeat>
+            
+            <param name="simplify" type="text" truevalue="-s" falsevalue="" label="Erase all extra attributes in the read but keep the read group information" help="-s,&#8209;&#8209;simplify" />
+
+        </expand>
+
+    </xml>
+
+    <xml name="PrintReadsOutput">
+        <data format="bam" name="pr_output_bam" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (BAM)">
+            <yield />
+        </data>
+    </xml>
+
+    <template name="PrintReadsPreprocessing">
+<![CDATA[
+        @token_bam_input_pre@
+]]>
+    </template>
+
+    <template name="PrintReadsOptions">
+<![CDATA[
+        --out                   ${pr_output_bam}
+
+        @token_bam_input@
+
+        #set $optionals = $analysis_type.optional_parameters
+        #if $optionals.optional_parameters_enabled
+            #if int($optionals.number) > 0
+                --number $optionals.number
+            #end if
+            #if str($optionals.platform)
+                --platform $optionals.platform
+            #end if
+            #if str($optionals.readGroup)
+                --readGroup $optionals.readGroup
+            #end if
+            #if $optionals.sample_file
+                --sample_file $optionals.sample_file
+            #end if
+            #if $optionals.sample_names
+                #for $sample in $optionals.sample_names:
+                    --intervals ${sample.sample_name}
+                #end for
+            #end if
+            $optionals.simplify
+        #end if
+]]>
+    </template>
+</macros>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/realigner_target_creator.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,57 @@
+<macros>
+    <xml name="RealignerTargetCreatorParameters" tokens="tag">
+        
+        <expand macro="macro_bam_input" tag="@TAG@" />
+
+        <expand macro="macro_optional_parameters">
+            <param name="maxIntervalSize" type="integer" value="500" label="maximum interval size; any intervals larger than this value will be dropped" help="-maxInterval,&#8209;&#8209;maxIntervalSize &amp;lt;maxIntervalSize&amp;gt;" />
+            <param name="minReadsAtLocus" type="integer" value="4" label="minimum reads at a locus to enable using the entropy calculation" help="-minReads,&#8209;&#8209;minReadsAtLocus &amp;lt;minReadsAtLocus&amp;gt;" />
+            <param name="windowSize" type="integer" value="10" label="window size for calculating entropy or SNP clusters" help="-window,&#8209;&#8209;windowSize &amp;lt;windowSize&amp;gt;" />
+
+            <param name="mismatchFraction" type="float" value="0.0" label="fraction of base qualities needing to mismatch for a position to have high entropy" help="-mismatch,&#8209;&#8209;mismatchFraction &amp;lt;mismatchFraction&amp;gt;" />
+            <repeat name="rod_bindings" title="Input VCF files with known indels" help="-known,&#8209;&#8209;known &amp;lt;known&amp;gt;">
+                <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
+            </repeat>
+        </expand>
+        
+    </xml>
+
+    <xml name="RealignerTargetCreatorOutput">
+        <data format="gatk_interval" name="rtc_output_intervals" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (GATK intervals)">
+            <yield />
+        </data>
+    </xml>
+
+    <template name="RealignerTargetCreatorPreprocessing">
+<![CDATA[
+        @token_bam_input_pre@
+
+        #if $analysis_type.optional_parameters.optional_parameters_enabled
+            #for $i, $rod_binding in enumerate($analysis_type.optional_parameters.rod_bindings):
+                ln -s -f ${rod_binding.input_rod} rod_binding_${i}.vcf &&
+            #end for
+        #end if
+]]>
+    </template>
+
+    <template name="RealignerTargetCreatorOptions">
+<![CDATA[
+        --out                   ${rtc_output_intervals}
+
+        @token_bam_input@
+
+        #if $analysis_type.optional_parameters.optional_parameters_enabled
+            --maxIntervalSize       ${analysis_type.optional_parameters.maxIntervalSize}
+            --minReadsAtLocus       ${analysis_type.optional_parameters.minReadsAtLocus}
+            --windowSize            ${analysis_type.optional_parameters.windowSize}
+            --mismatchFraction      ${analysis_type.optional_parameters.mismatchFraction}
+
+            #for $i, $rod_binding in enumerate($analysis_type.optional_parameters.rod_bindings):
+                --known rod_binding_${i}.vcf
+            #end for
+        #end if
+]]>
+    </template>
+</macros>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/tool-data/destinations.py	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,62 @@
+from galaxy.jobs import JobDestination
+import os
+import sys
+import json
+import cStringIO
+import logging
+
+log = logging.getLogger( __name__ )
+
+
+def dump(obj, nested_level=0, output=sys.stdout):
+    spacing = '   '
+    if type(obj) == dict:
+        print >> output, '%s{' % ((nested_level) * spacing)
+        for k, v in obj.items():
+            if hasattr(v, '__iter__'):
+                print >> output, '%s%s:' % ((nested_level + 1) * spacing, k)
+                dump(v, nested_level + 1, output)
+            else:
+                print >> output, '%s%s: %s' % ((nested_level + 1) * spacing, k, v)
+        print >> output, '%s}' % (nested_level * spacing)
+    elif type(obj) == list:
+        print >> output, '%s[' % ((nested_level) * spacing)
+        for v in obj:
+            if hasattr(v, '__iter__'):
+                dump(v, nested_level + 1, output)
+            else:
+                print >> output, '%s%s' % ((nested_level + 1) * spacing, v)
+        print >> output, '%s]' % ((nested_level) * spacing)
+    else:
+        print >> output, '%s%s' % (nested_level * spacing, obj)
+
+
+def dynamic_slurm_cluster_gatk(job, tool_id):
+    # Allocate extra time
+    inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] )
+    inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] )
+    inp_data.update( [ ( da.name, json.loads(da.value) ) for da in job.parameters ] )
+    out = cStringIO.StringIO()
+    dump(inp_data, 1, out)
+    log.debug(out.getvalue())
+    
+    nativeSpecs = '--nodes=1 --ntasks=1'
+    
+    # runner doesn't allow to specify --cpus-per-task
+    # thus the mem calculation gets messy with more than 1 node
+    # --> translate nt ==> nodes, nct ==> ntasks
+    
+    if 'cond_threads' not in inp_data:
+        return JobDestination(runner="slurm")
+    
+    if inp_data['cond_threads']['cond_threads_enabled'] == "True":
+        nNodes = int(inp_data['cond_threads']['nt'])
+        nCPU   = int(inp_data['cond_threads']['nct'])
+        nMEM   = int(inp_data['cond_threads']['mem'])
+        if nMEM > 0:
+            nativeSpecs = '--nodes=%d --ntasks=%d --mem=%d' % (nNodes, nCPU*nNodes, nMEM)
+        else:
+            nativeSpecs = '--nodes=%d --ntasks=%d' % (nNodes, nCPU*nNodes)
+        
+    return JobDestination(runner="slurm", params={"nativeSpecification": nativeSpecs})
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/tool-data/picard_index.loc.sample	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,26 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Picard dict and associated files. You will need
+#to create these data files and then create a picard_index.loc file 
+#similar to this one (store it in this directory) that points to 
+#the directories in which those files are stored. The picard_index.loc 
+#file has this format (longer white space is the TAB character):
+#
+#<unique_build_id>	<dbkey>		<display_name>		<fasta_file_path>
+#
+#So, for example, if you had hg18 indexed and stored in 
+#/depot/data2/galaxy/srma/hg18/,
+#then the srma_index.loc entry would look like this:
+#
+#hg18	hg18	hg18 Pretty		/depot/data2/galaxy/picard/hg18/hg18.fa
+#
+#and your /depot/data2/galaxy/srma/hg18/ directory
+#would contain the following three files:
+#hg18.fa
+#hg18.dict
+#hg18.fa.fai
+#
+#The dictionary file for each reference (ex. hg18.dict) must be 
+#created via Picard (http://picard.sourceforge.net). Note that
+#the dict file does not have the .fa extension although the
+#path list in the loc file does include it.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/tool_data_table_conf.xml.sample	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of Picard dict files valid for GATK -->
+    <table name="picard_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/picard_index.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/gatk/tool_dependencies.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <set_environment version="1.0">
+        <environment_variable action="set_to" name="GATK_PATH">/mnt/galaxy/tools/GATK/3.4-0</environment_variable>
+    </set_environment>
+    <!--
+    Use GATK_SITE_OPTIONS to set additional parameters that should be inserted in every GATK call.
+    The intended use case was to prohibit GATK to collect and send data.
+    For example:
+
+    -et "NO_ET" -K "/data/gatk_key_file" ##ET no phone home
+    -->
+    <set_environment version="1.0">
+        <environment_variable action="set_to" name="GATK_SITE_OPTIONS"> </environment_variable>
+    </set_environment>
+    <package name="package_r_for_gatk_3_4_0" version="3.1.2.1">
+        <repository changeset_revision="49c62e9b71ad" name="package_r_for_gatk_3_4_0" owner="avowinkel" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/package_r_for_gatk_3_4_0/tool_dependencies.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,48 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="R" version="3.1.2">
+        <repository changeset_revision="9f2fddb9d6e2" name="package_r_3_1_2" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="package_r_for_gatk_3_4_0" version="3.1.2.1">
+        <install version="1.0">
+            <actions>
+                <action type="setup_r_environment">
+
+                    <repository changeset_revision="9f2fddb9d6e2" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu">
+                        <package name="R" version="3.1.2" />
+                    </repository>
+                    <package>https://github.com/cran/stringi/archive/0.5-5.tar.gz</package>
+                    <package>https://github.com/cran/magrittr/archive/1.5.tar.gz</package>
+                    <package>https://github.com/cran/stringr/archive/1.0.0.tar.gz</package>
+                    <package>https://github.com/cran/RColorBrewer/archive/1.1-2.tar.gz</package>
+                    <package>https://github.com/cran/dichromat/archive/2.0-0.tar.gz</package>
+                    <package>https://github.com/cran/colorspace/archive/1.2-6.tar.gz</package>
+                    <package>https://github.com/cran/munsell/archive/0.4.2.tar.gz</package>
+                    <package>https://github.com/cran/labeling/archive/0.3.tar.gz</package>
+                    <package>https://github.com/cran/Rcpp/archive/0.11.6.tar.gz</package>
+                    <package>https://github.com/cran/digest/archive/0.6.8.tar.gz</package>
+                    <package>https://github.com/cran/gtable/archive/0.1.2.tar.gz</package>
+                    <package>https://github.com/cran/bitops/archive/1.0-6.tar.gz</package>
+                    <package>https://github.com/cran/caTools/archive/1.17.1.tar.gz</package>
+                    <package>https://github.com/cran/gtools/archive/3.5.0.tar.gz</package>
+                    <package>https://github.com/cran/gdata/archive/2.17.0.tar.gz</package>
+                    <package>https://github.com/cran/gsalib/archive/2.1.tar.gz</package>
+                    <package>https://github.com/cran/gplots/archive/2.17.0.tar.gz</package>
+                    <package>https://github.com/cran/plyr/archive/1.8.3.tar.gz</package>
+                    <package>https://github.com/cran/reshape/archive/0.8.5.tar.gz</package>
+                    <package>https://github.com/cran/reshape2/archive/1.4.1.tar.gz</package>
+                    <package>https://github.com/cran/scales/archive/0.2.5.tar.gz</package>
+                    <package>https://github.com/cran/proto/archive/0.3-10.tar.gz</package>
+                    <package>https://github.com/cran/MASS/archive/7.3-43.tar.gz</package>
+                    <package>https://github.com/cran/ggplot2/archive/1.0.1.tar.gz</package>
+                </action>
+            </actions>
+        </install>
+        <readme>
+            ggplot2 is a plotting system for R, based on the grammar of graphics, which tries to take the good parts of base and lattice graphics and none of the bad parts.
+            It takes care of many of the fiddly details that make plotting a hassle (like drawing legends) as well as providing a powerful model of graphics that makes it easy to produce complex multi-layered graphics.
+
+            http://ggplot2.org/
+        </readme>
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/suite_samtools_1_2/repository_dependencies.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<repositories description="A suite of Galaxy tools designed to work with version 1.2 of the SAMtools package.">
+    <repository changeset_revision="cf875cbe2df4" name="data_manager_sam_fasta_index_builder" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="af7c50162f0b" name="bam_to_sam" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="d04d9f1c6791" name="sam_to_bam" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="8c3472790020" name="samtools_bedcov" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="1ebb4ecdc1ef" name="samtools_calmd" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="0072bf593791" name="samtools_flagstat" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="87398ae795c7" name="samtools_idxstats" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="c6fdfe3331d6" name="samtools_mpileup" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="020e144b5f78" name="samtools_reheader" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="3735f950b2f5" name="samtools_rmdup" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="2b474ebbfc7d" name="samtools_slice_bam" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="a430da4f04cd" name="samtools_sort" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="57f3e32f809d" name="samtools_split" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="0d71d9467847" name="samtools_stats" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+</repositories>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/svdetect/BAM_preprocessingPairs.pl	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,340 @@
+#!/usr/bin/perl -w
+
+use strict;
+use warnings;
+use Getopt::Std;
+my $version = '0.5_galaxy';
+
+my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools";
+
+my %opts = ( t=>1, p=>1, n=>1000000, f=>3, s=>0, S=>10000, o=>"." );
+
+getopts('dt:p:n:f:s:S:o:b:l:x:N:', \%opts); #GALAXY 
+
+my $working_dir=($opts{o} ne ".")? $opts{o}:"working directory";
+
+my $pt_bad_mates_file=$opts{b};  #GALAXY 
+my $pt_log_file=$opts{l}; #GALAXY 
+my $pt_good_mates_file=$opts{x} if($opts{d}); #GALAXY 
+
+
+die(qq/
+    
+Description:
+    
+    Preprocessing of mates to get anomalously mapped mate-pair\/paired-end reads as input
+    for SVDetect.
+
+    From all pairs mapped onto the reference genome, this script outputs abnormal pairs:
+        - mapped on two different chromosomes
+        - with an incorrect strand orientation and\/or pair order
+        - with an insert size distance +- sigma threshold
+    into a file <prefix.ab.bam\/sam>
+    
+    -BAM\/SAM File input format only, sorted by read names
+
+    Version : $version
+    SAMtools required for BAM files
+    
+    
+Usage:   BAM_preprocessingPairs.pl [options] <all_mate_file.sorted.bam\/sam>
+
+Options: -t BOOLEAN   read type: =1 (Illumina), =0 (SOLiD) [$opts{t}]
+         -p BOOLEAN   pair type: =1 (paired-end), =0 (mate-pair)  [$opts{p}]
+         -n INTEGER   number of pairs for calculating mu and sigma lengths [$opts{n}]
+	 -s INTEGER   minimum value of ISIZE for calculating mu and sigma lengths [$opts{s}]
+	 -S INTEGER   maximum value of ISIZE for calculating mu and sigma lengths [$opts{S}]
+         -f REAL      minimal number of sigma fold for filtering pairs [$opts{f}]
+         -d           dump normal pairs into a file [<prefix.norm.bam\/sam>] (optional)
+	 -o STRING    output directory [$working_dir]
+
+\n/) if (@ARGV == 0 && -t STDIN);
+
+unless (-d $opts{o}){
+	mkdir $opts{o} or die;
+}
+$opts{o}.="/" if($opts{o}!~/\/$/);
+
+my $mates_file=shift(@ARGV);
+
+$mates_file=readlink($mates_file);
+
+my $bad_mates_file=(split(/\//,$mates_file))[$#_];
+
+if($bad_mates_file=~/.(s|b)am$/){
+    $bad_mates_file=~s/.(b|s)am$/.ab.sam/;
+    $bad_mates_file=$opts{o}.$bad_mates_file;
+}
+
+else{
+    die "Error: mate_file with the extension <.bam> or <.sam> needed !\n";
+}
+
+my $good_mates_file;
+if($opts{d}){
+    $good_mates_file=(split(/\//,$mates_file))[$#_];
+    $good_mates_file=~s/.(b|s)am$/.norm.sam/;
+    $good_mates_file=$opts{o}.$good_mates_file;
+}
+
+my $log_file=$opts{o}.$opts{N}.".svdetect_preprocessing.log"; #GALAXY 
+
+#------------------------------------------------------------------------------#
+#Calculate mu and sigma
+
+open LOG,">$log_file" or die "$0: can't open ".$log_file.":$!\n";
+
+print LOG "\# Calculating mu and sigma lengths...\n";
+print LOG "-- file=$mates_file\n";
+print LOG "-- n=$opts{n}\n";
+print LOG "-- ISIZE min=$opts{s}, max=$opts{S}\n";
+
+my ($record, $sumX,$sumX2) = (0,0,0);
+my $warn=$opts{n}/10;
+my $prev_pair="FIRST";
+
+my $bam=($mates_file =~ /.bam$/)? 1:0;
+
+if($bam){
+    open(MATES, "${SAMTOOLS_BIN_DIR}/samtools view $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";
+}else{
+    open MATES, "<".$mates_file or die "$0: can't open ".$mates_file.":$!\n";
+}
+
+while(<MATES>){
+    
+    my @t=split;
+    
+    next if ($t[0]=~/^@/);
+    
+    my $current_pair=$t[0];
+    next if($current_pair eq $prev_pair);
+    $prev_pair=$current_pair;                                                   
+    
+    my ($chr1,$chr2,$length)=($t[2],$t[6],abs($t[8]));
+    
+    next if (($t[1]&0x0004) || ($t[1]&0x0008));
+    next if ($length<$opts{s} || $length>$opts{S}) ;
+    
+    if($chr2 eq "="){
+
+        $sumX += $length;							#add to sum and sum^2 for mean and variance calculation
+	$sumX2 += $length*$length;
+        $record++;
+    }
+
+    if($record>$warn){
+	print LOG "-- $warn pairs analysed\n";
+        $warn+=$warn;
+    }
+    
+    last if ($record>$opts{n});
+    
+}
+close (MATES);
+
+$record--;
+my $mu = $sumX/$record;
+my $sigma = sqrt($sumX2/$record - $mu*$mu); 
+
+print LOG "-- Total : $record pairs analysed\n";
+print LOG "-- mu length = ".decimal($mu,1).", sigma length = ".decimal($sigma,1)."\n";
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#Preprocessing pairs
+
+$warn=100000;
+
+$record=0;
+my %count=( ab=>0, norm=>0, chr=>0, sense=>0, dist=>0, unmap=>0);
+
+my $read_type=($opts{t})? "Illumina":"SOLiD";
+my $pair_type=($opts{p})? "paired-end":"mate-paired";
+
+print LOG "\# Preprocessing pairs...\n";
+print LOG "-- file= $mates_file\n";
+print LOG "-- type= $read_type $pair_type reads\n";
+print LOG "-- sigma threshold= $opts{f}\n";
+print LOG "-- using ".decimal($mu-$opts{f}*$sigma,4)."-".decimal($mu+$opts{f}*$sigma,4)." as normal range of insert size\n";
+
+my @header;
+
+if($bam){
+    open(HEADER, "${SAMTOOLS_BIN_DIR}/samtools view -H $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";
+    @header=<HEADER>;
+    close HEADER;
+    open(MATES, "${SAMTOOLS_BIN_DIR}/samtools view $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";
+}else{
+    open MATES, "<".$mates_file or die "$0: can't open ".$mates_file.":$!\n";
+}
+
+open AB, ">$bad_mates_file" or die "$0: can't write in the output: $bad_mates_file :$!\n";
+print AB @header if($bam);
+
+if($opts{d}){
+    open NORM, ">$good_mates_file" or die "$0: can't write in the output: $good_mates_file :$!\n";
+    print NORM @header if($bam);
+}
+
+$prev_pair="FIRST";
+my $prev_bad;
+
+while(<MATES>){
+    
+    my @t=split;
+    my $bad=0;
+    
+    if ($t[0]=~/^@/){
+        print AB;
+        print NORM if ($opts{d});
+        next;
+    }
+    
+    my $current_pair=$t[0];
+    if($current_pair eq $prev_pair){
+        next if($prev_bad==-1);
+        if($prev_bad){
+            print AB;
+        }elsif(!$prev_bad){
+            print NORM if($opts{d});
+        }
+        next;
+    }
+    
+    $prev_pair=$current_pair;
+    
+    my ($chr1,$chr2,$pos1,$pos2,$length)=($t[2],$t[6],$t[3],$t[7], abs($t[8]));
+    
+    if (($t[1]&0x0004) || ($t[1]&0x0008)){
+        $prev_bad=-1;
+        $count{unmap}++;
+        $record++;
+        next;
+        
+    }
+    
+    my $strand1 = (($t[1]&0x0010))? 'R':'F';
+    my $strand2 = (($t[1]&0x0020))? 'R':'F';
+    my $order1  = (($t[1]&0x0040))? '1':'2';
+    my $order2  = (($t[1]&0x0080))? '1':'2';
+    
+    if($order1 == 2){
+        ($strand1,$strand2)=($strand2,$strand1);
+        ($chr1,$chr2)=($chr2,$chr1);
+        ($pos1,$pos2)=($pos2,$pos1);
+        ($order1,$order2)=($order2,$order1);
+    }
+    
+    my $sense=$strand1.$strand2;
+    
+    if($chr1 ne "=" && $chr2 ne "="){
+        $bad=1;
+        $count{chr}++;
+    }
+    
+    if($opts{p}){ #paired-end
+        if(!(($sense eq "FR" && $pos1<$pos2) || ($sense eq "RF" && $pos2<$pos1))){
+            $bad=1;
+            $count{sense}++;
+        }
+    }else{ #mate-pair
+        if($opts{t}){ #Illumina
+            if(!(($sense eq "FR" && $pos2<$pos1) || ($sense eq "RF" && $pos1<$pos2))){
+            $bad=1;
+            $count{sense}++;
+            }
+        }else{ #SOLiD
+            if(!(($sense eq "FF" && $pos2<$pos1) || ($sense eq "RR" && $pos1<$pos2))){
+            $bad=1;
+            $count{sense}++;
+            }
+        }
+    }
+    
+    if(($chr1 eq "=" || $chr2 eq "=") && ($length <$mu - $opts{f}*$sigma || $length>$mu + $opts{f}*$sigma)){
+        $bad=1;
+        $count{dist}++;
+    }
+    
+    if($bad){
+        print AB;
+        $count{ab}++;
+        $prev_bad=$bad;
+    }else{
+        print NORM if ($opts{d});
+        $count{norm}++;
+        $prev_bad=$bad;
+    }
+    
+    $record++;
+    
+    if($record>$warn){
+        print LOG "-- $warn pairs analysed\n";
+        $warn+=100000;
+    }
+}
+
+close AB;
+close NORM if($opts{d});
+
+print LOG "-- Total : $record pairs analysed\n";
+print LOG "-- $count{unmap} pairs whose one or both reads are unmapped\n";
+print LOG "-- ".($count{ab}+$count{norm})." mapped pairs\n";
+print LOG "---- $count{ab} abnormal mapped pairs\n";
+print LOG "------ $count{chr} pairs mapped on two different chromosomes\n";
+print LOG "------ $count{sense} pairs with incorrect strand orientation and\/or pair order\n";
+print LOG "------ $count{dist} pairs with incorrect insert size distance\n";
+print LOG "--- $count{norm} correct mapped pairs\n";
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#OUTPUT
+
+if($bam){
+    
+    my $bam_file=$bad_mates_file;
+    $bam_file=~s/.sam$/.bam/;
+    print LOG "\# Converting sam to bam for abnormal mapped pairs\n";
+    system("${SAMTOOLS_BIN_DIR}/samtools view -bS $bad_mates_file > $bam_file 2>".$opts{o}."samtools.log");
+    unlink($bad_mates_file);
+    print LOG "-- output created: $bam_file\n";
+
+    system "rm $pt_bad_mates_file ; ln -s $bam_file $pt_bad_mates_file"; #GALAXY
+    
+    if($opts{d}){
+        $bam_file=$good_mates_file;
+        $bam_file=~s/.sam$/.bam/;
+        print LOG "\# Converting sam to bam for correct mapped pairs\n";
+        system("${SAMTOOLS_BIN_DIR}/samtools view -bS $good_mates_file > $bam_file 2>".$opts{o}."samtools.log");
+        unlink($good_mates_file);
+        print LOG "-- output created: $bam_file\n";
+
+	system "rm $pt_good_mates_file ; ln -s $bam_file $pt_good_mates_file"; #GALAXY
+
+    }
+
+}
+
+else{
+    print LOG "-- output created: $bad_mates_file\n";
+    print LOG "-- output created: $good_mates_file\n" if($opts{d});
+}
+
+close LOG;
+
+system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY
+
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub decimal{
+    
+  my $num=shift;
+  my $digs_to_cut=shift;
+
+  $num=sprintf("%.".($digs_to_cut-1)."f", $num) if ($num=~/\d+\.(\d){$digs_to_cut,}/);
+
+  return $num;
+}
+#------------------------------------------------------------------------------#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/svdetect/BAM_preprocessingPairs.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,77 @@
+<tool id="svdetect_preprocessing" name="BAM preprocessing">
+
+  <description>to get abnormal pairs</description>
+
+  <command interpreter="perl"> BAM_preprocessingPairs.pl -t '$readType' -p '$pairType' -n '$nbrePair' -s '$isizeMin' -S '$isizeMax' -f '$foldPair' -o $__new_file_path__/svdetect -b '$abBAM' -l '$log' -N $sample_name
+	#if $newBam.pairNormal=="yes" 
+	-d -x '$normBAM'
+	#end if
+	'$inputBam'
+  </command>
+
+  <inputs>
+    <param name="sample_name" type="text" value="sample" label="Sample Name"/>
+    <param name="inputBam" type="data" format="bam" label="BAM input file"/>
+    <param name="readType" type="select" label="Read type">
+	<option value="1">Illumina</option>
+	<option value="0">SOLiD</option>
+   </param>
+   <param name="pairType" type="select" label="Library type">
+	<option value="1">Paired-end</option>
+	<option value="0">Mate-Pair</option>
+   </param>
+   <conditional name="newBam">
+   <param name="pairNormal" type="select" label="Do you want an additional bam file listing concordant mapped pairs?" help="Dump normal pairs into a file sample_name.norm.bam/sam">
+	<option value="no">No</option>
+	<option value="yes">Yes</option>
+   </param>
+    <when value="yes">
+	  <!-- do nothing here -->
+    </when>
+    <when value="no">
+	 <!-- do nothing here -->
+    </when>
+   </conditional>
+   <param name="nbrePair" value="1000000" type="integer" size="30" label="Number of pairs for calculating mu (µ) and sigma (σ) lengths"/>
+   <param name="isizeMin" value="0" type="integer" size="30" label="Minimum value of ISIZE for calculating mu (µ) and sigma (σ) lengths"/>
+   <param name="isizeMax" value="10000" type="integer" size="30" label="Maximum value of ISIZE for calculating mu (µ)and sigma( σ) lengths"/>
+   <param name="foldPair" value="3" type="float" size="30" label="Minimal number of sigma (σ) fold for filtering pairs"/>
+  </inputs>
+
+  <outputs>
+    <data format="bam" name="abBAM" label="${$sample_name}.ab.bam"/> 
+    <data format="txt" name="log" label="${$sample_name}.svdetect_preprocessing.log"/>
+    <data format="bam" name="normBAM" label="${$sample_name}.norm.bam">
+    <filter>newBam['pairNormal'] == 'yes'</filter>
+    </data> 
+  </outputs>
+
+  <help>
+
+**What it does**
+
+Bam_preprocessingPairs - Version 0.5
+
+Preprocessing of mates to get anomalously mapped mate-pair/paired-end reads as input for SVDetect.
+
+From all pairs mapped onto the reference genome, this script outputs abnormal pairs:
+
+	* mapped on two different chromosomes
+	* with an incorrect strand orientation and/or pair order
+	* with an insert size distance +- sigma threshold
+
+into a file prefix.ab.bam/sam sorted by read names
+    
+-BAM/SAM File input format only.
+  
+SAMtools required for BAM files
+
+-----
+
+.. class:: infomark
+
+Contact Bruno Zeitouni (svdetect@curie.fr) for any questions or concerns about the Galaxy implementation of SVDetect.
+
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/svdetect/SVDetect_compare.pl	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,716 @@
+#!/usr/bin/perl -w
+
+=pod
+
+=head1 NAME
+
+SVDetect Compare for Galaxy
+
+Version: 0.8b for Galaxy
+
+=head1 SYNOPSIS
+
+SVDetect_compare.pl links2compare -conf <configuration_file> [-help] [-man]
+
+=cut
+
+# -------------------------------------------------------------------
+
+use strict;
+use warnings;
+
+use Pod::Usage;
+use Getopt::Long;
+
+use Config::General;
+use Tie::IxHash;
+
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+#PARSE THE COMMAND LINE
+my %OPT;
+GetOptions(\%OPT,
+	   'conf=s',
+	   'out1=s', #GALAXY
+	   'out2=s', #GALAXY
+	   'out3=s', #GALAXY
+	   'out4=s', #GALAXY
+	   'out5=s', #GALAXY
+	   'out6=s', #GALAXY
+	   'out7=s', #GALAXY
+	   'out8=s', #GALAXY
+	   'out9=s', #GALAXY
+	   'l=s', #GALAXY
+	   'N=s', #GALAXY
+	   'help',
+           'man'
+	  );
+
+pod2usage() if $OPT{help};
+pod2usage(-verbose=>2) if $OPT{man};
+pod2usage(-message=> "$!", -exitval => 2) if (!defined $OPT{conf});
+
+
+pod2usage() if(@ARGV<1);
+
+tie (my %func, 'Tie::IxHash',links2compare=>\&links2compare);
+
+foreach my $command (@ARGV){
+    pod2usage(-message=> "Unknown command \"$command\"", -exitval => 2) if (!defined($func{$command}));
+}
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+#READ THE CONFIGURATION FILE
+my $conf=Config::General->new(    -ConfigFile        => $OPT{conf},
+                                  -Tie => "Tie::IxHash",
+                                  -AllowMultiOptions => 1,
+				  -LowerCaseNames    => 1,
+				  -AutoTrue => 1);
+my %CONF= $conf->getall;
+validateconfiguration(\%CONF);							#validation of the configuration parameters
+
+
+my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools"; #GALAXY
+my $BEDTOOLS_BIN_DIR="/bioinfo/local/BEDTools/bin"; #GALAXY
+
+my $pt_log_file=$OPT{l}; #GALAXY
+my $log_file=$CONF{general}{output_dir}.$OPT{N}.".svdetect_compare.log"; #GALAXY
+open LOG,">$log_file" or die "$0: can't open ".$log_file.":$!\n";#GALAXY
+
+my @pt_sv_file=($OPT{out1},$OPT{out2},$OPT{out3}) if($OPT{out1}); #GALAXY common,sample,reference
+my @pt_circos_file=($OPT{out4},$OPT{out5},$OPT{out6}) if($OPT{out4}); #GALAXY common,sample,reference
+my @pt_bed_file=($OPT{out7},$OPT{out8},$OPT{out9}) if($OPT{out7}); #GALAXY common,sample,reference
+
+$CONF{compare}{sample_link_file}=readlink($CONF{compare}{sample_link_file});#GALAXY
+$CONF{compare}{sample_link_file}=~s/.sv.txt//; #GALAXY
+
+$CONF{compare}{reference_link_file}=readlink($CONF{compare}{reference_link_file});#GALAXY
+$CONF{compare}{reference_link_file}=~s/.sv.txt//; #GALAXY
+
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+#COMMAND EXECUTION
+foreach my $command (@ARGV){
+    &{$func{$command}}();
+}
+print LOG "-- end\n";
+
+close LOG;#GALAXY
+system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY
+
+exit(0);
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+#FUNCTIONS
+
+# -----------------------------------------------------------------------------#
+#MAIN FUNCTION number 5:Comparison between samples, common or specific links
+sub links2compare{
+    
+    my @compare_files;
+    
+    compareSamples($CONF{general}{output_dir},
+		   $CONF{compare}{list_samples},
+		   $CONF{compare}{sample_link_file},
+		   $CONF{compare}{reference_link_file},
+		   $CONF{compare}{min_overlap},
+		   $CONF{compare}{same_sv_type},
+		   \@compare_files);
+
+    my $pt_ind=0;
+ 
+    for my $input_file (@compare_files){
+	
+	$input_file=$CONF{general}{output_dir}.$input_file;
+	
+	my $output_file=$input_file;
+	$output_file=~s/unique$/compared/;
+	
+	sortLinks($input_file, $output_file,1);
+	
+	if($CONF{compare}{circos_output}){
+	    links2segdup($CONF{circos}{organism_id},
+			 $CONF{circos}{colorcode},
+			 $output_file,
+			 $output_file.".segdup.txt");
+	    system "rm $pt_circos_file[$pt_ind]; ln -s $output_file.segdup.txt $pt_circos_file[$pt_ind]" if (defined $pt_circos_file[$pt_ind]); #GALAXY
+	}
+	
+	if($CONF{compare}{bed_output}){
+	links2bedfile($CONF{compare}{read_lengths},
+		      $CONF{bed}{colorcode},
+		      $output_file,
+		      $output_file.".bed");
+	system "rm $pt_bed_file[$pt_ind]; ln -s $output_file.bed $pt_bed_file[$pt_ind]" if (defined $pt_bed_file[$pt_ind]); #GALAXY
+	}
+	
+	if($CONF{compare}{sv_output}){
+	    
+	    links2SVfile ($output_file, $output_file.".sv.txt");
+	    system "rm $pt_sv_file[$pt_ind]; ln -s $output_file.sv.txt $pt_sv_file[$pt_ind]" if (defined $pt_sv_file[$pt_ind]); #GALAXY
+	}
+	$pt_ind++;
+	
+    }
+    unlink(@compare_files);
+
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub compareSamples{
+    
+    my ($dir,$list_samples,$sample_file,$reference_file,$min_overlap,$same_sv_type,$file_names)=@_;
+    
+    my @bedpefiles;
+    my @list=split(",",$list_samples);
+    my @list_files=($sample_file,$reference_file);
+
+    print LOG "\# Comparison procedure...\n";
+    print LOG "-- samples=$list_samples\n".
+	 "-- minimum overlap=$min_overlap\n".
+	 "-- same SV type=$same_sv_type\n";
+
+    #conversion of links to bedPE format file
+    print LOG "-- Conversion of links.filtered files to bedPE format\n";
+    for my $s (0..$#list) {
+
+	links2bedPElinksfile($list[$s],$list_files[$s],$list_files[$s].".bedpe.txt");
+	push(@bedpefiles,$list_files[$s].".bedpe.txt");
+
+    }
+
+    #get common links between all samples compared
+    print LOG "-- Getting common links between all samples with BEDTools\n";
+    my $common_name=join(".",@list);
+    
+    my $nb=scalar @list;
+    my $command="";
+    my $prompt=">";
+    
+    while ($nb>0){
+	
+	for my $i (0..$#list_files){
+		
+	    $command.="$BEDTOOLS_BIN_DIR/pairToPair -type both -f $min_overlap -a ".$list_files[$i].".bedpe.txt";
+	    my $pipe=0;
+	    
+	    for my $j ($i+1..$#list_files){
+		
+		$command.="| $BEDTOOLS_BIN_DIR/pairToPair -type both -f $min_overlap -a stdin" if($pipe);
+		$command.=" -b ".$list_files[$j].".bedpe.txt";
+		$pipe=1;
+		
+	    }
+
+	    $command.=$prompt.$dir.$common_name.".bedpe.tmp;";
+	    $prompt=">>";
+	    
+	    my $first=shift(@list_files); push(@list_files,$first);
+	    last;
+	}
+	$nb--;
+    }
+    
+    system ($command);
+    
+    push(@bedpefiles,$dir.$common_name.".bedpe.tmp");
+    
+    #Post comparison to get common links if same type only (as an option)
+    open( FILE, "<".$dir.$common_name.".bedpe.tmp") or die "Can't open".$dir.$common_name.".bedpe.tmp : $!";
+    open( OUT, ">".$dir.$common_name.".bedpe.unique") or die "Can't write in ".$dir.$common_name.".bedpe.unique : $!";
+    
+    while(<FILE>){
+	my @t=split("\t",$_);
+	my $s=(split("_",$t[6]))[0];
+	my ($sv1,$sv2)=($t[7],$t[18]);
+	splice(@t,11,$#t);
+	
+	if($same_sv_type){
+	    print OUT join("\t",@t)."\n" if($sv1 eq $sv2);
+	}else{
+	    print OUT join("\t",@t)."\n";
+	}
+    }
+    close FILE;
+    close OUT;
+    
+    bedPElinks2linksfile($dir.$common_name.".bedpe.unique", $dir.$common_name.".unique");
+    push(@bedpefiles,$dir.$common_name.".bedpe.unique");
+    push(@$file_names,$common_name.".unique");
+    print LOG "-- output created: ".$dir.$common_name.".compared\n";
+    
+    #get specific links for each sample
+    print LOG "-- Getting specific links for each sample\n";
+    for my $s (0..$#list) {
+	system("grep -Fxv -f ".$dir.$common_name.".bedpe.unique ".$list_files[$s].".bedpe.txt >".$dir.$list[$s].".bedpe.unique");
+	bedPElinks2linksfile($dir.$list[$s].".bedpe.unique",$dir.$list[$s].".unique");
+	push(@bedpefiles,$dir.$list[$s].".bedpe.unique");
+	push(@$file_names,$list[$s].".unique");
+	print LOG "-- output created: ".$dir.$list[$s].".compared\n";
+    }
+    
+    unlink(@bedpefiles);
+
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#convert the links file to the circos format
+sub links2segdup{
+    
+    my($id,$color_code,$links_file,$segdup_file)=@_;
+    
+    print LOG "\# Converting to the circos format...\n";
+    
+    tie (my %hcolor,'Tie::IxHash');						#color-code hash table
+    foreach my $col (keys %{$color_code}){
+	my ($min_links,$max_links)=split(",",$color_code->{$col});
+	$hcolor{$col}=[$min_links,$max_links];
+    }
+    
+    open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n";
+    open SEGDUP, ">$segdup_file" or die "$0: can't write in the output: $segdup_file :$!\n";
+    
+    my $index=1;
+    while(<LINKS>){
+	
+	my ($chr1,$start1,$end1,$chr2,$start2,$end2,$count)=(split)[0,1,2,3,4,5,6];
+	
+	my $color=getColor($count,\%hcolor,"circos");				#get the color-code according the number of links
+	
+	print SEGDUP "$index\t$id$chr1\t$start1\t$end1\tcolor=$color\n".	#circos output
+		     "$index\t$id$chr2\t$start2\t$end2\tcolor=$color\n";
+	$index++;
+    }
+    
+    close LINKS;
+    close SEGDUP;
+    print LOG "-- output created: $segdup_file\n";
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#convert the links file to the bedPE format for BEDTools usage
+sub links2bedPElinksfile{
+
+    my ($sample,$links_file,$bedpe_file)=@_;
+    
+    open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n";
+    open BEDPE, ">$bedpe_file" or die "$0: can't write in the output: $bedpe_file :$!\n";
+    
+    my $nb_links=1;
+    
+    while(<LINKS>){
+	
+	chomp;
+	my @t=split("\t",$_);
+	my ($chr1,$start1,$end1,$chr2,$start2,$end2)=splice(@t,0,6);
+	my $type=($chr1 eq $chr2)? "INTRA":"INTER";
+	$type.="_".$t[10];
+	
+	$start1--; $start2--;
+	
+	print BEDPE "$chr1\t$start1\t$end1\t$chr2\t$start2\t$end2".
+	"\t$sample"."_link$nb_links\t$type\t.\t.".
+	"\t".join("|",@t)."\n";
+	
+	$nb_links++;
+    }
+    
+    close LINKS;
+    close BEDPE;
+
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub bedPElinks2linksfile{
+
+    my ($bedpe_file,$links_file)=@_;
+ 
+    open BEDPE, "<$bedpe_file" or die "$0: can't open: $bedpe_file :$!\n";
+    open LINKS, ">$links_file" or die "$0: can't write in the output $links_file :$!\n";
+    
+    while(<BEDPE>){
+	
+	chomp;
+	my $sample=(split("_",(split("\t",$_))[6]))[0];
+	my @t1=(split("\t",$_))[0,1,2,3,4,5];
+	my @t2=split(/\|/,(split("\t",$_))[10]);
+	push(@t2,$sample);
+	
+	print LINKS join("\t",@t1)."\t".join("\t",@t2)."\n";
+	
+    }
+    close BEDPE;
+    close LINKS;
+
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#convert the links file to the bed format
+sub links2bedfile{
+    
+    my ($tag_length,$color_code,$links_file,$bed_file)=@_;
+    
+    print LOG "\# Converting to the bed format...\n";
+    
+    my $compare=1;
+    if($links_file!~/compared$/){
+	$compare=0;
+	$tag_length->{none}->{1}=$tag_length->{1};
+	$tag_length->{none}->{2}=$tag_length->{2};
+    }
+    
+    #color-code hash table
+    tie (my %hcolor,'Tie::IxHash');
+    my %color_order;
+    $color_order{"255,255,255"}=0;
+    my $n=1;
+    foreach my $col (keys %{$color_code}){
+	my ($min_links,$max_links)=split(",",$color_code->{$col});
+	$hcolor{$col}=[$min_links,$max_links];
+	$color_order{$col}=$n;
+	$n++;
+    }
+    
+    my %pair;
+    my %pt;
+    $n=1;
+    open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n";
+    
+    my %str=( "F"=>"+", "R"=>"-" );
+
+    while(<LINKS>){
+	
+	my @t=split;
+	my $sample=($compare)? pop(@t):"none";
+	
+	my $chr1=$t[0]; 
+	my $chr2=$t[3];
+	$chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
+	$chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
+	my $same_chr=($chr1 eq $chr2)? 1:0;
+	
+	my $count=$t[6];
+	my $color=getColor($count,\%hcolor,"bed");
+	
+	my @pairs=deleteBadOrderSensePairs(split(",",$t[7]));
+	my @strand1=deleteBadOrderSensePairs(split(",",$t[8]));
+	my @strand2=deleteBadOrderSensePairs(split(",",$t[9]));
+	my @ends_order1=deleteBadOrderSensePairs(split(",",$t[10]));
+	my @ends_order2=deleteBadOrderSensePairs(split(",",$t[11]));
+	my @position1=deleteBadOrderSensePairs(split(",",$t[14]));
+	my @position2=deleteBadOrderSensePairs(split(",",$t[15]));
+	my @start1; my @end1; getCoordswithLeftMost(\@start1,\@end1,\@position1,\@strand1,\@ends_order1,$tag_length->{$sample});
+	my @start2; my @end2; getCoordswithLeftMost(\@start2,\@end2,\@position2,\@strand2,\@ends_order2,$tag_length->{$sample});
+
+	
+	for my $p (0..$#pairs){						
+	    
+	    if (!exists $pair{$pairs[$p]}){
+		
+		if($same_chr){
+		    
+		    $pair{$pairs[$p]}->{0}=[ $chr1, $start1[$p]-1, $end2[$p], $pairs[$p], 0, $str{$strand1[$p]},
+				    $start1[$p]-1, $end2[$p], $color,
+				    2, $tag_length->{$sample}->{$ends_order1[$p]}.",".$tag_length->{$sample}->{$ends_order2[$p]}, "0,".($start2[$p]-$start1[$p]) ];    
+		    $pt{$n}=$pair{$pairs[$p]}->{0};
+		    $n++;
+		    
+		}else{
+		    
+		    $pair{$pairs[$p]}->{1}=[ $chr1, $start1[$p]-1, $end1[$p] , $pairs[$p]."/1", 0, $str{$strand1[$p]},
+						$start1[$p]-1, $end1[$p], $color,
+						1, $tag_length->{$sample}->{$ends_order1[$p]}, 0];
+		    $pt{$n}=$pair{$pairs[$p]}->{1};
+		    $n++;
+		    
+		    
+		    $pair{$pairs[$p]}->{2}=[ $chr2, $start2[$p]-1, $end2[$p], $pairs[$p]."/2", 0, $str{$strand2[$p]},
+						$start2[$p]-1, $end2[$p], $color,
+						1, $tag_length->{$sample}->{$ends_order2[$p]}, 0];
+		    $pt{$n}=$pair{$pairs[$p]}->{2};
+		    $n++;
+		}
+	    }else{
+		
+		if($same_chr){
+		    ${$pair{$pairs[$p]}->{0}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{0}}[8]});
+		}else{
+		    ${$pair{$pairs[$p]}->{1}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{1}}[8]});
+		    ${$pair{$pairs[$p]}->{2}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{2}}[8]});
+		}
+	    }
+	}
+    }
+    close LINKS;
+    
+    my $nb_pairs=$n-1;
+    
+    open BED, ">$bed_file" or die "$0: can't write in the output: $bed_file :$!\n";
+    print BED "track name=\"$bed_file\" description=\"mate pairs involved in links\" ".
+	      "visibility=2 itemRgb=\"On\"\n";
+    
+    for my $i (1..$nb_pairs){
+	print BED join("\t",@{$pt{$i}})."\n";
+    }
+    
+    close BED;
+    
+    print LOG "-- output created: $bed_file\n";
+    
+    undef %pair;
+    undef %pt;
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub links2SVfile{
+    
+    my($links_file,$sv_file)=@_;
+    
+    print LOG "\# Converting to the sv output table...\n";
+    open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n";
+    open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n";
+    
+    my @header=qw(chr_type SV_type BAL_type chromosome1 start1-end1 average_dist
+    chromosome2 start2-end2 nb_pairs score_strand_filtering score_order_filtering score_insert_size_filtering
+    final_score breakpoint1_start1-end1 breakpoint2_start2-end2);
+    
+    my $nb_links=0;
+    
+    while (<LINKS>){
+	
+	my @t=split;
+	my @sv=();
+	my $sv_type="-";
+	my $strand_ratio="-";
+	my $eq_ratio="-";
+	my $eq_type="-";
+	my $insert_ratio="-";
+	my $link="-";
+	my ($bk1, $bk2)=("-","-");
+	my $score="-";
+	
+	my ($chr1,$start1,$end1)=($t[0],$t[1],$t[2]); 
+	my ($chr2,$start2,$end2)=($t[3],$t[4],$t[5]);
+	my $nb_pairs=$t[6];
+	$chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
+	$chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
+	my $chr_type=($chr1 eq $chr2)? "INTRA":"INTER";
+	
+	#if strand filtering
+	if (defined $t[16]){
+	    #if inter-chr link
+	    $sv_type=$t[16];
+	    if(defined $t[17] && $t[17]=~/^(\d+)\/(\d+)$/){
+		$strand_ratio=floor($1/$2*100)."%";
+		$score=$t[18];
+	    }
+	    if(defined $t[18] && $t[18]=~/^(\d+)\/(\d+)$/){
+	    #if intra-chr link with insert size filtering
+		$strand_ratio=floor($1/$2*100)."%";
+		$link=floor($t[17]);
+		if($sv_type!~/^INV/){
+		    $insert_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
+		    $score=$t[20];
+		}else{
+		    $score=$t[19];
+		}
+	    }
+	}
+	
+	if(defined $t[18] && ($t[18] eq "UNBAL" || $t[18] eq "BAL")){
+	    
+	    #if strand and order filtering only and/or interchr link
+	    $eq_type=$t[18];
+	    $eq_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
+	    ($bk1, $bk2)=($t[20],$t[21]);
+	    foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}
+	    $score=$t[22];
+	    
+	}elsif(defined $t[19] && ($t[19] eq "UNBAL" || $t[19] eq "BAL")){
+	    
+	    #if all three filtering
+	    $link=floor($t[17]);
+	    $eq_type=$t[19];
+	    $eq_ratio=floor($1/$2*100)."%" if($t[20]=~/^(\d+)\/(\d+)$/);
+	    
+	    if(defined $t[21] && $t[21]=~/^(\d+)\/(\d+)$/){
+		$insert_ratio=floor($1/$2*100)."%";
+		($bk1, $bk2)=($t[22],$t[23]);
+		$score=$t[24];
+		
+	    }else{
+		($bk1, $bk2)=($t[21],$t[22]);
+		$score=$t[23];
+	    }
+	    foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}
+	    
+	}
+	
+	
+	push(@sv, $chr_type, $sv_type,$eq_type);
+	push(@sv,"$chr1\t$start1-$end1");
+	push(@sv, $link);
+	push(@sv,"$chr2\t$start2-$end2",
+	     $nb_pairs,$strand_ratio,$eq_ratio,$insert_ratio, decimal($score,4), $bk1, $bk2);
+	
+	
+	print SV join("\t",@sv)."\n";
+    }
+    
+    close LINKS;
+    close SV;
+    
+    system "sort  -k 9,9nr -k 13,13nr $sv_file > $sv_file.sorted";
+    
+    open SV, "<".$sv_file.".sorted" or die "$0: can't open in the output: $sv_file".".sorted :$!\n";
+    my @links=<SV>;
+    close SV;
+    
+    open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n";
+    
+    print SV join("\t",@header)."\n";
+    print SV @links;
+    close SV;
+    
+    unlink($sv_file.".sorted");
+  
+    print LOG "-- output created: $sv_file\n";
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub deleteBadOrderSensePairs{
+    
+    my (@tab)=@_;
+    my @tab2;
+
+    foreach my $v (@tab){
+	
+	$v=~s/[\(\)]//g;
+	push(@tab2,$v) if($v!~/[\$\*\@]$/);
+    }
+    return @tab2;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#gets starts and ends Coords when start=leftmost given positions, directions and orders
+sub getCoordswithLeftMost {
+    
+    my ($starts,$ends,$positions,$strand,$end_order,$tag_length) = @_;
+
+    for my $i (0..scalar(@{$positions})-1) {
+
+	if($strand->[$i] eq 'F'){
+	    $starts->[$i]=$positions->[$i];
+	    $ends->[$i]=$positions->[$i]+$tag_length->{$end_order->[$i]}-1;
+	}else{
+	    $starts->[$i]=$positions->[$i]-$tag_length->{$end_order->[$i]}+1;
+	    $ends->[$i]=$positions->[$i];
+	}
+    }    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub floor {
+    my $nb = $_[0];
+    $nb=~ s/\..*//;
+    return $nb;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub decimal{
+    
+  my $num=shift;
+  my $digs_to_cut=shift;
+
+  $num=sprintf("%.".($digs_to_cut-1)."f", $num) if ($num=~/\d+\.(\d){$digs_to_cut,}/);
+
+  return $num;
+}
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#Sort links according the concerned chromosomes and their coordinates
+sub sortLinks{
+    
+    my ($links_file,$sortedlinks_file,$unique)=@_;
+    
+    print LOG "# Sorting links...\n";
+    
+    my $pipe=($unique)? "| sort -u":"";
+    system "sort -k 1,1 -k 4,4 -k 2,2n -k 5,5n -k 8,8n $links_file $pipe > $sortedlinks_file";
+
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub getColor{
+
+    my($count,$hcolor,$format)=@_;
+    for my $col ( keys % { $hcolor} ) {
+       return $col if($count>=$hcolor->{$col}->[0] && $count<=$hcolor->{$col}->[1]);
+    }
+    return "white" if($format eq "circos");
+    return "255,255,255" if($format eq "bed");
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#check if the configuration file is correct
+sub validateconfiguration{
+    
+    my %conf=%{$_[0]};
+    my $list_prgs="@ARGV";
+    
+    my @circos_params=qw(organism_id colorcode);
+    my @bed_params=qw(colorcode);
+    my @compare_params=qw(list_samples list_read_lengths sample_link_file reference_link_file);
+    
+    unless (defined($conf{general}{output_dir})) {
+	$conf{general}{output_dir} = ".";
+    }
+    unless (-d $conf{general}{output_dir}){
+	mkdir $conf{general}{output_dir} or die;
+    }
+    $conf{general}{output_dir}.="/" if($conf{general}{output_dir}!~/\/$/);
+
+    
+    if($list_prgs=~/links2compare/){
+	foreach my $p (@compare_params) {
+	    die("Error Config : The compare parameter \"$p\" is not defined\n") if (!defined $conf{compare}{$p});
+	}
+	
+	unless (defined($conf{compare}{same_sv_type})) {
+	    $conf{compare}{same_sv_type} = 0;
+	}
+	
+	unless (defined($conf{compare}{min_overlap})) {
+	    $conf{compare}{min_overlap} = 1E-9;
+	}
+	
+	if($conf{compare}{circos_output}){
+	    foreach my $p (@circos_params) {
+		next if($list_prgs=~/^ratio/ && $p eq "colorcode");
+		die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p});
+	    }
+	}
+	if($conf{compare}{bed_output}){
+	    foreach my $p (@bed_params) {
+		die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p});
+	    }
+	    die("Error Config : The compare parameter \"list_read_lengths\" is not defined\n") if (!defined $conf{compare}{list_read_lengths});
+
+	    my @samples=split(",",$conf{compare}{list_samples});
+	    my @read_lengths=split(",",$conf{compare}{list_read_lengths});
+	    for my $i (0..$#samples){
+		my @l=split("-",$read_lengths[$i]);
+		$conf{compare}{read_lengths}{$samples[$i]}={ 1=> $l[0], 2=> $l[1]};
+	    }
+	}
+    }
+   
+    
+}
+#------------------------------------------------------------------------------#
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/svdetect/SVDetect_compare.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,218 @@
+<tool id="svdetect_compare" name="Compare">
+
+<description>structural variants between two samples</description>
+
+<command interpreter="perl">SVDetect_compare.pl links2compare -conf '$config_file' -l '$log_file' -N '$sample_name.$reference_name'
+
+#if $links2SV
+-out1 '$common_sv_file'
+-out2 '$sample_sv_file'
+-out3 '$reference_sv_file'
+#end if
+
+#if $file_conversion.file_conversion_select=="convert" and $file_conversion.links2circos
+-out4 '$common_circos_file'
+-out5 '$sample_circos_file'
+-out6 '$reference_circos_file'
+#end if
+
+#if $file_conversion.file_conversion_select=="convert" and $file_conversion.links2bed
+-out7 '$common_bed_file'
+-out8 '$sample_bed_file'
+-out9 '$reference_bed_file'
+#end if
+
+</command>
+
+<inputs>
+	<param name="sample_name" type="text" size="20" value="sample" label="Sample Name"/>
+	<param name="sample_read1_length" type="integer" size="10" value="50" label="Sample read 1 length (bp)"/>
+	<param name="sample_read2_length" type="integer" size="10" value="50" label="Sample read 2 length (bp)"/>
+   	<param name="sample_mates_file" type="data" format="sv" label="Sample input file" help=".sv file"/>
+	
+	<param name="reference_name" type="text" size="20" value="reference" label="Reference Name"/>
+	<param name="reference_read1_length" type="integer" size="10" value="50" label="Reference read 1 length (bp)"/>
+	<param name="reference_read2_length" type="integer" size="10" value="50" label="Reference read 2 length (bp)"/>
+   	<param name="reference_mates_file" type="data" format="sv" label="Reference input file" help=".sv file"/>
+	
+	<param name="min_overlap" type="float" size="10"  value="0.05" label="Minimum overlap of links required as a fraction"/>
+	<param name="same_sv_type" label="Comparison of SVs with the same type only ?" type="boolean" truevalue="1" falsevalue="0" checked="True"/>
+
+	<param name="links2SV" label="Do you want to have filtered links in a tabulated file format showing significant SVs?" type="boolean" truevalue="1" falsevalue="0" checked="True"/>
+		
+	<conditional name="file_conversion">
+		<param name="file_conversion_select" type="select" label="Output file conversion" help="Converts filtered links to Circos/BED files format for graphical view of SVs">
+			<option value="do_not_convert">No</option>
+			<option value="convert">Yes</option>
+		</param>
+		<when value="do_not_convert">
+				<!-- do nothing here -->
+		</when>
+		<when value="convert">
+			<param name="links2circos" label="Converts the link list to the Circos link format" type="boolean" truevalue="1" falsevalue="0" checked="True"/>
+			<param name="links2bed" label="Converts the link list to the UCSC BED format" type="boolean" truevalue="1" falsevalue="0" checked="False"/>
+			<param name="organism_id" type="text" size="10" value="hs" label="Organism ID"/>
+			<repeat name="color_code" title="Color-code" min="1" max="7">
+				<param name="color" type="select" label="Color">
+					<option value="grey">grey</option>
+					<option value="black">black</option>
+					<option value="blue">blue</option>
+					<option value="green">green</option>
+					<option value="purple">purple</option>
+					<option value="orange">orange</option>
+					<option value="red">red</option>
+				</param>
+				<param name="interval" type="text" value="1,3" label="Interval"/>
+			</repeat>
+		</when>
+	</conditional>
+</inputs>
+
+
+
+<outputs>
+	<data format="sv" name="common_sv_file" label="common.compared.sv">
+		<filter>links2SV is True</filter>
+	</data>
+	<data format="sv" name="sample_sv_file" label="${sample_name}.compared.sv">
+		<filter>links2SV is True</filter>
+	</data>
+	<data format="sv" name="reference_sv_file" label="${reference_name}.compared.sv">
+		<filter>links2SV is True</filter>
+	</data>
+
+	<data format="segdup" name="common_circos_file" label="common.compared.segdup">
+		<filter>(
+			file_conversion['file_conversion_select']=="convert" and
+			file_conversion['links2circos'] is True
+			)
+		</filter>
+	</data>
+	<data format="segdup" name="sample_circos_file" label="${sample_name}.compared.segdup">
+		<filter>(
+			file_conversion['file_conversion_select']=="convert" and
+			file_conversion['links2circos'] is True
+			)
+		</filter>
+	</data>
+	<data format="segdup" name="reference_circos_file" label="${reference_name}.compared.segdup">
+		<filter>(
+			file_conversion['file_conversion_select']=="convert" and
+			file_conversion['links2circos'] is True
+			)
+		</filter>
+	</data>
+
+	<data format="bed" name="common_bed_file" label="common.compared.bed">
+		<filter>(
+			file_conversion['file_conversion_select']=="convert" and
+			file_conversion['links2bed'] is True
+			)
+		</filter>
+	</data>
+	<data format="bed" name="sample_bed_file" label="${sample_name}.compared.bed">
+		<filter>(
+			file_conversion['file_conversion_select']=="convert" and
+			file_conversion['links2bed'] is True
+			)
+		</filter>
+	</data>
+	<data format="bed" name="reference_bed_file" label="${reference_name}.compared.bed">
+		<filter>(
+			file_conversion['file_conversion_select']=="convert" and
+			file_conversion['links2bed'] is True
+			)
+		</filter>
+	</data>
+
+	<data format="txt" name="log_file" label="${sample_name}.${reference_name}.svdetect_compare.log"/>
+</outputs>
+
+
+
+<configfiles>
+	<configfile name="config_file">
+&lt;general&gt;
+output_dir=$__new_file_path__/svdetect
+&lt;/general&gt; 
+
+#if $file_conversion.file_conversion_select == "convert"
+#if $file_conversion.links2circos
+&lt;circos&gt;
+organism_id=${file_conversion.organism_id}
+&lt;colorcode&gt;
+#for $color_repeat in $file_conversion.color_code
+${color_repeat.color}=${color_repeat.interval}
+#end for
+&lt;/colorcode&gt;
+&lt;/circos&gt;
+#end if
+#if $file_conversion.links2bed
+&lt;bed&gt;
+&lt;colorcode&gt;
+#for $color_repeat in $file_conversion.color_code
+#if str($color_repeat.color)== "grey"
+190,190,190=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "black"
+0,0,0=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "blue"
+0,0,255=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "green"
+0,255,0=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "purple"
+153,50,205=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "orange"
+255,140,0=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "red"
+255,0,0=${color_repeat.interval}
+#end if
+#end for
+&lt;/colorcode&gt;
+&lt;/bed&gt;
+#end if
+#end if
+
+&lt;compare&gt;
+list_samples=${sample_name},${reference_name}
+list_read_lengths=${sample_read1_length}-${sample_read2_length},${reference_read1_length}-${reference_read2_length}
+sample_link_file=${sample_mates_file}
+reference_link_file=${reference_mates_file}
+min_overlap=${min_overlap}
+same_sv_type=${same_sv_type}
+sv_output=${links2SV}
+#if $file_conversion.file_conversion_select == "convert"
+circos_output=${$file_conversion.links2circos}
+bed_output=${$file_conversion.links2bed}
+#end if
+&lt;/compare&gt; 
+
+	</configfile>
+</configfiles>
+
+  <help>
+**What it does**
+
+SVDetect - Version : 0.8b
+
+Comparison of clusters between two samples to get common or sample-specific SVs
+
+This program is designed to compare filtered links between two anomalously mapped mate-pair/paired-end datasets
+and to identify common and sample-specific SVs (like the usual sample/reference design).
+Overlaps between coordinates of clusters and types of SVs are used as parameters of comparison.
+
+Manual documentation available at the http://svdetect.sourceforge.net/Site/Manual.html
+
+-----
+
+.. class:: infomark
+
+Contact Bruno Zeitouni (svdetect@curie.fr) for any questions or concerns about the Galaxy implementation of SVDetect.
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/svdetect/SVDetect_import.sh	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+
+while getopts "i:o:" optionName; do
+case "$optionName" in
+
+i) INPUT="$OPTARG";;
+o) OUTPUT="$OPTARG";;
+
+esac
+done
+
+rm $OUTPUT
+
+ln -s $INPUT $OUTPUT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/svdetect/SVDetect_import.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,85 @@
+<tool id="svdetect_import" name="Import data">
+  <description>BAM, chromosome info or sv files</description>
+  <command interpreter="bash">SVDetect_import.sh -i $file_path
+  #if str($type.file_type)=="bam"
+  -o $outbamfile
+  #elif str($type.file_type)=="len"
+  -o $outlenfile
+  #elif str($type.file_type)=="sv"
+  -o $outsvfile
+  #end if
+  </command>
+  <inputs>
+    <param name="file_name" type="text" value="file1" label="File Name"/>
+    <conditional name="type">
+      <param name="file_type" type="select" label="Select the file type to import" help="BAM file (BAM) or text file (SAM, chromosome list or a SV tabulated text file)">
+          <option value="bam">BAM file (.bam)</option>
+          <option value="len">Chromosome info file (.len)</option>
+          <option value="sv">SVDetect output file (.sv)</option>
+      </param>
+    <when value="bam">
+      <!-- do nothing here -->
+    </when>
+    <when value="len">
+      <!-- do nothing here -->
+    </when>
+     <when value="sv">
+      <!-- do nothing here -->
+    </when>
+    </conditional>
+     <param name="file_path" type="text" size="150" label="Path to file"/>
+  </inputs>
+  <outputs>
+    <data format="bam" name="outbamfile" label="${file_name}.bam">
+      <filter>type['file_type']=="bam"</filter>
+    </data>
+    <data format="len" name="outlenfile" label="${file_name}.len">
+      <filter>type['file_type']=="len"</filter>
+    </data>
+    <data format="sv" name="outsvfile" label="${file_name}.sv">
+      <filter>type['file_type']=="sv"</filter>
+    </data>
+  </outputs>
+  <help>
+**What it does**
+
+This tool allows you to import quickly a BAM file, a chromosome info file or a SVDetect output file from you computer as inputs for SVDetect.
+
+
+**Example of chromosome file**
+
+Input len file::
+
+    1  chr1  247249719
+    2  chr2  242951149
+    3  chr3  199501827
+    4  chr4  191273063
+    5  chr5  180857866
+    6  chr6  170899992
+    7  chr7  158821424
+    8  chr8  146274826
+    9  chr9  140273252
+    10  chr10  135374737
+    11  chr11  134452384
+    12  chr12  132349534
+    13  chr13  114142980
+    14  chr14  106368585
+    15  chr15  100338915
+    16  chr16  88827254
+    17  chr17  78774742
+    18  chr18  76117153
+    19  chr19  63811651
+    20  chr20  62435964
+    21  chr21  46944323
+    22  chr22  49691432
+    23  chrX  154913754
+    24  chrY  57772954
+
+-----
+
+.. class:: infomark
+
+Contact Bruno Zeitouni (svdetect@curie.fr) for any questions or concerns about the Galaxy implementation of SVDetect.
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/svdetect/SVDetect_run_parallel.pl	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,3537 @@
+#!/usr/bin/perl -w
+
+=pod
+
+=head1 NAME
+
+SVDetect - Program designed to the detection of structural variations
+from paired-end/mate-pair sequencing data, compatible with SOLiD and Illumina (>=1.3) reads
+
+Version: 0.8b for Galaxy
+
+=head1 SYNOPSIS
+
+SVDetect <command> -conf <configuration_file> [-help] [-man]
+    
+    Command:
+
+    	linking		detection and isolation of links
+        filtering	filtering of links according different parameters
+        links2circos	links conversion to circos format
+	links2bed 	paired-ends of links converted to bed format (UCSC)
+	links2SV	formatted output to show most significant SVs
+	cnv		calculate copy-number profiles
+	ratio2circos	ratio conversion to circos density format
+	ratio2bedgraph	ratio conversion to bedGraph density format (UCSC)
+    
+=head1 DESCRIPTION
+
+This is a command-line interface to SVDetect.
+
+
+=head1 AUTHORS
+
+Bruno Zeitouni E<lt>bruno.zeitouni@curie.frE<gt>,
+Valentina Boeva E<lt>valentina.boeva@curie.frE<gt>
+
+=cut
+
+# -------------------------------------------------------------------
+
+use strict;
+use warnings;
+
+use Pod::Usage;
+use Getopt::Long;
+
+use Config::General;
+use Tie::IxHash;
+use FileHandle;
+use Parallel::ForkManager;
+
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+#PARSE THE COMMAND LINE
+my %OPT;
+GetOptions(\%OPT,
+	   'conf=s',
+	   'out1=s', #GALAXY
+	   'out2=s', #GALAXY
+	   'out3=s', #GALAXY
+	   'out4=s', #GALAXY
+	   'out5=s', #GALAXY
+	   'l=s', #GALAXY
+	   'N=s',#GALAXY
+	   'help',#GALAXY
+           'man'
+	  );
+
+pod2usage() if $OPT{help};
+pod2usage(-verbose=>2) if $OPT{man};
+pod2usage(-message=> "$!", -exitval => 2) if (!defined $OPT{conf});
+
+pod2usage() if(@ARGV<1);
+
+tie (my %func, 'Tie::IxHash',linking=>\&createlinks,
+			     filtering=>\&filterlinks,
+			     links2circos=>\&links2circos,
+			     links2bed=>\&links2bed,
+			     links2compare=>\&links2compare,
+			     links2SV=>\&links2SV,
+			     cnv=>\&cnv,
+			     ratio2circos=>\&ratio2circos,
+			     ratio2bedgraph=>\&ratio2bedgraph);
+
+foreach my $command (@ARGV){
+    pod2usage(-message=> "Unknown command \"$command\"", -exitval => 2) if (!defined($func{$command}));
+}
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+
+
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+#READ THE CONFIGURATION FILE
+my $conf=Config::General->new(    -ConfigFile        => $OPT{conf},
+                                  -Tie => "Tie::IxHash",
+                                  -AllowMultiOptions => 1,
+				  -LowerCaseNames    => 1,
+				  -AutoTrue => 1);
+my %CONF= $conf->getall;
+validateconfiguration(\%CONF);							#validation of the configuration parameters
+
+my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools"; #GALAXY
+
+my $pt_log_file=$OPT{l}; #GALAXY
+my $pt_links_file=$OPT{out1} if($OPT{out1}); #GALAXY
+my $pt_flinks_file=$OPT{out2} if($OPT{out2}); #GALAXY
+my $pt_sv_file=$OPT{out3} if($OPT{out3}); #GALAXY
+my $pt_circos_file=$OPT{out4} if($OPT{out4}); #GALAXY
+my $pt_bed_file=$OPT{out5} if($OPT{out5}); #GALAXY
+
+$CONF{general}{mates_file}=readlink($CONF{general}{mates_file});#GALAXY
+$CONF{general}{cmap_file}=readlink($CONF{general}{cmap_file});#GALAXY
+
+my $log_file=$CONF{general}{output_dir}.$OPT{N}.".svdetect_run.log"; #GALAXY
+open LOG,">$log_file" or die "$0: can't open ".$log_file.":$!\n";#GALAXY
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+#COMMAND EXECUTION
+foreach my $command (@ARGV){
+    &{$func{$command}}();
+}
+print LOG "-- end\n";#GALAXY
+
+close LOG;#GALAXY
+system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY
+exit(0);
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+
+
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
+#FUNCTIONS
+#------------------------------------------------------------------------------#
+#MAIN FUNCTION number 1: Detection of links from mate-pairs data
+sub createlinks{
+    
+    my %CHR;									#main hash table 1: fragments, links
+    my %CHRID;
+    my @MATEFILES;
+    
+    my $output_prefix=$CONF{general}{mates_file}.".".$CONF{general}{sv_type};
+    my @path=split(/\//,$output_prefix);
+    $output_prefix=$CONF{general}{output_dir}.$path[$#path];
+    my $tmp_mates_prefix=$CONF{general}{tmp_dir}."mates/".$path[$#path];
+    my $tmp_links_prefix=$CONF{general}{tmp_dir}."links/".$path[$#path];
+    
+    shearingChromosome(\%CHR, \%CHRID,						#making the genomic fragment library with the detection parameters
+		     $CONF{detection}{window_size},
+		     $CONF{detection}{step_length},
+		     $CONF{general}{cmap_file});
+    
+    if($CONF{detection}{split_mate_file}){
+    
+	splitMateFile(\%CHR, \%CHRID, \@MATEFILES, $tmp_mates_prefix,
+		      $CONF{general}{sv_type},
+		      $CONF{general}{mates_file},
+		      $CONF{general}{input_format},
+		      $CONF{general}{read_lengths}
+		      );
+    }else{
+	
+	@MATEFILES=qx{ls $tmp_mates_prefix*} or die "# Error: No splitted mate files already created at $CONF{general}{tmp_dir} :$!";
+	chomp(@MATEFILES);
+	print LOG "# Splitted mate files already created.\n";
+    }
+    
+    
+    #Parallelization of the linking per chromosome for intra +  interchrs
+    my $pm = new Parallel::ForkManager($CONF{general}{num_threads});
+    
+    foreach my $matefile (@MATEFILES){
+	
+	my $pid = $pm->start and next;
+	getlinks(\%CHR, \%CHRID, $matefile);
+	$pm->finish;
+    
+    }
+    $pm->wait_all_children;
+    
+    #Merge the chromosome links file into only one
+    my @LINKFILES= qx{ls $tmp_links_prefix*links} or die "# Error: No links files created at $CONF{general}{tmp_dir} :$!";
+    chomp(@LINKFILES);
+    catFiles( \@LINKFILES => "$output_prefix.links" );
+    
+    system "rm $pt_links_file; ln -s $output_prefix.links $pt_links_file" if (defined $pt_links_file); #GALAXY
+    print LOG "# Linking end procedure : output created: $output_prefix.links\n";
+    #unlink(@LINKFILES);
+    #unlink(@MATEFILES);
+	
+    undef %CHR;
+    undef %CHRID;
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub getlinks {
+    
+    my ($chr,$chrID,$tmp_mates_prefix)=@_;
+    
+    my $tmp_links_prefix=$tmp_mates_prefix;
+    $tmp_links_prefix=~s/\/mates\//\/links\//;
+    
+    my %PAIR;									#main hash table 2: pairs
+    
+    linking($chr,$chrID, \%PAIR,						#creation of all links from chromosome coordinates of pairs
+	  $CONF{general}{read_lengths},
+	  $CONF{detection}{window_size},
+	  $CONF{detection}{step_length},
+	  $tmp_mates_prefix,
+	  $CONF{general}{input_format},
+	  $CONF{general}{sv_type},
+	  "$tmp_links_prefix.links.mapped"
+	  );
+    
+    getUniqueLinks("$tmp_links_prefix.links.mapped",					#remove the doublons
+		   "$tmp_links_prefix.links.unique");
+    
+    defineCoordsLinks($chr,$chrID, \%PAIR,					#definition of the precise coordinates of links
+		      $CONF{general}{input_format},
+		      $CONF{general}{sv_type},
+		      $CONF{general}{read_lengths},
+		      "$tmp_links_prefix.links.unique",
+		      "$tmp_links_prefix.links.unique_defined"); 
+    
+    sortLinks("$tmp_links_prefix.links.unique_defined",				#sorting links from coordinates
+	      "$tmp_links_prefix.links.sorted");
+
+    removeFullyOverlappedLinks("$tmp_links_prefix.links.sorted",			#remove redundant links
+			       "$tmp_links_prefix.links",1);				#file output
+    
+   
+    undef %PAIR;
+    
+    unlink("$tmp_links_prefix.links.mapped",
+	   "$tmp_links_prefix.links.unique",
+	   "$tmp_links_prefix.links.unique_defined",
+	   "$tmp_links_prefix.links.sorted");
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub splitMateFile{
+    
+    my ($chr,$chrID,$files_list,$output_prefix,$sv_type,$mates_file,$input_format,$tag_length)=@_;
+    
+    print LOG "# Splitting the mate file \"$mates_file\" for parallel processing...\n";
+    
+    my %filesHandle;
+    
+    #fichier matefile inter
+    if($sv_type=~/^(all|inter)$/){
+	my $newFileName="$output_prefix.interchrs";
+	push(@{$files_list},$newFileName);
+	my $fh = new FileHandle;
+	$fh->open(">$newFileName");
+	$filesHandle{inter}=$fh;
+    }
+    
+    #fichiers matefiles intra
+    if($sv_type=~/^(all|intra)$/){
+	foreach my $k (1..$chr->{nb_chrs}){
+	    my $newFileName=$output_prefix.".".$chr->{$k}->{name};
+	    push(@{$files_list},$newFileName);
+	    my $fh = new FileHandle;
+	    $fh->open(">$newFileName");
+	    $filesHandle{$k}=$fh;
+	}
+    }
+    
+    if ($mates_file =~ /.gz$/) {
+	open(MATES, "gunzip -c $mates_file |") or die "$0: can't open ".$mates_file.":$!\n"; #gzcat
+    }elsif($mates_file =~ /.bam$/){
+	open(MATES, "$SAMTOOLS_BIN_DIR/samtools view $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";#GALAXY
+    }else{
+	open MATES, "<".$mates_file or die "$0: can't open ".$mates_file.":$!\n";
+    }
+    
+    
+    while(<MATES>){
+	
+	my @t=split;								
+	my ($chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1,$end_order_read2);
+	
+	next if (!readMateFile(\$chr_read1, \$chr_read2, \$firstbase_read1, \$firstbase_read2, \$end_order_read1, \$end_order_read2, \@t, $input_format,$tag_length));
+	
+	next unless (exists $chrID->{$chr_read1} && exists $chrID->{$chr_read2});
+	
+	($chr_read1, $chr_read2)= ($chrID->{$chr_read1},$chrID->{$chr_read2});
+	
+	if( ($sv_type=~/^(all|inter)$/) && ($chr_read1 ne $chr_read2) ){
+	    my $fh2print=$filesHandle{inter};
+	    print $fh2print join("\t",@t)."\n";
+	}
+	
+	if( ($sv_type=~/^(all|intra)$/) && ($chr_read1 eq $chr_read2) ){
+	    my $fh2print=$filesHandle{$chr_read1};
+	    print $fh2print join("\t",@t)."\n";
+	    
+	}
+    }
+    
+    foreach my $name (keys %filesHandle){
+	my $fh=$filesHandle{$name};
+	$fh->close;
+    }
+    
+    print LOG "# Splitted mate files of \"$mates_file\" created.\n";
+}
+
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub filterlinks{
+    
+    my %CHR;
+    my %CHRID;
+    my @LINKFILES;
+    my @FLINKFILES;
+    
+    my $output_prefix=$CONF{general}{mates_file}.".".$CONF{general}{sv_type};
+    my @path=split(/\//,$output_prefix);
+    $output_prefix=$CONF{general}{output_dir}.$path[$#path];
+    my $tmp_links_prefix=$CONF{general}{tmp_dir}."links/".$path[$#path];
+    
+    createChrHashTables(\%CHR,\%CHRID,
+			$CONF{general}{cmap_file});
+    
+    if($CONF{filtering}{split_link_file}){
+    
+	splitLinkFile(\%CHR, \%CHRID, \@LINKFILES,
+		      $tmp_links_prefix,
+		      $CONF{general}{sv_type},
+		      "$output_prefix.links",
+		      );
+    }else{
+	
+	@LINKFILES=qx{ls $tmp_links_prefix*links} or die "# Error: No splitted link files already created\n";
+	chomp(@LINKFILES);
+	print LOG "# Splitted link files already created.\n";
+    }
+    
+    #Parallelization of the filtering per chromosome for intra +  interchrs
+    my $pm = new Parallel::ForkManager($CONF{general}{num_threads});
+    
+    foreach my $linkfile (@LINKFILES){
+	
+	my $pid = $pm->start and next;
+	getFilteredlinks(\%CHR, \%CHRID, $linkfile);
+	$pm->finish;
+    
+    }
+    $pm->wait_all_children;
+    
+    #Merge the chromosome links file into only one
+    @FLINKFILES= qx{ls $tmp_links_prefix*filtered} or die "# Error: No links files created\n";
+    chomp(@FLINKFILES);
+    catFiles( \@FLINKFILES => "$output_prefix.links.filtered" );
+    
+    system "rm $pt_flinks_file; ln -s $output_prefix.links.filtered $pt_flinks_file" if (defined $pt_flinks_file); #GALAXY
+    print LOG"# Filtering end procedure : output created: $output_prefix.links.filtered\n";
+    
+    undef %CHR;
+    undef %CHRID;
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub splitLinkFile{
+    
+    my ($chr,$chrID,$files_list,$input_prefix,$sv_type,$link_file)=@_;
+    
+    print LOG "# Splitting the link file for parallel processing...\n";
+    
+    my %filesHandle;
+    
+    #fichier matefile inter
+    if($sv_type=~/^(all|inter)$/){
+	my $newFileName="$input_prefix.interchrs.links";
+	push(@{$files_list},$newFileName);
+	my $fh = new FileHandle;
+	$fh->open(">$newFileName");
+	$filesHandle{inter}=$fh;
+    }
+    
+    #fichiers matefiles intra
+    if($sv_type=~/^(all|intra)$/){
+	foreach my $k (1..$chr->{nb_chrs}){
+	    my $newFileName=$input_prefix.".".$chr->{$k}->{name}.".links";
+	    push(@{$files_list},$newFileName);
+	    my $fh = new FileHandle;
+	    $fh->open(">$newFileName");
+	    $filesHandle{$k}=$fh;
+	}
+    }
+    
+    open LINKS, "<".$link_file or die "$0: can't open ".$link_file.":$!\n";
+    while(<LINKS>){
+	
+	my @t=split;
+	my ($chr_read1,$chr_read2)=($t[0],$t[3]);							
+	
+	next unless (exists $chrID->{$chr_read1} && exists $chrID->{$chr_read2});
+	
+	($chr_read1, $chr_read2)= ($chrID->{$chr_read1},$chrID->{$chr_read2});
+	
+	if( ($sv_type=~/^(all|inter)$/) && ($chr_read1 ne $chr_read2) ){
+	    my $fh2print=$filesHandle{inter};
+	    print $fh2print join("\t",@t)."\n";
+	}
+	
+	if( ($sv_type=~/^(all|intra)$/) && ($chr_read1 eq $chr_read2) ){
+	    my $fh2print=$filesHandle{$chr_read1};
+	    print $fh2print join("\t",@t)."\n";
+	    
+	}
+    }
+    
+    foreach my $name (keys %filesHandle){
+	my $fh=$filesHandle{$name};
+	$fh->close;
+    }
+    
+    print LOG "# Splitted link files created.\n";
+}
+
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#MAIN FUNCTION number 2: Filtering processing
+sub getFilteredlinks {
+    
+    my ($chr,$chrID,$tmp_links_prefix)=@_;
+    my %PAIR;
+
+    strandFiltering($chr,$chrID,
+		     $CONF{filtering}{nb_pairs_threshold},			#filtering of links
+		     $CONF{filtering}{strand_filtering},
+		     $CONF{filtering}{chromosomes},
+		     $CONF{general}{input_format},
+		     $CONF{general}{cmap_file},
+		     $CONF{general}{mates_orientation},
+		     $CONF{general}{read_lengths},
+		    $tmp_links_prefix,
+		    "$tmp_links_prefix.filtered",
+		    ); 						
+
+    if($CONF{filtering}{strand_filtering}){					#re-definition of links coordinates with strand filtering
+
+	my @tmpfiles;
+	
+	rename("$tmp_links_prefix.filtered","$tmp_links_prefix.filtered_unique");
+	
+	getUniqueLinks("$tmp_links_prefix.filtered_unique",
+		       "$tmp_links_prefix.filtered");
+	
+	push(@tmpfiles,"$tmp_links_prefix.filtered_unique");
+	
+	if($CONF{filtering}{order_filtering}){					#filtering using the order
+	    
+	    rename("$tmp_links_prefix.filtered","$tmp_links_prefix.filtered_ordered");
+	    
+	    orderFiltering($chr,$chrID,
+			      $CONF{filtering}{nb_pairs_threshold},
+			      $CONF{filtering}{nb_pairs_order_threshold},			   
+			      $CONF{filtering}{mu_length},
+			      $CONF{filtering}{sigma_length},
+			      $CONF{general}{mates_orientation},	      
+			      $CONF{general}{read_lengths},
+			      "$tmp_links_prefix.filtered_ordered", 
+			      "$tmp_links_prefix.filtered",				
+			     );
+	    
+	    push(@tmpfiles,"$tmp_links_prefix.filtered_ordered");
+	}
+	
+	if (($CONF{filtering}{insert_size_filtering})&&
+	    ($CONF{general}{sv_type} ne 'inter')){ 				 
+	    
+	    rename("$tmp_links_prefix.filtered","$tmp_links_prefix.filtered_withoutIndelSize");
+	    
+	    addInsertionInfo($chr,$chrID,
+			     $CONF{filtering}{nb_pairs_threshold},
+			     $CONF{filtering}{order_filtering},
+			     $CONF{filtering}{indel_sigma_threshold},
+			     $CONF{filtering}{dup_sigma_threshold},
+			     $CONF{filtering}{singleton_sigma_threshold}, 
+			     $CONF{filtering}{mu_length},
+			     $CONF{filtering}{sigma_length},
+			     $CONF{general}{mates_orientation},
+			     $CONF{general}{read_lengths},
+			     "$tmp_links_prefix.filtered_withoutIndelSize", 
+			     "$tmp_links_prefix.filtered"			
+			     );
+	    
+	    push(@tmpfiles,"$tmp_links_prefix.filtered_withoutIndelSize");    
+	}
+	
+	sortLinks("$tmp_links_prefix.filtered",
+		  "$tmp_links_prefix.filtered_sorted");
+	
+	removeFullyOverlappedLinks("$tmp_links_prefix.filtered_sorted",
+			           "$tmp_links_prefix.filtered_nodup",
+				   );				
+	
+	postFiltering("$tmp_links_prefix.filtered_nodup",
+		      "$tmp_links_prefix.filtered",
+		       $CONF{filtering}{final_score_threshold});
+	
+	push(@tmpfiles,"$tmp_links_prefix.filtered_sorted","$tmp_links_prefix.filtered_nodup");
+	
+	unlink(@tmpfiles);
+	
+	
+    }
+    undef %PAIR;
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#MAIN FUNCTION number 3: Circos format conversion for links
+sub links2circos{
+    
+    my $input_file=$CONF{general}{mates_file}.".".$CONF{general}{sv_type}.".links.filtered";
+    my @path=split(/\//,$input_file);
+    $input_file=$CONF{general}{output_dir}.$path[$#path];
+    
+    my $output_file.=$input_file.".segdup.txt";
+    
+    links2segdup($CONF{circos}{organism_id},
+		 $CONF{circos}{colorcode},
+		 $input_file,
+		 $output_file);							#circos file output
+    
+    system "rm $pt_circos_file; ln -s $output_file $pt_circos_file" if (defined $pt_circos_file); #GALAXY
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#MAIN FUNCTION number 4: Bed format conversion for links
+sub links2bed{
+    
+    my $input_file=$CONF{general}{mates_file}.".".$CONF{general}{sv_type}.".links.filtered";
+    my @path=split(/\//,$input_file);
+    $input_file=$CONF{general}{output_dir}.$path[$#path];
+    
+    my $output_file.=$input_file.".bed.txt";
+    
+    links2bedfile($CONF{general}{read_lengths},
+		  $CONF{bed}{colorcode},
+		  $input_file,
+		  $output_file);						#bed file output
+    
+    system "rm $pt_bed_file; ln -s $output_file $pt_bed_file" if (defined $pt_bed_file); #GALAXY
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#MAIN FUNCTION number 6: Bed format conversion for links
+sub links2SV{
+    
+    my $input_file=$CONF{general}{mates_file}.".".$CONF{general}{sv_type}.".links.filtered";
+    
+    my @path=split(/\//,$input_file);
+    $input_file=$CONF{general}{output_dir}.$path[$#path];
+    
+    my $output_file.=$input_file.".sv.txt";
+    
+    
+    links2SVfile( $input_file,
+		  $output_file);
+    
+     system "rm $pt_sv_file; ln -s $output_file $pt_sv_file" if (defined $pt_sv_file); #GALAXY	
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#MAIN FUNCTION number 7: copy number variations, coverage ratio calculation
+sub cnv{
+    
+    my %CHR;
+    my %CHRID;
+    my @MATEFILES;
+    my @MATEFILES_REF;
+    
+    my $output_prefix=$CONF{general}{mates_file};
+    my $output_prefix_ref=$CONF{detection}{mates_file_ref};
+    my @path=split(/\//,$output_prefix);
+    my @path_ref=split(/\//,$output_prefix_ref);
+    $output_prefix=$CONF{general}{output_dir}.$path[$#path];
+    $output_prefix_ref=$CONF{general}{output_dir}.$path_ref[$#path_ref];
+    my $tmp_mates_prefix=$CONF{general}{tmp_dir}."mates/".$path[$#path];
+    my $tmp_mates_prefix_ref=$CONF{general}{tmp_dir}."mates/".$path_ref[$#path_ref];
+    my $tmp_density_prefix=$CONF{general}{tmp_dir}."density/".$path[$#path];
+    
+    shearingChromosome(\%CHR, \%CHRID,						#making the genomic fragment library with the detection parameters
+		     $CONF{detection}{window_size},
+		     $CONF{detection}{step_length},
+		     $CONF{general}{cmap_file});
+    
+    if($CONF{detection}{split_mate_file}){
+    
+	splitMateFile(\%CHR, \%CHRID, \@MATEFILES, $tmp_mates_prefix,
+		      "intra",
+		      $CONF{general}{mates_file},
+		      $CONF{general}{input_format},
+		      $CONF{general}{read_lengths}
+		      );
+	
+	splitMateFile(\%CHR, \%CHRID, \@MATEFILES_REF, $tmp_mates_prefix_ref,
+		      "intra",
+		      $CONF{detection}{mates_file_ref},
+		      $CONF{general}{input_format},
+		      $CONF{general}{read_lengths}
+		      );
+	
+	
+    }else{
+	
+	@MATEFILES=qx{ls $tmp_mates_prefix*} or die "# Error: No splitted sample mate files of \"$CONF{general}{mates_file}\" already created at $CONF{general}{tmp_dir} :$!";
+	chomp(@MATEFILES);
+	@MATEFILES_REF=qx{ls $tmp_mates_prefix_ref*} or die "# Error: No splitted reference mate files of \"$CONF{detection}{mates_file_ref}\" already created at $CONF{general}{tmp_dir} :$!";
+	chomp(@MATEFILES_REF);
+	print LOG "# Splitted sample and reference mate files already created.\n";
+    }
+    
+    #Parallelization of the cnv per chromosome
+    my $pm = new Parallel::ForkManager($CONF{general}{num_threads});
+    
+    foreach my $file (0..$#MATEFILES){
+	
+	my $pid = $pm->start and next;
+
+	densityCalculation(\%CHR, \%CHRID, $file,
+	  $CONF{general}{read_lengths},
+	  $CONF{detection}{window_size},
+	  $CONF{detection}{step_length},
+	  \@MATEFILES,
+	  \@MATEFILES_REF,
+	  $MATEFILES[$file].".density",
+	  $CONF{general}{input_format});
+
+	$pm->finish;
+	
+    }
+    $pm->wait_all_children;
+    
+    #Merge the chromosome links file into only one
+    my @DENSITYFILES= qx{ls $tmp_density_prefix*density} or die "# Error: No density files created at $CONF{general}{tmp_dir} :$!";
+    chomp(@DENSITYFILES);
+    catFiles( \@DENSITYFILES => "$output_prefix.density" );
+
+    print LOG "# cnv end procedure : output created: $output_prefix.density\n";
+    
+    
+    undef %CHR;
+    undef %CHRID;
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#MAIN FUNCTION number 8: Circos format conversion for cnv ratios
+sub ratio2circos{
+    
+    my $input_file=$CONF{general}{mates_file}.".density";
+    
+    my @path=split(/\//,$input_file);
+    $input_file=$CONF{general}{output_dir}.$path[$#path];
+    
+    my $output_file.=$input_file.".segdup.txt";
+    
+    ratio2segdup($CONF{circos}{organism_id},		
+		 $input_file,
+		 $output_file);
+}
+#------------------------------------------------------------------------------#
+#MAIN FUNCTION number 9: BedGraph format conversion for cnv ratios
+sub ratio2bedgraph{
+    
+    my $input_file=$CONF{general}{mates_file}.".density";
+    
+    my @path=split(/\//,$input_file);
+    $input_file=$CONF{general}{output_dir}.$path[$#path];
+    
+    my $output_file.=$input_file.".bedgraph.txt";
+    
+    ratio2bedfile($input_file,
+		  $output_file);						#bed file output
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#Creation of the fragment library
+sub shearingChromosome{
+    
+    print LOG "# Making the fragments library...\n";
+    
+    my ($chr,$chrID,$window,$step,$cmap_file)=@_;				#window and step sizes parameters
+
+    createChrHashTables($chr,$chrID,$cmap_file);				#hash tables: chromosome ID <=> chromsomes Name
+    
+    foreach my $k (1..$chr->{nb_chrs}){						
+	
+	print LOG"-- $chr->{$k}->{name}\n";
+	
+	my $frag=1;
+	for (my $start=0; $start<$chr->{$k}->{length}; $start+=$step){
+	    
+	    my $end=($start<($chr->{$k}->{length})-$window)? $start+$window-1:($chr->{$k}->{length})-1;
+	    $chr->{$k}->{$frag}=[$start,$end];					#creation of fragments, coordinates storage
+	    
+	    if($end==($chr->{$k}->{length})-1){
+		$chr->{$k}->{nb_frag}=$frag;					#nb of fragments per chromosome
+		last;
+	    }
+	    $frag++;
+	}
+    }
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#Creation of chromosome hash tables from the cmap file
+sub createChrHashTables{
+    
+    my ($chr,$chrID,$cmap_file)=@_;
+    $chr->{nb_chrs}=0;
+    
+    open CMAP, "<".$cmap_file or die "$0: can't open ".$cmap_file.":$!\n";
+    while(<CMAP>){
+
+	if(/^\s+$/){ next;}
+	my ($k,$name,$length) = split;
+	$chr->{$k}->{name}=$name;
+	$chr->{$k}->{length}=$length;
+	$chrID->{$name}=$k;
+	$chr->{nb_chrs}++;
+	
+    }
+    close CMAP;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#Read the mate file according the input format file (solid, eland or sam)
+sub readMateFile{
+    
+    my ($chr1,$chr2,$pos1,$pos2,$order1,$order2,$t,$file_type,$tag_length)=@_;
+    my ($strand1,$strand2);
+    
+    if($file_type eq "solid"){
+	
+	($$chr1,$$chr2,$$pos1,$$pos2,$$order1,$$order2)=($$t[6],$$t[7],$$t[8]+1,$$t[9]+1,1,2); #0-based
+	
+    }else{
+	my ($tag_length1,$tag_length2);
+	($$chr1,$$chr2,$$pos1,$strand1,$$pos2,$strand2,$$order1,$$order2,$tag_length1,$tag_length2)=($$t[11],$$t[12],$$t[7],$$t[8],$$t[9],$$t[10],1,2,length($$t[1]),length($$t[2])) #1-based
+	if($file_type eq "eland");
+	
+	if($file_type eq "sam"){
+	    
+	    return 0 if ($$t[0]=~/^@/);						#header sam filtered out
+	   
+	    ($$chr1,$$chr2,$$pos1,$$pos2)=($$t[2],$$t[6],$$t[3],$$t[7]);
+	   
+	    return 0 if (($$t[1]&0x0004) || ($$t[1]&0x0008));
+	   
+	    $$chr2=$$chr1 if($$chr2 eq "=");
+	   
+	    $strand1 = (($$t[1]&0x0010))? 'R':'F';
+	    $strand2 = (($$t[1]&0x0020))? 'R':'F';
+	    
+	    $$order1=  (($$t[1]&0x0040))? '1':'2';
+	    $$order2=  (($$t[1]&0x0080))? '1':'2';
+	    $tag_length1 = $tag_length->{$$order1};
+	    $tag_length2 = $tag_length->{$$order2};
+	}
+	
+	$$pos1 = -($$pos1+$tag_length1) if ($strand1 eq "R");		#get sequencing starts
+	$$pos2 = -($$pos2+$tag_length2) if ($strand2 eq "R");
+    }
+    return 1;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#Parsing of the mates files and creation of links between 2 chromosomal fragments
+sub linking{
+    
+    my ($chr,$chrID,$pair,$tag_length,$window_dist,$step,$mates_file,$input_format,$sv_type,$links_file)=@_;
+    my %link;
+    
+    my $record=0;
+    my $nb_links=0;
+    my $warn=10000;
+    
+    my @sfile=split(/\./,$mates_file);
+    my $fchr=$sfile[$#sfile];
+    
+    my $fh = new FileHandle;
+   
+    print LOG "# $fchr : Linking procedure...\n";
+    print LOG "-- file=$mates_file\n".
+	 "-- chromosome=$fchr\n".
+	 "-- input format=$input_format\n".
+	 "-- type=$sv_type\n".
+	 "-- read1 length=$tag_length->{1}, read2 length=$tag_length->{2}\n".
+	 "-- window size=$window_dist, step length=$step\n";
+    
+    if ($mates_file =~ /.gz$/) {
+	$fh->open("gunzip -c $mates_file |") or die "$0: can't open ".$mates_file.":$!\n"; #gzcat
+    }elsif($mates_file =~ /.bam$/){
+	$fh->open("$SAMTOOLS_BIN_DIR/samtools view $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";#GALAXY
+    }else{
+	$fh->open("<".$mates_file) or die "$0: can't open ".$mates_file.":$!\n";
+    }
+    
+    
+    while(<$fh>){
+	
+	my @t=split;								#for each mate-pair
+	my $mate=$t[0];
+	my ($chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1,$end_order_read2);
+	
+	next if(exists $$pair{$mate});
+	
+	next if (!readMateFile(\$chr_read1, \$chr_read2, \$firstbase_read1, \$firstbase_read2, \$end_order_read1, \$end_order_read2, \@t, $input_format,$tag_length));
+	
+	next unless (exists $chrID->{$chr_read1} && exists $chrID->{$chr_read2});
+	
+	($chr_read1, $chr_read2)= ($chrID->{$chr_read1},$chrID->{$chr_read2});
+	
+	if($sv_type ne "all"){
+	    if( ($sv_type eq "inter") && ($chr_read1 ne $chr_read2) ||
+	        ($sv_type eq "intra") && ($chr_read1 eq $chr_read2) ){
+	    }else{
+		next;
+	    }
+	}
+	
+	$$pair{$mate}=[$chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1, $end_order_read2 ]; 	#fill out the hash pair table (ready for the defineCoordsLinks function)
+	
+	$record++; 
+	
+	my ($coord_start_read1,$coord_end_read1,$coord_start_read2,$coord_end_read2);	#get the coordinates of each read
+	
+	recupCoords($firstbase_read1,\$coord_start_read1,\$coord_end_read1,$tag_length->{$end_order_read1},$input_format);
+	recupCoords($firstbase_read2,\$coord_start_read2,\$coord_end_read2,$tag_length->{$end_order_read2},$input_format);
+	
+	for(my $i=1;$i<=$chr->{$chr_read1}->{'nb_frag'};$i++){			#fast genome parsing for link creation
+	    
+	    if (abs ($coord_start_read1-${$chr->{$chr_read1}->{$i}}[0]) <= $window_dist){
+		
+		if(overlap($coord_start_read1,$coord_end_read1,${$chr->{$chr_read1}->{$i}}[0],${$chr->{$chr_read1}->{$i}}[1])){
+		    
+		    for(my $j=1;$j<=$chr->{$chr_read2}->{'nb_frag'};$j++){
+			
+			if (abs ($coord_start_read2-${$chr->{$chr_read2}->{$j}}[0]) <= $window_dist) {
+			    
+			    if(overlap($coord_start_read2,$coord_end_read2,${$chr->{$chr_read2}->{$j}}[0],${$chr->{$chr_read2}->{$j}}[1])){
+				
+				makeLink(\%link,$chr_read1,$i,$chr_read2,$j,$mate,\$nb_links);	#make the link
+			    }
+			    
+			}else{
+			    
+			    $j=getNextFrag($coord_start_read2,$j,${$chr->{$chr_read2}->{$j}}[0],$chr->{$chr_read2}->{nb_frag},$window_dist,$step);
+			}				
+		    }
+		}
+		
+	    }else{
+		
+		$i=getNextFrag($coord_start_read1,$i,${$chr->{$chr_read1}->{$i}}[0],$chr->{$chr_read1}->{nb_frag},$window_dist,$step);
+	    }
+	}
+	
+	if($record>=$warn){
+	    print LOG "-- $fchr : $warn mate-pairs analysed - $nb_links links done\n";
+	    $warn+=10000;
+	}
+    }
+    $fh->close;
+    
+    if(!$nb_links){
+	print LOG "-- $fchr : No mate-pairs !\n".
+	 "-- $fchr : No links have been found with the selected type of structural variations \($sv_type\)\n";
+    }
+    
+    print LOG "-- $fchr : Total : $record mate-pairs analysed - $nb_links links done\n";
+    
+    print LOG "-- $fchr : writing...\n";
+    
+    $fh = new FileHandle;
+    
+    $fh->open(">".$links_file) or die "$0: can't write in the output ".$links_file." :$!\n";
+    
+    foreach my $chr1 ( sort { $a <=> $b} keys %link){				#Sorted links output
+	
+	foreach my $chr2 ( sort { $a <=> $b} keys %{$link{$chr1}}){
+	    
+	    foreach my $frag1 ( sort { $a <=> $b} keys %{$link{$chr1}{$chr2}}){
+		
+		foreach my $frag2 ( sort { $a <=> $b} keys %{$link{$chr1}{$chr2}{$frag1}}){
+		
+		    my @count=split(",",$link{$chr1}{$chr2}{$frag1}{$frag2});
+		    print $fh "$chr->{$chr1}->{name}\t".(${$chr->{$chr1}->{$frag1}}[0]+1)."\t".(${$chr->{$chr1}->{$frag1}}[1]+1)."\t".
+				"$chr->{$chr2}->{name}\t".(${$chr->{$chr2}->{$frag2}}[0]+1)."\t".(${$chr->{$chr2}->{$frag2}}[1]+1)."\t".
+				scalar @count."\t".				#nb of read
+				$link{$chr1}{$chr2}{$frag1}{$frag2}."\n";	#mate list
+		}  
+	    }
+	}
+    }
+    
+    $fh->close;
+    
+    undef %link;
+
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#remove exact links doublons according to the mate list
+sub getUniqueLinks{
+    
+    my ($links_file,$nrlinks_file)=@_;
+    my %links;
+    my %pt;
+    my $nb_links;
+    my $n=1;
+    
+    my $record=0;
+    my $warn=300000;
+    
+    my @sfile=split(/\./,$links_file);
+    my $fchr=$sfile[$#sfile-2];
+    
+    my $fh = new FileHandle;
+
+    print LOG "# $fchr : Getting unique links...\n";
+    $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
+   
+    while(<$fh>){
+	
+	my @t=split;
+	my $mates=$t[7];
+	$record++;
+	
+	if(!exists $links{$mates}){						#Unique links selection
+	       
+	    $links{$mates}=[@t];
+	    $pt{$n}=$links{$mates};
+	    $n++;
+	    
+	
+	}else{									#get the link coordinates from the mate-pairs list
+	    
+	    for my $i (1,2,4,5){						#get the shortest regions
+		
+		$links{$mates}->[$i]=($t[$i]>$links{$mates}->[$i])? $t[$i]:$links{$mates}->[$i]	#maximum start
+		if($i==1 || $i==4);
+		$links{$mates}->[$i]=($t[$i]<$links{$mates}->[$i])? $t[$i]:$links{$mates}->[$i]	#minimum end
+		if($i==2 || $i==5);
+	    }
+	}
+	if($record>=$warn){
+	    print LOG "-- $fchr : $warn links analysed - ".($n-1)." unique links done\n";
+	    $warn+=300000;
+	}
+    }
+    $fh->close;
+    
+    $nb_links=$n-1;
+    print LOG "-- $fchr : Total : $record links analysed - $nb_links unique links done\n";
+    
+    $fh = new FileHandle;
+    $fh->open(">$nrlinks_file") or die "$0: can't write in the output: $nrlinks_file :$!\n";
+    print LOG "-- $fchr : writing...\n";
+    for my $i (1..$nb_links){
+	
+	print $fh join("\t",@{$pt{$i}})."\n";				#all links output
+    }
+    
+    $fh->close;
+    
+    undef %links;
+    undef %pt;
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#get the new coordinates of each link from the mate list
+sub defineCoordsLinks{
+    
+    my ($chr,$chrID,$pair,$input_format,$sv_type,$tag_length,$links_file,$clinks_file)=@_;
+    
+    my @sfile=split(/\./,$links_file);
+    my $fchr=$sfile[$#sfile-2];
+    
+    my $fh = new FileHandle;
+    my $fh2 = new FileHandle;
+    
+    $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
+    $fh2->open(">$clinks_file") or die "$0: can't write in the output: $clinks_file :$!\n";
+    
+    print LOG "# $fchr : Defining precise link coordinates...\n";
+    
+    my $record=0;
+    my $warn=100000;
+    
+    my %coords;
+    my %strands;
+    my %order;
+    my %ends_order;
+    
+    while(<$fh>){
+	
+
+	my ($col1,$col2)=(1,2);							#for an intrachromosomal link
+	my $diffchr=0;								#difference between chr1 and chr2
+	my ($chr1,$chr2,$mates_list,$npairs)=(split)[0,3,7,8];
+	($chr1,$chr2) = ($chrID->{$chr1},$chrID->{$chr2});
+	if ($chr1 != $chr2){							#for an interchromosomal link
+	    $col1=$col2=0;							#no distinction
+	    $diffchr=1;
+	}
+	
+	my @pairs=split(",",$mates_list);
+	
+	$coords{$col1}{$chr1}->{start}=undef;
+	$coords{$col1}{$chr1}->{end}=undef;
+	$coords{$col2}{$chr2}->{start}=undef;
+	$coords{$col2}{$chr2}->{end}=undef;
+	$strands{$col1}{$chr1}=undef;
+	$strands{$col2}{$chr2}=undef;
+	$ends_order{$col1}{$chr1}=undef;
+	$ends_order{$col2}{$chr2}=undef;
+	
+	
+	$order{$col1}{$chr1}->{index}->{1}=undef;
+	$order{$col1}{$chr1}->{index}->{2}=undef;
+	$order{$col2}{$chr2}->{index}->{1}=undef;
+	$order{$col2}{$chr2}->{index}->{2}=undef;
+	$order{$col1}{$chr1}->{order}=undef;
+	$order{$col2}{$chr2}->{order}=undef;
+	
+	$record++;
+
+	for my $p (0..$#pairs){							#for each pair
+	    
+	    my ($coord_start_read1,$coord_end_read1);
+	    my ($coord_start_read2,$coord_end_read2);
+	    my $strand_read1=recupCoords(${$$pair{$pairs[$p]}}[2],\$coord_start_read1,\$coord_end_read1,$tag_length->{${$$pair{$pairs[$p]}}[4]},$input_format);
+	    my $strand_read2=recupCoords(${$$pair{$pairs[$p]}}[3],\$coord_start_read2,\$coord_end_read2,$tag_length->{${$$pair{$pairs[$p]}}[5]},$input_format);
+
+	    if(!$diffchr){							#for a intrachromosomal link
+		if($coord_start_read2<$coord_start_read1){				#get the closer start coordinate for each column
+		    ($col1,$col2)=(2,1);
+		}else{
+		    ($col1,$col2)=(1,2);
+		}
+	    }
+	    
+	    push(@{$coords{$col1}{${$$pair{$pairs[$p]}}[0]}->{start}},$coord_start_read1);	#get coords and strands of f3 and r3 reads
+	    push(@{$coords{$col1}{${$$pair{$pairs[$p]}}[0]}->{end}},$coord_end_read1);
+	    push(@{$coords{$col2}{${$$pair{$pairs[$p]}}[1]}->{start}},$coord_start_read2);
+	    push(@{$coords{$col2}{${$$pair{$pairs[$p]}}[1]}->{end}},$coord_end_read2);
+	    push(@{$strands{$col1}{${$$pair{$pairs[$p]}}[0]}},$strand_read1);
+	    push(@{$strands{$col2}{${$$pair{$pairs[$p]}}[1]}},$strand_read2);
+	    push(@{$ends_order{$col1}{${$$pair{$pairs[$p]}}[0]}},${$$pair{$pairs[$p]}}[4]);
+	    push(@{$ends_order{$col2}{${$$pair{$pairs[$p]}}[1]}},${$$pair{$pairs[$p]}}[5]);
+	}
+	
+	($col1,$col2)=(1,2) if(!$diffchr);
+	
+	my $coord_start_chr1=min(min(@{$coords{$col1}{$chr1}->{start}}),min(@{$coords{$col1}{$chr1}->{end}}));		#get the biggest region
+	my $coord_end_chr1=max(max(@{$coords{$col1}{$chr1}->{start}}),max(@{$coords{$col1}{$chr1}->{end}}));		
+	my $coord_start_chr2=min(min(@{$coords{$col2}{$chr2}->{start}}),min(@{$coords{$col2}{$chr2}->{end}}));	
+	my $coord_end_chr2=max(max(@{$coords{$col2}{$chr2}->{start}}),max(@{$coords{$col2}{$chr2}->{end}}));
+	
+	@{$order{$col1}{$chr1}->{index}->{1}}= sort {${$coords{$col1}{$chr1}->{start}}[$a] <=>  ${$coords{$col1}{$chr1}->{start}}[$b]} 0 .. $#{$coords{$col1}{$chr1}->{start}};
+	@{$order{$col2}{$chr2}->{index}->{1}}= sort {${$coords{$col2}{$chr2}->{start}}[$a] <=>  ${$coords{$col2}{$chr2}->{start}}[$b]} 0 .. $#{$coords{$col2}{$chr2}->{start}};
+	
+	foreach my $i (@{$order{$col1}{$chr1}->{index}->{1}}){			#get the rank of the chr2 reads according to the sorted chr1 reads (start coordinate sorting)
+	    foreach my $j (@{$order{$col2}{$chr2}->{index}->{1}}){
+		
+		if(${$order{$col1}{$chr1}->{index}->{1}}[$i] == ${$order{$col2}{$chr2}->{index}->{1}}[$j]){
+		    ${$order{$col1}{$chr1}->{index}->{2}}[$i]=$i;
+		    ${$order{$col2}{$chr2}->{index}->{2}}[$i]=$j;
+		    last;
+		}
+	    }
+	}
+	
+	foreach my $i (@{$order{$col1}{$chr1}->{index}->{2}}){			#use rank chr1 as an ID
+	    foreach my $j (@{$order{$col2}{$chr2}->{index}->{2}}){
+		
+		if(${$order{$col1}{$chr1}->{index}->{2}}[$i] == ${$order{$col2}{$chr2}->{index}->{2}}[$j]){
+		    ${$order{$col1}{$chr1}->{order}}[$i]=$i+1;
+		    ${$order{$col2}{$chr2}->{order}}[$i]=$j+1;
+		    last;
+		}
+	    }
+	}
+	
+	@pairs=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},\@pairs);#sorting of the pairs, strands, and start coords from the sorted chr2 reads
+	@{$strands{$col1}{$chr1}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$strands{$col1}{$chr1});
+	@{$strands{$col2}{$chr2}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$strands{$col2}{$chr2});
+	@{$ends_order{$col1}{$chr1}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$ends_order{$col1}{$chr1});
+	@{$ends_order{$col2}{$chr2}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$ends_order{$col2}{$chr2});
+	@{$coords{$col1}{$chr1}->{start}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$coords{$col1}{$chr1}->{start});
+	@{$coords{$col2}{$chr2}->{start}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$coords{$col2}{$chr2}->{start});
+	
+	
+	my @link=($chr->{$chr1}->{name}, $coord_start_chr1 , $coord_end_chr1,	#all information output
+		  $chr->{$chr2}->{name}, $coord_start_chr2 , $coord_end_chr2,
+		  scalar @pairs,
+		  join(",",@pairs),
+		  join(",",@{$strands{$col1}{$chr1}}),
+		  join(",",@{$strands{$col2}{$chr2}}),
+		  join(",",@{$ends_order{$col1}{$chr1}}),
+		  join(",",@{$ends_order{$col2}{$chr2}}),
+		  join(",",@{$order{$col1}{$chr1}->{order}}),
+		  join(",",@{$order{$col2}{$chr2}->{order}}),
+		  join(",",@{$coords{$col1}{$chr1}->{start}}),
+		  join(",",@{$coords{$col2}{$chr2}->{start}}));
+	
+	print $fh2 join("\t",@link)."\n";
+	
+	if($record>=$warn){
+	    print LOG "-- $fchr : $warn links processed\n";
+	    $warn+=100000;
+	}
+    }
+    $fh->close;
+    $fh2->close;
+    
+    print LOG "-- $fchr : Total : $record links processed\n";
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#Sort links according the concerned chromosomes and their coordinates
+sub sortLinks{
+    
+    my ($links_file,$sortedlinks_file,$unique)=@_;
+    
+    my @sfile=split(/\./,$links_file);
+    my $fchr=$sfile[$#sfile-2];
+    
+    
+    print LOG "# $fchr : Sorting links...\n";
+    
+    my $pipe=($unique)? "| sort -u":"";
+    system "sort -k 1,1 -k 4,4 -k 2,2n -k 5,5n -k 8,8n $links_file $pipe > $sortedlinks_file";
+
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#removal of fully overlapped links
+sub removeFullyOverlappedLinks{
+    
+    my ($links_file,$nrlinks_file,$warn_out)=@_;
+    
+    my %pt;
+    my $n=1;
+    
+    my @sfile=split(/\./,$links_file);
+    my $fchr=$sfile[$#sfile-2];
+    
+    my $fh = new FileHandle;
+    
+    $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
+    while(<$fh>){
+	
+	my @t=split("\t",$_);
+	$pt{$n}=[@t];
+	$n++;
+    }
+    $fh->close;
+    
+    my $nb_links=$n-1;
+    my $nb=$nb_links;
+
+    my %pt2;
+    my $nb2=1;
+    my $record=0;
+    my $warn=10000;
+    
+    print LOG "# $fchr : Removing fully overlapped links...\n";
+    
+    LINK:
+
+    for my $i (1..$nb){
+	    
+	my @link=();
+	my @next_link=();
+	my $ind1=$i;
+	
+	$record++;
+	if($record>=$warn){
+	    print LOG "-- $fchr : $warn unique links analysed - ".($nb2-1)." non-overlapped links done\n";
+	    $warn+=10000;
+	}
+	
+	if(exists $pt{$ind1}){
+	    @link=@{$pt{$ind1}};                                                #link1
+	}else{
+	    next LINK;
+	}
+	
+	my ($chr1,$start1,$end1,$chr2,$start2,$end2)=($link[0],$link[1],$link[2],$link[3],$link[4],$link[5]);	#get info of link1
+	my @mates=deleteBadOrderSensePairs(split(",",$link[7]));
+    
+	my $ind2=$ind1+1;
+	$ind2++ while (!exists $pt{$ind2}&& $ind2<=$nb);			#get the next found link
+	
+	if($ind2<=$nb){
+	    
+	    @next_link=@{$pt{$ind2}};					        #link2
+	    my ($chr3,$start3,$end3,$chr4,$start4,$end4)=($next_link[0],$next_link[1],$next_link[2],$next_link[3],$next_link[4],$next_link[5]); #get info of link2
+	    my @next_mates=deleteBadOrderSensePairs(split(",",$next_link[7]));
+	  
+	    while(($chr1 eq $chr3 && $chr2 eq $chr4) && overlap($start1,$end1,$start3,$end3)){	#loop here according to the chr1 coordinates, need an overlap between links to enter
+		    
+		if(!overlap($start2,$end2,$start4,$end4)){			#if no overlap with chr2 coordinates ->next link2
+		    
+		    $ind2++;
+		    $ind2++ while (!exists $pt{$ind2}&& $ind2<=$nb);
+		   
+		    if($ind2>$nb){						#if no more link in the file -> save link1
+			
+			$pt2{$nb2}=\@link;
+			$nb2++;
+			next LINK;
+		    }
+		    
+		    @next_link=@{$pt{$ind2}};
+		    ($chr3,$start3,$end3,$chr4,$start4,$end4)=($next_link[0],$next_link[1],$next_link[2],$next_link[3],$next_link[4],$next_link[5]);
+		    @next_mates=deleteBadOrderSensePairs(split(",",$next_link[7]));
+		    next;
+		}
+		
+		my %mates=map{$_ =>1} @mates;					#get the equal number of mates
+		my @same_mates = grep( $mates{$_}, @next_mates );
+		my $nb_mates= scalar @same_mates;
+	      
+		if($nb_mates == scalar @mates){
+		    
+		    delete $pt{$ind1};						#if pairs of link 1 are all included in link 2 -> delete link1
+		    next LINK;							#go to link2, link2 becomes link1
+		    
+		}else{
+			delete $pt{$ind2} if($nb_mates == scalar @next_mates);	#if pairs of link2 are all included in link 1 -> delete link2
+			$ind2++;							#we continue by checking the next link2
+			$ind2++ while (!exists $pt{$ind2}&& $ind2<=$nb);
+			
+			if($ind2>$nb){						#if no more link in the file -> save link1
+			  
+			    $pt2{$nb2}=\@link;
+			    $nb2++;
+			    next LINK;
+			}
+		    
+			@next_link=@{$pt{$ind2}};					#get info of link2
+			($chr3,$start3,$end3,$chr4,$start4,$end4)=($next_link[0],$next_link[1],$next_link[2],$next_link[3],$next_link[4],$next_link[5]);
+			@next_mates=deleteBadOrderSensePairs(split(",",$next_link[7]));
+			
+		}
+	    }
+	}
+	$pt2{$nb2}=\@link;							#if no (more) link with chr1 coordinates overlap -> save link1
+	$nb2++;
+    }																																				
+   
+    print LOG "-- $fchr : Total : $nb_links unique links analysed - ".($nb2-1)." non-overlapped links done\n";
+    
+    #OUTPUT
+    
+    $fh = new FileHandle;
+    $fh->open(">$nrlinks_file") or die "$0: can't write in the output: $nrlinks_file :$!\n";
+    print LOG "-- $fchr :  writing...\n";
+    for my $i (1..$nb2-1){
+	
+	print $fh join("\t",@{$pt2{$i}});				#all links output
+    }
+    
+    close $fh;
+    
+    print LOG "-- $fchr : output created: $nrlinks_file\n" if($warn_out);
+    
+    undef %pt;
+    undef %pt2;
+}
+#------------------------------------------------------------------------------#
+sub postFiltering {
+    
+    my ($links_file,$pflinks_file, $finalScore_thres)=@_;
+    
+    my @sfile=split(/\./,$links_file);
+    my $fchr=$sfile[$#sfile-2];
+    
+    
+    my ($nb,$nb2)=(0,0);
+    
+    print LOG "# $fchr : Post-filtering links...\n";
+    print LOG "-- $fchr : final score threshold = $finalScore_thres\n";
+    
+    my $fh = new FileHandle;
+    my $fh2 = new FileHandle;
+    
+    $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
+    $fh2->open(">$pflinks_file") or die "$0: can't write in the output: $pflinks_file :$!\n";
+    
+    
+    while(<$fh>){
+	
+	my @t=split("\t",$_);
+	my $score=$t[$#t-1];
+	
+	if($score >= $finalScore_thres){
+	    print $fh2 join("\t", @t);
+	    $nb2++;
+	}
+	$nb++;
+    }
+    $fh->close;
+    $fh2->close;
+    
+    print LOG "-- $fchr : Total : $nb unique links analysed - $nb2 links kept\n";
+    print LOG "-- $fchr : output created: $pflinks_file\n";
+}
+
+
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#Filtering of the links
+sub strandFiltering{
+    
+    my($chr,$chrID,$pairs_threshold,$strand_filtering,$chromosomes,
+       $input_format,$cmap_file,$mate_sense, $tag_length,$links_file,$flinks_file)=@_;
+    
+    my @sfile=split(/\./,$links_file);
+    my $fchr=$sfile[$#sfile-1];
+    
+    
+    my %chrs;
+    my %chrs1;
+    my %chrs2;
+    my $nb_chrs;
+    my $exclude;
+    
+    if($chromosomes){
+	my @chrs=split(",",$chromosomes);
+	$nb_chrs=scalar @chrs;
+	$exclude=($chrs[0]=~/^\-/)? 1:0;
+	for my $chrName (@chrs){
+	    $chrName=~s/^(\-)//;
+	    my $col=($chrName=~s/_(1|2)$//);
+	    
+	    if(!$col){
+		$chrs{$chrID->{$chrName}}=undef 
+	    }else{
+		$chrs1{$chrID->{$chrName}}=undef if($1==1);
+		$chrs2{$chrID->{$chrName}}=undef if($1==2);
+	    }
+	}
+    }
+ 
+    my $record=0;
+    my $nb_links=0;
+    my $warn=10000;
+    
+    my $sens_ratio_threshold=0.6;
+    
+    print LOG "\# Filtering procedure...\n";
+    print LOG "\# Number of pairs and strand filtering...\n";
+    print LOG "-- file=$links_file\n";
+    print LOG "-- nb_pairs_threshold=$pairs_threshold, strand_filtering=".(($strand_filtering)? "yes":"no").
+	 ", chromosomes=".(($chromosomes)? "$chromosomes":"all")."\n";
+    
+    
+    
+    my $fh = new FileHandle;
+    my $fh2 = new FileHandle;
+    
+    $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
+    $fh2->open(">$flinks_file") or die "$0: can't write in the output: $flinks_file :$!\n";
+
+    while(<$fh>){
+	
+	my @t=split;	#for each link
+	my $is_good=1;
+	$record++;
+	
+	
+	if($chromosomes){
+	    
+	    my ($chr1,$chr2)=($chrID->{$t[0]},$chrID->{$t[3]});
+	    
+	    if(!$exclude){
+		$is_good=(exists $chrs{$chr1} && exists $chrs{$chr2})? 1:0;
+		$is_good=(exists $chrs1{$chr1} && exists $chrs2{$chr2})? 1:0 if(!$is_good);
+		$is_good=($nb_chrs==1 && (exists $chrs1{$chr1} || exists $chrs2{$chr2}))? 1:0 if(!$is_good);
+	    }else{
+		$is_good=(exists $chrs{$chr1} || exists $chrs{$chr2})? 0:1;
+		$is_good=(exists $chrs1{$chr1} || exists $chrs2{$chr2})? 0:1 if($is_good);
+	    }
+	}
+	
+	$is_good = ($is_good && $t[6] >= $pairs_threshold)? 1 :0;		#filtering according the number of pairs
+	if($is_good && $strand_filtering){					#if filtering according the strand sense
+	    
+	    my @mates=split(/,/,$t[7]);						#get the concordant pairs in the strand sense
+	    my @strands1=split(/,/,$t[8]);
+	    my @strands2=split(/,/,$t[9]);
+	    
+	    my %mate_class=( 'FF' => 0, 'RR' => 0, 'FR' => 0,  'RF' => 0);
+	    
+	    my %mate_reverse=( 'FF' => 'RR', 'RR' => 'FF',			#group1: FF,RR
+		       'FR' => 'RF', 'RF' => 'FR');				#group2: FR,RF
+	    
+	    my %mate_class2=( $mate_sense=>"NORMAL_SENSE", inverseSense($mate_sense)=>"NORMAL_SENSE",
+		    substr($mate_sense,0,1).inverseSense(substr($mate_sense,1,1))=>"REVERSE_SENSE",
+		    inverseSense(substr($mate_sense,0,1)).substr($mate_sense,1,1)=>"REVERSE_SENSE");
+	    
+	    if($t[6] == 1){
+		
+		push(@t,$mate_class2{$strands1[0].$strands2[0]},"1/1",1,1);
+		
+	    }else{
+		
+		tie (my %class,'Tie::IxHash');
+		my $split;
+		
+		foreach my $i (0..$#mates){
+		    $mate_class{$strands1[$i].$strands2[$i]}++;		#get the over-represented group
+		}
+		
+		my $nb_same_sens_class=$mate_class{FF}+$mate_class{RR};
+		my $nb_diff_sens_class=$mate_class{FR}+$mate_class{RF};
+		my $sens_ratio=max($nb_same_sens_class,$nb_diff_sens_class)/($nb_same_sens_class+$nb_diff_sens_class);
+		
+		if($sens_ratio < $sens_ratio_threshold){
+		    %class=(1=>'FF', 2=>'FR');
+		    $split=1;
+		}else{
+		    $class{1}=($nb_same_sens_class >  $nb_diff_sens_class)? 'FF':'FR';	#if yes get the concerned class
+		    $split=0;
+		}
+		
+		$is_good=getConsistentSenseLinks(\@t,\@mates,\@strands1,\@strands2,$tag_length,$mate_sense,\%mate_reverse,\%mate_class2,\%class,$split,$pairs_threshold);
+	    }
+	}
+	
+	if($is_good){								#PRINT
+	    
+	    my $nb=scalar @t;
+	    if($nb > 20){
+		my @t2=splice(@t,0,20);
+		print $fh2 join("\t",@t2)."\n";
+		$nb_links++;
+	    }
+	    $nb_links++;
+	    print $fh2 join("\t",@t)."\n";
+	}
+	
+	if($record>=$warn){
+	   print LOG "-- $fchr : $warn links analysed - $nb_links links kept\n";
+	    $warn+=10000;
+	}
+    }
+    $fh->close;
+    $fh2->close;
+    
+    print LOG "-- $fchr : No links have been found with the selected filtering parameters\n" if(!$nb_links);
+    
+    print LOG "-- $fchr : Total : $record links analysed - $nb_links links kept\n";
+    
+
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub getConsistentSenseLinks{
+    
+    my ($t,$mates,$strands1,$strands2,$tag_length,$mate_sense, $mate_reverse,$mate_class2, $class, $split,$thres)=@_;
+    
+    my $npairs=scalar @$mates;
+
+    my @ends_order1 = split (/,/,$$t[10]);
+    my @ends_order2 = split (/,/,$$t[11]);
+    my @order1 = split (/,/,$$t[12]);
+    my @order2 = split (/,/,$$t[13]);
+    my @positions1 = split (/,/,$$t[14]);
+    my @positions2 = split (/,/,$$t[15]);
+    
+    my @newlink;
+
+    foreach my $ind (keys %{$class} ){
+	
+	tie (my %flink,'Tie::IxHash');
+	my @orders2remove=();
+	
+	foreach my $i (0..$#{$mates}){					#get the pairs belonging the over-represented group
+	
+	    if((($$strands1[$i].$$strands2[$i]) eq $$class{$ind}) || (($$strands1[$i].$$strands2[$i]) eq $$mate_reverse{$$class{$ind}})){
+		push(@{$flink{mates}},$$mates[$i]);
+		push(@{$flink{strands1}},$$strands1[$i]);
+		push(@{$flink{strands2}},$$strands2[$i]);
+		push(@{$flink{ends_order1}},$ends_order1[$i]);
+		push(@{$flink{ends_order2}},$ends_order2[$i]);
+		push(@{$flink{positions1}},$positions1[$i]);
+		push(@{$flink{positions2}},$positions2[$i]);
+			    
+	    }else{
+		push(@orders2remove,$order1[$i]);
+	    }
+	}
+	
+	@{$flink{order1}}=();
+	@{$flink{order2}}=();
+	if(scalar @orders2remove > 0){
+	    getNewOrders(\@order1,\@order2,\@orders2remove,$flink{order1},$flink{order2})
+	}else{
+	    @{$flink{order1}}=@order1;
+	    @{$flink{order2}}=@order2;
+	}
+	
+	my @ends1; getEnds(\@ends1,$flink{positions1},$flink{strands1},$flink{ends_order1},$tag_length);
+	my @ends2; getEnds(\@ends2,$flink{positions2},$flink{strands2},$flink{ends_order2},$tag_length);
+	
+	my $fnpairs=scalar @{$flink{mates}};
+	my $strand_filtering_ratio=$fnpairs."/".$npairs;
+	my $real_ratio=$fnpairs/$npairs;
+	
+	if($fnpairs>=$thres){							#filtering according the number of pairs
+	    
+	    push(@newlink,
+		$$t[0],
+		min(min(@{$flink{positions1}}),min(@ends1)),
+		max(max(@{$flink{positions1}}),max(@ends1)),
+		$$t[3],
+		min(min(@{$flink{positions2}}),min(@ends2)),
+		max(max(@{$flink{positions2}}),max(@ends2)),
+		$fnpairs,
+		join(",",@{$flink{mates}}),
+		join(",",@{$flink{strands1}}),
+		join(",",@{$flink{strands2}}),
+		join(",",@{$flink{ends_order1}}),
+		join(",",@{$flink{ends_order2}}),
+		join(",",@{$flink{order1}}),
+		join(",",@{$flink{order2}}),
+		join(",",@{$flink{positions1}}),
+		join(",",@{$flink{positions2}}),
+		$$mate_class2{${$flink{strands1}}[0].${$flink{strands2}}[0]},
+		$strand_filtering_ratio,
+		$real_ratio,
+		$npairs
+	    );
+	}
+    }
+    
+    if (grep {defined($_)} @newlink) {
+	@$t=@newlink;
+	return 1
+    }
+    return 0;
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub getNewOrders{
+    
+    my($tab1,$tab2,$list,$newtab1,$newtab2)=@_;
+    my $j=1;
+    my $k=1;
+    for my $i (0..$#{$tab2}){
+	my $c=0;
+	for my $j (0..$#{$list}){
+	    $c++ if(${$list}[$j] < ${$tab2}[$i]);
+	    if(${$list}[$j] == ${$tab2}[$i]){
+		$c=-1; last;
+	    }
+	}
+	if($c!=-1){
+	    push(@{$newtab2}, ${$tab2}[$i]-$c);
+	    push(@{$newtab1}, $k);
+	    $k++;
+	}
+    }
+}
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#Filtering of the links using their order
+sub orderFiltering {
+
+    my ($chr,$chrID,$nb_pairs_threshold,$nb_pairs_order_threshold,$mu,$sigma,$mate_sense,$tag_length,$links_file,$flinks_file)=@_;
+    
+    my @sfile=split(/\./,$links_file);
+    my $fchr=$sfile[$#sfile-2];
+    
+    
+    my $diff_sense_ends=(($mate_sense eq "FR") || ($mate_sense eq "RF"))? 1:0; 
+    
+    my $record=0;
+    my $warn=10000;
+    my $nb_links=0;
+    
+    my $quant05 = 1.644854;
+    my $quant001 = 3.090232;
+    my $alphaDist = $quant05 * 2 * $sigma;
+    my $maxFragmentLength = &floor($quant001 * $sigma + $mu);
+    
+    print LOG "\# Filtering by order...\n";
+    print LOG "-- mu length=$mu, sigma length=$sigma, nb pairs order threshold=$nb_pairs_order_threshold\n";
+    print LOG "-- distance between comparable pairs was set to $alphaDist\n";
+    print LOG "-- maximal fragment length was set to $maxFragmentLength\n";
+
+    
+    my $fh = new FileHandle;
+    my $fh2 = new FileHandle;
+    
+    $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
+    $fh2->open(">$flinks_file") or die "$0: can't write in the output: $flinks_file :$!\n";
+    
+    while(<$fh>){
+	
+	$record++; 
+	my @t = split; 
+	my ($chr1,$chr2,$mates_list)=@t[0,3,7];
+	my @pairs=split(",",$mates_list);
+	($chr1,$chr2) = ($chrID->{$chr1},$chrID->{$chr2});
+	my ($coord_start_chr1,$coord_end_chr1,$coord_start_chr2,$coord_end_chr2) = @t[1,2,4,5];
+	my $numberOfPairs = $t[6];
+	my @strand1 = split (/,/,$t[8]);
+	my @strand2 = split (/,/,$t[9]);
+	my @ends_order1 = split (/,/,$t[10]);
+	my @ends_order2 = split (/,/,$t[11]);
+	my @order1 = split (/,/,$t[12]);
+	my @order2 = split (/,/,$t[13]);
+	my @positions1 = split (/,/,$t[14]); 
+	my @positions2 = split (/,/,$t[15]);
+	my @ends1; getEnds(\@ends1,\@positions1,\@strand1,\@ends_order1,$tag_length);
+	my @ends2; getEnds(\@ends2,\@positions2,\@strand2,\@ends_order2,$tag_length);
+	my $clusterCoordinates_chr1;
+	my $clusterCoordinates_chr2;
+	my $reads_left = 0;
+	
+	my $ifRenv = $t[16];
+	my $strand_ratio_filtering=$t[17]; 
+	
+	#kind of strand filtering. For example, will keep only FFF-RRR from a link FFRF-RRRF if <F-R> orientation is correct
+	my ($singleBreakpoint, %badInFRSense) = findBadInFRSenseSOLiDSolexa(\@strand1,\@strand2,\@ends_order1,\@ends_order2,\@order1,\@order2,$mate_sense);
+										#find pairs type F-RRRR or FFFF-R in the case if <R-F> orientation is correct
+										#These pairs are annotated as BED pairs forever! They won't be recycled!
+	my $table;
+	for my $i (0..$numberOfPairs-1) {					#fill the table with non adequate pairs: pairID	numberOfNonAdPairs nonAdPairIDs
+	    my $nonAdeq = 0;
+	    for my $j (0..$i-1) {
+		if (exists($table->{$j}->{$i})) {		    
+		    $nonAdeq++;
+		    $table->{$i}->{$j} = 1;		    
+		}
+	    }
+	    for my $j ($i+1..$numberOfPairs-1) {
+		if ($positions1[$j]-$positions1[$i]>$alphaDist) {		    
+		    if (&reversed ($i,$j,$ifRenv,\@positions2)) {
+			$nonAdeq++;
+			$table->{$i}->{$j} = 1;
+		    }
+		}
+	    }
+	    $table->{$i}->{nonAdeq} = $nonAdeq;   	    
+	}	
+							
+	for my $bad (keys %badInFRSense) { #remove pairs type F-RRRR or FFFF-R in the case of <R-F> orientation
+	    &remove($bad,$table);	    
+	}	
+	
+	my @falseReads;	
+							#RRRR-F -> RRRR or R-FFFF -> FFFF	
+	@falseReads = findBadInRFSenseSOLiDSolexa(\@strand1,\@ends_order1,$mate_sense, keys %{$table});
+										#these pairs will be recycled later as $secondTable
+	for my $bad (@falseReads) {
+	    &remove($bad,$table);
+	}			
+		
+	my $bad = &check($table); 
+	while ($bad ne "OK") {							#clear the table to reject non adequate pairs in the sense of ORDER
+	   # push (@falseReads, $bad);  remove completely!!!
+	    &remove($bad,$table);  
+	    $bad = &check($table);	    
+	}
+	
+	$reads_left = scalar keys %{$table}; 
+	my $coord_start_chr1_cluster1 = min(min(@positions1[sort {$a<=>$b} keys %{$table}]),min(@ends1[sort {$a<=>$b} keys %{$table}]));
+	my $coord_end_chr1_cluster1 = max(max(@positions1[sort {$a<=>$b} keys %{$table}]),max(@ends1[sort {$a<=>$b} keys %{$table}]));   
+	my $coord_start_chr2_cluster1 = min(min(@positions2[sort {$a<=>$b} keys %{$table}]),min(@ends2[sort {$a<=>$b} keys %{$table}]));
+	my $coord_end_chr2_cluster1 = max(max(@positions2[sort {$a<=>$b} keys %{$table}]),max(@ends2[sort {$a<=>$b} keys %{$table}]));  	
+	
+	$clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.')';
+	$clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.')';
+	
+	my $ifBalanced = 'UNBAL';
+	my $secondTable;
+	my $clusterCoordinates;
+	my ($break_pont_chr1,$break_pont_chr2);
+	
+	my $signatureType="";
+	
+	my $maxCoord1 =$chr->{$chr1}->{length};
+	my $maxCoord2 =$chr->{$chr2}->{length};
+	
+	if (scalar @falseReads) {
+	    @falseReads = sort @falseReads; 
+	    #now delete FRFR choosing the majority
+	    my @newfalseReads;	   							#find and remove pairs type RRRR-F or R-FFFF	
+	    @newfalseReads = findBadInRFSenseSOLiDSolexa(\@strand1,\@ends_order1,$mate_sense,@falseReads);	     #these @newfalseReads  won't be recycled
+	    my %hashTmp;
+	    for my $count1 (0..scalar(@falseReads)-1) {
+		my $i = $falseReads[$count1];
+		$hashTmp{$i} = 1;
+		for my $bad (@newfalseReads) {
+		    if ($bad == $i) {
+			delete $hashTmp{$i};
+			next;
+		    }
+		}
+	    }
+	    @falseReads = sort keys %hashTmp;  #what is left
+	    for my $count1 (0..scalar(@falseReads)-1) {				#fill the table for reads which were previously rejected  
+		my $nonAdeq = 0;
+		my $i = $falseReads[$count1];
+
+		for my $count2 (0..$count1-1) {
+		    my $j = $falseReads[$count2];
+		    if (exists($secondTable->{$j}->{$i})) {		    
+			$nonAdeq++;
+			$secondTable->{$i}->{$j} = 1;		    
+		    }
+		}
+		for my $count2 ($count1+1..scalar(@falseReads)-1) {
+		    my $j = $falseReads[$count2];
+		    if ($positions1[$j]-$positions1[$i]>$alphaDist) {
+			if (&reversed ($i,$j,$ifRenv,\@positions2)) {
+			    $nonAdeq++;
+			    $secondTable->{$i}->{$j} = 1;
+			}
+		    }
+		}
+		$secondTable->{$i}->{nonAdeq} = $nonAdeq;
+	    }
+	    
+	    my @falseReads2;
+	    my $bad = &check($secondTable);
+	    while ($bad ne "OK") {						#clear the table to reject non adequate pairs
+		push (@falseReads2, $bad);
+		&remove($bad,$secondTable);
+		$bad = &check($secondTable);
+	    }
+	    if (scalar keys %{$secondTable} >= $nb_pairs_order_threshold) {
+		my $coord_start_chr1_cluster2 = min(min(@positions1[sort {$a<=>$b} keys %{$secondTable}]),min(@ends1[sort {$a<=>$b} keys %{$secondTable}]));
+		my $coord_end_chr1_cluster2 = max(max(@positions1[sort {$a<=>$b} keys %{$secondTable}]),max(@ends1[sort {$a<=>$b} keys %{$secondTable}]));   
+		my $coord_start_chr2_cluster2 = min(min(@positions2[sort {$a<=>$b} keys %{$secondTable}]),min(@ends2[sort {$a<=>$b} keys %{$secondTable}]));
+		my $coord_end_chr2_cluster2 = max(max(@positions2[sort {$a<=>$b} keys %{$secondTable}]),max(@ends2[sort {$a<=>$b} keys %{$secondTable}]));   		 
+		
+		$ifBalanced = 'BAL';
+		
+		if ($ifBalanced eq 'BAL') {
+		    
+		    if (scalar keys %{$table} < $nb_pairs_order_threshold) {
+			$ifBalanced = 'UNBAL';					#kill cluster 1!
+			($table,$secondTable)=($secondTable,$table);		#this means that one needs to exchange cluster1 with cluster2
+			$reads_left = scalar keys %{$table};
+			$coord_start_chr1_cluster1 = $coord_start_chr1_cluster2;
+			$coord_end_chr1_cluster1 = $coord_end_chr1_cluster2;   
+			$coord_start_chr2_cluster1 = $coord_start_chr2_cluster2;
+			$coord_end_chr2_cluster1 = $coord_end_chr2_cluster2;
+			$clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.')';
+			$clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.')';
+			
+		    } else {
+			
+			$reads_left += scalar keys %{$secondTable};
+			next if ($reads_left < $nb_pairs_threshold);
+			
+			if ($coord_end_chr1_cluster2 < $coord_start_chr1_cluster1) {
+			    ($table,$secondTable)=($secondTable,$table);	 #this means that one needs to exchange cluster1 with cluster2
+			    
+			    ($coord_start_chr1_cluster1,$coord_start_chr1_cluster2) = ($coord_start_chr1_cluster2,$coord_start_chr1_cluster1);
+			    ($coord_end_chr1_cluster1,$coord_end_chr1_cluster2)=($coord_end_chr1_cluster2,$coord_end_chr1_cluster1);   
+			    ($coord_start_chr2_cluster1,$coord_start_chr2_cluster2)=($coord_start_chr2_cluster2,$coord_start_chr2_cluster1);
+			    ($coord_end_chr2_cluster1 , $coord_end_chr2_cluster2)=($coord_end_chr2_cluster2 , $coord_end_chr2_cluster1);
+			    
+			    $clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.'),'.$clusterCoordinates_chr1;
+			    $clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.'),'.$clusterCoordinates_chr2;			
+			}else {
+			    $clusterCoordinates_chr1 .= ',('.$coord_start_chr1_cluster2.','.$coord_end_chr1_cluster2.')';
+			    $clusterCoordinates_chr2 .= ',('.$coord_start_chr2_cluster2.','.$coord_end_chr2_cluster2.')';			
+			}
+			$coord_start_chr1 = min($coord_start_chr1_cluster1,$coord_start_chr1_cluster2);
+			$coord_end_chr1 = max($coord_end_chr1_cluster1,$coord_end_chr1_cluster2);
+			$coord_start_chr2 =  min($coord_start_chr2_cluster1,$coord_start_chr2_cluster2);
+			$coord_end_chr2 = max($coord_end_chr2_cluster1,$coord_end_chr2_cluster2);
+			#to calculate breakpoints one need to take into account read orientation in claster..
+			my $leftLetterOk = substr($mate_sense, 0, 1);  #R
+			my $rightLetterOk = substr($mate_sense, 1, 1); #F
+			
+			
+			my @index1 = keys %{$table};
+			my @index2 = keys %{$secondTable};
+			
+			my (@generalStrand1,@generalStrand2) = 0;
+			
+			if ($leftLetterOk eq $rightLetterOk) { #SOLID mate-pairs
+			    $leftLetterOk = 'R';
+			    $rightLetterOk = 'F';
+			    @generalStrand1 = translateSolidToRF(\@strand1,\@ends_order1);	
+			    @generalStrand2 = translateSolidToRF(\@strand2,\@ends_order2);
+			} else {
+			    @generalStrand1 = @strand1;
+			    @generalStrand2 = @strand2; # TODO check if it is correct
+			}
+			if ($generalStrand1[$index1[0]] eq $leftLetterOk && $generalStrand1[$index2[0]] eq $rightLetterOk) { #(R,F)
+			    $break_pont_chr1 = '('.$coord_end_chr1_cluster1.','.$coord_start_chr1_cluster2.')';
+			    
+			    if ($generalStrand2[$index1[0]] eq $rightLetterOk && $generalStrand2[$index2[0]] eq $leftLetterOk) {
+				if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
+				    $break_pont_chr2 = '('.$coord_end_chr2_cluster2.','.$coord_start_chr2_cluster1.')';
+				    $signatureType = "TRANSLOC";
+				} else {
+				    $break_pont_chr2 = '('.max(($coord_end_chr2_cluster1-$maxFragmentLength),1).','.$coord_start_chr2_cluster1.')';	
+				    $break_pont_chr2 .= ',('.$coord_end_chr2_cluster2.','.min(($coord_start_chr2_cluster2+$maxFragmentLength),$maxCoord2).')';
+				    $signatureType = "INS_FRAGMT";
+				}		    
+				
+			    } elsif ($generalStrand2[$index1[0]] eq $leftLetterOk && $generalStrand2[$index2[0]] eq $rightLetterOk) {
+				if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
+				    $break_pont_chr2 = '('.max(($coord_end_chr2_cluster2-$maxFragmentLength),1).','.$coord_start_chr2_cluster2.')';	
+				    $break_pont_chr2 .= ',('.$coord_end_chr2_cluster1.','.min(($coord_start_chr2_cluster1+$maxFragmentLength),$maxCoord2).')';
+				    $signatureType = "INV_INS_FRAGMT";
+				} else {
+				    $break_pont_chr2 = '('.$coord_end_chr2_cluster1.','.$coord_start_chr2_cluster2.')';
+				    $signatureType = "INV_TRANSLOC";
+				}	
+			    } else {
+				#should not occur
+				print STDERR "\nError in orderFiltering\n\n";				    
+			    }
+			}
+			
+			elsif ($generalStrand1[$index1[0]] eq $rightLetterOk && $generalStrand1[$index2[0]] eq $leftLetterOk) { #(F,R)
+			    $break_pont_chr1 = '('.max(($coord_end_chr1_cluster1-$maxFragmentLength),1).','.$coord_start_chr1_cluster1.')';	
+			    $break_pont_chr1 .= ',('.$coord_end_chr1_cluster2.','.min(($coord_start_chr1_cluster2+$maxFragmentLength),$maxCoord1).')';				
+			    if ($generalStrand2[$index1[0]] eq $rightLetterOk && $generalStrand2[$index2[0]] eq $leftLetterOk) { 
+				if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
+				    $break_pont_chr2 = '('.$coord_end_chr2_cluster2.','.$coord_start_chr2_cluster1.')';
+				    $signatureType = "INV_INS_FRAGMT";
+				} else {
+				    $break_pont_chr2 = '('.max(($coord_end_chr2_cluster1-$maxFragmentLength),1).','.$coord_start_chr2_cluster1.')';	
+				    $break_pont_chr2 .= ',('.$coord_end_chr2_cluster2.','.min(($coord_start_chr2_cluster2+$maxFragmentLength),$maxCoord2).')';
+				    $signatureType = "INV_COAMPLICON";
+				}		    
+				
+			    } elsif ($generalStrand2[$index1[0]] eq $leftLetterOk && $generalStrand2[$index2[0]] eq $rightLetterOk) {
+				if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
+				    $break_pont_chr2 = '('.max(($coord_end_chr2_cluster2-$maxFragmentLength),1).','.$coord_start_chr2_cluster2.')';	
+				    $break_pont_chr2 .= ',('.$coord_end_chr2_cluster1.','.min(($coord_start_chr2_cluster1+$maxFragmentLength),$maxCoord2).')';
+				    $signatureType = "COAMPLICON";
+				} else {
+				    $break_pont_chr2 = '('.$coord_end_chr2_cluster1.','.$coord_start_chr2_cluster2.')';
+				    $signatureType = "INS_FRAGMT";
+				}	
+			    } else {
+				#should not occur
+				$signatureType = "UNDEFINED";
+			    }
+			}			    
+			else { # (F,F) or (R,R)  something strange. We will discard the smallest cluster
+			    $ifBalanced = 'UNBAL';				
+			    if (scalar keys %{$secondTable} > scalar keys %{$table}) {
+				($table,$secondTable)=($secondTable,$table);		#this means that one needs to exchange cluster1 with cluster2
+				
+				$coord_start_chr1_cluster1 = $coord_start_chr1_cluster2;
+				$coord_end_chr1_cluster1 = $coord_end_chr1_cluster2;   
+				$coord_start_chr2_cluster1 = $coord_start_chr2_cluster2;
+				$coord_end_chr2_cluster1 = $coord_end_chr2_cluster2;
+				$clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.')';
+				$clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.')';			    
+			    }
+			    $reads_left = scalar keys %{$table};
+			}
+			if ($ifBalanced eq 'BAL') {
+			    $ifRenv = $signatureType;
+			}	
+		    }
+		}
+	    }   
+	}	
+	if ($ifBalanced ne 'BAL') {
+	    #define possible break point
+	    $coord_start_chr1 = $coord_start_chr1_cluster1;
+	    $coord_end_chr1 = $coord_end_chr1_cluster1;
+	    $coord_start_chr2 = $coord_start_chr2_cluster1;
+	    $coord_end_chr2 = $coord_end_chr2_cluster1;
+	    
+	    my $region_length_chr1 = $coord_end_chr1-$coord_start_chr1;
+	    my $region_length_chr2 = $coord_end_chr2-$coord_start_chr2;
+
+	    my $leftLetterOk = substr($mate_sense, 0, 1);  #R
+	    my $rightLetterOk = substr($mate_sense, 1, 1); #F
+	    
+	    my @index = keys %{$table};			    
+	    unless ($diff_sense_ends) {
+		my $firstEndOrder1 = $ends_order1[$index[0]];
+		my $firstEndOrder2 = $ends_order2[$index[0]];
+	        $break_pont_chr1 = (($strand1[$index[0]] eq 'R' && $firstEndOrder1 == 2) || ($strand1[$index[0]] eq 'F' && $firstEndOrder1 == 1))?'('.$coord_end_chr1.','.min(($coord_start_chr1+$maxFragmentLength),$maxCoord1).')':'('.max(($coord_end_chr1-$maxFragmentLength),1).','.$coord_start_chr1.')';
+	        $break_pont_chr2 = (($strand2[$index[0]] eq 'R' && $firstEndOrder2 == 2) || ($strand2[$index[0]] eq 'F' && $firstEndOrder2 == 1))?'('.$coord_end_chr2.','.min(($coord_start_chr2+$maxFragmentLength),$maxCoord2).')':'('.max(($coord_end_chr2-$maxFragmentLength),1).','.$coord_start_chr2.')';	
+	    } else {
+	        $break_pont_chr1 = ($strand1[$index[0]] eq $leftLetterOk )?'('.$coord_end_chr1.','.min(($coord_start_chr1+$maxFragmentLength),$maxCoord1).')':'('.max(($coord_end_chr1-$maxFragmentLength),1).','.$coord_start_chr1.')';
+	        $break_pont_chr2 = ($strand2[$index[0]] eq $leftLetterOk )?'('.$coord_end_chr2.','.min(($coord_start_chr2+$maxFragmentLength),$maxCoord2).')':'('.max(($coord_end_chr2-$maxFragmentLength),1).','.$coord_start_chr2.')';	
+	    }
+	    
+	    if ($chr1 ne $chr2){
+		$ifRenv="INV_TRANSLOC" if($ifRenv eq "REVERSE_SENSE");
+		$ifRenv="TRANSLOC" if($ifRenv eq "NORMAL_SENSE");
+	    }  
+	}
+	
+	if (($ifBalanced eq 'BAL')&&( (scalar keys %{$table}) + (scalar keys %{$secondTable}) < $nb_pairs_threshold)) {
+	    next; 								#discard the link
+	}	
+ 	if (($ifBalanced eq 'UNBAL')&&(scalar keys %{$table} < $nb_pairs_threshold)) {
+	    next; 								#discard the link
+	}
+	my $ratioTxt = "$reads_left/".(scalar @pairs);
+	my ($n1,$nTot) = split ("/",$strand_ratio_filtering);
+	my $ratioReal = $reads_left/$nTot;    
+	
+	if ($coord_start_chr1<=0) {
+	    $coord_start_chr1=1;
+	}
+	if ($coord_start_chr2<=0) {
+	    $coord_start_chr2=1;
+	}
+	#create output	
+	my @link=($chr->{$chr1}->{name}, $coord_start_chr1 , $coord_end_chr1,	#all information output
+		  $chr->{$chr2}->{name}, $coord_start_chr2 , $coord_end_chr2,
+		  $reads_left,
+		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@pairs),
+		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@strand1),
+		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@strand2),
+		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@ends_order1),
+		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@ends_order2),
+		  &redraw(2,$table,$secondTable,\%badInFRSense,$ifBalanced,\@order1),
+		  &redraw(2,$table,$secondTable,\%badInFRSense,$ifBalanced,\@order2),
+		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@positions1),
+		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@positions2),
+		  $ifRenv,
+		  $strand_ratio_filtering,
+		  $ifBalanced, $ratioTxt, $break_pont_chr1, $break_pont_chr2,
+		  $ratioReal, $nTot);
+	
+	$nb_links++;
+	print $fh2 join("\t",@link)."\n";
+	
+	if($record>=$warn){
+	    print LOG "-- $fchr : $warn links analysed - $nb_links links kept\n";
+	    $warn+=10000;
+	}
+	
+    }
+    $fh->close;
+    $fh2->close;
+    
+    print LOG "-- $fchr : Total : $record links analysed - $nb_links links kept\n";
+
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#gets information about ends positions given start, direction and order
+sub getEnds {
+    my ($ends,$starts,$strand,$end_order,$tag_length) = @_;
+    for my $i (0..scalar(@{$starts})-1) {
+	$ends->[$i] = getEnd($starts->[$i],$strand->[$i],$end_order->[$i],$tag_length);
+    }    
+}
+sub getEnd {
+    my ($start,$strand, $end_order,$tag_length) = @_;       
+    return ($strand eq 'F')? $start+$tag_length->{$end_order}-1:$start-$tag_length->{$end_order}+1;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#gets starts and ends Coords when start=leftmost given positions, directions and orders
+sub getCoordswithLeftMost {
+    
+    my ($starts,$ends,$positions,$strand,$end_order,$tag_length) = @_;
+
+    for my $i (0..scalar(@{$positions})-1) {
+
+	if($strand->[$i] eq 'F'){
+	    $starts->[$i]=$positions->[$i];
+	    $ends->[$i]=$positions->[$i]+$tag_length->{$end_order->[$i]}-1;
+	}else{
+	    $starts->[$i]=$positions->[$i]-$tag_length->{$end_order->[$i]}+1;
+	    $ends->[$i]=$positions->[$i];
+	}
+    }    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub addInsertionInfo { 								#add field with INS,DEL,NA and distance between clusters and performs filtering 
+    
+    my ($chr,$chrID,$nb_pairs_threshold,$order_filtering,$indel_sigma_threshold,$dup_sigma_threshold,$singleton_sigma_threshold,$mu,$sigma,$mate_sense,$tag_length,$links_file,$flinks_file)=@_;
+
+    my @sfile=split(/\./,$links_file);
+    my $fchr=$sfile[$#sfile-2];
+    
+
+    my $diff_sense_ends=(($mate_sense eq "FR") || ($mate_sense eq "RF"))? 1:0; 
+    
+    my $record=0;
+    my $nb_links=0;
+    my $warn=10000;
+    
+    print LOG "\# Filtering out normal pairs using insert size...\n";
+    print LOG "-- mu length=$mu, sigma length=$sigma, indel sigma threshold=$indel_sigma_threshold, dup sigma threshold=$dup_sigma_threshold\n";
+    print LOG "-- using ".($mu-$indel_sigma_threshold*$sigma)."-".
+		      ($mu+$indel_sigma_threshold*$sigma)." as normal range of insert size for indels\n";
+    print LOG "-- using ".($mu-$dup_sigma_threshold*$sigma)."-".
+		      ($mu+$dup_sigma_threshold*$sigma)." as normal range of insert size for duplications\n";
+    print LOG "-- using ".($mu-$singleton_sigma_threshold*$sigma)." as the upper limit of insert size for singletons\n" if($mate_sense eq "RF");
+
+    my $fh = new FileHandle;
+    my $fh2 = new FileHandle;
+    
+    $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
+    $fh2->open(">$flinks_file") or die "$0: can't write in the output: $flinks_file :$!\n";
+    
+    while(<$fh>){
+	
+	$record++; 
+	my @t = split;
+	my ($chr1,$chr2,$mates_list)=@t[0,3,7];
+	
+	if($chrID->{$chr1} ne $chrID->{$chr2}) {				#if inter-chromosomal link here (because sv_type=all), 
+	    $nb_links++;
+	    
+	    $t[16]="INV_TRANSLOC" if($t[16] eq "REVERSE_SENSE");
+	    $t[16]="TRANSLOC" if($t[16] eq "NORMAL_SENSE");
+	    
+	    $t[16].= "\t";
+	    $t[19].= "\t";
+	    
+	    print $fh2 join("\t",@t)."\n";
+	    
+	    if($record>=$warn){
+		print LOG "-- $fchr : $warn links processed - $nb_links links kept\n";
+		$warn+=10000;
+	    }								
+	    next;								
+	}									
+	
+	my $ifRenv = $t[16];
+	my $ifBalanced = "UNBAL";
+	$ifBalanced = $t[18] if ($order_filtering);
+		
+	my $numberOfPairs = $t[6];
+	my @positions1 = deleteBadOrderSensePairs(split (/,/,$t[14]));
+	my @positions2 = deleteBadOrderSensePairs(split (/,/,$t[15]));
+	
+	if ($ifBalanced eq "BAL") {
+	    
+	    if ($ifRenv eq "INV_TRANSLOC") {
+		$ifRenv = "INV_FRAGMT"; #for intrachromosomal inverted translocation is the same as inverted fragment
+	    }
+	    if ($ifRenv eq "NORMAL_SENSE") {
+		$ifRenv = "TRANSLOC"; 
+	    }
+	    if ($ifRenv eq "REVERSE_SENSE") {
+		$ifRenv = "INV_FRAGMT"; #for intrachromosomal inverted translocation is the same as inverted fragment
+	    }
+	    $t[19].= "\t";
+	    
+	    my $meanDistance = 0;
+	    
+	    for my $i (0..$numberOfPairs-1) {
+		$meanDistance += $positions2[$i]-$positions1[$i];
+	    }
+	    $meanDistance /= $numberOfPairs;      
+	    
+	    $t[16] = $ifRenv."\t".$meanDistance;
+	    #dont touch the annotation. It should be already OK.
+	    
+	} else {
+	    #only for unbalanced
+	    
+	    my $ifoverlap=overlap($t[1],$t[2],$t[4],$t[5]);
+	    
+	    my $ends_sense_class = (deleteBadOrderSensePairs(split (/,/,$t[8])))[0].
+				   (deleteBadOrderSensePairs(split (/,/,$t[9])))[0];
+	    my $ends_order_class = (deleteBadOrderSensePairs(split (/,/,$t[10])))[0].
+				   (deleteBadOrderSensePairs(split (/,/,$t[11])))[0];
+	    
+	    my $indel_type = $ifRenv;
+	    
+	    my $meanDistance = "N/A";
+	    
+	    ($meanDistance, $indel_type) = checkIndel ($numberOfPairs,		#identify insertion type for rearrangments without inversion, calculates distance between cluster
+						       \@positions1,		#assign N/A to $indel_type if unknown
+						       \@positions2,
+						       $ifRenv,
+						       $ifoverlap,
+						       $indel_sigma_threshold,
+						       $dup_sigma_threshold,
+						       $singleton_sigma_threshold,
+						       $mu,
+						       $sigma,
+						       $ifBalanced,
+						       $ends_sense_class,
+						       $ends_order_class,
+						       $mate_sense,
+						       $diff_sense_ends,
+						       );
+	    
+	    #filtering of pairs with distance inconsistant with the SV
+	    if ($ifRenv ne "REVERSE_SENSE") {
+		my $maxCoord1 =$chr->{$chrID->{$chr1}}->{length};	
+		my $maxCoord2 =$chr->{$chrID->{$chr2}}->{length};
+		$meanDistance = recalc_t_usingInsertSizeInfo(\@t,$mu,$sigma,$meanDistance,$tag_length,$diff_sense_ends,$mate_sense,
+							     $maxCoord1,$maxCoord2,$ends_sense_class,$ends_order_class,$nb_pairs_threshold,$order_filtering);
+		next if ($t[6] < $nb_pairs_threshold);
+	    }else{
+		 $t[19].= "\t";
+	    }
+	    $t[16] = $indel_type."\t".$meanDistance;
+	}
+	
+	$nb_links++;
+		
+	print $fh2 join("\t",@t)."\n";
+	if($record>=$warn){
+	     print LOG "-- $fchr : $warn links processed - $nb_links links kept\n";
+	    $warn+=10000;
+	}
+    }
+    $fh->close;
+    $fh2->close; 
+    
+    print LOG "-- $fchr : Total : $record links analysed - $nb_links links kept\n";
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub checkIndel  {
+    
+    my ($numberOfPairs,	$positions1, $positions2, $ifRenv, $ifoverlap, $indel_sigma_threshold, $dup_sigma_threshold, $singleton_sigma_threshold,
+	$mu, $sigma, $ifBalanced,$ends_sense_class,$ends_order_class,$mate_sense,$diff_sense_ends) = @_;
+    
+    my $meanDistance = 0;
+    
+    for my $i (0..$numberOfPairs-1) {
+	$meanDistance += $positions2->[$i]-$positions1->[$i];
+    }
+    $meanDistance /= $numberOfPairs;
+    
+    return ($meanDistance,"INV_DUPLI") if (($ifRenv eq "REVERSE_SENSE") && ($meanDistance<$mu+$dup_sigma_threshold*$sigma) );
+    
+    return ($meanDistance,"INVERSION") if ($ifRenv eq "REVERSE_SENSE");
+    
+    if($diff_sense_ends){
+	return ($meanDistance, "LARGE_DUPLI") if ($ends_sense_class ne $mate_sense) && ($meanDistance>$mu+$dup_sigma_threshold*$sigma) ;
+	return ($meanDistance, "SINGLETON") if (($meanDistance<$mu-$singleton_sigma_threshold*$sigma) && $mate_sense eq "RF" && ($ends_sense_class eq inverseSense($mate_sense)));
+    }else{
+	return ($meanDistance, "LARGE_DUPLI") if (($ends_sense_class eq $mate_sense) && ($ends_order_class eq "12") || ($ends_sense_class eq inverseSense($mate_sense)) && ($ends_order_class eq "21")) &&
+	($meanDistance>$mu+$dup_sigma_threshold*$sigma) ; 
+    }
+ 
+    return ($meanDistance, "SMALL_DUPLI") if (($meanDistance<$mu-$dup_sigma_threshold*$sigma) && $ifoverlap);
+    
+    return ($meanDistance, "DUPLICATION") if ($diff_sense_ends && ($ends_sense_class ne $mate_sense) && ($meanDistance<$mu-$dup_sigma_threshold*$sigma) ) ;
+    
+    return ($meanDistance, "INSERTION") if ($meanDistance<$mu -$indel_sigma_threshold*$sigma);
+    return ($meanDistance, "DELETION") if ($meanDistance>$mu+$indel_sigma_threshold*$sigma);
+
+    return ($meanDistance, "UNDEFINED");
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#sub reacalulate @t so that get rid of unconsistent pairs (unconsistent insert size )
+sub recalc_t_usingInsertSizeInfo {
+    my($t,$mu,$sigma,$meanDistance,$tag_length,$diff_sense_ends,$mate_sense,$maxCoord1,$maxCoord2,$ends_sense_class,$ends_order_class,$nb_pairs_threshold,$order_filtering) = @_;
+ 	    
+    my @badPairs;
+	    
+    my @positions1 = getAllEntries($t->[14]);
+    my @positions2 = getAllEntries($t->[15]);	    
+	    
+    if ($meanDistance < $mu) {
+	for my $i (0..scalar(@positions1)-1) {
+	    if (substr($positions2[$i],-1,1) ne '$' && substr($positions2[$i],-1,1) ne '*' && $positions2[$i]-$positions1[$i]>=$mu) {
+	    	push(@badPairs,$i);		    
+	    }
+    	}
+    } else {
+	for my $i (0..scalar(@positions1)-1) {
+	    if (substr($positions2[$i],-1,1) ne '$' && substr($positions2[$i],-1,1) ne '*' && $positions2[$i]-$positions1[$i]<=$mu) {
+		push(@badPairs,$i);
+	    }
+	}	    
+    }
+
+    if (scalar (@badPairs)>0) {
+	#print join("\t",@badPairs).": ".join("\t",@t)."\n";	        
+	#remove these inconsistant links
+	$t->[6] -= scalar(@badPairs); #numberOfPairs
+	return if ($t->[6] < $nb_pairs_threshold); 	
+
+	$t->[7] = mark_values(\@badPairs, $t->[7]);
+	$t->[8] = mark_values(\@badPairs, $t->[8]);
+	$t->[9] = mark_values(\@badPairs, $t->[9]);
+	$t->[10] = mark_values(\@badPairs, $t->[10]);
+	$t->[11] = mark_values(\@badPairs, $t->[11]);
+	
+	$t->[12] = mark_indexes(\@badPairs, $t->[12]);
+	$t->[13] = mark_indexes(\@badPairs, $t->[13]);	    
+		    
+	$t->[14] = mark_values(\@badPairs, $t->[14]);
+	$t->[15] = mark_values(\@badPairs, $t->[15]);
+	$t->[19] = recalculate_ratio($t->[6],$t->[19]) if ($order_filtering); #add the second ratio
+	$t->[17] = recalculate_ratio($t->[6],$t->[17]) unless ($order_filtering);
+	($t->[1],$t->[2]) = recalculate_boundaries($t->[14],$t->[8],$t->[10],$tag_length);
+	($t->[4],$t->[5]) = recalculate_boundaries($t->[15],$t->[9],$t->[11],$tag_length);	 
+		    
+	#recalc breakpoints:
+	my $quant001 = 3.090232;
+	my $maxFragmentLength = &floor($quant001 * $sigma + $mu);
+	$t->[20] = recalc_breakpoints($mate_sense,$maxCoord1,$t->[14],substr($ends_sense_class,0,1),substr($ends_order_class,0,1),$t->[1],$t->[2],$maxFragmentLength,$diff_sense_ends ) if ($order_filtering);
+	$t->[21] = recalc_breakpoints($mate_sense,$maxCoord2,$t->[15],substr($ends_sense_class,1,1),substr($ends_order_class,1,1),$t->[4],$t->[5],$maxFragmentLength,$diff_sense_ends ) if ($order_filtering);
+	#recalc total ratio
+	$t->[22] = $t->[6] / $t->[23] if ($order_filtering);
+	$t->[18] = $t->[6] / $t->[19] unless ($order_filtering);
+	
+    	@positions1 = deleteBadOrderSensePairs(split (/,/,$t->[14]));
+    	@positions2 = deleteBadOrderSensePairs(split (/,/,$t->[15]));		
+	
+	$meanDistance = 0;
+	
+	for my $i (0..scalar(@positions1)-1) {
+	    $meanDistance += $positions2[$i]-$positions1[$i];
+	}
+	$meanDistance /= scalar(@positions1);
+ 	
+    } else {
+	$t->[17] = recalculate_ratio((split(/\//,$t->[17]))[0],$t->[17]) unless ($order_filtering);
+	$t->[19] = recalculate_ratio((split(/\//,$t->[19]))[0],$t->[19]) if ($order_filtering);
+    
+    } #nothing has been filtered
+    return $meanDistance;
+}
+
+sub recalculate_ratio {
+    my ($left, $ratio) = @_;
+    my @elements = split (/\//,$ratio);
+    $elements[1]= $elements[0];
+    $elements[0]=$left;
+    return $ratio."\t".join("/",@elements);    
+}
+
+sub recalc_breakpoints {
+    my ($mate_sense,$maxCoord,$startString,$strand,$firstEndOrder,$coord_start_chr,$coord_end_chr,$maxFragmentLength,$diff_sense_ends ) = @_;
+    my $break_pont_chr;
+    
+    my $leftLetterOk = substr($mate_sense, 0, 1);  #R
+    my $rightLetterOk = substr($mate_sense, 1, 1); #F	    
+
+	
+    my @positions = deleteBadOrderSensePairs(split (/,/,$startString));
+
+    unless ($diff_sense_ends) {
+	$break_pont_chr = (($strand eq 'R' && $firstEndOrder == 2) || ($strand eq 'F' && $firstEndOrder == 1))?'('.$coord_end_chr.','.min(($coord_start_chr+$maxFragmentLength),$maxCoord).')':'('.max(($coord_end_chr-$maxFragmentLength),1).','.$coord_start_chr.')';
+    } else {
+	$break_pont_chr = ($strand eq $leftLetterOk)?'('.$coord_end_chr.','.min(($coord_start_chr+$maxFragmentLength),$maxCoord).')':'('.max(($coord_end_chr-$maxFragmentLength),1).','.$coord_start_chr.')';
+    }    
+    return $break_pont_chr;
+}
+sub recalculate_boundaries {
+    my ($startString,$senseString,$endsOrderString,$tag_length) = @_;
+    my @positions = deleteBadOrderSensePairs(split (/,/,$startString));
+    my @strands = deleteBadOrderSensePairs(split (/,/,$senseString));
+    my @ends_orders = deleteBadOrderSensePairs(split (/,/,$endsOrderString));
+    my @ends; getEnds(\@ends,\@positions,\@strands,\@ends_orders,$tag_length);   
+    my $coord_start_cluster = min(min(@positions),min(@ends));
+    my $coord_end_cluster = max(max(@positions),max(@ends));
+    return ($coord_start_cluster,$coord_end_cluster);
+}
+
+sub remove_indexes {
+    my ($bads, $string) = @_;
+    my @elements = deleteBadOrderSensePairs(split (/,/,$string));
+    for my $i (reverse sort %{$bads}) {
+	delete $elements[$i];
+    }
+    return "(".join(",",@elements).")";
+}
+##add @ to to elements 
+sub mark_values {
+    my ($bads, $string) = @_;
+    my @elements = getAllEntries($string);
+    for my $i (@{$bads}) {
+	$elements[$i] .= "@";
+    }    
+    return "(".join(",",@elements).")";
+}
+##add @ to to indexes
+sub mark_indexes {
+    my ($bads, $string) = @_;
+    my @elements = getAllEntries($string);
+    for my $i ((0..scalar(@elements)-1)) {
+	for my $j (@{$bads}) {	    
+	    $elements[$i] .= "@" if ($elements[$i] eq ($j+1));
+	}	   
+    }
+
+    return "(".join(",",@elements).")";    
+}
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub redraw {
+    
+    my ($type,$table,$secondTable,$badInFRSense,$ifBalanced,$arr) = @_;	
+
+    my $out;
+    my @first_arr;
+    if ($ifBalanced eq 'BAL') {
+	my @second_arr;
+	my $lastPushed = 1;
+	if ($type == 1) {
+	    for my $i (0 .. scalar(@{$arr})-1) {
+		if (exists ($table->{$i})) {
+	    	    push(@first_arr,$arr->[$i]);
+		    $lastPushed = 1;
+		}elsif (exists ($secondTable->{$i})) {
+		    push(@second_arr,$arr->[$i]);
+		    $lastPushed = 2;
+		} elsif ($lastPushed == 1) {
+		    if (exists ($badInFRSense->{$i})) {
+		        push(@first_arr,$arr->[$i]."\$");
+		    }else {
+		        push(@first_arr,$arr->[$i]."*");
+		    }
+		} elsif ($lastPushed == 2) {
+		    if (exists ($badInFRSense->{$i})) {
+		        push(@second_arr,$arr->[$i]."\$");
+		    }else {
+			push(@second_arr,$arr->[$i]."*");
+		    }
+		} else {print "Error!";exit;}
+	    }
+	} else {
+	     for my $i (@{$arr}) {
+		if (exists ($table->{$i-1})) {
+	    	    push(@first_arr,$i);
+		    $lastPushed = 1;
+		}elsif (exists ($secondTable->{$i-1})) {
+		    push(@second_arr,$i);
+		    $lastPushed = 2;
+		} elsif ($lastPushed == 1) {
+		    if (exists ($badInFRSense->{$i-1})) {
+		        push(@first_arr,$i."\$");
+		    }else {
+			push(@first_arr,$i."*");
+		    }
+		} elsif ($lastPushed == 2) {
+		    if (exists ($badInFRSense->{$i-1})) {
+		        push(@second_arr,$i."\$");
+		    }else {
+			push(@second_arr,$i."*");
+		    }
+		} else {print "Error!";exit;}
+	    }
+	}		
+	$out = '('.join(",",@first_arr).'),('.join(",",@second_arr).')';
+    }
+    else {
+	if ($type == 1) {
+	    for my $i (0 .. scalar(@{$arr})-1) { 
+		if (exists ($table->{$i})) {
+	    	    push(@first_arr,$arr->[$i]);
+		} else {
+		    if (exists ($badInFRSense->{$i})) {
+			push(@first_arr,$arr->[$i]."\$");
+		    }else {
+		        push(@first_arr,$arr->[$i]."*");
+		    }
+		}
+	    }
+	} else {
+	    for my $i (@{$arr}) {
+		if (exists ($table->{$i-1})) {
+	    	    push(@first_arr,$i);
+		} else {
+		    if (exists ($badInFRSense->{$i-1})) {
+			push(@first_arr,$i."\$");
+		    }else {
+			push(@first_arr,$i."*");
+		    }
+		}
+	    }	    
+	}	
+	$out = '('.join(",",@first_arr).')';
+    }    
+    return $out;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub check {
+    
+    my $table = $_[0];
+    my $bad = 'OK';
+    my $max = 0;  
+    for my $i (sort {$a<=>$b} keys %{$table}) { 				
+	unless ($table->{$i}->{nonAdeq} == 0) {
+	    if ($max<$table->{$i}->{nonAdeq}) {
+		$max=$table->{$i}->{nonAdeq};
+		$bad = $i;
+	    }
+	}	    
+    } 
+    return $bad;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub reversed {
+    
+    my ($i,$j,$ifRenv,$positions) = @_;
+    if (($ifRenv eq 'REVERSE_SENSE' && $positions->[$i]<$positions->[$j]) || ($ifRenv ne 'REVERSE_SENSE' && $positions->[$i]>$positions->[$j])){
+	return 1;
+    }
+    return 0;    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub remove {
+    
+    my ($bad,$table) = @_;
+    for my $i (sort {$a<=>$b} keys %{$table}) {
+	if ($bad == $i) {
+	    delete($table->{$i});;
+	} else {
+	    if (exists($table->{$i}->{$bad})) {
+		delete($table->{$i}->{$bad});
+		$table->{$i}->{nonAdeq}--;
+	    }
+	}
+    }
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub findBadInRFSenseSOLiDSolexa { #choose maximum: FFFFs or RRRRs
+    
+    my ($strand,$ends_order,$mate_sense,@keysLeft) = @_;
+    
+    my $leftLetterOk = substr($mate_sense, 0, 1);  #R
+    my $rightLetterOk = substr($mate_sense, 1, 1); #F
+    
+    my (@standardArray);
+    if ($leftLetterOk eq $rightLetterOk) { #SOLID mate-pairs
+	$leftLetterOk = 'R';
+	$rightLetterOk = 'F';
+	@standardArray = translateSolidToRF($strand,$ends_order);
+    } else {
+	@standardArray = @{$strand};	
+    }
+   
+    my $ifR = 0;
+    my @Rs;
+    
+    for my $i (@keysLeft) {	
+	if ($standardArray[$i] eq $leftLetterOk) {
+	    $ifR++;
+	    push(@Rs,$i);
+	}
+    }
+ 
+
+    my $ifF = 0;
+    my @Fs;
+    
+    for my $i (@keysLeft) {	
+	if ($standardArray[$i] eq $rightLetterOk) {
+	    $ifF++;
+	    push(@Fs,$i);
+	}
+    }
+    
+    if($ifR>=$ifF) {
+	return @Fs;
+    }
+    return @Rs;     
+}
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub findBadInFRSenseSOLiDSolexa { #should work both for SOLiD and Solexa 
+    
+    my ($strand1,$strand2,$ends_order1,$ends_order2,$order1,$order2) = ($_[0],$_[1],$_[2],$_[3],$_[4],$_[5]);
+    my $mate_sense =  $_[6];
+      
+    my $leftLetterOk = substr($mate_sense, 0, 1);  #R
+    my $rightLetterOk = substr($mate_sense, 1, 1); #F
+    
+    my (@standardArray1,@standardArray2);
+   
+    if ($leftLetterOk eq $rightLetterOk) { #SOLID mate-pairs
+	$leftLetterOk = 'R';
+	$rightLetterOk = 'F';
+	@standardArray1 = translateSolidToRF($strand1,$ends_order1);
+	my @arr = getOrderedStrands($strand2,$order2); 
+	my @ends2 = getOrderedStrands($ends_order2,$order2); 
+	@standardArray2 = translateSolidToRF(\@arr,\@ends2);  
+
+    } else {
+	@standardArray1 = @{$strand1};
+	@standardArray2 = getOrderedStrands($strand2,$order2);
+    }
+   
+    #we will try 4 possibilities, 2 for each end of the link: RFRR-FFF->RFFFF , RFRR-FFF->RRRFFF
+   
+    #for the first end:   
+   
+    my @array = @standardArray1; 
+    my %badInFRSense1;
+    for my $i (1..scalar (@array)-1){ # FRFRFFFF -> FFFFFF and RRFRFRFFFF -> RRFFFFFF
+	if ($array[$i-1] eq $rightLetterOk && $array[$i] eq $leftLetterOk) {
+	    $badInFRSense1{$i}=1;
+	    $array[$i] = $rightLetterOk;
+	}
+    }    
+    my $numberRRRFFF_or_FFF_1 = scalar(@array)-scalar(keys %badInFRSense1);
+    @array = @standardArray1;    
+    my %badInFRSense0;
+    for my $i (reverse(1..scalar (@array)-1)){ # FRFRFFFFRR -> FFFFFFRR
+	if ($array[$i-1] eq $rightLetterOk && $array[$i] eq $leftLetterOk) {
+	    $badInFRSense0{$i-1}=1;
+	    $array[$i-1] = $leftLetterOk;
+
+	}
+    }
+    my $numberRRF1 = scalar(@array)-scalar(keys %badInFRSense0);
+    
+    #for the second end:   
+    @array = @standardArray2;
+ 
+    my %badInFRSense3;
+    for my $i (1..scalar(@array)-1){
+	if ($array[$i-1] eq $rightLetterOk && $array[$i] eq $leftLetterOk) {
+	    $badInFRSense3{$order2->[$i]}=1;
+	    $array[$i] = $rightLetterOk;
+	}
+    }
+    my $numberRRRFFF_or_FFF_2 = scalar(@array)-scalar(keys %badInFRSense3);
+
+    @array = @standardArray2;
+    my %badInFRSense5;
+    for my $i (reverse(1..scalar (@array)-1)){ # FRFRFFFF -> FFFFFF
+	if ($array[$i-1] eq $rightLetterOk && $array[$i] eq $leftLetterOk) {
+	    $badInFRSense5{$i-1}=1;
+	    $array[$i-1] = $leftLetterOk;
+	}
+    }
+    my $numberRRF2 = scalar(@array)-scalar(keys %badInFRSense5);  
+    
+    if ($numberRRF1>=$numberRRRFFF_or_FFF_1 && $numberRRF1 >= $numberRRRFFF_or_FFF_2 && $numberRRF1 >=$numberRRF2) {
+	return (1,%badInFRSense0);
+    }   
+    
+    if ($numberRRRFFF_or_FFF_1 >=$numberRRF1 && $numberRRRFFF_or_FFF_1 >= $numberRRRFFF_or_FFF_2 && $numberRRRFFF_or_FFF_1 >= $numberRRF2) {
+	return (1,%badInFRSense1);
+    }
+    
+    if ($numberRRRFFF_or_FFF_2 >= $numberRRF1 && $numberRRRFFF_or_FFF_2 >= $numberRRRFFF_or_FFF_1 && $numberRRRFFF_or_FFF_2 >=$numberRRF2) {
+	return (2,%badInFRSense3);
+    }
+    
+    if ($numberRRF2 >=  $numberRRF1 && $numberRRF2 >= $numberRRRFFF_or_FFF_1 && $numberRRF2 >= $numberRRRFFF_or_FFF_2 ) {
+	return (2,%badInFRSense5);
+    }  
+    
+    #should not get here:
+    print STDERR "Error in findBadInFRSenseSOLiDSolexa()!\n";
+    return (1,%badInFRSense1); 
+}
+
+sub getOrderedStrands {
+    my ($strand,$order) = ($_[0],$_[1]);
+    my @arr;
+    for my $i (0..scalar(@{$strand})-1) {
+	push(@arr,$strand->[$order->[$i]-1]); 
+    }    
+    return @arr; 
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub checkClusters {
+    
+    my ($ifRenv,$coord_start_chr1_cluster1,$coord_start_chr1_cluster2,$coord_start_chr2_cluster1,$coord_start_chr2_cluster2) = @_;
+    if ($ifRenv eq 'REVERSE_SENSE') {
+	if ($coord_start_chr1_cluster1 <= $coord_start_chr1_cluster2) {
+	    return ($coord_start_chr2_cluster1 <= $coord_start_chr2_cluster2)?1:0; 
+	}
+	return ($coord_start_chr2_cluster1 >= $coord_start_chr2_cluster2)?1:0; 
+    }
+    #if NORM
+    if ($coord_start_chr1_cluster1 <= $coord_start_chr1_cluster2) {
+        return ($coord_start_chr2_cluster1 >= $coord_start_chr2_cluster2)?1:0; 
+    }
+    return ($coord_start_chr2_cluster1 <= $coord_start_chr2_cluster2)?1:0; 
+}
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub translateSolidToRF {
+    my ($strandArr,$ends_orderArr)=@_;
+    my @array;
+    for my $i (0..scalar(@{$strandArr})-1) {					
+	if ($ends_orderArr->[$i]==1 && $strandArr->[$i] eq 'F') {
+	    push(@array,'F');
+	}
+	if ($ends_orderArr->[$i]==2 && $strandArr->[$i] eq 'F') {
+	    push(@array,'R');
+	}
+	if ($ends_orderArr->[$i]==1 && $strandArr->[$i] eq 'R') {
+	    push(@array,'R');   
+	}
+	if ($ends_orderArr->[$i]==2 && $strandArr->[$i] eq 'R') {
+	    push(@array,'F');    
+	}
+    }
+    return @array;
+}
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#convert the links file to the circos format
+sub links2segdup{
+    
+    my($id,$color_code,$links_file,$segdup_file)=@_;
+    
+    print LOG "# Converting to the circos format...\n";
+    
+    tie (my %hcolor,'Tie::IxHash');						#color-code hash table
+    foreach my $col (keys %{$color_code}){
+	my ($min_links,$max_links)=split(",",$color_code->{$col});
+	$hcolor{$col}=[$min_links,$max_links];
+    }
+    
+    open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n";
+    open SEGDUP, ">$segdup_file" or die "$0: can't write in the output: $segdup_file :$!\n";
+    
+    my $index=1;
+    while(<LINKS>){
+	
+	my ($chr1,$start1,$end1,$chr2,$start2,$end2,$count)=(split)[0,1,2,3,4,5,6];
+	
+	my $color=getColor($count,\%hcolor,"circos");				#get the color-code according the number of links
+	
+	print SEGDUP "$index\t$id$chr1\t$start1\t$end1\tcolor=$color\n".	#circos output
+		     "$index\t$id$chr2\t$start2\t$end2\tcolor=$color\n";
+	$index++;
+    }
+    
+    close LINKS;
+    close SEGDUP;
+    print LOG "-- output created: $segdup_file\n";
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#convert the links file to the bedPE format for BEDTools usage
+sub links2bedPElinksfile{
+
+    my ($sample,$links_file,$bedpe_file)=@_;
+    
+    open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n";
+    open BEDPE, ">$bedpe_file" or die "$0: can't write in the output: $bedpe_file :$!\n";
+    
+    my $nb_links=1;
+    
+    while(<LINKS>){
+	
+	chomp;
+	my @t=split("\t",$_);
+	my ($chr1,$start1,$end1,$chr2,$start2,$end2)=splice(@t,0,6);
+	my $type=($chr1 eq $chr2)? "INTRA":"INTER";
+	$type.="_".$t[10];
+	
+	$start1--; $start2--;
+	
+	print BEDPE "$chr1\t$start1\t$end1\t$chr2\t$start2\t$end2".
+	"\t$sample"."_link$nb_links\t$type\t.\t.".
+	"\t".join("|",@t)."\n";
+	
+	$nb_links++;
+    }
+    
+    close LINKS;
+    close BEDPE;
+
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub bedPElinks2linksfile{
+
+    my ($bedpe_file,$links_file)=@_;
+ 
+    open BEDPE, "<$bedpe_file" or die "$0: can't open: $bedpe_file :$!\n";
+    open LINKS, ">$links_file" or die "$0: can't write in the output $links_file :$!\n";
+    
+    while(<BEDPE>){
+	
+	chomp;
+	my $sample=(split("_",(split("\t",$_))[6]))[0];
+	my @t1=(split("\t",$_))[0,1,2,3,4,5];
+	my @t2=split(/\|/,(split("\t",$_))[10]);
+	push(@t2,$sample);
+	
+	print LINKS join("\t",@t1)."\t".join("\t",@t2)."\n";
+	
+    }
+    close BEDPE;
+    close LINKS;
+
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#convert the links file to the bed format
+sub links2bedfile{
+    
+    my ($tag_length,$color_code,$links_file,$bed_file)=@_;
+    
+    print LOG "# Converting to the bed format...\n";
+    
+    my $compare=1;
+    if($links_file!~/compared$/){
+	$compare=0;
+	$tag_length->{none}->{1}=$tag_length->{1};
+	$tag_length->{none}->{2}=$tag_length->{2};
+    }
+    
+    #color-code hash table
+    tie (my %hcolor,'Tie::IxHash');
+    my %color_order;
+    my $n=1;
+    foreach my $col (keys %{$color_code}){
+	my ($min_links,$max_links)=split(",",$color_code->{$col});
+	$hcolor{$col}=[$min_links,$max_links];
+	$color_order{$col}=$n;
+	$n++;
+    }
+    
+    my %pair;
+    my %pt;
+    $n=1;
+    open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n";
+    
+    my %str=( "F"=>"+", "R"=>"-" );
+
+    while(<LINKS>){
+	
+	my @t=split;
+	my $sample=($compare)? pop(@t):"none";
+	
+	my $chr1=$t[0]; 
+	my $chr2=$t[3];
+	$chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
+	$chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
+	my $same_chr=($chr1 eq $chr2)? 1:0;
+	
+	my $count=$t[6];
+	my $color=getColor($count,\%hcolor,"bed");
+	
+	my @pairs=deleteBadOrderSensePairs(split(",",$t[7]));
+	my @strand1=deleteBadOrderSensePairs(split(",",$t[8]));
+	my @strand2=deleteBadOrderSensePairs(split(",",$t[9]));
+	my @ends_order1=deleteBadOrderSensePairs(split(",",$t[10]));
+	my @ends_order2=deleteBadOrderSensePairs(split(",",$t[11]));
+	my @position1=deleteBadOrderSensePairs(split(",",$t[14]));
+	my @position2=deleteBadOrderSensePairs(split(",",$t[15]));
+	my @start1; my @end1; getCoordswithLeftMost(\@start1,\@end1,\@position1,\@strand1,\@ends_order1,$tag_length->{$sample});
+	my @start2; my @end2; getCoordswithLeftMost(\@start2,\@end2,\@position2,\@strand2,\@ends_order2,$tag_length->{$sample});
+
+	
+	for my $p (0..$#pairs){						
+	    
+	    if (!exists $pair{$pairs[$p]}){
+		
+		if($same_chr){
+		    
+		    $pair{$pairs[$p]}->{0}=[ $chr1, $start1[$p]-1, $end2[$p], $pairs[$p], 0, $str{$strand1[$p]},
+				    $start1[$p]-1, $end2[$p], $color,
+				    2, $tag_length->{$sample}->{$ends_order1[$p]}.",".$tag_length->{$sample}->{$ends_order2[$p]}, "0,".($start2[$p]-$start1[$p]) ];    
+		    $pt{$n}=$pair{$pairs[$p]}->{0};
+		    $n++;
+		    
+		}else{
+		    
+		    $pair{$pairs[$p]}->{1}=[ $chr1, $start1[$p]-1, $end1[$p] , $pairs[$p]."/1", 0, $str{$strand1[$p]},
+						$start1[$p]-1, $end1[$p], $color,
+						1, $tag_length->{$sample}->{$ends_order1[$p]}, 0];
+		    $pt{$n}=$pair{$pairs[$p]}->{1};
+		    $n++;
+		    
+		    
+		    $pair{$pairs[$p]}->{2}=[ $chr2, $start2[$p]-1, $end2[$p], $pairs[$p]."/2", 0, $str{$strand2[$p]},
+						$start2[$p]-1, $end2[$p], $color,
+						1, $tag_length->{$sample}->{$ends_order2[$p]}, 0];
+		    $pt{$n}=$pair{$pairs[$p]}->{2};
+		    $n++;
+		}
+	    }else{
+		
+		if($same_chr){
+		    ${$pair{$pairs[$p]}->{0}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{0}}[8]});
+		}else{
+		    ${$pair{$pairs[$p]}->{1}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{1}}[8]});
+		    ${$pair{$pairs[$p]}->{2}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{2}}[8]});
+		}
+	    }
+	}
+    }
+    close LINKS;
+    
+    my $nb_pairs=$n-1;
+    
+    open BED, ">$bed_file" or die "$0: can't write in the output: $bed_file :$!\n";
+    print BED "track name=\"$bed_file\" description=\"mate pairs involved in links\" ".
+	      "visibility=2 itemRgb=\"On\"\n";
+    
+    for my $i (1..$nb_pairs){
+	print BED join("\t",@{$pt{$i}})."\n";
+    }
+    
+    close BED;
+    
+    print LOG "-- output created: $bed_file\n";
+    
+    undef %pair;
+    undef %pt;
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub deleteBadOrderSensePairs{
+    
+    my (@tab)=@_;
+    my @tab2;
+
+    foreach my $v (@tab){
+	
+	$v=~s/[\(\)]//g;
+	push(@tab2,$v) if($v!~/[\$\*\@]$/);
+    }
+    return @tab2;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub getAllEntries{    
+    my (@tab)=split (/,/,$_[0]);
+    my @tab2;
+
+    foreach my $v (@tab){
+	
+	$v=~s/[\(\)]//g;
+	push(@tab2,$v);
+    }
+    return @tab2;
+}#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub getAllEntriesWOspecialChar{    
+    my (@tab)=split (/,/,$_[0]);
+    my @tab2;
+
+    foreach my $v (@tab){
+	
+	$v=~s/[\(\)\$\*\@]//g;
+	push(@tab2,$v);
+    }
+    return @tab2;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub links2SVfile{
+    
+    my($links_file,$sv_file)=@_;
+    
+    print LOG "# Converting to the sv output table...\n";
+    open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n";
+    open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n";
+    
+    my @header=qw(chr_type SV_type BAL_type chromosome1 start1-end1 average_dist
+    chromosome2 start2-end2 nb_pairs score_strand_filtering score_order_filtering score_insert_size_filtering
+    final_score breakpoint1_start1-end1 breakpoint2_start2-end2);
+    
+    my $nb_links=0;
+    
+    while (<LINKS>){
+	
+	my @t=split;
+	my @sv=();
+	my $sv_type="-";
+	my $strand_ratio="-";
+	my $eq_ratio="-";
+	my $eq_type="-";
+	my $insert_ratio="-";
+	my $link="-";
+	my ($bk1, $bk2)=("-","-");
+	my $score="-";
+	
+	my ($chr1,$start1,$end1)=($t[0],$t[1],$t[2]); 
+	my ($chr2,$start2,$end2)=($t[3],$t[4],$t[5]);
+	my $nb_pairs=$t[6];
+	$chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
+	$chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
+	my $chr_type=($chr1 eq $chr2)? "INTRA":"INTER";
+	
+	#if strand filtering
+	if (defined $t[16]){
+	    #if inter-chr link
+	    $sv_type=$t[16];
+	    if(defined $t[17] && $t[17]=~/^(\d+)\/(\d+)$/){
+		$strand_ratio=floor($1/$2*100)."%";
+		$score=$t[18];
+	    }
+	    if(defined $t[18] && $t[18]=~/^(\d+)\/(\d+)$/){
+	    #if intra-chr link with insert size filtering
+		$strand_ratio=floor($1/$2*100)."%";
+		$link=floor($t[17]);
+		if($sv_type!~/^INV/){
+		    $insert_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
+		    $score=$t[20];
+		}else{
+		    $score=$t[19];
+		}
+	    }
+	}
+	
+	if(defined $t[18] && ($t[18] eq "UNBAL" || $t[18] eq "BAL")){
+	    
+	    #if strand and order filtering only and/or interchr link
+	    $eq_type=$t[18];
+	    $eq_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
+	    ($bk1, $bk2)=($t[20],$t[21]);
+	    foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}
+	    $score=$t[22];
+	    
+	}elsif(defined $t[19] && ($t[19] eq "UNBAL" || $t[19] eq "BAL")){
+	    
+	    #if all three filtering
+	    $link=floor($t[17]);
+	    $eq_type=$t[19];
+	    $eq_ratio=floor($1/$2*100)."%" if($t[20]=~/^(\d+)\/(\d+)$/);
+	    
+	    if(defined $t[21] && $t[21]=~/^(\d+)\/(\d+)$/){
+		$insert_ratio=floor($1/$2*100)."%";
+		($bk1, $bk2)=($t[22],$t[23]);
+		$score=$t[24];
+		
+	    }else{
+		($bk1, $bk2)=($t[21],$t[22]);
+		$score=$t[23];
+	    }
+	    foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}
+	    
+	}
+	
+	
+	push(@sv, $chr_type, $sv_type,$eq_type);
+	push(@sv,"$chr1\t$start1-$end1");
+	push(@sv, $link);
+	push(@sv,"$chr2\t$start2-$end2",
+	     $nb_pairs,$strand_ratio,$eq_ratio,$insert_ratio, decimal($score,4), $bk1, $bk2);
+	
+	
+	print SV join("\t",@sv)."\n";
+    }
+    
+    close LINKS;
+    close SV;
+    
+    system "sort  -k 9,9nr -k 13,13nr $sv_file > $sv_file.sorted";
+    
+    open SV, "<".$sv_file.".sorted" or die "$0: can't open in the output: $sv_file".".sorted :$!\n";
+    my @links=<SV>;
+    close SV;
+    
+    open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n";
+    
+    print SV join("\t",@header)."\n";
+    print SV @links;
+    close SV;
+    
+    unlink($sv_file.".sorted");
+  
+    print LOG "-- output created: $sv_file\n";
+    
+}
+#------------------------------------------------------------------------------#
+sub densityCalculation{
+    
+    my ($chr,$chrID,$file,$tag_length,$window_dist,$step,$mates_file,$mates_file_ref,$density_file,$input_format)=@_;
+    
+    my @sfile=split(/\./,$$mates_file[$file]);
+    my $fchr=$sfile[$#sfile];
+    
+    my $fh = new FileHandle;
+    
+    my %density;
+    my %density_ref;
+    my @ratio;
+    my ($cov,$cov_ref);
+  
+    #FREQUENCY CALCULATION PROCEDURE
+    print LOG "# $fchr : Frequency calculation procedure...\n";
+    &FreqCalculation(\%density,$chr,$chrID,$tag_length,$window_dist,$step,$$mates_file[$file],$input_format);
+    &FreqCalculation(\%density_ref,$chr,$chrID,$tag_length,$window_dist,$step,$$mates_file_ref[$file],$input_format);
+   
+    #MAKING RATIO AND OUTPUT
+    print LOG "\# Ratio calculation procedure...\n";
+    $density_file=~s/\/mates\//\/density\//;
+    $fh->open(">".$density_file) or die "$0: can't write in the output ".$density_file." :$!\n";
+    
+    foreach my $k (1..$chr->{nb_chrs}){
+	foreach my $frag (1..$chr->{$k}->{nb_frag}){
+		
+	    @ratio= ($chr->{$k}->{name},
+		    (${$chr->{$k}->{$frag}}[0]+1),
+		    (${$chr->{$k}->{$frag}}[1]+1));
+		
+	    $cov=(exists $density{$k}{$frag}->{count})? $density{$k}{$frag}->{count}:0;
+	    $cov_ref=(exists $density_ref{$k}{$frag}->{count})? $density_ref{$k}{$frag}->{count}:0;
+		
+	    push(@ratio,$cov,$cov_ref);
+	    push(@ratio,log($cov/$cov_ref)) if($cov && $cov_ref);
+	    push(@ratio,-log($cov_ref+1)) if(!$cov && $cov_ref);
+	    push(@ratio,log($cov+1)) if($cov && !$cov_ref);
+	    next if(!$cov && !$cov_ref);
+		
+	    print $fh join("\t",@ratio)."\n";
+	}
+    }
+    
+    $fh->close;
+    print LOG "-- output created: $density_file\n";
+    
+    undef %density;
+    undef %density_ref;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub FreqCalculation{
+    
+    my ($density,$chr,$chrID,$tag_length,$window_dist,$step,$mates_file,$input_format) = @_;
+    
+    my @sfile=split(/\./,$mates_file);
+    my $fchr=$sfile[$#sfile];
+    my $fh = new FileHandle;
+    
+    my $nb_windows=0;
+    my $warn=100000;
+    my $record=0;
+    my %pair;
+ 
+    my ($sumX,$sumX2) = (0,0);
+ 
+    print LOG "\# Frequency calculation for $mates_file...\n";
+ 
+     if ($mates_file =~ /.gz$/) {
+	$fh->open("gunzip -c $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";
+    }elsif($mates_file =~ /.bam$/){
+	o$fh->open("$SAMTOOLS_BIN_DIR/samtools view $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";#GALAXY
+    }else{
+	$fh->open("<".$mates_file) or die "$0: can't open ".$mates_file.":$!\n";
+    }
+    
+    while(<$fh>){
+	
+	my @t=split;
+	my $mate=$t[0];
+	    
+	my ($chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1, $end_order_read2,);
+	
+	next if(exists $pair{$mate});
+	
+	next if (!readMateFile(\$chr_read1, \$chr_read2, \$firstbase_read1, \$firstbase_read2,\$end_order_read1, \$end_order_read2, \@t, $input_format,$tag_length));
+	
+	next unless (exists $chrID->{$chr_read1} || exists $chrID->{$chr_read2}); 
+	($chr_read1, $chr_read2)= ($chrID->{$chr_read1},$chrID->{$chr_read2});
+	
+	$pair{$mate}=undef;
+	$record++;
+	
+	my ($coord_start_read1,$coord_end_read1, $coord_start_read2,$coord_end_read2);
+	
+	recupCoords($firstbase_read1,\$coord_start_read1,\$coord_end_read1,$tag_length->{$end_order_read1},$input_format);
+	recupCoords($firstbase_read2,\$coord_start_read2,\$coord_end_read2,$tag_length->{$end_order_read2},$input_format);
+	
+	my $length = abs($coord_start_read1-$coord_start_read2);
+	$sumX += $length;							#add to sum and sum^2 for mean and variance calculation
+	$sumX2 += $length*$length;	
+	
+	for(my $i=1;$i<=$chr->{$chr_read1}->{'nb_frag'};$i++){
+	    
+	    if (abs ($coord_start_read1-${$chr->{$chr_read1}->{$i}}[0]) <= $window_dist){
+		
+		&addToDensity($density,$chr_read1,$i,\$nb_windows)
+		if(overlap($coord_start_read1,$coord_end_read2,${$chr->{$chr_read1}->{$i}}[0],${$chr->{$chr_read1}->{$i}}[1]));
+		
+	    }else{
+		
+		$i=getNextFrag($coord_start_read1,$i,${$chr->{$chr_read1}->{$i}}[0],$chr->{$chr_read1}->{nb_frag},$window_dist,$step);
+	    }
+	}
+	
+	if($record>=$warn){
+	    print LOG "-- $warn mate-pairs analysed - $nb_windows points created\n";
+	    $warn+=100000;
+	}
+    }
+    $fh->close;
+
+    print LOG "-- $fchr : Total : $record mate-pairs analysed - $nb_windows points created\n";
+
+    if($record>0){
+	
+	my $mu = $sumX/$record;
+	my $sigma = sqrt($sumX2/$record - $mu*$mu);
+	print LOG "-- $fchr : mu length = $mu, sigma length = $sigma\n";
+    }
+
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub ratio2segdup{
+    
+    my($id,$density_file,$segdup_file)=@_;
+    
+    print LOG "# Converting to circos format...\n";
+        
+    open RATIO, "<$density_file" or die "$0: can't open $density_file :$!\n";
+    open SEGDUP, ">$segdup_file" or die "$0: can't write in the output: $segdup_file :$!\n";
+  
+    while(<RATIO>){
+	chomp;
+	my ($chr1,$start1,$end1,$ratio)=(split /\t/)[0,1,2,5];	
+	print SEGDUP "$id$chr1\t$start1\t$end1\t$ratio\n";
+    }
+    
+    close RATIO;
+    close SEGDUP;
+    print LOG "-- output created: $segdup_file\n";
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub ratio2bedfile{
+    
+    my($density_file,$bed_file)=@_;
+    
+    print LOG "# Converting to bedGraph format...\n";
+        
+    open RATIO, "<$density_file" or die "$0: can't open $density_file :$!\n";
+    open BED, ">$bed_file" or die "$0: can't write in the output: $bed_file :$!\n";
+    print BED "track type=bedGraph name=\"$bed_file\" description=\"log ratios for cnv detection\" ".
+	      "visibility=2 color=255,0,0 alwaysZero=\"On\"\n";
+    
+    while(<RATIO>){
+	chomp;
+	my ($chr1,$start1,$end1,$ratio)=(split /\t/)[0,1,2,5];
+	$chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/);
+	print BED "$chr1\t".($start1-1)."\t$end1\t$ratio\n";
+    }
+    
+    close RATIO;
+    close BED;
+    print LOG "-- output created: $bed_file\n";
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub inverseSense{
+    
+    my $mate_sense=$_[0];
+    my %reverse=( 'F'  => 'R' , 'R'  => 'F' ,
+		  'FF' => 'RR', 'RR' => 'FF',			
+		  'FR' => 'RF', 'RF' => 'FR');
+    return $reverse{$mate_sense};
+}
+
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub getNextFrag{
+    
+    my ($read_start,$frag_num,$frag_start,$frag_last,$window_dist,$step)=@_;
+    
+    my $how_far = $read_start-$frag_start;
+    my $nb_windows_toskip;
+    
+    if($how_far>0){
+	
+	$nb_windows_toskip=($how_far/$step)-($window_dist/$step);
+	$nb_windows_toskip=~ s/\..*//;
+	$nb_windows_toskip=0 if($nb_windows_toskip<0);
+	return ($frag_num + $nb_windows_toskip);
+    }
+    return $frag_last;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub getColor{
+
+    my($count,$hcolor,$format)=@_;
+    for my $col ( keys % { $hcolor} ) {
+       return $col if($count>=$hcolor->{$col}->[0] && $count<=$hcolor->{$col}->[1]);
+    }
+    return "white" if($format eq "circos");
+    return "255,255,255" if($format eq "bed");
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub recupCoords{
+    
+    my($c_hit,$cs_hit,$ce_hit,$tag_length,$input_format)=@_;
+    my $strand = 'F';
+    
+    if ($c_hit=~s/^\-//) {
+	$strand='R';
+	    $$cs_hit=$c_hit;
+	    $$ce_hit=$c_hit-($tag_length-1); 
+    }else{
+	$$cs_hit=$c_hit;
+	$$ce_hit=$c_hit+($tag_length-1); 
+    }
+    return $strand;
+    
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub overlap {
+    my($cs_hit,$ce_hit,$cs_region,$ce_region)=@_;
+    if( (($cs_hit < $cs_region) && ($ce_hit < $cs_region )) || (($cs_hit > $ce_region) && ($ce_hit > $ce_region )) ) {
+        return 0;
+    }
+    return 1;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub makeLink {
+    
+    my ($link,$chr1,$frag1,$chr2,$frag2,$mt,$nb)=@_;
+
+    if($chr1>$chr2){
+        ($chr1,$chr2)= ($chr2,$chr1);
+        ($frag1,$frag2)= ($frag2,$frag1);
+    }
+    
+     if($chr1 == $chr2){
+        if($frag1>$frag2){
+	    ($frag1,$frag2)= ($frag2,$frag1);
+	}
+    }
+  
+    if(!exists $link->{$chr1}->{$chr2}->{$frag1}->{$frag2}){
+        $link->{$chr1}->{$chr2}->{$frag1}->{$frag2}=$mt;
+	$$nb++;
+    }elsif($link->{$chr1}->{$chr2}->{$frag1}->{$frag2}!~/(^|,)$mt(,|$)/){
+        $link->{$chr1}->{$chr2}->{$frag1}->{$frag2}.=",$mt";
+    }
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#fonction of adding the read to the density profile
+sub addToDensity {
+    
+    my ($density,$chr1,$frag1,$nb)=@_;
+    
+    if(!exists $density->{$chr1}->{$frag1}->{count}){
+            $density->{$chr1}->{$frag1}->{count}=1;
+	    $$nb++;
+    }else{
+            $density->{$chr1}->{$frag1}->{count}++;
+    }
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub floor {
+    my $nb = $_[0];
+    $nb=~ s/\..*//;
+    return $nb;
+}
+#------------------------------------------------------------------------------#
+sub decimal{
+    
+  my $num=shift;
+  my $digs_to_cut=shift;
+
+  $num=sprintf("%.".($digs_to_cut-1)."f", $num) if ($num=~/\d+\.(\d){$digs_to_cut,}/);
+
+  return $num;
+}
+
+#------------------------------------------------------------------------------#
+sub max {
+    
+    my($max) = shift(@_);
+    foreach my $temp (@_) {
+        $max = $temp if $temp > $max;
+    }
+    return($max);
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub min {
+    
+    my($min) = shift(@_);
+    foreach my $temp (@_) {
+        $min = $temp if $temp < $min;
+    }
+    return($min);
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub sortTablebyIndex{
+    my ($tab1,$tab2)=@_;
+    my @tab3;
+    
+    foreach my $i (@$tab1){
+	$tab3[$i]=$$tab2[$$tab1[$i]];
+    }
+    return @tab3;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub round {
+  my $number = shift || 0;
+  my $dec = 10 ** (shift || 0);
+  return int( $dec * $number + .5 * ($number <=> 0)) / $dec;
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub getUniqueTable{
+    
+    my (@tab)=@_;
+    my (%saw,@out)=();
+    undef %saw;
+    return sort(grep(!$saw{$_}++, @tab)); 
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+sub catFiles {
+    
+    unlink("$_[1]") if(exists $_[1]);
+    system qq( cat "$_" >> "$_[1]" ) for @{$_[0]};
+}
+#------------------------------------------------------------------------------#
+#------------------------------------------------------------------------------#
+#check if the configuration file is correct
+sub validateconfiguration{
+    
+    my %conf=%{$_[0]};
+    my $list_prgs="@ARGV";
+    
+    my @general_params=qw(input_format mates_orientation read1_length read2_length mates_file cmap_file);
+    my @detection_params=qw(split_mate_file window_size step_length split_mate_file);
+    my @filtering_params=qw(split_link_file nb_pairs_threshold strand_filtering split_link_file);
+    my @circos_params=qw(organism_id colorcode);
+    my @bed_params=qw(colorcode);
+    my @compare_params=qw(list_samples file_suffix);
+    
+    foreach my $dir ($conf{general}{output_dir},$conf{general}{tmp_dir}){
+	
+	unless (defined($dir)) {
+	    $dir = ".";
+	}
+	unless (-d $dir){
+	    mkdir $dir or die;
+	}
+	$dir.="/" if($dir!~/\/$/);
+    }
+    
+    unless (defined($conf{general}{num_threads})) {
+	    $conf{general}{num_threads} = 1;
+	}
+    $conf{general}{num_threads}=24 if($conf{general}{num_threads}>24);
+    
+    if($list_prgs!~/links2compare/){
+    
+	foreach my $p (@general_params){
+	    die("Error Config : The parameter \"$p\" is not defined\n") if (!defined $conf{general}{$p});
+	}
+	
+	$conf{general}{input_format}="sam" if($conf{general}{input_format} eq "bam");
+	
+	unless (defined($conf{general}{sv_type})) {
+	    $conf{general}{sv_type} = "all";
+	}
+	
+	$conf{general}{read_lengths}={ 1=> $conf{general}{read1_length}, 2=> $conf{general}{read2_length}};
+    }
+    
+    if($list_prgs=~/(linking|cnv)/){
+	
+	foreach my $p (@detection_params){
+	    die("Error Config : The parameter \"$p\" is not defined\n") if (!defined $conf{detection}{$p});
+	}
+	
+	die("Error Config : The parameter \"mates_file_ref\" is not defined\n") if($list_prgs=~/cnv/ && !defined $conf{detection}{mates_file_ref});
+	
+	if($conf{detection}{step_length}>$conf{detection}{window_size}){
+	    die("Error Config : Parameter \"step_length\" should not exceed \"window size\"\n");
+	}
+	
+	unless (-d $conf{general}{tmp_dir}."/mates"){
+	    mkdir $conf{general}{tmp_dir}."/mates" or die;
+	}
+	
+	if($list_prgs=~/linking/){
+	    unless (-d $conf{general}{tmp_dir}."/links"){
+		mkdir $conf{general}{tmp_dir}."/links" or die;
+	    }
+	}
+	if($list_prgs=~/cnv/){
+	    unless (-d $conf{general}{tmp_dir}."/density"){
+		mkdir $conf{general}{tmp_dir}."/density" or die;
+	    }
+	}
+	
+    }
+    
+    if($list_prgs=~/filtering/){
+    
+	foreach my $p (@filtering_params) {
+	    die("Error Config : The filtering parameter \"$p\" is not defined\n") if (!defined $conf{filtering}{$p});
+	    
+	}
+	
+	if(defined($conf{filtering}{chromosomes})) {
+	    my @chrs=split(",",$conf{filtering}{chromosomes});
+	    my $exclude=($chrs[0]=~/^\-/)? 1:0;
+	    for my $chrName (@chrs){
+	      
+		die("Error Config : The filtering parameter \"chromosomes\" is not valid\n")
+		if(($chrName!~/^\-/ && $exclude) || ($chrName=~/^\-/ && !$exclude));
+		
+	    }
+	}
+	
+	if (( $conf{filtering}{order_filtering} )&& !$conf{filtering}{strand_filtering}) {
+	    die("Error Config : The parameter strand_filtering is set to \"0\" while order_filtering is selected".
+		"\nChange strand_filtering to \"1\" if you want to use the order filtering\n");
+	}
+	if (( !defined($conf{filtering}{mu_length}) || !defined($conf{filtering}{sigma_length}) )&& $conf{filtering}{order_filtering}) {
+	    die("Error Config : You should set parameters \"mu_length\" and \"sigma_length\" to use order filtering\n");
+	}
+	if (( $conf{filtering}{insert_size_filtering} )&& !$conf{filtering}{strand_filtering}) {
+	    die("Error Config : The parameter strand_filtering is set to \"0\" while insert_size_filtering is selected".
+		"\nChange strand_filtering to \"1\" if you want to use the insert size filtering\n");
+	}
+	if (( !defined($conf{filtering}{mu_length}) || !defined($conf{filtering}{sigma_length}) )&& $conf{filtering}{insert_size_filtering}) {
+	    die("Error Config : You should set parameters \"mu_length\" and \"sigma_length\" to use discriminate insertions from deletions\n");
+	}
+	
+	if (!defined($conf{filtering}{indel_sigma_threshold})) {
+	    $conf{filtering}{indel_sigma_threshold} = 2;
+	}
+	if (!defined($conf{filtering}{dup_sigma_threshold})) {
+	    $conf{filtering}{dup_sigma_threshold} = 2;
+	}
+	if (!defined($conf{filtering}{singleton_sigma_threshold})) {
+	    $conf{filtering}{singleton_sigma_threshold} = 4;
+	}
+	
+	if (!defined($conf{filtering}{nb_pairs_order_threshold})) {
+	    $conf{filtering}{nb_pairs_order_threshold} = 1;
+	}
+	
+	if (!defined($conf{filtering}{final_score_threshold})) {
+	    $conf{filtering}{final_score_threshold} = 0.8;
+	}
+	
+	if ($conf{filtering}{nb_pairs_order_threshold}>$conf{filtering}{nb_pairs_threshold}) {
+	    die("Error Config : Parameter \"nb_pairs_order_threshold\" should not exceed \"nb_pairs_threshold\"\n");
+	}
+	
+    }
+    
+    if($list_prgs=~/2circos$/){
+	foreach my $p (@circos_params) {
+	    next if($list_prgs=~/^ratio/ && $p eq "colorcode");
+	    die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p});
+	}
+    }
+    
+    if($list_prgs=~/2bed$/){
+	foreach my $p (@bed_params) {
+	    die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p});
+	}
+    }
+    
+    if($list_prgs=~/links2compare/){
+	foreach my $p (@compare_params) {
+	    die("Error Config : The compare parameter \"$p\" is not defined\n") if (!defined $conf{compare}{$p});
+	}
+	
+	unless (defined($conf{compare}{same_sv_type})) {
+	    $conf{compare}{same_sv_type} = 0;
+	}
+	
+	unless (defined($conf{compare}{min_overlap})) {
+	    $conf{compare}{min_overlap} = 1E-9;
+	}
+	
+	if($conf{compare}{circos_output}){
+	    foreach my $p (@circos_params) {
+		next if($list_prgs=~/^ratio/ && $p eq "colorcode");
+		die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p});
+	    }
+	}
+	if($conf{compare}{bed_output}){
+	    foreach my $p (@bed_params) {
+		die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p});
+	    }
+	    die("Error Config : The compare parameter \"list_read_lengths\" is not defined\n") if (!defined $conf{compare}{list_read_lengths});
+
+	    my @samples=split(",",$conf{compare}{list_samples});
+	    my @read_lengths=split(",",$conf{compare}{list_read_lengths});
+	    for my $i (0..$#samples){
+		my @l=split("-",$read_lengths[$i]);
+		$conf{compare}{read_lengths}{$samples[$i]}={ 1=> $l[0], 2=> $l[1]};
+	    }
+	}
+    }
+   
+    
+}
+#------------------------------------------------------------------------------#
+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/svdetect/SVDetect_run_parallel.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,324 @@
+<tool id="svdetect_run_parallel" name="Detect clusters of anomalously mapped pairs">
+
+<description>and identify structural variants</description>
+
+<command interpreter="perl">SVDetect_run_parallel.pl
+
+#if $getLinks.linking == "linking"
+linking
+<!-- -out1 '$links_file' -->
+#end if
+#if $getFilteredLinks.filtering == "filtering"
+filtering
+<!--- out2 '$flinks_file' -->
+#if str($getFilteredLinks.links2SV) == "create"
+links2SV
+-out3 '$sv_file'
+#end if
+#if  $getFilteredLinks.file_conversion.file_conversion_select=="convert" and str($getFilteredLinks.file_conversion.links2circos) == "create"
+links2circos
+-out4 '$circos_file'
+#end if
+#if  $getFilteredLinks.file_conversion.file_conversion_select=="convert" and str($getFilteredLinks.file_conversion.links2bed) == "create"
+links2bed
+-out5 '$bed_file'
+#end if
+#end if
+-conf '$config_file'
+-l '$log_file'
+-N '$sample_name'
+
+</command>
+
+<inputs>
+	<param name="sample_name" type="text" value="sample" label="Sample Name"/>
+	<param name="mates_file" format="bam" type="data" label="Input BAM file (.ab.bam)"/>
+   	<param name="cmap_file" format="len" type="data" label="Chromosomes list file (.len)" help="Tabulated file format with Chromosome ID (integer from 1), name and length"/>
+  	<param name="mates_orientation" type="select" format="txt" label="Type of sequencing technology and libraries">
+		<option value="FR">Illumina paired-ends</option>
+		<option value="RF">Illumina mate-pairs</option>
+		<option value="FR">SOLiD paired-ends</option>
+		<option value="RR">SOLiD mate-pairs</option>
+   	</param>
+	<param name="read1_length" type="integer" size="10" value="50" label="Read 1 length (bp)" help="Length of the first read in a pair (left read)"/>
+	<param name="read2_length" type="integer" size="10" value="50" label="Read 2 length (bp)" help="Length of the second read in a pair (right read)"/>
+	<param name="sv_type" type="select" format="txt" label="Type of SV to detect">
+		<option value="all">all types of SVs</option>
+		<option value="intra">intrachromosomal SVs only</option>
+		<option value="inter">interchromosomal SVs only</option>
+  	</param>
+   	
+   	<conditional name="getLinks">
+   		<param name="linking" type="select" label="Linking procedure" help="Detection and isolation of links">
+			<option value="linking">Yes</option>
+			<option value="">No, already done</option>
+  		</param>
+		<when value="">
+    			<!-- do nothing here -->
+   		</when>
+    		<when value="linking">
+			<param name="splitmate" label="Do you want to split the original mate file per chromosome for parallel computing?" type="boolean" truevalue="split" falsevalue="do_not_split" checked="True" help="Untick it if already done"/>
+    			<param name="window_size" type="integer" size="20" value="3000" label="Window size (bp)" help="Equal to at least “2µ+2√2σ"/>
+			<param name="step_length" type="integer" size="20" value="250" label="Step length size (bp)" help="Equal to 1/2 or 1/4 of the window size"/>
+    		</when>
+   	</conditional>
+
+   	<conditional name="getFilteredLinks">
+	   	<param name="filtering" type="select" label="Filtering procedure" help="Filtering of links according different parameters and thresholds">
+			<option value="filtering">Yes</option>
+                        <option value="">No</option>
+	  	</param>
+		<when value="">
+	    		<!-- do nothing here -->
+	   	</when>
+	    	<when value="filtering">
+			
+			<param name="splitlink" label="Do you want to split the original link file per chromosome for parallel computing?" type="boolean" truevalue="split" falsevalue="do_not_split" checked="False" help="Untick it if (the linking is) already done"/>
+			<param name="chromosomes" type="text" size="20" label="List of chromosome names to keep or exclude"/>
+			<param name="nb_pairs_threshold" type="integer" size="20" value="5" label="Minimum number of pairs in a cluster"/>
+		
+			<conditional name="filter1">
+	   			<param name="strand_filtering" type="select" label="Strand filtering procedure">
+					<option value="strand">Yes</option>
+					<option value="">No</option>
+	  			</param>
+				<when value="">
+	    				<!-- do nothing here -->
+	   			</when>
+	    			<when value="strand">
+
+					<conditional name="filter2">
+			   			<param name="order_filtering" type="select" label="Order filtering procedure">
+							<option value="order">Yes</option>
+							<option value="">No</option>
+			  			</param>
+						<when value="">
+			    				<!-- do nothing here -->
+			   			</when>
+			    			<when value="order">
+
+							<conditional name="filter3">
+					   			<param name="insert_size_filtering" type="select" label="Insert-size filtering procedure">
+									<option value="insert">Yes</option>
+									<option value="">No</option>
+					  			</param>
+								<when value="">
+					    				<!-- do nothing here -->
+					   			</when>
+					    			<when value="insert">
+									<param name="indel_sigma_threshold" type="float" size="20" value="3" label="Minimal number of sigma fold for the insert size filtering and to call insertions and deletions"/>
+									<param name="dup_sigma_threshold" type="float" size="20" value="3" label="minimal number of sigma fold for the insert size filtering to call tandem duplications"/>
+									<param name="singleton_sigma_threshold" type="float" size="20" value="4" label="Minimal number of sigma fold for the insert size filtering to call singletons" help="for Illumina mate-pairs only"/>
+			    					</when>
+			   				</conditional>
+
+							<param name="mu_length" type="integer" size="20" value="3000" label="Mean insert size value (µ) of normally mapped mate-pairs, in bp"/>
+							<param name="sigma_length" type="integer" size="20" value="250" label="Calculated sd value (σ) from the distribution of normally mapped  mate-pairs, in bp"/>
+			    				<param name="nb_pairs_order_threshold" type="integer" size="20" value="2" label="Minimal number of pairs in a subgroup of paired-end reads for balanced events"/>
+			    			</when>
+			   		</conditional>
+						
+					<param name="final_score_threshold" type="float" size="20"  value="1.0" label="Minimal final filtering score for calling SVs" help="A value of 1 means all the pairs in a cluster were consistent between each other after applying filters"/>
+	    			</when>
+	   		</conditional>
+		
+			<param name="links2SV" label="Do you want to have filtered links in a tabulated file format showing significant SVs?" type="boolean" truevalue="create" falsevalue="do_not_create" checked="True"/>
+		
+			<conditional name="file_conversion">
+				<param name="file_conversion_select" type="select" label="Output file conversion" help="Converts filtered links to Circos/BED files format for graphical view of SVs">
+					<option value="do_not_convert">No</option>
+					<option value="convert">Yes</option>
+				</param>
+				<when value="do_not_convert">
+					    <!-- do nothing here -->
+				</when>
+				<when value="convert">
+					<param name="links2circos" label="Converts the link list to the Circos link format" type="boolean" truevalue="create" falsevalue="do_not_create" checked="True"/>
+					<param name="links2bed" label="Converts the link list to the UCSC BED format" type="boolean" truevalue="create" falsevalue="do_not_create" checked="False"/>
+					<param name="organism_id" type="text" size="10" value="hs" label="Organism ID"/>
+					<repeat name="color_code" title="Color-code" min="1" max="7">
+						<param name="color" type="select" label="Color">
+							<option value="grey">grey</option>
+							<option value="black">black</option>
+							<option value="blue">blue</option>
+							<option value="green">green</option>
+							<option value="purple">purple</option>
+							<option value="orange">orange</option>
+							<option value="red">red</option>
+						</param>
+						<param name="interval" type="text" value="1,3" label="Interval"/>
+					</repeat>
+				</when>
+			</conditional>
+    		</when>
+	</conditional>
+</inputs>
+
+
+<outputs>
+	<!--<data format="txt" name="links_file" label="svdetect.links">
+		<filter>getLinks['linking']=="linking"</filter>
+	</data>
+	<data format="txt" name="flinks_file" label="svdetect.links.filtered">
+		<filter>getFilteredLinks['filtering']=="filtering"</filter>
+	</data>-->
+	<data format="sv" name="sv_file" label="${sample_name}.sv">
+		<filter>(
+			getFilteredLinks['filtering']=="filtering" and
+			getFilteredLinks['links2SV'] is True
+			)
+	         </filter>
+	</data>
+	<data format="segdup" name="circos_file" label="${sample_name}.segdup">
+		<filter>(
+			getFilteredLinks['filtering']=="filtering" and
+			getFilteredLinks['file_conversion']['file_conversion_select']=="convert" and
+			getFilteredLinks['file_conversion']['links2circos'] is True
+			)
+	         </filter>
+	</data>
+	<data format="bed" name="bed_file" label="${sample_name}.bed">
+		<filter>(
+			getFilteredLinks['filtering']=="filtering" and
+			getFilteredLinks['file_conversion']['file_conversion_select']=="convert" and
+			getFilteredLinks['file_conversion']['links2bed'] is True
+			)
+	         </filter>
+	</data>
+	<data format="txt" name="log_file" label="${sample_name}.svdetect_run.log"/>
+</outputs>
+
+
+
+<configfiles>
+	<configfile name="config_file">
+&lt;general&gt;
+input_format = bam
+sv_type = ${sv_type}
+mates_orientation=${mates_orientation}
+read1_length=${read1_length}
+read2_length=${read2_length}
+mates_file=${mates_file}
+cmap_file=${cmap_file}
+tmp_dir=$__new_file_path__/svdetect/tmp
+output_dir=$__new_file_path__/svdetect
+num_threads=8
+&lt;/general&gt; 
+
+#if $getLinks.linking == "linking"
+&lt;detection&gt;
+#if str($getLinks.splitmate) == "split"
+split_mate_file=1
+#else
+split_mate_file=0
+#end if
+window_size=${getLinks.window_size}
+step_length=${getLinks.step_length}
+&lt;/detection&gt; 
+#end if
+
+#if $getFilteredLinks.filtering == "filtering"
+&lt;filtering&gt;
+#if str($getFilteredLinks.splitlink) == "split"
+split_link_file=1
+#else
+split_link_file=0
+#end if
+#if str($getFilteredLinks.chromosomes) != ""
+chromosomes=${getFilteredLinks.chromosomes}
+#end if
+nb_pairs_threshold=${getFilteredLinks.nb_pairs_threshold}
+#if $getFilteredLinks.filter1.strand_filtering == "strand"
+strand_filtering=1
+final_score_threshold=${getFilteredLinks.filter1.final_score_threshold}
+#if $getFilteredLinks.filter1.filter2.order_filtering == "order"
+order_filtering=1
+mu_length=${getFilteredLinks.filter1.filter2.mu_length}
+sigma_length=${getFilteredLinks.filter1.filter2.sigma_length}
+nb_pairs_order_threshold=${getFilteredLinks.filter1.filter2.nb_pairs_order_threshold}
+#if $getFilteredLinks.filter1.filter2.filter3.insert_size_filtering == "insert"
+insert_size_filtering=1
+indel_sigma_threshold=${getFilteredLinks.filter1.filter2.filter3.indel_sigma_threshold}
+dup_sigma_threshold=${getFilteredLinks.filter1.filter2.filter3.dup_sigma_threshold}
+singleton_sigma_threshold=${getFilteredLinks.filter1.filter2.filter3.singleton_sigma_threshold}
+#else
+insert_size_filtering=0
+#end if
+#else
+order_filtering=0
+#end if
+#else
+strand_filtering=0
+#end if
+&lt;/filtering&gt; 
+#end if
+
+#if $getFilteredLinks.filtering == "filtering"
+#if $getFilteredLinks.file_conversion.file_conversion_select == "convert"
+#if str($getFilteredLinks.file_conversion.links2circos) == "create"
+&lt;circos&gt;
+organism_id=${getFilteredLinks.file_conversion.organism_id}
+&lt;colorcode&gt;
+#for $color_repeat in $getFilteredLinks.file_conversion.color_code
+${color_repeat.color}=${color_repeat.interval}
+#end for
+&lt;/colorcode&gt;
+&lt;/circos&gt;
+#end if
+#if str($getFilteredLinks.file_conversion.links2bed) == "create"
+&lt;bed&gt;
+&lt;colorcode&gt;
+#for $color_repeat in $getFilteredLinks.file_conversion.color_code
+#if str($color_repeat.color)== "grey"
+190,190,190=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "black"
+0,0,0=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "blue"
+0,0,255=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "green"
+0,255,0=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "purple"
+153,50,205=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "orange"
+255,140,0=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "red"
+255,0,0=${color_repeat.interval}
+#end if
+#end for
+&lt;/colorcode&gt;
+&lt;/bed&gt;
+#end if
+#end if
+#end if	
+	</configfile>
+</configfiles>
+
+  <help>
+**What it does**
+
+SVDetect - Version : 0.8b
+
+Parallel version (nCPU=8)
+
+SVDetect is a application for the isolation and the type prediction of intra- and inter-chromosomal rearrangements from paired-end/mate-pair sequencing data provided by the high-throughput sequencing technologies
+
+This tool aims to identifying structural variations (SVs) with both clustering and sliding-window strategies, and helping in their visualization at the genome scale.
+SVDetect is compatible with SOLiD and Illumina (>=1.3) reads.
+
+Manual documentation available at the http://svdetect.sourceforge.net/Site/Manual.html
+
+-----
+
+.. class:: infomark
+
+Contact Bruno Zeitouni (svdetect@curie.fr) for any questions or concerns about the Galaxy implementation of SVDetect.
+
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/svdetect/circos_graph.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,290 @@
+<tool id="circos_graph" name="Circos" version="1.1.0">
+
+<description>plots</description>
+
+<command interpreter="perl">circos/bin/circos
+
+-conf '$circos_config_file'
+-outputfile '${outputfile}.dat'
+-png 
+
+> '$log_file'
+
+;
+
+rm '$outputfile'; ln -s '${outputfile}.png' '$outputfile' 
+
+</command>
+
+
+<inputs>
+	<param name="graph_name" type="text" size="20" value="graph1" label="Graph name"/>
+	
+	<param name="karyotype" type="select" format="txt" label="Type of model organism">
+		<option value="data/karyotype.human_hg19.txt">Human (homo sapiens, hs) -hg19-</option>
+		<option value="data/karyotype.human.txt">Human (homo sapiens, hs) -hg18-</option>
+		<option value="data/2/karyotype.mouse.txt">Mouse (Mus Musculus, mm)</option>
+		<option value="data/2/karyotype.dog.txt">Dog (Canis familiaris, cf)</option>
+		<option value="data/2/karyotype.rt.txt">Rat (Rattus norvegicus, rn)</option>
+		<option value="data/karyotype.yeast.txt">Yeast (Saccharomyces Cerevisiae, sc) -SGD-</option>
+
+  	</param>
+	<param name="chromosomes_units" type="integer" size="50" value="1000000" label="Chromosomes units"/>
+	<param name="chromosomes" type="text" size="100" value="" label="List of chromosome names to keep or exclude" help="ex: hs2;hs3 or -hsX;-hsY">
+		<sanitizer>
+			<valid initial="string.printable">
+				<add value=";"/>
+			</valid>
+		</sanitizer>
+	</param>
+	<param name="link_file" format="segdup" type="data" label="Input link file (.segdup)"/>
+</inputs>
+
+<outputs>
+	<data format="txt" name="log_file" label="${graph_name}.circos.log"/>
+	<data format="png" name="outputfile" label="${graph_name}.png"/>
+</outputs>
+
+
+
+<configfiles>
+	<configfile name="ideogram_config_file">
+		
+&lt;ideogram&gt;
+
+&lt;spacing&gt;
+
+default = 5u
+break   = 1u
+
+axis_break_at_edge = yes
+axis_break         = yes
+axis_break_style   = 2
+
+&lt;break_style 1&gt;
+stroke_color = black
+fill_color   = blue
+thickness    = 0.25r
+stroke_thickness = 2
+&lt;/break&gt;
+
+&lt;break_style 2&gt;
+stroke_color     = black
+stroke_thickness = 3p
+thickness        = 1.5r
+&lt;/break&gt;
+
+&lt;/spacing&gt;
+
+## thickness (px) of chromosome ideogram
+thickness        = 100p
+stroke_thickness = 2
+## ideogram border color
+stroke_color     = black
+fill             = yes
+## the default chromosome color is set here and any value
+## defined in the karyotype file overrides it
+fill_color       = black
+
+## fractional radius position of chromosome ideogram within image
+radius         = 0.85r
+show_label     = yes
+label_with_tag = yes
+label_font     = condensedbold
+label_radius   = dims(ideogram,radius) + 0.075r
+label_size     = 60p
+
+## cytogenetic bands
+band_stroke_thickness = 2
+
+## show_bands determines whether the outline of cytogenetic bands
+## will be seen
+show_bands            = yes
+## in order to fill the bands with the color defined in the karyotype
+## file you must set fill_bands
+fill_bands            = yes
+
+&lt;/ideogram&gt;
+
+	</configfile>
+
+	<configfile name="ticks_config_file">
+	
+show_ticks          = yes
+show_tick_labels    = yes
+
+&lt;ticks&gt;
+radius               = dims(ideogram,radius_outer)
+multiplier           = 1e-6
+
+&lt;tick&gt;
+spacing        = 0.5u
+size           = 2p
+thickness      = 2p
+color          = grey
+show_label     = no
+label_size     = 12p
+label_offset   = 0p
+format         = %.2f
+&lt;/tick&gt;
+	
+&lt;tick&gt;
+spacing        = 1u
+size           = 3p
+thickness      = 2p
+color          = dgrey
+show_label     = no
+label_size     = 12p
+label_offset   = 0p
+format         = %.2f
+&lt;/tick&gt;
+
+&lt;tick&gt;
+spacing        = 5u
+size           = 5p
+thickness      = 2p
+color          = black
+show_label     = yes
+label_size     = 16p
+label_offset   = 0p
+format         = %d
+&lt;/tick&gt;
+
+&lt;tick&gt;
+spacing        = 10u
+size           = 8p
+thickness      = 2p
+color          = black
+show_label     = yes
+label_size     = 20p
+label_offset   = 5p
+format         = %d
+&lt;/tick&gt;
+&lt;/ticks&gt;
+	</configfile>	
+	
+	
+	<configfile name="circos_config_file">
+&lt;colors&gt;
+&lt;&lt;include etc/colors.conf&gt;&gt;
+&lt;/colors&gt;
+
+&lt;fonts&gt;
+&lt;&lt;include etc/fonts.conf&gt;&gt;
+&lt;/fonts&gt;
+
+&lt;&lt;include $ideogram_config_file&gt;&gt;
+&lt;&lt;include $ticks_config_file&gt;&gt;
+
+karyotype = $karyotype
+
+&lt;image&gt;
+24bit = yes
+##png = yes
+##svg = no
+## radius of inscribed circle in image
+radius         = 1500p
+background     = white
+## by default angle=0 is at 3 o'clock position
+angle_offset   = -90
+#angle_orientation = counterclockwise
+
+auto_alpha_colors = yes
+auto_alpha_steps  = 5
+&lt;/image&gt;
+
+chromosomes_units= $chromosomes_units
+
+#if str($chromosomes)==""
+chromosomes_display_default = yes
+#else
+chromosomes_display_default = no
+chromosomes = $chromosomes
+#end if
+
+&lt;links&gt;
+
+z      = 0
+radius = 0.95r
+bezier_radius = 0.2r
+
+&lt;link segdup&gt;
+show         = yes
+color        = dgrey_a5
+thickness    = 2
+file         = $link_file
+record_limit = 1000
+&lt;/link&gt;
+
+&lt;/links&gt;
+
+
+anglestep       = 0.5
+minslicestep    = 10
+beziersamples   = 40
+debug           = no
+warnings        = no
+imagemap        = no
+
+units_ok = bupr
+units_nounit = n
+	</configfile>
+</configfiles>
+
+  <help>
+**What it does**
+
+Circos
+
+Manual documentation available at the http://circos.ca/
+
+
+**Example of link segdup file**
+
+segdup file::
+
+    1	hs1	1077096	1078746	color=red
+    1	hs1	1080923	1082805	color=red
+    2	hs1	1137684	1137961	color=red
+    2	hs3	1138138	1138423	color=red
+    3	hs11	1169417	1170000	color=red
+    3	hs11	1170025	1170975	color=red
+    4	hs11	1222480	1224271	color=green
+    4	hs11	1223328	1225675	color=green
+    5	hs12	1223336	1225812	color=grey
+    5	hs13	1224709	1227633	color=grey
+    6	hs11	1223621	1226460	color=red
+    6	hs11	1224918	1227633	color=red
+    7	hs11	1399510	1401513	color=white
+    7	hs11	1401628	1403697	color=white
+    8	hs15	1652045	1653746	color=red
+    8	hs15	1657167	1658940	color=red
+    9	hs11	165333	165887	color=white
+    9	hs11	165981	168016	color=white
+    10	hs11	1702700	1702841	color=red
+    10	hs11	1702903	1703057	color=red
+    11	hs11	1912272	1915186	color=white
+    11	hs11	1937111	1939824	color=white
+    12	hs11	1983211	1983355	color=red
+    12	hs11	1983591	1983748	color=red
+    13	hs11	2913657	2913898	color=white
+    13	hs11	2914048	2914341	color=white
+    14	hs11	3090593	3090749	color=purple
+    14	hs11	3090709	3090864	color=purple
+    15	hs21	3466365	3466434	color=red
+    15	hs21	3466554	3466620	color=red
+    16	hsX	3603073	3603321	color=white
+    16	hsX	3603295	3603520	color=white
+
+
+    
+-----
+
+.. class:: infomark
+
+Contact Bruno Zeitouni (svdetect@curie.fr) for any questions or concerns about the Galaxy implementation of Circos.
+    
+    
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/varscan/tool_dependencies.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="VarScan" version="2.3.5">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.5.jar</action>
+                <action type="move_file">
+                    <source>VarScan.v2.3.5.jar</source>
+                    <destination>$INSTALL_DIR/jars</destination>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="JAVA_JAR_PATH" action="set_to">$INSTALL_DIR/jars</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+        </readme>
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/varscan/varscan_mpileup.pl	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,116 @@
+#!/usr/bin/perl
+
+use strict;
+use Cwd;
+
+die qq(
+Bad numbr of inputs
+
+) if(!@ARGV);
+
+my $options ="";
+my $file="";
+my $command="";
+my $output="";
+my $working_dir = cwd();
+my $temp_vcf = "$working_dir/temp";
+my $log="";
+
+foreach my $input (@ARGV) 
+{
+	my @tmp = split "::", $input;
+	if($tmp[0] eq "COMMAND") 
+	{
+		$command = $tmp[1];
+	} 
+	elsif($tmp[0] eq "INPUT") 
+	{
+		$file = $tmp[1];
+	}
+	elsif($tmp[0] eq "OPTION") 
+	{
+		$options = "$options ${tmp[1]}";
+	}
+	elsif($tmp[0] eq "OUTPUT") 
+	{
+		$output = $tmp[1];
+	}
+	elsif($tmp[0] eq "LOG") 
+	{
+		$log = $tmp[1];
+	}
+	else 
+	{
+		die("Unknown Input: $input\n");
+	}
+}
+
+system ("$command $file $options 1>$temp_vcf 2>$log");
+
+vs2vcf($temp_vcf, $output);
+
+
+sub vs2vcf 
+{
+
+	#
+	# G l o b a l     v a r i a b l e s 
+	#
+	my $version = '0.1';
+
+	#
+	# Read in file
+	#
+	my $input = shift;
+	my $output = shift;
+	my $chr_ord = shift;
+	open(IN, $input) or die "Can't open $input': $!\n";
+	open(OUT, ">$output") or die "Can't create $output': $!\n";
+	my %output;
+
+	while ( <IN> )
+	{
+		if ( /^#/ )
+		{
+			print OUT;
+			next;
+		}
+		chomp;
+		my $line = $_;
+
+		my @flds = split ( "\t", $line );
+		my $ref = $flds[3];
+		my $alt = $flds[4];
+		#
+		# Deletion of bases
+		#
+		if ( $alt =~ /^\-/ )
+		{
+			($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref);
+		}
+
+		#
+		# Insertion of bases
+		#
+		if ( $alt =~ /^\+/ )
+		{
+			$flds[4] = $ref.substr($alt,1);
+		}
+		print OUT join( "\t", @flds),"\n" unless defined $chr_ord;
+		$output{$flds[0]}{$flds[1]} = join( "\t", @flds)."\n" if defined $chr_ord;
+	}
+	close(IN);
+	# if chromosome order given return in sorted order
+	if(defined $chr_ord) 
+	{
+		for my $chrom (@{ $chr_ord }) 
+		{
+			for my $pos (sort {$a<=>$b} keys %{ $output{$chrom} }) 
+			{
+				print OUT $output{$chrom}{$pos};
+			}
+		}
+	}
+	close(OUT);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/varscan/varscan_mpileup.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,124 @@
+<tool id="varscan_mpileup" name="VarScan mpileup" version="2.3.5">
+  <description>
+        mutation caller for targeted, exome, and whole-genome resequencing
+  </description>
+  <requirements>
+  	<requirement type="package" version="2.3.5">VarScan</requirement>
+  </requirements>
+  <command interpreter="perl">
+  	
+	varscan_mpileup.pl 
+	"COMMAND::java -jar \$JAVA_JAR_PATH/VarScan.v2.3.5.jar $exe_command" 
+  	"INPUT::$in_file"
+  	"OUTPUT::$output"
+  	"LOG::$log"
+	"OPTION::--min-coverage $min_coverage"
+	"OPTION::--min-reads2 $min_reads2"
+	"OPTION::--min-avg-qual $min_avg_qual"
+	"OPTION::--min-var-freq $min_var_freq"
+	"OPTION::--min-freq-for-hom $min_freq_for_hom"
+	"OPTION::--p-value $p_value"	
+	"OPTION::--strand-filter $strand_filter"	
+	"OPTION::--output-vcf 1"	
+	
+	#if ($vcf_sample_list):
+		"OPTION::--vcf-sample-list $vcf_sample_list"
+	#end if
+	"OPTION::--variants $variants"	
+	
+	
+  	
+  </command>
+
+  <inputs>
+  	
+	<param name="exe_command" type="select" label="Command" help="" optional="false">
+		<option value="mpileup2snp" >mpileup2snp</option>
+		<option value="mpileup2indel">mpileup2indel</option>
+		<option value="mpileup2cns">mpileup2cns</option>
+	</param>
+	<param name="in_file" type="data" format="pileup" label="mpileup file" help="The SAMtools mpileup file" />
+	<param name="min_coverage" type="integer" label="min-coverage" help="" optional="true" value="8"/>
+	<param name="min_reads2" type="integer" label="min-reads2" help="" optional="true" value="2"/>
+	<param name="min_avg_qual" type="integer" label="min-avg-qual" help="" optional="true" value="15"/>
+	<param name="min_var_freq" type="float" label="min-var-freq" help="" optional="true" value="0.01"/>
+	<param name="min_freq_for_hom" type="float" label="min-freq-for-hom" help="" optional="true" value="0.75"/>
+	<param name="p_value" type="text" label="p-value" help="" optional="true" value="0.99"/>
+	<param name="strand_filter" type="integer" label="strand-filter" help="" optional="true" value="1"/>
+	<param name="vcf_sample_list" type="data" label="vcf-sample-list" format="txt" help="" optional="true" />
+	<param name="variants" type="integer" label="variants" help="Set to 1 to report only variants" optional="true" value="1"/>
+	
+	
+  </inputs>
+  <outputs>
+  	<data type="data" format="vcf" name="output" label="${tool.name} result on ${on_string}"/>
+  	<data type="data" format="txt" name="log" label="${tool.name} result on ${on_string} (log) "/>
+  </outputs>
+  	
+  <help> 
+
+.. class:: infomark
+
+**What it does**
+
+::
+
+ VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most  operating systems. It can be used to detect different types of variation:
+
+    Germline variants (SNPs an dindels) in individual samples or pools of samples.
+    Multi-sample variants (shared or private) in multi-sample datasets (with mpileup).
+    Somatic mutations, LOH events, and germline variants in tumor-normal pairs.
+    Somatic copy number alterations (CNAs) in tumor-normal exome data.
+
+
+**Input**
+
+::
+
+  mpileup file - The SAMtools mpileup file
+ 
+
+**Parameters**
+
+::
+
+  commands
+	mpileup2snp		Identify SNPs from an mpileup file
+	mpileup2indel		Identify indels an mpileup file
+	mpileup2cns		Call consensus and variants from an mpileup file
+
+  min-coverage	
+  	Minimum read depth at a position to make a call [8]
+
+  min-reads2	
+  	Minimum supporting reads at a position to call variants [2]
+
+  min-avg-qual	
+  	Minimum base quality at a position to count a read [15]
+
+  min-var-freq	
+        Minimum variant allele frequency threshold [0.01]
+
+  min-freq-for-hom
+  	Minimum frequency to call homozygote [0.75]
+  
+  p-value
+  	Default p-value threshold for calling variants [99e-02]
+  
+  strand-filter
+  	Ignore variants with >90% support on one strand [1]
+  
+  output-vcf
+  	If set to 1, outputs in VCF format
+  
+  vcf-sample-list
+  	For VCF output, a list of sample names in order, one per line
+  
+  variants
+  	Report only variant (SNP/indel) positions [0]
+
+
+  
+  </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/varscan/varscan_somatic.pl	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,138 @@
+#!/usr/bin/perl
+
+
+use strict;
+use Cwd;
+
+die qq(
+Bad numbr of inputs
+
+) if(!@ARGV);
+
+my $options ="";
+my $normal="";
+my $command="";
+my $tumor="";
+my $output="";
+my $working_dir = cwd();
+my $snp = "$working_dir/output.snp.vcf";
+my $indels = "$working_dir/output.indel.vcf";
+
+foreach my $input (@ARGV) 
+{
+	my @tmp = split "::", $input;
+	if($tmp[0] eq "COMMAND") 
+	{
+		$command = $tmp[1];
+	} 
+	if($tmp[0] eq "NORMAL") 
+	{
+		$normal = $tmp[1];
+	} 
+	elsif($tmp[0] eq "TUMOR") 
+	{
+		$tumor = $tmp[1];
+	}
+	elsif($tmp[0] eq "OPTION") 
+	{
+		$options = "$options ${tmp[1]}";
+	}
+	elsif($tmp[0] eq "OUTPUT") 
+	{
+		$output = $tmp[1];
+	}
+	
+	else 
+	{
+		die("Unknown Input: $input\n");
+	}
+}
+
+system ("$command $normal $tumor $options ");
+system("grep -v '^\#' $indels | grep -v '^chrom position' >> $snp");
+
+my @chr_ord = chromosome_order($tumor);
+
+vs2vcf($snp, $output,\@chr_ord);
+
+
+sub vs2vcf 
+{
+
+	#
+	# G l o b a l     v a r i a b l e s 
+	#
+	my $version = '0.1';
+
+	#
+	# Read in file
+	#
+	my $input = shift;
+	my $output = shift;
+	my $chr_ord = shift;
+	open(IN, $input) or die "Can't open $input': $!\n";
+	open(OUT, ">$output") or die "Can't create $output': $!\n";
+	my %output;
+
+	while ( <IN> )
+	{
+		if ( /^#/ )
+		{
+			print OUT;
+			next;
+		}
+		chomp;
+		my $line = $_;
+
+		my @flds = split ( "\t", $line );
+		my $ref = $flds[3];
+		my $alt = $flds[4];
+		#
+		# Deletion of bases
+		#
+		if ( $alt =~ /^\-/ )
+		{
+			($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref);
+		}
+
+		#
+		# Insertion of bases
+		#
+		if ( $alt =~ /^\+/ )
+		{
+			$flds[4] = $ref.substr($alt,1);
+		}
+		print OUT join( "\t", @flds),"\n" unless defined $chr_ord;
+		$output{$flds[0]}{$flds[1]} = join( "\t", @flds)."\n" if defined $chr_ord;
+	}
+	close(IN);
+	# if chromosome order given return in sorted order
+	if(defined $chr_ord) 
+	{
+		for my $chrom (@{ $chr_ord }) 
+		{
+			for my $pos (sort {$a<=>$b} keys %{ $output{$chrom} }) 
+			{
+				print OUT $output{$chrom}{$pos};
+			}
+		}
+	}
+	close(OUT);
+}
+
+
+sub chromosome_order 
+{
+	my $input = shift;
+	# calculate flagstats
+	my $COMM = "samtools view -H $input | grep '^\@SQ'";
+	my @SQ = `$COMM`;
+	chomp @SQ;
+	for(my $i = 0; $i <= $#SQ; $i++) 
+	{
+		$SQ[$i] =~ s/^\@SQ\tSN:(.*?)\tLN:\d+$/$1/;
+	} 
+	return(@SQ);
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsap-vc/varscan/varscan_somatic.xml	Thu Oct 03 10:42:15 2019 -0400
@@ -0,0 +1,131 @@
+<tool id="varscan_somatic" name="VarScan Somatic" version="2.3.5">
+  <description>
+        somatic mutation caller for cancer genomics
+  </description>
+  <requirements>
+  	<requirement type="package" version="2.3.5">VarScan</requirement>
+  </requirements>
+  <command interpreter="perl">
+  	varscan_somatic.pl 
+  	"COMMAND::java -jar \$JAVA_JAR_PATH/VarScan.v2.3.5.jar somatic" 
+  	"NORMAL::$normal" 
+  	"TUMOR::$tumor"
+  	"OUTPUT::$output"
+  	
+	"OPTION::--min-coverage $min_coverage"
+	"OPTION::--min-coverage-normal $min_coverage_normal"
+	"OPTION::--min-coverage-tumor $min_coverage_tumor"
+	
+	"OPTION::--min-var-freq $min_var_freq"
+	"OPTION::--min-freq-for-hom $min_freq_for_hom"
+	
+	"OPTION::--normal-purity $normal_purity"	
+	"OPTION::--tumor-purity $tumor_purity"		
+	
+	"OPTION::--p-value $p_value"	
+	"OPTION::--somatic-p-value $somatic_p_value"	
+
+	"OPTION::--strand-filter $strand_filter"	
+	"OPTION::--validation $validation"	
+	"OPTION::--output-vcf 1"	
+	
+
+  	
+  </command>
+
+  <inputs>
+
+	<param name="normal" type="data" format="pileup" label="normal mpileup file" help="The SAMtools mpileup file for normal sample" />
+	<param name="tumor" type="data" format="pileup" label="tumor mpileup file" help="The SAMtools mpileup file for tumor sample" />
+
+	<param name="min_coverage" type="integer" label="min-coverage" help="" optional="true" value="8"/>
+	<param name="min_coverage_normal" type="integer" label="min-coverage-normal" help="" optional="true" value="8"/>
+	<param name="min_coverage_tumor" type="integer" label="min-coverage-tumor" help="" optional="true" value="6"/>
+	
+	<param name="min_var_freq" type="float" label="min-var-freq" help="" optional="true" value="0.10"/>
+	<param name="min_freq_for_hom" type="float" label="min-freq-for-hom" help="" optional="true" value="0.75"/>
+	
+	<param name="normal_purity" type="float" label="normal-purity" help="" optional="true" value="1.00"/>
+	<param name="tumor_purity" type="float" label="tumor-purity" help="" optional="true" value="1.00"/>
+	
+
+	<param name="p_value" type="text" label="p-value" help="" optional="true" value="0.99"/>
+	<param name="somatic_p_value" type="text" label="somatic-p-value" help="" optional="true" value="0.05"/>
+
+	<param name="strand_filter" type="integer" label="strand-filter" help="" optional="true" value="1"/>
+	<param name="validation" type="integer" label="validation" help="" optional="true" value="0"/>
+	
+  </inputs>
+  <outputs>
+  	<data type="data" format="vcf" name="output" label="${tool.name} result on ${on_string}"/>
+  </outputs>
+  	
+  <help> 
+
+.. class:: infomark
+
+**What it does**
+
+::
+
+ VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most  operating systems. It can be used to detect different types of variation:
+
+    Germline variants (SNPs an dindels) in individual samples or pools of samples.
+    Multi-sample variants (shared or private) in multi-sample datasets (with mpileup).
+    Somatic mutations, LOH events, and germline variants in tumor-normal pairs.
+    Somatic copy number alterations (CNAs) in tumor-normal exome data.
+
+
+**Input**
+
+::
+
+  mpileup normal file - The SAMtools mpileup file for normal
+  mpileup tumor file - The SAMtools mpileup file for tumor
+ 
+
+**Parameters**
+
+::
+
+  min-coverage	
+  	Minimum read depth at a position to make a call [8]
+
+  min-coverage-normal	
+  	Minimum coverage in normal to call somatic [8]
+  	
+  min-coverage-tumor	
+  	Minimum coverage in tumor to call somatic [6]
+  	
+  min-var-freq 
+  	Minimum variant frequency to call a heterozygote [0.10]  	  	
+
+  min-freq-for-hom
+  	Minimum frequency to call homozygote [0.75]
+  	
+  normal-purity 
+  	Estimated purity (non-tumor content) of normal sample [1.00]
+  	
+  tumor-purity
+  	Estimated purity (tumor content) of tumor sample [1.00]
+  
+  p-value
+  	Default p-value threshold for calling variants [0.99]
+  	
+  somatic-p-value
+  	P-value threshold to call a somatic site [0.05]  	
+  
+  strand-filter
+  	If set to 1, removes variants with >90% strand bias
+  	
+  validation 
+  	If set to 1, outputs all compared positions even if non-variant
+  
+  output-vcf
+  	If set to 1, outputs in VCF format [Default]
+
+
+  
+  </help>
+</tool>
+