|
Commit message:
Uploaded |
|
added:
scaffremodler/1_create_conf.py scaffremodler/1_create_conf.xml scaffremodler/2_map.py scaffremodler/2_map.xml scaffremodler/3_filter_single_pair.py scaffremodler/3_filter_single_pair.xml scaffremodler/4_filter_sam.py scaffremodler/4_filter_sam.xml scaffremodler/5_calc_stat.py scaffremodler/5_calc_stat.xml scaffremodler/6_parse_discord.py scaffremodler/6_parse_discord.xml scaffremodler/7_select_on_cov.py scaffremodler/7_select_on_cov.xml scaffremodler/8_ident_SV.py scaffremodler/8_ident_SV.xml scaffremodler/README.md scaffremodler/SplitOnX.py scaffremodler/SplitOnX.xml scaffremodler/bamgrepreads scaffremodler/conf4circos.py scaffremodler/conf4circos.xml scaffremodler/contig_scaff.py scaffremodler/contig_scaff.xml scaffremodler/contig_scaff_withN.py scaffremodler/convert2X.py scaffremodler/convert2X.xml scaffremodler/draw_circos.py scaffremodler/draw_circos.xml scaffremodler/estimate.py scaffremodler/fusion_scaff.py scaffremodler/fusion_scaff.xml scaffremodler/group4contig.py scaffremodler/group4contig.xml scaffremodler/include_scaffremodler.sh scaffremodler/loca_programs.conf scaffremodler/look4fusion.py scaffremodler/look4fusion.xml scaffremodler/merge_sam.py scaffremodler/reEstimateN.py scaffremodler/reEstimateN.xml scaffremodler/scaffremodler_wrapper.py scaffremodler/shmwriter.c scaffremodler/test-data/Ref_for_SV_detection.fasta scaffremodler/test-data/alignment.sam scaffremodler/test-data/bam_remove_redundancy.bam scaffremodler/test-data/chromosome_information.txt scaffremodler/test-data/circos_configuration.txt scaffremodler/test-data/config_file.txt scaffremodler/test-data/coverage.txt scaffremodler/test-data/discordant_proportion.txt scaffremodler/test-data/discordant_zone_FF.txt scaffremodler/test-data/discordant_zone_FR.txt scaffremodler/test-data/discordant_zone_RR.txt scaffremodler/test-data/discordant_zone_chr_FF.txt scaffremodler/test-data/discordant_zone_chr_FR.txt scaffremodler/test-data/discordant_zone_chr_RF.txt 
scaffremodler/test-data/discordant_zone_chr_RR.txt scaffremodler/test-data/discordant_zone_deletion.txt scaffremodler/test-data/discordant_zone_insertion.txt scaffremodler/test-data/list_type.txt scaffremodler/test-data/merge.txt scaffremodler/test-data/new_config_file.txt scaffremodler/test-data/out_chr_ff.bam scaffremodler/test-data/out_chr_fr.bam scaffremodler/test-data/out_chr_rf.bam scaffremodler/test-data/out_chr_rr.bam scaffremodler/test-data/out_deletion.bam scaffremodler/test-data/out_discarded.bam scaffremodler/test-data/out_ff.bam scaffremodler/test-data/out_fr.bam scaffremodler/test-data/out_insertion.bam scaffremodler/test-data/out_rf.bam scaffremodler/test-data/out_rr.bam scaffremodler/test-data/possible_junction_and_fusion.txt scaffremodler/test-data/quality_threshold.sam scaffremodler/test-data/reads_mate1_SV.fq scaffremodler/test-data/reads_mate1_rf.fq scaffremodler/test-data/reads_mate2_SV.fq scaffremodler/test-data/reads_mate2_rf.fq scaffremodler/test-data/scaffold_to_split.fasta scaffremodler/test-data/scaffold_to_split.txt scaffremodler/test-data/statistic.txt scaffremodler/test-data/sv_detected.txt scaffremodler/test-data/to_contig.txt scaffremodler/test-data/to_merge.txt scaffremodler/tool-data/scaffremodler.loc.sample scaffremodler/tool_data_table_conf.xml.sample scaffremodler/tool_dependencies.xml scaffremodler/verif_fusion.py scaffremodler/verif_fusion.xml |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/1_create_conf.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/1_create_conf.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,132 @@\n+\r\n+#\r\n+# Copyright 2014 CIRAD\r\n+#\r\n+# This program is free software; you can redistribute it and/or modify\r\n+# it under the terms of the GNU General Public License as published by\r\n+# the Free Software Foundation; either version 3 of the License, or\r\n+# (at your option) any later version.\r\n+#\r\n+# This program is distributed in the hope that it will be useful,\r\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+# GNU General Public License for more details.\r\n+#\r\n+# You should have received a copy of the GNU General Public License\r\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\r\n+# write to the Free Software Foundation, Inc.,\r\n+# 51 Franklin Street, Fifth Floor, Boston,\r\n+# MA 02110-1301, USA.\r\n+#\r\n+#\r\n+\r\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time, random\r\n+\r\n+from Bio.Seq import Seq\r\n+from Bio.Alphabet import generic_dna\r\n+from Bio import SeqIO\r\n+from Bio.SeqRecord import SeqRecord\r\n+\r\n+def cree_chrom(FILE, OUT):\r\n+\trecord_dict = SeqIO.index(FILE, "fasta")\r\n+\toutfile = open(OUT, \'wb\')\r\n+\tliste = []\r\n+\tfor n in record_dict:\r\n+\t\tliste.append(n)\r\n+\tliste.sort()\r\n+\tfor n in liste:\r\n+\t\toutfile.write(\'\\t\'.join([n, str(len(str(record_dict[n].seq)))])+\'\\n\')\r\n+\toutfile.close()\r\n+\r\n+def __main__():\r\n+\t#Parse Command Line\r\n+\tparser = optparse.OptionParser(usage="python %prog [options]\\n\\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr\\n\\n"\r\n+\t"This script generate a configuration file that will be used in the ApMap pipeline")\r\n+\t# Wrapper options.\r\n+\tparser.add_option( \'\', \'--tool\', dest=\'tool\', default=\'bowtie2_single\', help=\'The tool used : bowtie, bowtie2, bowtie2_single, bwa, bwa_mem, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--ref\', 
dest=\'ref\', default=\'not_filled\', help=\'The multifasta reference file\')\r\n+\tparser.add_option( \'\', \'--q1\', dest=\'q1\', default=\'not_filled\', help=\'The mate1 fastq file\')\r\n+\tparser.add_option( \'\', \'--q2\', dest=\'q2\', default=\'not_filled\', help=\'The mate2 fastq file\')\r\n+\tparser.add_option( \'\', \'--orient\', dest=\'orient\', default=\'rf\', help=\'The expected orientation: rf or fr, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--mini\', dest=\'mini\', default=\'2500\', help=\'The minimum insert size (integer), [default: %default]\')\r\n+\tparser.add_option( \'\', \'--maxi\', dest=\'maxi\', default=\'7500\', help=\'The maximum insert size (integer), [default: %default]\')\r\n+\tparser.add_option( \'\', \'--qual\', dest=\'qual\', default=\'33\', help=\'Fastq quality encoding: 33 or 64, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--index\', dest=\'index\', default=\'y\', help=\'Build reference index : y or n, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--rmindex\', dest=\'rmindex\', default=\'y\', help=\'Remove reference index at the end of calculation: y or n, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--filter_multi\', dest=\'filter_multi\', default=\'y\', help=\'Filter reads with multiple locations : y or n, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--mini_dis\', dest=\'mini_dis\', default=\'10000\', help=\'The minimal insert size to keep the discordant read for structural variation search (integer), [default: %default]\')\r\n+\tparser.add_option( \'\', \'--mult_max_cov\', dest=\'mult_max_cov\', default=\'10\', help=\'multiplicator of median coverage for maximal median coverage to keep a zone (float), [default: %default]\')\r\n+\tparser.add_option( \'\', \'--mult_min_cov\', dest=\'mult_min_cov\', default=\'0.25\', help=\'multiplicator of median coverage for minimal median coverage to keep a zone (float), [default: %default]\')\r\n+\tparser.add_option( \'\', \'--min_zone\', 
dest=\'min_zone\', default=\'500\', help=\'Minimal number of covered sites in a zone to be considered (integer), [default: %default]\')\r\n+\tparser.add_option( \'\', \'--min_gap\', dest=\'min_gap\', default=\'300\', help=\'Maximal number of contiguous uncovered sites in a zone to be considered as a single '..b'l (integer), [default: %default]. For homozygous SV in diploid: expected value = 0.5, if heterozygous: expected value = 0.25\')\r\n+\tparser.add_option( \'\', \'--min_score\', dest=\'min_score\', default=70, help=\'The minimal score for a discordant zone to be identified as passed, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--ploid\', dest=\'ploid\', default=0.33, help=\'Multiplicator for coverage variation detection in SV identification (ex : If homozygous duplication expected in diploid: expected = coverage + coverage*1, if heterozygous duplication expected in diploid => expected = coverage + coverage*0.5). Choose a value lower than the expected one\')\r\n+\tparser.add_option( \'\', \'--restimate\', dest=\'restimate\', default=\'n\', help=\'Wether re-estimating --mini and --maxi parameters: y or n, [default: %default]. If y, these parameters are calculated as followed on well mapped paired read on the basis of previous min and max parameters: min/max = median -/+ (standard_deviation * "--msd" option)\')\r\n+\tparser.add_option( \'\', \'--output\', dest=\'output\', default=\'config.conf\', help=\'The output of the conf file, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--chr\', dest=\'chr\', default=\'chr.tab\', help=\'Output file containing chromosomes informations, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--rm_intermediate\', dest=\'rm_intermediate\', default=\'n\', help=\'remove intermediate bam/sam, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--exclude_chrom\', dest=\'exclude_chrom\', default=\'no_exclude\', help=\'Exclude chromosomes from analysis. 
"no_exclude" or chromosomes names separated by "=", [default: %default]\')\r\n+\t(options, args) = parser.parse_args()\r\n+\t\r\n+\t\r\n+\t\r\n+\tcree_chrom(options.ref, options.chr)\r\n+\t# print options.ref\r\n+\t# print options.chr\r\n+\t# print options.q1\r\n+\t# print options.q2\r\n+\t# print options.chr\r\n+\t# print options.output\r\n+\r\n+\tconfig = ConfigParser.RawConfigParser()\r\n+\tconfig.add_section(\'General\')\r\n+\tconfig.set(\'General\',\'ref\', options.ref)\r\n+\tconfig.set(\'General\',\'chr\', options.chr)\r\n+\tconfig.set(\'General\',\'mini\', options.mini)\r\n+\tconfig.set(\'General\',\'maxi\', options.maxi)\r\n+\tconfig.set(\'General\',\'thread\', options.thread)\r\n+\tconfig.set(\'General\',\'tool\', options.tool)\r\n+\tconfig.set(\'General\',\'q1\', options.q1)\r\n+\tconfig.set(\'General\',\'q2\', options.q2)\r\n+\tconfig.set(\'General\',\'qual\', options.qual)\r\n+\tconfig.set(\'General\',\'orient\', options.orient)\r\n+\tconfig.set(\'General\',\'index\', options.index)\r\n+\tconfig.set(\'General\',\'rmindex\', options.rmindex)\r\n+\tconfig.set(\'General\',\'sd_multiplicator\', options.msd)\r\n+\tconfig.set(\'General\',\'restimate\', options.restimate)\r\n+\tconfig.set(\'General\',\'mini_dis\', options.mini_dis)\r\n+\tconfig.set(\'General\',\'mult_max_cov\', options.mult_max_cov)\r\n+\tconfig.set(\'General\',\'mult_min_cov\', options.mult_min_cov)\r\n+\tconfig.set(\'General\',\'min_zone\', options.min_zone)\r\n+\tconfig.set(\'General\',\'min_gap\', options.min_gap)\r\n+\tconfig.set(\'General\',\'max_dist_merge\', options.max_dist_merge)\r\n+\tconfig.set(\'General\',\'YiS\', options.YiS)\r\n+\tconfig.set(\'General\',\'MiS\', options.MiS)\r\n+\tconfig.set(\'General\',\'YiC\', options.YiC)\r\n+\tconfig.set(\'General\',\'MiC\', options.MiC)\r\n+\tconfig.set(\'General\',\'min_score\', options.min_score)\r\n+\tconfig.set(\'General\',\'ploid\', options.ploid)\r\n+\tconfig.set(\'General\',\'fai_file\', 
options.ref+\'.fai\')\r\n+\tconfig.set(\'General\',\'exclude_chrom\', options.exclude_chrom)\r\n+\tconfig.add_section(\'Mapping\')\r\n+\tconfig.add_section(\'Single_filter\')\r\n+\tconfig.set(\'Single_filter\',\'rminput\', options.rm_intermediate)\r\n+\tconfig.set(\'Single_filter\',\'filter_multi\', options.filter_multi)\r\n+\tconfig.add_section(\'Remove_dup\')\r\n+\tconfig.set(\'Remove_dup\',\'rminput\', options.rm_intermediate)\r\n+\tconfig.add_section(\'Calc_coverage\')\r\n+\tconfig.add_section(\'Trie_discord\')\r\n+\tconfig.set(\'Trie_discord\',\'rminput\', options.rm_intermediate)\r\n+\tconfig.add_section(\'Score_discord\')\r\n+\tconfig.add_section(\'Ident_discord\')\r\n+\twith open(options.output, \'wb\') as configfile:\r\n+\t\tconfig.write(configfile)\r\n+\t\r\n+if __name__ == "__main__": __main__()\r\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/1_create_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/1_create_conf.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,149 @@ +<tool id="1_create_conf" name="1_create_conf" version="0.1"> + <description> : Create configuration file and chromosome tab file for Scaffremodler</description> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/1_create_conf.py + --tool $tool + #if str($reference_genome.source) == "history": + --ref $reference_genome.own_file + #else: + --ref $reference_genome.genome.fields.path + #end if + --q1 $q1 + --q2 $q2 + --orient $orient + --qual $qual + --mini $mini + --maxi $maxi + --index $index + --rmindex $rmindex + --mini_dis $mini_dis + --mult_max_cov $mult_max_cov + --mult_min_cov $mult_min_cov + --min_zone $min_zone + --min_gap $min_gap + --thread $thread + --msd $msd + --max_dist_merge $max_dist_merge + --YiS $YiS + --MiS $MiS + --YiC $YiC + --MiC $MiC + --min_score $min_score + --ploid $ploid + --restimate $restimate + --output $config_file + --chr $chromosome_information + + </command> + <inputs> + <param name="tool" type="select" label="Mapping tool"> + <option selected="true" value="bowtie2_single">bowtie2_single</option> + <option value="bowtie">bowtie</option> + <option value="bowtie2">bowtie2</option> + <option value="bwa">bwa</option> + <option value="bwa_mem">bwa_mem</option> + </param> + + <conditional name="reference_genome"> + <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. 
See `Indexes` section of help below"> + <option value="indexed">Use a built-in genome index</option> + <option value="history">Use a genome from the history and build index</option> + </param> + <when value="indexed"> + <param name="genome" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the South Green team"> + + <options from_data_table="scaffremodler"> + <filter type="sort_by" column="1"/> + <validator type="no_options" message="No indexes are available for the selected input dataset"/> + </options> + </param> + </when> + <when value="history"> + <param name="own_file" type="data" format="fasta" label="Select reference genome" /> + </when> + </conditional> + <param name="q1" type="data" format="fastq,fastqsanger" label="Mate1 fastq file" /> + <param name="q2" type="data" format="fastq,fastqsanger" label="Mate2 fastq file" /> + <param name="orient" type="select" label="The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand"> + <option value="rf">RF</option> + <option value="fr">FR</option> + </param> + <param name="qual" type="select" label="Fastq file quality encoding 33 or 64"> + <option selected="true" value="33">33</option> + <option value="64">64</option> + </param> + <param name="mini" type="integer" value="2500" label="minimum good fragment length" /> + <param name="maxi" type="integer" value="7500" label="maximum good fragment length" /> + <param name="index" type="select" label="Create index if reference not indexed"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + <param name="rmindex" type="select" label="Remove index"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + <param name="mini_dis" type="integer" value="10000" label="Minimal distance between mate to keep discordant mate (should be higher than 2 fold expected insert)" /> + <param name="mult_max_cov" 
type="float" value="10" label="Multiplicator of median coverage for maximal median coverage to keep a zone" /> + <param name="mult_min_cov" type="float" value="0.25" label="Multiplicator of median coverage for maximal median coverage to keep a zone" /> + <param name="min_zone" type="integer" value="500" label="Minimal number of covered sites in a zone to be considered" /> + <param name="min_gap" type="integer" value="300" label="Maximal number of contiguous uncovered sites in a zone to be considered as a single zone" /> + <param name="thread" type="integer" value="1" label="Thread number used for mapping" /> + <param name="msd" type="float" value="3" label="Multiplicator of standard deviation for discordant zone identification" /> + <param name="max_dist_merge" type="float" value="1000" label="Maximal distance between two discordant zone to merge" /> + <param name="YiS" type="float" value="0" label="Y-intercept of the linear function for zone size that will give the first component of product giving the score" /> + <param name="MiS" type="float" value="0.5" label="Multiplicator of median insert size for calculating minimal zone size for which the first component of product giving the score will be maximal" /> + <param name="YiC" type="float" value="0" label="Y-intercept of the linear function for coverage that will give the second component of product giving the score" /> + <param name="MiC" type="float" value="0.25" label="Multiplicator of median coverage for calculating minimal zone coverage for which the second component of product giving the score will be maximal" /> + <param name="min_score" type="integer" value="70" label="minimal score for a discordant zone to be identified as passed (0 to 100)" /> + <param name="ploid" type="float" value="0.66" label="Multiplicator for coverage variation detection in SV identification (ex : If homozygous duplication expected in diploid: expected = coverage + coverage*1; if heterozygous duplication expected in diploid => 
expected = coverage + coverage*0.5). Choose a value lower than the expected one" /> + <param name="restimate" type="select" label="Re-estimate --mini and --maxi parameters"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + </inputs> + <outputs> + <data format="txt" name="config_file" label="${tool.name} : Configuration file" /> + <data format="data" name="chromosome_information" label="${tool.name} : Chromosome information" /> + </outputs> + <tests> + <test> + <param name="q1" value="reads_mate1_SV.fq"/> + <param name="q2" value="reads_mate2_SV.fq"/> + <param name="orient" value="rf"/> + <param name="index" value="y"/> + <param name="rmindex" value="y"/> + <param name="source" value="history" /> + <param name="own_file" value="Ref_for_SV_detection.fasta" /> + <output name="config_file" file="config_file.txt" compare="sim_size"/> + <output name="chromosome_information" file="chromosome_information.txt"/> + </test> + </tests> + <help> + +**Overview** + +This program generate a configuration file that will be used by all other programs that begin with a number. + +----- + +.. class:: infomark + +**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD). + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr + +.. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/2_map.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/2_map.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,223 @@\n+\r\n+#\r\n+# Copyright 2014 CIRAD\r\n+#\r\n+# This program is free software; you can redistribute it and/or modify\r\n+# it under the terms of the GNU General Public License as published by\r\n+# the Free Software Foundation; either version 3 of the License, or\r\n+# (at your option) any later version.\r\n+#\r\n+# This program is distributed in the hope that it will be useful,\r\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+# GNU General Public License for more details.\r\n+#\r\n+# You should have received a copy of the GNU General Public License\r\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\r\n+# write to the Free Software Foundation, Inc.,\r\n+# 51 Franklin Street, Fifth Floor, Boston,\r\n+# MA 02110-1301, USA.\r\n+#\r\n+#\r\n+\r\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time, random, datetime, glob, sys\r\n+import multiprocessing\r\n+\r\n+def stop_err( msg ):\r\n+\tsys.stderr.write( "%s\\n" % msg )\r\n+\tsys.exit()\r\n+\r\n+def run_job (cmd_line, ERROR):\r\n+\tprint cmd_line\r\n+\ttry:\r\n+\t\ttmp = tempfile.NamedTemporaryFile().name\r\n+\t\t# print tmp\r\n+\t\terror = open(tmp, \'w\')\r\n+\t\tproc = subprocess.Popen( args=cmd_line, shell=True, stderr=error)\r\n+\t\treturncode = proc.wait()\r\n+\t\terror.close()\r\n+\t\terror = open( tmp, \'rb\' )\r\n+\t\tstderr = \'\'\r\n+\t\tbuffsize = 1048576\r\n+\t\ttry:\r\n+\t\t\twhile True:\r\n+\t\t\t\tstderr += error.read( buffsize )\r\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\r\n+\t\t\t\t\tbreak\r\n+\t\texcept OverflowError:\r\n+\t\t\tpass\r\n+\t\terror.close()\r\n+\t\tos.remove(tmp)\r\n+\t\tif returncode != 0:\r\n+\t\t\traise Exception, stderr\r\n+\texcept Exception, e:\r\n+\t\tstop_err( ERROR + str( e ) )\r\n+\r\n+\r\n+def worker(job):\r\n+\ttry:\r\n+\t\tprint "---"\r\n+\t\tprint job[0]\r\n+\t\tprint 
job[1]\r\n+\t\tprint "---"\r\n+\t\tsys.stdout.flush()\r\n+\t\trun_job(job[0], job[1])\r\n+\texcept Exception, e:\r\n+\t\tprint "error : "+e.__doc__+" (\'"+e.message+")\' in \'"+job[0]+"\'"\r\n+\r\n+\r\n+\r\n+def Mapping(LOCA_PROGRAMS, TOOL, REF, Q1, Q2, ORIENT, MIN, MAX, QUAL, INDEX, RMINDEX, THREAD, OUT, PATHNAME):\r\n+\t\tinterm1 = OUT+\'_mate1.sam\'\r\n+\t\tinterm2 = OUT+\'_mate2.sam\'\r\n+\t\tinterm_sort1 = OUT+\'_mate1_sorted.sam\'\r\n+\t\tinterm_sort2 = OUT+\'_mate2_sorted.sam\'\r\n+\t\tt0 = datetime.datetime.now()\r\n+\t\tprint t0\r\n+\t\tif TOOL == \'bowtie\':\r\n+\t\t\tif INDEX == \'y\':\r\n+\t\t\t\tbuild_index = \'%s -q %s %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'bowtie-build\'), REF, REF)\r\n+\t\t\t\trun_job(build_index,\'Indexing error:\\n\')\r\n+\t\t\tif QUAL == \'33\':\r\n+\t\t\t\tmapping1 = \'%s --quiet -a -m 1 %s --phred33-quals -q %s -S %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'bowtie\'), REF, Q1, interm1)\r\n+\t\t\t\tmapping2 = \'%s --quiet -a -m 1 %s --phred33-quals -q %s -S %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'bowtie\'), REF, Q2, interm2)\r\n+\t\t\t\t# mapping1 = \'%s --quiet %s --phred33-quals -q %s -S %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'bowtie\'), REF, Q1, interm1)\r\n+\t\t\t\t# mapping2 = \'%s --quiet %s --phred33-quals -q %s -S %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'bowtie\'), REF, Q2, interm2)\r\n+\t\t\telif QUAL == \'64\':\r\n+\t\t\t\tmapping1 = \'%s --quiet -a -m 1 %s --solexa1.3-quals -q %s -S %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'bowtie\'), REF, Q1, interm1)\r\n+\t\t\t\tmapping2 = \'%s --quiet -a -m 1 %s --solexa1.3-quals -q %s -S %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'bowtie\'), REF, Q2, interm2)\r\n+\t\t\t\t# mapping1 = \'%s --quiet %s --solexa1.3-quals -q %s -S %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'bowtie\'), REF, Q1, interm1)\r\n+\t\t\t\t# mapping2 = \'%s --quiet %s --solexa1.3-quals -q %s -S %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'bowtie\'), REF, Q2, interm2)\r\n+\t\t\telse:\r\n+\t\t\t\tsys.exit(\'Unknown quality 
encoding : support only +33 or +64 encoding\')\r\n+\t\t\tsorting1 = \'%s -jar %s SortSam INPUT=%s OUTPUT=%s SORT_ORDER=queryname QUIET=true MAX_RECORDS_IN_RAM=5000000 VERBOSITY=WARNING VALIDATION_STRINGENCY=SILENT\' % (LOCA_PROGRAMS.get(\'Programs\',\'java\'), LOCA_PROGRAMS.get(\'Programs\',\'picard-tool\'), interm1, interm_sort1)\r\n+\t\t\tsorting2 = \'%s -jar %s SortSam INPUT=%s OUTPUT=%s SORT_ORDER=queryname QUIET=true MAX_'..b'AD, REF, Q2, bwasai2)\r\n+\t\t\tmapping2_1 = \'%s sampe %s %s %s %s %s > %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'bwa\'), REF, bwasai1, bwasai2, Q1, Q2, OUT)\r\n+\t\t\trun_job(mapping1, \'Mapping error:\\n\')\r\n+\t\t\trun_job(mapping2, \'Mapping error:\\n\')\r\n+\t\t\trun_job(mapping2_1, \'Mapping2 error:\\n\')\r\n+\t\t\tos.remove(bwasai1)\r\n+\t\t\tos.remove(bwasai2)\r\n+\t\tif RMINDEX == \'y\':\r\n+\t\t\tfor filename in glob.glob(REF+\'.*\'):\r\n+\t\t\t\t# print filename\r\n+\t\t\t\tos.remove(filename)\r\n+\t\tprint datetime.datetime.now() - t0\r\n+\r\n+def __main__():\r\n+\t#Parse Command Line\r\n+\tparser = optparse.OptionParser(usage="python %prog [options]\\n\\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr"\r\n+\t"\\n\\n This script map paired reads on a reference and output a sam file containing the paired reads mapped.")\r\n+\t# Wrapper options.\r\n+\tparser.add_option( \'\', \'--tool\', dest=\'tool\', default=\'bowtie2_single\', help=\'The tool used : bowtie, bowtie2, bowtie2_single, bwa, bwa_mem, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--ref\', dest=\'ref\', default=\'not_filled\', help=\'The multifasta reference file\')\r\n+\tparser.add_option( \'\', \'--q1\', dest=\'q1\', default=\'not_filled\', help=\'The mate1 fastq file\')\r\n+\tparser.add_option( \'\', \'--q2\', dest=\'q2\', default=\'not_filled\', help=\'The mate2 fastq file\')\r\n+\tparser.add_option( \'\', \'--orient\', dest=\'orient\', default=\'rf\', help=\'The expected orientation: rf or fr, [default: 
%default]\')\r\n+\tparser.add_option( \'\', \'--mini\', dest=\'mini\', default=\'2500\', help=\'The minimum insert size (integer), [default: %default]\')\r\n+\tparser.add_option( \'\', \'--maxi\', dest=\'maxi\', default=\'7500\', help=\'The maximum insert size (integer), [default: %default]\')\r\n+\tparser.add_option( \'\', \'--qual\', dest=\'qual\', default=\'33\', help=\'Fastq quality encoding: 33 or 64, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--index\', dest=\'index\', default=\'y\', help=\'Build reference index : y or n, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--rmindex\', dest=\'rmindex\', default=\'y\', help=\'Remove reference index at the end of calculation: y or n, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--thread\', dest=\'thread\', default=\'1\', help=\'The thread number used for mapping (integer), [default: %default]\')\r\n+\tparser.add_option( \'\', \'--out\', dest=\'out\', default=\'mate.sam\', help=\'The ouput of mapped reads, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--config\', dest=\'config\', default=None)\r\n+\t(options, args) = parser.parse_args()\r\n+\r\n+\r\n+\r\n+\tpathname = os.path.dirname(sys.argv[0])\r\n+\r\n+\tloca_programs = ConfigParser.RawConfigParser()\r\n+\tloca_programs.read(pathname+\'/loca_programs.conf\')\r\n+\r\n+\tif options.config:\r\n+\t\tconfig = ConfigParser.RawConfigParser()\r\n+\t\tconfig.read(options.config)\r\n+\t\tMapping(loca_programs, config.get(\'General\',\'tool\'), config.get(\'General\',\'ref\'), config.get(\'General\',\'q1\'), config.get(\'General\',\'q2\'), config.get(\'General\',\'orient\'), config.get(\'General\',\'mini\'), config.get(\'General\',\'maxi\'), config.get(\'General\',\'qual\'), config.get(\'General\',\'index\'), config.get(\'General\',\'rmindex\'), config.get(\'General\',\'thread\'), options.out, pathname)\r\n+\t\tconfig.set(\'Mapping\', \'out\', options.out)\r\n+\t\tif config.get(\'General\',\'tool\') in [\'bowtie\', \'bowtie2\', 
\'bowtie2_single\']:\r\n+\t\t\tconfig.set(\'Single_filter\', \'asxs\', 1)\r\n+\t\t\tconfig.set(\'Single_filter\', \'qual\', \'not_filled\')\r\n+\t\telse:\r\n+\t\t\tconfig.set(\'Single_filter\', \'asxs\', \'not_filled\')\r\n+\t\t\tconfig.set(\'Single_filter\', \'qual\', 0)\r\n+\t\twith open(options.config, \'wb\') as configfile:\r\n+\t\t\tconfig.write(configfile)\r\n+\telse:#For commande line\r\n+\t\tif options.q1 == \'not_filled\':\r\n+\t\t\tsys.exit(\'--q1 argument is missing\')\r\n+\t\tif options.q2 == \'not_filled\':\r\n+\t\t\tsys.exit(\'--q2 argument is missing\')\r\n+\t\tif options.ref == \'not_filled\':\r\n+\t\t\tsys.exit(\'--ref argument is missing\')\r\n+\t\tMapping(loca_programs, options.tool, options.ref, options.q1, options.q2, options.orient, options.mini, options.maxi, options.qual, options.index, options.rmindex, options.thread, options.out, pathname)\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+if __name__ == "__main__": __main__()\r\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/2_map.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/2_map.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,52 @@ +<tool id="2_map" name="2_map" version="0.1"> + <description> : Map read onto a reference fasta file</description> + <requirements> + <requirement type="package" version="0.7.7">bwa</requirement> + <requirement type="package" version="2.2.6">bowtie2</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/2_map.py --config $config --out $alignment + </command> + <inputs> + <param name="config" type="data" format="text" label="Configuration file"/> + </inputs> + <outputs> + <data format="sam" name="alignment" label="${tool.name} : Mapping (SAM file)" /> + </outputs> + <tests> + <test> + <param name="config" value="config_file.txt" /> + <output name="alignment" file="alignment.sam" compare="sim_size"/> + </test> + </tests> + <help> + +**Overview** + +This program aligns paired reads along reference sequences. + +Read pairs should be in fastq format with either phred33 or phred64 quality encoding. + +----- + +.. class:: infomark + +**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD). + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr + +.. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/3_filter_single_pair.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/3_filter_single_pair.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |

#
#  Copyright 2014 CIRAD
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, see <http://www.gnu.org/licenses/> or
#  write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#

import optparse, os, shutil, subprocess, sys, tempfile, fileinput, operator, time

try:
    import ConfigParser
except ImportError:
    # The module was renamed in Python 3; keep the historical name in scope.
    import configparser as ConfigParser


def find_info(LINE):
    """Return the optional tags of one SAM record as a {TAG: value} dict.

    LINE is the list of fields of an alignment line. Optional fields start
    at index 11 and have the form TAG:TYPE:VALUE. The value is kept whole
    even when it contains ':' itself; fields that do not have the three
    parts are ignored.
    """
    dic = {}
    for n in LINE[11:]:
        liste = n.split(':', 2)
        if len(liste) == 3:
            dic[liste[0]] = liste[2]
    return dic


def _is_single_hit(tags, min_dif):
    """Tell whether a mate is far enough from its second best hit.

    A mate without XS tag has no second hit and always passes. Otherwise
    the absolute AS/XS difference must be at least min_dif.
    """
    if 'XS' not in tags:
        return True
    return abs(int(tags['AS']) - int(tags['XS'])) >= min_dif


def _pair_passes(mate1, mate2, min_dif, min_qual):
    """Apply the enabled filters (None means disabled) to both mates of a pair."""
    if min_dif is not None:
        if not _is_single_hit(find_info(mate1), min_dif):
            return False
        if not _is_single_hit(find_info(mate2), min_dif):
            return False
    if min_qual is not None:
        # Field 5 (index 4) of a SAM record is the mapping quality.
        if int(mate1[4]) < min_qual or int(mate2[4]) < min_qual:
            return False
    return True


def Filtre(SAM, ASXS, QUAL, OUT):
    """Write to OUT the read pairs of SAM in which both mates pass the filters.

    SAM  : path of a sam file sorted by query name (two consecutive lines per pair).
    ASXS : minimal AS/XS difference, or 'not_filled' to disable this filter.
    QUAL : minimal mapping quality, or 'not_filled' to disable this filter.
    OUT  : path of the filtered sam file.

    Leading header lines are copied unchanged. The program exits with a
    message if both filters are disabled, if two consecutive records do not
    share the same read name, or if the last pair is incomplete. Counts of
    read and kept pairs are printed on the standard output.
    """
    min_dif = None if ASXS == 'not_filled' else int(ASXS)
    min_qual = None if QUAL == 'not_filled' else int(QUAL)
    if min_dif is None and min_qual is None:
        sys.exit('No --asxs or --qual threshold provided to filter the sam file')

    nb_input = 0
    nb_kept = 0

    with open(SAM) as infile:
        with open(OUT, 'w') as outfile:
            in_header = True
            mate1 = None
            for raw in infile:
                if in_header and raw.startswith('@'):
                    # Header values (@PG CL:, @CO) may contain spaces: copy verbatim.
                    outfile.write(raw.rstrip('\r\n')+'\n')
                    continue
                fields = raw.split()
                if not fields:
                    continue
                in_header = False
                if mate1 is None:
                    mate1 = fields
                    continue
                nb_input += 1
                if mate1[0] != fields[0]:
                    sys.exit('Read should be sorted by query name in the sam file')
                if _pair_passes(mate1, fields, min_dif, min_qual):
                    outfile.write('\t'.join(mate1)+'\n')
                    outfile.write('\t'.join(fields)+'\n')
                    nb_kept += 1
                mate1 = None
            if mate1 is not None:
                sys.exit('Odd number of alignment lines: read '+mate1[0]+' has no mate in the sam file')

    # An input without any pair would otherwise end in a division by zero.
    proportion = float(nb_kept)/float(nb_input) if nb_input else 0.0
    print('Mapped pairs: %s' % nb_input)
    print('Mapped pairs kept: %s' % nb_kept)
    print('Mapped pairs proportion kept: %s' % str(proportion))


def __main__():
    """Command line entry point.

    With --config, the input file, the thresholds and the rminput choice are
    read from the configuration file, which is then updated with the output
    location. Without it, --sam is mandatory and the command line values are used.
    """
    #Parse Command Line
    parser = optparse.OptionParser(usage="python %prog [options]\n\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr"
    "\n\n This script takes a sam file and output only paired reads were both mates pass filter threshold on the AS/XS flags or mapping quality or both provided.")
    # Wrapper options.
    parser.add_option( '', '--sam', dest='sam', default='not_filled', help='Paired sam file')
    parser.add_option( '', '--asxs', dest='asxs', default='not_filled', help='Minimal difference between the best and second hit accepted to consider the hit as single')
    parser.add_option( '', '--qual', dest='qual', default='not_filled', help='Minimal mapping quality to keep the hit')
    parser.add_option( '', '--rminput', dest='rminput', default='n', help='Remove input file: y or n, [default: %default]')
    parser.add_option( '', '--out', dest='out', default='Single_hit_mapped.sam', help='Output file')
    parser.add_option( '', '--config', dest='config', default=None)
    (options, args) = parser.parse_args()

    if options.config:
        config = ConfigParser.RawConfigParser()
        config.read(options.config)
        if config.get('Single_filter','filter_multi') == 'y':
            Filtre(config.get('Mapping','out'), config.get('Single_filter','asxs'), config.get('Single_filter','qual'), options.out)
        else:
            print('The input sam is the sam as the output sam in 3_filter_single_pair')
            # The former 'cp % %' format string was invalid and raised before copying.
            shutil.copyfile(config.get('Mapping','out'), options.out)
        if config.get('Single_filter','rminput') == 'y':
            os.remove(config.get('Mapping','out'))
        config.set('Single_filter', 'out', options.out)
        config.set('Single_filter', 'type', 'sam')
        config.set('Remove_dup', 'sort', 'coordinate')
        with open(options.config, 'w') as configfile:
            config.write(configfile)
    else:
        if options.sam == 'not_filled':
            sys.exit('--sam argument is missing')
        if options.qual == 'not_filled' and options.asxs == 'not_filled':
            print('No --asxs or --qual argument are passed the imput sam is the sam as the output sam in 3_filter_single_pair')
            shutil.copyfile(options.sam, options.out)
        else:
            Filtre(options.sam, options.asxs, options.qual, options.out)
        if options.rminput == 'y':
            os.remove(options.sam)


if __name__ == "__main__": __main__()
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/3_filter_single_pair.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/3_filter_single_pair.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
<tool id="3_filter_single_pair" name="3_filter_single_pair" version="0.1">
    <description> : Filter paired read mapping uniquely that pass mapping quality threshold</description>
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <!--
        Because the config option is always passed, 3_filter_single_pair.py takes
        its input file and its asxs/qual thresholds from the configuration file
        (sections Mapping and Single_filter); the sam/asxs/qual values given on
        this command line are only used by the script when no config is supplied.
    -->
    <command>
        source $__tool_directory__/include_scaffremodler.sh ;
        python $__tool_directory__/3_filter_single_pair.py
        --sam $sam_file
        --config $config_file
        --asxs $asxs
        --qual $qual
        --out $quality_threshold
    </command>
    <inputs>
        <param name="sam_file" type="data" format="sam" label="SAM file (--sam)" />
        <!-- "txt" is the datatype used for the configuration file by the other scaffremodler tools -->
        <param name="config_file" type="data" format="txt" label="Configuration file"/>
        <param name="asxs" type="integer" value="0" label="Minimal difference between the best and second hit accepted to consider the hit as single (--asxs)" />
        <param name="qual" type="integer" value="0" label="Minimal mapping quality to keep the hit (--qual)" />
    </inputs>
    <outputs>
        <data format="sam" name="quality_threshold" label="${tool.name} : Quality threshold (SAM file)" />
    </outputs>
    <tests>
        <test>
            <param name="sam_file" value="alignment.sam" />
            <param name="config_file" value="config_file.txt" />
            <param name="asxs" value="0" />
            <param name="qual" value="0" />
            <output name="quality_threshold" file="quality_threshold.sam" compare="sim_size"/>
        </test>
    </tests>
    <help>

**Overview**

This program filters paired reads in a sam file sorted by query name. The filtering can be done on the mapping quality and/or on a threshold on the difference between the AS and XS flags.

This program outputs a filtered sam file sorted by query name. Unmapped and single end mapped pairs are not removed during this step.

**Filtering option**

* --asxs : an integer corresponding to the minimal difference between the AS/XS flag values to keep a pair. A pair is kept if both mates pass the threshold.

* --qual : an integer corresponding to the minimal mapping quality. If both mates of a pair have a mapping quality greater than or equal to the qual parameter, the pair is kept.

* --rminput : This option decides if the input should be deleted after treatment (y : remove input / n : don’t remove input) (default : n)

-----

.. class:: infomark

**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD).

.. class:: infomark

**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr

.. class:: infomark

**Program encapsulated in Galaxy by South Green**

    </help>
    <citations>
        <citation type="doi">10.1186/s12864-016-2579-4</citation>
    </citations>
</tool>
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/4_filter_sam.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/4_filter_sam.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |

#
#  Copyright 2014 CIRAD
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, see <http://www.gnu.org/licenses/> or
#  write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#

import optparse, os, shutil, subprocess, sys, tempfile, fileinput, operator, time

try:
    import ConfigParser
except ImportError:
    # The module was renamed in Python 3; keep the historical name in scope.
    import configparser as ConfigParser


def stop_err( msg ):
    """Write msg on the standard error and stop the program with exit status 1.

    A non-zero status is required for the failure to be detected through the
    process exit code.
    """
    sys.stderr.write( "%s\n" % msg )
    sys.exit(1)


def run_job (cmd_line, ERROR):
    """Run cmd_line through the shell and stop the program if it fails.

    cmd_line : command line (string) to execute.
    ERROR    : prefix of the message reported when the command fails.

    The standard error of the command is captured in an anonymous temporary
    file and appended to ERROR when the return code is not 0.
    """
    print(cmd_line)
    try:
        # The temporary file stays open while the command runs, so its name
        # cannot be taken over by another process.
        with tempfile.TemporaryFile(mode='w+') as error:
            returncode = subprocess.call(cmd_line, shell=True, stderr=error)
            error.seek(0)
            stderr = error.read()
    except (OSError, IOError) as e:
        stop_err( ERROR + str( e ) )
    if returncode != 0:
        stop_err( ERROR + stderr )


def Filter(LOCA_PROGRAMS, SAM, TYPE, SORT, OUT):
    """Remove unmapped pairs and duplicates from SAM and write a sorted bam to OUT.

    LOCA_PROGRAMS : parsed loca_programs.conf; its 'Programs' section gives
                    the 'samtools', 'java' and 'picard-tool' locations.
    SAM           : input sam or bam file.
    TYPE          : 'sam' or 'bam'; any other value stops the program.
    SORT          : sort order passed to Picard SortSam for the final output.
    OUT           : output bam file; also used as prefix of the intermediate files.

    Steps: samtools view drops reads with flag 4 or 8 set, Picard sorts by
    coordinate, MarkDuplicates removes duplicates, Picard sorts in SORT order.
    Intermediate files are deleted as soon as the next step succeeded and
    four values of the duplication metrics are printed.
    """
    if TYPE not in ('sam', 'bam'):
        mot = TYPE+' argument passed in --type is not recognized'
        sys.exit(mot)

    samtools = LOCA_PROGRAMS.get('Programs','samtools')
    java = LOCA_PROGRAMS.get('Programs','java')
    picard = LOCA_PROGRAMS.get('Programs','picard-tool')

    bamfile = OUT+'_filtered.bam'
    sortedbam = OUT+'_sorted.bam'
    rmdupbam = OUT+'_rmdup.bam'
    rmdupmetrics = OUT+'_rmdupmetrics.bam'

    if TYPE == 'sam':
        filtering = '%s view -bS %s | %s view -uF 4 - | %s view -uF 8 - > %s' % (samtools, SAM, samtools, samtools, bamfile)
    else:
        filtering = '%s view -uF 4 %s | %s view -uF 8 - > %s' % (samtools, SAM, samtools, bamfile)
    sorting1 = '%s -jar %s SortSam INPUT=%s OUTPUT=%s SORT_ORDER=coordinate QUIET=true MAX_RECORDS_IN_RAM=5000000 VERBOSITY=WARNING VALIDATION_STRINGENCY=SILENT' % (java, picard, bamfile, sortedbam)
    rmdup = '%s -jar %s MarkDuplicates INPUT=%s OUTPUT=%s METRICS_FILE=%s REMOVE_DUPLICATES=true QUIET=true MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 VERBOSITY=WARNING VALIDATION_STRINGENCY=SILENT' % (java, picard, sortedbam, rmdupbam, rmdupmetrics)
    sorting2 = '%s -jar %s SortSam INPUT=%s OUTPUT=%s SORT_ORDER=%s QUIET=true MAX_RECORDS_IN_RAM=5000000 VERBOSITY=WARNING VALIDATION_STRINGENCY=SILENT' % (java, picard, rmdupbam, OUT, SORT)

    run_job(filtering, 'Error in first filter:')
    run_job(sorting1, 'Error in sorting1:')
    os.remove(bamfile)
    run_job(rmdup, 'Error in removing duplicates:')
    os.remove(sortedbam)
    run_job(sorting2, 'Error in sorting2:')
    os.remove(rmdupbam)
    # Columns 3, 6, 7 and 8 of line 8 of the metrics file are reported.
    os.system("sed -n 8p %s | cut -f 3 | sed 's/^/Read pairs examined : /'" % rmdupmetrics)
    os.system("sed -n 8p %s | cut -f 6 | sed 's/^/Read pairs duplicates : /'" % rmdupmetrics)
    os.system("sed -n 8p %s | cut -f 7 | sed 's/^/Read pairs optical duplicates : /'" % rmdupmetrics)
    os.system("sed -n 8p %s | cut -f 8 | sed 's/^/Duplication proportion : /'" % rmdupmetrics)
    os.remove(rmdupmetrics)


def __main__():
    """Command line entry point.

    With --config, the input type and the final sort order are read from the
    configuration file (the input file too when --sam is not given) and the
    configuration file is updated with the output location. Without it,
    --sam is mandatory and the command line values are used.
    """
    #Parse Command Line
    parser = optparse.OptionParser(usage="python %prog [options]\n\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr"
    "\n\n This script remove unmapped paired reads and read duplicates")
    # Wrapper options.
    parser.add_option( '', '--sam', dest='sam', default='not_filled', help='Paired sam/bam file')
    parser.add_option( '', '--type', dest='type', default='sam', help='Input type : sam or bam, [default: %default]')
    parser.add_option( '', '--sort', dest='sort', default='coordinate', help='Sort order queryname or coordinate, [default: %default]')
    parser.add_option( '', '--rminput', dest='rminput', default='n', help='Remove input file: y or n, [default: %default]')
    parser.add_option( '', '--out', dest='out', default='rmdup_mapped.bam', help='Output file')
    parser.add_option( '', '--config', dest='config', default=None)
    (options, args) = parser.parse_args()

    # abspath: a bare script name has an empty dirname, which would make the
    # lookup point at '/loca_programs.conf'.
    pathname = os.path.dirname(os.path.abspath(sys.argv[0]))

    loca_programs = ConfigParser.RawConfigParser()
    loca_programs.read(os.path.join(pathname, 'loca_programs.conf'))

    if options.config:
        config = ConfigParser.RawConfigParser()
        config.read(options.config)
        if options.sam == 'not_filled':
            Filter(loca_programs, config.get('Single_filter','out'), config.get('Single_filter','type'), config.get('Remove_dup','sort'), options.out)
        else:
            Filter(loca_programs, options.sam, config.get('Single_filter','type'), config.get('Remove_dup','sort'), options.out)
        # NOTE(review): the file removed here is the one recorded in the
        # configuration, even when --sam named another input - confirm intent.
        if config.get('Remove_dup','rminput') == 'y':
            os.remove(config.get('Single_filter','out'))
        config.set('Remove_dup', 'out', options.out)
        config.set('Remove_dup', 'type', 'bam')
        with open(options.config, 'w') as configfile:
            config.write(configfile)
    else:
        if options.sam == 'not_filled':
            sys.exit('--sam argument is missing, please provide a bam or a sam')
        Filter(loca_programs, options.sam, options.type, options.sort, options.out)
        if options.rminput == 'y':
            os.remove(options.sam)


if __name__ == "__main__": __main__()
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/4_filter_sam.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/4_filter_sam.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
<tool id="4_filter_sam" name="4_filter_sam" version="0.1">
    <description> : Remove redundancy in mapped reads</description>
    <requirements>
        <requirement type="package" version="1.2">samtools</requirement>
    </requirements>
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <!--
        Because the config option is always passed, 4_filter_sam.py takes the
        input type and the final sort order from the configuration file
        (Single_filter/type and Remove_dup/sort).
    -->
    <command>
        source $__tool_directory__/include_scaffremodler.sh ;
        python $__tool_directory__/4_filter_sam.py
        --sam $sam_file
        --config $config_file
        --out $bam_remove_redundancy
    </command>
    <inputs>
        <!-- The input is the SAM file produced by 3_filter_single_pair, not a fastq file -->
        <param name="sam_file" type="data" format="sam" label="Filtered SAM file" />
        <param name="config_file" type="data" format="txt" label="Configuration file"/>
    </inputs>
    <outputs>
        <data format="bam" name="bam_remove_redundancy" label="${tool.name} : Remove redundancy (BAM file)" />
    </outputs>
    <tests>
        <test>
            <param name="sam_file" value="quality_threshold.sam" />
            <param name="config_file" value="config_file.txt" />
            <output name="bam_remove_redundancy" file="bam_remove_redundancy.bam" compare="sim_size"/>
        </test>
    </tests>
    <help>

**Overview**

This program removes unmapped and single mapped paired reads from a sam/bam file, then removes duplicated read pairs. The output is a sorted bam file.

-----

.. class:: infomark

**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD).

.. class:: infomark

**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr

.. class:: infomark

**Program encapsulated in Galaxy by South Green**

    </help>
    <citations>
        <citation type="doi">10.1186/s12864-016-2579-4</citation>
    </citations>
</tool>
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/5_calc_stat.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/5_calc_stat.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,237 @@\n+\r\n+#\r\n+# Copyright 2014 CIRAD\r\n+#\r\n+# This program is free software; you can redistribute it and/or modify\r\n+# it under the terms of the GNU General Public License as published by\r\n+# the Free Software Foundation; either version 3 of the License, or\r\n+# (at your option) any later version.\r\n+#\r\n+# This program is distributed in the hope that it will be useful,\r\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+# GNU General Public License for more details.\r\n+#\r\n+# You should have received a copy of the GNU General Public License\r\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\r\n+# write to the Free Software Foundation, Inc.,\r\n+# 51 Franklin Street, Fifth Floor, Boston,\r\n+# MA 02110-1301, USA.\r\n+#\r\n+#\r\n+\r\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time, math\r\n+\r\n+def stop_err( msg ):\r\n+ sys.stderr.write( "%s\\n" % msg )\r\n+ sys.exit()\r\n+\r\n+def run_job (cmd_line, ERROR):\r\n+\tprint cmd_line\r\n+\ttry:\r\n+\t\ttmp = tempfile.NamedTemporaryFile().name\r\n+\t\t# print tmp\r\n+\t\terror = open(tmp, \'w\')\r\n+\t\tproc = subprocess.Popen( args=cmd_line, shell=True, stderr=error)\r\n+\t\treturncode = proc.wait()\r\n+\t\terror.close()\r\n+\t\terror = open( tmp, \'rb\' )\r\n+\t\tstderr = \'\'\r\n+\t\tbuffsize = 1048576\r\n+\t\ttry:\r\n+\t\t\twhile True:\r\n+\t\t\t\tstderr += error.read( buffsize )\r\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\r\n+\t\t\t\t\tbreak\r\n+\t\texcept OverflowError:\r\n+\t\t\tpass\r\n+\t\terror.close()\r\n+\t\tos.remove(tmp)\r\n+\t\tif returncode != 0:\r\n+\t\t\traise Exception, stderr\r\n+\texcept Exception, e:\r\n+\t\tstop_err( ERROR + str( e ) )\r\n+\r\n+def recal_ins(LOCA_PROGRAMS, FILE, TYPE):\r\n+\t# os.system(\'echo "Calculating insert size on the firts approximation of the well mapped 
read"\')\r\n+\ttmp = tempfile.NamedTemporaryFile().name\r\n+\t# print tmp\r\n+\tif TYPE == \'bam\':\r\n+\t\testim_insert = \'%s view -uf 2 %s | %s view -h -o %s - \' % (LOCA_PROGRAMS.get(\'Programs\',\'samtools\'), FILE, LOCA_PROGRAMS.get(\'Programs\',\'samtools\'), tmp)\r\n+\telif TYPE == \'sam\':\r\n+\t\testim_insert = \'%s view -bS %s | %s view -uf 2 - | %s view -h -o %s - \' % (LOCA_PROGRAMS.get(\'Programs\',\'samtools\'), FILE, LOCA_PROGRAMS.get(\'Programs\',\'samtools\'), LOCA_PROGRAMS.get(\'Programs\',\'samtools\'), tmp)\r\n+\telse:\r\n+\t\tmot = TYPE+\' argument passed in --sam is not recognized\'\r\n+\t\tsys.exit(mot)\r\n+\t\r\n+\trun_job(estim_insert, \'Error in bam/sam filtering:\\n\')\r\n+\t\r\n+\tLIST = []\r\n+\tfichier = open(tmp)\r\n+\ti = 0\r\n+\tfor line in fichier:\r\n+\t\tdata = line.split()\r\n+\t\tif data != []:\r\n+\t\t\tif line[0] != \'@\':\r\n+\t\t\t\tif i == 0:\r\n+\t\t\t\t\ti = 1\r\n+\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\telse:\r\n+\t\t\t\t\ti = 0\r\n+\tMED = mediane(LIST)\r\n+\tMOY = moyenne(LIST)\r\n+\tEC = ecart_type(LIST)\r\n+\tLIST = []\r\n+\tos.remove(tmp)\r\n+\treturn [MED, MOY, EC]\r\n+\r\n+#Fonction that calculate la median, mean and interval containing (INT*100)% of values, COL: column to treate (0 based)\r\n+def stat(FILE, COL, INT, VERBOSE, STAT_FILE):\r\n+\tFICH = open(FILE)\r\n+\tDIC = {}\r\n+\tDIC_final = {}\r\n+\tfor LINE in FICH:\r\n+\t\tDATA = LINE.split()\r\n+\t\tif DATA != []:\r\n+\t\t\tif DATA[0] in DIC:\r\n+\t\t\t\tDIC[DATA[0]].append(float(DATA[COL]))\r\n+\t\t\telse:\r\n+\t\t\t\tDIC[DATA[0]] = []\r\n+\t\t\t\tDIC[DATA[0]].append(float(DATA[COL]))\r\n+\tfor n in DIC:\r\n+\t\tDIC_final[n] = [moyenne(DIC[n]), mediane(DIC[n]), intervalle(DIC[n], INT), len(DIC[n])]\r\n+\tsomme_moy = 0\r\n+\tsomme_med = 0\r\n+\ttaille = 0\r\n+\toutfile = open(STAT_FILE, \'w\')\r\n+\tfor n in DIC_final:\r\n+\t\tif VERBOSE == \'all\':\r\n+\t\t\toutfile.write("Chromosome : "+n+" :\\n")\r\n+\t\t\toutfile.write("\\tMean coverage : 
"+str(DIC_final[n][0])+"\\n")\r\n+\t\t\toutfile.write("\\tMedian coverage : "+str(DIC_final[n][1])+"\\n")\r\n+\t\t\toutfile.write("\\tConfidence interval (90%) : ["+str(DIC_final[n][2][0])+","+str(DIC_final[n][2][1])+"]\\n")\r\n+\t\tsomme_moy = somme_moy + DIC_final[n][0]*DIC_final[n][3]\r\n+\t\tsomme_med = somme_med + DIC_final[n][1]*DIC_final[n][3]\r\n+\t\ttaille = taille + DIC_final[n][3]\r\n+\tif VERBOSE == \'all\' or V'..b'e = math.sqrt(VAR)\r\n+\treturn ecart_type\r\n+\r\n+def calcul_stat(LOCA_PROGRAMS, SAM, TYPE, OUT, STAT_FILE):\r\n+\tif TYPE == \'bam\':\r\n+\t\tcal_cov = \'%s depth %s > %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'samtools\'), SAM, OUT)\r\n+\telif TYPE == \'sam\':\r\n+\t\tsam2bam = \'%s view -bS %s > %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'samtools\'), SAM, OUT+\'.bam\')\r\n+\t\trun_job(sam2bam, \'Error in sam2bam conversion:\\n\')\r\n+\t\tcal_cov = \'%s depth %s > %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'samtools\'), OUT+\'.bam\', OUT)\r\n+\telse:\r\n+\t\tmot = TYPE+\' argument passed in --type is not recognized\'\r\n+\t\tsys.exit(mot)\r\n+\trun_job(cal_cov, \'Error in calculating coverage:\\n\')\r\n+\tif TYPE == \'sam\':\r\n+\t\tos.remove(OUT+\'.bam\')\r\n+\t\r\n+\tliste = stat(OUT, 2, 0.9, \'all\', STAT_FILE)\r\n+\t\r\n+\tINFO_INSERT = recal_ins(LOCA_PROGRAMS, SAM, TYPE)\r\n+\tinsert = float(INFO_INSERT[0])\r\n+\tstandev = str(INFO_INSERT[2])\r\n+\toutfile = open(STAT_FILE, \'a\')\r\n+\toutfile.write("Insert size as been re-estimated to :"+str(insert)+"\\n")\r\n+\toutfile.write("Standard deviation of insert size as been re-estimated to :"+standev+"\\n")\r\n+\treturn [INFO_INSERT[0], INFO_INSERT[1], INFO_INSERT[2], liste[0], liste[1]]\r\n+\r\n+\t\r\n+def __main__():\r\n+\t#Parse Command Line\r\n+\tparser = optparse.OptionParser(usage="python %prog [options]\\n\\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr\\n\\nThis program take in input a sam/bam file,"\r\n+\t" calculate coverage for each covered sites of the reference 
sequences, estimate mean, median and 90% confidence interval coverage for the covered sites and estimate mean and "\r\n+\t"standard deviation of library insert size.")\r\n+\t\r\n+\tparser.add_option( \'\', \'--sam\', dest=\'sam\', default=\'not_filled\', help=\'Paired sam/bam file\')\r\n+\tparser.add_option( \'\', \'--type\', dest=\'type\', default=\'sam\', help=\'Input type : sam or bam, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--out\', dest=\'out\', default=\'coverage.cov\', help=\'Output file\')\r\n+\tparser.add_option( \'\', \'--stat\', dest=\'stat\', default=\'stat.txt\', help=\'Output statistic file\')\r\n+\tparser.add_option( \'\', \'--outconf\', dest=\'outconf\', default=\'stat.conf\', help=\'Output configuration file with statistics\')\r\n+\tparser.add_option( \'\', \'--config\', dest=\'config\', default=None)\r\n+\t(options, args) = parser.parse_args()\r\n+\t\r\n+\t\r\n+\t\r\n+\tpathname = os.path.dirname(sys.argv[0])\r\n+\t\r\n+\tloca_programs = ConfigParser.RawConfigParser()\r\n+\tloca_programs.read(pathname+\'/loca_programs.conf\')\r\n+\t\r\n+\tif options.config:\r\n+\t\tconfig = ConfigParser.RawConfigParser()\r\n+\t\tconfig.read(options.config)\r\n+\t\tif options.sam == \'not_filled\':\r\n+\t\t\tSTAT = calcul_stat(loca_programs, config.get(\'Remove_dup\',\'out\'), config.get(\'Remove_dup\',\'type\'), options.out, options.stat)\r\n+\t\telse:\r\n+\t\t\tSTAT = calcul_stat(loca_programs, options.sam, config.get(\'Remove_dup\',\'type\'), options.out, options.stat)\r\n+\t\tmini = float(STAT[0] - (config.getfloat(\'General\',\'sd_multiplicator\')*STAT[2]))\r\n+\t\tmaxi = float(STAT[0] + (config.getfloat(\'General\',\'sd_multiplicator\')*STAT[2]))\r\n+\t\tconfig.set(\'Calc_coverage\', \'out\', options.out)\r\n+\t\tconfig.set(\'Calc_coverage\', \'median_insert\', STAT[0])\r\n+\t\tconfig.set(\'Calc_coverage\', \'mean_insert\', STAT[1])\r\n+\t\tconfig.set(\'Calc_coverage\', \'standard_deviation_insert\', 
STAT[2])\r\n+\t\tconfig.set(\'Calc_coverage\', \'mean_coverage\', STAT[3])\r\n+\t\tconfig.set(\'Calc_coverage\', \'median_coverage\', STAT[4])\r\n+\t\tconfig.set(\'Calc_coverage\', \'mini\', mini)\r\n+\t\tconfig.set(\'Calc_coverage\', \'maxi\', maxi)\r\n+\t\tconfig.set(\'Score_discord\', \'MiS\', (config.getfloat(\'General\',\'MiS\')*STAT[0]))\r\n+\t\tconfig.set(\'Score_discord\', \'MiC\', (config.getfloat(\'General\',\'MiC\')*STAT[4]))\r\n+\t\twith open(options.config, \'wb\') as configfile:\r\n+\t\t\tconfig.write(configfile)\r\n+\t\tcopy = \'cp %s %s\' % (options.config, options.outconf)\r\n+\t\tos.system(copy)\r\n+\telse:\r\n+\t\tif options.sam == \'not_filled\':\r\n+\t\t\tmot = \'Please provide an argument for --sam\'\r\n+\t\t\tsys.exit(mot)\r\n+\t\tcalcul_stat(loca_programs, options.sam, options.type, options.out, options.stat)\r\n+\t\r\n+\t\r\n+if __name__ == "__main__": __main__()\n\\ No newline at end of file\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/5_calc_stat.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/5_calc_stat.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
<tool id="5_calc_stat" name="5_calc_stat" version="0.1">
    <description> : Calculate statistics on insert size, and coverage</description>
    <requirements>
        <requirement type="package" version="1.2">samtools</requirement>
    </requirements>
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <!-- Three files are produced: per-site coverage, a statistics report and an updated copy of the configuration file -->
    <command>
        source $__tool_directory__/include_scaffremodler.sh ;
        python $__tool_directory__/5_calc_stat.py
        --sam $sam_file
        --config $config_file
        --out $coverage
        --stat $statistic
        --outconf $new_config_file
    </command>
    <inputs>
        <param name="sam_file" type="data" format="bam,sam" label="Sam/bam file" />
        <param name="config_file" type="data" format="txt" label="Configuration file"/>
    </inputs>
    <outputs>
        <data format="txt" name="coverage" label="${tool.name} : Coverage" />
        <data format="txt" name="statistic" label="${tool.name} : Statistic" />
        <data format="txt" name="new_config_file" label="${tool.name} : New Configuration" />
    </outputs>
    <tests>
        <test>
            <param name="sam_file" value="bam_remove_redundancy.bam" />
            <param name="config_file" value="config_file.txt" />
            <output name="coverage" file="coverage.txt"/>
            <output name="statistic" file="statistic.txt"/>
            <output name="new_config_file" file="new_config_file.txt" compare="sim_size"/>
        </test>
    </tests>
    <help>

**Overview**

This program calculates coverage for each covered site of the reference sequences and estimates the mean, the median and the 90% confidence interval of coverage for the covered sites (uncovered sites are not taken into account).

This program also re-estimates the insert size by calculating the median insert size of correctly mapped reads. It also calculates the insert size standard deviation.

-----

.. class:: infomark

**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD).

.. class:: infomark

**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr

.. class:: infomark

**Program encapsulated in Galaxy by South Green**

    </help>
    <citations>
        <citation type="doi">10.1186/s12864-016-2579-4</citation>
    </citations>
</tool>
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/6_parse_discord.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/6_parse_discord.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,786 @@\n+\r\n+#\r\n+# Copyright 2014 CIRAD\r\n+#\r\n+# This program is free software; you can redistribute it and/or modify\r\n+# it under the terms of the GNU General Public License as published by\r\n+# the Free Software Foundation; either version 3 of the License, or\r\n+# (at your option) any later version.\r\n+#\r\n+# This program is distributed in the hope that it will be useful,\r\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+# GNU General Public License for more details.\r\n+#\r\n+# You should have received a copy of the GNU General Public License\r\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\r\n+# write to the Free Software Foundation, Inc.,\r\n+# 51 Franklin Street, Fifth Floor, Boston,\r\n+# MA 02110-1301, USA.\r\n+#\r\n+#\r\n+\r\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time, math\r\n+\r\n+def stop_err( msg ):\r\n+ sys.stderr.write( "%s\\n" % msg )\r\n+ sys.exit()\r\n+\r\n+def run_job (cmd_line, ERROR):\r\n+\tprint cmd_line\r\n+\ttry:\r\n+\t\ttmp = (tempfile.NamedTemporaryFile().name)+\'.error\'\r\n+\t\t# print tmp\r\n+\t\terror = open(tmp, \'w\')\r\n+\t\tproc = subprocess.Popen( args=cmd_line, shell=True, stderr=error)\r\n+\t\treturncode = proc.wait()\r\n+\t\terror.close()\r\n+\t\terror = open( tmp, \'rb\' )\r\n+\t\tstderr = \'\'\r\n+\t\tbuffsize = 1048576\r\n+\t\ttry:\r\n+\t\t\twhile True:\r\n+\t\t\t\tstderr += error.read( buffsize )\r\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\r\n+\t\t\t\t\tbreak\r\n+\t\texcept OverflowError:\r\n+\t\t\tpass\r\n+\t\terror.close()\r\n+\t\tos.remove(tmp)\r\n+\t\tif returncode != 0:\r\n+\t\t\traise Exception, stderr\r\n+\texcept Exception, e:\r\n+\t\tstop_err( ERROR + str( e ) )\r\n+\r\n+def trie2discord_pair(LOCA_PROGRAMS, SAM, TYPE, SORT, MINI_DIS, MINI, MAXI, ORIENT, CHR, LISTE):\r\n+\tif TYPE == \'bam\':\r\n+\t\ttmp 
= (tempfile.NamedTemporaryFile().name)+\'.sam\'\r\n+\t\tif SORT == \'coordinate\' or SORT == \'unsorted\':\r\n+\t\t\tbam2sam = \'%s -jar %s SortSam INPUT=%s OUTPUT=%s SORT_ORDER=queryname QUIET=true MAX_RECORDS_IN_RAM=5000000 VERBOSITY=WARNING VALIDATION_STRINGENCY=SILENT\' % (LOCA_PROGRAMS.get(\'Programs\',\'java\'), LOCA_PROGRAMS.get(\'Programs\',\'picard-tool\'), SAM, tmp)\r\n+\t\telif SORT == \'queryname\':\r\n+\t\t\tbam2sam = \'samtools view -h -o %s %s\' % (tmp, SAM)\r\n+\t\telse:\r\n+\t\t\tmot = \'Unrecognized --sort option : \'+SORT\r\n+\t\t\tsys.exit(mot)\r\n+\t\trun_job (bam2sam, \'Error in bam2sam conversion:\')\r\n+\t\ttrielinebyline(tmp, MINI_DIS, MINI, MAXI, ORIENT, CHR, LISTE)\r\n+\t\t# print tmp\r\n+\t\tos.remove(tmp)\r\n+\telse:\r\n+\t\tif SORT == \'coordinate\' or SORT == \'unsorted\':\r\n+\t\t\ttmp = (tempfile.NamedTemporaryFile().name)+\'.sam\'\r\n+\t\t\tSorting = \'%s -jar %s SortSam INPUT=%s OUTPUT=%s SORT_ORDER=queryname QUIET=true MAX_RECORDS_IN_RAM=5000000 VERBOSITY=WARNING VALIDATION_STRINGENCY=SILENT\' % (LOCA_PROGRAMS.get(\'Programs\',\'java\'), LOCA_PROGRAMS.get(\'Programs\',\'picard-tool\'), SAM, tmp)\r\n+\t\t\trun_job (Sorting, \'Error in samtools :\')\r\n+\t\t\ttrielinebyline(SAM, MINI_DIS, MINI, MAXI, ORIENT, CHR, LISTE)\r\n+\t\t\t# print tmp\r\n+\t\t\tos.remove(tmp)\r\n+\t\telif SORT == \'queryname\':\r\n+\t\t\ttrielinebyline(SAM, MINI_DIS, MINI, MAXI, ORIENT, CHR, LISTE)\r\n+\t\telse:\r\n+\t\t\tmot = \'Unrecognized --sort option : \'+SORT\r\n+\t\t\tsys.exit(mot)\r\n+\r\n+def trielinebyline(SAM, MINI_DIS, MINI, MAXI, ORIENT, CHR, LISTE):\r\n+\t############################################\r\n+\t#recording chromosome order\r\n+\t############################################\r\n+\tliste_chr = []\r\n+\tfile = open(CHR)\r\n+\tfor line in file:\r\n+\t\tdata = line.split()\r\n+\t\tif data:\r\n+\t\t\tliste_chr.append(data[0])\r\n+\tfile.close()\r\n+\r\n+\t############################################\r\n+\t#Creating the liste of discordant 
reads\r\n+\t############################################\r\n+\tfile = open(SAM)\r\n+\toutfile = open(LISTE,\'w\')\r\n+\tl1 = file.readline().split()\r\n+\twhile l1[0][0] == \'@\':\r\n+\t\tl1 = file.readline().split()\r\n+\tl2 = file.readline().split()\r\n+\twhile l1:\r\n+\t\tif ORIENT == \'rf\':\r\n+\t\t\toutfile.write(\'\\t\'.join(trie_discord_rf(l1, l2, MINI_DIS, MINI, MAXI, liste_chr))+\'\\n\')\r\n+\t\telif ORI'..b', float(config.get(\'General\',\'mini_dis\')), float(config.get(\'Calc_coverage\',\'mini\')), float(config.get(\'Calc_coverage\',\'maxi\')), config.get(\'General\',\'orient\'), config.get(\'General\',\'chr\'), options.liste_type)\r\n+\t\tempty = calcul_discord_prop_and_parse(loca_programs, config.get(\'General\',\'chr\'), config.get(\'Remove_dup\',\'out\'), options.liste_type, options.out_ins, options.out_del, options.out_fr, options.out_rf, options.out_ff, options.out_rr, options.out_chr_fr, options.out_chr_rf, options.out_chr_ff, options.out_chr_rr, options.out_discarded, options.discord_prop, config.get(\'General\',\'orient\'), config.get(\'General\',\'exclude_chrom\'))\r\n+\t\tif config.get(\'Trie_discord\',\'rminput\') == \'y\':\r\n+\t\t\tos.remove(config.get(\'Remove_dup\',\'out\'))\r\n+\t\tif options.out_ins in empty:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_ins\', \'empty\')\r\n+\t\telse:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_ins\', options.out_ins)\r\n+\t\tif options.out_del in empty:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_del\', \'empty\')\r\n+\t\telse:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_del\', options.out_del)\r\n+\t\tif options.out_fr in empty:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_fr\', \'empty\')\r\n+\t\telse:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_fr\', options.out_fr)\r\n+\t\tif options.out_rf in empty:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_rf\', \'empty\')\r\n+\t\telse:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_rf\', options.out_rf)\r\n+\t\tif options.out_ff in 
empty:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_ff\', \'empty\')\r\n+\t\telse:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_ff\', options.out_ff)\r\n+\t\tif options.out_rr in empty:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_rr\', \'empty\')\r\n+\t\telse:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_rr\', options.out_rr)\r\n+\t\tif options.out_chr_fr in empty:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_chr_fr\', \'empty\')\r\n+\t\telse:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_chr_fr\', options.out_chr_fr)\r\n+\t\tif options.out_chr_rf in empty:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_chr_rf\', \'empty\')\r\n+\t\telse:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_chr_rf\', options.out_chr_rf)\r\n+\t\tif options.out_chr_ff in empty:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_chr_ff\', \'empty\')\r\n+\t\telse:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_chr_ff\', options.out_chr_ff)\r\n+\t\tif options.out_chr_rr in empty:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_chr_rr\', \'empty\')\r\n+\t\telse:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_chr_rr\', options.out_chr_rr)\r\n+\t\tif options.out_discarded in empty:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_discarded\', \'empty\')\r\n+\t\telse:\r\n+\t\t\tconfig.set(\'Trie_discord\', \'out_discarded\', options.out_discarded)\r\n+\t\tconfig.set(\'Trie_discord\', \'liste_type\', options.liste_type)\r\n+\t\tconfig.set(\'Trie_discord\', \'discord_prop\', options.discord_prop)\r\n+\t\tconfig.set(\'Trie_discord\', \'type\', \'bam\')\r\n+\t\twith open(options.config, \'wb\') as configfile:\r\n+\t\t\tconfig.write(configfile)\r\n+\telse:\r\n+\t\tif options.sam == \'not_filled\':\r\n+\t\t\tmot = \'Please provide an argument for --sam\'\r\n+\t\t\tsys.exit(mot)\r\n+\t\tif options.sort == \'not_filled\':\r\n+\t\t\tmot = \'Please provide an argument for --sort\'\r\n+\t\t\tsys.exit(mot)\r\n+\t\tif options.chr == \'not_filled\':\r\n+\t\t\tmot = \'Please provide an argument for 
--chr\'\r\n+\t\t\tsys.exit(mot)\r\n+\t\tif options.mini == \'not_filled\':\r\n+\t\t\tmot = \'Please provide an argument for --mini\'\r\n+\t\t\tsys.exit(mot)\r\n+\t\tif options.maxi == \'not_filled\':\r\n+\t\t\tmot = \'Please provide an argument for --maxi\'\r\n+\t\t\tsys.exit(mot)\r\n+\t\ttrie2discord_pair(loca_programs, options.sam, options.type, options.sort, float(options.mini_dis), float(options.mini), float(options.maxi), options.orient, options.chr, options.liste_type)\r\n+\t\tcalcul_discord_prop_and_parse(loca_programs, options.chr, options.sam, options.liste_type, options.out_ins, options.out_del, options.out_fr, options.out_rf, options.out_ff, options.out_rr, options.out_chr_fr, options.out_chr_rf, options.out_chr_ff, options.out_chr_rr, options.out_discarded, options.discord_prop, options.orient, options.exclude_chrom)\r\n+\t\tif options.rminput == \'y\':\r\n+\t\t\tos.remove(options.sam)\r\n+\tos.remove(options.liste_type+".filtered")\r\n+if __name__ == "__main__": __main__()\r\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/6_parse_discord.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/6_parse_discord.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,111 @@ +<tool id="6_parse_discord" name="6_parse_discord" version="0.1"> + <description> : Parse mapped reads based on their discordance type</description> + <requirements> + <requirement type="package" version="1.136">picard</requirement> + <requirement type="package" version="1.2">samtools</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/6_parse_discord.py + --config $config_file + --out_ins $out_insertion + --out_del $out_deletion + --out_fr $out_fr + --out_rf $out_rf + --out_ff $out_ff + --out_rr $out_rr + --out_chr_fr $out_chr_fr + --out_chr_rf $out_chr_rf + --out_chr_ff $out_chr_ff + --out_chr_rr $out_chr_rr + --out_discarded $out_discarded + --discord_prop $discordant_proportion + --liste_type $list_type + </command> + <inputs> + <param name="config_file" type="data" format="txt" label="Configuration file"/> + </inputs> + <outputs> + <data format="bam" name="out_insertion" label="${tool.name} : Insertion" /> + <data format="bam" name="out_deletion" label="${tool.name} : Deletion" /> + <data format="bam" name="out_fr" label="${tool.name} : Forward-Reverse" /> + <data format="bam" name="out_rf" label="${tool.name} : Reverse-Forward" /> + <data format="bam" name="out_ff" label="${tool.name} : Forward-Forward" /> + <data format="bam" name="out_rr" label="${tool.name} : Reverse-Reverse" /> + <data format="bam" name="out_chr_fr" label="${tool.name} : Chromosome-FR" /> + <data format="bam" name="out_chr_rf" label="${tool.name} : Chromosome-RF" /> + <data format="bam" name="out_chr_ff" label="${tool.name} : Chromosome-FF" /> + <data format="bam" name="out_chr_rr" label="${tool.name} : Chromosome-RR" /> + <data format="bam" name="out_discarded" label="${tool.name} : Read discarded" /> + <data format="txt" name="discordant_proportion" label="${tool.name} : Discordant read proportion" /> + <data format="txt" name="list_type" 
label="${tool.name} : List" /> + </outputs> + <tests> + <test> + <param name="config_file" value="new_config_file.txt" /> + <output name="list_type" file="list_type.txt" compare="sim_size"/> + <output name="discordant_proportion" file="discordant_proportion.txt" compare="sim_size"/> + <output name="out_discarded" file="out_discarded.bam" compare="sim_size"/> + <output name="out_chr_rr" file="out_chr_rr.bam" compare="sim_size"/> + <output name="out_chr_ff" file="out_chr_ff.bam" compare="sim_size"/> + <output name="out_chr_rf" file="out_chr_rf.bam" compare="sim_size"/> + <output name="out_chr_fr" file="out_chr_fr.bam" compare="sim_size"/> + <output name="out_rr" file="out_rr.bam" compare="sim_size"/> + <output name="out_ff" file="out_ff.bam" compare="sim_size"/> + <output name="out_rf" file="out_rf.bam" compare="sim_size"/> + <output name="out_fr" file="out_fr.bam" compare="sim_size"/> + <output name="out_insertion" file="out_insertion.bam" compare="sim_size"/> + <output name="out_deletion" file="out_deletion.bam" compare="sim_size"/> + </test> + </tests> + <help> + +**Overview** + +This program takes a sam/bam file as input, identifies discordant read pairs, calculates the proportion of discordant reads over 1 kb windows and splits the sam/bam file into 11 sub bam files corresponding to the different discordant types of mapped pairs : + +* correct orientation and insert size : reverse-forward or forward-reverse, depending on correct orientation (out_rf or out_fr options respectively) + +* correct orientation but insert size greater than expected (deletion type, out_del option) + +* correct orientation but insert size smaller than expected (insertion type, out_ins option) + +* incorrect orientation : reverse-forward or forward-reverse depending on correct orientation (out_rf or out_fr option respectively) + +* reverse-reverse mapped pairs on the same chromosome (out_rr option) + +* forward-forward mapped pairs on the same chromosome (out_ff option) + +* reverse-forward 
mapped pairs on distinct chromosomes (out_chr_rf option) + +* forward-reverse mapped pairs on distinct chromosomes (out_chr_fr option) + +* reverse-reverse mapped pairs on distinct chromosomes (out_chr_rr option) + +* forward-forward mapped pairs on distinct chromosomes (out_chr_ff option) + +* an additional bam file containing discarded reads. These discarded read pairs are incorrectly mapped reads that have an insert size lower than the minimal insert size passed in mini_dis options. + + +----- + +.. class:: infomark + +**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD). + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr + +.. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/7_select_on_cov.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/7_select_on_cov.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,1364 @@\n+\r\n+#\r\n+# Copyright 2014 CIRAD\r\n+#\r\n+# This program is free software; you can redistribute it and/or modify\r\n+# it under the terms of the GNU General Public License as published by\r\n+# the Free Software Foundation; either version 3 of the License, or\r\n+# (at your option) any later version.\r\n+#\r\n+# This program is distributed in the hope that it will be useful,\r\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+# GNU General Public License for more details.\r\n+#\r\n+# You should have received a copy of the GNU General Public License\r\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\r\n+# write to the Free Software Foundation, Inc.,\r\n+# 51 Franklin Street, Fifth Floor, Boston,\r\n+# MA 02110-1301, USA.\r\n+#\r\n+#\r\n+\r\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time, math, glob, datetime\r\n+from inspect import currentframe, getframeinfo\r\n+\r\n+def stop_err( msg ):\r\n+\traise ValueError(msg)\r\n+\r\n+def run_job (outLog, frameinfo, cmd_line, ERROR):\r\n+\tlogOutput = open(outLog, \'a\')\r\n+\tlogOutput.write("\\n"+cmd_line)\r\n+\tlogOutput.close()\r\n+\ttry:\r\n+\t\ttmp = tempfile.NamedTemporaryFile().name\r\n+\t\terror = open(tmp, \'w\')\r\n+\t\tproc = subprocess.Popen( args=cmd_line, shell=True, stderr=error)\r\n+\t\treturncode = proc.wait()\r\n+\t\terror.close()\r\n+\t\terror = open( tmp, \'rb\' )\r\n+\t\tstderr = \'\'\r\n+\t\tbuffsize = 1048576\r\n+\t\ttry:\r\n+\t\t\twhile True:\r\n+\t\t\t\tstderr += error.read( buffsize )\r\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\r\n+\t\t\t\t\tbreak\r\n+\t\texcept OverflowError:\r\n+\t\t\tpass\r\n+\t\terror.close()\r\n+\t\tos.remove(tmp)\r\n+\t\tif returncode != 0:\r\n+\t\t\traise Exception, stderr\r\n+\texcept Exception, e:\r\n+\t\tstop_err( \'Line : \'+str(frameinfo.lineno)+\' - \'+ERROR + str( e 
) )\r\n+\r\n+def run_job_silent (frameinfo, cmd_line, ERROR):\r\n+\ttry:\r\n+\t\ttmp = tempfile.NamedTemporaryFile().name\r\n+\t\terror = open(tmp, \'w\')\r\n+\t\tproc = subprocess.Popen( args=cmd_line, shell=True, stderr=error)\r\n+\t\treturncode = proc.wait()\r\n+\t\terror.close()\r\n+\t\terror = open( tmp, \'rb\' )\r\n+\t\tstderr = \'\'\r\n+\t\tbuffsize = 1048576\r\n+\t\ttry:\r\n+\t\t\twhile True:\r\n+\t\t\t\tstderr += error.read( buffsize )\r\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\r\n+\t\t\t\t\tbreak\r\n+\t\texcept OverflowError:\r\n+\t\t\tpass\r\n+\t\terror.close()\r\n+\t\tos.remove(tmp)\r\n+\t\tif returncode != 0:\r\n+\t\t\traise Exception, stderr\r\n+\texcept Exception, e:\r\n+\t\tstop_err( \'Line : \'+str(frameinfo.lineno)+\'\\n\'+cmd_line+\'\\n\'+ERROR + str( e ) )\r\n+\r\n+def mediane(L):\r\n+\t"""\r\n+\t\tGive the median value of a list of integers\r\n+\r\n+\t\t:param L: A list of integers\r\n+\t\t:type L: list\r\n+\t\t:return: The median value\r\n+\t\t:rtype: int\r\n+\t"""\r\n+\r\n+\tL.sort()\r\n+\tN = len(L)\r\n+\tn = N/2.0\r\n+\tp = int(n)\r\n+\tif n == 0:\r\n+\t\treturn 0\r\n+\telif n == 1:\r\n+\t\treturn (L[0])\r\n+\telif n == p:\r\n+\t\treturn (L[p-1]+L[p])/2.0\r\n+\telse:\r\n+\t\treturn float(L[p])\r\n+\r\n+def extractSamFromPosition(LOCA_PROGRAMS, SAM, TYPE, CHR, START, END, OUT):\r\n+\t"""\r\n+\t\tProvide a sam file or bam file from coordinates\r\n+\r\n+\t\tThis function create a sam or bam file, according to the input format, from a first sam or bam file and coordinates like chr01:10000-20000.\r\n+\r\n+\t\t:param LOCA_PROGRAMS: From the Configparser module. Contains the path of each programs\r\n+\t\t:param SAM: The input sam or bam file\r\n+\t\t:type SAM: str\r\n+\t\t:param TYPE: The format of the input file. 
If the input file is a bam file, he must be indexed.\r\n+\t\t:type TYPE: str ("sam" | "bam")\r\n+\t\t:param CHR: File containing col1: chromosome name and col2: chromosome size\r\n+\t\t:type CHR: str\r\n+\t\t:param START: The start position\r\n+\t\t:type START: int\r\n+\t\t:param END: The end position\r\n+\t\t:type END: int\r\n+\t\t:param OUT: The name of the output file\r\n+\t\t:return: void\r\n+\t"""\r\n+\r\n+\tif TYPE == \'bam\':\r\n+\t\tbam2subbam = \'%s view -bh %s %s:%s-%s -o %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'samtools\'), SAM, CHR, START, END, OUT)\r\n+\t\trun_job_silent(getframeinfo(currentframe()), bam2subbam, \'Error in bam2subbam:\\n\')\r\n+\telif TY'..b' -i \'1i#CHR-zone1\\tSTART\\tEND\\tSIZE\\tCOV\\tCHR-zone2\\tSTART\\tEND\\tSIZE\\tCOV\\tMISC\\tREAD\\tSCORE\\tSTATUS\' %s" % options.out)\r\n+\t\telse:\r\n+\t\t\tif options.ref == \'not_filled\':\r\n+\t\t\t\traise ValueError(\'Please provide an argument for --ref\')\r\n+\r\n+\t\t\t# Index bam file\r\n+\t\t\tlogOutput.write("\\nstarting date : "+str(datetime.datetime.now()))\r\n+\t\t\tt0 = datetime.datetime.now()\r\n+\t\t\tindex_bam_file(loca_programs, options.sam)\r\n+\t\t\tlogOutput.write("\\nindex of the bam file : "+str(datetime.datetime.now() - t0))\r\n+\t\t\tlogOutput.flush()\r\n+\t\t\t# Sort the bam upstream / downstream\r\n+\t\t\tt0 = datetime.datetime.now()\r\n+\t\t\tamont_aval(loca_programs, options.chr, options.sam, options.type, tmp_name+\'Sam\')\r\n+\t\t\tsam2bam(loca_programs, tmp_name+\'Sam_amont\', tmp_name+\'Bam_amont\')\r\n+\t\t\tsam2bam(loca_programs, tmp_name+\'Sam_aval\', tmp_name+\'Bam_aval\')\r\n+\t\t\tindex_bam_file(loca_programs, tmp_name+\'Bam_amont\')\r\n+\t\t\tindex_bam_file(loca_programs, tmp_name+\'Bam_aval\')\r\n+\t\t\tlogOutput.write("\\nSort and index amont/aval : "+str(datetime.datetime.now() - t0))\r\n+\t\t\tlogOutput.flush()\r\n+\r\n+\t\t\t# Calcul the coverture site by site\r\n+\t\t\tt0 = datetime.datetime.now()\r\n+\t\t\tcalcul_cov(loca_programs, options.sam, 
options.type, tmp_cov)\r\n+\t\t\tlogOutput.write(\'\\ncalcul coverture : \'+str(datetime.datetime.now() - t0))\r\n+\t\t\tlogOutput.flush()\r\n+\t\t\tmaxcov = int(options.median_coverage)*int(options.mult_max_cov)\r\n+\t\t\tmincov = int(options.median_coverage)*int(mult_min_cov)\r\n+\t\t\tminzone = options.min_zone\r\n+\t\t\tmingap = options.min_gap\r\n+\t\t\tlogOutput.write(\'\\nMinimal accepted coverage:\'+str(mincov))\r\n+\t\t\tlogOutput.write(\'\\nMaximal accepted coverage:\'+str(maxcov))\r\n+\t\t\tlogOutput.flush()\r\n+\r\n+\t\t\t# Find zones from the coverture\r\n+\t\t\tt0 = datetime.datetime.now()\r\n+\t\t\tstatCovs = select_sur_couv(tmp_cov, int(options.min_zone), float(options.maxcov), float(options.mincov), int(options.min_gap), tmp_zone)\r\n+\t\t\tnb_zone = statCovs[0]\r\n+\t\t\tlongerZone = statCovs[1]\r\n+\t\t\tlogOutput.write("\\ntotal number of discordant zones : "+str(nb_zone))\r\n+\t\t\tlogOutput.write("\\nlength of the longer zone : "+str(longerZone))\r\n+\t\t\tlogOutput.write(\'\\nselect zones on coverture : \'+str(datetime.datetime.now() - t0))\r\n+\t\t\tlogOutput.flush()\r\n+\t\t\tos.remove(tmp_cov)\r\n+\t\t\tecart = options.ecart\r\n+\t\t\tlogOutput.write(\'\\nMargin:\'+str(ecart))\r\n+\t\t\tlogOutput.write(\'\\nNumber of zone to test:\'+str(nb_zone))\r\n+\t\t\tlogOutput.flush()\r\n+\r\n+\t\t\t# Try to identify mate zones\r\n+\t\t\tt0 = datetime.datetime.now()\r\n+\t\t\tlook_4_mate(logNameFile, loca_programs, options.type, options.sam, options.chr, tmp_mate_zone, tmp_zone, float(options.ecart), int(options.min_zone), float(options.maxcov), float(options.mincov), int(options.min_gap), nb_zone, longerZone, int(options.med_insert))\r\n+\t\t\tlogOutput.write(\'\\nlook for mate zones : \'+str(datetime.datetime.now() - t0))\r\n+\t\t\tlogOutput.flush()\r\n+\t\t\tos.remove(tmp_zone)\r\n+\r\n+\t\t\t# Try to merge the mate zones identified above\r\n+\t\t\tt0 = datetime.datetime.now()\r\n+\t\t\tmerge_zone(loca_programs, options.chr, tmp_mate_zone, 
int(options.max_dist_merge), tmp_name+\'Bam_amont\', tmp_name+\'Bam_aval\', \'bam\', tmp_merge)\r\n+\t\t\tlogOutput.write(\'\\nmerge the zones : \'+str(datetime.datetime.now() - t0))\r\n+\t\t\tlogOutput.flush()\r\n+\t\t\tos.remove(tmp_mate_zone)\r\n+\r\n+\t\t\t# Calcul the score for each mate zones\r\n+\t\t\tt0 = datetime.datetime.now()\r\n+\t\t\tcalculate_score(tmp_merge, float(options.YiS), float(options.MiS), float(options.YiC), float(optionsMiC), float(options.min_score), options.chr, options.out)\r\n+\t\t\tlogOutput.write(\'\\ncalculate score : \'+str(datetime.datetime.now() - t0))\r\n+\t\t\tlogOutput.flush()\r\n+\r\n+\t\t\tos.remove(tmp_merge)\r\n+\t\t\t# Add header to the score file\r\n+\t\t\tos.system("sed -i \'1i#CHR-zone1\\tSTART\\tEND\\tSIZE\\tCOV\\tCHR-zone2\\tSTART\\tEND\\tSIZE\\tCOV\\tMISC\\tREAD\\tSCORE\\tSTATUS\' %s" % options.out)\r\n+\r\n+\tlogOutput.write("\\ntotal time : "+str(datetime.datetime.now() - t_start))\r\n+\tlogOutput.close()\r\n+\t# if os.path.exists(options.sam+".bai"):\r\n+\t\t# os.remove(options.sam+".bai")\r\n+\tos.remove(logNameFile)\r\n+if __name__ == "__main__": __main__()\r\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/7_select_on_cov.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/7_select_on_cov.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,93 @@ +<tool id="7_select_on_cov" name="7_select_on_cov" version="0.1"> + <description> : Identify discordant zones</description> + <requirements> + <requirement type="package" version="1.2">samtools</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/7_select_on_cov.py + --sam $sam_file + --config $config_file + --out $out + </command> + <inputs> + <param name="sam_file" type="data" format="bam,sam" label="Sam/bam discordant file" /> + <param name="config_file" type="data" format="txt" label="conf file"/> + <param name="prefix" type="text" label="Identifier for output" /> + </inputs> + <outputs> + <data format="txt" name="out" label="${tool.name} : $prefix score" /> + </outputs> + <tests> + <test> + <param name="sam_file" value="out_fr.bam" /> + <param name="config_file" value="new_config_file.txt" /> + <output name="out" file="discordant_zone_FR.txt"/> + </test> + <test> + <param name="sam_file" value="out_insertion.bam" /> + <param name="config_file" value="new_config_file.txt" /> + <output name="out" file="discordant_zone_insertion.txt"/> + </test> + <test> + <param name="sam_file" value="out_deletion.bam" /> + <param name="config_file" value="new_config_file.txt" /> + <output name="out" file="discordant_zone_deletion.txt"/> + </test> + <test> + <param name="sam_file" value="out_ff.bam" /> + <param name="config_file" value="new_config_file.txt" /> + <output name="out" file="discordant_zone_FF.txt"/> + </test> + <test> + <param name="sam_file" value="out_chr_rf.bam" /> + <param name="config_file" value="new_config_file.txt" /> + <output name="out" file="discordant_zone_chr_RF.txt"/> + </test> + <test> + <param name="sam_file" value="out_chr_ff.bam" /> + <param name="config_file" value="new_config_file.txt" /> + <output name="out" file="discordant_zone_chr_FF.txt"/> + </test> + <test> + <param name="sam_file" 
value="out_chr_fr.bam" /> + <param name="config_file" value="new_config_file.txt" /> + <output name="out" file="discordant_zone_chr_FR.txt"/> + </test> + <test> + <param name="sam_file" value="out_chr_rr.bam" /> + <param name="config_file" value="new_config_file.txt" /> + <output name="out" file="discordant_zone_chr_RR.txt"/> + </test> + <test> + <param name="sam_file" value="out_rr.bam" /> + <param name="config_file" value="new_config_file.txt" /> + <output name="out" file="discordant_zone_RR.txt"/> + </test> + </tests> + <help> + +**Overview** + +----- + +.. class:: infomark + +**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD). + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr + +.. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/8_ident_SV.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/8_ident_SV.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,940 @@\n+#!/usr/local/bioinfo/python/2.7.9/bin/python\n+#\n+#\n+# Copyright 2014 CIRAD\n+#\n+# This program is free software; you can redistribute it and/or modify\n+# it under the terms of the GNU General Public License as published by\n+# the Free Software Foundation; either version 3 of the License, or\n+# (at your option) any later version.\n+#\n+# This program is distributed in the hope that it will be useful,\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+# GNU General Public License for more details.\n+#\n+# You should have received a copy of the GNU General Public License\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\n+# write to the Free Software Foundation, Inc.,\n+# 51 Franklin Street, Fifth Floor, Boston,\n+# MA 02110-1301, USA.\n+#\n+#\n+\n+import optparse\n+import os\n+import shutil\n+import subprocess\n+import sys\n+import tempfile\n+import fileinput\n+import ConfigParser\n+import operator\n+import time\n+import random\n+import datetime\n+import ctypes\n+import multiprocessing as mp\n+from multiprocessing.sharedctypes import Value, Array \n+\n+# Global variables\n+covChr = {} # contains the coverture of each site by chromosome\n+\n+\n+def stop_err( msg ):\n+ sys.stderr.write( "%s\\n" % msg )\n+ sys.exit()\n+\n+\n+def run_job (cmd_line, ERROR):\n+\tprint cmd_line\n+\ttry:\n+\t\ttmp = tempfile.NamedTemporaryFile().name\n+\t\t# print tmp\n+\t\terror = open(tmp, \'w\')\n+\t\tproc = subprocess.Popen( args=cmd_line, shell=True, stderr=error)\n+\t\treturncode = proc.wait()\n+\t\terror.close()\n+\t\terror = open( tmp, \'rb\' )\n+\t\tstderr = \'\'\n+\t\tbuffsize = 1048576\n+\t\ttry:\n+\t\t\twhile True:\n+\t\t\t\tstderr += error.read( buffsize )\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\n+\t\t\t\t\tbreak\n+\t\texcept OverflowError:\n+\t\t\tpass\n+\t\terror.close()\n+\t\tos.remove(tmp)\n+\t\tif returncode != 
0:\n+\t\t\traise Exception, stderr\n+\texcept Exception, e:\n+\t\tstop_err( ERROR + str( e ) )\n+\n+\n+def readChrLenght(chrFile):\n+\t"""\n+\t\tReturn a dict with the chromosome name as key, and their size in value.\n+\t"""\n+\tchrSize = {}\n+\tf = open(chrFile, \'r\')\n+\tfor line in f:\n+\t\tif line.strip():\n+\t\t\tcols = line.split()\n+\t\t\tchrSize[cols[0]] = int(cols[1])\n+\tf.close()\n+\treturn chrSize\n+\n+\t\n+def readCov(chrFile, covFile):\n+\t"""\n+\t\tfill a global dict with the coverage of the chromosome for each site\n+\t"""\n+\tchrSize = readChrLenght(chrFile)\n+\tchr = ""\n+\tf = open(covFile, \'r\')\n+\tfor line in f:\n+\t\tif line.strip():\n+\t\t\tcols = line.split()\n+\t\t\tif cols[0] != chr: # new chromosome\n+\t\t\t\tif chr: # not the first line\n+\t\t\t\t\tcovChr[chr] = test\n+\t\t\t\ttest = mp.sharedctypes.RawArray(ctypes.c_int, chrSize[cols[0]])\n+\t\t\t\ttest[int(cols[1])-1] = int(cols[2])\n+\t\t\t\tchr = cols[0]\n+\t\t\telse:\n+\t\t\t\ttest[int(cols[1])-1] = int(cols[2])\n+\tcovChr[chr] = test\n+\n+\t\n+def indent_discord(FF, FR, RR, INS, DEL, CHR_rr, CHR_fr, CHR_rf, CHR_ff, INSERT, OUT, EXP_COV, PLOID, TYPE):\n+\toutfile = open(OUT,\'w\')\n+\tfile = open(FF)\n+\tdic_FF = {}\n+\tfor line in file:\n+\t\tdata = line.split()\n+\t\tif data != []:\n+\t\t\tif data[0][0] != "#" and data[13] == "PASSED":\n+\t\t\t\tif data[0] in dic_FF:\n+\t\t\t\t\tdic_FF[data[0]].append([int(data[1]), int(data[2]), data[5], int(data[6]), int(data[7])])\n+\t\t\t\telse :\n+\t\t\t\t\tdic_FF[data[0]] = ([])\n+\t\t\t\t\tdic_FF[data[0]].append([int(data[1]), int(data[2]), data[5], int(data[6]), int(data[7])])\n+\tfile.close()\n+\t\n+\tfile = open(FR)\n+\tdic_FR = {}\n+\tfor line in file:\n+\t\tdata = line.split()\n+\t\tif data != []:\n+\t\t\tif data[0][0] != "#" and data[13] == "PASSED":\n+\t\t\t\tif data[0] in dic_FR:\n+\t\t\t\t\tdic_FR[data[0]].append([int(data[1]), int(data[2]), data[5], int(data[6]), int(data[7])])\n+\t\t\t\telse :\n+\t\t\t\t\tdic_FR[data[0]] = 
([])\n+\t\t\t\t\tdic_FR[data[0]].append([int(data[1]), int(data[2]), data[5], int(data[6]), int(data[7])])\n+\tfile.close()\n+\t\n+\tfile = open(RR)\n+\tdic_RR = {}\n+\tfor line in file:\n+\t\tdata = line.split()\n+\t\tif data != []:\n+\t\t\tif data[0][0] != "#" and data[13] == "PASSED":\n+\t\t\t\tif data[0] in dic_RR:\n+\t\t\t\t\tdic_RR[data[0]].append([int'..b'r_rr)\n+\t\tconfig.set(\'Ident_discord\',\'chr_fr\', options.chr_fr)\n+\t\tconfig.set(\'Ident_discord\',\'chr_rf\', options.chr_rf)\n+\t\tconfig.set(\'Ident_discord\',\'chr_ff\', options.chr_ff)\n+\t\twith open(options.config, \'wb\') as configfile:\n+\t\t\tconfig.write(configfile)\t\t\n+\t\n+\tsys.exit()\n+\ti = 0\n+\tliste_tmp = []\n+\tliste_id = []\n+\tlisteJobs = []\n+\t# liste_job = []\n+\twhile i < 10:\n+\t\ttemp = options.out+\'_\'+str(i)\n+\t\tliste_tmp.append(temp)\n+\t\t# print temp\n+\t\tif options.config:\n+\t\t\t# liste_id.append("%s %s/ident_SV.py --frf %s --ff %s --rr %s --ins %s --delet %s --chr_rr %s --chr_fr %s --chr_rf %s --chr_ff %s --chr %s --covf %s --orient %s --insert %s --exp_cov %s --ploid %s --out %s --config %s --type %s" % (loca_programs.get(\'Programs\',\'python\'), ScriptPath, options.frf, options.ff, options.rr, options.ins, options.delet, options.chr_rr, options.chr_fr, options.chr_rf, options.chr_ff, options.chr, options.covf, options.orient, options.insert, options.exp_cov, options.ploid, temp, options.config, str(i)))\n+\t\t\targs = [ScriptPath, options.frf, options.ff, options.rr, options.ins, options.delet, options.chr_rr, options.chr_fr, options.chr_rf, options.chr_ff, options.chr, options.covf, options.orient, options.insert, options.exp_cov, options.ploid, temp, options.config, str(i)]\n+\t\t\ttype = str(i)\n+\t\t\tlisteJobs.append(args)\n+\t\telse:\n+\t\t\t# liste_id.append("%s %s/ident_SV.py --frf %s --ff %s --rr %s --ins %s --delet %s --chr_rr %s --chr_fr %s --chr_rf %s --chr_ff %s --chr %s --covf %s --orient %s --insert %s --exp_cov %s --ploid %s --out %s --type 
%s" % (loca_programs.get(\'Programs\',\'python\'), ScriptPath, options.frf, options.ff, options.rr, options.ins, options.delet, options.chr_rr, options.chr_fr, options.chr_rf, options.chr_ff, options.chr, options.covf, options.orient, options.insert, options.exp_cov, options.ploid, temp, str(i)))\n+\t\t\targs = [ScriptPath, options.frf, options.ff, options.rr, options.ins, options.delet, options.chr_rr, options.chr_fr, options.chr_rf, options.chr_ff, options.chr, options.covf, options.orient, options.insert, options.exp_cov, options.ploid, temp, str(i)]\n+\t\t\tlisteJobs.append(args)\n+\t\ti += 1\n+\t\n+\tliste_process = []\n+\t\n+\tpool = multiprocessing.Pool(processes=proc)\n+\tresultsJobs = pool.map(worker, liste_id)\n+\t\n+\t# for n in liste_id:\n+\t\t# t = multiprocessing.Process(target=run_job, args=(n, \'Bug lauching indent_SV.py\',))\n+\t\t# liste_process.append(t)\n+\t\t# if len(liste_process) == proc:\n+\t\t\t# # Starts threads\n+\t\t\t# for process in liste_process:\n+\t\t\t\t# process.start()\n+\t\t\t# # This blocks the calling thread until the thread whose join() method is called is terminated.\n+\t\t\t# for process in liste_process:\n+\t\t\t\t# process.join()\n+\t\t\t# #the processes are done\n+\t\t\t# liste_process = []\n+\t# if liste_process:\n+\t\t# # Starts threads\n+\t\t# for process in liste_process:\n+\t\t\t# process.start()\n+\t\t# # This blocks the calling thread until the thread whose join() method is called is terminated.\n+\t\t# for process in liste_process:\n+\t\t\t# process.join()\n+\t\t# #the processes are done\n+\t\t# liste_process = []\n+\t\n+\tif options.config:\n+\t\tconfig = ConfigParser.RawConfigParser()\n+\t\tconfig.read(options.config)\n+\t\tconfig.set(\'Ident_discord\',\'frf\', options.frf)\n+\t\tconfig.set(\'Ident_discord\',\'ff\', options.ff)\n+\t\tconfig.set(\'Ident_discord\',\'rr\', options.rr)\n+\t\tconfig.set(\'Ident_discord\',\'ins\', options.ins)\n+\t\tconfig.set(\'Ident_discord\',\'delet\', 
options.delet)\n+\t\tconfig.set(\'Ident_discord\',\'chr_rr\', options.chr_rr)\n+\t\tconfig.set(\'Ident_discord\',\'chr_fr\', options.chr_fr)\n+\t\tconfig.set(\'Ident_discord\',\'chr_rf\', options.chr_rf)\n+\t\tconfig.set(\'Ident_discord\',\'chr_ff\', options.chr_ff)\n+\t\twith open(options.config, \'wb\') as configfile:\n+\t\t\tconfig.write(configfile)\n+\t# for n in liste_job:\n+\t\t# cherche_error(\'IDENT_SV.o\'+n)\n+\t\t# os.system(\'rm IDENT_SV.o\'+n)\n+\tmot = \'cat \'\n+\tfor n in liste_tmp:\n+\t\tmot = mot + n + \' \'\n+\tmot = mot + \'> \' + options.out\n+\tos.system(mot)\n+\tfor n in liste_tmp:\n+\t\tos.remove(n)\n+\t\t\n+if __name__ == "__main__": __main__()\n\\ No newline at end of file\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/8_ident_SV.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/8_ident_SV.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,74 @@ +<tool id="8_ident_SV" name="8_ident_SV" version="0.1"> + <description> : Try to identify pattern of structural variation</description> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/8_ident_SV.py + --frf $frf + --ff $ff + --rr $rr + --ins $insertion + --delet $deletion + --chr_rr $chr_rr + --chr_fr $chr_fr + --chr_rf $chr_rf + --chr_ff $chr_ff + --config $config_file + --out $sv_detected + </command> + <inputs> + <param name="insertion" type="data" format="txt" label="Discordant ins_score file"/> + <param name="deletion" type="data" format="txt" label="Discordant del_score file"/> + <param name="frf" type="data" format="txt" label="Discordant fr_score or rf_score file depending on expected orientation"/> + <param name="ff" type="data" format="txt" label="Discordant ff_score file"/> + <param name="rr" type="data" format="txt" label="Discordant rr_score file"/> + <param name="chr_fr" type="data" format="txt" label="Discordant chr-fr_score file"/> + <param name="chr_rf" type="data" format="txt" label="Discordant chr-rf_score file"/> + <param name="chr_ff" type="data" format="txt" label="Discordant chr-ff_score file"/> + <param name="chr_rr" type="data" format="txt" label="Discordant chr-rr_score file"/> + <param name="config_file" type="data" format="txt" label="conf file"/> + <param name="prefix" type="text" label="Identifier for output" /> + </inputs> + <outputs> + <data format="txt" name="sv_detected" label="${tool.name} : $prefix SV_detected " /> + </outputs> + <tests> + <test> + <param name="insertion" value="discordant_zone_insertion.txt" /> + <param name="deletion" value="discordant_zone_deletion.txt" /> + <param name="frf" value="discordant_zone_FR.txt" /> + <param name="ff" value="discordant_zone_FF.txt" /> + <param name="rr" value="discordant_zone_RR.txt" /> + <param name="chr_fr" value="discordant_zone_chr_FR.txt" /> + <param name="chr_rf" 
value="discordant_zone_chr_RF.txt" /> + <param name="chr_ff" value="discordant_zone_chr_FF.txt" /> + <param name="chr_rr" value="discordant_zone_chr_RR.txt" /> + <param name="config_file" value="new_config_file.txt" /> + <output name="sv_detected" file="sv_detected.txt"/> + </test> + </tests> + <help> + +**Overview** + +----- + +.. class:: infomark + +**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD). + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr + +.. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/README.md Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,550 @@\n+# Purpose of scaffremodler\n+\n+Scaffremodler groups several programs whose principal aim is to use paired reads to link different genomic regions. These main programs are accompanied by several other programs that can be used as a complement to improve scaffold assemblies or to detect large structural variations between a reference sequence and a re-sequenced genome.\n+\n+## Installation\n+\n+All the tools described here are written in Python and work on Linux systems.\n+\n+To install the tools:\n+\n+1. unzip the folder\n+2. go to the bin directory and open the loca_programs.conf file\n+3. set the path to each required program (listed below)\n+\n+## Dependencies\n+\n+1. bowtie can be found at http://bowtie-bio.sourceforge.net/index.shtml\n+2. bowtie2 can be found at http://bowtie-bio.sourceforge.net/bowtie2/index.shtml\n+3. SortSam.jar, MarkDuplicates.jar and FilterSamReads.jar belong to Picard Tools and can be found at http://sourceforge.net/projects/picard/files/picard-tools/\n+4. bwa can be found at http://bio-bwa.sourceforge.net\n+5. samtools can be found at http://www.htslib.org\n+6. circos-0.67 or greater is required and can be found at http://circos.ca/software/download/circos/\n+perl, python and java are required. Biopython is also required.\n+7. 
bamgrepreads can be found at https://github.com/lindenb/variationtoolkit \n+\n+## Descriptions\n+\n+The package provided comprises the 18 programs listed here:\n+\n+* 1_create_conf.py\n+* 2_map.py\n+* 3_filter_single_pair.py\n+* 4_filter_sam.py\n+* 5_calc_stat.py\n+* 6_parse_discord.py\n+* 7_select_on_cov.py\n+* scaffremodler_wrapper.py\n+* SplitOnX.py\n+* conf4circos.py\n+* contig_scaff.py\n+* convert2X.py\n+* draw_circos.py\n+* fusion_scaff.py\n+* group4contig.py\n+* look4fusion.py\n+* reEstimateN.py\n+* verif_fusion.py\n+\n+\n+All of these programs are run using the following command: \n+\n+```python\n+python program-name <--options-name value>\n+```\n+\xe2\x80\x83\n+## Programs\n+\n+### 1\\_create_conf.py\n+\n+This program takes a multifasta file as input and outputs a tabulated file recording sequence length information. Each line corresponds to a sequence in the multifasta file and contains two columns: the first column corresponds to the sequence name and the second to the sequence size.\n+\n+In addition, this program generates a configuration file that will be used by all other programs that begin with a number. These programs can run without this configuration file but, as some options are common between these programs, generating the configuration file allows passing options only once. \n+\n+These options and their utility are described in their respective programs. When a configuration file is passed to other programs, these programs add information to it.\n+\n+In addition to these parameters the following options should be filled:\n+ \n+ --ref : A multi-fasta containing the reference sequence.\n+ --chr : The name of the output file that will contain the information on sequence length.\n+ --output : The name of the output configuration file that will be generated.\n+ --restimate : in 6_parse_discord.py paired reads are parsed based on their mapping orientation and insert size. The minimal and maximal correct insert sizes are re-estimated in 5_calc_stat.py. 
Minimal and maximal insert sizes are calculated by subtracting and adding, respectively, X*standard deviation from/to the median insert size calculated on the first identified well mapped reads. Like the median insert size, the insert size standard deviation is estimated on identified well mapped reads. The X value is provided in the msd option.\n+ y : use re-estimated minimal and maximal insert size for parsing of well mapped reads\n+ n : don\xe2\x80\x99t use re-estimated minimal and maximal insert size for parsing of well mapped reads (default). If the insert size does not have a normal-like distribution, it is not recommended to re-estimate minimal and maximal insert size.\n+ --msd : multiplier of the standard deviation used to re-estimate minimal and maximal insert size to identify well mapped reads'..b'is script looks for possible scaffold fusions and junctions based on discordant zones detected and unknown regions in a reference genome sequence.\n+\n+**Options:**\n+\n+ --config : The configuration file generated by conf4circos.py\n+ --bound : Boundaries of scaffold to look for fusion and junction. Only scaffold extremities are searched for fusion and junction. This means that no partial scaffold fusion and junction are searched. (Default: 10 000)\n+ --out : Output text file containing possible fusions and junctions. (Default: possible_fusion.txt)\n+ --out_tar : Output name of a tar.gz file containing circos figures showing discordant zone leading to the detection of possible fusions and junctions. (Default: possible_fusion.tar)\n+\n+### group4contig.py\n+\n+This program takes the names of scaffolds to join, provided in a table file, and groups them by linkage. Scaffold groups are output in a table file. 
This file should be edited to be used by contig_scaff.py program.\n+\n+**Options:**\n+\n+ --table : A tabulated input file having an identical structure of the tabulated file provided in fusion_scaff.py or the output file of look4fusion.py.\n+ --out : Output file name containing scaffolds to group together. (Default: intermediate_junction.txt).\n+\n+\n+\n+### contig\\_scaff.py\n+\n+This program creates junctions between scaffolds using a tabulated file and output a multifasta file containing all sequences in the input fasta file, including joined scaffolds.\n+\n+**Options:**\n+\n+ --table : A table file of scaffold to join. \n+ --fasta : The multi-fasta file containing scaffolds.\n+ --out : Output file name of the multi fasta file. (Default: super_contig.fasta)\n+ --out_verif : Output file name that register the constitution of the newly formed scaffold. This file is used by verif_fusion.py to validate junctions performed. (Default: contig2verif.txt)\n+\n+### fusion\\_scaff.py\n+\n+This program merges scaffold sequences based on tabulated file.\n+\n+**Options:**\n+\n+ --table : A tabulated file containing information of scaffold to merge. \n+ --fasta : A multifasta file containing sequences.\n+ --out : Output file name of the multifasta file containing merged scaffolds. (Default: fusion.fasta)\n+ --out_verif : Output file name that register the constitution of the newly formed scaffold. This file is used by verif_fusion.py to validate fusions performed. (Default: fusion2verif.txt)\n+\n+### verif\\_fusion.py\n+\n+This program verifies scaffold sequence fusions and junctions performed by fusion_scaff.py or contig_scaff.py. 
The verification is performed by drawing circos pictures representing paired reads overlapping scaffold junction/fusion performed.\n+\n+**Options:**\n+\n+ --config : The configuration file generated by conf4circos.py.\n+ --list : The file name passed in --out_verif option when running fusion_scaff.py or contig_scaff.py\n+ --bound : Boundaries around junction to draw paired reads. Choose a value >= 2 fold library insert size. (Default: 10000)\n+ --thread : Thread number used for circos drawing (integer). (Default: 1)\n+ --out_tar : Output name of a tar.gz file containing circos pictures validating fusions and junctions performed. (Default: possible_fusion.tar)\n+\n+### reEstimateN.py\n+\n+This program re-estimate N present in DNA sequence. Re-estimated N are replaced by S. First paired read insert size is re-estimated and second correctly orientated paired reads overlapping unknown regions are used to re-estimate the size of these unknown regions.\n+\n+**Options:**\n+\n+ --config : The configuration file generated in scaffremodler pipeline.\n+ --exclude : Chromosome/scaffold names separated with "=" to exclude for the insert size estimation.\n+ --min_read : The minimal read number requested to make the re-estimation of an unknown region. (Default: 30)\n+ --out : Output file name of the fasta file containing re-estimated N regions. (Default: N_restimated.fasta)\n+ --thread : Number of thread to use. (Default: 1)\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/SplitOnX.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/SplitOnX.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |

#
#  Copyright 2014 CIRAD
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, see <http://www.gnu.org/licenses/> or
#  write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#

import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord


def _split_on_x(name, sequence):
    """Return the pieces of *sequence* obtained by cutting it at runs of 'X'.

    name     -- identifier of the sequence, only used in messages
    sequence -- the sequence as a plain string

    A sequence that is not actually cut is returned as a one-element list
    without further checks.  When the sequence is cut, a message is printed
    and the program exits if any piece starts or ends with N/n, because a
    scaffold must not begin or end with unknown bases.
    """
    pieces = sequence.replace('X', ' ').split()

    if not pieces:
        # Empty sequence or a sequence made only of X: nothing can be written
        # for it, so report it instead of silently dropping it.
        sys.stderr.write('The sequence ' + name + ' contains no base other than X and is skipped\n')
        return pieces

    if len(pieces) == 1:
        return pieces

    # Single pre-formatted string so the statement is valid (and prints the
    # same text) under both Python 2 and Python 3.
    print('The sequence %s has been cuted %d time' % (name, len(pieces) - 1))
    for piece in pieces:
        if piece[0] in ('N', 'n', 'X'):
            sys.exit('Problem at the begining of the sequence : N are found')
        if piece[-1] in ('N', 'n', 'X'):
            sys.exit('Problem at the end of the sequence: N are found')
    return pieces


def __main__():
    """Split every sequence of a multifasta file on X and rename by length.

    Reads the file given with --fasta, cuts each sequence wherever X are
    found, sorts the resulting pieces from longest to shortest and writes
    them to --out as scaffold1, scaffold2, ...
    """
    #Parse Command Line
    parser = optparse.OptionParser(usage="python %prog [options]\n\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr\n\n"
    "This program split DNA sequence when X are found. All scaffolds are renamed by length.")
    # Wrapper options.
    parser.add_option( '', '--fasta', dest='fasta', default='not_filled', help='The multifasta sequence file')
    parser.add_option( '', '--out', dest='out', default='Splitted.fasta', help='The output file name, [default: %default]')
    (options, args) = parser.parse_args()

    if options.fasta == 'not_filled':
        sys.exit('--fasta argument is missing')

    #loading sequences
    record_dict = SeqIO.index(options.fasta, "fasta")

    pieces = []
    for n in record_dict:
        pieces.extend(_split_on_x(n, str(record_dict[n].seq)))

    # list.sort is stable, so pieces of equal length keep their input order
    pieces.sort(key=len, reverse=True)

    # "with" guarantees the output is flushed and closed even if writing fails
    with open(options.out, 'w') as outfile:
        for rank, piece in enumerate(pieces, 1):
            SeqIO.write(SeqRecord(Seq(piece, generic_dna), id = 'scaffold'+str(rank), description=''), outfile, "fasta")


if __name__ == "__main__": __main__()
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/SplitOnX.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/SplitOnX.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
<!--
    Galaxy wrapper for SplitOnX.py.
    Takes one fasta dataset and produces one fasta dataset from the script's "out" option.
-->
<tool id="SplitOnX" name="SplitOnX" version="0.1">
    <description> : Split fasta sequence on X and rename sequence based on length</description>
    <stdio>
        <!-- any non-zero exit status is reported as a tool failure -->
        <exit_code range="1:" />
    </stdio>
    <command>
        <!-- every path is single-quoted so that unusual characters in a
             dataset or installation path cannot split the shell command -->
        source '$__tool_directory__/include_scaffremodler.sh' ;
        python '$__tool_directory__/SplitOnX.py'
        --fasta '$fasta'
        --out '$out'

    </command>
    <inputs>
        <param name="fasta" type="data" label="The fasta file" />
        <!-- only used to build the output label, never passed to the script -->
        <param name="prefix" type="text" label="Identifier for output" />
    </inputs>
    <outputs>
        <data format="fasta" name="out" label="${tool.name} : $prefix split fasta" />
    </outputs>
    <help>

**Overview**

-----

.. class:: infomark

**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD).

.. class:: infomark

**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr

.. class:: infomark

**Program encapsulated in Galaxy by South Green**

    </help>
    <citations>
        <citation type="doi">10.1186/s12864-016-2579-4</citation>
    </citations>
</tool>
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/bamgrepreads |
| b |
| Binary file scaffremodler/bamgrepreads has changed |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/conf4circos.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/conf4circos.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,1041 @@\n+#!/usr/local/bioinfo/python/2.7.9/bin/python\n+#\n+#\n+# Copyright 2014 CIRAD\n+#\n+# This program is free software; you can redistribute it and/or modify\n+# it under the terms of the GNU General Public License as published by\n+# the Free Software Foundation; either version 3 of the License, or\n+# (at your option) any later version.\n+#\n+# This program is distributed in the hope that it will be useful,\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+# GNU General Public License for more details.\n+#\n+# You should have received a copy of the GNU General Public License\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\n+# write to the Free Software Foundation, Inc.,\n+# 51 Franklin Street, Fifth Floor, Boston,\n+# MA 02110-1301, USA.\n+#\n+#\n+\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time, math, multiprocessing, datetime\n+from Bio.Seq import Seq\n+from Bio.Alphabet import generic_dna\n+from Bio import SeqIO\n+from Bio.SeqRecord import SeqRecord\n+\n+def stop_err( msg ):\n+ sys.stderr.write( "%s\\n" % msg )\n+ sys.exit()\n+\n+def run_job (cmd_line, ERROR):\n+\tprint cmd_line\n+\ttry:\n+\t\ttmp = (tempfile.NamedTemporaryFile().name)+\'.error\'\n+\t\t# print tmp\n+\t\terror = open(tmp, \'w\')\n+\t\tproc = subprocess.Popen( args=cmd_line, shell=True, stderr=error)\n+\t\treturncode = proc.wait()\n+\t\terror.close()\n+\t\terror = open( tmp, \'rb\' )\n+\t\tstderr = \'\'\n+\t\tbuffsize = 1048576\n+\t\ttry:\n+\t\t\twhile True:\n+\t\t\t\tstderr += error.read( buffsize )\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\n+\t\t\t\t\tbreak\n+\t\texcept OverflowError:\n+\t\t\tpass\n+\t\terror.close()\n+\t\tos.remove(tmp)\n+\t\tif returncode != 0:\n+\t\t\traise Exception, stderr\n+\texcept Exception, e:\n+\t\tstop_err( ERROR + str( e ) )\n+\n+\n+def 
extract_function_name():\n+\t"""Extracts failing function name from Traceback\n+\n+\tby Alex Martelli\n+\thttp://stackoverflow.com/questions/2380073/\\\n+\thow-to-identify-what-function-call-raise-an-exception-in-python\n+\t"""\n+\ttb = sys.exc_info()[-1]\n+\tstk = traceback.extract_tb(tb, 1)\n+\tfname = stk[0][3]\n+\treturn fname\n+\n+\n+def zonesOverlap(zonesOrg, zonesCompList):\n+\t"""\n+\t\tCalculate the overlap of one couple of zone with all the zones of one accession.\n+\n+\t\t:param zonesOrg: The mate zone to test.\n+\t\t:type zonesOrg: list\n+\t\t:param zonesCompList: The zones of one accession\n+\t\t:type zonesCompList: list\n+\t\t:return: A list with three boolean. first : indicate if the zoneOrg is overlapping one zone of zonesCompList; The second indicate if the overlapping zone is tagged as "PASSED" or not. The third indicate if the overlapping zone is tagged as "NOT_PASSED" or not.\n+\t\t:rtype: list\n+\t"""\n+\ti = 0\n+\tj = len(zonesCompList)\n+\tm = (i + j) // 2\n+\n+\toverlap = False\n+\n+\tamontChr = zonesOrg[0]\n+\tamontStart = int(zonesOrg[1])\n+\tamontEnd = int(zonesOrg[2])\n+\n+\tavalChr = zonesOrg[3]\n+\tAvalStart = int(zonesOrg[4])\n+\tAvalEnd = int(zonesOrg[5])\n+\n+\twhile i < j and not overlap:\n+\n+\t\tif zonesCompList[m][0] == amontChr:\n+\n+\t\t\tif avalChr == zonesCompList[m][5]:\n+\n+\t\t\t\tk = m\n+\t\t\t\twhile k >= i and zonesCompList[k][0] == amontChr and zonesCompList[k][5] == avalChr:\n+\n+\t\t\t\t\tif amontStart <= int(zonesCompList[k][2]) and amontEnd >= int(zonesCompList[k][1]) and AvalStart <= int(zonesCompList[k][7]) and AvalEnd >= int(zonesCompList[k][6]):\n+\t\t\t\t\t\toverlap = True\n+\t\t\t\t\tk -= 1\n+\n+\t\t\t\tk = m + 1\n+\t\t\t\twhile k < j and zonesCompList[k][0] == amontChr and zonesCompList[k][5] == avalChr:\n+\n+\t\t\t\t\tif amontStart <= int(zonesCompList[k][2]) and amontEnd >= int(zonesCompList[k][1]) and AvalStart <= int(zonesCompList[k][7]) and AvalEnd >= int(zonesCompList[k][6]):\n+\t\t\t\t\t\toverlap = 
True\n+\t\t\t\t\tk += 1\n+\n+\t\t\t\t# no infinite loop\n+\t\t\t\tif not overlap:\n+\t\t\t\t\tbreak\n+\n+\t\t\telif zonesCompList[m][5] < avalChr:\n+\t\t\t\ti = i + 1\n+\t\t\telse:\n+\t\t\t\tj = m\n+\n+\t\telif zonesCompList[m][0] < amontChr:\n+\t\t\ti = m + 1\n+\t\telse:\n+\t\t\tj = m\n+\n+\t\tm = (i + j)//2\n+\n+\treturn [overlap]\n+\n+\n+def rea'..b'\t\tconfig.set(\'Discord_link\',\'rr\', os.path.abspath(options.out_rr))\n+\t\t\tconfig.set(\'General\',\'rr\', \'yes\')\n+\t\telse:\n+\t\t\tconfig.set(\'General\',\'rr\', \'no\')\n+\n+\t\tif options.ins != \'not_filled\':\n+\t\t\tconfig.set(\'Discord_link\',\'ins\', os.path.abspath(options.out_ins))\n+\t\t\tconfig.set(\'General\',\'ins\', \'yes\')\n+\t\telse:\n+\t\t\tconfig.set(\'General\',\'ins\', \'no\')\n+\n+\t\tif options.delet != \'not_filled\':\n+\t\t\tconfig.set(\'Discord_link\',\'delet\', os.path.abspath(options.out_delet))\n+\t\t\tconfig.set(\'General\',\'delet\', \'yes\')\n+\t\telse:\n+\t\t\tconfig.set(\'General\',\'delet\', \'no\')\n+\n+\t\tif options.chr_rr != \'not_filled\':\n+\t\t\tconfig.set(\'Discord_link\',\'chr_rr\', os.path.abspath(options.out_chr_rr))\n+\t\t\tconfig.set(\'Discord_zone\',\'chr_rr\', os.path.abspath(options.chr_rr))\n+\t\t\tconfig.set(\'General\',\'chr_rr\', \'yes\')\n+\t\telse:\n+\t\t\tconfig.set(\'General\',\'chr_rr\', \'no\')\n+\n+\t\tif options.chr_rf != \'not_filled\':\n+\t\t\tconfig.set(\'Discord_link\',\'chr_rf\', os.path.abspath(options.out_chr_rf))\n+\t\t\tconfig.set(\'Discord_zone\',\'chr_rf\', os.path.abspath(options.chr_rf))\n+\t\t\tconfig.set(\'General\',\'chr_rf\', \'yes\')\n+\t\telse:\n+\t\t\tconfig.set(\'General\',\'chr_rf\', \'no\')\n+\n+\t\tif options.chr_fr != \'not_filled\':\n+\t\t\tconfig.set(\'Discord_link\',\'chr_fr\', os.path.abspath(options.out_chr_fr))\n+\t\t\tconfig.set(\'Discord_zone\',\'chr_fr\', os.path.abspath(options.chr_fr))\n+\t\t\tconfig.set(\'General\',\'chr_fr\', \'yes\')\n+\t\telse:\n+\t\t\tconfig.set(\'General\',\'chr_fr\', \'no\')\n+\n+\t\tif 
options.chr_ff != \'not_filled\':\n+\t\t\tconfig.set(\'Discord_link\',\'chr_ff\', os.path.abspath(options.out_chr_ff))\n+\t\t\tconfig.set(\'Discord_zone\',\'chr_ff\', os.path.abspath(options.chr_ff))\n+\t\t\tconfig.set(\'General\',\'chr_ff\', \'yes\')\n+\t\telse:\n+\t\t\tconfig.set(\'General\',\'chr_ff\', \'no\')\n+\n+\t\tif options.liste_read != \'not_filled\':\n+\t\t\tconfig.add_section(\'Read_link\')\n+\t\t\tconfig.set(\'Read_link\',\'rf\', os.path.abspath(options.Rout_rf))\n+\t\t\tconfig.set(\'Read_link\',\'fr\', os.path.abspath(options.Rout_fr))\n+\t\t\tconfig.set(\'Read_link\',\'ff\', os.path.abspath(options.Rout_ff))\n+\t\t\tconfig.set(\'Read_link\',\'rr\', os.path.abspath(options.Rout_rr))\n+\t\t\tconfig.set(\'Read_link\',\'ins\', os.path.abspath(options.Rout_ins))\n+\t\t\tconfig.set(\'Read_link\',\'del\', os.path.abspath(options.Rout_delet))\n+\t\t\tconfig.set(\'Read_link\',\'chr_rr\', os.path.abspath(options.Rout_chr_rr))\n+\t\t\tconfig.set(\'Read_link\',\'chr_rf\', os.path.abspath(options.Rout_chr_rf))\n+\t\t\tconfig.set(\'Read_link\',\'chr_fr\', os.path.abspath(options.Rout_chr_fr))\n+\t\t\tconfig.set(\'Read_link\',\'chr_ff\', os.path.abspath(options.Rout_chr_ff))\n+\t\t\tconfig.set(\'General\',\'read_rf\', \'yes\')\n+\t\t\tconfig.set(\'General\',\'read_fr\', \'yes\')\n+\t\t\tconfig.set(\'General\',\'read_ff\', \'yes\')\n+\t\t\tconfig.set(\'General\',\'read_rr\', \'yes\')\n+\t\t\tconfig.set(\'General\',\'read_ins\', \'yes\')\n+\t\t\tconfig.set(\'General\',\'read_del\', \'yes\')\n+\t\t\tconfig.set(\'General\',\'read_chr_rr\', \'yes\')\n+\t\t\tconfig.set(\'General\',\'read_chr_rf\', \'yes\')\n+\t\t\tconfig.set(\'General\',\'read_chr_fr\', \'yes\')\n+\t\t\tconfig.set(\'General\',\'read_chr_ff\', \'yes\')\n+\t\telse:\n+\t\t\tconfig.set(\'General\',\'read_rf\', \'no\')\n+\t\t\tconfig.set(\'General\',\'read_fr\', \'no\')\n+\t\t\tconfig.set(\'General\',\'read_ff\', \'no\')\n+\t\t\tconfig.set(\'General\',\'read_rr\', 
\'no\')\n+\t\t\tconfig.set(\'General\',\'read_ins\', \'no\')\n+\t\t\tconfig.set(\'General\',\'read_del\', \'no\')\n+\t\t\tconfig.set(\'General\',\'read_chr_rr\', \'no\')\n+\t\t\tconfig.set(\'General\',\'read_chr_rf\', \'no\')\n+\t\t\tconfig.set(\'General\',\'read_chr_fr\', \'no\')\n+\t\t\tconfig.set(\'General\',\'read_chr_ff\', \'no\')\n+\n+\t\tif options.dis_prop != \'not_filled\':\n+\t\t\tconfig.add_section(\'Proportion\')\n+\t\t\tconfig.set(\'Proportion\',\'prop\', os.path.abspath(options.dis_prop))\n+\t\t\tconfig.set(\'General\',\'prop\', \'yes\')\n+\t\telse:\n+\t\t\tconfig.set(\'General\',\'prop\', \'no\')\n+\n+\t\tif options.agp != \'not_filled\':\n+\t\t\tconfig.add_section(\'Scaffold\')\n+\t\t\tconfig.set(\'Scaffold\',\'scaff_tile\', os.path.abspath(options.out_scaff))\n+\t\t\tconfig.set(\'General\',\'scaff_tile\', \'yes\')\n+\t\telse:\n+\t\t\tconfig.set(\'General\',\'scaff_tile\', \'no\')\n+\t\t# writting configuration file\n+\t\twith open(options.output, \'wb\') as configfile:\n+\t\t\tconfig.write(configfile)\n+\n+if __name__ == "__main__": __main__()\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/conf4circos.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/conf4circos.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,319 @@\n+<tool id="conf4circos" name="conf4circos" version="0.1"> \n+ <description> : Create configuration file and files needed to draw circos</description> \n+ <stdio>\n+ <exit_code range="1:" />\n+ </stdio>\n+ <command>\n+\t\tsource $__tool_directory__/include_scaffremodler.sh ;\n+ python $__tool_directory__/conf4circos.py\n+\t\t\n+ #if str($reference_genome.source) == "history":\n+ --ref $reference_genome.own_file \n+ #else:\n+ --ref $reference_genome.index.fields.path\n+ #end if \n+\t\t--chr $chr\n+\t\t#if $have_cov.have_cov_opt == "yes"\n+\t\t\t--cov $have_cov.cov\n+\t\t\t--window $have_cov.window\n+\t\t\t--orient $orient\n+\t\t#end if\n+\t\t\n+\t\t#if $have_frf.have_frf_opt == "yes"\n+\t\t\t--frf $have_frf.frf\n+\t\t#end if\n+\t\t\n+\t\t#if $have_ff.have_ff_opt == "yes"\n+\t\t\t--ff $have_ff.ff\n+\t\t#end if\n+\t\t\n+\t\t#if $have_rr.have_rr_opt == "yes"\n+\t\t\t--rr $have_rr.rr\n+\t\t#end if\n+\t\t\n+\t\t#if $have_ins.have_ins_opt == "yes"\n+\t\t\t--ins $have_ins.ins\n+\t\t#end if\n+\t\t\n+\t\t#if $have_delet.have_delet_opt == "yes"\n+\t\t\t--delet $have_delet.delet\n+\t\t#end if\n+\t\t\n+\t\t#if $have_chr_rr.have_chr_rr_opt == "yes"\n+\t\t\t--chr_rr $have_chr_rr.chr_rr\n+\t\t#end if\n+\t\t\n+\t\t#if $have_chr_rf.have_chr_rf_opt == "yes"\n+\t\t\t--chr_rf $have_chr_rf.chr_rf\n+\t\t#end if\n+\t\t\n+\t\t#if $have_chr_ff.have_chr_ff_opt == "yes"\n+\t\t\t--chr_ff $have_chr_ff.chr_ff\n+\t\t#end if\n+\t\t\n+\t\t#if $have_chr_fr.have_chr_fr_opt == "yes"\n+\t\t\t--chr_fr $have_chr_fr.chr_fr\n+\t\t#end if\n+\t\t\n+\t\t#if $have_liste.have_liste_opt == "yes"\n+\t\t\t--liste_read $have_liste.liste_read\n+\t\t#end if\n+\t\t\t\n+\t\t#if $have_dis.have_dis_opt == "yes"\n+\t\t\t--dis_prop $have_dis.dis_prop\n+\t\t#end if\n+\t\t\n+\t\t#if $have_agp.have_agp_opt == "yes"\n+\t\t\t--agp $have_agp.agp\n+\t\t#end if\n+\t\t--output $circos_configuration\n+\t\t--out_kar $out_karyotype\n+\t\t--out_N $out_N\n+\t\t--out_cov $out_coverage\n+\t\t--out_frf 
$out_frf\n+\t\t--out_ff $out_ff\n+\t\t--out_rr $out_rr\n+\t\t--out_ins $out_ins\n+\t\t--out_delet $out_delet\n+\t\t--out_chr_rr $out_chr_rr\n+\t\t--out_chr_rf $out_chr_rf\n+\t\t--out_chr_ff $out_chr_ff\n+\t\t--out_chr_fr $out_chr_fr\n+\t\t--Rout_rf $Rout_rf\n+\t\t--Rout_fr $Rout_fr\n+\t\t--Rout_ff $Rout_ff\n+\t\t--Rout_rr $Rout_rr\n+\t\t--Rout_ins $Rout_ins\n+\t\t--Rout_delet $Rout_delet\n+\t\t--Rout_chr_rr $Rout_chr_rr\n+\t\t--Rout_chr_rf $Rout_chr_rf\n+\t\t--Rout_chr_ff $Rout_chr_ff\n+\t\t--Rout_chr_fr $Rout_chr_fr\n+\t\t--out_scaff $out_scaff\n+\t\t\n+ </command>\n+ <inputs>\n+ <conditional name="reference_genome">\n+ \t<param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below">\n+ \t<option value="indexed">Use a built-in genome index</option>\n+ \t<option value="history">Use a genome from the history and build index</option> \n+ \t</param>\n+ \t<when value="indexed">\n+ \t<param name="index" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the South Green team">\n+\t\t\t\t\t\n+\t\t\t\t\t<options from_data_table="scaffremodler">\n+\t\t\t\t\t\t<filter type="sort_by" column="1"/>\n+\t\t\t\t\t\t<validator type="no_options" message="No indexes are available for the selected input dataset"/>\n+\t\t\t\t\t</options> \n+ \t</param>\n+ \t</when>\n+ \t<when value="history">\n+ \t<param name="own_file" type="data" format="fasta" label="Select reference genome" />\n+ \t</when>\n+ </conditional>\t \n+\t\t<param name="chr" type="data" format="txt" label="Chromosome file (generated by 1_create_conf or tabulated file with chr_name in col1 and length in col2)" />\n+\t\t<param name="orient" type="select" label="The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand">\n+\t\t\t<option value="rf">RF</option>\n+\t\t\t<option 
value="fr">FR</option>\n+\t\t</param>\n+\t\t<conditional name="have_cov">\n+\t\t\t<param name="have_cov_opt" type="select" label="Do you want to draw coverage?">\n+\t\t\t\t<option valu'..b'+\t\t\n+\t\t<conditional name="have_agp">\n+\t\t\t<param name="have_agp_opt" type="select" label="Do you want to draw a layer representing scaffold positions?">\n+\t\t\t\t<option value="not_filled">not_filled</option>\n+\t\t\t\t<option value="yes">yes</option>\n+\t\t\t</param>\n+\t\t\t<when value="yes">\n+\t\t\t\t<param name="agp" type="data" format="txt" label="An agp file locating scaffold" />\n+\t\t\t</when>\n+\t\t\t<when value="not_filled"></when>\n+\t\t</conditional>\n+\t\t<param name="prefix" type="text" label="Identifier for output" value="Circos" />\n+ </inputs>\n+ <outputs>\n+ <data format="txt" name="circos_configuration" label="${tool.name} : $prefix configuration" />\n+ <data format="txt" name="out_karyotype" label="${tool.name} : $prefix Karyotype" />\n+ <data format="txt" name="out_N" label="${tool.name} : $prefix location N" />\n+ <data format="txt" name="out_coverage" label="${tool.name} : $prefix coverage" />\n+ <data format="txt" name="out_frf" label="${tool.name} : $prefix circos_frf" />\n+ <data format="txt" name="out_ff" label="${tool.name} : $prefix ff" />\n+ <data format="txt" name="out_rr" label="${tool.name} : $prefix rr" />\n+ <data format="txt" name="out_ins" label="${tool.name} : $prefix insertion" />\n+ <data format="txt" name="out_delet" label="${tool.name} : $prefix deletion" />\n+ <data format="txt" name="out_chr_rr" label="${tool.name} : $prefix chr_rr" />\n+ <data format="txt" name="out_chr_rf" label="${tool.name} : $prefix chr_rf" />\n+ <data format="txt" name="out_chr_ff" label="${tool.name} : $prefix chr_ff" />\n+ <data format="txt" name="out_chr_fr" label="${tool.name} : $prefix chr_fr" />\n+ <data format="txt" name="Rout_rf" label="${tool.name} : $prefix R_rf" />\n+ <data format="txt" name="Rout_fr" label="${tool.name} : $prefix R_fr" />\n+ 
<data format="txt" name="Rout_ff" label="${tool.name} : $prefix R_ff" />\n+ <data format="txt" name="Rout_rr" label="${tool.name} : $prefix circosR_rr" />\n+ <data format="txt" name="Rout_ins" label="${tool.name} : $prefix R_insertion" />\n+ <data format="txt" name="Rout_delet" label="${tool.name} : $prefix R_deletion" />\n+ <data format="txt" name="Rout_chr_rr" label="${tool.name} : $prefix R_chr_rr" />\n+ <data format="txt" name="Rout_chr_rf" label="${tool.name} : $prefix R_chr_rf" />\n+ <data format="txt" name="Rout_chr_ff" label="${tool.name} : $prefix R_chr_ff" />\n+ <data format="txt" name="Rout_chr_fr" label="${tool.name} : $prefix R_chr_fr" />\n+ <data format="txt" name="out_scaff" label="${tool.name} : $prefix scaffold" />\n+ </outputs>\n+ <tests>\n+ <test>\n+ <param name="chr" value="chromosome_information.txt"/> \n+ <param name="have_cov_opt" value="yes" />\n+ <param name="cov" value="coverage.txt" />\n+ <param name="have_frf_opt" value="yes" />\n+ <param name="frf" value="discordant_zone_FR.txt" />\n+ <param name="have_ff_opt" value="yes" />\n+ <param name="ff" value="discordant_zone_FF.txt" />\n+ <param name="have_rr_opt" value="yes" />\n+ <param name="rr" value="discordant_zone_RR.txt" />\n+ <param name="source" value="history" />\n+ <param name="own_file" value="Ref_for_SV_detection.fasta" />\n+ <output name="circos_configuration" file="circos_configuration.txt" compare="sim_size"/> \n+ </test>\n+ </tests> \n+ <help>\n+ \n+**Overview**\n+\n+-----\n+\n+.. class:: infomark\n+\n+**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD).\n+\n+.. class:: infomark\n+\n+**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr\n+\n+.. class:: infomark\n+\n+**Program encapsulated in Galaxy by South Green**\n+\n+\t</help>\n+\t<citations>\n+ <citation type="doi">10.1186/s12864-016-2579-4</citation> \n+ </citations>\n+</tool> \n\\ No newline at end of file\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/contig_scaff.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/contig_scaff.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |

#
#  Copyright 2014 CIRAD
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, see <http://www.gnu.org/licenses/> or
#  write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#

import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator

from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord

# Spacer inserted between two joined scaffolds.
# NOTE(review): the original used a literal run of N that reads as 100
# characters -- confirm the length against the upstream file.
GAP = 'N' * 100


def rev_seq(seq):
    """Return the reverse complement of the DNA string *seq* as a string."""
    my_dna = Seq(seq, generic_dna)
    return str(my_dna.reverse_complement())


def verif(TABLE):
    """Check the consistency of the junction table and exit on the first error.

    TABLE -- path of the tabulated file; a line starting with '>' opens a new
             scaffold, every other non-empty line names a member scaffold.

    Exits when two new scaffolds share a name, when a member scaffold is
    listed more than once, when a member appears before any '>' line, or
    when a new scaffold does not bear the name of one of its own members.
    """
    dico = {}
    used = set()  # every member seen so far, whatever its group
    nom = None
    with open(TABLE) as table:
        for line in table:
            data = line.split()
            if not data:
                continue
            if data[0][0] == '>':
                if data[0] in dico:
                    sys.exit('Two new scaffold '+data[0]+' have the same name')
                nom = data[0]
                dico[nom] = set()
            else:
                if nom is None:
                    # previously an unhelpful NameError
                    sys.exit('The scaffold '+data[0]+' is listed before any ">" line')
                if data[0] in used:
                    sys.exit('The scaffold '+data[0]+' is already used')
                used.add(data[0])
                dico[nom].add(data[0])
    for n in dico:
        if not(n[1:] in dico[n]):
            sys.exit('The new scaffold name '+n[1:]+' is not a used scaffold')


def _oriented_sequence(record_dict, name, orientation):
    """Return scaffold *name* as a string, reverse-complemented for "REV"."""
    if orientation == "FWD":
        return str(record_dict[name].seq)
    if orientation == "REV":
        return rev_seq(str(record_dict[name].seq))
    sys.exit('Orientation information is missing')


def _write_scaffold(nom, sequence, fields, outfile, outfile2):
    """Write one joined scaffold to the fasta file and its layout to the verif file."""
    SeqIO.write(SeqRecord(Seq(sequence, generic_dna), id = nom, description=''), outfile, "fasta")
    outfile2.write('\t'.join(fields)+'\n')


def scaff(TABLE, SEQ, OUT, OUT_VERIF):
    """Build the joined scaffolds described in TABLE.

    TABLE     -- path of the junction table (see verif)
    SEQ       -- path of the multifasta file holding the scaffolds
    OUT       -- path of the fasta file receiving the joined scaffolds
    OUT_VERIF -- path of the tabulated file receiving, for each new scaffold,
                 its name followed by (member, start, end) triplets with
                 1-based positions in the joined sequence

    Members of a new scaffold are concatenated in table order, separated by
    GAP.  Returns the set of member scaffold names that were used.
    """
    record_dict = SeqIO.index(SEQ, "fasta")
    dico_fait = set()
    nom = None
    sequence = ''
    fields = []
    # all three handles are closed on every exit path, including sys.exit
    with open(TABLE) as table, open(OUT,'w') as outfile, open(OUT_VERIF,'w') as outfile2:
        for line in table:
            data = line.split()
            if not data:
                continue
            if data[0][0] == '>':
                # a new header closes the scaffold being built, if it has any sequence
                if sequence:
                    _write_scaffold(nom, sequence, fields, outfile, outfile2)
                nom = data[0][1:]
                fields = [nom]
                sequence = ''
                continue
            if nom is None:
                # previously an unhelpful NameError
                sys.exit('The scaffold '+data[0]+' is listed before any ">" line')
            if not(data[0] in record_dict):
                sys.exit('The scaffold '+data[0]+' is not in the multifasta')
            if len(data) < 2:
                # previously an IndexError instead of the intended message
                sys.exit('Orientation information is missing')
            if len(fields) > 1:
                # not the first member of this scaffold: insert the spacer
                sequence = sequence + GAP
            start = len(sequence) + 1
            sequence = sequence + _oriented_sequence(record_dict, data[0], data[1])
            fields.extend([data[0], str(start), str(len(sequence))])
            dico_fait.add(data[0])
        if sequence:
            _write_scaffold(nom, sequence, fields, outfile, outfile2)
        else:
            sys.exit('No sequence in the last scaffold')
    return dico_fait


def __main__():
    """Join scaffolds as described by --table and write the resulting multifasta.

    Joined scaffolds are written first, followed by every scaffold of the
    input fasta that was not used in a junction.
    """
    #Parse Command Line
    parser = optparse.OptionParser(usage="python %prog [options]\n\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr\n\n"
    "This script create junctions between scaffolds using a tabulated file.\n"
    "The input tabulated file look as followed:\n"
    ">chr1\n"
    "scaffold1 FWD\n"
    "scaffold2 FWD\n"
    "scaffold3 REV\n"
    ">...\n")
    # Wrapper options.
    parser.add_option( '', '--table', dest='table', default='not_filled', help='The table file of scaffold to join')
    parser.add_option( '', '--fasta', dest='fasta', default='not_filled', help='The multi-fasta scaffold file')
    parser.add_option( '', '--out', dest='out', default='super_contig.fasta', help='The multi-fasta output file name, [default: %default]')
    parser.add_option( '', '--out_verif', dest='out_verif', default='contig2verif.txt', help='The output file to give to verif_fusion.py, [default: %default]')
    (options, args) = parser.parse_args()

    # fail with a clear message rather than trying to open a file named 'not_filled'
    if options.table == 'not_filled':
        sys.exit('--table argument is missing')
    if options.fasta == 'not_filled':
        sys.exit('--fasta argument is missing')

    #verifying file
    verif(options.table)

    #creating the scaffolds
    dico_fait = scaff(options.table, options.fasta, options.out, options.out_verif)

    #printing the remaining scaffold
    record_dict = SeqIO.index(options.fasta, "fasta")
    with open(options.out,'a') as outfile:
        for n in record_dict:
            if not(n in dico_fait):
                SeqIO.write(record_dict[n], outfile, "fasta")


if __name__ == "__main__": __main__()
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/contig_scaff.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/contig_scaff.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,46 @@ +<tool id="contig_scaff" name="contig_scaff" version="0.1"> + <description> : Join scaffolds using a tabulated file</description> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/contig_scaff.py + --table $table + --fasta $fasta + --out $fasta_file + --out_verif $tabulated_file + + </command> + <inputs> + <param name="table" type="data" label="The table file of scaffold to join" /> + <param name="fasta" type="data" label="The multi-fasta scaffold file" /> + <param name="prefix" type="text" label="Identifier for output" /> + </inputs> + <outputs> + <data format="fasta" name="fasta_file" label="${tool.name} : $prefix fasta" /> + <data format="txt" name="tabulated_file" label="${tool.name} : $prefix tabulated" /> + </outputs> + <help> + +**Overview** + +----- + +.. class:: infomark + +**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD). + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr + +.. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/contig_scaff_withN.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/contig_scaff_withN.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |

#
#  Copyright 2014 CIRAD
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, see <http://www.gnu.org/licenses/> or
#  write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#

import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator

from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord


def rev_seq(seq):
    """Return the reverse complement of the DNA string *seq* as a plain string."""
    my_dna = Seq(seq, generic_dna)
    return str(my_dna.reverse_complement())


def verif(TABLE):
    """Check the table describing the new scaffolds; exit or warn on problems.

    Each table line is 'new_name W scaffold orientation' or 'new_name N length';
    consecutive lines sharing the same first column describe one new scaffold.
    Exits when a new scaffold name reappears after another one has started.
    Prints a warning when a scaffold ('W' line) is used more than once.
    """
    followed = {}   # new name -> True once a 'W' line follows its first line
    used = set()    # scaffolds already placed by a 'W' line
    prec = ''
    with open(TABLE) as table:
        for line in table:
            data = line.split()
            if not data:
                continue
            if data[0] != prec:
                if data[0] in followed:
                    mot = 'Two new scaffold '+data[0]+' have the same name'
                    sys.exit(mot)
                followed[data[0]] = False
            elif len(data) > 1 and data[1] == 'W':
                followed[data[0]] = True
            if len(data) > 2 and data[1] == 'W':
                # The original compared against sets that only ever held new
                # scaffold names, so this warning could never be printed.
                if data[2] in used:
                    mot = 'Warning: the scaffold '+data[2]+' is already used'
                    print(mot)
                used.add(data[2])
            prec = data[0]
    for n in followed:
        # NOTE(review): as in the original, a new scaffold is rejected unless a
        # 'W' line follows its first line -- confirm this is the intended rule.
        if not followed[n]:
            # The name has no '>' prefix in this format: report it in full.
            sys.exit('The new scaffold name '+n+' is not a used scaffold')


def _write_scaffold(nom, parts, outfile):
    """Write one finished scaffold, assembled from its pieces, to the fasta output."""
    SeqIO.write(SeqRecord(Seq(''.join(parts), generic_dna), id = nom, description=''), outfile, "fasta")


def scaff(TABLE, SEQ, OUT, OUT_VERIF):
    """Build the new scaffolds described in TABLE from the sequences of SEQ.

    TABLE     -- table of 'new_name W scaffold +|-' and 'new_name N length' lines
    SEQ       -- multi-fasta file holding the scaffolds to join
    OUT       -- fasta file receiving the new scaffolds (overwritten)
    OUT_VERIF -- AGP-like file locating every region in the new scaffolds
                 (1-based inclusive coordinates)

    Returns the set of scaffold names that were used. Exits on an incomplete
    line, an unknown region type or orientation, a scaffold absent from SEQ,
    or when the last new scaffold is empty.
    """
    record_dict = SeqIO.index(SEQ, "fasta")
    dico_fait = set()
    prec = ''
    parts = []      # pieces of the scaffold being built, joined once when it is written
    length = 0      # total length of the pieces accumulated in parts
    j = 0           # rank of the region inside the current new scaffold
    with open(TABLE) as table, open(OUT, 'w') as outfile, open(OUT_VERIF, 'w') as outfile2:
        for line in table:
            data = line.split()
            if not data:
                continue
            if data[0] != prec:
                # A new scaffold starts: flush the previous one.
                if length:
                    _write_scaffold(prec, parts, outfile)
                prec = data[0]
                j = 0
                parts = []
                length = 0
            if len(data) < 3:
                sys.exit('Incomplete line in the table: '+line.strip())
            debut = length
            if data[1] == 'W':
                if len(data) < 4:
                    sys.exit('Orientation information is missing for the scaffold '+data[2])
                if not(data[2] in record_dict):
                    sys.exit('The scaffold '+data[2]+' is not in the multifasta')
                j += 1
                if data[3] == '+':
                    piece = str(record_dict[data[2]].seq)
                elif data[3] == '-':
                    piece = rev_seq(str(record_dict[data[2]].seq))
                else:
                    sys.exit('Wrong orientation information'+data[3])
                parts.append(piece)
                length += len(piece)
                # NOTE(review): the orientation column is always written as '+',
                # even for '-' components, as in the original -- confirm.
                outfile2.write(prec+'\t'+str(debut+1)+'\t'+str(length)+'\t'+str(j)+'\tW\t'+data[2]+'\t1\t'+str(length-debut)+'\t+\n')
                dico_fait.add(data[2])
            elif data[1] == 'N':
                j += 1
                try:
                    N_number = int(data[2])
                except ValueError:
                    sys.exit('Wrong N number: '+data[2])
                piece = 'N' * N_number
                parts.append(piece)
                length += len(piece)
                outfile2.write(prec+'\t'+str(debut+1)+'\t'+str(length)+'\t'+str(j)+'\tN\t'+data[2]+'\tfragment\tno\n')
            else:
                # The original reported data[3], which is absent on short lines.
                sys.exit('Wrong region type'+data[1])
        if length:
            _write_scaffold(prec, parts, outfile)
        else:
            sys.exit('No sequence in the last scaffold')
    return dico_fait


def __main__():
    """Parse the command line, join the scaffolds, then append the untouched ones."""
    #Parse Command Line
    parser = optparse.OptionParser(usage="python %prog [options]\n\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr\n\n"
    "This script create junctions between scaffolds using a tabulated file.\n")
    # Wrapper options.
    parser.add_option( '', '--table', dest='table', default='not_filled', help='The table file of scaffold to join')
    parser.add_option( '', '--fasta', dest='fasta', default='not_filled', help='The multi-fasta scaffold file')
    parser.add_option( '', '--out', dest='out', default='super_contig.fasta', help='The multi-fasta output file name, [default: %default]')
    parser.add_option( '', '--out_info', dest='out_info', default='contig_info.agp', help='An agp file locating contigs in scaffold, [default: %default]')
    (options, args) = parser.parse_args()

    # Fail with a clear message rather than trying to open a file named 'not_filled'.
    if options.table == 'not_filled':
        sys.exit('--table argument is missing')
    if options.fasta == 'not_filled':
        sys.exit('--fasta argument is missing')

    #verifying file
    verif(options.table)

    #creating the scaffolds
    dico_fait = scaff(options.table, options.fasta, options.out, options.out_info)

    #printing the remaining scaffold
    record_dict = SeqIO.index(options.fasta, "fasta")
    with open(options.out, 'a') as outfile:
        for n in record_dict:
            if not(n in dico_fait):
                SeqIO.write(record_dict[n], outfile, "fasta")

if __name__ == "__main__": __main__()
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/convert2X.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/convert2X.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |

#
#  Copyright 2014 CIRAD
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, see <http://www.gnu.org/licenses/> or
#  write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#


import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator

from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord


def _load_regions(table_path):
    """Read the table and return {sequence name: [(start, end), ...]}.

    Coordinates are kept as given in the table (1-based, inclusive). A line
    with only a name and one position describes a single-base region.
    """
    regions = {}
    with open(table_path) as table:
        for line in table:
            data = line.split()
            if not data:
                continue
            if len(data) < 2:
                sys.exit('No position given in the table line: '+line.strip())
            start = int(data[1])
            if len(data) == 2:
                end = start
            else:
                end = int(data[2])
            regions.setdefault(data[0], []).append((start, end))
    return regions


def _mask_regions(name, sequence, regions):
    """Return *sequence* with every (start, end) region replaced by X.

    Exits when a region falls outside the sequence: position 0 or a negative
    position would otherwise wrap around and mask bases at the end of the
    sequence, and a position past the end would raise a bare IndexError.
    """
    bases = list(sequence)
    for start, end in regions:
        if end < start:
            # An inverted region selects nothing, as in the original loop.
            continue
        if start < 1 or end > len(bases):
            sys.exit('The region '+name+' '+str(start)+' '+str(end)+' is outside the sequence (length '+str(len(bases))+')')
        bases[start-1:end] = 'X' * (end - start + 1)
    return ''.join(bases)


def __main__():
    """Parse the command line and write the fasta with the requested regions turned to X."""
    #Parse Command Line
    parser = optparse.OptionParser(usage="python %prog [options]\n\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr\n\n"
    "This program replace specified regions in the provided table file by X. These X will be used to split scaffold using SplitOnX.py\n"
    "The table file should be formated has in the example:\n"
    "scaffold83 93565 93586\n"
    "scaffold120 330181 330183\n"
    "scaffold120 380870 383428\n")
    # Wrapper options.
    parser.add_option( '', '--table', dest='table', default='not_filled', help='The table file with region to convert to X')
    parser.add_option( '', '--fasta', dest='fasta', default='not_filled', help='The multifasta sequence file')
    parser.add_option( '', '--out', dest='out', default='X_converted.fasta', help='The output file name, [default: %default]')
    (options, args) = parser.parse_args()

    if options.table == 'not_filled':
        sys.exit('--table argument is missing')
    if options.fasta == 'not_filled':
        sys.exit('--fasta argument is missing')

    #loading sequences
    record_dict = SeqIO.index(options.fasta, "fasta")
    dic = _load_regions(options.table)

    # Names of the table that are absent from the fasta are ignored, as before.
    with open(options.out, 'w') as outfile:
        for n in record_dict:
            if n in dic:
                sequence = _mask_regions(n, str(record_dict[n].seq), dic[n])
                SeqIO.write(SeqRecord(Seq(sequence, generic_dna), id = n, description=''), outfile, "fasta")
            else:
                SeqIO.write(SeqRecord(record_dict[n].seq, id = n, description=''), outfile, "fasta")

if __name__ == "__main__": __main__()
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/convert2X.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/convert2X.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,65 @@ +<tool id="convert2X" name="convert2X" version="0.1"> + <description> : Replace specified DNA regions (in the provided table file) by X</description> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/convert2X.py + --table $table + #if str($reference_genome.source) == "history": + --fasta $reference_genome.own_file + #else: + --fasta $reference_genome.index.fields.path + #end if + --out $converted_fasta + + </command> + <inputs> + <param name="table" type="data" label="The table file with region to convert to X" /> + <conditional name="reference_genome"> + <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below"> + <option value="indexed">Use a built-in genome index</option> + <option value="history">Use a genome from the history and build index</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the South Green team"> + + <options from_data_table="scaffremodler"> + <filter type="sort_by" column="1"/> + <validator type="no_options" message="No indexes are available for the selected input dataset"/> + </options> + </param> + </when> + <when value="history"> + <param name="own_file" type="data" format="fasta" metadata_name="dbkey" label="Select reference genome" /> + </when> + </conditional> + <param name="prefix" type="text" label="Identifier for output" /> + </inputs> + <outputs> + <data format="fasta" name="converted_fasta" label="${tool.name} : $prefix X_convert.fasta" /> + </outputs> + <help> + +**Overview** + +----- + +.. class:: infomark + +**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD). + +.. 
class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr + +.. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/draw_circos.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/draw_circos.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,1127 @@\n+#!/usr/local/bioinfo/python/2.7.9/bin/python\n+#\n+#\n+# Copyright 2014 CIRAD\n+#\n+# This program is free software; you can redistribute it and/or modify\n+# it under the terms of the GNU General Public License as published by\n+# the Free Software Foundation; either version 3 of the License, or\n+# (at your option) any later version.\n+#\n+# This program is distributed in the hope that it will be useful,\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+# GNU General Public License for more details.\n+#\n+# You should have received a copy of the GNU General Public License\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\n+# write to the Free Software Foundation, Inc.,\n+# 51 Franklin Street, Fifth Floor, Boston,\n+# MA 02110-1301, USA.\n+#\n+#\n+\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time, math, datetime\n+\n+def stop_err( msg ):\n+\tsys.stderr.write( "%s\\n" % msg )\n+\tsys.exit()\n+\n+def run_job (cmd_line, ERROR):\n+\tprint cmd_line\n+\ttry:\n+\t\ttmp = (tempfile.NamedTemporaryFile().name)+\'.error\'\n+\t\t# print tmp\n+\t\terror = open(tmp, \'w\')\n+\t\tproc = subprocess.Popen( args=cmd_line, shell=True, stderr=error)\n+\t\treturncode = proc.wait()\n+\t\terror.close()\n+\t\terror = open( tmp, \'rb\' )\n+\t\tstderr = \'\'\n+\t\tbuffsize = 1048576\n+\t\ttry:\n+\t\t\twhile True:\n+\t\t\t\tstderr += error.read( buffsize )\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\n+\t\t\t\t\tbreak\n+\t\texcept OverflowError:\n+\t\t\tpass\n+\t\terror.close()\n+\t\tos.remove(tmp)\n+\t\tif returncode != 0:\n+\t\t\traise Exception, stderr\n+\texcept Exception, e:\n+\t\tstop_err( ERROR + str( e ) )\n+\n+def define_regions(DRAW, CHR):\n+\t#Record regions to draw\n+\tdic = {}\n+\tchrDic = {}\n+\ttaille_total = 0\n+\tf = open(CHR, \'r\')\n+\tfor line in f:\n+\t\tif 
line.strip():\n+\t\t\tcols = line.split()\n+\t\t\tchrDic[cols[0]] = int(cols[1])\n+\tf.close()\n+\n+\tif DRAW == \'all\':\n+\t\tfor chrom in chrDic:\n+\t\t\tdic[chrom] = [[0, chrDic[chrom]+1]]\n+\t\t\ttaille_total += chrDic[chrom]\n+\telse:\n+\t\tdico = {}\n+\t\tregions = DRAW.split(\'-\')\n+\t\tfor region in regions:\n+\t\t\telts = region.split(\':\')\n+\t\t\tif not elts[0] in dico:\n+\t\t\t\tdico[elts[0]] = []\n+\n+\t\t\tif len(elts) == 3: # we have coordinate\n+\t\t\t\tdico[elts[0]].append([int(elts[1]), int(elts[2])+1])\n+\t\t\telse: # draw the whole chromosome\n+\t\t\t\ttry:\n+\t\t\t\t\tdico[elts[0]].append([0, chrDic[elts[0]]+1])\n+\t\t\t\texcept Exception, e:\n+\t\t\t\t\tprint (e)\n+\t\t\t\t\tprint ("No chromosome name \\""+elts[0]+"\\" found in the file : "+CHR+".")\n+\n+\t\t# to merge overlapping regions\n+\t\tchrom = \'\'\n+\t\tfor n in dico:\n+\t\t\tlist_2_sort = list(dico[n])\n+\t\t\tsorted_liste = list(sorted(list_2_sort, key=operator.itemgetter(0)))\n+\t\t\tfor k in sorted_liste:\n+\t\t\t\tif n in dic:\n+\t\t\t\t\tif fin < int(k[0]):\n+\t\t\t\t\t\tdic[n].append([debut, fin])\n+\t\t\t\t\t\ttaille_total = taille_total + (fin-debut) + 1\n+\t\t\t\t\t\tdebut = int(k[0])\n+\t\t\t\t\t\tfin = int(k[1])\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tfin = int(k[1])\n+\t\t\t\telse:\n+\t\t\t\t\tdic[n] = []\n+\t\t\t\t\tdebut = int(k[0])\n+\t\t\t\t\tfin = int(k[1])\n+\t\t\tdic[n].append([debut, fin])\n+\t\t\ttaille_total = taille_total + (fin-debut) + 1\n+\n+\tif taille_total < 5000:\n+\t\tprint \'Unit : 1 b\'\n+\t\tunit = \'10\'\n+\telif taille_total < 50000:\n+\t\tprint \'Unit : 1 Kb\'\n+\t\tunit = \'1000\'\n+\telif taille_total < 500000:\n+\t\tprint \'Unit : 10 Kb\', taille_total\n+\t\tunit = \'10000\'\n+\telif taille_total < 5000000:\n+\t\tprint \'Unit : 100 Kb\'\n+\t\tunit = \'100000\'\n+\telif taille_total < 50000000:\n+\t\tprint \'Unit : 1 Mb\'\n+\t\tunit = \'1000000\'\n+\telse:\n+\t\tprint \'Unit : 10 Mb\'\n+\t\tunit = \'10000000\'\n+\n+\t#creation of chromsomes to 
draw\n+\tchr_order = \'^\'\n+\tchr_name = \'\'\n+\tchar = 0\n+\tfile = open(CHR)\n+\tfor line in file:\n+\t\tdata = line.split()\n+\t\tif data:\n+\t\t\tif data[0] in dic:\n+\t\t\t\tlist_2_sort = list(dic[data[0]])\n+\t\t\t\tsorted_liste = list(sorted(list_2_sort, key=operator.itemgetter(1)))\n+\t\t\t\tfor n in sorted_liste:\n+\t\t\t\t\tif char > 25:\n+\t\t\t\t\t\tmot_char = chr(int(char/25)+96)+chr((char%25)+97)\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tmot_char = chr(char+97)\n+\t\t\t\t\tchr_order = chr_order+'..b'Draw discordant chr_rr link (y or n), [default: %default]\')\n+\tparser.add_option( \'\', \'--chr_rf\', dest=\'chr_rf\', default=\'y\', help=\'Draw discordant chr_rf link (y or n), [default: %default]\')\n+\tparser.add_option( \'\', \'--chr_fr\', dest=\'chr_fr\', default=\'y\', help=\'Draw discordant chr_fr link (y or n), [default: %default]\')\n+\tparser.add_option( \'\', \'--chr_ff\', dest=\'chr_ff\', default=\'y\', help=\'Draw discordant chr_ff link (y or n), [default: %default]\')\n+\n+\n+\tparser.add_option( \'\', \'--read_fr\', dest=\'read_fr\', default=\'y\', help=\'Draw read fr link (y or n), [default: %default]\')\n+\tparser.add_option( \'\', \'--read_rf\', dest=\'read_rf\', default=\'y\', help=\'Draw read rf link (y or n), [default: %default]\')\n+\tparser.add_option( \'\', \'--read_ff\', dest=\'read_ff\', default=\'y\', help=\'Draw read ff link (y or n), [default: %default]\')\n+\tparser.add_option( \'\', \'--read_rr\', dest=\'read_rr\', default=\'y\', help=\'Draw read rr link (y or n), [default: %default]\')\n+\tparser.add_option( \'\', \'--read_ins\', dest=\'read_ins\', default=\'y\', help=\'Draw read ins link (y or n), [default: %default]\')\n+\tparser.add_option( \'\', \'--read_delet\', dest=\'read_delet\', default=\'y\', help=\'Draw read delet link (y or n), [default: %default]\')\n+\tparser.add_option( \'\', \'--read_chr_rr\', dest=\'read_chr_rr\', default=\'y\', help=\'Draw read chr_rr link (y or n), [default: %default]\')\n+\tparser.add_option( 
\'\', \'--read_chr_rf\', dest=\'read_chr_rf\', default=\'y\', help=\'Draw read chr_rf link (y or n), [default: %default]\')\n+\tparser.add_option( \'\', \'--read_chr_fr\', dest=\'read_chr_fr\', default=\'y\', help=\'Draw read chr_fr link (y or n), [default: %default]\')\n+\tparser.add_option( \'\', \'--read_chr_ff\', dest=\'read_chr_ff\', default=\'y\', help=\'Draw read chr_ff link (y or n), [default: %default]\')\n+\n+\tparser.add_option( \'\', \'--text\', dest=\'text\', default=\'y\', help=\'Locate N regions, [default: %default]\')\n+\n+\tparser.add_option( \'\', \'--labels\', dest=\'labels\', default=\'y\', help=\'Draw reference sequence name, [default: %default]\')\n+\n+\tparser.add_option( \'\', \'--out\', dest=\'out\', default=\'circos.png\', help=\'The output file name\')\n+\t(options, args) = parser.parse_args()\n+\n+\n+\n+\tpathname = os.path.dirname(sys.argv[0])\n+\n+\tloca_programs = ConfigParser.RawConfigParser()\n+\tloca_programs.read(pathname+\'/loca_programs.conf\')\n+\n+\n+\n+\tif options.config == \'not_filled\':\n+\t\tsys.exit(\'--config argument is missing\')\n+\n+\tt0 = datetime.datetime.now()\n+\tconfig = ConfigParser.RawConfigParser()\n+\tconfig.read(options.config)\n+\n+\tchr_info = define_regions(options.draw, config.get(\'General\',\'chr\'))\n+\n+\n+\tideogram = tempfile.NamedTemporaryFile().name+\'.ideo\'\n+\tticks = tempfile.NamedTemporaryFile().name+\'.ticks\'\n+\tconf = tempfile.NamedTemporaryFile().name+\'.conf\'\n+\tkar = tempfile.NamedTemporaryFile().name+\'.kar\'\n+\n+\tcreate_ideogram(ideogram, options.labels)\n+\tcreate_ticks(ticks, chr_info[2])\n+\n+\tliste_rm = create_conf(chr_info[3], conf, ideogram, ticks, options.config, os.path.splitext(options.out)[0], chr_info[0], chr_info[1], chr_info[2], options.cov, options.scaff, options.discord, options.frf, options.ff, options.rr, options.ins, options.delet, options.chr_rr, options.chr_fr, options.chr_rf, options.chr_ff, options.read_fr, options.read_rf, options.read_ff, 
options.read_rr, options.read_ins, options.read_delet, options.read_chr_rr, options.read_chr_rf, options.read_chr_fr, options.read_chr_ff, options.text, kar)\n+\n+\trun_circos = \'%s %s -conf %s\' % (loca_programs.get(\'Programs\',\'perl\'), loca_programs.get(\'Programs\',\'circos\'), conf)\n+\trun_job(run_circos, \'Bug in run circos\')\n+\n+\tos.system(\'rm \'+ideogram)\n+\tos.system(\'rm \'+ticks)\n+\tos.system(\'rm \'+conf)\n+\tos.system(\'rm \'+kar)\n+\tfor n in liste_rm:\n+\t\tos.system(\'rm \'+n)\n+\tprint os.path.splitext(options.out)[0]\n+\tprint os.path.splitext(options.out)[0]+\'.png\'\n+\tprint options.out\n+\tif os.path.splitext(options.out)[0]+\'.png\' != options.out:\n+\t\tos.system(\'mv \'+os.path.splitext(options.out)[0]+\'.png \'+options.out)\n+\tprint datetime.datetime.now() - t0\n+\n+if __name__ == "__main__": __main__()\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/draw_circos.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/draw_circos.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,187 @@ +<tool id="draw_circos" name="draw_circos" version="0.1"> + <description> : Draw circos figure</description> + <requirements> + <requirement type="binary">circos</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/draw_circos.py + --config $config + --draw $draw + --cov $cov + --scaff $scaff + --discord $discord + --frf $frf + --ff $ff + --rr $rr + --ins $ins + --delet $delet + --chr_rr $chr_rr + --chr_rf $chr_rf + --chr_fr $chr_fr + --chr_ff $chr_ff + --read_fr $read_fr + --read_rf $read_rf + --read_ff $read_ff + --read_rr $read_rr + --read_ins $read_ins + --read_delet $read_delet + --read_chr_rr $read_chr_rr + --read_chr_rf $read_chr_rf + --read_chr_fr $read_chr_fr + --read_chr_ff $read_chr_ff + --text $text + --out $circos_file + + </command> + <inputs> + <param name="config" type="data" label="The circos_conf file generated by conf4circos" /> + <param name="draw" type="text" value="all" label="Region to draw ('all' or positions separated by ':' : chrX:start1:end1-chrY:start2:end2)" /> + + <param name="cov" type="select" label="Draw coverage layer"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="scaff" type="select" label="Draw scaffold location layer"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="discord" type="select" label="Draw discordant proportion layer"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + + <param name="frf" type="select" label="Draw fr/rf discordant regions links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="ff" type="select" label="Draw ff discordant regions links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="rr" 
type="select" label="Draw rr discordant regions links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="ins" type="select" label="Draw ins discordant regions links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="delet" type="select" label="Draw delet discordant regions links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="chr_rf" type="select" label="Draw chr_rf discordant regions links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="chr_rr" type="select" label="Draw chr_rr discordant regions links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="chr_fr" type="select" label="Draw chr_fr discordant regions links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + <param name="chr_ff" type="select" label="Draw chr_ff discordant regions links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + + + <param name="read_rf" type="select" label="Draw rf read links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="read_fr" type="select" label="Draw fr read links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="read_ff" type="select" label="Draw ff read links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="read_rr" type="select" label="Draw rr read links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="read_ins" type="select" label="Draw ins read links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="read_delet" 
type="select" label="Draw delet read links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="read_chr_rf" type="select" label="Draw chr_rf read links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="read_chr_rr" type="select" label="Draw chr_rr read links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="read_chr_fr" type="select" label="Draw chr_fr read links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + <param name="read_chr_ff" type="select" label="Draw chr_ff read links"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="text" type="select" label="Draw a text layer locating N regions"> + <option selected="true" value="y">y</option> + <option value="n">n</option> + </param> + + <param name="prefix" type="text" label="Identifier for output" value="circos" /> + </inputs> + <outputs> + <data name="circos_file" format="png" label="${tool.name} : $prefix .png" /> + </outputs> + <help> + +**Overview** + +----- + +.. class:: infomark + +**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD). + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr + +.. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/estimate.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/estimate.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,343 @@\n+\r\n+#\r\n+# Copyright 2014 CIRAD\r\n+#\r\n+# This program is free software; you can redistribute it and/or modify\r\n+# it under the terms of the GNU General Public License as published by\r\n+# the Free Software Foundation; either version 3 of the License, or\r\n+# (at your option) any later version.\r\n+#\r\n+# This program is distributed in the hope that it will be useful,\r\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+# GNU General Public License for more details.\r\n+#\r\n+# You should have received a copy of the GNU General Public License\r\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\r\n+# write to the Free Software Foundation, Inc.,\r\n+# 51 Franklin Street, Fifth Floor, Boston,\r\n+# MA 02110-1301, USA.\r\n+#\r\n+#\r\n+\r\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time, datetime\r\n+\r\n+from Bio.Seq import Seq\r\n+from Bio.Alphabet import generic_dna\r\n+from Bio import SeqIO\r\n+from Bio.SeqRecord import SeqRecord\r\n+\r\n+def stop_err( msg ):\r\n+ sys.stderr.write( "%s\\n" % msg )\r\n+ sys.exit()\r\n+\r\n+def run_job (cmd_line, ERROR):\r\n+\tprint cmd_line\r\n+\ttry:\r\n+\t\ttmp = (tempfile.NamedTemporaryFile().name)+\'.error\'\r\n+\t\t# print tmp\r\n+\t\terror = open(tmp, \'w\')\r\n+\t\tproc = subprocess.Popen( args=cmd_line, shell=True, stderr=error)\r\n+\t\treturncode = proc.wait()\r\n+\t\terror.close()\r\n+\t\terror = open( tmp, \'rb\' )\r\n+\t\tstderr = \'\'\r\n+\t\tbuffsize = 1048576\r\n+\t\ttry:\r\n+\t\t\twhile True:\r\n+\t\t\t\tstderr += error.read( buffsize )\r\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\r\n+\t\t\t\t\tbreak\r\n+\t\texcept OverflowError:\r\n+\t\t\tpass\r\n+\t\terror.close()\r\n+\t\tos.remove(tmp)\r\n+\t\tif returncode != 0:\r\n+\t\t\traise Exception, stderr\r\n+\texcept Exception, e:\r\n+\t\tstop_err( ERROR + str( e ) 
)\r\n+\t\r\n+def estimateN(chromosome, debut, fin, OR, sam, debut_rec, fin_rec, MIN_READ):\r\n+\tLIST = []\r\n+\tfichier = open(sam)\r\n+\tfor line in fichier:\r\n+\t\tdata = line.split()\r\n+\t\tif data != []:\r\n+\t\t\tif line[0] != \'@\':\r\n+\t\t\t\tif data[6] == \'=\' and data[2] == chromosome:\r\n+\t\t\t\t\tif int(data[3]) <= int(data[7]):#for selection of read spanning the zone\r\n+\t\t\t\t\t\tif int(data[3]) <= debut and fin <= int(data[7]) and debut_rec <= int(data[3]) and int(data[7]) <= fin_rec:\r\n+\t\t\t\t\t\t\tif data[1] == \'83\' or data[1] == \'163\' or data[1] == \'99\' or data[1] == \'147\':\r\n+\t\t\t\t\t\t\t\t#concordant reads\r\n+\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\telif data[1] == \'81\':#mate1 R et mate2 F et il s\'agit de mate1\r\n+\t\t\t\t\t\t\t\tif int(data[3]) < int(data[7]) and OR == \'rf\':\r\n+\t\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\t\telif int(data[3]) > int(data[7]) and OR == \'fr\':\r\n+\t\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t\tprint data[0:9]\r\n+\t\t\t\t\t\t\t\t\tsys.exit(\'There is a probleme in estimateN 1\')\r\n+\t\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\telif data[1] == \'161\':#mate1 R et mate2 F et il s\'agit de mate2\r\n+\t\t\t\t\t\t\t\tif int(data[3]) > int(data[7]) and OR == \'rf\':\r\n+\t\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t\tprint data[0:9]\r\n+\t\t\t\t\t\t\t\t\tsys.exit(\'There is a probleme in estimateN 2\')\r\n+\t\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\t\telif int(data[3]) < int(data[7]) and OR == \'fr\':\r\n+\t\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\telif data[1] == \'97\':#mate1 F et mate2 R et il s\'agit de mate1\r\n+\t\t\t\t\t\t\t\tif int(data[3]) > int(data[7]) and OR == 
\'rf\':\r\n+\t\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t\tprint data[0:9]\r\n+\t\t\t\t\t\t\t\t\tsys.exit(\'There is a probleme in estimateN 3\')\r\n+\t\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\t\telif int(data[3]) < int(data[7]) and OR == \'fr\':\r\n+\t\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\telif data[1] == \'145\':#mate1 F et mate2 R et il s\'agit de mate2\r\n+\t\t\t\t\t\t\t\tif int(data[3]) < int(data[7]) and OR == \'rf\':\r\n+\t\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\t\telif int(data[3]) > int(data[7]) and OR == \'fr\':\r\n+\t\t\t\t\t\t\t\t\tLI'..b'recal, fin_recal,")"\r\n+\t\t\t\tprint (NAME+\'\\t\'+str(debut)+\'\\t\'+str(fin)+\'\\t\'+str(fin-debut+1)+\'\\t\'+str(INSERT)+\'\\t\'+str(INSERT_est)+\'\\tNA\\n\')\r\n+\t\t\t\t#to put N in the sequence\r\n+\t\t\t\titer = 0\r\n+\t\t\t\twhile iter < ((fin - debut) + 1):\r\n+\t\t\t\t\tSEQUENCE = \'N\' + SEQUENCE\r\n+\t\t\t\t\titer = iter + 1\r\n+\t\t\telse:\r\n+\t\t\t\t#On peut estimer les N\r\n+\t\t\t\t# print NAME, debut, fin, fin-debut+1 ,INSERT, INSERT_est, (fin-debut+1)-(INSERT_est - INSERT), "(",debut_recal, fin_recal,")"\r\n+\t\t\t\tprint (NAME+\'\\t\'+str(debut)+\'\\t\'+str(fin)+\'\\t\'+str(fin-debut+1)+\'\\t\'+str(INSERT)+\'\\t\'+str(INSERT_est)+\'\\t\'+str((fin-debut+1)-(INSERT_est - INSERT))+\'\\n\')\r\n+\t\t\t\t#to put N in the sequence\r\n+\t\t\t\tif (fin-debut+1)-(INSERT_est - INSERT) > 0:\r\n+\t\t\t\t\t#La region contient des N\r\n+\t\t\t\t\titer = 0\r\n+\t\t\t\t\twhile iter < (fin-debut+1)-(INSERT_est - INSERT):\r\n+\t\t\t\t\t\tSEQUENCE = \'E\' + SEQUENCE\r\n+\t\t\t\t\t\titer = iter + 1\r\n+\t\t\t\telse:\r\n+\t\t\t\t\t#On estime que les deux regions semblent plus proches que possible on met par defaut 20 S\r\n+\t\t\t\t\titer = 0\r\n+\t\t\t\t\twhile iter < 20:\r\n+\t\t\t\t\t\tSEQUENCE = \'E\' + SEQUENCE\r\n+\t\t\t\t\t\titer = iter + 
1\r\n+\t\t\tos.remove(TEMP+\'chr\'+NAME+\'_reEstimateN_mapped.sam\')\r\n+\t\t\tdebut = \'\'\r\n+\t\t\tfin = \'\'\r\n+\t\t\t# SEQUENCE = caractere + SEQUENCE\r\n+\t\t\t# print \'manage N\', datetime.datetime.now() - t0\r\n+\t\t# else:\r\n+\t\t\t# SEQUENCE = caractere + SEQUENCE\r\n+\t\ti = i - 1\r\n+\tSEQUENCE = sequence[0:avant] + SEQUENCE\r\n+\tos.remove(TEMP+\'chr\'+NAME+\'_reEstimateN_mapped.bam\')\r\n+\toutfile = open(OUT,\'w\')\r\n+\tSeqIO.write(SeqRecord(Seq(SEQUENCE, generic_dna), id = NAME, description=\'\'),outfile, "fasta")\r\n+\toutfile.close()\r\n+\r\n+\r\n+def __main__():\r\n+\t#Parse Command Line\r\n+\tparser = optparse.OptionParser(usage="python %prog [options]\\n\\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr\\n\\n"\r\n+\t"This program re-estimate N present in DNA sequence. Restimated N are replaced by S")\r\n+\t# Wrapper options.\r\n+\tparser.add_option( \'\', \'--config\', dest=\'config\', default=\'not_filled\', help=\'A config file generated in ApMap pipeline\')\r\n+\tparser.add_option( \'\', \'--min_read\', dest=\'min_read\', default=\'not_filled\', help=\'The minimal read number requested to make the estimation, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--fasta\', dest=\'fasta\', default=\'not_filled\', help=\'The fasta file, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--seq\', dest=\'seq\', default=\'not_filled\', help=\'The sequence name, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--out\', dest=\'out\', default=\'not_filled\', help=\'The output name, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--insert\', dest=\'insert\', default=\'not_filled\', help=\'Re-estimated insert size, [default: %default]\')\r\n+\t(options, args) = parser.parse_args()\r\n+\t\r\n+\t\r\n+\t\r\n+\tpathname = os.path.dirname(sys.argv[0])\r\n+\t\r\n+\tloca_programs = ConfigParser.RawConfigParser()\r\n+\tloca_programs.read(pathname+\'/loca_programs.conf\')\r\n+\t\r\n+\tif options.config == 
\'not_filled\':\r\n+\t\tmot = \'Please provide an argument for --config\'\r\n+\t\tsys.exit(mot)\r\n+\tif options.min_read == \'not_filled\':\r\n+\t\tmot = \'Please provide an argument for --min_read\'\r\n+\t\tsys.exit(mot)\r\n+\tif options.fasta == \'not_filled\':\r\n+\t\tmot = \'Please provide an argument for --fasta\'\r\n+\t\tsys.exit(mot)\r\n+\tif options.seq == \'not_filled\':\r\n+\t\tmot = \'Please provide an argument for --seq\'\r\n+\t\tsys.exit(mot)\r\n+\tif options.out == \'not_filled\':\r\n+\t\tmot = \'Please provide an argument for --out\'\r\n+\t\tsys.exit(mot)\r\n+\tif options.insert == \'not_filled\':\r\n+\t\tmot = \'Please provide an argument for --insert\'\r\n+\t\tsys.exit(mot)\r\n+\t\r\n+\tt0 = datetime.datetime.now()\r\n+\tconfig = ConfigParser.RawConfigParser()\r\n+\tconfig.read(options.config)\r\n+\r\n+\trecord_dict = SeqIO.index(options.fasta, "fasta")\r\n+\t\r\n+\ttempo = tempfile.NamedTemporaryFile().name\r\n+\t\r\n+\trestim(loca_programs, tempo, config.get(\'Remove_dup\',\'out\'), config.get(\'General\',\'orient\'), int(options.min_read), str(record_dict[options.seq].seq), options.seq, options.out, int(options.insert))\r\n+\tprint \'done\', datetime.datetime.now() - t0\r\n+if __name__ == "__main__": __main__()\n\\ No newline at end of file\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/fusion_scaff.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/fusion_scaff.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,289 @@\n+\r\n+#\r\n+# Copyright 2014 CIRAD\r\n+#\r\n+# This program is free software; you can redistribute it and/or modify\r\n+# it under the terms of the GNU General Public License as published by\r\n+# the Free Software Foundation; either version 3 of the License, or\r\n+# (at your option) any later version.\r\n+#\r\n+# This program is distributed in the hope that it will be useful,\r\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+# GNU General Public License for more details.\r\n+#\r\n+# You should have received a copy of the GNU General Public License\r\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\r\n+# write to the Free Software Foundation, Inc.,\r\n+# 51 Franklin Street, Fifth Floor, Boston,\r\n+# MA 02110-1301, USA.\r\n+#\r\n+#\r\n+\r\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time, random\r\n+\r\n+\r\n+from Bio.Seq import Seq\r\n+from Bio.Alphabet import generic_dna\r\n+from Bio import SeqIO\r\n+from Bio.SeqRecord import SeqRecord\r\n+\r\n+def rev_seq(seq):\r\n+\t#function that reverse and complement a sequence\r\n+\tmy_dna = Seq(seq, generic_dna)\r\n+\treturn str(my_dna.reverse_complement())\r\n+\r\n+def charge_data(TABLE):\r\n+\t#function that record scaffold target and destination\r\n+\tdic_dest = {}#hash table : key = destination name, word = [destination orientation, last position of destination, hash_tableX, liste of destination coordinates]\r\n+\t#hash_tableX : key = last position of destination, word = [(0) target_name, (1) target_beginning, (2) target_end, (3) target_orientation, (4) destination_name, (5) destination_beginning, (6) destination_end, (7) destination_orientation, (8) Fusion_type]\r\n+\tdic_cible = set()#record scaffold target\r\n+\tfile = open(TABLE)\r\n+\tfor line in file:\r\n+\t\tdata = line.split()\r\n+\t\tif data != []:\r\n+\t\t\tif data[0] in 
dic_dest:\r\n+\t\t\t\tsys.exit(\'Problem in the table file target is also destination: \'+data[0])\r\n+\t\t\telif data[4] in dic_cible:\r\n+\t\t\t\tsys.exit(\'Problem in the table file destination is also a target: \'+data[4])\r\n+\t\t\tdic_cible.add(data[0])\r\n+\t\t\tif data[4] in dic_dest:\r\n+\t\t\t\tif dic_dest[data[4]][0] != data[7]:\r\n+\t\t\t\t\tsys.exit(\'Problem in the table file destination orientation is changing : \'+data[4])\r\n+\t\t\t\telse:\r\n+\t\t\t\t\tdic_dest[data[4]][2][int(data[6])] = [data[0], str(int(data[1])-1), str(int(data[2])-1), data[3], data[4], str(int(data[5])-1), str(int(data[6])-1), data[7], data[8]]\r\n+\t\t\t\t\tdic_dest[data[4]][1].append((int(data[6])-1))\r\n+\t\t\t\t\tif [str(int(data[5])-1), str(int(data[6])-1)] in dic_dest[data[4]][3]:\r\n+\t\t\t\t\t\tsys.exit(\'Problem in the table file. There is more than one target for same position in destination : \'+data[4]+\' \'+data[5]+\' \'+data[6])\r\n+\t\t\t\t\tdic_dest[data[4]][3].append([str(int(data[5])-1), str(int(data[6])-1)])\r\n+\t\t\telse:\r\n+\t\t\t\tdic_dest[data[4]] = [data[7], [(int(data[6])-1)],{},[]]\r\n+\t\t\t\tdic_dest[data[4]][2][(int(data[6])-1)] = [data[0], str(int(data[1])-1), str(int(data[2])-1), data[3], data[4], str(int(data[5])-1), str(int(data[6])-1), data[7], data[8]]\r\n+\t\t\t\tdic_dest[data[4]][3].append([str(int(data[5])-1), str(int(data[6])-1)])\r\n+\treturn dic_dest\r\n+\r\n+def verif_borne(DEBUT, FIN, SEQ, NOM):\r\n+\t# os.system("echo \'"+NOM+" "+str(DEBUT)+" "+str(FIN)+"\'")\r\n+\tif DEBUT > FIN:\r\n+\t\tsys.exit(\'Problem (1) in the table file wrong target coordinates : \'+NOM+\' \'+str(DEBUT)+\' \'+str(FIN))\r\n+\tif FIN + 1 > len(SEQ):\r\n+\t\tsys.exit(\'Problem (2) in the table file wrong target coordinates : \'+NOM+\' \'+str(FIN))\r\n+\tif SEQ[DEBUT] == \'n\' or SEQ[DEBUT] == \'N\':\r\n+\t\tsys.exit(\'Problem (3) in the table file wrong target coordinates : \'+NOM+\' \'+str(DEBUT))\r\n+\tif SEQ[FIN] == \'n\' or SEQ[FIN] == 
\'N\':\r\n+\t\tsys.exit(\'Problem (4) in the table file wrong target coordinates : \'+NOM+\' \'+str(FIN)+\' \'+SEQ[FIN-1])\r\n+\tif len(SEQ) != (FIN - DEBUT + 1):\r\n+\t\tif DEBUT != 0:\r\n+\t\t\tif SEQ[DEBUT-1] != \'n\' and SEQ[DEBUT-1] != \'N\':\r\n+\t\t\t\tsys.exit(\'Problem (5) in the table file wrong target coordinate'..b'1])))/2) > 0:\r\n+\t\t\t\t\tliste_pos = calcul_new_pos(liste_pos, (int(DIC[n][2])+1 - int(DIC[n][1])), int((((int(DIC[n][6]) - int(DIC[n][5])) - 1)-(int(DIC[n][2])+1 - int(DIC[n][1])))/2))\r\n+\t\t\t\t\tj = 0\r\n+\t\t\t\t\twhile j < int((((int(DIC[n][6]) - int(DIC[n][5])) - 1)-(int(DIC[n][2])+1 - int(DIC[n][1])))/2):\r\n+\t\t\t\t\t\tsequence = \'n\' + sequence\r\n+\t\t\t\t\t\tj = j + 1\r\n+\t\t\t\t\tj = 0\r\n+\t\t\t\telse:\r\n+\t\t\t\t\tsequence = \'nnnnnnnnnnnnnnnnnnnn\' + sequence\r\n+\t\t\t\t\tliste_pos = calcul_new_pos(liste_pos, (int(DIC[n][2])+1 - int(DIC[n][1])), 20)\r\n+\t\t\telse:#there is sequence at the beginning of the destination\r\n+\t\t\t\tliste_pos = calcul_new_pos(liste_pos, (int(DIC[n][2])+1 - int(DIC[n][1])), 0)\r\n+\t\t\t# print liste_pos\r\n+\tif int(DIC[n][5]) != 0 and int(DIC[n][6]) != 0:#there is NO sequence at the beginning of the destination\r\n+\t\tsequence = dico[DEST][0:fin] + sequence\r\n+\t\tliste_pos = calcul_new_pos(liste_pos, fin, 0)\r\n+\t\tliste_scaff.insert(0, DEST)\r\n+\t\tverif_borne(0, fin-1, dico[DEST], DEST)\r\n+\t\t# print liste_pos\r\n+\t#print \'******************************\', liste_pos\r\n+\t#os.system("echo \'lenght seq final :"+str(len(sequence))+"\'")\r\n+\t#os.system("echo \'**********sequence reconstructed**********\'")\r\n+\t#5)retourner le scaffold dans la bonne orientation\r\n+\tmot = \'\'\r\n+\tif OR == \'REV\':\r\n+\t\ti = 0\r\n+\t\tfor n in liste_scaff:\r\n+\t\t\tif i == 0:\r\n+\t\t\t\tmot = n+\'\\t\'+str((abs(liste_pos[i+1]-(len(sequence)-1)))+1)+\'\\t\'+str((abs(liste_pos[i]-(len(sequence)-1)))+1)\r\n+\t\t\telse:\r\n+\t\t\t\tmot = 
n+\'\\t\'+str((abs(liste_pos[i+1]-(len(sequence)-1)))+1)+\'\\t\'+str((abs(liste_pos[i]-(len(sequence)-1)))+1)+\'\\t\'+mot\r\n+\t\t\ti = i + 2\r\n+\t\toutfile = open(OUT,\'a\')\r\n+\t\toutfile.write(DEST+\'\\t\'+mot+\'\\n\')\r\n+\t\toutfile.close()\r\n+\t\t# print len(sequence)\r\n+\t\t#os.system("echo \'reversing sequence\'")\r\n+\t\treturn rev_seq(sequence)\r\n+\telse:\r\n+\t\ti = 0\r\n+\t\tfor n in liste_scaff:\r\n+\t\t\tif i == 0:\r\n+\t\t\t\tmot = n+\'\\t\'+str(liste_pos[i]+1)+\'\\t\'+str(liste_pos[i+1]+1)\r\n+\t\t\telse:\r\n+\t\t\t\tmot = mot+\'\\t\'+n+\'\\t\'+str(liste_pos[i]+1)+\'\\t\'+str(liste_pos[i+1]+1)\r\n+\t\t\ti = i + 2\r\n+\t\toutfile = open(OUT,\'a\')\r\n+\t\toutfile.write(DEST+\'\\t\'+mot+\'\\n\')\r\n+\t\toutfile.close()\r\n+\t\t# print len(sequence)\r\n+\t\treturn sequence\r\n+\r\n+def calcul_new_pos(l1, taille_seq, ADD):\r\n+\tif len(l1) == 0:\r\n+\t\tl1 = [ADD, ADD + taille_seq-1]\r\n+\t\treturn l1\r\n+\telse:\r\n+\t\tLISTE_A_RET = []\r\n+\t\tfor n in l1:\r\n+\t\t\tLISTE_A_RET.append(n+ADD+taille_seq)\r\n+\t\tLISTE_A_RET.insert(0,ADD+taille_seq-1)\r\n+\t\tLISTE_A_RET.insert(0,ADD)\r\n+\t\treturn LISTE_A_RET\r\n+\r\n+def __main__():\r\n+\t#Parse Command Line\r\n+\tparser = optparse.OptionParser(usage="python %prog [options]\\n\\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr\\n\\n"\r\n+\t"This script merge scaffold based on tabulated file")\r\n+\t# Wrapper options.\r\n+\tparser.add_option( \'\', \'--table\', dest=\'table\', default=\'not_filled\', help=\'The table file of scaffold to merge\')\r\n+\tparser.add_option( \'\', \'--fasta\', dest=\'fasta\', default=\'not_filled\', help=\'The multi-fasta scaffold file\')\r\n+\tparser.add_option( \'\', \'--out\', dest=\'out\', default=\'fusion.fasta\', help=\'The multi-fasta output file name\')\r\n+\tparser.add_option( \'\', \'--out_verif\', dest=\'out_verif\', default=\'fusion2verif.txt\', help=\'The output file to give to verif_fusion.py\')\r\n+\t(options, args) = 
parser.parse_args()\r\n+\t\r\n+\r\n+\tdico = charge_data(options.table)\r\n+\r\n+\r\n+\trecord_dict = SeqIO.index(options.fasta, "fasta")\r\n+\t\r\n+\toutfile = open(options.out_verif,\'w\')\r\n+\toutfile.close()\r\n+\t\r\n+\toutfile = open(options.out,\'w\')\r\n+\tfor j in dico:\r\n+\t\tliste = sorted(dico[j][1], reverse=True)\r\n+\t\tSEQ = SeqRecord(Seq(reconstruct(dico[j][2], liste, dico[j][0], j, record_dict, options.out_verif), generic_dna), id = j, description=\'\')\r\n+\t\tSeqIO.write(SEQ, outfile, "fasta")\r\n+\r\n+\tfile = open(options.table)\r\n+\tdico = set()\r\n+\tfor line in file:\r\n+\t\tdata = line.split()\r\n+\t\tif data != []:\r\n+\t\t\tdico.add(data[0])\r\n+\t\t\tdico.add(data[4])\r\n+\r\n+\tfor n in record_dict:\r\n+\t\tif not(n in dico):\r\n+\t\t\tSeqIO.write(record_dict[n], outfile, "fasta")\r\n+\toutfile.close()\r\n+\r\n+if __name__ == "__main__": __main__()\n\\ No newline at end of file\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/fusion_scaff.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/fusion_scaff.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,46 @@ +<tool id="fusion_scaff" name="fusion_scaff" version="0.1"> + <description> : Merge scaffolds using a tabulated file</description> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/fusion_scaff.py + --table $table + --fasta $fasta + --out $fusion_fasta + --out_verif $fusion_tabulated + + </command> + <inputs> + <param name="table" type="data" label="The table file of scaffold to merge" /> + <param name="fasta" type="data" label="The multi-fasta scaffold file" /> + <param name="prefix" type="text" label="Identifier for output" /> + </inputs> + <outputs> + <data format="fasta" name="fusion_fasta" label="${tool.name} : $prefix fusion (fasta)" /> + <data format="txt" name="fusion_tabulated" label="${tool.name} : $prefix fusion (tabulated)" /> + </outputs> + <help> + +**Overview** + +----- + +.. class:: infomark + +**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD). + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr + +.. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/group4contig.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/group4contig.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,241 @@\n+\r\n+#\r\n+# Copyright 2014 CIRAD\r\n+#\r\n+# This program is free software; you can redistribute it and/or modify\r\n+# it under the terms of the GNU General Public License as published by\r\n+# the Free Software Foundation; either version 3 of the License, or\r\n+# (at your option) any later version.\r\n+#\r\n+# This program is distributed in the hope that it will be useful,\r\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+# GNU General Public License for more details.\r\n+#\r\n+# You should have received a copy of the GNU General Public License\r\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\r\n+# write to the Free Software Foundation, Inc.,\r\n+# 51 Franklin Street, Fifth Floor, Boston,\r\n+# MA 02110-1301, USA.\r\n+#\r\n+#\r\n+\r\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time\r\n+\r\n+\r\n+\r\n+def __main__():\r\n+\t#Parse Command Line\r\n+\tparser = optparse.OptionParser(usage="python %prog [options]\\n\\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr\\n\\n"\r\n+\t"This script takes scaffold to join and group them by linkage. The input is a tabulated file looking as folowed:\\n"\r\n+\t"scaffold1 1 2458028 FWD scaffold2 1 1 FWD FWD contig\\n"\r\n+\t"scaffold36 1 250001 FWD scaffold5 2000000 2000000 FWD REV contig")\r\n+\t# Wrapper options. 
\r\n+\tparser.add_option( \'\', \'--table\', dest=\'table\', help=\'The table file input\')\r\n+\tparser.add_option( \'\', \'--out\', dest=\'out\', default=\'intermediate_junction.txt\', help=\'The output file name, [default: %default]\')\r\n+\t(options, args) = parser.parse_args()\r\n+\t\r\n+\t# Filling dictionnary\r\n+\tdico_line = set()\r\n+\tfile = open(options.table)\r\n+\tfor line in file:\r\n+\t\tdata = line.split()\r\n+\t\tif data:\r\n+\t\t\tif data[3] != \'FWD\' or data[7] != \'FWD\':\r\n+\t\t\t\tsys.exit(\'Warning! the orientation filled in column 4 or 8 is not managed, this column should contain "FWD" \')\r\n+\t\t\tdico_line.add(line)\r\n+\tfile.close()\r\n+\t\r\n+\tdico = set() # --> to identify already treated lines\r\n+\tdico_scaff = {} # --> a hash table containing a list of three elements : (1) grouped scaffold ordered, (2) scaffold orientation in the group, (3) ungrouped scaffolds, (4) scaffold number\r\n+\ti = 0\r\n+\tfor line in dico_line:\r\n+\t\tdata = line.split()\r\n+\t\tif data:\r\n+\t\t\tif not(line in dico): # --> this line start a new group\r\n+\t\t\t\ti += 1\r\n+\t\t\t\t# print i\r\n+\t\t\t\tdico.add(line) # --> record a line already treated\r\n+\t\t\t\tdico_scaff[i] = [[data[4]],[data[7]],set(), 1]\r\n+\t\t\t\ttaille_dico = int(dico_scaff[i][3]) # --> to define a group is inflating or not (in the while loop)\r\n+\t\t\t\tif data[5] == \'1\':# --> scaffold should be inserted at the begining of the list\r\n+\t\t\t\t\tdico_scaff[i][0].insert(0,data[0])\r\n+\t\t\t\t\tdico_scaff[i][1].insert(0,data[8])\r\n+\t\t\t\t\tdico_scaff[i][3] += 1\r\n+\t\t\t\telse:\r\n+\t\t\t\t\tdico_scaff[i][0].append(data[0])\r\n+\t\t\t\t\tdico_scaff[i][1].append(data[8])\r\n+\t\t\t\t\tdico_scaff[i][3] += 1\r\n+\t\t\t\t# --> The new group as been stared\r\n+\t\t\t\twhile taille_dico < dico_scaff[i][3]: # --> verification is new scaffolds have been added to group\r\n+\t\t\t\t\ttaille_dico = int(dico_scaff[i][3])\r\n+\t\t\t\t\t# print dico_scaff[i][0], 
dico_scaff[i][1], len(dico_scaff[i][2]), dico_scaff[i][3]\r\n+\t\t\t\t\tfor line2 in dico_line:\r\n+\t\t\t\t\t\tdata2 = line2.split()\r\n+\t\t\t\t\t\tif data2 and not(line2 in dico):\r\n+\t\t\t\t\t\t\tif data2[4] in dico_scaff[i][0]: # --> checking if this scaffold is already in the group\r\n+\t\t\t\t\t\t\t\tdico.add(line2)\r\n+\t\t\t\t\t\t\t\tdico_scaff[i][3] += 1\r\n+\t\t\t\t\t\t\t\tif data2[4] == dico_scaff[i][0][-1]: # --> this scaffold is at the end of the list\r\n+\t\t\t\t\t\t\t\t\tif dico_scaff[i][1][-1] == \'FWD\': # --> if this scaffold is FWD orientated\r\n+\t\t\t\t\t\t\t\t\t\tif data2[5] == \'1\': # --> the new scaffold could not be inserted\r\n+\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][2].add(line2)\r\n+\t\t\t\t\t\t\t\t\t\t\t# print \'toto1\'\r\n+\t\t\t\t\t\t\t\t\t\telse: # --> the new scaffold could be inserted at the end\r\n+\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][0].append(data2[0])\r'..b' orientaion in column 8. Only FWD and REV are recognized\')\r\n+\t\t\t\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\t\t\t\tif data2[8] == \'FWD\': # --> the new scaffold could be inserted at the end\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][0].append(data2[4])\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][1].append(\'REV\')\r\n+\t\t\t\t\t\t\t\t\t\t\telif data2[8] == \'REV\': # --> the new scaffold could not be inserted\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][2].add(line2)\r\n+\t\t\t\t\t\t\t\t\t\t\t\t# print \'toto9\'\r\n+\t\t\t\t\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\t\t\t\t\tsys.exit(\'Unrecognized orientaion in column 8. Only FWD and REV are recognized\')\r\n+\t\t\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\t\t\tsys.exit(\'Unrecognized orientaion in column 8. 
Only FWD and REV are recognized\')\r\n+\t\t\t\t\t\t\t\telif data2[0] == dico_scaff[i][0][0]: # --> this scaffold is at the begining of the list\r\n+\t\t\t\t\t\t\t\t\tif dico_scaff[i][1][0] == \'FWD\': # --> if this scaffold is FWD orientated\r\n+\t\t\t\t\t\t\t\t\t\tif data2[5] != \'1\':\r\n+\t\t\t\t\t\t\t\t\t\t\tif data2[8] == \'FWD\': # --> the new scaffold could be inserted at the begining\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][0].insert(0,data2[4])\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][1].insert(0,data2[7])\r\n+\t\t\t\t\t\t\t\t\t\t\telif data2[8] == \'REV\': # --> the new scaffold could not be inserted\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][2].add(line2)\r\n+\t\t\t\t\t\t\t\t\t\t\t\t# print \'toto10\'\r\n+\t\t\t\t\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\t\t\t\t\tsys.exit(\'Unrecognized orientaion in column 8. Only FWD and REV are recognized\')\r\n+\t\t\t\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\t\t\t\tif data2[8] == \'REV\': # --> the new scaffold could be inserted at begining\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][0].insert(0,data2[4])\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][1].insert(0,\'REV\')\r\n+\t\t\t\t\t\t\t\t\t\t\telif data2[8] == \'FWD\': # --> the new scaffold could not be inserted\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][2].add(line2)\r\n+\t\t\t\t\t\t\t\t\t\t\t\t# print \'toto11\'\r\n+\t\t\t\t\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\t\t\t\t\tsys.exit(\'Unrecognized orientaion in column 8. 
Only FWD and REV are recognized\')\r\n+\t\t\t\t\t\t\t\t\telif dico_scaff[i][1][0] == \'REV\': # --> if this scaffold is REV orientated\r\n+\t\t\t\t\t\t\t\t\t\tif data2[5] == \'1\':\r\n+\t\t\t\t\t\t\t\t\t\t\tif data2[8] == \'FWD\': # --> the new scaffold could be inserted at the begining\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][0].insert(0,data2[4])\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][1].insert(0,\'REV\')\r\n+\t\t\t\t\t\t\t\t\t\t\telif data2[8] == \'REV\': # --> the new scaffold could not be inserted\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][2].add(line2)\r\n+\t\t\t\t\t\t\t\t\t\t\t\t# print \'toto12\'\r\n+\t\t\t\t\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\t\t\t\t\tsys.exit(\'Unrecognized orientaion in column 8. Only FWD and REV are recognized\')\r\n+\t\t\t\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\t\t\t\tif data2[8] == \'REV\': # --> the new scaffold could be inserted at begining\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][0].insert(0,data2[4])\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][1].insert(0,data2[7])\r\n+\t\t\t\t\t\t\t\t\t\t\telif data2[8] == \'FWD\': # --> the new scaffold could not be inserted\r\n+\t\t\t\t\t\t\t\t\t\t\t\tdico_scaff[i][2].add(line2)\r\n+\t\t\t\t\t\t\t\t\t\t\t\t# print \'toto13\'\r\n+\t\t\t\t\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\t\t\t\t\tsys.exit(\'Unrecognized orientaion in column 8. Only FWD and REV are recognized\')\r\n+\t\t\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\t\t\tsys.exit(\'Unrecognized orientaion in column 8. 
Only FWD and REV are recognized\')\r\n+\t\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\t\tdico_scaff[i][2].add(line2)\r\n+\t\t\t\t\t\t\t\t\t# print \'toto14\'\r\n+\r\n+\t\r\n+\toutfile = open(options.out,\'w\')\r\n+\tfor n in dico_scaff:\r\n+\t\tliste = list(dico_scaff[n][0])\r\n+\t\tliste.sort()\r\n+\t\toutfile.write(\'>\'+liste[0]+\'\\n\')\r\n+\t\t# print \'>\'+liste[0]\r\n+\t\tfor k in dico_scaff[n][0]:\r\n+\t\t\toutfile.write(\'\\t\'.join([k,dico_scaff[n][1][dico_scaff[n][0].index(k)]])+\'\\n\')\r\n+\t\t\t# print \'\\t\'.join([k,dico_scaff[n][1][dico_scaff[n][0].index(k)]])\r\n+\t\tfor k in dico_scaff[n][2]:\r\n+\t\t\tdata = k.split()\r\n+\t\t\tif data[5] == \'1\':\r\n+\t\t\t\tif data[3] == data[8]:\r\n+\t\t\t\t\toutfile.write(\'not_grouped\\t\'+data[0]+\'\\tFWD\\t\'+data[4]+\'\\t\'+data[7]+\'\\n\')\r\n+\t\t\t\telse:\r\n+\t\t\t\t\toutfile.write(\'not_grouped\\t\'+data[0]+\'\\tREV\\t\'+data[4]+\'\\t\'+data[7]+\'\\n\')\r\n+\t\t\telse:\r\n+\t\t\t\tif data[3] == data[8]:\r\n+\t\t\t\t\toutfile.write(\'not_grouped\\t\'+data[4]+\'\\t\'+data[7]+\'\\t\'+data[0]+\'\\tFWD\'+\'\\n\')\r\n+\t\t\t\telse:\r\n+\t\t\t\t\toutfile.write(\'not_grouped\\t\'+data[4]+\'\\t\'+data[7]+\'\\t\'+data[0]+\'\\tREV\'+\'\\n\')\r\n+\toutfile.close()\r\n+\t\r\n+\t\r\n+\t\r\n+if __name__ == "__main__": __main__()\n\\ No newline at end of file\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/group4contig.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/group4contig.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,42 @@ +<tool id="group4contig" name="group4contig" version="0.1"> + <description> : Create a file grouping scaffold name that should be joined together</description> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/group4contig.py + --table $table + --out $grouped_scaffold + + </command> + <inputs> + <param name="table" type="data" label="The table file of scaffold to join" /> + <param name="prefix" type="text" label="Identifier for output" /> + </inputs> + <outputs> + <data format="txt" name="grouped_scaffold" label="${tool.name} : $prefix grouped scaffold" /> + </outputs> + <help> + +**Overview** + +----- + +.. class:: infomark + +**Galaxy integration** Martin Guillaume (CIRAD), Droc Gaetan (CIRAD). + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to galaxy-dev-southgreen@cirad.fr + +.. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/include_scaffremodler.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/include_scaffremodler.sh Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,1 @@ +module load compiler/gcc/4.9.2 system/python/2.7.9 bioinfo/bowtie2/2.2.5 bioinfo/bowtie/1.1.1 bioinfo/bwa/0.7.12 system/java/jre8 bioinfo/picard-tools/1.130 bioinfo/samtools/0.1.18 bioinfo/circos/0.67-7 \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/loca_programs.conf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/loca_programs.conf Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| @@ -0,0 +1,13 @@ +[Programs] +perl = perl +python = python +bowtie = bowtie +bowtie-build = bowtie-build +java = java +bowtie2-build = bowtie2-build +bowtie2 = bowtie2 +picard-tool = /usr/local/bioinfo/picard-tools/1.130/picard.jar +bwa = bwa +samtools = /usr/local/bioinfo/samtools/1.2/bin/samtools +circos = /homedir/gmartin/script/circos-0.52/bin/circos +bamgrepreads = /home/galaxydev/SouthGreen_tools/Galaxy_SouthGreen/scaffremodler/bamgrepreads \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/look4fusion.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/look4fusion.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,390 @@\n+\r\n+#\r\n+# Copyright 2014 CIRAD\r\n+#\r\n+# This program is free software; you can redistribute it and/or modify\r\n+# it under the terms of the GNU General Public License as published by\r\n+# the Free Software Foundation; either version 3 of the License, or\r\n+# (at your option) any later version.\r\n+#\r\n+# This program is distributed in the hope that it will be useful,\r\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+# GNU General Public License for more details.\r\n+#\r\n+# You should have received a copy of the GNU General Public License\r\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\r\n+# write to the Free Software Foundation, Inc.,\r\n+# 51 Franklin Street, Fifth Floor, Boston,\r\n+# MA 02110-1301, USA.\r\n+#\r\n+#\r\n+\r\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time\r\n+\r\n+def stop_err( msg ):\r\n+ sys.stderr.write( "%s\\n" % msg )\r\n+ sys.exit()\r\n+\r\n+def run_job (cmd_line, ERROR):\r\n+\tprint cmd_line\r\n+\ttry:\r\n+\t\ttmp = (tempfile.NamedTemporaryFile().name)+\'.error\'\r\n+\t\t# print tmp\r\n+\t\terror = open(tmp, \'w\')\r\n+\t\tproc = subprocess.Popen( args=cmd_line, shell=True, stderr=error)\r\n+\t\treturncode = proc.wait()\r\n+\t\terror.close()\r\n+\t\terror = open( tmp, \'rb\' )\r\n+\t\tstderr = \'\'\r\n+\t\tbuffsize = 1048576\r\n+\t\ttry:\r\n+\t\t\twhile True:\r\n+\t\t\t\tstderr += error.read( buffsize )\r\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\r\n+\t\t\t\t\tbreak\r\n+\t\texcept OverflowError:\r\n+\t\t\tpass\r\n+\t\terror.close()\r\n+\t\tos.remove(tmp)\r\n+\t\tif returncode != 0:\r\n+\t\t\traise Exception, stderr\r\n+\texcept Exception, e:\r\n+\t\tstop_err( ERROR + str( e ) )\r\n+\r\n+\r\n+def cherche_fusion(FILE, ZONE, FICHIER, OUT, BOUND):\r\n+\toutfile = open(OUT, \'a\')\r\n+\tfile = open(FILE)\r\n+\tdico_deb = 
[]\r\n+\tdico_fin = []\r\n+\tfor line in file:\r\n+\t\tdata = line.split()\r\n+\t\tif data != []:\r\n+\t\t\t# print data\r\n+\t\t\tif int(data[2]) <= ZONE:\r\n+\t\t\t\tdico_deb.append(data)\r\n+\t\t\tif int(data[1]) >= int(data[3]) - ZONE:\r\n+\t\t\t\tdico_fin.append(data)\r\n+\tfor n in dico_deb:#on cherche les fusions\r\n+\t\tfor k in dico_fin:\r\n+\t\t\tif n[0] != k[0]:\r\n+\t\t\t\tsys.exit(\'There is a bug\')\r\n+\t\t\tif n[6] == k[6]:\r\n+\t\t\t\tif n[14] == \'AP\' and k[14] == \'AP\': #destination is before the scaffold searched\r\n+\t\t\t\t\tif k[13] == \'BLUE\' and n[13] == \'RED\':\r\n+\t\t\t\t\t\tif int(n[7])-100 < int(k[7])+100:\r\n+\t\t\t\t\t\t\tliste_fusion = cherche_N_zone(FICHIER, n[6], n[8], k[7])\r\n+\t\t\t\t\t\t\tif len(liste_fusion) == 0:\r\n+\t\t\t\t\t\t\t\toutfile.write(\'\\t\'.join([str(n[0]), str(1), str(n[3]), str(n[4]), str(n[6]), str(n[8]), str(k[7]), str(n[10]), str(n[12]), \'removed : no N found\'])+\'\\n\')\r\n+\t\t\t\t\t\t\t# elif len(liste_fusion) > 1:\r\n+\t\t\t\t\t\t\t\t# print \'removed : too much positions\', n[0], 0, n[3], n[4], n[6], n[8], k[7], n[10], n[12]\r\n+\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\tfor w in liste_fusion:\r\n+\t\t\t\t\t\t\t\t\toutfile.write(\'\\t\'.join([str(n[0]), str(1), str(n[3]), str(n[4]), str(n[6]), str(w[0]), str(w[1]), str(n[10]), str(n[12]), \'fusion\'])+\'\\n\')\r\n+\t\t\t\t\tif k[13] == \'PURPLE\' and n[13] == \'GREEN\':\r\n+\t\t\t\t\t\tif int(k[7])-100 < int(n[7])+100:\r\n+\t\t\t\t\t\t\tliste_fusion = cherche_N_zone(FICHIER, n[6], k[8], n[7])\r\n+\t\t\t\t\t\t\tif len(liste_fusion) == 0:\r\n+\t\t\t\t\t\t\t\toutfile.write(\'\\t\'.join([str(n[0]), str(1), str(n[3]), str(n[4]), str(n[6]), str(k[8]), str(n[7]), str(n[10]), str(n[12]), \'removed : no N found\'])+\'\\n\')\r\n+\t\t\t\t\t\t\t# elif len(liste_fusion) > 1:\r\n+\t\t\t\t\t\t\t\t# print \'removed : too much positions\', n[0], 0, n[3], n[4], n[6], k[8], n[7], n[10], n[12]\r\n+\t\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\t\tfor w in 
liste_fusion:\r\n+\t\t\t\t\t\t\t\t\toutfile.write(\'\\t\'.join([str(n[0]), str(1), str(n[3]), str(n[4]), str(n[6]), str(w[0]), str(w[1]), str(n[10]), str(n[12]), \'fusion\'])+\'\\n\')\r\n+\t\t\t\telif n[14] == \'AV\' and k[14] == \'AV\': #destination is before the scaffold searched\r\n+\t\t\t\t\tif n[13] == \'BLUE\' and k[13] == \'RED\':\r\n+\t\t\t\t\t\tif int(n[7])-100 < int(k[7])+100:\r\n+\t\t\t\t\t\t\tliste_fusion = cherche_N_zone(FICHIER, n[6], n[8], k[7])\r\n+\t\t\t\t\t\t\tif len(liste_fusion) == 0:\r\n+\t\t\t\t\t\t\t\toutfile.write(\'\\t\'.join([str(n[0'..b'n[2] + BOUND) >= dic_chr[n[0]]):\r\n+\t\t\t\t\t\t\tmot = mot+\'-\'+n[0]+\':0:\'+str(dic_chr[n[0]])\r\n+\t\t\t\t\t\telif ((n[1] - BOUND) <= 0):\r\n+\t\t\t\t\t\t\tmot = mot+\'-\'+n[0]+\':0:\'+str(n[2] + BOUND)\r\n+\t\t\t\t\t\telif((n[2] + BOUND) >= dic_chr[n[0]]):\r\n+\t\t\t\t\t\t\tmot = mot+\'-\'+n[0]+\':\'+str(n[1] - BOUND)+\':\'+str(dic_chr[n[0]])\r\n+\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\tmot = mot+\'-\'+n[0]+\':\'+str(n[1] - BOUND)+\':\'+str(n[2] + BOUND)\r\n+\t\t\t\t\tliste_out.append(data[0]+\'.png\')\r\n+\t\t\t\t\tredraw = \'%s %s/draw_circos.py --config %s --frf n --ff n --rr n --ins n --delet n --chr_rr n --chr_rf n --chr_fr n --chr_ff n --draw %s --out %s\' % (LOCA_PROGRAMS.get(\'Programs\',\'python\'), PATHNAME, CONFIG, mot, data[0]+\'.png\')\r\n+\t\t\t\t\trun_job(redraw, \'Bug when drawing circos\')\r\n+\tmot = liste_out[0]\r\n+\tfor n in liste_out[1:]:\r\n+\t\tmot = mot +\' \'+ n\r\n+\tarchivage = \'tar -cf \'+OUT_TAR+\' \'+mot\r\n+\trun_job(archivage, \'Bug in archive creation\')\r\n+\tfor n in liste_out:\r\n+\t\tos.remove(n)\r\n+\r\n+def cherche_dest(CHR, DEBUT, FIN, FICHIER, TEMP, COLOR, TYPE, CHR_ORDER, DIC_CHR):\r\n+\tliste = []\r\n+\tfile = open(FICHIER)\r\n+\tfor line in file:\r\n+\t\tdata = line.split()\r\n+\t\tif data:\r\n+\t\t\t# print data\r\n+\t\t\tif data[0] != data[5] and data[13] == "PASSED":\r\n+\t\t\t\tif CHR == data[0]:\r\n+\t\t\t\t\tif DEBUT-100 <= int(data[1]) and int(data[2]) 
<= FIN+100:\r\n+\t\t\t\t\t\tliste.append([data[5], int(data[6]), int(data[7])])\r\n+\t\t\t\t\t\tif CHR_ORDER.index(data[0]) < CHR_ORDER.index(data[5]):\r\n+\t\t\t\t\t\t\tPOSITION = \'AV\'\r\n+\t\t\t\t\t\telif CHR_ORDER.index(data[0]) > CHR_ORDER.index(data[5]):\r\n+\t\t\t\t\t\t\tPOSITION = \'AP\'\r\n+\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\tsys.exit(\'bug in cherche_dest\')\r\n+\t\t\t\t\t\tTEMP.write(\'\\t\'.join([data[0], data[1], data[2], str(DIC_CHR[data[0]]), \'FWD\', \'-\', data[5], data[6], data[7], str(DIC_CHR[data[5]]), \'FWD\', \'-\', TYPE, COLOR, POSITION])+\'\\n\')\r\n+\t\t\t\telif CHR == data[5]:\r\n+\t\t\t\t\tif DEBUT-100 <= int(data[6]) and int(data[7]) <= FIN+100:\r\n+\t\t\t\t\t\tliste.append([data[0], int(data[1]), int(data[2])])\r\n+\t\t\t\t\t\tif CHR_ORDER.index(data[5]) < CHR_ORDER.index(data[0]):\r\n+\t\t\t\t\t\t\tPOSITION = \'AV\'\r\n+\t\t\t\t\t\telif CHR_ORDER.index(data[5]) > CHR_ORDER.index(data[0]):\r\n+\t\t\t\t\t\t\tPOSITION = \'AP\'\r\n+\t\t\t\t\t\telse:\r\n+\t\t\t\t\t\t\tsys.exit(\'bug in cherche_dest\')\r\n+\t\t\t\t\t\tTEMP.write(\'\\t\'.join([data[5], data[6], data[7], str(DIC_CHR[data[5]]), \'FWD\', \'-\', data[0], data[1], data[2], str(DIC_CHR[data[0]]), \'FWD\', \'-\', TYPE, COLOR, POSITION])+\'\\n\')\r\n+\tTEMP.flush()\r\n+\treturn liste\r\n+\r\n+def __main__():\r\n+\t#Parse Command Line\r\n+\tparser = optparse.OptionParser(usage="python %prog [options]\\n\\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr\\n\\n"\r\n+\t"This script looks for possible scaffold fusions and junctions")\r\n+\t# Wrapper options. 
\r\n+\tparser.add_option( \'\', \'--config\', dest=\'config\', default=\'not_filled\', help=\'The conf file generated by conf4circos.py\')\r\n+\t\r\n+\tparser.add_option( \'\', \'--bound\', dest=\'bound\', default=10000, help=\'Boudary of scaffold to look for fusion and junction, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--out\', dest=\'out\', default=\'possible_fusion.txt\', help=\'Output text file of possible fusion and contig, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--out_tar\', dest=\'out_tar\', default=\'possible_fusion.tar\', help=\'Output tar.gz file containing circos figures, [default: %default]\')\r\n+\t\r\n+\t\r\n+\t(options, args) = parser.parse_args()\r\n+\t\r\n+\tpathname = os.path.dirname(sys.argv[0])\r\n+\t\r\n+\tloca_programs = ConfigParser.RawConfigParser()\r\n+\tloca_programs.read(pathname+\'/loca_programs.conf\')\r\n+\t\r\n+\ttemp = tempfile.NamedTemporaryFile().name\r\n+\tprint temp\r\n+\t\r\n+\tconfig = ConfigParser.RawConfigParser()\r\n+\tconfig.read(options.config)\r\n+\t\r\n+\tif options.config == \'not_filled\':\r\n+\t\tsys.exit(\'--config argument is missing\')\r\n+\tif config.get(\'General\',\'orient\') != \'rf\':\r\n+\t\tsys.exit(\'The program exited: only rf orientation is accepted\')\r\n+\t\r\n+\tlook4fusion(loca_programs, options.config, int(options.bound), temp, options.out_tar, pathname)\r\n+\t\r\n+\tos.system(\'cat \'+temp+\' | sort | uniq > \'+options.out)\r\n+\tos.remove(temp)\r\n+\r\n+if __name__ == "__main__": __main__()\n\\ No newline at end of file\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/look4fusion.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/look4fusion.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,30 @@ +<tool id="look4fusion" name="look4fusion" version="0.1"> + <description> : Look for possible scaffold fusions and junction using Scaffremodler workflow and conf4circos outputs</description> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/look4fusion.py + --config $config + --bound $bound + --out $possible_junction_and_fusions + --out_tar $circos_tar_file + + </command> + <inputs> + <param name="config" type="data" label="The circos_conf file generated by conf4circos" /> + <param name="bound" type="integer" value="10000" label="Scaffold boundary searched" /> + </inputs> + <outputs> + <data format="txt" name="possible_junction_and_fusions" label="${tool.name} : Possible junctions and fusions" /> + <data format="tar" name="circos_tar_file" label="${tool.name} : Circos representation (.tar file)" /> + </outputs> + <help> + + .. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/merge_sam.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/merge_sam.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b"@@ -0,0 +1,211 @@\n+\n+#\n+# Copyright 2014 CIRAD\n+#\n+# This program is free software; you can redistribute it and/or modify\n+# it under the terms of the GNU General Public License as published by\n+# the Free Software Foundation; either version 3 of the License, or\n+# (at your option) any later version.\n+#\n+# This program is distributed in the hope that it will be useful,\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+# GNU General Public License for more details.\n+#\n+# You should have received a copy of the GNU General Public License\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\n+# write to the Free Software Foundation, Inc.,\n+# 51 Franklin Street, Fifth Floor, Boston,\n+# MA 02110-1301, USA.\n+#\n+#\n+\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time, random\n+\n+def CIGAR(mot, debut):\n+\tif 'N' in mot:\n+\t\tsys.exit('N in mot')\n+\telif 'P' in mot:\n+\t\tsys.exit('P in mot')\n+\telif '=' in mot:\n+\t\tsys.exit('= in mot')\n+\telif 'X' in mot:\n+\t\tsys.exit('X in mot')\n+\telse:\n+\t\tmot_split = mot.replace('M',' M ').replace('D',' D ').replace('I',' I ').replace('S',' S ').replace('H',' H ').split()\n+\ttotal = 0\n+\twhile mot_split:\n+\t\tif mot_split[1] == 'M' or mot_split[1] == 'D':\n+\t\t\ttotal = total + int(mot_split[0])\n+\t\tdel mot_split[0]\n+\t\tdel mot_split[0]\n+\treturn debut + (total-1)\n+\n+def find_info(LINE, YT):\n+\tmot = []\n+\tfor n in LINE:\n+\t\tliste = n.split(':')\n+\t\tif liste[0] == 'AS':\n+\t\t\tmot.append(n)\n+\t\telif liste[0] == 'XS':\n+\t\t\tmot.append(n)\n+\tmot.append(YT)\n+\treturn mot\n+\n+\n+def merge2sam(FILE1, FILE2, OUT, MIN, MAX, OR):\n+\tmapped_pair = 0\n+\tmapped_single = 0\n+\tunmapped = 0\n+\tfile1 = open(FILE1)\n+\tfile2 = open(FILE2)\n+\tboucle = 1\n+\toutfile = open(OUT, 'w')\n+\tline1 = file1.readline()\n+\tline2 = 
file2.readline()\n+\twhile line1:\n+\t\tdata1 = line1.split()\n+\t\tdata2 = line2.split()\n+\t\tif data1[0][0] == '@':\n+\t\t\toutfile.write(line1)\n+\t\telse:\n+\t\t\twhile data1[1] == '256' or data1[1] == '272':\n+\t\t\t\tline1 = file1.readline()\n+\t\t\t\tdata1 = line1.split()\n+\t\t\twhile data2[1] == '256' or data2[1] == '272':\n+\t\t\t\tline2 = file2.readline()\n+\t\t\t\tdata2 = line2.split()\n+\t\t\tif data1[0].replace('/1','').replace('/2','') != data2[0].replace('/1','').replace('/2','') and data1[0].replace('_1','').replace('_2','') != data2[0].replace('_1','').replace('_2',''):\n+\t\t\t\tmot = 'Probleme in the mapping : '+data1[0]+' and '+data2[0]+' are different. Read mates should be identified with /1 and /2'\n+\t\t\t\tsys.exit(mot)\n+\t\t\telse:\n+\t\t\t\tif data1[1] == '4' and data2[1] == '4': #reads unmapped\n+\t\t\t\t\tunmapped += 1\n+\t\t\t\t\toutfile.write(data1[0].replace('/1','').replace('/2','').replace('_1','').replace('_2','')+'\\t77\\t'+data1[2]+'\\t'+data1[3]+'\\t'+data1[4]+'\\t'+data1[5]+'\\t'+data1[6]+'\\t'+data1[7]+'\\t'+data1[8]+'\\t'+data1[9]+'\\t'+data1[10]+'\\t'+'\\t'.join(find_info(data1[11:],'YT:Z:UP'))+'\\n')\n+\t\t\t\t\toutfile.write(data2[0].replace('/1','').replace('/2','').replace('_1','').replace('_2','')+'\\t141\\t'+data2[2]+'\\t'+data2[3]+'\\t'+data2[4]+'\\t'+data2[5]+'\\t'+data2[6]+'\\t'+data2[7]+'\\t'+data2[8]+'\\t'+data2[9]+'\\t'+data2[10]+'\\t'+'\\t'.join(find_info(data2[11:],'YT:Z:UP'))+'\\n')\n+\t\t\t\telif data1[1] == '0' and data2[1] == '4': #mate 1 mappant en F\n+\t\t\t\t\tmapped_single += 
1\n+\t\t\t\t\toutfile.write(data1[0].replace('/1','').replace('/2','').replace('_1','').replace('_2','')+'\\t73\\t'+data1[2]+'\\t'+data1[3]+'\\t'+data1[4]+'\\t'+data1[5]+'\\t=\\t'+data1[3]+'\\t0\\t'+data1[9]+'\\t'+data1[10]+'\\t'+'\\t'.join(find_info(data1[11:],'YT:Z:UP'))+'\\n')\n+\t\t\t\t\toutfile.write(data2[0].replace('/1','').replace('/2','').replace('_1','').replace('_2','')+'\\t133\\t'+data1[2]+'\\t'+data1[3]+'\\t'+data2[4]+'\\t'+data2[5]+'\\t=\\t'+data1[3]+'\\t0\\t'+data2[9]+'\\t'+data2[10]+'\\t'+'\\t'.join(find_info(data2[11:],'YT:Z:UP'))+'\\n')\n+\t\t\t\telif data1[1] == '16' and data2[1] == '4': #mate 1 mappant en R\n+\t\t\t\t\tmapped_single += 1\n+\t\t\t\t\toutfile.write(data1[0].replace('/1','').replace('/2','').re"..b',int(data1[3]))-int(data2[3]))+1)+\'\\t\'+data1[9]+\'\\t\'+data1[10]+\'\\t\'+\'\\t\'.join(find_info(data1[11:],\'YT:Z:DP\'))+\'\\n\')\n+\t\t\t\t\t\t\t\toutfile.write(data2[0].replace(\'/1\',\'\').replace(\'/2\',\'\').replace(\'_1\',\'\').replace(\'_2\',\'\')+\'\\t145\\t\'+data2[2]+\'\\t\'+data2[3]+\'\\t\'+data2[4]+\'\\t\'+data2[5]+\'\\t=\\t\'+data1[3]+\'\\t\'+str((CIGAR(data1[5],int(data1[3]))-int(data2[3]))+1)+\'\\t\'+data2[9]+\'\\t\'+data2[10]+\'\\t\'+\'\\t\'.join(find_info(data2[11:],\'YT:Z:DP\'))+\'\\n\')\n+\t\t\t\t\telif OR == \'fr\':#pour une bonne orientation FR\n+\t\t\t\t\t\tif int(data1[3]) < int(data2[3]) and ((CIGAR(data2[5],int(data2[3]))-int(data1[3]))+1) >= MIN and ((CIGAR(data2[5],int(data2[3]))-int(data1[3]))+1) <= 
MAX:\n+\t\t\t\t\t\t\toutfile.write(data1[0].replace(\'/1\',\'\').replace(\'/2\',\'\').replace(\'_1\',\'\').replace(\'_2\',\'\')+\'\\t99\\t\'+data1[2]+\'\\t\'+data1[3]+\'\\t\'+data1[4]+\'\\t\'+data1[5]+\'\\t=\\t\'+data2[3]+\'\\t\'+str((CIGAR(data2[5],int(data2[3]))-int(data1[3]))+1)+\'\\t\'+data1[9]+\'\\t\'+data1[10]+\'\\t\'+\'\\t\'.join(find_info(data1[11:],\'YT:Z:CP\'))+\'\\n\')\n+\t\t\t\t\t\t\toutfile.write(data2[0].replace(\'/1\',\'\').replace(\'/2\',\'\').replace(\'_1\',\'\').replace(\'_2\',\'\')+\'\\t147\\t\'+data2[2]+\'\\t\'+data2[3]+\'\\t\'+data2[4]+\'\\t\'+data2[5]+\'\\t=\\t\'+data1[3]+\'\\t-\'+str((CIGAR(data2[5],int(data2[3]))-int(data1[3]))+1)+\'\\t\'+data2[9]+\'\\t\'+data2[10]+\'\\t\'+\'\\t\'.join(find_info(data2[11:],\'YT:Z:CP\'))+\'\\n\')\n+\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\tif int(data1[3]) < int(data2[3]):\n+\t\t\t\t\t\t\t\toutfile.write(data1[0].replace(\'/1\',\'\').replace(\'/2\',\'\').replace(\'_1\',\'\').replace(\'_2\',\'\')+\'\\t97\\t\'+data1[2]+\'\\t\'+data1[3]+\'\\t\'+data1[4]+\'\\t\'+data1[5]+\'\\t=\\t\'+data2[3]+\'\\t\'+str((CIGAR(data2[5],int(data2[3]))-int(data1[3]))+1)+\'\\t\'+data1[9]+\'\\t\'+data1[10]+\'\\t\'+\'\\t\'.join(find_info(data1[11:],\'YT:Z:DP\'))+\'\\n\')\n+\t\t\t\t\t\t\t\toutfile.write(data2[0].replace(\'/1\',\'\').replace(\'/2\',\'\').replace(\'_1\',\'\').replace(\'_2\',\'\')+\'\\t145\\t\'+data2[2]+\'\\t\'+data2[3]+\'\\t\'+data2[4]+\'\\t\'+data2[5]+\'\\t=\\t\'+data1[3]+\'\\t-\'+str((CIGAR(data2[5],int(data2[3]))-int(data1[3]))+1)+\'\\t\'+data2[9]+\'\\t\'+data2[10]+\'\\t\'+\'\\t\'.join(find_info(data2[11:],\'YT:Z:DP\'))+\'\\n\')\n+\t\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\t\toutfile.write(data1[0].replace(\'/1\',\'\').replace(\'/2\',\'\').replace(\'_1\',\'\').replace(\'_2\',\'\')+\'\\t97\\t\'+data1[2]+\'\\t\'+data1[3]+\'\\t\'+data1[4]+\'\\t\'+data1[5]+\'\\t=\\t\'+data2[3]+\'\\t-\'+str((CIGAR(data1[5],int(data1[3]))-int(data2[3]))+1)+\'\\t\'+data1[9]+\'\\t\'+data1[10]+\'\\t\'+\'\\t\'.join(find_info(data1[11:],\'YT:Z:DP\'))+\'\\n\')\n+
\t\t\t\t\t\t\t\toutfile.write(data2[0].replace(\'/1\',\'\').replace(\'/2\',\'\').replace(\'_1\',\'\').replace(\'_2\',\'\')+\'\\t145\\t\'+data2[2]+\'\\t\'+data2[3]+\'\\t\'+data2[4]+\'\\t\'+data2[5]+\'\\t=\\t\'+data1[3]+\'\\t\'+str((CIGAR(data1[5],int(data1[3]))-int(data2[3]))+1)+\'\\t\'+data2[9]+\'\\t\'+data2[10]+\'\\t\'+\'\\t\'.join(find_info(data2[11:],\'YT:Z:DP\'))+\'\\n\')\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tsys.exit(\'bug\')\n+\t\t\t\telse:\n+\t\t\t\t\tos.system(\'echo "\'+data1[1]+\' \'+data2[1]+\'"\')\n+\t\t\t\t\tsys.exit(\'Probleme in the formating of mapping file\')\n+\t\tline1 = file1.readline()\n+\t\tline2 = file2.readline()\n+\toutfile.close()\n+\tos.system(\'echo "Mapped pair: \'+str(mapped_pair)+\'"\')\n+\tos.system(\'echo "Mapped single (mate1 or mate2): \'+str(mapped_single)+\'"\')\n+\tos.system(\'echo "Unmapped (mate1 and mate2): \'+str(unmapped)+\'"\')\n+\t\n+\n+\n+def __main__():\n+\t#Parse Command Line\n+\tparser = optparse.OptionParser(usage="python %prog [options]\\n\\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr")\n+\t# Wrapper options.\n+\tparser.add_option( \'\', \'--file1\', dest=\'file1\', help=\'Mate1 sam file\')\n+\tparser.add_option( \'\', \'--file2\', dest=\'file2\', help=\'Mate2 sam file\')\n+\tparser.add_option( \'\', \'--out\', dest=\'out\', help=\'Output file name\')\n+\tparser.add_option( \'\', \'--min\', dest=\'min\', help=\'minimal insert size to accept the pair as properly mapped\')\n+\tparser.add_option( \'\', \'--max\', dest=\'max\', help=\'maximal insert size to accept the pair as properly mapped\')\n+\tparser.add_option( \'\', \'--orient\', dest=\'orient\', help=\'Expected orientation of paired reads\')\n+\t(options, args) = parser.parse_args()\n+\t\n+\t\n+\tmerge2sam(options.file1, options.file2, options.out, int(options.min), int(options.max), options.orient)\n+\t\n+if __name__ == "__main__": __main__()\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/reEstimateN.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/reEstimateN.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,287 @@\n+\r\n+#\r\n+# Copyright 2014 CIRAD\r\n+#\r\n+# This program is free software; you can redistribute it and/or modify\r\n+# it under the terms of the GNU General Public License as published by\r\n+# the Free Software Foundation; either version 3 of the License, or\r\n+# (at your option) any later version.\r\n+#\r\n+# This program is distributed in the hope that it will be useful,\r\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+# GNU General Public License for more details.\r\n+#\r\n+# You should have received a copy of the GNU General Public License\r\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\r\n+# write to the Free Software Foundation, Inc.,\r\n+# 51 Franklin Street, Fifth Floor, Boston,\r\n+# MA 02110-1301, USA.\r\n+#\r\n+#\r\n+\r\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time, threading, datetime, multiprocessing\r\n+\r\n+from Bio.Seq import Seq\r\n+from Bio.Alphabet import generic_dna\r\n+from Bio import SeqIO\r\n+from Bio.SeqRecord import SeqRecord\r\n+\r\n+def stop_err( msg ):\r\n+ sys.stderr.write( "%s\\n" % msg )\r\n+ sys.exit()\r\n+\r\n+def run_job (cmd_line, ERROR, ID):\r\n+\tprint cmd_line\r\n+\ttry:\r\n+\t\ttmp = (tempfile.NamedTemporaryFile().name)+\'-\'+ID+\'.error\'\r\n+\t\t# print tmp\r\n+\t\terror = open(tmp, \'w\')\r\n+\t\tproc = subprocess.Popen( args=cmd_line, shell=True, stderr=error)\r\n+\t\treturncode = proc.wait()\r\n+\t\terror.close()\r\n+\t\terror = open( tmp, \'rb\' )\r\n+\t\tstderr = \'\'\r\n+\t\tbuffsize = 1048576\r\n+\t\ttry:\r\n+\t\t\twhile True:\r\n+\t\t\t\tstderr += error.read( buffsize )\r\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\r\n+\t\t\t\t\tbreak\r\n+\t\texcept OverflowError:\r\n+\t\t\tpass\r\n+\t\terror.close()\r\n+\t\tos.remove(tmp)\r\n+\t\tif returncode != 0:\r\n+\t\t\traise Exception, stderr\r\n+\texcept Exception, 
e:\r\n+\t\tstop_err( ERROR + str( e ) )\r\n+\r\n+\r\n+def recal_ins(FILE):\r\n+\tLIST = []\r\n+\tfichier = open(FILE)\r\n+\ti = 0\r\n+\tfor line in fichier:\r\n+\t\tdata = line.split()\r\n+\t\tif data != []:\r\n+\t\t\tif line[0] != \'@\':\r\n+\t\t\t\tif i == 0:\r\n+\t\t\t\t\ti = 1\r\n+\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\telse:\r\n+\t\t\t\t\ti = 0\r\n+\tif LIST == []:\r\n+\t\treturn \'NA\'\r\n+\telse:\r\n+\t\treturn sum(LIST)/len(LIST)\r\n+\t\r\n+def estimateN(chromosome, debut, fin, OR, sam, debut_rec, fin_rec, MIN_READ):\r\n+\tLIST = []\r\n+\tfichier = open(sam)\r\n+\tfor line in fichier:\r\n+\t\tdata = line.split()\r\n+\t\tif data != []:\r\n+\t\t\tif line[0] != \'@\':\r\n+\t\t\t\tif data[6] == \'=\' and data[2] == chromosome:\r\n+\t\t\t\t\tif int(data[3]) <= int(data[7]):#for selection of read spanning the zone\r\n+\t\t\t\t\t\tif int(data[3]) <= debut and fin <= int(data[7]) and debut_rec <= int(data[3]) and int(data[7]) <= fin_rec:\r\n+\t\t\t\t\t\t\tif data[1] == \'83\' or data[1] == \'163\' or data[1] == \'99\' or data[1] == \'147\':\r\n+\t\t\t\t\t\t\t\t#concordant reads\r\n+\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\telif data[1] == \'81\':#mate1 R et mate2 F et il s\'agit de mate1\r\n+\t\t\t\t\t\t\t\tif int(data[3]) < int(data[7]) and OR == \'rf\':\r\n+\t\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\t\telif int(data[3]) > int(data[7]) and OR == \'fr\':\r\n+\t\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t\tprint data[0:9]\r\n+\t\t\t\t\t\t\t\t\tsys.exit(\'There is a probleme in estimateN 1\')\r\n+\t\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\telif data[1] == \'161\':#mate1 R et mate2 F et il s\'agit de mate2\r\n+\t\t\t\t\t\t\t\tif int(data[3]) > int(data[7]) and OR == \'rf\':\r\n+\t\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t\tprint data[0:9]\r\n+\t\t\t\t\t\t\t\t\tsys.exit(\'There 
is a probleme in estimateN 2\')\r\n+\t\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\t\telif int(data[3]) < int(data[7]) and OR == \'fr\':\r\n+\t\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\telif data[1] == \'97\':#mate1 F et mate2 R et il s\'agit de mate1\r\n+\t\t\t\t\t\t\t\tif int(data[3]) > int(data[7]) and OR == \'rf\':\r\n+\t\t\t\t\t\t\t\t\tLIST.append(abs(int(data[8])))\r\n+\t\t\t\t\t\t\t\t\tprint data[0:9]\r\n+\t\t\t\t\t\t\t\t\tsys.exit(\'There is a probleme in estimateN 3\')\r\n+\t\t\t\t\t\t\t\t\t# print data[0:9]\r\n+\t\t\t\t\t\t\t\telif int(data[3]) < i'..b'\t\tsys.exit(\'--out argument is missing\')\r\n+\t\r\n+\tconfig = ConfigParser.RawConfigParser()\r\n+\tconfig.read(options.config)\r\n+\t\r\n+\t#identification of chromosomes to exclude\r\n+\tCHR = options.exclude.split(\'=\')\r\n+\t\r\n+\t#Calculation of chromosomes size\r\n+\t#1)Loading sequences\r\n+\trecord_dict = SeqIO.index(config.get(\'General\',\'ref\'), "fasta")\r\n+\t#2)Recording informations in a temporary file\r\n+\tf = tempfile.NamedTemporaryFile()\r\n+\tliste_id = []\r\n+\tfor n in record_dict:\r\n+\t\tif not(n in CHR):\r\n+\t\t\tf.write(n+\'\\t1\'+\'\\t\'+str(len(str(record_dict[n].seq)))+\'\\n\')\r\n+\t\t\tliste_id.append(n)\r\n+\t\t\tf.flush()\r\n+\t#3)Generating a filtered sam from which insertsize will be estimated (on well mapped reads)\r\n+\ttempo = tempfile.NamedTemporaryFile().name\r\n+\tparse1 = \'%s view -uf 2 -L %s %s | %s view -h -o %s - \' % (loca_programs.get(\'Programs\',\'samtools\'), f.name, config.get(\'Remove_dup\',\'out\'), loca_programs.get(\'Programs\',\'samtools\'), tempo+\'_reEstimateN_mapped.sam\')\r\n+\trun_job(parse1, \'bug in parsing 1\', "sam")\r\n+\tf.close()\r\n+\tINSERT = recal_ins(tempo+\'_reEstimateN_mapped.sam\')\r\n+\tos.system(\'echo "Estimated insert size : \'+str(INSERT)+\'"\')\r\n+\tos.remove(tempo+\'_reEstimateN_mapped.sam\')\r\n+\t# INSERT = 5400\r\n+\t\r\n+\toutseq = 
open(options.out,\'w\')\r\n+\t#4)Estimation of the number of N for each N region for each chromosome\r\n+\t# proc = int(os.popen(\'grep -c cores /proc/cpuinfo\').read().split()[0])\r\n+\tproc = int(options.thread)\r\n+\tliste = []\r\n+\tliste_process = []\r\n+\tfor n in liste_id:\r\n+\t\tif not(n in CHR):\r\n+\t\t\toutfile_seq = open(options.out+\'_\'+n+\'_for_identSV.fasta\',\'w\')\r\n+\t\t\tSeqIO.write(SeqRecord(record_dict[n].seq, id = n, description=\'\'),outfile_seq, "fasta")\r\n+\t\t\toutfile_seq.close()\r\n+\t\t\tcommand_line = "%s %s/estimate.py --config %s --min_read %s --fasta %s --seq %s --out %s --insert %s" % (loca_programs.get(\'Programs\',\'python\'), ScriptPath, options.config, str(options.min_read), options.out+\'_\'+n+\'_for_identSV.fasta\', n, options.out+\'_\'+n+\'_interm.fasta\', str(INSERT))\r\n+\t\t\t# t = multiprocessing.Process(target=restim, args=(tempo, config.get(\'Remove_dup\',\'out\'), config.get(\'General\',\'orient\'), int(options.min_read), str(record_dict[n].seq), n, options.out, INSERT,))\r\n+\t\t\tt = multiprocessing.Process(target=run_job, args=(command_line, \'bug in estimate.py\', n,))\r\n+\t\t\t# Sticks the thread in a list so that it remains accessible \r\n+\t\t\tliste_process.append(t)\r\n+\t\t\tliste.append(n)\r\n+\t\t\tif len(liste) == proc:\r\n+\t\t\t\t# Starts threads\r\n+\t\t\t\tfor process in liste_process:\r\n+\t\t\t\t\tprocess.start()\r\n+\t\t\t\t# This blocks the calling thread until the thread whose join() method is called is terminated.\r\n+\t\t\t\tfor process in liste_process:\r\n+\t\t\t\t\tprocess.join()\r\n+\t\t\t\t#the processes are done\r\n+\t\t\t\tfor k in liste:\r\n+\t\t\t\t\tos.remove(options.out+\'_\'+k+\'_for_identSV.fasta\')\r\n+\t\t\t\t\trecord_fasta = SeqIO.index(options.out+\'_\'+k+\'_interm.fasta\', "fasta")\r\n+\t\t\t\t\tSeqIO.write(SeqRecord(record_fasta[k].seq, id = k, description=\'\'),outseq, "fasta")\r\n+\t\t\t\t\tos.remove(options.out+\'_\'+k+\'_interm.fasta\')\r\n+\t\t\t\t\tdel 
record_fasta\r\n+\t\t\t\t#remove tested reference \r\n+\t\t\t\tliste = []\r\n+\t\t\t\tliste_process = []\r\n+\tif liste:\r\n+\t\t# Starts threads\r\n+\t\tfor process in liste_process:\r\n+\t\t\tprocess.start()\r\n+\t\t# This blocks the calling thread until the thread whose join() method is called is terminated.\r\n+\t\tfor process in liste_process:\r\n+\t\t\tprocess.join()\r\n+\t\t#the processes are done\r\n+\t\tfor k in liste:\r\n+\t\t\tos.remove(options.out+\'_\'+k+\'_for_identSV.fasta\')\r\n+\t\t\trecord_fasta = SeqIO.index(options.out+\'_\'+k+\'_interm.fasta\', "fasta")\r\n+\t\t\tSeqIO.write(SeqRecord(record_fasta[k].seq, id = k, description=\'\'),outseq, "fasta")\r\n+\t\t\tos.remove(options.out+\'_\'+k+\'_interm.fasta\')\r\n+\t\t\tdel record_fasta\r\n+\t\t#remove tested reference \r\n+\t\tliste = []\r\n+\t\tliste_process = []\r\n+\t#5)Outputing sequence where N where not estimated\r\n+\tfor n in CHR:\r\n+\t\tif n in record_dict:\r\n+\t\t\tSeqIO.write(SeqRecord(record_dict[n].seq, id = k, description=\'\'),outseq, "fasta")\r\n+\toutseq.close()\r\n+\r\n+if __name__ == "__main__": __main__()\n\\ No newline at end of file\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/reEstimateN.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/reEstimateN.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,34 @@ +<tool id="reEstimateN" name="reEstimateN" version="0.1"> + <description> : Re-estimate N in multifasta (N restimated are converted to S)</description> > + <requirements> + <requirement type="package" version="1.2">samtools</requirement> + </requirements> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/reEstimateN.py + --config $config + --exclude $exclude + --min_read $min_read + --out $n_estimated_fasta + + </command> + <inputs> + <param name="config" type="data" label="The conf file generated by the workflow after steps 1_create_conf to 4_filter_sam" /> + <param name="exclude" type="text" value="No_exclude" label="Sequence names separated with '=' to exclude from N re-estimation" /> + <param name="min_read" type="integer" value="30" label="The minimal read number requested to make the N estimation" /> + <param name="prefix" type="text" label="Identifier for output" /> + </inputs> + <outputs> + <data format="fasta" name="n_estimated_fasta" label="${tool.name} : $prefix N_estimated" /> + </outputs> + <help> + + .. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/scaffremodler_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/scaffremodler_wrapper.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| b'@@ -0,0 +1,303 @@\n+\r\n+#\r\n+# Copyright 2014 CIRAD\r\n+#\r\n+# This program is free software; you can redistribute it and/or modify\r\n+# it under the terms of the GNU General Public License as published by\r\n+# the Free Software Foundation; either version 3 of the License, or\r\n+# (at your option) any later version.\r\n+#\r\n+# This program is distributed in the hope that it will be useful,\r\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+# GNU General Public License for more details.\r\n+#\r\n+# You should have received a copy of the GNU General Public License\r\n+# along with this program; if not, see <http://www.gnu.org/licenses/> or\r\n+# write to the Free Software Foundation, Inc.,\r\n+# 51 Franklin Street, Fifth Floor, Boston,\r\n+# MA 02110-1301, USA.\r\n+#\r\n+#\r\n+\r\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, ConfigParser, operator, time, random, multiprocessing, datetime, math, re\r\n+\r\n+\r\n+def stop_err( msg ):\r\n+\tsys.stderr.write( "%s\\n" % msg )\r\n+\tsys.exit()\r\n+\r\n+def run_job (cmd_line, ERROR):\r\n+\tprint str(datetime.datetime.now())+" : "+cmd_line\r\n+\tsys.stdout.flush()\r\n+\ttry:\r\n+\t\ttmp = tempfile.NamedTemporaryFile().name\r\n+\t\terror = open(tmp, \'w\')\r\n+\t\tproc = subprocess.Popen( args=cmd_line, shell=True, stderr=error)\r\n+\t\treturncode = proc.wait()\r\n+\t\terror.close()\r\n+\t\terror = open( tmp, \'rb\' )\r\n+\t\tstderr = \'\'\r\n+\t\tbuffsize = 1048576\r\n+\t\ttry:\r\n+\t\t\twhile True:\r\n+\t\t\t\tstderr += error.read( buffsize )\r\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\r\n+\t\t\t\t\tbreak\r\n+\t\texcept OverflowError:\r\n+\t\t\tpass\r\n+\t\terror.close()\r\n+\t\tos.system(\'rm \'+tmp)\r\n+\t\tif returncode != 0:\r\n+\t\t\traise Exception, stderr\r\n+\texcept Exception, e:\r\n+\t\tstop_err( ERROR + str( e ) )\r\n+\tfinally:\r\n+\t\treturn returncode\r\n+\r\n+\r\n+def 
main(job):\r\n+\r\n+\ttry:\r\n+\t\t#Modify the different paths for working in sub folders.\r\n+\t\tregexSamFile = re.compile("(--sam\\s.+)")\r\n+\t\tregexOutFile = re.compile("(--out\\s.+)")\r\n+\t\tsamFile = os.path.splitext(regexSamFile.search(job).group(1).split()[1])[0]\r\n+\t\toutFile = regexOutFile.search(job).group(1).split()[1]\r\n+\t\tjob = job.replace(\'--sam \',\'--sam ../\')\r\n+\t\tjob = job.replace(\'--config \',\'--config ../\')\r\n+\r\n+\t\t# Create a dir based on the discordant type\r\n+\t\tWORKING_DIR = tempfile.mkdtemp(prefix="temp_"+samFile+\'_\', dir=os.getcwd()).split(\'/\')[-1]+\'/\'\r\n+\t\tos.chdir(WORKING_DIR)\r\n+\t\treturnCode = run_job(job, "error on the job : "+job+"\\n")\r\n+\texcept Exception as e:\r\n+\t\tprint e\r\n+\tfinally:\r\n+\t\tif os.path.isfile(outFile):\r\n+\t\t\tos.rename(outFile, os.pardir+"/"+outFile)\r\n+\t\tos.chdir(os.pardir)\r\n+\t\tshutil.rmtree(WORKING_DIR)\r\n+\t\treturn returnCode\r\n+\r\n+\r\n+# def worker(listJobs, out_q):\r\n+\t# """\r\n+\r\n+\t# """\r\n+\t# outdict = {}\r\n+\t# # print "###"\r\n+\t# # print listJobs\r\n+\t# # print "###"\r\n+\t# for job in listJobs:\r\n+\t\t# try:\r\n+\t\t\t# # print "job : "+job\r\n+\t\t\t# outdict[job] = main(job)\r\n+\t\t# except Exception as e:\r\n+\t\t\t# outdict[job] = 0\r\n+\t\t\t# sys.stderr.write(format(e))\r\n+\t\t\t# pass\r\n+\t# out_q.put(outdict)\r\n+\r\n+def __main__():\r\n+\t#Parse Command Line\r\n+\tparser = optparse.OptionParser(usage="python %prog [options]\\n\\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr\\n\\n"\r\n+\t"Is a wrapper for ApMap tools")\r\n+\t# Wrapper options.\r\n+\tparser.add_option( \'\', \'--tool\', dest=\'tool\', default=\'bowtie2_single\', help=\'The tool used : bowtie, bowtie2, bowtie2_single, bwa, bwa_mem, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--ref\', dest=\'ref\', default=\'not_filled\', help=\'The multifasta reference file\')\r\n+\tparser.add_option( \'\', \'--q1\', dest=\'q1\', 
default=\'not_filled\', help=\'The mate1 fastq file\')\r\n+\tparser.add_option( \'\', \'--q2\', dest=\'q2\', default=\'not_filled\', help=\'The mate2 fastq file\')\r\n+\tparser.add_option( \'\', \'--orient\', dest=\'orient\', default=\'rf\', help=\'The expected orientation: rf or fr, [default: %default]\')\r\n+\tparser.add_option( \'\', \'--mini\', dest=\'mini\', default=\'2500\', help=\'The minimum insert size (integer), [default: %default]\')\r\n+\tparse'..b'+\t\t\tchrFile = config.get(\'General\',\'chr\')\r\n+\t\t\tconfig.set(\'General\',\'chr\', "../"+chrFile)\r\n+\t\t\twith open(options.prefix+".conf", \'wb\') as configfile:\r\n+\t\t\t\tconfig.write(configfile)\r\n+\r\n+\t\t\tpool = multiprocessing.Pool(processes=nbProcs)\r\n+\t\t\tresultsJobs = pool.map(main, liste_id)\r\n+\r\n+\t\t\tfor i, job in enumerate(resultsJobs):\r\n+\t\t\t\tif job != 0:\r\n+\t\t\t\t\tprint("Sorry the job : \\n"+liste_id[i]+"\\n" \\\r\n+\t\t\t\t\t\t "could not be completed due to an error.\\n" \\\r\n+\t\t\t\t\t\t "Please read the error log file for more details.")\r\n+\r\n+\t\t\t#rewrite the chromosome file to his initial value\r\n+\t\t\tchrFile = config.get(\'General\',\'chr\')[3:]\r\n+\t\t\tconfig.set(\'General\',\'chr\', chrFile)\r\n+\t\t\twith open(options.prefix+".conf", \'wb\') as configfile:\r\n+\t\t\t\tconfig.write(configfile)\r\n+\r\n+\t\telif options.orient == \'fr\':\r\n+\t\t\tselect1 = \'%s %s/7_select_on_cov.py --config %s.conf --sam %s_chr_fr.bam --out %s_chr_fr.score\' % (loca_programs.get(\'Programs\',\'python\'), pathname, options.prefix, options.prefix, options.prefix)\r\n+\t\t\tliste_id.append(select1)\r\n+\t\t\tselect2 = \'%s %s/7_select_on_cov.py --config %s.conf --sam %s_chr_ff.bam --out %s_chr_ff.score\' % (loca_programs.get(\'Programs\',\'python\'), pathname, options.prefix, options.prefix, options.prefix)\r\n+\t\t\tliste_id.append(select2)\r\n+\t\t\tselect3 = \'%s %s/7_select_on_cov.py --config %s.conf --sam %s_chr_rr.bam --out %s_chr_rr.score\' % 
(loca_programs.get(\'Programs\',\'python\'), pathname, options.prefix, options.prefix, options.prefix)\r\n+\t\t\tliste_id.append(select3)\r\n+\t\t\tselect4 = \'%s %s/7_select_on_cov.py --config %s.conf --sam %s_chr_rf.bam --out %s_chr_rf.score\' % (loca_programs.get(\'Programs\',\'python\'), pathname, options.prefix, options.prefix, options.prefix)\r\n+\t\t\tliste_id.append(select4)\r\n+\t\t\tselect5 = \'%s %s/7_select_on_cov.py --config %s.conf --sam %s_del.bam --out %s_del.score\' % (loca_programs.get(\'Programs\',\'python\'), pathname, options.prefix, options.prefix, options.prefix)\r\n+\t\t\tliste_id.append(select5)\r\n+\t\t\tselect6 = \'%s %s/7_select_on_cov.py --config %s.conf --sam %s_ins.bam --out %s_ins.score\' % (loca_programs.get(\'Programs\',\'python\'), pathname, options.prefix, options.prefix, options.prefix)\r\n+\t\t\tliste_id.append(select6)\r\n+\t\t\tselect7 = \'%s %s/7_select_on_cov.py --config %s.conf --sam %s_rf.bam --out %s_rf.score\' % (loca_programs.get(\'Programs\',\'python\'), pathname, options.prefix, options.prefix, options.prefix)\r\n+\t\t\tliste_id.append(select7)\r\n+\t\t\tselect8 = \'%s %s/7_select_on_cov.py --config %s.conf --sam %s_ff.bam --out %s_ff.score\' % (loca_programs.get(\'Programs\',\'python\'), pathname, options.prefix, options.prefix, options.prefix)\r\n+\t\t\tliste_id.append(select8)\r\n+\t\t\tselect9 = \'%s %s/7_select_on_cov.py --config %s.conf --sam %s_rr.bam --out %s_rr.score\' % (loca_programs.get(\'Programs\',\'python\'), pathname, options.prefix, options.prefix, options.prefix)\r\n+\t\t\tliste_id.append(select9)\r\n+\r\n+\r\n+\t\t\t#Change the chromosome file to working in different sub folders\r\n+\t\t\tconfig = ConfigParser.RawConfigParser()\r\n+\t\t\tconfig.read(options.prefix+".conf")\r\n+\t\t\tchrFile = config.get(\'General\',\'chr\')\r\n+\t\t\tconfig.set(\'General\',\'chr\', "../"+chrFile)\r\n+\t\t\twith open(options.prefix+".conf", \'wb\') as 
configfile:\r\n+\t\t\t\tconfig.write(configfile)\r\n+\r\n+\t\t\tpool = multiprocessing.Pool(processes=nbProcs)\r\n+\t\t\tresultsJobs = pool.map(main, liste_id)\r\n+\r\n+\t\t\tfor i, job in enumerate(resultsJobs):\r\n+\t\t\t\tif job != 0:\r\n+\t\t\t\t\tprint("Sorry the job : \\n"+liste_id[i]+"\\n" \\\r\n+\t\t\t\t\t\t "could not be completed due to an error.\\n" \\\r\n+\t\t\t\t\t\t "Please read the error log file for more details.")\r\n+\r\n+\t\t\t#rewrite the chromosome file to his initial value\r\n+\t\t\tchrFile = config.get(\'General\',\'chr\')[3:]\r\n+\t\t\tconfig.set(\'General\',\'chr\', chrFile)\r\n+\t\t\twith open(options.prefix+".conf", \'wb\') as configfile:\r\n+\t\t\t\tconfig.write(configfile)\r\n+\t\telse:\r\n+\t\t\tmot = \'Unrecognized argument in --orient option: \'+options.orient\r\n+\t\t\tsys.exit(mot)\r\n+\t\tprint("Step 7 is finished (time : "+str(datetime.datetime.now()-t0)+")")\r\n+\t\tsys.stdout.flush()\r\n+if __name__ == "__main__": __main__()\r\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/shmwriter.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/shmwriter.c Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,34 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/ipc.h> +#include <sys/shm.h> + +int main(int argc, const char **argv) +{ + int shmid; + // give your shared memory an id, anything will do + key_t key = 123456; + char *shared_memory; + + // Setup shared memory, 11 is the size + if ((shmid = shmget(key, 11, IPC_CREAT | 0666)) < 0) + { + printf("Error getting shared memory id"); + exit(1); + } + // Attached shared memory + if ((shared_memory = shmat(shmid, NULL, 0)) == (char *) -1) + { + printf("Error attaching shared memory id"); + exit(1); + } + // copy "hello world" to shared memory + memcpy(shared_memory, "Hello World", sizeof("Hello World")); + // sleep so there is enough time to run the reader! + sleep(10); + // Detach and remove shared memory + shmdt(shmid); + shmctl(shmid, IPC_RMID, NULL); +} \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/Ref_for_SV_detection.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/Ref_for_SV_detection.fasta Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,81843 @@\n+>chr01\r\n+ATGCGGCCGCCCCGCTCCACTCGCTTCCTCTTATCTCTGCATCGTCAATTTATTAGTATT\r\n+CCTCGACCACTAATGTACTAATGCTTGTACAATTCTTGTTACCTTTTCTTCTGCTTGTTG\r\n+AATTCTTTTTTCATTTTTTCTCTCAATTTAATGAACATAAGAATGCTAACCTTTTTATTT\r\n+TTGAATTTAAAGTTTCTCAAAAAATTTTAATTGGAAGAGCTTCACTTGGTTTTCCTTTTA\r\n+TCGCAAAAACCAAGCAATCATTTTTGTACTGATTCAGGTGAAAGTGCAGTCATCTCAAAA\r\n+CTTGAATGTGGCAGATTCTTTGTCCAAGGATTCAGTAGCTACTATTTCAGATGAAGTGAA\r\n+ACAAGTGACTGATGGTAGTGATACCCAAGATGATGAGAACATGAGTAGTCCAGGTGTTGA\r\n+GTCAAAACATGAAGATGCTGTTGCCAAAAAGCAGTTATTTGTTTCTCAACAATCTAAAGG\r\n+AACAGCAGAGCAAAAGGTTGCTTCTTTTAGTGAAGATGTAAGAGAGATAAATACAGAAGC\r\n+AGAAGATGGTTGGCAACCTGTTCAAAGACCAAGGTCCATCGGAGGTTCAAGCCAGCAAAT\r\n+CAAGCATCAACGTACAAGCACCTGGAAAACCTACAACTATCAGATGAATGATGTTCCTAG\r\n+TGAAACTGTTCAATCCAAGCCACAGTTTTCTTATTTAAATAATGGGTATTATTTGCTTAA\r\n+GAAAAAGATAGTTATTCCTGGAAGCTTTAATGACAATCTTAACATGCAAGTTCAGTCACC\r\n+AGACACCAGATCTGGTCAGAAGGCGTATAAGGCTGTAACTTATCGGGTGAAGTCAGTGCC\r\n+TTCATCCACCAATCCTGAGATCAGTCATAACTCTTGGAGTGCTGTCGAAAGAACGACTTC\r\n+CCCATTAGATGCTCATGCACCCTATTATCGCCATGATAGCCAGGTATTAGAGAATCAGAA\r\n+GAACCTAATAGGTGGTGTCTCTGAGCCTCGCAACAATTTGGTTCATAGTTTTAGTAATTC\r\n+TCCATCATATAAAGATGTAGCACTGGCACCTCCAGGAACAATTGCCAAGATACATAGCCG\r\n+AAAGTTTCAGGAGAATATGCCATTGGAACAAGAGTTGTCTATTGGTGGCAATGCATCTGA\r\n+AATAAAGGAATCATTTTTGGCCGAGGAACATACAGAAAATGCTGCAGAGCTATCTGAGAT\r\n+ATCTAACATAACCCAAGATAAGGACACTGTCCAGGATGCGTTTTTAGACTCAGATAAAAA\r\n+AAGTTGAAGTTGATCATGAAGAAGAAAGAAAGGAAGATTGTGAAACAGAACAATTACTAG\r\n+AACCATCGTCTTCTGATTTGGAAGTGGCATCTTGTAGTAGCATGCTCACCAAGAATATCA\r\n+TTGACAATTGTGTATCTAGCAATGAGGTTCAAGGAGTTGAGCAAAATGAGAATCATGATC\r\n+AGAATTTGTCAACAAATACATCTGATAGGAAAAAATCTGAGTGTCCCATCACTGCAGAAA\r\n+GCAAAGAAGATAATCATGACGAAGCCTCATGTACCAATGTTGGTATCAGTTCTTACTCTA\r\n+GTCTCCATCAATTCAATTTCAAGAAGGTTCTCATCCCTGAGAAAACAGGTGGTGATTATC\r\n+CCACGATGGAACTACCACCTTCTAATTATGATGGGAGAGAGGTATCTAGCAAGAAGCTGT\r\n+CTGCATCTGCTGCACCGTTTAGCCCTTTCCCTGCTACAGTACTTGGTCCTGTTCCTGTAA\r\n+CTGTTGGTCTTCCTCCTAATGGTACAATTTCTGCAGTTACACCATGGCCATTGAGTGCCA\r\n+GTCTGCATGCTTC
ACCCACGGCTGTAATGCCAATGGTGCCTCCTATTTGTACCTCACCGC\r\n+ATCATCCTAATCCTTCTTCACCTAGGCCTTCGCACATTCTACGTCCCTTGCCATTTATAT\r\n+ATCCACCATATACCCAACCTCAAGTCATTCCAAATACCACTTTTGCTATGAACGGTAATG\r\n+GAAACCATTATCCATGGCAGTGCAACATTGGTGCAAACGTTCCTGACTTTGCGCCAGGAT\r\n+CAGTATGGTCTGGTTCTCATCCTGTGGACTTCTCATCTTTGCCACCCATCATTAGTCCAA\r\n+CTTCTGAATCCGTGTTGGAACCAATTATAACATCTCATTTAAGAACTGATGTGAGTCTAG\r\n+ACCTTCCATCGGATAACAATACTGAAGAAGAGAATAATGAGATATCCCAGATTATAGACA\r\n+TTTGTAAACCGCTGGATGGTAACTGGTTAGAGAAACGGGAATCTGAAGAATCTCACAGAA\r\n+ATAATACAAAAATTACCGACTTGGAATCTGAGACGGTTTTCAGACAAGATGCACAGCATA\r\n+GTGGTGGAAGGCATGTCTTTAAGAGTAGCAAAAAGTATGAGGGTGAGGGAAGCTTTAGCA\r\n+TATACATCAAGGGTAGAAATCGCCGTAAACAGACACTAAAGCTGGTTATAAGTTTGCTCA\r\n+ATAGGCCATATGGATCACAGTCATTTAAAGTTATATATAGCAGAGTAGTAAGAGGAAGTG\r\n+ATGTTATAAGTGCAACAGATATATCTTCCAGTGAAAATGTTACTTCCGACTAGTCACAAG\r\n+CAAAGAACACTACAAACTTCAAAATGGGCAGGAAAGAATTTTTTTTATTGTTCAAGCCAC\r\n+ATCCAGGAGGACGTGACTGAAAAATTAGCAATGTTGATAATTTAGTTATTGCTATTTTTC\r\n+AGGCAACTGATATTTTTTATTAAGAAGCCAAGATTGAACGTTCAGGAAGGTAAGCGACAA\r\n+ATCTCCTCATGGTCCAAGCCAGGTATTTCTTTGTTAATTAGCTTGACAATGTTACATAAC\r\n+TTGCAGATTTATTGCTCATCAATGCACCTCCATATACTTTATTGTCAATAACATCTCTTT\r\n+TGCTTTAATTTTGTGTTGGTTTCTCAAGCATACAATTAATTTATCCTGTTTAACATAAGT\r\n+TATTAGGAAGAAACTTTGTTTGTGAACTGATCATGATTCACCTTTGAAATCCCCAAATTT\r\n+TTTAATCTCGAGTATCTTCAGTATTGATATCTCACACCATCAAAAAGCTCATAGTATACC\r\n+ATCATGCATGTAATTTATAAGCCTGACTCGATATTTTAGTTCCGGTCTAGCCATTCCAAT\r\n+TTCCAAGCAACCGGTCAAGAGAGACAGACTGAGCGGATATTTTCAGGCTTCCGAAGATGT\r\n+TTTGATATGCATGAAACTAGTATCAACATCTTTTTGAGGCGCAGATTTTTGCTTCTATTG\r\n+TAGCTTTCTCTAATTTGCAATCTTGATAATCAGATTGCTGGAGTTAAGTTGTTTTTTCTA\r\n+ATTACCTCTCATCTGTTGCAAATTCCAGGGTATGGCTTACCATCAAATCCAACCTAATTC\r\n+CTTTTAGTTAAGTCTTCCACTTGCTATGTTTACTGCTGTTGATCTAATCATCTATCACAC\r\n+GTCTTGAGTCTCTGGGCTCATTGGATTTCCAGTCTACCAGGGATTATCAGTTCATTTGTT\r\n+GAAGCAGTACAACATTTAATAATGATGTTAAATCCGATTAGAAAGCTGGGTTGTGCATCC\r\n+TACCTCTTATGATTTTGTTTTTATGACATTATCTACAACCATTTTCTTGCATTCTCAAAT\r\n+TAGAGCAGATTTATCCGAAAATTAGACTTGTAATATAGCTGCCACACTACCTTTTTGGCA\r\
n+TTCCAGTACAGACTAGGTAGATTGCTTGAATTGTAATGTATGCCTCTGTTATTCAGTAGT\r\n+GATGCTCAAGCAAGATTTATGTGTGGTCGAGTTTTCAACTTGCACAACATATATAATTTT\r\n+'..b'CACTAGACATTTTGAACATATAAAATTAGCTCA\r\n+TAAGCATGAATTAATATAACCAAATATTCATACATAATATGAGCATTTTATCGACTAAAA\r\n+ATATTGAAAATGTTATATCTTTATTATTTTCAACATGAAATATATCAAGAAGTTTAAAAA\r\n+AGAAACCAAAATAAAGCCATATTCATGATATAAAAAATGAAAACTCTTTCATATCAAGAC\r\n+AGAGATTAATGGCTCATATACCAACTATCAGAAATTTTAATACCAAAATGTGTTTTTAAA\r\n+CTATTATAAGAGAAACATAAGAAAACTAATGCAAAAACAAAAATTGTATACGTCCAGCCA\r\n+TTGTTGTGAAGAATTTTAGCAATGATTTTTCTTTATATTTTGATACTTCTCGACTCAGAA\r\n+CTCCCACTTTAAATGATATAGGAAATCCTTTACACTTAATATTAAGCACAAGGATAAAAA\r\n+TCCTCGTTTATATAGACGTTCTCCCGTCGAGAACATTTATTATCACGAGATTTATAACGT\r\n+TGAAGAATTAATTTAAATTTATAACATTTAGATCATAATAAATTTTTTTAATTTTATTTA\r\n+ATAATAACAGTTACAATGAAAAATCGAAATATTTAAACCACAATAAATGAAGAAATTGAT\r\n+GATAACGAATAATAAACTTAATAAGAACGGTTACATATTAACCTTATATTTACATAAATA\r\n+AATACCAAAAATCACGAGGGATACGGCATGCAAATTTGGACATTAGAAATTTGAGATAAT\r\n+ATATTCTATATATAATTACGTAACAGGAAAATTTCACGAAATGTGGTCGTGCCGGAGTGG\r\n+TTATCGGGCATGACTAGAAATCATGTGGGCTCTGCCCGCGCAGGTTCGAATCCTGCCGAC\r\n+CACGTTTATAACATCATATCGAATTCGATTTTGGAATAATATACGCTGTTTGAAGCCATG\r\n+ACGTGATTTGGTAATAGTAATCCGGTTTTCTTAATTAATATTATATTGCATAAATTTTAA\r\n+AAATTATCTAGAAAAAACTAAGTATGTGTTATTCCTAAGTAAATTATATTTGTATTTTTT\r\n+TTGGACCACATTATAATTTTTTTTATGAAGACCACAAATTTTCTTAATAAATATAAATAT\r\n+GAATTAAATGTAAATATAAATTTTAAAAAAAATCAAAAATAAAAATAAGAGAAGAGGATA\r\n+GATTATATAACTTTACCCTTACGTCACTAAACTCGGAGGAGAGGAGAAATTACGATGGCG\r\n+TTGGGAGAGAAGATGAGATAAAGGTGATAACGATCGTGATCACAATCAAATCAAGTCTCA\r\n+TATTTTGATGGAACCGTCGTATCCCTCCCCATCCTAACTTTTATATCTTGATTAAGAGGT\r\n+CATTCCGAGGTTCTTTTTAGTTATTCCCCTTGAATCATGATGCATAATAAAGTTTCTTTT\r\n+TGTAGTTATTTGGTTAGATCCTTGGGTTTAATTCGTAGTTCTAAATATTAATGATAATTT\r\n+TTATAAGTTATTTTAGATCTCATTATTACCTAGGTTTGATAAAAAAATACTAAAAATAAA\r\n+AAAATAAGGTGAAGATTTTTGTGGGATCAAAAGGTTTAAAAGAAATTCCCTTCGATAATT\r\n+TTAAAGATGTGATAAATAATATAGATTTTATGTCATTCTCCGAAGAAAAAAAAAAAAGAA\r\n+TTAATTTTTAACATAGAAGAAGATAAAAGAATAATAAAGTTTATTACTAAATAAAAATAT\r\n+TATTT
TAAAGATTTCTTGTACTAAGATAAGAAGAAAAGACTAGTTAAACTTAAAAGATTT\r\n+TTATGAAGGAGATATATATCCTCCTACATCGTCTAAAGGTATATCATAAAAATTAGATAA\r\n+TACATAAAAATTGTAAAATACAAACCCGAAAAATAGATCTGTTAGTAAAAGAAGATACAT\r\n+CATCAATTATGACGTATTCTCCCAATAATATTCTTATTAATATTAAAAATATCAAAATTA\r\n+TACAAAAGCAAGATTTATACTACGATGCTGATAGTATTGATTTTTATTGGAGTAGATAAT\r\n+TTTATCGGATTACATTATTTTATTTTTATGAGAAGCATAAAAGTATTTTTTACCTCCTTA\r\n+TAAAAAAGAAAAATATTTATAAAGTCGATCATTAAAGATTTAGAAGAAAATGTTACTAAT\r\n+GCTAATATGAGGATCATTATTATTTATGATCTAGAAGCTATAAAAATTAATGAATATTCT\r\n+ATAATAAAGGAGTTAGATATGTATGCAAATAATTATATTGATAAGATTGATTTAGTGTTT\r\n+AGAGATTGTCAAGAAAGCTAATAAGATTGATTCCAAACAAATAAGAGTTTTATAAGTAAT\r\n+TAAGGTCTATAAGACAAATCGAAAAGTCATTGTTGATTGTGCGACTTCAAATTCTAAAAT\r\n+TTTTAAGAACATAATGTATAATAATAACATCCAATACTTTCCATAATTTTTTTTCCAATC\r\n+TTTAATTAGGAAATGATGTTGGAATATTAAATCAAACTTTGTTATTAGATATAATTTTGG\r\n+TTTAAGAGTCTTACTTAAGATTGGAGTTTCTATTATAGCTGAAGTTAAAGTTTTGAGTTT\r\n+ACCTCTCTATTTATAGAGGGCGTAATGTTTTAAATATATAATTGGGAGGAGAAATGAGGT\r\n+AAAGCAAAAATACACTTATAGATTTGGGGTCTAACAGATTGTAAGTTATCATGCTTAATA\r\n+TGAGTTTTTAGCTTTGAAATCTCTCATCTATTCTCATATATTTAAAGATTTGAATCTTAA\r\n+AAAATGAAAGCAAATGAAATTTGTGATGCCACCCCCAAGAATCACACTCCGATAAGATTC\r\n+ACAAACAAAACCCTAAAAATTATTAGAAAAACTAACCAAAGAAAAGAATCCCTATCCAAA\r\n+GAACTACAAACAGGTGAAACCACCAAAGCCTTTAAGCAAACAAATCTAAAGATCTGAACC\r\n+TAGAAAACATAAATGAAAATGAAAGCAAAAGCAACCAAGAAACCTGCATGCCATGTTCAA\r\n+AGAACCACATAACCATAAATTAAACCCTAGTATCTAACTAGAAAACCATCAAAAGAAATT\r\n+TCAATATATATAAAAGTTCAAGGAAACAATCAAGAAAATCTTGGGAATGACCACTTGATG\r\n+GAGTGAATGAGATAACAATCGCAAAAACTATAACAATTAAGATGGGGGGGGGGGATATAA\r\n+GATTAGGGATATAAGAGTTCCACGTAAATTCAAGGCACAAACATATTAGTCTTCAAGTGC\r\n+TTGTCCGGATTATCTATCTTGATAAAGTTATCAATTAAGAAAGATAACATTCTCATTGGA\r\n+TAGTATAGATGAACCTCACGATCATTAGGCACAAAGGACACTAGCAAATGGAGATGCAAC\r\n+TTTCCTTGCTAAATGACTTTGCAATGTCACTGATCAGTATATTTTCATTTACGCCAATAA\r\n+CTTATATATCATAATAGATAAATAAAATAAATTTGGGTGCTCTAATAAATCTATCTTTTT\r\n+AGATTTTAATATTACATAAATATTCTTTCATATTTTATTTTTAATGTTCATCCGATGCCA\r\n+ATCATGATATTACTATTAGCTAGTTGAATTAAAAGAAAATTCATAATTAAAAATA
GATAA\r\n+AGCATCCCTAATATTCAATCATTACTAAATATTAAAAATATCTTTACATAAGATATTAGT\r\n+ATCAATTTGAGTTTCATCATCACTATGTGTGTGCGCGCGTACGTGCACTCTATAATGATA\r\n+GAGATAATGATAAATATAAATATAAACAAAATTATTGAATAAATCATATTTATGGTCCAA\r\n+TATGTTAAAAGTTGTAATTAAAGGGATAAATATCAAAACTGATATCTACACTCAT\r\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/alignment.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/alignment.sam Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,243054 @@\n+@HD\tVN:1.4\tSO:queryname\n+@SQ\tSN:chr01\tLN:2513845\n+@SQ\tSN:chr02\tLN:2396575\n+@PG\tID:bowtie2\tVN:2.2.5\tCL:"/usr/local/bioinfo/bowtie2/2.2.5/bowtie2-align-s --wrapper basic-0 -D 20 -R 3 -N 0 -L 20 -i S,1,0.50 -x /bank/musa_acuminata/sample/Ref_for_SV_detection.fasta -q /bank/musa_acuminata/sample/reads_mate1_SV.fq --phred33 -p 1 -S /work/GALAXY/galaxy/database/files/040/dataset_40793.dat_mate1.sam"\tPN:bowtie2\n+HWUSI-EAS139:1:2000:100000:100000#0\t83\tchr01\t418660\t32\t101M\t=\t423377\t4818\tACATTGTAAGACAGCATATCACGATGAATTTACATTACATTGTGACTTACACTAGAACCACAATTTAGCCATCAAGTAGGTATCATCCCATTTTCCTCCTC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-20\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100000:100000#0\t163\tchr01\t423377\t42\t101M\t=\t418660\t-4818\tGCCATACCATCTTCATCCGAGAGGAGACTCTATGGAAAGTCCATAGAAAGTAGAAGAAACTTCTTAAAAGAAATTTATGATGCAACCACAATAGTATAGCT\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100001:100001#0\t83\tchr01\t963775\t42\t101M\t=\t968883\t5209\tCCACGGTGTAAAGGTTCACTAACAAAATTGATTTATTATATTAATTAAATTACTATGTAAACTATATAATGACATAAATAACATCTAATGATGACACGTAG\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100001:100001#0\t163\tchr01\t968883\t42\t101M\t=\t963775\t-5209\tTTTTCTATAGTAACTGTGAGCGACTGTAGCTAAATATAAAGTCCGCCGACTAATGACGTATCAGCGAAAACTGCCGCGGTGAGAAAGATGTGGTTGCTGAC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100002:100002#0\t83\tchr02\t2062004\t37\t101M\t=\t2066578\t4675\tTTTGAAGGAGGAGAGAGTTCAATACATATCTTCTCGGCTCTCATCTGGATTTGCTCCTCCTTTCTCCGGTTTGCTCTTCTATCCTCACCACTATCGGTGGC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-44\
tYT:Z:CP\n+HWUSI-EAS139:1:2000:100002:100002#0\t163\tchr02\t2066578\t42\t101M\t=\t2062004\t-4675\tCCAGAATCTTTATCAAATGAGTTATGATGTAATACTTAGAGAGTAACATTTAGTCTTTAAAAACTTTTATATTAAATTGTGATCAAAATAGTTAAAAAATG\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100003:100003#0\t137\tchr02\t972737\t35\t101M\t=\t972737\t0\tTGTCACATTGCCGGGAACATAACCAAAAAAAAAGGAAAAAACACATGTTTCACATTGAAAACTACAAATGGTCCCCACATCTATACACATTCATCTGAATC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-33\tYT:Z:UP\n+HWUSI-EAS139:1:2000:100003:100003#0\t69\tchr02\t972737\t0\t*\t=\t972737\t0\tNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tYT:Z:UP\n+HWUSI-EAS139:1:2000:100004:100004#0\t83\tchr01\t1096693\t42\t101M\t=\t1101510\t4918\tACTAGGCTTTTTCCTAGCCATTTATAGCTTATCCACCCATGTATTAGTTGCAGTCCTGCAATCCTTCTTCCTCATCTCTCACACACCTCACCAGTTCTGTG\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100004:100004#0\t163\tchr01\t1101510\t42\t101M\t=\t1096693\t-4918\tCATCCTCATTCAGGGAATGCGCAGATTCTCCTGGCTCTACTTTCCTCAACTCATCAAACAATCAGTTGTTGCCACCCTAAATCTTTGTTGATCGATGAAGA\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100005:100005#0\t83\tchr02\t1297858\t42\t101M\t=\t1302803\t5046\tATTACTAAAATGCAATTTCAATTTAATTTTATAGTTCAGCAGTGACTTCAAAATGGAGATTTACTTAAAGAAAATGAACATCCCATTGCAAACCACATCAC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100005:100005#0\t163\tchr02\t1302803\t42\t101M\t=\t1297858\t-5046\tTAAAAATTTACACATAATTTTTTAAAAATGAAATATATCTAATTAAAAAATTAAA
ACTATTATTAAATTAATTAATCATTCTAATACCATATATGTGTGAC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-E'..b'TTGTATCTAGCAATAGATCTATTTGGGTTTCGCTTAATTTGAAAATTCAC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-55\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99997:99997#0\t163\tchr02\t847796\t42\t101M\t=\t842340\t-5557\tTTCTATATCATTCTATATTTGTTAATCCATATTTGCTTATATCATAATCTTCTATTGGAACCTAAGGTCATTTTTTAATTAAAAAATTATTAATTATCATA\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99998:99998#0\t83\tchr01\t1160712\t42\t101M\t=\t1166480\t5869\tGGCAGAAAAAACGAGGCAACGACGTCGCCTCGTCGCCCTTTTTTGCACGGGGAGAAGGAAACCTTGGTTTCTTCTACCCACGTGGGTAGAAAAACGAGGCG\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99998:99998#0\t163\tchr01\t1166480\t34\t101M\t=\t1160712\t-5869\tATGGATACCGATGATTTCTTTGCGCTTCTGTACCCGTAGAAGCAGAACCGATCGCAGTTCGACCTCAAGGTTTGATTCCCGCCATTCATCTGGTTATGACA\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-30\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99999:99999#0\t83\tchr01\t2052922\t42\t101M\t=\t2057801\t4980\tCAGCTTGAAAGAGCGACCTCGTCATATAAGACGCGGTTAGATTGCCGACTCTTCGTCTTCCCTTCGACCCCACGCAGGCGCGTTTGAGATCCCAATCTCCC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99999:99999#0\t163\tchr01\t2057801\t42\t101M\t=\t2052922\t-4980\tTAATAAATACATACATAAAAATAAATATACAGACATTGTCACATAATAAATCTCTACATATAATGTAGCGTCCATGTATTTATTTGCCAATATAGAAATAG\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:9999:9999#0\t83\tchr02\t2121853\t36\t101M\t=\t2126404\t4652\tTCGCAA
AGATCCAATACTCGTAAATTCGGTAACCCTTCGAGCACGTTATCATCAATTATTGTGGTTGGGAGGGAGTCTCTTACCACTAAAGTTCTCAAACA\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-40\tYT:Z:CP\n+HWUSI-EAS139:1:2000:9999:9999#0\t163\tchr02\t2126404\t39\t101M\t=\t2121853\t-4652\tATTAAGTTATTAAATATATCACTTTAATAGTTTACTATAAGATTATGAATTTGATTTCTAGCTTCATCATTTATTTTTTTAAAAATATTAAATATAAATAA\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-59\tYT:Z:CP\n+HWUSI-EAS139:1:2000:999:999#0\t83\tchr01\t959031\t42\t101M\t=\t964314\t5384\tTCGGATGGCCTCAAAGCAAGTCTGTGGAGGTTCGGTATTGCTGTTCGGTTCATTTGGCTCCCACTTCACTCTAAAAATTGTATCTTTCGAACCTAAGCAGT\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:999:999#0\t163\tchr01\t964314\t42\t101M\t=\t959031\t-5384\tCTTGTGATCATTCTGTCATCATCAGCTATGCGACAGACGAGAGAGAGGAATGTAGGAACGAAAGAAGGAAGGAAGGAGTTACATGCTTGATTCTTTCTATC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99:99#0\t83\tchr01\t300771\t42\t101M\t=\t305502\t4832\tATTAAATAATATGATTTTTCATTAGCATCTTCATTTTTTTTAATTCAAGTAAGTATAAGTTTAATTCACTATAAAATAGTGATTATATAAATTATCATATT\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99:99#0\t163\tchr01\t305502\t42\t101M\t=\t300771\t-4832\tATATTTGTTGTTAGGATCATTTGATAAATTTCAACACAAAAGATTCATTCAGGTTGAATCTGTGCATGTGATAGGTGAATTAAATGTATAATTTTCAGTGT\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:9:9#0\t83\tchr01\t1498615\t42\t101M\t=\t1503674\t5160\tAGAGATCAGAGAGGCTTTAAATAGCAGCTGAGGCCGCTCAGATCTGGGAGAAAACGACAGAGGAGAAGAGGAGGGAGAGAGGGATTAGGGAAAGAAGGAAC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:9:9#0\t163\tchr01\t1503674\t39\t101M\t=\t1498615\t-5160\tCAGGTTTCATAAAATTTATCCATTCTAATAAATTTATGACCCATCTAATTTGATCGTATGATTGAAATGAGCTATGAAATATAGTATATTTTTGGAATAAT\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-59\tYT:Z:CP\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/bam_remove_redundancy.bam |
| b |
| Binary file scaffremodler/test-data/bam_remove_redundancy.bam has changed |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/chromosome_information.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/chromosome_information.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,2 @@ +chr01 2513845 +chr02 2396575 |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/circos_configuration.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/circos_configuration.txt Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| @@ -0,0 +1,65 @@ +[General] +chr = /work/GALAXY/galaxy/database/files/040/dataset_40792.dat +out_kar = /work/GALAXY/galaxy/database/files/040/dataset_40823.dat +out_n = /work/GALAXY/galaxy/database/files/040/dataset_40824.dat +orient = rf +cov = yes +frf = yes +ff = yes +rr = yes +ins = yes +delet = yes +chr_rr = yes +chr_rf = yes +chr_fr = yes +chr_ff = yes +read_rf = yes +read_fr = yes +read_ff = yes +read_rr = yes +read_ins = yes +read_del = yes +read_chr_rr = yes +read_chr_rf = yes +read_chr_fr = yes +read_chr_ff = yes +prop = yes +scaff_tile = no + +[Coverage] +cov = /work/GALAXY/galaxy/database/files/040/dataset_40825.dat +median_cov = 4.817 +mean_cov = -0.189583910387 + +[Discord_link] +frf = /work/GALAXY/galaxy/database/files/040/dataset_40826.dat +ff = /work/GALAXY/galaxy/database/files/040/dataset_40827.dat +rr = /work/GALAXY/galaxy/database/files/040/dataset_40828.dat +ins = /work/GALAXY/galaxy/database/files/040/dataset_40829.dat +delet = /work/GALAXY/galaxy/database/files/040/dataset_40830.dat +chr_rr = /work/GALAXY/galaxy/database/files/040/dataset_40831.dat +chr_rf = /work/GALAXY/galaxy/database/files/040/dataset_40832.dat +chr_fr = /work/GALAXY/galaxy/database/files/040/dataset_40834.dat +chr_ff = /work/GALAXY/galaxy/database/files/040/dataset_40833.dat + +[Discord_zone] +chr_rr = /work/GALAXY/galaxy/database/files/040/dataset_40820.dat +chr_rf = /work/GALAXY/galaxy/database/files/040/dataset_40817.dat +chr_fr = /work/GALAXY/galaxy/database/files/040/dataset_40819.dat +chr_ff = /work/GALAXY/galaxy/database/files/040/dataset_40818.dat + +[Read_link] +rf = /work/GALAXY/galaxy/database/files/040/dataset_40835.dat +fr = /work/GALAXY/galaxy/database/files/040/dataset_40836.dat +ff = /work/GALAXY/galaxy/database/files/040/dataset_40837.dat +rr = /work/GALAXY/galaxy/database/files/040/dataset_40838.dat +ins = /work/GALAXY/galaxy/database/files/040/dataset_40839.dat +del = /work/GALAXY/galaxy/database/files/040/dataset_40840.dat +chr_rr = 
/work/GALAXY/galaxy/database/files/040/dataset_40841.dat +chr_rf = /work/GALAXY/galaxy/database/files/040/dataset_40842.dat +chr_fr = /work/GALAXY/galaxy/database/files/040/dataset_40844.dat +chr_ff = /work/GALAXY/galaxy/database/files/040/dataset_40843.dat + +[Proportion] +prop = /work/GALAXY/galaxy/database/files/040/dataset_40810.dat + |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/config_file.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/config_file.txt Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| @@ -0,0 +1,56 @@ +[General] +ref = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/Ref_for_SV_detection.fasta +chr = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/chromosome_information.txt +mini = 2500 +maxi = 7500 +thread = 1 +tool = bowtie2_single +q1 = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/reads_mate1_SV.fq +q2 = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/reads_mate2_SV.fq +qual = 33 +orient = rf +index = y +rmindex = y +sd_multiplicator = 3.0 +restimate = y +mini_dis = 10000 +mult_max_cov = 10.0 +mult_min_cov = 0.25 +min_zone = 500 +min_gap = 300 +max_dist_merge = 1000.0 +yis = 0.0 +mis = 0.5 +yic = 0.0 +mic = 0.25 +min_score = 70 +ploid = 0.66 +exclude_chrom = no_exclude + +[Mapping] +out = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/alignment.sam + +[Single_filter] +rminput = n +filter_multi = y +asxs = 1 +qual = not_filled +type = sam + +[Remove_dup] +rminput = n +sort = coordinate +out = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/bam_remove_redundancy.bam +type = bam + +[Calc_coverage] + +[Trie_discord] +rminput = n + +[Score_discord] +mis = 2599.5 +mic = 1.25 + +[Ident_discord] + |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/coverage.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/coverage.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,4583644 @@\n+chr01\t55\t1\n+chr01\t56\t1\n+chr01\t57\t1\n+chr01\t58\t1\n+chr01\t59\t1\n+chr01\t60\t1\n+chr01\t61\t1\n+chr01\t62\t1\n+chr01\t63\t1\n+chr01\t64\t1\n+chr01\t65\t1\n+chr01\t66\t1\n+chr01\t67\t1\n+chr01\t68\t1\n+chr01\t69\t1\n+chr01\t70\t1\n+chr01\t71\t1\n+chr01\t72\t1\n+chr01\t73\t1\n+chr01\t74\t1\n+chr01\t75\t1\n+chr01\t76\t1\n+chr01\t77\t1\n+chr01\t78\t2\n+chr01\t79\t2\n+chr01\t80\t2\n+chr01\t81\t2\n+chr01\t82\t2\n+chr01\t83\t2\n+chr01\t84\t2\n+chr01\t85\t2\n+chr01\t86\t2\n+chr01\t87\t3\n+chr01\t88\t3\n+chr01\t89\t3\n+chr01\t90\t3\n+chr01\t91\t3\n+chr01\t92\t3\n+chr01\t93\t3\n+chr01\t94\t3\n+chr01\t95\t3\n+chr01\t96\t3\n+chr01\t97\t3\n+chr01\t98\t4\n+chr01\t99\t4\n+chr01\t100\t4\n+chr01\t101\t4\n+chr01\t102\t4\n+chr01\t103\t4\n+chr01\t104\t4\n+chr01\t105\t4\n+chr01\t106\t4\n+chr01\t107\t4\n+chr01\t108\t4\n+chr01\t109\t4\n+chr01\t110\t4\n+chr01\t111\t4\n+chr01\t112\t4\n+chr01\t113\t4\n+chr01\t114\t4\n+chr01\t115\t4\n+chr01\t116\t4\n+chr01\t117\t4\n+chr01\t118\t4\n+chr01\t119\t4\n+chr01\t120\t4\n+chr01\t121\t4\n+chr01\t122\t4\n+chr01\t123\t4\n+chr01\t124\t4\n+chr01\t125\t4\n+chr01\t126\t4\n+chr01\t127\t4\n+chr01\t128\t4\n+chr01\t129\t4\n+chr01\t130\t4\n+chr01\t131\t4\n+chr01\t132\t4\n+chr01\t133\t4\n+chr01\t134\t4\n+chr01\t135\t4\n+chr01\t136\t4\n+chr01\t137\t5\n+chr01\t138\t5\n+chr01\t139\t5\n+chr01\t140\t5\n+chr01\t141\t5\n+chr01\t142\t5\n+chr01\t143\t5\n+chr01\t144\t5\n+chr01\t145\t5\n+chr01\t146\t5\n+chr01\t147\t5\n+chr01\t148\t5\n+chr01\t149\t5\n+chr01\t150\t5\n+chr01\t151\t5\n+chr01\t152\t5\n+chr01\t153\t5\n+chr01\t154\t5\n+chr01\t155\t5\n+chr01\t156\t4\n+chr01\t157\t4\n+chr01\t158\t4\n+chr01\t159\t4\n+chr01\t160\t4\n+chr01\t161\t4\n+chr01\t162\t4\n+chr01\t163\t4\n+chr01\t164\t4\n+chr01\t165\t4\n+chr01\t166\t4\n+chr01\t167\t4\n+chr01\t168\t4\n+chr01\t169\t4\n+chr01\t170\t4\n+chr01\t171\t4\n+chr01\t172\t4\n+chr01\t173\t4\n+chr01\t174\t4\n+chr01\t175\t4\n+chr01\t176\t4\n+chr01\t177\t4\n+chr01\t178\t4\n+chr01\t179\t3\n+chr01\t180\t3\n+c
hr01\t181\t3\n+chr01\t182\t3\n+chr01\t183\t3\n+chr01\t184\t3\n+chr01\t185\t3\n+chr01\t186\t3\n+chr01\t187\t3\n+chr01\t188\t2\n+chr01\t189\t2\n+chr01\t190\t2\n+chr01\t191\t2\n+chr01\t192\t2\n+chr01\t193\t2\n+chr01\t194\t2\n+chr01\t195\t2\n+chr01\t196\t2\n+chr01\t197\t2\n+chr01\t198\t2\n+chr01\t199\t1\n+chr01\t200\t1\n+chr01\t201\t1\n+chr01\t202\t1\n+chr01\t203\t1\n+chr01\t204\t1\n+chr01\t205\t1\n+chr01\t206\t1\n+chr01\t207\t1\n+chr01\t208\t1\n+chr01\t209\t1\n+chr01\t210\t1\n+chr01\t211\t1\n+chr01\t212\t1\n+chr01\t213\t1\n+chr01\t214\t1\n+chr01\t215\t1\n+chr01\t216\t1\n+chr01\t217\t1\n+chr01\t218\t1\n+chr01\t219\t1\n+chr01\t220\t1\n+chr01\t221\t1\n+chr01\t222\t1\n+chr01\t223\t1\n+chr01\t224\t1\n+chr01\t225\t1\n+chr01\t226\t1\n+chr01\t227\t1\n+chr01\t228\t1\n+chr01\t229\t2\n+chr01\t230\t2\n+chr01\t231\t2\n+chr01\t232\t2\n+chr01\t233\t2\n+chr01\t234\t2\n+chr01\t235\t2\n+chr01\t236\t2\n+chr01\t237\t2\n+chr01\t238\t1\n+chr01\t239\t1\n+chr01\t240\t1\n+chr01\t241\t1\n+chr01\t242\t1\n+chr01\t243\t1\n+chr01\t244\t1\n+chr01\t245\t1\n+chr01\t246\t1\n+chr01\t247\t1\n+chr01\t248\t1\n+chr01\t249\t1\n+chr01\t250\t1\n+chr01\t251\t1\n+chr01\t252\t1\n+chr01\t253\t1\n+chr01\t254\t1\n+chr01\t255\t1\n+chr01\t256\t1\n+chr01\t257\t1\n+chr01\t258\t2\n+chr01\t259\t2\n+chr01\t260\t2\n+chr01\t261\t2\n+chr01\t262\t2\n+chr01\t263\t2\n+chr01\t264\t2\n+chr01\t265\t2\n+chr01\t266\t2\n+chr01\t267\t2\n+chr01\t268\t2\n+chr01\t269\t2\n+chr01\t270\t2\n+chr01\t271\t2\n+chr01\t272\t2\n+chr01\t273\t2\n+chr01\t274\t2\n+chr01\t275\t2\n+chr01\t276\t2\n+chr01\t277\t2\n+chr01\t278\t2\n+chr01\t279\t2\n+chr01\t280\t3\n+chr01\t281\t3\n+chr01\t282\t3\n+chr01\t283\t3\n+chr01\t284\t3\n+chr01\t285\t3\n+chr01\t286\t3\n+chr01\t287\t3\n+chr01\t288\t3\n+chr01\t289\t3\n+chr01\t290\t3\n+chr01\t291\t3\n+chr01\t292\t3\n+chr01\t293\t3\n+chr01\t294\t3\n+chr01\t295\t3\n+chr01\t296\t3\n+chr01\t297\t3\n+chr01\t298\t3\n+chr01\t299\t3\n+chr01\t300\t3\n+chr01\t301\t3\n+chr01\t302\t3\n+chr01\t303\t3\n+chr01\t304\t3\n+chr01\t305\t3\n+c
hr01\t306\t3\n+chr01\t307\t3\n+chr01\t308\t3\n+chr01\t309\t3\n+chr01\t310\t3\n+chr01\t311\t3\n+chr01\t312\t3\n+chr01\t313\t3\n+chr01\t314\t3\n+chr01\t315\t3\n+chr01\t316\t3\n+chr01\t317\t3\n+chr01\t318\t4\n+chr01\t319\t4\n+chr01\t320\t4\n+chr01\t321\t4\n+chr01\t322\t4\n+chr01\t323\t4\n+chr01\t324\t4\n+chr01\t325\t4\n+chr01\t326\t4\n+chr01\t327\t4\n+chr01\t328\t4\n+chr01\t329\t4\n+chr01\t330\t3\n+chr01\t331\t3\n+chr01\t332\t3\n+chr01\t333\t3\n+chr01\t334\t3\n+chr01\t335\t3\n+chr01\t336\t3\n+chr01\t337\t3\n+chr01\t338\t3\n+chr01\t339\t3\n+chr01\t340\t3\n+chr01\t341\t3\n+chr01\t342\t3\n+chr01\t343\t3\n+chr01\t344\t3\n+chr01\t345\t3\n+chr01\t346\t3\n+chr01\t347\t3\n+chr01\t348\t3\n+chr01\t349\t3\n+chr01\t350\t3\n+chr01\t351\t3\n+chr01\t352\t3\n+chr01\t353\t3\n+chr01\t354\t3\n+chr01\t355\t3\n+chr01\t356\t3\n+chr01\t357\t3\n+chr01\t358\t3\n+chr01\t359\t2\n+chr01\t360\t2\n+chr01\t361\t2\n+chr01\t362\t2\n+chr01\t363\t2\n+chr0'..b'7\t1\n+chr02\t2396298\t1\n+chr02\t2396299\t1\n+chr02\t2396300\t1\n+chr02\t2396301\t1\n+chr02\t2396302\t1\n+chr02\t2396303\t1\n+chr02\t2396304\t1\n+chr02\t2396305\t1\n+chr02\t2396306\t1\n+chr02\t2396307\t2\n+chr02\t2396308\t2\n+chr02\t2396309\t2\n+chr02\t2396310\t2\n+chr02\t2396311\t2\n+chr02\t2396312\t2\n+chr02\t2396313\t2\n+chr02\t2396314\t2\n+chr02\t2396315\t2\n+chr02\t2396316\t2\n+chr02\t2396317\t2\n+chr02\t2396318\t2\n+chr02\t2396319\t2\n+chr02\t2396320\t2\n+chr02\t2396321\t2\n+chr02\t2396322\t2\n+chr02\t2396323\t2\n+chr02\t2396324\t2\n+chr02\t2396325\t2\n+chr02\t2396326\t2\n+chr02\t2396327\t2\n+chr02\t2396328\t2\n+chr02\t2396329\t2\n+chr02\t2396330\t2\n+chr02\t2396331\t2\n+chr02\t2396332\t3\n+chr02\t2396333\t3\n+chr02\t2396334\t3\n+chr02\t2396335\t3\n+chr02\t2396336\t4\n+chr02\t2396337\t4\n+chr02\t2396338\t4\n+chr02\t2396339\t4\n+chr02\t2396340\t4\n+chr02\t2396341\t4\n+chr02\t2396342\t4\n+chr02\t2396343\t4\n+chr02\t2396344\t4\n+chr02\t2396345\t4\n+chr02\t2396346\t4\n+chr02\t2396347\t4\n+chr02\t2396348\t4\n+chr02\t2396349\t5\n+chr02\t2396350\t5
\n+chr02\t2396351\t5\n+chr02\t2396352\t5\n+chr02\t2396353\t5\n+chr02\t2396354\t5\n+chr02\t2396355\t5\n+chr02\t2396356\t5\n+chr02\t2396357\t5\n+chr02\t2396358\t5\n+chr02\t2396359\t5\n+chr02\t2396360\t5\n+chr02\t2396361\t5\n+chr02\t2396362\t5\n+chr02\t2396363\t5\n+chr02\t2396364\t5\n+chr02\t2396365\t5\n+chr02\t2396366\t5\n+chr02\t2396367\t4\n+chr02\t2396368\t4\n+chr02\t2396369\t4\n+chr02\t2396370\t4\n+chr02\t2396371\t4\n+chr02\t2396372\t4\n+chr02\t2396373\t4\n+chr02\t2396374\t4\n+chr02\t2396375\t4\n+chr02\t2396376\t4\n+chr02\t2396377\t4\n+chr02\t2396378\t4\n+chr02\t2396379\t4\n+chr02\t2396380\t4\n+chr02\t2396381\t4\n+chr02\t2396382\t4\n+chr02\t2396383\t4\n+chr02\t2396384\t4\n+chr02\t2396385\t4\n+chr02\t2396386\t4\n+chr02\t2396387\t4\n+chr02\t2396388\t4\n+chr02\t2396389\t4\n+chr02\t2396390\t4\n+chr02\t2396391\t4\n+chr02\t2396392\t4\n+chr02\t2396393\t4\n+chr02\t2396394\t4\n+chr02\t2396395\t4\n+chr02\t2396396\t5\n+chr02\t2396397\t5\n+chr02\t2396398\t5\n+chr02\t2396399\t5\n+chr02\t2396400\t5\n+chr02\t2396401\t5\n+chr02\t2396402\t5\n+chr02\t2396403\t5\n+chr02\t2396404\t5\n+chr02\t2396405\t5\n+chr02\t2396406\t5\n+chr02\t2396407\t5\n+chr02\t2396408\t4\n+chr02\t2396409\t4\n+chr02\t2396410\t4\n+chr02\t2396411\t4\n+chr02\t2396412\t4\n+chr02\t2396413\t4\n+chr02\t2396414\t4\n+chr02\t2396415\t4\n+chr02\t2396416\t4\n+chr02\t2396417\t4\n+chr02\t2396418\t4\n+chr02\t2396419\t4\n+chr02\t2396420\t4\n+chr02\t2396421\t4\n+chr02\t2396422\t4\n+chr02\t2396423\t4\n+chr02\t2396424\t4\n+chr02\t2396425\t4\n+chr02\t2396426\t4\n+chr02\t2396427\t4\n+chr02\t2396428\t4\n+chr02\t2396429\t4\n+chr02\t2396430\t4\n+chr02\t2396431\t4\n+chr02\t2396432\t5\n+chr02\t2396433\t4\n+chr02\t2396434\t4\n+chr02\t2396435\t4\n+chr02\t2396436\t4\n+chr02\t2396437\t3\n+chr02\t2396438\t3\n+chr02\t2396439\t3\n+chr02\t2396440\t3\n+chr02\t2396441\t3\n+chr02\t2396442\t3\n+chr02\t2396443\t3\n+chr02\t2396444\t3\n+chr02\t2396445\t3\n+chr02\t2396446\t3\n+chr02\t2396447\t3\n+chr02\t2396448\t3\n+chr02\t2396449\t3\n+chr02\t2396450\t2
\n+chr02\t2396451\t2\n+chr02\t2396452\t2\n+chr02\t2396453\t2\n+chr02\t2396454\t2\n+chr02\t2396455\t2\n+chr02\t2396456\t2\n+chr02\t2396457\t2\n+chr02\t2396458\t2\n+chr02\t2396459\t2\n+chr02\t2396460\t2\n+chr02\t2396461\t2\n+chr02\t2396462\t2\n+chr02\t2396463\t2\n+chr02\t2396464\t2\n+chr02\t2396465\t2\n+chr02\t2396466\t2\n+chr02\t2396467\t2\n+chr02\t2396468\t2\n+chr02\t2396469\t2\n+chr02\t2396470\t2\n+chr02\t2396471\t2\n+chr02\t2396472\t2\n+chr02\t2396473\t2\n+chr02\t2396474\t2\n+chr02\t2396475\t2\n+chr02\t2396476\t2\n+chr02\t2396477\t2\n+chr02\t2396478\t2\n+chr02\t2396479\t2\n+chr02\t2396480\t2\n+chr02\t2396481\t2\n+chr02\t2396482\t2\n+chr02\t2396483\t2\n+chr02\t2396484\t2\n+chr02\t2396485\t2\n+chr02\t2396486\t2\n+chr02\t2396487\t2\n+chr02\t2396488\t2\n+chr02\t2396489\t2\n+chr02\t2396490\t2\n+chr02\t2396491\t2\n+chr02\t2396492\t2\n+chr02\t2396493\t2\n+chr02\t2396494\t2\n+chr02\t2396495\t2\n+chr02\t2396496\t2\n+chr02\t2396497\t1\n+chr02\t2396498\t1\n+chr02\t2396499\t1\n+chr02\t2396500\t1\n+chr02\t2396501\t1\n+chr02\t2396502\t1\n+chr02\t2396503\t1\n+chr02\t2396504\t1\n+chr02\t2396505\t1\n+chr02\t2396506\t1\n+chr02\t2396507\t1\n+chr02\t2396508\t1\n+chr02\t2396509\t1\n+chr02\t2396510\t1\n+chr02\t2396511\t1\n+chr02\t2396512\t1\n+chr02\t2396513\t1\n+chr02\t2396514\t1\n+chr02\t2396515\t1\n+chr02\t2396516\t1\n+chr02\t2396517\t1\n+chr02\t2396518\t1\n+chr02\t2396519\t1\n+chr02\t2396520\t1\n+chr02\t2396521\t1\n+chr02\t2396522\t1\n+chr02\t2396523\t1\n+chr02\t2396524\t1\n+chr02\t2396525\t1\n+chr02\t2396526\t1\n+chr02\t2396527\t1\n+chr02\t2396528\t1\n+chr02\t2396529\t1\n+chr02\t2396530\t1\n+chr02\t2396531\t1\n+chr02\t2396532\t1\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/discordant_proportion.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/discordant_proportion.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,4911 @@\n+chr01\t1\t1000\t0.0\n+chr01\t1001\t2000\t0.0\n+chr01\t2001\t3000\t0.0\n+chr01\t3001\t4000\t0.0555555555556\n+chr01\t4001\t5000\t0.0\n+chr01\t5001\t6000\t0.0\n+chr01\t6001\t7000\t0.0\n+chr01\t7001\t8000\t0.0454545454545\n+chr01\t8001\t9000\t0.0\n+chr01\t9001\t10000\t0.0\n+chr01\t10001\t11000\t0.0\n+chr01\t11001\t12000\t0.0425531914894\n+chr01\t12001\t13000\t0.0\n+chr01\t13001\t14000\t0.0\n+chr01\t14001\t15000\t0.0181818181818\n+chr01\t15001\t16000\t0.0\n+chr01\t16001\t17000\t0.0222222222222\n+chr01\t17001\t18000\t0.0\n+chr01\t18001\t19000\t0.0\n+chr01\t19001\t20000\t0.0\n+chr01\t20001\t21000\t0.0\n+chr01\t21001\t22000\t0.0\n+chr01\t22001\t23000\t0.015873015873\n+chr01\t23001\t24000\t0.0\n+chr01\t24001\t25000\t0.0\n+chr01\t25001\t26000\t0.0\n+chr01\t26001\t27000\t0.0\n+chr01\t27001\t28000\t0.0\n+chr01\t28001\t29000\t0.0\n+chr01\t29001\t30000\t0.0\n+chr01\t30001\t31000\t0.0\n+chr01\t31001\t32000\t0.0\n+chr01\t32001\t33000\t0.0243902439024\n+chr01\t33001\t34000\t0.0\n+chr01\t34001\t35000\t0.0\n+chr01\t35001\t36000\t0.0238095238095\n+chr01\t36001\t37000\t0.0\n+chr01\t37001\t38000\t0.0\n+chr01\t38001\t39000\t0.0\n+chr01\t39001\t40000\t0.0\n+chr01\t40001\t41000\t0.0192307692308\n+chr01\t41001\t42000\t0.0\n+chr01\t42001\t43000\t0.0\n+chr01\t43001\t44000\t0.02\n+chr01\t44001\t45000\t0.0\n+chr01\t45001\t46000\t0.0\n+chr01\t46001\t47000\t0.0\n+chr01\t47001\t48000\t0.0\n+chr01\t48001\t49000\t0.0\n+chr01\t49001\t50000\t0.0\n+chr01\t50001\t51000\t0.0178571428571\n+chr01\t51001\t52000\t0.0\n+chr01\t52001\t53000\t0.0\n+chr01\t53001\t54000\t0.0\n+chr01\t54001\t55000\t0.0\n+chr01\t55001\t56000\t0.0\n+chr01\t56001\t57000\t0.0188679245283\n+chr01\t57001\t58000\t0.0\n+chr01\t58001\t59000\t0.0\n+chr01\t59001\t60000\t0.0\n+chr01\t60001\t61000\t0.0\n+chr01\t61001\t62000\t0.0\n+chr01\t62001\t63000\t0.0\n+chr01\t63001\t64000\t0.0\n+chr01\t64001\t65000\t0.0\n+chr01\t65001\t66000\t0.0\n+chr01\t66001\t67000\t0.0\n+chr01\t67001\t68000\t0.0\n+chr01\t68001\t69000\t0.0\n+ch
r01\t69001\t70000\t0.027027027027\n+chr01\t70001\t71000\t0.0\n+chr01\t71001\t72000\t0.0\n+chr01\t72001\t73000\t0.0\n+chr01\t73001\t74000\t0.0\n+chr01\t74001\t75000\t0.0\n+chr01\t75001\t76000\t0.0\n+chr01\t76001\t77000\t0.0172413793103\n+chr01\t77001\t78000\t0.0\n+chr01\t78001\t79000\t0.0\n+chr01\t79001\t80000\t0.0\n+chr01\t80001\t81000\t0.0\n+chr01\t81001\t82000\t0.0\n+chr01\t82001\t83000\t0.0\n+chr01\t83001\t84000\t0.0\n+chr01\t84001\t85000\t0.0\n+chr01\t85001\t86000\t0.0\n+chr01\t86001\t87000\t0.0\n+chr01\t87001\t88000\t0.0\n+chr01\t88001\t89000\t0.0\n+chr01\t89001\t90000\t0.0\n+chr01\t90001\t91000\t0.0\n+chr01\t91001\t92000\t0.0\n+chr01\t92001\t93000\t0.0\n+chr01\t93001\t94000\t0.0\n+chr01\t94001\t95000\t0.0\n+chr01\t95001\t96000\t0.0294117647059\n+chr01\t96001\t97000\t0.0\n+chr01\t97001\t98000\t0.0\n+chr01\t98001\t99000\t0.0425531914894\n+chr01\t99001\t100000\t0.0192307692308\n+chr01\t100001\t101000\t0.0\n+chr01\t101001\t102000\t0.016393442623\n+chr01\t102001\t103000\t0.0\n+chr01\t103001\t104000\t0.0\n+chr01\t104001\t105000\t0.0\n+chr01\t105001\t106000\t0.0\n+chr01\t106001\t107000\t0.02\n+chr01\t107001\t108000\t0.0\n+chr01\t108001\t109000\t0.0\n+chr01\t109001\t110000\t0.0\n+chr01\t110001\t111000\t0.0\n+chr01\t111001\t112000\t0.0\n+chr01\t112001\t113000\t0.0\n+chr01\t113001\t114000\t0.0\n+chr01\t114001\t115000\t0.0\n+chr01\t115001\t116000\t0.0\n+chr01\t116001\t117000\t0.0\n+chr01\t117001\t118000\t0.0\n+chr01\t118001\t119000\t0.0\n+chr01\t119001\t120000\t0.0\n+chr01\t120001\t121000\t0.0\n+chr01\t121001\t122000\t0.0\n+chr01\t122001\t123000\t0.0\n+chr01\t123001\t124000\t0.0\n+chr01\t124001\t125000\t0.0\n+chr01\t125001\t126000\t0.0\n+chr01\t126001\t127000\t0.0\n+chr01\t127001\t128000\t0.0\n+chr01\t128001\t129000\t0.0\n+chr01\t129001\t130000\t0.0\n+chr01\t130001\t131000\t0.0\n+chr01\t131001\t132000\t0.0\n+chr01\t132001\t133000\t0.0\n+chr01\t133001\t134000\t0.0\n+chr01\t134001\t135000\t0.0\n+chr01\t135001\t136000\t0.0\n+chr01\t136001\t137000\t0.0\n+chr01\t137001\t13800
0\t0.0\n+chr01\t138001\t139000\t0.0\n+chr01\t139001\t140000\t0.0\n+chr01\t140001\t141000\t0.0\n+chr01\t141001\t142000\t0.0\n+chr01\t142001\t143000\t0.0\n+chr01\t143001\t144000\t0.0\n+chr01\t144001\t145000\t0.0\n+chr01\t145001\t146000\t0.0\n+chr01\t146001\t147000\t0.0\n+chr01\t147001\t148000\t0.0\n+chr01\t148001\t149000\t0.0\n+chr01\t149001\t150000\t0.0\n+chr01\t150001\t151000\t0.0\n+chr01\t151001\t152000\t0.025641025641\n+chr01\t152001\t153000\t0.0\n+chr01\t153001\t154000\t0.0\n+chr01\t154001\t155000\t0.0161290322581\n+chr01\t155001\t156000\t0.0\n+chr01\t156001\t157000\t0.0\n+chr01\t157001\t158000\t0.0\n+chr01\t158001\t159000\t0.0\n'..b'1\t2258000\t0.0\n+chr02\t2258001\t2259000\t0.0\n+chr02\t2259001\t2260000\t0.0\n+chr02\t2260001\t2261000\t0.0\n+chr02\t2261001\t2262000\t0.0\n+chr02\t2262001\t2263000\t0.0\n+chr02\t2263001\t2264000\t0.0\n+chr02\t2264001\t2265000\t0.0\n+chr02\t2265001\t2266000\t0.0\n+chr02\t2266001\t2267000\t0.0\n+chr02\t2267001\t2268000\t0.0\n+chr02\t2268001\t2269000\t0.0243902439024\n+chr02\t2269001\t2270000\t0.0\n+chr02\t2270001\t2271000\t0.0\n+chr02\t2271001\t2272000\t0.0196078431373\n+chr02\t2272001\t2273000\t0.0\n+chr02\t2273001\t2274000\t0.0208333333333\n+chr02\t2274001\t2275000\t0.0\n+chr02\t2275001\t2276000\t0.0\n+chr02\t2276001\t2277000\t0.0\n+chr02\t2277001\t2278000\t0.0\n+chr02\t2278001\t2279000\t0.0172413793103\n+chr02\t2279001\t2280000\t0.0\n+chr02\t2280001\t2281000\t0.0166666666667\n+chr02\t2281001\t2282000\t0.0\n+chr02\t2282001\t2283000\t0.0\n+chr02\t2283001\t2284000\t0.0\n+chr02\t2284001\t2285000\t0.0\n+chr02\t2285001\t2286000\t0.0204081632653\n+chr02\t2286001\t2287000\t0.0\n+chr02\t2287001\t2288000\t0.0\n+chr02\t2288001\t2289000\t0.0\n+chr02\t2289001\t2290000\t0.0\n+chr02\t2290001\t2291000\t0.0222222222222\n+chr02\t2291001\t2292000\t0.0\n+chr02\t2292001\t2293000\t0.0\n+chr02\t2293001\t2294000\t0.0\n+chr02\t2294001\t2295000\t0.0\n+chr02\t2295001\t2296000\t0.0\n+chr02\t2296001\t2297000\t0.0\n+chr02\t2297001\t2298000\t0.0169491525424\n+c
hr02\t2298001\t2299000\t0.0\n+chr02\t2299001\t2300000\t0.0\n+chr02\t2300001\t2301000\t0.0\n+chr02\t2301001\t2302000\t0.0\n+chr02\t2302001\t2303000\t0.0\n+chr02\t2303001\t2304000\t0.0\n+chr02\t2304001\t2305000\t0.0\n+chr02\t2305001\t2306000\t0.0\n+chr02\t2306001\t2307000\t0.0\n+chr02\t2307001\t2308000\t0.0\n+chr02\t2308001\t2309000\t0.0\n+chr02\t2309001\t2310000\t0.0\n+chr02\t2310001\t2311000\t0.0\n+chr02\t2311001\t2312000\t0.0\n+chr02\t2312001\t2313000\t0.0\n+chr02\t2313001\t2314000\t0.0217391304348\n+chr02\t2314001\t2315000\t0.0\n+chr02\t2315001\t2316000\t0.0\n+chr02\t2316001\t2317000\t0.0172413793103\n+chr02\t2317001\t2318000\t0.0161290322581\n+chr02\t2318001\t2319000\t0.0\n+chr02\t2319001\t2320000\t0.0232558139535\n+chr02\t2320001\t2321000\t0.0\n+chr02\t2321001\t2322000\t0.0\n+chr02\t2322001\t2323000\t0.0\n+chr02\t2323001\t2324000\t0.0217391304348\n+chr02\t2324001\t2325000\t0.0\n+chr02\t2325001\t2326000\t0.0512820512821\n+chr02\t2326001\t2327000\t0.0\n+chr02\t2327001\t2328000\t0.0\n+chr02\t2328001\t2329000\t0.0\n+chr02\t2329001\t2330000\t0.0\n+chr02\t2330001\t2331000\t0.0\n+chr02\t2331001\t2332000\t0.0\n+chr02\t2332001\t2333000\t0.0217391304348\n+chr02\t2333001\t2334000\t0.0\n+chr02\t2334001\t2335000\t0.027027027027\n+chr02\t2335001\t2336000\t0.0\n+chr02\t2336001\t2337000\t0.0\n+chr02\t2337001\t2338000\t0.0\n+chr02\t2338001\t2339000\t0.0\n+chr02\t2339001\t2340000\t0.0\n+chr02\t2340001\t2341000\t0.0\n+chr02\t2341001\t2342000\t0.0185185185185\n+chr02\t2342001\t2343000\t0.0\n+chr02\t2343001\t2344000\t0.0\n+chr02\t2344001\t2345000\t0.0\n+chr02\t2345001\t2346000\t0.0\n+chr02\t2346001\t2347000\t0.0\n+chr02\t2347001\t2348000\t0.0\n+chr02\t2348001\t2349000\t0.0\n+chr02\t2349001\t2350000\t0.0\n+chr02\t2350001\t2351000\t0.0\n+chr02\t2351001\t2352000\t0.0\n+chr02\t2352001\t2353000\t0.0\n+chr02\t2353001\t2354000\t0.0\n+chr02\t2354001\t2355000\t0.0\n+chr02\t2355001\t2356000\t0.0\n+chr02\t2356001\t2357000\t0.0\n+chr02\t2357001\t2358000\t0.0285714285714\n+chr02\t2358001\t235900
0\t0.0\n+chr02\t2359001\t2360000\t0.0\n+chr02\t2360001\t2361000\t0.0\n+chr02\t2361001\t2362000\t0.0212765957447\n+chr02\t2362001\t2363000\t0.0\n+chr02\t2363001\t2364000\t0.0\n+chr02\t2364001\t2365000\t0.0\n+chr02\t2365001\t2366000\t0.0\n+chr02\t2366001\t2367000\t0.0\n+chr02\t2367001\t2368000\t0.0\n+chr02\t2368001\t2369000\t0.0\n+chr02\t2369001\t2370000\t0.0\n+chr02\t2370001\t2371000\t-0.2\n+chr02\t2371001\t2372000\t-0.2\n+chr02\t2372001\t2373000\t-0.2\n+chr02\t2373001\t2374000\t0.0\n+chr02\t2374001\t2375000\t0.0\n+chr02\t2375001\t2376000\t0.0\n+chr02\t2376001\t2377000\t0.0\n+chr02\t2377001\t2378000\t0.0\n+chr02\t2378001\t2379000\t0.0\n+chr02\t2379001\t2380000\t0.0\n+chr02\t2380001\t2381000\t-0.2\n+chr02\t2381001\t2382000\t0.0\n+chr02\t2382001\t2383000\t0.0\n+chr02\t2383001\t2384000\t0.0\n+chr02\t2384001\t2385000\t-0.2\n+chr02\t2385001\t2386000\t0.0\n+chr02\t2386001\t2387000\t0.0\n+chr02\t2387001\t2388000\t0.0\n+chr02\t2388001\t2389000\t0.0\n+chr02\t2389001\t2390000\t0.0\n+chr02\t2390001\t2391000\t0.0\n+chr02\t2391001\t2392000\t0.0\n+chr02\t2392001\t2393000\t0.0\n+chr02\t2393001\t2394000\t0.0\n+chr02\t2394001\t2395000\t0.0\n+chr02\t2395001\t2396000\t0.0\n+chr02\t2396001\t2396575\t0.0\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/discordant_zone_FF.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/discordant_zone_FF.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,2 @@ +#CHR-zone1 START END SIZE COV CHR-zone2 START END SIZE COV MISC READ SCORE STATUS +chr02 9991 15772 5781 2.0 chr02 100001 105629 5628 3.0 - 132 100 PASSED |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/discordant_zone_RR.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/discordant_zone_RR.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,2 @@ +#CHR-zone1 START END SIZE COV CHR-zone2 START END SIZE COV MISC READ SCORE STATUS +chr02 4230 9841 5611 2.0 chr02 94700 100013 5313 2.0 - 119 100 PASSED |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/discordant_zone_chr_FF.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/discordant_zone_chr_FF.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,3 @@ +#CHR-zone1 START END SIZE COV CHR-zone2 START END SIZE COV MISC READ SCORE STATUS +chr01 249988 254480 4492 3.0 chr02 700051 705732 5681 2.0 - 118 100 PASSED +chr01 2299986 2305476 5490 3.0 chr02 1601985 1607832 5847 2.0 - 146 100 PASSED |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/discordant_zone_chr_FR.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/discordant_zone_chr_FR.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,2 @@ +#CHR-zone1 START END SIZE COV CHR-zone2 START END SIZE COV MISC READ SCORE STATUS +chr01 2201114 2205850 4736 2.0 chr02 1598245 1601994 3749 3.0 - 102 100 PASSED |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/discordant_zone_chr_RF.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/discordant_zone_chr_RF.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,2 @@ +#CHR-zone1 START END SIZE COV CHR-zone2 START END SIZE COV MISC READ SCORE STATUS +chr01 2294227 2300010 5783 3.0 chr02 1746985 1752489 5504 2.0 - 137 100 PASSED |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/discordant_zone_chr_RR.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/discordant_zone_chr_RR.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,3 @@ +#CHR-zone1 START END SIZE COV CHR-zone2 START END SIZE COV MISC READ SCORE STATUS +chr01 347578 353015 5437 2.0 chr02 693333 700006 6673 2.0 - 125 100 PASSED +chr01 2194475 2199991 5516 2.0 chr02 1741402 1746995 5593 2.0 - 118 100 PASSED |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/discordant_zone_deletion.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/discordant_zone_deletion.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,2 @@ +#CHR-zone1 START END SIZE COV CHR-zone2 START END SIZE COV MISC READ SCORE STATUS +chr01 694179 700014 5835 3.0 chr01 800017 805557 5540 3.0 - 136 100 PASSED |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/list_type.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/list_type.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,112485 @@\n+HWUSI-EAS139:1:2000:100000:100000#0\t418660\t423377\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100001:100001#0\t963775\t968883\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100002:100002#0\t2062004\t2066578\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100004:100004#0\t1096693\t1101510\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100005:100005#0\t1297858\t1302803\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100006:100006#0\t1077179\t1082699\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100007:100007#0\t1261734\t1266884\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100008:100008#0\t510850\t515393\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100009:100009#0\t123241\t129025\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100010:100010#0\t1543152\t1547942\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100011:100011#0\t410127\t415464\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100012:100012#0\t81755\t86951\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100013:100013#0\t1093592\t1099613\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100014:100014#0\t1936107\t1941120\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100015:100015#0\t485310\t489853\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100016:100016#0\t2305858\t2311485\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100017:100017#0\t388167\t393181\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100018:100018#0\t1161914\t1167380\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:10001:10001#0\t214893\t220336\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100020:100020#0\t2158455\t2163558\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100021:100021#0\t689300\t694724\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100022:100022#0\t407396\t412936\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100023:100023#0\t2032049\t2036950\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100025:100025#0\t2364822\t2370289\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100026:100026#0\t370782\t375538\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100027:100027#0\t1304234\t1309512\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100028:100028#0\t243112\t248513\tchr01\tchr01\tok\n+HWUSI
-EAS139:1:2000:100029:100029#0\t335387\t330896\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:10002:10002#0\t1645751\t1640667\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100030:100030#0\t1859985\t1864926\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100031:100031#0\t1253590\t1258584\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100032:100032#0\t60391\t54735\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100033:100033#0\t1077939\t1083280\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100034:100034#0\t1444036\t1449173\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100037:100037#0\t1664760\t1659523\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100038:100038#0\t1201052\t1206377\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100039:100039#0\t167351\t172025\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:10003:10003#0\t237245\t242777\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100040:100040#0\t1037786\t1043358\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100041:100041#0\t102467\t108270\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100042:100042#0\t94212\t88389\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100043:100043#0\t218011\t222914\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100044:100044#0\t1545703\t1549823\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100045:100045#0\t1054437\t1059288\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100046:100046#0\t14606\t100486\tchr02\tchr02\tff\n+HWUSI-EAS139:1:2000:100047:100047#0\t606289\t610302\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100048:100048#0\t2169718\t2174579\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100049:100049#0\t1293953\t1299142\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:10004:10004#0\t2400613\t2406696\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100050:100050#0\t67950\t73192\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100052:100052#0\t319986\t324969\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100053:100053#0\t341004\t346383\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100054:100054#0\t998315\t1003266\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100055:100055#0\t659188\t664281\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100056:100056#0\t229880\t23
5173\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100057:100057#0\t640816\t646163\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100058:100058#0\t1283008\t1288083\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:100059:100059#0\t1351179\t1356435\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:10005:10005#0\t1485959\t1491953\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:100060:100060#0\t22397'..b':1:2000:99946:99946#0\t871070\t876751\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99947:99947#0\t1316916\t1322422\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99948:99948#0\t2246887\t2252416\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99949:99949#0\t1230448\t1235826\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:9994:9994#0\t394568\t399754\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99950:99950#0\t486730\t492436\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99951:99951#0\t42667\t47282\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99952:99952#0\t155695\t160525\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99953:99953#0\t2032895\t2037223\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99954:99954#0\t866748\t871117\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99955:99955#0\t673951\t678650\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99956:99956#0\t263248\t257358\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99957:99957#0\t265751\t270099\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99958:99958#0\t1139922\t1144046\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99959:99959#0\t833862\t839226\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:9995:9995#0\t1960543\t1965031\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99960:99960#0\t1448568\t1453312\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99961:99961#0\t957358\t962966\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99962:99962#0\t1743080\t1737988\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99963:99963#0\t1672794\t1677891\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99964:99964#0\t1052531\t1057290\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99965:99965#0\t1509916\t1513595\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99966:99966#0\t1338077\t1342847\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:
99967:99967#0\t1531826\t1537692\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99968:99968#0\t2026872\t2031654\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99969:99969#0\t682258\t686363\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:9996:9996#0\t1113705\t1118257\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99970:99970#0\t1888463\t1894458\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99972:99972#0\t1394848\t1400033\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99973:99973#0\t2300792\t2305660\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99974:99974#0\t1516863\t1521700\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99975:99975#0\t142665\t147190\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99976:99976#0\t1649402\t1654712\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99977:99977#0\t1534900\t1538646\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99978:99978#0\t1625663\t1620565\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99979:99979#0\t2358004\t2362817\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:9997:9997#0\t2213812\t2218951\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99980:99980#0\t1396970\t1402517\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99981:99981#0\t310231\t306039\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99982:99982#0\t71887\t76998\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99983:99983#0\t1756843\t1762667\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99984:99984#0\t410359\t415102\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99985:99985#0\t1591183\t1596647\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99986:99986#0\t914147\t919562\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99987:99987#0\t59052\t64311\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99988:99988#0\t1128903\t1134740\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99989:99989#0\t1820422\t1825312\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:9998:9998#0\t2237124\t2241615\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99990:99990#0\t2161741\t2167420\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99991:99991#0\t1514040\t1519352\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99992:99992#0\t1658486\t1653148\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99993:99
993#0\t2283970\t2289047\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99994:99994#0\t1884009\t1888745\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99995:99995#0\t459365\t464687\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99996:99996#0\t2297180\t2303138\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99997:99997#0\t842340\t847796\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:99998:99998#0\t1160712\t1166480\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99999:99999#0\t2052922\t2057801\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:9999:9999#0\t2121853\t2126404\tchr02\tchr02\tok\n+HWUSI-EAS139:1:2000:999:999#0\t959031\t964314\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:99:99#0\t300771\t305502\tchr01\tchr01\tok\n+HWUSI-EAS139:1:2000:9:9#0\t1498615\t1503674\tchr01\tchr01\tok\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/merge.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/merge.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,7 @@ +scaffold3 1 996558 FWD scaffold1 999999 999999 FWD REV contig +scaffold4 1 992957 FWD scaffold2 1 1 FWD FWD contig +scaffold4 1 992957 FWD scaffold3 1 1 FWD REV contig +scaffold5 1 914761 FWD scaffold2 999377 999377 FWD FWD contig +scaffold6 1 20001 FWD scaffold2 299998 319379 FWD FWD prob_fusion +scaffold7 1 19999 FWD scaffold3 480000 496560 FWD REV fusion +scaffold8 1 13848 FWD scaffold4 499999 511143 FWD FWD prob_fusion |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/new_config_file.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/new_config_file.txt Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| @@ -0,0 +1,67 @@ +[General] +ref = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/Ref_for_SV_detection.fasta +chr = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/chromosome_information.txt +mini = 2500 +maxi = 7500 +thread = 1 +tool = bowtie2_single +q1 = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/reads_mate1_SV.fq +q2 = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/reads_mate2_SV.fq +qual = 33 +orient = rf +index = n +rmindex = n +sd_multiplicator = 3.0 +restimate = y +mini_dis = 10000 +mult_max_cov = 10.0 +mult_min_cov = 0.25 +min_zone = 500 +min_gap = 300 +max_dist_merge = 1000.0 +yis = 0.0 +mis = 0.5 +yic = 0.0 +mic = 0.25 +min_score = 70 +ploid = 0.66 +fai_file = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/Ref_for_SV_detection.fasta.fai +exclude_chrom = no_exclude + +[Mapping] +out = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/alignment.sam + +[Single_filter] +rminput = n +filter_multi = y +asxs = 1 +qual = not_filled +out = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/quality_threshold.sam +type = sam + +[Remove_dup] +rminput = n +sort = coordinate +out = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/bam_remove_redundancy.bam +type = bam + +[Calc_coverage] +out = /home/galaxydev/galaxy/tools/SouthGreen/scaffremodler/test-data/coverage.txt +median_insert = 5199 +mean_insert = 5200.10472232 +standard_deviation_insert = 501.815580589 +mean_coverage = 4.85657925724 +median_coverage = 5.0 +mini = 3693.55325823 +maxi = 6704.44674177 + +[Trie_discord] +rminput = n +type = bam + +[Score_discord] +mis = 2599.5 +mic = 1.25 + +[Ident_discord] + |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/out_chr_ff.bam |
| b |
| Binary file scaffremodler/test-data/out_chr_ff.bam has changed |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/out_chr_fr.bam |
| b |
| Binary file scaffremodler/test-data/out_chr_fr.bam has changed |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/out_chr_rf.bam |
| b |
| Binary file scaffremodler/test-data/out_chr_rf.bam has changed |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/out_chr_rr.bam |
| b |
| Binary file scaffremodler/test-data/out_chr_rr.bam has changed |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/out_deletion.bam |
| b |
| Binary file scaffremodler/test-data/out_deletion.bam has changed |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/out_discarded.bam |
| b |
| Binary file scaffremodler/test-data/out_discarded.bam has changed |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/out_ff.bam |
| b |
| Binary file scaffremodler/test-data/out_ff.bam has changed |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/out_insertion.bam |
| b |
| Binary file scaffremodler/test-data/out_insertion.bam has changed |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/out_rf.bam |
| b |
| Binary file scaffremodler/test-data/out_rf.bam has changed |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/out_rr.bam |
| b |
| Binary file scaffremodler/test-data/out_rr.bam has changed |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/possible_junction_and_fusion.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/possible_junction_and_fusion.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,7 @@ +scaffold3 1 996558 FWD scaffold1 999999 999999 FWD REV contig +scaffold4 1 992957 FWD scaffold2 1 1 FWD FWD contig +scaffold4 1 992957 FWD scaffold3 1 1 FWD REV contig +scaffold5 1 914761 FWD scaffold2 999377 999377 FWD FWD contig +scaffold6 1 20001 FWD scaffold2 299998 319379 FWD FWD prob_fusion +scaffold7 1 19999 FWD scaffold3 480000 496560 FWD REV fusion +scaffold8 1 13848 FWD scaffold4 499999 511143 FWD FWD prob_fusion |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/quality_threshold.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/quality_threshold.sam Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,240426 @@\n+@HD\tVN:1.4\tSO:queryname\n+@SQ\tSN:chr01\tLN:2513845\n+@SQ\tSN:chr02\tLN:2396575\n+@PG\tID:bowtie2\tVN:2.2.5\tCL:"/usr/local/bioinfo/bowtie2/2.2.5/bowtie2-align-s\t--wrapper\tbasic-0\t-D\t20\t-R\t3\t-N\t0\t-L\t20\t-i\tS,1,0.50\t-x\t/bank/musa_acuminata/sample/Ref_for_SV_detection.fasta\t-q\t/bank/musa_acuminata/sample/reads_mate1_SV.fq\t--phred33\t-p\t1\t-S\t/work/GALAXY/galaxy/database/files/040/dataset_40793.dat_mate1.sam"\tPN:bowtie2\n+HWUSI-EAS139:1:2000:100000:100000#0\t83\tchr01\t418660\t32\t101M\t=\t423377\t4818\tACATTGTAAGACAGCATATCACGATGAATTTACATTACATTGTGACTTACACTAGAACCACAATTTAGCCATCAAGTAGGTATCATCCCATTTTCCTCCTC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-20\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100000:100000#0\t163\tchr01\t423377\t42\t101M\t=\t418660\t-4818\tGCCATACCATCTTCATCCGAGAGGAGACTCTATGGAAAGTCCATAGAAAGTAGAAGAAACTTCTTAAAAGAAATTTATGATGCAACCACAATAGTATAGCT\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100001:100001#0\t83\tchr01\t963775\t42\t101M\t=\t968883\t5209\tCCACGGTGTAAAGGTTCACTAACAAAATTGATTTATTATATTAATTAAATTACTATGTAAACTATATAATGACATAAATAACATCTAATGATGACACGTAG\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100001:100001#0\t163\tchr01\t968883\t42\t101M\t=\t963775\t-5209\tTTTTCTATAGTAACTGTGAGCGACTGTAGCTAAATATAAAGTCCGCCGACTAATGACGTATCAGCGAAAACTGCCGCGGTGAGAAAGATGTGGTTGCTGAC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100002:100002#0\t83\tchr02\t2062004\t37\t101M\t=\t2066578\t4675\tTTTGAAGGAGGAGAGAGTTCAATACATATCTTCTCGGCTCTCATCTGGATTTGCTCCTCCTTTCTCCGGTTTGCTCTTCTATCCTCACCACTATCGGTGGC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AA\tAS:i:0\tXS:i:-44\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100002:100002#0\t163\tchr02\t2066578\t42\t101M\t=\t2062004\t-4675\tCCAGAATCTTTATCAAATGAGTTATGATGTAATACTTAGAGAGTAACATTTAGTCTTTAAAAACTTTTATATTAAATTGTGATCAAAATAGTTAAAAAATG\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100003:100003#0\t137\tchr02\t972737\t35\t101M\t=\t972737\t0\tTGTCACATTGCCGGGAACATAACCAAAAAAAAAGGAAAAAACACATGTTTCACATTGAAAACTACAAATGGTCCCCACATCTATACACATTCATCTGAATC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-33\tYT:Z:UP\n+HWUSI-EAS139:1:2000:100003:100003#0\t69\tchr02\t972737\t0\t*\t=\t972737\t0\tNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tYT:Z:UP\n+HWUSI-EAS139:1:2000:100004:100004#0\t83\tchr01\t1096693\t42\t101M\t=\t1101510\t4918\tACTAGGCTTTTTCCTAGCCATTTATAGCTTATCCACCCATGTATTAGTTGCAGTCCTGCAATCCTTCTTCCTCATCTCTCACACACCTCACCAGTTCTGTG\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100004:100004#0\t163\tchr01\t1101510\t42\t101M\t=\t1096693\t-4918\tCATCCTCATTCAGGGAATGCGCAGATTCTCCTGGCTCTACTTTCCTCAACTCATCAAACAATCAGTTGTTGCCACCCTAAATCTTTGTTGATCGATGAAGA\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100005:100005#0\t83\tchr02\t1297858\t42\t101M\t=\t1302803\t5046\tATTACTAAAATGCAATTTCAATTTAATTTTATAGTTCAGCAGTGACTTCAAAATGGAGATTTACTTAAAGAAAATGAACATCCCATTGCAAACCACATCAC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:100005:100005#0\t163\tchr02\t1302803\t42\t101M\t=\t1297858\t-5046\tTAAAAATTTACACATAATTTTTTAAAAATGAAAT
ATATCTAATTAAAAAATTAAAACTATTATTAAATTAATTAATCATTCTAATACCATATATGTGTGAC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-E'..b'TTGTATCTAGCAATAGATCTATTTGGGTTTCGCTTAATTTGAAAATTCAC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-55\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99997:99997#0\t163\tchr02\t847796\t42\t101M\t=\t842340\t-5557\tTTCTATATCATTCTATATTTGTTAATCCATATTTGCTTATATCATAATCTTCTATTGGAACCTAAGGTCATTTTTTAATTAAAAAATTATTAATTATCATA\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99998:99998#0\t83\tchr01\t1160712\t42\t101M\t=\t1166480\t5869\tGGCAGAAAAAACGAGGCAACGACGTCGCCTCGTCGCCCTTTTTTGCACGGGGAGAAGGAAACCTTGGTTTCTTCTACCCACGTGGGTAGAAAAACGAGGCG\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99998:99998#0\t163\tchr01\t1166480\t34\t101M\t=\t1160712\t-5869\tATGGATACCGATGATTTCTTTGCGCTTCTGTACCCGTAGAAGCAGAACCGATCGCAGTTCGACCTCAAGGTTTGATTCCCGCCATTCATCTGGTTATGACA\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-30\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99999:99999#0\t83\tchr01\t2052922\t42\t101M\t=\t2057801\t4980\tCAGCTTGAAAGAGCGACCTCGTCATATAAGACGCGGTTAGATTGCCGACTCTTCGTCTTCCCTTCGACCCCACGCAGGCGCGTTTGAGATCCCAATCTCCC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99999:99999#0\t163\tchr01\t2057801\t42\t101M\t=\t2052922\t-4980\tTAATAAATACATACATAAAAATAAATATACAGACATTGTCACATAATAAATCTCTACATATAATGTAGCGTCCATGTATTTATTTGCCAATATAGAAATAG\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:9999:9999#0\t83\tchr02\t2121853\t36\t101M\t=\t
2126404\t4652\tTCGCAAAGATCCAATACTCGTAAATTCGGTAACCCTTCGAGCACGTTATCATCAATTATTGTGGTTGGGAGGGAGTCTCTTACCACTAAAGTTCTCAAACA\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-40\tYT:Z:CP\n+HWUSI-EAS139:1:2000:9999:9999#0\t163\tchr02\t2126404\t39\t101M\t=\t2121853\t-4652\tATTAAGTTATTAAATATATCACTTTAATAGTTTACTATAAGATTATGAATTTGATTTCTAGCTTCATCATTTATTTTTTTAAAAATATTAAATATAAATAA\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-59\tYT:Z:CP\n+HWUSI-EAS139:1:2000:999:999#0\t83\tchr01\t959031\t42\t101M\t=\t964314\t5384\tTCGGATGGCCTCAAAGCAAGTCTGTGGAGGTTCGGTATTGCTGTTCGGTTCATTTGGCTCCCACTTCACTCTAAAAATTGTATCTTTCGAACCTAAGCAGT\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:999:999#0\t163\tchr01\t964314\t42\t101M\t=\t959031\t-5384\tCTTGTGATCATTCTGTCATCATCAGCTATGCGACAGACGAGAGAGAGGAATGTAGGAACGAAAGAAGGAAGGAAGGAGTTACATGCTTGATTCTTTCTATC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99:99#0\t83\tchr01\t300771\t42\t101M\t=\t305502\t4832\tATTAAATAATATGATTTTTCATTAGCATCTTCATTTTTTTTAATTCAAGTAAGTATAAGTTTAATTCACTATAAAATAGTGATTATATAAATTATCATATT\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:99:99#0\t163\tchr01\t305502\t42\t101M\t=\t300771\t-4832\tATATTTGTTGTTAGGATCATTTGATAAATTTCAACACAAAAGATTCATTCAGGTTGAATCTGTGCATGTGATAGGTGAATTAAATGTATAATTTTCAGTGT\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:9:9#0\t83\tchr01\t1498615\t42\t101M\t=\t1503674\t5160\tAGAGATCAGAGAGGCTTTAAATAGCAGCTGAGGCCGCTCAGATCTGGGAGAAAACGACAGAGGAGAAGAGGAGGGAGAGAGGGATTAGGGAAAGAAGGAAC\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tYT:Z:CP\n+HWUSI-EAS139:1:2000:9:9#0\t163\tchr01\t1503674\t39\t101M\t=\t1498615\t-5160\tCAGGTTTCATAAAATTTATCCATTCTAATAAATTTATGACCCATCTAATTTGATCGTATGATTGAAATGAGCTATGAAATATAGTATATTTTTGGAATAAT\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\tAS:i:0\tXS:i:-59\tYT:Z:CP\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/reads_mate1_SV.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/reads_mate1_SV.fq Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,486100 @@\n+@HWUSI-EAS139:1:2000:1:1#0/1\n+ATACAAAGAAAATGCGACTTTATAAGCAAGTATTAATGAAATTGGAATTTTGTATGTGTAGTTCAGCTGAGATCCATTATTCCATATATGTACCGGAGTAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:2:2#0/1\n+GGCTTAAAGACTAAAATGAAACTGTTTTAGTGAATTCTGATTACAAGTTTCCTAGGATGGTTTTTCCTCATCCGAGAAACCTGCTATATGTCAAAAGAACG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:3:3#0/1\n+TATCTTCTTACATATGATTGACAATGTCCACAGATTGACAGCTTCACTTTTTGGTTGTGGGTGCTAAATCATCCATCAAGGAAGGAATATGTTAAATACCC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:4:4#0/1\n+CTGTTTACATCTTTCCTTAATCTACGCTTTTGATTTCCTCGAGAAAGCTTAAAATCCCGAATCCCCGGGTTGAAGGGTCCCTTATCCCCATTCATCCTACA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:5:5#0/1\n+TACTTGCTAATGGACATATTAAACACCATGCTAAATTAAAAGAATAAAAATACATTCTAAGTGAATATCTTTGCAAATGTTATAGAATTACATCTTTCTAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:6:6#0/1\n+CAGTGTTTCCACCACCACTTTCTGCCCTGTTTGTCTCCAAGTCACTTATCCTTAATTTCTTATTGTAAAAACATTTTCAAATTAACCAAAATTGTGGTTTG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:7:7#0/1\n+TCAGAATCGTTCGAGGGACAAAGCAGATCAGAATTGGAACCCAAACTTGCCTCTCTCCTCACCAAAAATTTATTTGTCGTGTTGGATGATGTCTGGAGTCA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:8:8#0/1\n+TAGCCCTGTGAAGGAATAAGGAAACACAATTTGAAACTGTTTATGCAATGTTACCATTTATTGGACAATGTTACATATATAAATGTGTTTTCGATAACAAT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:9:9#0/1\
n+GTTCCTTCTTTCCCTAATCCCTCTCTCCCTCCTCTTCTCCTCTGTCGTTTTCTCCCAGATCTGAGCGGCCTCAGCTGCTATTTAAAGCCTCTCTGATCTCT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:10:10#0/1\n+TAAGGGGCAAAAGGCATCACCTGCCAACTAAAGGGGGGTCTTCTGCTTAACGAATGAACCCACGGGTAAGATCCAGCAAGAACAGTCCCAACACCTAATTT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:11:11#0/1\n+CTCCTTTTGTTAGAAAGATGCTGACAAAACCACATTACTTTGGCTTGCAGGGAATGGGTGACAATTTATTGGACTGATGGGGCATTCTGTATAACTCACTG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:12:12#0/1\n+GGTTGCTTACATTGGTTCTTATAATGTCCTTTCTCACCACAGTTATAGCAAACAATATCTTTTCTTGATCTTGACATGCTCCTACCCATACGTGAATTGCT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:13:13#0/1\n+GATTCTCTCAGTAAATAAATTGACATGACTTCTAATTGAGCAGGTTTTGGCATTAATGGAAGAACTTGGGGTCAGGCCAGATCTTGTAACATATAGCCACC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:14:14#0/1\n+GCTATGAATAACAGGTACCTTGTTTTGATATAAAATTAAAACAAAAAATAATAAAAATATTAAAATGACATGTTTAGATATCAATATTTGTTTGATCCATA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:15:15#0/1\n+TGTTGTCGCCGCTGTCGCTGCTCGCAAACGCTGCCTCTGTCGCCGCTCGCGCCTCCTGCTGCTCGCCGCTCCTGCTCCCGCTACTCGCCGCTCGCGCCTCC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:16:16#0/1\n+AGGGAAGGTTTTTGTGAGGCGCTTGAGCTCTGTGTCATCATAACAGTAGTGTATATGTTTTCCACGCTCAACCTGATACTTGATGATTAGATAAGAAATTG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:17:17#0/1\n+TATACTTATCCCTATCCTACAACACCAGTAATAATTTG
AAAAACTATTTTACAAAACTCTAAACTTTTATAAAAATAAAATATTTGATTATTTATGTTATC\n++\n+A'..b'AAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121510:121510#0/1\n+ACGTGTCGCCGCCGACGCGTGGCAGGAAGACGCCGGTTCCGGCGGATTCTCTCCTCGCGCTGGGGCTGCTGAGGAAGACAGCTCTCATCCCAGATCCAGGC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121511:121511#0/1\n+AAGCTCAATGACCAATGCAGTCAACCACAAAAAGTTGTTACAAATTGCTTTGTCATTGCCATTTTTGAAGCACTAAAGTCCAAAATTTTGGTAATTGCATC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121512:121512#0/1\n+TTGAAGTGTTCACATACCACCACCAAGCCAGCTGTGTTGTTTTTTGTCAAAATTCTTGGATCTAATCACAACACCATGATTATTCACAACCAGAGCAATCC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121513:121513#0/1\n+TCACGTATATCACTCTAAAAATAACATTAAAGTGGTTGTGCATCCCTCTCTTGCATGCAGGTCTTTTCATCTCCAATTGAAACCTCACAGGGCATGGTCTC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121514:121514#0/1\n+AAGATGTCCACATTATTCCTACCACTACCCAACGATGATTCAAGTCCGTGCCATGACCGTCACCTAAATCCTCTTTGACTTTCCGGTTCTTGCGGTGCCAA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121515:121515#0/1\n+TTTTAGAAATTTATATGACAATTTCTCTAGAAATCACTTTGATGATTTATGTTGTGCATTGCCCGAAAATTCAAATCAACTCCTTAGGACAGTCCTTCACA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121516:121516#0/1\n+GTTATATTTTCTCCTAGGGTCTTCATGCCAACTTCATACTTGACCATATTTGTTAATGCAATGACATTTGCATTCTTATATGATGGGCCACATGCAACTTA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121517:121517#0/1\n+GGAATCCCCGAGCTCTGTGTATATTTACCATTGGGGATGGGGTAAGGAAATTGCTCAGGTTCTGGATGCATGTTAGCTTTATGTTTTCATTTTAA
TTAGGG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121518:121518#0/1\n+AAGTCCTCATGAGTAAAGAAATGACTGACAATTGTGAGGTGATTATGCCTACTATACTCATGTTGACTACACAACTCCCAGCCAACTCAAACATGGGAGTA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121519:121519#0/1\n+TTTACATGTGTTGGTAGTACTCAGTCATATGAGAATCAAGGGGGAAGGATGTGACATTTCTGATGTGCTTGATGTAGGTTATCTCAACTTGCTTTTATGTG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121520:121520#0/1\n+TGTCACCTCAGTGACTGTCTGCTCCACTGGGGGGAATCCAAGGTAGCAATGCCATGGAAACATGCCTGAGCACTTAAACCAGTTGAGAAAGACACACACAC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121521:121521#0/1\n+AATAAGTATATTAATAGAATTAATATTAGATGATAATTTAATAATAGAATAATTATAGTGACCAGTAGATCCTTTCCTTTATGGTTACTGTAGGGCTACAT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121522:121522#0/1\n+ATCAGCCCTCTCCATTTCCGACTCAACTAGGAGAGTCTTGAGAAAGAATTGCTTCTAACTCGACTATGTTTCTTGAGAAAGGATTTCTTCAATGCTATTCC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121523:121523#0/1\n+GAACAAGTATAGAAACCACTTTGATAGTTTCAGCTTTGTCAAAGGTTGGGCATGCTGAAATGGATATAATTGTCGGGAAGATAGGATGGCCTACAGATGGT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121524:121524#0/1\n+TGTTGTTCCATGGAATGCTGCTGAGAAGAAGTTTATTTGCCCTTGCCATGGATCTCAGGAGGACAACCAAGGCAAGGTTGTGAGAGGCCCTGCACCCTTGG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121525:121525#0/1\n+GCACATTACTTGCAAGCATGAACGTTGCTTCGTTCTCGAGCTCAATCGTCGTCTTCTCGACATCGATGTCC
GCATACATGGTGTTATAGGTATCATGGCTA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/reads_mate1_rf.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/reads_mate1_rf.fq Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,486612 @@\n+@HWUSI-EAS139:1:2000:1:1#0/1\n+AATGGGGTAGGCAATAGGTGTACGGAAGTTTCAGGTTTCCCAGTCGTTTCGCTTCCCAGGTACGCATCCGAAGTTTGGAGAACCGCCGTGGGCCAATAATT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:2:2#0/1\n+TAGGTAGAAATAAAATAAAAAAAATCAATATTCTTGTGCCCAAAGAATTGTGTACATATCTACCAACAAAATGCAATGACATATATAGAACTCATATATAA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:3:3#0/1\n+GTTCAGTTCCAAAGAAGTTCAGAATAAAAGAGTAATCAACATAAGATAAAAAGATACAGGGTTTATTTTCTTAGATTTCTCCCTTTTTTCTAGGAAAAATT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:4:4#0/1\n+NNNNNNNNNNNNNNNNNAACAGTTGGGTACGAAGCCACCCCGAAAAACTTGAGACGACGTCGTCGGGTATGAAGCCACCCTGGAAAACTCGTGACGATGCC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:5:5#0/1\n+ATTGAAGGAAAGCATCGAATTTACTTTGTTGTATGCTGATTCTGCTTTATTTGTGAAGCTAATAGGGCTTTGTTTTATAAGGTCGCAACAAGGAGAATTGT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:6:6#0/1\n+AAAGTAACACAAGAAGCACATTCTGATGCTATCATAATATGGTGGGCAATACGCTTAACATCCTTTGAAGCAAGGTCTACAATTTCGAATGATATCGCCCG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:7:7#0/1\n+CTATCATTTGAAGCTCTCAGTATACAGTAATTAAATAGTACTAATTATCACAAGCTATGGAGTATTTATTAGTTATGCTAGTAATTTTCAGGTAATTTGAT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:8:8#0/1\n+GTAAGTCCATAAAGAAGTACACTTAATTTTTCAGGCAAAGAGTGTGTGAAATTCAAAAAAAAAAAAACAGACTTATGAAATTAGAGAGGGAGAGAGAAATC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:9:9#0/1\
n+GGAGGATGACATGCGTCGGTAGGAAAATTGTTAGAAAATGTGATGGACTTCCTCTTTCCATCAAAGCTATTGCAGGGGTTTTAGCTTACAAGAACAGAAGC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:10:10#0/1\n+ATAAGCTCACTTGAAATTTCTGGCCAGCTTCATCTTTTGATAATGGCCTTCTTAGTTGTTGCATTCATCATCTTCAACAGTTCTCCAATTTTTGAAAGCAA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:11:11#0/1\n+GAGCCAGCGTTATTTTATAATTTTTTGTTGTAGAAGTGATTGTAGTGACATCCTCTTTCTTTTTCCCCCATACAGTTATCAGTGGTTAACTTCTGACTCAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:12:12#0/1\n+CTTAACATACTTTTGCTGTAGGGATGCCATATTTGTCACACAACTTCTTCATAAAGTCCTTTGATCCCTCTAGTGCTGCAGCCTCAGCCGAGGGACCGAAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:13:13#0/1\n+TATGAAGTTAATGGAATAAAAGGCATATGGGTTCCACATTGTCTAAGTGCGTATGGCATGTCATATTGTTGCCATGTTTGCATCGTATGTGTGGTTGCAAT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:14:14#0/1\n+CTCCATCCCCGGGGCTCCTGGCTTTTAGATATGTTAATAAAAGGACGGTCGGTCCGTGCTGTCCTGCTACAGTGGCAAGCCGCCACGTTTCAGATAGCTTT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:15:15#0/1\n+ACAGCAGTAGAGTATTAATTAGTGAAATTGAATGAATCCTCTTTCGGATTCTACAAAACATAAAGTCGCATGGAAAACCAAACAACTATTGATTTTTAAAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:16:16#0/1\n+TGCACAAAACATGCTACAAGAGAGGTTTTGTTTCAGTCCTACTCTTCTTTGTGTAGTAGATTTATTTCCCACACTAATGTAGCTTTCCCAATTCTGTTGAT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:17:17#0/1\n+AAGCAACACTATATTTATAATTATTATAATCTCAACTA
AAACCAAGTTGTACCGACAGGCAAACTCAACTGCAAGCATTCATCGTTGAGGAATCTGTGATC\n++\n+A'..b'AAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121638:121638#0/1\n+GTAGGCAATACACGGTGTTGTACCTTTGCTACTCAGTGGAGTAGGCGTCAGAGTTGATGGAGAAGATTGTACAATCTCAAAGGTGACCAAAACTATTAGAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121639:121639#0/1\n+TGTTAGTACTAATGAAAATTTTTCTTATTGCCCATATTTGTTGAGCACTTTTTATAAGTTACAAACCTGACACTAGTTATCTATATTCTGTATTTGATTTT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121640:121640#0/1\n+CTCGTGGGCATATTGGGGATTCCGACATGGCAATAATAGGAAGTGACAGGAAAAGGAACGGAAGGCCACCACGCCACACGTCTGCTGCTCTCCCTTCGTGA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121641:121641#0/1\n+AACGGGAGGAGGTTTCTTGTGCAGTGGGTGTGGAACCCTGTAAAGATGACTTGTTTTGGTCCAAATGCCATGGATTAAAGGGCTGTTGTAATCTTGAGTTG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121642:121642#0/1\n+AGTCTGATACTATCTTTTTCCTCTAAATATGACTAATTAGTAGGTCCTTCTCAGGTTATCGATAAGAACATATGGCTTCAACATAATTTGGTTTTAGTCAT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121643:121643#0/1\n+AAAATAAATGATGAAGCTAGGATTCAAATTTATAATTCATAATCTTATGATAAACTATTAAAGTGATATATTTAATAACTTGATTCCAAGTTTTTCAAAGT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121644:121644#0/1\n+CTTTTCACCAGCAAACGAACAGCAGCTTCTGAAAGAGCTGAAAGATACACGATTACCCATGCCACTTCGGTGGCCAATTCATCGTCCCTGCCAAAAATCAA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121645:121645#0/1\n+GTCGGAGCCGGAGGGGGAGGTGGAGATCAGGGTCTGCGTCAACCGGACCTGCGGGCGGCAGGGCTCCAGGGACATCTTGGCGGTTCTGTCCGCCA
TCGCGC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121646:121646#0/1\n+AGCCTCAGTACACACACCACGCTTCTCCCGGCGCATAAATCAGCTTACACGTAACCCCCAAACTAAAAGCAAAAGATAAAAGAAAGCTTCCCCGCTTCGTG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121647:121647#0/1\n+GAGCGTACGCTGCAGCTGTGTGACGCGTTTATATTATAATGTAACACCTGGAAAGCACAGGGCCAGGAGCCACGCCGGGCCGTCGGATAGCCTGGAATCTA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121648:121648#0/1\n+TCTATGCTGCAAAGGCCGTGGCTTCTGGTTCCCTCGAGGTGTTCCGGAAGCTATCCAGGTCTGGAGAGAGCAGCAACAACACGGGCACCGCCACCGCGGTG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121649:121649#0/1\n+CACCTATTTTAAACAAATCAAAGGCCAATTACCAAATGCATGAAAGTGATGCATTTCTCACCAAAAAGCTAACAGGTTTATACAGCAGACTGTATAACTAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121650:121650#0/1\n+TGTCATCAGATAGGTGGACGGTTGAGATTATGGTACAAGATGAGATCAGACGGCTTTCTGCGGTTGCGATATATATCACAGCGTTTGCGTTCGTATGTGAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121651:121651#0/1\n+CCTTGGGATGCTATTCACTACCCATATATGAGATGAGGTGGGGCTGCTGCCATCTCCGGGAACTCTGGGCATCCATGGGCACAGTATCAAGCTTATCTAAC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121652:121652#0/1\n+AGCGCCACAACCGAATTTCTTGTTTCTTTTCTCTTCTGGTTTTCCAAAGAAACCTGCATGCTCAGAATGACAACAATCAGTTGATAGGAACTTTACTCTCA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121653:121653#0/1\n+GTTGTAGTAATTTTTTAGTTTAATAAAGGGAAAAACTTGTGCGAGTTTCATGGTGGCTCTTTGATTTGTAT
TCCCTTGCGGAAGCATGTTGTGGTTACTGT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/reads_mate2_SV.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/reads_mate2_SV.fq Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,486100 @@\n+@HWUSI-EAS139:1:2000:1:1#0/2\n+ATCTGTCTCTTGCCCTCCCGAAACTGCAAACTGTAGAAGCTGCACCCTTTGCTAAATGGGAAAGGCCTGCCCCGCCATTCTGCACGTGTGACAAATTCACT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:2:2#0/2\n+GATCTGCAAAACAATTCTTAAGAACATTAAGAATGGAAAGAAAAACATTCTAGCATATTACAGAATGGAACATTGAAAACCTTTTCAGTGTATTTATTGCT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:3:3#0/2\n+GAAAAACTTTGATTCATATATAAATATAAAGAGAGAGAGAGAGAAGAAGAGAGAGAGAGAGAGAGAGAGAGAGTCACCATATGGAATAACATAAGTTGCAT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:4:4#0/2\n+CAGATTTATTTACAAGGCAGATACCAATATTTTCAATCCACAACTCTTATCATTAAGTTCACAACTCAGGCTCAAAAAGTGAGCCTAACAAAAAGAAAGAT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:5:5#0/2\n+CAAGGATTCAAATTCAAGTTTATACTAGATATAGATACAGTGTTGGACTGGTAAATTATGTTGGTACATATGGTATATTGTGTGTGATATATACTGCTAGA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:6:6#0/2\n+TTCTTGGCAGAAGCTAGCTTGCGATGGCCATGAAGTGGCAGAAGAATACGTCGAGACGGAGTATTACAAGGATCTCCGCTGCATCGACAAGCAGCATCACA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:7:7#0/2\n+CTGCTAGTACGTAGAAAAGAAGCAATGAGTCCACAAAACAAGCAAGGCAACTTTGCTAGTACCTTATGTTGGCTCTTAAACATATCGATCAAACAATTATT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:8:8#0/2\n+CTTTTGAATCTATGTTCTTGGATTACATATGTGGCATCTCATCTTTGAAAATTTTTCATTTCTTATACACTATGTATTATTGCAGATGCATTTTGAACTTT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:9:9#0/2\
n+CAGGTTTCATAAAATTTATCCATTCTAATAAATTTATGACCCATCTAATTTGATCGTATGATTGAAATGAGCTATGAAATATAGTATATTTTTGGAATAAT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:10:10#0/2\n+TAGTGAAGGTGTTATCATTCCAAAGAAGGTGCCAAAACAATTAAAACCTGCGCATACTGCCCTTTCCACCAGTGCGAACAGCACCGGCCATCTTCATGAGC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:11:11#0/2\n+CGATATACGTCCTCTGGGCAGATACCGATACGATATCCACAGATATGGCGGTGACGTTATAATATTTCTTCGGGATATTTATTGCACGTTTTTTATTTGCC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:12:12#0/2\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:13:13#0/2\n+CCCGCCACACCGCTGCCTCACACCTTGTGTTGCCTCGCACAAACCGCACCCTTTTGTAGCAGCGCTCTTATCCACAAATCCACACCAAGGGCTGAGAATTG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:14:14#0/2\n+AAATCCAAAGGCAGCATTATCGGCTACCCATTTGAAATGATTTCACAAGATGGTTATGCCAAGGCTGGCCCAAATTACAGAACAAGATATGTTAAAAGCAT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:15:15#0/2\n+TGATTAAGAGTTTAAGACATCTACAGAACTGGCTTTCCCTTTGCATAAATAGCCCATGCCACTTGTTTGAGTCTTAATAGGACTCTTGAGACTATATGTTA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:16:16#0/2\n+AAATTTCCAATGCATATCATCTCAGAAAAGGATGTTTTTCCATGAAGAATGTCATGTCAGTACTAGATGGTTTGATGGCATAAAAGTATGTCCAGTTTCAA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:17:17#0/2\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n++\n+A'..b'AAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121510:121510#0/2\n+AATGCGTAGATAATGGCCATAGTCAATCCTTGGACACCAATTTTCCACAAATTTCCCAGGATTGCAAAGGAATATCGGATTATCTTCAAGAGAACGTGGAC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121511:121511#0/2\n+TGAACGCCGCCGGCCCACTTAGCGGCACATTATCGGATTCGGGTTCCGATTGCGGTTGAGATCCGCCCTAACTTATCGGCTCCGGGTTAAGATTTCTCAGA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121512:121512#0/2\n+ATCTATATGGTTCACTTTGAAGTCCCAACAAAGCATAGGTTGTTCAGTCAACTTTCTTCTAGGTGTTCACGTGGTAAGTCTCATCTACTTTTAGATGCTAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121513:121513#0/2\n+CTGAATTGGGTAAGAACTAACCCACTGCTGTGGTATCATTGCGGACTTGGATCCTTCTGAAAAAACTTGGTTCATATCTACAGTTAAACCTTGAGGGTGGA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121514:121514#0/2\n+ATTTTTTAGCTGGAACACTTTATGAAAACATAGGCTATAACCTATTCCTGTTGGAGTGCAAGATTAGTCAACAAAATAATGAGAAAAATGTTCAACATCCA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121515:121515#0/2\n+AGCAACTTCGATTAATATTTCCAATTCACTTTCTCAAAAGTCCACCATTGATATTGTATGAGGAAGACAGCAAAAGAGCATGAAGATTTAAAGAAATTAGT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121516:121516#0/2\n+TCCTTTCCTCCGACGAAGAAGATGGCCCTGATGACTATATAGAGGAGGGTCATCCAGCTTTAGCAGATGCTGATCAACAAAGAAGGTGAATTTGATTCATG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121517:121517#0/2\n+GAATCGATTCTATGCATTCTCCTCGAGTCTACTCACTAATATTTTTTTTCTGAAGATTGATGTTCACCATAATCGGATTTTAAGCATCTTTTTTG
CGTCGG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121518:121518#0/2\n+CAAACTTATGAATGATGCTTCGCTACAAAAAAATTTCCAATTGTATGCTTCACCACAACTTAAGAATGAGCATTTCAATATGTTGAGAGTGTGCAAGAGGC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121519:121519#0/2\n+TTGATTGTTAAAATTTCTAAGAGCATCATATTAAATAATGACTGGTGCGGGAGAGGATCTTTCAGCTTCGCATGCCACTTTAAGGTTAGTATTCAGTGATT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121520:121520#0/2\n+CATTGAAGTAAAGTTTATTCTTCTTTCTACCCATTGAACTTTATATTTAAGGATGATTCCTTTCTTTTCTTAACTAGATCTGGAAAAGATTTTTGCACTAT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121521:121521#0/2\n+CTATCGGCTTGTAAAGATCCGAACCGAACACGCTGTTTATTCTCTCGCAGTTAGCGCGGATATCCTGGAGGAGCAAGGTGCTGCGCTCGTCTTCGGGGTCC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121522:121522#0/2\n+TTATACTTTAAAAAAATTATATTAAAATTCTTATATTTATAAAAATAAAATATTTAATTATATTTCTCCTCACGTCATCAATTTTACTTATAATAATGATA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121523:121523#0/2\n+TCTATGAATAATAGGTATCCAGAAAACTATAAAGCTTAATGATTCCAAGAATTGAAAAGACAAGGAAAAAGTCAATAATCATACCATAACTGAGTGCCTGC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121524:121524#0/2\n+AGTACTCATACTATGGATTTTTTTCAGCTCATACAAATTTTTAGGGCGTGTGAACATGGCTGCTTGTTCTGCATGCTCGTCATCATTGGCTACTTGTTGTG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121525:121525#0/2\n+AATGAGTCGATGACGAAGCAGCCATCGCCTTCTAAACCAAAGCTGATGGTAGATGTTGTTCTTGGTCTCGT
CTCTCTCACTCACCGATCTTTCTTTTGGTT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/reads_mate2_rf.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/reads_mate2_rf.fq Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,486612 @@\n+@HWUSI-EAS139:1:2000:1:1#0/2\n+GTGTTTATAACAGTCGTTATGTTTGTGCGTAATCCTATCGCATTAAATAGAACGTAACGGGAATGGTTACGACGACCGCTACCGAGTCATTGTATACCGCT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:2:2#0/2\n+GCTCTTCGTGGACGAAGGCGGCCACGGTTCTGATGTTGCTCGCGGATTCCGATGCAAGAACAGCGAGTTGTTGGTGAGCCAAAGCAGAGTCACCGGAGAAA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:3:3#0/2\n+TGCATCTCTGCTTCAACTGGTTAGCATACACAGCGCAGGTCTCGTACTCGCCGCAGTAGGCAGCCGCCATCAAGTTGTTGTAATCGGAACGAGCATCGTCC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:4:4#0/2\n+CATTACAACTCTTCCTCAACCTTACAATTGTATAGTCAATTGATCTTGTTATTCTCTCTCCTTGTCTATGTGTATAATAATCAGAGAGCAGTTAGATTATG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:5:5#0/2\n+CAGCCTGCGAATTCCAATACCACTCAAGAATTAGACCCCGTCTATCGACTCAAACTGCACCCCGCAGACTTACCCGTGCGGGTAGACATCGCCGTAGATGG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:6:6#0/2\n+AGACTTCCTTCAAAATGAACAAAGCAATTCATGGTAATATAGAACATGATGTCCTCATTACTTGCATCAAATGTGAAGAGAGATCACCATTGCTTCAACCC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:7:7#0/2\n+GGTGATAGTTGGTCATACTTCTCCGAGCAGTAGTATAATGATCGATCTTATGATACGGAGCAGTAGATCAGTGGCGAAAGTAGAAATTCAAACATTCTCCA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:8:8#0/2\n+GTTATAACCTAAATCATTCATAAAAATCTGACTGAATTCTATATTCTAAACTTTTATTTATTTATCTATCTATCTAATTTTATTGATAACTATTATGTCTA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:9:9#0/2\
n+TCTAATCCACACCCGGTCTCGTCCACCTCGTTCGTCGGATTGTCATGACCGATCACAAATCCTTGGAGTATGTTAATATTTATCAATTTCCCGATTCCCTT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:10:10#0/2\n+AACAAGTTAGCTAATTTCCTCTGACATTTGGAAACTTAAACTTGCGTACATCTTTTTCCCTACAAGCCTTTTTTAGTTGCAATTCTTCTTGTAATTTTTAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:11:11#0/2\n+AGGTCGATAACAATTGGATTTCGATCGTTCCACCCGGTACATACCTTGTATTGAGCGATACGGGGCCCCTAATATATAATCGCCCGGTACAGTGTGGTTCG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:12:12#0/2\n+AGACTTACTATGCTATGAGTTTCCTCAATGGAGACCTGACCAATAATACAACATTTAGGACACCTCACTCAATCTACTAAGTAGTCAATGATACTTCCAGC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:13:13#0/2\n+GAACATATGAGGCCTTTTTTTCATTTGACCACTAAGGACTTGATGACATTTCCATCCAAATACAATGTCTTGCATCTAGTCTAACGAGATCTAGTTAGAAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:14:14#0/2\n+AGGATTTTTGACTTCCCTCATGTTCTTCAGCAACAAAAGAAGTTTTGGGCCCTTCGGATACTGGGACAATGACAGAGAAGCTGACTTCCAATGGAAGGTGA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:15:15#0/2\n+ACTGGAGTAGGTCGCCAAAGAGAAATTCAACCGGTATTTTTTGATGTTTTGGAGGCAAGGGTTGTGTTTACATCCTTTGGGCTTTTCATATGTTTCAGGAA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:16:16#0/2\n+CATTTGCAGGTATAAAAACCCACCACCTGCCCAATGCTAATGGGACTTTTCACATATTAAACTCACTTATATTTCACCTAAAAGGGGTCGAGTCAGAAAAC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:17:17#0/2\n+TTGGGTCGCTTCAAATTCTCAAAACTCAAAACCATACT
TCTGGATTCATATGTATATTCACTTGAGATCATCCTATCAAATTGGGTAACACAGCCACCAAC\n++\n+A'..b'AAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121638:121638#0/2\n+GGATCCACTCGCACGCGATGCGCCCGCGCCCCTCCAAGCCGCACAACCCCACCTCGAGCCGGCCACCATATCGTCTAACTCGACAGACAGCTTCGTTAAAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121639:121639#0/2\n+GGGACGGTATCAGCGTAGCTGCGGTGAGGGAAGAAGAAGCGTCGAGGGAGTAGGCGCTCGGAGAAGAGGGAGAATCGGGAGAACCATGACGCTTCCCGATC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121640:121640#0/2\n+CAACGCCGCGTTAGCATACGCCAACTTAGCACATAGGGCACGTTTAGGCCACGTGCATCGACAGTCCAGTGCCATGCGACATAAATAGAGTTACCTAACGC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121641:121641#0/2\n+CTTCAACGCCTTCGTCGAATGCCGCAGATGAGGAGAAGACCGTCTTCTCCTCATCTGCGGCCTTCTCCTCATCTGCGGCCTTTTCTTCTTTTTTTGTAATA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121642:121642#0/2\n+GCAGCGGGCGCTGCGGGCCCCTGGCTGCGTGTGTTCCAAGCCATGGCACGCTCCTCCACACCACCTTGCCGTTGCTACCGGAAACCACCTTGTGGCCGGCC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121643:121643#0/2\n+AAGATTGTTAAGCTTTGTTAACTTTTAGATTCCCTTTGGTACATGAGTCAGCGGAGTATCGAAAGCATCGAGACACCTCAGACTGTGCAGCCTCGTGACAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121644:121644#0/2\n+GTACGAGTGTGTGTGTATATATATATATATATATATATATTCTGGTGGATATTGGATCGGTCAATAAAAGATCCGAAACCGACCCGGTCCACGATACCGAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121645:121645#0/2\n+TGTGATGAATGATCAAAAGTATGCTATGCACTGAACAAGATTTTTTTTTTTTGTCTTCGTTCCAATTTCCGGCATCTTTGAATTGATTCAACCTG
CCTTGT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121646:121646#0/2\n+CTTGTCCCGTGGGAGAGCAGGGAGCAAATACAGGTCACGGTCGGTGCTCTGTACGAGCATTTCTGCAATTGCTGCTGCAAAGCTGAAAATAAAATAAATTA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121647:121647#0/2\n+TCCTATTTTAGATGGGTCCCAAGGTCGAACTTTGGGTGCGGGAACTCGTTGGAACGAACCCGACGATGAAACCGTCACTGACGCGAATGTTTATTTTGACC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121648:121648#0/2\n+CCTTTTGGTTCTCTTCCTTCCACTGTGTCATACGCCAAAACAGTTGGGTGAGGAGTAAAGACCGAATGGTGGTGAAGCCGAGTCATGGCTTAGTCAATGAA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121649:121649#0/2\n+TGTTAGCCATCAACTATAGATGCTGTTTGTTCTGGTGATACATACCAAAATAAAATTTCAATGAGAGTACTAACCACTGAATATTTCTGGAGCAATTCTGA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121650:121650#0/2\n+CAGGTTTCTCTGGGGGAGCAGCAGTCCGAGAGGATCGACGTGGCGTTGTTCGTCGAGTGATGTTGGTCTGCTCCACAATCCCCTCCTCCGTCAACCTGTAA\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121651:121651#0/2\n+CCGAGCTGAGACTGTTACATCATGCAAGAGTCCGAAAGTCACTTCTGTCACCAGCCATGCAAGGAAATGCCCATGCACTAGTTCCCTGTTCCAATCGCTAT\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121652:121652#0/2\n+TCAAATGGCTTATTGTTTTTGCACTGAAGAAAGGTCAAATTGTCTCCCAATAGTCCATAGAAAGCACTGTATGCTGCCAAAGTCTGAACTTGAATTTGAAG\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+@HWUSI-EAS139:1:2000:121653:121653#0/2\n+ATATGGAAACATCCAGGCTAAACTCACCGCGAATCTGAGGAGGAGGCAGGACTTCCCGACGGAGGAGTCTC
CGATCAAGAGAAGCTTGAAGAGGTAATCAC\n++\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/scaffold_to_split.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/scaffold_to_split.fasta Mon Nov 14 08:31:23 2016 -0500 |
| b |
| b'@@ -0,0 +1,82638 @@\n+>scaffold1\n+ATGCGGCCGCCCCGCTCCACTCGCTTCCTCTTATCTCTGCATCGTCAATTTATTAGTATT\n+CCTCGACCACTAATGTACTAATGCTTGTACAATTCTTGTTACCTTTTCTTCTGCTTGTTG\n+AATTCTTTTTTCATTTTTTCTCTCAATTTAATGAACATAAGAATGCTAACCTTTTTATTT\n+TTGAATTTAAAGTTTCTCAAAAAATTTTAATTGGAAGAGCTTCACTTGGTTTTCCTTTTA\n+TCGCAAAAACCAAGCAATCATTTTTGTACTGATTCAGGTGAAAGTGCAGTCATCTCAAAA\n+CTTGAATGTGGCAGATTCTTTGTCCAAGGATTCAGTAGCTACTATTTCAGATGAAGTGAA\n+ACAAGTGACTGATGGTAGTGATACCCAAGATGATGAGAACATGAGTAGTCCAGGTGTTGA\n+GTCAAAACATGAAGATGCTGTTGCCAAAAAGCAGTTATTTGTTTCTCAACAATCTAAAGG\n+AACAGCAGAGCAAAAGGTTGCTTCTTTTAGTGAAGATGTAAGAGAGATAAATACAGAAGC\n+AGAAGATGGTTGGCAACCTGTTCAAAGACCAAGGTCCATCGGAGGTTCAAGCCAGCAAAT\n+CAAGCATCAACGTACAAGCACCTGGAAAACCTACAACTATCAGATGAATGATGTTCCTAG\n+TGAAACTGTTCAATCCAAGCCACAGTTTTCTTATTTAAATAATGGGTATTATTTGCTTAA\n+GAAAAAGATAGTTATTCCTGGAAGCTTTAATGACAATCTTAACATGCAAGTTCAGTCACC\n+AGACACCAGATCTGGTCAGAAGGCGTATAAGGCTGTAACTTATCGGGTGAAGTCAGTGCC\n+TTCATCCACCAATCCTGAGATCAGTCATAACTCTTGGAGTGCTGTCGAAAGAACGACTTC\n+CCCATTAGATGCTCATGCACCCTATTATCGCCATGATAGCCAGGTATTAGAGAATCAGAA\n+GAACCTAATAGGTGGTGTCTCTGAGCCTCGCAACAATTTGGTTCATAGTTTTAGTAATTC\n+TCCATCATATAAAGATGTAGCACTGGCACCTCCAGGAACAATTGCCAAGATACATAGCCG\n+AAAGTTTCAGGAGAATATGCCATTGGAACAAGAGTTGTCTATTGGTGGCAATGCATCTGA\n+AATAAAGGAATCATTTTTGGCCGAGGAACATACAGAAAATGCTGCAGAGCTATCTGAGAT\n+ATCTAACATAACCCAAGATAAGGACACTGTCCAGGATGCGTTTTTAGACTCAGATAAAAA\n+AAGTTGAAGTTGATCATGAAGAAGAAAGAAAGGAAGATTGTGAAACAGAACAATTACTAG\n+AACCATCGTCTTCTGATTTGGAAGTGGCATCTTGTAGTAGCATGCTCACCAAGAATATCA\n+TTGACAATTGTGTATCTAGCAATGAGGTTCAAGGAGTTGAGCAAAATGAGAATCATGATC\n+AGAATTTGTCAACAAATACATCTGATAGGAAAAAATCTGAGTGTCCCATCACTGCAGAAA\n+GCAAAGAAGATAATCATGACGAAGCCTCATGTACCAATGTTGGTATCAGTTCTTACTCTA\n+GTCTCCATCAATTCAATTTCAAGAAGGTTCTCATCCCTGAGAAAACAGGTGGTGATTATC\n+CCACGATGGAACTACCACCTTCTAATTATGATGGGAGAGAGGTATCTAGCAAGAAGCTGT\n+CTGCATCTGCTGCACCGTTTAGCCCTTTCCCTGCTACAGTACTTGGTCCTGTTCCTGTAA\n+CTGTTGGTCTTCCTCCTAATGGTACAATTTCTGCAGTTACACCATGGCCATTGAGTGCCA\n+GTCTGCATGCTTCACCCACGGCTGTAATGCCAATGGTGCCTCCTATTTGTACCTCACCGC\n+ATCATCCT
AATCCTTCTTCACCTAGGCCTTCGCACATTCTACGTCCCTTGCCATTTATAT\n+ATCCACCATATACCCAACCTCAAGTCATTCCAAATACCACTTTTGCTATGAACGGTAATG\n+GAAACCATTATCCATGGCAGTGCAACATTGGTGCAAACGTTCCTGACTTTGCGCCAGGAT\n+CAGTATGGTCTGGTTCTCATCCTGTGGACTTCTCATCTTTGCCACCCATCATTAGTCCAA\n+CTTCTGAATCCGTGTTGGAACCAATTATAACATCTCATTTAAGAACTGATGTGAGTCTAG\n+ACCTTCCATCGGATAACAATACTGAAGAAGAGAATAATGAGATATCCCAGATTATAGACA\n+TTTGTAAACCGCTGGATGGTAACTGGTTAGAGAAACGGGAATCTGAAGAATCTCACAGAA\n+ATAATACAAAAATTACCGACTTGGAATCTGAGACGGTTTTCAGACAAGATGCACAGCATA\n+GTGGTGGAAGGCATGTCTTTAAGAGTAGCAAAAAGTATGAGGGTGAGGGAAGCTTTAGCA\n+TATACATCAAGGGTAGAAATCGCCGTAAACAGACACTAAAGCTGGTTATAAGTTTGCTCA\n+ATAGGCCATATGGATCACAGTCATTTAAAGTTATATATAGCAGAGTAGTAAGAGGAAGTG\n+ATGTTATAAGTGCAACAGATATATCTTCCAGTGAAAATGTTACTTCCGACTAGTCACAAG\n+CAAAGAACACTACAAACTTCAAAATGGGCAGGAAAGAATTTTTTTTATTGTTCAAGCCAC\n+ATCCAGGAGGACGTGACTGAAAAATTAGCAATGTTGATAATTTAGTTATTGCTATTTTTC\n+AGGCAACTGATATTTTTTATTAAGAAGCCAAGATTGAACGTTCAGGAAGGTAAGCGACAA\n+ATCTCCTCATGGTCCAAGCCAGGTATTTCTTTGTTAATTAGCTTGACAATGTTACATAAC\n+TTGCAGATTTATTGCTCATCAATGCACCTCCATATACTTTATTGTCAATAACATCTCTTT\n+TGCTTTAATTTTGTGTTGGTTTCTCAAGCATACAATTAATTTATCCTGTTTAACATAAGT\n+TATTAGGAAGAAACTTTGTTTGTGAACTGATCATGATTCACCTTTGAAATCCCCAAATTT\n+TTTAATCTCGAGTATCTTCAGTATTGATATCTCACACCATCAAAAAGCTCATAGTATACC\n+ATCATGCATGTAATTTATAAGCCTGACTCGATATTTTAGTTCCGGTCTAGCCATTCCAAT\n+TTCCAAGCAACCGGTCAAGAGAGACAGACTGAGCGGATATTTTCAGGCTTCCGAAGATGT\n+TTTGATATGCATGAAACTAGTATCAACATCTTTTTGAGGCGCAGATTTTTGCTTCTATTG\n+TAGCTTTCTCTAATTTGCAATCTTGATAATCAGATTGCTGGAGTTAAGTTGTTTTTTCTA\n+ATTACCTCTCATCTGTTGCAAATTCCAGGGTATGGCTTACCATCAAATCCAACCTAATTC\n+CTTTTAGTTAAGTCTTCCACTTGCTATGTTTACTGCTGTTGATCTAATCATCTATCACAC\n+GTCTTGAGTCTCTGGGCTCATTGGATTTCCAGTCTACCAGGGATTATCAGTTCATTTGTT\n+GAAGCAGTACAACATTTAATAATGATGTTAAATCCGATTAGAAAGCTGGGTTGTGCATCC\n+TACCTCTTATGATTTTGTTTTTATGACATTATCTACAACCATTTTCTTGCATTCTCAAAT\n+TAGAGCAGATTTATCCGAAAATTAGACTTGTAATATAGCTGCCACACTACCTTTTTGGCA\n+TTCCAGTACAGACTAGGTAGATTGCTTGAATTGTAATGTATGCCTCTGTTATTCAGTAGT\n+GATGCTCAAGCAAGATTTATGTGTGGTCGAGTTTTCAACTTGCACAACATATATA
ATTTT\n+TTTTTGTATGTTGTTTGTCATAGCTTTCTAGATGTATGTATATCATTTGCCATCTTTTCA'..b'ATTGCAGTGGGAAGGATGATATTTCTAAAGAGATTCAAACAG\n+ACATGCTTGTAAACTTACATTTTGCCAGTTCTTGATCATCTTTGGAGGCATCTGATCCTG\n+TTCTTGGCGCATCTCCAGGTGTCCATGAGCATTGGTTTGAGATGGTGGGTGAGAGGCCCT\n+TGATCTGAGTAAATGCTCGAAGATCGCCATGACGAGGAACATGGATACCAAGATTGCTGT\n+TGCAATGAAGCCAAAGGAGAGAGCATTCATGGACGAGCTCAAGTCCTTGAAAGCAGCCTC\n+CTTGTCATTTGTTCCTGATGGAGTTGTGTTGTCACTCCACATCTTTGGTCGATCTCCAAA\n+GCCATTCATGGTGATCAGCTGCTGCTGCTGCTACTCTGCAACCAGTAAGGCTCTCCTTTT\n+CGATGTGCTCCGGTTGGTTGAAGGATTAGCTCAATAAGGCAGGGGAAACTAAGATGATAG\n+CCATCATAACTTCGGCACCACCTTTGAATCCTTTTGCCACCACAAGCAGCTGTCTTTGTC\n+CAAGCACTCTCAGAATGAAGCAGAACACAAGTCTAAGAACATGTAGAAGATGATGATCAG\n+AAGAAAACTTACCACACCATCAGAAACATCACAGTAAATAAAAAAAGTTCTGAGATCATC\n+AATTCATTTTGTCAAATTAACATCCAATTTGGATTTGCTTTCAACTTATTATACACAGTA\n+ATCAAGAGCAGTAGTAATAAACTGTGTAGCAAATTAACTAGCAACAAATTTCTTTCCAAC\n+CAAAATCTTTTCCTGGTAAGAAGAGATTAAGATAATCAATCAGCAACCTAAATAAAATAT\n+ATTTTTTTCTTTTGAAGGAAGCTAAGCAAAATGTCATAGCAGCACAACAGACCCAAGAAA\n+TAAGCATTAATTAGTCATGCAAATATAAGTTTCCTTGAGGGAGAGTTGGGAGTGATGTTT\n+CCCAAGTACTAAGTTGTAAAAGCTAAGCATATGTATTCTGCTACACATCCACAAAAACTA\n+TGTCAAGATCTCCATAGTCTCAAGAGCTACATATTAATCTGGATCCATACTCCCAAGAAA\n+ATAGCATGGAAATCAACAAAAAGAATTGCAGGTTGGTCTCCATGATAAAAGGAAAACTCT\n+CTCCTCACTATGATTGAGTTCATTTAGCACAAATGTCTCATACTAAAACTAACAATTCCT\n+TGAAAGTTGACAGAAAGGAAATATGATTTGGGAGGTGAAGAACAGGCTCAGCTTCAATAG\n+CATAGAAAAGAAAAGAGGGAAAATGGAAGAAATAGCCAACCTTTAGCTTAAGATCACAGC\n+TGATCTTAAACTGCAGCTTCCTGAGAAGACTTTTGTCCTGTAATCTGGTGCTAATGTGGT\n+CTTTGCCTTGTTGTAGCTTAACTACCAAAGAAGGAATAGTTGACGGTGCCCAGGCTGCTT\n+GATCTCACTGCCTATGAACTCACAAGCTGCATTACTGAAGGCTCAAAAGAATGGCAACCC\n+CTAGGATTCTTCTGAAAGCATAGCTCCGTCCCAGTTTCAAGTTTGCCGAGTTCTAATAAT\n+CACCAGAAAGAAAGGATTGGATTGGTCACTGTCACAAGCATAAGAGCATTGCTGTCAGTC\n+TGAAAAGGATAAATACATAAATAAAGATATGATGATGGTCTTCTACTGCATCTTCCCTCC\n+ACTTGTGAAAGAGAAAAAGCTGCCAGACCAAGGTGTGATCTTTATTTTAACAGATGAAAG\n+GCTGCAAGAATGGTGGAAGACAATGTCTAAACCACATTTGAATGCTGGAAACACATGATC\n+AGATGCCACAAAGAAGCAACTGAGATGATACCAGAGAGAATAAGAGGAAGAGTAT
TATCC\n+CAAGACAGGAAATCTTACAGACATACATCGACAGAAGTACAATGTTTCAGGAAAGAAACT\n+AAAATGAGAGAAATGAGTCTCGGCATAGCAGGTGCATCCAAGGAAAATGATCGAATTCCT\n+CACCTTTGATGATGAAGATAACATGTTAATCCTCCCTAGGCTTTTGCCTGTGACACACAA\n+GGCAGCAGCAGCAGATTTATCCAGCACATGCAGAGACAAAGAAAGGCAAAAGGATTGTAA\n+AGATTCAATTAGTACTCTAACCAATTGGTGTGCACACTTTGCTTCAAAGAAAAAAGAATA\n+TGATGCAGAAGCTGCACTCATTAAAAGAAGAGTTGACAAGGCGGCAAACAAGAAACATGG\n+CTTTTTAGGTGTTTCAACTTGATAATGAAGATGTCCTGGAAGAAACCAAAGCCTACAAAG\n+ACCTACAAAGTGATGGATTAAGAGGGACAAAATAATTGCATGATCTTTCTGAATTGTATA\n+TGGATTAACAGAGAGACAAAATAATGACATCTTGGTTCTAAACATAACCAGCTGATTCAG\n+TATGTGAACCACACACAATATGGTACTAAGTTTCATGCATTAATTGACATTTATTTCAAT\n+AAATCATCATTATTCTGTTTTAGACTTGTCTGAAGACCAAGAAAGGTTTTGGTTTCATGA\n+TCCTGAGGTGATTGTAAAGATCAAGCCACTAACTGTGCACAACCATCTCTCAGAATCATG\n+CCTATTTCTGCTGAAACCAAAGACTTGAATCACTTGATAGTAACTTAACAATTTATAATT\n+TCATTATTTCATGAAGATATATCAAAAACTTTTTCTTCTTTGTCATCATGAAAAAGGACA\n+GTCATCATGAAAGGACTGGCCATGCAAAAGAAGGTCCCCCACATGGTTGGGATACAGAGA\n+CACAAGGTGGGAGCCGGAATGGTAAGGATAACTTCATGCATGAATCAAGGCCCCAATTCA\n+AGCATTGATTTTCTCTCAAGGCACTTGAAAAAGAATTAAAAGAAAAAAAAAAAACAGCTC\n+CAAGGCATTAAAAAAGTTGATAAAAATTTTGATTGTATTAAAAATTAAACTTAAGCGCAT\n+GATGATACAAGTCAACTATATATGGAAGAATATTACACAAATATTAATTAAATATTAGAT\n+GAAACAAAAAATATGTAGATATATATTTCGGACAGACGATATGCTATAATACCATGAGAG\n+ATTTTTTTTTTTCTAATGGATTACCACTTATCCCAAAAGTTTAAGCAATTAGAAGAAGTT\n+TCAATATTAGATTCATAAATAAATTAATAGTAGAATTGGTGACAAGATAAGAATATATCG\n+AGTGCGCAACGAGTTACTATAGAGAGAAGTCAGATAAGTAGTAGTAGAAATCAACATAAT\n+ATCAAAGCAGACTTACTATGCTATGAGTTTCCTCAATGGAGACCTGACCAATAATACAAC\n+ATTTAGGACACCTCACTCAATCTACTAAGTAGTCAATGATACTTCCAGCTAGGGATAGGA\n+AACAATCTAGAGATAGCAATTAAATTAAGTGCTGCTGCTGCAACTTGGGGGGCAGACCAA\n+AAGCATGATGAAACAACTTAATGGTGTTTAAAGGGTTCTCAAATCCTCACATCACCAAAT\n+GATTGTACAATAAATCCAAAACAAATCTGTTTTTTTCACTAATGTCACAAAAACTCAACT\n+CCATAATTCCCCTTATTAAGTAGAACTCAGAAAAAAACACTCCATAATGTATGTAATTAC\n+CATGCGTAAATGCAATCGACAACCGAAAAGATATTAACCTAATGCATTTGCTTGTCAAAG\n+AGATCTTAACTTATCTGCTCAAAACAAAAAGGAAAAGAATTCTTAGCTTGCTTTTGTTTT\n+TCTCTTTTCATGCCAAGTTCTTTTTGGTTTGTCTCATAT
TTAGGGAAGTCTCACTTTATT\n+GCTTTTCATTCACAGTCAAACTGAAGTCTGACCTTTCTTTTCTTGTGTGTGAAAAAGTTA\n+TTATATATATATAAAAAAAGTATACAAAACTTGATACCTATCTCTGTG\n' |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/scaffold_to_split.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/scaffold_to_split.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,2 @@ +scaffold_2 1000000 +scaffold_3 996559 996578 |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/statistic.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/statistic.txt Mon Nov 14 08:31:23 2016 -0500 |
| [ |
| @@ -0,0 +1,13 @@ +Chromosome : chr01 : + Mean coverage : 5.182446471 + Median coverage : 5.0 + Confidence interval (90%) : [2.0,10.0] +Chromosome : chr02 : + Mean coverage : 4.71688554442 + Median coverage : 5.0 + Confidence interval (90%) : [1.0,9.0] +*****Average***** + Mean coverage : 4.95715832207 + Median coverage : 5.0 +Insert size as been re-estimated to :5199.0 +Standard deviation of insert size as been re-estimated to :500.302399378 |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/sv_detected.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/sv_detected.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,4 @@ +inversion region: chr02 9991 100013 +reciprocal_translocation region1: chr01 2201114 2300010 region2_inv: chr02 1601985 1746995 +deletion region: chr01 700014 800017 +duplication region_inv: chr01 249988 353015 target: chr02 700006 700051 |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/to_contig.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/to_contig.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,4 @@ +scaffold3 1 996558 FWD scaffold1 999999 999999 FWD REV contig +scaffold4 1 992957 FWD scaffold2 1 1 FWD FWD contig +scaffold4 1 992957 FWD scaffold3 1 1 FWD REV contig +scaffold5 1 914761 FWD scaffold2 999377 999377 FWD FWD contig |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/test-data/to_merge.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/test-data/to_merge.txt Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,3 @@ +scaffold6 1 20001 FWD scaffold2 299998 319379 FWD FWD prob_fusion +scaffold7 1 19999 FWD scaffold3 480000 496560 FWD REV fusion +scaffold8 1 13848 FWD scaffold4 499999 511143 FWD FWD prob_fusion |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/tool-data/scaffremodler.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/tool-data/scaffremodler.loc.sample Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,2 @@ +sample_sv Sample for SV detection /bank/musa_acuminata/sample/Ref_for_SV_detection.fasta +sample_improve Sample for Genome Improvment /bank/musa_acuminata/sample/scaffolds_to_correct.fasta \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/tool_data_table_conf.xml.sample Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of scaffremodler file --> + <table name="scaffremodler" comment_char="#"> + <columns>value, dbkey, path</columns> + <file path="tool-data/scaffremodler.loc" /> + </table> +</tables> \ No newline at end of file |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/tool_dependencies.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,15 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="bwa" version="0.7.7"> + <repository changeset_revision="def70e393020" name="package_bwa_0_7_7" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> + <package name="bowtie2" version="2.2.6"> + <repository changeset_revision="0d9cd7487cc9" name="package_bowtie_2_2_6" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="samtools" version="1.2"> + <repository changeset_revision="f6ae3ba3f3c1" name="package_samtools_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="picard" version="1.136"> + <repository changeset_revision="3e9c24e5325b" name="package_picard_1_136" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency> |
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/verif_fusion.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/verif_fusion.py Mon Nov 14 08:31:23 2016 -0500 |
| [ |

#
#  Copyright 2014 CIRAD
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, see <http://www.gnu.org/licenses/> or
#  write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#

import optparse, os, shutil, subprocess, sys, tempfile, fileinput, operator, time, multiprocessing
import tarfile

# The module was renamed between Python 2 and Python 3; keep the historical
# name so the rest of the script reads the same under both interpreters.
try:
    import ConfigParser
except ImportError:
    import configparser as ConfigParser

# shlex.quote only exists on Python 3; pipes.quote is the Python 2 equivalent.
try:
    from shlex import quote as shell_quote
except ImportError:
    from pipes import quote as shell_quote


def stop_err( msg ):
    """Write ``msg`` to stderr and terminate with a non-zero exit status."""
    sys.stderr.write( "%s\n" % msg )
    # A bare sys.exit() would report success (status 0) to the caller.
    sys.exit(1)


def run_job (cmd_line, ERROR):
    """Run ``cmd_line`` through the shell and abort the process on failure.

    The command's stderr is captured in a temporary file. When the command
    returns a non-zero status, ``stop_err`` is called with ``ERROR`` followed
    by the captured stderr text.
    """
    print(cmd_line)
    try:
        # mkstemp creates the file atomically instead of reusing the name of
        # an already-deleted NamedTemporaryFile.
        fd, tmp = tempfile.mkstemp(suffix='.error')
        try:
            with os.fdopen(fd, 'w') as error:
                proc = subprocess.Popen( args=cmd_line, shell=True, stderr=error )
                returncode = proc.wait()
            with open(tmp, 'rb') as error:
                stderr = error.read()
        finally:
            os.remove(tmp)
        # Python 3 reads bytes here; Python 2 already has a str.
        if not isinstance(stderr, str):
            stderr = stderr.decode('utf-8', 'replace')
        if returncode != 0:
            raise Exception(stderr)
    except Exception as e:
        stop_err( ERROR + str( e ) )


def create_junction(LIST, OUT):
    """Write the boundaries of every piece listed in ``LIST`` to ``OUT``.

    Each non-empty line of ``LIST`` is read as a scaffold name followed by
    repeated (name, start, end) triples. For every triple two tab-separated
    lines are written: ``scaffold, start-1, start-1, name`` and
    ``scaffold, end-1, end-1, name``.
    # NOTE(review): the -1 presumably converts 1-based positions to 0-based
    # ones for circos; confirm against draw_circos.py.

    Raises IndexError when a line holds an incomplete triple and ValueError
    when a position is not an integer.
    """
    with open(LIST) as infile, open(OUT, 'w') as outfile:
        for line in infile:
            data = line.split()
            if not data:
                continue
            scaff = data[0]
            for i in range(1, len(data), 3):
                name = data[i]
                for position in (data[i + 1], data[i + 2]):
                    coord = str(int(position) - 1)
                    outfile.write('\t'.join([scaff, coord, coord, name]) + '\n')


def _junction_window(scaff, left, right, bound, length):
    """Return the ``scaff:start:end`` region around a junction, clipped to [0, length]."""
    start = max(0, left - bound)
    end = min(length, right + bound)
    return scaff + ':' + str(start) + ':' + str(end)


def _run_batch(processes):
    """Start every process of the batch, wait for all of them, abort if one failed."""
    for process in processes:
        process.start()
    # join() blocks until the corresponding process has terminated.
    for process in processes:
        process.join()
    if any(process.exitcode != 0 for process in processes):
        stop_err('Bug when drawing circos: at least one drawing job failed')


def __main__():
    """Draw one circos picture per junction of the --list file and archive them."""
    #Parse Command Line
    parser = optparse.OptionParser(usage="python %prog [options]\n\nProgram designed by Guillaume MARTIN : guillaume.martin@cirad.fr\n\n"
    "This script verifyies fusions and contigs performed using fusion_scaff.py or contig_scaff.py ")
    # Wrapper options.
    parser.add_option( '', '--config', dest='config', default='not_filled', help='The conf file generated by conf4circos.py')
    parser.add_option( '', '--list', dest='list', help='The out_verif file generated by fusion_scaff.py')
    parser.add_option( '', '--bound', dest='bound', default=10000, help='Boudary around junction to draw, [default: %default]')
    parser.add_option( '', '--thread', dest='thread', default='1', help='Thread number used for circos drawing (integer), [default: %default]')
    parser.add_option( '', '--out_tar', dest='out_tar', default='verif.tar', help='The archive output file containing all circos generated')
    (options, args) = parser.parse_args()

    # abspath: dirname('verif_fusion.py') is '' and would point the paths
    # below at the filesystem root.
    pathname = os.path.dirname(os.path.abspath(sys.argv[0]))

    loca_programs = ConfigParser.RawConfigParser()
    loca_programs.read(os.path.join(pathname, 'loca_programs.conf'))

    proc = int(options.thread)
    bound = int(options.bound)

    # --list has no default, so a missing value is None rather than 'not_filled'.
    if options.config in (None, 'not_filled'):
        sys.exit('--config argument is missing')
    if options.list in (None, 'not_filled'):
        sys.exit('--list argument is missing')

    config = ConfigParser.RawConfigParser()
    config.read(options.config)

    # Sequence name -> length, from the file referenced by [General] chr.
    dic_chr = {}
    with open(config.get('General','chr')) as chr_file:
        for line in chr_file:
            data = line.split()
            if data:
                dic_chr[data[0]] = int(data[1])

    # Private scratch directory for the modified configuration and the
    # junction file; removed once every drawing job is finished.
    tmp_dir = tempfile.mkdtemp()
    try:
        tmp_conf = os.path.join(tmp_dir, 'verif_fusion.conf')
        tmp_txt = os.path.join(tmp_dir, 'verif_fusion.txt')

        config.set('General','out_N', tmp_txt)

        with open(tmp_conf, 'w') as configfile:
            config.write(configfile)

        print(tmp_conf)
        print(tmp_txt)

        #recording scaffold junction
        create_junction(options.list, tmp_txt)

        #drawing circos for each junction
        python_cmd = loca_programs.get('Programs','python')
        draw_script = os.path.join(pathname, 'draw_circos.py')
        liste_nom = []
        liste_process = []
        with open(options.list) as list_file:
            for line in list_file:
                data = line.split()
                if not data:
                    continue
                scaff = data[0]
                # A junction lies between the end of one piece (index i) and
                # the start of the next one (index i + 2).
                for i in range(3, len(data) - 1, 3):
                    left, right = data[i], data[i + 2]
                    mot = _junction_window(scaff, int(left), int(right), bound, dic_chr[scaff])
                    nom = scaff+'-'+left+'-'+right+'.png'
                    liste_nom.append(nom)
                    # Quote everything derived from file content or paths so
                    # that unusual sequence names cannot break the shell command.
                    redraw = '%s %s --config %s --frf n --ff n --rr n --ins n --delet n --chr_rr n --chr_rf n --chr_fr n --chr_ff n --draw %s --out %s --labels n' % (python_cmd, shell_quote(draw_script), shell_quote(tmp_conf), shell_quote(mot), shell_quote(nom))
                    t = multiprocessing.Process(target=run_job, args=(redraw, 'Bug when drawing circos',))
                    liste_process.append(t)
                    if len(liste_process) == proc:
                        _run_batch(liste_process)
                        liste_process = []
        if liste_process:
            _run_batch(liste_process)
            liste_process = []

        if not liste_nom:
            sys.exit('No junction found in the --list file: nothing to draw')

        try:
            with tarfile.open(options.out_tar, 'w') as archive:
                for n in liste_nom:
                    archive.add(n)
        except (IOError, OSError, tarfile.TarError) as e:
            stop_err('Bug in archive creation' + str(e))
        for n in liste_nom:
            os.remove(n)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)

if __name__ == "__main__": __main__()
| b |
| diff -r 000000000000 -r 66885fa414c8 scaffremodler/verif_fusion.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scaffremodler/verif_fusion.xml Mon Nov 14 08:31:23 2016 -0500 |
| b |
| @@ -0,0 +1,32 @@ +<tool id="verif_fusion" name="verif_fusion" version="0.1"> + <description> : Generate circos to verify sequence fusions and junctions</description> + <command> + source $__tool_directory__/include_scaffremodler.sh ; + python $__tool_directory__/verif_fusion.py + --config $config + --list $list + --bound $bound + --out_tar $circos_tar + + </command> + <inputs> + <param name="config" type="data" label="The circos_conf file generated by conf4circos" /> + <param name="list" type="data" label="A list of the fusions or junctions performed (contig.txt or fusion.txt file)" /> + <param name="bound" type="integer" value="10000" label="Boundary around junction to draw" /> + <param name="job_name" type="text" size="25" label="output name" value="SampleDataset"/> + <param name="OUT" type="text" label="Identifier for output" /> + </inputs> + <outputs> + <data format="tar" name="circos_tar" label="${tool.name} : Circos to verify sequence fusions and junctions (tar file)" /> + </outputs> + <help> + + .. class:: infomark + +**Program encapsulated in Galaxy by South Green** + + </help> + <citations> + <citation type="doi">10.1186/s12864-016-2579-4</citation> + </citations> +</tool> \ No newline at end of file |