Previous changeset 12:bc082a79d655 (2021-02-10) Next changeset 14:1263189f6204 (2021-10-23) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79" |
modified:
macros.xml test-data/dedup_out1.bam test-data/dedup_out2.bam test-data/dedup_out3.bam test-data/dedup_out4.bam test-data/dedup_out5.bam test-data/dedup_out6.bam test-data/group_out4.bam test-data/group_out4.tab test-data/out_wl_paired.log umi-tools_group.xml |
added:
test-data/chr19_gene_tags.sam test-data/group_in2.sam |
b |
diff -r bc082a79d655 -r cf25b50eff0a macros.xml --- a/macros.xml Wed Feb 10 19:31:44 2021 +0000 +++ b/macros.xml Mon Sep 13 14:50:56 2021 +0000 |
[ |
b'@@ -1,5 +1,43 @@\n <?xml version="1.0"?>\n <macros>\n+\n+ <!-- macros applying to all umi_tools -->\n+\n+ <token name="@TOOL_VERSION@">1.1.2</token>\n+ <token name="@VERSION_SUFFIX@">0</token>\n+ <token name="@PROFILE@">21.01</token>\n+ <xml name="requirements">\n+ <requirements>\n+ <requirement type="package" version="@TOOL_VERSION@">umi_tools</requirement>\n+ <yield />\n+ </requirements>\n+ </xml>\n+ <xml name="citations">\n+ <citations>\n+ <citation type="doi">10.1101/gr.209601.116</citation>\n+ <citation type="bibtex">\n+ @misc{githubUMI-tools,\n+ title = {UMI-tools},\n+ publisher = {GitHub},\n+ journal = {GitHub repository},\n+ url = {https://github.com/CGATOxford/UMI-tools},\n+ }\n+ </citation>\n+ </citations>\n+ </xml>\n+ <xml name="advanced_options_macro">\n+ <section name="advanced" title="Extra parameters" expanded="false">\n+ <param argument="--random-seed" type="integer" min="0" optional="true" label="Random Seed" />\n+ </section>\n+ </xml>\n+ <token name="@ADVANCED_OPTIONS@"><![CDATA[\n+ #if str($advanced.random_seed) != \'\'\n+ --random-seed=\'$advanced.random_seed\'\n+ #end if\n+ ]]></token>\n+ \n+ <!-- macros for extract and whitelist-->\n+ \n <macro name="barcode_sanitizer" >\n <sanitizer invalid_char="">\n <valid initial="string.letters,string.digits">\n@@ -23,90 +61,510 @@\n </valid>\n </sanitizer>\n </macro>\n- <macro name="barcode2_conditional" >\n- <conditional name="barcode">\n- <param name="barcode_select" argument="--split-barcode" type="select" label="Barcode on both reads?">\n- <option value="first_read_only">Barcode on first read only</option>\n- <option value="both_reads">Barcode on both reads</option>\n- </param>\n- <when value="first_read_only"/>\n- <when value="both_reads">\n- <param name="bc_pattern2" argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read"\n- help="Use this option to specify the format of the UMI/barcode for\n- the second read pair if required." >\n- <expand macro="barcode_sanitizer" />\n- </param>\n- </when>\n- </conditional>\n+ <xml name="sanitize_tag" >\n+ <sanitizer invalid_char="">\n+ <valid initial="string.letters,string.digits" />\n+ </sanitizer>\n+ </xml>\n+ <macro name="barcode1_macro" >\n+ <param argument="--bc-pattern" type="text" label="Barcode pattern for first read"\n+ help="Use this option to specify the format of the UMI/barcode. Use Ns to\n+ represent the random positions and Xs to indicate the bc positions.\n+ Bases with Ns will be extracted and added to the read name. Remaining\n+ bases, marked with an X will be reattached to the read">\n+ <validator type="empty_field" /> \n+ <expand macro="barcode_sanitizer" />\n+ </param>\n </macro>\n+ <macro name="barcode2_macro" >\n+ <param argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read"\n+ help="Use this option to specify the format of the UMI/barcode for\n+ the second read pair if required" >\n+ <expand macro="barcode_sanitizer" />\n+ </param>\n+ </macro>\n+ <!-- not just fastq because this would allow also fastqcsanger -->\n+ <token name="@FASTQ_FORMATS@">fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz,fastqsolexa,fastqsolexa.gz</token>\n+ <xml name="bio_tools">\n+ <xrefs>\n+ <xref type="bio.tools">umi-tools</xref>\n+ </xrefs>\n+ </xml>\n <xml name="input_types">\n- <conditional name="in'..b'tag specified so you do not need to supply the mapping file">\n+ <expand macro="sanitize_tag" />\n+ </param>\n+ <param argument="--assigned-status-tag" type="text" optional="true" label="Bam tag describing whether read is assigned to a gene" help="By default, this is set as the same tag as --gene-tag">\n+ <expand macro="sanitize_tag" />\n+ </param>\n+ <param argument="--skip-tags-regex" name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" >\n+ <expand macro="barcode_sanitizer" />\n+ </param>\n+ <param argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" />\n+ <param argument="--gene-transcript-map" type="data" format="tabular" optional="true" label="Tabular file mapping genes to transripts" />\n+ <param argument="--per-cell" name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" label="Group reads only if they have the same cell barcode" />\n+ </section>\n+ </xml>\n+ <token name="@SC_OPTIONS@"><![CDATA[\n+ #if str($sc.gene_tag) != "":\n+ --gene-tag \'$sc.gene_tag\'\n+ #end if\n+ #if str($sc.assigned_status_tag) != "":\n+ --assigned-status-tag \'$sc.assigned_status_tag\'\n+ #end if\n+ #if str($sc.skip_tags_regex) != "":\n+ --skip-tags-regex \'$sc.skip_tags_regex\'\n+ #end if\n+ $sc.per_contig\n+ #if $sc.gene_transcript_map:\n+ --gene-transcript-map \'$sc.gene_transcript_map\'\n+ #end if\n+ $sc.per_cell\n+ ]]></token>\n+\n+ <xml name="groupdedup_options_macro">\n+ <section name="gd" title="group/dedup specific options">\n+ <param argument="--buffer-whole-contig" type="boolean" truevalue="--buffer-whole-contig" falsevalue="" label="Read whole contig before outputting bundles" help="Guarantees that no reads are missed, but increases memory usage" />\n+ <!-- TODO this option is hidden on the CLI. Should we expose it? -->\n+ <param argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" />\n+ <param argument="--multimapping-detection-method" type="select" optional="true" label="BAM Tag indicating multimapping " help="Some aligners identify multimapping using bam tags. Setting this option to NH, X0 or XT will use these tags when selecting the best read amongst reads with the same position and umi">\n+ <option value="NH">NH</option>\n+ <option value="X0">X0</option>\n+ <option value="XT">XT</option>\n+ </param>\n+ </section>\n+ </xml>\n+ <token name="@GROUPDEDUP_OPTIONS@"><![CDATA[\n+ $gd.buffer_whole_contig\n+ $gd.whole_contig\n+ $gd.multimapping_detection_method\n+ ]]></token>\n+ \n+ <xml name="log_input_macro">\n+ <param argument="--log" type="boolean" label="Output log?" truevalue="--log" falsevalue="" help="Choose if you want to generate a text file containing logging information" />\n+ </xml>\n+ <xml name="log_output_macro">\n+ <data name="out_log" format="txt" label="${tool.name} on ${on_string}: logfile" >\n+ <filter>log</filter>\n+ </data>\n+ </xml>\n+ <token name="@LOG@"><![CDATA[\n+ #if $log:\n+ --log=\'$out_log\'\n+ #end if\n+ --log2stderr\n+ ]]></token>\n+\n </macros>\n' |
b |
diff -r bc082a79d655 -r cf25b50eff0a test-data/chr19_gene_tags.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/chr19_gene_tags.sam Mon Sep 13 14:50:56 2021 +0000 |
b |
b'@@ -0,0 +1,1492 @@\n+@HD\tVN:1.4\tSO:queryname\n+@SQ\tSN:chr1\tLN:248956422\n+@SQ\tSN:chr2\tLN:242193529\n+@SQ\tSN:chr3\tLN:198295559\n+@SQ\tSN:chr4\tLN:190214555\n+@SQ\tSN:chr5\tLN:181538259\n+@SQ\tSN:chr6\tLN:170805979\n+@SQ\tSN:chr7\tLN:159345973\n+@SQ\tSN:chr8\tLN:145138636\n+@SQ\tSN:chr9\tLN:138394717\n+@SQ\tSN:chr10\tLN:133797422\n+@SQ\tSN:chr11\tLN:135086622\n+@SQ\tSN:chr12\tLN:133275309\n+@SQ\tSN:chr13\tLN:114364328\n+@SQ\tSN:chr14\tLN:107043718\n+@SQ\tSN:chr15\tLN:101991189\n+@SQ\tSN:chr16\tLN:90338345\n+@SQ\tSN:chr17\tLN:83257441\n+@SQ\tSN:chr18\tLN:80373285\n+@SQ\tSN:chr19\tLN:58617616\n+@SQ\tSN:chr20\tLN:64444167\n+@SQ\tSN:chr21\tLN:46709983\n+@SQ\tSN:chr22\tLN:50818468\n+@SQ\tSN:chrX\tLN:156040895\n+@SQ\tSN:chrY\tLN:57227415\n+@SQ\tSN:chrM\tLN:16569\n+@SQ\tSN:GL000008.2\tLN:209709\n+@SQ\tSN:GL000009.2\tLN:201709\n+@SQ\tSN:GL000194.1\tLN:191469\n+@SQ\tSN:GL000195.1\tLN:182896\n+@SQ\tSN:GL000205.2\tLN:185591\n+@SQ\tSN:GL000208.1\tLN:92689\n+@SQ\tSN:GL000213.1\tLN:164239\n+@SQ\tSN:GL000214.1\tLN:137718\n+@SQ\tSN:GL000216.2\tLN:176608\n+@SQ\tSN:GL000218.1\tLN:161147\n+@SQ\tSN:GL000219.1\tLN:179198\n+@SQ\tSN:GL000220.1\tLN:161802\n+@SQ\tSN:GL000221.1\tLN:155397\n+@SQ\tSN:GL000224.1\tLN:179693\n+@SQ\tSN:GL000225.1\tLN:211173\n+@SQ\tSN:GL000226.1\tLN:15008\n+@SQ\tSN:KI270302.1\tLN:2274\n+@SQ\tSN:KI270303.1\tLN:1942\n+@SQ\tSN:KI270304.1\tLN:2165\n+@SQ\tSN:KI270305.1\tLN:1472\n+@SQ\tSN:KI270310.1\tLN:1201\n+@SQ\tSN:KI270311.1\tLN:12399\n+@SQ\tSN:KI270312.1\tLN:998\n+@SQ\tSN:KI270315.1\tLN:2276\n+@SQ\tSN:KI270316.1\tLN:1444\n+@SQ\tSN:KI270317.1\tLN:37690\n+@SQ\tSN:KI270320.1\tLN:4416\n+@SQ\tSN:KI270322.1\tLN:21476\n+@SQ\tSN:KI270329.1\tLN:1040\n+@SQ\tSN:KI270330.1\tLN:1652\n+@SQ\tSN:KI270333.1\tLN:2699\n+@SQ\tSN:KI270334.1\tLN:1368\n+@SQ\tSN:KI270335.1\tLN:1048\n+@SQ\tSN:KI270336.1\tLN:1026\n+@SQ\tSN:KI270337.1\tLN:1121\n+@SQ\tSN:KI270338.1\tLN:1428\n+@SQ\tSN:KI270340.1\tLN:1428\n+@SQ\tSN:KI270362.1\tLN:3530\n+@SQ\tSN:KI270363.1\tLN:1803\n+@SQ\tSN:KI270364.1\tLN:2855\n+@SQ\tSN:KI270366.1\tLN:8320\n+@SQ\tSN:KI270371.1\tLN:2805\n+@SQ\tSN:KI270372.1\tLN:1650\n+@SQ\tSN:KI270373.1\tLN:1451\n+@SQ\tSN:KI270374.1\tLN:2656\n+@SQ\tSN:KI270375.1\tLN:2378\n+@SQ\tSN:KI270376.1\tLN:1136\n+@SQ\tSN:KI270378.1\tLN:1048\n+@SQ\tSN:KI270379.1\tLN:1045\n+@SQ\tSN:KI270381.1\tLN:1930\n+@SQ\tSN:KI270382.1\tLN:4215\n+@SQ\tSN:KI270383.1\tLN:1750\n+@SQ\tSN:KI270384.1\tLN:1658\n+@SQ\tSN:KI270385.1\tLN:990\n+@SQ\tSN:KI270386.1\tLN:1788\n+@SQ\tSN:KI270387.1\tLN:1537\n+@SQ\tSN:KI270388.1\tLN:1216\n+@SQ\tSN:KI270389.1\tLN:1298\n+@SQ\tSN:KI270390.1\tLN:2387\n+@SQ\tSN:KI270391.1\tLN:1484\n+@SQ\tSN:KI270392.1\tLN:971\n+@SQ\tSN:KI270393.1\tLN:1308\n+@SQ\tSN:KI270394.1\tLN:970\n+@SQ\tSN:KI270395.1\tLN:1143\n+@SQ\tSN:KI270396.1\tLN:1880\n+@SQ\tSN:KI270411.1\tLN:2646\n+@SQ\tSN:KI270412.1\tLN:1179\n+@SQ\tSN:KI270414.1\tLN:2489\n+@SQ\tSN:KI270417.1\tLN:2043\n+@SQ\tSN:KI270418.1\tLN:2145\n+@SQ\tSN:KI270419.1\tLN:1029\n+@SQ\tSN:KI270420.1\tLN:2321\n+@SQ\tSN:KI270422.1\tLN:1445\n+@SQ\tSN:KI270423.1\tLN:981\n+@SQ\tSN:KI270424.1\tLN:2140\n+@SQ\tSN:KI270425.1\tLN:1884\n+@SQ\tSN:KI270429.1\tLN:1361\n+@SQ\tSN:KI270435.1\tLN:92983\n+@SQ\tSN:KI270438.1\tLN:112505\n+@SQ\tSN:KI270442.1\tLN:392061\n+@SQ\tSN:KI270448.1\tLN:7992\n+@SQ\tSN:KI270465.1\tLN:1774\n+@SQ\tSN:KI270466.1\tLN:1233\n+@SQ\tSN:KI270467.1\tLN:3920\n+@SQ\tSN:KI270468.1\tLN:4055\n+@SQ\tSN:KI270507.1\tLN:5353\n+@SQ\tSN:KI270508.1\tLN:1951\n+@SQ\tSN:KI270509.1\tLN:2318\n+@SQ\tSN:KI270510.1\tLN:2415\n+@SQ\tSN:KI270511.1\tLN:8127\n+@SQ\tSN:KI270512.1\tLN:22689\n+@SQ\tSN:KI270515.1\tLN:6361\n+@SQ\tSN:KI270516.1\tLN:1300\n+@SQ\tSN:KI270517.1\tLN:3253\n+@SQ\tSN:KI270518.1\tLN:2186\n+@SQ\tSN:KI270519.1\tLN:138126\n+@SQ\tSN:KI270521.1\tLN:7642\n+@SQ\tSN:KI270522.1\tLN:5674\n+@SQ\tSN:KI270528.1\tLN:2983\n+@SQ\tSN:KI270529.1\tLN:1899\n+@SQ\tSN:KI270530.1\tLN:2168\n+@SQ\tSN:KI270538.1\tLN:91309\n+@SQ\tSN:KI270539.1\tLN:993\n+@SQ\tSN:KI270544.1\tLN:1202\n+@SQ\tSN:KI270548.1\tLN:1599\n+@SQ\tSN:KI270579.1\tLN:31033\n+@SQ\tSN:KI270580.1\tLN:1553\n+@SQ\tSN:KI270581.1\tLN:7046\n+@SQ\tSN:KI270582.1\tLN:6504\n+@SQ\tSN:KI270583.1\tLN:1400\n+@SQ\tSN:KI270584.1\tLN:4513\n+@SQ\tSN:KI270587.1\tLN:2969\n+@SQ\tSN:KI270588.1\tLN:6158\n+@SQ\tSN:KI270589.1\tLN:44474\n+@SQ\tSN:KI270590.1\tLN:4685\n+@SQ\tSN:KI270591.1\tLN:5796\n+@SQ\tSN:KI270593.1\tLN:3041\n+@SQ\tSN:KI270706.1\tLN:175055\n+@SQ\tSN:KI270707.1\tLN:32032\n+@SQ\tSN:KI270708.1\tLN:127682\n+@SQ\tSN:KI270709.1\tLN'..b'44:H5FCJBGXY:4:23606:6087:6093:CELL_ACAAGG:UMI_GCACAA:SAMPLE_CGATGT:UID_CGATGTACAAGGGCACAA\t16\tchr19\t647844\t255\t60M\t*\t0\t0\tTAATTTAAAATTATAAAAATCTTTCCACCGCTGAACGTTTAGAGGGTGAGGTTAGACAGA\t/<A<E<AAEE/AE6EEEEEAE/AEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEAAAAA\tNH:i:1\tHI:i:1\tAS:i:59\tnM:i:0\tXF:Z:ENSG00000070423.17\n+NS500668:144:H5FCJBGXY:4:23606:19406:18847:CELL_ACAAGG:UMI_ACGTAT:SAMPLE_CGATGT:UID_CGATGTACAAGGACGTAT\t0\tchr19\t807553\t255\t60M\t*\t0\t0\tCCTTCCCCTTTCCCTATTTTTTTTCTTGCCCTGATCCGGAATTTCTTTGCCAACTGACTG\tAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEAEE/EEEEEEEEEEEEEEEEEEEEEA<AA/\tNH:i:1\tHI:i:1\tAS:i:59\tnM:i:0\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23606:22946:6130:CELL_ACAAGG:UMI_GCTAGG:SAMPLE_CGATGT:UID_CGATGTACAAGGGCTAGG\t0\tchr19\t812039\t255\t60M\t*\t0\t0\tTGTTGTGAGACCCGAGGGGCGGCGGCGCGGTTTTTTATGGTGACACAAATGTATATTTTG\tAAAAA//EEEEE<<E///EAEEEEEEEEEA/<E//EEE/E//EEEEEEEA/EEEEE/<A/\tNH:i:1\tHI:i:1\tAS:i:59\tnM:i:0\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23607:1822:13398:CELL_ACAAGG:UMI_GCCGAA:SAMPLE_CGATGT:UID_CGATGTACAAGGGCCGAA\t16\tchr19\t938931\t255\t13S47M\t*\t0\t0\tGCCAAAAAATGGTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTTGTTTTGTCACCCAGGC\t/<//EA/A////<AEEEEEEAEEEEEEEEEEEEEEEEEEE/EEEEEEEEE/EEEEAAAAA\tNH:i:1\tHI:i:1\tAS:i:46\tnM:i:0\tXF:Z:Unassigned_NoFeatures\n+NS500668:144:H5FCJBGXY:4:23607:10971:2167:CELL_ACAAGG:UMI_GCTAGG:SAMPLE_CGATGT:UID_CGATGTACAAGGGCTAGG\t0\tchr19\t812062\t255\t58M2S\t*\t0\t0\tGGCGCGCTTTTTTAAGGAGACACAAATATATATTTTGCTAACAGCAATTCCAAGCTCACA\t//A/A6/EEEEEEE/EE/E6EEEEEA//E/E///A/E<///</<6/E//A<E///<////\tNH:i:1\tHI:i:1\tAS:i:47\tnM:i:5\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23608:13283:1095:CELL_ACAAGG:UMI_AGTTTA:SAMPLE_CGATGT:UID_CGATGTACAAGGAGTTTA\t0\tchr19\t812062\t255\t60M\t*\t0\t0\tGGCGCCGTTTTTTATGGTGACACAAATGTATATTTTGCTAACAGCAATTCCATGCTCAGT\t//A/A6/E//E</AA//E/<E/E6EE///E/</A//E/AAA///EEAA/E/<////EA/<\tNH:i:1\tHI:i:1\tAS:i:55\tnM:i:2\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23609:5585:17562:CELL_ACAAGG:UMI_GCTAGG:SAMPLE_CGATGT:UID_CGATGTACAAGGGCTAGG\t0\tchr19\t812089\t255\t60M\t*\t0\t0\tGTATATTTTGCTAACAGCAATTCCAGGCTCAGTATTGTGACCGCGGAACCACAGGGGACC\tAAA/AA////E/EEE/EEEE//A/E//EEE6E/EE/</6/EA<//E//E/EEEA////A<\tNH:i:1\tHI:i:1\tAS:i:57\tnM:i:1\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23611:19058:11533:CELL_TTCACG:UMI_GCCTTA:SAMPLE_CGATGT:UID_CGATGTTTCACGGCCTTA\t0\tchr19\t811797\t255\t60M\t*\t0\t0\tCCTGCAGTCGCCTAGAAAACTTGCTCTCAAACTTCAGGGTTTTTTCTTCCTTCAAATTTT\tAAAAAEEEEAEEAEEEE/AEEEEE/EE/EA/EEAAEEEEEEEAEEEEEE<<AEEEEEEEE\tNH:i:1\tHI:i:1\tAS:i:59\tnM:i:0\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23611:23053:18525:CELL_ACAAGG:UMI_CCAGCA:SAMPLE_CGATGT:UID_CGATGTACAAGGCCAGCA\t0\tchr19\t812062\t255\t60M\t*\t0\t0\tGGCGCGGTTTTTTATTTTTACAAAAATTTATATTTTGCTAACAGCAATTCCAGGCTCAGT\t/AA/A6E/E/EEE/E//E6////AAEE6E/EEEEEAE////AEAEEEAEE/E/AEEE<EE\tNH:i:1\tHI:i:1\tAS:i:49\tnM:i:5\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23611:23963:1840:CELL_TTCACG:UMI_GCTTTT:SAMPLE_CGATGT:UID_CGATGTTTCACGGCTTTT\t16\tchr19\t647540\t255\t50M10S\t*\t0\t0\tAGTGTATTTTAAATAGCTTTCAAGATACACATATTTTTTCCTTTAAAAAAAAAGTCTGGT\t/EE<E/EEE//EAEEA<EEE//EEEEEEAEAEEEEEE/E6EEEEEEEEEEAEEEEAAAAA\tNH:i:1\tHI:i:1\tAS:i:49\tnM:i:0\tXF:Z:ENSG00000070423.17\n+NS500668:144:H5FCJBGXY:4:23612:8565:12159:CELL_ACAAGG:UMI_ACATAG:SAMPLE_CGATGT:UID_CGATGTACAAGGACATAG\t0\tchr19\t812062\t255\t2S58M\t*\t0\t0\tAGGGCGCGGTTTTTTATGGTGACACAAATGTATATTTTGCTAACAGCAATTCCAGGCTCA\tA///A/AE/E/E//E/EEE/EEAEEEEEAEEAE/AAA/66EEEEEA<EE/<AEAE/E/AE\tNH:i:1\tHI:i:1\tAS:i:57\tnM:i:0\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23612:14819:6245:CELL_TTCACG:UMI_CTCATT:SAMPLE_CGATGT:UID_CGATGTTTCACGCTCATT\t0\tchr19\t994331\t255\t60M\t*\t0\t0\tGGCCGGCCAAGTGAGGCCCGGAGACCCCGGCCCGAGGCGCCCAGGCCTGAGCCCCATGCC\tAAAAAEEEEEEEE/EEEEEEE/E/EEEAE<EEAE<</6EEEE/</<EE/AAAEEA<A/<A\tNH:i:1\tHI:i:1\tAS:i:59\tnM:i:0\tXF:Z:ENSG00000065268.10\n+NS500668:144:H5FCJBGXY:4:23612:25822:12295:CELL_TTCACG:UMI_GATTGT:SAMPLE_CGATGT:UID_CGATGTTTCACGGATTGT\t0\tchr19\t812059\t255\t60M\t*\t0\t0\tGGCGGCGCGGTTTTTTATGGTGACACAAATGTATATTTTGCTAACAGCAATTCCAGGCTC\tAAAAAEEEEEEEEEEEEEEEEEAEEEAAAEAEAE/EE//AEA/EEEEEEEEAE6A<<AA<\tNH:i:1\tHI:i:1\tAS:i:59\tnM:i:0\tXF:Z:ENSG00000011304.18\n' |
b |
diff -r bc082a79d655 -r cf25b50eff0a test-data/dedup_out1.bam |
b |
Binary file test-data/dedup_out1.bam has changed |
b |
diff -r bc082a79d655 -r cf25b50eff0a test-data/dedup_out2.bam |
b |
Binary file test-data/dedup_out2.bam has changed |
b |
diff -r bc082a79d655 -r cf25b50eff0a test-data/dedup_out3.bam |
b |
Binary file test-data/dedup_out3.bam has changed |
b |
diff -r bc082a79d655 -r cf25b50eff0a test-data/dedup_out4.bam |
b |
Binary file test-data/dedup_out4.bam has changed |
b |
diff -r bc082a79d655 -r cf25b50eff0a test-data/dedup_out5.bam |
b |
Binary file test-data/dedup_out5.bam has changed |
b |
diff -r bc082a79d655 -r cf25b50eff0a test-data/dedup_out6.bam |
b |
Binary file test-data/dedup_out6.bam has changed |
b |
diff -r bc082a79d655 -r cf25b50eff0a test-data/group_in2.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/group_in2.sam Mon Sep 13 14:50:56 2021 +0000 |
b |
b'@@ -0,0 +1,403 @@\n+@HD\tVN:1.0\tSO:coordinate\n+@SQ\tSN:chrM\tLN:17009\n+@PG\tID:hisat2\tPN:hisat2\tVN:2.0.5\tCL:"/var/galaxy/tool_dependencies/_conda/envs/mulled-v1-2bb67013a57cac1e35f407d06d1f347baae35159f498496f1e36f84784069212/bin/hisat2-align-s --wrapper basic-0 -p 4 -x genome -1 input_f.fastq -2 input_r.fastq"\n+chrM_111_723_3:2:0_4:1:0_c3/1_CT\t163\tchrM\t114\t60\t1S197M\t=\t524\t611\tCGACTCAGTCAAATATGTGGTTGCTGGGCTTATTCTCTATGCGGGTTCTCCACACGCACAGACAGTCAGGGTGCTATTCAGTCAATGGTCACAGGACATATACTTAAATTCCTATTGTTCCACAGGACACGGGATGCGCGCACCCAGGTTTGCGTGCACACGTGTACACGTACACACGTAGACACGTACACACGTACA\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-16\tZS:i:-28\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:15A1T26G100C33C17\tYS:i:-15\tYT:Z:CP\tNH:i:1\n+chrM_513_1000_4:1:0_4:2:0_60/1_GT\t163\tchrM\t516\t60\t1S197M\t=\t801\t486\tCACACACGTGTACACGTACACACGTACACACGTACACACGTACACACTTATCCACGCGAACGCTTTAATTTAAGTAAATAACTCGCTTAATCAAACCCCCCTTACCCCCCGTTAACCTTATTTCTAATAATACGTGCCTATTTATGTCTTGCCCAACCCCACAAACAAGACTAGACCGTACCTAAATATAAGGCCTAA\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-16\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:46G3A31A39A29A44\tYS:i:-18\tYT:Z:CP\tNH:i:1\n+chrM_518_1106_2:1:0_5:2:0_44/1_CA\t163\tchrM\t520\t60\t198M\t=\t907\t587\tACGTGTACACGTACACACTTACACACGTACACACGTACACACTTATACACGCGAACGCTTTAATTTAAGTAAATAACTAGCTTAATCAAACCCCCCTTACCCCCCGTAAACCTTATTTATAATAATACGTGCCTATTTATGTCTTGCCAAACCCCACAAACAAGACTAGACCGTACCTAAATATAAGGCCTAAGAAAA\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-9\tXN:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tNM:i:3\tMD:Z:18G23G64T90\tYS:i:-21\tYT:Z:CP\tNH:i:1\n+chrM_111_723_3:2:0_4:1:0_c3/2_CT\t83\tchrM\t524\t60\t200M\t=\t114\t-611\tGTACACGTACACACGTACACACGTACACACGTACACACTTATACACCCGAACGCTTTATTTTAAGTAAATAACTAGCTTAATCAAACCCCCCTTACCCCCCGTTAACCTTATTTATAATAATACGTGCCTATTTATGTCTTGCCAAACCCCACAAACAAGACTAGACCGTACCTAAATATTAGGCCTAAGATAACGCTTA\t22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-15\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:38G7G11A121A10A8\tYS:i:-16\tYT:Z:CP\tNH:i:1\n+chrM_578_1113_4:1:0_4:2:0_19/1_TT\t163\tchrM\t580\t60\t198M\t=\t914\t534\tTAATTTAAGTAAATAACTAGCTTAATCAAACCCCCCTTACCCCCCGTTAACCTTATTTATAATAATCCGTGCCTATTTATGTCTTGCCAAACCCCACAAACAAGACTAGACCGTACCTAAATATAAGGCCTACGAAAACGCTTATAAGCTTACCAATCCCCTATTATTACTAGCTACTACGCCTAAATCATAACTCTG\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-15\tZS:i:-24\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:66A65A46A0T0A16\tYS:i:-18\tYT:Z:CP\tNH:i:1\n+chrM_637_1116_4:2:0_2:5:0_8f/2_AT\t99\tchrM\t637\t60\t200M\t=\t917\t478\tTATCATAATACGTGCCTATTTATGTCATGCCAAACCCCACAAACAAGACTAGACCGTACCTAAATATAAGGCCTAAGACAACGCTTATAAGCTTACCAATCCCCTATTATTACTAGCTACTAAGCCTAAATCATAACTCTGTTCGCAGTTATCTATAGATATACCGACCTGACTCTAATTCGACCCTATCGAACAACATT\t22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-18\tZS:i:-19\tXN:i:0\tXM:i:6\tXO:i:0\tXG:i:0\tNM:i:6\tMD:Z:3A22T51A44T0A57T17\tYS:i:-21\tYT:Z:CP\tNH:i:1\n+chrM_513_1000_4:1:0_4:2:0_60/2_GT\t83\tchrM\t801\t60\t200M\t=\t516\t-486\tCGACCTGACTCTAATTCGTCCCTATCGAACAACATTTTACATGTCTACGTTAGCACCACATCCCAGTTAATGTAGCGTAAACCTATAAAGCAAGGCACTGAAAATGCCTAGATGAGTAGCCAGACTCCATAAACACAAAAGTTTGGTCCTGGCCTTTCCATTAGTTATTAATAAGATTACACATGCAAGCCTCCGCAGCC\t22222222222222222222222222222222222222222222222222222222222222222222'..b'TACATAAGACATACTATGTATATCGTGCATTAATTGCTAGTCCCCATAAATATTAAGCATGTACAGTAGTTTATATATATTACATAAGACATAATAGTGCTTAATCGGGCATTCACCTTAATTCTAGGACAGTCTTCTATGGAACTCAACTATTCCAAAGAT\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-16\tZS:i:-36\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:18A6T57G59T35C17\tYS:i:-15\tYT:Z:CP\tNH:i:1\n+chrM_16373_16837_7:2:0_4:1:0_9/2_GT\t83\tchrM\t16638\t60\t200M\t=\t16375\t-463\tATTAAGCATGTACAGTAGTTTATATATATTACATAACACATACTATGTATTTCGTGCATTAATTGCTAGTCCCAATAAATATTAAGCATGTACAGTTGTTTATATATATTACATAAGACATAATAGTGCTTAATCGTGCATTCACCTTAATTCTAGGACAGTCTTCTATGGACCTCAACTATTCCAAAGAGCTTAATCAC\t22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-15\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:36G13A22C2G19A103\tYS:i:-27\tYT:Z:CP\tNH:i:1\n+chrM_16363_16864_3:2:1_3:2:0_ba/2_TA\t83\tchrM\t16665\t60\t200M\t=\t16365\t-500\tATTACATAAGACATACTATGTATATCGTACATTAATTGCTAGTCCCCATAAATATTAAGCATGTACAGTAGTTTATATATATTAAATAAGACATAATAGTGCTTTATAGTGCATTCACCTTAATTCTAGGACAGTCTTCTATGGACCTCAACTATTCCAAAGAGCTTAATCACCTGGCCTCGAGAAACCAGCAATCCTTG\t22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-15\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:28G20G34C19A2C92\tYS:i:-23\tYT:Z:CP\tNH:i:1\n+chrM_16484_16992_6:0:0_3:0:0_2c/1_AG\t147\tchrM\t16793\t60\t198M\t=\t16484\t-507\tGGACAGTCTTCTATGGACCTCAACGATTCCAAAGAGCTTAATCACCTGGCCTCGAGAAACCAGCAATCCTTGCTCGAACGTGTACCTCTTCTCGCTCCGGGCCCATTTCAACGTGGGGGTTTCTATAACGGAACTATAACTGGCATATGGTTCTTACTTCAGGGCCATAAAATCCTTGAAACCAATCCTTCAGTTCTC\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-9\tXN:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tNM:i:3\tMD:Z:24T113C7C51\tYS:i:-18\tYT:Z:CP\tNH:i:1\n+chrM_12782_13288_5:1:0_6:0:0_3/2_TA\t77\t*\t0\t0\t*\t*\t0\t0\tCAACTATAATATTTATCTCCTCAGGAGAACAAGCAATTATCTCAAACTGACACTGACTATCAATCCAAACTCTCAAGCTATCACTAAGCTTTAAAATAGATTAATTCTCAACCATCATTATCCGTGTAGCGCTTTTCGTCACATGGTCCATCATAGAATTCTCAATGTGGTACATGCACTCCGACCCATACATCAACCGA\t22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tYT:Z:UP\n+chrM_12782_13288_5:1:0_6:0:0_3/1_TA\t141\t*\t0\t0\t*\t*\t0\t0\tCATCGAAGCCCTAGTAATGGAATATTTAGGTTCTCGTGTTGGGTGATAAAGATTTGTTGGAAGTCCCATGCGTTTGAGTTGGTTAGAAATCCTGCTATGGCTATGATTAAGCCTACGTCTCCAATTCGGTTGTAGAGGATTGCTTGTAGGGCGGCAGTGTTTGCATCTGCTCGGCCATATCATCATCCGATAAGTCGA\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tYT:Z:UP\n+chrM_15422_15911_4:2:0_5:1:0_99/2_GA\t77\t*\t0\t0\t*\t*\t0\t0\tATGGGATACGTCCTGCCATGAGGCCAAATGTCCTTCTGAGGAGCAACGGTAATCCCTAACCTGCTGTCAGCAATTCCATACCTCGGGACTGAACTAGTAGAATGAATCTGAGGGGGGTTCTCAGTAGACAAAGCCACCCTAACACGATTCTTTGGCTTCCACTTCATTCTTTCATTCATTATCTCAGCCTTAGCAGGAGT\t22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tYT:Z:UP\n+chrM_15422_15911_4:2:0_5:1:0_99/1_GA\t141\t*\t0\t0\t*\t*\t0\t0\tGCTGCCCCTAGTTTGTTAGGGATGGATCGGAGAATTGCGTATGCGAATAGGAAGTATCATTCAGGTTAAATATGGGGAGGTGAATTTAAAGGGTTGGCTGGGATGTAGTTGTGTGGGTCTCCTAGCAGGTCTGGTGAAAATAGGACGAGTAGTATGAGTGTTAAAACTAGTACTAGAAGACCTAGGATTTCTTTGATT\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tYT:Z:UP\n' |
b |
diff -r bc082a79d655 -r cf25b50eff0a test-data/group_out4.bam |
b |
Binary file test-data/group_out4.bam has changed |
b |
diff -r bc082a79d655 -r cf25b50eff0a test-data/group_out4.tab --- a/test-data/group_out4.tab Wed Feb 10 19:31:44 2021 +0000 +++ b/test-data/group_out4.tab Mon Sep 13 14:50:56 2021 +0000 |
b |
b'@@ -0,0 +1,498 @@\n+read_id\tcontig\tposition\tgene\tumi\tumi_count\tfinal_umi\tfinal_umi_count\tunique_id\n+chrM_81_583_3:0:0_2:0:0_f4/1_TT\tchrM\t80\tNA\tTT\t1\tTT\t1\t0\n+chrM_110_732_3:0:0_2:0:0_160/1_CC\tchrM\t109\tNA\tCC\t1\tCC\t1\t1\n+chrM_118_613_6:0:0_4:0:0_169/1_AG\tchrM\t117\tNA\tAG\t1\tAG\t1\t2\n+chrM_149_684_2:0:0_2:0:0_6e/1_TA\tchrM\t148\tNA\tTA\t1\tTA\t1\t3\n+chrM_152_616_6:0:0_2:0:0_84/1_GC\tchrM\t151\tNA\tGC\t1\tGC\t1\t4\n+chrM_247_748_8:0:0_1:0:0_1b7/1_GC\tchrM\t246\tNA\tGC\t1\tGC\t1\t5\n+chrM_280_772_1:0:0_9:0:0_31/1_CG\tchrM\t279\tNA\tCG\t1\tCG\t1\t6\n+chrM_292_819_2:0:0_1:0:0_18d/1_CA\tchrM\t291\tNA\tCA\t1\tCA\t1\t7\n+chrM_390_890_3:0:0_6:0:0_bf/1_CA\tchrM\t389\tNA\tCA\t1\tCA\t1\t8\n+chrM_447_921_2:0:0_3:0:0_1cc/1_AC\tchrM\t446\tNA\tAC\t1\tAC\t1\t9\n+chrM_469_983_2:0:0_1:0:0_121/1_AC\tchrM\t468\tNA\tAC\t1\tAC\t1\t10\n+chrM_541_1074_3:0:0_8:1:0_22/1_AC\tchrM\t540\tNA\tAC\t1\tAC\t1\t11\n+chrM_8_556_3:0:0_2:0:0_1b1/1_AC\tchrM\t556\tNA\tAC\t1\tAC\t1\t12\n+chrM_112_577_2:0:0_4:0:0_17b/1_CG\tchrM\t577\tNA\tCG\t1\tCG\t1\t13\n+chrM_627_1063_6:0:0_4:1:0_12a/1_AA\tchrM\t626\tNA\tAA\t1\tAA\t1\t14\n+chrM_164_650_5:0:0_3:0:0_164/1_AC\tchrM\t650\tNA\tAC\t1\tAC\t1\t15\n+chrM_200_695_6:0:0_2:0:0_5a/1_TA\tchrM\t695\tNA\tTA\t3\tTA\t3\t16\n+chrM_168_695_5:0:0_2:0:0_1af/1_TA\tchrM\t695\tNA\tTA\t3\tTA\t3\t16\n+chrM_170_695_1:0:0_6:0:0_1e3/1_TA\tchrM\t695\tNA\tTA\t3\tTA\t3\t16\n+chrM_209_705_3:0:0_2:0:0_1b3/1_TA\tchrM\t705\tNA\tTA\t1\tTA\t1\t17\n+chrM_288_807_3:0:0_5:0:0_8e/1_AG\tchrM\t807\tNA\tAG\t1\tAG\t1\t18\n+chrM_818_1274_1:1:0_4:0:0_124/1_TC\tchrM\t817\tNA\tTC\t1\tTC\t1\t19\n+chrM_442_872_4:0:0_6:0:0_146/1_AT\tchrM\t872\tNA\tAT\t1\tAT\t1\t20\n+chrM_460_919_3:0:0_1:0:0_0/1_GA\tchrM\t919\tNA\tGA\t1\tGA\t1\t21\n+chrM_384_950_5:0:0_2:0:0_34/1_GG\tchrM\t950\tNA\tGG\t1\tGG\t1\t22\n+chrM_476_980_5:0:0_2:0:0_133/1_TA\tchrM\t980\tNA\tTA\t1\tTA\t1\t23\n+chrM_552_999_4:0:0_5:0:0_bb/1_AT\tchrM\t999\tNA\tAT\t1\tAT\t1\t24\n+chrM_512_1030_5:0:0_7:1:0_a5/1_AC\tchrM\t1030\tNA\tAC\t1\tAC\t1\t25\n+chrM_1103_1634_5:0:0_3:0:0_36/1_TA\tchrM\t1102\tNA\tTA\t1\tTA\t1\t26\n+chrM_648_1148_3:0:0_3:1:0_1d1/1_TT\tchrM\t1148\tNA\tTT\t1\tTT\t1\t27\n+chrM_1167_1797_8:0:0_2:1:0_14e/1_GT\tchrM\t1166\tNA\tGT\t1\tGT\t1\t28\n+chrM_619_1169_3:0:0_5:1:0_33/1_CC\tchrM\t1169\tNA\tCC\t1\tCC\t1\t29\n+chrM_668_1248_0:0:0_4:0:0_fa/1_TT\tchrM\t1248\tNA\tTT\t1\tTT\t1\t30\n+chrM_1288_1759_4:0:0_2:1:0_a1/1_TA\tchrM\t1287\tNA\tTA\t1\tTA\t1\t31\n+chrM_1327_1786_4:0:0_3:1:0_1ad/1_CA\tchrM\t1326\tNA\tCA\t1\tCA\t1\t32\n+chrM_866_1351_5:1:0_6:0:0_1c9/1_TA\tchrM\t1351\tNA\tTA\t1\tTA\t1\t33\n+chrM_853_1366_6:1:0_4:0:0_13b/1_TC\tchrM\t1366\tNA\tTC\t1\tTC\t1\t34\n+chrM_1399_1851_2:0:0_11:1:0_85/1_AT\tchrM\t1398\tNA\tAT\t1\tAT\t1\t35\n+chrM_946_1444_4:1:0_6:0:0_113/1_TT\tchrM\t1444\tNA\tTT\t1\tTT\t1\t36\n+chrM_943_1485_2:1:0_5:0:0_1e2/1_AT\tchrM\t1485\tNA\tAT\t1\tAT\t1\t37\n+chrM_1022_1501_4:0:0_5:0:0_132/1_GA\tchrM\t1501\tNA\tGA\t1\tGA\t1\t38\n+chrM_1504_2013_4:1:0_3:0:0_10b/1_AT\tchrM\t1503\tNA\tAT\t1\tAT\t1\t39\n+chrM_1505_1934_1:1:0_5:0:0_157/1_TC\tchrM\t1504\tNA\tTC\t1\tTC\t1\t40\n+chrM_997_1511_2:1:0_1:0:0_1d8/1_AT\tchrM\t1511\tNA\tAT\t1\tAT\t1\t41\n+chrM_1521_2070_5:1:0_3:0:0_1a/1_TA\tchrM\t1520\tNA\tTA\t1\tTA\t1\t42\n+chrM_1575_2126_5:1:0_2:1:0_18c/1_TA\tchrM\t1574\tNA\tTA\t1\tTA\t1\t43\n+chrM_1063_1598_5:0:0_5:0:0_f5/1_TG\tchrM\t1598\tNA\tTG\t1\tTG\t1\t44\n+chrM_1605_2128_3:1:0_4:1:0_1ea/1_TT\tchrM\t1604\tNA\tTT\t1\tTT\t1\t45\n+chrM_1065_1609_3:0:0_5:0:0_18e/1_TA\tchrM\t1609\tNA\tTA\t1\tTA\t1\t46\n+chrM_1140_1619_1:0:0_4:0:0_74/1_TT\tchrM\t1619\tNA\tTT\t1\tTT\t1\t47\n+chrM_1111_1626_4:0:0_2:0:0_186/1_AC\tchrM\t1626\tNA\tAC\t1\tAC\t1\t48\n+chrM_1664_2135_0:1:0_3:1:0_179/1_TT\tchrM\t1663\tNA\tTT\t1\tTT\t1\t49\n+chrM_1699_2147_1:0:0_7:0:0_10c/1_AA\tchrM\t1698\tNA\tAA\t1\tAA\t1\t50\n+chrM_1706_2240_3:0:0_6:0:0_99/1_GT\tchrM\t1705\tNA\tGT\t1\tGT\t1\t51\n+chrM_1756_2309_0:0:0_3:0:0_c3/1_AT\tchrM\t1755\tNA\tAT\t1\tAT\t1\t52\n+chrM_1344_1758_4:0:0_3:1:0_75/1_TT\tchrM\t1758\tNA\tTT\t1\tTT\t1\t53\n+chrM_1223_1777_5:0:0_6:1:0_42/1_TG\tchrM\t1777\tNA\tTG\t1\tTG\t1\t54\n+chrM_1790_2351_4:0:0_4:0:0_f3/1_CC\tchrM\t1789\tNA\tCC\t1\tCC\t1\t55\n+chrM_1308_1807_4:0:0_4:1:0_45/1_TA\tchrM\t1807\tNA\tTA\t1\tTA\t1\t56\n+chrM_1814_2315_6:0:0_3:0:0_12d/1_AA\tchrM\t1813\tNA\tAA\t1\tAA\t1\t57\n+chrM_1862_2304_4:0:0_1:0:0_19d/1_AA\tchrM\t1861\tNA\tAA\t1\tAA\t1\t58\n+chrM_1363_1869_5:0:0_3:1:0_aa/1_TA\tchrM\t1869\tNA\tTA\t1\tTA\t1\t59\n+chrM_1363_1887_2:0:0_3:0:0_d3/1_CA\tchrM\t1887\tNA\tCA\t1\tCA\t1\t60\n+chrM_1428_1904_7:0:0_1:0:0_1b0/1_AC\tchrM\t1904\tNA\tAC\t1\tAC\t1\t6'..b'\tTA\t1\t431\n+chrM_14135_14581_5:0:0_5:0:0_1de/1_AT\tchrM\t14581\tNA\tAT\t1\tAT\t1\t432\n+chrM_14612_15169_2:0:0_5:0:0_e4/1_TT\tchrM\t14611\tNA\tTT\t1\tTT\t1\t433\n+chrM_14668_15109_2:0:0_3:0:0_3d/1_TG\tchrM\t14667\tNA\tTG\t1\tTG\t1\t434\n+chrM_14696_15126_2:0:0_2:0:0_a7/1_AA\tchrM\t14695\tNA\tAA\t1\tAA\t1\t435\n+chrM_14727_15156_4:0:0_4:0:0_178/1_GA\tchrM\t14726\tNA\tGA\t1\tGA\t1\t436\n+chrM_14245_14783_3:0:0_1:0:0_9d/1_GG\tchrM\t14783\tNA\tGG\t1\tGG\t1\t437\n+chrM_14314_14802_5:0:0_3:0:0_191/1_GG\tchrM\t14802\tNA\tGG\t1\tGG\t1\t438\n+chrM_14817_15261_5:0:0_4:0:0_65/1_AT\tchrM\t14816\tNA\tAT\t1\tAT\t1\t439\n+chrM_14863_15320_2:0:0_4:0:0_d8/1_AA\tchrM\t14862\tNA\tAA\t1\tAA\t1\t440\n+chrM_14869_15387_2:0:0_7:0:0_1f/1_AC\tchrM\t14868\tNA\tAC\t1\tAC\t1\t441\n+chrM_14888_15340_3:0:0_3:0:0_112/1_GG\tchrM\t14887\tNA\tGG\t1\tGG\t1\t442\n+chrM_14368_14894_6:0:0_5:0:0_40/1_TC\tchrM\t14894\tNA\tTC\t1\tTC\t1\t443\n+chrM_14410_14941_6:0:0_3:0:0_e5/1_TA\tchrM\t14941\tNA\tTA\t1\tTA\t1\t444\n+chrM_14962_15543_5:0:0_8:0:0_46/1_CA\tchrM\t14961\tNA\tCA\t1\tCA\t1\t445\n+chrM_14456_14987_3:0:0_5:0:0_ab/1_GT\tchrM\t14987\tNA\tGT\t1\tGT\t1\t446\n+chrM_15070_15569_5:0:0_5:0:0_cf/1_CA\tchrM\t15069\tNA\tCA\t1\tCA\t1\t447\n+chrM_15140_15686_2:0:0_5:0:0_185/1_CC\tchrM\t15139\tNA\tCC\t1\tCC\t1\t448\n+chrM_15142_15661_7:0:0_3:0:0_11e/1_CT\tchrM\t15141\tNA\tCT\t1\tCT\t1\t449\n+chrM_15192_15694_5:0:0_2:0:0_f7/1_CA\tchrM\t15191\tNA\tCA\t1\tCA\t1\t450\n+chrM_15211_15685_5:0:0_2:0:0_1d7/1_AC\tchrM\t15210\tNA\tAC\t1\tAC\t1\t451\n+chrM_15225_15786_3:0:0_6:0:0_17e/1_TT\tchrM\t15224\tNA\tTT\t1\tTT\t1\t452\n+chrM_15258_15810_4:0:0_6:0:0_5f/1_TT\tchrM\t15257\tNA\tTT\t1\tTT\t1\t453\n+chrM_14817_15317_5:0:0_2:0:0_59/1_GC\tchrM\t15317\tNA\tGC\t1\tGC\t1\t454\n+chrM_15324_15836_4:0:0_3:0:0_94/1_AC\tchrM\t15323\tNA\tAC\t1\tAC\t1\t455\n+chrM_15365_15880_4:1:0_3:0:0_80/1_CA\tchrM\t15364\tNA\tCA\t1\tCA\t1\t456\n+chrM_15408_15863_2:1:0_3:0:0_1e6/1_AG\tchrM\t15407\tNA\tAG\t1\tAG\t1\t457\n+chrM_15439_15924_0:0:0_4:0:0_172/1_TG\tchrM\t15438\tNA\tTG\t1\tTG\t1\t458\n+chrM_14931_15457_2:0:0_4:1:0_1a1/1_AA\tchrM\t15457\tNA\tAA\t1\tAA\t1\t459\n+chrM_15547_16054_5:0:0_1:0:0_af/1_GA\tchrM\t15546\tNA\tGA\t1\tGA\t1\t460\n+chrM_15134_15560_3:0:0_4:0:0_1c4/1_GG\tchrM\t15560\tNA\tGG\t1\tGG\t1\t461\n+chrM_15575_16118_5:0:0_4:0:0_1aa/1_GC\tchrM\t15574\tNA\tGC\t1\tGC\t1\t462\n+chrM_15052_15629_7:0:0_9:0:0_15f/1_GA\tchrM\t15629\tNA\tGA\t1\tGA\t1\t463\n+chrM_15698_16224_5:0:0_6:0:0_138/1_AC\tchrM\t15697\tNA\tAC\t1\tAC\t1\t464\n+chrM_15247_15721_2:1:0_6:0:0_17/1_TC\tchrM\t15721\tNA\tTC\t1\tTC\t1\t465\n+chrM_15218_15763_2:0:0_2:0:0_171/1_AG\tchrM\t15763\tNA\tAG\t1\tAG\t1\t466\n+chrM_15860_16419_5:0:0_8:0:0_53/1_TA\tchrM\t15859\tNA\tTA\t1\tTA\t1\t467\n+chrM_15864_16329_3:0:0_3:0:0_18b/1_CG\tchrM\t15863\tNA\tCG\t1\tCG\t1\t468\n+chrM_15507_15962_3:0:0_0:0:0_5b/1_GT\tchrM\t15962\tNA\tGT\t1\tGT\t1\t469\n+chrM_15430_15985_5:0:0_3:0:0_2c/1_AT\tchrM\t15985\tNA\tAT\t1\tAT\t1\t470\n+chrM_15706_16238_6:0:0_7:0:0_7b/1_AG\tchrM\t16238\tNA\tAG\t1\tAG\t1\t471\n+chrM_16252_16701_2:1:0_5:0:0_147/1_TA\tchrM\t16251\tNA\tTA\t1\tTA\t1\t472\n+chrM_15753_16280_5:0:0_6:0:0_180/1_TG\tchrM\t16280\tNA\tTG\t1\tTG\t1\t473\n+chrM_15777_16347_2:0:0_6:0:0_16/1_AT\tchrM\t16347\tNA\tAT\t1\tAT\t1\t474\n+chrM_16366_16911_6:1:0_6:1:1_168/1_CC\tchrM\t16365\tNA\tCC\t1\tCC\t1\t475\n+chrM_16370_16833_6:1:0_6:0:1_1bb/1_AC\tchrM\t16369\tNA\tAC\t1\tAC\t1\t476\n+chrM_16402_16876_2:0:0_1:0:1_98/1_CA\tchrM\t16401\tNA\tCA\t1\tCA\t1\t477\n+chrM_16426_16953_3:0:0_2:2:0_60/1_AA\tchrM\t16425\tNA\tAA\t1\tAA\t1\t478\n+chrM_15986_16442_3:0:0_7:0:0_15d/1_TG\tchrM\t16442\tNA\tTG\t1\tTG\t1\t479\n+chrM_16030_16460_6:0:0_4:1:0_20/1_AT\tchrM\t16460\tNA\tAT\t1\tAT\t1\t480\n+chrM_15909_16463_3:0:0_7:1:0_106/1_GT\tchrM\t16463\tNA\tGT\t1\tGT\t1\t481\n+chrM_15984_16500_4:0:0_1:0:0_6b/1_GT\tchrM\t16500\tNA\tGT\t1\tGT\t1\t482\n+chrM_16070_16513_3:0:0_7:1:0_1e7/1_TA\tchrM\t16513\tNA\tTA\t1\tTA\t1\t483\n+chrM_16064_16572_4:0:0_6:1:0_f1/1_TG\tchrM\t16572\tNA\tTG\t1\tTG\t1\t484\n+chrM_16245_16740_4:1:0_2:0:1_127/1_TA\tchrM\t16742\tNA\tTA\t1\tTA\t1\t485\n+chrM_16315_16834_5:0:0_3:0:1_18/1_TT\tchrM\t16834\tNA\tTT\t1\tTT\t1\t486\n+chrM_16229_16843_2:1:0_2:0:1_c4/1_GC\tchrM\t16843\tNA\tGC\t1\tGC\t1\t487\n+chrM_16420_16905_5:0:0_2:0:1_159/1_GT\tchrM\t16905\tNA\tGT\t2\tGT\t2\t488\n+chrM_16420_16905_5:0:0_2:0:1_159/1_GT\tchrM\t16905\tNA\tGT\t2\tGT\t2\t488\n+chrM_16419_16923_3:0:0_3:0:1_183/1_CG\tchrM\t16923\tNA\tCG\t1\tCG\t1\t489\n+chrM_16585_16993_5:0:1_5:1:0_1b9/1_GA\tchrM\t16993\tNA\tGA\t1\tGA\t1\t490\n' |
b |
diff -r bc082a79d655 -r cf25b50eff0a test-data/out_wl_paired.log --- a/test-data/out_wl_paired.log Wed Feb 10 19:31:44 2021 +0000 +++ b/test-data/out_wl_paired.log Mon Sep 13 14:50:56 2021 +0000 |
b |
@@ -1,14 +1,20 @@ -# output generated by whitelist --bc-pattern=CCCNNNNNNNNXXXXX --subset-reads=0 --stdin=/tmp/tmpibtvD6/files/000/dataset_5.dat --read2-in=/tmp/tmpibtvD6/files/000/dataset_6.dat --expect-cells=5 --error-correct-threshold=3 --method=reads --plot-prefix=OUT --log=/tmp/tmpibtvD6/files/000/dataset_8.dat -# job started at Sun Feb 25 10:50:16 2018 on bag -- e78e4e5b-e99e-426a-8a92-c8b3beeadf18 -# pid: 2385, system: Linux 4.13.0-32-generic #35-Ubuntu SMP Thu Jan 25 09:13:46 UTC 2018 x86_64 +# UMI-tools version: 1.1.2 +# output generated by whitelist --bc-pattern=CCCNNNNNNNNXXXXX --extract-method=string --subset-reads=0 --knee-method=density --stdin=input_read1.gz --read2-in=input_read2.gz --expect-cells=5 --error-correct-threshold=3 --method=reads --plot-prefix=OUT --log=/tmp/tmpcx2d26we/files/0/0/8/dataset_008b1843-bfa2-44fb-9d3c-52695bd9ce74.dat --log2stderr +# job started at Tue Jul 13 15:21:12 2021 on MOLSYB009 -- ba3841c0-b2d5-4188-88ca-4ee241163293 +# pid: 1155608, system: Linux 5.4.0-74-generic #83-Ubuntu SMP Sat May 8 02:35:39 UTC 2021 x86_64 +# allow_threshold_error : False # blacklist_tsv : None # cell_number : False # compresslevel : 6 +# ed_above_threshold : None # error_correct_threshold : 3 # expect_cells : 5 # extract_method : string -# filter_cell_barcodes : False -# log2stderr : False +# filtered_out : None +# filtered_out2 : None +# ignore_suffix : False +# knee_method : density +# log2stderr : True # loglevel : 1 # method : reads # pattern : CCCNNNNNNNNXXXXX @@ -16,25 +22,31 @@ # plot_prefix : OUT # prime3 : None # random_seed : None -# read2_in : /tmp/tmpibtvD6/files/000/dataset_6.dat +# read2_in : input_read2.gz # short_help : None -# stderr : <_io.TextIOWrapper name='<stderr>' mode='w' encoding='UTF-8'> -# stdin : <_io.TextIOWrapper name='/tmp/tmpibtvD6/files/000/dataset_5.dat' mode='r' encoding='UTF-8'> -# stdlog : <_io.TextIOWrapper name='/tmp/tmpibtvD6/files/000/dataset_8.dat' mode='a' encoding='UTF-8'> -# stdout : <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'> +# stderr : <_io.TextIOWrapper name='<stderr>' mode='w' encoding='utf-8'> +# stdin : <_io.TextIOWrapper name='input_read1.gz' encoding='ascii'> +# stdlog : <_io.TextIOWrapper name='/tmp/tmpcx2d26we/files/0/0/8/dataset_008b1843-bfa2-44fb-9d3c-52695bd9ce74.dat' mode='a' encoding='UTF-8'> +# stdout : <_io.TextIOWrapper name='<stdout>' mode='w' encoding='utf-8'> # subset_reads : 0 # timeit_file : None # timeit_header : None # timeit_name : all +# tmpdir : None # whitelist_tsv : None -2018-02-25 10:50:16,016 INFO Starting barcode extraction -2018-02-25 10:50:16,017 INFO Parsed 0 reads -2018-02-25 10:50:16,019 INFO Starting - whitelist determination -2018-02-25 10:50:17,208 INFO Finished - whitelist determination -2018-02-25 10:50:17,208 INFO Starting - finding putative error cell barcodes -2018-02-25 10:50:17,208 INFO Finished - finding putative error cell barcodes -2018-02-25 10:50:17,208 INFO Writing out whitelist -2018-02-25 10:50:17,208 INFO Parsed 100 reads -2018-02-25 10:50:17,208 INFO 100 reads matched the barcode pattern -2018-02-25 10:50:17,208 INFO Found 23 unique cell barcodes -# job finished in 1 seconds at Sun Feb 25 10:50:17 2018 -- 2.35 0.08 0.00 0.00 -- e78e4e5b-e99e-426a-8a92-c8b3beeadf18 +2021-07-13 15:21:12,587 INFO Starting barcode extraction +2021-07-13 15:21:12,588 INFO Parsed 0 reads +2021-07-13 15:21:12,590 INFO Starting - whitelist determination +2021-07-13 15:21:14,249 INFO Finished - whitelist determination +2021-07-13 15:21:14,249 INFO Starting - finding putative error cell barcodes +2021-07-13 15:21:14,249 INFO building bktree +2021-07-13 15:21:14,249 INFO done building bktree +2021-07-13 15:21:14,249 INFO Finished - finding putative error cell barcodes +2021-07-13 15:21:14,249 INFO Top 1 cell barcodes passed the selected threshold +2021-07-13 15:21:14,249 INFO Writing out whitelist +2021-07-13 15:21:14,249 INFO Parsed 100 reads +2021-07-13 15:21:14,249 INFO 100 reads matched the barcode pattern +2021-07-13 15:21:14,249 INFO Found 23 unique cell barcodes +2021-07-13 15:21:14,249 INFO Found 15 total reads matching the selected cell barcodes +2021-07-13 15:21:14,249 INFO Found 85 total reads which can be error corrected to the selected cell barcodes +# job finished in 1 seconds at Tue Jul 13 15:21:14 2021 -- 7.19 0.62 0.08 0.02 -- ba3841c0-b2d5-4188-88ca-4ee241163293 |
b |
diff -r bc082a79d655 -r cf25b50eff0a umi-tools_group.xml --- a/umi-tools_group.xml Wed Feb 10 19:31:44 2021 +0000 +++ b/umi-tools_group.xml Mon Sep 13 14:50:56 2021 +0000 |
[ |
b'@@ -1,115 +1,126 @@\n-<tool id="umi_tools_group" name="UMI-tools group" version="@VERSION@.0">\n+<tool id="umi_tools_group" name="UMI-tools group" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n <description>Extract UMI from fastq files</description>\n+ <expand macro="bio_tools"/>\n <macros>\n <import>macros.xml</import>\n </macros>\n <expand macro="requirements">\n- <requirement type="package" version="1.9">samtools</requirement>\n+ <requirement type="package" version="1.12">samtools</requirement>\n </expand>\n <command detect_errors="exit_code"><![CDATA[\n- #if $input.is_of_type("sam"):\n- #set $input_file = $input\n- #else:\n- ln -sf \'${input}\' \'input.bam\' &&\n- ln -sf \'$input.metadata.bam_index\' \'input.bam.bai\' &&\n- #set $input_file = \'input.bam\'\n- #end if\n+ @LINK_SAM_BAM_INPUT@\n \n umi_tools group\n- --random-seed 0\n- --extract-umi-method $extract_umi_method\n- #if str($extract_umi_method) != \'read_id\':\n- --umi-separator \'$umi_separator\' --umi-tag \'$umi_tag\'\n- #end if\n- --method $method --edit-distance-threshold $edit_distance_threshold\n- $paired $spliced_is_unique --soft-clip-threshold $soft_clip_threshold\n- $read_length $whole_contig --subset $subset $per_contig $per_gene\n- #if $gene_transcript_map:\n- --gene-transcript-map \'$gene_transcript_map\'\n- #end if\n- #if len(str($gene_tag)) > 0:\n- --gene-tag \'$gene_tag\'\n- #end if\n #if $group_output:\n --group-out \'$group_out\'\n #end if\n- #if $input.is_of_type("sam"):\n- --in-sam\n- #end if\n --output-bam\n- -I \'$input_file\' -S grouped.bam &&\n- samtools sort grouped.bam -@ \\${GALAXY_SLOTS:-1} -T "\\${TMPDIR:-.}" -o \'$output\' -O BAM\n+ @GROUPDEDUP_OPTIONS@\n+ @BARCODE_OPTIONS@\n+ @UMI_GROUPING_OPTIONS@\n+ @SAMBAM_OPTIONS@\n+ @FULLSC_OPTIONS@\n+ -I \'$input_file\' -S grouped.bam\n+ @ADVANCED_OPTIONS@\n+ @LOG@\n+ ## TODO using samtools sort is a workaround, for the following error that appears when Galaxy\n+ ## compares the generated file with the one in test-data\n+ ## `Converting history BAM to SAM failed: \'samtools returned with error 1: stdout=None, stderr=[main_samview] fail to read the header from "/tmp/tmpd8o61jykdedup_out6.bam".\\n\'. Will compare BAM files`\n+ ## may be dropped in the future\n+ --no-sort-output\n+ && samtools sort --no-PG grouped.bam -@ \\${GALAXY_SLOTS:-1} -T "\\${TMPDIR:-.}" -o \'$output\' -O BAM\n ]]></command>\n <inputs>\n <param name="input" type="data" format="sam,bam" label="Reads to group in SAM or BAM format" />\n- <param name="extract_umi_method" argument="--extract-umi-method" type="select">\n- <option value="read_id" selected="True">Read ID</option>\n- <option value="tag">Tag</option>\n- </param>\n <param name="group_output" argument="--group-out" type="boolean" truevalue="--group-out" falsevalue="" label="Output a flatfile describing the read groups" />\n- <param name="umi_separator" argument="--umi-separator" type="text" label="Separator between read id and UMI." help="Ignored unless extracting by tag" />\n- <param name="umi_tag" argument="--umi-tag" type="text" label="Tag which contains UMI." />\n- <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads." help="All methods start by identifying the reads with the same mapping position">\n- <option value="unique">Reads group share the exact same UMI</option>\n- <option value="cluster">Identify clusters based on hamming distance</option>\n- '..b' threshold). Each network is a read group\n-\n- - "directional"\n- Identify clusters of connected UMIs (based on edit distance\n- threshold) and umi A counts >= (2* umi B counts) - 1. Each\n- network is a read group.\n-\n---edit-distance-threshold (int)\n- For the adjacency and cluster methods the threshold for the\n- edit distance to connect two UMIs in the network can be\n- increased. The default value of 1 works best unless the UMI is\n- very long (>14bp)\n-\n---paired\n- BAM is paired end - output both read pairs. This will also\n- force the use of the template length to determine reads with\n- the same mapping coordinates.\n-\n---spliced-is-unique\n- Causes two reads that start in the same position on the same\n- strand and having the same UMI to be considered unique if one is\n- spliced and the other is not. (Uses the \'N\' cigar operation to test\n- for splicing)\n-\n---soft-clip-threshold (int)\n- Mappers that soft clip, will sometimes do so rather than mapping a\n- spliced read if there is only a small overhang over the exon\n- junction. By setting this option, you can treat reads with at least\n- this many bases soft-clipped at the 3\' end as spliced.\n+@BARCODE_HELP@\n \n---multimapping-detection-method (string, choice)\n- If the sam/bam contains tags to identify multimapping reads, you can\n- specify for use when selecting the best read at a given loci.\n- Supported tags are "NH", "X0" and "XT". If not specified, the read\n- with the highest mapping quality will be selected\n-\n---read-length\n- Use the read length as as a criteria when deduping, for e.g sRNA-Seq\n-\n---whole-contig\n- Consider all alignments to a single contig together. This is useful if\n- you have aligned to a transcriptome multi-fasta\n-\n---subset (float, [0-1])\n- Only consider a fraction of the reads, chosen at random. This is useful\n- for doing saturation analyses.\n-\n---chrom\n- Only consider a single chromosome. This is useful for debugging purposes\n-\n---per-contig (string)\n- Deduplicate per contig (field 3 in BAM; RNAME).\n- All reads with the same contig will be\n- considered to have the same alignment position. This is useful\n- if your library prep generates PCR duplicates with non identical\n- alignment positions such as CEL-Seq. In this case, you would\n- align to a reference transcriptome with one transcript per gene\n-\n---per-gene (string)\n- Deduplicate per gene. As above except with this option you can\n- align to a reference transcriptome with more than one transcript\n- per gene. You need to also provide --gene-transcript-map option.\n- This will also add a metacontig (\'MC\') tag to the reads if used\n- in conjunction with --output-bam\n-\n---gene-transcript-map (string)\n- File mapping genes to transripts (tab separated), e.g:\n-\n- gene1 transcript1\n- gene1 transcript2\n- gene2 transcript3\n-\n---gene-tag (string)\n- Deduplicate per gene. As per --per-gene except here the gene\n- information is encoded in the bam read tag specified so you do\n- not need to supply --gene-transcript-map\n-\n---group-out (string, filename)\n- Output a flatfile describing the read groups\n-\n---output-bam (string, filename)\n- Output a tagged bam file to stdout or -S <filename>\n-\n--i, --in-sam/-o, --out-sam\n- By default, inputs are assumed to be in BAM format and output are output\n- in BAM format. Use these options to specify the use of SAM format for\n- inputs or outputs.\n-\n--I (string, filename) input file name\n- The input file must be sorted and indexed.\n-\n--S (string, filename) output file name\n-\n--L (string, filename) log file name\n-\n-Usage\n------\n- umi_tools group -I infile.bam --output-bam -S grouped.bam -L group.log --\n-\n+@UMI_GROUPING_HELP@\n ]]></help>\n <expand macro="citations" />\n </tool>\n' |