changeset 11:a472e995d157 draft

"planemo upload commit 28e58376e1d70e38276873a7d5e2ab44db88c2c0"
author iuc
date Tue, 27 Aug 2019 17:10:51 -0400
parents 828dba98cdb4
children d5ff68d2d5ff
files macros.xml test-data/out_wl_single.txt test-data/out_wl_user.single.html test-data/out_wl_user.single.tresh.tab test-data/out_wl_user.single.txt test-data/testYYY.40k.fastq.gz umi-tools_extract.xml
diffstat 7 files changed, 119 insertions(+), 42 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Fri Aug 17 09:41:59 2018 -0400
+++ b/macros.xml	Tue Aug 27 17:10:51 2019 -0400
@@ -1,5 +1,23 @@
 <?xml version="1.0"?>
 <macros>
+    <macro name="barcode_sanitizer" ><!-- Shared sanitizer for barcode pattern text params; expanded inside the bc_pattern2 param of barcode2_conditional. -->
+        <sanitizer invalid_char=""><!-- NOTE(review): presumably any character outside the valid set is replaced by invalid_char (empty here, so it would be dropped); confirm against the Galaxy tool XML docs. -->
+            <valid initial="string.letters,string.digits"><!-- Start from letters and digits, then allow the punctuation listed below (looks like what a regex-style barcode pattern needs; TODO confirm). -->
+                <add value="&#40;" /><!-- "(" left parenthesis -->
+                <add value="&#41;" /><!-- ")" right parenthesis -->
+                <add value="&#42;" /><!-- "*" asterisk -->
+                <add value="&#44;" /><!-- "," comma -->
+                <add value="&#46;" /><!-- "." period -->
+                <add value="&#60;" /><!-- "<" less-than sign -->
+                <add value="&#61;" /><!-- "=" equals sign -->
+                <add value="&#62;" /><!-- ">" greater-than sign -->
+                <add value="&#63;" /><!-- "?" question mark -->
+                <add value="&#95;" /><!-- "_" underscore -->
+                <add value="&#123;"/><!-- "{" left brace -->
+                <add value="&#125;"/><!-- "}" right brace -->
+            </valid>
+        </sanitizer>
+    </macro>
     <macro name="barcode2_conditional" >
         <conditional name="barcode">
             <param name="barcode_select" argument="--split-barcode" type="select" label="Barcode on both reads?">
@@ -11,6 +29,7 @@
                 <param name="bc_pattern2" argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read"
                        help="Use this option to specify the format of the UMI/barcode for
                              the second read pair if required." >
+                    <expand macro="barcode_sanitizer" />
                 </param>
             </when>
         </conditional>
@@ -55,7 +74,7 @@
             <yield />
         </requirements>
     </xml>
-    <token name="@VERSION@">0.5.3</token>
+    <token name="@VERSION@">0.5.5</token>
     <token name="@COMMAND_LINK@"><![CDATA[
         #set $gz = False
         #if $input_type.type == 'single':
--- a/test-data/out_wl_single.txt	Fri Aug 17 09:41:59 2018 -0400
+++ b/test-data/out_wl_single.txt	Tue Aug 27 17:10:51 2019 -0400
@@ -1,44 +1,4 @@
-# output generated by whitelist --bc-pattern=CCCCCCCCNNNNNNNN --subset-reads=0 --stdin=/tmp/tmpibtvD6/files/000/dataset_1.dat --method=reads --plot-prefix=OUT --3prime
-# job started at Sun Feb 25 10:49:56 2018 on bag -- cb0db520-8a4e-4040-aa88-93efc0718fa8
-# pid: 2217, system: Linux 4.13.0-32-generic #35-Ubuntu SMP Thu Jan 25 09:13:46 UTC 2018 x86_64
-# blacklist_tsv                           : None
-# cell_number                             : False
-# compresslevel                           : 6
-# error_correct_threshold                 : 1
-# expect_cells                            : False
-# extract_method                          : string
-# filter_cell_barcodes                    : False
-# log2stderr                              : False
-# loglevel                                : 1
-# method                                  : reads
-# pattern                                 : CCCCCCCCNNNNNNNN
-# pattern2                                : None
-# plot_prefix                             : OUT
-# prime3                                  : True
-# random_seed                             : None
-# read2_in                                : None
-# short_help                              : None
-# stderr                                  : <_io.TextIOWrapper name='<stderr>' mode='w' encoding='UTF-8'>
-# stdin                                   : <_io.TextIOWrapper name='/tmp/tmpibtvD6/files/000/dataset_1.dat' mode='r' encoding='UTF-8'>
-# stdlog                                  : <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
-# stdout                                  : <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
-# subset_reads                            : 0
-# timeit_file                             : None
-# timeit_header                           : None
-# timeit_name                             : all
-# whitelist_tsv                           : None
-## 2018-02-25 10:49:56,061 INFO Starting barcode extraction
-## 2018-02-25 10:49:56,061 INFO Parsed 0 reads
-## 2018-02-25 10:49:56,062 INFO Starting - whitelist determination
-## 2018-02-25 10:49:57,383 INFO Finished - whitelist determination
-## 2018-02-25 10:49:57,383 INFO Starting - finding putative error cell barcodes
-## 2018-02-25 10:49:57,383 INFO Finished - finding putative error cell barcodes
-## 2018-02-25 10:49:57,383 INFO Writing out whitelist
 AAAAAAAA	AAAAAACA,AAACAAAA,AATAAAAA	3	1,1,1
 ACAAAAAC		2	
 ACAACAAA		2	
 TTACTTAA	TTACTAAA	2	1
-## 2018-02-25 10:49:57,383 INFO Parsed 100 reads
-## 2018-02-25 10:49:57,383 INFO 100 reads matched the barcode pattern
-## 2018-02-25 10:49:57,383 INFO Found 95 unique cell barcodes
-# job finished in 1 seconds at Sun Feb 25 10:49:57 2018 --  2.25  0.06  0.00  0.00 -- cb0db520-8a4e-4040-aa88-93efc0718fa8
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_wl_user.single.html	Tue Aug 27 17:10:51 2019 -0400
@@ -0,0 +1,1 @@
+<html> <head></head><body> <h1>Cell and Count Metrics</h1> <img src="OUT_cell_barcode_count_density.png" ><br /> <img src="OUT_cell_barcode_knee.png" ><br /> <img src="OUT_cell_barcode_counts.png" ><br /> </body></html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_wl_user.single.tresh.tab	Tue Aug 27 17:10:51 2019 -0400
@@ -0,0 +1,7 @@
+count	action
+2	Rejected
+8	Rejected
+11	Rejected
+27	Rejected
+90	Selected
+404	Rejected
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_wl_user.single.txt	Tue Aug 27 17:10:51 2019 -0400
@@ -0,0 +1,90 @@
+AACCGCCTCAGGCAGCTATCTCGGTTA		6	
+AACCGCCTCCTCTCTCAAACTACATAT	TACCGCCTCCTCTCTCAAACTACATAT	4	1
+AAGACATGCAGTGAAAGGACAAACTTT		3	
+ACACGCCGGTTCCATTGAGCAACATTA		3	
+ACCAAGGACACTGTTAGAAAGTTTACG	ACCAAGGACACTGTAAGAAAGTTTACG	3	1
+ACCCTGACCACAGTGGTACTACATGCG		4	
+ACGAAAGGTCGTTCGGTATACTTCGGA	TCGAAAGGTCGTTCGGTATACTTCGGA	3	1
+AGAGCTATGAAGAGTAGAAAGCCTTCT		3	
+AGCCATCACAGCGACACCTCTCACGGA	TGCCATCACAGCGACACCTCTCACGGA	6	1
+ATCAGAGCTAACCCTCGGAAGGGTCAG	TTCAGAGCTAACCCTCGGAAGGGTCAG	3	1
+ATCCCGGAGGTACATCTACATAGGTCA		3	
+ATGTAATGGTAGTCTTGAACGCTGTTG		3	
+CAAACCGCCACGACCACCAAGTTTACG		3	
+CAAACCGCCTGACAGACAAATGTATCG	CAAACCGTCTGACAGACAAATGTATCG,GAAACCGCCTGACAGACAAATGTATCG	3	1,1
+CAAAGGCACAACCAAAGTACTAGACCG		3	
+CAAAGGCACAACCCTCGGACTCATACG	CAAAGGCACGACCCTCGGACTCATACG,GAAAGGCACAACCCTCGGACTCATACG	3	1,1
+CAAAGGCACAATCAGTTTATGGGACTC		3	
+CAAAGGCACGATGAACTGGCTCCTTGA	GAAAGGCACGATGAACTGGCTCCTTGA	4	1
+CAAAGGCACTAGGGATACCATAGGTCA	GAAAGGCACTAGGGATACCATAGGTCA	4	1
+CAAGCAAGCGATGAACTGCAATACAAG		3	
+CACACACTACACCCAAAGGTAGAAGCA	GACACACTACACCCAAAGGTAGAAGCA	4	1
+CACATTGCAAAACGTACCAAGGCCGCA		3	
+CAGCAAGATTTGGAGGTAAGTCTGTAC		3	
+CAGCTGACATACAGGATATCCGTCTTA		3	
+CCACTTGGAAAACTGCGCAAATGGAGG		3	
+CTATGAAATCTCTCTCAAAAATTACAG	GTATGAAATCTCTCTCAAAAATTACAG	4	1
+CTTCACATAGTACATCTAGATCAGCGA		3	
+GAATCTGTAAAGGACTATAAAGTCATT		4	
+GACGGATTAAGTGTTGTCACTCATACG		3	
+GACGGATTATATAGCCCTCAATAGGGT		3	
+GAGAATCGTCAAGACCTACAGCCTGGC	CAGAATCGTCAAGACCTACAGCCTGGC,GGGAATCGTCAAGACCTACAGCCTGGC	3	1,1
+GAGGTGCTAGCCAATGTAGATAGAGGA		3	
+GAGTACATTACCAAAATGTGAAGCCAA		3	
+GAGTACATTTTGGAGGTAACAACTAGT		3	
+GCAATCCGAAGATAGTTCCAGCCTGGC	GCAATCCGAAGATAGTACCAGCCTGGC	4	1
+GCAATCCGAAGGCAGCTAAATTCGGCG	GCAATCCGAAGGCAGCTGAATTCGGCG	3	1
+GCAATCCGAGAGTGCGAATCCTCAATA	GCAATCCGAGAGAGCGAATCCTCAATA,GCAATCCGAGAGTGCGTATCCTCAATA	5	1,1
+GCAATCCGAGCCAATGTAGATGGTCCA		3	
+GCACTGTCAGATGAACTGCAAGTAGAA		3	
+GCACTGTCAGTGTGTCGACGTCTAGGT		3	
+GCCTTACAAGTGCAGTAACTCAAGACA		3	
+GCGATTACAAAGCTACTTACTTACGAT	GCGATTACAAAGCTACTTCCTTACGAT	4	1
+GCGATTACAATCAACCGAGGCACAACA		3	
+GCGATTACATGTTCTCCACAATAGGGT		3	
+GCTGCCAATAGGAGGCGCTGAAGCCAA		3	
+GCTGCCAATATTCATCGTAGTTGTTCT	GCTGCCAATATTCATCGTAGTTGTTCC	7	1
+GCTGCCAATCAACAACGGTGAAGCCAA	GCTGCCAATCATCAACGGTGAAGCCAA	6	1
+GCTGCCAATCACCTACCCAAGCCTTCT	GATGCCAATCACCTACCCAAGCCTTCT	3	1
+GGATTAGGAAGCTGCCGTGGCACAACA	GGAGTAGGAAGCTGCCGTGGCACAACA	3	1
+GGCAAGCAAACCAAAATGACTTACGAT		4	
+GGCAAGCAAGATGAACTGCTAAGCTTC		4	
+GGCAAGCAATAGTGACTACTACATGCG		3	
+GTACAGAACAATCCTGAACTATTAGCC	CTACAGAACAATCCTGAACTATTAGCC	3	1
+GTATGAAATATCCAACCGCTGTACGGA		3	
+TAAGCGTTACACAAAGGCCACAAGTAT		5	
+TAAGCGTTATATAGCCCTCAACCTCCA		4	
+TACCGAGCAATCCTAGGATCTCACGGA		3	
+TACCGAGCACAAGACCTACGTACTACG		5	
+TACCGAGCAGTGTGTCGATCTCACGGA		3	
+TACCGCCTCAGGCAGCTATCTCGGTTA	TACCGCCTCAGGCAGCTATCTCGGTCA	3	1
+TAGGCGATCAAGCTACTTCGTACTACG	AAGGCGATCAAGCTACTTCGTACTACG	3	1
+TAGGCGATCGCTAACTCAACAACTAGT	AAGGCGATCGCTAACTCAACAACTAGT	3	1
+TATATTGGGCCGACAAGAAAGGGTCAG	AATATTGGGCCGACAAGAAAGGGTCAG,TCTATTGGGCCGACAAGAAAGGGTCAG	3	1,1
+TATCGAATGAAGAGTAGACAGGCATTT	AATCGAATGAAGAGTAGACAGGCATTT	3	1
+TCACACAAAAAATAAATATGTATGCGA	ACACACAAAAAATAAATATGTATGCGA,TCGCACAAAAAATAAATATGTATGCGA	3	2,1
+TCACGAGATACTTCGAGCAAGCCTTCT	ACACGAGATACTTCGAGCAAGCCTTCT,CCACGAGATACTTCGAGCAAGCCTTCT	3	2,1
+TCACGAGATCAACAACGGCGTACTACG	ACACGAGATCAACAACGGCGTACTACG	3	1
+TCAGGAGGAACTTGATGACAAGTAGAA		4	
+TCAGGAGGACACTTATGTCTAAGCTTC	TCAGGAGGACACTTATGTCTACGCTTC,TCAGGAGGACACTTATGTCTCAGCTTC,TNAGGAGGACACTTATGTCTAAGCTTC	6	1,1,1
+TCAGGAGGACTTGCTTCAATCGAGTCT	TCAGGAGGACTTGCTTCCATCGAGTCT	3	1
+TCATAAGCGATCCTAGGATTCAGCTCA	ACATAAGCGATCCTAGGATTCAGCTCA	3	1
+TCATAAGCGGACACTTAAAGCCGCAAG	ACATAAGCGGACACTTAAAGCCGCAAG,TCATAAGCGGACACTTAAAGCCGCGAG,TTATAAGCGGACACTTAAAGCCGCAAG	3	1,1,1
+TCGAAAGGTATTACCAAGGCAACATTA	ACGAAAGGTATTACCAAGGCAACATTA	4	1
+TGAACTTCCACAATATGGCAAGTTTCC	AGAACTTCCACAATATGGCAAGTTTCC	7	2
+TGAACTTCCATGAGTTACCGTACTACG	AGAACTTCCATGAGTTACCGTACTACG,TGAACTTCCATGAGTTACCTTACTACG	6	2,1
+TGAAGCACTCACTCGAGAACACCTTAG		4	
+TGAAGCACTGCCAATGTAGAACGACAA	AGAAGCACTGCCAATGTAGAACGACAA	3	2
+TGAGCTATGAAGAGTAGAAAGCCTTCT		3	
+TGCGATCTACCGTATCTAACAAACTTT		4	
+TGGAGATTAATTAGGCATACAGTAAAC		6	
+TGGCTCAGAAAGCTACTTGCTCCTTGA	TGACTCAGAAAGCTACTTGCTCCTTGA	3	1
+TGGCTCAGAAGAATGGAGAATTCGGCG	TGGTTCAGAAGAATGGAGAATTCGGCG	3	1
+TGGCTCAGATGACAGACAAAGGGAACT	TGGCTCAGATGACAGACAAAGGGACCT	3	1
+TGTACCTTAGCCAATGTAAAGGGTCAG		3	
+TTACTTAGGATGAGTTACTCCGTCTTA	ATACTTAGGATGAGTTACTCCGTCTTA	3	1
+TTGTTCCAAAGGTTCGCTACTCATACG		3	
+TTTATTACCCCATGCACACATAGGTCA	ATTATTACCCCATGCACACATAGGTCA	3	2
+TTTATTACCGTGCAGTAAACTCATACG	ATTATTACCGTGCAGTAAACTCATACG	3	1
+TTTGGCTAAACTATCCTCACTACATAT	ATTGGCTAAACTATCCTCACTACATAT	3	2
+TTTTAGATGGATGAACTGCAACGATCT	ATTTAGATGGATGAACTGCAACGATCT	3	1
Binary file test-data/testYYY.40k.fastq.gz has changed
--- a/umi-tools_extract.xml	Fri Aug 17 09:41:59 2018 -0400
+++ b/umi-tools_extract.xml	Tue Aug 27 17:10:51 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="umi_tools_extract" name="UMI-tools extract" version="@VERSION@.2">
+<tool id="umi_tools_extract" name="UMI-tools extract" version="@VERSION@.0">
     <description>Extract UMI from fastq files</description>
     <macros>
         <import>macros.xml</import>