Mercurial > repos > galaxyp > regex_find_replace

diff regex_tabular.xml @ 0:60d04307b027 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
author: galaxyp
date: Wed, 18 Jan 2017 17:45:20 -0500
children: 209b7c5ee9d7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regex_tabular.xml	Wed Jan 18 17:45:20 2017 -0500
@@ -0,0 +1,139 @@
+<tool id="regexColumn1" name="Column Regex Find And Replace" version="1.0.0">
+  <description></description>
+  <!-- Command template: invokes regex.py with the input dataset, the output path,
+       the selected column and the input's display name, followed by one
+       pattern/replacement option pair per "checks" repeat instance.
+       The script is addressed explicitly through $__tool_directory__ instead of the
+       deprecated "interpreter" attribute; CDATA keeps the Cheetah template free of
+       XML escaping concerns. -->
+  <command><![CDATA[
+    python '$__tool_directory__/regex.py' --input '$input' --output '$out_file1' --column $field --input_display_name '$input.display_name'
+    #for $check in $checks:
+    --pattern='$check.pattern' --replacement='$check.replacement'
+    #end for
+  ]]></command>
+  <inputs>
+    <!-- Tabular dataset to edit, and the column of that dataset the checks apply to. -->
+    <param name="input" type="data" format="tabular" label="Select cells from" />
+    <param name="field" type="data_column" data_ref="input" label="using column" />
+    <!-- Each "Check" is one find/replace pair; the command template loops over them. -->
+    <repeat name="checks" title="Check">
+      <param name="pattern" type="text" size="40" value="chr([0-9A-Za-z])+"
+             label="Find Regex"
+             help="here you can enter text or regular expression (for syntax check lower part of this frame)">
+        <!-- Printable characters are valid except backslash and single quote,
+             which are mapped to the __backslash__ and __sq__ tokens. -->
+        <sanitizer>
+          <valid>
+            <add preset="string.printable" />
+            <remove value="&#92;" />
+            <remove value="&apos;" />
+          </valid>
+          <mapping initial="none">
+            <add source="&#92;" target="__backslash__" />
+            <add source="&apos;" target="__sq__" />
+          </mapping>
+        </sanitizer>
+      </param>
+      <param name="replacement" type="text" size="40" value="newchr\1"
+             label="Replacement">
+        <!-- Same sanitizer rules as the "pattern" parameter. -->
+        <sanitizer>
+          <valid>
+            <add preset="string.printable" />
+            <remove value="&#92;" />
+            <remove value="&apos;" />
+          </valid>
+          <mapping initial="none">
+            <add source="&#92;" target="__backslash__" />
+            <add source="&apos;" target="__sq__" />
+          </mapping>
+        </sanitizer>
+      </param>
+    </repeat>
+  </inputs>
+  <outputs>
+    <!-- The output dataset takes both its datatype and its metadata from the
+         "input" dataset. format_source names that dataset explicitly, replacing
+         the deprecated format="input" shorthand. -->
+    <data name="out_file1" format_source="input" metadata_source="input" />
+  </outputs>
+  <tests>
+    <!-- Replace "moo" with "cow" in column 2 of the sample table.
+         The pattern/replacement pair is nested in <repeat name="checks"> so it is
+         bound to the first repeat instance explicitly rather than by bare name. -->
+    <test>
+      <param name="input" value="find_tabular_1.txt" ftype="tabular" />
+      <param name="field" value="2" />
+      <repeat name="checks">
+        <param name="pattern" value="moo" />
+        <param name="replacement" value="cow" />
+      </repeat>
+      <output name="out_file1" file="replace_tabular_1.txt" />
+    </test>
+    <!-- Same substitution, applied to column 1 instead. -->
+    <test>
+      <param name="input" value="find_tabular_1.txt" ftype="tabular" />
+      <param name="field" value="1" />
+      <repeat name="checks">
+        <param name="pattern" value="moo" />
+        <param name="replacement" value="cow" />
+      </repeat>
+      <output name="out_file1" file="replace_tabular_2.txt" />
+    </test>
+  </tests>
+  <help>
+
+.. class:: warningmark
+
+**This tool will attempt to reuse the metadata from your first input.** To change metadata assignments click on the "edit attributes" link of the history item generated by this tool.
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
+
+-----
+
+This tool goes line by line through the specified input file and
+if the text in the selected column matches a specified regular expression pattern
+replaces the text with the corresponding specified replacement.
+
+This tool can be used to change between the chromosome naming conventions of UCSC and Ensembl.  
+
+For example to remove the **chr** part of the reference sequence name in the first column of this GFF file::
+
+ ##gff-version 2
+ ##Date: Thu Mar 23 11:21:17 2006
+ ##bed2gff.pl $Rev: 601 $
+ ##Input file: ./database/files/61c6c604e0ef50b280e2fd9f1aa7da61.dat
+ chr1	bed2gff	CCDS1000.1_cds_0_0_chr1_148325916_f	148325916	148325975	.	+	.	score "0";
+ chr21	bed2gff	CCDS13614.1_cds_0_0_chr21_32707033_f	32707033	32707192	.	+	.	score "0";
+ chrX	bed2gff	CCDS14606.1_cds_0_0_chrX_122745048_f	122745048	122745924	.	+	.	score "0";
+
+Setting::
+
+ using column: c1 
+ Find Regex: chr([0-9]+|X|Y|M[Tt]?) 
+ Replacement: \1 
+
+produces::
+
+ ##gff-version 2
+ ##Date: Thu Mar 23 11:21:17 2006
+ ##bed2gff.pl $Rev: 601 $
+ ##Input file: ./database/files/61c6c604e0ef50b280e2fd9f1aa7da61.dat
+ 1    bed2gff CCDS1000.1_cds_0_0_chr1_148325916_f     148325916       148325975       .       +       .       score "0";
+ 21   bed2gff CCDS13614.1_cds_0_0_chr21_32707033_f    32707033        32707192        .       +       .       score "0";
+ X    bed2gff CCDS14606.1_cds_0_0_chrX_122745048_f    122745048       122745924       .       +       .       score "0";
+
+
+This tool uses Python regular expressions with the **re.sub()** function. 
+More information about Python regular expressions can be found here:
+http://docs.python.org/library/re.html.
+
+The regex **chr([0-9]+|X|Y|M[Tt]?)** means start with text **chr** followed by either: one or more digits, or the letter X, or the letter Y, or the letter M (optionally followed by a single letter T or t).
+Note that the parentheses **()** capture patterns in the text that can be used in the replacement text by using a backslash-number reference:  **\\1**
+
+In the replacement pattern, use the special token #{input_name} to insert the input dataset's display name.
+The name can be modified by a second find/replace check. Suppose you want to insert the sample id of your dataset,
+named **Sample ABC123**, into the dataset itself, which currently contains the lines::
+
+ Data 1
+ Data 2
+ Data 3
+
+You can use the following checks::
+
+ Find Regex: Data
+ Replacement: #{input_name} Data
+
+ Find Regex: Sample (\S+)
+ Replacement: \1
+
+The result will be::
+
+ ABC123 Data 1
+ ABC123 Data 2
+ ABC123 Data 3
+
+Galaxy aggressively escapes input supplied to tools, so if something
+is not working please let us know and we can look into whether this is
+the cause. Also if you would like help constructing regular
+expressions for your inputs, please let us know at help@msi.umn.edu.
+
+</help>
+</tool>
author	galaxyp
date	Wed, 18 Jan 2017 17:45:20 -0500
parents
children	209b7c5ee9d7