changeset 0:b7bc4f4399f0

Uploaded
author xuebing
date Thu, 08 Sep 2011 22:29:33 -0400
parents
children fc8ab6f2276a
files sampline.xml
diffstat 1 files changed, 108 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sampline.xml	Thu Sep 08 22:29:33 2011 -0400
@@ -0,0 +1,108 @@
+<tool id="sampline" name="Sampling">
+  <description>records from a file</description>
+  <!-- Runs sampline.py through the python interpreter. The two dataset paths are single-quoted so that a path containing spaces or shell metacharacters reaches the script as a single argument; $replacement expands to the truevalue or falsevalue of the boolean parameter of the same name. -->
+  <command interpreter="python">sampline.py --input='$input' --output='$out_file1' --nSample=$nSample --recSize=$recSize --nSkip=$nSkip $replacement</command>
+  <inputs>
+    <!-- Dataset to sample from, followed by the numeric settings that are substituted into the command line. -->
+    <param name="input" type="data" format="txt" label="Original file"/>
+    <param name="nSample" type="integer" value="100" size="10" label="Number of records to sample"/>
+    <param name="recSize" type="integer" value="1" size="10" label="Number of lines per record"/>
+    <param name="nSkip" type="integer" value="0" size="10" label="Number of top lines to output directly (without sampling)"/>
+    <!-- Checkbox, unchecked by default: contributes its truevalue to the command line when ticked and an empty string otherwise. -->
+    <param name="replacement" type="boolean" checked="False" truevalue="--replacement" falsevalue="" label="Sampling with replacement"/>
+  </inputs>
+  <outputs>
+    <!-- File written by the command via $out_file1. The format is the lowercase datatype extension, matching the one declared on the input parameter. -->
+    <data format="txt" name="out_file1" />
+  </outputs>
+  <tests>
+    <!-- Runs the tool on test.map with 100 one-line records, no skipped lines and the replacement box left empty, then compares out_file1 with testmap.sampled. The upload type uses the lowercase datatype extension. -->
+    <test>
+      <output name="out_file1" file="testmap.sampled"/>
+      <param name="input" value="test.map" ftype="txt"/>
+      <param name="nSample" value="100"/>
+      <param name="recSize"  value="1" />
+      <param name="nSkip" value="0" />
+      <param name="replacement" value=""/>
+    </test>
+  </tests>
+  <help>
+
+**What it does**
+
+This tool selects random records from a file. Each record is defined by a fixed number of lines.  
+
+- When over-sampling (asking for more records than the file contains), the ``--replacement`` option is turned on automatically.
+
+-----
+
+**Example 1: sampling from a BED file**
+
+Parameters::
+
+    1 line per record, sampling 5 records, without replacement, outputting line 1 (track name) directly
+
+Input::
+
+    track name=test.bed
+    chr1	148078400	148078582	CCDS993.1_cds_0_0_chr1_148078401_r	0	-
+    chr11	116124407	116124501	CCDS8374.1_cds_0_0_chr11_116124408_r	0	-
+    chr15	41826029	41826196	CCDS10101.1_cds_0_0_chr15_41826030_f	0	+
+    chr16	142908	143003	CCDS10397.1_cds_0_0_chr16_142909_f	0	+
+    chr2	220229609	220230869	CCDS2443.1_cds_0_0_chr2_220229610_r	0	-
+    chr20	33579500	33579527	CCDS13256.1_cds_0_0_chr20_33579501_r	0	-
+    chr20	33593260	33593348	CCDS13257.1_cds_0_0_chr20_33593261_f	0	+
+    chr5	131621326	131621419	CCDS4152.1_cds_0_0_chr5_131621327_f	0	+
+    chr7	113660517	113660685	CCDS5760.1_cds_0_0_chr7_113660518_f	0	+
+    chrX	152648964	152649196	CCDS14733.1_cds_0_0_chrX_152648965_r	0	-
+
+Output::
+
+    track name=test.bed
+    chr11	116124407	116124501	CCDS8374.1_cds_0_0_chr11_116124408_r	0	-
+    chr16	142908	143003	CCDS10397.1_cds_0_0_chr16_142909_f	0	+
+    chr20	33579500	33579527	CCDS13256.1_cds_0_0_chr20_33579501_r	0	-
+    chr20	33593260	33593348	CCDS13257.1_cds_0_0_chr20_33593261_f	0	+
+    chr5	131621326	131621419	CCDS4152.1_cds_0_0_chr5_131621327_f	0	+	
+
+**Example 2: sampling reads from a fastq file**
+
+Parameters::
+
+    4 lines per record, sampling 3 records, without replacement
+
+Input::
+
+    @SRR066787.2496 WICMT-SOLEXA:8:1:28:2047 length=36
+    NNANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+    +SRR066787.2496 WICMT-SOLEXA:8:1:28:2047 length=36
+    !!%!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    @SRR066787.2497 WICMT-SOLEXA:8:1:28:463 length=36
+    GTGATTAAGAAGAGACTGGCATCACTAAGGTGACAT
+    +SRR066787.2497 WICMT-SOLEXA:8:1:28:463 length=36
+    @A=BBCBBAA@:@:@@@:,?AB:B?BB=*2:@=?AA
+    @SRR066787.2498 WICMT-SOLEXA:8:1:28:704 length=36
+    GAACCCAATTTTCAAAGAAGTGTGACTGCTTGTTTC
+    +SRR066787.2498 WICMT-SOLEXA:8:1:28:704 length=36
+    =?BAABBACCCCAA9>>A=>A?A;;@A>ABBABBB:
+    @SRR066787.2499 WICMT-SOLEXA:8:1:28:997 length=36
+    CGACTTCAGGCTCTCGCTAGCCTTCGCTTGACTGAC
+    +SRR066787.2499 WICMT-SOLEXA:8:1:28:997 length=36
+    BCCBCCB?A1ACAC>;@CCAAABB?8=BA>@?B?@:
+    @SRR066787.2500 WICMT-SOLEXA:8:1:28:582 length=36
+    TCTCTCTCTTTCTCTCTCTCTCTCTCTCTCTCTCTC
+    +SRR066787.2500 WICMT-SOLEXA:8:1:28:582 length=36
+    ?.?.=9C8CCC:BACBCBC?CCC@CBBBCBBACAC8
+
+Output::
+
+    @SRR066787.2497 WICMT-SOLEXA:8:1:28:463 length=36
+    GTGATTAAGAAGAGACTGGCATCACTAAGGTGACAT
+    +SRR066787.2497 WICMT-SOLEXA:8:1:28:463 length=36
+    @A=BBCBBAA@:@:@@@:,?AB:B?BB=*2:@=?AA
+    @SRR066787.2499 WICMT-SOLEXA:8:1:28:997 length=36
+    CGACTTCAGGCTCTCGCTAGCCTTCGCTTGACTGAC
+    +SRR066787.2499 WICMT-SOLEXA:8:1:28:997 length=36
+    BCCBCCB?A1ACAC>;@CCAAABB?8=BA>@?B?@:
+    @SRR066787.2500 WICMT-SOLEXA:8:1:28:582 length=36
+    TCTCTCTCTTTCTCTCTCTCTCTCTCTCTCTCTCTC
+    +SRR066787.2500 WICMT-SOLEXA:8:1:28:582 length=36
+    ?.?.=9C8CCC:BACBCBC?CCC@CBBBCBBACAC8
+
+  </help>
+</tool>