Previous changeset 1:1c6710924e80 (2015-08-03) Next changeset 3:ac09a5aaed0b (2015-08-03) |
Commit message:
Uploaded |
modified:
JunctionDiff-vs-background.sh JunctionDiff-vs-background.xml README.txt TV-vs-background.sh TV-vs-background.xml tool-data/virtual_normal_correction.loc.sample vcf2lv.sh vcf2lv.xml |
b |
diff -r 1c6710924e80 -r 885ba15c2564 JunctionDiff-vs-background.sh --- a/JunctionDiff-vs-background.sh Mon Aug 03 05:03:16 2015 -0400 +++ b/JunctionDiff-vs-background.sh Mon Aug 03 05:45:16 2015 -0400 |
b |
@@ -20,7 +20,7 @@ --scoreThresholdA) scoreThresholdA=$2;shift;; --scoreThresholdB) scoreThresholdB=$2;shift;; --distance) distance=$2;shift;; - --minlength) minlength=$2;shift;; + --minlength) minlength=$2;shift;; -h) shift;; --) shift;break;; -*) usage;; |
b |
diff -r 1c6710924e80 -r 885ba15c2564 JunctionDiff-vs-background.xml --- a/JunctionDiff-vs-background.xml Mon Aug 03 05:03:16 2015 -0400 +++ b/JunctionDiff-vs-background.xml Mon Aug 03 05:45:16 2015 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="t-vs-vnormal_junctions" name="Virtual Normal Correction SVs" version="1.6"> +<tool id="t-vs-vnormal_junctions" name="Virtual Normal Correction SVs" version="1.8"> <description> Filter SVs based on presence in VN set </description> <requirements> @@ -9,32 +9,31 @@ JunctionDiff-vs-background.sh --variants $variants --reference ${reference.fields.reference_crr_cgatools} - #if $virtnorm.VNset == "diversity" - --VN_junctions ${reference.fields.VN_genomes_junctionfile_list} - #else - --VN_junctions ${reference.fields.VN_genomes_junctionfile_list_1000G} - #end if + --VN_junctions ${reference.fields.VN_genomes_junctionfile_list}${VNset} --cgatools_binary cgatools --outputfile_filtered $output_filtered --scoreThresholdA $scoreThresholdA --scoreThresholdB $scoreThresholdB --distance $distance - --minlength $minlength + --minlength $minlength </command> <inputs> + <param name="variants" type="data" format="tabular" label="CG Junctions file"/> + <!--select build--> <param name="reference" type="select" label="Select Build"> <options from_data_table="virtual_normal_correction" /> - <filter type="data_meta" ref="variants" key="dbkey" column="0" /> </param> - <conditional name="virtnorm" > - <param name="VNset" type="select" label="Select Virtual Normal set to use" help="1000Genomes set can only be used for hg19 samples"> - <option value="diversity" selected="true"> CG Diversity Panel and trios (54 Genomes) (hg18/hg19) </option> - <option value="thousand" > CG 1000G project genomes (433 Genomes) (hg19 only) </option> + + <param name="VNset" type="select" label="Select Virtual Normal set to use" help="1000Genomes set can only be used for hg19 samples, for hg18 54 genomes will be used."> + <option value="46_diversity.txt" > CG Diversity Panel (46 Genomes) </option> + <option value="433_1000g.txt" > CG 1000G project genomes (433 Genomes) (hg19 only) </option> + <option value="479_diversity_1000g.txt" > Diversity and 1000G (479 genomes) (hg19 only) 
</option> + <option value="10_tutorial.txt" > Small VN for tutorial (10 Genomes) </option> </param> - </conditional> - <param name="variants" type="data" format="tabular" label="CG Junctions file"/> + + <param name="scoreThresholdA" type="text" value="10" label="scoreThreshold" help="The minimum number of discordant mate pair alignments supporting the junction from input genome"/> <param name="scoreThresholdB" type="text" value="10" label="scoreThreshold" help="The minimum number of discordant mate pair alignments supporting the junction from background genomes"/> <param name="distance" type="text" value="200" label="Maximum distance between coordinates of potentially compatible junctions."/> @@ -48,9 +47,9 @@ <outputs> <data format="tabular" name="output_filtered" label="${fname} Filtered junctions for ${tool.name} on ${on_string}"/> - <data format="tabular" name="output_report" from_work_dir= "output_reports.tsv" label="${fname} report for ${tool.name} on ${on_string}"> - <filter> report == "Y" </filter> - </data> + <data format="tabular" name="output_report" from_work_dir= "output_reports.tsv" label="${fname} report for ${tool.name} on ${on_string}"> + <filter> report == "Y" </filter> + </data> </outputs> <help> |
b |
diff -r 1c6710924e80 -r 885ba15c2564 README.txt --- a/README.txt Mon Aug 03 05:03:16 2015 -0400 +++ b/README.txt Mon Aug 03 05:45:16 2015 -0400 |
[ |
@@ -7,7 +7,7 @@ - change "/path/to/hg18.crr" to the location of the Complete Genomics reference crr file on your system (can be downloaded from ftp://ftp.completegenomics.com/ReferenceFiles/ ) - - change "/path/to/VN_genomes_varfiles_hg18.txt" to the location of the file containing the locations of all the Complete Genomics + - change "/path/to/VN_genomes_varfiles_lists_hg18" to the location of the directory containing files with the locations of all the Complete Genomics varfiles to be used as a virtual normal. This file should contain 1 file location per line, e.g. /path/to/normal-varfile-1 @@ -20,10 +20,44 @@ /path/to/normal-varfile-8 ... - Varfiles can be in compressed or uncompressed form. For example, Complete Genomics' Diversity panel can be used. + - edit the tool xml file to offer sets of virtual normals + + [..] + <!-- edit these options to reflect sets of normal you have available. The values must name files within the directories specified in data_table_conf.xml file --> + <param name="VNset" type="select" label="Select Virtual Normal set to use" help="1000Genomes set can only be used for hg19 samples, for hg18 54 genomes will be used."> + <option value="46_diversity.txt" > CG Diversity Panel and trios (54 Genomes) </option> + <option value="433_1000g.txt" > CG 1000G project genomes (433 Genomes) (hg19 only) </option> + <option value="479_diversity_1000g.txt" > Diversity and 1000G (479 genomes) (hg19 only) </option> + <option value="10_tutorial.txt" > Small VN for tutorial (10 Genomes) </option> + </param> + [..] 
+ + the values indicate files expected to be at the location configured in the loc file, + + + So if your .loc file looks like this: + + + #loc file for annovar tool + + # <columns>value, dbkey, name, VN_genomes_varfiles_list, VN_genomes_junctionfile_list, reference_crr_cgatools</columns> + + hg18 hg18 Virtual Normal hg18 /path/to/VN_genomes_varfiles_lists_hg18 /path/to/VN_genomes_junctionfiles_lists_hg18 /path/to/hg18.crr + hg19 hg19 Virtual_Normal hg19 /path/to/VN_genomes_varfiles_lists_hg19 /path/to/VN_genomes_junctionfiles_lists_hg19 /path/to/hg19.crr + + And your xml file looks like the example above, then the tool expects the following files to exist: + /path/to/VN_genomes_varfiles_lists_hg18/46_diversity.txt + /path/to/VN_genomes_varfiles_lists_hg18/433_1000g.txt + etc + and containing a 1-per-line list of locations of the varfiles of the normal genomes. + + + + + Varfiles can be in compressed or uncompressed form. For example, Complete Genomics' Diversity panel can be used. (can be downloaded from ftp://ftp2.completegenomics.com/) - - change "/path/to/VN_genomes_junctionfiles_hg18.txt" to the location of the file containing the locations of all the Complete Genomics + - change "/path/to/VN_genomes_junctionfiles_lists_hg18" to the location of the directory containing files with the locations of all the Complete Genomics junctionfiles to be used as a virtual normal. This file should contain 1 file location per line. For example, Complete Genomics' Diversity panel can be used. (can be downloaded from ftp://ftp2.completegenomics.com/) |
b |
diff -r 1c6710924e80 -r 885ba15c2564 TV-vs-background.sh --- a/TV-vs-background.sh Mon Aug 03 05:03:16 2015 -0400 +++ b/TV-vs-background.sh Mon Aug 03 05:45:16 2015 -0400 |
b |
@@ -26,6 +26,8 @@ done # replace newline chars with spaces for input to testvariants +echo "varfiles list: $VN_varfiles_list" + tr '\n' ' ' < $VN_varfiles_list > VN_varfiles.txt @@ -137,3 +139,4 @@ + |
b |
diff -r 1c6710924e80 -r 885ba15c2564 TV-vs-background.xml --- a/TV-vs-background.xml Mon Aug 03 05:03:16 2015 -0400 +++ b/TV-vs-background.xml Mon Aug 03 05:45:16 2015 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="t-vs-vnormal" name="Virtual Normal Correction SmallVars" version="1.6"> +<tool id="t-vs-vnormal" name="Virtual Normal Correction SmallVars" version="1.7"> <description> Filter small variants based on presence in Virtual Normal set </description> <requirements> @@ -9,15 +9,11 @@ TV-vs-background.sh --variants $variants --reference ${reference.fields.reference_crr_cgatools} - #if $virtnorm.VNset == "diversity": - --VN_varfiles ${reference.fields.VN_genomes_varfiles_list} - #else - --VN_varfiles ${reference.fields.VN_genomes_varfiles_list_1000G} - #end if + --VN_varfiles "${reference.fields.VN_genomes_varfiles_list}${VNset}" --threshold $threshold - --thresholdhc $thresholdhc + --thresholdhc $thresholdhc --outputfile_all $output_all - --outputfile_filtered $output_filtered + --outputfile_filtered $output_filtered </command> <inputs> @@ -25,34 +21,26 @@ <!--select build--> <param name="reference" type="select" label="Select Build"> <options from_data_table="virtual_normal_correction" /> - <filter type="data_meta" ref="variants" key="dbkey" column="0" /> </param> - <conditional name="virtnorm" > + + <!-- edit these options to reflect sets of normal you have available. 
The values must name files within the directories specified in data_table_conf.xml file --> <param name="VNset" type="select" label="Select Virtual Normal set to use" help="1000Genomes set can only be used for hg19 samples, for hg18 54 genomes will be used."> - <option value="diversity" > CG Diversity Panel and trios (54 Genomes) </option> - <option value="thousand" > CG 1000G project genomes (433 Genomes) (hg19 only) </option> - </param> - </conditional> - - <param name="threshold" type="text" value="1" label="Threshold: Filter variants if present in at least this number of the background genomes"/> - <param name="thresholdhc" type="text" value="10" label="High Confidence Threshold: Label a somatic variant as high-confidence if locus was fully called in at least this many normal genomes" help="Please adjust according to number of normals used and desired stringency. "/> + <option value="46_diversity.txt" > CG Diversity Panel and trios (54 Genomes) </option> + <option value="433_1000g.txt" > CG 1000G project genomes (433 Genomes) (hg19 only) </option> + <option value="479_diversity_1000g.txt" > Diversity and 1000G (479 genomes) (hg19 only) </option> + <option value="10_tutorial.txt" > Small VN for tutorial (10 Genomes) </option> + </param> + + <param name="threshold" type="text" value="1" label="Filter out variants present in at least this number of the virtual normal genomes"/> + <param name="thresholdhc" type="text" value="10" label="High Confidence Threshold: Label a somatic variant as high-confidence if locus was fully called in at least this many normal genomes" help="Please adjust according to number of normals used and desired stringency. "/> <param name="fname" type="text" value="" label="Prefix for your output file" help="Optional. For example sample name."/> - <!--<param name="debug" type="select" label="individual level annotations?" 
help="get a columns per normal sample whether variant was present (only available for fully public normal samples)"> - <option value="N" > No </option> - <option value="Y" > Yes </option> - </param> - --> </inputs> <outputs> - <data format="tabular" name="output_all" label="${fname} All variants for ${tool.name} on ${on_string}"/> - <data format="tabular" name="output_filtered" label="${fname} Filtered variants for ${tool.name} on ${on_string}"/> - <data format="tabular" name="output_filtered_highconf" label="${fname} High Confidence Filtered variants for ${tool.name} on ${on_string}" from_work_dir="output_filtered_highconf.tsv"/> - <!--<data format="tabular" name="output_filtered" label="${fname} Filtered variants for ${tool.name} on ${on_string}"/> - <data format="tabular" name="output_expanded" from_work_dir="output_expanded" label="${fname} expanded annotation for ${tool.name} on ${on_string}"> - <filter> $debug == "Y" </filter> - </data> - --> + <data format="tabular" name="output_all" label="All variants for ${tool.name} on ${on_string}"/> + <data format="tabular" name="output_filtered" label="Filtered variants for ${tool.name} on ${on_string}"/> + <data format="tabular" name="output_filtered_highconf" label="${fname} High Confidence Filtered variants for ${tool.name} on ${on_string}" from_work_dir="output_filtered_highconf.tsv"/> + </outputs> <help> |
b |
diff -r 1c6710924e80 -r 885ba15c2564 tool-data/virtual_normal_correction.loc.sample --- a/tool-data/virtual_normal_correction.loc.sample Mon Aug 03 05:03:16 2015 -0400 +++ b/tool-data/virtual_normal_correction.loc.sample Mon Aug 03 05:45:16 2015 -0400 |
b |
@@ -2,5 +2,5 @@ # <columns>value, dbkey, name, VN_genomes_varfiles_list, VN_genomes_junctionfile_list, reference_crr_cgatools</columns> -hg18 hg18 Virtual Normal hg18 /mnt/galaxyIndices/VirtualNormal/VN_genomes_varfiles_hg18.txt /mnt/galaxyIndices/VirtualNormal/VN_genomes_junctionfiles_hg18.txt /mnt/galaxyIndices/cgatools/build36.crr -hg19 hg19 Virtual Normal hg19 /mnt/galaxyIndices/VirtualNormal/VN_genomes_varfiles_hg19.txt /mnt/galaxyIndices/VirtualNormal/VN_genomes_junctionfiles_hg19.txt /mnt/galaxyIndices/cgatools/build37.crr +#hg18 hg18 Virtual Normal hg18 /path/to/VN_genomes_varfiles_lists_hg18 /path/to/VN_genomes_junctionfiles_lists_hg18 /path/to/hg18.crr +#hg19 hg19 Virtual_Normal hg19 /path/to/VN_genomes_varfiles_lists_hg19 /path/to/VN_genomes_junctionfiles_lists_hg19 /path/to/hg19.crr |
b |
diff -r 1c6710924e80 -r 885ba15c2564 vcf2lv.sh --- a/vcf2lv.sh Mon Aug 03 05:03:16 2015 -0400 +++ b/vcf2lv.sh Mon Aug 03 05:45:16 2015 -0400 |
b |
@@ -19,7 +19,7 @@ count=0; #output new header - print "variantId", "chromosome", "begin", "end", "varType", "reference", "alleleSeq", "xRef" + print "variantId", "chromosome", "begin", "end", "varType", "reference", "alleleSeq" }{ if(substr($0,1,1)!="#" && $5 != "."){ #skip header or nonvariant entries (period in ALT column) @@ -85,8 +85,7 @@ #print output variant(s) - if(chromosome != "chrM") - print count, chromosome, start, end, varType, reference, alleleSeq, "" + print count, chromosome, start, end, varType, reference, alleleSeq count+=1 } |
b |
diff -r 1c6710924e80 -r 885ba15c2564 vcf2lv.xml --- a/vcf2lv.xml Mon Aug 03 05:03:16 2015 -0400 +++ b/vcf2lv.xml Mon Aug 03 05:45:16 2015 -0400 |
b |
@@ -1,5 +1,5 @@ <tool id="vcf2lv" name="VCF-2-VariantList" version="1"> - <description> virtual normal preprocessing - convert VCF file to CG-compatible variant list </description> + <description> convert VCF file to CG-compatible variant list </description> <command interpreter="bash"> vcf2lv.sh $vcffile $outputfile |