# HG changeset patch # User dereeper # Date 1509615694 14400 # Node ID 2bc9b3ee5eea5ed8b9ba702abcb8eff3d1ad50a4 # Parent 013ff9bb23aa0c44970f74d9d7621423c460cb43 Uploaded diff -r 013ff9bb23aa -r 2bc9b3ee5eea Plink.pl --- a/Plink.pl Fri Aug 05 10:26:44 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,242 +0,0 @@ - -#!/usr/bin/perl - -use strict; -use Getopt::Long; -use Bio::SeqIO; - -my $usage = qq~Usage:$0 [] - -where are: - - -i, --input - -o, --out - - are: - - -s, --samples - -c, --chromosomes - -e, --export - -f, --frequency - -m, --max_freq - -a, --allow_missing - -t, --type - -b, --bounds - -r, --remove_filt - -d, --distance -~; -$usage .= "\n"; - -my ($input,$out); - -my $PLINK_EXE = "plink"; - - -#my $indel_size_max = 500; -#my $indel_size_min = 1; -my $frequency_max = 0.5; -my $frequency_min = 0.001; -my $pos_max = 100000000000; -my $pos_min = 0; -my $filter_snp_type = "all"; -my $remove_filt = "False"; - -my $missing_data = 1; -my $export = "VCF"; -my $type = "ALL"; -my $bounds; -my $samples; -my $chromosomes; -my $thin; - -GetOptions( - "input=s" => \$input, - "out=s" => \$out, - "samples=s" => \$samples, - "chromosomes=s" => \$chromosomes, - "frequency=s" => \$frequency_min, - "max_freq=s" => \$frequency_max, - "allow_missing=s"=> \$missing_data, - "export=s" => \$export, - "type=s" => \$type, - "bounds=s" => \$bounds, - "remove_filt=s" => \$remove_filt, - "distance=s" => \$thin -); - - -die $usage - if ( !$input || !$out); - -if ($samples && $samples =~/^([\w\,\-\.]+)\s*$/){ - $samples = $1; -} -elsif ($samples){ - die "Error: Samples must be a comma separated list of string\n"; -} -if ($bounds && $bounds =~/^([\d\,]+)\s*$/){ - $bounds = $1; -} -elsif($bounds){ - die "Error: Bounds must be a comma separated list of integers\n"; -} - -my $minfreq_cmd = ""; -if ($frequency_min && $frequency_min > 0 && $frequency_min =~/^([\d\.]+)\s*$/){ - $frequency_min = $1; - $minfreq_cmd = "--maf $frequency_min"; -} -elsif ($frequency_min == 0){ - $minfreq_cmd = ""; -} -elsif ($frequency_min){ - die "Error: frequency must be an integer\n"; -} -if ($thin && $thin =~/^([\d\.]+)\s*$/){ - $thin = $1; -} -elsif ($thin){ - die "Error: frequency must be an integer\n"; -} -my $maxfreq_cmd = ""; -if ($frequency_max && $frequency_max =~/^([\d\.]+)\s*$/){ - $frequency_max = $1; - if ($frequency_max < 0.5){ - $maxfreq_cmd = "--max-maf $frequency_max"; - } -} -elsif($frequency_max){ - die "Error: frequency must be an integer\n"; -} -if ($missing_data =~/^([\d\.]+)\s*$/){ - $missing_data = $1; - #$missing_data = 1 - $missing_data; -} -elsif ($missing_data){ - die "Error: Missing data must be an integer\n"; -} -if ($export && $export =~/^([\w]+)\s*$/){ - $export = $1; -} -elsif($export){ - die "Error: Export must be a string\n"; -} -if ($type && $type =~/^([\w]+)\s*$/){ - $type = $1; -} -elsif($type){ - die "Error: Type must be a string\n"; -} - - -my @dnasamples; -if ($samples) -{ - @dnasamples = split(",",$samples); -} -my @boundaries; -if ($bounds) -{ - @boundaries = split(",",$bounds); -} - - -my $experiment = "chromosomes"; -my $table = ""; -my %genes; -my @snp_ids; -my @snp_ids_and_positions; -my @snp_ids_and_positions_all; -my $gene; -my $snp_num = 0; -my %ref_sequences; -my %snps_of_gene; - -my $indiv_cmd = ""; -if (@dnasamples) -{ - if (scalar @dnasamples > 1) - { - open(my $S,">$out.samples"); - foreach my $samp(@dnasamples){ - print $S "$samp $samp\n"; - } - close($S); - $indiv_cmd = "--keep $out.samples "; - } - else - { - $indiv_cmd = "--indv " . join(" --indv ",@dnasamples); - } -} - -my $chrom_cmd = ""; -if ($chromosomes) -{ - $chrom_cmd = "--chr ".$chromosomes -} - -my $export_cmd = "--recode vcf-iid"; -if ($export eq "bcf"){ - $export_cmd = "--recode bcf"; -} -if ($export eq "freq"){ - $export_cmd = "--freq"; -} -if ($export eq "plink"){ - $export_cmd = "--make-bed"; -} -if ($export eq "bed"){ - $export_cmd = "--make-bed"; -} - - -my $bounds_cmd = ""; -if (@boundaries && $chrom_cmd=~/\w/ && $chrom_cmd !~/,/) -{ - $bounds_cmd = "--from-bp $boundaries[0] --to-bp $boundaries[1]"; -} - - - -my $type_cmd = ""; -if ($type eq "SNP") -{ - $type_cmd = "--snps-only"; -} - -my $filt_cmd = ""; -if ($remove_filt eq "true") -{ - $filt_cmd = "--remove-filtered-all"; -} - -my $thin_cmd = ""; -if ($thin){ - $thin_cmd = "--bp-space $thin"; -} - -#my $bcf_input = $input; -#$bcf_input =~s/vcf/bcf/g; -my $bcf_input; -my $bed_input = $input; -$bed_input =~s/\.bed//g; - -if (-e "$bed_input.bed"){ - system("$PLINK_EXE --bfile $bed_input --out $out $type_cmd $export_cmd $chrom_cmd $indiv_cmd $minfreq_cmd $maxfreq_cmd --geno $missing_data $thin_cmd $bounds_cmd --allow-extra-chr 1>$out.plink.stdout 2>$out.plink.stderr"); - # for first 1000 SNPs - system("$PLINK_EXE --bfile $bed_input --out $out.recode $type_cmd --recode vcf-fid $chrom_cmd $indiv_cmd $minfreq_cmd $maxfreq_cmd --geno $missing_data $thin_cmd $bounds_cmd --allow-extra-chr --thin-count 800 1>$out.2.plink.stdout 2>$out.2.plink.stderr"); -} -elsif (-e $bcf_input){ - system("$PLINK_EXE --bcf $bcf_input --out $out $type_cmd $export_cmd $chrom_cmd $indiv_cmd $minfreq_cmd $maxfreq_cmd --geno $missing_data $thin_cmd $bounds_cmd --allow-extra-chr 1>$out.plink.stdout 2>$out.plink.stderr"); -} -else -{ - system("$PLINK_EXE --vcf $input --out $out $type_cmd $export_cmd $chrom_cmd $indiv_cmd $minfreq_cmd $maxfreq_cmd --geno $missing_data $thin_cmd $bounds_cmd --allow-extra-chr 1>$out.3.plink.stdout 2>$out.3.plink.stderr"); - -} - - - - - diff -r 013ff9bb23aa -r 2bc9b3ee5eea find_indiv.py --- a/find_indiv.py Fri Aug 05 10:26:44 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -import sys -import os -import re - -def get_field_samples_options(dataset): - options = [] - line=os.popen("grep '#CHROM' %s"%dataset.file_name).read()[:-1].split('\t') - index=line.index('FORMAT') - for opt in line[index+1:] : - options.append((opt,opt, True)) - return options - -def get_field_chrs_options(dataset): - options = [] - chrs=os.popen("grep '##contig' %s"%dataset.file_name).read()[:-1].split('\n') - for line in chrs: - opt=re.search('^##contig=&2 -fi - - -export="VCF" - -perl $tool_path/Plink.pl --input input$$.vcf --out $fileout_label --export $export --frequency $frequency --max_freq $max_freq --allow_missing $allow_missing --type $type --bounds $bound_start','$bound_end $samples $chromosomes - - -#echo ${16} >>$fileout_label.log -#echo ${15} >>$fileout_label.log -#echo ${17} >>$fileout_label.log -#echo ${18} >>$fileout_label.log - -if [ "$export" = "VCF" ] -then cp $fileout_label.vcf $fileout ; rm $fileout_label.vcf -else cp $fileout_label.bed $fileout; cp $fileout_label.bed ${15} ; cp $fileout_label.bim ${18} ;rm $fileout_label.bed $fileout_label.fam $fileout_label.bim -fi - -cp $fileout_label.log $filelog -rm $fileout_label.log - diff -r 013ff9bb23aa -r 2bc9b3ee5eea plink.xml --- a/plink.xml Fri Aug 05 10:26:44 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,83 +0,0 @@ - - - - - Filter large VCF file - - - - perl - plink - - - - - - - - - - - - ./plink.sh $vcf $fileout $filelog $frequency $max_freq $allow_missing $type_p $bound_start $bound_end - #if str( $samples ) == "": - 'None' - #else - $samples - #end if - #if str( $chromosomes ) == "": - 'None' - #else - $chromosomes - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -.. class:: infomark - -**Authors** Shaun Purcell : plink_ - -.. _plink: https://www.cog-genomics.org/plink2 - - | **Please cite** "PLINK: a toolset for whole-genome association and population-based linkage analysis.", Purcell S, Neale B, Todd-Brown K, Thomas L, Ferreira MAR, Bender D, Maller J, Sklar P, de Bakker PIW, Daly MJ, Sham PC, **American Journal of Human Genetics**, 2007 - -.. class:: infomark - -**Galaxy integration** Dereeper Alexis (IRD), Andres Gwendoline (Institut Français de Bioinformatique). - -.. class:: infomark - -**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr - - - - - - diff -r 013ff9bb23aa -r 2bc9b3ee5eea tool-data/tool_dependencies.xml --- a/tool-data/tool_dependencies.xml Fri Aug 05 10:26:44 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ - - - - - -