Mercurial > repos > davidvanzessen > shm_csr
changeset 14:59765d2c8890 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 11 Nov 2016 07:31:48 -0500 |
parents | 933fb21568ce |
children | 61d0a6318711 |
files | merge_and_filter.r shm_csr.xml wrapper.sh |
diffstat | 3 files changed, 21 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/merge_and_filter.r Fri Nov 11 03:49:30 2016 -0500
+++ b/merge_and_filter.r Fri Nov 11 07:31:48 2016 -0500
@@ -6,22 +6,24 @@
 mutationanalysisfile = args[3]
 mutationstatsfile = args[4]
 hotspotsfile = args[5]
-gene_identification_file= args[6]
-output = args[7]
-before.unique.file = args[8]
-unmatchedfile = args[9]
-method=args[10]
-functionality=args[11]
-unique.type=args[12]
-filter.unique=args[13]
-class.filter=args[14]
-empty.region.filter=args[15]
+aafile = args[6]
+gene_identification_file= args[7]
+output = args[8]
+before.unique.file = args[9]
+unmatchedfile = args[10]
+method=args[11]
+functionality=args[12]
+unique.type=args[13]
+filter.unique=args[14]
+class.filter=args[15]
+empty.region.filter=args[16]
 
 summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 mutationanalysis = read.table(mutationanalysisfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 mutationstats = read.table(mutationstatsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 hotspots = read.table(hotspotsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
+AAs = read.table(aafile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 gene_identification = read.table(gene_identification_file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 
 if(method == "blastn"){
@@ -81,6 +83,10 @@
 names(sequences) = c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq")
 result = merge(result, sequences, by="Sequence.ID", all.x=T)
 
+AAs = AAs[,c("Sequence.ID", "CDR3.IMGT")]
+names(AAs) = c("Sequence.ID", "CDR3.IMGT.AA")
+result = merge(result, AAs, by="Sequence.ID", all.x=T)
+
 print(paste("Number of sequences in result after merging with sequences:", nrow(result)))
 
 result$VGene = gsub("^Homsap ", "", result$V.GENE.and.allele)
```
```diff
--- a/shm_csr.xml Fri Nov 11 03:49:30 2016 -0500
+++ b/shm_csr.xml Fri Nov 11 07:31:48 2016 -0500
@@ -22,10 +22,10 @@
 <option value="no">No</option>
 </param>
 <param name="unique" type="select" label="Remove duplicates based on" help="" >
-<option value="VGene,AA.JUNCTION,best_match_class">Top.V.Gene, CDR3 (AA), C region</option>
-<option value="VGene,AA.JUNCTION">Top.V.Gene, CDR3 (AA)</option>
-<option value="AA.JUNCTION,best_match_class">CDR3 (AA), C region</option>
-<option value="AA.JUNCTION">CDR3 (AA)</option>
+<option value="VGene,CDR3.IMGT.AA,best_match_class">Top.V.Gene, CDR3 (AA), C region</option>
+<option value="VGene,CDR3.IMGT.AA">Top.V.Gene, CDR3 (AA)</option>
+<option value="CDR3.IMGT.AA,best_match_class">CDR3 (AA), C region</option>
+<option value="CDR3.IMGT.AA">CDR3 (AA)</option>
 <option value="VGene,CDR3.IMGT.seq,best_match_class">Top.V.Gene, CDR3.nt.Seq, C region</option>
 <option value="VGene,CDR3.IMGT.seq">Top.V.Gene, CDR3 (nt)</option>
```
```diff
--- a/wrapper.sh Fri Nov 11 03:49:30 2016 -0500
+++ b/wrapper.sh Fri Nov 11 07:31:48 2016 -0500
@@ -62,7 +62,7 @@
 echo "---------------- merge_and_filter.r ----------------"
 echo "---------------- merge_and_filter.r ----------------<br />" >> $log
-Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1
+Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $PWD/aa.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1
 if [[ "$fast" == "no" ]] ; then
```