Next changeset 1:0e985680e67d (2020-03-04) |
Commit message:
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/droplet-rank-plot/.shed.yml commit a785b79f2b5689aba87c0f7072897bb23f6bda76 |
added:
dropletBarcodePlot.R dropletBarcodePlot.xml test-data/barcode_plot.png test-data/raw_cb_frequency.txt |
b |
diff -r 000000000000 -r 04f32429dcf2 dropletBarcodePlot.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dropletBarcodePlot.R Fri Nov 08 09:08:14 2019 -0500 |
[ |
#!/usr/bin/env Rscript

# Produce a droplet barcode rank plot, alongside a count-weighted density plot,
# from per-barcode counts, and write out the count thresholds calculated for
# separating high-count barcodes from the rest. Per-barcode counts are either
# read from a two-column barcode frequency table or calculated from a
# Matrix Market format matrix.

# library() rather than require(): a missing package stops the script here
# instead of failing later with an unrelated "could not find function" error.
suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(DropletUtils))
suppressPackageStartupMessages(library(Matrix))

# Write a message to stderr and terminate the script with exit status 1.
die <- function(message) {
  write(message, stderr())
  q(status = 1)
}

option_list <- list(
  make_option(
    c("-b", "--barcode-frequencies"),
    action = "store",
    default = NA,
    type = "character",
    help = "Path to a two-column tab-delimited file, with barcodes in the first column and frequencies in the second (ignored if --mtx-matrix supplied)"
  ),
  make_option(
    c("-m", "--mtx-matrix"),
    action = "store",
    default = NA,
    type = "character",
    help = "Matrix-market format matrix file, with cells by column (overrides --barcode-frequencies if supplied)"
  ),
  make_option(
    c("-r", "--cells-by-row"),
    action = "store_true",
    default = FALSE,
    type = "logical",
    help = "For use with --mtx-matrix: force interpretation of matrix to assume cells are by row, rather than by column (default)"
  ),
  make_option(
    c("-l", "--label"),
    action = "store",
    default = "",
    type = "character",
    help = "Label to use in plot"
  ),
  make_option(
    c("-d", "--density-bins"),
    action = "store",
    default = 50,
    type = "numeric",
    help = "Number of bins used to calculate density plot"
  ),
  make_option(
    c("-y", "--roryk-multiplier"),
    action = "store",
    default = 1.5,
    type = "numeric",
    help = "Above-baseline multiplier to calculate roryk threshold"
  ),
  make_option(
    c("-o", "--output-plot"),
    action = "store",
    default = "barcode_plot.png",
    type = "character",
    help = "File path for output plot"
  ),
  make_option(
    c("-t", "--output-thresholds"),
    action = "store",
    default = "barcode_thresholds.txt",
    type = "character",
    help = "File path for output file containing calculated thresholds"
  )
)

opt <- parse_args(
  OptionParser(option_list = option_list),
  convert_hyphens_to_underscores = TRUE
)

# Process inputs dependent on what has been provided. Either way the result is
# a data frame with a barcode identifier in V1 and its count in V2.

if (is.na(opt$mtx_matrix)) {
  if (is.na(opt$barcode_frequencies)) {
    die("ERROR: must supply --mtx-matrix or --barcode-frequencies")
  } else if (!file.exists(opt$barcode_frequencies)) {
    die(paste("ERROR: barcode frequencies file", opt$barcode_frequencies, "does not exist"))
  } else {
    barcode_counts <- read.delim(opt$barcode_frequencies, header = FALSE)
  }
} else if (!file.exists(opt$mtx_matrix)) {
  die(paste("ERROR: MTX matrix file", opt$mtx_matrix, "does not exist"))
} else {
  result_matrix <- Matrix::readMM(opt$mtx_matrix)
  if (opt$cells_by_row) {
    barcode_totals <- Matrix::rowSums(result_matrix)
  } else {
    barcode_totals <- Matrix::colSums(result_matrix)
  }
  # seq_along() rather than 1:n, which would yield c(1, 0) for an empty matrix
  barcode_counts <- data.frame(V1 = seq_along(barcode_totals), V2 = barcode_totals)
}

if (ncol(barcode_counts) < 2 || nrow(barcode_counts) == 0) {
  die("ERROR: input must provide at least one barcode with a count")
}

# Pick a cutoff on count as per
# https://github.com/COMBINE-lab/salmon/issues/362#issuecomment-490160480
#
# bcs:                       numeric vector of per-barcode counts
# above_baseline_multiplier: the cutoff is placed where the weighted density
#                            drops below this multiple of the median density
# density_bins:              suggested number of histogram bins for
#                            log10(bcs), passed to hist() as 'breaks'
#
# Returns the count (10^bin midpoint) of the last bin at or before the
# upper-half density peak whose weighted density is below the multiplied
# baseline. Stops with an error if there is no such bin.

pick_roryk_cutoff <- function(bcs, above_baseline_multiplier = 1.5,
                              density_bins = 50) {
  bcs_hist <- hist(log10(bcs), plot = FALSE, breaks = density_bins)
  mids <- bcs_hist$mids
  vals <- bcs_hist$counts

  # Weight each bin by the count it represents, then normalise to sum to 1
  weighted <- vals * (10^mids)
  wdensity <- weighted / sum(weighted)
  baseline <- median(wdensity)

  # Find highest density in upper half of barcode distribution. The start
  # index is rounded up so it is always a whole number: with an even number of
  # bins a fractional start would be truncated bin by bin, shifting the window
  # down and leaving out the final bin. which.max() gives exactly one index,
  # and the offset keeps it inside the upper half.

  n_bins <- length(wdensity)
  upper_start <- ceiling((n_bins + 1) / 2)
  peak <- upper_start - 1 + which.max(wdensity[upper_start:n_bins])

  # Cutoff is the point before the peak at which density falls below the
  # multiplier of baseline

  below <- which(wdensity[seq_len(peak)] < (above_baseline_multiplier * baseline))
  if (length(below) == 0) {
    stop(
      "no density bin at or before the peak is below ",
      above_baseline_multiplier,
      " x the baseline density; cannot pick a roryk cutoff",
      call. = FALSE
    )
  }

  10^mids[max(below)]
}

# Plot densities
#
# Draws count (log10 y axis) against count-weighted density for the histogram
# of log10(bcs), with one horizontal line per threshold. The legend entry for
# each threshold reports how many barcodes have a count above it.
#
# bcs:          numeric vector of per-barcode counts
# roryk_cutoff, knee, inflection: thresholds on count to draw
# name:         plot title
# density_bins: suggested number of histogram bins, passed to hist() as
#               'breaks'

barcode_density_plot <- function(bcs, roryk_cutoff, knee, inflection,
                                 name = " ", density_bins = 50) {
  bcs_hist <- hist(log10(bcs), plot = FALSE, breaks = density_bins)
  counts <- bcs_hist$counts
  mids <- bcs_hist$mids
  y <- counts * (10^mids) / sum(counts * (10^mids))
  qplot(y, 10^mids) + geom_point() + theme_bw() + ggtitle(name) +
    ylab("Count") + xlab("Density") +
    geom_hline(aes(yintercept = roryk_cutoff, color = paste("roryk_cutoff =", length(which(bcs > roryk_cutoff)), "cells"))) +
    geom_hline(aes(yintercept = inflection, color = paste("dropletutils_inflection =", length(which(bcs > inflection)), "cells"))) +
    geom_hline(aes(yintercept = knee, color = paste("dropletutils_knee =", length(which(bcs > knee)), "cells"))) +
    scale_y_continuous(trans = "log10") +
    theme(axis.title.y = element_blank()) +
    labs(color = "Thresholds")
}

# Plot a more standard barcode rank plot
#
# br.out: output of barcodeRanks(); its 'rank' and 'total' columns are plotted
#         on log10 axes
# roryk_total_cutoff, knee, inflection: thresholds on count to draw
# name:   plot title
#
# The legend is suppressed here; the density plot placed next to it carries
# the legend for the same three thresholds.

barcode_rank_plot <- function(br.out, roryk_total_cutoff, knee, inflection,
                              name = "no name") {
  ggplot(data.frame(br.out), aes(x = rank, y = total)) +
    geom_line() +
    scale_x_continuous(trans = "log10") +
    scale_y_continuous(trans = "log10") +
    theme_bw() +
    geom_hline(aes(yintercept = knee, color = "dropletutils_knee")) +
    geom_hline(aes(yintercept = inflection, color = "dropletutils_inflection")) +
    geom_hline(aes(yintercept = roryk_total_cutoff, color = "roryk_cutoff")) +
    ggtitle(name) + ylab("Count") + xlab("Rank") +
    theme(legend.position = "none")
}

# Sort barcodes by descending frequency

barcode_counts <- barcode_counts[order(barcode_counts$V2, decreasing = TRUE), ]

roryk_count_cutoff <- pick_roryk_cutoff(
  barcode_counts$V2,
  opt$roryk_multiplier,
  density_bins = opt$density_bins
)

# Run dropletUtils' barcodeRanks to get knee etc. The counts column is
# transposed into a one-row matrix so that each barcode is a column.
br.out <- barcodeRanks(t(barcode_counts[, 2, drop = FALSE]))

dropletutils_knee <- metadata(br.out)$knee
dropletutils_inflection <- metadata(br.out)$inflection

plot_label <- paste(format(nrow(barcode_counts), big.mark = ","), "cell barcodes")
if ((!is.na(opt$label)) && opt$label != "") {
  plot_label <- paste0(opt$label, ": ", plot_label)
}

plots <- list(
  dropletutils = barcode_rank_plot(
    br.out,
    roryk_count_cutoff,
    dropletutils_knee,
    dropletutils_inflection,
    name = plot_label
  ),
  roryk = barcode_density_plot(
    barcode_counts$V2,
    roryk_count_cutoff,
    dropletutils_knee,
    dropletutils_inflection,
    name = " ",
    density_bins = opt$density_bins
  )
)

# Create output plot
png(width = 1000, height = 600, filename = opt$output_plot)
grid.arrange(plots$dropletutils, plots$roryk, nrow = 1)
dev.off()

# Return calculated thresholds
write.table(
  data.frame(
    dropletutils_knee = dropletutils_knee,
    dropletutils_inflection = dropletutils_inflection,
    roryk = roryk_count_cutoff
  ),
  file = opt$output_thresholds,
  row.names = FALSE,
  quote = FALSE
)
b |
diff -r 000000000000 -r 04f32429dcf2 dropletBarcodePlot.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dropletBarcodePlot.xml Fri Nov 08 09:08:14 2019 -0500 |
[ |
<tool id="_dropletBarcodePlot" name="Droplet barcode rank plot" version="1.6.1+galaxy0">
    <!--
        Galaxy wrapper for dropletBarcodePlot.R. Takes either an MTX-format
        matrix or a two-column barcode frequency table and produces a PNG plot
        plus a text file of the calculated thresholds.
    -->
    <description>Creates a barcode rank plot for quality control of droplet single-cell RNA-seq data</description>
    <requirements>
        <requirement type="package" version="1.6.1">bioconductor-dropletutils</requirement>
        <requirement type="package">openblas</requirement>
        <requirement type="package">r-matrix</requirement>
        <requirement type="package">r-ggplot2</requirement>
        <requirement type="package">r-optparse</requirement>
        <requirement type="package">r-gridextra</requirement>
        <requirement type="package">bioconductor-delayedarray</requirement>
    </requirements>
    <!-- Paths and values are single-quoted so that whitespace or shell
         metacharacters in them cannot split or alter the command line. -->
    <command detect_errors="exit_code"><![CDATA[
        '$__tool_directory__/dropletBarcodePlot.R' --output-plot '${plot_file}' --output-thresholds '${thresholds_file}' --label '${label}' --density-bins '${density_bins}' --roryk-multiplier '${roryk_multiplier}'
#if $input.type == 'mtx_matrix'
--mtx-matrix '${input.mtx_matrix}'
#if $input.cellsbyrow
--cells-by-row
#end if
#end if
#if $input.type == 'barcode_freqs'
--barcode-frequencies '${input.barcode_frequencies}'
#end if
    ]]></command>

    <inputs>
        <conditional name="input">
            <param name="type" type="boolean" truevalue='mtx_matrix' falsevalue='barcode_freqs' checked="true" label="Input MTX-format matrix?" help="Barcode frequencies will be calculated for you. The alternative is to provide barcode frequencies directly"/>
            <when value="mtx_matrix">
                <param name="mtx_matrix" type="data" format="mtx" label="Matrix-market format matrix file, with cells by column (overrides --barcode-frequencies if supplied)" />
                <param name="cellsbyrow" type="boolean" checked="false" label="For use with --mtx-matrix: force interpretation of matrix to assume cells are by row, rather than by column (default)"/>
            </when>
            <when value="barcode_freqs">
                <param name="barcode_frequencies" type="data" format="txt" label="A two-column tab-delimited file, with barcodes in the first column and frequencies in the second" />
            </when>
        </conditional>
        <param name="label" type="text" optional='true' value="" label="Label to place in plot title"/>
        <param name="density_bins" type="integer" value="50" label="Number of bins used in barcode count frequency distribution"/>
        <param name="roryk_multiplier" type="float" value="1.5" label="Above-baseline multiplier to calculate roryk threshold"/>
    </inputs>

    <outputs>
        <data name="plot_file" format="png" label="${tool.name} on ${on_string}: barcode rank plot"/>
        <data name="thresholds_file" format="txt" label="${tool.name} on ${on_string}: barcode thresholds"/>
    </outputs>

    <tests>
        <test>
            <conditional name='input'>
                <param name="type" value="barcode_freqs"/>
                <param name="barcode_frequencies" ftype="txt" value="raw_cb_frequency.txt"/>
            </conditional>
            <output name="plot_file" file="barcode_plot.png"/>
        </test>
    </tests>

    <help><![CDATA[
.. class:: infomark

**What it does**

Given a barcode frequency table or an MTX-format matrix from which one can be calculated, produces a barcode rank plot to assess distinctness of droplets with cells over those without (a key mark of good-quality droplet single-cell RNA-seq data).

Thresholds are calculated and plotted, either with DropletUtils or by custom method discussed at https://github.com/COMBINE-lab/salmon/issues/362#issuecomment-490160480.

**Inputs**

    * two-column tab-delimited text file with barcode frequencies OR an MTX-format matrix file

-----

**Outputs**

    * PNG-format plot file.
    * Text file containing the calculated thresholds (DropletUtils knee, DropletUtils inflection and roryk cutoff).
]]></help>
<citations>
    <citation type="bibtex">
@misc{github-hinxton-single-cell,
author = {Jonathan Manning, EBI Gene Expression Team},
year = {2019},
title = {Hinxton Single Cell Analysis Environment},
publisher = {GitHub},
journal = {GitHub repository},
url = {https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary},
  }</citation>
</citations>
</tool>
b |
diff -r 000000000000 -r 04f32429dcf2 test-data/barcode_plot.png |
b |
Binary file test-data/barcode_plot.png has changed |
b |
diff -r 000000000000 -r 04f32429dcf2 test-data/raw_cb_frequency.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/raw_cb_frequency.txt Fri Nov 08 09:08:14 2019 -0500 |
b |
b'@@ -0,0 +1,79931 @@\n+CCCAATCAGGCATGGT\t1158\n+TCTCTAATCAAGCCTA\t924\n+CAGCTAAGTGATGTGG\t916\n+GGATTACTCCCAGGTG\t906\n+TGCGTGGTCAACGCTA\t887\n+AGGGATGAGGTGCAAC\t715\n+CAGCAGCCAATCGAAA\t687\n+GATGCTACAAGCGATG\t684\n+GAGCAGATCAATCTCT\t670\n+AGAATAGTCCGAATGT\t660\n+CGGTTAACACCACGTG\t658\n+AAGCCGCTCAGGTAAA\t651\n+CGGACACAGTTAAGTG\t650\n+TTTATGCAGTAGCCGA\t648\n+GACGCGTGTTCTGAAC\t645\n+CGAGAAGCACCAACCG\t638\n+TAGAGCTAGCGCCTCA\t607\n+TCTTCGGCAGCCTATA\t601\n+CATCGGGCATGAAGTA\t598\n+CGTCTACTCTTCCTTC\t576\n+TGTATTCTCTCGGACG\t575\n+ACGGCCATCAATAAGG\t568\n+AGAATAGAGTGAATTG\t563\n+GTACGTACACCAGGCT\t559\n+CTCGAAAGTTTGACAC\t559\n+CGTCACTAGTGACTCT\t558\n+GTCCTCATCACAACGT\t558\n+TATGCCCCACCAGGCT\t557\n+CACTCCATCGAGCCCA\t555\n+GTAACTGGTAGCTTGT\t553\n+CGAGCCAGTTACGACT\t546\n+CCTCAGTCACACGCTG\t541\n+AGATTGCAGGAGTTTA\t527\n+ATCTGCCGTACTCAAC\t524\n+CATATGGGTGTTCGAT\t522\n+ACTTGTTCATGGTTGT\t518\n+TCTCTAACAAAGCAAT\t517\n+CTAGCCTGTCCGAGTC\t517\n+CGATGTAGTGGTAACG\t513\n+AAACCTGGTAGCGTGA\t510\n+CAGCAGCAGAGGGATA\t509\n+GTTACAGAGAGTACCG\t508\n+CCTCTGAGTACCATCA\t506\n+CGTCACTCACTGTCGG\t499\n+TCTTTCCGTCTGCAAT\t498\n+GGCAATTCATGCCCGA\t497\n+TCAGGTATCACTTATC\t493\n+ATGGGAGTCCTTTACA\t490\n+CATCGGGGTCATTAGC\t487\n+TACTCGCAGATGCCTT\t483\n+GTATCTTGTTGGTAAA\t480\n+ACATACGCATTACGAC\t479\n+GACTAACGTAAACACA\t476\n+CTAGAGTTCTACTATC\t474\n+GAACATCTCTTCAACT\t472\n+TAAACCGGTCCGAACC\t471\n+GCATACAGTTAAGACA\t470\n+AGGTCCGCAAGTCTGT\t470\n+GGTGAAGAGATAGCAT\t468\n+GGTGCGTAGCGTCAAG\t468\n+TATTACCGTAGGACAC\t461\n+TTGAACGAGGATTCGG\t461\n+TTCTCAAAGTTCGCGC\t460\n+CGGCTAGGTACCAGTT\t459\n+GTGTGTGTGTGTGTGT\t458\n+GCAAACTGTGACTCAT\t455\n+GACCTGGAGCGTAATA\t454\n+GTTCGGGGTAAGGGAA\t454\n+GTGCAGCCATCATCCC\t452\n+CTAAGACCACAGAGGT\t449\n+TGACTTTGTTGGTGGA\t447\n+AGGTCATTCAAGAAGT\t444\n+AGCTCTCGTACACCGC\t444\n+ATTTCTGGTCAATACC\t443\n+TCTCATACAGGACCCT\t443\n+CCCTCCTGTACTTAGC\t441\n+AGCTTGACAAAGTGCG\t441\n+CTGTTTAAGACAAGCC\t439\n+TGGCTGGTCTCAACTT\t439\n+GTGTTAGGTGTGCCTG\t436\n+GTGGGTCGTTGCGTTA\t435\n+CTAACTTTCTCTTGAT\t435\n+GCACTCT
GTACCGAGA\t434\n+TGGCGCAGTTCCATGA\t434\n+AGGTCATAGGTTCCTA\t433\n+CTACATTCATCGGAAG\t431\n+CTCTACGAGGCCCTTG\t430\n+ACTATCTAGCCACTAT\t430\n+CACACACACACACACA\t429\n+TCAGGATCAGGACCCT\t427\n+AAAGTAGAGACTAAGT\t425\n+TGAGAGGAGTACTTGC\t424\n+TGCGTGGTCTATCCTA\t424\n+CATTCGCTCTCTTGAT\t424\n+AAGCCGCGTATTACCG\t424\n+CGCTTCACAGCTGTAT\t423\n+CTTCTCTAGTCGTACT\t422\n+GGTGAAGAGACTGGGT\t421\n+CTAATGGAGCGAGAAA\t421\n+GATGAGGGTATTACCG\t419\n+TTCCCAGTCATCGATG\t418\n+CTTTGCGGTAGCTGCC\t418\n+CGCGGTACAGGCAGTA\t417\n+CAGCATAGTGAAAGAG\t417\n+ATCTGCCCATGCATGT\t416\n+CACACAAGTCATCGGC\t416\n+CCTATTATCATCTGCC\t413\n+GCTTCCATCTTTAGGG\t411\n+GCAAACTTCTATCCTA\t411\n+GCATGATTCACATACG\t410\n+TCAACGAAGGTAAACT\t410\n+TACTCGCAGCCAACAG\t410\n+GTGAAGGCAGTTCATG\t410\n+ACCCACTCAGGTGCCT\t409\n+TCTCATAGTATCACCA\t408\n+AGCTCTCTCTTCTGGC\t408\n+AGCTCTCTCATTCACT\t408\n+TTGGCAAGTGGACGAT\t407\n+CAGCCGAGTCTGGAGA\t407\n+CAGTCCTCAATGAAAC\t406\n+AGCAGCCTCCTCATTA\t405\n+GGGAATGAGGTGTGGT\t403\n+CTAGCCTTCATCGCTC\t402\n+CTGTTTAAGTGCCATT\t401\n+TGGTTAGTCGTACGGC\t400\n+TACCTTAAGCGTGAGT\t399\n+CAGCATACACAACGTT\t399\n+TTGGCAAGTGATGTCT\t398\n+CCTAGCTCATCCGCGA\t398\n+AAATGCCTCAAGGTAA\t397\n+ACATACGGTCGCATCG\t397\n+ACGCCGACAAACTGTC\t396\n+CCAGCGAGTAGCTCCG\t393\n+GAACGGACATGCATGT\t392\n+AGATCTGTCGACAGCC\t392\n+AAACCTGCACGCTTTC\t390\n+CCGGTAGCATGGTAGG\t390\n+CAGCCGATCAGCGACC\t389\n+GATGCTACACTTAACG\t389\n+CCTTTCTTCCCTGACT\t389\n+GCGAGAAGTGCCTGCA\t388\n+CTGAAACAGAAACCTA\t388\n+CGCTGGATCCTAGAAC\t387\n+GATGAAAAGAGACTAT\t386\n+ACTATCTCACCCATGG\t384\n+AGGGAGTGTCATTAGC\t384\n+GCAATCAAGCCAGTAG\t383\n+CGCTTCATCGACCAGC\t382\n+ACCGTAATCCTAGTGA\t381\n+GGCGACTAGCTGGAAC\t380\n+CACAAACGTTACAGAA\t380\n+GAATGAAGTACCGCTG\t379\n+TAAGCGTAGAGTTGGC\t378\n+TTTATGCCAGGGTATG\t378\n+TCACAAGTCCGTTGTC\t378\n+GGAAAGCGTGATGCCC\t378\n+CAGATCACATGGGACA\t378\n+GGACATTGTTCAGTAC\t377\n+CCTTTCTAGCTTATCG\t376\n+TTCTCCTTCCGAATGT\t375\n+GGATTACGTCGAATCT\t375\n+GTGCGGTCAGCTTCGG\t375\n+GTGTTAGTCACCCTCA\t374\n+CAACTAGTCAGCACAT\t373\n+CCTACCACATAACCTG\t373\n+CGTTAGAAGCTGTTC
A\t372\n+AAATGCCCAATCTGCA\t372\n+AAAGTAGTCCTATTCA\t371\n+GTCATTTTCAACACTG\t371\n+GCGAGAATCTTATCTG\t371\n+GACCTGGAGTGTACTC\t371\n+CTCCTAGCACACGCTG\t371\n+GAATGAAAGGCACATG\t371\n+AATCGGTGTCAATGTC\t371\n+TGTATTCAGCAGACTG\t371\n+GAAACTCTCTCATTCA\t370\n+TCTTTCCGTCGGCATC\t370\n+CATCAGAAGATCACGG\t370\n+CTACACCTCGTGGGAA\t369\n+TTATGCTGTGCTCTTC\t369\n+AGCGTATCAGGTTTCA\t'..b'CCATTCGGTTATCCGA\t1\n+CAGTCCTTCACAAACC\t1\n+GCCTGTGTTCTGGTAC\t1\n+CATCAGCCACTTCGAA\t1\n+CTACATTGTGCCTGTG\t1\n+GGACATTCAGTACATG\t1\n+CGGCCAGCAAAGGTGC\t1\n+CTAATGGGTTACGTCA\t1\n+CCTTAAAGTCTGCTGC\t1\n+GTATCTTGTTAAAGAC\t1\n+TTGCAAGTGGACGATT\t1\n+ACACCCTAGAAACCAT\t1\n+CCACCTAGTTAGTGGG\t1\n+ATCTGCCCACATTTCT\t1\n+CGACTTCCAGCTATTG\t1\n+GTAGGCCAGTGTTTGC\t1\n+CACAGGCAATCACACT\t1\n+CTCGAAAAGAGGACGG\t1\n+GGTGTTACAGTATTCC\t1\n+GGGTTGCTCCTTTCGG\t1\n+ACAATCACAGATCGGA\t1\n+TATCAGGTCACTATCT\t1\n+GCACATAGTAAACACA\t1\n+ATCTGCCTCAGTTAGC\t1\n+GCAATCAGTGGCAATT\t1\n+GCTGCGAGCTGGTCGA\t1\n+ATTGAATCGCGAGTGG\t1\n+GACGCGTCCGCTCGCA\t1\n+CTAGTGAAGACTTTCG\t1\n+TCGGTAACATAGTAAG\t1\n+GTCGGGGTAAGGGAAC\t1\n+GTATTCTAGACCGGAT\t1\n+ACGAGCCAGACGACGT\t1\n+CAGCTGGAGATGTGGC\t1\n+AGGGAGTAGTTACCCA\t1\n+ACCCACTGTAGCAAAT\t1\n+CAGTAGCCAATCGAAA\t1\n+TGCGGGTGTATCACCA\t1\n+TATCTCACCGCCAGAA\t1\n+AAAGACCTCAAACTGC\t1\n+AACTCCCCTGACCCCC\t1\n+GGGTCTGCAAATCCGT\t1\n+GTACTCCTCAATAAGG\t1\n+CTGTGTAAGTGCCATT\t1\n+TTTCCTCTCGAGAACG\t1\n+ACTTGTTCACAAAGCA\t1\n+GCTTCCGGTCTAAACC\t1\n+ATTGGTGCAGCTGCAC\t1\n+TGTGTTTGTTCAGCGC\t1\n+AGTGTCATCGTCTGAA\t1\n+TCAATCTAGGCGCTCT\t1\n+TCACAAGGTTATCCGA\t1\n+TATTACCCACATCCGA\t1\n+CTACACCGCTTTAAAT\t1\n+ATGTCATCATTTGGGC\t1\n+TTCCCAGGTGCCTGTG\t1\n+GATGTTACAAAGGTGC\t1\n+GTGAAGGCACTTGGAT\t1\n+AGTAGTCGTGATGATA\t1\n+CATGGCGTCAGCTTAG\t1\n+CGTCTACTCTTGTACT\t1\n+AGGTGAAGTGAATTGA\t1\n+CAGCTGTAACAGGCCA\t1\n+CTTCGCTCTCTTGATG\t1\n+GATCGCGGTACCCAAT\t1\n+GGTGTTAGTAGGGACT\t1\n+TCTGGAAAGCAATCTC\t1\n+CGTGTAAAGTCGAGTG\t1\n+TAGCGGAGCTTTGGTC\t1\n+CCTTCGAGGAAATGAC\t1\n+GATCAGTGTAGCGTAG\t1\n+CTAGAGTGTAGTACCT\t1\n+CTCTGGTGTGGTCCGT\t1\n+CCGTACTCAGGCTCAC\t1\n+GCAC
CTTCGCATGATG\t1\n+TTAGGCACATGGTCTA\t1\n+TACTTGTTCGAAGTGT\t1\n+TTGTAGGTCAACACAC\t1\n+CGCCAAGCAGAGCCAA\t1\n+TTGGGAGCGTGCTGGT\t1\n+TTCGGTCGTGTGTGTG\t1\n+CTTGGCTCAATGCCAT\t1\n+TTCCCAATCATCGATG\t1\n+ACTCGGTTCAGTCAGT\t1\n+TAAGAGATCTTTTATC\t1\n+CCCTCCTCAACGCACC\t1\n+CTCATTAGAATGCTTA\t1\n+GAAATGATCCTCGCAT\t1\n+GGATGTTCATACAGCT\t1\n+CAGCCGAAGGCAATTA\t1\n+CAAGATCTCTACTTAC\t1\n+TACCTATCAGGCTCAC\t1\n+TTCTTAGAGATGAGAG\t1\n+ACAGGCAGCTGATAAT\t1\n+CACAGGCTCCAACCAA\t1\n+ACATCAGAGAGTAAGG\t1\n+GCATCTTCTCAATCTG\t1\n+GATCGCGCATGTAAGA\t1\n+GCACTCTCATCCAACA\t1\n+CGCGTAGTCTGGAGAA\t1\n+GGGACCTCAAGCGCTC\t1\n+TTGGAACTCCTTGACC\t1\n+TGTGTTTTCATGGATG\t1\n+TGGACGCGTAGGGACT\t1\n+TTTCTACAAGGCCCGT\t1\n+CAGGATCATATACCGG\t1\n+TTATGCTGTCGACTGC\t1\n+GGGAGATGTAAGTGGC\t1\n+ACGCCGAGTCGATTTC\t1\n+GCTCTGTGCCCCTAAC\t1\n+CAAGGCCACTGTGTAA\t1\n+CGAATGTTCAAGGTAA\t1\n+TCGTAGAAGGACAGCT\t1\n+AGCTTGAAGGCATGTG\t1\n+CGTTAGACACACAGAG\t1\n+CGGTCTCAGGAACGTA\t1\n+AATGGTGTTAAAGTGC\t1\n+CCTTCCCTCAGCATTC\t1\n+ACCCACTGTCCGTCAG\t1\n+TGCTGGTTCCCTCTTT\t1\n+CACAAACAGCCTTGAA\t1\n+GTATTCTAGGCTAGAC\t1\n+GGGTGAGTGGGTATGA\t1\n+TCCATCTCAAGGTGTG\t1\n+CTCGAAAGTCGCTTCT\t1\n+CGAATGTCACAGACAG\t1\n+GTGCACACACACACAC\t1\n+CATGATGTGCCTGGTT\t1\n+GGCTGGTGTGGTGTAG\t1\n+AGTAGTCTCGTGGACC\t1\n+AACTCTTAGCGCCTTG\t1\n+AGGTCCGTTGCGTTAT\t1\n+CCTTCGACAGTATCTG\t1\n+AGATGTTTCCGAAGAG\t1\n+CGCTGGAGTCTAGCGC\t1\n+CTCATTACAATGGATA\t1\n+GGTGAGTCCTAGTGAT\t1\n+ACTAACCAACACACTA\t1\n+GACCTGGTCGTCCGTT\t1\n+CGGAGTCCAAGAGGCT\t1\n+GACTAACGTATAGGGC\t1\n+CATCCACTCCGTTGCT\t1\n+CCCAGTTAGGTGTGGT\t1\n+CGACCTTGTTGGGACA\t1\n+GCCAAATTCGGTTAAC\t1\n+ATCTCTTCTCCGGTTC\t1\n+TTTTCAAGGTGGGTTT\t1\n+TCAGCTCGTAAGGGAA\t1\n+GCGGGGTCATATACGC\t1\n+AGCTCTCAGACAGGCT\t1\n+GTCATTTCGTTTAGGC\t1\n+ACTGCTCGTTGAGTTC\t1\n+CAGCAGCGTAGGAGTC\t1\n+GTCAGGCAAGTTCTGA\t1\n+TCATTTGTCTAACGGT\t1\n+GTACTCCAGCTGTCTA\t1\n+GACAGAGCAAAGAATG\t1\n+GTGTGCGCATTTCAGG\t1\n+CCTACCACACTCGACG\t1\n+CGCGATCACTGCCAGG\t1\n+ATAGACCGTCTCCATC\t1\n+GCGAGAATCTGTCAAG\t1\n+ACGCCGAAGATCGGGT\t1\n+ATTTCTGTCATGTAGC\t1\n+TAAGAGAGTGTTCTTG\t1\n+CA
GAATCCACAGATTC\t1\n+GCTCTGTTCCGTAGGC\t1\n+CTTGGCTTCATGCATG\t1\n+CTTGGCTTCGAATGCT\t1\n+CATCAGACAAAGCGGT\t1\n+GGCGACTGTTGAGTTC\t1\n+ACTGATGGTAGATTAG\t1\n+GGACAAGCAAGGGTCA\t1\n+CGGGTCAGTGAGGGTT\t1\n+CTCCTAGTCTACGAGT\t1\n+GGGTTGCCAGGCTGAA\t1\n+TGGCTGGTCTGTCCGT\t1\n+CTATTAGTCTTGATGA\t1\n+CGCTCTCTCTTCTGGC\t1\n+CCTACACCAAGGTACA\t1\n+TGCAAAAGGCTAGGTA\t1\n+GCTCCTACATCGTGCC\t1\n+ATCTCACAATCTGCAA\t1\n+TAATGCGTAGAGCTGG\t1\n+ATTATCCCACATTAGC\t1\n+CGAGAAGCACCTGGTG\t1\n+TTCCCTACACATCCAA\t1\n+TGACATAGACAGAGAG\t1\n+GCCCTAGTACTTGACT\t1\n+CGACTTCCAAGACACG\t1\n+ACTGTCCTCAAGGTAA\t1\n+CTAGAGTGTAAGTGTA\t1\n+GTGGGTCGTCGCGGTT\t1\n+CGCTATCTCGGTCTAA\t1\n+CTCGAAATCTTACCGC\t1\n+TAAGCGTAGAGCTGCA\t1\n+TTCGAAGAGAGTACCG\t1\n+GATCGATAGCACGCCC\t1\n+CAGTACTCATTATCCC\t1\n+ACACCTTCTGTGCAAT\t1\n' |