【生信】連携してvsearch+usearch 11+QIIMEを使って双端測順データを処理する


Vsearch+usearch 11+QIIMEを使って二端子測順データを処理します.
上記で説明したように、Vsearch、Usearch、QIIMEの両方を使用して、双端測順データを処理することができます.ここでは、各ツールの利点を生かして、vsearch、usearch 11、QIIME 1.9を使用した16 S rRNAデータ処理ツールを開発しました.双端測順データを入力して、最終的にOTU豊度表と関連するシステムツリーを得ます.

#!/bin/bash

###    vsearch+usearch11+QIIME         
###    OTU      

##   
# temp/:       
# res/:       
# data/:       
# database/:     
# err/:     
# log/:     

##   
HOME=$HOME/Lab_2		###     
temp=${HOME}/temp2	   	###         
res=${HOME}/res2		###         
data=${HOME}/seq		###         
refdb=${HOME}/database		###        
err=${HOME}/err2		###     
log=${HOME}/log2		###     


########################################################################################### 
#                                                                                     #
#          fasta                                                        #
#   usearch11、vsearch qiime                                                           #
#   Greengene     ,320MB                                                          #
# wget -c ftp://greengenes.microbio.me/greengenes_release/gg_13_5/gg_13_8_otus.tar.gz     #
#         3.4G                                                                    #
# tar xvzf gg_13_8_otus.tar.gz                                                            #
###########################################################################################

echo "    "
cd ${HOME}

echo "        "
#echo "Step_1.1:decompress paired reads sequences"
#gunzip ${data}/*/*.fastq.gz		###             

prefix='ERR7765'			###       
### merged.fastq     FASTQ  reads
### merged.fasta     FASTA  reads
### 55          
### 69          

echo "Step_1.2:Merge paired reads"
for i in `seq 55 69`;do
./usearch11 -fastq_mergepairs ${data}/${prefix}${i}/${prefix}${i}_1.fastq -reverse ${data}/${prefix}${i}/${prefix}${i}_2.fastq -fastqout ${temp}/${i}.merged.fastq -fastaout ${temp}/${i}.merged.fasta
done

echo "Step_2:Quality filtering"
for i in `seq 55 69`;do
./usearch11 -fastq_filter ${temp}/${i}.merged.fastq -fastq_maxee 1.0 -fastaout ${temp}/${i}.filtered.fasta -threads 2
done
cat ${temp}/*.filtered.fasta > ${res}/merged.fasta
#cat ${temp}/*.filtered.fastq > ${res}/merged.fastq
echo ""
ls -lh ${res}/merged.fasta
echo ""

echo "Step_3:Dereplication"
##   1
./usearch11 -fastx_uniques ${res}/merged.fasta -fastaout ${res}/unique.fasta --minuniquesize 2 -threads 2 -sizeout
##   2
# vsearch --derep_fulllength ${res}/filtered_v.fasta --output ${res}/unique_v.fa --minuniquesize 2 --threads 10 --sizeout

echo "Step_4:Discard singletons"
./usearch11 -sortbysize ${res}/unique.fasta -fastaout ${res}/discard.fasta -minsize 2

###   OTU ###
echo "Step_5:UPARSE-OTU"
##     3    1     
./usearch11 -cluster_otus ${res}/unique.fasta -otus ${res}/otus_u51.fasta -uparseout ${res}/uparse.1.txt -relabel Otu -threads 10
##     3      
./usearch11 -cluster_otus ${res}/discard.fasta -otus ${res}/otus_u52.fasta -uparseout ${res}/uparse.2.txt -relabel Otu -threads 10
## Denoise      OTU
./usearch11 -unoise3 ${res}/unique.fasta -zotus ${res}/zotus_51.fasta -threads 30

##   QIIME   
echo "Picking OTUs"
pick_otus.py -i ${res}/unique.fasta -o ${res} -m uclust -r ${refdb}/gg_13_8_otus/rep_set/97_otus.fasta --threads 30
echo "Picking a representative sequence set, one sequence from each OTU"
pick_rep_set.py -i ${res}/unique_otus.txt -f ${res}/unique.fasta -o ${res}/rep_set1.fasta



###   fasta         ###
###   OTU    (            OTU            OTU  ) ###
echo "Number of OTU:"
grep '>' -c ${res}/otus_u51.fasta
grep '>' -c ${res}/otus_u52.fasta
grep '>' -c ${res}/zotus_51.fasta

echo "Step_6:Construct OTU table"
### Input should be reads before quality filtering and before discarding low-abundance unique sequences
### Reads must have sample indenfiers in the labels.
### The search database is the OTU (or ZOTU) sequences, which must have valid OTU identifiers in the labels.
### suggest making OTU tables for both 97% OTUs and denoised sequences (ZOTUs).
./usearch11 -otutab ${res}/merged.fasta -otus ${res}/otus_u51.fasta -otutabout ${res}/otutab_u61.txt -mapout ${res}/map_61.txt -threads 15
./usearch11 -otutab ${res}/merged.fasta -zotus ${res}/zotus_51.fasta -otutabout ${res}/zotutab_61.txt -mapout ${res}/zmap_61.txt -threads 15

#          
echo "Step_7:Chimera detection"
otusfile='otus_u52.fasta'	###         OTU table
reffile='gold.fa'		###          
printf "OTU  :%s ,      :%s 
"${otusfile} ${reffile} ### ### ./usearch11 -uchime2_ref ${res}/${otusfile} -db ${refdb}/${reffile} -chimeras ${res}/otus_chimeras.fasta -notmatched ${res}/otus_rdp.fasta -uchimeout ${res}/otus_rdp.uchime -strand plus -mode sensitive -threads 30 ### ID ### echo "Get sequenceID of chimeric:" grep '>' ${res}/otus_chimeras.fasta|sed 's/>//g' > ${res}/otus_chimeras.id ### ### filter_fasta.py -f ${res}/${otusfile} -o ${res}/otus_non_chimera.fasta -s ${res}/otus_chimeras.id -n ### echo "Check whether it is the expected number of sequences:" grep '>' -c ${res}/otus_non_chimera.fasta # echo "Step_8:Removal of non-bacterial sequences" ### Greengene ,320MB ### wget -c ftp://greengenes.microbio.me/greengenes_release/gg_13_5/gg_13_8_otus.tar.gz ### 3.4G ### tar xvzf gg_13_8_otus.tar.gz ### OTU 97% , 8min time align_seqs.py -i ${res}/otus_non_chimera.fasta -t ${refdb}/gg_13_8_otus/rep_set_aligned/97_otus.fasta -o ${res}/aligned/ # echo "Number of incomparable bacteria:" grep -c '>' ${res}/aligned/otus_non_chimera_failures.fasta # OTU ID echo "Obtaining OTU IDs that are not like bacteria:" grep '>' ${res}/aligned/otus_non_chimera_failures.fasta|cut -f 1 -d ' '|sed 's/>//g' > ${res}/aligned/otus_non_chimera_failures.id # filter_fasta.py -f ${res}/otus_non_chimera.fasta -o ${res}/otus_rdp_align.fasta -s ${res}/aligned/otus_non_chimera_failures.id -n # OTU echo "How much is left of OTUs?:" grep '>' -c ${res}/otus_rdp_align.fasta # OTU echo "Step_9:Generate representative sequences and OTU tables" # OTU, , Reference( ) echo "Step_9.1:Rename OTU" awk 'BEGIN {n=1}; />/ {print ">OTU_" n; n++} !/>/ {print}' ${res}/otus_rdp_align.fasta > ${res}/rep_seqs.fasta # OTU echo "Step_9.2:Generate OTU tables" ./usearch11 -usearch_global ${res}/merged.fasta -db ${res}/rep_seqs.fasta -otutabout ${res}/otu_table.txt -biomout ${res}/otu_table.biom -strand plus -id 0.97 -threads 30 #less ${res}/otu_table.txt # OTU , ### ### rdp_classifier.jar PATH , ### rdp_classifier, , ### https://jaist.dl.sourceforge.net/project/rdp-classifier/rdp-classifier/rdp_classifier_2.2.zip echo "Step_10:Species annotation" #RDP_Classifier_Path=$HOME/miniconda3/share/rdp_classifier-2.2-1/rdp_classifier.jar #echo "export RDP_JAR_PATH=${RDP_Classifier_Path}" >> $HOME/.bashrc echo "export RDP_JAR_PATH=$HOME/database/rdp_classifier_2.2/rdp_classifier-2.2.jar" >> $HOME/.bashrc source $HOME/.bashrc assign_taxonomy.py -i ${res}/rep_seqs.fasta -r ${refdb}/gg_13_8_otus/rep_set/97_otus.fasta -t ${refdb}/gg_13_8_otus/taxonomy/97_otu_taxonomy.txt -m rdp -o ${res} #echo "Align the representative sequence set" #align_seqs.py -i ${res}/rep_seqs.fasta -t ${refdb}/gg_13_8_otus/rep_set_aligned/core_set_aligned.fasta.imputed -o ${res}/pynast_aligned/ #echo "Filtering the alignment prior to tre building and removing positions which are all gaps, or not useful for phylogenetic inference" #filter_alignment.py -i ${res}/pynast_aligned/rep_seqs_aligned.fasta -o ${res}/filtered_alignment/ --remove_outliers #echo "Building a phylogenetic tree" #make_phylogeny.py -i ${res}/filtered_alignment/rep_seqs_aligned_pfiltered.fasta -o ${res}/rep_phylo.tre #echo "Building an OTU table" #make_otu_table.py -i ${res}/unique_otus.txt -t ${refdb}/gg_13_8_otus/taxonomy/97_otu_taxonomy.txt -o ${res}/otu_table_non_chimeric.biom ### OTU 、 、 echo "Step_11:OTU Table Statistics, Format Conversion, Adding Information" ### OTU BIOM: biom convert -i ${res}/otu_table.txt -o ${res}/otu_table.biom --table-type="OTU table" --to-json ### OTU , taxonomy biom add-metadata -i ${res}/otu_table.biom --observation-metadata-fp ${res}/rep_seqs_tax_assignments.txt -o ${res}/otu_table_tax.biom --sc-separated taxonomy --observation-header OTUID,taxonomy ### biom txt , : biom convert -i ${res}/otu_table_tax.biom -o ${res}/otu_table_tax.txt --to-tsv --header-key taxonomy ### OTU : ,OUT biom summarize-table -i ${res}/otu_table_tax.biom -o ${res}/otu_table_tax.sum #less ${res}/otu_table_tax.sum # OTU echo "Step_12:Screen of OTU table" # : counts>3000 filter_samples_from_otu_table.py -i ${res}/otu_table_tax.biom -o ${res}/otu_table2.biom -n 3000 # : 15 ,1129 OTU echo "The result of screen step1:" biom summarize-table -i ${res}/otu_table2.biom # OTU : OTU filter_otus_from_otu_table.py --min_count_fraction 0.0001 -i ${res}/otu_table2.biom -o ${res}/otu_table3.biom # : 15 ,444 OTU echo "The result of screen step2:" biom summarize-table -i ${res}/otu_table3.biom # biom OTU OTU biom convert -i ${res}/otu_table3.biom -o ${res}/otu_table3.txt --table-type="OTU table" --to-tsv # OTU R sed -i '/# Const/d;s/#OTU //g;s/ID.//g' ${res}/otu_table3.txt # OTU OTU filter_fasta.py -f ${res}/rep_seqs.fasta -b ${res}/otu_table3.biom -o ${res}/rep_seqs4.fasta printf " OTU :%s\t OTU :%s
"${res}/otu_table_tax.txt ${res}/rep_seqs.fasta # End echo " : ,Alpha,Beta " echo "End"