【生信】連携してvsearch+usearch 11+QIIMEを使って双端測順データを処理する
10317 ワード
Vsearch+usearch 11+QIIMEを使って二端子測順データを処理します.
上記で説明したように、Vsearch、Usearch、QIIMEの両方を使用して、双端測順データを処理することができます.ここでは、各ツールの利点を生かして、vsearch、usearch 11、QIIME 1.9を使用した16 S rRNAデータ処理ツールを開発しました.双端測順データを入力して、最終的にOTU豊度表と関連するシステムツリーを得ます.
上記で説明したように、Vsearch、Usearch、QIIMEの両方を使用して、双端測順データを処理することができます.ここでは、各ツールの利点を生かして、vsearch、usearch 11、QIIME 1.9を使用した16 S rRNAデータ処理ツールを開発しました.双端測順データを入力して、最終的にOTU豊度表と関連するシステムツリーを得ます.
#!/bin/bash
### vsearch+usearch11+QIIME
### OTU
##
# temp/:
# res/:
# data/:
# database/:
# err/:
# log/:
##
HOME=$HOME/Lab_2 ###
temp=${HOME}/temp2 ###
res=${HOME}/res2 ###
data=${HOME}/seq ###
refdb=${HOME}/database ###
err=${HOME}/err2 ###
log=${HOME}/log2 ###
###########################################################################################
# #
# fasta #
# usearch11、vsearch qiime #
# Greengene ,320MB #
# wget -c ftp://greengenes.microbio.me/greengenes_release/gg_13_5/gg_13_8_otus.tar.gz #
# 3.4G #
# tar xvzf gg_13_8_otus.tar.gz #
###########################################################################################
echo " "
cd ${HOME}
echo " "
#echo "Step_1.1:decompress paired reads sequences"
#gunzip ${data}/*/*.fastq.gz ###
prefix='ERR7765' ###
### merged.fastq FASTQ reads
### merged.fasta FASTA reads
### 55
### 69
echo "Step_1.2:Merge paired reads"
for i in `seq 55 69`;do
./usearch11 -fastq_mergepairs ${data}/${prefix}${i}/${prefix}${i}_1.fastq -reverse ${data}/${prefix}${i}/${prefix}${i}_2.fastq -fastqout ${temp}/${i}.merged.fastq -fastaout ${temp}/${i}.merged.fasta
done
echo "Step_2:Quality filtering"
for i in `seq 55 69`;do
./usearch11 -fastq_filter ${temp}/${i}.merged.fastq -fastq_maxee 1.0 -fastaout ${temp}/${i}.filtered.fasta -threads 2
done
cat ${temp}/*.filtered.fasta > ${res}/merged.fasta
#cat ${temp}/*.filtered.fastq > ${res}/merged.fastq
echo ""
ls -lh ${res}/merged.fasta
echo ""
echo "Step_3:Dereplication"
## 1
./usearch11 -fastx_uniques ${res}/merged.fasta -fastaout ${res}/unique.fasta --minuniquesize 2 -threads 2 -sizeout
## 2
# vsearch --derep_fulllength ${res}/filtered_v.fasta --output ${res}/unique_v.fa --minuniquesize 2 --threads 10 --sizeout
echo "Step_4:Discard singletons"
./usearch11 -sortbysize ${res}/unique.fasta -fastaout ${res}/discard.fasta -minsize 2
### OTU ###
echo "Step_5:UPARSE-OTU"
## 3 1
./usearch11 -cluster_otus ${res}/unique.fasta -otus ${res}/otus_u51.fasta -uparseout ${res}/uparse.1.txt -relabel Otu -threads 10
## 3
./usearch11 -cluster_otus ${res}/discard.fasta -otus ${res}/otus_u52.fasta -uparseout ${res}/uparse.2.txt -relabel Otu -threads 10
## Denoise OTU
./usearch11 -unoise3 ${res}/unique.fasta -zotus ${res}/zotus_51.fasta -threads 30
## QIIME
echo "Picking OTUs"
pick_otus.py -i ${res}/unique.fasta -o ${res} -m uclust -r ${refdb}/gg_13_8_otus/rep_set/97_otus.fasta --threads 30
echo "Picking a representative sequence set, one sequence from each OTU"
pick_rep_set.py -i ${res}/unique_otus.txt -f ${res}/unique.fasta -o ${res}/rep_set1.fasta
### fasta ###
### OTU ( OTU OTU ) ###
echo "Number of OTU:"
grep '>' -c ${res}/otus_u51.fasta
grep '>' -c ${res}/otus_u52.fasta
grep '>' -c ${res}/zotus_51.fasta
echo "Step_6:Construct OTU table"
### Input should be reads before quality filtering and before discarding low-abundance unique sequences
### Reads must have sample indenfiers in the labels.
### The search database is the OTU (or ZOTU) sequences, which must have valid OTU identifiers in the labels.
### suggest making OTU tables for both 97% OTUs and denoised sequences (ZOTUs).
./usearch11 -otutab ${res}/merged.fasta -otus ${res}/otus_u51.fasta -otutabout ${res}/otutab_u61.txt -mapout ${res}/map_61.txt -threads 15
./usearch11 -otutab ${res}/merged.fasta -zotus ${res}/zotus_51.fasta -otutabout ${res}/zotutab_61.txt -mapout ${res}/zmap_61.txt -threads 15
#
echo "Step_7:Chimera detection"
otusfile='otus_u52.fasta' ### OTU table
reffile='gold.fa' ###
printf "OTU :%s , :%s
"${otusfile} ${reffile}
### ###
./usearch11 -uchime2_ref ${res}/${otusfile} -db ${refdb}/${reffile} -chimeras ${res}/otus_chimeras.fasta -notmatched ${res}/otus_rdp.fasta -uchimeout ${res}/otus_rdp.uchime -strand plus -mode sensitive -threads 30
### ID ###
echo "Get sequenceID of chimeric:"
grep '>' ${res}/otus_chimeras.fasta|sed 's/>//g' > ${res}/otus_chimeras.id
### ###
filter_fasta.py -f ${res}/${otusfile} -o ${res}/otus_non_chimera.fasta -s ${res}/otus_chimeras.id -n
###
echo "Check whether it is the expected number of sequences:"
grep '>' -c ${res}/otus_non_chimera.fasta
#
echo "Step_8:Removal of non-bacterial sequences"
### Greengene ,320MB
### wget -c ftp://greengenes.microbio.me/greengenes_release/gg_13_5/gg_13_8_otus.tar.gz
### 3.4G
### tar xvzf gg_13_8_otus.tar.gz
### OTU 97% , 8min
time align_seqs.py -i ${res}/otus_non_chimera.fasta -t ${refdb}/gg_13_8_otus/rep_set_aligned/97_otus.fasta -o ${res}/aligned/
#
echo "Number of incomparable bacteria:"
grep -c '>' ${res}/aligned/otus_non_chimera_failures.fasta
# OTU ID
echo "Obtaining OTU IDs that are not like bacteria:"
grep '>' ${res}/aligned/otus_non_chimera_failures.fasta|cut -f 1 -d ' '|sed 's/>//g' > ${res}/aligned/otus_non_chimera_failures.id
#
filter_fasta.py -f ${res}/otus_non_chimera.fasta -o ${res}/otus_rdp_align.fasta -s ${res}/aligned/otus_non_chimera_failures.id -n
# OTU
echo "How much is left of OTUs?:"
grep '>' -c ${res}/otus_rdp_align.fasta
# OTU
echo "Step_9:Generate representative sequences and OTU tables"
# OTU, , Reference( )
echo "Step_9.1:Rename OTU"
awk 'BEGIN {n=1}; />/ {print ">OTU_" n; n++} !/>/ {print}' ${res}/otus_rdp_align.fasta > ${res}/rep_seqs.fasta
# OTU
echo "Step_9.2:Generate OTU tables"
./usearch11 -usearch_global ${res}/merged.fasta -db ${res}/rep_seqs.fasta -otutabout ${res}/otu_table.txt -biomout ${res}/otu_table.biom -strand plus -id 0.97 -threads 30
#less ${res}/otu_table.txt
# OTU ,
###
### rdp_classifier.jar PATH ,
### rdp_classifier, ,
### https://jaist.dl.sourceforge.net/project/rdp-classifier/rdp-classifier/rdp_classifier_2.2.zip
echo "Step_10:Species annotation"
#RDP_Classifier_Path=$HOME/miniconda3/share/rdp_classifier-2.2-1/rdp_classifier.jar
#echo "export RDP_JAR_PATH=${RDP_Classifier_Path}" >> $HOME/.bashrc
echo "export RDP_JAR_PATH=$HOME/database/rdp_classifier_2.2/rdp_classifier-2.2.jar" >> $HOME/.bashrc
source $HOME/.bashrc
assign_taxonomy.py -i ${res}/rep_seqs.fasta -r ${refdb}/gg_13_8_otus/rep_set/97_otus.fasta -t ${refdb}/gg_13_8_otus/taxonomy/97_otu_taxonomy.txt -m rdp -o ${res}
#echo "Align the representative sequence set"
#align_seqs.py -i ${res}/rep_seqs.fasta -t ${refdb}/gg_13_8_otus/rep_set_aligned/core_set_aligned.fasta.imputed -o ${res}/pynast_aligned/
#echo "Filtering the alignment prior to tre building and removing positions which are all gaps, or not useful for phylogenetic inference"
#filter_alignment.py -i ${res}/pynast_aligned/rep_seqs_aligned.fasta -o ${res}/filtered_alignment/ --remove_outliers
#echo "Building a phylogenetic tree"
#make_phylogeny.py -i ${res}/filtered_alignment/rep_seqs_aligned_pfiltered.fasta -o ${res}/rep_phylo.tre
#echo "Building an OTU table"
#make_otu_table.py -i ${res}/unique_otus.txt -t ${refdb}/gg_13_8_otus/taxonomy/97_otu_taxonomy.txt -o ${res}/otu_table_non_chimeric.biom
### OTU 、 、
echo "Step_11:OTU Table Statistics, Format Conversion, Adding Information"
### OTU BIOM:
biom convert -i ${res}/otu_table.txt -o ${res}/otu_table.biom --table-type="OTU table" --to-json
### OTU , taxonomy
biom add-metadata -i ${res}/otu_table.biom --observation-metadata-fp ${res}/rep_seqs_tax_assignments.txt -o ${res}/otu_table_tax.biom --sc-separated taxonomy --observation-header OTUID,taxonomy
### biom txt , :
biom convert -i ${res}/otu_table_tax.biom -o ${res}/otu_table_tax.txt --to-tsv --header-key taxonomy
### OTU : ,OUT
biom summarize-table -i ${res}/otu_table_tax.biom -o ${res}/otu_table_tax.sum
#less ${res}/otu_table_tax.sum
# OTU
echo "Step_12:Screen of OTU table"
# : counts>3000
filter_samples_from_otu_table.py -i ${res}/otu_table_tax.biom -o ${res}/otu_table2.biom -n 3000
# : 15 ,1129 OTU
echo "The result of screen step1:"
biom summarize-table -i ${res}/otu_table2.biom
# OTU : OTU
filter_otus_from_otu_table.py --min_count_fraction 0.0001 -i ${res}/otu_table2.biom -o ${res}/otu_table3.biom
# : 15 ,444 OTU
echo "The result of screen step2:"
biom summarize-table -i ${res}/otu_table3.biom
# biom OTU OTU
biom convert -i ${res}/otu_table3.biom -o ${res}/otu_table3.txt --table-type="OTU table" --to-tsv
# OTU R
sed -i '/# Const/d;s/#OTU //g;s/ID.//g' ${res}/otu_table3.txt
# OTU OTU
filter_fasta.py -f ${res}/rep_seqs.fasta -b ${res}/otu_table3.biom -o ${res}/rep_seqs4.fasta
printf " OTU :%s\t OTU :%s
"${res}/otu_table_tax.txt ${res}/rep_seqs.fasta
# End
echo " : ,Alpha,Beta "
echo "End"
・