fastq 2 fasta変換スクリプト
2547 ワード
主に small RNA データの処理に使用する、FASTQ のリードを FASTA 形式に変換する小さなツールです。
#!/usr/bin/perl -w
use strict;
use Getopt::Long;
use File::Basename;
# ---------------------------------------------------------------------------
# Main script: collapse identical FASTQ reads and write them as FASTA.
#
# Options:
#   -fq    input FASTQ file (required)
#   -out   output path prefix            (default "Result/out")
#   -pre   prefix for generated read IDs (default "seq")
#   -type  ID layout                     (default 1)
#            1 => <pre>_<index>_x<count>
#            2 => <pre><index><TAB><count>
#   -help  show usage
#
# Outputs:
#   <out>.convert.fa   one FASTA record per distinct sequence
#   <out>.Reads.stat   total and distinct read counts (tab separated)
# ---------------------------------------------------------------------------
my ( $fastq, $out, $pre, $type, $help );
GetOptions(
    "fq=s"    => \$fastq,
    "out:s"   => \$out,
    "pre:s"   => \$pre,
    "type:s"  => \$type,
    "help|h!" => \$help
);
die usage() if ( !defined $fastq || defined $help );
$out  ||= "Result/out";
$pre  ||= "seq";
$type ||= "1";

# Validate -type up front, before the input is read or any output file is
# truncated.  The comparison stays numeric (as before) but a non-numeric
# value no longer triggers an "isn't numeric" warning.
my $id_style = do { no warnings 'numeric'; $type + 0 };
unless ( $id_style == 1 || $id_style == 2 ) {
    print STDERR "ERROR! -type options must be 1 or 2.";
    print STDERR "1 for [{$pre}xxx_00002_x345] 2 for [{$pre}00002\t345]\n";
    exit 1;    # an invalid option is a failure, not a success
}

# List-form system() bypasses the shell, so unusual characters in the
# output path cannot be interpreted as shell syntax.
my $outdir = dirname($out);
system( 'mkdir', '-p', $outdir ) == 0
    or die "Cannot create output directory '$outdir' (mkdir status $?)\n";

## convert fastq to fasta and remove redundancy reads
## seq_id: seq_0000001_x345
my ( $seq, $total, $unique ) = reads_counter($fastq);
$total = 0 unless defined $total;    # empty input yields no reads

# Zero-pad the running index to the width of the distinct-read count.
my $index_format = "%0" . length($unique) . "d";

# Most abundant reads first, ties broken by sequence, so that the generated
# IDs are reproducible between runs (plain hash order is not).
my @ordered_reads =
    sort { $seq->{$b} <=> $seq->{$a} || $a cmp $b } keys %$seq;

my $fasta_file = "$out.convert.fa";
open my $fasta_fh, '>', $fasta_file
    or die "Cannot write $fasta_file: $!\n";
my $index = 0;
foreach my $read (@ordered_reads) {
    $index++;
    my $count  = $seq->{$read};
    my $number = sprintf( $index_format, $index );
    my $id =
        $id_style == 1
        ? $pre . "_" . $number . "_x$count"
        : $pre . $number . "\t$count";
    print {$fasta_fh} ">$id\n$read\n";
}
# Buffered write errors (e.g. disk full) only surface at close.
close $fasta_fh or die "Error closing $fasta_file: $!\n";

my $stat_file = "$out.Reads.stat";
open my $stat_fh, '>', $stat_file
    or die "Cannot write $stat_file: $!\n";
print {$stat_fh} "total_reads\tunique_reads\n";
print {$stat_fh} "$total\t$unique\n";
close $stat_fh or die "Error closing $stat_file: $!\n";
## =============================== SUB MODULE ============================ ##
# Count the reads in a FASTQ file and collapse identical sequences.
#
# Argument:
#   $infile  path to a FASTQ file.  A name ending in ".gz" is read through
#            the ":gzip" PerlIO layer provided by PerlIO::gzip.
#
# Returns a three-element list:
#   \%seq          hash ref: distinct sequence => number of reads
#   $total_reads   number of records seen (0 for an empty file)
#   $unique_reads  number of distinct sequences
#
# Dies if the input file cannot be opened.
#
# Records are taken to be exactly four lines each; the second line of
# every group of four is used as the sequence (wrapped sequence lines are
# not supported).
sub reads_counter{
    my $infile = shift @_;

    my $fq;
    if ( $infile =~ /\.gz$/ ) {
        # Loaded on demand so that plain-text input does not need the module.
        require PerlIO::gzip;
        open $fq, "<:gzip", $infile or die "Cannot open $infile: $!\n";
    } else {
        open $fq, "<", $infile or die "Cannot open $infile: $!\n";
    }

    my %seq;
    my $total_reads = 0;
    my $line        = 0;    # position within the current 4-line record
    while ( my $text = <$fq> ) {
        $line++;
        if ( $line == 2 ) {
            # Strip the line ending, tolerating CRLF files, so identical
            # sequences always collapse onto the same hash key.
            $text =~ s/\r?\n\z//;
            $total_reads++;
            $seq{$text}++;
        } elsif ( $line == 4 ) {
            $line = 0;      # quality line closes the record
        }
    }
    close $fq;

    my $unique_reads = scalar( keys %seq );
    return ( \%seq, $total_reads, $unique_reads );
}
sub usage{
my $name = basename($0);
print STDERR <