fastq 2 fasta変換スクリプト

2547 ワード

主にsmall RNA(小分子RNA)データの処理に使用する、fastqのリードをfastaに変換する小さなツールです。
#!/usr/bin/perl
#
# Convert a FASTQ file into a FASTA file of collapsed reads.
#
# Each distinct read sequence is written once, with the number of times it
# occurred encoded in the record ID.
#
# Options:
#   -fq   <file>  input FASTQ file (required)
#   -out  <str>   output path prefix            (default: Result/out)
#   -pre  <str>   prefix used in sequence IDs   (default: seq)
#   -type <1|2>   ID layout                     (default: 1)
#                   1: <pre>_<number>_x<count>
#                   2: <pre><number><TAB><count>
#   -help | -h    print usage and stop
#
# Output files:
#   <out>.convert.fa   collapsed FASTA
#   <out>.Reads.stat   two-line table: total and unique read counts
use strict;
use warnings;
use Getopt::Long;
use File::Basename;
use File::Path qw(make_path);
use Scalar::Util qw(looks_like_number);

my ( $fastq, $out, $pre, $type, $help );

# A malformed command line is reported through the usage text instead of
# being silently ignored.
GetOptions(
    "fq=s"    => \$fastq,
    "out:s"   => \$out,
    "pre:s"   => \$pre,
    "type:s"  => \$type,
    "help|h!" => \$help,
) or die usage();

die usage() if ( !defined $fastq || defined $help );

$out  ||= "Result/out";
$pre  ||= "seq";
$type ||= "1";

# Validate -type before doing any work, so a bad value neither costs a full
# pass over the input nor leaves a truncated output file behind.
unless ( looks_like_number($type) && ( $type == 1 || $type == 2 ) ) {
    print STDERR "ERROR! -type options must be 1 or 2.";
    print STDERR "1 for [{$pre}xxx_00002_x345] 2 for [{$pre}00002\t345]\n";
    exit 1;    # non-zero so calling pipelines can see the failure
}

# Create the output directory without going through a shell; make_path is a
# no-op when the directory already exists and croaks on a real failure.
my $outdir = dirname($out);
make_path($outdir);

## convert fastq to fasta and remove redundancy reads
## seq_id: seq_0000001_x345

# reads_counter returns ( \%count_of_sequence, total reads, unique reads ).
my ( $seq, $total, $unique ) = reads_counter($fastq);
$total = 0 unless defined $total;    # empty input: no read was ever counted

# Zero-pad the running number to the digit count of the unique-read total so
# that all IDs have the same width.
my $width = length $unique;

my $fasta_file = "$out.convert.fa";
open my $fasta_fh, '>', $fasta_file
    or die "Cannot open $fasta_file for writing: $!\n";

# Sort by descending count, then by sequence, so that ID numbering is
# reproducible between runs (plain hash order is randomised by Perl).
my $i = 0;
for my $read ( sort { $seq->{$b} <=> $seq->{$a} || $a cmp $b } keys %$seq ) {
    $i++;
    my $cnt = $seq->{$read};
    my $num = sprintf '%0*d', $width, $i;
    my $id  = $type == 1
        ? $pre . "_" . $num . "_x$cnt"
        : $pre . $num . "\t$cnt";
    print {$fasta_fh} ">$id\n$read\n";
}
close $fasta_fh or die "Cannot close $fasta_file: $!\n";

my $stat_file = "$out.Reads.stat";
open my $stat_fh, '>', $stat_file
    or die "Cannot open $stat_file for writing: $!\n";
print {$stat_fh} "total_reads\tunique_reads\n";
print {$stat_fh} "$total\t$unique\n";
close $stat_fh or die "Cannot close $stat_file: $!\n";

# NOTE(review): in this copy of the file the reads_counter and usage sub
# definitions are collapsed into the single comment line below and parts of
# them are missing (the open calls and read-loop header of reads_counter, and
# the body of the usage heredoc). They are kept verbatim; restore them from
# the original script before running.
## =============================== SUB MODULE ============================ ## sub reads_counter{ use PerlIO::gzip; my $infile = shift @_; if ( $infile =~ /\.gz$/ ) { open FQ, "){ chomp; $line++; if ($line == 2){ $total_reads++; $seq{$_}++; } elsif ($line == 4){ $line = 0; } else { next; } } close FQ; my $unique_reads = scalar ( keys %seq ); return ( \%seq, $total_reads, $unique_reads ); } sub usage{ my $name = basename($0); print STDERR <