サイトの位置に関するプログラム
2894 ワード
これはやはりあの女の子が書いたもので、ブロガーは微量の手伝いとテストを提供して、とてもいいと思って、自分でまた書くのがおっくうで、分かち合います:
前の補強版のgff形式を引き継ぐ.
#!perl -w
use strict;
die "Usage : perl $0 [gff]" unless (@ARGV >=2);
my ($in, $out, $gff) = @ARGV;
$gff ||= 'refGene.ann.gz';
open IN, $in || die $!;
my ($tmp, $line, $rs, $chr, $site , $start, $ori, $num, $end, @samps, $ref, $alt, %infos, $dep, @tmps);
my %refs = ('M' => 'AC', 'R' => 'AG', 'W' => 'AT', 'S' => 'CG', 'Y' => 'CT', 'K' => 'GT');
while()
{
chomp;
($chr, $start, $end) = (split)[1,2,2];
$infos{$chr}{$start}{'end'} = $end;
@{$infos{$chr}{$start}{'ann'}} = ();
$infos{$chr}{$start}{'gene'} = '-';
}
close IN;
open GFF, "gzip -dc $gff |" || die $!;
my ($up, $down);
my ($name, $id, %ids, $info, $region);
while()
{
chomp;
($chr, $region, $start, $end, $num, $ori, $info) = (split /\t/, $_)[0,2,3,4,5,6,8];
if($region =~ /intergenic/){
for $site (sort {$a <=> $b} keys %{$infos{$chr}}){
next unless ($site >= $start and $infos{$chr}{$site}{'end'} <= $end and $infos{$chr}{$site}{'gene'} eq '-');
$up = $site - $start;
$down = $end - $site;
my ($g1, $g2) = $info =~ /name=([\.\-\w]+);.+name=([\.\-\w]+);/;
push @{$infos{$chr}{$site}{'ann'}}, "$g1(dist=$up),$g2(dist=$down):$region";
}
}elsif($region =~ /RNA/)
{
next unless ($info =~ /ID=([-\w]+); name=([-\.\w]+);/);
($id, $name) = ($1, $2);
$ids{$id} = $name;
}else
{
next unless ($info =~ /Parent=([-\w]+);/);
$id = $1;
$name = $ids{$id};
for $site (sort {$a <=> $b} keys %{$infos{$chr}})
{
next unless ($site >= $start and $infos{$chr}{$site}{'end'} <= $end);
my $tmp = $region;
if ($region !~ /UTR/){
$tmp .= $num;
}
push @{$infos{$chr}{$site}{'ann'}}, "$name($id):$tmp";
if($infos{$chr}{$site}{'gene'} eq '-' or $infos{$chr}{$site}{'gene'} =~ /$name/){
$infos{$chr}{$site}{'gene'} = $name;
}else{
$infos{$chr}{$site}{'gene'} .= ",$name";
}
}
}
}
close GFF;
if($out =~ /\.gz$/){open OUT, "| gzip > $out" || die $!;}
else{open OUT, "> $out" || die $!;}
print OUT "#Chromosome\tStart\tEnd\tGene\tRegion
";
for $chr (sort keys %infos)
{
for $site (sort {$a <=> $b} keys %{$infos{$chr}})
{
print OUT "$chr\t$site\t$infos{$chr}{$site}{'end'}\t$infos{$chr}{$site}{'gene'}\t";
my $i = 1;
for my $tmp ( @{$infos{$chr}{$site}{'ann'}}){
if ($i == 1){print OUT "$tmp";}
else {print OUT ";$tmp";}
$i ++;
}
print OUT "
";
}
}
close OUT;
前の補強版のgff形式を引き継ぐ.