サイトの位置に関するプログラム


これはやはりあの女の子が書いたもので、ブロガーは微量の手伝いとテストを提供して、とてもいいと思って、自分でまた書くのがおっくうで、分かち合います:
#!perl -w
use strict;
die "Usage : perl $0   [gff]" unless (@ARGV >=2);
my ($in, $out, $gff) = @ARGV;
$gff ||= 'refGene.ann.gz';

open IN, $in || die $!;
my ($tmp, $line, $rs, $chr, $site , $start, $ori, $num, $end, @samps, $ref, $alt, %infos, $dep, @tmps);
my %refs = ('M' => 'AC', 'R' => 'AG', 'W' => 'AT', 'S' => 'CG', 'Y' => 'CT', 'K' => 'GT');
while()
{
	chomp;
	($chr, $start, $end) = (split)[1,2,2];
	$infos{$chr}{$start}{'end'} = $end;
	@{$infos{$chr}{$start}{'ann'}} = ();
	$infos{$chr}{$start}{'gene'} = '-';
}
close IN;


open GFF, "gzip -dc $gff |" || die $!;
my ($up, $down);
my ($name, $id, %ids, $info, $region);
 while()
{
	chomp;
	($chr, $region, $start, $end, $num, $ori, $info) = (split /\t/, $_)[0,2,3,4,5,6,8];
	if($region =~ /intergenic/){
		for $site (sort {$a <=> $b} keys %{$infos{$chr}}){
			next unless ($site >= $start and $infos{$chr}{$site}{'end'} <= $end and $infos{$chr}{$site}{'gene'} eq '-');
			$up = $site - $start;
			$down = $end - $site;
			my ($g1, $g2) = $info =~ /name=([\.\-\w]+);.+name=([\.\-\w]+);/;
			push @{$infos{$chr}{$site}{'ann'}}, "$g1(dist=$up),$g2(dist=$down):$region";
		}
	}elsif($region =~ /RNA/)
	{
		next unless ($info =~ /ID=([-\w]+); name=([-\.\w]+);/);
		($id, $name) = ($1, $2);
		$ids{$id} = $name;
	}else
	{
		next unless ($info =~ /Parent=([-\w]+);/);
		$id = $1;
		$name = $ids{$id};
		for $site (sort {$a <=> $b} keys %{$infos{$chr}})
		{
			next unless ($site >= $start and $infos{$chr}{$site}{'end'} <= $end);
			my $tmp = $region;
			if ($region !~ /UTR/){
					$tmp .= $num;
			}
			push @{$infos{$chr}{$site}{'ann'}}, "$name($id):$tmp";
			if($infos{$chr}{$site}{'gene'} eq '-' or $infos{$chr}{$site}{'gene'} =~ /$name/){
				$infos{$chr}{$site}{'gene'} = $name;
			}else{
				$infos{$chr}{$site}{'gene'} .= ",$name";
			}
		}
	}
}
close GFF;


if($out =~ /\.gz$/){open OUT, "| gzip > $out" || die $!;}
else{open OUT, "> $out" || die $!;}
print OUT "#Chromosome\tStart\tEnd\tGene\tRegion
"; for $chr (sort keys %infos) { for $site (sort {$a <=> $b} keys %{$infos{$chr}}) { print OUT "$chr\t$site\t$infos{$chr}{$site}{'end'}\t$infos{$chr}{$site}{'gene'}\t"; my $i = 1; for my $tmp ( @{$infos{$chr}{$site}{'ann'}}){ if ($i == 1){print OUT "$tmp";} else {print OUT ";$tmp";} $i ++; } print OUT "
"; } } close OUT;

前の補強版のgff形式を引き継ぐ.