#!/freeware/bin/perl -W
#
# Report, for every motif ID found in a GFF file, the number of annotated
# sites divided by the number of candidate positions in a set of sequences.
#
# Usage:
#     script.pl <sequences.fasta> <sites.gff>
#
# Input:
#     sequences.fasta  FASTA file; only the sequence lengths are used.
#     sites.gff        GFF version 2 file. Each feature may carry an 'id' tag
#                      (the motif identifier) and a 'site' tag (the matched
#                      site string, whose length is taken as the motif width).
#
# Output (STDOUT), one line per motif ID, sorted by ID:
#     <id> TAB <count / (2 * (total_length - n_sequences * (width - 1)))>
#
# NOTE(review): the factor 2 presumably accounts for both strands, and
# (total_length - n_sequences * (width - 1)) for the number of start
# positions available to a motif of that width -- confirm with the author.

use strict;
use warnings;

use Bio::SeqIO;
use Bio::Tools::GFF;

die "Usage: $0 <sequences.fasta> <sites.gff>\n" unless @ARGV >= 2;
my ( $seqFile, $gffFile ) = @ARGV;

my $gffio = Bio::Tools::GFF->new(
    -file        => "<$gffFile",
    -gff_version => 2,
);
my $seqio = Bio::SeqIO->new(
    -file   => "<$seqFile",
    -format => 'fasta',
);

# First pass: total sequence length and number of sequences.
my $total_length = 0;
my $seq_count    = 0;
while ( my $seq = $seqio->next_seq() ) {
    $total_length += $seq->length;
    $seq_count++;
}
$seqio->close;

# Second pass: count features per motif ID and remember each motif's width.
my %count_of = ();    # motif ID => number of features seen
my %width_of = ();    # motif ID => length of the 'site' tag of the first feature
while ( my $feature = $gffio->next_feature() ) {

    # Features without an 'id' tag are pooled under the ID 'dummy'.
    my $id = 'dummy';
    if ( $feature->has_tag('id') ) {
        $id = join '', $feature->each_tag_value('id');
    }

    # The width is taken from the first feature seen for an ID only; a
    # feature without a 'site' tag yields width 0.
    if ( !exists $count_of{$id} ) {
        my $site = '';
        if ( $feature->has_tag('site') ) {
            $site = join '', $feature->each_tag_value('site');
        }
        $width_of{$id} = length $site;
    }
    $count_of{$id}++;
}
$gffio->close;

# Report the frequency of every motif, in a reproducible (sorted) order.
foreach my $id ( sort keys %count_of ) {
    my $positions =
      2 * ( $total_length - ( $seq_count * ( $width_of{$id} - 1 ) ) );

    # A non-positive denominator (no sequences, or a motif wider than the
    # sequences allow) would either abort with a division by zero or print
    # a meaningless negative frequency, so skip the motif with a warning.
    if ( $positions <= 0 ) {
        warn "Skipping motif '$id': no valid positions "
          . "(total length $total_length, $seq_count sequence(s), "
          . "motif width $width_of{$id})\n";
        next;
    }

    my $value = $count_of{$id} / $positions;
    print $id, "\t", $value, "\n";
}