#!/freeware/bin/perl -w use lib "/users/sista/saerts/perl/lib"; use lib "/users/sista/thijs/perl/lib"; use lib "/users/sista/thijs/perl/stow/perl-5.8.0/lib/site_perl/5.8.0/"; use Align; use strict; use Carp; use DBI qw(:sql_types); use File::Basename; use Getopt::Std ; use Bio::SeqIO; use Bio::Tools::GFF; use IO::String; ######################################################### # GLOBAL VARIABLES my @matrix; my @geneNames; my $temp; my $rest; my $line; my $mtrxNames; our $opt_f; our $opt_h; our $opt_c; our $opt_d; our $opt_g; our $opt_t; our $opt_r; our $opt_l; our $opt_s; our $opt_b; our $opt_i; our $opt_m; our $opt_n; ######################################################### # USAGE sub usage { my ($msg) = < ".$opt_s); } my $row=0; #my $mtrxNames="/users/sista/saerts/Projects/toucan/files/matrixNames_73.txt"; if(!$opt_n){ usage; exit(1); } $mtrxNames=$opt_n; print STDERR "Taking matrix names from $mtrxNames \n"; open (MTRX,"< ".$mtrxNames); my %mtrx; my $i=0; my $str=""; my $delim; if($opt_t){ $delim="\t"; } elsif ($opt_b){ $delim = "|"; } else{$delim=",";} #in case the first column should contain the gene name if($opt_c){ $str="sample".$delim; } while(){ ($temp,$rest)=split(/\s+/,$_); $str=$str.$temp.$delim; $mtrx{$temp}=$i; print $temp."\n"; $i++; } if(!$opt_r){ print substr($str,0,length($str)-1)."\n"; } my $nrMatrices=$i; #print "Number of matrices read from matrix database: ",$i,"\n"; $i=0; my $prev="blabla"; my $gffLines=undef; my $begin=1; #open(GFF,"< ".$opt_f); while(){ $line=$_; ($temp,$rest)=split(/\s+/,$line); #in case all regions for a gene should be grouped into one record per gene if($opt_g){ $temp = substr($temp,0,15); } #if($begin){ # $prev=$temp; # $begin=undef; #} if($temp ne $prev){ print STDERR $temp."\n"; if ($gffLines){ $geneNames[$row]=$prev; addRecord($gffLines); $row++; } $gffLines=undef; } else { #print $line; $gffLines = $gffLines.$line; } $prev=$temp; } #last group $geneNames[$row]=$prev; addRecord($gffLines); #foreach (@geneNames){ # 
# ---------------------------------------------------------------------------
# Reviewer documentation for the script text above (shebang through the
# "last group" call to addRecord).
#
# What the visible code does:
#   * -n is mandatory: when $opt_n is false, usage is called and the script
#     exits with status 1. Its value is opened as the matrix-names file.
#   * For each line read in the first loop, the first whitespace-separated
#     token is taken as a matrix name, appended to the header string and
#     stored in %mtrx with a running 0-based index (its output column).
#   * Field delimiter: tab with -t, "|" with -b, otherwise ",".
#   * -c puts a leading "sample" column in the header; -r suppresses the
#     header line altogether.
#   * In the second loop, input lines are grouped by their first
#     whitespace-separated field; -g truncates that field to its first 15
#     characters before comparing. When the key changes, the text buffered
#     for the previous key is handed to addRecord and the key is stored in
#     @geneNames. One unconditional addRecord call follows the loop for the
#     final group.
#   * $opt_d, $opt_i, $opt_m and $opt_l are declared here but only read
#     inside addRecord. $opt_f appears only in a commented-out open, and
#     $opt_h is not referenced in the visible text.
#
# NOTE(review): this copy of the script is damaged. Everything between
#   `my ($msg) = <` and `".$opt_s); }` is missing -- presumably the usage
#   heredoc, the end of sub usage, the option parsing and the start of an
#   open that uses $opt_s. Both `while(){` loops have also lost their read
#   expression. Restore these spans from version control before running.
# NOTE(review): in the grouping loop the line that starts a new group is
#   never appended to $gffLines (only lines whose key equals $prev are), so
#   the first line of every group is not passed to addRecord and a group
#   consisting of a single line produces no record at all -- except the last
#   group, which is always flushed, possibly with an undefined buffer.
#   This looks unintended; confirm and fix when the file is repaired.
# NOTE(review): `print $temp."\n"` in the matrix-name loop writes every
#   matrix name to STDOUT ahead of the header line; the other progress
#   messages go to STDERR. Confirm whether this is leftover debugging.
# NOTE(review): the visible open of the matrix-names file is not checked for
#   failure and uses the two-argument form with a bareword handle.
# ---------------------------------------------------------------------------
#    print $_."\t";
#}
#print "\n";
#for my $array_ref ( @matrix ) {
#    print "@$array_ref\n";
#}

# ---------------------------------------------------------------------------
# Output: one line per record collected in @matrix.
#
#   -c : prefix the row with the gene name, separated by the field delimiter.
#   -b : prefix the row with the gene name and its first 15 characters, both
#        followed by a tab (the scores themselves stay $delim-separated).
#   -s : additionally write the gene name to the GENES handle.
#
# join() is used instead of "append delimiter, then strip the last
# character"; the printed text is the same, including for an empty row.
# ---------------------------------------------------------------------------
for my $rowIdx ( 0 .. $#matrix ) {
    my $name   = $geneNames[$rowIdx];
    my @scores = @{ $matrix[$rowIdx] };

    if ($opt_s) {
        print GENES $name, "\n";
    }

    my $record = join( $delim, @scores );
    if ($opt_c) {
        # gene name in the first column
        $record = join( $delim, $name, @scores );
    }
    elsif ($opt_b) {
        $record = join( "\t", $name, substr( $name, 0, 15 ),
                        ( @scores ? $record : () ) );
        #$str=$geneNames[$i]."\t".substr($geneNames[$i],18,15)."\t";
    }

    print $record . "\n";
}

# ---------------------------------------------------------------------------
# addRecord($gffText)
#
# Parses a chunk of GFF (version 2) text holding the features of one group
# and appends one row to @matrix: an array reference with one cell per entry
# of %mtrx, indexed by the column number stored in %mtrx.
#
# For every value of a feature's 'id' tag the matching cell is updated:
#   -i       : count the number of instances;
#   -m       : sum the scores;
#   (neither): keep the maximum score (cells start at 0).
# With -d the score is first discretised: > 1150 -> 3, > 328 -> 2, > 0 -> 1
# (scores <= 0 are left unchanged).
# With -l each cell is finally replaced by log(cell + 1).
#
# Parameters:
#   $gffText - GFF text for one group; undef is treated as empty text and
#              yields an all-zero row.
# Returns: nothing; the result is pushed onto the file-level @matrix.
# Reads the file-level %mtrx, $opt_d, $opt_i, $opt_m and $opt_l.
#
# Changes with respect to the previous implementation:
#   * The log transform is applied once per cell after all features have
#     been accumulated. It used to run after every single hit, so a cell hit
#     several times was logged repeatedly and the max/sum comparison mixed
#     raw and logged values.
#   * An id that is not present in %mtrx is skipped with a warning. It used
#     to yield an undefined array index, which Perl treats as 0, so unknown
#     ids were silently counted in the first column.
#   * Only local variables are used; the file-level $i is no longer
#     overwritten as a loop counter.
# ---------------------------------------------------------------------------
sub addRecord {
    my ($gffText) = @_;
    $gffText = "" unless defined $gffText;

    my $fh    = IO::String->new($gffText);
    my $gffio = Bio::Tools::GFF->new(
        -fh          => $fh,
        -gff_version => 2,
        -verbose     => 1,
    );

    # One cell per known matrix, all starting at zero.
    my @gene = (0) x scalar( keys %mtrx );

    while ( my $feature = $gffio->next_feature() ) {
        foreach my $value ( $feature->each_tag_value('id') ) {
            my $column = $mtrx{$value};
            unless ( defined $column ) {
                warn "addRecord: skipping feature with unknown matrix id '$value'\n";
                next;
            }

            # A missing score contributes like 0, as it did numerically before.
            my $score = $feature->score();
            $score = 0 unless defined $score;

            if ($opt_d) {
                if    ( $score > 1150 ) { $score = 3; }
                elsif ( $score > 328 )  { $score = 2; }
                elsif ( $score > 0 )    { $score = 1; }
            }

            if ($opt_i) {
                # number of instances
                $gene[$column]++;
            }
            elsif ( !$opt_m ) {
                # maximum
                $gene[$column] = $score if $score > $gene[$column];
            }
            else {
                # sum
                $gene[$column] += $score;
            }
        }
    }

    # log: applied exactly once, on the final accumulated values.
    if ($opt_l) {
        for my $cell (@gene) {
            $cell = log( $cell + 1 );
        }
    }

    push @matrix, [@gene];
    return;
}