#!/usr/local/bin/perl
#
# psl2gff.pl - convert BLAT PSL alignments into GFF files.
#
# Usage: psl2gff.pl psl_file.txt orf-file.txt [gff_output_file]
#
# One GFF file is written per target sequence name (PSL column Tname).
# The file is called "<Tname>.gff", or "<prefix>_<Tname>.gff" when the
# optional third argument is given (a trailing ".gff" on that argument is
# removed before it is used as the prefix).  Every alignment block becomes
# an "exon" feature; when both ends of the CDS listed in the ORF file can
# be located on the target, one "CDS" feature is written after the exons.
#
# The file can also be loaded with require/do (nothing is executed then),
# which makes the individual subroutines callable on their own.

use strict;
use warnings;

# Value written to the second (source) column of every GFF line.
use constant SOURCE_LABEL => 'NIA_Mouse_4.0';

# Return the usage/help message shown when required arguments are missing.
sub usage_text {
    return join("\n",
        "$0 psl_file.txt orf-file.txt [gff_output_file]",
        "Psl files are generated by BLAT and have the following format (tab-separated):",
        "n_match n_mismatch rep_match Ncount QgapCount QgapBases TgapCount TgapBases",
        "strand Qname Qsize Qstart Qend Tname Tsize Tstart Tend blockCount",
        "blockSizes qStarts tStarts",
        "Last 3 fields are comma-separated lists. Nucleotides counted starting from 0.",
        "Orf file is a tab-delimited file with the following columns",
        "SEQ_name CDSstart CDSend CDSbreak + optional columns",
        "CDSbreak is > 0 if CDS has a frame shift inside (e.g., due to errors in the",
        "genome or alignment errors)",
        "To use psl2gff.pl with the NIA Gene Index, download files T-psl.txt and",
        "Torf-param.txt. Then use command:",
        "psl2gff.pl T-psl.txt Torf-param.txt",
        "",
    );
}

# Parse one PSL line.
# Returns a hash ref with keys strand, qname, qsize, tname, block_sizes,
# q_starts and t_starts (the last three are array refs), or undef when the
# line is not an alignment record (blank line, header line, short line).
sub parse_psl_line {
    my ($line) = @_;

    # Strip the line terminator first: otherwise the last field (tStarts)
    # ends in ",\n" and split(/,/) yields a spurious extra "\n" element.
    $line =~ s/\r?\n\z//;

    my @fields = split /\t/, $line, -1;
    return if @fields < 21;

    # Alignment records start with the integer match count; anything else
    # (e.g. the column-title lines of a PSL header) is not a record.
    return if $fields[0] !~ /\A\d+\z/;

    my ($strand, $qname, $qsize, $tname, $block_sizes, $q_starts, $t_starts)
        = @fields[8, 9, 10, 13, 18, 19, 20];
    return if !length $tname;

    return {
        strand      => $strand,
        qname       => $qname,
        qsize       => $qsize,
        tname       => $tname,
        block_sizes => [ split /,/, $block_sizes ],
        q_starts    => [ split /,/, $q_starts ],
        t_starts    => [ split /,/, $t_starts ],
    };
}

# Read the PSL file once and return the sorted list of distinct target
# names found in it.  Dies if the file cannot be opened.
sub collect_targets {
    my ($psl_file) = @_;

    open my $in, '<', $psl_file
        or die "file $psl_file not found: $!\n";
    my %seen;
    while (my $line = <$in>) {
        my $rec = parse_psl_line($line) or next;
        $seen{ $rec->{tname} } = 1;
    }
    close $in;

    return sort keys %seen;
}

# Read the ORF file (tab-delimited: name, CDSstart, CDSend, CDSbreak,
# optional further columns).  Returns a hash ref mapping sequence name to
# [CDSstart, CDSend, CDSbreak].  Lines without numeric CDS coordinates are
# ignored; a missing or empty CDSbreak is stored as 0.
sub load_orfs {
    my ($orf_file) = @_;

    open my $in, '<', $orf_file
        or die "cannot open $orf_file: $!\n";
    my %orf_for;
    while (my $line = <$in>) {
        $line =~ s/\r?\n\z//;
        my ($name, $left, $right, $break) = split /\t/, $line;
        next if !defined $right || !length $name;
        next if $left !~ /\A-?\d+\z/ || $right !~ /\A-?\d+\z/;
        $break = 0 if !defined $break || !length $break;
        $orf_for{$name} = [ $left, $right, $break ];
    }
    close $in;

    return \%orf_for;
}

# Build the GFF lines for one alignment.
#   $rec - hash ref as returned by parse_psl_line()
#   $orf - array ref [CDSstart, CDSend, CDSbreak] for the query sequence,
#          or undef when the query has no ORF entry
# Returns a list of newline-terminated GFF lines: one "exon" per alignment
# block (1-based inclusive target coordinates), followed by a single "CDS"
# line when both CDS ends were located on the target.
sub alignment_features {
    my ($rec, $orf) = @_;

    # -1 means "no ORF known": no block can then overlap the CDS.
    my ($left_orf, $right_orf, $break) = $orf ? @{$orf} : (-1, -1, -1);

    my $qname  = $rec->{qname};
    my $strand = $rec->{strand};
    my @size   = @{ $rec->{block_sizes} };
    my @qstart = @{ $rec->{q_starts} };
    my @tstart = @{ $rec->{t_starts} };

    my @lines;
    my $cds_start  = -1;
    my $cds_end    = -1;
    my $prev_phase = 0;    # numeric frame of the previous block

    for my $i (0 .. $#tstart) {
        my $exon_start = $tstart[$i] + 1;
        my $exon_end   = $tstart[$i] + $size[$i];

        # Number of query bases skipped between the previous block and
        # this one.
        my $qgap = $i > 0
            ? $qstart[$i] - ($qstart[ $i - 1 ] + $size[ $i - 1 ])
            : 0;

        # Block start/end on the query, in the coordinates the ORF file
        # uses.  For "-" alignments qStarts are flipped with Qsize.
        my $frame_start = $strand eq '-'
            ? $rec->{qsize} - ($qstart[$i] + $size[$i])
            : $qstart[$i];
        my $frame_end = $frame_start + $size[$i];

        # $frame is what gets printed ("." outside the CDS); $phase is the
        # same value as a number, so the arithmetic below never operates
        # on the string ".".
        my $frame = '.';
        my $phase = 0;
        if ($frame_start >= $left_orf && $frame_start < $right_orf) {
            $phase = ($left_orf - $frame_start) % 3;
            if ($break && $frame_start >= $break) {
                $phase = ($break - $frame_start) % 3;
            }
            $frame = $phase;
        }

        if ($strand eq '+') {
            # CDS start lies in this block or in the query gap before it.
            if ($frame_start - $qgap <= $left_orf && $frame_end > $left_orf) {
                if ($left_orf >= $frame_start) {
                    $cds_start = $tstart[$i] + ($left_orf - $frame_start) + 1;
                }
                else {
                    $cds_start = $tstart[$i] + $phase + 1;
                }
            }
            # CDS end lies in this block or in the query gap before it.
            if ($frame_start - $qgap < $right_orf && $frame_end >= $right_orf) {
                if ($right_orf > $frame_start) {
                    $cds_end = $tstart[$i] + ($right_orf - $frame_start);
                }
                else {
                    # Only reachable with $qgap > 0, i.e. $i > 0: end the
                    # CDS on the last whole codon of the previous block.
                    $cds_end = $tstart[ $i - 1 ]
                        + int(($size[ $i - 1 ] - $prev_phase) / 3) * 3
                        + $prev_phase;
                }
            }
        }
        else {
            # Reverse strand: the CDS start on the query maps to the
            # right-hand end of the feature on the target and vice versa.
            if ($frame_start <= $left_orf && $frame_end + $qgap > $left_orf) {
                if ($frame_end > $left_orf) {
                    $cds_end = $tstart[$i] + $size[$i]
                        - ($left_orf - $frame_start);
                }
                else {
                    # Only reachable with $qgap > 0, i.e. $i > 0.
                    $cds_end = $tstart[ $i - 1 ] + $size[ $i - 1 ]
                        - $prev_phase;
                }
            }
            if ($frame_start < $right_orf && $frame_end + $qgap >= $right_orf) {
                if ($frame_end >= $right_orf) {
                    $cds_start = $tstart[$i] + $size[$i]
                        - ($right_orf - $frame_start) + 1;
                }
                else {
                    $cds_start = $tstart[$i] + $size[$i]
                        - (int(($size[$i] - $phase) / 3) * 3 + $phase) + 1;
                }
            }
        }

        push @lines, join("\t", $qname, SOURCE_LABEL, 'exon',
            $exon_start, $exon_end, '.', $strand, $frame) . "\n";
        $prev_phase = $phase;
    }

    if ($cds_start >= 0 && $cds_end >= 0) {
        push @lines, join("\t", $qname, SOURCE_LABEL, 'CDS',
            $cds_start, $cds_end, '.', $strand, 0) . "\n";
    }

    return @lines;
}

# Program entry point.
#   $input_file  - PSL file (required)
#   $Torf_file   - ORF file (required)
#   $output_file - optional output prefix; a trailing ".gff" is removed
# Dies with the usage text when a required argument is missing, or with a
# message containing the OS error when a file cannot be opened or written.
# Returns 0 on success.
sub main {
    my ($input_file, $Torf_file, $output_file) = @_;

    die usage_text()
        if !defined $input_file || !length $input_file
        || !defined $Torf_file  || !length $Torf_file;

    print "Executing: $0\n";

    my $prefix = defined $output_file ? $output_file : '';
    $prefix =~ s/\.gff$//i;

    my @targets = collect_targets($input_file);
    my $orf_for = load_orfs($Torf_file);

    # The PSL file is re-read once per target so that only one output
    # handle is open at any time, whatever the number of targets.
    for my $chr (@targets) {
        my $gff_file = "$chr.gff";
        $gff_file = $prefix . "_" . $gff_file if length $prefix;

        open my $out, '>', $gff_file
            or die "cannot create output file $gff_file: $!\n";
        print "Writing output to $gff_file\n";

        open my $in, '<', $input_file
            or die "file $input_file not found: $!\n";
        while (my $line = <$in>) {
            my $rec = parse_psl_line($line) or next;
            next if $rec->{tname} ne $chr;
            print {$out} alignment_features($rec, $orf_for->{ $rec->{qname} });
        }
        close $in;

        # Buffered write errors (e.g. disk full) only surface at close.
        close $out
            or die "error writing $gff_file: $!\n";
    }

    return 0;
}

# Run only when executed as a script, not when loaded with require/do.
exit(main(@ARGV)) unless caller;

1;