#!/usr/local/bin/perl
#
# Convert a BLAT PSL alignment file into GFF files, one file per target
# sequence (PSL column Tname).
#
# Usage:  script psl_input_file [gff_output_file]
#
# For every target name found in the input a file "<Tname>.gff" is written,
# or "<prefix>_<Tname>.gff" when gff_output_file is given (a trailing ".gff"
# on that argument is stripped to form the prefix).  Each alignment block
# becomes one tab-separated line:
#
#   Qname  NIA_Mouse_4.0  exon  start  end  .  strand  .
#
# where start/end are the 1-based inclusive target coordinates of the block.
#
# The input is re-read once per target instead of keeping one open handle
# per target, so the number of simultaneously open files stays constant.

use strict;
use warnings;

# Fixed values written to the "source" and "feature" columns of each record.
use constant GFF_SOURCE  => 'NIA_Mouse_4.0';
use constant GFF_FEATURE => 'exon';

# Zero-based positions of the PSL columns this script uses.
use constant {
    PSL_STRAND      => 8,
    PSL_QNAME       => 9,
    PSL_TNAME       => 13,
    PSL_BLOCK_SIZES => 18,
    PSL_TSTARTS     => 20,
    PSL_MIN_FIELDS  => 21,
};

my $usage = "$0 psl_input_file [gff_output_file] "
    . "Psl files are generated by BLAT and have the following format (tab-separated): "
    . "n_match n_mismatch rep_match Ncount QgapCount QgapBases TgapCount TgapBases "
    . "strand Qname Qsize Qstart Qend Tname Tsize Tstart Tend "
    . "blockCount blockSizes qStarts tStarts "
    . "Last 3 fields are comma-separated lists\n";

my ($input_file, $output_file) = @ARGV;
die $usage unless defined $input_file && length $input_file;

print "Executing: $0\n";

# The optional second argument is only a file-name prefix.
my $prefix = defined $output_file ? $output_file : '';
$prefix =~ s/\.gff$//i;

# The first target in the file is written first; while doing so we collect
# the names of all other targets, which are then written in sorted order.
my $first_target = first_target($input_file);
if (defined $first_target) {
    my %pending_target;
    write_target_gff($input_file, $first_target,
        gff_file_name($prefix, $first_target), \%pending_target);

    for my $target (sort keys %pending_target) {
        write_target_gff($input_file, $target,
            gff_file_name($prefix, $target));
    }
}

exit(0);

# Open the PSL input for reading and return the handle; dies on failure.
sub open_psl {
    my ($path) = @_;
    open my $in, '<', $path
        or die "file $path not found: $!";
    return $in;
}

# Build the output file name for one target: "<target>.gff", prefixed with
# "<prefix>_" when a non-empty prefix was supplied.
sub gff_file_name {
    my ($name_prefix, $target) = @_;
    my $name = "$target.gff";
    if (defined $name_prefix && length $name_prefix) {
        $name = $name_prefix . "_" . $name;
    }
    return $name;
}

# Split one input line into PSL fields.
# Returns a hash ref with the fields this script needs, or nothing when the
# line does not look like an alignment record (too few columns, non-numeric
# first column, empty target name) -- e.g. header or blank lines.
sub parse_psl_line {
    my ($line) = @_;

    # Strip the line ending first: otherwise the newline stays attached to
    # the last field (tStarts) and shows up as an extra, bogus block.
    chomp $line;
    $line =~ s/\r\z//;

    my @field = split /\t/, $line, -1;
    return if @field < PSL_MIN_FIELDS;
    return if $field[0] !~ /\A\d+\z/;
    return if !length $field[PSL_TNAME];

    return {
        strand     => $field[PSL_STRAND],
        Qname      => $field[PSL_QNAME],
        Tname      => $field[PSL_TNAME],
        blockSizes => $field[PSL_BLOCK_SIZES],
        tStarts    => $field[PSL_TSTARTS],
    };
}

# Turn one parsed alignment into a list of GFF lines, one per block.
# PSL block starts are 0-based, so start = tStart + 1 and
# end = tStart + blockSize (inclusive).
sub gff_lines {
    my ($rec) = @_;

    my @size  = split /,/, $rec->{blockSizes};
    my @start = split /,/, $rec->{tStarts};

    # Only emit blocks that have both a start and a size.
    my $last = $#start < $#size ? $#start : $#size;

    my @lines;
    for my $i (0 .. $last) {
        my $from = $start[$i] + 1;
        my $to   = $start[$i] + $size[$i];
        push @lines,
            join("\t", $rec->{Qname}, GFF_SOURCE, GFF_FEATURE,
                $from, $to, '.', $rec->{strand}, '.') . "\n";
    }
    return @lines;
}

# Return the target name of the first alignment record in the file, or
# undef when the file contains no alignment records.
sub first_target {
    my ($path) = @_;

    my $in = open_psl($path);
    my $target;
    while (my $line = <$in>) {
        my $rec = parse_psl_line($line) or next;
        $target = $rec->{Tname};
        last;
    }
    close $in;
    return $target;
}

# Write all blocks aligned to $target into $gff_file.
# When $other_targets (hash ref) is given, the names of all other targets
# seen during this pass are recorded in it as keys.
sub write_target_gff {
    my ($path, $target, $gff_file, $other_targets) = @_;

    open my $out, '>', $gff_file
        or die "cannot create output file $gff_file: $!";
    print "Writing output to $gff_file\n";

    my $in = open_psl($path);
    while (my $line = <$in>) {
        my $rec = parse_psl_line($line) or next;
        if ($rec->{Tname} ne $target) {
            $other_targets->{ $rec->{Tname} } = 1 if $other_targets;
            next;
        }
        print {$out} gff_lines($rec);
    }
    close $in;

    # Buffered write errors only surface at close, so check it.
    close $out
        or die "cannot write output file $gff_file: $!";
    return;
}