#!/usr/bin/perl
use strict;
use warnings;
use Bio::SeqIO;

#=================================
# Script to split a fasta file into multiple files, each containing a
# specified number of sequences.
#
# Usage: <script> filename
#
# The batch size is read interactively from STDIN. Output files are
# written to the current directory as 1.fas, 2.fas, 3.fas, ...
#
# briandotoakleyatarsdotusdadotgov
# k.purdy at warwick.ac dot uk
#=================================

my $usage = "\nUsage: $0 filename\n\n";

# Test definedness/length rather than truth so a file literally named "0"
# is still accepted.
my $input_file = $ARGV[0];
die $usage unless defined $input_file && length $input_file;

print "\nYour input file is: $input_file\n \nHow many seqs would you like in each output file? ";

#----read and validate the batch size----
my $num_seqs = <STDIN>;
die "\nNo batch size was entered.\n\n" unless defined $num_seqs;
chomp $num_seqs;
$num_seqs =~ s/\A\s+|\s+\z//g;    # tolerate stray whitespace around the number

# A zero, negative or non-numeric batch size would otherwise degrade silently
# into one sequence per file with an empty first file, so reject it up front.
die "\nBatch size must be a positive whole number, got '$num_seqs'.\n\n"
    unless $num_seqs =~ /\A[1-9][0-9]*\z/;

#----Read in file to split------
# Opened before any output file is created, so that a bad input filename does
# not leave an empty 1.fas behind. No -format is passed for the input, exactly
# as before, so format detection is left to Bio::SeqIO.
my $seq_in = Bio::SeqIO->new(-file => "$input_file");

#----define and open output file for first time through loop----
my $batch_counter = 1;
my $out           = open_batch($batch_counter);
my $seq_counter   = 0;    # sequences written to the current output file

#---loop through seqs---
while (my $seq_obj = $seq_in->next_seq()) {

    # Current file is full: roll over to the next numbered output file
    # before writing this sequence.
    if ($seq_counter >= $num_seqs) {
        $batch_counter++;
        $out         = open_batch($batch_counter);
        $seq_counter = 0;
    }

    $out->write_seq($seq_obj);
    $seq_counter++;
}

print "\nDone.\n\nInput file split into ", $batch_counter - 1,
    " batches of $num_seqs each plus remainder in last file\n\n";

# Open the fasta output stream for batch number $batch, i.e. "<batch>.fas".
# The leading '>' in the -file value is what requests write mode.
# Returns the new Bio::SeqIO object.
sub open_batch {
    my ($batch) = @_;
    return Bio::SeqIO->new(-file => ">$batch.fas", -format => 'fasta');
}