#!/bin/bash
#
# Split a multi-sequence FASTA file into one file per sequence, using
# hard-coded line ranges. The line counts below were computed for
# danRer5.fa (see the R snippet further down).
#
# Run with:
#   ./splitbigfasta.sh danRer5.fa
# Then test the result with:
#   cat chr[1-9].fa chr??.fa Zv7_NA.fa Zv7_scaffold.fa chrM.fa > merged.fa
#   cmp merged.fa danRer5.fa
#
# Output files are written to the current working directory.

# -e: exit immediately if a simple command exits with a non-zero status.
# -u: treat the use of an unset variable as an error.
# -o pipefail: a 'head | tail' pipeline fails if 'head' fails, instead of
#              silently producing an empty output file.
set -euo pipefail

if [[ $# -ne 1 ]]; then
  printf 'Usage: %s <fasta-file>\n' "${0##*/}" >&2
  exit 2
fi

input=$1

# Every extraction re-reads the input by name, so it must be a readable
# regular file (stdin or a pipe cannot be used).
if [[ ! -f "$input" || ! -r "$input" ]]; then
  printf '%s: cannot read input file: %s\n' "${0##*/}" "$input" >&2
  exit 1
fi

#######################################
# Write a contiguous range of lines of the input file to an output file.
# Globals:   input (read)
# Arguments: $1 - NH: number of the last line of the range (for 'head -n')
#            $2 - NT: number of lines in the range (for 'tail -n')
#            $3 - output file name
# Outputs:   creates/overwrites the file named by $3
#######################################
extract_lines() {
  head -n "$1" -- "$input" | tail -n "$2" > "$3"
}

# Note that in 'head -n NH $1 | tail -n NT' the NH and NT values
# were obtained from R with:
#   library(Biostrings)
#   fi <- fasta.seqlengths("danRer5.fa")
#   fi25 <- fi[1:25]
#   NT <- fi25 %/% 50 + 2
#   NH <- cumsum(NT)
#
# For chr1, NH == NT, so the range is simply the first NH lines.
#             NH        NT       output
extract_lines 1124095   1124095  chr1.fa
extract_lines 2211431   1087336  chr2.fa
extract_lines 3470057   1258626  chr3.fa
extract_lines 4322107   852050   chr4.fa
extract_lines 5729536   1407429  chr5.fa
extract_lines 6913551   1184015  chr6.fa
extract_lines 8318793   1405242  chr7.fa
extract_lines 9447929   1129136  chr8.fa
extract_lines 10477749  1029820  chr9.fa
extract_lines 11325342  847593   chr10.fa
extract_lines 12217671  892329   chr11.fa
extract_lines 13168147  950476   chr12.fa
extract_lines 14239096  1070949  chr13.fa
extract_lines 15369555  1130459  chr14.fa
extract_lines 16302145  932590   chr15.fa
extract_lines 17363560  1061415  chr16.fa
extract_lines 18409770  1046210  chr17.fa
extract_lines 19395399  985629   chr18.fa
extract_lines 20319025  923626   chr19.fa
extract_lines 21449600  1130575  chr20.fa
extract_lines 22370748  921148   chr21.fa
extract_lines 23150386  779638   chr22.fa
extract_lines 24078148  927762   chr23.fa
extract_lines 24884016  805868   chr24.fa
extract_lines 25541542  657526   chr25.fa

# chrM is taken as the last 333 lines of the input.
tail -n 333 -- "$input" > chrM.fa

# NH and NT found after several try-and-fail attempts
extract_lines 27902547  2361005  Zv7_NA.fa
extract_lines 28818810  916263   Zv7_scaffold.fa