# BBO-Bioinformatics-Bash-Oneliner Hi bioinformaticans and bash learner, welcome to BBO, Bioinformatics Bash Oneliner learning station. I started studying bioinformatics data three years ago, and I was amazed by those single-word bash commands which are much faster than my dull scripts, so i started and insist using bash. Not all the code here is oneliner (if the ';' counts..), but i put effort on making them brief and fast. This blog will focus on bash commands for parsing biological data, most of which are tsv files (tab-separated values); some of them are for Ubuntu system maintaining. I have been recording the bash commands on my notebook, but putting them on web will help others and myself to 'Ctrl +F '. I apologize that there won't be any citation for the codes, coz i haven't make any record of it, but they are probably from dear Google and Stackoverflow. English and bash are not my first language, so... correct me anytime, thank you In case you would like to check up and like my stupid questions on Stackoverflow, here's my page: http://stackoverflow.com/users/4290753/once And if you want to check out stylish BBO: http://bbonut.blogspot.tw/#system ##Handy Bash oneliner commands for tsv file editing - [Grep](#grep) - [Sed](#sed) - [Awk](#awk) - [Xargs](#xargs) - [Find](#find) - [Loops](#loops) - [Download](#download) - [Others](#others) - [System](#system) ##Grep #####extract text bewteen words (e.g. w1,w2) grep -o -P '(?<=w1).*(?=w2)' #####grep lines without word (e.g. bbo) grep -v bbo #####grep and count (e.g. bbo) grep -c bbo filename #####insensitive grep (e.g. bbo/BBO/Bbo) grep -i "bbo" filename #####count occurrence (e.g. three times a line count three times) grep -o bbo filename #####COLOR the match (e.g. bbo)! grep --color bbo filename #####grep search all files in a directory(e.g. bbo) grep -R bbo /path/to/directory or grep -r bbo /path/to/directory #####search all files in directory, only output file names with matches(e.g. bbo) grep -Rh bbo /path/to/directory or grep -rh bbo /path/to/directory #####grep OR (e.g. A or B or C or D) grep 'A\|B\|C\|D' #####grep AND (e.g. A and B) grep 'A.*B' #####grep all content of a fileA from fileB grep -f fileA fileB #####grep a tab grep $'\t' ##Sed [[back to top](#handy-bash-oneliner-commands-for-tsv-file-editing)] #####remove lines with word (e.g. bbo) sed "/bbo/d" filename #####edit infile (edit and save) sed -i "/bbo/d" filename #####when using variable (e.g. $i), use double quotes " " e.g. add >$i to the first line (to make a FASTA file) sed "1i >$i" //notice the double quotes! in other examples, you can use a single quote, but here, no way! //'1i' means insert to first line #####delete empty lines sed '/^\s*$/d' or sed 's/^$/d' #####delete last line sed '$d' #####add \n every nth character (e.g. every 4th character) sed 's/.\{4\}/&\n/g' #####substitution (e.g. replace A by B) sed 's/A/B/g' filename #####select lines start with string (e.g. bbo) sed -n '/^@S/p' #####delete lines with string (e.g. bbo) sed '/bbo/d' filename #####print every nth lines sed -n '0~3p' filename //catch 0: start; 3: step #####print every odd # lines sed -n '1~2p' #####print every third line including the first line sed -n '1p;0~3p' #####remove leading whitespace and tabs sed -e 's/^[ \t]*//' //notice a whitespace before '\t'!! #####remove only leading whitespace sed 's/ *//' //notice a whitespace before '*'!! #####remove ending commas sed 's/,$//g' #####add a column to the end sed "s/$/\t$i/" //$i is the valuable you want to add e.g. add the filename to every last column of the file for i in $(ls);do sed -i "s/$/\t$i/" $i;done #####remove newline\ nextline sed ':a;N;$!ba;s/\n//g' #Awk [[back to top](#handy-bash-oneliner-commands-for-tsv-file-editing)] #####set tab as field separator awk -F $'\t' #####output as tab separated (also as field separator) awk -v OFS='\t' #####pass variable a=bbo;b=obb; awk -v a="$a" -v b="$b" "$1==a && $10=b' filename #####print number of characters on each line awk '{print length ($0);}' filename #####find number of columns awk '{print NF}' #####reverse column order awk '{print $2, $1}' #####check if there is a comma in a column (e.g. column $1) awk '$1~/,/ {print}' #####split and do for loop awk '{split($2, a,",");for (i in a) print $1"\t"a[i]} filename #####print all lines before nth occurence of a string (e.g stop print lines when bbo appears 7 times) awk -v N=7 '{print}/bbo/&& --N<=0 {exit}' #####add string to the beginning of a column (e.g add "chr" to column $3) awk 'BEGIN{OFS="\t"}$3="chr"$3' #####remove lines with string (e.g. bbo) awk '!/bbo/' file #####column subtraction cat file| awk -F '\t' 'BEGIN {SUM=0}{SUM+=$3-$2}END{print SUM}' #####usage and meaning of NR and FNR e.g. fileA: a b c fileB: d e awk 'print FILENAME, NR,FNR,$0}' fileA fileB fileA 1 1 a fileA 2 2 b fileA 3 3 c fileB 4 1 d fileB 5 2 e #####and gate e.g. fileA: 1 0 2 1 3 1 4 0 fileB: 1 0 2 1 3 0 4 1 awk -v OFS='\t' 'NR=FNR{a[$1]=$2;next} NF {print $1,((a[$1]=$2)? $2:"0")}' fileA fileB 1 0 2 1 3 0 4 0 #####round all numbers of file (e.g. 2 significant figure) awk '{while (match($0, /[0-9]+\[0-9]+/)){ \printf "%s%.2f", substr($0,0,RSTART-1),substr($0,RSTART,RLENGTH) \$0=substr($0, RSTART+RLENGTH) \} \print \}' #####give number/index to every row awk '{printf("%s\t%s\n",NR,$0)}' ##Xargs [[back to top](#handy-bash-oneliner-commands-for-tsv-file-editing)] set tab as delimiter (default:space) xargs -d\t #####display 3 items per line echo 1 2 3 4 5 6| xargs -n 3 //1 2 3 4 5 6 #####prompt before execution echo a b c |xargs -p -n 3 #####print command along with output xargs -t abcd ///bin/echo abcd //abcd #####with find and rm find . -name "*.html"|xargs rm -rf delete fiels with whitespace in filename (e.g. "hello 2001") find . -name "*.c" -print0|xargs -0 rm -rf #####show limits xargs --show-limits #####move files to folder find . -name "*.bak" -print 0|xargs -0 -I {} mv {} ~/old or find . -name "*.bak" -print 0|xargs -0 -I file mv file ~/old #####move first 100th files to a directory (e.g. d1) ls |head -100|xargs -I {} mv {} d1 #####parallel time echo {1..5} |xargs -n 1 -P 5 sleepa lot faster than time echo {1..5} |xargs -n1 sleep #####copy all files from A to B find /dir/to/A -type f -name "*.py" -print 0| xargs -0 -r -I file cp -v -p file --target-directory=/path/to/B //v: verbose| //p: keep detail (e.g. owner) #####with sed ls |xargs -n1 -I file sed -i '/^Pos/d' filename #####add the file name to the first line of file ls |sed 's/.txt//g'|xargs -n1 -I file sed -i -e '1 i\>file\' file.txt #####count all files ls |xargs -n1 wc -l #####to filter txt to a single line ls -l| xargs #####count files within directories echo mso{1..8}|xargs -n1 bash -c 'echo -n "$1:"; ls -la "$1"| grep -w 74 |wc -l' -- // "--" signals the end of options and display further option processing #####download dependencies files and install (e.g. requirements.txt) cat requirements.txt| xargs -n1 sudo pip install #####count lines in all file, also count total lines ls|xargs wc -l ##Find [[back to top](#handy-bash-oneliner-commands-for-tsv-file-editing)] #####list all sub directory/file in the current directory find . #####list all files under the current directory find . -type f #####list all directories under the current directory find . -type d #####edit all files under current directory (e.g. replace 'www' with 'ww') find . name '*.php' -exec sed -i 's/www/w/g' {} \; if no subdirectory replace "www" "w" -- * //a space before * #####find and output only filename (e.g. "mso") find mso*/ -name M* -printf "%f\n" #####find and delete file with size less than (e.g. 74 byte) find . -name "*.mso" -size -74c -delete //M for MB, etc ##Loops [[back to top](#handy-bash-oneliner-commands-for-tsv-file-editing)] #####while loop, column subtraction of a file (e.g. a 3 columns file) while read a b c; do echo $(($c-$b));done < <(head filename) //there is a space between the two '<'s #####while loop, sum up column subtraction i=0; while read a b c; do ((i+=$c-$b)); echo $i; done < <(head filename) #####if loop if (($j==$u+2)) //(( )) use for arithmetic operation if [[$age >21]] //[[ ]] use for comparison #####for loop for i in $(ls); do echo file $i;done ##Download [[back to top](#handy-bash-oneliner-commands-for-tsv-file-editing)] #####download all from a page wget -r -l1 -H -t1 -nd -N -np -A mp3 -e robots=off http://example.com //-r: recursive and download all links on page //-l1: only one level link //-H: span host, visit other hosts //-t1: numbers of retries //-nd: don't make new directories, download to here //-N: turn on timestamp //-nd: no parent //-A: type (seperate by ,) //-e robots=off: ignore the robots.txt file which stop wget from crashing the site, sorry example.com ##Others [[back to top](#handy-bash-oneliner-commands-for-tsv-file-editing)] #####remove newline / nextline tr --delete '\n' output.txt #####replace newline tr '\n' ' ' log & or (command here) 2>&1| tee logfile or (command here) 2>&1 >>outfile //0: standard input; 1: standard output; 2: standard error #####send mail echo 'heres the content'| mail -A 'file.txt' -s 'mail.subject' me@gmail.com //use -a flag to set send from (-a "From: some@mail.tld") .xls to csv xls2csv filename #####append to file (e.g. hihi) echo 'hihi' >>filename #####make BEEP sound speaker-test -t sine -f 1000 -l1 set beep duration (speaker-test -t sine -f 1000) & pid=$!;sleep 0.1s;kill -9 $pid #####history edit/ delete ~/.bash_history or history -d [line_number] #####get last history/record filename head !$ #####clean screen clear or Ctrl+l #####send data to last edited file cat /directory/to/file echo 100>!$ #####run history number (e.g. 53) !53 #####run last command !! #####run last command that began with (e.g. cat filename) !cat or !c //run cat filename again #####extract .xf 1.unxz filename.tar.xz 2.tar -xf filename.tar #####install python package pip install packagename #####random order (lucky draw) for i in a b c d e; do echo $i; done| shuf #####echo a random number echo $RANDOM #####Download file if necessary data=file.txt url=http://www.example.com/$data if [! -s $data];then echo "downloading test data..." wget $url fi #####wget to a filename (when a long name) wget -O filename "http://example.com" #####wget files to a folder wget -P /path/to/directory "http://example.com" #####delete current bash command Ctrl+U or Ctrl+C or Alt+Shift+# //to make it to history #####add things to history (e.g. "addmetohistory") #addmetodistory //just add a "#" before~~ #####sleep awhile or wait for a moment or schedule a job sleep 5;echo hi #####count the time for executing a command time echo hi #####backup with rsync rsync -av filename filename.bak rsync -av directory directory.bak rsync -av --ignore_existing directory/ directory.bak rsync -av --update directory directory.bak //skip files that are newer on receiver (i prefer this one!) #####make all directories at one time! mkdir -p project/{lib/ext,bin,src,doc/{html,info,pdf},demo/stat} //-p: make parent directory //this will create project/doc/html/; project/doc/info; project/lib/ext ,etc #####run command only if another command returns zero exit status (well done) cd tmp/ && tar xvf ~/a.tar #####run command only if another command returns non-zero exit status (not finish) cd tmp/a/b/c ||mkdir -p tmp/a/b/c #####extract to a path tar xvf -C /path/to/directory filename.gz #####use backslash "\" to break long command cd tmp/a/b/c \ > || \ >mkdir -p tmp/a/b/c #####get pwd VAR=$PWD; cd ~; tar xvf -C $VAR file.tar //PWD need to be capital letter #####list file type of file (e.g. /tmp/) file /tmp/ //tmp/: directory #####bash script #!/bin/bash file=${1#*.} //remove string before a "." file=${1%.*} //remove string after a "." #####search from history Ctrl+r #####python simple HTTP Server python -m SimpleHTTPServer #####variables {i/a/,} e.g. replace all {i//a/,} //for variable i, replace all 'a' with a comma #####read user input read input echo $input #####generate sequence 1-10 seq 10 #####sum up input list (e.g. seq 10) seq 10|paste -sd+|bc #####find average of input list/file i=`wc -l filename|cut -d ' ' -f1`; cat filename| echo "scale=2;(`paste -sd+`)/"$i|bc #####generate all combination (e.g. 1,2) echo {1,2}{1,2} //1 1, 1 2, 2 1, 2 2 #####generate all combination (e.g. A,T,C,G) set = {A,T,C,G} group= 5 for ((i=0; i<$group; i++));do repetition=$set$repetition;done bash -c "echo "$repetition"" #####read file content to variable foo=$(