Who is your mom?
Local Alignment: Smith-Waterman Algorithm
http://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
Program:
#!/usr/bin/python -tt
#
# Local Alignment - Smith Waterman Algorithm
# Copyleft (c) 2013. Ridlo W. Wibowo
#
import sys, math
def local_align():
#### error message
usage = """
MANUAL
Usage : python localAlign.py [option] [input]
Option :
-i input two sequence in command line argument
-p print Matrix, True = 1, default False
-g gap value, default 0
-gp gap penalty, default -1
-m match score, default +1
-mm mismatch score, default -1
-f input file [under construction]
status: disable
Example: python localAlign.py -i ACACACTA AGCACACA -m 2
END
"""
#### default gap value
gap = 0.
match = 1.
mismatch = -1.
gapPenalty = -1.
printM = 0
#### input from command line argument
if len(sys.argv) == 1:
print usage
sys.exit(1)
while len(sys.argv) > 1:
option = sys.argv[1]; del sys.argv[1]
if option == '-i':
seq1 = sys.argv[1]; del sys.argv[1]
seq2 = sys.argv[1]; del sys.argv[1]
elif option == '-g':
gap = float(sys.argv[1]); del sys.argv[1]
elif option == '-gp':
gapPenalty = float(sys.argv[1]); del sys.argv[1]
elif option == '-m':
match = float(sys.argv[1]); del sys.argv[1]
elif option == '-mm':
mismatch = float(sys.argv[1]); del sys.argv[1]
elif option == '-p':
printM = int(sys.argv[1]); del sys.argv[1]
else:
print "\n", sys.argv[0], ': invalid option', option, usage
sys.exit(1)
#### print input
print "Local Alignment - Smith Waterman Algorithm"
print "SEQUENCE 1:", seq1; print "SEQUENCE 2:", seq2;
print "gap : ", gap
print "gap penalty : ", gapPenalty
print "match score : ", match
print "mismatch score: ", mismatch
#### initiate and calculate value
lseq1 = len(seq1); lseq2 = len(seq2)
row = lseq2+1; col = lseq1+1
val = []
for i in range(row):
val.append([i*gap]) # gap value in first column
for j in range(1,col):
val[0].append(j*gap) # gap value in first row
for i in range(1,row):
for j in range(1,col):
four = [0.]
if (seq2[i-1] == seq1[j-1]):
four.append(val[i-1][j-1] + match)
else:
four.append(val[i-1][j-1] + mismatch) # match or mismatch
four.append(val[i-1][j] + gapPenalty)
four.append(val[i][j-1] + gapPenalty)
val[i].append(max(four))
#### print value
if (printM):
for i in range(row):
for j in range(col):
print val[i][j], '\t',
print ''
#### search value and position of maxima
maks = max([max(l) for l in val])
print "Maximum Value : ", maks
maxIdx = []
for i in range(row):
for j in range(col):
if (val[i][j] == maks):
maxIdx.append([i,j])
print "Maximum Position :", maxIdx
#### traceback
for idx in maxIdx:
sequ1 = ''; sequ2 = ''
i = idx[0]; j = idx[1]
while (val[i][j] > 0.):
if (val[i][j] == val[i-1][j-1] + (match if seq2[i-1] == seq1[j-1] else mismatch)):
sequ1 += seq1[j-1]
sequ2 += seq2[i-1]
i -= 1; j -= 1
elif (val[i][j] == val[i-1][j] + gapPenalty):
sequ1 += '_'
sequ2 += seq2[i-1]
i -= 1
elif (val[i][j] == val[i][j-1] + gapPenalty):
sequ1 += seq1[j-1]
sequ2 += '_'
j -= 1
sequ1r = ' '.join([sequ1[j] for j in range(-1, -(len(sequ1) + 1), -1)])
sequ2r = ' '.join([sequ2[j] for j in range(-1, -(len(sequ2) + 1), -1)])
score = 0.
for j in range(len(sequ1)):
if (sequ1[j] == sequ2[j]):
score += match
else:
score += mismatch
#### print result of local alignment
print "Track "
print "\tSequence 1 : ", sequ1r
print "\tSequence 2 : ", sequ2r
print "\tScore : ", score
if __name__ == "__main__":
local_align()
running (using example from wiki):
python localAlign.py -i ACACACTA AGCACACA -m 2 -p 1
Result:
Local Alignment - Smith Waterman Algorithm
SEQUENCE 1: ACACACTA
SEQUENCE 2: AGCACACA
gap : 0.0
gap penalty : -1.0
match score : 2.0
mismatch score: -1.0
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0 2.0 1.0 2.0 1.0 2.0 1.0 0.0 2.0
0.0 1.0 1.0 1.0 1.0 1.0 1.0 0.0 1.0
0.0 0.0 3.0 2.0 3.0 2.0 3.0 2.0 1.0
0.0 2.0 2.0 5.0 4.0 5.0 4.0 3.0 4.0
0.0 1.0 4.0 4.0 7.0 6.0 7.0 6.0 5.0
0.0 2.0 3.0 6.0 6.0 9.0 8.0 7.0 8.0
0.0 1.0 4.0 5.0 8.0 8.0 11.0 10.0 9.0
0.0 2.0 3.0 6.0 7.0 10.0 10.0 10.0 12.0
Maximum Value : 12.0
Maximum Position : [[8, 8]]
Track
Sequence 1 : A _ C A C A C T A
Sequence 2 : A G C A C A C _ A
Score : 12.0
Run:
python localAlign.py -i ATGC ATGG -m 2 -p 1
Result:
Local Alignment - Smith Waterman Algorithm
SEQUENCE 1: ATGC
SEQUENCE 2: ATGG
gap : 0.0
gap penalty : -1.0
match score : 2.0
mismatch score: -1.0
0.0 0.0 0.0 0.0 0.0
0.0 2.0 1.0 0.0 0.0
0.0 1.0 4.0 3.0 2.0
0.0 0.0 3.0 6.0 5.0
0.0 0.0 2.0 5.0 5.0
Maximum Value : 6.0
Maximum Position : [[3, 3]]
Track
Sequence 1 : A T G
Sequence 2 : A T G
Score : 6.0
you can improve it by yourself..
———————————————————————————————-**
General Gap Penalty — Global Alignment: Needleman-Wunsch Algorithm
http://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
Program
#!/usr/bin/python -tt
#
# General Gap Penalty, Needleman-Wunch Algorithm
# Copyleft (c) 2013. Ridlo W. Wibowo
#
import sys, math
def global_gap():
#### error message
usage = """
MANUAL
Usage : python generalGap.py [option] [input]
Option :
-i input two sequence in command line argument
-g gap value, default -1
-gp gap penalty, default -1
-m match score, default +1
-mm mismatch score, default -1
-f input file [under construction]
status: disable
Example: python generalGap.py -i ATTGTC AGTGTAC -g -1
END
"""
#### default gap value
gap = -1.
match = 1.
mismatch = -1.
gapPenalty = -1.
#### input from command line argument
if len(sys.argv) == 1:
print usage
sys.exit(1)
while len(sys.argv) > 1:
option = sys.argv[1]; del sys.argv[1]
if option == '-i':
seq1 = sys.argv[1]; del sys.argv[1]
seq2 = sys.argv[1]; del sys.argv[1]
elif option == '-g':
gap = float(sys.argv[1]); del sys.argv[1]
elif option == '-gp':
gapPenalty = float(sys.argv[1]); del sys.argv[1]
elif option == '-m':
match = float(sys.argv[1]); del sys.argv[1]
elif option == '-mm':
mismatch = float(sys.argv[1]); del sys.argv[1]
else:
print "\n", sys.argv[0], ': invalid option', option, usage
sys.exit(1)
#### print input
print "General Gap Penalty, Needleman-Wunch Algorithm"
print "SEQUENCE 1:", seq1; print "SEQUENCE 2:", seq2
print "gap : ", gap
print "gap penalty : ", gapPenalty
print "match score : ", match
print "mismacth score: ", mismatch
#### initiate and calculate value
lseq1 = len(seq1); lseq2 = len(seq2)
row = lseq2+1; col = lseq1+1
val = []
for i in range(row):
val.append([i*gap]) # gap value in first column
for j in range(1,col):
val[0].append(j*gap) # gap value in first row
for i in range(1,row):
for j in range(1,col):
three = []
if (seq2[i-1] == seq1[j-1]):
three.append(val[i-1][j-1] + match)
else:
three.append(val[i-1][j-1] + mismatch) # match or mismatch
three.append(val[i-1][j] + gapPenalty) # Delete
three.append(val[i][j-1] + gapPenalty) # Insert
val[i].append(max(three))
#### print value
for i in range(row):
for j in range(col):
print val[i][j], '\t',
print ''
#### trace back
sequ1 = ''
sequ2 = ''
i = lseq2
j = lseq1
while (i > 0 or j > 0):
if (i>0 and j>0 and val[i][j] == val[i-1][j-1] + (match if seq2[i-1]==seq1[j-1] else mismatch)):
sequ1 += seq1[j-1]
sequ2 += seq2[i-1]
i -= 1; j -= 1
elif (i>0 and val[i][j] == val[i-1][j] + gapPenalty):
sequ1 += '_'
sequ2 += seq2[i-1]
i -= 1
elif (j>0 and val[i][j] == val[i][j-1] + gapPenalty):
sequ1 += seq1[j-1]
sequ2 += '_'
j -= 1
sequ1r = ' '.join([sequ1[j] for j in range(-1, -(len(sequ1)+1), -1)])
sequ2r = ' '.join([sequ2[j] for j in range(-1, -(len(sequ2)+1), -1)])
score = 0.
for j in range(len(sequ1)):
if (sequ1[j] == sequ2[j]):
score += match
else:
score += mismatch
print "Sequence 1: ", sequ1r
print "Sequence 2: ", sequ2r
print "Score : ", score
if __name__ == "__main__":
global_gap()
Run:
python generalGap.py -i ACACACTA AGCACACA
Result:
General Gap Penalty, Needleman-Wunch Algorithm
SEQUENCE 1: ACACACTA
SEQUENCE 2: AGCACACA
gap : -1.0
gap penalty : -1.0
match score : 1.0
mismacth score: -1.0
-0.0 -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0
-1.0 1.0 0.0 -1.0 -2.0 -3.0 -4.0 -5.0 -6.0
-2.0 0.0 0.0 -1.0 -2.0 -3.0 -4.0 -5.0 -6.0
-3.0 -1.0 1.0 0.0 0.0 -1.0 -2.0 -3.0 -4.0
-4.0 -2.0 0.0 2.0 1.0 1.0 0.0 -1.0 -2.0
-5.0 -3.0 -1.0 1.0 3.0 2.0 2.0 1.0 0.0
-6.0 -4.0 -2.0 0.0 2.0 4.0 3.0 2.0 2.0
-7.0 -5.0 -3.0 -1.0 1.0 3.0 5.0 4.0 3.0
-8.0 -6.0 -4.0 -2.0 0.0 2.0 4.0 4.0 5.0
Sequence 1: A _ C A C A C T A
Sequence 2: A G C A C A C _ A
Score : 5.0
——————————————————————————————————–**
Affine Gap Penalty — Global Alignment: Gotoh Algorithm
Recursion:

I’m not sure for this program.. wkwkwk
#!/usr/bin/python -tt
#
# Affine Gap Penalty
# Gotoh algorithm
# Copyleft (c) 2013. Ridlo W. Wibowo
#
import sys, math
def affine_gap():
#### error message
usage = """
MANUAL
Usage : python affineGap.py [option] [input]
Option :
-i input two sequence in command line argument
-d gap open value, default -1
-e gap extend value, default -0.1
-m match score, default +1
-mm mismatch score, default -1
-f input file [under construction]
status: disable
Example: python affineGap.py -i ATTGTC AGTC
END
"""
#### default gap value
gap_open = -1.
gap_extend = -0.1
match = 1.
mismatch = -1.
#### input from command line argument
if len(sys.argv) == 1:
print usage
sys.exit(1)
while len(sys.argv) > 1:
option = sys.argv[1]; del sys.argv[1]
if option == '-i':
seq1 = sys.argv[1]; del sys.argv[1]
seq2 = sys.argv[1]; del sys.argv[1]
elif option == '-d':
gap_open = float(sys.argv[1]); del sys.argv[1]
elif option == '-e':
gap_extend = float(sys.argv[1]); del sys.argv[1]
elif option == '-m':
match = float(sys.argv[1]); del sys.argv[1]
elif option == '-mm':
mismatch = float(sys.argv[1]); del sys.argv[1]
else:
print "\n", sys.argv[0], ': invalid option', option, usage
sys.exit(1)
#### print input
print "Affine Gap Penalty, Gotoh Algorithm"
print "SEQUENCE 1:", seq1; print "SEQUENCE 2:", seq2
print "gap open : ", gap_open
print "gap extend : ", gap_extend
print "match score : ", match
print "mismacth score: ", mismatch
#### initiate and calculate value
lseq1 = len(seq1); lseq2 = len(seq2)
row = lseq2+1; col = lseq1+1
xval = [[0. for j in range(col)] for i in range(row)]
yval = [[0. for j in range(col)] for i in range(row)]
val = [[0. for j in range(col)] for i in range(row)]
for i in range(row):
val[i][0] = gap_open + i*gap_extend
yval[i][0] = -10000.
for j in range(col):
val[0][j] = gap_open + j*gap_extend
xval[0][j] = -10000. # assign -INF
val[0][0] = 0.
for i in range(1,row):
for j in range(1,col):
xval[i][j] = max(xval[i-1][j] + gap_extend, val[i-1][j] + gap_open + gap_extend)
yval[i][j] = max(yval[i][j-1] + gap_extend, val[i][j-1] + gap_open + gap_extend)
cople = 0.
if (seq1[j-1] == seq2[i-1]):
cople = val[i-1][j-1] + match
else:
cople = val[i-1][j-1] + mismatch
val[i][j] = max(cople, xval[i][j], yval[i][j])
#### print value
for i in range(row):
for j in range(col):
print val[i][j], '\t',
print ''
#### traceback
sequ1 = ''
sequ2 = ''
i = lseq2
j = lseq1
while (i>0 or j>0):
if (i>0 and j>0 and val[i][j] == val[i-1][j-1] + (match if seq2[i-1] == seq1[j-1] else mismatch)):
sequ1 += seq1[j-1]
sequ2 += seq2[i-1]
i -= 1; j -= 1
elif (i>0 and val[i][j] == xval[i][j]):
sequ1 += '_'
sequ2 += seq2[i-1]
i -= 1
elif (j>0 and val[i][j] == yval[i][j]):
sequ1 += seq1[j-1]
sequ2 += '_'
j -= 1
sequ1r = ' '.join([sequ1[j] for j in range(-1, -(len(sequ1)+1), -1)])
sequ2r = ' '.join([sequ2[j] for j in range(-1, -(len(sequ2)+1), -1)])
score = 0.
for j in range(len(sequ1)):
if (sequ1[j] == sequ2[j]):
score += match
else:
score += mismatch
print "Sequence 1: ", sequ1r
print "Sequence 2: ", sequ2r
print "Score : ", score
if __name__ == "__main__":
affine_gap()
Run:
python affineGap.py -i AGCATC AGTC
Result:
Affine Gap Penalty, Gotoh Algorithm
SEQUENCE 1: AGCATC
SEQUENCE 2: AGTC
gap open : -1.0
gap extend : -0.1
gap penalty : -1.0
match score : 1.0
mismacth score: -1.0
0.0 -1.1 -1.2 -1.3 -1.4 -1.5 -1.6
-1.1 1.0 -0.1 -0.2 -0.3 -0.4 -0.5
-1.2 -0.1 2.0 0.9 0.8 0.7 0.6
-1.3 -0.2 0.9 1.0 -0.1 1.8 0.7
-1.4 -0.3 0.8 1.9 0.8 0.7 2.8
Sequence 1: A G C A T C
Sequence 2: A G _ _ T C
Score : 2.0