Session 21 – Biopython: Sequences
21.1 Sequence Objects
Sequence objects behave similarly to Python strings and can largely be treated as such in Biopython. Like strings, however, Seq objects are immutable: to edit a sequence in place, convert it to a MutableSeq first.
# Recap from earlier sessions: build a Seq object from a plain string.
from Bio.Seq import Seq

my_first_seq = Seq("CATGCAT")

# New: iterate over the sequence and print one "index base" pair per row.
for index, letter in enumerate(my_first_seq):
    print(f"{index} {letter}")

# Retrieving specific elements (0-based; negative indices count from the end).
print(my_first_seq[0])
print(my_first_seq[5])
print(my_first_seq[-1])
print(my_first_seq[-2])

# Counting occurrences of a sub-sequence within the sequence.
my_first_seq.count("CAT")

# Counting base content.
my_first_seq.count("C")

# Determining GC percentage by hand.
# The G and C counts must be summed *before* dividing by the length;
# without the parentheses only the C count is divided, which yields a
# value above 100 % for this sequence.
100 * (my_first_seq.count("G") + my_first_seq.count("C")) / len(my_first_seq)

# Simpler GC percentage calculation using the Biopython helper.
# NOTE(review): GC() was deprecated in newer Biopython releases in favour of
# Bio.SeqUtils.gc_fraction, which returns a fraction (0-1) rather than a
# percentage -- confirm against the installed Biopython version.
from Bio.SeqUtils import GC

GC(my_first_seq)

# Slicing your sequence (start inclusive, end exclusive); returns a new Seq.
my_first_seq[2:5]
21.2 Concatenation with Sequences
# Simple concatenation: start from an empty Seq and append each piece in turn.
from Bio.Seq import Seq

seq_list = [Seq("CCC"), Seq("AAA"), Seq("TTT")]
cat_list = Seq("")
for s in seq_list:
    cat_list += s
print(cat_list)

# Biopython concatenation with spacer sequences: like str.join, the spacer
# Seq ("NNN") is inserted between consecutive items of the list.
from Bio.Seq import Seq

seq_list = [Seq("CCC"), Seq("AAA"), Seq("TTT")]
spacer = Seq("N" * 3)
spacer.join(seq_list)
21.3 Transcription: DNA to mRNA
from Bio.Seq import Seq

# Coding DNA strand (5' -> 3').
coding_dna = Seq("ACTGCATCGATCGTATGTGATGTGACACAC")
print(coding_dna)

# Template DNA strand: the reverse complement of the coding strand.
template_dna = coding_dna.reverse_complement()
print(template_dna)

# Coding strand >>> mRNA.
m_rna = coding_dna.transcribe()
print(m_rna)

# Template strand >>> mRNA: recover the coding strand first, then transcribe.
template_dna.reverse_complement().transcribe()

# mRNA >>> cDNA.
m_rna.back_transcribe()
21.4 Translation: RNA to PROT
from Bio.Seq import Seq

# mRNA >>> protein.
m_rna = Seq("ACUGAUCGAUAGAUGGUGUGUACUCAUAUCACUGACUAG")
m_rna.translate()

# Coding DNA >>> protein (translate() accepts DNA directly).
coding_dna = Seq("ATCGATCGATAGATCGATCGTCGATCGATCGATCGTTAG")
coding_dna.translate()

# Change the translation table (here: vertebrate mitochondrial).
coding_dna.translate(table="Vertebrate Mitochondrial")

# Translate only up to the first in-frame stop codon.
coding_dna.translate(to_stop=True)
coding_dna.translate(table="Vertebrate Mitochondrial", to_stop=True)

# Bacterial gene: full example from biopython.org.
# Adjacent string literals are joined into a single sequence string.
from Bio.Seq import Seq

gene = Seq(
    "GTGAAAAAGATGCAATCTATCGTACTCGCACTTTCCCTGGTTCTGGTCGCTCCCATGGCA"
    "GCACAGGCTGCGGAAATTACGTTAGTCCCGTCAGTAAAATTACAGATAGGCGATCGTGAT"
    "AATCGTGGCTATTACTGGGATGGAGGTCACTGGCGCGACCACGGCTGGTGGAAACAACAT"
    "TATGAATGGCGAGGCAATCGCTGGCACCTACACGGACCGCCGCCACCGCCGCGCCACCAT"
    "AAGAAAGCTCCTCATGATCATCACGGCGGTCATGGTCCAGGCAAACATCACCGCTAA"
)
gene.translate(table="Bacterial")
gene.translate(table="Bacterial", to_stop=True)
# cds=True treats the sequence as a complete coding sequence, so the
# alternative start codon (GTG) is translated as methionine.
gene.translate(table="Bacterial", cds=True)