Session 21 – Biopython: Sequences
21.1 Sequence Objects
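Sequence objects behave similarly to Python strings and support most of the same operations (indexing, slicing, counting, concatenation). Like strings, Seq objects are immutable; if you need to edit a sequence in place, convert it to a MutableSeq rather than a plain string.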
# old stuff we have introduced before
from Bio.Seq import Seq
my_first_seq = Seq("CATGCAT")
# new part, separate sequence into rows (one "index letter" pair per line)
for index, letter in enumerate(my_first_seq):
    print("%i %s" % (index, letter))
# retrieving specific elements (negative indices count from the end)
print(my_first_seq[0])
print(my_first_seq[5])
print(my_first_seq[-1])
print(my_first_seq[-2])
# counting repeat sequences within a sequence
# (the count is non-overlapping, like str.count)
my_first_seq.count("CAT")
# counting base content
my_first_seq.count("C")
# determining GC percentage by hand: (G + C) / length, scaled to 0-100
100 * float(
    my_first_seq.count("G") + my_first_seq.count("C")
) / len(my_first_seq)
# Improved simplicity in GC percentage calculation.
# Bio.SeqUtils.GC was deprecated in Biopython 1.80 and later removed;
# its replacement gc_fraction returns a 0-1 fraction, so multiply by 100
# to get the same percentage as the manual calculation above.
from Bio.SeqUtils import gc_fraction
100 * gc_fraction(my_first_seq)
# slicing your sequence
my_first_seq[2:5]
21.2 Concatenation with Sequences
# Simple concatenation: start from an empty Seq and append each piece in turn
from Bio.Seq import Seq
seq_list = [Seq("CCC"), Seq("AAA"), Seq("TTT")]
cat_list = Seq("")
for s in seq_list:
    cat_list += s
# show the fully concatenated sequence once the loop has finished
print(cat_list)
# Biopython concatenation with spacer sequences: build a three-base "NNN"
# spacer whose join() method places it between consecutive sequences
from Bio.Seq import Seq
seq_list = [Seq("CCC"), Seq("AAA"), Seq("TTT")]
spacer = Seq("N" * 3)
spacer.join(seq_list)
21.3 Transcription: DNA to mRNA
from Bio.Seq import Seq

# Start from the coding DNA strand.
coding_dna = Seq("ACTGCATCGATCGTATGTGATGTGACACAC")
# The template DNA strand is the reverse complement of the coding strand.
template_dna = coding_dna.reverse_complement()
# coding strand >>> mRNA
m_rna = coding_dna.transcribe()

# Show the strands in the order they were derived: coding, template, mRNA.
for strand in (coding_dna, template_dna, m_rna):
    print(strand)

# template strand >>> mRNA: reverse-complement back to the coding strand
# first, then transcribe that.
recovered_coding = template_dna.reverse_complement()
recovered_coding.transcribe()
# mRNA >>> cDNA
m_rna.back_transcribe()
21.4 Translation: RNA to PROT
from Bio.Seq import Seq

# Name of the alternative codon table used in the examples below.
mito_table = "Vertebrate Mitochondrial"

# mRNA >>> protein (no table given, so the default codon table is used)
m_rna = Seq("ACUGAUCGAUAGAUGGUGUGUACUCAUAUCACUGACUAG")
m_rna.translate()

# coding DNA >>> protein, translating the DNA directly
coding_dna = Seq("ATCGATCGATAGATCGATCGTCGATCGATCGATCGTTAG")
coding_dna.translate()

# same DNA, translated with the vertebrate mitochondrial table instead
coding_dna.translate(table=mito_table)

# stop at the first in-frame stop codon: once with the default table,
# once with the mitochondrial table
coding_dna.translate(to_stop=True)
coding_dna.translate(table=mito_table, to_stop=True)
# Full bacterial gene example, taken from biopython.org
from Bio.Seq import Seq

# The gene is written as consecutive fragments and joined into one sequence.
gene_fragments = (
    "GTGAAAAAGATGCAATCTATCGTACTCGCACTTTCCCTGGTTCTGGTCGCTCCCATGGCA",
    "GCACAGGCTGCGGAAATTACGTTAGTCCCGTCAGTAAAATTACAGATAGGCGATCGTGAT",
    "AATCGTGGCTATTACTGGGATGGAGGTCACTGGCGCGACCACGGCTGGTGGAAACAACAT",
    "TATGAATGGCGAGGCAATCGCTGGCACCTACACGGACCGCCGCCACCGCCGCGCCACCAT",
    "AAGAAAGCTCCTCATGATCATCACGGCGGTCATGGTCCAGGCAAACATCACCGCTAA",
)
gene = Seq("".join(gene_fragments))

bacterial_table = "Bacterial"
# plain translation with the bacterial codon table
gene.translate(table=bacterial_table)
# translation that ends at the first in-frame stop codon
gene.translate(table=bacterial_table, to_stop=True)
# treat the sequence as a complete coding sequence (CDS)
gene.translate(table=bacterial_table, cds=True)