Session 21 – Biopython: Sequences

21.1 Sequence Objects

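Sequence objects behave similarly to Python strings and support most of the same operations (indexing, slicing, counting, concatenation). Like strings, Seq objects are immutable; to edit a sequence in place, convert it to a MutableSeq, or convert it to a plain string with str() and build a new Seq from the modified text.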

# Build a Seq object from a plain string (introduced in an earlier session)
from Bio.Seq import Seq
my_first_seq = Seq("CATGCAT")

# New: walk the sequence one base at a time, printing "<index> <base>" per row
for position, base in enumerate(my_first_seq):
    print(f"{position} {base}")

# Retrieve single bases by index; negative indices count back from the end
for offset in (0, 5, -1, -2):
    print(my_first_seq[offset])

# Count occurrences of a motif. Like str.count, Seq.count is non-overlapping.
print(my_first_seq.count("CAT"))

# Count a single base
print(my_first_seq.count("C"))

# GC percentage computed by hand: (G + C) / length, scaled to percent
gc_count = my_first_seq.count("G") + my_first_seq.count("C")
print(100.0 * gc_count / len(my_first_seq))

# The same GC percentage via Biopython's helper. The old Bio.SeqUtils.GC
# function (which returned a percentage) was deprecated in Biopython 1.80 and
# later removed; its replacement gc_fraction returns a fraction between 0 and
# 1, so scale by 100 to keep reporting a percentage.
from Bio.SeqUtils import gc_fraction
print(100 * gc_fraction(my_first_seq))

# Slice out a sub-sequence (indices 2 to 4; the end index is exclusive)
print(my_first_seq[2:5])

21.2 Concatenation with Sequences

# Simple concatenation: add the sequences together, starting from an empty Seq
from Bio.Seq import Seq
seq_list = [Seq("CCC"), Seq("AAA"), Seq("TTT")]
cat_list = sum(seq_list, Seq(""))

print(cat_list)

# Biopython concatenation with a spacer: like str.join, Seq.join places the
# spacer sequence between each pair of neighbouring items in the list
from Bio.Seq import Seq
seq_list = [Seq("CCC"), Seq("AAA"), Seq("TTT")]
spacer = Seq("N" * 3)
# Print the joined sequence; a bare expression is discarded when run as a script
print(spacer.join(seq_list))

21.3 Transcription: DNA to mRNA

from Bio.Seq import Seq

# Coding DNA strand
coding_dna = Seq("ACTGCATCGATCGTATGTGATGTGACACAC")
# Template DNA strand: the reverse complement of the coding strand
template_dna = coding_dna.reverse_complement()
# coding >>> mRNA: transcribe the coding strand directly
m_rna = coding_dna.transcribe()

# Show the coding strand, the template strand and the mRNA, in that order
for strand in (coding_dna, template_dna, m_rna):
    print(strand)

# template >>> mRNA: recover the coding strand first, then transcribe it.
# Printed so the result is visible when this runs as a script.
print(template_dna.reverse_complement().transcribe())

# mRNA >>> cDNA: back-transcription turns the mRNA back into DNA
print(m_rna.back_transcribe())

21.4 Translation: RNA to PROT

from Bio.Seq import Seq

# mRNA >>> protein, using the default (standard) translation table.
# A "*" in the output marks a stop codon.
m_rna = Seq("ACUGAUCGAUAGAUGGUGUGUACUCAUAUCACUGACUAG")
print(m_rna.translate())

# coding DNA >>> protein: translate() can be called on the DNA directly
coding_dna = Seq("ATCGATCGATAGATCGATCGTCGATCGATCGATCGTTAG")
print(coding_dna.translate())

# Change the translation table (vertebrate mitochondrial code)
print(coding_dna.translate(table="Vertebrate Mitochondrial"))

# Translate only up to the first in-frame stop codon; the stop symbol itself
# is not included in the result
print(coding_dna.translate(to_stop=True))
print(coding_dna.translate(table="Vertebrate Mitochondrial", to_stop=True))

# Complete bacterial gene, example taken from biopython.org
from Bio.Seq import Seq
gene = Seq(
    "GTGAAAAAGATGCAATCTATCGTACTCGCACTTTCCCTGGTTCTGGTCGCTCCCATGGCA"
    "GCACAGGCTGCGGAAATTACGTTAGTCCCGTCAGTAAAATTACAGATAGGCGATCGTGAT"
    "AATCGTGGCTATTACTGGGATGGAGGTCACTGGCGCGACCACGGCTGGTGGAAACAACAT"
    "TATGAATGGCGAGGCAATCGCTGGCACCTACACGGACCGCCGCCACCGCCGCGCCACCAT"
    "AAGAAAGCTCCTCATGATCATCACGGCGGTCATGGTCCAGGCAAACATCACCGCTAA"
)

# Plain translation with the bacterial table; the trailing stop shows as "*"
print(gene.translate(table="Bacterial"))

# Stop at the first in-frame stop codon (the stop symbol is not included)
print(gene.translate(table="Bacterial", to_stop=True))

# cds=True treats the sequence as a complete coding sequence: Biopython checks
# for a valid start codon (translated as M), a length that is a multiple of
# three and a single terminal stop codon, and raises an exception otherwise
print(gene.translate(table="Bacterial", cds=True))