Session 23 – Biopython: GenomeDiagram

23.1 Linear/Circular Gene Diagrams

First get a plasmid genbank file or even a mitochondrial dna genbank file. Feel free to use the example from this link.

Notice: macOSX migth need to install reportlab and use pip3 install reportlab.

# import modules
import os
from reportlab.lib import colors
from reportlab.lib.units import cm
from Bio.Graphics import GenomeDiagram
from Bio import SeqIO

# set wd
os.chdir("YOUR WORKING DIRECTORY WITH GENBANK FILE HERE")

# load file as record
record = SeqIO.read("GENBANK FILE HERE", "genbank")

# set parameters, set empty diagram
gen_diagram = GenomeDiagram.Diagram("TITLE HERE")
gen_track_for_features = gen_diagram.new_track(1, name="Annotated Features")
gen_feature_set = gen_track_for_features.new_set()

# Generate diagram, alternate colors
for feature in record.features:
    if feature.type != "gene":
        # Do Nothing
        continue
    if len(gen_feature_set) % 2 == 0:
        color = colors.blue
    else:
        color = colors.lightblue
    gen_feature_set.add_feature(feature, color=color, label=True, label_size=10)

# Create output
gen_diagram.draw(
    format="linear",
    orientation="landscape",
    pagesize="A4",
    fragments=4,
    start=0,
    end=len(record),
)

# Save figure as ...
gen_diagram.write("example1.pdf", "PDF")
gen_diagram.write("example1.eps", "EPS")
gen_diagram.write("example1.svg", "SVG")
gen_diagram.write("example1.png", "PNG")


# Circular figure:
gen_diagram.draw(
    format="circular",
    circular=True,
    pagesize=(20 * cm, 20 * cm),
    start=0,
    end=len(record),
    circle_core=0.7,
)
gen_diagram.write("example1.pdf", "PDF")

23.2 Diagram Readability

You can make several changes to the above code to improve readability of the diagram.

# edit the code above with the code below and see what changes
gen_feature_set.add_feature(
    feature,
    label=True,
    color="purple",
    label_position="end",
    label_size=20,
    label_angle=90,
)

23.3 Box Shape

You can change the default shape of the gene boxes.

# Default
gen_feature_set.add_feature(feature)

# Box
gen_feature_set.add_feature(feature, sigil="BOX")

# Arrow
gen_feature_set.add_feature(feature, sigil="ARROW")

# Big Arrow
gen_feature_set.add_feature(feature, sigil="BIGARROW")

# Octo-gon
gen_feature_set.add_feature(feature, sigil="OCTO")

# Potter Zag
gen_feature_set.add_feature(feature, sigil="JAGGY")

23.4 Edit Box Shape: Arrows

You can edit the shape of the new shape.

# Tail-less Arrow
gen_feature_set.add_feature(feature, sigil="ARROW", color="brown", arrowshaft_height=1.0)

# Thin Arrow
gen_feature_set.add_feature(feature, sigil="ARROW", color="teal", arrowshaft_height=0.2)

# Very Thin Arrow
gen_feature_set.add_feature(
    feature, sigil="ARROW", color="darkgreen", arrowshaft_height=0.1
)

# Short Arrowhead
gen_feature_set.add_feature(feature, sigil="ARROW", color="blue", arrowhead_length=0.25)

# Long Arrowhead
gen_feature_set.add_feature(feature, sigil="ARROW", color="orange", arrowhead_length=1)

# All Arrowhead
gen_feature_set.add_feature(feature, sigil="ARROW", color="red", arrowhead_length=10000)

# Big Arrow
gen_feature_set.add_feature(feature, sigil="BIGARROW")

23.5 Restriction Map

Add restriction sites to your small genome

# old
from reportlab.lib import colors
from reportlab.lib.units import cm
from Bio.Graphics import GenomeDiagram
from Bio import SeqIO

# new
from Bio.SeqFeature import SeqFeature, FeatureLocation

# old
record = SeqIO.read("YOUR GB FILE HERE", "genbank")

# old
gd_diagram = GenomeDiagram.Diagram(record.id)
gd_track_for_features = gd_diagram.new_track(1, name="Annotated Features")
gd_feature_set = gd_track_for_features.new_set()

# old
for feature in record.features:
    if feature.type != "gene":
        # Exclude this feature
        continue
    if len(gd_feature_set) % 2 == 0:
        color = colors.blue
    else:
        color = colors.lightblue
    gd_feature_set.add_feature(
        feature, sigil="ARROW", color=color, label=True, label_size=14, label_angle=0
    )

# new: Add restriction enzymes and sites
for site, name, color in [
    ("GAATTC", "EcoRI", colors.green),
    ("CCCGGG", "SmaI", colors.orange),
    ("AAGCTT", "HindIII", colors.red),
    ("GGATCC", "BamHI", colors.purple),
]:
    index = 0
    while True:
        index = record.seq.find(site, start=index)
        if index == -1:
            break
        feature = SeqFeature(FeatureLocation(index, index + len(site)))
        gd_feature_set.add_feature(
            feature,
            color=color,
            name=name,
            label=True,
            label_size=10,
            label_color=color,
        )
        index += len(site)

# old, linear
gd_diagram.draw(format="linear", pagesize="A4", fragments=4, start=0, end=len(record))
gd_diagram.write("example2.pdf", "PDF")
gd_diagram.write("example2.eps", "EPS")
gd_diagram.write("example2.svg", "SVG")

# old, circular
gd_diagram.draw(
    format="circular",
    circular=True,
    pagesize=(20 * cm, 20 * cm),
    start=0,
    end=len(record),
    circle_core=0.5,
)
gd_diagram.write("example2.pdf", "PDF")
gd_diagram.write("example2.eps", "EPS")
gd_diagram.write("example2.svg", "SVG")

23.6 Tracks on Tracks

I will use the following genbank file for my example of stacked tracks along with my previous file. You are again free to use your own genome/genome segment of choice.

genome_mt_1

genome_mt_2

from Bio import SeqIO

A_rec = SeqIO.read("XL.gb", "gb")
B_rec = SeqIO.read("XT.gb", "gb")
C_rec = SeqIO.read("NP.gb", "gb")

from reportlab.lib.colors import (
    red,
    grey,
    orange,
    green,
    brown,
    blue,
    lightblue,
    purple,
)

A_colors = (
    [red] * 5
    + [grey] * 7
    + [orange] * 2
    + [grey] * 2
    + [orange]
    + [grey] * 11
    + [green] * 4
    + [grey]
    + [green] * 2
    + [grey, green]
    + [brown] * 5
    + [blue] * 4
    + [lightblue] * 5
    + [grey, lightblue]
    + [purple] * 2
    + [grey]
)
B_colors = (
    [red] * 6
    + [grey] * 8
    + [orange] * 2
    + [grey]
    + [orange]
    + [grey] * 21
    + [green] * 5
    + [grey]
    + [brown] * 4
    + [blue] * 3
    + [lightblue] * 3
    + [grey] * 5
    + [purple] * 2
)
C_colors = (
    [grey] * 30
    + [green] * 5
    + [brown] * 4
    + [blue] * 2
    + [grey, blue]
    + [lightblue] * 2
    + [grey] * 5
)

from Bio.Graphics import GenomeDiagram

name = "Stacked"
gd_diagram = GenomeDiagram.Diagram(name)
max_len = 0
for record, gene_colors in zip([A_rec, B_rec, C_rec], [A_colors, B_colors, C_colors]):
    max_len = max(max_len, len(record))
    gd_track_for_features = gd_diagram.new_track(
        1, name=record.name, greytrack=True, start=0, end=len(record)
    )
    gd_feature_set = gd_track_for_features.new_set()
    i = 0
    for feature in record.features:
        if feature.type != "gene":
            # Exclude this feature
            continue
        gd_feature_set.add_feature(
            feature,
            sigil="ARROW",
            color=gene_colors[i],
            label=True,
            name=str(i + 1),
            label_position="start",
            label_size=6,
            label_angle=0,
        )
        i += 1

gd_diagram.draw(format="linear", pagesize="A4", fragments=1, start=0, end=max_len)
gd_diagram.write(name + ".pdf", "PDF")
gd_diagram.write(name + ".eps", "EPS")
gd_diagram.write(name + ".svg", "SVG")