Session 24 – Biopython: Chromosome Modeling

24.1 Gene Feature Modeling

The dataset is available at this link.

from reportlab.lib.units import cm
from Bio import SeqIO
from Bio.Graphics import BasicChromosome

# (chromosome label, GenBank file) pairs, in drawing order:
# "Chr I" -> "L1.gb" through "Chr V" -> "L5.gb".
entries = [
    (f"Chr {numeral}", f"L{number}.gb")
    for number, numeral in enumerate(("I", "II", "III", "IV", "V"), start=1)
]

# Hard-coded reference length used to scale every chromosome.
# NOTE(review): presumably the length of the longest record; it could be
# computed from the files listed in the `entries` list instead.
max_len = 219_802_468
# Length given to each telomere cap; an arbitrary value chosen for illustration.
telomere_length = 100_000

# Top-level diagram that will hold one drawn chromosome per entry.
# The page is 45 cm wide by 500 cm tall.
# NOTE(review): this is far larger than A4 landscape (29.7 cm x 21 cm);
# presumably sized so that the gene labels fit -- confirm.
page_width, page_height = 45 * cm, 500 * cm
chr_diagram = BasicChromosome.Organism()
chr_diagram.page_size = (page_width, page_height)

# Build one chromosome per (name, GenBank file) pair and add it to the diagram.
for index, (name, filename) in enumerate(entries):
    record = SeqIO.read(filename, "genbank")
    length = len(record)

    # Keep only the "gene" features, tagging each one with a per-chromosome
    # color value (index + 2) as it is collected.
    features = []
    for f in record.features:
        if f.type != "gene":
            continue
        f.qualifiers["color"] = [index + 2]
        features.append(f)

    # Segments in drawing order: opening telomere, annotated body, and the
    # closing (inverted) telomere.
    start = BasicChromosome.TelomereSegment()
    start.scale = telomere_length
    body = BasicChromosome.AnnotatedChromosomeSegment(length, features)
    body.scale = length
    end = BasicChromosome.TelomereSegment(inverted=True)
    end.scale = telomere_length

    cur_chromosome = BasicChromosome.Chromosome(name)
    # Every chromosome gets the same scale_num (max_len plus both telomeres),
    # so the drawn lengths are comparable across chromosomes.
    cur_chromosome.scale_num = max_len + 2 * telomere_length
    for segment in (start, body, end):
        cur_chromosome.add(segment)
    chr_diagram.add(cur_chromosome)

# Render the assembled diagram to "Chromosome.pdf" with the organism name as title.
chr_diagram.draw(output_file="Chromosome.pdf", title="Xenopus laevis")