Session 24 – Biopython: Chromosome Modeling
24.1 Gene Feature Modeling
The dataset is available at this link.
from reportlab.lib.units import cm
from Bio import SeqIO
from Bio.Graphics import BasicChromosome
# Draw one annotated chromosome per GenBank record and write them all to a
# single PDF. Each chromosome is built as: telomere cap, annotated body,
# inverted telomere cap.

# (display name, GenBank file) for each chromosome, in drawing order.
entries = [
    ("Chr I", "L1.gb"),
    ("Chr II", "L2.gb"),
    ("Chr III", "L3.gb"),
    ("Chr IV", "L4.gb"),
    ("Chr V", "L5.gb"),
]

# Shared scale for every chromosome. Hard-coded here; presumably the length
# (bp) of the longest record -- confirm against the data, or compute it as
# max(len(record)) over the files listed in `entries`.
max_len = 219802468
telomere_length = 100000  # For illustration

chr_diagram = BasicChromosome.Organism()
# Custom oversized page, 45 cm x 500 cm (this is NOT A4; A4 landscape would
# be 29.7 cm x 21 cm).
chr_diagram.page_size = (45 * cm, 500 * cm)

for index, (name, filename) in enumerate(entries):
    # SeqIO.read expects exactly one record in the file.
    record = SeqIO.read(filename, "genbank")
    length = len(record)

    # Keep only "gene" features and tag each with an integer colour code.
    # index + 2 gives every chromosome its own code (2, 3, 4, ...); the
    # Biopython tutorial describes these as Artemis-style colour numbers.
    features = [f for f in record.features if f.type == "gene"]
    for f in features:
        f.qualifiers["color"] = [index + 2]

    cur_chromosome = BasicChromosome.Chromosome(name)
    # Same scale_num on every chromosome so their drawn sizes are comparable:
    # the shared max length plus the two telomere caps.
    cur_chromosome.scale_num = max_len + 2 * telomere_length

    # Opening telomere.
    start = BasicChromosome.TelomereSegment()
    start.scale = telomere_length
    cur_chromosome.add(start)

    # Body segment carrying the gene annotations, scaled by its own length.
    body = BasicChromosome.AnnotatedChromosomeSegment(length, features)
    body.scale = length
    cur_chromosome.add(body)

    # Closing telomere, drawn inverted.
    end = BasicChromosome.TelomereSegment(inverted=True)
    end.scale = telomere_length
    cur_chromosome.add(end)

    # This chromosome is complete; attach it to the organism diagram.
    chr_diagram.add(cur_chromosome)

# Render all chromosomes to one PDF, titled with the organism name.
chr_diagram.draw("Chromosome.pdf", "Xenopus laevis")