-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWGSWizard-scripts.py
More file actions
65 lines (46 loc) · 2.31 KB
/
WGSWizard-scripts.py
File metadata and controls
65 lines (46 loc) · 2.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#Get the original sequence for comparison
from Bio import Entrez, SeqIO
results = []
Entrez.email = email
cds = "1280421"
with Entrez.efetch(db="nucleotide", rettype="gb", retmode="text", id=cds) as handle:
seq_record = SeqIO.read(handle, "gb")
for feature in seq_record.features:
if feature.type == "CDS":
dmel_cds = feature.location.extract(seq_record).seq
results.append({'DNA': str(dmel_cds), 'Protein': str(dmel_cds.translate(to_stop=True))})
#BLAST against other species and find complete sequences
import WGSWizard as wiz
dmel_seq = "442632573" #D melanogaster Commissureless
species_dict = ["txid7244[orgn]", "txid7260[orgn]", "txid7291[orgn]"] #D virilis, D willistoni, D albomicans
email = "maya.gosztyla@gmail.com"
path = "/Users/mayagosztyla/taxid2wgs.pl"
for species in species_list:
wiz.getdb(species=species, filepath=path)
#Wait for them to pop up in PyCharm! Takes a few seconds.
for species in species_list:
print("Blasting " + species)
blast_record = wiz.wgsblast(query_seq=dmel_seq, species=species)
seq = wiz.wgsseq(email=email, blast=blast_record)
results.append({'DNA': str(seq), 'Protein': str(seq.translate(to_stop=True))})
print("Finished " + species)
#Generate multiple-sequence alignment
import pandas as pd
from Bio.Align.Applications import MuscleCommandline
results_df = pd.DataFrame(results, index=["Dmelanogaster", "Dvirilis", "Dwillisoni", "Dalbomicans"])
results_df['Protein'].to_csv("My BLAST results.tab", sep="\t", header=False)
SeqIO.convert("My BLAST results.tab", "tab", "My BLAST results.fasta", "fasta")
muscle = r"/Users/mayagosztyla/muscle3.8.31_i86darwin64"
cmdline = MuscleCommandline(muscle, input= "My BLAST results.fasta", out= "My BLAST results.aln", clw=True)
cmdline()
#Generate phylogenetic tree
from Bio import AlignIO
from Bio import Phylo
from Bio.Phylo.Applications import PhymlCommandline
phyml = r"/Users/mayagosztyla/PhyML-3.1_macOS-MountainLion"
AlignIO.convert("My BLAST results.aln", "clustal", "My BLAST results.phy", "phylip-relaxed")
cmdline2 = PhymlCommandline(phyml, input='My BLAST Results.phy', datatype='aa', model='WAG', alpha='e', bootstrap=100)
out_log, err_log = cmdline2()
my_tree = Phylo.read("My BLAST results.phy_phyml_tree.txt", "newick")
Phylo.draw_ascii(my_tree)
Phylo.write(my_tree, "My BLAST results tree.xml", 'phyloxml')