Spaces:
Runtime error
Runtime error
Simon Duerr
committed on
Commit
·
135c853
1
Parent(s):
8397910
fix memory problems and add 700 seq cutoff
Browse files
app.py
CHANGED
|
@@ -20,19 +20,7 @@ import torch.nn.functional as F
|
|
| 20 |
import random
|
| 21 |
import os
|
| 22 |
import os.path
|
| 23 |
-
|
| 24 |
-
loss_nll,
|
| 25 |
-
loss_smoothed,
|
| 26 |
-
gather_edges,
|
| 27 |
-
gather_nodes,
|
| 28 |
-
gather_nodes_t,
|
| 29 |
-
cat_neighbors_nodes,
|
| 30 |
-
_scores,
|
| 31 |
-
_S_to_seq,
|
| 32 |
-
tied_featurize,
|
| 33 |
-
parse_PDB,
|
| 34 |
-
)
|
| 35 |
-
from protein_mpnn_utils import StructureDataset, StructureDatasetPDB, ProteinMPNN
|
| 36 |
import plotly.express as px
|
| 37 |
import urllib
|
| 38 |
import jax.numpy as jnp
|
|
@@ -203,7 +191,7 @@ def save_pdb(outs, filename, LEN):
|
|
| 203 |
f.write(pdb_lines)
|
| 204 |
|
| 205 |
|
| 206 |
-
|
| 207 |
def run_alphafold(sequence, num_recycles):
|
| 208 |
recycles = num_recycles
|
| 209 |
RUNNER, OPT = setup_af(sequence)
|
|
@@ -232,8 +220,10 @@ def run_alphafold(sequence, num_recycles):
|
|
| 232 |
OPT["prev"] = outs["prev"]
|
| 233 |
if recycles > 0:
|
| 234 |
print(r, plddts[-1].mean())
|
| 235 |
-
|
| 236 |
-
|
|
|
|
|
|
|
| 237 |
return plddts, outs["pae"], LEN
|
| 238 |
|
| 239 |
|
|
@@ -246,6 +236,20 @@ else:
|
|
| 246 |
|
| 247 |
|
| 248 |
def setup_proteinmpnn(model_name="v_48_020", backbone_noise=0.00):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
|
| 250 |
# ProteinMPNN model name: v_48_002, v_48_010, v_48_020, v_48_030, v_32_002, v_32_010; v_32_020, v_32_030; v_48_010=version with 48 edges 0.10A noise
|
| 251 |
# Standard deviation of Gaussian noise to add to backbone atoms
|
|
@@ -298,6 +302,20 @@ def update(
|
|
| 298 |
model_name,
|
| 299 |
backbone_noise,
|
| 300 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
pdb_path = get_pdb(pdb_code=inp, filepath=file)
|
| 302 |
|
| 303 |
if pdb_path == None:
|
|
@@ -655,9 +673,20 @@ def update(
|
|
| 655 |
def update_AF(startsequence, pdb, num_recycles):
|
| 656 |
|
| 657 |
# # run alphafold using ray
|
| 658 |
-
plddts, pae, num_res = run_alphafold(
|
| 659 |
-
|
| 660 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 661 |
x = np.arange(10)
|
| 662 |
plots = []
|
| 663 |
for recycle, plddts_val in enumerate(plddts):
|
|
@@ -784,7 +813,7 @@ select{
|
|
| 784 |
<div class="text-sm">
|
| 785 |
<div> RMSD AlphaFold vs. native: """
|
| 786 |
+ f"{rms:.2f}"
|
| 787 |
-
+ """
|
| 788 |
<div class="font-medium mt-4"><b>AlphaFold model confidence:</b></div>
|
| 789 |
<div class="flex space-x-2 py-1"><span class="w-4 h-4"
|
| 790 |
style="background-color: rgb(0, 83, 214);"> </span><span class="legendlabel">Very high
|
|
@@ -1073,6 +1102,6 @@ bioRxiv 2022.06.03.494563; doi: [10.1101/2022.06.03.494563](https://doi.org/10.1
|
|
| 1073 |
)
|
| 1074 |
|
| 1075 |
|
| 1076 |
-
|
| 1077 |
|
| 1078 |
proteinMPNN.launch(share=True, debug=True)
|
|
|
|
| 20 |
import random
|
| 21 |
import os
|
| 22 |
import os.path
|
| 23 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
import plotly.express as px
|
| 25 |
import urllib
|
| 26 |
import jax.numpy as jnp
|
|
|
|
| 191 |
f.write(pdb_lines)
|
| 192 |
|
| 193 |
|
| 194 |
+
@ray.remote(num_gpus=1, max_calls=1)
|
| 195 |
def run_alphafold(sequence, num_recycles):
|
| 196 |
recycles = num_recycles
|
| 197 |
RUNNER, OPT = setup_af(sequence)
|
|
|
|
| 220 |
OPT["prev"] = outs["prev"]
|
| 221 |
if recycles > 0:
|
| 222 |
print(r, plddts[-1].mean())
|
| 223 |
+
if os.path.exists("/home/duerr/phd/08_Code/ProteinMPNN"):
|
| 224 |
+
save_pdb(outs, "/home/duerr/phd/08_Code/ProteinMPNN/out.pdb", LEN)
|
| 225 |
+
else:
|
| 226 |
+
save_pdb(outs, "/home/user/app/out.pdb", LEN)
|
| 227 |
return plddts, outs["pae"], LEN
|
| 228 |
|
| 229 |
|
|
|
|
| 236 |
|
| 237 |
|
| 238 |
def setup_proteinmpnn(model_name="v_48_020", backbone_noise=0.00):
|
| 239 |
+
from protein_mpnn_utils import (
|
| 240 |
+
loss_nll,
|
| 241 |
+
loss_smoothed,
|
| 242 |
+
gather_edges,
|
| 243 |
+
gather_nodes,
|
| 244 |
+
gather_nodes_t,
|
| 245 |
+
cat_neighbors_nodes,
|
| 246 |
+
_scores,
|
| 247 |
+
_S_to_seq,
|
| 248 |
+
tied_featurize,
|
| 249 |
+
parse_PDB,
|
| 250 |
+
)
|
| 251 |
+
from protein_mpnn_utils import StructureDataset, StructureDatasetPDB, ProteinMPNN
|
| 252 |
+
|
| 253 |
device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
|
| 254 |
# ProteinMPNN model name: v_48_002, v_48_010, v_48_020, v_48_030, v_32_002, v_32_010; v_32_020, v_32_030; v_48_010=version with 48 edges 0.10A noise
|
| 255 |
# Standard deviation of Gaussian noise to add to backbone atoms
|
|
|
|
| 302 |
model_name,
|
| 303 |
backbone_noise,
|
| 304 |
):
|
| 305 |
+
from protein_mpnn_utils import (
|
| 306 |
+
loss_nll,
|
| 307 |
+
loss_smoothed,
|
| 308 |
+
gather_edges,
|
| 309 |
+
gather_nodes,
|
| 310 |
+
gather_nodes_t,
|
| 311 |
+
cat_neighbors_nodes,
|
| 312 |
+
_scores,
|
| 313 |
+
_S_to_seq,
|
| 314 |
+
tied_featurize,
|
| 315 |
+
parse_PDB,
|
| 316 |
+
)
|
| 317 |
+
from protein_mpnn_utils import StructureDataset, StructureDatasetPDB, ProteinMPNN
|
| 318 |
+
|
| 319 |
pdb_path = get_pdb(pdb_code=inp, filepath=file)
|
| 320 |
|
| 321 |
if pdb_path == None:
|
|
|
|
| 673 |
def update_AF(startsequence, pdb, num_recycles):
|
| 674 |
|
| 675 |
# # run alphafold using ray
|
| 676 |
+
# plddts, pae, num_res = run_alphafold(
|
| 677 |
+
# startsequence, num_recycles
|
| 678 |
+
# )
|
| 679 |
+
if len(startsequence) > 700:
|
| 680 |
+
return (
|
| 681 |
+
"""
|
| 682 |
+
<div class="p-4 mb-4 text-sm text-yellow-700 bg-orange-50 rounded-lg" role="alert">
|
| 683 |
+
<span class="font-medium">Sorry!</span> Currently only small proteins can be run in the server in order to reduce wait time. Try a protein <700 aa. Bigger proteins you can run on <a href="https://github.com/sokrypton/colabfold">ColabFold</a>
|
| 684 |
+
</div>
|
| 685 |
+
""",
|
| 686 |
+
plt.figure(),
|
| 687 |
+
plt.figure(),
|
| 688 |
+
)
|
| 689 |
+
plddts, pae, num_res = ray.get(run_alphafold.remote(startsequence, num_recycles))
|
| 690 |
x = np.arange(10)
|
| 691 |
plots = []
|
| 692 |
for recycle, plddts_val in enumerate(plddts):
|
|
|
|
| 813 |
<div class="text-sm">
|
| 814 |
<div> RMSD AlphaFold vs. native: """
|
| 815 |
+ f"{rms:.2f}"
|
| 816 |
+
+ """Å computed using CEAlign on the aligned fragment</div>
|
| 817 |
<div class="font-medium mt-4"><b>AlphaFold model confidence:</b></div>
|
| 818 |
<div class="flex space-x-2 py-1"><span class="w-4 h-4"
|
| 819 |
style="background-color: rgb(0, 83, 214);"> </span><span class="legendlabel">Very high
|
|
|
|
| 1102 |
)
|
| 1103 |
|
| 1104 |
|
| 1105 |
+
ray.init(runtime_env={"working_dir": "./af_backprop"})
|
| 1106 |
|
| 1107 |
proteinMPNN.launch(share=True, debug=True)
|