DiffLinker

Runtime error

App Files Community

igashov committed on Apr 11, 2023

Commit

92263a6

1 Parent(s): 8fd5e3f

Added an option to select anchor atoms

Browse files

Files changed (3) hide show

app.py +102 -55
output.py +91 -3
src/generation.py +38 -0

app.py CHANGED Viewed

@@ -9,12 +9,30 @@ import output
 from rdkit import Chem
 from src import const
-from src.visualizer import save_xyz_file
 from src.datasets import get_dataloader, collate_with_fragment_edges, parse_molecule
 from src.lightning import DDPM
 from src.linker_size_lightning import SizeClassifier
-N_SAMPLES = 5
 parser = argparse.ArgumentParser()
 parser.add_argument('--ip', type=str, default=None)
@@ -33,13 +51,22 @@ if not os.path.exists(size_gnn_path):
 size_nn = SizeClassifier.load_from_checkpoint('models/geom_size_gnn.ckpt', map_location=device).eval().to(device)
 print('Loaded SizeGNN model')
-diffusion_path = 'models/geom_difflinker.ckpt'
-if not os.path.exists(diffusion_path):
-    print('Downloading Diffusion model...')
-    link = 'https://zenodo.org/record/7121300/files/geom_difflinker.ckpt?download=1'
-    subprocess.run(f'wget {link} -O {diffusion_path}', shell=True)
-ddpm = DDPM.load_from_checkpoint('models/geom_difflinker.ckpt', map_location=device).eval().to(device)
-print('Loaded diffusion model')
 def read_molecule_content(path):
     with open(path, "r") as f:
@@ -60,7 +87,7 @@ def read_molecule(path):
 def show_input(input_file):
     if input_file is None:
-        return ['', gr.Radio.update(visible=False, value='Sample 1')]
     if isinstance(input_file, str):
         path = input_file
     else:
@@ -70,7 +97,8 @@ def show_input(input_file):
         msg = output.INVALID_FORMAT_MSG.format(extension=extension)
         return [
             output.IFRAME_TEMPLATE.format(html=msg),
-            gr.Radio.update(visible=False)
         ]
     try:
@@ -78,17 +106,22 @@ def show_input(input_file):
     except Exception as e:
         return [
             f'Could not read the molecule: {e}',
-            gr.Radio.update(visible=False)
         ]
     html = output.INITIAL_RENDERING_TEMPLATE.format(molecule=molecule, fmt=extension)
     return [
         output.IFRAME_TEMPLATE.format(html=html),
-        gr.Radio.update(visible=False)
     ]
 def draw_sample(idx, out_files):
     in_file = out_files[0]
     in_sdf = in_file if isinstance(in_file, str) else in_file.name
@@ -97,24 +130,43 @@ def draw_sample(idx, out_files):
     input_fragments_content = read_molecule_content(in_sdf)
     generated_molecule_content = read_molecule_content(out_sdf)
     html = output.SAMPLES_RENDERING_TEMPLATE.format(
         fragments=input_fragments_content,
-        fragments_fmt='sdf',
         molecule=generated_molecule_content,
-        molecule_fmt='sdf',
     )
     return output.IFRAME_TEMPLATE.format(html=html)
-def generate(input_file, n_steps, n_atoms):
     if input_file is None:
-        return ''
     path = input_file.name
     extension = path.split('.')[-1]
     if extension not in ['sdf', 'pdb', 'mol', 'mol2']:
         msg = output.INVALID_FORMAT_MSG.format(extension=extension)
-        return output.IFRAME_TEMPLATE.format(html=msg)
     try:
         molecule = read_molecule(path)
@@ -122,16 +174,22 @@ def generate(input_file, n_steps, n_atoms):
         name = '.'.join(path.split('/')[-1].split('.')[:-1])
         inp_sdf = f'results/input_{name}.sdf'
     except Exception as e:
-        return f'Could not read the molecule: {e}'
     if molecule.GetNumAtoms() > 50:
-        return f'Too large molecule: upper limit is 50 heavy atoms'
     with Chem.SDWriter(inp_sdf) as w:
         w.write(molecule)
     positions, one_hot, charges = parse_molecule(molecule, is_geom=True)
     anchors = np.zeros_like(charges)
     fragment_mask = np.ones_like(charges)
     linker_mask = np.zeros_like(charges)
     print('Read and parsed molecule')
@@ -151,7 +209,6 @@ def generate(input_file, n_steps, n_atoms):
     print('Created dataloader')
     ddpm.edm.T = n_steps
-    assert ddpm.center_of_mass == 'fragments'
     if n_atoms == 0:
         def sample_fn(_data):
@@ -169,34 +226,21 @@ def generate(input_file, n_steps, n_atoms):
             return torch.ones(_data['positions'].shape[0], device=device, dtype=torch.long) * n_atoms
     for data in dataloader:
-        chain, node_mask = ddpm.sample_chain(data, sample_fn=sample_fn, keep_frames=1)
-        print('Generated linker')
-        x = chain[0][:, :, :ddpm.n_dims]
-        h = chain[0][:, :, ddpm.n_dims:]
-        # Put the molecule back to the initial orientation
-        pos_masked = data['positions'] * data['fragment_mask']
-        N = data['fragment_mask'].sum(1, keepdims=True)
-        mean = torch.sum(pos_masked, dim=1, keepdim=True) / N
-        x = x + mean * node_mask
-        names = [f'output_{i+1}_{name}' for i in range(N_SAMPLES)]
-        save_xyz_file('results', h, x, node_mask, names=names, is_geom=True, suffix='')
-        print('Saved XYZ files')
-        break
-    out_files = []
-    for i in range(N_SAMPLES):
-        out_xyz = f'results/output_{i+1}_{name}_.xyz'
-        out_sdf = f'results/output_{i+1}_{name}_.sdf'
-        subprocess.run(f'obabel {out_xyz} -O {out_sdf}', shell=True)
-        out_files.append(out_sdf)
-    print('Converted to SDF')
     return [
-        draw_sample(0, out_files),
-        [inp_sdf] + out_files,
-        gr.Radio.update(visible=True, value='Sample 1')
     ]
@@ -215,6 +259,7 @@ with demo:
     )
     with gr.Box():
         with gr.Row():
             with gr.Column():
                 gr.Markdown('## Input Fragments')
                 gr.Markdown('Upload the file with 3D-coordinates of the input fragments in .pdb, .mol2 or .sdf format:')
@@ -238,11 +283,11 @@ with demo:
                 output_files = gr.File(file_count='multiple', label='Output Files', interactive=False)
             with gr.Column():
                 gr.Markdown('## Visualization')
-                # gr.Markdown('Below you will see input and output molecules')
                 samples = gr.Radio(
                     choices=['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4', 'Sample 5'],
                     value='Sample 1',
-                    type='index',
                     show_label=False,
                     visible=False,
                     interactive=True,
@@ -252,27 +297,29 @@ with demo:
     input_file.change(
         fn=show_input,
         inputs=[input_file],
-        outputs=[visualization, samples],
     )
     input_file.clear(
-        fn=lambda: [None, '', gr.Radio.update(visible=False)],
         inputs=[],
-        outputs=[input_file, visualization, samples],
     )
     examples.click(
         fn=lambda idx: [f'examples/example_{idx+1}.sdf', 10, 0] + show_input(f'examples/example_{idx+1}.sdf'),
         inputs=[examples],
-        outputs=[input_file, n_steps, n_atoms, visualization, samples]
     )
     button.click(
         fn=generate,
-        inputs=[input_file, n_steps, n_atoms],
-        outputs=[visualization, output_files, samples],
     )
     samples.change(
         fn=draw_sample,
         inputs=[samples, output_files],
         outputs=[visualization],
     )
 demo.launch(server_name=args.ip)

 from rdkit import Chem
 from src import const
 from src.datasets import get_dataloader, collate_with_fragment_edges, parse_molecule
 from src.lightning import DDPM
 from src.linker_size_lightning import SizeClassifier
+from src.generation import N_SAMPLES, generate_linkers, try_to_convert_to_sdf
+MODELS_METADATA = {
+    'geom_difflinker': {
+        'link': 'https://zenodo.org/record/7121300/files/geom_difflinker.ckpt?download=1',
+        'path': 'models/geom_difflinker.ckpt',
+    },
+    'geom_difflinker_given_anchors': {
+        'link': 'https://zenodo.org/record/7775568/files/geom_difflinker_given_anchors.ckpt?download=1',
+        'path': 'models/geom_difflinker_given_anchors.ckpt',
+    },
+    'pockets_difflinker': {
+        'link': 'https://zenodo.org/record/7775568/files/pockets_difflinker_full_no_anchors.ckpt?download=1',
+        'path': 'models/pockets_difflinker.ckpt',
+    },
+    'pockets_difflinker_given_anchors': {
+        'link': 'https://zenodo.org/record/7775568/files/pockets_difflinker_full.ckpt?download=1',
+        'path': 'models/pockets_difflinker_given_anchors.ckpt',
+    },
+}
 parser = argparse.ArgumentParser()
 parser.add_argument('--ip', type=str, default=None)
 size_nn = SizeClassifier.load_from_checkpoint('models/geom_size_gnn.ckpt', map_location=device).eval().to(device)
 print('Loaded SizeGNN model')
+# Download (when the checkpoint file is missing) and load every model listed in
+# MODELS_METADATA; the loaded models are stored in a dict keyed by model name.
+diffusion_models = {}
+for model_name, metadata in MODELS_METADATA.items():
+    link = metadata['link']
+    diffusion_path = metadata['path']
+    if not os.path.exists(diffusion_path):
+        print(f'Downloading {model_name}...')
+        # NOTE(review): the exit status of wget is ignored here -- a failed
+        # download would presumably only surface in load_from_checkpoint below.
+        subprocess.run(f'wget {link} -O {diffusion_path}', shell=True)
+    diffusion_models[model_name] = DDPM.load_from_checkpoint(diffusion_path, map_location=device).eval().to(device)
+    print(f'Loaded model {model_name}')
+# Debug output: the current directory, its absolute path and its contents.
+print(os.curdir)
+print(os.path.abspath(os.curdir))
+print(os.listdir(os.curdir))
 def read_molecule_content(path):
     with open(path, "r") as f:
 def show_input(input_file):
     if input_file is None:
+        return ['', gr.Radio.update(visible=False, value='Sample 1'), None]
     if isinstance(input_file, str):
         path = input_file
     else:
         msg = output.INVALID_FORMAT_MSG.format(extension=extension)
         return [
             output.IFRAME_TEMPLATE.format(html=msg),
+            gr.Radio.update(visible=False),
+            None,
         ]
     try:
     except Exception as e:
         return [
             f'Could not read the molecule: {e}',
+            gr.Radio.update(visible=False),
+            None,
         ]
     html = output.INITIAL_RENDERING_TEMPLATE.format(molecule=molecule, fmt=extension)
     return [
         output.IFRAME_TEMPLATE.format(html=html),
+        gr.Radio.update(visible=False),
+        None,
     ]
 def draw_sample(idx, out_files):
+    if isinstance(idx, str):
+        idx = int(idx.strip().split(' ')[-1]) - 1
     in_file = out_files[0]
     in_sdf = in_file if isinstance(in_file, str) else in_file.name
     input_fragments_content = read_molecule_content(in_sdf)
     generated_molecule_content = read_molecule_content(out_sdf)
+    fragments_fmt = in_sdf.split('.')[-1]
+    molecule_fmt = out_sdf.split('.')[-1]
     html = output.SAMPLES_RENDERING_TEMPLATE.format(
         fragments=input_fragments_content,
+        fragments_fmt=fragments_fmt,
         molecule=generated_molecule_content,
+        molecule_fmt=molecule_fmt,
     )
     return output.IFRAME_TEMPLATE.format(html=html)
+def generate(input_file, n_steps, n_atoms, radio_samples, selected_atoms):
+    # Parsing selected atoms (javascript output)
+    selected_atoms = selected_atoms.strip()
+    if selected_atoms == '':
+        selected_atoms = []
+    else:
+        selected_atoms = list(map(int, selected_atoms.split(',')))
+    # Selecting model
+    if len(selected_atoms) == 0:
+        selected_model_name = 'geom_difflinker'
+    else:
+        selected_model_name = 'geom_difflinker_given_anchors'
     if input_file is None:
+        return [None, None, None, None]
+    print(f'Start generating with model {selected_model_name}, selected_atoms:', selected_atoms)
+    ddpm = diffusion_models[selected_model_name]
     path = input_file.name
     extension = path.split('.')[-1]
     if extension not in ['sdf', 'pdb', 'mol', 'mol2']:
         msg = output.INVALID_FORMAT_MSG.format(extension=extension)
+        return [output.IFRAME_TEMPLATE.format(html=msg), None, None, None]
     try:
         molecule = read_molecule(path)
         name = '.'.join(path.split('/')[-1].split('.')[:-1])
         inp_sdf = f'results/input_{name}.sdf'
     except Exception as e:
+        error = f'Could not read the molecule: {e}'
+        msg = output.ERROR_FORMAT_MSG.format(message=error)
+        return [output.IFRAME_TEMPLATE.format(html=msg), None, None, None]
     if molecule.GetNumAtoms() > 50:
+        error = f'Too large molecule: upper limit is 50 heavy atoms'
+        msg = output.ERROR_FORMAT_MSG.format(message=error)
+        return [output.IFRAME_TEMPLATE.format(html=msg), None, None, None]
     with Chem.SDWriter(inp_sdf) as w:
         w.write(molecule)
     positions, one_hot, charges = parse_molecule(molecule, is_geom=True)
     anchors = np.zeros_like(charges)
+    anchors[selected_atoms] = 1
     fragment_mask = np.ones_like(charges)
     linker_mask = np.zeros_like(charges)
     print('Read and parsed molecule')
     print('Created dataloader')
     ddpm.edm.T = n_steps
     if n_atoms == 0:
         def sample_fn(_data):
             return torch.ones(_data['positions'].shape[0], device=device, dtype=torch.long) * n_atoms
     for data in dataloader:
+        try:
+            generate_linkers(ddpm=ddpm, data=data, sample_fn=sample_fn, name=name)
+        except Exception as e:
+            error = f'Caught exception while generating linkers: {e}'
+            msg = output.ERROR_FORMAT_MSG.format(message=error)
+            return [output.IFRAME_TEMPLATE.format(html=msg), None, None, None]
+    out_files = try_to_convert_to_sdf(name)
+    out_files = [inp_sdf] + out_files
     return [
+        draw_sample(radio_samples, out_files),
+        out_files,
+        gr.Radio.update(visible=True),
+        None
     ]
     )
     with gr.Box():
         with gr.Row():
+            hidden = gr.Textbox(visible=False)
             with gr.Column():
                 gr.Markdown('## Input Fragments')
                 gr.Markdown('Upload the file with 3D-coordinates of the input fragments in .pdb, .mol2 or .sdf format:')
                 output_files = gr.File(file_count='multiple', label='Output Files', interactive=False)
             with gr.Column():
                 gr.Markdown('## Visualization')
+                gr.Markdown('**Hint:** click on atoms to select anchor points (optionally)')
                 samples = gr.Radio(
                     choices=['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4', 'Sample 5'],
                     value='Sample 1',
+                    type='value',
                     show_label=False,
                     visible=False,
                     interactive=True,
     input_file.change(
         fn=show_input,
         inputs=[input_file],
+        outputs=[visualization, samples, hidden],
     )
     input_file.clear(
+        fn=lambda: [None, '', gr.Radio.update(visible=False), None],
         inputs=[],
+        outputs=[input_file, visualization, samples, hidden],
     )
     examples.click(
         fn=lambda idx: [f'examples/example_{idx+1}.sdf', 10, 0] + show_input(f'examples/example_{idx+1}.sdf'),
         inputs=[examples],
+        outputs=[input_file, n_steps, n_atoms, visualization, samples, hidden]
     )
     button.click(
         fn=generate,
+        inputs=[input_file, n_steps, n_atoms, samples, hidden],
+        outputs=[visualization, output_files, samples, hidden],
+        _js=output.RETURN_SELECTION_JS,
     )
     samples.change(
         fn=draw_sample,
         inputs=[samples, output_files],
         outputs=[visualization],
     )
+    demo.load(_js=output.STARTUP_JS)
 demo.launch(server_name=args.ip)

output.py CHANGED Viewed

@@ -22,9 +22,42 @@ INITIAL_RENDERING_TEMPLATE = """<!DOCTYPE html>
         $(document).ready(function() {{
             let element = $("#container");
             let config = {{ backgroundColor: "white" }};
-            let viewer = $3Dmol.createViewer( element, config );
-            viewer.addModel(`{molecule}`, "{fmt}")
-            viewer.getModel().setStyle({{ stick: {{ colorscheme:"greenCarbon" }} }})
             viewer.zoomTo();
             viewer.zoom(0.7);
             viewer.render();
@@ -113,8 +146,63 @@ INVALID_FORMAT_MSG = """
 </html>
 """
 IFRAME_TEMPLATE = """<iframe style="width: 100%; height: 700px" name="result" allow="midi; geolocation; microphone; camera;
 display-capture; encrypted-media;" sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups
 allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
 allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""

         $(document).ready(function() {{
             let element = $("#container");
             let config = {{ backgroundColor: "white" }};
+            let viewer = $3Dmol.createViewer(element, config);
+            let defaultStyle = {{ stick: {{ colorscheme: "greenCarbon" }} }};
+            viewer.addModel(`{molecule}`, "{fmt}");
+            viewer.getModel(0).setStyle(defaultStyle);
+            // document.cookie = document.cookie + "|selected_atoms:";
+            viewer.getModel(0).setClickable(
+                {{}},
+                true,
+                function (_atom, _viewer, _event, _container) {{
+                    if (!_atom.isClicked) {{
+                        _atom.isClicked = true;
+                        _viewer.addStyle(
+                            {{"serial": _atom.serial, "model": 0}},
+                            {{"sphere": {{"color": "magenta", "radius": 0.4}} }}
+                        );
+                        // document.cookie = document.cookie + "atom_" + String(_atom.serial) + "-";
+                        window.parent.postMessage({{
+                            name: "atom_selection",
+                            data: {{"atom": _atom.serial, "add": true}}
+                            // data: JSON.stringify({{"add": _atom.serial}})
+                        }}, "*");
+                    }} else {{
+                        delete _atom.isClicked;
+                        _viewer.setStyle({{"serial": _atom.serial, "model": 0}}, defaultStyle);
+                        // document.cookie = document.cookie.replace("atom_" + String(_atom.serial) + "-", "");
+                        window.parent.postMessage({{
+                            name: "atom_selection",
+                            data: {{"atom": _atom.serial, "add": false}}
+                            // data: JSON.stringify({{"remove": _atom.serial}})
+                        }}, "*");
+                    }}
+                    _viewer.render();
+                }}
+            );
             viewer.zoomTo();
             viewer.zoom(0.7);
             viewer.render();
 </html>
 """
+ERROR_FORMAT_MSG = """
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
+        <style>
+            body{{
+                font-family:sans-serif
+            }}
+        </style>
+    </head>
+    <body>
+        <h3>Error:</h3>
+        {message}
+    </body>
+</html>
+"""
 IFRAME_TEMPLATE = """<iframe style="width: 100%; height: 700px" name="result" allow="midi; geolocation; microphone; camera;
 display-capture; encrypted-media;" sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups
 allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
 allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""
+STARTUP_JS = """
+() => {
+    window.selected_elements = {}
+    function handleMessage(event) {
+        // console.log("New message: ", event.data)
+        let atom = event.data.data["atom"];
+        let add = event.data.data["add"];
+        console.log("add: ", add, " atom: ", atom);
+        window.selected_elements[atom] = add;
+    }
+    window.addEventListener("message", handleMessage);
+    console.log("Listener Added");
+}
+"""
+RETURN_SELECTION_JS = """
+(input_file, n_steps, n_atoms, samples, hidden) => {
+    let selected = []
+    for (const [atom, add] of Object.entries(window.selected_elements)) {
+      if (add) {
+        console.log("Adding atom ", atom);
+        selected.push(String(atom));
+        window.parent.postMessage({
+          name: "atom_selection",
+          data: {"atom": parseInt(atom), "add": false}
+        }, "*");
+      }
+    }
+    console.log("Finished parsing");
+    return [input_file, n_steps, n_atoms, samples, selected.join(",")];
+}
+"""

src/generation.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import os.path
+import subprocess
+import torch
+from src.visualizer import save_xyz_file
+N_SAMPLES = 5
+def generate_linkers(ddpm, data, sample_fn, name):
+    """Sample from ``ddpm`` for ``data`` and write the result as XYZ files to ``results``.
+
+    :param ddpm: model object providing ``sample_chain`` and ``n_dims``.
+    :param data: batch with (at least) ``positions`` and ``fragment_mask`` entries.
+    :param sample_fn: forwarded unchanged to ``ddpm.sample_chain``.
+    :param name: base name used to build the ``output_{i}_{name}`` file names.
+    :return: ``None``; the output is the files written by ``save_xyz_file``.
+    """
+    chain, node_mask = ddpm.sample_chain(data, sample_fn=sample_fn, keep_frames=1)
+    print('Generated linker')
+    # Split the last axis of the first chain entry: the first ``n_dims``
+    # channels go to ``x``, the remaining ones to ``h``.
+    x = chain[0][:, :, :ddpm.n_dims]
+    h = chain[0][:, :, ddpm.n_dims:]
+    # Put the molecule back to the initial orientation
+    # (adds the fragment-masked mean of the input positions back to ``x``;
+    # presumably sampling happens in a frame centered on the fragments -- TODO confirm)
+    pos_masked = data['positions'] * data['fragment_mask']
+    N = data['fragment_mask'].sum(1, keepdims=True)
+    mean = torch.sum(pos_masked, dim=1, keepdim=True) / N
+    x = x + mean * node_mask
+    # NOTE(review): N_SAMPLES names are always generated -- this assumes the
+    # batch holds exactly N_SAMPLES entries; confirm against the dataloader.
+    names = [f'output_{i + 1}_{name}' for i in range(N_SAMPLES)]
+    save_xyz_file('results', h, x, node_mask, names=names, is_geom=True, suffix='')
+    print('Saved XYZ files')
+def try_to_convert_to_sdf(name):
+    """Convert the generated XYZ samples for ``name`` to SDF using ``obabel``.
+
+    For each of the ``N_SAMPLES`` samples, ``obabel`` is run on
+    ``results/output_{i}_{name}_.xyz``. The SDF path is reported when that file
+    exists afterwards; otherwise the XYZ path is reported instead, so the
+    conversion is best-effort and never raises because of ``obabel``.
+
+    :param name: base name of the molecule, as used in the output file names.
+    :return: list of ``N_SAMPLES`` paths (SDF where available, else XYZ).
+    """
+    out_files = []
+    for i in range(N_SAMPLES):
+        out_xyz = f'results/output_{i + 1}_{name}_.xyz'
+        out_sdf = f'results/output_{i + 1}_{name}_.sdf'
+        try:
+            # Pass an argument list instead of a shell string: ``name`` is part
+            # of both paths, so spaces or shell metacharacters in it must not
+            # be interpreted by a shell.
+            subprocess.run(['obabel', out_xyz, '-O', out_sdf])
+        except OSError as e:
+            # Without a shell, a missing ``obabel`` binary raises instead of
+            # producing a non-zero exit status; keep the XYZ fallback below.
+            print(f'Could not run obabel: {e}')
+        out_files.append(out_sdf if os.path.exists(out_sdf) else out_xyz)
+    return out_files