RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MolEnumerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2020-2021 Greg Landrum and T5 Informatics GmbH
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#ifndef RDKIT_MOLENUMERATOR_H
11#define RDKIT_MOLENUMERATOR_H
12
13#include <RDGeneral/export.h>
14#include <GraphMol/RDKitBase.h>
15#include <GraphMol/MolBundle.h>
16
17#include <vector>
18#include <map>
19#include <string>
20#include <memory>
21
22namespace RDKit {
23class ChemicalReaction;
24namespace MolEnumerator {
25
26namespace detail {
27extern const std::string idxPropName;
30} // namespace detail
31
32//! abstract base class for a molecule enumeration operation
34 public:
36 virtual ~MolEnumeratorOp() {}  // virtual: copy() hands out base-class pointers, so destruction must reach the derived type
37 //! returns a vector with the number of possible variations at each
38 //! variability point covered by this operation
39 virtual std::vector<size_t> getVariationCounts() const = 0;
40 //! returns the molecule corresponding to a particular variation
41 /*! which.size() should be equal to the number of variation counts.
42 */
43 virtual std::unique_ptr<ROMol> operator()(
44 const std::vector<size_t> &which) const = 0;
45 //! initializes this operation to work on a particular molecule
46 virtual void initFromMol(const ROMol &mol) = 0;
47 //! polymorphic copy
48 virtual std::unique_ptr<MolEnumeratorOp> copy() const = 0;
49};
50
51//! Molecule enumeration operation corresponding to position variation bonds
52/*! This uses ATTACH and ENDPTS properties on bonds and requires that the bond
53 * has one dummy atom (which will be discarded). The other atom of the bond will
54 * be connected to the atoms listed in the ENDPTS property
55 */
57 public:
59 PositionVariationOp(const std::shared_ptr<ROMol> mol) : dp_mol(mol) {  // shares ownership of mol; the molecule itself is not copied
60 PRECONDITION(mol, "bad molecule");  // mol must be non-null
61 initFromMol();  // private no-argument overload (defined elsewhere)
62 }
63 PositionVariationOp(const ROMol &mol) : dp_mol(new ROMol(mol)) {  // stores its own copy of mol
64 initFromMol();  // private no-argument overload (defined elsewhere)
65 }
67 : dp_mol(other.dp_mol), d_variationPoints(other.d_variationPoints) {}
69 if (&other == this) {
70 return *this;
71 }
72 dp_mol = other.dp_mol;
73 d_variationPoints = other.d_variationPoints;
74 return *this;
75 }
76 //! \override
77 std::vector<size_t> getVariationCounts() const override;
78
79 //! \override
80 std::unique_ptr<ROMol> operator()(
81 const std::vector<size_t> &which) const override;
82
83 //! \override
84 void initFromMol(const ROMol &mol) override;
85
86 //! \override
87 std::unique_ptr<MolEnumeratorOp> copy() const override {
88 return std::unique_ptr<MolEnumeratorOp>(new PositionVariationOp(*this));  // copy-constructed from *this; the caller owns the result
89 }
90
91 private:
92 std::shared_ptr<ROMol> dp_mol{nullptr};
93 std::vector<std::pair<unsigned int, std::vector<unsigned int>>>
94 d_variationPoints{};
95 std::vector<size_t> d_dummiesAtEachPoint{};
96 void initFromMol();
97};
98
99//! Molecule enumeration operation corresponding to LINKNODES
100/*!
101 */
103 public:
105 LinkNodeOp(const std::shared_ptr<ROMol> mol) : dp_mol(mol) {  // shares ownership of mol; the molecule itself is not copied
106 PRECONDITION(mol, "bad molecule");  // mol must be non-null
107 initFromMol();  // private no-argument overload (defined elsewhere)
108 }
109 LinkNodeOp(const ROMol &mol) : dp_mol(new ROMol(mol)) { initFromMol(); }  // stores its own copy of mol, then runs the private initFromMol()
110 LinkNodeOp(const LinkNodeOp &other)  // member-wise copy of every data member
111 : dp_mol(other.dp_mol),  // shared_ptr copy: both objects refer to the same molecule
112 dp_frame(other.dp_frame),  // shared_ptr copy: both objects refer to the same frame
113 d_countAtEachPoint(other.d_countAtEachPoint),
114 d_variations(other.d_variations),
115 d_pointRanges(other.d_pointRanges),
116 d_isotopeMap(other.d_isotopeMap),
117 d_atomMap(other.d_atomMap) {}  // the Atom pointers are copied as-is, not the atoms they point to
119 if (&other == this) {
120 return *this;
121 }
122 dp_mol = other.dp_mol;
123 dp_frame = other.dp_frame;
124 d_countAtEachPoint = other.d_countAtEachPoint;
125 d_variations = other.d_variations;
126 d_pointRanges = other.d_pointRanges;
127 d_isotopeMap = other.d_isotopeMap;
128 d_atomMap = other.d_atomMap;
129 return *this;
130 }
131 //! \override
132 std::vector<size_t> getVariationCounts() const override;
133
134 //! \override
135 std::unique_ptr<ROMol> operator()(
136 const std::vector<size_t> &which) const override;
137
138 //! \override
139 void initFromMol(const ROMol &mol) override;
140
141 //! \override
142 std::unique_ptr<MolEnumeratorOp> copy() const override {
143 return std::unique_ptr<MolEnumeratorOp>(new LinkNodeOp(*this));  // copy-constructed from *this; the caller owns the result
144 }
145
146 private:
147 std::shared_ptr<ROMol> dp_mol{nullptr};
148 std::shared_ptr<RWMol> dp_frame{nullptr};
149 std::vector<size_t> d_countAtEachPoint{};
150 std::vector<std::tuple<unsigned, unsigned, unsigned>> d_variations;
151 std::vector<std::pair<unsigned, unsigned>> d_pointRanges;
152 std::map<unsigned, unsigned> d_isotopeMap;
153 std::map<unsigned, Atom *> d_atomMap;
154
155 void initFromMol();
156};
157
158//! Molecule enumeration operation corresponding to SRUs
159/*!
160 This should be considered a work in progress and somewhat fragile.
161
162 Known limitations:
163 - Overlapping SRUs, i.e. where one monomer is contained within another, are
164 not supported
165
166 */
168 public:
170 RepeatUnitOp(const std::shared_ptr<ROMol> mol) : dp_mol(mol) {  // shares ownership of mol; the molecule itself is not copied
171 PRECONDITION(mol, "bad molecule");  // mol must be non-null
172 initFromMol();  // private no-argument overload (defined elsewhere)
173 };
174 RepeatUnitOp(const ROMol &mol) : dp_mol(new ROMol(mol)) { initFromMol(); };  // stores its own copy of mol, then runs the private initFromMol()
176 : d_defaultRepeatCount(other.d_defaultRepeatCount),
177 dp_mol(other.dp_mol),
178 dp_frame(other.dp_frame),
179 d_repeats(other.d_repeats),
180 d_countAtEachPoint(other.d_countAtEachPoint),
181 d_variations(other.d_variations),
182 d_pointRanges(other.d_pointRanges),
183 d_isotopeMap(other.d_isotopeMap),
184 d_atomMap(other.d_atomMap){};
186 if (&other == this) {
187 return *this;
188 }
189 dp_mol = other.dp_mol;
190 dp_frame = other.dp_frame;
191 d_repeats = other.d_repeats;
192 d_countAtEachPoint = other.d_countAtEachPoint;
193 d_variations = other.d_variations;
194 d_pointRanges = other.d_pointRanges;
195 d_isotopeMap = other.d_isotopeMap;
196 d_atomMap = other.d_atomMap;
197 d_defaultRepeatCount = other.d_defaultRepeatCount;
198 return *this;
199 };
200 //! \override
201 std::vector<size_t> getVariationCounts() const override;
202
203 //! \override
204 std::unique_ptr<ROMol> operator()(
205 const std::vector<size_t> &which) const override;
206
207 //! \override
208 void initFromMol(const ROMol &mol) override;
209
210 //! \override
211 std::unique_ptr<MolEnumeratorOp> copy() const override {
212 return std::unique_ptr<MolEnumeratorOp>(new RepeatUnitOp(*this));  // copy-constructed from *this; the caller owns the result
213 }
214
215 size_t d_defaultRepeatCount =
216 4; //!< from mol files we typically don't know the repeat count. This is
217 //!< what we use instead
218 private:
219 std::shared_ptr<ROMol> dp_mol{nullptr};
220 std::shared_ptr<RWMol> dp_frame{nullptr};
221 std::vector<std::shared_ptr<RWMol>> d_repeats;
222 std::vector<RWMol> dp_repeatUnits{};
223 std::vector<size_t> d_countAtEachPoint{};
224 std::vector<unsigned> d_sruOrder{};
225 std::vector<std::tuple<unsigned, unsigned, unsigned>> d_variations;
226 std::vector<std::pair<unsigned, unsigned>> d_pointRanges;
227 std::map<unsigned, unsigned> d_isotopeMap;
228 std::map<unsigned, Atom *> d_atomMap;
229
230 void initFromMol();
231};
232
233//! Parameters used to control the molecule enumeration
235 bool sanitize = false;
236 size_t maxToEnumerate = 1000;
237 bool doRandom = false; //!< not yet implemented
238 int randomSeed = -1; //!< not yet implemented
239 std::shared_ptr<MolEnumeratorOp> dp_operation;
240};
241
242//! Returns a MolBundle containing the molecules resulting from applying the
243//! operators contained in \c paramsList to \c mol.
244//! The operators are applied in order.
245/*!
246NOTE: the current implementation does not support molecules which include
247both LINKNODE and SRU features.
248
249*/
251enumerate(const ROMol &mol, const std::vector<MolEnumeratorParams> &paramsList);
252
253//! Returns a MolBundle containing the molecules resulting from applying the
254//! enumerable operators contained in \c mol.
255/*!
256\param maxPerOperation: the maximum number of molecules which an individual
257operation is allowed to generate
258
259NOTE: the current implementation does not support molecules which include
260both LINKNODE and SRU features.
261
262*/
264 size_t maxPerOperation = 0);
265
266//! Returns a MolBundle containing the molecules resulting from applying the
267//! operator contained in \c params to \c mol.
268inline MolBundle enumerate(const ROMol &mol,
269 const MolEnumeratorParams &params) {
270 std::vector<MolEnumeratorParams> v = {params};  // wrap the single params object in a one-element vector
271 return enumerate(mol, v);  // forward to the overload taking a vector of params
272};
273} // namespace MolEnumerator
274} // namespace RDKit
275
276#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
Defines a class for managing bundles of molecules.
pulls in the core RDKit functionality
MolBundle contains a collection of related ROMols.
Definition MolBundle.h:39
Molecule enumeration operation corresponding to LINKNODES.
LinkNodeOp(const LinkNodeOp &other)
void initFromMol(const ROMol &mol) override
\override
LinkNodeOp(const std::shared_ptr< ROMol > mol)
std::vector< size_t > getVariationCounts() const override
\override
std::unique_ptr< ROMol > operator()(const std::vector< size_t > &which) const override
\override
LinkNodeOp & operator=(const LinkNodeOp &other)
std::unique_ptr< MolEnumeratorOp > copy() const override
\override
abstract base class for a molecule enumeration operation
virtual std::vector< size_t > getVariationCounts() const =0
virtual std::unique_ptr< MolEnumeratorOp > copy() const =0
polymorphic copy
virtual std::unique_ptr< ROMol > operator()(const std::vector< size_t > &which) const =0
returns the molecule corresponding to a particular variation
virtual void initFromMol(const ROMol &mol)=0
initializes this operation to work on a particular molecule
Molecule enumeration operation corresponding to position variation bonds.
PositionVariationOp & operator=(const PositionVariationOp &other)
std::unique_ptr< ROMol > operator()(const std::vector< size_t > &which) const override
\override
std::vector< size_t > getVariationCounts() const override
\override
void initFromMol(const ROMol &mol) override
\override
PositionVariationOp(const PositionVariationOp &other)
PositionVariationOp(const std::shared_ptr< ROMol > mol)
std::unique_ptr< MolEnumeratorOp > copy() const override
\override
Molecule enumeration operation corresponding to SRUs.
void initFromMol(const ROMol &mol) override
\override
RepeatUnitOp(const RepeatUnitOp &other)
RepeatUnitOp & operator=(const RepeatUnitOp &other)
std::unique_ptr< ROMol > operator()(const std::vector< size_t > &which) const override
\override
std::vector< size_t > getVariationCounts() const override
\override
RepeatUnitOp(const std::shared_ptr< ROMol > mol)
std::unique_ptr< MolEnumeratorOp > copy() const override
\override
#define RDKIT_MOLENUMERATOR_EXPORT
Definition export.h:297
const std::string idxPropName
void removeOrigIndices(ROMol &mol)
void preserveOrigIndices(ROMol &mol)
RDKIT_MOLENUMERATOR_EXPORT MolBundle enumerate(const ROMol &mol, const std::vector< MolEnumeratorParams > &paramsList)
Std stuff.
bool rdvalue_is(const RDValue_cast_t)
Parameters used to control the molecule enumeration.
std::shared_ptr< MolEnumeratorOp > dp_operation