1
2
3
4
5
6
7
8
9
10 import sqlalchemy
11
12 from rdkit import Chem
13 from rdkit.Chem import AllChem
14 from rdkit.Chem import Lipinski,Descriptors,Crippen
15 from rdkit.Dbase.DbConnection import DbConnect
16 from rdkit.Dbase import DbModule
17 import os
18
19 from sqlalchemy.ext.declarative import declarative_base
20 from sqlalchemy import Table,Column,MetaData
21 from sqlalchemy import Integer,Text,String,ForeignKey,Binary,DateTime,Float
22 from sqlalchemy.orm import relation,mapper,sessionmaker,backref
23 from sqlalchemy import create_engine
24
# Declarative base; its metadata is what the schema helpers below pass to
# create_all() to create the tables on an engine.
decBase = declarative_base()
26
31
33 engine = create_engine(dbUrl,echo=echo)
34 decBase.metadata.create_all(engine)
35 maker = sessionmaker(bind=engine)
36 return maker
37
# Alias: ConnectToSchema refers to the very same callable as RegisterSchema.
ConnectToSchema=RegisterSchema
39
41 engine = create_engine(dbUrl,echo=echo)
42 meta
43 decBase.metadata.create_all(engine)
44 maker = sessionmaker(bind=engine)
45 return maker
46
47
48
# Set up the module-level logger: obtained from rdkit.RDLogger (imported here
# under the name `logging`) and configured to emit messages at INFO level.
import rdkit.RDLogger as logging
logger = logging.logger()
logger.setLevel(logging.INFO)
52
53 -def ProcessMol(session,mol,globalProps,nDone,nameProp='_Name',nameCol='compound_id',
54 redraw=False,keepHs=False,
55 skipProps=False,addComputedProps=False,
56 skipSmiles=False):
93
def _CommitIndividually(session, compounds):
    """Add and commit each compound on its own, skipping the ones that fail.

    Used as a fallback after a batch commit has failed and been rolled back:
    a compound whose individual commit raises an ``Exception`` is rolled back
    and dropped so the remaining ones can still be stored.  Anything that is
    not an ``Exception`` (KeyboardInterrupt, SystemExit, ...) is rolled back
    and re-raised.
    """
    for cmpd in compounds:
        try:
            session.add(cmpd)
            session.commit()
        except Exception:
            # Deliberate best-effort: drop only the offending row.
            session.rollback()
        except BaseException:
            # Roll back even on KeyboardInterrupt and friends, then propagate.
            session.rollback()
            raise


def LoadDb(suppl,dbName,nameProp='_Name',nameCol='compound_id',silent=False,
           redraw=False,errorsTo=None,keepHs=False,defaultVal='N/A',skipProps=False,
           regName='molecules',skipSmiles=False,maxRowsCached=-1,
           uniqNames=False,addComputedProps=False,lazySupplier=False,
           numForPropScan=10,startAnew=True):
    """Load the molecules provided by a supplier into an SQLite database.

    Columns are added to the ``Compound`` class before the schema is
    registered: one for the compound name, optionally one for SMILES, one per
    molecule property discovered in an initial scan of the supplier, and
    optionally a fixed set of computed-descriptor columns.  Every molecule is
    then handed to ``ProcessMol`` and the session is committed every 100
    molecules and once more at the end.

    Arguments:
      - suppl: iterable of molecules.  ``len(suppl)`` is called unless
        ``lazySupplier`` is true.  One-shot iterators are supported: the
        molecules consumed by the property scan are replayed in the main pass.
      - dbName: path of the SQLite file (opened as ``sqlite:///<dbName>``).
      - nameProp, redraw, keepHs: forwarded unchanged to ``ProcessMol``.
      - nameCol: name of the compound-name column (lower-cased); a String
        column with default ``defaultVal`` and ``unique=uniqNames``.
      - silent: if true, no progress/info messages are logged.
      - errorsTo: optional object with a ``write`` method.  For every falsy
        molecule the supplier's ``GetItemText`` output is written to it when
        the supplier has that method; otherwise a warning is logged.
      - defaultVal: default value of the name and property columns.
      - skipProps: if true, no property scan is done (also forwarded to
        ``ProcessMol``).
      - skipSmiles: if true, no ``smiles`` column is added (also forwarded to
        ``ProcessMol``).
      - uniqNames: whether the name column carries a unique constraint.
      - addComputedProps: if true, DonorCount, AcceptorCount,
        RotatableBondCount, AMW and MolLogP columns are added (also forwarded
        to ``ProcessMol``).
      - lazySupplier: if true, the supplier's length is not queried.
      - numForPropScan: number of valid molecules inspected for property names.
      - startAnew: if true, an existing file at ``dbName`` is deleted first.
      - regName, maxRowsCached: accepted for interface compatibility; not used
        by this function.

    Raises:
      IOError: if ``startAnew`` is true and the old database file still exists
        after the deletion attempts.
    """
    # Function-scope imports keep this block self-contained.
    import itertools
    import time
    import traceback

    nMols = -1 if lazySupplier else len(suppl)
    if not silent:
        logger.info("Generating molecular database in file %s"%dbName)
        if not lazySupplier:
            logger.info(" Processing %d molecules"%nMols)

    globalProps = {}
    if startAnew:
        if os.path.exists(dbName):
            # The file may be briefly locked by another handle: retry a few
            # times, but only for OS-level failures so that Ctrl-C still works.
            for _ in range(5):
                try:
                    os.unlink(dbName)
                    break
                except OSError:
                    time.sleep(2)
        if os.path.exists(dbName):
            raise IOError('could not delete old database %s'%dbName)

    sIter = iter(suppl)
    # Everything pulled from sIter during the property scan is kept here so
    # that the main pass can replay it; re-iterating `suppl` instead would
    # silently drop these molecules for suppliers that cannot be rewound.
    scanned = []

    nameColumn = nameCol.lower()
    setattr(Compound, nameColumn,
            Column(nameColumn, String, default=defaultVal, unique=uniqNames))
    if not skipSmiles:
        Compound.smiles = Column(Text, unique=True)
    if not skipProps:
        while numForPropScan > 0:
            try:
                m = next(sIter)
            except StopIteration:
                break
            scanned.append(m)
            if not m:
                # Unparsable entries do not count towards the scan budget.
                continue
            for pn in m.GetPropNames():
                if pn.lower() == nameColumn:
                    continue
                if pn not in globalProps:
                    globalProps[pn] = 1
                    setattr(Compound, pn.lower(),
                            Column(pn.lower(), String, default=defaultVal))
            numForPropScan -= 1
    if addComputedProps:
        Compound.DonorCount = Column(Integer)
        Compound.AcceptorCount = Column(Integer)
        Compound.RotatableBondCount = Column(Integer)
        Compound.AMW = Column(Float)
        Compound.MolLogP = Column(Float)
    # The schema must be registered only after all columns have been attached.
    session = RegisterSchema('sqlite:///%s'%(dbName))()

    nDone = 0
    cache = []
    for m in itertools.chain(scanned, sIter):
        nDone += 1
        if not m:
            if errorsTo:
                if hasattr(suppl, 'GetItemText'):
                    errorsTo.write(suppl.GetItemText(nDone-1))
                else:
                    logger.warning('full error file support not complete')
            continue

        cmpd = ProcessMol(session, m, globalProps, nDone, nameProp=nameProp,
                          nameCol=nameCol, redraw=redraw,
                          keepHs=keepHs, skipProps=skipProps,
                          addComputedProps=addComputedProps, skipSmiles=skipSmiles)
        if cmpd is not None:
            cache.append(cmpd)

        if not nDone % 100:
            if not silent:
                logger.info(' done %d'%nDone)
            # Commit in batches regardless of `silent`, so that the batch size
            # (and the memory held by `cache`) does not depend on verbosity.
            try:
                session.commit()
            except Exception:
                session.rollback()
                _CommitIndividually(session, cache)
            cache = []

    try:
        session.commit()
    except BaseException as exc:
        traceback.print_exc()
        session.rollback()
        _CommitIndividually(session, cache)
        if not isinstance(exc, Exception):
            # Re-raise KeyboardInterrupt, SystemExit, etc. after cleaning up.
            raise exc
if __name__=='__main__':
    # Command-line entry point: load an SD file into an SQLite database and
    # report how many Compound rows the database then contains.
    import sys
    if len(sys.argv) < 3:
        # Fail with a readable message instead of an IndexError traceback.
        sys.stderr.write('Usage: %s <input.sdf> <output.db>\n' % sys.argv[0])
        sys.exit(1)
    sdf = Chem.SDMolSupplier(sys.argv[1])
    db = sys.argv[2]
    LoadDb(sdf, db, addComputedProps=False)
    session = RegisterSchema('sqlite:///%s'%(db))()
    # Let the database count the rows rather than loading every object.
    print('>>>>', session.query(Compound).count())
210