1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 import os,weakref,re
35 from rdkit.six.moves import cStringIO as StringIO
36 from rdkit import RDConfig
37
62
65
# Module-level cache used by the hierarchy loader that follows (it declares
# all four names `global`).

# label -> node, filled while the group definitions are parsed
groupDefns = {}
# top-level nodes of the most recently built hierarchy; None until one exists
hierarchy = None
# the `data` string and file name the cached hierarchy was requested with
lastData = None
lastFilename = None
def BuildFuncGroupHierarchy(fileNm=None, data=None, force=False):
    """Parse functional-group definitions and return the top-level nodes.

    NOTE(review): the `def` line was missing from the text this block was
    rebuilt from.  The function name and the parameter order/defaults are
    reconstructed from how the body uses `fileNm`, `data` and `force` --
    confirm against the callers.

    Definitions are read from the file `fileNm` if it is given, otherwise
    from the string `data`.  When neither is supplied,
    'Functional_Group_Hierarchy.txt' in RDConfig.RDDataDir is read.

    The last hierarchy that was built is cached in the module globals.  A
    shallow copy of it is returned without re-parsing when `force` is false
    and the request does not name a different source (`data` and `fileNm`
    are each either empty or equal to the values of the cached load).

    Input format, one definition per line:

        label <TAB> SMARTS <TAB> name [<TAB> reaction SMARTS]

    Fields are separated by one or more tabs, everything from '//' onwards
    is discarded, and empty lines are skipped.  A dotted label ('A.B.C')
    makes the entry a child of 'A.B'; every ancestor label must have been
    defined on an earlier line.

    Returns:
        list of the nodes that have no parent.  Child nodes are reachable
        through each node's `children` list.

    Raises:
        FuncGroupFileParseError: a line has fewer than three fields, a label
            is repeated, an ancestor label is unknown, or the SMARTS cannot
            be parsed.
    """
    global groupDefns, hierarchy, lastData, lastFilename
    if (not force and hierarchy and (not data or data == lastData)
            and (not fileNm or fileNm == lastFilename)):
        return hierarchy[:]

    splitter = re.compile('\t+')
    # Kept as a function-local import, as in the original code.
    from rdkit import Chem

    if not fileNm and not data:
        fileNm = os.path.join(RDConfig.RDDataDir, 'Functional_Group_Hierarchy.txt')

    # After the default above at least one of fileNm/data is set, so no
    # "nothing to read" branch is needed.
    if fileNm:
        inF = open(fileNm, 'r')
    else:
        inF = StringIO(data)
    # try/finally rather than `with`: the StringIO in use comes from
    # rdkit.six and is not guaranteed to be a context manager.
    try:
        lines = inF.readlines()
    finally:
        inF.close()

    # Build into locals and publish to the module globals only once parsing
    # has succeeded, so a failed load cannot leave the cache keys pointing at
    # a hierarchy that was built from a different source.
    defns = {}
    roots = []
    for lineNo, line in enumerate(lines, 1):
        line = line.strip().split('//')[0]
        if not line:
            continue
        splitL = splitter.split(line)
        if len(splitL) < 3:
            raise FuncGroupFileParseError("Input line %d (%s) is not long enough." % (lineNo, repr(line)))

        label = splitL[0].strip()
        if label in defns:
            raise FuncGroupFileParseError("Duplicate label on line %d." % lineNo)

        labelHierarchy = label.split('.')
        parent = None
        if len(labelHierarchy) > 1:
            # Check the ancestors from the outermost inwards so the error
            # names the first one that is missing.
            for depth in range(1, len(labelHierarchy)):
                tmp = '.'.join(labelHierarchy[:depth])
                if tmp not in defns:
                    raise FuncGroupFileParseError("Hierarchy member %s (line %d) not found." % (tmp, lineNo))
            parent = defns['.'.join(labelHierarchy[:-1])]

        smarts = splitL[1]
        patt = Chem.MolFromSmarts(smarts)
        if not patt:
            raise FuncGroupFileParseError('Smarts "%s" (line %d) could not be parsed.' % (smarts, lineNo))

        name = splitL[2].strip()

        rxnSmarts = ''
        if len(splitL) > 3:
            rxnSmarts = splitL[3]

        node = FGHierarchyNode(name, patt, smarts=smarts, label=label, parent=parent,
                               rxnSmarts=rxnSmarts)
        # Compare with None explicitly: nodes support len() (it is used when
        # fingerprints are sized), so their truth value is not a safe test
        # for "has a parent".
        if parent is not None:
            parent.children.append(node)
        else:
            roots.append(node)
        defns[label] = node

    groupDefns = defns
    hierarchy = roots[:]
    lastData = data
    # Forget the previous file name when the hierarchy came from `data`, so a
    # later request for that file is not answered from this cache entry.
    lastFilename = fileNm if fileNm else None
    return roots
133
135 ms = mol.GetSubstructMatches(node.pattern)
136 count = 0
137 seen = {}
138 for m in ms:
139 if m[0] not in seen:
140 count+=1
141 seen[m[0]] = 1
142 if count:
143 res[idx] = count
144 idx += 1
145 for child in node.children:
146 idx=_SetNodeBits(mol,child,res,idx)
147 else:
148 idx += len(node)
149 return idx
150
def CreateMolFingerprint(mol, hierarchy):
    """Return a list of per-node match counts for `mol`.

    NOTE(review): the `def` line was missing from the text this block was
    rebuilt from.  The function name and the `(mol, hierarchy)` signature
    are reconstructed from the names the body uses -- confirm against the
    callers.

    The result has `sum(len(entry) for entry in hierarchy)` positions, all
    initially 0.  Each entry of `hierarchy` is passed to _SetNodeBits() in
    order, which fills in the positions belonging to that entry and reports
    where the next entry starts.

    `hierarchy` may be any iterable of nodes.  It is materialised once
    because it has to be traversed twice (sizing, then filling).
    """
    entries = list(hierarchy)
    res = [0] * sum(len(entry) for entry in entries)
    idx = 0
    for entry in entries:
        idx = _SetNodeBits(mol, entry, res, idx)
    return res
160