#include <stdio.h>
#include <iostream>

/*
  SMILES tokenization rules, implemented for the Ragel state machine
  generation library.

  Version 0.0 (2007-06-26)

  This software is in the public domain.  I would like to know if you
  use it for anything and if there are bugs in the grammar syntax.

  Contributed by Andrew Dalke
  Written 23-25 June 2007, Gothenburg Sweden

  This file is Ragel input, not plain C++: the %%{ ... }%% sections are
  replaced by generated code when the file is run through Ragel.
*/

%%{
  machine SmilesParser;
  write data;
}%%

/* MW is not correct; does not support implicit hydrogens */
/* These numbers came from OpenEye.  The table is indexed by atomic
   number; index 0 is the dummy atom '*'. */
static const double weights[] = {
  /*   0 -   7 */ 0.0, 1.00794, 4.002602, 6.941, 9.012182, 10.811, 12.0107, 14.00674,
  /*   8 -  15 */ 15.9994, 18.9984032, 20.1797, 22.98977, 24.305, 26.981538, 28.0855, 30.973761,
  /*  16 -  23 */ 32.066, 35.4527, 39.948, 39.0983, 40.078, 44.95591, 47.867, 50.9415,
  /*  24 -  31 */ 51.9961, 54.938049, 55.845, 58.9332, 58.6934, 63.546, 65.39, 69.723,
  /*  32 -  39 */ 72.61, 74.9216, 78.96, 79.904, 83.8, 85.4678, 87.62, 88.90585,
  /*  40 -  47 */ 91.224, 92.90638, 95.94, 98.0, 101.07, 102.9055, 106.42, 107.8682,
  /*  48 -  55 */ 112.411, 114.818, 118.71, 121.76, 127.6, 126.90447, 131.29, 132.90455,
  /*  56 -  63 */ 137.327, 138.9055, 140.116, 140.90765, 144.24, 145.0, 150.36, 151.964,
  /*  64 -  71 */ 157.25, 158.92534, 162.5, 164.93032, 167.26, 168.93421, 173.04, 174.967,
  /*  72 -  79 */ 178.49, 180.9479, 183.84, 186.207, 190.23, 192.217, 195.078, 196.96655,
  /*  80 -  87 */ 200.59, 204.3833, 207.2, 208.98038, 209.0, 210.0, 222.0, 223.0,
  /*  88 -  95 */ 226.0, 227.0, 232.0381, 231.03588, 238.0289, 237.0, 244.0, 243.0,
  /*  96 - 103 */ 247.0, 247.0, 251.0, 252.0, 257.0, 258.0, 259.0, 262.0,
  /* 104 - 111 */ 261.0, 262.0, 263.0, 262.0, 265.0, 266.0, 269.0, 272.0,
  /* 112: the table used to stop at 111, so ELEMENT(112, "Uub") indexed one
     past the end of the array.
     NOTE(review): 277.0 is a placeholder that presumably follows the same
     mass-number convention as 269.0 and 272.0; it is not an OpenEye
     figure -- confirm. */
  /* 112       */ 277.0,
};

/* The event macros below are only meant to be expanded inside
   check_smiles(): they read its local `dump` flag and, for the atom
   events, add to its local `mw` accumulator.  Each one is wrapped in
   do { } while (0) so that it behaves as a single statement. */

/* Shared body of the four atom events: accumulate the weight of element
   `eleno`, then report it under `label` when dumping. */
#define ATOM_EVENT(label, eleno, symbol)                       \
  do {                                                         \
    mw += weights[eleno];                                      \
    if (dump) printf(label " %s (%d)\n", symbol, eleno);       \
  } while (0)

#define RAW_ATOM(eleno, symbol)          ATOM_EVENT("raw atom", eleno, symbol)
#define RAW_AROMATIC_ATOM(eleno, symbol) ATOM_EVENT("raw aromatic atom", eleno, symbol)
#define ELEMENT(eleno, symbol)           ATOM_EVENT("element", eleno, symbol)
#define AROMATIC_ELEMENT(eleno, symbol)  ATOM_EVENT("aromatic element", eleno, symbol)

#define BOND(order, symbol)                                            \
  do {                                                                 \
    if (dump) printf("new bond %s (%d)\n", symbol, order);             \
  } while (0)

#define CHIRAL_BOND(direction, symbol)                                 \
  do {                                                                 \
    if (dump) printf("chiral bond %s (dir=%d)\n", symbol, direction);  \
  } while (0)

#define AROMATIC_BOND(symbol)                                          \
  do {                                                                 \
    if (dump) printf("aromatic bond %s\n", symbol);                    \
  } while (0)

#define CLOSURE(closure_num)                                           \
  do {                                                                 \
    if (dump) printf("ring closure %d\n", closure_num);                \
  } while (0)

/*
  Run the SmilesParser machine over one SMILES string.

  smiles  Text to tokenize.  The machine is executed with `noend`, so no
          end pointer is used; the text has to contain one of the is_done
          characters (NUL, space, CR, TAB or LF) for the scan to finish.
  dump    Non-zero: print a line for every token recognized.

  The summed weight of all atoms seen is always written to std::cout
  (see the MW caveat above).  Syntax-error messages and the failure
  position are printed regardless of `dump`.

  Returns 1 if the machine reached is_done, otherwise 0.
*/
int check_smiles(const char *smiles, int dump) {
  const char *p = smiles;        /* scan pointer advanced by the machine */
  const char *start_p = smiles;  /* kept to report the failure offset */
  const char *prev_p = NULL;     /* start of a run of '@', '+' or '-' */
  int count = 100;  /* placeholder; start_count_action assigns it before use */
  int cs;           /* Ragel's current-state variable, used by the generated code */
  int finished = 0; /* set to 1 by done_action */
  double mw = 0.0;  /* running sum of weights[] for the atoms seen */

  %%{
  # These must be here
  action start_count_action    { count = p[0]-'0'; }
  action increase_count_action { count = 10*count + p[0]-'0'; }
  action start_repeat_action   { prev_p = p; }

  # These are user-specific
  action single_bond_action    { BOND(1, "-"); }
  action double_bond_action    { BOND(2, "="); }
  action triple_bond_action    { BOND(3, "#"); }
  action quadruple_bond_action { BOND(4, "$"); }
  action forward_bond_action   { CHIRAL_BOND(1, "/"); }
  action backward_bond_action  { CHIRAL_BOND(-1, "\\"); }
  action aromatic_bond_action  { AROMATIC_BOND(":"); }

  action dot_action          { if (dump) printf("dot\n"); }
  action open_branch_action  { if (dump) printf("open branch\n"); }
  action close_branch_action { if (dump) printf("close branch\n"); }

  # Closure actions run as leaving actions, so the digits are behind p.
  action one_digit_closure_action { CLOSURE(p[-1]-'0'); }
  action two_digit_closure_action { CLOSURE((p[-2]-'0')*10+(p[-1]-'0')); }

  action done_action { finished = 1; if (dump) printf("I am done.\n"); }

  action bracket_atom_start_action { if (dump) printf("Bracket atom start\n"); }
  action atomic_weight_action { if (dump) printf("Atomic weight %d\n", count); }
  action atomic_symbol_action { if (dump) printf("atomic symbol\n"); }

  action chiral_count_action  { if (dump) printf("Got some @ (count) %d\n", count); }
  action chiral_repeat_action {
    if (dump) printf("bunch of '@' (repeat) %d\n", static_cast<int>(p-prev_p+1));
  }
  action chiral_th_action { if (dump) printf("Got @TH %d\n", count); }
  action chiral_al_action { if (dump) printf("Got @AL %d\n", count); }
  action chiral_sp_action { if (dump) printf("Got @SP %d\n", count); }
  action chiral_tb_action { if (dump) printf("Got @TB %d\n", count); }
  action chiral_oh_action { if (dump) printf("Got @OH %d\n", count); }

  action hcount_is_1_action { if (dump) printf("hcount is 1\n"); }
  action hcount_action      { if (dump) printf("Got hcount %d\n", count); }

  action positive_count_action  { if (dump) printf("Charge (count) is +%d\n", count); }
  action positive_repeat_action {
    if (dump) printf("charge (repeat) +%d\n", static_cast<int>(p-prev_p+1));
  }
  action negative_count_action  { if (dump) printf("Charge (count) is -%d\n", count); }
  action negative_repeat_action {
    if (dump) printf("charge (repeat) -%d\n", static_cast<int>(p-prev_p+1));
  }

  action bracket_atom_end_action { if (dump) printf("That's the end]\n"); }

  # Various syntax errors
  action missing_closure_digit_1 { printf("Missing first digit of ring closure\n"); }
  action missing_closure_digit_2 { printf("Missing second digit of ring closure\n"); }
  action missing_element_symbol  { printf("Error before finding an element symbol\n"); }
  action missing_close_bracket   { printf("Error before ending the atom expression\n"); }
  action bad_chiral_TH { printf("@TH requires a value of 1 or 2\n"); }
  action bad_chiral_AL { printf("@AL requires a value of 1 or 2\n"); }
  action bad_chiral_SP { printf("@SP requires a value of 1, 2 or 3\n"); }
  action bad_chiral_TB { printf("@TB requires a value between 1 and 20\n"); }
  action bad_chiral_OH { printf("@OH requires a value between 1 and 30\n"); }

  # Premature optimization
  action element_dummy_0_action { ELEMENT(0, "*"); }
  action element_H_1_action     { ELEMENT(1, "H"); }
  action element_He_2_action    { ELEMENT(2, "He"); }
  action element_Li_3_action    { ELEMENT(3, "Li"); }
  action element_Be_4_action    { ELEMENT(4, "Be"); }
  action element_B_5_action     { ELEMENT(5, "B"); }
  action element_C_6_action     { ELEMENT(6, "C"); }
  action element_N_7_action     { ELEMENT(7, "N"); }
  action element_O_8_action     { ELEMENT(8, "O"); }
  action element_F_9_action     { ELEMENT(9, "F"); }
  action element_Ne_10_action   { ELEMENT(10, "Ne"); }
  action element_Na_11_action   { ELEMENT(11, "Na"); }
  action element_Mg_12_action   { ELEMENT(12, "Mg"); }
  action element_Al_13_action   { ELEMENT(13, "Al"); }
  action element_Si_14_action   { ELEMENT(14, "Si"); }
  action element_P_15_action    { ELEMENT(15, "P"); }
  action element_S_16_action    { ELEMENT(16, "S"); }
  action element_Cl_17_action   { ELEMENT(17, "Cl"); }
  action element_Ar_18_action   { ELEMENT(18, "Ar"); }
  action element_K_19_action    { ELEMENT(19, "K"); }
  action element_Ca_20_action   { ELEMENT(20, "Ca"); }
  action element_Sc_21_action   { ELEMENT(21, "Sc"); }
  action element_Ti_22_action   { ELEMENT(22, "Ti"); }
  action element_V_23_action    { ELEMENT(23, "V"); }
  action element_Cr_24_action   { ELEMENT(24, "Cr"); }
  action element_Mn_25_action   { ELEMENT(25, "Mn"); }
  action element_Fe_26_action   { ELEMENT(26, "Fe"); }
  action element_Co_27_action   { ELEMENT(27, "Co"); }
  action element_Ni_28_action   { ELEMENT(28, "Ni"); }
  action element_Cu_29_action   { ELEMENT(29, "Cu"); }
  action element_Zn_30_action   { ELEMENT(30, "Zn"); }
  action element_Ga_31_action   { ELEMENT(31, "Ga"); }
  action element_Ge_32_action   { ELEMENT(32, "Ge"); }
  action element_As_33_action   { ELEMENT(33, "As"); }
  action element_Se_34_action   { ELEMENT(34, "Se"); }
  action element_Br_35_action   { ELEMENT(35, "Br"); }
  action element_Kr_36_action   { ELEMENT(36, "Kr"); }
  action element_Rb_37_action   { ELEMENT(37, "Rb"); }
  action element_Sr_38_action   { ELEMENT(38, "Sr"); }
  action element_Y_39_action    { ELEMENT(39, "Y"); }
  action element_Zr_40_action   { ELEMENT(40, "Zr"); }
  action element_Nb_41_action   { ELEMENT(41, "Nb"); }
  action element_Mo_42_action   { ELEMENT(42, "Mo"); }
  action element_Tc_43_action   { ELEMENT(43, "Tc"); }
  action element_Ru_44_action   { ELEMENT(44, "Ru"); }
  action element_Rh_45_action   { ELEMENT(45, "Rh"); }
  action element_Pd_46_action   { ELEMENT(46, "Pd"); }
  action element_Ag_47_action   { ELEMENT(47, "Ag"); }
  action element_Cd_48_action   { ELEMENT(48, "Cd"); }
  action element_In_49_action   { ELEMENT(49, "In"); }
  action element_Sn_50_action   { ELEMENT(50, "Sn"); }
  action element_Sb_51_action   { ELEMENT(51, "Sb"); }
  action element_Te_52_action   { ELEMENT(52, "Te"); }
  action element_I_53_action    { ELEMENT(53, "I"); }
  action element_Xe_54_action   { ELEMENT(54, "Xe"); }
  action element_Cs_55_action   { ELEMENT(55, "Cs"); }
  action element_Ba_56_action   { ELEMENT(56, "Ba"); }
  action element_La_57_action   { ELEMENT(57, "La"); }
  action element_Ce_58_action   { ELEMENT(58, "Ce"); }
  action element_Pr_59_action   { ELEMENT(59, "Pr"); }
  action element_Nd_60_action   { ELEMENT(60, "Nd"); }
  action element_Pm_61_action   { ELEMENT(61, "Pm"); }
  action element_Sm_62_action   { ELEMENT(62, "Sm"); }
  action element_Eu_63_action   { ELEMENT(63, "Eu"); }
  action element_Gd_64_action   { ELEMENT(64, "Gd"); }
  action element_Tb_65_action   { ELEMENT(65, "Tb"); }
  action element_Dy_66_action   { ELEMENT(66, "Dy"); }
  action element_Ho_67_action   { ELEMENT(67, "Ho"); }
  action element_Er_68_action   { ELEMENT(68, "Er"); }
  action element_Tm_69_action   { ELEMENT(69, "Tm"); }
  action element_Yb_70_action   { ELEMENT(70, "Yb"); }
  action element_Lu_71_action   { ELEMENT(71, "Lu"); }
  action element_Hf_72_action   { ELEMENT(72, "Hf"); }
  action element_Ta_73_action   { ELEMENT(73, "Ta"); }
  action element_W_74_action    { ELEMENT(74, "W"); }
  action element_Re_75_action   { ELEMENT(75, "Re"); }
  action element_Os_76_action   { ELEMENT(76, "Os"); }
  action element_Ir_77_action   { ELEMENT(77, "Ir"); }
  action element_Pt_78_action   { ELEMENT(78, "Pt"); }
  action element_Au_79_action   { ELEMENT(79, "Au"); }
  action element_Hg_80_action   { ELEMENT(80, "Hg"); }
  action element_Tl_81_action   { ELEMENT(81, "Tl"); }
  action element_Pb_82_action   { ELEMENT(82, "Pb"); }
  action element_Bi_83_action   { ELEMENT(83, "Bi"); }
  action element_Po_84_action   { ELEMENT(84, "Po"); }
  action element_At_85_action   { ELEMENT(85, "At"); }
  action element_Rn_86_action   { ELEMENT(86, "Rn"); }
  action element_Fr_87_action   { ELEMENT(87, "Fr"); }
  action element_Ra_88_action   { ELEMENT(88, "Ra"); }
  action element_Ac_89_action   { ELEMENT(89, "Ac"); }
  action element_Th_90_action   { ELEMENT(90, "Th"); }
  action element_Pa_91_action   { ELEMENT(91, "Pa"); }
  action element_U_92_action    { ELEMENT(92, "U"); }
  action element_Np_93_action   { ELEMENT(93, "Np"); }
  action element_Pu_94_action   { ELEMENT(94, "Pu"); }
  action element_Am_95_action   { ELEMENT(95, "Am"); }
  action element_Cm_96_action   { ELEMENT(96, "Cm"); }
  action element_Bk_97_action   { ELEMENT(97, "Bk"); }
  action element_Cf_98_action   { ELEMENT(98, "Cf"); }
  action element_Es_99_action   { ELEMENT(99, "Es"); }
  action element_Fm_100_action  { ELEMENT(100, "Fm"); }
  action element_Md_101_action  { ELEMENT(101, "Md"); }
  action element_No_102_action  { ELEMENT(102, "No"); }
  action element_Lr_103_action  { ELEMENT(103, "Lr"); }
  action element_Rf_104_action  { ELEMENT(104, "Rf"); }
  action element_Ha_105_action  { ELEMENT(105, "Ha"); }
  action element_Sg_106_action  { ELEMENT(106, "Sg"); }
  action element_Ns_107_action  { ELEMENT(107, "Ns"); }
  action element_Hs_108_action  { ELEMENT(108, "Hs"); }
  action element_Mt_109_action  { ELEMENT(109, "Mt"); }
  action element_Uun_110_action { ELEMENT(110, "Uun"); }
  action element_Uuu_111_action { ELEMENT(111, "Uuu"); }
  action element_Uub_112_action { ELEMENT(112, "Uub"); }

  action aromatic_element_C_6_action   { AROMATIC_ELEMENT(6, "C"); }
  action aromatic_element_N_7_action   { AROMATIC_ELEMENT(7, "N"); }
  action aromatic_element_O_8_action   { AROMATIC_ELEMENT(8, "O"); }
  action aromatic_element_P_15_action  { AROMATIC_ELEMENT(15, "P"); }
  action aromatic_element_S_16_action  { AROMATIC_ELEMENT(16, "S"); }
  action aromatic_element_As_33_action { AROMATIC_ELEMENT(33, "As"); }
  action aromatic_element_Se_34_action { AROMATIC_ELEMENT(34, "Se"); }

  action raw_atom_B_5_action   { RAW_ATOM(5, "B"); }
  action raw_atom_C_6_action   { RAW_ATOM(6, "C"); }
  action raw_atom_N_7_action   { RAW_ATOM(7, "N"); }
  action raw_atom_O_8_action   { RAW_ATOM(8, "O"); }
  action raw_atom_F_9_action   { RAW_ATOM(9, "F"); }
  action raw_atom_P_15_action  { RAW_ATOM(15, "P"); }
  action raw_atom_S_16_action  { RAW_ATOM(16, "S"); }
  action raw_atom_Cl_17_action { RAW_ATOM(17, "Cl"); }
  action raw_atom_Br_35_action { RAW_ATOM(35, "Br"); }
  action raw_atom_I_53_action  { RAW_ATOM(53, "I"); }

  action raw_aromatic_atom_C_6_action  { RAW_AROMATIC_ATOM(6, "C"); }
  action raw_aromatic_atom_N_7_action  { RAW_AROMATIC_ATOM(7, "N"); }
  action raw_aromatic_atom_O_8_action  { RAW_AROMATIC_ATOM(8, "O"); }
  action raw_aromatic_atom_P_15_action { RAW_AROMATIC_ATOM(15, "P"); }
  action raw_aromatic_atom_S_16_action { RAW_AROMATIC_ATOM(16, "S"); }

  is_bracket_atom_start = '[' % bracket_atom_start_action;

  # Some libraries store the atomic weight in a 16 bit word
  # so you should not use weights more than 2**15-1 == 32767.
  # The pattern itself accepts one digit plus up to four more.
  is_atomic_weight = ([0-9] @ start_count_action
                      [0-9]{,4} $ increase_count_action
                      % atomic_weight_action);

  # The atomic elements and the aromatics
  is_element_symbol = (
    # NOTE(review): a '#' stood directly before this first alternative in
    # the copy this was restored from; it is kept active because
    # element_dummy_0_action is defined above -- confirm.
    '*' % element_dummy_0_action |
    'H' % element_H_1_action |
    'He' % element_He_2_action |
    'Li' % element_Li_3_action |
    'Be' % element_Be_4_action |
    'B' % element_B_5_action |
    'C' % element_C_6_action |
    'N' % element_N_7_action |
    'O' % element_O_8_action |
    'F' % element_F_9_action |
    'Ne' % element_Ne_10_action |
    'Na' % element_Na_11_action |
    'Mg' % element_Mg_12_action |
    'Al' % element_Al_13_action |
    'Si' % element_Si_14_action |
    'P' % element_P_15_action |
    'S' % element_S_16_action |
    'Cl' % element_Cl_17_action |
    'Ar' % element_Ar_18_action |
    'K' % element_K_19_action |
    'Ca' % element_Ca_20_action |
    'Sc' % element_Sc_21_action |
    'Ti' % element_Ti_22_action |
    'V' % element_V_23_action |
    'Cr' % element_Cr_24_action |
    'Mn' % element_Mn_25_action |
    'Fe' % element_Fe_26_action |
    'Co' % element_Co_27_action |
    'Ni' % element_Ni_28_action |
    'Cu' % element_Cu_29_action |
    'Zn' % element_Zn_30_action |
    'Ga' % element_Ga_31_action |
    'Ge' % element_Ge_32_action |
    'As' % element_As_33_action |
    'Se' % element_Se_34_action |
    'Br' % element_Br_35_action |
    'Kr' % element_Kr_36_action |
    'Rb' % element_Rb_37_action |
    'Sr' % element_Sr_38_action |
    'Y' % element_Y_39_action |
    'Zr' % element_Zr_40_action |
    'Nb' % element_Nb_41_action |
    'Mo' % element_Mo_42_action |
    'Tc' % element_Tc_43_action |
    'Ru' % element_Ru_44_action |
    'Rh' % element_Rh_45_action |
    'Pd' % element_Pd_46_action |
    'Ag' % element_Ag_47_action |
    'Cd' % element_Cd_48_action |
    'In' % element_In_49_action |
    'Sn' % element_Sn_50_action |
    'Sb' % element_Sb_51_action |
    'Te' % element_Te_52_action |
    'I' % element_I_53_action |
    'Xe' % element_Xe_54_action |
    'Cs' % element_Cs_55_action |
    'Ba' % element_Ba_56_action |
    'La' % element_La_57_action |
    'Ce' % element_Ce_58_action |
    'Pr' % element_Pr_59_action |
    'Nd' % element_Nd_60_action |
    'Pm' % element_Pm_61_action |
    'Sm' % element_Sm_62_action |
    'Eu' % element_Eu_63_action |
    'Gd' % element_Gd_64_action |
    'Tb' % element_Tb_65_action |
    'Dy' % element_Dy_66_action |
    'Ho' % element_Ho_67_action |
    'Er' % element_Er_68_action |
    'Tm' % element_Tm_69_action |
    'Yb' % element_Yb_70_action |
    'Lu' % element_Lu_71_action |
    'Hf' % element_Hf_72_action |
    'Ta' % element_Ta_73_action |
    'W' % element_W_74_action |
    'Re' % element_Re_75_action |
    'Os' % element_Os_76_action |
    'Ir' % element_Ir_77_action |
    'Pt' % element_Pt_78_action |
    'Au' % element_Au_79_action |
    'Hg' % element_Hg_80_action |
    'Tl' % element_Tl_81_action |
    'Pb' % element_Pb_82_action |
    'Bi' % element_Bi_83_action |
    'Po' % element_Po_84_action |
    'At' % element_At_85_action |
    'Rn' % element_Rn_86_action |
    'Fr' % element_Fr_87_action |
    'Ra' % element_Ra_88_action |
    'Ac' % element_Ac_89_action |
    'Th' % element_Th_90_action |
    'Pa' % element_Pa_91_action |
    'U' % element_U_92_action |
    'Np' % element_Np_93_action |
    'Pu' % element_Pu_94_action |
    'Am' % element_Am_95_action |
    'Cm' % element_Cm_96_action |
    'Bk' % element_Bk_97_action |
    'Cf' % element_Cf_98_action |
    'Es' % element_Es_99_action |
    'Fm' % element_Fm_100_action |
    'Md' % element_Md_101_action |
    'No' % element_No_102_action |
    'Lr' % element_Lr_103_action |
    'Rf' % element_Rf_104_action |
    'Ha' % element_Ha_105_action |
    'Sg' % element_Sg_106_action |
    'Ns' % element_Ns_107_action |
    'Hs' % element_Hs_108_action |
    'Mt' % element_Mt_109_action |
    'Uun' % element_Uun_110_action |
    'Uuu' % element_Uuu_111_action |
    'Uub' % element_Uub_112_action |
    'c' % aromatic_element_C_6_action |
    'n' % aromatic_element_N_7_action |
    'o' % aromatic_element_O_8_action |
    'p' % aromatic_element_P_15_action |
    's' % aromatic_element_S_16_action |
    'as' % aromatic_element_As_33_action |  # in OpenEye
    'se' % aromatic_element_Se_34_action    # in Daylight and OpenEye
  );

  # Chirality: '@' with a number, a run of '@', or one of the named
  # classes followed by a value in that class's range.
  is_chiral = (
    ('@' [0-9] @ start_count_action [0-9]* $ increase_count_action
         % chiral_count_action) |
    ('@' % start_repeat_action '@'* % chiral_repeat_action) |
    ('@TH' [12] @start_count_action % chiral_th_action $! bad_chiral_TH) |
    ('@AL' [12] @start_count_action % chiral_al_action $! bad_chiral_AL) |
    ('@SP' [123] @start_count_action % chiral_sp_action $! bad_chiral_SP) |
    ('@TB' (('1' @start_count_action [0-9]? $ increase_count_action) |
            ('2' @start_count_action '0'? $ increase_count_action) |
            ([3-9] @start_count_action)) % chiral_tb_action $! bad_chiral_TB)|
    ('@OH' (('1' @start_count_action [0-9]? $ increase_count_action) |
            ('2' @start_count_action [0-9]? $ increase_count_action) |
            ('3' @start_count_action '0'? $ increase_count_action) |
            ([4-9] @start_count_action)) % chiral_oh_action $! bad_chiral_OH)
  );

  is_hcount = ('H' % hcount_is_1_action |
               'H' [0-9] @ start_count_action [0-9]* $ increase_count_action
                   % hcount_action);

  # Most implementations allow charges across signed 16 bits.
  # It is silly to make that many states, so let it be unlimited.
  # However, in truth if anyone uses more than two charge signs
  # they are being silly, since using digits is more compact.
  is_positive = (
    ('+' [0-9] @ start_count_action [0-9]* $ increase_count_action
         % positive_count_action) |
    ('+' % start_repeat_action '+'* % positive_repeat_action)
  );
  is_negative = (
    ('-' [0-9] @ start_count_action [0-9]* $ increase_count_action
         % negative_count_action) |
    ('-' % start_repeat_action '-'* % negative_repeat_action)
  );
  is_charge = (is_positive | is_negative);

  is_bracket_atom_end = ']' > bracket_atom_end_action;

  is_bracket_atom = (is_bracket_atom_start
                     is_atomic_weight ?
                     is_element_symbol >! missing_element_symbol
                     is_chiral ? >! missing_close_bracket
                     is_hcount ? >! missing_close_bracket
                     is_charge ? >! missing_close_bracket
                     is_bracket_atom_end >! missing_close_bracket
                     );

  # Atoms that may be written without brackets.
  is_raw_atom = (
    # NOTE(review): a '#' stood directly before this first alternative in
    # the copy this was restored from; it is kept active because
    # raw_atom_B_5_action is defined above -- confirm.
    'B' % raw_atom_B_5_action |
    'C' % raw_atom_C_6_action |
    'N' % raw_atom_N_7_action |
    'O' % raw_atom_O_8_action |
    'F' % raw_atom_F_9_action |
    'P' % raw_atom_P_15_action |
    'S' % raw_atom_S_16_action |
    'Cl' % raw_atom_Cl_17_action |
    'Br' % raw_atom_Br_35_action |
    'I' % raw_atom_I_53_action |
    'c' % raw_aromatic_atom_C_6_action |
    'n' % raw_aromatic_atom_N_7_action |
    'o' % raw_aromatic_atom_O_8_action |
    'p' % raw_aromatic_atom_P_15_action |
    's' % raw_aromatic_atom_S_16_action
  );

  is_atom = is_raw_atom | is_bracket_atom;

  is_bond = ('-' % single_bond_action |
             '=' % double_bond_action |
             '#' % triple_bond_action |
             '$' % quadruple_bond_action |  # OpenEye extension
             '/' % forward_bond_action |
             '\\' % backward_bond_action |
             ':' % aromatic_bond_action     # I think
             );

  is_dot = '.' % dot_action;

  is_open_branch = '(' % open_branch_action;
  is_close_branch = ')' % close_branch_action;

  # A ring closure is one digit, or '%' followed by exactly two digits.
  # Each digit after '%' reports its own "missing digit" message.
  is_closure = ([0-9] % one_digit_closure_action |
                '%' [0-9] >! missing_closure_digit_1
                    [0-9] >! missing_closure_digit_2
                    % two_digit_closure_action);

  # NUL or whitespace ends the SMILES string.
  is_done = (0 | [ \r\t\n]) > done_action;

  states = (
    start: (is_atom -> have_atom),

    # Actions occur upon entering or leaving a state or upon all
    # internal transitions.  Consider 'Cl' while in the atom state.
    # The first 'C' matches the raw atoms 'C' and 'Cl'.  There is
    # an internal state transition for both, so '$' would trigger
    # both actions.  The '>', '<' and '@' only occur upon leaving
    # a state, but atom->atom doesn't leave.
    # Create a fake state with an epsilon transition back to the
    # real one to force a proper exit transition.  Every have_* state
    # below is such a fake state.
    have_atom: (zlen -> atom),
    atom: (
      is_atom -> have_atom |
      is_bond -> have_bond |
      is_dot -> have_dot |
      is_open_branch -> have_open_branch |
      is_close_branch -> have_close_branch |
      is_closure -> have_closure |
      is_done -> final
    ),

    have_bond: (zlen -> bond),
    bond: (
      is_atom -> have_atom |
      is_closure -> have_closure
    ),

    have_dot: (zlen -> dot),
    dot: (
      is_atom -> have_atom |
      is_dot -> have_dot |
      is_done -> final
    ),

    have_open_branch: (zlen -> open_branch),
    open_branch: (
      is_atom -> have_atom |
      is_bond -> have_bond |
      is_dot -> have_dot
    ),

    have_close_branch: (zlen -> close_branch),
    close_branch: (
      is_atom -> have_atom |
      is_bond -> have_bond |
      is_dot -> have_dot |
      is_open_branch -> have_open_branch |
      is_close_branch -> have_close_branch |
      is_done -> final
    ),

    have_closure: (zlen -> closure),
    closure: (
      is_atom -> have_atom |
      is_bond -> have_bond |
      is_dot -> have_dot |
      is_open_branch -> have_open_branch |
      is_close_branch -> have_close_branch |
      is_closure -> have_closure |
      is_done -> final
    )
  );

  main := states;

  # Initialize and execute
  write init;
  write exec noend;
  }%%

  %% write eof;

  if (!finished) {
    /* p - start_p is a pointer difference; convert it for the %d format. */
    const int fail_offset = static_cast<int>(p - start_p);
    printf("Failure! at %d\n", fail_offset);
    printf("Failed at %d: %s\n", fail_offset, p);
  }

  std::cout << mw << std::endl;
  return finished;
}

/*
  Read "U.smi" line by line and tokenize each line with dumping enabled.
  Stops at the first line that check_smiles() rejects.

  Returns 1 if the file cannot be opened, otherwise 0.
*/
int main(int argc, const char **argv) {
  FILE *infile;
  char s[10000];

  //if (! (infile = fopen("/Users/dalke/ftps/nci/nci_oe.smi", "r")) ) {
  if (! (infile = fopen("U.smi", "r")) ) {
    fprintf(stderr, "Cannot open\n");
    return 1;
  }

  while (fgets(s, static_cast<int>(sizeof s), infile)) {
    if (!check_smiles(s, 1)) {
      printf("bad %s\n", s);
      /* Tokenize the rejected line once more so that its trace is
         repeated right after the "bad" message. */
      check_smiles(s, 1);
      break;
    }
  }

  fclose(infile);
  return 0;
}