% Poster paper (pp. 484--485) in the CHASE 2025 proceedings.
% - booktitle carries the full proceedings name, matching the conference named in note.
% - SMILES is brace-protected so sentence-casing styles keep the acronym upper-case.
% - NOTE(review): publisher names IEEE, but the copyright line in note names ACM -- confirm
%   which organisation published the proceedings before relying on this field.
% - NOTE(review): address holds a country, not a publisher city -- confirm or refine.
@inproceedings{5426035acc154793a1e9282397384dc9,
  author    = {Dang, Alexander and Liu, Emma and Wei, Zhi},
  title     = {Poster: Identifying {SMILES} from Molecular Structure Images},
  booktitle = {Proceedings - 2025 {IEEE/ACM} International Conference on Connected Health: Applications, Systems and Engineering Technologies, {CHASE} 2025},
  year      = {2025},
  pages     = {484--485},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  doi       = {10.1145/3721201.3725512},
  language  = {English (US)},
  keywords  = {Bioinformatics, Data extraction, Image recognition, Neural networks},
  abstract  = {Accurate extraction of Simplified Molecular Input Line Entry System (SMILES) representations from molecular structure images is crucial for computational chemistry and cheminformatics. This study presents a machine learning-based approach for converting graphical molecular representations into SMILES notation, addressing the challenges of chemical image recognition. We developed a deep learning model leveraging Tensorflow/Keras framework, cheminformatics tools such as RDKit, and Long Short-Term Memory (LSTM) networks for sequence learning. Trained on a curated dataset of molecular images, the model effectively learns structure-text relationships, enabling high-accuracy SMILES predictions. Our approach enhances chemical data digitization, improves dataset accuracy, and accelerates molecular property assessments and drug discovery, driving progress in pharmaceutical research and personalized medicine. By training and testing an image-captioning model, we ensure robust SMILES generation while maintaining high fidelity to molecular structures.},
  note      = {Publisher Copyright: {\textcopyright} 2025 ACM.; 10th IEEE/ACM International Conference on Connected Health: Applications, Systems and Engineering Technologies, CHASE 2025 ; Conference date: 24-06-2025 Through 26-06-2025},
}