% Conference paper from the 2021 IEEE EMBS BHI conference (exported record; key kept as-is).
% Authors use the "von Last, First" form so the particle "van der" is parsed as the
% von-part of "Laan" and given names can be abbreviated per initial by the style.
% Percent signs in the abstract are escaped so the field is safe if a style prints it.
@inproceedings{d872a0e50f5f4461881e66a2be4222f8,
  author    = {Pezoulas, Vasileios C. and Sakellarios, Antonis and Kleber, Marcus and Bosch, Jos A. and van der Laan, Sander W. and Lamers, Femke and Lehtim{\"a}ki, Terho and M{\"a}rz, Winfried and Fotiadis, Dimitrios I.},
  title     = {A hybrid data harmonization workflow using word embeddings for the interlinking of heterogeneous cross-domain clinical data structures},
  booktitle = {2021 {IEEE} {EMBS} International Conference on Biomedical and Health Informatics ({BHI})},
  publisher = {IEEE},
  year      = {2021},
  doi       = {10.1109/BHI50953.2021.9508484},
  language  = {English},
  keywords  = {Protocols, Terminology, Mental disorders, Semantics, Knowledge based systems, Medical services, Manuals, data harmonization, lexical matching, semantic matching, cardiovascular diseases, mental disorders},
  abstract  = {Retrospective data harmonization is an open issue in healthcare due to the emerging need to interlink data from multiple clinical centers with the absence of standardized data collection protocols. In this work, we present an automated data harmonization workflow which utilizes lexical and semantic analysis based on word embeddings and relational modeling to detect terminologies with common lexical and conceptual basis. The method is built on top of a knowledge base to enable the interlinking of heterogeneous cross-domain data. A case study is applied in two clinical domains, namely the cardiovascular disease (CVD) and the mental disorders, where the proposed method yielded matched terminologies with 85\% precision in less execution time than the application of lexical analysis and manual mapping which yielded 10\% less precision.},
  note      = {IEEE EMBS International Conference on Biomedical and Health Informatics (BHI) ; Conference date: 27-07-2021 Through 30-07-2021},
}