@inproceedings{KohlSchmidtsKloeseretal.2021, author = {Kohl, Philipp and Schmidts, Oliver and Kl{\"o}ser, Lars and Werth, Henri and Kraft, Bodo and Z{\"u}ndorf, Albert}, title = {STAMP 4 NLP - an agile framework for rapid quality-driven NLP applications development}, series = {Quality of Information and Communications Technology. QUATIC 2021}, booktitle = {Quality of Information and Communications Technology. QUATIC 2021}, publisher = {Springer}, address = {Cham}, isbn = {978-3-030-85346-4}, doi = {10.1007/978-3-030-85347-1_12}, pages = {156 -- 166}, year = {2021}, abstract = {The progress in natural language processing (NLP) research over the last years, offers novel business opportunities for companies, as automated user interaction or improved data analysis. Building sophisticated NLP applications requires dealing with modern machine learning (ML) technologies, which impedes enterprises from establishing successful NLP projects. Our experience in applied NLP research projects shows that the continuous integration of research prototypes in production-like environments with quality assurance builds trust in the software and shows convenience and usefulness regarding the business goal. We introduce STAMP 4 NLP as an iterative and incremental process model for developing NLP applications. With STAMP 4 NLP, we merge software engineering principles with best practices from data science. Instantiating our process model allows efficiently creating prototypes by utilizing templates, conventions, and implementations, enabling developers and data scientists to focus on the business goals. Due to our iterative-incremental approach, businesses can deploy an enhanced version of the prototype to their software environment after every iteration, maximizing potential business value and trust early and avoiding the cost of successful yet never deployed experiments.}, language = {en} } @inproceedings{BuesgenKloeserKohletal.2022, author = {B{\"u}sgen, Andr{\´e} and Kl{\"o}ser, Lars and Kohl, Philipp and Schmidts, Oliver and Kraft, Bodo and Z{\"u}ndorf, Albert}, title = {Exploratory analysis of chat-based black market profiles with natural language processing}, series = {Proceedings of the 11th International Conference on Data Science, Technology and Applications}, booktitle = {Proceedings of the 11th International Conference on Data Science, Technology and Applications}, isbn = {978-989-758-583-8}, issn = {2184-285X}, doi = {10.5220/0011271400003269}, pages = {83 -- 94}, year = {2022}, abstract = {Messenger apps like WhatsApp or Telegram are an integral part of daily communication. Besides the various positive effects, those services extend the operating range of criminals. Open trading groups with many thousand participants emerged on Telegram. Law enforcement agencies monitor suspicious users in such chat rooms. This research shows that text analysis, based on natural language processing, facilitates this through a meaningful domain overview and detailed investigations. We crawled a corpus from such self-proclaimed black markets and annotated five attribute types products, money, payment methods, user names, and locations. Based on each message a user sends, we extract and group these attributes to build profiles. Then, we build features to cluster the profiles. Pretrained word vectors yield better unsupervised clustering results than current state-of-the-art transformer models. The result is a semantically meaningful high-level overview of the user landscape of black market chatrooms. Additionally, the extracted structured information serves as a foundation for further data exploration, for example, the most active users or preferred payment methods.}, language = {en} } @inproceedings{BuesgenKloeserKohletal.2023, author = {B{\"u}sgen, Andr{\´e} and Kl{\"o}ser, Lars and Kohl, Philipp and Schmidts, Oliver and Kraft, Bodo and Z{\"u}ndorf, Albert}, title = {From cracked accounts to fake IDs: user profiling on German telegram black market channels}, series = {Data Management Technologies and Applications}, booktitle = {Data Management Technologies and Applications}, editor = {Cuzzocrea, Alfredo and Gusikhin, Oleg and Hammoudi, Slimane and Quix, Christoph}, publisher = {Springer}, address = {Cham}, isbn = {978-3-031-37889-8 (Print)}, doi = {10.1007/978-3-031-37890-4_9}, pages = {176 -- 202}, year = {2023}, abstract = {Messenger apps like WhatsApp and Telegram are frequently used for everyday communication, but they can also be utilized as a platform for illegal activity. Telegram allows public groups with up to 200.000 participants. Criminals use these public groups for trading illegal commodities and services, which becomes a concern for law enforcement agencies, who manually monitor suspicious activity in these chat rooms. This research demonstrates how natural language processing (NLP) can assist in analyzing these chat rooms, providing an explorative overview of the domain and facilitating purposeful analyses of user behavior. We provide a publicly available corpus of annotated text messages with entities and relations from four self-proclaimed black market chat rooms. Our pipeline approach aggregates the extracted product attributes from user messages to profiles and uses these with their sold products as features for clustering. The extracted structured information is the foundation for further data exploration, such as identifying the top vendors or fine-granular price analyses. Our evaluation shows that pretrained word vectors perform better for unsupervised clustering than state-of-the-art transformer models, while the latter is still superior for sequence labeling.}, language = {en} }