% Bibliography database: publications dated 2023 (journal articles, book
% chapters, proceedings papers, one preprint, one thesis).
%
% Conventions used in this file:
%   - one field per line, brace-delimited values;
%   - citation keys are unique; where several works share the same
%     author/year stem, the first keeps the plain key and later ones carry
%     a suffix b, c, d;
%   - non-ASCII characters are written as LaTeX escapes;
%   - words in titles that must keep their capitalisation are brace-protected;
%   - doi holds the bare DOI (no resolver prefix); page ranges use "--".

@article{RuebbelkeVoegeleGrajewskietal.2023,
  author     = {R{\"u}bbelke, Dirk and V{\"o}gele, Stefan and Grajewski, Matthias and Zobel, Luzy},
  title      = {Cross border adjustment mechanism: Initial data for the assessment of hydrogen-based steel production},
  journal    = {Data in Brief},
  volume     = {47},
  number     = {Article 108907},
  publisher  = {Elsevier},
  address    = {Amsterdam},
  issn       = {2352-3409},
  doi        = {10.1016/j.dib.2023.108907},
  pages      = {1--5},
  year       = {2023},
  abstract   = {Ambitious climate targets affect the competitiveness of industries in the international market. To prevent such industries from moving to other countries in the wake of increased climate protection efforts, cost adjustments may become necessary. Their design requires knowledge of country-specific production costs. Here, we present country-specific cost figures for different production routes of steel, paying particular attention to transportation costs. The data can be used in floor price models aiming to assess the competitiveness of different steel production routes in different countries (R{\"u}bbelke, 2022).},
  language   = {en},
}

@incollection{EngelmannBaumann2023,
  author     = {Engelmann, Ulrich M. and Baumann, Martin},
  title      = {Moderationsexpertise f{\"u}r {QMBs} - die {Methoden}},
  booktitle  = {Qualit{\"a}tsmanagement im Gesundheitswesen},
  editor     = {Herbig, Nicola and Poppelreuter, Stefan},
  edition    = {60. Update},
  publisher  = {T{\"U}V-Verlag},
  address    = {K{\"o}ln},
  isbn       = {978-3-8249-0714-4},
  pages      = {Kapitel 10815},
  year       = {2023},
  abstract   = {Damit Sie als Moderator effektiv und professionell moderieren k{\"o}nnen, sollten Sie die entsprechenden Methoden kennen. Mit den richtigen Methoden k{\"o}nnen Sie Diskussionen leiten, Konflikte l{\"o}sen, die Teilnehmer motivieren und daf{\"u}r sorgen, dass die Ziele der Veranstaltung erreicht werden. Au{\ss}erdem helfen sie Ihnen, eine positive Atmosph{\"a}re zu schaffen und das Interesse der Teilnehmer zu halten. In diesem zweiten Beitrag der mehrteiligen Serie lernen Sie die grunds{\"a}tzlichen Methoden kennen, um erfolgreiche Teamsitzungen, Arbeitsgruppentreffen, Kick-offs und Meetings durchzuf{\"u}hren.},
  language   = {de},
}

@article{BialonskiGrieger2023,
  author     = {Bialonski, Stephan and Grieger, Niklas},
  title      = {Der {KI-Chatbot} {ChatGPT}: Eine {Herausforderung} f{\"u}r die {Hochschulen}},
  journal    = {Die neue Hochschule},
  volume     = {2023},
  number     = {1},
  publisher  = {HLB},
  address    = {Bonn},
  issn       = {0340-448X},
  doi        = {10.5281/zenodo.7533758},
  pages      = {24--27},
  year       = {2023},
  abstract   = {Essays, Gedichte, Programmcode: ChatGPT generiert automatisch Texte auf bisher unerreicht hohem Niveau. Dieses und nachfolgende Systeme werden nicht nur die akademische Welt nachhaltig ver{\"a}ndern.},
  language   = {de},
}

@article{Gaigall2023,
  author     = {Gaigall, Daniel},
  title      = {Allocating and forecasting changes in risk},
  journal    = {Journal of risk},
  volume     = {25},
  number     = {3},
  editor     = {AitSahlia, Farid},
  publisher  = {Infopro Digital Risk},
  address    = {London},
  issn       = {1755-2842},
  doi        = {10.21314/JOR.2022.048},
  pages      = {1--24},
  year       = {2023},
  abstract   = {We consider time-dependent portfolios and discuss the allocation of changes in the risk of a portfolio to changes in the portfolio's components. For this purpose we adopt established allocation principles. We also use our approach to obtain forecasts for changes in the risk of the portfolio's components. To put the approach into practice we present an implementation based on the output of a simulation. Allocation is illustrated with an example portfolio in the context of Solvency II. The quality of the forecasts is investigated with an empirical study.},
  language   = {en},
}

% Key carries suffix "b": the plain key Gaigall2023 is already taken by the
% entry above, and repeated keys are rejected by BibTeX.
% NOTE(review): the isbn field below holds an ISSN-shaped value (NNNN-NNNN)
% although an issn field is also present -- TODO confirm and correct.
@article{Gaigall2023b,
  author     = {Gaigall, Daniel},
  title      = {On the applicability of several tests to models with not identically distributed random effects},
  journal    = {Statistics : A Journal of Theoretical and Applied Statistics},
  volume     = {57},
  publisher  = {Taylor \& Francis},
  address    = {London},
  isbn       = {0323-3944},
  issn       = {1029-4910},
  doi        = {10.1080/02331888.2023.2193748},
  pages      = {14 Seiten},
  year       = {2023},
  abstract   = {We consider Kolmogorov-Smirnov and Cram{\'e}r-von-Mises type tests for testing central symmetry, exchangeability, and independence. In the standard case, the tests are intended for the application to independent and identically distributed data with unknown distribution. The tests are available for multivariate data and bootstrap procedures are suitable to obtain critical values. We discuss the applicability of the tests to random effects models, where the random effects are independent but not necessarily identically distributed and with possibly unknown distributions. Theoretical results show the adequacy of the tests in this situation. The quality of the tests in models with random effects is investigated by simulations. Empirical results obtained confirm the theoretical findings. A real data example illustrates the application.},
  language   = {en},
}

@incollection{StollenwerkFranzkeMaureretal.2023,
  author     = {Stollenwerk, Dominik and Franzke, Till and Maurer, Florian and Reinkensmeier, Sebastian and Kim, Franken and Tambornino, Philipp and Haas, Florian and Rieke, Christian and Hermanuz, Andreas and Borchert, J{\"o}rg and Ritz, Thomas and Sander, Volker},
  title      = {Smarte {Lades{\"a}ulen} : Netz- und {Marktdienliches} {\"o}ffentliches {Laden}},
  booktitle  = {Towards the New Normal in Mobility : Technische und betriebswirtschaftliche Aspekte},
  editor     = {Proff, Heike},
  publisher  = {Springer Gabler},
  address    = {Wiesbaden},
  isbn       = {978-3-658-39437-0},
  doi        = {10.1007/978-3-658-39438-7_18},
  pages      = {287--304},
  year       = {2023},
  abstract   = {Stand 01.01.2022 sind in Deutschland 618.460 elektrisch angetriebene KFZ zugelassen. Insgesamt sind derzeit 48.540.878 KFZ zugelassen, was einer Elektromobilit{\"a}tsquote von ca. 1,2 \% entspricht. Derzeit werden Elektromobile {\"u}ber Ladestationen oder Steckdosen mit dem Stromnetz verbunden und {\"u}blicherweise mit der vollen Ladekapazit{\"a}t des Anschlusses aufgeladen, bis das Batteriemanagementsystem des Fahrzeugs abh{\"a}ngig vom Ladezustand der Batterie die Ladeleistung reduziert.},
  language   = {de},
}

@article{GaigallGerstenberg2023,
  author     = {Gaigall, Daniel and Gerstenberg, Julian},
  title      = {Cram{\'e}r-von-{Mises} tests for the distribution of the excess over a confidence level},
  journal    = {Journal of Nonparametric Statistics},
  publisher  = {Taylor \& Francis},
  issn       = {1048-5252},
  doi        = {10.1080/10485252.2023.2173958},
  year       = {2023},
  abstract   = {The Cram{\'e}r-von-Mises distance is applied to the distribution of the excess over a confidence level. Asymptotics of related statistics are investigated, and it is seen that the obtained limit distributions differ from the classical ones. For that reason, quantiles of the new limit distributions are given and new bootstrap techniques for approximation purposes are introduced and justified. The results motivate new one-sample goodness-of-fit tests for the distribution of the excess over a confidence level and a new confidence interval for the related fitting error. Simulation studies investigate size and power of the tests as well as coverage probabilities of the confidence interval in the finite sample case. A practice-oriented application of the Cram{\'e}r-von-Mises tests is the determination of an appropriate confidence level for the fitting approach. The adoption of the idea to the well-known problem of threshold detection in the context of peaks over threshold modelling is sketched and illustrated by data examples.},
  language   = {en},
}

@article{LiphardtFernandezGonzaloAlbrachtetal.2023,
  author     = {Liphardt, Anna-Maria and Fernandez-Gonzalo, Rodrigo and Albracht, Kirsten and Rittweger, J{\"o}rn and Vico, Laurence},
  title      = {Musculoskeletal research in human space flight - unmet needs for the success of crewed deep space exploration},
  journal    = {npj Microgravity},
  volume     = {9},
  number     = {Article number: 9},
  publisher  = {Springer Nature},
  issn       = {2373-8065},
  doi        = {10.1038/s41526-023-00258-3},
  pages      = {1--9},
  year       = {2023},
  abstract   = {Based on the European Space Agency (ESA) Science in Space Environment (SciSpacE) community White Paper ``Human Physiology - Musculoskeletal system'', this perspective highlights unmet needs and suggests new avenues for future studies in musculoskeletal research to enable crewed exploration missions. The musculoskeletal system is essential for sustaining physical function and energy metabolism, and the maintenance of health during exploration missions, and consequently mission success, will be tightly linked to musculoskeletal function. Data collection from current space missions from pre-, during-, and post-flight periods would provide important information to understand and ultimately offset musculoskeletal alterations during long-term spaceflight. In addition, understanding the kinetics of the different components of the musculoskeletal system in parallel with a detailed description of the molecular mechanisms driving these alterations appears to be the best approach to address potential musculoskeletal problems that future exploratory-mission crew will face. These research efforts should be accompanied by technical advances in molecular and phenotypic monitoring tools to provide in-flight real-time feedback.},
  language   = {en},
}

@article{RingersBialonskiEgeetal.2023,
  author     = {Ringers, Christa and Bialonski, Stephan and Ege, Mert and Solovev, Anton and Hansen, Jan Niklas and Jeong, Inyoung and Friedrich, Benjamin M. and Jurisch-Yaksi, Nathalie},
  title      = {Novel analytical tools reveal that local synchronization of cilia coincides with tissue-scale metachronal waves in zebrafish multiciliated epithelia},
  journal    = {eLife},
  volume     = {12},
  publisher  = {eLife Sciences Publications},
  issn       = {2050-084X},
  doi        = {10.7554/eLife.77701},
  pages      = {27 Seiten},
  year       = {2023},
  abstract   = {Motile cilia are hair-like cell extensions that beat periodically to generate fluid flow along various epithelial tissues within the body. In dense multiciliated carpets, cilia were shown to exhibit a remarkable coordination of their beat in the form of traveling metachronal waves, a phenomenon which supposedly enhances fluid transport. Yet, how cilia coordinate their regular beat in multiciliated epithelia to move fluids remains insufficiently understood, particularly due to lack of rigorous quantification. We combine experiments, novel analysis tools, and theory to address this knowledge gap. To investigate collective dynamics of cilia, we studied zebrafish multiciliated epithelia in the nose and the brain. We focused mainly on the zebrafish nose, due to its conserved properties with other ciliated tissues and its superior accessibility for non-invasive imaging. We revealed that cilia are synchronized only locally and that the size of local synchronization domains increases with the viscosity of the surrounding medium. Even though synchronization is local only, we observed global patterns of traveling metachronal waves across the zebrafish multiciliated epithelium. Intriguingly, these global wave direction patterns are conserved across individual fish, but different for left and right noses, unveiling a chiral asymmetry of metachronal coordination. To understand the implications of synchronization for fluid pumping, we used a computational model of a regular array of cilia. We found that local metachronal synchronization prevents steric collisions, i.e., cilia colliding with each other, and improves fluid pumping in dense cilia carpets, but hardly affects the direction of fluid flow. In conclusion, we show that local synchronization together with tissue-scale cilia alignment coincide and generate metachronal wave patterns in multiciliated epithelia, which enhance their physiological function of fluid pumping.},
  language   = {en},
}

% Key carries suffix "b": the plain key EngelmannBaumann2023 is already taken
% by an earlier entry in this file.
@incollection{EngelmannBaumann2023b,
  author     = {Engelmann, Ulrich M. and Baumann, Martin},
  title      = {Moderationsexpertise f{\"u}r {QMBs} - die {Methoden}},
  booktitle  = {Qualit{\"a}tsmanagement in Dienstleistungsunternehmen},
  editor     = {Thomann, Hermann and Tr{\"a}ger, Thomas},
  publisher  = {T{\"U}V-Verlag},
  address    = {K{\"o}ln},
  isbn       = {978-3-8249-0473-0},
  pages      = {Kapitel 08631},
  year       = {2023},
  abstract   = {Damit Sie als Moderator effektiv und professionell moderieren k{\"o}nnen, sollten Sie die entsprechenden Methoden kennen. Mit den richtigen Methoden k{\"o}nnen Sie Diskussionen leiten, Konflikte l{\"o}sen, die Teilnehmer motivieren und daf{\"u}r sorgen, dass die Ziele der Veranstaltung erreicht werden. Au{\ss}erdem helfen sie Ihnen, eine positive Atmosph{\"a}re zu schaffen und das Interesse der Teilnehmer zu halten. In diesem zweiten Beitrag der mehrteiligen Serie lernen Sie die grunds{\"a}tzlichen Methoden kennen, um erfolgreiche Teamsitzungen, Arbeitsgruppentreffen, Kick-offs und Meetings durchzuf{\"u}hren.},
  language   = {de},
}

% Key carries suffix "c" for the same reason (third use of the key stem).
@incollection{EngelmannBaumann2023c,
  author     = {Engelmann, Ulrich M. and Baumann, Martin},
  title      = {Moderationsexpertise - die {Methoden}},
  booktitle  = {IT-Servicemanagement},
  editor     = {Lindinger, Markus and Bartsch, Oliver},
  edition    = {54. Update},
  publisher  = {T{\"U}V-Verlag},
  address    = {K{\"o}ln},
  isbn       = {978-3-8249-1154-7},
  pages      = {Kapitel 05531},
  year       = {2023},
  abstract   = {Damit Sie als Moderator effektiv und professionell moderieren k{\"o}nnen, sollten Sie die entsprechenden Methoden kennen. Mit den richtigen Methoden k{\"o}nnen Sie Diskussionen leiten, Konflikte l{\"o}sen, die Teilnehmer motivieren und daf{\"u}r sorgen, dass die Ziele der Veranstaltung erreicht werden. Au{\ss}erdem helfen sie Ihnen, eine positive Atmosph{\"a}re zu schaffen und das Interesse der Teilnehmer zu halten. In diesem zweiten Beitrag der mehrteiligen Serie lernen Sie die grunds{\"a}tzlichen Methoden kennen, um erfolgreiche Teamsitzungen, Arbeitsgruppentreffen, Kick-offs und Meetings durchzuf{\"u}hren.},
  language   = {de},
}

% Key carries suffix "d" for the same reason (fourth use of the key stem).
@incollection{EngelmannBaumann2023d,
  author     = {Engelmann, Ulrich M. and Baumann, Martin},
  title      = {Moderationsexpertise f{\"u}r {QMBs} - {Onlinemoderation}},
  booktitle  = {Qualit{\"a}tsmanagement im Gesundheitswesen},
  editor     = {Herbig, Nicola and Poppelreuter, Stefan},
  publisher  = {T{\"U}V-Verlag},
  address    = {K{\"o}ln},
  isbn       = {978-3-8249-0714-4},
  pages      = {Kapitel 10816},
  year       = {2023},
  abstract   = {Damit Sie auch in den immer h{\"a}ufiger werdenden Onlineveranstaltungen als Moderator gut bestehen, sollten Sie wissen, was bei der Onlinemoderation im Besonderen zu beachten ist. In diesem dritten Teil der Beitragsserie erfahren Sie, warum online anders als offline ist. Die technischen M{\"o}glichkeiten werden vorgestellt und auch wie diese zu nutzen sind. Schlie{\ss}lich erhalten Sie Tipps, die Sie beim Sprechen online beachten sollten.},
  language   = {de},
}

@incollection{Engelmann2023,
  author     = {Engelmann, Ulrich M.},
  title      = {Moderationsexpertise - {Onlinemoderation}},
  booktitle  = {IT-Servicemanagement},
  editor     = {Lindinger, Markus and Bartsch, Oliver},
  publisher  = {T{\"U}V-Verlag},
  address    = {K{\"o}ln},
  isbn       = {978-3-8249-1154-7},
  pages      = {Kapitel 05532},
  year       = {2023},
  abstract   = {Damit Sie auch in den immer h{\"a}ufiger werdenden Onlineveranstaltungen als Moderator gut bestehen, sollten Sie wissen, was bei der Onlinemoderation im Besonderen zu beachten ist. In diesem dritten Teil der Beitragsserie erfahren Sie, warum online anders als offline ist. Die technischen M{\"o}glichkeiten werden vorgestellt und auch wie diese zu nutzen sind. Schlie{\ss}lich erhalten Sie Tipps, die Sie beim Sprechen online beachten sollten.},
  language   = {de},
}

% volume/number follow the DOI suffix (chemosensors 11 08 0445), which also
% agrees with the other Chemosensors entry in this file (volume 11, number 8).
@article{BertzMolinnusSchoeningetal.2023,
  author     = {Bertz, Morten and Molinnus, Denise and Sch{\"o}ning, Michael Josef and Homma, Takayuki},
  title      = {Real-time monitoring of {H$_2$O$_2$} sterilization on individual {Bacillus} atrophaeus spores by optical sensing with trapping {Raman} spectroscopy},
  journal    = {Chemosensors},
  volume     = {11},
  number     = {8},
  publisher  = {MDPI},
  address    = {Basel},
  issn       = {2227-9040},
  doi        = {10.3390/chemosensors11080445},
  pages      = {Artikel 445},
  year       = {2023},
  abstract   = {Hydrogen peroxide (H$_2$O$_2$), a strong oxidizer, is a commonly used sterilization agent employed during aseptic food processing and medical applications. To assess the sterilization efficiency with H$_2$O$_2$, bacterial spores are common microbial systems due to their remarkable robustness against a wide variety of decontamination strategies. Despite their widespread use, there is, however, only little information about the detailed time-resolved mechanism underlying the oxidative spore death by H$_2$O$_2$. In this work, we investigate chemical and morphological changes of individual Bacillus atrophaeus spores undergoing oxidative damage using optical sensing with trapping Raman microscopy in real-time. The time-resolved experiments reveal that spore death involves two distinct phases: (i) an initial phase dominated by the fast release of dipicolinic acid (DPA), a major spore biomarker, which indicates the rupture of the spore's core; and (ii) the oxidation of the remaining spore material resulting in the subsequent fragmentation of the spores' coat. Simultaneous observation of the spore morphology by optical microscopy corroborates these mechanisms. The dependence of the onset of DPA release and the time constant of spore fragmentation on H$_2$O$_2$ shows that the formation of reactive oxygen species from H$_2$O$_2$ is the rate-limiting factor of oxidative spore death.},
  language   = {en},
}

% volume/number follow the DOI suffix (v 15 09 1951); doi is stored bare,
% without the doi.org resolver prefix.
@article{WendlandtKochBritzetal.2023,
  author     = {Wendlandt, Tim and Koch, Claudia and Britz, Beate and Liedek, Anke and Schmidt, Nora and Werner, Stefan and Gleba, Yuri and Vahidpour, Farnoosh and Welden, Melanie and Poghossian, Arshak and Sch{\"o}ning, Michael Josef},
  title      = {Facile Purification and Use of Tobamoviral Nanocarriers for Antibody-Mediated Display of a Two-Enzyme System},
  journal    = {Viruses},
  volume     = {15},
  number     = {9},
  publisher  = {MDPI},
  address    = {Basel},
  issn       = {1999-4915},
  doi        = {10.3390/v15091951},
  pages      = {Artikel 1951},
  year       = {2023},
  abstract   = {Immunosorbent turnip vein clearing virus (TVCV) particles displaying the IgG-binding domains D and E of Staphylococcus aureus protein A (PA) on every coat protein (CP) subunit (TVCVPA) were purified from plants via optimized and new protocols. The latter used polyethylene glycol (PEG) raw precipitates, from which virions were selectively re-solubilized in reverse PEG concentration gradients. This procedure improved the integrity of both TVCVPA and the wild-type subgroup 3 tobamovirus. TVCVPA could be loaded with more than 500 IgGs per virion, which mediated the immunocapture of fluorescent dyes, GFP, and active enzymes. Bi-enzyme ensembles of cooperating glucose oxidase and horseradish peroxidase were tethered together on the TVCVPA carriers via a single antibody type, with one enzyme conjugated chemically to its Fc region, and the other one bound as a target, yielding synthetic multi-enzyme complexes. In microtiter plates, the TVCVPA-displayed sugar-sensing system possessed a considerably increased reusability upon repeated testing, compared to the IgG-bound enzyme pair in the absence of the virus. A high coverage of the viral adapters was also achieved on Ta2O5 sensor chip surfaces coated with a polyelectrolyte interlayer, as a prerequisite for durable TVCVPA-assisted electrochemical biosensing via modularly IgG-assembled sensor enzymes.},
  language   = {en},
}

% Preprint entry: the preprint status lives in the note field (required for
% this entry type) rather than as a title prefix; the arXiv identifier in
% eprint is taken from the DOI. The citation key is kept unchanged so that
% existing citations continue to resolve.
@unpublished{BornheimNiklasBlanecketal.2023,
  author     = {Bornheim, Tobias and Grieger, Niklas and Blaneck, Patrick Gustav and Bialonski, Stephan},
  title      = {Speaker attribution in {German} parliamentary debates with {QLoRA}-adapted large language models},
  journal    = {Journal for Language Technology and Computational Linguistics},
  note       = {Preprint},
  eprint     = {2309.09902},
  eprinttype = {arXiv},
  doi        = {10.48550/arXiv.2309.09902},
  pages      = {8 Seiten},
  year       = {2023},
  abstract   = {The growing body of political texts opens up new opportunities for rich insights into political dynamics and ideologies but also increases the workload for manual analysis. Automated speaker attribution, which detects who said what to whom in a speech event and is closely related to semantic role labeling, is an important processing step for computational text analysis. We study the potential of the large language model family Llama 2 to automate speaker attribution in German parliamentary debates from 2017-2021. We fine-tune Llama 2 with QLoRA, an efficient training strategy, and observe our approach to achieve competitive performance in the GermEval 2023 Shared Task On Speaker Attribution in German News Articles and Parliamentary Debates. Our results shed light on the capabilities of large language models in automating speaker attribution, revealing a promising avenue for computational analysis of political discourse and the development of semantic role labeling systems.},
  language   = {en},
}

@article{MoraisSumanSchoeningetal.2023,
  author     = {Morais, Paulo V. and Suman, Pedro H. and Sch{\"o}ning, Michael Josef and Siqueira Junior, Jos{\'e} R. and Orlandi, Marcelo O.},
  title      = {Layer-by-layer film based on {Sn$_3$O$_4$} nanobelts as sensing units to detect heavy metals using a capacitive field-effect sensor platform},
  journal    = {Chemosensors},
  volume     = {11},
  number     = {8},
  publisher  = {MDPI},
  address    = {Basel},
  issn       = {2227-9040},
  doi        = {10.3390/chemosensors11080436},
  pages      = {Artikel 436},
  year       = {2023},
  abstract   = {Lead and nickel, as heavy metals, are still used in industrial processes, and are classified as ``environmental health hazards'' due to their toxicity and polluting potential. The detection of heavy metals can prevent environmental pollution at toxic levels that are critical to human health. In this sense, the electrolyte-insulator-semiconductor (EIS) field-effect sensor is an attractive sensing platform concerning the fabrication of reusable and robust sensors to detect such substances. This study is aimed to fabricate a sensing unit on an EIS device based on Sn$_3$O$_4$ nanobelts embedded in a polyelectrolyte matrix of polyvinylpyrrolidone (PVP) and polyacrylic acid (PAA) using the layer-by-layer (LbL) technique. The EIS-Sn$_3$O$_4$ sensor exhibited enhanced electrochemical performance for detecting Pb$^{2+}$ and Ni$^{2+}$ ions, revealing a higher affinity for Pb$^{2+}$ ions, with sensitivities of ca. 25.8 mV/decade and 2.4 mV/decade, respectively. Such results indicate that Sn$_3$O$_4$ nanobelts can contemplate a feasible proof-of-concept capacitive field-effect sensor for heavy metal detection, envisaging other future studies focusing on environmental monitoring.},
  language   = {en},
}

% Key carries suffix "c": third use of the key stem Gaigall2023 in this file.
% NOTE(review): no school field is given for this thesis entry although the
% entry type requires one -- TODO add the degree-granting institution.
@phdthesis{Gaigall2023c,
  author     = {Gaigall, Daniel},
  title      = {On selected problems in multivariate analysis},
  doi        = {10.15488/14304},
  pages      = {17 Seiten},
  year       = {2023},
  abstract   = {Selected problems in the field of multivariate statistical analysis are treated. Thereby, one focus is on the paired sample case. Among other things, statistical testing problems of marginal homogeneity are under consideration. In detail, properties of Hotelling's T$^2$ test in a special parametric situation are obtained. Moreover, the nonparametric problem of marginal homogeneity is discussed on the basis of possibly incomplete data. In the bivariate data case, properties of the Hoeffding-Blum-Kiefer-Rosenblatt independence test statistic on the basis of partly not identically distributed data are investigated. Similar testing problems are treated within the scope of the application of a result for the empirical process of the concomitants for partly categorial data. Furthermore, testing changes in the modeled solvency capital requirement of an insurance company by means of a paired sample from an internal risk model is discussed. Beyond the paired sample case, a new asymptotic relative efficiency concept based on the expected volumes of multidimensional confidence regions is introduced. Besides, a new approach for the treatment of the multi-sample goodness-of-fit problem is presented. Finally, a consistent test for the treatment of the goodness-of-fit problem is developed for the background of huge or infinite dimensional data.},
  language   = {en},
}

@inproceedings{BuesgenKloeserKohletal.2023,
  author     = {B{\"u}sgen, Andr{\'e} and Kl{\"o}ser, Lars and Kohl, Philipp and Schmidts, Oliver and Kraft, Bodo and Z{\"u}ndorf, Albert},
  title      = {From cracked accounts to fake {IDs}: user profiling on {German} {Telegram} black market channels},
  booktitle  = {Data Management Technologies and Applications},
  editor     = {Cuzzocrea, Alfredo and Gusikhin, Oleg and Hammoudi, Slimane and Quix, Christoph},
  publisher  = {Springer},
  address    = {Cham},
  isbn       = {978-3-031-37889-8},
  doi        = {10.1007/978-3-031-37890-4_9},
  pages      = {176--202},
  year       = {2023},
  abstract   = {Messenger apps like WhatsApp and Telegram are frequently used for everyday communication, but they can also be utilized as a platform for illegal activity. Telegram allows public groups with up to 200.000 participants. Criminals use these public groups for trading illegal commodities and services, which becomes a concern for law enforcement agencies, who manually monitor suspicious activity in these chat rooms. This research demonstrates how natural language processing (NLP) can assist in analyzing these chat rooms, providing an explorative overview of the domain and facilitating purposeful analyses of user behavior. We provide a publicly available corpus of annotated text messages with entities and relations from four self-proclaimed black market chat rooms. Our pipeline approach aggregates the extracted product attributes from user messages to profiles and uses these with their sold products as features for clustering. The extracted structured information is the foundation for further data exploration, such as identifying the top vendors or fine-granular price analyses. Our evaluation shows that pretrained word vectors perform better for unsupervised clustering than state-of-the-art transformer models, while the latter is still superior for sequence labeling.},
  language   = {en},
}

@inproceedings{KohlFreyerKraemeretal.2023,
  author     = {Kohl, Philipp and Freyer, Nils and Kr{\"a}mer, Yoka and Werth, Henri and Wolf, Steffen and Kraft, Bodo and Meinecke, Matthias and Z{\"u}ndorf, Albert},
  title      = {{ALE}: a simulation-based active learning evaluation framework for the parameter-driven comparison of query strategies for {NLP}},
  booktitle  = {Deep Learning Theory and Applications},
  editor     = {Conte, Donatello and Fred, Ana and Gusikhin, Oleg and Sansone, Carlo},
  publisher  = {Springer},
  address    = {Cham},
  isbn       = {978-3-031-39058-6},
  doi        = {10.1007/978-3-031-39059-3_16},
  pages      = {235--253},
  year       = {2023},
  abstract   = {Supervised machine learning and deep learning require a large amount of labeled data, which data scientists obtain in a manual, and time-consuming annotation process. To mitigate this challenge, Active Learning (AL) proposes promising data points to annotators they annotate next instead of a subsequent or random sample. This method is supposed to save annotation effort while maintaining model performance. However, practitioners face many AL strategies for different tasks and need an empirical basis to choose between them. Surveys categorize AL strategies into taxonomies without performance indications. Presentations of novel AL strategies compare the performance to a small subset of strategies. Our contribution addresses the empirical basis by introducing a reproducible active learning evaluation (ALE) framework for the comparative evaluation of AL strategies in NLP. The framework allows the implementation of AL strategies with low effort and a fair data-driven comparison through defining and tracking experiment parameters (e.g., initial dataset size, number of data points per query step, and the budget). ALE helps practitioners to make more informed decisions, and researchers can focus on developing new, effective AL strategies and deriving best practices for specific use cases. With best practices, practitioners can lower their annotation costs. We present a case study to illustrate how to use the framework.},
  language   = {en},
}