2020 |
Tornede, Tanja; Tornede, Alexander; Wever, Marcel; Mohr, Felix; Hüllermeier, Eyke AutoML for Predictive Maintenance: One Tool to RUL them all Inproceedings Forthcoming. @inproceedings{Tornede_Tornede_Wever_Mohr_Huellermeier_2020, title = {AutoML for Predictive Maintenance: One Tool to RUL them all}, author = {Tanja Tornede and Alexander Tornede and Marcel Wever and Felix Mohr and Eyke Hüllermeier}, url = {https://ris.uni-paderborn.de/record/17424}, year = {2020}, date = {2020-09-14}, keywords = {}, pubstate = {forthcoming}, tppubtype = {inproceedings} } |
Braun, Marco; Wrede, Sebastian Incorporation of Expert Knowledge for Learning Robotic Assembly Tasks Inproceedings 2020 IEEE 25th International Conference on Emerging Technologies and Factory Automation (ETFA), IEEE, 2020, ISBN: 978-1-7281-8956-7. @inproceedings{Braun2020, title = {Incorporation of Expert Knowledge for Learning Robotic Assembly Tasks}, author = {Marco Braun and Sebastian Wrede}, url = {https://ieeexplore.ieee.org/document/9211917/}, doi = {10.1109/ETFA46521.2020.9211917}, isbn = {978-1-7281-8956-7}, year = {2020}, date = {2020-09-08}, booktitle = {2020 IEEE 25th International Conference on Emerging Technologies and Factory Automation (ETFA)}, publisher = {IEEE}, abstract = {Autonomous learning of robotic manipulation tasks is a desirable proposition for the future of industrial manufacturing to increase flexibility and reduce manual engineering effort. In particular assembly tasks that require contact-rich manipulation skills are challenging to accomplish with classical robotic control methods. The Reinforcement Learning (RL) framework provides a possibility to learn complex behaviors based on interaction with the environment. Although a lot of research has been done robotic assembly tasks remain a challenge for pure learning-based systems. In this paper we give an overview on grey-box learning approaches that integrate prior knowledge and learning based methods. Different dimensions of knowledge injection are identified, and knowledge representations are described. These representations are discussed in the context of industrial assembly processes to answer the question: how can process experts model their knowledge to boost RL approaches in the context of industrial assembly?}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Autonomous learning of robotic manipulation tasks is a desirable proposition for the future of industrial manufacturing to increase flexibility and reduce manual engineering effort. 
In particular assembly tasks that require contact-rich manipulation skills are challenging to accomplish with classical robotic control methods. The Reinforcement Learning (RL) framework provides a possibility to learn complex behaviors based on interaction with the environment. Although a lot of research has been done robotic assembly tasks remain a challenge for pure learning-based systems. In this paper we give an overview on grey-box learning approaches that integrate prior knowledge and learning based methods. Different dimensions of knowledge injection are identified, and knowledge representations are described. These representations are discussed in the context of industrial assembly processes to answer the question: how can process experts model their knowledge to boost RL approaches in the context of industrial assembly? |
Pfannschmidt, Lukas; Jakob, Jonathan; Hinder, Fabian; Biehl, Michael; Tino, Peter; Hammer, Barbara Neurocomputing, 2020. @article{pfannschmidt_feature_2019, title = {Feature Relevance Determination for Ordinal Regression in the Context of Feature Redundancies and Privileged Information}, author = {Lukas Pfannschmidt and Jonathan Jakob and Fabian Hinder and Michael Biehl and Peter Tino and Barbara Hammer}, url = {https://www.sciencedirect.com/science/article/abs/pii/S0925231220305038}, doi = {10.1016/j.neucom.2019.12.133}, year = {2020}, date = {2020-04-09}, urldate = {2020-03-20}, journal = {Neurocomputing}, abstract = {Advances in machine learning technologies have led to increasingly powerful models in particular in the context of big data. Yet, many application scenarios demand for robustly interpretable models rather than optimum model accuracy; as an example, this is the case if potential biomarkers or causal factors should be discovered based on a set of given measurements. In this contribution, we focus on feature selection paradigms, which enable us to uncover relevant factors of a given regularity based on a sparse model. We focus on the important specific setting of linear ordinal regression, i.e. data have to be ranked into one of a finite number of ordered categories by a linear projection. Unlike previous work, we consider the case that features are potentially redundant, such that no unique minimum set of relevant features exists. We aim for an identification of all strongly and all weakly relevant features as well as their type of relevance (strong or weak); we achieve this goal by determining feature relevance bounds, which correspond to the minimum and maximum feature relevance, respectively, if searched over all equivalent models. In addition, we discuss how this setting enables us to substitute some of the features, e.g. 
due to their semantics, and how to extend the framework of feature relevance intervals to the setting of privileged information, i.e. potentially relevant information is available for training purposes only, but cannot be used for the prediction itself.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Advances in machine learning technologies have led to increasingly powerful models in particular in the context of big data. Yet, many application scenarios demand for robustly interpretable models rather than optimum model accuracy; as an example, this is the case if potential biomarkers or causal factors should be discovered based on a set of given measurements. In this contribution, we focus on feature selection paradigms, which enable us to uncover relevant factors of a given regularity based on a sparse model. We focus on the important specific setting of linear ordinal regression, i.e. data have to be ranked into one of a finite number of ordered categories by a linear projection. Unlike previous work, we consider the case that features are potentially redundant, such that no unique minimum set of relevant features exists. We aim for an identification of all strongly and all weakly relevant features as well as their type of relevance (strong or weak); we achieve this goal by determining feature relevance bounds, which correspond to the minimum and maximum feature relevance, respectively, if searched over all equivalent models. In addition, we discuss how this setting enables us to substitute some of the features, e.g. due to their semantics, and how to extend the framework of feature relevance intervals to the setting of privileged information, i.e. potentially relevant information is available for training purposes only, but cannot be used for the prediction itself. |
Shaker, Mohammad Hossein; Hüllermeier, Eyke Aleatoric and Epistemic Uncertainty with Random Forests Artikel arXiv:2001.00893 [cs, stat], 2020, (arXiv: 2001.00893). @article{shaker_aleatoric_2020, title = {Aleatoric and Epistemic Uncertainty with Random Forests}, author = {Mohammad Hossein Shaker and Eyke Hüllermeier}, url = {http://arxiv.org/abs/2001.00893}, year = {2020}, date = {2020-01-01}, urldate = {2020-01-14}, journal = {arXiv:2001.00893 [cs, stat]}, abstract = {Due to the steadily increasing relevance of machine learning for practical applications, many of which are coming with safety requirements, the notion of uncertainty has received increasing attention in machine learning research in the last couple of years. In particular, the idea of distinguishing between two important types of uncertainty, often referred to as aleatoric and epistemic, has recently been studied in the setting of supervised learning. In this paper, we propose to quantify these uncertainties with random forests. More specifically, we show how two general approaches for measuring the learner's aleatoric and epistemic uncertainty in a prediction can be instantiated with decision trees and random forests as learning algorithms in a classification setting. In this regard, we also compare random forests with deep neural networks, which have been used for a similar purpose.}, note = {arXiv: 2001.00893}, keywords = {}, pubstate = {published}, tppubtype = {article} } Due to the steadily increasing relevance of machine learning for practical applications, many of which are coming with safety requirements, the notion of uncertainty has received increasing attention in machine learning research in the last couple of years. In particular, the idea of distinguishing between two important types of uncertainty, often referred to as aleatoric and epistemic, has recently been studied in the setting of supervised learning. 
In this paper, we propose to quantify these uncertainties with random forests. More specifically, we show how two general approaches for measuring the learner's aleatoric and epistemic uncertainty in a prediction can be instantiated with decision trees and random forests as learning algorithms in a classification setting. In this regard, we also compare random forests with deep neural networks, which have been used for a similar purpose. |
Schulz, Alexander; Hinder, Fabian; Hammer, Barbara Bessiere, Christian (Hrsg.): Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence, IJCAI-20, S. 2305–2311, International Joint Conferences on Artificial Intelligence Organization, 2020, (Main track). @inproceedings{ijcai2020-319, title = {DeepView: Visualizing Classification Boundaries of Deep Neural Networks as Scatter Plots Using Discriminative Dimensionality Reduction}, author = {Alexander Schulz and Fabian Hinder and Barbara Hammer}, editor = {Christian Bessiere}, url = {https://doi.org/10.24963/ijcai.2020/319}, doi = {10.24963/ijcai.2020/319}, year = {2020}, date = {2020-01-01}, booktitle = {Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence, IJCAI-20}, pages = {2305--2311}, publisher = {International Joint Conferences on Artificial Intelligence Organization}, abstract = {Machine learning algorithms using deep architectures have been able to implement increasingly powerful and successful models. However, they also become increasingly more complex, more difficult to comprehend and easier to fool. So far, most methods in the literature investigate the decision of the model for a single given input datum. In this paper, we propose to visualize a part of the decision function of a deep neural network together with a part of the data set in two dimensions with discriminative dimensionality reduction. This enables us to inspect how different properties of the data are treated by the model, such as outliers, adversaries or poisoned data. Further, the presented approach is complementary to the mentioned interpretation methods from the literature and hence might be even more useful in combination with those. 
Code is available at https://github.com/LucaHermes/DeepView}, note = {Main track}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Machine learning algorithms using deep architectures have been able to implement increasingly powerful and successful models. However, they also become increasingly more complex, more difficult to comprehend and easier to fool. So far, most methods in the literature investigate the decision of the model for a single given input datum. In this paper, we propose to visualize a part of the decision function of a deep neural network together with a part of the data set in two dimensions with discriminative dimensionality reduction. This enables us to inspect how different properties of the data are treated by the model, such as outliers, adversaries or poisoned data. Further, the presented approach is complementary to the mentioned interpretation methods from the literature and hence might be even more useful in combination with those. Code is available at https://github.com/LucaHermes/DeepView |
Holst, Christoph-Alexander; Lohweg, Volker A Redundancy Metric based on the Framework of Possibility Theory for Technical Systems Inproceedings Forthcoming 2020 IEEE 25th International Conference on Emerging Technologies and Factory Automation (ETFA), IEEE, Forthcoming. @inproceedings{Holst.2020, title = {A Redundancy Metric based on the Framework of Possibility Theory for Technical Systems}, author = {Christoph-Alexander Holst and Volker Lohweg}, year = {2020}, date = {2020-01-01}, booktitle = {2020 IEEE 25th International Conference on Emerging Technologies and Factory Automation (ETFA)}, publisher = {IEEE}, keywords = {}, pubstate = {forthcoming}, tppubtype = {inproceedings} } |
2019 |
Holst, Christoph-Alexander; Lohweg, Volker Feature fusion to increase the robustness of machine learners in industrial environments Artikel at - Automatisierungstechnik, 67 (10), S. 853–865, 2019. @article{holst2019feature, title = {Feature fusion to increase the robustness of machine learners in industrial environments}, author = {Christoph-Alexander Holst and Volker Lohweg}, doi = {10.1515/auto-2019-0028}, year = {2019}, date = {2019-01-01}, journal = {at - Automatisierungstechnik}, volume = {67}, number = {10}, pages = {853--865}, abstract = {Industrial applications put special demands on machine learning algorithms. Noisy data, outliers, and sensor faults present an immense challenge for learners. A considerable part of machine learning research focuses on the selection of relevant, non-redundant features. This contribution details an approach to group and fuse redundant features prior to learning and classification. Features are grouped relying on a correlation-based redundancy measure. The fusion of features is guided by determining the majority observation based on possibility distributions. Furthermore, this paper studies the effects of feature fusion on the robustness and performance of classification with a focus on industrial applications. The approach is statistically evaluated on public datasets in comparison to classification on selected features only.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Industrial applications put special demands on machine learning algorithms. Noisy data, outliers, and sensor faults present an immense challenge for learners. A considerable part of machine learning research focuses on the selection of relevant, non-redundant features. This contribution details an approach to group and fuse redundant features prior to learning and classification. Features are grouped relying on a correlation-based redundancy measure. 
The fusion of features is guided by determining the majority observation based on possibility distributions. Furthermore, this paper studies the effects of feature fusion on the robustness and performance of classification with a focus on industrial applications. The approach is statistically evaluated on public datasets in comparison to classification on selected features only. |
Pfannschmidt, Lukas; Jakob, Jonathan; Biehl, Michael; Tino, Peter; Hammer, Barbara Feature Relevance Bounds for Ordinal Regression Artikel Proc. European Symposium on Artificial Neural Networks, 2019., 2019. @article{pfannschmidt2019feature, title = {Feature Relevance Bounds for Ordinal Regression}, author = {Lukas Pfannschmidt and Jonathan Jakob and Michael Biehl and Peter Tino and Barbara Hammer}, url = {https://www.elen.ucl.ac.be/Proceedings/esann/esannpdf/es2019-162.pdf}, year = {2019}, date = {2019-01-01}, journal = {Proc. European Symposium on Artificial Neural Networks, 2019.}, abstract = {The increasing occurrence of ordinal data, mainly sociodemographic, led to a renewed research interest in ordinal regression, i.e. the prediction of ordered classes. Besides model accuracy, the interpretation of these models itself is of high relevance, and existing approaches therefore enforce e.g. model sparsity. For high dimensional or highly correlated data, however, this might be misleading due to strong variable dependencies. In this contribution, we aim for an identification of feature relevance bounds which – besides identifying all relevant features – explicitly differentiates between strongly and weakly relevant features.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The increasing occurrence of ordinal data, mainly sociodemographic, led to a renewed research interest in ordinal regression, i.e. the prediction of ordered classes. Besides model accuracy, the interpretation of these models itself is of high relevance, and existing approaches therefore enforce e.g. model sparsity. For high dimensional or highly correlated data, however, this might be misleading due to strong variable dependencies. In this contribution, we aim for an identification of feature relevance bounds which – besides identifying all relevant features – explicitly differentiates between strongly and weakly relevant features. |
Brinkrolf, Johannes; Hammer, Barbara Time integration and reject options for probabilistic output of pairwise LVQ Artikel Neural Computing and Applications, S. 1–14, 2019. @article{brinkrolf2019time, title = {Time integration and reject options for probabilistic output of pairwise LVQ}, author = {Johannes Brinkrolf and Barbara Hammer}, url = {https://link.springer.com/content/pdf/10.1007/s00521-018-03966-0.pdf}, doi = {10.1007/s00521-018-03966-0}, year = {2019}, date = {2019-01-01}, journal = {Neural Computing and Applications}, pages = {1--14}, publisher = {Springer}, abstract = {Learning vector quantization (LVQ) constitutes a very popular machine learning technology with applications, for example, in biomedical data analysis, predictive maintenance/quality as well as product individualization. Albeit probabilistic LVQ variants exist, its deterministic counterparts are often preferred due to their better efficiency. The latter do not allow an immediate probabilistic interpretation of its output; hence, a rejection of classification based on confidence values is not possible. In this contribution, we investigate different schemes how to extend and integrate pairwise LVQ schemes to an overall probabilistic output, in comparison with a recent heuristic surrogate measure for the security of the classification, which is directly based on LVQ’s multi-class classification scheme. Furthermore, we propose a canonic way how to fuse these values over a given time window in case a possibly disrupted measurement is taken over a longer time interval to counter the uncertainty of a single point in time. Experimental results indicate that an explicit probabilistic treatment often yields superior results as compared to a standard deterministic LVQ method, but metric learning is able to annul this difference. 
Fusion over a short time period is beneficial in case of an unclear classification.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Learning vector quantization (LVQ) constitutes a very popular machine learning technology with applications, for example, in biomedical data analysis, predictive maintenance/quality as well as product individualization. Albeit probabilistic LVQ variants exist, its deterministic counterparts are often preferred due to their better efficiency. The latter do not allow an immediate probabilistic interpretation of its output; hence, a rejection of classification based on confidence values is not possible. In this contribution, we investigate different schemes how to extend and integrate pairwise LVQ schemes to an overall probabilistic output, in comparison with a recent heuristic surrogate measure for the security of the classification, which is directly based on LVQ’s multi-class classification scheme. Furthermore, we propose a canonic way how to fuse these values over a given time window in case a possibly disrupted measurement is taken over a longer time interval to counter the uncertainty of a single point in time. Experimental results indicate that an explicit probabilistic treatment often yields superior results as compared to a standard deterministic LVQ method, but metric learning is able to annul this difference. Fusion over a short time period is beneficial in case of an unclear classification. |
Bifet, Albert; Hammer, Barbara; Schleif, Frank-Michael Recent trends in streaming data analysis, concept drift and analysis of dynamic data sets Artikel Proc. European Symposium on Artificial Neural Networks, 2019. @article{bifet2019streaming, title = {Recent trends in streaming data analysis, concept drift and analysis of dynamic data sets}, author = {Albert Bifet and Barbara Hammer and Frank-Michael Schleif}, url = {https://www.elen.ucl.ac.be/Proceedings/esann/esannpdf/es2019-3.pdf}, year = {2019}, date = {2019-01-01}, journal = {Proc. European Symposium on Artificial Neural Networks}, abstract = {Today, many data are not any longer static but occur as dynamic data streams with high velocity, variability and volume. This leads to new challenges to be addressed by novel or adapted algorithms. In this tutorial we provide an introduction into the field of streaming data analysis summarizing its major characteristics and highlighting important research directions in the analysis of dynamic data.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Today, many data are not any longer static but occur as dynamic data streams with high velocity, variability and volume. This leads to new challenges to be addressed by novel or adapted algorithms. In this tutorial we provide an introduction into the field of streaming data analysis summarizing its major characteristics and highlighting important research directions in the analysis of dynamic data. |
Göpfert, Christina; Ben-David, Shai; Bousquet, Olivier; Gelly, Sylvain; Tolstikhin, Ilya; Urner, Ruth When can unlabeled data improve the learning rate? Artikel arXiv preprint arXiv:1905.11866, 2019. @article{gopfert2019can, title = {When can unlabeled data improve the learning rate?}, author = {Christina Göpfert and Shai Ben-David and Olivier Bousquet and Sylvain Gelly and Ilya Tolstikhin and Ruth Urner}, url = {https://arxiv.org/abs/1905.11866}, year = {2019}, date = {2019-01-01}, journal = {arXiv preprint arXiv:1905.11866}, abstract = {In semi-supervised classification, one is given access both to labeled and unlabeled data. As unlabeled data is typically cheaper to acquire than labeled data, this setup becomes advantageous as soon as one can exploit the unlabeled data in order to produce a better classifier than with labeled data alone. However, the conditions under which such an improvement is possible are not fully understood yet. Our analysis focuses on improvements in the minimax learning rate in terms of the number of labeled examples (with the number of unlabeled examples being allowed to depend on the number of labeled ones). We argue that for such improvements to be realistic and indisputable, certain specific conditions should be satisfied and previous analyses have failed to meet those conditions. We then demonstrate examples where these conditions can be met, in particular showing rate changes from 1/√ℓ to e^−cℓ and from 1/√ℓ to 1/ℓ. These results improve our understanding of what is and isn't possible in semi-supervised learning.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In semi-supervised classification, one is given access both to labeled and unlabeled data. As unlabeled data is typically cheaper to acquire than labeled data, this setup becomes advantageous as soon as one can exploit the unlabeled data in order to produce a better classifier than with labeled data alone. 
However, the conditions under which such an improvement is possible are not fully understood yet. Our analysis focuses on improvements in the minimax learning rate in terms of the number of labeled examples (with the number of unlabeled examples being allowed to depend on the number of labeled ones). We argue that for such improvements to be realistic and indisputable, certain specific conditions should be satisfied and previous analyses have failed to meet those conditions. We then demonstrate examples where these conditions can be met, in particular showing rate changes from 1/√ℓ to e^−cℓ and from 1/√ℓ to 1/ℓ. These results improve our understanding of what is and isn't possible in semi-supervised learning. |
Eiteneuer, Benedikt; Hranisavljevic, Nemanja; Niggemann, Oliver Dimensionality Reduction and Anomaly Detection for CPPS Data using Autoencoder Artikel International Conference on Industrial Technology, 2019. @article{eiteneuer2019dimensionality, title = {Dimensionality Reduction and Anomaly Detection for CPPS Data using Autoencoder}, author = {Benedikt Eiteneuer and Nemanja Hranisavljevic and Oliver Niggemann}, url = {https://ieeexplore.ieee.org/document/8755116}, doi = {10.1109/ICIT.2019.8755116}, year = {2019}, date = {2019-01-01}, journal = {International Conference on Industrial Technology}, abstract = {Unsupervised anomaly detection (AD) is a major topic in the field of Cyber-Physical Production Systems (CPPSs). A closely related concern is dimensionality reduction (DR) which is: 1) often used as a preprocessing step in an AD solution, 2) a sort of AD, if a measure of observation conformity to the learned data manifold is provided. We argue that the two aspects can be complementary in a CPPS anomaly detection solution. In this work, we focus on the nonlinear autoencoder (AE) as a DR/AD approach. The contribution of this work is: 1) we examine the suitability of AE reconstruction error as an AD decision criterion in CPPS data. 2) we analyze its relation to a potential second-phase AD approach in the AE latent space 3) we evaluate the performance of the approach on three real-world datasets. Moreover, the approach outperforms state-of-the-art techniques, alongside a relatively simple and straightforward application.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Unsupervised anomaly detection (AD) is a major topic in the field of Cyber-Physical Production Systems (CPPSs). A closely related concern is dimensionality reduction (DR) which is: 1) often used as a preprocessing step in an AD solution, 2) a sort of AD, if a measure of observation conformity to the learned data manifold is provided. 
We argue that the two aspects can be complementary in a CPPS anomaly detection solution. In this work, we focus on the nonlinear autoencoder (AE) as a DR/AD approach. The contribution of this work is: 1) we examine the suitability of AE reconstruction error as an AD decision criterion in CPPS data. 2) we analyze its relation to a potential second-phase AD approach in the AE latent space 3) we evaluate the performance of the approach on three real-world datasets. Moreover, the approach outperforms state-of-the-art techniques, alongside a relatively simple and straightforward application. |