pyDNMFk is a software package for applying non-negative matrix factorization in a distributed fashion to large datasets. It has the ability to minimize the difference between reconstructed data and the original data through various norms (Frobenious, KL-divergence). Additionally, the Custom Clustering algorithm allows for automated determination for the number of Latent features. pyDNMFk is developed as part of the R&D 100 award wining SmartTensors project.
BibTeX:
% Software citation for the pyDNMFk GitHub repository (with its Zenodo DOI).
% Authors are joined with " and " so BibTeX parses ten separate names;
% case-sensitive words in the title are braced to survive sentence-casing styles.
@misc{rw2019timm,
  author       = {Bhattarai, Manish and Nebgen, Ben and Skau, Erik and Eren, Maksim and Chennupati, Gopinath and Vangara, Raviteja and Djidjev, Hristo and Patchett, John and Ahrens, Jim and Alexandrov, Boian},
  title        = {{pyDNMFk}: {Python} Distributed Non Negative Matrix Factorization},
  year         = {2021},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  doi          = {10.5281/zenodo.4722448},
  howpublished = {\url{https://github.com/lanl/pyDNMFk}}
}
% Conference paper (IEEE HPEC 2020) on distributed non-negative tensor train decomposition.
% The middle initial carries its period so full-name styles print "Boian S. Alexandrov";
% acronyms in the proceedings title are braced to keep their capitalisation.
@inproceedings{bhattarai2020distributed,
  author       = {Bhattarai, Manish and Chennupati, Gopinath and Skau, Erik and Vangara, Raviteja and Djidjev, Hristo and Alexandrov, Boian S.},
  title        = {Distributed Non-Negative Tensor Train Decomposition},
  booktitle    = {2020 {IEEE} High Performance Extreme Computing Conference ({HPEC})},
  pages        = {1--10},
  year         = {2020},
  organization = {IEEE}
}
% EuroVis 2021 short paper on selecting salient time steps via non-negative Tucker decomposition.
% Only the proper noun in the title is braced (instead of the whole title), so
% bibliography styles can still apply their own title casing.
@inproceedings{s.20211055,
  author    = {Pulido, Jesus and Patchett, John and Bhattarai, Manish and Alexandrov, Boian and Ahrens, James},
  title     = {Selection of Optimal Salient Time Steps by Non-negative {Tucker} Tensor Decomposition},
  booktitle = {{EuroVis} 2021 - Short Papers},
  editor    = {Agus, Marco and Garth, Christoph and Kerren, Andreas},
  year      = {2021},
  publisher = {The Eurographics Association},
  isbn      = {978-3-03868-143-4},
  doi       = {10.2312/evs.20211055}
}
% Journal article (The Journal of Supercomputing, 2020) on distributed NMF
% with determination of the number of latent features.
@article{chennupati2020distributed,
  author    = {Chennupati, Gopinath and Vangara, Raviteja and Skau, Erik and Djidjev, Hristo and Alexandrov, Boian},
  title     = {Distributed non-negative matrix factorization with determination of the number of latent features},
  journal   = {The Journal of Supercomputing},
  year      = {2020},
  pages     = {1--31},
  publisher = {Springer}
}