from sklearn.feature_extraction.text import CountVectorizer
import joblib
import topicwizard
from sklearn.decomposition import NMF
from sklearn.pipeline import make_pipeline

'''
installation:
pip install topic-wizard

'''

def main():
    """Fit an NMF topic model on 20 Newsgroups and open it in topicwizard.

    Steps:
      1. Build a ``CountVectorizer`` -> ``NMF`` pipeline (10 topics).
      2. Load the full 20 Newsgroups corpus through scikit-learn.
      3. Fit the pipeline on the raw documents.
      4. Launch the topicwizard visualizer on the fitted pipeline, passing
         the newsgroup name of every document as its group label.

    Returns:
        None. When the visualizer call returns, the script simply ends, which
        yields exit status 0 without relying on the ``site``-provided
        ``exit`` helper.
    """
    # Local import: the dataset loader is only needed by this demo entry point.
    from sklearn.datasets import fetch_20newsgroups

    # Bag-of-words features with English stop words removed; min_df / max_df
    # prune very rare and very common terms before factorization.
    vectorizer = CountVectorizer(min_df=5, max_df=0.8, stop_words="english")

    # Topic model: non-negative matrix factorization with 10 components.
    model = NMF(n_components=10)

    topic_pipeline = make_pipeline(vectorizer, model)

    newsgroups = fetch_20newsgroups(subset="all")
    corpus = newsgroups.data
    # Sklearn gives the labels back as integers, we have to map them back to
    # the actual textual label.
    group_labels = [newsgroups.target_names[label] for label in newsgroups.target]

    topic_pipeline.fit(corpus)

    print("launching topicwizard visualizer")
    # The labels were previously computed but never used; hand them to the
    # visualizer so they are available alongside the documents.
    # NOTE(review): assumes the installed topicwizard version accepts the
    # `group_labels` keyword -- confirm against the pinned version.
    topicwizard.visualize(corpus, model=topic_pipeline, group_labels=group_labels)


if __name__ == '__main__':
    main()