"""Fit an NMF topic model on the 20 Newsgroups corpus and open topicwizard.

Installation:
    pip install topic-wizard
"""
import sys

import joblib
import topicwizard
from sklearn.datasets import fetch_20newsgroups
from sklearn.decomposition import NMF
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import make_pipeline


def main():
    """Build the topic pipeline, fit it, and launch the topicwizard app.

    Returns:
        int: Process exit status; 0 once the visualizer call has returned.
    """
    # Bag-of-words features with English stop words removed, followed by a
    # 10-component NMF topic model.
    vectorizer = CountVectorizer(min_df=5, max_df=0.8, stop_words="english")
    model = NMF(n_components=10)
    topic_pipeline = make_pipeline(vectorizer, model)

    newsgroups = fetch_20newsgroups(subset="all")
    corpus = newsgroups.data
    # Sklearn gives the labels back as integers, we have to map them back to
    # the actual textual label.
    group_labels = [
        newsgroups.target_names[label] for label in newsgroups.target
    ]

    topic_pipeline.fit(corpus)

    print("launching topicwizard visualizer")
    # The labels were previously computed but never used; hand them to the
    # visualizer so documents can be explored by newsgroup.
    # NOTE(review): `group_labels` is a keyword of topicwizard.visualize in
    # recent releases - confirm against the installed version.
    topicwizard.visualize(
        corpus,
        model=topic_pipeline,
        group_labels=group_labels,
    )
    return 0


if __name__ == "__main__":
    # sys.exit instead of the site-provided `exit`, which is not guaranteed
    # to exist (e.g. under `python -S` or in frozen builds).
    sys.exit(main())