"""Run the complete diagnosis-text clustering pipeline, one script after another.

This is a suggestion for how to use all the clustering scripts together.
It is also recommended to run each clustering script on its own to fine-tune
the clusterings (with the argument ``--find_k_value``).

The script must be started from the repository root because every step is
addressed by a path relative to it.
"""
import os
import shlex
import subprocess
import sys
from typing import Callable, List, Optional, Sequence, Tuple

# params:
path2corpus_bow_preprocessed_diagnosis = 'data/bow_prepro_diag.pkl'
path2corpus_embedding_preprocessed_diagnosis = 'data/embedding_prepro_diag.pkl'

# Name of the directory the script expects to be started from.
PROJECT_DIR_NAME = 'nlp-in-diagnostic-texts-from-nephropathology'


def is_project_root(workdir: str) -> bool:
    """Return True if *workdir* ends with the expected project directory name."""
    return workdir.endswith(PROJECT_DIR_NAME)


def build_script_queue(
    path2corpus_bow: str,
    path2corpus_embedding: str,
    python: Optional[str] = None,
) -> List[List[str]]:
    """Build the ordered list of pipeline commands.

    Each command is an argument list (not a shell string), so corpus paths
    that contain spaces or shell metacharacters are passed through unchanged.

    Args:
        path2corpus_bow: corpus file handed to the bag-of-words based scripts.
        path2corpus_embedding: corpus file handed to the embedding based scripts.
        python: interpreter used to launch every step. Defaults to the
            interpreter running this script, falling back to ``"python"``
            when ``sys.executable`` is empty.

    Returns:
        The commands in execution order: the clustering scripts first, then
        the scripts that produce tables, plots, topic words and histograms.
    """
    python = python or sys.executable or "python"
    bow_dir = "TextClustering/basedOn_BOW/"
    embedding_dir = "TextClustering/basedOn_Embedding/"
    return [
        [python, bow_dir + "kmeans_Diagnosis.py",
         "--path2corpus", path2corpus_bow, "--k_value", "10"],
        [python, bow_dir + "LDA_Diagnosis.py",
         "--path2corpus", path2corpus_bow, "--k_value", "12"],
        [python, bow_dir + "HDBSCAN_Diagnosis.py",
         "--path2corpus", path2corpus_bow, "--k_value", "10"],
        [python, bow_dir + "GSDPMM_Diagnosis.py",
         "--path2corpus", path2corpus_bow, "--k_value", "14"],
        [python, embedding_dir + "BERT_Diagnosis.py",
         "--path2corpus", path2corpus_embedding,
         "--do_embedding", "--model2use", "German_BERT", "--k_value", "17"],
        [python, embedding_dir + "BERT_Diagnosis.py",
         "--path2corpus", path2corpus_embedding,
         "--do_embedding", "--model2use", "Patho_BERT", "--k_value", "8"],
        [python, embedding_dir + "top2vec_Diagnosis.py",
         "--path2corpus", path2corpus_embedding,
         "--model2use", "doc2vec", "--k_value", "7"],
        [python, "TextClustering/cluster_scores2latextable.py"],
        [python, "TextClustering/plot_clustersets.py"],
        [python, "TextClustering/generate_topicwords.py"],
        [python, "TextClustering/clusterset_histos.py"],
    ]


def format_command(command: Sequence[str]) -> str:
    """Return *command* as a single shell-quoted line for display."""
    return " ".join(shlex.quote(part) for part in command)


def run_pipeline(
    commands: Sequence[Sequence[str]],
    runner: Optional[Callable[[Sequence[str]], int]] = None,
) -> List[Tuple[Sequence[str], int]]:
    """Execute every command in order and collect the ones that failed.

    A failing step does not stop the pipeline; the remaining steps are still
    executed, and the failure is reported to the caller instead of being
    silently dropped.

    Args:
        commands: argument lists as produced by ``build_script_queue``.
        runner: callable that executes one command and returns its exit
            status. Defaults to ``subprocess.call``.

    Returns:
        ``(command, exit_status)`` for every command whose status was non-zero.
    """
    if runner is None:
        runner = subprocess.call

    failures = []
    for command in commands:
        print("\n########################################### executing ###########################################")
        print(format_command(command))
        # Flush so the banner is written before the child process starts
        # producing output, even when stdout is redirected to a file or pipe.
        print("####################################################################################################\n", flush=True)
        returncode = runner(command)
        if returncode != 0:
            failures.append((command, returncode))
    return failures


# Construct clustering pipeline.
script_queue = build_script_queue(
    path2corpus_bow_preprocessed_diagnosis,
    path2corpus_embedding_preprocessed_diagnosis,
)


def main() -> int:
    """Check the working directory, run the pipeline and return an exit status.

    Returns:
        0 when every step succeeded, 1 when the working directory is wrong
        or at least one step exited with a non-zero status.
    """
    # check if we are at correct working directory:
    workdir = os.getcwd()
    if not is_project_root(workdir):
        print(workdir + " is the wrong working directory.")
        print("please make shure to run this script with working directory '.../path/to/nlp-in-diagnostic-texts-from-nephropathology'.")
        return 1

    failures = run_pipeline(script_queue)
    for command, returncode in failures:
        print(f"WARNING: exit status {returncode}: {format_command(command)}")
    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(main())