from database_preparation.utils_stringpreparation import read_german_text
import re

def get3parts(t_file):
    """Split a German report text into description, conclusion and greetings.

    The text is loaded with ``read_german_text`` and cut into frames using
    "codons": marker strings that open a section. As on DNA, the start codon
    of any other section acts as the stop codon of the current one.

    Parameters
    ----------
    t_file
        Passed unchanged to ``read_german_text``
        (presumably a file path -- TODO confirm against that helper).

    Returns
    -------
    tuple
        ``(txt_micro, txt_conclusion, txt_greetings)`` where

        * ``txt_micro`` is the first description frame, with the first
          "second report" frame (``Nachbericht`` / ``Immunhistochemie``)
          appended when one exists,
        * ``txt_conclusion`` is the last conclusion frame,
        * ``txt_greetings`` is the text from the first occurrence of the
          greetings codon to the end, or ``None`` if that codon is absent.

    Raises
    ------
    IndexError
        If no description frame or no conclusion frame was found
        (assuming ``regexp`` yields no start indices when nothing matches).
    """
    # Function-scope import, as in the original; only this function needs it.
    from database_preparation.utils_stringpreparation import regexp

    #%% load the text
    t_text = read_german_text(t_file)

    #%% define codon-find function
    def find_codon(text, word_list):
        """Return the last entry of ``word_list`` that occurs in ``text``.

        Falls back to a sentinel word that is not expected to appear in any
        report, so that later searches for it simply find nothing.
        """
        # Walking the list backwards and returning on the first hit is the
        # same as "the last matching entry wins".
        for word in reversed(word_list):
            if word in text:
                return word
        return 'XENOTARSOSAURUS'

    #%% get start codons for description and diagnosis
    # thinking that one pathologist sticks to his/her wording
    start_codon_description = find_codon(
        t_text,
        ['Lichtmikroskopie:', 'Mikroskopie:', "Histologie:",
         "Klinische Angaben:", "Wir erhielten ", "Eingesandt wurde:"])

    start_codon_2nd = find_codon(t_text, ["Nachbericht", "Immunhistochemie"])

    start_codon_conclusion = find_codon(t_text,
                                        ["Beurteilung:", "Begutachtung:"])
    # NOTE: the original author listed further heading variants here, e.g.
    # "Vorläufige Beurteilung gemäß der Gefrierschnittführung:" (preliminary
    # frozen-section assessment) and "Beurteilung am Paraffinmaterial:"
    # (assessment on paraffin material). They are not in the list above.

    start_codon_comment = find_codon(t_text, ["Kommentar"])

    # The closing formula is preferred; "Prof." is the fallback marker.
    if "Mit freundlichen" in t_text:
        start_codon_greetings = "Mit freundlichen"
    else:
        start_codon_greetings = "Prof."

    #%% set the stop codons and prepare the function
    # like on DNA, the next start codon is a stop codon
    stop_codon_list = [start_codon_conclusion, start_codon_description,
                       start_codon_comment, start_codon_greetings,
                       start_codon_2nd]

    def get_codon_idx(text, start_codon, stop_codon_list):
        """Locate every frame opened by ``start_codon``.

        Returns ``(idx_start, idx_stop)``: the start indices reported by
        ``regexp`` (its second return value) and, for each of them, the
        position of the nearest following codon other than ``start_codon``.
        """
        _, idx_start = regexp(start_codon, text)

        # A section never terminates itself.
        other_codons = [codon for codon in stop_codon_list
                        if codon != start_codon]

        idx_stop = []
        for frame_start in idx_start:
            # Search from frame_start directly instead of slicing the text,
            # which avoids copying the tail for every codon.
            hits = [text.find(codon, frame_start) for codon in other_codons]
            hits = [hit for hit in hits if hit >= 0]
            # If no other codon follows, the frame runs to the end of the
            # text (previously min() raised ValueError on the empty list).
            idx_stop.append(min(hits, default=len(text)))

        return idx_start, idx_stop

    #%% find the indices for the text-frames
    start_description, stop_description = get_codon_idx(
        t_text, start_codon_description, stop_codon_list)

    start_2nd, stop_2nd = get_codon_idx(
        t_text, start_codon_2nd, stop_codon_list)

    start_conclusion, stop_conclusion = get_codon_idx(
        t_text, start_codon_conclusion, stop_codon_list)

    #%% get the text parts
    def get_text_frame(idx_start_list, idx_stop_list, text):
        """Return the slices ``text[start:stop]`` for the paired indices."""
        return [text[begin:end]
                for begin, end in zip(idx_start_list, idx_stop_list)]

    micro_frames = get_text_frame(start_description, stop_description, t_text)
    second_frames = get_text_frame(start_2nd, stop_2nd, t_text)
    conclusion_frames = get_text_frame(start_conclusion, stop_conclusion,
                                       t_text)

    #%% finalise the text
    # The last conclusion frame is the one that is returned.
    txt_conclusion = conclusion_frames[-1]

    # Description = first description frame, plus the first second-report
    # frame when there is one.
    txt_micro = str(micro_frames[0])
    if second_frames:
        txt_micro += str(second_frames[0])

    # get greetings-section:
    greetings_idx = t_text.find(start_codon_greetings)
    txt_greetings = t_text[greetings_idx:] if greetings_idx != -1 else None

    return txt_micro, txt_conclusion, txt_greetings