import React, {useContext, useEffect} from 'react'
import ReportDatasets from "./ReportDatasets"
import {Alert, Collapse, Table, Tag} from 'antd';
import "./style/style.css"
import SyntaxHighlighter from 'react-syntax-highlighter';
import GlobalContext from "contexts/GlobalContext";
import {ModalPasswordRessourceProvider} from 'components/Utils/ModalPasswordRessource';

// Local alias for antd's Collapse.Panel, used by the collapsible "Processing code" sections.
const Panel = Collapse.Panel;

const ReportDatasetsContainer = () => {
    const {codeStyle, VilmedicTag} = useContext(GlobalContext);

    useEffect(() => {
    }, [])

    const data = [
        {
            title: 'MIMIC-CXR',
            image_entry: '/datasets/images#MIMIC-CXR',
            resource_name: 'mimic-cxr-reports',
            func_name: 'modal_password_ressource',
            resource_source: 'physionet',
            tags: [<Tag color="blue">RRG</Tag>, <Tag color="blue">RRS</Tag>,
                <Tag color="orange">Physionet Access</Tag>, <Tag color="green">Official splits</Tag>],
            content: (
                <Table
                    dataSource={[
                        {
                            key: 'mimic-cxr-findings-RRG',
                            task: 'RRG-findings',
                            train: "152,173",
                            val: "1,196",
                            test: "2,347"
                        },
                        {
                            key: 'mimic-cxr-impression-RRG',
                            task: 'RRG-impression',
                            train: "185,816",
                            val: "1,521",
                            test: "2,224"
                        },
                        {
                            key: 'mimic-cxr-reports-RRS',
                            task: 'RRS',
                            train: "125,417",
                            val: "991",
                            test: "1,624"
                        }
                    ]}
                    columns={[
                        {
                            title: 'Task',
                            dataIndex: 'task',
                            key: 'task',
                        },
                        {
                            title: 'Train',
                            dataIndex: 'train',
                            key: 'train',
                        },
                        {
                            title: 'Validation',
                            dataIndex: 'val',
                            key: 'val',
                        },
                        {
                            title: 'Test',
                            dataIndex: 'test',
                            key: 'test',
                        },
                    ]}
                    pagination={false}
                />

            ),
            bibtex_entry: '@article{johnson2019mimic,\n' +
                '  title={MIMIC-CXR, a de-identified publicly available database of chest radiographs with free-text reports},\n' +
                '  author={Johnson, Alistair EW and Pollard, Tom J and Berkowitz, Seth J and Greenbaum, Nathaniel R and Lungren, Matthew P and Deng, Chih-ying and Mark, Roger G and Horng, Steven},\n' +
                '  journal={Scientific data},\n' +
                '  volume={6},\n' +
                '  number={1},\n' +
                '  pages={317},\n' +
                '  year={2019},\n' +
                '  publisher={Nature Publishing Group UK London}\n' +
                '}',
        },
        {
            title: 'CANDID-PTX',
            image_entry: '/datasets/images#CANDID-PTX',
            resource_name: 'candid-ptx-reports',
            func_name: 'directDownloadDataset',
            tags: [<Tag color="blue">RRG</Tag>, <Tag color="orange">Open Access</Tag>],
            content: (
                <Table
                    dataSource={[
                        {
                            key: 'candid-ptx-impression-RRG',
                            task: 'RRG-impression',
                            train: "",
                            val: "",
                            test: "18,306"
                        },
                    ]}
                    columns={[
                        {
                            title: 'Task',
                            dataIndex: 'task',
                            key: 'task',
                        },
                        {
                            title: 'Train',
                            dataIndex: 'train',
                            key: 'train',
                        },
                        {
                            title: 'Validation',
                            dataIndex: 'val',
                            key: 'val',
                        },
                        {
                            title: 'Test',
                            dataIndex: 'test',
                            key: 'test',
                        },
                    ]}
                    pagination={false}
                />

            ),
            bibtex_entry: '@article{Feng2021,\n' +
                'author = "Sijing Feng and Damian Azzollini and Ji Soo Kim and Cheng Kai Jin and Eve Kim and Simon Gordon and Jason Yeoh and Min A Han and Andrew Lee and Aakash Patel and Martin Urschler and Amy Fong and Cameron Simmers and Gregory Tarr and Stuart Barnard and Ben Wilson",\n' +
                'title = "{CANDID-PTX}",\n' +
                'year = "2021",\n' +
                'month = "6",\n' +
                'url = "https://auckland.figshare.com/articles/dataset/CANDID-PTX/14173982",\n' +
                'doi = "10.17608/k6.auckland.14173982"\n' +
                '}',
        },
        {
            title: 'Chexpert',
            image_entry: '/datasets/images#Chexpert',
            resource_name: 'chexpert-reports',
            func_name: 'modal_password_ressource',
            resource_source: 'vilmedic',
            tags: [<Tag color="blue">RRG</Tag>, <Tag color="blue">RRS</Tag>, <Tag color="red">Closed Access</Tag>,
                <Tag color="green">Official splits</Tag>],
            content: (
                <Table
                    dataSource={[
                        {
                            key: 'RRG-chexpert-impression',
                            task: 'RRG-impression',
                            train: "187,616",
                            val: "200",
                            test: "500"
                        },
                        {
                            key: 'RRG-chexpert-findings',
                            task: 'RRG-findings',
                            train: "46,616",
                            val: "61",
                            test: "152"
                        },
                        {
                            key: 'RRS',
                            task: 'RRS',
                            train: "46,609",
                            val: "61",
                            test: "125"
                        }
                    ]}
                    columns={[
                        {
                            title: 'Task',
                            dataIndex: 'task',
                            key: 'task',
                        },
                        {
                            title: 'Train',
                            dataIndex: 'train',
                            key: 'train',
                        },
                        {
                            title: 'Validation',
                            dataIndex: 'val',
                            key: 'val',
                        },
                        {
                            title: 'Test',
                            dataIndex: 'test',
                            key: 'test',
                        },
                    ]}
                    pagination={false}
                />

            ),
            bibtex_entry: '@inproceedings{irvin2019chexpert,\n' +
                '  title={Chexpert: A large chest radiograph dataset with uncertainty labels and expert comparison},\n' +
                '  author={Irvin, Jeremy and Rajpurkar, Pranav and Ko, Michael and Yu, Yifan and Ciurea-Ilcus, Silviana and Chute, Chris and Marklund, Henrik and Haghgoo, Behzad and Ball, Robyn and Shpanskaya, Katie and others},\n' +
                '  booktitle={Proceedings of the AAAI conference on artificial intelligence},\n' +
                '  volume={33},\n' +
                '  number={01},\n' +
                '  pages={590--597},\n' +
                '  year={2019}\n' +
                '}',
        },
        {
            title: 'PadChest',
            image_entry: '/datasets/images#PadChest',
            resource_name: 'padchest-reports',
            func_name: 'modal_password_ressource',
            resource_source: 'vilmedic',
            tags: [<Tag color="blue">RRG</Tag>, <Tag color="red">Closed Access</Tag>],
            content: (
                <Table
                    dataSource={[
                        {
                            key: 'RRG-padchest-impression-es',
                            task: 'RRG-reports (original, spanish)',
                            train: "",
                            val: "",
                            test: "109,801"
                        },
                        {
                            key: 'RRG-padchest-impression-en',
                            task: 'RRG-reports (english)',
                            train: "",
                            val: "",
                            test: "109,801"
                        }
                    ]}
                    columns={[
                        {
                            title: 'Task',
                            dataIndex: 'task',
                            key: 'task',
                        },
                        {
                            title: 'Train',
                            dataIndex: 'train',
                            key: 'train',
                        },
                        {
                            title: 'Validation',
                            dataIndex: 'val',
                            key: 'val',
                        },
                        {
                            title: 'Test',
                            dataIndex: 'test',
                            key: 'test',
                        },
                    ]}
                    pagination={false}
                />

            ),
            comments: 'Reports have been translated to English using GPT-4',
            processing: (
                <Collapse ghost>
                    <Panel header="Processing code" key="1">
                        <SyntaxHighlighter
                            customStyle={{textAlign: "left"}}
                            language="python"
                            style={codeStyle}>
                            {'import pandas as pd\n' +
                                'import re\n' +
                                'import json\n' +
                                '\n' +
                                '\n' +
                                'def replace_multiple_dots(s):\n' +
                                '    s = s.strip()\n' +
                                '    # Replace sequences of spaces and dots with a single dot\n' +
                                '    s = re.sub(r\'([. ]+\\. *)\', \'. \', s)\n' +
                                '\n' +
                                '    # Remove the dot at the beginning of the sentence if followed by a space\n' +
                                '    if s.startswith(". "):\n' +
                                '        s = s[2:]\n' +
                                '\n' +
                                '    # Remove the dot at the beginning of the sentence if not followed by a space\n' +
                                '    if s.startswith("."):\n' +
                                '        s = s[1:]\n' +
                                '    s = s.strip()\n' +
                                '\n' +
                                '    # Add a dot at the end if it doesn\'t exist\n' +
                                '    if not s.endswith("."):\n' +
                                '        s += "."\n' +
                                '\n' +
                                '    return s\n' +
                                '\n' +
                                '\n' +
                                'def replace_multiple_spaces(input_string):\n' +
                                '    # Use regular expression to replace multiple spaces with a single space\n' +
                                '    return re.sub(r\'\\s+\', \' \', input_string)\n' +
                                '\n' +
                                '\n' +
                                '# Step 1: Load the CSV files into pandas DataFrames\n' +
                                'df1 = pd.read_csv(\'PADCHEST_chest_x_ray_images_labels_160K_01.02.19.csv\')\n' +
                                'df2 = pd.read_csv(\'report_sentences_cleaned.csv\')\n' +
                                '\n' +
                                '# Step 2: Group by StudyID and check if Labels and ReportID are identical\n' +
                                'grouped = df1.groupby(\'StudyID\')\n' +
                                '\n' +
                                '# Lists to hold the final results\n' +
                                'image_list, report_list = [], []\n' +
                                '\n' +
                                'for _, group in grouped:\n' +
                                '    if not (len(group[\'ReportID\'].dropna().unique()) == 1):\n' +
                                '        raise Exception\n' +
                                '\n' +
                                '    report = df2[df2[\'codigoinforme\'] == group[\'ReportID\'].iloc[0]][\'v_clean\'].values\n' +
                                '    if len(report) == 1:\n' +
                                '        if isinstance(report[0], str):  # Make sure the report is a string\n' +
                                '            report = replace_multiple_spaces(replace_multiple_dots(report[0])).strip()\n' +
                                '            if report.strip() == "." or report.strip() == "i." or report.strip() == "impreion." or report.strip() == "de.":\n' +
                                '                print(f"Warning: Report for ReportID {group[\'ReportID\'].iloc[0]} is empty:{report[0]}")\n' +
                                '                continue\n' +
                                '            report_list.append(report)\n' +
                                '            image_list.append(\',\'.join(group[\'ImageID\']))\n' +
                                '        else:\n' +
                                '            print(f"Warning: Report for ReportID {group[\'ReportID\'].iloc[0]} is not a string. It\'s {report[0]}")\n' +
                                '            continue\n' +
                                '    else:\n' +
                                '        print(f"Error: No mapping found for ReportID {group[\'ReportID\'].iloc[0]}")\n' +
                                '        continue\n' +
                                '\n' +
                                '# Step 4: Write results to three files\n' +
                                'with open(\'image.tok\', \'w\') as f:\n' +
                                '    f.write(\'\\n\'.join(image_list))\n' +
                                '\n' +
                                'with open(\'report.es.tok\', \'w\') as f:\n' +
                                '    f.write(\'\\n\'.join(report_list))'}
                        </SyntaxHighlighter>
                    </Panel>
                </Collapse>
            ),
            bibtex_entry: '@article{bustos2020padchest,\n' +
                '  title={Padchest: A large chest x-ray image dataset with multi-label annotated reports},\n' +
                '  author={Bustos, Aurelia and Pertusa, Antonio and Salinas, Jose-Maria and De La Iglesia-Vaya, Maria},\n' +
                '  journal={Medical image analysis},\n' +
                '  volume={66},\n' +
                '  pages={101797},\n' +
                '  year={2020},\n' +
                '  publisher={Elsevier}\n' +
                '}',
        },
        {
            title: 'Openi-indiana-university',
            image_entry: '/datasets/images#Openi-indiana-university',
            resource_name: 'Openi-indiana-university-reports',
            func_name: 'directDownloadDataset',
            tags: [<Tag color="blue">RRG</Tag>, <Tag color="orange">Open Access</Tag>],
            content: (
                <Table
                    dataSource={[
                        {
                            key: 'RRG-openi-impression',
                            task: 'RRG-impression',
                            train: "",
                            val: "",
                            test: "3,336"
                        },
                        {
                            key: 'RRG-openi-findings',
                            task: 'RRG-findings',
                            train: "",
                            val: "",
                            test: "3,819"
                        },
                    ]}
                    columns={[
                        {
                            title: 'Task',
                            dataIndex: 'task',
                            key: 'task',
                        },
                        {
                            title: 'Train',
                            dataIndex: 'train',
                            key: 'train',
                        },
                        {
                            title: 'Validation',
                            dataIndex: 'val',
                            key: 'val',
                        },
                        {
                            title: 'Test',
                            dataIndex: 'test',
                            key: 'test',
                        },
                    ]}
                    pagination={false}
                />

            ),
            bibtex_entry: '@article{demner2012design,\n' +
                '  title={Design and development of a multimodal biomedical information retrieval system},\n' +
                '  author={Demner-Fushman, Dina and Antani, Sameer and Simpson, Matthew and Thoma, George R},\n' +
                '  journal={Journal of Computing Science and Engineering},\n' +
                '  volume={6},\n' +
                '  number={2},\n' +
                '  pages={168--177},\n' +
                '  year={2012},\n' +
                '  publisher={Demner-Fushman Dina; Antani Sameer; Simpson Matthew; Thoma George R.}\n' +
                '}',
        },
        {
            title: 'Intermountain',
            image_entry: '/datasets/images#Intermountain',
            resource_name: 'intermountain-reports',
            func_name: 'modal_password_ressource',
            resource_source: 'vilmedic',
            tags: [<Tag color="blue">RRG</Tag>, <Tag color="blue">RRS</Tag>, <Tag color="red">Closed Access</Tag>,
                <Tag color="green">Official splits</Tag>],
            content: (
                <Table
                    dataSource={[
                        {
                            key: 'RRG-Intermountain-impression',
                            task: 'RRG-impression',
                            source: 'ePneumonAI',
                            train: "6,360",
                            val: "",
                            test: ""
                        },
                        {
                            key: 'RRS-Intermountain',
                            task: 'RRS-findings',
                            source: 'ePneumonAI',
                            train: "6,029",
                            val: "",
                            test: ""
                        },
                        {
                            key: 'RRG-Intermountain-findings',
                            task: 'RRG-findings',
                            source: 'ePneumonAI',
                            train: "6,146",
                            val: "",
                            test: ""
                        },
                        {
                            key: 'RRG-Intermountain-impression2',
                            task: 'RRG-impression',
                            source: 'emergency',
                            train: "2,152",
                            val: "994",
                            test: "1,477"
                        },
                        {
                            key: 'RRG-Intermountain-findings2',
                            task: 'RRG-findings',
                            source: 'emergency',
                            train: "2,029",
                            val: "938",
                            test: "1,407"
                        },

                        {
                            key: 'RRS-Intermountain2',
                            task: 'RRS-findings',
                            source: 'emergency',
                            train: "2,020",
                            val: "934",
                            test: "1,385"
                        },
                    ]}
                    columns={[
                        {
                            title: 'Task',
                            dataIndex: 'task',
                            key: 'task',
                        },
                        {
                            title: 'Source',
                            dataIndex: 'source',
                            key: 'source',
                        },
                        {
                            title: 'Train',
                            dataIndex: 'train',
                            key: 'train',
                        },
                        {
                            title: 'Validation',
                            dataIndex: 'val',
                            key: 'val',
                        },
                        {
                            title: 'Test',
                            dataIndex: 'test',
                            key: 'test',
                        },
                    ]}
                    pagination={false}
                />

            ),
            bibtex_entry: '@article{demner2012design,\n' +
                '  title={Design and development of a multimodal biomedical information retrieval system},\n' +
                '  author={Demner-Fushman, Dina and Antani, Sameer and Simpson, Matthew and Thoma, George R},\n' +
                '  journal={Journal of Computing Science and Engineering},\n' +
                '  volume={6},\n' +
                '  number={2},\n' +
                '  pages={168--177},\n' +
                '  year={2012},\n' +
                '  publisher={Demner-Fushman Dina; Antani Sameer; Simpson Matthew; Thoma George R.}\n' +
                '}',
        },
        {
            title: 'LCRRS',
            resource_name: 'lcrrs-reports',
            func_name: 'modal_password_ressource',
            resource_source: 'vilmedic',
            tags: [<Tag color="blue">RRS</Tag>, <Tag color="red">Closed Access</Tag>,
                <Tag color="green">Official splits</Tag>],
            comments: 'Reports have been translated to English using GPT-4',
            content: (
                <Table
                    dataSource={[
                        {
                            key: 'RRS (Chinese)',
                            task: 'RRS (Chinese)',
                            train: "269,688",
                            val: "10,000",
                            test: "10,000"
                        },
                        {
                            key: 'RRS (English)',
                            task: 'RRS (English)',
                            train: "269,688",
                            val: "10,000",
                            test: "10,000"
                        }
                    ]}
                    columns={[
                        {
                            title: 'Task',
                            dataIndex: 'task',
                            key: 'task',
                        },
                        {
                            title: 'Train',
                            dataIndex: 'train',
                            key: 'train',
                        },
                        {
                            title: 'Validation',
                            dataIndex: 'val',
                            key: 'val',
                        },
                        {
                            title: 'Test',
                            dataIndex: 'test',
                            key: 'test',
                        },
                    ]}
                    pagination={false}
                />

            ),
            bibtex_entry: '',
        },
        {
            title: 'BIMCV-COVID19',
            image_entry: '/datasets/images#BIMCV-COVID19',
            resource_name: 'bimcv-covid19-reports',
            func_name: 'modal_password_ressource',
            resource_source: 'vilmedic',
            tags: [<Tag color="blue">RRG</Tag>, <Tag color="red">Closed Access</Tag>],
            content: (
                <Table
                    dataSource={[
                        {
                            key: 'RRG-BIMCV-COVID19-reports-es',
                            task: 'RRG-reports (original, spanish)',
                            train: "",
                            val: "",
                            test: "46,941"
                        },
                        {
                            key: 'RRG-BIMCV-COVID19-reports-en',
                            task: 'RRG-reports (english)',
                            train: "",
                            val: "",
                            test: "46,941"
                        }
                    ]}
                    columns={[
                        {
                            title: 'Task',
                            dataIndex: 'task',
                            key: 'task',
                        },
                        {
                            title: 'Train',
                            dataIndex: 'train',
                            key: 'train',
                        },
                        {
                            title: 'Validation',
                            dataIndex: 'val',
                            key: 'val',
                        },
                        {
                            title: 'Test',
                            dataIndex: 'test',
                            key: 'test',
                        },
                    ]}
                    pagination={false}
                />

            ),
            comments: 'Reports have been translated to English using GPT-4',
            processing: (
                <Collapse ghost>
                    <Panel header="Processing code" key="1">
                        <SyntaxHighlighter
                            customStyle={{textAlign: "left"}}
                            language="python"
                            style={codeStyle}>
                            {'import pandas as pd\n' +
                                'import numpy as np\n' +
                                'import re\n' +
                                '\n' +
                                '\n' +
                                'def remove_sentence_with_nhc(s):\n' +
                                '    sentences = \' \'.join(s.split())\n' +
                                '    sentences = sentences.split(\'.\')\n' +
                                '\n' +
                                '    # Remove PHI\n' +
                                '    sentences = [sentence.strip() for sentence in sentences if "nhc " not in sentence]\n' +
                                '    sentences = [sentence.strip() for sentence in sentences if "hc " not in sentence]\n' +
                                '    sentences = [sentence.strip() for sentence in sentences if "name name name " not in sentence]\n' +
                                '    sentences = [sentence.strip() for sentence in sentences if "name name " not in sentence]\n' +
                                '\n' +
                                '    # Join the processed sentences back into a string\n' +
                                '    result = \'. \'.join(sentences)\n' +
                                '    result = result.strip()\n' +
                                '    result = re.sub(r\'(\\. ?)+\\.\', \'.\', result)\n' +
                                '\n' +
                                '    # If the string does not end with a dot, add one\n' +
                                '    if result and not result.endswith(\'.\'):\n' +
                                '        result += \'.\'\n' +
                                '\n' +
                                '    if result[:2] == ". ":\n' +
                                '        result = result[2:]\n' +
                                '\n' +
                                '    return result.strip()\n' +
                                '\n' +
                                '\n' +
                                'all_reports = []\n' +
                                'all_images = []\n' +
                                'skipped = 0\n' +
                                '\n' +
                                'data = pd.read_csv("bimvc-covid/final_annotations.csv")\n' +
                                'data = data[data[\'report\'].apply(lambda x: isinstance(x, str) and x.strip() != "")]\n' +
                                'data.reset_index(drop=True, inplace=True)\n' +
                                'grouped = data.groupby([\'patient_id\', \'study_id\', \'report\', \'labels\'])\n' +
                                '\n' +
                                'for study_id, group in grouped:\n' +
                                '    reports = group[\'report\'].tolist()\n' +
                                '    # Check if all reports in the group are identical\n' +
                                '    if len(set(reports)) != 1:\n' +
                                '        print(reports)\n' +
                                '        print(f"Reports for study_id {study_id} are not identical!")\n' +
                                '        continue\n' +
                                '\n' +
                                '    report = reports[0]\n' +
                                '    processed_report = remove_sentence_with_nhc(report.strip())\n' +
                                '    if not processed_report or processed_report == "name.":\n' +
                                '        skipped += 1\n' +
                                '        continue\n' +
                                '\n' +
                                '    all_reports.append(processed_report)\n' +
                                '    images = [im for im in group[\'file_path\']]\n' +
                                '\n' +
                                '\n' +
                                '    # put PA and AP first\n' +
                                '    def custom_sort(item):\n' +
                                '        if "vp-pa" in item:\n' +
                                '            return 1\n' +
                                '        elif "vp-ap" in item:\n' +
                                '            return 2\n' +
                                '        else:\n' +
                                '            return 3\n' +
                                '\n' +
                                '\n' +
                                '    images = sorted(images, key=custom_sort)\n' +
                                '    all_images.append(",".join(images))\n' +
                                '\n' +
                                'open("test.report.es.tok", "w").write("\\n".join(all_reports))\n' +
                                'open("test.image.es.tok", "w").write("\\n".join(all_images))'}
                        </SyntaxHighlighter>
                    </Panel>
                </Collapse>
            ),
            bibtex_entry: "@article{vaya2020bimcv,\n" +
                "  title={BIMCV COVID-19+: a large annotated dataset of RX and CT images from COVID-19 patients},\n" +
                "  author={Vay{\\'a}, Maria De La Iglesia and Saborit, Jose Manuel and Montell, Joaquim Angel and Pertusa, Antonio and Bustos, Aurelia and Cazorla, Miguel and Galant, Joaquin and Barber, Xavier and Orozco-Beltr{\\'a}n, Domingo and Garc{\\'\\i}a-Garc{\\'\\i}a, Francisco and others},\n" +
                "  journal={arXiv preprint arXiv:2006.01174},\n" +
                "  year={2020}\n" +
                "}"
        },
    ]


    return (
        <ModalPasswordRessourceProvider>
            <ReportDatasets data={data}/>
        </ModalPasswordRessourceProvider>
    )
}

// Expose the container as this module's default export.
export default ReportDatasetsContainer