import React, {useContext, useState} from 'react'
import {
    Space,
    Input,
    Table,
    message,
    Modal,
    Spin,
    Alert,
    Button,
    Typography,
} from 'antd';
import SyntaxHighlighter from "react-syntax-highlighter";
import GlobalContext from "contexts/GlobalContext";


// Pull the nested sub-components off their antd parents so they can be
// referenced by short name.
const TextArea = Input.TextArea;
const Paragraph = Typography.Paragraph;
const Link = Typography.Link;

/**
 * Python snippet, stored as a plain string, that copies every blob of one
 * Google Cloud Storage bucket into another bucket.
 *
 * The embedded script downloads each blob to the local working directory,
 * uploads it to the destination bucket, deletes the local copy, and skips
 * blobs that already exist in the destination with the same size. Transfers
 * are scheduled on a ThreadPoolExecutor and progress is reported with tqdm.
 *
 * `as_completed` is imported explicitly in the snippet: the script only does
 * `from concurrent.futures import ...`, so the bare name `concurrent` is
 * never bound and `concurrent.futures.as_completed` would raise NameError.
 */
const bucket_local_bucket = 'import os\n' +
    'from google.cloud import storage\n' +
    'from tqdm import tqdm\n' +
    'from concurrent.futures import ThreadPoolExecutor, as_completed\n' +
    '\n' +
    '\n' +
    '# pour etre loggué en python sur un bucket:\n' +
    '# gcloud auth application-default login --no-launch-browser\n' +
    '# gcloud config set project som-nero-phi-mlungren-lab\n' +
    '# l\'autre est via le json\n' +
    '\n' +
    '\n' +
    'def transfer_blob(src_blob, src_bucket, dest_bucket):\n' +
    '    # Check if the blob exists in the destination bucket\n' +
    '    existing_blob = dest_bucket.get_blob(src_blob.name)\n' +
    '\n' +
    '    # Compare the blob size and decide whether to transfer\n' +
    '    if existing_blob is None or existing_blob.size != src_blob.size:\n' +
    '        # 1. Create necessary directories before downloading the blob\n' +
    '        file_path = os.path.join("./", src_blob.name)\n' +
    '        if not os.path.exists(os.path.dirname(file_path)):\n' +
    '            os.makedirs(os.path.dirname(file_path))\n' +
    '            print(f"Created directory {os.path.dirname(file_path)}.")\n' +
    '\n' +
    '        # 2. Download the blob to your computer\n' +
    '        src_blob.download_to_filename(file_path)\n' +
    '        print(f"Downloaded {src_blob.name} to {file_path}.")\n' +
    '\n' +
    '        # 3. Upload the blob to the target bucket\n' +
    '        dest_blob = dest_bucket.blob(src_blob.name)\n' +
    '        dest_blob.upload_from_filename(file_path)\n' +
    '        print(f"Uploaded {file_path} to {dest_bucket.name}/{src_blob.name}.")\n' +
    '\n' +
    '        # 4. Delete the file from your computer\n' +
    '        os.remove(file_path)\n' +
    '        print(f"Deleted local file {file_path}.")\n' +
    '\n' +
    '    else:\n' +
    '        print(f"{src_blob.name} already exists with the same size in {dest_bucket.name}.")\n' +
    '\n' +
    '\n' +
    'def transfer_bucket_data(src_bucket_name, dest_bucket_name, dest_credentials, max_threads=10):\n' +
    '    # Initialize the GCS client for the source bucket\n' +
    '    storage_client = storage.Client()\n' +
    '    src_bucket = storage_client.get_bucket(src_bucket_name)\n' +
    '\n' +
    '    # Initialize the GCS client for the destination bucket\n' +
    '    dest_client = storage.Client.from_service_account_json(dest_credentials)\n' +
    '    dest_bucket = dest_client.get_bucket(dest_bucket_name)\n' +
    '\n' +
    '    # Use ThreadPoolExecutor for concurrent transfers\n' +
    '    with ThreadPoolExecutor(max_threads) as executor:\n' +
    '        futures = [executor.submit(transfer_blob, src_blob, src_bucket, dest_bucket) for src_blob in\n' +
    '                   tqdm(src_bucket.list_blobs(), total=582501)]\n' +
    '\n' +
    '        # Use tqdm to show progress of concurrent tasks completion\n' +
    '        for _ in tqdm(as_completed(futures), total=len(futures)):\n' +
    '            pass\n' +
    '\n' +
    '\n' +
    '# Call the function to transfer data between the buckets\n' +
    'transfer_bucket_data(\'pe_ct_scans\', \'ct_pe_study_backup\', \'som-radiology-generation-14df7b429b8e.json\')\n';


/**
 * Python snippet, stored as a plain string, that builds a JSONL request file.
 *
 * The embedded script reads report lines from a file, de-duplicates them, and
 * emits one "gpt-4" chat request per report (a Spanish-to-English translation
 * prompt, with the original text kept under "metadata"), one JSON object per
 * line.
 */
const make_batch_gpt = `import json
import pandas as pd
import tqdm
import re

reports = [r.strip() for r in open("/home/jb/Documents/images/old/RRG/padchest/report.tok").readlines()]
print(len(reports))
reports = list(set(reports))
print(len(reports))
jsonl = []
for sentence in reports:
    model = "gpt-4"
    s = {"model": model,
         "messages": [{"role": "user",
                       "content": "Translate this chest x-ray radiology report from Spanish to English: {} \\n"
                                  "Translation:".format(sentence)}],
         "temperature": 0,
         "n": 1,
         "metadata": {"original": sentence}
         }
    jsonl.append(s)

open("/home/jb/Documents/images/example_requests_to_parallel_process.jsonl", "w").write(
    "\\n".join([json.dumps(j) for j in jsonl]))`;

/**
 * Python snippet, stored as a plain string: an asyncio/aiohttp script that
 * sends the requests of a JSONL file to an HTTP API in parallel while
 * throttling request and token usage.
 *
 * Going by its own docstring and appendix, the script appears to be adapted
 * from the OpenAI cookbook's `api_request_parallel_processor.py`. As written
 * here it additionally:
 *   - sends the key in an `api-key` header (the `Authorization` header built
 *     on the line before is immediately overwritten),
 *   - pauses 0.5 s after scheduling each request,
 *   - accumulates a running `PRICE` from each response's `usage` counts.
 *
 * The per-request pause uses `await asyncio.sleep`, so in-flight requests
 * keep progressing during it, and the cool-down message uses
 * `logging.warning`, since the `logging.warn` alias is removed in
 * Python 3.13.
 *
 * The value is kept as concatenated single-quoted literals because the
 * snippet itself contains backticks and backslash sequences.
 */
const batch_gpt = '"""\n' +
    'python api.py \\\n' +
    '  --requests_filepath example_requests_to_parallel_process.jsonl \\\n' +
    '  --request_url https://api.openai.com/v1/chat/completions \\\n' +
    '  --max_requests_per_minute 200 \\\n' +
    '  --max_tokens_per_minute 40000 \\\n' +
    '  --token_encoding_name cl100k_base \\\n' +
    '  --max_attempts 5 \\\n' +
    '  --api_key \n' +
    '"""\n' +
    '\n' +
    '"""\n' +
    'API REQUEST PARALLEL PROCESSOR\n' +
    '\n' +
    'Using the OpenAI API to process lots of text quickly takes some care.\n' +
    'If you trickle in a million API requests one by one, they\'ll take days to complete.\n' +
    'If you flood a million API requests in parallel, they\'ll exceed the rate limits and fail with errors.\n' +
    'To maximize throughput, parallel requests need to be throttled to stay under rate limits.\n' +
    '\n' +
    'This script parallelizes requests to the OpenAI API while throttling to stay under rate limits.\n' +
    '\n' +
    'Features:\n' +
    '- Streams requests from file, to avoid running out of memory for giant jobs\n' +
    '- Makes requests concurrently, to maximize throughput\n' +
    '- Throttles request and token usage, to stay under rate limits\n' +
    '- Retries failed requests up to {max_attempts} times, to avoid missing data\n' +
    '- Logs errors, to diagnose problems with requests\n' +
    '\n' +
    'Example command to call script:\n' +
    '```\n' +
    'python examples/api_request_parallel_processor.py \\\n' +
    '  --requests_filepath examples/data/example_requests_to_parallel_process.jsonl \\\n' +
    '  --save_filepath examples/data/example_requests_to_parallel_process_results.jsonl \\\n' +
    '  --request_url https://api.openai.com/v1/embeddings \\\n' +
    '  --max_requests_per_minute 1500 \\\n' +
    '  --max_tokens_per_minute 6250000 \\\n' +
    '  --token_encoding_name cl100k_base \\\n' +
    '  --max_attempts 5 \\\n' +
    '  --logging_level 20\n' +
    '```\n' +
    '\n' +
    'Inputs:\n' +
    '- requests_filepath : str\n' +
    '    - path to the file containing the requests to be processed\n' +
    '    - file should be a jsonl file, where each line is a json object with API parameters and an optional metadata field\n' +
    '    - e.g., {"model": "text-embedding-ada-002", "input": "embed me", "metadata": {"row_id": 1}}\n' +
    '    - as with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically)\n' +
    '    - an example file is provided at examples/data/example_requests_to_parallel_process.jsonl\n' +
    '    - the code to generate the example file is appended to the bottom of this script\n' +
    '- save_filepath : str, optional\n' +
    '    - path to the file where the results will be saved\n' +
    '    - file will be a jsonl file, where each line is an array with the original request plus the API response\n' +
    '    - e.g., [{"model": "text-embedding-ada-002", "input": "embed me"}, {...}]\n' +
    '    - if omitted, results will be saved to {requests_filename}_results.jsonl\n' +
    '- request_url : str, optional\n' +
    '    - URL of the API endpoint to call\n' +
    '    - if omitted, will default to "https://api.openai.com/v1/embeddings"\n' +
    '- api_key : str, optional\n' +
    '    - API key to use\n' +
    '    - if omitted, the script will attempt to read it from an environment variable {os.getenv("OPENAI_API_KEY")}\n' +
    '- max_requests_per_minute : float, optional\n' +
    '    - target number of requests to make per minute (will make less if limited by tokens)\n' +
    '    - leave headroom by setting this to 50% or 75% of your limit\n' +
    '    - if requests are limiting you, try batching multiple embeddings or completions into one request\n' +
    '    - if omitted, will default to 1,500\n' +
    '- max_tokens_per_minute : float, optional\n' +
    '    - target number of tokens to use per minute (will use less if limited by requests)\n' +
    '    - leave headroom by setting this to 50% or 75% of your limit\n' +
    '    - if omitted, will default to 125,000\n' +
    '- token_encoding_name : str, optional\n' +
    '    - name of the token encoding used, as defined in the `tiktoken` package\n' +
    '    - if omitted, will default to "cl100k_base" (used by `text-embedding-ada-002`)\n' +
    '- max_attempts : int, optional\n' +
    '    - number of times to retry a failed request before giving up\n' +
    '    - if omitted, will default to 5\n' +
    '- logging_level : int, optional\n' +
    '    - level of logging to use; higher numbers will log fewer messages\n' +
    '    - 40 = ERROR; will log only when requests fail after all retries\n' +
    '    - 30 = WARNING; will log when requests his rate limits or other errors\n' +
    '    - 20 = INFO; will log when requests start and the status at finish\n' +
    '    - 10 = DEBUG; will log various things as the loop runs to see when they occur\n' +
    '    - if omitted, will default to 20 (INFO).\n' +
    '\n' +
    'The script is structured as follows:\n' +
    '    - Imports\n' +
    '    - Define main()\n' +
    '        - Initialize things\n' +
    '        - In main loop:\n' +
    '            - Get next request if one is not already waiting for capacity\n' +
    '            - Update available token & request capacity\n' +
    '            - If enough capacity available, call API\n' +
    '            - The loop pauses if a rate limit error is hit\n' +
    '            - The loop breaks when no tasks remain\n' +
    '    - Define dataclasses\n' +
    '        - StatusTracker (stores script metadata counters; only one instance is created)\n' +
    '        - APIRequest (stores API inputs, outputs, metadata; one method to call API)\n' +
    '    - Define functions\n' +
    '        - api_endpoint_from_url (extracts API endpoint from request URL)\n' +
    '        - append_to_jsonl (writes to results file)\n' +
    '        - num_tokens_consumed_from_request (bigger function to infer token usage from request)\n' +
    '        - task_id_generator_function (yields 1, 2, 3, ...)\n' +
    '    - Run main()\n' +
    '"""\n' +
    '\n' +
    '# imports\n' +
    'import aiohttp  # for making API calls concurrently\n' +
    'import argparse  # for running script from command line\n' +
    'import asyncio  # for running API calls concurrently\n' +
    'import json  # for saving results to a jsonl file\n' +
    'import logging  # for logging rate limit warnings and other messages\n' +
    'import os  # for reading API key\n' +
    'import re  # for matching endpoint from request URL\n' +
    'import tiktoken  # for counting tokens\n' +
    'import time  # for sleeping after rate limit is hit\n' +
    'from dataclasses import dataclass, field  # for storing API inputs, outputs, and metadata\n' +
    '\n' +
    'PRICE = 0\n' +
    '\n' +
    '\n' +
    'async def process_api_requests_from_file(\n' +
    '        requests_filepath: str,\n' +
    '        save_filepath: str,\n' +
    '        request_url: str,\n' +
    '        api_key: str,\n' +
    '        max_requests_per_minute: float,\n' +
    '        max_tokens_per_minute: float,\n' +
    '        token_encoding_name: str,\n' +
    '        max_attempts: int,\n' +
    '        logging_level: int,\n' +
    '):\n' +
    '    """Processes API requests in parallel, throttling to stay under rate limits."""\n' +
    '    # constants\n' +
    '    seconds_to_pause_after_rate_limit_error = 15\n' +
    '    seconds_to_sleep_each_loop = 0.001  # 1 ms limits max throughput to 1,000 requests per second\n' +
    '\n' +
    '    # initialize logging\n' +
    '    logging.basicConfig(level=logging_level)\n' +
    '    logging.debug(f"Logging initialized at level {logging_level}")\n' +
    '\n' +
    '    # infer API endpoint and construct request header\n' +
    '    api_endpoint = api_endpoint_from_url(request_url)\n' +
    '    request_header = {"Authorization": f"Bearer {api_key}"}\n' +
    '    request_header = {"api-key": f"{api_key}"}\n' +
    '\n' +
    '    # initialize trackers\n' +
    '    queue_of_requests_to_retry = asyncio.Queue()\n' +
    '    task_id_generator = task_id_generator_function()  # generates integer IDs of 1, 2, 3, ...\n' +
    '    status_tracker = StatusTracker()  # single instance to track a collection of variables\n' +
    '    next_request = None  # variable to hold the next request to call\n' +
    '\n' +
    '    # initialize available capacity counts\n' +
    '    available_request_capacity = max_requests_per_minute\n' +
    '    available_token_capacity = max_tokens_per_minute\n' +
    '    last_update_time = time.time()\n' +
    '\n' +
    '    # initialize flags\n' +
    '    file_not_finished = True  # after file is empty, we\'ll skip reading it\n' +
    '    logging.debug(f"Initialization complete.")\n' +
    '\n' +
    '    # initialize file reading\n' +
    '    with open(requests_filepath) as file:\n' +
    '        # `requests` will provide requests one at a time\n' +
    '        requests = file.__iter__()\n' +
    '        logging.debug(f"File opened. Entering main loop")\n' +
    '\n' +
    '        while True:\n' +
    '            # get next request (if one is not already waiting for capacity)\n' +
    '            if next_request is None:\n' +
    '                if not queue_of_requests_to_retry.empty():\n' +
    '                    next_request = queue_of_requests_to_retry.get_nowait()\n' +
    '                    logging.debug(f"Retrying request {next_request.task_id}: {next_request}")\n' +
    '                elif file_not_finished:\n' +
    '                    try:\n' +
    '                        # get new request\n' +
    '                        request_json = json.loads(next(requests))\n' +
    '                        next_request = APIRequest(\n' +
    '                            task_id=next(task_id_generator),\n' +
    '                            request_json=request_json,\n' +
    '                            token_consumption=num_tokens_consumed_from_request(request_json, api_endpoint,\n' +
    '                                                                               token_encoding_name),\n' +
    '                            attempts_left=max_attempts,\n' +
    '                            metadata=request_json.pop("metadata", None)\n' +
    '                        )\n' +
    '                        status_tracker.num_tasks_started += 1\n' +
    '                        status_tracker.num_tasks_in_progress += 1\n' +
    '                        logging.debug(f"Reading request {next_request.task_id}: {next_request}")\n' +
    '                    except StopIteration:\n' +
    '                        # if file runs out, set flag to stop reading it\n' +
    '                        logging.debug("Read file exhausted")\n' +
    '                        file_not_finished = False\n' +
    '\n' +
    '            # update available capacity\n' +
    '            current_time = time.time()\n' +
    '            seconds_since_update = current_time - last_update_time\n' +
    '            available_request_capacity = min(\n' +
    '                available_request_capacity + max_requests_per_minute * seconds_since_update / 60.0,\n' +
    '                max_requests_per_minute,\n' +
    '            )\n' +
    '            available_token_capacity = min(\n' +
    '                available_token_capacity + max_tokens_per_minute * seconds_since_update / 60.0,\n' +
    '                max_tokens_per_minute,\n' +
    '            )\n' +
    '            last_update_time = current_time\n' +
    '\n' +
    '            # if enough capacity available, call API\n' +
    '            if next_request:\n' +
    '                next_request_tokens = next_request.token_consumption\n' +
    '                if (\n' +
    '                        available_request_capacity >= 1\n' +
    '                        and available_token_capacity >= next_request_tokens\n' +
    '                ):\n' +
    '                    # update counters\n' +
    '                    available_request_capacity -= 1\n' +
    '                    available_token_capacity -= next_request_tokens\n' +
    '                    next_request.attempts_left -= 1\n' +
    '\n' +
    '                    # call API\n' +
    '                    asyncio.create_task(\n' +
    '                        next_request.call_api(\n' +
    '                            request_url=request_url,\n' +
    '                            request_header=request_header,\n' +
    '                            retry_queue=queue_of_requests_to_retry,\n' +
    '                            save_filepath=save_filepath,\n' +
    '                            status_tracker=status_tracker,\n' +
    '                        )\n' +
    '                    )\n' +
    '                    await asyncio.sleep(0.5)\n' +
    '                    next_request = None  # reset next_request to empty\n' +
    '\n' +
    '            # if all tasks are finished, break\n' +
    '            if status_tracker.num_tasks_in_progress == 0:\n' +
    '                break\n' +
    '\n' +
    '            # main loop sleeps briefly so concurrent tasks can run\n' +
    '            await asyncio.sleep(seconds_to_sleep_each_loop)\n' +
    '\n' +
    '            # if a rate limit error was hit recently, pause to cool down\n' +
    '            seconds_since_rate_limit_error = (time.time() - status_tracker.time_of_last_rate_limit_error)\n' +
    '            if seconds_since_rate_limit_error < seconds_to_pause_after_rate_limit_error:\n' +
    '                remaining_seconds_to_pause = (seconds_to_pause_after_rate_limit_error - seconds_since_rate_limit_error)\n' +
    '                await asyncio.sleep(remaining_seconds_to_pause)\n' +
    '                # ^e.g., if pause is 15 seconds and final limit was hit 5 seconds ago\n' +
    '                logging.warning(\n' +
    '                    f"Pausing to cool down until {time.ctime(status_tracker.time_of_last_rate_limit_error + seconds_to_pause_after_rate_limit_error)}")\n' +
    '\n' +
    '        # after finishing, log final status\n' +
    '        logging.info(f"""Parallel processing complete. Results saved to {save_filepath}""")\n' +
    '        if status_tracker.num_tasks_failed > 0:\n' +
    '            logging.warning(\n' +
    '                f"{status_tracker.num_tasks_failed} / {status_tracker.num_tasks_started} requests failed. Errors logged to {save_filepath}.")\n' +
    '        if status_tracker.num_rate_limit_errors > 0:\n' +
    '            logging.warning(\n' +
    '                f"{status_tracker.num_rate_limit_errors} rate limit errors received. Consider running at a lower rate.")\n' +
    '\n' +
    '\n' +
    '# dataclasses\n' +
    '\n' +
    '\n' +
    '@dataclass\n' +
    'class StatusTracker:\n' +
    '    """Stores metadata about the script\'s progress. Only one instance is created."""\n' +
    '\n' +
    '    num_tasks_started: int = 0\n' +
    '    num_tasks_in_progress: int = 0  # script ends when this reaches 0\n' +
    '    num_tasks_succeeded: int = 0\n' +
    '    num_tasks_failed: int = 0\n' +
    '    num_rate_limit_errors: int = 0\n' +
    '    num_api_errors: int = 0  # excluding rate limit errors, counted above\n' +
    '    num_other_errors: int = 0\n' +
    '    time_of_last_rate_limit_error: int = 0  # used to cool off after hitting rate limits\n' +
    '\n' +
    '\n' +
    '@dataclass\n' +
    'class APIRequest:\n' +
    '    """Stores an API request\'s inputs, outputs, and other metadata. Contains a method to make an API call."""\n' +
    '\n' +
    '    task_id: int\n' +
    '    request_json: dict\n' +
    '    token_consumption: int\n' +
    '    attempts_left: int\n' +
    '    metadata: dict\n' +
    '    result: list = field(default_factory=list)\n' +
    '\n' +
    '    async def call_api(\n' +
    '            self,\n' +
    '            request_url: str,\n' +
    '            request_header: dict,\n' +
    '            retry_queue: asyncio.Queue,\n' +
    '            save_filepath: str,\n' +
    '            status_tracker: StatusTracker,\n' +
    '    ):\n' +
    '        """Calls the OpenAI API and saves results."""\n' +
    '        logging.info(f"Starting request #{self.task_id}")\n' +
    '        error = None\n' +
    '        global PRICE  # indicate that you\'re using the global instance of PRICE\n' +
    '        try:\n' +
    '            async with aiohttp.ClientSession() as session:\n' +
    '                async with session.post(\n' +
    '                        url=request_url, headers=request_header, json=self.request_json\n' +
    '                ) as response:\n' +
    '                    response = await response.json()\n' +
    '            if "error" in response:\n' +
    '                logging.warning(\n' +
    '                    f"Request {self.task_id} failed with error {response[\'error\']}"\n' +
    '                )\n' +
    '                status_tracker.num_api_errors += 1\n' +
    '                error = response\n' +
    '                if "Rate limit" in response["error"].get("message", ""):\n' +
    '                    status_tracker.time_of_last_rate_limit_error = time.time()\n' +
    '                    status_tracker.num_rate_limit_errors += 1\n' +
    '                    status_tracker.num_api_errors -= 1  # rate limit errors are counted separately\n' +
    '\n' +
    '        except Exception as e:  # catching naked exceptions is bad practice, but in this case we\'ll log & save them\n' +
    '            logging.warning(f"Request {self.task_id} failed with Exception {e}")\n' +
    '            status_tracker.num_other_errors += 1\n' +
    '            error = e\n' +
    '        if error:\n' +
    '            self.result.append(error)\n' +
    '            if self.attempts_left:\n' +
    '                retry_queue.put_nowait(self)\n' +
    '            else:\n' +
    '                logging.error(f"Request {self.request_json} failed after all attempts. Saving errors: {self.result}")\n' +
    '                data = (\n' +
    '                    [self.request_json, [str(e) for e in self.result], self.metadata]\n' +
    '                    if self.metadata\n' +
    '                    else [self.request_json, [str(e) for e in self.result]]\n' +
    '                )\n' +
    '                append_to_jsonl(data, save_filepath)\n' +
    '                status_tracker.num_tasks_in_progress -= 1\n' +
    '                status_tracker.num_tasks_failed += 1\n' +
    '        else:\n' +
    '            print(response)\n' +
    '\n' +
    '            PRICE += response["usage"]["completion_tokens"] * (0.06 / 1000) + response["usage"]["prompt_tokens"] * (\n' +
    '                    0.03 / 1000)\n' +
    '            data = (\n' +
    '                [self.request_json, response, self.metadata]\n' +
    '                if self.metadata\n' +
    '                else [self.request_json, response]\n' +
    '            )\n' +
    '            append_to_jsonl(data, save_filepath)\n' +
    '            status_tracker.num_tasks_in_progress -= 1\n' +
    '            status_tracker.num_tasks_succeeded += 1\n' +
    '            logging.debug(f"Request {self.task_id} saved to {save_filepath}")\n' +
    '            logging.info("###################")\n' +
    '            logging.info(status_tracker.num_tasks_succeeded)\n' +
    '            logging.info(status_tracker.num_tasks_failed)\n' +
    '            logging.info(PRICE)\n' +
    '            logging.info("###################")\n' +
    '\n' +
    '\n' +
    '# functions\n' +
    '\n' +
    '\n' +
    'def api_endpoint_from_url(request_url):\n' +
    '    """Extract the API endpoint from the request URL."""\n' +
    '    # match = re.search(\'^https://[^/]+/v\\\\d+/(.+)$\', request_url)\n' +
    '    # return match[1]\n' +
    '    return request_url\n' +
    '\n' +
    '\n' +
    'def append_to_jsonl(data, filename: str) -> None:\n' +
    '    """Append a json payload to the end of a jsonl file."""\n' +
    '    json_string = json.dumps(data)\n' +
    '    with open(filename, "a") as f:\n' +
    '        f.write(json_string + "\\n")\n' +
    '\n' +
    '\n' +
    'def num_tokens_consumed_from_request(\n' +
    '        request_json: dict,\n' +
    '        api_endpoint: str,\n' +
    '        token_encoding_name: str,\n' +
    '):\n' +
    '    """Count the number of tokens in the request. Only supports completion and embedding requests."""\n' +
    '    encoding = tiktoken.get_encoding(token_encoding_name)\n' +
    '    # if completions request, tokens = prompt + n * max_tokens\n' +
    '    if api_endpoint.endswith("completions") or "completions" in api_endpoint:\n' +
    '        max_tokens = request_json.get("max_tokens", 15)\n' +
    '        n = request_json.get("n", 1)\n' +
    '        completion_tokens = n * max_tokens\n' +
    '\n' +
    '        # chat completions\n' +
    '        if api_endpoint.startswith("chat/") or "chat/" in api_endpoint:\n' +
    '            num_tokens = 0\n' +
    '            for message in request_json["messages"]:\n' +
    '                num_tokens += 4  # every message follows <im_start>{role/name}\\n{content}<im_end>\\n\n' +
    '                for key, value in message.items():\n' +
    '                    num_tokens += len(encoding.encode(value))\n' +
    '                    if key == "name":  # if there\'s a name, the role is omitted\n' +
    '                        num_tokens -= 1  # role is always required and always 1 token\n' +
    '            num_tokens += 2  # every reply is primed with <im_start>assistant\n' +
    '            return num_tokens + completion_tokens\n' +
    '        # normal completions\n' +
    '        else:\n' +
    '            prompt = request_json["prompt"]\n' +
    '            if isinstance(prompt, str):  # single prompt\n' +
    '                prompt_tokens = len(encoding.encode(prompt))\n' +
    '                num_tokens = prompt_tokens + completion_tokens\n' +
    '                return num_tokens\n' +
    '            elif isinstance(prompt, list):  # multiple prompts\n' +
    '                prompt_tokens = sum([len(encoding.encode(p)) for p in prompt])\n' +
    '                num_tokens = prompt_tokens + completion_tokens * len(prompt)\n' +
    '                return num_tokens\n' +
    '            else:\n' +
    '                raise TypeError(\'Expecting either string or list of strings for "prompt" field in completion request\')\n' +
    '    # if embeddings request, tokens = input tokens\n' +
    '    elif api_endpoint == "embeddings":\n' +
    '        input = request_json["input"]\n' +
    '        if isinstance(input, str):  # single input\n' +
    '            num_tokens = len(encoding.encode(input))\n' +
    '            return num_tokens\n' +
    '        elif isinstance(input, list):  # multiple inputs\n' +
    '            num_tokens = sum([len(encoding.encode(i)) for i in input])\n' +
    '            return num_tokens\n' +
    '        else:\n' +
    '            raise TypeError(\'Expecting either string or list of strings for "inputs" field in embedding request\')\n' +
    '    # more logic needed to support other API calls (e.g., edits, inserts, DALL-E)\n' +
    '    else:\n' +
    '        raise NotImplementedError(f\'API endpoint "{api_endpoint}" not implemented in this script\')\n' +
    '\n' +
    '\n' +
    'def task_id_generator_function():\n' +
    '    """Generate integers 0, 1, 2, and so on."""\n' +
    '    task_id = 0\n' +
    '    while True:\n' +
    '        yield task_id\n' +
    '        task_id += 1\n' +
    '\n' +
    '\n' +
    '# run script\n' +
    '\n' +
    '\n' +
    'if __name__ == "__main__":\n' +
    '    # parse command line arguments\n' +
    '    parser = argparse.ArgumentParser()\n' +
    '    parser.add_argument("--requests_filepath")\n' +
    '    parser.add_argument("--save_filepath", default=None)\n' +
    '    parser.add_argument("--request_url", default="https://api.openai.com/v1/embeddings")\n' +
    '    parser.add_argument("--api_key", default=os.getenv("OPENAI_API_KEY"))\n' +
    '    parser.add_argument("--max_requests_per_minute", type=int, default=3_000 * 0.5)\n' +
    '    parser.add_argument("--max_tokens_per_minute", type=int, default=250_000 * 0.5)\n' +
    '    parser.add_argument("--token_encoding_name", default="cl100k_base")\n' +
    '    parser.add_argument("--max_attempts", type=int, default=5)\n' +
    '    parser.add_argument("--logging_level", default=logging.INFO)\n' +
    '    args = parser.parse_args()\n' +
    '\n' +
    '    if args.save_filepath is None:\n' +
    '        args.save_filepath = args.requests_filepath.replace(".jsonl", "_results.jsonl")\n' +
    '\n' +
    '    # run script\n' +
    '    asyncio.run(\n' +
    '        process_api_requests_from_file(\n' +
    '            requests_filepath=args.requests_filepath,\n' +
    '            save_filepath=args.save_filepath,\n' +
    '            request_url=args.request_url,\n' +
    '            api_key=args.api_key,\n' +
    '            max_requests_per_minute=float(args.max_requests_per_minute),\n' +
    '            max_tokens_per_minute=float(args.max_tokens_per_minute),\n' +
    '            token_encoding_name=args.token_encoding_name,\n' +
    '            max_attempts=int(args.max_attempts),\n' +
    '            logging_level=int(args.logging_level),\n' +
    '        )\n' +
    '    )\n' +
    '\n' +
    '"""\n' +
    'APPENDIX\n' +
    '\n' +
    'The example requests file at openai-cookbook/examples/data/example_requests_to_parallel_process.jsonl contains 10,000 requests to text-embedding-ada-002.\n' +
    '\n' +
    'It was generated with the following code:\n' +
    '\n' +
    '```python\n' +
    'import json\n' +
    '\n' +
    'filename = "data/example_requests_to_parallel_process.jsonl"\n' +
    'n_requests = 10_000\n' +
    'jobs = [{"model": "text-embedding-ada-002", "input": str(x) + "\\n"} for x in range(n_requests)]\n' +
    'with open(filename, "w") as f:\n' +
    '    for job in jobs:\n' +
    '        json_string = json.dumps(job)\n' +
    '        f.write(json_string + "\\n")\n' +
    '```\n' +
    '\n' +
    'As with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically).\n' +
    '"""';

/**
 * Python snippet, stored as a plain string, that uploads a single local file
 * to a Google Cloud Storage bucket.
 *
 * The embedded script builds a storage client from a service-account JSON key
 * file, uploads the file under the given blob name, and prints the resulting
 * gs:// location.
 */
const upload_bucket = `from google.cloud import storage

def upload_to_bucket(bucket_name, json_key_file, source_file_path, destination_blob_name):
    # Create a storage client with the provided JSON key file
    storage_client = storage.Client.from_service_account_json(json_key_file)

    # Get the bucket
    bucket = storage_client.get_bucket(bucket_name)

    # Upload the file to the bucket
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(source_file_path)

    print(f"File {source_file_path} uploaded to gs://{bucket_name}/{destination_blob_name}")

if __name__ == "__main__":
    # Replace the following variables with your actual values
    bucket_name = "vilmedic_dataset"
    json_key_file = "file.json"
    source_file_path = "mimic-cxr-images-512.zip"
    destination_blob_name = "mimic-cxr-images-512.zip"  # Specify the desired blob name in the bucket

    upload_to_bucket(bucket_name, json_key_file, source_file_path, destination_blob_name)`;

/*
 * Python script kept as a plain string so it can be rendered verbatim by the
 * SyntaxHighlighter in MiscScripts; it is never executed by this module.
 *
 * What the embedded script does, as written:
 *   - for each split in ["train", "valid", "test"], reads a CSV with pandas,
 *     drops rows without a 'Report' value and keeps rows where the split
 *     column equals 1;
 *   - lists the image files of every study directory and pairs them with the
 *     report text;
 *   - extracts the FINDINGS / IMPRESSION sections with regexes and trims
 *     trailing sentences containing any of `words_to_check`;
 *   - writes newline-joined `.tok` files under RRG/impression, RRG/findings
 *     and RRS, separated by whether the image path contains "ePneumonAI".
 *
 * Escaping conventions used below: single quotes of the Python source are
 * written as \' and Python backslashes are doubled (\\n, \\b, \\.) so that the
 * rendered snippet shows a single backslash.
 *
 * NOTE(review): the embedded regexes place `(?i)` in the middle of the pattern
 * (see `extract_sections`); recent Python versions appear to reject inline
 * global flags that are not at the start of the expression — confirm against
 * the Python `re` documentation before running the snippet.
 */
const process_intermountain = 'import os\n' +
    'import re\n' +
    'import pandas as pd\n' +
    'import tqdm\n' +
    '\n' +
    '\n' +
    'def remove_multiple_spaces(s):\n' +
    '    return \' \'.join(s.split())\n' +
    '\n' +
    '\n' +
    'def remove_last_sentence_if_contains(text, words_to_check):\n' +
    '    if "This report was" in text:\n' +
    '        text = text.split("This report was")[0].strip()\n' +
    '\n' +
    '    sentences = re.split(r\'(?i)(?<!dr)(?<!m\\.d)\\.\', text)\n' +
    '    sentences = [s.strip() for s in sentences if s.strip()]\n' +
    '\n' +
    '    for i, sentence in enumerate(sentences):\n' +
    '        if "This study was" in sentence:\n' +
    '            index_to_remove = i\n' +
    '            sentences = sentences[:index_to_remove]\n' +
    '            break\n' +
    '\n' +
    '    last_sentence_without_dots = False\n' +
    '\n' +
    '    for i, sentence in enumerate(sentences):\n' +
    '        if any(word in sentence.lower() for word in words_to_check):\n' +
    '            index_to_remove = i\n' +
    '            sentences = sentences[:index_to_remove]\n' +
    '            if \'  \' in sentence:  # this sentence might contain radiological content and PHI separated by \'  \'\n' +
    '                sentences = sentences + sentence.split(\'  \')[:1]\n' +
    '                last_sentence_without_dots = True\n' +
    '            break\n' +
    '\n' +
    '    section = ". ".join(sentences) + "."\n' +
    '    if last_sentence_without_dots:  # its possible sentence.split(\'  \')[:1] is also PHI and not radiological content, run once again\n' +
    '        section = remove_last_sentence_if_contains(section, words_to_check)\n' +
    '    else:\n' +
    '        section = section\n' +
    '\n' +
    '    return section.replace("\\n", "")\n' +
    '\n' +
    '\n' +
    'def extract_sections(text):\n' +
    '    # Define regular expressions for FINDINGS and IMPRESSION sections\n' +
    '    findings_pattern = re.compile(r\'\\b(?i)findings\\b:(.*?)(?=\\b(?i)impression\\b:|$)\', re.S)\n' +
    '    impression_pattern = re.compile(r\'\\b(?i)impression\\b:(.*)\', re.S)\n' +
    '\n' +
    '    # Extract sections using regular expressions\n' +
    '    findings_match = findings_pattern.search(text)\n' +
    '    impression_match = impression_pattern.search(text)\n' +
    '\n' +
    '    findings_section = remove_multiple_spaces(\n' +
    '        findings_match.group(1).replace("_x000D_", "").replace("*", "").strip()) if findings_match else None\n' +
    '    impression_section = remove_multiple_spaces(\n' +
    '        impression_match.group(1).replace("_x000D_", "").replace("*", "").strip()) if impression_match else None\n' +
    '\n' +
    '    return findings_section, impression_section\n' +
    '\n' +
    '\n' +
    'for mode in ["train", "valid", "test"]:\n' +
    '\n' +
    '    # Load the CSV file into a pandas DataFrame\n' +
    '    df = pd.read_csv(\'/home/jb/Documents/images/intermo/intermountain_master_updated_150823.csv\')\n' +
    '    # Drop rows where \'Report\' column has NaN values\n' +
    '    df = df.dropna(subset=[\'Report\'])\n' +
    '\n' +
    '    # Filter rows where the \'train\' column has a value of 1.0\n' +
    '    filtered_df = df[df[mode] == 1]\n' +
    '\n' +
    '    # Iterate over the rows of the filtered DataFrame and get the \'Report\' column value\n' +
    '\n' +
    '    # /////// here: filter between chexed and non chexed\n' +
    '\n' +
    '    reports = []\n' +
    '    StudyDirs = []\n' +
    '    for index, row in filtered_df.iterrows():\n' +
    '        base = row[\'StudyDir\'].replace("/data4/intermountain/chexed_v2/images/", "").replace(\n' +
    '            "/data4/intermountain/", "")\n' +
    '        d = os.path.join("/home/jb/Documents/images/intermountain-512/images",\n' +
    '                         base)\n' +
    '\n' +
    '        files = [os.path.join("intermountain-512/images", base, f) for f in os.listdir(d) if\n' +
    '                 os.path.isfile(os.path.join(d, f))]\n' +
    '\n' +
    '        assert len(files) > 0, d\n' +
    '        StudyDirs.append(files)\n' +
    '        reports.append(row[\'Report\'])\n' +
    '\n' +
    '    reports = [r.strip() for r in reports]\n' +
    '\n' +
    '    words_to_check = ["electronically", "signed", "radiologist", "md", "dr."]\n' +
    '\n' +
    '    find = 0\n' +
    '    imp = 0\n' +
    '    findimp = 0\n' +
    '    imp_list = []\n' +
    '    imp_list_img = []\n' +
    '    find_list = []\n' +
    '    find_list_img = []\n' +
    '    imp_find_list = []\n' +
    '    imp_find_list_img = []\n' +
    '    assert len(StudyDirs) == len(reports)\n' +
    '    for r, im in tqdm.tqdm(zip(reports, StudyDirs)):\n' +
    '\n' +
    // NOTE(review): the `\t` in the next literal is a JS escape, so the rendered
    // snippet contains a real tab character there, unlike the `\\n` sequences
    // elsewhere, which render as a backslash followed by "n" — confirm intended.
    '        if r == "IMPRESSION:_x000D_" or r == "*\tAdditional findings: None._x000D_" or r == "FINDINGS:_x000D_" or r == "IMPRESSION: _x000D_":\n' +
    '            continue\n' +
    '\n' +
    '        findings, impression = extract_sections(r)\n' +
    '\n' +
    '        if impression:\n' +
    '            imp += 1\n' +
    '            impression = impression.strip()\n' +
    '            impression = remove_last_sentence_if_contains(impression, words_to_check)\n' +
    '            imp_list.append(impression)\n' +
    '            imp_list_img.append(",".join(im))\n' +
    '        if findings:\n' +
    '            find += 1\n' +
    '            findings = findings.strip()\n' +
    '            findings = remove_last_sentence_if_contains(findings, words_to_check)\n' +
    '            if findings == ".":  # one occurrence of a weird report\n' +
    '                findings = None\n' +
    '            else:\n' +
    '                find_list.append(findings)\n' +
    '                find_list_img.append(",".join(im))\n' +
    '\n' +
    '        if impression and findings:\n' +
    '            findimp += 1\n' +
    '            imp_find_list.append([findings, impression])\n' +
    '            imp_find_list_img.append(",".join(im))\n' +
    '\n' +
    '    print(findimp)\n' +
    '    print(find)\n' +
    '    print(imp)\n' +
    '    os.makedirs("RRG/impression", exist_ok=True)\n' +
    '    os.makedirs("RRG/findings", exist_ok=True)\n' +
    '    os.makedirs("RRS", exist_ok=True)\n' +
    '\n' +
    '\n' +
    '    def split_and_save(list_data, list_img_data):\n' +
    '        list_1 = []\n' +
    '        list_img_1 = []\n' +
    '        list_2 = []\n' +
    '        list_img_2 = []\n' +
    '\n' +
    '        for data, img in zip(list_data, list_img_data):\n' +
    '            if "ePneumonAI" in img:\n' +
    '                list_1.append(data)\n' +
    '                list_img_1.append(img)\n' +
    '            else:\n' +
    '                list_2.append(data)\n' +
    '                list_img_2.append(img)\n' +
    '        return list_1, list_2, list_img_1, list_img_2\n' +
    '\n' +
    '\n' +
    '    imp_list_1, imp_list_2, imp_list_img_1, imp_list_img_2 = split_and_save(imp_list, imp_list_img)\n' +
    '    open(f"RRG/impression/{mode}.ePneumonAI.impression.tok", "w").write("\\n".join(imp_list_1))\n' +
    '    open(f"RRG/impression/{mode}.emergency.impression.tok", "w").write("\\n".join(imp_list_2))\n' +
    '    open(f"RRG/impression/{mode}.ePneumonAI.image.tok", "w").write("\\n".join(imp_list_img_1))\n' +
    '    open(f"RRG/impression/{mode}.emergency.image.tok", "w").write("\\n".join(imp_list_img_2))\n' +
    '\n' +
    '    imp_list_1, imp_list_2, imp_list_img_1, imp_list_img_2 = split_and_save(find_list, find_list_img)\n' +
    '    open(f"RRG/findings/{mode}.ePneumonAI.findings.tok", "w").write("\\n".join(imp_list_1))\n' +
    '    open(f"RRG/findings/{mode}.emergency.findings.tok", "w").write("\\n".join(imp_list_2))\n' +
    '    open(f"RRG/findings/{mode}.ePneumonAI.image.tok", "w").write("\\n".join(imp_list_img_1))\n' +
    '    open(f"RRG/findings/{mode}.emergency.image.tok", "w").write("\\n".join(imp_list_img_2))\n' +
    '\n' +
    '\n' +
    '    def split_and_save2(_find, _imp, _image):\n' +
    '        list_1 = []\n' +
    '        list_2 = []\n' +
    '        list_3 = []\n' +
    '        list_4 = []\n' +
    '        list_img_1 = []\n' +
    '        list_img_2 = []\n' +
    '\n' +
    '        for f, i, img in zip(_find, _imp, _image):\n' +
    '            if "ePneumonAI" in img:\n' +
    '                list_1.append(f)\n' +
    '                list_2.append(i)\n' +
    '                list_img_1.append(img)\n' +
    '            else:\n' +
    '                list_3.append(f)\n' +
    '                list_4.append(i)\n' +
    '                list_img_2.append(img)\n' +
    '        return list_1, list_2, list_3, list_4, list_img_1, list_img_2\n' +
    '\n' +
    '\n' +
    '    list_1, list_2, list_3, list_4, list_img_1, list_img_2 = split_and_save2([r[0] for r in imp_find_list],\n' +
    '                                                                             [r[1] for r in imp_find_list],\n' +
    '                                                                             imp_find_list_img,\n' +
    '                                                                             )\n' +
    '    open(f"RRS/{mode}.ePneumonAI.findings.tok", "w").write("\\n".join(list_1))\n' +
    '    open(f"RRS/{mode}.ePneumonAI.impression.tok", "w").write("\\n".join(list_2))\n' +
    '    open(f"RRS/{mode}.ePneumonAI.image.tok", "w").write("\\n".join(list_img_1))\n' +
    '\n' +
    '    open(f"RRS/{mode}.emergency.findings.tok", "w").write("\\n".join(list_3))\n' +
    '    open(f"RRS/{mode}.emergency.impression.tok", "w").write("\\n".join(list_4))\n' +
    '    open(f"RRS/{mode}.emergency.image.tok", "w").write("\\n".join(list_img_2))\n'
const MiscScripts = () => {
    const {codeStyle, VilmedicTag} = useContext(GlobalContext);

    return (
        <>
            <Space direction={"vertical"}>
                <div style={{width: 900}}>
                    Upload bucket
                    <SyntaxHighlighter customStyle={{textAlign: "left"}} language="python" style={codeStyle}>
                        {upload_bucket}
                    </SyntaxHighlighter>
                    make_batch_gpt
                    <SyntaxHighlighter customStyle={{textAlign: "left"}} language="python" style={codeStyle}>
                        {make_batch_gpt}
                    </SyntaxHighlighter>

                    batch_gpt
                    <SyntaxHighlighter customStyle={{textAlign: "left"}} language="python" style={codeStyle}>
                        {batch_gpt}
                    </SyntaxHighlighter>
                    process intermountain
                    <SyntaxHighlighter customStyle={{textAlign: "left"}} language="python" style={codeStyle}>
                        {process_intermountain}
                    </SyntaxHighlighter>
                    bucket_local_bucket
                    <SyntaxHighlighter customStyle={{textAlign: "left"}} language="python" style={codeStyle}>
                        {bucket_local_bucket}
                    </SyntaxHighlighter>


                </div>
            </Space>
        </>
    );
};

export default MiscScripts