#!/usr/bin/env python3

import argparse
import json
import os
import pathlib
import re
from typing import cast

import requests

# Directory containing this script; the language index and the PO catalogues are looked up here.
TRANSLATIONS_DIR = pathlib.Path(__file__).resolve().parent
TRANSLATIONS_LANGUAGES = TRANSLATIONS_DIR.joinpath("languages.json")

# Chat-completion settings. The API key is read from the environment once, at import time.
OPENAI_MODEL = "deepseek-chat"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# System prompt template; `{language}` is filled in per request with the target language code.
OPENAI_PROMPT = (
  "You are a professional translator from English to {language} (ISO 639 language code). "
  "The following sentence or word is in the GUI of a software called openpilot, translate it accordingly."
)


def get_language_files(languages: list[str] = None) -> dict[str, pathlib.Path]:
  """Map language codes to the paths of their PO catalogues.

  The codes are the values of the JSON object stored in TRANSLATIONS_LANGUAGES
  (its keys are not used here). Each code maps to
  ``TRANSLATIONS_DIR / "app_<code>.po"``; the path is not checked for existence.

  Args:
    languages: Codes to keep. ``None`` (the default) keeps every code listed
      in the index. Requested codes that are not in the index are simply
      absent from the result.

  Returns:
    A dict from language code to catalogue path.
  """
  # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
  with open(TRANSLATIONS_LANGUAGES, encoding="utf-8") as fp:
    language_dict = json.load(fp)

  return {
    code: TRANSLATIONS_DIR / f"app_{code}.po"
    for code in language_dict.values()
    if languages is None or code in languages
  }


def translate_phrase(text: str, language: str) -> str:
  """Translate `text` from English into `language` with the chat-completions API.

  Args:
    text: The English phrase, sent as the user message.
    language: Target language code, substituted into OPENAI_PROMPT.

  Returns:
    The message content of the first choice in the API response.

  Raises:
    requests.HTTPError: The API answered with a 4xx or 5xx status.
    requests.RequestException: The request failed or timed out.
  """
  response = requests.post(
    "https://api.deepseek.com/chat/completions",
    json={
      "model": OPENAI_MODEL,
      "messages": [
        {
          "role": "system",
          "content": OPENAI_PROMPT.format(language=language),
        },
        {
          "role": "user",
          "content": text,
        },
      ],
      "temperature": 0.8,
      "max_tokens": 1024,
      "top_p": 1,
    },
    headers={
      "Authorization": f"Bearer {OPENAI_API_KEY}",
      "Content-Type": "application/json",
    },
    # (connect, read) seconds. Without a timeout a stalled connection blocks the script forever.
    timeout=(10, 120),
  )

  if 400 <= response.status_code < 600:
    # Error bodies are not guaranteed to be JSON (e.g. a gateway's HTML page);
    # fall back to the raw text so the HTTP status is still what gets reported.
    try:
      detail = response.json()
    except ValueError:
      detail = response.text
    raise requests.HTTPError(f'Error {response.status_code}: {detail}', response=response)

  data = response.json()

  return cast(str, data["choices"][0]["message"]["content"])


# Escape sequences used inside PO string literals, keyed by the decoded character.
_PO_ESCAPES = {"\\": "\\\\", '"': '\\"', "\n": "\\n", "\t": "\\t", "\r": "\\r"}
# Inverse table, keyed by the character that follows the backslash.
_PO_UNESCAPES = {"\\": "\\", '"': '"', "n": "\n", "t": "\t", "r": "\r"}
# A stripped `msgid "..."`, `msgid_plural "..."`, `msgstr "..."` or `msgstr[n] "..."` line.
_PO_FIELD_RE = re.compile(r'(msgid_plural|msgid|msgstr)(?:\[(\d+)\])?\s*"(.*)"$')
# Soft limit (in decoded characters) for one continuation line of a wrapped msgstr.
_PO_WRAP_WIDTH = 60


def _po_unescape(raw: str) -> str:
  """Decode the backslash escapes of a PO string literal body; unknown escapes are kept as-is."""
  return re.sub(r"\\(.)", lambda m: _PO_UNESCAPES.get(m.group(1), m.group(0)), raw)


def _po_escape(text: str) -> str:
  """Encode `text` so it can be placed between the double quotes of a PO string literal."""
  return "".join(_PO_ESCAPES.get(char, char) for char in text)


def _read_po_field(lines: list[str], start: int) -> "tuple[str, str | None, str, int] | None":
  """Parse the PO field whose keyword line is `lines[start]`.

  Returns `(keyword, plural_index, raw_text, end)`, where `raw_text` is the
  still-escaped concatenation of the keyword line's string and all directly
  following `"..."` continuation lines, and `end` is the index of the first
  line after the field. Returns None when `lines[start]` is not a field line.
  """
  if start >= len(lines):
    return None
  match = _PO_FIELD_RE.match(lines[start].strip())
  if match is None:
    return None

  keyword, index, raw = match.groups()
  end = start + 1
  while end < len(lines):
    continuation = lines[end].strip()
    if len(continuation) < 2 or not (continuation.startswith('"') and continuation.endswith('"')):
      break
    # Drop exactly one quote per side; str.strip('"') would also eat an escaped quote at the edge.
    raw += continuation[1:-1]
    end += 1
  return keyword, index, raw, end


def _wrap_po_text(text: str, width: int = _PO_WRAP_WIDTH) -> list[str]:
  """Split `text` into chunks that concatenate back to exactly `text`.

  gettext joins continuation lines without a separator, so every chunk keeps
  its own trailing whitespace. A chunk is also closed after a newline.
  """
  chunks = []
  current = ""
  for token in re.findall(r"\S+\s*|\s+", text):
    if current and len(current) + len(token) > width:
      chunks.append(current)
      current = ""
    current += token
    if current.endswith("\n"):
      chunks.append(current)
      current = ""
  if current:
    chunks.append(current)
  return chunks


def _format_msgstr(translation: str, multiline: bool) -> list[str]:
  """Render a non-plural msgstr as file lines, either on one line or as `msgstr ""` plus wrapped chunks."""
  if not multiline:
    return [f'msgstr "{_po_escape(translation)}"\n']
  return ['msgstr ""\n'] + [f'"{_po_escape(chunk)}"\n' for chunk in _wrap_po_text(translation)]


def _translate_po_text(raw: str, language: str) -> tuple[str, str]:
  """Translate one escaped PO string; return `(decoded source, translation)`.

  The source's leading and trailing whitespace is re-applied to the translation,
  because the surrounding whitespace (notably a trailing newline) has to agree
  between msgid and msgstr and the model cannot be relied on to keep it.
  """
  source = _po_unescape(raw)
  core = source.strip()
  if not core:
    # Nothing but whitespace: there is nothing to translate.
    return source, source

  leading = source[:len(source) - len(source.lstrip())]
  trailing = source[len(source.rstrip()):]
  return source, leading + translate_phrase(source, language).strip() + trailing


def translate_file(path: pathlib.Path, language: str, all_: bool) -> None:
  """Fill in the translations of a PO catalogue in place using translate_phrase.

  Every line outside a rewritten msgstr field is written back untouched.
  The header entry (empty msgid) is never translated. Only entries whose
  msgstr fields directly follow the msgid / msgid_plural are handled.

  For plural entries the singular translation is written to ``msgstr[0]`` and
  the plural translation to ``msgstr[1]``; any further ``msgstr[n]`` fields are
  left as they are. A non-plural msgstr keeps its layout: a field that starts
  with ``msgstr ""`` is written as wrapped continuation lines, anything else
  as a single line.

  Args:
    path: The PO file to read and overwrite (UTF-8).
    language: Target language code passed to translate_phrase.
    all_: Translate every entry when true; otherwise only entries whose
      msgstr fields are all empty or whitespace.

  Raises:
    requests.RequestException: Propagated from translate_phrase. The file is
      only written after all entries were processed, so it is left unmodified.
  """
  with path.open("r", encoding="utf-8") as fp:
    lines = fp.readlines()

  i = 0
  while i < len(lines):
    field = _read_po_field(lines, i)
    if field is None or field[0] != "msgid":
      i += 1
      continue
    raw_msgid, i = field[2], field[3]

    raw_plural = None
    field = _read_po_field(lines, i)
    if field is not None and field[0] == "msgid_plural":
      raw_plural, i = field[2], field[3]

    # Collect the msgstr / msgstr[n] fields as (plural index, raw text, first line, end line).
    targets = []
    field = _read_po_field(lines, i)
    while field is not None and field[0] == "msgstr":
      targets.append((field[1], field[2], i, field[3]))
      i = field[3]
      field = _read_po_field(lines, i)

    # An empty msgid is the header entry; an entry without msgstr is malformed.
    if not raw_msgid or not targets:
      continue
    # Unless everything is requested, keep entries that already carry a translation.
    if not all_ and any(raw.strip() for _, raw, _, _ in targets):
      continue

    if raw_plural is None:
      _, _, start, end = targets[0]
      source, llm_translation = _translate_po_text(raw_msgid, language)

      print("Translating entry:")
      print(f"Source: {source}")
      print(f"LLM translation: {llm_translation}")
      print("-" * 50)

      multiline = lines[start].strip() == 'msgstr ""'
      lines[start:end] = _format_msgstr(llm_translation, multiline)
    else:
      singular_text, singular_translation = _translate_po_text(raw_msgid, language)
      plural_text, plural_translation = _translate_po_text(raw_plural, language)

      print("Translating plural entry:")
      print(f"Singular: {singular_text}")
      print(f"Plural: {plural_text}")
      print(f"Singular translation: {singular_translation}")
      print(f"Plural translation: {plural_translation}")
      print("-" * 50)

      translations = {"0": singular_translation, "1": plural_translation}
      # Replace bottom-up so the recorded line spans of earlier fields stay valid.
      for index, _, start, end in reversed(targets):
        if index in translations:
          lines[start:end] = [f'msgstr[{index}] "{_po_escape(translations[index])}"\n']

    # Resume right after the first msgstr keyword line; the rewritten lines
    # that follow are not msgid fields, so the scan simply walks over them.
    i = targets[0][2] + 1

  with path.open("w", encoding="utf-8") as fp:
    fp.writelines(lines)


def main():
  """Command-line entry point: parse arguments and translate the selected PO files.

  Exactly one of ``-a/--all-files`` or ``-f/--file CODE...`` is required.
  ``-t/--all-translations`` re-translates every entry instead of only the
  unfinished ones.

  Raises:
    SystemExit: With status 1 when the API key is not set or a requested
      language has no entry in the language index (argparse itself exits on
      invalid arguments).
  """
  arg_parser = argparse.ArgumentParser("Auto translate")

  group = arg_parser.add_mutually_exclusive_group(required=True)
  group.add_argument("-a", "--all-files", action="store_true", help="Translate all files")
  group.add_argument("-f", "--file", nargs="+", help="Translate the selected files. (Example: -f fr de)")

  arg_parser.add_argument("-t", "--all-translations", action="store_true", default=False, help="Translate all sections. (Default: only unfinished)")

  args = arg_parser.parse_args()

  if OPENAI_API_KEY is None:
    print("OpenAI API key is missing. (Hint: use `export OPENAI_API_KEY=YOUR-KEY` before you run the script).\n" +
          "If you don't have one go to: https://beta.openai.com/account/api-keys.")
    # The `exit` builtin is injected by the `site` module for interactive use and
    # may be missing (e.g. `python -S`); raising SystemExit always works.
    raise SystemExit(1)

  files = get_language_files(None if args.all_files else args.file)

  if args.file:
    missing_files = set(args.file) - set(files)
    if missing_files:
      print(f"No language files found: {missing_files}")
      raise SystemExit(1)

  print(f"Translation mode: {'all' if args.all_translations else 'only unfinished'}. Files: {list(files)}")

  for lang, path in files.items():
    print(f"Translate {lang} ({path})")
    translate_file(path, lang, args.all_translations)


if __name__ == "__main__":
  main()