# Copyright 2011 The Emscripten Authors.  All rights reserved.
# Emscripten is available under two separate licenses, the MIT license and the
# University of Illinois/NCSA Open Source License.  Both these licenses can be
# found in the LICENSE file.

"""Shared code specific to emscripten.  General purpose and low-level helpers belong instead in
utils.py."""

import atexit
import logging
import os
import re
import shlex
import signal
import subprocess
import sys
import tempfile
from subprocess import PIPE

from .toolchain_profiler import ToolchainProfiler

# We depend on python 3.10 features
if sys.version_info < (3, 10): # noqa: UP036
  print(f'error: emscripten requires python 3.10 or above ({sys.executable} {sys.version})', file=sys.stderr)
  sys.exit(1)

from . import colored_logger

# Configure logging before importing any other local modules so even
# log message during import are shown as expected.
DEBUG = int(os.environ.get('EMCC_DEBUG', '0'))
EMCC_LOGGING = int(os.environ.get('EMCC_LOGGING', '1'))
log_level = logging.ERROR
if DEBUG:
  log_level = logging.DEBUG
elif EMCC_LOGGING:
  log_level = logging.INFO
# can add  %(asctime)s  to see timestamps
logging.basicConfig(format='%(name)s:%(levelname)s: %(message)s', level=log_level)
colored_logger.enable()

import contextlib

from . import cache, config, diagnostics, filelock, tempfiles, utils
from .settings import settings
from .utils import exe_path_from_root, exit_with_error, memoize, path_from_root, safe_ensure_dirs

DEBUG_SAVE = DEBUG or int(os.environ.get('EMCC_DEBUG_SAVE', '0'))
PRINT_SUBPROCS = int(os.getenv('EMCC_VERBOSE', '0'))
SKIP_SUBPROCS = False

# Minimum node version required to run the emscripten compiler.  This is
# distinct from the minimum version required to execute the generated code
# (settings.MIN_NODE_VERSION).
# This is currently set to v18 since this is the version of node available
# in debian/stable (bookworm).  We need at least v18.3.0 because we make
# use of util.parseArg which was added in v18.3.0.
MINIMUM_NODE_VERSION = (18, 3, 0)
EXPECTED_LLVM_VERSION = 22

# These get set by setup_temp_dirs
TEMP_DIR = None
EMSCRIPTEN_TEMP_DIR = None

logger = logging.getLogger('shared')

# warning about absolute-paths is disabled by default, and not enabled by -Wall
diagnostics.add_warning('absolute-paths', enabled=False, part_of_all=False)
# unused diagnostic flags.  TODO(sbc): remove at some point
diagnostics.add_warning('almost-asm')
diagnostics.add_warning('experimental')
# Don't show legacy settings warnings by default
diagnostics.add_warning('legacy-settings', enabled=False, part_of_all=False)
# Catch-all for other emcc warnings
diagnostics.add_warning('linkflags')
diagnostics.add_warning('emcc')
diagnostics.add_warning('undefined', error=True)
diagnostics.add_warning('deprecated', shared=True)
diagnostics.add_warning('version-check')
diagnostics.add_warning('export-main')
diagnostics.add_warning('map-unrecognized-libraries')
diagnostics.add_warning('unused-command-line-argument', shared=True)
diagnostics.add_warning('pthreads-mem-growth')
diagnostics.add_warning('transpile')
diagnostics.add_warning('limited-postlink-optimizations')
diagnostics.add_warning('em-js-i64')
diagnostics.add_warning('js-compiler')
diagnostics.add_warning('compatibility')
diagnostics.add_warning('unsupported')
diagnostics.add_warning('unused-main')
# Closure warning are not (yet) enabled by default
diagnostics.add_warning('closure', enabled=False)


def returncode_to_str(code):
  assert code != 0
  if code < 0:
    signal_name = signal.Signals(-code).name
    return f'received {signal_name} ({code})'

  return f'returned {code}'


def run_multiple_processes(commands,
                           env=None,
                           route_stdout_to_temp_files_suffix=None,
                           cwd=None):
  """Runs multiple subprocess commands.

  route_stdout_to_temp_files_suffix : string
    if not None, all stdouts are instead written to files, and an array
    of filenames is returned.
  """

  if env is None:
    env = os.environ.copy()

  std_outs = []

  # TODO: Experiment with registering a signal handler here to see if that helps with Ctrl-C locking up the command prompt
  # when multiple child processes have been spawned.
  # import signal
  # def signal_handler(sig, frame):
  #   sys.exit(1)
  # signal.signal(signal.SIGINT, signal_handler)

  # Map containing all currently running processes.
  # command index -> proc/Popen object
  processes = {}

  def get_finished_process():
    while True:
      for idx, proc in processes.items():
        if proc.poll() is not None:
          return idx
      # All processes still running; wait a short while for the first
      # (oldest) process to finish, then look again if any process has completed.
      idx, proc = next(iter(processes.items()))
      try:
        proc.communicate(timeout=0.2)
        return idx
      except subprocess.TimeoutExpired:
        pass

  num_parallel_processes = utils.get_num_cores()
  temp_files = get_temp_files()
  i = 0
  num_completed = 0
  while num_completed < len(commands):
    if i < len(commands) and len(processes) < num_parallel_processes:
      # Not enough parallel processes running, spawn a new one.
      if route_stdout_to_temp_files_suffix:
        stdout = temp_files.get(route_stdout_to_temp_files_suffix)
      else:
        stdout = None
      if DEBUG:
        logger.debug('Running subprocess %d/%d: %s' % (i + 1, len(commands), ' '.join(commands[i])))
      print_compiler_stage(commands[i])
      proc = subprocess.Popen(commands[i], stdout=stdout, stderr=None, env=env, cwd=cwd)
      processes[i] = proc
      if route_stdout_to_temp_files_suffix:
        std_outs.append((i, stdout.name))
      i += 1
    else:
      # Not spawning a new process (Too many commands running in parallel, or
      # no commands left): find if a process has finished.
      idx = get_finished_process()
      finished_process = processes.pop(idx)
      if finished_process.returncode != 0:
        exit_with_error('subprocess %d/%d failed (%s)! (cmdline: %s)' % (idx + 1, len(commands), returncode_to_str(finished_process.returncode), shlex.join(commands[idx])))
      num_completed += 1

  if route_stdout_to_temp_files_suffix:
    # If processes finished out of order, sort the results to the order of the input.
    std_outs.sort(key=lambda x: x[0])
    return [x[1] for x in std_outs]


def check_call(cmd, *args, **kw):
  """Like `run_process` above but treat failures as fatal and exit_with_error."""
  print_compiler_stage(cmd)
  if SKIP_SUBPROCS:
    return 0
  try:
    return utils.run_process(cmd, *args, **kw)
  except subprocess.CalledProcessError as e:
    exit_with_error("'%s' failed (%s)", shlex.join(cmd), returncode_to_str(e.returncode))
  except OSError as e:
    exit_with_error("'%s' failed: %s", shlex.join(cmd), e)


def exec_process(cmd):
  print_compiler_stage(cmd)
  utils.exec(cmd)


def run_js_tool(filename, jsargs=[], node_args=[], **kw):  # noqa: B006
  """Execute a javascript tool.

  This is used by emcc to run parts of the build process that are written
  implemented in javascript.
  """
  command = config.NODE_JS + node_args + [filename] + jsargs
  return check_call(command, **kw).stdout


def get_npm_cmd(name, missing_ok=False):
  if utils.WINDOWS:
    cmd = [path_from_root('node_modules/.bin', name + '.cmd')]
  else:
    cmd = config.NODE_JS + [path_from_root('node_modules/.bin', name)]
  if not os.path.exists(cmd[-1]):
    if missing_ok:
      return None
    else:
      exit_with_error(f'{name} was not found! Please run "npm install" in Emscripten root directory to set up npm dependencies')
  return cmd


@memoize
def get_clang_version():
  if not os.path.exists(CLANG_CC):
    exit_with_error('clang executable not found at `%s`' % CLANG_CC)
  proc = check_call([CLANG_CC, '--version'], stdout=PIPE)
  m = re.search(r'[Vv]ersion\s+(\d+\.\d+)', proc.stdout)
  return m and m.group(1)


def check_llvm_version():
  actual = get_clang_version()
  if actual.startswith('%d.' % EXPECTED_LLVM_VERSION):
    return True
  # When running in CI environment we also silently allow the next major
  # version of LLVM here so that new versions of LLVM can be rolled in
  # without disruption.
  if 'BUILDBOT_BUILDNUMBER' in os.environ:
    if actual.startswith('%d.' % (EXPECTED_LLVM_VERSION + 1)):
      return True
  diagnostics.warning('version-check', 'LLVM version for clang executable "%s" appears incorrect (seeing "%s", expected "%s")', CLANG_CC, actual, EXPECTED_LLVM_VERSION)
  return False


def get_clang_targets():
  if not os.path.exists(CLANG_CC):
    exit_with_error('clang executable not found at `%s`' % CLANG_CC)
  try:
    target_info = utils.run_process([CLANG_CC, '-print-targets'], stdout=PIPE).stdout
  except subprocess.CalledProcessError:
    exit_with_error('error running `clang -print-targets`.  Check your llvm installation (%s)' % CLANG_CC)
  if 'Registered Targets:' not in target_info:
    exit_with_error('error parsing output of `clang -print-targets`.  Check your llvm installation (%s)' % CLANG_CC)
  return target_info.split('Registered Targets:')[1]


def check_llvm():
  targets = get_clang_targets()
  if 'wasm32' not in targets:
    logger.critical('LLVM has not been built with the WebAssembly backend, clang reports:')
    print('===========================================================================', file=sys.stderr)
    print(targets, file=sys.stderr)
    print('===========================================================================', file=sys.stderr)
    return False

  return True


def get_node_directory():
  return os.path.dirname(config.NODE_JS[0] if type(config.NODE_JS) is list else config.NODE_JS)


# When we run some tools from npm (closure, html-minifier-terser), those
# expect that the tools have node.js accessible in PATH. Place our node
# there when invoking those tools.
def env_with_node_in_path():
  env = os.environ.copy()
  env['PATH'] = get_node_directory() + os.pathsep + env['PATH']
  return env


def _get_node_version_pair(nodejs):
  actual = utils.run_process(nodejs + ['--version'], stdout=PIPE).stdout.strip()
  version = actual.removeprefix('v')
  version = version.split('-')[0].split('.')
  version = tuple(int(v) for v in version)
  return actual, version


def get_node_version(nodejs):
  return _get_node_version_pair(nodejs)[1]


@memoize
def check_node_version():
  try:
    actual, version = _get_node_version_pair(config.NODE_JS)
  except Exception as e:
    diagnostics.warning('version-check', 'cannot check node version: %s', e)
    return

  if version < MINIMUM_NODE_VERSION:
    expected = '.'.join(str(v) for v in MINIMUM_NODE_VERSION)
    diagnostics.warning('version-check', f'node version appears too old (seeing "{actual}", expected "v{expected}")')

  return version


def node_reference_types_flags(nodejs):
  node_version = get_node_version(nodejs)
  # reference types were enabled by default in node v18.
  if node_version and node_version < (18, 0, 0):
    return ['--experimental-wasm-reftypes']
  else:
    return []


def node_exception_flags(nodejs):
  node_version = get_node_version(nodejs)
  # Legacy exception handling was enabled by default in node v17.
  if node_version and node_version < (17, 0, 0):
    return ['--experimental-wasm-eh']
  # Standard exception handling was supported behind flag in node v22.
  if node_version and node_version >= (22, 0, 0) and not settings.WASM_LEGACY_EXCEPTIONS:
    return ['--experimental-wasm-exnref']
  return []


def node_pthread_flags(nodejs):
  node_version = get_node_version(nodejs)
  # bulk memory and wasm threads were enabled by default in node v16.
  if node_version and node_version < (16, 0, 0):
    return ['--experimental-wasm-bulk-memory', '--experimental-wasm-threads']
  else:
    return []


@memoize
@ToolchainProfiler.profile()
def check_node():
  try:
    utils.run_process(config.NODE_JS + ['-e', 'console.log("hello")'], stdout=PIPE)
  except Exception as e:
    exit_with_error('the configured node executable (%s) does not seem to work, check the paths in %s (%s)', config.NODE_JS, config.EM_CONFIG, e)


def generate_sanity():
  return f'{utils.EMSCRIPTEN_VERSION}|{config.LLVM_ROOT}\n'


@memoize
def perform_sanity_checks(quiet=False):
  # some warning, mostly not fatal checks - do them even if EM_IGNORE_SANITY is on
  check_node_version()
  check_llvm_version()

  llvm_ok = check_llvm()

  if os.environ.get('EM_IGNORE_SANITY'):
    logger.info('EM_IGNORE_SANITY set, ignoring sanity checks')
    return

  if not quiet:
    logger.info('(Emscripten: Running sanity checks)')

  if not llvm_ok:
    exit_with_error('failing sanity checks due to previous llvm failure')

  check_node()

  with ToolchainProfiler.profile_block('sanity LLVM'):
    for cmd in (CLANG_CC, LLVM_AR):
      if not os.path.exists(cmd) and not os.path.exists(cmd + '.exe'):  # .exe extension required for Windows
        exit_with_error('cannot find %s, check the paths in %s', cmd, config.EM_CONFIG)


@ToolchainProfiler.profile()
def check_sanity(force=False, quiet=False):
  """Check that basic stuff we need (a JS engine to compile, Node.js, and Clang
  and LLVM) exists.

  The test runner always does this check (through |force|). emcc does this less
  frequently, only when ${EM_CONFIG}_sanity does not exist or is older than
  EM_CONFIG (so, we re-check sanity when the settings are changed).  We also
  re-check sanity and clear the cache when the version changes.
  """
  if not force and os.environ.get('EMCC_SKIP_SANITY_CHECK') == '1':
    return

  # We set EMCC_SKIP_SANITY_CHECK so that any subprocesses that we launch will
  # not re-run the tests.
  os.environ['EMCC_SKIP_SANITY_CHECK'] = '1'

  # In DEBUG mode we perform the sanity checks even when
  # early return due to the file being up-to-date.
  if DEBUG:
    force = True

  if config.FROZEN_CACHE:
    if force:
      perform_sanity_checks(quiet)
    return

  if os.environ.get('EM_IGNORE_SANITY'):
    perform_sanity_checks(quiet)
    return

  expected = generate_sanity()

  sanity_file = cache.get_path('sanity.txt')

  def sanity_is_correct():
    sanity_data = None
    # We can't simply check for the existence of sanity_file and then read from
    # it here because we don't hold the cache lock yet and some other process
    # could clear the cache between checking for, and reading from, the file.
    with contextlib.suppress(Exception):
      sanity_data = utils.read_file(sanity_file)
    if sanity_data == expected:
      logger.debug(f'sanity file up-to-date: {sanity_file}')
      # Even if the sanity file is up-to-date we still run the checks
      # when force is set.
      if force:
        perform_sanity_checks(quiet)
      return True # all is well
    return False

  if sanity_is_correct():
    # Early return without taking the cache lock
    return

  with cache.lock('sanity'):
    # Check again once the cache lock as acquired
    if sanity_is_correct():
      return

    if os.path.exists(sanity_file):
      sanity_data = utils.read_file(sanity_file)
      logger.info('old sanity: %s', sanity_data.strip())
      logger.info('new sanity: %s', expected.strip())
      logger.info('(Emscripten: config changed, clearing cache)')
      cache.erase()
    else:
      logger.debug(f'sanity file not found: {sanity_file}')

    perform_sanity_checks()

    # Only create/update this file if the sanity check succeeded, i.e., we got here
    utils.write_file(sanity_file, expected)


def llvm_tool_path_with_suffix(tool, suffix):
  if suffix:
    tool += '-' + suffix
  llvm_root = os.path.expanduser(config.LLVM_ROOT)
  return utils.find_exe(llvm_root, tool)


# Some distributions ship with multiple llvm versions so they add
# the version to the binaries, cope with that
def llvm_tool_path(tool):
  return llvm_tool_path_with_suffix(tool, config.LLVM_ADD_VERSION)


# Some distributions ship with multiple clang versions so they add
# the version to the binaries, cope with that
def clang_tool_path(tool):
  return llvm_tool_path_with_suffix(tool, config.CLANG_ADD_VERSION)


# In MINIMAL_RUNTIME mode, keep suffixes of generated files simple
# ('.mem' instead of '.js.mem'; .'symbols' instead of '.js.symbols' etc)
# Retain the original naming scheme in traditional runtime.
def replace_or_append_suffix(filename, new_suffix):
  assert new_suffix[0] == '.'
  return utils.replace_suffix(filename, new_suffix) if settings.MINIMAL_RUNTIME else filename + new_suffix


# Temp dir. Create a random one, unless EMCC_DEBUG is set, in which case use the canonical
# temp directory (TEMP_DIR/emscripten_temp).
@memoize
def get_emscripten_temp_dir():
  """Returns a path to EMSCRIPTEN_TEMP_DIR, creating one if it didn't exist."""
  global EMSCRIPTEN_TEMP_DIR
  if not EMSCRIPTEN_TEMP_DIR:
    EMSCRIPTEN_TEMP_DIR = tempfile.mkdtemp(prefix='emscripten_temp_', dir=TEMP_DIR)

    if not DEBUG_SAVE:
      def prepare_to_clean_temp(d):
        def clean_temp():
          utils.delete_dir(d)

        atexit.register(clean_temp)
      # this global var might change later
      prepare_to_clean_temp(EMSCRIPTEN_TEMP_DIR)
  return EMSCRIPTEN_TEMP_DIR


def in_temp(name):
  return os.path.join(get_emscripten_temp_dir(), os.path.basename(name))


def get_canonical_temp_dir(temp_dir):
  return os.path.join(temp_dir, 'emscripten_temp')


def setup_temp_dirs():
  global EMSCRIPTEN_TEMP_DIR, CANONICAL_TEMP_DIR, TEMP_DIR
  EMSCRIPTEN_TEMP_DIR = None

  TEMP_DIR = os.environ.get("EMCC_TEMP_DIR", tempfile.gettempdir())
  if not os.path.isdir(TEMP_DIR):
    exit_with_error(f'The temporary directory `{TEMP_DIR}` does not exist! Please make sure that the path is correct.')

  CANONICAL_TEMP_DIR = get_canonical_temp_dir(TEMP_DIR)

  if DEBUG:
    EMSCRIPTEN_TEMP_DIR = CANONICAL_TEMP_DIR
    try:
      safe_ensure_dirs(EMSCRIPTEN_TEMP_DIR)
    except Exception as e:
      exit_with_error('error creating canonical temp dir (Check definition of TEMP_DIR in %s): %s', config.EM_CONFIG, e)

    # Since the canonical temp directory is, by definition, the same
    # between all processes that run in DEBUG mode we need to use a multi
    # process lock to prevent more than one process from writing to it.
    # This is because emcc assumes that it can use non-unique names inside
    # the temp directory.
    # Sadly we need to allow child processes to access this directory
    # though, since emcc can recursively call itself when building
    # libraries and ports.
    if 'EM_HAVE_TEMP_DIR_LOCK' not in os.environ:
      filelock_name = os.path.join(EMSCRIPTEN_TEMP_DIR, 'emscripten.lock')
      lock = filelock.FileLock(filelock_name)
      os.environ['EM_HAVE_TEMP_DIR_LOCK'] = '1'
      lock.acquire()
      atexit.register(lock.release)


@memoize
def get_temp_files():
  if DEBUG_SAVE:
    # In debug mode store all temp files in the emscripten-specific temp dir
    # and don't worry about cleaning them up.
    return tempfiles.TempFiles(get_emscripten_temp_dir(), save_debug_files=True)
  else:
    # Otherwise use the system tempdir and try to clean up after ourselves.
    return tempfiles.TempFiles(TEMP_DIR, save_debug_files=False)


def print_compiler_stage(cmd):
  """Emulate the '-v/-###' flags of clang/gcc by printing the sub-commands
  that we run."""

  def maybe_quote(arg):
    if all(c.isalnum() or c in './-_' for c in arg):
      return arg
    else:
      return f'"{arg}"'

  if SKIP_SUBPROCS:
    print(' ' + ' '.join([maybe_quote(a) for a in cmd]), file=sys.stderr)
    sys.stderr.flush()
  elif PRINT_SUBPROCS:
    print(' %s %s' % (maybe_quote(cmd[0]), shlex.join(cmd[1:])), file=sys.stderr)
    sys.stderr.flush()


def demangle_c_symbol_name(name):
  if not is_c_symbol(name):
    return '$' + name
  return name[1:] if name.startswith('_') else name


def is_c_symbol(name):
  return name.startswith('_')


def is_internal_global(name):
  internal_start_stop_symbols = {'__start_em_asm', '__stop_em_asm',
                                 '__start_em_js', '__stop_em_js',
                                 '__start_em_lib_deps', '__stop_em_lib_deps',
                                 '__em_lib_deps'}
  internal_prefixes = ('__em_js__', '__em_lib_deps')
  return name in internal_start_stop_symbols or any(name.startswith(p) for p in internal_prefixes)


def is_user_export(name):
  if is_internal_global(name):
    return False
  return name not in ['__asyncify_data', '__asyncify_state', '__indirect_function_table', 'memory'] and not name.startswith(('dynCall_', 'orig$'))


def asmjs_mangle(name):
  """Mangle a name the way asm.js/JSBackend globals are mangled.

  Prepends '_' and replaces non-alphanumerics with '_'.
  Used by wasm backend for JS library consistency with asm.js.
  """
  # We also use this function to convert the clang-mangled `__main_argc_argv`
  # to simply `main` which is expected by the emscripten JS glue code.
  if name == '__main_argc_argv':
    name = 'main'
  if is_user_export(name):
    return '_' + name
  return name


def do_replace(input_, pattern, replacement):
  if pattern not in input_:
    exit_with_error('expected to find pattern in input JS: %s' % pattern)
  return input_.replace(pattern, replacement)


def get_llvm_target():
  if settings.MEMORY64:
    return 'wasm64-unknown-emscripten'
  else:
    return 'wasm32-unknown-emscripten'


def init():
  utils.set_version_globals()
  setup_temp_dirs()


# ============================================================================
# End declarations.
# ============================================================================

# Everything below this point is top level code that get run when importing this
# file.  TODO(sbc): We should try to reduce that amount we do here and instead
# have consumers explicitly call initialization functions.

CLANG_CC = clang_tool_path('clang')
CLANG_CXX = clang_tool_path('clang++')
CLANG_SCAN_DEPS = llvm_tool_path('clang-scan-deps')
LLVM_AR = llvm_tool_path('llvm-ar')
LLVM_DWP = llvm_tool_path('llvm-dwp')
LLVM_RANLIB = llvm_tool_path('llvm-ranlib')
LLVM_NM = llvm_tool_path('llvm-nm')
LLVM_DWARFDUMP = llvm_tool_path('llvm-dwarfdump')
LLVM_OBJCOPY = llvm_tool_path('llvm-objcopy')
LLVM_STRIP = llvm_tool_path('llvm-strip')
WASM_LD = llvm_tool_path('wasm-ld')
LLVM_PROFDATA = llvm_tool_path('llvm-profdata')
LLVM_COV = llvm_tool_path('llvm-cov')

EMCC = exe_path_from_root('emcc')
EMXX = exe_path_from_root('em++')
EMAR = exe_path_from_root('emar')
EMRANLIB = exe_path_from_root('emranlib')
FILE_PACKAGER = exe_path_from_root('tools/file_packager')
# Windows .dll suffix is not included in this list, since those are never
# linked to directly on the command line.
DYLIB_EXTENSIONS = ['.dylib', '.so']

run_via_emxx = False

init()