#!/usr/bin/python3
##**************************************************************
##
## Copyright (C) 1990-2007, Condor Team, Computer Sciences Department,
## University of Wisconsin-Madison, WI.
##
## Licensed under the Apache License, Version 2.0 (the "License"); you
## may not use this file except in compliance with the License.  You may
## obtain a copy of the License at
##
##    http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
##**************************************************************

import argparse
import getpass
import os
import shutil
import signal
import subprocess
import sys
import time
from datetime import datetime, timezone
from pathlib import Path


# Global state
# args: option values and derived scratch paths, filled in by parse_command_line().
args = {}
# original_argv: the raw command-line arguments, echoed into the generated report.
original_argv = []
# Absolute path of this script, recorded in the report for provenance.
script_path = Path(__file__).resolve()


def usage():
	"""Print command-line help to stdout and exit with status 1.

	The scratch default shown here matches the actual default built in
	parse_command_line() (/tmp/cgi-<PID>); the old text said "cbd".
	"""
	print(f"""Usage: {sys.argv[0]} --jobid <CLUSTER.PROC> [ OPTIONS ]
--jobid      The jobid
--scratch    Scratch directory used by the program, defaults to /tmp/cgi-<PID>

*** The options below are not fully implemented. ***
--sdate      A Unix date corresponding to the start of the search time
--edate      A Unix date corresponding to the end of the search time
--pool       The pool to query (local pool if not defined)
--schedd     The schedd to query (local schedd if not defined)""")
	sys.exit(1)


def run_cmd(cmd):
	"""Execute *cmd* through the shell and return its stdout with all
	trailing newlines removed (stderr is captured but discarded)."""
	completed = subprocess.run(cmd, shell=True, capture_output=True, text=True)
	captured = completed.stdout
	return captured.rstrip('\n')


def run_cmd_to_file(cmd, output_file):
	"""Execute *cmd* through the shell, streaming both stdout and stderr
	(merged) into *output_file*, which is created/truncated."""
	with open(output_file, 'w') as sink:
		subprocess.run(cmd, shell=True, stdout=sink, stderr=subprocess.STDOUT)


def parse_command_line():
	"""Parse CLI options into the global ``args`` dict and prepare scratch space.

	Side effects:
	- With no --jobid, writes a machine profile to ./condor-profile.txt and exits 0.
	- With --help, prints usage and exits 1.
	- Creates the scratch directory tree (emptying a pre-existing one first).
	- Installs the SIGINT handler.
	"""
	global args, original_argv

	original_argv = sys.argv[1:]

	parser = argparse.ArgumentParser(add_help=False)
	parser.add_argument('--help', action='store_true', default=False, dest='show_help')
	parser.add_argument('--jobid', type=str, default=None)
	parser.add_argument('--sdate', type=str, default=None)
	parser.add_argument('--edate', type=str, default=None)
	parser.add_argument('--pool', type=str, default=None)
	parser.add_argument('--schedd', type=str, default=None)
	parser.add_argument('--scratch', type=str, default=None)

	parsed = parser.parse_args()

	if parsed.show_help:
		usage()

	# Without a jobid there is nothing job-specific to gather: emit the
	# machine/Condor profile to the current directory and stop.
	if parsed.jobid is None:
		output_file = "condor-profile.txt"
		acquire_identity(output_file)
		print(f"Information is in {output_file}")
		sys.exit(0)

	args['jobid'] = parsed.jobid
	args['pool'] = parsed.pool
	args['schedd'] = parsed.schedd

	# Parse start date: a bare integer is a Unix timestamp, anything else
	# must be ISO-8601.  Defaults to the epoch (search everything).
	if parsed.sdate is not None:
		if parsed.sdate.isdigit():
			args['sdate'] = datetime.fromtimestamp(int(parsed.sdate), tz=timezone.utc)
		else:
			args['sdate'] = datetime.fromisoformat(parsed.sdate)
	else:
		args['sdate'] = datetime.fromtimestamp(0, tz=timezone.utc)

	# Parse end date; defaults to 2038-01-19 (the 32-bit time_t rollover).
	if parsed.edate is not None:
		if parsed.edate.isdigit():
			args['edate'] = datetime.fromtimestamp(int(parsed.edate), tz=timezone.utc)
		else:
			args['edate'] = datetime.fromisoformat(parsed.edate)
	else:
		args['edate'] = datetime(2038, 1, 19, tzinfo=timezone.utc)

	# Set up scratch space
	# Date format avoids colons (tar doesn't handle them well in archive names)
	date_str = time.strftime("%F-%I_%M_%S%p-%Z")
	username = getpass.getuser()
	tmp = f"{username}-jid{args['jobid']}-{date_str}"

	pid = os.getpid()
	if parsed.scratch is None:
		args['scratch'] = f"/tmp/cgi-{pid}/cgi-{tmp}"
		args['_scratch'] = f"/tmp/cgi-{pid}"
	else:
		args['scratch'] = f"{parsed.scratch}/cgi-{pid}/cgi-{tmp}"
		args['_scratch'] = f"{parsed.scratch}/cgi-{pid}"

	if os.path.isdir(args['_scratch']):
		# Safety check: never empty the filesystem root.  (The previous
		# test -- strip slashes, then isdigit() -- returned False for "/"
		# because "".isdigit() is False, so it could never trip.)
		if Path(args['_scratch']).resolve() == Path('/'):
			sys.exit("I'm not going to remove /. Sorry.")
		# Remove contents but keep root
		for item in Path(args['_scratch']).iterdir():
			if item.is_dir():
				shutil.rmtree(item)
			else:
				item.unlink()

	# Make the scratch space
	os.makedirs(args['scratch'], exist_ok=True)

	# Set up signal handler for INT
	signal.signal(signal.SIGINT, sigint_handler)


def cleanup():
	"""Remove the whole scratch tree recorded in ``args['_scratch']``.

	Refuses to run when the scratch root resolves to "/", which would
	otherwise wipe the filesystem.
	"""
	if '_scratch' in args and os.path.isdir(args['_scratch']):
		# Safety check: never remove the filesystem root.  (The previous
		# digit-based string test could not actually match "/" because
		# "".isdigit() is False.)
		if Path(args['_scratch']).resolve() == Path('/'):
			sys.exit("I'm not going to remove /. Sorry.")
		shutil.rmtree(args['_scratch'])


def sigint_handler(signum, frame):
	"""SIGINT handler: announce the interrupt, tidy up scratch space,
	then exit with a nonzero status."""
	print("Got INT. Quitting.")
	cleanup()
	sys.exit(1)


def condor_history(jobid, output_file, *extra_args):
	"""Run condor_history for *jobid* with *extra_args*, writing all output
	to *output_file*."""
	# Joined exactly as before: with no extra args this produces a doubled
	# space, which the shell collapses harmlessly.
	command = " ".join(["condor_history", " ".join(extra_args), f"{jobid}"])
	run_cmd_to_file(command, output_file)


def condor_q(jobid, output_file, *extra_args):
	"""Run condor_q for *jobid* with *extra_args*, writing all output to
	*output_file*."""
	# Joined exactly as before: with no extra args this produces a doubled
	# space, which the shell collapses harmlessly.
	command = " ".join(["condor_q", " ".join(extra_args), f"{jobid}"])
	run_cmd_to_file(command, output_file)


def acquire_identity(output_file):
	"""Write a profile of this machine and its HTCondor installation to
	*output_file* (who ran the script, OS details, network config,
	process/disk state, and Condor version/configuration)."""
	print("Gathering Condor and machine information...")

	# Each probe funnels stderr into the captured text, so a missing file
	# or binary yields a readable note instead of an empty section.
	info = {
		'username': getpass.getuser(),
		'hostname': run_cmd("/bin/hostname 2>&1"),
		'date': run_cmd("/bin/date 2>&1"),
		'uname': run_cmd("/bin/uname -a 2>&1"),
		'os_release': run_cmd("/bin/cat /etc/os-release 2>&1"),
		'etc_issue': run_cmd("/bin/cat /etc/issue 2>&1"),
		'redhat_release': run_cmd("/bin/cat /etc/redhat-release 2>&1"),
		'debian_version': run_cmd("/bin/cat /etc/debian_version 2>&1"),
		'etc_hosts': run_cmd("/bin/cat /etc/hosts 2>&1"),
		'nsswitch': run_cmd("/bin/cat /etc/nsswitch.conf 2>&1"),
		'ulimit': run_cmd('/bin/sh -c "ulimit -a 2>&1"'),
		'ifconfig': run_cmd("/sbin/ifconfig 2>&1"),
		'ps': run_cmd("ps auxww --forest 2>&1"),
		'df': run_cmd("df -h 2>&1"),
		'iptables': run_cmd("/sbin/iptables -L 2>&1"),
		'log_dir': run_cmd("ls `condor_config_val LOG`"),
		'condor_ver': run_cmd("condor_version 2>&1"),
		'condor_config': run_cmd("condor_config_val -config 2>&1"),
		'condor_dump': run_cmd("condor_config_val -v -dump 2>&1"),
		'ldd_schedd': run_cmd("ldd `condor_config_val SBIN`/condor_schedd"),
		'uptime': run_cmd("uptime"),
		'free': run_cmd("free"),
	}

	argv_str = " ".join(original_argv)

	with open(output_file, 'w') as f:
		f.write(f"""################################
# User who created this report #
################################
 {info['username']}
####################################################
# Script invocation and on what machine was it run #
####################################################
 Script: {script_path} {argv_str}
 Machine: {info['hostname']}
###########################
# Date of Report Creation #
###########################
 {info['date']}
############
# uname -a #
############
 {info['uname']}
##############
# /etc/os-release #
##############
 {info['os_release']}
##############
# /etc/issue #
##############
 {info['etc_issue']}
#######################
# /etc/redhat-release #
#######################
 {info['redhat_release']}
#######################
# /etc/debian_version #
#######################
 {info['debian_version']}
##############
# /etc/hosts #
##############
 {info['etc_hosts']}
######################
# /etc/nsswitch.conf #
######################
 {info['nsswitch']}
#############
# ulimit -a #
#############
 {info['ulimit']}
###########################
# Interface configuration #
###########################
 {info['ifconfig']}
#####################
# ps auxww --forest #
#####################
{info['ps']}
#########
# df -h #
#########
{info['df']}
############
# iptables #
############
{info['iptables']}
###########################
# Condor log dir contents #
###########################
{info['log_dir']}
##################
# Condor Version #
##################
 {info['condor_ver']}
###################################
# Location of Condor Config Files #
###################################
 {info['condor_config']}
###########################
# Condor Config Variables #
###########################
 {info['condor_dump']}
#####################
# ldd condor_schedd #
#####################
 {info['ldd_schedd']}
#####################
# uptime #
#####################
 {info['uptime']}
#####################
# free #
#####################
 {info['free']}
""")


def acquire_job_q(jobid, output_file, *extra_args):
	"""Capture the condor_q listing for *jobid* into *output_file*."""
	print("acquire_job_q: Getting job q for job {}".format(jobid))
	condor_q(jobid, output_file, *extra_args)


def acquire_job_ad(jobid, output_file, *extra_args):
	"""Capture the job ad for *jobid* into *output_file* (via condor_q;
	callers pass '-l' to get the full classad)."""
	print("acquire_job_ad: Getting job ad for job {}".format(jobid))
	condor_q(jobid, output_file, *extra_args)


def acquire_job_analysis(jobid, output_file, *extra_args):
	"""Capture condor_q analysis output for *jobid* into *output_file*
	(callers pass '-better-analyze')."""
	print("acquire_job_analysis: Getting job analysis for job {}".format(jobid))
	condor_q(jobid, output_file, *extra_args)


def acquire_job_userlog_lines(jobid, output_file):
	"""Copy the job's userlog (the file named by its UserLog attribute)
	into *output_file*.

	Queries condor_q first, falling back to condor_history.  On any
	problem, an explanatory message is written to *output_file*; the raw
	query output is deliberately left behind in "<output_file>.query" on
	error paths so it can be inspected ("See query file").

	Note: despite the name, on success the WHOLE userlog is copied, not
	just the lines for this jobid.
	"""
	print(f"acquire_job_userlog_lines: Getting job log entries for job {jobid}")

	tmpfile = f"{output_file}.query"

	# Get the path of the user log file
	condor_q(jobid, tmpfile, '-format "logfile: %s\\n"', 'UserLog')

	# Bail if no logfile or otherwise empty
	if not os.path.exists(tmpfile) or os.path.getsize(tmpfile) == 0:
		with open(output_file, 'w') as f:
			f.write("No log file specified in job ad.\n")
		print("Trying condor_history instead")
		condor_history(jobid, tmpfile, '-format "logfile: %s\\n"', 'UserLog')
		if not os.path.exists(tmpfile) or os.path.getsize(tmpfile) == 0:
			print("Not in condor_history either, giving up on looking for logfile in all the wrong places")
			if os.path.exists(tmpfile):
				os.remove(tmpfile)
			return

	# See if the log file is well formed: the -format query above should
	# have produced exactly one "logfile:" line.
	with open(tmpfile) as f:
		lines = [line for line in f if "logfile:" in line]

	# More than one match means the query output is not what we expect.
	if len(lines) > 1:
		with open(output_file, 'w') as f:
			f.write("Unexpected condor_q problem. See query file.\n")
		return

	if not lines:
		with open(output_file, 'w') as f:
			f.write("No logfile found in query output.\n")
		return

	# Parse out the logfile path
	match = lines[0].strip()
	if not match.startswith("logfile: "):
		with open(output_file, 'w') as f:
			f.write("Unexpected userlog path problem. See query file.\n")
		return

	ulogfile = match[len("logfile: "):]
	print(f"ulog file is {ulogfile}")

	# An empty path after the "logfile: " prefix is also a parse failure.
	if not ulogfile:
		with open(output_file, 'w') as f:
			f.write("Unexpected userlog path problem. See query file.\n")
		return

	if not os.path.isfile(ulogfile):
		with open(output_file, 'w') as f:
			f.write("Userlog file does not exist. See query file.\n")
		return

	# Copy the whole job user log
	shutil.copy2(ulogfile, output_file)

	# Clean up temp file (only on the success path; error paths keep it
	# for inspection).
	if os.path.exists(tmpfile):
		os.remove(tmpfile)


def acquire_all_info_for_jobid(jobid):
	"""Gather everything available about *jobid* into a per-job scratch subdir.

	Collects: the condor_q listing, the full job ad (falling back to
	condor_history when the job has left the queue), -better-analyze
	output, the job's userlog, local daemon logs, and -- when the ad
	names an execute machine -- remotely fetched Master/Startd/Starter
	logs via condor_fetchlog.
	"""
	scratch = args['scratch']

	# Directory for this job
	jdir = f"{scratch}/{jobid}"
	os.makedirs(jdir, exist_ok=True)

	# Get the job q
	acquire_job_q(jobid, f"{jdir}/job_q")

	# Get the job ad
	acquire_job_ad(jobid, f"{jdir}/job_ad", '-l')

	# If condor_q -l output is too small, the job has probably left the
	# queue; fall back to condor_history for the ad.
	job_ad_path = Path(f"{jdir}/job_ad")
	with open(job_ad_path) as f:
		lines = f.readlines()

	if len(lines) < 6:
		print(f"Job {jobid} does not appear in condor_q output, looking in condor_history.")
		condor_history(jobid, f"{jdir}/job_ad", '-l')
		if os.path.getsize(f"{jdir}/job_ad") == 0:
			print(f"Job {jobid} does not appear in condor_history either.")
		else:
			print(f"Job {jobid} is in condor_history output, using it.")

	# Do the analysis for the job
	acquire_job_analysis(jobid, f"{jdir}/job_ad_analysis", '-better-analyze')

	# Grab log entries for the jobid from the job log file
	acquire_job_userlog_lines(jobid, f"{jdir}/job_userlog_lines")

	# Copy system log files (assuming submit side)
	# NOTE(review): the current SchedLog is absent from this list (only
	# SchedLog.old is copied) -- confirm whether that is intentional.
	log_dir = run_cmd("condor_config_val LOG")
	for log_name in ["MasterLog", "MasterLog.old", "ShadowLog", "ShadowLog.old", "SchedLog.old"]:
		src = f"{log_dir}/{log_name}"
		if os.path.exists(src):
			shutil.copy2(src, f"{scratch}/{log_name}")

	# Look for RemoteHost / LastRemoteHost in the job ad.  The last match
	# in file order wins, so LastRemoteHost on a later line overrides an
	# earlier RemoteHost.
	remote_host = None
	with open(f"{jdir}/job_ad") as f:
		for line in f:
			if line.startswith("RemoteHost "):
				remote_host = line.strip()
			if line.startswith("LastRemoteHost "):
				remote_host = line.strip()

	if remote_host:  # (was: remote_host and len(remote_host) > 0 -- redundant)
		# Parse: 'RemoteHost = "slot1@machine"' -> 'slot1@machine'
		parts = remote_host.split()
		if len(parts) >= 3:
			remote_host = parts[2].replace('"', '')
		print(f"The execute machine for this job is {remote_host}")
		slot_parts = remote_host.split('@')
		slot = slot_parts[0]
		# Heuristic: a real slot name ("slot1", ...) is longer than two
		# characters; otherwise treat the whole string as the machine name.
		if len(slot) > 2:
			machine = slot_parts[1] if len(slot_parts) > 1 else remote_host
		else:
			machine = remote_host

		run_cmd_to_file(f"condor_fetchlog {machine} MASTER", f"{scratch}/MasterLog.{machine}")
		run_cmd_to_file(f"condor_fetchlog {machine} STARTD", f"{scratch}/StartLog")
		run_cmd_to_file(f"condor_fetchlog {machine} STARTER.{slot}", f"{scratch}/StarterLog.{slot}")
	else:
		# Grammar fix: was "perhaps job this never ran?"
		print("No Remote host found in job ad -- perhaps this job never ran?")


def acquire_all_info(jobid):
	"""Record who/where we are, then gather all job-specific data for *jobid*."""
	profile = f"{args['scratch']}/condor-profile.txt"
	acquire_identity(profile)          # identify this machine and install
	acquire_all_info_for_jobid(jobid)  # datamine everything about the job


def package_payload():
	"""Tar up the scratch results as <dir>.tar.gz and move it to the cwd.

	Runs tar from inside the scratch parent so the archive contains
	relative paths, then restores the original working directory.
	"""
	saved_dir = os.getcwd()
	parent = str(Path(args['scratch']).parent)
	os.chdir(parent)
	# Grab the only directory there.  Filter to directories so a stray
	# file (e.g. a leftover tarball) can never be chosen as the root.
	dir_name = next(e for e in os.listdir('.') if os.path.isdir(e))
	print(f"\nCreating output file with all results in {dir_name}.tar.gz")
	# Argument-list form (shell=False) avoids shell interpretation of the
	# directory name, which embeds the username and jobid.
	subprocess.run(["tar", "czf", f"{dir_name}.tar.gz", dir_name],
	               stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
	os.chdir(saved_dir)
	# Move tarball to current directory
	for gz in Path(parent).glob("*.tar.gz"):
		shutil.move(str(gz), ".")


def main():
	"""Entry point: parse options, gather, package, clean up.

	Returns 0, which the module guard passes to sys.exit().
	"""
	parse_command_line()
	acquire_all_info(args['jobid'])  # everything we can find for the jobid
	package_payload()                # bundle results into ./<name>.tar.gz
	cleanup()                        # drop the scratch tree
	return 0


# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == '__main__':
	sys.exit(main())
