#!/usr/bin/python
# encoding: utf-8
#
# vim: syntax=python
# portions © 2008 Václav Šmilauer <eudoxos@arcig.cz>

import os, sys, thread, time, logging, pipes, socket, xmlrpclib, re, shutil

#socket.setdefaulttimeout(10) 

## replaced by scons automatically
# Installation prefix is taken from the YADE_PREFIX environment variable when it is set, /usr otherwise; suffix is empty here.
prefix,suffix='/usr' if not os.environ.has_key('YADE_PREFIX') else os.environ['YADE_PREFIX'],''
libDir=prefix+'/lib/yade'+suffix # run the batch always in non-debug mode (the spawned processes do honor the debugging flag, however)
# make the python modules under libDir importable (needed for the yade imports that follow)
sys.path.append(os.path.join(libDir,'py'))
# default program used to run the jobs
executable=os.path.join(prefix,'bin','yade'+suffix)
## we just need this ...
import yade, yade.utils, yade.config, yade.remote


class JobInfo():
	"""Bookkeeping for a single batch job.

	Holds the command to run, the log file name, the number of slots (cores) the job occupies,
	timing and exit status, and an optional XMLRPC connection to the running job, which is used
	to query progress information and plots for the HTTP overview.
	"""
	def __init__(self,num,id,command,hrefCommand,log,nSlots,script,table,lineNo):
		"""Create a job in the PENDING state.

		num: sequential job number; id: job description; command: shell command to run;
		hrefCommand: the same command with HTML links embedded; log: log file name;
		nSlots: number of slots the job occupies; script, table, lineNo: simulation script,
		parameter table and the table line this job was created from.
		"""
		self.started,self.finished,self.duration,self.exitStatus=None,None,None,None
		self.command=command; self.hrefCommand=hrefCommand; self.num=num; self.log=log; self.id=id; self.nSlots=nSlots; self.infoSocket=None
		self.script=script; self.table=table; self.lineNo=lineNo
		self.hasXmlrpc=False
		self.status='PENDING'
		self.threadNum=None
		self.plotsLastUpdate,self.plotsFile=0.,yade.Omega().tmpFilename()+'.'+yade.remote.plotImgFormat
	def saveInfo(self):
		"Append a summary of the job (id, status, duration, command, start and finish time) to its log file."
		summary="""
=================== JOB SUMMARY ================
id      : %s
status  : %d (%s)
duration: %s
command : %s
started : %s
finished: %s
"""%(self.id,self.exitStatus,'OK' if self.exitStatus==0 else 'FAILED',self.duration,self.command,time.asctime(time.localtime(self.started)),time.asctime(time.localtime(self.finished)))
		with open(self.log,'a') as log:
			log.write(summary)
	def ensureXmlrpc(self):
		"""Connect to the XMLRPC info provider of the job, unless already connected.

		The URL is the 5th word of the log line starting with 'XMLRPC info provider on'.
		If the log does not exist or does not contain that (complete) line yet, nothing
		happens and hasXmlrpc stays False; callers must check it before using xmlrpcConn.
		"""
		if self.hasXmlrpc or not os.path.exists(self.log): return
		with open(self.log,'r') as log:
			for l in log:
				if not l.startswith('XMLRPC info provider on'): continue
				words=l.split()
				# the line may be only partially written at the time we read it
				if not l.endswith('\n') or len(words)<5: continue
				self.xmlrpcConn=xmlrpclib.ServerProxy(words[4],allow_none=True)
				self.hasXmlrpc=True
				break
	def getInfoDict(self):
		"Return the dictionary reported by basicInfo() of the running job; None if the job is not running or is not reachable via XMLRPC yet."
		if self.status!='RUNNING': return None
		self.ensureXmlrpc()
		if not self.hasXmlrpc: return None
		return self.xmlrpcConn.basicInfo()
	def updatePlots(self):
		"""Fetch the current plot image from the running job and store it in plotsFile.

		Does nothing if the job is not running, is not reachable via XMLRPC yet, or the
		last update is more recent than opts.plotTimeout seconds. If the job reports no
		plot, a previously saved plot file is removed.
		"""
		if self.status!='RUNNING': return
		self.ensureXmlrpc()
		if not self.hasXmlrpc: return
		if time.time()-self.plotsLastUpdate<opts.plotTimeout: return
		self.plotsLastUpdate=time.time()
		img=self.xmlrpcConn.plot()
		if not img:
			if os.path.exists(self.plotsFile): os.remove(self.plotsFile)
			return
		with open(self.plotsFile,'wb') as f:
			f.write(img.data)

	def htmlStats(self):
		"Return one HTML table row (<tr>...</tr>) with id, status, progress info, slots, plot and command of this job."
		cells=['<td>%s</td>'%self.id]
		if self.status=='PENDING': cells.append('<td bgcolor="grey">(pending)</td>')
		elif self.status=='RUNNING': cells.append('<td bgcolor="yellow">%s</td>'%t2hhmmss(time.time()-self.started))
		elif self.status=='DONE': cells.append('<td bgcolor="%s">%s</td>'%('lime' if self.exitStatus==0 else 'red',self.duration))
		info=self.getInfoDict()
		self.updatePlots() # checks for last update time
		if info:
			if info['stopAtIter']>0:
				progress='<nobr>%2.2f%% done</nobr><br/><nobr>step %d/%d</nobr>'%(info['iter']*100./info['stopAtIter'],info['iter'],info['stopAtIter'])
			else: progress='<nobr>step %d</nobr>'%(info['iter'])
			# average speed is only meaningful once some real time has elapsed
			if info['realtime']!=0: progress+='<br/><nobr>avg %g/sec</nobr>'%(info['iter']/info['realtime'])
			progress+='<br/><nobr>%d bodies</nobr><br/><nobr>%d intrs</nobr>'%(info['numBodies'],info['numIntrs'])
			cells.append('<td>'+progress+'</td>')
		else:
			cells.append('<td> (no info) </td>')
		cells.append('<td>%d</td>'%self.nSlots)
		# TODO: make clickable so that it can be served full-size
		if os.path.exists(self.plotsFile):
			# A plain <img> is used: embedding SVG via <object>, <iframe> or <embed> was tried, but clicking on it did not work
			# (see http://www.abclinuxu.cz/poradna/linux/show/314041 and http://www.w3schools.com/svg/svg_inhtml.asp).
			img='<img src="/jobs/%d/plots" width="300px" alt="[plots]">'%(self.num)
			cells.append('<td><a href="/jobs/%d/plots">%s</a></td>'%(self.num,img))
		else: cells.append('<td> (no plots) </td>')
		cells.append('<td>%s</td>'%self.hrefCommand)
		return '<tr>'+''.join(cells)+'</tr>'
def t2hhmmss(dt):
	"Format the time span *dt* (in seconds) as HH:MM:SS, each field zero-padded to at least 2 digits; fractions of a second are dropped."
	hours=dt//3600
	minutes=(dt%3600)//60
	seconds=dt%60
	return '%02d:%02d:%02d'%(hours,minutes,seconds)

def totalRunningTime():
	"""Return the time span (in seconds) covered by the batch so far.

	It is measured from the start of the earliest started job to the latest of all job
	finish times and the current time; 0 is returned if no job has been started at all.
	"""
	starts=[j.started for j in jobs if j.started]
	if not starts: return 0 # no job has been started at all
	ends=[j.finished for j in jobs if j.finished]
	ends.append(time.time())
	return max(ends)-min(starts)

def globalHtmlStats():
	"""Return the HTML fragment describing the overall state of the batch.

	It contains the running/finished timing line, pid of this process, slot usage and job
	counts. It is safe to call before any job has been started (e.g. with --dry-run).
	"""
	startTimes=[j.started for j in jobs if j.started is not None]
	nRunning=len([j for j in jobs if j.status=='RUNNING'])
	nDone=len([j for j in jobs if j.status=='DONE'])
	nFailed=len([j for j in jobs if j.status=='DONE' and j.exitStatus!=0])
	usedSlots=sum([j.nSlots for j in jobs if j.status=='RUNNING'])
	if not startTimes:
		# min() of an empty list would raise ValueError
		ret='<p>No job has been started yet.</p>'
	elif nDone<len(jobs):
		ret='<p>Running for %s, since %s.</p>'%(t2hhmmss(totalRunningTime()),time.ctime(min(startTimes)))
	else:
		t0,lastFinished=min(startTimes),max([j.finished for j in jobs])
		# running time is the real timespan of the batch, not the sum of running times of all jobs
		ret='<p><span style="background-color:%s">Finished</span>, idle for %s, running time %s since %s.</p>'%('red' if nFailed else 'lime',t2hhmmss(time.time()-lastFinished),t2hhmmss(lastFinished-t0),time.ctime(t0))
	ret+='<p>Pid %d</p>'%(os.getpid())
	ret+='<p>%d slots available, %d used, %d free.</p>'%(maxJobs,usedSlots,maxJobs-usedSlots)
	ret+='<h3>Jobs</h3>'
	failedNote=' (<b>%d</b> <span style="background-color:red"><b>failed</b></span>)'%nFailed if nFailed>0 else ''
	ret+='<p><b>%d</b> total, <b>%d</b> <span style="background-color:yellow">running</span>, <b>%d</b> <span style="background-color:lime">done</span>%s</p>'%(len(jobs),nRunning,nDone,failedNote)
	return ret

from BaseHTTPServer import BaseHTTPRequestHandler,HTTPServer
import socket,re
class HttpStatsServer(BaseHTTPRequestHandler):
	"""HTTP request handler serving the batch overview.

	Served paths: / (overview of all jobs), /favicon.ico and, for job number N,
	/jobs/N/plots, /jobs/N/log, /jobs/N/script and /jobs/N/table; anything else yields 404.
	"""
	favicon=None # binary favicon, created when first requested
	def do_GET(self):
		"Dispatch a GET request according to its path."
		if not self.path or self.path=='/':
			self.sendGlobal(); return
		if self.path=='/favicon.ico':
			if not self.__class__.favicon:
				import base64
				self.__class__.favicon=base64.b64decode(yade.remote.b64favicon)
			self.sendHttp(self.__class__.favicon,contentType='image/vnd.microsoft.icon')
			return
		jobMatch=re.match('/jobs/([0-9]+)/(.*)',self.path)
		if not jobMatch:
			self.send_error(404,self.path); return
		jobId=int(jobMatch.group(1))
		if jobId>=len(jobs):
			self.send_error(404,self.path); return
		job=jobs[jobId]
		rest=jobMatch.group(2)
		# finished jobs do not change anymore, hence no automatic refresh for them
		refresh=(0 if job.status=='DONE' else 5)
		if rest=='plots':
			job.updatePlots() # internally checks for last update time
			self.sendFile(job.plotsFile,contentType=yade.remote.plotImgMimetype,refresh=refresh)
		elif rest=='log':
			if not os.path.exists(job.log):
				self.send_error(404,self.path); return
			## once we authenticate properly, send the whole file
			## now we have to filter away the cookie
			cookieRemoved=False; lines=[]
			with open(job.log) as log:
				for l in log:
					if not cookieRemoved and l.startswith('TCP python prompt on'):
						ii=l.find('auth cookie `')
						# mask the 6 characters following the marker; leave the line alone if the marker is missing
						if ii>=0: l=l[:ii+13]+'******'+l[ii+19:]
						cookieRemoved=True
					lines.append(l)
			self.sendHttp(''.join(lines),contentType='text/plain;charset=utf-8;',refresh=refresh)
		elif rest=='script':
			self.sendPygmentizedFile(job.script,linenostep=5)
		elif rest=='table':
			self.sendPygmentizedFile(job.table,hl_lines=[job.lineNo],linenostep=1)
		else: self.send_error(404,self.path)
	def log_request(self,req):
		"Override the base class method so that served requests are not logged."
		pass
	def sendGlobal(self):
		"Send the overview page: global statistics followed by a table with one row per job; the page refreshes every 5 seconds."
		rows=[j.htmlStats()+'\n' for j in jobs]
		html='<HTML><TITLE>Yade-batch at %s overview</TITLE><BODY>\n'%(socket.gethostname())
		html+=globalHtmlStats()
		html+='<TABLE border=1><tr><th>id</th><th>status</th><th>info</th><th>slots</th><th>plots</th><th>command</th></tr>\n'
		html+=''.join(rows)
		html+='</TABLE></BODY></HTML>'
		self.sendHttp(html,contentType='text/html',refresh=5) # refresh sent as header
	def sendTextFile(self,fileName,**headers):
		"Send *fileName* as UTF-8 plain text (404 if it does not exist); *headers* are passed to sendHttp."
		if not os.path.exists(fileName): self.send_error(404); return
		# bytes are sent as they are; the content type declares them to be UTF-8
		with open(fileName,'rb') as f: data=f.read()
		self.sendHttp(data,contentType='text/plain;charset=utf-8;',**headers)
	def sendFile(self,fileName,contentType,**headers):
		"Send raw contents of *fileName* with given *contentType* (404 if it does not exist); *headers* are passed to sendHttp."
		if not os.path.exists(fileName): self.send_error(404); return
		# binary mode, since images are served this way
		with open(fileName,'rb') as f: data=f.read()
		self.sendHttp(data,contentType=contentType,**headers)
	def sendHttp(self,data,contentType,**headers):
		"Send file over http, using appropriate content-type. Headers are converted to strings. The *refresh* header is handled specially: if the value is 0, it is not sent at all."
		self.send_response(200)
		self.send_header('Content-type',contentType)
		if 'refresh' in headers and headers['refresh']==0: del headers['refresh']
		for h in headers: self.send_header(h,str(headers[h]))
		self.end_headers()
		self.wfile.write(data)
		# remember when we were last asked for something (module-level timestamp)
		global httpLastServe
		httpLastServe=time.time()
	def sendPygmentizedFile(self,f,**kw):
		"Send file *f* as syntax-highlighted HTML (*kw* are passed to pygments' HtmlFormatter); fall back to plain text if pygments is not importable; 404 if *f* does not exist."
		if not os.path.exists(f):
			self.send_error(404); return
		try:
			from pygments import highlight
			from pygments.lexers import PythonLexer
			from pygments.formatters import HtmlFormatter
		except ImportError:
			self.sendTextFile(f); return
		import codecs
		with codecs.open(f,encoding='utf-8') as src: code=src.read()
		data=highlight(code,PythonLexer(),HtmlFormatter(linenos=True,full=True,encoding='utf-8',title=os.path.abspath(f),**kw))
		self.sendHttp(data,contentType='text/html;charset=utf-8;')
def runHttpStatsServer():
	maxPort=11000; port=9080
	while port<maxPort:
		try:
			server=HTTPServer(('',port),HttpStatsServer)
			import thread; thread.start_new_thread(server.serve_forever,())
			print "http://localhost:%d shows batch summary"%port
			break
		except socket.error:
			port+=1
	if port==maxPort:
		print "WARN: No free port in range 9080-11000, not starting HTTP stats server!"


def runJob(job):
	job.status='RUNNING'
	job.started=time.time();
	print '#%d (%s%s) started on %s'%(job.num,job.id,'' if job.nSlots==1 else '/%d'%job.nSlots,time.asctime())
	job.exitStatus=os.system(job.command)
	if job.exitStatus!=0 and len([l for l in open(job.log) if l.startswith('Yade: normal exit.')])>0: job.exitStatus=0
	job.finished=time.time()
	dt=job.finished-job.started;
	job.duration=t2hhmmss(dt)
	strStatus='done   ' if job.exitStatus==0 else 'FAILED '
	job.status='DONE'
	havePlot=False
	if os.path.exists(job.plotsFile):
		f=(job.log[:-3] if job.log.endswith('.log') else job.log+'.')+yade.remote.plotImgFormat
		shutil.copy(job.plotsFile,f)
		job.plotsFile=f
		havePlot=True
	print "#%d (%s%s) %s (exit status %d), duration %s, log %s%s"%(job.num,job.id,'' if job.nSlots==1 else '/%d'%job.nSlots,strStatus,job.exitStatus,job.duration,job.log,(', plot %s'%(job.plotsFile) if havePlot else ''))
	job.saveInfo()
	
def runJobs(jobs,numSlots):
	"""Run all *jobs*, keeping the slots used by running jobs within *numSlots*.

	Twice a second, the first pending job that fits into the free slots is started in
	a new thread (at most one per round). If nothing is running and even the least
	demanding pending job does not fit, it is started anyway. Returns once no job is
	pending or running.
	"""
	def slotsOf(status):
		# slot counts of all jobs which are in the given state right now
		return [j.nSlots for j in jobs if j.status==status]
	noPending=1000000 # stands for "no pending job" when looking for the smallest requirement
	running,pending=0,len(jobs)
	while running>0 or pending>0:
		pending=sum(slotsOf('PENDING'))
		running=sum(slotsOf('RUNNING'))
		freeSlots=numSlots-running
		minRequire=min([noPending]+slotsOf('PENDING'))
		if minRequire==noPending: minRequire=0
		# nothing runs and nothing fits: pretend there is enough room for the smallest job
		if running==0 and minRequire>freeSlots:
			freeSlots=minRequire
		for j in jobs:
			if j.status=='PENDING' and j.nSlots<=freeSlots:
				thread.start_new_thread(runJob,(j,))
				break
		time.sleep(.5)
		sys.stdout.flush()


import sys,re,optparse,os
def getNumCores():
	"""Return the number of cores jobs may use.

	Cores are counted as lines of /proc/cpuinfo starting with 'processor'; if that file
	cannot be read or contains no such line, multiprocessing.cpu_count() is used instead
	(1 if even that is not available). The result is further limited by the
	OMP_NUM_THREADS environment variable, if set.
	"""
	try:
		with open('/proc/cpuinfo','r') as cpuinfo:
			nCpu=len([l for l in cpuinfo if l.startswith('processor')])
	except IOError:
		nCpu=0
	if nCpu<1:
		import multiprocessing
		try: nCpu=multiprocessing.cpu_count()
		except NotImplementedError: nCpu=1
	if 'OMP_NUM_THREADS' in os.environ: return min(int(os.environ['OMP_NUM_THREADS']),nCpu)
	return nCpu
# Number of usable cores; default for both the number of slots (-j) and threads per job (--job-threads).
numCores=getNumCores()

# Command-line interface; help strings are also used for the man page (--generate-manpage).
parser=optparse.OptionParser(
	usage='%prog [options] TABLE SIMULATION.py',
	description='%prog runs yade simulation multiple times with different parameters.\n\nSee https://yade-dem.org/sphinx/user.html#batch-queuing-and-execution-yade-batch for details.')
parser.add_option('-j','--jobs',dest='maxJobs',type='int',metavar='NUM',default=numCores,
	help="Maximum number of simultaneous threads to run (default: number of cores, further limited by OMP_NUM_THREADS if set by the environment: %d)"%numCores)
parser.add_option('--job-threads',dest='defaultThreads',type='int',metavar='NUM',default=numCores,
	help="Default number of threads for one job; can be overridden by per-job OMP_NUM_THREADS. Defaults to allocate all available cores (%d) for each job."%numCores)
parser.add_option('--force-threads',action='store_true',dest='forceThreads',
	help='Force jobs to not use more cores than the maximum (see \-j), even if !OMP_NUM_THREADS in the table requests more.')
parser.add_option('--log',dest='logFormat',metavar='FORMAT',
	help='Format of job log files -- must contain a % or @, which will be replaced by line number or by description column respectively (default: SIMULATION.@.log)')
parser.add_option('--global-log',dest='globalLog',metavar='FILE',
	help='Filename where to redirect output of yade-batch itself (as opposed to \-\-log); if not specified (default), stdout/stderr are used')
parser.add_option('-l','--lines',dest='lineList',metavar='LIST',
	help='Lines of TABLE to use, in the format 2,3-5,8,11-13 (default: all available lines in TABLE)')
parser.add_option('--nice',dest='nice',type='int',default=10,
	help='Nice value of spawned jobs (default: 10)')
parser.add_option('--executable',dest='executable',default=executable,metavar='FILE',
	help='Name of the program to run (default: %s)'%executable)
parser.add_option('--rebuild',dest='rebuild',default=False,action='store_true',
	help='Run executable(s) with \-\-rebuild prior to running any jobs.')
parser.add_option('--debug',dest='debug',action='store_true',default=False,
	help='Run the executable with \-\-debug.')
parser.add_option('--gnuplot',dest='gnuplotOut',default=None,metavar='FILE',
	help='Gnuplot file where gnuplot from all jobs should be put together')
parser.add_option('--dry-run',action='store_true',dest='dryRun',default=False,
	help='Do not actually run (useful for getting gnuplot only, for instance)')
parser.add_option('--http-wait',action='store_true',dest='httpWait',default=False,
	help='Do not quit if still serving overview over http repeatedly')
parser.add_option('--generate-manpage',dest='manpage',metavar='FILE',
	help='Generate man page documenting this program and exit')
parser.add_option('--plot-update',type='int',dest='plotAlwaysUpdateTime',metavar='TIME',default=60,
	help='Interval (in seconds) at which job plots will be updated even if not requested via HTTP. Non-positive values will make the plots not being updated and saved unless requested via HTTP (see \-\-plot-timeout for controlling maximum age of those).  Plots are saved at exit under the same name as the log file, with the .log extension removed. (default: 60 seconds)')
parser.add_option('--plot-timeout',type='int',dest='plotTimeout',metavar='TIME',default=10,
	help='Maximum age (in seconds) of plots served over HTTP; they will be updated if they are older. (default: 10 seconds)')
opts,args=parser.parse_args()

# shorthands for frequently used options
logFormat=opts.logFormat
lineList=opts.lineList
maxJobs=opts.maxJobs
nice=opts.nice
executable=opts.executable
gnuplotOut=opts.gnuplotOut
dryRun=opts.dryRun
httpWait=opts.httpWait
globalLog=opts.globalLog

if opts.manpage:
	import yade.manpage
	yade.config.metadata['short_desc']='batch system for computational platform Yade'
	yade.config.metadata['long_desc']='Manage batches of computation jobs for the Yade platform; batches are described using text-file tables with parameters which are passed to individual runs of yade. Jobs are being run with pre-defined number of computational cores as soon as the required number of cores is available. Logs of all computations are stored in files and the batch progress can be watched online at (usually) http://localhost:9080. Unless overridden, the executable yade%s is used to run jobs.'%(suffix)
	yade.manpage.generate_manpage(parser,yade.config.metadata,opts.manpage,section=1,seealso='yade%s (1)\n.br\nhttps://yade-dem.org/sphinx/user.html#batch-queuing-and-execution-yade-batch'%suffix)
	print 'Manual page %s generated.'%opts.manpage
	sys.exit(0)

if globalLog:
	sys.stderr=open(globalLog,"w")
	sys.stdout=sys.stderr

if len(args)!=2:
	#print "Exactly two non-option arguments must be specified -- parameter table and script to be run.\n"
	parser.print_help()
	sys.exit(1)
table,simul=args[0:2]
if not logFormat: logFormat=(simul[:-3] if simul[-3:]=='.py' else simul)+".@.log"
if (not '%' in logFormat) and ('@' not in logFormat): raise StandardError("Log string must contain at least one of `%', `@'")

print "Will run `%s' on `%s' with nice value %d, output redirected to `%s', %d jobs at a time."%(executable,simul,nice,logFormat,maxJobs)

reader=yade.utils.TableParamReader(table)
params=reader.paramDict()
availableLines=params.keys()

print "Will use table `%s', with available lines"%(table),', '.join([str(i) for i in availableLines])+'.'

if lineList:
	useLines=[]
	def numRange2List(s):
		ret=[]
		for l in s.split(','):
			if "-" in l: ret+=range(*[int(s) for s in l.split('-')]); ret+=[ret[-1]+1]
			else: ret+=[int(l)]
		return ret
	useLines0=numRange2List(lineList)
	for l in useLines0:
		if l not in availableLines: logging.warn('Skipping unavailable line %d that was requested from the command line.'%l)
		else: useLines+=[l]
else: useLines=availableLines
print "Will use lines ",', '.join([str(i)+' (%s)'%params[i]['description'] for i in useLines])+'.'

# Create one JobInfo for every table line to be used.
jobs=[]
executables=set() # all programs used by jobs, for --rebuild
for i,l in enumerate(useLines):
	logFile=logFormat.replace('%',str(l))
	logFile=logFile.replace('@',params[l]['description'])
	envVars=[]
	nSlots=opts.defaultThreads
	jobExecutable=executable # !EXEC applies to its own line only, not to the following ones
	# columns starting with ! are special: number of threads, executable, or environment variables passed to the job
	for col in params[l].keys():
		if col[0]!='!': continue
		if col=='!OMP_NUM_THREADS': nSlots=int(params[l][col])
		elif col=='!EXEC': jobExecutable=params[l][col]
		else: envVars+=['%s=%s'%(col[1:],params[l][col])]
	if nSlots>maxJobs:
		if opts.forceThreads:
			logging.info('Forcing job #%d to use only %d slots (max available) instead of %d requested'%(i,maxJobs,nSlots))
			nSlots=maxJobs
		else:
			logging.warning('WARNING: job #%d will use %d slots but only %d are available'%(i,nSlots,maxJobs))
	executables.add(jobExecutable)
	# compose command-line: build the hyper-linked variant, then strip HTML tags (ugly, but ensures consistency)
	env='PARAM_TABLE=<a href="jobs/%d/table">%s:%d</a> DISPLAY= %s '%(i,table,l,' '.join(envVars))
	cmd='%s%s --threads=%d %s -x <a href="jobs/%d/script">%s</a>'%(jobExecutable,' --debug' if opts.debug else '',int(nSlots),'--nice=%s'%nice if nice is not None else '',i,simul)
	log='> <a href="jobs/%d/log">%s</a> 2>&1'%(i,pipes.quote(logFile))
	hrefCmd=env+cmd+log
	fullCmd=re.sub('(<a href="[^">]+">|</a>)','',hrefCmd)
	jobs.append(JobInfo(i,params[l]['description'],fullCmd,hrefCmd,logFile,nSlots,script=simul,table=table,lineNo=l))

print "Master process pid",os.getpid()

if opts.rebuild:
	print "Rebuilding all active executables, since --rebuild was specified"
	for e in executables:
		import subprocess
		if subprocess.call([e,'--rebuild','-x']+(['--debug'] if opts.debug else [])):
			 raise RuntimeError('Error rebuilding %s (--rebuild).'%e)
	print "Rebuilding done."
		

print "Job summary:"
for job in jobs:
	print '   #%d (%s%s):'%(job.num,job.id,'' if job.nSlots==1 else '/%d'%job.nSlots),job.command
sys.stdout.flush()


httpLastServe=0 # time of the last served HTTP request, updated by HttpStatsServer.sendHttp
runHttpStatsServer()
if opts.plotAlwaysUpdateTime>0:
	# update plots periodically regardless of whether they are requested via HTTP
	def updateAllPlots():
		"Update plots of all jobs every opts.plotAlwaysUpdateTime seconds; never returns (the thread ends with the process)."
		while True:
			time.sleep(opts.plotAlwaysUpdateTime)
			for job in jobs:
				# failure of one update (e.g. the job exiting in the meantime) must not stop the periodic updates
				try: job.updatePlots()
				except Exception: logging.warning('Updating plots of job %s failed: %s'%(job.id,sys.exc_info()[1]))
	thread.start_new_thread(updateAllPlots,())

# OK, go now
if not dryRun: runJobs(jobs,maxJobs)

print 'All jobs finished, total time ',t2hhmmss(totalRunningTime())

plots=[]
for j in jobs:
	if not os.path.exists(j.plotsFile): continue
	plots.append(j.plotsFile)
if plots: print 'Plot files:',' '.join(plots)

# for easy grepping in logfiles:
print 'Log files:',' '.join([j.log for j in jobs])

if not gnuplotOut:
	print 'Bye.'
else:
	print 'Assembling gnuplot files…'
	for job in jobs:
		for l in file(job.log):
			if l.startswith('gnuplot '):
				job.plot=l.split()[1]
				break
	preamble,plots='',[]
	for job in jobs:
		if not 'plot' in job.__dict__:
			print "WARN: No plot found for job "+job.id
			continue
		for l in file(job.plot):
			if l.startswith('plot'):
				# attempt to parse the plot line
				ll=l.split(' ',1)[1][:-1] # rest of the line, without newline
				# replace title 'something' with title 'description: something'
				ll,nn=re.subn(r'title\s+[\'"]([^\'"]*)[\'"]',r'title "'+job.id+r': \1"',ll)
				if nn==0:
					logging.error("Plot line in "+job.plot+" not parsed (skipping): "+ll)
				plots.append(ll)
				break
			if not plots: # first plot, copy all preceding lines
				preamble+=l
	gp=file(gnuplotOut,'w')
	gp.write(preamble)
	gp.write('plot '+','.join(plots))
	print "gnuplot",gnuplotOut
	print "Plot written, bye."
if httpWait and time.time()-httpLastServe<30:
	print "(continue serving http until no longer requested  as per --http-wait)"
	while time.time()-httpLastServe<30:
		time.sleep(1)

yade.Omega().exitNoBacktrace()
