/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#ifndef SB_BC_H_
#define SB_BC_H_

#include <stdint.h>
#include "r600_isa.h"

#include <cstdio>
#include <string>
#include <vector>
#include <stack>

struct r600_bytecode;
struct r600_shader;

namespace r600_sb {

class hw_encoding_format;
class node;
class alu_node;
class cf_node;
class fetch_node;
class alu_group_node;
class region_node;
class shader;
class value;

// Abstract text output stream with a small set of printf-style helpers.
//
// Derived classes implement write(); every operator<< and print_* helper
// formats its argument into a local buffer and forwards the resulting
// NUL-terminated string to write().
class sb_ostream {
public:
	sb_ostream() {}

	// Polymorphic base class: keep destruction through a base pointer
	// well defined.
	virtual ~sb_ostream() {}

	// Emit the NUL-terminated string s to the underlying sink.
	virtual void write(const char *s) = 0;

	sb_ostream& operator <<(const char *s) {
		write(s);
		return *this;
	}

	sb_ostream& operator <<(const std::string& s) {
		return *this << s.c_str();
	}

	// Pointer, formatted with "%p".
	sb_ostream& operator <<(void *p) {
		char b[32];
		snprintf(b, sizeof(b), "%p", p);
		return *this << b;
	}

	// Single character (a '\0' character yields an empty string).
	sb_ostream& operator <<(char c) {
		const char b[2] = { c, '\0' };
		return *this << b;
	}

	sb_ostream& operator <<(int n) {
		char b[32];
		snprintf(b, sizeof(b), "%d", n);
		return *this << b;
	}

	sb_ostream& operator <<(unsigned n) {
		char b[32];
		snprintf(b, sizeof(b), "%u", n);
		return *this << b;
	}

	sb_ostream& operator <<(double d) {
		char b[32];
		snprintf(b, sizeof(b), "%g", d);
		return *this << b;
	}

	// The print_* helpers below pass the field width through the printf
	// '*' specifier instead of building a format string at run time, so an
	// arbitrarily large (or negative) width can no longer overflow a
	// temporary format buffer. Output longer than 255 characters is
	// truncated by snprintf.

	// print as field of specified width, right aligned
	void print_w(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%-*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(const std::string &s, int width) {
		write(s.c_str());
		int l = s.length();
		// pad with spaces up to the requested width
		while (l++ < width) {
			write(" ");
		}
	}

	// print int as field of specified width, right aligned, zero-padded
	void print_zw(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%0*d", width, n);
		write(b);
	}

	// print int as field of specified width, right aligned, zero-padded, hex
	void print_zw_hex(int n, int width) {
		char b[256];
		// %x expects an unsigned argument
		snprintf(b, sizeof(b), "%0*x", width, static_cast<unsigned>(n));
		write(b);
	}
};

// sb_ostream that accumulates everything written to it in a std::string.
class sb_ostringstream : public sb_ostream {
	std::string text;
public:
	sb_ostringstream() {}

	// Append the NUL-terminated string s to the accumulated text.
	virtual void write(const char *s) {
		text.append(s);
	}

	// Drop everything accumulated so far.
	void clear() {
		text.erase();
	}

	// Accumulated text as a C string.
	const char* c_str() {
		return text.c_str();
	}

	// Mutable reference to the accumulated text.
	std::string& str() {
		return text;
	}
};

// sb_ostream that forwards every write to a C stdio stream (stderr).
class sb_log : public sb_ostream {
	FILE *sink;
public:
	sb_log()
		: sink(stderr)
	{}

	// Write the NUL-terminated string s to the log stream.
	virtual void write(const char *s) {
		std::fputs(s, sink);
	}
};

// Shared log stream object; only declared here, the definition lives
// outside this header.
extern sb_log sblog;

// Shader target identifiers. TARGET_NUM is the number of targets listed
// above it.
enum shader_target
{
	TARGET_UNKNOWN = 0,
	TARGET_VS      = 1,
	TARGET_ES      = 2,
	TARGET_PS      = 3,
	TARGET_GS      = 4,
	TARGET_GS_COPY = 5,
	TARGET_COMPUTE = 6,
	TARGET_FETCH   = 7,
	TARGET_HS      = 8,
	TARGET_LS      = 9,

	TARGET_NUM     = 10
};

// One bit per hardware class, plus the combined masks used in this file.
enum sb_hw_class_bits
{
	HB_R6	= 0x1,
	HB_R7	= 0x2,
	HB_EG	= 0x4,
	HB_CM	= 0x8,

	// combinations of the single-class bits above
	HB_R6R7 = HB_R6 | HB_R7,
	HB_EGCM = HB_EG | HB_CM,
	HB_R6R7EG = HB_R6R7 | HB_EG,
	HB_R7EGCM = HB_R7 | HB_EGCM,

	HB_ALL = HB_R6R7 | HB_EGCM
};

// Chip identifiers; values are consecutive, starting at 0 for
// HW_CHIP_UNKNOWN.
enum sb_hw_chip
{
	HW_CHIP_UNKNOWN = 0,
	HW_CHIP_R600    = 1,
	HW_CHIP_RV610   = 2,
	HW_CHIP_RV630   = 3,
	HW_CHIP_RV670   = 4,
	HW_CHIP_RV620   = 5,
	HW_CHIP_RV635   = 6,
	HW_CHIP_RS780   = 7,
	HW_CHIP_RS880   = 8,
	HW_CHIP_RV770   = 9,
	HW_CHIP_RV730   = 10,
	HW_CHIP_RV710   = 11,
	HW_CHIP_RV740   = 12,
	HW_CHIP_CEDAR   = 13,
	HW_CHIP_REDWOOD = 14,
	HW_CHIP_JUNIPER = 15,
	HW_CHIP_CYPRESS = 16,
	HW_CHIP_HEMLOCK = 17,
	HW_CHIP_PALM    = 18,
	HW_CHIP_SUMO    = 19,
	HW_CHIP_SUMO2   = 20,
	HW_CHIP_BARTS   = 21,
	HW_CHIP_TURKS   = 22,
	HW_CHIP_CAICOS  = 23,
	HW_CHIP_CAYMAN  = 24,
	HW_CHIP_ARUBA   = 25
};

// Hardware class identifiers. The ordering matters: code in this file
// compares classes with >= (see sb_context::is_egcm).
enum sb_hw_class
{
	HW_CLASS_UNKNOWN   = 0,
	HW_CLASS_R600      = 1,
	HW_CLASS_R700      = 2,
	HW_CLASS_EVERGREEN = 3,
	HW_CLASS_CAYMAN    = 4
};

// ALU slot indices, with explicitly fixed values 0..4.
// NOTE(review): presumably X/Y/Z/W are the vector slots and TRANS the
// transcendental (scalar) slot - confirm against the ISA documentation.
enum alu_slots {
	SLOT_X = 0,
	SLOT_Y = 1,
	SLOT_Z = 2,
	SLOT_W = 3,
	SLOT_TRANS = 4
};

// Miscellaneous size limits used by the backend.
// NOTE(review): the exact meaning of each limit is not visible in this
// header; the names suggest per-group literal count, slot count, GPR count
// and channel count - confirm at the use sites.
enum misc_consts {
	MAX_ALU_LITERALS = 4,
	MAX_ALU_SLOTS = 128,
	MAX_GPR = 128,
	MAX_CHAN = 4

};

// Named ALU source selector values. All values are fixed explicitly and the
// numbering has gaps (e.g. 225-226 and 239 are not named here).
// sb_context::is_lds_oq() tests the range 0xdb..0xde, which corresponds to
// the four ALU_SRC_LDS_OQ_* entries below.
// NOTE(review): presumably these are the hardware encodings of the special
// source operands - confirm against the ISA documentation.
enum alu_src_sel {

	ALU_SRC_LDS_OQ_A = 219,
	ALU_SRC_LDS_OQ_B = 220,
	ALU_SRC_LDS_OQ_A_POP = 221,
	ALU_SRC_LDS_OQ_B_POP = 222,
	ALU_SRC_LDS_DIRECT_A = 223,
	ALU_SRC_LDS_DIRECT_B = 224,
	ALU_SRC_TIME_HI = 227,
	ALU_SRC_TIME_LO = 228,
	ALU_SRC_MASK_HI = 229,
	ALU_SRC_MASK_LO = 230,
	ALU_SRC_HW_WAVE_ID = 231,
	ALU_SRC_SIMD_ID = 232,
	ALU_SRC_SE_ID = 233,
	ALU_SRC_HW_THREADGRP_ID = 234,
	ALU_SRC_WAVE_ID_IN_GRP = 235,
	ALU_SRC_NUM_THREADGRP_WAVES = 236,
	ALU_SRC_HW_ALU_ODD = 237,
	ALU_SRC_LOOP_IDX = 238,
	ALU_SRC_PARAM_BASE_ADDR = 240,
	ALU_SRC_NEW_PRIM_MASK = 241,
	ALU_SRC_PRIM_MASK_HI = 242,
	ALU_SRC_PRIM_MASK_LO = 243,
	ALU_SRC_1_DBL_L = 244,
	ALU_SRC_1_DBL_M = 245,
	ALU_SRC_0_5_DBL_L = 246,
	ALU_SRC_0_5_DBL_M = 247,
	ALU_SRC_0 = 248,
	ALU_SRC_1 = 249,
	ALU_SRC_1_INT = 250,
	ALU_SRC_M_1_INT = 251,
	ALU_SRC_0_5 = 252,
	ALU_SRC_LITERAL = 253,
	ALU_SRC_PV = 254,
	ALU_SRC_PS = 255,

	ALU_SRC_PARAM_OFFSET = 448
};

// Values for the ALU predicate-select field (bc_alu::pred_sel is 2 bits
// wide). Value 1 is reserved and intentionally has no enumerator.
enum alu_predicate_select
{
	PRED_SEL_OFF	= 0,
//	RESERVED		= 1,
	PRED_SEL_0		= 2,
	PRED_SEL_1		= 3
};


// Values for the ALU output-modifier field (bc_alu::omod is 2 bits wide).
// NOTE(review): the names suggest "multiply by 2", "multiply by 4" and
// "divide by 2" - confirm against the ISA documentation.
enum alu_omod {
	OMOD_OFF  = 0,
	OMOD_M2   = 1,
	OMOD_M4   = 2,
	OMOD_D2   = 3
};

// Values for the ALU index-mode field (bc_alu::index_mode is 3 bits wide).
// NOTE(review): the _R600 suffix on values 1..3 suggests they are only
// meaningful on R600-class hardware - confirm against the ISA documentation.
enum alu_index_mode {
	INDEX_AR_X        = 0,
	INDEX_AR_Y_R600   = 1,
	INDEX_AR_Z_R600   = 2,
	INDEX_AR_W_R600   = 3,

	INDEX_LOOP        = 4,
	INDEX_GLOBAL      = 5,
	INDEX_GLOBAL_AR_X = 6
};

// Cayman MOVA destination selectors, numbered consecutively from 0.
enum alu_cayman_mova_dst {
	CM_MOVADST_AR_X = 0,
	CM_MOVADST_PC   = 1,
	CM_MOVADST_IDX0 = 2,
	CM_MOVADST_IDX1 = 3,
	CM_MOVADST_CG0  = 4,		// clause-global byte 0
	CM_MOVADST_CG1  = 5,
	CM_MOVADST_CG2  = 6,
	CM_MOVADST_CG3  = 7
};

// Cayman execute-mask operations, numbered consecutively from 0.
enum alu_cayman_exec_mask_op {
	CM_EMO_DEACTIVATE = 0,
	CM_EMO_BREAK      = 1,
	CM_EMO_CONTINUE   = 2,
	CM_EMO_KILL       = 3
};


// Export types; EXP_TYPE_COUNT is the number of types listed above it.
enum cf_exp_type {
	EXP_PIXEL = 0,
	EXP_POS   = 1,
	EXP_PARAM = 2,

	EXP_TYPE_COUNT = 3
};

// Memory write types, numbered consecutively from 0.
enum cf_mem_type {
	MEM_WRITE         = 0,
	MEM_WRITE_IND     = 1,
	MEM_WRITE_ACK     = 2,
	MEM_WRITE_IND_ACK = 3
};


// Lock modes stored in bc_kcache::mode; KC_LOCK_NONE (0) means "not locked".
enum alu_kcache_mode {
	KC_LOCK_NONE = 0,
	KC_LOCK_1    = 1,
	KC_LOCK_2    = 2,
	KC_LOCK_LOOP = 3
};

// Index modes stored in bc_kcache::index_mode; KC_INDEX_NONE (0) means
// "not indexed".
enum alu_kcache_index_mode {
	KC_INDEX_NONE    = 0,
	KC_INDEX_0       = 1,
	KC_INDEX_1       = 2,
	KC_INDEX_INVALID = 3
};

// Channel selector values. Value 6 is reserved and intentionally has no
// enumerator.
// NOTE(review): presumably SEL_0/SEL_1 select the constants 0 and 1 and
// SEL_MASK masks the channel - confirm against the ISA documentation.
enum chan_select {
	SEL_X	= 0,
	SEL_Y	= 1,
	SEL_Z	= 2,
	SEL_W	= 3,
	SEL_0	= 4,
	SEL_1	= 5,
//	RESERVED = 6,
	SEL_MASK = 7
};

// Bank swizzle values (bc_alu::bank_swizzle is 3 bits wide).
// Two numbering spaces share this enum and overlap numerically: the VEC_*
// values (0..5, VEC_NUM entries) and the SCL_* values (0..3, SCL_NUM
// entries), so a given number is only meaningful together with knowledge of
// which space applies.
// NOTE(review): presumably VEC_* applies to vector slots and SCL_* to the
// scalar/trans slot - confirm at the use sites.
enum bank_swizzle {
	VEC_012 = 0,
	VEC_021 = 1,
	VEC_120 = 2,
	VEC_102 = 3,
	VEC_201 = 4,
	VEC_210 = 5,

	VEC_NUM = 6,

	SCL_210 = 0,
	SCL_122 = 1,
	SCL_212 = 2,
	SCL_221 = 3,

	SCL_NUM = 4

};

// Scheduler queue identifiers; SQ_NUM is the number of queues listed above
// it.
enum sched_queue_id {
	SQ_CF  = 0,
	SQ_ALU = 1,
	SQ_TEX = 2,
	SQ_VTX = 3,
	SQ_GDS = 4,

	SQ_NUM = 5
};

// 32-bit literal value that can be viewed as a signed integer, an unsigned
// integer or a float; all three views share the same storage.
struct literal {
	union {
		int32_t i;
		uint32_t u;
		float f;
	};

	// Construct from a signed integer (also the default constructor: 0).
	literal(int32_t val_i = 0) : i(val_i) {}

	// Construct from an unsigned integer.
	literal(uint32_t val_u) : u(val_u) {}

	// Construct from a float.
	literal(float val_f) : f(val_f) {}

	// Construct from a double; the value is converted to float.
	literal(double val_d) : f(val_d) {}

	// Implicit conversion yields the unsigned view.
	operator uint32_t() const {
		return u;
	}

	// Literals compare equal when their unsigned views are equal.
	bool operator ==(literal l) {
		return l.u == u;
	}

	// Compare the signed view against an int.
	bool operator ==(int v_int) {
		return v_int == i;
	}

	// Compare the unsigned view against an unsigned.
	bool operator ==(unsigned v_uns) {
		return v_uns == u;
	}
};

// One constant-cache (kcache) set description; bc_cf holds four of these.
struct bc_kcache {
	unsigned mode;		// compared against alu_kcache_mode values (KC_LOCK_*)
	unsigned bank;
	unsigned addr;
	unsigned index_mode;	// compared against alu_kcache_index_mode values (KC_INDEX_*)
} ;

// TODO optimize bc structures

// Field-by-field representation of one control-flow (CF) instruction.
// The members are plain storage with no constructor, so they are not
// initialized by this struct itself.
struct bc_cf {

	bc_kcache kc[4];

	unsigned id;


	const cf_op_info * op_ptr;
	unsigned op;

	unsigned addr:32;

	unsigned alt_const:1;
	unsigned uses_waterfall:1;

	unsigned barrier:1;
	unsigned count:7;
	unsigned pop_count:3;
	unsigned call_count:6;
	unsigned whole_quad_mode:1;
	unsigned valid_pixel_mode:1;

	unsigned jumptable_sel:3;
	unsigned cf_const:5;
	unsigned cond:2;
	unsigned end_of_program:1;

	unsigned array_base:13;
	unsigned elem_size:2;
	unsigned index_gpr:7;
	unsigned rw_gpr:7;
	unsigned rw_rel:1;
	unsigned type:2;

	unsigned burst_count:4;
	unsigned mark:1;
	unsigned sel[4];

	unsigned array_size:12;
	unsigned comp_mask:4;

	unsigned rat_id:4;
	unsigned rat_inst:6;
	unsigned rat_index_mode:2;

	// Store the opcode and cache the matching op-info pointer.
	void set_op(unsigned op) {
		this->op = op;
		op_ptr = r600_isa_cf(op);
	}

	// True when kcache set 2 or 3 is locked, or when any of the four
	// kcache sets uses an index mode. Only valid for CF_ALU instructions
	// (asserted).
	bool is_alu_extended() {
		assert(op_ptr->flags & CF_ALU);

		for (unsigned set = 0; set < 4; ++set) {
			if (kc[set].index_mode != KC_INDEX_NONE)
				return true;
		}

		return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE;
	}

};

struct bc_alu_src {
	unsigned sel:9;
	unsigned chan:2;
	unsigned neg:1;
	unsigned abs:1;
	unsigned rel:1;
	literal value;

	void clear() {
		sel = 0;
		chan = 0;
		neg = 0;
		abs = 0;
		rel = 0;
		value = 0;
	}
};

struct bc_alu {
	const alu_op_info * op_ptr;
	unsigned op;

	bc_alu_src src[3];

	unsigned dst_gpr:7;
	unsigned dst_chan:2;
	unsigned dst_rel:1;
	unsigned clamp:1;
	unsigned omod:2;
	unsigned bank_swizzle:3;

	unsigned index_mode:3;
	unsigned last:1;
	unsigned pred_sel:2;

	unsigned fog_merge:1;
	unsigned write_mask:1;
	unsigned update_exec_mask:1;
	unsigned update_pred:1;

	unsigned slot:3;

	unsigned lds_idx_offset:6;

	alu_op_flags slot_flags;

	void set_op(unsigned op) {
		this->op = op;
		op_ptr = r600_isa_alu(op);
	}
	void clear() {
		op_ptr = nullptr;
		op = 0;
		for (int i = 0; i < 3; ++i)
			src[i].clear();
		dst_gpr = 0;
		dst_chan = 0;
		dst_rel = 0;
		clamp = 0;
		omod = 0;
		bank_swizzle = 0;
		index_mode = 0;
		last = 0;
		pred_sel = 0;
		fog_merge = 0;
		write_mask = 0;
		update_exec_mask = 0;
		update_pred = 0;
		slot = 0;
		lds_idx_offset = 0;
		slot_flags = AF_NONE;
	}
	bc_alu() {
		clear();
	}
};

// Field-by-field representation of one fetch instruction. The same struct
// carries fields for several instruction families; the in-line comments
// below mark the fields that are specific to GDS and MEM ops.
// The members are plain storage with no constructor, so they are not
// initialized by this struct itself.
struct bc_fetch {
	const fetch_op_info * op_ptr;
	unsigned op;

	unsigned bc_frac_mode:1;
	unsigned fetch_whole_quad:1;
	unsigned resource_id:8;

	unsigned src_gpr:7;
	unsigned src_rel:1;
	unsigned src_rel_global:1; /* for GDS ops */
	unsigned src_sel[4];

	unsigned dst_gpr:7;
	unsigned dst_rel:1;
	unsigned dst_rel_global:1; /* for GDS ops */
	unsigned dst_sel[4];

	unsigned alt_const:1;

	unsigned inst_mod:2;
	unsigned resource_index_mode:2;
	unsigned sampler_index_mode:2;

	unsigned coord_type[4];
	unsigned lod_bias:7;

	unsigned offset[3];

	unsigned sampler_id:5;


	unsigned fetch_type:2;
	unsigned mega_fetch_count:6;
	unsigned coalesced_read:1;
	unsigned structured_read:2;
	unsigned lds_req:1;

	unsigned data_format:6;
	unsigned format_comp_all:1;
	unsigned num_format_all:2;
	unsigned semantic_id:8;
	unsigned srf_mode_all:1;
	unsigned use_const_fields:1;

	unsigned const_buf_no_stride:1;
	unsigned endian_swap:2;
	unsigned mega_fetch:1;

	unsigned src2_gpr:7; /* for GDS */
	unsigned alloc_consume:1;
	unsigned uav_id:4;
	unsigned uav_index_mode:2;
	unsigned bcast_first_req:1;

	/* for MEM ops */
	unsigned elem_size:2;
	unsigned uncached:1;
	unsigned indexed:1;
	unsigned burst_count:4;
	unsigned array_base:13;
	unsigned array_size:12;

	// Store the opcode and cache the matching op-info pointer.
	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
};

// Counters describing a shader (or a set of shaders, when accumulated).
// All counters start at zero.
struct shader_stats {
	unsigned	ndw;
	unsigned	ngpr;
	unsigned	nstack;

	unsigned	cf; // clause instructions not included
	unsigned	alu;
	unsigned	alu_clauses;
	unsigned	fetch_clauses;
	unsigned	fetch;
	unsigned	alu_groups;

	unsigned	shaders;		// number of shaders (for accumulated stats)

	shader_stats()
		: ndw(0), ngpr(0), nstack(0),
		  cf(0), alu(0), alu_clauses(0),
		  fetch_clauses(0), fetch(0), alu_groups(0),
		  shaders(0)
	{}

	// The member functions below are only declared here; their
	// definitions are outside this header.
	void collect(node *n);
	void accumulate(shader_stats &s);
	void dump();
	void dump_diff(shader_stats &s);
};

// Per-device backend context: target chip/class, hardware parameters and
// workaround flags, plus process-wide (static) debug/behaviour switches.
// Also provides thin wrappers around the r600_isa lookup helpers.
class sb_context {

public:

	shader_stats src_stats, opt_stats;

	r600_isa *isa;

	sb_hw_chip hw_chip;
	sb_hw_class hw_class;

	unsigned alu_temp_gprs;
	unsigned max_fetch;
	bool has_trans;
	unsigned vtx_src_num;
	unsigned num_slots;
	bool uses_mova_gpr;

	bool r6xx_gpr_index_workaround;

	bool stack_workaround_8xx;
	bool stack_workaround_9xx;

	unsigned wavefront_size;
	unsigned stack_entry_size;

	static unsigned dump_pass;
	static unsigned dump_stat;

	static unsigned dry_run;
	static unsigned no_fallback;
	static unsigned safe_math;

	static unsigned dskip_start;
	static unsigned dskip_end;
	static unsigned dskip_mode;

	// Everything starts out zero / false / "unknown"; init() is declared
	// below for the actual setup.
	sb_context()
		: src_stats(), opt_stats(), isa(nullptr),
		  hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN),
		  alu_temp_gprs(0), max_fetch(0), has_trans(false),
		  vtx_src_num(0), num_slots(0), uses_mova_gpr(false),
		  r6xx_gpr_index_workaround(false),
		  stack_workaround_8xx(false), stack_workaround_9xx(false),
		  wavefront_size(0), stack_entry_size(0)
	{}

	int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);

	// Hardware class predicates.
	bool is_r600() { return HW_CLASS_R600 == hw_class; }
	bool is_r700() { return HW_CLASS_R700 == hw_class; }
	bool is_evergreen() { return HW_CLASS_EVERGREEN == hw_class; }
	bool is_cayman() { return HW_CLASS_CAYMAN == hw_class; }
	// Evergreen or any later class.
	bool is_egcm() { return !(hw_class < HW_CLASS_EVERGREEN); }

	// True for every Evergreen-class chip except Hemlock, Cypress and
	// Juniper.
	bool needs_8xx_stack_workaround() {
		if (!is_evergreen())
			return false;

		const bool exempt_chip = hw_chip == HW_CHIP_HEMLOCK ||
				hw_chip == HW_CHIP_CYPRESS ||
				hw_chip == HW_CHIP_JUNIPER;
		return !exempt_chip;
	}

	// True for Cayman-class hardware.
	bool needs_9xx_stack_workaround() {
		return hw_class == HW_CLASS_CAYMAN;
	}

	// Map the current hardware class to its single-bit mask. An unknown
	// class trips the assertion and yields 0.
	sb_hw_class_bits hw_class_bit() {
		if (hw_class == HW_CLASS_R600)
			return HB_R6;
		if (hw_class == HW_CLASS_R700)
			return HB_R7;
		if (hw_class == HW_CLASS_EVERGREEN)
			return HB_EG;
		if (hw_class == HW_CLASS_CAYMAN)
			return HB_CM;

		assert(!"unknown hw class");
		return (sb_hw_class_bits)0;
	}

	// Wrappers around the r600_isa lookups, using the class stored in isa.
	unsigned cf_opcode(unsigned op) {
		return r600_isa_cf_opcode(isa->hw_class, op);
	}

	unsigned alu_opcode(unsigned op) {
		return r600_isa_alu_opcode(isa->hw_class, op);
	}

	unsigned alu_slots(unsigned op) {
		return r600_isa_alu_slots(isa->hw_class, op);
	}

	unsigned alu_slots(const alu_op_info * op_ptr) {
		return op_ptr->slots[isa->hw_class];
	}

	// Bit mask of the ALU slots the given op may occupy: bits 0-3 when the
	// AF_V flag is set, bit 4 when AF_S is set and the hardware is not
	// Cayman.
	unsigned alu_slots_mask(const alu_op_info * op_ptr) {
		/* Force LDS_IDX ops into SLOT_X */
		if (op_ptr->opcode[0] == -1 && ((op_ptr->opcode[1] & 0xFF) == 0x11))
			return 0x01;

		const unsigned slot_flags = alu_slots(op_ptr);
		unsigned mask = (slot_flags & AF_V) ? 0x0F : 0;
		if ((slot_flags & AF_S) && !is_cayman())
			mask |= 0x10;
		return mask;
	}

	unsigned fetch_opcode(unsigned op) {
		return r600_isa_fetch_opcode(isa->hw_class, op);
	}

	// True for selectors in [128, 192) or [256, 320).
	bool is_kcache_sel(unsigned sel) {
		if (sel >= 128 && sel < 192)
			return true;
		return sel >= 256 && sel < 320;
	}

	// True for selectors in [0xdb, 0xde].
	bool is_lds_oq(unsigned sel) {
		return !(sel < 0xdb || sel > 0xde);
	}

	const char * get_hw_class_name();
	const char * get_hw_chip_name();

};

// Run the statement(s) `a` only when the corresponding static switch
// (sb_context::dump_stat / sb_context::dump_pass) is non-zero. The
// do { } while (0) wrapper makes each macro usable as a single statement.
#define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0)
#define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0)

// Decoder over a buffer of 32-bit bytecode words. The constructor only
// stores the context reference, the word pointer and the word count; the
// decode_* functions are declared here and defined outside this header.
class bc_decoder {

	sb_context &ctx;

	uint32_t* dw;	// bytecode words (not owned: only the pointer is stored)
	unsigned ndw;	// number of words passed to the constructor

public:

	bc_decoder(sb_context &sctx, uint32_t *data, unsigned size)
		: ctx(sctx), dw(data), ndw(size) {}

	// Each decoder takes the position `i` by reference and fills `bc`.
	// NOTE(review): presumably `i` is advanced past the decoded
	// instruction and the return value is 0 on success - confirm in the
	// implementation.
	int decode_cf(unsigned &i, bc_cf &bc);
	int decode_alu(unsigned &i, bc_alu &bc);
	int decode_fetch(unsigned &i, bc_fetch &bc);

private:
	int decode_cf_alu(unsigned &i, bc_cf &bc);
	int decode_cf_exp(unsigned &i, bc_cf &bc);
	int decode_cf_mem(unsigned &i, bc_cf &bc);

	int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
	int decode_fetch_gds(unsigned &i, bc_fetch &bc);
	int decode_fetch_mem(unsigned &i, bc_fetch &bc);
};

// bytecode format definition

// Base class of the generated bytecode format classes: a 32-bit encoded
// word together with the set of hardware classes the format is valid for.
class hw_encoding_format {
	const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing

	// Declared private and left undefined: a format must always be
	// constructed with its hardware class set.
	hw_encoding_format();
protected:
	uint32_t value;
public:
	// Start from an all-zero word.
	hw_encoding_format(sb_hw_class_bits hw)
		: hw_target(hw), value(0)
	{}

	// Start from an existing encoded word.
	hw_encoding_format(uint32_t v, sb_hw_class_bits hw)
		: hw_target(hw), value(v)
	{}

	// Return the encoded word. Asserts that every class bit in `hw` is
	// contained in the set this format was declared for.
	uint32_t get_value(sb_hw_class_bits hw) const {
		assert((hw & ~hw_target) == 0);
		return value;
	}
};

// BC_FORMAT_BEGIN_HW(fmt, hwset) opens the definition of a class named
// fmt_hwset derived from hw_encoding_format, tagged with HB_hwset, with a
// default constructor and a constructor taking an existing 32-bit word.
// It also defines `thistype`, which the BC_FIELD setters return.
// The class body is left open; BC_FORMAT_END closes it.
#define BC_FORMAT_BEGIN_HW(fmt, hwset) \
class fmt##_##hwset : public hw_encoding_format {\
	typedef fmt##_##hwset thistype; \
public: \
	fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
	fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};

// Same as BC_FORMAT_BEGIN_HW for a format valid on all hardware classes
// (class name fmt_ALL, tag HB_ALL).
#define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)

// Closes the class opened by BC_FORMAT_BEGIN / BC_FORMAT_BEGIN_HW.
#define BC_FORMAT_END(fmt) };

// bytecode format field definition

// BC_FIELD defines two members for the bit range [first_bit, last_bit]:
//  - name(v): masks v to the field width, shifts it to first_bit and ORs it
//    into `value` (bits already set in the field are NOT cleared), then
//    returns *this so setters can be chained;
//  - get_name(): extracts the field from `value`.
// The mask is computed with 1ull so that a full 32-bit wide field does not
// overflow the shift. `fmt` and `shortname` are not used by the expansion.
#define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
	thistype & name(unsigned v) { \
		value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
		return *this; \
	} \
	unsigned get_##name() const { \
		return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
	}

// Reserved bit ranges generate no code.
#define BC_RSRVD(fmt, last_bit, first_bit)

// CLAMP macro defined elsewhere interferes with bytecode field name
#undef CLAMP
#include "sb_bc_fmt_def.inc"

#undef BC_FORMAT_BEGIN
#undef BC_FORMAT_END
#undef BC_FIELD
#undef BC_RSRVD

// Parser that turns an r600_bytecode / r600_shader pair into the backend's
// `shader` representation. The public entry points are decode() and
// prepare(); all member functions except the constructor and get_shader()
// are defined outside this header.
// NOTE(review): the expected call order (decode() before prepare()) and the
// ownership of `dec` and `sh` are not visible here - confirm in the
// implementation.
class bc_parser {
	sb_context & ctx;

	bc_decoder *dec;

	r600_bytecode *bc;
	r600_shader *pshader;

	uint32_t *dw;
	unsigned bc_ndw;

	unsigned max_cf;

	shader *sh;

	int error;

	alu_node *slots[2][5];
	unsigned cgroup;

	typedef std::vector<cf_node*> id_cf_map;
	id_cf_map cf_map;

	typedef std::stack<region_node*> region_stack;
	region_stack loop_stack;

	bool gpr_reladdr;

	// Note: currently relies on input emitting SET_CF in same basic block as uses
	value *cf_index_value[2];
	alu_node *mova;
public:

	// Stores the context and the input bytecode/shader pointers; every
	// other member is value-initialized (null / zero / false / empty).
	bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
		ctx(sctx), dec(), bc(bc), pshader(pshader),
		dw(), bc_ndw(), max_cf(),
		sh(), error(), slots(), cgroup(),
		cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }

	int decode();
	int prepare();

	// Returns the shader pointer; asserts that no error was recorded.
	shader* get_shader() { assert(!error); return sh; }

private:

	int decode_shader();

	int parse_decls();

	int decode_cf(unsigned &i, bool &eop);

	int decode_alu_clause(cf_node *cf);
	int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);

	int decode_fetch_clause(cf_node *cf);

	int prepare_ir();
	int prepare_alu_clause(cf_node *cf);
	int prepare_alu_group(cf_node* cf, alu_group_node *g);
	int prepare_fetch_clause(cf_node *cf);

	int prepare_loop(cf_node *c);
	int prepare_if(cf_node *c);

	void save_set_cf_index(value *val, unsigned idx);
	value *get_cf_index_value(unsigned idx);
	void save_mova(alu_node *mova);
	alu_node *get_mova();
};




// Growable buffer of 32-bit bytecode words with a write cursor.
//
// Words are written at the cursor position `pos` with operator<<; writing
// at the end appends, writing inside the buffer overwrites. The hardware
// class bit given at construction is passed to
// hw_encoding_format::get_value() when encoded formats are written.
class bytecode {
	typedef std::vector<uint32_t> bc_vector;
	sb_hw_class_bits hw_class_bit;

	bc_vector bc;

	unsigned pos;

public:

	// hw:  hardware class bit used when writing hw_encoding_format values.
	// rdw: number of words to reserve up front (capacity only, size stays 0).
	bytecode(sb_hw_class_bits hw, unsigned rdw = 256)
		: hw_class_bit(hw), pos(0) { bc.reserve(rdw); }

	// Current size of the buffer in words.
	unsigned ndw() { return bc.size(); }

	// Copy all words to dst, which must have room for ndw() words.
	void write_data(uint32_t* dst) {
		std::copy(bc.begin(), bc.end(), dst);
	}

	// Grow the buffer to the next multiple of `a` words. The mask
	// arithmetic only rounds correctly when `a` is a power of two.
	void align(unsigned a) {
		unsigned size = bc.size();
		size = (size + a - 1) & ~(a-1);
		bc.resize(size);
	}

	// Grow the buffer to sz words (shrinking is not allowed: asserted).
	void set_size(unsigned sz) {
		assert(sz >= bc.size());
		bc.resize(sz);
	}

	// Move the write cursor to word p, growing the buffer if p lies past
	// the current end.
	void seek(unsigned p) {
		if (p != pos) {
			if (p > bc.size()) {
				bc.resize(p);
			}
			pos = p;
		}
	}

	unsigned get_pos() { return pos; }

	// Pointer to the first word. Uses vector::data() rather than &bc[0],
	// because indexing element 0 of an empty vector is undefined
	// behaviour; for an empty buffer the returned pointer must not be
	// dereferenced.
	uint32_t *data() { return bc.data(); }

	// Write one word at the cursor (append at the end, overwrite
	// otherwise) and advance the cursor.
	bytecode & operator <<(uint32_t v) {
		if (pos == ndw()) {
			bc.push_back(v);
		} else
			bc.at(pos) = v;
		++pos;
		return *this;
	}

	// Write the encoded word of a format object at the cursor.
	bytecode & operator <<(const hw_encoding_format &e) {
		*this << e.get_value(hw_class_bit);
		return *this;
	}

	// Append all words of b at the END of the buffer. Note that this
	// neither uses nor updates the write cursor.
	bytecode & operator <<(const bytecode &b) {
		bc.insert(bc.end(), b.bc.begin(), b.bc.end());
		return *this;
	}

	// Bounds-checked read of word dw_id.
	uint32_t at(unsigned dw_id) { return bc.at(dw_id); }
};


// Builds a `bytecode` buffer from a `shader`. build() is the entry point;
// the private build_* helpers are split by node type. All member functions
// except get_bytecode() are defined outside this header.
class bc_builder {
	shader &sh;
	sb_context &ctx;
	bytecode bb;	// output buffer, returned by get_bytecode()
	int error;

public:

	bc_builder(shader &s);
	int build();

	// Returns the output buffer; asserts that no error was recorded.
	bytecode& get_bytecode() { assert(!error); return bb; }

private:

	int build_cf(cf_node *n);

	int build_cf_alu(cf_node *n);
	int build_cf_mem(cf_node *n);
	int build_cf_exp(cf_node *n);

	int build_alu_clause(cf_node *n);
	int build_alu_group(alu_group_node *n);
	int build_alu(alu_node *n);

	int build_fetch_clause(cf_node *n);
	int build_fetch_tex(fetch_node *n);
	int build_fetch_vtx(fetch_node *n);
	int build_fetch_gds(fetch_node *n);
	int build_fetch_mem(fetch_node* n);
};

} // namespace r600_sb

#endif /* SB_BC_H_ */
