/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright (C) 2000, 2001 Kanoj Sarcar
 * Copyright (C) 2000, 2001 Ralf Baechle
 * Copyright (C) 2000, 2001 Silicon Graphics, Inc.
 * Copyright (C) 2000, 2001, 2003 Broadcom Corporation
 */
#include <linux/config.h>
#include <linux/cache.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <linux/module.h>
#include <linux/time.h>
#include <linux/timex.h>
#include <linux/sched.h>
#include <linux/cpumask.h>

#include <asm/atomic.h>
#include <asm/cpu.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/hardirq.h>
#include <asm/mmu_context.h>
#include <asm/smp.h>
#include <asm/thread_control.h>

/*
 * Global SMP bookkeeping.  The three cpumasks are updated during bring-up
 * (see smp_prepare_boot_cpu(), start_secondary() and do_boot_cpu()).
 */
cpumask_t phys_cpu_present_map;		/* Bitmask of available CPUs */
volatile cpumask_t cpu_callin_map;	/* Bitmask of started secondaries */
cpumask_t cpu_online_map;		/* Bitmask of currently online CPUs */
int __cpu_number_map[NR_CPUS];		/* Map physical to logical */
int __cpu_logical_map[NR_CPUS];		/* Map logical to physical */

EXPORT_SYMBOL(cpu_online_map);

/*
 * Scheduler tuning values computed by smp_tune_scheduling():
 * cacheflush_time is the estimated number of cycles needed to flush the
 * CPU-local cache, cache_decay_ticks is derived from it and HZ.
 */
cycles_t cacheflush_time;
unsigned long cache_decay_ticks;

/*
 * Estimate how expensive it is to migrate a task between CPUs and publish
 * the result in cacheflush_time and cache_decay_ticks.
 *
 * The estimate is derived from loops_per_jiffy (as a stand-in for the CPU
 * clock), the geometry of the secondary cache of the current CPU and an
 * assumed memory bandwidth.  When CONFIG_AXE_AVOID is set the function does
 * nothing at all.
 */
static void smp_tune_scheduling (void)
{
#ifndef CONFIG_AXE_AVOID
	struct cache_desc *cd = &current_cpu_data.scache;
	/*
	 * NOTE(review): historically annotated as "kB", but the value is
	 * simply linesz * sets * ways -- confirm the unit of cd->linesz.
	 */
	unsigned long cachesize;
	unsigned long bandwidth = 350; /* MB/s */
	unsigned long cpu_khz;
	unsigned long cpu_mhz;
	long usecs = 0;
	long usecs_frac = 0;

	/*
	 * Crude estimate until we actually measure ...
	 */
	cpu_khz = loops_per_jiffy * 2 * HZ / 1000;

	/*
	 * Rough estimation for SMP scheduling, this is the number of
	 * cycles it takes for a fully memory-limited process to flush
	 * the SMP-local cache.
	 */
	if (!cpu_khz) {
		/*
		 * This basically disables processor-affinity scheduling on SMP
		 * without a cycle counter.  Currently all SMP capable MIPS
		 * processors have a cycle counter.
		 */
		cacheflush_time = 0;
		return;
	}

	cachesize = cd->linesz * cd->sets * cd->ways;
	cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
	cache_decay_ticks = (long)cacheflush_time/cpu_khz * HZ / 1000;

	/*
	 * cpu_khz / 1000 is zero for clocks below 1 MHz (e.g. a slow
	 * simulator), and dividing by it would trap.  In that range
	 * (cpu_khz >> 10) is zero as well, so cacheflush_time is 0 and
	 * reporting 0.00 usecs is exact.
	 */
	cpu_mhz = cpu_khz / 1000;
	if (cpu_mhz) {
		usecs = (long)cacheflush_time / cpu_mhz;
		usecs_frac = ((long)cacheflush_time * 100 / cpu_mhz) % 100;
	}

	printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
		usecs, usecs_frac);
	printk("task migration cache decay timeout: %ld msecs.\n",
		(cache_decay_ticks + 1) * 1000 / HZ);
#endif /* !CONFIG_AXE_AVOID */
}

/* Delay-loop calibration routine, defined outside this file. */
extern void __init calibrate_delay(void);

/*
 * First C code run on the secondary CPUs after being started up by
 * the master.
 *
 * Sequence: probe the CPU, set up per-CPU traps, run the platform
 * secondary-init hook, calibrate the delay loop (skipped when
 * rmt_rtthread() reports true for this CPU), record udelay_val, run the
 * platform finish hook, announce ourselves in cpu_callin_map and finally
 * enter cpu_idle().
 *
 * The rmt_str_printf()/rmt_hex_printf() calls appear to be bring-up
 * tracing (mostly dumps of the CP0 status register); they are interleaved
 * with the real work, so statement order matters here.
 */
asmlinkage void start_secondary(void)
{
	unsigned int cpu = smp_processor_id();
	//unsigned int cpu = rmt_getotid();
	AXE_BREAK(0xface);
	rmt_str_printf("status=0x"); rmt_hex_printf(read_c0_status());
	cpu_probe();

	rmt_str_printf("per_cpu_trap_init()\n");
	per_cpu_trap_init();
	rmt_str_printf("prom_init_secondary()\n");
	prom_init_secondary();
	rmt_str_printf("status=0x"); rmt_hex_printf(read_c0_status());
	rmt_str_printf("cpu=0x"); rmt_hex_printf(cpu);

	/*
	 * XXX parity protection should be folded in here when it's converted
	 * to an option instead of something based on .cputype
	 */

	//rmt_str_printf("cpu_set()\n");
	//cpu_set(cpu, cpu_callin_map);

	/*
	 * When rmt_rtthread() is true for this CPU, calibration is skipped
	 * and whatever loops_per_jiffy currently holds is used below.
	 */
	if (rmt_rtthread(smp_processor_id())) {
	    rmt_str_printf("rttread: skip calibrate_delay\n");
	} else {
	    rmt_str_printf("calibrate_delay()\n");
	    calibrate_delay();
	}
	rmt_str_printf("loops_per_jiffy=0x");
	rmt_hex_printf(loops_per_jiffy);
	cpu_data[cpu].udelay_val = loops_per_jiffy;

	rmt_str_printf("prom_smp_finish()\n");
	prom_smp_finish();
	rmt_str_printf("start_secondary(): #2 status=0x");
	rmt_hex_printf(read_c0_status());

	/*
	 * Setting our bit in cpu_callin_map is what releases the polling
	 * loop in do_boot_cpu() on the CPU that started us.
	 */
	rmt_str_printf("cpu_set()\n");
	cpu_set(cpu, cpu_callin_map);
	rmt_str_printf("status=0x"); rmt_hex_printf(read_c0_status());

	rmt_str_printf("cpu_idle()\n");
	rmt_str_printf("status=0x"); rmt_hex_printf(read_c0_status());
	cpu_idle();
}

/*
 * smp_call_lock is held by smp_call_function() from the moment it publishes
 * its request until all targets have responded.
 */
spinlock_t smp_call_lock = SPIN_LOCK_UNLOCKED;

/*
 * Request currently being broadcast; set by smp_call_function() and read by
 * smp_call_function_interrupt() on the target CPUs.
 */
struct call_data_struct *call_data;

/*
 * Run a function on all other CPUs.
 *  <func>      The function to run. This must be fast and non-blocking.
 *  <info>      An arbitrary pointer to pass to the function.
 *  <retry>     If true, keep retrying until ready.
 *  <wait>      If true, wait until function has completed on other CPUs.
 *  [RETURNS]   0 on success, else a negative status code.
 *
 * Does not return until remote CPUs are nearly ready to execute <func>
 * or are or have executed.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int smp_call_function (void (*func) (void *info), void *info, int retry,
								int wait)
{
	struct call_data_struct data;
	int i, cpus = num_online_cpus() - 1;
	int cpu = smp_processor_id();

	rmt_str_printf("smp_call_function(): #1\n");
	/*
	rmt_str_printf("func=0x"); rmt_hex_printf(func);
	rmt_str_printf("info=0x"); rmt_hex_printf(info);
	rmt_str_printf("wait=0x"); rmt_hex_printf(wait);
	*/
	if (!cpus)
		return 0;

	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	if (wait)
		atomic_set(&data.finished, 0);

	spin_lock(&smp_call_lock);
	call_data = &data;
	mb();

	/* Send a message to all other CPUs and wait for them to respond */
	for (i = 0; i < NR_CPUS; i++)
		if (cpu_online(i) && i != cpu)
			core_send_ipi(i, SMP_CALL_FUNCTION);

	/* Wait for response */
	/* FIXME: lock-up detection, backtrace on lock-up */

	while (atomic_read(&data.started) != cpus)
		barrier();

	if (wait)
		while (atomic_read(&data.finished) != cpus)
			barrier();

	spin_unlock(&smp_call_lock);

	return 0;
}

/*
 * Target-CPU side of smp_call_function(): copy the published request,
 * acknowledge it, run the function and, if the initiator asked to wait,
 * report completion.
 */
void smp_call_function_interrupt(void)
{
	void (*handler) (void *info);
	void *arg;
	int must_report;

	/* Take a private copy of the request before acknowledging it. */
	handler = call_data->func;
	arg = call_data->info;
	must_report = call_data->wait;
	rmt_str_printf("smp_call_function_interrupt(): #1\n");

	/*
	 * Tell the initiating CPU that the request has been picked up and
	 * is about to be executed.
	 */
	rmt_str_printf("smp_call_function_interrupt(): #2\n");
	mb();
	atomic_inc(&call_data->started);

	/*
	 * From here on the request structure may already be gone unless
	 * the initiator waits, so only the private copies are used.
	 */
	rmt_str_printf("smp_call_function_interrupt(): #3\n");
	irq_enter();
	handler(arg);
	irq_exit();

	rmt_str_printf("smp_call_function_interrupt(): #4\n");
	if (must_report) {
		/* The initiator is still spinning, so call_data is valid. */
		mb();
		atomic_inc(&call_data->finished);
	}
	rmt_str_printf("smp_call_function_interrupt(): #0\n");
}

/*
 * Take the calling CPU out of cpu_online_map and park it in an endless
 * loop.  <dummy> is unused.  Never returns.
 */
static void stop_this_cpu(void *dummy)
{
	int self = smp_processor_id();

	/* Remove this CPU from the online set. */
	cpu_clear(self, cpu_online_map);

	/* May need to service _machine_restart IPI */
	local_irq_enable();

	/* Spin forever (a wait instruction could be used if available). */
	while (1)
		;
}

/*
 * Ask every other online CPU to run stop_this_cpu().  The call is made
 * with wait == 0 because stop_this_cpu() never returns, so the targets
 * could never report completion.
 */
void smp_send_stop(void)
{
	smp_call_function(stop_this_cpu, NULL, 1, 0);
}

/*
 * Hook invoked with the maximum CPU count; it only forwards to the
 * platform's prom_cpus_done().  <max_cpus> is not used here.
 */
void __init smp_cpus_done(unsigned int max_cpus)
{
	prom_cpus_done();
}

/*
 * called from main before smp_init()
 *
 * Records the boot CPU's delay-loop value, sets up an MMU context for the
 * current task against init_mm, pins the current thread to CPU 0, computes
 * the scheduler tuning values and then lets the platform code build the
 * CPU map and prepare up to <max_cpus> processors.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	cpu_data[0].udelay_val = loops_per_jiffy;
	init_new_context(current, &init_mm);
	current_thread_info()->cpu = 0;
	smp_tune_scheduling();
	prom_build_cpu_map();
	prom_prepare_cpus(max_cpus);
}

/*
 * Preload the SMP state for the boot CPU.
 *
 * CPU 0 is mapped to itself and marked online and called-in.  Every CPU
 * index below NR_CPUS is marked present, independent of any probing.
 */
void __devinit smp_prepare_boot_cpu(void)
{
	int cpu;

	rmt_str_printf("smp_prepare_boot_cpu(): #1\n");

	/*
	 * The boot processor is assumed to always carry both logical and
	 * physical number 0.
	 */
	__cpu_logical_map[0] = 0;
	__cpu_number_map[0] = 0;
	cpu_set(0, cpu_online_map);
	cpu_set(0, cpu_callin_map);

	/* Mark all CPU slots, including the boot CPU, as present. */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpu_set(cpu, phys_cpu_present_map);
	}

	rmt_str_printf("smp_prepare_boot_cpu(): #0\n");
}

/*
 * Create the task that will serve as the idle task of a secondary CPU by
 * calling copy_process() directly with CLONE_VM | CLONE_IDLETASK.
 *
 * Returns whatever copy_process() returns; the caller checks the result
 * with IS_ERR().
 */
static struct task_struct * __init fork_by_hand(void)
{
	struct pt_regs regs;
	/*
	 * regs is deliberately left uninitialized: we don't care about the
	 * PC and register settings since we'll never reschedule the forked
	 * task.
	 */
	return copy_process(CLONE_VM|CLONE_IDLETASK, 0, &regs, 0, NULL, NULL);
}

/*
 * Startup the CPU with this logical number.
 *
 * Forks an idle task for <cpu>, hands it to the platform code via
 * prom_boot_secondary() and then polls until the secondary has set its
 * bit in cpu_callin_map (done at the end of start_secondary()).  The CPU
 * is added to cpu_online_map only when rmt_rtthread(cpu) is false.
 *
 * Panics if the idle task cannot be created.  Returns 0.
 *
 * The rmt_* calls appear to be bring-up tracing; the "#N cpu=" dumps show
 * the idle task's thread_info->cpu after each setup step.
 */
static int __init do_boot_cpu(int cpu)
{
	struct task_struct *idle;

	rmt_str_printf("do_boot_cpu(): #1");
	/*
	 * The following code is purely to make sure
	 * Linux can schedule processes on this slave.
	 */
	idle = fork_by_hand();

	/*
	 * Check for failure before touching the task: on error idle is an
	 * encoded error pointer and must not be dereferenced.
	 */
	if (IS_ERR(idle))
		panic("failed fork for CPU %d\n", cpu);

	if (idle->thread_info) {
		rmt_str_printf("#1 cpu=0x");
		rmt_hex_printf(idle->thread_info->cpu);
	}

	wake_up_forked_process(idle);
	if (idle->thread_info) {
		rmt_str_printf("#2 cpu=0x");
		rmt_hex_printf(idle->thread_info->cpu);
	}

	/*
	 * We remove it from the pidhash and the runqueue once we've
	 * got the process:
	 */
	init_idle(idle, cpu);
	if (idle->thread_info) {
		rmt_str_printf("#3 cpu=0x");
		rmt_hex_printf(idle->thread_info->cpu);
	}

	unhash_process(idle);
	if (idle->thread_info) {
		rmt_str_printf("#4 cpu=0x");
		rmt_hex_printf(idle->thread_info->cpu);
	}

	prom_boot_secondary(cpu, idle);
	rmt_str_printf("- prom_boot_secaondary()\n");

	/*
	 * Wait for the secondary to call in.
	 * XXXKW timeout
	 */
	rmt_str_printf("do_boot_cpu(): #2 cpu=0x");
	rmt_hex_printf(cpu);
	while (!cpu_isset(cpu, cpu_callin_map)) {
#ifdef CONFIG_RMT_INSTSIM
#ifdef CONFIG_AXE_AVOID
		/* No delay loop in this configuration: advance time by hand. */
		jiffies++;
#else
		udelay(1);
#endif
#else
		udelay(100);
#endif
	}

	/* Only non-RT threads are added to the online map. */
	if (rmt_rtthread(cpu))
		rmt_print_val_dec("RT Thread", cpu);
	if (!rmt_rtthread(cpu)) {
		rmt_print_val_dec("Normal Thread", cpu);
		cpu_set(cpu, cpu_online_map);
	}

	rmt_str_printf("- do_boot_cpu()\n");
	return 0;
}

/*
 * Called once for each "cpu_possible(cpu)".  Needs to spin up the cpu
 * and keep control until "cpu_online(cpu)" is set.  Note: cpu is
 * physical, not logical.
 *
 * Returns 0 on success, or the negative status reported by do_boot_cpu().
 */
int __devinit __cpu_up(unsigned int cpu)
{
	/*
	 * do_boot_cpu() starts the processor (which enters
	 * start_secondary()) and waits for it to call in.
	 */
	int status = do_boot_cpu(cpu);

	return (status < 0) ? status : 0;
}

/*
 * Not really SMP stuff ...
 *
 * The requested multiplier is ignored; the call always reports success (0).
 */
int setup_profiling_timer(unsigned int multiplier)
{
	(void)multiplier;	/* intentionally unused */

	return 0;
}

