/*
    Copyright (C) 2010 Intel Corporation.  All Rights Reserved.

    This file is part of SEP Development Kit.

    SEP Development Kit is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.

    SEP Development Kit is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SEP Development Kit; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    As a special exception, you may use this file as part of a free software
    library without restriction.  Specifically, if other files instantiate
    templates or use macros or inline functions from this file, or you compile
    this file and link it with other files to produce an executable, this
    file does not by itself cause the resulting executable to be covered by
    the GNU General Public License.  This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/

#include "cpu.h"
#include "debug.h"
#include "kmem.h"
#include "modcfg.h"
#include "pcb.h"
#include "pmi.h"
#include "pmu.h"
#include "regs.h"
#include "time.h"

#include <asm/nmi.h>	/* for register_nmi_handler */

#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"

int vtss_pmu_version = 0;
bool vtss_arch_pebs_avail = false;
static long apic_lvtpc_val = APIC_DM_NMI;
static int vtss_pmu_nr_ff_counters = 0;
static int vtss_pmu_nr_gp_counters = 0;
static int vtss_pmu_ff_counter_width = 0;
static int vtss_pmu_gp_counter_width = 0;
#define vtss_pmu_ff_counter_width_mask ((1ULL << vtss_pmu_ff_counter_width) - 1)
#define vtss_pmu_gp_counter_width_mask ((1ULL << vtss_pmu_gp_counter_width) - 1)

static atomic_t vtss_pmu_active = ATOMIC_INIT(0);
#define vtss_pmu_active() (atomic_read(&vtss_pmu_active) == 1)
#define vtss_pmu_set_active() atomic_set(&vtss_pmu_active, 1)
#define vtss_pmu_set_inactive() (atomic_cmpxchg(&vtss_pmu_active, 1, 0) == 1)

static void vtss_pmu_save_cb(void *ctx)
{
	unsigned long flags;

	local_irq_save(flags);
	rdmsrl(VTSS_IA32_PERF_GLOBAL_OVF_CTRL, vtss_pcb_cpu.msr_ovf);
	rdmsrl(VTSS_IA32_PERF_GLOBAL_CTRL,     vtss_pcb_cpu.msr_ctrl);
	rdmsrl(VTSS_IA32_DEBUGCTL,             vtss_pcb_cpu.msr_debug);
	vtss_pcb_cpu.apic_lvtpc = vtss_apic_read(APIC_LVTPC);
	local_irq_restore(flags);
}

static void vtss_pmu_restore_cb(void *ctx)
{
	unsigned long flags;

	local_irq_save(flags);
	wrmsrl(VTSS_IA32_PERF_GLOBAL_OVF_CTRL, vtss_pcb_cpu.msr_ovf);
	wrmsrl(VTSS_IA32_PERF_GLOBAL_CTRL,     vtss_pcb_cpu.msr_ctrl);
	wrmsrl(VTSS_IA32_DEBUGCTL,             vtss_pcb_cpu.msr_debug);
	vtss_apic_write(APIC_LVTPC, vtss_pcb_cpu.apic_lvtpc);
	local_irq_restore(flags);
}

static void vtss_pmu_start_cb(void *ctx)
{
	unsigned long flags;

	apic_lvtpc_val = APIC_DM_NMI;
	local_irq_save(flags);
	wrmsrl(VTSS_IA32_PERF_GLOBAL_OVF_CTRL, 0);
	wrmsrl(VTSS_IA32_PERF_GLOBAL_CTRL,     0);
	wrmsrl(VTSS_IA32_DEBUGCTL,             0);
#if defined(X86_FEATURE_NMI_SOURCE) && defined(NMI_SOURCE_VEC_PMI)
	if (cpu_feature_enabled(X86_FEATURE_NMI_SOURCE)) {
		apic_lvtpc_val |= NMI_SOURCE_VEC_PMI;
	}
#endif
	vtss_apic_write(APIC_LVTPC, apic_lvtpc_val);
	local_irq_restore(flags);
}

static void vtss_pmu_stop_cb(void *ctx)
{
	int i;
	unsigned long flags;

	local_irq_save(flags);
	for (i = 0; i < vtss_reqcfg.nr_gp_counters[vtss_cpu_pmu_id()]; i++) {
		wrmsrl(VTSS_IA32_PERFEVTSEL0 + i, 0);
		wrmsrl(VTSS_IA32_PMC0        + i, 0);
		if (vtss_arch_pebs_avail) {
			wrmsrl(VTSS_IA32_PMC_GP0_CFC_C + i*4, 0);
		}
	}
	for (i = 0; i < vtss_reqcfg.nr_ff_counters[vtss_cpu_pmu_id()]; i++) {
		if (vtss_arch_pebs_avail) {
			wrmsrl(VTSS_IA32_PMC_FX0_CFC_C + i*4, 0);
		}
	}
	wrmsrl(VTSS_IA32_FIXED_CTR_CTRL,       0);
	wrmsrl(VTSS_IA32_PERF_GLOBAL_OVF_CTRL, 0);
	wrmsrl(VTSS_IA32_PERF_GLOBAL_CTRL,     0);
	wrmsrl(VTSS_IA32_DEBUGCTL,             0);
	local_irq_restore(flags);
}

static int vtss_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	if (vtss_collector_stopped()) {
		vtss_pr_warning("Unexpected NMI");
		return NMI_DONE;
	}
	/* handle PMI */
	vtss_pmi_handler(regs);
	/* unmask NMI (late ack) */
	vtss_apic_write(APIC_LVTPC, apic_lvtpc_val);
	return NMI_HANDLED;
}

int vtss_nmi_init(void)
{
	int pmu;

	if (vtss_pmu_version <= 0) {
		vtss_pr_error("PMU not detected");
		return -EINVAL;
	}
	if (vtss_pmu_nr_ff_counters + vtss_pmu_nr_gp_counters <= 0) {
		vtss_pr_error("PMU counters not detected");
		return -EINVAL;
	}
	for (pmu = 0; pmu < VTSS_NR_PMUS; pmu++) {
		if (!vtss_reqcfg.nr_events[pmu]) continue;
		vtss_pr_notice("PMU%d: %d events, %d groups, mask: 0x%llx",
			       pmu, vtss_reqcfg.nr_events[pmu], vtss_reqcfg.nr_groups[pmu],
			       vtss_pmu_counters_mask(pmu, true, true));
	}
	if (vtss_reqcfg.nr_global_events <= 0) {
		vtss_pr_error("Invalid PMU events configuration");
		return -EINVAL;
	}
	on_each_cpu(vtss_pmu_save_cb, NULL, 1);
	on_each_cpu(vtss_pmu_start_cb, NULL, 1);
#if defined(X86_FEATURE_NMI_SOURCE) && defined(NMI_SOURCE_VEC_PMI)
	if (cpu_feature_enabled(X86_FEATURE_NMI_SOURCE)) {
		register_nmi_handler(NMI_LOCAL, vtss_nmi_handler,
					0, "vtsspp_pmi", NMI_SOURCE_VEC_PMI);
	} else {
		register_nmi_handler(NMI_LOCAL, vtss_nmi_handler, 0, "vtsspp_pmi", 0);
	}
#else
	register_nmi_handler(NMI_LOCAL, vtss_nmi_handler, 0, "vtsspp_pmi");
#endif
	vtss_pr_notice("Registered NMI handler");
	vtss_pmu_set_active();
	return 0;
}

void vtss_nmi_cleanup(void)
{
	if (!vtss_pmu_set_inactive())
		return;
	on_each_cpu(vtss_pmu_stop_cb, NULL, 1);
	on_each_cpu(vtss_pmu_restore_cb, NULL, 1);
	unregister_nmi_handler(NMI_LOCAL, "vtsspp_pmi");
}

int vtss_pmu_init(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (vtss_pmu_supported()) {
		cpuid(VTSS_CPUID_PMU, &eax, &ebx, &ecx, &edx);
		vtss_pmu_version = vtss_cpuid_pmu_version(eax);
		vtss_pmu_nr_ff_counters = vtss_cpuid_pmu_nr_ff_counters(edx);
		vtss_pmu_nr_gp_counters = vtss_cpuid_pmu_nr_gp_counters(eax);
		vtss_pmu_ff_counter_width = vtss_cpuid_pmu_ff_counter_width(edx);
		vtss_pmu_gp_counter_width = vtss_cpuid_pmu_gp_counter_width(eax);
	}
	vtss_pr_notice("PERFMONv%d: fixed events: %d, generic counters: %d",
		       vtss_pmu_version, vtss_pmu_nr_ff_counters, vtss_pmu_nr_gp_counters);

	vtss_pr_debug_pmu("ff_counter_width=%d, ff_counter_width_mask=%llx",
			  vtss_pmu_ff_counter_width, vtss_pmu_ff_counter_width_mask);
	vtss_pr_debug_pmu("gp_counter_width=%d, gp_counter_width_mask=%llx",
			  vtss_pmu_gp_counter_width, vtss_pmu_gp_counter_width_mask);
	return 0;
}

void vtss_pmu_enable(void)
{
	unsigned long long mask;

	if (!vtss_pmu_active())
		return;

	/* enable counters globally */
	mask = vtss_pmu_counters_mask(vtss_cpu_pmu_id(), true, true);
	wrmsrl(VTSS_IA32_PERF_GLOBAL_CTRL, mask);
	/* clear global overflow status */
	mask |= (VTSS_PMU_GLOBAL_OVF_DSA | VTSS_PMU_GLOBAL_COND_CHGD);

	/* clear arch pebs bit */
	if (vtss_arch_pebs_avail) {
		mask |= VTSS_PMU_GLOBAL_ARCH_PEBS;
	}
 
	wrmsrl(VTSS_IA32_PERF_GLOBAL_OVF_CTRL, mask);
}

void vtss_pmu_disable(void)
{
	int i;

	/* freeze counters */
	for (i = 0; i < vtss_reqcfg.nr_gp_counters[vtss_cpu_pmu_id()]; i++) {
		wrmsrl(VTSS_IA32_PERFEVTSEL0 + i, 0);
		if (vtss_arch_pebs_avail) {
			wrmsrl(VTSS_IA32_PMC_GP0_CFC_C + i*4, 0);
		}
	}
	for (i = 0; i < vtss_reqcfg.nr_ff_counters[vtss_cpu_pmu_id()]; i++) {
		if (vtss_arch_pebs_avail) {
			wrmsrl(VTSS_IA32_PMC_FX0_CFC_C + i*4, 0);
		}
	}
	wrmsrl(VTSS_IA32_FIXED_CTR_CTRL, 0);
}

static void vtss_pmu_event_core_restart(struct vtss_pmu_event *event)
{
	int counter;
	long long interval = event->frozen_count;
	unsigned long long ctrl, mask;

	if (event->interval == 0 && event->aux_interval == 0) { /* no sampling */
		/* wrap the counters around */
		event->frozen_count &= VTSS_PMU_CNT_THRESHOLD - 1;
		interval = -event->frozen_count;
	} else {
		if (interval >= event->aux_interval) { /* overflowed */
			/* use the programmed interval */
			event->frozen_count = interval = event->interval;
		} else { /* underflowed */
			/* use the residual count */
			event->frozen_count = interval = -interval;
			if (event->aux_interval)
				event->frozen_count = -interval;
		}
	}
	/* ensure we do not count backwards */
	if (interval > event->interval)
		interval = event->interval;
	/* set up counters */
	if (vtss_pmu_event_fixed(event)) {
		/* set up the counter */
		wrmsrl(event->cntmsr, -interval & vtss_pmu_ff_counter_width_mask);
		/* set up the control register */
		rdmsrl(event->selmsr, ctrl);
		counter = event->cntmsr - VTSS_IA32_FIXED_CTR0;
		mask = event->selmsk & VTSS_PMU_FIXED_CTR_MASK(counter);
		wrmsrl(event->selmsr, ctrl | mask);
		if (vtss_arch_pebs_avail) {
			wrmsrl(event->selextmsr, event->selextmsk);
		}
	} else {
		/* set up the counter */
		wrmsrl(event->cntmsr, -interval & vtss_pmu_gp_counter_width_mask);
		/* set up the control register */
		wrmsrl(event->selmsr, event->selmsk);
		if (vtss_arch_pebs_avail) {
			wrmsrl(event->selextmsr, event->selextmsk);
		}
	}
	if (event->extmsr)
		wrmsrl(event->extmsr, event->extmsk);
}

static void vtss_pmu_event_core_sample(struct vtss_pmu_event *event)
{
	long long interval = (event->frozen_count > 0) ? event->frozen_count : event->interval;
	int ff_counter_shift = 64 - vtss_pmu_ff_counter_width;
	int gp_counter_shift = 64 - vtss_pmu_gp_counter_width;
	unsigned long long ovf;

	rdmsrl(VTSS_IA32_PERF_GLOBAL_STATUS, ovf);
	/* check only fixed counters overflow */
	ovf &= vtss_pmu_counters_mask(event->pmu_id, true, false);

	wrmsrl(event->selmsr, 0);
	rdmsrl(event->cntmsr, event->frozen_count);

	/* convert the count to 64 bits */
	if (vtss_pmu_event_fixed(event))
		event->frozen_count = (event->frozen_count << ff_counter_shift) >> ff_counter_shift;
	else
		event->frozen_count = (event->frozen_count << gp_counter_shift) >> gp_counter_shift;

	/* ensure we do not count backwards */
	if (event->frozen_count < -interval) {
		event->frozen_count = -event->interval;
		interval = event->interval;
	}
	if (event->interval == 0) {  /* no sampling */
		if (event->frozen_count < interval) {  /* HW and VM sanity check */
			interval = event->frozen_count;
		}
		event->sampled_count += event->frozen_count - interval;
	} else {
		/* update the accrued count by adding the signed
		 * values of current count and sampling interval */
		event->sampled_count += interval + event->frozen_count;
	}
	/* separately preserve counts of overflowed counters, and always save fixed
	 * counters (to show performance impact of synchronization on call tree) */
	if ((event->frozen_count >= 0 && event->frozen_count >= event->aux_interval) ||
	    (vtss_pmu_event_fixed(event) && ovf))
		event->count = event->sampled_count;
}

static bool vtss_pmu_event_core_overflowed(struct vtss_pmu_event *event)
{
	if (event->frozen_count >= 0) {
		/* always signal overflow for no sampling
		 * mode and in case of real overflow */
		return true;
	}
	return false;
}

static void vtss_pmu_event_energy_restart(struct vtss_pmu_event *event)
{
	rdmsrl(event->cntmsr, event->frozen_count);
	event->frozen_count &= 0xffffffffLL;
}

static void vtss_pmu_event_energy_sample(struct vtss_pmu_event *event)
{
	long long count;

	rdmsrl(event->cntmsr, count);
	count &= 0xffffffffLL;
	if (count < event->frozen_count)
		count += 0x100000000LL;
	event->count += (count - event->frozen_count) << 4;
}

static bool vtss_pmu_event_energy_overflowed(struct vtss_pmu_event *event)
{
	return false;
}

static void vtss_pmu_event_ehfi_restart(struct vtss_pmu_event *event)
{
	return;
}

static void vtss_pmu_event_ehfi_sample(struct vtss_pmu_event *event)
{
	rdmsrl(event->cntmsr, event->count);
}

static bool vtss_pmu_event_ehfi_overflowed(struct vtss_pmu_event *event)
{
	return false;
}

/* called from vtss_target_attach() to form a common
 * event chain from the configuration records */
int vtss_pmu_events_init(struct vtss_pmu_events *events, int pmu)
{
	cpuevent_cfg_v1_t *evcfg;
	struct vtss_pmu_event *event;

	events->pmu_id = pmu;
	if (!(vtss_reqcfg.nr_events[pmu] > 0))
		return 0;

	events->evbuf = vtss_zalloc(vtss_reqcfg.nr_events[pmu]*sizeof(struct vtss_pmu_event), GFP_KERNEL);
	if (events->evbuf == NULL) {
		vtss_pr_error("PMU%d: Not enough memory for event buffer", pmu);
		return -ENOMEM;
	}

	vtss_reqcfg_for_each_event(evcfg) {
		if (evcfg->pmu_id != pmu)
			continue;
		event = events->evbuf + events->nr_events;
		event->name = (char *)evcfg + evcfg->name_off;
		if (evcfg->event_id >= VTSS_CFG_CHAIN_SIZE) {
			/* copy uncore events parameters */
			event->selmsr = evcfg->selmsr.idx;
			event->cntmsr = evcfg->cntmsr.idx;
			if (event->selmsr == VTSS_UNC_ENERGY) {
				/* setup energy events interface */
				event->restart = vtss_pmu_event_energy_restart;
				event->sample = vtss_pmu_event_energy_sample;
				event->overflowed = vtss_pmu_event_energy_overflowed;
			} else if (event->selmsr == VTSS_UNC_EHFI) {
				/* setup EHFI events interface */
				event->restart = vtss_pmu_event_ehfi_restart;
				event->sample = vtss_pmu_event_ehfi_sample;
				event->overflowed = vtss_pmu_event_ehfi_overflowed;
			}
		} else {
			/* copy core events parameters */
			event->interval = evcfg->interval;
			event->selmsr = evcfg->selmsr.idx;
			event->selmsk = evcfg->selmsr.val;
			event->cntmsr = evcfg->cntmsr.idx;
			event->cntmsk = evcfg->cntmsr.val;
			event->extmsr = evcfg->extmsr.idx;
			event->extmsk = evcfg->extmsr.val;
			if (vtss_arch_pebs_avail) {
				if (vtss_pmu_event_fixed(event)) {
					event->selextmsr = VTSS_IA32_PMC_FX0_CFC_C +
						4 * (event->cntmsr - VTSS_IA32_FIXED_CTR0);
				} else {
					event->selextmsr = VTSS_IA32_PMC_GP0_CFC_C +
						4 * (event->cntmsr - VTSS_IA32_PMC0);
				}
				event->selextmsk = ((unsigned long long)evcfg->selextval << 32) |
					(event->cntmsk & 0xFFFFFFFF);
			}
			/* setup core events interface */
			event->restart = vtss_pmu_event_core_restart;
			event->sample = vtss_pmu_event_core_sample;
			event->overflowed = vtss_pmu_event_core_overflowed;
			/* setup fixed events as followers of the leading event */
			if (vtss_pmu_event_fixed(event)) {
				if (event->cntmsr != VTSS_IA32_FIXED_CTR0 + 2 &&
				    vtss_pmu_nr_ff_counters > 2) {
					event->aux_interval = event->interval;
					event->interval = 0;
				}
			}
		}
		/* copy MUX parameters */
		event->group_id = evcfg->mux_grp;
		event->mux_alg = evcfg->mux_alg;
		event->mux_arg = evcfg->mux_arg;
		event->pmu_id = pmu;

		/* convert global group index to local */
		event->group_id -= vtss_reqcfg.global_group_offset[pmu];

		vtss_pr_debug_pmu("pmu%d: [%02d] %s: si=%lld, sel=%x/%llx, cnt=%x/%llx",
				  event->pmu_id, event->group_id,  event->name, event->interval,
				  event->selmsr, event->selmsk, event->cntmsr, event->cntmsk);

		events->nr_events++;
	}
	if (events->nr_events != vtss_reqcfg.nr_events[pmu]) {
		vtss_pr_error("PMU%d: uploaded %d events instead of %d", pmu,
			      events->nr_events, vtss_reqcfg.nr_events[pmu]);
		return -EINVAL;
	}
	/* setup PMU events parameters */
	events->nr_groups = vtss_reqcfg.nr_groups[pmu];
	if (events->nr_events > 0) {
		events->mux_alg = events->evbuf[0].mux_alg;
		events->mux_arg = events->evbuf[0].mux_arg;
	}
	vtss_pr_debug_pmu("pmu%d: uploaded %d events from %d mux groups",
			  pmu, events->nr_events, events->nr_groups);
	return 0;
}

/* called from vtss_target_delete() to
 * free internal event buffers */
void vtss_pmu_events_cleanup(struct vtss_pmu_events *events)
{
	if (events == NULL)
		return;

	vtss_zfree(&events->evbuf, events->nr_events*sizeof(struct vtss_pmu_events));
}

/* called from vtss_sched_switch_to() and vtss_pmi_handler()
 * to re-select multiplexion groups and restart counting */
void vtss_pmu_events_restart(struct vtss_pmu_events *events)
{
	bool overflowed;
	struct vtss_pmu_event *event;

	if (!vtss_pmu_active())
		return;

	/* update current MUX index */
	switch (events->mux_alg) {
	case VTSS_CFGMUX_NONE:
		/* no update to MUX index */
		break;
	case VTSS_CFGMUX_TIME:
		if (events->mux_time == 0) {
			/* setup new time interval */
			events->mux_time = vtss_time_cpu() +
					   (events->mux_arg*vtss_hardcfg.cpu_freq);
		} else if (vtss_time_cpu() >= events->mux_time) {
			/* setup new MUX index */
			events->group_id = (events->group_id + 1) % events->nr_groups;
			events->mux_time = 0;
		}
		break;
	case VTSS_CFGMUX_MST:
	case VTSS_CFGMUX_SLV:
		overflowed = false;
		vtss_pmu_for_each_active_event(events, event) {
			if (event->mux_alg == VTSS_CFGMUX_MST) {
				if (event->overflowed(event)) {
					overflowed = true;
					break;
				}
			}
		}
		if (!overflowed)
			break;
		/* fall through */
	case VTSS_CFGMUX_SEQ:
		if (events->mux_cnt >= events->mux_arg) {
			/* setup new MUX index */
			events->group_id = (events->group_id + 1) % events->nr_groups;
			events->mux_cnt = 0;
		}
		/* update MUX counter */
		events->mux_cnt++;
		break;
	}

	/* restart counting */
	vtss_pmu_for_each_active_event(events, event)
		event->restart(event);
}

/* called from vtss_sched_switch_from() and vtss_pmi_handler()
 * to read event values and form a sample record */
void vtss_pmu_events_sample(struct vtss_pmu_events *events)
{
	struct vtss_pmu_event *event;

	vtss_pmu_for_each_active_event(events, event)
		event->sample(event);
}
