[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-ppc] [PATCH 3/3] ppc/pnv: Add skeletton PowerNV platform
From: |
Cédric Le Goater |
Subject: |
Re: [Qemu-ppc] [PATCH 3/3] ppc/pnv: Add skeletton PowerNV platform |
Date: |
Thu, 28 Jul 2016 19:27:02 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Icedove/45.1.0 |
Hello,
On 07/26/2016 08:23 AM, David Gibson wrote:
> On Mon, Jul 25, 2016 at 04:24:43PM +0200, Cédric Le Goater wrote:
>> From: Benjamin Herrenschmidt <address@hidden>
>>
>> No devices yet, not even an interrupt controller, just to get
>> started.
>>
>> (Folded in Stewart Smith patch to add command lien support)
> ^^^^ typo
and it looks like french to me. The changelog needs an update with
a little paragraph on what we are not emulating. The lowlevel firmware
Hostboot should be mentionned.
>>
>> Signed-off-by: Benjamin Herrenschmidt <address@hidden>
>> [clg: updated for qemu-2.7
>> replaced fprintf by error_report
>> used a common definition of _FDT macro
>> removed VMStateDescription as migration is not yet supported
>> added IBM Copyright statements
>> ]
>> Signed-off-by: Cédric Le Goater <address@hidden>
>> ---
>> default-configs/ppc64-softmmu.mak | 1 +
>> hw/ppc/Makefile.objs | 2 +
>> hw/ppc/pnv.c | 593
>> ++++++++++++++++++++++++++++++++++++++
>> include/hw/ppc/pnv.h | 35 +++
>> 4 files changed, 631 insertions(+)
>> create mode 100644 hw/ppc/pnv.c
>> create mode 100644 include/hw/ppc/pnv.h
>>
>> diff --git a/default-configs/ppc64-softmmu.mak
>> b/default-configs/ppc64-softmmu.mak
>> index c4be59f638ed..516a6e25aba3 100644
>> --- a/default-configs/ppc64-softmmu.mak
>> +++ b/default-configs/ppc64-softmmu.mak
>> @@ -40,6 +40,7 @@ CONFIG_I8259=y
>> CONFIG_XILINX=y
>> CONFIG_XILINX_ETHLITE=y
>> CONFIG_PSERIES=y
>> +CONFIG_POWERNV=y
>> CONFIG_PREP=y
>> CONFIG_MAC=y
>> CONFIG_E500=y
>> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
>> index 91a3420f473a..cbde482dd1b4 100644
>> --- a/hw/ppc/Makefile.objs
>> +++ b/hw/ppc/Makefile.objs
>> @@ -5,6 +5,8 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
>> obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
>> obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
>> obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
>> +# IBM PowerNV
>> +obj-$(CONFIG_POWERNV) += pnv.o
>> ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>> obj-y += spapr_pci_vfio.o
>> endif
>> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
>> new file mode 100644
>> index 000000000000..5096a073e829
>> --- /dev/null
>> +++ b/hw/ppc/pnv.c
>> @@ -0,0 +1,593 @@
>> +/*
>> + * QEMU PowerPC PowerNV model
>> + *
>> + * Copyright (c) 2004-2007 Fabrice Bellard
>> + * Copyright (c) 2007 Jocelyn Mayer
>> + * Copyright (c) 2010 David Gibson, IBM Corporation.
>> + * Copyright (c) 2014-2016 BenH, IBM Corporation.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> copy
>> + * of this software and associated documentation files (the "Software"), to
>> deal
>> + * in the Software without restriction, including without limitation the
>> rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included
>> in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
>> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>> + * THE SOFTWARE.
>> + *
>> + */
>> +#include "qemu/osdep.h"
>> +#include "sysemu/sysemu.h"
>> +#include "hw/hw.h"
>> +#include "hw/fw-path-provider.h"
>> +#include "elf.h"
>> +#include "net/net.h"
>> +#include "sysemu/block-backend.h"
>> +#include "sysemu/cpus.h"
>> +#include "sysemu/kvm.h"
>> +#include "sysemu/numa.h"
>> +#include "kvm_ppc.h"
>> +#include "mmu-hash64.h"
>> +#include "qom/cpu.h"
>> +
>> +#include "hw/boards.h"
>> +#include "hw/ppc/fdt.h"
>> +#include "hw/ppc/ppc.h"
>> +#include "hw/ppc/pnv.h"
>> +#include "hw/loader.h"
>> +
>> +#include "exec/address-spaces.h"
>> +#include "qemu/config-file.h"
>> +#include "qemu/error-report.h"
>> +#include "trace.h"
>> +#include "hw/nmi.h"
>> +
>> +#include "hw/compat.h"
>> +
>> +#include <libfdt.h>
>> +
>> +#define FDT_ADDR 0x01000000
>> +#define FDT_MAX_SIZE 0x00100000
>> +#define FW_MAX_SIZE 0x00400000
>> +#define FW_FILE_NAME "skiboot.lid"
>> +#define KERNEL_FILE_NAME "skiroot.lid"
>> +#define KERNEL_LOAD_ADDR 0x20000000
>
> The KERNEL_* names don't really make sense here.
yes.
>> +#define TIMEBASE_FREQ 512000000ULL
>> +
>> +#define MAX_CPUS 255
>> +
>> +#define PHANDLE_XICP 0x00001111
>
> Since you're not implementing an interrupt controller yet, I don't
> think you need this define.
indeed. This is too early.
>> +
>> +typedef struct sPowerNVMachineState sPowerNVMachineState;
>> +
>> +#define TYPE_POWERNV_MACHINE "powernv-machine"
>> +#define POWERNV_MACHINE(obj) \
>> + OBJECT_CHECK(sPowerNVMachineState, (obj), TYPE_POWERNV_MACHINE)
>> +
>> +/**
>> + * sPowerNVMachineState:
>> + */
>> +struct sPowerNVMachineState {
>> + /*< private >*/
>> + MachineState parent_obj;
>> + PnvSystem sys;
>
> Having all the system specific information in this substructure is a
> bit awkward. I think it would be preferable to just open code the
> fields you need directly into sPowerNVMachineState.
I agree.
>> +};
>> +
>> +static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
>> + size_t maxsize)
>> +{
>> + size_t maxcells = maxsize / sizeof(uint32_t);
>> + int i, j, count;
>> + uint32_t *p = prop;
>> +
>> + for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
>> + struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
>> +
>> + if (!sps->page_shift) {
>> + break;
>> + }
>> + for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
>> + if (sps->enc[count].page_shift == 0) {
>> + break;
>> + }
>> + }
>> + if ((p - prop) >= (maxcells - 3 - count * 2)) {
>> + break;
>> + }
>> + *(p++) = cpu_to_be32(sps->page_shift);
>> + *(p++) = cpu_to_be32(sps->slb_enc);
>> + *(p++) = cpu_to_be32(count);
>> + for (j = 0; j < count; j++) {
>> + *(p++) = cpu_to_be32(sps->enc[j].page_shift);
>> + *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
>> + }
>> + }
>> +
>> + return (p - prop) * sizeof(uint32_t);
>> +}
>
> Hm, I do wonder if we should make a place for helper functions that
> are common between spapr and powernv.
done.
powernv_populate_memory() and powernv_populate_memory_node() are also
very similar in spapr. I wonder if we can merge them ?
>> +static void powernv_populate_memory_node(void *fdt, int nodeid, hwaddr
>> start,
>> + hwaddr size)
>> +{
>> + /* Probablly bogus, need to match with what's going on in CPU nodes */
>> + uint32_t chip_id[] = {
>> + cpu_to_be32(0x0), cpu_to_be32(nodeid)
>> + };
>> + char *mem_name;
>> + uint64_t mem_reg_property[2];
>> +
>> + mem_reg_property[0] = cpu_to_be64(start);
>> + mem_reg_property[1] = cpu_to_be64(size);
>> +
>> + mem_name = g_strdup_printf("memory@"TARGET_FMT_lx, start);
>> + _FDT((fdt_begin_node(fdt, mem_name)));
>> + g_free(mem_name);
>> + _FDT((fdt_property_string(fdt, "device_type", "memory")));
>> + _FDT((fdt_property(fdt, "reg", mem_reg_property,
>> + sizeof(mem_reg_property))));
>> + _FDT((fdt_property(fdt, "ibm,chip-id", chip_id, sizeof(chip_id))));
>> + _FDT((fdt_end_node(fdt)));
>> +}
>> +
>> +static int powernv_populate_memory(void *fdt)
>> +{
>> + hwaddr mem_start, node_size;
>> + int i, nb_nodes = nb_numa_nodes;
>> + NodeInfo *nodes = numa_info;
>> + NodeInfo ramnode;
>> +
>> + /* No NUMA nodes, assume there is just one node with whole RAM */
>> + if (!nb_numa_nodes) {
>> + nb_nodes = 1;
>> + ramnode.node_mem = ram_size;
>> + nodes = &ramnode;
>> + }
>> +
>> + for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
>> + if (!nodes[i].node_mem) {
>> + continue;
>> + }
>> + if (mem_start >= ram_size) {
>> + node_size = 0;
>> + } else {
>> + node_size = nodes[i].node_mem;
>> + if (node_size > ram_size - mem_start) {
>> + node_size = ram_size - mem_start;
>> + }
>> + }
>> + for ( ; node_size; ) {
>> + hwaddr sizetmp = pow2floor(node_size);
>> +
>> + /* mem_start != 0 here */
>> + if (ctzl(mem_start) < ctzl(sizetmp)) {
>> + sizetmp = 1ULL << ctzl(mem_start);
>> + }
>> +
>> + powernv_populate_memory_node(fdt, i, mem_start, sizetmp);
>> + node_size -= sizetmp;
>> + mem_start += sizetmp;
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static void powernv_create_cpu_node(void *fdt, CPUState *cs, int
>> smt_threads)
>> +{
>> + PowerPCCPU *cpu = POWERPC_CPU(cs);
>> + CPUPPCState *env = &cpu->env;
>> + DeviceClass *dc = DEVICE_GET_CLASS(cs);
>> + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
>> + uint32_t servers_prop[smt_threads];
>> + uint32_t gservers_prop[smt_threads * 2];
>> + int i, index = ppc_get_vcpu_dt_id(cpu);
>> + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
>> + 0xffffffff, 0xffffffff};
>> + uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
>> + uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
>> + uint32_t page_sizes_prop[64];
>> + size_t page_sizes_prop_size;
>> + char *nodename;
>> +
>> + if ((index % smt_threads) != 0) {
>> + return;
>> + }
>
> This hack exists in spapr for historical reasons. I'd prefer if on
> powernv you treat cpu cores as first class objects from the beginning,
> and just call this once per core.
like this is done with sPAPRCPUCore I suppose.
I will get inspiration from the spapr cores and see if I can find some
common ground with the powernv cores. we should.
>> + nodename = g_strdup_printf("address@hidden", dc->fw_name, index);
>> +
>> + _FDT((fdt_begin_node(fdt, nodename)));
>> +
>> + g_free(nodename);
>> +
>> + _FDT((fdt_property_cell(fdt, "reg", index)));
>> + _FDT((fdt_property_string(fdt, "device_type", "cpu")));
>> +
>> + _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
>> + _FDT((fdt_property_cell(fdt, "d-cache-block-size",
>> + env->dcache_line_size)));
>> + _FDT((fdt_property_cell(fdt, "d-cache-line-size",
>> + env->dcache_line_size)));
>> + _FDT((fdt_property_cell(fdt, "i-cache-block-size",
>> + env->icache_line_size)));
>> + _FDT((fdt_property_cell(fdt, "i-cache-line-size",
>> + env->icache_line_size)));
>> +
>> + if (pcc->l1_dcache_size) {
>> + _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
>> + } else {
>> + error_report("Warning: Unknown L1 dcache size for cpu");
>> + }
>> + if (pcc->l1_icache_size) {
>> + _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
>> + } else {
>> + error_report("Warning: Unknown L1 icache size for cpu");
>> + }
>> +
>> + _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
>> + _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
>> + _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
>> + _FDT((fdt_property_string(fdt, "status", "okay")));
>> + _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
>> +
>> + if (env->spr_cb[SPR_PURR].oea_read) {
>> + _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
>> + }
>> +
>> + if (env->mmu_model & POWERPC_MMU_1TSEG) {
>> + _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
>> + segs, sizeof(segs))));
>> + }
>> +
>> + /* Advertise VMX/VSX (vector extensions) if available
>> + * 0 / no property == no vector extensions
>> + * 1 == VMX / Altivec available
>> + * 2 == VSX available */
>> + if (env->insns_flags & PPC_ALTIVEC) {
>> + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
>> +
>> + _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
>> + }
>> +
>> + /* Advertise DFP (Decimal Floating Point) if available
>> + * 0 / no property == no DFP
>> + * 1 == DFP available */
>> + if (env->insns_flags2 & PPC2_DFP) {
>> + _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
>> + }
>> +
>> + page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
>> + sizeof(page_sizes_prop));
>> + if (page_sizes_prop_size) {
>> + _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
>> + page_sizes_prop, page_sizes_prop_size)));
>> + }
>> +
>> + /* XXX Just a hack for now */
>> + _FDT((fdt_property_cell(fdt, "ibm,chip-id", 0)));
>> +
>> + if (cpu->cpu_version) {
>> + _FDT((fdt_property_cell(fdt, "cpu-version", cpu->cpu_version)));
>> + }
>> +
>> + /* Build interrupt servers and gservers properties */
>> + for (i = 0; i < smt_threads; i++) {
>> + servers_prop[i] = cpu_to_be32(index + i);
>> + /* Hack, direct the group queues back to cpu 0 */
>> + gservers_prop[i * 2] = cpu_to_be32(index + i);
>> + gservers_prop[i * 2 + 1] = 0;
>> + }
>> + _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
>> + servers_prop, sizeof(servers_prop))));
>> + _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
>> + gservers_prop, sizeof(gservers_prop))));
>> +
>> + _FDT((fdt_end_node(fdt)));
>> +}
>> +
>> +static void *powernv_create_fdt(PnvSystem *sys, const char *kernel_cmdline,
>> + uint32_t initrd_base, uint32_t initrd_size)
>> +{
>> + void *fdt;
>> + CPUState *cs;
>> + int smt = kvmppc_smt_threads();
>> + uint32_t start_prop = cpu_to_be32(initrd_base);
>> + uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
>> + char *buf;
>> + const char plat_compat[] = "qemu,powernv\0ibm,powernv";
>> +
>> + fdt = g_malloc0(FDT_MAX_SIZE);
>> + _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
>> + _FDT((fdt_finish_reservemap(fdt)));
>> +
>> + /* Root node */
>> + _FDT((fdt_begin_node(fdt, "")));
>> + _FDT((fdt_property_string(fdt, "model", "IBM PowerNV (emulated by
>> qemu)")));
>> + _FDT((fdt_property(fdt, "compatible", plat_compat,
>> sizeof(plat_compat))));
>> +
>> + /*
>> + * Add info to guest to indentify which host is it being run on
>> + * and what is the uuid of the guest
>> + */
>> + if (kvmppc_get_host_model(&buf)) {
>> + _FDT((fdt_property_string(fdt, "host-model", buf)));
>> + g_free(buf);
>> + }
>> + if (kvmppc_get_host_serial(&buf)) {
>> + _FDT((fdt_property_string(fdt, "host-serial", buf)));
>> + g_free(buf);
>> + }
>
> Those seem dubious for a non-paravirt platform.
OK. I will remove all the KVM related parts. We can add them later on when
we work on kvm-pr support.
>> +
>> + buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
>> + qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
>> + qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
>> + qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
>> + qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
>> + qemu_uuid[14], qemu_uuid[15]);
>> +
>> + _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
>> + g_free(buf);
>> +
>> + _FDT((fdt_begin_node(fdt, "chosen")));
>> + if (kernel_cmdline) {
>> + _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
>> + }
>> + _FDT((fdt_property(fdt, "linux,initrd-start",
>> + &start_prop, sizeof(start_prop))));
>> + _FDT((fdt_property(fdt, "linux,initrd-end",
>> + &end_prop, sizeof(end_prop))));
>> + _FDT((fdt_end_node(fdt)));
>> +
>> + _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
>> + _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
>> +
>> + /* cpus */
>> + _FDT((fdt_begin_node(fdt, "cpus")));
>> + _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
>> + _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
>> +
>> + CPU_FOREACH(cs) {
>> + powernv_create_cpu_node(fdt, cs, smt);
>> + }
>> +
>> + _FDT((fdt_end_node(fdt)));
>> +
>> + /* Memory */
>> + _FDT((powernv_populate_memory(fdt)));
>> +
>> + /* /hypervisor node */
>
> This really doesn't seem like it makes sense on a non-paravirt platform.
>
>> + if (kvm_enabled()) {
>> + uint8_t hypercall[16];
>> +
>> + /* indicate KVM hypercall interface */
>> + _FDT((fdt_begin_node(fdt, "hypervisor")));
>> + _FDT((fdt_property_string(fdt, "compatible", "linux,kvm")));
>> + if (kvmppc_has_cap_fixup_hcalls()) {
>> + /*
>> + * Older KVM versions with older guest kernels were broken with
>> the
>> + * magic page, don't allow the guest to map it.
>> + */
>> + kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
>> + sizeof(hypercall));
>> + _FDT((fdt_property(fdt, "hcall-instructions", hypercall,
>> + sizeof(hypercall))));
>> + }
>> + _FDT((fdt_end_node(fdt)));
>> + }
>> +
>> + _FDT((fdt_end_node(fdt))); /* close root node */
>> + _FDT((fdt_finish(fdt)));
>> +
>> + return fdt;
>> +}
>> +
>> +static void powernv_cpu_reset(void *opaque)
>> +{
>> + PowerPCCPU *cpu = opaque;
>> + CPUState *cs = CPU(cpu);
>> + CPUPPCState *env = &cpu->env;
>> +
>> + cpu_reset(cs);
>> +
>> + env->spr[SPR_PIR] = ppc_get_vcpu_dt_id(cpu);
>> + env->spr[SPR_HIOR] = 0;
>> + env->gpr[3] = FDT_ADDR;
>> + env->nip = 0x10;
>> + env->msr |= MSR_HVB;
>> +}
>> +
>> +static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no)
>> +{
>> + PnvChip *chip = &sys->chips[chip_no];
>
> Hm, my inclination would be to make the pnv chips a full QOM type,
> unless there's a reason not to.
Yes. let's go that way and I will try to port the rest of the patchset
before sending to catch any issues.
>> +
>> + if (chip_no >= PNV_MAX_CHIPS) {
>> + return;
>> + }
>> +
>> + /* XXX Improve chip numbering to better match HW */
>> + chip->chip_id = chip_no;
>> +}
>> +
>> +static void ppc_powernv_init(MachineState *machine)
>> +{
>> + ram_addr_t ram_size = machine->ram_size;
>> + const char *cpu_model = machine->cpu_model;
>> + const char *kernel_filename = machine->kernel_filename;
>> + const char *initrd_filename = machine->initrd_filename;
>> + uint32_t initrd_base = 0;
>> + long initrd_size = 0;
>> + PowerPCCPU *cpu;
>> + CPUPPCState *env;
>> + MemoryRegion *sysmem = get_system_memory();
>> + MemoryRegion *ram = g_new(MemoryRegion, 1);
>> + sPowerNVMachineState *pnv_machine = POWERNV_MACHINE(machine);
>> + PnvSystem *sys = &pnv_machine->sys;
>> + long fw_size;
>> + char *filename;
>> + void *fdt;
>> + int i;
>> +
>> + /* init CPUs */
>> + if (cpu_model == NULL) {
>> + cpu_model = kvm_enabled() ? "host" : "POWER8";
>> + }
>> +
>> + for (i = 0; i < smp_cpus; i++) {
>> + cpu = cpu_ppc_init(cpu_model);
>> + if (cpu == NULL) {
>> + error_report("Unable to find PowerPC CPU definition");
>> + exit(1);
>> + }
>> + env = &cpu->env;
>> +
>> + /* Set time-base frequency to 512 MHz */
>> + cpu_ppc_tb_init(env, TIMEBASE_FREQ);
>> +
>> + /* MSR[IP] doesn't exist nowadays */
>> + env->msr_mask &= ~(1 << 6);
>> +
>> + qemu_register_reset(powernv_cpu_reset, cpu);
>
> As noted above, I think we want to start powernv off with "new style"
> cpu instantiation. So for powernv, I think you want the machine to
> create chip objects, which will create core objects which will create
> the vcpu/thread objects.
ok.
>> + }
>> +
>> + /* allocate RAM */
>> + memory_region_allocate_system_memory(ram, NULL, "ppc_powernv.ram",
>> + ram_size);
>> + memory_region_add_subregion(sysmem, 0, ram);
>> +
>> + /* XXX We should decide how many chips to create based on #cores and
>> + * Venice vs. Murano vs. Naples chip type etc..., for now, just create
>> + * one chip. Also creation of the CPUs should be done per-chip
>> + */
>> + sys->num_chips = 1;
>> +
>> + /* Create only one PHB for now until I figure out what's wrong
>> + * when I create more (resource assignment failures in Linux)
>> + */
>> + pnv_create_chip(sys, 0);
>> +
>> + if (bios_name == NULL) {
>> + bios_name = FW_FILE_NAME;
>> + }
>> + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
>> + fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
>> + if (fw_size < 0) {
>> + hw_error("qemu: could not load OPAL '%s'\n", filename);
>> + exit(1);
>> + }
>> + g_free(filename);
>> +
>> +
>> + if (kernel_filename == NULL) {
>> + kernel_filename = KERNEL_FILE_NAME;
>> + }
>> + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, kernel_filename);
>> + fw_size = load_image_targphys(filename, 0x20000000, 0x2000000);
>> + if (fw_size < 0) {
>> + hw_error("qemu: could not load kernel'%s'\n", filename);
>> + exit(1);
>> + }
>> + g_free(filename);
>> +
>> + /* load initrd */
>> + if (initrd_filename) {
>> + /* Try to locate the initrd in the gap between the kernel
>> + * and the firmware. Add a bit of space just in case
>> + */
>> + initrd_base = 0x40000000;
>> + initrd_size = load_image_targphys(initrd_filename, initrd_base,
>> + 0x10000000); /* 128MB max */
>> + if (initrd_size < 0) {
>> + error_report("qemu: could not load initial ram disk
>> '%s'",
>> + initrd_filename);
>> + exit(1);
>> + }
>> + } else {
>> + initrd_base = 0;
>> + initrd_size = 0;
>> + }
>> + fdt = powernv_create_fdt(sys, machine->kernel_cmdline,
>> + initrd_base, initrd_size);
>
> So the fact that spapr fdt creation is split between machine init and
> reset time causes us a fair bit of pain.
what do you mean ? we should not be following the same spapr pattern,
that is to create a fdt skeleton in init and finalize the fdt creation
in reset. I have a patch just doing that to populate the tree with
some more devices for IPMI. So I guess I need to rework that part.
So we should be using a realize ops to handle the last bits of the fdt
creation ?
>> + cpu_physical_memory_write(FDT_ADDR, fdt, fdt_totalsize(fdt));
>> +}
>> +
>> +static int powernv_kvm_type(const char *vm_type)
>> +{
>> + /* Always force PR KVM */
>> + return 2;
>
> This doesn't really seem right to me. I think you should be giving an
> error if the user tries to specify HV, rather than silently changing
> to PR.
OK. I will drop the whole KVM feature for the moment.
>> +}
>> +
>> +static void ppc_cpu_do_nmi_on_cpu(void *arg)
>> +{
>> + CPUState *cs = arg;
>> +
>> + cpu_synchronize_state(cs);
>> + ppc_cpu_do_system_reset(cs);
>> +}
>> +
>> +static void powernv_nmi(NMIState *n, int cpu_index, Error **errp)
>> +{
>> + CPUState *cs;
>> +
>> + CPU_FOREACH(cs) {
>> + async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, cs);
>> + }
>> +}
>
> It may be simpler to just leave the nmi stuff out for now, until you
> have a real implementation.
OK.
>
>> +
>> +static void powernv_machine_class_init(ObjectClass *oc, void *data)
>> +{
>> + MachineClass *mc = MACHINE_CLASS(oc);
>> + NMIClass *nc = NMI_CLASS(oc);
>> +
>> + mc->init = ppc_powernv_init;
>> + mc->block_default_type = IF_SCSI;
>> + mc->max_cpus = MAX_CPUS;
>> + mc->no_parallel = 1;
>> + mc->default_boot_order = NULL;
>> + mc->kvm_type = powernv_kvm_type;
>> +
>> + nc->nmi_monitor_handler = powernv_nmi;
>> +}
>> +
>> +static const TypeInfo powernv_machine_info = {
>> + .name = TYPE_POWERNV_MACHINE,
>> + .parent = TYPE_MACHINE,
>> + .abstract = true,
>> + .instance_size = sizeof(sPowerNVMachineState),
>> + .class_init = powernv_machine_class_init,
>> + .interfaces = (InterfaceInfo[]) {
>> + { TYPE_NMI },
>> + { }
>> + },
>> +};
>> +
>> +static void powernv_machine_2_5_class_init(ObjectClass *oc, void *data)
>> +{
>> + MachineClass *mc = MACHINE_CLASS(oc);
>> +
>> + mc->name = "powernv-2.5";
>
> Looks like the version numbthis needs an update for current qemu.
I guess we should be using v1 as this is the first version of PowerNV,
unless we want to tie that to Opal (v3). would that make sense ?
Thanks,
C.
>> + mc->desc = "PowerNV v2.5";
>> + mc->alias = "powernv";
>> +}
>> +
>> +static const TypeInfo powernv_machine_2_5_info = {
>> + .name = MACHINE_TYPE_NAME("powernv-2.5"),
>> + .parent = TYPE_POWERNV_MACHINE,
>> + .class_init = powernv_machine_2_5_class_init,
>> +};
>> +
>> +static void powernv_machine_register_types(void)
>> +{
>> + type_register_static(&powernv_machine_info);
>> + type_register_static(&powernv_machine_2_5_info);
>> +}
>> +
>> +type_init(powernv_machine_register_types)
>> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
>> new file mode 100644
>> index 000000000000..383a336e7cd3
>> --- /dev/null
>> +++ b/include/hw/ppc/pnv.h
>> @@ -0,0 +1,35 @@
>> +/*
>> + * QEMU PowerNV various definitions
>> + *
>> + * Copyright (c) 2014-2016 BenH, IBM Corporation.
>> + *
>> + * This library is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2 of the License, or (at your option) any later version.
>> + *
>> + * This library is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with this library; if not, see
>> <http://www.gnu.org/licenses/>.
>> + */
>> +#ifndef _PPC_PNV_H
>> +#define _PPC_PNV_H
>> +
>> +#include "hw/hw.h"
>> +
>> +/* Should we turn that into a QOjb of some sort ? */
>> +typedef struct PnvChip {
>> + uint32_t chip_id;
>> +} PnvChip;
>> +
>> +typedef struct PnvSystem {
>> + uint32_t num_chips;
>> +#define PNV_MAX_CHIPS 1
>> + PnvChip chips[PNV_MAX_CHIPS];
>> +} PnvSystem;
>> +
>> +#endif /* _PPC_PNV_H */
>