From: Dominik Brodowski , John Stultz , Dmitry Torokhov Add the ACPI Powermanagement Timer as x86 kernel timing source. Unlike the Time Stamp Counter, it is a reliable timing source which does not get affected by aggressive powermanagement features like CPU frequency scaling. Some ideas and some code are based on Arjan van de Ven's implementation for 2.4, and on R. Byron Moore's drivers/acpi/hardware/hwtimer.c. --- Documentation/kernel-parameters.txt | 2 arch/i386/kernel/acpi/boot.c | 35 +++++ arch/i386/kernel/timers/Makefile | 1 arch/i386/kernel/timers/common.c | 20 ++ arch/i386/kernel/timers/timer.c | 3 arch/i386/kernel/timers/timer_cyclone.c | 21 --- arch/i386/kernel/timers/timer_pm.c | 222 ++++++++++++++++++++++++++++++++ drivers/acpi/Kconfig | 18 ++ include/asm-i386/timer.h | 4 9 files changed, 305 insertions(+), 21 deletions(-) diff -puN arch/i386/kernel/acpi/boot.c~acpi-pm-timer-3 arch/i386/kernel/acpi/boot.c --- 25/arch/i386/kernel/acpi/boot.c~acpi-pm-timer-3 2004-02-11 01:47:48.000000000 -0800 +++ 25-akpm/arch/i386/kernel/acpi/boot.c 2004-02-11 01:47:48.000000000 -0800 @@ -376,6 +376,37 @@ static int __init acpi_parse_hpet(unsign } #endif +/* detect the location of the ACPI PM Timer */ +#ifdef CONFIG_X86_PM_TIMER +extern u32 pmtmr_ioport; + +static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) +{ + struct fadt_descriptor_rev2 *fadt =0; + + fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size); + if(!fadt) { + printk(KERN_WARNING PREFIX "Unable to map FADT\n"); + return 0; + } + + if (fadt->revision >= FADT2_REVISION_ID) { + /* FADT rev. 2 */ + if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO) + return 0; + + pmtmr_ioport = fadt->xpm_tmr_blk.address; + } else { + /* FADT rev. 1 */ + pmtmr_ioport = fadt->V1_pm_tmr_blk; + } + if (pmtmr_ioport) + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport); + return 0; +} +#endif + + unsigned long __init acpi_find_rsdp (void) { @@ -448,6 +479,10 @@ acpi_boot_init (void) return result; } +#ifdef CONFIG_X86_PM_TIMER + acpi_table_parse(ACPI_FADT, acpi_parse_fadt); +#endif + #ifdef CONFIG_X86_LOCAL_APIC /* diff -puN arch/i386/kernel/timers/common.c~acpi-pm-timer-3 arch/i386/kernel/timers/common.c --- 25/arch/i386/kernel/timers/common.c~acpi-pm-timer-3 2004-02-11 01:47:48.000000000 -0800 +++ 25-akpm/arch/i386/kernel/timers/common.c 2004-02-11 01:47:48.000000000 -0800 @@ -137,3 +137,23 @@ bad_calibration: } #endif +/* calculate cpu_khz */ +void __init init_cpu_khz(void) +{ + if (cpu_has_tsc) { + unsigned long tsc_quotient = calibrate_tsc(); + if (tsc_quotient) { + /* report CPU clock rate in Hz. + * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = + * clock/second. Our precision is about 100 ppm. + */ + { unsigned long eax=0, edx=1000; + __asm__("divl %2" + :"=a" (cpu_khz), "=d" (edx) + :"r" (tsc_quotient), + "0" (eax), "1" (edx)); + printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + } + } + } +} diff -puN arch/i386/kernel/timers/Makefile~acpi-pm-timer-3 arch/i386/kernel/timers/Makefile --- 25/arch/i386/kernel/timers/Makefile~acpi-pm-timer-3 2004-02-11 01:47:48.000000000 -0800 +++ 25-akpm/arch/i386/kernel/timers/Makefile 2004-02-11 01:47:48.000000000 -0800 @@ -6,3 +6,4 @@ obj-y := timer.o timer_none.o timer_tsc. obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o obj-$(CONFIG_HPET_TIMER) += timer_hpet.o +obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o diff -puN arch/i386/kernel/timers/timer.c~acpi-pm-timer-3 arch/i386/kernel/timers/timer.c --- 25/arch/i386/kernel/timers/timer.c~acpi-pm-timer-3 2004-02-11 01:47:48.000000000 -0800 +++ 25-akpm/arch/i386/kernel/timers/timer.c 2004-02-11 01:47:48.000000000 -0800 @@ -19,6 +19,9 @@ static struct timer_opts* timers[] = { #ifdef CONFIG_HPET_TIMER &timer_hpet, #endif +#ifdef CONFIG_X86_PM_TIMER + &timer_pmtmr, +#endif &timer_tsc, &timer_pit, NULL, diff -puN arch/i386/kernel/timers/timer_cyclone.c~acpi-pm-timer-3 arch/i386/kernel/timers/timer_cyclone.c --- 25/arch/i386/kernel/timers/timer_cyclone.c~acpi-pm-timer-3 2004-02-11 01:47:48.000000000 -0800 +++ 25-akpm/arch/i386/kernel/timers/timer_cyclone.c 2004-02-11 01:47:48.000000000 -0800 @@ -212,26 +212,7 @@ static int __init init_cyclone(char* ove } } - /* init cpu_khz. - * XXX - This should really be done elsewhere, - * and in a more generic fashion. -johnstul@us.ibm.com - */ - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc(); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. - */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); - } - } - } + init_cpu_khz(); /* Everything looks good! */ return 0; diff -puN /dev/null arch/i386/kernel/timers/timer_pm.c --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/arch/i386/kernel/timers/timer_pm.c 2004-02-11 01:47:48.000000000 -0800 @@ -0,0 +1,222 @@ +/* + * (C) Dominik Brodowski 2003 + * + * Driver to use the Power Management Timer (PMTMR) available in some + * southbridges as primary timing source for the Linux kernel. + * + * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, + * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. + * + * This file is licensed under the GPL v2. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* The I/O port the PMTMR resides at. + * The location is detected during setup_arch(), + * in arch/i386/acpi/boot.c */ +u32 pmtmr_ioport = 0; + + +/* value of the Power timer at last timer interrupt */ +static u32 offset_tick; +static u32 offset_delay; + +static unsigned long long monotonic_base; +static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; + +#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ + +/*helper function to safely read acpi pm timesource*/ +static inline u32 read_pmtmr(void) +{ + u32 v1=0,v2=0,v3=0; + /* It has been reported that because of various broken + * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time + * source is not latched, so you must read it multiple + * times to insure a safe value is read. + */ + int dbg_failed = 0; + do { + if(dbg_failed) + printk("bad pmtmr read: (%u, %u, %u)\n", + v1,v2,v3); + v1 = inl(pmtmr_ioport); + v2 = inl(pmtmr_ioport); + v3 = inl(pmtmr_ioport); + dbg_failed = 1; + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + /* mask the output to 24 bits */ + return v2 & ACPI_PM_MASK; +} + +static int init_pmtmr(char* override) +{ + u32 value1, value2; + unsigned int i; + + if (override[0] && strncmp(override,"pmtmr",5)) + return -ENODEV; + + if (!pmtmr_ioport) + return -ENODEV; + + /* "verify" this timing source */ + value1 = read_pmtmr(); + for (i = 0; i < 10000; i++) { + value2 = read_pmtmr(); + if (value2 == value1) + continue; + if (value2 > value1) + goto pm_good; + if ((value2 < value1) && ((value2) < 0xFFF)) + goto pm_good; + printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); + return -EINVAL; + } + printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); + return -ENODEV; + +pm_good: + init_cpu_khz(); + return 0; +} + +static inline u32 cyc2us(u32 cycles) +{ + /* The Power Management Timer ticks at 3.579545 ticks per microsecond. + * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] + * + * Even with HZ = 100, delta is at maximum 35796 ticks, so it can + * easily be multiplied with 286 (=0x11E) without having to fear + * u32 overflows. + */ + cycles *= 286; + return (cycles >> 10); +} + +/* + * this gets called during each timer interrupt + * - Called while holding the writer xtime_lock + */ +static void mark_offset_pmtmr(void) +{ + u32 lost, delta, last_offset; + static int first_run = 1; + last_offset = offset_tick; + + write_seqlock(&monotonic_lock); + + offset_tick = read_pmtmr(); + + /* calculate tick interval */ + delta = (offset_tick - last_offset) & ACPI_PM_MASK; + + /* convert to usecs */ + delta = cyc2us(delta); + + /* update the monotonic base value */ + monotonic_base += delta * NSEC_PER_USEC; + write_sequnlock(&monotonic_lock); + + /* convert to ticks */ + delta += offset_delay; + lost = delta / (USEC_PER_SEC / HZ); + offset_delay = delta % (USEC_PER_SEC / HZ); + + + /* compensate for lost ticks */ + if (lost >= 2) + jiffies_64 += lost - 1; + + /* don't calculate delay for first run, + or if we've got less then a tick */ + if (first_run || (lost < 1)) { + first_run = 0; + offset_delay = 0; + } +} + + +static unsigned long long monotonic_clock_pmtmr(void) +{ + u32 last_offset, this_offset; + unsigned long long base, ret; + unsigned seq; + + + /* atomically read monotonic base & last_offset */ + do { + seq = read_seqbegin(&monotonic_lock); + last_offset = offset_tick; + base = monotonic_base; + } while (read_seqretry(&monotonic_lock, seq)); + + /* Read the pmtmr */ + this_offset = read_pmtmr(); + + /* convert to nanoseconds */ + ret = (this_offset - last_offset) & ACPI_PM_MASK; + ret = base + (cyc2us(ret) * NSEC_PER_USEC); + return ret; +} + +/* + * copied from delay_pit + */ +static void delay_pmtmr(unsigned long loops) +{ + int d0; + __asm__ __volatile__( + "\tjmp 1f\n" + ".align 16\n" + "1:\tjmp 2f\n" + ".align 16\n" + "2:\tdecl %0\n\tjns 2b" + :"=&a" (d0) + :"0" (loops)); +} + + +/* + * get the offset (in microseconds) from the last call to mark_offset() + * - Called holding a reader xtime_lock + */ +static unsigned long get_offset_pmtmr(void) +{ + u32 now, offset, delta = 0; + + offset = offset_tick; + now = read_pmtmr(); + delta = (now - offset)&ACPI_PM_MASK; + + return (unsigned long) offset_delay + cyc2us(delta); +} + + +/* acpi timer_opts struct */ +struct timer_opts timer_pmtmr = { + .name = "pmtmr", + .init = init_pmtmr, + .mark_offset = mark_offset_pmtmr, + .get_offset = get_offset_pmtmr, + .monotonic_clock = monotonic_clock_pmtmr, + .delay = delay_pmtmr, +}; + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dominik Brodowski "); +MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); diff -puN Documentation/kernel-parameters.txt~acpi-pm-timer-3 Documentation/kernel-parameters.txt --- 25/Documentation/kernel-parameters.txt~acpi-pm-timer-3 2004-02-11 01:47:48.000000000 -0800 +++ 25-akpm/Documentation/kernel-parameters.txt 2004-02-11 01:47:48.000000000 -0800 @@ -244,7 +244,7 @@ running once the system is up. Forces specified timesource (if avaliable) to be used when calculating gettimeofday(). If specicified timesource is not avalible, it defaults to PIT. - Format: { pit | tsc | cyclone | ... } + Format: { pit | tsc | cyclone | pmtmr } hpet= [IA-32,HPET] option to disable HPET and use PIT. Format: disable diff -puN drivers/acpi/Kconfig~acpi-pm-timer-3 drivers/acpi/Kconfig --- 25/drivers/acpi/Kconfig~acpi-pm-timer-3 2004-02-11 01:47:48.000000000 -0800 +++ 25-akpm/drivers/acpi/Kconfig 2004-02-11 01:47:48.000000000 -0800 @@ -263,5 +263,23 @@ config ACPI_RELAXED_AML particular, many Toshiba laptops require this for correct operation of the AC module. +config X86_PM_TIMER + bool "Power Management Timer Support" + depends on X86 && ACPI + depends on ACPI_BOOT && EXPERIMENTAL + default n + help + The Power Management Timer is available on all ACPI-capable, + in most cases even if ACPI is unusable or blacklisted. + + This timing source is not affected by powermanagement features + like aggressive processor idling, throttling, frequency and/or + voltage scaling, unlike the commonly used Time Stamp Counter + (TSC) timing source. + + So, if you see messages like 'Losing too many ticks!' in the + kernel logs, and/or you are using a this on a notebook which + does not yet have an HPET, you should say "Y" here. + endmenu diff -puN include/asm-i386/timer.h~acpi-pm-timer-3 include/asm-i386/timer.h --- 25/include/asm-i386/timer.h~acpi-pm-timer-3 2004-02-11 01:47:48.000000000 -0800 +++ 25-akpm/include/asm-i386/timer.h 2004-02-11 01:47:48.000000000 -0800 @@ -40,9 +40,13 @@ extern struct timer_opts timer_cyclone; #endif extern unsigned long calibrate_tsc(void); +extern void init_cpu_khz(void); #ifdef CONFIG_HPET_TIMER extern struct timer_opts timer_hpet; extern unsigned long calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr); #endif +#ifdef CONFIG_X86_PM_TIMER +extern struct timer_opts timer_pmtmr; +#endif #endif _