Revert "cpufreq: intel_pstate: Use ACPI perf configuration"
[linux-drm-fsl-dcu.git] / drivers / cpufreq / intel_pstate.c
1 /*
2  * intel_pstate.c: Native P state management for Intel processors
3  *
4  * (C) Copyright 2012 Intel Corporation
5  * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; version 2
10  * of the License.
11  */
12
13 #include <linux/kernel.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/module.h>
16 #include <linux/ktime.h>
17 #include <linux/hrtimer.h>
18 #include <linux/tick.h>
19 #include <linux/slab.h>
20 #include <linux/sched.h>
21 #include <linux/list.h>
22 #include <linux/cpu.h>
23 #include <linux/cpufreq.h>
24 #include <linux/sysfs.h>
25 #include <linux/types.h>
26 #include <linux/fs.h>
27 #include <linux/debugfs.h>
28 #include <linux/acpi.h>
29 #include <linux/vmalloc.h>
30 #include <trace/events/power.h>
31
32 #include <asm/div64.h>
33 #include <asm/msr.h>
34 #include <asm/cpu_device_id.h>
35 #include <asm/cpufeature.h>
36
/* MSR numbers read with rdmsrl() by the byt_* P-state and VID helpers. */
#define BYT_RATIOS              0x66a
#define BYT_VIDS                0x66b
#define BYT_TURBO_RATIOS        0x66c
#define BYT_TURBO_VIDS          0x66d

/*
 * Fixed-point arithmetic: values carry FRAC_BITS fractional bits.
 * int_tofp() widens to 64 bits and shifts an integer up into fixed point;
 * fp_toint() shifts back down, discarding the fractional bits.
 */
#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)
45
46 static inline int32_t mul_fp(int32_t x, int32_t y)
47 {
48         return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
49 }
50
51 static inline int32_t div_fp(s64 x, s64 y)
52 {
53         return div64_s64((int64_t)x << FRAC_BITS, y);
54 }
55
56 static inline int ceiling_fp(int32_t x)
57 {
58         int mask, ret;
59
60         ret = fp_toint(x);
61         mask = (1 << FRAC_BITS) - 1;
62         if (x & mask)
63                 ret += 1;
64         return ret;
65 }
66
/* One measurement taken by intel_pstate_sample(). */
struct sample {
        int32_t core_pct_busy;  /* fixed-point aperf/mperf ratio in percent */
        u64 aperf;              /* APERF delta since the previous sample */
        u64 mperf;              /* MPERF delta since the previous sample */
        u64 tsc;                /* TSC delta since the previous sample */
        int freq;               /* derived in intel_pstate_calc_busy() */
        ktime_t time;           /* ktime_get() when the sample was taken */
};
75
/* Per-CPU P-state values, filled by intel_pstate_get_cpu_pstates(). */
struct pstate_data {
        int     current_pstate;         /* last value passed to pstate_funcs.set() */
        int     min_pstate;             /* from pstate_funcs.get_min() */
        int     max_pstate;             /* from pstate_funcs.get_max() */
        int     max_pstate_physical;    /* from pstate_funcs.get_max_physical() */
        int     scaling;                /* multiplied by a pstate for trace_cpu_frequency() */
        int     turbo_pstate;           /* from pstate_funcs.get_turbo() */
};
84
/* Voltage ID data, populated by byt_get_vid() and used by byt_set_pstate(). */
struct vid_data {
        int min;        /* fixed-point, from BYT_VIDS */
        int max;        /* fixed-point, from BYT_VIDS */
        int turbo;      /* raw value from BYT_TURBO_VIDS */
        int32_t ratio;  /* fixed-point VID step per P-state step */
};
91
/* State of the PID controller evaluated by pid_calc(). */
struct _pid {
        int setpoint;           /* target busy value (integer) */
        int32_t integral;       /* fixed-point accumulated error */
        int32_t p_gain;         /* fixed-point fraction (percent / 100) */
        int32_t i_gain;         /* fixed-point fraction (percent / 100) */
        int32_t d_gain;         /* fixed-point fraction (percent / 100) */
        int deadband;           /* errors within this magnitude yield no correction */
        int32_t last_err;       /* fixed-point error from the previous evaluation */
};
101
/* Everything the driver tracks for one CPU. */
struct cpudata {
        int cpu;                        /* CPU number this entry controls */

        struct timer_list timer;        /* periodic sampling timer */

        struct pstate_data pstate;
        struct vid_data vid;
        struct _pid pid;

        ktime_t last_sample_time;       /* sample.time of the previous sample */
        u64     prev_aperf;             /* raw counters from the previous sample, */
        u64     prev_mperf;             /* used to turn new readings into deltas */
        u64     prev_tsc;
        struct sample sample;           /* most recent sample */
};
117
/* Per-CPU data pointers, indexed by CPU number; entries may be NULL. */
static struct cpudata **all_cpu_data;
/* Tunables for the sampling timer and the PID controller. */
struct pstate_adjust_policy {
        int sample_rate_ms;     /* sampling timer period in milliseconds */
        int deadband;
        int setpoint;
        int p_gain_pct;         /* gains expressed as percentages */
        int d_gain_pct;
        int i_gain_pct;
};
127
/* CPU-model specific callbacks used to query and program P-states. */
struct pstate_funcs {
        int (*get_max)(void);
        int (*get_max_physical)(void);
        int (*get_min)(void);
        int (*get_turbo)(void);
        int (*get_scaling)(void);
        void (*set)(struct cpudata*, int pstate);
        void (*get_vid)(struct cpudata *);      /* optional; checked for NULL before use */
};
137
/* Default PID tunables and callbacks bundled for one CPU family. */
struct cpu_defaults {
        struct pstate_adjust_policy pid_policy;
        struct pstate_funcs funcs;
};
142
/* Active PID tunables; also exposed for writing through debugfs. */
static struct pstate_adjust_policy pid_params;
/* Active model-specific callbacks. */
static struct pstate_funcs pstate_funcs;
/* Non-zero when HWP is in use; selects the HWP code paths. */
static int hwp_active;
146
/* Performance limits applied when choosing a P-state. */
struct perf_limits {
        int no_turbo;           /* user request written through sysfs (0 or 1) */
        int turbo_disabled;     /* refreshed by update_turbo_state() */
        int max_perf_pct;       /* effective maximum, in percent */
        int min_perf_pct;       /* effective minimum, in percent */
        int32_t max_perf;       /* max_perf_pct / 100 in fixed point */
        int32_t min_perf;       /* min_perf_pct / 100 in fixed point */
        int max_policy_pct;
        int max_sysfs_pct;      /* last value written to max_perf_pct in sysfs */
        int min_policy_pct;
        int min_sysfs_pct;      /* last value written to min_perf_pct in sysfs */
};
159
/* Limit set whose minimum is pinned to 100% (min_perf is fixed-point 1.0). */
static struct perf_limits performance_limits = {
        .no_turbo = 0,
        .turbo_disabled = 0,
        .max_perf_pct = 100,
        .max_perf = int_tofp(1),
        .min_perf_pct = 100,
        .min_perf = int_tofp(1),
        .max_policy_pct = 100,
        .max_sysfs_pct = 100,
        .min_policy_pct = 0,
        .min_sysfs_pct = 0,
};
172
/* Limit set spanning the full range: minimum 0%, maximum 100%. */
static struct perf_limits powersave_limits = {
        .no_turbo = 0,
        .turbo_disabled = 0,
        .max_perf_pct = 100,
        .max_perf = int_tofp(1),
        .min_perf_pct = 0,
        .min_perf = 0,
        .max_policy_pct = 100,
        .max_sysfs_pct = 100,
        .min_policy_pct = 0,
        .min_sysfs_pct = 0,
};
185
/*
 * Currently active limit set.  Starts at the performance limits when the
 * default governor is "performance", otherwise at the powersave limits.
 */
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
static struct perf_limits *limits = &performance_limits;
#else
static struct perf_limits *limits = &powersave_limits;
#endif
191
192 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
193                              int deadband, int integral) {
194         pid->setpoint = setpoint;
195         pid->deadband  = deadband;
196         pid->integral  = int_tofp(integral);
197         pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
198 }
199
200 static inline void pid_p_gain_set(struct _pid *pid, int percent)
201 {
202         pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
203 }
204
205 static inline void pid_i_gain_set(struct _pid *pid, int percent)
206 {
207         pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
208 }
209
210 static inline void pid_d_gain_set(struct _pid *pid, int percent)
211 {
212         pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
213 }
214
215 static signed int pid_calc(struct _pid *pid, int32_t busy)
216 {
217         signed int result;
218         int32_t pterm, dterm, fp_error;
219         int32_t integral_limit;
220
221         fp_error = int_tofp(pid->setpoint) - busy;
222
223         if (abs(fp_error) <= int_tofp(pid->deadband))
224                 return 0;
225
226         pterm = mul_fp(pid->p_gain, fp_error);
227
228         pid->integral += fp_error;
229
230         /*
231          * We limit the integral here so that it will never
232          * get higher than 30.  This prevents it from becoming
233          * too large an input over long periods of time and allows
234          * it to get factored out sooner.
235          *
236          * The value of 30 was chosen through experimentation.
237          */
238         integral_limit = int_tofp(30);
239         if (pid->integral > integral_limit)
240                 pid->integral = integral_limit;
241         if (pid->integral < -integral_limit)
242                 pid->integral = -integral_limit;
243
244         dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
245         pid->last_err = fp_error;
246
247         result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
248         result = result + (1 << (FRAC_BITS-1));
249         return (signed int)fp_toint(result);
250 }
251
252 static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
253 {
254         pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
255         pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
256         pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);
257
258         pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
259 }
260
261 static inline void intel_pstate_reset_all_pid(void)
262 {
263         unsigned int cpu;
264
265         for_each_online_cpu(cpu) {
266                 if (all_cpu_data[cpu])
267                         intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
268         }
269 }
270
271 static inline void update_turbo_state(void)
272 {
273         u64 misc_en;
274         struct cpudata *cpu;
275
276         cpu = all_cpu_data[0];
277         rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
278         limits->turbo_disabled =
279                 (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
280                  cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
281 }
282
283 static void intel_pstate_hwp_set(void)
284 {
285         int min, hw_min, max, hw_max, cpu, range, adj_range;
286         u64 value, cap;
287
288         rdmsrl(MSR_HWP_CAPABILITIES, cap);
289         hw_min = HWP_LOWEST_PERF(cap);
290         hw_max = HWP_HIGHEST_PERF(cap);
291         range = hw_max - hw_min;
292
293         get_online_cpus();
294
295         for_each_online_cpu(cpu) {
296                 rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
297                 adj_range = limits->min_perf_pct * range / 100;
298                 min = hw_min + adj_range;
299                 value &= ~HWP_MIN_PERF(~0L);
300                 value |= HWP_MIN_PERF(min);
301
302                 adj_range = limits->max_perf_pct * range / 100;
303                 max = hw_min + adj_range;
304                 if (limits->no_turbo) {
305                         hw_max = HWP_GUARANTEED_PERF(cap);
306                         if (hw_max < max)
307                                 max = hw_max;
308                 }
309
310                 value &= ~HWP_MAX_PERF(~0L);
311                 value |= HWP_MAX_PERF(max);
312                 wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
313         }
314
315         put_online_cpus();
316 }
317
318 /************************** debugfs begin ************************/
319 static int pid_param_set(void *data, u64 val)
320 {
321         *(u32 *)data = val;
322         intel_pstate_reset_all_pid();
323         return 0;
324 }
325
326 static int pid_param_get(void *data, u64 *val)
327 {
328         *val = *(u32 *)data;
329         return 0;
330 }
331 DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");
332
/* Associates a debugfs file name with the tunable it exposes. */
struct pid_param {
        char *name;     /* debugfs file name */
        void *value;    /* address of the tunable, passed as the file's data */
};
337
/* debugfs files created for the PID tunables; terminated by a NULL name. */
static struct pid_param pid_files[] = {
        {"sample_rate_ms", &pid_params.sample_rate_ms},
        {"d_gain_pct", &pid_params.d_gain_pct},
        {"i_gain_pct", &pid_params.i_gain_pct},
        {"deadband", &pid_params.deadband},
        {"setpoint", &pid_params.setpoint},
        {"p_gain_pct", &pid_params.p_gain_pct},
        {NULL, NULL}
};
347
348 static void __init intel_pstate_debug_expose_params(void)
349 {
350         struct dentry *debugfs_parent;
351         int i = 0;
352
353         if (hwp_active)
354                 return;
355         debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
356         if (IS_ERR_OR_NULL(debugfs_parent))
357                 return;
358         while (pid_files[i].name) {
359                 debugfs_create_file(pid_files[i].name, 0660,
360                                     debugfs_parent, pid_files[i].value,
361                                     &fops_pid_param);
362                 i++;
363         }
364 }
365
366 /************************** debugfs end ************************/
367
368 /************************** sysfs begin ************************/
/*
 * Generate show_<file_name>(), a sysfs show handler that prints the
 * limits-><object> field followed by a newline.
 */
#define show_one(file_name, object)                                     \
        static ssize_t show_##file_name                                 \
        (struct kobject *kobj, struct attribute *attr, char *buf)       \
        {                                                               \
                return sprintf(buf, "%u\n", limits->object);            \
        }
375
376 static ssize_t show_turbo_pct(struct kobject *kobj,
377                                 struct attribute *attr, char *buf)
378 {
379         struct cpudata *cpu;
380         int total, no_turbo, turbo_pct;
381         uint32_t turbo_fp;
382
383         cpu = all_cpu_data[0];
384
385         total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
386         no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
387         turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
388         turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
389         return sprintf(buf, "%u\n", turbo_pct);
390 }
391
392 static ssize_t show_num_pstates(struct kobject *kobj,
393                                 struct attribute *attr, char *buf)
394 {
395         struct cpudata *cpu;
396         int total;
397
398         cpu = all_cpu_data[0];
399         total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
400         return sprintf(buf, "%u\n", total);
401 }
402
403 static ssize_t show_no_turbo(struct kobject *kobj,
404                              struct attribute *attr, char *buf)
405 {
406         ssize_t ret;
407
408         update_turbo_state();
409         if (limits->turbo_disabled)
410                 ret = sprintf(buf, "%u\n", limits->turbo_disabled);
411         else
412                 ret = sprintf(buf, "%u\n", limits->no_turbo);
413
414         return ret;
415 }
416
417 static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
418                               const char *buf, size_t count)
419 {
420         unsigned int input;
421         int ret;
422
423         ret = sscanf(buf, "%u", &input);
424         if (ret != 1)
425                 return -EINVAL;
426
427         update_turbo_state();
428         if (limits->turbo_disabled) {
429                 pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
430                 return -EPERM;
431         }
432
433         limits->no_turbo = clamp_t(int, input, 0, 1);
434
435         if (hwp_active)
436                 intel_pstate_hwp_set();
437
438         return count;
439 }
440
441 static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
442                                   const char *buf, size_t count)
443 {
444         unsigned int input;
445         int ret;
446
447         ret = sscanf(buf, "%u", &input);
448         if (ret != 1)
449                 return -EINVAL;
450
451         limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
452         limits->max_perf_pct = min(limits->max_policy_pct,
453                                    limits->max_sysfs_pct);
454         limits->max_perf_pct = max(limits->min_policy_pct,
455                                    limits->max_perf_pct);
456         limits->max_perf_pct = max(limits->min_perf_pct,
457                                    limits->max_perf_pct);
458         limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
459                                   int_tofp(100));
460
461         if (hwp_active)
462                 intel_pstate_hwp_set();
463         return count;
464 }
465
466 static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
467                                   const char *buf, size_t count)
468 {
469         unsigned int input;
470         int ret;
471
472         ret = sscanf(buf, "%u", &input);
473         if (ret != 1)
474                 return -EINVAL;
475
476         limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
477         limits->min_perf_pct = max(limits->min_policy_pct,
478                                    limits->min_sysfs_pct);
479         limits->min_perf_pct = min(limits->max_policy_pct,
480                                    limits->min_perf_pct);
481         limits->min_perf_pct = min(limits->max_perf_pct,
482                                    limits->min_perf_pct);
483         limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
484                                   int_tofp(100));
485
486         if (hwp_active)
487                 intel_pstate_hwp_set();
488         return count;
489 }
490
/* Generate the show handlers for the two plain percentage limits. */
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

/*
 * Instantiate the sysfs attribute objects listed in
 * intel_pstate_attributes[].  NOTE(review): define_one_global_rw/ro are
 * defined outside this file section; presumably they bind the show_ and
 * store_ handlers above by name -- confirm against their definition.
 */
define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);
499
/* NULL-terminated list of attributes placed in the intel_pstate sysfs group. */
static struct attribute *intel_pstate_attributes[] = {
        &no_turbo.attr,
        &max_perf_pct.attr,
        &min_perf_pct.attr,
        &turbo_pct.attr,
        &num_pstates.attr,
        NULL
};
508
/* Attribute group registered by intel_pstate_sysfs_expose_params(). */
static struct attribute_group intel_pstate_attr_group = {
        .attrs = intel_pstate_attributes,
};
512
513 static void __init intel_pstate_sysfs_expose_params(void)
514 {
515         struct kobject *intel_pstate_kobject;
516         int rc;
517
518         intel_pstate_kobject = kobject_create_and_add("intel_pstate",
519                                                 &cpu_subsys.dev_root->kobj);
520         BUG_ON(!intel_pstate_kobject);
521         rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
522         BUG_ON(rc);
523 }
524 /************************** sysfs end ************************/
525
526 static void intel_pstate_hwp_enable(struct cpudata *cpudata)
527 {
528         wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
529 }
530
531 static int byt_get_min_pstate(void)
532 {
533         u64 value;
534
535         rdmsrl(BYT_RATIOS, value);
536         return (value >> 8) & 0x7F;
537 }
538
539 static int byt_get_max_pstate(void)
540 {
541         u64 value;
542
543         rdmsrl(BYT_RATIOS, value);
544         return (value >> 16) & 0x7F;
545 }
546
547 static int byt_get_turbo_pstate(void)
548 {
549         u64 value;
550
551         rdmsrl(BYT_TURBO_RATIOS, value);
552         return value & 0x7F;
553 }
554
555 static void byt_set_pstate(struct cpudata *cpudata, int pstate)
556 {
557         u64 val;
558         int32_t vid_fp;
559         u32 vid;
560
561         val = (u64)pstate << 8;
562         if (limits->no_turbo && !limits->turbo_disabled)
563                 val |= (u64)1 << 32;
564
565         vid_fp = cpudata->vid.min + mul_fp(
566                 int_tofp(pstate - cpudata->pstate.min_pstate),
567                 cpudata->vid.ratio);
568
569         vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
570         vid = ceiling_fp(vid_fp);
571
572         if (pstate > cpudata->pstate.max_pstate)
573                 vid = cpudata->vid.turbo;
574
575         val |= vid;
576
577         wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
578 }
579
580 #define BYT_BCLK_FREQS 5
581 static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800};
582
583 static int byt_get_scaling(void)
584 {
585         u64 value;
586         int i;
587
588         rdmsrl(MSR_FSB_FREQ, value);
589         i = value & 0x3;
590
591         BUG_ON(i > BYT_BCLK_FREQS);
592
593         return byt_freq_table[i] * 100;
594 }
595
596 static void byt_get_vid(struct cpudata *cpudata)
597 {
598         u64 value;
599
600         rdmsrl(BYT_VIDS, value);
601         cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
602         cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
603         cpudata->vid.ratio = div_fp(
604                 cpudata->vid.max - cpudata->vid.min,
605                 int_tofp(cpudata->pstate.max_pstate -
606                         cpudata->pstate.min_pstate));
607
608         rdmsrl(BYT_TURBO_VIDS, value);
609         cpudata->vid.turbo = value & 0x7f;
610 }
611
612 static int core_get_min_pstate(void)
613 {
614         u64 value;
615
616         rdmsrl(MSR_PLATFORM_INFO, value);
617         return (value >> 40) & 0xFF;
618 }
619
620 static int core_get_max_pstate_physical(void)
621 {
622         u64 value;
623
624         rdmsrl(MSR_PLATFORM_INFO, value);
625         return (value >> 8) & 0xFF;
626 }
627
628 static int core_get_max_pstate(void)
629 {
630         u64 tar;
631         u64 plat_info;
632         int max_pstate;
633         int err;
634
635         rdmsrl(MSR_PLATFORM_INFO, plat_info);
636         max_pstate = (plat_info >> 8) & 0xFF;
637
638         err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
639         if (!err) {
640                 /* Do some sanity checking for safety */
641                 if (plat_info & 0x600000000) {
642                         u64 tdp_ctrl;
643                         u64 tdp_ratio;
644                         int tdp_msr;
645
646                         err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
647                         if (err)
648                                 goto skip_tar;
649
650                         tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl;
651                         err = rdmsrl_safe(tdp_msr, &tdp_ratio);
652                         if (err)
653                                 goto skip_tar;
654
655                         if (tdp_ratio - 1 == tar) {
656                                 max_pstate = tar;
657                                 pr_debug("max_pstate=TAC %x\n", max_pstate);
658                         } else {
659                                 goto skip_tar;
660                         }
661                 }
662         }
663
664 skip_tar:
665         return max_pstate;
666 }
667
668 static int core_get_turbo_pstate(void)
669 {
670         u64 value;
671         int nont, ret;
672
673         rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
674         nont = core_get_max_pstate();
675         ret = (value) & 255;
676         if (ret <= nont)
677                 ret = nont;
678         return ret;
679 }
680
/* Core CPUs use a fixed scaling factor of 100000 per P-state step. */
static inline int core_get_scaling(void)
{
        const int scaling = 100000;

        return scaling;
}
685
686 static void core_set_pstate(struct cpudata *cpudata, int pstate)
687 {
688         u64 val;
689
690         val = (u64)pstate << 8;
691         if (limits->no_turbo && !limits->turbo_disabled)
692                 val |= (u64)1 << 32;
693
694         wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
695 }
696
697 static int knl_get_turbo_pstate(void)
698 {
699         u64 value;
700         int nont, ret;
701
702         rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
703         nont = core_get_max_pstate();
704         ret = (((value) >> 8) & 0xFF);
705         if (ret <= nont)
706                 ret = nont;
707         return ret;
708 }
709
/* Defaults for Core CPUs: proportional-only PID and no get_vid callback. */
static struct cpu_defaults core_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 97,
                .p_gain_pct = 20,
                .d_gain_pct = 0,
                .i_gain_pct = 0,
        },
        .funcs = {
                .get_max = core_get_max_pstate,
                .get_max_physical = core_get_max_pstate_physical,
                .get_min = core_get_min_pstate,
                .get_turbo = core_get_turbo_pstate,
                .get_scaling = core_get_scaling,
                .set = core_set_pstate,
        },
};
728
/*
 * Defaults for BYT CPUs: PI controller, VID-aware set/get_vid callbacks.
 * get_max_physical reuses byt_get_max_pstate.
 */
static struct cpu_defaults byt_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 60,
                .p_gain_pct = 14,
                .d_gain_pct = 0,
                .i_gain_pct = 4,
        },
        .funcs = {
                .get_max = byt_get_max_pstate,
                .get_max_physical = byt_get_max_pstate,
                .get_min = byt_get_min_pstate,
                .get_turbo = byt_get_turbo_pstate,
                .set = byt_set_pstate,
                .get_scaling = byt_get_scaling,
                .get_vid = byt_get_vid,
        },
};
748
/* Defaults for KNL CPUs: same as core_params except for get_turbo. */
static struct cpu_defaults knl_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 97,
                .p_gain_pct = 20,
                .d_gain_pct = 0,
                .i_gain_pct = 0,
        },
        .funcs = {
                .get_max = core_get_max_pstate,
                .get_max_physical = core_get_max_pstate_physical,
                .get_min = core_get_min_pstate,
                .get_turbo = knl_get_turbo_pstate,
                .get_scaling = core_get_scaling,
                .set = core_set_pstate,
        },
};
767
768 static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
769 {
770         int max_perf = cpu->pstate.turbo_pstate;
771         int max_perf_adj;
772         int min_perf;
773
774         if (limits->no_turbo || limits->turbo_disabled)
775                 max_perf = cpu->pstate.max_pstate;
776
777         /*
778          * performance can be limited by user through sysfs, by cpufreq
779          * policy, or by cpu specific default values determined through
780          * experimentation.
781          */
782         max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf));
783         *max = clamp_t(int, max_perf_adj,
784                         cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
785
786         min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf));
787         *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
788 }
789
790 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
791 {
792         int max_perf, min_perf;
793
794         if (force) {
795                 update_turbo_state();
796
797                 intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
798
799                 pstate = clamp_t(int, pstate, min_perf, max_perf);
800
801                 if (pstate == cpu->pstate.current_pstate)
802                         return;
803         }
804         trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
805
806         cpu->pstate.current_pstate = pstate;
807
808         pstate_funcs.set(cpu, pstate);
809 }
810
811 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
812 {
813         cpu->pstate.min_pstate = pstate_funcs.get_min();
814         cpu->pstate.max_pstate = pstate_funcs.get_max();
815         cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
816         cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
817         cpu->pstate.scaling = pstate_funcs.get_scaling();
818
819         if (pstate_funcs.get_vid)
820                 pstate_funcs.get_vid(cpu);
821         intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
822 }
823
824 static inline void intel_pstate_calc_busy(struct cpudata *cpu)
825 {
826         struct sample *sample = &cpu->sample;
827         int64_t core_pct;
828
829         core_pct = int_tofp(sample->aperf) * int_tofp(100);
830         core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
831
832         sample->freq = fp_toint(
833                 mul_fp(int_tofp(
834                         cpu->pstate.max_pstate_physical *
835                         cpu->pstate.scaling / 100),
836                         core_pct));
837
838         sample->core_pct_busy = (int32_t)core_pct;
839 }
840
841 static inline void intel_pstate_sample(struct cpudata *cpu)
842 {
843         u64 aperf, mperf;
844         unsigned long flags;
845         u64 tsc;
846
847         local_irq_save(flags);
848         rdmsrl(MSR_IA32_APERF, aperf);
849         rdmsrl(MSR_IA32_MPERF, mperf);
850         if (cpu->prev_mperf == mperf) {
851                 local_irq_restore(flags);
852                 return;
853         }
854
855         tsc = rdtsc();
856         local_irq_restore(flags);
857
858         cpu->last_sample_time = cpu->sample.time;
859         cpu->sample.time = ktime_get();
860         cpu->sample.aperf = aperf;
861         cpu->sample.mperf = mperf;
862         cpu->sample.tsc =  tsc;
863         cpu->sample.aperf -= cpu->prev_aperf;
864         cpu->sample.mperf -= cpu->prev_mperf;
865         cpu->sample.tsc -= cpu->prev_tsc;
866
867         intel_pstate_calc_busy(cpu);
868
869         cpu->prev_aperf = aperf;
870         cpu->prev_mperf = mperf;
871         cpu->prev_tsc = tsc;
872 }
873
874 static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
875 {
876         int delay;
877
878         delay = msecs_to_jiffies(50);
879         mod_timer_pinned(&cpu->timer, jiffies + delay);
880 }
881
882 static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
883 {
884         int delay;
885
886         delay = msecs_to_jiffies(pid_params.sample_rate_ms);
887         mod_timer_pinned(&cpu->timer, jiffies + delay);
888 }
889
890 static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
891 {
892         int32_t core_busy, max_pstate, current_pstate, sample_ratio;
893         s64 duration_us;
894         u32 sample_time;
895
896         /*
897          * core_busy is the ratio of actual performance to max
898          * max_pstate is the max non turbo pstate available
899          * current_pstate was the pstate that was requested during
900          *      the last sample period.
901          *
902          * We normalize core_busy, which was our actual percent
903          * performance to what we requested during the last sample
904          * period. The result will be a percentage of busy at a
905          * specified pstate.
906          */
907         core_busy = cpu->sample.core_pct_busy;
908         max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
909         current_pstate = int_tofp(cpu->pstate.current_pstate);
910         core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
911
912         /*
913          * Since we have a deferred timer, it will not fire unless
914          * we are in C0.  So, determine if the actual elapsed time
915          * is significantly greater (3x) than our sample interval.  If it
916          * is, then we were idle for a long enough period of time
917          * to adjust our busyness.
918          */
919         sample_time = pid_params.sample_rate_ms  * USEC_PER_MSEC;
920         duration_us = ktime_us_delta(cpu->sample.time,
921                                      cpu->last_sample_time);
922         if (duration_us > sample_time * 3) {
923                 sample_ratio = div_fp(int_tofp(sample_time),
924                                       int_tofp(duration_us));
925                 core_busy = mul_fp(core_busy, sample_ratio);
926         }
927
928         return core_busy;
929 }
930
931 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
932 {
933         int32_t busy_scaled;
934         struct _pid *pid;
935         signed int ctl;
936         int from;
937         struct sample *sample;
938
939         from = cpu->pstate.current_pstate;
940
941         pid = &cpu->pid;
942         busy_scaled = intel_pstate_get_scaled_busy(cpu);
943
944         ctl = pid_calc(pid, busy_scaled);
945
946         /* Negative values of ctl increase the pstate and vice versa */
947         intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl, true);
948
949         sample = &cpu->sample;
950         trace_pstate_sample(fp_toint(sample->core_pct_busy),
951                 fp_toint(busy_scaled),
952                 from,
953                 cpu->pstate.current_pstate,
954                 sample->mperf,
955                 sample->aperf,
956                 sample->tsc,
957                 sample->freq);
958 }
959
/*
 * Timer callback used when HWP is active: take a sample and re-arm the
 * timer.  @__data carries the struct cpudata pointer.
 */
static void intel_hwp_timer_func(unsigned long __data)
{
        struct cpudata *data = (struct cpudata *)__data;

        intel_pstate_sample(data);
        intel_hwp_set_sample_time(data);
}
967
/*
 * Timer callback used without HWP: take a sample, adjust the P-state and
 * re-arm the timer.  @__data carries the struct cpudata pointer.
 */
static void intel_pstate_timer_func(unsigned long __data)
{
        struct cpudata *data = (struct cpudata *)__data;

        intel_pstate_sample(data);
        intel_pstate_adjust_busy_pstate(data);
        intel_pstate_set_sample_time(data);
}
978
/*
 * Build an x86_cpu_id entry for an Intel family 6 CPU of the given model
 * that has the APERFMPERF feature; the address of @policy is stored in
 * the entry's last field as an unsigned long.
 */
#define ICPU(model, policy) \
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
                        (unsigned long)&policy }
982
/*
 * CPU models handled by this driver, each paired with its default
 * parameter set.  The list ends with an empty entry and is exported as a
 * module device table.
 */
static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
        ICPU(0x2a, core_params),
        ICPU(0x2d, core_params),
        ICPU(0x37, byt_params),
        ICPU(0x3a, core_params),
        ICPU(0x3c, core_params),
        ICPU(0x3d, core_params),
        ICPU(0x3e, core_params),
        ICPU(0x3f, core_params),
        ICPU(0x45, core_params),
        ICPU(0x46, core_params),
        ICPU(0x47, core_params),
        ICPU(0x4c, byt_params),
        ICPU(0x4e, core_params),
        ICPU(0x4f, core_params),
        ICPU(0x5e, core_params),
        ICPU(0x56, core_params),
        ICPU(0x57, knl_params),
        {}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
1004
1005 static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
1006         ICPU(0x56, core_params),
1007         {}
1008 };
1009
1010 static int intel_pstate_init_cpu(unsigned int cpunum)
1011 {
1012         struct cpudata *cpu;
1013
1014         if (!all_cpu_data[cpunum])
1015                 all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
1016                                                GFP_KERNEL);
1017         if (!all_cpu_data[cpunum])
1018                 return -ENOMEM;
1019
1020         cpu = all_cpu_data[cpunum];
1021
1022         cpu->cpu = cpunum;
1023
1024         if (hwp_active)
1025                 intel_pstate_hwp_enable(cpu);
1026
1027         intel_pstate_get_cpu_pstates(cpu);
1028
1029         init_timer_deferrable(&cpu->timer);
1030         cpu->timer.data = (unsigned long)cpu;
1031         cpu->timer.expires = jiffies + HZ/100;
1032
1033         if (!hwp_active)
1034                 cpu->timer.function = intel_pstate_timer_func;
1035         else
1036                 cpu->timer.function = intel_hwp_timer_func;
1037
1038         intel_pstate_busy_pid_reset(cpu);
1039         intel_pstate_sample(cpu);
1040
1041         add_timer_on(&cpu->timer, cpunum);
1042
1043         pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
1044
1045         return 0;
1046 }
1047
1048 static unsigned int intel_pstate_get(unsigned int cpu_num)
1049 {
1050         struct sample *sample;
1051         struct cpudata *cpu;
1052
1053         cpu = all_cpu_data[cpu_num];
1054         if (!cpu)
1055                 return 0;
1056         sample = &cpu->sample;
1057         return sample->freq;
1058 }
1059
1060 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
1061 {
1062         if (!policy->cpuinfo.max_freq)
1063                 return -ENODEV;
1064
1065         if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
1066             policy->max >= policy->cpuinfo.max_freq) {
1067                 pr_debug("intel_pstate: set performance\n");
1068                 limits = &performance_limits;
1069                 return 0;
1070         }
1071
1072         pr_debug("intel_pstate: set powersave\n");
1073         limits = &powersave_limits;
1074         limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
1075         limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
1076         limits->max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq;
1077         limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100);
1078
1079         /* Normalize user input to [min_policy_pct, max_policy_pct] */
1080         limits->min_perf_pct = max(limits->min_policy_pct,
1081                                    limits->min_sysfs_pct);
1082         limits->min_perf_pct = min(limits->max_policy_pct,
1083                                    limits->min_perf_pct);
1084         limits->max_perf_pct = min(limits->max_policy_pct,
1085                                    limits->max_sysfs_pct);
1086         limits->max_perf_pct = max(limits->min_policy_pct,
1087                                    limits->max_perf_pct);
1088
1089         /* Make sure min_perf_pct <= max_perf_pct */
1090         limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
1091
1092         limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
1093                                   int_tofp(100));
1094         limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
1095                                   int_tofp(100));
1096
1097         if (hwp_active)
1098                 intel_pstate_hwp_set();
1099
1100         return 0;
1101 }
1102
1103 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
1104 {
1105         cpufreq_verify_within_cpu_limits(policy);
1106
1107         if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
1108             policy->policy != CPUFREQ_POLICY_PERFORMANCE)
1109                 return -EINVAL;
1110
1111         return 0;
1112 }
1113
1114 static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
1115 {
1116         int cpu_num = policy->cpu;
1117         struct cpudata *cpu = all_cpu_data[cpu_num];
1118
1119         pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
1120
1121         del_timer_sync(&all_cpu_data[cpu_num]->timer);
1122         if (hwp_active)
1123                 return;
1124
1125         intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
1126 }
1127
1128 static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
1129 {
1130         struct cpudata *cpu;
1131         int rc;
1132
1133         rc = intel_pstate_init_cpu(policy->cpu);
1134         if (rc)
1135                 return rc;
1136
1137         cpu = all_cpu_data[policy->cpu];
1138
1139         if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
1140                 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
1141         else
1142                 policy->policy = CPUFREQ_POLICY_POWERSAVE;
1143
1144         policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
1145         policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1146
1147         /* cpuinfo and default policy values */
1148         policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
1149         policy->cpuinfo.max_freq =
1150                 cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1151         policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
1152         cpumask_set_cpu(policy->cpu, policy->cpus);
1153
1154         return 0;
1155 }
1156
1157 static struct cpufreq_driver intel_pstate_driver = {
1158         .flags          = CPUFREQ_CONST_LOOPS,
1159         .verify         = intel_pstate_verify_policy,
1160         .setpolicy      = intel_pstate_set_policy,
1161         .get            = intel_pstate_get,
1162         .init           = intel_pstate_cpu_init,
1163         .stop_cpu       = intel_pstate_stop_cpu,
1164         .name           = "intel_pstate",
1165 };
1166
1167 static int __initdata no_load;
1168 static int __initdata no_hwp;
1169 static int __initdata hwp_only;
1170 static unsigned int force_load;
1171
1172 static int intel_pstate_msrs_not_valid(void)
1173 {
1174         if (!pstate_funcs.get_max() ||
1175             !pstate_funcs.get_min() ||
1176             !pstate_funcs.get_turbo())
1177                 return -ENODEV;
1178
1179         return 0;
1180 }
1181
1182 static void copy_pid_params(struct pstate_adjust_policy *policy)
1183 {
1184         pid_params.sample_rate_ms = policy->sample_rate_ms;
1185         pid_params.p_gain_pct = policy->p_gain_pct;
1186         pid_params.i_gain_pct = policy->i_gain_pct;
1187         pid_params.d_gain_pct = policy->d_gain_pct;
1188         pid_params.deadband = policy->deadband;
1189         pid_params.setpoint = policy->setpoint;
1190 }
1191
1192 static void copy_cpu_funcs(struct pstate_funcs *funcs)
1193 {
1194         pstate_funcs.get_max   = funcs->get_max;
1195         pstate_funcs.get_max_physical = funcs->get_max_physical;
1196         pstate_funcs.get_min   = funcs->get_min;
1197         pstate_funcs.get_turbo = funcs->get_turbo;
1198         pstate_funcs.get_scaling = funcs->get_scaling;
1199         pstate_funcs.set       = funcs->set;
1200         pstate_funcs.get_vid   = funcs->get_vid;
1201 }
1202
1203 #if IS_ENABLED(CONFIG_ACPI)
1204 #include <acpi/processor.h>
1205
1206 static bool intel_pstate_no_acpi_pss(void)
1207 {
1208         int i;
1209
1210         for_each_possible_cpu(i) {
1211                 acpi_status status;
1212                 union acpi_object *pss;
1213                 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
1214                 struct acpi_processor *pr = per_cpu(processors, i);
1215
1216                 if (!pr)
1217                         continue;
1218
1219                 status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
1220                 if (ACPI_FAILURE(status))
1221                         continue;
1222
1223                 pss = buffer.pointer;
1224                 if (pss && pss->type == ACPI_TYPE_PACKAGE) {
1225                         kfree(pss);
1226                         return false;
1227                 }
1228
1229                 kfree(pss);
1230         }
1231
1232         return true;
1233 }
1234
1235 static bool intel_pstate_has_acpi_ppc(void)
1236 {
1237         int i;
1238
1239         for_each_possible_cpu(i) {
1240                 struct acpi_processor *pr = per_cpu(processors, i);
1241
1242                 if (!pr)
1243                         continue;
1244                 if (acpi_has_method(pr->handle, "_PPC"))
1245                         return true;
1246         }
1247         return false;
1248 }
1249
1250 enum {
1251         PSS,
1252         PPC,
1253 };
1254
1255 struct hw_vendor_info {
1256         u16  valid;
1257         char oem_id[ACPI_OEM_ID_SIZE];
1258         char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
1259         int  oem_pwr_table;
1260 };
1261
1262 /* Hardware vendor-specific info that has its own power management modes */
1263 static struct hw_vendor_info vendor_info[] = {
1264         {1, "HP    ", "ProLiant", PSS},
1265         {1, "ORACLE", "X4-2    ", PPC},
1266         {1, "ORACLE", "X4-2L   ", PPC},
1267         {1, "ORACLE", "X4-2B   ", PPC},
1268         {1, "ORACLE", "X3-2    ", PPC},
1269         {1, "ORACLE", "X3-2L   ", PPC},
1270         {1, "ORACLE", "X3-2B   ", PPC},
1271         {1, "ORACLE", "X4470M2 ", PPC},
1272         {1, "ORACLE", "X4270M3 ", PPC},
1273         {1, "ORACLE", "X4270M2 ", PPC},
1274         {1, "ORACLE", "X4170M2 ", PPC},
1275         {1, "ORACLE", "X4170 M3", PPC},
1276         {1, "ORACLE", "X4275 M3", PPC},
1277         {1, "ORACLE", "X6-2    ", PPC},
1278         {1, "ORACLE", "Sudbury ", PPC},
1279         {0, "", ""},
1280 };
1281
1282 static bool intel_pstate_platform_pwr_mgmt_exists(void)
1283 {
1284         struct acpi_table_header hdr;
1285         struct hw_vendor_info *v_info;
1286         const struct x86_cpu_id *id;
1287         u64 misc_pwr;
1288
1289         id = x86_match_cpu(intel_pstate_cpu_oob_ids);
1290         if (id) {
1291                 rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
1292                 if ( misc_pwr & (1 << 8))
1293                         return true;
1294         }
1295
1296         if (acpi_disabled ||
1297             ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
1298                 return false;
1299
1300         for (v_info = vendor_info; v_info->valid; v_info++) {
1301                 if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
1302                         !strncmp(hdr.oem_table_id, v_info->oem_table_id,
1303                                                 ACPI_OEM_TABLE_ID_SIZE))
1304                         switch (v_info->oem_pwr_table) {
1305                         case PSS:
1306                                 return intel_pstate_no_acpi_pss();
1307                         case PPC:
1308                                 return intel_pstate_has_acpi_ppc() &&
1309                                         (!force_load);
1310                         }
1311         }
1312
1313         return false;
1314 }
1315 #else /* CONFIG_ACPI not enabled */
1316 static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
1317 static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
1318 #endif /* CONFIG_ACPI */
1319
1320 static int __init intel_pstate_init(void)
1321 {
1322         int cpu, rc = 0;
1323         const struct x86_cpu_id *id;
1324         struct cpu_defaults *cpu_def;
1325
1326         if (no_load)
1327                 return -ENODEV;
1328
1329         id = x86_match_cpu(intel_pstate_cpu_ids);
1330         if (!id)
1331                 return -ENODEV;
1332
1333         /*
1334          * The Intel pstate driver will be ignored if the platform
1335          * firmware has its own power management modes.
1336          */
1337         if (intel_pstate_platform_pwr_mgmt_exists())
1338                 return -ENODEV;
1339
1340         cpu_def = (struct cpu_defaults *)id->driver_data;
1341
1342         copy_pid_params(&cpu_def->pid_policy);
1343         copy_cpu_funcs(&cpu_def->funcs);
1344
1345         if (intel_pstate_msrs_not_valid())
1346                 return -ENODEV;
1347
1348         pr_info("Intel P-state driver initializing.\n");
1349
1350         all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
1351         if (!all_cpu_data)
1352                 return -ENOMEM;
1353
1354         if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
1355                 pr_info("intel_pstate: HWP enabled\n");
1356                 hwp_active++;
1357         }
1358
1359         if (!hwp_active && hwp_only)
1360                 goto out;
1361
1362         rc = cpufreq_register_driver(&intel_pstate_driver);
1363         if (rc)
1364                 goto out;
1365
1366         intel_pstate_debug_expose_params();
1367         intel_pstate_sysfs_expose_params();
1368
1369         return rc;
1370 out:
1371         get_online_cpus();
1372         for_each_online_cpu(cpu) {
1373                 if (all_cpu_data[cpu]) {
1374                         del_timer_sync(&all_cpu_data[cpu]->timer);
1375                         kfree(all_cpu_data[cpu]);
1376                 }
1377         }
1378
1379         put_online_cpus();
1380         vfree(all_cpu_data);
1381         return -ENODEV;
1382 }
1383 device_initcall(intel_pstate_init);
1384
1385 static int __init intel_pstate_setup(char *str)
1386 {
1387         if (!str)
1388                 return -EINVAL;
1389
1390         if (!strcmp(str, "disable"))
1391                 no_load = 1;
1392         if (!strcmp(str, "no_hwp")) {
1393                 pr_info("intel_pstate: HWP disabled\n");
1394                 no_hwp = 1;
1395         }
1396         if (!strcmp(str, "force"))
1397                 force_load = 1;
1398         if (!strcmp(str, "hwp_only"))
1399                 hwp_only = 1;
1400         return 0;
1401 }
1402 early_param("intel_pstate", intel_pstate_setup);
1403
1404 MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
1405 MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
1406 MODULE_LICENSE("GPL");