فهرست منبع

Merge pull request #37 from DEvil0000/improvement/show_cpu_features_in_debug

adding a lot more debug output and some minor refactoring
Francesco Palmarini 6 سال پیش
والد
کامیت
d322c93699
2 فایل‌های تغییر یافته به همراه 130 افزوده شده و 14 حذف شده
  1. 31 1
      README.md
  2. 99 13
      lenovo_fix.py

+ 31 - 1
README.md

@@ -101,9 +101,19 @@ ANALOGIO: 0
 **IMPORTANT:** Please notice that *my* system is stable with these values. Your notebook might crash even with slight undervolting! You should test your system and slowly increase the undervolt to find the maximum stable value for your CPU. You can check [this](https://www.notebookcheck.net/Intel-Extreme-Tuning-Utility-XTU-Undervolting-Guide.272120.0.html) tutorial if you don't know where to start.
 
 ## Debug
-You can enable the `--debug` option to read back written values and check if the script is working properly. This is an example output:
+You can enable the `--debug` option to read back written values and check if the script is working properly. At startup it will also show the CPU's platform info, which contains information about multiplier values and the features present on this CPU. Additionally, the script will print the thermal status per core, which is handy when it comes to figuring out the reason for CPU throttling. The status fields show the current throttle reason or condition, and the log fields show whether this was a throttle reason since the last interval.
+This is an example output:
 ```
 ./lenovo_fix.py --debug
+[D] cpu platform info: maximum non turbo ratio = 20
+[D] cpu platform info: maximum efficiency ratio = 4
+[D] cpu platform info: minimum operating ratio = 4
+[D] cpu platform info: feature ppin cap = 0
+[D] cpu platform info: feature programmable turbo ratio = 1
+[D] cpu platform info: feature programmable tdp limit = 1
+[D] cpu platform info: number of additional tdp profiles = 2
+[D] cpu platform info: feature programmable temperature target = 1
+[D] cpu platform info: feature low power mode = 1
 [D] TEMPERATURE_TARGET - write 0xf - read 0xf
 [D] Undervolt plane CORE - write 0xf2800000 - read 0xf2800000
 [D] Undervolt plane GPU - write 0xf5200000 - read 0xf5200000
@@ -113,6 +123,26 @@ You can enable the `--debug` option to read back written values and check if the
 [D] MSR PACKAGE_POWER_LIMIT - write 0xcc816000dc80e8 - read 0xcc816000dc80e8
 [D] MCHBAR PACKAGE_POWER_LIMIT - write 0xcc816000dc80e8 - read 0xcc816000dc80e8
 [D] TEMPERATURE_TARGET - write 0xf - read 0xf
+[D] core 0 thermal status: thermal limit status = 0
+[D] core 0 thermal status: thermal limit log = 1
+[D] core 0 thermal status: prochot or forcepr status = 0
+[D] core 0 thermal status: prochot or forcepr log = 0
+[D] core 0 thermal status: crit temp status = 0
+[D] core 0 thermal status: crit temp log = 0
+[D] core 0 thermal status: thermal threshold1 status = 0
+[D] core 0 thermal status: thermal threshold1 log = 1
+[D] core 0 thermal status: thermal threshold2 status = 0
+[D] core 0 thermal status: thermal threshold2 log = 1
+[D] core 0 thermal status: power limit status = 0
+[D] core 0 thermal status: power limit log = 1
+[D] core 0 thermal status: current limit status = 0
+[D] core 0 thermal status: current limit log = 0
+[D] core 0 thermal status: cross domain limit status = 0
+[D] core 0 thermal status: cross domain limit log = 0
+[D] core 0 thermal status: cpu temp = 44
+[D] core 0 thermal status: temp resolution = 1
+[D] core 0 thermal status: reading valid = 1
+.....
 ```
 
 ## Disclaimer

+ 99 - 13
lenovo_fix.py

@@ -32,10 +32,43 @@ VOLTAGE_PLANES = {
 }
 
 TRIP_TEMP_RANGE = [40, 97]
-C_TDP_RANGE = (0, 2)
 
 power = {'source': None, 'method': 'polling'}
 
+platform_info_bits = {
+                        'maximum_non_turbo_ratio': [8, 15],
+                        'maximum_efficiency_ratio': [40, 47],
+                        'minimum_operating_ratio': [48, 55],
+                        'feature_ppin_cap': [23, 23],
+                        'feature_programmable_turbo_ratio': [28, 28],
+                        'feature_programmable_tdp_limit': [29, 29],
+                        'number_of_additional_tdp_profiles': [33, 34],
+                        'feature_programmable_temperature_target': [30, 30],
+                        'feature_low_power_mode': [32, 32]
+                     }
+
+thermal_status_bits =   {
+                            'thermal_limit_status': [0, 0],
+                            'thermal_limit_log': [1, 1],
+                            'prochot_or_forcepr_status': [2, 2],
+                            'prochot_or_forcepr_log': [3, 3],
+                            'crit_temp_status': [4, 4],
+                            'crit_temp_log': [5, 5],
+                            'thermal_threshold1_status': [6, 6],
+                            'thermal_threshold1_log': [7, 7],
+                            'thermal_threshold2_status': [8, 8],
+                            'thermal_threshold2_log': [9, 9],
+                            'power_limit_status': [10, 10],
+                            'power_limit_log': [11, 11],
+                            'current_limit_status': [12, 12],
+                            'current_limit_log': [13, 13],
+                            'cross_domain_limit_status': [14, 14],
+                            'cross_domain_limit_log': [15, 15],
+                            'cpu_temp': [16, 22],
+                            'temp_resolution': [27, 30],
+                            'reading_valid': [31, 31],
+                        }
+
 
 def writemsr(msr, val):
     msr_list = ['/dev/cpu/{:d}/msr'.format(x) for x in range(cpu_count())]
@@ -79,8 +112,7 @@ def readmsr(msr, from_bit=0, to_bit=63, cpu=None, flatten=False):
             os.lseek(f, msr, os.SEEK_SET)
             val = struct.unpack('Q', os.read(f, 8))[0]
             os.close(f)
-            mask = sum(2**x for x in range(from_bit, to_bit + 1))
-            output.append((val & mask) >> from_bit)
+            output.append(get_value_for_bits(val, from_bit, to_bit))
         if flatten:
             return output[0] if len(set(output)) == 1 else output
         return output[cpu] if cpu is not None else output
@@ -91,15 +123,55 @@ def readmsr(msr, from_bit=0, to_bit=63, cpu=None, flatten=False):
         else:
             raise e
 
+def get_value_for_bits(val, from_bit=0, to_bit=63):
+    mask = sum(2**x for x in range(from_bit, to_bit + 1))
+    return (val & mask) >> from_bit
 
 def is_on_battery():
     with open(SYSFS_POWER_PATH) as f:
         return not bool(int(f.read()))
 
+def get_cpu_platform_info():
+    features_msr_value = readmsr(0xce, cpu=0)
+    cpu_platform_info = {}
+    for key, value in platform_info_bits.items():
+        cpu_platform_info[key] = int(get_value_for_bits(features_msr_value, value[0], value[1]))
+    return cpu_platform_info
+
+
+def get_reset_thermal_status():
+    #read thermal status
+    thermal_status_msr_value = readmsr(0x19c)
+    thermal_status = []
+    for core in range(cpu_count()):
+        thermal_status_core = {}
+        for key, value in thermal_status_bits.items():
+            thermal_status_core[key] = int(get_value_for_bits(thermal_status_msr_value[core], value[0], value[1]))
+        thermal_status.append(thermal_status_core)
+    #reset log bits
+    writemsr(0x19c, 0)
+    return thermal_status
+
+
+def get_time_unit():
+    # 0.000977 is the time unit of my CPU
+    # TODO formula might be different for other CPUs
+    return 1.0 / 2**readmsr(0x606, 16, 19, cpu=0)
+
+
+def get_power_unit():
+    # 0.125 is the power unit of my CPU
+    # TODO formula might be different for other CPUs
+    return 1.0 / 2**readmsr(0x606, 0, 3, cpu=0)
+
+
+def get_critical_temp():
+    # the critical temperature for my CPU is 100 'C
+    return readmsr(0x1a2, 16, 23, cpu=0)
+
 
 def calc_time_window_vars(t):
-    # 0.000977 is the time unit of my CPU
-    time_unit = 1.0 / 2**readmsr(0x606, 16, 19, cpu=0)
+    time_unit = get_time_unit()
     for Y in range(2**5):
         for Z in range(2**2):
             if t <= (2**Y) * (1. + Z / 4.) * time_unit:
@@ -161,14 +233,14 @@ def load_config():
     return config
 
 
-def calc_reg_values(config):
+def calc_reg_values(platform_info, config):
     regs = defaultdict(dict)
     for power_source in ('AC', 'BATTERY'):
-        if readmsr(0xce, 30, 30, cpu=0) != 1:
+        if platform_info['feature_programmable_temperature_target'] != 1:
             print("[W] Setting temperature target is not supported by this CPU")
         else:
             # the critical temperature for my CPU is 100 'C
-            critical_temp = readmsr(0x1a2, 16, 23, cpu=0)
+            critical_temp = get_critical_temp()
             # update the allowed temp range to keep at least 3 'C from the CPU critical temperature
             global TRIP_TEMP_RANGE
             TRIP_TEMP_RANGE[1] = min(TRIP_TEMP_RANGE[1], critical_temp - 3)
@@ -176,8 +248,7 @@ def calc_reg_values(config):
             trip_offset = int(round(critical_temp - config.getfloat(power_source, 'Trip_Temp_C')))
             regs[power_source]['MSR_TEMPERATURE_TARGET'] = trip_offset << 24
 
-        # 0.125 is the power unit of my CPU
-        power_unit = 1.0 / 2**readmsr(0x606, 0, 3, cpu=0)
+        power_unit = get_power_unit()
         PL1 = int(round(config.getfloat(power_source, 'PL1_Tdp_W') / power_unit))
         Y, Z = calc_time_window_vars(config.getfloat(power_source, 'PL1_Duration_s'))
         TW1 = Y | (Z << 5)
@@ -192,10 +263,12 @@ def calc_reg_values(config):
         # cTDP
         c_tdp_target_value = config.getint(power_source, 'cTDP', fallback=None)
         if c_tdp_target_value is not None:
-            if readmsr(0xce, 33, 34, cpu=0) < 2:
+            if platform_info['feature_programmable_tdp_limit'] != 1:
                 print("[W] cTDP setting not supported by this CPU")
+            elif platform_info['number_of_additional_tdp_profiles'] < c_tdp_target_value:
+                print("[W] the configured cTDP profile is not supported by this CPU")
             else:
-                valid_c_tdp_target_value = min(C_TDP_RANGE[1], max(C_TDP_RANGE[0], c_tdp_target_value))
+                valid_c_tdp_target_value = max(0, c_tdp_target_value)
                 regs[power_source]['MSR_CONFIG_TDP_CONTROL'] = valid_c_tdp_target_value
     return regs
 
@@ -224,6 +297,14 @@ def power_thread(config, regs, exit_event):
         sys.exit(1)
 
     while not exit_event.is_set():
+        #print thermal status
+        if args.debug:
+            thermal_status = get_reset_thermal_status()
+            for index, core_thermal_status in enumerate(thermal_status):
+                for key, value in core_thermal_status.items():
+                    print('[D] core {} thermal status: {} = {}'.format(
+                        index, key.replace("_", " "), value))
+
         # switch back to sysfs polling
         if power['method'] == 'polling':
             power['source'] = 'BATTERY' if is_on_battery() else 'AC'
@@ -293,7 +374,12 @@ def main():
     power['source'] = 'BATTERY' if is_on_battery() else 'AC'
 
     config = load_config()
-    regs = calc_reg_values(config)
+    platform_info = get_cpu_platform_info()
+    if args.debug:
+        for key, value in platform_info.items():
+            print('[D] cpu platform info: {} = {}'.format(
+                key.replace("_", " "), value))
+    regs = calc_reg_values(platform_info, config)
 
     if not config.getboolean('GENERAL', 'Enabled'):
         return