Linux time system: the clock source (clocksource)

Clock source

Clock source, as its name implies, is the clock source that provides the clock to the system.
A clock source is responsible for reading the hardware counter that advances with time (its value is the so-called cycle count) and providing it to the timekeeper. It also supplies the parameters needed to convert a difference in cycles into a time interval.
clocksource and timer-related content are under the kernel/kernel/time directory.

# Excerpt of the kbuild Makefile in the time directory.
# obj-y objects are always built; obj-$(CONFIG_...) objects are built only
# when the named configuration option is enabled.
obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
obj-y += timeconv.o posix-clock.o alarmtimer.o

obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)        += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS)      += tick-common.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)    += tick-broadcast.o tick-broadcast-hrtimer.o
obj-$(CONFIG_GENERIC_SCHED_CLOCK)      += sched_clock.o
obj-$(CONFIG_TICK_ONESHOT)         += tick-oneshot.o
obj-$(CONFIG_TICK_ONESHOT)         += tick-sched.o
obj-$(CONFIG_TIMER_STATS)          += timer_stats.o

Now let's see how the clock source is registered and how it provides the content of computing time to timekeeper.

1. Clock source registration process:

Linux can have many clock sources. One of them is jiffies; there are also platform-specific clock sources with much higher accuracy.
After several clock sources have been registered, the kernel selects one of them as the current clock source so as to get the best accuracy.

1) jiffies clock source registration process:

jiffies is a counter variable that increases by HZ every second.
First, you need to fill in the members of the clocksource-related data structure according to the clock source.

/*
 * Clock source backed by the jiffies counter.
 *
 * mult is NSEC_PER_JIFFY pre-shifted left by JIFFIES_SHIFT and shift is
 * JIFFIES_SHIFT, so a (cycles * mult) >> shift conversion yields
 * cycles * NSEC_PER_JIFFY nanoseconds.
 */
static struct clocksource clocksource_jiffies = {
    .name   = "jiffies",
    .rating = 1,
    .read   = jiffies_read,     /* returns the current jiffies value */
    .mask   = 0xffffffff,       /* 32 bits */
    .mult   = NSEC_PER_JIFFY << JIFFIES_SHIFT,
    .shift  = JIFFIES_SHIFT,
};

From this example we can see that clocksource->read() returns the jiffies count, and that the elapsed time in nanoseconds is computed as (cycles * mult) >> shift.
Of course, for more accurate clock sources mult and shift have to be calculated carefully so that this conversion does not overflow. We will come back to this below.

2) Platform-related clock source registration process:

/*
 * Register a clocksource whose mult and shift are already filled in.
 *
 * Computes cs->maxadj and cs->max_idle_ns, then, while holding
 * clocksource_mutex, enqueues cs, hands it to the watchdog and re-runs
 * clock source selection. Always returns 0.
 */
int clocksource_register(struct clocksource *cs)
{
    /* calculate max adjustment for given mult/shift */
    cs->maxadj = clocksource_max_adjustment(cs); /* NOTE: helper not shown here; reportedly about 11% of mult */
    /* calculate max idle time permitted for this clocksource */
    cs->max_idle_ns = clocksource_max_deferment(cs); /* computed by clocksource_max_deferment() */
    mutex_lock(&clocksource_mutex);
    clocksource_enqueue(cs);  /* add cs to the clocksource list (reportedly kept in descending rating order) */
    clocksource_enqueue_watchdog(cs);
    clocksource_select(); /* choose the best clocksource, i.e. reportedly the one with the highest rating */
    mutex_unlock(&clocksource_mutex);
    return 0;
}


static u64 clocksource_max_deferment(struct clocksource *cs)
{
     u64 max_nsecs, max_cycles;
    max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1));
     /* The above calculation is the 63 th power of 2 = max_cycles* (cs - > mult + CS - > maxadj), that is to say
    This is to calculate the cycles corresponding to the 63 nanoseconds of the second power, that is, to calculate the largest cycles, because the nanoseconds corresponding to the time can not exceed.
    The 64 th power over 2, because it overflows.
    */
    max_cycles = min_t(u64, max_cycles, (u64) cs->mask); 
/*max_cycles And the minimum value in CS - > mask is assigned to max_cycles*/
    max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj,
                    cs->shift);
return max_nsecs - (max_nsecs >> 3);
}

/*
 * Clock source for the architected system counter ("arch_sys_counter").
 * mult and shift are not set here; see the registration path for how they
 * are derived from the counter frequency.
 */
static struct clocksource clocksource_counter = {
    .name   = "arch_sys_counter",
    .rating = 400,
    .read   = arch_counter_read,
    .mask   = CLOCKSOURCE_MASK(56),     /* NOTE(review): presumably a 56-bit counter mask — confirm */
    .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
};


/*
 * Init-time hook-up of the architected counter: registers
 * clocksource_counter at arch_timer_rate Hz, sets up sched_clock from the
 * same rate, and registers the counter as the delay-loop timer.
 */
static void __init arch_timer_counter_init(void)
{
    /* mult/shift for clocksource_counter are derived from arch_timer_rate */
    clocksource_register_hz(&clocksource_counter, arch_timer_rate);
    /* NOTE(review): 32 is presumably the number of counter bits used for sched_clock — confirm */
    setup_sched_clock(arch_timer_update_sched_clock, 32, arch_timer_rate);
    /* Use the architected timer for the delay loop. */
    arch_delay_timer.read_current_timer = &arch_timer_read_current_timer;
    arch_delay_timer.freq = arch_timer_rate;
    register_current_timer_delay(&arch_delay_timer);
}
/*
 * Register a clocksource whose frequency is given in Hz.
 * Thin wrapper: forwards to __clocksource_register_scale() with scale = 1
 * and returns its result.
 */
static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
{
   return __clocksource_register_scale(cs, 1, hz);
}
/*
 * Register a clocksource given its frequency as freq * scale Hz.
 *
 * First derives mult, shift, maxadj and max_idle_ns from the frequency,
 * then, while holding clocksource_mutex, enqueues cs, hands it to the
 * watchdog and re-runs clock source selection. Always returns 0.
 */
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
        /* Initialize mult/shift and max_idle_ns */
    __clocksource_updatefreq_scale(cs, scale, freq); /* compute the mult and shift values */

                            /* from the clocksource frequency */
       /* Add clocksource to the clocksource list */     
    mutex_lock(&clocksource_mutex);
    clocksource_enqueue(cs);
    clocksource_enqueue_watchdog(cs);
    clocksource_select();
    mutex_unlock(&clocksource_mutex);
        return 0;
}
/*
 * __clocksource_updatefreq_scale - derive mult/shift and limits from a frequency
 * @cs:    clocksource to update (mult, shift, maxadj, max_idle_ns are written)
 * @scale: frequency unit multiplier (1 when freq is in Hz)
 * @freq:  counter frequency in units of @scale Hz
 */
void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
    /*
     * Seconds the counter can run before wrapping, taken from 7/8 of
     * the mask (a 12.5% margin).
     */
    u64 wrap_sec = (cs->mask - (cs->mask >> 3));

    do_div(wrap_sec, freq);
    do_div(wrap_sec, scale);

    /* At least one second; cap at 600 s for counters wider than 32 bits. */
    if (!wrap_sec)
        wrap_sec = 1;
    else if (wrap_sec > 600 && cs->mask > UINT_MAX)
        wrap_sec = 600;

    clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
                           NSEC_PER_SEC / scale, wrap_sec * scale);

    /*
     * Halve mult (and drop shift by one) until neither mult + maxadj
     * nor mult - maxadj wraps around.
     */
    for (cs->maxadj = clocksource_max_adjustment(cs);
         (cs->mult + cs->maxadj < cs->mult)
         || (cs->mult - cs->maxadj > cs->mult);
         cs->maxadj = clocksource_max_adjustment(cs)) {
        cs->mult >>= 1;
        cs->shift--;
    }

    cs->max_idle_ns = clocksource_max_deferment(cs);
}
/*
 * clocks_calc_mult_shift - find a mult/shift pair converting "from" to "to"
 * @mult:   out: multiplier
 * @shift:  out: shift
 * @from:   source frequency
 * @to:     target frequency
 * @maxsec: conversion range, in seconds, that must not overflow
 *
 * The pair is meant for conversions of the form (value * mult) >> shift.
 */
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
    u32 acc_bits = 32;
    u32 s = 32;
    u64 val;

    /* One bit of multiplier headroom is lost per bit of (maxsec * from) >> 32. */
    for (val = ((u64)maxsec * from) >> 32; val != 0; val >>= 1)
        acc_bits--;

    /* Take the largest shift whose rounded multiplier fits in acc_bits bits. */
    while (s > 0) {
        val = ((u64) to << s) + from / 2;
        do_div(val, from);
        if ((val >> acc_bits) == 0)
            break;
        s--;
    }

    *mult = val;
    *shift = s;
}

The kernel replaces division with a multiplication plus a shift when computing how many ns have elapsed for a given number of cycles: ns = (cycles * mult) >> shift.
Simply put, the larger mult is, the better the precision, but the multiplication may overflow, so mult cannot grow without limit. In this calculation there is a magic number, 600:
This 600 means 600 seconds, i.e. the interval between two successive reads of the counter is assumed not to exceed 10 minutes. The main consideration is that when the system enters the IDLE state, the time information is not updated; the clocksource can still be converted correctly as long as the system leaves IDLE within 10 minutes. Of course, the actual limit is not necessarily 10 minutes: it is calculated by clocksource_max_deferment and stored in max_idle_ns.
I am not very fond of this kind of calculation. The kernel always has its reasons for writing code the way it does, and it takes a lot of time to figure out the intention of the code for little benefit. I will not explain the implementation in detail; instead I take the TSC as an example to evaluate the accuracy of this mult + shift approach.

#include<stdio.h>
#include<stdlib.h>
typedef unsigned int u32;
typedef unsigned long long u64;
#define NSEC_PER_SEC 1000000000L
/*
 * Userspace copy of the kernel's clocks_calc_mult_shift().
 *
 * Computes a mult/shift pair such that (value * mult) >> shift converts a
 * count at frequency "from" into a count at frequency "to", for intervals
 * of up to "maxsec" seconds. The kernel's do_div() is replaced by a plain
 * 64-bit division.
 */
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
    u64 range_hi = ((u64)maxsec * from) >> 32;
    u32 acc_bits = 32;
    u64 factor = 0;
    u32 s = 32;

    /* The conversion range limits how many bits the multiplier may use. */
    for (; range_hi != 0; range_hi >>= 1)
        acc_bits--;

    /*
     * Walk the shift down from 32 and stop at the first (largest) one
     * whose rounded multiplier fits into acc_bits bits.
     */
    while (s > 0) {
        factor = (((u64) to << s) + from / 2) / from;
        if ((factor >> acc_bits) == 0)
            break;
        s--;
    }

    *mult = factor;
    *shift = s;
}
/*
 * Demo: compute the mult/shift pair for a 2127727 kHz TSC and print it.
 *
 * The frequency is passed in kHz and the target as NSEC_PER_SEC / 1000
 * because the TSC is registered through clocksource_register_khz (scale
 * 1000); for the same reason the 600 s range is passed as 600 * 1000.
 * Always returns 0.
 */
int main(void)
{
    u32 tsc_mult;
    u32 tsc_shift;
    u32 tsc_frequency = 2127727000 / 1000; /* TSC frequency (kHz) */

    clocks_calc_mult_shift(&tsc_mult, &tsc_shift, tsc_frequency,
                           NSEC_PER_SEC / 1000, 600 * 1000);

    /* Both values are unsigned (u32), so print them with %u, not %d. */
    fprintf(stderr, "mult = %u shift = %u\n", tsc_mult, tsc_shift);
    return 0;
}

The value 600 follows from the mask of the TSC clocksource; if you are interested, you can derive it yourself.

mult = 7885042 shift = 24
root@manu:~/code/c/self/time# python
Python 2.7.3 (default, Apr 10 2013, 05:46:21) 
[GCC 4.6.3] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> (2127727000*7885042)>>24
1000000045L
>>> 
We know that the frequency of the TSC is 2,127,727,000 Hz. If the counter advances by 2,127,727,000 cycles, exactly 1 second, i.e. 10^9 ns, has passed. According to our algorithm, the computed time is 1,000,000,045 ns. How big is the error? For every second (10^9 ns) the error is 45 nanoseconds. In other words, it takes about 257 days for the calculation error to accumulate to 1 second. Considering the existence of NTP, this precision is still good.

3. clocksource watchdog

clocksource_enqueue_watchdog adds the clocksource to the watchdog list. The watchdog, as its name implies, monitors all clocksources:

/* Watchdog check period: HZ >> 1 is half of HZ, i.e. 0.5 s worth of jiffies. */
#define WATCHDOG_INTERVAL (HZ >> 1)
/* Tolerated deviation: NSEC_PER_SEC >> 4 is 1/16 of a second, i.e. 0.0625 s in ns. */
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)

The watchdog checks every 0.5 seconds; if the error of a clocksource over that interval is greater than 0.0625 s, its accuracy is considered extremely poor and its rating is set to 0.

Actual application scenarios

Take the Qualcomm msm8916 as an example. Except for jiffies, the other clocksources do not directly use the interfaces in the kernel/kernel/time/clocksource.c file.
Instead, both arm_arch_timer.c [kernel/drivers/clocksource/arm_arch_timer.c]
and arch_timer.c [kernel/arch/arm/kernel/arch_timer.c] register through
sched_clock_register() in the sched_clock.c [kernel/kernel/time/sched_clock.c] file,
and the current time is read using the sched_clock_32() function.

Posted by pimp3679 on Sat, 23 Mar 2019 00:33:53 -0700

Programmer Group