Here I'm quoting the earliest 2.2 version of the Redis source on GitHub, at https://github.com/antirez/redis/blob/2.2/src/sds.h. You can see the struct has only three members: two ints and a char array. The two ints are the optimization I mentioned earlier. A plain C string has two problems: first, to find out how many bytes are actually in use you have to walk the whole array; second, an easy trap is that you have to remember it is terminated by '\0' while traversing it. With the two int fields, one records the current length of the string and the other records how many bytes of free space remain, so these lookups drop from O(N) to O(1). The free field has another important role: it guards against C string buffer overflows. Before storing data you can check free first and grow the buffer if there isn't enough room. That's all for this part; there is plenty more to write in this series, so I'll work through it gradually.
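For reference, the sdshdr struct defined in that sds.h looks essentially like this (a sketch of the 2.2-era definition; treat the exact field types as approximate rather than a verbatim copy):

struct sdshdr {
    int len;    /* bytes of buf currently in use, so length queries are O(1) */
    int free;   /* unused bytes still available in buf, checked before appends to avoid overflow */
    char buf[]; /* the character data itself, still '\0'-terminated for C compatibility */
};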
Linked List
Linked lists are a very common data structure, but since Redis is written in C, it has to roll its own rather than rely on a third-party library. Redis's linked list has a dedicated list header and is non-circular. For the concrete structure I again dug up the earliest version of the code on GitHub.
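The node and list-header types live in adlist.h; they look roughly like this (a sketch based on the 2.x-era source, so field types and order may differ slightly):

typedef struct listNode {
    struct listNode *prev;   /* previous node, NULL at the head (the list is not circular) */
    struct listNode *next;   /* next node, NULL at the tail */
    void *value;             /* payload pointer, so the list can hold values of any type */
} listNode;

typedef struct list {
    listNode *head;
    listNode *tail;
    void *(*dup)(void *ptr);            /* optional copy function for values */
    void (*free)(void *ptr);            /* optional destructor for values */
    int (*match)(void *ptr, void *key); /* optional comparison used when searching */
    unsigned int len;                   /* cached length, so length queries are O(1) */
} list;

Because head, tail, and len are kept in the header, getting the length and pushing or popping at either end are all O(1), and the dup/free/match function pointers are what make the list generic. The snippets below are a different structure: the dict hash table from dict.h, which keeps two tables (ht[0] and ht[1]) to support incremental rehashing.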
/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    dictEntry **table;
    unsigned long size;
    unsigned long sizemask;
    unsigned long used;
} dictht;

typedef struct dict {
    dictType *type;
    void *privdata;
    dictht ht[2];
    int rehashidx; /* rehashing not in progress if rehashidx == -1 */
    int iterators; /* number of iterators currently running */
} dict;
HeapWord* result = op.result();
bool ret_succeeded = op.prologue_succeeded() && op.pause_succeeded();
assert(result == NULL || ret_succeeded,
       "the result should be NULL if the VM did not succeed");
*succeeded = ret_succeeded;
void VM_G1CollectForAllocation::doit() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  assert(!_should_initiate_conc_mark || g1h->should_do_concurrent_full_gc(_gc_cause),
         "only a GC locker, a System.gc(), stats update, whitebox, or a hum allocation induced GC should start a cycle");

  if (_word_size > 0) {
    // An allocation has been requested. So, try to do that first.
    _result = g1h->attempt_allocation_at_safepoint(_word_size,
                                                   false /* expect_null_cur_alloc_region */);
    if (_result != NULL) {
      // If we can successfully allocate before we actually do the
      // pause then we will consider this pause successful.
      _pause_succeeded = true;
      return;
    }
  }

  GCCauseSetter x(g1h, _gc_cause);
  if (_should_initiate_conc_mark) {
    // It's safer to read old_marking_cycles_completed() here, given
    // that noone else will be updating it concurrently. Since we'll
    // only need it if we're initiating a marking cycle, no point in
    // setting it earlier.
    _old_marking_cycles_completed_before = g1h->old_marking_cycles_completed();

    // At this point we are supposed to start a concurrent cycle. We
    // will do so if one is not already in progress.
    bool res = g1h->g1_policy()->force_initial_mark_if_outside_cycle(_gc_cause);

    // The above routine returns true if we were able to force the
    // next GC pause to be an initial mark; it returns false if a
    // marking cycle is already in progress.
    //
    // If a marking cycle is already in progress just return and skip the
    // pause below - if the reason for requesting this initial mark pause
    // was due to a System.gc() then the requesting thread should block in
    // doit_epilogue() until the marking cycle is complete.
    //
    // If this initial mark pause was requested as part of a humongous
    // allocation then we know that the marking cycle must just have
    // been started by another thread (possibly also allocating a humongous
    // object) as there was no active marking cycle when the requesting
    // thread checked before calling collect() in
    // attempt_allocation_humongous(). Retrying the GC, in this case,
    // will cause the requesting thread to spin inside collect() until the
    // just started marking cycle is complete - which may be a while. So
    // we do NOT retry the GC.
    if (!res) {
      assert(_word_size == 0, "Concurrent Full GC/Humongous Object IM shouldn't be allocating");
      if (_gc_cause != GCCause::_g1_humongous_allocation) {
        _should_retry_gc = true;
      }
      return;
    }
  }

  // Try a partial collection of some kind.
  _pause_succeeded = g1h->do_collection_pause_at_safepoint(_target_pause_time_ms);

  if (_pause_succeeded) {
    if (_word_size > 0) {
      // An allocation had been requested. Do it, eventually trying a stronger
      // kind of GC.
      _result = g1h->satisfy_failed_allocation(_word_size, &_pause_succeeded);
    } else {
      bool should_upgrade_to_full = !g1h->should_do_concurrent_full_gc(_gc_cause) &&
                                    !g1h->has_regions_left_for_allocation();
      if (should_upgrade_to_full) {
        // There has been a request to perform a GC to free some space. We have no
        // information on how much memory has been asked for. In case there are
        // absolutely no regions left to allocate into, do a maximally compacting full GC.
        log_info(gc, ergo)("Attempting maximally compacting collection");
        _pause_succeeded = g1h->do_full_collection(false, /* explicit gc */
                                                   true  /* clear_all_soft_refs */);
      }
    }
    guarantee(_pause_succeeded, "Elevated collections during the safepoint must always succeed.");
  } else {
    assert(_result == NULL, "invariant");
    // The only reason for the pause to not be successful is that, the GC locker is
    // active (or has become active since the prologue was executed). In this case
    // we should retry the pause after waiting for the GC locker to become inactive.
    _should_retry_gc = true;
  }
}
bool G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
  assert_at_safepoint_on_vm_thread();
  guarantee(!is_gc_active(), "collection is not reentrant");

  if (GCLocker::check_active_before_gc()) {
    return false;
  }

  // We should not be doing initial mark unless the conc mark thread is running
  if (!_cm_thread->should_terminate()) {
    // This call will decide whether this pause is an initial-mark
    // pause. If it is, in_initial_mark_gc() will return true
    // for the duration of this pause.
    g1_policy()->decide_on_conc_mark_initiation();
  }

  // We do not allow initial-mark to be piggy-backed on a mixed GC.
  assert(!collector_state()->in_initial_mark_gc() ||
         collector_state()->in_young_only_phase(), "sanity");

  // We also do not allow mixed GCs during marking.
  assert(!collector_state()->mark_or_rebuild_in_progress() ||
         collector_state()->in_young_only_phase(), "sanity");

  // Record whether this pause is an initial mark. When the current
  // thread has completed its logging output and it's safe to signal
  // the CM thread, the flag's value in the policy has been reset.
  bool should_start_conc_mark = collector_state()->in_initial_mark_gc();
  // Inner scope for scope based logging, timers, and stats collection
  {
    EvacuationInfo evacuation_info;

    if (collector_state()->in_initial_mark_gc()) {
      // We are about to start a marking cycle, so we increment the
      // full collection counter.
      increment_old_marking_cycles_started();
      _cm->gc_tracer_cm()->set_gc_cause(gc_cause());
    }

    // Don't dynamically change the number of GC threads this early. A value of
    // 0 is used to indicate serial work. When parallel work is done,
    // it will be set.

    {
      // Call to jvmpi::post_class_unload_events must occur outside of active GC
      IsGCActiveMark x;

      gc_prologue(false);

      if (VerifyRememberedSets) {
        log_info(gc, verify)("[Verifying RemSets before GC]");
        VerifyRegionRemSetClosure v_cl;
        heap_region_iterate(&v_cl);
      }

      // Please see comment in g1CollectedHeap.hpp and
      // G1CollectedHeap::ref_processing_init() to see how
      // reference processing currently works in G1.

      // Enable discovery in the STW reference processor
      _ref_processor_stw->enable_discovery();

      {
        // We want to temporarily turn off discovery by the
        // CM ref processor, if necessary, and turn it back on
        // on again later if we do. Using a scoped
        // NoRefDiscovery object will do this.
        NoRefDiscovery no_cm_discovery(_ref_processor_cm);

        // Forget the current alloc region (we might even choose it to be part
        // of the collection set!).
        _allocator->release_mutator_alloc_region();

        // This timing is only used by the ergonomics to handle our pause target.
        // It is unclear why this should not include the full pause. We will
        // investigate this in CR 7178365.
        //
        // Preserving the old comment here if that helps the investigation:
        //
        // The elapsed time induced by the start time below deliberately elides
        // the possible verification above.
        double sample_start_time_sec = os::elapsedTime();
        // Make sure the remembered sets are up to date. This needs to be
        // done before register_humongous_regions_with_cset(), because the
        // remembered sets are used there to choose eager reclaim candidates.
        // If the remembered sets are not up to date we might miss some
        // entries that need to be handled.
        g1_rem_set()->cleanupHRRS();

        register_humongous_regions_with_cset();

        assert(_verifier->check_cset_fast_test(), "Inconsistency in the InCSetState table.");

        // We call this after finalize_cset() to
        // ensure that the CSet has been finalized.
        _cm->verify_no_cset_oops();

        if (_hr_printer.is_active()) {
          G1PrintCollectionSetClosure cl(&_hr_printer);
          _collection_set.iterate(&cl);
        }

        // Initialize the GC alloc regions.
        _allocator->init_gc_alloc_regions(evacuation_info);

        if (evacuation_failed()) {
          set_used(recalculate_used());
          if (_archive_allocator != NULL) {
            _archive_allocator->clear_used();
          }
          for (uint i = 0; i < ParallelGCThreads; i++) {
            if (_evacuation_failed_info_array[i].has_failed()) {
              _gc_tracer_stw->report_evacuation_failed(_evacuation_failed_info_array[i]);
            }
          }
        } else {
          // The "used" of the the collection set have already been subtracted
          // when they were freed. Add in the bytes evacuated.
          increase_used(g1_policy()->bytes_copied_during_gc());
        }
        if (collector_state()->in_initial_mark_gc()) {
          // We have to do this before we notify the CM threads that
          // they can start working to make sure that all the
          // appropriate initialization is done on the CM object.
          concurrent_mark()->post_initial_mark();
          // Note that we don't actually trigger the CM thread at
          // this point. We do that later when we're sure that
          // the current thread has completed its logging output.
        }

        allocate_dummy_regions();

        _allocator->init_mutator_alloc_region();

        {
          size_t expand_bytes = _heap_sizing_policy->expansion_amount();
          if (expand_bytes > 0) {
            size_t bytes_before = capacity();
            // No need for an ergo logging here,
            // expansion_amount() does this when it returns a value > 0.
            double expand_ms;
            if (!expand(expand_bytes, _workers, &expand_ms)) {
              // We failed to expand the heap. Cannot do anything about it.
            }
            g1_policy()->phase_times()->record_expand_heap_time(expand_ms);
          }
        }

        // We redo the verification but now wrt to the new CSet which
        // has just got initialized after the previous CSet was freed.
        _cm->verify_no_cset_oops();

        // This timing is only used by the ergonomics to handle our pause target.
        // It is unclear why this should not include the full pause. We will
        // investigate this in CR 7178365.
        double sample_end_time_sec = os::elapsedTime();
        double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS;
        size_t total_cards_scanned = g1_policy()->phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanRS,
                                                                                       G1GCPhaseTimes::ScanRSScannedCards);
        g1_policy()->record_collection_pause_end(pause_time_ms, total_cards_scanned, heap_used_bytes_before_gc);

      // It is not yet to safe to tell the concurrent mark to
      // start as we have some optional output below. We don't want the
      // output from the concurrent mark thread interfering with this
      // logging output either.

      // We must call G1MonitoringSupport::update_sizes() in the same scoping level
      // as an active TraceMemoryManagerStats object (i.e. before the destructor for the
      // TraceMemoryManagerStats is called) so that the G1 memory pools are updated
      // before any GC notifications are raised.
      g1mm()->update_sizes();

      _gc_tracer_stw->report_evacuation_info(&evacuation_info);
      _gc_tracer_stw->report_tenuring_threshold(_g1_policy->tenuring_threshold());
      _gc_timer_stw->register_gc_end();
      _gc_tracer_stw->report_gc_end(_gc_timer_stw->gc_end(), _gc_timer_stw->time_partitions());
    }
    // It should now be safe to tell the concurrent mark thread to start
    // without its logging output interfering with the logging output
    // that came from the pause.

    if (should_start_conc_mark) {
      // CAUTION: after the doConcurrentMark() call below,
      // the concurrent marking thread(s) could be running
      // concurrently with us. Make sure that anything after
      // this point does not assume that we are the only GC thread
      // running. Note: of course, the actual marking work will
      // not start until the safepoint itself is released in
      // SuspendibleThreadSet::desynchronize().
      do_concurrent_mark();
    }

    // The young list is laid with the survivor regions from the previous
    // pause are appended to the RHS of the young list, i.e.
    // [Newly Young Regions ++ Survivors from last pause].
  HeapRegion* hr = cset_chooser()->peek();
  while (hr != NULL) {
    if (old_region_length() >= max_old_cset_length) {
      // Added maximum number of old regions to the CSet.
      log_debug(gc, ergo, cset)("Finish adding old regions to CSet (old CSet region num reached max). old %u regions, max %u regions",
                                old_region_length(), max_old_cset_length);
      break;
    }

    // Stop adding regions if the remaining reclaimable space is
    // not above G1HeapWastePercent.
    size_t reclaimable_bytes = cset_chooser()->remaining_reclaimable_bytes();
    double reclaimable_percent = _policy->reclaimable_bytes_percent(reclaimable_bytes);
    double threshold = (double) G1HeapWastePercent;
    if (reclaimable_percent <= threshold) {
      // We've added enough old regions that the amount of uncollected
      // reclaimable space is at or below the waste threshold. Stop
      // adding old regions to the CSet.
      log_debug(gc, ergo, cset)("Finish adding old regions to CSet (reclaimable percentage not over threshold). "
                                "old %u regions, max %u regions, reclaimable: " SIZE_FORMAT "B (%1.2f%%) threshold: " UINTX_FORMAT "%%",
                                old_region_length(), max_old_cset_length, reclaimable_bytes, reclaimable_percent, G1HeapWastePercent);
      break;
    }

    double predicted_time_ms = predict_region_elapsed_time_ms(hr);
    if (check_time_remaining) {
      if (predicted_time_ms > time_remaining_ms) {
        // Too expensive for the current CSet.

        if (old_region_length() >= min_old_cset_length) {
          // We have added the minimum number of old regions to the CSet,
          // we are done with this CSet.
          log_debug(gc, ergo, cset)("Finish adding old regions to CSet (predicted time is too high). "
                                    "predicted time: %1.2fms, remaining time: %1.2fms old %u regions, min %u regions",
                                    predicted_time_ms, time_remaining_ms, old_region_length(), min_old_cset_length);
          break;
        }

        // We'll add it anyway given that we haven't reached the
        // minimum number of old regions.
        expensive_region_num += 1;
      }
    } else {
      if (old_region_length() >= min_old_cset_length) {
        // In the non-auto-tuning case, we'll finish adding regions
        // to the CSet if we reach the minimum.

        log_debug(gc, ergo, cset)("Finish adding old regions to CSet (old CSet region num reached min). old %u regions, min %u regions",
                                  old_region_length(), min_old_cset_length);
        break;
      }
    }

    // We will add this region to the CSet.
    time_remaining_ms = MAX2(time_remaining_ms - predicted_time_ms, 0.0);
    predicted_old_time_ms += predicted_time_ms;
    cset_chooser()->pop(); // already have region via peek()
    _g1h->old_set_remove(hr);
    add_old_region(hr);

    hr = cset_chooser()->peek();
  }
  if (hr == NULL) {
    log_debug(gc, ergo, cset)("Finish adding old regions to CSet (candidate old regions not available)");
  }

  if (expensive_region_num > 0) {
    // We print the information once here at the end, predicated on
    // whether we added any apparently expensive regions or not, to
    // avoid generating output per region.
    log_debug(gc, ergo, cset)("Added expensive regions to CSet (old CSet region num not reached min)."
                              "old: %u regions, expensive: %u regions, min: %u regions, remaining time: %1.2fms",
                              old_region_length(), expensive_region_num, min_old_cset_length, time_remaining_ms);
  }

  cset_chooser()->verify();
}

stop_incremental_building();

log_debug(gc, ergo, cset)("Finish choosing CSet. old: %u regions, predicted old region time: %1.2fms, time remaining: %1.2f",
                          old_region_length(), predicted_old_time_ms, time_remaining_ms);