Some Notes on Stable SortingΒΆ

#include <iostream>
#include <algorithm>
#include <vector>

using namespace std;

struct Player {
    string name;
    int score;
};

ostream& operator<<(ostream& out, const Player& a) {
    return out << "{" << a.name << ", " << a.score << "}";
}


//
// cmp_name and cmp_score are simple comparisons that compare Player
// objects on one field each.
//

bool cmp_name(const Player& a, const Player& b) {
    return a.name < b.name;
}

bool cmp_score(const Player& a, const Player& b) {
    return a.score < b.score;
}

// The data to be sorted has uniques names, but there are some duplicate
// scores. Note that the "column" of names is in ascending sorted order,
// but the column of scores has no particular order.
//
// If we sort the objects by just score using a **non-stable** sort, then
// for objects with the same score the corresponding names *might* not be in
// alphabetical order.
//
// If we sort the objects by score using a **stable** sort, then for objects
// with the same score the corresponding names are guaranteed to be in
// alphabetical order because they were in alphabetical order before sorting.
void test_sort_a() {
    vector<Player> data = {
        Player{"a", 3},
        Player{"b", 2},
        Player{"c", 3},
        Player{"d", 1},
        Player{"e", 1},
        Player{"f", 2},
        Player{"g", 1},
        Player{"h", 2}
    };
    cout << "\ntest_sort_a:\n";
    stable_sort(data.begin(), data.end(), cmp_score);
    for(Player p : data) cout << p << "\n";
}

// Here the same data is sorted, but it is initially scrambled. The
// sorted groups of equal scores are not in any particular order.
void test_sort_b() {
    vector<Player> data = {
        Player{"f", 2},
        Player{"c", 3},
        Player{"d", 1},
        Player{"g", 1},
        Player{"h", 2},
        Player{"a", 3},
        Player{"e", 1},
        Player{"b", 2}
    };
    cout << "\ntest_sort_b:\n";
    stable_sort(data.begin(), data.end(), cmp_score);
    for(Player p : data) cout << p << "\n";
}

// The data is initially scrambled as before, but now we sort it twice: first
// by name, and then by score. Since we use a stable sort, the data is ordered
// by score, and names with identical scores are in alphabetical order.
void test_sort_c() {
    vector<Player> data = {
        Player{"f", 2},
        Player{"c", 3},
        Player{"d", 1},
        Player{"g", 1},
        Player{"h", 2},
        Player{"a", 3},
        Player{"e", 1},
        Player{"b", 2}
    };
    cout << "\ntest_sort_c:\n";
    stable_sort(data.begin(), data.end(), cmp_name);
    for(Player p : data) cout << p << "\n";
    cout << "-----\n";
    stable_sort(data.begin(), data.end(), cmp_score);
    for(Player p : data) cout << p << "\n";
}

//
// Another way to sort the data in the same order as the test_sort_c is to use
// a "smarter" comparison function, e.g.:
//
bool cmp_full(const Player& a, const Player& b) {
    if (a.score == b.score) {
        return a.name < b.name;
    } else {
        return a.score < b.score;
    }
}

//
// In practice, it is more flexible to use simple comparisons (like cmp_score
// and cmp_name) in multiple calls to a stable sort. Sorting objects with many
// fields can get quite complex and you might not know ahead of time how data
// ought to be sorted. For example, a human resource director might want to
// sort employees first by salary (highest to lowest), then by age (highest to
// lowest), then by gender, and then by name (alphabetically). Later, they
// might want to sort the same data first by age (lowest to highest), and then
// by name (alphabetically). You can't predict all possible sort orderings
// they might want.
//

void test_sort_d() {
    vector<Player> data = {
        Player{"f", 2},
        Player{"c", 3},
        Player{"d", 1},
        Player{"g", 1},
        Player{"h", 2},
        Player{"a", 3},
        Player{"e", 1},
        Player{"b", 2}
    };
    cout << "\ntest_sort_d:\n";
    stable_sort(data.begin(), data.end(), cmp_full);
    for(Player p : data) cout << p << "\n";
}

int main() {
    test_sort_a();
    test_sort_b();
    test_sort_c();
    test_sort_d();
}