"""
Counting technique performance for dense and sparse data

- count_if_else: Set to 1 if not yet seen and increment otherwise
- count_if: Set to 0 if not yet seen, then increment regardless of containment
- count_exception: Attempt to increment and set to 1 if KeyError caught
- count_setdefault: Set default value to 0, then increment
- count_fromkeys: Create dict with necessary keys set to 0, then increment each
- count_set_and_comprehension: Create dict of items and counts using a set
- count_defaultdict: Increment count, automatically setting unseen values to 0
- count_counter: Use counter to count number of occurrences of each value
"""
import sys
from textwrap import dedent
import timeit


LIST_SIZE = 5_000


def make_setup(list_size, range_size):
    return dedent("""
        from collections import defaultdict, Counter
        import random
        random.seed('MY SEED!')
        range_size = """ + str(range_size) + """
        list_size = """ + str(list_size) + """
        initial_list = [random.randint(0, range_size) for i in range(list_size)]

        def count_if_else(my_list):
            counts = {}
            for item in my_list:
                if item not in counts:
                    counts[item] = 1
                else:
                    counts[item] += 1
            return counts

        def count_if(my_list):
            counts = {}
            for item in my_list:
                if item not in counts:
                    counts[item] = 0
                counts[item] += 1
            return counts

        def count_exception(my_list):
            counts = {}
            for item in my_list:
                try:
                    counts[item] += 1
                except KeyError:
                    counts[item] = 1
            return counts

        def count_setdefault(my_list):
            counts = {}
            for item in my_list:
                counts.setdefault(item, 0)
                counts[item] += 1
            return counts

        def count_fromkeys(my_list):
            counts = dict.fromkeys(my_list, 0)
            for item in my_list:
                counts[item] += 1
            return counts

        def count_set_and_comprehension(my_list):
            return dict((item, my_list.count(item)) for item in set(my_list))

        def count_defaultdict(my_list):
            counts = defaultdict(int)
            for item in my_list:
                counts[item] += 1
            return counts

        def count_counter(my_list):
            return Counter(my_list)

    """)


def time_function(func_name, range_size):
    timed_code = f"new_list = initial_list[:]; {func_name}(new_list)"
    time = min(timeit.repeat(
        timed_code,
        setup=make_setup(LIST_SIZE, range_size),
        repeat=3,
        number=50,
    ))
    print(f"{func_name} time: {time*1000:.0f}ms")


for range_size in (100, 1_000, 10_000):
    print(f"List of {LIST_SIZE} random numbers generated in range 0 to {range_size}")
    time_function("count_if_else", range_size)
    time_function("count_if", range_size)
    time_function("count_exception", range_size)
    time_function("count_setdefault", range_size)
    time_function("count_fromkeys", range_size)
    time_function("count_defaultdict", range_size)
    time_function("count_counter", range_size)

189 views

Share

pym.dev/p/23vzw/

Need to share some Python code?

Create a new snippet