Python logging package
What is it?
Why?
Python logging package
For example
import logging
logger = logging.getLogger("my_logger")
logger.info("hello!")
logger.warning("watch out, exiting!")
logger.debug("exiting")
The above will just show the middle: default is stdout + above warning
Python logging package
For example
import logging
logger = logging.getLogger("my_logger")
logging.basicConfig(
filename="example.log",
encoding="utf-8",
level=logging.DEBUG,
)
logger.info("hello!")
logger.warning("watch out, exiting!")
logger.debug("exiting")
Now all will get recoded to the file instead
Python logging package
Nice patterns
import logging
# my_package/a_module.py
logger = logging.getLogger(__file__)
logger.info("hello!")
This will get logger my_package.a_module
it will inherent properties from my_package
Python logging package
Thread-safe by default
logger = logging.getLogger("my_logger")
def worker(ident, num):
for i in range(num):
time.sleep(0.05)
logger.info(f"{ident}: {i}")
threads = []
for ind in range(2):
threads.append(
threading.Thread(
target=worker, args=(f"worker {ind}", (ind + 1) * 10)
)
)
threads[-1].start()
for th in threads:
th.join()
Python logging package
Multiprocessing: queues, files or builtin
import logging
import logging.handlers
import multiprocessing
import time
import random
def worker(log_queue, ident):
logger = logging.getLogger(ident)
logger.setLevel(logging.DEBUG)
handler = logging.handlers.QueueHandler(log_queue)
logger.addHandler(handler)
for i in range(10):
time.sleep(random.random() * 0.1)
logger.info(f"{ident}: {i}")
log_queue: multiprocessing.Queue = multiprocessing.Queue(-1)
logger = logging.getLogger("my_logger")
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setLevel(logging.INFO)
logger.addHandler(handler)
queue_handler = logging.handlers.QueueListener(log_queue, handler)
queue_handler.start()
processes = []
for i in range(5):
p = multiprocessing.Process(target=worker, args=(log_queue, f"Worker-{i}"))
processes.append(p)
p.start()
for p in processes:
p.join()
Python logging package
Multiprocessing: queues, files or builtin
import logging
import pathlib
import multiprocessing
import time
import random
import tempfile
def worker(ident, dirname):
logger = logging.getLogger(ident)
logger.setLevel(logging.DEBUG)
handler = logging.FileHandler(dirname / f"{ident}.log")
logger.addHandler(handler)
for i in range(10):
time.sleep(random.random() * 0.1)
logger.info(f"{ident}: {i}")
with tempfile.TemporaryDirectory() as tmpdirname:
processes = []
dirname = pathlib.Path(tmpdirname)
for i in range(5):
p = multiprocessing.Process(
target=worker, args=(f"Worker-{i}", dirname)
)
processes.append(p)
p.start()
for p in processes:
p.join()
for logfile in dirname.glob("*.log"):
print(f"reading {logfile}")
Python logging package
Multiprocessing: queues, files or builtin
import logging
import multiprocessing
import time
import random
def worker(ident):
logger = multiprocessing.get_logger()
for i in range(10):
time.sleep(random.random() * 0.1)
logger.info(f"{ident}: {i}")
logger = multiprocessing.log_to_stderr()
logger.setLevel(logging.INFO)
processes = []
for i in range(5):
p = multiprocessing.Process(
target=worker, args=(f"Worker-{i}", )
)
processes.append(p)
p.start()
for p in processes:
p.join()
Python logging package
(these examples are also in in the handouts)
Python logging package
My standard pattern is some variant of this
def setup_loggers(log_folder=None, stdout=True, verbosity=level):
parallel, rank = check_parallel()
if parallel:
formatter = logging.Formatter(
f"RANK{rank} - %(asctime)s %(levelname)s %(name)s - %(message)s"
)
else:
formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s - %(message)s")
formatter.default_time_format = "%Y-%m-%d %H:%M:%S"
formatter.default_msec_format = "%s.%03d"
handlers = []
if stdout:
handlers.append(logging.StreamHandler(sys.stdout))
if log_folder is not None:
if not isinstance(log_folder, pathlib.Path):
log_folder = pathlib.Path(log_folder)
now = datetime.datetime.now()
datetime_str = now.strftime("%Y-%m-%d_at_%H-%M")
if not log_folder.is_dir():
assert not log_folder.is_file(), f"Cannot use '{log_folder}', is a file not a folder"
log_folder.mkdir(parents=True)
if parallel:
log_fname = str(log_folder / f"hardtarget_{datetime_str}_RANK{rank}.log")
else:
log_fname = str(log_folder / f"hardtarget_{datetime_str}.log")
handlers.append(logging.FileHandler(log_fname))
liblogger = logging.getLogger("hardtarget")
liblogger.propagate = False
for handler in handlers:
handler.setFormatter(formatter)
handler.setLevel(level)
liblogger.setLevel(level)
liblogger.addHandler(handler)
return liblogger
Profiling
Different tools
Profiling
Profiling
Profiling
def list_comp():
L = [i for i in range(1000)]
def loop():
L = []
for i in range(1000):
L.append(i)
if __name__ == "__main__":
import timeit
num = 10_000
lc_dt = timeit.timeit(list_comp, number=num) / num
lo_dt = timeit.timeit(loop, number=num) / num
print(f"avg list comp time: {lc_dt} s")
print(f"avg loop time : {lo_dt} s")
print(f"list comp speedup: {lo_dt/lc_dt} times")
Profiling
> python -m timeit "'-'.join(str(n) for n in range(100))"
50000 loops, best of 5: 5.39 usec per loop
Profiling
Profiling
import yappi
import numpy as np
def func():
for ind in range(1_000_000):
x = ind % 69
del x
yappi.set_clock_type("cpu")
yappi.start()
for index in range(10):
y = np.eye(4) * 49.0
func()
yappi.get_func_stats().print_all()
yappi.get_thread_stats().print_all()
Profiling
Clock type: CPU
Ordered by: totaltime, desc
name ncall tsub ttot tavg
..on-scripts/yappi_example.py:5 func 10 0.200759 0.200759 0.020076
..y/lib/_twodim_base_impl.py:163 eye 10 0.000056 0.000086 0.000009
name id tid ttot scnt
_MainThread 0 139949853088704 0.201297 1
Profiling
Profiling
import io
import cProfile
import pstats
import numpy as np
pr = cProfile.Profile()
pr.enable()
L = [i for i in range(1000)]
L = []
for i in range(1_000):
L.append(i)
for i in range(10_000):
x = np.eye(3) @ np.ones((3,))
pr.disable()
s = io.StringIO()
ps = pstats.Stats(pr, stream=s).sort_stats(pstats.SortKey.CUMULATIVE)
ps.print_stats(20)
print("PROFILING: \n" + s.getvalue())
Profiling
PROFILING:
71001 function calls in 0.031 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
10000 0.012 0.000 0.017 0.000 /home/danielk/venvs/basic/lib/python3.13/site-packages/numpy/lib/_twodim_base_impl.py:163(eye)
10000 0.010 0.000 0.014 0.000 /home/danielk/venvs/basic/lib/python3.13/site-packages/numpy/_core/numeric.py:137(ones)
10000 0.003 0.000 0.003 0.000 {built-in method numpy.zeros}
10000 0.003 0.000 0.003 0.000 {built-in method numpy.empty}
20000 0.002 0.000 0.002 0.000 {built-in method _operator.index}
10000 0.001 0.000 0.001 0.000 /home/danielk/venvs/basic/lib/python3.13/site-packages/numpy/_core/multiarray.py:1098(copyto)
1000 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
Profiling
Profiling
from line_profiler import profile
import numpy as np
@profile
def func():
L = [i for i in range(1000)]
L = []
for i in range(1_000):
L.append(i)
for i in range(10_000):
x = np.eye(3) @ np.ones((3,))
del x
func()
Profiling
$ python -m kernprof -lv code/python-scripts/kernprof_example.py
Wrote profile results to kernprof_example.py.lprof
Timer unit: 1e-06 s
Total time: 0.0465161 s
File: code/python-scripts/kernprof_example.py
Function: func at line 5
Line # Hits Time Per Hit % Time Line Contents
=======================================================
5 @profile
6 def func():
7 1001 146.7 0.1 0.3 L = [i for i in range(1000)]
8 1 5.9 5.9 0.0 L = []
9 1001 97.1 0.1 0.2 for i in range(1_000):
10 1000 92.9 0.1 0.2 L.append(i)
11 10001 1170.0 0.1 2.5 for i in range(10_000):
12 10000 45003.3 4.5 96.7 x = np.eye(3) @ np.ones((3,))
13 1 0.2 0.2 0.0 del x
Profiling
Profiling
from threading import Event, Thread
import numpy as np
import signal
def worker(exit_event: Event):
while not exit_event.is_set():
x = np.eye(3) @ np.ones((3,))
x += 3
ev = Event()
th = Thread(target=worker, args=(ev,))
th.start()
try:
signal.pause()
except KeyboardInterrupt:
ev.set()
th.join()
Profiling
Profiling
Profiling
import time
PROFILES = {}
def profile(func):
def timed_func(*args, **kw):
t0 = time.time()
ret = func(*args, **kw)
dt = time.time() - t0
if func.__name__ not in PROFILES:
PROFILES[func.__name__] = [0, 0]
PROFILES[func.__name__][0] += 1
PROFILES[func.__name__][1] += dt
return ret
return timed_func
@profile
def func():
L = []
for i in range(10_000):
L.append(i)
if __name__ == "__main__":
for lop in range(250):
func()
for fname, (num, ttot) in PROFILES.items():
print(f"{fname}: called {num} times - total time {ttot:.2e} s (avg {ttot/num:.2e} s)")
Profiling
$ python code/python-scripts/custom_profile_example.py
func: called 250 times - total time 4.40e-02 s (avg 1.76e-04 s)
Examples and homework
Let's check out the handout above!