Skip to content

Python

Garbage Collection

Manually triggering collection at chosen points can be faster than relying on automatic collection

import gc

g0, g1, g2 = gc.get_threshold() # default: 700, 10, 10
# gc.set_threshold(10_000, 10, 10)

# before the heavy section: collect generation 0 once, then turn automatic
# collection off and freeze the objects that are currently tracked
gc.collect(generation=0)
gc.set_threshold(0)
gc.disable()
gc.freeze()

heavy_code() # like ML, database

# after the heavy section: unfreeze, set a higher generation-0 threshold,
# turn automatic collection back on and collect generation 0 once
gc.unfreeze()
gc.set_threshold(10_000, 10, 10)
gc.enable()
gc.collect(generation=0)


# exit
# don't cleanup on exit
atexit.register(os._exit, 0) # only for Python < 3.6

IDK

A collection before a POSIX fork() call may free pages for future allocation, which can itself cause copy-on-write in the child process

Hence

  1. Parent process
  2. disable garbage collector
  3. freeze before fork
  4. Child process
  5. Enable garbage collector

Machine Learning

gc.set_threshold(0)
gc.disable()

for epoch in range(n_epochs):
  for batch in batch_data_loader:
    # train
    # eval
      gc.collect(0)

gc.collect()

# exit
atexit.register(os._exit, 0) # only for Python < 3.6

IDK

  • gc.disable() will sometimes get overridden by another library calling gc.enable()

Number Formatting

number = 333.43

"{:02d}".format(1)       ## leading zeroes -> "01"
"{:.2f}".format(number)  ## floating point rounding to 2 decimals -> "333.43"
                         ## (without the dot, "{:2f}" means minimum width 2, not precision)

f"{x:z}" ## rounds negative 0
f"{x:z.1f}"

Hex to RGBA

def hex_to_rgba(h, alpha):
    '''
    Build an "rgba(r, g, b, alpha)" string from a hex colour.

    h     : 6-digit hex colour, with or without leading '#'
    alpha : transparency value, appended unchanged as the 4th component
    '''
    digits = h.lstrip('#')
    channels = []
    for start in (0, 2, 4):
        ## each channel is two hex digits
        channels.append(int(digits[start:start + 2], 16))
    channels.append(alpha)
    return "rgba" + str(tuple(channels))

Custom Rounding

def round_f(x, places, percentage=False):
  if percentage:
    x *= 100

  string = f"{x:z.{places}f}"

  if places > 0:
    string = string.rstrip('0').rstrip('.')

  if percentage:
    string += "%"

  return string
def round_s(x, significant_decimals, max_digits=None, percentage=False):
  if percentage:
    x *= 100

  if max_digits is None:
    max_digits = min(significant_decimals * 2, 4)

  decimal_digits = str(x).split(".")[1]
  pos_first_non_zero = len(decimal_digits) - len(decimal_digits.lstrip("0")) 
  pos = pos_first_non_zero + significant_decimals

  return round_f(x, min(pos, max_digits))

Text

## use ONE of the two splits below, depending on the separator
## (the result of split() is a list, which has no .split() of its own)
names = names.split("\n")
names = names.split(",")

## remove empty strings from string list
names = list(filter(None, names))
class color:
    """ANSI escape sequences for styling terminal text; finish with END."""

    ## foreground colours
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    ## text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    ## reset
    END = '\033[0m'

print(color.BOLD + 'Hello World !' + color.END)

IDK

Name of script

__file__
Useful for pages in Streamlit

Name of current module

__name__
Useful for checking whether the file is being run as a program (`__name__ == "__main__"`) or imported as a library

Input Hidden Text/Password

from getpass import getpass
sender_password = getpass("Password: ")

Date-Time

Refer to Python DateTime Formats

from datetime import datetime

dob = '05/02/1985'
dob = datetime.strptime(dob, '%d/%m/%Y')  ## day/month/year -> datetime(1985, 2, 5, 0, 0)

Find Home Directory

✅ This is cross-platform

from pathlib import Path
home = str(Path.home())

Delete

Move file to Recycle Bin

  from send2trash import send2trash

  send2trash("test_folder")

  send2trash("test.csv")

  ## glob.glob() already returns paths that include `directory`, so the
  ## matches are used as-is (joining again would duplicate a relative directory)
  for file in glob.glob(os.path.join(directory, '*.mov')):
    send2trash(file)
    print("Deleted", file)

❌ Permanently Delete File

  os.remove(file)

  ## glob.glob() already returns paths that include `directory`, so the
  ## matches are used as-is (joining again would duplicate a relative directory)
  for file in glob.glob(os.path.join(directory, '*.mov')):
    os.remove(file)
    print("Deleted", file, "Permanently")

❌ Permanently Delete Folder

  import shutil
  shutil.rmtree(path)

Copy

import shutil

shutil.copyfile(src, dest) ## contents of file only
shutil.copy(src, dest) ## copyfile() + permission mode
shutil.copy2(src, dest) ## copy() + attempts to preserve file metadata

List all files in a directory

import os

directory = "./data"

## os.listdir() returns bare names, so join them with the same directory
for file_name in os.listdir(directory):
  file = os.path.join(directory, file_name)
  print(file)

Create a folder

newpath = r'C:\Program Files\arbitrary'
## exist_ok=True: no error if the folder is already there, and no race
## between checking for the folder and creating it
os.makedirs(newpath, exist_ok=True)

Get only files of a particular type using glob

## glob.glob() already returns paths that include `directory`
for file in glob.glob(os.path.join(directory, '*.mp4')):
  print(file)

Get files of Multiple Types using glob

def list_files(images_dir):
  """
  Collect every jpg / jpeg / png file under images_dir, searching recursively.

  If OUTPUT_FOLDER exists it is first sent to the recycle bin.
  Returns a list of matching paths, grouped by extension in the order
  jpg, jpeg, png.
  """
  if os.path.exists(OUTPUT_FOLDER):
    send2trash(OUTPUT_FOLDER)

  found = []
  for extension in ("jpg", "jpeg", "png"):
    ## "**" together with recursive=True also matches nested sub-folders
    pattern = os.path.join(images_dir, "**", f"*.{extension}")
    found.extend(glob.glob(pattern, recursive=True))

  return found

Get filename with extension

import os
file_name_with_ext = os.path.basename("a/b/c")

Get folder name

filename = "folder/file.mp4"
os.path.dirname(filename)

filename = "folder/folder/file.mp4"
os.path.basename(os.path.dirname(filename))

Get extension only

import os
ext = os.path.splitext(filename)[1]

Get filename only

import os

def get_filename(file):
  """
  Return the name of `file` without its directory and without its last extension.

  file : path string, e.g. "folder/file.mp4" -> "file"
  """
  file_name = os.path.basename(file)
  stem, extension = os.path.splitext(file_name)

  ## to build the name of a copy instead: stem + "_Copy" + extension
  return stem

Files in directory and sub-directory

from os import walk

## specific directory (top level only: stop after the first tuple walk() yields)
files = []
for (dirpath, dirnames, filenames) in walk("./data"):
    files.extend(filenames)
    break

## directory and subdirectories
files = []  ## start fresh, otherwise the names found above are counted twice
for (dirpath, dirnames, filenames) in walk("."):
    files.extend(filenames)

files

Search Substring in String

my_string[-3:] == "pdf"  ## last 3 characters; same as my_string.endswith("pdf")
my_string.find("pdf")    ## index of the first match, -1 if not found
"pdf" in my_string       ## True / False

my_string.index("pdf")   ## like find(), but raises ValueError if not found

IDK

for (i, file) in enumerate(files):
  print(i, "is", file)

Class

## `obj` is any object; `__members__` / `__methods__` do not exist in Python 3
vars(obj)                                                 ## attributes stored on the object (its __dict__)
[name for name in dir(obj) if callable(getattr(obj, name))]  ## names of its methods / callables

dir(obj)                                                  ## every attribute name, inherited ones included

Inspect

Inspect class

def func_of_class(class_name):
  """
  List the names of the callable attributes of a class, skipping any name
  that starts with a double underscore.

  class_name : the class (or any object) to inspect
  Returns the names in the order dir() reports them.
  """
  public_callables = []
  for attr_name in dir(class_name):
    member = getattr(class_name, attr_name)
    if callable(member) and not attr_name.startswith("__"):
      public_callables.append(attr_name)
  return public_callables

obj = my_class()
for func in func_of_class(type(obj)):
  getattr(obj, func)(arg)
Inspect Function: getting arguments of a function (requires `from inspect import getfullargspec`)
  getfullargspec(model_equation).args
Dynamically get the code of a python function
inspect.getsource(func)
Dynamically run python code
python_code = """
print("hello world")
"""
exec(python_code)

Create virtual environment

python -m venv "./venv"
python -m venv "C:blah/blah/venv"
Switch to this virtual environment. If you get an error when a PowerShell script runs, run this command in PowerShell (admin):
  Set-ExecutionPolicy RemoteSigned

Traverse list with index and value

## `values` is any list; the name `list` would refer to the builtin type
for i, val in enumerate(values):
  print(i)
  print(val)

CLI

Argparse

import argparse

TIME_THRESHOLD = 10
HASH_SIZE = 4


def parse_args(argv=None):
  """
  Parse the command-line options.

  argv : list of argument strings; None means "use sys.argv[1:]"
  Returns the argparse namespace with `tt` and `hs`.
  """
  parser = argparse.ArgumentParser(description = "Group similar images")

  ## default= keeps the constants above when a flag is omitted
  ## (without it, the attribute would be None)
  parser.add_argument("--tt", type = int, default = TIME_THRESHOLD, help = f"Time Threshold (seconds), default = {TIME_THRESHOLD}")
  parser.add_argument("--hs", type = int, default = HASH_SIZE, help = f"Hash Size, default = {HASH_SIZE}")

  return parser.parse_args(argv)


if __name__ == "__main__":
  args = parse_args()

  TIME_THRESHOLD = args.tt
  HASH_SIZE = args.hs

Adding to path using setuptools

https://python-packaging.readthedocs.io/en/latest/command-line-scripts.html

Import classes/functions from another python file

from my_other_file import my_func, MyClass

Caching

from functools import cache
import time

@cache
def function():
  time.sleep(10) ## slow work: runs on the first call only, later calls return the cached result
  return 1

Get variable name

import inspect

def var(var):
    """
    Return the first name in the caller's local variables that is bound to
    the very same object as `var` (identity, not equality).

    Returns None when no such name exists. If several names are bound to
    the same object, the first one in the caller's locals is returned.
    """
    current_frame = inspect.currentframe()
    caller_frame = inspect.getouterframes(current_frame)[1]
    local_vars = caller_frame.frame.f_locals

    for name, value in local_vars.items():
        if value is var:
            return name

    return None

## use a different variable name than `var`, otherwise the assignment
## replaces the function and the call below raises TypeError
greeting = "Hello"
var_name = var(greeting)
# doesn't work inside a function

greeting = "Hello"
var_name = f"{greeting=}".split("=")[0]

Memory Usage

def get_memory_usage():
  """
  Return the `rss` value of the current process's memory_info(), divided by
  1024**2 (i.e. in MiB if rss is in bytes).

  NOTE(review): `Process` is presumably psutil.Process; confirm the import.
  """
  rss = Process(os.getpid()).memory_info().rss
  return rss / (1024**2)

Lazy Imports

class LazyImport:
  """
  Stand-in for a module that is imported only when one of its attributes
  is first accessed.

  module_name : dotted name passed to importlib.import_module()
  """

  def __init__(self, module_name):
    self.module_name = module_name
    self._module = None

  def __getattr__(self, attr):
    ## __getattr__ runs only when normal lookup fails. If the instance's own
    ## fields are missing (object created without __init__, e.g. by copy or
    ## pickle), fail fast instead of recursing forever.
    if attr in ("module_name", "_module"):
      raise AttributeError(attr)

    if self._module is None:
      import importlib  ## local import keeps the snippet self-contained
      self._module = importlib.import_module(self.module_name)
    return getattr(self._module, attr)

np = LazyImport("numpy")
np.array([0, 1, 2])
Last Updated: 2024-05-12 ; Contributors: AhmedThahir

Comments