The implementations can seem somewhat tricky at first place , but not that much

Partial pivoting is used to minimize error

Echelon Form

def echelon(A):

  n_col = A.shape[1]
  n_row = A.shape[0]

  pivot_cols = []
  cur_pivot_row = 0

  for col in range(0,n_col):

    ## finding the row with maximum absolute value / partial pivoting
    max_abs_row = (0,-1) # (value,row)
    for row in range(cur_pivot_row, n_row):
      if np.abs(A[row][col]) >= max_abs_row[0]:
        max_abs_row = (np.abs(A[row][col]),row)

    ## row interchange (cur_pivot_row and max_abs_row) 
    A[[cur_pivot_row,max_abs_row[1]]] = A[[max_abs_row[1],cur_pivot_row]]

    if A[cur_pivot_row][col] != 0 :
      pivot_cols.append(col) ## updating pivot column
      for row in range(cur_pivot_row+1, n_row): ## eliminating everything down
        mul = A[row][col]/A[cur_pivot_row][col]
        A[row][col:] = A[row][col:] - mul*A[cur_pivot_row,col:]

      cur_pivot_row += 1
      # print(A)

  return A , pivot_cols

Reduced Echelon Form

def rref(A):

  A , pivot_cols = echelon(A)

  n_col = A.shape[1]
  n_row = A.shape[0]

  for col in pivot_cols:

    pivot_row = -1
    for row in range(n_row-1,-1,-1):
      if A[row][col] != 0 :
        pivot_row = row
        break

    A[pivot_row][:] = A[pivot_row][:] / A[pivot_row][col] # scaling to 1
    
    for row in range(0,pivot_row): ## eliminating everything up
      mul = A[row][col]/A[pivot_row][col]
      A[row][:] = A[row][:] - mul*A[pivot_row,:]

  return A , pivot_cols

Example

import numpy as np

M = np.array([[0,-3,-6,4,9],
     [-1,-2,-1,3,1],
     [-2,-3,0,3,-1],
     [1,4,5,-9,-7]],dtype = float)

# M  = np.array([[0,-3,-6,4],
#      [-1,-2,-1,3],
#      [-2,-3,0,3],
#      [1,4,5,-9]],dtype = float)

print(M)
print(echelon(M))
print(rref(M))