Review linear algebra and calculus essentials for ANNs

Nerd Cafe

1. Linear Algebra Essentials for ANNs

Vectors & Matrices

Vectors: 1D arrays (e.g., input features).

import numpy as np
# A vector is a 1-D array; this one holds three input feature values.
x = np.array([1, 2, 3])  # Input vector

Matrices: 2D arrays (e.g., weights in a layer).

# A matrix is a 2-D array; this 2x2 example stands in for a layer's weights.
W = np.array([[1, 2], [3, 4]])  # Weight matrix

Python Example:

import numpy as np

# Two 1-D vectors with three components each.
v1, v2 = np.array([1, 2, 3]), np.array([4, 5, 6])

# Two 2x2 matrices, written row by row.
m1 = np.array([
    [1, 2],
    [3, 4],
])
m2 = np.array([
    [5, 6],
    [7, 8],
])

# Show one vector and one matrix; the "\n" starts the matrix on a new line.
print("Vector v1:", v1)
print("Matrix m1:\n", m1)

Output

Vector v1: [1 2 3]
Matrix m1:
 [[1 2]
 [3 4]]

Dot Product (Inner Product)

Computes weighted sums for neuron inputs:

z=W⋅x+b

import numpy as np

# Layer parameters: one row of W and one entry of b per neuron.
W = np.array([
    [0.1, -0.3, 0.5],   # weights of neuron 1
    [0.7, 0.2, -0.1],   # weights of neuron 2
])                      # shape (2, 3): 2 neurons x 3 inputs
b = np.array([0.2, -0.4])  # shape (2,): one bias per neuron

# Input vector with 3 features.
x = np.array([0.5, -1.2, 2.1])

# z = W·x + b: the matrix-vector product gives one weighted sum per neuron,
# then the bias of matching shape (2,) is added element-wise.
z = W.dot(x) + b

print("Weighted sum (z):", z)

Output:

Weighted sum (z): [ 1.66 -0.5 ]

Key Notes:

Dimensions:
- x: Shape (3,) (3 input features).
- W: Shape (2, 3) (2 neurons, each with 3 weights).
- b: Shape (2,) (1 bias per neuron).
- Output z: Shape (2,) (output of 2 neurons).
What This Represents:
- Simulates a dense layer in ANNs.
- Each neuron computes:

z_{i}=\sum_{j=1}^{3}W_{i,j}\cdot x_{j}+b_{i}

Matrix Multiplication

Critical for forward propagation in ANNs.

import numpy as np

# Two length-3 vectors.
v1 = np.array([1, 2, 3])
v2 = np.array([4, 5, 6])

# Two 2x2 matrices.
m1 = np.array([[1, 2], [3, 4]])
m2 = np.array([[5, 6], [7, 8]])

# Inner product of the vectors: 1*4 + 2*5 + 3*6.
dot_product = v1.dot(v2)
print("Dot product of v1 and v2:", dot_product)

# Matrix product via the @ operator, which is equivalent to np.matmul(m1, m2).
mat_product = m1 @ m2
print("Matrix product of m1 and m2:\n", mat_product)

Output

Dot product of v1 and v2: 32
Matrix product of m1 and m2:
 [[19 22]
 [43 50]]

Matrix Transpose

Concept: Flipping a matrix over its diagonal, swapping row and column indices.

Python Example:

import numpy as np

# A 2x2 matrix to transpose.
m1 = np.array([
    [1, 2],
    [3, 4],
])

print("Original matrix:\n", m1)
# np.transpose(m1) is the function form of m1.T: rows become columns.
print("Transposed matrix:\n", np.transpose(m1))

Output

Original matrix:
 [[1 2]
 [3 4]]
Transposed matrix:
 [[1 3]
 [2 4]]

Special Matrices

Concept: Identity matrices (I), diagonal matrices, and their properties.

Python Example:

import numpy as np

# 3x3 identity matrix: ones on the main diagonal, zeros elsewhere (float entries).
identity = np.identity(3)
print("Identity matrix:\n", identity)

# Square matrix whose main diagonal holds the given values and is zero elsewhere.
diagonal_values = np.array([1, 2, 3])
diagonal = np.diag(diagonal_values)
print("Diagonal matrix:\n", diagonal)

Output

Identity matrix:
 [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
Diagonal matrix:
 [[1 0 0]
 [0 2 0]
 [0 0 3]]

2. Calculus Essentials

Derivatives and Gradients

Concept: The derivative measures how a function changes as its input changes. The gradient generalizes this to multiple dimensions.

Math:

f(x)=x^{2}\Rightarrow f'(x)=2x

Python Example:

import numpy as np
import matplotlib.pyplot as plt

def f(x):
    """Return the square of ``x``; works on scalars and NumPy arrays alike."""
    squared = x ** 2
    return squared

def df(x):
    """Return ``2 * x``, the analytic derivative of ``x**2`` evaluated at ``x``."""
    slope = 2 * x
    return slope

# Sample 100 evenly spaced points on [-3, 3] for plotting.
x = np.linspace(start=-3, stop=3, num=100)

# Axis decorations do not depend on the curves, so set them up first.
plt.title('Function and its Derivative')
plt.xlabel('x')
plt.ylabel('y')
plt.grid(True)

# Draw the function and then its derivative on the same axes.
for values, curve_label in ((f(x), 'f(x) = x²'), (df(x), "f'(x) = 2x")):
    plt.plot(x, values, label=curve_label)

# The legend is built from the labelled curves, so it comes after plotting.
plt.legend()
plt.show()

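Output: a plot showing the parabola f(x) = x² together with the straight line f'(x) = 2x over the interval [-3, 3].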

Partial Derivatives

Concept: How a multi-variable function changes when only one variable changes.

Math:

f(x,y)=x^{2}y+y^{3}\Rightarrow \frac{\partial f}{\partial x}=2xy\quad\text{and}\quad\frac{\partial f}{\partial y}=x^{2}+3y^{2}

Python Example:

import sympy as sp

# Symbols for the two independent variables.
x, y = sp.symbols('x y')

# f(x, y) = x^2 * y + y^3
f = x**2 * y + y**3

# Differentiate with respect to one symbol at a time, using the
# expression's own .diff method (equivalent to sp.diff(f, symbol)).
df_dx = f.diff(x)
df_dy = f.diff(y)

# Print both partial derivatives.
print("∂f/∂x =", df_dx)
print("∂f/∂y =", df_dy)

Output:

∂f/∂x = 2*x*y
∂f/∂y = x**2 + 3*y**2

The Chain Rule

Concept: Essential for backpropagation in neural networks. Allows computation of derivatives of composite functions.

Math:

h(x)=(f\circ g)(x)=f(g(x))

then

h'(x)=f'(g(x))\cdot g'(x)

Example

Given:

f(x)=\sin(2x)\quad\text{and}\quad g(x)=e^{3x}

Then:

h(x)=(f\circ g)(x)=\sin(2e^{3x})\Rightarrow h'(x)=6e^{3x}\cos(2e^{3x})

We’ll use SymPy to define the functions and compute the derivative step by step.

Python Example:

import sympy as sp

# The single independent variable.
x = sp.Symbol('x')

# Outer function f(x) = sin(2x) and inner function g(x) = e^(3x).
f = sp.sin(2 * x)
g = sp.exp(3 * x)

# Compose them: replacing x by g(x) inside f gives h(x) = sin(2 * e^(3x)).
h = f.subs({x: g})

# SymPy applies the chain rule itself when differentiating the composite.
h_prime = h.diff(x)

# Print the composite and its simplified derivative.
print("h(x) =", h)
print("h'(x) =", sp.simplify(h_prime))

Output

h(x) = sin(2*exp(3*x))
h'(x) = 6*exp(3*x)*cos(2*exp(3*x))

Previous: Biological Inspiration vs. Artificial Neurons | Next: Activation Function

Last updated 1 month ago

# Create example vectors and matrices and print one of each.
# (Line breaks restored: collapsed onto one line, everything after the
# first "#" was a comment and none of these statements ran.)
import numpy as np

# Vectors
v1 = np.array([1, 2, 3])
v2 = np.array([4, 5, 6])

# Matrices
m1 = np.array([[1, 2], [3, 4]])
m2 = np.array([[5, 6], [7, 8]])

print("Vector v1:", v1)
print("Matrix m1:\n", m1)

# Compute the weighted sums z = W·x + b for a layer of two neurons.
# (Line breaks restored: collapsed onto one line, everything after the
# first "#" was a comment and none of these statements ran.)
import numpy as np

# Define inputs (x), weights (W), and bias (b)
x = np.array([0.5, -1.2, 2.1])  # Input vector (3 features)
W = np.array([[0.1, -0.3, 0.5],  # Weight matrix (2 neurons x 3 inputs)
              [0.7, 0.2, -0.1]])
b = np.array([0.2, -0.4])        # Bias vector (1 per neuron)

# Compute z = W·x + b
z = np.dot(W, x) + b  # Matrix-vector product, then element-wise bias add

print("Weighted sum (z):", z)

# Compute a vector dot product and a 2x2 matrix product.
# (Line breaks restored: collapsed onto one line, everything after the
# first "#" was a comment and none of these statements ran.)
import numpy as np

# Vectors
v1 = np.array([1, 2, 3])
v2 = np.array([4, 5, 6])

# Matrices
m1 = np.array([[1, 2], [3, 4]])
m2 = np.array([[5, 6], [7, 8]])

# Dot product
dot_product = np.dot(v1, v2)
print("Dot product of v1 and v2:", dot_product)

# Matrix multiplication
mat_product = np.matmul(m1, m2)  # or m1 @ m2
print("Matrix product of m1 and m2:\n", mat_product)

# Plot f(x) = x^2 and its derivative f'(x) = 2x on the same axes.
# (Line breaks restored: collapsed onto one line, the text was not valid
# Python, with two imports and several statements fused together.)
import numpy as np
import matplotlib.pyplot as plt


def f(x):
    """Return the square of ``x``."""
    return x**2


def df(x):
    """Return ``2 * x``, the derivative of ``x**2``."""
    return 2*x


# Sample 100 evenly spaced points on [-3, 3].
x = np.linspace(-3, 3, 100)
plt.plot(x, f(x), label='f(x) = x²')
plt.plot(x, df(x), label="f'(x) = 2x")
plt.legend()
plt.title('Function and its Derivative')
plt.xlabel('x')
plt.ylabel('y')
plt.grid(True)
plt.show()

# Compute the partial derivatives of f(x, y) = x^2 * y + y^3 symbolically.
# (Line breaks restored: collapsed onto one line, everything after the
# first "#" was a comment and none of these statements ran.)
import sympy as sp

# Define the variables
x, y = sp.symbols('x y')

# Define the function
f = x**2 * y + y**3

# Compute partial derivatives
df_dx = sp.diff(f, x)
df_dy = sp.diff(f, y)

# Display the results
print("∂f/∂x =", df_dx)
print("∂f/∂y =", df_dy)

# Differentiate the composite h(x) = sin(2 * e^(3x)) symbolically.
# (Line breaks restored: collapsed onto one line, everything after the
# first "#" was a comment and none of these statements ran.)
import sympy as sp

# Define the variable
x = sp.symbols('x')

# Define inner and outer functions
g = sp.exp(3 * x)         # g(x) = e^(3x)
f = sp.sin(2 * x)         # f(x) = sin(2x)

# Define h(x) = f(g(x)) = sin(2 * e^(3x))
h = f.subs(x, g)

# Differentiate h using chain rule automatically
h_prime = sp.diff(h, x)

# Display results
print("h(x) =", h)
print("h'(x) =", sp.simplify(h_prime))