Master artificial intelligence and deep learning to build cutting-edge AI applications including computer vision, NLP, and generative AI.
Learn advanced neural networks, transformers, GANs, and deploy AI models to production with modern frameworks.
Prerequisites: Python proficiency, machine learning basics, linear algebra and calculus fundamentals, NumPy/Pandas experience
Master CNNs for image classification, object detection, and computer vision tasks. Learn convolutional layers, pooling, and advanced architectures.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
# CNN classifier: three conv stages followed by a dense head
def create_cnn_model(input_shape=(224, 224, 3), num_classes=10):
    """
    Assemble a Sequential CNN image classifier.

    The network has three convolutional stages with 32, 64 and 128 filters.
    Each stage is two 3x3 'same'-padded ReLU convolutions, each followed by
    batch normalization, then 2x2 max pooling and 25% dropout. The head
    flattens the feature map, applies Dense(512) + batch norm + 50% dropout
    and ends in a softmax layer with `num_classes` units.

    Args:
        input_shape: shape passed as `input_shape` to the first convolution.
        num_classes: number of units in the final softmax layer.

    Returns:
        An uncompiled keras.Sequential model.
    """
    def conv3x3(filters, **extra):
        # Every convolution in the network shares these settings.
        return layers.Conv2D(filters, (3, 3), activation='relu',
                             padding='same', **extra)

    stack = []
    for stage, filters in enumerate((32, 64, 128)):
        # Only the very first layer declares the input shape.
        first_layer_opts = {'input_shape': input_shape} if stage == 0 else {}
        stack += [
            conv3x3(filters, **first_layer_opts),
            layers.BatchNormalization(),
            conv3x3(filters),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
        ]

    # Fully connected classification head
    stack += [
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ]
    return keras.Sequential(stack)
# Create and compile model
model = create_cnn_model(num_classes=10)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    # categorical_crossentropy pairs with the softmax output layer; the
    # commented-out data loader below uses class_mode='categorical'.
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# summary() prints the layer table itself and returns None, so wrapping it
# in print() would add a stray "None" line after the table.
model.summary()
# Data augmentation for better generalization.
# Only the training generator applies random transforms; the test generator
# just rescales pixel values.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest',
)
test_datagen = ImageDataGenerator(rescale=1./255)
# Load data from directories
# train_generator = train_datagen.flow_from_directory(
#     'data/train',
#     target_size=(224, 224),
#     batch_size=32,
#     class_mode='categorical'
# )
# Callbacks for training
callbacks = [
    # Stop once val_loss has not improved for 10 epochs and roll back to the
    # best weights seen.
    keras.callbacks.EarlyStopping(
        restore_best_weights=True,
        patience=10,
        monitor='val_loss',
    ),
    # Halve the learning rate after 5 stagnant epochs, never below 1e-7.
    keras.callbacks.ReduceLROnPlateau(
        min_lr=1e-7,
        patience=5,
        factor=0.5,
        monitor='val_loss',
    ),
    # Keep only the checkpoint with the best validation accuracy.
    keras.callbacks.ModelCheckpoint(
        filepath='best_model.h5',
        save_best_only=True,
        monitor='val_accuracy',
    ),
]
# Train model
# history = model.fit(
#     train_generator,
#     epochs=50,
#     validation_data=val_generator,
#     callbacks=callbacks
# )
Leverage powerful pre-trained models like ResNet, VGG, and EfficientNet. Fine-tune them for your specific tasks with minimal data.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import ResNet50, VGG16, EfficientNetB0
from tensorflow.keras import layers
import numpy as np
# Load pre-trained ResNet50 (trained on ImageNet)
base_model = ResNet50(
    weights='imagenet',
    include_top=False,  # Exclude classification layer
    input_shape=(224, 224, 3)
)
# Freeze base model layers (don't train them initially)
base_model.trainable = False
# Add custom classification head
model = keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')  # 10 classes
])
model.compile(
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that recent Keras releases no longer accept.
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# Train only the top layers
# history = model.fit(train_data, epochs=10, validation_data=val_data)
# Fine-tuning: Unfreeze some layers and train with lower learning rate
base_model.trainable = True
# Freeze early layers, train later ones
for layer in base_model.layers[:100]:
    layer.trainable = False
# Re-compile so the changed `trainable` flags take effect, with a much lower
# learning rate. `learning_rate` is the supported keyword; `lr` is a
# deprecated alias that recent Keras releases no longer accept.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# Continue training with fine-tuning
# history_fine = model.fit(train_data, epochs=20, validation_data=val_data)
# Using EfficientNet (more efficient architecture)
def create_efficient_model(num_classes=10):
    """
    Build a classifier on top of a frozen EfficientNetB0 backbone.

    The backbone is loaded with ImageNet weights, without its top classifier,
    for 224x224x3 inputs, and is marked non-trainable. The head applies
    global average pooling, batch normalization, 50% dropout and a softmax
    layer with `num_classes` units.

    Returns:
        An uncompiled keras.Sequential model.
    """
    backbone = EfficientNetB0(
        include_top=False,
        weights='imagenet',
        input_shape=(224, 224, 3),
    )
    backbone.trainable = False

    head = [
        layers.GlobalAveragePooling2D(),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ]
    return keras.Sequential([backbone, *head])
efficient_model = create_efficient_model()
efficient_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Feature extraction from pre-trained model: a second model that shares
# base_model's input and exposes the 'conv5_block3_out' activation.
feature_extractor = keras.Model(
    outputs=base_model.get_layer('conv5_block3_out').output,
    inputs=base_model.input,
)
# Extract features for an image
# image = preprocess_image('path/to/image.jpg')
# features = feature_extractor.predict(image)
# print(f"Feature shape: {features.shape}")
Understand the transformer architecture that powers modern NLP. Implement attention mechanisms and use pre-trained language models.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from transformers import BertTokenizer, TFBertModel
import numpy as np
# Multi-Head Attention implementation
class MultiHeadAttention(layers.Layer):
    """
    Multi-head scaled dot-product attention.

    Queries, keys and values are each projected by a Dense(d_model) layer,
    split into `num_heads` heads of size `d_model // num_heads`, attended
    per head, re-joined and passed through a final Dense(d_model) layer.

    Args:
        d_model: width of the projections; must be divisible by `num_heads`.
        num_heads: number of attention heads.
        **kwargs: forwarded to keras.layers.Layer (e.g. `name`).

    Raises:
        ValueError: if `d_model` is not divisible by `num_heads`.
    """

    def __init__(self, d_model, num_heads, **kwargs):
        super().__init__(**kwargs)
        # Raise explicitly instead of using `assert`, which is stripped when
        # Python runs with -O.
        if d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by "
                f"num_heads ({num_heads})"
            )
        self.num_heads = num_heads
        self.d_model = d_model
        self.depth = d_model // self.num_heads
        self.wq = layers.Dense(d_model)
        self.wk = layers.Dense(d_model)
        self.wv = layers.Dense(d_model)
        self.dense = layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        """Reshape to (batch, -1, heads, depth) and move heads to axis 1."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, v, k, q, mask=None):
        """
        Run attention of `q` over `k`/`v`.

        Args:
            v, k, q: value, key and query tensors (note the argument order).
                Assumed to be rank-3 (batch, seq_len, features) - TODO confirm
                against callers.
            mask: optional tensor; `mask * -1e9` is added to the attention
                logits, so positions where mask is 1 are suppressed.

        Returns:
            (output, attention_weights): the re-joined, projected attention
            output and the per-head softmax weights.
        """
        batch_size = tf.shape(q)[0]
        q = self.wq(q)
        k = self.wk(k)
        v = self.wv(v)
        q = self.split_heads(q, batch_size)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)

        # Scaled dot-product attention
        matmul_qk = tf.matmul(q, k, transpose_b=True)
        # Cast the scale to the logits' dtype so the division never mixes
        # dtypes (a hard float32 cast breaks when logits are not float32).
        dk = tf.cast(tf.shape(k)[-1], matmul_qk.dtype)
        scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)
        if mask is not None:
            scaled_attention_logits += (mask * -1e9)
        attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)

        output = tf.matmul(attention_weights, v)
        # Undo split_heads: heads back next to depth, then merge them.
        output = tf.transpose(output, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(output, (batch_size, -1, self.d_model))
        output = self.dense(concat_attention)
        return output, attention_weights
# Transformer Encoder Block
class TransformerBlock(layers.Layer):
    """
    Encoder block: self-attention and a feed-forward network, each followed
    by dropout, a residual connection and layer normalization.

    Args:
        d_model: model width (attention and feed-forward output size).
        num_heads: number of attention heads.
        dff: hidden width of the feed-forward network.
        dropout_rate: dropout rate applied after each sub-layer.
        **kwargs: forwarded to keras.layers.Layer (e.g. `name`).
    """

    def __init__(self, d_model, num_heads, dff, dropout_rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.att = MultiHeadAttention(d_model, num_heads)
        self.ffn = keras.Sequential([
            layers.Dense(dff, activation='relu'),
            layers.Dense(d_model)
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(dropout_rate)
        self.dropout2 = layers.Dropout(dropout_rate)

    def call(self, x, training=None, mask=None):
        """
        Apply the block to `x`.

        Args:
            x: input tensor, used as query, key and value.
            training: forwarded to the dropout layers. Defaults to None so
                the block can be called without stating the mode.
            mask: optional attention mask passed through to the attention
                layer.

        Returns:
            Tensor produced by the second layer normalization.
        """
        attn_output, _ = self.att(x, x, x, mask)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)
        return out2
# Using BERT for text classification
# Tokenizer and encoder are created once at module level from the same
# checkpoint identifier; create_bert_classifier() below uses this
# module-level `bert_model` directly.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = TFBertModel.from_pretrained('bert-base-uncased')
def create_bert_classifier(num_classes=2, max_length=128):
    """
    BERT-based text classifier
    Freezes BERT and adds classification head

    Args:
        num_classes: number of units in the softmax output layer.
        max_length: fixed token sequence length of both inputs. It must
            match the length the tokenizer pads/truncates to.

    Returns:
        An uncompiled keras.Model taking [input_ids, attention_mask] and
        returning class probabilities.

    Side effects:
        Sets `trainable = False` on the module-level `bert_model`.
    """
    # Freeze BERT layers before wiring the graph so the flag is already in
    # place when the classifier is built and later compiled.
    bert_model.trainable = False

    input_ids = layers.Input(shape=(max_length,), dtype=tf.int32,
                             name='input_ids')
    attention_mask = layers.Input(shape=(max_length,), dtype=tf.int32,
                                  name='attention_mask')

    # First element of the BERT output; position 0 along axis 1 is sliced
    # out as the sequence summary.
    bert_output = bert_model(input_ids, attention_mask=attention_mask)[0]
    cls_token = bert_output[:, 0, :]  # [CLS] token representation

    x = layers.Dense(256, activation='relu')(cls_token)
    x = layers.Dropout(0.3)(x)
    output = layers.Dense(num_classes, activation='softmax')(x)

    model = keras.Model(
        inputs=[input_ids, attention_mask],
        outputs=output
    )
    return model
# Prepare text data
texts = ["This movie is great!", "Terrible experience"]
# Integer class ids as a NumPy array: Keras' fit() cannot pair tensor inputs
# with a plain Python list of ints as targets.
labels = np.array([1, 0])  # Positive, Negative
# Tokenize: pad/truncate every text to 128 tokens, matching the classifier's
# fixed input length.
encoded = tokenizer(
    texts,
    max_length=128,
    padding='max_length',
    truncation=True,
    return_tensors='tf'
)
# Create and train model
bert_classifier = create_bert_classifier(num_classes=2)
bert_classifier.compile(
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that recent Keras releases no longer accept.
    optimizer=keras.optimizers.Adam(learning_rate=2e-5),
    # Sparse variant because `labels` holds integer class ids, not one-hot
    # vectors.
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
# history = bert_classifier.fit(
#     [encoded['input_ids'], encoded['attention_mask']],
#     labels,
#     epochs=3,
#     batch_size=16
# )
Build generative models to create new images, text, and data. Master GANs and Variational Autoencoders for creative AI applications.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
# Generator Network
def make_generator_model():
    """
    Build the GAN generator.

    A 100-dimensional noise vector is projected to a 7x7x256 volume, then
    upsampled by two stride-2 transposed convolutions (7x7 -> 14x14 -> 28x28).
    A final stride-1 transposed convolution with tanh produces one output
    channel.

    Returns:
        An uncompiled keras.Sequential model.
    """
    # Project the noise vector and reshape it into a small feature map.
    net = [
        layers.Dense(7*7*256, use_bias=False, input_shape=(100,)),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Reshape((7, 7, 256)),
    ]

    # Two identical upsampling stages: 14x14 with 128 filters, then 28x28
    # with 64 filters.
    for filters in (128, 64):
        net.extend([
            layers.Conv2DTranspose(filters, (5, 5), strides=(2, 2),
                                   padding='same', use_bias=False),
            layers.BatchNormalization(),
            layers.LeakyReLU(),
        ])

    # Output layer
    net.append(
        layers.Conv2DTranspose(1, (5, 5), strides=(1, 1),
                               padding='same', use_bias=False,
                               activation='tanh')
    )
    return keras.Sequential(net)
# Discriminator Network
def make_discriminator_model():
    """
    Build the GAN discriminator.

    Two stride-2 5x5 convolutions (64 then 128 filters), each followed by
    LeakyReLU and 30% dropout, then a flatten and a single-unit Dense layer
    with no activation, so the output is a raw logit.

    Returns:
        An uncompiled keras.Sequential model for 28x28x1 inputs.
    """
    stages = []
    for index, filters in enumerate((64, 128)):
        # Only the first convolution declares the input shape.
        first_layer_opts = {'input_shape': [28, 28, 1]} if index == 0 else {}
        stages += [
            layers.Conv2D(filters, (5, 5), strides=(2, 2), padding='same',
                          **first_layer_opts),
            layers.LeakyReLU(),
            layers.Dropout(0.3),
        ]

    stages += [
        layers.Flatten(),
        layers.Dense(1),  # Real or fake (no activation, use logits)
    ]
    return keras.Sequential(stages)
# Loss functions
# Shared binary cross-entropy used by both losses below. from_logits=True
# because the discriminator's final Dense(1) layer has no activation.
cross_entropy = keras.losses.BinaryCrossentropy(from_logits=True)
def discriminator_loss(real_output, fake_output):
    """
    Sum of two cross-entropy terms: real outputs scored against all-ones
    targets and fake outputs scored against all-zeros targets.
    """
    loss_on_real = cross_entropy(tf.ones_like(real_output), real_output)
    loss_on_fake = cross_entropy(tf.zeros_like(fake_output), fake_output)
    return loss_on_real + loss_on_fake
def generator_loss(fake_output):
    """
    Cross-entropy of the discriminator's outputs on generated images against
    all-ones targets, i.e. the generator is scored as if its images were real.
    """
    wanted_labels = tf.ones_like(fake_output)
    return cross_entropy(wanted_labels, fake_output)
# Optimizers
# One Adam instance per network so each keeps its own optimizer state; the
# positional argument 1e-4 is the learning rate.
generator_optimizer = keras.optimizers.Adam(1e-4)
discriminator_optimizer = keras.optimizers.Adam(1e-4)
# Training step
@tf.function
def train_step(images):
    """
    Run one simultaneous update of the generator and the discriminator.

    Args:
        images: batch of real images fed to the discriminator.

    Returns:
        (gen_loss, disc_loss) for this step.
    """
    # Size the noise from the actual batch rather than the global
    # BATCH_SIZE, so a short final batch still gets as many fake images as
    # real ones.
    batch_size = tf.shape(images)[0]
    noise = tf.random.normal([batch_size, 100])

    # Two tapes: each network's gradients are taken from its own loss.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)
        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(
        gen_loss, generator.trainable_variables
    )
    gradients_of_discriminator = disc_tape.gradient(
        disc_loss, discriminator.trainable_variables
    )
    generator_optimizer.apply_gradients(
        zip(gradients_of_generator, generator.trainable_variables)
    )
    discriminator_optimizer.apply_gradients(
        zip(gradients_of_discriminator, discriminator.trainable_variables)
    )
    return gen_loss, disc_loss
# Initialize models
# train_step() reads these module-level names when it is called.
generator = make_generator_model()
discriminator = make_discriminator_model()
# Training loop
EPOCHS = 50
BATCH_SIZE = 256
# NOTE: `dataset` and `generate_and_save_images` are not defined in this
# file; supply them before enabling the loop. The losses are tensors, so
# they are converted with float() before applying the ':.4f' format.
# for epoch in range(EPOCHS):
#     for image_batch in dataset:
#         gen_loss, disc_loss = train_step(image_batch)
#
#     if epoch % 10 == 0:
#         print(f"Epoch {epoch}: G_loss={float(gen_loss):.4f}, D_loss={float(disc_loss):.4f}")
#         generate_and_save_images(generator, epoch)
# Generate images
def generate_images(generator, num_images=16):
    """
    Sample `num_images` 100-dimensional noise vectors and return the given
    generator's output for them, called with training=False.
    """
    latent_batch = tf.random.normal([num_images, 100])
    return generator(latent_batch, training=False)
# Variational Autoencoder (VAE)
class VAE(keras.Model):
    """
    Convolutional variational autoencoder for 28x28x1 inputs.

    The encoder outputs `2 * latent_dim` values per sample, which `encode`
    splits into a mean and a log-variance. The decoder maps a latent vector
    back to a 28x28x1 output through a final sigmoid layer.

    NOTE: this class defines encode/reparameterize/decode only. It has no
    `call` or `train_step`, so a loss and a training procedure must be
    supplied separately.

    Args:
        latent_dim: dimensionality of the latent space.
    """

    def __init__(self, latent_dim):
        super().__init__()
        self.latent_dim = latent_dim
        # Encoder
        self.encoder = keras.Sequential([
            layers.InputLayer(input_shape=(28, 28, 1)),
            layers.Conv2D(32, 3, activation='relu', strides=2, padding='same'),
            layers.Conv2D(64, 3, activation='relu', strides=2, padding='same'),
            layers.Flatten(),
            layers.Dense(latent_dim + latent_dim)  # mean and log_var
        ])
        # Decoder
        self.decoder = keras.Sequential([
            layers.InputLayer(input_shape=(latent_dim,)),
            layers.Dense(7*7*64, activation='relu'),
            layers.Reshape((7, 7, 64)),
            layers.Conv2DTranspose(64, 3, activation='relu', strides=2, padding='same'),
            layers.Conv2DTranspose(32, 3, activation='relu', strides=2, padding='same'),
            layers.Conv2DTranspose(1, 3, activation='sigmoid', padding='same')
        ])

    def encode(self, x):
        """Return (mean, logvar): the encoder output split in half on axis 1."""
        mean, logvar = tf.split(self.encoder(x), num_or_size_splits=2, axis=1)
        return mean, logvar

    def reparameterize(self, mean, logvar):
        """Return mean + eps * exp(logvar / 2) with eps drawn from N(0, 1)."""
        # tf.shape() yields the runtime shape. The static `mean.shape` can
        # contain None for the batch dimension, which tf.random.normal
        # cannot use.
        eps = tf.random.normal(shape=tf.shape(mean))
        return eps * tf.exp(logvar * 0.5) + mean

    def decode(self, z):
        """Return the decoder output for latent vectors `z`."""
        return self.decoder(z)
# Two-dimensional latent space. compile() is given an optimizer only, and
# the VAE class above defines no call()/train_step(), so a loss and training
# step still need to be provided before this model can be fitted.
vae = VAE(latent_dim=2)
vae.compile(optimizer='adam')
Deploy your AI models to production with TensorFlow Serving, ONNX, and cloud platforms. Optimize for speed and scalability.
import tensorflow as tf
from tensorflow import keras
import onnx
import tf2onnx
# Export one trained Keras model to several deployment formats
model = keras.models.load_model('my_model.h5')

# 1) TensorFlow SavedModel directory (the format TF Serving loads)
model.save('saved_model/my_model', save_format='tf')

# 2) ONNX for cross-platform runtimes; the signature declares a
#    variable-size batch of 224x224x3 float32 inputs named "input".
spec = (tf.TensorSpec((None, 224, 224, 3), tf.float32, name="input"),)
output_path = "model.onnx"
model_proto, _ = tf2onnx.convert.from_keras(
    model,
    output_path=output_path,
    input_signature=spec,
)
print(f"ONNX model saved to {output_path}")

# 3) TensorFlow Lite for mobile/edge devices, with the default optimization
#    set enabled to shrink the model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)
# Model serving with Flask API
from flask import Flask, request, jsonify
import numpy as np
from PIL import Image
# Flask application object that the route decorators below attach to.
app = Flask(__name__)
# The model is loaded once at import time and reused by every request to
# predict(), rather than being reloaded per call.
model = keras.models.load_model('my_model.h5')
@app.route('/predict', methods=['POST'])
def predict():
    """
    API endpoint for model predictions
    Accepts image file and returns predictions

    Expects a multipart upload under the form field 'image'.

    Returns:
        200 with JSON {'class', 'confidence', 'all_probabilities'} on
        success; 400 with JSON {'error': ...} when no image is supplied or
        the upload cannot be decoded as an image.
    """
    if 'image' not in request.files:
        return jsonify({'error': 'No image provided'}), 400

    upload = request.files['image']
    try:
        # Force three channels: RGBA, grayscale or palette uploads would
        # otherwise produce an array whose shape the model cannot accept.
        img = Image.open(upload.stream).convert('RGB')
    except OSError:
        # Pillow reports unreadable or truncated images as OSError
        # subclasses. That is a client error, not a server fault.
        return jsonify({'error': 'Invalid image file'}), 400

    img = img.resize((224, 224))
    # Scale pixels to [0, 1] and add the leading batch axis.
    img_array = np.asarray(img, dtype=np.float32) / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    predictions = model.predict(img_array)
    predicted_class = int(np.argmax(predictions[0]))
    confidence = float(np.max(predictions[0]))
    return jsonify({
        'class': predicted_class,
        'confidence': confidence,
        'all_probabilities': predictions[0].tolist()
    })
@app.route('/health', methods=['GET'])
def health():
    """Health-check endpoint: always responds with a fixed 'healthy' status."""
    payload = {'status': 'healthy'}
    return jsonify(payload)
if __name__ == '__main__':
    # Start Flask's built-in server on port 5000, listening on all
    # interfaces (0.0.0.0).
    app.run(port=5000, host='0.0.0.0')
# Docker deployment
# Dockerfile:
# The text below is a bare string literal, so Python evaluates and discards
# it; it is reference content meant to be copied into a file named
# "Dockerfile". It installs requirements.txt, copies the project, exposes
# port 5000 and starts app.py.
"""
FROM tensorflow/tensorflow:latest-gpu
WORKDIR /app
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY . .
EXPOSE 5000
CMD ["python", "app.py"]
"""
# Model optimization techniques
def _prune_model(model):
    """Wrap `model` for magnitude pruning, ramping sparsity 0% -> 50% over steps 0-1000."""
    import tensorflow_model_optimization as tfmot

    schedule = tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.0,
        final_sparsity=0.5,
        begin_step=0,
        end_step=1000
    )
    return tfmot.sparsity.keras.prune_low_magnitude(
        model, pruning_schedule=schedule
    )


def optimize_model(model):
    """
    Optimize model for inference

    Returns a quantization-aware version of `model`.

    The pruning recipe lives in `_prune_model` and is not applied here.
    Previously a pruned wrapper was built and then discarded, which cost
    time and suggested the returned model was pruned when it was not. The
    returned model is the same as before.
    """
    # Imported locally so the optional dependency is only required when
    # optimization is actually requested.
    import tensorflow_model_optimization as tfmot

    # Quantization-aware training
    quantize_model = tfmot.quantization.keras.quantize_model
    q_aware_model = quantize_model(model)
    return q_aware_model
# Batch prediction for efficiency
def batch_predict(model, images, batch_size=32):
    """
    Predict in batches for better throughput

    Args:
        model: object exposing `predict(batch)`.
        images: sliceable sequence/array of inputs; `len(images)` and
            `images[a:b]` must work.
        batch_size: number of inputs handed to `model.predict` per call.

    Returns:
        A NumPy array with the per-batch predictions joined along axis 0,
        or an empty array when `images` is empty.

    Raises:
        ValueError: if `batch_size` is not positive.
    """
    # A negative step would make range() yield nothing and silently return
    # an empty result; zero would fail with an unrelated range() message.
    if batch_size <= 0:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size}"
        )

    batches = [
        model.predict(images[start:start + batch_size])
        for start in range(0, len(images), batch_size)
    ]
    if not batches:
        return np.array([])
    # Join whole batches at once instead of extending a list row by row.
    return np.concatenate(batches, axis=0)
Build production-ready AI applications that solve real-world problems across various domains.
Objective: Detect and classify multiple objects in images/video streams
Techniques: YOLO, Faster R-CNN, RetinaNet, non-maximum suppression, anchor boxes
Applications: Surveillance, autonomous vehicles, retail analytics
Objective: Build an intelligent conversational AI assistant
Techniques: GPT, BERT, intent classification, entity recognition, dialogue management
Applications: Customer support, virtual assistants, FAQ automation
Objective: Identify and verify individuals from facial images
Techniques: FaceNet, Siamese networks, triplet loss, face alignment, embeddings
Applications: Security systems, attendance tracking, photo organization
Objective: Apply artistic styles to photos using neural style transfer
Techniques: Neural style transfer, GANs, perceptual loss, fast style transfer
Applications: Photo editing apps, content creation, artistic tools
Objective: Detect unusual patterns in time-series or image data
Techniques: Autoencoders, LSTM, Isolation Forest, One-Class SVM
Applications: Fraud detection, manufacturing quality control, network security
Test your AI & deep learning knowledge with 20 random questions!