dmlfw (Daniyal Machine Learning Framework)

Demonstrates mini-batch stochastic gradient descent for linear regression using ml-framework. More...

#include <dmlfw.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
Include dependency graph for stochastic_gd.c:
dmlfw_mat_double * buffer_matrix_1 = NULL
 
dmlfw_mat_double * buffer_matrix_1_shuffled = NULL
 
dmlfw_mat_double * buffer_matrix_2 = NULL
 
dmlfw_mat_double * buffer_matrix_2_shuffled = NULL
 
#define BUFFER_SIZE   50
 
void data_loader (void *x, void *y, uint64_t from_row, uint32_t how_many_rows)
 Supplies training data batches to SGD optimizer via buffer loading.
 
#define FREQUENCY_OF_PRINTING_COST   100
 
dmlfw_gradient_descent_options * get_gradient_descent_options ()
 Configures options for SGD training.
 
void get_one_from_file_buffer (dmlfw_mat_double **target_matrix, uint64_t from_row)
 Retrieves a single row from buffered training data.
 
FILE * gnuplot
 
void init_buffers ()
 Initializes data buffers used for training data management.
 
#define LEARNING_RATE   0.00001
 
int main ()
 Main entry point to run buffered stochastic gradient descent linear regression.
 
#define MODEL_FILE_NAME   "example-2-model.csv"
 
dmlfw_column_vec_double * model_squared_sum_vector = NULL
 
dmlfw_row_vec_double * model_transposed = NULL
 
uint64_t number_of_columns_in_training_examples = 0
 
#define NUMBER_OF_ITERATIONS   100000
 
uint64_t number_of_training_examples = 0
 
int on_iteration_complete (uint64_t iteration_number, void *y, void *predicted_y, void *model, double regularization_parameter)
 SGD iteration callback to calculate and log cost function.
 
dmlfw_column_vec_double * prediction_error = NULL
 
dmlfw_row_vec_double * prediction_error_transposed = NULL
 
void print_error_and_exit ()
 Prints ml-framework error and terminates execution.
 
dmlfw_column_vec_double * product_vector = NULL
 
#define REGULARIZATION_PARAMETER   0.5
 
#define SHOW_GRAPH   1
 
#define TRAINING_DATASET   "IceCreamSales_training_examples.csv"
 
dmlfw_mat_double * x_matrix = NULL
 
dmlfw_mat_double * xy_matrix = NULL
 
dmlfw_column_vec_double * y_vector = NULL
 

Detailed Description

Demonstrates mini-batch stochastic gradient descent for linear regression using ml-framework.

Author
Mohammed Daniyal
Version
1.0
Date
2025-09-26

This example trains a linear regression model on the IceCreamSales training dataset using mini-batch stochastic gradient descent with a specified batch size, learning rate, and regularization. It includes a custom data loader, a progress callback that logs the cost and visualizes it with gnuplot, and buffer management for data loading.

Usage: ./stochastic_gd

Macro Definition Documentation

◆ BUFFER_SIZE

#define BUFFER_SIZE   50

Buffer size used for batch data loading

◆ FREQUENCY_OF_PRINTING_COST

#define FREQUENCY_OF_PRINTING_COST   100

Frequency of printing/logging cost

◆ LEARNING_RATE

#define LEARNING_RATE   0.00001

Learning rate for gradient descent

◆ MODEL_FILE_NAME

#define MODEL_FILE_NAME   "example-2-model.csv"

Output model CSV file path

◆ NUMBER_OF_ITERATIONS

#define NUMBER_OF_ITERATIONS   100000

Maximum number of gradient descent iterations

◆ REGULARIZATION_PARAMETER

#define REGULARIZATION_PARAMETER   0.5

Regularization parameter lambda

◆ SHOW_GRAPH

#define SHOW_GRAPH   1

Enable or disable plotting with gnuplot (1 = enabled)

◆ TRAINING_DATASET

#define TRAINING_DATASET   "IceCreamSales_training_examples.csv"

Training dataset CSV file path

Function Documentation

◆ data_loader()

void data_loader ( void *  x,
void *  y,
uint64_t  from_row,
uint32_t  how_many_rows 
)

Supplies training data batches to SGD optimizer via buffer loading.

Parameters
x: Pointer to feature matrix pointer to populate.
y: Pointer to target vector pointer to populate.
from_row: Starting row index for batch.
how_many_rows: Number of rows requested.

◆ get_gradient_descent_options()

dmlfw_gradient_descent_options * get_gradient_descent_options ( )

Configures options for SGD training.

Returns
Pointer to gradient descent options configured for stochastic GD.

◆ get_one_from_file_buffer()

void get_one_from_file_buffer ( dmlfw_mat_double **  target_matrix,
uint64_t  from_row 
)

Retrieves a single row from buffered training data.

Parameters
target_matrix: Output pointer to matrix containing the row.
from_row: Row index to retrieve.

◆ init_buffers()

void init_buffers ( )

Initializes data buffers used for training data management.

◆ main()

int main ( )

Main entry point to run buffered stochastic gradient descent linear regression.

Returns
0 on success, or error code.

◆ on_iteration_complete()

int on_iteration_complete ( uint64_t  iteration_number,
void *  y,
void *  predicted_y,
void *  model,
double  regularization_parameter 
)

SGD iteration callback to calculate and log cost function.

Parameters
iteration_number: Current iteration count.
y: Actual target vector.
predicted_y: Predicted output by model.
model: Current model weights.
regularization_parameter: Regularization factor.
Returns
0 to continue training, -1 to abort.

◆ print_error_and_exit()

void print_error_and_exit ( )

Prints ml-framework error and terminates execution.

Variable Documentation

◆ buffer_matrix_1

dmlfw_mat_double* buffer_matrix_1 = NULL

Primary buffer matrix holding dataset block for training

◆ buffer_matrix_1_shuffled

dmlfw_mat_double* buffer_matrix_1_shuffled = NULL

Shuffled copy of primary buffer matrix

◆ buffer_matrix_2

dmlfw_mat_double* buffer_matrix_2 = NULL

Secondary buffer matrix holding remaining dataset block

◆ buffer_matrix_2_shuffled

dmlfw_mat_double* buffer_matrix_2_shuffled = NULL

Shuffled copy of secondary buffer matrix

◆ gnuplot

FILE* gnuplot

◆ model_squared_sum_vector

dmlfw_column_vec_double* model_squared_sum_vector = NULL

Vector holding squared sum of model parameters for regularization calculation

◆ model_transposed

dmlfw_row_vec_double* model_transposed = NULL

Transposed vector of current model parameters during callback

◆ number_of_columns_in_training_examples

uint64_t number_of_columns_in_training_examples = 0

Number of columns (features + target) in training dataset

◆ number_of_training_examples

uint64_t number_of_training_examples = 0

Total number of training examples (rows) in dataset

◆ prediction_error

dmlfw_column_vec_double* prediction_error = NULL

Vector holding prediction errors during training progress callback

◆ prediction_error_transposed

dmlfw_row_vec_double* prediction_error_transposed = NULL

Transposed version of prediction_error vector

◆ product_vector

dmlfw_column_vec_double* product_vector = NULL

Temporary vector used for intermediate matrix products

◆ x_matrix

dmlfw_mat_double* x_matrix = NULL

Feature matrix pointer used in data loading for mini-batches

◆ xy_matrix

dmlfw_mat_double* xy_matrix = NULL

Combined feature-target matrix used temporarily for data loading

◆ y_vector

dmlfw_column_vec_double* y_vector = NULL

Target vector pointer used in data loading for mini-batches