FunWithAI/regression_example.py

"""
Linear Regression with Tensorflow following this tutorial: https://www.tensorflow.org/tutorials/keras/regression
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Make printed NumPy arrays easier to read: 3 decimal places, fixed-point instead of scientific notation
np.set_printoptions(precision=3, suppress=True)


def load_data(url='http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'):
	"""
	Load the Auto MPG dataset into a DataFrame.

	:param url: Where to read the raw data from. Anything ``pd.read_csv`` accepts works (URL, local path or
		file-like object), so a local copy can be used instead of downloading. Defaults to the UCI archive.
	:return: DataFrame with the columns 'MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
		'Acceleration', 'Model Year' and 'Origin'. Values given as '?' in the file are read as NaN.
	"""
	column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
					'Acceleration', 'Model Year', 'Origin']

	# The file has no header row and separates its columns with (runs of) spaces.
	# comment='\t' makes the parser ignore everything from the first tab of a line onwards.
	return pd.read_csv(url, names=column_names,
					   na_values='?', comment='\t',
					   sep=' ', skipinitialspace=True)


def plot_loss(history):
	"""
	Draw the training and the validation loss over the epochs in one figure and show it.

	:param history: Object whose ``history`` attribute maps 'loss' and 'val_loss' to one value per epoch
		(the call sites in this file pass the return value of a Keras ``fit`` call).
	"""
	# Each curve is labelled with the key it is stored under
	for curve in ('loss', 'val_loss'):
		plt.plot(history.history[curve], label=curve)

	# Fixed y-range so that different runs can be compared by eye
	plt.ylim(0, 10)

	plt.xlabel('Epoch')
	plt.ylabel('Error [MPG]')

	plt.legend()
	plt.grid(True)
	plt.show()


def plot_horsepower(x, y, train_features, train_labels):
	"""
	Show the training data as a scatter plot together with the model's predictions as a black line.

	:param x: Horizontal coordinates of the prediction line (horsepower values).
	:param y: Vertical coordinates of the prediction line (predicted MPG for each entry of ``x``).
	:param train_features: Training features; only the 'Horsepower' column is used.
	:param train_labels: MPG value belonging to each row of ``train_features``.
	"""
	horsepower_values = train_features['Horsepower']

	# Measured data first, prediction line on top of it
	plt.scatter(horsepower_values, train_labels, label='Data')
	plt.plot(x, y, label='Predictions', color='k')

	plt.xlabel('Horsepower')
	plt.ylabel('MPG')

	plt.legend()
	plt.show()

def _prepare_data():
	"""
	Load the Auto MPG data, clean it and split it into training and test sets.

	Along the way this prints the last rows of the raw data, the number of missing values per column and
	summary statistics of the training set, and it shows a pair plot of a few training columns.

	:return: Tuple ``(train_features, test_features, train_labels, test_labels)``. The features are
		DataFrames without the 'MPG' column, the labels are the matching 'MPG' Series.
	"""
	dataset = load_data()

	# Print the end of the dataset
	print(dataset.tail())

	# Count the missing values per column, then drop the affected rows
	print(dataset.isna().sum())
	dataset = dataset.dropna()

	# 'Origin' is a numeric code for a category, so replace it by one indicator column per country.
	# Can be done via a keras layer, but is overkill for 3 values.
	# dtype=float keeps all columns numeric: pandas >= 2.0 creates bool columns by default, which would turn
	# np.array(train_features) into an object array that cannot be fed to the normalization layer.
	dataset['Origin'] = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})
	dataset = pd.get_dummies(dataset, columns=['Origin'], prefix='', prefix_sep='', dtype=float)

	# Split into 80% training and 20% test data; the fixed seed makes the split reproducible
	train_dataset = dataset.sample(frac=0.8, random_state=0)
	test_dataset = dataset.drop(train_dataset.index)

	# Show pair plots of the data to see if there are any probable correlations
	sns.pairplot(train_dataset[['MPG', 'Cylinders', 'Displacement', 'Weight']], diag_kind='kde')
	plt.show()

	# Print statistics about the training data
	print(train_dataset.describe().transpose())

	# Separate the label (the value to predict) from the features
	train_features = train_dataset.copy()
	test_features = test_dataset.copy()

	train_labels = train_features.pop('MPG')
	test_labels = test_features.pop('MPG')

	return train_features, test_features, train_labels, test_labels


def _build_normalizer(train_features):
	"""
	Create a normalization layer for all features, fit it to the training data and print a small demonstration.

	:param train_features: DataFrame of numeric training features.
	:return: The adapted ``Normalization`` layer.
	"""
	normalizer = tf.keras.layers.Normalization(axis=-1)

	# Fit the preprocessing layer to the data
	normalizer.adapt(np.array(train_features))

	# See what effect the normalization has on the first training example
	first = np.array(train_features[:1])

	with np.printoptions(precision=2, suppress=True):
		print('First example:', first)
		print()
		print('Normalized:', normalizer(first).numpy())

	return normalizer


def _train_and_evaluate(model, train_inputs, train_labels, test_inputs, test_labels):
	"""
	Compile and train ``model``, report the training progress and evaluate the model on the test data.

	:param model: Keras model to train (modified in place).
	:param train_inputs: Model inputs used for training.
	:param train_labels: Target values belonging to ``train_inputs``.
	:param test_inputs: Model inputs used for the final evaluation.
	:param test_labels: Target values belonging to ``test_inputs``.
	:return: Result of ``model.evaluate`` on the test data.
	"""
	# Configure the training process
	model.compile(
		optimizer=tf.optimizers.Adam(learning_rate=0.1),
		loss='mean_absolute_error'
	)

	# Train for 100 epochs
	history = model.fit(
		train_inputs,
		train_labels,
		epochs=100,
		# Suppress logging.
		verbose=0,
		# Calculate validation results on 20% of the training data.
		validation_split=0.2
	)

	# Show training progress (values of the last epochs)
	hist = pd.DataFrame(history.history)
	hist['epoch'] = history.epoch
	print(hist.tail())

	# Show loss plot
	plot_loss(history)

	return model.evaluate(test_inputs, test_labels, verbose=0)


def regression_single_input():
	"""
	Linear regression with a single input: predict 'MPG' from 'Horsepower' only.

	Prints diagnostics and the test result and shows the data, loss and regression-line plots.
	"""
	train_features, test_features, train_labels, test_labels = _prepare_data()

	# The normalizer over all features is not part of this model; it is only built for its printed demonstration
	_build_normalizer(train_features)

	# Create the horsepower array and a normalization layer fitted to it
	horsepower = np.array(train_features['Horsepower'])

	horsepower_normalizer = layers.Normalization(input_shape=[1, ], axis=None)
	horsepower_normalizer.adapt(horsepower)

	# Build the Keras Sequential Model
	# Apply a linear transformation to produce 1 output using a linear layer (layers.Dense)
	horsepower_model = tf.keras.Sequential([
		horsepower_normalizer,
		layers.Dense(units=1)
	])

	# summary() prints the table itself and returns None, so wrapping it in print() would output a stray 'None'
	horsepower_model.summary()

	# Run the model without training
	print(horsepower_model.predict(horsepower[:10]))

	# Train the model and collect the result on the test set
	test_results = {}

	test_results['horsepower_model'] = _train_and_evaluate(
		horsepower_model,
		train_features['Horsepower'], train_labels,
		test_features['Horsepower'], test_labels
	)
	print(test_results)

	# Plot the regression line
	x = tf.linspace(0.0, 250, 251)
	y = horsepower_model.predict(x)
	plot_horsepower(x, y, train_features, train_labels)


def regression_multiple_inputs():
	"""
	Linear regression with multiple inputs: predict 'MPG' from all feature columns.

	Prints diagnostics and the test result and shows the data and loss plots.
	"""
	train_features, test_features, train_labels, test_labels = _prepare_data()
	normalizer = _build_normalizer(train_features)

	# Build the Keras Sequential Model
	# Apply a linear transformation to produce 1 output using a linear layer (layers.Dense)
	linear_model = tf.keras.Sequential([
		normalizer,
		layers.Dense(units=1)
	])

	# Run the model without training
	print(linear_model.predict(train_features[:10]))

	# Train the model and collect the result on the test set
	test_results = {}

	test_results['linear_model'] = _train_and_evaluate(
		linear_model,
		train_features, train_labels,
		test_features, test_labels
	)
	print(test_results)

	# No regression-line plot here: this model takes all feature columns as input, so its predictions
	# cannot be drawn as a single line over horsepower the way plot_horsepower does.


# When executed as a script, run the single-input example first and the multi-input example afterwards
if __name__ == '__main__':
	regression_single_input()
	regression_multiple_inputs()
Initial commit 2021-12-24 19:23:59 +00:00			`"""`
			`Linear Regression with Tensorflow following this tutorial: https://www.tensorflow.org/tutorials/keras/regression`
			`"""`
			`import matplotlib.pyplot as plt`
			`import numpy as np`
			`import pandas as pd`
			`import seaborn as sns`
			`import tensorflow as tf`
			`from tensorflow import keras`
			`from tensorflow.keras import layers`

			`# Makes numpy stuff easier readable`
			`np.set_printoptions(precision=3, suppress=True)`


			`def load_data():`
			`url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'`
			`column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',`
			`'Acceleration', 'Model Year', 'Origin']`

			`raw_dataset = pd.read_csv(url, names=column_names,`
			`na_values='?', comment='\t',`
			`sep=' ', skipinitialspace=True)`

			`return raw_dataset`


			`def plot_loss(history):`
			`plt.plot(history.history['loss'], label='loss')`
			`plt.plot(history.history['val_loss'], label='val_loss')`
			`plt.ylim([0, 10])`
			`plt.xlabel('Epoch')`
			`plt.ylabel('Error [MPG]')`
			`plt.legend()`
			`plt.grid(True)`
			`plt.show()`


			`def plot_horsepower(x, y, train_features, train_labels):`
			`plt.scatter(train_features['Horsepower'], train_labels, label='Data')`
			`plt.plot(x, y, color='k', label='Predictions')`
			`plt.xlabel('Horsepower')`
			`plt.ylabel('MPG')`
			`plt.legend()`
			`plt.show()`

			`def regression_single_input():`
			`raw_dataset = load_data()`
			`dataset = raw_dataset.copy()`

			`#################################`
			`### Prepare the training data ###`
			`#################################`

			`# Print the end of the dataset`
			`print(dataset.tail())`

			`# Check for any undefined values`
			`print(dataset.isna().sum())`

			`# Drops unknown values`
			`dataset = dataset.dropna()`

			`# Replaces the alphanumerical values with numerical ones. Can be done via keras model, but is overkill for 3 values.`
			`dataset['Origin'] = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})`
			`dataset = pd.get_dummies(dataset, columns=['Origin'], prefix='', prefix_sep='')`

			`# Split into training and test data`
			`train_dataset = dataset.sample(frac=0.8, random_state=0)`
			`test_dataset = dataset.drop(train_dataset.index)`

			`# Print pair plots of the data to see if there are any probable correlations`
			`sns.pairplot(train_dataset[['MPG', 'Cylinders', 'Displacement', 'Weight']], diag_kind='kde')`
			`plt.show()`

			`# Print statistics about the features`
			`print(train_dataset.describe().transpose())`

			`# Separate the label from the features`
			`train_features = train_dataset.copy()`
			`test_features = test_dataset.copy()`

			`train_labels = train_features.pop('MPG')`
			`test_labels = test_features.pop('MPG')`

			`###########################`
			`### Normalize the model ###`
			`###########################`

			`# Create the normalization layer`
			`normalizer = tf.keras.layers.Normalization(axis=-1)`

			`# Fit the preprocessing layer to the data`
			`normalizer.adapt(np.array(train_features))`

			`# See what effect the normalization has`
			`first = np.array(train_features[:1])`

			`with np.printoptions(precision=2, suppress=True):`
			`print('First example:', first)`
			`print()`
			`print('Normalized:', normalizer(first).numpy())`

			`#######################################`
			`### Start with the regression stuff ###`
			`#######################################`

			`# Create Horsepower NP array and normalize it`
			`horsepower = np.array(train_features['Horsepower'])`

			`horsepower_normalizer = layers.Normalization(input_shape=[1, ], axis=None)`
			`horsepower_normalizer.adapt(horsepower)`

			`# Build the Keras Sequential Model`
			`# Apply a linear transformation to produce 1 output using a linear layer (layers.Dense)`
			`horsepower_model = tf.keras.Sequential([`
			`horsepower_normalizer,`
			`layers.Dense(units=1)`
			`])`

			`print(horsepower_model.summary())`

			`# Run the model without training`
			`print(horsepower_model.predict(horsepower[:10]))`

			`# Configure the training process`
			`horsepower_model.compile(`
			`optimizer=tf.optimizers.Adam(learning_rate=0.1),`
			`loss='mean_absolute_error'`
			`)`

			`# Train for 100 epochs`
			`history = horsepower_model.fit(`
			`train_features['Horsepower'],`
			`train_labels,`
			`epochs=100,`
			`# Suppress logging.`
			`verbose=0,`
			`# Calculate validation results on 20% of the training data.`
			`validation_split=0.2`
			`)`

			`# Show training progress`
			`hist = pd.DataFrame(history.history)`
			`hist['epoch'] = history.epoch`
			`print(hist.tail())`

			`# Show loss plot`
			`plot_loss(history)`

			`# Collect results on test set`
			`test_results = {}`

			`test_results['horsepower_model'] = horsepower_model.evaluate(`
			`test_features['Horsepower'],`
			`test_labels, verbose=0`
			`)`

			`# Plot the regression line`
			`x = tf.linspace(0.0, 250, 251)`
			`y = horsepower_model.predict(x)`
			`plot_horsepower(x, y, train_features, train_labels)`

			`def regression_multiple_inputs():`
			`raw_dataset = load_data()`
			`dataset = raw_dataset.copy()`

			`#################################`
			`### Prepare the training data ###`
			`#################################`

			`# Print the end of the dataset`
			`print(dataset.tail())`

			`# Check for any undefined values`
			`print(dataset.isna().sum())`

			`# Drops unknown values`
			`dataset = dataset.dropna()`

			`# Replaces the alphanumerical values with numerical ones. Can be done via keras model, but is overkill for 3 values.`
			`dataset['Origin'] = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})`
			`dataset = pd.get_dummies(dataset, columns=['Origin'], prefix='', prefix_sep='')`

			`# Split into training and test data`
			`train_dataset = dataset.sample(frac=0.8, random_state=0)`
			`test_dataset = dataset.drop(train_dataset.index)`

			`# Print pair plots of the data to see if there are any probable correlations`
			`sns.pairplot(train_dataset[['MPG', 'Cylinders', 'Displacement', 'Weight']], diag_kind='kde')`
			`plt.show()`

			`# Print statistics about the features`
			`print(train_dataset.describe().transpose())`

			`# Separate the label from the features`
			`train_features = train_dataset.copy()`
			`test_features = test_dataset.copy()`

			`train_labels = train_features.pop('MPG')`
			`test_labels = test_features.pop('MPG')`

			`###########################`
			`### Normalize the model ###`
			`###########################`

			`# Create the normalization layer`
			`normalizer = tf.keras.layers.Normalization(axis=-1)`

			`# Fit the preprocessing layer to the data`
			`normalizer.adapt(np.array(train_features))`

			`# See what effect the normalization has`
			`first = np.array(train_features[:1])`

			`with np.printoptions(precision=2, suppress=True):`
			`print('First example:', first)`
			`print()`
			`print('Normalized:', normalizer(first).numpy())`

			`#######################################`
			`### Start with the regression stuff ###`
			`#######################################`

			`# Create Horsepower NP array and normalize it`
			`horsepower = np.array(train_features['Horsepower'])`

			`horsepower_normalizer = layers.Normalization(input_shape=[1, ], axis=None)`
			`horsepower_normalizer.adapt(horsepower)`

			`# Build the Keras Sequential Model`
			`# Apply a linear transformation to produce 1 output using a linear layer (layers.Dense)`
			`linear_model = tf.keras.Sequential([`
			`normalizer,`
			`layers.Dense(units=1)`
			`])`

			`# Run the model without training`
			`print(linear_model.predict(train_features[:10]))`

			`# Configure the training process`
			`linear_model.compile(`
			`optimizer=tf.optimizers.Adam(learning_rate=0.1),`
			`loss='mean_absolute_error'`
			`)`

			`# Train for 100 epochs`
			`history = linear_model.fit(`
			`train_features,`
			`train_labels,`
			`epochs=100,`
			`# Suppress logging.`
			`verbose=0,`
			`# Calculate validation results on 20% of the training data.`
			`validation_split=0.2`
			`)`

			`# Show training progress`
			`hist = pd.DataFrame(history.history)`
			`hist['epoch'] = history.epoch`
			`print(hist.tail())`

			`# Show loss plot`
			`plot_loss(history)`

			`# Collect results on test set`
			`test_results = {}`

			`test_results['linear_model'] = linear_model.evaluate(`
			`test_features, test_labels, verbose=0`
			`)`

			`# Plot the regression line`
			`# Apparently, this doesnt work for multi inputs because i guess 3D plotting is kinda hart`
			`#x = tf.linspace(0.0, 250, 251)`
			`#y = linear_model.predict(x)`
			`#plot_horsepower(x, y, train_features, train_labels)`


			`if __name__ == '__main__':`
			`regression_single_input()`
			`regression_multiple_inputs()`