mirror of
https://github.com/The-Art-of-Hacking/h4cker
synced 2024-11-25 20:30:17 +00:00
Add files via upload
This commit is contained in:
parent b6020f3567
commit 99a0fce025
21 changed files with 1472 additions and 0 deletions
106
ai_security/ML_Fundamentals/ai_generated/Actor-Critic_Methods.py
Normal file
@@ -0,0 +1,106 @@
Sure! Here's an example of a Python script that demonstrates Actor-Critic Methods using the OpenAI Gym environment and the Keras library:

```python
import gym
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam


class ActorCriticAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

        # Hyperparameters for the agent
        self.lr_actor = 0.001
        self.lr_critic = 0.005
        self.discount_factor = 0.99

        # Create models for the actor and critic
        self.actor = self.build_actor()
        self.critic = self.build_critic()

    def build_actor(self):
        # Policy network: maps a state to a probability distribution over actions
        input = Input(shape=(self.state_size,))
        dense = Dense(24, activation='relu')(input)
        probs = Dense(self.action_size, activation='softmax')(dense)
        model = Model(inputs=input, outputs=probs)
        model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=self.lr_actor))
        return model

    def build_critic(self):
        # Value network: maps a state to a scalar state-value estimate
        input = Input(shape=(self.state_size,))
        dense = Dense(24, activation='relu')(input)
        value = Dense(1, activation='linear')(dense)
        model = Model(inputs=input, outputs=value)
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.lr_critic))
        return model

    def get_action(self, state):
        state = np.reshape(state, [1, self.state_size])
        probs = self.actor.predict(state)[0]
        action = np.random.choice(self.action_size, p=probs)
        return action

    def train_model(self, state, action, reward, next_state, done):
        # Reshape raw environment observations to (1, state_size) for the networks
        state = np.reshape(state, [1, self.state_size])
        next_state = np.reshape(next_state, [1, self.state_size])

        target = np.zeros((1, 1))
        advantages = np.zeros((1, self.action_size))

        value = self.critic.predict(state)[0][0]
        next_value = self.critic.predict(next_state)[0][0]

        if done:
            advantages[0][action] = reward - value
            target[0][0] = reward
        else:
            advantages[0][action] = reward + self.discount_factor * next_value - value
            target[0][0] = reward + self.discount_factor * next_value

        self.actor.fit(state, advantages, epochs=1, verbose=0)
        self.critic.fit(state, target, epochs=1, verbose=0)


if __name__ == "__main__":
    # Create the environment
    # (this example assumes the classic Gym API, where reset() returns the
    # observation and step() returns (obs, reward, done, info))
    env = gym.make('CartPole-v1')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n

    # Create an instance of the agent
    agent = ActorCriticAgent(state_size, action_size)

    scores, episodes = [], []
    EPISODES = 100

    for episode in range(EPISODES):
        done = False
        score = 0
        state = env.reset()

        while not done:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            agent.train_model(state, action, reward, next_state, done)
            score += reward
            state = next_state

            if done:
                scores.append(score)
                episodes.append(episode)
                print("Episode:", episode, "Score:", score)

    # Plot the scores
    import matplotlib.pyplot as plt
    plt.plot(episodes, scores, 'b')
    plt.xlabel("Episode")
    plt.ylabel("Score")
    plt.show()
```

In this script, we create an `ActorCriticAgent` class that represents the agent using Actor-Critic Methods. We then use this agent to train on the `CartPole-v1` environment from OpenAI Gym. The agent has an actor model that outputs action probabilities and a critic model that predicts state values. These models are trained in the `train_model` method: the critic is regressed toward the TD target, and the actor is updated using the advantage of the chosen action.

During training, the agent selects actions based on the actor model's output and updates its models using the observed rewards and states. The scores are stored and plotted at the end to visualize the agent's performance over multiple episodes.

Please make sure you have installed the required libraries (`gym`, `numpy`, `tensorflow`, and `matplotlib`) before running this script.
@@ -0,0 +1,46 @@
Sure! Here's a Python script that demonstrates the use of Association Rules using the Apriori and FP-Growth algorithms.

```python
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.frequent_patterns import fpgrowth

# Sample dataset
dataset = [['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
           ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
           ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
           ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
           ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]

# Create one-hot encoded DataFrame
te = TransactionEncoder()
te_data = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_data, columns=te.columns_)

# Generate frequent itemsets using Apriori algorithm
frequent_itemsets_apriori = apriori(df, min_support=0.2, use_colnames=True)
print("Frequent Itemsets (Apriori):")
print(frequent_itemsets_apriori)

# Generate association rules from frequent itemsets using confidence threshold
rules_apriori = association_rules(frequent_itemsets_apriori, metric="confidence", min_threshold=0.7)
print("\nAssociation Rules (Apriori):")
print(rules_apriori)

# Generate frequent itemsets using FP-Growth algorithm
frequent_itemsets_fpgrowth = fpgrowth(df, min_support=0.2, use_colnames=True)
print("\nFrequent Itemsets (FP-Growth):")
print(frequent_itemsets_fpgrowth)

# Generate association rules from frequent itemsets using confidence threshold
rules_fpgrowth = association_rules(frequent_itemsets_fpgrowth, metric="confidence", min_threshold=0.7)
print("\nAssociation Rules (FP-Growth):")
print(rules_fpgrowth)
```

This script uses the `mlxtend` library, which provides implementations of the Apriori and FP-Growth algorithms. It first creates a one-hot encoded DataFrame from the given dataset. Then, it applies the Apriori algorithm to generate frequent itemsets and prints them. Next, it applies the association rules algorithm to derive association rules from the frequent itemsets and prints them.

Similarly, it applies the FP-Growth algorithm to generate frequent itemsets and prints them. Finally, it applies the association rules algorithm to derive association rules from the frequent itemsets generated by FP-Growth and prints them.

You can replace the `dataset` variable with your own dataset to observe the results for your data. Make sure to install the `mlxtend` library before running this script by using the command `pip install mlxtend`.
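
If you only want the strongest patterns, the rules DataFrame returned by `association_rules` can be filtered like any other pandas DataFrame. The sketch below uses the `confidence` and `lift` columns that `mlxtend` produces; the thresholds are illustrative, not part of the original script.

```python
# Keep only high-lift, high-confidence rules and sort them
# (assumes the rules_apriori DataFrame from the script above is in scope)
strong_rules = rules_apriori[(rules_apriori['lift'] > 1.2) &
                             (rules_apriori['confidence'] > 0.8)]
strong_rules = strong_rules.sort_values('lift', ascending=False)
print(strong_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])
```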
50
ai_security/ML_Fundamentals/ai_generated/DBSCAN.py
Normal file

@@ -0,0 +1,50 @@
Sure! Here's an example of a Python script that demonstrates the DBSCAN algorithm using the iris dataset from scikit-learn:

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# Load the iris dataset
iris = datasets.load_iris()
X = iris.data

# Scale the features
X = StandardScaler().fit_transform(X)

# Apply DBSCAN
dbscan = DBSCAN(eps=0.3, min_samples=5)
dbscan.fit(X)

# Get the predicted labels and number of clusters
labels = dbscan.labels_
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
n_noise = list(labels).count(-1)

# Mask of core samples (needed below to distinguish core points from noise)
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[dbscan.core_sample_indices_] = True

# Plot the results (first two scaled features)
unique_labels = set(labels)
colors = [plt.cm.Spectral(each)
          for each in np.linspace(0, 1, len(unique_labels))]

for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise
        col = [0, 0, 0, 1]

    class_member_mask = (labels == k)

    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
             markeredgecolor='k', markersize=6)

plt.title(f"Estimated number of clusters: {n_clusters}, Noise points: {n_noise}")
plt.show()
```

In this script, we first load the iris dataset and standardize the features using `StandardScaler`. Then we apply DBSCAN with `eps=0.3` (the maximum distance between two samples for them to be considered neighbors) and `min_samples=5` (the minimum number of samples in a neighborhood for a point to be treated as a core point).

Next, we retrieve the predicted labels and calculate the number of clusters and noise points. Finally, we plot the data points, highlighting the clusters and noise using different colors.

Note that you may need to have scikit-learn and matplotlib installed for this script to work.
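
Choosing `eps` is usually the hard part. One common heuristic, sketched below as an optional addition (not part of the original script), is to plot each point's distance to its k-th nearest neighbor in sorted order and look for the "elbow" in the curve.

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors

# k-distance plot: sort each point's distance to its 5th nearest neighbor.
# A visible "elbow" in this curve is a reasonable candidate value for eps.
neighbors = NearestNeighbors(n_neighbors=5).fit(X)
distances, _ = neighbors.kneighbors(X)
k_distances = np.sort(distances[:, -1])
plt.plot(k_distances)
plt.xlabel("Points sorted by distance")
plt.ylabel("Distance to 5th nearest neighbor")
plt.show()
```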
44
ai_security/ML_Fundamentals/ai_generated/Decision_Trees.py
Normal file

@@ -0,0 +1,44 @@
Sure! Below is a Python script that demonstrates the concept of Decision Trees using the popular scikit-learn library.

```python
# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

# Load the Iris dataset
data = load_iris()
X = data.data
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Decision Tree classifier
clf = DecisionTreeClassifier()

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Visualize the Decision Tree
from sklearn import tree
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 8))
tree.plot_tree(clf, feature_names=data.feature_names, class_names=data.target_names, filled=True)
plt.show()
```

In this script, we first import the necessary libraries: `load_iris` from `sklearn.datasets` to load the Iris dataset, `train_test_split` from `sklearn.model_selection` to split the dataset into training and testing sets, `DecisionTreeClassifier` from `sklearn.tree` to create the Decision Tree classifier, and `metrics` from `sklearn` to evaluate the model.

We load the Iris dataset and split it into training and testing sets using an 80:20 split. Then, we create a Decision Tree classifier and train it on the training data. After that, we make predictions on the testing data and evaluate the model using accuracy as the metric.

Finally, we visualize the Decision Tree using `tree.plot_tree` from `sklearn` and `matplotlib.pyplot`. The resulting tree is displayed in a figure.
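
If you prefer a text dump of the learned rules over a plot, scikit-learn's `export_text` helper can print the same tree as nested if/else conditions. This is a small optional addition that reuses the fitted `clf` from above.

```python
from sklearn.tree import export_text

# Print the learned decision rules using the iris feature names
rules = export_text(clf, feature_names=list(data.feature_names))
print(rules)
```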
@@ -0,0 +1,35 @@
Sure! Here's a Python script that demonstrates the concept of Gaussian Mixture Models (GMM) using the scikit-learn library:

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture

# Generate random data from three different Gaussian distributions
np.random.seed(0)
n_samples = 1000
n_components = 3
X = np.concatenate([
    np.random.multivariate_normal([0, 0], [[1, 0], [0, 20]], int(0.1 * n_samples)),
    np.random.multivariate_normal([-5, 0], [[1, 0], [0, 1]], int(0.3 * n_samples)),
    np.random.multivariate_normal([5, 10], [[4, 0], [0, 4]], int(0.6 * n_samples))
])

# Fit the GMM model
gmm = GaussianMixture(n_components=n_components)
gmm.fit(X)

# Predict the labels for the data samples
y_pred = gmm.predict(X)

# Visualize the results
plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap='viridis')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Gaussian Mixture Models')
plt.show()
```

In this script, we first generate random data points from three different Gaussian distributions. We then use the `GaussianMixture` class from scikit-learn to fit a GMM model on this data. Next, we use the learned GMM model to predict the labels for each data sample. Finally, we visualize the results by plotting the data points colored according to their predicted labels.

Please make sure you have the scikit-learn and matplotlib libraries installed in your Python environment before running this script.
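
In practice the number of components is usually not known in advance. A common approach, sketched below as an optional extension to the script, is to fit GMMs with different component counts and compare their Bayesian Information Criterion (BIC) scores.

```python
# Compare BIC scores for different numbers of mixture components;
# the lowest BIC is a reasonable default choice.
for k in range(1, 7):
    candidate = GaussianMixture(n_components=k, random_state=0).fit(X)
    print(f"n_components={k}, BIC={candidate.bic(X):.1f}")
```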
@@ -0,0 +1,34 @@
Sure! Below is an example of a Python script demonstrating Gradient Boosting Machines (GBM) using the scikit-learn library:

```python
# Importing required libraries
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Generate a random classification dataset
X, y = make_classification(n_samples=100, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit the Gradient Boosting Classifier
gbm_model = GradientBoostingClassifier(random_state=42)
gbm_model.fit(X_train, y_train)

# Predict the labels for the test set
y_pred = gbm_model.predict(X_test)

# Calculate the accuracy score
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
```

In this script, we first generate a random classification dataset using the `make_classification` function from scikit-learn. Then, we split the dataset into training and testing sets using the `train_test_split` function.

Next, we create an instance of the Gradient Boosting Classifier using `GradientBoostingClassifier` and fit the model to the training data using the `fit` method.

After fitting the model, we predict the labels for the test set using the `predict` method.

Finally, we calculate the accuracy score by comparing the predicted labels with the true labels and print it out.
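
GBM performance depends heavily on the number of trees, the learning rate, and the tree depth. A small grid search over those hyperparameters, with illustrative values and not part of the original script, might look like this:

```python
from sklearn.model_selection import GridSearchCV

# Search a small grid of boosting hyperparameters with 5-fold cross-validation
param_grid = {
    "n_estimators": [50, 100, 200],
    "learning_rate": [0.01, 0.1, 0.3],
    "max_depth": [2, 3],
}
search = GridSearchCV(GradientBoostingClassifier(random_state=42), param_grid, cv=5)
search.fit(X_train, y_train)
print("Best parameters:", search.best_params_)
print("Best CV accuracy:", search.best_score_)
```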
@@ -0,0 +1,52 @@
Sure! Here's a Python script that demonstrates Independent Component Analysis (ICA) using the scikit-learn library:

```python
import numpy as np
from sklearn.decomposition import FastICA
import matplotlib.pyplot as plt

# Generate data
np.random.seed(0)
n_samples = 2000
time = np.linspace(0, 8, n_samples)

s1 = np.sin(2 * time)           # Signal 1: sinusoidal signal
s2 = np.sign(np.sin(3 * time))  # Signal 2: square signal

S = np.c_[s1, s2]
S += 0.2 * np.random.normal(size=S.shape)  # Add noise

# Mix data
A = np.array([[1, 1], [0.5, 2]])  # Mixing matrix
X = np.dot(S, A.T)                # Generate observations

# ICA
ica = FastICA(n_components=2)
S_ = ica.fit_transform(X)  # Reconstruct signals
A_ = ica.mixing_           # Estimated mixing matrix

# Plot results
plt.figure(figsize=(8, 6))

models = [X, S, S_]
names = ['Observations (mixed signals)',
         'True Sources',
         'ICA estimated sources']

colors = ['red', 'steelblue']

for i, (model, name) in enumerate(zip(models, names), 1):
    plt.subplot(3, 1, i)
    plt.title(name)
    for sig, color in zip(model.T, colors):
        plt.plot(sig, color=color)

plt.tight_layout()
plt.show()
```

This script demonstrates ICA by generating two mixed signals, `s1` and `s2`, adding noise, and then applying ICA to separate the original signals. The original signals are represented by the `S` matrix, and the observed mixed signals are stored in the `X` matrix. The `FastICA` class from scikit-learn is used to perform ICA.

The script plots three subplots: the observed mixed signals, the true sources, and the estimated sources recovered through ICA.

Make sure to install the required dependencies (scikit-learn, numpy, matplotlib) before running the script.
@@ -0,0 +1,45 @@
Sure! Here's an example Python script that demonstrates Latent Dirichlet Allocation (LDA) using the scikit-learn library:

```python
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Sample data
documents = [
    'apple orange apple orange',
    'banana apple orange',
    'orange banana apple',
    'apple apple apple',
    'orange orange',
]

# Tokenize and vectorize the documents
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(documents)

# Apply LDA
lda_model = LatentDirichletAllocation(n_components=2, random_state=0)
lda_model.fit(X)

# Get the topics and their corresponding word probabilities
feature_names = vectorizer.get_feature_names_out()
topics = lda_model.components_

# Display the top words for each topic
for topic_idx, topic in enumerate(topics):
    top_words = [feature_names[i] for i in topic.argsort()[:-4:-1]]
    print(f"Top words for topic {topic_idx+1}: {', '.join(top_words)}")
```

In this script, we start by importing the necessary libraries: `pandas` for data manipulation, `CountVectorizer` for tokenization and vectorization of text documents, and `LatentDirichletAllocation` from `sklearn.decomposition` for performing LDA.

Next, we define a sample dataset of text documents. You can replace this with your own dataset.

We then use the `CountVectorizer` to transform the documents into a matrix representation with token counts. This matrix is then passed to the `LatentDirichletAllocation` model for training. In this example, we have specified `n_components=2` to indicate that we want to extract two topics.

After fitting the LDA model, we retrieve the feature names and the LDA model's components (topics and word probabilities).

Finally, we loop through each topic to display the top words with the highest probabilities for each topic.

Note that because `random_state=0` is set, the topics are reproducible here; without a fixed random state, results may vary between runs due to LDA's random initialization.
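
Once the model is fitted, you can also ask it for the topic mixture of unseen text. The short sketch below is an optional addition that reuses the fitted `vectorizer` and `lda_model` from the script above; the new document is a made-up example.

```python
# Infer the topic distribution of a new document with the fitted model
new_docs = ['apple banana banana']
new_X = vectorizer.transform(new_docs)
topic_distribution = lda_model.transform(new_X)
print("Topic distribution:", topic_distribution[0])
```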
@@ -0,0 +1,121 @@
Sure! Here's a Python script that demonstrates the Monte Carlo Tree Search (MCTS) algorithm:

```python
import numpy as np
import random


class Node:
    def __init__(self, state, parent=None):
        self.state = state
        self.parent = parent
        self.children = []
        self.visits = 0
        self.rewards = 0

    def expand(self):
        possible_moves = self.state.get_possible_moves()
        for move in possible_moves:
            new_state = self.state.make_move(move)
            new_node = Node(new_state, parent=self)
            self.children.append(new_node)

    def select(self):
        selected_child = max(self.children, key=lambda child: child.get_ucb_score())
        return selected_child

    def simulate(self):
        current_state = self.state
        while not current_state.is_terminal():
            random_move = random.choice(current_state.get_possible_moves())
            current_state = current_state.make_move(random_move)
        return current_state.get_reward()

    def backpropagate(self, reward):
        self.visits += 1
        self.rewards += reward
        if self.parent:
            self.parent.backpropagate(reward)

    def get_ucb_score(self):
        if self.visits == 0:
            return float('inf')  # Always try unvisited children first
        exploration_factor = 1.414  # Adjust this for the exploration vs. exploitation trade-off
        exploitation_score = self.rewards / self.visits
        exploration_score = np.sqrt(np.log(self.parent.visits) / self.visits)
        return exploitation_score + exploration_factor * exploration_score


class State:
    def __init__(self):
        self.board = np.zeros((3, 3))
        self.current_player = 1

    def get_possible_moves(self):
        return [(i, j) for i in range(3) for j in range(3) if self.board[i][j] == 0]

    def make_move(self, move):
        new_state = State()
        new_state.board = np.copy(self.board)
        new_state.current_player = -self.current_player
        new_state.board[move[0]][move[1]] = self.current_player
        return new_state

    def is_terminal(self):
        return np.any(np.sum(self.board, axis=1) == 3) or np.any(np.sum(self.board, axis=0) == 3) \
            or np.trace(self.board) == 3 or np.trace(np.fliplr(self.board)) == 3 \
            or np.any(np.sum(self.board, axis=1) == -3) or np.any(np.sum(self.board, axis=0) == -3) \
            or np.trace(self.board) == -3 or np.trace(np.fliplr(self.board)) == -3 \
            or len(self.get_possible_moves()) == 0

    def get_reward(self):
        if np.any(np.sum(self.board, axis=1) == 3) or np.any(np.sum(self.board, axis=0) == 3) \
                or np.trace(self.board) == 3 or np.trace(np.fliplr(self.board)) == 3:
            return 1
        elif np.any(np.sum(self.board, axis=1) == -3) or np.any(np.sum(self.board, axis=0) == -3) \
                or np.trace(self.board) == -3 or np.trace(np.fliplr(self.board)) == -3:
            return -1
        else:
            return 0


def monte_carlo_tree_search(initial_state, iterations):
    root = Node(initial_state)

    for _ in range(iterations):
        # Selection
        selected_node = root
        while selected_node.children:
            selected_node = selected_node.select()

        # Expansion
        if not selected_node.state.is_terminal():
            selected_node.expand()
            selected_node = random.choice(selected_node.children)

        # Simulation
        reward = selected_node.simulate()

        # Backpropagation
        selected_node.backpropagate(reward)

    best_child = max(root.children, key=lambda child: child.visits)
    return best_child.state.board


# Test the Monte Carlo Tree Search algorithm on a Tic-Tac-Toe game

initial_state = State()

best_move = monte_carlo_tree_search(initial_state, iterations=10000)

print("Best move found by Monte Carlo Tree Search:")
print(best_move)
```

In the above script, we define a `Node` class to represent each state in the game tree and a `State` class to maintain the current game state. The `monte_carlo_tree_search` function implements the MCTS algorithm and returns the best move found after the specified number of iterations.

In the Tic-Tac-Toe example, each state is represented by a 3x3 game board. The `is_terminal` method checks if the game is over, and the `get_reward` method assigns rewards to terminal states (-1 for a loss, 1 for a win, 0 for a draw). The `get_possible_moves` method returns all valid moves for the current state, and the `make_move` method creates a new state after making a move.

During each iteration of the MCTS algorithm, the selection step picks the most promising child node by applying the Upper Confidence Bound (UCB) formula. If the selected node is not terminal, the expansion step creates a child node for every possible move. The simulation step randomly plays out the game from the selected child node until a terminal state is reached. Finally, the backpropagation step updates all nodes on the selected path with the simulation result. The process is repeated for the specified number of iterations.

After running the MCTS algorithm, the best move is determined by selecting the child node with the highest visit count from the root node. The resulting board configuration is returned as the best move.

Feel free to adjust the number of iterations and the exploration factor to see different results.
42
ai_security/ML_Fundamentals/ai_generated/Naïve_Bayes.py
Normal file

@@ -0,0 +1,42 @@
Sure! The following is a Python script that demonstrates the Naïve Bayes algorithm using the famous Iris dataset:

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = load_iris()

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

# Create an instance of the Naïve Bayes classifier
classifier = GaussianNB()

# Train the classifier using the training data
classifier.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = classifier.predict(X_test)

# Calculate accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
```

In this script, we start by importing the necessary libraries: `numpy` for numerical operations, `sklearn.datasets` to load the Iris dataset, `sklearn.model_selection` to split the data into training and testing sets, `sklearn.naive_bayes` for the Naïve Bayes classifier, and `sklearn.metrics` for calculating accuracy.

Next, we load the Iris dataset using the `load_iris()` function. Then we split the data into training and testing sets using the `train_test_split()` function, where `test_size=0.2` indicates that 20% of the data will be used for testing.

We create an instance of the Naïve Bayes classifier using `GaussianNB()`. This classifier assumes that features follow a Gaussian distribution. If your data doesn't meet this assumption, you can explore other variants like multinomial or Bernoulli Naïve Bayes, as sketched after this explanation.

We train the classifier using the training data by calling the `fit()` method and passing in the features (X_train) and corresponding labels (y_train).

Then, we make predictions on the testing data using the `predict()` method and passing in the features of the test set (X_test).

Finally, we calculate the accuracy of the classifier by comparing the predicted labels with the true labels from the testing set using the `accuracy_score()` function.

Hope this helps to demonstrate the Naïve Bayes algorithm in Python!
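
As a follow-up to the note about other variants, here is a minimal sketch of `MultinomialNB` on word-count features, the variant usually used for text. The toy sentences and labels are made up purely for illustration.

```python
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# Toy spam/ham example: multinomial NB works on non-negative count features
texts = ["win money now", "meeting at noon", "win a free prize", "lunch at noon?"]
labels = [1, 0, 1, 0]  # 1 = spam, 0 = ham

vectorizer = CountVectorizer()
counts = vectorizer.fit_transform(texts)

model = MultinomialNB()
model.fit(counts, labels)
print(model.predict(vectorizer.transform(["free money"])))  # expected: [1]
```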
63
ai_security/ML_Fundamentals/ai_generated/Neural_Networks.py
Normal file

@@ -0,0 +1,63 @@
import numpy as np
import matplotlib.pyplot as plt

# Create a simple neural network with one input layer, one hidden layer, and one output layer
class NeuralNetwork:
    def __init__(self):
        self.weights1 = np.random.rand(2, 4)  # weight matrix between input (2 features) and hidden layer
        self.weights2 = np.random.rand(4, 1)  # weight matrix between hidden and output layer
        self.bias1 = np.random.rand(1, 4)     # bias matrix for hidden layer
        self.bias2 = np.random.rand(1, 1)     # bias matrix for output layer

    def sigmoid(self, x):
        # Sigmoid activation function
        return 1 / (1 + np.exp(-x))

    def forward_propagation(self, X):
        # Perform forward propagation
        self.hidden_layer = self.sigmoid(np.dot(X, self.weights1) + self.bias1)                  # hidden layer activations
        self.output_layer = self.sigmoid(np.dot(self.hidden_layer, self.weights2) + self.bias2)  # output layer activations
        return self.output_layer

    def backward_propagation(self, X, y, output):
        # Perform backward propagation to update weights and biases
        self.error = y - output                                   # calculate error
        self.delta_output = self.error * (output * (1 - output))  # output gradient
        self.delta_hidden = np.dot(self.delta_output, self.weights2.T) * (self.hidden_layer * (1 - self.hidden_layer))  # hidden gradient
        self.weights2 += np.dot(self.hidden_layer.T, self.delta_output)  # update weights between hidden and output layer
        self.weights1 += np.dot(X.T, self.delta_hidden)                  # update weights between input and hidden layer
        self.bias2 += np.sum(self.delta_output, axis=0)                  # update bias for output layer
        self.bias1 += np.sum(self.delta_hidden, axis=0)                  # update bias for hidden layer

    def train(self, X, y, epochs):
        # Train the neural network
        for _ in range(epochs):
            output = self.forward_propagation(X)
            self.backward_propagation(X, y, output)

    def predict(self, X):
        # Make predictions
        return self.forward_propagation(X)

# Create a sample dataset for an XOR gate
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Create and train the neural network
nn = NeuralNetwork()
nn.train(X, y, epochs=10000)

# Make predictions on the same dataset
predictions = nn.predict(X)

# Print the predictions
print("Predictions:")
for i in range(len(predictions)):
    print(f"Input: {X[i]}, Predicted Output: {predictions[i]}")

# Plot the predictions
plt.scatter(X[:, 0], X[:, 1], c=predictions.flatten(), cmap='viridis')
plt.xlabel("Input 1")
plt.ylabel("Input 2")
plt.title("Neural Network Predictions for XOR Gate")
plt.show()
91
ai_security/ML_Fundamentals/ai_generated/Policy_Gradients.py
Normal file

@@ -0,0 +1,91 @@
Sure! Here's an example of a Python script that demonstrates the concept of Policy Gradients using the OpenAI Gym environment and a simple neural network:

```python
import gym
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Create the policy network
def build_policy_network(input_shape, output_units):
    model = Sequential()
    model.add(Dense(24, activation='relu', input_shape=input_shape))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(output_units, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    return model

def choose_action(state, model):
    state = np.reshape(state, [1, input_shape[0]])
    prob_dist = model.predict(state).flatten()
    action = np.random.choice(num_actions, 1, p=prob_dist)[0]
    return action

def discount_rewards(rewards, gamma):
    discounted_rewards = np.zeros_like(rewards)
    running_sum = 0
    for t in reversed(range(len(rewards))):
        running_sum = running_sum * gamma + rewards[t]
        discounted_rewards[t] = running_sum
    return discounted_rewards

# Set hyperparameters
learning_rate = 0.01
num_episodes = 1000
gamma = 0.99

# Create the environment
# (this example assumes the classic Gym API, where reset() returns the
# observation and step() returns (obs, reward, done, info))
env = gym.make('CartPole-v0')
input_shape = env.observation_space.shape
num_actions = env.action_space.n

# Build the policy network and initialize weights
policy_network = build_policy_network(input_shape, num_actions)

# Start training
for episode in range(num_episodes):
    state = env.reset()
    done = False
    episode_rewards = []
    episode_gradients = []

    while not done:
        # Choose action based on the policy network
        action = choose_action(state, policy_network)

        # Take the chosen action and observe the next state and reward
        next_state, reward, done, _ = env.step(action)

        # Store the reward
        episode_rewards.append(reward)

        # Compute the one-hot encoded action
        action_onehot = np.zeros(num_actions, dtype=np.float32)
        action_onehot[action] = 1

        # Compute the gradient of the REINFORCE loss w.r.t. the policy weights
        with tf.GradientTape() as tape:
            probs = policy_network(np.expand_dims(state, axis=0).astype(np.float32), training=False)
            action_prob = tf.reduce_sum(tf.multiply(probs, tf.convert_to_tensor(action_onehot, dtype=tf.float32)))
            # Negative log-probability of the action that was taken
            loss = -tf.math.log(action_prob + 1e-10)

        # Store the gradients
        episode_gradients.append(tape.gradient(loss, policy_network.trainable_variables))

        state = next_state

    # Update the policy network: gradient ascent on log-probability, weighted by the discounted return
    rewards = discount_rewards(episode_rewards, gamma)
    for i in range(len(episode_gradients)):
        grads = episode_gradients[i]
        for j in range(len(grads)):
            policy_network.trainable_variables[j].assign_sub(learning_rate * grads[j] * float(rewards[i]))

    if episode % 100 == 0:
        print("Episode {}: Total reward = {}".format(episode, np.sum(episode_rewards)))
```

This script uses the `gym` package to create the CartPole-v0 environment, which is a classic reinforcement learning problem. It then builds a simple neural network as the policy network, with two hidden layers and a softmax output layer for the action probabilities. It implements the `choose_action` function to sample actions based on the probabilities predicted by the policy network.

During training, the script collects rewards and gradients for each episode. It then applies the policy gradient update rule, computing the discounted rewards and updating the policy network weights accordingly. Finally, it prints the total reward every 100 episodes.

Note that this script is a simplified example and may not converge to the optimal policy. Policy Gradients typically require careful tuning of hyperparameters, suitable network architectures, and more advanced techniques like baseline estimation and advantage normalization to improve training stability and performance.
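
To make the discounting step concrete, here is a tiny worked example of `discount_rewards` (not part of the original script): with three rewards of 1 and `gamma = 0.9`, the returns are accumulated from the end of the episode backwards.

```python
# discount_rewards([1, 1, 1], gamma=0.9):
#   t=2: 0.0 * 0.9 + 1 = 1.0
#   t=1: 1.0 * 0.9 + 1 = 1.9
#   t=0: 1.9 * 0.9 + 1 = 2.71
print(discount_rewards([1.0, 1.0, 1.0], 0.9))  # approximately [2.71, 1.9, 1.0]
```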
@@ -0,0 +1,36 @@
Certainly! Here's a Python script that demonstrates Principal Component Analysis (PCA):

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Generate some random 2D data points
np.random.seed(0)
n_samples = 100
X = np.random.randn(n_samples, 2) * 2

# Apply PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Plot the original data
plt.figure(figsize=(8, 4))
plt.subplot(1, 2, 1)
plt.scatter(X[:, 0], X[:, 1], alpha=0.5)
plt.title('Original Data')
plt.xlabel('X')
plt.ylabel('Y')

# Plot the PCA-transformed data
plt.subplot(1, 2, 2)
plt.scatter(X_pca[:, 0], X_pca[:, 1], alpha=0.5)
plt.title('PCA Transformed Data')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')

plt.tight_layout()
plt.show()
```

In this script, we generate a random 2D dataset using `np.random.randn()`. We then apply PCA using `sklearn.decomposition.PCA` and transform the data using `fit_transform()`. Finally, we visualize the original data and the PCA-transformed data in separate subplots using `matplotlib.pyplot.scatter()`.
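
After fitting, it is often useful to check how much variance each principal component captures; the fitted `PCA` object exposes this directly. The snippet below is a small optional addition to the script above.

```python
# Fraction of total variance explained by each principal component
print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Cumulative:", np.cumsum(pca.explained_variance_ratio_))
```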
@@ -0,0 +1,160 @@
Sure! Here's a Python script that demonstrates Proximal Policy Optimization (PPO) using the OpenAI Gym's CartPole environment:

```python
import gym
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Set up the CartPole environment
# (this example assumes the classic Gym API, where reset() returns the
# observation and step() returns (obs, reward, done, info))
env = gym.make("CartPole-v1")
num_states = env.observation_space.shape[0]
num_actions = env.action_space.n


# PPO Agent
class PPOAgent:
    def __init__(self, num_states, num_actions):
        self.gamma = 0.99       # Discount factor
        self.epsilon = 0.2      # Clipping factor
        self.actor_lr = 0.0003  # Actor learning rate
        self.critic_lr = 0.001  # Critic learning rate

        self.actor = self.build_actor()
        self.critic = self.build_critic()

    def build_actor(self):
        inputs = layers.Input(shape=(num_states,))
        hidden = layers.Dense(128, activation="relu")(inputs)
        action_probs = layers.Dense(num_actions, activation="softmax")(hidden)

        model = keras.Model(inputs=inputs, outputs=action_probs)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.actor_lr)
        model.compile(optimizer=optimizer, loss="categorical_crossentropy")
        return model

    def build_critic(self):
        inputs = layers.Input(shape=(num_states,))
        hidden = layers.Dense(128, activation="relu")(inputs)
        value = layers.Dense(1, activation="linear")(hidden)

        model = keras.Model(inputs=inputs, outputs=value)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.critic_lr)
        model.compile(optimizer=optimizer, loss="mean_squared_error")
        return model

    def choose_action(self, state):
        state = np.expand_dims(state, axis=0)
        action_probs = self.actor.predict(state).flatten()

        # Sample an action from the action probability distribution
        action = np.random.choice(num_actions, 1, p=action_probs)[0]
        return action

    def compute_returns(self, rewards, dones, values):
        values = np.array(values).reshape(-1)  # flatten the stored critic outputs
        returns = np.zeros_like(rewards)
        discounted_sum = 0
        for i in reversed(range(len(rewards))):
            if dones[i]:
                discounted_sum = 0
            discounted_sum = rewards[i] + self.gamma * discounted_sum
            returns[i] = discounted_sum

        advantages = returns - values
        advantages = (advantages - np.mean(advantages)) / (np.std(advantages) + 1e-10)
        return returns, advantages

    def train(self, old_states, actions, rewards, dones, values):
        returns, advantages = self.compute_returns(rewards, dones, values)

        # Convert inputs to float32 numpy arrays for indexing and to match the Keras dtypes
        old_states = np.array(old_states, dtype=np.float32)
        actions = np.array(actions)
        returns = np.array(returns, dtype=np.float32)
        advantages = np.array(advantages, dtype=np.float32)

        num_samples = len(old_states)

        # Actor training
        actions_one_hot = np.eye(num_actions, dtype=np.float32)[actions]
        old_action_probs = self.actor.predict(old_states)
        old_action_probs = np.clip(old_action_probs, 1e-10, 1.0)
        old_action_probs = old_action_probs * actions_one_hot
        old_action_probs = np.sum(old_action_probs, axis=1)

        with tf.GradientTape() as tape:
            # Use TensorFlow ops inside the tape so gradients can flow
            new_action_probs = self.actor(old_states, training=True)
            new_action_probs = tf.clip_by_value(new_action_probs, 1e-10, 1.0)
            new_action_probs = new_action_probs * actions_one_hot
            new_action_probs = tf.reduce_sum(new_action_probs, axis=1)

            ratio = new_action_probs / old_action_probs

            surrogate1 = ratio * advantages
            surrogate2 = tf.clip_by_value(ratio, 1 - self.epsilon, 1 + self.epsilon) * advantages
            actor_loss = -tf.reduce_mean(tf.minimum(surrogate1, surrogate2))

        actor_grads = tape.gradient(actor_loss, self.actor.trainable_variables)
        self.actor.optimizer.apply_gradients(zip(actor_grads, self.actor.trainable_variables))

        # Critic training
        with tf.GradientTape() as tape:
            values_pred = tf.squeeze(self.critic(old_states, training=True), axis=1)
            critic_loss = tf.reduce_mean(tf.square(returns - values_pred))

        critic_grads = tape.gradient(critic_loss, self.critic.trainable_variables)
        self.critic.optimizer.apply_gradients(zip(critic_grads, self.critic.trainable_variables))


# Initialize PPO agent
agent = PPOAgent(num_states, num_actions)

# Training loop
num_episodes = 500
for episode in range(num_episodes):
    state = env.reset()
    done = False

    old_states = []
    actions = []
    rewards = []
    dones = []
    values = []

    while not done:
        # Collect data
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)

        old_states.append(state)
        actions.append(action)
        rewards.append(reward)
        dones.append(done)
        values.append(agent.critic.predict(np.expand_dims(state, axis=0)))

        state = next_state

    # Train the agent
    agent.train(old_states, actions, rewards, dones, values)

    # Display rewards every 10 episodes
    if episode % 10 == 0:
        total_rewards = sum(rewards)
        print(f"Episode: {episode}, Rewards: {total_rewards}")

# Test the trained agent
state = env.reset()
done = False
total_rewards = 0

while not done:
    env.render()
    action = agent.choose_action(state)
    state, reward, done, _ = env.step(action)
    total_rewards += reward

print(f"Total Rewards: {total_rewards}")

env.close()
```

Make sure you have the OpenAI Gym package installed (`pip install gym`) and run the script. It will train a PPO agent on the CartPole-v1 environment and then test the trained agent. You should see the total rewards increasing as the agent learns to balance the pole on the cart.
34
ai_security/ML_Fundamentals/ai_generated/Random_Forests.py
Normal file

@@ -0,0 +1,34 @@
Sure! Here's a Python script that demonstrates the concept of Random Forests using the famous Iris dataset.

```python
# Importing the required libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Loading the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating a Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100)

# Training the classifier
rf_classifier.fit(X_train, y_train)

# Predicting on the test set
y_pred = rf_classifier.predict(X_test)

# Calculating accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
```

In this script, we import the necessary libraries, load the Iris dataset, and split it into training and testing sets. Then, we create a Random Forest classifier with 100 decision trees (`n_estimators=100`) and train it on the training set. Finally, we predict the target variable for the test set and calculate the accuracy of the classifier.

Note that you may need to install scikit-learn (`pip install scikit-learn`) if you don't have it already installed.
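
One practical advantage of Random Forests is the built-in feature importance estimate. The short optional snippet below prints it for the iris features using the fitted classifier from the script above.

```python
# Impurity-based feature importances of the trained forest
for name, importance in zip(iris.feature_names, rf_classifier.feature_importances_):
    print(f"{name}: {importance:.3f}")
```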
112
ai_security/ML_Fundamentals/ai_generated/SARSA.py
Normal file

@@ -0,0 +1,112 @@
Sure! Below is an example script that demonstrates the SARSA (State-Action-Reward-State-Action) algorithm using a simple grid world environment:

```python
import numpy as np

# Define the grid world environment
grid = [
    [0, 0, 0, 0],
    [0, -1, 0, -1],
    [0, 0, 0, -1],
    [0, -1, 0, 1]
]

# Initialize Q-table
Q = np.zeros((4, 4, 4))  # Q(s, a) - 4x4 grid, 4 actions (up, down, left, right)

# Define epsilon-greedy exploration
epsilon = 0.1

# Define learning parameters
alpha = 0.1  # Learning rate
gamma = 0.9  # Discount factor

# Define action mapping
actions = ['up', 'down', 'left', 'right']

# Get next action using epsilon-greedy exploration
def get_action(state):
    if np.random.rand() < epsilon:
        action = np.random.choice(actions)
    else:
        action = actions[np.argmax(Q[state[0], state[1]])]
    return action

# Update Q-values using the SARSA update rule
def update_q_values(state, action, reward, next_state, next_action):
    Q[state[0], state[1], actions.index(action)] += alpha * (
        reward + gamma * Q[next_state[0], next_state[1], actions.index(next_action)] -
        Q[state[0], state[1], actions.index(action)])

# Train the agent
def train_agent():
    num_episodes = 1000

    for episode in range(num_episodes):
        state = [3, 0]  # Start state
        action = get_action(state)

        while True:
            # Perform selected action
            if action == 'up':
                next_state = [state[0] - 1, state[1]]
            elif action == 'down':
                next_state = [state[0] + 1, state[1]]
            elif action == 'left':
                next_state = [state[0], state[1] - 1]
            else:
                next_state = [state[0], state[1] + 1]

            # Check if next state is valid (stay in place when moving off the grid)
            if next_state[0] < 0 or next_state[0] >= 4 or next_state[1] < 0 or next_state[1] >= 4:
                next_state = state

            # Get next action using epsilon-greedy exploration
            next_action = get_action(next_state)

            # Update Q-values
            update_q_values(state, action, grid[next_state[0]][next_state[1]], next_state, next_action)

            # Update current state and action
            state = next_state
            action = next_action

            # Break if goal state reached
            if grid[state[0]][state[1]] == 1:
                break

# Test the trained agent
def test_agent():
    state = [3, 0]  # Start state

    while True:
        # Choose the best action based on Q-values
        action = actions[np.argmax(Q[state[0], state[1]])]

        # Perform selected action
        if action == 'up':
            next_state = [state[0] - 1, state[1]]
        elif action == 'down':
            next_state = [state[0] + 1, state[1]]
        elif action == 'left':
            next_state = [state[0], state[1] - 1]
        else:
            next_state = [state[0], state[1] + 1]

        # Keep the agent on the grid (same check as during training)
        if next_state[0] < 0 or next_state[0] >= 4 or next_state[1] < 0 or next_state[1] >= 4:
            next_state = state

        # Print the current state and action taken
        print(f"Current state: {state}, Action: {action}")

        # Update current state
        state = next_state

        # Break if goal state reached
        if grid[state[0]][state[1]] == 1:
            print("Reached the goal!")
            break

# Train and test the agent
train_agent()
test_agent()
```

This script demonstrates the SARSA algorithm in a simple grid world environment, where the agent has to navigate from the starting state `[3, 0]` to the goal state `[3, 3]` while avoiding obstacles represented by `-1`. The agent uses the SARSA algorithm to learn Q-values and then applies them to reach the goal state.
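
If you want to see what the agent actually learned, a small optional addition is to print the greedy action for every cell of the grid once training has finished:

```python
# Print the greedy policy derived from the learned Q-table
for i in range(4):
    row = []
    for j in range(4):
        if grid[i][j] == -1:
            row.append('XX')  # obstacle
        elif grid[i][j] == 1:
            row.append('GG')  # goal
        else:
            row.append(actions[np.argmax(Q[i, j])][:2])
    print(' '.join(row))
```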
@@ -0,0 +1,38 @@
Sure! Here's an example script demonstrating the use of Support Vector Machines (SVM) for a multi-class classification problem using the famous iris dataset:

```python
# Importing the required libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Loading the iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Creating the SVM classifier
svm = SVC(kernel='linear')

# Training the SVM classifier
svm.fit(X_train, y_train)

# Making predictions on the testing set
y_pred = svm.predict(X_test)

# Calculating the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
```

In this script, the iris dataset is first loaded using the `datasets.load_iris()` method from the sklearn library. The features matrix `X` and the target vector `y` are then initialized. Next, the dataset is split into training and testing sets using the `train_test_split()` function from the `model_selection` module.

A Support Vector Machine classifier is created using the `SVC()` class from the `svm` module, with the `kernel` parameter set to 'linear'. The classifier is then trained on the training set using the `fit()` method.

Predictions are made on the testing set using the `predict()` method, and the accuracy of the model is calculated using the `accuracy_score()` function from the `metrics` module. Finally, the accuracy is printed to the console.

You can modify this script according to your specific use case or dataset. Remember to install the required packages (`scikit-learn`) in your Python environment before running the script.
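
The linear kernel is only one option. As a small optional extension, you could swap in the RBF kernel and tune its `C` and `gamma` parameters; the values below are illustrative defaults, not recommendations from the original script.

```python
# Same data, but with an RBF kernel; C and gamma are illustrative values
rbf_svm = SVC(kernel='rbf', C=1.0, gamma='scale')
rbf_svm.fit(X_train, y_train)
print("RBF kernel accuracy:", accuracy_score(y_test, rbf_svm.predict(X_test)))
```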
@@ -0,0 +1,65 @@
Sure! Here's a simple example of a Python script that demonstrates Temporal Difference Learning (TD Learning) using a simple grid world environment:

```python
import numpy as np

# Environment
grid_size = 4
num_episodes = 100
start_state = (0, 0)
end_state = (grid_size - 1, grid_size - 1)
actions = ['up', 'down', 'left', 'right']

# Hyperparameters
alpha = 0.1  # learning rate
gamma = 0.9  # discount factor

# Initialize state-action value function
Q = np.zeros((grid_size, grid_size, len(actions)))

# Helper function to choose an action based on Q-values (epsilon-greedy policy)
def choose_action(state, epsilon):
    if np.random.random() < epsilon:
        return np.random.choice(actions)
    return actions[np.argmax(Q[state])]

# Helper function to get the next state and reward based on the chosen action
def get_next_state_reward(state, action):
    if action == 'up':
        next_state = (state[0] - 1, state[1])
    elif action == 'down':
        next_state = (state[0] + 1, state[1])
    elif action == 'left':
        next_state = (state[0], state[1] - 1)
    elif action == 'right':
        next_state = (state[0], state[1] + 1)

    if next_state[0] < 0 or next_state[0] >= grid_size or next_state[1] < 0 or next_state[1] >= grid_size:
        # Hit a wall: stay in the same state with a negative reward
        return state, -10
    elif next_state == end_state:
        # Reached the goal: move to the goal state with a positive reward
        return next_state, 10
    else:
        # Regular move: move to the next state with no reward
        return next_state, 0


# TD Learning algorithm
for episode in range(num_episodes):
    state = start_state
    epsilon = 1.0 / (episode + 1)  # epsilon-greedy exploration rate

    while state != end_state:
        action = choose_action(state, epsilon)
        next_state, reward = get_next_state_reward(state, action)

        # Update Q-values using Temporal Difference Learning
        Q[state][actions.index(action)] += alpha * (reward + gamma * np.max(Q[next_state]) - Q[state][actions.index(action)])

        state = next_state

# Print the learned Q-values
print(Q)
```

In this script, we define a simple grid world environment with a start state, an end state, and possible actions ('up', 'down', 'left', 'right'). The script then uses the Temporal Difference Learning algorithm to update the state-action values in the Q-table based on the rewards obtained from interactions with the environment. Finally, it prints the learned Q-values.
|
|
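
Once training finishes, the Q-table is easier to interpret as a greedy policy. The following add-on (not part of the original script; it reuses `Q`, `grid_size`, `actions`, and `end_state` defined above) prints the greedy action for every cell of the grid:

```python
# Derive and print the greedy policy implied by the learned Q-values.
for row in range(grid_size):
    row_actions = []
    for col in range(grid_size):
        if (row, col) == end_state:
            row_actions.append('GOAL')
        else:
            row_actions.append(actions[np.argmax(Q[row, col])])
    print(' '.join(f"{a:>5}" for a in row_actions))
```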

Sure! Below is an example Python script that demonstrates the Trust Region Policy Optimization (TRPO) algorithm using OpenAI Gym's "CartPole-v1" environment:

```python
import gym
import numpy as np
import tensorflow as tf


class PolicyNetwork:
    def __init__(self, state_dim, action_dim, hidden_dim):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.hidden_dim = hidden_dim

        self.states = tf.placeholder(tf.float32, [None, state_dim], name="states")
        self.actions = tf.placeholder(tf.int32, [None], name="actions")
        self.advantages = tf.placeholder(tf.float32, [None], name="advantages")

        self.mean_network = self.build_network(scope="mean")
        self.sample_network = self.build_network(scope="sample")

        self.sampled_actions = self.sample_network(self.states)

        self.mean_weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="mean")
        self.sample_weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="sample")

        self.policy_loss = self.compute_policy_loss()
        self.kl_divergence = self.compute_kl_divergence()
        self.gradient = self.compute_gradient()

    def build_network(self, scope):
        with tf.variable_scope(scope):
            hidden_layer = tf.layers.dense(self.states, self.hidden_dim, activation=tf.nn.relu)
            output_layer = tf.layers.dense(hidden_layer, self.action_dim)
            output_probs = tf.nn.softmax(output_layer)

        def network(states):
            feed_dict = {self.states: states}
            sess = tf.get_default_session()
            return sess.run(output_probs, feed_dict=feed_dict)

        return network

    def compute_policy_loss(self):
        indices = tf.range(tf.shape(self.sampled_actions)[0]) * tf.shape(self.sampled_actions)[1] + self.actions
        selected_action_probs = tf.gather(tf.reshape(self.sampled_actions, [-1]), indices)
        ratio = selected_action_probs / tf.stop_gradient(self.mean_network(self.states))
        surrogate_loss = -tf.reduce_mean(ratio * self.advantages)
        return surrogate_loss

    def compute_kl_divergence(self):
        mean_network_probs = self.mean_network(self.states)
        sample_network_probs = tf.stop_gradient(self.sampled_actions)
        return tf.reduce_mean(tf.reduce_sum(mean_network_probs * tf.log(mean_network_probs / sample_network_probs), axis=1))

    def compute_gradient(self):
        grads = tf.gradients(self.policy_loss, self.sample_weights)
        flat_grads = tf.concat([tf.reshape(grad, [-1]) for grad in grads], axis=0)
        return flat_grads


def compute_advantages(rewards, next_value, discount_factor=0.99, gae_lambda=0.95):
    # Generalized advantage estimation (uses the rewards in place of learned value estimates)
    values = np.append(rewards, next_value)
    deltas = rewards + discount_factor * values[1:] - values[:-1]
    advantages = np.zeros(len(rewards) + 1)
    for t in reversed(range(len(rewards))):
        advantages[t] = deltas[t] + discount_factor * gae_lambda * advantages[t + 1]
    return advantages[:-1]


def run_episode(env, policy_network, render=False):
    states, actions, rewards = [], [], []
    state = env.reset()
    while True:
        if render:
            env.render()
        action_probs = policy_network.sample_network(np.expand_dims(state, axis=0))
        action = np.random.choice(len(action_probs[0]), p=action_probs[0])
        next_state, reward, done, _ = env.step(action)

        states.append(state)
        actions.append(action)
        rewards.append(reward)

        state = next_state

        if done:
            break

    return states, actions, rewards


def train(env, policy_network, max_iterations=1000, max_episode_length=1000, cg_iterations=10, delta=0.01):
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    trainable_variables = tf.trainable_variables()
    grads_placeholder = tf.placeholder(tf.float32, shape=[None])
    flat_grads_and_vars_placeholder = tf.placeholder(tf.float32, shape=[None])

    grads = tf.gradients(policy_network.kl_divergence, trainable_variables)
    grads_placeholder_and_vars = list(zip(grads_placeholder, trainable_variables))
    flat_grads_and_vars_placeholder_and_vars = list(zip(flat_grads_and_vars_placeholder, trainable_variables))

    compute_grads = tf.train.AdamOptimizer(learning_rate=1e-3).apply_gradients(grads_placeholder_and_vars)
    compute_flat_grad = flatten_gradients(grads)
    apply_flat_grad = unflatten_gradients(flat_grads_and_vars_placeholder, trainable_variables)

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    for iteration in range(max_iterations):
        episode_states, episode_actions, episode_rewards = run_episode(env, policy_network)

        episode_advantages = compute_advantages(episode_rewards, 0)
        episode_mean = np.mean(episode_rewards)
        episode_std = np.std(episode_rewards)

        feed_dict = {
            policy_network.states: np.array(episode_states),
            policy_network.actions: np.array(episode_actions),
            policy_network.advantages: episode_advantages
        }

        # Update the policy network on the surrogate loss
        sess.run(optimizer.minimize(policy_network.policy_loss), feed_dict=feed_dict)

        # Re-evaluate the flattened KL-divergence gradient used by the conjugate-gradient step
        for _ in range(cg_iterations):
            sess.run(compute_flat_grad, feed_dict=feed_dict)

        flat_grads = sess.run(compute_flat_grad, feed_dict=feed_dict)
        feed_dict[grads_placeholder] = flat_grads

        step_direction = conjugate_gradients(sess, compute_flat_grad, feed_dict)
        step_size = np.sqrt(2 * delta / (np.dot(step_direction, hessian_vector_product(sess, compute_flat_grad, feed_dict)) + 1e-8))
        feed_dict[flat_grads_and_vars_placeholder] = step_size * step_direction

        sess.run(apply_flat_grad, feed_dict=feed_dict)

    sess.close()


def conjugate_gradients(sess, compute_flat_grad_fn, feed_dict, cg_iterations=10, residual_tol=1e-10):
    b = sess.run(compute_flat_grad_fn, feed_dict=feed_dict)
    x = np.zeros_like(b)
    r = b.copy()
    p = b.copy()
    rsold = np.dot(r, r)

    for _ in range(cg_iterations):
        Ap = sess.run(compute_flat_grad_fn, feed_dict={x: p})
        alpha = rsold / (np.dot(p, Ap) + 1e-8)
        x += alpha * p
        r -= alpha * Ap
        rsnew = np.dot(r, r)

        if np.sqrt(rsnew) < residual_tol:
            break

        p = r + (rsnew / rsold) * p
        rsold = rsnew

    return x


def hessian_vector_product(sess, compute_flat_grad_fn, feed_dict, damping=0.1):
    grads = sess.run(compute_flat_grad_fn, feed_dict=feed_dict)
    constraints = tf.placeholder(tf.float32, shape=[None])
    compute_kl_grads = tf.gradients(policy_network.kl_divergence, trainable_variables)
    gradient_products = tf.reduce_sum(compute_kl_grads * constraints)
    feed_dict.update({constraints: grads})
    return sess.run(gradient_products, feed_dict=feed_dict)


def flatten_gradients(grads):
    flat_grads = []
    for grad in grads:
        flat_grads.append(tf.reshape(grad, [-1]))
    return tf.concat(flat_grads, axis=0)


def unflatten_gradients(grads_placeholder, trainable_variables):
    grads = []
    start = 0
    for var in trainable_variables:
        var_shape = var.shape.as_list()
        var_size = np.prod(var_shape)
        grads.append(tf.reshape(grads_placeholder[start:start + var_size], var_shape))
        start += var_size
    return grads


def main():
    env = gym.make('CartPole-v1')

    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    hidden_dim = 32

    policy_network = PolicyNetwork(state_dim, action_dim, hidden_dim)

    train(env, policy_network, max_iterations=100)

    env.close()


if __name__ == "__main__":
    main()
```

In this script, the TRPO algorithm is used to optimize a policy network on the CartPole-v1 environment from the Gym library. The `PolicyNetwork` class defines the policy network, and the `train` function implements the TRPO update loop. The `compute_advantages`, `run_episode`, `conjugate_gradients`, `hessian_vector_product`, `flatten_gradients`, and `unflatten_gradients` functions are helpers used in the training process.
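
To make the trust-region constraint concrete, here is a small standalone NumPy check (not part of the script above) of the quantity that `compute_kl_divergence` estimates over a batch of states: the KL divergence between the old and new categorical action distributions, which TRPO keeps below a threshold such as `delta = 0.01`:

```python
import numpy as np

def categorical_kl(p, q, eps=1e-8):
    """KL(p || q) for rows of categorical probabilities (each row sums to 1)."""
    p = np.clip(p, eps, 1.0)
    q = np.clip(q, eps, 1.0)
    return np.sum(p * np.log(p / q), axis=-1)

# Two nearby policies over two actions, evaluated at two states.
old_policy = np.array([[0.50, 0.50], [0.90, 0.10]])
new_policy = np.array([[0.55, 0.45], [0.85, 0.15]])
print(categorical_kl(old_policy, new_policy))  # small values -> the update stays inside the trust region
```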

Note that this implementation is written against the TensorFlow 1.x graph API (`tf.placeholder`, `tf.layers`, `tf.Session`), so it requires TensorFlow 1.x or the `tf.compat.v1` compatibility module in TensorFlow 2.x, along with the Gym library. Treat it as a schematic illustration of the TRPO building blocks rather than a drop-in implementation.
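
If only TensorFlow 2.x is installed, one common way to run 1.x-style graph code like this (a suggestion, not something the original script includes) is to import the compatibility API and disable 2.x behavior before building the graph:

```python
# Drop-in replacement for `import tensorflow as tf` when only TensorFlow 2.x is available.
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # restores the placeholder/Session-based graph workflow
```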

Sure! Here's an example of a Python script that demonstrates the k-Nearest Neighbors (k-NN) algorithm using the scikit-learn library:

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a k-NN classifier with k = 3
k = 3
knn = KNeighborsClassifier(n_neighbors=k)

# Fit the classifier to the training data
knn.fit(X_train, y_train)

# Predict the classes for the test set
y_pred = knn.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Example usage: predict the class for new data
new_data = np.array([[5.0, 3.6, 1.4, 0.2]])
predicted_class = knn.predict(new_data)
print(f"Predicted class: {predicted_class}")
```

In this script, we first load the famous Iris dataset using scikit-learn's `load_iris` function. Then, we split the dataset into training and test sets using the `train_test_split` function, with 80% of the samples used for training and 20% for testing.

Next, we create a k-NN classifier with `k = 3` using the `KNeighborsClassifier` class from scikit-learn and fit it to the training data with the `fit` method.

After that, we predict the classes for the test set using the `predict` method and calculate the accuracy of the classifier by comparing the predicted classes with the true classes using the `accuracy_score` function.

Finally, we demonstrate how to use the trained classifier to predict the class of a new data point with the `predict` method.

Note that this is a basic example to showcase the concept of k-NN. In practice, you would typically perform more extensive data preprocessing, feature scaling, hyperparameter tuning, and evaluation to build a more robust and accurate model; see the sketch below.
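
As one sketch of the hyperparameter tuning mentioned above (not part of the original script; it reuses `X_train` and `y_train`), the value of `k` can be selected with cross-validation:

```python
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Illustrative search range for k; widen or narrow it to suit your dataset.
param_grid = {'n_neighbors': list(range(1, 16))}
search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
search.fit(X_train, y_train)

print(f"Best k: {search.best_params_['n_neighbors']}")
print(f"Cross-validated accuracy: {search.best_score_:.3f}")
```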

ai_security/ML_Fundamentals/ai_generated/t-SNE.py

Sure! Here's a Python script that demonstrates the t-SNE algorithm using the famous Iris dataset for visualization:

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.manifold import TSNE

# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Applying t-SNE
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(X)

# Plotting the t-SNE results
colors = ['r', 'g', 'b']
target_names = iris.target_names

plt.figure(figsize=(10, 6))
for i, target_name in enumerate(target_names):
    plt.scatter(X_tsne[y == i, 0], X_tsne[y == i, 1], c=colors[i], label=target_name)

plt.xlabel('First t-SNE Component')
plt.ylabel('Second t-SNE Component')
plt.title('t-SNE Visualization of Iris Dataset')
plt.legend()
plt.show()
```

In this script, we first import the necessary libraries (`numpy`, `matplotlib`, `sklearn.datasets`, and `sklearn.manifold.TSNE`). Then, we load the Iris dataset using `datasets.load_iris()` from scikit-learn and assign the data and target variables to the `X` and `y` arrays, respectively.

After that, we create an instance of the t-SNE algorithm with `TSNE(n_components=2, random_state=42)`. We choose two components (`n_components=2`) because we want to visualize the data in 2D, and we set `random_state` to make the results reproducible. We then apply the algorithm to the data with `fit_transform(X)`, which returns the transformed data as `X_tsne`.

Finally, we plot the t-SNE results using `matplotlib`. Each data point appears in a scatter plot, with different colors representing the three classes ('setosa', 'versicolor', and 'virginica') in the Iris dataset.

To run this script, ensure that you have the necessary libraries installed (`numpy`, `matplotlib`, and `scikit-learn`). Save it as a .py file and execute it with a Python interpreter. The script will display a plot with the t-SNE visualization of the Iris dataset.
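
t-SNE results depend strongly on feature scaling and on the `perplexity` parameter, so it is often worth comparing a few settings. The sketch below (an optional extension, reusing `X` from the script above; the chosen perplexity values are only illustrative) standardizes the features first and embeds the data at several perplexities:

```python
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler

# Standardize features so no single measurement dominates the pairwise distances.
X_scaled = StandardScaler().fit_transform(X)

# Compare a few perplexity values; useful settings typically fall roughly between 5 and 50.
for perplexity in (5, 30, 50):
    embedding = TSNE(n_components=2, perplexity=perplexity, random_state=42).fit_transform(X_scaled)
    print(f"perplexity={perplexity}: embedded shape {embedding.shape}")
```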