朴素贝叶斯(Naive Bayes)是一种基于贝叶斯定理的分类算法,假设特征之间是相互独立的。它常用于文本分类、垃圾邮件检测等问题。
下面是一个用C#实现朴素贝叶斯分类算法的简单示例。这个例子假设数据集是离散的,并且特征可以分为多个类别。
示例代码
using System;
using System.Collections.Generic;
using System.Linq;
/// <summary>
/// A minimal Naive Bayes classifier for discrete, string-valued features.
/// Training only accumulates counts; prediction scores every known class in
/// log space using add-one smoothing and returns the highest-scoring label.
/// </summary>
class NaiveBayes
{
    // Number of training samples seen for each class label (raw counts, not probabilities).
    private readonly Dictionary<string, int> classCounts;
    // featureCounts[label][feature] = how many times the feature value occurred in samples of that label.
    private readonly Dictionary<string, Dictionary<string, int>> featureCounts;
    // Every distinct feature value seen during training; its size is used as the smoothing term.
    private readonly HashSet<string> vocabulary;
    // Total number of samples passed to Train.
    private int totalSamples;

    /// <summary>Creates an empty, untrained model.</summary>
    public NaiveBayes()
    {
        classCounts = new Dictionary<string, int>();
        featureCounts = new Dictionary<string, Dictionary<string, int>>();
        vocabulary = new HashSet<string>();
        totalSamples = 0;
    }

    /// <summary>
    /// Adds one labelled sample to the model by updating the class and feature counts.
    /// </summary>
    /// <param name="features">Feature values of the sample; may be empty but not null.</param>
    /// <param name="label">Class label of the sample.</param>
    /// <exception cref="ArgumentNullException"><paramref name="features"/> or <paramref name="label"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="features"/> contains a null element.</exception>
    public void Train(string[] features, string label)
    {
        // Validate everything before touching any counter so a bad call
        // cannot leave the model half-updated.
        ValidateFeatures(features, nameof(features));
        if (label == null)
        {
            throw new ArgumentNullException(nameof(label));
        }

        totalSamples++;

        int samplesForLabel;
        classCounts.TryGetValue(label, out samplesForLabel);
        classCounts[label] = samplesForLabel + 1;

        // Create the per-class table up front (not inside the loop) so that a
        // class trained only with empty feature arrays is still known to Predict.
        Dictionary<string, int> countsForLabel;
        if (!featureCounts.TryGetValue(label, out countsForLabel))
        {
            countsForLabel = new Dictionary<string, int>();
            featureCounts[label] = countsForLabel;
        }

        foreach (string feature in features)
        {
            vocabulary.Add(feature);

            int occurrences;
            countsForLabel.TryGetValue(feature, out occurrences);
            countsForLabel[feature] = occurrences + 1;
        }
    }

    /// <summary>
    /// Returns the class label with the highest log-score for the given features.
    /// </summary>
    /// <param name="features">Feature values to classify; may be empty but not null.</param>
    /// <returns>The best-scoring label, or null when the model has not been trained.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="features"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="features"/> contains a null element.</exception>
    public string Predict(string[] features)
    {
        ValidateFeatures(features, nameof(features));

        double maxProbability = double.NegativeInfinity;
        string bestClass = null;

        foreach (var classEntry in classCounts)
        {
            string className = classEntry.Key;
            int classCount = classEntry.Value;

            // log P(Class)
            double classProbability = Math.Log((double)classCount / totalSamples);

            // May be missing only if the model state was built without a
            // feature table for this class; treat that as "no counts".
            Dictionary<string, int> countsForClass;
            featureCounts.TryGetValue(className, out countsForClass);

            // Add-one smoothing: (count + 1) / (samples in class + distinct feature values).
            double denominator = classCount + vocabulary.Count;

            // log P(Features | Class), summed separately from the prior.
            double conditionalProbability = 0.0;
            foreach (string feature in features)
            {
                int occurrences = 0;
                if (countsForClass != null)
                {
                    // Leaves occurrences at 0 for a value never seen with this class.
                    countsForClass.TryGetValue(feature, out occurrences);
                }
                conditionalProbability += Math.Log((occurrences + 1) / denominator);
            }

            // log P(Class | Features) is proportional to log P(Features | Class) + log P(Class)
            double totalProbability = classProbability + conditionalProbability;

            // Strict comparison: on a tie the class enumerated first wins.
            if (totalProbability > maxProbability)
            {
                maxProbability = totalProbability;
                bestClass = className;
            }
        }

        return bestClass;
    }

    // Rejects a null array or null elements, which cannot be used as dictionary keys.
    private static void ValidateFeatures(string[] features, string paramName)
    {
        if (features == null)
        {
            throw new ArgumentNullException(paramName);
        }
        if (Array.IndexOf(features, null) >= 0)
        {
            throw new ArgumentException("Feature values must not be null.", paramName);
        }
    }
}
/// <summary>
/// Console demo: trains a NaiveBayes model on a small weather data set and
/// prints the predicted class for one unseen sample.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        // Labelled training samples, listed in the order they are fed to the model.
        var trainingSet = new[]
        {
            new { Features = new[] { "sunny", "hot", "high", "weak" }, Label = "no" },
            new { Features = new[] { "sunny", "hot", "high", "strong" }, Label = "no" },
            new { Features = new[] { "overcast", "hot", "high", "weak" }, Label = "yes" },
            new { Features = new[] { "rain", "mild", "high", "weak" }, Label = "yes" },
            new { Features = new[] { "rain", "cool", "normal", "weak" }, Label = "yes" },
            new { Features = new[] { "rain", "cool", "normal", "strong" }, Label = "no" },
            new { Features = new[] { "overcast", "cool", "normal", "strong" }, Label = "yes" },
            new { Features = new[] { "sunny", "mild", "high", "weak" }, Label = "no" },
            new { Features = new[] { "sunny", "cool", "normal", "weak" }, Label = "yes" },
            new { Features = new[] { "rain", "mild", "normal", "weak" }, Label = "yes" },
            new { Features = new[] { "sunny", "mild", "normal", "strong" }, Label = "yes" },
            new { Features = new[] { "overcast", "mild", "high", "strong" }, Label = "yes" },
            new { Features = new[] { "overcast", "hot", "normal", "weak" }, Label = "yes" },
            new { Features = new[] { "rain", "mild", "high", "strong" }, Label = "no" },
        };

        var classifier = new NaiveBayes();
        foreach (var sample in trainingSet)
        {
            classifier.Train(sample.Features, sample.Label);
        }

        // Classify one sample that does not appear in the training set.
        string[] query = { "sunny", "cool", "high", "strong" };
        string predictedClass = classifier.Predict(query);
        Console.WriteLine($"Predicted class: {predictedClass}");
    }
}
代码说明
- NaiveBayes 类:这个类实现了朴素贝叶斯算法的训练和预测过程。`Train` 方法用于接收训练数据,`Predict` 方法用于对新数据进行分类预测。
- Train 方法:每次接收一组特征和其对应的类别标签,更新类别和特征的计数。
- Predict 方法:根据贝叶斯定理计算每个类别的概率,返回概率最大的类别作为预测结果。
使用步骤
- 运行代码,将训练数据传递给 `Train` 方法进行模型训练。
- 使用 `Predict` 方法对新的数据进行预测。
这个实现是一个简单的朴素贝叶斯分类器,可以处理离散特征的数据,并且在计算条件概率时已经使用了加一(拉普拉斯)平滑来避免零概率。对于实际应用,可以考虑进一步扩展,如处理连续特征、调整平滑参数等。