# Classifier comparison script: grid-searches several scikit-learn models
# on a 2-feature CSV dataset and writes their CV/test accuracies to a CSV.
# (Stray notebook-export header lines removed.)
import sys

import pandas as pd
from sklearn import svm, tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
# NOTE: sklearn.cross_validation and sklearn.grid_search were deprecated in
# 0.18 and removed in 0.20; train_test_split and GridSearchCV now live in
# sklearn.model_selection.
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Input/output paths come from optional positional CLI arguments, with
# CSV defaults. Catch only IndexError (missing argv entry) — the original
# bare `except:` would also swallow SystemExit/KeyboardInterrupt.
try:
    inputfile = sys.argv[1]
except IndexError:
    inputfile = "input3.csv"
try:
    outputfile = sys.argv[2]
except IndexError:
    outputfile = "output3.csv"

# Load the dataset and hold out 40% of the rows for testing.
df = pd.read_csv(inputfile)
dftrain, dftest = train_test_split(df, test_size=0.4, stratify=None)
31
32#def runSVM(kernel, c, gamma):
33# clf = svm.SVC(kernel=kernel,C=c,gamma=gamma)
34# model = clf.fit(dftrain[["A","B"]].as_matrix(), dftrain["label"].as_matrix())
35# score = model.score(dftrain[["A","B"]].as_matrix(), dftrain["label"].as_matrix())
36# print(score)
37# amin = df["A"].min()
38# amax = df["A"].max()
39# bmin = df["B"].min()
40# bmax = df["B"].max()
41# h = .02
42# xx, yy = np.meshgrid(np.arange(amin, amax, h), np.arange(bmin, bmax, h))
43# zz = clf.predict(np.c_[xx.ravel(), yy.ravel()])
44# print(zz)
45# zz = zz.reshape(xx.shape)
46# plt.contourf(xx, yy, zz, cmap=plt.cm.coolwarm, alpha=0.8)
47# plt.scatter(dftest["A"], dftest["B"], c=dftest["label"], cmap=plt.cm.coolwarm)
48# plt.xlabel('Sepal length')
49# plt.ylabel('Sepal width')
50# plt.xlim(xx.min(), xx.max())
51# plt.ylim(yy.min(), yy.max())
52# plt.show()
53
def runLinear(file):
    """Grid-search a linear-kernel SVM over C; write CV/test accuracy.

    Appends one CSV row "svm_linear,<best CV score>,<test score>" to *file*.
    Uses the module-level dftrain/dftest splits.

    Args:
        file: writable text file object the result row is written to.
    """
    parameters = [
        {'C': [0.1, 0.5, 1, 5, 10, 50, 100], 'kernel': ['linear']}
    ]
    classifier = svm.SVC()
    grid_search = GridSearchCV(classifier, parameters, cv=5, scoring='accuracy')
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    grid_search.fit(dftrain[["A", "B"]].values, dftrain["label"].values)
    score = grid_search.score(dftest[["A", "B"]].values, dftest["label"].values)
    file.write("svm_linear,%f,%f\n" % (grid_search.best_score_, score))
def runPoly(file):
    """Grid-search a polynomial-kernel SVM over C/gamma; write accuracy.

    Appends one CSV row "svm_polynomial,<best CV score>,<test score>" to
    *file*. Uses the module-level dftrain/dftest splits.

    Args:
        file: writable text file object the result row is written to.
    """
    parameters = [
        {'C': [0.1, 1, 3], 'gamma': [0.1, 0.5, 1, 3, 6, 10], 'kernel': ['poly']}
    ]
    classifier = svm.SVC()
    grid_search = GridSearchCV(classifier, parameters, cv=5, scoring='accuracy')
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    grid_search.fit(dftrain[["A", "B"]].values, dftrain["label"].values)
    score = grid_search.score(dftest[["A", "B"]].values, dftest["label"].values)
    file.write("svm_polynomial,%f,%f\n" % (grid_search.best_score_, score))
def runRBF(file):
    """Grid-search an RBF-kernel SVM over C/gamma; write CV/test accuracy.

    Appends one CSV row "svm_rbf,<best CV score>,<test score>" to *file*.
    Uses the module-level dftrain/dftest splits.

    Args:
        file: writable text file object the result row is written to.
    """
    parameters = [
        {'C': [0.1, 0.5, 1, 5, 10, 50, 100],
         'gamma': [0.1, 0.5, 1, 3, 6, 10],
         'kernel': ['rbf']}
    ]
    classifier = svm.SVC()
    grid_search = GridSearchCV(classifier, parameters, cv=5, scoring='accuracy')
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    grid_search.fit(dftrain[["A", "B"]].values, dftrain["label"].values)
    score = grid_search.score(dftest[["A", "B"]].values, dftest["label"].values)
    file.write("svm_rbf,%f,%f\n" % (grid_search.best_score_, score))
def runLogisticRegression(file):
    """Grid-search logistic regression over C; write CV/test accuracy.

    Appends one CSV row "logistic,<best CV score>,<test score>" to *file*.
    Uses the module-level dftrain/dftest splits.

    Args:
        file: writable text file object the result row is written to.
    """
    parameters = [
        {'C': [0.1, 0.5, 1, 5, 10, 50, 100]}
    ]
    classifier = LogisticRegression()
    grid_search = GridSearchCV(classifier, parameters, cv=5, scoring='accuracy')
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    grid_search.fit(dftrain[["A", "B"]].values, dftrain["label"].values)
    score = grid_search.score(dftest[["A", "B"]].values, dftest["label"].values)
    file.write("logistic,%f,%f\n" % (grid_search.best_score_, score))
def runKnn(file):
    """Grid-search k-NN over n_neighbors/leaf_size; write CV/test accuracy.

    Appends one CSV row "knn,<best CV score>,<test score>" to *file*.
    Uses the module-level dftrain/dftest splits.

    Args:
        file: writable text file object the result row is written to.
    """
    parameters = [
        {'n_neighbors': list(range(1, 51)), 'leaf_size': list(range(5, 61, 5))}
    ]
    classifier = KNeighborsClassifier()
    grid_search = GridSearchCV(classifier, parameters, cv=5, scoring='accuracy')
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    grid_search.fit(dftrain[["A", "B"]].values, dftrain["label"].values)
    score = grid_search.score(dftest[["A", "B"]].values, dftest["label"].values)
    file.write("knn,%f,%f\n" % (grid_search.best_score_, score))
def runDecisionTree(file):
    """Grid-search a decision tree over depth/split size; write accuracy.

    Appends one CSV row "decision_tree,<best CV score>,<test score>" to
    *file*. Uses the module-level dftrain/dftest splits.

    Args:
        file: writable text file object the result row is written to.
    """
    parameters = [
        {'max_depth': list(range(1, 51)),
         'min_samples_split': list(range(2, 11))}
    ]
    classifier = tree.DecisionTreeClassifier()
    grid_search = GridSearchCV(classifier, parameters, cv=5, scoring='accuracy')
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    grid_search.fit(dftrain[["A", "B"]].values, dftrain["label"].values)
    score = grid_search.score(dftest[["A", "B"]].values, dftest["label"].values)
    file.write("decision_tree,%f,%f\n" % (grid_search.best_score_, score))
def runRandomForest(file):
    """Grid-search a random forest over depth/split size; write accuracy.

    Appends one CSV row "random_forest,<best CV score>,<test score>" to
    *file*. Uses the module-level dftrain/dftest splits.

    Args:
        file: writable text file object the result row is written to.
    """
    parameters = [
        {'max_depth': list(range(1, 51)),
         'min_samples_split': list(range(2, 11))}
    ]
    classifier = RandomForestClassifier()
    grid_search = GridSearchCV(classifier, parameters, cv=5, scoring='accuracy')
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    grid_search.fit(dftrain[["A", "B"]].values, dftrain["label"].values)
    score = grid_search.score(dftest[["A", "B"]].values, dftest["label"].values)
    file.write("random_forest,%f,%f\n" % (grid_search.best_score_, score))
# Run every model in sequence, appending one result row each to the output
# file. Call order matches the row order of the original script.
with open(outputfile, "w") as out:
    for runner in (runLinear, runPoly, runRBF, runLogisticRegression,
                   runKnn, runDecisionTree, runRandomForest):
        runner(out)
125
126#possiblecs =
127#gammas = [0.1,0.5]
128#for c in possiblecs:
129# runSVM('linear',c,'auto')
130#for c in possiblecs:
131# for gamma in gammas:
132# runSVM('poly',c,gamma)
133#for c in possiblecs:
134# for gamma in gammas:
135# runSVM('rbf',c,gamma)
136
137