'AttributeError: 'numpy.ndarray' object has no attribute 'columns' even after using pandas dataframe
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_curve,roc_auc_score,plot_roc_curve
from io import StringIO
path = r"C:\Users\thund\Downloads\Boat.csv"
data = pd.read_csv(path) # pip install xlrd
print(data.shape)
print(data.columns)
print(data.isnull().sum())
print (data.dropna(axis=0)) #dropping rows that have missing values
data.dropna(inplace=True)
print (data['seats'].value_counts())
print(data['demand'].value_counts().plot(kind = 'bar'))
data['maint'].value_counts().plot(kind = 'bar')
import seaborn as sns
sns.countplot(data['demand'], hue = data['Class'])
#plt.show()
X = data.drop(['Class'], axis = 1)
y = data['Class']
from sklearn.preprocessing import OrdinalEncoder
demand_category = ['low', 'med', 'high', 'vhigh']
maint_category = ['low', 'med', 'high', 'vhigh']
seats_category = ['2', '3', '4', '5more']
passenger_category = ['2', '4', 'more']
storage_category = ['Nostorage', 'small', 'med']
safety_category = ['poor', 'good', 'vgood']
all_categories = [demand_category, maint_category,seats_category,passenger_category,storage_category,safety_category]
oe = OrdinalEncoder(categories= all_categories)
X = oe.fit_transform( data[['demand','maint', 'seats', 'passenger', 'storage', 'safety']])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=2)
DT_classifier = DecisionTreeClassifier( criterion= 'gini', max_depth= 3, min_samples_split= 10)
DT_classifier.fit(X_train, y_train)
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
bestfeatures = SelectKBest(score_func=chi2, k=4)
fit = bestfeatures.fit(X, y)
datascores = pd.DataFrame(fit.scores_)
datacolumns = pd.DataFrame(X.columns)
# concat two dataframes for better visualization
featureScores = pd.concat([datacolumns, datascores], axis=1)
featureScores.columns = ['Demand', 'maint'] # naming the dataframe columns
print(featureScores.nlargest(10, 'maint'))
Dataset: https://drive.google.com/file/d/1O0sYZGJep4JkrSgGeJc5e_Nlao2bmegV/view?usp=sharing
Note: I'm fairly new to ML. Now, I've been getting this error - AttributeError: 'numpy.ndarray' object has no attribute 'columns' and checked against similar questions only to find out that this keeps occurring since pandas is not returning a data frame. Which I don't think is occurring in my case and yet I am badgered by this error. Could anyone please help me understand what is going on?
Traceback (most recent call last):
File "D:\Python\lab2\experimentaenter code here
l assignment.py", line 70, in
datacolumns = pd.DataFrame(X.columns)
AttributeError: 'numpy.ndarray' object has no attribute 'columns'
Process finished with exit code 1
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|