카테고리 없음

.

Good Day 2022. 12. 2. 20:23

# Quick API exploration: list the names exposed by pandas.DataFrame,
# show the sklearn package help, and list the names in sklearn.tree.
from pandas import DataFrame

print(dir(DataFrame))

import sklearn

# help() writes its text itself and returns None, so wrapping it in print()
# would only append a stray "None" to the output.
help(sklearn)

from sklearn import tree

print(dir(tree))
-------------------------------
import pandas as pd

import numpy as np

import warnings

warnings.filterwarnings('ignore')

# Case 1, problem 1: cap the ten largest Feature1 values at the 10th-largest
# value, then print the mean of Feature1 over the rows where Feature2 >= 80.
df = pd.read_csv("data/case1_no1.csv")

# Order rows by Feature1 (largest first) and renumber them so that the
# labels 0..9 refer to the ten largest values.
df = df.sort_values(by='Feature1', ascending=False).reset_index(drop=True)

# .loc slicing is label-based and inclusive, so 0:9 covers exactly ten rows.
df.loc[0:9, 'Feature1'] = df.loc[9, 'Feature1']

answer = np.mean(df[df['Feature2'] >= 80]['Feature1'])
print(answer)
------------------------------------

# Case 1, problem 2: over the top 80% of rows (ordered by Feature2, largest
# first), measure how much the standard deviation of Feature1 changes when
# its missing values are filled with the median.
df = pd.read_csv('data/case1_no2.csv')

# Number of rows that make up the top 80%; used for the slice below instead
# of a hard-coded row count.
top_80 = round(len(df) * 0.8)

df = df.sort_values(by='Feature2', ascending=False).reset_index(drop=True)

# Snapshot before imputation.
old_df = df.iloc[:top_80, :]
old_std = np.std(old_df['Feature1'])

# Work on a copy so that filling values does not touch old_df or df.
new_df = old_df.copy()

# Take the median of the observed values only, so missing entries cannot
# turn the result into NaN.
f1_med = np.median(new_df[new_df['Feature1'].notnull()]['Feature1'])
new_df['Feature1'] = new_df['Feature1'].fillna(f1_med)

new_std = np.std(new_df['Feature1'])

solv = old_std - new_std
print(solv)
--------------------------------------------

# Case 1, problem 3: mean of the Feature1 outliers under the 1.5 * IQR rule.
# The path uses a forward slash like the other sections; a backslash would be
# read as the start of an escape sequence inside the string literal.
df = pd.read_csv('data/case1_no3.csv')

q1, q3 = np.percentile(df['Feature1'], [25, 75])
iqr = q3 - q1

l_limit = q1 - (iqr * 1.5)
u_limit = q3 + (iqr * 1.5)

# Each comparison is parenthesised because | binds tighter than < and >.
is_outlier = (df['Feature1'] < l_limit) | (df['Feature1'] > u_limit)

# Positional row numbers of the outliers.
outlier_index = np.where(is_outlier)[0]

print(np.mean(df.iloc[outlier_index]['Feature1']))

--------------------------------------------------
import pandas as pd
import numpy as np  # np.percentile below comes from numpy, not pandas

df = pd.read_csv("data/3rd_housing.csv")

# step1: drop rows that contain missing values
new_df = df.dropna()

# step2: keep the first 70% of the remaining rows (row slice, all columns)
index_70 = round(len(new_df) * 0.7)
new_df = new_df.iloc[:index_70, :]

# step3: compute the first quartile (Q1) of housing_median_age
q1 = np.percentile(new_df['housing_median_age'], 25)

print(q1)
------------------------------------------

df = pd.read_csv("data/3rd_disease.csv")

# step1: take the first row whose year is 2000, skip its first column, and
# compute the mean of the remaining values.
row_2000 = df[df['year'] == 2000].iloc[0, 1:]
mean_2000 = row_2000.mean()

# step2: flag the columns whose 2000 value is at least 48.
# NOTE(review): 48 is hard-coded in the original; presumably it is derived
# from the step-1 mean — confirm against the task statement.
much_nation = row_2000 >= 48

# Sum the boolean flags: len() would count every column, not only the
# ones that satisfy the condition.
print(much_nation.sum())

----------------------------------------------


# Fit a random forest on the travel data and print its accuracy.
# NOTE(review): both frames are loaded from the same file, so the accuracy
# printed below is measured on the rows the model was fitted on — confirm
# the intended test file.
df_train = pd.read_csv("data/3rd_travel.csv")
df_test = pd.read_csv("data/3rd_travel.csv")

df_train.shape, df_test.shape

df_train.info()
df_test.info()

df_train['TravelInsurance'].value_counts()

from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# The last column is used as the label; every column before it is a feature.
X_train = df_train.iloc[:, :-1]
y_train = df_train.iloc[:, -1]
X_test = df_test.iloc[:, :-1]
y_test = df_test.iloc[:, -1]

# One-hot encode, then give the test matrix exactly the training columns so
# a category present in only one of the frames cannot break predict().
X_train = pd.get_dummies(X_train)
X_test = pd.get_dummies(X_test).reindex(columns=X_train.columns, fill_value=0)

rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f'Random Forest accuracy score : {accuracy:.3}')


--------------------------------

import pandas as pd

# Load the data and print the distance between the 25% and 75% quantiles
# of 'age', truncated to an integer.
df = pd.read_csv("..input dddd.csv")

df

lower_q = df['age'].quantile(.25)
upper_q = df['age'].quantile(.75)
print(int(abs(lower_q - upper_q)))
----------------------------------


import pandas as pd

df = pd.read_csv("ddfdfda.csv")

# Share of all reactions that are 'loves' or 'wows'; bound to r because the
# condition below reads it under that name.
r = (df['loves'] + df['wows']) / df['reactions']

cond_1 = r > 0.4

cond_1

---------------------------------

import pandas as pd

# Count the 'video' rows whose (loves + wows) / reactions ratio lies strictly
# between 0.4 and 0.5.
df = pd.read_csv("dff.csv")

r = (df['loves'] + df['wows']) / df['reactions']

cond_1 = r > 0.4
cond_2 = r < 0.5
# == compares element-wise; a single = would be an assignment.
cond_3 = df['type'] == "video"

# All three masks are combined with & into one boolean mask; passing them
# as a comma-separated pair would index with a tuple instead.
selected = df[cond_1 & cond_2 & cond_3]
selected

print(len(selected))
------------------------------------

import pandas as pd

# Exploratory pass: load the file, look at it, and try parsing the dates.
df = pd.read_csv("dfdga.csv")

df

df.info()

# Trial conversion only; the parsed values are not stored.
date_col = df['date_added']
pd.to_datetime(date_col)

import pandas as pd

# Count rows added in January 2018 whose country is "United Kingdom".
df = pd.read_csv("dfdf.csv")

# Preview of the parsed dates; the value is not kept.
pd.to_datetime(df['date_added'])

# Store the parsed dates so the .dt accessor can be used below.
df['date_added'] = pd.to_datetime(df['date_added'])

added_on = df['date_added'].dt
cond_1 = added_on.year == 2018
cond_2 = added_on.month == 1
cond_3 = df['country'] == "United Kingdom"

matches = df[cond_1 & cond_2 & cond_3]
matches

print(len(matches))


-------------------------------

import pandas as pd

# Train a random forest on the segmentation data and write the predictions
# for the test rows to a CSV file.
train = pd.read_csv("dddfff.csv")
test = pd.read_csv("dfdff.csv")

train.shape
test.shape

test.head()

train['Segmentation'].value_counts()

train.isnull().sum()
test.isnull().sum()
train.info()

# Separate the label from the training features.
target = train.pop('Segmentation')

# 'ID' is removed from both frames before encoding. It is popped from test
# exactly once (a second pop would raise KeyError) and kept for the output.
test_ID = test.pop('ID')
train = train.drop("ID", axis=1)

train.head()
train.info()
# "O" is the documented selector for object columns.
train.describe(include="O")

# One-hot encode, then give the test matrix exactly the training columns so
# a category present in only one of the frames cannot break predict().
train = pd.get_dummies(train)
test = pd.get_dummies(test).reindex(columns=train.columns, fill_value=0)

train.info()
train.head(1)
test_ID

from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(random_state=0)
rf.fit(train, target)
pred = rf.predict(test)
pred

submit = pd.DataFrame({
    'ID': test_ID,
    'Segmentation': pred,
})

submit.to_csv("dff.csv", index=False)