22년 1월 10일_데이터 프레임, pandas more the deep

728x90

월요일[힘이 나는데 이 힘이 지속되어 계속 커지는 힘이 되길]

1. 데이터 pandas talk

2. 그래프 made code

3. 오늘 작성한 코드

1. 데이터

df.__에 데이터 frame을 반환하고, 새로운 df.new를 해야 df.new에 적용이 됨 or inplace = Ture를하면 됨(but!수정이 어려워서 비추한다함)

scola는 정수형 실수형 데이터처럼 1차원 데이터이며

dataFrame은 2차원 데이터,

(+나중엔 return타입이 무엇일지 느낌이 온다고 함

Cheat Sheets란?

치트 시트 또는 유아용 시트는 빠른 참조를 위해 사용되는 간결한 메모 세트 pandas Cheat sheets를 치면 정리된게 나올 것임

documentation란?

문서화

데이터프레임을 합칠경우 axis 1이면 열 0이면 행으로 추가

º iloc는 행과 열인덱스 숫자를 기재함으로써 숫자로 기재

º lod는 컬럼 값을 요함

3. 오늘 작성한 코드

import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import re
import pandas as pd                # 데이터를 저장하고 처리하는 패키지
import matplotlib as mpl           # 그래프를 그리는 패키지
import matplotlib.pyplot as plt    # 그래프를 그리는 패키지
import csv
import matplotlib.dates as mdates
import os

df =  pd.read_csv('data/seoulreal.csv',encoding='utf-8')
df.sort_values(by='redate', ascending=False) #정렬
df.drop(columns=['lostSEQ','state','dateofreceipt','details','place','ID','name','region','company','thing','views','ext'], inplace=True) #사용하지 않는 값 삭제
#isno = None만 찾아주는 것
pd.isna(df)
df = df.dropna() #결측치 제거
d_ = df.sort_values(by='redate', ascending=False, ignore_index=True) #date정렬
# df['date'] = df['redate'].apply(lambda x: pd.to_datetime(str(x), format='%Y-%m-%d')) #int를 str로 change
df['date'] = pd.to_datetime(df['redate'].str.strip(), format='%Y-%m-%d', errors='raise')

# 'redate' = 날짜

# df_new.reset_index(inpddlace=True) #인덱스 수 reset_index



df.set_index(df['date'], inplace=True) #데이트 칼럼을 인덱스로 지정
# weekly_df = df.resample('W-Mon', how={'kinds':np.sum}).fillna(0) #월요일 설정
print(type(df['date']))
day = [g for n, g in df.set_index('date').groupby(pd.Grouper(freq='D'))]#일마다 지정
# day_kinds = [g for n, g in df.set_index('kinds').groupby(pd.Grouper(freq='D'))]

week = [g for n, g in df.set_index('date').groupby(pd.Grouper(freq='W'))]#주마다 지정
# week_kinds = [g for n, g in df.set_index('kinds').groupby(pd.Grouper(freq='W'))]

month = [g for n, g in df.set_index('date').groupby(pd.Grouper(freq='M'))]#월마다 지정
# month_kinds = [g for n, g in df.set_index('kinds').groupby(pd.Grouper(freq='M'))]

month_list = []
month_kinds_list = []

# for i in month :
#     # print(i.index)
#     month_list.append(i.index)
# print(type(i))

for e in month_kinds :
    print(e.index)
    month_kinds_list.append(e.index)


mpoint = np.array(month_list) #월별 날짜 put array
xpoint = d_["kinds"]
# ypoint = d_['date'] > "2022-12-31"
plt.rc('font',family='Malgun Gothic') #맑은 고딕font set
plt.plot(xpoint, mpoint, 'r+')
plt.title('월별 분실물 데이터')
plt.xlabel('분실물')
plt.ylabel('월별 날짜')
plt.show()


# plt.figure(figsize=(10,5))
# plt.rc('font',family='Malgun Gothic')
# plt.title("분실일자의 그래프")
# plt.xticks(rotation=45)
# plt.grid()
# plt.plot(d_['kinds'])
# plt.show()



# plt.hist(xpoint, ypoint)
# plt.show()

# plt.plot(d_["dateofreceipt"], d_["views"], "-", color='grey', label=str(spot_))

# plt.legend(fontsize=13)
# plt.xticks(rotation=90)
# plt.show()


# spot_ = 184
# d_ = df[(df["kinds"]==spot_) & (df["dateofreceipt"]<"2019-09-02")]
# plt.figure(figsize=(10,5))
# plt.title("시간대별 추이(2019년 9월 1일)", fontsize=15)
# ax = plt.subplot(1,1,1)
# ax.plot(d_["dateofreceipt"],d_["views"], "-o", color='red', label=str(spot_))
# ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
# plt.legend(fontsize=13)
# plt.grid()
# plt.show()

#ba
# redate등록일자 dateofreceipt분실일자 kinds 종류
# seoul_df = df.loc[[]]
# seoul_df

#메소드 찾는 법
#- 구글링 및 데이터 파일을 알아야함,1차원 시리즈, 2차원 데이터 프레임,

In [21]:

import matplotlib.pyplot as plt
import numpy as np

x = np.array(["A", "B", "C", "D"])
y = np.array([3, 8, 1, 10])
plt.grid(False)
plt.bar(x,y) #barh의 경우 옆으로 x 축과 y 축이 변경됨
plt.show()

In [18]:

x = np.random.normal(10, 2, 40)

plt.hist(x)
plt.grid()
plt.show()

In [57]:

y = np.array([45,23,13,5])


my_list = [1,2,3,4]
plt.figure(facecolor="hotpink")
plt.pie(y, labels=my_list, autopct='%.f%%', textprops={'color' :'black'})
plt.legend(my_list)
plt.title("hello")
plt.show()

In [49]:

#이상치 확인할 경우
import pandas as pd

students =  pd.read_csv("data/students.csv")
plt.boxplot(students['english'])
plt.show()

he =  pd.read_csv("data/seoulreal.csv", encoding="utf-8")
plt.boxplot(he['views'])
plt.show()

C:\Users\BIT\anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py:240: RuntimeWarning: Glyph 8722 missing from current font.
  font.set_text(s, 0.0, flags=flags)
C:\Users\BIT\anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py:203: RuntimeWarning: Glyph 8722 missing from current font.
  font.set_text(s, 0, flags=flags)

In [50]:

import numpy as np

x = np.arange(0, 2, 0.2)
y = np.arange(0, 3, 0.4)

plt.plot(y, y, 'r+')
plt.plot(x, x, 'bo')
plt.plot(y**2, y, color='hotpink')
plt.plot(y**3, y, color='yellow')
plt.plot(x, x**2, color='#e35f62', marker='*', linewidth=2)
plt.plot(x, x**3, color='forestgreen', marker='^', markersize=9)
# plt.plot(backgrandcolor = "#000000")
plt.xticks([0, 1, 2, 3, 4, 5]) #숫자를 일일히 지정하여도 됨
plt.yticks(np.arange(1, 6))
plt.legend("helloo") #문자열을 넣으면 이렇게 되고 원래는 (['___'])로 하면됨


plt.show()

In [53]:

import matplotlib.pyplot as plt
import numpy as np

#plot 1:
x = np.array([0, 1, 2, 3])
y = np.array([3, 8, 1, 10])

plt.figure(figsize = (6, 6)) #사이즈 넣기
plt.subplot(2, 1, 1)#2행 1열이라는 뜻 1행 2열이면 가로로 2개의 그래프를 볼 수 있음
plt.title("hello")
plt.xlabel("ho")
plt.plot(x,y)

#plot 2:
x = np.array([0, 1, 2, 3])
y = np.array([10, 20, 30, 40])

plt.subplot(2, 1, 2) 
plt.plot(x,y)
plt.xlabel("ho")
plt.grid()

plt.show()

In [54]:

import random
x = np.random.randint(10, size = 39)
y = np.random.randint(10, size = 39)

plt.scatter(x,y, c="green")
# plt.scatter(y, c="yellow")

x = np.random.randint(10, size = 39)
y = np.random.randint(10, size = 39)

plt.scatter(x,y, c="yellow")

plt.show()

In [89]:

import pandas as pd
df = pd.read_csv('ds7/data/pop_Seoul.csv')
df

Out[89]:

yearpopulation0123456789101112

1949	1437670
1955	1568746
1960	2445402
1966	3793280
1970	5525262
1975	6879464
1980	8350616
1985	9625755
1990	10603250
1995	10217177
2000	9853972
2005	9762546
2010	9631482

In [116]:

df

Out[116]:

구간서울수도권01234567891011

1949-1955	9.12	-5.83
1955-1960	55.88	32.22
1960-1966	55.12	32.76
1966-1970	45.66	28.76
1970-1975	24.51	22.93
1975-1980	21.38	21.69
1980-1985	15.27	18.99
1985-1990	10.15	17.53
1990-1995	-3.64	8.54
1995-2000	-3.55	5.45
2000-2005	-0.93	6.41
2005-2010	-1.34	3.71

In [100]:

df.dtypes
fig = plt.figure(figsize=(10, 8))
re = df['year'],df['population']
plt.plot(df['year'],df['population'], color='green', marker='o', linestyle='dashed')

plt.title("Seoul Population Change")

plt.xlabel('year')
plt.ylabel("10Million")
plt.show()

In [105]:

fig = plt.figure(figsize=(10,10))
plt.bar(df['year'],df['population'])
plt.show()

In [119]:

fig = plt.figure(figsize=(10,10))
df = pd.read_csv('ds7/data/pop_Seoul2.csv')


xpoint = df['구간']
ypoint = df['수도권']
zpoint = df['서울']

#틱을 대각선으로
plt.xticks(range(12), rotation=50)
plt.rc('font',family='Malgun Gothic')
plt.title("인구 수도권 시간별 분포도")
plt.ylabel("수도권")
plt.plot(xpoint, ypoint)
plt.plot(xpoint, zpoint)

Out[119]:

[<matplotlib.lines.Line2D at 0x28e85ce54f0>]

In [123]:

import matplotlib.pyplot as plt
import seaborn as sns

# sample data 제공
df = sns.load_dataset('titanic')

sns.histplot(x=df['total_

Out[123]:

survivedpclasssexagesibspparchfareembarkedclasswhoadult_maledeckembark_townalivealone01234...886887888889890

0	3	male	22.0	1	0	7.2500	S	Third	man	True	NaN	Southampton	no	False
1	1	female	38.0	1	0	71.2833	C	First	woman	False	C	Cherbourg	yes	False
1	3	female	26.0	0	0	7.9250	S	Third	woman	False	NaN	Southampton	yes	True
1	1	female	35.0	1	0	53.1000	S	First	woman	False	C	Southampton	yes	False
0	3	male	35.0	0	0	8.0500	S	Third	man	True	NaN	Southampton	no	True
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
0	2	male	27.0	0	0	13.0000	S	Second	man	True	NaN	Southampton	no	True
1	1	female	19.0	0	0	30.0000	S	First	woman	False	B	Southampton	yes	True
0	3	female	NaN	1	2	23.4500	S	Third	woman	False	NaN	Southampton	no	False
1	1	male	26.0	0	0	30.0000	C	First	man	True	C	Cherbourg	yes	True
0	3	male	32.0	0	0	7.7500	Q	Third	man	True	NaN	Queenstown	no	True

tomorrow wish list

. 힘들어하지 않고 좋게 받아들이기

. 필요없는지식과 공부는 없다고 생각하기

. 나 혼자도 오로지 충분하다고 생각하기

. 마음 편안하게 먹고 꿈꾸기

728x90

'IT To do and To was' 카테고리의 다른 글

22년 1월 12일_python수업 마지막날, team project final, take a resting (0)	2022.01.12
22년 1월 11일_팀프로젝트 마지막(small) (0)	2022.01.11
22년 1월 9일_team project work (0)	2022.01.10
22년 1월 8일_개인 small project made, python그래프 (0)	2022.01.09
22년 1월 7일_데이터 예측에 따른 연산 및 머신러닝 기초 중 기초 (0)	2022.01.07

웹, 앱 일대기_금.짜

22년 1월 10일_데이터 프레임, pandas more the deep

'IT To do and To was' 카테고리의 다른 글

티스토리툴바

« 2025/07 »
일	월	화	수	목	금	토
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30	31

22년 1월 10일_데이터 프레임, pandas more the deep

'IT To do and To was' 카테고리의 다른 글

'IT To do and To was' 관련글

티스토리툴바