기초프로젝트

이준민1 2024. 5. 17. 20:55

import matplotlib.pyplot as plt

# Data: mean recommendation rating for each of the two mood groups, plus the
# reference average that is drawn as a dashed horizontal line.
mean_with_relaxation = 3.6
mean_without_relaxation = 3.0
mean_avg = 3.5

# Draw the bar chart
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(['With Relaxation', 'Without Relaxation'], [mean_with_relaxation, mean_without_relaxation])

# Draw a horizontal line at the 3.5 average.
# Uses `mean_avg` defined above; the name `mean_target` was never defined and
# raised NameError.
ax.axhline(y=mean_avg, color='r', linestyle='--', label='Target avg')

# Decorate the chart
ax.set_title('Music Recommendation Rating by Relaxation and Stress Relief')
ax.set_xlabel('Relaxation and Stress Relief')
ax.set_ylabel('Music Recommendation Rating')
ax.set_ylim([0, 5])     # y-axis range: [ymin, ymax]
ax.grid(True)
ax.legend()

# Show the chart
plt.show()

 # Author's observation: an expected but meaningful result - users who dislike
 # the music recommendation system feel no need to keep subscribing.
willingness_groups = df.groupby('premium_sub_willingness')
plf_mrr = willingness_groups['music_recc_rating'].mean()

# Display the result (last expression of the notebook cell)
plf_mrr

from google.colab import drive
import pandas as pd

# Mount Google Drive under /content/drive, where the spreadsheet path below lives
drive.mount('/content/drive')

# Path of the Excel file
file_path = '/content/drive/MyDrive/음악/spotyfi(fillna)(1).xlsx'

# Read the Excel file into a DataFrame
df = pd.read_excel(file_path)

# Look at the first ten rows
df.head(10)

# Keep only the rows whose favourite genre is one of the five listed
selected_genres = ['Melody', 'classical', 'Pop', 'Rap', 'Electronic/Dance']
genre_mask = df['fav_music_genre'].isin(selected_genres)
filtered_df1 = df[genre_mask]
filtered_df1

# Count how many rows fall into each category of the mood column
data = df
mood_column = data['music_Influencial_mood']
counts = mood_column.value_counts()
print(counts)

# Inspect the value distribution of every column, one after another
for column_name in df.columns:
    column_counts = df[column_name].value_counts()
    print(column_counts, "\n", "-----")

# Author's observation: premium subscribers rate the music recommendations lower.
plan_groups = df.groupby('spotify_subscription_plan')
plf_mrr = plan_groups['music_recc_rating'].mean()

# Display the result (last expression of the notebook cell)
plf_mrr

# Mean rating per category.
# Author's observation: customers who prefer podcasts are not happy with the
# music recommendation system.
content_groups = df.groupby('preferred_listening_content')
plf_mrr = content_groups['music_recc_rating'].mean()

# Display the result (last expression of the notebook cell)
plf_mrr

# Mean rating per category (this cell repeats the same computation on
# 'preferred_listening_content').
# Author's observation: customers who prefer podcasts are not happy with the
# music recommendation system.
content_groups = df.groupby('preferred_listening_content')
plf_mrr = content_groups['music_recc_rating'].mean()

# Display the result (last expression of the notebook cell)
plf_mrr

# Mean recommendation rating for each mood that music influences
plf_mrr = df.groupby('music_Influencial_mood')['music_recc_rating'].mean()

# Flag the mood labels that mention 'Relaxation and stress relief'
has_relaxation = plf_mrr.index.str.contains('Relaxation and stress relief')

# Split the per-mood means into the labels with and without that phrase
with_relaxation = plf_mrr[has_relaxation]
without_relaxation = plf_mrr[~has_relaxation]

# Average the per-mood means inside each group (every mood label counts once,
# regardless of how many rows it had)
mean_with_relaxation = with_relaxation.mean()
mean_without_relaxation = without_relaxation.mean()

# Print the results
print("Relaxation and stress relief가 있는 경우의 음악 추천 선호도 평균:", mean_with_relaxation)
print("Relaxation and stress relief가 없는 경우의 음악 추천 선호도 평균:", mean_without_relaxation)

처음엔 박스플롯을 이렇게 그렸다.

# Per-genre row count and mean rating, computed from one shared grouping
genre_ratings = df.groupby('fav_music_genre')['music_recc_rating']
plf_mrr = genre_ratings.size().reset_index()
plf_mrr1 = genre_ratings.mean().reset_index()

# Give the two aggregate columns distinct names before joining them
df1 = plf_mrr.rename(columns={'music_recc_rating': 'size'})
df2 = plf_mrr1.rename(columns={'music_recc_rating': 'mean'})

# One row per genre with both 'size' and 'mean'
merged_df = df1.merge(df2, on='fav_music_genre')
merged_df

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

data = merged_df
filtered_df = data[data['size'] > 15]  # rows whose 'size' exceeds 15 (author's TODO: debug this)

# Reference average for the dashed line. Defined inside this cell because the
# name `mean_target` used below was never defined and raised NameError.
mean_avg = 3.5

# Visualize
fig, ax = plt.subplots(figsize=(12, 8))
# Alternatives tried earlier (bar chart, then box plot on filtered_df1):
# sns.barplot(x='fav_music_genre', y='mean', data=filtered_df, ax=ax)
# sns.boxplot(x='fav_music_genre', y='music_recc_rating', data=filtered_df1, ax=ax)

# Count rows per (genre, rating) pair and pivot into a genre-by-rating grid
grouped_df = filtered_df1.groupby(['fav_music_genre', 'music_recc_rating']).size()
grouped_df = grouped_df.reset_index(name='Count')
glue = grouped_df.pivot(index='fav_music_genre', columns='music_recc_rating', values="Count")

# Create the heatmap on the axes created above
sns.heatmap(glue, annot=True, ax=ax)

# Draw a horizontal line at the 3.5 average
# NOTE(review): this line and the axis labels below appear to come from the
# bar/box-plot version; in the heatmap the rows (y) are genres and the columns
# (x) are ratings - confirm whether they should be swapped or removed.
ax.axhline(y=mean_avg, color='r', linestyle='--', label='avg')

# Decorate the chart
ax.set_title('Music Genre Preference Ratings')
ax.set_xlabel('Music Genre')
ax.set_ylabel('Preference Rating')
ax.tick_params(axis='x', rotation=90)
plt.show()

시각화하기

막대그래프

박스플롯

히트맵 그리기