본문 바로가기

AI_딥_러닝_언어지능

AI_파이썬_언어지능_Similarity_Cosine

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Two example Korean sentences whose similarity will be compared.
sentence = (
    "오늘은 KT에서 강의하는 날입니다.",
    "언어지능을 강의 할 예정입니다.",
)

# Learn the vocabulary from both sentences (keeping at most 100 features)
# and turn each sentence into one row of a sparse TF-IDF matrix.
vertor = TfidfVectorizer(max_features=100)
vertor_result = vertor.fit_transform(sentence)

# Show, in order: the whole matrix, the row for each sentence, and the
# learned feature names -- each followed by a 100-character separator line.
separator = '*' * 100
for shown in (
    vertor_result,
    vertor_result[0],
    vertor_result[1],
    vertor.get_feature_names_out(),
):
    print(shown)
    print(separator)

# Cosine similarity of the first sentence with itself, then with the second.
first_row = vertor_result[0]
second_row = vertor_result[1]
print(cosine_similarity(first_row, first_row))
print(cosine_similarity(first_row, second_row))
  (0, 6)    0.5
  (0, 0)    0.5
  (0, 2)    0.5
  (0, 3)    0.5
  (1, 4)    0.5773502691896257
  (1, 1)    0.5773502691896257
  (1, 5)    0.5773502691896257
****************************************************************************************************
  (0, 6)    0.5
  (0, 0)    0.5
  (0, 2)    0.5
  (0, 3)    0.5
****************************************************************************************************
  (0, 4)    0.5773502691896257
  (0, 1)    0.5773502691896257
  (0, 5)    0.5773502691896257
****************************************************************************************************
['kt에서' '강의' '강의하는' '날입니다' '언어지능을' '예정입니다' '오늘은']
****************************************************************************************************
[[1.]]
[[0.]]