from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Demo: build TF-IDF vectors for two Korean sentences and compare them with
# cosine similarity.
sentence = (
    "오늘은 KT에서 강의하는 날입니다.",
    "언어지능을 강의 할 예정입니다.",
)

# Vectorizer limited to at most 100 vocabulary features.
vertor = TfidfVectorizer(max_features=100)
# Learn the vocabulary from both sentences and get one TF-IDF row per sentence.
vertor_result = vertor.fit_transform(sentence)

separator = "*" * 100
first_row, second_row = vertor_result[0], vertor_result[1]

# Print the full matrix, each sentence's row, and the learned vocabulary,
# each followed by a separator line.
for section in (
    vertor_result,
    first_row,
    second_row,
    vertor.get_feature_names_out(),
):
    print(section)
    print(separator)

# Similarity of the first sentence with itself, then with the second sentence.
print(cosine_similarity(first_row, first_row))
print(cosine_similarity(first_row, second_row))
# Output of the script above:
# (0, 6) 0.5
# (0, 0) 0.5
# (0, 2) 0.5
# (0, 3) 0.5
# (1, 4) 0.5773502691896257
# (1, 1) 0.5773502691896257
# (1, 5) 0.5773502691896257
# ****************************************************************************************************
# (0, 6) 0.5
# (0, 0) 0.5
# (0, 2) 0.5
# (0, 3) 0.5
# ****************************************************************************************************
# (0, 4) 0.5773502691896257
# (0, 1) 0.5773502691896257
# (0, 5) 0.5773502691896257
# ****************************************************************************************************
# ['kt에서' '강의' '강의하는' '날입니다' '언어지능을' '예정입니다' '오늘은']
# ****************************************************************************************************
# [[1.]]
# [[0.]]