forked from prodriguezsosa/conText
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprototypical_context.Rd
60 lines (53 loc) · 1.96 KB
/
prototypical_context.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/prototypical_context.R
\name{prototypical_context}
\alias{prototypical_context}
\title{Find most "prototypical" contexts.}
\usage{
prototypical_context(
context,
pre_trained,
transform = TRUE,
transform_matrix,
N = 3,
norm = "l2"
)
}
\arguments{
\item{context}{(character) vector of texts - the \code{context} variable in the output of \code{get_context}}
\item{pre_trained}{(numeric) an F x D matrix corresponding to pretrained embeddings.
F = number of features and D = embedding dimensions.
rownames(pre_trained) = set of features for which there is a pre-trained embedding.}
\item{transform}{(logical) - if TRUE (default) apply the 'a la carte' transformation, if FALSE output the untransformed averaged embedding.}
\item{transform_matrix}{(numeric) a D x D 'a la carte' transformation matrix.
D = dimensions of pretrained embeddings.}
\item{N}{(numeric) number of most "prototypical" contexts to return.}
\item{norm}{(character) - how to compute similarity (see ?text2vec::sim2):
\describe{
\item{\code{"l2"}}{cosine similarity}
\item{\code{"none"}}{inner product}
}}
}
\value{
a \code{data.frame} with the following columns:
\describe{
\item{\code{doc_id}}{(integer) document id}
\item{\code{typicality_score}}{(numeric) average similarity score to all other contexts}
\item{\code{context}}{(character) contexts}
}
}
\description{
Identifies the contexts that are, on average, most similar to the full set of contexts.
}
\examples{
# find contexts of immigration
context_immigration <- get_context(x = cr_sample_corpus, target = 'immigration',
window = 6, valuetype = "fixed", case_insensitive = TRUE,
hard_cut = FALSE, verbose = FALSE)
# identify top N prototypical contexts and compute typicality score
pt_context <- prototypical_context(context = context_immigration$context,
pre_trained = cr_glove_subset,
transform = TRUE,
transform_matrix = cr_transform,
N = 3, norm = 'l2')
}