man/prototypical_context.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/prototypical_context.R
\name{prototypical_context}
\alias{prototypical_context}
\title{Find most "prototypical" contexts.}
\usage{
prototypical_context(
  context,
  pre_trained,
  transform = TRUE,
  transform_matrix,
  N = 3,
  norm = "l2"
)
}
\arguments{
\item{context}{(character) vector of texts - the \code{context} variable in the output of \code{get_context}.}

\item{pre_trained}{(numeric) a F x D matrix corresponding to pretrained embeddings.
F = number of features and D = embedding dimensions.
rownames(pre_trained) = set of features for which there is a pre-trained embedding.}

\item{transform}{(logical) - if TRUE (default) apply the a la carte transformation; if FALSE output the untransformed averaged embedding.}

\item{transform_matrix}{(numeric) a D x D 'a la carte' transformation matrix.
D = dimensions of pretrained embeddings.}

\item{N}{(numeric) number of most "prototypical" contexts to return.}

\item{norm}{(character) - how to compute similarity (see ?text2vec::sim2):
\describe{
\item{\code{"l2"}}{cosine similarity}
\item{\code{"none"}}{inner product}
}}
}
\value{
a \code{data.frame} with the following columns:
\describe{
\item{\code{doc_id}}{(integer) document id.}
\item{\code{typicality_score}}{(numeric) average similarity score to all other contexts}
\item{\code{context}}{(character) contexts}
}
}
\description{
Identifies the \code{N} contexts that are, on average, most similar to the full set of contexts.
}
\examples{

# find contexts of immigration
context_immigration <- get_context(x = cr_sample_corpus, target = 'immigration',
                                   window = 6, valuetype = "fixed", case_insensitive = TRUE,
                                   hard_cut = FALSE, verbose = FALSE)

# identify top N prototypical contexts and compute typicality score
pt_context <- prototypical_context(context = context_immigration$context,
pre_trained = cr_glove_subset,
transform = TRUE,
transform_matrix = cr_transform,
N = 3, norm = 'l2')
}