- The first step is to open up a session with the WoS API.
auth()
will authenticate your credentials with the API’s server and return a session ID (SID).
# Load the wosr package.
library(wosr)
# Replace the placeholder strings with your own WoS credentials. The value
# returned by auth() is kept in `sid` and passed to the calls that follow.
sid <- auth(username = "your_username", password = "your_password")
- Now we can query the Web of Science to see how many records match our query string.
# Find all publications that contain "animal welfare" in their titles (TI tag)
# and have the words "dog" and "welfare" somewhere in their titles, abstracts, or
# list of keywords (TS tag).
query <- 'TI = ("animal welfare") AND TS = (dog welfare)'
# Check how many records match the query before pulling any data.
query_wos(query, sid = sid)
#> Matching records: 89
- Pull the data.
# Download the records matching `query`. The result is a named list of data
# frames (publication, author, address, ...), each carrying a `ut` column.
data <- pull_wos(query, sid = sid)
# Print the list to inspect the structure of each data frame.
data
#> List of 9
#> $ publication :'data.frame': 89 obs. of 7 variables:
#> ..$ ut : chr [1:89] "WOS:000377031100025" ...
#> ..$ title : chr [1:89] "ANIMAL WELFARE Brachycephalic dog breeds" ...
#> ..$ journal : chr [1:89] "Veterinary Record" ...
#> ..$ date : Date[1:89], format: "2016-05-28" ...
#> ..$ doi : chr [1:89] "10.1136/vr.i2991" ...
#> ..$ tot_cites: int [1:89] 0 0 ...
#> ..$ abstract : chr [1:89] NA ...
#> $ author :'data.frame': 238 obs. of 7 variables:
#> ..$ ut : chr [1:238] "WOS:000377031100025" ...
#> ..$ author_no : int [1:238] 1 1 ...
#> ..$ display_name: chr [1:238] "Goddard, Philip" ...
#> ..$ first_name : chr [1:238] "Philip" ...
#> ..$ last_name : chr [1:238] "Goddard" ...
#> ..$ email : chr [1:238] "PGoddard@goddardvetgroup.co.uk" ...
#> ..$ daisng_id : int [1:238] 5131890 688455 ...
#> $ address :'data.frame': 137 obs. of 7 variables:
#> ..$ ut : chr [1:137] "WOS:000377031100025" ...
#> ..$ addr_no : int [1:137] 1 1 ...
#> ..$ org_pref: chr [1:137] NA ...
#> ..$ org : chr [1:137] NA ...
#> ..$ city : chr [1:137] "London" ...
#> ..$ state : chr [1:137] NA ...
#> ..$ country : chr [1:137] "England" ...
#> $ author_address:'data.frame': 154 obs. of 3 variables:
#> ..$ ut : chr [1:154] "WOS:000377031100025" ...
#> ..$ author_no: int [1:154] 1 1 ...
#> ..$ addr_no : int [1:154] 1 1 ...
#> $ jsc :'data.frame': 125 obs. of 2 variables:
#> ..$ ut : chr [1:125] "WOS:000377031100025" ...
#> ..$ jsc: chr [1:125] "Veterinary Sciences" ...
#> $ keyword :'data.frame': 279 obs. of 2 variables:
#> ..$ ut : chr [1:279] "WOS:000223297300005" ...
#> ..$ keyword: chr [1:279] "animal welfare" ...
#> $ keywords_plus :'data.frame': 332 obs. of 2 variables:
#> ..$ ut : chr [1:332] "WOS:000412686200005" ...
#> ..$ keywords_plus: chr [1:332] "nonaccidental injury" ...
#> $ grant :'data.frame': 23 obs. of 3 variables:
#> ..$ ut : chr [1:23] "WOS:000314027500001" ...
#> ..$ grant_agency: chr [1:23] "Animal and Society Institute-Wesleyan An"..
#> ..$ grant_id : chr [1:23] NA ...
#> $ doc_type :'data.frame': 93 obs. of 2 variables:
#> ..$ ut : chr [1:93] "WOS:000377031100025" ...
#> ..$ doc_type: chr [1:93] "Letter" ...
-
pull_wos()
returns a series of data frames that are like tables in a relational database. You can link these data frames together as needed, to answer whatever questions you have. For example:
- What are the top 6 journal subject categories (JSCs) in this set of publications and which publications are classified into these JSCs?
library(dplyr)

# Count the rows of the JSC table per category, sort the counts in descending
# order, and keep the first six rows (the head() default, spelled out here).
top_jscs <-
  data$jsc %>%
  group_by(jsc) %>%
  count() %>%
  arrange(desc(n)) %>%
  head(n = 6)

top_jscs
#> # A tibble: 6 x 2
#> # Groups: jsc [6]
#> jsc n
#> <chr> <int>
#> 1 Veterinary Sciences 69
#> 2 Zoology 9
#> 3 Agriculture, Dairy & Animal Science 6
#> 4 Behavioral Sciences 4
#> 5 Sociology 4
#> 6 History & Philosophy of Science 3
# Titles of publications classified into any of the top JSCs. Joining on
# `top_jscs` keeps only rows for those categories; joining on the publication
# table brings in the titles. Duplicate titles are collapsed before the first
# six are shown.
data$jsc %>%
  inner_join(top_jscs, by = "jsc") %>%
  inner_join(data$publication, by = "ut") %>%
  distinct(title) %>%
  head(n = 6)
#> title
#> 1 ANIMAL WELFARE Brachycephalic dog breeds
#> 2 Reporting animal welfare concerns
#> 3 Sled dog racing and animal welfare aspects
#> 4 Animal welfare activist injured while aiding dog
#> 5 Travel with dogs - Aspects of animal welfare
#> 6 ANIMAL WELFARE Welsh government confirms commitment to dog welfare law
- Which publications have “cat” in their abstracts, who are the authors on those publications, and which organizations are those authors from?
# Keep the IDs (ut) of publications whose abstract contains "cat" as a whole
# word (\b marks a word boundary), matched case-insensitively.
cat_pubs <-
data$publication %>%
filter(grepl("\\bcat\\b", abstract, ignore.case = TRUE)) %>%
select(ut)
cat_pubs
#> ut
#> 1 WOS:000256104000004
#> 2 WOS:000346849200008
#> 3 WOS:000441200900005
#> 4 WOS:000317556100002
#> 5 WOS:000282006600007
# Restrict the author table to authors of the publications in cat_pubs.
# semi_join() filters rows of data$author without adding columns from cat_pubs.
cat_authors <-
data$author %>%
semi_join(cat_pubs, by = "ut") %>%
select(ut, author_no, display_name)
cat_authors
#> ut author_no display_name
#> 1 WOS:000256104000004 1 Steiger, A.
#> 2 WOS:000256104000004 2 Stucki, E.
#> 3 WOS:000256104000004 3 Peyer, N.
#> 4 WOS:000256104000004 4 Keller, P.
#> 5 WOS:000346849200008 1 Beausoleil, N. J.
#> 6 WOS:000346849200008 2 Mellor, D. J.
#> 7 WOS:000441200900005 1 Donnellan, Laura
#> 8 WOS:000317556100002 1 Gueguen, Nicolas
#> 9 WOS:000282006600007 1 Farnworth, M. J.
#> 10 WOS:000282006600007 2 Campbell, J.
#> 11 WOS:000282006600007 3 Adams, N. J.
# Link each author to an address through the author_address table, then look
# up the organization recorded for that address. Both joins are inner joins,
# so authors with no matching address row do not appear in the result.
cat_authors %>%
inner_join(data$author_address, by = c("ut", "author_no")) %>%
inner_join(data$address, by = c("ut", "addr_no")) %>%
select(ut, author_no, display_name, org)
#> ut author_no display_name org
#> 1 WOS:000256104000004 1 Steiger, A. Univ Bern
#> 2 WOS:000256104000004 2 Stucki, E. Bundesamt Vet
#> 3 WOS:000256104000004 3 Peyer, N. Bundesamt Vet
#> 4 WOS:000346849200008 1 Beausoleil, N. J. Massey Univ
#> 5 WOS:000346849200008 2 Mellor, D. J. Massey Univ
#> 6 WOS:000441200900005 1 Donnellan, Laura Univ Limerick
#> 7 WOS:000282006600007 1 Farnworth, M. J. Unitec Inst Technol
#> 8 WOS:000282006600007 2 Campbell, J. Unitec Inst Technol
#> 9 WOS:000282006600007 3 Adams, N. J. Unitec Inst Technol
- Which funding organizations were responsible for funding top-cited publications?
# Attach each publication's citation count to its grant rows, drop duplicate
# (agency, publication, citations) combinations, and sort so the most-cited
# publications come first. head() keeps the first six rows.
data$grant %>%
inner_join(data$publication, by = "ut") %>%
select(grant_agency, ut, tot_cites) %>%
distinct() %>%
arrange(desc(tot_cites)) %>%
head()
#> grant_agency
#> 1 Ontario Graduate Scholarship
#> 2 NSERC postgraduate scholarship
#> 3 Animal Welfare Science and Bioethics Centre, Massey University
#> 4 United States National Science Foundation Division of Social and Economic Sciences
#> 5 National Science Foundation, Science, Technology, and Society program
#> 6 Intramural NIH HHS
#> ut tot_cites
#> 1 WOS:000266414200001 80
#> 2 WOS:000266414200001 80
#> 3 WOS:000346849200008 25
#> 4 WOS:000376588600097 21
#> 5 WOS:000376588600097 21
#> 6 WOS:000250058400056 21
- Download more detailed citation data (from the InCites API) for the top-cited publications.
# UTs of the (up to) 100 most-cited publications in the pulled data.
top_100_pubs <-
  data$publication %>%
  arrange(desc(tot_cites)) %>%
  head(n = 100) %>%
  pull(ut)

# Request InCites data for those UTs and show the first rows of the result.
# Replace the placeholder with your own InCites API key.
head(pull_incites(top_100_pubs, key = "your_incites_key"))
#> ut article_type tot_cites journal_expected_citations
#> 1.1 WOS:A1997YD63900007 AA 12 30.50
#> 1.2 WOS:A1994PU81000001 AA 1 3.06
#> 1.3 WOS:A1993KW33000003 AA 9 2.39
#> 1.4 WOS:A1993KT16900015 E 0 0.53
#> 1.5 WOS:A1993KR46800009 AA 6 5.22
#> 1.6 WOS:A1990BS37S00006 P 1 NA
#> journal_act_exp_citations impact_factor avg_expected_rate percentile
#> 1.1 0.39 1.573 17.0 34
#> 1.2 0.33 0.043 14.2 84
#> 1.3 3.76 -1.000 14.2 45
#> 1.4 0.00 -1.000 1.2 100
#> 1.5 1.15 -1.000 14.2 54
#> 1.6 NA -1.000 2.2 26
#> nci esi_most_cited_article hot_paper is_international_collab
#> 1.1 0.70 FALSE FALSE FALSE
#> 1.2 0.07 FALSE FALSE FALSE
#> 1.3 0.64 FALSE FALSE FALSE
#> 1.4 0.00 FALSE FALSE FALSE
#> 1.5 0.42 FALSE FALSE FALSE
#> 1.6 0.45 FALSE FALSE FALSE
#> is_institution_collab is_industry_collab oa_flag
#> 1.1 FALSE FALSE FALSE
#> 1.2 FALSE FALSE FALSE
#> 1.3 FALSE FALSE FALSE
#> 1.4 FALSE FALSE FALSE
#> 1.5 FALSE FALSE FALSE
#> 1.6 FALSE FALSE FALSE