1. The first step is to open up a session with the WoS API. auth() will authenticate your credentials with the API’s server and return a session ID (SID).
library(wosr)
# Authenticate against the WoS API and keep the returned session ID (SID).
# Replace the placeholder strings with your own credentials.
sid <- auth(username = "your_username", password = "your_password")
  1. Now we can query the Web of Science to see how many records match our query string.
# Find all publications that contain the phrase "animal welfare" in their
# titles (TI tag) and that have the words "dog" and "welfare" somewhere in their
# titles, abstracts, or lists of keywords (TS tag).
query <- 'TI = ("animal welfare") AND TS = (dog welfare)'
# Ask the API how many records match the query, reusing the session ID.
query_wos(query, sid = sid)
#> Matching records: 89
  1. Pull the data.
# Download the records matching the query, reusing the session ID.
data <- pull_wos(query, sid = sid)
# Print the result to inspect the data frames it contains.
data
#> List of 9
#>  $ publication   :'data.frame':  89 obs. of  7 variables:
#>   ..$ ut       : chr [1:89] "WOS:000377031100025" ...
#>   ..$ title    : chr [1:89] "ANIMAL WELFARE Brachycephalic dog breeds" ...
#>   ..$ journal  : chr [1:89] "Veterinary Record" ...
#>   ..$ date     : Date[1:89], format: "2016-05-28" ...
#>   ..$ doi      : chr [1:89] "10.1136/vr.i2991" ...
#>   ..$ tot_cites: int [1:89] 0 0 ...
#>   ..$ abstract : chr [1:89] NA ...
#>  $ author        :'data.frame':  238 obs. of  7 variables:
#>   ..$ ut          : chr [1:238] "WOS:000377031100025" ...
#>   ..$ author_no   : int [1:238] 1 1 ...
#>   ..$ display_name: chr [1:238] "Goddard, Philip" ...
#>   ..$ first_name  : chr [1:238] "Philip" ...
#>   ..$ last_name   : chr [1:238] "Goddard" ...
#>   ..$ email       : chr [1:238] "PGoddard@goddardvetgroup.co.uk" ...
#>   ..$ daisng_id   : int [1:238] 5131890 688455 ...
#>  $ address       :'data.frame':  137 obs. of  7 variables:
#>   ..$ ut      : chr [1:137] "WOS:000377031100025" ...
#>   ..$ addr_no : int [1:137] 1 1 ...
#>   ..$ org_pref: chr [1:137] NA ...
#>   ..$ org     : chr [1:137] NA ...
#>   ..$ city    : chr [1:137] "London" ...
#>   ..$ state   : chr [1:137] NA ...
#>   ..$ country : chr [1:137] "England" ...
#>  $ author_address:'data.frame':  154 obs. of  3 variables:
#>   ..$ ut       : chr [1:154] "WOS:000377031100025" ...
#>   ..$ author_no: int [1:154] 1 1 ...
#>   ..$ addr_no  : int [1:154] 1 1 ...
#>  $ jsc           :'data.frame':  125 obs. of  2 variables:
#>   ..$ ut : chr [1:125] "WOS:000377031100025" ...
#>   ..$ jsc: chr [1:125] "Veterinary Sciences" ...
#>  $ keyword       :'data.frame':  279 obs. of  2 variables:
#>   ..$ ut     : chr [1:279] "WOS:000223297300005" ...
#>   ..$ keyword: chr [1:279] "animal welfare" ...
#>  $ keywords_plus :'data.frame':  332 obs. of  2 variables:
#>   ..$ ut           : chr [1:332] "WOS:000412686200005" ...
#>   ..$ keywords_plus: chr [1:332] "nonaccidental injury" ...
#>  $ grant         :'data.frame':  23 obs. of  3 variables:
#>   ..$ ut          : chr [1:23] "WOS:000314027500001" ...
#>   ..$ grant_agency: chr [1:23] "Animal and Society Institute-Wesleyan An"..
#>   ..$ grant_id    : chr [1:23] NA ...
#>  $ doc_type      :'data.frame':  93 obs. of  2 variables:
#>   ..$ ut      : chr [1:93] "WOS:000377031100025" ...
#>   ..$ doc_type: chr [1:93] "Letter" ...
  1. pull_wos() returns a series of data frames that are like tables in a relational database. You can link these data frames together as needed to answer whatever questions you have. For example:
  • What are the top 6 journal subject categories (JSCs) in this set of publications and which publications are classified into these JSCs?
library(dplyr)

# Count the rows of data$jsc per journal subject category, sort the categories
# from most to least frequent, and keep the first six.
top_jscs <- data$jsc %>%
  group_by(jsc) %>%
  count() %>%
  arrange(desc(n)) %>%
  head(n = 6L)

top_jscs
#> # A tibble: 6 x 2
#> # Groups:   jsc [6]
#>   jsc                                     n
#>   <chr>                               <int>
#> 1 Veterinary Sciences                    69
#> 2 Zoology                                 9
#> 3 Agriculture, Dairy & Animal Science     6
#> 4 Behavioral Sciences                     4
#> 5 Sociology                               4
#> 6 History & Philosophy of Science         3
# Show titles of publications classified into one of the JSCs in top_jscs.
# semi_join() keeps the rows of data$jsc whose JSC appears in top_jscs without
# carrying over the unused count column; the inner join then attaches the
# publication details by UT so the titles can be selected and de-duplicated.
data$jsc %>%
  semi_join(top_jscs, by = "jsc") %>%
  inner_join(data$publication, by = "ut") %>%
  select(title) %>%
  distinct() %>%
  head()
#>                                                                    title
#> 1                               ANIMAL WELFARE Brachycephalic dog breeds
#> 2                                      Reporting animal welfare concerns
#> 3                             Sled dog racing and animal welfare aspects
#> 4                       Animal welfare activist injured while aiding dog
#> 5                           Travel with dogs - Aspects of animal welfare
#> 6 ANIMAL WELFARE Welsh government confirms commitment to dog welfare law
  • Which publications have “cat” in their abstracts, who are the authors on those publications, and which organizations are those authors from?
# Find publications whose abstract contains the standalone word "cat"
# (matched case-insensitively) and keep only their UT identifiers.
cat_pubs <- data$publication %>%
  filter(grepl("\\bcat\\b", abstract, ignore.case = TRUE)) %>%
  select(ut)

cat_pubs
#>                    ut
#> 1 WOS:000256104000004
#> 2 WOS:000346849200008
#> 3 WOS:000441200900005
#> 4 WOS:000317556100002
#> 5 WOS:000282006600007
# Keep the author rows whose UT appears in cat_pubs, retaining just the
# columns needed to identify each author.
cat_authors <- data$author %>%
  semi_join(cat_pubs, by = "ut") %>%
  select(ut, author_no, display_name)

cat_authors
#>                     ut author_no      display_name
#> 1  WOS:000256104000004         1       Steiger, A.
#> 2  WOS:000256104000004         2        Stucki, E.
#> 3  WOS:000256104000004         3         Peyer, N.
#> 4  WOS:000256104000004         4        Keller, P.
#> 5  WOS:000346849200008         1 Beausoleil, N. J.
#> 6  WOS:000346849200008         2     Mellor, D. J.
#> 7  WOS:000441200900005         1  Donnellan, Laura
#> 8  WOS:000317556100002         1  Gueguen, Nicolas
#> 9  WOS:000282006600007         1  Farnworth, M. J.
#> 10 WOS:000282006600007         2      Campbell, J.
#> 11 WOS:000282006600007         3      Adams, N. J.
# Link each author to an address via the author_address table, then look up
# the organization recorded for that address. Because both steps are inner
# joins, authors with no matching address row are dropped from the result.
cat_authors %>%
  inner_join(data$author_address, by = c("ut", "author_no")) %>%
  inner_join(data$address, by = c("ut", "addr_no")) %>%
  select(ut, author_no, display_name, org)
#>                    ut author_no      display_name                 org
#> 1 WOS:000256104000004         1       Steiger, A.           Univ Bern
#> 2 WOS:000256104000004         2        Stucki, E.       Bundesamt Vet
#> 3 WOS:000256104000004         3         Peyer, N.       Bundesamt Vet
#> 4 WOS:000346849200008         1 Beausoleil, N. J.         Massey Univ
#> 5 WOS:000346849200008         2     Mellor, D. J.         Massey Univ
#> 6 WOS:000441200900005         1  Donnellan, Laura       Univ Limerick
#> 7 WOS:000282006600007         1  Farnworth, M. J. Unitec Inst Technol
#> 8 WOS:000282006600007         2      Campbell, J. Unitec Inst Technol
#> 9 WOS:000282006600007         3      Adams, N. J. Unitec Inst Technol
  • Which funding organizations were responsible for funding top-cited publications?
# Attach each grant to its publication's citation count, drop duplicate
# agency/publication pairs, and list the most-cited publications first.
data$grant %>%
  inner_join(data$publication, by = "ut") %>%
  select(grant_agency, ut, tot_cites) %>%
  distinct() %>%
  arrange(desc(tot_cites)) %>%
  head()
#>                                                                         grant_agency
#> 1                                                       Ontario Graduate Scholarship
#> 2                                                     NSERC postgraduate scholarship
#> 3                     Animal Welfare Science and Bioethics Centre, Massey University
#> 4 United States National Science Foundation Division of Social and Economic Sciences
#> 5              National Science Foundation, Science, Technology, and Society program
#> 6                                                                 Intramural NIH HHS
#>                    ut tot_cites
#> 1 WOS:000266414200001        80
#> 2 WOS:000266414200001        80
#> 3 WOS:000346849200008        25
#> 4 WOS:000376588600097        21
#> 5 WOS:000376588600097        21
#> 6 WOS:000250058400056        21
  1. Download more detailed citation data (from the InCites API) for the top-cited publications.
# Collect the UTs of the 100 most-cited publications (or of all publications
# when fewer than 100 were pulled).
top_100_pubs <- data$publication %>%
  arrange(desc(tot_cites)) %>%
  head(100) %>%
  pull(ut)

# Request InCites data for those UTs and preview the first rows of the result.
pull_incites(top_100_pubs, key = "your_incites_key") %>%
  head()
#>                      ut article_type tot_cites journal_expected_citations
#> 1.1 WOS:A1997YD63900007           AA        12                      30.50
#> 1.2 WOS:A1994PU81000001           AA         1                       3.06
#> 1.3 WOS:A1993KW33000003           AA         9                       2.39
#> 1.4 WOS:A1993KT16900015            E         0                       0.53
#> 1.5 WOS:A1993KR46800009           AA         6                       5.22
#> 1.6 WOS:A1990BS37S00006            P         1                         NA
#>     journal_act_exp_citations impact_factor avg_expected_rate percentile
#> 1.1                      0.39         1.573              17.0         34
#> 1.2                      0.33         0.043              14.2         84
#> 1.3                      3.76        -1.000              14.2         45
#> 1.4                      0.00        -1.000               1.2        100
#> 1.5                      1.15        -1.000              14.2         54
#> 1.6                        NA        -1.000               2.2         26
#>      nci esi_most_cited_article hot_paper is_international_collab
#> 1.1 0.70                  FALSE     FALSE                   FALSE
#> 1.2 0.07                  FALSE     FALSE                   FALSE
#> 1.3 0.64                  FALSE     FALSE                   FALSE
#> 1.4 0.00                  FALSE     FALSE                   FALSE
#> 1.5 0.42                  FALSE     FALSE                   FALSE
#> 1.6 0.45                  FALSE     FALSE                   FALSE
#>     is_institution_collab is_industry_collab oa_flag
#> 1.1                 FALSE              FALSE   FALSE
#> 1.2                 FALSE              FALSE   FALSE
#> 1.3                 FALSE              FALSE   FALSE
#> 1.4                 FALSE              FALSE   FALSE
#> 1.5                 FALSE              FALSE   FALSE
#> 1.6                 FALSE              FALSE   FALSE