Pre-processing

This vignette describes several pre-processing tools available in the wildRtrax package.

Scanning audio files from a directory

The wt_audio_scanner() function scans audio files (in either wac or wav format) in a local directory and returns a tibble of useful metadata, such as recording date-times, sample rates and file lengths.

# Root directory
root <- '/volumes/GoogleDrive/Shared drives/wildRtrax/data/example'

# Scan data
d <- wildRtrax::wt_audio_scanner(root, file_type = "both")
#> Scanning audio files in path ... 
#> Working on wav files...

# View
head(d)
#> # A tibble: 6 × 13
#>   file_path     size_Mb file_name  location recording_date_time file_type julian
#>   <chr>           <dbl> <chr>      <chr>    <dttm>              <chr>      <dbl>
#> 1 /volumes/Goo…    2.01 ABMI-0754… ABMI-07… 2017-01-11 12:13:58 wav           11
#> 2 /volumes/Goo…  106.   ABMI-0754… ABMI-07… 2017-03-01 00:00:00 wav           60
#> 3 /volumes/Goo…   31.8  ABMI-0754… ABMI-07… 2017-03-01 02:00:00 wav           60
#> 4 /volumes/Goo…  106.   ABMI-0754… ABMI-07… 2017-03-01 08:59:00 wav           60
#> 5 /volumes/Goo…   31.8  ABMI-0754… ABMI-07… 2017-03-01 10:29:00 wav           60
#> 6 /volumes/Goo…   31.8  ABMI-0754… ABMI-07… 2017-03-01 12:00:00 wav           60
#> # … with 6 more variables: year <dbl>, gps_enabled <lgl>, time_index <int>,
#> #   length_seconds <dbl>, sample_rate <dbl>, n_channels <dbl>
# Filter recordings based on criteria
less <- d %>%
  filter(time_index == 4)
less
#> # A tibble: 1 × 13
#>   file_path     size_Mb file_name  location recording_date_time file_type julian
#>   <chr>           <dbl> <chr>      <chr>    <dttm>              <chr>      <dbl>
#> 1 /volumes/Goo…    31.8 ABMI-0754… ABMI-07… 2017-03-01 10:29:00 wav           60
#> # … with 6 more variables: year <dbl>, gps_enabled <lgl>, time_index <int>,
#> #   length_seconds <dbl>, sample_rate <dbl>, n_channels <dbl>
# Or link your media to something more detailed - here's an example of calculating the sun's position with suncalc
less %>%
  mutate(location_latitude = 55.403594,
         location_longitude = -113.770721,
         recording_date_time = lubridate::force_tz(recording_date_time, tzone = "US/Mountain", roll = TRUE)) %>%
  rowwise() %>%
  mutate(zenith = (pull(suncalc::getSunlightPosition(date = recording_date_time,
                                                     lat = location_latitude,
                                                     lon = location_longitude,
                                                     keep = c("altitude"))) * 180) / pi) %>%
  ungroup() %>%
  select(location, recording_date_time, zenith)
#> # A tibble: 1 × 3
#>   location     recording_date_time zenith
#>   <chr>        <dttm>               <dbl>
#> 1 ABMI-0754-SW 2017-03-01 10:29:00   20.9
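
The same joined metadata can drive other solar covariates. Below is a minimal sketch, reusing the illustrative coordinates above, that attaches the sunrise time with suncalc::getSunlightTimes() and works out how long after sunrise each recording started.

# A sketch: hours since sunrise for each recording (coordinates as above)
less %>%
  mutate(location_latitude = 55.403594,
         location_longitude = -113.770721,
         recording_date_time = lubridate::force_tz(recording_date_time, tzone = "US/Mountain", roll = TRUE)) %>%
  rowwise() %>%
  mutate(sunrise = suncalc::getSunlightTimes(date = as.Date(recording_date_time, tz = "US/Mountain"),
                                             lat = location_latitude,
                                             lon = location_longitude,
                                             keep = "sunrise",
                                             tz = "US/Mountain")$sunrise) %>%
  ungroup() %>%
  mutate(hours_since_sunrise = as.numeric(difftime(recording_date_time, sunrise, units = "hours"))) %>%
  select(location, recording_date_time, sunrise, hours_since_sunrise)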

Running the QUT Ecoacoustics AnalysisPrograms software on a wt_* standard data set

The wt_run_ap() function allows you to run the QUT Ecoacoustics Analysis Programs software (AP.exe) on your audio data. Note that you must have AP installed on your computer; see Towsey et al. (2018) for more detail.

#Use the wt_* tibble to execute the AP on the files

#wt_run_ap(x = d, output_dir = paste0(root, 'ap_outputs'), path_to_ap = '/volumes/GoogleDrive/Shared drives/wildRtrax/data/AP')
#Return the metadata obtained from AP
#Extract the index values - find the output csv files
#(each csv holds the per-minute summary index values for one recording)
index_files <- fs::dir_ls(path = '/volumes/GoogleDrive/Shared drives/wildRtrax/data/ap_example',
                          regexp = 'Towsey.Acoustic.Indices.csv',
                          recurse = TRUE)

#Which acoustic indices do you want to use?
index_list <-
  c(
    'Snr',
    'BackgroundNoise',
    'AcousticComplexity',
    'HighFreqCover',
    'MidFreqCover',
    'LowFreqCover',
    'TemporalEntropy',
    'Ndsi',
    'ResultMinute',
    'FileName'
  )

#vroom together the indices you want from all the csvs
test_indices <- vroom::vroom(index_files, col_select = all_of(index_list), altrep = FALSE)

#Join the index values to the wt_audio_scanner tibble
test_join <-
  d %>% 
  right_join(., test_indices, by = c('file_name' = 'FileName')) %>%
  pivot_longer(cols = Snr:Ndsi,
               names_to = 'index_variable',
               values_to = 'index_value') %>%
  distinct() %>%
  #Plot a graph of the indices
  ggplot(.,
         aes(x = ResultMinute, y = index_value, colour = index_variable)) +
  scale_x_continuous(limits = c(0,9)) +
  geom_point() +
  theme_bw() +
  facet_wrap(~ index_variable, ncol = 1)
#test_join
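
If a single summary value per recording is more useful than the minute-by-minute plot, the same long-format indices can be aggregated instead. A short sketch, assuming the d and test_indices objects created above:

#Summarise each index to one mean value per recording
index_summary <- d %>%
  right_join(test_indices, by = c('file_name' = 'FileName')) %>%
  pivot_longer(cols = Snr:Ndsi,
               names_to = 'index_variable',
               values_to = 'index_value') %>%
  group_by(location, recording_date_time, index_variable) %>%
  summarise(mean_value = mean(index_value, na.rm = TRUE), .groups = 'drop')
#index_summary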

Applying a limited amplitude filter

We can use the wt_signal_level() function to search for sounds that exceed a certain amplitude threshold.

# Example audio file
path_to_file <- d$file_path[3]

# Run
s <- wt_signal_level(path = path_to_file, 
                     fmin = 5000, 
                     fmax = 10000, 
                     threshold = 20, 
                     channel = 'left', 
                     aggregate = 5)
#> [1] "Calculating segment 1 out of 1"
#> Warning in if (!is.na(sl)) {: the condition has length > 1 and only the first
#> element will be used

# wt_signal_level() returns a list with the detections and the parameters used
str(s)
#> List of 4
#>  $ output    : tibble [11 × 5] (S3: tbl_df/tbl/data.frame)
#>   ..$ detection       : num [1:11] 1 2 3 4 5 6 7 8 9 10 ...
#>   ..$ mean_rsl        : num [1:11] 21.2 21.1 23.2 21.6 22 ...
#>   ..$ start_time_s    : num [1:11] 6.23 48.97 58.51 92.39 121.09 ...
#>   ..$ end_time_s      : num [1:11] 6.23 49.39 59.64 94.03 121.09 ...
#>   ..$ detection_length: num [1:11] 0 0.418 1.132 1.637 0 ...
#>  $ aggregated: logi TRUE
#>  $ channel   : chr "left"
#>  $ threshold : num 20

# We can view the output:
s['output']
#> $output
#> # A tibble: 11 × 5
#>    detection mean_rsl start_time_s end_time_s detection_length
#>        <dbl>    <dbl>        <dbl>      <dbl>            <dbl>
#>  1         1     21.2         6.23       6.23          0      
#>  2         2     21.1        49.0       49.4           0.418  
#>  3         3     23.2        58.5       59.6           1.13   
#>  4         4     21.6        92.4       94.0           1.64   
#>  5         5     22.0       121.       121.            0      
#>  6         6     20.4       128.       128.            0      
#>  7         7     23.1       136.       137.            0.853  
#>  8         8     23.6       152.       152.            0      
#>  9         9     22.3       158.       158.            0      
#> 10        10     20.9       167.       167.            0      
#> 11        11     22.1       179.       179.            0.00581
# We have eleven detections that exceeded this threshold.
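
Because start_time_s is an offset in seconds from the start of the recording, it can be combined with the recording_date_time returned by wt_audio_scanner() to give each detection an absolute timestamp. A minimal sketch, assuming the s, d and path_to_file objects created above:

#Convert detection offsets to clock times
s$output %>%
  mutate(file_path = path_to_file) %>%
  left_join(d %>% select(file_path, location, recording_date_time), by = 'file_path') %>%
  mutate(detection_time = recording_date_time + lubridate::dseconds(start_time_s)) %>%
  select(location, detection, detection_time, mean_rsl)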

Linking files and creating templates for upload to WildTrax

There are three phases of getting data into WildTrax once you have your organization and project set up: uploading recordings, uploading tasks and uploading tags (see more in ARU projects in WildTrax: The Definitive Guide).

If you’ve used wt_audio_scanner() to scan through a series of nested directories for audio files, you can link the files you want to upload into a single staging directory.

#An example for a unix scenario
#R.utils::createLink(link = "./my_uploads", target = less$file_path, methods = "unix-symlink")
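
If there are several recordings to stage, the scanned file paths can be linked in a single call instead. Below is a sketch (not run), assuming a unix-like filesystem and a ./my_uploads staging folder, that uses fs::link_create() on the paths held in the wt_audio_scanner() tibble.

#A sketch (not run): symlink every file in the tibble into the staging folder
#fs::dir_create("./my_uploads")
#fs::link_create(path = less$file_path,
#                new_path = fs::path("./my_uploads", basename(less$file_path)),
#                symbolic = TRUE)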

The next step is to create the tasks and format the columns to match the upload template.

#In a limited amplitude workflow, unnest the lists to get the detections
less_tasks <- less %>%
  mutate(thresholds = furrr::future_map(.x = file_path,
                                        .f = ~ wt_signal_level(.x,
                                                               fmin = 10,
                                                               fmax = 1000,
                                                               threshold = 35,
                                                               aggregate = 10))) %>%
  unnest_wider(thresholds) %>%
  unnest_wider(output) %>%
  select(-c(end_time_s, aggregated, channel)) %>%
  unnest(cols = c(detection, mean_rsl, start_time_s, detection_length))
#> [1] "Calculating segment 1 out of 1"
#> Warning in if (!is.na(sl)) {: the condition has length > 1 and only the first
#> element will be used
less_tasks
#> # A tibble: 1 × 18
#>   file_path     size_Mb file_name  location recording_date_time file_type julian
#>   <chr>           <dbl> <chr>      <chr>    <dttm>              <chr>      <dbl>
#> 1 /volumes/Goo…    31.8 ABMI-0754… ABMI-07… 2017-03-01 10:29:00 wav           60
#> # … with 11 more variables: year <dbl>, gps_enabled <lgl>, time_index <int>,
#> #   length_seconds <dbl>, sample_rate <dbl>, n_channels <dbl>, detection <dbl>,
#> #   mean_rsl <dbl>, start_time_s <dbl>, detection_length <dbl>, threshold <dbl>
#Create the template
tasks <-
  less_tasks %>% mutate(
    recordingDate = recording_date_time,
    method = "10m USPM", #Choose a method
    status = "",
    transcriber = "Alex MacPhail", #An observer
    rain = "",
    wind = "",
    industryNoise = "",
    otherNoise = "",
    audioQuality = "",
    taskComments = "",
    internal_task_id = ""
  ) %>%
  select(
    location,
    recordingDate,
    method,
    status,
    transcriber,
    rain,
    wind,
    industryNoise,
    otherNoise,
    audioQuality,
    taskComments,
    internal_task_id
  ) %>%
  distinct()
tasks
#> # A tibble: 1 × 12
#>   location     recordingDate       method   status transcriber   rain  wind 
#>   <chr>        <dttm>              <chr>    <chr>  <chr>         <chr> <chr>
#> 1 ABMI-0754-SW 2017-03-01 10:29:00 10m USPM ""     Alex MacPhail ""    ""   
#> # … with 5 more variables: industryNoise <chr>, otherNoise <chr>,
#> #   audioQuality <chr>, taskComments <chr>, internal_task_id <chr>

#write.csv(tasks,"./tasks.csv")
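
Before uploading, it can be worth checking the template; for example, flagging any location and recordingDate combinations that appear more than once. A small sketch using the tasks tibble built above:

#Any duplicated location / recordingDate combinations should be resolved first
tasks %>%
  count(location, recordingDate) %>%
  filter(n > 1)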

And finally, the tags - this step only applies if you’re using a limited amplitude workflow or importing tags from another database.

tags <-
  less_tasks %>% mutate(
    recordingDate = recording_date_time,
    method = "10m USPM",
    transcriber = "Alex MacPhail",
    species = "UNKN", #Give the tag an unknown ID if using it in a limited amplitude workflow
    speciesIndividualNumber = detection,
    vocalization = "",
    startTime = start_time_s,
    length = detection_length,
    minFreq = 4,
    maxFreq = 8,
    internal_tag_id = ""
  ) %>%
  select(
    location,
    recordingDate,
    method,
    transcriber,
    species,
    speciesIndividualNumber,
    vocalization,
    startTime,
    length,
    minFreq,
    maxFreq,
    internal_tag_id
  )
tags

#write.csv(tags,"./tags.csv")
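
As a last check, each tag's start time should fall within the recording it belongs to. A minimal sketch using less_tasks, which still carries length_seconds from wt_audio_scanner():

#Tags starting outside their recording (this should return zero rows)
less_tasks %>%
  filter(start_time_s < 0 | start_time_s > length_seconds)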